Filter::FormatText#2

HTMLの本文を整形したPlainテキストに変換します。

サンプル

- module: Filter::FormatText
  config:
    bytes_per_line: 1行の長さ(optional def.72)
    always: HTMLテキスト以外でも実行するか?(optional def.0)

使い方や目的はこちら


ダウンロード: FormatText.pm

 package Plagger::Plugin::Filter::FormatText;

use HTML::TreeBuilder;
use HTML::FormatText;
use Jcode;

use strict;
use warnings;
use base qw( Plagger::Plugin );

our $VERSION = 0.03;

# Subscribe this plugin to Plagger's 'update.entry.fixup' hook so that
# filter() is invoked for entries passing through that phase.
sub register {
    my ($plugin, $ctx) = @_;

    my %hook_for = ( 'update.entry.fixup' => \&filter );

    $ctx->register_hook( $plugin, %hook_for );
}

# Hook callback for 'update.entry.fixup': replaces an entry's HTML body
# with folded plain text.
#
# Arguments:
#   $self    - this plugin; reads conf->{always} and conf->{bytes_per_line}
#   $context - Plagger context, used here only for logging
#   $args    - hook arguments; $args->{entry} is the entry to rewrite
#
# The body is rewritten when it contains tag-like markup, or
# unconditionally when the 'always' option is true. Nothing is returned.
sub filter {
    my ($self, $context, $args) = @_;
    my $entry = $args->{entry};
    my $body  = $entry->body;
    return unless defined $body;

    # Cheap HTML detection: if removing every <...> span leaves the text
    # unchanged, the body carries no markup.
    my $stripped = $body;
    $stripped =~ s/<[^>]*>//g;

    # The original guard was "unless (always || no-markup)", which made
    # 'always' suppress formatting entirely instead of forcing it.
    return unless $self->conf->{always} || $body ne $stripped;

    my $width = $self->conf->{bytes_per_line} || 72;

    # parse() must be followed by eof() so the parser flushes any text it
    # is still buffering at the end of the document.
    my $tree = HTML::TreeBuilder->new;
    $tree->parse($body);
    $tree->eof;

    # Margins 0..998: presumably wide enough that HTML::FormatText does not
    # wrap lines itself, leaving the folding to jfold below.
    my $formatter = HTML::FormatText->new( lm => 0, rm => 998 );
    my $text      = $formatter->format($tree);

    # Release the tree explicitly; older HTML::Element versions hold
    # circular references that are never garbage-collected otherwise.
    $tree->delete;

    # http://72.14.209.104/search?q=cache:ENkjSG7daIIJ:rcl.hp.infoseek.co.jp/cgi-bin/wiki.cgi%3Fpage%3DPerl%252FTIPS+jfold+%E6%94%B9%E8%A1%8C
    # Fold each formatted line at $width with Jcode.
    my $folded = join "\n",
        map { jcode($_)->jfold( $width, "\n" ) }
        split /\n/, $text;

    # NOTE(review): stores the result as UTF-8 octets, as the original did;
    # confirm this matches what downstream plugins expect.
    utf8::encode($folded);
    $entry->body($folded);

    $context->log(info => "format $entry->{link}") if $entry->{link};
    return;
}

1;

__END__

=head1 NAME

Plagger::Plugin::Filter::FormatText - Format HTML as plaintext

=head1 SYNOPSIS

- module: Filter::FormatText

=head1 DESCRIPTION

This plugin converts the HTML body of each entry into formatted plain text
using L<HTML::FormatText>.

=head1 CONFIG

=over 4

=item bytes_per_line (Optional)

Width at which each line of the formatted text is folded. Defaults to 72.

=item always (Optional)

When set to 1, format the body even if it contains no HTML tags. Defaults to 0.

=back

=head1 AUTHOR

Nobuhito Sato

=head1 SEE ALSO

L<Plagger>, L<HTML::FormatText>

=cut