Filter::FormatText#2
HTMLの本文を整形したPlainテキストに変換します。
- module: Filter::FormatText
config:
bytes_per_line: 1行の長さ(optional def.72)
always: HTMLテキスト以外でも実行するか?(optional def.0)
使い方や目的はこちら
ダウンロード: FormatText.pm
package Plagger::Plugin::Filter::FormatText; use HTML::TreeBuilder;
use HTML::FormatText;
use Jcode;
use strict;
use warnings;
use base qw( Plagger::Plugin );
our $VERSION = 0.03;
# Plugin entry point: subscribe this plugin's filter() callback to the
# 'update.entry.fixup' hook on the given context.
#
# Arguments:
#   $self    - the plugin instance being registered
#   $context - object providing register_hook()
#
# Returns whatever register_hook() returns.
sub register {
    my $self    = shift;
    my $context = shift;

    my %hook_for = ( 'update.entry.fixup' => \&filter );

    $context->register_hook( $self, %hook_for );
}
# Hook callback for 'update.entry.fixup': rewrite an entry's HTML body as
# folded plain text.
#
# Arguments:
#   $self    - plugin instance; its conf supplies 'always' and 'bytes_per_line'
#   $context - object whose log() method receives an info message on success
#   $args    - hashref whose 'entry' element is the entry to process
#
# The body is converted when it contains at least one tag-like "<...>"
# sequence, or unconditionally when the 'always' option is true.  An
# undefined or empty body is left untouched.
sub filter {
    my ($self, $context, $args) = @_;

    my $entry = $args->{entry};
    my $body  = $entry->body;

    # Nothing to format; this also avoids "uninitialized" warnings below.
    return unless defined $body && length $body;

    # 'always' must force formatting, and only tag-free text may be skipped.
    # The former `unless (always || no tags)` test did the reverse for
    # 'always': setting the option silently disabled the plugin.
    my $has_markup = $body =~ /<[^>]*>/;
    return unless $self->conf->{always} || $has_markup;

    # parse() only feeds the parser; eof() is what finalizes the tree.
    my $tree = HTML::TreeBuilder->new;
    $tree->parse($body);
    $tree->eof;

    # A very wide right margin leaves the line wrapping to jfold below.
    my $formatter = HTML::FormatText->new( lm => 0, rm => 998 );
    my $text      = $formatter->format($tree);

    # Release the tree explicitly: on HTML::TreeBuilder versions without
    # weak references its parent/child cycles are never garbage-collected.
    $tree->delete;

    my $width = $self->conf->{bytes_per_line} || 72;

    # jfold line-folding recipe:
    # http://72.14.209.104/search?q=cache:ENkjSG7daIIJ:rcl.hp.infoseek.co.jp/cgi-bin/wiki.cgi%3Fpage%3DPerl%252FTIPS+jfold+%E6%94%B9%E8%A1%8C
    my $folded = join "\n",
        map { jcode($_)->jfold( $width, "\n" ) }
        split /\n/, $text;

    utf8::encode($folded);
    $entry->body($folded);
    $context->log( info => "format $entry->{link}" ) if $entry->{link};

    return;
}
1;
__END__
=head1 NAME
Plagger::Plugin::Filter::FormatText - Format HTML as plaintext
=head1 SYNOPSIS
- module: Filter::FormatText
=head1 DESCRIPTION
This plugin converts the HTML body of each entry into formatted plain text using HTML::FormatText.
=head1 CONFIG
=over 4
=item bytes_per_line (Optional)
Line length at which the converted text is folded. Defaults to 72.
=item always (Optional)
When set to 1, format the body even if it does not contain HTML tags. Defaults to 0.
=back
=head1 AUTHOR
Nobuhito Sato
=head1 SEE ALSO
L<Plagger>, L<HTML::FormatText>
=cut