From 792ae733e8f54eca6d306ad523a7a6e166fcb0e0 Mon Sep 17 00:00:00 2001 From: Moritz Bunkus Date: Tue, 14 Jan 2014 10:02:55 +0100 Subject: [PATCH] Refactoring: {strip,restrict}_html in eigene Module ausgelagert --- SL/DB/Helper/AttrHTML.pm | 32 ++++---------------- SL/HTML/Restrict.pm | 63 ++++++++++++++++++++++++++++++++++++++++ SL/HTML/Util.pm | 62 +++++++++++++++++++++++++++++++++++++++ SL/Presenter/Tag.pm | 17 ++++++++++- 4 files changed, 146 insertions(+), 28 deletions(-) create mode 100644 SL/HTML/Restrict.pm create mode 100644 SL/HTML/Util.pm diff --git a/SL/DB/Helper/AttrHTML.pm b/SL/DB/Helper/AttrHTML.pm index b57997272..b5471e09a 100644 --- a/SL/DB/Helper/AttrHTML.pm +++ b/SL/DB/Helper/AttrHTML.pm @@ -5,30 +5,6 @@ use strict; use parent qw(Exporter); our @EXPORT = qw(attr_html); -use utf8; -use Carp; -use Encode (); -use HTML::Restrict (); -use HTML::Parser; - -my %stripper; - -sub _strip_html { - my ($value) = @_; - - if (!%stripper) { - %stripper = ( parser => HTML::Parser->new ); - - $stripper{parser}->handler(text => sub { $stripper{text} .= $_[1]; }); - } - - $stripper{text} = ''; - $stripper{parser}->parse($value); - $stripper{parser}->eof; - - return delete $stripper{text}; -} - sub attr_html { my ($package, $attributes, %params) = @_; @@ -49,12 +25,13 @@ sub _make_stripped { my ($package, $attribute, %params) = @_; no strict 'refs'; + require SL::HTML::Util; *{ $package . '::' . $attribute . '_as_stripped_html' } = sub { my ($self, $value) = @_; - return $self->$attribute(_strip_html($value)) if @_ > 1; - return _strip_html($self->$attribute); + return $self->$attribute(SL::HTML::Util->strip($value)) if @_ > 1; + return SL::HTML::Util->strip($self->$attribute); }; } @@ -62,8 +39,9 @@ sub _make_restricted { my ($package, $attribute, %params) = @_; no strict 'refs'; + require SL::HTML::Restrict; - my $cleaner = HTML::Restrict->new(rules => $params{allowed_tags}); + my $cleaner = SL::HTML::Restrict->create(%params); *{ $package . '::' . $attribute . '_as_restricted_html' } = sub { my ($self, $value) = @_; diff --git a/SL/HTML/Restrict.pm b/SL/HTML/Restrict.pm new file mode 100644 index 000000000..f30bc87fb --- /dev/null +++ b/SL/HTML/Restrict.pm @@ -0,0 +1,63 @@ +package SL::HTML::Restrict; + +use strict; +use warnings; + +use HTML::Restrict (); + +sub create { + my ($class, %params) = @_; + $params{allowed_tags} //= { map { ($_ => ['/']) } qw(b strong i em u ul ol li sub sup s strike br p div) }; + + return HTML::Restrict->new(rules => $params{allowed_tags}); +} + +1; +__END__ + +=pod + +=encoding utf8 + +=head1 NAME + +SL::HTML::Restrict - Restrict HTML tags to set of allowed tags + +=head1 SYNOPSIS + + my $cleaner = SL::HTML::Restrict->create; + my $cleaned = $cleaner->process($unsafe_html); + +=head1 + +Often you want to allow a fixed set of well-known HTML tags to be used +– but nothing else. This is a thin wrapper providing a default set of +the following elements: + +C + +This list can be overwritten. + +=head1 FUNCTIONS + +=over 4 + +=item C + +Creates and returns a new instance of L. The optional +parameter C must be an array reference of allowed tag +names. If it's missing then the default set will be used (see above). + +Returns an instance of L. + +=back + +=head1 BUGS + +Nothing here yet. + +=head1 AUTHOR + +Moritz Bunkus Em.bunkus@linet-services.deE + +=cut diff --git a/SL/HTML/Util.pm b/SL/HTML/Util.pm new file mode 100644 index 000000000..7212d646f --- /dev/null +++ b/SL/HTML/Util.pm @@ -0,0 +1,62 @@ +package SL::HTML::Util; + +use strict; +use warnings; + +use HTML::Parser; + +my %stripper; + +sub strip { + my ($class_or_value) = @_; + + my $value = !ref($class_or_value) && (($class_or_value // '') eq 'SL::HTML::Util') ? $_[1] : $class_or_value; + + if (!%stripper) { + %stripper = ( parser => HTML::Parser->new ); + + $stripper{parser}->handler(text => sub { $stripper{text} .= $_[1]; }); + } + + $stripper{text} = ''; + $stripper{parser}->parse($value); + $stripper{parser}->eof; + + return delete $stripper{text}; +} + +1; +__END__ + +=pod + +=encoding utf8 + +=head1 NAME + +SL::HTML::Util - Utility functions dealing with HTML + +=head1 SYNOPSIS + + my $plain_text = SL::HTML::Util->strip('

Hello World

'); + +=head1 FUNCTIONS + +=over 4 + +=item C + +Removes all HTML elements and tags from C<$html_content> and returns +the remaining plain text. + +=back + +=head1 BUGS + +Nothing here yet. + +=head1 AUTHOR + +Moritz Bunkus Em.bunkus@linet-services.deE + +=cut diff --git a/SL/Presenter/Tag.pm b/SL/Presenter/Tag.pm index 924ab8b57..bd489066b 100644 --- a/SL/Presenter/Tag.pm +++ b/SL/Presenter/Tag.pm @@ -2,10 +2,12 @@ package SL::Presenter::Tag; use strict; +use SL::HTML::Restrict; + use parent qw(Exporter); use Exporter qw(import); -our @EXPORT = qw(html_tag input_tag man_days_tag name_to_id select_tag stringify_attributes); +our @EXPORT = qw(html_tag input_tag man_days_tag name_to_id select_tag stringify_attributes restricted_html); use Carp; @@ -195,6 +197,15 @@ sub _set_id_attribute { return %{ $attributes }; } +my $html_restricter; + +sub restricted_html { + my ($self, $value) = @_; + + $html_restricter ||= SL::HTML::Restrict->create; + return $html_restricter->process($value); +} + 1; __END__ @@ -264,6 +275,10 @@ Creates a string from all elements in C<%items> suitable for usage as HTML tag attributes. Keys and values are HTML escaped even though keys must not contain non-ASCII characters for browsers to accept them. +=item C + +Returns HTML stripped of unknown tags. See L. + =back =head2 HIGH-LEVEL FUNCTIONS -- 2.20.1