X-Git-Url: http://wagnertech.de/git?a=blobdiff_plain;f=SL%2FHTML%2FUtil.pm;h=a54425d837c7b00f5d7370d18748d0aa5177b280;hb=f6191560e6db010cd9bf8b0854174d772b49b5c2;hp=72a7a7f57481a3522833d04bfc45740d3fccc27a;hpb=e09fbb1194ebfbb3b278bae40d31c67e416cf919;p=kivitendo-erp.git diff --git a/SL/HTML/Util.pm b/SL/HTML/Util.pm index 72a7a7f57..a54425d83 100644 --- a/SL/HTML/Util.pm +++ b/SL/HTML/Util.pm @@ -7,9 +7,11 @@ use HTML::Parser; my %stripper; my %entities = ( - 'lt' => '<', - 'gt' => '>', - 'amp' => '&', + 'lt' => '<', + 'gt' => '>', + 'amp' => '&', + 'nbsp' => ' ', # should be => "\x{00A0}", but this can lead to problems with + # a non-visible character in csv-exports for example ); sub strip { @@ -17,10 +19,15 @@ sub strip { my $value = !ref($class_or_value) && (($class_or_value // '') eq 'SL::HTML::Util') ? $_[1] : $class_or_value; + return '' unless defined $value; + + # Remove HTML comments. + $value =~ s{ }{}gx; + if (!%stripper) { %stripper = ( parser => HTML::Parser->new ); - $stripper{parser}->handler(text => sub { $stripper{text} .= $_[1]; }); + $stripper{parser}->handler(text => sub { $stripper{text} .= ' ' . $_[1]; }); } $stripper{text} = ''; @@ -28,6 +35,8 @@ sub strip { $stripper{parser}->eof; $stripper{text} =~ s{\&([^;]+);}{ $entities{$1} || "\&$1;" }eg; + $stripper{text} =~ s{^ +| +$}{}g; + $stripper{text} =~ s{ {2,}}{ }g; return delete $stripper{text}; }