X-Git-Url: http://wagnertech.de/gitweb/gitweb.cgi/mfinanz.git/blobdiff_plain/06a0f32d8602a6b79e51b0fcd51edb67e780a003..a13f2a9bd60851cbfd495dcff82871fd89ec9acc:/SL/HTML/Util.pm diff --git a/SL/HTML/Util.pm b/SL/HTML/Util.pm index c62fd9202..a54425d83 100644 --- a/SL/HTML/Util.pm +++ b/SL/HTML/Util.pm @@ -7,9 +7,11 @@ use HTML::Parser; my %stripper; my %entities = ( - 'lt' => '<', - 'gt' => '>', - 'amp' => '&', + 'lt' => '<', + 'gt' => '>', + 'amp' => '&', + 'nbsp' => ' ', # should be => "\x{00A0}", but this can lead to problems with + # a non-visible character in csv-exports for example ); sub strip { @@ -25,7 +27,7 @@ sub strip { if (!%stripper) { %stripper = ( parser => HTML::Parser->new ); - $stripper{parser}->handler(text => sub { $stripper{text} .= $_[1]; }); + $stripper{parser}->handler(text => sub { $stripper{text} .= ' ' . $_[1]; }); } $stripper{text} = ''; @@ -33,6 +35,8 @@ sub strip { $stripper{parser}->eof; $stripper{text} =~ s{\&([^;]+);}{ $entities{$1} || "\&$1;" }eg; + $stripper{text} =~ s{^ +| +$}{}g; + $stripper{text} =~ s{ {2,}}{ }g; return delete $stripper{text}; }