X-Git-Url: http://wagnertech.de/git?a=blobdiff_plain;f=SL%2FHTML%2FUtil.pm;h=07031869fd5886493ecd59073a28693f0acf1622;hb=53593baa211863fbf66540cf1bcc36c8fb37257f;hp=0b2e630d00d227829ee6e7835f28489c1bb66a74;hpb=5de9f9f8e33b1fe8ab69cf38bc58d74cbec81779;p=kivitendo-erp.git diff --git a/SL/HTML/Util.pm b/SL/HTML/Util.pm index 0b2e630d0..07031869f 100644 --- a/SL/HTML/Util.pm +++ b/SL/HTML/Util.pm @@ -7,9 +7,11 @@ use HTML::Parser; my %stripper; my %entities = ( - 'lt' => '<', - 'gt' => '>', - 'amp' => '&', + 'lt' => '<', + 'gt' => '>', + 'amp' => '&', + 'nbsp' => ' ', # should be => "\x{00A0}", but this can lead to problems with + # a non-visible character in csv-exports for example ); sub strip { @@ -17,21 +19,53 @@ sub strip { my $value = !ref($class_or_value) && (($class_or_value // '') eq 'SL::HTML::Util') ? $_[1] : $class_or_value; + return '' unless defined $value; + + # Remove HTML comments. + $value =~ s{ }{}gx; + if (!%stripper) { %stripper = ( parser => HTML::Parser->new ); - $stripper{parser}->handler(text => sub { $stripper{text} .= $_[1]; }); + $stripper{parser}->handler(text => sub { $stripper{text} .= ' ' . $_[1]; }); } $stripper{text} = ''; $stripper{parser}->parse($value); $stripper{parser}->eof; - $stripper{text} =~ s{\&([^;]+);}{ $entities{$1} }eg; + $stripper{text} =~ s{\&([^;]+);}{ $entities{$1} || "\&$1;" }eg; + $stripper{text} =~ s{^ +| +$}{}g; + $stripper{text} =~ s{ {2,}}{ }g; return delete $stripper{text}; } +sub plain_text_to_html { + my ($class_or_text) = @_; + + my $text = !ref($class_or_text) && (($class_or_text // '') eq 'SL::HTML::Util') ? $_[1] : $class_or_text; + + return $text if $text =~ m{^

.*

$}; + + $text =~ s{\r+}{}g; + $text =~ s{^[[:space:]]+|[[:space:]]+$}{}g; + + return '' if $text eq ''; + + my @paragraphs; + + foreach my $paragraph (split m{\n{2,}}, $text) { + no warnings 'once'; + $paragraph = $::locale->quote_special_chars('HTML', $paragraph); + $paragraph =~ s{\n}{
}g; + + push @paragraphs, $paragraph; + } + + return '

' . join('

', @paragraphs) . '

'; +} + 1; __END__ @@ -56,6 +90,12 @@ SL::HTML::Util - Utility functions dealing with HTML Removes all HTML elements and tags from C<$html_content> and returns the remaining plain text. +=item C + +Converts a plain text to HTML: paragraphs will be recognized by empty +lines; remaining newlines will be converted into forced line breaks; +the rest will be HTML escaped. + =back =head1 BUGS