X-Git-Url: http://wagnertech.de/git?a=blobdiff_plain;f=SL%2FHTML%2FUtil.pm;h=07031869fd5886493ecd59073a28693f0acf1622;hb=549f187d3a2b1d15f96c4556714666ed954447bb;hp=7212d646ffef763f22744972b07bf45b9c66c717;hpb=792ae733e8f54eca6d306ad523a7a6e166fcb0e0;p=kivitendo-erp.git

diff --git a/SL/HTML/Util.pm b/SL/HTML/Util.pm
index 7212d646f..07031869f 100644
--- a/SL/HTML/Util.pm
+++ b/SL/HTML/Util.pm
@@ -6,25 +6,66 @@ use warnings;
 use HTML::Parser;
 
 my %stripper;
+my %entities = (   # entity name (without '&' and ';') => replacement text, used by strip()
+  'lt'   => '<',
+  'gt'   => '>',
+  'amp'  => '&',
+  'nbsp' => ' ',   # deliberately a plain space instead of "\x{00A0}": the invisible
+                   # non-breaking space can cause problems, e.g. in CSV exports
+);
 
 sub strip {
   my ($class_or_value) = @_;
 
   my $value = !ref($class_or_value) && (($class_or_value // '') eq 'SL::HTML::Util') ? $_[1] : $class_or_value;
 
+  return '' unless defined $value;
+
+  # Remove HTML comments; /s lets '.' cross newlines so multi-line comments go too.
+  $value =~ s{ <!-- .*? --> }{}gsx;
+
   if (!%stripper) {
     %stripper = ( parser => HTML::Parser->new );
 
-    $stripper{parser}->handler(text => sub { $stripper{text} .= $_[1]; });
+    $stripper{parser}->handler(text => sub { $stripper{text} .= ' ' . $_[1]; });
   }
 
   $stripper{text} = '';
   $stripper{parser}->parse($value);
   $stripper{parser}->eof;
 
+  # Decode known entities; matching the name with \w+ keeps a stray '&' from swallowing a later entity.
+  $stripper{text} =~ s{\&(\w+);}{ $entities{$1} // "\&$1;" }eg;
+  $stripper{text} =~ s{^ +| +$}{}g;
+  $stripper{text} =~ s{ {2,}}{ }g;
   return delete $stripper{text};
 }
 
+# Converts plain text to HTML: blank lines separate paragraphs, remaining
+# newlines become <br>, everything else is HTML-escaped. Input that is
+# already wrapped in <p>...</p> is returned as is; undef yields ''.
+sub plain_text_to_html {
+  my ($class_or_text) = @_;
+
+  my $text = !ref($class_or_text) && (($class_or_text // '') eq 'SL::HTML::Util') ? $_[1] : $class_or_text;
+  $text  //= '';   # like strip(): treat undef as empty input instead of warning
+
+  # /s lets '.' cross newlines, so multi-line HTML is not escaped a second time.
+  return $text if $text =~ m{^<p>.*</p>$}s;
+
+  $text =~ s{\r+}{}g;
+  $text =~ s{^[[:space:]]+|[[:space:]]+$}{}g;
+  return '' if $text eq '';
+
+  no warnings 'once';   # presumably silences "used only once" for $::locale
+  my @paragraphs = map {
+    my $paragraph = $::locale->quote_special_chars('HTML', $_);
+    $paragraph =~ s{\n}{<br>}g;
+    $paragraph;
+  } split m{\n{2,}}, $text;
+  return '<p>' . join('</p><p>', @paragraphs) . '</p>';
+}
+
 1;
 __END__
 
@@ -49,6 +90,12 @@ SL::HTML::Util - Utility functions dealing with HTML
 Removes all HTML elements and tags from C<$html_content> and returns
 the remaining plain text.
 
+=item C<plain_text_to_html $text>
+
+Converts plain text to HTML: paragraphs are separated by empty
+lines; remaining newlines are converted into forced line breaks;
+everything else is HTML-escaped.
+
 =back
 
 =head1 BUGS