1 package SL::HTML::Util;
13 'nbsp' => ' ', # Deliberately a plain space: the correct value would be "\x{00A0}"
14 # (no-break space), but that invisible character can cause problems, e.g. in CSV exports.
# Body of the HTML-stripping routine (documented in the POD as C<strip>):
# turns HTML content into plain text.
# NOTE(review): the enclosing "sub" line and the statements between the
# numbered lines below are not part of this excerpt -- confirm against the
# full file before relying on these comments.
#
# Only the first argument is unpacked here; the second one is read
# directly from @_ when needed.
18 my ($class_or_value) = @_;
# Support both calling conventions: if the first argument is the
# (non-reference) string 'SL::HTML::Util', the call was made as a class
# method and the value to strip is the second argument; otherwise the
# first argument is the value itself.
20 my $value = !ref($class_or_value) && (($class_or_value // '') eq 'SL::HTML::Util') ? $_[1] : $class_or_value;
# Undefined input yields an empty string.
22 return '' unless defined $value;
24 # Remove HTML comments.
# Because of /x the blanks inside the pattern are insignificant, i.e. it
# matches "<!--", a non-greedy run of characters, then "-->".
# NOTE(review): without /s "." does not match newlines, so comments that
# span several lines are not removed by this substitution.
25 $value =~ s{ <!-- .*? --> }{}gx;
# (Re)initialize the shared %stripper state with a fresh HTML::Parser.
# NOTE(review): %stripper is declared outside this excerpt; whether this
# assignment is guarded by a condition cannot be seen from here.
28 %stripper = ( parser => HTML::Parser->new );
# Collect text events: each chunk is appended to $stripper{text}, preceded
# by a single space. Presumably $_[1] is the text of the event -- confirm
# against the HTML::Parser handler/argspec documentation.
30 $stripper{parser}->handler(text => sub { $stripper{text} .= ' ' . $_[1]; });
# Feed the content to the parser and signal the end of the document.
34 $stripper{parser}->parse($value);
35 $stripper{parser}->eof;
# Replace "&name;" by its entry in %entities; names without a true-valued
# entry are put back unchanged as "&name;".
37 $stripper{text} =~ s{\&([^;]+);}{ $entities{$1} || "\&$1;" }eg;
# Drop runs of spaces at the beginning and at the end of the text.
38 $stripper{text} =~ s{^ +| +$}{}g;
# Collapse runs of two or more spaces into a single space.
39 $stripper{text} =~ s{ {2,}}{ }g;
# "delete" returns the collected text and removes it from %stripper.
41 return delete $stripper{text};
# Converts plain text to HTML.
#
# Can be called as a plain function or as a class method
# (SL::HTML::Util->plain_text_to_html($text)).
#
# Parameters:
#   $text - the plain text to convert; may be undef.
#
# Returns:
#   - '' if $text is undefined, empty or consists of whitespace only;
#   - $text unchanged if it already starts with "<p>" and ends with "</p>";
#   - otherwise a string of the form "<p>...</p><p>...</p>": one <p> element
#     per paragraph (paragraphs are separated by one or more empty lines),
#     the remaining newlines turned into "<br>" and the paragraph content
#     passed through $::locale->quote_special_chars('HTML', ...).
sub plain_text_to_html {
  my ($class_or_text) = @_;

  # Support both calling conventions: if the first argument is the
  # (non-reference) package name the text is the second argument.
  my $text = !ref($class_or_text) && (($class_or_text // '') eq 'SL::HTML::Util') ? $_[1] : $class_or_text;

  # Undefined input yields an empty string (same result as before, but
  # without "uninitialized value" warnings from the operations below).
  return '' unless defined $text;

  # Don't do anything if the text is already HTML. /s lets "." match
  # newlines so that multi-line markup is recognized as well instead of
  # being escaped a second time.
  return $text if $text =~ m{^<p>.*</p>$}s;

  # Normalize CRLF and lone CR line endings; otherwise "\r\n\r\n" would not
  # be seen as a paragraph break and stray "\r" would end up in the output.
  $text =~ s{\r\n?}{\n}g;

  # Remove whitespace at the beginning & end.
  $text =~ s{^[[:space:]]+|[[:space:]]+$}{}g;

  return '' if $text eq '';

  my @paragraphs;

  foreach my $paragraph (split m{\n{2,}}, $text) {
    # Quote first, add markup afterwards: this way the "<br>" inserted
    # below is not affected by the quoting. quote_special_chars is defined
    # outside this file; presumably it escapes HTML special characters.
    $paragraph = $::locale->quote_special_chars('HTML', $paragraph);
    $paragraph =~ s{\n}{<br>}g;

    push @paragraphs, $paragraph;
  }

  return '<p>' . join('</p><p>', @paragraphs) . '</p>';
}
78 SL::HTML::Util - Utility functions dealing with HTML
82 my $plain_text = SL::HTML::Util->strip('<h1>Hello World</h1>');
88 =item C<strip $html_content>
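90 Removes all HTML elements, tags and comments from C<$html_content>,
91 decodes the entities known to this module, collapses runs of spaces and returns the remaining plain text. An undefined C<$html_content> yields an empty string.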
93 =item C<plain_text_to_html $text>
95 Converts plain text to HTML: paragraphs are recognized by empty
96 lines; the remaining newlines are converted into forced line breaks;
97 everything else is HTML-escaped. Text that is already enclosed in C<E<lt>pE<gt>> ... C<E<lt>/pE<gt>> is returned unchanged.
107 Moritz Bunkus E<lt>m.bunkus@linet-services.deE<gt>