+use Carp;
+use Cwd;
+use English qw(-no_match_vars);
+use File::Basename;
+use File::Temp;
+use HTML::Entities ();
+use List::MoreUtils qw(any);
+use Scalar::Util qw(blessed);
+use Unicode::Normalize qw();
+
+use SL::DB::Default;
+use SL::System::Process;
+
+# Tags recognised in plain-text content, keyed by tag name. The value is the
+# name of the LaTeX command (without the leading backslash) that wraps the
+# tag's content.
+my %text_markup_replace = (b => 'textbf', i => 'textit', u => 'underline');
+
+# Formats plain text for LaTeX output.
+#
+# The text is first escaped by $::locale->quote_special_chars() for the
+# 'Template/LaTeX' context. Afterwards the tags listed in
+# %text_markup_replace are turned into the corresponding LaTeX commands and
+# all control characters (\x00-\x1f) are removed.
+#
+# Parameters:
+#   $self    - invocant (not used here)
+#   $content - the text to format
+#   %params  - accepted, but not used here
+#
+# Returns the formatted text.
+sub _format_text {
+  my ($self, $content, %params) = @_;
+
+  $content = $::locale->quote_special_chars('Template/LaTeX', $content);
+
+  # The pattern looks for tags in the form "$<$b$>$...$<$/b$>$" --
+  # presumably what the escaping above makes of "<b>...</b>" (verify against
+  # quote_special_chars). Matching is case-insensitive and non-greedy.
+  for my $tag (keys %text_markup_replace) {
+    my $command = $text_markup_replace{$tag};
+    $content =~ s/\$\<\$${tag}\$\>\$(.*?)\$<\$\/${tag}\$>\$/\\${command}\{$1\}/gi;
+  }
+
+  # Strip control characters; this includes tabs and line breaks.
+  $content =~ s/[\x00-\x1f]//g;
+
+  return $content;
+}
+
+# Lookup table from an HTML tag (written without any spaces) to the LaTeX
+# code that replaces it.
+my %html_replace = (
+  # Paragraphs and line breaks
+  '</p>'      => "\n\n",
+  '<br>'      => "\\newline ",
+  '<br/>'     => "\\newline ",
+
+  # Lists
+  '<ul>'      => "\\begin{itemize} ",
+  '</ul>'     => "\\end{itemize} ",
+  '<ol>'      => "\\begin{enumerate} ",
+  '</ol>'     => "\\end{enumerate} ",
+  '<li>'      => "\\item ",
+  '</li>'     => " ",
+
+  # Bold
+  '<b>'       => "\\textbf{",
+  '</b>'      => "}",
+  '<strong>'  => "\\textbf{",
+  '</strong>' => "}",
+
+  # Italic
+  '<i>'       => "\\textit{",
+  '</i>'      => "}",
+  '<em>'      => "\\textit{",
+  '</em>'     => "}",
+
+  # Underline and strike-through
+  '<u>'       => "\\underline{",
+  '</u>'      => "}",
+  '<s>'       => "\\sout{",
+  '</s>'      => "}",
+
+  # Subscript and superscript
+  '<sub>'     => "\\textsubscript{",
+  '</sub>'    => "}",
+  '<sup>'     => "\\textsuperscript{",
+  '</sup>'    => "}",
+);
+
+# Returns vertical space in place of a run of LaTeX line breaks.
+#
+# $to_replace is the matched text. The number of line breaks in it is
+# derived by dividing its length by the length of the LaTeX replacement for
+# '<br>'; one '\vspace*{0.5cm}' is emitted per line break.
+sub _lb_to_space {
+  my ($to_replace) = @_;
+
+  my $line_breaks = length($to_replace) / length($html_replace{'<br>'});
+
+  return '\vspace*{0.5cm}' x $line_breaks;
+}
+
+# Converts an HTML fragment into LaTeX markup.
+#
+# Whitespace is normalised first. The content is then split into tags and
+# text: tags are replaced via %html_replace (tags without an entry are
+# dropped), text is entity-decoded and escaped by
+# $::locale->quote_special_chars() for the 'Template/LaTeX' context. Finally
+# line breaks at positions where '\newline' is not wanted (start of the text,
+# start of a paragraph, directly after an environment) are turned into
+# vertical space or removed.
+#
+# Parameters:
+#   $self    - invocant (not used here)
+#   $content - the HTML to convert; undef is treated as an empty string
+#   %params  - accepted, but not used here
+#
+# Returns the LaTeX code as a string.
+sub _format_html {
+  my ($self, $content, %params) = @_;
+
+  # Avoid "uninitialized value" warnings; the result for undef is ''.
+  $content //= '';
+
+  $content =~ s{ \r+ }{}gx;
+  $content =~ s{ \n+ }{ }gx;
+  # NOTE(review): the escaped-space alternative in the next two patterns is
+  # already covered by \s -- possibly it was meant to match "&nbsp;"; confirm.
+  $content =~ s{ (?:\ |\s)+ }{ }gx;
+  $content =~ s{ (?:\ |\s)+$ }{}gx;
+  $content =~ s{ (?: <br/?> )+$ }{}gx;                      # remove trailing line breaks
+  $content =~ s{ <ul>\s*</ul> | <ol>\s*</ol> }{}igx;        # remove empty lists
+  $content =~ s{ (?: <p>\s*</p>\s* )+ \Z }{}imgx;           # remove trailing empty paragraphs
+
+  # Filter on length instead of truthiness: a text part that consists only
+  # of "0" is false in Perl and would otherwise vanish from the output.
+  my @parts = grep { defined($_) && length($_) } map {
+    if (substr($_, 0, 1) eq '<') {
+      # A tag: strip spaces so that e.g. "<br />" is found as "<br/>".
+      s{ +}{}g;
+      $html_replace{$_} || '';
+
+    } else {
+      $::locale->quote_special_chars('Template/LaTeX', HTML::Entities::decode_entities($_));
+    }
+  } split(m{(<.*?>)}x, $content);
+
+  $content = join '', @parts;
+  $content =~ s{ (?: [\n\s] | \\newline )+ $ }{}gx;         # remove line breaks at the end of the text
+  $content =~ s{ ^ \s+ }{}gx;                               # remove white space at the start of the text
+
+  # In the next three substitutions the whole run of line breaks is captured
+  # by wrapping the repetition in an outer group. A quantified capture group
+  # only keeps its last repetition, which would make _lb_to_space() emit a
+  # single \vspace no matter how many line breaks were matched.
+  $content =~ s{ ^ ( (?: \\newline \ )+ ) }{ _lb_to_space($1) }gxe;                                   # convert line breaks at the start of the text to vertical space
+  $content =~ s{ ( \n\n+ ) ( (?: \\newline \ )+ ) }{ $1 . _lb_to_space($2) }gxe;                      # convert line breaks at the start of a paragraph to vertical space
+  $content =~ s{ ( \\end\{ [^\}]+ \} \h* ) ( (?: \\newline \ )+ ) }{ $1 . _lb_to_space($2) }gxe;      # convert line breaks after LaTeX environments like lists to vertical space
+  $content =~ s{ ^ \h+ \\newline }{\\newline}gmx;           # remove horizontal space in front of a line break at the start of a line
+  $content =~ s{ \n\n \h* \\newline \h* }{\n\n}gmx;         # remove a line break that directly follows a paragraph break
+
+  return $content;
+}
+
+# Dispatch table: content format name => sub that formats such content.
+my %formatters = (
+  text => \&_format_text,
+  html => \&_format_html,
+);
+