From f5bc233522743646b118d0756b0041b16418fbdf Mon Sep 17 00:00:00 2001 From: Moritz Bunkus Date: Mon, 20 Sep 2010 17:40:06 +0200 Subject: [PATCH] Ausgaben mit FCGI richtig codieren MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Hintergrund: FCGI benutzt Perls I/O-Schichtensystem nicht. Deshalb kümmert es sich auch nicht um mit 'binmode STDOUT, ":utf8"' gesetzte Konvertierungsparameter. Weiterhin enthält FCGI ab Version 0.69 Fixes, die doppeltes Encoding bei der Ausgabe vermeiden und damit eigentlich korrektes Verhalten herstellen. Leider geht damit Lx-Offices Art, wie Strings intern gehandhabt und ausgegeben werden, in die Hose. Lx-Office speichert Strings in Perls internem Encoding und verlässt sich auf die mit "binmode" aktivierte, automatische Konvertierung bei der Ausgabe. Dieser Workaround implementiert das Umcodieren vom internen Encoding nach UTF-8 mittels Monkeypatching, bevor FCGIs originale PRINT-Routine aufgerufen wird. Das darf allerdings nicht passieren, wenn unbearbeitete Ausgabe benötigt wird -- z.B. beim Download von Binärdaten (PDFs). Glücklicherweise ist dies in der Funktion "with_raw_io" in Locale gekapselt, sodass dieser Workaround den Status "unbearbeitete Ausgabe?" bei Locale erfragen kann. --- SL/FCGIFixes.pm | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ SL/Locale.pm | 8 ++++++++ dispatcher.fpl | 3 +++ 3 files changed, 59 insertions(+) create mode 100644 SL/FCGIFixes.pm diff --git a/SL/FCGIFixes.pm b/SL/FCGIFixes.pm new file mode 100644 index 000000000..2bd312c9e --- /dev/null +++ b/SL/FCGIFixes.pm @@ -0,0 +1,48 @@ +package SL::FCGIFixes; + +use strict; + +use Encode; +use FCGI; + +# FCGI does not use Perl's I/O layer. Therefore it does not honor +# setting STDOUT to ":utf8" with "binmode". Also FCGI starting with +# 0.69 implements proper handling for UTF-8 flagged strings -- namely +# by downgrading them into bytes. 
The combination of the two causes +# Lx-Office's way of handling strings to go belly up (storing +# everything in Perl's internal encoding and using Perl's I/O layer +# for automatic conversion on output). +# +# This workaround monkeypatches FCGI's print routine so that all of +# its arguments except for "$self" are encoded into UTF-8 before calling +# FCGI's original PRINT function. +# +# However, this must not be done if raw I/O is requested -- e.g. when +# sending out binary data. Fortunately that has been centralized via +# Locale's "with_raw_io" function which sets a variable indicating +# that current I/O operations should be raw. + +sub fix_print_and_internal_encoding_after_0_68 { + return if version->parse($FCGI::VERSION) <= version->parse("0.68"); + + my $encoder = Encode::find_encoding('UTF-8'); + my $original_fcgi_print = \&FCGI::Stream::PRINT; + + no warnings 'redefine'; + + *FCGI::Stream::PRINT = sub { + if (!$::locale || !$::locale->raw_io_active) { + my $self = shift; + my @vals = map { $encoder->encode($_, Encode::FB_CROAK|Encode::LEAVE_SRC) } @_; + @_ = ($self, @vals); + } + + goto $original_fcgi_print; + }; +} + +sub apply_fixes { + fix_print_and_internal_encoding_after_0_68(); +} + +1; diff --git a/SL/Locale.pm b/SL/Locale.pm index 881d5925e..4fde61654 100644 --- a/SL/Locale.pm +++ b/SL/Locale.pm @@ -462,14 +462,22 @@ sub remap_special_chars { return $self->quote_special_chars($dst_format, $self->quote_special_chars("${src_format}-reverse", shift)); } +sub raw_io_active { + my $self = shift; + + return !!$self->{raw_io_active}; +} + sub with_raw_io { my $self = shift; my $fh = shift; my $code = shift; + $self->{raw_io_active} = 1; binmode $fh, ":raw"; $code->(); binmode $fh, ":utf8" if $self->is_utf8; + $self->{raw_io_active} = 0; } 1; diff --git a/dispatcher.fpl b/dispatcher.fpl index 7fae716f1..afe1bc6ed 100755 --- a/dispatcher.fpl +++ b/dispatcher.fpl @@ -4,6 +4,9 @@ use strict; use FCGI; use SL::Dispatcher; +use SL::FCGIFixes; + 
+SL::FCGIFixes::apply_fixes(); SL::Dispatcher::pre_startup(); my $request = FCGI::Request(); -- 2.20.1