From: Sven Schöling Date: Wed, 8 Aug 2012 14:48:47 +0000 (+0200) Subject: Parsing von multipart/formdata beschleuningt. X-Git-Tag: release-3.0.0beta1~297^2~18 X-Git-Url: http://wagnertech.de/git?a=commitdiff_plain;h=dc3f6120f9bbacaa028e554d7fa71e481d4497b4;p=kivitendo-erp.git Parsing von multipart/formdata beschleuningt. Die entsprechende Routine hatte einen bösen Fall von Shlemiel the Painter's algorithm [1]. Dadurch wurden Fileuploads mit mehr als 20k Zeilen extrem langsam. Binärdaten wie pdfs oder Bilder hat das nicht gestört, aber bei CSV Imports hat eine 80k Zeilen Datei dann auch mal 2-5min gebraucht, nur um den Request zu parsen. Jetzt werden nur die Indizes geparst und hinterher direkt aus dem Request der substr gezogen. Ausserdem endlich einen Testfall dafür eingebaut. [1] http://en.wikipedia.org/wiki/Schlemiel_the_Painter%27s_algorithm --- diff --git a/SL/Request.pm b/SL/Request.pm index 64662875d..a8ae4c137 100644 --- a/SL/Request.pm +++ b/SL/Request.pm @@ -53,6 +53,10 @@ sub _input_to_hash { sub _parse_multipart_formdata { my ($target, $temp_target, $input) = @_; my ($name, $filename, $headers_done, $content_type, $boundary_found, $need_cr, $previous, $p_attachment, $encoding, $transfer_encoding); + my $data_start = 0; + + # teach substr and length to use good ol' bytes, not 'em fancy characters + use bytes; # We SHOULD honor encodings and transfer-encodings here, but as hard as I # looked I couldn't find a reasonably recent webbrowser that makes use of @@ -63,12 +67,21 @@ sub _parse_multipart_formdata { $ENV{'CONTENT_TYPE'} =~ /multipart\/form-data\s*;\s*boundary\s*=\s*(.+)$/; my $boundary = '--' . $1; + my $index = 0; + my $line_length; foreach my $line (split m/\n/, $input) { - last if (($line eq "${boundary}--") || ($line eq "${boundary}--\r")); + $line_length = length $line; + + if ($line =~ /^\Q$boundary\E(--)?\r?$/) { + my $last_boundary = $1; + my $data = substr $input, $data_start, $index - $data_start; + $data =~ s/\r?\n$//; - if (($line eq $boundary) || ($line eq "$boundary\r")) { - ${ $previous } =~ s|\r?\n$|| if $previous; - ${ $previous } = Encode::decode($encoding, $$previous) if $previous && !$filename && !$transfer_encoding eq 'binary'; + if ($previous && !$filename && $transfer_encoding && $transfer_encoding ne 'binary') { + ${ $previous } = Encode::decode($encoding, $data); + } else { + ${ $previous } = $data; + } undef $previous; undef $filename; @@ -79,7 +92,7 @@ sub _parse_multipart_formdata { $need_cr = 0; $encoding = $::lx_office_conf{system}->{dbcharset} || Common::DEFAULT_CHARSET; $transfer_encoding = undef; - + last if $last_boundary; next; } @@ -90,6 +103,7 @@ sub _parse_multipart_formdata { if (!$line) { $headers_done = 1; + $data_start = $index + $line_length + 1; next; } @@ -159,11 +173,10 @@ sub _parse_multipart_formdata { next unless $previous; - ${ $previous } .= "${line}\n"; + } continue { + $index += $line_length + 1; } - ${ $previous } =~ s|\r?\n$|| if $previous; - $::lxdebug->leave_sub(2); } diff --git a/t/request/post_multipart.t b/t/request/post_multipart.t new file mode 100644 index 000000000..767807b90 --- /dev/null +++ b/t/request/post_multipart.t @@ -0,0 +1,107 @@ +use strict; +use utf8; + +use lib 't'; +use lib 'modules/fallback'; +BEGIN { + unshift @INC, 'modules/override'; +} + +use Support::TestSetup; +use Test::More tests => 2; +use Data::Dumper; +require Test::Deep; +use Encode; + +use SL::Request; + +Support::TestSetup::login(); + +open my $fh, '<', 't/request/post_multipart_1' or die "can't load test"; +my $data = do { $/ = undef; <$fh> }; + +my $t = {}; +my $tt = {}; + +local $ENV{CONTENT_TYPE} = 'multipart/form-data; boundary=---------------------------23281168279961'; +SL::Request::_parse_multipart_formdata($t, $tt, $data); + + +my $blob = Encode::encode('utf-8', qq|\x{feff}Stunde;Montag;Dienstag;Mittwoch;Donnerstag;Freitag +1;Mathe;Deutsch;Englisch;Mathe;Kunst +2;Sport;Französisch;Geschichte;Sport;Geschichte +3;Sport;"Religion ev;kath";Kunst;;Kunst|); + +my $t_cmp = { + 'profile' => { + 'name' => undef, + 'type' => undef + }, + 'quote_char' => undef, + 'file' => $blob, + 'custom_sep_char' => undef, + 'sep_char' => undef, + 'settings' => { + 'article_number_policy' => undef, + 'sellprice_places' => undef, + 'charset' => undef, + 'apply_buchungsgruppe' => undef, + 'full_preview' => undef, + 'parts_type' => undef, + 'default_unit' => undef, + 'default_buchungsgruppe' => undef, + 'duplicates' => undef, + 'numberformat' => undef, + 'sellprice_adjustment_type' => undef, + 'shoparticle_if_missing' => undef, + 'sellprice_adjustment' => undef + }, + 'custom_escape_char' => undef, + 'action_test' => undef, + 'custom_quote_char' => undef, + 'escape_char' => undef, + 'action' => undef + }; +$t_cmp->{ATTACHMENTS}{file}{data} = \$t_cmp->{'file'}; + + +is_deeply $t, $t_cmp; + +is_deeply $tt, + { + 'profile' => { + 'name' => '', + 'type' =>'parts', + }, + 'file' => undef, + 'quote_char' => 'quote', + 'custom_sep_char' => '', + 'sep_char' => 'semicolon', + 'settings' => { + 'article_number_policy' => 'update_prices', + 'sellprice_places' => 2, + 'charset' => 'UTF-8', + 'apply_buchungsgruppe' => 'all', + 'full_preview' => '0', + 'parts_type' => 'part', + 'default_unit' => 'g', + 'default_buchungsgruppe' => '815', + 'duplicates' => 'no_check', + 'numberformat' => '1.000,00', + 'sellprice_adjustment_type' => 'percent', + 'shoparticle_if_missing' => '0', + 'sellprice_adjustment' =>'0' + }, + 'custom_escape_char' => '', + 'action_test' => 'Test und Vorschau', + 'ATTACHMENTS' => { + 'file' => { + 'filename' => 'from_wikipedia.csv' + } + }, + 'custom_quote_char' => '', + 'escape_char' => 'quote', + 'action' => 'CsvImport/dispatch', + 'FILENAME' => 'from_wikipedia.csv' + }; + diff --git a/t/request/post_multipart_1 b/t/request/post_multipart_1 new file mode 100644 index 000000000..c3ced0fc8 --- /dev/null +++ b/t/request/post_multipart_1 @@ -0,0 +1,101 @@ +-----------------------------23281168279961 +Content-Disposition: form-data; name="action" + +CsvImport/dispatch +-----------------------------23281168279961 +Content-Disposition: form-data; name="profile.type" + +parts +-----------------------------23281168279961 +Content-Disposition: form-data; name="profile.name" + + +-----------------------------23281168279961 +Content-Disposition: form-data; name="settings.numberformat" + +1.000,00 +-----------------------------23281168279961 +Content-Disposition: form-data; name="settings.charset" + +UTF-8 +-----------------------------23281168279961 +Content-Disposition: form-data; name="sep_char" + +semicolon +-----------------------------23281168279961 +Content-Disposition: form-data; name="custom_sep_char" + + +-----------------------------23281168279961 +Content-Disposition: form-data; name="quote_char" + +quote +-----------------------------23281168279961 +Content-Disposition: form-data; name="custom_quote_char" + + +-----------------------------23281168279961 +Content-Disposition: form-data; name="escape_char" + +quote +-----------------------------23281168279961 +Content-Disposition: form-data; name="custom_escape_char" + + +-----------------------------23281168279961 +Content-Disposition: form-data; name="settings.duplicates" + +no_check +-----------------------------23281168279961 +Content-Disposition: form-data; name="settings.article_number_policy" + +update_prices +-----------------------------23281168279961 +Content-Disposition: form-data; name="settings.sellprice_places" + +2 +-----------------------------23281168279961 +Content-Disposition: form-data; name="settings.sellprice_adjustment" + +0 +-----------------------------23281168279961 +Content-Disposition: form-data; name="settings.sellprice_adjustment_type" + +percent +-----------------------------23281168279961 +Content-Disposition: form-data; name="settings.shoparticle_if_missing" + +0 +-----------------------------23281168279961 +Content-Disposition: form-data; name="settings.parts_type" + +part +-----------------------------23281168279961 +Content-Disposition: form-data; name="settings.default_buchungsgruppe" + +815 +-----------------------------23281168279961 +Content-Disposition: form-data; name="settings.apply_buchungsgruppe" + +all +-----------------------------23281168279961 +Content-Disposition: form-data; name="settings.default_unit" + +g +-----------------------------23281168279961 +Content-Disposition: form-data; name="settings.full_preview" + +0 +-----------------------------23281168279961 +Content-Disposition: form-data; name="file"; filename="from_wikipedia.csv" +Content-Type: text/comma-separated-values + +Stunde;Montag;Dienstag;Mittwoch;Donnerstag;Freitag +1;Mathe;Deutsch;Englisch;Mathe;Kunst +2;Sport;Französisch;Geschichte;Sport;Geschichte +3;Sport;"Religion ev;kath";Kunst;;Kunst +-----------------------------23281168279961 +Content-Disposition: form-data; name="action_test" + +Test und Vorschau +-----------------------------23281168279961--