SL/Request.pm

   1 package SL::Request;
   2
   3 use strict;
   4
   5 use SL::Common;
   6 use SL::MoreCommon qw(uri_encode uri_decode);
   7 use List::Util qw(first max min sum);
   8 use List::MoreUtils qw(all any apply);
   9 use Exporter qw(import);
  10
  11 our @EXPORT_OK = qw(flatten unflatten read_cgi_input);
  12
  13 sub _store_value {
  14   my ($target, $key, $value) = @_;
  15   my @tokens = split /((?:\[\+?\])?(?:\.)|(?:\[\+?\]))/, $key;
  16   my $curr;
  17
  18   if (scalar @tokens) {
  19      $curr = \ $target->{ shift @tokens };
  20   }
  21
  22   while (@tokens) {
  23     my $sep = shift @tokens;
  24     my $key = shift @tokens;
  25
  26     $curr = \ $$curr->[$#$$curr], next   if $sep eq '[]' && @tokens;
  27     $curr = \ $$curr->[++$#$$curr], next if $sep eq '[]' && !@tokens;
  28     $curr = \ $$curr->[++$#$$curr], next if $sep eq '[+]';
  29     $curr = \ $$curr->[max 0, $#$$curr]  if $sep eq '[].';
  30     $curr = \ $$curr->[++$#$$curr]       if $sep eq '[+].';
  31     $curr = \ $$curr->{$key}
  32   }
  33
  34   $$curr = $value;
  35
  36   return $curr;
  37 }
  38
  39 sub _input_to_hash {
  40   $::lxdebug->enter_sub(2);
  41
  42   my ($target, $input) = @_;
  43   my @pairs = split(/&/, $input);
  44
  45   foreach (@pairs) {
  46     my ($key, $value) = split(/=/, $_, 2);
  47     _store_value($target, uri_decode($key), uri_decode($value)) if ($key);
  48   }
  49
  50   $::lxdebug->leave_sub(2);
  51 }
  52
  53 sub _parse_multipart_formdata {
  54   my ($target, $temp_target, $input) = @_;
  55   my ($name, $filename, $headers_done, $content_type, $boundary_found, $need_cr, $previous, $p_attachment, $encoding, $transfer_encoding);
  56
  57   # We SHOULD honor encodings and transfer-encodings here, but as hard as I
  58   # looked I couldn't find a reasonably recent webbrowser that makes use of
  59   # these. Transfer encoding just eats up bandwidth...
  60
  61   # so all I'm going to do is add a fail safe that if anyone ever encounters
  62   # this, it's going to croak so that debugging is easier
  63   $ENV{'CONTENT_TYPE'} =~ /multipart\/form-data\s*;\s*boundary\s*=\s*(.+)$/;
  64   my $boundary = '--' . $1;
  65
  66   foreach my $line (split m/\n/, $input) {
  67     last if (($line eq "${boundary}--") || ($line eq "${boundary}--\r"));
  68
  69     if (($line eq $boundary) || ($line eq "$boundary\r")) {
  70       ${ $previous } =~ s|\r?\n$|| if $previous;
  71       ${ $previous } =  Encode::decode($encoding, $$previous) if $previous && !$filename && !$transfer_encoding eq 'binary';
  72
  73       undef $previous;
  74       undef $filename;
  75
  76       $headers_done   = 0;
  77       $content_type   = "text/plain";
  78       $boundary_found = 1;
  79       $need_cr        = 0;
  80       $encoding       = $::lx_office_conf{system}->{dbcharset} || Common::DEFAULT_CHARSET;
  81       $transfer_encoding = undef;
  82
  83       next;
  84     }
  85
  86     next unless $boundary_found;
  87
  88     if (!$headers_done) {
  89       $line =~ s/[\r\n]*$//;
  90
  91       if (!$line) {
  92         $headers_done = 1;
  93         next;
  94       }
  95
  96       if ($line =~ m|^content-disposition\s*:.*?form-data\s*;|i) {
  97         if ($line =~ m|filename\s*=\s*"(.*?)"|i) {
  98           $filename = $1;
  99           substr $line, $-[0], $+[0] - $-[0], "";
 100         }
 101
 102         if ($line =~ m|name\s*=\s*"(.*?)"|i) {
 103           $name = $1;
 104           substr $line, $-[0], $+[0] - $-[0], "";
 105         }
 106
 107         if ($name) {
 108           # legacy, some old upload routines expect this to be here
 109           $temp_target->{FILENAME} = $filename if defined $filename;
 110
 111           # name can potentially be both a normal variable or a file upload
 112           # a file upload can be identified by its "filename" attribute
 113           # the thing is, if a [+] clause vivifies atructur in one of the
 114           # branches it must be done in both, or subsequent "[]" will fail
 115           my $temp_target_slot = _store_value($temp_target, $name);
 116           my $target_slot      = _store_value($target,      $name);
 117
 118           # set the reference for appending of multiline data to the correct one
 119           $previous            = defined $filename ? $target_slot : $temp_target_slot;
 120
 121           # for multiple uploads: save the attachments in a SL/Mailer like structure
 122           if (defined $filename) {
 123             my $target_attachment      = _store_value($target,      "ATTACHMENTS.$name", {});
 124             my $temp_target_attachment = _store_value($temp_target, "ATTACHMENTS.$name", {});
 125
 126             $$target_attachment->{data}          = $previous;
 127             $$temp_target_attachment->{filename} = $filename;
 128
 129             $p_attachment = $$temp_target_attachment;
 130           }
 131         }
 132
 133         next;
 134       }
 135
 136       if ($line =~ m|^content-type\s*:\s*(.*?)[;\$]|i) {
 137         $content_type = $1;
 138         $p_attachment->{content_type} = $1;
 139
 140         if ($content_type =~ /^text/ && $line =~ m|;\s*charset\s*:\s*("?)(.*?)\1$|i) {
 141           $encoding = $2;
 142         }
 143
 144         next;
 145       }
 146
 147       if ($line =~ m|^content-transfer-encoding\s*=\s*(.*?)$|i) {
 148         $transfer_encoding = lc($1);
 149         if ($transfer_encoding  && $transfer_encoding !~ /^[78]bit|binary$/) {
 150           die 'Transfer encodings beyond 7bit/8bit and binary are not implemented.';
 151         }
 152         $p_attachment->{transfer_encoding} = $transfer_encoding;
 153
 154         next;
 155       }
 156
 157       next;
 158     }
 159
 160     next unless $previous;
 161
 162     ${ $previous } .= "${line}\n";
 163   }
 164
 165   ${ $previous } =~ s|\r?\n$|| if $previous;
 166
 167   $::lxdebug->leave_sub(2);
 168 }
 169
 170 sub _recode_recursively {
 171   $::lxdebug->enter_sub;
 172   my ($iconv, $from, $to) = @_;
 173
 174   if (any { ref $from eq $_ } qw(Form HASH)) {
 175     for my $key (keys %{ $from }) {
 176       if (!ref $from->{$key}) {
 177         # Workaround for a bug: converting $from->{$key} directly
 178         # leads to 'undef'. I don't know why. Converting a copy works,
 179         # though.
 180         $to->{$key} = $iconv->convert("" . $from->{$key}) if defined $from->{$key} && !defined $to->{$key};
 181       } else {
 182         $to->{$key} ||= {} if 'HASH'  eq ref $from->{$key};
 183         $to->{$key} ||= [] if 'ARRAY' eq ref $from->{$key};
 184         _recode_recursively($iconv, $from->{$key}, $to->{$key});
 185       }
 186     }
 187
 188   } elsif (ref $from eq 'ARRAY') {
 189     foreach my $idx (0 .. scalar(@{ $from }) - 1) {
 190       if (!ref $from->[$idx]) {
 191         # Workaround for a bug: converting $from->[$idx] directly
 192         # leads to 'undef'. I don't know why. Converting a copy works,
 193         # though.
 194         $to->[$idx] = $iconv->convert("" . $from->[$idx]);
 195       } else {
 196         $to->[$idx] ||= {} if 'HASH'  eq ref $from->[$idx];
 197         $to->[$idx] ||= [] if 'ARRAY' eq ref $from->[$idx];
 198         _recode_recursively($iconv, $from->[$idx], $to->[$idx]);
 199       }
 200     }
 201   }
 202   $main::lxdebug->leave_sub();
 203 }
 204
 205 sub read_cgi_input {
 206   $::lxdebug->enter_sub;
 207
 208   my ($target) = @_;
 209   my $db_charset   = $::lx_office_conf{system}->{dbcharset} || Common::DEFAULT_CHARSET;
 210
 211   # yes i know, copying all those values around isn't terribly efficient, but
 212   # the old version of dumping everything into form and then launching a
 213   # tactical recode nuke at the data is still worse.
 214
 215   # this way the data can at least be recoded on the fly as soon as we get to
 216   # know the source encoding and only in the cases where encoding may be hidden
 217   # among the payload we take the hit of copying the request around
 218   my $temp_target = { };
 219
 220   # since both of these can potentially bring their encoding in INPUT_ENCODING
 221   # they get dumped into temp_target
 222   _input_to_hash($temp_target, $ENV{QUERY_STRING}) if $ENV{QUERY_STRING};
 223   _input_to_hash($temp_target, $ARGV[0])           if @ARGV && $ARGV[0];
 224
 225   if ($ENV{CONTENT_LENGTH}) {
 226     my $content;
 227     read STDIN, $content, $ENV{CONTENT_LENGTH};
 228     if ($ENV{'CONTENT_TYPE'} && $ENV{'CONTENT_TYPE'} =~ /multipart\/form-data/) {
 229       # multipart formdata can bring it's own encoding, so give it both
 230       # and let ti decide on it's own
 231       _parse_multipart_formdata($target, $temp_target, $content);
 232     } else {
 233       # normal encoding must be recoded
 234       _input_to_hash($temp_target, $content);
 235     }
 236   }
 237
 238   my $encoding     = delete $temp_target->{INPUT_ENCODING} || $db_charset;
 239
 240   _recode_recursively(SL::Iconv->new($encoding, $db_charset), $temp_target => $target) if keys %$target;
 241
 242   if ($target->{RESTORE_FORM_FROM_SESSION_ID}) {
 243     my %temp_form;
 244     $::auth->restore_form_from_session(delete $target->{RESTORE_FORM_FROM_SESSION_ID}, form => \%temp_form);
 245     _store_value($target, $_, $temp_form{$_}) for keys %temp_form;
 246   }
 247
 248   $::lxdebug->leave_sub;
 249
 250   return $target;
 251 }
 252
 253 sub flatten {
 254   my ($source, $target, $prefix, $in_array) = @_;
 255   $target ||= [];
 256
 257   # there are two edge cases that need attention. first: more than one hash
 258   # inside an array.  only the first of each nested can have a [+].  second: if
 259   # an array contains mixed values _store_value will rely on autovivification.
 260   # so any type change must have a [+]
 261   # this closure decides one recursion step AFTER an array has been found if a
 262   # [+] needs to be generated
 263   my $arr_prefix = sub {
 264     return $_[0] ? '[+]' : '[]' if $in_array;
 265     return '';
 266   };
 267
 268   for (ref $source) {
 269     /^HASH$/ && do {
 270       my $first = 1;
 271       for my $key (keys %$source) {
 272         flatten($source->{$key} => $target, (defined $prefix ? $prefix . $arr_prefix->($first) . '.' : '') . $key);
 273         $first = 0;
 274       };
 275       next;
 276     };
 277     /^ARRAY$/ && do {
 278       for my $i (0 .. $#$source) {
 279         flatten($source->[$i] => $target, $prefix . $arr_prefix->($i == 0), '1');
 280       }
 281       next;
 282     };
 283     !$_ && do {
 284       die "can't flatten a pure scalar" unless defined $prefix;
 285       push @$target, [ $prefix . $arr_prefix->(0) => $source ];
 286       next;
 287     };
 288     die "unrecognized reference of a data structure $_. cannot serialize refs, globs and code yet. to serialize Form please use the method there";
 289   }
 290
 291   return $target;
 292 }
 293
 294
 295 sub unflatten {
 296   my ($data, $target) = @_;
 297   $target ||= {};
 298
 299   for my $pair (@$data) {
 300     _store_value($target, @$pair) if defined $pair->[0];
 301   }
 302
 303   return $target;
 304 }
 305
 306 1;
 307
 308 __END__
 309
 310 =head1 NAME
 311
 312 SL::Request.pm - request parsing and data serialization
 313
 314 =head1 SYNOPSIS
 315
 316 This module handles unpacking of cgi parameters. usually you don't want to call
 317 anything in here directly.
 318
 319   use SL::Request qw(read_cgi_input);
 320
 321   # read cgi input depending on request type, unflatten and recode
 322   read_cgi_input($target_hash_ref);
 323
 324   # $hashref and $new_hashref should be identical
 325   my $new_arrayref = flatten($hashref);
 326   my $new_hashref  = unflatten($new_arrayref);
 327
 328
 329 =head1 DESCRIPTION
 330
 331 This module handles flattening and unflattening of data for request
 332 roundtrip purposes. Lx-Office uses the format as described below:
 333
 334 =over 4
 335
 336 =item Hashes
 337
 338 Hash entries will be connected with a dot (C<.>). A simple hash like this
 339
 340   order => {
 341     item     => 2,
 342     customer => 5
 343   }
 344
 345 will be serialized to
 346
 347   [ order.item     => 2 ],
 348   [ order.customer => 5 ],
 349
 350 =item Arrays
 351
 352 Arrays will by trailing empty brackets (C<[]>). An hash like this
 353
 354   selected_id => [ 2, 6, 8, 9 ]
 355
 356 will be flattened to
 357
 358   [ selected_id[] => 2 ],
 359   [ selected_id[] => 6 ],
 360   [ selected_id[] => 8 ],
 361   [ selected_id[] => 9 ],
 362
 363 Since this will produce identical keys, the resulting flattened list can not be
 364 used as a hash. It is however very easy to use this in a template to generate
 365 input:
 366
 367   [% FOREACH id = selected_ids %]
 368     <input type="hidden" name="selected_id[]" value="[% id | html %]">
 369   [% END %]
 370
 371 =item Nested structures
 372
 373 A special version of this are nested hashs in an array, which is very common.
 374 The combined operator (C<[].>) will be used. As a special case, every time a new
 375 array slice is started, the special convention (C<[+].>) will be used. Again this
 376 is because it's easy to write a template with it.
 377
 378 So this
 379
 380   order => {
 381     orderitems => [
 382       {
 383         id   => 1,
 384         part => 15
 385       },
 386       {
 387         id   => 2,
 388         part => 7
 389       },
 390     ]
 391   }
 392
 393 will be
 394
 395   [ order.orderitems[+].id  => 1  ],
 396   [ order.orderitems[].part => 15 ],
 397   [ order.orderitems[+].id  => 2  ],
 398   [ order.orderitems[].part => 7  ],
 399
 400 =item Limitations
 401
 402   The format currently does have certain limitations when compared to other
 403   serialization formats.
 404
 405 =over 4
 406
 407 =item Order
 408
 409 The order of serialized values matters to reconstruct arrays properly. This
 410 should rarely be a problem if you just flatten and dump into a url or a field
 411 of hiddens.
 412
 413 =item Empty Keys
 414
 415 The current implementation of flatten does produce correct serialization of
 416 empty keys, but unflatten is unable to resolve these. Do no use C<''> or
 417 C<undef> as keys. C<0> is fine.
 418
 419 =item Key Escaping
 420
 421 You cannot use the tokens C<[]>, C<[+]> and C<.> in keys. No way around it.
 422
 423 =item Sparse Arrays
 424
 425 It is not possible to serialize somehing like
 426
 427   sparse_array => do { my $sa = []; $sa[100] = 1; $sa },
 428
 429 This is a feature, as perl doesn't do well with very large arrays.
 430
 431 =item Recursion
 432
 433 There is currently no support nor prevention for flattening a circular structure.
 434
 435 =item Custom Delimiter
 436
 437 No support for other delimiters, sorry.
 438
 439 =item Other References
 440
 441 No support for globs, scalar refs, code refs, filehandles and the like. These will die.
 442
 443 =back
 444
 445 =back
 446
 447 =head1 FUNCTIONS
 448
 449 =over 4
 450
 451 =item C<flatten HASHREF [ ARRAYREF ]>
 452
 453 This function will flatten the provided hash ref into the provided array ref.
 454 The array ref may be non empty, but will be changed in this case.
 455
 456 Return value is the flattened array ref.
 457
 458 =item C<unflatten ARRAYREF [ HASHREF ]>
 459
 460 This function will parse the array ref, and will store the contents into the hash ref. The hash ref may be non empty, in this case any new keys will override the old ones only on leafs with same type. Type changes on a node will die.
 461
 462 =back
 463
 464 =head1 SPECIAL FUNCTIONS
 465
 466 =head2 C<_store_value()>
 467
 468 parses a complex var name, and stores it in the form.
 469
 470 syntax:
 471   _store_value($target, $key, $value);
 472
 473 keys must start with a string, and can contain various tokens.
 474 supported key structures are:
 475
 476 1. simple access
 477   simple key strings work as expected
 478
 479   id => $form->{id}
 480
 481 2. hash access.
 482   separating two keys by a dot (.) will result in a hash lookup for the inner value
 483   this is similar to the behaviour of java and templating mechanisms.
 484
 485   filter.description => $form->{filter}->{description}
 486
 487 3. array+hashref access
 488
 489   adding brackets ([]) before the dot will cause the next hash to be put into an array.
 490   using [+] instead of [] will force a new array index. this is useful for recurring
 491   data structures like part lists. put a [+] into the first varname, and use [] on the
 492   following ones.
 493
 494   repeating these names in your template:
 495
 496     invoice.items[+].id
 497     invoice.items[].parts_id
 498
 499   will result in:
 500
 501     $form->{invoice}->{items}->[
 502       {
 503         id       => ...
 504         parts_id => ...
 505       },
 506       {
 507         id       => ...
 508         parts_id => ...
 509       }
 510       ...
 511     ]
 512
 513 4. arrays
 514
 515   using brackets at the end of a name will result in a pure array to be created.
 516   note that you mustn't use [+], which is reserved for array+hash access and will
 517   result in undefined behaviour in array context.
 518
 519   filter.status[]  => $form->{status}->[ val1, val2, ... ]
 520
 521 =cut