SL/Request.pm

   1 package SL::Request;
   2
   3 use strict;
   4
   5 use SL::Common;
   6 use SL::MoreCommon qw(uri_encode uri_decode);
   7 use List::Util qw(first max min sum);
   8 use List::MoreUtils qw(all any apply);
   9 use Exporter qw(import);
  10
  11 our @EXPORT_OK = qw(flatten unflatten read_cgi_input);
  12
  13 sub _store_value {
  14   my ($target, $key, $value) = @_;
  15   my @tokens = split /((?:\[\+?\])?(?:\.)|(?:\[\+?\]))/, $key;
  16   my $curr;
  17
  18   if (scalar @tokens) {
  19      $curr = \ $target->{ shift @tokens };
  20   }
  21
  22   while (@tokens) {
  23     my $sep = shift @tokens;
  24     my $key = shift @tokens;
  25
  26     $curr = \ $$curr->[$#$$curr], next   if $sep eq '[]' && @tokens;
  27     $curr = \ $$curr->[++$#$$curr], next if $sep eq '[]' && !@tokens;
  28     $curr = \ $$curr->[++$#$$curr], next if $sep eq '[+]';
  29     $curr = \ $$curr->[max 0, $#$$curr]  if $sep eq '[].';
  30     $curr = \ $$curr->[++$#$$curr]       if $sep eq '[+].';
  31     $curr = \ $$curr->{$key}
  32   }
  33
  34   $$curr = $value;
  35
  36   return $curr;
  37 }
  38
  39 sub _input_to_hash {
  40   $::lxdebug->enter_sub(2);
  41
  42   my ($target, $input) = @_;
  43   my @pairs = split(/&/, $input);
  44
  45   foreach (@pairs) {
  46     my ($key, $value) = split(/=/, $_, 2);
  47     _store_value($target, uri_decode($key), uri_decode($value)) if ($key);
  48   }
  49
  50   $::lxdebug->leave_sub(2);
  51 }
  52
  53 sub _parse_multipart_formdata {
  54   my ($target, $temp_target, $input) = @_;
  55   my ($name, $filename, $headers_done, $content_type, $boundary_found, $need_cr, $previous, $p_attachment, $encoding, $transfer_encoding);
  56   my $data_start = 0;
  57
  58   # teach substr and length to use good ol' bytes, not 'em fancy characters
  59   use bytes;
  60
  61   # We SHOULD honor encodings and transfer-encodings here, but as hard as I
  62   # looked I couldn't find a reasonably recent webbrowser that makes use of
  63   # these. Transfer encoding just eats up bandwidth...
  64
  65   # so all I'm going to do is add a fail safe that if anyone ever encounters
  66   # this, it's going to croak so that debugging is easier
  67   $ENV{'CONTENT_TYPE'} =~ /multipart\/form-data\s*;\s*boundary\s*=\s*(.+)$/;
  68   my $boundary = '--' . $1;
  69
  70   my $index = 0;
  71   my $line_length;
  72   foreach my $line (split m/\n/, $input) {
  73     $line_length = length $line;
  74
  75     if ($line =~ /^\Q$boundary\E(--)?\r?$/) {
  76       my $last_boundary = $1;
  77       my $data       =  substr $input, $data_start, $index - $data_start;
  78       $data =~ s/\r?\n$//;
  79
  80       if ($previous && !$filename && $transfer_encoding && $transfer_encoding ne 'binary') {
  81         ${ $previous } = Encode::decode($encoding, $data);
  82       } else {
  83         ${ $previous } = $data;
  84       }
  85
  86       undef $previous;
  87       undef $filename;
  88
  89       $headers_done   = 0;
  90       $content_type   = "text/plain";
  91       $boundary_found = 1;
  92       $need_cr        = 0;
  93       $encoding       = $::lx_office_conf{system}->{dbcharset} || Common::DEFAULT_CHARSET;
  94       $transfer_encoding = undef;
  95       last if $last_boundary;
  96       next;
  97     }
  98
  99     next unless $boundary_found;
 100
 101     if (!$headers_done) {
 102       $line =~ s/[\r\n]*$//;
 103
 104       if (!$line) {
 105         $headers_done = 1;
 106         $data_start = $index + $line_length + 1;
 107         next;
 108       }
 109
 110       if ($line =~ m|^content-disposition\s*:.*?form-data\s*;|i) {
 111         if ($line =~ m|filename\s*=\s*"(.*?)"|i) {
 112           $filename = $1;
 113           substr $line, $-[0], $+[0] - $-[0], "";
 114         }
 115
 116         if ($line =~ m|name\s*=\s*"(.*?)"|i) {
 117           $name = $1;
 118           substr $line, $-[0], $+[0] - $-[0], "";
 119         }
 120
 121         if ($name) {
 122           # legacy, some old upload routines expect this to be here
 123           $temp_target->{FILENAME} = $filename if defined $filename;
 124
 125           # name can potentially be both a normal variable or a file upload
 126           # a file upload can be identified by its "filename" attribute
 127           # the thing is, if a [+] clause vivifies atructur in one of the
 128           # branches it must be done in both, or subsequent "[]" will fail
 129           my $temp_target_slot = _store_value($temp_target, $name);
 130           my $target_slot      = _store_value($target,      $name);
 131
 132           # set the reference for appending of multiline data to the correct one
 133           $previous            = defined $filename ? $target_slot : $temp_target_slot;
 134
 135           # for multiple uploads: save the attachments in a SL/Mailer like structure
 136           if (defined $filename) {
 137             my $target_attachment      = _store_value($target,      "ATTACHMENTS.$name", {});
 138             my $temp_target_attachment = _store_value($temp_target, "ATTACHMENTS.$name", {});
 139
 140             $$target_attachment->{data}          = $previous;
 141             $$temp_target_attachment->{filename} = $filename;
 142
 143             $p_attachment = $$temp_target_attachment;
 144           }
 145         }
 146
 147         next;
 148       }
 149
 150       if ($line =~ m|^content-type\s*:\s*(.*?)[;\$]|i) {
 151         $content_type = $1;
 152         $p_attachment->{content_type} = $1;
 153
 154         if ($content_type =~ /^text/ && $line =~ m|;\s*charset\s*:\s*("?)(.*?)\1$|i) {
 155           $encoding = $2;
 156         }
 157
 158         next;
 159       }
 160
 161       if ($line =~ m|^content-transfer-encoding\s*=\s*(.*?)$|i) {
 162         $transfer_encoding = lc($1);
 163         if ($transfer_encoding  && $transfer_encoding !~ /^[78]bit|binary$/) {
 164           die 'Transfer encodings beyond 7bit/8bit and binary are not implemented.';
 165         }
 166         $p_attachment->{transfer_encoding} = $transfer_encoding;
 167
 168         next;
 169       }
 170
 171       next;
 172     }
 173
 174     next unless $previous;
 175
 176   } continue {
 177     $index += $line_length + 1;
 178   }
 179
 180   $::lxdebug->leave_sub(2);
 181 }
 182
 183 sub _recode_recursively {
 184   $::lxdebug->enter_sub;
 185   my ($iconv, $from, $to) = @_;
 186
 187   if (any { ref $from eq $_ } qw(Form HASH)) {
 188     for my $key (keys %{ $from }) {
 189       if (!ref $from->{$key}) {
 190         # Workaround for a bug: converting $from->{$key} directly
 191         # leads to 'undef'. I don't know why. Converting a copy works,
 192         # though.
 193         $to->{$key} = $iconv->convert("" . $from->{$key}) if defined $from->{$key} && !defined $to->{$key};
 194       } else {
 195         $to->{$key} ||= {} if 'HASH'  eq ref $from->{$key};
 196         $to->{$key} ||= [] if 'ARRAY' eq ref $from->{$key};
 197         _recode_recursively($iconv, $from->{$key}, $to->{$key});
 198       }
 199     }
 200
 201   } elsif (ref $from eq 'ARRAY') {
 202     foreach my $idx (0 .. scalar(@{ $from }) - 1) {
 203       if (!ref $from->[$idx]) {
 204         # Workaround for a bug: converting $from->[$idx] directly
 205         # leads to 'undef'. I don't know why. Converting a copy works,
 206         # though.
 207         $to->[$idx] = $iconv->convert("" . $from->[$idx]);
 208       } else {
 209         $to->[$idx] ||= {} if 'HASH'  eq ref $from->[$idx];
 210         $to->[$idx] ||= [] if 'ARRAY' eq ref $from->[$idx];
 211         _recode_recursively($iconv, $from->[$idx], $to->[$idx]);
 212       }
 213     }
 214   }
 215   $main::lxdebug->leave_sub();
 216 }
 217
 218 sub read_cgi_input {
 219   $::lxdebug->enter_sub;
 220
 221   my ($target) = @_;
 222   my $db_charset   = $::lx_office_conf{system}->{dbcharset} || Common::DEFAULT_CHARSET;
 223
 224   # yes i know, copying all those values around isn't terribly efficient, but
 225   # the old version of dumping everything into form and then launching a
 226   # tactical recode nuke at the data is still worse.
 227
 228   # this way the data can at least be recoded on the fly as soon as we get to
 229   # know the source encoding and only in the cases where encoding may be hidden
 230   # among the payload we take the hit of copying the request around
 231   my $temp_target = { };
 232
 233   # since both of these can potentially bring their encoding in INPUT_ENCODING
 234   # they get dumped into temp_target
 235   _input_to_hash($temp_target, $ENV{QUERY_STRING}) if $ENV{QUERY_STRING};
 236   _input_to_hash($temp_target, $ARGV[0])           if @ARGV && $ARGV[0];
 237
 238   if ($ENV{CONTENT_LENGTH}) {
 239     my $content;
 240     read STDIN, $content, $ENV{CONTENT_LENGTH};
 241     if ($ENV{'CONTENT_TYPE'} && $ENV{'CONTENT_TYPE'} =~ /multipart\/form-data/) {
 242       # multipart formdata can bring it's own encoding, so give it both
 243       # and let ti decide on it's own
 244       _parse_multipart_formdata($target, $temp_target, $content);
 245     } else {
 246       # normal encoding must be recoded
 247       _input_to_hash($temp_target, $content);
 248     }
 249   }
 250
 251   my $encoding     = delete $temp_target->{INPUT_ENCODING} || $db_charset;
 252
 253   _recode_recursively(SL::Iconv->new($encoding, $db_charset), $temp_target => $target) if keys %$target;
 254
 255   if ($target->{RESTORE_FORM_FROM_SESSION_ID}) {
 256     my %temp_form;
 257     $::auth->restore_form_from_session(delete $target->{RESTORE_FORM_FROM_SESSION_ID}, form => \%temp_form);
 258     _store_value($target, $_, $temp_form{$_}) for keys %temp_form;
 259   }
 260
 261   $::lxdebug->leave_sub;
 262
 263   return $target;
 264 }
 265
 266 sub flatten {
 267   my ($source, $target, $prefix, $in_array) = @_;
 268   $target ||= [];
 269
 270   # there are two edge cases that need attention. first: more than one hash
 271   # inside an array.  only the first of each nested can have a [+].  second: if
 272   # an array contains mixed values _store_value will rely on autovivification.
 273   # so any type change must have a [+]
 274   # this closure decides one recursion step AFTER an array has been found if a
 275   # [+] needs to be generated
 276   my $arr_prefix = sub {
 277     return $_[0] ? '[+]' : '[]' if $in_array;
 278     return '';
 279   };
 280
 281   for (ref $source) {
 282     /^HASH$/ && do {
 283       my $first = 1;
 284       for my $key (keys %$source) {
 285         flatten($source->{$key} => $target, (defined $prefix ? $prefix . $arr_prefix->($first) . '.' : '') . $key);
 286         $first = 0;
 287       };
 288       next;
 289     };
 290     /^ARRAY$/ && do {
 291       for my $i (0 .. $#$source) {
 292         flatten($source->[$i] => $target, $prefix . $arr_prefix->($i == 0), '1');
 293       }
 294       next;
 295     };
 296     !$_ && do {
 297       die "can't flatten a pure scalar" unless defined $prefix;
 298       push @$target, [ $prefix . $arr_prefix->(0) => $source ];
 299       next;
 300     };
 301     die "unrecognized reference of a data structure $_. cannot serialize refs, globs and code yet. to serialize Form please use the method there";
 302   }
 303
 304   return $target;
 305 }
 306
 307
 308 sub unflatten {
 309   my ($data, $target) = @_;
 310   $target ||= {};
 311
 312   for my $pair (@$data) {
 313     _store_value($target, @$pair) if defined $pair->[0];
 314   }
 315
 316   return $target;
 317 }
 318
 319 1;
 320
 321 __END__
 322
 323 =head1 NAME
 324
 325 SL::Request.pm - request parsing and data serialization
 326
 327 =head1 SYNOPSIS
 328
 329 This module handles unpacking of cgi parameters. usually you don't want to call
 330 anything in here directly.
 331
 332   use SL::Request qw(read_cgi_input);
 333
 334   # read cgi input depending on request type, unflatten and recode
 335   read_cgi_input($target_hash_ref);
 336
 337   # $hashref and $new_hashref should be identical
 338   my $new_arrayref = flatten($hashref);
 339   my $new_hashref  = unflatten($new_arrayref);
 340
 341
 342 =head1 DESCRIPTION
 343
 344 This module handles flattening and unflattening of data for request
 345 roundtrip purposes. Lx-Office uses the format as described below:
 346
 347 =over 4
 348
 349 =item Hashes
 350
 351 Hash entries will be connected with a dot (C<.>). A simple hash like this
 352
 353   order => {
 354     item     => 2,
 355     customer => 5
 356   }
 357
 358 will be serialized to
 359
 360   [ order.item     => 2 ],
 361   [ order.customer => 5 ],
 362
 363 =item Arrays
 364
 365 Arrays will by trailing empty brackets (C<[]>). An hash like this
 366
 367   selected_id => [ 2, 6, 8, 9 ]
 368
 369 will be flattened to
 370
 371   [ selected_id[] => 2 ],
 372   [ selected_id[] => 6 ],
 373   [ selected_id[] => 8 ],
 374   [ selected_id[] => 9 ],
 375
 376 Since this will produce identical keys, the resulting flattened list can not be
 377 used as a hash. It is however very easy to use this in a template to generate
 378 input:
 379
 380   [% FOREACH id = selected_ids %]
 381     <input type="hidden" name="selected_id[]" value="[% id | html %]">
 382   [% END %]
 383
 384 =item Nested structures
 385
 386 A special version of this are nested hashs in an array, which is very common.
 387 The combined operator (C<[].>) will be used. As a special case, every time a new
 388 array slice is started, the special convention (C<[+].>) will be used. Again this
 389 is because it's easy to write a template with it.
 390
 391 So this
 392
 393   order => {
 394     orderitems => [
 395       {
 396         id   => 1,
 397         part => 15
 398       },
 399       {
 400         id   => 2,
 401         part => 7
 402       },
 403     ]
 404   }
 405
 406 will be
 407
 408   [ order.orderitems[+].id  => 1  ],
 409   [ order.orderitems[].part => 15 ],
 410   [ order.orderitems[+].id  => 2  ],
 411   [ order.orderitems[].part => 7  ],
 412
 413 =item Limitations
 414
 415   The format currently does have certain limitations when compared to other
 416   serialization formats.
 417
 418 =over 4
 419
 420 =item Order
 421
 422 The order of serialized values matters to reconstruct arrays properly. This
 423 should rarely be a problem if you just flatten and dump into a url or a field
 424 of hiddens.
 425
 426 =item Empty Keys
 427
 428 The current implementation of flatten does produce correct serialization of
 429 empty keys, but unflatten is unable to resolve these. Do no use C<''> or
 430 C<undef> as keys. C<0> is fine.
 431
 432 =item Key Escaping
 433
 434 You cannot use the tokens C<[]>, C<[+]> and C<.> in keys. No way around it.
 435
 436 =item Sparse Arrays
 437
 438 It is not possible to serialize somehing like
 439
 440   sparse_array => do { my $sa = []; $sa[100] = 1; $sa },
 441
 442 This is a feature, as perl doesn't do well with very large arrays.
 443
 444 =item Recursion
 445
 446 There is currently no support nor prevention for flattening a circular structure.
 447
 448 =item Custom Delimiter
 449
 450 No support for other delimiters, sorry.
 451
 452 =item Other References
 453
 454 No support for globs, scalar refs, code refs, filehandles and the like. These will die.
 455
 456 =back
 457
 458 =back
 459
 460 =head1 FUNCTIONS
 461
 462 =over 4
 463
 464 =item C<flatten HASHREF [ ARRAYREF ]>
 465
 466 This function will flatten the provided hash ref into the provided array ref.
 467 The array ref may be non empty, but will be changed in this case.
 468
 469 Return value is the flattened array ref.
 470
 471 =item C<unflatten ARRAYREF [ HASHREF ]>
 472
 473 This function will parse the array ref, and will store the contents into the hash ref. The hash ref may be non empty, in this case any new keys will override the old ones only on leafs with same type. Type changes on a node will die.
 474
 475 =back
 476
 477 =head1 SPECIAL FUNCTIONS
 478
 479 =head2 C<_store_value()>
 480
 481 parses a complex var name, and stores it in the form.
 482
 483 syntax:
 484   _store_value($target, $key, $value);
 485
 486 keys must start with a string, and can contain various tokens.
 487 supported key structures are:
 488
 489 1. simple access
 490   simple key strings work as expected
 491
 492   id => $form->{id}
 493
 494 2. hash access.
 495   separating two keys by a dot (.) will result in a hash lookup for the inner value
 496   this is similar to the behaviour of java and templating mechanisms.
 497
 498   filter.description => $form->{filter}->{description}
 499
 500 3. array+hashref access
 501
 502   adding brackets ([]) before the dot will cause the next hash to be put into an array.
 503   using [+] instead of [] will force a new array index. this is useful for recurring
 504   data structures like part lists. put a [+] into the first varname, and use [] on the
 505   following ones.
 506
 507   repeating these names in your template:
 508
 509     invoice.items[+].id
 510     invoice.items[].parts_id
 511
 512   will result in:
 513
 514     $form->{invoice}->{items}->[
 515       {
 516         id       => ...
 517         parts_id => ...
 518       },
 519       {
 520         id       => ...
 521         parts_id => ...
 522       }
 523       ...
 524     ]
 525
 526 4. arrays
 527
 528   using brackets at the end of a name will result in a pure array to be created.
 529   note that you mustn't use [+], which is reserved for array+hash access and will
 530   result in undefined behaviour in array context.
 531
 532   filter.status[]  => $form->{status}->[ val1, val2, ... ]
 533
 534 =cut