SL/Request.pm

   1 package SL::Request;
   2
   3 use strict;
   4
   5 use parent qw(Rose::Object);
   6
   7 use CGI qw(-no_xhtml);
   8 use List::Util qw(first max min sum);
   9 use List::MoreUtils qw(all any apply);
  10 use Exporter qw(import);
  11
  12 use SL::Common;
  13 use SL::MoreCommon qw(uri_encode uri_decode);
  14 use SL::Layout::None;
  15 use SL::Presenter;
  16
  17 our @EXPORT_OK = qw(flatten unflatten read_cgi_input);
  18
  19 use Rose::Object::MakeMethods::Generic
  20 (
  21   'scalar --get_set_init' => [ qw(cgi layout presenter is_ajax type) ],
  22 );
  23
  24 sub init_cgi {
  25   return CGI->new({});
  26 }
  27
  28 sub init_layout {
  29   return SL::Layout::None->new;
  30 }
  31
  32 sub init_presenter {
  33   return SL::Presenter->new;
  34 }
  35
  36 sub init_is_ajax {
  37   return ($ENV{HTTP_X_REQUESTED_WITH} || '') eq 'XMLHttpRequest' ? 1 : 0;
  38 }
  39
  40 sub init_type {
  41   return 'html';
  42 }
  43
  44 sub cache {
  45   my ($self, $topic, $default) = @_;
  46
  47   $topic = '::' . (caller(0))[0] . "::$topic" unless $topic =~ m{^::};
  48
  49   $self->{_cache}           //= {};
  50   $self->{_cache}->{$topic} //= ($default // {});
  51
  52   return $self->{_cache}->{$topic};
  53 }
  54
  55 sub _store_value {
  56   my ($target, $key, $value) = @_;
  57   my @tokens = split /((?:\[\+?\])?(?:\.)|(?:\[\+?\]))/, $key;
  58   my $curr;
  59
  60   if (scalar @tokens) {
  61      $curr = \ $target->{ shift @tokens };
  62   }
  63
  64   while (@tokens) {
  65     my $sep = shift @tokens;
  66     my $key = shift @tokens;
  67
  68     $curr = \ $$curr->[$#$$curr], next   if $sep eq '[]' && @tokens;
  69     $curr = \ $$curr->[++$#$$curr], next if $sep eq '[]' && !@tokens;
  70     $curr = \ $$curr->[++$#$$curr], next if $sep eq '[+]';
  71     $curr = \ $$curr->[max 0, $#$$curr]  if $sep eq '[].';
  72     $curr = \ $$curr->[++$#$$curr]       if $sep eq '[+].';
  73     $curr = \ $$curr->{$key}
  74   }
  75
  76   $$curr = $value;
  77
  78   return $curr;
  79 }
  80
  81 sub _input_to_hash {
  82   $::lxdebug->enter_sub(2);
  83
  84   my ($target, $input, $log) = @_;
  85   my @pairs = split(/&/, $input);
  86
  87   foreach (@pairs) {
  88     my ($key, $value) = split(/=/, $_, 2);
  89     next unless $key;
  90     _store_value($target, uri_decode($key), uri_decode($value));
  91
  92     # for debugging
  93     $::lxdebug->add_request_params(uri_decode($key) => uri_decode($value)) if $log;
  94   }
  95
  96   $::lxdebug->leave_sub(2);
  97 }
  98
  99 sub _parse_multipart_formdata {
 100   my ($target, $temp_target, $input, $log) = @_;
 101   my ($name, $filename, $headers_done, $content_type, $boundary_found, $need_cr, $previous, $p_attachment, $encoding, $transfer_encoding);
 102   my $data_start = 0;
 103
 104   # teach substr and length to use good ol' bytes, not 'em fancy characters
 105   use bytes;
 106
 107   # We SHOULD honor encodings and transfer-encodings here, but as hard as I
 108   # looked I couldn't find a reasonably recent webbrowser that makes use of
 109   # these. Transfer encoding just eats up bandwidth...
 110
 111   # so all I'm going to do is add a fail safe that if anyone ever encounters
 112   # this, it's going to croak so that debugging is easier
 113   $ENV{'CONTENT_TYPE'} =~ /multipart\/form-data\s*;\s*boundary\s*=\s*(.+)$/;
 114   my $boundary = '--' . $1;
 115
 116   my $index = 0;
 117   my $line_length;
 118   foreach my $line (split m/\n/, $input) {
 119     $line_length = length $line;
 120
 121     if ($line =~ /^\Q$boundary\E(--)?\r?$/) {
 122       my $last_boundary = $1;
 123       my $data       =  substr $input, $data_start, $index - $data_start;
 124       $data =~ s/\r?\n$//;
 125
 126       if ($previous && !$filename && $transfer_encoding && $transfer_encoding ne 'binary') {
 127         ${ $previous } = Encode::decode($encoding, $data);
 128       } else {
 129         ${ $previous } = $data;
 130       }
 131       $::lxdebug->add_request_params($name, $$previous) if $log;
 132
 133       undef $previous;
 134       undef $filename;
 135
 136       $headers_done   = 0;
 137       $content_type   = "text/plain";
 138       $boundary_found = 1;
 139       $need_cr        = 0;
 140       $encoding       = 'UTF-8';
 141       $transfer_encoding = undef;
 142       last if $last_boundary;
 143       next;
 144     }
 145
 146     next unless $boundary_found;
 147
 148     if (!$headers_done) {
 149       $line =~ s/[\r\n]*$//;
 150
 151       if (!$line) {
 152         $headers_done = 1;
 153         $data_start = $index + $line_length + 1;
 154         next;
 155       }
 156
 157       if ($line =~ m|^content-disposition\s*:.*?form-data\s*;|i) {
 158         if ($line =~ m|filename\s*=\s*"(.*?)"|i) {
 159           $filename = $1;
 160           substr $line, $-[0], $+[0] - $-[0], "";
 161         }
 162
 163         if ($line =~ m|name\s*=\s*"(.*?)"|i) {
 164           $name = $1;
 165           substr $line, $-[0], $+[0] - $-[0], "";
 166         }
 167
 168         if ($name) {
 169           # legacy, some old upload routines expect this to be here
 170           $temp_target->{FILENAME} = $filename if defined $filename;
 171
 172           # Name can potentially be both a normal variable or a file upload.
 173           # A file upload can be identified by its "filename" attribute.
 174           # The thing is, if a [+] clause vivifies structure in one of the
 175           # branches it must be done in both, or subsequent "[]" will fail
 176           my $temp_target_slot = _store_value($temp_target, $name);
 177           my $target_slot      = _store_value($target,      $name);
 178
 179           # set the reference for appending of multiline data to the correct one
 180           $previous            = defined $filename ? $target_slot : $temp_target_slot;
 181
 182           # for multiple uploads: save the attachments in a SL/Mailer like structure
 183           if (defined $filename) {
 184             my $target_attachment      = _store_value($target,      "ATTACHMENTS.$name", {});
 185             my $temp_target_attachment = _store_value($temp_target, "ATTACHMENTS.$name", {});
 186
 187             $$target_attachment->{data}          = $previous;
 188             $$temp_target_attachment->{filename} = $filename;
 189
 190             $p_attachment = $$temp_target_attachment;
 191           }
 192         }
 193
 194         next;
 195       }
 196
 197       if ($line =~ m|^content-type\s*:\s*(.*?)[;\$]|i) {
 198         $content_type = $1;
 199         $p_attachment->{content_type} = $1;
 200
 201         if ($content_type =~ /^text/ && $line =~ m|;\s*charset\s*:\s*("?)(.*?)\1$|i) {
 202           $encoding = $2;
 203         }
 204
 205         next;
 206       }
 207
 208       if ($line =~ m|^content-transfer-encoding\s*=\s*(.*?)$|i) {
 209         $transfer_encoding = lc($1);
 210         if ($transfer_encoding  && $transfer_encoding !~ /^[78]bit|binary$/) {
 211           die 'Transfer encodings beyond 7bit/8bit and binary are not implemented.';
 212         }
 213         $p_attachment->{transfer_encoding} = $transfer_encoding;
 214
 215         next;
 216       }
 217
 218       next;
 219     }
 220
 221     next unless $previous;
 222
 223   } continue {
 224     $index += $line_length + 1;
 225   }
 226
 227   $::lxdebug->leave_sub(2);
 228 }
 229
 230 sub _recode_recursively {
 231   $::lxdebug->enter_sub;
 232   my ($iconv, $from, $to) = @_;
 233
 234   if (any { ref $from eq $_ } qw(Form HASH)) {
 235     for my $key (keys %{ $from }) {
 236       if (!ref $from->{$key}) {
 237         # Workaround for a bug: converting $from->{$key} directly
 238         # leads to 'undef'. I don't know why. Converting a copy works,
 239         # though.
 240         $to->{$key} = $iconv->convert("" . $from->{$key}) if defined $from->{$key} && !defined $to->{$key};
 241       } else {
 242         $to->{$key} ||= {} if 'HASH'  eq ref $from->{$key};
 243         $to->{$key} ||= [] if 'ARRAY' eq ref $from->{$key};
 244         _recode_recursively($iconv, $from->{$key}, $to->{$key});
 245       }
 246     }
 247
 248   } elsif (ref $from eq 'ARRAY') {
 249     foreach my $idx (0 .. scalar(@{ $from }) - 1) {
 250       if (!ref $from->[$idx]) {
 251         # Workaround for a bug: converting $from->[$idx] directly
 252         # leads to 'undef'. I don't know why. Converting a copy works,
 253         # though.
 254         $to->[$idx] = $iconv->convert("" . $from->[$idx]) if defined $from->[$idx] && !defined $to->[$idx];
 255       } else {
 256         $to->[$idx] ||= {} if 'HASH'  eq ref $from->[$idx];
 257         $to->[$idx] ||= [] if 'ARRAY' eq ref $from->[$idx];
 258         _recode_recursively($iconv, $from->[$idx], $to->[$idx]);
 259       }
 260     }
 261   }
 262   $main::lxdebug->leave_sub();
 263 }
 264
 265 sub read_cgi_input {
 266   $::lxdebug->enter_sub;
 267
 268   my ($target) = @_;
 269
 270   # yes i know, copying all those values around isn't terribly efficient, but
 271   # the old version of dumping everything into form and then launching a
 272   # tactical recode nuke at the data is still worse.
 273
 274   # this way the data can at least be recoded on the fly as soon as we get to
 275   # know the source encoding and only in the cases where encoding may be hidden
 276   # among the payload we take the hit of copying the request around
 277   my $temp_target = { };
 278
 279   # since both of these can potentially bring their encoding in INPUT_ENCODING
 280   # they get dumped into temp_target
 281   _input_to_hash($temp_target, $ENV{QUERY_STRING}, 1) if $ENV{QUERY_STRING};
 282   _input_to_hash($temp_target, $ARGV[0],           1) if @ARGV && $ARGV[0];
 283
 284   if ($ENV{CONTENT_LENGTH}) {
 285     my $content;
 286     read STDIN, $content, $ENV{CONTENT_LENGTH};
 287     if ($ENV{'CONTENT_TYPE'} && $ENV{'CONTENT_TYPE'} =~ /multipart\/form-data/) {
 288       # multipart formdata can bring it's own encoding, so give it both
 289       # and let it decide on it's own
 290       _parse_multipart_formdata($target, $temp_target, $content, 1);
 291     } else {
 292       # normal encoding must be recoded
 293       _input_to_hash($temp_target, $content, 1);
 294     }
 295   }
 296
 297   my $encoding     = delete $temp_target->{INPUT_ENCODING} || 'UTF-8';
 298
 299   _recode_recursively(SL::Iconv->new($encoding, 'UTF-8'), $temp_target => $target) if keys %$target;
 300
 301   if ($target->{RESTORE_FORM_FROM_SESSION_ID}) {
 302     my %temp_form;
 303     $::auth->restore_form_from_session(delete $target->{RESTORE_FORM_FROM_SESSION_ID}, form => \%temp_form);
 304     _store_value($target, $_, $temp_form{$_}) for keys %temp_form;
 305   }
 306
 307   $::lxdebug->leave_sub;
 308
 309   return $target;
 310 }
 311
 312 sub flatten {
 313   my ($source, $target, $prefix, $in_array) = @_;
 314   $target ||= [];
 315
 316   # There are two edge cases that need attention. First: more than one hash
 317   # inside an array.  Only the first of each nested can have a [+].  Second: if
 318   # an array contains mixed values _store_value will rely on autovivification.
 319   # So any type change must have a [+]
 320   # This closure decides one recursion step AFTER an array has been found if a
 321   # [+] needs to be generated
 322   my $arr_prefix = sub {
 323     return $_[0] ? '[+]' : '[]' if $in_array;
 324     return '';
 325   };
 326
 327   for (ref $source) {
 328     /^HASH$/ && do {
 329       my $first = 1;
 330       for my $key (sort keys %$source) {
 331         flatten($source->{$key} => $target, (defined $prefix ? $prefix . $arr_prefix->($first) . '.' : '') . $key);
 332         $first = 0;
 333       };
 334       next;
 335     };
 336     /^ARRAY$/ && do {
 337       for my $i (0 .. $#$source) {
 338         flatten($source->[$i] => $target, $prefix . $arr_prefix->($i == 0), '1');
 339       }
 340       next;
 341     };
 342     !$_ && do {
 343       die "can't flatten a pure scalar" unless defined $prefix;
 344       push @$target, [ $prefix . $arr_prefix->(0) => $source ];
 345       next;
 346     };
 347     die "unrecognized reference of a data structure $_. cannot serialize refs, globs and code yet. to serialize Form please use the method there";
 348   }
 349
 350   return $target;
 351 }
 352
 353
 354 sub unflatten {
 355   my ($data, $target) = @_;
 356   $target ||= {};
 357
 358   for my $pair (@$data) {
 359     _store_value($target, @$pair) if defined $pair->[0];
 360   }
 361
 362   return $target;
 363 }
 364
 365 1;
 366
 367 __END__
 368
 369 =head1 NAME
 370
 371 SL::Request.pm - request parsing, data serialization, request information
 372
 373 =head1 SYNOPSIS
 374
 375 This module handles unpacking of CGI parameters. It also gives
 376 information about the request, such as whether or not it was done via AJAX,
 377 or the requested content type.
 378
 379   use SL::Request qw(read_cgi_input);
 380
 381   # read cgi input depending on request type, unflatten and recode
 382   read_cgi_input($target_hash_ref);
 383
 384   # $hashref and $new_hashref should be identical
 385   my $new_arrayref = flatten($hashref);
 386   my $new_hashref  = unflatten($new_arrayref);
 387
 388   # Handle AJAX requests differently than normal requests:
 389   if ($::request->is_ajax) {
 390     $controller->render('json-mask', { type => 'json' });
 391   } else {
 392     $controller->render('full-mask');
 393   }
 394
 395 =head1 DESCRIPTION
 396
 397 This module provides information about the request made by the
 398 browser.
 399
 400 It also handles flattening and unflattening of data for request
 401 roundtrip purposes. kivitendo uses the format as described below:
 402
 403 =over 4
 404
 405 =item Hashes
 406
 407 Hash entries will be connected with a dot (C<.>). A simple hash like this
 408
 409   order => {
 410     item     => 2,
 411     customer => 5
 412   }
 413
 414 will be serialized to
 415
 416   [ order.item     => 2 ],
 417   [ order.customer => 5 ],
 418
 419 =item Arrays
 420
 421 Arrays will be marked by empty brackets (C<[]>). A hash like this
 422
 423   selected_id => [ 2, 6, 8, 9 ]
 424
 425 will be flattened to
 426
 427   [ selected_id[] => 2 ],
 428   [ selected_id[] => 6 ],
 429   [ selected_id[] => 8 ],
 430   [ selected_id[] => 9 ],
 431
 432 Since this will produce identical keys, the resulting flattened list can not be
 433 used as a hash. It is however very easy to use this in a template to generate
 434 input:
 435
 436   [% FOREACH id = selected_ids %]
 437     <input type="hidden" name="selected_id[]" value="[% id | html %]">
 438   [% END %]
 439
 440 =item Nested structures
 441
 442 A special version of this are nested hashes in an array, which is very common.
 443 The combined operator (C<[].>) will be used. As a special case, every time a new
 444 array slice is started, the special convention (C<[+].>) will be used. Again this
 445 is because it's easy to write a template with it.
 446
 447 So this
 448
 449   order => {
 450     orderitems => [
 451       {
 452         id   => 1,
 453         part => 15
 454       },
 455       {
 456         id   => 2,
 457         part => 7
 458       },
 459     ]
 460   }
 461
 462 will be
 463
 464   [ order.orderitems[+].id  => 1  ],
 465   [ order.orderitems[].part => 15 ],
 466   [ order.orderitems[+].id  => 2  ],
 467   [ order.orderitems[].part => 7  ],
 468
 469 =item Limitations
 470
 471   The format currently does have certain limitations when compared to other
 472   serialization formats.
 473
 474 =over 4
 475
 476 =item Order
 477
 478 The order of serialized values matters to reconstruct arrays properly. This
 479 should rarely be a problem if you just flatten and dump into a url or a field
 480 of hiddens.
 481
 482 =item Empty Keys
 483
 484 The current implementation of flatten does produce correct serialization of
 485 empty keys, but unflatten is unable to resolve these. Do no use C<''> or
 486 C<undef> as keys. C<0> is fine.
 487
 488 =item Key Escaping
 489
 490 You cannot use the tokens C<[]>, C<[+]> and C<.> in keys. No way around it.
 491
 492 =item Sparse Arrays
 493
 494 It is not possible to serialize something like
 495
 496   sparse_array => do { my $sa = []; $sa[100] = 1; $sa },
 497
 498 This is a feature, as perl doesn't do well with very large arrays.
 499
 500 =item Recursion
 501
 502 There is currently no support nor prevention for flattening a circular structure.
 503
 504 =item Custom Delimiter
 505
 506 No support for other delimiters, sorry.
 507
 508 =item Other References
 509
 510 No support for globs, scalar refs, code refs, filehandles and the like. These will die.
 511
 512 =back
 513
 514 =back
 515
 516 =head1 FUNCTIONS
 517
 518 =over 4
 519
 520 =item C<flatten HASHREF [ ARRAYREF ]>
 521
 522 This function will flatten the provided hash ref into the provided array ref.
 523 The array ref may be non empty, but will be changed in this case.
 524
 525 The return value is the flattened array ref.
 526
 527 =item C<unflatten ARRAYREF [ HASHREF ]>
 528
 529 This function will parse the array ref, and will store the contents into the hash ref. The hash ref may be non empty, in this case any new keys will override the old ones only on leafs with same type. Type changes on a node will die.
 530
 531 =item C<is_ajax>
 532
 533 Returns trueish if the request is an XML HTTP request, also known as
 534 an 'AJAX' request.
 535
 536 =item C<type>
 537
 538 Returns the requested content type (either C<html>, C<js> or C<json>).
 539
 540 =item C<layout>
 541
 542 Set and retrieve the layout object for the current request. Must be an instance
 543 of L<SL::Layout::Base>. Defaults to an instance of L<SL::Layout::None>.
 544
 545 For more information about layouts, see L<SL::Layout::Dispatcher>.
 546
 547 =item C<cache $topic[, $default ]>
 548
 549 Caches an item for the duration of the request. C<$topic> must be an
 550 index name referring to the thing to cache. It is used for retrieving
 551 it later on. If C<$topic> doesn't start with C<::> then the caller's
 552 package name is prepended to the topic. For example, if the a from
 553 package C<SL::StuffedStuff> calls with topic = C<get_stuff> then the
 554 actual key will be C<::SL::StuffedStuff::get_stuff>.
 555
 556 If no item exists in the cache for C<$topic> then it is created and
 557 its initial value is set to C<$default>. If C<$default> is not given
 558 (undefined) then a new, empty hash reference is created.
 559
 560 Returns the cached item.
 561
 562 =back
 563
 564 =head1 SPECIAL FUNCTIONS
 565
 566 =head2 C<_store_value()>
 567
 568 Parses a complex var name, and stores it in the form.
 569
 570 Syntax:
 571   _store_value($target, $key, $value);
 572
 573 Keys must start with a string, and can contain various tokens.
 574 Supported key structures are:
 575
 576 1. simple access
 577   Simple key strings work as expected
 578
 579   id => $form->{id}
 580
 581 2. hash access.
 582   Separating two keys by a dot (.) will result in a hash lookup for the inner value
 583   This is similar to the behaviour of java and templating mechanisms.
 584
 585   filter.description => $form->{filter}->{description}
 586
 587 3. array+hashref access
 588
 589   Adding brackets ([]) before the dot will cause the next hash to be put into an array.
 590   Using [+] instead of [] will force a new array index. This is useful for recurring
 591   data structures like part lists. Put a [+] into the first varname, and use [] on the
 592   following ones.
 593
 594   Repeating these names in your template:
 595
 596     invoice.items[+].id
 597     invoice.items[].parts_id
 598
 599   will result in:
 600
 601     $form->{invoice}->{items}->[
 602       {
 603         id       => ...
 604         parts_id => ...
 605       },
 606       {
 607         id       => ...
 608         parts_id => ...
 609       }
 610       ...
 611     ]
 612
 613 4. arrays
 614
 615   Using brackets at the end of a name will result in the creation of a pure array.
 616   Note that you mustn't use [+], which is reserved for array+hash access and will
 617   result in undefined behaviour in array context.
 618
 619   filter.status[]  => $form->{status}->[ val1, val2, ... ]
 620
 621 =cut