SL/Request.pm

   1 package SL::Request;
   2
   3 use strict;
   4
   5 use parent qw(Rose::Object);
   6
   7 use CGI qw(-no_xhtml);
   8 use List::Util qw(first max min sum);
   9 use List::MoreUtils qw(all any apply);
  10 use Exporter qw(import);
  11
  12 use SL::Common;
  13 use SL::MoreCommon qw(uri_encode uri_decode);
  14 use SL::Layout::None;
  15 use SL::Presenter;
  16
  17 our @EXPORT_OK = qw(flatten unflatten read_cgi_input);
  18
  19 use Rose::Object::MakeMethods::Generic
  20 (
  21   scalar                  => [ qw(applying_database_upgrades) ],
  22   'scalar --get_set_init' => [ qw(cgi layout presenter is_ajax type) ],
  23 );
  24
  25 sub init_cgi {
  26   return CGI->new({});
  27 }
  28
  29 sub init_layout {
  30   return SL::Layout::None->new;
  31 }
  32
  33 sub init_presenter {
  34   return SL::Presenter->new;
  35 }
  36
  37 sub init_is_ajax {
  38   return ($ENV{HTTP_X_REQUESTED_WITH} || '') eq 'XMLHttpRequest' ? 1 : 0;
  39 }
  40
  41 sub init_type {
  42   return 'html';
  43 }
  44
  45 sub cache {
  46   my ($self, $topic, $default) = @_;
  47
  48   $topic = '::' . (caller(0))[0] . "::$topic" unless $topic =~ m{^::};
  49
  50   $self->{_cache}           //= {};
  51   $self->{_cache}->{$topic} //= ($default // {});
  52
  53   return $self->{_cache}->{$topic};
  54 }
  55
  56 sub _store_value {
  57   my ($target, $key, $value) = @_;
  58   my @tokens = split /((?:\[\+?\])?(?:\.)|(?:\[\+?\]))/, $key;
  59   my $curr;
  60
  61   if (scalar @tokens) {
  62      $curr = \ $target->{ shift @tokens };
  63   }
  64
  65   while (@tokens) {
  66     my $sep = shift @tokens;
  67     my $key = shift @tokens;
  68
  69     $curr = \ $$curr->[$#$$curr], next   if $sep eq '[]' && @tokens;
  70     $curr = \ $$curr->[++$#$$curr], next if $sep eq '[]' && !@tokens;
  71     $curr = \ $$curr->[++$#$$curr], next if $sep eq '[+]';
  72     $curr = \ $$curr->[max 0, $#$$curr]  if $sep eq '[].';
  73     $curr = \ $$curr->[++$#$$curr]       if $sep eq '[+].';
  74     $curr = \ $$curr->{$key}
  75   }
  76
  77   $$curr = $value;
  78
  79   return $curr;
  80 }
  81
  82 sub _input_to_hash {
  83   $::lxdebug->enter_sub(2);
  84
  85   my ($target, $input, $log) = @_;
  86   my @pairs = split(/&/, $input);
  87
  88   foreach (@pairs) {
  89     my ($key, $value) = split(/=/, $_, 2);
  90     next unless $key;
  91     _store_value($target, uri_decode($key), uri_decode($value));
  92
  93     # for debugging
  94     $::lxdebug->add_request_params(uri_decode($key) => uri_decode($value)) if $log;
  95   }
  96
  97   $::lxdebug->leave_sub(2);
  98 }
  99
 100 sub _parse_multipart_formdata {
 101   my ($target, $temp_target, $input, $log) = @_;
 102   my ($name, $filename, $headers_done, $content_type, $boundary_found, $need_cr, $previous, $p_attachment, $encoding, $transfer_encoding);
 103   my $data_start = 0;
 104
 105   # teach substr and length to use good ol' bytes, not 'em fancy characters
 106   use bytes;
 107
 108   # We SHOULD honor encodings and transfer-encodings here, but as hard as I
 109   # looked I couldn't find a reasonably recent webbrowser that makes use of
 110   # these. Transfer encoding just eats up bandwidth...
 111
 112   # so all I'm going to do is add a fail safe that if anyone ever encounters
 113   # this, it's going to croak so that debugging is easier
 114   $ENV{'CONTENT_TYPE'} =~ /multipart\/form-data\s*;\s*boundary\s*=\s*(.+)$/;
 115   my $boundary = '--' . $1;
 116
 117   my $index = 0;
 118   my $line_length;
 119   foreach my $line (split m/\n/, $input) {
 120     $line_length = length $line;
 121
 122     if ($line =~ /^\Q$boundary\E(--)?\r?$/) {
 123       my $last_boundary = $1;
 124       my $data       =  substr $input, $data_start, $index - $data_start;
 125       $data =~ s/\r?\n$//;
 126
 127       if ($previous && !$filename && $transfer_encoding && $transfer_encoding ne 'binary') {
 128         ${ $previous } = Encode::decode($encoding, $data);
 129       } else {
 130         ${ $previous } = $data;
 131       }
 132       $::lxdebug->add_request_params($name, $$previous) if $log;
 133
 134       undef $previous;
 135       undef $filename;
 136
 137       $headers_done   = 0;
 138       $content_type   = "text/plain";
 139       $boundary_found = 1;
 140       $need_cr        = 0;
 141       $encoding       = 'UTF-8';
 142       $transfer_encoding = undef;
 143       last if $last_boundary;
 144       next;
 145     }
 146
 147     next unless $boundary_found;
 148
 149     if (!$headers_done) {
 150       $line =~ s/[\r\n]*$//;
 151
 152       if (!$line) {
 153         $headers_done = 1;
 154         $data_start = $index + $line_length + 1;
 155         next;
 156       }
 157
 158       if ($line =~ m|^content-disposition\s*:.*?form-data\s*;|i) {
 159         if ($line =~ m|filename\s*=\s*"(.*?)"|i) {
 160           $filename = $1;
 161           substr $line, $-[0], $+[0] - $-[0], "";
 162         }
 163
 164         if ($line =~ m|name\s*=\s*"(.*?)"|i) {
 165           $name = $1;
 166           substr $line, $-[0], $+[0] - $-[0], "";
 167         }
 168
 169         if ($name) {
 170           # legacy, some old upload routines expect this to be here
 171           $temp_target->{FILENAME} = $filename if defined $filename;
 172
 173           # Name can potentially be both a normal variable or a file upload.
 174           # A file upload can be identified by its "filename" attribute.
 175           # The thing is, if a [+] clause vivifies structure in one of the
 176           # branches it must be done in both, or subsequent "[]" will fail
 177           my $temp_target_slot = _store_value($temp_target, $name);
 178           my $target_slot      = _store_value($target,      $name);
 179
 180           # set the reference for appending of multiline data to the correct one
 181           $previous            = defined $filename ? $target_slot : $temp_target_slot;
 182
 183           # for multiple uploads: save the attachments in a SL/Mailer like structure
 184           if (defined $filename) {
 185             my $target_attachment      = _store_value($target,      "ATTACHMENTS.$name", {});
 186             my $temp_target_attachment = _store_value($temp_target, "ATTACHMENTS.$name", {});
 187
 188             $$target_attachment->{data}          = $previous;
 189             $$temp_target_attachment->{filename} = $filename;
 190
 191             $p_attachment = $$temp_target_attachment;
 192           }
 193         }
 194
 195         next;
 196       }
 197
 198       if ($line =~ m|^content-type\s*:\s*(.*?)[;\$]|i) {
 199         $content_type = $1;
 200         $p_attachment->{content_type} = $1;
 201
 202         if ($content_type =~ /^text/ && $line =~ m|;\s*charset\s*:\s*("?)(.*?)\1$|i) {
 203           $encoding = $2;
 204         }
 205
 206         next;
 207       }
 208
 209       if ($line =~ m|^content-transfer-encoding\s*=\s*(.*?)$|i) {
 210         $transfer_encoding = lc($1);
 211         if ($transfer_encoding  && $transfer_encoding !~ /^[78]bit|binary$/) {
 212           die 'Transfer encodings beyond 7bit/8bit and binary are not implemented.';
 213         }
 214         $p_attachment->{transfer_encoding} = $transfer_encoding;
 215
 216         next;
 217       }
 218
 219       next;
 220     }
 221
 222     next unless $previous;
 223
 224   } continue {
 225     $index += $line_length + 1;
 226   }
 227
 228   $::lxdebug->leave_sub(2);
 229 }
 230
 231 sub _recode_recursively {
 232   $::lxdebug->enter_sub;
 233   my ($iconv, $from, $to) = @_;
 234
 235   if (any { ref $from eq $_ } qw(Form HASH)) {
 236     for my $key (keys %{ $from }) {
 237       if (!ref $from->{$key}) {
 238         # Workaround for a bug: converting $from->{$key} directly
 239         # leads to 'undef'. I don't know why. Converting a copy works,
 240         # though.
 241         $to->{$key} = $iconv->convert("" . $from->{$key}) if defined $from->{$key} && !defined $to->{$key};
 242       } else {
 243         $to->{$key} ||= {} if 'HASH'  eq ref $from->{$key};
 244         $to->{$key} ||= [] if 'ARRAY' eq ref $from->{$key};
 245         _recode_recursively($iconv, $from->{$key}, $to->{$key});
 246       }
 247     }
 248
 249   } elsif (ref $from eq 'ARRAY') {
 250     foreach my $idx (0 .. scalar(@{ $from }) - 1) {
 251       if (!ref $from->[$idx]) {
 252         # Workaround for a bug: converting $from->[$idx] directly
 253         # leads to 'undef'. I don't know why. Converting a copy works,
 254         # though.
 255         $to->[$idx] = $iconv->convert("" . $from->[$idx]) if defined $from->[$idx] && !defined $to->[$idx];
 256       } else {
 257         $to->[$idx] ||= {} if 'HASH'  eq ref $from->[$idx];
 258         $to->[$idx] ||= [] if 'ARRAY' eq ref $from->[$idx];
 259         _recode_recursively($iconv, $from->[$idx], $to->[$idx]);
 260       }
 261     }
 262   }
 263   $main::lxdebug->leave_sub();
 264 }
 265
 266 sub read_cgi_input {
 267   $::lxdebug->enter_sub;
 268
 269   my ($target) = @_;
 270
 271   # yes i know, copying all those values around isn't terribly efficient, but
 272   # the old version of dumping everything into form and then launching a
 273   # tactical recode nuke at the data is still worse.
 274
 275   # this way the data can at least be recoded on the fly as soon as we get to
 276   # know the source encoding and only in the cases where encoding may be hidden
 277   # among the payload we take the hit of copying the request around
 278   my $temp_target = { };
 279
 280   # since both of these can potentially bring their encoding in INPUT_ENCODING
 281   # they get dumped into temp_target
 282   _input_to_hash($temp_target, $ENV{QUERY_STRING}, 1) if $ENV{QUERY_STRING};
 283   _input_to_hash($temp_target, $ARGV[0],           1) if @ARGV && $ARGV[0];
 284
 285   if ($ENV{CONTENT_LENGTH}) {
 286     my $content;
 287     read STDIN, $content, $ENV{CONTENT_LENGTH};
 288     if ($ENV{'CONTENT_TYPE'} && $ENV{'CONTENT_TYPE'} =~ /multipart\/form-data/) {
 289       # multipart formdata can bring it's own encoding, so give it both
 290       # and let it decide on it's own
 291       _parse_multipart_formdata($target, $temp_target, $content, 1);
 292     } else {
 293       # normal encoding must be recoded
 294       _input_to_hash($temp_target, $content, 1);
 295     }
 296   }
 297
 298   my $encoding     = delete $temp_target->{INPUT_ENCODING} || 'UTF-8';
 299
 300   _recode_recursively(SL::Iconv->new($encoding, 'UTF-8'), $temp_target => $target) if keys %$target;
 301
 302   if ($target->{RESTORE_FORM_FROM_SESSION_ID}) {
 303     my %temp_form;
 304     $::auth->restore_form_from_session(delete $target->{RESTORE_FORM_FROM_SESSION_ID}, form => \%temp_form);
 305     _store_value($target, $_, $temp_form{$_}) for keys %temp_form;
 306   }
 307
 308   $::lxdebug->leave_sub;
 309
 310   return $target;
 311 }
 312
 313 sub flatten {
 314   my ($source, $target, $prefix, $in_array) = @_;
 315   $target ||= [];
 316
 317   # There are two edge cases that need attention. First: more than one hash
 318   # inside an array.  Only the first of each nested can have a [+].  Second: if
 319   # an array contains mixed values _store_value will rely on autovivification.
 320   # So any type change must have a [+]
 321   # This closure decides one recursion step AFTER an array has been found if a
 322   # [+] needs to be generated
 323   my $arr_prefix = sub {
 324     return $_[0] ? '[+]' : '[]' if $in_array;
 325     return '';
 326   };
 327
 328   for (ref $source) {
 329     /^HASH$/ && do {
 330       my $first = 1;
 331       for my $key (sort keys %$source) {
 332         flatten($source->{$key} => $target, (defined $prefix ? $prefix . $arr_prefix->($first) . '.' : '') . $key);
 333         $first = 0;
 334       };
 335       next;
 336     };
 337     /^ARRAY$/ && do {
 338       for my $i (0 .. $#$source) {
 339         flatten($source->[$i] => $target, $prefix . $arr_prefix->($i == 0), '1');
 340       }
 341       next;
 342     };
 343     !$_ && do {
 344       die "can't flatten a pure scalar" unless defined $prefix;
 345       push @$target, [ $prefix . $arr_prefix->(0) => $source ];
 346       next;
 347     };
 348     die "unrecognized reference of a data structure $_. cannot serialize refs, globs and code yet. to serialize Form please use the method there";
 349   }
 350
 351   return $target;
 352 }
 353
 354
 355 sub unflatten {
 356   my ($data, $target) = @_;
 357   $target ||= {};
 358
 359   for my $pair (@$data) {
 360     _store_value($target, @$pair) if defined $pair->[0];
 361   }
 362
 363   return $target;
 364 }
 365
 366 1;
 367
 368 __END__
 369
 370 =head1 NAME
 371
 372 SL::Request.pm - request parsing, data serialization, request information
 373
 374 =head1 SYNOPSIS
 375
 376 This module handles unpacking of CGI parameters. It also gives
 377 information about the request, such as whether or not it was done via AJAX,
 378 or the requested content type.
 379
 380   use SL::Request qw(read_cgi_input);
 381
 382   # read cgi input depending on request type, unflatten and recode
 383   read_cgi_input($target_hash_ref);
 384
 385   # $hashref and $new_hashref should be identical
 386   my $new_arrayref = flatten($hashref);
 387   my $new_hashref  = unflatten($new_arrayref);
 388
 389   # Handle AJAX requests differently than normal requests:
 390   if ($::request->is_ajax) {
 391     $controller->render('json-mask', { type => 'json' });
 392   } else {
 393     $controller->render('full-mask');
 394   }
 395
 396 =head1 DESCRIPTION
 397
 398 This module provides information about the request made by the
 399 browser.
 400
 401 It also handles flattening and unflattening of data for request
 402 roundtrip purposes. kivitendo uses the format as described below:
 403
 404 =over 4
 405
 406 =item Hashes
 407
 408 Hash entries will be connected with a dot (C<.>). A simple hash like this
 409
 410   order => {
 411     item     => 2,
 412     customer => 5
 413   }
 414
 415 will be serialized to
 416
 417   [ order.item     => 2 ],
 418   [ order.customer => 5 ],
 419
 420 =item Arrays
 421
 422 Arrays will be marked by empty brackets (C<[]>). A hash like this
 423
 424   selected_id => [ 2, 6, 8, 9 ]
 425
 426 will be flattened to
 427
 428   [ selected_id[] => 2 ],
 429   [ selected_id[] => 6 ],
 430   [ selected_id[] => 8 ],
 431   [ selected_id[] => 9 ],
 432
 433 Since this will produce identical keys, the resulting flattened list can not be
 434 used as a hash. It is however very easy to use this in a template to generate
 435 input:
 436
 437   [% FOREACH id = selected_ids %]
 438     <input type="hidden" name="selected_id[]" value="[% id | html %]">
 439   [% END %]
 440
 441 =item Nested structures
 442
 443 A special version of this are nested hashes in an array, which is very common.
 444 The combined operator (C<[].>) will be used. As a special case, every time a new
 445 array slice is started, the special convention (C<[+].>) will be used. Again this
 446 is because it's easy to write a template with it.
 447
 448 So this
 449
 450   order => {
 451     orderitems => [
 452       {
 453         id   => 1,
 454         part => 15
 455       },
 456       {
 457         id   => 2,
 458         part => 7
 459       },
 460     ]
 461   }
 462
 463 will be
 464
 465   [ order.orderitems[+].id  => 1  ],
 466   [ order.orderitems[].part => 15 ],
 467   [ order.orderitems[+].id  => 2  ],
 468   [ order.orderitems[].part => 7  ],
 469
 470 =item Limitations
 471
 472   The format currently does have certain limitations when compared to other
 473   serialization formats.
 474
 475 =over 4
 476
 477 =item Order
 478
 479 The order of serialized values matters to reconstruct arrays properly. This
 480 should rarely be a problem if you just flatten and dump into a url or a field
 481 of hiddens.
 482
 483 =item Empty Keys
 484
 485 The current implementation of flatten does produce correct serialization of
 486 empty keys, but unflatten is unable to resolve these. Do no use C<''> or
 487 C<undef> as keys. C<0> is fine.
 488
 489 =item Key Escaping
 490
 491 You cannot use the tokens C<[]>, C<[+]> and C<.> in keys. No way around it.
 492
 493 =item Sparse Arrays
 494
 495 It is not possible to serialize something like
 496
 497   sparse_array => do { my $sa = []; $sa[100] = 1; $sa },
 498
 499 This is a feature, as perl doesn't do well with very large arrays.
 500
 501 =item Recursion
 502
 503 There is currently no support nor prevention for flattening a circular structure.
 504
 505 =item Custom Delimiter
 506
 507 No support for other delimiters, sorry.
 508
 509 =item Other References
 510
 511 No support for globs, scalar refs, code refs, filehandles and the like. These will die.
 512
 513 =back
 514
 515 =back
 516
 517 =head1 FUNCTIONS
 518
 519 =over 4
 520
 521 =item C<flatten HASHREF [ ARRAYREF ]>
 522
 523 This function will flatten the provided hash ref into the provided array ref.
 524 The array ref may be non empty, but will be changed in this case.
 525
 526 The return value is the flattened array ref.
 527
 528 =item C<unflatten ARRAYREF [ HASHREF ]>
 529
 530 This function will parse the array ref, and will store the contents into the hash ref. The hash ref may be non empty, in this case any new keys will override the old ones only on leafs with same type. Type changes on a node will die.
 531
 532 =item C<is_ajax>
 533
 534 Returns trueish if the request is an XML HTTP request, also known as
 535 an 'AJAX' request.
 536
 537 =item C<type>
 538
 539 Returns the requested content type (either C<html>, C<js> or C<json>).
 540
 541 =item C<layout>
 542
 543 Set and retrieve the layout object for the current request. Must be an instance
 544 of L<SL::Layout::Base>. Defaults to an instance of L<SL::Layout::None>.
 545
 546 For more information about layouts, see L<SL::Layout::Dispatcher>.
 547
 548 =item C<cache $topic[, $default ]>
 549
 550 Caches an item for the duration of the request. C<$topic> must be an
 551 index name referring to the thing to cache. It is used for retrieving
 552 it later on. If C<$topic> doesn't start with C<::> then the caller's
 553 package name is prepended to the topic. For example, if the a from
 554 package C<SL::StuffedStuff> calls with topic = C<get_stuff> then the
 555 actual key will be C<::SL::StuffedStuff::get_stuff>.
 556
 557 If no item exists in the cache for C<$topic> then it is created and
 558 its initial value is set to C<$default>. If C<$default> is not given
 559 (undefined) then a new, empty hash reference is created.
 560
 561 Returns the cached item.
 562
 563 =back
 564
 565 =head1 SPECIAL FUNCTIONS
 566
 567 =head2 C<_store_value()>
 568
 569 Parses a complex var name, and stores it in the form.
 570
 571 Syntax:
 572   _store_value($target, $key, $value);
 573
 574 Keys must start with a string, and can contain various tokens.
 575 Supported key structures are:
 576
 577 1. simple access
 578   Simple key strings work as expected
 579
 580   id => $form->{id}
 581
 582 2. hash access.
 583   Separating two keys by a dot (.) will result in a hash lookup for the inner value
 584   This is similar to the behaviour of java and templating mechanisms.
 585
 586   filter.description => $form->{filter}->{description}
 587
 588 3. array+hashref access
 589
 590   Adding brackets ([]) before the dot will cause the next hash to be put into an array.
 591   Using [+] instead of [] will force a new array index. This is useful for recurring
 592   data structures like part lists. Put a [+] into the first varname, and use [] on the
 593   following ones.
 594
 595   Repeating these names in your template:
 596
 597     invoice.items[+].id
 598     invoice.items[].parts_id
 599
 600   will result in:
 601
 602     $form->{invoice}->{items}->[
 603       {
 604         id       => ...
 605         parts_id => ...
 606       },
 607       {
 608         id       => ...
 609         parts_id => ...
 610       }
 611       ...
 612     ]
 613
 614 4. arrays
 615
 616   Using brackets at the end of a name will result in the creation of a pure array.
 617   Note that you mustn't use [+], which is reserved for array+hash access and will
 618   result in undefined behaviour in array context.
 619
 620   filter.status[]  => $form->{status}->[ val1, val2, ... ]
 621
 622 =cut