Merge branch 'b-3.6.1' of ../kivitendo-erp_20220811

[kivitendo-erp.git] / SL / Helper / Csv.pm
diff --git a/SL/Helper/Csv.pm b/SL/Helper/Csv.pm

index 43aaabb..0d471eb 100644 (file)
--- a/SL/Helper/Csv.pm
+++ b/SL/Helper/Csv.pm
@@ -114,6 +114,7 @@ sub _check_multiplexed {
        # Each profile needs a class and a row_ident
        my $info_ok = all { defined $_->{class} && defined $_->{row_ident} } @profile;
        $self->_push_error([
+        undef,
          0,
          "missing class or row_ident in one of the profiles for multiplexed data",
          0,
@@ -125,6 +126,7 @@ sub _check_multiplexed {
          my @header = @{ $self->header };
          my $t_ok = scalar @profile == scalar @header;
          $self->_push_error([
+          undef,
            0,
            "number of headers and number of profiles must be the same for multiplexed data",
            0,
@@ -133,6 +135,7 @@ sub _check_multiplexed {
  
          $t_ok = all { scalar @$_ > 0} @header;
          $self->_push_error([
+          undef,
            0,
            "no empty headers are allowed for multiplexed data",
            0,
@@ -158,10 +161,11 @@ sub _check_header {
      foreach my $p_num (0..$n_header - 1) {
        my $h = $self->_csv->getline($self->_io);
  
+      my ($code, $string, $position, $record, $field) = $self->_csv->error_diag;
+
        $self->_push_error([
          $self->_csv->error_input,
-        $self->_csv->error_diag,
-        0,
+        $code, $string, $position, $record // 0,
        ]) unless $h;
  
        if ($self->is_multiplexed) {
@@ -202,12 +206,13 @@ sub _check_header {
        my $h_aref = ($self->is_multiplexed)? $header : [ $header ];
        my $p_num  = 0;
        foreach my $h (@{ $h_aref }) {
-        my @names = (
-          keys %{ $self->profile->[$p_num]->{profile} || {} },
+        my %names = (
+          (map { $_ => $_                                     } keys %{ $self->profile->[$p_num]->{profile} || {} }),
+          (map { $_ => $self->profile->[$p_num]{mapping}{$_}  } keys %{ $self->profile->[$p_num]->{mapping} || {} }),
          );
-        for my $name (@names) {
+        for my $name (keys %names) {
            for my $i (0..$#$h) {
-            $h->[$i] = $name if lc $h->[$i] eq lc $name;
+            $h->[$i] = $names{$name} if lc $h->[$i] eq lc $name;
            }
          }
          $p_num++;
@@ -229,7 +234,8 @@ sub _check_multiplex_datatype_position {
      $self->_multiplex_datatype_position($first_pos);
      return 1;
    } else {
-    $self->_push_error([0,
+    $self->_push_error([undef,
+                        0,
                          "datatype field must be at the same position for all datatypes for multiplexed data",
                          0,
                          0]);
@@ -237,6 +243,10 @@ sub _check_multiplex_datatype_position {
    }
  }
  
+sub _is_empty_row {  # plain function (called as _is_empty_row($row)); takes one arrayref of row fields
+  return !!all { !$_ } @{$_[0]};  # true when every field is Perl-false; NOTE(review): a field holding '0' also counts as empty
+}
+
  sub _parse_data {
    my ($self, %params) = @_;
    my (@data, @errors);
@@ -244,25 +254,31 @@ sub _parse_data {
    while (1) {
      my $row = $self->_csv->getline($self->_io);
      if ($row) {
+      next if _is_empty_row($row);
        my $header = $self->_header_by_row($row);
+      if (!$header) {
+        push @errors, [
+          undef,
+          0,
+          "Cannot get header for row. Maybe row name and datatype field not matching.",
+          0,
+          0];
+        last;
+      }
        my %hr;
        @hr{@{ $header }} = @$row;
        push @data, \%hr;
      } else {
        last if $self->_csv->eof;
+
        # Text::CSV_XS 0.89 added record number to error_diag
-      if (qv(Text::CSV_XS->VERSION) >= qv('0.89')) {
-        push @errors, [
-          $self->_csv->error_input,
-          $self->_csv->error_diag,
-        ];
-      } else {
-        push @errors, [
-          $self->_csv->error_input,
-          $self->_csv->error_diag,
-          $self->_io->input_line_number,
-        ];
-      }
+      my ($code, $string, $position, $record, $field) = $self->_csv->error_diag;
+
+      push @errors, [
+        $self->_csv->error_input,
+        $code, $string, $position,
+        $record // $self->_io->input_line_number,
+      ];
      }
      last if $self->_csv->eof;
    }
@@ -332,6 +348,9 @@ sub _push_error {
    $self->_errors(\@new_errors);
  }
  
+sub specs {  # convenience accessor: forwards to the dispatcher's _specs
+  $_[0]->dispatcher->_specs  # implicit return of whatever dispatcher->_specs yields
+}
  
  1;
  
@@ -480,84 +499,132 @@ Examples:
    [ [ 'datatype', 'ordernumber', 'customer', 'transdate' ],
      [ 'datatype', 'partnumber', 'qty', 'sellprice' ] ]
  
-=item C<profile> [{profile => \%ACCESSORS, class => class, row_ident => ri},]
+=item C<profile> PROFILE_DATA
+
+The profile that maps CSV data to the objects.
+
+See section L</PROFILE> for information on this topic.
+
+=item C<ignore_unknown_columns>
+
+If set, the import will ignore unknown header columns. Useful for lazy imports,
+but deactivated by default.
+
+=item C<case_insensitive_header>
+
+If set, header columns will be matched case-insensitively against profile
+entries, and on a match the profile name will be taken.
+
+Only works if a profile is given, will die otherwise.
+
+If both C<case_insensitive_header> and C<strict_profile> are set, matched header
+columns will be accepted.
+
+=item C<strict_profile>
+
+If set, all columns to be parsed must be specified in C<profile>. Every header
+field not listed there will be treated like an unknown column.
+
+If both C<case_insensitive_header> and C<strict_profile> are set, matched header
+columns will be accepted.
+
+=back
+
+=head1 PROFILE
+
+The profile is needed for mapping csv data to the accessors in the data object.
  
-This is an ARRAYREF to HASHREFs which may contain the keys C<profile>, C<class>
-and C<row_ident>.
+The basic structure is:
  
-The C<profile> is a HASHREF which may be used to map header fields to custom
+  PROFILE       := [ CLASS_PROFILE, CLASS_PROFILE* ]
+  CLASS_PROFILE := {
+                      profile   => { ACCESSORS+ },
+                      class     => $classname,
+                      row_ident => $row_ident,
+                      mapping   => { MAPPINGS* },
+                   }
+  ACCESSORS     := $field => $accessor
+  MAPPINGS      := $alias => $field
+
+The C<ACCESSORS> may be used to map header fields to custom
  accessors. Example:
  
-  [ {profile => { listprice => listprice_as_number }} ]
+  profile => {
+    listprice => 'listprice_as_number',
+  }
  
-In this case C<listprice_as_number> will be used to read in values from the
+In this case C<listprice_as_number> will be used to store the values from the
  C<listprice> column.
  
  In case of a One-To-One relationship these can also be set over
  relationships by separating the steps with a dot (C<.>). This will work:
  
-  [ {profile => { customer => 'customer.name' }} ]
+  customer => 'customer.name',
  
  And will result in something like this:
  
    $obj->customer($obj->meta->relationship('customer')->class->new);
    $obj->customer->name($csv_line->{customer})
  
-But beware, this will not try to look up anything in the database. You will
+Beware, this will not try to look up anything in the database! You will
  simply receive objects that represent what the profile defined. If some of
-these information are unique, and should be connected to preexisting data, you
+this information is unique, or should be connected to preexisting data, you
  will have to do that for yourself. Since you provided the profile, it is
  assumed you know what to do in this case.
  
  If no profile is given, any header field found will be taken as is.
  
  If the path in a profile entry is empty, the field will be subjected to
-C<strict_profile> and C<case_insensitive_header> checking, will be parsed into
-C<get_data>, but will not be attempted to be dispatched into objects.
-
-If C<class> is present, the line will be handed to the new sub of this class,
-and the return value used instead of the line itself.
+C<strict_profile> and C<case_insensitive_header> checking and will be parsed
+into C<get_data>, but will not be attempted to be dispatched into objects.
  
-C<row_ident> is a string to recognize the right profile and class for each data
-line in multiplexed data. It must match the value in the column 'dataype' for
-each class.
-
-In case of multiplexed data, C<class> and C<row_ident> must be given.
-Example:
-  [ {
-      class     => 'SL::DB::Order',
-      row_ident => 'O'
-    },
-    {
-      class     => 'SL::DB::OrderItem',
-      row_ident => 'I',
-      profile   => {sellprice => sellprice_as_number}
-    } ]
+C<class> must be present. A new instance will be created for each line before
+dispatching into it.
  
-=item C<ignore_unknown_columns>
+C<row_ident> is used to determine the correct profile in multiplexed data and
+must be given there. It's not used in non-multiplexed data.
  
-If set, the import will ignore unkown header columns. Useful for lazy imports,
-but deactivated by default.
+If C<mapping> is present, it must contain a hashref that maps strings to known
+fields. This can be used to add custom profiles for known sources that don't
+comply with the expected header identities.
  
-=item C<case_insensitive_header>
+Without strict profiles, mappings can also directly map header fields that
+should end up in the same accessor.
  
-If set, header columns will be matched against profile entries case
-insensitive, and on match the profile name will be taken.
+With case insensitive headings, mappings will also modify the headers, to fit
+the expected profile.
  
-Only works if a profile is given, will die otherwise.
+Mappings can be identical to known fields and will be preferred during lookup,
+but will not replace the field, meaning that:
  
-If both C<case_insensitive_header> and C<strict_profile> is set, matched header
-columns will be accepted.
+  profile => {
+    name        => 'name',
+    description => 'description',
+  }
+  mapping => {
+    name        => 'description',
+    shortname   => 'name',
+  }
  
-=item C<strict_profile>
+will work as expected, and shortname will not end up in description. This also
+works with the case insensitive option. Note however that the case insensitive
+option will not enable true unicode collating.
  
-If set, all columns to be parsed must be specified in C<profile>. Every header
-field not listed there will be treated like an unknown column.
  
-If both C<case_insensitive_header> and C<strict_profile> is set, matched header
-columns will be accepted.
+Here's a full example:
  
-=back
+  [
+    {
+      class     => 'SL::DB::Order',
+      row_ident => 'O'
+    },
+    {
+      class     => 'SL::DB::OrderItem',
+      row_ident => 'I',
+      profile   => { sellprice => 'sellprice_as_number' },
+      mapping   => { 'Verkaufspreis' => 'sellprice' }
+    },
+  ]
  
  =head1 ERROR HANDLING
  
@@ -572,6 +639,9 @@ Each entry is an object with the following attributes:
  
  Note that the last entry can be off, but will give an estimate.
  
+Error handling is also known to break on new Perl versions and needs to be
+adjusted from time to time due to changes in Text::CSV_XS.
+
  =head1 CAVEATS
  
  =over 4