GDPDU: lokalisierte Spaltennamen und CSV Header

[kivitendo-erp.git] / SL / GDPDU.pm
diff --git a/SL/GDPDU.pm b/SL/GDPDU.pm

index 8a2ae27..0d0f77b 100644 (file)
--- a/SL/GDPDU.pm
+++ b/SL/GDPDU.pm
@@ -22,24 +22,52 @@ use SL::Locale::String qw(t8);
  
  use Rose::Object::MakeMethods::Generic (
    scalar                  => [ qw(from to writer company location) ],
-  'scalar --get_set_init' => [ qw(files tempfiles export_ids tables) ],
+  'scalar --get_set_init' => [ qw(files tempfiles export_ids tables csv_headers) ],
  );
  
  # in this we find:
  # key:         table name
  # name:        short name, translated
  # description: long description, translated
-# transdate:   column used to filter from/to, empty if table is filtered otherwise
-# keep:        arrayref of columns that should be saved for further referencing
-# tables:      arrayref with one column and one or many table.column references that were kept earlier
+# columns:     list of columns to export. export all columns if not present
+# primary_key: override primary key
  my %known_tables = (
    chart    => { name => t8('Charts'),    description => t8('Chart of Accounts'),    primary_key => 'accno', columns => [ qw(id accno description) ],     },
-  customer => { name => t8('Customers'), description => t8('Customer Master Data'), columns => [ qw(id name department_1 department_2 street zipcode city country contact phone fax email notes customernumber taxnumber obsolete ustid) ] },
-  vendor   => { name => t8('Vendors'),   description => t8('Vendor Master Data'),   columns => [ qw(id name department_1 department_2 street zipcode city country contact phone fax email notes customernumber taxnumber obsolete ustid) ] },
+  customer => { name => t8('Customers'), description => t8('Customer Master Data'), columns => [ qw(id customernumber name department_1 department_2 street zipcode city country contact phone fax email notes taxnumber obsolete ustid) ] },
+  vendor   => { name => t8('Vendors'),   description => t8('Vendor Master Data'),   columns => [ qw(id vendornumber name department_1 department_2 street zipcode city country contact phone fax email notes taxnumber obsolete ustid) ] },
  );
  
+my %column_titles = (
+   chart => {
+     id             => t8('ID'),
+     accno          => t8('Account Number'),
+     description    => t8('Description'),
+   },
+   customer_vendor => {
+     id             => t8('ID'),
+     name           => t8('Name'),
+     department_1   => t8('Department 1'),
+     department_2   => t8('Department 2'),
+     street         => t8('Street'),
+     zipcode        => t8('Zipcode'),
+     city           => t8('City'),
+     country        => t8('Country'),
+     contact        => t8('Contact'),
+     phone          => t8('Phone'),
+     fax            => t8('Fax'),
+     email          => t8('E-mail'),
+     notes          => t8('Notes'),
+     customernumber => t8('Customer Number'),
+     vendornumber   => t8('Vendor Number'),
+     taxnumber      => t8('Tax Number'),
+     obsolete       => t8('Obsolete'),
+     ustid          => t8('Tax ID number'),
+   },
+);
+$column_titles{$_} = $column_titles{customer_vendor} for qw(customer vendor);
+
  my %datev_column_defs = (
-  acc_trans_id      => { type => 'Rose::DB::Object::Metadata::Column::Integer', text => t8('ID'), },
+  trans_id          => { type => 'Rose::DB::Object::Metadata::Column::Integer', text => t8('ID'), },
    amount            => { type => 'Rose::DB::Object::Metadata::Column::Numeric', text => t8('Amount'), },
    credit_accname    => { type => 'Rose::DB::Object::Metadata::Column::Text',    text => t8('Credit Account Name'), },
    credit_accno      => { type => 'Rose::DB::Object::Metadata::Column::Text',    text => t8('Credit Account'), },
@@ -61,7 +89,7 @@ my %datev_column_defs = (
  );
  
  my @datev_columns = qw(
-  acc_trans_id
+  trans_id
    customer_id vendor_id
    name           vcnumber
    transdate    invnumber      amount
@@ -211,6 +239,9 @@ sub table {
      ->tag('UTF8')
      ->tag('DecimalSymbol', '.')
      ->tag('DigitGroupingSymbol', '|')     # see CAVEATS in documentation
+    ->tag('Range', sub { $self
+      ->tag('From', $self->csv_headers ? 2 : 1)
+    })
      ->tag('VariableLength', sub { $self
        ->tag('ColumnDelimiter', ',')       # see CAVEATS for missing RecordDelimiter
        ->tag('TextEncapsulator', '"')
@@ -252,7 +283,7 @@ sub columns {
      die "unknown col type @{[ ref $column ]}" unless $type;
  
      $self->tag('VariablePrimaryKey', sub { $self
-      ->tag('Name', $column->name);
+      ->tag('Name', $column_titles{$table}{$column->name});
        $type->($self);
      })
    }
@@ -263,7 +294,7 @@ sub columns {
      die "unknown col type @{[ ref $column]}" unless $type;
  
      $self->tag('VariableColumn', sub { $self
-      ->tag('Name', $column->name);
+      ->tag('Name', $column_titles{$table}{$column->name});
        $type->($self);
      })
    }
@@ -293,7 +324,7 @@ sub foreign_keys {
      }
  
      $self->tag('ForeignKey', sub {
-      $_[0]->tag('Name', $_) for keys %key_columns;
+      $_[0]->tag('Name',  $column_titles{$table}{$_}) for keys %key_columns;
        $_[0]->tag('References', $rel->class->meta->table);
     });
    }
@@ -317,6 +348,9 @@ sub do_datev_xml_table {
      ->tag('UTF8')
      ->tag('DecimalSymbol', '.')
      ->tag('DigitGroupingSymbol', '|')     # see CAVEATS in documentation
+    ->tag('Range', sub { $self
+      ->tag('From', $self->csv_headers ? 2 : 1)
+    })
      ->tag('VariableLength', sub { $self
        ->tag('ColumnDelimiter', ',')       # see CAVEATS for missing RecordDelimiter
        ->tag('TextEncapsulator', '"')
@@ -337,7 +371,7 @@ sub datev_columns {
      die "unknown col type @{[ $column ]}" unless $type;
  
      $self->tag('VariablePrimaryKey', sub { $self
-      ->tag('Name', $column);
+      ->tag('Name', $datev_column_defs{$column}{text});
        $type->($self);
      })
    }
@@ -348,7 +382,7 @@ sub datev_columns {
      die "unknown col type @{[ ref $column]}" unless $type;
  
      $self->tag('VariableColumn', sub { $self
-      ->tag('Name', $column);
+      ->tag('Name', $datev_column_defs{$column}{text});
        $type->($self);
      })
    }
@@ -360,15 +394,15 @@ sub datev_foreign_keys {
    my ($self) = @_;
    # hard code weeee
    $self->tag('ForeignKey', sub { $_[0]
-    ->tag('Name', 'customer_id')
+    ->tag('Name', $datev_column_defs{customer_id}{text})
      ->tag('References', 'customer')
    });
    $self->tag('ForeignKey', sub { $_[0]
-    ->tag('Name', 'vendor_id')
+    ->tag('Name', $datev_column_defs{vendor_id}{text})
      ->tag('References', 'vendor')
    });
    $self->tag('ForeignKey', sub { $_[0]
-    ->tag('Name', $_)
+    ->tag('Name', $datev_column_defs{$_}{text})
      ->tag('References', 'chart')
    }) for qw(debit_accno credit_accno tax_accno);
  }
@@ -396,6 +430,10 @@ sub do_datev_csv_export {
    $self->files->{"transactions.csv"} = $filename;
    push @{ $self->tempfiles }, $filename;
  
+  if ($self->csv_headers) {
+    $csv->print($fh, [ map { _normalize_cell($datev_column_defs{$_}{text}) } @datev_columns ]);
+  }
+
    for my $transaction (@transactions) {
      my $is_payment     = any { $_->{link} =~ m{A[PR]_paid} } @{ $transaction };
  
@@ -415,7 +453,7 @@ sub do_datev_csv_export {
        tax              => defined $amount->{net_amount} ? $::form->format_amount($myconfig, abs($amount->{amount}) - abs($amount->{net_amount}), 5) : 0,
        notes            => $haben->{notes},
        (map { ($_ => $tax->{$_})                    } qw(taxkey tax_accname tax_accno taxdescription)),
-      (map { ($_ => ($haben->{$_} // $soll->{$_})) } qw(acc_trans_id invnumber name vcnumber transdate itime customer_id vendor_id)),
+      (map { ($_ => ($haben->{$_} // $soll->{$_})) } qw(trans_id invnumber name vcnumber transdate itime customer_id vendor_id)),
      );
  
      _normalize_cell($_) for values %row; # see CAVEATS
@@ -442,6 +480,10 @@ sub do_csv_export {
    my @columns = (@{ $cols_by_primary_key{1} }, @{ $cols_by_primary_key{0} });
    my %col_index = do { my $i = 0; map {; "$_" => $i++ } @columns };
  
+  if ($self->csv_headers) {
+    $csv->print($fh, [ map { _normalize_cell($column_titles{$table}{$_->name}) } @columns ]) or die $csv->error_diag;
+  }
+
    # and normalize date stuff
    my @select_tokens = map { (ref $_) =~ /Time/ ? $_->name . '::date' : $_->name } @columns;
  
@@ -553,12 +595,14 @@ sub all_tables {
  sub _normalize_cell {
    $_[0] =~ s/\r\n/ /g;
    $_[0] =~ s/,/;/g;
+  $_[0]
  }
  
  sub init_files { +{} }
  sub init_export_ids { +{} }
  sub init_tempfiles { [] }
  sub init_tables { [ grep { $known_tables{$_} } @export_table_order ] }
+sub init_csv_headers { 1 }
  
  sub API_VERSION {
    DateTime->new(year => 2002, month => 8, day => 14)->to_kivitendo;
@@ -594,7 +638,7 @@ The name of the company, needed for the supplier header
  
  =item location
  
-Location of the company, needed for the suupplier header
+Location of the company, needed for the supplier header
  
  =item from
  
@@ -603,66 +647,70 @@ Location of the company, needed for the suupplier header
  Will only include records in the specified date range. Data pulled from other
  tables will be culled to match what is needed for these records.
  
+=item csv_headers
+
+Optional. If set, will include a header line in the exported CSV files. Default true.
+
  =item tables
  
-A list of tables to be exported.
+Ooptional list of tables to be exported. Defaults to all tables.
  
  =item all_tables
  
-Alternative to C<tables>, enables all known tables.
+Optional alternative to C<tables>, forces all known tables.
  
  =back
  
  =item C<generate_export>
  
-Do the work. Will return an absolut path to a temp file where all export files
+Do the work. Will return an absolute path to a temp file where all export files
  are zipped together.
  
  =back
  
  =head1 CAVEATS
  
+Sigh. There are a lot of issues with the IDEA software that were found out by
+trial and error.
+
+=head2 Problems in the Specification
+
  =over 4
  
  =item *
  
-Date format is shit. The official docs state that only C<YY>, C<YYYY>, C<MM>,
-and C<DD> are supported, timestamps do not exist.
+The specced date format is capable of only C<YY>, C<YYYY>, C<MM>,
+and C<DD>. There are no timestamps or timezones.
  
  =item *
  
-Number parsing seems to be fragile. Official docs state that behaviour for too
-low C<Accuracy> settings is undefined. Accuracy of 0 is not taken to mean
-Integer but instead generates a warning for redudancy.
+Numbers have the same issue. There is not dedicated integer type, and hinting
+at an integer type by setting accuracy to 0 generates a warning for redundant
+accuracy.
  
-There is no dedicated integer type.
+Also the number parsing is documented to be fragile. Official docs state that
+behaviour for too low C<Accuracy> settings is undefined.
  
  =item *
  
-Currently C<ar> and C<ap> have a foreign key to themself with the name
-C<storno_id>. If this foreign key is present in the C<INDEX.XML> then the
-storno records have to be too. Since this is extremely awkward to code and
-confusing for the examiner as to why there are records outside of the time
-range, this export skips all self-referential foreign keys.
-
-=item *
+Foreign key definition is broken. Instead of giving column maps it assumes that
+foreign keys map to the primary keys given for the target table, and in that
+order. Also the target table must be known in full before defining a foreign key.
  
-Documentation for foreign keys is extremely weird. Instead of giving column
-maps it assumes that foreign keys map to the primary keys given for the target
-table, and in that order. Foreign keys to keys that are not primary seems to be
-impossible. Changing type is also not allowed (which actually makes sense).
-Hopefully there are no bugs there.
+As a consequence any additional keys apart from primary keys are not possible.
+Self-referencing tables are also not possible.
  
  =item *
  
-It's currently disallowed to export the whole dataset. It's not clear if this
-is wanted.
+The spec does not support splitting data sets into smaller chunks. For data
+sets that exceed 700MB the spec helpfully suggests: "Use a bigger medium, such
+as a DVD".
  
  =item *
  
-It is not possible to set an empty C<DigiGroupingSymbol> since then the import
+It is not possible to set an empty C<DigitGroupingSymbol> since then the import
  will just work with the default. This was asked in their forum, and the
-response actually was:
+response actually was to use a bogus grouping symbol that is not used:
  
    Einfache Lösung: Definieren Sie das Tausendertrennzeichen als Komma, auch
    wenn es nicht verwendet wird. Sollten Sie das Komma bereits als Feldtrenner
@@ -681,12 +729,11 @@ generates the error message:
  
  Instead we just use the implicit default RecordDelimiter CRLF.
  
-=item *
+=back
  
-Not confirmed yet:
+=head2 Bugs in the IDEA software
  
-Foreign keys seem only to work with previously defined tables (which would be
-utterly insane).
+=over 4
  
  =item *
  
@@ -701,9 +748,45 @@ on that symbol no matter what surrounds or preceeds it.
  
  =item *
  
-Fun fact: Some auditors do not have a full license of the IDEA software, and
-can't do table joins. So it's best to provide denormalized data for them, so
-that the auditor may infer which object is meant.
+Despite the standard specifying UTF-8 as a valid encoding the IDEA software
+will just downgrade everything to latin1.
+
+=back
+
+=head2 Problems outside of the software
+
+=over 4
+
+=item *
+
+The law states that "all business related data" should be made available. In
+practice there's no definition for what makes data "business related", and
+different auditors seems to want different data.
+
+Currently we export most of the transactional data with supplementing
+customers, vendors and chart of accounts.
+
+=item *
+
+While the standard explicitely state to provide data normalized, in practice
+autditors aren't trained database operators and can not create complex vies on
+normalized data on their own. The reason this works for other software is, that
+DATEV and SAP seem to have written import plugins for their internal formats in
+the IDEA software.
+
+So what is really exported is not unlike a DATEV export. Each transaction gets
+splitted into chunks of 2 positions (3 with tax on one side). Those get
+denormalized into a single data row with credfit/debit/tax fields. The charts
+get denormalized into it as well, in addition to their account number serving
+as a foreign key.
+
+Customers and vendors get denormalized into this as well, but are linked by ids
+to their tables. And the reason for this is...
+
+=item *
+
+Some auditors do not have a full license of the IDEA software, and
+can't do table joins.
  
  =back