SL/GDPDU.pm

   1 package SL::GDPDU;
   2
   3 # TODO:
   4 # optional: background jobable
   5
   6 use strict;
   7 use utf8;
   8
   9 use parent qw(Rose::Object);
  10
  11 use Text::CSV_XS;
  12 use XML::Writer;
  13 use Archive::Zip;
  14 use File::Temp ();
  15 use File::Spec ();
  16 use List::UtilsBy qw(partition_by);
  17
  18 use SL::DB::Helper::ALL; # since we work on meta data, we need everything
  19 use SL::DB::Helper::Mappings;
  20 use SL::Locale::String qw(t8);
  21
  22 use Rose::Object::MakeMethods::Generic (
  23   scalar                  => [ qw(from to tables writer company location) ],
  24   'scalar --get_set_init' => [ qw(files tempfiles export_ids) ],
  25 );
  26
  27 # in this we find:
  28 # key:         table name
  29 # name:        short name, translated
  30 # description: long description, translated
  31 # transdate:   column used to filter from/to, empty if table is filtered otherwise
  32 # keep:        arrayref of columns that should be saved for further referencing
  33 # tables:      arrayref with one column and one or many table.column references that were kept earlier
  34 my %known_tables = (
  35   ar                    => { name => t8('Invoice'),                 description => t8('Sales Invoices and Accounts Receivables'),   keep => [ qw(id customer_id vendor_id) ], transdate => 'transdate', },
  36   ap                    => { name => t8('Purchase Invoice'),        description => t8('Purchase Invoices and Accounts Payables'),   keep => [ qw(id customer_id vendor_id) ], transdate => 'transdate', },
  37   oe                    => { name => t8('Orders'),                  description => t8('Orders and Quotations, Sales and Purchase'), keep => [ qw(id customer_id vendor_id) ], transdate => 'transdate', },
  38   delivery_orders       => { name => t8('Delivery Orders'),         description => t8('Delivery Orders'),                           keep => [ qw(id customer_id vendor_id) ], transdate => 'transdate', },
  39   gl                    => { name => t8('General Ledger'),          description => t8('General Ledger Entries'),                    keep => [ qw(id) ],                       transdate => 'transdate', },
  40   invoice               => { name => t8('Invoice Positions'),       description => t8('Positions for all Invoices'),                keep => [ qw(parts_id) ], tables => [ trans_id => "ar.id", "ap.id" ] },
  41   orderitems            => { name => t8('OrderItems'),              description => t8('Positions for all Orders'),                  keep => [ qw(parts_id) ], tables => [ trans_id => "oe.id" ] },
  42   delivery_order_items  => { name => t8('Delivery Order Items'),    description => t8('Positions for all Delivery Orders'),                      keep => [ qw(parts_id) ], tables => [ delivery_order_id => "delivery_orders.id" ] },
  43   acc_trans             => { name => t8('Transactions'),            description => t8('All general ledger entries'),                keep => [ qw(chart_id) ], tables => [ trans_id => "ar.id", "ap.id", "oe.id", "delivery_orders.id", "gl.id" ] },
  44   chart                 => { name => t8('Charts'),                  description => t8('Chart of Accounts'),                                                   tables => [ id => "acc_trans.chart_id" ] },
  45   customer              => { name => t8('Customers'),               description => t8('Customer Master Data'),                                                tables => [ id => "ar.customer_id", "ap.customer_id", "oe.customer_id", "delivery_orders.customer_id" ] },
  46   vendor                => { name => t8('Vendors'),                 description => t8('Vendor Master Data'),                                                  tables => [ id => "ar.vendor_id",   "ap.vendor_id",   "oe.vendor_id",   "delivery_orders.vendor_id" ] },
  47   parts                 => { name => t8('Parts'),                   description => t8('Parts, Services, and Assemblies'),                                     tables => [ id => "invoice.parts_id", "orderitems.parts_id", "delivery_order_items.parts_id" ] },
  48 );
  49
  50 # rows in this listing are tiers.
  51 # tables may depend on ids in a tier above them
  52 my @export_table_order = qw(
  53   ar ap gl oe delivery_orders
  54   invoice orderitems delivery_order_items
  55   customer vendor
  56   parts
  57   acc_trans
  58   chart
  59 );
  60
  61 # needed because the standard dbh sets datestyle german and we don't want to mess with that
  62 my $date_format = 'DD.MM.YYYY';
  63
  64 # callbacks that produce the xml spec for these column types
  65 my %column_types = (
  66   'Rose::DB::Object::Metadata::Column::Integer'   => sub { $_[0]->tag('Numeric') },  # see Caveats for integer issues
  67   'Rose::DB::Object::Metadata::Column::BigInt'    => sub { $_[0]->tag('Numeric') },  # see Caveats for integer issues
  68   'Rose::DB::Object::Metadata::Column::Text'      => sub { $_[0]->tag('AlphaNumeric') },
  69   'Rose::DB::Object::Metadata::Column::Varchar'   => sub { $_[0]->tag('AlphaNumeric') },
  70   'Rose::DB::Object::Metadata::Column::Character' => sub { $_[0]->tag('AlphaNumeric') },
  71   'Rose::DB::Object::Metadata::Column::Numeric'   => sub { $_[0]->tag('Numeric', sub { $_[0]->tag('Accuracy', 5) }) },
  72   'Rose::DB::Object::Metadata::Column::Date'      => sub { $_[0]->tag('Date', sub { $_[0]->tag('Format', $date_format) }) },
  73   'Rose::DB::Object::Metadata::Column::Timestamp' => sub { $_[0]->tag('Date', sub { $_[0]->tag('Format', $date_format) }) },
  74   'Rose::DB::Object::Metadata::Column::Float'     => sub { $_[0]->tag('Numeric') },
  75   'Rose::DB::Object::Metadata::Column::Boolean'   => sub { $_[0]
  76     ->tag('AlphaNumeric')
  77     ->tag('Map', sub { $_[0]
  78       ->tag('From', 1)
  79       ->tag('To', t8('true'))
  80     })
  81     ->tag('Map', sub { $_[0]
  82       ->tag('From', 0)
  83       ->tag('To', t8('false'))
  84     })
  85     ->tag('Map', sub { $_[0]
  86       ->tag('From', '')
  87       ->tag('To', t8('false'))
  88     })
  89   },
  90 );
  91
  92 sub generate_export {
  93   my ($self) = @_;
  94
  95   # verify data
  96   $self->from && 'DateTime' eq ref $self->from or die 'need from date';
  97   $self->to   && 'DateTime' eq ref $self->to   or die 'need to date';
  98   $self->from <= $self->to                     or die 'from date must be earlier or equal than to date';
  99   $self->tables && @{ $self->tables }          or die 'need tables';
 100   for (@{ $self->tables }) {
 101     next if $known_tables{$_};
 102     die "unknown table '$_'";
 103   }
 104
 105   # get data from those tables and save to csv
 106   # for that we need to build queries that fetch all the columns
 107   for ($self->sorted_tables) {
 108     $self->do_csv_export($_);
 109   }
 110
 111   # write xml file
 112   $self->do_xml_file;
 113
 114   # add dtd
 115   $self->files->{'gdpdu-01-08-2002.dtd'} = File::Spec->catfile('users', 'gdpdu-01-08-2002.dtd');
 116
 117   # make zip
 118   my ($fh, $zipfile) = File::Temp::tempfile();
 119   my $zip            = Archive::Zip->new;
 120
 121   while (my ($name, $file) = each %{ $self->files }) {
 122     $zip->addFile($file, $name);
 123   }
 124
 125   $zip->writeToFileHandle($fh) == Archive::Zip::AZ_OK() or die 'error writing zip file';
 126   close($fh);
 127
 128   return $zipfile;
 129 }
 130
 131 sub do_xml_file {
 132   my ($self) = @_;
 133
 134   my ($fh, $filename) = File::Temp::tempfile();
 135   binmode($fh, ':utf8');
 136
 137   $self->files->{'INDEX.XML'} = $filename;
 138   push @{ $self->tempfiles }, $filename;
 139
 140   my $writer = XML::Writer->new(
 141     OUTPUT      => $fh,
 142     ENCODING    => 'UTF-8',
 143   );
 144
 145   $self->writer($writer);
 146   $self->writer->xmlDecl('UTF-8');
 147   $self->writer->doctype('DataSet', undef, "gdpdu-01-08-2002.dtd");
 148   $self->tag('DataSet', sub { $self
 149     ->tag('Version', '1.0')
 150     ->tag('DataSupplier', sub { $self
 151       ->tag('Name', $self->client_name)
 152       ->tag('Location', $self->client_location)
 153       ->tag('Comment', $self->make_comment)
 154     })
 155     ->tag('Media', sub { $self
 156       ->tag('Name', t8('DataSet #1', 1));
 157       for (reverse $self->sorted_tables) { $self  # see CAVEATS for table order
 158         ->table($_)
 159       }
 160     })
 161   });
 162   close($fh);
 163 }
 164
 165 sub table {
 166   my ($self, $table) = @_;
 167   my $writer = $self->writer;
 168
 169   $self->tag('Table', sub { $self
 170     ->tag('URL', "$table.csv")
 171     ->tag('Name', $known_tables{$table}{name})
 172     ->tag('Description', $known_tables{$table}{description})
 173     ->tag('Validity', sub { $self
 174       ->tag('Range', sub { $self
 175         ->tag('From', $self->from->to_kivitendo(dateformat => 'dd.mm.yyyy'))
 176         ->tag('To',   $self->to->to_kivitendo(dateformat => 'dd.mm.yyyy'))
 177       })
 178       ->tag('Format', $date_format)
 179     })
 180     ->tag('UTF8'),
 181     ->tag('DecimalSymbol', '.')
 182     ->tag('DigitGroupingSymbol', '|')     # see CAVEATS in documentation
 183     ->tag('VariableLength', sub { $self
 184       ->tag('ColumnDelimiter', ',')       # see CAVEATS for missing RecordDelimiter
 185       ->tag('TextEncapsulator', '"')
 186       ->columns($table)
 187       ->foreign_keys($table)
 188     })
 189   });
 190 }
 191
 192 sub _table_columns {
 193   my ($table) = @_;
 194   my $package = SL::DB::Helper::Mappings::get_package_for_table($table);
 195
 196   # PrimaryKeys must come before regular columns, so partition first
 197   partition_by { 1 * $_->is_primary_key_member } $package->meta->columns;
 198 }
 199
 200 sub columns {
 201   my ($self, $table) = @_;
 202
 203   my %cols_by_primary_key = _table_columns($table);
 204
 205   for my $column (@{ $cols_by_primary_key{1} }) {
 206     my $type = $column_types{ ref $column };
 207
 208     die "unknown col type @{[ ref $column ]}" unless $type;
 209
 210     $self->tag('VariablePrimaryKey', sub { $self
 211       ->tag('Name', $column->name);
 212       $type->($self);
 213     })
 214   }
 215
 216   for my $column (@{ $cols_by_primary_key{0} }) {
 217     my $type = $column_types{ ref $column };
 218
 219     die "unknown col type @{[ ref $column]}" unless $type;
 220
 221     $self->tag('VariableColumn', sub { $self
 222       ->tag('Name', $column->name);
 223       $type->($self);
 224     })
 225   }
 226
 227   $self;
 228 }
 229
 230 sub foreign_keys {
 231   my ($self, $table) = @_;
 232   my $package = SL::DB::Helper::Mappings::get_package_for_table($table);
 233
 234   my %requested = map { $_ => 1 } @{ $self->tables };
 235
 236   for my $rel ($package->meta->foreign_keys) {
 237     next unless $requested{ $rel->class->meta->table };
 238
 239     # ok, now extract the columns used as foreign key
 240     my %key_columns = $rel->key_columns;
 241
 242     if (1 != keys %key_columns) {
 243       die "multi keys? we don't support this currently. fix it please";
 244     }
 245
 246     if ($table eq $rel->class->meta->table) {
 247       # self referential foreign keys are a PITA to export correctly. skip!
 248       next;
 249     }
 250
 251     $self->tag('ForeignKey', sub {
 252       $_[0]->tag('Name', $_) for keys %key_columns;
 253       $_[0]->tag('References', $rel->class->meta->table);
 254    });
 255   }
 256 }
 257
 258 sub do_csv_export {
 259   my ($self, $table) = @_;
 260
 261   my $csv = Text::CSV_XS->new({ binary => 1, eol => "\r\n", sep_char => ",", quote_char => '"' });
 262
 263   my ($fh, $filename) = File::Temp::tempfile();
 264   binmode($fh, ':utf8');
 265
 266   $self->files->{"$table.csv"} = $filename;
 267   push @{ $self->tempfiles }, $filename;
 268
 269   # in the right order (primary keys first)
 270   my %cols_by_primary_key = _table_columns($table);
 271   my @columns = (@{ $cols_by_primary_key{1} }, @{ $cols_by_primary_key{0} });
 272   my %col_index = do { my $i = 0; map {; "$_" => $i++ } @columns };
 273
 274   # and normalize date stuff
 275   my @select_tokens = map { (ref $_) =~ /Time/ ? $_->name . '::date' : $_->name } @columns;
 276
 277   my @where_tokens;
 278   my @values;
 279   if ($known_tables{$table}{transdate}) {
 280     if ($self->from) {
 281       push @where_tokens, "$known_tables{$table}{transdate} >= ?";
 282       push @values, $self->from;
 283     }
 284     if ($self->to) {
 285       push @where_tokens, "$known_tables{$table}{transdate} <= ?";
 286       push @values, $self->to;
 287     }
 288   }
 289   if ($known_tables{$table}{tables}) {
 290     my ($col, @col_specs) = @{ $known_tables{$table}{tables} };
 291     my %ids;
 292     for (@col_specs) {
 293       my ($ftable, $fkey) = split /\./, $_;
 294       if (!exists $self->export_ids->{$ftable}{$fkey}) {
 295          # check if we forgot to keep it
 296          if (!grep { $_ eq $fkey } @{ $known_tables{$ftable}{keep} || [] }) {
 297            die "unknown table spec '$_' for table $table, did you forget to keep $fkey in $ftable?"
 298          } else {
 299            # hmm, most likely just an empty set.
 300            $self->export_ids->{$ftable}{$fkey} = {};
 301          }
 302       }
 303       $ids{$_}++ for keys %{ $self->export_ids->{$ftable}{$fkey} };
 304     }
 305     if (keys %ids) {
 306       push @where_tokens, "$col IN (@{[ join ',', ('?') x keys %ids ]})";
 307       push @values, keys %ids;
 308     } else {
 309       push @where_tokens, '1=0';
 310     }
 311   }
 312
 313   my $where_clause = @where_tokens ? 'WHERE ' . join ' AND ', @where_tokens : '';
 314
 315   my $query = "SELECT " . join(', ', @select_tokens) . " FROM $table $where_clause";
 316
 317   my $sth = $::form->get_standard_dbh->prepare($query);
 318   $sth->execute(@values) or die "error executing query $query: " . $sth->errstr;
 319
 320   while (my $row = $sth->fetch) {
 321     for my $keep_col (@{ $known_tables{$table}{keep} || [] }) {
 322       next if !$row->[$col_index{$keep_col}];
 323       $self->export_ids->{$table}{$keep_col} ||= {};
 324       $self->export_ids->{$table}{$keep_col}{$row->[$col_index{$keep_col}]}++;
 325     }
 326     $csv->print($fh, $row) or $csv->error_diag;
 327   }
 328   $sth->finish();
 329 }
 330
 331 sub tag {
 332   my ($self, $tag, $content) = @_;
 333
 334   $self->writer->startTag($tag);
 335   if ('CODE' eq ref $content) {
 336     $content->($self);
 337   } else {
 338     $self->writer->characters($content);
 339   }
 340   $self->writer->endTag;
 341   return $self;
 342 }
 343
 344 sub make_comment {
 345   my $gdpdu_version = API_VERSION();
 346   my $kivi_version  = $::form->read_version;
 347   my $person        = $::myconfig{name};
 348   my $contact       = join ', ',
 349     (t8("Email") . ": $::myconfig{email}" ) x!! $::myconfig{email},
 350     (t8("Tel")   . ": $::myconfig{tel}" )   x!! $::myconfig{tel},
 351     (t8("Fax")   . ": $::myconfig{fax}" )   x!! $::myconfig{fax};
 352
 353   t8('DataSet for GDPdU version #1. Created with kivitendo #2 by #3 (#4)',
 354     $gdpdu_version, $kivi_version, $person, $contact
 355   );
 356 }
 357
 358 sub client_name {
 359   $_[0]->company
 360 }
 361
 362 sub client_location {
 363   $_[0]->location
 364 }
 365
 366 sub sorted_tables {
 367   my ($self) = @_;
 368
 369   my %given = map { $_ => 1 } @{ $self->tables };
 370
 371   grep { $given{$_} } @export_table_order;
 372 }
 373
 374 sub all_tables {
 375   my ($self, $yesno) = @_;
 376
 377   $self->tables(\@export_table_order) if $yesno;
 378 }
 379
 380 sub init_files { +{} }
 381 sub init_export_ids { +{} }
 382 sub init_tempfiles { [] }
 383
 384 sub API_VERSION {
 385   DateTime->new(year => 2002, month => 8, day => 14)->to_kivitendo;
 386 }
 387
 388 sub DESTROY {
 389   unlink $_ for @{ $_[0]->tempfiles || [] };
 390 }
 391
 392 1;
 393
 394 __END__
 395
 396 =encoding utf-8
 397
 398 =head1 NAME
 399
 400 SL::GDPDU - IDEA export generator
 401
 402 =head1 FUNCTIONS
 403
 404 =over 4
 405
 406 =item C<new PARAMS>
 407
 408 Create new export object. C<PARAMS> may contain:
 409
 410 =over 4
 411
 412 =item company
 413
 414 The name of the company, needed for the supplier header
 415
 416 =item location
 417
 418 Location of the company, needed for the suupplier header
 419
 420 =item from
 421
 422 =item to
 423
 424 Will only include records in the specified date range. Data pulled from other
 425 tables will be culled to match what is needed for these records.
 426
 427 =item tables
 428
 429 A list of tables to be exported.
 430
 431 =item all_tables
 432
 433 Alternative to C<tables>, enables all known tables.
 434
 435 =back
 436
 437 =item C<generate_export>
 438
 439 Do the work. Will return an absolut path to a temp file where all export files
 440 are zipped together.
 441
 442 =back
 443
 444 =head1 CAVEATS
 445
 446 =over 4
 447
 448 =item *
 449
 450 Date format is shit. The official docs state that only C<YY>, C<YYYY>, C<MM>,
 451 and C<DD> are supported, timestamps do not exist.
 452
 453 =item *
 454
 455 Number parsing seems to be fragile. Official docs state that behaviour for too
 456 low C<Accuracy> settings is undefined. Accuracy of 0 is not taken to mean
 457 Integer but instead generates a warning for redudancy.
 458
 459 There is no dedicated integer type.
 460
 461 =item *
 462
 463 Currently C<ar> and C<ap> have a foreign key to themself with the name
 464 C<storno_id>. If this foreign key is present in the C<INDEX.XML> then the
 465 storno records have to be too. Since this is extremely awkward to code and
 466 confusing for the examiner as to why there are records outside of the time
 467 range, this export skips all self-referential foreign keys.
 468
 469 =item *
 470
 471 Documentation for foreign keys is extremely weird. Instead of giving column
 472 maps it assumes that foreign keys map to the primary keys given for the target
 473 table, and in that order. Foreign keys to keys that are not primary seems to be
 474 impossible. Changing type is also not allowed (which actually makes sense).
 475 Hopefully there are no bugs there.
 476
 477 =item *
 478
 479 It's currently disallowed to export the whole dataset. It's not clear if this
 480 is wanted.
 481
 482 =item *
 483
 484 It is not possible to set an empty C<DigiGroupingSymbol> since then the import
 485 will just work with the default. This was asked in their forum, and the
 486 response actually was:
 487
 488   Einfache Lösung: Definieren Sie das Tausendertrennzeichen als Komma, auch
 489   wenn es nicht verwendet wird. Sollten Sie das Komma bereits als Feldtrenner
 490   verwenden, so wählen Sie als Tausendertrennzeichen eine Alternative wie das
 491   Pipe-Symbol |.
 492
 493 L<http://www.gdpdu-portal.com/forum/index.php?mode=thread&id=1392>
 494
 495 =item *
 496
 497 It is not possible to define a C<RecordDelimiter> with XML entities. &#x0A;
 498 generates the error message:
 499
 500   C<RecordDelimiter>-Wert (&#x0A;) sollte immer aus ein oder zwei Zeichen
 501   bestehen.
 502
 503 Instead we just use the implicit default RecordDelimiter CRLF.
 504
 505 =item *
 506
 507 Not confirmed yet:
 508
 509 Foreign keys seem only to work with previously defined tables (which would be
 510 utterly insane).
 511
 512 =back
 513
 514 =head1 AUTHOR
 515
 516 Sven Schöling E<lt>s.schoeling@linet-services.deE<gt>
 517
 518 =cut