SL/GDPDU.pm

   1 package SL::GDPDU;
   2
   3 # TODO:
   4 # optional: background jobable
   5
   6 use strict;
   7 use utf8;
   8
   9 use parent qw(Rose::Object);
  10
  11 use Text::CSV_XS;
  12 use XML::Writer;
  13 use Archive::Zip;
  14 use File::Temp ();
  15 use File::Spec ();
  16 use List::UtilsBy qw(partition_by);
  17
  18 use SL::DB::Helper::ALL; # since we work on meta data, we need everything
  19 use SL::DB::Helper::Mappings;
  20 use SL::Locale::String qw(t8);
  21
  22 use Rose::Object::MakeMethods::Generic (
  23   scalar                  => [ qw(from to tables writer company location) ],
  24   'scalar --get_set_init' => [ qw(files tempfiles export_ids) ],
  25 );
  26
  27 # in this we find:
  28 # key:         table name
  29 # name:        short name, translated
  30 # description: long description, translated
  31 # transdate:   column used to filter from/to, empty if table is filtered otherwise
  32 # keep:        arrayref of columns that should be saved for further referencing
  33 # tables:      arrayref with one column and one or many table.column references that were kept earlier
  34 my %known_tables = (
  35   ar                    => { name => t8('Invoice'),                 description => t8('Sales Invoices and Accounts Receivables'),   keep => [ qw(id customer_id vendor_id) ], transdate => 'transdate', },
  36   ap                    => { name => t8('Purchase Invoice'),        description => t8('Purchase Invoices and Accounts Payables'),   keep => [ qw(id customer_id vendor_id) ], transdate => 'transdate', },
  37   oe                    => { name => t8('Orders'),                  description => t8('Orders and Quotations, Sales and Purchase'), keep => [ qw(id customer_id vendor_id) ], transdate => 'transdate', },
  38   delivery_orders       => { name => t8('Delivery Orders'),         description => t8('Delivery Orders'),                           keep => [ qw(id customer_id vendor_id) ], transdate => 'transdate', },
  39   gl                    => { name => t8('General Ledger'),          description => t8('General Ledger Entries'),                    keep => [ qw(id) ],                       transdate => 'transdate', },
  40   invoice               => { name => t8('Invoice Positions'),       description => t8('Positions for all Invoices'),                keep => [ qw(parts_id) ], tables => [ trans_id => "ar.id", "ap.id" ] },
  41   orderitems            => { name => t8('OrderItems'),              description => t8('Positions for all Orders'),                  keep => [ qw(parts_id) ], tables => [ trans_id => "oe.id" ] },
  42   delivery_order_items  => { name => t8('Delivery Order Items'),    description => t8('Positions for all Delivery Orders'),                      keep => [ qw(parts_id) ], tables => [ delivery_order_id => "delivery_orders.id" ] },
  43   acc_trans             => { name => t8('Transactions'),            description => t8('All general ledger entries'),                keep => [ qw(chart_id) ], tables => [ trans_id => "ar.id", "ap.id", "oe.id", "delivery_orders.id", "gl.id" ] },
  44   chart                 => { name => t8('Charts'),                  description => t8('Chart of Accounts'),                                                   tables => [ id => "acc_trans.chart_id" ] },
  45   customer              => { name => t8('Customers'),               description => t8('Customer Master Data'),                                                tables => [ id => "ar.customer_id", "ap.customer_id", "oe.customer_id", "delivery_orders.customer_id" ] },
  46   vendor                => { name => t8('Vendors'),                 description => t8('Vendor Master Data'),                                                  tables => [ id => "ar.vendor_id",   "ap.vendor_id",   "oe.vendor_id",   "delivery_orders.vendor_id" ] },
  47   parts                 => { name => t8('Parts'),                   description => t8('Parts, Services, and Assemblies'),                                     tables => [ id => "invoice.parts_id", "orderitems.parts_id", "delivery_order_items.parts_id" ] },
  48 );
  49
  50 # rows in this listing are tiers.
  51 # tables may depend on ids in a tier above them
  52 my @export_table_order = qw(
  53   ar ap gl oe delivery_orders
  54   invoice orderitems delivery_order_items
  55   customer vendor
  56   parts
  57   acc_trans
  58   chart
  59 );
  60
  61 # needed because the standard dbh sets datestyle german and we don't want to mess with that
  62 my $date_format = 'DD.MM.YYYY';
  63
  64 # callbacks that produce the xml spec for these column types
  65 my %column_types = (
  66   'Rose::DB::Object::Metadata::Column::Integer'   => sub { $_[0]->tag('Numeric') },  # see Caveats for integer issues
  67   'Rose::DB::Object::Metadata::Column::BigInt'    => sub { $_[0]->tag('Numeric') },  # see Caveats for integer issues
  68   'Rose::DB::Object::Metadata::Column::Text'      => sub { $_[0]->tag('AlphaNumeric') },
  69   'Rose::DB::Object::Metadata::Column::Varchar'   => sub { $_[0]->tag('AlphaNumeric') },
  70   'Rose::DB::Object::Metadata::Column::Character' => sub { $_[0]->tag('AlphaNumeric') },
  71   'Rose::DB::Object::Metadata::Column::Numeric'   => sub { $_[0]->tag('Numeric', sub { $_[0]->tag('Accuracy', 5) }) },
  72   'Rose::DB::Object::Metadata::Column::Date'      => sub { $_[0]->tag('Date', sub { $_[0]->tag('Format', $date_format) }) },
  73   'Rose::DB::Object::Metadata::Column::Timestamp' => sub { $_[0]->tag('Date', sub { $_[0]->tag('Format', $date_format) }) },
  74   'Rose::DB::Object::Metadata::Column::Float'     => sub { $_[0]->tag('Numeric') },
  75   'Rose::DB::Object::Metadata::Column::Boolean'   => sub { $_[0]
  76     ->tag('AlphaNumeric')
  77     ->tag('Map', sub { $_[0]
  78       ->tag('From', 1)
  79       ->tag('To', t8('true'))
  80     })
  81     ->tag('Map', sub { $_[0]
  82       ->tag('From', 0)
  83       ->tag('To', t8('false'))
  84     })
  85     ->tag('Map', sub { $_[0]
  86       ->tag('From', '')
  87       ->tag('To', t8('false'))
  88     })
  89   },
  90 );
  91
  92 sub generate_export {
  93   my ($self) = @_;
  94
  95   # verify data
  96   $self->from && 'DateTime' eq ref $self->from or die 'need from date';
  97   $self->to   && 'DateTime' eq ref $self->to   or die 'need to date';
  98   $self->from <= $self->to                     or die 'from date must be earlier or equal than to date';
  99   $self->tables && @{ $self->tables }          or die 'need tables';
 100   for (@{ $self->tables }) {
 101     next if $known_tables{$_};
 102     die "unknown table '$_'";
 103   }
 104
 105   # get data from those tables and save to csv
 106   # for that we need to build queries that fetch all the columns
 107   for ($self->sorted_tables) {
 108     $self->do_csv_export($_);
 109   }
 110
 111   # write xml file
 112   $self->do_xml_file;
 113
 114   # add dtd
 115   $self->files->{'gdpdu-01-08-2002.dtd'} = File::Spec->catfile('users', 'gdpdu-01-08-2002.dtd');
 116
 117   # make zip
 118   my ($fh, $zipfile) = File::Temp::tempfile();
 119   my $zip            = Archive::Zip->new;
 120
 121   while (my ($name, $file) = each %{ $self->files }) {
 122     $zip->addFile($file, $name);
 123   }
 124
 125   $zip->writeToFileHandle($fh) == Archive::Zip::AZ_OK() or die 'error writing zip file';
 126   close($fh);
 127
 128   return $zipfile;
 129 }
 130
 131 sub do_xml_file {
 132   my ($self) = @_;
 133
 134   my ($fh, $filename) = File::Temp::tempfile();
 135   binmode($fh, ':utf8');
 136
 137   $self->files->{'INDEX.XML'} = $filename;
 138   push @{ $self->tempfiles }, $filename;
 139
 140   my $writer = XML::Writer->new(
 141     OUTPUT      => $fh,
 142     ENCODING    => 'UTF-8',
 143   );
 144
 145   $self->writer($writer);
 146   $self->writer->xmlDecl('UTF-8');
 147   $self->writer->doctype('DataSet', undef, "gdpdu-01-08-2002.dtd");
 148   $self->tag('DataSet', sub { $self
 149     ->tag('Version', '1.0')
 150     ->tag('DataSupplier', sub { $self
 151       ->tag('Name', $self->client_name)
 152       ->tag('Location', $self->client_location)
 153       ->tag('Comment', $self->make_comment)
 154     })
 155     ->tag('Media', sub { $self
 156       ->tag('Name', t8('DataSet #1', 1));
 157       for (reverse $self->sorted_tables) { $self  # see CAVEATS for table order
 158         ->table($_)
 159       }
 160     })
 161   });
 162   close($fh);
 163 }
 164
 165 sub table {
 166   my ($self, $table) = @_;
 167   my $writer = $self->writer;
 168
 169   $self->tag('Table', sub { $self
 170     ->tag('URL', "$table.csv")
 171     ->tag('Name', $known_tables{$table}{name})
 172     ->tag('Description', $known_tables{$table}{description})
 173     ->tag('Validity', sub { $self
 174       ->tag('Range', sub { $self
 175         ->tag('From', $self->from->to_kivitendo(dateformat => 'dd.mm.yyyy'))
 176         ->tag('To',   $self->to->to_kivitendo(dateformat => 'dd.mm.yyyy'))
 177       })
 178       ->tag('Format', $date_format)
 179     })
 180     ->tag('UTF8')
 181     ->tag('DecimalSymbol', '.')
 182     ->tag('DigitGroupingSymbol', '|')     # see CAVEATS in documentation
 183     ->tag('VariableLength', sub { $self
 184       ->tag('ColumnDelimiter', ',')       # see CAVEATS for missing RecordDelimiter
 185       ->tag('TextEncapsulator', '"')
 186       ->columns($table)
 187       ->foreign_keys($table)
 188     })
 189   });
 190 }
 191
 192 sub _table_columns {
 193   my ($table) = @_;
 194   my $package = SL::DB::Helper::Mappings::get_package_for_table($table);
 195
 196   # PrimaryKeys must come before regular columns, so partition first
 197   partition_by { 1 * $_->is_primary_key_member } $package->meta->columns;
 198 }
 199
 200 sub columns {
 201   my ($self, $table) = @_;
 202
 203   my %cols_by_primary_key = _table_columns($table);
 204
 205   for my $column (@{ $cols_by_primary_key{1} }) {
 206     my $type = $column_types{ ref $column };
 207
 208     die "unknown col type @{[ ref $column ]}" unless $type;
 209
 210     $self->tag('VariablePrimaryKey', sub { $self
 211       ->tag('Name', $column->name);
 212       $type->($self);
 213     })
 214   }
 215
 216   for my $column (@{ $cols_by_primary_key{0} }) {
 217     my $type = $column_types{ ref $column };
 218
 219     die "unknown col type @{[ ref $column]}" unless $type;
 220
 221     $self->tag('VariableColumn', sub { $self
 222       ->tag('Name', $column->name);
 223       $type->($self);
 224     })
 225   }
 226
 227   $self;
 228 }
 229
 230 sub foreign_keys {
 231   my ($self, $table) = @_;
 232   my $package = SL::DB::Helper::Mappings::get_package_for_table($table);
 233
 234   my %requested = map { $_ => 1 } @{ $self->tables };
 235
 236   for my $rel ($package->meta->foreign_keys) {
 237     next unless $requested{ $rel->class->meta->table };
 238
 239     # ok, now extract the columns used as foreign key
 240     my %key_columns = $rel->key_columns;
 241
 242     if (1 != keys %key_columns) {
 243       die "multi keys? we don't support this currently. fix it please";
 244     }
 245
 246     if ($table eq $rel->class->meta->table) {
 247       # self referential foreign keys are a PITA to export correctly. skip!
 248       next;
 249     }
 250
 251     $self->tag('ForeignKey', sub {
 252       $_[0]->tag('Name', $_) for keys %key_columns;
 253       $_[0]->tag('References', $rel->class->meta->table);
 254    });
 255   }
 256 }
 257
 258 sub do_csv_export {
 259   my ($self, $table) = @_;
 260
 261   my $csv = Text::CSV_XS->new({ binary => 1, eol => "\r\n", sep_char => ",", quote_char => '"' });
 262
 263   my ($fh, $filename) = File::Temp::tempfile();
 264   binmode($fh, ':utf8');
 265
 266   $self->files->{"$table.csv"} = $filename;
 267   push @{ $self->tempfiles }, $filename;
 268
 269   # in the right order (primary keys first)
 270   my %cols_by_primary_key = _table_columns($table);
 271   my @columns = (@{ $cols_by_primary_key{1} }, @{ $cols_by_primary_key{0} });
 272   my %col_index = do { my $i = 0; map {; "$_" => $i++ } @columns };
 273
 274   # and normalize date stuff
 275   my @select_tokens = map { (ref $_) =~ /Time/ ? $_->name . '::date' : $_->name } @columns;
 276
 277   my @where_tokens;
 278   my @values;
 279   if ($known_tables{$table}{transdate}) {
 280     if ($self->from) {
 281       push @where_tokens, "$known_tables{$table}{transdate} >= ?";
 282       push @values, $self->from;
 283     }
 284     if ($self->to) {
 285       push @where_tokens, "$known_tables{$table}{transdate} <= ?";
 286       push @values, $self->to;
 287     }
 288   }
 289   if ($known_tables{$table}{tables}) {
 290     my ($col, @col_specs) = @{ $known_tables{$table}{tables} };
 291     my %ids;
 292     for (@col_specs) {
 293       my ($ftable, $fkey) = split /\./, $_;
 294       if (!exists $self->export_ids->{$ftable}{$fkey}) {
 295          # check if we forgot to keep it
 296          if (!grep { $_ eq $fkey } @{ $known_tables{$ftable}{keep} || [] }) {
 297            die "unknown table spec '$_' for table $table, did you forget to keep $fkey in $ftable?"
 298          } else {
 299            # hmm, most likely just an empty set.
 300            $self->export_ids->{$ftable}{$fkey} = {};
 301          }
 302       }
 303       $ids{$_}++ for keys %{ $self->export_ids->{$ftable}{$fkey} };
 304     }
 305     if (keys %ids) {
 306       push @where_tokens, "$col IN (@{[ join ',', ('?') x keys %ids ]})";
 307       push @values, keys %ids;
 308     } else {
 309       push @where_tokens, '1=0';
 310     }
 311   }
 312
 313   my $where_clause = @where_tokens ? 'WHERE ' . join ' AND ', @where_tokens : '';
 314
 315   my $query = "SELECT " . join(', ', @select_tokens) . " FROM $table $where_clause";
 316
 317   my $sth = $::form->get_standard_dbh->prepare($query);
 318   $sth->execute(@values) or die "error executing query $query: " . $sth->errstr;
 319
 320   while (my $row = $sth->fetch) {
 321     for my $keep_col (@{ $known_tables{$table}{keep} || [] }) {
 322       next if !$row->[$col_index{$keep_col}];
 323       $self->export_ids->{$table}{$keep_col} ||= {};
 324       $self->export_ids->{$table}{$keep_col}{$row->[$col_index{$keep_col}]}++;
 325     }
 326     s/\r\n/ /g for @$row; # see CAVEATS
 327
 328     $csv->print($fh, $row) or $csv->error_diag;
 329   }
 330   $sth->finish();
 331 }
 332
 333 sub tag {
 334   my ($self, $tag, $content) = @_;
 335
 336   $self->writer->startTag($tag);
 337   if ('CODE' eq ref $content) {
 338     $content->($self);
 339   } else {
 340     $self->writer->characters($content);
 341   }
 342   $self->writer->endTag;
 343   return $self;
 344 }
 345
 346 sub make_comment {
 347   my $gdpdu_version = API_VERSION();
 348   my $kivi_version  = $::form->read_version;
 349   my $person        = $::myconfig{name};
 350   my $contact       = join ', ',
 351     (t8("Email") . ": $::myconfig{email}" ) x!! $::myconfig{email},
 352     (t8("Tel")   . ": $::myconfig{tel}" )   x!! $::myconfig{tel},
 353     (t8("Fax")   . ": $::myconfig{fax}" )   x!! $::myconfig{fax};
 354
 355   t8('DataSet for GDPdU version #1. Created with kivitendo #2 by #3 (#4)',
 356     $gdpdu_version, $kivi_version, $person, $contact
 357   );
 358 }
 359
 360 sub client_name {
 361   $_[0]->company
 362 }
 363
 364 sub client_location {
 365   $_[0]->location
 366 }
 367
 368 sub sorted_tables {
 369   my ($self) = @_;
 370
 371   my %given = map { $_ => 1 } @{ $self->tables };
 372
 373   grep { $given{$_} } @export_table_order;
 374 }
 375
 376 sub all_tables {
 377   my ($self, $yesno) = @_;
 378
 379   $self->tables(\@export_table_order) if $yesno;
 380 }
 381
 382 sub init_files { +{} }
 383 sub init_export_ids { +{} }
 384 sub init_tempfiles { [] }
 385
 386 sub API_VERSION {
 387   DateTime->new(year => 2002, month => 8, day => 14)->to_kivitendo;
 388 }
 389
 390 sub DESTROY {
 391   unlink $_ for @{ $_[0]->tempfiles || [] };
 392 }
 393
 394 1;
 395
 396 __END__
 397
 398 =encoding utf-8
 399
 400 =head1 NAME
 401
 402 SL::GDPDU - IDEA export generator
 403
 404 =head1 FUNCTIONS
 405
 406 =over 4
 407
 408 =item C<new PARAMS>
 409
 410 Create new export object. C<PARAMS> may contain:
 411
 412 =over 4
 413
 414 =item company
 415
 416 The name of the company, needed for the supplier header
 417
 418 =item location
 419
 420 Location of the company, needed for the suupplier header
 421
 422 =item from
 423
 424 =item to
 425
 426 Will only include records in the specified date range. Data pulled from other
 427 tables will be culled to match what is needed for these records.
 428
 429 =item tables
 430
 431 A list of tables to be exported.
 432
 433 =item all_tables
 434
 435 Alternative to C<tables>, enables all known tables.
 436
 437 =back
 438
 439 =item C<generate_export>
 440
 441 Do the work. Will return an absolut path to a temp file where all export files
 442 are zipped together.
 443
 444 =back
 445
 446 =head1 CAVEATS
 447
 448 =over 4
 449
 450 =item *
 451
 452 Date format is shit. The official docs state that only C<YY>, C<YYYY>, C<MM>,
 453 and C<DD> are supported, timestamps do not exist.
 454
 455 =item *
 456
 457 Number parsing seems to be fragile. Official docs state that behaviour for too
 458 low C<Accuracy> settings is undefined. Accuracy of 0 is not taken to mean
 459 Integer but instead generates a warning for redudancy.
 460
 461 There is no dedicated integer type.
 462
 463 =item *
 464
 465 Currently C<ar> and C<ap> have a foreign key to themself with the name
 466 C<storno_id>. If this foreign key is present in the C<INDEX.XML> then the
 467 storno records have to be too. Since this is extremely awkward to code and
 468 confusing for the examiner as to why there are records outside of the time
 469 range, this export skips all self-referential foreign keys.
 470
 471 =item *
 472
 473 Documentation for foreign keys is extremely weird. Instead of giving column
 474 maps it assumes that foreign keys map to the primary keys given for the target
 475 table, and in that order. Foreign keys to keys that are not primary seems to be
 476 impossible. Changing type is also not allowed (which actually makes sense).
 477 Hopefully there are no bugs there.
 478
 479 =item *
 480
 481 It's currently disallowed to export the whole dataset. It's not clear if this
 482 is wanted.
 483
 484 =item *
 485
 486 It is not possible to set an empty C<DigiGroupingSymbol> since then the import
 487 will just work with the default. This was asked in their forum, and the
 488 response actually was:
 489
 490   Einfache Lösung: Definieren Sie das Tausendertrennzeichen als Komma, auch
 491   wenn es nicht verwendet wird. Sollten Sie das Komma bereits als Feldtrenner
 492   verwenden, so wählen Sie als Tausendertrennzeichen eine Alternative wie das
 493   Pipe-Symbol |.
 494
 495 L<http://www.gdpdu-portal.com/forum/index.php?mode=thread&id=1392>
 496
 497 =item *
 498
 499 It is not possible to define a C<RecordDelimiter> with XML entities. &#x0A;
 500 generates the error message:
 501
 502   C<RecordDelimiter>-Wert (&#x0A;) sollte immer aus ein oder zwei Zeichen
 503   bestehen.
 504
 505 Instead we just use the implicit default RecordDelimiter CRLF.
 506
 507 =item *
 508
 509 Not confirmed yet:
 510
 511 Foreign keys seem only to work with previously defined tables (which would be
 512 utterly insane).
 513
 514 =item *
 515
 516 The CSV import library used in IDEA is not able to parse newlines (or more
 517 exactly RecordDelimiter) in data. So this export substites all of these with
 518 spaces.
 519
 520 =back
 521
 522 =head1 AUTHOR
 523
 524 Sven Schöling E<lt>s.schoeling@linet-services.deE<gt>
 525
 526 =cut