SL/GDPDU.pm

   1 package SL::GDPDU;
   2
   3 # TODO:
   4 # optional: background jobable
   5
   6 use strict;
   7 use utf8;
   8
   9 use parent qw(Rose::Object);
  10
  11 use Text::CSV_XS;
  12 use XML::Writer;
  13 use Archive::Zip;
  14 use File::Temp ();
  15 use File::Spec ();
  16 use List::UtilsBy qw(partition_by);
  17
  18 use SL::DB::Helper::ALL; # since we work on meta data, we need everything
  19 use SL::DB::Helper::Mappings;
  20 use SL::Locale::String qw(t8);
  21
  22 use Rose::Object::MakeMethods::Generic (
  23   scalar                  => [ qw(from to tables writer company location) ],
  24   'scalar --get_set_init' => [ qw(files tempfiles export_ids) ],
  25 );
  26
  27 # in this we find:
  28 # key:         table name
  29 # name:        short name, translated
  30 # description: long description, translated
  31 # transdate:   column used to filter from/to, empty if table is filtered otherwise
  32 # keep:        arrayref of columns that should be saved for further referencing
  33 # tables:      arrayref with one column and one or many table.column references that were kept earlier
  34 my %known_tables = (
  35   ar                    => { name => t8('Invoice'),                 description => t8('Sales Invoices and Accounts Receivables'),   keep => [ qw(id customer_id vendor_id) ], transdate => 'transdate', },
  36   ap                    => { name => t8('Purchase Invoice'),        description => t8('Purchase Invoices and Accounts Payables'),   keep => [ qw(id customer_id vendor_id) ], transdate => 'transdate', },
  37   oe                    => { name => t8('Orders'),                  description => t8('Orders and Quotations, Sales and Purchase'), keep => [ qw(id customer_id vendor_id) ], transdate => 'transdate', },
  38   delivery_orders       => { name => t8('Delivery Orders'),         description => t8('Delivery Orders'),                           keep => [ qw(id customer_id vendor_id) ], transdate => 'transdate', },
  39   gl                    => { name => t8('General Ledger'),          description => t8('General Ledger Entries'),                    keep => [ qw(id) ],                       transdate => 'transdate', },
  40   invoice               => { name => t8('Invoice Positions'),       description => t8('Positions for all Invoices'),                keep => [ qw(parts_id) ], tables => [ trans_id => "ar.id", "ap.id" ] },
  41   orderitems            => { name => t8('OrderItems'),              description => t8('Positions for all Orders'),                  keep => [ qw(parts_id) ], tables => [ trans_id => "oe.id" ] },
  42   delivery_order_items  => { name => t8('Delivery Order Items'),    description => t8('Positions for all Delivery Orders'),                      keep => [ qw(parts_id) ], tables => [ delivery_order_id => "delivery_orders.id" ] },
  43   acc_trans             => { name => t8('Transactions'),            description => t8('All general ledger entries'),                keep => [ qw(chart_id) ], tables => [ trans_id => "ar.id", "ap.id", "oe.id", "delivery_orders.id", "gl.id" ] },
  44   chart                 => { name => t8('Charts'),                  description => t8('Chart of Accounts'),                                                   tables => [ id => "acc_trans.chart_id" ] },
  45   customer              => { name => t8('Customers'),               description => t8('Customer Master Data'),                                                tables => [ id => "ar.customer_id", "ap.customer_id", "oe.customer_id", "delivery_orders.customer_id" ] },
  46   vendor                => { name => t8('Vendors'),                 description => t8('Vendor Master Data'),                                                  tables => [ id => "ar.vendor_id",   "ap.vendor_id",   "oe.vendor_id",   "delivery_orders.vendor_id" ] },
  47   parts                 => { name => t8('Parts'),                   description => t8('Parts, Services, and Assemblies'),                                     tables => [ id => "invoice.parts_id", "orderitems.parts_id", "delivery_order_items.parts_id" ] },
  48 );
  49
  50 # rows in this listing are tiers.
  51 # tables may depend on ids in a tier above them
  52 my @export_table_order = qw(
  53   ar ap gl oe delivery_orders
  54   invoice orderitems delivery_order_items
  55   customer vendor
  56   parts
  57   acc_trans
  58   chart
  59 );
  60
  61 # needed because the standard dbh sets datestyle german and we don't want to mess with that
  62 my $date_format = 'DD.MM.YYYY';
  63
  64 # callbacks that produce the xml spec for these column types
  65 my %column_types = (
  66   'Rose::DB::Object::Metadata::Column::Integer'   => sub { $_[0]->tag('Numeric') },  # see Caveats for integer issues
  67   'Rose::DB::Object::Metadata::Column::BigInt'    => sub { $_[0]->tag('Numeric') },  # see Caveats for integer issues
  68   'Rose::DB::Object::Metadata::Column::Text'      => sub { $_[0]->tag('AlphaNumeric') },
  69   'Rose::DB::Object::Metadata::Column::Varchar'   => sub { $_[0]->tag('AlphaNumeric') },
  70   'Rose::DB::Object::Metadata::Column::Character' => sub { $_[0]->tag('AlphaNumeric') },
  71   'Rose::DB::Object::Metadata::Column::Numeric'   => sub { $_[0]->tag('Numeric', sub { $_[0]->tag('Accuracy', 5) }) },
  72   'Rose::DB::Object::Metadata::Column::Date'      => sub { $_[0]->tag('Date', sub { $_[0]->tag('Format', $date_format) }) },
  73   'Rose::DB::Object::Metadata::Column::Timestamp' => sub { $_[0]->tag('Date', sub { $_[0]->tag('Format', $date_format) }) },
  74   'Rose::DB::Object::Metadata::Column::Float'     => sub { $_[0]->tag('Numeric') },
  75   'Rose::DB::Object::Metadata::Column::Boolean'   => sub { $_[0]
  76     ->tag('AlphaNumeric')
  77     ->tag('Map', sub { $_[0]
  78       ->tag('From', 1)
  79       ->tag('To', t8('true'))
  80     })
  81     ->tag('Map', sub { $_[0]
  82       ->tag('From', 0)
  83       ->tag('To', t8('false'))
  84     })
  85     ->tag('Map', sub { $_[0]
  86       ->tag('From', '')
  87       ->tag('To', t8('false'))
  88     })
  89   },
  90 );
  91
  92 sub generate_export {
  93   my ($self) = @_;
  94
  95   # verify data
  96   $self->from && 'DateTime' eq ref $self->from or die 'need from date';
  97   $self->to   && 'DateTime' eq ref $self->to   or die 'need to date';
  98   $self->from <= $self->to                     or die 'from date must be earlier or equal than to date';
  99   $self->tables && @{ $self->tables }          or die 'need tables';
 100   for (@{ $self->tables }) {
 101     next if $known_tables{$_};
 102     die "unknown table '$_'";
 103   }
 104
 105   # get data from those tables and save to csv
 106   # for that we need to build queries that fetch all the columns
 107   for ($self->sorted_tables) {
 108     $self->do_csv_export($_);
 109   }
 110
 111   # write xml file
 112   $self->do_xml_file;
 113
 114   # add dtd
 115   $self->files->{'gdpdu-01-08-2002.dtd'} = File::Spec->catfile('users', 'gdpdu-01-08-2002.dtd');
 116
 117   # make zip
 118   my ($fh, $zipfile) = File::Temp::tempfile();
 119   my $zip            = Archive::Zip->new;
 120
 121   while (my ($name, $file) = each %{ $self->files }) {
 122     $zip->addFile($file, $name);
 123   }
 124
 125   $zip->writeToFileHandle($fh) == Archive::Zip::AZ_OK() or die 'error writing zip file';
 126   close($fh);
 127
 128   return $zipfile;
 129 }
 130
 131 sub do_xml_file {
 132   my ($self) = @_;
 133
 134   my ($fh, $filename) = File::Temp::tempfile();
 135   binmode($fh, ':utf8');
 136
 137   $self->files->{'INDEX.XML'} = $filename;
 138   push @{ $self->tempfiles }, $filename;
 139
 140   my $writer = XML::Writer->new(
 141     OUTPUT      => $fh,
 142     ENCODING    => 'UTF-8',
 143   );
 144
 145   $self->writer($writer);
 146   $self->writer->xmlDecl('UTF-8');
 147   $self->writer->doctype('DataSet', undef, "gdpdu-01-08-2002.dtd");
 148   $self->tag('DataSet', sub { $self
 149     ->tag('Version', '1.0')
 150     ->tag('DataSupplier', sub { $self
 151       ->tag('Name', $self->client_name)
 152       ->tag('Location', $self->client_location)
 153       ->tag('Comment', $self->make_comment)
 154     })
 155     ->tag('Media', sub { $self
 156       ->tag('Name', t8('DataSet #1', 1));
 157       for (@{ $self->tables }) { $self
 158         ->table($_)
 159       }
 160     })
 161   });
 162   close($fh);
 163 }
 164
 165 sub table {
 166   my ($self, $table) = @_;
 167   my $writer = $self->writer;
 168
 169   $self->tag('Table', sub { $self
 170     ->tag('URL', "$table.csv")
 171     ->tag('Name', $known_tables{$table}{name})
 172     ->tag('Description', $known_tables{$table}{description})
 173     ->tag('Validity', sub { $self
 174       ->tag('Range', sub { $self
 175         ->tag('From', $self->from->to_kivitendo(dateformat => 'dd.mm.yyyy'))
 176         ->tag('To',   $self->to->to_kivitendo(dateformat => 'dd.mm.yyyy'))
 177       })
 178       ->tag('Format', $date_format)
 179     })
 180     ->tag('DecimalSymbol', '.')
 181     ->tag('DigitGroupingSymbol', '|')     # see CAVEATS in documentation
 182     ->tag('VariableLength', sub { $self
 183       ->tag('ColumnDelimiter', ',')       # see CAVEATS for missing RecordDelimiter
 184       ->tag('TextEncapsulator', '"')
 185       ->columns($table)
 186       ->foreign_keys($table)
 187     })
 188   });
 189 }
 190
 191 sub _table_columns {
 192   my ($table) = @_;
 193   my $package = SL::DB::Helper::Mappings::get_package_for_table($table);
 194
 195   # PrimaryKeys must come before regular columns, so partition first
 196   partition_by { 1 * $_->is_primary_key_member } $package->meta->columns;
 197 }
 198
 199 sub columns {
 200   my ($self, $table) = @_;
 201
 202   my %cols_by_primary_key = _table_columns($table);
 203
 204   for my $column (@{ $cols_by_primary_key{1} }) {
 205     my $type = $column_types{ ref $column };
 206
 207     die "unknown col type @{[ ref $column ]}" unless $type;
 208
 209     $self->tag('VariablePrimaryKey', sub { $self
 210       ->tag('Name', $column->name);
 211       $type->($self);
 212     })
 213   }
 214
 215   for my $column (@{ $cols_by_primary_key{0} }) {
 216     my $type = $column_types{ ref $column };
 217
 218     die "unknown col type @{[ ref $column]}" unless $type;
 219
 220     $self->tag('VariableColumn', sub { $self
 221       ->tag('Name', $column->name);
 222       $type->($self);
 223     })
 224   }
 225
 226   $self;
 227 }
 228
 229 sub foreign_keys {
 230   my ($self, $table) = @_;
 231   my $package = SL::DB::Helper::Mappings::get_package_for_table($table);
 232
 233   my %requested = map { $_ => 1 } @{ $self->tables };
 234
 235   for my $rel ($package->meta->foreign_keys) {
 236     next unless $requested{ $rel->class->meta->table };
 237
 238     # ok, now extract the columns used as foreign key
 239     my %key_columns = $rel->key_columns;
 240
 241     if (1 != keys %key_columns) {
 242       die "multi keys? we don't support this currently. fix it please";
 243     }
 244
 245     if ($table eq $rel->class->meta->table) {
 246       # self referential foreign keys are a PITA to export correctly. skip!
 247       next;
 248     }
 249
 250     $self->tag('ForeignKey', sub {
 251       $_[0]->tag('Name', $_) for keys %key_columns;
 252       $_[0]->tag('References', $rel->class->meta->table);
 253    });
 254   }
 255 }
 256
 257 sub do_csv_export {
 258   my ($self, $table) = @_;
 259
 260   my $csv = Text::CSV_XS->new({ binary => 1, eol => "\r\n", sep_char => ",", quote_char => '"' });
 261
 262   my ($fh, $filename) = File::Temp::tempfile();
 263   binmode($fh, ':utf8');
 264
 265   $self->files->{"$table.csv"} = $filename;
 266   push @{ $self->tempfiles }, $filename;
 267
 268   # in the right order (primary keys first)
 269   my %cols_by_primary_key = _table_columns($table);
 270   my @columns = (@{ $cols_by_primary_key{1} }, @{ $cols_by_primary_key{0} });
 271   my %col_index = do { my $i = 0; map {; "$_" => $i++ } @columns };
 272
 273   # and normalize date stuff
 274   my @select_tokens = map { (ref $_) =~ /Time/ ? $_->name . '::date' : $_->name } @columns;
 275
 276   my @where_tokens;
 277   my @values;
 278   if ($known_tables{$table}{transdate}) {
 279     if ($self->from) {
 280       push @where_tokens, "$known_tables{$table}{transdate} >= ?";
 281       push @values, $self->from;
 282     }
 283     if ($self->to) {
 284       push @where_tokens, "$known_tables{$table}{transdate} <= ?";
 285       push @values, $self->to;
 286     }
 287   }
 288   if ($known_tables{$table}{tables}) {
 289     my ($col, @col_specs) = @{ $known_tables{$table}{tables} };
 290     my %ids;
 291     for (@col_specs) {
 292       my ($ftable, $fkey) = split /\./, $_;
 293       if (!exists $self->export_ids->{$ftable}{$fkey}) {
 294          # check if we forgot to keep it
 295          if (!grep { $_ eq $fkey } @{ $known_tables{$ftable}{keep} || [] }) {
 296            die "unknown table spec '$_' for table $table, did you forget to keep $fkey in $ftable?"
 297          } else {
 298            # hmm, most likely just an empty set.
 299            $self->export_ids->{$ftable}{$fkey} = {};
 300          }
 301       }
 302       $ids{$_}++ for keys %{ $self->export_ids->{$ftable}{$fkey} };
 303     }
 304     if (keys %ids) {
 305       push @where_tokens, "$col IN (@{[ join ',', ('?') x keys %ids ]})";
 306       push @values, keys %ids;
 307     } else {
 308       push @where_tokens, '1=0';
 309     }
 310   }
 311
 312   my $where_clause = @where_tokens ? 'WHERE ' . join ' AND ', @where_tokens : '';
 313
 314   my $query = "SELECT " . join(', ', @select_tokens) . " FROM $table $where_clause";
 315
 316   my $sth = $::form->get_standard_dbh->prepare($query);
 317   $sth->execute(@values) or die "error executing query $query: " . $sth->errstr;
 318
 319   while (my $row = $sth->fetch) {
 320     for my $keep_col (@{ $known_tables{$table}{keep} || [] }) {
 321       next if !$row->[$col_index{$keep_col}];
 322       $self->export_ids->{$table}{$keep_col} ||= {};
 323       $self->export_ids->{$table}{$keep_col}{$row->[$col_index{$keep_col}]}++;
 324     }
 325     $csv->print($fh, $row) or $csv->error_diag;
 326   }
 327   $sth->finish();
 328 }
 329
 330 sub tag {
 331   my ($self, $tag, $content) = @_;
 332
 333   $self->writer->startTag($tag);
 334   if ('CODE' eq ref $content) {
 335     $content->($self);
 336   } else {
 337     $self->writer->characters($content);
 338   }
 339   $self->writer->endTag;
 340   return $self;
 341 }
 342
 343 sub make_comment {
 344   my $gdpdu_version = API_VERSION();
 345   my $kivi_version  = $::form->read_version;
 346   my $person        = $::myconfig{name};
 347   my $contact       = join ', ',
 348     (t8("Email") . ": $::myconfig{email}" ) x!! $::myconfig{email},
 349     (t8("Tel")   . ": $::myconfig{tel}" )   x!! $::myconfig{tel},
 350     (t8("Fax")   . ": $::myconfig{fax}" )   x!! $::myconfig{fax};
 351
 352   t8('DataSet for GDPdU version #1. Created with kivitendo #2 by #3 (#4)',
 353     $gdpdu_version, $kivi_version, $person, $contact
 354   );
 355 }
 356
 357 sub client_name {
 358   $_[0]->company
 359 }
 360
 361 sub client_location {
 362   $_[0]->location
 363 }
 364
 365 sub sorted_tables {
 366   my ($self) = @_;
 367
 368   my %given = map { $_ => 1 } @{ $self->tables };
 369
 370   grep { $given{$_} } @export_table_order;
 371 }
 372
 373 sub all_tables {
 374   my ($self, $yesno) = @_;
 375
 376   $self->tables(\@export_table_order) if $yesno;
 377 }
 378
 379 sub init_files { +{} }
 380 sub init_export_ids { +{} }
 381 sub init_tempfiles { [] }
 382
 383 sub API_VERSION {
 384   DateTime->new(year => 2002, month => 8, day => 14)->to_kivitendo;
 385 }
 386
 387 sub DESTROY {
 388   unlink $_ for @{ $_[0]->tempfiles || [] };
 389 }
 390
 391 1;
 392
 393 __END__
 394
 395 =encoding utf-8
 396
 397 =head1 NAME
 398
 399 SL::GDPDU - IDEA export generator
 400
 401 =head1 FUNCTIONS
 402
 403 =over 4
 404
 405 =item C<new PARAMS>
 406
 407 Create new export object. C<PARAMS> may contain:
 408
 409 =over 4
 410
 411 =item company
 412
 413 The name of the company, needed for the supplier header
 414
 415 =item location
 416
 417 Location of the company, needed for the suupplier header
 418
 419 =item from
 420
 421 =item to
 422
 423 Will only include records in the specified date range. Data pulled from other
 424 tables will be culled to match what is needed for these records.
 425
 426 =item tables
 427
 428 A list of tables to be exported.
 429
 430 =item all_tables
 431
 432 Alternative to C<tables>, enables all known tables.
 433
 434 =back
 435
 436 =item C<generate_export>
 437
 438 Do the work. Will return an absolut path to a temp file where all export files
 439 are zipped together.
 440
 441 =back
 442
 443 =head1 CAVEATS
 444
 445 =over 4
 446
 447 =item *
 448
 449 Date format is shit. The official docs state that only C<YY>, C<YYYY>, C<MM>,
 450 and C<DD> are supported, timestamps do not exist.
 451
 452 =item *
 453
 454 Number parsing seems to be fragile. Official docs state that behaviour for too
 455 low C<Accuracy> settings is undefined. Accuracy of 0 is not taken to mean
 456 Integer but instead generates a warning for redudancy.
 457
 458 There is no dedicated integer type.
 459
 460 =item *
 461
 462 Currently C<ar> and C<ap> have a foreign key to themself with the name
 463 C<storno_id>. If this foreign key is present in the C<INDEX.XML> then the
 464 storno records have to be too. Since this is extremely awkward to code and
 465 confusing for the examiner as to why there are records outside of the time
 466 range, this export skips all self-referential foreign keys.
 467
 468 =item *
 469
 470 Documentation for foreign keys is extremely weird. Instead of giving column
 471 maps it assumes that foreign keys map to the primary keys given for the target
 472 table, and in that order. Foreign keys to keys that are not primary seems to be
 473 impossible. Changing type is also not allowed (which actually makes sense).
 474 Hopefully there are no bugs there.
 475
 476 =item *
 477
 478 It's currently disallowed to export the whole dataset. It's not clear if this
 479 is wanted.
 480
 481 =item *
 482
 483 It is not possible to set an empty C<DigiGroupingSymbol> since then the import
 484 will just work with the default. This was asked in their forum, and the
 485 response actually was:
 486
 487   Einfache Lösung: Definieren Sie das Tausendertrennzeichen als Komma, auch
 488   wenn es nicht verwendet wird. Sollten Sie das Komma bereits als Feldtrenner
 489   verwenden, so wählen Sie als Tausendertrennzeichen eine Alternative wie das
 490   Pipe-Symbol |.
 491
 492 L<http://www.gdpdu-portal.com/forum/index.php?mode=thread&id=1392>
 493
 494 =item *
 495
 496 It is not possible to define a C<RecordDelimiter> with XML entities. &#x0A;
 497 generates the error message:
 498
 499   C<RecordDelimiter>-Wert (&#x0A;) sollte immer aus ein oder zwei Zeichen
 500   bestehen.
 501
 502 Instead we just use the implicit default RecordDelimiter CRLF.
 503
 504 =item *
 505
 506 Not confirmed yet:
 507
 508 Foreign keys seem only to work with previously defined tables (which would be
 509 utterly insane).
 510
 511 =back
 512
 513 =head1 AUTHOR
 514
 515 Sven Schöling E<lt>s.schoeling@linet-services.deE<gt>
 516
 517 =cut