From af2053931fb484b246ba42f93df4889c4257ab42 Mon Sep 17 00:00:00 2001 From: Bernd Blessmann Date: Mon, 17 Sep 2012 15:38:24 +0200 Subject: [PATCH] SL::Helper::Csv kann mit Multiplex-Daten (mehreren Profilen) umgehen. --- SL/Controller/CsvImport/Base.pm | 8 +- SL/Helper/Csv.pm | 168 ++++++++++++++++++++++++++------ SL/Helper/Csv/Dispatcher.pm | 103 +++++++++++++++++--- t/helper/csv.t | 66 +++++++------ 4 files changed, 271 insertions(+), 74 deletions(-) diff --git a/SL/Controller/CsvImport/Base.pm b/SL/Controller/CsvImport/Base.pm index c072bb228..92ee5565c 100644 --- a/SL/Controller/CsvImport/Base.pm +++ b/SL/Controller/CsvImport/Base.pm @@ -31,7 +31,7 @@ sub run { my $profile = $self->profile; $self->csv(SL::Helper::Csv->new(file => $self->file->file_name, encoding => $self->controller->profile->get('charset'), - profile => { profile => $profile, class => $self->class }, + profile => [{ profile => $profile, class => $self->class }], ignore_unknown_columns => 1, strict_profile => 1, case_insensitive_header => 1, @@ -47,11 +47,15 @@ sub run { $self->controller->track_progress(progress => 50); + if ($self->csv->is_multiplexed) { + die "controller for multiplex data is not implemented yet"; + } + $self->controller->errors([ $self->csv->errors ]) if $self->csv->errors; return if ( !$self->csv->header || $self->csv->errors ); - my $headers = { headers => [ grep { $profile->{$_} } @{ $self->csv->header } ] }; + my $headers = { headers => [ grep { $profile->{$_} } @{ $self->csv->header->[0] } ] }; $headers->{methods} = [ map { $profile->{$_} } @{ $headers->{headers} } ]; $headers->{used} = { map { ($_ => 1) } @{ $headers->{headers} } }; $self->controller->headers($headers); diff --git a/SL/Helper/Csv.pm b/SL/Helper/Csv.pm index 01312fe8a..4a8bc752e 100644 --- a/SL/Helper/Csv.pm +++ b/SL/Helper/Csv.pm @@ -10,8 +10,8 @@ use Params::Validate qw(:all); use Text::CSV_XS; use Rose::Object::MakeMethods::Generic scalar => [ qw( file encoding sep_char quote_char escape_char header profile - numberformat dateformat ignore_unknown_columns strict_profile _io _csv - _objects _parsed _data _errors all_cvar_configs case_insensitive_header + numberformat dateformat ignore_unknown_columns strict_profile is_multiplexed + _io _csv _objects _parsed _data _errors all_cvar_configs case_insensitive_header ) ]; use SL::Helper::Csv::Dispatcher; @@ -26,7 +26,7 @@ sub new { quote_char => { default => '"' }, escape_char => { default => '"' }, header => { type => ARRAYREF, optional => 1 }, - profile => { type => HASHREF, optional => 1 }, + profile => { type => ARRAYREF, optional => 1 }, file => 1, encoding => 0, numberformat => 0, @@ -56,6 +56,7 @@ sub parse { my ($self, %params) = @_; $self->_open_file; + return if ! $self->_check_multiplexed; return if ! $self->_check_header; return if ! $self->dispatcher->parse_profile; return if ! $self->_parse_data; @@ -97,29 +98,86 @@ sub _open_file { return $self->_io; } +# check, if data is multiplexed and if all nessesary infos are given +sub _check_multiplexed { + my ($self, %params) = @_; + + $self->is_multiplexed(0); + + # If more than one profile is given, it is multiplexed. + if ($self->profile) { + my @profile = @{ $self->profile }; + if (scalar @profile > 1) { + my $info_ok = 1; + # Each profile needs a class and a row_ident + foreach my $p (@profile) { + if ( !defined $p->{class} || !defined $p->{row_ident} ) { + $info_ok = 0; + last; + } + } + + # If header is given, there need to be a header for each profile + if ($info_ok && $self->header) { + my @header = @{ $self->header }; + if (scalar @profile != scalar @header) { + $info_ok = 0; + } + } + $self->is_multiplexed($info_ok); + return $info_ok; + } + } + + # ok, if not multiplexed + return 1; +} + sub _check_header { my ($self, %params) = @_; - my $header = $self->header; + my $header; - if (! $header) { - $header = $self->_csv->getline($self->_io); + $header = $self->header; + if (!$header) { + my $n_header = ($self->is_multiplexed)? scalar @{ $self->profile } : 1; + foreach my $p_num (0..$n_header - 1) { + my $h = $self->_csv->getline($self->_io); - $self->_push_error([ - $self->_csv->error_input, - $self->_csv->error_diag, - 0, - ]) unless $header; + $self->_push_error([ + $self->_csv->error_input, + $self->_csv->error_diag, + 0, + ]) unless $h; + + push @{ $header }, $h; + } } # Special case: utf8 BOM. # certain software (namely MS Office and notepad.exe insist on prefixing # data with a discouraged but valid byte order mark # if not removed, the first header field will not be recognized - if ($header && $header->[0] && $self->encoding =~ /utf-?8/i) { - $header->[0] =~ s/^\x{FEFF}//; + if ($header) { + foreach my $h (@{ $header }) { + if ($h && $h->[0] && $self->encoding =~ /utf-?8/i) { + $h->[0] =~ s/^\x{FEFF}//; + } + } } - return unless $header; + # check, if all header fields are parsed well + my $all_ok = 1; + if ($header) { + foreach my $h (@{ $header }) { + if (!$h) { + $all_ok = 0; + last; + } + } + } else { + $all_ok = 0; + } + return unless $all_ok; # Special case: human stupidity # people insist that case sensitivity doesn't exist and try to enter all @@ -145,13 +203,13 @@ sub _parse_data { my ($self, %params) = @_; my (@data, @errors); - $self->_csv->column_names(@{ $self->header }); - while (1) { my $row = $self->_csv->getline($self->_io); if ($row) { + my $header = $self->_header_by_row($row); + $self->_csv->column_names(@{ $header }); my %hr; - @hr{@{ $self->header }} = @$row; + @hr{@{ $header }} = @$row; push @data, \%hr; } else { last if $self->_csv->eof; @@ -178,6 +236,23 @@ sub _parse_data { return ! @errors; } +sub _header_by_row { + my ($self, $row) = @_; + + my @header = @{ $self->header }; + if ($self->is_multiplexed) { + my $i = 0; + foreach my $profile (@{ $self->profile }) { + if (@{ $row }[0] eq $profile->{row_ident}) { + return $header[$i]; + } + $i++; + } + } else { + return $header[0]; + } +} + sub _encode_layer { ':encoding(' . $_[0]->encoding . ')'; } @@ -243,8 +318,9 @@ SL::Helper::Csv - take care of csv file uploads sep_char => ',', # default ';' quote_char => '\'', # default '"' escape_char => '"', # default '"' - header => [qw(id text sellprice word)], # see later - profile => { profile => { sellprice => 'sellprice_as_number'}, class => SL::DB::Part }, + header => [ [qw(id text sellprice word)] ], # see later + profile => [ { profile => { sellprice => 'sellprice_as_number'}, + class => 'SL::DB::Part' } ], ); my $status = $csv->parse; @@ -285,6 +361,13 @@ unique, other data needs to be connected to existing data sets. This will not happen here. You will receive a plain mapping of the data into the class tree, nothing more. +=item Multiplex data + +This module can handle multiplexed data of different class types. In that case +multiple profiles with classes and row identifiers must be given. Multiple +headers may also be given or read from csv data. Data must contain the row +identifier in the first column and it's field name must be 'datatype'. + =back =head1 METHODS @@ -338,19 +421,33 @@ guessing. Know what your data is. Defaults to utf-8. Same as in L -=item C
\@FIELDS +=item C
\@HEADERS + +If given, it contains an ARRAYREF for each different class type (i.e. one +ARRAYREF if the data is only of one class type). These ARRAYREFS are the header +fields which are an array of columns. In this case the first lines are not used +as a header. Empty header fields will be ignored in objects. + +If not given, headers are taken from the first n lines of data, where n is the +number of different class types. -Can be an array of columns, in this case the first line is not used as a -header. Empty header fields will be ignored in objects. +Examples: -=item C {profile => \%ACCESSORS, class => class} + classic data of one type: + [ [ 'name', 'street', 'zipcode', 'city' ] ] -This is a HASHREF to hash which may contain the keys C and C. + multiplexed data with two different types + [ [ 'ordernumber', 'customer', 'transdate' ], [ 'partnumber', 'qty', 'sellprice' ] ] + +=item C [{profile => \%ACCESSORS, class => class, row_ident => ri},] + +This is an ARRAYREF to HASHREFs which may contain the keys C, C +and C. The C is a HASHREF which may be used to map header fields to custom accessors. Example: - {profile => { listprice => listprice_as_number }} + [ {profile => { listprice => listprice_as_number }} ] In this case C will be used to read in values from the C column. @@ -358,7 +455,7 @@ C column. In case of a One-To-One relationsship these can also be set over relationsships by sparating the steps with a dot (C<.>). This will work: - {profile => { customer => 'customer.name' }} + [ {profile => { customer => 'customer.name' }} ] And will result in something like this: @@ -374,6 +471,21 @@ assumed you know what to do in this case. If C is present, the line will be handed to the new sub of this class, and the return value used instead of the line itself. +C is a string to recognize the right profile and class for each data +line in multiplexed data. + +In case of multiplexed data, C and C must be given. +Example: + [ { + class => 'SL::DB::Order', + row_ident => 'O' + }, + { + class => 'SL::DB::OrderItem', + row_ident => 'I', + profile => {sellprice => sellprice_as_number} + } ] + =item C If set, the import will ignore unkown header columns. Useful for lazy imports, @@ -433,7 +545,7 @@ Dispatch to child objects, like this: $csv = SL::Helper::Csv->new( file => ... - profile => { + profile => [ { profile => [ makemodel => { make_1 => make, @@ -445,7 +557,7 @@ Dispatch to child objects, like this: }, ], class => SL::DB::Part, - } + } ] ); =head1 AUTHOR diff --git a/SL/Helper/Csv/Dispatcher.pm b/SL/Helper/Csv/Dispatcher.pm index c24356d8c..17d52d751 100644 --- a/SL/Helper/Csv/Dispatcher.pm +++ b/SL/Helper/Csv/Dispatcher.pm @@ -24,16 +24,65 @@ sub new { sub dispatch { my ($self, $line) = @_; - eval "require " . $self->_csv->profile->{class}; - my $obj = $self->_csv->profile->{class}->new; + my $class = $self->_class_by_line($line); + croak 'no class given' unless $class; - for my $spec (@{ $self->_specs }) { + eval "require " . $class; + my $obj = $class->new; + + my $specs = $self->_specs_by_line($line); + for my $spec (@{ $specs }) { $self->apply($obj, $spec, $line->{$spec->{key}}); } return $obj; } +# return class for given line +# if only one profile is given, return this profiles class +# if more than one profile is given, identify class by first +# column??? +sub _class_by_line { + my ($self, $line) = @_; + + my $class; + if ($self->_csv->is_multiplexed) { + foreach my $p (@{ $self->_csv->profile }) { + my $row_ident = $p->{row_ident}; + if ($line->{datatype} eq $row_ident) { + $class = $p->{class}; + last; + } + } + } else { + $class = @{ $self->_csv->profile }[0]->{class}; + } + + return $class; +} + +sub _specs_by_line { + my ($self, $line) = @_; + + my $spec; + my $i = 0; + if ($self->_csv->is_multiplexed) { + foreach my $p (@{ $self->_csv->profile }) { + my $row_ident = $p->{row_ident}; + if ($line->{datatype} eq $row_ident) { + $spec = @{ $self->_specs }[$i]; + last; + } + $i++; + } + } else { + $spec = @{ $self->_specs }[0]; + } + + return $spec; +} + + sub apply { my ($self, $obj, $spec, $value) = @_; return unless $value; @@ -70,41 +119,69 @@ sub is_known { sub parse_profile { my ($self, %params) = @_; - my $header = $self->_csv->header; - my $profile = $self->_csv->profile->{profile}; + my $profile; + my $class; + my $header; + my @specs; + + my $i = 0; + foreach my $h (@{ $self->_csv->header }) { + $header = $h; + if ($self->_csv->profile) { + $profile = @{ $self->_csv->profile }[$i]->{profile}; + $class = @{ $self->_csv->profile }[$i]->{class}; + } + + my $spec = $self->_parse_profile(profile => $profile, + class => $class, + header => $header); + push @specs, $spec; + $i++; + } + + $self->_specs(\@specs); + + return ! $self->errors; +} + +sub _parse_profile { + my ($self, %params) = @_; + + my $profile = $params{profile}; + my $class = $params{class}; + my $header = $params{header}; + my @specs; for my $col (@$header) { next unless $col; if ($self->_csv->strict_profile) { if (exists $profile->{$col}) { - push @specs, $self->make_spec($col, $profile->{$col}); + push @specs, $self->make_spec($col, $profile->{$col}, $class); } else { $self->unknown_column($col, undef); } } else { if (exists $profile->{$col}) { - push @specs, $self->make_spec($col, $profile->{$col}); + push @specs, $self->make_spec($col, $profile->{$col}, $class); } else { - push @specs, $self->make_spec($col, $col); + push @specs, $self->make_spec($col, $col, $class); } } } - $self->_specs(\@specs); $self->_csv->_push_error($self->errors); - return ! $self->errors; + + return \@specs; } sub make_spec { - my ($self, $col, $path) = @_; + my ($self, $col, $path, $cur_class) = @_; my $spec = { key => $col, steps => [] }; return unless $path; - my $cur_class = $self->_csv->profile->{class}; - return unless $cur_class; for my $step_index ( split /\.(?!\d)/, $path ) { diff --git a/t/helper/csv.t b/t/helper/csv.t index 972b3adb3..cd486ade4 100644 --- a/t/helper/csv.t +++ b/t/helper/csv.t @@ -1,4 +1,4 @@ -use Test::More tests => 47; +use Test::More tests => 56; use lib 't'; use utf8; @@ -11,9 +11,9 @@ use_ok 'SL::Helper::Csv'; Support::TestSetup::login(); my $csv = SL::Helper::Csv->new( - file => \"Kaffee\n", - header => [ 'description' ], - profile => { class => 'SL::DB::Part', }, + file => \"Kaffee\n", # " # make emacs happy + header => [[ 'description' ]], + profile => [{ class => 'SL::DB::Part', }], ); isa_ok $csv->_csv, 'Text::CSV_XS'; @@ -29,9 +29,9 @@ $::myconfig{dateformat} = 'dd.mm.yyyy'; $csv = SL::Helper::Csv->new( file => \"Kaffee;0.12;12,2;1,5234\n", - header => [ 'description', 'sellprice', 'lastcost_as_number', 'listprice' ], - profile => {profile => { listprice => 'listprice_as_number' }, - class => 'SL::DB::Part',}, + header => [[ 'description', 'sellprice', 'lastcost_as_number', 'listprice' ]], + profile => [{profile => { listprice => 'listprice_as_number' }, + class => 'SL::DB::Part',}], ); $csv->parse; @@ -49,8 +49,8 @@ Kaffee,0.12,'12,2','1,5234' EOL sep_char => ',', quote_char => "'", - profile => {profile => { listprice => 'listprice_as_number' }, - class => 'SL::DB::Part',} + profile => [{profile => { listprice => 'listprice_as_number' }, + class => 'SL::DB::Part',}] ); $csv->parse; is scalar @{ $csv->get_objects }, 1, 'auto header works'; @@ -64,7 +64,7 @@ $csv = SL::Helper::Csv->new( ;;description;sellprice;lastcost_as_number; #####;Puppy;Kaffee;0.12;12,2;1,5234 EOL - profile => {class => 'SL::DB::Part'}, + profile => [{class => 'SL::DB::Part'}], ); $csv->parse; is scalar @{ $csv->get_objects }, 1, 'bozo header doesn\'t blow things up'; @@ -77,7 +77,7 @@ description;partnumber;sellprice;lastcost_as_number; Kaffee;;0.12;12,2;1,5234 Beer;1123245;0.12;12,2;1,5234 EOL - profile => {class => 'SL::DB::Part'}, + profile => [{class => 'SL::DB::Part'}], ); $csv->parse; is scalar @{ $csv->get_objects }, 2, 'multiple objects work'; @@ -93,7 +93,7 @@ Kaffee;;0.12;1,221.52 Beer;1123245;0.12;1.5234 EOL numberformat => '1,000.00', - profile => {class => 'SL::DB::Part'}, + profile => [{class => 'SL::DB::Part'}], ); $csv->parse; is $csv->get_objects->[0]->lastcost, '1221.52', 'formatnumber'; @@ -107,7 +107,7 @@ Kaffee;;0.12;1,221.52 Beer;1123245;0.12;1.5234 EOL numberformat => '1,000.00', - profile => {class => 'SL::DB::Part'}, + profile => [{class => 'SL::DB::Part'}], ); is $csv->parse, undef, 'broken csv header won\'t get parsed'; @@ -120,7 +120,7 @@ description;partnumber;sellprice;lastcost_as_number; Beer;1123245;0.12;1.5234 EOL numberformat => '1,000.00', - profile => {class => 'SL::DB::Part'}, + profile => [{class => 'SL::DB::Part'}], ); is $csv->parse, undef, 'broken csv content won\'t get parsed'; is_deeply $csv->errors, [ '"Kaf"fee";;0.12;1,221.52'."\n", 2023, 'EIQ - QUO character not allowed', 5, 2 ], 'error'; @@ -136,7 +136,7 @@ Beer;1123245;0.12;1.5234;nein kein wieder EOL numberformat => '1,000.00', ignore_unknown_columns => 1, - profile => {class => 'SL::DB::Part'}, + profile => [{class => 'SL::DB::Part'}], ); $csv->parse; is $csv->get_objects->[0]->lastcost, '1221.52', 'ignore_unkown_columns works'; @@ -150,10 +150,10 @@ Kaffee;;0.12;1,221.52;Standard 7% Beer;1123245;0.12;1.5234;16 % EOL numberformat => '1,000.00', - profile => { + profile => [{ profile => {buchungsgruppe => "buchungsgruppen.description"}, class => 'SL::DB::Part', - } + }] ); $csv->parse; isa_ok $csv->get_objects->[0]->buchungsgruppe, 'SL::DB::Buchungsgruppe', 'deep dispatch auto vivify works'; @@ -169,13 +169,13 @@ description;partnumber;sellprice;lastcost_as_number;make_1;model_1; Beer;1123245;0.12;1.5234; EOL numberformat => '1,000.00', - profile => { + profile => [{ profile => { make_1 => "makemodels.0.make", model_1 => "makemodels.0.model", }, class => 'SL::DB::Part', - }, + }], ); $csv->parse; my @mm = $csv->get_objects->[0]->makemodel; @@ -191,7 +191,7 @@ description;partnumber;sellprice;lastcost_as_number;make_1;model_1;make_2;model_ Kaffee;;0.12;1,221.52;213;Chair 0815;523;Table 15 EOL numberformat => '1,000.00', - profile => { + profile => [{ profile => { make_1 => "makemodels.0.make", model_1 => "makemodels.0.model", @@ -199,7 +199,7 @@ EOL model_2 => "makemodels.1.model", }, class => 'SL::DB::Part', - } + }] ); $csv->parse; @@ -219,10 +219,10 @@ $csv = SL::Helper::Csv->new( description;partnumber;sellprice;lastcost_as_number;buchungsgruppe; EOL numberformat => '1,000.00', - profile => { + profile => [{ profile => {buchungsgruppe => "buchungsgruppen.1.description"}, class => 'SL::DB::Part', - } + }] ); is $csv->parse, undef, 'wrong profile gets rejected'; is_deeply $csv->errors, [ 'buchungsgruppen.1.description', undef, "Profile path error. Indexed relationship is not OneToMany around here: 'buchungsgruppen.1'", undef ,0 ], 'error indicates wrong header'; @@ -239,10 +239,10 @@ EOL numberformat => '1,000.00', ignore_unknown_columns => 1, strict_profile => 1, - profile => { + profile => [{ profile => {lastcost => 'lastcost_as_number'}, class => 'SL::DB::Part', - } + }] ); $csv->parse; is $csv->get_objects->[0]->lastcost, '1221.52', 'strict_profile with ignore'; @@ -258,10 +258,10 @@ Beer;1123245;0.12;1.5234;nein kein wieder EOL numberformat => '1,000.00', strict_profile => 1, - profile => { + profile => [{ profile => {lastcost => 'lastcost_as_number'}, class => 'SL::DB::Part', - } + }] ); $csv->parse; @@ -271,8 +271,8 @@ is_deeply( ($csv->errors)[0], [ 'description', undef, 'header field \'descriptio $csv = SL::Helper::Csv->new( file => \"Kaffee", - header => [ 'description' ], - profile => {class => 'SL::DB::Part'}, + header => [[ 'description' ]], + profile => [{class => 'SL::DB::Part'}], ); $csv->parse; is_deeply $csv->get_data, [ { description => 'Kaffee' } ], 'eol bug at the end of files'; @@ -302,7 +302,7 @@ is_deeply $csv->get_data, [ { description => 'Kaffee' } ], 'case insensitive hea $csv = SL::Helper::Csv->new( file => \"\x{EF}\x{BB}\x{BF}description\nKaffee", - profile => {class => 'SL::DB::Part'}, + profile => [{class => 'SL::DB::Part'}], encoding => 'utf8', ); $csv->parse; @@ -313,7 +313,7 @@ is_deeply $csv->get_data, [ { description => 'Kaffee' } ], 'utf8 BOM works (bug $csv = SL::Helper::Csv->new( file => \"Kaffee", header => [ 'Description' ], - class => 'SL::DB::Part', + profile => [{class => 'SL::DB::Part'}], ); $csv->parse; is_deeply $csv->get_data, undef, 'case insensitive header without flag ignores'; @@ -365,3 +365,7 @@ $csv->parse; is_deeply $csv->get_data, [ { description => 'Kaffee' } ], 'without profile and class works'; # vim: ft=perl +# set emacs to perl mode +# Local Variables: +# mode: perl +# End: -- 2.20.1