X-Git-Url: http://wagnertech.de/git?a=blobdiff_plain;f=SL%2FHelper%2FCsv.pm;h=2d454be37ecac7349251acc133c4ac9837c5dcc0;hb=beb61b2e270836ccd11e00ce68ff884cfb7a530d;hp=82e57011571e1a02fe0a7eec87f3dd98efdec14f;hpb=17d58914b5dbaf021dcea3375ee943bb6e798916;p=kivitendo-erp.git diff --git a/SL/Helper/Csv.pm b/SL/Helper/Csv.pm index 82e570115..2d454be37 100644 --- a/SL/Helper/Csv.pm +++ b/SL/Helper/Csv.pm @@ -3,17 +3,19 @@ package SL::Helper::Csv; use strict; use warnings; +use version 0.77; use Carp; use IO::File; use Params::Validate qw(:all); -use Text::CSV; +use Text::CSV_XS; use Rose::Object::MakeMethods::Generic scalar => [ qw( file encoding sep_char quote_char escape_char header profile class - numberformat dateformat ignore_unknown_columns _io _csv _objects _parsed - _data _errors + numberformat dateformat ignore_unknown_columns strict_profile _io _csv + _objects _parsed _data _errors all_cvar_configs case_insensitive_header ) ]; use SL::Helper::Csv::Dispatcher; +use SL::Helper::Csv::Error; # public interface @@ -31,13 +33,15 @@ sub new { numberformat => 0, dateformat => 0, ignore_unknown_columns => 0, + strict_profile => 0, + case_insensitive_header => 0, }); my $self = bless {}, $class; $self->$_($params{$_}) for keys %params; $self->_io(IO::File->new); - $self->_csv(Text::CSV->new({ + $self->_csv(Text::CSV_XS->new({ binary => 1, sep_char => $self->sep_char, quote_char => $self->quote_char, @@ -55,7 +59,6 @@ sub parse { $self->_open_file; return if ! $self->_check_header; return if ! $self->dispatcher->parse_profile; -# return if $self->class && ! $self->_check_header_for_class; return if ! $self->_parse_data; $self->_parsed(1); @@ -98,17 +101,46 @@ sub _open_file { sub _check_header { my ($self, %params) = @_; - return $self->header if $self->header; + my $header = $self->header; - my $header = $self->_csv->getline($self->_io); + if (! $header) { + $header = $self->_csv->getline($self->_io); - $self->_push_error([ - $self->_csv->error_input, - $self->_csv->error_diag, - 0, - ]) unless $header; + $self->_push_error([ + $self->_csv->error_input, + $self->_csv->error_diag, + 0, + ]) unless $header; + } + + # Special case: utf8 BOM. + # certain software (namely MS Office and notepad.exe insist on prefixing + # data with a discouraged but valid byte order mark + # if not removed, the first header field will not be recognized + if ($header && $header->[0] && $self->encoding =~ /utf-?8/i) { + $header->[0] =~ s/^\x{FEFF}//; + } - $self->header($header); + return unless $header; + + # Special case: human stupidity + # people insist that case sensitivity doesn't exist and try to enter all + # sorts of stuff. at this point we've got a profile (with keys that represent + # valid methods), and a header full of strings. if two of them match, the user + # mopst likely meant that field, so rewrite the header + if ($self->case_insensitive_header) { + die 'case_insensitive_header is only possible with profile' unless $self->profile; + my @names = ( + keys %{ $self->profile || {} }, + ); + for my $name (@names) { + for my $i (0..$#$header) { + $header->[$i] = $name if lc $header->[$i] eq lc $name; + } + } + } + + return $self->header($header); } sub _parse_data { @@ -119,18 +151,27 @@ sub _parse_data { while (1) { my $row = $self->_csv->getline($self->_io); - last if $self->_csv->eof; if ($row) { my %hr; @hr{@{ $self->header }} = @$row; push @data, \%hr; } else { - push @errors, [ - $self->_csv->error_input, - $self->_csv->error_diag, - $self->_io->input_line_number, - ]; + last if $self->_csv->eof; + # Text::CSV_XS 0.89 added record number to error_diag + if (qv(Text::CSV_XS->VERSION) >= qv('0.89')) { + push @errors, [ + $self->_csv->error_input, + $self->_csv->error_diag, + ]; + } else { + push @errors, [ + $self->_csv->error_input, + $self->_csv->error_diag, + $self->_io->input_line_number, + ]; + } } + last if $self->_csv->eof; } $self->_data(\@data); @@ -181,7 +222,7 @@ sub _guess_encoding { sub _push_error { my ($self, @errors) = @_; - my @new_errors = ($self->errors, @errors); + my @new_errors = ($self->errors, map { SL::Helper::Csv::Error->new(@$_) } @errors); $self->_errors(\@new_errors); } @@ -204,15 +245,18 @@ SL::Helper::Csv - take care of csv file uploads file => \$::form->{upload_file}, encoding => 'utf-8', # undef means utf8 sep_char => ',', # default ';' - quote_char => ''', # default '"' - header => [qw(id text sellprice word)] # see later - profile => { sellprice => 'sellprice_as_number' } + quote_char => '\'', # default '"' + escape_char => '"', # default '"' + header => [qw(id text sellprice word)], # see later + profile => { sellprice => 'sellprice_as_number' }, class => 'SL::DB::CsvLine', # if present, map lines to this - ) + ); my $status = $csv->parse; my $hrefs = $csv->get_data; - my @objects = $scv->get_objects; + my @objects = $csv->get_objects; + + my @errors = $csv->errors; =head1 DESCRIPTION @@ -222,8 +266,31 @@ Text::CSV offeres already good functions to get lines out of a csv file, but in most cases you will want those line to be parsed into hashes or even objects, so this model just skips ahead and gives you objects. -Encoding autodetection is not easy, and should not be trusted. Try to avoid it -if possible. +Its basic assumptions are: + +=over 4 + +=item You do know what you expect to be in that csv file. + +This means first and foremost you have knowledge about encoding, number and +date format, csv parameters such as quoting and separation characters. You also +know what content will be in that csv and what L is responsible for +it. You provide valid header columns and their mapping to the objects. + +=item You do NOT know if the csv provider yields to your expectations. + +Stuff that does not work with what you expect should not crash anything, but +give you a hint what went wrong. As a result, if you remeber to check for +errors after each step, you should be fine. + +=item Data does not make sense. It's just data. + +Almost all data imports have some type of constraints. Some data needs to be +unique, other data needs to be connected to existing data sets. This will not +happen here. You will receive a plain mapping of the data into the class tree, +nothing more. + +=back =head1 METHODS @@ -249,7 +316,7 @@ Returns an arrayref of the raw lines as hashrefs. =item C -Return all errors that came up druing parsing. See error handling for detailed +Return all errors that came up during parsing. See error handling for detailed information. =back @@ -266,7 +333,7 @@ scalar ref for memory data. =item C Encoding of the CSV file. Note that this module does not do any encoding -guessing. Know what your data ist. Defaults to utf-8. +guessing. Know what your data is. Defaults to utf-8. =item C @@ -290,6 +357,28 @@ May be used to map header fields to custom accessors. Example: In this case C will be used to read in values from the C column. +In case of a One-To-One relationsship these can also be set over +relationsships by sparating the steps with a dot (C<.>). This will work: + + { customer => 'customer.name' } + +And will result in something like this: + + $obj->customer($obj->meta->relationship('customer')->class->new); + $obj->customer->name($csv_line->{customer}) + +But beware, this will not try to look up anything in the database. You will +simply receive objects that represent what the profile defined. If some of +these information are unique, and should be connected to preexisting data, you +will have to do that for yourself. Since you provided the profile, it is +assumed you know what to do in this case. + +If no profile is given, any header field found will be taken as is. + +If the path in a profile entry is empty, the field will be subjected to +C and C checking, will be parsed into +C, but will not be attempted to be dispatched into objects. + =item C If present, the line will be handed to the new sub of this class, @@ -300,21 +389,36 @@ and the return value used instead of the line itself. If set, the import will ignore unkown header columns. Useful for lazy imports, but deactivated by default. +=item C + +If set, header columns will be matched against profile entries case +insensitive, and on match the profile name will be taken. + +Only works if a profile is given, will die otherwise. + +If both C and C is set, matched header +columns will be accepted. + +=item C + +If set, all columns to be parsed must be specified in C. Every header +field not listed there will be treated like an unknown column. + +If both C and C is set, matched header +columns will be accepted. + =back =head1 ERROR HANDLING After parsing a file all errors will be accumulated into C. +Each entry is an object with the following attributes: -Each entry is an arrayref with the following structure: - - [ - 0 offending raw input, - 1 Text::CSV error code if T:C error, 0 else, - 2 error diagnostics, - 3 position in line, - 4 estimated line in file, - ] + raw_input: offending raw input, + code: Text::CSV error code if Text:CSV signalled an error, 0 else, + diag: error diagnostics, + line: position in line, + col: estimated line in file, Note that the last entry can be off, but will give an estimate.