use Carp;
use IO::File;
use Params::Validate qw(:all);
+use List::MoreUtils qw(all pairwise);
use Text::CSV_XS;
use Rose::Object::MakeMethods::Generic scalar => [ qw(
file encoding sep_char quote_char escape_char header profile
numberformat dateformat ignore_unknown_columns strict_profile is_multiplexed
- _io _csv _objects _parsed _data _errors all_cvar_configs case_insensitive_header
+ _row_header _io _csv _objects _parsed _data _errors all_cvar_configs case_insensitive_header
) ];
use SL::Helper::Csv::Dispatcher;
if ($self->profile) {
my @profile = @{ $self->profile };
if (scalar @profile > 1) {
- my $info_ok = 1;
# Each profile needs a class and a row_ident
- foreach my $p (@profile) {
- if ( !defined $p->{class} || !defined $p->{row_ident} ) {
- $info_ok = 0;
- last;
- }
- }
+ my $info_ok = all { defined $_->{class} && defined $_->{row_ident} } @profile;
+ $self->_push_error([
+ 0,
+ "missing class or row_ident in one of the profiles for multiplexed data",
+ 0,
+ 0]) unless $info_ok;
# If header is given, there need to be a header for each profile
+ # and no empty headers.
if ($info_ok && $self->header) {
my @header = @{ $self->header };
- if (scalar @profile != scalar @header) {
- $info_ok = 0;
- }
+ my $t_ok = scalar @profile == scalar @header;
+ $self->_push_error([
+ 0,
+ "number of headers and number of profiles must be the same for multiplexed data",
+ 0,
+ 0]) unless $t_ok;
+ $info_ok = $info_ok && $t_ok;
+
+ $t_ok = all { scalar @$_ > 0} @header;
+ $self->_push_error([
+ 0,
+ "no empty headers are allowed for multiplexed data",
+ 0,
+ 0]) unless $t_ok;
+ $info_ok = $info_ok && $t_ok;
}
$self->is_multiplexed($info_ok);
return $info_ok;
0,
]) unless $h;
- push @{ $header }, $h;
+ if ($self->is_multiplexed) {
+ push @{ $header }, $h;
+ } else {
+ $header = $h;
+ }
}
}
# data with a discouraged but valid byte order mark
# if not removed, the first header field will not be recognized
if ($header) {
- foreach my $h (@{ $header }) {
- if ($h && $h->[0] && $self->encoding =~ /utf-?8/i) {
- $h->[0] =~ s/^\x{FEFF}//;
- }
+ my $h = ($self->is_multiplexed)? $header->[0] : $header;
+
+ if ($h && $h->[0] && $self->encoding =~ /utf-?8/i) {
+ $h->[0] =~ s/^\x{FEFF}//;
}
}
# check, if all header fields are parsed well
- my $all_ok = 1;
- if ($header) {
- foreach my $h (@{ $header }) {
- if (!$h) {
- $all_ok = 0;
- last;
- }
- }
+ if ($self->is_multiplexed) {
+ return unless $header && all { $_ } @$header;
} else {
- $all_ok = 0;
+ return unless $header;
}
- return unless $all_ok;
# Special case: human stupidity
# people insist that case sensitivity doesn't exist and try to enter all
# mopst likely meant that field, so rewrite the header
if ($self->case_insensitive_header) {
die 'case_insensitive_header is only possible with profile' unless $self->profile;
- my @names = (
- keys %{ $self->profile || {} },
- );
- for my $name (@names) {
- for my $i (0..$#$header) {
- $header->[$i] = $name if lc $header->[$i] eq lc $name;
+ if ($header) {
+ my $h_aref = ($self->is_multiplexed)? $header : [ $header ];
+ my $p_num = 0;
+ foreach my $h (@{ $h_aref }) {
+ my @names = (
+ keys %{ $self->profile->[$p_num]->{profile} || {} },
+ );
+ for my $name (@names) {
+ for my $i (0..$#$h) {
+ $h->[$i] = $name if lc $h->[$i] eq lc $name;
+ }
+ }
+ $p_num++;
}
}
}
my $row = $self->_csv->getline($self->_io);
if ($row) {
my $header = $self->_header_by_row($row);
- $self->_csv->column_names(@{ $header });
my %hr;
@hr{@{ $header }} = @$row;
push @data, \%hr;
sub _header_by_row {
my ($self, $row) = @_;
- my @header = @{ $self->header };
+ # initialize lookup hash if not already done
+ if ($self->is_multiplexed && ! defined $self->_row_header ) {
+ $self->_row_header({ pairwise { no warnings 'once'; $a->{row_ident} => $b } @{ $self->profile }, @{ $self->header } });
+ }
+
if ($self->is_multiplexed) {
- my $i = 0;
- foreach my $profile (@{ $self->profile }) {
- if (@{ $row }[0] eq $profile->{row_ident}) {
- return $header[$i];
- }
- $i++;
- }
+ return $self->_row_header->{$row->[0]}
} else {
- return $header[0];
+ return $self->header;
}
}
sep_char => ',', # default ';'
quote_char => '\'', # default '"'
escape_char => '"', # default '"'
- header => [ [qw(id text sellprice word)] ], # see later
+ header => [ qw(id text sellprice word) ], # see later
profile => [ { profile => { sellprice => 'sellprice_as_number'},
class => 'SL::DB::Part' } ],
);
=item C<header> \@HEADERS
-If given, it contains an ARRAYREF for each different class type (i.e. one
-ARRAYREF if the data is only of one class type). These ARRAYREFS are the header
-fields which are an array of columns. In this case the first lines are not used
-as a header. Empty header fields will be ignored in objects.
+If given, it contains an ARRAY of the header fields for not multiplexed data.
+Or an ARRAYREF for each different class type for multiplexed data. These
+ARRAYREFS are the header fields which are an array of columns. In this case
+the first lines are not used as a header. Empty header fields will be ignored
+in objects.
If not given, headers are taken from the first n lines of data, where n is the
number of different class types.
+In case of multiplexed data the first column must be named 'datatype'. This
+name must be given in the header.
+
Examples:
classic data of one type:
- [ [ 'name', 'street', 'zipcode', 'city' ] ]
+ [ 'name', 'street', 'zipcode', 'city' ]
multiplexed data with two different types
- [ [ 'ordernumber', 'customer', 'transdate' ], [ 'partnumber', 'qty', 'sellprice' ] ]
+ [ [ 'datatype', 'ordernumber', 'customer', 'transdate' ],
+ [ 'datatype', 'partnumber', 'qty', 'sellprice' ] ]
=item C<profile> [{profile => \%ACCESSORS, class => class, row_ident => ri},]
will have to do that for yourself. Since you provided the profile, it is
assumed you know what to do in this case.
+If no profile is given, any header field found will be taken as is.
+
+If the path in a profile entry is empty, the field will be subjected to
+C<strict_profile> and C<case_insensitive_header> checking, will be parsed into
+C<get_data>, but will not be attempted to be dispatched into objects.
+
If C<class> is present, the line will be handed to the new sub of this class,
and the return value used instead of the line itself.
C<row_ident> is a string to recognize the right profile and class for each data
-line in multiplexed data.
+line in multiplexed data. It must match the value in the column 'dataype' for
+each class.
In case of multiplexed data, C<class> and C<row_ident> must be given.
Example: