use Carp;
use IO::File;
use Params::Validate qw(:all);
-use List::MoreUtils qw(all pairwise);
+use List::MoreUtils qw(all pairwise firstidx);
use Text::CSV_XS;
use Rose::Object::MakeMethods::Generic scalar => [ qw(
file encoding sep_char quote_char escape_char header profile
numberformat dateformat ignore_unknown_columns strict_profile is_multiplexed
_row_header _io _csv _objects _parsed _data _errors all_cvar_configs case_insensitive_header
+ _multiplex_datatype_position
) ];
use SL::Helper::Csv::Dispatcher;
$self->_open_file;
return if ! $self->_check_multiplexed;
return if ! $self->_check_header;
+ return if ! $self->_check_multiplex_datatype_position;
return if ! $self->dispatcher->parse_profile;
return if ! $self->_parse_data;
croak 'must parse first' unless $self->_parsed;
$self->_make_objects unless $self->_objects;
- return wantarray ? @{ $self->_objects } : $self->_objects;
+ return $self->_objects;
}
sub errors {
0,
0]) unless $info_ok;
- # If header is given, there need to be a header for each profile
+ # If header is given, there needs to be a header for each profile
# and no empty headers.
if ($info_ok && $self->header) {
my @header = @{ $self->header };
# people insist that case sensitivity doesn't exist and try to enter all
# sorts of stuff. at this point we've got a profile (with keys that represent
# valid methods), and a header full of strings. if two of them match, the user
- # mopst likely meant that field, so rewrite the header
+ # most likely meant that field, so rewrite the header
if ($self->case_insensitive_header) {
die 'case_insensitive_header is only possible with profile' unless $self->profile;
if ($header) {
return $self->header($header);
}
+# Check the position of the 'datatype' column for multiplexed data.
+#
+# Every header must contain a column named 'datatype' (compared
+# case-insensitively), and that column must sit at the same index in all
+# headers. On success the common index is stored in
+# _multiplex_datatype_position, which is later used to pick the row
+# identifier out of each data row.
+#
+# Returns 1 if the data is not multiplexed or the check passes. Otherwise an
+# error is pushed and 0 is returned.
+sub _check_multiplex_datatype_position {
+  my ($self) = @_;
+
+  return 1 if !$self->is_multiplexed; # ok if not multiplexed
+
+  my @positions = map { firstidx { 'datatype' eq lc($_) } @{ $_ } } @{ $self->header };
+
+  # firstidx returns -1 if no element matches. Without this guard, headers
+  # that all lack the column would agree on -1, the check below would pass,
+  # and $row->[-1] (the last column) would silently be used as row identifier.
+  # An empty header list is rejected as well, since it yields no position.
+  if (!@positions || grep { $_ < 0 } @positions) {
+    $self->_push_error([0,
+                        "datatype field must be given in each header for multiplexed data",
+                        0,
+                        0]);
+    return 0;
+  }
+
+  my $first_pos = $positions[0];
+  if (all { $first_pos == $_ } @positions) {
+    $self->_multiplex_datatype_position($first_pos);
+    return 1;
+  } else {
+    $self->_push_error([0,
+                        "datatype field must be at the same position for all datatypes for multiplexed data",
+                        0,
+                        0]);
+    return 0;
+  }
+}
+
sub _parse_data {
my ($self, %params) = @_;
my (@data, @errors);
my $row = $self->_csv->getline($self->_io);
if ($row) {
my $header = $self->_header_by_row($row);
+ if (!$header) {
+ push @errors, [
+ 0,
+ "Cannot get header for row. Maybe row name and datatype field not matching.",
+ 0,
+ 0];
+ last;
+ }
my %hr;
@hr{@{ $header }} = @$row;
push @data, \%hr;
}
if ($self->is_multiplexed) {
- return $self->_row_header->{$row->[0]}
+ return $self->_row_header->{$row->[$self->_multiplex_datatype_position]}
} else {
return $self->header;
}
my $status = $csv->parse;
my $hrefs = $csv->get_data;
- my @objects = $csv->get_objects;
+ my $objects = $csv->get_objects;
my @errors = $csv->errors;
See Synopsis.
-Text::CSV offeres already good functions to get lines out of a csv file, but in
-most cases you will want those line to be parsed into hashes or even objects,
+Text::CSV already offers good functions to get lines out of a csv file, but in
+most cases you will want those lines to be parsed into hashes or even objects,
so this model just skips ahead and gives you objects.
Its basic assumptions are:
=item You do know what you expect to be in that csv file.
-This means first and foremost you have knowledge about encoding, number and
+This means first and foremost that you have knowledge about encoding, number and
date format, csv parameters such as quoting and separation characters. You also
know what content will be in that csv and what L<Rose::DB> is responsible for
it. You provide valid header columns and their mapping to the objects.
This module can handle multiplexed data of different class types. In that case
multiple profiles with classes and row identifiers must be given. Multiple
headers may also be given or read from csv data. Data must contain the row
-identifier in the first column and it's field name must be 'datatype'.
+identifier in the column named 'datatype'.
=back
Parse the data into objects and return those.
-This method will return list or arrayref depending on context.
+This method will return an arrayref of all objects.
=item C<get_data>
If not given, headers are taken from the first n lines of data, where n is the
number of different class types.
-In case of multiplexed data the first column must be named 'datatype'. This
-name must be given in the header.
+In case of multiplexed data there must be a column named 'datatype'. This
+column must be given in each header and must be at the same position in each
+header.
Examples:
classic data of one type:
[ 'name', 'street', 'zipcode', 'city' ]
- multiplexed data with two different types
+ multiplexed data with two different types:
[ [ 'datatype', 'ordernumber', 'customer', 'transdate' ],
[ 'datatype', 'partnumber', 'qty', 'sellprice' ] ]
In this case C<listprice_as_number> will be used to read in values from the
C<listprice> column.
-In case of a One-To-One relationsship these can also be set over
-relationsships by sparating the steps with a dot (C<.>). This will work:
+In case of a One-To-One relationship these can also be set over
+relationships by separating the steps with a dot (C<.>). This will work:
[ {profile => { customer => 'customer.name' }} ]