use Carp;
use IO::File;
use Params::Validate qw(:all);
-use Text::CSV;
+use Text::CSV_XS;
use Rose::Object::MakeMethods::Generic scalar => [ qw(
file encoding sep_char quote_char escape_char header profile class
- numberformat dateformat ignore_unknown_columns _io _csv _objects _parsed
- _data _errors
+ numberformat dateformat ignore_unknown_columns strict_profile _io _csv
+ _objects _parsed _data _errors
) ];
use SL::Helper::Csv::Dispatcher;
+use SL::Helper::Csv::Error;
# public interface
numberformat => 0,
dateformat => 0,
ignore_unknown_columns => 0,
+ strict_profile => 0,
});
my $self = bless {}, $class;
$self->$_($params{$_}) for keys %params;
$self->_io(IO::File->new);
- $self->_csv(Text::CSV->new({
+ $self->_csv(Text::CSV_XS->new({
binary => 1,
sep_char => $self->sep_char,
quote_char => $self->quote_char,
$self->_open_file;
return if ! $self->_check_header;
return if ! $self->dispatcher->parse_profile;
-# return if $self->class && ! $self->_check_header_for_class;
return if ! $self->_parse_data;
$self->_parsed(1);
sub _check_header {
my ($self, %params) = @_;
- return $self->header if $self->header;
+ my $header = $self->header;
- my $header = $self->_csv->getline($self->_io);
+ if (! $header) {
+ $header = $self->_csv->getline($self->_io);
- $self->_push_error([
- $self->_csv->error_input,
- $self->_csv->error_diag,
- 0,
- ]) unless $header;
+ $self->_push_error([
+ $self->_csv->error_input,
+ $self->_csv->error_diag,
+ 0,
+ ]) unless $header;
+ }
- $self->header($header);
+ return unless $header;
+ return $self->header([ map { lc } @$header ]);
}
sub _parse_data {
while (1) {
my $row = $self->_csv->getline($self->_io);
- last if $self->_csv->eof;
if ($row) {
my %hr;
@hr{@{ $self->header }} = @$row;
push @data, \%hr;
} else {
+ last if $self->_csv->eof;
push @errors, [
$self->_csv->error_input,
$self->_csv->error_diag,
$self->_io->input_line_number,
];
}
+ last if $self->_csv->eof;
}
$self->_data(\@data);
sub _push_error {
my ($self, @errors) = @_;
- my @new_errors = ($self->errors, @errors);
+ my @new_errors = ($self->errors, map { SL::Helper::Csv::Error->new(@$_) } @errors);
$self->_errors(\@new_errors);
}
file => \$::form->{upload_file},
encoding => 'utf-8', # undef means utf8
sep_char => ',', # default ';'
- quote_char => ''', # default '"'
- header => [qw(id text sellprice word)] # see later
- profile => { sellprice => 'sellprice_as_number' }
+ quote_char => '\'', # default '"'
+ escape_char => '"', # default '"'
+ header => [qw(id text sellprice word)], # see later
+ profile => { sellprice => 'sellprice_as_number' },
class => 'SL::DB::CsvLine', # if present, map lines to this
- )
+ );
my $status = $csv->parse;
my $hrefs = $csv->get_data;
- my @objects = $scv->get_objects;
+ my @objects = $csv->get_objects;
+
+ my @errors = $csv->errors;
=head1 DESCRIPTION
most cases you will want those line to be parsed into hashes or even objects,
so this model just skips ahead and gives you objects.
-Encoding autodetection is not easy, and should not be trusted. Try to avoid it
-if possible.
+Its basic assumptions are:
+
+=over 4
+
+=item You do know what you expect to be in that csv file.
+
+This means first and foremost you have knowledge about encoding, number and
+date format, csv parameters such as quoting and separation characters. You also
+know what content will be in that csv and what L<Rose::DB> is responsible for
+it. You provide valid header columns and their mapping to the objects.
+
+=item You do NOT know if the csv provider meets your expectations.
+
+Stuff that does not match what you expect should not crash anything, but
+give you a hint about what went wrong. As a result, if you remember to check
+for errors after each step, you should be fine.
+
+=item Data does not make sense. It's just data.
+
+Almost all data imports have some type of constraints. Some data needs to be
+unique, other data needs to be connected to existing data sets. This will not
+happen here. You will receive a plain mapping of the data into the class tree,
+nothing more.
+
+=back
=head1 METHODS
=item C<errors>
-Return all errors that came up druing parsing. See error handling for detailed
+Return all errors that came up during parsing. See error handling for detailed
information.
=back
=item C<encoding>
Encoding of the CSV file. Note that this module does not do any encoding
-guessing. Know what your data ist. Defaults to utf-8.
+guessing. Know what your data is. Defaults to utf-8.
=item C<sep_char>
In this case C<listprice_as_number> will be used to read in values from the
C<listprice> column.
+In case of a One-To-One relationship these can also be set over
+relationships by separating the steps with a dot (C<.>). This will work:
+
+ { customer => 'customer.name' }
+
+And will result in something like this:
+
+ $obj->customer($obj->meta->relationship('customer')->class->new);
+ $obj->customer->name($csv_line->{customer});
+
+But beware, this will not try to look up anything in the database. You will
+simply receive objects that represent what the profile defined. If some of
+this information is unique, and should be connected to preexisting data, you
+will have to do that for yourself. Since you provided the profile, it is
+assumed you know what to do in this case.
+
=item C<class>
If present, the line will be handed to the new sub of this class,
If set, the import will ignore unkown header columns. Useful for lazy imports,
but deactivated by default.
+=item C<strict_profile>
+
+If set, all columns to be parsed must be specified in C<profile>. Every header
+field not listed there will be treated like an unknown column.
+
=back
=head1 ERROR HANDLING
After parsing a file all errors will be accumulated into C<errors>.
+Each entry is an object with the following attributes:
-Each entry is an arrayref with the following structure:
-
- [
- 0 offending raw input,
- 1 Text::CSV error code if T:C error, 0 else,
- 2 error diagnostics,
- 3 position in line,
- 4 estimated line in file,
- ]
+ raw_input: offending raw input,
+ code: Text::CSV error code if Text::CSV signalled an error, 0 else,
+ diag: error diagnostics,
+ col: position in line,
+ line: estimated line in file,
Note that the last entry can be off, but will give an estimate.