use Carp;
use IO::File;
use Params::Validate qw(:all);
-use Text::CSV;
+use Text::CSV_XS;
use Rose::Object::MakeMethods::Generic scalar => [ qw(
file encoding sep_char quote_char escape_char header profile class
- numberformat dateformat ignore_unknown_columns _io _csv _objects _parsed
- _data _errors
+ numberformat dateformat ignore_unknown_columns strict_profile _io _csv
+ _objects _parsed _data _errors all_cvar_configs case_insensitive_header
) ];
use SL::Helper::Csv::Dispatcher;
numberformat => 0,
dateformat => 0,
ignore_unknown_columns => 0,
+ strict_profile => 0,
+ case_insensitive_header => 0,
});
my $self = bless {}, $class;
$self->$_($params{$_}) for keys %params;
$self->_io(IO::File->new);
- $self->_csv(Text::CSV->new({
+ $self->_csv(Text::CSV_XS->new({
binary => 1,
sep_char => $self->sep_char,
quote_char => $self->quote_char,
sub _check_header {
my ($self, %params) = @_;
- return $self->header if $self->header;
+ my $header = $self->header;
- my $header = $self->_csv->getline($self->_io);
+ if (! $header) {
+ $header = $self->_csv->getline($self->_io);
- $self->_push_error([
- $self->_csv->error_input,
- $self->_csv->error_diag,
- 0,
- ]) unless $header;
+ $self->_push_error([
+ $self->_csv->error_input,
+ $self->_csv->error_diag,
+ 0,
+ ]) unless $header;
+ }
+
+ # Special case: utf8 BOM.
+ # certain software (namely MS Office and notepad.exe) insists on prefixing
+ # data with a discouraged but valid byte order mark.
+ # if not removed, the first header field will not be recognized
+ if ($header && $header->[0] && $self->encoding =~ /utf-?8/i) {
+ $header->[0] =~ s/^\x{FEFF}//;
+ }
- $self->header($header);
+ return unless $header;
+
+ # Special case: human stupidity
+ # people insist that case sensitivity doesn't exist and try to enter all
+ # sorts of stuff. at this point we've got a profile (with keys that represent
+ # valid methods), and a header full of strings. if two of them match, the user
+ # most likely meant that field, so rewrite the header
+ if ($self->case_insensitive_header) {
+ die 'case_insensitive_header is only possible with profile' unless $self->profile;
+ my @names = (
+ keys %{ $self->profile || {} },
+ );
+ for my $name (@names) {
+ for my $i (0..$#$header) {
+ $header->[$i] = $name if lc $header->[$i] eq lc $name;
+ }
+ }
+ }
+
+ return $self->header($header);
}
sub _parse_data {
while (1) {
my $row = $self->_csv->getline($self->_io);
- last if $self->_csv->eof;
if ($row) {
my %hr;
@hr{@{ $self->header }} = @$row;
push @data, \%hr;
} else {
+ last if $self->_csv->eof;
push @errors, [
$self->_csv->error_input,
$self->_csv->error_diag,
$self->_io->input_line_number,
];
}
+ last if $self->_csv->eof;
}
$self->_data(\@data);
will have to do that for yourself. Since you provided the profile, it is
assumed you know what to do in this case.
+If no profile is given, any header field found will be taken as is.
+
+If the path in a profile entry is empty, the field will be subjected to
+C<strict_profile> and C<case_insensitive_header> checking and will be parsed
+into C<get_data>, but no attempt will be made to dispatch it into objects.
+
=item C<class>
If present, the line will be handed to the new sub of this class,
If set, the import will ignore unknown header columns. Useful for lazy imports,
but deactivated by default.
+=item C<case_insensitive_header>
+
+If set, header columns will be matched against profile entries
+case-insensitively, and on a match the profile name will be taken.
+
+Only works if a profile is given, will die otherwise.
+
+If both C<case_insensitive_header> and C<strict_profile> are set, matched header
+columns will be accepted.
+
+=item C<strict_profile>
+
+If set, all columns to be parsed must be specified in C<profile>. Every header
+field not listed there will be treated like an unknown column.
+
+If both C<case_insensitive_header> and C<strict_profile> are set, matched header
+columns will be accepted.
+
=back
=head1 ERROR HANDLING