Erweiterung SL::Iconv um convert-Methode, die auch UTF8-Flag setzt & Objekt-Interface

author Moritz Bunkus <m.bunkus@linet-services.de>

Wed, 10 Feb 2010 13:06:52 +0000 (14:06 +0100)

committer Sven Schöling <s.schoeling@linet-services.de>

Tue, 20 Jul 2010 11:16:05 +0000 (13:16 +0200)
author Moritz Bunkus <m.bunkus@linet-services.de>
Wed, 10 Feb 2010 13:06:52 +0000 (14:06 +0100)
committer Sven Schöling <s.schoeling@linet-services.de>
Tue, 20 Jul 2010 11:16:05 +0000 (13:16 +0200)
diff --git a/SL/Iconv.pm b/SL/Iconv.pm

index be0f7c2..9fb296e 100644 (file)
--- a/SL/Iconv.pm
+++ b/SL/Iconv.pm
@@ -1,5 +1,7 @@
  package SL::Iconv;
  
+use Encode;
+use English qw(-no_match_vars);
  use Text::Iconv;
  
  use SL::Common;
@@ -8,18 +10,27 @@ use vars qw(%converters);
  
  use strict;
  
+sub new {
+  my $class = shift;
+  my $self  = bless { }, $class;
+
+  $self->_init(@_);
+
+  return $self;
+}
+
  sub get_converter {
    my ($from_charset, $to_charset) = @_;
  
-  my $index = "${from_charset}::${to_charset}";
-  if (!$converters{$index}) {
-    $converters{$index} = Text::Iconv->new($from_charset, $to_charset) || die;
-  }
+  my $index             = join $SUBSCRIPT_SEPARATOR, $from_charset, $to_charset;
+  $converters{$index} ||= SL::Iconv->new($from_charset, $to_charset);
  
    return $converters{$index};
  }
  
  sub convert {
+  return _convert(@_) if ref $_[0];
+
    my ($from_charset, $to_charset, $text) = @_;
  
    $from_charset ||= Common::DEFAULT_CHARSET;
@@ -29,5 +40,101 @@ sub convert {
    return $converter->convert($text);
  }
  
+sub _convert {
+  my $self = shift;
+  my $text = shift;
+
+  $text    = $self->{handle}->convert($text) if !$self->{to_is_utf8} || !Encode::is_utf8($text);
+  $text    = decode("utf-8-strict", $text)   if  $self->{to_is_utf8} && !Encode::is_utf8($text);
+
+  return $text;
+}
+
+sub _init {
+  my $self = shift;
+  $self->{from}       = shift;
+  $self->{to}         = shift;
+  $self->{to}         = 'UTF-8' if lc $self->{to} eq 'unicode';
+  $self->{to_is_utf8} = $self->{to} =~ m/^utf-?8$/i;
+  $self->{handle}     = Text::Iconv->new($self->{from}, $self->{to}) || die;
+
+  return $self;
+}
+
+sub is_utf8 {
+  return shift->{to_is_utf8};
+}
+
  1;
  
+__END__
+
+=head1 NAME
+
+SL::Iconv -- Thin layer on top of Text::Iconv that sets the UTF8 flag on UTF-8 output
+
+=head1 SYNOPSIS
+
+Usage:
+
+  use SL::Iconv;
+
+  # Conversion without creating objects:
+  my $text_utf8 = SL::Iconv::convert("ISO-8859-15", "UTF-8", $text_iso);
+
+  # Conversion with an object:
+  my $converter = SL::Iconv->new("ISO-8859-15", "UTF-8");
+  my $text_utf8 = $converter->convert($text_iso);
+
+=head1 DESCRIPTION
+
+A thin layer on top of L<Text::Iconv>. Special handling is implemented
+if the target charset is UTF-8: The resulting string has its UTF8 flag
+set via a call to C<Encode::decode("utf-8-strict", ...)>.
+
+=head1 CLASS FUNCTIONS
+
+=over 4
+
+=item C<new $from_charset, $to_charset>
+
+Create a new object for conversion from C<$from_charset> to
+C<$to_charset>.
+
+=item C<convert $from_charset, $to_charset, $text>
+
+Converts the string C<$text> from charset C<$from_charset> to charset
+C<$to_charset>. See the instance method C<convert> for further
+discussion.
+
+The object used for this conversion is cached. Therefore multiple
+calls to C<convert> do not result in multiple initializations of the
+iconv library.
+
+=back
+
+=head1 INSTANCE FUNCTIONS
+
+=over 4
+
+=item C<convert $text>
+
+Converts the string C<$text> from one charset to another (see C<new>).
+
+Special handling is implemented if the target charset is UTF-8: The
+resulting string has its UTF8 flag set via a call to
+C<Encode::decode("utf-8-strict", ...)>. In that case it is also safe
+to call C<convert> more than once on the same string, because the
+conversion is skipped if the UTF8 flag has already been set.
+
+=item C<is_utf8>
+
+Returns true if the target charset of this converter is UTF-8.
+
+=back
+
+=head1 MODULE AUTHORS
+
+Moritz Bunkus E<lt>m.bunkus@linet-services.deE<gt>
+
+L<http://linet-services.de>
author	Moritz Bunkus <m.bunkus@linet-services.de>
	Wed, 10 Feb 2010 13:06:52 +0000 (14:06 +0100)
committer	Sven Schöling <s.schoeling@linet-services.de>
	Tue, 20 Jul 2010 11:16:05 +0000 (13:16 +0200)