From ad7353df162d32e3b6f9348a5f8c1310079e6110 Mon Sep 17 00:00:00 2001 From: Moritz Bunkus Date: Wed, 10 Feb 2010 14:06:52 +0100 Subject: [PATCH] Erweiterung SL::Iconv um convert-Methode, die auch UTF8-Flag setzt & Objekt-Interface --- SL/Iconv.pm | 115 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 111 insertions(+), 4 deletions(-) diff --git a/SL/Iconv.pm b/SL/Iconv.pm index be0f7c2e4..9fb296e41 100644 --- a/SL/Iconv.pm +++ b/SL/Iconv.pm @@ -1,5 +1,7 @@ package SL::Iconv; +use Encode; +use English qw(-no_match_vars); use Text::Iconv; use SL::Common; @@ -8,18 +10,27 @@ use vars qw(%converters); use strict; +sub new { + my $class = shift; + my $self = bless { }, $class; + + $self->_init(@_); + + return $self; +} + sub get_converter { my ($from_charset, $to_charset) = @_; - my $index = "${from_charset}::${to_charset}"; - if (!$converters{$index}) { - $converters{$index} = Text::Iconv->new($from_charset, $to_charset) || die; - } + my $index = join $SUBSCRIPT_SEPARATOR, $from_charset, $to_charset; + $converters{$index} ||= SL::Iconv->new($from_charset, $to_charset); return $converters{$index}; } sub convert { + return _convert(@_) if ref $_[0]; + my ($from_charset, $to_charset, $text) = @_; $from_charset ||= Common::DEFAULT_CHARSET; @@ -29,5 +40,101 @@ sub convert { return $converter->convert($text); } +sub _convert { + my $self = shift; + my $text = shift; + + $text = $self->{handle}->convert($text) if !$self->{to_is_utf8} || !Encode::is_utf8($text); + $text = decode("utf-8-strict", $text) if $self->{to_is_utf8} && !Encode::is_utf8($text); + + return $text; +} + +sub _init { + my $self = shift; + $self->{from} = shift; + $self->{to} = shift; + $self->{to} = 'UTF-8' if lc $self->{to} eq 'unicode'; + $self->{to_is_utf8} = $self->{to} =~ m/^utf-?8$/i; + $self->{handle} = Text::Iconv->new($self->{from}, $self->{to}) || die; + + return $self; +} + +sub is_utf8 { + return shift->{to_is_utf8}; +} + 1; +__END__ + +=head1 NAME + +SL::Iconv -- Thin layer 
on top of Text::Iconv including decode_utf8 usage + +=head1 SYNOPSIS + +Usage: + + use SL::Iconv; + + # Conversion without creating objects: + my $text_utf8 = SL::Iconv::convert("ISO-8859-15", "UTF-8", $text_iso); + + # Conversion with an object: + my $converter = SL::Iconv->new("ISO-8859-15", "UTF-8"); + my $text_utf8 = $converter->convert($text_iso); + +=head1 DESCRIPTION + +A thin layer on top of L<Text::Iconv>. Special handling is implemented +if the target charset is UTF-8: The resulting string has its UTF8 flag +set via a call to C<Encode::decode("utf-8-strict", ...)>. + +=head1 CLASS FUNCTIONS + +=over 4 + +=item C<new $from_charset, $to_charset> + +Create a new object for conversion from C<$from_charset> to +C<$to_charset>. + +=item C<convert $from_charset, $to_charset, $text> + +Converts the string C<$text> from charset C<$from_charset> to charset +C<$to_charset>. See the instance method C<convert> for further +discussion. + +The object used for this conversion is cached. Therefore multiple +calls to C<convert> do not result in multiple initializations of the +iconv library. + +=back + +=head1 INSTANCE FUNCTIONS + +=over 4 + +=item C<convert $text> + +Converts the string C<$text> from one charset to another (see C<new>). + +Special handling is implemented if the target charset is UTF-8: The +resulting string has its UTF8 flag set via a call to +C<Encode::decode("utf-8-strict", ...)>. It is also safe to call +C<convert> multiple times for the same string in such cases as the +conversion is only done if the UTF8 flag hasn't been set yet. + +=item C<is_utf8> + +Returns true if the handle converts into UTF8. + +=back + +=head1 MODULE AUTHORS + +Moritz Bunkus E<lt>m.bunkus@linet-services.deE<gt> + +L<http://linet-services.de> -- 2.20.1