1 package Email::Address;
3 ## no critic RequireUseWarnings
6 use vars qw[$VERSION $COMMENT_NEST_LEVEL $STRINGIFY
8 %PARSE_CACHE %FORMAT_CACHE %NAME_CACHE
9 $addr_spec $angle_addr $name_addr $mailbox];
14 $COMMENT_NEST_LEVEL ||= 2;
15 $STRINGIFY ||= 'format';
16 $COLLAPSE_SPACES = 1 unless defined $COLLAPSE_SPACES; # who wants //=? me!
20 Email::Address - RFC 2822 Address Parsing and Creation
26 my @addresses = Email::Address->parse($line);
27 my $address = Email::Address->new(Casey => 'casey@localhost');
29 print $address->format;
35 $Id: /my/pep/Email-Address/trunk/lib/Email/Address.pm 31900 2007-06-23T01:25:34.344997Z rjbs $
39 This class implements a regex-based RFC 2822 parser that locates email
40 addresses in strings and returns a list of C<Email::Address> objects found.
41 Alternatley you may construct objects manually. The goal of this software is to
42 be correct, and very very fast.
46 my $CTL = q{\x00-\x1F\x7F};
47 my $special = q{()<>\\[\\]:;@\\\\,."};
49 my $text = qr/[^\x0A\x0D]/;
51 my $quoted_pair = qr/\\$text/;
53 my $ctext = qr/(?>[^()\\]+)/;
54 my ($ccontent, $comment) = (q{})x2;
55 for (1 .. $COMMENT_NEST_LEVEL) {
56 $ccontent = qr/$ctext|$quoted_pair|$comment/;
57 $comment = qr/\s*\((?:\s*$ccontent)*\s*\)\s*/;
59 my $cfws = qr/$comment|\s+/;
61 my $atext = qq/[^$CTL$special\\s]/;
62 my $atom = qr/$cfws*$atext+$cfws*/;
63 my $dot_atom_text = qr/$atext+(?:\.$atext+)*/;
64 my $dot_atom = qr/$cfws*$dot_atom_text$cfws*/;
66 my $qtext = qr/[^\\"]/;
67 my $qcontent = qr/$qtext|$quoted_pair/;
68 my $quoted_string = qr/$cfws*"$qcontent+"$cfws*/;
70 my $word = qr/$atom|$quoted_string/;
72 # XXX: This ($phrase) used to just be: my $phrase = qr/$word+/; It was changed
73 # to resolve bug 22991, creating a significant slowdown. Given current speed
74 # problems. Once 16320 is resolved, this section should be dealt with.
76 #my $obs_phrase = qr/$word(?:$word|\.|$cfws)*/;
78 # XXX: ...and the above solution caused endless problems (never returned) when
79 # examining this address, now in a test:
80 # admin+=E6=96=B0=E5=8A=A0=E5=9D=A1_Weblog-- ATAT --test.socialtext.com
81 # So we disallow the hateful CFWS in this context for now. Of modern mail
82 # agents, only Apple Web Mail 2.0 is known to produce obs-phrase.
84 my $simple_word = qr/$atom|\.|\s*"$qcontent+"\s*/;
85 my $obs_phrase = qr/$simple_word+/;
87 my $phrase = qr/$obs_phrase|(?:$word+)/;
89 my $local_part = qr/$dot_atom|$quoted_string/;
90 my $dtext = qr/[^\[\]\\]/;
91 my $dcontent = qr/$dtext|$quoted_pair/;
92 my $domain_literal = qr/$cfws*\[(?:\s*$dcontent)*\s*\]$cfws*/;
93 my $domain = qr/$dot_atom|$domain_literal/;
95 my $display_name = $phrase;
97 =head2 Package Variables
99 Several regular expressions used in this package are useful to others.
100 For convenience, these variables are declared as package variables that
101 you may access from your program.
103 These regular expressions conform to the rules specified in RFC 2822.
105 You can access these variables using the full namespace. If you want
106 short names, define them yourself.
108 my $addr_spec = $Email::Address::addr_spec;
112 =item $Email::Address::addr_spec
114 This regular expression defined what an email address is allowed to
117 =item $Email::Address::angle_addr
119 This regular expression defines an C<$addr_spec> wrapped in angle
122 =item $Email::Address::name_addr
124 This regular expression defines what an email address can look like
125 with an optional preceeding display name, also known as the C<phrase>.
127 =item $Email::Address::mailbox
129 This is the complete regular expression defining an RFC 2822 emial
130 address with an optional preceeding display name and optional
137 $addr_spec = qr/$local_part\@$domain/;
138 $angle_addr = qr/$cfws*<$addr_spec>$cfws*/;
139 $name_addr = qr/$display_name?$angle_addr/;
140 $mailbox = qr/(?:$name_addr|$addr_spec)$comment*/;
143 sub _ADDRESS () { 1 }
144 sub _COMMENT () { 2 }
145 sub _ORIGINAL () { 3 }
146 sub _IN_CACHE () { 4 }
154 my @addrs = Email::Address->parse(
155 q[me@local, Casey <me@local>, "Casey" <me@local> (West)]
158 This method returns a list of C<Email::Address> objects it finds
161 The specification for an email address allows for infinitley
162 nestable comments. That's nice in theory, but a little over done.
163 By default this module allows for two (C<2>) levels of nested
164 comments. If you think you need more, modify the
165 C<$Email::Address::COMMENT_NEST_LEVEL> package variable to allow
168 $Email::Address::COMMENT_NEST_LEVEL = 10; # I'm deep
170 The reason for this hardly limiting limitation is simple: efficiency.
172 Long strings of whitespace can be problematic for this module to parse, a bug
173 which has not yet been adequately addressed. The default behavior is now to
174 collapse multiple spaces into a single space, which avoids this problem. To
175 prevent this behavior, set C<$Email::Address::COLLAPSE_SPACES> to zero. This
176 variable will go away when the bug is resolved properly.
180 sub __get_cached_parse {
183 my ($class, $line) = @_;
185 return @{$PARSE_CACHE{$line}} if exists $PARSE_CACHE{$line};
192 my ($class, $line, $addrs) = @_;
194 $PARSE_CACHE{$line} = $addrs;
198 my ($class, $line) = @_;
201 $line =~ s/[ \t]+/ /g if $COLLAPSE_SPACES;
203 if (my @cached = $class->__get_cached_parse($line)) {
207 my (@mailboxes) = ($line =~ /$mailbox/go);
209 foreach (@mailboxes) {
212 my @comments = /($comment)/go;
213 s/$comment//go if @comments;
215 my ($user, $host, $com);
216 ($user, $host) = ($1, $2) if s/<($local_part)\@($domain)>//o;
217 if (! defined($user) || ! defined($host)) {
218 s/($local_part)\@($domain)//o;
219 ($user, $host) = ($1, $2);
222 my ($phrase) = /($display_name)/o;
224 for ( $phrase, $host, $user, @comments ) {
225 next unless defined $_;
228 $_ = undef unless length $_;
231 my $new_comment = join q{ }, @comments;
233 $class->new($phrase, "$user\@$host", $new_comment, $original);
234 $addrs[-1]->[_IN_CACHE] = [ \$line, $#addrs ]
237 $class->__cache_parse($line, \@addrs);
245 my $address = Email::Address->new(undef, 'casey@local');
246 my $address = Email::Address->new('Casey West', 'casey@local');
247 my $address = Email::Address->new(undef, 'casey@local', '(Casey)');
249 Constructs and returns a new C<Email::Address> object. Takes four
250 positional arguments: phrase, email, and comment, and original string.
252 The original string should only really be set using C<parse>.
256 sub new { bless [@_[1..4]], $_[0] }
262 Email::Address->purge_cache;
264 One way this module stays fast is with internal caches. Caches live
265 in memory and there is the remote possibility that you will have a
266 memory problem. In the off chance that you think you're one of those
267 people, this class method will empty those caches.
269 I've loaded over 12000 objects and not encountered a memory problem.
283 Email::Address->disable_cache if memory_low();
285 If you'd rather not cache address parses at all, you can disable (and reenable) the Email::Address cache with these methods. The cache is enabled by default.
303 =head2 Instance Methods
309 my $phrase = $address->phrase;
310 $address->phrase( "Me oh my" );
312 Accessor and mutator for the phrase portion of an address.
316 my $addr = $address->address;
317 $addr->address( "me@PROTECTED.com" );
319 Accessor and mutator for the address portion of an address.
323 my $comment = $address->comment;
324 $address->comment( "(Work address)" );
326 Accessor and mutator for the comment portion of an address.
330 my $orig = $address->original;
332 Accessor for the original address found when parsing, or passed
337 my $host = $address->host;
339 Accessor for the host portion of an address's address.
343 my $user = $address->user;
345 Accessor for the user portion of an address's address.
354 original => _ORIGINAL,
357 for my $method (keys %_INDEX) {
359 my $index = $_INDEX{ $method };
362 if ($_[0][_IN_CACHE]) {
363 my $replicant = bless [ @{$_[0]} ] => ref $_[0];
364 $PARSE_CACHE{ ${ $_[0][_IN_CACHE][0] } }[ $_[0][_IN_CACHE][1] ]
366 $_[0][_IN_CACHE] = undef;
368 $_[0]->[ $index ] = $_[1];
376 sub host { ($_[0]->[_ADDRESS] =~ /\@($domain)/o)[0] }
377 sub user { ($_[0]->[_ADDRESS] =~ /($local_part)\@/o)[0] }
383 my $printable = $address->format;
385 Returns a properly formatted RFC 2822 address representing the
391 local $^W = 0; ## no critic
392 return $FORMAT_CACHE{"@{$_[0]}"} if exists $FORMAT_CACHE{"@{$_[0]}"};
393 $FORMAT_CACHE{"@{$_[0]}"} = $_[0]->_format;
400 defined $self->[_PHRASE] && length $self->[_PHRASE]
402 defined $self->[_COMMENT] && length $self->[_COMMENT]
404 return $self->[_ADDRESS];
407 my $format = sprintf q{%s <%s> %s},
408 $self->_enquoted_phrase, $self->[_ADDRESS], $self->[_COMMENT];
416 sub _enquoted_phrase {
419 my $phrase = $self->[_PHRASE];
421 # if it's encoded -- rjbs, 2007-02-28
422 return $phrase if $phrase =~ /\A=\?.+\?=\z/;
424 $phrase =~ s/\A"(.+)"\z/$1/;
425 $phrase =~ s/\"/\\"/g;
427 return qq{"$phrase"};
434 my $name = $address->name;
436 This method tries very hard to determine the name belonging to the address.
437 First the C<phrase> is checked. If that doesn't work out the C<comment>
438 is looked into. If that still doesn't work out, the C<user> portion of
439 the C<address> is returned.
441 This method does B<not> try to massage any name it identifies and instead
442 leaves that up to someone else. Who is it to decide if someone wants their
443 name capitalized, or if they're Irish?
449 return $NAME_CACHE{"@{$_[0]}"} if exists $NAME_CACHE{"@{$_[0]}"};
452 if ( $name = $self->[_PHRASE] ) {
455 $name =~ s/($quoted_pair)/substr $1, -1/goe;
456 } elsif ( $name = $self->[_COMMENT] ) {
459 $name =~ s/($quoted_pair)/substr $1, -1/goe;
460 $name =~ s/$comment/ /go;
462 ($name) = $self->[_ADDRESS] =~ /($local_part)\@/o;
464 $NAME_CACHE{"@{$_[0]}"} = $name;
471 =head2 Overloaded Operators
477 print "I have your email address, $address.";
479 Objects stringify to C<format> by default. It's possible that you don't
480 like that idea. Okay, then, you can change it by modifying
481 C<$Email:Address::STRINGIFY>. Please consider modifying this package
482 variable using C<local>. You might step on someone else's toes if you
486 local $Email::Address::STRINGIFY = 'address';
487 print "I have your address, $address.";
490 print "I have your address, $address.";
491 # "Casey West" <casey@geeknest.com>
496 warn 'altering $Email::Address::STRINGIFY is deprecated; subclass instead'
497 if $STRINGIFY ne 'format';
499 $_[0]->can($STRINGIFY)->($_[0]);
502 use overload '""' => 'as_string';
514 =head2 Did I Mention Fast?
516 On his 1.8GHz Apple MacBook, rjbs gets these results:
518 $ perl -Ilib bench/ea-vs-ma.pl bench/corpus.txt 5
519 Rate Mail::Address Email::Address
520 Mail::Address 2.59/s -- -44%
521 Email::Address 4.59/s 77% --
523 $ perl -Ilib bench/ea-vs-ma.pl bench/corpus.txt 25
524 Rate Mail::Address Email::Address
525 Mail::Address 2.58/s -- -67%
526 Email::Address 7.84/s 204% --
528 $ perl -Ilib bench/ea-vs-ma.pl bench/corpus.txt 50
529 Rate Mail::Address Email::Address
530 Mail::Address 2.57/s -- -70%
531 Email::Address 8.53/s 232% --
533 ...unfortunately, a known bug causes a loss of speed the string to parse has
534 certain known characteristics, and disabling cache will also degrade
537 =head1 PERL EMAIL PROJECT
539 This module is maintained by the Perl Email Project
541 L<http://emailproject.perl.org/wiki/Email::Address>
545 L<Email::Simple>, L<perl>.
549 Originally by Casey West, <F<casey@geeknest.com>>.
551 Maintained, 2006-2007, Ricardo SIGNES <F<rjbs@cpan.org>>.
553 =head1 ACKNOWLEDGEMENTS
555 Thanks to Kevin Riggle and Tatsuhiko Miyagawa for tests for annoying phrase-quoting bugs!
559 Copyright (c) 2004 Casey West. All rights reserved. This module is free
560 software; you can redistribute it and/or modify it under the same terms as Perl