2 use strict; use warnings;
4 use base 'YAML::Loader::Base';
8 use constant LEAF => 1;
9 use constant COLLECTION => 2;
10 use constant VALUE => "\x07YAML\x07VALUE\x07";
11 use constant COMMENT => "\x07YAML\x07COMMENT\x07";
13 # Common YAML character sets
14 my $ESCAPE_CHAR = '[\\x00-\\x08\\x0b-\\x0d\\x0e-\\x1f]';
17 my $LIT_CHAR_RX = "\\$LIT_CHAR";
21 $self->stream($_[0] || '');
22 return $self->_parse();
25 # Top level function for parsing. Parse each document in order and
26 # handle processing for YAML headers.
29 my (%directives, $preface);
30 $self->{stream} =~ s|\015\012|\012|g;
31 $self->{stream} =~ s|\015|\012|g;
33 $self->die('YAML_PARSE_ERR_BAD_CHARS')
34 if $self->stream =~ /$ESCAPE_CHAR/;
35 $self->die('YAML_PARSE_ERR_NO_FINAL_NEWLINE')
36 if length($self->stream) and
37 $self->{stream} !~ s/(.)\n\Z/$1/s;
38 $self->lines([split /\x0a/, $self->stream, -1]);
40 # Throw away any comments or blanks before the header (or start of
41 # content for headerless streams)
42 $self->_parse_throwaway_comments();
45 # Add an "assumed" header if there is no header and the stream is
46 # not empty (after initial throwaways).
48 if ($self->lines->[0] !~ /^---(\s|$)/) {
49 unshift @{$self->lines}, '---';
54 # Main Loop. Parse out all the top level nodes and return them.
55 while (not $self->eos) {
56 $self->anchor2node({});
60 $self->offset->[0] = -1;
62 if ($self->lines->[0] =~ /^---\s*(.*)$/) {
63 my @words = split /\s+/, $1;
65 while (@words && $words[0] =~ /^#(\w+):(\S.*)$/) {
66 my ($key, $value) = ($1, $2);
68 if (defined $directives{$key}) {
69 $self->warn('YAML_PARSE_WARN_MULTIPLE_DIRECTIVES',
70 $key, $self->document);
73 $directives{$key} = $value;
75 $self->preface(join ' ', @words);
78 $self->die('YAML_PARSE_ERR_NO_SEPARATOR');
81 if (not $self->done) {
82 $self->_parse_next_line(COLLECTION);
89 $directives{YAML} ||= '1.0';
90 $directives{TAB} ||= 'NONE';
91 ($self->{major_version}, $self->{minor_version}) =
92 split /\./, $directives{YAML}, 2;
93 $self->die('YAML_PARSE_ERR_BAD_MAJOR_VERSION', $directives{YAML})
94 if $self->major_version ne '1';
95 $self->warn('YAML_PARSE_WARN_BAD_MINOR_VERSION', $directives{YAML})
96 if $self->minor_version ne '0';
97 $self->die('Unrecognized TAB policy')
98 unless $directives{TAB} =~ /^(NONE|\d+)(:HARD)?$/;
100 push @{$self->documents}, $self->_parse_node();
102 return wantarray ? @{$self->documents} : $self->documents->[-1];
105 # This function is the dispatcher for parsing each node. Every node
106 # recurses back through here. (Inlines are an exception as they have
107 # their own sub-parser.)
110 my $preface = $self->preface;
112 my ($node, $type, $indicator, $escape, $chomp) = ('') x 5;
113 my ($anchor, $alias, $explicit, $implicit, $class) = ('') x 5;
114 ($anchor, $alias, $explicit, $implicit, $preface) =
115 $self->_parse_qualifiers($preface);
117 $self->anchor2node->{$anchor} = CORE::bless [], 'YAML-anchor2node';
120 while (length $preface) {
121 my $line = $self->line - 1;
122 if ($preface =~ s/^($FOLD_CHAR|$LIT_CHAR_RX)(-|\+)?\d*\s*//) {
124 $chomp = $2 if defined($2);
127 $self->die('YAML_PARSE_ERR_TEXT_AFTER_INDICATOR') if $indicator;
128 $self->inline($preface);
133 $self->die('YAML_PARSE_ERR_NO_ANCHOR', $alias)
134 unless defined $self->anchor2node->{$alias};
135 if (ref($self->anchor2node->{$alias}) ne 'YAML-anchor2node') {
136 $node = $self->anchor2node->{$alias};
139 $node = do {my $sv = "*$alias"};
140 push @{$self->anchor2node->{$alias}}, [\$node, $self->line];
143 elsif (length $self->inline) {
144 $node = $self->_parse_inline(1, $implicit, $explicit);
145 if (length $self->inline) {
146 $self->die('YAML_PARSE_ERR_SINGLE_LINE');
149 elsif ($indicator eq $LIT_CHAR) {
151 $node = $self->_parse_block($chomp);
152 $node = $self->_parse_implicit($node) if $implicit;
155 elsif ($indicator eq $FOLD_CHAR) {
157 $node = $self->_parse_unfold($chomp);
158 $node = $self->_parse_implicit($node) if $implicit;
163 $self->offset->[$self->level] ||= 0;
164 if ($self->indent == $self->offset->[$self->level]) {
165 if ($self->content =~ /^-( |$)/) {
166 $node = $self->_parse_seq($anchor);
168 elsif ($self->content =~ /(^\?|\:( |$))/) {
169 $node = $self->_parse_mapping($anchor);
171 elsif ($preface =~ /^\s*$/) {
172 $node = $self->_parse_implicit('');
175 $self->die('YAML_PARSE_ERR_BAD_NODE');
183 $#{$self->offset} = $self->level;
192 CORE::bless $node, $class;
195 $node = $self->_parse_explicit($node, $explicit);
199 if (ref($self->anchor2node->{$anchor}) eq 'YAML-anchor2node') {
200 # XXX Can't remember what this code actually does
201 for my $ref (@{$self->anchor2node->{$anchor}}) {
202 ${$ref->[0]} = $node;
203 $self->warn('YAML_LOAD_WARN_UNRESOLVED_ALIAS',
207 $self->anchor2node->{$anchor} = $node;
212 # Preprocess the qualifiers that may be attached to any node.
213 sub _parse_qualifiers {
216 my ($anchor, $alias, $explicit, $implicit, $token) = ('') x 5;
218 while ($preface =~ /^[&*!]/) {
219 my $line = $self->line - 1;
220 if ($preface =~ s/^\!(\S+)\s*//) {
221 $self->die('YAML_PARSE_ERR_MANY_EXPLICIT') if $explicit;
224 elsif ($preface =~ s/^\!\s*//) {
225 $self->die('YAML_PARSE_ERR_MANY_IMPLICIT') if $implicit;
228 elsif ($preface =~ s/^\&([^ ,:]+)\s*//) {
230 $self->die('YAML_PARSE_ERR_BAD_ANCHOR')
231 unless $token =~ /^[a-zA-Z0-9]+$/;
232 $self->die('YAML_PARSE_ERR_MANY_ANCHOR') if $anchor;
233 $self->die('YAML_PARSE_ERR_ANCHOR_ALIAS') if $alias;
236 elsif ($preface =~ s/^\*([^ ,:]+)\s*//) {
238 $self->die('YAML_PARSE_ERR_BAD_ALIAS')
239 unless $token =~ /^[a-zA-Z0-9]+$/;
240 $self->die('YAML_PARSE_ERR_MANY_ALIAS') if $alias;
241 $self->die('YAML_PARSE_ERR_ANCHOR_ALIAS') if $anchor;
245 return ($anchor, $alias, $explicit, $implicit, $preface);
248 # Morph a node to it's explicit type
249 sub _parse_explicit {
251 my ($node, $explicit) = @_;
253 if ($explicit =~ /^\!perl\/(hash|array|scalar)\:(\w(\w|\:\:)*)?$/) {
254 ($type, $class) = (($1 || ''), ($2 || ''));
256 return CORE::bless $node, $class;
259 return CORE::bless \$node, $class;
263 /^\!?perl\/(undef|glob|regexp|code|ref)\:(\w(\w|\:\:)*)?$/) {
264 ($type, $class) = (($1 || ''), ($2 || ''));
265 my $type_class = "YAML::Type::$type";
267 if ($type_class->can('yaml_load')) {
268 return $type_class->yaml_load($node, $class, $self);
271 $self->die('YAML_LOAD_ERR_NO_CONVERT', 'XXX', $explicit);
274 # This !perl/@Foo and !perl/$Foo are deprecated but still parsed
275 elsif ($YAML::TagClass->{$explicit} ||
276 $explicit =~ m{^perl/(\@|\$)?([a-zA-Z](\w|::)+)$}
278 $class = $YAML::TagClass->{$explicit} || $2;
279 if ($class->can('yaml_load')) {
281 return $class->yaml_load(YAML::Node->new($node, $explicit));
285 return CORE::bless $node, $class;
288 return CORE::bless \$node, $class;
294 return YAML::Node->new($node, $explicit);
297 # XXX This is likely wrong. Failing test:
298 # --- !unknown 'scalar value'
303 # Parse a YAML mapping into a Perl hash
308 $self->anchor2node->{$anchor} = $mapping;
310 while (not $self->done and $self->indent == $self->offset->[$self->level]) {
312 if ($self->{content} =~ s/^\?\s*//) {
313 $self->preface($self->content);
314 $self->_parse_next_line(COLLECTION);
315 $key = $self->_parse_node();
318 # If "default" key (equals sign)
319 elsif ($self->{content} =~ s/^\=\s*//) {
322 # If "comment" key (slash slash)
323 elsif ($self->{content} =~ s/^\=\s*//) {
326 # Regular scalar key:
328 $self->inline($self->content);
329 $key = $self->_parse_inline();
331 $self->content($self->inline);
335 unless ($self->{content} =~ s/^:\s*//) {
336 $self->die('YAML_LOAD_ERR_BAD_MAP_ELEMENT');
338 $self->preface($self->content);
339 my $line = $self->line;
340 $self->_parse_next_line(COLLECTION);
341 my $value = $self->_parse_node();
342 if (exists $mapping->{$key}) {
343 $self->warn('YAML_LOAD_WARN_DUPLICATE_KEY');
346 $mapping->{$key} = $value;
352 # Parse a YAML sequence into a Perl array
357 $self->anchor2node->{$anchor} = $seq;
358 while (not $self->done and $self->indent == $self->offset->[$self->level]) {
359 if ($self->content =~ /^-(?: (.*))?$/) {
360 $self->preface(defined($1) ? $1 : '');
363 $self->die('YAML_LOAD_ERR_BAD_SEQ_ELEMENT');
365 if ($self->preface =~ /^(\s*)(\w.*\:(?: |$).*)$/) {
366 $self->indent($self->offset->[$self->level] + 2 + length($1));
368 $self->level($self->level + 1);
369 $self->offset->[$self->level] = $self->indent;
371 push @$seq, $self->_parse_mapping('');
373 $#{$self->offset} = $self->level;
376 $self->_parse_next_line(COLLECTION);
377 push @$seq, $self->_parse_node();
383 # Parse an inline value. Since YAML supports inline collections, this is
384 # the top level of a sub parsing.
387 my ($top, $top_implicit, $top_explicit) = (@_, '', '', '');
388 $self->{inline} =~ s/^\s*(.*)\s*$/$1/; # OUCH - mugwump
389 my ($node, $anchor, $alias, $explicit, $implicit) = ('') x 5;
390 ($anchor, $alias, $explicit, $implicit, $self->{inline}) =
391 $self->_parse_qualifiers($self->inline);
393 $self->anchor2node->{$anchor} = CORE::bless [], 'YAML-anchor2node';
395 $implicit ||= $top_implicit;
396 $explicit ||= $top_explicit;
397 ($top_implicit, $top_explicit) = ('', '');
399 $self->die('YAML_PARSE_ERR_NO_ANCHOR', $alias)
400 unless defined $self->anchor2node->{$alias};
401 if (ref($self->anchor2node->{$alias}) ne 'YAML-anchor2node') {
402 $node = $self->anchor2node->{$alias};
405 $node = do {my $sv = "*$alias"};
406 push @{$self->anchor2node->{$alias}}, [\$node, $self->line];
409 elsif ($self->inline =~ /^\{/) {
410 $node = $self->_parse_inline_mapping($anchor);
412 elsif ($self->inline =~ /^\[/) {
413 $node = $self->_parse_inline_seq($anchor);
415 elsif ($self->inline =~ /^"/) {
416 $node = $self->_parse_inline_double_quoted();
417 $node = $self->_unescape($node);
418 $node = $self->_parse_implicit($node) if $implicit;
420 elsif ($self->inline =~ /^'/) {
421 $node = $self->_parse_inline_single_quoted();
422 $node = $self->_parse_implicit($node) if $implicit;
426 $node = $self->inline;
430 $node = $self->_parse_inline_simple();
432 $node = $self->_parse_implicit($node) unless $explicit;
435 $node = $self->_parse_explicit($node, $explicit);
438 if (ref($self->anchor2node->{$anchor}) eq 'YAML-anchor2node') {
439 for my $ref (@{$self->anchor2node->{$anchor}}) {
440 ${$ref->[0]} = $node;
441 $self->warn('YAML_LOAD_WARN_UNRESOLVED_ALIAS',
445 $self->anchor2node->{$anchor} = $node;
450 # Parse the inline YAML mapping into a Perl hash
451 sub _parse_inline_mapping {
455 $self->anchor2node->{$anchor} = $node;
457 $self->die('YAML_PARSE_ERR_INLINE_MAP')
458 unless $self->{inline} =~ s/^\{\s*//;
459 while (not $self->{inline} =~ s/^\s*\}//) {
460 my $key = $self->_parse_inline();
461 $self->die('YAML_PARSE_ERR_INLINE_MAP')
462 unless $self->{inline} =~ s/^\: \s*//;
463 my $value = $self->_parse_inline();
464 if (exists $node->{$key}) {
465 $self->warn('YAML_LOAD_WARN_DUPLICATE_KEY');
468 $node->{$key} = $value;
470 next if $self->inline =~ /^\s*\}/;
471 $self->die('YAML_PARSE_ERR_INLINE_MAP')
472 unless $self->{inline} =~ s/^\,\s*//;
477 # Parse the inline YAML sequence into a Perl array
478 sub _parse_inline_seq {
482 $self->anchor2node->{$anchor} = $node;
484 $self->die('YAML_PARSE_ERR_INLINE_SEQUENCE')
485 unless $self->{inline} =~ s/^\[\s*//;
486 while (not $self->{inline} =~ s/^\s*\]//) {
487 my $value = $self->_parse_inline();
489 next if $self->inline =~ /^\s*\]/;
490 $self->die('YAML_PARSE_ERR_INLINE_SEQUENCE')
491 unless $self->{inline} =~ s/^\,\s*//;
496 # Parse the inline double quoted string.
497 sub _parse_inline_double_quoted {
500 if ($self->inline =~ /^"((?:\\"|[^"])*)"\s*(.*)$/) {
506 $self->die('YAML_PARSE_ERR_BAD_DOUBLE');
512 # Parse the inline single quoted string.
513 sub _parse_inline_single_quoted {
516 if ($self->inline =~ /^'((?:''|[^'])*)'\s*(.*)$/) {
522 $self->die('YAML_PARSE_ERR_BAD_SINGLE');
527 # Parse the inline unquoted string and do implicit typing.
528 sub _parse_inline_simple {
531 if ($self->inline =~ /^(|[^!@#%^&*].*?)(?=[\[\]\{\},]|, |: |- |:\s*$|$)/) {
533 substr($self->{inline}, 0, length($1)) = '';
536 $self->die('YAML_PARSE_ERR_BAD_INLINE_IMPLICIT', $value);
541 sub _parse_implicit {
545 return $value if $value eq '';
546 return undef if $value =~ /^~$/;
548 unless $value =~ /^[\@\`\^]/ or
549 $value =~ /^[\-\?]\s/;
550 $self->die('YAML_PARSE_ERR_BAD_IMPLICIT', $value);
553 # Unfold a YAML multiline scalar into a single string.
559 while (not $self->done and $self->indent == $self->offset->[$self->level]) {
560 $node .= $self->content. "\n";
561 $self->_parse_next_line(LEAF);
563 $node =~ s/^(\S.*)\n(?=\S)/$1 /gm;
564 $node =~ s/^(\S.*)\n(\n+\S)/$1$2/gm;
565 $node =~ s/\n*\Z// unless $chomp eq '+';
566 $node .= "\n" unless $chomp;
570 # Parse a YAML block style scalar. This is like a Perl here-document.
575 while (not $self->done and $self->indent == $self->offset->[$self->level]) {
576 $node .= $self->content . "\n";
577 $self->_parse_next_line(LEAF);
579 return $node if '+' eq $chomp;
580 $node =~ s/\n*\Z/\n/;
581 $node =~ s/\n\Z// if $chomp eq '-';
585 # Handle Perl style '#' comments. Comments must be at the same indentation
586 # level as the collection line following them.
587 sub _parse_throwaway_comments {
589 while (@{$self->lines} and
590 $self->lines->[0] =~ m{^\s*(\#|$)}
592 shift @{$self->lines};
595 $self->eos($self->{done} = not @{$self->lines});
598 # This is the routine that controls what line is being parsed. It gets called
599 # once for each line in the YAML stream.
602 # 1) Skip past the current line
603 # 2) Determine the indentation offset for a new level
604 # 3) Find the next _content_ line
605 # A) Skip over any throwaways (Comments/blanks)
606 # B) Set $self->indent, $self->content, $self->line
607 # 4) Expand tabs appropriately
608 sub _parse_next_line {
611 my $level = $self->level;
612 my $offset = $self->offset->[$level];
613 $self->die('YAML_EMIT_ERR_BAD_LEVEL') unless defined $offset;
614 shift @{$self->lines};
615 $self->eos($self->{done} = not @{$self->lines});
616 return if $self->eos;
619 # Determine the offset for a new leaf node
620 if ($self->preface =~
621 qr/(?:^|\s)(?:$FOLD_CHAR|$LIT_CHAR_RX)(?:-|\+)?(\d*)\s*$/
623 $self->die('YAML_PARSE_ERR_ZERO_INDENT')
624 if length($1) and $1 == 0;
627 $self->offset->[$level + 1] = $offset + $1;
630 # First get rid of any comments.
631 while (@{$self->lines} && ($self->lines->[0] =~ /^\s*#/)) {
632 $self->lines->[0] =~ /^( *)/ or die;
633 last unless length($1) <= $offset;
634 shift @{$self->lines};
637 $self->eos($self->{done} = not @{$self->lines});
638 return if $self->eos;
639 if ($self->lines->[0] =~ /^( *)\S/ and length($1) > $offset) {
640 $self->offset->[$level+1] = length($1);
643 $self->offset->[$level+1] = $offset + 1;
646 $offset = $self->offset->[++$level];
648 # Determine the offset for a new collection level
649 elsif ($type == COLLECTION and
650 $self->preface =~ /^(\s*(\!\S*|\&\S+))*\s*$/) {
651 $self->_parse_throwaway_comments();
653 $self->offset->[$level+1] = $offset + 1;
657 $self->lines->[0] =~ /^( *)\S/ or die;
658 if (length($1) > $offset) {
659 $self->offset->[$level+1] = length($1);
662 $self->offset->[$level+1] = $offset + 1;
665 $offset = $self->offset->[++$level];
669 while (@{$self->lines} and
670 $self->lines->[0] =~ m{^( *)(\#)} and
673 shift @{$self->lines};
676 $self->eos($self->{done} = not @{$self->lines});
679 $self->_parse_throwaway_comments();
681 return if $self->eos;
683 if ($self->lines->[0] =~ /^---(\s|$)/) {
687 if ($type == LEAF and
688 $self->lines->[0] =~ /^ {$offset}(.*)$/
690 $self->indent($offset);
693 elsif ($self->lines->[0] =~ /^\s*$/) {
694 $self->indent($offset);
698 $self->lines->[0] =~ /^( *)(\S.*)$/;
699 while ($self->offset->[$level] > length($1)) {
702 $self->die('YAML_PARSE_ERR_INCONSISTENT_INDENTATION')
703 if $self->offset->[$level] != length($1);
704 $self->indent(length($1));
707 $self->die('YAML_PARSE_ERR_INDENTATION')
708 if $self->indent - $offset > 1;
711 #==============================================================================
712 # Utility subroutines.
713 #==============================================================================
715 # Printable characters for escapes
718 z => "\x00", a => "\x07", t => "\x09",
719 n => "\x0a", v => "\x0b", f => "\x0c",
720 r => "\x0d", e => "\x1b", '\\' => '\\',
723 # Transform all the backslash style escape characters to their literal meaning
727 $node =~ s/\\([never\\fartz]|x([0-9a-fA-F]{2}))/
728 (length($1)>1)?pack("H2",$2):$unescapes{$1}/gex;
738 YAML::Loader - YAML class for loading Perl objects to YAML
743 my $loader = YAML::Loader->new;
744 my $hash = $loader->load(<<'...');
750 YAML::Loader is the module that YAML.pm used to deserialize YAML to Perl
751 objects. It is fully object oriented and usable on its own.
755 Ingy döt Net <ingy@cpan.org>
759 Copyright (c) 2006. Ingy döt Net. All rights reserved.
761 This program is free software; you can redistribute it and/or modify it
762 under the same terms as Perl itself.
764 See L<http://www.perl.com/perl/misc/Artistic.html>