2 use strict; use warnings;
4 use base 'YAML::Loader::Base';
8 use constant LEAF => 1;
9 use constant COLLECTION => 2;
10 use constant VALUE => "\x07YAML\x07VALUE\x07";
11 use constant COMMENT => "\x07YAML\x07COMMENT\x07";
13 # Common YAML character sets
14 my $ESCAPE_CHAR = '[\\x00-\\x08\\x0b-\\x0d\\x0e-\\x1f]';
17 my $LIT_CHAR_RX = "\\$LIT_CHAR";
21 $self->stream($_[0] || '');
22 return $self->_parse();
25 # Top level function for parsing. Parse each document in order and
26 # handle processing for YAML headers.
29 my (%directives, $preface);
30 $self->{stream} =~ s|\015\012|\012|g;
31 $self->{stream} =~ s|\015|\012|g;
33 $self->die('YAML_PARSE_ERR_BAD_CHARS')
34 if $self->stream =~ /$ESCAPE_CHAR/;
35 # $self->die('YAML_PARSE_ERR_NO_FINAL_NEWLINE')
36 $self->{stream} .= "\n"
37 if length($self->stream) and
38 $self->{stream} !~ s/(.)\n\Z/$1/s;
39 $self->lines([split /\x0a/, $self->stream, -1]);
41 # Throw away any comments or blanks before the header (or start of
42 # content for headerless streams)
43 $self->_parse_throwaway_comments();
46 # Add an "assumed" header if there is no header and the stream is
47 # not empty (after initial throwaways).
49 if ($self->lines->[0] !~ /^---(\s|$)/) {
50 unshift @{$self->lines}, '---';
55 # Main Loop. Parse out all the top level nodes and return them.
56 while (not $self->eos) {
57 $self->anchor2node({});
61 $self->offset->[0] = -1;
63 if ($self->lines->[0] =~ /^---\s*(.*)$/) {
64 my @words = split /\s+/, $1;
66 while (@words && $words[0] =~ /^#(\w+):(\S.*)$/) {
67 my ($key, $value) = ($1, $2);
69 if (defined $directives{$key}) {
70 $self->warn('YAML_PARSE_WARN_MULTIPLE_DIRECTIVES',
71 $key, $self->document);
74 $directives{$key} = $value;
76 $self->preface(join ' ', @words);
79 $self->die('YAML_PARSE_ERR_NO_SEPARATOR');
82 if (not $self->done) {
83 $self->_parse_next_line(COLLECTION);
90 $directives{YAML} ||= '1.0';
91 $directives{TAB} ||= 'NONE';
92 ($self->{major_version}, $self->{minor_version}) =
93 split /\./, $directives{YAML}, 2;
94 $self->die('YAML_PARSE_ERR_BAD_MAJOR_VERSION', $directives{YAML})
95 if $self->major_version ne '1';
96 $self->warn('YAML_PARSE_WARN_BAD_MINOR_VERSION', $directives{YAML})
97 if $self->minor_version ne '0';
98 $self->die('Unrecognized TAB policy')
99 unless $directives{TAB} =~ /^(NONE|\d+)(:HARD)?$/;
101 push @{$self->documents}, $self->_parse_node();
103 return wantarray ? @{$self->documents} : $self->documents->[-1];
106 # This function is the dispatcher for parsing each node. Every node
107 # recurses back through here. (Inlines are an exception as they have
108 # their own sub-parser.)
111 my $preface = $self->preface;
113 my ($node, $type, $indicator, $escape, $chomp) = ('') x 5;
114 my ($anchor, $alias, $explicit, $implicit, $class) = ('') x 5;
115 ($anchor, $alias, $explicit, $implicit, $preface) =
116 $self->_parse_qualifiers($preface);
118 $self->anchor2node->{$anchor} = CORE::bless [], 'YAML-anchor2node';
121 while (length $preface) {
122 my $line = $self->line - 1;
123 if ($preface =~ s/^($FOLD_CHAR|$LIT_CHAR_RX)(-|\+)?\d*\s*//) {
125 $chomp = $2 if defined($2);
128 $self->die('YAML_PARSE_ERR_TEXT_AFTER_INDICATOR') if $indicator;
129 $self->inline($preface);
134 $self->die('YAML_PARSE_ERR_NO_ANCHOR', $alias)
135 unless defined $self->anchor2node->{$alias};
136 if (ref($self->anchor2node->{$alias}) ne 'YAML-anchor2node') {
137 $node = $self->anchor2node->{$alias};
140 $node = do {my $sv = "*$alias"};
141 push @{$self->anchor2node->{$alias}}, [\$node, $self->line];
144 elsif (length $self->inline) {
145 $node = $self->_parse_inline(1, $implicit, $explicit);
146 if (length $self->inline) {
147 $self->die('YAML_PARSE_ERR_SINGLE_LINE');
150 elsif ($indicator eq $LIT_CHAR) {
152 $node = $self->_parse_block($chomp);
153 $node = $self->_parse_implicit($node) if $implicit;
156 elsif ($indicator eq $FOLD_CHAR) {
158 $node = $self->_parse_unfold($chomp);
159 $node = $self->_parse_implicit($node) if $implicit;
164 $self->offset->[$self->level] ||= 0;
165 if ($self->indent == $self->offset->[$self->level]) {
166 if ($self->content =~ /^-( |$)/) {
167 $node = $self->_parse_seq($anchor);
169 elsif ($self->content =~ /(^\?|\:( |$))/) {
170 $node = $self->_parse_mapping($anchor);
172 elsif ($preface =~ /^\s*$/) {
173 $node = $self->_parse_implicit('');
176 $self->die('YAML_PARSE_ERR_BAD_NODE');
184 $#{$self->offset} = $self->level;
193 CORE::bless $node, $class;
196 $node = $self->_parse_explicit($node, $explicit);
200 if (ref($self->anchor2node->{$anchor}) eq 'YAML-anchor2node') {
201 # XXX Can't remember what this code actually does
202 for my $ref (@{$self->anchor2node->{$anchor}}) {
203 ${$ref->[0]} = $node;
204 $self->warn('YAML_LOAD_WARN_UNRESOLVED_ALIAS',
208 $self->anchor2node->{$anchor} = $node;
213 # Preprocess the qualifiers that may be attached to any node.
214 sub _parse_qualifiers {
217 my ($anchor, $alias, $explicit, $implicit, $token) = ('') x 5;
219 while ($preface =~ /^[&*!]/) {
220 my $line = $self->line - 1;
221 if ($preface =~ s/^\!(\S+)\s*//) {
222 $self->die('YAML_PARSE_ERR_MANY_EXPLICIT') if $explicit;
225 elsif ($preface =~ s/^\!\s*//) {
226 $self->die('YAML_PARSE_ERR_MANY_IMPLICIT') if $implicit;
229 elsif ($preface =~ s/^\&([^ ,:]+)\s*//) {
231 $self->die('YAML_PARSE_ERR_BAD_ANCHOR')
232 unless $token =~ /^[a-zA-Z0-9]+$/;
233 $self->die('YAML_PARSE_ERR_MANY_ANCHOR') if $anchor;
234 $self->die('YAML_PARSE_ERR_ANCHOR_ALIAS') if $alias;
237 elsif ($preface =~ s/^\*([^ ,:]+)\s*//) {
239 $self->die('YAML_PARSE_ERR_BAD_ALIAS')
240 unless $token =~ /^[a-zA-Z0-9]+$/;
241 $self->die('YAML_PARSE_ERR_MANY_ALIAS') if $alias;
242 $self->die('YAML_PARSE_ERR_ANCHOR_ALIAS') if $anchor;
246 return ($anchor, $alias, $explicit, $implicit, $preface);
249 # Morph a node to it's explicit type
250 sub _parse_explicit {
252 my ($node, $explicit) = @_;
254 if ($explicit =~ /^\!perl\/(hash|array|scalar)\:(\w(\w|\:\:)*)?$/) {
255 ($type, $class) = (($1 || ''), ($2 || ''));
257 return CORE::bless $node, $class;
260 return CORE::bless \$node, $class;
264 /^\!?perl\/(undef|glob|regexp|code|ref)\:(\w(\w|\:\:)*)?$/) {
265 ($type, $class) = (($1 || ''), ($2 || ''));
266 my $type_class = "YAML::Type::$type";
268 if ($type_class->can('yaml_load')) {
269 return $type_class->yaml_load($node, $class, $self);
272 $self->die('YAML_LOAD_ERR_NO_CONVERT', 'XXX', $explicit);
275 # This !perl/@Foo and !perl/$Foo are deprecated but still parsed
276 elsif ($YAML::TagClass->{$explicit} ||
277 $explicit =~ m{^perl/(\@|\$)?([a-zA-Z](\w|::)+)$}
279 $class = $YAML::TagClass->{$explicit} || $2;
280 if ($class->can('yaml_load')) {
282 return $class->yaml_load(YAML::Node->new($node, $explicit));
286 return CORE::bless $node, $class;
289 return CORE::bless \$node, $class;
295 return YAML::Node->new($node, $explicit);
298 # XXX This is likely wrong. Failing test:
299 # --- !unknown 'scalar value'
304 # Parse a YAML mapping into a Perl hash
309 $self->anchor2node->{$anchor} = $mapping;
311 while (not $self->done and $self->indent == $self->offset->[$self->level]) {
313 if ($self->{content} =~ s/^\?\s*//) {
314 $self->preface($self->content);
315 $self->_parse_next_line(COLLECTION);
316 $key = $self->_parse_node();
319 # If "default" key (equals sign)
320 elsif ($self->{content} =~ s/^\=\s*//) {
323 # If "comment" key (slash slash)
324 elsif ($self->{content} =~ s/^\=\s*//) {
327 # Regular scalar key:
329 $self->inline($self->content);
330 $key = $self->_parse_inline();
332 $self->content($self->inline);
336 unless ($self->{content} =~ s/^:\s*//) {
337 $self->die('YAML_LOAD_ERR_BAD_MAP_ELEMENT');
339 $self->preface($self->content);
340 my $line = $self->line;
341 $self->_parse_next_line(COLLECTION);
342 my $value = $self->_parse_node();
343 if (exists $mapping->{$key}) {
344 $self->warn('YAML_LOAD_WARN_DUPLICATE_KEY');
347 $mapping->{$key} = $value;
353 # Parse a YAML sequence into a Perl array
358 $self->anchor2node->{$anchor} = $seq;
359 while (not $self->done and $self->indent == $self->offset->[$self->level]) {
360 if ($self->content =~ /^-(?: (.*))?$/) {
361 $self->preface(defined($1) ? $1 : '');
364 $self->die('YAML_LOAD_ERR_BAD_SEQ_ELEMENT');
366 if ($self->preface =~ /^(\s*)(\w.*\:(?: |$).*)$/) {
367 $self->indent($self->offset->[$self->level] + 2 + length($1));
369 $self->level($self->level + 1);
370 $self->offset->[$self->level] = $self->indent;
372 push @$seq, $self->_parse_mapping('');
374 $#{$self->offset} = $self->level;
377 $self->_parse_next_line(COLLECTION);
378 push @$seq, $self->_parse_node();
384 # Parse an inline value. Since YAML supports inline collections, this is
385 # the top level of a sub parsing.
388 my ($top, $top_implicit, $top_explicit) = (@_, '', '', '');
389 $self->{inline} =~ s/^\s*(.*)\s*$/$1/; # OUCH - mugwump
390 my ($node, $anchor, $alias, $explicit, $implicit) = ('') x 5;
391 ($anchor, $alias, $explicit, $implicit, $self->{inline}) =
392 $self->_parse_qualifiers($self->inline);
394 $self->anchor2node->{$anchor} = CORE::bless [], 'YAML-anchor2node';
396 $implicit ||= $top_implicit;
397 $explicit ||= $top_explicit;
398 ($top_implicit, $top_explicit) = ('', '');
400 $self->die('YAML_PARSE_ERR_NO_ANCHOR', $alias)
401 unless defined $self->anchor2node->{$alias};
402 if (ref($self->anchor2node->{$alias}) ne 'YAML-anchor2node') {
403 $node = $self->anchor2node->{$alias};
406 $node = do {my $sv = "*$alias"};
407 push @{$self->anchor2node->{$alias}}, [\$node, $self->line];
410 elsif ($self->inline =~ /^\{/) {
411 $node = $self->_parse_inline_mapping($anchor);
413 elsif ($self->inline =~ /^\[/) {
414 $node = $self->_parse_inline_seq($anchor);
416 elsif ($self->inline =~ /^"/) {
417 $node = $self->_parse_inline_double_quoted();
418 $node = $self->_unescape($node);
419 $node = $self->_parse_implicit($node) if $implicit;
421 elsif ($self->inline =~ /^'/) {
422 $node = $self->_parse_inline_single_quoted();
423 $node = $self->_parse_implicit($node) if $implicit;
427 $node = $self->inline;
431 $node = $self->_parse_inline_simple();
433 $node = $self->_parse_implicit($node) unless $explicit;
436 $node = $self->_parse_explicit($node, $explicit);
439 if (ref($self->anchor2node->{$anchor}) eq 'YAML-anchor2node') {
440 for my $ref (@{$self->anchor2node->{$anchor}}) {
441 ${$ref->[0]} = $node;
442 $self->warn('YAML_LOAD_WARN_UNRESOLVED_ALIAS',
446 $self->anchor2node->{$anchor} = $node;
451 # Parse the inline YAML mapping into a Perl hash
452 sub _parse_inline_mapping {
456 $self->anchor2node->{$anchor} = $node;
458 $self->die('YAML_PARSE_ERR_INLINE_MAP')
459 unless $self->{inline} =~ s/^\{\s*//;
460 while (not $self->{inline} =~ s/^\s*\}//) {
461 my $key = $self->_parse_inline();
462 $self->die('YAML_PARSE_ERR_INLINE_MAP')
463 unless $self->{inline} =~ s/^\: \s*//;
464 my $value = $self->_parse_inline();
465 if (exists $node->{$key}) {
466 $self->warn('YAML_LOAD_WARN_DUPLICATE_KEY');
469 $node->{$key} = $value;
471 next if $self->inline =~ /^\s*\}/;
472 $self->die('YAML_PARSE_ERR_INLINE_MAP')
473 unless $self->{inline} =~ s/^\,\s*//;
478 # Parse the inline YAML sequence into a Perl array
479 sub _parse_inline_seq {
483 $self->anchor2node->{$anchor} = $node;
485 $self->die('YAML_PARSE_ERR_INLINE_SEQUENCE')
486 unless $self->{inline} =~ s/^\[\s*//;
487 while (not $self->{inline} =~ s/^\s*\]//) {
488 my $value = $self->_parse_inline();
490 next if $self->inline =~ /^\s*\]/;
491 $self->die('YAML_PARSE_ERR_INLINE_SEQUENCE')
492 unless $self->{inline} =~ s/^\,\s*//;
497 # Parse the inline double quoted string.
498 sub _parse_inline_double_quoted {
501 if ($self->inline =~ /^"((?:\\"|[^"])*)"\s*(.*)$/) {
507 $self->die('YAML_PARSE_ERR_BAD_DOUBLE');
513 # Parse the inline single quoted string.
514 sub _parse_inline_single_quoted {
517 if ($self->inline =~ /^'((?:''|[^'])*)'\s*(.*)$/) {
523 $self->die('YAML_PARSE_ERR_BAD_SINGLE');
528 # Parse the inline unquoted string and do implicit typing.
529 sub _parse_inline_simple {
532 if ($self->inline =~ /^(|[^!@#%^&*].*?)(?=[\[\]\{\},]|, |: |- |:\s*$|$)/) {
534 substr($self->{inline}, 0, length($1)) = '';
537 $self->die('YAML_PARSE_ERR_BAD_INLINE_IMPLICIT', $value);
542 sub _parse_implicit {
546 return $value if $value eq '';
547 return undef if $value =~ /^~$/;
549 unless $value =~ /^[\@\`\^]/ or
550 $value =~ /^[\-\?]\s/;
551 $self->die('YAML_PARSE_ERR_BAD_IMPLICIT', $value);
554 # Unfold a YAML multiline scalar into a single string.
560 while (not $self->done and $self->indent == $self->offset->[$self->level]) {
561 $node .= $self->content. "\n";
562 $self->_parse_next_line(LEAF);
564 $node =~ s/^(\S.*)\n(?=\S)/$1 /gm;
565 $node =~ s/^(\S.*)\n(\n+\S)/$1$2/gm;
566 $node =~ s/\n*\Z// unless $chomp eq '+';
567 $node .= "\n" unless $chomp;
571 # Parse a YAML block style scalar. This is like a Perl here-document.
576 while (not $self->done and $self->indent == $self->offset->[$self->level]) {
577 $node .= $self->content . "\n";
578 $self->_parse_next_line(LEAF);
580 return $node if '+' eq $chomp;
581 $node =~ s/\n*\Z/\n/;
582 $node =~ s/\n\Z// if $chomp eq '-';
586 # Handle Perl style '#' comments. Comments must be at the same indentation
587 # level as the collection line following them.
588 sub _parse_throwaway_comments {
590 while (@{$self->lines} and
591 $self->lines->[0] =~ m{^\s*(\#|$)}
593 shift @{$self->lines};
596 $self->eos($self->{done} = not @{$self->lines});
599 # This is the routine that controls what line is being parsed. It gets called
600 # once for each line in the YAML stream.
603 # 1) Skip past the current line
604 # 2) Determine the indentation offset for a new level
605 # 3) Find the next _content_ line
606 # A) Skip over any throwaways (Comments/blanks)
607 # B) Set $self->indent, $self->content, $self->line
608 # 4) Expand tabs appropriately
609 sub _parse_next_line {
612 my $level = $self->level;
613 my $offset = $self->offset->[$level];
614 $self->die('YAML_EMIT_ERR_BAD_LEVEL') unless defined $offset;
615 shift @{$self->lines};
616 $self->eos($self->{done} = not @{$self->lines});
617 return if $self->eos;
620 # Determine the offset for a new leaf node
621 if ($self->preface =~
622 qr/(?:^|\s)(?:$FOLD_CHAR|$LIT_CHAR_RX)(?:-|\+)?(\d*)\s*$/
624 $self->die('YAML_PARSE_ERR_ZERO_INDENT')
625 if length($1) and $1 == 0;
628 $self->offset->[$level + 1] = $offset + $1;
631 # First get rid of any comments.
632 while (@{$self->lines} && ($self->lines->[0] =~ /^\s*#/)) {
633 $self->lines->[0] =~ /^( *)/ or die;
634 last unless length($1) <= $offset;
635 shift @{$self->lines};
638 $self->eos($self->{done} = not @{$self->lines});
639 return if $self->eos;
640 if ($self->lines->[0] =~ /^( *)\S/ and length($1) > $offset) {
641 $self->offset->[$level+1] = length($1);
644 $self->offset->[$level+1] = $offset + 1;
647 $offset = $self->offset->[++$level];
649 # Determine the offset for a new collection level
650 elsif ($type == COLLECTION and
651 $self->preface =~ /^(\s*(\!\S*|\&\S+))*\s*$/) {
652 $self->_parse_throwaway_comments();
654 $self->offset->[$level+1] = $offset + 1;
658 $self->lines->[0] =~ /^( *)\S/ or die;
659 if (length($1) > $offset) {
660 $self->offset->[$level+1] = length($1);
663 $self->offset->[$level+1] = $offset + 1;
666 $offset = $self->offset->[++$level];
670 while (@{$self->lines} and
671 $self->lines->[0] =~ m{^( *)(\#)} and
674 shift @{$self->lines};
677 $self->eos($self->{done} = not @{$self->lines});
680 $self->_parse_throwaway_comments();
682 return if $self->eos;
684 if ($self->lines->[0] =~ /^---(\s|$)/) {
688 if ($type == LEAF and
689 $self->lines->[0] =~ /^ {$offset}(.*)$/
691 $self->indent($offset);
694 elsif ($self->lines->[0] =~ /^\s*$/) {
695 $self->indent($offset);
699 $self->lines->[0] =~ /^( *)(\S.*)$/;
700 while ($self->offset->[$level] > length($1)) {
703 $self->die('YAML_PARSE_ERR_INCONSISTENT_INDENTATION')
704 if $self->offset->[$level] != length($1);
705 $self->indent(length($1));
708 $self->die('YAML_PARSE_ERR_INDENTATION')
709 if $self->indent - $offset > 1;
712 #==============================================================================
713 # Utility subroutines.
714 #==============================================================================
716 # Printable characters for escapes
719 z => "\x00", a => "\x07", t => "\x09",
720 n => "\x0a", v => "\x0b", f => "\x0c",
721 r => "\x0d", e => "\x1b", '\\' => '\\',
724 # Transform all the backslash style escape characters to their literal meaning
728 $node =~ s/\\([never\\fartz]|x([0-9a-fA-F]{2}))/
729 (length($1)>1)?pack("H2",$2):$unescapes{$1}/gex;
739 YAML::Loader - YAML class for loading Perl objects to YAML
744 my $loader = YAML::Loader->new;
745 my $hash = $loader->load(<<'...');
751 YAML::Loader is the module that YAML.pm used to deserialize YAML to Perl
752 objects. It is fully object oriented and usable on its own.
756 Ingy döt Net <ingy@cpan.org>
760 Copyright (c) 2006. Ingy döt Net. All rights reserved.
762 This program is free software; you can redistribute it and/or modify it
763 under the same terms as Perl itself.
765 See L<http://www.perl.com/perl/misc/Artistic.html>