4 extends 'YAML::Loader::Base';
6 use YAML::Loader::Base;
10 use constant LEAF => 1;
11 use constant COLLECTION => 2;
12 use constant VALUE => "\x07YAML\x07VALUE\x07";
13 use constant COMMENT => "\x07YAML\x07COMMENT\x07";
15 # Common YAML character sets
16 my $ESCAPE_CHAR = '[\\x00-\\x08\\x0b-\\x0d\\x0e-\\x1f]';
19 my $LIT_CHAR_RX = "\\$LIT_CHAR";
23 $self->stream($_[0] || '');
24 return $self->_parse();
27 # Top level function for parsing. Parse each document in order and
28 # handle processing for YAML headers.
31 my (%directives, $preface);
32 $self->{stream} =~ s|\015\012|\012|g;
33 $self->{stream} =~ s|\015|\012|g;
35 $self->die('YAML_PARSE_ERR_BAD_CHARS')
36 if $self->stream =~ /$ESCAPE_CHAR/;
37 $self->die('YAML_PARSE_ERR_NO_FINAL_NEWLINE')
38 if length($self->stream) and
39 $self->{stream} !~ s/(.)\n\Z/$1/s;
40 $self->lines([split /\x0a/, $self->stream, -1]);
42 # Throw away any comments or blanks before the header (or start of
43 # content for headerless streams)
44 $self->_parse_throwaway_comments();
47 # Add an "assumed" header if there is no header and the stream is
48 # not empty (after initial throwaways).
50 if ($self->lines->[0] !~ /^---(\s|$)/) {
51 unshift @{$self->lines}, '---';
56 # Main Loop. Parse out all the top level nodes and return them.
57 while (not $self->eos) {
58 $self->anchor2node({});
62 $self->offset->[0] = -1;
64 if ($self->lines->[0] =~ /^---\s*(.*)$/) {
65 my @words = split /\s+/, $1;
67 while (@words && $words[0] =~ /^#(\w+):(\S.*)$/) {
68 my ($key, $value) = ($1, $2);
70 if (defined $directives{$key}) {
71 $self->warn('YAML_PARSE_WARN_MULTIPLE_DIRECTIVES',
72 $key, $self->document);
75 $directives{$key} = $value;
77 $self->preface(join ' ', @words);
80 $self->die('YAML_PARSE_ERR_NO_SEPARATOR');
83 if (not $self->done) {
84 $self->_parse_next_line(COLLECTION);
91 $directives{YAML} ||= '1.0';
92 $directives{TAB} ||= 'NONE';
93 ($self->{major_version}, $self->{minor_version}) =
94 split /\./, $directives{YAML}, 2;
95 $self->die('YAML_PARSE_ERR_BAD_MAJOR_VERSION', $directives{YAML})
96 if $self->major_version ne '1';
97 $self->warn('YAML_PARSE_WARN_BAD_MINOR_VERSION', $directives{YAML})
98 if $self->minor_version ne '0';
99 $self->die('Unrecognized TAB policy')
100 unless $directives{TAB} =~ /^(NONE|\d+)(:HARD)?$/;
102 push @{$self->documents}, $self->_parse_node();
104 return wantarray ? @{$self->documents} : $self->documents->[-1];
107 # This function is the dispatcher for parsing each node. Every node
108 # recurses back through here. (Inlines are an exception as they have
109 # their own sub-parser.)
112 my $preface = $self->preface;
114 my ($node, $type, $indicator, $escape, $chomp) = ('') x 5;
115 my ($anchor, $alias, $explicit, $implicit, $class) = ('') x 5;
116 ($anchor, $alias, $explicit, $implicit, $preface) =
117 $self->_parse_qualifiers($preface);
119 $self->anchor2node->{$anchor} = CORE::bless [], 'YAML-anchor2node';
122 while (length $preface) {
123 my $line = $self->line - 1;
124 if ($preface =~ s/^($FOLD_CHAR|$LIT_CHAR_RX)(-|\+)?\d*\s*//) {
126 $chomp = $2 if defined($2);
129 $self->die('YAML_PARSE_ERR_TEXT_AFTER_INDICATOR') if $indicator;
130 $self->inline($preface);
135 $self->die('YAML_PARSE_ERR_NO_ANCHOR', $alias)
136 unless defined $self->anchor2node->{$alias};
137 if (ref($self->anchor2node->{$alias}) ne 'YAML-anchor2node') {
138 $node = $self->anchor2node->{$alias};
141 $node = do {my $sv = "*$alias"};
142 push @{$self->anchor2node->{$alias}}, [\$node, $self->line];
145 elsif (length $self->inline) {
146 $node = $self->_parse_inline(1, $implicit, $explicit);
147 if (length $self->inline) {
148 $self->die('YAML_PARSE_ERR_SINGLE_LINE');
151 elsif ($indicator eq $LIT_CHAR) {
153 $node = $self->_parse_block($chomp);
154 $node = $self->_parse_implicit($node) if $implicit;
157 elsif ($indicator eq $FOLD_CHAR) {
159 $node = $self->_parse_unfold($chomp);
160 $node = $self->_parse_implicit($node) if $implicit;
165 $self->offset->[$self->level] ||= 0;
166 if ($self->indent == $self->offset->[$self->level]) {
167 if ($self->content =~ /^-( |$)/) {
168 $node = $self->_parse_seq($anchor);
170 elsif ($self->content =~ /(^\?|\:( |$))/) {
171 $node = $self->_parse_mapping($anchor);
173 elsif ($preface =~ /^\s*$/) {
174 $node = $self->_parse_implicit('');
177 $self->die('YAML_PARSE_ERR_BAD_NODE');
185 $#{$self->offset} = $self->level;
194 CORE::bless $node, $class;
197 $node = $self->_parse_explicit($node, $explicit);
201 if (ref($self->anchor2node->{$anchor}) eq 'YAML-anchor2node') {
202 # XXX Can't remember what this code actually does
203 for my $ref (@{$self->anchor2node->{$anchor}}) {
204 ${$ref->[0]} = $node;
205 $self->warn('YAML_LOAD_WARN_UNRESOLVED_ALIAS',
209 $self->anchor2node->{$anchor} = $node;
214 # Preprocess the qualifiers that may be attached to any node.
215 sub _parse_qualifiers {
218 my ($anchor, $alias, $explicit, $implicit, $token) = ('') x 5;
220 while ($preface =~ /^[&*!]/) {
221 my $line = $self->line - 1;
222 if ($preface =~ s/^\!(\S+)\s*//) {
223 $self->die('YAML_PARSE_ERR_MANY_EXPLICIT') if $explicit;
226 elsif ($preface =~ s/^\!\s*//) {
227 $self->die('YAML_PARSE_ERR_MANY_IMPLICIT') if $implicit;
230 elsif ($preface =~ s/^\&([^ ,:]+)\s*//) {
232 $self->die('YAML_PARSE_ERR_BAD_ANCHOR')
233 unless $token =~ /^[a-zA-Z0-9]+$/;
234 $self->die('YAML_PARSE_ERR_MANY_ANCHOR') if $anchor;
235 $self->die('YAML_PARSE_ERR_ANCHOR_ALIAS') if $alias;
238 elsif ($preface =~ s/^\*([^ ,:]+)\s*//) {
240 $self->die('YAML_PARSE_ERR_BAD_ALIAS')
241 unless $token =~ /^[a-zA-Z0-9]+$/;
242 $self->die('YAML_PARSE_ERR_MANY_ALIAS') if $alias;
243 $self->die('YAML_PARSE_ERR_ANCHOR_ALIAS') if $anchor;
247 return ($anchor, $alias, $explicit, $implicit, $preface);
250 # Morph a node to it's explicit type
251 sub _parse_explicit {
253 my ($node, $explicit) = @_;
255 if ($explicit =~ /^\!?perl\/(hash|array|ref|scalar)(?:\:(\w(\w|\:\:)*)?)?$/) {
256 ($type, $class) = (($1 || ''), ($2 || ''));
258 # FIXME # die unless uc($type) eq ref($node) ?
260 if ( $type eq "ref" ) {
261 $self->die('YAML_LOAD_ERR_NO_DEFAULT_VALUE', 'XXX', $explicit)
262 unless exists $node->{VALUE()} and scalar(keys %$node) == 1;
264 my $value = $node->{VALUE()};
268 if ( $type eq "scalar" and length($class) and !ref($node) ) {
273 if ( length($class) ) {
274 CORE::bless($node, $class);
279 if ($explicit =~ m{^!?perl/(glob|regexp|code)(?:\:(\w(\w|\:\:)*)?)?$}) {
280 ($type, $class) = (($1 || ''), ($2 || ''));
281 my $type_class = "YAML::Type::$type";
283 if ($type_class->can('yaml_load')) {
284 return $type_class->yaml_load($node, $class, $self);
287 $self->die('YAML_LOAD_ERR_NO_CONVERT', 'XXX', $explicit);
290 # This !perl/@Foo and !perl/$Foo are deprecated but still parsed
291 elsif ($YAML::TagClass->{$explicit} ||
292 $explicit =~ m{^perl/(\@|\$)?([a-zA-Z](\w|::)+)$}
294 $class = $YAML::TagClass->{$explicit} || $2;
295 if ($class->can('yaml_load')) {
297 return $class->yaml_load(YAML::Node->new($node, $explicit));
301 return CORE::bless $node, $class;
304 return CORE::bless \$node, $class;
310 return YAML::Node->new($node, $explicit);
313 # XXX This is likely wrong. Failing test:
314 # --- !unknown 'scalar value'
319 # Parse a YAML mapping into a Perl hash
324 $self->anchor2node->{$anchor} = $mapping;
326 while (not $self->done and $self->indent == $self->offset->[$self->level]) {
328 if ($self->{content} =~ s/^\?\s*//) {
329 $self->preface($self->content);
330 $self->_parse_next_line(COLLECTION);
331 $key = $self->_parse_node();
334 # If "default" key (equals sign)
335 elsif ($self->{content} =~ s/^\=\s*//) {
338 # If "comment" key (slash slash)
339 elsif ($self->{content} =~ s/^\=\s*//) {
342 # Regular scalar key:
344 $self->inline($self->content);
345 $key = $self->_parse_inline();
347 $self->content($self->inline);
351 unless ($self->{content} =~ s/^:\s*//) {
352 $self->die('YAML_LOAD_ERR_BAD_MAP_ELEMENT');
354 $self->preface($self->content);
355 my $line = $self->line;
356 $self->_parse_next_line(COLLECTION);
357 my $value = $self->_parse_node();
358 if (exists $mapping->{$key}) {
359 $self->warn('YAML_LOAD_WARN_DUPLICATE_KEY');
362 $mapping->{$key} = $value;
368 # Parse a YAML sequence into a Perl array
373 $self->anchor2node->{$anchor} = $seq;
374 while (not $self->done and $self->indent == $self->offset->[$self->level]) {
375 if ($self->content =~ /^-(?: (.*))?$/) {
376 $self->preface(defined($1) ? $1 : '');
379 $self->die('YAML_LOAD_ERR_BAD_SEQ_ELEMENT');
381 if ($self->preface =~ /^(\s*)(\w.*\:(?: |$).*)$/) {
382 $self->indent($self->offset->[$self->level] + 2 + length($1));
384 $self->level($self->level + 1);
385 $self->offset->[$self->level] = $self->indent;
387 push @$seq, $self->_parse_mapping('');
389 $#{$self->offset} = $self->level;
392 $self->_parse_next_line(COLLECTION);
393 push @$seq, $self->_parse_node();
399 # Parse an inline value. Since YAML supports inline collections, this is
400 # the top level of a sub parsing.
403 my ($top, $top_implicit, $top_explicit) = (@_, '', '', '');
404 $self->{inline} =~ s/^\s*(.*)\s*$/$1/; # OUCH - mugwump
405 my ($node, $anchor, $alias, $explicit, $implicit) = ('') x 5;
406 ($anchor, $alias, $explicit, $implicit, $self->{inline}) =
407 $self->_parse_qualifiers($self->inline);
409 $self->anchor2node->{$anchor} = CORE::bless [], 'YAML-anchor2node';
411 $implicit ||= $top_implicit;
412 $explicit ||= $top_explicit;
413 ($top_implicit, $top_explicit) = ('', '');
415 $self->die('YAML_PARSE_ERR_NO_ANCHOR', $alias)
416 unless defined $self->anchor2node->{$alias};
417 if (ref($self->anchor2node->{$alias}) ne 'YAML-anchor2node') {
418 $node = $self->anchor2node->{$alias};
421 $node = do {my $sv = "*$alias"};
422 push @{$self->anchor2node->{$alias}}, [\$node, $self->line];
425 elsif ($self->inline =~ /^\{/) {
426 $node = $self->_parse_inline_mapping($anchor);
428 elsif ($self->inline =~ /^\[/) {
429 $node = $self->_parse_inline_seq($anchor);
431 elsif ($self->inline =~ /^"/) {
432 $node = $self->_parse_inline_double_quoted();
433 $node = $self->_unescape($node);
434 $node = $self->_parse_implicit($node) if $implicit;
436 elsif ($self->inline =~ /^'/) {
437 $node = $self->_parse_inline_single_quoted();
438 $node = $self->_parse_implicit($node) if $implicit;
442 $node = $self->inline;
446 $node = $self->_parse_inline_simple();
448 $node = $self->_parse_implicit($node) unless $explicit;
451 $node = $self->_parse_explicit($node, $explicit);
454 if (ref($self->anchor2node->{$anchor}) eq 'YAML-anchor2node') {
455 for my $ref (@{$self->anchor2node->{$anchor}}) {
456 ${$ref->[0]} = $node;
457 $self->warn('YAML_LOAD_WARN_UNRESOLVED_ALIAS',
461 $self->anchor2node->{$anchor} = $node;
466 # Parse the inline YAML mapping into a Perl hash
467 sub _parse_inline_mapping {
471 $self->anchor2node->{$anchor} = $node;
473 $self->die('YAML_PARSE_ERR_INLINE_MAP')
474 unless $self->{inline} =~ s/^\{\s*//;
475 while (not $self->{inline} =~ s/^\s*\}//) {
476 my $key = $self->_parse_inline();
477 $self->die('YAML_PARSE_ERR_INLINE_MAP')
478 unless $self->{inline} =~ s/^\: \s*//;
479 my $value = $self->_parse_inline();
480 if (exists $node->{$key}) {
481 $self->warn('YAML_LOAD_WARN_DUPLICATE_KEY');
484 $node->{$key} = $value;
486 next if $self->inline =~ /^\s*\}/;
487 $self->die('YAML_PARSE_ERR_INLINE_MAP')
488 unless $self->{inline} =~ s/^\,\s*//;
493 # Parse the inline YAML sequence into a Perl array
494 sub _parse_inline_seq {
498 $self->anchor2node->{$anchor} = $node;
500 $self->die('YAML_PARSE_ERR_INLINE_SEQUENCE')
501 unless $self->{inline} =~ s/^\[\s*//;
502 while (not $self->{inline} =~ s/^\s*\]//) {
503 my $value = $self->_parse_inline();
505 next if $self->inline =~ /^\s*\]/;
506 $self->die('YAML_PARSE_ERR_INLINE_SEQUENCE')
507 unless $self->{inline} =~ s/^\,\s*//;
512 # Parse the inline double quoted string.
513 sub _parse_inline_double_quoted {
516 # https://rt.cpan.org/Public/Bug/Display.html?id=90593
517 if ($self->inline =~ /^"((?:(?:\\"|[^"]){0,32766}){0,32766})"\s*(.*)$/) {
523 $self->die('YAML_PARSE_ERR_BAD_DOUBLE');
529 # Parse the inline single quoted string.
530 sub _parse_inline_single_quoted {
533 if ($self->inline =~ /^'((?:(?:''|[^']){0,32766}){0,32766})'\s*(.*)$/) {
539 $self->die('YAML_PARSE_ERR_BAD_SINGLE');
544 # Parse the inline unquoted string and do implicit typing.
545 sub _parse_inline_simple {
548 if ($self->inline =~ /^(|[^!@#%^&*].*?)(?=[\[\]\{\},]|, |: |- |:\s*$|$)/) {
550 substr($self->{inline}, 0, length($1)) = '';
553 $self->die('YAML_PARSE_ERR_BAD_INLINE_IMPLICIT', $value);
558 sub _parse_implicit {
562 return $value if $value eq '';
563 return undef if $value =~ /^~$/;
565 unless $value =~ /^[\@\`]/ or
566 $value =~ /^[\-\?]\s/;
567 $self->die('YAML_PARSE_ERR_BAD_IMPLICIT', $value);
570 # Unfold a YAML multiline scalar into a single string.
576 while (not $self->done and $self->indent == $self->offset->[$self->level]) {
577 $node .= $self->content. "\n";
578 $self->_parse_next_line(LEAF);
580 $node =~ s/^(\S.*)\n(?=\S)/$1 /gm;
581 $node =~ s/^(\S.*)\n(\n+\S)/$1$2/gm;
582 $node =~ s/\n*\Z// unless $chomp eq '+';
583 $node .= "\n" unless $chomp;
587 # Parse a YAML block style scalar. This is like a Perl here-document.
592 while (not $self->done and $self->indent == $self->offset->[$self->level]) {
593 $node .= $self->content . "\n";
594 $self->_parse_next_line(LEAF);
596 return $node if '+' eq $chomp;
597 $node =~ s/\n*\Z/\n/;
598 $node =~ s/\n\Z// if $chomp eq '-';
602 # Handle Perl style '#' comments. Comments must be at the same indentation
603 # level as the collection line following them.
604 sub _parse_throwaway_comments {
606 while (@{$self->lines} and
607 $self->lines->[0] =~ m{^\s*(\#|$)}
609 shift @{$self->lines};
612 $self->eos($self->{done} = not @{$self->lines});
615 # This is the routine that controls what line is being parsed. It gets called
616 # once for each line in the YAML stream.
619 # 1) Skip past the current line
620 # 2) Determine the indentation offset for a new level
621 # 3) Find the next _content_ line
622 # A) Skip over any throwaways (Comments/blanks)
623 # B) Set $self->indent, $self->content, $self->line
624 # 4) Expand tabs appropriately
625 sub _parse_next_line {
628 my $level = $self->level;
629 my $offset = $self->offset->[$level];
630 $self->die('YAML_EMIT_ERR_BAD_LEVEL') unless defined $offset;
631 shift @{$self->lines};
632 $self->eos($self->{done} = not @{$self->lines});
633 return if $self->eos;
636 # Determine the offset for a new leaf node
637 if ($self->preface =~
638 qr/(?:^|\s)(?:$FOLD_CHAR|$LIT_CHAR_RX)(?:-|\+)?(\d*)\s*$/
640 $self->die('YAML_PARSE_ERR_ZERO_INDENT')
641 if length($1) and $1 == 0;
644 $self->offset->[$level + 1] = $offset + $1;
647 # First get rid of any comments.
648 while (@{$self->lines} && ($self->lines->[0] =~ /^\s*#/)) {
649 $self->lines->[0] =~ /^( *)/;
650 last unless length($1) <= $offset;
651 shift @{$self->lines};
654 $self->eos($self->{done} = not @{$self->lines});
655 return if $self->eos;
656 if ($self->lines->[0] =~ /^( *)\S/ and length($1) > $offset) {
657 $self->offset->[$level+1] = length($1);
660 $self->offset->[$level+1] = $offset + 1;
663 $offset = $self->offset->[++$level];
665 # Determine the offset for a new collection level
666 elsif ($type == COLLECTION and
667 $self->preface =~ /^(\s*(\!\S*|\&\S+))*\s*$/) {
668 $self->_parse_throwaway_comments();
670 $self->offset->[$level+1] = $offset + 1;
674 $self->lines->[0] =~ /^( *)\S/ or
675 $self->die('YAML_PARSE_ERR_NONSPACE_INDENTATION');
676 if (length($1) > $offset) {
677 $self->offset->[$level+1] = length($1);
680 $self->offset->[$level+1] = $offset + 1;
683 $offset = $self->offset->[++$level];
687 while (@{$self->lines} and
688 $self->lines->[0] =~ m{^( *)(\#)} and
691 shift @{$self->lines};
694 $self->eos($self->{done} = not @{$self->lines});
697 $self->_parse_throwaway_comments();
699 return if $self->eos;
701 if ($self->lines->[0] =~ /^---(\s|$)/) {
705 if ($type == LEAF and
706 $self->lines->[0] =~ /^ {$offset}(.*)$/
708 $self->indent($offset);
711 elsif ($self->lines->[0] =~ /^\s*$/) {
712 $self->indent($offset);
716 $self->lines->[0] =~ /^( *)(\S.*)$/;
717 while ($self->offset->[$level] > length($1)) {
720 $self->die('YAML_PARSE_ERR_INCONSISTENT_INDENTATION')
721 if $self->offset->[$level] != length($1);
722 $self->indent(length($1));
725 $self->die('YAML_PARSE_ERR_INDENTATION')
726 if $self->indent - $offset > 1;
729 #==============================================================================
730 # Utility subroutines.
731 #==============================================================================
733 # Printable characters for escapes
739 'v' => "\x0b", # Potential v-string error on 5.6.2 if not quoted
746 # Transform all the backslash style escape characters to their literal meaning
750 $node =~ s/\\([never\\fart0]|x([0-9a-fA-F]{2}))/
751 (length($1)>1)?pack("H2",$2):$unescapes{$1}/gex;