scripts/locales.pl

   1 #!/usr/bin/perl
   2
   3 # -n do not include custom_ scripts
   4 # -v verbose mode, shows progress stuff
   5
   6 # this version of locles processes not only all required .pl files
   7 # but also all parse_html_templated files.
   8
   9 use utf8;
  10 use strict;
  11
  12 use Data::Dumper;
  13 use English;
  14 use FileHandle;
  15 use Getopt::Long;
  16 use List::Util qw(first);
  17 use POSIX;
  18 use Pod::Usage;
  19 use Carp;
  20 use File::Slurp qw(slurp);
  21
  22 $OUTPUT_AUTOFLUSH = 1;
  23
  24 my $opt_v  = 0;
  25 my $opt_n  = 0;
  26 my $opt_c  = 0;
  27 my $debug  = 0;
  28
  29 parse_args();
  30
  31 my $basedir      = "../..";
  32 my $locales_dir  = ".";
  33 my $bindir       = "$basedir/bin/mozilla";
  34 my @progdirs     = ( "$basedir/SL/Template/Plugin" );
  35 my $dbupdir      = "$basedir/sql/Pg-upgrade";
  36 my $dbupdir2     = "$basedir/sql/Pg-upgrade2";
  37 my $menufile     = "menu.ini";
  38 my $submitsearch = qr/type\s*=\s*[\"\']?submit/i;
  39
  40 my (%referenced_html_files, %locale, %htmllocale, %alllocales, %cached, %submit);
  41 my ($ALL_HEADER, $MISSING_HEADER, $LOST_HEADER);
  42
  43 init();
  44
  45 opendir DIR, "$bindir" or die "$!";
  46 my @progfiles = map { [ $_, $bindir ] } grep { /\.pl$/ && !/(_custom|^\.)/ } readdir DIR;
  47 seekdir DIR, 0;
  48 my @customfiles = grep /_custom/, readdir DIR;
  49 closedir DIR;
  50
  51 foreach my $dir (@progdirs) {
  52   opendir DIR, $dir or die "$!";
  53   push @progfiles, map { [ $_, $dir ] } grep { /\.pm$/ } readdir DIR;
  54   closedir DIR;
  55 }
  56
  57 # put customized files into @customfiles
  58 my @menufiles;
  59
  60 if ($opt_n) {
  61   @customfiles = ();
  62   @menufiles   = ($menufile);
  63 } else {
  64   opendir DIR, "$basedir" or die "$!";
  65   @menufiles = grep { /.*?_$menufile$/ } readdir DIR;
  66   closedir DIR;
  67   unshift @menufiles, $menufile;
  68 }
  69
  70 opendir DIR, $dbupdir or die "$!";
  71 my @dbplfiles = grep { /\.pl$/ } readdir DIR;
  72 closedir DIR;
  73
  74 opendir DIR, $dbupdir2 or die "$!";
  75 my @dbplfiles2 = grep { /\.pl$/ } readdir DIR;
  76 closedir DIR;
  77
  78 # slurp the translations in
  79 our $self    = {};
  80 our $missing = {};
  81 our @missing = ();
  82 our @lost    = ();
  83
  84 if (-f "$locales_dir/all") {
  85   require "$locales_dir/all";
  86 }
  87 if (-f "$locales_dir/missing") {
  88   require "$locales_dir/missing" ;
  89   unlink "$locales_dir/missing";
  90 }
  91 if (-f "$locales_dir/lost") {
  92   require "$locales_dir/lost";
  93   unlink "$locales_dir/lost";
  94 }
  95
  96 my $charset = slurp("$locales_dir/charset") || 'utf-8';
  97 chomp $charset;
  98
  99 my %old_texts = %{ $self->{texts} || {} };
 100
 101 map({ handle_file(@{ $_ }); } @progfiles);
 102 map({ handle_file($_, $dbupdir); } @dbplfiles);
 103 map({ handle_file($_, $dbupdir2); } @dbplfiles2);
 104
 105 # generate all
 106 generate_file(
 107   file      => "$locales_dir/all",
 108   header    => $ALL_HEADER,
 109   data_name => '$self->{texts}',
 110   data_sub  => sub { _print_line($_, $self->{texts}{$_}, @_) for sort keys %alllocales },
 111 );
 112
 113 # calc and generate missing
 114 push @missing, grep { !$self->{texts}{$_} } sort keys %alllocales;
 115
 116 if (@missing) {
 117   generate_file(
 118     file      => "$locales_dir/missing",
 119     header    => $MISSING_HEADER,
 120     data_name => '$missing',
 121     data_sub  => sub { _print_line($_, '', @_) for @missing },
 122   );
 123 }
 124
 125 # calc and generate lost
 126 while (my ($text, $translation) = each %old_texts) {
 127   next if ($alllocales{$text});
 128   push @lost, { 'text' => $text, 'translation' => $translation };
 129 }
 130
 131 if (scalar @lost) {
 132   splice @lost, 0, (scalar @lost - 50) if (scalar @lost > 50);
 133   generate_file(
 134     file      => "$locales_dir/lost",
 135     header    => $LOST_HEADER,
 136     delim     => '()',
 137     data_name => '@lost',
 138     data_sub  => sub {
 139       _print_line($_->{text}, $_->{translation}, @_, template => "  { 'text' => %s, 'translation' => %s },\n") for @lost;
 140     },
 141   );
 142 }
 143
 144 my $trlanguage = slurp("$locales_dir/LANGUAGE");
 145 chomp $trlanguage;
 146
 147 search_unused_htmlfiles() if $opt_c;
 148
 149 my $count  = scalar keys %alllocales;
 150 my $notext = scalar @missing;
 151 my $per    = sprintf("%.1f", ($count - $notext) / $count * 100);
 152 print "\n$trlanguage - ${per}%";
 153 print " - $notext/$count missing" if $notext;
 154 print "\n";
 155
 156 exit;
 157
 158 # eom
 159
 160 sub init {
 161   $ALL_HEADER = <<EOL;
 162 # These are all the texts to build the translations files.
 163 # The file has the form of 'english text'  => 'foreign text',
 164 # you can add the translation in this file or in the 'missing' file
 165 # run locales.pl from this directory to rebuild the translation files
 166 EOL
 167   $MISSING_HEADER = <<EOL;
 168 # add the missing texts and run locales.pl to rebuild
 169 EOL
 170   $LOST_HEADER  = <<EOL;
 171 # The last 50 text strings, that have been removed.
 172 # This file has been auto-generated by locales.pl. Please don't edit!
 173 EOL
 174 }
 175
 176 sub parse_args {
 177   my ($help, $man);
 178
 179   GetOptions(
 180     'no-custom-files' => \$opt_n,
 181     'check-files'     => \$opt_c,
 182     'verbose'         => \$opt_v,
 183     'help'            => \$help,
 184     'man'             => \$man,
 185     'debug'           => \$debug,
 186   );
 187
 188   if ($help) {
 189     pod2usage(1);
 190     exit 0;
 191   }
 192
 193   if ($man) {
 194     pod2usage(-exitstatus => 0, -verbose => 2);
 195     exit 0;
 196   }
 197
 198   if (@ARGV) {
 199     my $arg = shift @ARGV;
 200     my $ok  = 0;
 201     foreach my $dir ("../locale/$arg", "locale/$arg", "../$arg", $arg) {
 202       next unless -d $dir && -f "$dir/all" && -f "$dir/LANGUAGE";
 203       $ok = chdir $dir;
 204       last;
 205     }
 206
 207     if (!$ok) {
 208       print "The locale directory '$arg' could not be found.\n";
 209       exit 1;
 210     }
 211
 212   } elsif (!-f 'all' || !-f 'LANGUAGE') {
 213     print "locales.pl was not called from a locale/* subdirectory,\n"
 214       .   "and no locale directory name was given.\n";
 215     exit 1;
 216   }
 217 }
 218
 219 sub handle_file {
 220   my ($file, $dir) = @_;
 221   print "\n$file" if $opt_v;
 222   %locale = ();
 223   %submit = ();
 224
 225   &scanfile("$dir/$file");
 226
 227   # scan custom_{module}.pl or {login}_{module}.pl files
 228   foreach my $customfile (@customfiles) {
 229     if ($customfile =~ /_$file/) {
 230       if (-f "$dir/$customfile") {
 231         &scanfile("$dir/$customfile");
 232       }
 233     }
 234   }
 235
 236   # if this is the menu.pl file
 237   if ($file eq 'menu.pl') {
 238     foreach my $item (@menufiles) {
 239       &scanmenu("$basedir/$item");
 240     }
 241   }
 242
 243   if ($file eq 'menunew.pl') {
 244     foreach my $item (@menufiles) {
 245       &scanmenu("$basedir/$item");
 246       print "." if $opt_v;
 247     }
 248   }
 249
 250   $file =~ s/\.pl//;
 251
 252   foreach my $text (keys %$missing) {
 253     if ($locale{$text} || $htmllocale{$text}) {
 254       unless ($self->{texts}{$text}) {
 255         $self->{texts}{$text} = $missing->{$text};
 256       }
 257     }
 258   }
 259 }
 260
 261 sub extract_text_between_parenthesis {
 262   my ($fh, $line) = @_;
 263   my ($inside_string, $pos, $text, $quote_next) = (undef, 0, "", 0);
 264
 265   while (1) {
 266     if (length($line) <= $pos) {
 267       $line = <$fh>;
 268       return ($text, "") unless ($line);
 269       $pos = 0;
 270     }
 271
 272     my $cur_char = substr($line, $pos, 1);
 273
 274     if (!$inside_string) {
 275       if ((length($line) >= ($pos + 3)) && (substr($line, $pos, 2)) eq "qq") {
 276         $inside_string = substr($line, $pos + 2, 1);
 277         $pos += 2;
 278
 279       } elsif ((length($line) >= ($pos + 2)) &&
 280                (substr($line, $pos, 1) eq "q")) {
 281         $inside_string = substr($line, $pos + 1, 1);
 282         $pos++;
 283
 284       } elsif (($cur_char eq '"') || ($cur_char eq '\'')) {
 285         $inside_string = $cur_char;
 286
 287       } elsif (($cur_char eq ")") || ($cur_char eq ',')) {
 288         return ($text, substr($line, $pos + 1));
 289       }
 290
 291     } else {
 292       if ($quote_next) {
 293         $text .= '\\' unless $cur_char eq "'";
 294         $text .= $cur_char;
 295         $quote_next = 0;
 296
 297       } elsif ($cur_char eq '\\') {
 298         $quote_next = 1;
 299
 300       } elsif ($cur_char eq $inside_string) {
 301         undef($inside_string);
 302
 303       } else {
 304         $text .= $cur_char;
 305
 306       }
 307     }
 308     $pos++;
 309   }
 310 }
 311
 312 sub scanfile {
 313   my $file = shift;
 314   my $dont_include_subs = shift;
 315   my $scanned_files = shift;
 316
 317   # sanitize file
 318   $file =~ s=/+=/=g;
 319
 320   $scanned_files = {} unless ($scanned_files);
 321   return if ($scanned_files->{$file});
 322   $scanned_files->{$file} = 1;
 323
 324   if (!defined $cached{$file}) {
 325
 326     return unless (-f "$file");
 327
 328     my $fh = new FileHandle;
 329     open $fh, "$file" or die "$! : $file";
 330
 331     my ($is_submit, $line_no, $sub_line_no) = (0, 0, 0);
 332
 333     while (<$fh>) {
 334       $line_no++;
 335
 336       # is this another file
 337       if (/require\s+\W.*\.pl/) {
 338         my $newfile = $&;
 339         $newfile =~ s/require\s+\W//;
 340         $newfile =~ s|bin/mozilla||;
 341          $cached{$file}{scan}{"$bindir/$newfile"} = 1;
 342       } elsif (/use\s+SL::([\w:]*)/) {
 343         my $module =  $1;
 344         $module    =~ s|::|/|g;
 345         $cached{$file}{scannosubs}{"../../SL/${module}.pm"} = 1;
 346       }
 347
 348       # is this a template call?
 349       if (/parse_html_template2?\s*\(\s*[\"\']([\w\/]+)\s*[\"\']/) {
 350         my $newfile = "$basedir/templates/webpages/$1.html";
 351         if (/parse_html_template2/) {
 352           print "E: " . strip_base($file) . " is still using 'parse_html_template2' for " . strip_base($newfile) . ".\n";
 353         }
 354         if (-f $newfile) {
 355            $cached{$file}{scanh}{$newfile} = 1;
 356           print "." if $opt_v;
 357         } elsif ($opt_c) {
 358           print "W: missing HTML template: " . strip_base($newfile) . " (referenced from " . strip_base($file) . ")\n";
 359         }
 360       }
 361
 362       my $rc = 1;
 363
 364       while ($rc) {
 365         if (/Locale/) {
 366           unless (/^use /) {
 367             my ($null, $country) = split /,/;
 368             $country =~ s/^ +[\"\']//;
 369             $country =~ s/[\"\'].*//;
 370           }
 371         }
 372
 373         my $postmatch = "";
 374
 375         # is it a submit button before $locale->
 376         if (/$submitsearch/) {
 377           $postmatch = "$'";
 378           if ($` !~ /locale->text/) {
 379             $is_submit   = 1;
 380             $sub_line_no = $line_no;
 381           }
 382         }
 383
 384         my ($found) = /locale->text.*?\(/;
 385         $postmatch = "$'";
 386
 387         if ($found) {
 388           my $string;
 389           ($string, $_) = extract_text_between_parenthesis($fh, $postmatch);
 390           $postmatch = $_;
 391
 392           # if there is no $ in the string record it
 393           unless (($string =~ /\$\D.*/) || ("" eq $string)) {
 394
 395             # this guarantees one instance of string
 396             $cached{$file}{locale}{$string} = 1;
 397
 398             # this one is for all the locales
 399             $cached{$file}{all}{$string} = 1;
 400
 401             # is it a submit button before $locale->
 402             if ($is_submit) {
 403               $cached{$file}{submit}{$string} = 1;
 404             }
 405           }
 406         } elsif ($postmatch =~ />/) {
 407           $is_submit = 0;
 408         }
 409
 410         # exit loop if there are no more locales on this line
 411         ($rc) = ($postmatch =~ /locale->text/);
 412
 413         if (   ($postmatch =~ />/)
 414             || (!$found && ($sub_line_no != $line_no) && />/)) {
 415           $is_submit = 0;
 416         }
 417       }
 418     }
 419
 420     close($fh);
 421
 422   }
 423
 424   map { $alllocales{$_} = 1 }   keys %{$cached{$file}{all}};
 425   map { $locale{$_} = 1 }       keys %{$cached{$file}{locale}};
 426   map { $submit{$_} = 1 }       keys %{$cached{$file}{submit}};
 427   map { &scanfile($_, 0, $scanned_files) } keys %{$cached{$file}{scan}};
 428   map { &scanfile($_, 1, $scanned_files) } keys %{$cached{$file}{scannosubs}};
 429   map { &scanhtmlfile($_)  }    keys %{$cached{$file}{scanh}};
 430
 431   @referenced_html_files{keys %{$cached{$file}{scanh}}} = (1) x scalar keys %{$cached{$file}{scanh}};
 432 }
 433
 434 sub scanmenu {
 435   my $file = shift;
 436
 437   my $fh = new FileHandle;
 438   open $fh, "$file" or die "$! : $file";
 439
 440   my @a = grep m/^\[/, <$fh>;
 441   close($fh);
 442
 443   # strip []
 444   grep { s/(\[|\])//g } @a;
 445
 446   foreach my $item (@a) {
 447     my @b = split /--/, $item;
 448     foreach my $string (@b) {
 449       chomp $string;
 450       $locale{$string}     = 1;
 451       $alllocales{$string} = 1;
 452     }
 453   }
 454
 455 }
 456
 457 sub unescape_template_string {
 458   my $in =  "$_[0]";
 459   $in    =~ s/\\(.)/$1/g;
 460   return $in;
 461 }
 462
 463 sub scanhtmlfile {
 464   local *IN;
 465
 466   my $file = shift;
 467
 468   if (!defined $cached{$file}) {
 469     my %plugins = ( 'loaded' => { }, 'needed' => { } );
 470
 471     open(IN, $file) || die $file;
 472
 473     my $copying  = 0;
 474     my $issubmit = 0;
 475     my $text     = "";
 476     while (my $line = <IN>) {
 477       chomp($line);
 478
 479       while ($line =~ m/\[\%[^\w]*use[^\w]+(\w+)[^\w]*?\%\]/gi) {
 480         $plugins{loaded}->{$1} = 1;
 481       }
 482
 483       while ($line =~ m/\[\%[^\w]*(\w+)\.\w+\(/g) {
 484         my $plugin = $1;
 485         $plugins{needed}->{$plugin} = 1 if (first { $_ eq $plugin } qw(HTML LxERP JavaScript MultiColumnIterator L));
 486       }
 487
 488       while ($line =~ m/(?:             # Start von Variante 1: LxERP.t8('...'); ohne darumliegende [% ... %]-Tags
 489                           (LxERP\.t8)\( #   LxERP.t8(                             ::Parameter $1::
 490                           ([\'\"])      #   Anfang des zu übersetzenden Strings   ::Parameter $2::
 491                           (.*?)         #   Der zu übersetzende String            ::Parameter $3::
 492                           (?<!\\)\2     #   Ende des zu übersetzenden Strings
 493                         |               # Start von Variante 2: [% '...' | $T8 %]
 494                           \[\%          #   Template-Start-Tag
 495                           [\-~#]?       #   Whitespace-Unterdrückung
 496                           \s*           #   Optional beliebig viele Whitespace
 497                           ([\'\"])      #   Anfang des zu übersetzenden Strings   ::Parameter $4::
 498                           (.*?)         #   Der zu übersetzende String            ::Parameter $5::
 499                           (?<!\\)\4     #   Ende des zu übersetzenden Strings
 500                           \s*\|\s*      #   Pipe-Zeichen mit optionalen Whitespace davor und danach
 501                           (\$T8)        #   Filteraufruf                          ::Parameter $6::
 502                           .*?           #   Optionale Argumente für den Filter
 503                           \s*           #   Whitespaces
 504                           [\-~#]?       #   Whitespace-Unterdrückung
 505                           \%\]          #   Template-Ende-Tag
 506                         )
 507                        /ix) {
 508         my $module = $1 || $6;
 509         my $string = $3 || $5;
 510         print "Found filter >>>$string<<<\n" if $debug;
 511         substr $line, $LAST_MATCH_START[1], $LAST_MATCH_END[0] - $LAST_MATCH_START[0], '';
 512
 513         $string                         = unescape_template_string($string);
 514         $cached{$file}{all}{$string}    = 1;
 515         $cached{$file}{html}{$string}   = 1;
 516         $cached{$file}{submit}{$string} = 1 if $PREMATCH =~ /$submitsearch/;
 517         $plugins{needed}->{T8}          = 1 if $module eq '$T8';
 518         $plugins{needed}->{LxERP}       = 1 if $module eq 'LxERP.t8';
 519       }
 520
 521       while ($line =~ m/\[\%          # Template-Start-Tag
 522                         [\-~#]?       # Whitespace-Unterdrückung
 523                         \s*           # Optional beliebig viele Whitespace
 524                         (?:           # Die erkannten Template-Direktiven
 525                           PROCESS
 526                         |
 527                           INCLUDE
 528                         )
 529                         \s+           # Mindestens ein Whitespace
 530                         [\'\"]?       # Anfang des Dateinamens
 531                         ([^\s]+)      # Beliebig viele Nicht-Whitespaces -- Dateiname
 532                         \.html        # Endung ".html", ansonsten kann es der Name eines Blocks sein
 533                        /ix) {
 534         my $new_file_name = "$basedir/templates/webpages/$1.html";
 535         $cached{$file}{scanh}{$new_file_name} = 1;
 536         substr $line, $LAST_MATCH_START[1], $LAST_MATCH_END[0] - $LAST_MATCH_START[0], '';
 537       }
 538     }
 539
 540     close(IN);
 541
 542     foreach my $plugin (keys %{ $plugins{needed} }) {
 543       next if ($plugins{loaded}->{$plugin});
 544       print "E: " . strip_base($file) . " requires the Template plugin '$plugin', but is not loaded with '[\% USE $plugin \%]'.\n";
 545     }
 546   }
 547
 548   # copy back into global arrays
 549   map { $alllocales{$_} = 1 } keys %{$cached{$file}{all}};
 550   map { $locale{$_} = 1 }     keys %{$cached{$file}{html}};
 551   map { $submit{$_} = 1 }     keys %{$cached{$file}{submit}};
 552
 553   map { scanhtmlfile($_)  }   keys %{$cached{$file}{scanh}};
 554
 555   @referenced_html_files{keys %{$cached{$file}{scanh}}} = (1) x scalar keys %{$cached{$file}{scanh}};
 556 }
 557
 558 sub search_unused_htmlfiles {
 559   my @unscanned_dirs = ('../../templates/webpages');
 560
 561   while (scalar @unscanned_dirs) {
 562     my $dir = shift @unscanned_dirs;
 563
 564     foreach my $entry (<$dir/*>) {
 565       if (-d $entry) {
 566         push @unscanned_dirs, $entry;
 567
 568       } elsif (($entry =~ /_master.html$/) && -f $entry && !$referenced_html_files{$entry}) {
 569         print "W: unused HTML template: " . strip_base($entry) . "\n";
 570
 571       }
 572     }
 573   }
 574 }
 575
 576 sub strip_base {
 577   my $s =  "$_[0]";             # Create a copy of the string.
 578
 579   $s    =~ s|^../../||;
 580   $s    =~ s|templates/webpages/||;
 581
 582   return $s;
 583 }
 584
 585 sub _single_quote {
 586   my $val = shift;
 587   $val =~ s/(\'|\\$)/\\$1/g;
 588   return  "'" . $val .  "'";
 589 }
 590
 591 sub _print_line {
 592   my $key      = _single_quote(shift);
 593   my $text     = _single_quote(shift);
 594   my %params   = @_;
 595   my $template = $params{template} || qq|  %-29s => %s,\n|;
 596   my $fh       = $params{fh}       || croak 'need filehandle in _print_line';
 597
 598   print $fh sprintf $template, $key, $text;
 599 }
 600
 601 sub generate_file {
 602   my %params = @_;
 603
 604   my $file      = $params{file}   || croak 'need filename in generate_file';
 605   my $header    = $params{header};
 606   my $lines     = $params{data_sub};
 607   my $data_name = $params{data_name};
 608   my @delim     = split //, ($params{delim} || '{}');
 609
 610   open my $fh, '>:encoding(utf8)', $file or die "$! : $file";
 611
 612   $charset =~ s/\r?\n//g;
 613   my $emacs_charset = lc $charset;
 614
 615   print $fh "#!/usr/bin/perl\n# -*- coding: $emacs_charset; -*-\n# vim: fenc=$charset\n\nuse utf8;\n\n";
 616   print $fh $header, "\n" if $header;
 617   print $fh "$data_name = $delim[0]\n" if $data_name;
 618
 619   $lines->(fh => $fh);
 620
 621   print $fh qq|$delim[1];\n\n1;\n|;
 622   close $fh;
 623 }
 624
 625 sub slurp {
 626   my $file = shift;
 627   do { local ( @ARGV, $/ ) = $file; <> }
 628 }
 629
 630 __END__
 631
 632 =head1 NAME
 633
 634 locales.pl - Collect strings for translation in Lx-Office
 635
 636 =head1 SYNOPSIS
 637
 638 locales.pl [options] lang_code
 639
 640  Options:
 641   -n, --no-custom-files  Do not process files whose name contains "_"
 642   -c, --check-files      Run extended checks on HTML files
 643   -v, --verbose          Be more verbose
 644   -h, --help             Show this help
 645
 646 =head1 OPTIONS
 647
 648 =over 8
 649
 650 =item B<-n>, B<--no-custom-files>
 651
 652 Do not process files whose name contains "_", e.g. "custom_io.pl".
 653
 654 =item B<-c>, B<--check-files>
 655
 656 Run extended checks on the usage of templates. This can be used to
 657 discover HTML templates that are never used as well as the usage of
 658 non-existing HTML templates.
 659
 660 =item B<-v>, B<--verbose>
 661
 662 Be more verbose.
 663
 664 =back
 665
 666 =head1 DESCRIPTION
 667
 668 This script collects strings from Perl files, the menu.ini file and
 669 HTML templates and puts them into the file "all" for translation.
 670
 671 =cut