scripts/locales.pl

   1 #!/usr/bin/perl
   2
   3 # -n do not include custom_ scripts
   4 # -v verbose mode, shows progress stuff
   5
   6 # this version of locles processes not only all required .pl files
   7 # but also all parse_html_templated files.
   8
   9 use utf8;
  10 use strict;
  11
  12 use Data::Dumper;
  13 use English;
  14 use FileHandle;
  15 use Getopt::Long;
  16 use List::Util qw(first);
  17 use POSIX;
  18 use Pod::Usage;
  19 use Carp;
  20 use File::Slurp qw(slurp);
  21
  22 $OUTPUT_AUTOFLUSH = 1;
  23
  24 my $opt_v  = 0;
  25 my $opt_n  = 0;
  26 my $opt_c  = 0;
  27 my $debug  = 0;
  28
  29 parse_args();
  30
  31 my $basedir      = "../..";
  32 my $locales_dir  = ".";
  33 my $bindir       = "$basedir/bin/mozilla";
  34 my @progdirs     = ( "$basedir/SL/Controller", "$basedir/SL/Template/Plugin", "$basedir/SL/Auth" );
  35 my $dbupdir      = "$basedir/sql/Pg-upgrade";
  36 my $dbupdir2     = "$basedir/sql/Pg-upgrade2";
  37 my $menufile     = "menu.ini";
  38 my $submitsearch = qr/type\s*=\s*[\"\']?submit/i;
  39
  40 my (%referenced_html_files, %locale, %htmllocale, %alllocales, %cached, %submit);
  41 my ($ALL_HEADER, $MISSING_HEADER, $LOST_HEADER);
  42
  43 init();
  44
  45 opendir DIR, "$bindir" or die "$!";
  46 my @progfiles = map { [ $_, $bindir ] } grep { /\.pl$/ && !/(_custom|^\.)/ } readdir DIR;
  47 seekdir DIR, 0;
  48 my @customfiles = grep /_custom/, readdir DIR;
  49 closedir DIR;
  50
  51 foreach my $dir (@progdirs) {
  52   opendir DIR, $dir or die "$!";
  53   push @progfiles, map { [ $_, $dir ] } grep { /\.pm$/ } readdir DIR;
  54   closedir DIR;
  55 }
  56
  57 # put customized files into @customfiles
  58 my @menufiles;
  59
  60 if ($opt_n) {
  61   @customfiles = ();
  62   @menufiles   = ($menufile);
  63 } else {
  64   opendir DIR, "$basedir" or die "$!";
  65   @menufiles = grep { /.*?_$menufile$/ } readdir DIR;
  66   closedir DIR;
  67   unshift @menufiles, $menufile;
  68 }
  69
  70 opendir DIR, $dbupdir or die "$!";
  71 my @dbplfiles = grep { /\.pl$/ } readdir DIR;
  72 closedir DIR;
  73
  74 opendir DIR, $dbupdir2 or die "$!";
  75 my @dbplfiles2 = grep { /\.pl$/ } readdir DIR;
  76 closedir DIR;
  77
  78 # slurp the translations in
  79 our $self    = {};
  80 our $missing = {};
  81 our @missing = ();
  82 our @lost    = ();
  83
  84 if (-f "$locales_dir/all") {
  85   require "$locales_dir/all";
  86 }
  87 if (-f "$locales_dir/missing") {
  88   require "$locales_dir/missing" ;
  89   unlink "$locales_dir/missing";
  90 }
  91 if (-f "$locales_dir/lost") {
  92   require "$locales_dir/lost";
  93   unlink "$locales_dir/lost";
  94 }
  95
  96 my $charset = slurp("$locales_dir/charset") || 'utf-8';
  97 chomp $charset;
  98
  99 my %old_texts = %{ $self->{texts} || {} };
 100
 101 map({ handle_file(@{ $_ }); } @progfiles);
 102 map({ handle_file($_, $dbupdir); } @dbplfiles);
 103 map({ handle_file($_, $dbupdir2); } @dbplfiles2);
 104
 105 # generate all
 106 generate_file(
 107   file      => "$locales_dir/all",
 108   header    => $ALL_HEADER,
 109   data_name => '$self->{texts}',
 110   data_sub  => sub { _print_line($_, $self->{texts}{$_}, @_) for sort keys %alllocales },
 111 );
 112
 113 # calc and generate missing
 114 push @missing, grep { !$self->{texts}{$_} } sort keys %alllocales;
 115
 116 if (@missing) {
 117   generate_file(
 118     file      => "$locales_dir/missing",
 119     header    => $MISSING_HEADER,
 120     data_name => '$missing',
 121     data_sub  => sub { _print_line($_, '', @_) for @missing },
 122   );
 123 }
 124
 125 # calc and generate lost
 126 while (my ($text, $translation) = each %old_texts) {
 127   next if ($alllocales{$text});
 128   push @lost, { 'text' => $text, 'translation' => $translation };
 129 }
 130
 131 if (scalar @lost) {
 132   splice @lost, 0, (scalar @lost - 50) if (scalar @lost > 50);
 133   generate_file(
 134     file      => "$locales_dir/lost",
 135     header    => $LOST_HEADER,
 136     delim     => '()',
 137     data_name => '@lost',
 138     data_sub  => sub {
 139       _print_line($_->{text}, $_->{translation}, @_, template => "  { 'text' => %s, 'translation' => %s },\n") for @lost;
 140     },
 141   );
 142 }
 143
 144 my $trlanguage = slurp("$locales_dir/LANGUAGE");
 145 chomp $trlanguage;
 146
 147 search_unused_htmlfiles() if $opt_c;
 148
 149 my $count  = scalar keys %alllocales;
 150 my $notext = scalar @missing;
 151 my $per    = sprintf("%.1f", ($count - $notext) / $count * 100);
 152 print "\n$trlanguage - ${per}%";
 153 print " - $notext/$count missing" if $notext;
 154 print "\n";
 155
 156 exit;
 157
 158 # eom
 159
 160 sub init {
 161   $ALL_HEADER = <<EOL;
 162 # These are all the texts to build the translations files.
 163 # The file has the form of 'english text'  => 'foreign text',
 164 # you can add the translation in this file or in the 'missing' file
 165 # run locales.pl from this directory to rebuild the translation files
 166 EOL
 167   $MISSING_HEADER = <<EOL;
 168 # add the missing texts and run locales.pl to rebuild
 169 EOL
 170   $LOST_HEADER  = <<EOL;
 171 # The last 50 text strings, that have been removed.
 172 # This file has been auto-generated by locales.pl. Please don't edit!
 173 EOL
 174 }
 175
 176 sub parse_args {
 177   my ($help, $man);
 178
 179   GetOptions(
 180     'no-custom-files' => \$opt_n,
 181     'check-files'     => \$opt_c,
 182     'verbose'         => \$opt_v,
 183     'help'            => \$help,
 184     'man'             => \$man,
 185     'debug'           => \$debug,
 186   );
 187
 188   if ($help) {
 189     pod2usage(1);
 190     exit 0;
 191   }
 192
 193   if ($man) {
 194     pod2usage(-exitstatus => 0, -verbose => 2);
 195     exit 0;
 196   }
 197
 198   if (@ARGV) {
 199     my $arg = shift @ARGV;
 200     my $ok  = 0;
 201     foreach my $dir ("../locale/$arg", "locale/$arg", "../$arg", $arg) {
 202       next unless -d $dir && -f "$dir/all" && -f "$dir/LANGUAGE";
 203       $ok = chdir $dir;
 204       last;
 205     }
 206
 207     if (!$ok) {
 208       print "The locale directory '$arg' could not be found.\n";
 209       exit 1;
 210     }
 211
 212   } elsif (!-f 'all' || !-f 'LANGUAGE') {
 213     print "locales.pl was not called from a locale/* subdirectory,\n"
 214       .   "and no locale directory name was given.\n";
 215     exit 1;
 216   }
 217 }
 218
 219 sub handle_file {
 220   my ($file, $dir) = @_;
 221   print "\n$file" if $opt_v;
 222   %locale = ();
 223   %submit = ();
 224
 225   &scanfile("$dir/$file");
 226
 227   # scan custom_{module}.pl or {login}_{module}.pl files
 228   foreach my $customfile (@customfiles) {
 229     if ($customfile =~ /_$file/) {
 230       if (-f "$dir/$customfile") {
 231         &scanfile("$dir/$customfile");
 232       }
 233     }
 234   }
 235
 236   # if this is the menu.pl file
 237   if ($file eq 'menu.pl') {
 238     foreach my $item (@menufiles) {
 239       &scanmenu("$basedir/$item");
 240     }
 241   }
 242
 243   if ($file eq 'menunew.pl') {
 244     foreach my $item (@menufiles) {
 245       &scanmenu("$basedir/$item");
 246       print "." if $opt_v;
 247     }
 248   }
 249
 250   $file =~ s/\.pl//;
 251
 252   foreach my $text (keys %$missing) {
 253     if ($locale{$text} || $htmllocale{$text}) {
 254       unless ($self->{texts}{$text}) {
 255         $self->{texts}{$text} = $missing->{$text};
 256       }
 257     }
 258   }
 259 }
 260
 261 sub extract_text_between_parenthesis {
 262   my ($fh, $line) = @_;
 263   my ($inside_string, $pos, $text, $quote_next) = (undef, 0, "", 0);
 264
 265   while (1) {
 266     if (length($line) <= $pos) {
 267       $line = <$fh>;
 268       return ($text, "") unless ($line);
 269       $pos = 0;
 270     }
 271
 272     my $cur_char = substr($line, $pos, 1);
 273
 274     if (!$inside_string) {
 275       if ((length($line) >= ($pos + 3)) && (substr($line, $pos, 2)) eq "qq") {
 276         $inside_string = substr($line, $pos + 2, 1);
 277         $pos += 2;
 278
 279       } elsif ((length($line) >= ($pos + 2)) &&
 280                (substr($line, $pos, 1) eq "q")) {
 281         $inside_string = substr($line, $pos + 1, 1);
 282         $pos++;
 283
 284       } elsif (($cur_char eq '"') || ($cur_char eq '\'')) {
 285         $inside_string = $cur_char;
 286
 287       } elsif (($cur_char eq ")") || ($cur_char eq ',')) {
 288         return ($text, substr($line, $pos + 1));
 289       }
 290
 291     } else {
 292       if ($quote_next) {
 293         $text .= '\\' unless $cur_char eq "'";
 294         $text .= $cur_char;
 295         $quote_next = 0;
 296
 297       } elsif ($cur_char eq '\\') {
 298         $quote_next = 1;
 299
 300       } elsif ($cur_char eq $inside_string) {
 301         undef($inside_string);
 302
 303       } else {
 304         $text .= $cur_char;
 305
 306       }
 307     }
 308     $pos++;
 309   }
 310 }
 311
 312 sub scanfile {
 313   my $file = shift;
 314   my $dont_include_subs = shift;
 315   my $scanned_files = shift;
 316
 317   # sanitize file
 318   $file =~ s=/+=/=g;
 319
 320   $scanned_files = {} unless ($scanned_files);
 321   return if ($scanned_files->{$file});
 322   $scanned_files->{$file} = 1;
 323
 324   if (!defined $cached{$file}) {
 325
 326     return unless (-f "$file");
 327
 328     my $fh = new FileHandle;
 329     open $fh, "$file" or die "$! : $file";
 330
 331     my ($is_submit, $line_no, $sub_line_no) = (0, 0, 0);
 332
 333     while (<$fh>) {
 334       last if /^\s*__END__/;
 335
 336       $line_no++;
 337
 338       # is this another file
 339       if (/require\s+\W.*\.pl/) {
 340         my $newfile = $&;
 341         $newfile =~ s/require\s+\W//;
 342         $newfile =~ s|bin/mozilla||;
 343          $cached{$file}{scan}{"$bindir/$newfile"} = 1;
 344       } elsif (/use\s+SL::([\w:]*)/) {
 345         my $module =  $1;
 346         $module    =~ s|::|/|g;
 347         $cached{$file}{scannosubs}{"../../SL/${module}.pm"} = 1;
 348       }
 349
 350       # is this a template call?
 351       if (/(?:parse_html_template2?|render)\s*\(\s*[\"\']([\w\/]+)\s*[\"\']/) {
 352         my $newfile = "$basedir/templates/webpages/$1.html";
 353         if (/parse_html_template2/) {
 354           print "E: " . strip_base($file) . " is still using 'parse_html_template2' for " . strip_base($newfile) . ".\n";
 355         }
 356         if (-f $newfile) {
 357            $cached{$file}{scanh}{$newfile} = 1;
 358           print "." if $opt_v;
 359         } elsif ($opt_c) {
 360           print "W: missing HTML template: " . strip_base($newfile) . " (referenced from " . strip_base($file) . ")\n";
 361         }
 362       }
 363
 364       my $rc = 1;
 365
 366       while ($rc) {
 367         if (/Locale/) {
 368           unless (/^use /) {
 369             my ($null, $country) = split /,/;
 370             $country =~ s/^ +[\"\']//;
 371             $country =~ s/[\"\'].*//;
 372           }
 373         }
 374
 375         my $postmatch = "";
 376
 377         # is it a submit button before $locale->
 378         if (/$submitsearch/) {
 379           $postmatch = "$'";
 380           if ($` !~ /locale->text/) {
 381             $is_submit   = 1;
 382             $sub_line_no = $line_no;
 383           }
 384         }
 385
 386         my ($found) = /locale->text.*?\(/;
 387         $postmatch = "$'";
 388
 389         if ($found) {
 390           my $string;
 391           ($string, $_) = extract_text_between_parenthesis($fh, $postmatch);
 392           $postmatch = $_;
 393
 394           # if there is no $ in the string record it
 395           unless (($string =~ /\$\D.*/) || ("" eq $string)) {
 396
 397             # this guarantees one instance of string
 398             $cached{$file}{locale}{$string} = 1;
 399
 400             # this one is for all the locales
 401             $cached{$file}{all}{$string} = 1;
 402
 403             # is it a submit button before $locale->
 404             if ($is_submit) {
 405               $cached{$file}{submit}{$string} = 1;
 406             }
 407           }
 408         } elsif ($postmatch =~ />/) {
 409           $is_submit = 0;
 410         }
 411
 412         # exit loop if there are no more locales on this line
 413         ($rc) = ($postmatch =~ /locale->text/);
 414
 415         if (   ($postmatch =~ />/)
 416             || (!$found && ($sub_line_no != $line_no) && />/)) {
 417           $is_submit = 0;
 418         }
 419       }
 420     }
 421
 422     close($fh);
 423
 424   }
 425
 426   map { $alllocales{$_} = 1 }   keys %{$cached{$file}{all}};
 427   map { $locale{$_} = 1 }       keys %{$cached{$file}{locale}};
 428   map { $submit{$_} = 1 }       keys %{$cached{$file}{submit}};
 429   map { &scanfile($_, 0, $scanned_files) } keys %{$cached{$file}{scan}};
 430   map { &scanfile($_, 1, $scanned_files) } keys %{$cached{$file}{scannosubs}};
 431   map { &scanhtmlfile($_)  }    keys %{$cached{$file}{scanh}};
 432
 433   @referenced_html_files{keys %{$cached{$file}{scanh}}} = (1) x scalar keys %{$cached{$file}{scanh}};
 434 }
 435
 436 sub scanmenu {
 437   my $file = shift;
 438
 439   my $fh = new FileHandle;
 440   open $fh, "$file" or die "$! : $file";
 441
 442   my @a = grep m/^\[/, <$fh>;
 443   close($fh);
 444
 445   # strip []
 446   grep { s/(\[|\])//g } @a;
 447
 448   foreach my $item (@a) {
 449     my @b = split /--/, $item;
 450     foreach my $string (@b) {
 451       chomp $string;
 452       $locale{$string}     = 1;
 453       $alllocales{$string} = 1;
 454     }
 455   }
 456
 457 }
 458
 459 sub unescape_template_string {
 460   my $in =  "$_[0]";
 461   $in    =~ s/\\(.)/$1/g;
 462   return $in;
 463 }
 464
 465 sub scanhtmlfile {
 466   local *IN;
 467
 468   my $file = shift;
 469
 470   if (!defined $cached{$file}) {
 471     my %plugins = ( 'loaded' => { }, 'needed' => { } );
 472
 473     open(IN, $file) || die $file;
 474
 475     my $copying  = 0;
 476     my $issubmit = 0;
 477     my $text     = "";
 478     while (my $line = <IN>) {
 479       chomp($line);
 480
 481       while ($line =~ m/\[\%[^\w]*use[^\w]+(\w+)[^\w]*?\%\]/gi) {
 482         $plugins{loaded}->{$1} = 1;
 483       }
 484
 485       while ($line =~ m/\[\%[^\w]*(\w+)\.\w+\(/g) {
 486         my $plugin = $1;
 487         $plugins{needed}->{$plugin} = 1 if (first { $_ eq $plugin } qw(HTML LxERP JavaScript MultiColumnIterator L));
 488       }
 489
 490       while ($line =~ m/(?:             # Start von Variante 1: LxERP.t8('...'); ohne darumliegende [% ... %]-Tags
 491                           (LxERP\.t8)\( #   LxERP.t8(                             ::Parameter $1::
 492                           ([\'\"])      #   Anfang des zu übersetzenden Strings   ::Parameter $2::
 493                           (.*?)         #   Der zu übersetzende String            ::Parameter $3::
 494                           (?<!\\)\2     #   Ende des zu übersetzenden Strings
 495                         |               # Start von Variante 2: [% '...' | $T8 %]
 496                           \[\%          #   Template-Start-Tag
 497                           [\-~#]?       #   Whitespace-Unterdrückung
 498                           \s*           #   Optional beliebig viele Whitespace
 499                           ([\'\"])      #   Anfang des zu übersetzenden Strings   ::Parameter $4::
 500                           (.*?)         #   Der zu übersetzende String            ::Parameter $5::
 501                           (?<!\\)\4     #   Ende des zu übersetzenden Strings
 502                           \s*\|\s*      #   Pipe-Zeichen mit optionalen Whitespace davor und danach
 503                           (\$T8)        #   Filteraufruf                          ::Parameter $6::
 504                           .*?           #   Optionale Argumente für den Filter
 505                           \s*           #   Whitespaces
 506                           [\-~#]?       #   Whitespace-Unterdrückung
 507                           \%\]          #   Template-Ende-Tag
 508                         )
 509                        /ix) {
 510         my $module = $1 || $6;
 511         my $string = $3 || $5;
 512         print "Found filter >>>$string<<<\n" if $debug;
 513         substr $line, $LAST_MATCH_START[1], $LAST_MATCH_END[0] - $LAST_MATCH_START[0], '';
 514
 515         $string                         = unescape_template_string($string);
 516         $cached{$file}{all}{$string}    = 1;
 517         $cached{$file}{html}{$string}   = 1;
 518         $cached{$file}{submit}{$string} = 1 if $PREMATCH =~ /$submitsearch/;
 519         $plugins{needed}->{T8}          = 1 if $module eq '$T8';
 520         $plugins{needed}->{LxERP}       = 1 if $module eq 'LxERP.t8';
 521       }
 522
 523       while ($line =~ m/\[\%          # Template-Start-Tag
 524                         [\-~#]?       # Whitespace-Unterdrückung
 525                         \s*           # Optional beliebig viele Whitespace
 526                         (?:           # Die erkannten Template-Direktiven
 527                           PROCESS
 528                         |
 529                           INCLUDE
 530                         )
 531                         \s+           # Mindestens ein Whitespace
 532                         [\'\"]?       # Anfang des Dateinamens
 533                         ([^\s]+)      # Beliebig viele Nicht-Whitespaces -- Dateiname
 534                         \.html        # Endung ".html", ansonsten kann es der Name eines Blocks sein
 535                        /ix) {
 536         my $new_file_name = "$basedir/templates/webpages/$1.html";
 537         $cached{$file}{scanh}{$new_file_name} = 1;
 538         substr $line, $LAST_MATCH_START[1], $LAST_MATCH_END[0] - $LAST_MATCH_START[0], '';
 539       }
 540     }
 541
 542     close(IN);
 543
 544     foreach my $plugin (keys %{ $plugins{needed} }) {
 545       next if ($plugins{loaded}->{$plugin});
 546       print "E: " . strip_base($file) . " requires the Template plugin '$plugin', but is not loaded with '[\% USE $plugin \%]'.\n";
 547     }
 548   }
 549
 550   # copy back into global arrays
 551   map { $alllocales{$_} = 1 } keys %{$cached{$file}{all}};
 552   map { $locale{$_} = 1 }     keys %{$cached{$file}{html}};
 553   map { $submit{$_} = 1 }     keys %{$cached{$file}{submit}};
 554
 555   map { scanhtmlfile($_)  }   keys %{$cached{$file}{scanh}};
 556
 557   @referenced_html_files{keys %{$cached{$file}{scanh}}} = (1) x scalar keys %{$cached{$file}{scanh}};
 558 }
 559
 560 sub search_unused_htmlfiles {
 561   my @unscanned_dirs = ('../../templates/webpages');
 562
 563   while (scalar @unscanned_dirs) {
 564     my $dir = shift @unscanned_dirs;
 565
 566     foreach my $entry (<$dir/*>) {
 567       if (-d $entry) {
 568         push @unscanned_dirs, $entry;
 569
 570       } elsif (($entry =~ /_master.html$/) && -f $entry && !$referenced_html_files{$entry}) {
 571         print "W: unused HTML template: " . strip_base($entry) . "\n";
 572
 573       }
 574     }
 575   }
 576 }
 577
 578 sub strip_base {
 579   my $s =  "$_[0]";             # Create a copy of the string.
 580
 581   $s    =~ s|^../../||;
 582   $s    =~ s|templates/webpages/||;
 583
 584   return $s;
 585 }
 586
 587 sub _single_quote {
 588   my $val = shift;
 589   $val =~ s/(\'|\\$)/\\$1/g;
 590   return  "'" . $val .  "'";
 591 }
 592
 593 sub _print_line {
 594   my $key      = _single_quote(shift);
 595   my $text     = _single_quote(shift);
 596   my %params   = @_;
 597   my $template = $params{template} || qq|  %-29s => %s,\n|;
 598   my $fh       = $params{fh}       || croak 'need filehandle in _print_line';
 599
 600   print $fh sprintf $template, $key, $text;
 601 }
 602
 603 sub generate_file {
 604   my %params = @_;
 605
 606   my $file      = $params{file}   || croak 'need filename in generate_file';
 607   my $header    = $params{header};
 608   my $lines     = $params{data_sub};
 609   my $data_name = $params{data_name};
 610   my @delim     = split //, ($params{delim} || '{}');
 611
 612   open my $fh, '>:encoding(utf8)', $file or die "$! : $file";
 613
 614   $charset =~ s/\r?\n//g;
 615   my $emacs_charset = lc $charset;
 616
 617   print $fh "#!/usr/bin/perl\n# -*- coding: $emacs_charset; -*-\n# vim: fenc=$charset\n\nuse utf8;\n\n";
 618   print $fh $header, "\n" if $header;
 619   print $fh "$data_name = $delim[0]\n" if $data_name;
 620
 621   $lines->(fh => $fh);
 622
 623   print $fh qq|$delim[1];\n\n1;\n|;
 624   close $fh;
 625 }
 626
 627 sub slurp {
 628   my $file = shift;
 629   do { local ( @ARGV, $/ ) = $file; <> }
 630 }
 631
 632 __END__
 633
 634 =head1 NAME
 635
 636 locales.pl - Collect strings for translation in Lx-Office
 637
 638 =head1 SYNOPSIS
 639
 640 locales.pl [options] lang_code
 641
 642  Options:
 643   -n, --no-custom-files  Do not process files whose name contains "_"
 644   -c, --check-files      Run extended checks on HTML files
 645   -v, --verbose          Be more verbose
 646   -h, --help             Show this help
 647
 648 =head1 OPTIONS
 649
 650 =over 8
 651
 652 =item B<-n>, B<--no-custom-files>
 653
 654 Do not process files whose name contains "_", e.g. "custom_io.pl".
 655
 656 =item B<-c>, B<--check-files>
 657
 658 Run extended checks on the usage of templates. This can be used to
 659 discover HTML templates that are never used as well as the usage of
 660 non-existing HTML templates.
 661
 662 =item B<-v>, B<--verbose>
 663
 664 Be more verbose.
 665
 666 =back
 667
 668 =head1 DESCRIPTION
 669
 670 This script collects strings from Perl files, the menu.ini file and
 671 HTML templates and puts them into the file "all" for translation.
 672
 673 =cut