source: trunk/third/ggv/intltool-merge.in @ 18703

Revision 18703, 15.8 KB checked in by ghudson, 21 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r18702, which included commits to RCS files with non-trunk default branches.
Line 
1#!@INTLTOOL_PERL@ -w
2
3#
4#  The Intltool Message Merger
5#
6#  Copyright (C) 2000, 2002 Free Software Foundation.
7#  Copyright (C) 2000, 2001 Eazel, Inc
8#
9#  Intltool is free software; you can redistribute it and/or
10#  modify it under the terms of the GNU General Public License
11#  version 2 published by the Free Software Foundation.
12#
13#  Intltool is distributed in the hope that it will be useful,
14#  but WITHOUT ANY WARRANTY; without even the implied warranty of
15#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16#  General Public License for more details.
17#
18#  You should have received a copy of the GNU General Public License
19#  along with this program; if not, write to the Free Software
20#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21#
22#  As a special exception to the GNU General Public License, if you
23#  distribute this file as part of a program that contains a
24#  configuration script generated by Autoconf, you may include it under
25#  the same distribution terms that you use for the rest of that program.
26#
27#  Authors:  Maciej Stachowiak <mjs@noisehavoc.org>
28#            Kenneth Christiansen <kenneth@gnu.org>
29#            Darin Adler <darin@bentspoon.com>
30#
31#  Proper XML UTF-8'ification written by Cyrille Chepelov <chepelov@calixo.net>
32#
33
34## Release information
35my $PROGRAM = "intltool-merge";
36my $PACKAGE = "intltool";
37my $VERSION = "0.22";
38
39## Loaded modules
40use strict;
41use Getopt::Long;
42
43## Scalars used by the option stuff
44my $HELP_ARG = 0;
45my $VERSION_ARG = 0;
46my $BA_STYLE_ARG = 0;
47my $XML_STYLE_ARG = 0;
48my $KEYS_STYLE_ARG = 0;
49my $DESKTOP_STYLE_ARG = 0;
50my $SCHEMAS_STYLE_ARG = 0;
51my $QUIET_ARG = 0;
52my $PASS_THROUGH_ARG = 0;
53my $UTF8_ARG = 0;
54my $cache_file;
55
56## Handle options
57GetOptions
58(
59 "help" => \$HELP_ARG,
60 "version" => \$VERSION_ARG,
61 "quiet|q" => \$QUIET_ARG,
62 "oaf-style|o" => \$BA_STYLE_ARG, ## for compatibility
63 "ba-style|b" => \$BA_STYLE_ARG,
64 "xml-style|x" => \$XML_STYLE_ARG,
65 "keys-style|k" => \$KEYS_STYLE_ARG,
66 "desktop-style|d" => \$DESKTOP_STYLE_ARG,
67 "schemas-style|s" => \$SCHEMAS_STYLE_ARG,
68 "pass-through|p" => \$PASS_THROUGH_ARG,
69 "utf8|u" => \$UTF8_ARG,
70 "cache|c=s" => \$cache_file
71 ) or &error;
72
73my $PO_DIR;
74my $FILE;
75my $OUTFILE;
76
77my %po_files_by_lang = ();
78my %translations = ();
79
80# Use this instead of \w for XML files to handle more possible characters.
81my $w = "[-A-Za-z0-9._:]";
82
83# XML quoted string contents
84my $q = "[^\\\"]*";
85
86## Check for options.
87
88if ($VERSION_ARG) {
89        &print_version;
90} elsif ($HELP_ARG) {
91        &print_help;
92} elsif ($BA_STYLE_ARG && @ARGV > 2) {
93        &preparation;
94        &print_message;
95        &ba_merge_translations;
96        &finalize;
97} elsif ($XML_STYLE_ARG && @ARGV > 2) {
98        &utf8_sanity_check;
99        &preparation;
100        &print_message;
101        &xml_merge_translations;
102        &finalize;
103} elsif ($KEYS_STYLE_ARG && @ARGV > 2) {
104        &utf8_sanity_check;
105        &preparation;
106        &print_message;
107        &keys_merge_translations;
108        &finalize;
109} elsif ($DESKTOP_STYLE_ARG && @ARGV > 2) {
110        &preparation;
111        &print_message;
112        &desktop_merge_translations;
113        &finalize;
114} elsif ($SCHEMAS_STYLE_ARG && @ARGV > 2) {
115        &preparation;
116        &print_message;
117        &schemas_merge_translations;
118        &finalize;
119} else {
120        &print_help;
121}
122
123exit;
124
125## Sub for printing release information
126sub print_version
127{
128    print "${PROGRAM} (${PACKAGE}) ${VERSION}\n";
129    print "Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen.\n\n";
130    print "Copyright (C) 2000-2002 Free Software Foundation, Inc.\n";
131    print "Copyright (C) 2000-2001 Eazel, Inc.\n";
132    print "This is free software; see the source for copying conditions.  There is NO\n";
133    print "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n";
134    exit;
135}
136
137## Sub for printing usage information
138sub print_help
139{
140    print "Usage: ${PROGRAM} [OPTIONS] PO_DIRECTORY FILENAME OUTPUT_FILE\n";
141    print "Generates an output file that includes translated versions of some attributes,\n";
142    print "from an untranslated source and a po directory that includes translations.\n\n";
143    print "  -b, --ba-style         includes translations in the bonobo-activation style\n";
144    print "  -d, --desktop-style    includes translations in the desktop style\n";
145    print "  -k, --keys-style       includes translations in the keys style\n";
146    print "  -s, --schemas-style    includes translations in the schemas style\n";
147    print "  -x, --xml-style        includes translations in the standard xml style\n";
148    print "  -u, --utf8             convert all strings to UTF-8 before merging\n";
149    print "  -p, --pass-through     use strings as found in .po files, without\n";
150    print "                         conversion (STRONGLY unrecommended with -x)\n";
151    print "  -q, --quiet            suppress most messages\n";
152    print "      --help             display this help and exit\n";
153    print "      --version          output version information and exit\n";
154    print "\nReport bugs to bugzilla.gnome.org, module intltool, or contact us through \n";
155    print "<xml-i18n-tools-list\@gnome.org>.\n";
156    exit;
157}
158
159
160## Sub for printing error messages
161sub print_error
162{
163    print "Try `${PROGRAM} --help' for more information.\n";
164    exit;
165}
166
167
168sub print_message
169{
170    print "Merging translations into $OUTFILE.\n" unless $QUIET_ARG;
171}
172
173
174sub preparation
175{
176    $PO_DIR = $ARGV[0];
177    $FILE = $ARGV[1];
178    $OUTFILE = $ARGV[2];
179
180    &gather_po_files;
181    &get_translation_database;
182}
183
184# General-purpose code for looking up translations in .po files
185
186sub po_file2lang
187{
188    my ($tmp) = @_;
189    $tmp =~ s/^.*\/(.*)\.po$/$1/;
190    return $tmp;
191}
192
193sub gather_po_files
194{
195    for my $po_file (glob "$PO_DIR/*.po") {
196        $po_files_by_lang{po_file2lang($po_file)} = $po_file;
197    }
198}
199
200sub get_po_encoding
201{
202    my ($in_po_file) = @_;
203    my $encoding = "";
204
205    open IN_PO_FILE, $in_po_file or die;
206    while (<IN_PO_FILE>) {
207        ## example: "Content-Type: text/plain; charset=ISO-8859-1\n"
208        if (/Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/) {
209            $encoding = $1;
210            last;
211        }
212    }
213    close IN_PO_FILE;
214
215    if (!$encoding) {
216        print "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n";
217        $encoding = "ISO-8859-1";
218    }
219    return $encoding
220}
221
222sub utf8_sanity_check
223{
224    if (!$UTF8_ARG) {
225        if (!$PASS_THROUGH_ARG) {
226            $PASS_THROUGH_ARG="1";
227        }
228    }
229}
230
231sub get_translation_database
232{
233    if ($cache_file) {
234        &get_cached_translation_database;
235    } else {
236        &create_translation_database;
237    }
238}
239
240sub get_newest_po_age
241{
242    my $newest_age;
243
244    foreach my $file (values %po_files_by_lang) {
245        my $file_age = -M $file;
246        $newest_age = $file_age if !$newest_age || $file_age < $newest_age;
247    }
248
249    return $newest_age;
250}
251
252sub create_cache
253{
254    print "Generating and caching the translation database\n" unless $QUIET_ARG;
255
256    &create_translation_database;
257
258    open CACHE, ">$cache_file" || die;
259    print CACHE join "\x01", %translations;
260    close CACHE;
261}
262
263sub load_cache
264{
265    print "Found cached translation database\n" unless $QUIET_ARG;
266
267    my $contents;
268    open CACHE, "<$cache_file" || die;
269    {
270        local $/;
271        $contents = <CACHE>;
272    }
273    close CACHE;
274    %translations = split "\x01", $contents;
275}
276
277sub get_cached_translation_database
278{
279    my $cache_file_age = -M $cache_file;
280    if (defined $cache_file_age) {
281        if ($cache_file_age <= &get_newest_po_age) {
282            &load_cache;
283            return;
284        }
285        print "Found too-old cached translation database\n" unless $QUIET_ARG;
286    }
287
288    &create_cache;
289}
290
291sub create_translation_database
292{
293    for my $lang (keys %po_files_by_lang) {
294        my $po_file = $po_files_by_lang{$lang};
295
296        if ($UTF8_ARG) {
297            my $encoding = get_po_encoding ($po_file);
298            if (lc $encoding eq "utf-8") {
299                open PO_FILE, "<$po_file";     
300            } else {
301                my $iconv = $ENV{"INTLTOOL_ICONV"} || "iconv";
302                open PO_FILE, "$iconv -f $encoding -t UTF-8 $po_file|";
303            }
304        } else {
305            open PO_FILE, "<$po_file"; 
306        }
307
308        my $nextfuzzy = 0;
309        my $inmsgid = 0;
310        my $inmsgstr = 0;
311        my $msgid = "";
312        my $msgstr = "";
313        while (<PO_FILE>) {
314            $nextfuzzy = 1 if /^#, fuzzy/;
315            if (/^msgid "((\\.|[^\\])*)"/ ) {
316                $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
317                $msgid = "";
318                $msgstr = "";
319
320                if ($nextfuzzy) {
321                    $inmsgid = 0;
322                } else {
323                    $msgid = unescape_po_string($1);
324                    $inmsgid = 1;
325                }
326                $inmsgstr = 0;
327                $nextfuzzy = 0;
328            }
329            if (/^msgstr "((\\.|[^\\])*)"/) {
330                $msgstr = unescape_po_string($1);
331                $inmsgstr = 1;
332                $inmsgid = 0;
333            }
334            if (/^"((\\.|[^\\])*)"/) {
335                $msgid .= unescape_po_string($1) if $inmsgid;
336                $msgstr .= unescape_po_string($1) if $inmsgstr;
337            }
338        }
339        $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
340    }
341}
342
343sub finalize
344{
345}
346
347sub unescape_one_sequence
348{
349    my ($sequence) = @_;
350
351    return "\\" if $sequence eq "\\\\";
352    return "\"" if $sequence eq "\\\"";
353
354    # gettext also handles \n, \t, \b, \r, \f, \v, \a, \xxx (octal),
355    # \xXX (hex) and has a comment saying they want to handle \u and \U.
356
357    return $sequence;
358}
359
360sub unescape_po_string
361{
362    my ($string) = @_;
363
364    $string =~ s/(\\.)/unescape_one_sequence($1)/eg;
365
366    return $string;
367}
368
369sub entity_decode
370{
371    local ($_) = @_;
372
373    s/&apos;/'/g; # '
374    s/&quot;/"/g; # "
375    s/&amp;/&/g;
376
377    return $_;
378}
379
380sub entity_encode
381{
382    my ($pre_encoded) = @_;
383
384    my @list_of_chars = unpack ('C*', $pre_encoded);
385
386    if ($PASS_THROUGH_ARG) {
387        return join ('', map (&entity_encode_int_even_high_bit, @list_of_chars));
388    } else {
389        return join ('', map (&entity_encode_int_minimalist, @list_of_chars));
390    }
391}
392
393sub entity_encode_int_minimalist
394{
395    return "&quot;" if $_ == 34;
396    return "&amp;" if $_ == 38;
397    return "&apos;" if $_ == 39;
398    return chr $_;
399}
400
401sub entity_encode_int_even_high_bit
402{
403    if ($_ > 127 || $_ == 34 || $_ == 38 || $_ == 39) {
404        # the ($_ > 127) should probably be removed
405        return "&#" . $_ . ";";
406    } else {
407        return chr $_;
408    }
409}
410
411sub entity_encoded_translation
412{
413    my ($lang, $string) = @_;
414
415    my $translation = $translations{$lang, $string};
416    return $string if !$translation;
417    return entity_encode ($translation);
418}
419
420## XML (bonobo-activation specific) merge code
421
422sub ba_merge_translations
423{
424    my $source;
425
426    {
427       local $/; # slurp mode
428       open INPUT, "<$FILE" or die "can't open $FILE: $!";
429       $source = <INPUT>;
430       close INPUT;
431    }
432
433    open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!";
434
435    while ($source =~ s|^(.*?)([ \t]*<\s*$w+\s+($w+\s*=\s*"$q"\s*)+/?>)([ \t]*\n)?||s) {
436        print OUTPUT $1;
437
438        my $node = $2 . "\n";
439
440        my @strings = ();
441        $_ = $node;
442        while (s/(\s)_($w+\s*=\s*"($q)")/$1$2/s) {
443             push @strings, entity_decode($3);
444        }
445        print OUTPUT;
446
447        my %langs;
448        for my $string (@strings) {
449            for my $lang (keys %po_files_by_lang) {
450                $langs{$lang} = 1 if $translations{$lang, $string};
451            }
452        }
453       
454        for my $lang (sort keys %langs) {
455            $_ = $node;
456            s/(\sname\s*=\s*)"($q)"/$1"$2-$lang"/s;
457            s/(\s)_($w+\s*=\s*")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg;
458            print OUTPUT;
459        }
460    }
461
462    print OUTPUT $source;
463
464    close OUTPUT;
465}
466
467
468## XML (non-bonobo-activation) merge code
469
470sub xml_merge_translations
471{
472    my $source;
473
474    {
475       local $/; # slurp mode
476       open INPUT, "<$FILE" or die "can't open $FILE: $!";
477       $source = <INPUT>;
478       close INPUT;
479    }
480
481    open OUTPUT, ">$OUTFILE" or die;
482
483    # FIXME: support attribute translations
484
485    # Empty nodes never need translation, so unmark all of them.
486    # For example, <_foo/> is just replaced by <foo/>.
487    $source =~ s|<\s*_($w+)\s*/>|<$1/>|g;
488
489    # Support for <_foo>blah</_foo> style translations.
490    while ($source =~ s|^(.*?)([ \t]*)<\s*_($w+)\s*>(.*?)<\s*/_\3\s*>([ \t]*\n)?||s) {
491        print OUTPUT $1;
492
493        my $spaces = $2;
494        my $tag = $3;
495        my $string = $4;
496
497        print OUTPUT "$spaces<$tag>$string</$tag>\n";
498
499        $string =~ s/\s+/ /g;
500        $string =~ s/^ //;
501        $string =~ s/ $//;
502        $string = entity_decode($string);
503
504        for my $lang (sort keys %po_files_by_lang) {
505            my $translation = $translations{$lang, $string};
506            next if !$translation;
507            $translation = entity_encode($translation);
508            print OUTPUT "$spaces<$tag xml:lang=\"$lang\">$translation</$tag>\n";
509        }
510    }
511
512    print OUTPUT $source;
513
514    close OUTPUT;
515}
516
517sub keys_merge_translations
518{
519    open INPUT, "<${FILE}" or die;
520    open OUTPUT, ">${OUTFILE}" or die;
521
522    while (<INPUT>) {
523        if (s/^(\s*)_(\w+=(.*))/$1$2/)  {
524            my $string = $3;
525
526            print OUTPUT;
527
528            my $non_translated_line = $_;
529
530            for my $lang (sort keys %po_files_by_lang) {
531                my $translation = $translations{$lang, $string};
532                next if !$translation;
533
534                $_ = $non_translated_line;
535                s/(\w+)=.*/[$lang]$1=$translation/;
536                print OUTPUT;
537            }
538        } else {
539            print OUTPUT;
540        }
541    }
542
543    close OUTPUT;
544    close INPUT;
545}
546
547sub desktop_merge_translations
548{
549    open INPUT, "<${FILE}" or die;
550    open OUTPUT, ">${OUTFILE}" or die;
551
552    while (<INPUT>) {
553        if (s/^(\s*)_(\w+=(.*))/$1$2/)  {
554            my $string = $3;
555
556            print OUTPUT;
557
558            my $non_translated_line = $_;
559
560            for my $lang (sort keys %po_files_by_lang) {
561                my $translation = $translations{$lang, $string};
562                next if !$translation;
563
564                $_ = $non_translated_line;
565                s/(\w+)=.*/${1}[$lang]=$translation/;
566                print OUTPUT;
567            }
568        } else {
569            print OUTPUT;
570        }
571    }
572
573    close OUTPUT;
574    close INPUT;
575}
576
577sub schemas_merge_translations
578{
579    my $source;
580
581    {
582       local $/; # slurp mode
583       open INPUT, "<$FILE" or die "can't open $FILE: $!";
584       $source = <INPUT>;
585       close INPUT;
586    }
587
588    open OUTPUT, ">$OUTFILE" or die;
589
590    # FIXME: support attribute translations
591
592    # Empty nodes never need translation, so unmark all of them.
593    # For example, <_foo/> is just replaced by <foo/>.
594    $source =~ s|<\s*_($w+)\s*/>|<$1/>|g;
595
596    # Support for <_foo>blah</_foo> style translations.
597
598    my $regex_start = "^(.*?)([ \t]*)<locale name=\"C\">";
599    my $regex_short = "([ \t\n]*)<short>(.*?)</short>";
600    my $regex_long  = "([ \t\n]*)<long>(.*?)</long>";
601    my $regex_end   = "([ \t\n]*)</locale>";
602
603    while ($source =~ s|$regex_start$regex_short$regex_long$regex_end||s) {
604        print OUTPUT $1;
605
606        my $locale_start_spaces = $2;
607        my $locale_end_spaces = $7;
608        my $short_spaces = $3;
609        my $short_string = $4;
610        my $long_spaces = $5;
611        my $long_string = $6;
612
613        # English first
614
615        print OUTPUT "$locale_start_spaces<locale name=\"C\">";
616        print OUTPUT "$short_spaces<short>$short_string</short>";
617        print OUTPUT "$long_spaces<long>$long_string</long>";
618        print OUTPUT "$locale_end_spaces</locale>";
619
620        $short_string =~ s/\s+/ /g;
621        $short_string =~ s/^ //;
622        $short_string =~ s/ $//;
623        $short_string = entity_decode($short_string);
624
625        $long_string =~ s/\s+/ /g;
626        $long_string =~ s/^ //;
627        $long_string =~ s/ $//;
628        $long_string = entity_decode($long_string);
629
630        for my $lang (sort keys %po_files_by_lang) {
631            my $short_translation = $translations{$lang, $short_string};
632            my $long_translation  = $translations{$lang, $long_string};
633
634            next if (!$short_translation && !$long_translation);
635
636            print OUTPUT "\n$locale_start_spaces<locale name=\"$lang\">";
637
638            if ($short_translation)
639            {
640                $short_translation = entity_encode($short_translation);
641                print OUTPUT "$short_spaces<short>$short_translation</short>";
642            }
643
644            if ($long_translation)
645            {
646                $long_translation = entity_encode($long_translation);
647                print OUTPUT "$long_spaces<long>$long_translation</long>";
648            }       
649
650            print OUTPUT "$locale_end_spaces</locale>";
651        }
652    }
653
654    print OUTPUT $source;
655
656    close OUTPUT;
657}
Note: See TracBrowser for help on using the repository browser.