source: trunk/third/control-center/xml-i18n-merge.in @ 17134

Revision 17134, 13.1 KB checked in by ghudson, 23 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r17133, which included commits to RCS files with non-trunk default branches.
RevLine 
[17133]1#!@INTLTOOL_PERL@ -w
[15849]2
3#
[17133]4#  The Intltool Message Merger
[15849]5#
[17133]6#  Copyright (C) 2000, 2002 Free Software Foundation.
[15849]7#  Copyright (C) 2000, 2001 Eazel, Inc
8#
[17133]9#  Intltool is free software; you can redistribute it and/or
10#  modify it under the terms of the GNU General Public License
11#  version 2 published by the Free Software Foundation.
[15849]12#
[17133]13#  Intltool is distributed in the hope that it will be useful,
[15849]14#  but WITHOUT ANY WARRANTY; without even the implied warranty of
15#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16#  General Public License for more details.
17#
18#  You should have received a copy of the GNU General Public License
[17133]19#  along with this program; if not, write to the Free Software
[15849]20#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21#
[17133]22#  As a special exception to the GNU General Public License, if you
23#  distribute this file as part of a program that contains a
24#  configuration script generated by Autoconf, you may include it under
25#  the same distribution terms that you use for the rest of that program.
26#
27#  Authors:  Maciej Stachowiak <mjs@noisehavoc.org>
[15849]28#            Kenneth Christiansen <kenneth@gnu.org>
[17133]29#            Darin Adler <darin@bentspoon.com>
[15849]30#
[17133]31#  Proper XML UTF-8'ification written by Cyrille Chepelov <chepelov@calixo.net>
32#
[15849]33
34## Release information
[17133]35my $PROGRAM = "intltool-merge";
36my $PACKAGE = "intltool";
37my $VERSION = "0.15";
[15849]38
39## Loaded modules
40use strict;
41use Getopt::Long;
42
43## Scalars used by the option stuff
[17133]44my $HELP_ARG = 0;
45my $VERSION_ARG = 0;
46my $BA_STYLE_ARG = 0;
47my $XML_STYLE_ARG = 0;
48my $KEYS_STYLE_ARG = 0;
49my $DESKTOP_STYLE_ARG = 0;
50my $QUIET_ARG = 0;
51my $PASS_THROUGH_ARG = 0;
52my $UTF8_ARG = 0;
53my $cache_file;
[15849]54
55## Handle options
[17133]56GetOptions
57(
58 "help" => \$HELP_ARG,
59 "version" => \$VERSION_ARG,
60 "quiet|q" => \$QUIET_ARG,
61 "oaf-style|o" => \$BA_STYLE_ARG, ## for compatibility
62 "ba-style|b" => \$BA_STYLE_ARG,
63 "xml-style|x" => \$XML_STYLE_ARG,
64 "keys-style|k" => \$KEYS_STYLE_ARG,
65 "desktop-style|d" => \$DESKTOP_STYLE_ARG,
66 "pass-through|p" => \$PASS_THROUGH_ARG,
67 "utf8|u" => \$UTF8_ARG,
68 "cache|c=s" => \$cache_file
69 ) or &error;
[15849]70
71my $PO_DIR;
72my $FILE;
73my $OUTFILE;
74
75my %po_files_by_lang = ();
76my %translations = ();
77
[17133]78# Use this instead of \w for XML files to handle more possible characters.
79my $w = "[-A-Za-z0-9._:]";
[15849]80
[17133]81# XML quoted string contents
82my $q = "[^\\\"]*";
[15849]83
84## Check for options.
85
[17133]86if ($VERSION_ARG) {
87        &print_version;
88} elsif ($HELP_ARG) {
89        &print_help;
90} elsif ($BA_STYLE_ARG && @ARGV > 2) {
[15849]91        &preparation;
[17133]92        &print_message;
93        &ba_merge_translations;
94        &finalize;
95} elsif ($XML_STYLE_ARG && @ARGV > 2) {
96        &utf8_sanity_check;
97        &preparation;
98        &print_message;
[15849]99        &xml_merge_translations;
[17133]100        &finalize;
101} elsif ($KEYS_STYLE_ARG && @ARGV > 2) {
102        &utf8_sanity_check;
[15849]103        &preparation;
[17133]104        &print_message;
[15849]105        &keys_merge_translations;
[17133]106        &finalize;
107} elsif ($DESKTOP_STYLE_ARG && @ARGV > 2) {
[15849]108        &preparation;
[17133]109        &print_message;
[15849]110        &desktop_merge_translations;
[17133]111        &finalize;
112} else {
113        &print_help;
114}
[15849]115
[17133]116exit;
[15849]117
118## Sub for printing release information
[17133]119sub print_version
120{
[15849]121    print "${PROGRAM} (${PACKAGE}) ${VERSION}\n";
[17133]122    print "Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen.\n\n";
123    print "Copyright (C) 2000-2002 Free Software Foundation, Inc.\n";
124    print "Copyright (C) 2000-2001 Eazel, Inc.\n";
[15849]125    print "This is free software; see the source for copying conditions.  There is NO\n";
126    print "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n";
127    exit;
128}
129
130## Sub for printing usage information
[17133]131sub print_help
132{
[15849]133    print "Usage: ${PROGRAM} [OPTIONS] PO_DIRECTORY FILENAME OUTPUT_FILE\n";
[17133]134    print "Generates an output file that includes translated versions of some attributes,\n";
135    print "from an untranslated source and a po directory that includes translations.\n\n";
136    print "  -b, --ba-style         includes translations in the bonobo-activation style\n";
137    print "  -d, --desktop-style    includes translations in the desktop style\n";
138    print "  -k, --keys-style       includes translations in the keys style\n";
139    print "  -x, --xml-style        includes translations in the standard xml style\n";
140    print "  -u, --utf8             convert all strings to UTF-8 before merging\n";
141    print "  -p, --pass-through     use strings as found in .po files, without\n";
142    print "                         conversion (STRONGLY unrecommended with -x)\n";
143    print "  -q, --quiet            suppress most messages\n";
144    print "      --help             display this help and exit\n";
145    print "      --version          output version information and exit\n";
146    print "\nReport bugs to bugzilla.gnome.org, module intltool, or contact us through \n";
147    print "<xml-i18n-tools-list\@gnome.org>.\n";
[15849]148    exit;
149}
150
151
152## Sub for printing error messages
[17133]153sub print_error
154{
[15849]155    print "Try `${PROGRAM} --help' for more information.\n";
156    exit;
157}
158
159
[17133]160sub print_message
161{
[15849]162    print "Merging translations into $OUTFILE.\n" unless $QUIET_ARG;
163}
164
165
[17133]166sub preparation
167{
168    $PO_DIR = $ARGV[0];
169    $FILE = $ARGV[1];
170    $OUTFILE = $ARGV[2];
[15849]171
[17133]172    &gather_po_files;
173    &get_translation_database;
[15849]174}
175
[17133]176# General-purpose code for looking up translations in .po files
[15849]177
[17133]178sub po_file2lang
179{
180    my ($tmp) = @_;
181    $tmp =~ s/^.*\/(.*)\.po$/$1/;
182    return $tmp;
183}
[15849]184
185sub gather_po_files
186{
[17133]187    for my $po_file (glob "$PO_DIR/*.po") {
188        $po_files_by_lang{po_file2lang($po_file)} = $po_file;
189    }
190}
[15849]191
[17133]192sub get_po_encoding
193{
194    my ($in_po_file) = @_;
195    my $encoding = "";
[15849]196
[17133]197    open IN_PO_FILE, $in_po_file or die;
198    while (<IN_PO_FILE>) {
199        ## example: "Content-Type: text/plain; charset=ISO-8859-1\n"
200        if (/Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/) {
201            $encoding = $1;
202            last;
203        }
[15849]204    }
[17133]205    close IN_PO_FILE;
206
207    if (!$encoding) {
208        print "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n";
209        $encoding = "ISO-8859-1";
210    }
211    return $encoding
[15849]212}
213
[17133]214sub utf8_sanity_check
215{
216    if (!$UTF8_ARG) {
217        if (!$PASS_THROUGH_ARG) {
218            $PASS_THROUGH_ARG="1";
219        }
220    }
[15849]221}
222
[17133]223sub get_translation_database
224{
225    if ($cache_file) {
226        &get_cached_translation_database;
227    } else {
228        &create_translation_database;
229    }
230}
[15849]231
[17133]232sub get_newest_po_age
[15849]233{
[17133]234    my $newest_age;
[15849]235
[17133]236    foreach my $file (values %po_files_by_lang) {
237        my $file_age = -M $file;
238        $newest_age = $file_age if !$newest_age || $file_age < $newest_age;
239    }
[15849]240
[17133]241    return $newest_age;
242}
[15849]243
[17133]244sub create_cache
245{
246    print "Generating and caching the translation database\n" unless $QUIET_ARG;
247
248    &create_translation_database;
249
250    open CACHE, ">$cache_file" || die;
251    print CACHE join "\x01", %translations;
252    close CACHE;
253}
254
255sub load_cache
256{
257    print "Found cached translation database\n" unless $QUIET_ARG;
258
259    my $contents;
260    open CACHE, "<$cache_file" || die;
261    {
262        local $/;
263        $contents = <CACHE>
[15849]264    }
[17133]265    close CACHE;
266    %translations = split "\x01", $contents;
[15849]267}
268
[17133]269sub get_cached_translation_database
[15849]270{
[17133]271    my $cache_file_age = -M $cache_file;
272    if ($cache_file_age && $cache_file_age <= &get_newest_po_age) {
273        &load_cache;
274        return;
275   }
[15849]276
[17133]277   &create_cache;
278}
279
280sub create_translation_database
281{
282    for my $lang (keys %po_files_by_lang) {
283        my $po_file = $po_files_by_lang{$lang};
284
285        if ($UTF8_ARG) {
286            my $encoding = get_po_encoding ($po_file);
287            if (lc $encoding eq "utf-8") {
288                open PO_FILE, "<$po_file";     
289            } else {
290                open PO_FILE, "iconv -f $encoding -t UTF-8 $po_file|"; 
291            }
292        } else {
293            open PO_FILE, "<$po_file"; 
[15849]294        }
[17133]295
296        my $inmsgid = 0;
297        my $inmsgstr = 0;
298        my $msgid = "";
299        my $msgstr = "";
300        while (<PO_FILE>) {
301            if (/^msgid "((\\.|[^\\])*)"/ ) {
302                $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgstr && $msgstr ne $msgid;
303                $msgid = unescape_po_string($1);
304                $inmsgid = 1;
305                $inmsgstr = 0;
306            }
307            if (/^msgstr "((\\.|[^\\])*)"/) {
308                $msgstr = unescape_po_string($1);
309                $inmsgstr = 1;
310                $inmsgid = 0;
311            }
312            if (/^"((\\.|[^\\])*)"/) {
313                $msgid .= unescape_po_string($1) if $inmsgid;
314                $msgstr .= unescape_po_string($1) if $inmsgstr;
315            }
316        }
317        $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgstr && $msgstr ne $msgid;
[15849]318    }
[17133]319}
[15849]320
[17133]321sub finalize
322{
[15849]323}
324
[17133]325sub unescape_one_sequence
326{
327    my ($sequence) = @_;
[15849]328
[17133]329    return "\\" if $sequence eq "\\\\";
330    return "\"" if $sequence eq "\\\"";
331
332    # gettext also handles \n, \t, \b, \r, \f, \v, \a, \xxx (octal),
333    # \xXX (hex) and has a comment saying they want to handle \u and \U.
334
335    return $sequence;
336}
337
338sub unescape_po_string
[15849]339{
[17133]340    my ($string) = @_;
341
342    $string =~ s/(\\.)/unescape_one_sequence($1)/eg;
343
344    return $string;
[15849]345}
346
[17133]347sub entity_decode
[15849]348{
[17133]349    local ($_) = @_;
[15849]350
[17133]351    s/&apos;/'/g; # '
352    s/&gt;/>/g;
353    s/&lt;/</g;
354    s/&quot;/"/g; # "
355    s/&amp;/&/g;
[15849]356
[17133]357    return $_;
[15849]358}
359
360sub entity_encode
361{
362    my ($pre_encoded) = @_;
363
364    my @list_of_chars = unpack ('C*', $pre_encoded);
365
[17133]366    if ($PASS_THROUGH_ARG) {
367        return join ('', map (&entity_encode_int_even_high_bit, @list_of_chars));
368    } else {
369        return join ('', map (&entity_encode_int_minimalist, @list_of_chars));
370    }
[15849]371}
372
[17133]373sub entity_encode_int_minimalist
[15849]374{
[17133]375    return "&quot;" if $_ == 34;
376    return "&amp;" if $_ == 38;
377    return "&apos;" if $_ == 39;
378    return "&lt;" if $_ == 60;
379    return "&gt;" if $_ == 62;
380    return chr $_;
381}
382
383sub entity_encode_int_even_high_bit
384{
385    if ($_ > 127 || $_ == 34 || $_ == 38 || $_ == 39 || $_ == 60 || $_ == 62) {
386        # the ($_ > 127) should probably be removed
387        return "&#" . $_ . ";";
[15849]388    } else {
389        return chr $_;
390    }
391}
392
[17133]393sub entity_encoded_translation
394{
395    my ($lang, $string) = @_;
[15849]396
[17133]397    my $translation = $translations{$lang, $string};
398    return $string if !$translation;
399    return entity_encode ($translation);
400}
[15849]401
[17133]402## XML (bonobo-activation specific) merge code
[15849]403
[17133]404sub ba_merge_translations
405{
406    my $source;
[15849]407
[17133]408    {
409       local $/; # slurp mode
410       open INPUT, "<$FILE" or die "can't open $FILE: $!";
411       $source = <INPUT>;
412       close INPUT;
413    }
414
415    open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!";
416
417    while ($source =~ s|^(.*?)([ \t]*<\s*$w+\s+($w+\s*=\s*"$q"\s*)+/?>)([ \t]*\n)?||s) {
418        print OUTPUT $1;
419
420        my $node = $2 . "\n";
421
422        my @strings = ();
423        $_ = $node;
424        while (s/(\s)_($w+\s*=\s*"($q)")/$1$2/s) {
425             push @strings, entity_decode($3);
426        }
427        print OUTPUT;
428
429        my %langs;
430        for my $string (@strings) {
431            for my $lang (keys %po_files_by_lang) {
432                $langs{$lang} = 1 if $translations{$lang, $string};
433            }
434        }
435       
436        for my $lang (sort keys %langs) {
437            $_ = $node;
438            s/(\sname\s*=\s*)"($q)"/$1"$2-$lang"/s;
439            s/(\s)_($w+\s*=\s*")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg;
440            print OUTPUT;
441        }
442    }
443
444    print OUTPUT $source;
445
446    close OUTPUT;
447}
448
449
450## XML (non-bonobo-activation) merge code
451
[15849]452sub xml_merge_translations
453{
[17133]454    my $source;
455
456    {
[15849]457       local $/; # slurp mode
458       open INPUT, "<$FILE" or die "can't open $FILE: $!";
[17133]459       $source = <INPUT>;
[15849]460       close INPUT;
461    }
462
[17133]463    open OUTPUT, ">$OUTFILE" or die;
[15849]464
[17133]465    # FIXME: support attribute translations
[15849]466
[17133]467    # Empty nodes never need translation, so unmark all of them.
468    # For example, <_foo/> is just replaced by <foo/>.
469    $source =~ s|<\s*_($w+)\s*/>|<$1/>|g;
[15849]470
[17133]471    # Support for <_foo>blah</_foo> style translations.
472    while ($source =~ s|^(.*?)([ \t]*)<\s*_($w+)\s*>(.*?)<\s*/_\3\s*>([ \t]*\n)?||s) {
473        print OUTPUT $1;
[15849]474
[17133]475        my $spaces = $2;
476        my $tag = $3;
477        my $string = $4;
[15849]478
[17133]479        print OUTPUT "$spaces<$tag>$string</$tag>\n";
480
481        $string =~ s/\s+/ /g;
482        $string =~ s/^ //;
483        $string =~ s/ $//;
484        $string = entity_decode($string);
485
486        for my $lang (sort keys %po_files_by_lang) {
487            my $translation = $translations{$lang, $string};
488            next if !$translation;
489            $translation = entity_encode($translation);
490            print OUTPUT "$spaces<$tag xml:lang=\"$lang\">$translation</$tag>\n";
[15849]491        }
492    }
493
[17133]494    print OUTPUT $source;
[15849]495
496    close OUTPUT;
497}
498
499sub keys_merge_translations
[17133]500{
501    open INPUT, "<${FILE}" or die;
502    open OUTPUT, ">${OUTFILE}" or die;
[15849]503
504    while (<INPUT>) {
[17133]505        if (s/^(\s*)_(\w+=(.*))/$1$2/)  {
506            my $string = $3;
[15849]507
[17133]508            print OUTPUT;
509
510            my $non_translated_line = $_;
511
512            for my $lang (sort keys %po_files_by_lang) {
513                my $translation = $translations{$lang, $string};
514                next if !$translation;
515
516                $_ = $non_translated_line;
517                s/(\w+)=.*/[$lang]$1=$translation/;
518                print OUTPUT;
[15849]519            }
[17133]520        } else {
521            print OUTPUT;
[15849]522        }
523    }
[17133]524
[15849]525    close OUTPUT;
526    close INPUT;
527}
528
529sub desktop_merge_translations
530{
[17133]531    open INPUT, "<${FILE}" or die;
532    open OUTPUT, ">${OUTFILE}" or die;
[15849]533
534    while (<INPUT>) {
[17133]535        if (s/^(\s*)_(\w+=(.*))/$1$2/)  {
536            my $string = $3;
[15849]537
[17133]538            print OUTPUT;
[15849]539
[17133]540            my $non_translated_line = $_;
[15849]541
[17133]542            for my $lang (sort keys %po_files_by_lang) {
543                my $translation = $translations{$lang, $string};
544                next if !$translation;
[15849]545
[17133]546                $_ = $non_translated_line;
547                s/(\w+)=.*/${1}[$lang]=$translation/;
548                print OUTPUT;
[15849]549            }
[17133]550        } else {
551            print OUTPUT;
[15849]552        }
553    }
554
555    close OUTPUT;
556    close INPUT;
557}
Note: See TracBrowser for help on using the repository browser.