source: trunk/third/control-center/xml-i18n-merge.in @ 17134

Revision 17134, 13.1 KB checked in by ghudson, 23 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r17133, which included commits to RCS files with non-trunk default branches.
Line 
1#!@INTLTOOL_PERL@ -w
2
3#
4#  The Intltool Message Merger
5#
6#  Copyright (C) 2000, 2002 Free Software Foundation.
7#  Copyright (C) 2000, 2001 Eazel, Inc
8#
9#  Intltool is free software; you can redistribute it and/or
10#  modify it under the terms of the GNU General Public License
11#  version 2 published by the Free Software Foundation.
12#
13#  Intltool is distributed in the hope that it will be useful,
14#  but WITHOUT ANY WARRANTY; without even the implied warranty of
15#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16#  General Public License for more details.
17#
18#  You should have received a copy of the GNU General Public License
19#  along with this program; if not, write to the Free Software
20#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21#
22#  As a special exception to the GNU General Public License, if you
23#  distribute this file as part of a program that contains a
24#  configuration script generated by Autoconf, you may include it under
25#  the same distribution terms that you use for the rest of that program.
26#
27#  Authors:  Maciej Stachowiak <mjs@noisehavoc.org>
28#            Kenneth Christiansen <kenneth@gnu.org>
29#            Darin Adler <darin@bentspoon.com>
30#
31#  Proper XML UTF-8'ification written by Cyrille Chepelov <chepelov@calixo.net>
32#
33
34## Release information
35my $PROGRAM = "intltool-merge";
36my $PACKAGE = "intltool";
37my $VERSION = "0.15";
38
39## Loaded modules
40use strict;
41use Getopt::Long;
42
43## Scalars used by the option stuff
44my $HELP_ARG = 0;
45my $VERSION_ARG = 0;
46my $BA_STYLE_ARG = 0;
47my $XML_STYLE_ARG = 0;
48my $KEYS_STYLE_ARG = 0;
49my $DESKTOP_STYLE_ARG = 0;
50my $QUIET_ARG = 0;
51my $PASS_THROUGH_ARG = 0;
52my $UTF8_ARG = 0;
53my $cache_file;
54
55## Handle options
56GetOptions
57(
58 "help" => \$HELP_ARG,
59 "version" => \$VERSION_ARG,
60 "quiet|q" => \$QUIET_ARG,
61 "oaf-style|o" => \$BA_STYLE_ARG, ## for compatibility
62 "ba-style|b" => \$BA_STYLE_ARG,
63 "xml-style|x" => \$XML_STYLE_ARG,
64 "keys-style|k" => \$KEYS_STYLE_ARG,
65 "desktop-style|d" => \$DESKTOP_STYLE_ARG,
66 "pass-through|p" => \$PASS_THROUGH_ARG,
67 "utf8|u" => \$UTF8_ARG,
68 "cache|c=s" => \$cache_file
69 ) or &error;
70
71my $PO_DIR;
72my $FILE;
73my $OUTFILE;
74
75my %po_files_by_lang = ();
76my %translations = ();
77
78# Use this instead of \w for XML files to handle more possible characters.
79my $w = "[-A-Za-z0-9._:]";
80
81# XML quoted string contents
82my $q = "[^\\\"]*";
83
84## Check for options.
85
86if ($VERSION_ARG) {
87        &print_version;
88} elsif ($HELP_ARG) {
89        &print_help;
90} elsif ($BA_STYLE_ARG && @ARGV > 2) {
91        &preparation;
92        &print_message;
93        &ba_merge_translations;
94        &finalize;
95} elsif ($XML_STYLE_ARG && @ARGV > 2) {
96        &utf8_sanity_check;
97        &preparation;
98        &print_message;
99        &xml_merge_translations;
100        &finalize;
101} elsif ($KEYS_STYLE_ARG && @ARGV > 2) {
102        &utf8_sanity_check;
103        &preparation;
104        &print_message;
105        &keys_merge_translations;
106        &finalize;
107} elsif ($DESKTOP_STYLE_ARG && @ARGV > 2) {
108        &preparation;
109        &print_message;
110        &desktop_merge_translations;
111        &finalize;
112} else {
113        &print_help;
114}
115
116exit;
117
118## Sub for printing release information
119sub print_version
120{
121    print "${PROGRAM} (${PACKAGE}) ${VERSION}\n";
122    print "Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen.\n\n";
123    print "Copyright (C) 2000-2002 Free Software Foundation, Inc.\n";
124    print "Copyright (C) 2000-2001 Eazel, Inc.\n";
125    print "This is free software; see the source for copying conditions.  There is NO\n";
126    print "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n";
127    exit;
128}
129
130## Sub for printing usage information
131sub print_help
132{
133    print "Usage: ${PROGRAM} [OPTIONS] PO_DIRECTORY FILENAME OUTPUT_FILE\n";
134    print "Generates an output file that includes translated versions of some attributes,\n";
135    print "from an untranslated source and a po directory that includes translations.\n\n";
136    print "  -b, --ba-style         includes translations in the bonobo-activation style\n";
137    print "  -d, --desktop-style    includes translations in the desktop style\n";
138    print "  -k, --keys-style       includes translations in the keys style\n";
139    print "  -x, --xml-style        includes translations in the standard xml style\n";
140    print "  -u, --utf8             convert all strings to UTF-8 before merging\n";
141    print "  -p, --pass-through     use strings as found in .po files, without\n";
142    print "                         conversion (STRONGLY unrecommended with -x)\n";
143    print "  -q, --quiet            suppress most messages\n";
144    print "      --help             display this help and exit\n";
145    print "      --version          output version information and exit\n";
146    print "\nReport bugs to bugzilla.gnome.org, module intltool, or contact us through \n";
147    print "<xml-i18n-tools-list\@gnome.org>.\n";
148    exit;
149}
150
151
152## Sub for printing error messages
153sub print_error
154{
155    print "Try `${PROGRAM} --help' for more information.\n";
156    exit;
157}
158
159
160sub print_message
161{
162    print "Merging translations into $OUTFILE.\n" unless $QUIET_ARG;
163}
164
165
166sub preparation
167{
168    $PO_DIR = $ARGV[0];
169    $FILE = $ARGV[1];
170    $OUTFILE = $ARGV[2];
171
172    &gather_po_files;
173    &get_translation_database;
174}
175
176# General-purpose code for looking up translations in .po files
177
178sub po_file2lang
179{
180    my ($tmp) = @_;
181    $tmp =~ s/^.*\/(.*)\.po$/$1/;
182    return $tmp;
183}
184
185sub gather_po_files
186{
187    for my $po_file (glob "$PO_DIR/*.po") {
188        $po_files_by_lang{po_file2lang($po_file)} = $po_file;
189    }
190}
191
192sub get_po_encoding
193{
194    my ($in_po_file) = @_;
195    my $encoding = "";
196
197    open IN_PO_FILE, $in_po_file or die;
198    while (<IN_PO_FILE>) {
199        ## example: "Content-Type: text/plain; charset=ISO-8859-1\n"
200        if (/Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/) {
201            $encoding = $1;
202            last;
203        }
204    }
205    close IN_PO_FILE;
206
207    if (!$encoding) {
208        print "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n";
209        $encoding = "ISO-8859-1";
210    }
211    return $encoding
212}
213
214sub utf8_sanity_check
215{
216    if (!$UTF8_ARG) {
217        if (!$PASS_THROUGH_ARG) {
218            $PASS_THROUGH_ARG="1";
219        }
220    }
221}
222
223sub get_translation_database
224{
225    if ($cache_file) {
226        &get_cached_translation_database;
227    } else {
228        &create_translation_database;
229    }
230}
231
232sub get_newest_po_age
233{
234    my $newest_age;
235
236    foreach my $file (values %po_files_by_lang) {
237        my $file_age = -M $file;
238        $newest_age = $file_age if !$newest_age || $file_age < $newest_age;
239    }
240
241    return $newest_age;
242}
243
244sub create_cache
245{
246    print "Generating and caching the translation database\n" unless $QUIET_ARG;
247
248    &create_translation_database;
249
250    open CACHE, ">$cache_file" || die;
251    print CACHE join "\x01", %translations;
252    close CACHE;
253}
254
255sub load_cache
256{
257    print "Found cached translation database\n" unless $QUIET_ARG;
258
259    my $contents;
260    open CACHE, "<$cache_file" || die;
261    {
262        local $/;
263        $contents = <CACHE>
264    }
265    close CACHE;
266    %translations = split "\x01", $contents;
267}
268
269sub get_cached_translation_database
270{
271    my $cache_file_age = -M $cache_file;
272    if ($cache_file_age && $cache_file_age <= &get_newest_po_age) {
273        &load_cache;
274        return;
275   }
276
277   &create_cache;
278}
279
280sub create_translation_database
281{
282    for my $lang (keys %po_files_by_lang) {
283        my $po_file = $po_files_by_lang{$lang};
284
285        if ($UTF8_ARG) {
286            my $encoding = get_po_encoding ($po_file);
287            if (lc $encoding eq "utf-8") {
288                open PO_FILE, "<$po_file";     
289            } else {
290                open PO_FILE, "iconv -f $encoding -t UTF-8 $po_file|"; 
291            }
292        } else {
293            open PO_FILE, "<$po_file"; 
294        }
295
296        my $inmsgid = 0;
297        my $inmsgstr = 0;
298        my $msgid = "";
299        my $msgstr = "";
300        while (<PO_FILE>) {
301            if (/^msgid "((\\.|[^\\])*)"/ ) {
302                $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgstr && $msgstr ne $msgid;
303                $msgid = unescape_po_string($1);
304                $inmsgid = 1;
305                $inmsgstr = 0;
306            }
307            if (/^msgstr "((\\.|[^\\])*)"/) {
308                $msgstr = unescape_po_string($1);
309                $inmsgstr = 1;
310                $inmsgid = 0;
311            }
312            if (/^"((\\.|[^\\])*)"/) {
313                $msgid .= unescape_po_string($1) if $inmsgid;
314                $msgstr .= unescape_po_string($1) if $inmsgstr;
315            }
316        }
317        $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgstr && $msgstr ne $msgid;
318    }
319}
320
321sub finalize
322{
323}
324
325sub unescape_one_sequence
326{
327    my ($sequence) = @_;
328
329    return "\\" if $sequence eq "\\\\";
330    return "\"" if $sequence eq "\\\"";
331
332    # gettext also handles \n, \t, \b, \r, \f, \v, \a, \xxx (octal),
333    # \xXX (hex) and has a comment saying they want to handle \u and \U.
334
335    return $sequence;
336}
337
338sub unescape_po_string
339{
340    my ($string) = @_;
341
342    $string =~ s/(\\.)/unescape_one_sequence($1)/eg;
343
344    return $string;
345}
346
347sub entity_decode
348{
349    local ($_) = @_;
350
351    s/&apos;/'/g; # '
352    s/&gt;/>/g;
353    s/&lt;/</g;
354    s/&quot;/"/g; # "
355    s/&amp;/&/g;
356
357    return $_;
358}
359
360sub entity_encode
361{
362    my ($pre_encoded) = @_;
363
364    my @list_of_chars = unpack ('C*', $pre_encoded);
365
366    if ($PASS_THROUGH_ARG) {
367        return join ('', map (&entity_encode_int_even_high_bit, @list_of_chars));
368    } else {
369        return join ('', map (&entity_encode_int_minimalist, @list_of_chars));
370    }
371}
372
373sub entity_encode_int_minimalist
374{
375    return "&quot;" if $_ == 34;
376    return "&amp;" if $_ == 38;
377    return "&apos;" if $_ == 39;
378    return "&lt;" if $_ == 60;
379    return "&gt;" if $_ == 62;
380    return chr $_;
381}
382
383sub entity_encode_int_even_high_bit
384{
385    if ($_ > 127 || $_ == 34 || $_ == 38 || $_ == 39 || $_ == 60 || $_ == 62) {
386        # the ($_ > 127) should probably be removed
387        return "&#" . $_ . ";";
388    } else {
389        return chr $_;
390    }
391}
392
393sub entity_encoded_translation
394{
395    my ($lang, $string) = @_;
396
397    my $translation = $translations{$lang, $string};
398    return $string if !$translation;
399    return entity_encode ($translation);
400}
401
402## XML (bonobo-activation specific) merge code
403
404sub ba_merge_translations
405{
406    my $source;
407
408    {
409       local $/; # slurp mode
410       open INPUT, "<$FILE" or die "can't open $FILE: $!";
411       $source = <INPUT>;
412       close INPUT;
413    }
414
415    open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!";
416
417    while ($source =~ s|^(.*?)([ \t]*<\s*$w+\s+($w+\s*=\s*"$q"\s*)+/?>)([ \t]*\n)?||s) {
418        print OUTPUT $1;
419
420        my $node = $2 . "\n";
421
422        my @strings = ();
423        $_ = $node;
424        while (s/(\s)_($w+\s*=\s*"($q)")/$1$2/s) {
425             push @strings, entity_decode($3);
426        }
427        print OUTPUT;
428
429        my %langs;
430        for my $string (@strings) {
431            for my $lang (keys %po_files_by_lang) {
432                $langs{$lang} = 1 if $translations{$lang, $string};
433            }
434        }
435       
436        for my $lang (sort keys %langs) {
437            $_ = $node;
438            s/(\sname\s*=\s*)"($q)"/$1"$2-$lang"/s;
439            s/(\s)_($w+\s*=\s*")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg;
440            print OUTPUT;
441        }
442    }
443
444    print OUTPUT $source;
445
446    close OUTPUT;
447}
448
449
450## XML (non-bonobo-activation) merge code
451
452sub xml_merge_translations
453{
454    my $source;
455
456    {
457       local $/; # slurp mode
458       open INPUT, "<$FILE" or die "can't open $FILE: $!";
459       $source = <INPUT>;
460       close INPUT;
461    }
462
463    open OUTPUT, ">$OUTFILE" or die;
464
465    # FIXME: support attribute translations
466
467    # Empty nodes never need translation, so unmark all of them.
468    # For example, <_foo/> is just replaced by <foo/>.
469    $source =~ s|<\s*_($w+)\s*/>|<$1/>|g;
470
471    # Support for <_foo>blah</_foo> style translations.
472    while ($source =~ s|^(.*?)([ \t]*)<\s*_($w+)\s*>(.*?)<\s*/_\3\s*>([ \t]*\n)?||s) {
473        print OUTPUT $1;
474
475        my $spaces = $2;
476        my $tag = $3;
477        my $string = $4;
478
479        print OUTPUT "$spaces<$tag>$string</$tag>\n";
480
481        $string =~ s/\s+/ /g;
482        $string =~ s/^ //;
483        $string =~ s/ $//;
484        $string = entity_decode($string);
485
486        for my $lang (sort keys %po_files_by_lang) {
487            my $translation = $translations{$lang, $string};
488            next if !$translation;
489            $translation = entity_encode($translation);
490            print OUTPUT "$spaces<$tag xml:lang=\"$lang\">$translation</$tag>\n";
491        }
492    }
493
494    print OUTPUT $source;
495
496    close OUTPUT;
497}
498
499sub keys_merge_translations
500{
501    open INPUT, "<${FILE}" or die;
502    open OUTPUT, ">${OUTFILE}" or die;
503
504    while (<INPUT>) {
505        if (s/^(\s*)_(\w+=(.*))/$1$2/)  {
506            my $string = $3;
507
508            print OUTPUT;
509
510            my $non_translated_line = $_;
511
512            for my $lang (sort keys %po_files_by_lang) {
513                my $translation = $translations{$lang, $string};
514                next if !$translation;
515
516                $_ = $non_translated_line;
517                s/(\w+)=.*/[$lang]$1=$translation/;
518                print OUTPUT;
519            }
520        } else {
521            print OUTPUT;
522        }
523    }
524
525    close OUTPUT;
526    close INPUT;
527}
528
529sub desktop_merge_translations
530{
531    open INPUT, "<${FILE}" or die;
532    open OUTPUT, ">${OUTFILE}" or die;
533
534    while (<INPUT>) {
535        if (s/^(\s*)_(\w+=(.*))/$1$2/)  {
536            my $string = $3;
537
538            print OUTPUT;
539
540            my $non_translated_line = $_;
541
542            for my $lang (sort keys %po_files_by_lang) {
543                my $translation = $translations{$lang, $string};
544                next if !$translation;
545
546                $_ = $non_translated_line;
547                s/(\w+)=.*/${1}[$lang]=$translation/;
548                print OUTPUT;
549            }
550        } else {
551            print OUTPUT;
552        }
553    }
554
555    close OUTPUT;
556    close INPUT;
557}
Note: See TracBrowser for help on using the repository browser.