source: trunk/third/oaf/xml-i18n-merge.in @ 18115

Revision 18115, 13.3 KB checked in by ghudson, 22 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r18114, which included commits to RCS files with non-trunk default branches.
Line 
1#!@INTLTOOL_PERL@ -w
2
3#
4#  The Intltool Message Merger
5#
6#  Copyright (C) 2000, 2002 Free Software Foundation.
7#  Copyright (C) 2000, 2001 Eazel, Inc
8#
9#  Intltool is free software; you can redistribute it and/or
10#  modify it under the terms of the GNU General Public License
11#  version 2 published by the Free Software Foundation.
12#
13#  Intltool is distributed in the hope that it will be useful,
14#  but WITHOUT ANY WARRANTY; without even the implied warranty of
15#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16#  General Public License for more details.
17#
18#  You should have received a copy of the GNU General Public License
19#  along with this program; if not, write to the Free Software
20#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21#
22#  As a special exception to the GNU General Public License, if you
23#  distribute this file as part of a program that contains a
24#  configuration script generated by Autoconf, you may include it under
25#  the same distribution terms that you use for the rest of that program.
26#
27#  Authors:  Maciej Stachowiak <mjs@noisehavoc.org>
28#            Kenneth Christiansen <kenneth@gnu.org>
29#            Darin Adler <darin@bentspoon.com>
30#
31#  Proper XML UTF-8'ification written by Cyrille Chepelov <chepelov@calixo.net>
32#
33
34## Release information
35my $PROGRAM = "intltool-merge";
36my $PACKAGE = "intltool";
37my $VERSION = "0.18";
38
39## Loaded modules
40use strict;
41use Getopt::Long;
42
43## Scalars used by the option stuff
44my $HELP_ARG = 0;
45my $VERSION_ARG = 0;
46my $BA_STYLE_ARG = 0;
47my $XML_STYLE_ARG = 0;
48my $KEYS_STYLE_ARG = 0;
49my $DESKTOP_STYLE_ARG = 0;
50my $QUIET_ARG = 0;
51my $PASS_THROUGH_ARG = 0;
52my $UTF8_ARG = 0;
53my $cache_file;
54
55## Handle options
56GetOptions
57(
58 "help" => \$HELP_ARG,
59 "version" => \$VERSION_ARG,
60 "quiet|q" => \$QUIET_ARG,
61 "oaf-style|o" => \$BA_STYLE_ARG, ## for compatibility
62 "ba-style|b" => \$BA_STYLE_ARG,
63 "xml-style|x" => \$XML_STYLE_ARG,
64 "keys-style|k" => \$KEYS_STYLE_ARG,
65 "desktop-style|d" => \$DESKTOP_STYLE_ARG,
66 "pass-through|p" => \$PASS_THROUGH_ARG,
67 "utf8|u" => \$UTF8_ARG,
68 "cache|c=s" => \$cache_file
69 ) or &error;
70
71my $PO_DIR;
72my $FILE;
73my $OUTFILE;
74
75my %po_files_by_lang = ();
76my %translations = ();
77
78# Use this instead of \w for XML files to handle more possible characters.
79my $w = "[-A-Za-z0-9._:]";
80
81# XML quoted string contents
82my $q = "[^\\\"]*";
83
84## Check for options.
85
86if ($VERSION_ARG) {
87        &print_version;
88} elsif ($HELP_ARG) {
89        &print_help;
90} elsif ($BA_STYLE_ARG && @ARGV > 2) {
91        &preparation;
92        &print_message;
93        &ba_merge_translations;
94        &finalize;
95} elsif ($XML_STYLE_ARG && @ARGV > 2) {
96        &utf8_sanity_check;
97        &preparation;
98        &print_message;
99        &xml_merge_translations;
100        &finalize;
101} elsif ($KEYS_STYLE_ARG && @ARGV > 2) {
102        &utf8_sanity_check;
103        &preparation;
104        &print_message;
105        &keys_merge_translations;
106        &finalize;
107} elsif ($DESKTOP_STYLE_ARG && @ARGV > 2) {
108        &preparation;
109        &print_message;
110        &desktop_merge_translations;
111        &finalize;
112} else {
113        &print_help;
114}
115
116exit;
117
118## Sub for printing release information
119sub print_version
120{
121    print "${PROGRAM} (${PACKAGE}) ${VERSION}\n";
122    print "Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen.\n\n";
123    print "Copyright (C) 2000-2002 Free Software Foundation, Inc.\n";
124    print "Copyright (C) 2000-2001 Eazel, Inc.\n";
125    print "This is free software; see the source for copying conditions.  There is NO\n";
126    print "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n";
127    exit;
128}
129
130## Sub for printing usage information
131sub print_help
132{
133    print "Usage: ${PROGRAM} [OPTIONS] PO_DIRECTORY FILENAME OUTPUT_FILE\n";
134    print "Generates an output file that includes translated versions of some attributes,\n";
135    print "from an untranslated source and a po directory that includes translations.\n\n";
136    print "  -b, --ba-style         includes translations in the bonobo-activation style\n";
137    print "  -d, --desktop-style    includes translations in the desktop style\n";
138    print "  -k, --keys-style       includes translations in the keys style\n";
139    print "  -x, --xml-style        includes translations in the standard xml style\n";
140    print "  -u, --utf8             convert all strings to UTF-8 before merging\n";
141    print "  -p, --pass-through     use strings as found in .po files, without\n";
142    print "                         conversion (STRONGLY unrecommended with -x)\n";
143    print "  -q, --quiet            suppress most messages\n";
144    print "      --help             display this help and exit\n";
145    print "      --version          output version information and exit\n";
146    print "\nReport bugs to bugzilla.gnome.org, module intltool, or contact us through \n";
147    print "<xml-i18n-tools-list\@gnome.org>.\n";
148    exit;
149}
150
151
152## Sub for printing error messages
153sub print_error
154{
155    print "Try `${PROGRAM} --help' for more information.\n";
156    exit;
157}
158
159
160sub print_message
161{
162    print "Merging translations into $OUTFILE.\n" unless $QUIET_ARG;
163}
164
165
166sub preparation
167{
168    $PO_DIR = $ARGV[0];
169    $FILE = $ARGV[1];
170    $OUTFILE = $ARGV[2];
171
172    &gather_po_files;
173    &get_translation_database;
174}
175
176# General-purpose code for looking up translations in .po files
177
178sub po_file2lang
179{
180    my ($tmp) = @_;
181    $tmp =~ s/^.*\/(.*)\.po$/$1/;
182    return $tmp;
183}
184
185sub gather_po_files
186{
187    for my $po_file (glob "$PO_DIR/*.po") {
188        $po_files_by_lang{po_file2lang($po_file)} = $po_file;
189    }
190}
191
192sub get_po_encoding
193{
194    my ($in_po_file) = @_;
195    my $encoding = "";
196
197    open IN_PO_FILE, $in_po_file or die;
198    while (<IN_PO_FILE>) {
199        ## example: "Content-Type: text/plain; charset=ISO-8859-1\n"
200        if (/Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/) {
201            $encoding = $1;
202            last;
203        }
204    }
205    close IN_PO_FILE;
206
207    if (!$encoding) {
208        print "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n";
209        $encoding = "ISO-8859-1";
210    }
211    return $encoding
212}
213
214sub utf8_sanity_check
215{
216    if (!$UTF8_ARG) {
217        if (!$PASS_THROUGH_ARG) {
218            $PASS_THROUGH_ARG="1";
219        }
220    }
221}
222
223sub get_translation_database
224{
225    if ($cache_file) {
226        &get_cached_translation_database;
227    } else {
228        &create_translation_database;
229    }
230}
231
232sub get_newest_po_age
233{
234    my $newest_age;
235
236    foreach my $file (values %po_files_by_lang) {
237        my $file_age = -M $file;
238        $newest_age = $file_age if !$newest_age || $file_age < $newest_age;
239    }
240
241    return $newest_age;
242}
243
244sub create_cache
245{
246    print "Generating and caching the translation database\n" unless $QUIET_ARG;
247
248    &create_translation_database;
249
250    open CACHE, ">$cache_file" || die;
251    print CACHE join "\x01", %translations;
252    close CACHE;
253}
254
255sub load_cache
256{
257    print "Found cached translation database\n" unless $QUIET_ARG;
258
259    my $contents;
260    open CACHE, "<$cache_file" || die;
261    {
262        local $/;
263        $contents = <CACHE>;
264    }
265    close CACHE;
266    %translations = split "\x01", $contents;
267}
268
269sub get_cached_translation_database
270{
271    my $cache_file_age = -M $cache_file;
272    if (defined $cache_file_age) {
273        if ($cache_file_age <= &get_newest_po_age) {
274            &load_cache;
275            return;
276        }
277        print "Found too-old cached translation database\n" unless $QUIET_ARG;
278    }
279
280    &create_cache;
281}
282
283sub create_translation_database
284{
285    for my $lang (keys %po_files_by_lang) {
286        my $po_file = $po_files_by_lang{$lang};
287
288        if ($UTF8_ARG) {
289            my $encoding = get_po_encoding ($po_file);
290            if (lc $encoding eq "utf-8") {
291                open PO_FILE, "<$po_file";     
292            } else {
293                my $iconv = $ENV{"INTLTOOL_ICONV"} || "iconv";
294                open PO_FILE, "$iconv -f $encoding -t UTF-8 $po_file|";
295            }
296        } else {
297            open PO_FILE, "<$po_file"; 
298        }
299
300        my $nextfuzzy = 0;
301        my $inmsgid = 0;
302        my $inmsgstr = 0;
303        my $msgid = "";
304        my $msgstr = "";
305        while (<PO_FILE>) {
306            $nextfuzzy = 1 if /^#, fuzzy/;
307            if (/^msgid "((\\.|[^\\])*)"/ ) {
308                $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
309                $msgid = "";
310                $msgstr = "";
311
312                if ($nextfuzzy) {
313                    $inmsgid = 0;
314                } else {
315                    $msgid = unescape_po_string($1);
316                    $inmsgid = 1;
317                }
318                $inmsgstr = 0;
319                $nextfuzzy = 0;
320            }
321            if (/^msgstr "((\\.|[^\\])*)"/) {
322                $msgstr = unescape_po_string($1);
323                $inmsgstr = 1;
324                $inmsgid = 0;
325            }
326            if (/^"((\\.|[^\\])*)"/) {
327                $msgid .= unescape_po_string($1) if $inmsgid;
328                $msgstr .= unescape_po_string($1) if $inmsgstr;
329            }
330        }
331        $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
332    }
333}
334
335sub finalize
336{
337}
338
339sub unescape_one_sequence
340{
341    my ($sequence) = @_;
342
343    return "\\" if $sequence eq "\\\\";
344    return "\"" if $sequence eq "\\\"";
345
346    # gettext also handles \n, \t, \b, \r, \f, \v, \a, \xxx (octal),
347    # \xXX (hex) and has a comment saying they want to handle \u and \U.
348
349    return $sequence;
350}
351
352sub unescape_po_string
353{
354    my ($string) = @_;
355
356    $string =~ s/(\\.)/unescape_one_sequence($1)/eg;
357
358    return $string;
359}
360
361sub entity_decode
362{
363    local ($_) = @_;
364
365    s/&apos;/'/g; # '
366    s/&quot;/"/g; # "
367    s/&amp;/&/g;
368
369    return $_;
370}
371
372sub entity_encode
373{
374    my ($pre_encoded) = @_;
375
376    my @list_of_chars = unpack ('C*', $pre_encoded);
377
378    if ($PASS_THROUGH_ARG) {
379        return join ('', map (&entity_encode_int_even_high_bit, @list_of_chars));
380    } else {
381        return join ('', map (&entity_encode_int_minimalist, @list_of_chars));
382    }
383}
384
385sub entity_encode_int_minimalist
386{
387    return "&quot;" if $_ == 34;
388    return "&amp;" if $_ == 38;
389    return "&apos;" if $_ == 39;
390    return chr $_;
391}
392
393sub entity_encode_int_even_high_bit
394{
395    if ($_ > 127 || $_ == 34 || $_ == 38 || $_ == 39) {
396        # the ($_ > 127) should probably be removed
397        return "&#" . $_ . ";";
398    } else {
399        return chr $_;
400    }
401}
402
403sub entity_encoded_translation
404{
405    my ($lang, $string) = @_;
406
407    my $translation = $translations{$lang, $string};
408    return $string if !$translation;
409    return entity_encode ($translation);
410}
411
412## XML (bonobo-activation specific) merge code
413
414sub ba_merge_translations
415{
416    my $source;
417
418    {
419       local $/; # slurp mode
420       open INPUT, "<$FILE" or die "can't open $FILE: $!";
421       $source = <INPUT>;
422       close INPUT;
423    }
424
425    open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!";
426
427    while ($source =~ s|^(.*?)([ \t]*<\s*$w+\s+($w+\s*=\s*"$q"\s*)+/?>)([ \t]*\n)?||s) {
428        print OUTPUT $1;
429
430        my $node = $2 . "\n";
431
432        my @strings = ();
433        $_ = $node;
434        while (s/(\s)_($w+\s*=\s*"($q)")/$1$2/s) {
435             push @strings, entity_decode($3);
436        }
437        print OUTPUT;
438
439        my %langs;
440        for my $string (@strings) {
441            for my $lang (keys %po_files_by_lang) {
442                $langs{$lang} = 1 if $translations{$lang, $string};
443            }
444        }
445       
446        for my $lang (sort keys %langs) {
447            $_ = $node;
448            s/(\sname\s*=\s*)"($q)"/$1"$2-$lang"/s;
449            s/(\s)_($w+\s*=\s*")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg;
450            print OUTPUT;
451        }
452    }
453
454    print OUTPUT $source;
455
456    close OUTPUT;
457}
458
459
460## XML (non-bonobo-activation) merge code
461
462sub xml_merge_translations
463{
464    my $source;
465
466    {
467       local $/; # slurp mode
468       open INPUT, "<$FILE" or die "can't open $FILE: $!";
469       $source = <INPUT>;
470       close INPUT;
471    }
472
473    open OUTPUT, ">$OUTFILE" or die;
474
475    # FIXME: support attribute translations
476
477    # Empty nodes never need translation, so unmark all of them.
478    # For example, <_foo/> is just replaced by <foo/>.
479    $source =~ s|<\s*_($w+)\s*/>|<$1/>|g;
480
481    # Support for <_foo>blah</_foo> style translations.
482    while ($source =~ s|^(.*?)([ \t]*)<\s*_($w+)\s*>(.*?)<\s*/_\3\s*>([ \t]*\n)?||s) {
483        print OUTPUT $1;
484
485        my $spaces = $2;
486        my $tag = $3;
487        my $string = $4;
488
489        print OUTPUT "$spaces<$tag>$string</$tag>\n";
490
491        $string =~ s/\s+/ /g;
492        $string =~ s/^ //;
493        $string =~ s/ $//;
494        $string = entity_decode($string);
495
496        for my $lang (sort keys %po_files_by_lang) {
497            my $translation = $translations{$lang, $string};
498            next if !$translation;
499            $translation = entity_encode($translation);
500            print OUTPUT "$spaces<$tag xml:lang=\"$lang\">$translation</$tag>\n";
501        }
502    }
503
504    print OUTPUT $source;
505
506    close OUTPUT;
507}
508
509sub keys_merge_translations
510{
511    open INPUT, "<${FILE}" or die;
512    open OUTPUT, ">${OUTFILE}" or die;
513
514    while (<INPUT>) {
515        if (s/^(\s*)_(\w+=(.*))/$1$2/)  {
516            my $string = $3;
517
518            print OUTPUT;
519
520            my $non_translated_line = $_;
521
522            for my $lang (sort keys %po_files_by_lang) {
523                my $translation = $translations{$lang, $string};
524                next if !$translation;
525
526                $_ = $non_translated_line;
527                s/(\w+)=.*/[$lang]$1=$translation/;
528                print OUTPUT;
529            }
530        } else {
531            print OUTPUT;
532        }
533    }
534
535    close OUTPUT;
536    close INPUT;
537}
538
539sub desktop_merge_translations
540{
541    open INPUT, "<${FILE}" or die;
542    open OUTPUT, ">${OUTFILE}" or die;
543
544    while (<INPUT>) {
545        if (s/^(\s*)_(\w+=(.*))/$1$2/)  {
546            my $string = $3;
547
548            print OUTPUT;
549
550            my $non_translated_line = $_;
551
552            for my $lang (sort keys %po_files_by_lang) {
553                my $translation = $translations{$lang, $string};
554                next if !$translation;
555
556                $_ = $non_translated_line;
557                s/(\w+)=.*/${1}[$lang]=$translation/;
558                print OUTPUT;
559            }
560        } else {
561            print OUTPUT;
562        }
563    }
564
565    close OUTPUT;
566    close INPUT;
567}
Note: See TracBrowser for help on using the repository browser.