source: trunk/third/bonobo/xml-i18n-merge.in @ 17169

Revision 17169, 12.2 KB checked in by ghudson, 23 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r17168, which included commits to RCS files with non-trunk default branches.
Line 
1#!@INTLTOOL_PERL@ -w
2
3#
4#  The Intltool Message Merger
5#
6#  Copyright (C) 2000 Free Software Foundation.
7#  Copyright (C) 2000, 2001 Eazel, Inc
8#
9#  Intltool is free software; you can redistribute it and/or
10#  modify it under the terms of the GNU General Public License as
11#  published by the Free Software Foundation; either version 2 of the
12#  License, or (at your option) any later version.
13#
14#  Intltool is distributed in the hope that it will be useful,
15#  but WITHOUT ANY WARRANTY; without even the implied warranty of
16#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17#  General Public License for more details.
18#
19#  You should have received a copy of the GNU General Public License
20#  along with this program; if not, write to the Free Software
21#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22#
23#  As a special exception to the GNU General Public License, if you
24#  distribute this file as part of a program that contains a
25#  configuration script generated by Autoconf, you may include it under
26#  the same distribution terms that you use for the rest of that program.
27#
28#  Authors:  Maciej Stachowiak <mjs@noisehavoc.org>
29#            Kenneth Christiansen <kenneth@gnu.org>
30#            Darin Adler <darin@bentspoon.com>
31#
32#  Proper XML UTF-8ification written by Cyrille Chepelov <chepelov@calixo.net>
33#
34
35## Release information
36my $PROGRAM      = "intltool-merge";
37my $PACKAGE      = "intltool";
38my $VERSION      = "0.13";
39
40## Script options - Enable by setting value to 1
41my $ENABLE_XML   = "1";
42
43## Loaded modules
44use strict;
45use File::Basename;
46use Getopt::Long;
47
48## Scalars used by the option stuff
49my $HELP_ARG = "0";
50my $VERSION_ARG = "0";
51my $OAF_STYLE_ARG = "0";
52my $XML_STYLE_ARG = "0";
53my $KEYS_STYLE_ARG = "0";
54my $DESKTOP_STYLE_ARG = "0";
55my $QUIET_ARG = "0";
56my $PASS_THROUGH_ARG = "0";
57my $UTF8_ARG = "0";
58
59## Handle options
60GetOptions (
61            "help|h" => \$HELP_ARG,
62            "version|v" => \$VERSION_ARG,
63            "quiet|q" => \$QUIET_ARG,
64            "oaf-style|o" => \$OAF_STYLE_ARG,
65            "xml-style|x" => \$XML_STYLE_ARG,
66            "keys-style|k" => \$KEYS_STYLE_ARG,
67            "desktop-style|d" => \$DESKTOP_STYLE_ARG,
68            "pass-through|p" => \$PASS_THROUGH_ARG,
69            "utf8|u" => \$UTF8_ARG
70            ) or &error;
71
72
73my $PO_DIR;
74my $FILE;
75my $OUTFILE;
76
77my %po_files_by_lang = ();
78my %translations = ();
79
80# Use this instead of \w for XML files to handle more possible characters.
81my $w = "[-A-Za-z0-9._:]";
82
83# XML quoted string contents
84my $q = "[^\\\"]*";
85
86&split_on_argument;
87
88
89## Check for options.
90## This section will check for the different options.
91
92sub split_on_argument {
93
94    if ($VERSION_ARG) {
95        &version;
96    } elsif ($HELP_ARG) {
97        &help;
98    } elsif ($OAF_STYLE_ARG && @ARGV > 2) {
99        &place_normal;
100        &message;
101        &preparation;
102        &oaf_merge_translations;
103    } elsif ($XML_STYLE_ARG && @ARGV > 2) {
104        &utf8_sanity_check;
105        &place_normal;
106        &message;
107        &preparation;
108        &xml_merge_translations;
109    } elsif ($KEYS_STYLE_ARG && @ARGV > 2) {
110        &utf8_sanity_check;
111        &place_normal;
112        &message;
113        &preparation;
114        &keys_merge_translations;
115    } elsif ($DESKTOP_STYLE_ARG && @ARGV > 2) {
116        &place_normal;
117        &message;
118        &preparation;
119        &desktop_merge_translations;
120    } else {
121        &help;
122    }
123}
124
125sub utf8_sanity_check {
126    if (!$UTF8_ARG) {
127        if (!$PASS_THROUGH_ARG) {
128            $PASS_THROUGH_ARG="1";
129        }
130    }
131}
132
133sub place_normal {
134    $PO_DIR = $ARGV[0];
135    $FILE = $ARGV[1];
136    $OUTFILE = $ARGV[2];
137}
138
139
140## Sub for printing release information
141sub version{
142    print "${PROGRAM} (${PACKAGE}) ${VERSION}\n";
143    print "Written by Maciej Stachowiak and Kenneth Christiansen, 2000.\n\n";
144    print "Copyright (C) 2000 Free Software Foundation, Inc.\n";
145    print "Copyright (C) 2000, 2001 Eazel, Inc.\n";
146    print "This is free software; see the source for copying conditions.  There is NO\n";
147    print "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n";
148    exit;
149}
150
151## Sub for printing usage information
152sub help{
153    print "Usage: ${PROGRAM} [OPTIONS] PO_DIRECTORY FILENAME OUTPUT_FILE\n";
154    print "Generates an xml file that includes translated versions of some attributes,\n";
155    print "from an untranslated source and a po directory that includes translations.\n";
156    print "  -v, --version                shows the version\n";
157    print "  -h, --help                   shows this help page\n";
158    print "  -q, --quiet                  quiet mode\n";
159    print "  -o, --oaf-style              includes translations in the oaf style\n";
160    print "  -x, --xml-style              includes translations in the xml style\n";
161    print "  -k, --keys-style             includes translations in the keys style\n";
162    print "  -d, --desktop-style          includes translations in the desktop style\n";
163    print "  -u, --utf8                   convert all strings to UTF-8 before merging\n";
164    print "  -p, --pass-through           use strings as found in .po files, without\n";
165    print "                               conversion (STRONGLY unrecommended with -x)\n";
166    print "\nReport bugs to bugzilla.gnome.org, module intltool or xml-i18n-tools-list\@gnome.org>\n";
167    exit;
168}
169
170
171## Sub for printing error messages
172sub error{
173    print "Try `${PROGRAM} --help' for more information.\n";
174    exit;
175}
176
177
178sub message {
179    print "Merging translations into $OUTFILE.\n" unless $QUIET_ARG;
180}
181
182
183sub preparation {
184   &gather_po_files;
185   &create_translation_database;
186}
187
188# General-purpose code for looking up translations in .po files
189
190sub gather_po_files
191{
192    my @po_files = glob("${PO_DIR}/*.po");
193    my @languages = map &po_file2lang, @po_files;
194    for my $lang (@languages) {
195        $po_files_by_lang{$lang} = shift (@po_files);
196    }
197}
198
199sub po_file2lang
200{
201    my $tmp = $_;
202    $tmp =~ s/^.*\/(.*)\.po$/$1/;
203    return $tmp;
204}
205
206sub get_po_encoding
207{
208    my ($in_po_file) = @_;
209    my $encoding = "";
210
211    open IN_PO_FILE, $in_po_file or die;
212    while (<IN_PO_FILE>) {
213        ## example: "Content-Type: text/plain; charset=ISO-8859-1\n"
214        if (/Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/) {
215            $encoding = $1;
216            last;
217        }
218    }
219    close IN_PO_FILE;
220
221    if (!$encoding) {
222        print "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n";
223        $encoding = "ISO-8859-1";
224    }
225    return $encoding
226}
227
228sub create_translation_database
229{
230    for my $lang (keys %po_files_by_lang) {
231        my $po_file = $po_files_by_lang{$lang};
232
233        if ($UTF8_ARG) {
234            my $encoding = get_po_encoding($po_file);
235            open PO_FILE, "iconv -f $encoding -t UTF-8 $po_file|";     
236        } else {
237            open PO_FILE, "<$po_file"; 
238        }
239
240        my $inmsgid = 0;
241        my $inmsgstr = 0;
242        my $msgid = "";
243        my $msgstr = "";
244        while (<PO_FILE>) {
245            if (/^msgid "((\\.|[^\\])*)"/ ) {
246                $translations{$lang}{$msgid} = $msgstr if $inmsgstr && $msgstr && $msgstr ne $msgid;
247                $msgid = unescape_po_string($1);
248                $inmsgid = 1;
249                $inmsgstr = 0;
250            }
251            if (/^msgstr "((\\.|[^\\])*)"/) {
252                $msgstr = unescape_po_string($1);
253                $inmsgstr = 1;
254                $inmsgid = 0;
255            }
256            if (/^"((\\.|[^\\])*)"/) {
257                $msgid .= unescape_po_string($1) if $inmsgid;
258                $msgstr .= unescape_po_string($1) if $inmsgstr;
259            }
260        }
261        $translations{$lang}{$msgid} = $msgstr if $inmsgstr && $msgstr && $msgstr ne $msgid;
262    }
263}
264
265sub unescape_one_sequence
266{
267    my ($sequence) = @_;
268
269    return "\\" if $sequence eq "\\\\";
270    return "\"" if $sequence eq "\\\"";
271
272    # gettext also handles \n, \t, \b, \r, \f, \v, \a, \xxx (octal),
273    # \xXX (hex) and has a comment saying they want to handle \u and \U.
274
275    return $sequence;
276}
277
278sub unescape_po_string
279{
280    my ($string) = @_;
281
282    $string =~ s/(\\.)/unescape_one_sequence($1)/eg;
283
284    return $string;
285}
286
287sub entity_decode
288{
289    local ($_) = @_;
290
291    s/&apos;/'/g; # '
292    s/&gt;/>/g;
293    s/&lt;/</g;
294    s/&quot;/"/g; # "
295    s/&amp;/&/g;
296
297    return $_;
298}
299
300sub entity_encode
301{
302    my ($pre_encoded) = @_;
303
304    my @list_of_chars = unpack ('C*', $pre_encoded);
305
306    if ($PASS_THROUGH_ARG) {
307        return join ('', map (&entity_encode_int_even_high_bit, @list_of_chars));
308    } else {
309        return join ('', map (&entity_encode_int_minimalist, @list_of_chars));
310    }
311}
312
313sub entity_encode_int_minimalist
314{
315    return "&quot;" if $_ == 34;
316    return "&amp;" if $_ == 38;
317    return "&apos;" if $_ == 39;
318    return "&lt;" if $_ == 60;
319    return "&gt;" if $_ == 62;
320    return chr $_;
321}
322
323sub entity_encode_int_even_high_bit
324{
325    if ($_ > 127 || $_ == 34 || $_ == 38 || $_ == 39 || $_ == 60 || $_ == 62) {
326        # the ($_ > 127) should probably be removed
327        return "&#" . $_ . ";";
328    } else {
329        return chr $_;
330    }
331}
332
333sub entity_encoded_translation
334{
335    my ($lang, $string) = @_;
336
337    my $translation = $translations{$lang}{$string};
338    return $string if !$translation;
339    return entity_encode($translation);
340}
341
342## XML/OAF-specific merge code
343
344sub oaf_merge_translations
345{
346    my $source;
347
348    {
349       local $/; # slurp mode
350       open INPUT, "<$FILE" or die "can't open $FILE: $!";
351       $source = <INPUT>;
352       close INPUT;
353    }
354
355    open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!";
356
357    while ($source =~ s|^(.*?)([ \t]*<\s*$w+\s+($w+\s*=\s*"$q"\s*)+/?>)([ \t]*\n)?||s) {
358        print OUTPUT $1;
359
360        my $node = $2 . "\n";
361
362        my @strings = ();
363        $_ = $node;
364        while (s/(\s)_($w+\s*=\s*"($q)")/$1$2/s) {
365             push @strings, entity_decode($3);
366        }
367        print OUTPUT;
368
369        my %langs;
370        for my $string (@strings) {
371            for my $lang (keys %translations) {
372                $langs{$lang} = 1 if $translations{$lang}{$string};
373            }
374        }
375       
376        for my $lang (sort keys %langs) {
377            $_ = $node;
378            s/(\sname\s*=\s*)"($q)"/$1"$2-$lang"/s;
379            s/(\s)_($w+\s*=\s*")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg;
380            print OUTPUT;
381        }
382    }
383
384    print OUTPUT $source;
385
386    close OUTPUT;
387}
388
389
390## XML (non-OAF) merge code
391
392sub xml_merge_translations
393{
394    my $source;
395
396    {
397       local $/; # slurp mode
398       open INPUT, "<$FILE" or die "can't open $FILE: $!";
399       $source = <INPUT>;
400       close INPUT;
401    }
402
403    open OUTPUT, ">$OUTFILE" or die;
404
405    # FIXME: support attribute translations
406
407    # Empty nodes never need translation, so unmark all of them.
408    # For example, <_foo/> is just replaced by <foo/>.
409    $source =~ s|<\s*_($w+)\s*/>|<$1/>|g;
410
411    # Support for <_foo>blah</_foo> style translations.
412    while ($source =~ s|^(.*?)([ \t]*)<\s*_($w+)\s*>(.*?)<\s*/_\3\s*>([ \t]*\n)?||s) {
413        print OUTPUT $1;
414
415        my $spaces = $2;
416        my $tag = $3;
417        my $string = $4;
418
419        print OUTPUT "$spaces<$tag>$string</$tag>\n";
420
421        $string =~ s/\s+/ /g;
422        $string =~ s/^ //;
423        $string =~ s/ $//;
424        $string = entity_decode($string);
425
426        for my $lang (sort keys %translations) {
427            my $translation = $translations{$lang}{$string};
428            next if !$translation;
429            $translation = entity_encode($translation);
430            print OUTPUT "$spaces<$tag xml:lang=\"$lang\">$translation</$tag>\n";
431        }
432    }
433
434    print OUTPUT $source;
435
436    close OUTPUT;
437}
438
439sub keys_merge_translations
440{
441    open INPUT, "<${FILE}" or die;
442    open OUTPUT, ">${OUTFILE}" or die;
443
444    while (<INPUT>) {
445        if (s/^(\s*)_(\w+=(.*))/$1$2/)  {
446            my $string = $3;
447
448            print OUTPUT;
449
450            my $non_translated_line = $_;
451
452            for my $lang (sort keys %translations) {
453                my $translation = $translations{$lang}{$string};
454                next if !$translation;
455
456                $_ = $non_translated_line;
457                s/(\w+)=.*/[$lang]$1=$translation/;
458                print OUTPUT;
459            }
460        } else {
461            print OUTPUT;
462        }
463    }
464
465    close OUTPUT;
466    close INPUT;
467}
468
469sub desktop_merge_translations
470{
471    open INPUT, "<${FILE}" or die;
472    open OUTPUT, ">${OUTFILE}" or die;
473
474    while (<INPUT>) {
475        if (s/^(\s*)_(\w+=(.*))/$1$2/)  {
476            my $string = $3;
477
478            print OUTPUT;
479
480            my $non_translated_line = $_;
481
482            for my $lang (sort keys %translations) {
483                my $translation = $translations{$lang}{$string};
484                next if !$translation;
485
486                $_ = $non_translated_line;
487                s/(\w+)=.*/${1}[$lang]=$translation/;
488                print OUTPUT;
489            }
490        } else {
491            print OUTPUT;
492        }
493    }
494
495    close OUTPUT;
496    close INPUT;
497}
Note: See TracBrowser for help on using the repository browser.