source: trunk/third/oaf/xml-i18n-merge.in @ 16756

Revision 16756, 13.8 KB checked in by ghudson, 23 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r16755, which included commits to RCS files with non-trunk default branches.
Line 
1#!@XML_I18N_TOOLS_PERL@ -w
2
3#
4#  The XML Translation Merge Tool
5#
6#  Copyright (C) 2000 Free Software Foundation.
7#  Copyright (C) 2000, 2001 Eazel, Inc
8#
9#  This library is free software; you can redistribute it and/or
10#  modify it under the terms of the GNU General Public License as
11#  published by the Free Software Foundation; either version 2 of the
12#  License, or (at your option) any later version.
13#
14#  This script is distributed in the hope that it will be useful,
15#  but WITHOUT ANY WARRANTY; without even the implied warranty of
16#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17#  General Public License for more details.
18#
19#  You should have received a copy of the GNU General Public License
20#  along with this library; if not, write to the Free Software
21#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22#
23#  Authors:  Maciej Stachowiak <mjs@eazel.com>
24#            Kenneth Christiansen <kenneth@gnu.org>
25#            Darin Adler <darin@eazel.com>
26#
27#  Proper XML UTF-8ification written by Cyrille Chepelov <chepelov@calixo.net>
28#
29
## Release information: program name, owning package and version string,
## used by the --version and --help output.
my ($PROGRAM, $PACKAGE, $VERSION) = ("xml-i18n-merge", "xml-i18n-tools", "0.9");

## Script options - Enable by setting value to 1
my $ENABLE_XML = "1";
37
38## Loaded modules
39use strict;
40use File::Basename;
41use Getopt::Long;
42
## Scalars used by the option stuff.  Each one starts out as "0" and is
## overwritten by GetOptions when the matching switch is given.
my (
    $HELP_ARG,
    $VERSION_ARG,
    $OAF_STYLE_ARG,
    $XML_STYLE_ARG,
    $KEYS_STYLE_ARG,
    $DESKTOP_STYLE_ARG,
    $QUIET_ARG,
    $PASS_THROUGH_ARG,
    $UTF8_ARG,
) = ("0") x 9;

## Handle options: option specification => variable receiving the value
my %option_spec = (
    "help|h|?"        => \$HELP_ARG,
    "version|v"       => \$VERSION_ARG,
    "quiet|q"         => \$QUIET_ARG,
    "oaf-style|o"     => \$OAF_STYLE_ARG,
    "xml-style|x"     => \$XML_STYLE_ARG,
    "keys-style|k"    => \$KEYS_STYLE_ARG,
    "desktop-style|d" => \$DESKTOP_STYLE_ARG,
    "pass-through|p"  => \$PASS_THROUGH_ARG,
    "utf8|u"          => \$UTF8_ARG,
);
GetOptions(%option_spec) or error();


## Positional arguments; filled in by place_normal
my ($PO_DIR, $FILE, $OUTFILE);

## Translation state shared by the .po reader and the merge routines
my @languages;
my (%po_files_by_lang, %translations);

# Use this instead of \w for XML files to handle more possible characters.
my $w = "[-A-Za-z0-9._:]";


split_on_argument();
81
82
83## Check for options.
84## This section will check for the different options.
85
## Decide what to do based on the parsed options.
##
## --version and --help are handled first.  Otherwise the first style flag
## that is set (checked in the order oaf, xml, keys, desktop) selects the
## merge routine, provided more than two positional arguments remain in
## @ARGV.  The xml and keys styles additionally run utf8_sanity_check
## before anything else.  Every other combination prints the usage text.
sub split_on_argument {

    if ($VERSION_ARG) {
        version();
        return;
    }

    if ($HELP_ARG) {
        help();
        return;
    }

    # One row per style: [ flag, run utf8_sanity_check first?, merge routine ]
    my @styles = (
        [ $OAF_STYLE_ARG,     0, \&oaf_merge_translations     ],
        [ $XML_STYLE_ARG,     1, \&xml_merge_translations     ],
        [ $KEYS_STYLE_ARG,    1, \&keys_merge_translations    ],
        [ $DESKTOP_STYLE_ARG, 0, \&desktop_merge_translations ],
    );

    if (@ARGV > 2) {
        foreach my $style (@styles) {
            my ($selected, $needs_sanity_check, $merge) = @$style;
            next unless $selected;

            utf8_sanity_check() if $needs_sanity_check;
            place_normal();
            message();
            preparation();
            $merge->();
            return;
        }
    }

    help();
}
118
## Make sure one of -u / -p is in effect.
## When neither was given, print a warning block, pause for five seconds
## and then switch pass-through mode on.  When either was given, do nothing.
sub utf8_sanity_check {
    return if $UTF8_ARG;

    # Explicit -p: nothing to warn about.
    # (A warning that 8-bit pass-through "XML" is unlikely to be real XML
    # used to be printed here but is disabled.)
    return if $PASS_THROUGH_ARG;

    my @warning = (
        "****WARNING****: no default behavior to xml-i18n-merge (-p or -u) specified.\n",
        "Previous default was equivalent to -p, but generating broken \"XML\" files\n",
        "Please read the documentation and choose either -p or -u behavior\n",
        "For the moment, will assume -p, but this default may change in the future\n",
        "\n\n\n",
        "xml-i18n-merge will now sleep 5 seconds.\n",
    );
    print @warning;

    sleep 5;
    $PASS_THROUGH_ARG = "1";
}
136
## Copy the first three positional arguments into the globals used by the
## rest of the script: po directory, input file, output file.
sub place_normal {
    ($PO_DIR, $FILE, $OUTFILE) = @ARGV[0 .. 2];
}
142
143
## Print the release banner (program, package, version, copyright) and
## terminate the script.
sub version {
    my @banner = (
        "${PROGRAM} (${PACKAGE}) ${VERSION}\n",
        "Written by Maciej Stachowiak and Kenneth Christiansen, 2000.\n\n",
        "Copyright (C) 2000 Free Software Foundation, Inc.\n",
        "Copyright (C) 2000, 2001 Eazel, Inc.\n",
        "This is free software; see the source for copying conditions.  There is NO\n",
        "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n",
    );

    print @banner;
    exit;
}
154
## Print the usage summary (synopsis plus one line per option) and
## terminate the script.
sub help {
    my @usage = (
        "Usage: ${PROGRAM} [OPTIONS] PO_DIRECTORY FILENAME OUTPUT_FILE\n",
        "Generates an xml file that includes translated versions of some attributes,\n",
        "from an untranslated source and a po directory that includes translations.\n",
        "  -v, --version                shows the version\n",
        "  -h, --help                   shows this help page\n",
        "  -q, --quiet                  quiet mode\n",
        "  -o, --oaf-style              includes translations in the oaf style\n",
        "  -x, --xml-style              includes translations in the xml style\n",
        "  -k, --keys-style             includes translations in the keys style\n",
        "  -d, --desktop-style          includes translations in the desktop style\n",
        "  -u, --utf8                   convert all strings to UTF-8 before merging\n",
        "  -p, --pass-through           use strings as found in .po files, without\n",
        "                               conversion (STRONGLY unrecommended with -x)\n",
        "\nReport bugs to bugzilla.eazel.com, module xml-i18n-tools or <mjs\@eazel.com>\n",
    );

    print @usage;
    exit;
}
173
174
## Sub for printing error messages.
## Called when option parsing fails.  The hint goes to STDERR and the
## script exits with a non-zero status so that a calling makefile or
## shell script can tell the invocation was rejected.
sub error {
    print STDERR "Try `${PROGRAM} --help' for more information.\n";
    exit 1;
}
181
182
## Announce which output file is being produced, unless quiet mode is on.
sub message {
    return if $QUIET_ARG;

    print "Merging translations into $OUTFILE.\n";
}
186
187
## Load everything the merge routines need: first find the .po files,
## then read their translations into memory.
sub preparation {
    gather_po_files();
    create_translation_database();
}
192
193
194
195# General-purpose code for looking up translations in .po files
196
## Find every *.po file in $PO_DIR and record it.
## Fills @languages with one language name per file (as derived by
## po_file2lang) and %po_files_by_lang with language => file path.
sub gather_po_files
{
    @languages = ();

    # po_file2lang reads the path from $_, which the loop aliases.
    for (glob("${PO_DIR}/*.po")) {
        my $lang = po_file2lang();

        push @languages, $lang;
        $po_files_by_lang{$lang} = $_;
    }
}
207
## Turn the .po path held in $_ into a language name by dropping the
## directory part and the ".po" suffix.  $_ itself is left untouched; a
## value that does not look like "<dir>/<name>.po" is returned unchanged.
sub po_file2lang
{
    (my $lang = $_) =~ s/^.*\/(.*)\.po$/$1/;
    return $lang;
}
214
## Work out the character set a .po file declares in its header.
##
## Parameter: path of the .po file.
## Returns:   the charset named on the first "Content-Type: ... charset=X\n"
##            line, or "ISO-8859-1" (after printing a warning) when no such
##            line is found or the file cannot be read.
sub get_po_encoding
{
    my ($in_po_file) = @_;
    my $encoding = "";

    if (open my $po_fh, '<', $in_po_file) {
        # A lexical line variable keeps the caller's $_ intact.
        while (my $line = <$po_fh>) {
            ## example: "Content-Type: text/plain; charset=ISO-8859-1\n"
            ## "_", "." and ":" are accepted too so that names such as
            ## Shift_JIS are recognised.
            if ($line =~ /Content-Type\:.*charset=([-a-zA-Z0-9_.:]+)\\n/) {
                $encoding = $1;
                last;
            }
        }
        close $po_fh;
    } else {
        # Best effort: report the problem and fall through to the default.
        print STDERR "Warning: cannot open $in_po_file: $!\n";
    }

    if (!$encoding) {
        print ("Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n");
        $encoding = "ISO-8859-1";
    }

    return $encoding;
}
237
## Read every .po file listed in %po_files_by_lang and fill %translations
## with "<lang>|<msgid>" => msgstr.
##
## With -u the file is piped through iconv to convert it from its declared
## charset to UTF-8; otherwise it is read as is.  Only entries whose msgid
## and msgstr each sit on a single line are picked up, entries with an
## empty msgstr are ignored, and the msgid line following a "#, fuzzy"
## flag is skipped.  A file that cannot be opened is reported on STDERR
## and skipped.
sub create_translation_database
{
    foreach my $lang (@languages) {

        my $po_file = $po_files_by_lang{$lang};
        my $po_fh;

        if ($UTF8_ARG) {
            my $encoding = get_po_encoding($po_file);

            # List-form pipe open: iconv receives its arguments directly,
            # so spaces or shell metacharacters in the path are harmless.
            unless (open $po_fh, '-|', 'iconv', '-f', $encoding, '-t', 'UTF-8', $po_file) {
                print STDERR "Warning: cannot run iconv on $po_file: $!\n";
                next;
            }
        } else {
            unless (open $po_fh, '<', $po_file) {
                print STDERR "Warning: cannot open $po_file: $!\n";
                next;
            }
        }

        while (defined (my $line = <$po_fh>)) {
            if ($line =~ /^#,.*fuzzy/) {
                # Discard the line that follows the fuzzy flag.
                my $skipped = <$po_fh>;
                next;
            }

            if ($line =~ /^msgid "(.*)"/) {
                my $msgid = unescape_po_string($1);

                my $next_line = <$po_fh>;
                last unless defined $next_line;    # file ended after msgid

                if ($next_line =~ /^msgstr "(.+)"/) {
                    my $msgstr = unescape_po_string($1);
                    $translations{$lang . "|" . $msgid} = $msgstr;
                }
            }
        }

        # For the iconv pipe a failed close means the converter reported an
        # error; the translations read so far are kept.
        close $po_fh
            or print STDERR "Warning: problem while reading $po_file (status $?)\n";
    }
}
268
## Convert the escape sequences of a .po string into the characters they
## stand for.
##
## Parameter: the text found between the quotes of a msgid/msgstr line.
## Returns:   the text with \n, \t, \r, \" and \\ replaced by newline, tab,
##            carriage return, double quote and backslash.  Any other
##            backslash sequence is left as it is.
sub unescape_po_string
{
    my ($value) = @_;

    my %char_for = (
        'n'  => "\n",
        't'  => "\t",
        'r'  => "\r",
        '"'  => '"',
        '\\' => '\\',
    );

    # One left-to-right pass, so an escaped backslash followed by "n"
    # stays a backslash and an "n" rather than becoming a newline.
    my $retval = $value;
    $retval =~ s/\\([ntr\"\\])/$char_for{$1}/g;

    return $retval;
}
280
## Collect the translations of one string in every known language.
##
## Parameter: the untranslated string.
## Returns:   a hash (as a list) of language => translation containing only
##            the languages for which lookup_translation gave a true value.
sub lookup_translations
{
    my ($value) = @_;

    my %found;

    for my $lang (@languages) {
        my $text = lookup_translation ($value, $lang);
        next unless $text;

        $found{$lang} = $text;
    }

    return %found;
}
297
298
## Look up the translation of one string in one language.
##
## Parameters: the untranslated string and the language name.
## Returns:    the translation stored in %translations under
##             "<lang>|<string>", or "" when there is none.
##
## The string is used as a plain hash key, exactly as the keys are built
## when the .po files are read.  It must therefore not be regex-escaped:
## escaping "+" here would stop any string containing "+" from matching.
sub lookup_translation
{
    my ($string, $lang) = @_;

    my $salt = "$lang|$string";
    my $translation = $translations{$salt};

    return defined $translation ? $translation : "";
}
312
313
## Run entity_encode over every value of a language => translation map.
##
## Parameters: the map, passed as a flat key/value list.
## Returns:    a new map (as a list) with the same keys and encoded values.
sub entity_encode_translations
{
    my %plain = @_;

    my %encoded;
    while (my ($lang, $text) = each %plain) {
        $encoded{$lang} = entity_encode ($text);
    }

    return %encoded;
}
324
325
## Entity-encode a string for inclusion in XML output.
##
## The string is split into its character codes with unpack 'C*' and each
## code is handed (through $_) to one of two encoders: in pass-through mode
## the one that also encodes codes above 127, otherwise the minimal one.
##
## Parameter: the string to encode.
## Returns:   the encoded string.
sub entity_encode
{
    my ($pre_encoded) = @_;

    my $encoder = $PASS_THROUGH_ARG
        ? \&entity_encode_int_even_high_bit
        : \&entity_encode_int_minimalist;

    # map aliases $_ to each code, which is where the encoders read it.
    my @pieces = map { $encoder->() } unpack ('C*', $pre_encoded);

    return join ('', @pieces);
}
338
## Encode the character code held in $_ using named XML entities.
## The codes for ", &, ', < and > become &quot; &amp; &apos; &lt; &gt;
## every other code is returned as the character itself.
sub entity_encode_int_minimalist
{
    my %entity_for = (
        34 => "&quot;",
        38 => "&amp;",
        39 => "&apos;",
        60 => "&lt;",
        62 => "&gt;",
    );

    # "+ 0" normalises the code to a number before it is used as a key.
    my $entity = $entity_for{ $_ + 0 };

    return defined $entity ? $entity : chr $_;
}
348
## Encode the character code held in $_ as a numeric character reference
## when it is above 127 or is one of ", &, ', <, > (34, 38, 39, 60, 62).
## Every other code is returned as the character itself.
sub entity_encode_int_even_high_bit
{
    my $code = $_;

    # the ($code > 127) test should probably be removed
    my $needs_reference = $code > 127
        || scalar grep { $code == $_ } (34, 38, 39, 60, 62);

    return chr $code unless $needs_reference;

    return "&#" . $code . ";";
}
358
359
360## XML/OAF-specific merge code
361 
## Merge translations into an OAF-style XML file.
##
## Reads $FILE, and for every tag carrying an attribute marked with a
## leading underscore (for example _value="...") writes to $OUTFILE:
##   - the tag with the underscore removed, followed by
##   - one copy of the tag per available translation, with "-<lang>"
##     appended to the value of its name attribute and the marked
##     attribute replaced by the entity-encoded translation.
## Everything else is copied through unchanged.
## Dies when the input cannot be read or the output cannot be written.
sub oaf_merge_translations
{
    open my $in_fh, '<', $FILE or die "can't open $FILE: $!";
    my $xml_source = do { local $/; <$in_fh> };    # slurp mode
    close $in_fh;

    open my $out_fh, '>', $OUTFILE or die "can't open $OUTFILE for writing: $!";

    while ($xml_source =~ /[ \t]*<[^<]*\s_$w+="[^"]*"[^<]*>/m) {
        # Text before the tag needs no processing: emit it right away.
        print {$out_fh} $`;
        my $orig_node = $&;
        $xml_source = $';

        # The substitutions below require whitespace before the underscore,
        # like the pattern that located the tag, so an attribute such as
        # foo_bar="..." is never mistaken for a marked one.
        my $non_translated_line = $orig_node;
        $non_translated_line =~ s/(\s)_($w+)="/$1$2="/;

        my $new_node = $non_translated_line;

        # Value of the first marked attribute, i.e. the same attribute the
        # substitutions above and below rewrite.
        my ($value_str) = $orig_node =~ /\s_$w+="([^"]*)"/;

        if ($value_str) {
            my %value_translation_map = entity_encode_translations
                (lookup_translations ($value_str));

            foreach my $key (sort keys %value_translation_map) {
                my $translation = $value_translation_map{$key};

                my $translated_line = $orig_node;
                $translated_line =~ s/name="([^"]*)"/name="$1-$key"/;
                $translated_line =~ s/(\s)_($w+)="[^"]*"/$1$2="$translation"/;

                $new_node .= "\n$translated_line";
            }
        }

        # Put the rewritten tags back in front of the remaining input so
        # that any further marked attribute in them is handled next round.
        $xml_source = $new_node . $xml_source;
    }

    print {$out_fh} $xml_source;

    close $out_fh or die "error writing $OUTFILE: $!";
}
409
410
411## XML (non-OAF) merge code
412 
## Merge translations into a generic (non-OAF) XML file.
##
## Reads $FILE and writes $OUTFILE.  Empty marked elements such as <_foo/>
## simply lose their underscore.  Every <_foo>text</_foo> element is
## replaced by <foo>text</foo> followed by one
## <foo xml:lang="LANG">translation</foo> line per available translation,
## indented like the original.  Translations are entity-encoded.
## Dies when the input cannot be read or the output cannot be written.
sub xml_merge_translations
{
    open my $in_fh, '<', $FILE or die "can't open $FILE: $!";
    my $xml_source = do { local $/; <$in_fh> };    # slurp mode
    close $in_fh;

    open my $out_fh, '>', $OUTFILE or die "can't open $OUTFILE for writing: $!";

    # FIXME: support attribute translations

    # First just unmark for translation all empty nodes
    # for example <_foo/> is just replaced by <foo/>
    $xml_source =~ s/<_($w+)\/>/<$1\/>/mg;

    # Support for XML <_foo>blah</_foo> style translations
    while ($xml_source =~ /([ \t]*)<_($w+)>([^<]+)<\/_\2>/m) {
        my ($spaces, $tag_name, $value_str) = ($1, $2, $3);

        # Text before the element needs no processing: emit it right away.
        print {$out_fh} $`;
        $xml_source = $';

        my $new_node = "$spaces<$tag_name>$value_str</$tag_name>";

        if ($value_str) {
            my %value_translation_map = entity_encode_translations
                (lookup_translations ($value_str));

            foreach my $key (sort keys %value_translation_map) {
                my $translation = $value_translation_map{$key};

                $new_node .= "\n$spaces<$tag_name xml:lang=\"$key\">$translation</$tag_name>";
            }
        }

        $xml_source = $new_node . $xml_source;
    }

    print {$out_fh} $xml_source;

    close $out_fh or die "error writing $OUTFILE: $!";
}
462
## Merge translations into a keys-style file.
##
## Reads $FILE line by line and writes $OUTFILE.  A line of the form
## "_key=value" is written without the underscore, followed by one
## "[LANG]key=translation" line per available translation.  All other
## lines are copied unchanged.  Translations are written as found, without
## entity encoding.
## Dies when the input cannot be read or the output cannot be written.
sub keys_merge_translations
{
    open my $in_fh, '<', $FILE or die "can't open $FILE: $!";
    open my $out_fh, '>', $OUTFILE or die "can't open $OUTFILE for writing: $!";

    while (my $orig_line = <$in_fh>) {
        chomp $orig_line;

        # The value is everything after the first "_key=", so a value that
        # itself contains something like "foo_bar=1" is kept whole.
        if ($orig_line =~ /^\s*_\w+=(.*)/) {
            my $value_str = $1;

            my $non_translated_line = $orig_line;
            $non_translated_line =~ s/_([^="]*)=/$1=/;

            print {$out_fh} "${non_translated_line}\n";

            if ($value_str) {
                my %value_translation_map = lookup_translations ($value_str);

                foreach my $key (sort keys %value_translation_map) {
                    my $translation = $value_translation_map{$key};

                    my $translated_line = $orig_line;
                    $translated_line =~ s/_([^="]*)=([^\n]*)/\[$key]$1=$translation/;
                    print {$out_fh} "$translated_line\n";
                }
            }
        } else {
            print {$out_fh} "$orig_line\n";
        }
    }

    close $in_fh;
    close $out_fh or die "error writing $OUTFILE: $!";
}
501
## Merge translations into a desktop-style file.
##
## Reads $FILE line by line and writes $OUTFILE.  A line of the form
## "_Key=value" is written without the underscore, followed by one
## "Key[LANG]=translation" line per available translation.  All other
## lines are copied unchanged.  Translations are written as found, without
## entity encoding.
## Dies when the input cannot be read or the output cannot be written.
sub desktop_merge_translations
{
    open my $in_fh, '<', $FILE or die "can't open $FILE: $!";
    open my $out_fh, '>', $OUTFILE or die "can't open $OUTFILE for writing: $!";

    while (my $orig_line = <$in_fh>) {
        chomp $orig_line;

        # The value is everything after the first "_Key=", so a value that
        # itself contains something like "foo_bar=1" is kept whole.
        if ($orig_line =~ /^\s*_\w+=(.*)/) {
            my $value_str = $1;

            my $non_translated_line = $orig_line;
            $non_translated_line =~ s/_([^="]*)=/$1=/;

            print {$out_fh} "${non_translated_line}\n";

            if ($value_str) {
                my %value_translation_map = lookup_translations ($value_str);

                foreach my $key (sort keys %value_translation_map) {
                    my $translation = $value_translation_map{$key};

                    # Leading whitespace is allowed by the line test above,
                    # so it is allowed (and preserved) here as well;
                    # otherwise an indented key would be written out
                    # untranslated once per language.
                    my $translated_line = $orig_line;
                    $translated_line =~ s/^(\s*)_([^="]*)=([^\n]*)/$1$2\[$key]=$translation/;
                    print {$out_fh} "$translated_line\n";
                }
            }
        } else {
            print {$out_fh} "$orig_line\n";
        }
    }

    close $in_fh;
    close $out_fh or die "error writing $OUTFILE: $!";
}
Note: See TracBrowser for help on using the repository browser.