source: trunk/third/intltool/intltool-merge.in @ 21469

Revision 21469, 33.3 KB checked in by ghudson, 20 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r21468, which included commits to RCS files with non-trunk default branches.
Line 
1#!@INTLTOOL_PERL@ -w
2# -*- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 4  -*-
3
4#
5#  The Intltool Message Merger
6#
7#  Copyright (C) 2000, 2003 Free Software Foundation.
8#  Copyright (C) 2000, 2001 Eazel, Inc
9#
10#  Intltool is free software; you can redistribute it and/or
11#  modify it under the terms of the GNU General Public License
12#  version 2 published by the Free Software Foundation.
13#
14#  Intltool is distributed in the hope that it will be useful,
15#  but WITHOUT ANY WARRANTY; without even the implied warranty of
16#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17#  General Public License for more details.
18#
19#  You should have received a copy of the GNU General Public License
20#  along with this program; if not, write to the Free Software
21#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22#
23#  As a special exception to the GNU General Public License, if you
24#  distribute this file as part of a program that contains a
25#  configuration script generated by Autoconf, you may include it under
26#  the same distribution terms that you use for the rest of that program.
27#
28#  Authors:  Maciej Stachowiak <mjs@noisehavoc.org>
29#            Kenneth Christiansen <kenneth@gnu.org>
30#            Darin Adler <darin@bentspoon.com>
31#
32#  Proper XML UTF-8'ification written by Cyrille Chepelov <chepelov@calixo.net>
33#
34
## Release information
# Program name, package name and version; interpolated into the
# --version and --help output and into the error hint.
my ($PROGRAM, $PACKAGE, $VERSION) = ("intltool-merge", "intltool", "0.33");
39
40## Loaded modules
41use strict;
42use Getopt::Long;
43use Text::Wrap;
44use File::Basename;
45
# File-wide state variables.  Of these, only $leading_space is used by the
# XML merge code visible in this part of the file (traverse() accumulates
# whitespace-only text nodes in it to re-indent generated elements).
my ($must_end_tag, $last_depth, $translation_depth) = (-1, -1, -1);
my (@tag_stack, @entered_tag, @translation_strings);
my $leading_space = "";
53
## Scalars used by the option stuff
# Each flag starts out false and is set by GetOptions below.
my $HELP_ARG = 0;
my $VERSION_ARG = 0;
my $BA_STYLE_ARG = 0;
my $XML_STYLE_ARG = 0;
my $KEYS_STYLE_ARG = 0;
my $DESKTOP_STYLE_ARG = 0;
my $SCHEMAS_STYLE_ARG = 0;
my $RFC822DEB_STYLE_ARG = 0;
my $QUIET_ARG = 0;
my $PASS_THROUGH_ARG = 0;
my $UTF8_ARG = 0;
my $MULTIPLE_OUTPUT = 0;
my $cache_file;

## Handle options
# On a parse failure, print the "Try --help" hint via print_error().
# (The previous code called an undefined sub named "error" here, which
# aborted with "Undefined subroutine" instead of printing the hint.)
GetOptions
(
 "help" => \$HELP_ARG,
 "version" => \$VERSION_ARG,
 "quiet|q" => \$QUIET_ARG,
 "oaf-style|o" => \$BA_STYLE_ARG, ## for compatibility
 "ba-style|b" => \$BA_STYLE_ARG,
 "xml-style|x" => \$XML_STYLE_ARG,
 "keys-style|k" => \$KEYS_STYLE_ARG,
 "desktop-style|d" => \$DESKTOP_STYLE_ARG,
 "schemas-style|s" => \$SCHEMAS_STYLE_ARG,
 "rfc822deb-style|r" => \$RFC822DEB_STYLE_ARG,
 "pass-through|p" => \$PASS_THROUGH_ARG,
 "utf8|u" => \$UTF8_ARG,
 "multiple-output|m" => \$MULTIPLE_OUTPUT,
 "cache|c=s" => \$cache_file
 ) or print_error();
87
# Positional arguments; filled in by preparation().
my ($PO_DIR, $FILE, $OUTFILE);

# Language => .po file path, and "$lang$;$msgid" => msgstr.
my (%po_files_by_lang, %translations);

# iconv executable; overridable through the environment.
my $iconv = $ENV{"ICONV"} || $ENV{"INTLTOOL_ICONV"} || "/usr/bin/iconv";

# Use this instead of \w for XML files to handle more possible characters.
my $w = "[-A-Za-z0-9._:]";

# XML quoted string contents
my $q = "[^\\\"]*";
101
## Check for options.
#
# --version and --help win over everything else.  Otherwise the first
# selected merge style (in the order listed below) is run, provided at
# least three positional arguments were given; in every other case the
# usage text is printed.

if ($VERSION_ARG)
{
    print_version();
}
elsif ($HELP_ARG)
{
    print_help();
}
else
{
    # [ style selected?, run utf8_sanity_check first?, merge routine ]
    my @merge_styles = (
        [ $BA_STYLE_ARG,        1, \&ba_merge_translations ],
        [ $XML_STYLE_ARG,       1, \&xml_merge_output ],
        [ $KEYS_STYLE_ARG,      1, \&keys_merge_translations ],
        [ $DESKTOP_STYLE_ARG,   1, \&desktop_merge_translations ],
        [ $SCHEMAS_STYLE_ARG,   1, \&schemas_merge_translations ],
        [ $RFC822DEB_STYLE_ARG, 0, \&rfc822deb_merge_translations ],
    );

    my $merged = 0;

    for my $style (@merge_styles)
    {
        my ($selected, $force_utf8, $merge) = @{ $style };
        next unless $selected && @ARGV > 2;

        utf8_sanity_check() if $force_utf8;
        preparation();
        print_message();
        $merge->();
        finalize();

        $merged = 1;
        last;
    }

    print_help() unless $merged;
}

exit;
165
## Sub for printing release information
#
# Writes the program/package/version banner and copyright notice to
# STDOUT, then terminates the program.
sub print_version
{
    my $banner = <<"END_OF_VERSION";
${PROGRAM} (${PACKAGE}) ${VERSION}
Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen.

Copyright (C) 2000-2003 Free Software Foundation, Inc.
Copyright (C) 2000-2001 Eazel, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
END_OF_VERSION

    print $banner;
    exit;
}
180
## Sub for printing usage information
#
# Writes the usage text to STDOUT, then terminates the program.
sub print_help
{
    my $usage = <<"END_OF_HELP";
Usage: ${PROGRAM} [OPTION]... PO_DIRECTORY FILENAME OUTPUT_FILE
Generates an output file that includes some localized attributes from an
untranslated source file.

Mandatory options: (exactly one must be specified)
  -b, --ba-style         includes translations in the bonobo-activation style
  -d, --desktop-style    includes translations in the desktop style
  -k, --keys-style       includes translations in the keys style
  -s, --schemas-style    includes translations in the schemas style
  -r, --rfc822deb-style  includes translations in the RFC822 style
  -x, --xml-style        includes translations in the standard xml style

Other options:
  -u, --utf8             convert all strings to UTF-8 before merging
                         (default for everything except RFC822 style)
  -p, --pass-through     deprecated, does nothing and issues a warning
  -m, --multiple-output  output one localized file per locale, instead of
                         a single file containing all localized elements
  -c, --cache=FILE       specify cache file name
                         (usually \$top_builddir/po/.intltool-merge-cache)
  -q, --quiet            suppress most messages
      --help             display this help and exit
      --version          output version information and exit

Report bugs to http://bugzilla.gnome.org/ (product name "$PACKAGE")
or send email to <xml-i18n-tools\@gnome.org>.
END_OF_HELP

    print $usage;
    exit;
}
214
215
## Sub for printing error messages
#
# Prints a hint pointing at --help on STDERR and terminates the program.
# The exit status is non-zero so that callers (make rules, scripts) can
# tell that the invocation failed; previously this path exited with 0.
sub print_error
{
    print STDERR "Try `${PROGRAM} --help' for more information.\n";
    exit 1;
}
222
223
# Announce on STDOUT which output file is being produced (silent with --quiet).
sub print_message
{
    return if $QUIET_ARG;

    print "Merging translations into $OUTFILE.\n";
}
228
229
# Take PO_DIRECTORY, FILENAME and OUTPUT_FILE from the first three
# command-line arguments, then collect the .po files and build (or load)
# the translation database.
sub preparation
{
    ($PO_DIR, $FILE, $OUTFILE) = @ARGV[0 .. 2];

    gather_po_files();
    get_translation_database();
}
239
240# General-purpose code for looking up translations in .po files
241
# Derive the language code from a .po file path: "dir/xx.po" becomes "xx".
# A path that does not look like ".../NAME.po" is returned unchanged.
sub po_file2lang
{
    my ($po_path) = @_;

    (my $lang = $po_path) =~ s/^.*\/(.*)\.po$/$1/;

    return $lang;
}
248
# Record every *.po file found directly in $PO_DIR in %po_files_by_lang,
# keyed by the language code derived from its file name.
sub gather_po_files
{
    my @po_paths = glob "$PO_DIR/*.po";

    foreach my $po_path (@po_paths) {
        my $lang = po_file2lang($po_path);
        $po_files_by_lang{$lang} = $po_path;
    }
}
255
# get_local_charset: (encoding)
#
# Look the given encoding name up in a charset.alias file (the file named
# by $G_CHARSET_ALIAS, or /usr/lib/charset.alias).  For the first
# non-comment line whose second column equals the encoding
# (case-insensitively), the first column of that line is returned.  If the
# file cannot be opened or no line matches, the input string is returned.
sub get_local_charset
{
    my ($encoding) = @_;
    my $alias_file = $ENV{"G_CHARSET_ALIAS"} || "/usr/lib/charset.alias";
    my $local_name;

    # seek character encoding aliases in charset.alias (glib)

    if (open my $alias_fh, '<', $alias_file)
    {
        while (my $line = <$alias_fh>)
        {
            next if $line =~ /^\#/;

            # \Q...\E: the encoding is data, not a pattern (so "." or "+"
            # in a name must match literally).
            if ($line =~ /^\s*([-._a-zA-Z0-9]+)\s+\Q$encoding\E\b/i)
            {
                $local_name = $1;
                last;   # leave the loop so the handle is closed below
            }
        }

        close $alias_fh;
    }

    # if not found, return input string

    return defined $local_name ? $local_name : $encoding;
}
278
# get_po_encoding: (po file path)
#
# Return the character encoding of a .po file, taken from the charset=
# value of its Content-Type header line.  When no such line is found a
# warning is printed (unless --quiet) and ISO-8859-1 is assumed.  If iconv
# does not accept the name, it is mapped through get_local_charset().
# Dies if the file cannot be opened.
sub get_po_encoding
{
    my ($in_po_file) = @_;
    my $encoding = "";

    open my $po_fh, '<', $in_po_file
        or die "Cannot open $in_po_file: $!\n";
    while (my $line = <$po_fh>)
    {
        ## example: "Content-Type: text/plain; charset=ISO-8859-1\n"
        if ($line =~ /Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/)
        {
            $encoding = $1;
            last;
        }
    }
    close $po_fh;

    if (!$encoding)
    {
        print STDERR "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n" unless $QUIET_ARG;
        $encoding = "ISO-8859-1";
    }

    # Dry run on empty input: a non-zero status means iconv rejected the
    # encoding name (or could not be run at all).
    system ("$iconv -f $encoding -t UTF-8 </dev/null 2>/dev/null");
    if ($?) {
        $encoding = get_local_charset($encoding);
    }

    return $encoding;
}
309
# Warn on STDERR if the removed --pass-through option was given, and
# switch UTF-8 conversion on unconditionally.
sub utf8_sanity_check
{
    if ($PASS_THROUGH_ARG) {
        print STDERR "Warning: option --pass-through has been removed.\n";
    }

    $UTF8_ARG = 1;
}
315
# Fill %translations: through the cache when --cache was given, otherwise
# directly from the .po files.
sub get_translation_database
{
    return get_cached_translation_database() if $cache_file;

    create_translation_database();
}
324
# Return the smallest -M age (in days, relative to script start) among the
# known .po files, i.e. the age of the most recently modified one.
# Returns 0 when there are no .po files.
sub get_newest_po_age
{
    my $newest_age;

    foreach my $file (values %po_files_by_lang)
    {
        my $file_age = -M $file;

        # -M yields undef when the file cannot be stat'ed; skip it.
        next if !defined $file_age;

        # Test definedness, not truth: an age of exactly 0 is a valid
        # minimum and must not be overwritten by an older file.
        $newest_age = $file_age
            if !defined $newest_age || $file_age < $newest_age;
    }

    return defined $newest_age ? $newest_age : 0;
}
339
# Build the translation database from the .po files and write it to
# $cache_file as one string: the keys and values of %translations joined
# with \x01.  Dies if the cache file cannot be written.
sub create_cache
{
    print "Generating and caching the translation database\n" unless $QUIET_ARG;

    create_translation_database();

    # "or", not "||": with "||" the die was attached to the file name
    # string and could never run.
    open my $cache_fh, '>', $cache_file
        or die "Cannot write cache file $cache_file: $!\n";
    print {$cache_fh} join "\x01", %translations;
    close $cache_fh
        or die "Cannot finish writing cache file $cache_file: $!\n";
}
350
# Replace %translations with the contents of $cache_file, which holds the
# hash's keys and values joined with \x01 (the format written by
# create_cache).  Dies if the cache file cannot be read.
sub load_cache
{
    print "Found cached translation database\n" unless $QUIET_ARG;

    # "or", not "||": with "||" the die was attached to the file name
    # string and could never run.
    open my $cache_fh, '<', $cache_file
        or die "Cannot read cache file $cache_file: $!\n";
    my $contents = do { local $/; <$cache_fh> };
    close $cache_fh;

    $contents = "" unless defined $contents;
    %translations = split "\x01", $contents;
}
364
# Use the cache file if it exists and is at least as recent as the newest
# .po file; otherwise (re)generate it.
sub get_cached_translation_database
{
    my $cache_file_age = -M $cache_file;
    my $cache_exists = defined $cache_file_age;

    if ($cache_exists && $cache_file_age <= get_newest_po_age())
    {
        load_cache();
        return;
    }

    if ($cache_exists)
    {
        print "Found too-old cached translation database\n" unless $QUIET_ARG;
    }

    create_cache();
}
380
# Parse every known .po file and store each non-fuzzy, non-empty
# translation in %translations under the key ($lang, $msgid).
#
# With $UTF8_ARG set, files whose declared encoding is not UTF-8 are read
# through "$iconv -f ENCODING -t UTF-8".  A file that cannot be opened is
# reported on STDERR and skipped.
sub create_translation_database
{
    for my $lang (keys %po_files_by_lang)
    {
        my $po_file = $po_files_by_lang{$lang};
        my $encoding = $UTF8_ARG ? get_po_encoding ($po_file) : "";
        my $po_fh;
        my $opened;

        if ($UTF8_ARG && lc($encoding) ne "utf-8")
        {
            print STDERR "WARNING: $po_file is not in UTF-8 but $encoding, converting...\n" unless $QUIET_ARG;

            $opened = open $po_fh, '-|', "$iconv -f $encoding -t UTF-8 $po_file";
        }
        else
        {
            $opened = open $po_fh, '<', $po_file;
        }

        if (!$opened)
        {
            print STDERR "Warning: cannot read $po_file: $!\n";
            next;
        }

        my $nextfuzzy = 0;
        my $inmsgid = 0;
        my $inmsgstr = 0;
        my $msgid = "";
        my $msgstr = "";

        while (my $line = <$po_fh>)
        {
            $nextfuzzy = 1 if $line =~ /^#, fuzzy/;

            if ($line =~ /^msgid "((\\.|[^\\])*)"/ )
            {
                # A new entry starts: store the one collected so far.
                $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
                $msgid = "";
                $msgstr = "";

                if ($nextfuzzy) {
                    # Fuzzy entries are dropped: with an empty msgid the
                    # entry is never stored.
                    $inmsgid = 0;
                } else {
                    $msgid = unescape_po_string($1);
                    $inmsgid = 1;
                }
                $inmsgstr = 0;
                $nextfuzzy = 0;
            }

            if ($line =~ /^msgstr "((\\.|[^\\])*)"/)
            {
                $msgstr = unescape_po_string($1);
                $inmsgstr = 1;
                $inmsgid = 0;
            }

            # Continuation line of a multi-line msgid or msgstr.
            if ($line =~ /^"((\\.|[^\\])*)"/)
            {
                $msgid .= unescape_po_string($1) if $inmsgid;
                $msgstr .= unescape_po_string($1) if $inmsgstr;
            }
        }

        # Store the last entry of the file.
        $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;

        # For the iconv pipe, a failed close means the command exited
        # with a non-zero status.
        if (!close $po_fh)
        {
            print STDERR "Warning: problem while reading $po_file"
                . ($! ? ": $!" : " (exit status " . ($? >> 8) . ")") . "\n";
        }
    }
}
449
# Hook called after each merge; currently there is nothing to do.
sub finalize
{
    return;
}
453
# unescape_one_sequence: (escape sequence)
#
# Translate one backslash escape sequence, as found in a .po string, into
# the character it stands for.  Unknown sequences are returned unchanged.
sub unescape_one_sequence
{
    my ($sequence) = @_;

    # Single-character escapes.  (The table is built inside the sub on
    # purpose: file-level code this far down is never executed, because
    # the script calls exit before reaching it.)
    my %char_for = (
        "\\\\" => "\\",
        "\\\"" => "\"",
        "\\n"  => "\n",
        "\\r"  => "\r",
        "\\t"  => "\t",
        "\\b"  => "\b",
        "\\f"  => "\f",
        "\\a"  => "\a",
        "\\v"  => chr(11), # vertical tab, see ascii(7)
    );
    return $char_for{$sequence} if exists $char_for{$sequence};

    # Two-digit hexadecimal and three-digit octal character codes.
    if ($sequence =~ /\\x([0-9a-fA-F]{2})/) {
        return chr(hex($1));
    }
    if ($sequence =~ /\\([0-7]{3})/) {
        return chr(oct($1));
    }

    # FIXME: Is \0 supported as well? Kenneth and Rodney don't want it, see bug #48489

    return $sequence;
}
475
# unescape_po_string: (string)
#
# Return a copy of the string in which every backslash escape sequence
# (\xHH, \OOO or backslash + one character) has been replaced by the
# result of unescape_one_sequence().
sub unescape_po_string
{
    my ($escaped) = @_;

    (my $plain = $escaped) =~ s/(\\x[0-9a-fA-F]{2}|\\[0-7]{3}|\\.)/unescape_one_sequence($1)/eg;

    return $plain;
}
484
## NOTE: deal with < - &lt; but not > - &gt;  because it seems its ok to have
## > in the entity. For further info please look at #84738.
#
# entity_decode: (string)
#
# Return a copy of the string with the XML entities &apos;, &quot;, &lt;
# and &amp; replaced by the characters they stand for.
#
# &amp; is decoded last.  Decoding it first would turn "&amp;lt;" (the
# literal text "&lt;") into "&lt;" and then, wrongly, into "<".
sub entity_decode
{
    local ($_) = @_;

    s/&apos;/'/g; # '
    s/&quot;/"/g; # "
    s/&lt;/</g;
    s/&amp;/&/g;

    return $_;
}
498 
# entity_encode: (string)
#
# Encode the given string to XML format (encode '<' etc).  The string is
# processed one character code at a time ('C*' unpack) and each code is
# mapped through entity_encode_int_minimalist().

sub entity_encode
{
    my ($pre_encoded) = @_;
    my $encoded = '';

    # with UTF-8 we only encode minimalistic
    foreach (unpack ('C*', $pre_encoded)) {
        # the helper reads the current code from $_
        $encoded .= entity_encode_int_minimalist();
    }

    return $encoded;
}
512
# Map the character code in $_ to its XML representation: ", &, ' and <
# become entities, every other code is returned as the character itself.
sub entity_encode_int_minimalist
{
    my $code = $_;

    if    ($code == 34) { return "&quot;"; }
    elsif ($code == 38) { return "&amp;"; }
    elsif ($code == 39) { return "&apos;"; }
    elsif ($code == 60) { return "&lt;"; }

    return chr $code;
}
521
# entity_encoded_translation: (language, string)
#
# Return the entity-encoded translation of the string for the language,
# or the string itself, unchanged, when there is no translation.
sub entity_encoded_translation
{
    my ($lang, $string) = @_;

    my $translation = $translations{$lang, $string};

    return $translation ? entity_encode ($translation) : $string;
}
530
531## XML (bonobo-activation specific) merge code
532
# Merge translations into a bonobo-activation style XML file.
#
# $FILE is read as a whole and copied to $OUTFILE tag by tag.  For every
# tag with attributes, the tag is written once with the leading "_" of
# marked attributes removed, followed by one copy per language that has a
# translation for at least one marked attribute value.  In those copies
# the name attribute gets "-LANG" appended and marked attribute values are
# replaced by their entity-encoded translation.
sub ba_merge_translations
{
    my $source = do {
        local $/; # slurp mode
        open INPUT, "<$FILE" or die "can't open $FILE: $!";
        my $text = <INPUT>;
        close INPUT;
        $text;
    };

    open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!";

    # Each iteration removes the text before the next tag with attributes
    # and the tag itself from the front of $source.
    while ($source =~ s|^(.*?)([ \t]*<\s*$w+\s+($w+\s*=\s*"$q"\s*)+/?>)([ \t]*\n)?||s)
    {
        my ($before, $tag) = ($1, $2);

        print OUTPUT $before;

        my $node = $tag . "\n";

        # Untranslated copy: unmark the attributes, remembering their values.
        my $unmarked = $node;
        my @strings = ();
        while ($unmarked =~ s/(\s)_($w+\s*=\s*"($q)")/$1$2/s) {
            push @strings, entity_decode($3);
        }
        print OUTPUT $unmarked;

        # Languages that translate at least one of the marked values.
        my %langs;
        for my $lang (keys %po_files_by_lang)
        {
            $langs{$lang} = 1 if grep { $translations{$lang, $_} } @strings;
        }

        for my $lang (sort keys %langs)
        {
            my $localized = $node;
            $localized =~ s/(\sname\s*=\s*)"($q)"/$1"$2-$lang"/s;
            $localized =~ s/(\s)_($w+\s*=\s*")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg;
            print OUTPUT $localized;
        }
    }

    # Whatever follows the last tag with attributes.
    print OUTPUT $source;

    close OUTPUT;
}
581
582
583## XML (non-bonobo-activation) merge code
584
585
# Process tag attributes
#   Parameters:
#     1. HASH reference containing the attributes -> values mapping
#        (values still carry their surrounding quotes)
#     2. true if attributes whose name starts with "_" are to be handled
#        as translatable
#     3. language to translate into ("" for none)
#     4. SCALAR reference that is set to 2 when a translatable attribute
#        was seen
#   Returns the attributes as a string of the form ' key="value" ...',
#   ordered by attribute name in reverse sort order.
sub getAttributeString
{
    my ($sub, $do_translate, $language, $translate) = @_;
    $do_translate ||= 0;
    $language ||= "";

    my @rendered = ();

    foreach my $name (reverse(sort(keys %{ $sub }))) {
        my $key    = $name;
        my $string = $sub->{$name};

        # Trim the raw value, remember which quote character it used and
        # strip one quote character from each end.
        $string =~ s/^[\s]+//;
        $string =~ s/[\s]+$//;
        my $quote = ($string =~ /^'.*'$/) ? "'" : '"';
        $string =~ s/^['"]//g;
        $string =~ s/['"]$//g;

        if ($do_translate && $key =~ /^_/) {
            $key =~ s|^_||g;

            if ($language) {
                # Handle translation
                #
                my $translation = $translations{$language, entity_decode($string)};
                $string = entity_encode($translation) if $translation;

                # Set even without a translation: we still want
                # translations for deep nesting (FIXME: this will cause
                # problems since we might get untranslated duplicated
                # entries, but with xml:lang set).
                # Fix would be to set it here to eg. 3, and do a check in
                # traverse() to see if any of the containing tags really
                # need translation, and only emit "translation" if there
                # is (this means parsing same data twice)
                $$translate = 2;
            } elsif ($translate && !$$translate) {
                # watch not to "overwrite" if $translate == 2
                $$translate = 2;
            }
        }

        push @rendered, " $key=$quote$string$quote";
    }

    return join '', @rendered;
}
637
# Returns a translatable string from XML node, it works on contents of every node in XML::Parser tree
#   doesn't support nesting of translatable tags (i.e. <_blah>this <_doh>doesn't</_doh> work</_blah> -- besides
#   can you define the correct semantics for this?)
#
# The only parameter is an ARRAY reference in tree style: the attribute
# hash first, followed by (type, content) pairs, where a false type marks
# character data.

sub getXMLstring
{
    my $ref = shift;
    my ($attrs, @children) = @{ $ref };
    my $result = "";

    for (my $i = 0; $i < @children; $i += 2) {
        my ($type, $content) = @children[$i, $i + 1];

        if ($type) {
            # We've got another element
            $result .= "<$type" . getAttributeString(@{$content}[0], 0); # no nested translatable elements
            my $subresult = $content ? getXMLstring($content) : "";
            $result .= $subresult ? ">" . $subresult . "</$type>" : "/>";
            next;
        }

        # We've got CDATA; empty content is skipped.
        next unless $content;

        # lets strip the whitespace here, and *ONLY* here
        my $keep_space = (exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/);
        $content =~ s/\s+/ /gs unless $keep_space;
        $result .= $content;
    }

    return $result;
}
686
# Translate list of nodes if necessary
#   Parameters: output file handle, ARRAY reference of (type, content)
#   pairs, language ("" by default) and a "single language" flag.  With
#   the flag set, $MULTIPLE_OUTPUT is forced to 1 while the nodes are
#   traversed and restored afterwards.
sub translate_subnodes
{
    my ($fh, $content, $language, $singlelang) = @_;
    $language ||= "";
    $singlelang ||= 0;

    my @nodes = @{ $content };

    my $saved_mode = $MULTIPLE_OUTPUT;
    $MULTIPLE_OUTPUT = 1 if $singlelang;

    for (my $i = 0; $i < @nodes; $i += 2) {
        traverse($fh, $nodes[$i], $nodes[$i + 1], $language);
    }

    $MULTIPLE_OUTPUT = $saved_mode if $singlelang;
}
713
# traverse: (file handle, node name, content, language)
#
# Write one node of the parsed XML tree to the file handle.
#   - A false node name marks character data: it is printed as is, and
#     whitespace-only text is also appended to $leading_space.
#   - An element that needs no translation is printed with its children
#     traversed recursively.
#   - An element whose name starts with "_", or which has translatable
#     attributes, is printed untranslated and followed by one copy per
#     language carrying an xml:lang attribute; with $MULTIPLE_OUTPUT set
#     only the requested language is emitted.
sub traverse
{
    my ($fh, $nodename, $content, $language) = @_;
    $language ||= "";

    # character data
    if (!$nodename) {
        $leading_space .= $content if $content =~ /^[\s]*$/;
        print $fh $content;
        return;
    }

    # element
    my @all = @{ $content };
    my $attrs = shift @all;
    my $translate = 0;
    my $outattr = getAttributeString($attrs, 1, $language, \$translate);

    if ($nodename =~ /^_/) {
        $translate = 1;
        $nodename =~ s/^_//;
    }

    print $fh "<$nodename", $outattr;

    # Nothing to translate: just copy the children.
    if (!$translate) {
        if (!@all) {
            print $fh "/>";
            return;
        }

        print $fh ">";
        for (my $i = 0; $i < @all; $i += 2) {
            traverse($fh, $all[$i], $all[$i + 1], $language);
        }
        print $fh "</$nodename>";
        return;
    }

    # The msgid to look up is the element's content as a string.
    my $lookup = getXMLstring($content);
    my $keep_space = (exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/);
    if (!$keep_space) {
        $lookup =~ s/^\s+//s;
        $lookup =~ s/\s+$//s;
    }

    if ($lookup || $translate == 2) {
        my $translation = $translations{$language, $lookup};
        my $single_language = $MULTIPLE_OUTPUT && ($translation || $translate == 2);
        my $text = $lookup;

        if ($single_language) {
            $text = $translation if $translation;
            print $fh " xml:lang=\"", $language, "\"" if $language;
        }

        print $fh ">";
        if ($translate == 2) {
            translate_subnodes($fh, \@all, $language, 1);
        } else {
            print $fh $text;
        }
        print $fh "</$nodename>";

        # this means there will be no same translation with xml:lang="$language"...
        # if we want them both, just remove this "return"
        return if $single_language;
    } else {
        print $fh "/>";
    }

    # One additional copy of the element per language.
    for my $lang (sort keys %po_files_by_lang) {
        next if $MULTIPLE_OUTPUT && $lang ne "$language";
        next if !$lang;

        # Handle translation
        #
        my $lang_translate = 0;
        my $localattrs = getAttributeString($attrs, 1, $lang, \$lang_translate);
        my $translation = $translations{$lang, $lookup};
        if ($lang_translate && !$translation) {
            $translation = $lookup;
        }

        next unless $translation || $lang_translate;

        # Start a new line, indented like the original element.
        print $fh "\n";
        $leading_space =~ s/.*\n//g;
        print $fh $leading_space;
        print $fh "<", $nodename, " xml:lang=\"", $lang, "\"", $localattrs, ">";
        if ($lang_translate == 2) {
            translate_subnodes($fh, \@all, $lang, 1);
        } else {
            print $fh $translation;
        }
        print $fh "</$nodename>";
    }
}
821
# CdataStart handler: open a new text entry (type 0) in the current node
# list, holding the original text of the CDATA start marker.
sub intltool_tree_cdatastart
{
    my $expat = shift;
    my $clist = $expat->{Curlist};

    push @$clist, 0 => $expat->original_string();
}
830
# CdataEnd handler: append the original text of the CDATA end marker to
# the last entry of the current node list.
sub intltool_tree_cdataend
{
    my $expat = shift;
    my $clist = $expat->{Curlist};
    my $last  = $#$clist;

    $clist->[$last] .= $expat->original_string();
}
839
# Char handler: add character data to the current node list, merging it
# into the preceding text entry when there is one.
sub intltool_tree_char
{
    my ($expat, $text) = @_;   # $text (the decoded string) is not used
    my $clist = $expat->{Curlist};
    my $last  = $#$clist;

    # Use original_string so that we retain escaped entities
    # in CDATA sections.
    #
    my $raw = $expat->original_string();
    my $continues_text = ($last > 0 and $clist->[$last - 1] eq '0');

    if ($continues_text) {
        $clist->[$last] .= $raw;
    } else {
        push @$clist, 0 => $raw;
    }
}
856
# Start handler: add a new element to the tree being built.
#
# The attribute hash is rebuilt from the original text of the start tag,
# so values keep their quotes and escaped entities.  The new element's
# list becomes the current list; the previous one is pushed on
# $expat->{Lists}.
sub intltool_tree_start
{
    my $expat    = shift;
    my $tag      = shift;
    my @origlist = ();

    # Use original_string so that we retain escaped entities
    # in attribute values.  We must convert the string to an
    # @origlist array to conform to the structure of the Tree
    # Style.
    #
    my $source = $expat->original_string();

    # Remove leading tag.
    #
    $source =~ s|^\s*<\s*(\S+)||s;

    # Grab attribute key/value pairs and push onto @origlist array.
    #
    while ($source)
    {
       if ($source =~ /^\s*([\w:-]+)\s*[=]\s*["]/)
       {
           # double-quoted value
           $source =~ s|^\s*([\w:-]+)\s*[=]\s*["]([^"]*)["]||s;
           push @origlist, $1;
           push @origlist, '"' . $2 . '"';
       }
       elsif ($source =~ /^\s*([\w:-]+)\s*[=]\s*[']/)
       {
           # single-quoted value
           $source =~ s|^\s*([\w:-]+)\s*[=]\s*[']([^']*)[']||s;
           push @origlist, $1;
           push @origlist, "'" . $2 . "'";
       }
       else
       {
           # no further attribute (e.g. only ">" or "/>" is left)
           last;
       }
    }

    my $ol = [ { @origlist } ];

    push @{ $expat->{Lists} }, $expat->{Curlist};
    push @{ $expat->{Curlist} }, $tag => $ol;
    $expat->{Curlist} = $ol;
}
903
# readXml: (file name)
#
# Parse the XML file with XML::Parser in Tree style, using the
# intltool_tree_* handlers above for character data, start tags and CDATA
# markers, and return the resulting tree.  Returns nothing when no file
# name is given; dies when the file does not exist or XML::Parser cannot
# be loaded.
sub readXml
{
    my $filename = shift || return;

    die "ERROR Cannot find filename: $filename\n" unless -f $filename;

    my $have_parser = eval 'require XML::Parser';
    die "You must have XML::Parser installed to run $0\n\n" unless $have_parser;

    my $xp = XML::Parser->new(Style => 'Tree');
    $xp->setHandlers(
        Char       => \&intltool_tree_char,
        Start      => \&intltool_tree_start,
        CdataStart => \&intltool_tree_cdatastart,
        CdataEnd   => \&intltool_tree_cdataend,
    );

# <foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo>
# would be:
# [foo, [{}, head, [{id => "a"}, 0, "Hello ",  em, [{}, 0, "there"]], bar, [{},
# 0, "Howdy",  ref, [{}]], 0, "do" ] ]

    return $xp->parsefile($filename);
}
929
# print_header: (input file name, output file handle)
#
# Write an XML declaration with UTF-8 encoding to the file handle,
# followed by the DOCTYPE declaration of the input file if it has one
# (including an internal subset in [...] when present).  Dies when the
# input file does not exist or cannot be opened.
sub print_header
{
    my $infile = shift;
    my $fh = shift;

    if(!-f $infile) {
        die "ERROR Cannot find filename: $infile\n";
    }

    print $fh qq{<?xml version="1.0" encoding="UTF-8"?>\n};

    # Read the file that was passed in.  (This used to open the global
    # $FILE regardless of the argument.)
    my $source = do {
        local $/;
        open my $doc_fh, '<', $infile
            or die "ERROR Cannot open $infile: $!\n";
        my $text = <$doc_fh>;
        close $doc_fh;
        $text;
    };
    $source = '' unless defined $source;

    if ($source =~ /(<!DOCTYPE.*\[.*\]\s*>)/s)
    {
        # DOCTYPE with an internal subset
        print $fh "$1\n";
    }
    elsif ($source =~ /(<!DOCTYPE[^>]*>)/s)
    {
        print $fh "$1\n";
    }
}
956
# parseTree: (file handle, tree, language)
#
# Write the tree returned by readXml() to the file handle for the given
# language ("" by default).  The root name and content are removed from
# the front of the tree array in the process.
sub parseTree
{
    my ($fh, $ref, $language) = @_;
    $language ||= "";

    my ($name, $cont) = splice @{ $ref }, 0, 2;

    traverse($fh, $name, $cont, $language);
}
967
# Produce the merged XML output.
#
# With $MULTIPLE_OUTPUT set, one file LANG/$OUTFILE is written per
# language first (creating the LANG directory if needed).  In every case
# $OUTFILE itself is written last, with no language selected.
sub xml_merge_output
{
    my @langs = $MULTIPLE_OUTPUT ? (sort keys %po_files_by_lang) : ();

    # The trailing undef stands for the final run without a language.
    for my $lang (@langs, undef)
    {
        my $path = $OUTFILE;

        if (defined $lang)
        {
            if ( ! -e $lang ) {
                mkdir $lang or die "Cannot create subdirectory $lang: $!\n";
            }
            $path = "$lang/$OUTFILE";
        }

        open OUTPUT, ">$path" or die "Cannot open $path: $!\n";
        my $tree = readXml($FILE);
        print_header($FILE, \*OUTPUT);
        parseTree(\*OUTPUT, $tree, $lang);
        close OUTPUT;
        print "CREATED $path\n" unless $QUIET_ARG;
    }
}
992
# Merge translations into a keys style file.
#
# Lines of the form "_Key=value" are written as "Key=value", followed by
# one "[LANG]Key=translation" line per language that translates the
# value.  All other lines are copied unchanged.
sub keys_merge_translations
{
    open INPUT, "<${FILE}" or die;
    open OUTPUT, ">${OUTFILE}" or die;

    while (my $line = <INPUT>)
    {
        # Unmark the key; lines without a marked key are copied as is.
        if ($line !~ s/^(\s*)_(\w+=(.*))/$1$2/)
        {
            print OUTPUT $line;
            next;
        }

        my $string = $3;

        print OUTPUT $line;

        for my $lang (sort keys %po_files_by_lang)
        {
            my $translation = $translations{$lang, $string};
            next if !$translation;

            (my $localized = $line) =~ s/(\w+)=.*/[$lang]$1=$translation/;
            print OUTPUT $localized;
        }
    }

    close OUTPUT;
    close INPUT;
}
1027
# Merge translations into a desktop style file.
#
# Lines of the form "_Key=value" are written as "Key=value", followed by
# one "Key[LANG]=translation" line per language that translates the
# value.  All other lines are copied unchanged.
sub desktop_merge_translations
{
    open INPUT, "<${FILE}" or die;
    open OUTPUT, ">${OUTFILE}" or die;

    while (my $line = <INPUT>)
    {
        # Unmark the key; lines without a marked key are copied as is.
        if ($line !~ s/^(\s*)_(\w+=(.*))/$1$2/)
        {
            print OUTPUT $line;
            next;
        }

        my $string = $3;

        print OUTPUT $line;

        for my $lang (sort keys %po_files_by_lang)
        {
            my $translation = $translations{$lang, $string};
            next if !$translation;

            (my $localized = $line) =~ s/(\w+)=.*/${1}[$lang]=$translation/;
            print OUTPUT $localized;
        }
    }

    close OUTPUT;
    close INPUT;
}
1062
sub schemas_merge_translations
{
    # Merge translations into a GConf-style schemas file.
    #
    # $FILE is read in one piece.  Each <locale name="C"> element, with
    # its optional <default>, <short> and <long> children (in that order),
    # is copied to $OUTFILE unchanged and is followed by one
    # <locale name="lang"> element for every language of %po_files_by_lang
    # (in sorted order) that has a true translation in %translations for
    # at least one of the three strings.  The white space found inside the
    # C locale element is reused to lay out the generated elements.
    # Everything outside the C locale elements is copied through.
    #
    # Takes no arguments and returns nothing useful; it works on the
    # file-level $FILE, $OUTFILE, %po_files_by_lang and %translations and
    # on the entity_decode/entity_encode helpers defined elsewhere in this
    # file.  Dies with a message naming the file when a file cannot be
    # opened or when the output cannot be completely written.
    my $source;

    {
       local $/; # slurp mode
       # Three-argument open: characters such as '>' or '|' in a file
       # name must never be taken for an open mode.
       open my $in, '<', $FILE or die "can't open $FILE: $!";
       $source = <$in>;
       close $in;
    }

    open my $out, '>', $OUTFILE or die "can't open $OUTFILE: $!";

    # FIXME: support attribute translations

    # Empty nodes never need translation, so unmark all of them.
    # For example, <_foo/> is just replaced by <foo/>.
    # NOTE(review): $w is not declared in this routine; it is assumed to
    # be a file-level pattern fragment for one XML name character.
    $source =~ s|<\s*_($w+)\s*/>|<$1/>|g;

    # Each pass cuts the text up to and including the next C locale
    # element off the front of $source.
    while ($source =~ s/
                        (.*?)
                        (\s+)(<locale\ name="C">(\s*)
                            (<default>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/default>)?(\s*)
                            (<short>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/short>)?(\s*)
                            (<long>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/long>)?(\s*)
                        <\/locale>)
                       //sx)
    {
        # Copy the captures before any other match can reset them.  Groups
        # that did not take part in the match are undefined and become
        # empty strings.  The test is on definedness, not truth, so that a
        # source string of "0" is kept and looked up like any other.
        my ($leading_text, $locale_start_spaces, $c_default_block,
            $default_spaces, $default_string,
            $short_spaces, $short_string,
            $long_spaces, $long_string,
            $locale_end_spaces) =
            map { defined $_ ? $_ : '' }
                ($1, $2, $3, $4, $6, $7, $9, $10, $12, $13);

        print $out $leading_text;
        print $out "$locale_start_spaces$c_default_block";

        # Runs of white space are collapsed and the text is passed through
        # entity_decode before being used as the lookup key.
        $default_string =~ s/\s+/ /g;
        $default_string = entity_decode($default_string);
        $short_string =~ s/\s+/ /g;
        $short_string = entity_decode($short_string);
        $long_string =~ s/\s+/ /g;
        $long_string = entity_decode($long_string);

        for my $lang (sort keys %po_files_by_lang)
        {
            my $default_translation = $translations{$lang, $default_string};
            my $short_translation = $translations{$lang, $short_string};
            my $long_translation  = $translations{$lang, $long_string};

            next if (!$default_translation && !$short_translation &&
                     !$long_translation);

            print $out "\n$locale_start_spaces<locale name=\"$lang\">";

            print $out "$default_spaces";

            if ($default_translation)
            {
                $default_translation = entity_encode($default_translation);
                print $out "<default>$default_translation</default>";
            }

            print $out "$short_spaces";

            if ($short_translation)
            {
                $short_translation = entity_encode($short_translation);
                print $out "<short>$short_translation</short>";
            }

            print $out "$long_spaces";

            if ($long_translation)
            {
                $long_translation = entity_encode($long_translation);
                print $out "<long>$long_translation</long>";
            }

            print $out "$locale_end_spaces</locale>";
        }
    }

    # Whatever follows the last C locale element.
    print $out $source;

    # Buffered write errors (a full disk, for instance) only show up when
    # the handle is closed.
    close $out or die "can't write $OUTFILE: $!";
}
1155
sub rfc822deb_merge_translations
{
    # Merge translations into a Debian rfc822-style file.
    #
    # $FILE is read in one piece and scanned field by field.  A field is
    # "Name: value", where the value may continue on following lines that
    # start with white space.  Every field is written to $OUTFILE with the
    # underscores in front of its name removed; fields without underscore
    # whose name starts with '#' are dropped.  Dummy strings of the form
    # "[ text]" are removed from the end of a value and, for fields marked
    # with two underscores, also in front of each comma.
    #
    # A field marked with underscores is followed, for every language of
    # %po_files_by_lang (in sorted order), by a field
    # "Name-lang.ENCODING: translation", but only when %translations holds
    # a true value for every piece that rfc822deb_split returns for the
    # original value.  Two underscores mark a comma separated list, whose
    # translated items are joined with ", ".  Otherwise the first piece is
    # the short description and each further piece is a paragraph, which
    # is wrapped, indented by one space and separated from the next one by
    # a line holding a single dot.
    #
    # Takes no arguments and returns nothing useful; it works on the
    # file-level $FILE, $OUTFILE, $UTF8_ARG, %po_files_by_lang and
    # %translations and on get_po_encoding and rfc822deb_split.  The
    # Text::Wrap settings made here stay in effect after the call.  Dies
    # with a message naming the file when a file cannot be opened or when
    # the output cannot be completely written.
    my %encodings = ();
    for my $lang (keys %po_files_by_lang) {
        $encodings{$lang} = ($UTF8_ARG ? 'UTF-8' : get_po_encoding($po_files_by_lang{$lang}));
    }

    my $source;

    $Text::Wrap::huge = 'overflow';
    $Text::Wrap::break = qr/\n|\s(?=\S)/;

    {
       local $/; # slurp mode
       # Three-argument open: characters such as '>' or '|' in a file
       # name must never be taken for an open mode.
       open my $in, '<', $FILE or die "can't open $FILE: $!";
       $source = <$in>;
       close $in;
    }

    open my $out, '>', $OUTFILE or die "can't open $OUTFILE: $!";

    while ($source =~ /(^|\n+)(_*)([^:\s]+)(:[ \t]*)(.*?)(?=\n[\S\n]|$)/sg)
    {
        my $sep = $1;
        my $field = $3;
        my $non_translated_line = $3.$4;
        my $string = $5;
        my $underscore = length($2);
        next if $underscore == 0 && $non_translated_line =~ /^#/;
        #  Remove [] dummy strings
        my $stripped = $string;
        $stripped =~ s/\[\s[^\[\]]*\],/,/g if $underscore == 2;
        $stripped =~ s/\[\s[^\[\]]*\]$//;
        $non_translated_line .= $stripped;

        print $out $sep.$non_translated_line;

        next unless $underscore;

        my @str_list = rfc822deb_split($underscore, $string);

        for my $lang (sort keys %po_files_by_lang)
        {
            my $is_translated = 1;
            my $str_translated = '';
            my $first = 1;

            for my $str (@str_list)
            {
                my $translation = $translations{$lang, $str};

                if (!$translation)
                {
                    $is_translated = 0;
                    last;
                }

                #  $translation may also contain [] dummy
                #  strings, mostly to indicate an empty string
                $translation =~ s/\[\s[^\[\]]*\]$//;

                if ($first)
                {
                    if ($underscore == 2)
                    {
                        $str_translated .= $translation;
                    }
                    else
                    {
                        # Text::Tabs is loaded by Text::Wrap.
                        $str_translated .=
                            Text::Tabs::expand($translation) .
                            "\n";
                    }
                }
                else
                {
                    if ($underscore == 2)
                    {
                        $str_translated .= ', ' . $translation;
                    }
                    else
                    {
                        $str_translated .= Text::Tabs::expand(
                            Text::Wrap::wrap(' ', ' ', $translation)) .
                            "\n .\n";
                    }
                }
                $first = 0;

                #  To fix some problems with Text::Wrap::wrap
                $str_translated =~ s/(\n )+\n/\n .\n/g;
            }
            next unless $is_translated;

            $str_translated =~ s/\n \.\n$//;
            $str_translated =~ s/\s+$//;

            # The translated field is built directly from the field name
            # rather than by rewriting the untranslated line, so it is
            # also right for names holding characters such as '-'.  It
            # always starts on a new line of the same stanza, whatever
            # separated the original field from the text before it.
            print $out "\n${field}-$lang.$encodings{$lang}: $str_translated";
        }
    }
    print $out "\n";

    # Buffered write errors (a full disk, for instance) only show up when
    # the handle is closed.
    close $out or die "can't write $OUTFILE: $!";
}
1263
sub rfc822deb_split
{
    # Break the value of a Debian rfc822-style field into its
    # translatable pieces.
    #
    # Arguments:
    #   1.  the kind of value: 1 is free text, anything else is a comma
    #       separated list of values
    #   2.  the raw value of the field
    #
    # One leading space or tab is first taken off every line.  A list is
    # then simply split at its commas.  Free text without a newline is
    # returned as it is.  Otherwise the first line is the short form and
    # the lines after it are the long description, whose paragraphs are
    # separated by a line holding a single dot.  The lines of a paragraph
    # are joined with a space, except for lines which are still indented:
    # those are kept on a line of their own.
    #
    # Returns the pieces as a list: the items of the list, or the short
    # form followed by one string for each paragraph.
    my ($type, $text) = @_;

    $text =~ s/^[ \t]//mg;

    unless ($type eq 1)
    {
        return (split(/, */, $text, 0));
    }

    unless ($text =~ /\n/)
    {
        return ($text);
    }

    #  The short form is everything before the first newline
    my $first_break = index($text, "\n");
    my @pieces = (substr($text, 0, $first_break));
    my $paragraph = '';

    for my $line (split(/\n/, substr($text, $first_break + 1)))
    {
        chomp $line;

        if ($line =~ /^\.\s*$/)
        {
            #  A single dot closes the current paragraph
            $paragraph =~ s/\s*$//;
            push(@pieces, $paragraph);
            $paragraph = '';
            next;
        }

        #  True when the paragraph currently ends in the middle of a line
        my $mid_line = (length($paragraph) && $paragraph !~ /\n$/);

        if ($line =~ /^\s/)
        {
            #  Indented line, keep it on a line of its own
            $paragraph .= "\n" if $mid_line;
            $line =~ s/\s+$//;
            $paragraph .= $line . "\n";
        }
        else
        {
            #  Ordinary line, join it to the text before it
            $paragraph .= " " if $mid_line;
            $paragraph .= $line;
        }
    }

    #  The last paragraph is not followed by a dot
    $paragraph =~ s/\s*$//;
    push(@pieces, $paragraph) if length($paragraph);

    return @pieces;
}
1315
Note: See TracBrowser for help on using the repository browser.