source: trunk/third/gnome-applets/intltool-merge.in @ 21373

Revision 21373, 31.0 KB checked in by ghudson, 20 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r21372, which included commits to RCS files with non-trunk default branches.
RevLine 
[18371]1#!@INTLTOOL_PERL@ -w
[20909]2# -*- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 4  -*-
[18371]3
4#
5#  The Intltool Message Merger
6#
[20909]7#  Copyright (C) 2000, 2003 Free Software Foundation.
[18371]8#  Copyright (C) 2000, 2001 Eazel, Inc
9#
10#  Intltool is free software; you can redistribute it and/or
11#  modify it under the terms of the GNU General Public License
12#  version 2 published by the Free Software Foundation.
13#
14#  Intltool is distributed in the hope that it will be useful,
15#  but WITHOUT ANY WARRANTY; without even the implied warranty of
16#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17#  General Public License for more details.
18#
19#  You should have received a copy of the GNU General Public License
20#  along with this program; if not, write to the Free Software
21#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22#
23#  As a special exception to the GNU General Public License, if you
24#  distribute this file as part of a program that contains a
25#  configuration script generated by Autoconf, you may include it under
26#  the same distribution terms that you use for the rest of that program.
27#
28#  Authors:  Maciej Stachowiak <mjs@noisehavoc.org>
29#            Kenneth Christiansen <kenneth@gnu.org>
30#            Darin Adler <darin@bentspoon.com>
31#
32#  Proper XML UTF-8'ification written by Cyrille Chepelov <chepelov@calixo.net>
33#
34
35## Release information
36my $PROGRAM = "intltool-merge";
37my $PACKAGE = "intltool";
[21372]38my $VERSION = "0.32.1";
[18371]39
40## Loaded modules
41use strict;
42use Getopt::Long;
[18646]43use Text::Wrap;
[20909]44use File::Basename;
[18371]45
[20909]46my $must_end_tag      = -1;
47my $last_depth        = -1;
48my $translation_depth = -1;
49my @tag_stack = ();
50my @entered_tag = ();
51my @translation_strings = ();
52my $leading_space = "";
53
[18371]54## Scalars used by the option stuff
55my $HELP_ARG = 0;
56my $VERSION_ARG = 0;
57my $BA_STYLE_ARG = 0;
58my $XML_STYLE_ARG = 0;
59my $KEYS_STYLE_ARG = 0;
60my $DESKTOP_STYLE_ARG = 0;
61my $SCHEMAS_STYLE_ARG = 0;
[18646]62my $RFC822DEB_STYLE_ARG = 0;
[18371]63my $QUIET_ARG = 0;
64my $PASS_THROUGH_ARG = 0;
65my $UTF8_ARG = 0;
[20909]66my $MULTIPLE_OUTPUT = 0;
[18371]67my $cache_file;
68
69## Handle options
70GetOptions
71(
72 "help" => \$HELP_ARG,
73 "version" => \$VERSION_ARG,
74 "quiet|q" => \$QUIET_ARG,
75 "oaf-style|o" => \$BA_STYLE_ARG, ## for compatibility
76 "ba-style|b" => \$BA_STYLE_ARG,
77 "xml-style|x" => \$XML_STYLE_ARG,
78 "keys-style|k" => \$KEYS_STYLE_ARG,
79 "desktop-style|d" => \$DESKTOP_STYLE_ARG,
80 "schemas-style|s" => \$SCHEMAS_STYLE_ARG,
[18646]81 "rfc822deb-style|r" => \$RFC822DEB_STYLE_ARG,
[18371]82 "pass-through|p" => \$PASS_THROUGH_ARG,
83 "utf8|u" => \$UTF8_ARG,
[20909]84 "multiple-output|m" => \$MULTIPLE_OUTPUT,
[18371]85 "cache|c=s" => \$cache_file
86 ) or &error;
87
88my $PO_DIR;
89my $FILE;
90my $OUTFILE;
91
92my %po_files_by_lang = ();
93my %translations = ();
[21372]94my $iconv = $ENV{"ICONV"} || $ENV{"INTLTOOL_ICONV"} || "@INTLTOOL_ICONV@";
[18371]95
96# Use this instead of \w for XML files to handle more possible characters.
97my $w = "[-A-Za-z0-9._:]";
98
99# XML quoted string contents
100my $q = "[^\\\"]*";
101
102## Check for options.
103
[18646]104if ($VERSION_ARG)
105{
[18371]106        &print_version;
[18646]107}
108elsif ($HELP_ARG)
109{
[18371]110        &print_help;
[18646]111}
112elsif ($BA_STYLE_ARG && @ARGV > 2)
113{
[21372]114        &utf8_sanity_check;
[18371]115        &preparation;
116        &print_message;
117        &ba_merge_translations;
118        &finalize;
[18646]119}
120elsif ($XML_STYLE_ARG && @ARGV > 2)
121{
[21372]122        &utf8_sanity_check;
[18371]123        &preparation;
124        &print_message;
[21372]125        &xml_merge_output;
[18371]126        &finalize;
[18646]127}
128elsif ($KEYS_STYLE_ARG && @ARGV > 2)
129{
[21372]130        &utf8_sanity_check;
131        &preparation;
132        &print_message;
133        &keys_merge_translations;
[18371]134        &finalize;
[18646]135}
136elsif ($DESKTOP_STYLE_ARG && @ARGV > 2)
137{
[21372]138        &utf8_sanity_check;
139        &preparation;
140        &print_message;
141        &desktop_merge_translations;
[18371]142        &finalize;
[18646]143}
144elsif ($SCHEMAS_STYLE_ARG && @ARGV > 2)
145{
[21372]146        &utf8_sanity_check;
147        &preparation;
148        &print_message;
149        &schemas_merge_translations;
[18371]150        &finalize;
[18646]151}
152elsif ($RFC822DEB_STYLE_ARG && @ARGV > 2)
153{
[21372]154        &preparation;
155        &print_message;
156        &rfc822deb_merge_translations;
[18646]157        &finalize;
158}
159else
160{
[18371]161        &print_help;
162}
163
164exit;
165
166## Sub for printing release information
167sub print_version
168{
[20909]169    print <<_EOF_;
170${PROGRAM} (${PACKAGE}) ${VERSION}
171Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen.
172
173Copyright (C) 2000-2003 Free Software Foundation, Inc.
174Copyright (C) 2000-2001 Eazel, Inc.
175This is free software; see the source for copying conditions.  There is NO
176warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
177_EOF_
[18371]178    exit;
179}
180
181## Sub for printing usage information
182sub print_help
183{
[20909]184    print <<_EOF_;
185Usage: ${PROGRAM} [OPTION]... PO_DIRECTORY FILENAME OUTPUT_FILE
186Generates an output file that includes some localized attributes from an
187untranslated source file.
188
189Mandatory options: (exactly one must be specified)
190  -b, --ba-style         includes translations in the bonobo-activation style
191  -d, --desktop-style    includes translations in the desktop style
192  -k, --keys-style       includes translations in the keys style
193  -s, --schemas-style    includes translations in the schemas style
194  -r, --rfc822deb-style  includes translations in the RFC822 style
195  -x, --xml-style        includes translations in the standard xml style
196
197Other options:
[21372]198  -u, --utf8             convert all strings to UTF-8 before merging
199                         (default for everything except RFC822 style)
200  -p, --pass-through     deprecated, does nothing and issues a warning
[20909]201  -m, --multiple-output  output one localized file per locale, instead of
202                         a single file containing all localized elements
203  -c, --cache=FILE       specify cache file name
204                         (usually \$top_builddir/po/.intltool-merge-cache)
205  -q, --quiet            suppress most messages
206      --help             display this help and exit
207      --version          output version information and exit
208
209Report bugs to http://bugzilla.gnome.org/ (product name "$PACKAGE")
210or send email to <xml-i18n-tools\@gnome.org>.
211_EOF_
[18371]212    exit;
213}
214
215
216## Sub for printing error messages
217sub print_error
218{
[20909]219    print STDERR "Try `${PROGRAM} --help' for more information.\n";
[18371]220    exit;
221}
222
223
224sub print_message
225{
226    print "Merging translations into $OUTFILE.\n" unless $QUIET_ARG;
227}
228
229
230sub preparation
231{
232    $PO_DIR = $ARGV[0];
233    $FILE = $ARGV[1];
234    $OUTFILE = $ARGV[2];
235
236    &gather_po_files;
237    &get_translation_database;
238}
239
240# General-purpose code for looking up translations in .po files
241
242sub po_file2lang
243{
244    my ($tmp) = @_;
245    $tmp =~ s/^.*\/(.*)\.po$/$1/;
246    return $tmp;
247}
248
249sub gather_po_files
250{
251    for my $po_file (glob "$PO_DIR/*.po") {
252        $po_files_by_lang{po_file2lang($po_file)} = $po_file;
253    }
254}
255
[18646]256sub get_local_charset
257{
258    my ($encoding) = @_;
[21372]259    my $alias_file = $ENV{"G_CHARSET_ALIAS"} || "/usr/lib/charset.alias";
[18646]260
261    # seek character encoding aliases in charset.alias (glib)
262
263    if (open CHARSET_ALIAS, $alias_file)
264    {
265        while (<CHARSET_ALIAS>)
266        {
267            next if /^\#/;
268            return $1 if (/^\s*([-._a-zA-Z0-9]+)\s+$encoding\b/i)
269        }
270
271        close CHARSET_ALIAS;
272    }
273
274    # if not found, return input string
275
276    return $encoding;
277}
278
[18371]279sub get_po_encoding
280{
281    my ($in_po_file) = @_;
282    my $encoding = "";
283
284    open IN_PO_FILE, $in_po_file or die;
[18646]285    while (<IN_PO_FILE>)
286    {
[18371]287        ## example: "Content-Type: text/plain; charset=ISO-8859-1\n"
[18646]288        if (/Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/)
289        {
[18371]290            $encoding = $1;
291            last;
292        }
293    }
294    close IN_PO_FILE;
295
[18646]296    if (!$encoding)
297    {
[20909]298        print STDERR "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n" unless $QUIET_ARG;
[18371]299        $encoding = "ISO-8859-1";
300    }
[18646]301
[20909]302    system ("$iconv -f $encoding -t UTF-8 </dev/null 2>/dev/null");
303    if ($?) {
304        $encoding = get_local_charset($encoding);
305    }
[18646]306
[18371]307    return $encoding
308}
309
310sub utf8_sanity_check
311{
[21372]312    print STDERR "Warning: option --pass-through has been removed.\n" if $PASS_THROUGH_ARG;
313    $UTF8_ARG = 1;
[18371]314}
315
316sub get_translation_database
317{
318    if ($cache_file) {
319        &get_cached_translation_database;
320    } else {
321        &create_translation_database;
322    }
323}
324
325sub get_newest_po_age
326{
327    my $newest_age;
328
[18646]329    foreach my $file (values %po_files_by_lang)
330    {
[18371]331        my $file_age = -M $file;
332        $newest_age = $file_age if !$newest_age || $file_age < $newest_age;
333    }
334
[20909]335    $newest_age = 0 if !$newest_age;
336
[18371]337    return $newest_age;
338}
339
340sub create_cache
341{
342    print "Generating and caching the translation database\n" unless $QUIET_ARG;
343
344    &create_translation_database;
345
346    open CACHE, ">$cache_file" || die;
347    print CACHE join "\x01", %translations;
348    close CACHE;
349}
350
351sub load_cache
352{
353    print "Found cached translation database\n" unless $QUIET_ARG;
354
355    my $contents;
356    open CACHE, "<$cache_file" || die;
357    {
358        local $/;
359        $contents = <CACHE>;
360    }
361    close CACHE;
362    %translations = split "\x01", $contents;
363}
364
365sub get_cached_translation_database
366{
367    my $cache_file_age = -M $cache_file;
[18646]368    if (defined $cache_file_age)
369    {
370        if ($cache_file_age <= &get_newest_po_age)
371        {
[18371]372            &load_cache;
373            return;
374        }
375        print "Found too-old cached translation database\n" unless $QUIET_ARG;
376    }
377
378    &create_cache;
379}
380
381sub create_translation_database
382{
[18646]383    for my $lang (keys %po_files_by_lang)
384    {
[18371]385        my $po_file = $po_files_by_lang{$lang};
386
[18646]387        if ($UTF8_ARG)
388        {
[18371]389            my $encoding = get_po_encoding ($po_file);
[18646]390
391            if (lc $encoding eq "utf-8")
392            {
[18371]393                open PO_FILE, "<$po_file";     
[18646]394            }
395            else
396            {
[20909]397                print STDERR "WARNING: $po_file is not in UTF-8 but $encoding, converting...\n" unless $QUIET_ARG;;
398
[18371]399                open PO_FILE, "$iconv -f $encoding -t UTF-8 $po_file|";
400            }
[18646]401        }
402        else
403        {
[18371]404            open PO_FILE, "<$po_file"; 
405        }
406
407        my $nextfuzzy = 0;
408        my $inmsgid = 0;
409        my $inmsgstr = 0;
410        my $msgid = "";
411        my $msgstr = "";
[18646]412
413        while (<PO_FILE>)
414        {
[18371]415            $nextfuzzy = 1 if /^#, fuzzy/;
[18646]416       
417            if (/^msgid "((\\.|[^\\])*)"/ )
418            {
[18371]419                $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
420                $msgid = "";
421                $msgstr = "";
422
423                if ($nextfuzzy) {
424                    $inmsgid = 0;
425                } else {
426                    $msgid = unescape_po_string($1);
427                    $inmsgid = 1;
428                }
429                $inmsgstr = 0;
430                $nextfuzzy = 0;
431            }
[18646]432
433            if (/^msgstr "((\\.|[^\\])*)"/)
434            {
[18371]435                $msgstr = unescape_po_string($1);
436                $inmsgstr = 1;
437                $inmsgid = 0;
438            }
[18646]439
440            if (/^"((\\.|[^\\])*)"/)
441            {
[18371]442                $msgid .= unescape_po_string($1) if $inmsgid;
443                $msgstr .= unescape_po_string($1) if $inmsgstr;
444            }
445        }
446        $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
447    }
448}
449
450sub finalize
451{
452}
453
454sub unescape_one_sequence
455{
456    my ($sequence) = @_;
457
458    return "\\" if $sequence eq "\\\\";
459    return "\"" if $sequence eq "\\\"";
[18646]460    return "\n" if $sequence eq "\\n";
[21372]461    return "\r" if $sequence eq "\\r";
462    return "\t" if $sequence eq "\\t";
463    return "\b" if $sequence eq "\\b";
464    return "\f" if $sequence eq "\\f";
465    return "\a" if $sequence eq "\\a";
466    return chr(11) if $sequence eq "\\v"; # vertical tab, see ascii(7)
[18371]467
[21372]468    return chr(hex($1)) if ($sequence =~ /\\x([0-9a-fA-F]{2})/);
469    return chr(oct($1)) if ($sequence =~ /\\([0-7]{3})/);
[18371]470
[21372]471    # FIXME: Is \0 supported as well? Kenneth and Rodney don't want it, see bug #48489
472
[18371]473    return $sequence;
474}
475
476sub unescape_po_string
477{
478    my ($string) = @_;
479
[21372]480    $string =~ s/(\\x[0-9a-fA-F]{2}|\\[0-7]{3}|\\.)/unescape_one_sequence($1)/eg;
[18371]481
482    return $string;
483}
484
[18646]485## NOTE: deal with < - &lt; but not > - &gt;  because it seems its ok to have
486## > in the entity. For further info please look at #84738.
[18371]487sub entity_decode
488{
489    local ($_) = @_;
490
491    s/&apos;/'/g; # '
492    s/&quot;/"/g; # "
493    s/&amp;/&/g;
[18646]494    s/&lt;/</g;
[18371]495
496    return $_;
497}
[20909]498 
499# entity_encode: (string)
500#
[21372]501# Encode the given string to XML format (encode '<' etc).
[18371]502
503sub entity_encode
504{
505    my ($pre_encoded) = @_;
506
507    my @list_of_chars = unpack ('C*', $pre_encoded);
508
[21372]509    # with UTF-8 we only encode minimalistic
510    return join ('', map (&entity_encode_int_minimalist, @list_of_chars));
[18371]511}
512
513sub entity_encode_int_minimalist
514{
515    return "&quot;" if $_ == 34;
516    return "&amp;" if $_ == 38;
517    return "&apos;" if $_ == 39;
[18646]518    return "&lt;" if $_ == 60;
[18371]519    return chr $_;
520}
521
522sub entity_encoded_translation
523{
524    my ($lang, $string) = @_;
525
526    my $translation = $translations{$lang, $string};
527    return $string if !$translation;
528    return entity_encode ($translation);
529}
530
531## XML (bonobo-activation specific) merge code
532
533sub ba_merge_translations
534{
535    my $source;
536
537    {
538       local $/; # slurp mode
539       open INPUT, "<$FILE" or die "can't open $FILE: $!";
540       $source = <INPUT>;
541       close INPUT;
542    }
543
544    open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!";
545
[18646]546    while ($source =~ s|^(.*?)([ \t]*<\s*$w+\s+($w+\s*=\s*"$q"\s*)+/?>)([ \t]*\n)?||s)
547    {
[18371]548        print OUTPUT $1;
549
550        my $node = $2 . "\n";
551
552        my @strings = ();
553        $_ = $node;
554        while (s/(\s)_($w+\s*=\s*"($q)")/$1$2/s) {
555             push @strings, entity_decode($3);
556        }
557        print OUTPUT;
558
559        my %langs;
[18646]560        for my $string (@strings)
561        {
562            for my $lang (keys %po_files_by_lang)
563            {
[18371]564                $langs{$lang} = 1 if $translations{$lang, $string};
565            }
566        }
567       
[18646]568        for my $lang (sort keys %langs)
569        {
[18371]570            $_ = $node;
571            s/(\sname\s*=\s*)"($q)"/$1"$2-$lang"/s;
572            s/(\s)_($w+\s*=\s*")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg;
573            print OUTPUT;
574        }
575    }
576
577    print OUTPUT $source;
578
579    close OUTPUT;
580}
581
582
583## XML (non-bonobo-activation) merge code
584
[20909]585
586# Process tag attributes
587#   Only parameter is a HASH containing attributes -> values mapping
588sub getAttributeString
[18371]589{
[20909]590    my $sub = shift;
591    my $do_translate = shift || 0;
592    my $language = shift || "";
593    my $result = "";
[21372]594    my $translate = shift;
[20909]595    foreach my $e (reverse(sort(keys %{ $sub }))) {
596        my $key    = $e;
597        my $string = $sub->{$e};
598        my $quote = '"';
599       
600        $string =~ s/^[\s]+//;
601        $string =~ s/[\s]+$//;
602       
603        if ($string =~ /^'.*'$/)
604        {
605            $quote = "'";
606        }
607        $string =~ s/^['"]//g;
608        $string =~ s/['"]$//g;
[18371]609
[20909]610        if ($do_translate && $key =~ /^_/) {
611            $key =~ s|^_||g;
612            if ($language) {
613               
614                # Handle translation
615                #
616                my $decode_string = entity_decode($string);
617                my $translation = $translations{$language, $decode_string};
618                if ($translation) {
619                    $translation = entity_encode($translation);
620                    $string = $translation;
[21372]621                    $$translate = 2;
[20909]622                }
[21372]623            } else {
624                 $$translate = 1 if ($translate && (!$$translate)); # watch not to "overwrite" if $translate == 2
625            }
[20909]626        }
627       
628        $result .= " $key=$quote$string$quote";
[18371]629    }
[20909]630    return $result;
631}
[18371]632
[20909]633# Returns a translatable string from XML node, it works on contents of every node in XML::Parser tree
634#   doesn't support nesting of translatable tags (i.e. <_blah>this <_doh>doesn't</_doh> work</_blah> -- besides
635#   can you define the correct semantics for this?)
636#
[18371]637
[20909]638sub getXMLstring
639{
640    my $ref = shift;
641    my @list = @{ $ref };
642    my $result = "";
[18371]643
[20909]644    my $count = scalar(@list);
645    my $attrs = $list[0];
646    my $index = 1;
[21372]647
[20909]648    while ($index < $count) {
649        my $type = $list[$index];
650        my $content = $list[$index+1];
651        if (! $type ) {
652            # We've got CDATA
653            if ($content) {
654                # lets strip the whitespace here, and *ONLY* here
[21372]655                $content =~ s/\s+/ /gs if (!((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/)));
[20909]656                $result .= ($content);
657            } else {
658                #print "no cdata content when expected it\n"; # is this possible, is this ok?
659                # what to do if this happens?
660                # Did I mention that I hate XML::Parser tree style?
661            }
662        } else {
663            # We've got another element
664            $result .= "<$type";
[21372]665            $result .= getAttributeString(@{$content}[0], 0); # no nested translatable elements
[20909]666            if ($content) {
667                my $subresult = getXMLstring($content);
668                if ($subresult) {
669                    $result .= ">".$subresult . "</$type>";
670                } else {
671                    $result .= "/>";
672                }
673            } else {
674                $result .= "/>";
675            }
676        }
677        $index += 2;
678    }
679    return $result;
680}
[18371]681
[20909]682sub traverse
683{
684    my $fh = shift;
685    my $nodename = shift;
686    my $content = shift;
687    my $language = shift || "";
688
689    if (!$nodename) {
690        if ($content =~ /^[\s]*$/) {
691            $leading_space .= $content;
692        }
693        print $fh $content;
694    } else {
695        # element
696        my @all = @{ $content };
697        my $attrs = shift @all;
698        my $translate = 0;
[21372]699        my $outattr = getAttributeString($attrs, 1, $language, \$translate);
[20909]700
701        if ($nodename =~ /^_/) {
702            $translate = 1;
703            $nodename =~ s/^_//;
704        }
705        my $lookup = '';
[21372]706        print $fh "<$nodename", $outattr;
[20909]707        if ($translate) {
708            $lookup = getXMLstring($content);
[21372]709            if (!((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/))) {
710                $lookup =~ s/^\s+//s;
711                $lookup =~ s/\s+$//s;
712            }
[20909]713
[21372]714            if ($lookup || $translate == 2) {
[20909]715                my $translation = $translations{$language, $lookup};
[21372]716                if ($MULTIPLE_OUTPUT && ($translation || $translate == 2)) {
717                    $translation = $lookup if (!$translation);
[20909]718                    print $fh " xml:lang=\"", $language, "\"";
719                    print $fh ">", $translation, "</$nodename>";
720                    return; # this means there will be no same translation with xml:lang="$language"...
721                            # if we want them both, just remove this "return"
722                } else {
723                    print $fh ">$lookup</$nodename>";
724                }
725            } else {
726                print $fh "/>";
727            }
[21372]728
[20909]729            for my $lang (sort keys %po_files_by_lang) {
730                    if ($MULTIPLE_OUTPUT && $lang ne "$language") {
731                        next;
732                    }
733                    if ($lang) {
734                        # Handle translation
735                        #
[21372]736                        my $translate = 0;
737                        my $localattrs = getAttributeString($attrs, 1, $lang, \$translate);
738                        my $translation = $translations{$lang, $lookup};
739                        if ($translate && !$translation) {
740                            $translation = $lookup;
741                        }
742
743                        if ($translation || $translate) {
[20909]744                            $translation = ($translation);
745                            print $fh "\n";
746                            $leading_space =~ s/.*\n//g;
747                            print $fh $leading_space;
748                            print $fh "<", $nodename, " xml:lang=\"", $lang, "\"", $localattrs;
749                            print $fh ">", $translation , "</$nodename>";
750                        }
751                    }
752            }
753
754        } else {
755            my $count = scalar(@all);
756            if ($count > 0) {
757                print $fh ">";
758            } else {
759                print $fh "/>";
760            }
761            my $index = 0;
762            while ($index < $count) {
763                my $type = $all[$index];
764                my $rest = $all[$index+1];
765                traverse($fh, $type, $rest, $language);
766                $index += 2;
767            }
768            if ($count > 0) {
769                print $fh "</$nodename>";
770            }
771        }
772    }
773}
774
775sub intltool_tree_char
776{
777    my $expat = shift;
778    my $text  = shift;
779    my $clist = $expat->{Curlist};
780    my $pos   = $#$clist;
781
782    # Use original_string so that we retain escaped entities
783    # in CDATA sections.
784    #
785    if ($pos > 0 and $clist->[$pos - 1] eq '0') {
786        $clist->[$pos] .= $expat->original_string();
787    } else {
788        push @$clist, 0 => $expat->original_string();
789    }
790}
791
792sub intltool_tree_start
793{
794    my $expat    = shift;
795    my $tag      = shift;
796    my @origlist = ();
797
798    # Use original_string so that we retain escaped entities
799    # in attribute values.  We must convert the string to an
800    # @origlist array to conform to the structure of the Tree
801    # Style.
802    #
803    my @original_array = split /\x/, $expat->original_string();
804    my $source         = $expat->original_string();
805
806    # Remove leading tag.
807    #
808    $source =~ s|^\s*<\s*(\S+)||s;
809
810    # Grab attribute key/value pairs and push onto @origlist array.
811    #
812    while ($source)
[18646]813    {
[20909]814       if ($source =~ /^\s*([\w:-]+)\s*[=]\s*["]/)
815       {
816           $source =~ s|^\s*([\w:-]+)\s*[=]\s*["]([^"]*)["]||s;
817           push @origlist, $1;
818           push @origlist, '"' . $2 . '"';
819       }
820       elsif ($source =~ /^\s*([\w:-]+)\s*[=]\s*[']/)
821       {
822           $source =~ s|^\s*([\w:-]+)\s*[=]\s*[']([^']*)[']||s;
823           push @origlist, $1;
824           push @origlist, "'" . $2 . "'";
825       }
826       else
827       {
828           last;
829       }
830    }
[18371]831
[20909]832    my $ol = [ { @origlist } ];
[18371]833
[20909]834    push @{ $expat->{Lists} }, $expat->{Curlist};
835    push @{ $expat->{Curlist} }, $tag => $ol;
836    $expat->{Curlist} = $ol;
837}
[18371]838
[20909]839sub readXml
840{
841    my $filename = shift || return;
842    if(!-f $filename) {
843        die "ERROR Cannot find filename: $filename\n";
844    }
[18371]845
[20909]846    my $ret = eval 'require XML::Parser';
847    if(!$ret) {
848        die "You must have XML::Parser installed to run $0\n\n";
849    }
850    my $xp = new XML::Parser(Style => 'Tree');
851    $xp->setHandlers(Char => \&intltool_tree_char);
852    $xp->setHandlers(Start => \&intltool_tree_start);
853    my $tree = $xp->parsefile($filename);
854
855# <foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo>
856# would be:
857# [foo, [{}, head, [{id => "a"}, 0, "Hello ",  em, [{}, 0, "there"]], bar, [{},
858# 0, "Howdy",  ref, [{}]], 0, "do" ] ]
859
860    return $tree;
861}
862
863sub print_header
864{
865    my $infile = shift;
866    my $fh = shift;
867    my $source;
868
869    if(!-f $infile) {
870        die "ERROR Cannot find filename: $infile\n";
[18371]871    }
872
[20909]873    print $fh qq{<?xml version="1.0" encoding="UTF-8"?>\n};
874    {
875        local $/;
876        open DOCINPUT, "<${FILE}" or die;
877        $source = <DOCINPUT>;
878        close DOCINPUT;
879    }
880    if ($source =~ /(<!DOCTYPE.*\[.*\]\s*>)/s)
881    {
882        print $fh "$1\n";
883    }
884    elsif ($source =~ /(<!DOCTYPE[^>]*>)/s)
885    {
886        print $fh "$1\n";
887    }
888}
[18371]889
[20909]890sub parseTree
891{
892    my $fh        = shift;
893    my $ref       = shift;
894    my $language  = shift || "";
895
896    my $name = shift @{ $ref };
897    my $cont = shift @{ $ref };
898    traverse($fh, $name, $cont, $language);
899}
900
901sub xml_merge_output
902{
903    my $source;
904
905    if ($MULTIPLE_OUTPUT) {
906        for my $lang (sort keys %po_files_by_lang) {
907            if ( ! -e $lang ) {
908                mkdir $lang or die "Cannot create subdirectory $lang: $!\n";
909            }
910            open OUTPUT, ">$lang/$OUTFILE" or die "Cannot open $lang/$OUTFILE: $!\n";
911            my $tree = readXml($FILE);
912            print_header($FILE, \*OUTPUT);
913            parseTree(\*OUTPUT, $tree, $lang);
914            close OUTPUT;
915            print "CREATED $lang/$OUTFILE\n" unless $QUIET_ARG;
916        }
917    }
918    open OUTPUT, ">$OUTFILE" or die "Cannot open $OUTFILE: $!\n";
919    my $tree = readXml($FILE);
920    print_header($FILE, \*OUTPUT);
921    parseTree(\*OUTPUT, $tree);
[18371]922    close OUTPUT;
[20909]923    print "CREATED $OUTFILE\n" unless $QUIET_ARG;
[18371]924}
925
926sub keys_merge_translations
927{
928    open INPUT, "<${FILE}" or die;
929    open OUTPUT, ">${OUTFILE}" or die;
930
[18646]931    while (<INPUT>)
932    {
933        if (s/^(\s*)_(\w+=(.*))/$1$2/) 
934        {
[18371]935            my $string = $3;
936
937            print OUTPUT;
938
939            my $non_translated_line = $_;
940
[18646]941            for my $lang (sort keys %po_files_by_lang)
942            {
[18371]943                my $translation = $translations{$lang, $string};
944                next if !$translation;
945
946                $_ = $non_translated_line;
947                s/(\w+)=.*/[$lang]$1=$translation/;
948                print OUTPUT;
949            }
[18646]950        }
951        else
952        {
[18371]953            print OUTPUT;
954        }
955    }
956
957    close OUTPUT;
958    close INPUT;
959}
960
961sub desktop_merge_translations
962{
963    open INPUT, "<${FILE}" or die;
964    open OUTPUT, ">${OUTFILE}" or die;
965
[18646]966    while (<INPUT>)
967    {
968        if (s/^(\s*)_(\w+=(.*))/$1$2/) 
969        {
[18371]970            my $string = $3;
971
972            print OUTPUT;
973
974            my $non_translated_line = $_;
975
[18646]976            for my $lang (sort keys %po_files_by_lang)
977            {
[18371]978                my $translation = $translations{$lang, $string};
979                next if !$translation;
980
981                $_ = $non_translated_line;
982                s/(\w+)=.*/${1}[$lang]=$translation/;
983                print OUTPUT;
984            }
[18646]985        }
986        else
987        {
[18371]988            print OUTPUT;
989        }
990    }
991
992    close OUTPUT;
993    close INPUT;
994}
995
996sub schemas_merge_translations
997{
998    my $source;
999
1000    {
1001       local $/; # slurp mode
1002       open INPUT, "<$FILE" or die "can't open $FILE: $!";
1003       $source = <INPUT>;
1004       close INPUT;
1005    }
1006
1007    open OUTPUT, ">$OUTFILE" or die;
1008
1009    # FIXME: support attribute translations
1010
1011    # Empty nodes never need translation, so unmark all of them.
1012    # For example, <_foo/> is just replaced by <foo/>.
1013    $source =~ s|<\s*_($w+)\s*/>|<$1/>|g;
1014
[18646]1015    while ($source =~ s/
1016                        (.*?)
1017                        (\s+)(<locale\ name="C">(\s*)
[20909]1018                            (<default>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/default>)?(\s*)
1019                            (<short>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/short>)?(\s*)
1020                            (<long>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/long>)?(\s*)
[18646]1021                        <\/locale>)
1022                       //sx)
1023    {
[18371]1024        print OUTPUT $1;
1025
[18646]1026        my $locale_start_spaces = $2 ? $2 : '';
1027        my $default_spaces = $4 ? $4 : '';
1028        my $short_spaces = $7 ? $7 : '';
1029        my $long_spaces = $10 ? $10 : '';
1030        my $locale_end_spaces = $13 ? $13 : '';
1031        my $c_default_block = $3 ? $3 : '';
1032        my $default_string = $6 ? $6 : '';
1033        my $short_string = $9 ? $9 : '';
1034        my $long_string = $12 ? $12 : '';
[18371]1035
[18646]1036        print OUTPUT "$locale_start_spaces$c_default_block";
[18371]1037
[18646]1038        $default_string =~ s/\s+/ /g;
1039        $default_string = entity_decode($default_string);
[18371]1040        $short_string =~ s/\s+/ /g;
1041        $short_string = entity_decode($short_string);
1042        $long_string =~ s/\s+/ /g;
1043        $long_string = entity_decode($long_string);
1044
[18646]1045        for my $lang (sort keys %po_files_by_lang)
1046        {
1047            my $default_translation = $translations{$lang, $default_string};
[18371]1048            my $short_translation = $translations{$lang, $short_string};
1049            my $long_translation  = $translations{$lang, $long_string};
1050
[18646]1051            next if (!$default_translation && !$short_translation &&
1052                     !$long_translation);
[18371]1053
1054            print OUTPUT "\n$locale_start_spaces<locale name=\"$lang\">";
1055
[18646]1056        print OUTPUT "$default_spaces";   
1057
1058        if ($default_translation)
1059        {
1060            $default_translation = entity_encode($default_translation);
1061            print OUTPUT "<default>$default_translation</default>";
1062        }
1063
1064            print OUTPUT "$short_spaces";
1065
[18371]1066            if ($short_translation)
1067            {
[18646]1068                        $short_translation = entity_encode($short_translation);
1069                        print OUTPUT "<short>$short_translation</short>";
[18371]1070            }
1071
[18646]1072            print OUTPUT "$long_spaces";
1073
[18371]1074            if ($long_translation)
1075            {
[18646]1076                        $long_translation = entity_encode($long_translation);
1077                        print OUTPUT "<long>$long_translation</long>";
[18371]1078            }       
1079
1080            print OUTPUT "$locale_end_spaces</locale>";
1081        }
1082    }
1083
1084    print OUTPUT $source;
1085
1086    close OUTPUT;
1087}
[18646]1088
1089sub rfc822deb_merge_translations
1090{
[20909]1091    my %encodings = ();
1092    for my $lang (keys %po_files_by_lang) {
1093        $encodings{$lang} = ($UTF8_ARG ? 'UTF-8' : get_po_encoding($po_files_by_lang{$lang}));
1094    }
1095
[18646]1096    my $source;
1097
1098    $Text::Wrap::huge = 'overflow';
[20909]1099    $Text::Wrap::break = qr/\n|\s(?=\S)/;
[18646]1100
1101    {
1102       local $/; # slurp mode
1103       open INPUT, "<$FILE" or die "can't open $FILE: $!";
1104       $source = <INPUT>;
1105       close INPUT;
1106    }
1107
1108    open OUTPUT, ">${OUTFILE}" or die;
1109
[20909]1110    while ($source =~ /(^|\n+)(_*)([^:\s]+)(:[ \t]*)(.*?)(?=\n[\S\n]|$)/sg)
[18646]1111    {
1112            my $sep = $1;
1113            my $non_translated_line = $3.$4;
1114            my $string = $5;
[20909]1115            my $underscore = length($2);
1116            next if $underscore eq 0 && $non_translated_line =~ /^#/;
[18646]1117            #  Remove [] dummy strings
[20909]1118            my $stripped = $string;
1119            $stripped =~ s/\[\s[^\[\]]*\],/,/g if $underscore eq 2;
1120            $stripped =~ s/\[\s[^\[\]]*\]$//;
1121            $non_translated_line .= $stripped;
[18646]1122
[20909]1123            print OUTPUT $sep.$non_translated_line;
[18646]1124   
[20909]1125            if ($underscore)
1126            {
1127                my @str_list = rfc822deb_split($underscore, $string);
1128
[18646]1129                for my $lang (sort keys %po_files_by_lang)
1130                {
1131                    my $is_translated = 1;
1132                    my $str_translated = '';
1133                    my $first = 1;
1134               
1135                    for my $str (@str_list)
1136                    {
1137                        my $translation = $translations{$lang, $str};
1138                   
1139                        if (!$translation)
1140                        {
1141                            $is_translated = 0;
1142                            last;
1143                        }
1144
1145                        #  $translation may also contain [] dummy
1146                        #  strings, mostly to indicate an empty string
1147                        $translation =~ s/\[\s[^\[\]]*\]$//;
1148                       
1149                        if ($first)
1150                        {
[20909]1151                            if ($underscore eq 2)
1152                            {
1153                                $str_translated .= $translation;
1154                            }
1155                            else
1156                            {
1157                                $str_translated .=
1158                                    Text::Tabs::expand($translation) .
1159                                    "\n";
1160                            }
[18646]1161                        }
1162                        else
1163                        {
[20909]1164                            if ($underscore eq 2)
1165                            {
1166                                $str_translated .= ', ' . $translation;
1167                            }
1168                            else
1169                            {
1170                                $str_translated .= Text::Tabs::expand(
1171                                    Text::Wrap::wrap(' ', ' ', $translation)) .
1172                                    "\n .\n";
1173                            }
[18646]1174                        }
1175                        $first = 0;
1176
1177                        #  To fix some problems with Text::Wrap::wrap
1178                        $str_translated =~ s/(\n )+\n/\n .\n/g;
1179                    }
1180                    next unless $is_translated;
1181
1182                    $str_translated =~ s/\n \.\n$//;
1183                    $str_translated =~ s/\s+$//;
1184
1185                    $_ = $non_translated_line;
[20909]1186                    s/^(\w+):\s*.*/$sep${1}-$lang.$encodings{$lang}: $str_translated/s;
[18646]1187                    print OUTPUT;
1188                }
[20909]1189            }
[18646]1190    }
1191    print OUTPUT "\n";
1192
1193    close OUTPUT;
1194    close INPUT;
1195}
1196
1197sub rfc822deb_split
1198{
1199    # Debian defines a special way to deal with rfc822-style files:
1200    # when a value contain newlines, it consists of
1201    #   1.  a short form (first line)
1202    #   2.  a long description, all lines begin with a space,
1203    #       and paragraphs are separated by a single dot on a line
1204    # This routine returns an array of all paragraphs, and reformat
1205    # them.
[20909]1206    # When first argument is 2, the string is a comma separated list of
1207    # values.
1208    my $type = shift;
[18646]1209    my $text = shift;
[20909]1210    $text =~ s/^[ \t]//mg;
1211    return (split(/, */, $text, 0)) if $type ne 1;
[18646]1212    return ($text) if $text !~ /\n/;
1213
1214    $text =~ s/([^\n]*)\n//;
1215    my @list = ($1);
1216    my $str = '';
1217
1218    for my $line (split (/\n/, $text))
1219    {
1220        chomp $line;
[20909]1221        if ($line =~ /^\.\s*$/)
[18646]1222        {
1223            #  New paragraph
1224            $str =~ s/\s*$//;
1225            push(@list, $str);
1226            $str = '';
1227        }
1228        elsif ($line =~ /^\s/)
1229        {
1230            #  Line which must not be reformatted
1231            $str .= "\n" if length ($str) && $str !~ /\n$/;
[20909]1232            $line =~ s/\s+$//;
[18646]1233            $str .= $line."\n";
1234        }
1235        else
1236        {
1237            #  Continuation line, remove newline
[20909]1238            $str .= " " if length ($str) && $str !~ /\n$/;
[18646]1239            $str .= $line;
1240        }
1241    }
1242
1243    $str =~ s/\s*$//;
1244    push(@list, $str) if length ($str);
1245
1246    return @list;
1247}
1248
Note: See TracBrowser for help on using the repository browser.