source: trunk/third/metacity/intltool-merge.in @ 21543

Revision 21543, 31.3 KB checked in by ghudson, 20 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r21542, which included commits to RCS files with non-trunk default branches.
Line 
1#!@INTLTOOL_PERL@ -w
2# -*- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 4  -*-
3
4#
5#  The Intltool Message Merger
6#
7#  Copyright (C) 2000, 2003 Free Software Foundation.
8#  Copyright (C) 2000, 2001 Eazel, Inc
9#
10#  Intltool is free software; you can redistribute it and/or
11#  modify it under the terms of the GNU General Public License
12#  version 2 published by the Free Software Foundation.
13#
14#  Intltool is distributed in the hope that it will be useful,
15#  but WITHOUT ANY WARRANTY; without even the implied warranty of
16#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17#  General Public License for more details.
18#
19#  You should have received a copy of the GNU General Public License
20#  along with this program; if not, write to the Free Software
21#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22#
23#  As a special exception to the GNU General Public License, if you
24#  distribute this file as part of a program that contains a
25#  configuration script generated by Autoconf, you may include it under
26#  the same distribution terms that you use for the rest of that program.
27#
28#  Authors:  Maciej Stachowiak <mjs@noisehavoc.org>
29#            Kenneth Christiansen <kenneth@gnu.org>
30#            Darin Adler <darin@bentspoon.com>
31#
32#  Proper XML UTF-8'ification written by Cyrille Chepelov <chepelov@calixo.net>
33#
34
35## Release information
36my $PROGRAM = "intltool-merge";
37my $PACKAGE = "intltool";
38my $VERSION = "0.31.3";
39
40## Loaded modules
41use strict;
42use Getopt::Long;
43use Text::Wrap;
44use File::Basename;
45
46my $must_end_tag      = -1;
47my $last_depth        = -1;
48my $translation_depth = -1;
49my @tag_stack = ();
50my @entered_tag = ();
51my @translation_strings = ();
52my $leading_space = "";
53
54## Scalars used by the option stuff
55my $HELP_ARG = 0;
56my $VERSION_ARG = 0;
57my $BA_STYLE_ARG = 0;
58my $XML_STYLE_ARG = 0;
59my $KEYS_STYLE_ARG = 0;
60my $DESKTOP_STYLE_ARG = 0;
61my $SCHEMAS_STYLE_ARG = 0;
62my $RFC822DEB_STYLE_ARG = 0;
63my $QUIET_ARG = 0;
64my $PASS_THROUGH_ARG = 0;
65my $UTF8_ARG = 0;
66my $MULTIPLE_OUTPUT = 0;
67my $cache_file;
68
69## Handle options
70GetOptions
71(
72 "help" => \$HELP_ARG,
73 "version" => \$VERSION_ARG,
74 "quiet|q" => \$QUIET_ARG,
75 "oaf-style|o" => \$BA_STYLE_ARG, ## for compatibility
76 "ba-style|b" => \$BA_STYLE_ARG,
77 "xml-style|x" => \$XML_STYLE_ARG,
78 "keys-style|k" => \$KEYS_STYLE_ARG,
79 "desktop-style|d" => \$DESKTOP_STYLE_ARG,
80 "schemas-style|s" => \$SCHEMAS_STYLE_ARG,
81 "rfc822deb-style|r" => \$RFC822DEB_STYLE_ARG,
82 "pass-through|p" => \$PASS_THROUGH_ARG,
83 "utf8|u" => \$UTF8_ARG,
84 "multiple-output|m" => \$MULTIPLE_OUTPUT,
85 "cache|c=s" => \$cache_file
86 ) or &error;
87
88my $PO_DIR;
89my $FILE;
90my $OUTFILE;
91
92my %po_files_by_lang = ();
93my %translations = ();
94my $iconv = $ENV{"ICONV"} || $ENV{"INTLTOOL_ICONV"} || "@INTLTOOL_ICONV@";
95
96# Use this instead of \w for XML files to handle more possible characters.
97my $w = "[-A-Za-z0-9._:]";
98
99# XML quoted string contents
100my $q = "[^\\\"]*";
101
102## Check for options.
103
104if ($VERSION_ARG)
105{
106        &print_version;
107}
108elsif ($HELP_ARG)
109{
110        &print_help;
111}
112elsif ($BA_STYLE_ARG && @ARGV > 2)
113{
114        &preparation;
115        &print_message;
116        &ba_merge_translations;
117        &finalize;
118}
119elsif ($XML_STYLE_ARG && @ARGV > 2)
120{
121        &utf8_sanity_check;
122        &preparation;
123        &print_message;
124       
125    &xml_merge_output;
126
127        &finalize;
128}
129elsif ($KEYS_STYLE_ARG && @ARGV > 2)
130{
131        &utf8_sanity_check;
132        &preparation;
133        &print_message;
134        &keys_merge_translations;
135        &finalize;
136}
137elsif ($DESKTOP_STYLE_ARG && @ARGV > 2)
138{
139        &preparation;
140        &print_message;
141        &desktop_merge_translations;
142        &finalize;
143}
144elsif ($SCHEMAS_STYLE_ARG && @ARGV > 2)
145{
146        &preparation;
147        &print_message;
148        &schemas_merge_translations;
149        &finalize;
150}
151elsif ($RFC822DEB_STYLE_ARG && @ARGV > 2)
152{
153        &preparation;
154        &print_message;
155        &rfc822deb_merge_translations;
156        &finalize;
157}
158else
159{
160        &print_help;
161}
162
163exit;
164
165## Sub for printing release information
166sub print_version
167{
168    print <<_EOF_;
169${PROGRAM} (${PACKAGE}) ${VERSION}
170Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen.
171
172Copyright (C) 2000-2003 Free Software Foundation, Inc.
173Copyright (C) 2000-2001 Eazel, Inc.
174This is free software; see the source for copying conditions.  There is NO
175warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
176_EOF_
177    exit;
178}
179
180## Sub for printing usage information
181sub print_help
182{
183    print <<_EOF_;
184Usage: ${PROGRAM} [OPTION]... PO_DIRECTORY FILENAME OUTPUT_FILE
185Generates an output file that includes some localized attributes from an
186untranslated source file.
187
188Mandatory options: (exactly one must be specified)
189  -b, --ba-style         includes translations in the bonobo-activation style
190  -d, --desktop-style    includes translations in the desktop style
191  -k, --keys-style       includes translations in the keys style
192  -s, --schemas-style    includes translations in the schemas style
193  -r, --rfc822deb-style  includes translations in the RFC822 style
194  -x, --xml-style        includes translations in the standard xml style
195
196Other options:
197  -u, --utf8             convert all strings to UTF-8 before merging
198  -p, --pass-through     use strings as found in .po files, without
199                         conversion (STRONGLY unrecommended with -x)
200  -m, --multiple-output  output one localized file per locale, instead of
201                         a single file containing all localized elements
202  -c, --cache=FILE       specify cache file name
203                         (usually \$top_builddir/po/.intltool-merge-cache)
204  -q, --quiet            suppress most messages
205      --help             display this help and exit
206      --version          output version information and exit
207
208Report bugs to http://bugzilla.gnome.org/ (product name "$PACKAGE")
209or send email to <xml-i18n-tools\@gnome.org>.
210_EOF_
211    exit;
212}
213
214
215## Sub for printing error messages
216sub print_error
217{
218    print STDERR "Try `${PROGRAM} --help' for more information.\n";
219    exit;
220}
221
222
223sub print_message
224{
225    print "Merging translations into $OUTFILE.\n" unless $QUIET_ARG;
226}
227
228
229sub preparation
230{
231    $PO_DIR = $ARGV[0];
232    $FILE = $ARGV[1];
233    $OUTFILE = $ARGV[2];
234
235    &gather_po_files;
236    &get_translation_database;
237}
238
239# General-purpose code for looking up translations in .po files
240
241sub po_file2lang
242{
243    my ($tmp) = @_;
244    $tmp =~ s/^.*\/(.*)\.po$/$1/;
245    return $tmp;
246}
247
248sub gather_po_files
249{
250    for my $po_file (glob "$PO_DIR/*.po") {
251        $po_files_by_lang{po_file2lang($po_file)} = $po_file;
252    }
253}
254
255sub get_local_charset
256{
257    my ($encoding) = @_;
258    my $alias_file = $ENV{"G_CHARSET_ALIAS"} || "/opt/gnome2/lib/charset.alias";
259
260    # seek character encoding aliases in charset.alias (glib)
261
262    if (open CHARSET_ALIAS, $alias_file)
263    {
264        while (<CHARSET_ALIAS>)
265        {
266            next if /^\#/;
267            return $1 if (/^\s*([-._a-zA-Z0-9]+)\s+$encoding\b/i)
268        }
269
270        close CHARSET_ALIAS;
271    }
272
273    # if not found, return input string
274
275    return $encoding;
276}
277
278sub get_po_encoding
279{
280    my ($in_po_file) = @_;
281    my $encoding = "";
282
283    open IN_PO_FILE, $in_po_file or die;
284    while (<IN_PO_FILE>)
285    {
286        ## example: "Content-Type: text/plain; charset=ISO-8859-1\n"
287        if (/Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/)
288        {
289            $encoding = $1;
290            last;
291        }
292    }
293    close IN_PO_FILE;
294
295    if (!$encoding)
296    {
297        print STDERR "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n" unless $QUIET_ARG;
298        $encoding = "ISO-8859-1";
299    }
300
301    system ("$iconv -f $encoding -t UTF-8 </dev/null 2>/dev/null");
302    if ($?) {
303        $encoding = get_local_charset($encoding);
304    }
305
306    return $encoding
307}
308
309sub utf8_sanity_check
310{
311    if (!$UTF8_ARG)
312    {
313        if (!$PASS_THROUGH_ARG)
314        {
315            $PASS_THROUGH_ARG="1";
316        }
317    }
318}
319
320sub get_translation_database
321{
322    if ($cache_file) {
323        &get_cached_translation_database;
324    } else {
325        &create_translation_database;
326    }
327}
328
329sub get_newest_po_age
330{
331    my $newest_age;
332
333    foreach my $file (values %po_files_by_lang)
334    {
335        my $file_age = -M $file;
336        $newest_age = $file_age if !$newest_age || $file_age < $newest_age;
337    }
338
339    $newest_age = 0 if !$newest_age;
340
341    return $newest_age;
342}
343
344sub create_cache
345{
346    print "Generating and caching the translation database\n" unless $QUIET_ARG;
347
348    &create_translation_database;
349
350    open CACHE, ">$cache_file" || die;
351    print CACHE join "\x01", %translations;
352    close CACHE;
353}
354
355sub load_cache
356{
357    print "Found cached translation database\n" unless $QUIET_ARG;
358
359    my $contents;
360    open CACHE, "<$cache_file" || die;
361    {
362        local $/;
363        $contents = <CACHE>;
364    }
365    close CACHE;
366    %translations = split "\x01", $contents;
367}
368
369sub get_cached_translation_database
370{
371    my $cache_file_age = -M $cache_file;
372    if (defined $cache_file_age)
373    {
374        if ($cache_file_age <= &get_newest_po_age)
375        {
376            &load_cache;
377            return;
378        }
379        print "Found too-old cached translation database\n" unless $QUIET_ARG;
380    }
381
382    &create_cache;
383}
384
385sub create_translation_database
386{
387    for my $lang (keys %po_files_by_lang)
388    {
389        my $po_file = $po_files_by_lang{$lang};
390
391        if ($UTF8_ARG)
392        {
393            my $encoding = get_po_encoding ($po_file);
394
395            if (lc $encoding eq "utf-8")
396            {
397                open PO_FILE, "<$po_file";     
398            }
399            else
400            {
401                print STDERR "WARNING: $po_file is not in UTF-8 but $encoding, converting...\n" unless $QUIET_ARG;;
402
403                open PO_FILE, "$iconv -f $encoding -t UTF-8 $po_file|";
404            }
405        }
406        else
407        {
408            open PO_FILE, "<$po_file"; 
409        }
410
411        my $nextfuzzy = 0;
412        my $inmsgid = 0;
413        my $inmsgstr = 0;
414        my $msgid = "";
415        my $msgstr = "";
416
417        while (<PO_FILE>)
418        {
419            $nextfuzzy = 1 if /^#, fuzzy/;
420       
421            if (/^msgid "((\\.|[^\\])*)"/ )
422            {
423                $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
424                $msgid = "";
425                $msgstr = "";
426
427                if ($nextfuzzy) {
428                    $inmsgid = 0;
429                } else {
430                    $msgid = unescape_po_string($1);
431                    $inmsgid = 1;
432                }
433                $inmsgstr = 0;
434                $nextfuzzy = 0;
435            }
436
437            if (/^msgstr "((\\.|[^\\])*)"/)
438            {
439                $msgstr = unescape_po_string($1);
440                $inmsgstr = 1;
441                $inmsgid = 0;
442            }
443
444            if (/^"((\\.|[^\\])*)"/)
445            {
446                $msgid .= unescape_po_string($1) if $inmsgid;
447                $msgstr .= unescape_po_string($1) if $inmsgstr;
448            }
449        }
450        $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
451    }
452}
453
454sub finalize
455{
456}
457
458sub unescape_one_sequence
459{
460    my ($sequence) = @_;
461
462    return "\\" if $sequence eq "\\\\";
463    return "\"" if $sequence eq "\\\"";
464    return "\n" if $sequence eq "\\n";
465    return "\r" if $sequence eq "\\r";
466    return "\t" if $sequence eq "\\t";
467    return "\b" if $sequence eq "\\b";
468    return "\f" if $sequence eq "\\f";
469    return "\a" if $sequence eq "\\a";
470    return chr(11) if $sequence eq "\\v"; # vertical tab, see ascii(7)
471
472    return chr(hex($1)) if ($sequence =~ /\\x([0-9a-fA-F]{2})/);
473    return chr(oct($1)) if ($sequence =~ /\\([0-7]{3})/);
474
475    # FIXME: Is \0 supported as well? Kenneth and Rodney don't want it, see bug #48489
476
477    return $sequence;
478}
479
480sub unescape_po_string
481{
482    my ($string) = @_;
483
484    $string =~ s/(\\x[0-9a-fA-F]{2}|\\[0-7]{3}|\\.)/unescape_one_sequence($1)/eg;
485
486    return $string;
487}
488
489## NOTE: deal with < - &lt; but not > - &gt;  because it seems its ok to have
490## > in the entity. For further info please look at #84738.
491sub entity_decode
492{
493    local ($_) = @_;
494
495    s/&apos;/'/g; # '
496    s/&quot;/"/g; # "
497    s/&amp;/&/g;
498    s/&lt;/</g;
499
500    return $_;
501}
502 
503# entity_encode: (string)
504#
505# Encode the given string to XML format (encode '<' etc). It also
506# encodes high bit if not in UTF-8 mode.
507
508sub entity_encode
509{
510    my ($pre_encoded) = @_;
511
512    my @list_of_chars = unpack ('C*', $pre_encoded);
513
514    if ($PASS_THROUGH_ARG)
515    {
516        return join ('', map (&entity_encode_int_even_high_bit, @list_of_chars));
517    }
518    else
519    {
520        # with UTF-8 we only encode minimalistic
521        return join ('', map (&entity_encode_int_minimalist, @list_of_chars));
522    }
523}
524
525sub entity_encode_int_minimalist
526{
527    return "&quot;" if $_ == 34;
528    return "&amp;" if $_ == 38;
529    return "&apos;" if $_ == 39;
530    return "&lt;" if $_ == 60;
531    return chr $_;
532}
533
534sub entity_encode_int_even_high_bit
535{
536    if ($_ > 127 || $_ == 34 || $_ == 38 || $_ == 39 || $_ == 60)
537    {
538        # the ($_ > 127) should probably be removed
539        return "&#" . $_ . ";";
540    }
541    else
542    {
543        return chr $_;
544    }
545}
546
547sub entity_encoded_translation
548{
549    my ($lang, $string) = @_;
550
551    my $translation = $translations{$lang, $string};
552    return $string if !$translation;
553    return entity_encode ($translation);
554}
555
556## XML (bonobo-activation specific) merge code
557
558sub ba_merge_translations
559{
560    my $source;
561
562    {
563       local $/; # slurp mode
564       open INPUT, "<$FILE" or die "can't open $FILE: $!";
565       $source = <INPUT>;
566       close INPUT;
567    }
568
569    open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!";
570
571    while ($source =~ s|^(.*?)([ \t]*<\s*$w+\s+($w+\s*=\s*"$q"\s*)+/?>)([ \t]*\n)?||s)
572    {
573        print OUTPUT $1;
574
575        my $node = $2 . "\n";
576
577        my @strings = ();
578        $_ = $node;
579        while (s/(\s)_($w+\s*=\s*"($q)")/$1$2/s) {
580             push @strings, entity_decode($3);
581        }
582        print OUTPUT;
583
584        my %langs;
585        for my $string (@strings)
586        {
587            for my $lang (keys %po_files_by_lang)
588            {
589                $langs{$lang} = 1 if $translations{$lang, $string};
590            }
591        }
592       
593        for my $lang (sort keys %langs)
594        {
595            $_ = $node;
596            s/(\sname\s*=\s*)"($q)"/$1"$2-$lang"/s;
597            s/(\s)_($w+\s*=\s*")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg;
598            print OUTPUT;
599        }
600    }
601
602    print OUTPUT $source;
603
604    close OUTPUT;
605}
606
607
608## XML (non-bonobo-activation) merge code
609
610
611# Process tag attributes
612#   Only parameter is a HASH containing attributes -> values mapping
613sub getAttributeString
614{
615    my $sub = shift;
616    my $do_translate = shift || 0;
617    my $language = shift || "";
618    my $result = "";
619    my $translate = shift;
620    foreach my $e (reverse(sort(keys %{ $sub }))) {
621        my $key    = $e;
622        my $string = $sub->{$e};
623        my $quote = '"';
624       
625        $string =~ s/^[\s]+//;
626        $string =~ s/[\s]+$//;
627       
628        if ($string =~ /^'.*'$/)
629        {
630            $quote = "'";
631        }
632        $string =~ s/^['"]//g;
633        $string =~ s/['"]$//g;
634
635        if ($do_translate && $key =~ /^_/) {
636            $key =~ s|^_||g;
637            if ($language) {
638               
639                # Handle translation
640                #
641                my $decode_string = entity_decode($string);
642                my $translation = $translations{$language, $decode_string};
643                if ($translation) {
644                    $translation = entity_encode($translation);
645                    $string = $translation;
646                }
647            }
648        }
649       
650        $result .= " $key=$quote$string$quote";
651    }
652    return $result;
653}
654
655# Returns a translatable string from XML node, it works on contents of every node in XML::Parser tree
656#   doesn't support nesting of translatable tags (i.e. <_blah>this <_doh>doesn't</_doh> work</_blah> -- besides
657#   can you define the correct semantics for this?)
658#
659
660sub getXMLstring
661{
662    my $ref = shift;
663    my @list = @{ $ref };
664    my $result = "";
665
666    my $count = scalar(@list);
667    my $attrs = $list[0];
668    my $index = 1;
669
670    while ($index < $count) {
671        my $type = $list[$index];
672        my $content = $list[$index+1];
673        if (! $type ) {
674            # We've got CDATA
675            if ($content) {
676                # lets strip the whitespace here, and *ONLY* here
677                $content =~ s/\s+/ /gs if (!((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/)));
678                $result .= ($content);
679            } else {
680                #print "no cdata content when expected it\n"; # is this possible, is this ok?
681                # what to do if this happens?
682                # Did I mention that I hate XML::Parser tree style?
683            }
684        } else {
685            # We've got another element
686            $result .= "<$type";
687            $result .= getAttributeString(@{$content}[0], 0); # no nested translatable elements
688            if ($content) {
689                my $subresult = getXMLstring($content);
690                if ($subresult) {
691                    $result .= ">".$subresult . "</$type>";
692                } else {
693                    $result .= "/>";
694                }
695            } else {
696                $result .= "/>";
697            }
698        }
699        $index += 2;
700    }
701    return $result;
702}
703
704sub traverse
705{
706    my $fh = shift;
707    my $nodename = shift;
708    my $content = shift;
709    my $language = shift || "";
710
711    if (!$nodename) {
712        if ($content =~ /^[\s]*$/) {
713            $leading_space .= $content;
714        }
715        print $fh $content;
716    } else {
717        # element
718        my @all = @{ $content };
719        my $attrs = shift @all;
720        my $translate = 0;
721        my $outattr = getAttributeString($attrs, 1, $language, \$translate);
722
723        if ($nodename =~ /^_/) {
724            $translate = 1;
725            $nodename =~ s/^_//;
726        }
727        my $lookup = '';
728        print $fh "<$nodename$outattr";
729        if ($translate) {
730            $lookup = getXMLstring($content);
731            if (!((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/))) {
732                $lookup =~ s/^\s+//s;
733                $lookup =~ s/\s+$//s;
734            }
735
736            if ($lookup || $translate == 2) {
737                my $translation = $translations{$language, $lookup};
738                if ($MULTIPLE_OUTPUT && ($translation || $translate == 2)) {
739                    $translation = $lookup if (!$translation);
740                    print $fh " xml:lang=\"", $language, "\"";
741                    print $fh ">", $translation, "</$nodename>";
742                    return; # this means there will be no same translation with xml:lang="$language"...
743                            # if we want them both, just remove this "return"
744                } else {
745                    print $fh ">$lookup</$nodename>";
746                }
747            } else {
748                print $fh "/>";
749            }
750
751            for my $lang (sort keys %po_files_by_lang) {
752                    if ($MULTIPLE_OUTPUT && $lang ne "$language") {
753                        next;
754                    }
755                    if ($lang) {
756                        # Handle translation
757                        #
758                        my $translate = 0;
759                        my $localattrs = getAttributeString($attrs, 1, $lang, \$translate);
760                        my $translation = $translations{$lang, $lookup};
761                        if ($translate && !$translation) {
762                            $translation = $lookup;
763                        }
764
765                        if ($translation || $translate) {
766                            $translation = ($translation);
767                            print $fh "\n";
768                            $leading_space =~ s/.*\n//g;
769                            print $fh $leading_space;
770                            print $fh "<", $nodename, " xml:lang=\"", $lang, "\"", $localattrs;
771                            print $fh ">", $translation , "</$nodename>";
772                        }
773                    }
774            }
775
776        } else {
777            my $count = scalar(@all);
778            if ($count > 0) {
779                print $fh ">";
780            } else {
781                print $fh "/>";
782            }
783            my $index = 0;
784            while ($index < $count) {
785                my $type = $all[$index];
786                my $rest = $all[$index+1];
787                traverse($fh, $type, $rest, $language);
788                $index += 2;
789            }
790            if ($count > 0) {
791                print $fh "</$nodename>";
792            }
793        }
794    }
795}
796
797sub intltool_tree_char
798{
799    my $expat = shift;
800    my $text  = shift;
801    my $clist = $expat->{Curlist};
802    my $pos   = $#$clist;
803
804    # Use original_string so that we retain escaped entities
805    # in CDATA sections.
806    #
807    if ($pos > 0 and $clist->[$pos - 1] eq '0') {
808        $clist->[$pos] .= $expat->original_string();
809    } else {
810        push @$clist, 0 => $expat->original_string();
811    }
812}
813
814sub intltool_tree_start
815{
816    my $expat    = shift;
817    my $tag      = shift;
818    my @origlist = ();
819
820    # Use original_string so that we retain escaped entities
821    # in attribute values.  We must convert the string to an
822    # @origlist array to conform to the structure of the Tree
823    # Style.
824    #
825    my @original_array = split /\x/, $expat->original_string();
826    my $source         = $expat->original_string();
827
828    # Remove leading tag.
829    #
830    $source =~ s|^\s*<\s*(\S+)||s;
831
832    # Grab attribute key/value pairs and push onto @origlist array.
833    #
834    while ($source)
835    {
836       if ($source =~ /^\s*([\w:-]+)\s*[=]\s*["]/)
837       {
838           $source =~ s|^\s*([\w:-]+)\s*[=]\s*["]([^"]*)["]||s;
839           push @origlist, $1;
840           push @origlist, '"' . $2 . '"';
841       }
842       elsif ($source =~ /^\s*([\w:-]+)\s*[=]\s*[']/)
843       {
844           $source =~ s|^\s*([\w:-]+)\s*[=]\s*[']([^']*)[']||s;
845           push @origlist, $1;
846           push @origlist, "'" . $2 . "'";
847       }
848       else
849       {
850           last;
851       }
852    }
853
854    my $ol = [ { @origlist } ];
855
856    push @{ $expat->{Lists} }, $expat->{Curlist};
857    push @{ $expat->{Curlist} }, $tag => $ol;
858    $expat->{Curlist} = $ol;
859}
860
861sub readXml
862{
863    my $filename = shift || return;
864    if(!-f $filename) {
865        die "ERROR Cannot find filename: $filename\n";
866    }
867
868    my $ret = eval 'require XML::Parser';
869    if(!$ret) {
870        die "You must have XML::Parser installed to run $0\n\n";
871    }
872    my $xp = new XML::Parser(Style => 'Tree');
873    $xp->setHandlers(Char => \&intltool_tree_char);
874    $xp->setHandlers(Start => \&intltool_tree_start);
875    my $tree = $xp->parsefile($filename);
876
877# <foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo>
878# would be:
879# [foo, [{}, head, [{id => "a"}, 0, "Hello ",  em, [{}, 0, "there"]], bar, [{},
880# 0, "Howdy",  ref, [{}]], 0, "do" ] ]
881
882    return $tree;
883}
884
885sub print_header
886{
887    my $infile = shift;
888    my $fh = shift;
889    my $source;
890
891    if(!-f $infile) {
892        die "ERROR Cannot find filename: $infile\n";
893    }
894
895    print $fh qq{<?xml version="1.0" encoding="UTF-8"?>\n};
896    {
897        local $/;
898        open DOCINPUT, "<${FILE}" or die;
899        $source = <DOCINPUT>;
900        close DOCINPUT;
901    }
902    if ($source =~ /(<!DOCTYPE.*\[.*\]\s*>)/s)
903    {
904        print $fh "$1\n";
905    }
906    elsif ($source =~ /(<!DOCTYPE[^>]*>)/s)
907    {
908        print $fh "$1\n";
909    }
910}
911
912sub parseTree
913{
914    my $fh        = shift;
915    my $ref       = shift;
916    my $language  = shift || "";
917
918    my $name = shift @{ $ref };
919    my $cont = shift @{ $ref };
920    traverse($fh, $name, $cont, $language);
921}
922
923sub xml_merge_output
924{
925    my $source;
926
927    if ($MULTIPLE_OUTPUT) {
928        for my $lang (sort keys %po_files_by_lang) {
929            if ( ! -e $lang ) {
930                mkdir $lang or die "Cannot create subdirectory $lang: $!\n";
931            }
932            open OUTPUT, ">$lang/$OUTFILE" or die "Cannot open $lang/$OUTFILE: $!\n";
933            my $tree = readXml($FILE);
934            print_header($FILE, \*OUTPUT);
935            parseTree(\*OUTPUT, $tree, $lang);
936            close OUTPUT;
937            print "CREATED $lang/$OUTFILE\n" unless $QUIET_ARG;
938        }
939    }
940    open OUTPUT, ">$OUTFILE" or die "Cannot open $OUTFILE: $!\n";
941    my $tree = readXml($FILE);
942    print_header($FILE, \*OUTPUT);
943    parseTree(\*OUTPUT, $tree);
944    close OUTPUT;
945    print "CREATED $OUTFILE\n" unless $QUIET_ARG;
946}
947
948sub keys_merge_translations
949{
950    open INPUT, "<${FILE}" or die;
951    open OUTPUT, ">${OUTFILE}" or die;
952
953    while (<INPUT>)
954    {
955        if (s/^(\s*)_(\w+=(.*))/$1$2/) 
956        {
957            my $string = $3;
958
959            print OUTPUT;
960
961            my $non_translated_line = $_;
962
963            for my $lang (sort keys %po_files_by_lang)
964            {
965                my $translation = $translations{$lang, $string};
966                next if !$translation;
967
968                $_ = $non_translated_line;
969                s/(\w+)=.*/[$lang]$1=$translation/;
970                print OUTPUT;
971            }
972        }
973        else
974        {
975            print OUTPUT;
976        }
977    }
978
979    close OUTPUT;
980    close INPUT;
981}
982
983sub desktop_merge_translations
984{
985    open INPUT, "<${FILE}" or die;
986    open OUTPUT, ">${OUTFILE}" or die;
987
988    while (<INPUT>)
989    {
990        if (s/^(\s*)_(\w+=(.*))/$1$2/) 
991        {
992            my $string = $3;
993
994            print OUTPUT;
995
996            my $non_translated_line = $_;
997
998            for my $lang (sort keys %po_files_by_lang)
999            {
1000                my $translation = $translations{$lang, $string};
1001                next if !$translation;
1002
1003                $_ = $non_translated_line;
1004                s/(\w+)=.*/${1}[$lang]=$translation/;
1005                print OUTPUT;
1006            }
1007        }
1008        else
1009        {
1010            print OUTPUT;
1011        }
1012    }
1013
1014    close OUTPUT;
1015    close INPUT;
1016}
1017
1018sub schemas_merge_translations
1019{
1020    my $source;
1021
1022    {
1023       local $/; # slurp mode
1024       open INPUT, "<$FILE" or die "can't open $FILE: $!";
1025       $source = <INPUT>;
1026       close INPUT;
1027    }
1028
1029    open OUTPUT, ">$OUTFILE" or die;
1030
1031    # FIXME: support attribute translations
1032
1033    # Empty nodes never need translation, so unmark all of them.
1034    # For example, <_foo/> is just replaced by <foo/>.
1035    $source =~ s|<\s*_($w+)\s*/>|<$1/>|g;
1036
1037    while ($source =~ s/
1038                        (.*?)
1039                        (\s+)(<locale\ name="C">(\s*)
1040                            (<default>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/default>)?(\s*)
1041                            (<short>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/short>)?(\s*)
1042                            (<long>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/long>)?(\s*)
1043                        <\/locale>)
1044                       //sx)
1045    {
1046        print OUTPUT $1;
1047
1048        my $locale_start_spaces = $2 ? $2 : '';
1049        my $default_spaces = $4 ? $4 : '';
1050        my $short_spaces = $7 ? $7 : '';
1051        my $long_spaces = $10 ? $10 : '';
1052        my $locale_end_spaces = $13 ? $13 : '';
1053        my $c_default_block = $3 ? $3 : '';
1054        my $default_string = $6 ? $6 : '';
1055        my $short_string = $9 ? $9 : '';
1056        my $long_string = $12 ? $12 : '';
1057
1058        print OUTPUT "$locale_start_spaces$c_default_block";
1059
1060        $default_string =~ s/\s+/ /g;
1061        $default_string = entity_decode($default_string);
1062        $short_string =~ s/\s+/ /g;
1063        $short_string = entity_decode($short_string);
1064        $long_string =~ s/\s+/ /g;
1065        $long_string = entity_decode($long_string);
1066
1067        for my $lang (sort keys %po_files_by_lang)
1068        {
1069            my $default_translation = $translations{$lang, $default_string};
1070            my $short_translation = $translations{$lang, $short_string};
1071            my $long_translation  = $translations{$lang, $long_string};
1072
1073            next if (!$default_translation && !$short_translation &&
1074                     !$long_translation);
1075
1076            print OUTPUT "\n$locale_start_spaces<locale name=\"$lang\">";
1077
1078        print OUTPUT "$default_spaces";   
1079
1080        if ($default_translation)
1081        {
1082            $default_translation = entity_encode($default_translation);
1083            print OUTPUT "<default>$default_translation</default>";
1084        }
1085
1086            print OUTPUT "$short_spaces";
1087
1088            if ($short_translation)
1089            {
1090                        $short_translation = entity_encode($short_translation);
1091                        print OUTPUT "<short>$short_translation</short>";
1092            }
1093
1094            print OUTPUT "$long_spaces";
1095
1096            if ($long_translation)
1097            {
1098                        $long_translation = entity_encode($long_translation);
1099                        print OUTPUT "<long>$long_translation</long>";
1100            }       
1101
1102            print OUTPUT "$locale_end_spaces</locale>";
1103        }
1104    }
1105
1106    print OUTPUT $source;
1107
1108    close OUTPUT;
1109}
1110
1111sub rfc822deb_merge_translations
1112{
1113    my %encodings = ();
1114    for my $lang (keys %po_files_by_lang) {
1115        $encodings{$lang} = ($UTF8_ARG ? 'UTF-8' : get_po_encoding($po_files_by_lang{$lang}));
1116    }
1117
1118    my $source;
1119
1120    $Text::Wrap::huge = 'overflow';
1121    $Text::Wrap::break = qr/\n|\s(?=\S)/;
1122
1123    {
1124       local $/; # slurp mode
1125       open INPUT, "<$FILE" or die "can't open $FILE: $!";
1126       $source = <INPUT>;
1127       close INPUT;
1128    }
1129
1130    open OUTPUT, ">${OUTFILE}" or die;
1131
1132    while ($source =~ /(^|\n+)(_*)([^:\s]+)(:[ \t]*)(.*?)(?=\n[\S\n]|$)/sg)
1133    {
1134            my $sep = $1;
1135            my $non_translated_line = $3.$4;
1136            my $string = $5;
1137            my $underscore = length($2);
1138            next if $underscore eq 0 && $non_translated_line =~ /^#/;
1139            #  Remove [] dummy strings
1140            my $stripped = $string;
1141            $stripped =~ s/\[\s[^\[\]]*\],/,/g if $underscore eq 2;
1142            $stripped =~ s/\[\s[^\[\]]*\]$//;
1143            $non_translated_line .= $stripped;
1144
1145            print OUTPUT $sep.$non_translated_line;
1146   
1147            if ($underscore)
1148            {
1149                my @str_list = rfc822deb_split($underscore, $string);
1150
1151                for my $lang (sort keys %po_files_by_lang)
1152                {
1153                    my $is_translated = 1;
1154                    my $str_translated = '';
1155                    my $first = 1;
1156               
1157                    for my $str (@str_list)
1158                    {
1159                        my $translation = $translations{$lang, $str};
1160                   
1161                        if (!$translation)
1162                        {
1163                            $is_translated = 0;
1164                            last;
1165                        }
1166
1167                        #  $translation may also contain [] dummy
1168                        #  strings, mostly to indicate an empty string
1169                        $translation =~ s/\[\s[^\[\]]*\]$//;
1170                       
1171                        if ($first)
1172                        {
1173                            if ($underscore eq 2)
1174                            {
1175                                $str_translated .= $translation;
1176                            }
1177                            else
1178                            {
1179                                $str_translated .=
1180                                    Text::Tabs::expand($translation) .
1181                                    "\n";
1182                            }
1183                        }
1184                        else
1185                        {
1186                            if ($underscore eq 2)
1187                            {
1188                                $str_translated .= ', ' . $translation;
1189                            }
1190                            else
1191                            {
1192                                $str_translated .= Text::Tabs::expand(
1193                                    Text::Wrap::wrap(' ', ' ', $translation)) .
1194                                    "\n .\n";
1195                            }
1196                        }
1197                        $first = 0;
1198
1199                        #  To fix some problems with Text::Wrap::wrap
1200                        $str_translated =~ s/(\n )+\n/\n .\n/g;
1201                    }
1202                    next unless $is_translated;
1203
1204                    $str_translated =~ s/\n \.\n$//;
1205                    $str_translated =~ s/\s+$//;
1206
1207                    $_ = $non_translated_line;
1208                    s/^(\w+):\s*.*/$sep${1}-$lang.$encodings{$lang}: $str_translated/s;
1209                    print OUTPUT;
1210                }
1211            }
1212    }
1213    print OUTPUT "\n";
1214
1215    close OUTPUT;
1216    close INPUT;
1217}
1218
1219sub rfc822deb_split
1220{
1221    # Debian defines a special way to deal with rfc822-style files:
1222    # when a value contain newlines, it consists of
1223    #   1.  a short form (first line)
1224    #   2.  a long description, all lines begin with a space,
1225    #       and paragraphs are separated by a single dot on a line
1226    # This routine returns an array of all paragraphs, and reformat
1227    # them.
1228    # When first argument is 2, the string is a comma separated list of
1229    # values.
1230    my $type = shift;
1231    my $text = shift;
1232    $text =~ s/^[ \t]//mg;
1233    return (split(/, */, $text, 0)) if $type ne 1;
1234    return ($text) if $text !~ /\n/;
1235
1236    $text =~ s/([^\n]*)\n//;
1237    my @list = ($1);
1238    my $str = '';
1239
1240    for my $line (split (/\n/, $text))
1241    {
1242        chomp $line;
1243        if ($line =~ /^\.\s*$/)
1244        {
1245            #  New paragraph
1246            $str =~ s/\s*$//;
1247            push(@list, $str);
1248            $str = '';
1249        }
1250        elsif ($line =~ /^\s/)
1251        {
1252            #  Line which must not be reformatted
1253            $str .= "\n" if length ($str) && $str !~ /\n$/;
1254            $line =~ s/\s+$//;
1255            $str .= $line."\n";
1256        }
1257        else
1258        {
1259            #  Continuation line, remove newline
1260            $str .= " " if length ($str) && $str !~ /\n$/;
1261            $str .= $line;
1262        }
1263    }
1264
1265    $str =~ s/\s*$//;
1266    push(@list, $str) if length ($str);
1267
1268    return @list;
1269}
1270
Note: See TracBrowser for help on using the repository browser.