source: trunk/third/gtkhtml3/intltool-merge.in @ 21116

Revision 21116, 30.4 KB checked in by ghudson, 20 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r21115, which included commits to RCS files with non-trunk default branches.
Line 
1#!@INTLTOOL_PERL@ -w
2# -*- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 4  -*-
3
4#
5#  The Intltool Message Merger
6#
7#  Copyright (C) 2000, 2003 Free Software Foundation.
8#  Copyright (C) 2000, 2001 Eazel, Inc
9#
10#  Intltool is free software; you can redistribute it and/or
11#  modify it under the terms of the GNU General Public License
12#  version 2 published by the Free Software Foundation.
13#
14#  Intltool is distributed in the hope that it will be useful,
15#  but WITHOUT ANY WARRANTY; without even the implied warranty of
16#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17#  General Public License for more details.
18#
19#  You should have received a copy of the GNU General Public License
20#  along with this program; if not, write to the Free Software
21#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22#
23#  As a special exception to the GNU General Public License, if you
24#  distribute this file as part of a program that contains a
25#  configuration script generated by Autoconf, you may include it under
26#  the same distribution terms that you use for the rest of that program.
27#
28#  Authors:  Maciej Stachowiak <mjs@noisehavoc.org>
29#            Kenneth Christiansen <kenneth@gnu.org>
30#            Darin Adler <darin@bentspoon.com>
31#
32#  Proper XML UTF-8'ification written by Cyrille Chepelov <chepelov@calixo.net>
33#
34
35## Release information
36my $PROGRAM = "intltool-merge";
37my $PACKAGE = "intltool";
38my $VERSION = "0.31.1";
39
40## Loaded modules
41use strict;
42use Getopt::Long;
43use Text::Wrap;
44use File::Basename;
45
46my $must_end_tag      = -1;
47my $last_depth        = -1;
48my $translation_depth = -1;
49my @tag_stack = ();
50my @entered_tag = ();
51my @translation_strings = ();
52my $leading_space = "";
53
54## Scalars used by the option stuff
55my $HELP_ARG = 0;
56my $VERSION_ARG = 0;
57my $BA_STYLE_ARG = 0;
58my $XML_STYLE_ARG = 0;
59my $KEYS_STYLE_ARG = 0;
60my $DESKTOP_STYLE_ARG = 0;
61my $SCHEMAS_STYLE_ARG = 0;
62my $RFC822DEB_STYLE_ARG = 0;
63my $QUIET_ARG = 0;
64my $PASS_THROUGH_ARG = 0;
65my $UTF8_ARG = 0;
66my $MULTIPLE_OUTPUT = 0;
67my $cache_file;
68
69## Handle options
70GetOptions
71(
72 "help" => \$HELP_ARG,
73 "version" => \$VERSION_ARG,
74 "quiet|q" => \$QUIET_ARG,
75 "oaf-style|o" => \$BA_STYLE_ARG, ## for compatibility
76 "ba-style|b" => \$BA_STYLE_ARG,
77 "xml-style|x" => \$XML_STYLE_ARG,
78 "keys-style|k" => \$KEYS_STYLE_ARG,
79 "desktop-style|d" => \$DESKTOP_STYLE_ARG,
80 "schemas-style|s" => \$SCHEMAS_STYLE_ARG,
81 "rfc822deb-style|r" => \$RFC822DEB_STYLE_ARG,
82 "pass-through|p" => \$PASS_THROUGH_ARG,
83 "utf8|u" => \$UTF8_ARG,
84 "multiple-output|m" => \$MULTIPLE_OUTPUT,
85 "cache|c=s" => \$cache_file
86 ) or &error;
87
88my $PO_DIR;
89my $FILE;
90my $OUTFILE;
91
92my %po_files_by_lang = ();
93my %translations = ();
94my $iconv = $ENV{"INTLTOOL_ICONV"} || "iconv";
95
96# Use this instead of \w for XML files to handle more possible characters.
97my $w = "[-A-Za-z0-9._:]";
98
99# XML quoted string contents
100my $q = "[^\\\"]*";
101
102## Check for options.
103
104if ($VERSION_ARG)
105{
106        &print_version;
107}
108elsif ($HELP_ARG)
109{
110        &print_help;
111}
112elsif ($BA_STYLE_ARG && @ARGV > 2)
113{
114        &preparation;
115        &print_message;
116        &ba_merge_translations;
117        &finalize;
118}
119elsif ($XML_STYLE_ARG && @ARGV > 2)
120{
121        &utf8_sanity_check;
122        &preparation;
123        &print_message;
124       
125    &xml_merge_output;
126
127        &finalize;
128}
129elsif ($KEYS_STYLE_ARG && @ARGV > 2)
130{
131        &utf8_sanity_check;
132        &preparation;
133        &print_message;
134        &keys_merge_translations;
135        &finalize;
136}
137elsif ($DESKTOP_STYLE_ARG && @ARGV > 2)
138{
139        &preparation;
140        &print_message;
141        &desktop_merge_translations;
142        &finalize;
143}
144elsif ($SCHEMAS_STYLE_ARG && @ARGV > 2)
145{
146        &preparation;
147        &print_message;
148        &schemas_merge_translations;
149        &finalize;
150}
151elsif ($RFC822DEB_STYLE_ARG && @ARGV > 2)
152{
153        &preparation;
154        &print_message;
155        &rfc822deb_merge_translations;
156        &finalize;
157}
158else
159{
160        &print_help;
161}
162
163exit;
164
165## Sub for printing release information
166sub print_version
167{
168    print <<_EOF_;
169${PROGRAM} (${PACKAGE}) ${VERSION}
170Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen.
171
172Copyright (C) 2000-2003 Free Software Foundation, Inc.
173Copyright (C) 2000-2001 Eazel, Inc.
174This is free software; see the source for copying conditions.  There is NO
175warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
176_EOF_
177    exit;
178}
179
180## Sub for printing usage information
181sub print_help
182{
183    print <<_EOF_;
184Usage: ${PROGRAM} [OPTION]... PO_DIRECTORY FILENAME OUTPUT_FILE
185Generates an output file that includes some localized attributes from an
186untranslated source file.
187
188Mandatory options: (exactly one must be specified)
189  -b, --ba-style         includes translations in the bonobo-activation style
190  -d, --desktop-style    includes translations in the desktop style
191  -k, --keys-style       includes translations in the keys style
192  -s, --schemas-style    includes translations in the schemas style
193  -r, --rfc822deb-style  includes translations in the RFC822 style
194  -x, --xml-style        includes translations in the standard xml style
195
196Other options:
197  -u, --utf8             convert all strings to UTF-8 before merging
198  -p, --pass-through     use strings as found in .po files, without
199                         conversion (STRONGLY unrecommended with -x)
200  -m, --multiple-output  output one localized file per locale, instead of
201                         a single file containing all localized elements
202  -c, --cache=FILE       specify cache file name
203                         (usually \$top_builddir/po/.intltool-merge-cache)
204  -q, --quiet            suppress most messages
205      --help             display this help and exit
206      --version          output version information and exit
207
208Report bugs to http://bugzilla.gnome.org/ (product name "$PACKAGE")
209or send email to <xml-i18n-tools\@gnome.org>.
210_EOF_
211    exit;
212}
213
214
215## Sub for printing error messages
216sub print_error
217{
218    print STDERR "Try `${PROGRAM} --help' for more information.\n";
219    exit;
220}
221
222
223sub print_message
224{
225    print "Merging translations into $OUTFILE.\n" unless $QUIET_ARG;
226}
227
228
229sub preparation
230{
231    $PO_DIR = $ARGV[0];
232    $FILE = $ARGV[1];
233    $OUTFILE = $ARGV[2];
234
235    &gather_po_files;
236    &get_translation_database;
237}
238
239# General-purpose code for looking up translations in .po files
240
241sub po_file2lang
242{
243    my ($tmp) = @_;
244    $tmp =~ s/^.*\/(.*)\.po$/$1/;
245    return $tmp;
246}
247
248sub gather_po_files
249{
250    for my $po_file (glob "$PO_DIR/*.po") {
251        $po_files_by_lang{po_file2lang($po_file)} = $po_file;
252    }
253}
254
255sub get_local_charset
256{
257    my ($encoding) = @_;
258    my $alias_file = $ENV{"G_CHARSET_ALIAS"} || "/home/evolution-dist/lib/charset.alias";
259
260    # seek character encoding aliases in charset.alias (glib)
261
262    if (open CHARSET_ALIAS, $alias_file)
263    {
264        while (<CHARSET_ALIAS>)
265        {
266            next if /^\#/;
267            return $1 if (/^\s*([-._a-zA-Z0-9]+)\s+$encoding\b/i)
268        }
269
270        close CHARSET_ALIAS;
271    }
272
273    # if not found, return input string
274
275    return $encoding;
276}
277
278sub get_po_encoding
279{
280    my ($in_po_file) = @_;
281    my $encoding = "";
282
283    open IN_PO_FILE, $in_po_file or die;
284    while (<IN_PO_FILE>)
285    {
286        ## example: "Content-Type: text/plain; charset=ISO-8859-1\n"
287        if (/Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/)
288        {
289            $encoding = $1;
290            last;
291        }
292    }
293    close IN_PO_FILE;
294
295    if (!$encoding)
296    {
297        print STDERR "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n" unless $QUIET_ARG;
298        $encoding = "ISO-8859-1";
299    }
300
301    system ("$iconv -f $encoding -t UTF-8 </dev/null 2>/dev/null");
302    if ($?) {
303        $encoding = get_local_charset($encoding);
304    }
305
306    return $encoding
307}
308
309sub utf8_sanity_check
310{
311    if (!$UTF8_ARG)
312    {
313        if (!$PASS_THROUGH_ARG)
314        {
315            $PASS_THROUGH_ARG="1";
316        }
317    }
318}
319
320sub get_translation_database
321{
322    if ($cache_file) {
323        &get_cached_translation_database;
324    } else {
325        &create_translation_database;
326    }
327}
328
329sub get_newest_po_age
330{
331    my $newest_age;
332
333    foreach my $file (values %po_files_by_lang)
334    {
335        my $file_age = -M $file;
336        $newest_age = $file_age if !$newest_age || $file_age < $newest_age;
337    }
338
339    $newest_age = 0 if !$newest_age;
340
341    return $newest_age;
342}
343
344sub create_cache
345{
346    print "Generating and caching the translation database\n" unless $QUIET_ARG;
347
348    &create_translation_database;
349
350    open CACHE, ">$cache_file" || die;
351    print CACHE join "\x01", %translations;
352    close CACHE;
353}
354
355sub load_cache
356{
357    print "Found cached translation database\n" unless $QUIET_ARG;
358
359    my $contents;
360    open CACHE, "<$cache_file" || die;
361    {
362        local $/;
363        $contents = <CACHE>;
364    }
365    close CACHE;
366    %translations = split "\x01", $contents;
367}
368
369sub get_cached_translation_database
370{
371    my $cache_file_age = -M $cache_file;
372    if (defined $cache_file_age)
373    {
374        if ($cache_file_age <= &get_newest_po_age)
375        {
376            &load_cache;
377            return;
378        }
379        print "Found too-old cached translation database\n" unless $QUIET_ARG;
380    }
381
382    &create_cache;
383}
384
385sub create_translation_database
386{
387    for my $lang (keys %po_files_by_lang)
388    {
389        my $po_file = $po_files_by_lang{$lang};
390
391        if ($UTF8_ARG)
392        {
393            my $encoding = get_po_encoding ($po_file);
394
395            if (lc $encoding eq "utf-8")
396            {
397                open PO_FILE, "<$po_file";     
398            }
399            else
400            {
401                print STDERR "WARNING: $po_file is not in UTF-8 but $encoding, converting...\n" unless $QUIET_ARG;;
402
403                open PO_FILE, "$iconv -f $encoding -t UTF-8 $po_file|";
404            }
405        }
406        else
407        {
408            open PO_FILE, "<$po_file"; 
409        }
410
411        my $nextfuzzy = 0;
412        my $inmsgid = 0;
413        my $inmsgstr = 0;
414        my $msgid = "";
415        my $msgstr = "";
416
417        while (<PO_FILE>)
418        {
419            $nextfuzzy = 1 if /^#, fuzzy/;
420       
421            if (/^msgid "((\\.|[^\\])*)"/ )
422            {
423                $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
424                $msgid = "";
425                $msgstr = "";
426
427                if ($nextfuzzy) {
428                    $inmsgid = 0;
429                } else {
430                    $msgid = unescape_po_string($1);
431                    $inmsgid = 1;
432                }
433                $inmsgstr = 0;
434                $nextfuzzy = 0;
435            }
436
437            if (/^msgstr "((\\.|[^\\])*)"/)
438            {
439                $msgstr = unescape_po_string($1);
440                $inmsgstr = 1;
441                $inmsgid = 0;
442            }
443
444            if (/^"((\\.|[^\\])*)"/)
445            {
446                $msgid .= unescape_po_string($1) if $inmsgid;
447                $msgstr .= unescape_po_string($1) if $inmsgstr;
448            }
449        }
450        $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
451    }
452}
453
454sub finalize
455{
456}
457
458sub unescape_one_sequence
459{
460    my ($sequence) = @_;
461
462    return "\\" if $sequence eq "\\\\";
463    return "\"" if $sequence eq "\\\"";
464    return "\n" if $sequence eq "\\n";
465
466    # gettext also handles \n, \t, \b, \r, \f, \v, \a, \xxx (octal),
467    # \xXX (hex) and has a comment saying they want to handle \u and \U.
468
469    return $sequence;
470}
471
472sub unescape_po_string
473{
474    my ($string) = @_;
475
476    $string =~ s/(\\.)/unescape_one_sequence($1)/eg;
477
478    return $string;
479}
480
481## NOTE: deal with < - &lt; but not > - &gt;  because it seems its ok to have
482## > in the entity. For further info please look at #84738.
483sub entity_decode
484{
485    local ($_) = @_;
486
487    s/&apos;/'/g; # '
488    s/&quot;/"/g; # "
489    s/&amp;/&/g;
490    s/&lt;/</g;
491
492    return $_;
493}
494 
495# entity_encode: (string)
496#
497# Encode the given string to XML format (encode '<' etc). It also
498# encodes high bit if not in UTF-8 mode.
499
500sub entity_encode
501{
502    my ($pre_encoded) = @_;
503
504    my @list_of_chars = unpack ('C*', $pre_encoded);
505
506    if ($PASS_THROUGH_ARG)
507    {
508        return join ('', map (&entity_encode_int_even_high_bit, @list_of_chars));
509    }
510    else
511    {
512        # with UTF-8 we only encode minimalistic
513        return join ('', map (&entity_encode_int_minimalist, @list_of_chars));
514    }
515}
516
517sub entity_encode_int_minimalist
518{
519    return "&quot;" if $_ == 34;
520    return "&amp;" if $_ == 38;
521    return "&apos;" if $_ == 39;
522    return "&lt;" if $_ == 60;
523    return chr $_;
524}
525
526sub entity_encode_int_even_high_bit
527{
528    if ($_ > 127 || $_ == 34 || $_ == 38 || $_ == 39 || $_ == 60)
529    {
530        # the ($_ > 127) should probably be removed
531        return "&#" . $_ . ";";
532    }
533    else
534    {
535        return chr $_;
536    }
537}
538
539sub entity_encoded_translation
540{
541    my ($lang, $string) = @_;
542
543    my $translation = $translations{$lang, $string};
544    return $string if !$translation;
545    return entity_encode ($translation);
546}
547
548## XML (bonobo-activation specific) merge code
549
550sub ba_merge_translations
551{
552    my $source;
553
554    {
555       local $/; # slurp mode
556       open INPUT, "<$FILE" or die "can't open $FILE: $!";
557       $source = <INPUT>;
558       close INPUT;
559    }
560
561    open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!";
562
563    while ($source =~ s|^(.*?)([ \t]*<\s*$w+\s+($w+\s*=\s*"$q"\s*)+/?>)([ \t]*\n)?||s)
564    {
565        print OUTPUT $1;
566
567        my $node = $2 . "\n";
568
569        my @strings = ();
570        $_ = $node;
571        while (s/(\s)_($w+\s*=\s*"($q)")/$1$2/s) {
572             push @strings, entity_decode($3);
573        }
574        print OUTPUT;
575
576        my %langs;
577        for my $string (@strings)
578        {
579            for my $lang (keys %po_files_by_lang)
580            {
581                $langs{$lang} = 1 if $translations{$lang, $string};
582            }
583        }
584       
585        for my $lang (sort keys %langs)
586        {
587            $_ = $node;
588            s/(\sname\s*=\s*)"($q)"/$1"$2-$lang"/s;
589            s/(\s)_($w+\s*=\s*")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg;
590            print OUTPUT;
591        }
592    }
593
594    print OUTPUT $source;
595
596    close OUTPUT;
597}
598
599
600## XML (non-bonobo-activation) merge code
601
602
603# Process tag attributes
604#   Only parameter is a HASH containing attributes -> values mapping
605sub getAttributeString
606{
607    my $sub = shift;
608    my $do_translate = shift || 0;
609    my $language = shift || "";
610    my $result = "";
611    foreach my $e (reverse(sort(keys %{ $sub }))) {
612        my $key    = $e;
613        my $string = $sub->{$e};
614        my $quote = '"';
615       
616        $string =~ s/^[\s]+//;
617        $string =~ s/[\s]+$//;
618       
619        if ($string =~ /^'.*'$/)
620        {
621            $quote = "'";
622        }
623        $string =~ s/^['"]//g;
624        $string =~ s/['"]$//g;
625
626        if ($do_translate && $key =~ /^_/) {
627            $key =~ s|^_||g;
628            if ($language) {
629               
630                # Handle translation
631                #
632                my $decode_string = entity_decode($string);
633                my $translation = $translations{$language, $decode_string};
634                if ($translation) {
635                    $translation = entity_encode($translation);
636                    $string = $translation;
637                }
638            }
639        }
640       
641        $result .= " $key=$quote$string$quote";
642    }
643    return $result;
644}
645
646# Returns a translatable string from XML node, it works on contents of every node in XML::Parser tree
647#   doesn't support nesting of translatable tags (i.e. <_blah>this <_doh>doesn't</_doh> work</_blah> -- besides
648#   can you define the correct semantics for this?)
649#
650
651sub getXMLstring
652{
653    my $ref = shift;
654    my @list = @{ $ref };
655    my $result = "";
656
657    my $count = scalar(@list);
658    my $attrs = $list[0];
659    my $index = 1;
660    while ($index < $count) {
661        my $type = $list[$index];
662        my $content = $list[$index+1];
663        if (! $type ) {
664            # We've got CDATA
665            if ($content) {
666                # lets strip the whitespace here, and *ONLY* here
667                $content =~ s/\s+/ /gs;
668                $result .= ($content);
669            } else {
670                #print "no cdata content when expected it\n"; # is this possible, is this ok?
671                # what to do if this happens?
672                # Did I mention that I hate XML::Parser tree style?
673            }
674        } else {
675            # We've got another element
676            $result .= "<$type";
677            $result .= getAttributeString($attrs, 0); # no nested translatable elements
678            if ($content) {
679                my $subresult = getXMLstring($content);
680                if ($subresult) {
681                    $result .= ">".$subresult . "</$type>";
682                } else {
683                    $result .= "/>";
684                }
685            } else {
686                $result .= "/>";
687            }
688        }
689        $index += 2;
690    }
691    return $result;
692}
693
694sub traverse
695{
696    my $fh = shift;
697    my $nodename = shift;
698    my $content = shift;
699    my $language = shift || "";
700
701    if (!$nodename) {
702        if ($content =~ /^[\s]*$/) {
703            $leading_space .= $content;
704        }
705        print $fh $content;
706    } else {
707        # element
708        my @all = @{ $content };
709        my $attrs = shift @all;
710        my $outattr = getAttributeString($attrs, 1, $language);
711        my $translate = 0;
712
713        if ($nodename =~ /^_/) {
714            $translate = 1;
715            $nodename =~ s/^_//;
716        }
717        my $lookup = '';
718        print $fh "<$nodename$outattr";
719        if ($translate) {
720            $lookup = getXMLstring($content);
721            $lookup =~ s/^\s+//s;
722            $lookup =~ s/\s+$//s;
723
724            if ($lookup) {
725                my $translation = $translations{$language, $lookup};
726                if ($MULTIPLE_OUTPUT && $translation) {
727                    print $fh " xml:lang=\"", $language, "\"";
728                    print $fh ">", $translation, "</$nodename>";
729                    return; # this means there will be no same translation with xml:lang="$language"...
730                            # if we want them both, just remove this "return"
731                } else {
732                    print $fh ">$lookup</$nodename>";
733                }
734            } else {
735                print $fh "/>";
736            }
737           
738 
739            for my $lang (sort keys %po_files_by_lang) {
740                    if ($MULTIPLE_OUTPUT && $lang ne "$language") {
741                        next;
742                    }
743                    if ($lang) {
744
745                        # Handle translation
746                        #
747                        my $localattrs = getAttributeString($attrs, 1, $lang);
748                        my $decode_string = ($lookup); #entity_decode($lookup);
749                        my $translation = $translations{$lang, $decode_string};
750                        if ($translation) {
751                            $translation = ($translation);
752                            print $fh "\n";
753                            $leading_space =~ s/.*\n//g;
754                            print $fh $leading_space;
755                            print $fh "<", $nodename, " xml:lang=\"", $lang, "\"", $localattrs;
756                            print $fh ">", $translation , "</$nodename>";
757                        }
758                    }
759            }
760
761        } else {
762            my $count = scalar(@all);
763            if ($count > 0) {
764                print $fh ">";
765            } else {
766                print $fh "/>";
767            }
768            my $index = 0;
769            while ($index < $count) {
770                my $type = $all[$index];
771                my $rest = $all[$index+1];
772                traverse($fh, $type, $rest, $language);
773                $index += 2;
774            }
775            if ($count > 0) {
776                print $fh "</$nodename>";
777            }
778        }
779    }
780}
781
782sub intltool_tree_char
783{
784    my $expat = shift;
785    my $text  = shift;
786    my $clist = $expat->{Curlist};
787    my $pos   = $#$clist;
788
789    # Use original_string so that we retain escaped entities
790    # in CDATA sections.
791    #
792    if ($pos > 0 and $clist->[$pos - 1] eq '0') {
793        $clist->[$pos] .= $expat->original_string();
794    } else {
795        push @$clist, 0 => $expat->original_string();
796    }
797}
798
799sub intltool_tree_start
800{
801    my $expat    = shift;
802    my $tag      = shift;
803    my @origlist = ();
804
805    # Use original_string so that we retain escaped entities
806    # in attribute values.  We must convert the string to an
807    # @origlist array to conform to the structure of the Tree
808    # Style.
809    #
810    my @original_array = split /\x/, $expat->original_string();
811    my $source         = $expat->original_string();
812
813    # Remove leading tag.
814    #
815    $source =~ s|^\s*<\s*(\S+)||s;
816
817    # Grab attribute key/value pairs and push onto @origlist array.
818    #
819    while ($source)
820    {
821       if ($source =~ /^\s*([\w:-]+)\s*[=]\s*["]/)
822       {
823           $source =~ s|^\s*([\w:-]+)\s*[=]\s*["]([^"]*)["]||s;
824           push @origlist, $1;
825           push @origlist, '"' . $2 . '"';
826       }
827       elsif ($source =~ /^\s*([\w:-]+)\s*[=]\s*[']/)
828       {
829           $source =~ s|^\s*([\w:-]+)\s*[=]\s*[']([^']*)[']||s;
830           push @origlist, $1;
831           push @origlist, "'" . $2 . "'";
832       }
833       else
834       {
835           last;
836       }
837    }
838
839    my $ol = [ { @origlist } ];
840
841    push @{ $expat->{Lists} }, $expat->{Curlist};
842    push @{ $expat->{Curlist} }, $tag => $ol;
843    $expat->{Curlist} = $ol;
844}
845
846sub readXml
847{
848    my $filename = shift || return;
849    if(!-f $filename) {
850        die "ERROR Cannot find filename: $filename\n";
851    }
852
853    my $ret = eval 'require XML::Parser';
854    if(!$ret) {
855        die "You must have XML::Parser installed to run $0\n\n";
856    }
857    my $xp = new XML::Parser(Style => 'Tree');
858    $xp->setHandlers(Char => \&intltool_tree_char);
859    $xp->setHandlers(Start => \&intltool_tree_start);
860    my $tree = $xp->parsefile($filename);
861
862# <foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo>
863# would be:
864# [foo, [{}, head, [{id => "a"}, 0, "Hello ",  em, [{}, 0, "there"]], bar, [{},
865# 0, "Howdy",  ref, [{}]], 0, "do" ] ]
866
867    return $tree;
868}
869
870sub print_header
871{
872    my $infile = shift;
873    my $fh = shift;
874    my $source;
875
876    if(!-f $infile) {
877        die "ERROR Cannot find filename: $infile\n";
878    }
879
880    print $fh qq{<?xml version="1.0" encoding="UTF-8"?>\n};
881    {
882        local $/;
883        open DOCINPUT, "<${FILE}" or die;
884        $source = <DOCINPUT>;
885        close DOCINPUT;
886    }
887    if ($source =~ /(<!DOCTYPE.*\[.*\]\s*>)/s)
888    {
889        print $fh "$1\n";
890    }
891    elsif ($source =~ /(<!DOCTYPE[^>]*>)/s)
892    {
893        print $fh "$1\n";
894    }
895}
896
897sub parseTree
898{
899    my $fh        = shift;
900    my $ref       = shift;
901    my $language  = shift || "";
902
903    my $name = shift @{ $ref };
904    my $cont = shift @{ $ref };
905    traverse($fh, $name, $cont, $language);
906}
907
908sub xml_merge_output
909{
910    my $source;
911
912    if ($MULTIPLE_OUTPUT) {
913        for my $lang (sort keys %po_files_by_lang) {
914            if ( ! -e $lang ) {
915                mkdir $lang or die "Cannot create subdirectory $lang: $!\n";
916            }
917            open OUTPUT, ">$lang/$OUTFILE" or die "Cannot open $lang/$OUTFILE: $!\n";
918            my $tree = readXml($FILE);
919            print_header($FILE, \*OUTPUT);
920            parseTree(\*OUTPUT, $tree, $lang);
921            close OUTPUT;
922            print "CREATED $lang/$OUTFILE\n" unless $QUIET_ARG;
923        }
924    }
925    open OUTPUT, ">$OUTFILE" or die "Cannot open $OUTFILE: $!\n";
926    my $tree = readXml($FILE);
927    print_header($FILE, \*OUTPUT);
928    parseTree(\*OUTPUT, $tree);
929    close OUTPUT;
930    print "CREATED $OUTFILE\n" unless $QUIET_ARG;
931}
932
933sub keys_merge_translations
934{
935    open INPUT, "<${FILE}" or die;
936    open OUTPUT, ">${OUTFILE}" or die;
937
938    while (<INPUT>)
939    {
940        if (s/^(\s*)_(\w+=(.*))/$1$2/) 
941        {
942            my $string = $3;
943
944            print OUTPUT;
945
946            my $non_translated_line = $_;
947
948            for my $lang (sort keys %po_files_by_lang)
949            {
950                my $translation = $translations{$lang, $string};
951                next if !$translation;
952
953                $_ = $non_translated_line;
954                s/(\w+)=.*/[$lang]$1=$translation/;
955                print OUTPUT;
956            }
957        }
958        else
959        {
960            print OUTPUT;
961        }
962    }
963
964    close OUTPUT;
965    close INPUT;
966}
967
968sub desktop_merge_translations
969{
970    open INPUT, "<${FILE}" or die;
971    open OUTPUT, ">${OUTFILE}" or die;
972
973    while (<INPUT>)
974    {
975        if (s/^(\s*)_(\w+=(.*))/$1$2/) 
976        {
977            my $string = $3;
978
979            print OUTPUT;
980
981            my $non_translated_line = $_;
982
983            for my $lang (sort keys %po_files_by_lang)
984            {
985                my $translation = $translations{$lang, $string};
986                next if !$translation;
987
988                $_ = $non_translated_line;
989                s/(\w+)=.*/${1}[$lang]=$translation/;
990                print OUTPUT;
991            }
992        }
993        else
994        {
995            print OUTPUT;
996        }
997    }
998
999    close OUTPUT;
1000    close INPUT;
1001}
1002
1003sub schemas_merge_translations
1004{
1005    my $source;
1006
1007    {
1008       local $/; # slurp mode
1009       open INPUT, "<$FILE" or die "can't open $FILE: $!";
1010       $source = <INPUT>;
1011       close INPUT;
1012    }
1013
1014    open OUTPUT, ">$OUTFILE" or die;
1015
1016    # FIXME: support attribute translations
1017
1018    # Empty nodes never need translation, so unmark all of them.
1019    # For example, <_foo/> is just replaced by <foo/>.
1020    $source =~ s|<\s*_($w+)\s*/>|<$1/>|g;
1021
1022    while ($source =~ s/
1023                        (.*?)
1024                        (\s+)(<locale\ name="C">(\s*)
1025                            (<default>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/default>)?(\s*)
1026                            (<short>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/short>)?(\s*)
1027                            (<long>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/long>)?(\s*)
1028                        <\/locale>)
1029                       //sx)
1030    {
1031        print OUTPUT $1;
1032
1033        my $locale_start_spaces = $2 ? $2 : '';
1034        my $default_spaces = $4 ? $4 : '';
1035        my $short_spaces = $7 ? $7 : '';
1036        my $long_spaces = $10 ? $10 : '';
1037        my $locale_end_spaces = $13 ? $13 : '';
1038        my $c_default_block = $3 ? $3 : '';
1039        my $default_string = $6 ? $6 : '';
1040        my $short_string = $9 ? $9 : '';
1041        my $long_string = $12 ? $12 : '';
1042
1043        print OUTPUT "$locale_start_spaces$c_default_block";
1044
1045        $default_string =~ s/\s+/ /g;
1046        $default_string = entity_decode($default_string);
1047        $short_string =~ s/\s+/ /g;
1048        $short_string = entity_decode($short_string);
1049        $long_string =~ s/\s+/ /g;
1050        $long_string = entity_decode($long_string);
1051
1052        for my $lang (sort keys %po_files_by_lang)
1053        {
1054            my $default_translation = $translations{$lang, $default_string};
1055            my $short_translation = $translations{$lang, $short_string};
1056            my $long_translation  = $translations{$lang, $long_string};
1057
1058            next if (!$default_translation && !$short_translation &&
1059                     !$long_translation);
1060
1061            print OUTPUT "\n$locale_start_spaces<locale name=\"$lang\">";
1062
1063        print OUTPUT "$default_spaces";   
1064
1065        if ($default_translation)
1066        {
1067            $default_translation = entity_encode($default_translation);
1068            print OUTPUT "<default>$default_translation</default>";
1069        }
1070
1071            print OUTPUT "$short_spaces";
1072
1073            if ($short_translation)
1074            {
1075                        $short_translation = entity_encode($short_translation);
1076                        print OUTPUT "<short>$short_translation</short>";
1077            }
1078
1079            print OUTPUT "$long_spaces";
1080
1081            if ($long_translation)
1082            {
1083                        $long_translation = entity_encode($long_translation);
1084                        print OUTPUT "<long>$long_translation</long>";
1085            }       
1086
1087            print OUTPUT "$locale_end_spaces</locale>";
1088        }
1089    }
1090
1091    print OUTPUT $source;
1092
1093    close OUTPUT;
1094}
1095
1096sub rfc822deb_merge_translations
1097{
1098    my %encodings = ();
1099    for my $lang (keys %po_files_by_lang) {
1100        $encodings{$lang} = ($UTF8_ARG ? 'UTF-8' : get_po_encoding($po_files_by_lang{$lang}));
1101    }
1102
1103    my $source;
1104
1105    $Text::Wrap::huge = 'overflow';
1106    $Text::Wrap::break = qr/\n|\s(?=\S)/;
1107
1108    {
1109       local $/; # slurp mode
1110       open INPUT, "<$FILE" or die "can't open $FILE: $!";
1111       $source = <INPUT>;
1112       close INPUT;
1113    }
1114
1115    open OUTPUT, ">${OUTFILE}" or die;
1116
1117    while ($source =~ /(^|\n+)(_*)([^:\s]+)(:[ \t]*)(.*?)(?=\n[\S\n]|$)/sg)
1118    {
1119            my $sep = $1;
1120            my $non_translated_line = $3.$4;
1121            my $string = $5;
1122            my $underscore = length($2);
1123            next if $underscore eq 0 && $non_translated_line =~ /^#/;
1124            #  Remove [] dummy strings
1125            my $stripped = $string;
1126            $stripped =~ s/\[\s[^\[\]]*\],/,/g if $underscore eq 2;
1127            $stripped =~ s/\[\s[^\[\]]*\]$//;
1128            $non_translated_line .= $stripped;
1129
1130            print OUTPUT $sep.$non_translated_line;
1131   
1132            if ($underscore)
1133            {
1134                my @str_list = rfc822deb_split($underscore, $string);
1135
1136                for my $lang (sort keys %po_files_by_lang)
1137                {
1138                    my $is_translated = 1;
1139                    my $str_translated = '';
1140                    my $first = 1;
1141               
1142                    for my $str (@str_list)
1143                    {
1144                        my $translation = $translations{$lang, $str};
1145                   
1146                        if (!$translation)
1147                        {
1148                            $is_translated = 0;
1149                            last;
1150                        }
1151
1152                        #  $translation may also contain [] dummy
1153                        #  strings, mostly to indicate an empty string
1154                        $translation =~ s/\[\s[^\[\]]*\]$//;
1155                       
1156                        if ($first)
1157                        {
1158                            if ($underscore eq 2)
1159                            {
1160                                $str_translated .= $translation;
1161                            }
1162                            else
1163                            {
1164                                $str_translated .=
1165                                    Text::Tabs::expand($translation) .
1166                                    "\n";
1167                            }
1168                        }
1169                        else
1170                        {
1171                            if ($underscore eq 2)
1172                            {
1173                                $str_translated .= ', ' . $translation;
1174                            }
1175                            else
1176                            {
1177                                $str_translated .= Text::Tabs::expand(
1178                                    Text::Wrap::wrap(' ', ' ', $translation)) .
1179                                    "\n .\n";
1180                            }
1181                        }
1182                        $first = 0;
1183
1184                        #  To fix some problems with Text::Wrap::wrap
1185                        $str_translated =~ s/(\n )+\n/\n .\n/g;
1186                    }
1187                    next unless $is_translated;
1188
1189                    $str_translated =~ s/\n \.\n$//;
1190                    $str_translated =~ s/\s+$//;
1191
1192                    $_ = $non_translated_line;
1193                    s/^(\w+):\s*.*/$sep${1}-$lang.$encodings{$lang}: $str_translated/s;
1194                    print OUTPUT;
1195                }
1196            }
1197    }
1198    print OUTPUT "\n";
1199
1200    close OUTPUT;
1201    close INPUT;
1202}
1203
1204sub rfc822deb_split
1205{
1206    # Debian defines a special way to deal with rfc822-style files:
1207    # when a value contain newlines, it consists of
1208    #   1.  a short form (first line)
1209    #   2.  a long description, all lines begin with a space,
1210    #       and paragraphs are separated by a single dot on a line
1211    # This routine returns an array of all paragraphs, and reformat
1212    # them.
1213    # When first argument is 2, the string is a comma separated list of
1214    # values.
1215    my $type = shift;
1216    my $text = shift;
1217    $text =~ s/^[ \t]//mg;
1218    return (split(/, */, $text, 0)) if $type ne 1;
1219    return ($text) if $text !~ /\n/;
1220
1221    $text =~ s/([^\n]*)\n//;
1222    my @list = ($1);
1223    my $str = '';
1224
1225    for my $line (split (/\n/, $text))
1226    {
1227        chomp $line;
1228        if ($line =~ /^\.\s*$/)
1229        {
1230            #  New paragraph
1231            $str =~ s/\s*$//;
1232            push(@list, $str);
1233            $str = '';
1234        }
1235        elsif ($line =~ /^\s/)
1236        {
1237            #  Line which must not be reformatted
1238            $str .= "\n" if length ($str) && $str !~ /\n$/;
1239            $line =~ s/\s+$//;
1240            $str .= $line."\n";
1241        }
1242        else
1243        {
1244            #  Continuation line, remove newline
1245            $str .= " " if length ($str) && $str !~ /\n$/;
1246            $str .= $line;
1247        }
1248    }
1249
1250    $str =~ s/\s*$//;
1251    push(@list, $str) if length ($str);
1252
1253    return @list;
1254}
1255
Note: See TracBrowser for help on using the repository browser.