source: trunk/third/at-spi/intltool-merge.in @ 21314

Revision 21314, 30.6 KB checked in by ghudson, 20 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r21313, which included commits to RCS files with non-trunk default branches.
RevLine 
[18421]1#!@INTLTOOL_PERL@ -w
[21313]2# -*- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 4  -*-
[18421]3
4#
5#  The Intltool Message Merger
6#
[21047]7#  Copyright (C) 2000, 2003 Free Software Foundation.
[18421]8#  Copyright (C) 2000, 2001 Eazel, Inc
9#
10#  Intltool is free software; you can redistribute it and/or
11#  modify it under the terms of the GNU General Public License
12#  version 2 published by the Free Software Foundation.
13#
14#  Intltool is distributed in the hope that it will be useful,
15#  but WITHOUT ANY WARRANTY; without even the implied warranty of
16#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17#  General Public License for more details.
18#
19#  You should have received a copy of the GNU General Public License
20#  along with this program; if not, write to the Free Software
21#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22#
23#  As a special exception to the GNU General Public License, if you
24#  distribute this file as part of a program that contains a
25#  configuration script generated by Autoconf, you may include it under
26#  the same distribution terms that you use for the rest of that program.
27#
28#  Authors:  Maciej Stachowiak <mjs@noisehavoc.org>
29#            Kenneth Christiansen <kenneth@gnu.org>
30#            Darin Adler <darin@bentspoon.com>
31#
32#  Proper XML UTF-8'ification written by Cyrille Chepelov <chepelov@calixo.net>
33#
34
35## Release information
36my $PROGRAM = "intltool-merge";
37my $PACKAGE = "intltool";
[21313]38my $VERSION = "0.31.3";
[18421]39
40## Loaded modules
41use strict;
42use Getopt::Long;
[18687]43use Text::Wrap;
[21047]44use File::Basename;
[18421]45
[21047]46my $must_end_tag      = -1;
47my $last_depth        = -1;
48my $translation_depth = -1;
49my @tag_stack = ();
50my @entered_tag = ();
51my @translation_strings = ();
52my $leading_space = "";
53
[18421]54## Scalars used by the option stuff
55my $HELP_ARG = 0;
56my $VERSION_ARG = 0;
57my $BA_STYLE_ARG = 0;
58my $XML_STYLE_ARG = 0;
59my $KEYS_STYLE_ARG = 0;
60my $DESKTOP_STYLE_ARG = 0;
61my $SCHEMAS_STYLE_ARG = 0;
[18687]62my $RFC822DEB_STYLE_ARG = 0;
[18421]63my $QUIET_ARG = 0;
64my $PASS_THROUGH_ARG = 0;
65my $UTF8_ARG = 0;
[21047]66my $MULTIPLE_OUTPUT = 0;
[18421]67my $cache_file;
68
69## Handle options
70GetOptions
71(
72 "help" => \$HELP_ARG,
73 "version" => \$VERSION_ARG,
74 "quiet|q" => \$QUIET_ARG,
75 "oaf-style|o" => \$BA_STYLE_ARG, ## for compatibility
76 "ba-style|b" => \$BA_STYLE_ARG,
77 "xml-style|x" => \$XML_STYLE_ARG,
78 "keys-style|k" => \$KEYS_STYLE_ARG,
79 "desktop-style|d" => \$DESKTOP_STYLE_ARG,
80 "schemas-style|s" => \$SCHEMAS_STYLE_ARG,
[18687]81 "rfc822deb-style|r" => \$RFC822DEB_STYLE_ARG,
[18421]82 "pass-through|p" => \$PASS_THROUGH_ARG,
83 "utf8|u" => \$UTF8_ARG,
[21047]84 "multiple-output|m" => \$MULTIPLE_OUTPUT,
[18421]85 "cache|c=s" => \$cache_file
86 ) or &error;
87
88my $PO_DIR;
89my $FILE;
90my $OUTFILE;
91
92my %po_files_by_lang = ();
93my %translations = ();
[21313]94my $iconv = $ENV{"ICONV"} || $ENV{"INTLTOOL_ICONV"} || "@INTLTOOL_ICONV@";
[18421]95
96# Use this instead of \w for XML files to handle more possible characters.
97my $w = "[-A-Za-z0-9._:]";
98
99# XML quoted string contents
100my $q = "[^\\\"]*";
101
102## Check for options.
103
[18687]104if ($VERSION_ARG)
105{
[18421]106        &print_version;
[18687]107}
108elsif ($HELP_ARG)
109{
[18421]110        &print_help;
[18687]111}
112elsif ($BA_STYLE_ARG && @ARGV > 2)
113{
[18421]114        &preparation;
115        &print_message;
116        &ba_merge_translations;
117        &finalize;
[18687]118}
119elsif ($XML_STYLE_ARG && @ARGV > 2)
120{
[18421]121        &utf8_sanity_check;
122        &preparation;
123        &print_message;
[21047]124       
125    &xml_merge_output;
126
[18421]127        &finalize;
[18687]128}
129elsif ($KEYS_STYLE_ARG && @ARGV > 2)
130{
[18421]131        &utf8_sanity_check;
132        &preparation;
133        &print_message;
134        &keys_merge_translations;
135        &finalize;
[18687]136}
137elsif ($DESKTOP_STYLE_ARG && @ARGV > 2)
138{
[18421]139        &preparation;
140        &print_message;
141        &desktop_merge_translations;
142        &finalize;
[18687]143}
144elsif ($SCHEMAS_STYLE_ARG && @ARGV > 2)
145{
[18421]146        &preparation;
147        &print_message;
148        &schemas_merge_translations;
149        &finalize;
[18687]150}
151elsif ($RFC822DEB_STYLE_ARG && @ARGV > 2)
152{
153        &preparation;
154        &print_message;
155        &rfc822deb_merge_translations;
156        &finalize;
157}
158else
159{
[18421]160        &print_help;
161}
162
163exit;
164
165## Sub for printing release information
166sub print_version
167{
[21047]168    print <<_EOF_;
169${PROGRAM} (${PACKAGE}) ${VERSION}
170Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen.
171
172Copyright (C) 2000-2003 Free Software Foundation, Inc.
173Copyright (C) 2000-2001 Eazel, Inc.
174This is free software; see the source for copying conditions.  There is NO
175warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
176_EOF_
[18421]177    exit;
178}
179
180## Sub for printing usage information
181sub print_help
182{
[21047]183    print <<_EOF_;
184Usage: ${PROGRAM} [OPTION]... PO_DIRECTORY FILENAME OUTPUT_FILE
185Generates an output file that includes some localized attributes from an
186untranslated source file.
187
188Mandatory options: (exactly one must be specified)
189  -b, --ba-style         includes translations in the bonobo-activation style
190  -d, --desktop-style    includes translations in the desktop style
191  -k, --keys-style       includes translations in the keys style
192  -s, --schemas-style    includes translations in the schemas style
193  -r, --rfc822deb-style  includes translations in the RFC822 style
194  -x, --xml-style        includes translations in the standard xml style
195
196Other options:
197  -u, --utf8             convert all strings to UTF-8 before merging
198  -p, --pass-through     use strings as found in .po files, without
199                         conversion (STRONGLY unrecommended with -x)
200  -m, --multiple-output  output one localized file per locale, instead of
201                         a single file containing all localized elements
202  -c, --cache=FILE       specify cache file name
203                         (usually \$top_builddir/po/.intltool-merge-cache)
204  -q, --quiet            suppress most messages
205      --help             display this help and exit
206      --version          output version information and exit
207
208Report bugs to http://bugzilla.gnome.org/ (product name "$PACKAGE")
209or send email to <xml-i18n-tools\@gnome.org>.
210_EOF_
[18421]211    exit;
212}
213
214
215## Sub for printing error messages
216sub print_error
217{
[21047]218    print STDERR "Try `${PROGRAM} --help' for more information.\n";
[18421]219    exit;
220}
221
222
223sub print_message
224{
225    print "Merging translations into $OUTFILE.\n" unless $QUIET_ARG;
226}
227
228
229sub preparation
230{
231    $PO_DIR = $ARGV[0];
232    $FILE = $ARGV[1];
233    $OUTFILE = $ARGV[2];
234
235    &gather_po_files;
236    &get_translation_database;
237}
238
239# General-purpose code for looking up translations in .po files
240
241sub po_file2lang
242{
243    my ($tmp) = @_;
244    $tmp =~ s/^.*\/(.*)\.po$/$1/;
245    return $tmp;
246}
247
248sub gather_po_files
249{
250    for my $po_file (glob "$PO_DIR/*.po") {
251        $po_files_by_lang{po_file2lang($po_file)} = $po_file;
252    }
253}
254
[18687]255sub get_local_charset
256{
257    my ($encoding) = @_;
[21047]258    my $alias_file = $ENV{"G_CHARSET_ALIAS"} || "/usr/lib/charset.alias";
[18687]259
260    # seek character encoding aliases in charset.alias (glib)
261
262    if (open CHARSET_ALIAS, $alias_file)
263    {
264        while (<CHARSET_ALIAS>)
265        {
266            next if /^\#/;
267            return $1 if (/^\s*([-._a-zA-Z0-9]+)\s+$encoding\b/i)
268        }
269
270        close CHARSET_ALIAS;
271    }
272
273    # if not found, return input string
274
275    return $encoding;
276}
277
[18421]278sub get_po_encoding
279{
280    my ($in_po_file) = @_;
281    my $encoding = "";
282
283    open IN_PO_FILE, $in_po_file or die;
[18687]284    while (<IN_PO_FILE>)
285    {
[18421]286        ## example: "Content-Type: text/plain; charset=ISO-8859-1\n"
[18687]287        if (/Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/)
288        {
[18421]289            $encoding = $1;
290            last;
291        }
292    }
293    close IN_PO_FILE;
294
[18687]295    if (!$encoding)
296    {
[21047]297        print STDERR "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n" unless $QUIET_ARG;
[18421]298        $encoding = "ISO-8859-1";
299    }
[18687]300
[21047]301    system ("$iconv -f $encoding -t UTF-8 </dev/null 2>/dev/null");
302    if ($?) {
303        $encoding = get_local_charset($encoding);
304    }
[18687]305
[18421]306    return $encoding
307}
308
309sub utf8_sanity_check
310{
[18687]311    if (!$UTF8_ARG)
312    {
313        if (!$PASS_THROUGH_ARG)
314        {
[18421]315            $PASS_THROUGH_ARG="1";
316        }
317    }
318}
319
320sub get_translation_database
321{
322    if ($cache_file) {
323        &get_cached_translation_database;
324    } else {
325        &create_translation_database;
326    }
327}
328
329sub get_newest_po_age
330{
331    my $newest_age;
332
[18687]333    foreach my $file (values %po_files_by_lang)
334    {
[18421]335        my $file_age = -M $file;
336        $newest_age = $file_age if !$newest_age || $file_age < $newest_age;
337    }
338
[21047]339    $newest_age = 0 if !$newest_age;
340
[18421]341    return $newest_age;
342}
343
344sub create_cache
345{
346    print "Generating and caching the translation database\n" unless $QUIET_ARG;
347
348    &create_translation_database;
349
350    open CACHE, ">$cache_file" || die;
351    print CACHE join "\x01", %translations;
352    close CACHE;
353}
354
355sub load_cache
356{
357    print "Found cached translation database\n" unless $QUIET_ARG;
358
359    my $contents;
360    open CACHE, "<$cache_file" || die;
361    {
362        local $/;
363        $contents = <CACHE>;
364    }
365    close CACHE;
366    %translations = split "\x01", $contents;
367}
368
369sub get_cached_translation_database
370{
371    my $cache_file_age = -M $cache_file;
[18687]372    if (defined $cache_file_age)
373    {
374        if ($cache_file_age <= &get_newest_po_age)
375        {
[18421]376            &load_cache;
377            return;
378        }
379        print "Found too-old cached translation database\n" unless $QUIET_ARG;
380    }
381
382    &create_cache;
383}
384
385sub create_translation_database
386{
[18687]387    for my $lang (keys %po_files_by_lang)
388    {
[18421]389        my $po_file = $po_files_by_lang{$lang};
390
[18687]391        if ($UTF8_ARG)
392        {
[18421]393            my $encoding = get_po_encoding ($po_file);
[18687]394
395            if (lc $encoding eq "utf-8")
396            {
[18421]397                open PO_FILE, "<$po_file";     
[18687]398            }
399            else
400            {
[21047]401                print STDERR "WARNING: $po_file is not in UTF-8 but $encoding, converting...\n" unless $QUIET_ARG;;
402
[18421]403                open PO_FILE, "$iconv -f $encoding -t UTF-8 $po_file|";
404            }
[18687]405        }
406        else
407        {
[18421]408            open PO_FILE, "<$po_file"; 
409        }
410
411        my $nextfuzzy = 0;
412        my $inmsgid = 0;
413        my $inmsgstr = 0;
414        my $msgid = "";
415        my $msgstr = "";
[18687]416
417        while (<PO_FILE>)
418        {
[18421]419            $nextfuzzy = 1 if /^#, fuzzy/;
[18687]420       
421            if (/^msgid "((\\.|[^\\])*)"/ )
422            {
[18421]423                $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
424                $msgid = "";
425                $msgstr = "";
426
427                if ($nextfuzzy) {
428                    $inmsgid = 0;
429                } else {
430                    $msgid = unescape_po_string($1);
431                    $inmsgid = 1;
432                }
433                $inmsgstr = 0;
434                $nextfuzzy = 0;
435            }
[18687]436
437            if (/^msgstr "((\\.|[^\\])*)"/)
438            {
[18421]439                $msgstr = unescape_po_string($1);
440                $inmsgstr = 1;
441                $inmsgid = 0;
442            }
[18687]443
444            if (/^"((\\.|[^\\])*)"/)
445            {
[18421]446                $msgid .= unescape_po_string($1) if $inmsgid;
447                $msgstr .= unescape_po_string($1) if $inmsgstr;
448            }
449        }
450        $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
451    }
452}
453
454sub finalize
455{
456}
457
458sub unescape_one_sequence
459{
460    my ($sequence) = @_;
461
462    return "\\" if $sequence eq "\\\\";
463    return "\"" if $sequence eq "\\\"";
[18687]464    return "\n" if $sequence eq "\\n";
[18421]465
466    # gettext also handles \n, \t, \b, \r, \f, \v, \a, \xxx (octal),
467    # \xXX (hex) and has a comment saying they want to handle \u and \U.
468
469    return $sequence;
470}
471
472sub unescape_po_string
473{
474    my ($string) = @_;
475
476    $string =~ s/(\\.)/unescape_one_sequence($1)/eg;
477
478    return $string;
479}
480
[18687]481## NOTE: deal with < - &lt; but not > - &gt;  because it seems its ok to have
482## > in the entity. For further info please look at #84738.
[18421]483sub entity_decode
484{
485    local ($_) = @_;
486
487    s/&apos;/'/g; # '
488    s/&quot;/"/g; # "
489    s/&amp;/&/g;
[18687]490    s/&lt;/</g;
[18421]491
492    return $_;
493}
[21047]494 
495# entity_encode: (string)
496#
497# Encode the given string to XML format (encode '<' etc). It also
498# encodes high bit if not in UTF-8 mode.
[18421]499
500sub entity_encode
501{
502    my ($pre_encoded) = @_;
503
504    my @list_of_chars = unpack ('C*', $pre_encoded);
505
[18687]506    if ($PASS_THROUGH_ARG)
507    {
[18421]508        return join ('', map (&entity_encode_int_even_high_bit, @list_of_chars));
[18687]509    }
510    else
511    {
[21047]512        # with UTF-8 we only encode minimalistic
[18421]513        return join ('', map (&entity_encode_int_minimalist, @list_of_chars));
514    }
515}
516
517sub entity_encode_int_minimalist
518{
519    return "&quot;" if $_ == 34;
520    return "&amp;" if $_ == 38;
521    return "&apos;" if $_ == 39;
[18687]522    return "&lt;" if $_ == 60;
[18421]523    return chr $_;
524}
525
526sub entity_encode_int_even_high_bit
527{
[18687]528    if ($_ > 127 || $_ == 34 || $_ == 38 || $_ == 39 || $_ == 60)
529    {
[18421]530        # the ($_ > 127) should probably be removed
531        return "&#" . $_ . ";";
[18687]532    }
533    else
534    {
[18421]535        return chr $_;
536    }
537}
538
539sub entity_encoded_translation
540{
541    my ($lang, $string) = @_;
542
543    my $translation = $translations{$lang, $string};
544    return $string if !$translation;
545    return entity_encode ($translation);
546}
547
548## XML (bonobo-activation specific) merge code
549
550sub ba_merge_translations
551{
552    my $source;
553
554    {
555       local $/; # slurp mode
556       open INPUT, "<$FILE" or die "can't open $FILE: $!";
557       $source = <INPUT>;
558       close INPUT;
559    }
560
561    open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!";
562
[18687]563    while ($source =~ s|^(.*?)([ \t]*<\s*$w+\s+($w+\s*=\s*"$q"\s*)+/?>)([ \t]*\n)?||s)
564    {
[18421]565        print OUTPUT $1;
566
567        my $node = $2 . "\n";
568
569        my @strings = ();
570        $_ = $node;
571        while (s/(\s)_($w+\s*=\s*"($q)")/$1$2/s) {
572             push @strings, entity_decode($3);
573        }
574        print OUTPUT;
575
576        my %langs;
[18687]577        for my $string (@strings)
578        {
579            for my $lang (keys %po_files_by_lang)
580            {
[18421]581                $langs{$lang} = 1 if $translations{$lang, $string};
582            }
583        }
584       
[18687]585        for my $lang (sort keys %langs)
586        {
[18421]587            $_ = $node;
588            s/(\sname\s*=\s*)"($q)"/$1"$2-$lang"/s;
589            s/(\s)_($w+\s*=\s*")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg;
590            print OUTPUT;
591        }
592    }
593
594    print OUTPUT $source;
595
596    close OUTPUT;
597}
598
599
600## XML (non-bonobo-activation) merge code
601
[21313]602
603# Process tag attributes
604#   Only parameter is a HASH containing attributes -> values mapping
605sub getAttributeString
[18421]606{
[21313]607    my $sub = shift;
608    my $do_translate = shift || 0;
609    my $language = shift || "";
610    my $result = "";
611    foreach my $e (reverse(sort(keys %{ $sub }))) {
612        my $key    = $e;
613        my $string = $sub->{$e};
614        my $quote = '"';
615       
616        $string =~ s/^[\s]+//;
617        $string =~ s/[\s]+$//;
618       
619        if ($string =~ /^'.*'$/)
620        {
621            $quote = "'";
622        }
623        $string =~ s/^['"]//g;
624        $string =~ s/['"]$//g;
[18421]625
[21313]626        if ($do_translate && $key =~ /^_/) {
627            $key =~ s|^_||g;
628            if ($language) {
629               
630                # Handle translation
631                #
632                my $decode_string = entity_decode($string);
633                my $translation = $translations{$language, $decode_string};
634                if ($translation) {
635                    $translation = entity_encode($translation);
636                    $string = $translation;
637                }
638            }
639        }
640       
641        $result .= " $key=$quote$string$quote";
[18421]642    }
[21313]643    return $result;
644}
[18421]645
[21313]646# Returns a translatable string from XML node, it works on contents of every node in XML::Parser tree
647#   doesn't support nesting of translatable tags (i.e. <_blah>this <_doh>doesn't</_doh> work</_blah> -- besides
648#   can you define the correct semantics for this?)
649#
[18421]650
[21313]651sub getXMLstring
652{
653    my $ref = shift;
654    my @list = @{ $ref };
655    my $result = "";
[18421]656
[21313]657    my $count = scalar(@list);
658    my $attrs = $list[0];
659    my $index = 1;
660    while ($index < $count) {
661        my $type = $list[$index];
662        my $content = $list[$index+1];
663        if (! $type ) {
664            # We've got CDATA
665            if ($content) {
666                # lets strip the whitespace here, and *ONLY* here
667                $content =~ s/\s+/ /gs if (!((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/)));
668                $result .= ($content);
669            } else {
670                #print "no cdata content when expected it\n"; # is this possible, is this ok?
671                # what to do if this happens?
672                # Did I mention that I hate XML::Parser tree style?
673            }
674        } else {
675            # We've got another element
676            $result .= "<$type";
677            $result .= getAttributeString($attrs, 0); # no nested translatable elements
678            if ($content) {
679                my $subresult = getXMLstring($content);
680                if ($subresult) {
681                    $result .= ">".$subresult . "</$type>";
682                } else {
683                    $result .= "/>";
684                }
685            } else {
686                $result .= "/>";
687            }
688        }
689        $index += 2;
690    }
691    return $result;
692}
[18421]693
[21313]694sub traverse
695{
696    my $fh = shift;
697    my $nodename = shift;
698    my $content = shift;
699    my $language = shift || "";
[18421]700
[21313]701    if (!$nodename) {
702        if ($content =~ /^[\s]*$/) {
703            $leading_space .= $content;
704        }
705        print $fh $content;
706    } else {
707        # element
708        my @all = @{ $content };
709        my $attrs = shift @all;
710        my $outattr = getAttributeString($attrs, 1, $language);
711        my $translate = 0;
712
713        if ($nodename =~ /^_/) {
714            $translate = 1;
715            $nodename =~ s/^_//;
716        }
717        my $lookup = '';
718        print $fh "<$nodename$outattr";
719        if ($translate) {
720            $lookup = getXMLstring($content);
721            if (!((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/))) {
722                $lookup =~ s/^\s+//s;
723                $lookup =~ s/\s+$//s;
[21047]724            }
[18421]725
[21313]726            if ($lookup) {
727                my $translation = $translations{$language, $lookup};
728                if ($MULTIPLE_OUTPUT && $translation) {
729                    print $fh " xml:lang=\"", $language, "\"";
730                    print $fh ">", $translation, "</$nodename>";
731                    return; # this means there will be no same translation with xml:lang="$language"...
732                            # if we want them both, just remove this "return"
[21047]733                } else {
[21313]734                    print $fh ">$lookup</$nodename>";
[21047]735                }
[21313]736            } else {
737                print $fh "/>";
738            }
739           
[21047]740 
[21313]741            for my $lang (sort keys %po_files_by_lang) {
[21047]742                    if ($MULTIPLE_OUTPUT && $lang ne "$language") {
743                        next;
744                    }
[21313]745                    if ($lang) {
[21047]746
747                        # Handle translation
748                        #
[21313]749                        my $localattrs = getAttributeString($attrs, 1, $lang);
750                        my $decode_string = ($lookup); #entity_decode($lookup);
751                        my $translation = $translations{$lang, $decode_string};
[21047]752                        if ($translation) {
[21313]753                            $translation = ($translation);
754                            print $fh "\n";
755                            $leading_space =~ s/.*\n//g;
756                            print $fh $leading_space;
757                            print $fh "<", $nodename, " xml:lang=\"", $lang, "\"", $localattrs;
758                            print $fh ">", $translation , "</$nodename>";
759                        }
[21047]760                    }
[21313]761            }
[21047]762
[21313]763        } else {
764            my $count = scalar(@all);
765            if ($count > 0) {
766                print $fh ">";
767            } else {
768                print $fh "/>";
769            }
770            my $index = 0;
771            while ($index < $count) {
772                my $type = $all[$index];
773                my $rest = $all[$index+1];
774                traverse($fh, $type, $rest, $language);
775                $index += 2;
776            }
777            if ($count > 0) {
778                print $fh "</$nodename>";
779            }
780        }
[18421]781    }
[21047]782}
[18421]783
[21047]784sub intltool_tree_char
785{
786    my $expat = shift;
787    my $text  = shift;
788    my $clist = $expat->{Curlist};
789    my $pos   = $#$clist;
[18421]790
[21047]791    # Use original_string so that we retain escaped entities
792    # in CDATA sections.
793    #
794    if ($pos > 0 and $clist->[$pos - 1] eq '0') {
795        $clist->[$pos] .= $expat->original_string();
796    } else {
797        push @$clist, 0 => $expat->original_string();
798    }
799}
800
801sub intltool_tree_start
802{
803    my $expat    = shift;
804    my $tag      = shift;
805    my @origlist = ();
806
807    # Use original_string so that we retain escaped entities
808    # in attribute values.  We must convert the string to an
809    # @origlist array to conform to the structure of the Tree
810    # Style.
811    #
812    my @original_array = split /\x/, $expat->original_string();
813    my $source         = $expat->original_string();
814
815    # Remove leading tag.
816    #
817    $source =~ s|^\s*<\s*(\S+)||s;
818
819    # Grab attribute key/value pairs and push onto @origlist array.
820    #
821    while ($source)
822    {
823       if ($source =~ /^\s*([\w:-]+)\s*[=]\s*["]/)
824       {
825           $source =~ s|^\s*([\w:-]+)\s*[=]\s*["]([^"]*)["]||s;
826           push @origlist, $1;
827           push @origlist, '"' . $2 . '"';
828       }
829       elsif ($source =~ /^\s*([\w:-]+)\s*[=]\s*[']/)
830       {
831           $source =~ s|^\s*([\w:-]+)\s*[=]\s*[']([^']*)[']||s;
832           push @origlist, $1;
833           push @origlist, "'" . $2 . "'";
834       }
835       else
836       {
837           last;
838       }
839    }
840
841    my $ol = [ { @origlist } ];
842
843    push @{ $expat->{Lists} }, $expat->{Curlist};
844    push @{ $expat->{Curlist} }, $tag => $ol;
845    $expat->{Curlist} = $ol;
846}
847
848sub readXml
849{
850    my $filename = shift || return;
851    if(!-f $filename) {
852        die "ERROR Cannot find filename: $filename\n";
853    }
854
855    my $ret = eval 'require XML::Parser';
856    if(!$ret) {
857        die "You must have XML::Parser installed to run $0\n\n";
858    }
859    my $xp = new XML::Parser(Style => 'Tree');
860    $xp->setHandlers(Char => \&intltool_tree_char);
861    $xp->setHandlers(Start => \&intltool_tree_start);
862    my $tree = $xp->parsefile($filename);
863
864# <foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo>
865# would be:
866# [foo, [{}, head, [{id => "a"}, 0, "Hello ",  em, [{}, 0, "there"]], bar, [{},
867# 0, "Howdy",  ref, [{}]], 0, "do" ] ]
868
869    return $tree;
870}
871
872sub print_header
873{
874    my $infile = shift;
875    my $fh = shift;
876    my $source;
877
878    if(!-f $infile) {
879        die "ERROR Cannot find filename: $infile\n";
880    }
881
882    print $fh qq{<?xml version="1.0" encoding="UTF-8"?>\n};
883    {
884        local $/;
885        open DOCINPUT, "<${FILE}" or die;
886        $source = <DOCINPUT>;
887        close DOCINPUT;
888    }
889    if ($source =~ /(<!DOCTYPE.*\[.*\]\s*>)/s)
890    {
891        print $fh "$1\n";
892    }
893    elsif ($source =~ /(<!DOCTYPE[^>]*>)/s)
894    {
895        print $fh "$1\n";
896    }
897}
898
[21313]899sub parseTree
900{
901    my $fh        = shift;
902    my $ref       = shift;
903    my $language  = shift || "";
904
905    my $name = shift @{ $ref };
906    my $cont = shift @{ $ref };
907    traverse($fh, $name, $cont, $language);
908}
909
[21047]910sub xml_merge_output
911{
912    my $source;
913
914    if ($MULTIPLE_OUTPUT) {
915        for my $lang (sort keys %po_files_by_lang) {
916            if ( ! -e $lang ) {
917                mkdir $lang or die "Cannot create subdirectory $lang: $!\n";
918            }
919            open OUTPUT, ">$lang/$OUTFILE" or die "Cannot open $lang/$OUTFILE: $!\n";
920            my $tree = readXml($FILE);
921            print_header($FILE, \*OUTPUT);
[21313]922            parseTree(\*OUTPUT, $tree, $lang);
[21047]923            close OUTPUT;
924            print "CREATED $lang/$OUTFILE\n" unless $QUIET_ARG;
925        }
926    }
927    open OUTPUT, ">$OUTFILE" or die "Cannot open $OUTFILE: $!\n";
928    my $tree = readXml($FILE);
929    print_header($FILE, \*OUTPUT);
[21313]930    parseTree(\*OUTPUT, $tree);
[18421]931    close OUTPUT;
[21047]932    print "CREATED $OUTFILE\n" unless $QUIET_ARG;
[18421]933}
934
935sub keys_merge_translations
936{
937    open INPUT, "<${FILE}" or die;
938    open OUTPUT, ">${OUTFILE}" or die;
939
[18687]940    while (<INPUT>)
941    {
942        if (s/^(\s*)_(\w+=(.*))/$1$2/) 
943        {
[18421]944            my $string = $3;
945
946            print OUTPUT;
947
948            my $non_translated_line = $_;
949
[18687]950            for my $lang (sort keys %po_files_by_lang)
951            {
[18421]952                my $translation = $translations{$lang, $string};
953                next if !$translation;
954
955                $_ = $non_translated_line;
956                s/(\w+)=.*/[$lang]$1=$translation/;
957                print OUTPUT;
958            }
[18687]959        }
960        else
961        {
[18421]962            print OUTPUT;
963        }
964    }
965
966    close OUTPUT;
967    close INPUT;
968}
969
970sub desktop_merge_translations
971{
972    open INPUT, "<${FILE}" or die;
973    open OUTPUT, ">${OUTFILE}" or die;
974
[18687]975    while (<INPUT>)
976    {
977        if (s/^(\s*)_(\w+=(.*))/$1$2/) 
978        {
[18421]979            my $string = $3;
980
981            print OUTPUT;
982
983            my $non_translated_line = $_;
984
[18687]985            for my $lang (sort keys %po_files_by_lang)
986            {
[18421]987                my $translation = $translations{$lang, $string};
988                next if !$translation;
989
990                $_ = $non_translated_line;
991                s/(\w+)=.*/${1}[$lang]=$translation/;
992                print OUTPUT;
993            }
[18687]994        }
995        else
996        {
[18421]997            print OUTPUT;
998        }
999    }
1000
1001    close OUTPUT;
1002    close INPUT;
1003}
1004
1005sub schemas_merge_translations
1006{
1007    my $source;
1008
1009    {
1010       local $/; # slurp mode
1011       open INPUT, "<$FILE" or die "can't open $FILE: $!";
1012       $source = <INPUT>;
1013       close INPUT;
1014    }
1015
1016    open OUTPUT, ">$OUTFILE" or die;
1017
1018    # FIXME: support attribute translations
1019
1020    # Empty nodes never need translation, so unmark all of them.
1021    # For example, <_foo/> is just replaced by <foo/>.
1022    $source =~ s|<\s*_($w+)\s*/>|<$1/>|g;
1023
[18687]1024    while ($source =~ s/
1025                        (.*?)
1026                        (\s+)(<locale\ name="C">(\s*)
[21313]1027                            (<default>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/default>)?(\s*)
1028                            (<short>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/short>)?(\s*)
1029                            (<long>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/long>)?(\s*)
[18687]1030                        <\/locale>)
1031                       //sx)
1032    {
[18421]1033        print OUTPUT $1;
1034
[18687]1035        my $locale_start_spaces = $2 ? $2 : '';
1036        my $default_spaces = $4 ? $4 : '';
1037        my $short_spaces = $7 ? $7 : '';
1038        my $long_spaces = $10 ? $10 : '';
1039        my $locale_end_spaces = $13 ? $13 : '';
1040        my $c_default_block = $3 ? $3 : '';
1041        my $default_string = $6 ? $6 : '';
1042        my $short_string = $9 ? $9 : '';
1043        my $long_string = $12 ? $12 : '';
[18421]1044
[18687]1045        print OUTPUT "$locale_start_spaces$c_default_block";
[18421]1046
[18687]1047        $default_string =~ s/\s+/ /g;
1048        $default_string = entity_decode($default_string);
[18421]1049        $short_string =~ s/\s+/ /g;
1050        $short_string = entity_decode($short_string);
1051        $long_string =~ s/\s+/ /g;
1052        $long_string = entity_decode($long_string);
1053
[18687]1054        for my $lang (sort keys %po_files_by_lang)
1055        {
1056            my $default_translation = $translations{$lang, $default_string};
[18421]1057            my $short_translation = $translations{$lang, $short_string};
1058            my $long_translation  = $translations{$lang, $long_string};
1059
[18687]1060            next if (!$default_translation && !$short_translation &&
1061                     !$long_translation);
[18421]1062
1063            print OUTPUT "\n$locale_start_spaces<locale name=\"$lang\">";
1064
[18687]1065        print OUTPUT "$default_spaces";   
1066
1067        if ($default_translation)
1068        {
1069            $default_translation = entity_encode($default_translation);
1070            print OUTPUT "<default>$default_translation</default>";
1071        }
1072
1073            print OUTPUT "$short_spaces";
1074
[18421]1075            if ($short_translation)
1076            {
[18687]1077                        $short_translation = entity_encode($short_translation);
1078                        print OUTPUT "<short>$short_translation</short>";
[18421]1079            }
1080
[18687]1081            print OUTPUT "$long_spaces";
1082
[18421]1083            if ($long_translation)
1084            {
[18687]1085                        $long_translation = entity_encode($long_translation);
1086                        print OUTPUT "<long>$long_translation</long>";
[18421]1087            }       
1088
1089            print OUTPUT "$locale_end_spaces</locale>";
1090        }
1091    }
1092
1093    print OUTPUT $source;
1094
1095    close OUTPUT;
1096}
[18687]1097
1098sub rfc822deb_merge_translations
1099{
[21047]1100    my %encodings = ();
1101    for my $lang (keys %po_files_by_lang) {
1102        $encodings{$lang} = ($UTF8_ARG ? 'UTF-8' : get_po_encoding($po_files_by_lang{$lang}));
1103    }
1104
[18687]1105    my $source;
1106
1107    $Text::Wrap::huge = 'overflow';
[21047]1108    $Text::Wrap::break = qr/\n|\s(?=\S)/;
[18687]1109
1110    {
1111       local $/; # slurp mode
1112       open INPUT, "<$FILE" or die "can't open $FILE: $!";
1113       $source = <INPUT>;
1114       close INPUT;
1115    }
1116
1117    open OUTPUT, ">${OUTFILE}" or die;
1118
[21047]1119    while ($source =~ /(^|\n+)(_*)([^:\s]+)(:[ \t]*)(.*?)(?=\n[\S\n]|$)/sg)
[18687]1120    {
1121            my $sep = $1;
1122            my $non_translated_line = $3.$4;
1123            my $string = $5;
[21047]1124            my $underscore = length($2);
1125            next if $underscore eq 0 && $non_translated_line =~ /^#/;
[18687]1126            #  Remove [] dummy strings
[21047]1127            my $stripped = $string;
1128            $stripped =~ s/\[\s[^\[\]]*\],/,/g if $underscore eq 2;
1129            $stripped =~ s/\[\s[^\[\]]*\]$//;
1130            $non_translated_line .= $stripped;
[18687]1131
[21047]1132            print OUTPUT $sep.$non_translated_line;
[18687]1133   
[21047]1134            if ($underscore)
1135            {
1136                my @str_list = rfc822deb_split($underscore, $string);
1137
[18687]1138                for my $lang (sort keys %po_files_by_lang)
1139                {
1140                    my $is_translated = 1;
1141                    my $str_translated = '';
1142                    my $first = 1;
1143               
1144                    for my $str (@str_list)
1145                    {
1146                        my $translation = $translations{$lang, $str};
1147                   
1148                        if (!$translation)
1149                        {
1150                            $is_translated = 0;
1151                            last;
1152                        }
1153
1154                        #  $translation may also contain [] dummy
1155                        #  strings, mostly to indicate an empty string
1156                        $translation =~ s/\[\s[^\[\]]*\]$//;
1157                       
1158                        if ($first)
1159                        {
[21047]1160                            if ($underscore eq 2)
1161                            {
1162                                $str_translated .= $translation;
1163                            }
1164                            else
1165                            {
1166                                $str_translated .=
1167                                    Text::Tabs::expand($translation) .
1168                                    "\n";
1169                            }
[18687]1170                        }
1171                        else
1172                        {
[21047]1173                            if ($underscore eq 2)
1174                            {
1175                                $str_translated .= ', ' . $translation;
1176                            }
1177                            else
1178                            {
1179                                $str_translated .= Text::Tabs::expand(
1180                                    Text::Wrap::wrap(' ', ' ', $translation)) .
1181                                    "\n .\n";
1182                            }
[18687]1183                        }
1184                        $first = 0;
1185
1186                        #  To fix some problems with Text::Wrap::wrap
1187                        $str_translated =~ s/(\n )+\n/\n .\n/g;
1188                    }
1189                    next unless $is_translated;
1190
1191                    $str_translated =~ s/\n \.\n$//;
1192                    $str_translated =~ s/\s+$//;
1193
1194                    $_ = $non_translated_line;
[21047]1195                    s/^(\w+):\s*.*/$sep${1}-$lang.$encodings{$lang}: $str_translated/s;
[18687]1196                    print OUTPUT;
1197                }
[21047]1198            }
[18687]1199    }
1200    print OUTPUT "\n";
1201
1202    close OUTPUT;
1203    close INPUT;
1204}
1205
1206sub rfc822deb_split
1207{
1208    # Debian defines a special way to deal with rfc822-style files:
1209    # when a value contain newlines, it consists of
1210    #   1.  a short form (first line)
1211    #   2.  a long description, all lines begin with a space,
1212    #       and paragraphs are separated by a single dot on a line
1213    # This routine returns an array of all paragraphs, and reformat
1214    # them.
[21047]1215    # When first argument is 2, the string is a comma separated list of
1216    # values.
1217    my $type = shift;
[18687]1218    my $text = shift;
[21047]1219    $text =~ s/^[ \t]//mg;
1220    return (split(/, */, $text, 0)) if $type ne 1;
[18687]1221    return ($text) if $text !~ /\n/;
1222
1223    $text =~ s/([^\n]*)\n//;
1224    my @list = ($1);
1225    my $str = '';
1226
1227    for my $line (split (/\n/, $text))
1228    {
1229        chomp $line;
[21047]1230        if ($line =~ /^\.\s*$/)
[18687]1231        {
1232            #  New paragraph
1233            $str =~ s/\s*$//;
1234            push(@list, $str);
1235            $str = '';
1236        }
1237        elsif ($line =~ /^\s/)
1238        {
1239            #  Line which must not be reformatted
1240            $str .= "\n" if length ($str) && $str !~ /\n$/;
[21047]1241            $line =~ s/\s+$//;
[18687]1242            $str .= $line."\n";
1243        }
1244        else
1245        {
1246            #  Continuation line, remove newline
[21047]1247            $str .= " " if length ($str) && $str !~ /\n$/;
[18687]1248            $str .= $line;
1249        }
1250    }
1251
1252    $str =~ s/\s*$//;
1253    push(@list, $str) if length ($str);
1254
1255    return @list;
1256}
1257
Note: See TracBrowser for help on using the repository browser.