source: trunk/third/gal/xml-i18n-extract.in @ 19185

Revision 19185, 9.8 KB checked in by ghudson, 21 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r19184, which included commits to RCS files with non-trunk default branches.
RevLine 
[18132]1#!@INTLTOOL_PERL@ -w
[16763]2# -*- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 4  -*-
3
4#
[18132]5#  The Intltool Message Extractor
[16763]6#
[18132]7#  Copyright (C) 2000-2001 Free Software Foundation.
[16763]8#
[18132]9#  Intltool is free software; you can redistribute it and/or
[16763]10#  modify it under the terms of the GNU General Public License as
11#  published by the Free Software Foundation; either version 2 of the
12#  License, or (at your option) any later version.
13#
[18132]14#  Intltool is distributed in the hope that it will be useful,
[16763]15#  but WITHOUT ANY WARRANTY; without even the implied warranty of
16#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17#  General Public License for more details.
18#
19#  You should have received a copy of the GNU General Public License
[18132]20#  along with this program; if not, write to the Free Software
[16763]21#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22#
[18132]23#  As a special exception to the GNU General Public License, if you
24#  distribute this file as part of a program that contains a
25#  configuration script generated by Autoconf, you may include it under
26#  the same distribution terms that you use for the rest of that program.
27#
[16763]28#  Authors: Kenneth Christiansen <kenneth@gnu.org>
[18132]29#           Darin Adler <darin@bentspoon.com>
[16763]30#
31
32## Release information
[18132]33my $PROGRAM      = "intltool-extract";
34my $PACKAGE      = "intltool";
[19184]35my $VERSION      = "0.25";
[16763]36
37## Loaded modules
38use strict;
39use File::Basename;
40use Getopt::Long;
41
42## Scalars used by the option stuff
43my $TYPE_ARG    = "0";
44my $LOCAL_ARG   = "0";
45my $HELP_ARG    = "0";
46my $VERSION_ARG = "0";
47my $UPDATE_ARG  = "0";
48my $QUIET_ARG   = "0";
49
50my $FILE;
51my $OUTFILE;
52
53my $gettext_type = "";
54my $input;
55my %messages = ();
56
[18132]57## Use this instead of \w for XML files to handle more possible characters.
58my $w = "[-A-Za-z0-9._:]";
59
[16763]60## Always print first
61$| = 1;
62
63## Handle options
64GetOptions (
65            "type=s"     => \$TYPE_ARG,
66            "local|l"    => \$LOCAL_ARG,
[18132]67            "help|h"     => \$HELP_ARG,
[16763]68            "version|v"  => \$VERSION_ARG,
69            "update"     => \$UPDATE_ARG,
70            "quiet|q"    => \$QUIET_ARG,
71            ) or &error;
72
73&split_on_argument;
74
75
76## Check for options.
77## This section will check for the different options.
78
79sub split_on_argument {
80
81    if ($VERSION_ARG) {
82        &version;
83
84    } elsif ($HELP_ARG) {
85        &help;
86       
87    } elsif ($LOCAL_ARG) {
88        &place_local;
89        &extract;
90
91    } elsif ($UPDATE_ARG) {
92        &place_normal;
93        &extract;
94
95    } elsif (@ARGV > 0) {
96        &place_normal;
97        &message;
98        &extract;
99
100    } else {
101        &help;
102
103    } 
104}   
105
106sub place_normal {
107    $FILE        = $ARGV[0];
108    $OUTFILE     = "$FILE.h";
109}   
110
111sub place_local {
112    $OUTFILE     = fileparse($FILE, ());
113    if (!-e "tmp/") {
114        system("mkdir tmp/");
115    }
116    $OUTFILE     = "./tmp/$OUTFILE.h"
117}
118
119sub determine_type {
120   if ($TYPE_ARG =~ /^gettext\/(.*)/) {
121        $gettext_type=$1
122   }
123}
124
125## Sub for printing release information
126sub version{
127    print "${PROGRAM} (${PACKAGE}) $VERSION\n";
128    print "Copyright (C) 2000 Free Software Foundation, Inc.\n";
129    print "Written by Kenneth Christiansen, 2000.\n\n";
130    print "This is free software; see the source for copying conditions. There is NO\n";
131    print "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n";
132    exit;
133}
134
135## Sub for printing usage information
136sub help{
137    print "Usage: ${PROGRAM} [FILENAME] [OPTIONS] ...\n";
138    print "Generates a header file from an xml source file.\n\nGrabs all strings ";
139    print "between <_translatable_node> and it's end tag,\nwhere tag are all allowed ";
140    print "xml tags. Read the docs for more info.\n\n";
141    print "  -v, --version                shows the version\n";
142    print "  -h, --help                   shows this help page\n";
143    print "  -q, --quiet                  quiet mode\n";
144    print "\nReport bugs to <kenneth\@gnu.org>.\n";
145    exit;
146}
147
148## Sub for printing error messages
149sub error{
150    print "Try `${PROGRAM} --help' for more information.\n";
151    exit;
152}
153
154sub message {
155    print "Generating C format header file for translation.\n";
156}
157
158sub extract {
159    &determine_type;
160
161    &convert ($FILE);
162
163    open OUT, ">$OUTFILE";
164    &msg_write;
165    close OUT;
166
167    print "Wrote $OUTFILE\n" unless $QUIET_ARG;
168}
169
170sub convert($) {
171
172    ## Reading the file
173    {
174        local (*IN);
175        local $/; #slurp mode
176        open (IN, "<$FILE") || die "can't open $FILE: $!";
177        $input = <IN>;
178    }
179
[18132]180    &type_ini if $gettext_type eq "ini";
181    &type_keys if $gettext_type eq "keys";
182    &type_xml if $gettext_type eq "xml";
183    &type_glade if $gettext_type eq "glade";
184    &type_scheme if $gettext_type eq "scheme";
185    &type_schemas  if $gettext_type eq "schemas";
[19184]186    &type_rfc822deb  if $gettext_type eq "rfc822deb";
[16763]187}
188
[18132]189sub entity_decode_minimal
190{
191    local ($_) = @_;
[16763]192
[18132]193    s/&apos;/'/g; # '
194    s/&quot;/"/g; # "
195    s/&amp;/&/g;
[16763]196
[18132]197    return $_;
[16763]198}
199
[18132]200sub entity_decode
201{
202    local ($_) = @_;
[16763]203
[18132]204    s/&apos;/'/g; # '
205    s/&quot;/"/g; # "
206    s/&amp;/&/g;
207    s/&lt;/</g;
208    s/&gt;/>/g;
209
210    return $_;
[16763]211}
212
[18132]213sub escape_char
214{
215    return '\"' if $_ eq '"';
216    return '\n' if $_ eq "\n";
217    return '\\' if $_ eq '\\';
[16763]218
[18132]219    return $_;
220}
[16763]221
[18132]222sub escape
223{
224    my ($string) = @_;
225    return join "", map &escape_char, split //, $string;
226}
[16763]227
[18132]228sub type_ini {
229    ### For generic translatable desktop files ###
230    while ($input =~ /^_.*=(.*)$/mg) {
231        $messages{$1} = [];
232    }
233}
[16763]234
[18132]235sub type_keys {
236    ### For generic translatable mime/keys files ###
237    while ($input =~ /^\s*_\w+=(.*)$/mg) {
238        $messages{$1} = [];
[16763]239    }
240}
241
[18132]242sub type_xml {
243    ### For generic translatable XML files ###
[16763]244       
[18132]245    while ($input =~ /\s_$w+=\"([^"]+)\"/sg) { # "
246        $messages{entity_decode_minimal($1)} = [];
247    }
[16763]248
[19184]249    while ($input =~ /<_($w+)(?: xml:space="($w+)")?>(.+?)<\/_\1>/sg) {
250        $_ = $3;
251        if (!defined($2) || $2 ne "preserve") {
252            s/\s+/ /g;
253            s/^ //;
254            s/ $//;
255        }
[18132]256        $messages{entity_decode_minimal($_)} = [];
257    }
258}
[16763]259
[18132]260sub type_schemas {
261    ### For schemas XML files ###
262         
263    # FIXME: We should handle escaped < (less than)
[19184]264    while ($input =~ /
265                      <locale\ name="C">\s*
266                          (<default>\s*(.*?)\s*<\/default>\s*)?
267                          (<short>\s*(.*?)\s*<\/short>\s*)?
268                          (<long>\s*(.*?)\s*<\/long>\s*)?
269                      <\/locale>
270                     /sgx) {
271        my @totranslate = ($2,$4,$6);
272        foreach (@totranslate) {
273            next if !$_;
[18132]274        s/\s+/ /g;
275        $messages{entity_decode_minimal($_)} = [];
[19184]276        }
[18132]277    }
278}
[16763]279
[19184]280sub type_rfc822deb {
281    ### For rfc822-style Debian configuration files ###
282
283    while ($input =~ /(?:^|\n)_[^:]+:\s*(.*?)(?=\n\S|$)/sg) {
284        my @str_list = rfc822deb_split($1);
285        for my $str (@str_list) {
286            #   As rfc822deb is for configuration files, duplicates
287            #   should never happen.  Developers must use the
288            #   [] construct to make msgid unique, see also intltool-merge
289            print STDERR "Warning: msgid multiply defined:\n  $str\n"
290                if defined($messages{$str});
291            $messages{$str} = [];
292        }
293    }
294}
295
296sub rfc822deb_split {
297    # Debian defines a special way to deal with rfc822-style files:
298    # when a value contain newlines, it consists of
299    #   1.  a short form (first line)
300    #   2.  a long description, all lines begin with a space,
301    #       and paragraphs are separated by a single dot on a line
302    # This routine returns an array of all paragraphs, and reformat
303    # them.
304    my $text = shift;
305    $text =~ s/^ //mg;
306    return ($text) if $text !~ /\n/;
307
308    $text =~ s/([^\n]*)\n//;
309    my @list = ($1);
310    my $str = '';
311    for my $line (split (/\n/, $text)) {
312        chomp $line;
313        $line =~ /\s+$/;
314        if ($line =~ /^\.$/) {
315            #  New paragraph
316            $str =~ s/\s*$//;
317            push(@list, $str);
318            $str = '';
319        } elsif ($line =~ /^\s/) {
320            #  Line which must not be reformatted
321            $str .= "\n" if length ($str) && $str !~ /\n$/;
322            $str .= $line."\n";
323        } else {
324            #  Continuation line, remove newline
325            $str .= " " if length ($str) && $str !~ /[\n ]$/;
326            $str .= $line;
327        }
328    }
329    $str =~ s/\s*$//;
330    push(@list, $str) if length ($str);
331    return @list;
332}
333
[18132]334sub type_glade {
335    ### For translatable Glade XML files ###
[16763]336
[18132]337    my $tags = "label|title|text|format|copyright|comments|preview_text|tooltip|message";
[16763]338
[18132]339    while ($input =~ /<($tags)>([^<]+)<\/($tags)>/sg) {
340        # Glade sometimes uses tags that normally mark translatable things for
341        # little bits of non-translatable content. We work around this by not
342        # translating strings that only includes something like label4 or window1.
343        $messages{entity_decode($2)} = [] unless $2 =~ /^(window|label)[0-9]+$/;
344    }
345   
346    while ($input =~ /<items>(..[^<]*)<\/items>/sg) {
347        for my $item (split (/\n/, $1)) {
348            $messages{entity_decode($item)} = [];
[16763]349        }
350    }
351
[18132]352    ## handle new glade files
353    while ($input =~ /<(property|atkproperty)\s+[^>]*translatable\s*=\s*"yes"[^>]*>([^<]+)<\/\1>/sg) {
354        $messages{entity_decode($2)} = [] unless $2 =~ /^(window|label)[0-9]+$/;
355    }
356    while ($input =~ /<atkaction\s+action_name="([^>]*)"\s+description="([^>]+)"\/>/sg) {
357        $messages{entity_decode_minimal($2)} = [];
358    }
[16763]359}
360
[18132]361sub type_scheme {
362    while ($input =~ /_\(?"((?:[^"\\]+|\\.)*)"\)?/sg) {
363        $messages{$1} = [];
364    }
365}
366
[16763]367sub msg_write {
[18132]368    for my $message (sort keys %messages) {
[16763]369        print OUT "/* xgettext:no-c-format */\n" if $message =~ /%/;
370       
[19184]371        my @lines = split (/\n/, $message, -1);
[16763]372        for (my $n = 0; $n < @lines; $n++) {
373            if ($n == 0) {
374                print OUT "char *s = N_(\"";
375            } else { 
376                print OUT "             \"";
377            }
378
[18132]379            print OUT escape($lines[$n]);
[16763]380
381            if ($n < @lines - 1) {
382                print OUT "\\n\"\n";
383            } else {
384                print OUT "\");\n"; 
385            }
386        }
387    }
388}
389
Note: See TracBrowser for help on using the repository browser.