source: trunk/third/gal/xml-i18n-extract.in @ 18133

Revision 18133, 7.6 KB checked in by ghudson, 22 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r18132, which included commits to RCS files with non-trunk default branches.
Line 
1#!@INTLTOOL_PERL@ -w
2# -*- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 4  -*-
3
4#
5#  The Intltool Message Extractor
6#
7#  Copyright (C) 2000-2001 Free Software Foundation.
8#
9#  Intltool is free software; you can redistribute it and/or
10#  modify it under the terms of the GNU General Public License as
11#  published by the Free Software Foundation; either version 2 of the
12#  License, or (at your option) any later version.
13#
14#  Intltool is distributed in the hope that it will be useful,
15#  but WITHOUT ANY WARRANTY; without even the implied warranty of
16#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17#  General Public License for more details.
18#
19#  You should have received a copy of the GNU General Public License
20#  along with this program; if not, write to the Free Software
21#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22#
23#  As a special exception to the GNU General Public License, if you
24#  distribute this file as part of a program that contains a
25#  configuration script generated by Autoconf, you may include it under
26#  the same distribution terms that you use for the rest of that program.
27#
28#  Authors: Kenneth Christiansen <kenneth@gnu.org>
29#           Darin Adler <darin@bentspoon.com>
30#
31
32## Release information
33my $PROGRAM      = "intltool-extract";
34my $PACKAGE      = "intltool";
35my $VERSION      = "0.22";
36
37## Loaded modules
38use strict;
39use File::Basename;
40use Getopt::Long;
41
42## Scalars used by the option stuff
43my $TYPE_ARG    = "0";
44my $LOCAL_ARG   = "0";
45my $HELP_ARG    = "0";
46my $VERSION_ARG = "0";
47my $UPDATE_ARG  = "0";
48my $QUIET_ARG   = "0";
49
50my $FILE;
51my $OUTFILE;
52
53my $gettext_type = "";
54my $input;
55my %messages = ();
56
57## Use this instead of \w for XML files to handle more possible characters.
58my $w = "[-A-Za-z0-9._:]";
59
60## Always print first
61$| = 1;
62
63## Handle options
64GetOptions (
65            "type=s"     => \$TYPE_ARG,
66            "local|l"    => \$LOCAL_ARG,
67            "help|h"     => \$HELP_ARG,
68            "version|v"  => \$VERSION_ARG,
69            "update"     => \$UPDATE_ARG,
70            "quiet|q"    => \$QUIET_ARG,
71            ) or &error;
72
73&split_on_argument;
74
75
76## Check for options.
77## This section will check for the different options.
78
79sub split_on_argument {
80
81    if ($VERSION_ARG) {
82        &version;
83
84    } elsif ($HELP_ARG) {
85        &help;
86       
87    } elsif ($LOCAL_ARG) {
88        &place_local;
89        &extract;
90
91    } elsif ($UPDATE_ARG) {
92        &place_normal;
93        &extract;
94
95    } elsif (@ARGV > 0) {
96        &place_normal;
97        &message;
98        &extract;
99
100    } else {
101        &help;
102
103    } 
104}   
105
106sub place_normal {
107    $FILE        = $ARGV[0];
108    $OUTFILE     = "$FILE.h";
109}   
110
111sub place_local {
112    $OUTFILE     = fileparse($FILE, ());
113    if (!-e "tmp/") {
114        system("mkdir tmp/");
115    }
116    $OUTFILE     = "./tmp/$OUTFILE.h"
117}
118
119sub determine_type {
120   if ($TYPE_ARG =~ /^gettext\/(.*)/) {
121        $gettext_type=$1
122   }
123}
124
125## Sub for printing release information
126sub version{
127    print "${PROGRAM} (${PACKAGE}) $VERSION\n";
128    print "Copyright (C) 2000 Free Software Foundation, Inc.\n";
129    print "Written by Kenneth Christiansen, 2000.\n\n";
130    print "This is free software; see the source for copying conditions. There is NO\n";
131    print "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n";
132    exit;
133}
134
135## Sub for printing usage information
136sub help{
137    print "Usage: ${PROGRAM} [FILENAME] [OPTIONS] ...\n";
138    print "Generates a header file from an xml source file.\n\nGrabs all strings ";
139    print "between <_translatable_node> and it's end tag,\nwhere tag are all allowed ";
140    print "xml tags. Read the docs for more info.\n\n";
141    print "  -v, --version                shows the version\n";
142    print "  -h, --help                   shows this help page\n";
143    print "  -q, --quiet                  quiet mode\n";
144    print "\nReport bugs to <kenneth\@gnu.org>.\n";
145    exit;
146}
147
148## Sub for printing error messages
149sub error{
150    print "Try `${PROGRAM} --help' for more information.\n";
151    exit;
152}
153
154sub message {
155    print "Generating C format header file for translation.\n";
156}
157
158sub extract {
159    &determine_type;
160
161    &convert ($FILE);
162
163    open OUT, ">$OUTFILE";
164    &msg_write;
165    close OUT;
166
167    print "Wrote $OUTFILE\n" unless $QUIET_ARG;
168}
169
170sub convert($) {
171
172    ## Reading the file
173    {
174        local (*IN);
175        local $/; #slurp mode
176        open (IN, "<$FILE") || die "can't open $FILE: $!";
177        $input = <IN>;
178    }
179
180    &type_ini if $gettext_type eq "ini";
181    &type_keys if $gettext_type eq "keys";
182    &type_xml if $gettext_type eq "xml";
183    &type_glade if $gettext_type eq "glade";
184    &type_scheme if $gettext_type eq "scheme";
185    &type_schemas  if $gettext_type eq "schemas";
186}
187
188sub entity_decode_minimal
189{
190    local ($_) = @_;
191
192    s/&apos;/'/g; # '
193    s/&quot;/"/g; # "
194    s/&amp;/&/g;
195
196    return $_;
197}
198
199sub entity_decode
200{
201    local ($_) = @_;
202
203    s/&apos;/'/g; # '
204    s/&quot;/"/g; # "
205    s/&amp;/&/g;
206    s/&lt;/</g;
207    s/&gt;/>/g;
208
209    return $_;
210}
211
212sub escape_char
213{
214    return '\"' if $_ eq '"';
215    return '\n' if $_ eq "\n";
216    return '\\' if $_ eq '\\';
217
218    return $_;
219}
220
221sub escape
222{
223    my ($string) = @_;
224    return join "", map &escape_char, split //, $string;
225}
226
227sub type_ini {
228    ### For generic translatable desktop files ###
229    while ($input =~ /^_.*=(.*)$/mg) {
230        $messages{$1} = [];
231    }
232}
233
234sub type_keys {
235    ### For generic translatable mime/keys files ###
236    while ($input =~ /^\s*_\w+=(.*)$/mg) {
237        $messages{$1} = [];
238    }
239}
240
241sub type_xml {
242    ### For generic translatable XML files ###
243       
244    while ($input =~ /\s_$w+=\"([^"]+)\"/sg) { # "
245        $messages{entity_decode_minimal($1)} = [];
246    }
247
248    while ($input =~ /<_($w+)>(.+?)<\/_\1>/sg) {
249        $_ = $2;
250        s/\s+/ /g;
251        s/^ //;
252        s/ $//;
253        $messages{entity_decode_minimal($_)} = [];
254    }
255}
256
257sub type_schemas {
258    ### For schemas XML files ###
259         
260    # FIXME: We should handle escaped < (less than)
261    while ($input =~ /<(short|long)>([^<]+)<\/\1>/sg) {
262        $_ = $2;
263        s/\s+/ /g;
264        s/^ //;
265        s/ $//;
266        $messages{entity_decode_minimal($_)} = [];
267    }
268}
269
270sub type_glade {
271    ### For translatable Glade XML files ###
272
273    my $tags = "label|title|text|format|copyright|comments|preview_text|tooltip|message";
274
275    while ($input =~ /<($tags)>([^<]+)<\/($tags)>/sg) {
276        # Glade sometimes uses tags that normally mark translatable things for
277        # little bits of non-translatable content. We work around this by not
278        # translating strings that only includes something like label4 or window1.
279        $messages{entity_decode($2)} = [] unless $2 =~ /^(window|label)[0-9]+$/;
280    }
281   
282    while ($input =~ /<items>(..[^<]*)<\/items>/sg) {
283        for my $item (split (/\n/, $1)) {
284            $messages{entity_decode($item)} = [];
285        }
286    }
287
288    ## handle new glade files
289    while ($input =~ /<(property|atkproperty)\s+[^>]*translatable\s*=\s*"yes"[^>]*>([^<]+)<\/\1>/sg) {
290        $messages{entity_decode($2)} = [] unless $2 =~ /^(window|label)[0-9]+$/;
291    }
292    while ($input =~ /<atkaction\s+action_name="([^>]*)"\s+description="([^>]+)"\/>/sg) {
293        $messages{entity_decode_minimal($2)} = [];
294    }
295}
296
297sub type_scheme {
298    while ($input =~ /_\(?"((?:[^"\\]+|\\.)*)"\)?/sg) {
299        $messages{$1} = [];
300    }
301}
302
303sub msg_write {
304    for my $message (sort keys %messages) {
305        print OUT "/* xgettext:no-c-format */\n" if $message =~ /%/;
306       
307        my @lines = split (/\n/, $message);
308        for (my $n = 0; $n < @lines; $n++) {
309            if ($n == 0) {
310                print OUT "char *s = N_(\"";
311            } else { 
312                print OUT "             \"";
313            }
314
315            print OUT escape($lines[$n]);
316
317            if ($n < @lines - 1) {
318                print OUT "\\n\"\n";
319            } else {
320                print OUT "\");\n"; 
321            }
322        }
323    }
324}
325
Note: See TracBrowser for help on using the repository browser.