source: trunk/third/nautilus/intltool-extract.in @ 18383

Revision 18383, 9.4 KB checked in by ghudson, 21 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r18382, which included commits to RCS files with non-trunk default branches.
Line 
1#!@INTLTOOL_PERL@ -w
2# -*- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 4  -*-
3
4#
5#  The Intltool Message Extractor
6#
7#  Copyright (C) 2000-2001 Free Software Foundation.
8#
9#  Intltool is free software; you can redistribute it and/or
10#  modify it under the terms of the GNU General Public License as
11#  published by the Free Software Foundation; either version 2 of the
12#  License, or (at your option) any later version.
13#
14#  Intltool is distributed in the hope that it will be useful,
15#  but WITHOUT ANY WARRANTY; without even the implied warranty of
16#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17#  General Public License for more details.
18#
19#  You should have received a copy of the GNU General Public License
20#  along with this program; if not, write to the Free Software
21#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22#
23#  As a special exception to the GNU General Public License, if you
24#  distribute this file as part of a program that contains a
25#  configuration script generated by Autoconf, you may include it under
26#  the same distribution terms that you use for the rest of that program.
27#
28#  Authors: Kenneth Christiansen <kenneth@gnu.org>
29#           Darin Adler <darin@bentspoon.com>
30#
31
32## Release information
33my $PROGRAM      = "intltool-extract";
34my $PACKAGE      = "intltool";
35my $VERSION      = "0.22";
36
37## Loaded modules
38use strict;
39use File::Basename;
40use Getopt::Long;
41
42## Scalars used by the option stuff
43my $TYPE_ARG    = "0";
44my $LOCAL_ARG   = "0";
45my $HELP_ARG    = "0";
46my $VERSION_ARG = "0";
47my $UPDATE_ARG  = "0";
48my $QUIET_ARG   = "0";
49
50my $FILE;
51my $OUTFILE;
52
53my $gettext_type = "";
54my $input;
55my %messages = ();
56
57## Use this instead of \w for XML files to handle more possible characters.
58my $w = "[-A-Za-z0-9._:]";
59
60## Always print first
61$| = 1;
62
63## Handle options
64GetOptions (
65            "type=s"     => \$TYPE_ARG,
66            "local|l"    => \$LOCAL_ARG,
67            "help|h"     => \$HELP_ARG,
68            "version|v"  => \$VERSION_ARG,
69            "update"     => \$UPDATE_ARG,
70            "quiet|q"    => \$QUIET_ARG,
71            ) or &error;
72
73&split_on_argument;
74
75
76## Check for options.
77## This section will check for the different options.
78
79sub split_on_argument {
80
81    if ($VERSION_ARG) {
82        &version;
83
84    } elsif ($HELP_ARG) {
85        &help;
86       
87    } elsif ($LOCAL_ARG) {
88        &place_local;
89        &extract;
90
91    } elsif ($UPDATE_ARG) {
92        &place_normal;
93        &extract;
94
95    } elsif (@ARGV > 0) {
96        &place_normal;
97        &message;
98        &extract;
99
100    } else {
101        &help;
102
103    } 
104}   
105
106sub place_normal {
107    $FILE        = $ARGV[0];
108    $OUTFILE     = "$FILE.h";
109}   
110
111sub place_local {
112    $OUTFILE     = fileparse($FILE, ());
113    if (!-e "tmp/") {
114        system("mkdir tmp/");
115    }
116    $OUTFILE     = "./tmp/$OUTFILE.h"
117}
118
119sub determine_type {
120   if ($TYPE_ARG =~ /^gettext\/(.*)/) {
121        $gettext_type=$1
122   }
123}
124
125## Sub for printing release information
126sub version{
127    print "${PROGRAM} (${PACKAGE}) $VERSION\n";
128    print "Copyright (C) 2000 Free Software Foundation, Inc.\n";
129    print "Written by Kenneth Christiansen, 2000.\n\n";
130    print "This is free software; see the source for copying conditions. There is NO\n";
131    print "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n";
132    exit;
133}
134
135## Sub for printing usage information
136sub help{
137    print "Usage: ${PROGRAM} [FILENAME] [OPTIONS] ...\n";
138    print "Generates a header file from an xml source file.\n\nGrabs all strings ";
139    print "between <_translatable_node> and it's end tag,\nwhere tag are all allowed ";
140    print "xml tags. Read the docs for more info.\n\n";
141    print "  -v, --version                shows the version\n";
142    print "  -h, --help                   shows this help page\n";
143    print "  -q, --quiet                  quiet mode\n";
144    print "\nReport bugs to <kenneth\@gnu.org>.\n";
145    exit;
146}
147
148## Sub for printing error messages
149sub error{
150    print "Try `${PROGRAM} --help' for more information.\n";
151    exit;
152}
153
154sub message {
155    print "Generating C format header file for translation.\n";
156}
157
158sub extract {
159    &determine_type;
160
161    &convert ($FILE);
162
163    open OUT, ">$OUTFILE";
164    &msg_write;
165    close OUT;
166
167    print "Wrote $OUTFILE\n" unless $QUIET_ARG;
168}
169
170sub convert($) {
171
172    ## Reading the file
173    {
174        local (*IN);
175        local $/; #slurp mode
176        open (IN, "<$FILE") || die "can't open $FILE: $!";
177        $input = <IN>;
178    }
179
180    &type_ini if $gettext_type eq "ini";
181    &type_keys if $gettext_type eq "keys";
182    &type_xml if $gettext_type eq "xml";
183    &type_glade if $gettext_type eq "glade";
184    &type_scheme if $gettext_type eq "scheme";
185    &type_schemas  if $gettext_type eq "schemas";
186    &type_rfc822deb  if $gettext_type eq "rfc822deb";
187}
188
189sub entity_decode_minimal
190{
191    local ($_) = @_;
192
193    s/&apos;/'/g; # '
194    s/&quot;/"/g; # "
195    s/&amp;/&/g;
196
197    return $_;
198}
199
200sub entity_decode
201{
202    local ($_) = @_;
203
204    s/&apos;/'/g; # '
205    s/&quot;/"/g; # "
206    s/&amp;/&/g;
207    s/&lt;/</g;
208    s/&gt;/>/g;
209
210    return $_;
211}
212
213sub escape_char
214{
215    return '\"' if $_ eq '"';
216    return '\n' if $_ eq "\n";
217    return '\\' if $_ eq '\\';
218
219    return $_;
220}
221
222sub escape
223{
224    my ($string) = @_;
225    return join "", map &escape_char, split //, $string;
226}
227
228sub type_ini {
229    ### For generic translatable desktop files ###
230    while ($input =~ /^_.*=(.*)$/mg) {
231        $messages{$1} = [];
232    }
233}
234
235sub type_keys {
236    ### For generic translatable mime/keys files ###
237    while ($input =~ /^\s*_\w+=(.*)$/mg) {
238        $messages{$1} = [];
239    }
240}
241
242sub type_xml {
243    ### For generic translatable XML files ###
244       
245    while ($input =~ /\s_$w+=\"([^"]+)\"/sg) { # "
246        $messages{entity_decode_minimal($1)} = [];
247    }
248
249    while ($input =~ /<_($w+)>(.+?)<\/_\1>/sg) {
250        $_ = $2;
251        s/\s+/ /g;
252        s/^ //;
253        s/ $//;
254        $messages{entity_decode_minimal($_)} = [];
255    }
256}
257
258sub type_schemas {
259    ### For schemas XML files ###
260         
261    # FIXME: We should handle escaped < (less than)
262    while ($input =~ /<(default|short|long)>([^<]+)<\/\1>/sg) {
263        $_ = $2;
264        s/\s+/ /g;
265        s/^ //;
266        s/ $//;
267        $messages{entity_decode_minimal($_)} = [];
268    }
269}
270
271sub type_rfc822deb {
272    ### For rfc822-style Debian configuration files ###
273
274    while ($input =~ /(?:^|\n)_[^:]+:\s*(.*?)(?=\n\S|$)/sg) {
275        my @str_list = rfc822deb_split($1);
276        for my $str (@str_list) {
277            #   As rfc822deb is for configuration files, duplicates
278            #   should never happen.  Developers must use the
279            #   [] construct to make msgid unique, see also intltool-merge
280            print STDERR "Warning: msgid multiply defined:\n  $str\n"
281                if defined($messages{$str});
282            $messages{$str} = [];
283        }
284    }
285}
286
287sub rfc822deb_split {
288    # Debian defines a special way to deal with rfc822-style files:
289    # when a value contain newlines, it consists of
290    #   1.  a short form (first line)
291    #   2.  a long description, all lines begin with a space,
292    #       and paragraphs are separated by a single dot on a line
293    # This routine returns an array of all paragraphs, and reformat
294    # them.
295    my $text = shift;
296    $text =~ s/^ //mg;
297    return ($text) if $text !~ /\n/;
298
299    $text =~ s/([^\n]*)\n//;
300    my @list = ($1);
301    my $str = '';
302    for my $line (split (/\n/, $text)) {
303        chomp $line;
304        $line =~ /\s+$/;
305        if ($line =~ /^\.$/) {
306            #  New paragraph
307            $str =~ s/\s*$//;
308            push(@list, $str);
309            $str = '';
310        } elsif ($line =~s/^\s(\s+.*)/$1/) {
311            #  Line which must not be reformatted
312            $str .= "\n" if length ($str) && $str !~ /\n$/;
313            $str .= $line."\n";
314        } else {
315            #  Continuation line, remove newline
316            $line =~ s/^\s+//;
317            $str .= " " if length ($str) && $str !~ /[\n ]$/;
318            $str .= $line;
319        }
320    }
321    $str =~ s/\s*$//;
322    push(@list, $str) if length ($str);
323    return @list;
324}
325
326sub type_glade {
327    ### For translatable Glade XML files ###
328
329    my $tags = "label|title|text|format|copyright|comments|preview_text|tooltip|message";
330
331    while ($input =~ /<($tags)>([^<]+)<\/($tags)>/sg) {
332        # Glade sometimes uses tags that normally mark translatable things for
333        # little bits of non-translatable content. We work around this by not
334        # translating strings that only includes something like label4 or window1.
335        $messages{entity_decode($2)} = [] unless $2 =~ /^(window|label)[0-9]+$/;
336    }
337   
338    while ($input =~ /<items>(..[^<]*)<\/items>/sg) {
339        for my $item (split (/\n/, $1)) {
340            $messages{entity_decode($item)} = [];
341        }
342    }
343
344    ## handle new glade files
345    while ($input =~ /<(property|atkproperty)\s+[^>]*translatable\s*=\s*"yes"[^>]*>([^<]+)<\/\1>/sg) {
346        $messages{entity_decode($2)} = [] unless $2 =~ /^(window|label)[0-9]+$/;
347    }
348    while ($input =~ /<atkaction\s+action_name="([^>]*)"\s+description="([^>]+)"\/>/sg) {
349        $messages{entity_decode_minimal($2)} = [];
350    }
351}
352
353sub type_scheme {
354    while ($input =~ /_\(?"((?:[^"\\]+|\\.)*)"\)?/sg) {
355        $messages{$1} = [];
356    }
357}
358
359sub msg_write {
360    for my $message (sort keys %messages) {
361        print OUT "/* xgettext:no-c-format */\n" if $message =~ /%/;
362       
363        my @lines = split (/\n/, $message);
364        for (my $n = 0; $n < @lines; $n++) {
365            if ($n == 0) {
366                print OUT "char *s = N_(\"";
367            } else { 
368                print OUT "             \"";
369            }
370
371            print OUT escape($lines[$n]);
372
373            if ($n < @lines - 1) {
374                print OUT "\\n\"\n";
375            } else {
376                print OUT "\");\n"; 
377            }
378        }
379    }
380}
381
Note: See TracBrowser for help on using the repository browser.