source: trunk/third/gnome-nettool/intltool-extract.in @ 21079

Revision 21079, 11.6 KB checked in by ghudson, 20 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r21078, which included commits to RCS files with non-trunk default branches.
  • Property svn:executable set to *
Line 
1#!@INTLTOOL_PERL@ -w
2# -*- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 4  -*-
3
4#
5#  The Intltool Message Extractor
6#
7#  Copyright (C) 2000-2001, 2003 Free Software Foundation.
8#
9#  Intltool is free software; you can redistribute it and/or
10#  modify it under the terms of the GNU General Public License as
11#  published by the Free Software Foundation; either version 2 of the
12#  License, or (at your option) any later version.
13#
14#  Intltool is distributed in the hope that it will be useful,
15#  but WITHOUT ANY WARRANTY; without even the implied warranty of
16#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17#  General Public License for more details.
18#
19#  You should have received a copy of the GNU General Public License
20#  along with this program; if not, write to the Free Software
21#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22#
23#  As a special exception to the GNU General Public License, if you
24#  distribute this file as part of a program that contains a
25#  configuration script generated by Autoconf, you may include it under
26#  the same distribution terms that you use for the rest of that program.
27#
28#  Authors: Kenneth Christiansen <kenneth@gnu.org>
29#           Darin Adler <darin@bentspoon.com>
30#
31
32## Release information
33my $PROGRAM      = "intltool-extract";
34my $PACKAGE      = "intltool";
35my $VERSION      = "0.30";
36
37## Loaded modules
38use strict;
39use File::Basename;
40use Getopt::Long;
41
42## Scalars used by the option stuff
43my $TYPE_ARG    = "0";
44my $LOCAL_ARG   = "0";
45my $HELP_ARG    = "0";
46my $VERSION_ARG = "0";
47my $UPDATE_ARG  = "0";
48my $QUIET_ARG   = "0";
49my $SRCDIR_ARG  = ".";
50
51my $FILE;
52my $OUTFILE;
53
54my $gettext_type = "";
55my $input;
56my %messages = ();
57my %loc = ();
58my %count = ();
59my %comments = ();
60my $strcount = 0;
61
62## Use this instead of \w for XML files to handle more possible characters.
63my $w = "[-A-Za-z0-9._:]";
64
65## Always print first
66$| = 1;
67
68## Handle options
69GetOptions (
70            "type=s"     => \$TYPE_ARG,
71            "local|l"    => \$LOCAL_ARG,
72            "help|h"     => \$HELP_ARG,
73            "version|v"  => \$VERSION_ARG,
74            "update"     => \$UPDATE_ARG,
75            "quiet|q"    => \$QUIET_ARG,
76            "srcdir=s"   => \$SRCDIR_ARG,
77            ) or &error;
78
79&split_on_argument;
80
81
82## Check for options.
83## This section will check for the different options.
84
85sub split_on_argument {
86
87    if ($VERSION_ARG) {
88        &version;
89
90    } elsif ($HELP_ARG) {
91        &help;
92       
93    } elsif ($LOCAL_ARG) {
94        &place_local;
95        &extract;
96
97    } elsif ($UPDATE_ARG) {
98        &place_normal;
99        &extract;
100
101    } elsif (@ARGV > 0) {
102        &place_normal;
103        &message;
104        &extract;
105
106    } else {
107        &help;
108
109    } 
110}   
111
112sub place_normal {
113    $FILE        = $ARGV[0];
114    $OUTFILE     = "$FILE.h";
115}   
116
117sub place_local {
118    $OUTFILE     = fileparse($FILE, ());
119    if (!-e "tmp/") {
120        system("mkdir tmp/");
121    }
122    $OUTFILE     = "./tmp/$OUTFILE.h"
123}
124
125sub determine_type {
126   if ($TYPE_ARG =~ /^gettext\/(.*)/) {
127        $gettext_type=$1
128   }
129}
130
131## Sub for printing release information
132sub version{
133    print <<_EOF_;
134${PROGRAM} (${PACKAGE}) $VERSION
135Copyright (C) 2000, 2003 Free Software Foundation, Inc.
136Written by Kenneth Christiansen, 2000.
137
138This is free software; see the source for copying conditions.  There is NO
139warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
140_EOF_
141    exit;
142}
143
144## Sub for printing usage information
145sub help {
146    print <<_EOF_;
147Usage: ${PROGRAM} [OPTION]... [FILENAME]
148Generates a header file from an XML source file.
149
150It grabs all strings between <_translatable_node> and its end tag in
151XML files. Read manpage (man ${PROGRAM}) for more info.
152
153      --type=TYPE   Specify the file type of FILENAME. Currently supports:
154                    "gettext/glade", "gettext/ini", "gettext/keys"
155                    "gettext/rfc822deb", "gettext/schemas",
156                    "gettext/scheme", "gettext/xml"
157  -l, --local       Writes output into current working directory
158                    (conflicts with --update)
159      --update      Writes output into the same directory the source file
160                    reside (conflicts with --local)
161      --srcdir      Root of the source tree
162  -v, --version     Output version information and exit
163  -h, --help        Display this help and exit
164  -q, --quiet       Quiet mode
165
166Report bugs to http://bugzilla.gnome.org/ (product name "$PACKAGE")
167or send email to <xml-i18n-tools\@gnome.org>.
168_EOF_
169    exit;
170}
171
172## Sub for printing error messages
173sub error{
174    print STDERR "Try `${PROGRAM} --help' for more information.\n";
175    exit;
176}
177
178sub message {
179    print "Generating C format header file for translation.\n" unless $QUIET_ARG;
180}
181
182sub extract {
183    &determine_type;
184
185    &convert;
186
187    open OUT, ">$OUTFILE";
188    &msg_write;
189    close OUT;
190
191    print "Wrote $OUTFILE\n" unless $QUIET_ARG;
192}
193
194sub convert {
195
196    ## Reading the file
197    {
198        local (*IN);
199        local $/; #slurp mode
200        open (IN, "<$SRCDIR_ARG/$FILE") || die "can't open $SRCDIR_ARG/$FILE: $!";
201        $input = <IN>;
202    }
203
204    &type_ini if $gettext_type eq "ini";
205    &type_keys if $gettext_type eq "keys";
206    &type_xml if $gettext_type eq "xml";
207    &type_glade if $gettext_type eq "glade";
208    &type_scheme if $gettext_type eq "scheme";
209    &type_schemas  if $gettext_type eq "schemas";
210    &type_rfc822deb  if $gettext_type eq "rfc822deb";
211}
212
213sub entity_decode_minimal
214{
215    local ($_) = @_;
216
217    s/&apos;/'/g; # '
218    s/&quot;/"/g; # "
219    s/&amp;/&/g;
220
221    return $_;
222}
223
224sub entity_decode
225{
226    local ($_) = @_;
227
228    s/&apos;/'/g; # '
229    s/&quot;/"/g; # "
230    s/&amp;/&/g;
231    s/&lt;/</g;
232    s/&gt;/>/g;
233
234    return $_;
235}
236
237sub escape_char
238{
239    return '\"' if $_ eq '"';
240    return '\n' if $_ eq "\n";
241    return '\\' if $_ eq '\\';
242
243    return $_;
244}
245
246sub escape
247{
248    my ($string) = @_;
249    return join "", map &escape_char, split //, $string;
250}
251
252sub type_ini {
253    ### For generic translatable desktop files ###
254    while ($input =~ /^_.*=(.*)$/mg) {
255        $messages{$1} = [];
256    }
257}
258
259sub type_keys {
260    ### For generic translatable mime/keys files ###
261    while ($input =~ /^\s*_\w+=(.*)$/mg) {
262        $messages{$1} = [];
263    }
264}
265
266sub type_xml {
267    ### For generic translatable XML files ###
268       
269    while ($input =~ /\s_$w+\s*=\s*\"([^"]+)\"/sg) { # "
270        $messages{entity_decode_minimal($1)} = [];
271    }
272
273    while ($input =~ /<_($w+)(?: xml:space="($w+)")?>(.+?)<\/_\1>/sg) {
274        $_ = $3;
275        if (!defined($2) || $2 ne "preserve") {
276            s/\s+/ /g;
277            s/^ //;
278            s/ $//;
279        }
280        $messages{entity_decode_minimal($_)} = [];
281    }
282}
283
284sub type_schemas {
285    ### For schemas XML files ###
286         
287    # FIXME: We should handle escaped < (less than)
288    while ($input =~ /
289                      <locale\ name="C">\s*
290                          (<default>\s*(.*?)\s*<\/default>\s*)?
291                          (<short>\s*(.*?)\s*<\/short>\s*)?
292                          (<long>\s*(.*?)\s*<\/long>\s*)?
293                      <\/locale>
294                     /sgx) {
295        my @totranslate = ($2,$4,$6);
296        foreach (@totranslate) {
297            next if !$_;
298        s/\s+/ /g;
299        $messages{entity_decode_minimal($_)} = [];
300        }
301    }
302}
303
304sub type_rfc822deb {
305    ### For rfc822-style Debian configuration files ###
306
307    my $lineno = 1;
308    my $type = '';
309    while ($input =~ /\G(.*?)(^|\n)(_+)([^:]+):[ \t]*(.*?)(?=\n\S|$)/sg)
310    {
311        my ($pre, $newline, $underscore, $tag, $text) = ($1, $2, $3, $4, $5);
312        while ($pre =~ m/\n/g)
313        {
314            $lineno ++;
315        }
316        $lineno += length($newline);
317        my @str_list = rfc822deb_split(length($underscore), $text);
318        for my $str (@str_list)
319        {
320            $strcount++;
321            $messages{$str} = [];
322            $loc{$str} = $lineno;
323            $count{$str} = $strcount;
324            my $usercomment = '';
325            while($pre =~ s/(^|\n)#([^\n]*)$//s)
326            {
327                $usercomment = "\n" . $2 . $usercomment;
328            }
329            $comments{$str} = $tag . $usercomment;
330        }
331        $lineno += ($text =~ s/\n//g);
332    }
333}
334
335sub rfc822deb_split {
336    # Debian defines a special way to deal with rfc822-style files:
337    # when a value contain newlines, it consists of
338    #   1.  a short form (first line)
339    #   2.  a long description, all lines begin with a space,
340    #       and paragraphs are separated by a single dot on a line
341    # This routine returns an array of all paragraphs, and reformat
342    # them.
343    # When first argument is 2, the string is a comma separated list of
344    # values.
345    my $type = shift;
346    my $text = shift;
347    $text =~ s/^[ \t]//mg;
348    return (split(/, */, $text, 0)) if $type ne 1;
349    return ($text) if $text !~ /\n/;
350
351    $text =~ s/([^\n]*)\n//;
352    my @list = ($1);
353    my $str = '';
354    for my $line (split (/\n/, $text))
355    {
356        chomp $line;
357        if ($line =~ /^\.\s*$/)
358        {
359            #  New paragraph
360            $str =~ s/\s*$//;
361            push(@list, $str);
362            $str = '';
363        }
364        elsif ($line =~ /^\s/)
365        {
366            #  Line which must not be reformatted
367            $str .= "\n" if length ($str) && $str !~ /\n$/;
368            $line =~ s/\s+$//;
369            $str .= $line."\n";
370        }
371        else
372        {
373            #  Continuation line, remove newline
374            $str .= " " if length ($str) && $str !~ /\n$/;
375            $str .= $line;
376        }
377    }
378    $str =~ s/\s*$//;
379    push(@list, $str) if length ($str);
380    return @list;
381}
382
383sub type_glade {
384    ### For translatable Glade XML files ###
385
386    my $tags = "label|title|text|format|copyright|comments|preview_text|tooltip|message";
387
388    while ($input =~ /<($tags)>([^<]+)<\/($tags)>/sg) {
389        # Glade sometimes uses tags that normally mark translatable things for
390        # little bits of non-translatable content. We work around this by not
391        # translating strings that only includes something like label4 or window1.
392        $messages{entity_decode($2)} = [] unless $2 =~ /^(window|label)[0-9]+$/;
393    }
394   
395    while ($input =~ /<items>(..[^<]*)<\/items>/sg) {
396        for my $item (split (/\n/, $1)) {
397            $messages{entity_decode($item)} = [];
398        }
399    }
400
401    ## handle new glade files
402    while ($input =~ /<(property|atkproperty)\s+[^>]*translatable\s*=\s*"yes"[^>]*>([^<]+)<\/\1>/sg) {
403        $messages{entity_decode($2)} = [] unless $2 =~ /^(window|label)[0-9]+$/;
404    }
405    while ($input =~ /<atkaction\s+action_name="([^>]*)"\s+description="([^>]+)"\/>/sg) {
406        $messages{entity_decode_minimal($2)} = [];
407    }
408}
409
410sub type_scheme {
411    while ($input =~ /_\w*\(?"((?:[^"\\]+|\\.)*)"\)?/sg) {
412        $messages{$1} = [];
413    }
414}
415
416sub msg_write {
417    my @msgids;
418    if (%count)
419    {
420        @msgids = sort { $count{$a} <=> $count{$b} } keys %count;
421    }
422    else
423    {
424        @msgids = sort keys %messages;
425    }
426    for my $message (@msgids)
427    {
428        my $offsetlines = 1;
429        $offsetlines++ if $message =~ /%/;
430        if (defined ($comments{$message}))
431        {
432                while ($comments{$message} =~ m/\n/g)
433                {
434                    $offsetlines++;
435                }
436        }
437        print OUT "# ".($loc{$message} - $offsetlines).  " \"$FILE\"\n"
438                if defined $loc{$message};
439        print OUT "/* ".$comments{$message}." */\n"
440                if defined $comments{$message};
441        print OUT "/* xgettext:no-c-format */\n" if $message =~ /%/;
442       
443        my @lines = split (/\n/, $message, -1);
444        for (my $n = 0; $n < @lines; $n++)
445        {
446            if ($n == 0)
447            {
448                print OUT "char *s = N_(\"";
449            }
450            else
451            { 
452                print OUT "             \"";
453            }
454
455            print OUT escape($lines[$n]);
456
457            if ($n < @lines - 1)
458            {
459                print OUT "\\n\"\n";
460            }
461            else
462            {
463                print OUT "\");\n"; 
464            }
465        }
466    }
467}
468
Note: See TracBrowser for help on using the repository browser.