source: trunk/third/gnome-applets/intltool-extract.in @ 21373

Revision 21373, 13.6 KB checked in by ghudson, 20 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r21372, which included commits to RCS files with non-trunk default branches.
Line 
1#!@INTLTOOL_PERL@ -w
2# -*- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 4  -*-
3
4#
5#  The Intltool Message Extractor
6#
7#  Copyright (C) 2000-2001, 2003 Free Software Foundation.
8#
9#  Intltool is free software; you can redistribute it and/or
10#  modify it under the terms of the GNU General Public License as
11#  published by the Free Software Foundation; either version 2 of the
12#  License, or (at your option) any later version.
13#
14#  Intltool is distributed in the hope that it will be useful,
15#  but WITHOUT ANY WARRANTY; without even the implied warranty of
16#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17#  General Public License for more details.
18#
19#  You should have received a copy of the GNU General Public License
20#  along with this program; if not, write to the Free Software
21#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22#
23#  As a special exception to the GNU General Public License, if you
24#  distribute this file as part of a program that contains a
25#  configuration script generated by Autoconf, you may include it under
26#  the same distribution terms that you use for the rest of that program.
27#
28#  Authors: Kenneth Christiansen <kenneth@gnu.org>
29#           Darin Adler <darin@bentspoon.com>
30#
31
32## Release information
33my $PROGRAM      = "intltool-extract";
34my $PACKAGE      = "intltool";
35my $VERSION      = "0.32.1";
36
37## Loaded modules
38use strict;
39use File::Basename;
40use Getopt::Long;
41
42## Scalars used by the option stuff
43my $TYPE_ARG    = "0";
44my $LOCAL_ARG   = "0";
45my $HELP_ARG    = "0";
46my $VERSION_ARG = "0";
47my $UPDATE_ARG  = "0";
48my $QUIET_ARG   = "0";
49my $SRCDIR_ARG  = ".";
50
51my $FILE;
52my $OUTFILE;
53
54my $gettext_type = "";
55my $input;
56my %messages = ();
57my %loc = ();
58my %count = ();
59my %comments = ();
60my $strcount = 0;
61
62## Use this instead of \w for XML files to handle more possible characters.
63my $w = "[-A-Za-z0-9._:]";
64
65## Always print first
66$| = 1;
67
68## Handle options
69GetOptions (
70            "type=s"     => \$TYPE_ARG,
71            "local|l"    => \$LOCAL_ARG,
72            "help|h"     => \$HELP_ARG,
73            "version|v"  => \$VERSION_ARG,
74            "update"     => \$UPDATE_ARG,
75            "quiet|q"    => \$QUIET_ARG,
76            "srcdir=s"   => \$SRCDIR_ARG,
77            ) or &error;
78
79&split_on_argument;
80
81
82## Check for options.
83## This section will check for the different options.
84
85sub split_on_argument {
86
87    if ($VERSION_ARG) {
88        &version;
89
90    } elsif ($HELP_ARG) {
91        &help;
92       
93    } elsif ($LOCAL_ARG) {
94        &place_local;
95        &extract;
96
97    } elsif ($UPDATE_ARG) {
98        &place_normal;
99        &extract;
100
101    } elsif (@ARGV > 0) {
102        &place_normal;
103        &message;
104        &extract;
105
106    } else {
107        &help;
108
109    } 
110}   
111
112sub place_normal {
113    $FILE        = $ARGV[0];
114    $OUTFILE     = "$FILE.h";
115}   
116
117sub place_local {
118    $OUTFILE     = fileparse($FILE, ());
119    if (!-e "tmp/") {
120        system("mkdir tmp/");
121    }
122    $OUTFILE     = "./tmp/$OUTFILE.h"
123}
124
125sub determine_type {
126   if ($TYPE_ARG =~ /^gettext\/(.*)/) {
127        $gettext_type=$1
128   }
129}
130
131## Sub for printing release information
132sub version{
133    print <<_EOF_;
134${PROGRAM} (${PACKAGE}) $VERSION
135Copyright (C) 2000, 2003 Free Software Foundation, Inc.
136Written by Kenneth Christiansen, 2000.
137
138This is free software; see the source for copying conditions.  There is NO
139warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
140_EOF_
141    exit;
142}
143
144## Sub for printing usage information
145sub help {
146    print <<_EOF_;
147Usage: ${PROGRAM} [OPTION]... [FILENAME]
148Generates a header file from an XML source file.
149
150It grabs all strings between <_translatable_node> and its end tag in
151XML files. Read manpage (man ${PROGRAM}) for more info.
152
153      --type=TYPE   Specify the file type of FILENAME. Currently supports:
154                    "gettext/glade", "gettext/ini", "gettext/keys"
155                    "gettext/rfc822deb", "gettext/schemas",
156                    "gettext/scheme", "gettext/xml"
157  -l, --local       Writes output into current working directory
158                    (conflicts with --update)
159      --update      Writes output into the same directory the source file
160                    reside (conflicts with --local)
161      --srcdir      Root of the source tree
162  -v, --version     Output version information and exit
163  -h, --help        Display this help and exit
164  -q, --quiet       Quiet mode
165
166Report bugs to http://bugzilla.gnome.org/ (product name "$PACKAGE")
167or send email to <xml-i18n-tools\@gnome.org>.
168_EOF_
169    exit;
170}
171
172## Sub for printing error messages
173sub error{
174    print STDERR "Try `${PROGRAM} --help' for more information.\n";
175    exit;
176}
177
178sub message {
179    print "Generating C format header file for translation.\n" unless $QUIET_ARG;
180}
181
182sub extract {
183    &determine_type;
184
185    &convert;
186
187    open OUT, ">$OUTFILE";
188    &msg_write;
189    close OUT;
190
191    print "Wrote $OUTFILE\n" unless $QUIET_ARG;
192}
193
194sub convert {
195
196    ## Reading the file
197    {
198        local (*IN);
199        local $/; #slurp mode
200        open (IN, "<$SRCDIR_ARG/$FILE") || die "can't open $SRCDIR_ARG/$FILE: $!";
201        $input = <IN>;
202    }
203
204    &type_ini if $gettext_type eq "ini";
205    &type_keys if $gettext_type eq "keys";
206    &type_xml if $gettext_type eq "xml";
207    &type_glade if $gettext_type eq "glade";
208    &type_scheme if $gettext_type eq "scheme";
209    &type_schemas  if $gettext_type eq "schemas";
210    &type_rfc822deb  if $gettext_type eq "rfc822deb";
211}
212
213sub entity_decode_minimal
214{
215    local ($_) = @_;
216
217    s/&apos;/'/g; # '
218    s/&quot;/"/g; # "
219    s/&amp;/&/g;
220
221    return $_;
222}
223
224sub entity_decode
225{
226    local ($_) = @_;
227
228    s/&apos;/'/g; # '
229    s/&quot;/"/g; # "
230    s/&amp;/&/g;
231    s/&lt;/</g;
232    s/&gt;/>/g;
233
234    return $_;
235}
236
237sub escape_char
238{
239    return '\"' if $_ eq '"';
240    return '\n' if $_ eq "\n";
241    return '\\' if $_ eq '\\';
242
243    return $_;
244}
245
246sub escape
247{
248    my ($string) = @_;
249    return join "", map &escape_char, split //, $string;
250}
251
252sub type_ini {
253    ### For generic translatable desktop files ###
254    while ($input =~ /^_.*=(.*)$/mg) {
255        $messages{$1} = [];
256    }
257}
258
259sub type_keys {
260    ### For generic translatable mime/keys files ###
261    while ($input =~ /^\s*_\w+=(.*)$/mg) {
262        $messages{$1} = [];
263    }
264}
265
266sub type_xml {
267    ### For generic translatable XML files ###
268       
269    while ($input =~ /(?:<!--([^>]*?)-->[^\n]*\n?[^\n]*)?\s_$w+\s*=\s*\"([^"]+)\"/sg) { # "
270        $messages{entity_decode_minimal($2)} = [];
271        $comments{entity_decode_minimal($2)} = $1 if (defined($1));
272    }
273
274    while ($input =~ /(?:<!--([^>]*?)-->\s*)?<_($w+)(?: xml:space="($w+)")?[^>]*>(.+?)<\/_\2>/sg) {
275        $_ = $4;
276        if (!defined($3) || $3 ne "preserve") {
277            s/\s+/ /g;
278            s/^ //;
279            s/ $//;
280        }
281        $messages{$_} = [];
282        $comments{$_} = $1 if (defined($1));
283    }
284}
285
286sub type_schemas {
287    ### For schemas XML files ###
288         
289    # FIXME: We should handle escaped < (less than)
290    while ($input =~ /
291                      <locale\ name="C">\s*
292                          (<default>\s*(?:<!--([^>]*?)-->\s*)?(.*?)\s*<\/default>\s*)?
293                          (<short>\s*(?:<!--([^>]*?)-->\s*)?(.*?)\s*<\/short>\s*)?
294                          (<long>\s*(?:<!--([^>]*?)-->\s*)?(.*?)\s*<\/long>\s*)?
295                      <\/locale>
296                     /sgx) {
297        my @totranslate = ($3,$6,$9);
298        my @eachcomment = ($2,$5,$8);
299        foreach (@totranslate) {
300            my $currentcomment = shift @eachcomment;
301            next if !$_;
302            s/\s+/ /g;
303            $messages{entity_decode_minimal($_)} = [];
304            $comments{entity_decode_minimal($_)} = $currentcomment if (defined($currentcomment));
305        }
306    }
307}
308
309sub type_rfc822deb {
310    ### For rfc822-style Debian configuration files ###
311
312    my $lineno = 1;
313    my $type = '';
314    while ($input =~ /\G(.*?)(^|\n)(_+)([^:]+):[ \t]*(.*?)(?=\n\S|$)/sg)
315    {
316        my ($pre, $newline, $underscore, $tag, $text) = ($1, $2, $3, $4, $5);
317        while ($pre =~ m/\n/g)
318        {
319            $lineno ++;
320        }
321        $lineno += length($newline);
322        my @str_list = rfc822deb_split(length($underscore), $text);
323        for my $str (@str_list)
324        {
325            $strcount++;
326            $messages{$str} = [];
327            $loc{$str} = $lineno;
328            $count{$str} = $strcount;
329            my $usercomment = '';
330            while($pre =~ s/(^|\n)#([^\n]*)$//s)
331            {
332                $usercomment = "\n" . $2 . $usercomment;
333            }
334            $comments{$str} = $tag . $usercomment;
335        }
336        $lineno += ($text =~ s/\n//g);
337    }
338}
339
340sub rfc822deb_split {
341    # Debian defines a special way to deal with rfc822-style files:
342    # when a value contain newlines, it consists of
343    #   1.  a short form (first line)
344    #   2.  a long description, all lines begin with a space,
345    #       and paragraphs are separated by a single dot on a line
346    # This routine returns an array of all paragraphs, and reformat
347    # them.
348    # When first argument is 2, the string is a comma separated list of
349    # values.
350    my $type = shift;
351    my $text = shift;
352    $text =~ s/^[ \t]//mg;
353    return (split(/, */, $text, 0)) if $type ne 1;
354    return ($text) if $text !~ /\n/;
355
356    $text =~ s/([^\n]*)\n//;
357    my @list = ($1);
358    my $str = '';
359    for my $line (split (/\n/, $text))
360    {
361        chomp $line;
362        if ($line =~ /^\.\s*$/)
363        {
364            #  New paragraph
365            $str =~ s/\s*$//;
366            push(@list, $str);
367            $str = '';
368        }
369        elsif ($line =~ /^\s/)
370        {
371            #  Line which must not be reformatted
372            $str .= "\n" if length ($str) && $str !~ /\n$/;
373            $line =~ s/\s+$//;
374            $str .= $line."\n";
375        }
376        else
377        {
378            #  Continuation line, remove newline
379            $str .= " " if length ($str) && $str !~ /\n$/;
380            $str .= $line;
381        }
382    }
383    $str =~ s/\s*$//;
384    push(@list, $str) if length ($str);
385    return @list;
386}
387
388sub type_glade {
389    ### For translatable Glade XML files ###
390
391    my $tags = "label|title|text|format|copyright|comments|preview_text|tooltip|message";
392
393    while ($input =~ /<($tags)>([^<]+)<\/($tags)>/sg) {
394        # Glade sometimes uses tags that normally mark translatable things for
395        # little bits of non-translatable content. We work around this by not
396        # translating strings that only includes something like label4 or window1.
397        $messages{entity_decode($2)} = [] unless $2 =~ /^(window|label|dialog)[0-9]+$/;
398    }
399   
400    while ($input =~ /<items>(..[^<]*)<\/items>/sg) {
401        for my $item (split (/\n/, $1)) {
402            $messages{entity_decode($item)} = [];
403        }
404    }
405
406    ## handle new glade files
407    while ($input =~ /<(property|atkproperty)\s+[^>]*translatable\s*=\s*"yes"(?:\s+[^>]*comments\s*=\s*"([^"]*)")?[^>]*>([^<]+)<\/\1>/sg) {
408        $messages{entity_decode($3)} = [] unless $3 =~ /^(window|label)[0-9]+$/;
409        if (defined($2) and !($3 =~ /^(window|label)[0-9]+$/)) {
410           $comments{entity_decode($3)} = entity_decode($2) ;
411        }
412    }
413    while ($input =~ /<atkaction\s+action_name="([^>]*)"\s+description="([^>]+)"\/>/sg) {
414        $messages{entity_decode_minimal($2)} = [];
415    }
416}
417
418sub type_scheme {
419    my ($line, $i, $state, $str, $trcomment, $char);
420    for $line (split(/\n/, $input)) {
421        $i = 0;
422        $state = 0; # 0 - nothing, 1 - string, 2 - translatable string
423        while ($i < length($line)) {
424            if (substr($line,$i,1) eq "\"") {
425                if ($state == 2) {
426                    $comments{$str} = $trcomment if ($trcomment);
427                    $messages{$str} = [];
428                    $str = '';
429                    $state = 0; $trcomment = "";
430                } elsif ($state == 1) {
431                    $str = '';
432                    $state = 0; $trcomment = "";
433                } else {
434                    $state = 1;
435                    $str = '';
436                    if ($i>0 && substr($line,$i-1,1) eq '_') {
437                        $state = 2;
438                    }
439                }
440            } elsif (!$state) {
441                if (substr($line,$i,1) eq ";") {
442                    $trcomment = substr($line,$i+1);
443                    $trcomment =~ s/^;*\s*//;
444                    $i = length($line);
445                } elsif ($trcomment && substr($line,$i,1) !~ /\s|\(|\)|_/) {
446                    $trcomment = "";
447                }
448            } else {
449                if (substr($line,$i,1) eq "\\") {
450                    $char = substr($line,$i+1,1);
451                    if ($char ne "\"" && $char ne "\\") {
452                       $str = $str . "\\";
453                    }
454                    $i++;
455                }
456                $str = $str . substr($line,$i,1);
457            }
458            $i++;
459        }
460    }
461}
462
463sub msg_write {
464    my @msgids;
465    if (%count)
466    {
467        @msgids = sort { $count{$a} <=> $count{$b} } keys %count;
468    }
469    else
470    {
471        @msgids = sort keys %messages;
472    }
473    for my $message (@msgids)
474    {
475        my $offsetlines = 1;
476        $offsetlines++ if $message =~ /%/;
477        if (defined ($comments{$message}))
478        {
479                while ($comments{$message} =~ m/\n/g)
480                {
481                    $offsetlines++;
482                }
483        }
484        print OUT "# ".($loc{$message} - $offsetlines).  " \"$FILE\"\n"
485                if defined $loc{$message};
486        print OUT "/* ".$comments{$message}." */\n"
487                if defined $comments{$message};
488        print OUT "/* xgettext:no-c-format */\n" if $message =~ /%/;
489       
490        my @lines = split (/\n/, $message, -1);
491        for (my $n = 0; $n < @lines; $n++)
492        {
493            if ($n == 0)
494            {
495                print OUT "char *s = N_(\"";
496            }
497            else
498            { 
499                print OUT "             \"";
500            }
501
502            print OUT escape($lines[$n]);
503
504            if ($n < @lines - 1)
505            {
506                print OUT "\\n\"\n";
507            }
508            else
509            {
510                print OUT "\");\n"; 
511            }
512        }
513    }
514}
515
Note: See TracBrowser for help on using the repository browser.