source: trunk/third/gedit/intltool-extract.in @ 21363

Revision 21363, 13.6 KB checked in by ghudson, 19 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r21362, which included commits to RCS files with non-trunk default branches.
Line 
1#!@INTLTOOL_PERL@ -w
2# -*- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 4  -*-
3
4#
5#  The Intltool Message Extractor
6#
7#  Copyright (C) 2000-2001, 2003 Free Software Foundation.
8#
9#  Intltool is free software; you can redistribute it and/or
10#  modify it under the terms of the GNU General Public License as
11#  published by the Free Software Foundation; either version 2 of the
12#  License, or (at your option) any later version.
13#
14#  Intltool is distributed in the hope that it will be useful,
15#  but WITHOUT ANY WARRANTY; without even the implied warranty of
16#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17#  General Public License for more details.
18#
19#  You should have received a copy of the GNU General Public License
20#  along with this program; if not, write to the Free Software
21#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22#
23#  As a special exception to the GNU General Public License, if you
24#  distribute this file as part of a program that contains a
25#  configuration script generated by Autoconf, you may include it under
26#  the same distribution terms that you use for the rest of that program.
27#
28#  Authors: Kenneth Christiansen <kenneth@gnu.org>
29#           Darin Adler <darin@bentspoon.com>
30#
31
32## Release information
33my $PROGRAM      = "intltool-extract";
34my $PACKAGE      = "intltool";
35my $VERSION      = "0.33";
36
37## Loaded modules
38use strict;
39use File::Basename;
40use Getopt::Long;
41
42## Scalars used by the option stuff
43my $TYPE_ARG    = "0";
44my $LOCAL_ARG   = "0";
45my $HELP_ARG    = "0";
46my $VERSION_ARG = "0";
47my $UPDATE_ARG  = "0";
48my $QUIET_ARG   = "0";
49my $SRCDIR_ARG  = ".";
50
51my $FILE;
52my $OUTFILE;
53
54my $gettext_type = "";
55my $input;
56my %messages = ();
57my %loc = ();
58my %count = ();
59my %comments = ();
60my $strcount = 0;
61
62## Use this instead of \w for XML files to handle more possible characters.
63my $w = "[-A-Za-z0-9._:]";
64
65## Always print first
66$| = 1;
67
68## Handle options
69GetOptions (
70            "type=s"     => \$TYPE_ARG,
71            "local|l"    => \$LOCAL_ARG,
72            "help|h"     => \$HELP_ARG,
73            "version|v"  => \$VERSION_ARG,
74            "update"     => \$UPDATE_ARG,
75            "quiet|q"    => \$QUIET_ARG,
76            "srcdir=s"   => \$SRCDIR_ARG,
77            ) or &error;
78
79&split_on_argument;
80
81
82## Check for options.
83## This section will check for the different options.
84
85sub split_on_argument {
86
87    if ($VERSION_ARG) {
88        &version;
89
90    } elsif ($HELP_ARG) {
91        &help;
92       
93    } elsif ($LOCAL_ARG) {
94        &place_local;
95        &extract;
96
97    } elsif ($UPDATE_ARG) {
98        &place_normal;
99        &extract;
100
101    } elsif (@ARGV > 0) {
102        &place_normal;
103        &message;
104        &extract;
105
106    } else {
107        &help;
108
109    } 
110}   
111
112sub place_normal {
113    $FILE        = $ARGV[0];
114    $OUTFILE     = "$FILE.h";
115}   
116
117sub place_local {
118    $FILE        = $ARGV[0];
119    $OUTFILE     = fileparse($FILE, ());
120    if (!-e "tmp/") {
121        system("mkdir tmp/");
122    }
123    $OUTFILE     = "./tmp/$OUTFILE.h"
124}
125
126sub determine_type {
127   if ($TYPE_ARG =~ /^gettext\/(.*)/) {
128        $gettext_type=$1
129   }
130}
131
132## Sub for printing release information
133sub version{
134    print <<_EOF_;
135${PROGRAM} (${PACKAGE}) $VERSION
136Copyright (C) 2000, 2003 Free Software Foundation, Inc.
137Written by Kenneth Christiansen, 2000.
138
139This is free software; see the source for copying conditions.  There is NO
140warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
141_EOF_
142    exit;
143}
144
145## Sub for printing usage information
146sub help {
147    print <<_EOF_;
148Usage: ${PROGRAM} [OPTION]... [FILENAME]
149Generates a header file from an XML source file.
150
151It grabs all strings between <_translatable_node> and its end tag in
152XML files. Read manpage (man ${PROGRAM}) for more info.
153
154      --type=TYPE   Specify the file type of FILENAME. Currently supports:
155                    "gettext/glade", "gettext/ini", "gettext/keys"
156                    "gettext/rfc822deb", "gettext/schemas",
157                    "gettext/scheme", "gettext/xml"
158  -l, --local       Writes output into current working directory
159                    (conflicts with --update)
160      --update      Writes output into the same directory the source file
161                    reside (conflicts with --local)
162      --srcdir      Root of the source tree
163  -v, --version     Output version information and exit
164  -h, --help        Display this help and exit
165  -q, --quiet       Quiet mode
166
167Report bugs to http://bugzilla.gnome.org/ (product name "$PACKAGE")
168or send email to <xml-i18n-tools\@gnome.org>.
169_EOF_
170    exit;
171}
172
173## Sub for printing error messages
174sub error{
175    print STDERR "Try `${PROGRAM} --help' for more information.\n";
176    exit;
177}
178
179sub message {
180    print "Generating C format header file for translation.\n" unless $QUIET_ARG;
181}
182
183sub extract {
184    &determine_type;
185
186    &convert;
187
188    open OUT, ">$OUTFILE";
189    &msg_write;
190    close OUT;
191
192    print "Wrote $OUTFILE\n" unless $QUIET_ARG;
193}
194
195sub convert {
196
197    ## Reading the file
198    {
199        local (*IN);
200        local $/; #slurp mode
201        open (IN, "<$SRCDIR_ARG/$FILE") || die "can't open $SRCDIR_ARG/$FILE: $!";
202        $input = <IN>;
203    }
204
205    &type_ini if $gettext_type eq "ini";
206    &type_keys if $gettext_type eq "keys";
207    &type_xml if $gettext_type eq "xml";
208    &type_glade if $gettext_type eq "glade";
209    &type_scheme if $gettext_type eq "scheme";
210    &type_schemas  if $gettext_type eq "schemas";
211    &type_rfc822deb  if $gettext_type eq "rfc822deb";
212}
213
214sub entity_decode_minimal
215{
216    local ($_) = @_;
217
218    s/&apos;/'/g; # '
219    s/&quot;/"/g; # "
220    s/&amp;/&/g;
221
222    return $_;
223}
224
225sub entity_decode
226{
227    local ($_) = @_;
228
229    s/&apos;/'/g; # '
230    s/&quot;/"/g; # "
231    s/&amp;/&/g;
232    s/&lt;/</g;
233    s/&gt;/>/g;
234
235    return $_;
236}
237
238sub escape_char
239{
240    return '\"' if $_ eq '"';
241    return '\n' if $_ eq "\n";
242    return '\\' if $_ eq '\\';
243
244    return $_;
245}
246
247sub escape
248{
249    my ($string) = @_;
250    return join "", map &escape_char, split //, $string;
251}
252
253sub type_ini {
254    ### For generic translatable desktop files ###
255    while ($input =~ /^_.*=(.*)$/mg) {
256        $messages{$1} = [];
257    }
258}
259
260sub type_keys {
261    ### For generic translatable mime/keys files ###
262    while ($input =~ /^\s*_\w+=(.*)$/mg) {
263        $messages{$1} = [];
264    }
265}
266
267sub type_xml {
268    ### For generic translatable XML files ###
269       
270    while ($input =~ /(?:<!--([^>]*?)-->[^\n]*\n?[^\n]*)?\s_$w+\s*=\s*\"([^"]*)\"/sg) { # "
271        $messages{entity_decode_minimal($2)} = [];
272        $comments{entity_decode_minimal($2)} = $1 if (defined($1));
273    }
274
275    while ($input =~ /(?:<!--([^>]*?)-->\s*)?<_($w+)(?: xml:space="($w+)")?[^>]*>(.*?)<\/_\2>/sg) {
276        $_ = $4;
277        if (!defined($3) || $3 ne "preserve") {
278            s/\s+/ /g;
279            s/^ //;
280            s/ $//;
281        }
282        $messages{$_} = [];
283        $comments{$_} = $1 if (defined($1));
284    }
285}
286
287sub type_schemas {
288    ### For schemas XML files ###
289         
290    # FIXME: We should handle escaped < (less than)
291    while ($input =~ /
292                      <locale\ name="C">\s*
293                          (<default>\s*(?:<!--([^>]*?)-->\s*)?(.*?)\s*<\/default>\s*)?
294                          (<short>\s*(?:<!--([^>]*?)-->\s*)?(.*?)\s*<\/short>\s*)?
295                          (<long>\s*(?:<!--([^>]*?)-->\s*)?(.*?)\s*<\/long>\s*)?
296                      <\/locale>
297                     /sgx) {
298        my @totranslate = ($3,$6,$9);
299        my @eachcomment = ($2,$5,$8);
300        foreach (@totranslate) {
301            my $currentcomment = shift @eachcomment;
302            next if !$_;
303            s/\s+/ /g;
304            $messages{entity_decode_minimal($_)} = [];
305            $comments{entity_decode_minimal($_)} = $currentcomment if (defined($currentcomment));
306        }
307    }
308}
309
310sub type_rfc822deb {
311    ### For rfc822-style Debian configuration files ###
312
313    my $lineno = 1;
314    my $type = '';
315    while ($input =~ /\G(.*?)(^|\n)(_+)([^:]+):[ \t]*(.*?)(?=\n\S|$)/sg)
316    {
317        my ($pre, $newline, $underscore, $tag, $text) = ($1, $2, $3, $4, $5);
318        while ($pre =~ m/\n/g)
319        {
320            $lineno ++;
321        }
322        $lineno += length($newline);
323        my @str_list = rfc822deb_split(length($underscore), $text);
324        for my $str (@str_list)
325        {
326            $strcount++;
327            $messages{$str} = [];
328            $loc{$str} = $lineno;
329            $count{$str} = $strcount;
330            my $usercomment = '';
331            while($pre =~ s/(^|\n)#([^\n]*)$//s)
332            {
333                $usercomment = "\n" . $2 . $usercomment;
334            }
335            $comments{$str} = $tag . $usercomment;
336        }
337        $lineno += ($text =~ s/\n//g);
338    }
339}
340
341sub rfc822deb_split {
342    # Debian defines a special way to deal with rfc822-style files:
343    # when a value contain newlines, it consists of
344    #   1.  a short form (first line)
345    #   2.  a long description, all lines begin with a space,
346    #       and paragraphs are separated by a single dot on a line
347    # This routine returns an array of all paragraphs, and reformat
348    # them.
349    # When first argument is 2, the string is a comma separated list of
350    # values.
351    my $type = shift;
352    my $text = shift;
353    $text =~ s/^[ \t]//mg;
354    return (split(/, */, $text, 0)) if $type ne 1;
355    return ($text) if $text !~ /\n/;
356
357    $text =~ s/([^\n]*)\n//;
358    my @list = ($1);
359    my $str = '';
360    for my $line (split (/\n/, $text))
361    {
362        chomp $line;
363        if ($line =~ /^\.\s*$/)
364        {
365            #  New paragraph
366            $str =~ s/\s*$//;
367            push(@list, $str);
368            $str = '';
369        }
370        elsif ($line =~ /^\s/)
371        {
372            #  Line which must not be reformatted
373            $str .= "\n" if length ($str) && $str !~ /\n$/;
374            $line =~ s/\s+$//;
375            $str .= $line."\n";
376        }
377        else
378        {
379            #  Continuation line, remove newline
380            $str .= " " if length ($str) && $str !~ /\n$/;
381            $str .= $line;
382        }
383    }
384    $str =~ s/\s*$//;
385    push(@list, $str) if length ($str);
386    return @list;
387}
388
389sub type_glade {
390    ### For translatable Glade XML files ###
391
392    my $tags = "label|title|text|format|copyright|comments|preview_text|tooltip|message";
393
394    while ($input =~ /<($tags)>([^<]+)<\/($tags)>/sg) {
395        # Glade sometimes uses tags that normally mark translatable things for
396        # little bits of non-translatable content. We work around this by not
397        # translating strings that only includes something like label4 or window1.
398        $messages{entity_decode($2)} = [] unless $2 =~ /^(window|label|dialog)[0-9]+$/;
399    }
400   
401    while ($input =~ /<items>(..[^<]*)<\/items>/sg) {
402        for my $item (split (/\n/, $1)) {
403            $messages{entity_decode($item)} = [];
404        }
405    }
406
407    ## handle new glade files
408    while ($input =~ /<(property|atkproperty)\s+[^>]*translatable\s*=\s*"yes"(?:\s+[^>]*comments\s*=\s*"([^"]*)")?[^>]*>([^<]+)<\/\1>/sg) {
409        $messages{entity_decode($3)} = [] unless $3 =~ /^(window|label)[0-9]+$/;
410        if (defined($2) and !($3 =~ /^(window|label)[0-9]+$/)) {
411           $comments{entity_decode($3)} = entity_decode($2) ;
412        }
413    }
414    while ($input =~ /<atkaction\s+action_name="([^>]*)"\s+description="([^>]+)"\/>/sg) {
415        $messages{entity_decode_minimal($2)} = [];
416    }
417}
418
419sub type_scheme {
420    my ($line, $i, $state, $str, $trcomment, $char);
421    for $line (split(/\n/, $input)) {
422        $i = 0;
423        $state = 0; # 0 - nothing, 1 - string, 2 - translatable string
424        while ($i < length($line)) {
425            if (substr($line,$i,1) eq "\"") {
426                if ($state == 2) {
427                    $comments{$str} = $trcomment if ($trcomment);
428                    $messages{$str} = [];
429                    $str = '';
430                    $state = 0; $trcomment = "";
431                } elsif ($state == 1) {
432                    $str = '';
433                    $state = 0; $trcomment = "";
434                } else {
435                    $state = 1;
436                    $str = '';
437                    if ($i>0 && substr($line,$i-1,1) eq '_') {
438                        $state = 2;
439                    }
440                }
441            } elsif (!$state) {
442                if (substr($line,$i,1) eq ";") {
443                    $trcomment = substr($line,$i+1);
444                    $trcomment =~ s/^;*\s*//;
445                    $i = length($line);
446                } elsif ($trcomment && substr($line,$i,1) !~ /\s|\(|\)|_/) {
447                    $trcomment = "";
448                }
449            } else {
450                if (substr($line,$i,1) eq "\\") {
451                    $char = substr($line,$i+1,1);
452                    if ($char ne "\"" && $char ne "\\") {
453                       $str = $str . "\\";
454                    }
455                    $i++;
456                }
457                $str = $str . substr($line,$i,1);
458            }
459            $i++;
460        }
461    }
462}
463
464sub msg_write {
465    my @msgids;
466    if (%count)
467    {
468        @msgids = sort { $count{$a} <=> $count{$b} } keys %count;
469    }
470    else
471    {
472        @msgids = sort keys %messages;
473    }
474    for my $message (@msgids)
475    {
476        my $offsetlines = 1;
477        $offsetlines++ if $message =~ /%/;
478        if (defined ($comments{$message}))
479        {
480                while ($comments{$message} =~ m/\n/g)
481                {
482                    $offsetlines++;
483                }
484        }
485        print OUT "# ".($loc{$message} - $offsetlines).  " \"$FILE\"\n"
486                if defined $loc{$message};
487        print OUT "/* ".$comments{$message}." */\n"
488                if defined $comments{$message};
489        print OUT "/* xgettext:no-c-format */\n" if $message =~ /%/;
490       
491        my @lines = split (/\n/, $message, -1);
492        for (my $n = 0; $n < @lines; $n++)
493        {
494            if ($n == 0)
495            {
496                print OUT "char *s = N_(\"";
497            }
498            else
499            { 
500                print OUT "             \"";
501            }
502
503            print OUT escape($lines[$n]);
504
505            if ($n < @lines - 1)
506            {
507                print OUT "\\n\"\n";
508            }
509            else
510            {
511                print OUT "\");\n"; 
512            }
513        }
514    }
515}
516
Note: See TracBrowser for help on using the repository browser.