source: trunk/third/gtkhtml3/intltool-extract.in @ 21116

Revision 21116, 11.9 KB checked in by ghudson, 20 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r21115, which included commits to RCS files with non-trunk default branches.
Line 
1#!@INTLTOOL_PERL@ -w
2# -*- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 4  -*-
3
4#
5#  The Intltool Message Extractor
6#
7#  Copyright (C) 2000-2001, 2003 Free Software Foundation.
8#
9#  Intltool is free software; you can redistribute it and/or
10#  modify it under the terms of the GNU General Public License as
11#  published by the Free Software Foundation; either version 2 of the
12#  License, or (at your option) any later version.
13#
14#  Intltool is distributed in the hope that it will be useful,
15#  but WITHOUT ANY WARRANTY; without even the implied warranty of
16#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17#  General Public License for more details.
18#
19#  You should have received a copy of the GNU General Public License
20#  along with this program; if not, write to the Free Software
21#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22#
23#  As a special exception to the GNU General Public License, if you
24#  distribute this file as part of a program that contains a
25#  configuration script generated by Autoconf, you may include it under
26#  the same distribution terms that you use for the rest of that program.
27#
28#  Authors: Kenneth Christiansen <kenneth@gnu.org>
29#           Darin Adler <darin@bentspoon.com>
30#
31
32## Release information
33my $PROGRAM      = "intltool-extract";
34my $PACKAGE      = "intltool";
35my $VERSION      = "0.31.1";
36
37## Loaded modules
38use strict;
39use File::Basename;
40use Getopt::Long;
41
42## Scalars used by the option stuff
43my $TYPE_ARG    = "0";
44my $LOCAL_ARG   = "0";
45my $HELP_ARG    = "0";
46my $VERSION_ARG = "0";
47my $UPDATE_ARG  = "0";
48my $QUIET_ARG   = "0";
49my $SRCDIR_ARG  = ".";
50
51my $FILE;
52my $OUTFILE;
53
54my $gettext_type = "";
55my $input;
56my %messages = ();
57my %loc = ();
58my %count = ();
59my %comments = ();
60my $strcount = 0;
61
62## Use this instead of \w for XML files to handle more possible characters.
63my $w = "[-A-Za-z0-9._:]";
64
65## Always print first
66$| = 1;
67
68## Handle options
69GetOptions (
70            "type=s"     => \$TYPE_ARG,
71            "local|l"    => \$LOCAL_ARG,
72            "help|h"     => \$HELP_ARG,
73            "version|v"  => \$VERSION_ARG,
74            "update"     => \$UPDATE_ARG,
75            "quiet|q"    => \$QUIET_ARG,
76            "srcdir=s"   => \$SRCDIR_ARG,
77            ) or &error;
78
79&split_on_argument;
80
81
82## Check for options.
83## This section will check for the different options.
84
85sub split_on_argument {
86
87    if ($VERSION_ARG) {
88        &version;
89
90    } elsif ($HELP_ARG) {
91        &help;
92       
93    } elsif ($LOCAL_ARG) {
94        &place_local;
95        &extract;
96
97    } elsif ($UPDATE_ARG) {
98        &place_normal;
99        &extract;
100
101    } elsif (@ARGV > 0) {
102        &place_normal;
103        &message;
104        &extract;
105
106    } else {
107        &help;
108
109    } 
110}   
111
112sub place_normal {
113    $FILE        = $ARGV[0];
114    $OUTFILE     = "$FILE.h";
115}   
116
117sub place_local {
118    $OUTFILE     = fileparse($FILE, ());
119    if (!-e "tmp/") {
120        system("mkdir tmp/");
121    }
122    $OUTFILE     = "./tmp/$OUTFILE.h"
123}
124
125sub determine_type {
126   if ($TYPE_ARG =~ /^gettext\/(.*)/) {
127        $gettext_type=$1
128   }
129}
130
131## Sub for printing release information
132sub version{
133    print <<_EOF_;
134${PROGRAM} (${PACKAGE}) $VERSION
135Copyright (C) 2000, 2003 Free Software Foundation, Inc.
136Written by Kenneth Christiansen, 2000.
137
138This is free software; see the source for copying conditions.  There is NO
139warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
140_EOF_
141    exit;
142}
143
144## Sub for printing usage information
145sub help {
146    print <<_EOF_;
147Usage: ${PROGRAM} [OPTION]... [FILENAME]
148Generates a header file from an XML source file.
149
150It grabs all strings between <_translatable_node> and its end tag in
151XML files. Read manpage (man ${PROGRAM}) for more info.
152
153      --type=TYPE   Specify the file type of FILENAME. Currently supports:
154                    "gettext/glade", "gettext/ini", "gettext/keys"
155                    "gettext/rfc822deb", "gettext/schemas",
156                    "gettext/scheme", "gettext/xml"
157  -l, --local       Writes output into current working directory
158                    (conflicts with --update)
159      --update      Writes output into the same directory the source file
160                    reside (conflicts with --local)
161      --srcdir      Root of the source tree
162  -v, --version     Output version information and exit
163  -h, --help        Display this help and exit
164  -q, --quiet       Quiet mode
165
166Report bugs to http://bugzilla.gnome.org/ (product name "$PACKAGE")
167or send email to <xml-i18n-tools\@gnome.org>.
168_EOF_
169    exit;
170}
171
172## Sub for printing error messages
173sub error{
174    print STDERR "Try `${PROGRAM} --help' for more information.\n";
175    exit;
176}
177
178sub message {
179    print "Generating C format header file for translation.\n" unless $QUIET_ARG;
180}
181
182sub extract {
183    &determine_type;
184
185    &convert;
186
187    open OUT, ">$OUTFILE";
188    &msg_write;
189    close OUT;
190
191    print "Wrote $OUTFILE\n" unless $QUIET_ARG;
192}
193
194sub convert {
195
196    ## Reading the file
197    {
198        local (*IN);
199        local $/; #slurp mode
200        open (IN, "<$SRCDIR_ARG/$FILE") || die "can't open $SRCDIR_ARG/$FILE: $!";
201        $input = <IN>;
202    }
203
204    &type_ini if $gettext_type eq "ini";
205    &type_keys if $gettext_type eq "keys";
206    &type_xml if $gettext_type eq "xml";
207    &type_glade if $gettext_type eq "glade";
208    &type_scheme if $gettext_type eq "scheme";
209    &type_schemas  if $gettext_type eq "schemas";
210    &type_rfc822deb  if $gettext_type eq "rfc822deb";
211}
212
213sub entity_decode_minimal
214{
215    local ($_) = @_;
216
217    s/&apos;/'/g; # '
218    s/&quot;/"/g; # "
219    s/&amp;/&/g;
220
221    return $_;
222}
223
224sub entity_decode
225{
226    local ($_) = @_;
227
228    s/&apos;/'/g; # '
229    s/&quot;/"/g; # "
230    s/&amp;/&/g;
231    s/&lt;/</g;
232    s/&gt;/>/g;
233
234    return $_;
235}
236
237sub escape_char
238{
239    return '\"' if $_ eq '"';
240    return '\n' if $_ eq "\n";
241    return '\\' if $_ eq '\\';
242
243    return $_;
244}
245
246sub escape
247{
248    my ($string) = @_;
249    return join "", map &escape_char, split //, $string;
250}
251
252sub type_ini {
253    ### For generic translatable desktop files ###
254    while ($input =~ /^_.*=(.*)$/mg) {
255        $messages{$1} = [];
256    }
257}
258
259sub type_keys {
260    ### For generic translatable mime/keys files ###
261    while ($input =~ /^\s*_\w+=(.*)$/mg) {
262        $messages{$1} = [];
263    }
264}
265
266sub type_xml {
267    ### For generic translatable XML files ###
268       
269    while ($input =~ /\s_$w+\s*=\s*\"([^"]+)\"/sg) { # "
270        $messages{entity_decode_minimal($1)} = [];
271    }
272
273    while ($input =~ /(?:<!--([^>]*?)-->\s*)?<_($w+)(?: xml:space="($w+)")?>(.+?)<\/_\2>/sg) {
274        $_ = $4;
275        if (!defined($3) || $3 ne "preserve") {
276            s/\s+/ /g;
277            s/^ //;
278            s/ $//;
279        }
280        $messages{$_} = [];
281        $comments{$_} = $1 if (defined($1));
282    }
283}
284
285sub type_schemas {
286    ### For schemas XML files ###
287         
288    # FIXME: We should handle escaped < (less than)
289    while ($input =~ /
290                      <locale\ name="C">\s*
291                          (<default>\s*(?:<!--([^>]*?)-->\s*)?(.*?)\s*<\/default>\s*)?
292                          (<short>\s*(?:<!--([^>]*?)-->\s*)?(.*?)\s*<\/short>\s*)?
293                          (<long>\s*(?:<!--([^>]*?)-->\s*)?(.*?)\s*<\/long>\s*)?
294                      <\/locale>
295                     /sgx) {
296        my @totranslate = ($3,$6,$9);
297        my @eachcomment = ($2,$5,$8);
298        foreach (@totranslate) {
299            my $currentcomment = shift @eachcomment;
300            next if !$_;
301            s/\s+/ /g;
302            $messages{entity_decode_minimal($_)} = [];
303            $comments{entity_decode_minimal($_)} = $currentcomment if (defined($currentcomment));
304        }
305    }
306}
307
308sub type_rfc822deb {
309    ### For rfc822-style Debian configuration files ###
310
311    my $lineno = 1;
312    my $type = '';
313    while ($input =~ /\G(.*?)(^|\n)(_+)([^:]+):[ \t]*(.*?)(?=\n\S|$)/sg)
314    {
315        my ($pre, $newline, $underscore, $tag, $text) = ($1, $2, $3, $4, $5);
316        while ($pre =~ m/\n/g)
317        {
318            $lineno ++;
319        }
320        $lineno += length($newline);
321        my @str_list = rfc822deb_split(length($underscore), $text);
322        for my $str (@str_list)
323        {
324            $strcount++;
325            $messages{$str} = [];
326            $loc{$str} = $lineno;
327            $count{$str} = $strcount;
328            my $usercomment = '';
329            while($pre =~ s/(^|\n)#([^\n]*)$//s)
330            {
331                $usercomment = "\n" . $2 . $usercomment;
332            }
333            $comments{$str} = $tag . $usercomment;
334        }
335        $lineno += ($text =~ s/\n//g);
336    }
337}
338
339sub rfc822deb_split {
340    # Debian defines a special way to deal with rfc822-style files:
341    # when a value contain newlines, it consists of
342    #   1.  a short form (first line)
343    #   2.  a long description, all lines begin with a space,
344    #       and paragraphs are separated by a single dot on a line
345    # This routine returns an array of all paragraphs, and reformat
346    # them.
347    # When first argument is 2, the string is a comma separated list of
348    # values.
349    my $type = shift;
350    my $text = shift;
351    $text =~ s/^[ \t]//mg;
352    return (split(/, */, $text, 0)) if $type ne 1;
353    return ($text) if $text !~ /\n/;
354
355    $text =~ s/([^\n]*)\n//;
356    my @list = ($1);
357    my $str = '';
358    for my $line (split (/\n/, $text))
359    {
360        chomp $line;
361        if ($line =~ /^\.\s*$/)
362        {
363            #  New paragraph
364            $str =~ s/\s*$//;
365            push(@list, $str);
366            $str = '';
367        }
368        elsif ($line =~ /^\s/)
369        {
370            #  Line which must not be reformatted
371            $str .= "\n" if length ($str) && $str !~ /\n$/;
372            $line =~ s/\s+$//;
373            $str .= $line."\n";
374        }
375        else
376        {
377            #  Continuation line, remove newline
378            $str .= " " if length ($str) && $str !~ /\n$/;
379            $str .= $line;
380        }
381    }
382    $str =~ s/\s*$//;
383    push(@list, $str) if length ($str);
384    return @list;
385}
386
387sub type_glade {
388    ### For translatable Glade XML files ###
389
390    my $tags = "label|title|text|format|copyright|comments|preview_text|tooltip|message";
391
392    while ($input =~ /<($tags)>([^<]+)<\/($tags)>/sg) {
393        # Glade sometimes uses tags that normally mark translatable things for
394        # little bits of non-translatable content. We work around this by not
395        # translating strings that only includes something like label4 or window1.
396        $messages{entity_decode($2)} = [] unless $2 =~ /^(window|label)[0-9]+$/;
397    }
398   
399    while ($input =~ /<items>(..[^<]*)<\/items>/sg) {
400        for my $item (split (/\n/, $1)) {
401            $messages{entity_decode($item)} = [];
402        }
403    }
404
405    ## handle new glade files
406    while ($input =~ /<(property|atkproperty)\s+[^>]*translatable\s*=\s*"yes"[^>]*>([^<]+)<\/\1>/sg) {
407        $messages{entity_decode($2)} = [] unless $2 =~ /^(window|label)[0-9]+$/;
408    }
409    while ($input =~ /<atkaction\s+action_name="([^>]*)"\s+description="([^>]+)"\/>/sg) {
410        $messages{entity_decode_minimal($2)} = [];
411    }
412}
413
414sub type_scheme {
415    while ($input =~ /_\w*\(?"((?:[^"\\]+|\\.)*)"\)?/sg) {
416        $messages{$1} = [];
417    }
418}
419
420sub msg_write {
421    my @msgids;
422    if (%count)
423    {
424        @msgids = sort { $count{$a} <=> $count{$b} } keys %count;
425    }
426    else
427    {
428        @msgids = sort keys %messages;
429    }
430    for my $message (@msgids)
431    {
432        my $offsetlines = 1;
433        $offsetlines++ if $message =~ /%/;
434        if (defined ($comments{$message}))
435        {
436                while ($comments{$message} =~ m/\n/g)
437                {
438                    $offsetlines++;
439                }
440        }
441        print OUT "# ".($loc{$message} - $offsetlines).  " \"$FILE\"\n"
442                if defined $loc{$message};
443        print OUT "/* ".$comments{$message}." */\n"
444                if defined $comments{$message};
445        print OUT "/* xgettext:no-c-format */\n" if $message =~ /%/;
446       
447        my @lines = split (/\n/, $message, -1);
448        for (my $n = 0; $n < @lines; $n++)
449        {
450            if ($n == 0)
451            {
452                print OUT "char *s = N_(\"";
453            }
454            else
455            { 
456                print OUT "             \"";
457            }
458
459            print OUT escape($lines[$n]);
460
461            if ($n < @lines - 1)
462            {
463                print OUT "\\n\"\n";
464            }
465            else
466            {
467                print OUT "\");\n"; 
468            }
469        }
470    }
471}
472
Note: See TracBrowser for help on using the repository browser.