Context Navigation

intltool-merge.in.in @ 21469

Visit:

Revision 21469, 33.3 KB checked in by ghudson, 19 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r21468, which included commits to RCS files with non-trunk default branches.

Line
1	#!@INTLTOOL_PERL@ -w
2	# -- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 4 --
3
4	#
5	# The Intltool Message Merger
6	#
7	# Copyright (C) 2000, 2003 Free Software Foundation.
8	# Copyright (C) 2000, 2001 Eazel, Inc
9	#
10	# Intltool is free software; you can redistribute it and/or
11	# modify it under the terms of the GNU General Public License
12	# version 2 published by the Free Software Foundation.
13	#
14	# Intltool is distributed in the hope that it will be useful,
15	# but WITHOUT ANY WARRANTY; without even the implied warranty of
16	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17	# General Public License for more details.
18	#
19	# You should have received a copy of the GNU General Public License
20	# along with this program; if not, write to the Free Software
21	# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22	#
23	# As a special exception to the GNU General Public License, if you
24	# distribute this file as part of a program that contains a
25	# configuration script generated by Autoconf, you may include it under
26	# the same distribution terms that you use for the rest of that program.
27	#
28	# Authors: Maciej Stachowiak <mjs@noisehavoc.org>
29	# Kenneth Christiansen <kenneth@gnu.org>
30	# Darin Adler <darin@bentspoon.com>
31	#
32	# Proper XML UTF-8'ification written by Cyrille Chepelov <chepelov@calixo.net>
33	#
34
35	## Release information
36	my $PROGRAM = "intltool-merge";
37	my $PACKAGE = "@PACKAGE@";
38	my $VERSION = "@VERSION@";
39
40	## Loaded modules
41	use strict;
42	use Getopt::Long;
43	use Text::Wrap;
44	use File::Basename;
45
46	my $must_end_tag = -1;
47	my $last_depth = -1;
48	my $translation_depth = -1;
49	my @tag_stack = ();
50	my @entered_tag = ();
51	my @translation_strings = ();
52	my $leading_space = "";
53
54	## Scalars used by the option stuff
55	my $HELP_ARG = 0;
56	my $VERSION_ARG = 0;
57	my $BA_STYLE_ARG = 0;
58	my $XML_STYLE_ARG = 0;
59	my $KEYS_STYLE_ARG = 0;
60	my $DESKTOP_STYLE_ARG = 0;
61	my $SCHEMAS_STYLE_ARG = 0;
62	my $RFC822DEB_STYLE_ARG = 0;
63	my $QUIET_ARG = 0;
64	my $PASS_THROUGH_ARG = 0;
65	my $UTF8_ARG = 0;
66	my $MULTIPLE_OUTPUT = 0;
67	my $cache_file;
68
69	## Handle options
70	GetOptions
71	(
72	"help" => \$HELP_ARG,
73	"version" => \$VERSION_ARG,
74	"quiet\|q" => \$QUIET_ARG,
75	"oaf-style\|o" => \$BA_STYLE_ARG, ## for compatibility
76	"ba-style\|b" => \$BA_STYLE_ARG,
77	"xml-style\|x" => \$XML_STYLE_ARG,
78	"keys-style\|k" => \$KEYS_STYLE_ARG,
79	"desktop-style\|d" => \$DESKTOP_STYLE_ARG,
80	"schemas-style\|s" => \$SCHEMAS_STYLE_ARG,
81	"rfc822deb-style\|r" => \$RFC822DEB_STYLE_ARG,
82	"pass-through\|p" => \$PASS_THROUGH_ARG,
83	"utf8\|u" => \$UTF8_ARG,
84	"multiple-output\|m" => \$MULTIPLE_OUTPUT,
85	"cache\|c=s" => \$cache_file
86	) or &error;
87
88	my $PO_DIR;
89	my $FILE;
90	my $OUTFILE;
91
92	my %po_files_by_lang = ();
93	my %translations = ();
94	my $iconv = $ENV{"ICONV"} \|\| $ENV{"INTLTOOL_ICONV"} \|\| "@INTLTOOL_ICONV@";
95
96	# Use this instead of \w for XML files to handle more possible characters.
97	my $w = "[-A-Za-z0-9._:]";
98
99	# XML quoted string contents
100	my $q = "[^\\\"]*";
101
102	## Check for options.
103
104	if ($VERSION_ARG)
105	{
106	&print_version;
107	}
108	elsif ($HELP_ARG)
109	{
110	&print_help;
111	}
112	elsif ($BA_STYLE_ARG && @ARGV > 2)
113	{
114	&utf8_sanity_check;
115	&preparation;
116	&print_message;
117	&ba_merge_translations;
118	&finalize;
119	}
120	elsif ($XML_STYLE_ARG && @ARGV > 2)
121	{
122	&utf8_sanity_check;
123	&preparation;
124	&print_message;
125	&xml_merge_output;
126	&finalize;
127	}
128	elsif ($KEYS_STYLE_ARG && @ARGV > 2)
129	{
130	&utf8_sanity_check;
131	&preparation;
132	&print_message;
133	&keys_merge_translations;
134	&finalize;
135	}
136	elsif ($DESKTOP_STYLE_ARG && @ARGV > 2)
137	{
138	&utf8_sanity_check;
139	&preparation;
140	&print_message;
141	&desktop_merge_translations;
142	&finalize;
143	}
144	elsif ($SCHEMAS_STYLE_ARG && @ARGV > 2)
145	{
146	&utf8_sanity_check;
147	&preparation;
148	&print_message;
149	&schemas_merge_translations;
150	&finalize;
151	}
152	elsif ($RFC822DEB_STYLE_ARG && @ARGV > 2)
153	{
154	&preparation;
155	&print_message;
156	&rfc822deb_merge_translations;
157	&finalize;
158	}
159	else
160	{
161	&print_help;
162	}
163
164	exit;
165
166	## Sub for printing release information
167	sub print_version
168	{
169	print <<_EOF_;
170	${PROGRAM} (${PACKAGE}) ${VERSION}
171	Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen.
172
173	Copyright (C) 2000-2003 Free Software Foundation, Inc.
174	Copyright (C) 2000-2001 Eazel, Inc.
175	This is free software; see the source for copying conditions. There is NO
176	warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
177	_EOF_
178	exit;
179	}
180
181	## Sub for printing usage information
182	sub print_help
183	{
184	print <<_EOF_;
185	Usage: ${PROGRAM} [OPTION]... PO_DIRECTORY FILENAME OUTPUT_FILE
186	Generates an output file that includes some localized attributes from an
187	untranslated source file.
188
189	Mandatory options: (exactly one must be specified)
190	-b, --ba-style includes translations in the bonobo-activation style
191	-d, --desktop-style includes translations in the desktop style
192	-k, --keys-style includes translations in the keys style
193	-s, --schemas-style includes translations in the schemas style
194	-r, --rfc822deb-style includes translations in the RFC822 style
195	-x, --xml-style includes translations in the standard xml style
196
197	Other options:
198	-u, --utf8 convert all strings to UTF-8 before merging
199	(default for everything except RFC822 style)
200	-p, --pass-through deprecated, does nothing and issues a warning
201	-m, --multiple-output output one localized file per locale, instead of
202	a single file containing all localized elements
203	-c, --cache=FILE specify cache file name
204	(usually \$top_builddir/po/.intltool-merge-cache)
205	-q, --quiet suppress most messages
206	--help display this help and exit
207	--version output version information and exit
208
209	Report bugs to http://bugzilla.gnome.org/ (product name "$PACKAGE")
210	or send email to <xml-i18n-tools\@gnome.org>.
211	_EOF_
212	exit;
213	}
214
215
216	## Sub for printing error messages
217	sub print_error
218	{
219	print STDERR "Try `${PROGRAM} --help' for more information.\n";
220	exit;
221	}
222
223
224	sub print_message
225	{
226	print "Merging translations into $OUTFILE.\n" unless $QUIET_ARG;
227	}
228
229
230	sub preparation
231	{
232	$PO_DIR = $ARGV[0];
233	$FILE = $ARGV[1];
234	$OUTFILE = $ARGV[2];
235
236	&gather_po_files;
237	&get_translation_database;
238	}
239
240	# General-purpose code for looking up translations in .po files
241
242	sub po_file2lang
243	{
244	my ($tmp) = @_;
245	$tmp =~ s/^.\/(.)\.po$/$1/;
246	return $tmp;
247	}
248
249	sub gather_po_files
250	{
251	for my $po_file (glob "$PO_DIR/*.po") {
252	$po_files_by_lang{po_file2lang($po_file)} = $po_file;
253	}
254	}
255
256	sub get_local_charset
257	{
258	my ($encoding) = @_;
259	my $alias_file = $ENV{"G_CHARSET_ALIAS"} \|\| "@EXPANDED_LIBDIR@/charset.alias";
260
261	# seek character encoding aliases in charset.alias (glib)
262
263	if (open CHARSET_ALIAS, $alias_file)
264	{
265	while (<CHARSET_ALIAS>)
266	{
267	next if /^\#/;
268	return $1 if (/^\s*([-._a-zA-Z0-9]+)\s+$encoding\b/i)
269	}
270
271	close CHARSET_ALIAS;
272	}
273
274	# if not found, return input string
275
276	return $encoding;
277	}
278
279	sub get_po_encoding
280	{
281	my ($in_po_file) = @_;
282	my $encoding = "";
283
284	open IN_PO_FILE, $in_po_file or die;
285	while (<IN_PO_FILE>)
286	{
287	## example: "Content-Type: text/plain; charset=ISO-8859-1\n"
288	if (/Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/)
289	{
290	$encoding = $1;
291	last;
292	}
293	}
294	close IN_PO_FILE;
295
296	if (!$encoding)
297	{
298	print STDERR "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n" unless $QUIET_ARG;
299	$encoding = "ISO-8859-1";
300	}
301
302	system ("$iconv -f $encoding -t UTF-8 </dev/null 2>/dev/null");
303	if ($?) {
304	$encoding = get_local_charset($encoding);
305	}
306
307	return $encoding
308	}
309
310	sub utf8_sanity_check
311	{
312	print STDERR "Warning: option --pass-through has been removed.\n" if $PASS_THROUGH_ARG;
313	$UTF8_ARG = 1;
314	}
315
316	sub get_translation_database
317	{
318	if ($cache_file) {
319	&get_cached_translation_database;
320	} else {
321	&create_translation_database;
322	}
323	}
324
325	sub get_newest_po_age
326	{
327	my $newest_age;
328
329	foreach my $file (values %po_files_by_lang)
330	{
331	my $file_age = -M $file;
332	$newest_age = $file_age if !$newest_age \|\| $file_age < $newest_age;
333	}
334
335	$newest_age = 0 if !$newest_age;
336
337	return $newest_age;
338	}
339
340	sub create_cache
341	{
342	print "Generating and caching the translation database\n" unless $QUIET_ARG;
343
344	&create_translation_database;
345
346	open CACHE, ">$cache_file" \|\| die;
347	print CACHE join "\x01", %translations;
348	close CACHE;
349	}
350
351	sub load_cache
352	{
353	print "Found cached translation database\n" unless $QUIET_ARG;
354
355	my $contents;
356	open CACHE, "<$cache_file" \|\| die;
357	{
358	local $/;
359	$contents = <CACHE>;
360	}
361	close CACHE;
362	%translations = split "\x01", $contents;
363	}
364
365	sub get_cached_translation_database
366	{
367	my $cache_file_age = -M $cache_file;
368	if (defined $cache_file_age)
369	{
370	if ($cache_file_age <= &get_newest_po_age)
371	{
372	&load_cache;
373	return;
374	}
375	print "Found too-old cached translation database\n" unless $QUIET_ARG;
376	}
377
378	&create_cache;
379	}
380
381	sub create_translation_database
382	{
383	for my $lang (keys %po_files_by_lang)
384	{
385	my $po_file = $po_files_by_lang{$lang};
386
387	if ($UTF8_ARG)
388	{
389	my $encoding = get_po_encoding ($po_file);
390
391	if (lc $encoding eq "utf-8")
392	{
393	open PO_FILE, "<$po_file";
394	}
395	else
396	{
397	print STDERR "WARNING: $po_file is not in UTF-8 but $encoding, converting...\n" unless $QUIET_ARG;;
398
399	open PO_FILE, "$iconv -f $encoding -t UTF-8 $po_file\|";
400	}
401	}
402	else
403	{
404	open PO_FILE, "<$po_file";
405	}
406
407	my $nextfuzzy = 0;
408	my $inmsgid = 0;
409	my $inmsgstr = 0;
410	my $msgid = "";
411	my $msgstr = "";
412
413	while (<PO_FILE>)
414	{
415	$nextfuzzy = 1 if /^#, fuzzy/;
416
417	if (/^msgid "((\\.\|[^\\])*)"/ )
418	{
419	$translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
420	$msgid = "";
421	$msgstr = "";
422
423	if ($nextfuzzy) {
424	$inmsgid = 0;
425	} else {
426	$msgid = unescape_po_string($1);
427	$inmsgid = 1;
428	}
429	$inmsgstr = 0;
430	$nextfuzzy = 0;
431	}
432
433	if (/^msgstr "((\\.\|[^\\])*)"/)
434	{
435	$msgstr = unescape_po_string($1);
436	$inmsgstr = 1;
437	$inmsgid = 0;
438	}
439
440	if (/^"((\\.\|[^\\])*)"/)
441	{
442	$msgid .= unescape_po_string($1) if $inmsgid;
443	$msgstr .= unescape_po_string($1) if $inmsgstr;
444	}
445	}
446	$translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
447	}
448	}
449
450	sub finalize
451	{
452	}
453
454	sub unescape_one_sequence
455	{
456	my ($sequence) = @_;
457
458	return "\\" if $sequence eq "\\\\";
459	return "\"" if $sequence eq "\\\"";
460	return "\n" if $sequence eq "\\n";
461	return "\r" if $sequence eq "\\r";
462	return "\t" if $sequence eq "\\t";
463	return "\b" if $sequence eq "\\b";
464	return "\f" if $sequence eq "\\f";
465	return "\a" if $sequence eq "\\a";
466	return chr(11) if $sequence eq "\\v"; # vertical tab, see ascii(7)
467
468	return chr(hex($1)) if ($sequence =~ /\\x([0-9a-fA-F]{2})/);
469	return chr(oct($1)) if ($sequence =~ /\\([0-7]{3})/);
470
471	# FIXME: Is \0 supported as well? Kenneth and Rodney don't want it, see bug #48489
472
473	return $sequence;
474	}
475
476	sub unescape_po_string
477	{
478	my ($string) = @_;
479
480	$string =~ s/(\\x[0-9a-fA-F]{2}\|\\[0-7]{3}\|\\.)/unescape_one_sequence($1)/eg;
481
482	return $string;
483	}
484
485	## NOTE: deal with < - < but not > - > because it seems its ok to have
486	## > in the entity. For further info please look at #84738.
487	sub entity_decode
488	{
489	local ($_) = @_;
490
491	s/'/'/g; # '
492	s/"/"/g; # "
493	s/&/&/g;
494	s/</</g;
495
496	return $_;
497	}
498
499	# entity_encode: (string)
500	#
501	# Encode the given string to XML format (encode '<' etc).
502
503	sub entity_encode
504	{
505	my ($pre_encoded) = @_;
506
507	my @list_of_chars = unpack ('C*', $pre_encoded);
508
509	# with UTF-8 we only encode minimalistic
510	return join ('', map (&entity_encode_int_minimalist, @list_of_chars));
511	}
512
513	sub entity_encode_int_minimalist
514	{
515	return """ if $_ == 34;
516	return "&" if $_ == 38;
517	return "'" if $_ == 39;
518	return "<" if $_ == 60;
519	return chr $_;
520	}
521
522	sub entity_encoded_translation
523	{
524	my ($lang, $string) = @_;
525
526	my $translation = $translations{$lang, $string};
527	return $string if !$translation;
528	return entity_encode ($translation);
529	}
530
531	## XML (bonobo-activation specific) merge code
532
533	sub ba_merge_translations
534	{
535	my $source;
536
537	{
538	local $/; # slurp mode
539	open INPUT, "<$FILE" or die "can't open $FILE: $!";
540	$source = <INPUT>;
541	close INPUT;
542	}
543
544	open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!";
545
546	while ($source =~ s\|^(.?)([ \t]<\s$w+\s+($w+\s=\s"$q"\s)+/?>)([ \t]*\n)?\|\|s)
547	{
548	print OUTPUT $1;
549
550	my $node = $2 . "\n";
551
552	my @strings = ();
553	$_ = $node;
554	while (s/(\s)_($w+\s=\s"($q)")/$1$2/s) {
555	push @strings, entity_decode($3);
556	}
557	print OUTPUT;
558
559	my %langs;
560	for my $string (@strings)
561	{
562	for my $lang (keys %po_files_by_lang)
563	{
564	$langs{$lang} = 1 if $translations{$lang, $string};
565	}
566	}
567
568	for my $lang (sort keys %langs)
569	{
570	$_ = $node;
571	s/(\sname\s=\s)"($q)"/$1"$2-$lang"/s;
572	s/(\s)_($w+\s=\s")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg;
573	print OUTPUT;
574	}
575	}
576
577	print OUTPUT $source;
578
579	close OUTPUT;
580	}
581
582
583	## XML (non-bonobo-activation) merge code
584
585
586	# Process tag attributes
587	# Only parameter is a HASH containing attributes -> values mapping
588	sub getAttributeString
589	{
590	my $sub = shift;
591	my $do_translate = shift \|\| 0;
592	my $language = shift \|\| "";
593	my $result = "";
594	my $translate = shift;
595	foreach my $e (reverse(sort(keys %{ $sub }))) {
596	my $key = $e;
597	my $string = $sub->{$e};
598	my $quote = '"';
599
600	$string =~ s/^[\s]+//;
601	$string =~ s/[\s]+$//;
602
603	if ($string =~ /^'.*'$/)
604	{
605	$quote = "'";
606	}
607	$string =~ s/^['"]//g;
608	$string =~ s/['"]$//g;
609
610	if ($do_translate && $key =~ /^_/) {
611	$key =~ s\|^_\|\|g;
612	if ($language) {
613
614	# Handle translation
615	#
616	my $decode_string = entity_decode($string);
617	my $translation = $translations{$language, $decode_string};
618	if ($translation) {
619	$translation = entity_encode($translation);
620	$string = $translation;
621	$$translate = 2;
622	} else {
623	$$translate = 2; # we still want translations for deep nesting (FIXME: this will cause
624	# problems since we might get untranslated duplicated entries, but with xml:lang set)
625	# Fix would be to set it here to eg. 3, and do a check in traverse() to see if any of the containing tags
626	# really need translation, and only emit "translation" if there is (this means parsing same data twice)
627	}
628	} else {
629	$$translate = 2 if ($translate && (!$$translate)); # watch not to "overwrite" if $translate == 2
630	}
631	}
632
633	$result .= " $key=$quote$string$quote";
634	}
635	return $result;
636	}
637
638	# Returns a translatable string from XML node, it works on contents of every node in XML::Parser tree
639	# doesn't support nesting of translatable tags (i.e. <_blah>this <_doh>doesn't</_doh> work</_blah> -- besides
640	# can you define the correct semantics for this?)
641	#
642
643	sub getXMLstring
644	{
645	my $ref = shift;
646	my @list = @{ $ref };
647	my $result = "";
648
649	my $count = scalar(@list);
650	my $attrs = $list[0];
651	my $index = 1;
652
653	while ($index < $count) {
654	my $type = $list[$index];
655	my $content = $list[$index+1];
656	if (! $type ) {
657	# We've got CDATA
658	if ($content) {
659	# lets strip the whitespace here, and ONLY here
660	$content =~ s/\s+/ /gs if (!((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/)));
661	$result .= ($content);
662	} else {
663	#print "no cdata content when expected it\n"; # is this possible, is this ok?
664	# what to do if this happens?
665	# Did I mention that I hate XML::Parser tree style?
666	}
667	} else {
668	# We've got another element
669	$result .= "<$type";
670	$result .= getAttributeString(@{$content}[0], 0); # no nested translatable elements
671	if ($content) {
672	my $subresult = getXMLstring($content);
673	if ($subresult) {
674	$result .= ">".$subresult . "</$type>";
675	} else {
676	$result .= "/>";
677	}
678	} else {
679	$result .= "/>";
680	}
681	}
682	$index += 2;
683	}
684	return $result;
685	}
686
687	# Translate list of nodes if necessary
688	sub translate_subnodes
689	{
690	my $fh = shift;
691	my $content = shift;
692	my $language = shift \|\| "";
693	my $singlelang = shift \|\| 0;
694
695	my @nodes = @{ $content };
696
697	my $count = scalar(@nodes);
698	my $index = 0;
699	while ($index < $count) {
700	my $type = $nodes[$index];
701	my $rest = $nodes[$index+1];
702	if ($singlelang) {
703	my $oldMO = $MULTIPLE_OUTPUT;
704	$MULTIPLE_OUTPUT = 1;
705	traverse($fh, $type, $rest, $language);
706	$MULTIPLE_OUTPUT = $oldMO;
707	} else {
708	traverse($fh, $type, $rest, $language);
709	}
710	$index += 2;
711	}
712	}
713
714	sub traverse
715	{
716	my $fh = shift;
717	my $nodename = shift;
718	my $content = shift;
719	my $language = shift \|\| "";
720
721	if (!$nodename) {
722	if ($content =~ /^[\s]*$/) {
723	$leading_space .= $content;
724	}
725	print $fh $content;
726	} else {
727	# element
728	my @all = @{ $content };
729	my $attrs = shift @all;
730	my $translate = 0;
731	my $outattr = getAttributeString($attrs, 1, $language, \$translate);
732
733	if ($nodename =~ /^_/) {
734	$translate = 1;
735	$nodename =~ s/^_//;
736	}
737	my $lookup = '';
738	print $fh "<$nodename", $outattr;
739	if ($translate) {
740	$lookup = getXMLstring($content);
741	if (!((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/))) {
742	$lookup =~ s/^\s+//s;
743	$lookup =~ s/\s+$//s;
744	}
745
746	if ($lookup \|\| $translate == 2) {
747	my $translation = $translations{$language, $lookup};
748	if ($MULTIPLE_OUTPUT && ($translation \|\| $translate == 2)) {
749	$translation = $lookup if (!$translation);
750	print $fh " xml:lang=\"", $language, "\"" if $language;
751	print $fh ">";
752	if ($translate == 2) {
753	translate_subnodes($fh, \@all, $language, 1);
754	} else {
755	print $fh $translation;
756	}
757	print $fh "</$nodename>";
758
759	return; # this means there will be no same translation with xml:lang="$language"...
760	# if we want them both, just remove this "return"
761	} else {
762	print $fh ">";
763	if ($translate == 2) {
764	translate_subnodes($fh, \@all, $language, 1);
765	} else {
766	print $fh $lookup;
767	}
768	print $fh "</$nodename>";
769	}
770	} else {
771	print $fh "/>";
772	}
773
774	for my $lang (sort keys %po_files_by_lang) {
775	if ($MULTIPLE_OUTPUT && $lang ne "$language") {
776	next;
777	}
778	if ($lang) {
779	# Handle translation
780	#
781	my $translate = 0;
782	my $localattrs = getAttributeString($attrs, 1, $lang, \$translate);
783	my $translation = $translations{$lang, $lookup};
784	if ($translate && !$translation) {
785	$translation = $lookup;
786	}
787
788	if ($translation \|\| $translate) {
789	print $fh "\n";
790	$leading_space =~ s/.*\n//g;
791	print $fh $leading_space;
792	print $fh "<", $nodename, " xml:lang=\"", $lang, "\"", $localattrs, ">";
793	if ($translate == 2) {
794	translate_subnodes($fh, \@all, $lang, 1);
795	} else {
796	print $fh $translation;
797	}
798	print $fh "</$nodename>";
799	}
800	}
801	}
802
803	} else {
804	my $count = scalar(@all);
805	if ($count > 0) {
806	print $fh ">";
807	my $index = 0;
808	while ($index < $count) {
809	my $type = $all[$index];
810	my $rest = $all[$index+1];
811	traverse($fh, $type, $rest, $language);
812	$index += 2;
813	}
814	print $fh "</$nodename>";
815	} else {
816	print $fh "/>";
817	}
818	}
819	}
820	}
821
822	sub intltool_tree_cdatastart
823	{
824	my $expat = shift;
825	my $clist = $expat->{Curlist};
826	my $pos = $#$clist;
827
828	push @$clist, 0 => $expat->original_string();
829	}
830
831	sub intltool_tree_cdataend
832	{
833	my $expat = shift;
834	my $clist = $expat->{Curlist};
835	my $pos = $#$clist;
836
837	$clist->[$pos] .= $expat->original_string();
838	}
839
840	sub intltool_tree_char
841	{
842	my $expat = shift;
843	my $text = shift;
844	my $clist = $expat->{Curlist};
845	my $pos = $#$clist;
846
847	# Use original_string so that we retain escaped entities
848	# in CDATA sections.
849	#
850	if ($pos > 0 and $clist->[$pos - 1] eq '0') {
851	$clist->[$pos] .= $expat->original_string();
852	} else {
853	push @$clist, 0 => $expat->original_string();
854	}
855	}
856
857	sub intltool_tree_start
858	{
859	my $expat = shift;
860	my $tag = shift;
861	my @origlist = ();
862
863	# Use original_string so that we retain escaped entities
864	# in attribute values. We must convert the string to an
865	# @origlist array to conform to the structure of the Tree
866	# Style.
867	#
868	my @original_array = split /\x/, $expat->original_string();
869	my $source = $expat->original_string();
870
871	# Remove leading tag.
872	#
873	$source =~ s\|^\s<\s(\S+)\|\|s;
874
875	# Grab attribute key/value pairs and push onto @origlist array.
876	#
877	while ($source)
878	{
879	if ($source =~ /^\s([\w:-]+)\s[=]\s*["]/)
880	{
881	$source =~ s\|^\s([\w:-]+)\s[=]\s["]([^"])["]\|\|s;
882	push @origlist, $1;
883	push @origlist, '"' . $2 . '"';
884	}
885	elsif ($source =~ /^\s([\w:-]+)\s[=]\s*[']/)
886	{
887	$source =~ s\|^\s([\w:-]+)\s[=]\s[']([^'])[']\|\|s;
888	push @origlist, $1;
889	push @origlist, "'" . $2 . "'";
890	}
891	else
892	{
893	last;
894	}
895	}
896
897	my $ol = [ { @origlist } ];
898
899	push @{ $expat->{Lists} }, $expat->{Curlist};
900	push @{ $expat->{Curlist} }, $tag => $ol;
901	$expat->{Curlist} = $ol;
902	}
903
904	sub readXml
905	{
906	my $filename = shift \|\| return;
907	if(!-f $filename) {
908	die "ERROR Cannot find filename: $filename\n";
909	}
910
911	my $ret = eval 'require XML::Parser';
912	if(!$ret) {
913	die "You must have XML::Parser installed to run $0\n\n";
914	}
915	my $xp = new XML::Parser(Style => 'Tree');
916	$xp->setHandlers(Char => \&intltool_tree_char);
917	$xp->setHandlers(Start => \&intltool_tree_start);
918	$xp->setHandlers(CdataStart => \&intltool_tree_cdatastart);
919	$xp->setHandlers(CdataEnd => \&intltool_tree_cdataend);
920	my $tree = $xp->parsefile($filename);
921
922	# <foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo>
923	# would be:
924	# [foo, [{}, head, [{id => "a"}, 0, "Hello ", em, [{}, 0, "there"]], bar, [{},
925	# 0, "Howdy", ref, [{}]], 0, "do" ] ]
926
927	return $tree;
928	}
929
930	sub print_header
931	{
932	my $infile = shift;
933	my $fh = shift;
934	my $source;
935
936	if(!-f $infile) {
937	die "ERROR Cannot find filename: $infile\n";
938	}
939
940	print $fh qq{<?xml version="1.0" encoding="UTF-8"?>\n};
941	{
942	local $/;
943	open DOCINPUT, "<${FILE}" or die;
944	$source = <DOCINPUT>;
945	close DOCINPUT;
946	}
947	if ($source =~ /(<!DOCTYPE.\[.\]\s*>)/s)
948	{
949	print $fh "$1\n";
950	}
951	elsif ($source =~ /(<!DOCTYPE[^>]*>)/s)
952	{
953	print $fh "$1\n";
954	}
955	}
956
957	sub parseTree
958	{
959	my $fh = shift;
960	my $ref = shift;
961	my $language = shift \|\| "";
962
963	my $name = shift @{ $ref };
964	my $cont = shift @{ $ref };
965	traverse($fh, $name, $cont, $language);
966	}
967
968	sub xml_merge_output
969	{
970	my $source;
971
972	if ($MULTIPLE_OUTPUT) {
973	for my $lang (sort keys %po_files_by_lang) {
974	if ( ! -e $lang ) {
975	mkdir $lang or die "Cannot create subdirectory $lang: $!\n";
976	}
977	open OUTPUT, ">$lang/$OUTFILE" or die "Cannot open $lang/$OUTFILE: $!\n";
978	my $tree = readXml($FILE);
979	print_header($FILE, \*OUTPUT);
980	parseTree(\*OUTPUT, $tree, $lang);
981	close OUTPUT;
982	print "CREATED $lang/$OUTFILE\n" unless $QUIET_ARG;
983	}
984	}
985	open OUTPUT, ">$OUTFILE" or die "Cannot open $OUTFILE: $!\n";
986	my $tree = readXml($FILE);
987	print_header($FILE, \*OUTPUT);
988	parseTree(\*OUTPUT, $tree);
989	close OUTPUT;
990	print "CREATED $OUTFILE\n" unless $QUIET_ARG;
991	}
992
993	sub keys_merge_translations
994	{
995	open INPUT, "<${FILE}" or die;
996	open OUTPUT, ">${OUTFILE}" or die;
997
998	while (<INPUT>)
999	{
1000	if (s/^(\s)_(\w+=(.))/$1$2/)
1001	{
1002	my $string = $3;
1003
1004	print OUTPUT;
1005
1006	my $non_translated_line = $_;
1007
1008	for my $lang (sort keys %po_files_by_lang)
1009	{
1010	my $translation = $translations{$lang, $string};
1011	next if !$translation;
1012
1013	$_ = $non_translated_line;
1014	s/(\w+)=.*/[$lang]$1=$translation/;
1015	print OUTPUT;
1016	}
1017	}
1018	else
1019	{
1020	print OUTPUT;
1021	}
1022	}
1023
1024	close OUTPUT;
1025	close INPUT;
1026	}
1027
1028	sub desktop_merge_translations
1029	{
1030	open INPUT, "<${FILE}" or die;
1031	open OUTPUT, ">${OUTFILE}" or die;
1032
1033	while (<INPUT>)
1034	{
1035	if (s/^(\s)_(\w+=(.))/$1$2/)
1036	{
1037	my $string = $3;
1038
1039	print OUTPUT;
1040
1041	my $non_translated_line = $_;
1042
1043	for my $lang (sort keys %po_files_by_lang)
1044	{
1045	my $translation = $translations{$lang, $string};
1046	next if !$translation;
1047
1048	$_ = $non_translated_line;
1049	s/(\w+)=.*/${1}[$lang]=$translation/;
1050	print OUTPUT;
1051	}
1052	}
1053	else
1054	{
1055	print OUTPUT;
1056	}
1057	}
1058
1059	close OUTPUT;
1060	close INPUT;
1061	}
1062
1063	sub schemas_merge_translations
1064	{
1065	my $source;
1066
1067	{
1068	local $/; # slurp mode
1069	open INPUT, "<$FILE" or die "can't open $FILE: $!";
1070	$source = <INPUT>;
1071	close INPUT;
1072	}
1073
1074	open OUTPUT, ">$OUTFILE" or die;
1075
1076	# FIXME: support attribute translations
1077
1078	# Empty nodes never need translation, so unmark all of them.
1079	# For example, <_foo/> is just replaced by <foo/>.
1080	$source =~ s\|<\s_($w+)\s/>\|<$1/>\|g;
1081
1082	while ($source =~ s/
1083	(.*?)
1084	(\s+)(<locale\ name="C">(\s*)
1085	(<default>\s(?:<!--[^>]?-->\s)?(.?)\s<\/default>)?(\s)
1086	(<short>\s(?:<!--[^>]?-->\s)?(.?)\s<\/short>)?(\s)
1087	(<long>\s(?:<!--[^>]?-->\s)?(.?)\s<\/long>)?(\s)
1088	<\/locale>)
1089	//sx)
1090	{
1091	print OUTPUT $1;
1092
1093	my $locale_start_spaces = $2 ? $2 : '';
1094	my $default_spaces = $4 ? $4 : '';
1095	my $short_spaces = $7 ? $7 : '';
1096	my $long_spaces = $10 ? $10 : '';
1097	my $locale_end_spaces = $13 ? $13 : '';
1098	my $c_default_block = $3 ? $3 : '';
1099	my $default_string = $6 ? $6 : '';
1100	my $short_string = $9 ? $9 : '';
1101	my $long_string = $12 ? $12 : '';
1102
1103	print OUTPUT "$locale_start_spaces$c_default_block";
1104
1105	$default_string =~ s/\s+/ /g;
1106	$default_string = entity_decode($default_string);
1107	$short_string =~ s/\s+/ /g;
1108	$short_string = entity_decode($short_string);
1109	$long_string =~ s/\s+/ /g;
1110	$long_string = entity_decode($long_string);
1111
1112	for my $lang (sort keys %po_files_by_lang)
1113	{
1114	my $default_translation = $translations{$lang, $default_string};
1115	my $short_translation = $translations{$lang, $short_string};
1116	my $long_translation = $translations{$lang, $long_string};
1117
1118	next if (!$default_translation && !$short_translation &&
1119	!$long_translation);
1120
1121	print OUTPUT "\n$locale_start_spaces<locale name=\"$lang\">";
1122
1123	print OUTPUT "$default_spaces";
1124
1125	if ($default_translation)
1126	{
1127	$default_translation = entity_encode($default_translation);
1128	print OUTPUT "<default>$default_translation</default>";
1129	}
1130
1131	print OUTPUT "$short_spaces";
1132
1133	if ($short_translation)
1134	{
1135	$short_translation = entity_encode($short_translation);
1136	print OUTPUT "<short>$short_translation</short>";
1137	}
1138
1139	print OUTPUT "$long_spaces";
1140
1141	if ($long_translation)
1142	{
1143	$long_translation = entity_encode($long_translation);
1144	print OUTPUT "<long>$long_translation</long>";
1145	}
1146
1147	print OUTPUT "$locale_end_spaces</locale>";
1148	}
1149	}
1150
1151	print OUTPUT $source;
1152
1153	close OUTPUT;
1154	}
1155
1156	sub rfc822deb_merge_translations
1157	{
1158	my %encodings = ();
1159	for my $lang (keys %po_files_by_lang) {
1160	$encodings{$lang} = ($UTF8_ARG ? 'UTF-8' : get_po_encoding($po_files_by_lang{$lang}));
1161	}
1162
1163	my $source;
1164
1165	$Text::Wrap::huge = 'overflow';
1166	$Text::Wrap::break = qr/\n\|\s(?=\S)/;
1167
1168	{
1169	local $/; # slurp mode
1170	open INPUT, "<$FILE" or die "can't open $FILE: $!";
1171	$source = <INPUT>;
1172	close INPUT;
1173	}
1174
1175	open OUTPUT, ">${OUTFILE}" or die;
1176
1177	while ($source =~ /(^\|\n+)(_)([^:\s]+)(:[ \t])(.*?)(?=\n[\S\n]\|$)/sg)
1178	{
1179	my $sep = $1;
1180	my $non_translated_line = $3.$4;
1181	my $string = $5;
1182	my $underscore = length($2);
1183	next if $underscore eq 0 && $non_translated_line =~ /^#/;
1184	# Remove [] dummy strings
1185	my $stripped = $string;
1186	$stripped =~ s/\[\s[^\[\]]*\],/,/g if $underscore eq 2;
1187	$stripped =~ s/\[\s[^\[\]]*\]$//;
1188	$non_translated_line .= $stripped;
1189
1190	print OUTPUT $sep.$non_translated_line;
1191
1192	if ($underscore)
1193	{
1194	my @str_list = rfc822deb_split($underscore, $string);
1195
1196	for my $lang (sort keys %po_files_by_lang)
1197	{
1198	my $is_translated = 1;
1199	my $str_translated = '';
1200	my $first = 1;
1201
1202	for my $str (@str_list)
1203	{
1204	my $translation = $translations{$lang, $str};
1205
1206	if (!$translation)
1207	{
1208	$is_translated = 0;
1209	last;
1210	}
1211
1212	# $translation may also contain [] dummy
1213	# strings, mostly to indicate an empty string
1214	$translation =~ s/\[\s[^\[\]]*\]$//;
1215
1216	if ($first)
1217	{
1218	if ($underscore eq 2)
1219	{
1220	$str_translated .= $translation;
1221	}
1222	else
1223	{
1224	$str_translated .=
1225	Text::Tabs::expand($translation) .
1226	"\n";
1227	}
1228	}
1229	else
1230	{
1231	if ($underscore eq 2)
1232	{
1233	$str_translated .= ', ' . $translation;
1234	}
1235	else
1236	{
1237	$str_translated .= Text::Tabs::expand(
1238	Text::Wrap::wrap(' ', ' ', $translation)) .
1239	"\n .\n";
1240	}
1241	}
1242	$first = 0;
1243
1244	# To fix some problems with Text::Wrap::wrap
1245	$str_translated =~ s/(\n )+\n/\n .\n/g;
1246	}
1247	next unless $is_translated;
1248
1249	$str_translated =~ s/\n \.\n$//;
1250	$str_translated =~ s/\s+$//;
1251
1252	$_ = $non_translated_line;
1253	s/^(\w+):\s./$sep${1}-$lang.$encodings{$lang}: $str_translated/s;
1254	print OUTPUT;
1255	}
1256	}
1257	}
1258	print OUTPUT "\n";
1259
1260	close OUTPUT;
1261	close INPUT;
1262	}
1263
1264	sub rfc822deb_split
1265	{
1266	# Debian defines a special way to deal with rfc822-style files:
1267	# when a value contain newlines, it consists of
1268	# 1. a short form (first line)
1269	# 2. a long description, all lines begin with a space,
1270	# and paragraphs are separated by a single dot on a line
1271	# This routine returns an array of all paragraphs, and reformat
1272	# them.
1273	# When first argument is 2, the string is a comma separated list of
1274	# values.
1275	my $type = shift;
1276	my $text = shift;
1277	$text =~ s/^[ \t]//mg;
1278	return (split(/, */, $text, 0)) if $type ne 1;
1279	return ($text) if $text !~ /\n/;
1280
1281	$text =~ s/([^\n]*)\n//;
1282	my @list = ($1);
1283	my $str = '';
1284
1285	for my $line (split (/\n/, $text))
1286	{
1287	chomp $line;
1288	if ($line =~ /^\.\s*$/)
1289	{
1290	# New paragraph
1291	$str =~ s/\s*$//;
1292	push(@list, $str);
1293	$str = '';
1294	}
1295	elsif ($line =~ /^\s/)
1296	{
1297	# Line which must not be reformatted
1298	$str .= "\n" if length ($str) && $str !~ /\n$/;
1299	$line =~ s/\s+$//;
1300	$str .= $line."\n";
1301	}
1302	else
1303	{
1304	# Continuation line, remove newline
1305	$str .= " " if length ($str) && $str !~ /\n$/;
1306	$str .= $line;
1307	}
1308	}
1309
1310	$str =~ s/\s*$//;
1311	push(@list, $str) if length ($str);
1312
1313	return @list;
1314	}
1315

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format