Context Navigation

intltool-merge.in @ 20932

Visit:

Revision 20932, 30.4 KB checked in by ghudson, 20 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r20931, which included commits to RCS files with non-trunk default branches.

Line
1	#!@INTLTOOL_PERL@ -w
2	# -- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 4 --
3
4	#
5	# The Intltool Message Merger
6	#
7	# Copyright (C) 2000, 2003 Free Software Foundation.
8	# Copyright (C) 2000, 2001 Eazel, Inc
9	#
10	# Intltool is free software; you can redistribute it and/or
11	# modify it under the terms of the GNU General Public License
12	# version 2 published by the Free Software Foundation.
13	#
14	# Intltool is distributed in the hope that it will be useful,
15	# but WITHOUT ANY WARRANTY; without even the implied warranty of
16	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17	# General Public License for more details.
18	#
19	# You should have received a copy of the GNU General Public License
20	# along with this program; if not, write to the Free Software
21	# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22	#
23	# As a special exception to the GNU General Public License, if you
24	# distribute this file as part of a program that contains a
25	# configuration script generated by Autoconf, you may include it under
26	# the same distribution terms that you use for the rest of that program.
27	#
28	# Authors: Maciej Stachowiak <mjs@noisehavoc.org>
29	# Kenneth Christiansen <kenneth@gnu.org>
30	# Darin Adler <darin@bentspoon.com>
31	#
32	# Proper XML UTF-8'ification written by Cyrille Chepelov <chepelov@calixo.net>
33	#
34
35	## Release information
36	my $PROGRAM = "intltool-merge";
37	my $PACKAGE = "intltool";
38	my $VERSION = "0.31.1";
39
40	## Loaded modules
41	use strict;
42	use Getopt::Long;
43	use Text::Wrap;
44	use File::Basename;
45
46	my $must_end_tag = -1;
47	my $last_depth = -1;
48	my $translation_depth = -1;
49	my @tag_stack = ();
50	my @entered_tag = ();
51	my @translation_strings = ();
52	my $leading_space = "";
53
54	## Scalars used by the option stuff
55	my $HELP_ARG = 0;
56	my $VERSION_ARG = 0;
57	my $BA_STYLE_ARG = 0;
58	my $XML_STYLE_ARG = 0;
59	my $KEYS_STYLE_ARG = 0;
60	my $DESKTOP_STYLE_ARG = 0;
61	my $SCHEMAS_STYLE_ARG = 0;
62	my $RFC822DEB_STYLE_ARG = 0;
63	my $QUIET_ARG = 0;
64	my $PASS_THROUGH_ARG = 0;
65	my $UTF8_ARG = 0;
66	my $MULTIPLE_OUTPUT = 0;
67	my $cache_file;
68
69	## Handle options
70	GetOptions
71	(
72	"help" => \$HELP_ARG,
73	"version" => \$VERSION_ARG,
74	"quiet\|q" => \$QUIET_ARG,
75	"oaf-style\|o" => \$BA_STYLE_ARG, ## for compatibility
76	"ba-style\|b" => \$BA_STYLE_ARG,
77	"xml-style\|x" => \$XML_STYLE_ARG,
78	"keys-style\|k" => \$KEYS_STYLE_ARG,
79	"desktop-style\|d" => \$DESKTOP_STYLE_ARG,
80	"schemas-style\|s" => \$SCHEMAS_STYLE_ARG,
81	"rfc822deb-style\|r" => \$RFC822DEB_STYLE_ARG,
82	"pass-through\|p" => \$PASS_THROUGH_ARG,
83	"utf8\|u" => \$UTF8_ARG,
84	"multiple-output\|m" => \$MULTIPLE_OUTPUT,
85	"cache\|c=s" => \$cache_file
86	) or &error;
87
88	my $PO_DIR;
89	my $FILE;
90	my $OUTFILE;
91
92	my %po_files_by_lang = ();
93	my %translations = ();
94	my $iconv = $ENV{"INTLTOOL_ICONV"} \|\| "iconv";
95
96	# Use this instead of \w for XML files to handle more possible characters.
97	my $w = "[-A-Za-z0-9._:]";
98
99	# XML quoted string contents
100	my $q = "[^\\\"]*";
101
102	## Check for options.
103
104	if ($VERSION_ARG)
105	{
106	&print_version;
107	}
108	elsif ($HELP_ARG)
109	{
110	&print_help;
111	}
112	elsif ($BA_STYLE_ARG && @ARGV > 2)
113	{
114	&preparation;
115	&print_message;
116	&ba_merge_translations;
117	&finalize;
118	}
119	elsif ($XML_STYLE_ARG && @ARGV > 2)
120	{
121	&utf8_sanity_check;
122	&preparation;
123	&print_message;
124
125	&xml_merge_output;
126
127	&finalize;
128	}
129	elsif ($KEYS_STYLE_ARG && @ARGV > 2)
130	{
131	&utf8_sanity_check;
132	&preparation;
133	&print_message;
134	&keys_merge_translations;
135	&finalize;
136	}
137	elsif ($DESKTOP_STYLE_ARG && @ARGV > 2)
138	{
139	&preparation;
140	&print_message;
141	&desktop_merge_translations;
142	&finalize;
143	}
144	elsif ($SCHEMAS_STYLE_ARG && @ARGV > 2)
145	{
146	&preparation;
147	&print_message;
148	&schemas_merge_translations;
149	&finalize;
150	}
151	elsif ($RFC822DEB_STYLE_ARG && @ARGV > 2)
152	{
153	&preparation;
154	&print_message;
155	&rfc822deb_merge_translations;
156	&finalize;
157	}
158	else
159	{
160	&print_help;
161	}
162
163	exit;
164
165	## Sub for printing release information
166	sub print_version
167	{
168	print <<_EOF_;
169	${PROGRAM} (${PACKAGE}) ${VERSION}
170	Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen.
171
172	Copyright (C) 2000-2003 Free Software Foundation, Inc.
173	Copyright (C) 2000-2001 Eazel, Inc.
174	This is free software; see the source for copying conditions. There is NO
175	warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
176	_EOF_
177	exit;
178	}
179
180	## Sub for printing usage information
181	sub print_help
182	{
183	print <<_EOF_;
184	Usage: ${PROGRAM} [OPTION]... PO_DIRECTORY FILENAME OUTPUT_FILE
185	Generates an output file that includes some localized attributes from an
186	untranslated source file.
187
188	Mandatory options: (exactly one must be specified)
189	-b, --ba-style includes translations in the bonobo-activation style
190	-d, --desktop-style includes translations in the desktop style
191	-k, --keys-style includes translations in the keys style
192	-s, --schemas-style includes translations in the schemas style
193	-r, --rfc822deb-style includes translations in the RFC822 style
194	-x, --xml-style includes translations in the standard xml style
195
196	Other options:
197	-u, --utf8 convert all strings to UTF-8 before merging
198	-p, --pass-through use strings as found in .po files, without
199	conversion (STRONGLY unrecommended with -x)
200	-m, --multiple-output output one localized file per locale, instead of
201	a single file containing all localized elements
202	-c, --cache=FILE specify cache file name
203	(usually \$top_builddir/po/.intltool-merge-cache)
204	-q, --quiet suppress most messages
205	--help display this help and exit
206	--version output version information and exit
207
208	Report bugs to http://bugzilla.gnome.org/ (product name "$PACKAGE")
209	or send email to <xml-i18n-tools\@gnome.org>.
210	_EOF_
211	exit;
212	}
213
214
215	## Sub for printing error messages
216	sub print_error
217	{
218	print STDERR "Try `${PROGRAM} --help' for more information.\n";
219	exit;
220	}
221
222
223	sub print_message
224	{
225	print "Merging translations into $OUTFILE.\n" unless $QUIET_ARG;
226	}
227
228
229	sub preparation
230	{
231	$PO_DIR = $ARGV[0];
232	$FILE = $ARGV[1];
233	$OUTFILE = $ARGV[2];
234
235	&gather_po_files;
236	&get_translation_database;
237	}
238
239	# General-purpose code for looking up translations in .po files
240
241	sub po_file2lang
242	{
243	my ($tmp) = @_;
244	$tmp =~ s/^.\/(.)\.po$/$1/;
245	return $tmp;
246	}
247
248	sub gather_po_files
249	{
250	for my $po_file (glob "$PO_DIR/*.po") {
251	$po_files_by_lang{po_file2lang($po_file)} = $po_file;
252	}
253	}
254
255	sub get_local_charset
256	{
257	my ($encoding) = @_;
258	my $alias_file = $ENV{"G_CHARSET_ALIAS"} \|\| "/gnome/head/INSTALL/lib/charset.alias";
259
260	# seek character encoding aliases in charset.alias (glib)
261
262	if (open CHARSET_ALIAS, $alias_file)
263	{
264	while (<CHARSET_ALIAS>)
265	{
266	next if /^\#/;
267	return $1 if (/^\s*([-._a-zA-Z0-9]+)\s+$encoding\b/i)
268	}
269
270	close CHARSET_ALIAS;
271	}
272
273	# if not found, return input string
274
275	return $encoding;
276	}
277
278	sub get_po_encoding
279	{
280	my ($in_po_file) = @_;
281	my $encoding = "";
282
283	open IN_PO_FILE, $in_po_file or die;
284	while (<IN_PO_FILE>)
285	{
286	## example: "Content-Type: text/plain; charset=ISO-8859-1\n"
287	if (/Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/)
288	{
289	$encoding = $1;
290	last;
291	}
292	}
293	close IN_PO_FILE;
294
295	if (!$encoding)
296	{
297	print STDERR "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n" unless $QUIET_ARG;
298	$encoding = "ISO-8859-1";
299	}
300
301	system ("$iconv -f $encoding -t UTF-8 </dev/null 2>/dev/null");
302	if ($?) {
303	$encoding = get_local_charset($encoding);
304	}
305
306	return $encoding
307	}
308
309	sub utf8_sanity_check
310	{
311	if (!$UTF8_ARG)
312	{
313	if (!$PASS_THROUGH_ARG)
314	{
315	$PASS_THROUGH_ARG="1";
316	}
317	}
318	}
319
320	sub get_translation_database
321	{
322	if ($cache_file) {
323	&get_cached_translation_database;
324	} else {
325	&create_translation_database;
326	}
327	}
328
329	sub get_newest_po_age
330	{
331	my $newest_age;
332
333	foreach my $file (values %po_files_by_lang)
334	{
335	my $file_age = -M $file;
336	$newest_age = $file_age if !$newest_age \|\| $file_age < $newest_age;
337	}
338
339	$newest_age = 0 if !$newest_age;
340
341	return $newest_age;
342	}
343
344	sub create_cache
345	{
346	print "Generating and caching the translation database\n" unless $QUIET_ARG;
347
348	&create_translation_database;
349
350	open CACHE, ">$cache_file" \|\| die;
351	print CACHE join "\x01", %translations;
352	close CACHE;
353	}
354
355	sub load_cache
356	{
357	print "Found cached translation database\n" unless $QUIET_ARG;
358
359	my $contents;
360	open CACHE, "<$cache_file" \|\| die;
361	{
362	local $/;
363	$contents = <CACHE>;
364	}
365	close CACHE;
366	%translations = split "\x01", $contents;
367	}
368
369	sub get_cached_translation_database
370	{
371	my $cache_file_age = -M $cache_file;
372	if (defined $cache_file_age)
373	{
374	if ($cache_file_age <= &get_newest_po_age)
375	{
376	&load_cache;
377	return;
378	}
379	print "Found too-old cached translation database\n" unless $QUIET_ARG;
380	}
381
382	&create_cache;
383	}
384
385	sub create_translation_database
386	{
387	for my $lang (keys %po_files_by_lang)
388	{
389	my $po_file = $po_files_by_lang{$lang};
390
391	if ($UTF8_ARG)
392	{
393	my $encoding = get_po_encoding ($po_file);
394
395	if (lc $encoding eq "utf-8")
396	{
397	open PO_FILE, "<$po_file";
398	}
399	else
400	{
401	print STDERR "WARNING: $po_file is not in UTF-8 but $encoding, converting...\n" unless $QUIET_ARG;;
402
403	open PO_FILE, "$iconv -f $encoding -t UTF-8 $po_file\|";
404	}
405	}
406	else
407	{
408	open PO_FILE, "<$po_file";
409	}
410
411	my $nextfuzzy = 0;
412	my $inmsgid = 0;
413	my $inmsgstr = 0;
414	my $msgid = "";
415	my $msgstr = "";
416
417	while (<PO_FILE>)
418	{
419	$nextfuzzy = 1 if /^#, fuzzy/;
420
421	if (/^msgid "((\\.\|[^\\])*)"/ )
422	{
423	$translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
424	$msgid = "";
425	$msgstr = "";
426
427	if ($nextfuzzy) {
428	$inmsgid = 0;
429	} else {
430	$msgid = unescape_po_string($1);
431	$inmsgid = 1;
432	}
433	$inmsgstr = 0;
434	$nextfuzzy = 0;
435	}
436
437	if (/^msgstr "((\\.\|[^\\])*)"/)
438	{
439	$msgstr = unescape_po_string($1);
440	$inmsgstr = 1;
441	$inmsgid = 0;
442	}
443
444	if (/^"((\\.\|[^\\])*)"/)
445	{
446	$msgid .= unescape_po_string($1) if $inmsgid;
447	$msgstr .= unescape_po_string($1) if $inmsgstr;
448	}
449	}
450	$translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
451	}
452	}
453
454	sub finalize
455	{
456	}
457
458	sub unescape_one_sequence
459	{
460	my ($sequence) = @_;
461
462	return "\\" if $sequence eq "\\\\";
463	return "\"" if $sequence eq "\\\"";
464	return "\n" if $sequence eq "\\n";
465
466	# gettext also handles \n, \t, \b, \r, \f, \v, \a, \xxx (octal),
467	# \xXX (hex) and has a comment saying they want to handle \u and \U.
468
469	return $sequence;
470	}
471
472	sub unescape_po_string
473	{
474	my ($string) = @_;
475
476	$string =~ s/(\\.)/unescape_one_sequence($1)/eg;
477
478	return $string;
479	}
480
481	## NOTE: deal with < - < but not > - > because it seems its ok to have
482	## > in the entity. For further info please look at #84738.
483	sub entity_decode
484	{
485	local ($_) = @_;
486
487	s/'/'/g; # '
488	s/"/"/g; # "
489	s/&/&/g;
490	s/</</g;
491
492	return $_;
493	}
494
495	# entity_encode: (string)
496	#
497	# Encode the given string to XML format (encode '<' etc). It also
498	# encodes high bit if not in UTF-8 mode.
499
500	sub entity_encode
501	{
502	my ($pre_encoded) = @_;
503
504	my @list_of_chars = unpack ('C*', $pre_encoded);
505
506	if ($PASS_THROUGH_ARG)
507	{
508	return join ('', map (&entity_encode_int_even_high_bit, @list_of_chars));
509	}
510	else
511	{
512	# with UTF-8 we only encode minimalistic
513	return join ('', map (&entity_encode_int_minimalist, @list_of_chars));
514	}
515	}
516
517	sub entity_encode_int_minimalist
518	{
519	return """ if $_ == 34;
520	return "&" if $_ == 38;
521	return "'" if $_ == 39;
522	return "<" if $_ == 60;
523	return chr $_;
524	}
525
526	sub entity_encode_int_even_high_bit
527	{
528	if ($_ > 127 \|\| $_ == 34 \|\| $_ == 38 \|\| $_ == 39 \|\| $_ == 60)
529	{
530	# the ($_ > 127) should probably be removed
531	return "&#" . $_ . ";";
532	}
533	else
534	{
535	return chr $_;
536	}
537	}
538
539	sub entity_encoded_translation
540	{
541	my ($lang, $string) = @_;
542
543	my $translation = $translations{$lang, $string};
544	return $string if !$translation;
545	return entity_encode ($translation);
546	}
547
548	## XML (bonobo-activation specific) merge code
549
550	sub ba_merge_translations
551	{
552	my $source;
553
554	{
555	local $/; # slurp mode
556	open INPUT, "<$FILE" or die "can't open $FILE: $!";
557	$source = <INPUT>;
558	close INPUT;
559	}
560
561	open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!";
562
563	while ($source =~ s\|^(.?)([ \t]<\s$w+\s+($w+\s=\s"$q"\s)+/?>)([ \t]*\n)?\|\|s)
564	{
565	print OUTPUT $1;
566
567	my $node = $2 . "\n";
568
569	my @strings = ();
570	$_ = $node;
571	while (s/(\s)_($w+\s=\s"($q)")/$1$2/s) {
572	push @strings, entity_decode($3);
573	}
574	print OUTPUT;
575
576	my %langs;
577	for my $string (@strings)
578	{
579	for my $lang (keys %po_files_by_lang)
580	{
581	$langs{$lang} = 1 if $translations{$lang, $string};
582	}
583	}
584
585	for my $lang (sort keys %langs)
586	{
587	$_ = $node;
588	s/(\sname\s=\s)"($q)"/$1"$2-$lang"/s;
589	s/(\s)_($w+\s=\s")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg;
590	print OUTPUT;
591	}
592	}
593
594	print OUTPUT $source;
595
596	close OUTPUT;
597	}
598
599
600	## XML (non-bonobo-activation) merge code
601
602
603	# Process tag attributes
604	# Only parameter is a HASH containing attributes -> values mapping
605	sub getAttributeString
606	{
607	my $sub = shift;
608	my $do_translate = shift \|\| 0;
609	my $language = shift \|\| "";
610	my $result = "";
611	foreach my $e (reverse(sort(keys %{ $sub }))) {
612	my $key = $e;
613	my $string = $sub->{$e};
614	my $quote = '"';
615
616	$string =~ s/^[\s]+//;
617	$string =~ s/[\s]+$//;
618
619	if ($string =~ /^'.*'$/)
620	{
621	$quote = "'";
622	}
623	$string =~ s/^['"]//g;
624	$string =~ s/['"]$//g;
625
626	if ($do_translate && $key =~ /^_/) {
627	$key =~ s\|^_\|\|g;
628	if ($language) {
629
630	# Handle translation
631	#
632	my $decode_string = entity_decode($string);
633	my $translation = $translations{$language, $decode_string};
634	if ($translation) {
635	$translation = entity_encode($translation);
636	$string = $translation;
637	}
638	}
639	}
640
641	$result .= " $key=$quote$string$quote";
642	}
643	return $result;
644	}
645
646	# Returns a translatable string from XML node, it works on contents of every node in XML::Parser tree
647	# doesn't support nesting of translatable tags (i.e. <_blah>this <_doh>doesn't</_doh> work</_blah> -- besides
648	# can you define the correct semantics for this?)
649	#
650
651	sub getXMLstring
652	{
653	my $ref = shift;
654	my @list = @{ $ref };
655	my $result = "";
656
657	my $count = scalar(@list);
658	my $attrs = $list[0];
659	my $index = 1;
660	while ($index < $count) {
661	my $type = $list[$index];
662	my $content = $list[$index+1];
663	if (! $type ) {
664	# We've got CDATA
665	if ($content) {
666	# lets strip the whitespace here, and ONLY here
667	$content =~ s/\s+/ /gs;
668	$result .= ($content);
669	} else {
670	#print "no cdata content when expected it\n"; # is this possible, is this ok?
671	# what to do if this happens?
672	# Did I mention that I hate XML::Parser tree style?
673	}
674	} else {
675	# We've got another element
676	$result .= "<$type";
677	$result .= getAttributeString($attrs, 0); # no nested translatable elements
678	if ($content) {
679	my $subresult = getXMLstring($content);
680	if ($subresult) {
681	$result .= ">".$subresult . "</$type>";
682	} else {
683	$result .= "/>";
684	}
685	} else {
686	$result .= "/>";
687	}
688	}
689	$index += 2;
690	}
691	return $result;
692	}
693
694	sub traverse
695	{
696	my $fh = shift;
697	my $nodename = shift;
698	my $content = shift;
699	my $language = shift \|\| "";
700
701	if (!$nodename) {
702	if ($content =~ /^[\s]*$/) {
703	$leading_space .= $content;
704	}
705	print $fh $content;
706	} else {
707	# element
708	my @all = @{ $content };
709	my $attrs = shift @all;
710	my $outattr = getAttributeString($attrs, 1, $language);
711	my $translate = 0;
712
713	if ($nodename =~ /^_/) {
714	$translate = 1;
715	$nodename =~ s/^_//;
716	}
717	my $lookup = '';
718	print $fh "<$nodename$outattr";
719	if ($translate) {
720	$lookup = getXMLstring($content);
721	$lookup =~ s/^\s+//s;
722	$lookup =~ s/\s+$//s;
723
724	if ($lookup) {
725	my $translation = $translations{$language, $lookup};
726	if ($MULTIPLE_OUTPUT && $translation) {
727	print $fh " xml:lang=\"", $language, "\"";
728	print $fh ">", $translation, "</$nodename>";
729	return; # this means there will be no same translation with xml:lang="$language"...
730	# if we want them both, just remove this "return"
731	} else {
732	print $fh ">$lookup</$nodename>";
733	}
734	} else {
735	print $fh "/>";
736	}
737
738
739	for my $lang (sort keys %po_files_by_lang) {
740	if ($MULTIPLE_OUTPUT && $lang ne "$language") {
741	next;
742	}
743	if ($lang) {
744
745	# Handle translation
746	#
747	my $localattrs = getAttributeString($attrs, 1, $lang);
748	my $decode_string = ($lookup); #entity_decode($lookup);
749	my $translation = $translations{$lang, $decode_string};
750	if ($translation) {
751	$translation = ($translation);
752	print $fh "\n";
753	$leading_space =~ s/.*\n//g;
754	print $fh $leading_space;
755	print $fh "<", $nodename, " xml:lang=\"", $lang, "\"", $localattrs;
756	print $fh ">", $translation , "</$nodename>";
757	}
758	}
759	}
760
761	} else {
762	my $count = scalar(@all);
763	if ($count > 0) {
764	print $fh ">";
765	} else {
766	print $fh "/>";
767	}
768	my $index = 0;
769	while ($index < $count) {
770	my $type = $all[$index];
771	my $rest = $all[$index+1];
772	traverse($fh, $type, $rest, $language);
773	$index += 2;
774	}
775	if ($count > 0) {
776	print $fh "</$nodename>";
777	}
778	}
779	}
780	}
781
782	sub intltool_tree_char
783	{
784	my $expat = shift;
785	my $text = shift;
786	my $clist = $expat->{Curlist};
787	my $pos = $#$clist;
788
789	# Use original_string so that we retain escaped entities
790	# in CDATA sections.
791	#
792	if ($pos > 0 and $clist->[$pos - 1] eq '0') {
793	$clist->[$pos] .= $expat->original_string();
794	} else {
795	push @$clist, 0 => $expat->original_string();
796	}
797	}
798
799	sub intltool_tree_start
800	{
801	my $expat = shift;
802	my $tag = shift;
803	my @origlist = ();
804
805	# Use original_string so that we retain escaped entities
806	# in attribute values. We must convert the string to an
807	# @origlist array to conform to the structure of the Tree
808	# Style.
809	#
810	my @original_array = split /\x/, $expat->original_string();
811	my $source = $expat->original_string();
812
813	# Remove leading tag.
814	#
815	$source =~ s\|^\s<\s(\S+)\|\|s;
816
817	# Grab attribute key/value pairs and push onto @origlist array.
818	#
819	while ($source)
820	{
821	if ($source =~ /^\s([\w:-]+)\s[=]\s*["]/)
822	{
823	$source =~ s\|^\s([\w:-]+)\s[=]\s["]([^"])["]\|\|s;
824	push @origlist, $1;
825	push @origlist, '"' . $2 . '"';
826	}
827	elsif ($source =~ /^\s([\w:-]+)\s[=]\s*[']/)
828	{
829	$source =~ s\|^\s([\w:-]+)\s[=]\s[']([^'])[']\|\|s;
830	push @origlist, $1;
831	push @origlist, "'" . $2 . "'";
832	}
833	else
834	{
835	last;
836	}
837	}
838
839	my $ol = [ { @origlist } ];
840
841	push @{ $expat->{Lists} }, $expat->{Curlist};
842	push @{ $expat->{Curlist} }, $tag => $ol;
843	$expat->{Curlist} = $ol;
844	}
845
846	sub readXml
847	{
848	my $filename = shift \|\| return;
849	if(!-f $filename) {
850	die "ERROR Cannot find filename: $filename\n";
851	}
852
853	my $ret = eval 'require XML::Parser';
854	if(!$ret) {
855	die "You must have XML::Parser installed to run $0\n\n";
856	}
857	my $xp = new XML::Parser(Style => 'Tree');
858	$xp->setHandlers(Char => \&intltool_tree_char);
859	$xp->setHandlers(Start => \&intltool_tree_start);
860	my $tree = $xp->parsefile($filename);
861
862	# <foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo>
863	# would be:
864	# [foo, [{}, head, [{id => "a"}, 0, "Hello ", em, [{}, 0, "there"]], bar, [{},
865	# 0, "Howdy", ref, [{}]], 0, "do" ] ]
866
867	return $tree;
868	}
869
870	sub print_header
871	{
872	my $infile = shift;
873	my $fh = shift;
874	my $source;
875
876	if(!-f $infile) {
877	die "ERROR Cannot find filename: $infile\n";
878	}
879
880	print $fh qq{<?xml version="1.0" encoding="UTF-8"?>\n};
881	{
882	local $/;
883	open DOCINPUT, "<${FILE}" or die;
884	$source = <DOCINPUT>;
885	close DOCINPUT;
886	}
887	if ($source =~ /(<!DOCTYPE.\[.\]\s*>)/s)
888	{
889	print $fh "$1\n";
890	}
891	elsif ($source =~ /(<!DOCTYPE[^>]*>)/s)
892	{
893	print $fh "$1\n";
894	}
895	}
896
897	sub parseTree
898	{
899	my $fh = shift;
900	my $ref = shift;
901	my $language = shift \|\| "";
902
903	my $name = shift @{ $ref };
904	my $cont = shift @{ $ref };
905	traverse($fh, $name, $cont, $language);
906	}
907
908	sub xml_merge_output
909	{
910	my $source;
911
912	if ($MULTIPLE_OUTPUT) {
913	for my $lang (sort keys %po_files_by_lang) {
914	if ( ! -e $lang ) {
915	mkdir $lang or die "Cannot create subdirectory $lang: $!\n";
916	}
917	open OUTPUT, ">$lang/$OUTFILE" or die "Cannot open $lang/$OUTFILE: $!\n";
918	my $tree = readXml($FILE);
919	print_header($FILE, \*OUTPUT);
920	parseTree(\*OUTPUT, $tree, $lang);
921	close OUTPUT;
922	print "CREATED $lang/$OUTFILE\n" unless $QUIET_ARG;
923	}
924	}
925	open OUTPUT, ">$OUTFILE" or die "Cannot open $OUTFILE: $!\n";
926	my $tree = readXml($FILE);
927	print_header($FILE, \*OUTPUT);
928	parseTree(\*OUTPUT, $tree);
929	close OUTPUT;
930	print "CREATED $OUTFILE\n" unless $QUIET_ARG;
931	}
932
933	sub keys_merge_translations
934	{
935	open INPUT, "<${FILE}" or die;
936	open OUTPUT, ">${OUTFILE}" or die;
937
938	while (<INPUT>)
939	{
940	if (s/^(\s)_(\w+=(.))/$1$2/)
941	{
942	my $string = $3;
943
944	print OUTPUT;
945
946	my $non_translated_line = $_;
947
948	for my $lang (sort keys %po_files_by_lang)
949	{
950	my $translation = $translations{$lang, $string};
951	next if !$translation;
952
953	$_ = $non_translated_line;
954	s/(\w+)=.*/[$lang]$1=$translation/;
955	print OUTPUT;
956	}
957	}
958	else
959	{
960	print OUTPUT;
961	}
962	}
963
964	close OUTPUT;
965	close INPUT;
966	}
967
968	sub desktop_merge_translations
969	{
970	open INPUT, "<${FILE}" or die;
971	open OUTPUT, ">${OUTFILE}" or die;
972
973	while (<INPUT>)
974	{
975	if (s/^(\s)_(\w+=(.))/$1$2/)
976	{
977	my $string = $3;
978
979	print OUTPUT;
980
981	my $non_translated_line = $_;
982
983	for my $lang (sort keys %po_files_by_lang)
984	{
985	my $translation = $translations{$lang, $string};
986	next if !$translation;
987
988	$_ = $non_translated_line;
989	s/(\w+)=.*/${1}[$lang]=$translation/;
990	print OUTPUT;
991	}
992	}
993	else
994	{
995	print OUTPUT;
996	}
997	}
998
999	close OUTPUT;
1000	close INPUT;
1001	}
1002
1003	sub schemas_merge_translations
1004	{
1005	my $source;
1006
1007	{
1008	local $/; # slurp mode
1009	open INPUT, "<$FILE" or die "can't open $FILE: $!";
1010	$source = <INPUT>;
1011	close INPUT;
1012	}
1013
1014	open OUTPUT, ">$OUTFILE" or die;
1015
1016	# FIXME: support attribute translations
1017
1018	# Empty nodes never need translation, so unmark all of them.
1019	# For example, <_foo/> is just replaced by <foo/>.
1020	$source =~ s\|<\s_($w+)\s/>\|<$1/>\|g;
1021
1022	while ($source =~ s/
1023	(.*?)
1024	(\s+)(<locale\ name="C">(\s*)
1025	(<default>\s(?:<!--[^>]?-->\s)?(.?)\s<\/default>)?(\s)
1026	(<short>\s(?:<!--[^>]?-->\s)?(.?)\s<\/short>)?(\s)
1027	(<long>\s(?:<!--[^>]?-->\s)?(.?)\s<\/long>)?(\s)
1028	<\/locale>)
1029	//sx)
1030	{
1031	print OUTPUT $1;
1032
1033	my $locale_start_spaces = $2 ? $2 : '';
1034	my $default_spaces = $4 ? $4 : '';
1035	my $short_spaces = $7 ? $7 : '';
1036	my $long_spaces = $10 ? $10 : '';
1037	my $locale_end_spaces = $13 ? $13 : '';
1038	my $c_default_block = $3 ? $3 : '';
1039	my $default_string = $6 ? $6 : '';
1040	my $short_string = $9 ? $9 : '';
1041	my $long_string = $12 ? $12 : '';
1042
1043	print OUTPUT "$locale_start_spaces$c_default_block";
1044
1045	$default_string =~ s/\s+/ /g;
1046	$default_string = entity_decode($default_string);
1047	$short_string =~ s/\s+/ /g;
1048	$short_string = entity_decode($short_string);
1049	$long_string =~ s/\s+/ /g;
1050	$long_string = entity_decode($long_string);
1051
1052	for my $lang (sort keys %po_files_by_lang)
1053	{
1054	my $default_translation = $translations{$lang, $default_string};
1055	my $short_translation = $translations{$lang, $short_string};
1056	my $long_translation = $translations{$lang, $long_string};
1057
1058	next if (!$default_translation && !$short_translation &&
1059	!$long_translation);
1060
1061	print OUTPUT "\n$locale_start_spaces<locale name=\"$lang\">";
1062
1063	print OUTPUT "$default_spaces";
1064
1065	if ($default_translation)
1066	{
1067	$default_translation = entity_encode($default_translation);
1068	print OUTPUT "<default>$default_translation</default>";
1069	}
1070
1071	print OUTPUT "$short_spaces";
1072
1073	if ($short_translation)
1074	{
1075	$short_translation = entity_encode($short_translation);
1076	print OUTPUT "<short>$short_translation</short>";
1077	}
1078
1079	print OUTPUT "$long_spaces";
1080
1081	if ($long_translation)
1082	{
1083	$long_translation = entity_encode($long_translation);
1084	print OUTPUT "<long>$long_translation</long>";
1085	}
1086
1087	print OUTPUT "$locale_end_spaces</locale>";
1088	}
1089	}
1090
1091	print OUTPUT $source;
1092
1093	close OUTPUT;
1094	}
1095
1096	sub rfc822deb_merge_translations
1097	{
1098	my %encodings = ();
1099	for my $lang (keys %po_files_by_lang) {
1100	$encodings{$lang} = ($UTF8_ARG ? 'UTF-8' : get_po_encoding($po_files_by_lang{$lang}));
1101	}
1102
1103	my $source;
1104
1105	$Text::Wrap::huge = 'overflow';
1106	$Text::Wrap::break = qr/\n\|\s(?=\S)/;
1107
1108	{
1109	local $/; # slurp mode
1110	open INPUT, "<$FILE" or die "can't open $FILE: $!";
1111	$source = <INPUT>;
1112	close INPUT;
1113	}
1114
1115	open OUTPUT, ">${OUTFILE}" or die;
1116
1117	while ($source =~ /(^\|\n+)(_)([^:\s]+)(:[ \t])(.*?)(?=\n[\S\n]\|$)/sg)
1118	{
1119	my $sep = $1;
1120	my $non_translated_line = $3.$4;
1121	my $string = $5;
1122	my $underscore = length($2);
1123	next if $underscore eq 0 && $non_translated_line =~ /^#/;
1124	# Remove [] dummy strings
1125	my $stripped = $string;
1126	$stripped =~ s/\[\s[^\[\]]*\],/,/g if $underscore eq 2;
1127	$stripped =~ s/\[\s[^\[\]]*\]$//;
1128	$non_translated_line .= $stripped;
1129
1130	print OUTPUT $sep.$non_translated_line;
1131
1132	if ($underscore)
1133	{
1134	my @str_list = rfc822deb_split($underscore, $string);
1135
1136	for my $lang (sort keys %po_files_by_lang)
1137	{
1138	my $is_translated = 1;
1139	my $str_translated = '';
1140	my $first = 1;
1141
1142	for my $str (@str_list)
1143	{
1144	my $translation = $translations{$lang, $str};
1145
1146	if (!$translation)
1147	{
1148	$is_translated = 0;
1149	last;
1150	}
1151
1152	# $translation may also contain [] dummy
1153	# strings, mostly to indicate an empty string
1154	$translation =~ s/\[\s[^\[\]]*\]$//;
1155
1156	if ($first)
1157	{
1158	if ($underscore eq 2)
1159	{
1160	$str_translated .= $translation;
1161	}
1162	else
1163	{
1164	$str_translated .=
1165	Text::Tabs::expand($translation) .
1166	"\n";
1167	}
1168	}
1169	else
1170	{
1171	if ($underscore eq 2)
1172	{
1173	$str_translated .= ', ' . $translation;
1174	}
1175	else
1176	{
1177	$str_translated .= Text::Tabs::expand(
1178	Text::Wrap::wrap(' ', ' ', $translation)) .
1179	"\n .\n";
1180	}
1181	}
1182	$first = 0;
1183
1184	# To fix some problems with Text::Wrap::wrap
1185	$str_translated =~ s/(\n )+\n/\n .\n/g;
1186	}
1187	next unless $is_translated;
1188
1189	$str_translated =~ s/\n \.\n$//;
1190	$str_translated =~ s/\s+$//;
1191
1192	$_ = $non_translated_line;
1193	s/^(\w+):\s./$sep${1}-$lang.$encodings{$lang}: $str_translated/s;
1194	print OUTPUT;
1195	}
1196	}
1197	}
1198	print OUTPUT "\n";
1199
1200	close OUTPUT;
1201	close INPUT;
1202	}
1203
1204	sub rfc822deb_split
1205	{
1206	# Debian defines a special way to deal with rfc822-style files:
1207	# when a value contain newlines, it consists of
1208	# 1. a short form (first line)
1209	# 2. a long description, all lines begin with a space,
1210	# and paragraphs are separated by a single dot on a line
1211	# This routine returns an array of all paragraphs, and reformat
1212	# them.
1213	# When first argument is 2, the string is a comma separated list of
1214	# values.
1215	my $type = shift;
1216	my $text = shift;
1217	$text =~ s/^[ \t]//mg;
1218	return (split(/, */, $text, 0)) if $type ne 1;
1219	return ($text) if $text !~ /\n/;
1220
1221	$text =~ s/([^\n]*)\n//;
1222	my @list = ($1);
1223	my $str = '';
1224
1225	for my $line (split (/\n/, $text))
1226	{
1227	chomp $line;
1228	if ($line =~ /^\.\s*$/)
1229	{
1230	# New paragraph
1231	$str =~ s/\s*$//;
1232	push(@list, $str);
1233	$str = '';
1234	}
1235	elsif ($line =~ /^\s/)
1236	{
1237	# Line which must not be reformatted
1238	$str .= "\n" if length ($str) && $str !~ /\n$/;
1239	$line =~ s/\s+$//;
1240	$str .= $line."\n";
1241	}
1242	else
1243	{
1244	# Continuation line, remove newline
1245	$str .= " " if length ($str) && $str !~ /\n$/;
1246	$str .= $line;
1247	}
1248	}
1249
1250	$str =~ s/\s*$//;
1251	push(@list, $str) if length ($str);
1252
1253	return @list;
1254	}
1255

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format