Context Navigation

intltool-merge.in @ 21463

Visit:

Revision 21463, 30.6 KB checked in by ghudson, 19 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r21462, which included commits to RCS files with non-trunk default branches.

Line
1	#!@INTLTOOL_PERL@ -w
2	# -- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 4 --
3
4	#
5	# The Intltool Message Merger
6	#
7	# Copyright (C) 2000, 2003 Free Software Foundation.
8	# Copyright (C) 2000, 2001 Eazel, Inc
9	#
10	# Intltool is free software; you can redistribute it and/or
11	# modify it under the terms of the GNU General Public License
12	# version 2 published by the Free Software Foundation.
13	#
14	# Intltool is distributed in the hope that it will be useful,
15	# but WITHOUT ANY WARRANTY; without even the implied warranty of
16	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17	# General Public License for more details.
18	#
19	# You should have received a copy of the GNU General Public License
20	# along with this program; if not, write to the Free Software
21	# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22	#
23	# As a special exception to the GNU General Public License, if you
24	# distribute this file as part of a program that contains a
25	# configuration script generated by Autoconf, you may include it under
26	# the same distribution terms that you use for the rest of that program.
27	#
28	# Authors: Maciej Stachowiak <mjs@noisehavoc.org>
29	# Kenneth Christiansen <kenneth@gnu.org>
30	# Darin Adler <darin@bentspoon.com>
31	#
32	# Proper XML UTF-8'ification written by Cyrille Chepelov <chepelov@calixo.net>
33	#
34
35	## Release information
36	my $PROGRAM = "intltool-merge";
37	my $PACKAGE = "intltool";
38	my $VERSION = "0.31.3";
39
40	## Loaded modules
41	use strict;
42	use Getopt::Long;
43	use Text::Wrap;
44	use File::Basename;
45
46	my $must_end_tag = -1;
47	my $last_depth = -1;
48	my $translation_depth = -1;
49	my @tag_stack = ();
50	my @entered_tag = ();
51	my @translation_strings = ();
52	my $leading_space = "";
53
54	## Scalars used by the option stuff
55	my $HELP_ARG = 0;
56	my $VERSION_ARG = 0;
57	my $BA_STYLE_ARG = 0;
58	my $XML_STYLE_ARG = 0;
59	my $KEYS_STYLE_ARG = 0;
60	my $DESKTOP_STYLE_ARG = 0;
61	my $SCHEMAS_STYLE_ARG = 0;
62	my $RFC822DEB_STYLE_ARG = 0;
63	my $QUIET_ARG = 0;
64	my $PASS_THROUGH_ARG = 0;
65	my $UTF8_ARG = 0;
66	my $MULTIPLE_OUTPUT = 0;
67	my $cache_file;
68
69	## Handle options
70	GetOptions
71	(
72	"help" => \$HELP_ARG,
73	"version" => \$VERSION_ARG,
74	"quiet\|q" => \$QUIET_ARG,
75	"oaf-style\|o" => \$BA_STYLE_ARG, ## for compatibility
76	"ba-style\|b" => \$BA_STYLE_ARG,
77	"xml-style\|x" => \$XML_STYLE_ARG,
78	"keys-style\|k" => \$KEYS_STYLE_ARG,
79	"desktop-style\|d" => \$DESKTOP_STYLE_ARG,
80	"schemas-style\|s" => \$SCHEMAS_STYLE_ARG,
81	"rfc822deb-style\|r" => \$RFC822DEB_STYLE_ARG,
82	"pass-through\|p" => \$PASS_THROUGH_ARG,
83	"utf8\|u" => \$UTF8_ARG,
84	"multiple-output\|m" => \$MULTIPLE_OUTPUT,
85	"cache\|c=s" => \$cache_file
86	) or &error;
87
88	my $PO_DIR;
89	my $FILE;
90	my $OUTFILE;
91
92	my %po_files_by_lang = ();
93	my %translations = ();
94	my $iconv = $ENV{"ICONV"} \|\| $ENV{"INTLTOOL_ICONV"} \|\| "@INTLTOOL_ICONV@";
95
96	# Use this instead of \w for XML files to handle more possible characters.
97	my $w = "[-A-Za-z0-9._:]";
98
99	# XML quoted string contents
100	my $q = "[^\\\"]*";
101
102	## Check for options.
103
104	if ($VERSION_ARG)
105	{
106	&print_version;
107	}
108	elsif ($HELP_ARG)
109	{
110	&print_help;
111	}
112	elsif ($BA_STYLE_ARG && @ARGV > 2)
113	{
114	&preparation;
115	&print_message;
116	&ba_merge_translations;
117	&finalize;
118	}
119	elsif ($XML_STYLE_ARG && @ARGV > 2)
120	{
121	&utf8_sanity_check;
122	&preparation;
123	&print_message;
124
125	&xml_merge_output;
126
127	&finalize;
128	}
129	elsif ($KEYS_STYLE_ARG && @ARGV > 2)
130	{
131	&utf8_sanity_check;
132	&preparation;
133	&print_message;
134	&keys_merge_translations;
135	&finalize;
136	}
137	elsif ($DESKTOP_STYLE_ARG && @ARGV > 2)
138	{
139	&preparation;
140	&print_message;
141	&desktop_merge_translations;
142	&finalize;
143	}
144	elsif ($SCHEMAS_STYLE_ARG && @ARGV > 2)
145	{
146	&preparation;
147	&print_message;
148	&schemas_merge_translations;
149	&finalize;
150	}
151	elsif ($RFC822DEB_STYLE_ARG && @ARGV > 2)
152	{
153	&preparation;
154	&print_message;
155	&rfc822deb_merge_translations;
156	&finalize;
157	}
158	else
159	{
160	&print_help;
161	}
162
163	exit;
164
165	## Sub for printing release information
166	sub print_version
167	{
168	print <<_EOF_;
169	${PROGRAM} (${PACKAGE}) ${VERSION}
170	Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen.
171
172	Copyright (C) 2000-2003 Free Software Foundation, Inc.
173	Copyright (C) 2000-2001 Eazel, Inc.
174	This is free software; see the source for copying conditions. There is NO
175	warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
176	_EOF_
177	exit;
178	}
179
180	## Sub for printing usage information
181	sub print_help
182	{
183	print <<_EOF_;
184	Usage: ${PROGRAM} [OPTION]... PO_DIRECTORY FILENAME OUTPUT_FILE
185	Generates an output file that includes some localized attributes from an
186	untranslated source file.
187
188	Mandatory options: (exactly one must be specified)
189	-b, --ba-style includes translations in the bonobo-activation style
190	-d, --desktop-style includes translations in the desktop style
191	-k, --keys-style includes translations in the keys style
192	-s, --schemas-style includes translations in the schemas style
193	-r, --rfc822deb-style includes translations in the RFC822 style
194	-x, --xml-style includes translations in the standard xml style
195
196	Other options:
197	-u, --utf8 convert all strings to UTF-8 before merging
198	-p, --pass-through use strings as found in .po files, without
199	conversion (STRONGLY unrecommended with -x)
200	-m, --multiple-output output one localized file per locale, instead of
201	a single file containing all localized elements
202	-c, --cache=FILE specify cache file name
203	(usually \$top_builddir/po/.intltool-merge-cache)
204	-q, --quiet suppress most messages
205	--help display this help and exit
206	--version output version information and exit
207
208	Report bugs to http://bugzilla.gnome.org/ (product name "$PACKAGE")
209	or send email to <xml-i18n-tools\@gnome.org>.
210	_EOF_
211	exit;
212	}
213
214
215	## Sub for printing error messages
216	sub print_error
217	{
218	print STDERR "Try `${PROGRAM} --help' for more information.\n";
219	exit;
220	}
221
222
223	sub print_message
224	{
225	print "Merging translations into $OUTFILE.\n" unless $QUIET_ARG;
226	}
227
228
229	sub preparation
230	{
231	$PO_DIR = $ARGV[0];
232	$FILE = $ARGV[1];
233	$OUTFILE = $ARGV[2];
234
235	&gather_po_files;
236	&get_translation_database;
237	}
238
239	# General-purpose code for looking up translations in .po files
240
241	sub po_file2lang
242	{
243	my ($tmp) = @_;
244	$tmp =~ s/^.\/(.)\.po$/$1/;
245	return $tmp;
246	}
247
248	sub gather_po_files
249	{
250	for my $po_file (glob "$PO_DIR/*.po") {
251	$po_files_by_lang{po_file2lang($po_file)} = $po_file;
252	}
253	}
254
255	sub get_local_charset
256	{
257	my ($encoding) = @_;
258	my $alias_file = $ENV{"G_CHARSET_ALIAS"} \|\| "/opt/gnomecvs/lib/charset.alias";
259
260	# seek character encoding aliases in charset.alias (glib)
261
262	if (open CHARSET_ALIAS, $alias_file)
263	{
264	while (<CHARSET_ALIAS>)
265	{
266	next if /^\#/;
267	return $1 if (/^\s*([-._a-zA-Z0-9]+)\s+$encoding\b/i)
268	}
269
270	close CHARSET_ALIAS;
271	}
272
273	# if not found, return input string
274
275	return $encoding;
276	}
277
278	sub get_po_encoding
279	{
280	my ($in_po_file) = @_;
281	my $encoding = "";
282
283	open IN_PO_FILE, $in_po_file or die;
284	while (<IN_PO_FILE>)
285	{
286	## example: "Content-Type: text/plain; charset=ISO-8859-1\n"
287	if (/Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/)
288	{
289	$encoding = $1;
290	last;
291	}
292	}
293	close IN_PO_FILE;
294
295	if (!$encoding)
296	{
297	print STDERR "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n" unless $QUIET_ARG;
298	$encoding = "ISO-8859-1";
299	}
300
301	system ("$iconv -f $encoding -t UTF-8 </dev/null 2>/dev/null");
302	if ($?) {
303	$encoding = get_local_charset($encoding);
304	}
305
306	return $encoding
307	}
308
309	sub utf8_sanity_check
310	{
311	if (!$UTF8_ARG)
312	{
313	if (!$PASS_THROUGH_ARG)
314	{
315	$PASS_THROUGH_ARG="1";
316	}
317	}
318	}
319
320	sub get_translation_database
321	{
322	if ($cache_file) {
323	&get_cached_translation_database;
324	} else {
325	&create_translation_database;
326	}
327	}
328
329	sub get_newest_po_age
330	{
331	my $newest_age;
332
333	foreach my $file (values %po_files_by_lang)
334	{
335	my $file_age = -M $file;
336	$newest_age = $file_age if !$newest_age \|\| $file_age < $newest_age;
337	}
338
339	$newest_age = 0 if !$newest_age;
340
341	return $newest_age;
342	}
343
344	sub create_cache
345	{
346	print "Generating and caching the translation database\n" unless $QUIET_ARG;
347
348	&create_translation_database;
349
350	open CACHE, ">$cache_file" \|\| die;
351	print CACHE join "\x01", %translations;
352	close CACHE;
353	}
354
355	sub load_cache
356	{
357	print "Found cached translation database\n" unless $QUIET_ARG;
358
359	my $contents;
360	open CACHE, "<$cache_file" \|\| die;
361	{
362	local $/;
363	$contents = <CACHE>;
364	}
365	close CACHE;
366	%translations = split "\x01", $contents;
367	}
368
369	sub get_cached_translation_database
370	{
371	my $cache_file_age = -M $cache_file;
372	if (defined $cache_file_age)
373	{
374	if ($cache_file_age <= &get_newest_po_age)
375	{
376	&load_cache;
377	return;
378	}
379	print "Found too-old cached translation database\n" unless $QUIET_ARG;
380	}
381
382	&create_cache;
383	}
384
385	sub create_translation_database
386	{
387	for my $lang (keys %po_files_by_lang)
388	{
389	my $po_file = $po_files_by_lang{$lang};
390
391	if ($UTF8_ARG)
392	{
393	my $encoding = get_po_encoding ($po_file);
394
395	if (lc $encoding eq "utf-8")
396	{
397	open PO_FILE, "<$po_file";
398	}
399	else
400	{
401	print STDERR "WARNING: $po_file is not in UTF-8 but $encoding, converting...\n" unless $QUIET_ARG;;
402
403	open PO_FILE, "$iconv -f $encoding -t UTF-8 $po_file\|";
404	}
405	}
406	else
407	{
408	open PO_FILE, "<$po_file";
409	}
410
411	my $nextfuzzy = 0;
412	my $inmsgid = 0;
413	my $inmsgstr = 0;
414	my $msgid = "";
415	my $msgstr = "";
416
417	while (<PO_FILE>)
418	{
419	$nextfuzzy = 1 if /^#, fuzzy/;
420
421	if (/^msgid "((\\.\|[^\\])*)"/ )
422	{
423	$translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
424	$msgid = "";
425	$msgstr = "";
426
427	if ($nextfuzzy) {
428	$inmsgid = 0;
429	} else {
430	$msgid = unescape_po_string($1);
431	$inmsgid = 1;
432	}
433	$inmsgstr = 0;
434	$nextfuzzy = 0;
435	}
436
437	if (/^msgstr "((\\.\|[^\\])*)"/)
438	{
439	$msgstr = unescape_po_string($1);
440	$inmsgstr = 1;
441	$inmsgid = 0;
442	}
443
444	if (/^"((\\.\|[^\\])*)"/)
445	{
446	$msgid .= unescape_po_string($1) if $inmsgid;
447	$msgstr .= unescape_po_string($1) if $inmsgstr;
448	}
449	}
450	$translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
451	}
452	}
453
454	sub finalize
455	{
456	}
457
458	sub unescape_one_sequence
459	{
460	my ($sequence) = @_;
461
462	return "\\" if $sequence eq "\\\\";
463	return "\"" if $sequence eq "\\\"";
464	return "\n" if $sequence eq "\\n";
465
466	# gettext also handles \n, \t, \b, \r, \f, \v, \a, \xxx (octal),
467	# \xXX (hex) and has a comment saying they want to handle \u and \U.
468
469	return $sequence;
470	}
471
472	sub unescape_po_string
473	{
474	my ($string) = @_;
475
476	$string =~ s/(\\.)/unescape_one_sequence($1)/eg;
477
478	return $string;
479	}
480
481	## NOTE: deal with < - < but not > - > because it seems its ok to have
482	## > in the entity. For further info please look at #84738.
483	sub entity_decode
484	{
485	local ($_) = @_;
486
487	s/'/'/g; # '
488	s/"/"/g; # "
489	s/&/&/g;
490	s/</</g;
491
492	return $_;
493	}
494
495	# entity_encode: (string)
496	#
497	# Encode the given string to XML format (encode '<' etc). It also
498	# encodes high bit if not in UTF-8 mode.
499
500	sub entity_encode
501	{
502	my ($pre_encoded) = @_;
503
504	my @list_of_chars = unpack ('C*', $pre_encoded);
505
506	if ($PASS_THROUGH_ARG)
507	{
508	return join ('', map (&entity_encode_int_even_high_bit, @list_of_chars));
509	}
510	else
511	{
512	# with UTF-8 we only encode minimalistic
513	return join ('', map (&entity_encode_int_minimalist, @list_of_chars));
514	}
515	}
516
517	sub entity_encode_int_minimalist
518	{
519	return """ if $_ == 34;
520	return "&" if $_ == 38;
521	return "'" if $_ == 39;
522	return "<" if $_ == 60;
523	return chr $_;
524	}
525
526	sub entity_encode_int_even_high_bit
527	{
528	if ($_ > 127 \|\| $_ == 34 \|\| $_ == 38 \|\| $_ == 39 \|\| $_ == 60)
529	{
530	# the ($_ > 127) should probably be removed
531	return "&#" . $_ . ";";
532	}
533	else
534	{
535	return chr $_;
536	}
537	}
538
539	sub entity_encoded_translation
540	{
541	my ($lang, $string) = @_;
542
543	my $translation = $translations{$lang, $string};
544	return $string if !$translation;
545	return entity_encode ($translation);
546	}
547
548	## XML (bonobo-activation specific) merge code
549
550	sub ba_merge_translations
551	{
552	my $source;
553
554	{
555	local $/; # slurp mode
556	open INPUT, "<$FILE" or die "can't open $FILE: $!";
557	$source = <INPUT>;
558	close INPUT;
559	}
560
561	open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!";
562
563	while ($source =~ s\|^(.?)([ \t]<\s$w+\s+($w+\s=\s"$q"\s)+/?>)([ \t]*\n)?\|\|s)
564	{
565	print OUTPUT $1;
566
567	my $node = $2 . "\n";
568
569	my @strings = ();
570	$_ = $node;
571	while (s/(\s)_($w+\s=\s"($q)")/$1$2/s) {
572	push @strings, entity_decode($3);
573	}
574	print OUTPUT;
575
576	my %langs;
577	for my $string (@strings)
578	{
579	for my $lang (keys %po_files_by_lang)
580	{
581	$langs{$lang} = 1 if $translations{$lang, $string};
582	}
583	}
584
585	for my $lang (sort keys %langs)
586	{
587	$_ = $node;
588	s/(\sname\s=\s)"($q)"/$1"$2-$lang"/s;
589	s/(\s)_($w+\s=\s")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg;
590	print OUTPUT;
591	}
592	}
593
594	print OUTPUT $source;
595
596	close OUTPUT;
597	}
598
599
600	## XML (non-bonobo-activation) merge code
601
602
603	# Process tag attributes
604	# Only parameter is a HASH containing attributes -> values mapping
605	sub getAttributeString
606	{
607	my $sub = shift;
608	my $do_translate = shift \|\| 0;
609	my $language = shift \|\| "";
610	my $result = "";
611	foreach my $e (reverse(sort(keys %{ $sub }))) {
612	my $key = $e;
613	my $string = $sub->{$e};
614	my $quote = '"';
615
616	$string =~ s/^[\s]+//;
617	$string =~ s/[\s]+$//;
618
619	if ($string =~ /^'.*'$/)
620	{
621	$quote = "'";
622	}
623	$string =~ s/^['"]//g;
624	$string =~ s/['"]$//g;
625
626	if ($do_translate && $key =~ /^_/) {
627	$key =~ s\|^_\|\|g;
628	if ($language) {
629
630	# Handle translation
631	#
632	my $decode_string = entity_decode($string);
633	my $translation = $translations{$language, $decode_string};
634	if ($translation) {
635	$translation = entity_encode($translation);
636	$string = $translation;
637	}
638	}
639	}
640
641	$result .= " $key=$quote$string$quote";
642	}
643	return $result;
644	}
645
646	# Returns a translatable string from XML node, it works on contents of every node in XML::Parser tree
647	# doesn't support nesting of translatable tags (i.e. <_blah>this <_doh>doesn't</_doh> work</_blah> -- besides
648	# can you define the correct semantics for this?)
649	#
650
651	sub getXMLstring
652	{
653	my $ref = shift;
654	my @list = @{ $ref };
655	my $result = "";
656
657	my $count = scalar(@list);
658	my $attrs = $list[0];
659	my $index = 1;
660	while ($index < $count) {
661	my $type = $list[$index];
662	my $content = $list[$index+1];
663	if (! $type ) {
664	# We've got CDATA
665	if ($content) {
666	# lets strip the whitespace here, and ONLY here
667	$content =~ s/\s+/ /gs if (!((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/)));
668	$result .= ($content);
669	} else {
670	#print "no cdata content when expected it\n"; # is this possible, is this ok?
671	# what to do if this happens?
672	# Did I mention that I hate XML::Parser tree style?
673	}
674	} else {
675	# We've got another element
676	$result .= "<$type";
677	$result .= getAttributeString($attrs, 0); # no nested translatable elements
678	if ($content) {
679	my $subresult = getXMLstring($content);
680	if ($subresult) {
681	$result .= ">".$subresult . "</$type>";
682	} else {
683	$result .= "/>";
684	}
685	} else {
686	$result .= "/>";
687	}
688	}
689	$index += 2;
690	}
691	return $result;
692	}
693
694	sub traverse
695	{
696	my $fh = shift;
697	my $nodename = shift;
698	my $content = shift;
699	my $language = shift \|\| "";
700
701	if (!$nodename) {
702	if ($content =~ /^[\s]*$/) {
703	$leading_space .= $content;
704	}
705	print $fh $content;
706	} else {
707	# element
708	my @all = @{ $content };
709	my $attrs = shift @all;
710	my $outattr = getAttributeString($attrs, 1, $language);
711	my $translate = 0;
712
713	if ($nodename =~ /^_/) {
714	$translate = 1;
715	$nodename =~ s/^_//;
716	}
717	my $lookup = '';
718	print $fh "<$nodename$outattr";
719	if ($translate) {
720	$lookup = getXMLstring($content);
721	if (!((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/))) {
722	$lookup =~ s/^\s+//s;
723	$lookup =~ s/\s+$//s;
724	}
725
726	if ($lookup) {
727	my $translation = $translations{$language, $lookup};
728	if ($MULTIPLE_OUTPUT && $translation) {
729	print $fh " xml:lang=\"", $language, "\"";
730	print $fh ">", $translation, "</$nodename>";
731	return; # this means there will be no same translation with xml:lang="$language"...
732	# if we want them both, just remove this "return"
733	} else {
734	print $fh ">$lookup</$nodename>";
735	}
736	} else {
737	print $fh "/>";
738	}
739
740
741	for my $lang (sort keys %po_files_by_lang) {
742	if ($MULTIPLE_OUTPUT && $lang ne "$language") {
743	next;
744	}
745	if ($lang) {
746
747	# Handle translation
748	#
749	my $localattrs = getAttributeString($attrs, 1, $lang);
750	my $decode_string = ($lookup); #entity_decode($lookup);
751	my $translation = $translations{$lang, $decode_string};
752	if ($translation) {
753	$translation = ($translation);
754	print $fh "\n";
755	$leading_space =~ s/.*\n//g;
756	print $fh $leading_space;
757	print $fh "<", $nodename, " xml:lang=\"", $lang, "\"", $localattrs;
758	print $fh ">", $translation , "</$nodename>";
759	}
760	}
761	}
762
763	} else {
764	my $count = scalar(@all);
765	if ($count > 0) {
766	print $fh ">";
767	} else {
768	print $fh "/>";
769	}
770	my $index = 0;
771	while ($index < $count) {
772	my $type = $all[$index];
773	my $rest = $all[$index+1];
774	traverse($fh, $type, $rest, $language);
775	$index += 2;
776	}
777	if ($count > 0) {
778	print $fh "</$nodename>";
779	}
780	}
781	}
782	}
783
784	sub intltool_tree_char
785	{
786	my $expat = shift;
787	my $text = shift;
788	my $clist = $expat->{Curlist};
789	my $pos = $#$clist;
790
791	# Use original_string so that we retain escaped entities
792	# in CDATA sections.
793	#
794	if ($pos > 0 and $clist->[$pos - 1] eq '0') {
795	$clist->[$pos] .= $expat->original_string();
796	} else {
797	push @$clist, 0 => $expat->original_string();
798	}
799	}
800
801	sub intltool_tree_start
802	{
803	my $expat = shift;
804	my $tag = shift;
805	my @origlist = ();
806
807	# Use original_string so that we retain escaped entities
808	# in attribute values. We must convert the string to an
809	# @origlist array to conform to the structure of the Tree
810	# Style.
811	#
812	my @original_array = split /\x/, $expat->original_string();
813	my $source = $expat->original_string();
814
815	# Remove leading tag.
816	#
817	$source =~ s\|^\s<\s(\S+)\|\|s;
818
819	# Grab attribute key/value pairs and push onto @origlist array.
820	#
821	while ($source)
822	{
823	if ($source =~ /^\s([\w:-]+)\s[=]\s*["]/)
824	{
825	$source =~ s\|^\s([\w:-]+)\s[=]\s["]([^"])["]\|\|s;
826	push @origlist, $1;
827	push @origlist, '"' . $2 . '"';
828	}
829	elsif ($source =~ /^\s([\w:-]+)\s[=]\s*[']/)
830	{
831	$source =~ s\|^\s([\w:-]+)\s[=]\s[']([^'])[']\|\|s;
832	push @origlist, $1;
833	push @origlist, "'" . $2 . "'";
834	}
835	else
836	{
837	last;
838	}
839	}
840
841	my $ol = [ { @origlist } ];
842
843	push @{ $expat->{Lists} }, $expat->{Curlist};
844	push @{ $expat->{Curlist} }, $tag => $ol;
845	$expat->{Curlist} = $ol;
846	}
847
848	sub readXml
849	{
850	my $filename = shift \|\| return;
851	if(!-f $filename) {
852	die "ERROR Cannot find filename: $filename\n";
853	}
854
855	my $ret = eval 'require XML::Parser';
856	if(!$ret) {
857	die "You must have XML::Parser installed to run $0\n\n";
858	}
859	my $xp = new XML::Parser(Style => 'Tree');
860	$xp->setHandlers(Char => \&intltool_tree_char);
861	$xp->setHandlers(Start => \&intltool_tree_start);
862	my $tree = $xp->parsefile($filename);
863
864	# <foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo>
865	# would be:
866	# [foo, [{}, head, [{id => "a"}, 0, "Hello ", em, [{}, 0, "there"]], bar, [{},
867	# 0, "Howdy", ref, [{}]], 0, "do" ] ]
868
869	return $tree;
870	}
871
872	sub print_header
873	{
874	my $infile = shift;
875	my $fh = shift;
876	my $source;
877
878	if(!-f $infile) {
879	die "ERROR Cannot find filename: $infile\n";
880	}
881
882	print $fh qq{<?xml version="1.0" encoding="UTF-8"?>\n};
883	{
884	local $/;
885	open DOCINPUT, "<${FILE}" or die;
886	$source = <DOCINPUT>;
887	close DOCINPUT;
888	}
889	if ($source =~ /(<!DOCTYPE.\[.\]\s*>)/s)
890	{
891	print $fh "$1\n";
892	}
893	elsif ($source =~ /(<!DOCTYPE[^>]*>)/s)
894	{
895	print $fh "$1\n";
896	}
897	}
898
899	sub parseTree
900	{
901	my $fh = shift;
902	my $ref = shift;
903	my $language = shift \|\| "";
904
905	my $name = shift @{ $ref };
906	my $cont = shift @{ $ref };
907	traverse($fh, $name, $cont, $language);
908	}
909
910	sub xml_merge_output
911	{
912	my $source;
913
914	if ($MULTIPLE_OUTPUT) {
915	for my $lang (sort keys %po_files_by_lang) {
916	if ( ! -e $lang ) {
917	mkdir $lang or die "Cannot create subdirectory $lang: $!\n";
918	}
919	open OUTPUT, ">$lang/$OUTFILE" or die "Cannot open $lang/$OUTFILE: $!\n";
920	my $tree = readXml($FILE);
921	print_header($FILE, \*OUTPUT);
922	parseTree(\*OUTPUT, $tree, $lang);
923	close OUTPUT;
924	print "CREATED $lang/$OUTFILE\n" unless $QUIET_ARG;
925	}
926	}
927	open OUTPUT, ">$OUTFILE" or die "Cannot open $OUTFILE: $!\n";
928	my $tree = readXml($FILE);
929	print_header($FILE, \*OUTPUT);
930	parseTree(\*OUTPUT, $tree);
931	close OUTPUT;
932	print "CREATED $OUTFILE\n" unless $QUIET_ARG;
933	}
934
935	sub keys_merge_translations
936	{
937	open INPUT, "<${FILE}" or die;
938	open OUTPUT, ">${OUTFILE}" or die;
939
940	while (<INPUT>)
941	{
942	if (s/^(\s)_(\w+=(.))/$1$2/)
943	{
944	my $string = $3;
945
946	print OUTPUT;
947
948	my $non_translated_line = $_;
949
950	for my $lang (sort keys %po_files_by_lang)
951	{
952	my $translation = $translations{$lang, $string};
953	next if !$translation;
954
955	$_ = $non_translated_line;
956	s/(\w+)=.*/[$lang]$1=$translation/;
957	print OUTPUT;
958	}
959	}
960	else
961	{
962	print OUTPUT;
963	}
964	}
965
966	close OUTPUT;
967	close INPUT;
968	}
969
970	sub desktop_merge_translations
971	{
972	open INPUT, "<${FILE}" or die;
973	open OUTPUT, ">${OUTFILE}" or die;
974
975	while (<INPUT>)
976	{
977	if (s/^(\s)_(\w+=(.))/$1$2/)
978	{
979	my $string = $3;
980
981	print OUTPUT;
982
983	my $non_translated_line = $_;
984
985	for my $lang (sort keys %po_files_by_lang)
986	{
987	my $translation = $translations{$lang, $string};
988	next if !$translation;
989
990	$_ = $non_translated_line;
991	s/(\w+)=.*/${1}[$lang]=$translation/;
992	print OUTPUT;
993	}
994	}
995	else
996	{
997	print OUTPUT;
998	}
999	}
1000
1001	close OUTPUT;
1002	close INPUT;
1003	}
1004
1005	sub schemas_merge_translations
1006	{
1007	my $source;
1008
1009	{
1010	local $/; # slurp mode
1011	open INPUT, "<$FILE" or die "can't open $FILE: $!";
1012	$source = <INPUT>;
1013	close INPUT;
1014	}
1015
1016	open OUTPUT, ">$OUTFILE" or die;
1017
1018	# FIXME: support attribute translations
1019
1020	# Empty nodes never need translation, so unmark all of them.
1021	# For example, <_foo/> is just replaced by <foo/>.
1022	$source =~ s\|<\s_($w+)\s/>\|<$1/>\|g;
1023
1024	while ($source =~ s/
1025	(.*?)
1026	(\s+)(<locale\ name="C">(\s*)
1027	(<default>\s(?:<!--[^>]?-->\s)?(.?)\s<\/default>)?(\s)
1028	(<short>\s(?:<!--[^>]?-->\s)?(.?)\s<\/short>)?(\s)
1029	(<long>\s(?:<!--[^>]?-->\s)?(.?)\s<\/long>)?(\s)
1030	<\/locale>)
1031	//sx)
1032	{
1033	print OUTPUT $1;
1034
1035	my $locale_start_spaces = $2 ? $2 : '';
1036	my $default_spaces = $4 ? $4 : '';
1037	my $short_spaces = $7 ? $7 : '';
1038	my $long_spaces = $10 ? $10 : '';
1039	my $locale_end_spaces = $13 ? $13 : '';
1040	my $c_default_block = $3 ? $3 : '';
1041	my $default_string = $6 ? $6 : '';
1042	my $short_string = $9 ? $9 : '';
1043	my $long_string = $12 ? $12 : '';
1044
1045	print OUTPUT "$locale_start_spaces$c_default_block";
1046
1047	$default_string =~ s/\s+/ /g;
1048	$default_string = entity_decode($default_string);
1049	$short_string =~ s/\s+/ /g;
1050	$short_string = entity_decode($short_string);
1051	$long_string =~ s/\s+/ /g;
1052	$long_string = entity_decode($long_string);
1053
1054	for my $lang (sort keys %po_files_by_lang)
1055	{
1056	my $default_translation = $translations{$lang, $default_string};
1057	my $short_translation = $translations{$lang, $short_string};
1058	my $long_translation = $translations{$lang, $long_string};
1059
1060	next if (!$default_translation && !$short_translation &&
1061	!$long_translation);
1062
1063	print OUTPUT "\n$locale_start_spaces<locale name=\"$lang\">";
1064
1065	print OUTPUT "$default_spaces";
1066
1067	if ($default_translation)
1068	{
1069	$default_translation = entity_encode($default_translation);
1070	print OUTPUT "<default>$default_translation</default>";
1071	}
1072
1073	print OUTPUT "$short_spaces";
1074
1075	if ($short_translation)
1076	{
1077	$short_translation = entity_encode($short_translation);
1078	print OUTPUT "<short>$short_translation</short>";
1079	}
1080
1081	print OUTPUT "$long_spaces";
1082
1083	if ($long_translation)
1084	{
1085	$long_translation = entity_encode($long_translation);
1086	print OUTPUT "<long>$long_translation</long>";
1087	}
1088
1089	print OUTPUT "$locale_end_spaces</locale>";
1090	}
1091	}
1092
1093	print OUTPUT $source;
1094
1095	close OUTPUT;
1096	}
1097
1098	sub rfc822deb_merge_translations
1099	{
1100	my %encodings = ();
1101	for my $lang (keys %po_files_by_lang) {
1102	$encodings{$lang} = ($UTF8_ARG ? 'UTF-8' : get_po_encoding($po_files_by_lang{$lang}));
1103	}
1104
1105	my $source;
1106
1107	$Text::Wrap::huge = 'overflow';
1108	$Text::Wrap::break = qr/\n\|\s(?=\S)/;
1109
1110	{
1111	local $/; # slurp mode
1112	open INPUT, "<$FILE" or die "can't open $FILE: $!";
1113	$source = <INPUT>;
1114	close INPUT;
1115	}
1116
1117	open OUTPUT, ">${OUTFILE}" or die;
1118
1119	while ($source =~ /(^\|\n+)(_)([^:\s]+)(:[ \t])(.*?)(?=\n[\S\n]\|$)/sg)
1120	{
1121	my $sep = $1;
1122	my $non_translated_line = $3.$4;
1123	my $string = $5;
1124	my $underscore = length($2);
1125	next if $underscore eq 0 && $non_translated_line =~ /^#/;
1126	# Remove [] dummy strings
1127	my $stripped = $string;
1128	$stripped =~ s/\[\s[^\[\]]*\],/,/g if $underscore eq 2;
1129	$stripped =~ s/\[\s[^\[\]]*\]$//;
1130	$non_translated_line .= $stripped;
1131
1132	print OUTPUT $sep.$non_translated_line;
1133
1134	if ($underscore)
1135	{
1136	my @str_list = rfc822deb_split($underscore, $string);
1137
1138	for my $lang (sort keys %po_files_by_lang)
1139	{
1140	my $is_translated = 1;
1141	my $str_translated = '';
1142	my $first = 1;
1143
1144	for my $str (@str_list)
1145	{
1146	my $translation = $translations{$lang, $str};
1147
1148	if (!$translation)
1149	{
1150	$is_translated = 0;
1151	last;
1152	}
1153
1154	# $translation may also contain [] dummy
1155	# strings, mostly to indicate an empty string
1156	$translation =~ s/\[\s[^\[\]]*\]$//;
1157
1158	if ($first)
1159	{
1160	if ($underscore eq 2)
1161	{
1162	$str_translated .= $translation;
1163	}
1164	else
1165	{
1166	$str_translated .=
1167	Text::Tabs::expand($translation) .
1168	"\n";
1169	}
1170	}
1171	else
1172	{
1173	if ($underscore eq 2)
1174	{
1175	$str_translated .= ', ' . $translation;
1176	}
1177	else
1178	{
1179	$str_translated .= Text::Tabs::expand(
1180	Text::Wrap::wrap(' ', ' ', $translation)) .
1181	"\n .\n";
1182	}
1183	}
1184	$first = 0;
1185
1186	# To fix some problems with Text::Wrap::wrap
1187	$str_translated =~ s/(\n )+\n/\n .\n/g;
1188	}
1189	next unless $is_translated;
1190
1191	$str_translated =~ s/\n \.\n$//;
1192	$str_translated =~ s/\s+$//;
1193
1194	$_ = $non_translated_line;
1195	s/^(\w+):\s./$sep${1}-$lang.$encodings{$lang}: $str_translated/s;
1196	print OUTPUT;
1197	}
1198	}
1199	}
1200	print OUTPUT "\n";
1201
1202	close OUTPUT;
1203	close INPUT;
1204	}
1205
1206	sub rfc822deb_split
1207	{
1208	# Debian defines a special way to deal with rfc822-style files:
1209	# when a value contain newlines, it consists of
1210	# 1. a short form (first line)
1211	# 2. a long description, all lines begin with a space,
1212	# and paragraphs are separated by a single dot on a line
1213	# This routine returns an array of all paragraphs, and reformat
1214	# them.
1215	# When first argument is 2, the string is a comma separated list of
1216	# values.
1217	my $type = shift;
1218	my $text = shift;
1219	$text =~ s/^[ \t]//mg;
1220	return (split(/, */, $text, 0)) if $type ne 1;
1221	return ($text) if $text !~ /\n/;
1222
1223	$text =~ s/([^\n]*)\n//;
1224	my @list = ($1);
1225	my $str = '';
1226
1227	for my $line (split (/\n/, $text))
1228	{
1229	chomp $line;
1230	if ($line =~ /^\.\s*$/)
1231	{
1232	# New paragraph
1233	$str =~ s/\s*$//;
1234	push(@list, $str);
1235	$str = '';
1236	}
1237	elsif ($line =~ /^\s/)
1238	{
1239	# Line which must not be reformatted
1240	$str .= "\n" if length ($str) && $str !~ /\n$/;
1241	$line =~ s/\s+$//;
1242	$str .= $line."\n";
1243	}
1244	else
1245	{
1246	# Continuation line, remove newline
1247	$str .= " " if length ($str) && $str !~ /\n$/;
1248	$str .= $line;
1249	}
1250	}
1251
1252	$str =~ s/\s*$//;
1253	push(@list, $str) if length ($str);
1254
1255	return @list;
1256	}
1257

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format