Context Navigation

intltool-merge.in @ 18703

Visit:

Revision 18703, 15.8 KB checked in by ghudson, 21 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r18702, which included commits to RCS files with non-trunk default branches.

Line
1	#!@INTLTOOL_PERL@ -w
2
3	#
4	# The Intltool Message Merger
5	#
6	# Copyright (C) 2000, 2002 Free Software Foundation.
7	# Copyright (C) 2000, 2001 Eazel, Inc
8	#
9	# Intltool is free software; you can redistribute it and/or
10	# modify it under the terms of the GNU General Public License
11	# version 2 published by the Free Software Foundation.
12	#
13	# Intltool is distributed in the hope that it will be useful,
14	# but WITHOUT ANY WARRANTY; without even the implied warranty of
15	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16	# General Public License for more details.
17	#
18	# You should have received a copy of the GNU General Public License
19	# along with this program; if not, write to the Free Software
20	# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21	#
22	# As a special exception to the GNU General Public License, if you
23	# distribute this file as part of a program that contains a
24	# configuration script generated by Autoconf, you may include it under
25	# the same distribution terms that you use for the rest of that program.
26	#
27	# Authors: Maciej Stachowiak <mjs@noisehavoc.org>
28	# Kenneth Christiansen <kenneth@gnu.org>
29	# Darin Adler <darin@bentspoon.com>
30	#
31	# Proper XML UTF-8'ification written by Cyrille Chepelov <chepelov@calixo.net>
32	#
33
34	## Release information
35	my $PROGRAM = "intltool-merge";
36	my $PACKAGE = "intltool";
37	my $VERSION = "0.22";
38
39	## Loaded modules
40	use strict;
41	use Getopt::Long;
42
43	## Scalars used by the option stuff
44	my $HELP_ARG = 0;
45	my $VERSION_ARG = 0;
46	my $BA_STYLE_ARG = 0;
47	my $XML_STYLE_ARG = 0;
48	my $KEYS_STYLE_ARG = 0;
49	my $DESKTOP_STYLE_ARG = 0;
50	my $SCHEMAS_STYLE_ARG = 0;
51	my $QUIET_ARG = 0;
52	my $PASS_THROUGH_ARG = 0;
53	my $UTF8_ARG = 0;
54	my $cache_file;
55
56	## Handle options
57	GetOptions
58	(
59	"help" => \$HELP_ARG,
60	"version" => \$VERSION_ARG,
61	"quiet\|q" => \$QUIET_ARG,
62	"oaf-style\|o" => \$BA_STYLE_ARG, ## for compatibility
63	"ba-style\|b" => \$BA_STYLE_ARG,
64	"xml-style\|x" => \$XML_STYLE_ARG,
65	"keys-style\|k" => \$KEYS_STYLE_ARG,
66	"desktop-style\|d" => \$DESKTOP_STYLE_ARG,
67	"schemas-style\|s" => \$SCHEMAS_STYLE_ARG,
68	"pass-through\|p" => \$PASS_THROUGH_ARG,
69	"utf8\|u" => \$UTF8_ARG,
70	"cache\|c=s" => \$cache_file
71	) or &error;
72
73	my $PO_DIR;
74	my $FILE;
75	my $OUTFILE;
76
77	my %po_files_by_lang = ();
78	my %translations = ();
79
80	# Use this instead of \w for XML files to handle more possible characters.
81	my $w = "[-A-Za-z0-9._:]";
82
83	# XML quoted string contents
84	my $q = "[^\\\"]*";
85
86	## Check for options.
87
88	if ($VERSION_ARG) {
89	&print_version;
90	} elsif ($HELP_ARG) {
91	&print_help;
92	} elsif ($BA_STYLE_ARG && @ARGV > 2) {
93	&preparation;
94	&print_message;
95	&ba_merge_translations;
96	&finalize;
97	} elsif ($XML_STYLE_ARG && @ARGV > 2) {
98	&utf8_sanity_check;
99	&preparation;
100	&print_message;
101	&xml_merge_translations;
102	&finalize;
103	} elsif ($KEYS_STYLE_ARG && @ARGV > 2) {
104	&utf8_sanity_check;
105	&preparation;
106	&print_message;
107	&keys_merge_translations;
108	&finalize;
109	} elsif ($DESKTOP_STYLE_ARG && @ARGV > 2) {
110	&preparation;
111	&print_message;
112	&desktop_merge_translations;
113	&finalize;
114	} elsif ($SCHEMAS_STYLE_ARG && @ARGV > 2) {
115	&preparation;
116	&print_message;
117	&schemas_merge_translations;
118	&finalize;
119	} else {
120	&print_help;
121	}
122
123	exit;
124
125	## Sub for printing release information
126	sub print_version
127	{
128	print "${PROGRAM} (${PACKAGE}) ${VERSION}\n";
129	print "Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen.\n\n";
130	print "Copyright (C) 2000-2002 Free Software Foundation, Inc.\n";
131	print "Copyright (C) 2000-2001 Eazel, Inc.\n";
132	print "This is free software; see the source for copying conditions. There is NO\n";
133	print "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n";
134	exit;
135	}
136
137	## Sub for printing usage information
138	sub print_help
139	{
140	print "Usage: ${PROGRAM} [OPTIONS] PO_DIRECTORY FILENAME OUTPUT_FILE\n";
141	print "Generates an output file that includes translated versions of some attributes,\n";
142	print "from an untranslated source and a po directory that includes translations.\n\n";
143	print " -b, --ba-style includes translations in the bonobo-activation style\n";
144	print " -d, --desktop-style includes translations in the desktop style\n";
145	print " -k, --keys-style includes translations in the keys style\n";
146	print " -s, --schemas-style includes translations in the schemas style\n";
147	print " -x, --xml-style includes translations in the standard xml style\n";
148	print " -u, --utf8 convert all strings to UTF-8 before merging\n";
149	print " -p, --pass-through use strings as found in .po files, without\n";
150	print " conversion (STRONGLY unrecommended with -x)\n";
151	print " -q, --quiet suppress most messages\n";
152	print " --help display this help and exit\n";
153	print " --version output version information and exit\n";
154	print "\nReport bugs to bugzilla.gnome.org, module intltool, or contact us through \n";
155	print "<xml-i18n-tools-list\@gnome.org>.\n";
156	exit;
157	}
158
159
160	## Sub for printing error messages
161	sub print_error
162	{
163	print "Try `${PROGRAM} --help' for more information.\n";
164	exit;
165	}
166
167
168	sub print_message
169	{
170	print "Merging translations into $OUTFILE.\n" unless $QUIET_ARG;
171	}
172
173
174	sub preparation
175	{
176	$PO_DIR = $ARGV[0];
177	$FILE = $ARGV[1];
178	$OUTFILE = $ARGV[2];
179
180	&gather_po_files;
181	&get_translation_database;
182	}
183
184	# General-purpose code for looking up translations in .po files
185
186	sub po_file2lang
187	{
188	my ($tmp) = @_;
189	$tmp =~ s/^.\/(.)\.po$/$1/;
190	return $tmp;
191	}
192
193	sub gather_po_files
194	{
195	for my $po_file (glob "$PO_DIR/*.po") {
196	$po_files_by_lang{po_file2lang($po_file)} = $po_file;
197	}
198	}
199
200	sub get_po_encoding
201	{
202	my ($in_po_file) = @_;
203	my $encoding = "";
204
205	open IN_PO_FILE, $in_po_file or die;
206	while (<IN_PO_FILE>) {
207	## example: "Content-Type: text/plain; charset=ISO-8859-1\n"
208	if (/Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/) {
209	$encoding = $1;
210	last;
211	}
212	}
213	close IN_PO_FILE;
214
215	if (!$encoding) {
216	print "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n";
217	$encoding = "ISO-8859-1";
218	}
219	return $encoding
220	}
221
222	sub utf8_sanity_check
223	{
224	if (!$UTF8_ARG) {
225	if (!$PASS_THROUGH_ARG) {
226	$PASS_THROUGH_ARG="1";
227	}
228	}
229	}
230
231	sub get_translation_database
232	{
233	if ($cache_file) {
234	&get_cached_translation_database;
235	} else {
236	&create_translation_database;
237	}
238	}
239
240	sub get_newest_po_age
241	{
242	my $newest_age;
243
244	foreach my $file (values %po_files_by_lang) {
245	my $file_age = -M $file;
246	$newest_age = $file_age if !$newest_age \|\| $file_age < $newest_age;
247	}
248
249	return $newest_age;
250	}
251
252	sub create_cache
253	{
254	print "Generating and caching the translation database\n" unless $QUIET_ARG;
255
256	&create_translation_database;
257
258	open CACHE, ">$cache_file" \|\| die;
259	print CACHE join "\x01", %translations;
260	close CACHE;
261	}
262
263	sub load_cache
264	{
265	print "Found cached translation database\n" unless $QUIET_ARG;
266
267	my $contents;
268	open CACHE, "<$cache_file" \|\| die;
269	{
270	local $/;
271	$contents = <CACHE>;
272	}
273	close CACHE;
274	%translations = split "\x01", $contents;
275	}
276
277	sub get_cached_translation_database
278	{
279	my $cache_file_age = -M $cache_file;
280	if (defined $cache_file_age) {
281	if ($cache_file_age <= &get_newest_po_age) {
282	&load_cache;
283	return;
284	}
285	print "Found too-old cached translation database\n" unless $QUIET_ARG;
286	}
287
288	&create_cache;
289	}
290
291	sub create_translation_database
292	{
293	for my $lang (keys %po_files_by_lang) {
294	my $po_file = $po_files_by_lang{$lang};
295
296	if ($UTF8_ARG) {
297	my $encoding = get_po_encoding ($po_file);
298	if (lc $encoding eq "utf-8") {
299	open PO_FILE, "<$po_file";
300	} else {
301	my $iconv = $ENV{"INTLTOOL_ICONV"} \|\| "iconv";
302	open PO_FILE, "$iconv -f $encoding -t UTF-8 $po_file\|";
303	}
304	} else {
305	open PO_FILE, "<$po_file";
306	}
307
308	my $nextfuzzy = 0;
309	my $inmsgid = 0;
310	my $inmsgstr = 0;
311	my $msgid = "";
312	my $msgstr = "";
313	while (<PO_FILE>) {
314	$nextfuzzy = 1 if /^#, fuzzy/;
315	if (/^msgid "((\\.\|[^\\])*)"/ ) {
316	$translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
317	$msgid = "";
318	$msgstr = "";
319
320	if ($nextfuzzy) {
321	$inmsgid = 0;
322	} else {
323	$msgid = unescape_po_string($1);
324	$inmsgid = 1;
325	}
326	$inmsgstr = 0;
327	$nextfuzzy = 0;
328	}
329	if (/^msgstr "((\\.\|[^\\])*)"/) {
330	$msgstr = unescape_po_string($1);
331	$inmsgstr = 1;
332	$inmsgid = 0;
333	}
334	if (/^"((\\.\|[^\\])*)"/) {
335	$msgid .= unescape_po_string($1) if $inmsgid;
336	$msgstr .= unescape_po_string($1) if $inmsgstr;
337	}
338	}
339	$translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
340	}
341	}
342
343	sub finalize
344	{
345	}
346
347	sub unescape_one_sequence
348	{
349	my ($sequence) = @_;
350
351	return "\\" if $sequence eq "\\\\";
352	return "\"" if $sequence eq "\\\"";
353
354	# gettext also handles \n, \t, \b, \r, \f, \v, \a, \xxx (octal),
355	# \xXX (hex) and has a comment saying they want to handle \u and \U.
356
357	return $sequence;
358	}
359
360	sub unescape_po_string
361	{
362	my ($string) = @_;
363
364	$string =~ s/(\\.)/unescape_one_sequence($1)/eg;
365
366	return $string;
367	}
368
369	sub entity_decode
370	{
371	local ($_) = @_;
372
373	s/'/'/g; # '
374	s/"/"/g; # "
375	s/&/&/g;
376
377	return $_;
378	}
379
380	sub entity_encode
381	{
382	my ($pre_encoded) = @_;
383
384	my @list_of_chars = unpack ('C*', $pre_encoded);
385
386	if ($PASS_THROUGH_ARG) {
387	return join ('', map (&entity_encode_int_even_high_bit, @list_of_chars));
388	} else {
389	return join ('', map (&entity_encode_int_minimalist, @list_of_chars));
390	}
391	}
392
393	sub entity_encode_int_minimalist
394	{
395	return """ if $_ == 34;
396	return "&" if $_ == 38;
397	return "'" if $_ == 39;
398	return chr $_;
399	}
400
401	sub entity_encode_int_even_high_bit
402	{
403	if ($_ > 127 \|\| $_ == 34 \|\| $_ == 38 \|\| $_ == 39) {
404	# the ($_ > 127) should probably be removed
405	return "&#" . $_ . ";";
406	} else {
407	return chr $_;
408	}
409	}
410
411	sub entity_encoded_translation
412	{
413	my ($lang, $string) = @_;
414
415	my $translation = $translations{$lang, $string};
416	return $string if !$translation;
417	return entity_encode ($translation);
418	}
419
420	## XML (bonobo-activation specific) merge code
421
422	sub ba_merge_translations
423	{
424	my $source;
425
426	{
427	local $/; # slurp mode
428	open INPUT, "<$FILE" or die "can't open $FILE: $!";
429	$source = <INPUT>;
430	close INPUT;
431	}
432
433	open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!";
434
435	while ($source =~ s\|^(.?)([ \t]<\s$w+\s+($w+\s=\s"$q"\s)+/?>)([ \t]*\n)?\|\|s) {
436	print OUTPUT $1;
437
438	my $node = $2 . "\n";
439
440	my @strings = ();
441	$_ = $node;
442	while (s/(\s)_($w+\s=\s"($q)")/$1$2/s) {
443	push @strings, entity_decode($3);
444	}
445	print OUTPUT;
446
447	my %langs;
448	for my $string (@strings) {
449	for my $lang (keys %po_files_by_lang) {
450	$langs{$lang} = 1 if $translations{$lang, $string};
451	}
452	}
453
454	for my $lang (sort keys %langs) {
455	$_ = $node;
456	s/(\sname\s=\s)"($q)"/$1"$2-$lang"/s;
457	s/(\s)_($w+\s=\s")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg;
458	print OUTPUT;
459	}
460	}
461
462	print OUTPUT $source;
463
464	close OUTPUT;
465	}
466
467
468	## XML (non-bonobo-activation) merge code
469
470	sub xml_merge_translations
471	{
472	my $source;
473
474	{
475	local $/; # slurp mode
476	open INPUT, "<$FILE" or die "can't open $FILE: $!";
477	$source = <INPUT>;
478	close INPUT;
479	}
480
481	open OUTPUT, ">$OUTFILE" or die;
482
483	# FIXME: support attribute translations
484
485	# Empty nodes never need translation, so unmark all of them.
486	# For example, <_foo/> is just replaced by <foo/>.
487	$source =~ s\|<\s_($w+)\s/>\|<$1/>\|g;
488
489	# Support for <_foo>blah</_foo> style translations.
490	while ($source =~ s\|^(.?)([ \t])<\s_($w+)\s>(.?)<\s/_\3\s>([ \t]\n)?\|\|s) {
491	print OUTPUT $1;
492
493	my $spaces = $2;
494	my $tag = $3;
495	my $string = $4;
496
497	print OUTPUT "$spaces<$tag>$string</$tag>\n";
498
499	$string =~ s/\s+/ /g;
500	$string =~ s/^ //;
501	$string =~ s/ $//;
502	$string = entity_decode($string);
503
504	for my $lang (sort keys %po_files_by_lang) {
505	my $translation = $translations{$lang, $string};
506	next if !$translation;
507	$translation = entity_encode($translation);
508	print OUTPUT "$spaces<$tag xml:lang=\"$lang\">$translation</$tag>\n";
509	}
510	}
511
512	print OUTPUT $source;
513
514	close OUTPUT;
515	}
516
517	sub keys_merge_translations
518	{
519	open INPUT, "<${FILE}" or die;
520	open OUTPUT, ">${OUTFILE}" or die;
521
522	while (<INPUT>) {
523	if (s/^(\s)_(\w+=(.))/$1$2/) {
524	my $string = $3;
525
526	print OUTPUT;
527
528	my $non_translated_line = $_;
529
530	for my $lang (sort keys %po_files_by_lang) {
531	my $translation = $translations{$lang, $string};
532	next if !$translation;
533
534	$_ = $non_translated_line;
535	s/(\w+)=.*/[$lang]$1=$translation/;
536	print OUTPUT;
537	}
538	} else {
539	print OUTPUT;
540	}
541	}
542
543	close OUTPUT;
544	close INPUT;
545	}
546
547	sub desktop_merge_translations
548	{
549	open INPUT, "<${FILE}" or die;
550	open OUTPUT, ">${OUTFILE}" or die;
551
552	while (<INPUT>) {
553	if (s/^(\s)_(\w+=(.))/$1$2/) {
554	my $string = $3;
555
556	print OUTPUT;
557
558	my $non_translated_line = $_;
559
560	for my $lang (sort keys %po_files_by_lang) {
561	my $translation = $translations{$lang, $string};
562	next if !$translation;
563
564	$_ = $non_translated_line;
565	s/(\w+)=.*/${1}[$lang]=$translation/;
566	print OUTPUT;
567	}
568	} else {
569	print OUTPUT;
570	}
571	}
572
573	close OUTPUT;
574	close INPUT;
575	}
576
577	sub schemas_merge_translations
578	{
579	my $source;
580
581	{
582	local $/; # slurp mode
583	open INPUT, "<$FILE" or die "can't open $FILE: $!";
584	$source = <INPUT>;
585	close INPUT;
586	}
587
588	open OUTPUT, ">$OUTFILE" or die;
589
590	# FIXME: support attribute translations
591
592	# Empty nodes never need translation, so unmark all of them.
593	# For example, <_foo/> is just replaced by <foo/>.
594	$source =~ s\|<\s_($w+)\s/>\|<$1/>\|g;
595
596	# Support for <_foo>blah</_foo> style translations.
597
598	my $regex_start = "^(.?)([ \t])<locale name=\"C\">";
599	my $regex_short = "([ \t\n])<short>(.?)</short>";
600	my $regex_long = "([ \t\n])<long>(.?)</long>";
601	my $regex_end = "([ \t\n]*)</locale>";
602
603	while ($source =~ s\|$regex_start$regex_short$regex_long$regex_end\|\|s) {
604	print OUTPUT $1;
605
606	my $locale_start_spaces = $2;
607	my $locale_end_spaces = $7;
608	my $short_spaces = $3;
609	my $short_string = $4;
610	my $long_spaces = $5;
611	my $long_string = $6;
612
613	# English first
614
615	print OUTPUT "$locale_start_spaces<locale name=\"C\">";
616	print OUTPUT "$short_spaces<short>$short_string</short>";
617	print OUTPUT "$long_spaces<long>$long_string</long>";
618	print OUTPUT "$locale_end_spaces</locale>";
619
620	$short_string =~ s/\s+/ /g;
621	$short_string =~ s/^ //;
622	$short_string =~ s/ $//;
623	$short_string = entity_decode($short_string);
624
625	$long_string =~ s/\s+/ /g;
626	$long_string =~ s/^ //;
627	$long_string =~ s/ $//;
628	$long_string = entity_decode($long_string);
629
630	for my $lang (sort keys %po_files_by_lang) {
631	my $short_translation = $translations{$lang, $short_string};
632	my $long_translation = $translations{$lang, $long_string};
633
634	next if (!$short_translation && !$long_translation);
635
636	print OUTPUT "\n$locale_start_spaces<locale name=\"$lang\">";
637
638	if ($short_translation)
639	{
640	$short_translation = entity_encode($short_translation);
641	print OUTPUT "$short_spaces<short>$short_translation</short>";
642	}
643
644	if ($long_translation)
645	{
646	$long_translation = entity_encode($long_translation);
647	print OUTPUT "$long_spaces<long>$long_translation</long>";
648	}
649
650	print OUTPUT "$locale_end_spaces</locale>";
651	}
652	}
653
654	print OUTPUT $source;
655
656	close OUTPUT;
657	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format