trunk/package/bin/po4a/lib/Locale/Po4a/Po.pm - Chameleon Svn Source Tree - Chameleon open source boot loader project.

Root/trunk/package/bin/po4a/lib/Locale/Po4a/Po.pm

1	# Locale::Po4a::Po -- manipulation of PO files␊
2	#␊
3	# This program is free software; you may redistribute it and/or modify it␊
4	# under the terms of GPL (see COPYING).␊
5	␊
6	############################################################################␊
7	# Modules and declarations␊
8	############################################################################␊
9	␊
10	=encoding UTF-8␊
11	␊
12	=head1 NAME␊
13	␊
14	Locale::Po4a::Po - PO file manipulation module␊
15	␊
16	=head1 SYNOPSIS␊
17	␊
18	use Locale::Po4a::Po;␊
19	my $pofile=Locale::Po4a::Po->new();␊
20	␊
21	# Read PO file␊
22	$pofile->read('file.po');␊
23	␊
24	# Add an entry␊
25	$pofile->push('msgid' => 'Hello', 'msgstr' => 'bonjour',␊
26	'flags' => "wrap", 'reference'=>'file.c:46');␊
27	␊
28	# Extract a translation␊
29	$pofile->gettext("Hello"); # returns 'bonjour'␊
30	␊
31	# Write back to a file␊
32	$pofile->write('otherfile.po');␊
33	␊
34	=head1 DESCRIPTION␊
35	␊
36	Locale::Po4a::Po is a module that allows you to manipulate message␊
37	catalogs. You can load and write from/to a file (which extension is often␊
38	I<po>), you can build new entries on the fly or request for the translation␊
39	of a string.␊
40	␊
41	For a more complete description of message catalogs in the PO format and␊
42	their use, please refer to the documentation of the gettext program.␊
43	␊
44	This module is part of the po4a project, which objective is to use PO files␊
45	(designed at origin to ease the translation of program messages) to␊
46	translate everything, including documentation (man page, info manual),␊
47	package description, debconf templates, and everything which may benefit␊
48	from this.␊
49	␊
50	=head1 OPTIONS ACCEPTED BY THIS MODULE␊
51	␊
52	=over 4␊
53	␊
54	=item B<porefs> I<type>[,B<wrap>\|B<nowrap>]␊
55	␊
56	Specify the reference format. Argument I<type> can be one of B<none> to not␊
57	produce any reference, B<noline> to not specify the line number (more␊
58	accurately all line numbers are replaced by 1), B<counter> to replace line␊
59	number by an increasing counter, and B<full> to include complete␊
60	references.␊
61	␊
62	Argument can be followed by a comma and either B<wrap> or B<nowrap> keyword.␊
63	References are written by default on a single line. The B<wrap> option wraps␊
64	references on several lines, to mimic B<gettext> tools (B<xgettext> and␊
65	B<msgmerge>). This option will become the default in a future release, because␊
66	it is more sensible. The B<nowrap> option is available so that users who want␊
67	to keep the old behavior can do so.␊
68	␊
69	=item B<--msgid-bugs-address> I<email@address>␊
70	␊
71	Set the report address for msgid bugs. By default, the created POT files␊
72	have no Report-Msgid-Bugs-To fields.␊
73	␊
74	=item B<--copyright-holder> I<string>␊
75	␊
76	Set the copyright holder in the POT header. The default value is␊
77	"Free Software Foundation, Inc."␊
78	␊
79	=item B<--package-name> I<string>␊
80	␊
81	Set the package name for the POT header. The default is "PACKAGE".␊
82	␊
83	=item B<--package-version> I<string>␊
84	␊
85	Set the package version for the POT header. The default is "VERSION".␊
86	␊
87	=back␊
88	␊
89	=cut␊
90	␊
91	use IO::File;␊
92	␊
93	␊
94	require Exporter;␊
95	␊
96	package Locale::Po4a::Po;␊
97	use DynaLoader;␊
98	␊
99	use Locale::Po4a::Common qw(wrap_msg wrap_mod wrap_ref_mod dgettext);␊
100	␊
101	use subs qw(makespace);␊
102	use vars qw(@ISA @EXPORT_OK);␊
103	@ISA = qw(Exporter DynaLoader);␊
104	@EXPORT = qw(%debug);␊
105	@EXPORT_OK = qw(&move_po_if_needed);␊
106	␊
107	use Locale::Po4a::TransTractor;␊
108	# Try to use a C extension if present.␊
109	eval("bootstrap Locale::Po4a::Po $Locale::Po4a::TransTractor::VERSION");␊
110	␊
111	use 5.006;␊
112	use strict;␊
113	use warnings;␊
114	␊
115	use Carp qw(croak);␊
116	use File::Basename;␊
117	use File::Path; # mkdir before write␊
118	use File::Copy; # move␊
119	use POSIX qw(strftime floor);␊
120	use Time::Local;␊
121	␊
122	use Encode;␊
123	␊
124	my @known_flags=qw(wrap no-wrap c-format fuzzy);␊
125	␊
126	our %debug=('canonize' => 0,␊
127	'quote' => 0,␊
128	'escape' => 0,␊
129	'encoding' => 0,␊
130	'filter' => 0);␊
131	␊
132	=head1 Functions about whole message catalogs␊
133	␊
134	=over 4␊
135	␊
136	=item new()␊
137	␊
138	Creates a new message catalog. If an argument is provided, it's the name of␊
139	a PO file we should load.␊
140	␊
141	=cut␊
142	␊
143	sub new {␊
144	my ($this, $options) = (shift, shift);␊
145	my $class = ref($this) \|\| $this;␊
146	my $self = {};␊
147	bless $self, $class;␊
148	$self->initialize($options);␊
149	␊
150	my $filename = shift;␊
151	$self->read($filename) if defined($filename) && length($filename);␊
152	return $self;␊
153	}␊
154	␊
155	# Return the numerical timezone (e.g. +0200)␊
156	# Neither the %z nor the %s formats of strftime are portable:␊
157	# '%s' is not supported on Solaris and '%z' indicates␊
158	# "2006-10-25 19:36E. Europe Standard Time" on MS Windows.␊
159	sub timezone {␊
160	my @g = gmtime();␊
161	my @l = localtime();␊
162	␊
163	my $diff;␊
164	$diff = floor(timelocal(@l)/60 +0.5);␊
165	$diff -= floor(timelocal(@g)/60 +0.5);␊
166	␊
167	my $h = floor($diff / 60) + $l[8]; # $l[8] indicates if we are currently␊
168	# in a daylight saving time zone␊
169	my $m = $diff%60;␊
170	␊
171	return sprintf "%+03d%02d\n", $h, $m;␊
172	}␊
173	␊
174	sub initialize {␊
175	my ($self, $options) = (shift, shift);␊
176	my $date = strftime("%Y-%m-%d %H:%M", localtime).timezone();␊
177	chomp $date;␊
178	# $options = ref($options) \|\| $options;␊
179	␊
180	$self->{options}{'porefs'}= 'full,nowrap';␊
181	$self->{options}{'msgid-bugs-address'}= undef;␊
182	$self->{options}{'copyright-holder'}= "Free Software Foundation, Inc.";␊
183	$self->{options}{'package-name'}= "PACKAGE";␊
184	$self->{options}{'package-version'}= "VERSION";␊
185	foreach my $opt (keys %$options) {␊
186	if ($options->{$opt}) {␊
187	die wrap_mod("po4a::po",␊
188	dgettext ("po4a", "Unknown option: %s"), $opt)␊
189	unless exists $self->{options}{$opt};␊
190	$self->{options}{$opt} = $options->{$opt};␊
191	}␊
192	}␊
193	$self->{options}{'porefs'} =~ /^(full\|counter\|noline\|none)(,(no)?wrap)?$/ \|\|␊
194	die wrap_mod("po4a::po",␊
195	dgettext ("po4a",␊
196	"Invalid value for option 'porefs' ('%s' is ".␊
197	"not one of 'full', 'counter', 'noline' or 'none')"),␊
198	$self->{options}{'porefs'});␊
199	if ($self->{options}{'porefs'} =~ m/^counter/) {␊
200	$self->{counter} = {};␊
201	}␊
202	␊
203	$self->{po}=();␊
204	$self->{count}=0; # number of msgids in the PO␊
205	# count_doc: number of strings in the document␊
206	# (duplicate strings counted multiple times)␊
207	$self->{count_doc}=0;␊
208	$self->{header_comment}=␊
209	" SOME DESCRIPTIVE TITLE\n"␊
210	." Copyright (C) YEAR ".␊
211	$self->{options}{'copyright-holder'}."\n"␊
212	." This file is distributed under the same license ".␊
213	"as the ".$self->{options}{'package-name'}." package.\n"␊
214	." FIRST AUTHOR <EMAIL\@ADDRESS>, YEAR.\n"␊
215	."\n"␊
216	.", fuzzy";␊
217	# $self->header_tag="fuzzy";␊
218	$self->{header}=escape_text("Project-Id-Version: ".␊
219	$self->{options}{'package-name'}." ".␊
220	$self->{options}{'package-version'}."\n".␊
221	((defined $self->{options}{'msgid-bugs-address'})?␊
222	"Report-Msgid-Bugs-To: ".$self->{options}{'msgid-bugs-address'}."\n":␊
223	"").␊
224	"POT-Creation-Date: $date\n".␊
225	"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n".␊
226	"Last-Translator: FULL NAME <EMAIL\@ADDRESS>\n".␊
227	"Language-Team: LANGUAGE <LL\@li.org>\n".␊
228	"Language: \n".␊
229	"MIME-Version: 1.0\n".␊
230	"Content-Type: text/plain; charset=CHARSET\n".␊
231	"Content-Transfer-Encoding: 8bit\n");␊
232	␊
233	$self->{encoder}=find_encoding("ascii");␊
234	$self->{footer}=[];␊
235	␊
236	# To make stats about gettext hits␊
237	$self->stats_clear();␊
238	}␊
239	␊
240	=item read($)␊
241	␊
242	Reads a PO file (which name is given as argument). Previously existing␊
243	entries in self are not removed, the new ones are added to the end of the␊
244	catalog.␊
245	␊
246	=cut␊
247	␊
248	sub read {␊
249	my $self=shift;␊
250	my $filename=shift␊
251	or croak wrap_mod("po4a::po",␊
252	dgettext("po4a",␊
253	"Please provide a non-null filename"));␊
254	␊
255	my $lang = basename($filename);␊
256	$lang =~ s/\.po$//;␊
257	$self->{lang} = $lang;␊
258	␊
259	my $fh;␊
260	if ($filename eq '-') {␊
261	$fh=*STDIN;␊
262	} else {␊
263	open $fh,"<$filename"␊
264	or croak wrap_mod("po4a::po",␊
265	dgettext("po4a", "Can't read from %s: %s"),␊
266	$filename, $!);␊
267	}␊
268	␊
269	## Read paragraphs line-by-line␊
270	my $pofile="";␊
271	my $textline;␊
272	while (defined ($textline = <$fh>)) {␊
273	$pofile .= $textline;␊
274	}␊
275	# close INPUT␊
276	# or croak (sprintf(dgettext("po4a",␊
277	# "Can't close %s after reading: %s"),␊
278	# $filename,$!)."\n");␊
279	␊
280	my $linenum=0;␊
281	␊
282	foreach my $msg (split (/\n\n/,$pofile)) {␊
283	my ($msgid,$msgstr,$comment,$previous,$automatic,$reference,$flags,$buffer);␊
284	my ($msgid_plural, $msgstr_plural);␊
285	if ($msg =~ m/^#~/m) {␊
286	push(@{$self->{footer}}, $msg);␊
287	next;␊
288	}␊
289	foreach my $line (split (/\n/,$msg)) {␊
290	$linenum++;␊
291	if ($line =~ /^#\. ?(.*)$/) { # Automatic comment␊
292	$automatic .= (defined($automatic) ? "\n" : "").$1;␊
293	␊
294	} elsif ($line =~ /^#: ?(.*)$/) { # reference␊
295	$reference .= (defined($reference) ? "\n" : "").$1;␊
296	␊
297	} elsif ($line =~ /^#, ?(.*)$/) { # flags␊
298	$flags .= (defined($flags) ? "\n" : "").$1;␊
299	␊
300	} elsif ($line =~ /^#\\| ?(.*)$/) { # previous translation␊
301	$previous .= (defined($previous) ? "\n" : "").($1\|\|"");␊
302	␊
303	} elsif ($line =~ /^#(.*)$/) { # Translator comments␊
304	$comment .= (defined($comment) ? "\n" : "").($1\|\|"");␊
305	␊
306	} elsif ($line =~ /^msgid (".*")$/) { # begin of msgid␊
307	$buffer = $1;␊
308	␊
309	} elsif ($line =~ /^msgid_plural (".*")$/) {␊
310	# begin of msgid_plural, end of msgid␊
311	␊
312	$msgid = $buffer;␊
313	$buffer = $1;␊
314	␊
315	} elsif ($line =~ /^msgstr (".*")$/) {␊
316	# begin of msgstr, end of msgid␊
317	␊
318	$msgid = $buffer;␊
319	$buffer = "$1";␊
320	␊
321	} elsif ($line =~ /^msgstr\[([0-9]+)\] (".*")$/) {␊
322	# begin of msgstr[x], end of msgid_plural or msgstr[x-1]␊
323	␊
324	# Note: po4a cannot uses plural forms␊
325	# (no integer to use the plural form)␊
326	# * drop the msgstr[x] where x >= 2␊
327	# * use msgstr[0] as the translation of msgid␊
328	# * use msgstr[1] as the translation of msgid_plural␊
329	␊
330	if ($1 eq "0") {␊
331	$msgid_plural = $buffer;␊
332	$buffer = "$2";␊
333	} elsif ($1 eq "1") {␊
334	$msgstr = $buffer;␊
335	$buffer = "$2";␊
336	} elsif ($1 eq "2") {␊
337	$msgstr_plural = $buffer;␊
338	warn wrap_ref_mod("$filename:$linenum",␊
339	"po4a::po",␊
340	dgettext("po4a", "Messages with more than 2 plural forms are not supported."));␊
341	}␊
342	} elsif ($line =~ /^(".*")$/) {␊
343	# continuation of a line␊
344	$buffer .= "\n$1";␊
345	␊
346	} else {␊
347	warn wrap_ref_mod("$filename:$linenum",␊
348	"po4a::po",␊
349	dgettext("po4a", "Strange line: -->%s<--"),␊
350	$line);␊
351	}␊
352	}␊
353	$linenum++;␊
354	if (defined $msgid_plural) {␊
355	$msgstr_plural=$buffer;␊
356	␊
357	$msgid = unquote_text($msgid) if (defined($msgid));␊
358	$msgstr = unquote_text($msgstr) if (defined($msgstr));␊
359	␊
360	$self->push_raw ('msgid' => $msgid,␊
361	'msgstr' => $msgstr,␊
362	'reference' => $reference,␊
363	'flags' => $flags,␊
364	'comment' => $comment,␊
365	'previous' => $previous,␊
366	'automatic' => $automatic,␊
367	'plural' => 0);␊
368	␊
369	$msgid_plural = unquote_text($msgid_plural)␊
370	if (defined($msgid_plural));␊
371	$msgstr_plural = unquote_text($msgstr_plural)␊
372	if (defined($msgstr_plural));␊
373	␊
374	$self->push_raw ('msgid' => $msgid_plural,␊
375	'msgstr' => $msgstr_plural,␊
376	'reference' => $reference,␊
377	'flags' => $flags,␊
378	'comment' => $comment,␊
379	'previous' => $previous,␊
380	'automatic' => $automatic,␊
381	'plural' => 1);␊
382	} else {␊
383	$msgstr=$buffer;␊
384	␊
385	$msgid = unquote_text($msgid) if (defined($msgid));␊
386	$msgstr = unquote_text($msgstr) if (defined($msgstr));␊
387	␊
388	$self->push_raw ('msgid' => $msgid,␊
389	'msgstr' => $msgstr,␊
390	'reference' => $reference,␊
391	'flags' => $flags,␊
392	'comment' => $comment,␊
393	'previous' => $previous,␊
394	'automatic' => $automatic);␊
395	}␊
396	}␊
397	}␊
398	␊
399	=item write($)␊
400	␊
401	Writes the current catalog to the given file.␊
402	␊
403	=cut␊
404	␊
405	sub write{␊
406	my $self=shift;␊
407	my $filename=shift␊
408	or croak dgettext("po4a","Can't write to a file without filename")."\n";␊
409	␊
410	my $fh;␊
411	if ($filename eq '-') {␊
412	$fh=\*STDOUT;␊
413	} else {␊
414	# make sure the directory in which we should write the localized␊
415	# file exists␊
416	my $dir = $filename;␊
417	if ($dir =~ m\|/\|) {␊
418	$dir =~ s\|/[^/]*$\|\|;␊
419	␊
420	File::Path::mkpath($dir, 0, 0755) # Croaks on error␊
421	if (length ($dir) && ! -e $dir);␊
422	}␊
423	open $fh,">$filename"␊
424	or croak wrap_mod("po4a::po",␊
425	dgettext("po4a", "Can't write to %s: %s"),␊
426	$filename, $!);␊
427	}␊
428	␊
429	print $fh "".format_comment($self->{header_comment},"")␊
430	if defined($self->{header_comment}) && length($self->{header_comment});␊
431	␊
432	print $fh "msgid \"\"\n";␊
433	print $fh "msgstr ".quote_text($self->{header})."\n\n";␊
434	␊
435	␊
436	my $buf_msgstr_plural; # Used to keep the first msgstr of plural forms␊
437	my $first=1;␊
438	foreach my $msgid ( sort { ($self->{po}{"$a"}{'pos'}) <=>␊
439	($self->{po}{"$b"}{'pos'})␊
440	} keys %{$self->{po}}) {␊
441	my $output="";␊
442	␊
443	if ($first) {␊
444	$first=0;␊
445	} else {␊
446	$output .= "\n";␊
447	}␊
448	␊
449	$output .= format_comment($self->{po}{$msgid}{'comment'},"")␊
450	if defined($self->{po}{$msgid}{'comment'})␊
451	&& length ($self->{po}{$msgid}{'comment'});␊
452	if ( defined($self->{po}{$msgid}{'automatic'})␊
453	&& length ($self->{po}{$msgid}{'automatic'})) {␊
454	foreach my $comment (split(/\\n/,$self->{po}{$msgid}{'automatic'}))␊
455	{␊
456	$output .= format_comment($comment, ". ")␊
457	}␊
458	}␊
459	$output .= format_comment($self->{po}{$msgid}{'type'},". type: ")␊
460	if defined($self->{po}{$msgid}{'type'})␊
461	&& length ($self->{po}{$msgid}{'type'});␊
462	if ( defined($self->{po}{$msgid}{'reference'})␊
463	&& length ($self->{po}{$msgid}{'reference'})) {␊
464	my $output_ref = $self->{po}{$msgid}{'reference'};␊
465	if ($self->{options}{'porefs'} =~ m/,wrap$/) {␊
466	$output_ref = wrap($output_ref);␊
467	$output_ref =~ s/\s+$//mg;␊
468	}␊
469	$output .= format_comment($output_ref,": ");␊
470	}␊
471	$output .= "#, ". join(", ", sort split(/\s+/,$self->{po}{$msgid}{'flags'}))."\n"␊
472	if defined($self->{po}{$msgid}{'flags'})␊
473	&& length ($self->{po}{$msgid}{'flags'});␊
474	$output .= format_comment($self->{po}{$msgid}{'previous'},"\| ")␊
475	if defined($self->{po}{$msgid}{'previous'})␊
476	&& length ($self->{po}{$msgid}{'previous'});␊
477	␊
478	if (exists $self->{po}{$msgid}{'plural'}) {␊
479	if ($self->{po}{$msgid}{'plural'} == 0) {␊
480	if ($self->get_charset =~ /^utf-8$/i) {␊
481	my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'});␊
482	$msgid = Encode::decode_utf8($msgid);␊
483	$output .= Encode::encode_utf8("msgid ".quote_text($msgid)."\n");␊
484	$buf_msgstr_plural = Encode::encode_utf8("msgstr[0] ".quote_text($msgstr)."\n");␊
485	} else {␊
486	$output = "msgid ".quote_text($msgid)."\n";␊
487	$buf_msgstr_plural = "msgstr[0] ".quote_text($self->{po}{$msgid}{'msgstr'})."\n";␊
488	}␊
489	} elsif ($self->{po}{$msgid}{'plural'} == 1) {␊
490	# TODO: there may be only one plural form␊
491	if ($self->get_charset =~ /^utf-8$/i) {␊
492	my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'});␊
493	$msgid = Encode::decode_utf8($msgid);␊
494	$output = Encode::encode_utf8("msgid_plural ".quote_text($msgid)."\n");␊
495	$output .= $buf_msgstr_plural;␊
496	$output .= Encode::encode_utf8("msgstr[1] ".quote_text($msgstr)."\n");␊
497	$buf_msgstr_plural = "";␊
498	} else {␊
499	$output = "msgid_plural ".quote_text($msgid)."\n";␊
500	$output .= $buf_msgstr_plural;␊
501	$output .= "msgstr[1] ".quote_text($self->{po}{$msgid}{'msgstr'})."\n";␊
502	}␊
503	} else {␊
504	die wrap_msg(dgettext("po4a","Can't write PO files with more than two plural forms."));␊
505	}␊
506	} else {␊
507	if ($self->get_charset =~ /^utf-8$/i) {␊
508	my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'});␊
509	$msgid = Encode::decode_utf8($msgid);␊
510	$output .= Encode::encode_utf8("msgid ".quote_text($msgid)."\n");␊
511	$output .= Encode::encode_utf8("msgstr ".quote_text($msgstr)."\n");␊
512	} else {␊
513	$output .= "msgid ".quote_text($msgid)."\n";␊
514	$output .= "msgstr ".quote_text($self->{po}{$msgid}{'msgstr'})."\n";␊
515	}␊
516	}␊
517	␊
518	print $fh $output;␊
519	}␊
520	print $fh join("\n\n", @{$self->{footer}}) if scalar @{$self->{footer}};␊
521	␊
522	# print STDERR "$fh";␊
523	# if ($filename ne '-') {␊
524	# close $fh␊
525	# or croak (sprintf(dgettext("po4a",␊
526	# "Can't close %s after writing: %s\n"),␊
527	# $filename,$!));␊
528	# }␊
529	}␊
530	␊
531	=item write_if_needed($$)␊
532	␊
533	Like write, but if the PO or POT file already exists, the object will be␊
534	written in a temporary file which will be compared with the existing file␊
535	to check if the update is needed (this avoids to change a POT just to␊
536	update a line reference or the POT-Creation-Date field).␊
537	␊
538	=cut␊
539	␊
540	sub move_po_if_needed {␊
541	my ($new_po, $old_po, $backup) = (shift, shift, shift);␊
542	my $diff;␊
543	␊
544	if (-e $old_po) {␊
545	my $diff_ignore = "-I'^#:' "␊
546	."-I'^\"POT-Creation-Date:' "␊
547	."-I'^\"PO-Revision-Date:'";␊
548	$diff = qx(diff -q $diff_ignore $old_po $new_po);␊
549	if ( $diff eq "" ) {␊
550	unlink $new_po␊
551	or die wrap_msg(dgettext("po4a","Can't unlink %s: %s."),␊
552	$new_po, $!);␊
553	# touch the old PO␊
554	my ($atime, $mtime) = (time,time);␊
555	utime $atime, $mtime, $old_po;␊
556	} else {␊
557	move $new_po, $old_po␊
558	or die wrap_msg(dgettext("po4a","Can't move %s to %s: %s."),␊
559	$new_po, $old_po, $!);␊
560	}␊
561	} else {␊
562	move $new_po, $old_po␊
563	or die wrap_msg(dgettext("po4a","Can't move %s to %s: %s."),␊
564	$new_po, $old_po, $!);␊
565	}␊
566	}␊
567	␊
568	sub write_if_needed {␊
569	my $self=shift;␊
570	my $filename=shift␊
571	or croak dgettext("po4a","Can't write to a file without filename")."\n";␊
572	␊
573	if (-e $filename) {␊
574	my ($tmp_filename);␊
575	(undef,$tmp_filename)=File::Temp->tempfile($filename."XXXX",␊
576	DIR => "/tmp",␊
577	OPEN => 0,␊
578	UNLINK => 0);␊
579	$self->write($tmp_filename);␊
580	move_po_if_needed($tmp_filename, $filename);␊
581	} else {␊
582	$self->write($filename);␊
583	}␊
584	}␊
585	␊
586	=item gettextize($$)␊
587	␊
588	This function produces one translated message catalog from two catalogs, an␊
589	original and a translation. This process is described in L<po4a(7)\|po4a.7>,␊
590	section I<Gettextization: how does it work?>.␊
591	␊
592	=cut␊
593	␊
594	sub gettextize {␊
595	my $this = shift;␊
596	my $class = ref($this) \|\| $this;␊
597	my ($poorig,$potrans)=(shift,shift);␊
598	␊
599	my $pores=Locale::Po4a::Po->new();␊
600	␊
601	my $please_fail = 0;␊
602	my $toobad = dgettext("po4a",␊
603	"\nThe gettextization failed (once again). Don't give up, ".␊
604	"gettextizing is a subtle art, but this is only needed once ".␊
605	"to convert a project to the gorgeous luxus offered by po4a ".␊
606	"to translators.".␊
607	"\nPlease refer to the po4a(7) documentation, the section ".␊
608	"\"HOWTO convert a pre-existing translation to po4a?\" ".␊
609	"contains several hints to help you in your task");␊
610	␊
611	# Don't fail right now when the entry count does not match. Instead, give␊
612	# it a try so that the user can see where we fail (which is probably where␊
613	# the problem is).␊
614	if ($poorig->count_entries_doc() > $potrans->count_entries_doc()) {␊
615	warn wrap_mod("po4a gettextize", dgettext("po4a",␊
616	"Original has more strings than the translation (%d>%d). ".␊
617	"Please fix it by editing the translated version to add ".␊
618	"some dummy entry."),␊
619	$poorig->count_entries_doc(),␊
620	$potrans->count_entries_doc());␊
621	$please_fail = 1;␊
622	} elsif ($poorig->count_entries_doc() < $potrans->count_entries_doc()) {␊
623	warn wrap_mod("po4a gettextize", dgettext("po4a",␊
624	"Original has less strings than the translation (%d<%d). ".␊
625	"Please fix it by removing the extra entry from the ".␊
626	"translated file. You may need an addendum (cf po4a(7)) ".␊
627	"to reput the chunk in place after gettextization. A ".␊
628	"possible cause is that a text duplicated in the original ".␊
629	"is not translated the same way each time. Remove one of ".␊
630	"the translations, and you're fine."),␊
631	$poorig->count_entries_doc(),␊
632	$potrans->count_entries_doc());␊
633	$please_fail = 1;␊
634	}␊
635	␊
636	if ( $poorig->get_charset =~ /^utf-8$/i ) {␊
637	$potrans->to_utf8;␊
638	$pores->set_charset("UTF-8");␊
639	} else {␊
640	if ($potrans->get_charset eq "CHARSET") {␊
641	$pores->set_charset("ascii");␊
642	} else {␊
643	$pores->set_charset($potrans->get_charset);␊
644	}␊
645	}␊
646	print "Po character sets:\n".␊
647	" original=".$poorig->get_charset."\n".␊
648	" translated=".$potrans->get_charset."\n".␊
649	" result=".$pores->get_charset."\n"␊
650	if $debug{'encoding'};␊
651	␊
652	for (my ($o,$t)=(0,0) ;␊
653	$o<$poorig->count_entries_doc() && $t<$potrans->count_entries_doc();␊
654	$o++,$t++) {␊
655	#␊
656	# Extract some informations␊
657	␊
658	my ($orig,$trans)=($poorig->msgid_doc($o),$potrans->msgid_doc($t));␊
659	# print STDERR "Matches [[$orig]]<<$trans>>\n";␊
660	␊
661	my ($reforig,$reftrans)=($poorig->{po}{$orig}{'reference'},␊
662	$potrans->{po}{$trans}{'reference'});␊
663	my ($typeorig,$typetrans)=($poorig->{po}{$orig}{'type'},␊
664	$potrans->{po}{$trans}{'type'});␊
665	␊
666	#␊
667	# Make sure the type of both string exist␊
668	#␊
669	die wrap_mod("po4a gettextize",␊
670	"Internal error: type of original string number %s ".␊
671	"isn't provided", $o)␊
672	if ($typeorig eq '');␊
673	␊
674	die wrap_mod("po4a gettextize",␊
675	"Internal error: type of translated string number %s ".␊
676	"isn't provided", $o)␊
677	if ($typetrans eq '');␊
678	␊
679	#␊
680	# Make sure both type are the same␊
681	#␊
682	if ($typeorig ne $typetrans){␊
683	$pores->write("gettextization.failed.po");␊
684	eval {␊
685	# Recode $trans into current charset, if possible␊
686	require I18N::Langinfo;␊
687	I18N::Langinfo->import(qw(langinfo CODESET));␊
688	my $codeset = langinfo(CODESET());␊
689	Encode::from_to($trans, $potrans->get_charset, $codeset);␊
690	};␊
691	die wrap_msg(dgettext("po4a",␊
692	"po4a gettextization: Structure disparity between ".␊
693	"original and translated files:\n".␊
694	"msgid (at %s) is of type '%s' while\n".␊
695	"msgstr (at %s) is of type '%s'.\n".␊
696	"Original text: %s\n".␊
697	"Translated text: %s\n".␊
698	"(result so far dumped to gettextization.failed.po)").␊
699	"%s",␊
700	$reforig, $typeorig,␊
701	$reftrans, $typetrans,␊
702	$orig,␊
703	$trans,␊
704	$toobad);␊
705	}␊
706	␊
707	#␊
708	# Push the entry␊
709	#␊
710	my $flags;␊
711	if (defined $poorig->{po}{$orig}{'flags'}) {␊
712	$flags = $poorig->{po}{$orig}{'flags'}." fuzzy";␊
713	} else {␊
714	$flags = "fuzzy";␊
715	}␊
716	$pores->push_raw('msgid' => $orig,␊
717	'msgstr' => $trans,␊
718	'flags' => $flags,␊
719	'type' => $typeorig,␊
720	'reference' => $reforig,␊
721	'conflict' => 1,␊
722	'transref' => $potrans->{po}{$trans}{'reference'})␊
723	unless (defined($pores->{po}{$orig})␊
724	and ($pores->{po}{$orig}{'msgstr'} eq $trans))␊
725	# FIXME: maybe we should be smarter about what reference should be␊
726	# sent to push_raw.␊
727	}␊
728	␊
729	# make sure we return a useful error message when entry count differ␊
730	die "$toobad\n" if $please_fail;␊
731	␊
732	return $pores;␊
733	}␊
734	␊
735	=item filter($)␊
736	␊
737	This function extracts a catalog from an existing one. Only the entries having␊
738	a reference in the given file will be placed in the resulting catalog.␊
739	␊
740	This function parses its argument, converts it to a Perl function definition,␊
741	evals this definition and filters the fields for which this function returns␊
742	true.␊
743	␊
744	I love Perl sometimes ;)␊
745	␊
746	=cut␊
747	␊
748	sub filter {␊
749	my $self=shift;␊
750	our $filter=shift;␊
751	␊
752	my $res;␊
753	$res = Locale::Po4a::Po->new();␊
754	␊
755	# Parse the filter␊
756	our $code="sub apply { return ";␊
757	our $pos=0;␊
758	our $length = length $filter;␊
759	␊
760	# explode chars to parts. How to subscript a string in Perl?␊
761	our @filter = split(//,$filter);␊
762	␊
763	sub gloups {␊
764	my $fmt=shift;␊
765	my $space = "";␊
766	for (1..$pos){␊
767	$space .= ' ';␊
768	}␊
769	die wrap_msg("$fmt\n$filter\n$space^ HERE");␊
770	}␊
771	sub showmethecode {␊
772	return unless $debug{'filter'};␊
773	my $fmt=shift;␊
774	my $space="";␊
775	for (1..$pos){␊
776	$space .= ' ';␊
777	}␊
778	print STDERR "$filter\n$space^ $fmt\n";#"$code\n";␊
779	}␊
780	␊
781	# I dream of a lex in perl :-/␊
782	sub parse_expression {␊
783	showmethecode("Begin expression")␊
784	if $debug{'filter'};␊
785	␊
786	gloups("Begin of expression expected, got '%s'",$filter[$pos])␊
787	unless ($filter[$pos] eq '(');␊
788	$pos ++; # pass the '('␊
789	if ($filter[$pos] eq '&') {␊
790	# AND␊
791	$pos++;␊
792	showmethecode("Begin of AND")␊
793	if $debug{'filter'};␊
794	$code .= "(";␊
795	while (1) {␊
796	gloups ("Unfinished AND statement.")␊
797	if ($pos == $length);␊
798	parse_expression();␊
799	if ($filter[$pos] eq '(') {␊
800	$code .= " && ";␊
801	} elsif ($filter[$pos] eq ')') {␊
802	last; # do not eat that char␊
803	} else {␊
804	gloups("End of AND or begin of sub-expression expected, got '%s'", $filter[$pos]);␊
805	}␊
806	}␊
807	$code .= ")";␊
808	} elsif ($filter[$pos] eq '\|') {␊
809	# OR␊
810	$pos++;␊
811	$code .= "(";␊
812	while (1) {␊
813	gloups("Unfinished OR statement.")␊
814	if ($pos == $length);␊
815	parse_expression();␊
816	if ($filter[$pos] eq '(') {␊
817	$code .= " \|\| ";␊
818	} elsif ($filter[$pos] eq ')') {␊
819	last; # do not eat that char␊
820	} else {␊
821	gloups("End of OR or begin of sub-expression expected, got '%s'",$filter[$pos]);␊
822	}␊
823	}␊
824	$code .= ")";␊
825	} elsif ($filter[$pos] eq '!') {␊
826	# NOT␊
827	$pos++;␊
828	$code .= "(!";␊
829	gloups("Missing sub-expression in NOT statement.")␊
830	if ($pos == $length);␊
831	parse_expression();␊
832	$code .= ")";␊
833	} else {␊
834	# must be an equal. Let's get field and argument␊
835	my ($field,$arg,$done);␊
836	$field = substr($filter,$pos);␊
837	gloups("EQ statement contains no '=' or invalid field name")␊
838	unless ($field =~ /([a-z]*)=/i);␊
839	$field = lc($1);␊
840	$pos += (length $field) + 1;␊
841	␊
842	# check that we've got a valid field name,␊
843	# and the number it referes to␊
844	# DO NOT CHANGE THE ORDER␊
845	my @names=qw(msgid msgstr reference flags comment previous automatic);␊
846	my $fieldpos;␊
847	for ($fieldpos = 0;␊
848	$fieldpos < scalar @names && $field ne $names[$fieldpos];␊
849	$fieldpos++) {}␊
850	gloups("Invalid field name: %s",$field)␊
851	if $fieldpos == scalar @names; # not found␊
852	␊
853	# Now, get the argument value. It has to be between quotes,␊
854	# which can be escaped␊
855	# We point right on the first char of the argument␊
856	# (first quote already eaten)␊
857	my $escaped = 0;␊
858	my $quoted = 0;␊
859	if ($filter[$pos] eq '"') {␊
860	$pos++;␊
861	$quoted = 1;␊
862	}␊
863	showmethecode(($quoted?"Quoted":"Unquoted")." argument of field '$field'")␊
864	if $debug{'filter'};␊
865	␊
866	while (!$done) {␊
867	gloups("Unfinished EQ argument.")␊
868	if ($pos == $length);␊
869	␊
870	if ($quoted) {␊
871	if ($filter[$pos] eq '\\') {␊
872	if ($escaped) {␊
873	$arg .= '\\';␊
874	$escaped = 0;␊
875	} else {␊
876	$escaped = 1;␊
877	}␊
878	} elsif ($escaped) {␊
879	if ($filter[$pos] eq '"') {␊
880	$arg .= '"';␊
881	$escaped = 0;␊
882	} else {␊
883	gloups("Invalid escape sequence in argument: '\\%s'",$filter[$pos]);␊
884	}␊
885	} else {␊
886	if ($filter[$pos] eq '"') {␊
887	$done = 1;␊
888	} else {␊
889	$arg .= $filter[$pos];␊
890	}␊
891	}␊
892	} else {␊
893	if ($filter[$pos] eq ')') {␊
894	# counter the next ++ since we don't want to eat␊
895	# this char␊
896	$pos--;␊
897	$done = 1;␊
898	} else {␊
899	$arg .= $filter[$pos];␊
900	}␊
901	}␊
902	$pos++;␊
903	}␊
904	# and now, add the code to check this equality␊
905	$code .= "(\$_[$fieldpos] =~ m{$arg})";␊
906	␊
907	}␊
908	showmethecode("End of expression")␊
909	if $debug{'filter'};␊
910	gloups("Unfinished statement.")␊
911	if ($pos == $length);␊
912	gloups("End of expression expected, got '%s'",$filter[$pos])␊
913	unless ($filter[$pos] eq ')');␊
914	$pos++;␊
915	}␊
916	# And now, launch the beast, finish the function and use eval␊
917	# to construct this function.␊
918	# Ok, the lack of lexer is a fair price for the eval ;)␊
919	parse_expression();␊
920	gloups("Garbage at the end of the expression")␊
921	if ($pos != $length);␊
922	$code .= "; }";␊
923	print STDERR "CODE = $code\n"␊
924	if $debug{'filter'};␊
925	eval $code;␊
926	die wrap_mod("po4a::po", dgettext("po4a", "Eval failure: %s"), $@)␊
927	if $@;␊
928	␊
929	for (my $cpt=(0) ;␊
930	$cpt<$self->count_entries();␊
931	$cpt++) {␊
932	␊
933	my ($msgid,$ref,$msgstr,$flags,$type,$comment,$previous,$automatic);␊
934	␊
935	$msgid = $self->msgid($cpt);␊
936	$ref=$self->{po}{$msgid}{'reference'};␊
937	␊
938	$msgstr= $self->{po}{$msgid}{'msgstr'};␊
939	$flags = $self->{po}{$msgid}{'flags'};␊
940	$type = $self->{po}{$msgid}{'type'};␊
941	$comment = $self->{po}{$msgid}{'comment'};␊
942	$previous = $self->{po}{$msgid}{'previous'};␊
943	$automatic = $self->{po}{$msgid}{'automatic'};␊
944	␊
945	# DO NOT CHANGE THE ORDER␊
946	$res->push_raw('msgid' => $msgid,␊
947	'msgstr' => $msgstr,␊
948	'flags' => $flags,␊
949	'type' => $type,␊
950	'reference' => $ref,␊
951	'comment' => $comment,␊
952	'previous' => $previous,␊
953	'automatic' => $automatic)␊
954	if (apply($msgid,$msgstr,$ref,$flags,$comment,$previous,$automatic));␊
955	}␊
956	# delete the apply subroutine␊
957	# otherwise it will be redefined.␊
958	undef &apply;␊
959	return $res;␊
960	}␊
961	␊
962	=item to_utf8()␊
963	␊
964	Recodes to UTF-8 the PO's msgstrs. Does nothing if the charset is not␊
965	specified in the PO file ("CHARSET" value), or if it's already UTF-8 or␊
966	ASCII.␊
967	␊
968	=cut␊
969	␊
970	sub to_utf8 {␊
971	my $this = shift;␊
972	my $charset = $this->get_charset();␊
973	␊
974	unless ($charset eq "CHARSET" or␊
975	$charset =~ /^ascii$/i or␊
976	$charset =~ /^utf-8$/i) {␊
977	foreach my $msgid ( keys %{$this->{po}} ) {␊
978	Encode::from_to($this->{po}{$msgid}{'msgstr'}, $charset, "utf-8");␊
979	}␊
980	$this->set_charset("UTF-8");␊
981	}␊
982	}␊
983	␊
984	=back␊
985	␊
986	=head1 Functions to use a message catalog for translations␊
987	␊
988	=over 4␊
989	␊
990	=item gettext($%)␊
991	␊
992	Request the translation of the string given as argument in the current catalog.␊
993	The function returns the original (untranslated) string if the string was not␊
994	found.␊
995	␊
996	After the string to translate, you can pass a hash of extra␊
997	arguments. Here are the valid entries:␊
998	␊
999	=over␊
1000	␊
1001	=item B<wrap>␊
1002	␊
1003	boolean indicating whether we can consider that whitespaces in string are␊
1004	not important. If yes, the function canonizes the string before looking for␊
1005	a translation, and wraps the result.␊
1006	␊
1007	=item B<wrapcol>␊
1008	␊
1009	the column at which we should wrap (default: 76).␊
1010	␊
1011	=back␊
1012	␊
1013	=cut␊
1014	␊
1015	sub gettext {␊
1016	my $self=shift;␊
1017	my $text=shift;␊
1018	my (%opt)=@_;␊
1019	my $res;␊
1020	␊
1021	return "" unless defined($text) && length($text); # Avoid returning the header.␊
1022	my $validoption="reference wrap wrapcol";␊
1023	my %validoption;␊
1024	␊
1025	map { $validoption{$_}=1 } (split(/ /,$validoption));␊
1026	foreach (keys %opt) {␊
1027	Carp::confess "internal error: unknown arg $_.\n".␊
1028	"Here are the valid options: $validoption.\n"␊
1029	unless $validoption{$_};␊
1030	}␊
1031	␊
1032	$text=canonize($text)␊
1033	if ($opt{'wrap'});␊
1034	␊
1035	my $esc_text=escape_text($text);␊
1036	␊
1037	$self->{gettextqueries}++;␊
1038	␊
1039	if ( defined $self->{po}{$esc_text}␊
1040	and defined $self->{po}{$esc_text}{'msgstr'}␊
1041	and length $self->{po}{$esc_text}{'msgstr'}␊
1042	and ( not defined $self->{po}{$esc_text}{'flags'}␊
1043	or $self->{po}{$esc_text}{'flags'} !~ /fuzzy/)) {␊
1044	␊
1045	$self->{gettexthits}++;␊
1046	$res = unescape_text($self->{po}{$esc_text}{'msgstr'});␊
1047	if (defined $self->{po}{$esc_text}{'plural'}) {␊
1048	if ($self->{po}{$esc_text}{'plural'} eq "0") {␊
1049	warn wrap_mod("po4a gettextize", dgettext("po4a",␊
1050	"'%s' is the singular form of a message, ".␊
1051	"po4a will use the msgstr[0] translation (%s)."),␊
1052	$esc_text, $res);␊
1053	} else {␊
1054	warn wrap_mod("po4a gettextize", dgettext("po4a",␊
1055	"'%s' is the plural form of a message, ".␊
1056	"po4a will use the msgstr[1] translation (%s)."),␊
1057	$esc_text, $res);␊
1058	}␊
1059	}␊
1060	} else {␊
1061	$res = $text;␊
1062	}␊
1063	␊
1064	if ($opt{'wrap'}) {␊
1065	if ($self->get_charset =~ /^utf-8$/i) {␊
1066	$res=Encode::decode_utf8($res);␊
1067	$res=wrap ($res, $opt{'wrapcol'} \|\| 76);␊
1068	$res=Encode::encode_utf8($res);␊
1069	} else {␊
1070	$res=wrap ($res, $opt{'wrapcol'} \|\| 76);␊
1071	}␊
1072	}␊
1073	# print STDERR "Gettext >>>$text<<<(escaped=$esc_text)=[[[$res]]]\n\n";␊
1074	return $res;␊
1075	}␊
1076	␊
1077	=item stats_get()␊
1078	␊
1079	Returns statistics about the hit ratio of gettext since the last time that␊
1080	stats_clear() was called. Please note that it's not the same␊
1081	statistics than the one printed by msgfmt --statistic. Here, it's statistics␊
1082	about recent usage of the PO file, while msgfmt reports the status of the␊
1083	file. Example of use:␊
1084	␊
1085	[some use of the PO file to translate stuff]␊
1086	␊
1087	($percent,$hit,$queries) = $pofile->stats_get();␊
1088	print "So far, we found translations for $percent\% ($hit of $queries) of strings.\n";␊
1089	␊
1090	=cut␊
1091	␊
1092	sub stats_get() {␊
1093	my $self=shift;␊
1094	my ($h,$q)=($self->{gettexthits},$self->{gettextqueries});␊
1095	my $p = ($q == 0 ? 100 : int($h/$q*10000)/100);␊
1096	␊
1097	# $p =~ s/\.00//;␊
1098	# $p =~ s/(\..)0/$1/;␊
1099	␊
1100	return ( $p,$h,$q );␊
1101	}␊
1102	␊
1103	=item stats_clear()␊
1104	␊
1105	Clears the statistics about gettext hits.␊
1106	␊
1107	=cut␊
1108	␊
1109	sub stats_clear {␊
1110	my $self = shift;␊
1111	$self->{gettextqueries} = 0;␊
1112	$self->{gettexthits} = 0;␊
1113	}␊
1114	␊
1115	=back␊
1116	␊
1117	=head1 Functions to build a message catalog␊
1118	␊
1119	=over 4␊
1120	␊
1121	=item push(%)␊
1122	␊
1123	Push a new entry at the end of the current catalog. The arguments should␊
1124	form a hash table. The valid keys are:␊
1125	␊
1126	=over 4␊
1127	␊
1128	=item B<msgid>␊
1129	␊
1130	the string in original language.␊
1131	␊
1132	=item B<msgstr>␊
1133	␊
1134	the translation.␊
1135	␊
1136	=item B<reference>␊
1137	␊
1138	an indication of where this string was found. Example: file.c:46 (meaning␊
1139	in 'file.c' at line 46). It can be a space-separated list in case of␊
1140	multiple occurrences.␊
1141	␊
1142	=item B<comment>␊
1143	␊
1144	a comment added here manually (by the translators). The format here is free.␊
1145	␊
1146	=item B<automatic>␊
1147	␊
1148	a comment which was automatically added by the string extraction␊
1149	program. See the B<--add-comments> option of the B<xgettext> program for␊
1150	more information.␊
1151	␊
1152	=item B<flags>␊
1153	␊
1154	space-separated list of all defined flags for this entry.␊
1155	␊
1156	Valid flags are: B<c-text>, B<python-text>, B<lisp-text>, B<elisp-text>, B<librep-text>,␊
1157	B<smalltalk-text>, B<java-text>, B<awk-text>, B<object-pascal-text>, B<ycp-text>,␊
1158	B<tcl-text>, B<wrap>, B<no-wrap> and B<fuzzy>.␊
1159	␊
1160	See the gettext documentation for their meaning.␊
1161	␊
1162	=item B<type>␊
1163	␊
1164	this is mostly an internal argument: it is used while gettextizing␊
1165	documents. The idea here is to parse both the original and the translation␊
1166	into a PO object, and merge them, using one's msgid as msgid and the␊
1167	other's msgid as msgstr. To make sure that things get ok, each msgid in PO␊
1168	objects are given a type, based on their structure (like "chapt", "sect1",␊
1169	"p" and so on in DocBook). If the types of strings are not the same, that␊
1170	means that both files do not share the same structure, and the process␊
1171	reports an error.␊
1172	␊
1173	This information is written as automatic comment in the PO file since this␊
1174	gives to translators some context about the strings to translate.␊
1175	␊
1176	=item B<wrap>␊
1177	␊
1178	boolean indicating whether whitespaces can be mangled in cosmetic␊
1179	reformattings. If true, the string is canonized before use.␊
1180	␊
1181	This information is written to the PO file using the B<wrap> or B<no-wrap> flag.␊
1182	␊
1183	=item B<wrapcol>␊
1184	␊
1185	the column at which we should wrap (default: 76).␊
1186	␊
1187	This information is not written to the PO file.␊
1188	␊
1189	=back␊
1190	␊
1191	=cut␊
1192	␊
1193	sub push {␊
1194	my $self=shift;␊
1195	my %entry=@_;␊
1196	␊
1197	my $validoption="wrap wrapcol type msgid msgstr automatic previous flags reference";␊
1198	my %validoption;␊
1199	␊
1200	map { $validoption{$_}=1 } (split(/ /,$validoption));␊
1201	foreach (keys %entry) {␊
1202	Carp::confess "internal error: unknown arg $_.\n".␊
1203	"Here are the valid options: $validoption.\n"␊
1204	unless $validoption{$_};␊
1205	}␊
1206	␊
1207	unless ($entry{'wrap'}) {␊
1208	$entry{'flags'} .= " no-wrap";␊
1209	}␊
1210	if (defined ($entry{'msgid'})) {␊
1211	$entry{'msgid'} = canonize($entry{'msgid'})␊
1212	if ($entry{'wrap'});␊
1213	␊
1214	$entry{'msgid'} = escape_text($entry{'msgid'});␊
1215	}␊
1216	if (defined ($entry{'msgstr'})) {␊
1217	$entry{'msgstr'} = canonize($entry{'msgstr'})␊
1218	if ($entry{'wrap'});␊
1219	␊
1220	$entry{'msgstr'} = escape_text($entry{'msgstr'});␊
1221	}␊
1222	␊
1223	$self->push_raw(%entry);␊
1224	}␊
1225	␊
1226	# The same as push(), but assuming that msgid and msgstr are already escaped␊
1227	sub push_raw {␊
1228	my $self=shift;␊
1229	my %entry=@_;␊
1230	my ($msgid,$msgstr,$reference,$comment,$automatic,$previous,$flags,$type,$transref)=␊
1231	($entry{'msgid'},$entry{'msgstr'},␊
1232	$entry{'reference'},$entry{'comment'},$entry{'automatic'},␊
1233	$entry{'previous'},$entry{'flags'},$entry{'type'},$entry{'transref'});␊
1234	my $keep_conflict = $entry{'conflict'};␊
1235	␊
1236	# print STDERR "Push_raw\n";␊
1237	# print STDERR " msgid=>>>$msgid<<<\n" if $msgid;␊
1238	# print STDERR " msgstr=[[[$msgstr]]]\n" if $msgstr;␊
1239	# Carp::cluck " flags=$flags\n" if $flags;␊
1240	␊
1241	return unless defined($entry{'msgid'});␊
1242	␊
1243	#no msgid => header definition␊
1244	unless (length($entry{'msgid'})) {␊
1245	# if (defined($self->{header}) && $self->{header} =~ /\S/) {␊
1246	# warn dgettext("po4a","Redefinition of the header. ".␊
1247	# "The old one will be discarded\n");␊
1248	# } FIXME: do that iff the header isn't the default one.␊
1249	$self->{header}=$msgstr;␊
1250	$self->{header_comment}=$comment;␊
1251	my $charset = $self->get_charset;␊
1252	if ($charset ne "CHARSET") {␊
1253	$self->{encoder}=find_encoding($charset);␊
1254	} else {␊
1255	$self->{encoder}=find_encoding("ascii");␊
1256	}␊
1257	return;␊
1258	}␊
1259	␊
1260	if ($self->{options}{'porefs'} =~ m/^none/) {␊
1261	$reference = "";␊
1262	} elsif ($self->{options}{'porefs'} =~ m/^counter/) {␊
1263	if ($reference =~ m/^(.+?)(?=\S+:\d+)/g) {␊
1264	my $new_ref = $1;␊
1265	1 while $reference =~ s{ # x modifier is added to add formatting and improve readability␊
1266	\G(\s*)(\S+):\d+ # \G is the last match in m//g (see also the (?=) syntax above)␊
1267	# $2 is the file name␊
1268	}{␊
1269	$self->{counter}{$2} \|\|= 0, # each file has its own counter␊
1270	++$self->{counter}{$2}, # increment it␊
1271	$new_ref .= "$1$2:".$self->{counter}{$2} # replace line number by this counter␊
1272	}gex && pos($reference);␊
1273	$reference = $new_ref;␊
1274	}␊
1275	} elsif ($self->{options}{'porefs'} =~ m/^noline/) {␊
1276	$reference =~ s/:\d+/:1/g;␊
1277	}␊
1278	␊
1279	if (defined($self->{po}{$msgid})) {␊
1280	warn wrap_mod("po4a::po",␊
1281	dgettext("po4a","msgid defined twice: %s"),␊
1282	$msgid)␊
1283	if (0); # FIXME: put a verbose stuff␊
1284	if ( defined $msgstr␊
1285	and defined $self->{po}{$msgid}{'msgstr'}␊
1286	and $self->{po}{$msgid}{'msgstr'} ne $msgstr) {␊
1287	my $txt=quote_text($msgid);␊
1288	my ($first,$second)=␊
1289	(format_comment(". ",$self->{po}{$msgid}{'reference'}).␊
1290	quote_text($self->{po}{$msgid}{'msgstr'}),␊
1291	␊
1292	format_comment(". ",$reference).␊
1293	quote_text($msgstr));␊
1294	␊
1295	if ($keep_conflict) {␊
1296	if ($self->{po}{$msgid}{'msgstr'} =~ m/^#-#-#-#-# .* #-#-#-#-#\\n/s) {␊
1297	$msgstr = $self->{po}{$msgid}{'msgstr'}.␊
1298	"\\n#-#-#-#-# $transref #-#-#-#-#\\n".␊
1299	$msgstr;␊
1300	} else {␊
1301	$msgstr = "#-#-#-#-# ".␊
1302	$self->{po}{$msgid}{'transref'}.␊
1303	" #-#-#-#-#\\n".␊
1304	$self->{po}{$msgid}{'msgstr'}."\\n".␊
1305	"#-#-#-#-# $transref #-#-#-#-#\\n".␊
1306	$msgstr;␊
1307	}␊
1308	# Every msgid will have the same list of references.␊
1309	# Only keep the last list.␊
1310	$self->{po}{$msgid}{'reference'} = "";␊
1311	} else {␊
1312	warn wrap_msg(dgettext("po4a",␊
1313	"Translations don't match for:\n".␊
1314	"%s\n".␊
1315	"-->First translation:\n".␊
1316	"%s\n".␊
1317	" Second translation:\n".␊
1318	"%s\n".␊
1319	" Old translation discarded."),␊
1320	$txt,$first,$second);␊
1321	}␊
1322	}␊
1323	}␊
1324	if (defined $transref) {␊
1325	$self->{po}{$msgid}{'transref'} = $transref;␊
1326	}␊
1327	if (defined($reference) && length($reference)) {␊
1328	if (defined $self->{po}{$msgid}{'reference'}) {␊
1329	$self->{po}{$msgid}{'reference'} .= " ".$reference;␊
1330	} else {␊
1331	$self->{po}{$msgid}{'reference'} = $reference;␊
1332	}␊
1333	}␊
1334	$self->{po}{$msgid}{'msgstr'} = $msgstr;␊
1335	$self->{po}{$msgid}{'comment'} = $comment;␊
1336	$self->{po}{$msgid}{'automatic'} = $automatic;␊
1337	$self->{po}{$msgid}{'previous'} = $previous;␊
1338	if (defined($self->{po}{$msgid}{'pos_doc'})) {␊
1339	$self->{po}{$msgid}{'pos_doc'} .= " ".$self->{count_doc}++;␊
1340	} else {␊
1341	$self->{po}{$msgid}{'pos_doc'} = $self->{count_doc}++;␊
1342	}␊
1343	unless (defined($self->{po}{$msgid}{'pos'})) {␊
1344	$self->{po}{$msgid}{'pos'} = $self->{count}++;␊
1345	}␊
1346	$self->{po}{$msgid}{'type'} = $type;␊
1347	$self->{po}{$msgid}{'plural'} = $entry{'plural'}␊
1348	if defined $entry{'plural'};␊
1349	␊
1350	if (defined($flags)) {␊
1351	$flags = " $flags ";␊
1352	$flags =~ s/,/ /g;␊
1353	foreach my $flag (@known_flags) {␊
1354	if ($flags =~ /\s$flag\s/) { # if flag to be set␊
1355	unless ( defined($self->{po}{$msgid}{'flags'})␊
1356	&& $self->{po}{$msgid}{'flags'} =~ /\b$flag\b/) {␊
1357	# flag not already set␊
1358	if (defined $self->{po}{$msgid}{'flags'}) {␊
1359	$self->{po}{$msgid}{'flags'} .= " ".$flag;␊
1360	} else {␊
1361	$self->{po}{$msgid}{'flags'} = $flag;␊
1362	}␊
1363	}␊
1364	}␊
1365	}␊
1366	}␊
1367	# print STDERR "stored ((($msgid)))=>(((".$self->{po}{$msgid}{'msgstr'}.")))\n\n";␊
1368	␊
1369	}␊
1370	␊
1371	=back␊
1372	␊
1373	=head1 Miscellaneous functions␊
1374	␊
1375	=over 4␊
1376	␊
1377	=item count_entries()␊
1378	␊
1379	Returns the number of entries in the catalog (without the header).␊
1380	␊
1381	=cut␊
1382	␊
1383	sub count_entries($) {␊
1384	my $self=shift;␊
1385	return $self->{count};␊
1386	}␊
1387	␊
1388	=item count_entries_doc()␊
1389	␊
1390	Returns the number of entries in document. If a string appears multiple times␊
1391	in the document, it will be counted multiple times␊
1392	␊
1393	=cut␊
1394	␊
1395	sub count_entries_doc($) {␊
1396	my $self=shift;␊
1397	return $self->{count_doc};␊
1398	}␊
1399	␊
1400	=item msgid($)␊
1401	␊
1402	Returns the msgid of the given number.␊
1403	␊
1404	=cut␊
1405	␊
1406	sub msgid($$) {␊
1407	my $self=shift;␊
1408	my $num=shift;␊
1409	␊
1410	foreach my $msgid ( keys %{$self->{po}} ) {␊
1411	return $msgid if ($self->{po}{$msgid}{'pos'} eq $num);␊
1412	}␊
1413	return undef;␊
1414	}␊
1415	␊
1416	=item msgid_doc($)␊
1417	␊
1418	Returns the msgid with the given position in the document.␊
1419	␊
1420	=cut␊
1421	␊
1422	sub msgid_doc($$) {␊
1423	my $self=shift;␊
1424	my $num=shift;␊
1425	␊
1426	foreach my $msgid ( keys %{$self->{po}} ) {␊
1427	foreach my $pos (split / /, $self->{po}{$msgid}{'pos_doc'}) {␊
1428	return $msgid if ($pos eq $num);␊
1429	}␊
1430	}␊
1431	return undef;␊
1432	}␊
1433	␊
1434	=item get_charset()␊
1435	␊
1436	Returns the character set specified in the PO header. If it hasn't been␊
1437	set, it will return "CHARSET".␊
1438	␊
1439	=cut␊
1440	␊
1441	sub get_charset() {␊
1442	my $self=shift;␊
1443	␊
1444	$self->{header} =~ /charset=(.*?)[\s\\]/;␊
1445	␊
1446	if (defined $1) {␊
1447	return $1;␊
1448	} else {␊
1449	return "CHARSET";␊
1450	}␊
1451	}␊
1452	␊
1453	=item set_charset($)␊
1454	␊
1455	This sets the character set of the PO header to the value specified in its␊
1456	first argument. If you never call this function (and no file with a specified␊
1457	character set is read), the default value is left to "CHARSET". This value␊
1458	doesn't change the behavior of this module, it's just used to fill that field␊
1459	in the header, and to return it in get_charset().␊
1460	␊
1461	=cut␊
1462	␊
1463	sub set_charset() {␊
1464	my $self=shift;␊
1465	␊
1466	my ($newchar,$oldchar);␊
1467	$newchar = shift;␊
1468	$oldchar = $self->get_charset();␊
1469	␊
1470	$self->{header} =~ s/$oldchar/$newchar/;␊
1471	$self->{encoder}=find_encoding($newchar);␊
1472	}␊
1473	␊
1474	#----[ helper functions ]---------------------------------------------------␊
1475	␊
1476	# transforme the string from its PO file representation to the form which␊
1477	# should be used to print it␊
1478	sub unescape_text {␊
1479	my $text = shift;␊
1480	␊
1481	print STDERR "\nunescape [$text]====" if $debug{'escape'};␊
1482	$text = join("",split(/\n/,$text));␊
1483	$text =~ s/\\"/"/g;␊
1484	# unescape newlines␊
1485	# NOTE on \G:␊
1486	# The following regular expression introduce newlines.␊
1487	# Thus, ^ doesn't match all beginnings of lines.␊
1488	# \G is a zero-width assertion that matches the position␊
1489	# of the previous substitution with s///g. As every␊
1490	# substitution ends by a newline, it always matches a␊
1491	# position just after a newline.␊
1492	$text =~ s/( # $1:␊
1493	(\G\|[^\\]) # beginning of the line or any char␊
1494	# different from '\'␊
1495	(\\\\)* # followed by any even number of '\'␊
1496	)\\n # and followed by an escaped newline␊
1497	/$1\n/sgx; # single string, match globally, allow comments␊
1498	# unescape carriage returns␊
1499	$text =~ s/( # $1:␊
1500	(\G\|[^\\]) # beginning of the line or any char␊
1501	# different from '\'␊
1502	(\\\\)* # followed by any even number of '\'␊
1503	)\\r # and followed by an escaped carriage return␊
1504	/$1\r/sgx; # single string, match globally, allow comments␊
1505	# unescape tabulations␊
1506	$text =~ s/( # $1:␊
1507	(\G\|[^\\])# beginning of the line or any char␊
1508	# different from '\'␊
1509	(\\\\)* # followed by any even number of '\'␊
1510	)\\t # and followed by an escaped tabulation␊
1511	/$1\t/mgx; # multilines string, match globally, allow comments␊
1512	# and unescape the escape character␊
1513	$text =~ s/\\\\/\\/g;␊
1514	print STDERR ">$text<\n" if $debug{'escape'};␊
1515	␊
1516	return $text;␊
1517	}␊
1518	␊
1519	# transform the string to its representation as it should be written in PO␊
1520	# files␊
1521	sub escape_text {␊
1522	my $text = shift;␊
1523	␊
1524	print STDERR "\nescape [$text]====" if $debug{'escape'};␊
1525	$text =~ s/\\/\\\\/g;␊
1526	$text =~ s/"/\\"/g;␊
1527	$text =~ s/\n/\\n/g;␊
1528	$text =~ s/\r/\\r/g;␊
1529	$text =~ s/\t/\\t/g;␊
1530	print STDERR ">$text<\n" if $debug{'escape'};␊
1531	␊
1532	return $text;␊
1533	}␊
1534	␊
1535	# put quotes around the string on each lines (without escaping it)␊
1536	# It does also normalize the text (ie, make sure its representation is wraped␊
1537	# on the 80th char, but without changing the meaning of the string)␊
1538	sub quote_text {␊
1539	my $string = shift;␊
1540	␊
1541	return '""' unless defined($string) && length($string);␊
1542	␊
1543	print STDERR "\nquote [$string]====" if $debug{'quote'};␊
1544	# break lines on newlines, if any␊
1545	# see unescape_text for an explanation on \G␊
1546	$string =~ s/( # $1:␊
1547	(\G\|[^\\]) # beginning of the line or any char␊
1548	# different from '\'␊
1549	(\\\\)* # followed by any even number of '\'␊
1550	\\n) # and followed by an escaped newline␊
1551	/$1\n/sgx; # single string, match globally, allow comments␊
1552	$string = wrap($string);␊
1553	my @string = split(/\n/,$string);␊
1554	$string = join ("\"\n\"",@string);␊
1555	$string = "\"$string\"";␊
1556	if (scalar @string > 1 && $string[0] ne '') {␊
1557	$string = "\"\"\n".$string;␊
1558	}␊
1559	␊
1560	print STDERR ">$string<\n" if $debug{'quote'};␊
1561	return $string;␊
1562	}␊
1563	␊
1564	# undo the work of the quote_text function␊
1565	sub unquote_text {␊
1566	my $string = shift;␊
1567	print STDERR "\nunquote [$string]====" if $debug{'quote'};␊
1568	$string =~ s/^""\\n//s;␊
1569	$string =~ s/^"(.*)"$/$1/s;␊
1570	$string =~ s/"\n"//gm;␊
1571	# Note: an even number of '\' could precede \\n, but I could not build a␊
1572	# document to test this␊
1573	$string =~ s/([^\\])\\n\n/$1!!DUMMYPOPM!!/gm;␊
1574	$string =~ s\|!!DUMMYPOPM!!\|\\n\|gm;␊
1575	print STDERR ">$string<\n" if $debug{'quote'};␊
1576	return $string;␊
1577	}␊
1578	␊
1579	# canonize the string: write it on only one line, changing consecutive␊
1580	# whitespace to only one space.␊
1581	# Warning, it changes the string and should only be called if the string is␊
1582	# plain text␊
1583	sub canonize {␊
1584	my $text=shift;␊
1585	print STDERR "\ncanonize [$text]====" if $debug{'canonize'};␊
1586	$text =~ s/^ *//s;␊
1587	$text =~ s/^[ \t]+/ /gm;␊
1588	# if ($text eq "\n"), it messed up the first string (header)␊
1589	$text =~ s/\n/ /gm if ($text ne "\n");␊
1590	$text =~ s/([.)]) +/$1 /gm;␊
1591	$text =~ s/([^.)]) */$1 /gm;␊
1592	$text =~ s/ *$//s;␊
1593	print STDERR ">$text<\n" if $debug{'canonize'};␊
1594	return $text;␊
1595	}␊
1596	␊
1597	# wraps the string. We don't use Text::Wrap since it mangles whitespace at␊
1598	# the end of splited line␊
1599	sub wrap {␊
1600	my $text=shift;␊
1601	return "0" if ($text eq '0');␊
1602	my $col=shift \|\| 76;␊
1603	my @lines=split(/\n/,"$text");␊
1604	my $res="";␊
1605	my $first=1;␊
1606	while (defined(my $line=shift @lines)) {␊
1607	if ($first && length($line) > $col - 10) {␊
1608	unshift @lines,$line;␊
1609	$first=0;␊
1610	next;␊
1611	}␊
1612	if (length($line) > $col) {␊
1613	my $pos=rindex($line," ",$col);␊
1614	while (substr($line,$pos-1,1) eq '.' && $pos != -1) {␊
1615	$pos=rindex($line," ",$pos-1);␊
1616	}␊
1617	if ($pos == -1) {␊
1618	# There are no spaces in the first $col chars, pick-up the␊
1619	# first space␊
1620	$pos = index($line," ");␊
1621	}␊
1622	if ($pos != -1) {␊
1623	my $end=substr($line,$pos+1);␊
1624	$line=substr($line,0,$pos+1);␊
1625	if ($end =~ s/^( +)//) {␊
1626	$line .= $1;␊
1627	}␊
1628	unshift @lines,$end;␊
1629	}␊
1630	}␊
1631	$first=0;␊
1632	$res.="$line\n";␊
1633	}␊
1634	# Restore the original trailing spaces␊
1635	$res =~ s/\s+$//s;␊
1636	if ($text =~ m/(\s+)$/s) {␊
1637	$res .= $1;␊
1638	}␊
1639	return $res;␊
1640	}␊
1641	␊
1642	# outputs properly a '# ... ' line to be put in the PO file␊
1643	sub format_comment {␊
1644	my $comment=shift;␊
1645	my $char=shift;␊
1646	my $result = "#". $char . $comment;␊
1647	$result =~ s/\n/\n#$char/gs;␊
1648	$result =~ s/^#$char$/#/gm;␊
1649	$result .= "\n";␊
1650	return $result;␊
1651	}␊
1652	␊
1653	␊
1654	1;␊
1655	__END__␊
1656	␊
1657	=back␊
1658	␊
1659	=head1 AUTHORS␊
1660	␊
1661	Denis Barbier <barbier@linuxfr.org>␊
1662	Martin Quinson (mquinson#debian.org)␊
1663	␊
1664	=cut␊
1665

Download this file