branches/Chimera/package/bin/po4a/lib/Locale/Po4a/Po.pm - Chameleon Svn Source Tree - Chameleon open source boot loader project.

Root/branches/Chimera/package/bin/po4a/lib/Locale/Po4a/Po.pm

1	# Locale::Po4a::Po -- manipulation of PO files␊
2	#␊
3	# This program is free software; you may redistribute it and/or modify it␊
4	# under the terms of GPL (see COPYING).␊
5	␊
6	############################################################################␊
7	# Modules and declarations␊
8	############################################################################␊
9	␊
10	=encoding UTF-8␊
11	␊
12	=head1 NAME␊
13	␊
14	Locale::Po4a::Po - PO file manipulation module␊
15	␊
16	=head1 SYNOPSIS␊
17	␊
18	use Locale::Po4a::Po;␊
19	my $pofile=Locale::Po4a::Po->new();␊
20	␊
21	# Read PO file␊
22	$pofile->read('file.po');␊
23	␊
24	# Add an entry␊
25	$pofile->push('msgid' => 'Hello', 'msgstr' => 'bonjour',␊
26	'flags' => "wrap", 'reference'=>'file.c:46');␊
27	␊
28	# Extract a translation␊
29	$pofile->gettext("Hello"); # returns 'bonjour'␊
30	␊
31	# Write back to a file␊
32	$pofile->write('otherfile.po');␊
33	␊
34	=head1 DESCRIPTION␊
35	␊
36	Locale::Po4a::Po is a module that allows you to manipulate message␊
37	catalogs. You can load and write from/to a file (which extension is often␊
38	I<po>), you can build new entries on the fly or request for the translation␊
39	of a string.␊
40	␊
41	For a more complete description of message catalogs in the PO format and␊
42	their use, please refer to the documentation of the gettext program.␊
43	␊
44	This module is part of the po4a project, which objective is to use PO files␊
45	(designed at origin to ease the translation of program messages) to␊
46	translate everything, including documentation (man page, info manual),␊
47	package description, debconf templates, and everything which may benefit␊
48	from this.␊
49	␊
50	=head1 OPTIONS ACCEPTED BY THIS MODULE␊
51	␊
52	=over 4␊
53	␊
54	=item B<porefs>␊
55	␊
56	This specifies the reference format. It can be one of B<none> to not produce␊
57	any reference, B<noline> to not specify the line number, and B<full> to␊
58	include complete references.␊
59	␊
60	=back␊
61	␊
62	=cut␊
63	␊
64	use IO::File;␊
65	␊
66	␊
67	require Exporter;␊
68	␊
69	package Locale::Po4a::Po;␊
70	use DynaLoader;␊
71	␊
72	use Locale::Po4a::Common qw(wrap_msg wrap_mod wrap_ref_mod dgettext);␊
73	␊
74	use subs qw(makespace);␊
75	use vars qw(@ISA @EXPORT_OK);␊
76	@ISA = qw(Exporter DynaLoader);␊
77	@EXPORT = qw(%debug);␊
78	@EXPORT_OK = qw(&move_po_if_needed);␊
79	␊
80	use Locale::Po4a::TransTractor;␊
81	# Try to use a C extension if present.␊
82	eval("bootstrap Locale::Po4a::Po $Locale::Po4a::TransTractor::VERSION");␊
83	␊
84	use 5.006;␊
85	use strict;␊
86	use warnings;␊
87	␊
88	use Carp qw(croak);␊
89	use File::Basename;␊
90	use File::Path; # mkdir before write␊
91	use File::Copy; # move␊
92	use POSIX qw(strftime floor);␊
93	use Time::Local;␊
94	␊
95	use Encode;␊
96	␊
97	my @known_flags=qw(wrap no-wrap c-format fuzzy);␊
98	␊
99	our %debug=('canonize' => 0,␊
100	'quote' => 0,␊
101	'escape' => 0,␊
102	'encoding' => 0,␊
103	'filter' => 0);␊
104	␊
105	=head1 Functions about whole message catalogs␊
106	␊
107	=over 4␊
108	␊
109	=item new()␊
110	␊
111	Creates a new message catalog. If an argument is provided, it's the name of␊
112	a PO file we should load.␊
113	␊
114	=cut␊
115	␊
116	sub new {␊
117	my ($this, $options) = (shift, shift);␊
118	my $class = ref($this) \|\| $this;␊
119	my $self = {};␊
120	bless $self, $class;␊
121	$self->initialize($options);␊
122	␊
123	my $filename = shift;␊
124	$self->read($filename) if defined($filename) && length($filename);␊
125	return $self;␊
126	}␊
127	␊
128	# Return the numerical timezone (e.g. +0200)␊
129	# Neither the %z nor the %s formats of strftime are portable:␊
130	# '%s' is not supported on Solaris and '%z' indicates␊
131	# "2006-10-25 19:36E. Europe Standard Time" on MS Windows.␊
132	sub timezone {␊
133	my @g = gmtime();␊
134	my @l = localtime();␊
135	␊
136	my $diff;␊
137	$diff = floor(timelocal(@l)/60 +0.5);␊
138	$diff -= floor(timelocal(@g)/60 +0.5);␊
139	␊
140	my $h = floor($diff / 60) + $l[8]; # $l[8] indicates if we are currently␊
141	# in a daylight saving time zone␊
142	my $m = $diff%60;␊
143	␊
144	return sprintf "%+03d%02d\n", $h, $m;␊
145	}␊
146	␊
147	sub initialize {␊
148	my ($self, $options) = (shift, shift);␊
149	my $date = strftime("%Y-%m-%d %H:%M", localtime).timezone();␊
150	chomp $date;␊
151	# $options = ref($options) \|\| $options;␊
152	␊
153	$self->{options}{'porefs'}= 'full';␊
154	$self->{options}{'msgid-bugs-address'}= undef;␊
155	$self->{options}{'copyright-holder'}= "Free Software Foundation, Inc.";␊
156	$self->{options}{'package-name'}= "PACKAGE";␊
157	$self->{options}{'package-version'}= "VERSION";␊
158	foreach my $opt (keys %$options) {␊
159	if ($options->{$opt}) {␊
160	die wrap_mod("po4a::po",␊
161	dgettext ("po4a", "Unknown option: %s"), $opt)␊
162	unless exists $self->{options}{$opt};␊
163	$self->{options}{$opt} = $options->{$opt};␊
164	}␊
165	}␊
166	$self->{options}{'porefs'} =~ /^(full\|noline\|none)$/ \|\|␊
167	die wrap_mod("po4a::po",␊
168	dgettext ("po4a",␊
169	"Invalid value for option 'porefs' ('%s' is ".␊
170	"not one of 'full', 'noline' or 'none')"),␊
171	$self->{options}{'porefs'});␊
172	␊
173	$self->{po}=();␊
174	$self->{count}=0; # number of msgids in the PO␊
175	# count_doc: number of strings in the document␊
176	# (duplicate strings counted multiple times)␊
177	$self->{count_doc}=0;␊
178	$self->{header_comment}=␊
179	" SOME DESCRIPTIVE TITLE\n"␊
180	." Copyright (C) YEAR ".␊
181	$self->{options}{'copyright-holder'}."\n"␊
182	." This file is distributed under the same license ".␊
183	"as the ".$self->{options}{'package-name'}." package.\n"␊
184	." FIRST AUTHOR <EMAIL\@ADDRESS>, YEAR.\n"␊
185	."\n"␊
186	.", fuzzy";␊
187	# $self->header_tag="fuzzy";␊
188	$self->{header}=escape_text("Project-Id-Version: ".␊
189	$self->{options}{'package-name'}." ".␊
190	$self->{options}{'package-version'}."\n".␊
191	((defined $self->{options}{'msgid-bugs-address'})?␊
192	"Report-Msgid-Bugs-To: ".$self->{options}{'msgid-bugs-address'}."\n":␊
193	"").␊
194	"POT-Creation-Date: $date\n".␊
195	"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n".␊
196	"Last-Translator: FULL NAME <EMAIL\@ADDRESS>\n".␊
197	"Language-Team: LANGUAGE <LL\@li.org>\n".␊
198	"Language: \n".␊
199	"MIME-Version: 1.0\n".␊
200	"Content-Type: text/plain; charset=CHARSET\n".␊
201	"Content-Transfer-Encoding: 8bit\n");␊
202	␊
203	$self->{encoder}=find_encoding("ascii");␊
204	$self->{footer}=[];␊
205	␊
206	# To make stats about gettext hits␊
207	$self->stats_clear();␊
208	}␊
209	␊
210	=item read($)␊
211	␊
212	Reads a PO file (which name is given as argument). Previously existing␊
213	entries in self are not removed, the new ones are added to the end of the␊
214	catalog.␊
215	␊
216	=cut␊
217	␊
218	sub read {␊
219	my $self=shift;␊
220	my $filename=shift␊
221	or croak wrap_mod("po4a::po",␊
222	dgettext("po4a",␊
223	"Please provide a non-null filename"));␊
224	␊
225	my $lang = basename($filename);␊
226	$lang =~ s/\.po$//;␊
227	$self->{lang} = $lang;␊
228	␊
229	my $fh;␊
230	if ($filename eq '-') {␊
231	$fh=*STDIN;␊
232	} else {␊
233	open $fh,"<$filename"␊
234	or croak wrap_mod("po4a::po",␊
235	dgettext("po4a", "Can't read from %s: %s"),␊
236	$filename, $!);␊
237	}␊
238	␊
239	## Read paragraphs line-by-line␊
240	my $pofile="";␊
241	my $textline;␊
242	while (defined ($textline = <$fh>)) {␊
243	$pofile .= $textline;␊
244	}␊
245	# close INPUT␊
246	# or croak (sprintf(dgettext("po4a",␊
247	# "Can't close %s after reading: %s"),␊
248	# $filename,$!)."\n");␊
249	␊
250	my $linenum=0;␊
251	␊
252	foreach my $msg (split (/\n\n/,$pofile)) {␊
253	my ($msgid,$msgstr,$comment,$previous,$automatic,$reference,$flags,$buffer);␊
254	my ($msgid_plural, $msgstr_plural);␊
255	if ($msg =~ m/^#~/m) {␊
256	push(@{$self->{footer}}, $msg);␊
257	next;␊
258	}␊
259	foreach my $line (split (/\n/,$msg)) {␊
260	$linenum++;␊
261	if ($line =~ /^#\. ?(.*)$/) { # Automatic comment␊
262	$automatic .= (defined($automatic) ? "\n" : "").$1;␊
263	␊
264	} elsif ($line =~ /^#: ?(.*)$/) { # reference␊
265	$reference .= (defined($reference) ? "\n" : "").$1;␊
266	␊
267	} elsif ($line =~ /^#, ?(.*)$/) { # flags␊
268	$flags .= (defined($flags) ? "\n" : "").$1;␊
269	␊
270	} elsif ($line =~ /^#\\| ?(.*)$/) { # previous translation␊
271	$previous .= (defined($previous) ? "\n" : "").($1\|\|"");␊
272	␊
273	} elsif ($line =~ /^#(.*)$/) { # Translator comments␊
274	$comment .= (defined($comment) ? "\n" : "").($1\|\|"");␊
275	␊
276	} elsif ($line =~ /^msgid (".*")$/) { # begin of msgid␊
277	$buffer = $1;␊
278	␊
279	} elsif ($line =~ /^msgid_plural (".*")$/) {␊
280	# begin of msgid_plural, end of msgid␊
281	␊
282	$msgid = $buffer;␊
283	$buffer = $1;␊
284	␊
285	} elsif ($line =~ /^msgstr (".*")$/) {␊
286	# begin of msgstr, end of msgid␊
287	␊
288	$msgid = $buffer;␊
289	$buffer = "$1";␊
290	␊
291	} elsif ($line =~ /^msgstr\[([0-9]+)\] (".*")$/) {␊
292	# begin of msgstr[x], end of msgid_plural or msgstr[x-1]␊
293	␊
294	# Note: po4a cannot uses plural forms␊
295	# (no integer to use the plural form)␊
296	# * drop the msgstr[x] where x >= 2␊
297	# * use msgstr[0] as the translation of msgid␊
298	# * use msgstr[1] as the translation of msgid_plural␊
299	␊
300	if ($1 eq "0") {␊
301	$msgid_plural = $buffer;␊
302	$buffer = "$2";␊
303	} elsif ($1 eq "1") {␊
304	$msgstr = $buffer;␊
305	$buffer = "$2";␊
306	} elsif ($1 eq "2") {␊
307	$msgstr_plural = $buffer;␊
308	warn wrap_ref_mod("$filename:$linenum",␊
309	"po4a::po",␊
310	dgettext("po4a", "Messages with more than 2 plural forms are not supported."));␊
311	}␊
312	} elsif ($line =~ /^(".*")$/) {␊
313	# continuation of a line␊
314	$buffer .= "\n$1";␊
315	␊
316	} else {␊
317	warn wrap_ref_mod("$filename:$linenum",␊
318	"po4a::po",␊
319	dgettext("po4a", "Strange line: -->%s<--"),␊
320	$line);␊
321	}␊
322	}␊
323	$linenum++;␊
324	if (defined $msgid_plural) {␊
325	$msgstr_plural=$buffer;␊
326	␊
327	$msgid = unquote_text($msgid) if (defined($msgid));␊
328	$msgstr = unquote_text($msgstr) if (defined($msgstr));␊
329	␊
330	$self->push_raw ('msgid' => $msgid,␊
331	'msgstr' => $msgstr,␊
332	'reference' => $reference,␊
333	'flags' => $flags,␊
334	'comment' => $comment,␊
335	'previous' => $previous,␊
336	'automatic' => $automatic,␊
337	'plural' => 0);␊
338	␊
339	$msgid_plural = unquote_text($msgid_plural)␊
340	if (defined($msgid_plural));␊
341	$msgstr_plural = unquote_text($msgstr_plural)␊
342	if (defined($msgstr_plural));␊
343	␊
344	$self->push_raw ('msgid' => $msgid_plural,␊
345	'msgstr' => $msgstr_plural,␊
346	'reference' => $reference,␊
347	'flags' => $flags,␊
348	'comment' => $comment,␊
349	'previous' => $previous,␊
350	'automatic' => $automatic,␊
351	'plural' => 1);␊
352	} else {␊
353	$msgstr=$buffer;␊
354	␊
355	$msgid = unquote_text($msgid) if (defined($msgid));␊
356	$msgstr = unquote_text($msgstr) if (defined($msgstr));␊
357	␊
358	$self->push_raw ('msgid' => $msgid,␊
359	'msgstr' => $msgstr,␊
360	'reference' => $reference,␊
361	'flags' => $flags,␊
362	'comment' => $comment,␊
363	'previous' => $previous,␊
364	'automatic' => $automatic);␊
365	}␊
366	}␊
367	}␊
368	␊
369	=item write($)␊
370	␊
371	Writes the current catalog to the given file.␊
372	␊
373	=cut␊
374	␊
375	sub write{␊
376	my $self=shift;␊
377	my $filename=shift␊
378	or croak dgettext("po4a","Can't write to a file without filename")."\n";␊
379	␊
380	my $fh;␊
381	if ($filename eq '-') {␊
382	$fh=\*STDOUT;␊
383	} else {␊
384	# make sure the directory in which we should write the localized␊
385	# file exists␊
386	my $dir = $filename;␊
387	if ($dir =~ m\|/\|) {␊
388	$dir =~ s\|/[^/]*$\|\|;␊
389	␊
390	File::Path::mkpath($dir, 0, 0755) # Croaks on error␊
391	if (length ($dir) && ! -e $dir);␊
392	}␊
393	open $fh,">$filename"␊
394	or croak wrap_mod("po4a::po",␊
395	dgettext("po4a", "Can't write to %s: %s"),␊
396	$filename, $!);␊
397	}␊
398	␊
399	print $fh "".format_comment($self->{header_comment},"")␊
400	if defined($self->{header_comment}) && length($self->{header_comment});␊
401	␊
402	print $fh "msgid \"\"\n";␊
403	print $fh "msgstr ".quote_text($self->{header})."\n\n";␊
404	␊
405	␊
406	my $buf_msgstr_plural; # Used to keep the first msgstr of plural forms␊
407	my $first=1;␊
408	foreach my $msgid ( sort { ($self->{po}{"$a"}{'pos'}) <=>␊
409	($self->{po}{"$b"}{'pos'})␊
410	} keys %{$self->{po}}) {␊
411	my $output="";␊
412	␊
413	if ($first) {␊
414	$first=0;␊
415	} else {␊
416	$output .= "\n";␊
417	}␊
418	␊
419	$output .= format_comment($self->{po}{$msgid}{'comment'},"")␊
420	if defined($self->{po}{$msgid}{'comment'})␊
421	&& length ($self->{po}{$msgid}{'comment'});␊
422	if ( defined($self->{po}{$msgid}{'automatic'})␊
423	&& length ($self->{po}{$msgid}{'automatic'})) {␊
424	foreach my $comment (split(/\\n/,$self->{po}{$msgid}{'automatic'}))␊
425	{␊
426	$output .= format_comment($comment, ". ")␊
427	}␊
428	}␊
429	$output .= format_comment($self->{po}{$msgid}{'type'},". type: ")␊
430	if defined($self->{po}{$msgid}{'type'})␊
431	&& length ($self->{po}{$msgid}{'type'});␊
432	$output .= format_comment($self->{po}{$msgid}{'reference'},": ")␊
433	if defined($self->{po}{$msgid}{'reference'})␊
434	&& length ($self->{po}{$msgid}{'reference'});␊
435	$output .= "#, ". join(", ", sort split(/\s+/,$self->{po}{$msgid}{'flags'}))."\n"␊
436	if defined($self->{po}{$msgid}{'flags'})␊
437	&& length ($self->{po}{$msgid}{'flags'});␊
438	$output .= format_comment($self->{po}{$msgid}{'previous'},"\| ")␊
439	if defined($self->{po}{$msgid}{'previous'})␊
440	&& length ($self->{po}{$msgid}{'previous'});␊
441	␊
442	if (exists $self->{po}{$msgid}{'plural'}) {␊
443	if ($self->{po}{$msgid}{'plural'} == 0) {␊
444	if ($self->get_charset =~ /^utf-8$/i) {␊
445	my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'});␊
446	$msgid = Encode::decode_utf8($msgid);␊
447	$output .= Encode::encode_utf8("msgid ".quote_text($msgid)."\n");␊
448	$buf_msgstr_plural = Encode::encode_utf8("msgstr[0] ".quote_text($msgstr)."\n");␊
449	} else {␊
450	$output = "msgid ".quote_text($msgid)."\n";␊
451	$buf_msgstr_plural = "msgstr[0] ".quote_text($self->{po}{$msgid}{'msgstr'})."\n";␊
452	}␊
453	} elsif ($self->{po}{$msgid}{'plural'} == 1) {␊
454	# TODO: there may be only one plural form␊
455	if ($self->get_charset =~ /^utf-8$/i) {␊
456	my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'});␊
457	$msgid = Encode::decode_utf8($msgid);␊
458	$output = Encode::encode_utf8("msgid_plural ".quote_text($msgid)."\n");␊
459	$output .= $buf_msgstr_plural;␊
460	$output .= Encode::encode_utf8("msgstr[1] ".quote_text($msgstr)."\n");␊
461	$buf_msgstr_plural = "";␊
462	} else {␊
463	$output = "msgid_plural ".quote_text($msgid)."\n";␊
464	$output .= $buf_msgstr_plural;␊
465	$output .= "msgstr[1] ".quote_text($self->{po}{$msgid}{'msgstr'})."\n";␊
466	}␊
467	} else {␊
468	die wrap_msg(dgettext("po4a","Can't write PO files with more than two plural forms."));␊
469	}␊
470	} else {␊
471	if ($self->get_charset =~ /^utf-8$/i) {␊
472	my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'});␊
473	$msgid = Encode::decode_utf8($msgid);␊
474	$output .= Encode::encode_utf8("msgid ".quote_text($msgid)."\n");␊
475	$output .= Encode::encode_utf8("msgstr ".quote_text($msgstr)."\n");␊
476	} else {␊
477	$output .= "msgid ".quote_text($msgid)."\n";␊
478	$output .= "msgstr ".quote_text($self->{po}{$msgid}{'msgstr'})."\n";␊
479	}␊
480	}␊
481	␊
482	print $fh $output;␊
483	}␊
484	print $fh join("\n\n", @{$self->{footer}}) if scalar @{$self->{footer}};␊
485	␊
486	# print STDERR "$fh";␊
487	# if ($filename ne '-') {␊
488	# close $fh␊
489	# or croak (sprintf(dgettext("po4a",␊
490	# "Can't close %s after writing: %s\n"),␊
491	# $filename,$!));␊
492	# }␊
493	}␊
494	␊
495	=item write_if_needed($$)␊
496	␊
497	Like write, but if the PO or POT file already exists, the object will be␊
498	written in a temporary file which will be compared with the existing file␊
499	to check if the update is needed (this avoids to change a POT just to␊
500	update a line reference or the POT-Creation-Date field).␊
501	␊
502	=cut␊
503	␊
504	sub move_po_if_needed {␊
505	my ($new_po, $old_po, $backup) = (shift, shift, shift);␊
506	my $diff;␊
507	␊
508	if (-e $old_po) {␊
509	my $diff_ignore = "-I'^#:' "␊
510	."-I'^\"POT-Creation-Date:' "␊
511	."-I'^\"PO-Revision-Date:'";␊
512	$diff = qx(diff -q $diff_ignore $old_po $new_po);␊
513	if ( $diff eq "" ) {␊
514	unlink $new_po␊
515	or die wrap_msg(dgettext("po4a","Can't unlink %s: %s."),␊
516	$new_po, $!);␊
517	# touch the old PO␊
518	my ($atime, $mtime) = (time,time);␊
519	utime $atime, $mtime, $old_po;␊
520	} else {␊
521	move $new_po, $old_po␊
522	or die wrap_msg(dgettext("po4a","Can't move %s to %s: %s."),␊
523	$new_po, $old_po, $!);␊
524	}␊
525	} else {␊
526	move $new_po, $old_po␊
527	or die wrap_msg(dgettext("po4a","Can't move %s to %s: %s."),␊
528	$new_po, $old_po, $!);␊
529	}␊
530	}␊
531	␊
532	sub write_if_needed {␊
533	my $self=shift;␊
534	my $filename=shift␊
535	or croak dgettext("po4a","Can't write to a file without filename")."\n";␊
536	␊
537	if (-e $filename) {␊
538	my ($tmp_filename);␊
539	(undef,$tmp_filename)=File::Temp->tempfile($filename."XXXX",␊
540	DIR => "/tmp",␊
541	OPEN => 0,␊
542	UNLINK => 0);␊
543	$self->write($tmp_filename);␊
544	move_po_if_needed($tmp_filename, $filename);␊
545	} else {␊
546	$self->write($filename);␊
547	}␊
548	}␊
549	␊
550	=item gettextize($$)␊
551	␊
552	This function produces one translated message catalog from two catalogs, an␊
553	original and a translation. This process is described in L<po4a(7)\|po4a.7>,␊
554	section I<Gettextization: how does it work?>.␊
555	␊
556	=cut␊
557	␊
558	sub gettextize {␊
559	my $this = shift;␊
560	my $class = ref($this) \|\| $this;␊
561	my ($poorig,$potrans)=(shift,shift);␊
562	␊
563	my $pores=Locale::Po4a::Po->new();␊
564	␊
565	my $please_fail = 0;␊
566	my $toobad = dgettext("po4a",␊
567	"\nThe gettextization failed (once again). Don't give up, ".␊
568	"gettextizing is a subtle art, but this is only needed once ".␊
569	"to convert a project to the gorgeous luxus offered by po4a ".␊
570	"to translators.".␊
571	"\nPlease refer to the po4a(7) documentation, the section ".␊
572	"\"HOWTO convert a pre-existing translation to po4a?\" ".␊
573	"contains several hints to help you in your task");␊
574	␊
575	# Don't fail right now when the entry count does not match. Instead, give␊
576	# it a try so that the user can see where we fail (which is probably where␊
577	# the problem is).␊
578	if ($poorig->count_entries_doc() > $potrans->count_entries_doc()) {␊
579	warn wrap_mod("po4a gettextize", dgettext("po4a",␊
580	"Original has more strings than the translation (%d>%d). ".␊
581	"Please fix it by editing the translated version to add ".␊
582	"some dummy entry."),␊
583	$poorig->count_entries_doc(),␊
584	$potrans->count_entries_doc());␊
585	$please_fail = 1;␊
586	} elsif ($poorig->count_entries_doc() < $potrans->count_entries_doc()) {␊
587	warn wrap_mod("po4a gettextize", dgettext("po4a",␊
588	"Original has less strings than the translation (%d<%d). ".␊
589	"Please fix it by removing the extra entry from the ".␊
590	"translated file. You may need an addendum (cf po4a(7)) ".␊
591	"to reput the chunk in place after gettextization. A ".␊
592	"possible cause is that a text duplicated in the original ".␊
593	"is not translated the same way each time. Remove one of ".␊
594	"the translations, and you're fine."),␊
595	$poorig->count_entries_doc(),␊
596	$potrans->count_entries_doc());␊
597	$please_fail = 1;␊
598	}␊
599	␊
600	if ( $poorig->get_charset =~ /^utf-8$/i ) {␊
601	$potrans->to_utf8;␊
602	$pores->set_charset("UTF-8");␊
603	} else {␊
604	if ($potrans->get_charset eq "CHARSET") {␊
605	$pores->set_charset("ascii");␊
606	} else {␊
607	$pores->set_charset($potrans->get_charset);␊
608	}␊
609	}␊
610	print "Po character sets:\n".␊
611	" original=".$poorig->get_charset."\n".␊
612	" translated=".$potrans->get_charset."\n".␊
613	" result=".$pores->get_charset."\n"␊
614	if $debug{'encoding'};␊
615	␊
616	for (my ($o,$t)=(0,0) ;␊
617	$o<$poorig->count_entries_doc() && $t<$potrans->count_entries_doc();␊
618	$o++,$t++) {␊
619	#␊
620	# Extract some informations␊
621	␊
622	my ($orig,$trans)=($poorig->msgid_doc($o),$potrans->msgid_doc($t));␊
623	# print STDERR "Matches [[$orig]]<<$trans>>\n";␊
624	␊
625	my ($reforig,$reftrans)=($poorig->{po}{$orig}{'reference'},␊
626	$potrans->{po}{$trans}{'reference'});␊
627	my ($typeorig,$typetrans)=($poorig->{po}{$orig}{'type'},␊
628	$potrans->{po}{$trans}{'type'});␊
629	␊
630	#␊
631	# Make sure the type of both string exist␊
632	#␊
633	die wrap_mod("po4a gettextize",␊
634	"Internal error: type of original string number %s ".␊
635	"isn't provided", $o)␊
636	if ($typeorig eq '');␊
637	␊
638	die wrap_mod("po4a gettextize",␊
639	"Internal error: type of translated string number %s ".␊
640	"isn't provided", $o)␊
641	if ($typetrans eq '');␊
642	␊
643	#␊
644	# Make sure both type are the same␊
645	#␊
646	if ($typeorig ne $typetrans){␊
647	$pores->write("gettextization.failed.po");␊
648	eval {␊
649	# Recode $trans into current charset, if possible␊
650	require I18N::Langinfo;␊
651	I18N::Langinfo->import(qw(langinfo CODESET));␊
652	my $codeset = langinfo(CODESET());␊
653	Encode::from_to($trans, $potrans->get_charset, $codeset);␊
654	};␊
655	die wrap_msg(dgettext("po4a",␊
656	"po4a gettextization: Structure disparity between ".␊
657	"original and translated files:\n".␊
658	"msgid (at %s) is of type '%s' while\n".␊
659	"msgstr (at %s) is of type '%s'.\n".␊
660	"Original text: %s\n".␊
661	"Translated text: %s\n".␊
662	"(result so far dumped to gettextization.failed.po)").␊
663	"%s",␊
664	$reforig, $typeorig,␊
665	$reftrans, $typetrans,␊
666	$orig,␊
667	$trans,␊
668	$toobad);␊
669	}␊
670	␊
671	#␊
672	# Push the entry␊
673	#␊
674	my $flags;␊
675	if (defined $poorig->{po}{$orig}{'flags'}) {␊
676	$flags = $poorig->{po}{$orig}{'flags'}." fuzzy";␊
677	} else {␊
678	$flags = "fuzzy";␊
679	}␊
680	$pores->push_raw('msgid' => $orig,␊
681	'msgstr' => $trans,␊
682	'flags' => $flags,␊
683	'type' => $typeorig,␊
684	'reference' => $reforig,␊
685	'conflict' => 1,␊
686	'transref' => $potrans->{po}{$trans}{'reference'})␊
687	unless (defined($pores->{po}{$orig})␊
688	and ($pores->{po}{$orig}{'msgstr'} eq $trans))␊
689	# FIXME: maybe we should be smarter about what reference should be␊
690	# sent to push_raw.␊
691	}␊
692	␊
693	# make sure we return a useful error message when entry count differ␊
694	die "$toobad\n" if $please_fail;␊
695	␊
696	return $pores;␊
697	}␊
698	␊
699	=item filter($)␊
700	␊
701	This function extracts a catalog from an existing one. Only the entries having␊
702	a reference in the given file will be placed in the resulting catalog.␊
703	␊
704	This function parses its argument, converts it to a Perl function definition,␊
705	evals this definition and filters the fields for which this function returns␊
706	true.␊
707	␊
708	I love Perl sometimes ;)␊
709	␊
710	=cut␊
711	␊
712	sub filter {␊
713	my $self=shift;␊
714	our $filter=shift;␊
715	␊
716	my $res;␊
717	$res = Locale::Po4a::Po->new();␊
718	␊
719	# Parse the filter␊
720	our $code="sub apply { return ";␊
721	our $pos=0;␊
722	our $length = length $filter;␊
723	␊
724	# explode chars to parts. How to subscript a string in Perl?␊
725	our @filter = split(//,$filter);␊
726	␊
727	sub gloups {␊
728	my $fmt=shift;␊
729	my $space = "";␊
730	for (1..$pos){␊
731	$space .= ' ';␊
732	}␊
733	die wrap_msg("$fmt\n$filter\n$space^ HERE");␊
734	}␊
735	sub showmethecode {␊
736	return unless $debug{'filter'};␊
737	my $fmt=shift;␊
738	my $space="";␊
739	for (1..$pos){␊
740	$space .= ' ';␊
741	}␊
742	print STDERR "$filter\n$space^ $fmt\n";#"$code\n";␊
743	}␊
744	␊
745	# I dream of a lex in perl :-/␊
746	sub parse_expression {␊
747	showmethecode("Begin expression")␊
748	if $debug{'filter'};␊
749	␊
750	gloups("Begin of expression expected, got '%s'",$filter[$pos])␊
751	unless ($filter[$pos] eq '(');␊
752	$pos ++; # pass the '('␊
753	if ($filter[$pos] eq '&') {␊
754	# AND␊
755	$pos++;␊
756	showmethecode("Begin of AND")␊
757	if $debug{'filter'};␊
758	$code .= "(";␊
759	while (1) {␊
760	gloups ("Unfinished AND statement.")␊
761	if ($pos == $length);␊
762	parse_expression();␊
763	if ($filter[$pos] eq '(') {␊
764	$code .= " && ";␊
765	} elsif ($filter[$pos] eq ')') {␊
766	last; # do not eat that char␊
767	} else {␊
768	gloups("End of AND or begin of sub-expression expected, got '%s'", $filter[$pos]);␊
769	}␊
770	}␊
771	$code .= ")";␊
772	} elsif ($filter[$pos] eq '\|') {␊
773	# OR␊
774	$pos++;␊
775	$code .= "(";␊
776	while (1) {␊
777	gloups("Unfinished OR statement.")␊
778	if ($pos == $length);␊
779	parse_expression();␊
780	if ($filter[$pos] eq '(') {␊
781	$code .= " \|\| ";␊
782	} elsif ($filter[$pos] eq ')') {␊
783	last; # do not eat that char␊
784	} else {␊
785	gloups("End of OR or begin of sub-expression expected, got '%s'",$filter[$pos]);␊
786	}␊
787	}␊
788	$code .= ")";␊
789	} elsif ($filter[$pos] eq '!') {␊
790	# NOT␊
791	$pos++;␊
792	$code .= "(!";␊
793	gloups("Missing sub-expression in NOT statement.")␊
794	if ($pos == $length);␊
795	parse_expression();␊
796	$code .= ")";␊
797	} else {␊
798	# must be an equal. Let's get field and argument␊
799	my ($field,$arg,$done);␊
800	$field = substr($filter,$pos);␊
801	gloups("EQ statement contains no '=' or invalid field name")␊
802	unless ($field =~ /([a-z]*)=/i);␊
803	$field = lc($1);␊
804	$pos += (length $field) + 1;␊
805	␊
806	# check that we've got a valid field name,␊
807	# and the number it referes to␊
808	# DO NOT CHANGE THE ORDER␊
809	my @names=qw(msgid msgstr reference flags comment previous automatic);␊
810	my $fieldpos;␊
811	for ($fieldpos = 0;␊
812	$fieldpos < scalar @names && $field ne $names[$fieldpos];␊
813	$fieldpos++) {}␊
814	gloups("Invalid field name: %s",$field)␊
815	if $fieldpos == scalar @names; # not found␊
816	␊
817	# Now, get the argument value. It has to be between quotes,␊
818	# which can be escaped␊
819	# We point right on the first char of the argument␊
820	# (first quote already eaten)␊
821	my $escaped = 0;␊
822	my $quoted = 0;␊
823	if ($filter[$pos] eq '"') {␊
824	$pos++;␊
825	$quoted = 1;␊
826	}␊
827	showmethecode(($quoted?"Quoted":"Unquoted")." argument of field '$field'")␊
828	if $debug{'filter'};␊
829	␊
830	while (!$done) {␊
831	gloups("Unfinished EQ argument.")␊
832	if ($pos == $length);␊
833	␊
834	if ($quoted) {␊
835	if ($filter[$pos] eq '\\') {␊
836	if ($escaped) {␊
837	$arg .= '\\';␊
838	$escaped = 0;␊
839	} else {␊
840	$escaped = 1;␊
841	}␊
842	} elsif ($escaped) {␊
843	if ($filter[$pos] eq '"') {␊
844	$arg .= '"';␊
845	$escaped = 0;␊
846	} else {␊
847	gloups("Invalid escape sequence in argument: '\\%s'",$filter[$pos]);␊
848	}␊
849	} else {␊
850	if ($filter[$pos] eq '"') {␊
851	$done = 1;␊
852	} else {␊
853	$arg .= $filter[$pos];␊
854	}␊
855	}␊
856	} else {␊
857	if ($filter[$pos] eq ')') {␊
858	# counter the next ++ since we don't want to eat␊
859	# this char␊
860	$pos--;␊
861	$done = 1;␊
862	} else {␊
863	$arg .= $filter[$pos];␊
864	}␊
865	}␊
866	$pos++;␊
867	}␊
868	# and now, add the code to check this equality␊
869	$code .= "(\$_[$fieldpos] =~ m{$arg})";␊
870	␊
871	}␊
872	showmethecode("End of expression")␊
873	if $debug{'filter'};␊
874	gloups("Unfinished statement.")␊
875	if ($pos == $length);␊
876	gloups("End of expression expected, got '%s'",$filter[$pos])␊
877	unless ($filter[$pos] eq ')');␊
878	$pos++;␊
879	}␊
880	# And now, launch the beast, finish the function and use eval␊
881	# to construct this function.␊
882	# Ok, the lack of lexer is a fair price for the eval ;)␊
883	parse_expression();␊
884	gloups("Garbage at the end of the expression")␊
885	if ($pos != $length);␊
886	$code .= "; }";␊
887	print STDERR "CODE = $code\n"␊
888	if $debug{'filter'};␊
889	eval $code;␊
890	die wrap_mod("po4a::po", dgettext("po4a", "Eval failure: %s"), $@)␊
891	if $@;␊
892	␊
893	for (my $cpt=(0) ;␊
894	$cpt<$self->count_entries();␊
895	$cpt++) {␊
896	␊
897	my ($msgid,$ref,$msgstr,$flags,$type,$comment,$previous,$automatic);␊
898	␊
899	$msgid = $self->msgid($cpt);␊
900	$ref=$self->{po}{$msgid}{'reference'};␊
901	␊
902	$msgstr= $self->{po}{$msgid}{'msgstr'};␊
903	$flags = $self->{po}{$msgid}{'flags'};␊
904	$type = $self->{po}{$msgid}{'type'};␊
905	$comment = $self->{po}{$msgid}{'comment'};␊
906	$previous = $self->{po}{$msgid}{'previous'};␊
907	$automatic = $self->{po}{$msgid}{'automatic'};␊
908	␊
909	# DO NOT CHANGE THE ORDER␊
910	$res->push_raw('msgid' => $msgid,␊
911	'msgstr' => $msgstr,␊
912	'flags' => $flags,␊
913	'type' => $type,␊
914	'reference' => $ref,␊
915	'comment' => $comment,␊
916	'previous' => $previous,␊
917	'automatic' => $automatic)␊
918	if (apply($msgid,$msgstr,$ref,$flags,$comment,$previous,$automatic));␊
919	}␊
920	# delete the apply subroutine␊
921	# otherwise it will be redefined.␊
922	undef &apply;␊
923	return $res;␊
924	}␊
925	␊
926	=item to_utf8()␊
927	␊
928	Recodes to UTF-8 the PO's msgstrs. Does nothing if the charset is not␊
929	specified in the PO file ("CHARSET" value), or if it's already UTF-8 or␊
930	ASCII.␊
931	␊
932	=cut␊
933	␊
934	sub to_utf8 {␊
935	my $this = shift;␊
936	my $charset = $this->get_charset();␊
937	␊
938	unless ($charset eq "CHARSET" or␊
939	$charset =~ /^ascii$/i or␊
940	$charset =~ /^utf-8$/i) {␊
941	foreach my $msgid ( keys %{$this->{po}} ) {␊
942	Encode::from_to($this->{po}{$msgid}{'msgstr'}, $charset, "utf-8");␊
943	}␊
944	$this->set_charset("UTF-8");␊
945	}␊
946	}␊
947	␊
948	=back␊
949	␊
950	=head1 Functions to use a message catalog for translations␊
951	␊
952	=over 4␊
953	␊
954	=item gettext($%)␊
955	␊
956	Request the translation of the string given as argument in the current catalog.␊
957	The function returns the original (untranslated) string if the string was not␊
958	found.␊
959	␊
960	After the string to translate, you can pass a hash of extra␊
961	arguments. Here are the valid entries:␊
962	␊
963	=over␊
964	␊
965	=item B<wrap>␊
966	␊
967	boolean indicating whether we can consider that whitespaces in string are␊
968	not important. If yes, the function canonizes the string before looking for␊
969	a translation, and wraps the result.␊
970	␊
971	=item B<wrapcol>␊
972	␊
973	the column at which we should wrap (default: 76).␊
974	␊
975	=back␊
976	␊
977	=cut␊
978	␊
979	sub gettext {␊
980	my $self=shift;␊
981	my $text=shift;␊
982	my (%opt)=@_;␊
983	my $res;␊
984	␊
985	return "" unless defined($text) && length($text); # Avoid returning the header.␊
986	my $validoption="reference wrap wrapcol";␊
987	my %validoption;␊
988	␊
989	map { $validoption{$_}=1 } (split(/ /,$validoption));␊
990	foreach (keys %opt) {␊
991	Carp::confess "internal error: unknown arg $_.\n".␊
992	"Here are the valid options: $validoption.\n"␊
993	unless $validoption{$_};␊
994	}␊
995	␊
996	$text=canonize($text)␊
997	if ($opt{'wrap'});␊
998	␊
999	my $esc_text=escape_text($text);␊
1000	␊
1001	$self->{gettextqueries}++;␊
1002	␊
1003	if ( defined $self->{po}{$esc_text}␊
1004	and defined $self->{po}{$esc_text}{'msgstr'}␊
1005	and length $self->{po}{$esc_text}{'msgstr'}␊
1006	and ( not defined $self->{po}{$esc_text}{'flags'}␊
1007	or $self->{po}{$esc_text}{'flags'} !~ /fuzzy/)) {␊
1008	␊
1009	$self->{gettexthits}++;␊
1010	$res = unescape_text($self->{po}{$esc_text}{'msgstr'});␊
1011	if (defined $self->{po}{$esc_text}{'plural'}) {␊
1012	if ($self->{po}{$esc_text}{'plural'} eq "0") {␊
1013	warn wrap_mod("po4a gettextize", dgettext("po4a",␊
1014	"'%s' is the singular form of a message, ".␊
1015	"po4a will use the msgstr[0] translation (%s)."),␊
1016	$esc_text, $res);␊
1017	} else {␊
1018	warn wrap_mod("po4a gettextize", dgettext("po4a",␊
1019	"'%s' is the plural form of a message, ".␊
1020	"po4a will use the msgstr[1] translation (%s)."),␊
1021	$esc_text, $res);␊
1022	}␊
1023	}␊
1024	} else {␊
1025	$res = $text;␊
1026	}␊
1027	␊
1028	if ($opt{'wrap'}) {␊
1029	if ($self->get_charset =~ /^utf-8$/i) {␊
1030	$res=Encode::decode_utf8($res);␊
1031	$res=wrap ($res, $opt{'wrapcol'} \|\| 76);␊
1032	$res=Encode::encode_utf8($res);␊
1033	} else {␊
1034	$res=wrap ($res, $opt{'wrapcol'} \|\| 76);␊
1035	}␊
1036	}␊
1037	# print STDERR "Gettext >>>$text<<<(escaped=$esc_text)=[[[$res]]]\n\n";␊
1038	return $res;␊
1039	}␊
1040	␊
1041	=item stats_get()␊
1042	␊
1043	Returns statistics about the hit ratio of gettext since the last time that␊
1044	stats_clear() was called. Please note that it's not the same␊
1045	statistics than the one printed by msgfmt --statistic. Here, it's statistics␊
1046	about recent usage of the PO file, while msgfmt reports the status of the␊
1047	file. Example of use:␊
1048	␊
1049	[some use of the PO file to translate stuff]␊
1050	␊
1051	($percent,$hit,$queries) = $pofile->stats_get();␊
1052	print "So far, we found translations for $percent\% ($hit of $queries) of strings.\n";␊
1053	␊
1054	=cut␊
1055	␊
1056	sub stats_get() {␊
1057	my $self=shift;␊
1058	my ($h,$q)=($self->{gettexthits},$self->{gettextqueries});␊
1059	my $p = ($q == 0 ? 100 : int($h/$q*10000)/100);␊
1060	␊
1061	# $p =~ s/\.00//;␊
1062	# $p =~ s/(\..)0/$1/;␊
1063	␊
1064	return ( $p,$h,$q );␊
1065	}␊
1066	␊
1067	=item stats_clear()␊
1068	␊
1069	Clears the statistics about gettext hits.␊
1070	␊
1071	=cut␊
1072	␊
1073	sub stats_clear {␊
1074	my $self = shift;␊
1075	$self->{gettextqueries} = 0;␊
1076	$self->{gettexthits} = 0;␊
1077	}␊
1078	␊
1079	=back␊
1080	␊
1081	=head1 Functions to build a message catalog␊
1082	␊
1083	=over 4␊
1084	␊
1085	=item push(%)␊
1086	␊
1087	Push a new entry at the end of the current catalog. The arguments should␊
1088	form a hash table. The valid keys are:␊
1089	␊
1090	=over 4␊
1091	␊
1092	=item B<msgid>␊
1093	␊
1094	the string in original language.␊
1095	␊
1096	=item B<msgstr>␊
1097	␊
1098	the translation.␊
1099	␊
1100	=item B<reference>␊
1101	␊
1102	an indication of where this string was found. Example: file.c:46 (meaning␊
1103	in 'file.c' at line 46). It can be a space-separated list in case of␊
1104	multiple occurrences.␊
1105	␊
1106	=item B<comment>␊
1107	␊
1108	a comment added here manually (by the translators). The format here is free.␊
1109	␊
1110	=item B<automatic>␊
1111	␊
1112	a comment which was automatically added by the string extraction␊
1113	program. See the B<--add-comments> option of the B<xgettext> program for␊
1114	more information.␊
1115	␊
1116	=item B<flags>␊
1117	␊
1118	space-separated list of all defined flags for this entry.␊
1119	␊
1120	Valid flags are: B<c-text>, B<python-text>, B<lisp-text>, B<elisp-text>, B<librep-text>,␊
1121	B<smalltalk-text>, B<java-text>, B<awk-text>, B<object-pascal-text>, B<ycp-text>,␊
1122	B<tcl-text>, B<wrap>, B<no-wrap> and B<fuzzy>.␊
1123	␊
1124	See the gettext documentation for their meaning.␊
1125	␊
1126	=item B<type>␊
1127	␊
1128	this is mostly an internal argument: it is used while gettextizing␊
1129	documents. The idea here is to parse both the original and the translation␊
1130	into a PO object, and merge them, using one's msgid as msgid and the␊
1131	other's msgid as msgstr. To make sure that things get ok, each msgid in PO␊
1132	objects are given a type, based on their structure (like "chapt", "sect1",␊
1133	"p" and so on in DocBook). If the types of strings are not the same, that␊
1134	means that both files do not share the same structure, and the process␊
1135	reports an error.␊
1136	␊
1137	This information is written as automatic comment in the PO file since this␊
1138	gives to translators some context about the strings to translate.␊
1139	␊
1140	=item B<wrap>␊
1141	␊
1142	boolean indicating whether whitespaces can be mangled in cosmetic␊
1143	reformattings. If true, the string is canonized before use.␊
1144	␊
1145	This information is written to the PO file using the B<wrap> or B<no-wrap> flag.␊
1146	␊
1147	=item B<wrapcol>␊
1148	␊
1149	the column at which we should wrap (default: 76).␊
1150	␊
1151	This information is not written to the PO file.␊
1152	␊
1153	=back␊
1154	␊
1155	=cut␊
1156	␊
1157	sub push {␊
1158	my $self=shift;␊
1159	my %entry=@_;␊
1160	␊
1161	my $validoption="wrap wrapcol type msgid msgstr automatic previous flags reference";␊
1162	my %validoption;␊
1163	␊
1164	map { $validoption{$_}=1 } (split(/ /,$validoption));␊
1165	foreach (keys %entry) {␊
1166	Carp::confess "internal error: unknown arg $_.\n".␊
1167	"Here are the valid options: $validoption.\n"␊
1168	unless $validoption{$_};␊
1169	}␊
1170	␊
1171	unless ($entry{'wrap'}) {␊
1172	$entry{'flags'} .= " no-wrap";␊
1173	}␊
1174	if (defined ($entry{'msgid'})) {␊
1175	$entry{'msgid'} = canonize($entry{'msgid'})␊
1176	if ($entry{'wrap'});␊
1177	␊
1178	$entry{'msgid'} = escape_text($entry{'msgid'});␊
1179	}␊
1180	if (defined ($entry{'msgstr'})) {␊
1181	$entry{'msgstr'} = canonize($entry{'msgstr'})␊
1182	if ($entry{'wrap'});␊
1183	␊
1184	$entry{'msgstr'} = escape_text($entry{'msgstr'});␊
1185	}␊
1186	␊
1187	$self->push_raw(%entry);␊
1188	}␊
1189	␊
1190	# The same as push(), but assuming that msgid and msgstr are already escaped␊
1191	sub push_raw {␊
1192	my $self=shift;␊
1193	my %entry=@_;␊
1194	my ($msgid,$msgstr,$reference,$comment,$automatic,$previous,$flags,$type,$transref)=␊
1195	($entry{'msgid'},$entry{'msgstr'},␊
1196	$entry{'reference'},$entry{'comment'},$entry{'automatic'},␊
1197	$entry{'previous'},$entry{'flags'},$entry{'type'},$entry{'transref'});␊
1198	my $keep_conflict = $entry{'conflict'};␊
1199	␊
1200	# print STDERR "Push_raw\n";␊
1201	# print STDERR " msgid=>>>$msgid<<<\n" if $msgid;␊
1202	# print STDERR " msgstr=[[[$msgstr]]]\n" if $msgstr;␊
1203	# Carp::cluck " flags=$flags\n" if $flags;␊
1204	␊
1205	return unless defined($entry{'msgid'});␊
1206	␊
1207	#no msgid => header definition␊
1208	unless (length($entry{'msgid'})) {␊
1209	# if (defined($self->{header}) && $self->{header} =~ /\S/) {␊
1210	# warn dgettext("po4a","Redefinition of the header. ".␊
1211	# "The old one will be discarded\n");␊
1212	# } FIXME: do that iff the header isn't the default one.␊
1213	$self->{header}=$msgstr;␊
1214	$self->{header_comment}=$comment;␊
1215	my $charset = $self->get_charset;␊
1216	if ($charset ne "CHARSET") {␊
1217	$self->{encoder}=find_encoding($charset);␊
1218	} else {␊
1219	$self->{encoder}=find_encoding("ascii");␊
1220	}␊
1221	return;␊
1222	}␊
1223	␊
1224	if ($self->{options}{'porefs'} eq "none") {␊
1225	$reference = "";␊
1226	} elsif ($self->{options}{'porefs'} eq "noline") {␊
1227	$reference =~ s/:[0-9]*/:1/g;␊
1228	}␊
1229	␊
1230	if (defined($self->{po}{$msgid})) {␊
1231	warn wrap_mod("po4a::po",␊
1232	dgettext("po4a","msgid defined twice: %s"),␊
1233	$msgid)␊
1234	if (0); # FIXME: put a verbose stuff␊
1235	if ( defined $msgstr␊
1236	and defined $self->{po}{$msgid}{'msgstr'}␊
1237	and $self->{po}{$msgid}{'msgstr'} ne $msgstr) {␊
1238	my $txt=quote_text($msgid);␊
1239	my ($first,$second)=␊
1240	(format_comment(". ",$self->{po}{$msgid}{'reference'}).␊
1241	quote_text($self->{po}{$msgid}{'msgstr'}),␊
1242	␊
1243	format_comment(". ",$reference).␊
1244	quote_text($msgstr));␊
1245	␊
1246	if ($keep_conflict) {␊
1247	if ($self->{po}{$msgid}{'msgstr'} =~ m/^#-#-#-#-# .* #-#-#-#-#\\n/s) {␊
1248	$msgstr = $self->{po}{$msgid}{'msgstr'}.␊
1249	"\\n#-#-#-#-# $transref #-#-#-#-#\\n".␊
1250	$msgstr;␊
1251	} else {␊
1252	$msgstr = "#-#-#-#-# ".␊
1253	$self->{po}{$msgid}{'transref'}.␊
1254	" #-#-#-#-#\\n".␊
1255	$self->{po}{$msgid}{'msgstr'}."\\n".␊
1256	"#-#-#-#-# $transref #-#-#-#-#\\n".␊
1257	$msgstr;␊
1258	}␊
1259	# Every msgid will have the same list of references.␊
1260	# Only keep the last list.␊
1261	$self->{po}{$msgid}{'reference'} = "";␊
1262	} else {␊
1263	warn wrap_msg(dgettext("po4a",␊
1264	"Translations don't match for:\n".␊
1265	"%s\n".␊
1266	"-->First translation:\n".␊
1267	"%s\n".␊
1268	" Second translation:\n".␊
1269	"%s\n".␊
1270	" Old translation discarded."),␊
1271	$txt,$first,$second);␊
1272	}␊
1273	}␊
1274	}␊
1275	if (defined $transref) {␊
1276	$self->{po}{$msgid}{'transref'} = $transref;␊
1277	}␊
1278	if (defined $reference) {␊
1279	if (defined $self->{po}{$msgid}{'reference'}) {␊
1280	$self->{po}{$msgid}{'reference'} .= " ".$reference;␊
1281	} else {␊
1282	$self->{po}{$msgid}{'reference'} = $reference;␊
1283	}␊
1284	}␊
1285	$self->{po}{$msgid}{'msgstr'} = $msgstr;␊
1286	$self->{po}{$msgid}{'comment'} = $comment;␊
1287	$self->{po}{$msgid}{'automatic'} = $automatic;␊
1288	$self->{po}{$msgid}{'previous'} = $previous;␊
1289	if (defined($self->{po}{$msgid}{'pos_doc'})) {␊
1290	$self->{po}{$msgid}{'pos_doc'} .= " ".$self->{count_doc}++;␊
1291	} else {␊
1292	$self->{po}{$msgid}{'pos_doc'} = $self->{count_doc}++;␊
1293	}␊
1294	unless (defined($self->{po}{$msgid}{'pos'})) {␊
1295	$self->{po}{$msgid}{'pos'} = $self->{count}++;␊
1296	}␊
1297	$self->{po}{$msgid}{'type'} = $type;␊
1298	$self->{po}{$msgid}{'plural'} = $entry{'plural'}␊
1299	if defined $entry{'plural'};␊
1300	␊
1301	if (defined($flags)) {␊
1302	$flags = " $flags ";␊
1303	$flags =~ s/,/ /g;␊
1304	foreach my $flag (@known_flags) {␊
1305	if ($flags =~ /\s$flag\s/) { # if flag to be set␊
1306	unless ( defined($self->{po}{$msgid}{'flags'})␊
1307	&& $self->{po}{$msgid}{'flags'} =~ /\b$flag\b/) {␊
1308	# flag not already set␊
1309	if (defined $self->{po}{$msgid}{'flags'}) {␊
1310	$self->{po}{$msgid}{'flags'} .= " ".$flag;␊
1311	} else {␊
1312	$self->{po}{$msgid}{'flags'} = $flag;␊
1313	}␊
1314	}␊
1315	}␊
1316	}␊
1317	}␊
1318	# print STDERR "stored ((($msgid)))=>(((".$self->{po}{$msgid}{'msgstr'}.")))\n\n";␊
1319	␊
1320	}␊
1321	␊
1322	=back␊
1323	␊
1324	=head1 Miscellaneous functions␊
1325	␊
1326	=over 4␊
1327	␊
1328	=item count_entries()␊
1329	␊
1330	Returns the number of entries in the catalog (without the header).␊
1331	␊
1332	=cut␊
1333	␊
1334	sub count_entries($) {␊
1335	my $self=shift;␊
1336	return $self->{count};␊
1337	}␊
1338	␊
1339	=item count_entries_doc()␊
1340	␊
1341	Returns the number of entries in document. If a string appears multiple times␊
1342	in the document, it will be counted multiple times␊
1343	␊
1344	=cut␊
1345	␊
1346	sub count_entries_doc($) {␊
1347	my $self=shift;␊
1348	return $self->{count_doc};␊
1349	}␊
1350	␊
1351	=item msgid($)␊
1352	␊
1353	Returns the msgid of the given number.␊
1354	␊
1355	=cut␊
1356	␊
1357	sub msgid($$) {␊
1358	my $self=shift;␊
1359	my $num=shift;␊
1360	␊
1361	foreach my $msgid ( keys %{$self->{po}} ) {␊
1362	return $msgid if ($self->{po}{$msgid}{'pos'} eq $num);␊
1363	}␊
1364	return undef;␊
1365	}␊
1366	␊
1367	=item msgid_doc($)␊
1368	␊
1369	Returns the msgid with the given position in the document.␊
1370	␊
1371	=cut␊
1372	␊
1373	sub msgid_doc($$) {␊
1374	my $self=shift;␊
1375	my $num=shift;␊
1376	␊
1377	foreach my $msgid ( keys %{$self->{po}} ) {␊
1378	foreach my $pos (split / /, $self->{po}{$msgid}{'pos_doc'}) {␊
1379	return $msgid if ($pos eq $num);␊
1380	}␊
1381	}␊
1382	return undef;␊
1383	}␊
1384	␊
1385	=item get_charset()␊
1386	␊
1387	Returns the character set specified in the PO header. If it hasn't been␊
1388	set, it will return "CHARSET".␊
1389	␊
1390	=cut␊
1391	␊
1392	sub get_charset() {␊
1393	my $self=shift;␊
1394	␊
1395	$self->{header} =~ /charset=(.*?)[\s\\]/;␊
1396	␊
1397	if (defined $1) {␊
1398	return $1;␊
1399	} else {␊
1400	return "CHARSET";␊
1401	}␊
1402	}␊
1403	␊
1404	=item set_charset($)␊
1405	␊
1406	This sets the character set of the PO header to the value specified in its␊
1407	first argument. If you never call this function (and no file with a specified␊
1408	character set is read), the default value is left to "CHARSET". This value␊
1409	doesn't change the behavior of this module, it's just used to fill that field␊
1410	in the header, and to return it in get_charset().␊
1411	␊
1412	=cut␊
1413	␊
1414	sub set_charset() {␊
1415	my $self=shift;␊
1416	␊
1417	my ($newchar,$oldchar);␊
1418	$newchar = shift;␊
1419	$oldchar = $self->get_charset();␊
1420	␊
1421	$self->{header} =~ s/$oldchar/$newchar/;␊
1422	$self->{encoder}=find_encoding($newchar);␊
1423	}␊
1424	␊
1425	#----[ helper functions ]---------------------------------------------------␊
1426	␊
1427	# transforme the string from its PO file representation to the form which␊
1428	# should be used to print it␊
1429	sub unescape_text {␊
1430	my $text = shift;␊
1431	␊
1432	print STDERR "\nunescape [$text]====" if $debug{'escape'};␊
1433	$text = join("",split(/\n/,$text));␊
1434	$text =~ s/\\"/"/g;␊
1435	# unescape newlines␊
1436	# NOTE on \G:␊
1437	# The following regular expression introduce newlines.␊
1438	# Thus, ^ doesn't match all beginnings of lines.␊
1439	# \G is a zero-width assertion that matches the position␊
1440	# of the previous substitution with s///g. As every␊
1441	# substitution ends by a newline, it always matches a␊
1442	# position just after a newline.␊
1443	$text =~ s/( # $1:␊
1444	(\G\|[^\\]) # beginning of the line or any char␊
1445	# different from '\'␊
1446	(\\\\)* # followed by any even number of '\'␊
1447	)\\n # and followed by an escaped newline␊
1448	/$1\n/sgx; # single string, match globally, allow comments␊
1449	# unescape carriage returns␊
1450	$text =~ s/( # $1:␊
1451	(\G\|[^\\]) # beginning of the line or any char␊
1452	# different from '\'␊
1453	(\\\\)* # followed by any even number of '\'␊
1454	)\\r # and followed by an escaped carriage return␊
1455	/$1\r/sgx; # single string, match globally, allow comments␊
1456	# unescape tabulations␊
1457	$text =~ s/( # $1:␊
1458	(\G\|[^\\])# beginning of the line or any char␊
1459	# different from '\'␊
1460	(\\\\)* # followed by any even number of '\'␊
1461	)\\t # and followed by an escaped tabulation␊
1462	/$1\t/mgx; # multilines string, match globally, allow comments␊
1463	# and unescape the escape character␊
1464	$text =~ s/\\\\/\\/g;␊
1465	print STDERR ">$text<\n" if $debug{'escape'};␊
1466	␊
1467	return $text;␊
1468	}␊
1469	␊
1470	# transform the string to its representation as it should be written in PO␊
1471	# files␊
1472	sub escape_text {␊
1473	my $text = shift;␊
1474	␊
1475	print STDERR "\nescape [$text]====" if $debug{'escape'};␊
1476	$text =~ s/\\/\\\\/g;␊
1477	$text =~ s/"/\\"/g;␊
1478	$text =~ s/\n/\\n/g;␊
1479	$text =~ s/\r/\\r/g;␊
1480	$text =~ s/\t/\\t/g;␊
1481	print STDERR ">$text<\n" if $debug{'escape'};␊
1482	␊
1483	return $text;␊
1484	}␊
1485	␊
1486	# put quotes around the string on each lines (without escaping it)␊
1487	# It does also normalize the text (ie, make sure its representation is wraped␊
1488	# on the 80th char, but without changing the meaning of the string)␊
1489	sub quote_text {␊
1490	my $string = shift;␊
1491	␊
1492	return '""' unless defined($string) && length($string);␊
1493	␊
1494	print STDERR "\nquote [$string]====" if $debug{'quote'};␊
1495	# break lines on newlines, if any␊
1496	# see unescape_text for an explanation on \G␊
1497	$string =~ s/( # $1:␊
1498	(\G\|[^\\]) # beginning of the line or any char␊
1499	# different from '\'␊
1500	(\\\\)* # followed by any even number of '\'␊
1501	\\n) # and followed by an escaped newline␊
1502	/$1\n/sgx; # single string, match globally, allow comments␊
1503	$string = wrap($string);␊
1504	my @string = split(/\n/,$string);␊
1505	$string = join ("\"\n\"",@string);␊
1506	$string = "\"$string\"";␊
1507	if (scalar @string > 1 && $string[0] ne '') {␊
1508	$string = "\"\"\n".$string;␊
1509	}␊
1510	␊
1511	print STDERR ">$string<\n" if $debug{'quote'};␊
1512	return $string;␊
1513	}␊
1514	␊
1515	# undo the work of the quote_text function␊
1516	sub unquote_text {␊
1517	my $string = shift;␊
1518	print STDERR "\nunquote [$string]====" if $debug{'quote'};␊
1519	$string =~ s/^""\\n//s;␊
1520	$string =~ s/^"(.*)"$/$1/s;␊
1521	$string =~ s/"\n"//gm;␊
1522	# Note: an even number of '\' could precede \\n, but I could not build a␊
1523	# document to test this␊
1524	$string =~ s/([^\\])\\n\n/$1!!DUMMYPOPM!!/gm;␊
1525	$string =~ s\|!!DUMMYPOPM!!\|\\n\|gm;␊
1526	print STDERR ">$string<\n" if $debug{'quote'};␊
1527	return $string;␊
1528	}␊
1529	␊
1530	# canonize the string: write it on only one line, changing consecutive␊
1531	# whitespace to only one space.␊
1532	# Warning, it changes the string and should only be called if the string is␊
1533	# plain text␊
1534	sub canonize {␊
1535	my $text=shift;␊
1536	print STDERR "\ncanonize [$text]====" if $debug{'canonize'};␊
1537	$text =~ s/^ *//s;␊
1538	$text =~ s/^[ \t]+/ /gm;␊
1539	# if ($text eq "\n"), it messed up the first string (header)␊
1540	$text =~ s/\n/ /gm if ($text ne "\n");␊
1541	$text =~ s/([.)]) +/$1 /gm;␊
1542	$text =~ s/([^.)]) */$1 /gm;␊
1543	$text =~ s/ *$//s;␊
1544	print STDERR ">$text<\n" if $debug{'canonize'};␊
1545	return $text;␊
1546	}␊
1547	␊
1548	# wraps the string. We don't use Text::Wrap since it mangles whitespace at␊
1549	# the end of splited line␊
1550	sub wrap {␊
1551	my $text=shift;␊
1552	return "0" if ($text eq '0');␊
1553	my $col=shift \|\| 76;␊
1554	my @lines=split(/\n/,"$text");␊
1555	my $res="";␊
1556	my $first=1;␊
1557	while (defined(my $line=shift @lines)) {␊
1558	if ($first && length($line) > $col - 10) {␊
1559	unshift @lines,$line;␊
1560	$first=0;␊
1561	next;␊
1562	}␊
1563	if (length($line) > $col) {␊
1564	my $pos=rindex($line," ",$col);␊
1565	while (substr($line,$pos-1,1) eq '.' && $pos != -1) {␊
1566	$pos=rindex($line," ",$pos-1);␊
1567	}␊
1568	if ($pos == -1) {␊
1569	# There are no spaces in the first $col chars, pick-up the␊
1570	# first space␊
1571	$pos = index($line," ");␊
1572	}␊
1573	if ($pos != -1) {␊
1574	my $end=substr($line,$pos+1);␊
1575	$line=substr($line,0,$pos+1);␊
1576	if ($end =~ s/^( +)//) {␊
1577	$line .= $1;␊
1578	}␊
1579	unshift @lines,$end;␊
1580	}␊
1581	}␊
1582	$first=0;␊
1583	$res.="$line\n";␊
1584	}␊
1585	# Restore the original trailing spaces␊
1586	$res =~ s/\s+$//s;␊
1587	if ($text =~ m/(\s+)$/s) {␊
1588	$res .= $1;␊
1589	}␊
1590	return $res;␊
1591	}␊
1592	␊
1593	# outputs properly a '# ... ' line to be put in the PO file␊
1594	sub format_comment {␊
1595	my $comment=shift;␊
1596	my $char=shift;␊
1597	my $result = "#". $char . $comment;␊
1598	$result =~ s/\n/\n#$char/gs;␊
1599	$result =~ s/^#$char$/#/gm;␊
1600	$result .= "\n";␊
1601	return $result;␊
1602	}␊
1603	␊
1604	␊
1605	1;␊
1606	__END__␊
1607	␊
1608	=back␊
1609	␊
1610	=head1 AUTHORS␊
1611	␊
1612	Denis Barbier <barbier@linuxfr.org>␊
1613	Martin Quinson (mquinson#debian.org)␊
1614	␊
1615	=cut␊
1616

Download this file