branches/zenith432/package/bin/po4a/lib/Locale/Po4a/Po.pm - Chameleon Svn Source Tree - Chameleon open source boot loader project.

Root/branches/zenith432/package/bin/po4a/lib/Locale/Po4a/Po.pm

1	# Locale::Po4a::Po -- manipulation of PO files␊
2	#␊
3	# This program is free software; you may redistribute it and/or modify it␊
4	# under the terms of GPL (see COPYING).␊
5	␊
6	############################################################################␊
7	# Modules and declarations␊
8	############################################################################␊
9	␊
10	=encoding UTF-8␊
11	␊
12	=head1 NAME␊
13	␊
14	Locale::Po4a::Po - PO file manipulation module␊
15	␊
16	=head1 SYNOPSIS␊
17	␊
18	use Locale::Po4a::Po;␊
19	my $pofile=Locale::Po4a::Po->new();␊
20	␊
21	# Read PO file␊
22	$pofile->read('file.po');␊
23	␊
24	# Add an entry␊
25	$pofile->push('msgid' => 'Hello', 'msgstr' => 'bonjour',␊
26	'flags' => "wrap", 'reference'=>'file.c:46');␊
27	␊
28	# Extract a translation␊
29	$pofile->gettext("Hello"); # returns 'bonjour'␊
30	␊
31	# Write back to a file␊
32	$pofile->write('otherfile.po');␊
33	␊
34	=head1 DESCRIPTION␊
35	␊
36	Locale::Po4a::Po is a module that allows you to manipulate message␊
37	catalogs. You can load and write from/to a file (which extension is often␊
38	I<po>), you can build new entries on the fly or request for the translation␊
39	of a string.␊
40	␊
41	For a more complete description of message catalogs in the PO format and␊
42	their use, please refer to the documentation of the gettext program.␊
43	␊
44	This module is part of the po4a project, which objective is to use PO files␊
45	(designed at origin to ease the translation of program messages) to␊
46	translate everything, including documentation (man page, info manual),␊
47	package description, debconf templates, and everything which may benefit␊
48	from this.␊
49	␊
50	=head1 OPTIONS ACCEPTED BY THIS MODULE␊
51	␊
52	=over 4␊
53	␊
54	=item B<porefs> I<type>[,B<wrap>\|B<nowrap>]␊
55	␊
56	Specify the reference format. Argument I<type> can be one of B<none> to not␊
57	produce any reference, B<noline> to not specify the line number (more␊
58	accurately all line numbers are replaced by 1), B<counter> to replace line␊
59	number by an increasing counter, and B<full> to include complete␊
60	references.␊
61	␊
62	Argument can be followed by a comma and either B<wrap> or B<nowrap> keyword.␊
63	References are written by default on a single line. The B<wrap> option wraps␊
64	references on several lines, to mimic B<gettext> tools (B<xgettext> and␊
65	B<msgmerge>). This option will become the default in a future release, because␊
66	it is more sensible. The B<nowrap> option is available so that users who want␊
67	to keep the old behavior can do so.␊
68	␊
69	=item B<--msgid-bugs-address> I<email@address>␊
70	␊
71	Set the report address for msgid bugs. By default, the created POT files␊
72	have no Report-Msgid-Bugs-To fields.␊
73	␊
74	=item B<--copyright-holder> I<string>␊
75	␊
76	Set the copyright holder in the POT header. The default value is␊
77	"Free Software Foundation, Inc."␊
78	␊
79	=item B<--package-name> I<string>␊
80	␊
81	Set the package name for the POT header. The default is "PACKAGE".␊
82	␊
83	=item B<--package-version> I<string>␊
84	␊
85	Set the package version for the POT header. The default is "VERSION".␊
86	␊
87	=back␊
88	␊
89	=cut␊
90	␊
91	use IO::File;␊
92	␊
93	␊
94	require Exporter;␊
95	␊
96	package Locale::Po4a::Po;␊
97	use DynaLoader;␊
98	␊
99	use Locale::Po4a::Common qw(wrap_msg wrap_mod wrap_ref_mod dgettext);␊
100	␊
101	use subs qw(makespace);␊
102	use vars qw(@ISA @EXPORT_OK);␊
103	@ISA = qw(Exporter DynaLoader);␊
104	@EXPORT = qw(%debug);␊
105	@EXPORT_OK = qw(&move_po_if_needed);␊
106	␊
107	use Locale::Po4a::TransTractor;␊
108	# Try to use a C extension if present.␊
109	eval("bootstrap Locale::Po4a::Po $Locale::Po4a::TransTractor::VERSION");␊
110	␊
111	use 5.006;␊
112	use strict;␊
113	use warnings;␊
114	␊
115	use Carp qw(croak);␊
116	use File::Basename;␊
117	use File::Path; # mkdir before write␊
118	use File::Copy; # move␊
119	use POSIX qw(strftime floor);␊
120	use Time::Local;␊
121	␊
122	use Encode;␊
123	␊
124	my @known_flags=qw(wrap no-wrap c-format fuzzy);␊
125	␊
126	our %debug=('canonize' => 0,␊
127	'quote' => 0,␊
128	'escape' => 0,␊
129	'encoding' => 0,␊
130	'filter' => 0);␊
131	␊
132	=head1 Functions about whole message catalogs␊
133	␊
134	=over 4␊
135	␊
136	=item new()␊
137	␊
138	Creates a new message catalog. If an argument is provided, it's the name of␊
139	a PO file we should load.␊
140	␊
141	=cut␊
142	␊
143	sub new {␊
144	my ($this, $options) = (shift, shift);␊
145	my $class = ref($this) \|\| $this;␊
146	my $self = {};␊
147	bless $self, $class;␊
148	$self->initialize($options);␊
149	␊
150	my $filename = shift;␊
151	$self->read($filename) if defined($filename) && length($filename);␊
152	return $self;␊
153	}␊
154	␊
155	# Return the numerical timezone (e.g. +0200)␊
156	# Neither the %z nor the %s formats of strftime are portable:␊
157	# '%s' is not supported on Solaris and '%z' indicates␊
158	# "2006-10-25 19:36E. Europe Standard Time" on MS Windows.␊
159	sub timezone {␊
160	my ($time) = @_;␊
161	my @l = localtime($time);␊
162	␊
163	my $diff = floor(timegm(@l)/60 +0.5) - floor($time/60 +0.5);␊
164	my $sign = ($diff >= 0 ? 1 : -1);␊
165	$diff = abs($diff);␊
166	␊
167	my $h = $sign * floor($diff / 60);␊
168	my $m = $diff%60;␊
169	␊
170	return sprintf "%+03d%02d\n", $h, $m;␊
171	}␊
172	␊
173	sub initialize {␊
174	my ($self, $options) = (shift, shift);␊
175	my $time = time;␊
176	my $date = strftime("%Y-%m-%d %H:%M", localtime($time)) . timezone($time);␊
177	chomp $date;␊
178	# $options = ref($options) \|\| $options;␊
179	␊
180	$self->{options}{'porefs'}= 'full,nowrap';␊
181	$self->{options}{'msgid-bugs-address'}= undef;␊
182	$self->{options}{'copyright-holder'}= "Free Software Foundation, Inc.";␊
183	$self->{options}{'package-name'}= "PACKAGE";␊
184	$self->{options}{'package-version'}= "VERSION";␊
185	foreach my $opt (keys %$options) {␊
186	if ($options->{$opt}) {␊
187	die wrap_mod("po4a::po",␊
188	dgettext ("po4a", "Unknown option: %s"), $opt)␊
189	unless exists $self->{options}{$opt};␊
190	$self->{options}{$opt} = $options->{$opt};␊
191	}␊
192	}␊
193	$self->{options}{'porefs'} =~ /^(full\|counter\|noline\|none)(,(no)?wrap)?$/ \|\|␊
194	die wrap_mod("po4a::po",␊
195	dgettext ("po4a",␊
196	"Invalid value for option 'porefs' ('%s' is ".␊
197	"not one of 'full', 'counter', 'noline' or 'none')"),␊
198	$self->{options}{'porefs'});␊
199	if ($self->{options}{'porefs'} =~ m/^counter/) {␊
200	$self->{counter} = {};␊
201	}␊
202	␊
203	$self->{po}=();␊
204	$self->{count}=0; # number of msgids in the PO␊
205	# count_doc: number of strings in the document␊
206	# (duplicate strings counted multiple times)␊
207	$self->{count_doc}=0;␊
208	$self->{header_comment}=␊
209	" SOME DESCRIPTIVE TITLE\n"␊
210	." Copyright (C) YEAR ".␊
211	$self->{options}{'copyright-holder'}."\n"␊
212	." This file is distributed under the same license ".␊
213	"as the ".$self->{options}{'package-name'}." package.\n"␊
214	." FIRST AUTHOR <EMAIL\@ADDRESS>, YEAR.\n"␊
215	."\n"␊
216	.", fuzzy";␊
217	# $self->header_tag="fuzzy";␊
218	$self->{header}=escape_text("Project-Id-Version: ".␊
219	$self->{options}{'package-name'}." ".␊
220	$self->{options}{'package-version'}."\n".␊
221	((defined $self->{options}{'msgid-bugs-address'})?␊
222	"Report-Msgid-Bugs-To: ".$self->{options}{'msgid-bugs-address'}."\n":␊
223	"").␊
224	"POT-Creation-Date: $date\n".␊
225	"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n".␊
226	"Last-Translator: FULL NAME <EMAIL\@ADDRESS>\n".␊
227	"Language-Team: LANGUAGE <LL\@li.org>\n".␊
228	"Language: \n".␊
229	"MIME-Version: 1.0\n".␊
230	"Content-Type: text/plain; charset=CHARSET\n".␊
231	"Content-Transfer-Encoding: 8bit\n");␊
232	␊
233	$self->{encoder}=find_encoding("ascii");␊
234	$self->{footer}=[];␊
235	␊
236	# To make stats about gettext hits␊
237	$self->stats_clear();␊
238	}␊
239	␊
240	=item read($)␊
241	␊
242	Reads a PO file (which name is given as argument). Previously existing␊
243	entries in self are not removed, the new ones are added to the end of the␊
244	catalog.␊
245	␊
246	=cut␊
247	␊
248	sub read {␊
249	my $self=shift;␊
250	my $filename=shift␊
251	or croak wrap_mod("po4a::po",␊
252	dgettext("po4a",␊
253	"Please provide a non-null filename"));␊
254	␊
255	my $lang = basename($filename);␊
256	$lang =~ s/\.po$//;␊
257	$self->{lang} = $lang;␊
258	␊
259	my $fh;␊
260	if ($filename eq '-') {␊
261	$fh=*STDIN;␊
262	} else {␊
263	open $fh,"<$filename"␊
264	or croak wrap_mod("po4a::po",␊
265	dgettext("po4a", "Can't read from %s: %s"),␊
266	$filename, $!);␊
267	}␊
268	␊
269	## Read paragraphs line-by-line␊
270	my $pofile="";␊
271	my $textline;␊
272	while (defined ($textline = <$fh>)) {␊
273	$pofile .= $textline;␊
274	}␊
275	# close INPUT␊
276	# or croak (sprintf(dgettext("po4a",␊
277	# "Can't close %s after reading: %s"),␊
278	# $filename,$!)."\n");␊
279	␊
280	my $linenum=0;␊
281	␊
282	foreach my $msg (split (/\n\n/,$pofile)) {␊
283	my ($msgid,$msgstr,$comment,$previous,$automatic,$reference,$flags,$buffer);␊
284	my ($msgid_plural, $msgstr_plural);␊
285	if ($msg =~ m/^#~/m) {␊
286	push(@{$self->{footer}}, $msg);␊
287	next;␊
288	}␊
289	foreach my $line (split (/\n/,$msg)) {␊
290	$linenum++;␊
291	if ($line =~ /^#\. ?(.*)$/) { # Automatic comment␊
292	$automatic .= (defined($automatic) ? "\n" : "").$1;␊
293	␊
294	} elsif ($line =~ /^#: ?(.*)$/) { # reference␊
295	$reference .= (defined($reference) ? "\n" : "").$1;␊
296	␊
297	} elsif ($line =~ /^#, ?(.*)$/) { # flags␊
298	$flags .= (defined($flags) ? "\n" : "").$1;␊
299	␊
300	} elsif ($line =~ /^#\\| ?(.*)$/) { # previous translation␊
301	$previous .= (defined($previous) ? "\n" : "").($1\|\|"");␊
302	␊
303	} elsif ($line =~ /^#(.*)$/) { # Translator comments␊
304	$comment .= (defined($comment) ? "\n" : "").($1\|\|"");␊
305	␊
306	} elsif ($line =~ /^msgid (".*")$/) { # begin of msgid␊
307	$buffer = $1;␊
308	␊
309	} elsif ($line =~ /^msgid_plural (".*")$/) {␊
310	# begin of msgid_plural, end of msgid␊
311	␊
312	$msgid = $buffer;␊
313	$buffer = $1;␊
314	␊
315	} elsif ($line =~ /^msgstr (".*")$/) {␊
316	# begin of msgstr, end of msgid␊
317	␊
318	$msgid = $buffer;␊
319	$buffer = "$1";␊
320	␊
321	} elsif ($line =~ /^msgstr\[([0-9]+)\] (".*")$/) {␊
322	# begin of msgstr[x], end of msgid_plural or msgstr[x-1]␊
323	␊
324	# Note: po4a cannot uses plural forms␊
325	# (no integer to use the plural form)␊
326	# * drop the msgstr[x] where x >= 2␊
327	# * use msgstr[0] as the translation of msgid␊
328	# * use msgstr[1] as the translation of msgid_plural␊
329	␊
330	if ($1 eq "0") {␊
331	$msgid_plural = $buffer;␊
332	$buffer = "$2";␊
333	} elsif ($1 eq "1") {␊
334	$msgstr = $buffer;␊
335	$buffer = "$2";␊
336	} elsif ($1 eq "2") {␊
337	$msgstr_plural = $buffer;␊
338	warn wrap_ref_mod("$filename:$linenum",␊
339	"po4a::po",␊
340	dgettext("po4a", "Messages with more than 2 plural forms are not supported."));␊
341	}␊
342	} elsif ($line =~ /^(".*")$/) {␊
343	# continuation of a line␊
344	$buffer .= "\n$1";␊
345	␊
346	} else {␊
347	warn wrap_ref_mod("$filename:$linenum",␊
348	"po4a::po",␊
349	dgettext("po4a", "Strange line: -->%s<--"),␊
350	$line);␊
351	}␊
352	}␊
353	$linenum++;␊
354	if (defined $msgid_plural) {␊
355	$msgstr_plural=$buffer;␊
356	␊
357	$msgid = unquote_text($msgid) if (defined($msgid));␊
358	$msgstr = unquote_text($msgstr) if (defined($msgstr));␊
359	␊
360	$self->push_raw ('msgid' => $msgid,␊
361	'msgstr' => $msgstr,␊
362	'reference' => $reference,␊
363	'flags' => $flags,␊
364	'comment' => $comment,␊
365	'previous' => $previous,␊
366	'automatic' => $automatic,␊
367	'plural' => 0);␊
368	␊
369	$msgid_plural = unquote_text($msgid_plural)␊
370	if (defined($msgid_plural));␊
371	$msgstr_plural = unquote_text($msgstr_plural)␊
372	if (defined($msgstr_plural));␊
373	␊
374	$self->push_raw ('msgid' => $msgid_plural,␊
375	'msgstr' => $msgstr_plural,␊
376	'reference' => $reference,␊
377	'flags' => $flags,␊
378	'comment' => $comment,␊
379	'previous' => $previous,␊
380	'automatic' => $automatic,␊
381	'plural' => 1);␊
382	} else {␊
383	$msgstr=$buffer;␊
384	␊
385	$msgid = unquote_text($msgid) if (defined($msgid));␊
386	$msgstr = unquote_text($msgstr) if (defined($msgstr));␊
387	␊
388	$self->push_raw ('msgid' => $msgid,␊
389	'msgstr' => $msgstr,␊
390	'reference' => $reference,␊
391	'flags' => $flags,␊
392	'comment' => $comment,␊
393	'previous' => $previous,␊
394	'automatic' => $automatic);␊
395	}␊
396	}␊
397	}␊
398	␊
399	=item write($)␊
400	␊
401	Writes the current catalog to the given file.␊
402	␊
403	=cut␊
404	␊
405	sub write{␊
406	my $self=shift;␊
407	my $filename=shift␊
408	or croak dgettext("po4a","Can't write to a file without filename")."\n";␊
409	␊
410	my $fh;␊
411	if ($filename eq '-') {␊
412	$fh=\*STDOUT;␊
413	} else {␊
414	# make sure the directory in which we should write the localized␊
415	# file exists␊
416	my $dir = $filename;␊
417	if ($dir =~ m\|/\|) {␊
418	$dir =~ s\|/[^/]*$\|\|;␊
419	␊
420	File::Path::mkpath($dir, 0, 0755) # Croaks on error␊
421	if (length ($dir) && ! -e $dir);␊
422	}␊
423	open $fh,">$filename"␊
424	or croak wrap_mod("po4a::po",␊
425	dgettext("po4a", "Can't write to %s: %s"),␊
426	$filename, $!);␊
427	}␊
428	␊
429	print $fh "".format_comment($self->{header_comment},"")␊
430	if defined($self->{header_comment}) && length($self->{header_comment});␊
431	␊
432	print $fh "msgid \"\"\n";␊
433	print $fh "msgstr ".quote_text($self->{header})."\n\n";␊
434	␊
435	␊
436	my $buf_msgstr_plural; # Used to keep the first msgstr of plural forms␊
437	my $first=1;␊
438	foreach my $msgid ( sort { ($self->{po}{"$a"}{'pos'}) <=>␊
439	($self->{po}{"$b"}{'pos'})␊
440	} keys %{$self->{po}}) {␊
441	my $output="";␊
442	␊
443	if ($first) {␊
444	$first=0;␊
445	} else {␊
446	$output .= "\n";␊
447	}␊
448	␊
449	$output .= format_comment($self->{po}{$msgid}{'comment'},"")␊
450	if defined($self->{po}{$msgid}{'comment'})␊
451	&& length ($self->{po}{$msgid}{'comment'});␊
452	if ( defined($self->{po}{$msgid}{'automatic'})␊
453	&& length ($self->{po}{$msgid}{'automatic'})) {␊
454	foreach my $comment (split(/\\n/,$self->{po}{$msgid}{'automatic'}))␊
455	{␊
456	$output .= format_comment($comment, ". ")␊
457	}␊
458	}␊
459	$output .= format_comment($self->{po}{$msgid}{'type'},". type: ")␊
460	if defined($self->{po}{$msgid}{'type'})␊
461	&& length ($self->{po}{$msgid}{'type'});␊
462	if ( defined($self->{po}{$msgid}{'reference'})␊
463	&& length ($self->{po}{$msgid}{'reference'})) {␊
464	my $output_ref = $self->{po}{$msgid}{'reference'};␊
465	if ($self->{options}{'porefs'} =~ m/,wrap$/) {␊
466	$output_ref = wrap($output_ref);␊
467	$output_ref =~ s/\s+$//mg;␊
468	}␊
469	$output .= format_comment($output_ref,": ");␊
470	}␊
471	$output .= "#, ". join(", ", sort split(/\s+/,$self->{po}{$msgid}{'flags'}))."\n"␊
472	if defined($self->{po}{$msgid}{'flags'})␊
473	&& length ($self->{po}{$msgid}{'flags'});␊
474	$output .= format_comment($self->{po}{$msgid}{'previous'},"\| ")␊
475	if defined($self->{po}{$msgid}{'previous'})␊
476	&& length ($self->{po}{$msgid}{'previous'});␊
477	␊
478	if (exists $self->{po}{$msgid}{'plural'}) {␊
479	if ($self->{po}{$msgid}{'plural'} == 0) {␊
480	if ($self->get_charset =~ /^utf-8$/i) {␊
481	my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'});␊
482	$msgid = Encode::decode_utf8($msgid);␊
483	$output .= Encode::encode_utf8("msgid ".quote_text($msgid)."\n");␊
484	$buf_msgstr_plural = Encode::encode_utf8("msgstr[0] ".quote_text($msgstr)."\n");␊
485	} else {␊
486	$output = "msgid ".quote_text($msgid)."\n";␊
487	$buf_msgstr_plural = "msgstr[0] ".quote_text($self->{po}{$msgid}{'msgstr'})."\n";␊
488	}␊
489	} elsif ($self->{po}{$msgid}{'plural'} == 1) {␊
490	# TODO: there may be only one plural form␊
491	if ($self->get_charset =~ /^utf-8$/i) {␊
492	my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'});␊
493	$msgid = Encode::decode_utf8($msgid);␊
494	$output = Encode::encode_utf8("msgid_plural ".quote_text($msgid)."\n");␊
495	$output .= $buf_msgstr_plural;␊
496	$output .= Encode::encode_utf8("msgstr[1] ".quote_text($msgstr)."\n");␊
497	$buf_msgstr_plural = "";␊
498	} else {␊
499	$output = "msgid_plural ".quote_text($msgid)."\n";␊
500	$output .= $buf_msgstr_plural;␊
501	$output .= "msgstr[1] ".quote_text($self->{po}{$msgid}{'msgstr'})."\n";␊
502	}␊
503	} else {␊
504	die wrap_msg(dgettext("po4a","Can't write PO files with more than two plural forms."));␊
505	}␊
506	} else {␊
507	if ($self->get_charset =~ /^utf-8$/i) {␊
508	my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'});␊
509	$msgid = Encode::decode_utf8($msgid);␊
510	$output .= Encode::encode_utf8("msgid ".quote_text($msgid)."\n");␊
511	$output .= Encode::encode_utf8("msgstr ".quote_text($msgstr)."\n");␊
512	} else {␊
513	$output .= "msgid ".quote_text($msgid)."\n";␊
514	$output .= "msgstr ".quote_text($self->{po}{$msgid}{'msgstr'})."\n";␊
515	}␊
516	}␊
517	␊
518	print $fh $output;␊
519	}␊
520	print $fh join("\n\n", @{$self->{footer}}) if scalar @{$self->{footer}};␊
521	␊
522	# print STDERR "$fh";␊
523	# if ($filename ne '-') {␊
524	# close $fh␊
525	# or croak (sprintf(dgettext("po4a",␊
526	# "Can't close %s after writing: %s\n"),␊
527	# $filename,$!));␊
528	# }␊
529	}␊
530	␊
531	=item write_if_needed($$)␊
532	␊
533	Like write, but if the PO or POT file already exists, the object will be␊
534	written in a temporary file which will be compared with the existing file␊
535	to check if the update is needed (this avoids to change a POT just to␊
536	update a line reference or the POT-Creation-Date field).␊
537	␊
538	=cut␊
539	␊
540	sub move_po_if_needed {␊
541	my ($new_po, $old_po, $backup) = (shift, shift, shift);␊
542	my $diff;␊
543	␊
544	if (-e $old_po) {␊
545	my $diff_ignore = "-I'^#:' "␊
546	."-I'^\"POT-Creation-Date:' "␊
547	."-I'^\"PO-Revision-Date:'";␊
548	$diff = qx(diff -q $diff_ignore $old_po $new_po);␊
549	if ( $diff eq "" ) {␊
550	unlink $new_po␊
551	or die wrap_msg(dgettext("po4a","Can't unlink %s: %s."),␊
552	$new_po, $!);␊
553	# touch the old PO␊
554	my ($atime, $mtime) = (time,time);␊
555	utime $atime, $mtime, $old_po;␊
556	} else {␊
557	move $new_po, $old_po␊
558	or die wrap_msg(dgettext("po4a","Can't move %s to %s: %s."),␊
559	$new_po, $old_po, $!);␊
560	}␊
561	} else {␊
562	move $new_po, $old_po␊
563	or die wrap_msg(dgettext("po4a","Can't move %s to %s: %s."),␊
564	$new_po, $old_po, $!);␊
565	}␊
566	}␊
567	␊
568	sub write_if_needed {␊
569	my $self=shift;␊
570	my $filename=shift␊
571	or croak dgettext("po4a","Can't write to a file without filename")."\n";␊
572	␊
573	if (-e $filename) {␊
574	my ($tmp_filename);␊
575	my $basename = basename($filename);␊
576	(undef,$tmp_filename)=File::Temp::tempfile($basename."XXXX",␊
577	DIR => $ENV{TMPDIR} \|\| "/tmp",␊
578	OPEN => 0,␊
579	UNLINK => 0);␊
580	$self->write($tmp_filename);␊
581	move_po_if_needed($tmp_filename, $filename);␊
582	} else {␊
583	$self->write($filename);␊
584	}␊
585	}␊
586	␊
587	=item gettextize($$)␊
588	␊
589	This function produces one translated message catalog from two catalogs, an␊
590	original and a translation. This process is described in L<po4a(7)\|po4a.7>,␊
591	section I<Gettextization: how does it work?>.␊
592	␊
593	=cut␊
594	␊
595	sub gettextize {␊
596	my $this = shift;␊
597	my $class = ref($this) \|\| $this;␊
598	my ($poorig,$potrans)=(shift,shift);␊
599	␊
600	my $pores=Locale::Po4a::Po->new();␊
601	␊
602	my $please_fail = 0;␊
603	my $toobad = dgettext("po4a",␊
604	"\nThe gettextization failed (once again). Don't give up, ".␊
605	"gettextizing is a subtle art, but this is only needed once ".␊
606	"to convert a project to the gorgeous luxus offered by po4a ".␊
607	"to translators.".␊
608	"\nPlease refer to the po4a(7) documentation, the section ".␊
609	"\"HOWTO convert a pre-existing translation to po4a?\" ".␊
610	"contains several hints to help you in your task");␊
611	␊
612	# Don't fail right now when the entry count does not match. Instead, give␊
613	# it a try so that the user can see where we fail (which is probably where␊
614	# the problem is).␊
615	if ($poorig->count_entries_doc() > $potrans->count_entries_doc()) {␊
616	warn wrap_mod("po4a gettextize", dgettext("po4a",␊
617	"Original has more strings than the translation (%d>%d). ".␊
618	"Please fix it by editing the translated version to add ".␊
619	"some dummy entry."),␊
620	$poorig->count_entries_doc(),␊
621	$potrans->count_entries_doc());␊
622	$please_fail = 1;␊
623	} elsif ($poorig->count_entries_doc() < $potrans->count_entries_doc()) {␊
624	warn wrap_mod("po4a gettextize", dgettext("po4a",␊
625	"Original has less strings than the translation (%d<%d). ".␊
626	"Please fix it by removing the extra entry from the ".␊
627	"translated file. You may need an addendum (cf po4a(7)) ".␊
628	"to reput the chunk in place after gettextization. A ".␊
629	"possible cause is that a text duplicated in the original ".␊
630	"is not translated the same way each time. Remove one of ".␊
631	"the translations, and you're fine."),␊
632	$poorig->count_entries_doc(),␊
633	$potrans->count_entries_doc());␊
634	$please_fail = 1;␊
635	}␊
636	␊
637	if ( $poorig->get_charset =~ /^utf-8$/i ) {␊
638	$potrans->to_utf8;␊
639	$pores->set_charset("UTF-8");␊
640	} else {␊
641	if ($potrans->get_charset eq "CHARSET") {␊
642	$pores->set_charset("ascii");␊
643	} else {␊
644	$pores->set_charset($potrans->get_charset);␊
645	}␊
646	}␊
647	print "Po character sets:\n".␊
648	" original=".$poorig->get_charset."\n".␊
649	" translated=".$potrans->get_charset."\n".␊
650	" result=".$pores->get_charset."\n"␊
651	if $debug{'encoding'};␊
652	␊
653	for (my ($o,$t)=(0,0) ;␊
654	$o<$poorig->count_entries_doc() && $t<$potrans->count_entries_doc();␊
655	$o++,$t++) {␊
656	#␊
657	# Extract some informations␊
658	␊
659	my ($orig,$trans)=($poorig->msgid_doc($o),$potrans->msgid_doc($t));␊
660	# print STDERR "Matches [[$orig]]<<$trans>>\n";␊
661	␊
662	my ($reforig,$reftrans)=($poorig->{po}{$orig}{'reference'},␊
663	$potrans->{po}{$trans}{'reference'});␊
664	my ($typeorig,$typetrans)=($poorig->{po}{$orig}{'type'},␊
665	$potrans->{po}{$trans}{'type'});␊
666	␊
667	#␊
668	# Make sure the type of both string exist␊
669	#␊
670	die wrap_mod("po4a gettextize",␊
671	"Internal error: type of original string number %s ".␊
672	"isn't provided", $o)␊
673	if ($typeorig eq '');␊
674	␊
675	die wrap_mod("po4a gettextize",␊
676	"Internal error: type of translated string number %s ".␊
677	"isn't provided", $o)␊
678	if ($typetrans eq '');␊
679	␊
680	#␊
681	# Make sure both type are the same␊
682	#␊
683	if ($typeorig ne $typetrans){␊
684	$pores->write("gettextization.failed.po");␊
685	eval {␊
686	# Recode $trans into current charset, if possible␊
687	require I18N::Langinfo;␊
688	I18N::Langinfo->import(qw(langinfo CODESET));␊
689	my $codeset = langinfo(CODESET());␊
690	Encode::from_to($trans, $potrans->get_charset, $codeset);␊
691	};␊
692	die wrap_msg(dgettext("po4a",␊
693	"po4a gettextization: Structure disparity between ".␊
694	"original and translated files:\n".␊
695	"msgid (at %s) is of type '%s' while\n".␊
696	"msgstr (at %s) is of type '%s'.\n".␊
697	"Original text: %s\n".␊
698	"Translated text: %s\n".␊
699	"(result so far dumped to gettextization.failed.po)").␊
700	"%s",␊
701	$reforig, $typeorig,␊
702	$reftrans, $typetrans,␊
703	$orig,␊
704	$trans,␊
705	$toobad);␊
706	}␊
707	␊
708	#␊
709	# Push the entry␊
710	#␊
711	my $flags;␊
712	if (defined $poorig->{po}{$orig}{'flags'}) {␊
713	$flags = $poorig->{po}{$orig}{'flags'}." fuzzy";␊
714	} else {␊
715	$flags = "fuzzy";␊
716	}␊
717	$pores->push_raw('msgid' => $orig,␊
718	'msgstr' => $trans,␊
719	'flags' => $flags,␊
720	'type' => $typeorig,␊
721	'reference' => $reforig,␊
722	'conflict' => 1,␊
723	'transref' => $potrans->{po}{$trans}{'reference'})␊
724	unless (defined($pores->{po}{$orig})␊
725	and ($pores->{po}{$orig}{'msgstr'} eq $trans))␊
726	# FIXME: maybe we should be smarter about what reference should be␊
727	# sent to push_raw.␊
728	}␊
729	␊
730	# make sure we return a useful error message when entry count differ␊
731	die "$toobad\n" if $please_fail;␊
732	␊
733	return $pores;␊
734	}␊
735	␊
736	=item filter($)␊
737	␊
738	This function extracts a catalog from an existing one. Only the entries having␊
739	a reference in the given file will be placed in the resulting catalog.␊
740	␊
741	This function parses its argument, converts it to a Perl function definition,␊
742	evals this definition and filters the fields for which this function returns␊
743	true.␊
744	␊
745	I love Perl sometimes ;)␊
746	␊
747	=cut␊
748	␊
749	sub filter {␊
750	my $self=shift;␊
751	our $filter=shift;␊
752	␊
753	my $res;␊
754	$res = Locale::Po4a::Po->new();␊
755	␊
756	# Parse the filter␊
757	our $code="sub apply { return ";␊
758	our $pos=0;␊
759	our $length = length $filter;␊
760	␊
761	# explode chars to parts. How to subscript a string in Perl?␊
762	our @filter = split(//,$filter);␊
763	␊
764	sub gloups {␊
765	my $fmt=shift;␊
766	my $space = "";␊
767	for (1..$pos){␊
768	$space .= ' ';␊
769	}␊
770	die wrap_msg("$fmt\n$filter\n$space^ HERE");␊
771	}␊
772	sub showmethecode {␊
773	return unless $debug{'filter'};␊
774	my $fmt=shift;␊
775	my $space="";␊
776	for (1..$pos){␊
777	$space .= ' ';␊
778	}␊
779	print STDERR "$filter\n$space^ $fmt\n";#"$code\n";␊
780	}␊
781	␊
782	# I dream of a lex in perl :-/␊
783	sub parse_expression {␊
784	showmethecode("Begin expression")␊
785	if $debug{'filter'};␊
786	␊
787	gloups("Begin of expression expected, got '%s'",$filter[$pos])␊
788	unless ($filter[$pos] eq '(');␊
789	$pos ++; # pass the '('␊
790	if ($filter[$pos] eq '&') {␊
791	# AND␊
792	$pos++;␊
793	showmethecode("Begin of AND")␊
794	if $debug{'filter'};␊
795	$code .= "(";␊
796	while (1) {␊
797	gloups ("Unfinished AND statement.")␊
798	if ($pos == $length);␊
799	parse_expression();␊
800	if ($filter[$pos] eq '(') {␊
801	$code .= " && ";␊
802	} elsif ($filter[$pos] eq ')') {␊
803	last; # do not eat that char␊
804	} else {␊
805	gloups("End of AND or begin of sub-expression expected, got '%s'", $filter[$pos]);␊
806	}␊
807	}␊
808	$code .= ")";␊
809	} elsif ($filter[$pos] eq '\|') {␊
810	# OR␊
811	$pos++;␊
812	$code .= "(";␊
813	while (1) {␊
814	gloups("Unfinished OR statement.")␊
815	if ($pos == $length);␊
816	parse_expression();␊
817	if ($filter[$pos] eq '(') {␊
818	$code .= " \|\| ";␊
819	} elsif ($filter[$pos] eq ')') {␊
820	last; # do not eat that char␊
821	} else {␊
822	gloups("End of OR or begin of sub-expression expected, got '%s'",$filter[$pos]);␊
823	}␊
824	}␊
825	$code .= ")";␊
826	} elsif ($filter[$pos] eq '!') {␊
827	# NOT␊
828	$pos++;␊
829	$code .= "(!";␊
830	gloups("Missing sub-expression in NOT statement.")␊
831	if ($pos == $length);␊
832	parse_expression();␊
833	$code .= ")";␊
834	} else {␊
835	# must be an equal. Let's get field and argument␊
836	my ($field,$arg,$done);␊
837	$field = substr($filter,$pos);␊
838	gloups("EQ statement contains no '=' or invalid field name")␊
839	unless ($field =~ /([a-z]*)=/i);␊
840	$field = lc($1);␊
841	$pos += (length $field) + 1;␊
842	␊
843	# check that we've got a valid field name,␊
844	# and the number it referes to␊
845	# DO NOT CHANGE THE ORDER␊
846	my @names=qw(msgid msgstr reference flags comment previous automatic);␊
847	my $fieldpos;␊
848	for ($fieldpos = 0;␊
849	$fieldpos < scalar @names && $field ne $names[$fieldpos];␊
850	$fieldpos++) {}␊
851	gloups("Invalid field name: %s",$field)␊
852	if $fieldpos == scalar @names; # not found␊
853	␊
854	# Now, get the argument value. It has to be between quotes,␊
855	# which can be escaped␊
856	# We point right on the first char of the argument␊
857	# (first quote already eaten)␊
858	my $escaped = 0;␊
859	my $quoted = 0;␊
860	if ($filter[$pos] eq '"') {␊
861	$pos++;␊
862	$quoted = 1;␊
863	}␊
864	showmethecode(($quoted?"Quoted":"Unquoted")." argument of field '$field'")␊
865	if $debug{'filter'};␊
866	␊
867	while (!$done) {␊
868	gloups("Unfinished EQ argument.")␊
869	if ($pos == $length);␊
870	␊
871	if ($quoted) {␊
872	if ($filter[$pos] eq '\\') {␊
873	if ($escaped) {␊
874	$arg .= '\\';␊
875	$escaped = 0;␊
876	} else {␊
877	$escaped = 1;␊
878	}␊
879	} elsif ($escaped) {␊
880	if ($filter[$pos] eq '"') {␊
881	$arg .= '"';␊
882	$escaped = 0;␊
883	} else {␊
884	gloups("Invalid escape sequence in argument: '\\%s'",$filter[$pos]);␊
885	}␊
886	} else {␊
887	if ($filter[$pos] eq '"') {␊
888	$done = 1;␊
889	} else {␊
890	$arg .= $filter[$pos];␊
891	}␊
892	}␊
893	} else {␊
894	if ($filter[$pos] eq ')') {␊
895	# counter the next ++ since we don't want to eat␊
896	# this char␊
897	$pos--;␊
898	$done = 1;␊
899	} else {␊
900	$arg .= $filter[$pos];␊
901	}␊
902	}␊
903	$pos++;␊
904	}␊
905	# and now, add the code to check this equality␊
906	$code .= "(\$_[$fieldpos] =~ m{$arg})";␊
907	␊
908	}␊
909	showmethecode("End of expression")␊
910	if $debug{'filter'};␊
911	gloups("Unfinished statement.")␊
912	if ($pos == $length);␊
913	gloups("End of expression expected, got '%s'",$filter[$pos])␊
914	unless ($filter[$pos] eq ')');␊
915	$pos++;␊
916	}␊
917	# And now, launch the beast, finish the function and use eval␊
918	# to construct this function.␊
919	# Ok, the lack of lexer is a fair price for the eval ;)␊
920	parse_expression();␊
921	gloups("Garbage at the end of the expression")␊
922	if ($pos != $length);␊
923	$code .= "; }";␊
924	print STDERR "CODE = $code\n"␊
925	if $debug{'filter'};␊
926	eval $code;␊
927	die wrap_mod("po4a::po", dgettext("po4a", "Eval failure: %s"), $@)␊
928	if $@;␊
929	␊
930	for (my $cpt=(0) ;␊
931	$cpt<$self->count_entries();␊
932	$cpt++) {␊
933	␊
934	my ($msgid,$ref,$msgstr,$flags,$type,$comment,$previous,$automatic);␊
935	␊
936	$msgid = $self->msgid($cpt);␊
937	$ref=$self->{po}{$msgid}{'reference'};␊
938	␊
939	$msgstr= $self->{po}{$msgid}{'msgstr'};␊
940	$flags = $self->{po}{$msgid}{'flags'};␊
941	$type = $self->{po}{$msgid}{'type'};␊
942	$comment = $self->{po}{$msgid}{'comment'};␊
943	$previous = $self->{po}{$msgid}{'previous'};␊
944	$automatic = $self->{po}{$msgid}{'automatic'};␊
945	␊
946	# DO NOT CHANGE THE ORDER␊
947	$res->push_raw('msgid' => $msgid,␊
948	'msgstr' => $msgstr,␊
949	'flags' => $flags,␊
950	'type' => $type,␊
951	'reference' => $ref,␊
952	'comment' => $comment,␊
953	'previous' => $previous,␊
954	'automatic' => $automatic)␊
955	if (apply($msgid,$msgstr,$ref,$flags,$comment,$previous,$automatic));␊
956	}␊
957	# delete the apply subroutine␊
958	# otherwise it will be redefined.␊
959	undef &apply;␊
960	return $res;␊
961	}␊
962	␊
963	=item to_utf8()␊
964	␊
965	Recodes to UTF-8 the PO's msgstrs. Does nothing if the charset is not␊
966	specified in the PO file ("CHARSET" value), or if it's already UTF-8 or␊
967	ASCII.␊
968	␊
969	=cut␊
970	␊
971	sub to_utf8 {␊
972	my $this = shift;␊
973	my $charset = $this->get_charset();␊
974	␊
975	unless ($charset eq "CHARSET" or␊
976	$charset =~ /^ascii$/i or␊
977	$charset =~ /^utf-8$/i) {␊
978	foreach my $msgid ( keys %{$this->{po}} ) {␊
979	Encode::from_to($this->{po}{$msgid}{'msgstr'}, $charset, "utf-8");␊
980	}␊
981	$this->set_charset("UTF-8");␊
982	}␊
983	}␊
984	␊
985	=back␊
986	␊
987	=head1 Functions to use a message catalog for translations␊
988	␊
989	=over 4␊
990	␊
991	=item gettext($%)␊
992	␊
993	Request the translation of the string given as argument in the current catalog.␊
994	The function returns the original (untranslated) string if the string was not␊
995	found.␊
996	␊
997	After the string to translate, you can pass a hash of extra␊
998	arguments. Here are the valid entries:␊
999	␊
1000	=over␊
1001	␊
1002	=item B<wrap>␊
1003	␊
1004	boolean indicating whether we can consider that whitespaces in string are␊
1005	not important. If yes, the function canonizes the string before looking for␊
1006	a translation, and wraps the result.␊
1007	␊
1008	=item B<wrapcol>␊
1009	␊
1010	the column at which we should wrap (default: 76).␊
1011	␊
1012	=back␊
1013	␊
1014	=cut␊
1015	␊
1016	sub gettext {␊
1017	my $self=shift;␊
1018	my $text=shift;␊
1019	my (%opt)=@_;␊
1020	my $res;␊
1021	␊
1022	return "" unless defined($text) && length($text); # Avoid returning the header.␊
1023	my $validoption="reference wrap wrapcol";␊
1024	my %validoption;␊
1025	␊
1026	map { $validoption{$_}=1 } (split(/ /,$validoption));␊
1027	foreach (keys %opt) {␊
1028	Carp::confess "internal error: unknown arg $_.\n".␊
1029	"Here are the valid options: $validoption.\n"␊
1030	unless $validoption{$_};␊
1031	}␊
1032	␊
1033	$text=canonize($text)␊
1034	if ($opt{'wrap'});␊
1035	␊
1036	my $esc_text=escape_text($text);␊
1037	␊
1038	$self->{gettextqueries}++;␊
1039	␊
1040	if ( defined $self->{po}{$esc_text}␊
1041	and defined $self->{po}{$esc_text}{'msgstr'}␊
1042	and length $self->{po}{$esc_text}{'msgstr'}␊
1043	and ( not defined $self->{po}{$esc_text}{'flags'}␊
1044	or $self->{po}{$esc_text}{'flags'} !~ /fuzzy/)) {␊
1045	␊
1046	$self->{gettexthits}++;␊
1047	$res = unescape_text($self->{po}{$esc_text}{'msgstr'});␊
1048	if (defined $self->{po}{$esc_text}{'plural'}) {␊
1049	if ($self->{po}{$esc_text}{'plural'} eq "0") {␊
1050	warn wrap_mod("po4a gettextize", dgettext("po4a",␊
1051	"'%s' is the singular form of a message, ".␊
1052	"po4a will use the msgstr[0] translation (%s)."),␊
1053	$esc_text, $res);␊
1054	} else {␊
1055	warn wrap_mod("po4a gettextize", dgettext("po4a",␊
1056	"'%s' is the plural form of a message, ".␊
1057	"po4a will use the msgstr[1] translation (%s)."),␊
1058	$esc_text, $res);␊
1059	}␊
1060	}␊
1061	} else {␊
1062	$res = $text;␊
1063	}␊
1064	␊
1065	if ($opt{'wrap'}) {␊
1066	if ($self->get_charset =~ /^utf-8$/i) {␊
1067	$res=Encode::decode_utf8($res);␊
1068	$res=wrap ($res, $opt{'wrapcol'} \|\| 76);␊
1069	$res=Encode::encode_utf8($res);␊
1070	} else {␊
1071	$res=wrap ($res, $opt{'wrapcol'} \|\| 76);␊
1072	}␊
1073	}␊
1074	# print STDERR "Gettext >>>$text<<<(escaped=$esc_text)=[[[$res]]]\n\n";␊
1075	return $res;␊
1076	}␊
1077	␊
1078	=item stats_get()␊
1079	␊
1080	Returns statistics about the hit ratio of gettext since the last time that␊
1081	stats_clear() was called. Please note that it's not the same␊
1082	statistics than the one printed by msgfmt --statistic. Here, it's statistics␊
1083	about recent usage of the PO file, while msgfmt reports the status of the␊
1084	file. Example of use:␊
1085	␊
1086	[some use of the PO file to translate stuff]␊
1087	␊
1088	($percent,$hit,$queries) = $pofile->stats_get();␊
1089	print "So far, we found translations for $percent\% ($hit of $queries) of strings.\n";␊
1090	␊
1091	=cut␊
1092	␊
1093	sub stats_get() {␊
1094	my $self=shift;␊
1095	my ($h,$q)=($self->{gettexthits},$self->{gettextqueries});␊
1096	my $p = ($q == 0 ? 100 : int($h/$q*10000)/100);␊
1097	␊
1098	# $p =~ s/\.00//;␊
1099	# $p =~ s/(\..)0/$1/;␊
1100	␊
1101	return ( $p,$h,$q );␊
1102	}␊
1103	␊
1104	=item stats_clear()␊
1105	␊
1106	Clears the statistics about gettext hits.␊
1107	␊
1108	=cut␊
1109	␊
1110	sub stats_clear {␊
1111	my $self = shift;␊
1112	$self->{gettextqueries} = 0;␊
1113	$self->{gettexthits} = 0;␊
1114	}␊
1115	␊
1116	=back␊
1117	␊
1118	=head1 Functions to build a message catalog␊
1119	␊
1120	=over 4␊
1121	␊
1122	=item push(%)␊
1123	␊
1124	Push a new entry at the end of the current catalog. The arguments should␊
1125	form a hash table. The valid keys are:␊
1126	␊
1127	=over 4␊
1128	␊
1129	=item B<msgid>␊
1130	␊
1131	the string in original language.␊
1132	␊
1133	=item B<msgstr>␊
1134	␊
1135	the translation.␊
1136	␊
1137	=item B<reference>␊
1138	␊
1139	an indication of where this string was found. Example: file.c:46 (meaning␊
1140	in 'file.c' at line 46). It can be a space-separated list in case of␊
1141	multiple occurrences.␊
1142	␊
1143	=item B<comment>␊
1144	␊
1145	a comment added here manually (by the translators). The format here is free.␊
1146	␊
1147	=item B<automatic>␊
1148	␊
1149	a comment which was automatically added by the string extraction␊
1150	program. See the B<--add-comments> option of the B<xgettext> program for␊
1151	more information.␊
1152	␊
1153	=item B<flags>␊
1154	␊
1155	space-separated list of all defined flags for this entry.␊
1156	␊
1157	Valid flags are: B<c-text>, B<python-text>, B<lisp-text>, B<elisp-text>, B<librep-text>,␊
1158	B<smalltalk-text>, B<java-text>, B<awk-text>, B<object-pascal-text>, B<ycp-text>,␊
1159	B<tcl-text>, B<wrap>, B<no-wrap> and B<fuzzy>.␊
1160	␊
1161	See the gettext documentation for their meaning.␊
1162	␊
1163	=item B<type>␊
1164	␊
1165	this is mostly an internal argument: it is used while gettextizing␊
1166	documents. The idea here is to parse both the original and the translation␊
1167	into a PO object, and merge them, using one's msgid as msgid and the␊
1168	other's msgid as msgstr. To make sure that things get ok, each msgid in PO␊
1169	objects are given a type, based on their structure (like "chapt", "sect1",␊
1170	"p" and so on in DocBook). If the types of strings are not the same, that␊
1171	means that both files do not share the same structure, and the process␊
1172	reports an error.␊
1173	␊
1174	This information is written as automatic comment in the PO file since this␊
1175	gives to translators some context about the strings to translate.␊
1176	␊
1177	=item B<wrap>␊
1178	␊
1179	boolean indicating whether whitespaces can be mangled in cosmetic␊
1180	reformattings. If true, the string is canonized before use.␊
1181	␊
1182	This information is written to the PO file using the B<wrap> or B<no-wrap> flag.␊
1183	␊
1184	=item B<wrapcol>␊
1185	␊
1186	the column at which we should wrap (default: 76).␊
1187	␊
1188	This information is not written to the PO file.␊
1189	␊
1190	=back␊
1191	␊
1192	=cut␊
1193	␊
1194	sub push {␊
1195	my $self=shift;␊
1196	my %entry=@_;␊
1197	␊
1198	my $validoption="wrap wrapcol type msgid msgstr automatic previous flags reference";␊
1199	my %validoption;␊
1200	␊
1201	map { $validoption{$_}=1 } (split(/ /,$validoption));␊
1202	foreach (keys %entry) {␊
1203	Carp::confess "internal error: unknown arg $_.\n".␊
1204	"Here are the valid options: $validoption.\n"␊
1205	unless $validoption{$_};␊
1206	}␊
1207	␊
1208	unless ($entry{'wrap'}) {␊
1209	$entry{'flags'} .= " no-wrap";␊
1210	}␊
1211	if (defined ($entry{'msgid'})) {␊
1212	$entry{'msgid'} = canonize($entry{'msgid'})␊
1213	if ($entry{'wrap'});␊
1214	␊
1215	$entry{'msgid'} = escape_text($entry{'msgid'});␊
1216	}␊
1217	if (defined ($entry{'msgstr'})) {␊
1218	$entry{'msgstr'} = canonize($entry{'msgstr'})␊
1219	if ($entry{'wrap'});␊
1220	␊
1221	$entry{'msgstr'} = escape_text($entry{'msgstr'});␊
1222	}␊
1223	␊
1224	$self->push_raw(%entry);␊
1225	}␊
1226	␊
1227	# The same as push(), but assuming that msgid and msgstr are already escaped␊
1228	sub push_raw {␊
1229	my $self=shift;␊
1230	my %entry=@_;␊
1231	my ($msgid,$msgstr,$reference,$comment,$automatic,$previous,$flags,$type,$transref)=␊
1232	($entry{'msgid'},$entry{'msgstr'},␊
1233	$entry{'reference'},$entry{'comment'},$entry{'automatic'},␊
1234	$entry{'previous'},$entry{'flags'},$entry{'type'},$entry{'transref'});␊
1235	my $keep_conflict = $entry{'conflict'};␊
1236	␊
1237	# print STDERR "Push_raw\n";␊
1238	# print STDERR " msgid=>>>$msgid<<<\n" if $msgid;␊
1239	# print STDERR " msgstr=[[[$msgstr]]]\n" if $msgstr;␊
1240	# Carp::cluck " flags=$flags\n" if $flags;␊
1241	␊
1242	return unless defined($entry{'msgid'});␊
1243	␊
1244	#no msgid => header definition␊
1245	unless (length($entry{'msgid'})) {␊
1246	# if (defined($self->{header}) && $self->{header} =~ /\S/) {␊
1247	# warn dgettext("po4a","Redefinition of the header. ".␊
1248	# "The old one will be discarded\n");␊
1249	# } FIXME: do that iff the header isn't the default one.␊
1250	$self->{header}=$msgstr;␊
1251	$self->{header_comment}=$comment;␊
1252	my $charset = $self->get_charset;␊
1253	if ($charset ne "CHARSET") {␊
1254	$self->{encoder}=find_encoding($charset);␊
1255	} else {␊
1256	$self->{encoder}=find_encoding("ascii");␊
1257	}␊
1258	return;␊
1259	}␊
1260	␊
1261	if ($self->{options}{'porefs'} =~ m/^none/) {␊
1262	$reference = "";␊
1263	} elsif ($self->{options}{'porefs'} =~ m/^counter/) {␊
1264	if ($reference =~ m/^(.+?)(?=\S+:\d+)/g) {␊
1265	my $new_ref = $1;␊
1266	1 while $reference =~ s{ # x modifier is added to add formatting and improve readability␊
1267	\G(\s*)(\S+):\d+ # \G is the last match in m//g (see also the (?=) syntax above)␊
1268	# $2 is the file name␊
1269	}{␊
1270	$self->{counter}{$2} \|\|= 0, # each file has its own counter␊
1271	++$self->{counter}{$2}, # increment it␊
1272	$new_ref .= "$1$2:".$self->{counter}{$2} # replace line number by this counter␊
1273	}gex && pos($reference);␊
1274	$reference = $new_ref;␊
1275	}␊
1276	} elsif ($self->{options}{'porefs'} =~ m/^noline/) {␊
1277	$reference =~ s/:\d+/:1/g;␊
1278	}␊
1279	␊
1280	if (defined($self->{po}{$msgid})) {␊
1281	warn wrap_mod("po4a::po",␊
1282	dgettext("po4a","msgid defined twice: %s"),␊
1283	$msgid)␊
1284	if (0); # FIXME: put a verbose stuff␊
1285	if ( defined $msgstr␊
1286	and defined $self->{po}{$msgid}{'msgstr'}␊
1287	and $self->{po}{$msgid}{'msgstr'} ne $msgstr) {␊
1288	my $txt=quote_text($msgid);␊
1289	my ($first,$second)=␊
1290	(format_comment(". ",$self->{po}{$msgid}{'reference'}).␊
1291	quote_text($self->{po}{$msgid}{'msgstr'}),␊
1292	␊
1293	format_comment(". ",$reference).␊
1294	quote_text($msgstr));␊
1295	␊
1296	if ($keep_conflict) {␊
1297	if ($self->{po}{$msgid}{'msgstr'} =~ m/^#-#-#-#-# .* #-#-#-#-#\\n/s) {␊
1298	$msgstr = $self->{po}{$msgid}{'msgstr'}.␊
1299	"\\n#-#-#-#-# $transref #-#-#-#-#\\n".␊
1300	$msgstr;␊
1301	} else {␊
1302	$msgstr = "#-#-#-#-# ".␊
1303	$self->{po}{$msgid}{'transref'}.␊
1304	" #-#-#-#-#\\n".␊
1305	$self->{po}{$msgid}{'msgstr'}."\\n".␊
1306	"#-#-#-#-# $transref #-#-#-#-#\\n".␊
1307	$msgstr;␊
1308	}␊
1309	# Every msgid will have the same list of references.␊
1310	# Only keep the last list.␊
1311	$self->{po}{$msgid}{'reference'} = "";␊
1312	} else {␊
1313	warn wrap_msg(dgettext("po4a",␊
1314	"Translations don't match for:\n".␊
1315	"%s\n".␊
1316	"-->First translation:\n".␊
1317	"%s\n".␊
1318	" Second translation:\n".␊
1319	"%s\n".␊
1320	" Old translation discarded."),␊
1321	$txt,$first,$second);␊
1322	}␊
1323	}␊
1324	}␊
1325	if (defined $transref) {␊
1326	$self->{po}{$msgid}{'transref'} = $transref;␊
1327	}␊
1328	if (defined($reference) && length($reference)) {␊
1329	if (defined $self->{po}{$msgid}{'reference'}) {␊
1330	$self->{po}{$msgid}{'reference'} .= " ".$reference;␊
1331	} else {␊
1332	$self->{po}{$msgid}{'reference'} = $reference;␊
1333	}␊
1334	}␊
1335	$self->{po}{$msgid}{'msgstr'} = $msgstr;␊
1336	$self->{po}{$msgid}{'comment'} = $comment;␊
1337	$self->{po}{$msgid}{'automatic'} = $automatic;␊
1338	$self->{po}{$msgid}{'previous'} = $previous;␊
1339	if (defined($self->{po}{$msgid}{'pos_doc'})) {␊
1340	$self->{po}{$msgid}{'pos_doc'} .= " ".$self->{count_doc}++;␊
1341	} else {␊
1342	$self->{po}{$msgid}{'pos_doc'} = $self->{count_doc}++;␊
1343	}␊
1344	unless (defined($self->{po}{$msgid}{'pos'})) {␊
1345	$self->{po}{$msgid}{'pos'} = $self->{count}++;␊
1346	}␊
1347	$self->{po}{$msgid}{'type'} = $type;␊
1348	$self->{po}{$msgid}{'plural'} = $entry{'plural'}␊
1349	if defined $entry{'plural'};␊
1350	␊
1351	if (defined($flags)) {␊
1352	$flags = " $flags ";␊
1353	$flags =~ s/,/ /g;␊
1354	foreach my $flag (@known_flags) {␊
1355	if ($flags =~ /\s$flag\s/) { # if flag to be set␊
1356	unless ( defined($self->{po}{$msgid}{'flags'})␊
1357	&& $self->{po}{$msgid}{'flags'} =~ /\b$flag\b/) {␊
1358	# flag not already set␊
1359	if (defined $self->{po}{$msgid}{'flags'}) {␊
1360	$self->{po}{$msgid}{'flags'} .= " ".$flag;␊
1361	} else {␊
1362	$self->{po}{$msgid}{'flags'} = $flag;␊
1363	}␊
1364	}␊
1365	}␊
1366	}␊
1367	}␊
1368	# print STDERR "stored ((($msgid)))=>(((".$self->{po}{$msgid}{'msgstr'}.")))\n\n";␊
1369	␊
1370	}␊
1371	␊
1372	=back␊
1373	␊
1374	=head1 Miscellaneous functions␊
1375	␊
1376	=over 4␊
1377	␊
1378	=item count_entries()␊
1379	␊
1380	Returns the number of entries in the catalog (without the header).␊
1381	␊
1382	=cut␊
1383	␊
1384	sub count_entries($) {␊
1385	my $self=shift;␊
1386	return $self->{count};␊
1387	}␊
1388	␊
1389	=item count_entries_doc()␊
1390	␊
1391	Returns the number of entries in document. If a string appears multiple times␊
1392	in the document, it will be counted multiple times␊
1393	␊
1394	=cut␊
1395	␊
1396	sub count_entries_doc($) {␊
1397	my $self=shift;␊
1398	return $self->{count_doc};␊
1399	}␊
1400	␊
1401	=item msgid($)␊
1402	␊
1403	Returns the msgid of the given number.␊
1404	␊
1405	=cut␊
1406	␊
1407	sub msgid($$) {␊
1408	my $self=shift;␊
1409	my $num=shift;␊
1410	␊
1411	foreach my $msgid ( keys %{$self->{po}} ) {␊
1412	return $msgid if ($self->{po}{$msgid}{'pos'} eq $num);␊
1413	}␊
1414	return undef;␊
1415	}␊
1416	␊
1417	=item msgid_doc($)␊
1418	␊
1419	Returns the msgid with the given position in the document.␊
1420	␊
1421	=cut␊
1422	␊
1423	sub msgid_doc($$) {␊
1424	my $self=shift;␊
1425	my $num=shift;␊
1426	␊
1427	foreach my $msgid ( keys %{$self->{po}} ) {␊
1428	foreach my $pos (split / /, $self->{po}{$msgid}{'pos_doc'}) {␊
1429	return $msgid if ($pos eq $num);␊
1430	}␊
1431	}␊
1432	return undef;␊
1433	}␊
1434	␊
1435	=item get_charset()␊
1436	␊
1437	Returns the character set specified in the PO header. If it hasn't been␊
1438	set, it will return "CHARSET".␊
1439	␊
1440	=cut␊
1441	␊
1442	sub get_charset() {␊
1443	my $self=shift;␊
1444	␊
1445	$self->{header} =~ /charset=(.*?)[\s\\]/;␊
1446	␊
1447	if (defined $1) {␊
1448	return $1;␊
1449	} else {␊
1450	return "CHARSET";␊
1451	}␊
1452	}␊
1453	␊
1454	=item set_charset($)␊
1455	␊
1456	This sets the character set of the PO header to the value specified in its␊
1457	first argument. If you never call this function (and no file with a specified␊
1458	character set is read), the default value is left to "CHARSET". This value␊
1459	doesn't change the behavior of this module, it's just used to fill that field␊
1460	in the header, and to return it in get_charset().␊
1461	␊
1462	=cut␊
1463	␊
1464	sub set_charset() {␊
1465	my $self=shift;␊
1466	␊
1467	my ($newchar,$oldchar);␊
1468	$newchar = shift;␊
1469	$oldchar = $self->get_charset();␊
1470	␊
1471	$self->{header} =~ s/$oldchar/$newchar/;␊
1472	$self->{encoder}=find_encoding($newchar);␊
1473	}␊
1474	␊
1475	#----[ helper functions ]---------------------------------------------------␊
1476	␊
1477	# transforme the string from its PO file representation to the form which␊
1478	# should be used to print it␊
1479	sub unescape_text {␊
1480	my $text = shift;␊
1481	␊
1482	print STDERR "\nunescape [$text]====" if $debug{'escape'};␊
1483	$text = join("",split(/\n/,$text));␊
1484	$text =~ s/\\"/"/g;␊
1485	# unescape newlines␊
1486	# NOTE on \G:␊
1487	# The following regular expression introduce newlines.␊
1488	# Thus, ^ doesn't match all beginnings of lines.␊
1489	# \G is a zero-width assertion that matches the position␊
1490	# of the previous substitution with s///g. As every␊
1491	# substitution ends by a newline, it always matches a␊
1492	# position just after a newline.␊
1493	$text =~ s/( # $1:␊
1494	(\G\|[^\\]) # beginning of the line or any char␊
1495	# different from '\'␊
1496	(\\\\)* # followed by any even number of '\'␊
1497	)\\n # and followed by an escaped newline␊
1498	/$1\n/sgx; # single string, match globally, allow comments␊
1499	# unescape carriage returns␊
1500	$text =~ s/( # $1:␊
1501	(\G\|[^\\]) # beginning of the line or any char␊
1502	# different from '\'␊
1503	(\\\\)* # followed by any even number of '\'␊
1504	)\\r # and followed by an escaped carriage return␊
1505	/$1\r/sgx; # single string, match globally, allow comments␊
1506	# unescape tabulations␊
1507	$text =~ s/( # $1:␊
1508	(\G\|[^\\])# beginning of the line or any char␊
1509	# different from '\'␊
1510	(\\\\)* # followed by any even number of '\'␊
1511	)\\t # and followed by an escaped tabulation␊
1512	/$1\t/mgx; # multilines string, match globally, allow comments␊
1513	# and unescape the escape character␊
1514	$text =~ s/\\\\/\\/g;␊
1515	print STDERR ">$text<\n" if $debug{'escape'};␊
1516	␊
1517	return $text;␊
1518	}␊
1519	␊
1520	# transform the string to its representation as it should be written in PO␊
1521	# files␊
1522	sub escape_text {␊
1523	my $text = shift;␊
1524	␊
1525	print STDERR "\nescape [$text]====" if $debug{'escape'};␊
1526	$text =~ s/\\/\\\\/g;␊
1527	$text =~ s/"/\\"/g;␊
1528	$text =~ s/\n/\\n/g;␊
1529	$text =~ s/\r/\\r/g;␊
1530	$text =~ s/\t/\\t/g;␊
1531	print STDERR ">$text<\n" if $debug{'escape'};␊
1532	␊
1533	return $text;␊
1534	}␊
1535	␊
1536	# put quotes around the string on each lines (without escaping it)␊
1537	# It does also normalize the text (ie, make sure its representation is wraped␊
1538	# on the 80th char, but without changing the meaning of the string)␊
1539	sub quote_text {␊
1540	my $string = shift;␊
1541	␊
1542	return '""' unless defined($string) && length($string);␊
1543	␊
1544	print STDERR "\nquote [$string]====" if $debug{'quote'};␊
1545	# break lines on newlines, if any␊
1546	# see unescape_text for an explanation on \G␊
1547	$string =~ s/( # $1:␊
1548	(\G\|[^\\]) # beginning of the line or any char␊
1549	# different from '\'␊
1550	(\\\\)* # followed by any even number of '\'␊
1551	\\n) # and followed by an escaped newline␊
1552	/$1\n/sgx; # single string, match globally, allow comments␊
1553	$string = wrap($string);␊
1554	my @string = split(/\n/,$string);␊
1555	$string = join ("\"\n\"",@string);␊
1556	$string = "\"$string\"";␊
1557	if (scalar @string > 1 && $string[0] ne '') {␊
1558	$string = "\"\"\n".$string;␊
1559	}␊
1560	␊
1561	print STDERR ">$string<\n" if $debug{'quote'};␊
1562	return $string;␊
1563	}␊
1564	␊
1565	# undo the work of the quote_text function␊
1566	sub unquote_text {␊
1567	my $string = shift;␊
1568	print STDERR "\nunquote [$string]====" if $debug{'quote'};␊
1569	$string =~ s/^""\\n//s;␊
1570	$string =~ s/^"(.*)"$/$1/s;␊
1571	$string =~ s/"\n"//gm;␊
1572	# Note: an even number of '\' could precede \\n, but I could not build a␊
1573	# document to test this␊
1574	$string =~ s/([^\\])\\n\n/$1!!DUMMYPOPM!!/gm;␊
1575	$string =~ s\|!!DUMMYPOPM!!\|\\n\|gm;␊
1576	print STDERR ">$string<\n" if $debug{'quote'};␊
1577	return $string;␊
1578	}␊
1579	␊
1580	# canonize the string: write it on only one line, changing consecutive␊
1581	# whitespace to only one space.␊
1582	# Warning, it changes the string and should only be called if the string is␊
1583	# plain text␊
1584	sub canonize {␊
1585	my $text=shift;␊
1586	print STDERR "\ncanonize [$text]====" if $debug{'canonize'};␊
1587	$text =~ s/^ *//s;␊
1588	$text =~ s/^[ \t]+/ /gm;␊
1589	# if ($text eq "\n"), it messed up the first string (header)␊
1590	$text =~ s/\n/ /gm if ($text ne "\n");␊
1591	$text =~ s/([.)]) +/$1 /gm;␊
1592	$text =~ s/([^.)]) */$1 /gm;␊
1593	$text =~ s/ *$//s;␊
1594	print STDERR ">$text<\n" if $debug{'canonize'};␊
1595	return $text;␊
1596	}␊
1597	␊
1598	# wraps the string. We don't use Text::Wrap since it mangles whitespace at␊
1599	# the end of splited line␊
1600	sub wrap {␊
1601	my $text=shift;␊
1602	return "0" if ($text eq '0');␊
1603	my $col=shift \|\| 76;␊
1604	my @lines=split(/\n/,"$text");␊
1605	my $res="";␊
1606	my $first=1;␊
1607	while (defined(my $line=shift @lines)) {␊
1608	if ($first && length($line) > $col - 10) {␊
1609	unshift @lines,$line;␊
1610	$first=0;␊
1611	next;␊
1612	}␊
1613	if (length($line) > $col) {␊
1614	my $pos=rindex($line," ",$col);␊
1615	while (substr($line,$pos-1,1) eq '.' && $pos != -1) {␊
1616	$pos=rindex($line," ",$pos-1);␊
1617	}␊
1618	if ($pos == -1) {␊
1619	# There are no spaces in the first $col chars, pick-up the␊
1620	# first space␊
1621	$pos = index($line," ");␊
1622	}␊
1623	if ($pos != -1) {␊
1624	my $end=substr($line,$pos+1);␊
1625	$line=substr($line,0,$pos+1);␊
1626	if ($end =~ s/^( +)//) {␊
1627	$line .= $1;␊
1628	}␊
1629	unshift @lines,$end;␊
1630	}␊
1631	}␊
1632	$first=0;␊
1633	$res.="$line\n";␊
1634	}␊
1635	# Restore the original trailing spaces␊
1636	$res =~ s/\s+$//s;␊
1637	if ($text =~ m/(\s+)$/s) {␊
1638	$res .= $1;␊
1639	}␊
1640	return $res;␊
1641	}␊
1642	␊
1643	# outputs properly a '# ... ' line to be put in the PO file␊
1644	sub format_comment {␊
1645	my $comment=shift;␊
1646	my $char=shift;␊
1647	my $result = "#". $char . $comment;␊
1648	$result =~ s/\n/\n#$char/gs;␊
1649	$result =~ s/^#$char$/#/gm;␊
1650	$result .= "\n";␊
1651	return $result;␊
1652	}␊
1653	␊
1654	␊
1655	1;␊
1656	__END__␊
1657	␊
1658	=back␊
1659	␊
1660	=head1 AUTHORS␊
1661	␊
1662	Denis Barbier <barbier@linuxfr.org>␊
1663	Martin Quinson (mquinson#debian.org)␊
1664	␊
1665	=cut␊
1666

Download this file