trunk/package/bin/po4a/lib/Locale/Po4a/TransTractor.pm - Chameleon Svn Source Tree - Chameleon open source boot loader project.

Root/trunk/package/bin/po4a/lib/Locale/Po4a/TransTractor.pm

1	#!/usr/bin/perl -w␊
2	␊
3	require Exporter;␊
4	␊
5	package Locale::Po4a::TransTractor;␊
6	use DynaLoader;␊
7	␊
8	use 5.006;␊
9	use strict;␊
10	use warnings;␊
11	␊
12	use subs qw(makespace);␊
13	use vars qw($VERSION @ISA @EXPORT);␊
14	$VERSION="0.44";␊
15	@ISA = qw(DynaLoader);␊
16	@EXPORT = qw(new process translate␊
17	read write readpo writepo␊
18	getpoout setpoout get_out_charset);␊
19	␊
20	# Try to use a C extension if present.␊
21	eval("bootstrap Locale::Po4a::TransTractor $VERSION");␊
22	␊
23	use Carp qw(croak);␊
24	use Locale::Po4a::Po;␊
25	use Locale::Po4a::Common;␊
26	␊
27	use File::Path; # mkdir before write␊
28	␊
29	use Encode;␊
30	use Encode::Guess;␊
31	␊
32	=encoding UTF-8␊
33	␊
34	=head1 NAME␊
35	␊
36	Locale::Po4a::TransTractor - generic trans(lator ex)tractor.␊
37	␊
38	=head1 DESCRIPTION␊
39	␊
40	The po4a (PO for anything) project goal is to ease translations (and more␊
41	interestingly, the maintenance of translations) using gettext tools on␊
42	areas where they were not expected like documentation.␊
43	␊
44	This class is the ancestor of every po4a parser used to parse a document, to␊
45	search translatable strings, to extract them to a PO file and to replace them by␊
46	their translation in the output document.␊
47	␊
48	More formally, it takes the following arguments as input:␊
49	␊
50	=over 2␊
51	␊
52	=item -␊
53	␊
54	a document to translate;␊
55	␊
56	=item -␊
57	␊
58	a PO file containing the translations to use.␊
59	␊
60	=back␊
61	␊
62	As output, it produces:␊
63	␊
64	=over 2␊
65	␊
66	=item -␊
67	␊
68	another PO file, resulting of the extraction of translatable strings from␊
69	the input document;␊
70	␊
71	=item -␊
72	␊
73	a translated document, with the same structure than the one in input, but␊
74	with all translatable strings replaced with the translations found in the␊
75	PO file provided in input.␊
76	␊
77	=back␊
78	␊
79	Here is a graphical representation of this:␊
80	␊
81	Input document --\ /---> Output document␊
82	\ / (translated)␊
83	+-> parse() function -----+␊
84	/ \␊
85	Input PO --------/ \---> Output PO␊
86	(extracted)␊
87	␊
88	=head1 FUNCTIONS YOUR PARSER SHOULD OVERRIDE␊
89	␊
90	=over 4␊
91	␊
92	=item parse()␊
93	␊
94	This is where all the work takes place: the parsing of input documents, the␊
95	generation of output, and the extraction of the translatable strings. This␊
96	is pretty simple using the provided functions presented in the section␊
97	B<INTERNAL FUNCTIONS> below. See also the B<SYNOPSIS>, which presents an␊
98	example.␊
99	␊
100	This function is called by the process() function below, but if you choose␊
101	to use the new() function, and to add content manually to your document,␊
102	you will have to call this function yourself.␊
103	␊
104	=item docheader()␊
105	␊
106	This function returns the header we should add to the produced document,␊
107	quoted properly to be a comment in the target language. See the section␊
108	B<Educating developers about translations>, from L<po4a(7)\|po4a.7>, for what␊
109	it is good for.␊
110	␊
111	=back␊
112	␊
113	=cut␊
114	␊
115	sub docheader {}␊
116	␊
117	sub parse {}␊
118	␊
119	=head1 SYNOPSIS␊
120	␊
121	The following example parses a list of paragraphs beginning with "<p>". For the sake␊
122	of simplicity, we assume that the document is well formatted, i.e. that '<p>'␊
123	tags are the only tags present, and that this tag is at the very beginning␊
124	of each paragraph.␊
125	␊
126	sub parse {␊
127	my $self = shift;␊
128	␊
129	PARAGRAPH: while (1) {␊
130	my ($paragraph,$pararef)=("","");␊
131	my $first=1;␊
132	my ($line,$lref)=$self->shiftline();␊
133	while (defined($line)) {␊
134	if ($line =~ m/<p>/ && !$first--; ) {␊
135	# Not the first time we see <p>.␊
136	# Reput the current line in input,␊
137	# and put the built paragraph to output␊
138	$self->unshiftline($line,$lref);␊
139	␊
140	# Now that the document is formed, translate it:␊
141	# - Remove the leading tag␊
142	$paragraph =~ s/^<p>//s;␊
143	␊
144	# - push to output the leading tag (untranslated) and the␊
145	# rest of the paragraph (translated)␊
146	$self->pushline( "<p>"␊
147	. $document->translate($paragraph,$pararef)␊
148	);␊
149	␊
150	next PARAGRAPH;␊
151	} else {␊
152	# Append to the paragraph␊
153	$paragraph .= $line;␊
154	$pararef = $lref unless(length($pararef));␊
155	}␊
156	␊
157	# Reinit the loop␊
158	($line,$lref)=$self->shiftline();␊
159	}␊
160	# Did not get a defined line? End of input file.␊
161	return;␊
162	}␊
163	}␊
164	␊
165	Once you've implemented the parse function, you can use your document␊
166	class, using the public interface presented in the next section.␊
167	␊
168	=head1 PUBLIC INTERFACE for scripts using your parser␊
169	␊
170	=head2 Constructor␊
171	␊
172	=over 4␊
173	␊
174	=item process(%)␊
175	␊
176	This function can do all you need to do with a po4a document in one␊
177	invocation. Its arguments must be packed as a hash. ACTIONS:␊
178	␊
179	=over 3␊
180	␊
181	=item a.␊
182	␊
183	Reads all the PO files specified in po_in_name␊
184	␊
185	=item b.␊
186	␊
187	Reads all original documents specified in file_in_name␊
188	␊
189	=item c.␊
190	␊
191	Parses the document␊
192	␊
193	=item d.␊
194	␊
195	Reads and applies all the addenda specified␊
196	␊
197	=item e.␊
198	␊
199	Writes the translated document to file_out_name (if given)␊
200	␊
201	=item f.␊
202	␊
203	Writes the extracted PO file to po_out_name (if given)␊
204	␊
205	=back␊
206	␊
207	ARGUMENTS, beside the ones accepted by new() (with expected type):␊
208	␊
209	=over 4␊
210	␊
211	=item file_in_name (@)␊
212	␊
213	List of filenames where we should read the input document.␊
214	␊
215	=item file_in_charset ($)␊
216	␊
217	Charset used in the input document (if it isn't specified, it will try␊
218	to detect it from the input document).␊
219	␊
220	=item file_out_name ($)␊
221	␊
222	Filename where we should write the output document.␊
223	␊
224	=item file_out_charset ($)␊
225	␊
226	Charset used in the output document (if it isn't specified, it will use␊
227	the PO file charset).␊
228	␊
229	=item po_in_name (@)␊
230	␊
231	List of filenames where we should read the input PO files from, containing␊
232	the translation which will be used to translate the document.␊
233	␊
234	=item po_out_name ($)␊
235	␊
236	Filename where we should write the output PO file, containing the strings␊
237	extracted from the input document.␊
238	␊
239	=item addendum (@)␊
240	␊
241	List of filenames where we should read the addenda from.␊
242	␊
243	=item addendum_charset ($)␊
244	␊
245	Charset for the addenda.␊
246	␊
247	=back␊
248	␊
249	=item new(%)␊
250	␊
251	Create a new po4a document. Accepted options (but be in a hash):␊
252	␊
253	=over 4␊
254	␊
255	=item verbose ($)␊
256	␊
257	Sets the verbosity.␊
258	␊
259	=item debug ($)␊
260	␊
261	Sets the debugging.␊
262	␊
263	=back␊
264	␊
265	=cut␊
266	␊
267	sub process {␊
268	## Determine if we were called via an object-ref or a classname␊
269	my $self = shift;␊
270	␊
271	## Any remaining arguments are treated as initial values for the␊
272	## hash that is used to represent this object.␊
273	my %params = @_;␊
274	␊
275	# Build the args for new()␊
276	my %newparams = ();␊
277	foreach (keys %params) {␊
278	next if ($_ eq 'po_in_name' \|\|␊
279	$_ eq 'po_out_name' \|\|␊
280	$_ eq 'file_in_name' \|\|␊
281	$_ eq 'file_in_charset' \|\|␊
282	$_ eq 'file_out_name' \|\|␊
283	$_ eq 'file_out_charset' \|\|␊
284	$_ eq 'addendum' \|\|␊
285	$_ eq 'addendum_charset');␊
286	$newparams{$_}=$params{$_};␊
287	}␊
288	␊
289	$self->detected_charset($params{'file_in_charset'});␊
290	$self->{TT}{'file_out_charset'}=$params{'file_out_charset'};␊
291	if (defined($self->{TT}{'file_out_charset'}) and␊
292	length($self->{TT}{'file_out_charset'})) {␊
293	$self->{TT}{'file_out_encoder'} = find_encoding($self->{TT}{'file_out_charset'});␊
294	}␊
295	$self->{TT}{'addendum_charset'}=$params{'addendum_charset'};␊
296	␊
297	chdir $params{'srcdir'}␊
298	if (defined $params{'srcdir'});␊
299	foreach my $file (@{$params{'po_in_name'}}) {␊
300	print STDERR "readpo($file)... " if $self->debug();␊
301	$self->readpo($file);␊
302	print STDERR "done.\n" if $self->debug()␊
303	}␊
304	foreach my $file (@{$params{'file_in_name'}}) {␊
305	print STDERR "read($file)..." if $self->debug();␊
306	$self->read($file);␊
307	print STDERR "done.\n" if $self->debug();␊
308	}␊
309	print STDERR "parse..." if $self->debug();␊
310	$self->parse();␊
311	print STDERR "done.\n" if $self->debug();␊
312	foreach my $file (@{$params{'addendum'}}) {␊
313	print STDERR "addendum($file)..." if $self->debug();␊
314	$self->addendum($file) \|\| die "An addendum failed\n";␊
315	print STDERR "done.\n" if $self->debug();␊
316	}␊
317	chdir $params{'destdir'}␊
318	if (defined $params{'destdir'});␊
319	if (defined $params{'file_out_name'}) {␊
320	print STDERR "write(".$params{'file_out_name'}.")... "␊
321	if $self->debug();␊
322	$self->write($params{'file_out_name'});␊
323	print STDERR "done.\n" if $self->debug();␊
324	}␊
325	chdir $params{'srcdir'}␊
326	if (defined $params{'srcdir'});␊
327	if (defined $params{'po_out_name'}) {␊
328	print STDERR "writepo(".$params{'po_out_name'}.")... "␊
329	if $self->debug();␊
330	$self->writepo($params{'po_out_name'});␊
331	print STDERR "done.\n" if $self->debug();␊
332	}␊
333	chdir $params{'calldir'}␊
334	if (defined $params{'calldir'});␊
335	return $self;␊
336	}␊
337	␊
338	sub new {␊
339	## Determine if we were called via an object-ref or a classname␊
340	my $this = shift;␊
341	my $class = ref($this) \|\| $this;␊
342	my $self = { };␊
343	my %options=@_;␊
344	## Bless ourselves into the desired class and perform any initialization␊
345	bless $self, $class;␊
346	␊
347	## initialize the plugin␊
348	# prevent the plugin from croaking on the options intended for Po.pm␊
349	$self->{options}{'porefs'} = '';␊
350	$self->{options}{'copyright-holder'} = '';␊
351	$self->{options}{'msgid-bugs-address'} = '';␊
352	$self->{options}{'package-name'} = '';␊
353	$self->{options}{'package-version'} = '';␊
354	# let the plugin parse the options and such␊
355	$self->initialize(%options);␊
356	␊
357	## Create our private data␊
358	my %po_options;␊
359	$po_options{'porefs'} = $self->{options}{'porefs'};␊
360	$po_options{'copyright-holder'} = $options{'copyright-holder'};␊
361	$po_options{'msgid-bugs-address'} = $options{'msgid-bugs-address'};␊
362	$po_options{'package-name'} = $options{'package-name'};␊
363	$po_options{'package-version'} = $options{'package-version'};␊
364	␊
365	# private data␊
366	$self->{TT}=();␊
367	$self->{TT}{po_in}=Locale::Po4a::Po->new(\%po_options);␊
368	$self->{TT}{po_out}=Locale::Po4a::Po->new(\%po_options);␊
369	# Warning, this is an array of array:␊
370	# The document is splited on lines, and for each␊
371	# [0] is the line content, [1] is the reference [2] the type␊
372	$self->{TT}{doc_in}=();␊
373	$self->{TT}{doc_out}=();␊
374	if (defined $options{'verbose'}) {␊
375	$self->{TT}{verbose} = $options{'verbose'};␊
376	}␊
377	if (defined $options{'debug'}) {␊
378	$self->{TT}{debug} = $options{'debug'};␊
379	}␊
380	# Input document is in ascii until we prove the opposite (in read())␊
381	$self->{TT}{ascii_input}=1;␊
382	# We try not to use utf unless it's forced from the outside (in case the␊
383	# document isn't in ascii)␊
384	$self->{TT}{utf_mode}=0;␊
385	␊
386	return $self;␊
387	}␊
388	␊
389	=back␊
390	␊
391	=head2 Manipulating document files␊
392	␊
393	=over 4␊
394	␊
395	=item read($)␊
396	␊
397	Add another input document at the end of the existing one. The argument is␊
398	the filename to read.␊
399	␊
400	Please note that it does not parse anything. You should use the parse()␊
401	function when you're done with packing input files into the document.␊
402	␊
403	=cut␊
404	␊
405	#'␊
406	sub read() {␊
407	my $self=shift;␊
408	my $filename=shift␊
409	or croak wrap_msg(dgettext("po4a", "Can't read from file without having a filename"));␊
410	my $linenum=0;␊
411	␊
412	open INPUT,"<$filename"␊
413	or croak wrap_msg(dgettext("po4a", "Can't read from %s: %s"), $filename, $!);␊
414	while (defined (my $textline = <INPUT>)) {␊
415	$linenum++;␊
416	my $ref="$filename:$linenum";␊
417	$textline =~ s/\r$//;␊
418	my @entry=($textline,$ref);␊
419	push @{$self->{TT}{doc_in}}, @entry;␊
420	␊
421	if (!defined($self->{TT}{'file_in_charset'})) {␊
422	# Detect if this file has non-ascii characters␊
423	if($self->{TT}{ascii_input}) {␊
424	my $decoder = guess_encoding($textline);␊
425	if (!ref($decoder) or $decoder !~ /Encode::XS=/) {␊
426	# We have detected a non-ascii line␊
427	$self->{TT}{ascii_input} = 0;␊
428	# Save the reference for future error message␊
429	$self->{TT}{non_ascii_ref} \|\|= $ref;␊
430	}␊
431	}␊
432	}␊
433	}␊
434	close INPUT␊
435	or croak wrap_msg(dgettext("po4a", "Can't close %s after reading: %s"), $filename, $!);␊
436	␊
437	}␊
438	␊
439	=item write($)␊
440	␊
441	Write the translated document to the given filename.␊
442	␊
443	=cut␊
444	␊
445	sub write {␊
446	my $self=shift;␊
447	my $filename=shift␊
448	or croak wrap_msg(dgettext("po4a", "Can't write to a file without filename"));␊
449	␊
450	my $fh;␊
451	if ($filename eq '-') {␊
452	$fh=\*STDOUT;␊
453	} else {␊
454	# make sure the directory in which we should write the localized file exists␊
455	my $dir = $filename;␊
456	if ($dir =~ m\|/\|) {␊
457	$dir =~ s\|/[^/]*$\|\|;␊
458	␊
459	File::Path::mkpath($dir, 0, 0755) # Croaks on error␊
460	if (length ($dir) && ! -e $dir);␊
461	}␊
462	open $fh,">$filename"␊
463	or croak wrap_msg(dgettext("po4a", "Can't write to %s: %s"), $filename, $!);␊
464	}␊
465	␊
466	map { print $fh $_ } $self->docheader();␊
467	map { print $fh $_ } @{$self->{TT}{doc_out}};␊
468	␊
469	if ($filename ne '-') {␊
470	close $fh or croak wrap_msg(dgettext("po4a", "Can't close %s after writing: %s"), $filename, $!);␊
471	}␊
472	␊
473	}␊
474	␊
475	=back␊
476	␊
477	=head2 Manipulating PO files␊
478	␊
479	=over 4␊
480	␊
481	=item readpo($)␊
482	␊
483	Add the content of a file (which name is passed as argument) to the␊
484	existing input PO. The old content is not discarded.␊
485	␊
486	=item writepo($)␊
487	␊
488	Write the extracted PO file to the given filename.␊
489	␊
490	=item stats()␊
491	␊
492	Returns some statistics about the translation done so far. Please note that␊
493	it's not the same statistics than the one printed by msgfmt␊
494	--statistic. Here, it's stats about recent usage of the PO file, while␊
495	msgfmt reports the status of the file. It is a wrapper to the␊
496	Locale::Po4a::Po::stats_get function applied to the input PO file. Example␊
497	of use:␊
498	␊
499	[normal use of the po4a document...]␊
500	␊
501	($percent,$hit,$queries) = $document->stats();␊
502	print "We found translations for $percent\% ($hit from $queries) of strings.\n";␊
503	␊
504	=back␊
505	␊
506	=cut␊
507	␊
508	sub getpoout {␊
509	return $_[0]->{TT}{po_out};␊
510	}␊
511	sub setpoout {␊
512	$_[0]->{TT}{po_out} = $_[1];␊
513	}␊
514	sub readpo {␊
515	$_[0]->{TT}{po_in}->read($_[1]);␊
516	}␊
517	sub writepo {␊
518	$_[0]->{TT}{po_out}->write( $_[1] );␊
519	}␊
520	sub stats {␊
521	return $_[0]->{TT}{po_in}->stats_get();␊
522	}␊
523	␊
524	=head2 Manipulating addenda␊
525	␊
526	=over 4␊
527	␊
528	=item addendum($)␊
529	␊
530	Please refer to L<po4a(7)\|po4a.7> for more information on what addenda are,␊
531	and how translators should write them. To apply an addendum to the translated␊
532	document, simply pass its filename to this function and you are done ;)␊
533	␊
534	This function returns a non-null integer on error.␊
535	␊
536	=cut␊
537	␊
538	# Internal function to read the header.␊
539	sub addendum_parse {␊
540	my ($filename,$header)=shift;␊
541	␊
542	my ($errcode,$mode,$position,$boundary,$bmode,$content)=␊
543	(1,"","","","","");␊
544	␊
545	unless (open (INS, "<$filename")) {␊
546	warn wrap_msg(dgettext("po4a", "Can't read from %s: %s"), $filename, $!);␊
547	goto END_PARSE_ADDFILE;␊
548	}␊
549	␊
550	unless (defined ($header=<INS>) && $header) {␊
551	warn wrap_msg(dgettext("po4a", "Can't read po4a header from %s."), $filename);␊
552	goto END_PARSE_ADDFILE;␊
553	}␊
554	␊
555	unless ($header =~ s/PO4A-HEADER://i) {␊
556	warn wrap_msg(dgettext("po4a", "First line of %s does not look like a po4a header."), $filename);␊
557	goto END_PARSE_ADDFILE;␊
558	}␊
559	foreach my $part (split(/;/,$header)) {␊
560	unless ($part =~ m/^\s([^=])=(.*)$/) {␊
561	warn wrap_msg(dgettext("po4a", "Syntax error in po4a header of %s, near \"%s\""), $filename, $part);␊
562	goto END_PARSE_ADDFILE;␊
563	}␊
564	my ($key,$value)=($1,$2);␊
565	$key=lc($key);␊
566	if ($key eq 'mode') {␊
567	$mode=lc($value);␊
568	} elsif ($key eq 'position') {␊
569	$position=$value;␊
570	} elsif ($key eq 'endboundary') {␊
571	$boundary=$value;␊
572	$bmode='after';␊
573	} elsif ($key eq 'beginboundary') {␊
574	$boundary=$value;␊
575	$bmode='before';␊
576	} else {␊
577	warn wrap_msg(dgettext("po4a", "Invalid argument in the po4a header of %s: %s"), $filename, $key);␊
578	goto END_PARSE_ADDFILE;␊
579	}␊
580	}␊
581	␊
582	unless (length($mode)) {␊
583	warn wrap_msg(dgettext("po4a", "The po4a header of %s does not define the mode."), $filename);␊
584	goto END_PARSE_ADDFILE;␊
585	}␊
586	unless ($mode eq "before" \|\| $mode eq "after") {␊
587	warn wrap_msg(dgettext("po4a", "Mode invalid in the po4a header of %s: should be 'before' or 'after' not %s."), $filename, $mode);␊
588	goto END_PARSE_ADDFILE;␊
589	}␊
590	␊
591	unless (length($position)) {␊
592	warn wrap_msg(dgettext("po4a", "The po4a header of %s does not define the position."), $filename);␊
593	goto END_PARSE_ADDFILE;␊
594	}␊
595	unless ($mode eq "before" \|\| length($boundary)) {␊
596	warn wrap_msg(dgettext("po4a", "No ending boundary given in the po4a header, but mode=after."));␊
597	goto END_PARSE_ADDFILE;␊
598	}␊
599	␊
600	while (defined(my $line = <INS>)) {␊
601	$content .= $line;␊
602	}␊
603	close INS;␊
604	␊
605	$errcode=0;␊
606	END_PARSE_ADDFILE:␊
607	return ($errcode,$mode,$position,$boundary,$bmode,$content);␊
608	}␊
609	␊
610	sub mychomp {␊
611	my ($str) = shift;␊
612	chomp($str);␊
613	return $str;␊
614	}␊
615	␊
616	sub addendum {␊
617	my ($self,$filename) = @_;␊
618	␊
619	print STDERR "Apply addendum $filename..." if $self->debug();␊
620	unless ($filename) {␊
621	warn wrap_msg(dgettext("po4a",␊
622	"Can't apply addendum when not given the filename"));␊
623	return 0;␊
624	}␊
625	die wrap_msg(dgettext("po4a", "Addendum %s does not exist."), $filename)␊
626	unless -e $filename;␊
627	␊
628	my ($errcode,$mode,$position,$boundary,$bmode,$content)=␊
629	addendum_parse($filename);␊
630	return 0 if ($errcode);␊
631	␊
632	print STDERR "mode=$mode;pos=$position;bound=$boundary;bmode=$bmode;ctn=$content\n"␊
633	if $self->debug();␊
634	␊
635	# We only recode the addendum if an origin charset is specified, else we␊
636	# suppose it's already in the output document's charset␊
637	if (defined($self->{TT}{'addendum_charset'}) &&␊
638	length($self->{TT}{'addendum_charset'})) {␊
639	Encode::from_to($content,$self->{TT}{'addendum_charset'},␊
640	$self->get_out_charset);␊
641	}␊
642	␊
643	my $found = scalar grep { /$position/ } @{$self->{TT}{doc_out}};␊
644	if ($found == 0) {␊
645	warn wrap_msg(dgettext("po4a",␊
646	"No candidate position for the addendum %s."), $filename);␊
647	return 0;␊
648	}␊
649	if ($found > 1) {␊
650	warn wrap_msg(dgettext("po4a",␊
651	"More than one candidate position found for the addendum %s."), $filename);␊
652	return 0;␊
653	}␊
654	␊
655	if ($mode eq "before") {␊
656	if ($self->verbose() > 1 \|\| $self->debug() ) {␊
657	map { print STDERR wrap_msg(dgettext("po4a", "Addendum '%s' applied before this line: %s"), $filename, $_) if (/$position/);␊
658	} @{$self->{TT}{doc_out}};␊
659	}␊
660	@{$self->{TT}{doc_out}} = map { /$position/ ? ($content,$_) : $_␊
661	} @{$self->{TT}{doc_out}};␊
662	} else {␊
663	my @newres=();␊
664	␊
665	do {␊
666	# make sure it doesn't whine on empty document␊
667	my $line = scalar @{$self->{TT}{doc_out}} ? shift @{$self->{TT}{doc_out}} : "";␊
668	push @newres,$line;␊
669	my $outline=mychomp($line);␊
670	$outline =~ s/^[ \t]*//;␊
671	␊
672	if ($line =~ m/$position/) {␊
673	while ($line=shift @{$self->{TT}{doc_out}}) {␊
674	last if ($line=~/$boundary/);␊
675	push @newres,$line;␊
676	}␊
677	if (defined $line) {␊
678	if ($bmode eq 'before') {␊
679	print wrap_msg(dgettext("po4a",␊
680	"Addendum '%s' applied before this line: %s"),␊
681	$filename, $outline)␊
682	if ($self->verbose() > 1 \|\| $self->debug());␊
683	push @newres,$content;␊
684	push @newres,$line;␊
685	} else {␊
686	print wrap_msg(dgettext("po4a",␊
687	"Addendum '%s' applied after the line: %s."),␊
688	$filename, $outline)␊
689	if ($self->verbose() > 1 \|\| $self->debug());␊
690	push @newres,$line;␊
691	push @newres,$content;␊
692	}␊
693	} else {␊
694	print wrap_msg(dgettext("po4a", "Addendum '%s' applied at the end of the file."), $filename)␊
695	if ($self->verbose() > 1 \|\| $self->debug());␊
696	push @newres,$content;␊
697	}␊
698	}␊
699	} while (scalar @{$self->{TT}{doc_out}});␊
700	@{$self->{TT}{doc_out}} = @newres;␊
701	}␊
702	print STDERR "done.\n" if $self->debug();␊
703	return 1;␊
704	}␊
705	␊
706	=back␊
707	␊
708	=head1 INTERNAL FUNCTIONS used to write derivated parsers␊
709	␊
710	=head2 Getting input, providing output␊
711	␊
712	Four functions are provided to get input and return output. They are very␊
713	similar to shift/unshift and push/pop. The first pair is about input, while␊
714	the second is about output. Mnemonic: in input, you are interested in the␊
715	first line, what shift gives, and in output you want to add your result at␊
716	the end, like push does.␊
717	␊
718	=over 4␊
719	␊
720	=item shiftline()␊
721	␊
722	This function returns the next line of the doc_in to be parsed and its␊
723	reference (packed as an array).␊
724	␊
725	=item unshiftline($$)␊
726	␊
727	Unshifts a line of the input document and its reference.␊
728	␊
729	=item pushline($)␊
730	␊
731	Push a new line to the doc_out.␊
732	␊
733	=item popline()␊
734	␊
735	Pop the last pushed line from the doc_out.␊
736	␊
737	=back␊
738	␊
739	=cut␊
740	␊
741	sub shiftline {␊
742	my ($line,$ref)=(shift @{$_[0]->{TT}{doc_in}},␊
743	shift @{$_[0]->{TT}{doc_in}});␊
744	return ($line,$ref);␊
745	}␊
746	sub unshiftline {␊
747	my $self = shift;␊
748	unshift @{$self->{TT}{doc_in}},@_;␊
749	}␊
750	␊
751	sub pushline { push @{$_[0]->{TT}{doc_out}}, $_[1] if defined $_[1]; }␊
752	sub popline { return pop @{$_[0]->{TT}{doc_out}}; }␊
753	␊
754	=head2 Marking strings as translatable␊
755	␊
756	One function is provided to handle the text which should be translated.␊
757	␊
758	=over 4␊
759	␊
760	=item translate($$$)␊
761	␊
762	Mandatory arguments:␊
763	␊
764	=over 2␊
765	␊
766	=item -␊
767	␊
768	A string to translate␊
769	␊
770	=item -␊
771	␊
772	The reference of this string (i.e. position in inputfile)␊
773	␊
774	=item -␊
775	␊
776	The type of this string (i.e. the textual description of its structural role;␊
777	used in Locale::Po4a::Po::gettextization(); see also L<po4a(7)\|po4a.7>,␊
778	section B<Gettextization: how does it work?>)␊
779	␊
780	=back␊
781	␊
782	This function can also take some extra arguments. They must be organized as␊
783	a hash. For example:␊
784	␊
785	$self->translate("string","ref","type",␊
786	'wrap' => 1);␊
787	␊
788	=over␊
789	␊
790	=item B<wrap>␊
791	␊
792	boolean indicating whether we can consider that whitespaces in string are␊
793	not important. If yes, the function canonizes the string before looking for␊
794	a translation or extracting it, and wraps the translation.␊
795	␊
796	=item B<wrapcol>␊
797	␊
798	the column at which we should wrap (default: 76).␊
799	␊
800	=item B<comment>␊
801	␊
802	an extra comment to add to the entry.␊
803	␊
804	=back␊
805	␊
806	Actions:␊
807	␊
808	=over 2␊
809	␊
810	=item -␊
811	␊
812	Pushes the string, reference and type to po_out.␊
813	␊
814	=item -␊
815	␊
816	Returns the translation of the string (as found in po_in) so that the␊
817	parser can build the doc_out.␊
818	␊
819	=item -␊
820	␊
821	Handles the charsets to recode the strings before sending them to␊
822	po_out and before returning the translations.␊
823	␊
824	=back␊
825	␊
826	=back␊
827	␊
828	=cut␊
829	␊
830	sub translate {␊
831	my $self=shift;␊
832	my ($string,$ref,$type)=(shift,shift,shift);␊
833	my (%options)=@_;␊
834	␊
835	# my $validoption="wrap wrapcol";␊
836	# my %validoption;␊
837	␊
838	return "" unless defined($string) && length($string);␊
839	␊
840	# map { $validoption{$_}=1 } (split(/ /,$validoption));␊
841	# foreach (keys %options) {␊
842	# Carp::confess "internal error: translate() called with unknown arg $_. Valid options: $validoption"␊
843	# unless $validoption{$_};␊
844	# }␊
845	␊
846	my $in_charset;␊
847	if ($self->{TT}{ascii_input}) {␊
848	$in_charset = "ascii";␊
849	} else {␊
850	if (defined($self->{TT}{'file_in_charset'}) and␊
851	length($self->{TT}{'file_in_charset'}) and␊
852	$self->{TT}{'file_in_charset'} !~ m/ascii/i) {␊
853	$in_charset=$self->{TT}{'file_in_charset'};␊
854	} else {␊
855	# FYI, the document charset have to be determined before we see the first␊
856	# string to recode.␊
857	die wrap_mod("po4a", dgettext("po4a", "Couldn't determine the input document's charset. Please specify it on the command line. (non-ASCII char at %s)"), $self->{TT}{non_ascii_ref})␊
858	}␊
859	}␊
860	␊
861	if ($self->{TT}{po_in}->get_charset ne "CHARSET") {␊
862	$string = encode_from_to($string,␊
863	$self->{TT}{'file_in_encoder'},␊
864	$self->{TT}{po_in}{encoder});␊
865	}␊
866	␊
867	if (defined $options{'wrapcol'} && $options{'wrapcol'} < 0) {␊
868	# FIXME: should be the parameter given with --width␊
869	$options{'wrapcol'} = 76 + $options{'wrapcol'};␊
870	}␊
871	my $transstring = $self->{TT}{po_in}->gettext($string,␊
872	'wrap' => $options{'wrap'}\|\|0,␊
873	'wrapcol' => $options{'wrapcol'});␊
874	␊
875	if ($self->{TT}{po_in}->get_charset ne "CHARSET") {␊
876	my $out_encoder = $self->{TT}{'file_out_encoder'};␊
877	unless (defined $out_encoder) {␊
878	$out_encoder = find_encoding($self->get_out_charset)␊
879	}␊
880	$transstring = encode_from_to($transstring,␊
881	$self->{TT}{po_in}{encoder},␊
882	$out_encoder);␊
883	}␊
884	␊
885	# If the input document isn't completely in ascii, we should see what to␊
886	# do with the current string␊
887	unless ($self->{TT}{ascii_input}) {␊
888	my $out_charset = $self->{TT}{po_out}->get_charset;␊
889	# We set the output po charset␊
890	if ($out_charset eq "CHARSET") {␊
891	if ($self->{TT}{utf_mode}) {␊
892	$out_charset="UTF-8";␊
893	} else {␊
894	$out_charset=$in_charset;␊
895	}␊
896	$self->{TT}{po_out}->set_charset($out_charset);␊
897	}␊
898	if ( $in_charset !~ /^$out_charset$/i ) {␊
899	Encode::from_to($string,$in_charset,$out_charset);␊
900	if (defined($options{'comment'}) and length($options{'comment'})) {␊
901	Encode::from_to($options{'comment'},$in_charset,$out_charset);␊
902	}␊
903	}␊
904	}␊
905	␊
906	# the comments provided by the modules are automatic comments from the PO point of view␊
907	$self->{TT}{po_out}->push('msgid' => $string,␊
908	'reference' => $ref,␊
909	'type' => $type,␊
910	'automatic' => $options{'comment'},␊
911	'wrap' => $options{'wrap'}\|\|0,␊
912	'wrapcol' => $options{'wrapcol'});␊
913	␊
914	# if ($self->{TT}{po_in}->get_charset ne "CHARSET") {␊
915	# Encode::from_to($transstring,$self->{TT}{po_in}->get_charset,␊
916	# $self->get_out_charset);␊
917	# }␊
918	␊
919	if ($options{'wrap'}\|\|0) {␊
920	$transstring =~ s/( *)$//s;␊
921	my $trailing_spaces = $1\|\|"";␊
922	$transstring =~ s/(?<!\\) +$//gm;␊
923	$transstring .= $trailing_spaces;␊
924	}␊
925	␊
926	return $transstring;␊
927	}␊
928	␊
929	=head2 Misc functions␊
930	␊
931	=over 4␊
932	␊
933	=item verbose()␊
934	␊
935	Returns if the verbose option was passed during the creation of the␊
936	TransTractor.␊
937	␊
938	=cut␊
939	␊
940	sub verbose {␊
941	if (defined $_[1]) {␊
942	$_[0]->{TT}{verbose} = $_[1];␊
943	} else {␊
944	return $_[0]->{TT}{verbose} \|\| 0; # undef and 0 have the same meaning, but one generates warnings␊
945	}␊
946	}␊
947	␊
948	=item debug()␊
949	␊
950	Returns if the debug option was passed during the creation of the␊
951	TransTractor.␊
952	␊
953	=cut␊
954	␊
955	sub debug {␊
956	return $_[0]->{TT}{debug};␊
957	}␊
958	␊
959	=item detected_charset($)␊
960	␊
961	This tells TransTractor that a new charset (the first argument) has been␊
962	detected from the input document. It can usually be read from the document␊
963	header. Only the first charset will remain, coming either from the␊
964	process() arguments or detected from the document.␊
965	␊
966	=cut␊
967	␊
968	sub detected_charset {␊
969	my ($self,$charset)=(shift,shift);␊
970	unless (defined($self->{TT}{'file_in_charset'}) and␊
971	length($self->{TT}{'file_in_charset'}) ) {␊
972	$self->{TT}{'file_in_charset'}=$charset;␊
973	if (defined $charset) {␊
974	$self->{TT}{'file_in_encoder'}=find_encoding($charset);␊
975	} else {␊
976	$self->{TT}{ascii_input}=1;␊
977	$self->{TT}{utf_mode}=0;␊
978	}␊
979	}␊
980	␊
981	if (defined $self->{TT}{'file_in_charset'} and␊
982	length $self->{TT}{'file_in_charset'} and␊
983	$self->{TT}{'file_in_charset'} !~ m/ascii/i) {␊
984	$self->{TT}{ascii_input}=0;␊
985	}␊
986	}␊
987	␊
988	=item get_out_charset()␊
989	␊
990	This function will return the charset that should be used in the output␊
991	document (usually useful to substitute the input document's detected charset␊
992	where it has been found).␊
993	␊
994	It will use the output charset specified in the command line. If it wasn't␊
995	specified, it will use the input PO's charset, and if the input PO has the␊
996	default "CHARSET", it will return the input document's charset, so that no␊
997	encoding is performed.␊
998	␊
999	=cut␊
1000	␊
1001	sub get_out_charset {␊
1002	my $self=shift;␊
1003	my $charset;␊
1004	␊
1005	# Use the value specified at the command line␊
1006	if (defined($self->{TT}{'file_out_charset'}) and␊
1007	length($self->{TT}{'file_out_charset'})) {␊
1008	$charset=$self->{TT}{'file_out_charset'};␊
1009	} else {␊
1010	if ($self->{TT}{utf_mode} && $self->{TT}{ascii_input}) {␊
1011	$charset="UTF-8";␊
1012	} else {␊
1013	$charset=$self->{TT}{po_in}->get_charset;␊
1014	$charset=$self->{TT}{'file_in_charset'}␊
1015	if $charset eq "CHARSET" and␊
1016	defined($self->{TT}{'file_in_charset'}) and␊
1017	length($self->{TT}{'file_in_charset'});␊
1018	$charset="ascii"␊
1019	if $charset eq "CHARSET";␊
1020	}␊
1021	}␊
1022	return $charset;␊
1023	}␊
1024	␊
1025	=item recode_skipped_text($)␊
1026	␊
1027	This function returns the recoded text passed as argument, from the input␊
1028	document's charset to the output document's one. This isn't needed when␊
1029	translating a string (translate() recodes everything itself), but it is when␊
1030	you skip a string from the input document and you want the output document to␊
1031	be consistent with the global encoding.␊
1032	␊
1033	=cut␊
1034	␊
1035	sub recode_skipped_text {␊
1036	my ($self,$text)=(shift,shift);␊
1037	unless ($self->{TT}{'ascii_input'}) {␊
1038	if(defined($self->{TT}{'file_in_charset'}) and␊
1039	length($self->{TT}{'file_in_charset'}) ) {␊
1040	$text = encode_from_to($text,␊
1041	$self->{TT}{'file_in_encoder'},␊
1042	find_encoding($self->get_out_charset));␊
1043	} else {␊
1044	die wrap_mod("po4a", dgettext("po4a", "Couldn't determine the input document's charset. Please specify it on the command line. (non-ASCII char at %s)"), $self->{TT}{non_ascii_ref})␊
1045	}␊
1046	}␊
1047	return $text;␊
1048	}␊
1049	␊
1050	␊
1051	# encode_from_to($,$,$)␊
1052	#␊
1053	# Encode the given text from one encoding to another one.␊
1054	# It differs from Encode::from_to because it does not take the name of the␊
1055	# encoding in argument, but the encoders (as returned by the␊
1056	# Encode::find_encoding(<name>) method). Thus it permits to save a bunch␊
1057	# of call to find_encoding.␊
1058	#␊
1059	# If the "from" encoding is undefined, it is considered as UTF-8 (or␊
1060	# ascii).␊
1061	# If the "to" encoding is undefined, it is considered as UTF-8.␊
1062	#␊
1063	sub encode_from_to {␊
1064	my ($text,$from,$to) = (shift,shift,shift);␊
1065	␊
1066	if (not defined $from) {␊
1067	# for ascii and UTF-8, no conversion needed to get an utf-8␊
1068	# string.␊
1069	} else {␊
1070	$text = $from->decode($text, 0);␊
1071	}␊
1072	␊
1073	if (not defined $to) {␊
1074	# Already in UTF-8, no conversion needed␊
1075	} else {␊
1076	$text = $to->encode($text, 0);␊
1077	}␊
1078	␊
1079	return $text;␊
1080	}␊
1081	␊
1082	=back␊
1083	␊
1084	=head1 FUTURE DIRECTIONS␊
1085	␊
1086	One shortcoming of the current TransTractor is that it can't handle␊
1087	translated document containing all languages, like debconf templates, or␊
1088	.desktop files.␊
1089	␊
1090	To address this problem, the only interface changes needed are:␊
1091	␊
1092	=over 2␊
1093	␊
1094	=item -␊
1095	␊
1096	take a hash as po_in_name (a list per language)␊
1097	␊
1098	=item -␊
1099	␊
1100	add an argument to translate to indicate the target language␊
1101	␊
1102	=item -␊
1103	␊
1104	make a pushline_all function, which would make pushline of its content for␊
1105	all language, using a map-like syntax:␊
1106	␊
1107	$self->pushline_all({ "Description[".$langcode."]=".␊
1108	$self->translate($line,$ref,$langcode)␊
1109	});␊
1110	␊
1111	=back␊
1112	␊
1113	Will see if it's enough ;)␊
1114	␊
1115	=head1 AUTHORS␊
1116	␊
1117	Denis Barbier <barbier@linuxfr.org>␊
1118	Martin Quinson (mquinson#debian.org)␊
1119	Jordi Vilalta <jvprat@gmail.com>␊
1120	␊
1121	=cut␊
1122	␊
1123	1;␊
1124

Download this file