branches/ErmaC/Trunk/package/bin/po4a/lib/Locale/Po4a/TransTractor.pm - Chameleon Svn Source Tree - Chameleon open source boot loader project.

Root/branches/ErmaC/Trunk/package/bin/po4a/lib/Locale/Po4a/TransTractor.pm

1	#!/usr/bin/perl -w␊
2	␊
3	require Exporter;␊
4	␊
5	package Locale::Po4a::TransTractor;␊
6	use DynaLoader;␊
7	␊
8	use 5.006;␊
9	use strict;␊
10	use warnings;␊
11	␊
12	use subs qw(makespace);␊
13	use vars qw($VERSION @ISA @EXPORT);␊
14	$VERSION="0.41.1";␊
15	@ISA = qw(DynaLoader);␊
16	@EXPORT = qw(new process translate␊
17	read write readpo writepo␊
18	getpoout setpoout get_out_charset);␊
19	␊
20	# Try to use a C extension if present.␊
21	eval("bootstrap Locale::Po4a::TransTractor $VERSION");␊
22	␊
23	use Carp qw(croak);␊
24	use Locale::Po4a::Po;␊
25	use Locale::Po4a::Common;␊
26	␊
27	use File::Path; # mkdir before write␊
28	␊
29	use Encode;␊
30	use Encode::Guess;␊
31	␊
32	=encoding UTF-8␊
33	␊
34	=head1 NAME␊
35	␊
36	Locale::Po4a::TransTractor - generic trans(lator ex)tractor.␊
37	␊
38	=head1 DESCRIPTION␊
39	␊
40	The po4a (PO for anything) project goal is to ease translations (and more␊
41	interestingly, the maintenance of translations) using gettext tools on␊
42	areas where they were not expected like documentation.␊
43	␊
44	This class is the ancestor of every po4a parser used to parse a document, to␊
45	search translatable strings, to extract them to a PO file and to replace them by␊
46	their translation in the output document.␊
47	␊
48	More formally, it takes the following arguments as input:␊
49	␊
50	=over 2␊
51	␊
52	=item -␊
53	␊
54	a document to translate;␊
55	␊
56	=item -␊
57	␊
58	a PO file containing the translations to use.␊
59	␊
60	=back␊
61	␊
62	As output, it produces:␊
63	␊
64	=over 2␊
65	␊
66	=item -␊
67	␊
68	another PO file, resulting of the extraction of translatable strings from␊
69	the input document;␊
70	␊
71	=item -␊
72	␊
73	a translated document, with the same structure than the one in input, but␊
74	with all translatable strings replaced with the translations found in the␊
75	PO file provided in input.␊
76	␊
77	=back␊
78	␊
79	Here is a graphical representation of this:␊
80	␊
81	Input document --\ /---> Output document␊
82	\ / (translated)␊
83	+-> parse() function -----+␊
84	/ \␊
85	Input PO --------/ \---> Output PO␊
86	(extracted)␊
87	␊
88	=head1 FUNCTIONS YOUR PARSER SHOULD OVERRIDE␊
89	␊
90	=over 4␊
91	␊
92	=item parse()␊
93	␊
94	This is where all the work takes place: the parsing of input documents, the␊
95	generation of output, and the extraction of the translatable strings. This␊
96	is pretty simple using the provided functions presented in the section␊
97	B<INTERNAL FUNCTIONS> below. See also the B<SYNOPSIS>, which presents an␊
98	example.␊
99	␊
100	This function is called by the process() function below, but if you choose␊
101	to use the new() function, and to add content manually to your document,␊
102	you will have to call this function yourself.␊
103	␊
104	=item docheader()␊
105	␊
106	This function returns the header we should add to the produced document,␊
107	quoted properly to be a comment in the target language. See the section␊
108	B<Educating developers about translations>, from L<po4a(7)\|po4a.7>, for what␊
109	it is good for.␊
110	␊
111	=back␊
112	␊
113	=cut␊
114	␊
115	sub docheader {}␊
116	␊
117	sub parse {}␊
118	␊
119	=head1 SYNOPSIS␊
120	␊
121	The following example parses a list of paragraphs beginning with "<p>". For the sake␊
122	of simplicity, we assume that the document is well formatted, i.e. that '<p>'␊
123	tags are the only tags present, and that this tag is at the very beginning␊
124	of each paragraph.␊
125	␊
126	sub parse {␊
127	my $self = shift;␊
128	␊
129	PARAGRAPH: while (1) {␊
130	my ($paragraph,$pararef)=("","");␊
131	my $first=1;␊
132	my ($line,$lref)=$self->shiftline();␊
133	while (defined($line)) {␊
134	␉ if ($line =~ m/<p>/ && !$first--; ) {␊
135	␉ # Not the first time we see <p>.␊
136	␉ # Reput the current line in input,␊
137	␉ # and put the built paragraph to output␊
138	␉ $self->unshiftline($line,$lref);␊
139	␊
140	␉ # Now that the document is formed, translate it:␊
141	␉ # - Remove the leading tag␊
142	␉ $paragraph =~ s/^<p>//s;␊
143	␊
144	␉ # - push to output the leading tag (untranslated) and the␊
145	␉ # rest of the paragraph (translated)␊
146	␉ $self->pushline( "<p>"␊
147	. $document->translate($paragraph,$pararef)␊
148	);␊
149	␊
150	␉ next PARAGRAPH;␊
151	␉ } else {␊
152	␉ # Append to the paragraph␊
153	␉ $paragraph .= $line;␊
154	␉ $pararef = $lref unless(length($pararef));␊
155	␉ }␊
156	␊
157	# Reinit the loop␊
158	($line,$lref)=$self->shiftline();␊
159	}␊
160	# Did not get a defined line? End of input file.␊
161	return;␊
162	}␊
163	}␊
164	␊
165	Once you've implemented the parse function, you can use your document␊
166	class, using the public interface presented in the next section.␊
167	␊
168	=head1 PUBLIC INTERFACE for scripts using your parser␊
169	␊
170	=head2 Constructor␊
171	␊
172	=over 4␊
173	␊
174	=item process(%)␊
175	␊
176	This function can do all you need to do with a po4a document in one␊
177	invocation. Its arguments must be packed as a hash. ACTIONS:␊
178	␊
179	=over 3␊
180	␊
181	=item a.␊
182	␊
183	Reads all the PO files specified in po_in_name␊
184	␊
185	=item b.␊
186	␊
187	Reads all original documents specified in file_in_name␊
188	␊
189	=item c.␊
190	␊
191	Parses the document␊
192	␊
193	=item d.␊
194	␊
195	Reads and applies all the addenda specified␊
196	␊
197	=item e.␊
198	␊
199	Writes the translated document to file_out_name (if given)␊
200	␊
201	=item f.␊
202	␊
203	Writes the extracted PO file to po_out_name (if given)␊
204	␊
205	=back␊
206	␊
207	ARGUMENTS, beside the ones accepted by new() (with expected type):␊
208	␊
209	=over 4␊
210	␊
211	=item file_in_name (@)␊
212	␊
213	List of filenames where we should read the input document.␊
214	␊
215	=item file_in_charset ($)␊
216	␊
217	Charset used in the input document (if it isn't specified, it will try␊
218	to detect it from the input document).␊
219	␊
220	=item file_out_name ($)␊
221	␊
222	Filename where we should write the output document.␊
223	␊
224	=item file_out_charset ($)␊
225	␊
226	Charset used in the output document (if it isn't specified, it will use␊
227	the PO file charset).␊
228	␊
229	=item po_in_name (@)␊
230	␊
231	List of filenames where we should read the input PO files from, containing␊
232	the translation which will be used to translate the document.␊
233	␊
234	=item po_out_name ($)␊
235	␊
236	Filename where we should write the output PO file, containing the strings␊
237	extracted from the input document.␊
238	␊
239	=item addendum (@)␊
240	␊
241	List of filenames where we should read the addenda from.␊
242	␊
243	=item addendum_charset ($)␊
244	␊
245	Charset for the addenda.␊
246	␊
247	=back␊
248	␊
249	=item new(%)␊
250	␊
251	Create a new po4a document. Accepted options (but be in a hash):␊
252	␊
253	=over 4␊
254	␊
255	=item verbose ($)␊
256	␊
257	Sets the verbosity.␊
258	␊
259	=item debug ($)␊
260	␊
261	Sets the debugging.␊
262	␊
263	=back␊
264	␊
265	=cut␊
266	␊
267	sub process {␊
268	## Determine if we were called via an object-ref or a classname␊
269	my $self = shift;␊
270	␊
271	## Any remaining arguments are treated as initial values for the␊
272	## hash that is used to represent this object.␊
273	my %params = @_;␊
274	␊
275	# Build the args for new()␊
276	my %newparams = ();␊
277	foreach (keys %params) {␊
278	␉next if ($_ eq 'po_in_name' \|\|␊
279	␉␉ $_ eq 'po_out_name' \|\|␊
280	␉␉ $_ eq 'file_in_name' \|\|␊
281	␉␉ $_ eq 'file_in_charset' \|\|␊
282	␉␉ $_ eq 'file_out_name' \|\|␊
283	␉␉ $_ eq 'file_out_charset' \|\|␊
284	␉␉ $_ eq 'addendum' \|\|␊
285	␉␉ $_ eq 'addendum_charset');␊
286	␉$newparams{$_}=$params{$_};␊
287	}␊
288	␊
289	$self->detected_charset($params{'file_in_charset'});␊
290	$self->{TT}{'file_out_charset'}=$params{'file_out_charset'};␊
291	if (defined($self->{TT}{'file_out_charset'}) and␊
292	␉length($self->{TT}{'file_out_charset'})) {␊
293	␉$self->{TT}{'file_out_encoder'} = find_encoding($self->{TT}{'file_out_charset'});␊
294	}␊
295	$self->{TT}{'addendum_charset'}=$params{'addendum_charset'};␊
296	␊
297	chdir $params{'srcdir'}␊
298	␉if (defined $params{'srcdir'});␊
299	foreach my $file (@{$params{'po_in_name'}}) {␊
300	␉print STDERR "readpo($file)... " if $self->debug();␊
301	␉$self->readpo($file);␊
302	␉print STDERR "done.\n" if $self->debug()␊
303	}␊
304	foreach my $file (@{$params{'file_in_name'}}) {␊
305	␉print STDERR "read($file)..." if $self->debug();␊
306	␉$self->read($file);␊
307	␉print STDERR "done.\n" if $self->debug();␊
308	}␊
309	print STDERR "parse..." if $self->debug();␊
310	$self->parse();␊
311	print STDERR "done.\n" if $self->debug();␊
312	foreach my $file (@{$params{'addendum'}}) {␊
313	␉print STDERR "addendum($file)..." if $self->debug();␊
314	␉$self->addendum($file) \|\| die "An addendum failed\n";␊
315	␉print STDERR "done.\n" if $self->debug();␊
316	}␊
317	chdir $params{'destdir'}␊
318	␉if (defined $params{'destdir'});␊
319	if (defined $params{'file_out_name'}) {␊
320	␉print STDERR "write(".$params{'file_out_name'}.")... "␊
321	␉ if $self->debug();␊
322	␉$self->write($params{'file_out_name'});␊
323	␉print STDERR "done.\n" if $self->debug();␊
324	}␊
325	chdir $params{'srcdir'}␊
326	␉if (defined $params{'srcdir'});␊
327	if (defined $params{'po_out_name'}) {␊
328	␉print STDERR "writepo(".$params{'po_out_name'}.")... "␊
329	␉ if $self->debug();␊
330	␉$self->writepo($params{'po_out_name'});␊
331	␉print STDERR "done.\n" if $self->debug();␊
332	}␊
333	chdir $params{'calldir'}␊
334	␉if (defined $params{'calldir'});␊
335	return $self;␊
336	}␊
337	␊
338	sub new {␊
339	## Determine if we were called via an object-ref or a classname␊
340	my $this = shift;␊
341	my $class = ref($this) \|\| $this;␊
342	my $self = { };␊
343	my %options=@_;␊
344	## Bless ourselves into the desired class and perform any initialization␊
345	bless $self, $class;␊
346	␊
347	## initialize the plugin␊
348	# prevent the plugin from croaking on the options intended for Po.pm␊
349	$self->{options}{'porefs'} = '';␊
350	$self->{options}{'copyright-holder'} = '';␊
351	$self->{options}{'msgid-bugs-address'} = '';␊
352	$self->{options}{'package-name'} = '';␊
353	$self->{options}{'package-version'} = '';␊
354	# let the plugin parse the options and such␊
355	$self->initialize(%options);␊
356	␊
357	## Create our private data␊
358	my %po_options;␊
359	$po_options{'porefs'} = $self->{options}{'porefs'};␊
360	$po_options{'copyright-holder'} = $options{'copyright-holder'};␊
361	$po_options{'msgid-bugs-address'} = $options{'msgid-bugs-address'};␊
362	$po_options{'package-name'} = $options{'package-name'};␊
363	$po_options{'package-version'} = $options{'package-version'};␊
364	␊
365	# private data␊
366	$self->{TT}=();␊
367	$self->{TT}{po_in}=Locale::Po4a::Po->new(\%po_options);␊
368	$self->{TT}{po_out}=Locale::Po4a::Po->new(\%po_options);␊
369	# Warning, this is an array of array:␊
370	# The document is splited on lines, and for each␊
371	# [0] is the line content, [1] is the reference [2] the type␊
372	$self->{TT}{doc_in}=();␊
373	$self->{TT}{doc_out}=();␊
374	if (defined $options{'verbose'}) {␊
375	␉$self->{TT}{verbose} = $options{'verbose'};␊
376	}␊
377	if (defined $options{'debug'}) {␊
378	␉$self->{TT}{debug} = $options{'debug'};␊
379	}␊
380	# Input document is in ascii until we prove the opposite (in read())␊
381	$self->{TT}{ascii_input}=1;␊
382	# We try not to use utf unless it's forced from the outside (in case the␊
383	# document isn't in ascii)␊
384	$self->{TT}{utf_mode}=0;␊
385	␊
386	␊
387	return $self;␊
388	}␊
389	␊
390	=back␊
391	␊
392	=head2 Manipulating document files␊
393	␊
394	=over 4␊
395	␊
396	=item read($)␊
397	␊
398	Add another input document at the end of the existing one. The argument is␊
399	the filename to read.␊
400	␊
401	Please note that it does not parse anything. You should use the parse()␊
402	function when you're done with packing input files into the document.␊
403	␊
404	=cut␊
405	␊
406	#'␊
407	sub read() {␊
408	my $self=shift;␊
409	my $filename=shift␊
410	␉or croak wrap_msg(dgettext("po4a", "Can't read from file without having a filename"));␊
411	my $linenum=0;␊
412	␊
413	open INPUT,"<$filename"␊
414	␉or croak wrap_msg(dgettext("po4a", "Can't read from %s: %s"), $filename, $!);␊
415	while (defined (my $textline = <INPUT>)) {␊
416	␉$linenum++;␊
417	␉my $ref="$filename:$linenum";␊
418	␉$textline =~ s/\r$//;␊
419	␉my @entry=($textline,$ref);␊
420	␉push @{$self->{TT}{doc_in}}, @entry;␊
421	␊
422	␉if (!defined($self->{TT}{'file_in_charset'})) {␊
423	␉ # Detect if this file has non-ascii characters␊
424	␉ if($self->{TT}{ascii_input}) {␊
425	␉␉my $decoder = guess_encoding($textline);␊
426	␉␉if (!ref($decoder) or $decoder !~ /Encode::XS=/) {␊
427	␉␉ # We have detected a non-ascii line␊
428	␉␉ $self->{TT}{ascii_input} = 0;␊
429	␉␉ # Save the reference for future error message␊
430	␉␉ $self->{TT}{non_ascii_ref} \|\|= $ref;␊
431	␉␉}␊
432	␉ }␊
433	␉}␊
434	}␊
435	close INPUT␊
436	␉or croak wrap_msg(dgettext("po4a", "Can't close %s after reading: %s"), $filename, $!);␊
437	␊
438	}␊
439	␊
440	=item write($)␊
441	␊
442	Write the translated document to the given filename.␊
443	␊
444	=cut␊
445	␊
446	sub write {␊
447	my $self=shift;␊
448	my $filename=shift␊
449	␉or croak wrap_msg(dgettext("po4a", "Can't write to a file without filename"));␊
450	␊
451	my $fh;␊
452	if ($filename eq '-') {␊
453	␉$fh=\*STDOUT;␊
454	} else {␊
455	␉# make sure the directory in which we should write the localized file exists␊
456	␉my $dir = $filename;␊
457	␉if ($dir =~ m\|/\|) {␊
458	␉ $dir =~ s\|/[^/]*$\|\|;␊
459	␊
460	␉ File::Path::mkpath($dir, 0, 0755) # Croaks on error␊
461	␉ if (length ($dir) && ! -e $dir);␊
462	␉}␊
463	␉open $fh,">$filename"␊
464	␉ or croak wrap_msg(dgettext("po4a", "Can't write to %s: %s"), $filename, $!);␊
465	}␊
466	␊
467	map { print $fh $_ } $self->docheader();␊
468	map { print $fh $_ } @{$self->{TT}{doc_out}};␊
469	␊
470	if ($filename ne '-') {␊
471	␉close $fh or croak wrap_msg(dgettext("po4a", "Can't close %s after writing: %s"), $filename, $!);␊
472	}␊
473	␊
474	}␊
475	␊
476	=back␊
477	␊
478	=head2 Manipulating PO files␊
479	␊
480	=over 4␊
481	␊
482	=item readpo($)␊
483	␊
484	Add the content of a file (which name is passed as argument) to the␊
485	existing input PO. The old content is not discarded.␊
486	␊
487	=item writepo($)␊
488	␊
489	Write the extracted PO file to the given filename.␊
490	␊
491	=item stats()␊
492	␊
493	Returns some statistics about the translation done so far. Please note that␊
494	it's not the same statistics than the one printed by msgfmt␊
495	--statistic. Here, it's stats about recent usage of the PO file, while␊
496	msgfmt reports the status of the file. It is a wrapper to the␊
497	Locale::Po4a::Po::stats_get function applied to the input PO file. Example␊
498	of use:␊
499	␊
500	[normal use of the po4a document...]␊
501	␊
502	($percent,$hit,$queries) = $document->stats();␊
503	print "We found translations for $percent\% ($hit from $queries) of strings.\n";␊
504	␊
505	=back␊
506	␊
507	=cut␊
508	␊
509	sub getpoout {␊
510	return $_[0]->{TT}{po_out};␊
511	}␊
512	sub setpoout {␊
513	$_[0]->{TT}{po_out} = $_[1];␊
514	}␊
515	sub readpo {␊
516	$_[0]->{TT}{po_in}->read($_[1]);␊
517	}␊
518	sub writepo {␊
519	$_[0]->{TT}{po_out}->write( $_[1] );␊
520	}␊
521	sub stats {␊
522	return $_[0]->{TT}{po_in}->stats_get();␊
523	}␊
524	␊
525	=head2 Manipulating addenda␊
526	␊
527	=over 4␊
528	␊
529	=item addendum($)␊
530	␊
531	Please refer to L<po4a(7)\|po4a.7> for more information on what addenda are,␊
532	and how translators should write them. To apply an addendum to the translated␊
533	document, simply pass its filename to this function and you are done ;)␊
534	␊
535	This function returns a non-null integer on error.␊
536	␊
537	=cut␊
538	␊
539	# Internal function to read the header.␊
540	sub addendum_parse {␊
541	my ($filename,$header)=shift;␊
542	␊
543	my ($errcode,$mode,$position,$boundary,$bmode,$content)=␊
544	␉(1,"","","","","");␊
545	␊
546	unless (open (INS, "<$filename")) {␊
547	␉warn wrap_msg(dgettext("po4a", "Can't read from %s: %s"), $filename, $!);␊
548	␉goto END_PARSE_ADDFILE;␊
549	}␊
550	␊
551	unless (defined ($header=<INS>) && $header) {␊
552	␉warn wrap_msg(dgettext("po4a", "Can't read po4a header from %s."), $filename);␊
553	␉goto END_PARSE_ADDFILE;␊
554	}␊
555	␊
556	unless ($header =~ s/PO4A-HEADER://i) {␊
557	␉warn wrap_msg(dgettext("po4a", "First line of %s does not look like a po4a header."), $filename);␊
558	␉goto END_PARSE_ADDFILE;␊
559	}␊
560	foreach my $part (split(/;/,$header)) {␊
561	␉unless ($part =~ m/^\s([^=])=(.*)$/) {␊
562	␉ warn wrap_msg(dgettext("po4a", "Syntax error in po4a header of %s, near \"%s\""), $filename, $part);␊
563	␉ goto END_PARSE_ADDFILE;␊
564	␉}␊
565	␉my ($key,$value)=($1,$2);␊
566	␉$key=lc($key);␊
567	␉ if ($key eq 'mode') { $mode=lc($value);␊
568	␉} elsif ($key eq 'position') { $position=$value;␊
569	␉} elsif ($key eq 'endboundary') {␊
570	␉ $boundary=$value;␊
571	␉ $bmode='after';␊
572	␉} elsif ($key eq 'beginboundary') {␊
573	␉ $boundary=$value;␊
574	␉ $bmode='before';␊
575	␉} else {␊
576	␉ warn wrap_msg(dgettext("po4a", "Invalid argument in the po4a header of %s: %s"), $filename, $key);␊
577	␉ goto END_PARSE_ADDFILE;␊
578	␉}␊
579	}␊
580	␊
581	unless (length($mode)) {␊
582	␉warn wrap_msg(dgettext("po4a", "The po4a header of %s does not define the mode."), $filename);␊
583	␉goto END_PARSE_ADDFILE;␊
584	}␊
585	unless ($mode eq "before" \|\| $mode eq "after") {␊
586	␉warn wrap_msg(dgettext("po4a", "Mode invalid in the po4a header of %s: should be 'before' or 'after' not %s."), $filename, $mode);␊
587	␉goto END_PARSE_ADDFILE;␊
588	}␊
589	␊
590	unless (length($position)) {␊
591	␉warn wrap_msg(dgettext("po4a", "The po4a header of %s does not define the position."), $filename);␊
592	␉goto END_PARSE_ADDFILE;␊
593	}␊
594	unless ($mode eq "before" \|\| length($boundary)) {␊
595	␉warn wrap_msg(dgettext("po4a", "No ending boundary given in the po4a header, but mode=after."));␊
596	␉goto END_PARSE_ADDFILE;␊
597	}␊
598	␊
599	while (defined(my $line = <INS>)) {␊
600	␉$content .= $line;␊
601	}␊
602	close INS;␊
603	␊
604	$errcode=0;␊
605	END_PARSE_ADDFILE:␊
606	return ($errcode,$mode,$position,$boundary,$bmode,$content);␊
607	}␊
608	␊
609	sub mychomp {␊
610	my ($str) = shift;␊
611	chomp($str);␊
612	return $str;␊
613	}␊
614	␊
615	sub addendum {␊
616	my ($self,$filename) = @_;␊
617	␊
618	print STDERR "Apply addendum $filename..." if $self->debug();␊
619	unless ($filename) {␊
620	␉warn wrap_msg(dgettext("po4a",␊
621	␉ "Can't apply addendum when not given the filename"));␊
622	␉return 0;␊
623	}␊
624	die wrap_msg(dgettext("po4a", "Addendum %s does not exist."), $filename)␊
625	unless -e $filename;␊
626	␊
627	my ($errcode,$mode,$position,$boundary,$bmode,$content)=␊
628	␉addendum_parse($filename);␊
629	return 0 if ($errcode);␊
630	␊
631	print STDERR "mode=$mode;pos=$position;bound=$boundary;bmode=$bmode;ctn=$content\n"␊
632	if $self->debug();␊
633	␊
634	# We only recode the addendum if an origin charset is specified, else we␊
635	# suppose it's already in the output document's charset␊
636	if (defined($self->{TT}{'addendum_charset'}) &&␊
637	length($self->{TT}{'addendum_charset'})) {␊
638	␉Encode::from_to($content,$self->{TT}{'addendum_charset'},␊
639	␉ $self->get_out_charset);␊
640	}␊
641	␊
642	my $found = scalar grep { /$position/ } @{$self->{TT}{doc_out}};␊
643	if ($found == 0) {␊
644	␉warn wrap_msg(dgettext("po4a",␊
645	␉ "No candidate position for the addendum %s."), $filename);␊
646	␉return 0;␊
647	}␊
648	if ($found > 1) {␊
649	␉warn wrap_msg(dgettext("po4a",␊
650	␉ "More than one candidate position found for the addendum %s."), $filename);␊
651	␉return 0;␊
652	}␊
653	␊
654	if ($mode eq "before") {␊
655	␉if ($self->verbose() > 1 \|\| $self->debug() ) {␊
656	␉ map { print STDERR wrap_msg(dgettext("po4a", "Addendum '%s' applied before this line: %s"), $filename, $_) if (/$position/);␊
657	␉ } @{$self->{TT}{doc_out}};␊
658	␉}␊
659	␉@{$self->{TT}{doc_out}} = map { /$position/ ? ($content,$_) : $_␊
660	} @{$self->{TT}{doc_out}};␊
661	} else {␊
662	␉my @newres=();␊
663	␊
664	␉do {␊
665	␉ # make sure it doesn't whine on empty document␊
666	␉ my $line = scalar @{$self->{TT}{doc_out}} ? shift @{$self->{TT}{doc_out}} : "";␊
667	␉ push @newres,$line;␊
668	␉ my $outline=mychomp($line);␊
669	␉ $outline =~ s/^[ \t]*//;␊
670	␊
671	␉ if ($line =~ m/$position/) {␊
672	␉␉while ($line=shift @{$self->{TT}{doc_out}}) {␊
673	␉␉ last if ($line=~/$boundary/);␊
674	␉␉ push @newres,$line;␊
675	␉␉}␊
676	␉␉if (defined $line) {␊
677	␉␉ if ($bmode eq 'before') {␊
678	␉␉␉print wrap_msg(dgettext("po4a",␊
679	␉␉␉ "Addendum '%s' applied before this line: %s"),␊
680	␉␉␉ $filename, $outline)␊
681	␉␉␉ if ($self->verbose() > 1 \|\| $self->debug());␊
682	␉␉␉push @newres,$content;␊
683	␉␉␉push @newres,$line;␊
684	␉␉ } else {␊
685	␉␉␉print wrap_msg(dgettext("po4a",␊
686	␉␉␉ "Addendum '%s' applied after the line: %s."),␊
687	␉␉␉ $filename, $outline)␊
688	␉␉␉ if ($self->verbose() > 1 \|\| $self->debug());␊
689	␉␉␉push @newres,$line;␊
690	␉␉␉push @newres,$content;␊
691	␉␉ }␊
692	␉␉} else {␊
693	␉␉ print wrap_msg(dgettext("po4a", "Addendum '%s' applied at the end of the file."), $filename)␊
694	␉␉ if ($self->verbose() > 1 \|\| $self->debug());␊
695	␉␉ push @newres,$content;␊
696	␉␉}␊
697	␉ }␊
698	␉} while (scalar @{$self->{TT}{doc_out}});␊
699	␉@{$self->{TT}{doc_out}} = @newres;␊
700	}␊
701	print STDERR "done.\n" if $self->debug();␊
702	return 1;␊
703	}␊
704	␊
705	=back␊
706	␊
707	=head1 INTERNAL FUNCTIONS used to write derivated parsers␊
708	␊
709	=head2 Getting input, providing output␊
710	␊
711	Four functions are provided to get input and return output. They are very␊
712	similar to shift/unshift and push/pop. The first pair is about input, while␊
713	the second is about output. Mnemonic: in input, you are interested in the␊
714	first line, what shift gives, and in output you want to add your result at␊
715	the end, like push does.␊
716	␊
717	=over 4␊
718	␊
719	=item shiftline()␊
720	␊
721	This function returns the next line of the doc_in to be parsed and its␊
722	reference (packed as an array).␊
723	␊
724	=item unshiftline($$)␊
725	␊
726	Unshifts a line of the input document and its reference.␊
727	␊
728	=item pushline($)␊
729	␊
730	Push a new line to the doc_out.␊
731	␊
732	=item popline()␊
733	␊
734	Pop the last pushed line from the doc_out.␊
735	␊
736	=back␊
737	␊
738	=cut␊
739	␊
740	sub shiftline {␊
741	my ($line,$ref)=(shift @{$_[0]->{TT}{doc_in}},␊
742	␉␉ shift @{$_[0]->{TT}{doc_in}});␊
743	return ($line,$ref);␊
744	}␊
745	sub unshiftline {␊
746	␉my $self = shift;␊
747	␉unshift @{$self->{TT}{doc_in}},@_;␊
748	}␊
749	␊
750	sub pushline { push @{$_[0]->{TT}{doc_out}}, $_[1] if defined $_[1]; }␊
751	sub popline { return pop @{$_[0]->{TT}{doc_out}}; }␊
752	␊
753	=head2 Marking strings as translatable␊
754	␊
755	One function is provided to handle the text which should be translated.␊
756	␊
757	=over 4␊
758	␊
759	=item translate($$$)␊
760	␊
761	Mandatory arguments:␊
762	␊
763	=over 2␊
764	␊
765	=item -␊
766	␊
767	A string to translate␊
768	␊
769	=item -␊
770	␊
771	The reference of this string (i.e. position in inputfile)␊
772	␊
773	=item -␊
774	␊
775	The type of this string (i.e. the textual description of its structural role;␊
776	used in Locale::Po4a::Po::gettextization(); see also L<po4a(7)\|po4a.7>,␊
777	section B<Gettextization: how does it work?>)␊
778	␊
779	=back␊
780	␊
781	This function can also take some extra arguments. They must be organized as␊
782	a hash. For example:␊
783	␊
784	$self->translate("string","ref","type",␊
785	␉␉ 'wrap' => 1);␊
786	␊
787	=over␊
788	␊
789	=item B<wrap>␊
790	␊
791	boolean indicating whether we can consider that whitespaces in string are␊
792	not important. If yes, the function canonizes the string before looking for␊
793	a translation or extracting it, and wraps the translation.␊
794	␊
795	=item B<wrapcol>␊
796	␊
797	the column at which we should wrap (default: 76).␊
798	␊
799	=item B<comment>␊
800	␊
801	an extra comment to add to the entry.␊
802	␊
803	=back␊
804	␊
805	Actions:␊
806	␊
807	=over 2␊
808	␊
809	=item -␊
810	␊
811	Pushes the string, reference and type to po_out.␊
812	␊
813	=item -␊
814	␊
815	Returns the translation of the string (as found in po_in) so that the␊
816	parser can build the doc_out.␊
817	␊
818	=item -␊
819	␊
820	Handles the charsets to recode the strings before sending them to␊
821	po_out and before returning the translations.␊
822	␊
823	=back␊
824	␊
825	=back␊
826	␊
827	=cut␊
828	␊
829	sub translate {␊
830	my $self=shift;␊
831	my ($string,$ref,$type)=(shift,shift,shift);␊
832	my (%options)=@_;␊
833	␊
834	# my $validoption="wrap wrapcol";␊
835	# my %validoption;␊
836	␊
837	return "" unless defined($string) && length($string);␊
838	␊
839	# map { $validoption{$_}=1 } (split(/ /,$validoption));␊
840	# foreach (keys %options) {␊
841	#␉Carp::confess "internal error: translate() called with unknown arg $_. Valid options: $validoption"␊
842	#␉ unless $validoption{$_};␊
843	# }␊
844	␊
845	my $in_charset;␊
846	if ($self->{TT}{ascii_input}) {␊
847	␉$in_charset = "ascii";␊
848	} else {␊
849	␉if (defined($self->{TT}{'file_in_charset'}) and␊
850	␉ length($self->{TT}{'file_in_charset'}) and␊
851	␉ $self->{TT}{'file_in_charset'} !~ m/ascii/i) {␊
852	␉ $in_charset=$self->{TT}{'file_in_charset'};␊
853	␉} else {␊
854	␉ # FYI, the document charset have to be determined before we see the first␊
855	␉ # string to recode.␊
856	␉ die wrap_mod("po4a", dgettext("po4a", "Couldn't determine the input document's charset. Please specify it on the command line. (non-ASCII char at %s)"), $self->{TT}{non_ascii_ref})␊
857	␉}␊
858	}␊
859	␊
860	if ($self->{TT}{po_in}->get_charset ne "CHARSET") {␊
861	␉$string = encode_from_to($string,␊
862	␉ $self->{TT}{'file_in_encoder'},␊
863	␉ $self->{TT}{po_in}{encoder});␊
864	}␊
865	␊
866	if (defined $options{'wrapcol'} && $options{'wrapcol'} < 0) {␊
867	# FIXME: should be the parameter given with --width␊
868	$options{'wrapcol'} = 76 + $options{'wrapcol'};␊
869	}␊
870	my $transstring = $self->{TT}{po_in}->gettext($string,␊
871	␉␉␉␉␉'wrap' => $options{'wrap'}\|\|0,␊
872	␉␉␉␉␉'wrapcol' => $options{'wrapcol'});␊
873	␊
874	if ($self->{TT}{po_in}->get_charset ne "CHARSET") {␊
875	␉my $out_encoder = $self->{TT}{'file_out_encoder'};␊
876	␉unless (defined $out_encoder) {␊
877	␉ $out_encoder = find_encoding($self->get_out_charset)␊
878	␉}␊
879	␉$transstring = encode_from_to($transstring,␊
880	␉ $self->{TT}{po_in}{encoder},␊
881	␉ $out_encoder);␊
882	}␊
883	␊
884	# If the input document isn't completely in ascii, we should see what to␊
885	# do with the current string␊
886	unless ($self->{TT}{ascii_input}) {␊
887	my $out_charset = $self->{TT}{po_out}->get_charset;␊
888	␉# We set the output po charset␊
889	if ($out_charset eq "CHARSET") {␊
890	␉ if ($self->{TT}{utf_mode}) {␊
891	␉␉$out_charset="UTF-8";␊
892	␉ } else {␊
893	␉␉$out_charset=$in_charset;␊
894	␉ }␊
895	␉ $self->{TT}{po_out}->set_charset($out_charset);␊
896	␉}␊
897	␉if ( $in_charset !~ /^$out_charset$/i ) {␊
898	␉ Encode::from_to($string,$in_charset,$out_charset);␊
899	␉ if (defined($options{'comment'}) and length($options{'comment'})) {␊
900	␉␉Encode::from_to($options{'comment'},$in_charset,$out_charset);␊
901	␉ }␊
902	␉}␊
903	}␊
904	␊
905	# the comments provided by the modules are automatic comments from the PO point of view␊
906	$self->{TT}{po_out}->push('msgid' => $string,␊
907	␉␉␉ 'reference' => $ref,␊
908	␉␉␉ 'type' => $type,␊
909	␉ 'automatic' => $options{'comment'},␊
910	␉␉␉ 'wrap' => $options{'wrap'}\|\|0,␊
911	␉␉␉ 'wrapcol' => $options{'wrapcol'});␊
912	␊
913	# if ($self->{TT}{po_in}->get_charset ne "CHARSET") {␊
914	#␉Encode::from_to($transstring,$self->{TT}{po_in}->get_charset,␊
915	#␉ $self->get_out_charset);␊
916	# }␊
917	␊
918	if ($options{'wrap'}\|\|0) {␊
919	$transstring =~ s/( *)$//s;␊
920	my $trailing_spaces = $1\|\|"";␊
921	$transstring =~ s/(?<!\\) +$//gm;␊
922	$transstring .= $trailing_spaces;␊
923	}␊
924	␊
925	return $transstring;␊
926	}␊
927	␊
928	=head2 Misc functions␊
929	␊
930	=over 4␊
931	␊
932	=item verbose()␊
933	␊
934	Returns if the verbose option was passed during the creation of the␊
935	TransTractor.␊
936	␊
937	=cut␊
938	␊
939	sub verbose {␊
940	if (defined $_[1]) {␊
941	␉$_[0]->{TT}{verbose} = $_[1];␊
942	} else {␊
943	␉return $_[0]->{TT}{verbose} \|\| 0; # undef and 0 have the same meaning, but one generates warnings␊
944	}␊
945	}␊
946	␊
947	=item debug()␊
948	␊
949	Returns if the debug option was passed during the creation of the␊
950	TransTractor.␊
951	␊
952	=cut␊
953	␊
954	sub debug {␊
955	return $_[0]->{TT}{debug};␊
956	}␊
957	␊
958	=item detected_charset($)␊
959	␊
960	This tells TransTractor that a new charset (the first argument) has been␊
961	detected from the input document. It can usually be read from the document␊
962	header. Only the first charset will remain, coming either from the␊
963	process() arguments or detected from the document.␊
964	␊
965	=cut␊
966	␊
967	sub detected_charset {␊
968	my ($self,$charset)=(shift,shift);␊
969	unless (defined($self->{TT}{'file_in_charset'}) and␊
970	length($self->{TT}{'file_in_charset'}) ) {␊
971	$self->{TT}{'file_in_charset'}=$charset;␊
972	if (defined $charset) {␊
973	$self->{TT}{'file_in_encoder'}=find_encoding($charset);␊
974	} else {␊
975	$self->{TT}{ascii_input}=1;␊
976	$self->{TT}{utf_mode}=0;␊
977	}␊
978	}␊
979	␊
980	if (defined $self->{TT}{'file_in_charset'} and␊
981	length $self->{TT}{'file_in_charset'} and␊
982	$self->{TT}{'file_in_charset'} !~ m/ascii/i) {␊
983	␉$self->{TT}{ascii_input}=0;␊
984	}␊
985	}␊
986	␊
987	=item get_out_charset()␊
988	␊
989	This function will return the charset that should be used in the output␊
990	document (usually useful to substitute the input document's detected charset␊
991	where it has been found).␊
992	␊
993	It will use the output charset specified in the command line. If it wasn't␊
994	specified, it will use the input PO's charset, and if the input PO has the␊
995	default "CHARSET", it will return the input document's charset, so that no␊
996	encoding is performed.␊
997	␊
998	=cut␊
999	␊
1000	sub get_out_charset {␊
1001	my $self=shift;␊
1002	my $charset;␊
1003	␊
1004	# Use the value specified at the command line␊
1005	if (defined($self->{TT}{'file_out_charset'}) and␊
1006	␉length($self->{TT}{'file_out_charset'})) {␊
1007	␉$charset=$self->{TT}{'file_out_charset'};␊
1008	} else {␊
1009	␉if ($self->{TT}{utf_mode} && $self->{TT}{ascii_input}) {␊
1010	␉ $charset="UTF-8";␊
1011	␉} else {␊
1012	␉ $charset=$self->{TT}{po_in}->get_charset;␊
1013	␉ $charset=$self->{TT}{'file_in_charset'}␊
1014	␉␉if $charset eq "CHARSET" and␊
1015	␉␉ defined($self->{TT}{'file_in_charset'}) and␊
1016	␉␉ length($self->{TT}{'file_in_charset'});␊
1017	␉ $charset="ascii"␊
1018	␉␉if $charset eq "CHARSET";␊
1019	␉}␊
1020	}␊
1021	return $charset;␊
1022	}␊
1023	␊
1024	=item recode_skipped_text($)␊
1025	␊
1026	This function returns the recoded text passed as argument, from the input␊
1027	document's charset to the output document's one. This isn't needed when␊
1028	translating a string (translate() recodes everything itself), but it is when␊
1029	you skip a string from the input document and you want the output document to␊
1030	be consistent with the global encoding.␊
1031	␊
1032	=cut␊
1033	␊
1034	sub recode_skipped_text {␊
1035	my ($self,$text)=(shift,shift);␊
1036	unless ($self->{TT}{'ascii_input'}) {␊
1037	␉if(defined($self->{TT}{'file_in_charset'}) and␊
1038	␉ length($self->{TT}{'file_in_charset'}) ) {␊
1039	␉ $text = encode_from_to($text,␊
1040	␉ $self->{TT}{'file_in_encoder'},␊
1041	␉ find_encoding($self->get_out_charset));␊
1042	␉} else {␊
1043	␉ die wrap_mod("po4a", dgettext("po4a", "Couldn't determine the input document's charset. Please specify it on the command line. (non-ASCII char at %s)"), $self->{TT}{non_ascii_ref})␊
1044	␉}␊
1045	}␊
1046	return $text;␊
1047	}␊
1048	␊
1049	␊
1050	# encode_from_to($,$,$)␊
1051	#␊
1052	# Encode the given text from one encoding to another one.␊
1053	# It differs from Encode::from_to because it does not take the name of the␊
1054	# encoding in argument, but the encoders (as returned by the␊
1055	# Encode::find_encoding(<name>) method). Thus it permits to save a bunch␊
1056	# of call to find_encoding.␊
1057	#␊
1058	# If the "from" encoding is undefined, it is considered as UTF-8 (or␊
1059	# ascii).␊
1060	# If the "to" encoding is undefined, it is considered as UTF-8.␊
1061	#␊
1062	sub encode_from_to {␊
1063	my ($text,$from,$to) = (shift,shift,shift);␊
1064	␊
1065	if (not defined $from) {␊
1066	# for ascii and UTF-8, no conversion needed to get an utf-8␊
1067	# string.␊
1068	} else {␊
1069	$text = $from->decode($text, 0);␊
1070	}␊
1071	␊
1072	if (not defined $to) {␊
1073	# Already in UTF-8, no conversion needed␊
1074	} else {␊
1075	$text = $to->encode($text, 0);␊
1076	}␊
1077	␊
1078	return $text;␊
1079	}␊
1080	␊
1081	=back␊
1082	␊
1083	=head1 FUTURE DIRECTIONS␊
1084	␊
1085	One shortcoming of the current TransTractor is that it can't handle␊
1086	translated document containing all languages, like debconf templates, or␊
1087	.desktop files.␊
1088	␊
1089	To address this problem, the only interface changes needed are:␊
1090	␊
1091	=over 2␊
1092	␊
1093	=item -␊
1094	␊
1095	take a hash as po_in_name (a list per language)␊
1096	␊
1097	=item -␊
1098	␊
1099	add an argument to translate to indicate the target language␊
1100	␊
1101	=item -␊
1102	␊
1103	make a pushline_all function, which would make pushline of its content for␊
1104	all language, using a map-like syntax:␊
1105	␊
1106	$self->pushline_all({ "Description[".$langcode."]=".␊
1107	␉␉␉ $self->translate($line,$ref,$langcode)␊
1108	␉␉ });␊
1109	␊
1110	=back␊
1111	␊
1112	Will see if it's enough ;)␊
1113	␊
1114	=head1 AUTHORS␊
1115	␊
1116	Denis Barbier <barbier@linuxfr.org>␊
1117	Martin Quinson (mquinson#debian.org)␊
1118	Jordi Vilalta <jvprat@gmail.com>␊
1119	␊
1120	=cut␊
1121	␊
1122	1;␊
1123

Download this file