branches/Chimera/package/bin/po4a/lib/Locale/Po4a/Xml.pm - Chameleon Svn Source Tree - Chameleon open source boot loader project.

Root/branches/Chimera/package/bin/po4a/lib/Locale/Po4a/Xml.pm

1	#!/usr/bin/perl␊
2	␊
3	# Po4a::Xml.pm␊
4	#␊
5	# extract and translate translatable strings from XML documents.␊
6	#␊
7	# This code extracts plain text from tags and attributes from generic␊
8	# XML documents, and it can be used as a base to build modules for␊
9	# XML-based documents.␊
10	#␊
11	# Copyright (c) 2004 by Jordi Vilalta <jvprat@gmail.com>␊
12	# Copyright (c) 2008-2009 by Nicolas François <nicolas.francois@centraliens.net>␊
13	#␊
14	# This program is free software; you can redistribute it and/or modify␊
15	# it under the terms of the GNU General Public License as published by␊
16	# the Free Software Foundation; either version 2 of the License, or␊
17	# (at your option) any later version.␊
18	#␊
19	# This program is distributed in the hope that it will be useful,␊
20	# but WITHOUT ANY WARRANTY; without even the implied warranty of␊
21	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the␊
22	# GNU General Public License for more details.␊
23	#␊
24	# You should have received a copy of the GNU General Public License␊
25	# along with this program; if not, write to the Free Software␊
26	# Foundation, Inc.,␊
27	# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA␊
28	#␊
29	########################################################################␊
30	␊
31	=encoding UTF-8␊
32	␊
33	=head1 NAME␊
34	␊
35	Locale::Po4a::Xml - convert XML documents and derivates from/to PO files␊
36	␊
37	=head1 DESCRIPTION␊
38	␊
39	The po4a (PO for anything) project goal is to ease translations (and more␊
40	interestingly, the maintenance of translations) using gettext tools on␊
41	areas where they were not expected like documentation.␊
42	␊
43	Locale::Po4a::Xml is a module to help the translation of XML documents into␊
44	other [human] languages. It can also be used as a base to build modules for␊
45	XML-based documents.␊
46	␊
47	=cut␊
48	␊
49	package Locale::Po4a::Xml;␊
50	␊
51	use 5.006;␊
52	use strict;␊
53	use warnings;␊
54	␊
55	require Exporter;␊
56	use vars qw(@ISA @EXPORT);␊
57	@ISA = qw(Locale::Po4a::TransTractor);␊
58	@EXPORT = qw(new initialize @tag_types);␊
59	␊
60	use Locale::Po4a::TransTractor;␊
61	use Locale::Po4a::Common;␊
62	use Carp qw(croak);␊
63	use File::Basename;␊
64	use File::Spec;␊
65	␊
66	#It will mantain the path from the root tag to the current one␊
67	my @path;␊
68	␊
69	#It will contain a list of external entities and their attached paths␊
70	my %entities;␊
71	␊
72	my @comments;␊
73	my %translate_options_cache;␊
74	␊
75	my $_shiftline_in_comment = 0;␊
76	sub shiftline {␊
77	my $self = shift;␊
78	# call Transtractor's shiftline␊
79	my ($line,$ref) = $self->SUPER::shiftline();␊
80	return ($line,$ref) if (not defined $line);␊
81	␊
82	if ($self->{options}{'includeexternal'}) {␊
83	my $tmp;␊
84	␊
85	for my $k (keys %entities) {␊
86	if ($line =~ m/^(.?)&$k;(.)$/s) {␊
87	my ($before, $after) = ($1, $2);␊
88	my $linenum=0;␊
89	my @textentries;␊
90	␊
91	$tmp = $before;␊
92	my $tmp_in_comment = 0;␊
93	if ($_shiftline_in_comment) {␊
94	if ($before =~ m/^.?-->(.)$/s) {␊
95	$tmp = $1;␊
96	$tmp_in_comment = 0;␊
97	} else {␊
98	$tmp_in_comment = 1;␊
99	}␊
100	}␊
101	if ($tmp_in_comment == 0) {␊
102	while ($tmp =~ m/^.?<!--.?-->(.*)$/s) {␊
103	$tmp = $1;␊
104	}␊
105	if ($tmp =~ m/<!--/s) {␊
106	$tmp_in_comment = 1;␊
107	}␊
108	}␊
109	next if ($tmp_in_comment);␊
110	␊
111	open (my $in, $entities{$k})␊
112	or croak wrap_mod("po4a::xml",␊
113	dgettext("po4a", "Can't read from %s: %s"),␊
114	$entities{$k}, $!);␊
115	while (defined (my $textline = <$in>)) {␊
116	$linenum++;␊
117	my $textref=$entities{$k}.":$linenum";␊
118	push @textentries, ($textline,$textref);␊
119	}␊
120	close $in␊
121	or croak wrap_mod("po4a::xml",␊
122	dgettext("po4a", "Can't close %s after reading: %s"),␊
123	$entities{$k}, $!);␊
124	␊
125	push @textentries, ($after, $ref);␊
126	$line = $before.(shift @textentries);␊
127	$ref .= " ".(shift @textentries);␊
128	$self->unshiftline(@textentries);␊
129	}␊
130	}␊
131	␊
132	$tmp = $line;␊
133	if ($_shiftline_in_comment) {␊
134	if ($line =~ m/^.?-->(.)$/s) {␊
135	$tmp = $1;␊
136	$_shiftline_in_comment = 0;␊
137	} else {␊
138	$_shiftline_in_comment = 1;␊
139	}␊
140	}␊
141	if ($_shiftline_in_comment == 0) {␊
142	while ($tmp =~ m/^.?<!--.?-->(.*)$/s) {␊
143	$tmp = $1;␊
144	}␊
145	if ($tmp =~ m/<!--/s) {␊
146	$_shiftline_in_comment = 1;␊
147	}␊
148	}␊
149	}␊
150	␊
151	return ($line,$ref);␊
152	}␊
153	␊
154	sub read {␊
155	␉my ($self,$filename)=@_;␊
156	␉push @{$self->{DOCPOD}{infile}}, $filename;␊
157	␉$self->Locale::Po4a::TransTractor::read($filename);␊
158	}␊
159	␊
160	sub parse {␊
161	␉my $self=shift;␊
162	␉map {$self->parse_file($_)} @{$self->{DOCPOD}{infile}};␊
163	}␊
164	␊
165	# @save_holders is a stack of references to ('paragraph', 'translation',␊
166	# 'sub_translations', 'open', 'close', 'folded_attributes') hashes, where:␊
167	# paragraph is a reference to an array (see paragraph in the␊
168	# treat_content() subroutine) of strings followed by␊
169	# references. It contains the @paragraph array as it was␊
170	# before the processing was interrupted by a tag instroducing␊
171	# a placeholder.␊
172	# translation is the translation of this level up to now␊
173	# sub_translations is a reference to an array of strings containing the␊
174	# translations which must replace the placeholders.␊
175	# open is the tag which opened the placeholder.␊
176	# close is the tag which closed the placeholder.␊
177	# folded_attributes is an hash of tags with their attributes (<tag attrs=...>␊
178	# strings), referenced by the folded tag id, which should␊
179	# replace the <tag po4a-id=id> strings in the current␊
180	# translation.␊
181	#␊
182	# If @save_holders only has 1 holder, then we are not processing the␊
183	# content of an holder, we are translating the document.␊
184	my @save_holders;␊
185	␊
186	␊
187	# If we are at the bottom of the stack and there is no <placeholder ...> in␊
188	# the current translation, we can push the translation in the translated␊
189	# document.␊
190	# Otherwise, we keep the translation in the current holder.␊
191	sub pushline {␊
192	␉my ($self, $line) = (shift, shift);␊
193	␊
194	␉my $holder = $save_holders[$#save_holders];␊
195	␉my $translation = $holder->{'translation'};␊
196	␉$translation .= $line;␊
197	␊
198	␉while ( %{$holder->{folded_attributes}}␊
199	␉ and $translation =~ m/^(.)<([^>]+?)\s+po4a-id=([0-9]+)>(.)$/s) {␊
200	␉␉my $begin = $1;␊
201	␉␉my $tag = $2;␊
202	␉␉my $id = $3;␊
203	␉␉my $end = $4;␊
204	␉␉if (defined $holder->{folded_attributes}->{$id}) {␊
205	␉␉␉# TODO: check if the tag is the same␊
206	␉␉␉$translation = $begin.$holder->{folded_attributes}->{$id}.$end;␊
207	␉␉␉delete $holder->{folded_attributes}->{$id};␊
208	␉␉} else {␊
209	␉␉␉# TODO: It will be hard to identify the location.␊
210	␉␉␉# => find a way to retrieve the reference.␊
211	␉␉␉die wrap_mod("po4a::xml", dgettext("po4a", "'po4a-id=%d' in the translation does not exist in the original string (or 'po4a-id=%d' used twice in the translation)."), $id, $id);␊
212	␉␉}␊
213	␉}␊
214	# TODO: check that %folded_attributes is empty at some time␊
215	# => in translate_paragraph?␊
216	␊
217	␉if ( ($#save_holders > 0)␊
218	␉ or ($translation =~ m/<placeholder\s+type="[^"]+"\s+id="(\d+)"\s*\/>/s)) {␊
219	␉␉$holder->{'translation'} = $translation;␊
220	␉} else {␊
221	␉␉$self->SUPER::pushline($translation);␊
222	␉␉$holder->{'translation'} = '';␊
223	␉}␊
224	}␊
225	␊
226	=head1 TRANSLATING WITH PO4A::XML␊
227	␊
228	This module can be used directly to handle generic XML documents. This will␊
229	extract all tag's content, and no attributes, since it's where the text is␊
230	written in most XML based documents.␊
231	␊
232	There are some options (described in the next section) that can customize␊
233	this behavior. If this doesn't fit to your document format you're encouraged␊
234	to write your own module derived from this, to describe your format's details.␊
235	See the section B<WRITING DERIVATE MODULES> below, for the process description.␊
236	␊
237	=cut␊
238	␊
239	#␊
240	# Parse file and translate it␊
241	#␊
242	sub parse_file {␊
243	␉my ($self,$filename) = @_;␊
244	␉my $eof = 0;␊
245	␊
246	␉while (!$eof) {␊
247	␉␉# We get all the text until the next breaking tag (not␊
248	␉␉# inline) and translate it␊
249	␉␉$eof = $self->treat_content;␊
250	␉␉if (!$eof) {␊
251	␉␉␉# And then we treat the following breaking tag␊
252	␉␉␉$eof = $self->treat_tag;␊
253	␉␉}␊
254	␉}␊
255	}␊
256	␊
257	=head1 OPTIONS ACCEPTED BY THIS MODULE␊
258	␊
259	The global debug option causes this module to show the excluded strings, in␊
260	order to see if it skips something important.␊
261	␊
262	These are this module's particular options:␊
263	␊
264	=over 4␊
265	␊
266	=item B<nostrip>␊
267	␊
268	Prevents it to strip the spaces around the extracted strings.␊
269	␊
270	=item B<wrap>␊
271	␊
272	Canonizes the string to translate, considering that whitespaces are not␊
273	important, and wraps the translated document. This option can be overridden␊
274	by custom tag options. See the "tags" option below.␊
275	␊
276	=item B<caseinsensitive>␊
277	␊
278	It makes the tags and attributes searching to work in a case insensitive␊
279	way. If it's defined, it will treat E<lt>BooKE<gt>laNG and E<lt>BOOKE<gt>Lang as E<lt>bookE<gt>lang.␊
280	␊
281	=item B<includeexternal>␊
282	␊
283	When defined, external entities are included in the generated (translated)␊
284	document, and for the extraction of strings. If it's not defined, you␊
285	will have to translate external entities separately as independent␊
286	documents.␊
287	␊
288	=item B<ontagerror>␊
289	␊
290	This option defines the behavior of the module when it encounter a invalid␊
291	XML syntax (a closing tag which does not match the last opening tag, or a␊
292	tag's attribute without value).␊
293	It can take the following values:␊
294	␊
295	=over␊
296	␊
297	=item I<fail>␊
298	␊
299	This is the default value.␊
300	The module will exit with an error.␊
301	␊
302	=item I<warn>␊
303	␊
304	The module will continue, and will issue a warning.␊
305	␊
306	=item I<silent>␊
307	␊
308	The module will continue without any warnings.␊
309	␊
310	=back␊
311	␊
312	Be careful when using this option.␊
313	It is generally recommended to fix the input file.␊
314	␊
315	=item B<tagsonly>␊
316	␊
317	Extracts only the specified tags in the "tags" option. Otherwise, it␊
318	will extract all the tags except the ones specified.␊
319	␊
320	Note: This option is deprecated.␊
321	␊
322	=item B<doctype>␊
323	␊
324	String that will try to match with the first line of the document's doctype␊
325	(if defined). If it doesn't, a warning will indicate that the document␊
326	might be of a bad type.␊
327	␊
328	=item B<addlang>␊
329	␊
330	String indicating the path (e.g. E<lt>bbbE<gt>E<lt>aaaE<gt>) of a tag␊
331	where a lang="..." attribute shall be added. The language will be defined␊
332	as the basename of the PO file without any .po extension.␊
333	␊
334	=item B<tags>␊
335	␊
336	Space-separated list of tags you want to translate or skip. By default,␊
337	the specified tags will be excluded, but if you use the "tagsonly" option,␊
338	the specified tags will be the only ones included. The tags must be in the␊
339	form E<lt>aaaE<gt>, but you can join some (E<lt>bbbE<gt>E<lt>aaaE<gt>) to say that the content of␊
340	the tag E<lt>aaaE<gt> will only be translated when it's into a E<lt>bbbE<gt> tag.␊
341	␊
342	You can also specify some tag options putting some characters in front of␊
343	the tag hierarchy. For example, you can put 'w' (wrap) or 'W' (don't wrap)␊
344	to override the default behavior specified by the global "wrap" option.␊
345	␊
346	Example: WE<lt>chapterE<gt>E<lt>titleE<gt>␊
347	␊
348	Note: This option is deprecated.␊
349	You should use the B<translated> and B<untranslated> options instead.␊
350	␊
351	=item B<attributes>␊
352	␊
353	Space-separated list of tag's attributes you want to translate. You can␊
354	specify the attributes by their name (for example, "lang"), but you can␊
355	prefix it with a tag hierarchy, to specify that this attribute will only be␊
356	translated when it's into the specified tag. For example: E<lt>bbbE<gt>E<lt>aaaE<gt>lang␊
357	specifies that the lang attribute will only be translated if it's into an␊
358	E<lt>aaaE<gt> tag, and it's into a E<lt>bbbE<gt> tag.␊
359	␊
360	=item B<foldattributes>␊
361	␊
362	Do not translate attributes in inline tags.␊
363	Instead, replace all attributes of a tag by po4a-id=<id>.␊
364	␊
365	This is useful when attributes shall not be translated, as this simplifies the␊
366	strings for translators, and avoids typos.␊
367	␊
368	=item B<customtag>␊
369	␊
370	Space-separated list of tags which should not be treated as tags.␊
371	These tags are treated as inline, and do not need to be closed.␊
372	␊
373	=item B<break>␊
374	␊
375	Space-separated list of tags which should break the sequence.␊
376	By default, all tags break the sequence.␊
377	␊
378	The tags must be in the form <aaa>, but you can join some␊
379	(<bbb><aaa>), if a tag (<aaa>) should only be considered␊
380	when it's into another tag (<bbb>).␊
381	␊
382	=item B<inline>␊
383	␊
384	Space-separated list of tags which should be treated as inline.␊
385	By default, all tags break the sequence.␊
386	␊
387	The tags must be in the form <aaa>, but you can join some␊
388	(<bbb><aaa>), if a tag (<aaa>) should only be considered␊
389	when it's into another tag (<bbb>).␊
390	␊
391	=item B<placeholder>␊
392	␊
393	Space-separated list of tags which should be treated as placeholders.␊
394	Placeholders do not break the sequence, but the content of placeholders is␊
395	translated separately.␊
396	␊
397	The location of the placeholder in its block will be marked with a string␊
398	similar to:␊
399	␊
400	<placeholder type=\"footnote\" id=\"0\"/>␊
401	␊
402	The tags must be in the form <aaa>, but you can join some␊
403	(<bbb><aaa>), if a tag (<aaa>) should only be considered␊
404	when it's into another tag (<bbb>).␊
405	␊
406	=item B<nodefault>␊
407	␊
408	Space separated list of tags that the module should not try to set by␊
409	default in any category.␊
410	␊
411	=item B<cpp>␊
412	␊
413	Support C preprocessor directives.␊
414	When this option is set, po4a will consider preprocessor directives as␊
415	paragraph separators.␊
416	This is important if the XML file must be preprocessed because otherwise␊
417	the directives may be inserted in the middle of lines if po4a consider it␊
418	belong to the current paragraph, and they won't be recognized by the␊
419	preprocessor.␊
420	Note: the preprocessor directives must only appear between tags␊
421	(they must not break a tag).␊
422	␊
423	=item B<translated>␊
424	␊
425	Space-separated list of tags you want to translate.␊
426	␊
427	The tags must be in the form <aaa>, but you can join some␊
428	(<bbb><aaa>), if a tag (<aaa>) should only be considered␊
429	when it's into another tag (<bbb>).␊
430	␊
431	You can also specify some tag options putting some characters in front of␊
432	the tag hierarchy. For example, you can put 'w' (wrap) or 'W' (don't wrap)␊
433	to overide the default behavior specified by the global "wrap" option.␊
434	␊
435	Example: WE<lt>chapterE<gt>E<lt>titleE<gt>␊
436	␊
437	=item B<untranslated>␊
438	␊
439	Space-separated list of tags you do not want to translate.␊
440	␊
441	The tags must be in the form <aaa>, but you can join some␊
442	(<bbb><aaa>), if a tag (<aaa>) should only be considered␊
443	when it's into another tag (<bbb>).␊
444	␊
445	=item B<defaulttranslateoption>␊
446	␊
447	The default categories for tags that are not in any of the translated,␊
448	untranslated, break, inline, or placeholder.␊
449	␊
450	This is a set of letters:␊
451	␊
452	=over␊
453	␊
454	=item I<w>␊
455	␊
456	Tags should be translated and content can be re-wrapped.␊
457	␊
458	=item I<W>␊
459	␊
460	Tags should be translated and content should not be re-wrapped.␊
461	␊
462	=item I<i>␊
463	␊
464	Tags should be translated inline.␊
465	␊
466	=item I<p>␊
467	␊
468	Tags should be translated as placeholders.␊
469	␊
470	=back␊
471	␊
472	=back␊
473	␊
474	=cut␊
475	# TODO: defaulttranslateoption␊
476	# w => indicate that it is only valid for translatable tags and do not␊
477	# care about inline/break/placeholder?␊
478	# ...␊
479	␊
480	sub initialize {␊
481	␉my $self = shift;␊
482	␉my %options = @_;␊
483	␊
484	␉# Reset the path␊
485	␉@path = ();␊
486	␊
487	␉# Initialize the stack of holders␊
488	␉my @paragraph = ();␊
489	␉my @sub_translations = ();␊
490	␉my %folded_attributes;␊
491	␉my %holder = ('paragraph' => \@paragraph,␊
492	␉ 'translation' => "",␊
493	␉ 'sub_translations' => \@sub_translations,␊
494	␉ 'folded_attributes' => \%folded_attributes);␊
495	␉@save_holders = (\%holder);␊
496	␊
497	␉$self->{options}{'addlang'}=0;␊
498	␉$self->{options}{'nostrip'}=0;␊
499	␉$self->{options}{'wrap'}=0;␊
500	␉$self->{options}{'caseinsensitive'}=0;␊
501	␉$self->{options}{'tagsonly'}=0;␊
502	␉$self->{options}{'tags'}='';␊
503	␉$self->{options}{'break'}='';␊
504	␉$self->{options}{'translated'}='';␊
505	␉$self->{options}{'untranslated'}='';␊
506	␉$self->{options}{'defaulttranslateoption'}='';␊
507	␉$self->{options}{'attributes'}='';␊
508	␉$self->{options}{'foldattributes'}=0;␊
509	␉$self->{options}{'inline'}='';␊
510	␉$self->{options}{'placeholder'}='';␊
511	␉$self->{options}{'customtag'}='';␊
512	␉$self->{options}{'doctype'}='';␊
513	␉$self->{options}{'nodefault'}='';␊
514	␉$self->{options}{'includeexternal'}=0;␊
515	␉$self->{options}{'ontagerror'}="fail";␊
516	␉$self->{options}{'cpp'}=0;␊
517	␊
518	␉$self->{options}{'verbose'}='';␊
519	␉$self->{options}{'debug'}='';␊
520	␊
521	␉foreach my $opt (keys %options) {␊
522	␉␉if ($options{$opt}) {␊
523	␉␉␉die wrap_mod("po4a::xml",␊
524	␉␉␉␉dgettext("po4a", "Unknown option: %s"), $opt)␊
525	␉␉␉␉unless exists $self->{options}{$opt};␊
526	␉␉␉$self->{options}{$opt} = $options{$opt};␊
527	␉␉}␊
528	␉}␊
529	␉# Default options set by modules. Forbidden for users.␊
530	␉$self->{options}{'_default_translated'}='';␊
531	␉$self->{options}{'_default_untranslated'}='';␊
532	␉$self->{options}{'_default_break'}='';␊
533	␉$self->{options}{'_default_inline'}='';␊
534	␉$self->{options}{'_default_placeholder'}='';␊
535	␉$self->{options}{'_default_attributes'}='';␊
536	␉$self->{options}{'_default_customtag'}='';␊
537	␊
538	␉#It will maintain the list of the translatable tags␊
539	␉$self->{tags}=();␊
540	␉$self->{translated}=();␊
541	␉$self->{untranslated}=();␊
542	␉#It will maintain the list of the translatable attributes␊
543	␉$self->{attributes}=();␊
544	␉#It will maintain the list of the breaking tags␊
545	␉$self->{break}=();␊
546	␉#It will maintain the list of the inline tags␊
547	␉$self->{inline}=();␊
548	␉#It will maintain the list of the placeholder tags␊
549	␉$self->{placeholder}=();␊
550	␉#It will maintain the list of the customtag tags␊
551	␉$self->{customtag}=();␊
552	␉#list of the tags that must not be set in the tags or inline category␊
553	␉#by this module or sub-module (unless specified in an option)␊
554	␉$self->{nodefault}=();␊
555	␊
556	␉$self->treat_options;␊
557	␊
558	␉# Clear cache␊
559	␉%translate_options_cache=();␊
560	}␊
561	␊
562	=head1 WRITING DERIVATE MODULES␊
563	␊
564	=head2 DEFINE WHAT TAGS AND ATTRIBUTES TO TRANSLATE␊
565	␊
566	The simplest customization is to define which tags and attributes you want␊
567	the parser to translate. This should be done in the initialize function.␊
568	First you should call the main initialize, to get the command-line options,␊
569	and then, append your custom definitions to the options hash. If you want␊
570	to treat some new options from command line, you should define them before␊
571	calling the main initialize:␊
572	␊
573	$self->{options}{'new_option'}='';␊
574	$self->SUPER::initialize(%options);␊
575	$self->{options}{'_default_translated'}.=' <p> <head><title>';␊
576	$self->{options}{'attributes'}.=' <p>lang id';␊
577	$self->{options}{'_default_inline'}.=' <br>';␊
578	$self->treat_options;␊
579	␊
580	You should use the B<_default_inline>, B<_default_break>,␊
581	B<_default_placeholder>, B<_default_translated>, B<_default_untranslated>,␊
582	and B<_default_attributes> options in derivated modules. This allow users␊
583	to override the default behavior defined in your module with command line␊
584	options.␊
585	␊
586	=head2 OVERRIDING THE found_string FUNCTION␊
587	␊
588	Another simple step is to override the function "found_string", which␊
589	receives the extracted strings from the parser, in order to translate them.␊
590	There you can control which strings you want to translate, and perform␊
591	transformations to them before or after the translation itself.␊
592	␊
593	It receives the extracted text, the reference on where it was, and a hash␊
594	that contains extra information to control what strings to translate, how␊
595	to translate them and to generate the comment.␊
596	␊
597	The content of these options depends on the kind of string it is (specified in an␊
598	entry of this hash):␊
599	␊
600	=over␊
601	␊
602	=item type="tag"␊
603	␊
604	The found string is the content of a translatable tag. The entry "tag_options"␊
605	contains the option characters in front of the tag hierarchy in the module␊
606	"tags" option.␊
607	␊
608	=item type="attribute"␊
609	␊
610	Means that the found string is the value of a translatable attribute. The␊
611	entry "attribute" has the name of the attribute.␊
612	␊
613	=back␊
614	␊
615	It must return the text that will replace the original in the translated␊
616	document. Here's a basic example of this function:␊
617	␊
618	sub found_string {␊
619	my ($self,$text,$ref,$options)=@_;␊
620	$text = $self->translate($text,$ref,"type ".$options->{'type'},␊
621	'wrap'=>$self->{options}{'wrap'});␊
622	return $text;␊
623	}␊
624	␊
625	There's another simple example in the new Dia module, which only filters␊
626	some strings.␊
627	␊
628	=cut␊
629	␊
630	sub found_string {␊
631	␉my ($self,$text,$ref,$options)=@_;␊
632	␊
633	␉if ($text =~ m/^\s*$/s) {␊
634	␉␉return $text;␊
635	␉}␊
636	␊
637	␉my $comment;␊
638	␉my $wrap = $self->{options}{'wrap'};␊
639	␊
640	␉if ($options->{'type'} eq "tag") {␊
641	␉␉$comment = "Content of: ".$self->get_path;␊
642	␊
643	␉␉if($options->{'tag_options'} =~ /w/) {␊
644	␉␉␉$wrap = 1;␊
645	␉␉}␊
646	␉␉if($options->{'tag_options'} =~ /W/) {␊
647	␉␉␉$wrap = 0;␊
648	␉␉}␊
649	␉} elsif ($options->{'type'} eq "attribute") {␊
650	␉␉$comment = "Attribute '".$options->{'attribute'}."' of: ".$self->get_path;␊
651	␉} elsif ($options->{'type'} eq "CDATA") {␊
652	␉␉$comment = "CDATA";␊
653	␉␉$wrap = 0;␊
654	␉} else {␊
655	␉␉die wrap_ref_mod($ref, "po4a::xml", dgettext("po4a", "Internal error: unknown type identifier '%s'."), $options->{'type'});␊
656	␉}␊
657	␉$text = $self->translate($text,$ref,$comment,'wrap'=>$wrap, comment => $options->{'comments'});␊
658	␉return $text;␊
659	}␊
660	␊
661	=head2 MODIFYING TAG TYPES (TODO)␊
662	␊
663	This is a more complex one, but it enables a (almost) total customization.␊
664	It's based in a list of hashes, each one defining a tag type's behavior. The␊
665	list should be sorted so that the most general tags are after the most␊
666	concrete ones (sorted first by the beginning and then by the end keys). To␊
667	define a tag type you'll have to make a hash with the following keys:␊
668	␊
669	=over 4␊
670	␊
671	=item B<beginning>␊
672	␊
673	Specifies the beginning of the tag, after the "E<lt>".␊
674	␊
675	=item B<end>␊
676	␊
677	Specifies the end of the tag, before the "E<gt>".␊
678	␊
679	=item B<breaking>␊
680	␊
681	It says if this is a breaking tag class. A non-breaking (inline) tag is one␊
682	that can be taken as part of the content of another tag. It can take the␊
683	values false (0), true (1) or undefined. If you leave this undefined, you'll␊
684	have to define the f_breaking function that will say whether a concrete tag of␊
685	this class is a breaking tag or not.␊
686	␊
687	=item B<f_breaking>␊
688	␊
689	It's a function that will tell if the next tag is a breaking one or not. It␊
690	should be defined if the B<breaking> option is not.␊
691	␊
692	=item B<f_extract>␊
693	␊
694	If you leave this key undefined, the generic extraction function will have to␊
695	extract the tag itself. It's useful for tags that can have other tags or␊
696	special structures in them, so that the main parser doesn't get mad. This␊
697	function receives a boolean that says if the tag should be removed from the␊
698	input stream or not.␊
699	␊
700	=item B<f_translate>␊
701	␊
702	This function receives the tag (in the get_string_until() format) and returns␊
703	the translated tag (translated attributes or all needed transformations) as a␊
704	single string.␊
705	␊
706	=back␊
707	␊
708	=cut␊
709	␊
710	##### Generic XML tag types #####'␊
711	␊
712	our @tag_types = (␊
713	␉{␉beginning␉=> "!--#",␊
714	␉␉end␉␉=> "--",␊
715	␉␉breaking␉=> 0,␊
716	␉␉f_extract␉=> \&tag_extract_comment,␊
717	␉␉f_translate␉=> \&tag_trans_comment},␊
718	␉{␉beginning␉=> "!--",␊
719	␉␉end␉␉=> "--",␊
720	␉␉breaking␉=> 0,␊
721	␉␉f_extract␉=> \&tag_extract_comment,␊
722	␉␉f_translate␉=> \&tag_trans_comment},␊
723	␉{␉beginning␉=> "?xml",␊
724	␉␉end␉␉=> "?",␊
725	␉␉breaking␉=> 1,␊
726	␉␉f_translate␉=> \&tag_trans_xmlhead},␊
727	␉{␉beginning␉=> "?",␊
728	␉␉end␉␉=> "?",␊
729	␉␉breaking␉=> 1,␊
730	␉␉f_translate␉=> \&tag_trans_procins},␊
731	␉{␉beginning␉=> "!DOCTYPE",␊
732	␉␉end␉␉=> "",␊
733	␉␉breaking␉=> 1,␊
734	␉␉f_extract␉=> \&tag_extract_doctype,␊
735	␉␉f_translate␉=> \&tag_trans_doctype},␊
736	␉{␉beginning␉=> "![CDATA[",␊
737	␉␉end␉␉=> "]]",␊
738	␉␉breaking␉=> 1,␊
739	␉␉f_extract␉=> \&CDATA_extract,␊
740	␉␉f_translate␉=> \&CDATA_trans},␊
741	␉{␉beginning␉=> "/",␊
742	␉␉end␉␉=> "",␊
743	␉␉f_breaking␉=> \&tag_break_close,␊
744	␉␉f_translate␉=> \&tag_trans_close},␊
745	␉{␉beginning␉=> "",␊
746	␉␉end␉␉=> "/",␊
747	␉␉f_breaking␉=> \&tag_break_alone,␊
748	␉␉f_translate␉=> \&tag_trans_alone},␊
749	␉{␉beginning␉=> "",␊
750	␉␉end␉␉=> "",␊
751	␉␉f_breaking␉=> \&tag_break_open,␊
752	␉␉f_translate␉=> \&tag_trans_open}␊
753	);␊
754	␊
755	sub tag_extract_comment {␊
756	␉my ($self,$remove)=(shift,shift);␊
757	␉my ($eof,@tag)=$self->get_string_until('-->',{include=>1,remove=>$remove});␊
758	␉return ($eof,@tag);␊
759	}␊
760	␊
761	sub tag_trans_comment {␊
762	␉my ($self,@tag)=@_;␊
763	␉return $self->join_lines(@tag);␊
764	}␊
765	␊
766	sub tag_trans_xmlhead {␊
767	␉my ($self,@tag)=@_;␊
768	␊
769	␉# We don't have to translate anything from here: throw away references␊
770	␉my $tag = $self->join_lines(@tag);␊
771	␉$tag =~ /encoding=(("\|')\|)(.*?)(\s\|\2)/s;␊
772	␉my $in_charset=$3;␊
773	␉$self->detected_charset($in_charset);␊
774	␉my $out_charset=$self->get_out_charset;␊
775	␊
776	␉if (defined $in_charset) {␊
777	␉␉$tag =~ s/$in_charset/$out_charset/;␊
778	␉} else {␊
779	␉␉if ($tag =~ m/standalone/) {␊
780	␉␉␉$tag =~ s/(standalone)/encoding="$out_charset" $1/;␊
781	␉␉} else {␊
782	␉␉␉$tag.= " encoding=\"$out_charset\"";␊
783	␉␉}␊
784	␉}␊
785	␊
786	␉return $tag;␊
787	}␊
788	␊
789	sub tag_trans_procins {␊
790	␉my ($self,@tag)=@_;␊
791	␉return $self->join_lines(@tag);␊
792	}␊
793	␊
794	sub tag_extract_doctype {␊
795	␉my ($self,$remove)=(shift,shift);␊
796	␊
797	␉# Check if there is an internal subset (between []).␊
798	␉my ($eof,@tag)=$self->get_string_until('>',{include=>1,unquoted=>1});␊
799	␉my $parity = 0;␊
800	␉my $paragraph = "";␊
801	␉map { $parity = 1 - $parity; $paragraph.= $parity?$_:""; } @tag;␊
802	␉my $found = 0;␊
803	␉if ($paragraph =~ m/<.\[.</s) {␊
804	␉␉$found = 1␊
805	␉}␊
806	␊
807	␉if (not $found) {␊
808	␉␉($eof,@tag)=$self->get_string_until('>',{include=>1,remove=>$remove,unquoted=>1});␊
809	␉} else {␊
810	␉␉($eof,@tag)=$self->get_string_until(']\s*>',{include=>1,remove=>$remove,unquoted=>1,regex=>1});␊
811	␉}␊
812	␉return ($eof,@tag);␊
813	}␊
814	␊
815	sub tag_trans_doctype {␊
816	# This check is not really reliable. There are system and public␊
817	# identifiers. Only the public one could be checked reliably.␊
818	␉my ($self,@tag)=@_;␊
819	␉if (defined $self->{options}{'doctype'} ) {␊
820	␉␉my $doctype = $self->{options}{'doctype'};␊
821	␉␉if ( $tag[0] !~ /\Q$doctype\E/i ) {␊
822	␉␉␉warn wrap_ref_mod($tag[1], "po4a::xml", dgettext("po4a", "Bad document type. '%s' expected. You can fix this warning with a -o doctype option, or ignore this check with -o doctype=\"\"."), $doctype);␊
823	␉␉}␊
824	␉}␊
825	␉my $i = 0;␊
826	␉my $basedir = $tag[1];␊
827	␉$basedir =~ s/:[0-9]+$//;␊
828	␉$basedir = dirname($basedir);␊
829	␊
830	␉while ( $i < $#tag ) {␊
831	␉␉my $t = $tag[$i];␊
832	␉␉my $ref = $tag[$i+1];␊
833	␉␉if ( $t =~ /^(\s<!ENTITY\s+)(.)$/is ) {␊
834	␉␉␉my $part1 = $1;␊
835	␉␉␉my $part2 = $2;␊
836	␉␉␉my $includenow = 0;␊
837	␉␉␉my $file = 0;␊
838	␉␉␉my $name = "";␊
839	␉␉␉if ($part2 =~ /^(%\s+)(.*)$/s ) {␊
840	␉␉␉␉$part1.= $1;␊
841	␉␉␉␉$part2 = $2;␊
842	␉␉␉␉$includenow = 1;␊
843	␉␉␉}␊
844	␉␉␉$part2 =~ /^(\S+)(\s+)(.*)$/s;␊
845	␉␉␉$name = $1;␊
846	␉␉␉$part1.= $1.$2;␊
847	␉␉␉$part2 = $3;␊
848	␉␉␉if ( $part2 =~ /^(SYSTEM\s+)(.*)$/is ) {␊
849	␉␉␉␉$part1.= $1;␊
850	␉␉␉␉$part2 = $2;␊
851	␉␉␉␉$file = 1;␊
852	␉␉␉␉if ($self->{options}{'includeexternal'}) {␊
853	␉␉␉␉␉$entities{$name} = $part2;␊
854	␉␉␉␉␉$entities{$name} =~ s/^"?(.?)".$/$1/s;␊
855	␉␉␉␉␉$entities{$name} = File::Spec->catfile($basedir, $entities{$name});␊
856	␉␉␉␉}␊
857	␉␉␉}␊
858	␉␉␉if ((not $file) and (not $includenow)) {␊
859	␉␉␉ if ($part2 =~ m/^\s(["'])(.)\1(\s>.)$/s) {␊
860	␉␉␉␉my $comment = "Content of the $name entity";␊
861	␉␉␉␉my $quote = $1;␊
862	␉␉␉␉my $text = $2;␊
863	␉␉␉␉$part2 = $3;␊
864	␉␉␉␉$text = $self->translate($text,␊
865	␉␉␉␉ $ref,␊
866	␉␉␉␉ $comment,␊
867	␉␉␉␉ 'wrap'=>1);␊
868	␉␉␉␉$t = $part1."$quote$text$quote$part2";␊
869	␉␉␉ }␊
870	␉␉␉}␊
871	#␉␉␉print $part1."\n";␊
872	#␉␉␉print $name."\n";␊
873	#␉␉␉print $part2."\n";␊
874	␉␉}␊
875	␉␉$tag[$i] = $t;␊
876	␉␉$i += 2;␊
877	␉}␊
878	␉return $self->join_lines(@tag);␊
879	}␊
880	␊
881	sub tag_break_close {␊
882	␉my ($self,@tag)=@_;␊
883	␉my $struct = $self->get_path;␊
884	␉my $options = $self->get_translate_options($struct);␊
885	␉if ($options =~ m/[ip]/) {␊
886	␉␉return 0;␊
887	␉} else {␊
888	␉␉return 1;␊
889	␉}␊
890	}␊
891	␊
892	sub tag_trans_close {␊
893	␉my ($self,@tag)=@_;␊
894	␉my $name = $self->get_tag_name(@tag);␊
895	␊
896	␉my $test = pop @path;␊
897	␉if (!defined($test) \|\| $test ne $name ) {␊
898	␉␉my $ontagerror = $self->{options}{'ontagerror'};␊
899	␉␉if ($ontagerror eq "warn") {␊
900	␉␉␉warn wrap_ref_mod($tag[1], "po4a::xml", dgettext("po4a", "Unexpected closing tag </%s> found. The main document may be wrong. Continuing..."), $name);␊
901	␉␉} elsif ($ontagerror ne "silent") {␊
902	␉␉␉die wrap_ref_mod($tag[1], "po4a::xml", dgettext("po4a", "Unexpected closing tag </%s> found. The main document may be wrong."), $name);␊
903	␉␉}␊
904	␉}␊
905	␉return $self->join_lines(@tag);␊
906	}␊
907	␊
908	sub CDATA_extract {␊
909	␉my ($self,$remove)=(shift,shift);␊
910	my ($eof, @tag) = $self->get_string_until(']]>',{include=>1,unquoted=>0,remove=>$remove});␊
911	␊
912	␉return ($eof, @tag);␊
913	}␊
914	␊
915	sub CDATA_trans {␊
916	␉my ($self,@tag)=@_;␊
917	␉return $self->found_string($self->join_lines(@tag),␊
918	␉ $tag[1],␊
919	␉ {'type' => "CDATA"});␊
920	}␊
921	␊
922	sub tag_break_alone {␊
923	␉my ($self,@tag)=@_;␊
924	␉my $struct = $self->get_path($self->get_tag_name(@tag));␊
925	␉if ($self->get_translate_options($struct) =~ m/i/) {␊
926	␉␉return 0;␊
927	␉} else {␊
928	␉␉return 1;␊
929	␉}␊
930	}␊
931	␊
932	sub tag_trans_alone {␊
933	␉my ($self,@tag)=@_;␊
934	␉my $name = $self->get_tag_name(@tag);␊
935	␉push @path, $name;␊
936	␊
937	␉$name = $self->treat_attributes(@tag);␊
938	␊
939	␉pop @path;␊
940	␉return $name;␊
941	}␊
942	␊
943	sub tag_break_open {␊
944	␉my ($self,@tag)=@_;␊
945	␉my $struct = $self->get_path($self->get_tag_name(@tag));␊
946	␉my $options = $self->get_translate_options($struct);␊
947	␉if ($options =~ m/[ip]/) {␊
948	␉␉return 0;␊
949	␉} else {␊
950	␉␉return 1;␊
951	␉}␊
952	}␊
953	␊
954	sub tag_trans_open {␊
955	␉my ($self,@tag)=@_;␊
956	␉my $name = $self->get_tag_name(@tag);␊
957	␉push @path, $name;␊
958	␊
959	␉$name = $self->treat_attributes(@tag);␊
960	␊
961	␉if (defined $self->{options}{'addlang'}) {␊
962	␉␉my $struct = $self->get_path();␊
963	␉␉if ($struct eq $self->{options}{'addlang'}) {␊
964	␉␉␉$name .= ' lang="'.$self->{TT}{po_in}->{lang}.'"';␊
965	␉␉}␊
966	␉}␊
967	␊
968	␉return $name;␊
969	}␊
970	␊
971	##### END of Generic XML tag types #####␊
972	␊
973	=head1 INTERNAL FUNCTIONS used to write derivated parsers␊
974	␊
975	=head2 WORKING WITH TAGS␊
976	␊
977	=over 4␊
978	␊
979	=item get_path()␊
980	␊
981	This function returns the path to the current tag from the document's root,␊
982	in the form E<lt>htmlE<gt>E<lt>bodyE<gt>E<lt>pE<gt>.␊
983	␊
984	An additional array of tags (without brackets) can be passed as argument.␊
985	These path elements are added to the end of the current path.␊
986	␊
987	=cut␊
988	␊
989	sub get_path {␊
990	␉my $self = shift;␊
991	␉my @add = @_;␊
992	␉if ( @path > 0 or @add > 0 ) {␊
993	␉␉return "<".join("><",@path,@add).">";␊
994	␉} else {␊
995	␉␉return "outside any tag (error?)";␊
996	␉}␊
997	}␊
998	␊
999	=item tag_type()␊
1000	␊
1001	This function returns the index from the tag_types list that fits to the next␊
1002	tag in the input stream, or -1 if it's at the end of the input file.␊
1003	␊
1004	=cut␊
1005	␊
1006	sub tag_type {␊
1007	␉my $self = shift;␊
1008	␉my ($line,$ref) = $self->shiftline();␊
1009	␉my ($match1,$match2);␊
1010	␉my $found = 0;␊
1011	␉my $i = 0;␊
1012	␊
1013	␉if (!defined($line)) { return -1; }␊
1014	␊
1015	␉$self->unshiftline($line,$ref);␊
1016	␉my ($eof,@lines) = $self->get_string_until(">",{include=>1,unquoted=>1});␊
1017	␉my $line2 = $self->join_lines(@lines);␊
1018	␉while (!$found && $i < @tag_types) {␊
1019	␉␉($match1,$match2) = ($tag_types[$i]->{beginning},$tag_types[$i]->{end});␊
1020	␉␉if ($line =~ /^<\Q$match1\E/) {␊
1021	␉␉␉if (!defined($tag_types[$i]->{f_extract})) {␊
1022	#print substr($line2,length($line2)-1-length($match2),1+length($match2))."\n";␊
1023	␉␉␉␉if (defined($line2) and $line2 =~ /\Q$match2\E>$/) {␊
1024	␉␉␉␉␉$found = 1;␊
1025	#print "YES: <".$match1." ".$match2.">\n";␊
1026	␉␉␉␉} else {␊
1027	#print "NO: <".$match1." ".$match2.">\n";␊
1028	␉␉␉␉␉$i++;␊
1029	␉␉␉␉}␊
1030	␉␉␉} else {␊
1031	␉␉␉␉$found = 1;␊
1032	␉␉␉}␊
1033	␉␉} else {␊
1034	␉␉␉$i++;␊
1035	␉␉}␊
1036	␉}␊
1037	␉if (!$found) {␊
1038	␉␉#It should never enter here, unless you undefine the most␊
1039	␉␉#general tags (as <...>)␊
1040	␉␉die "po4a::xml: Unknown tag type: ".$line."\n";␊
1041	␉} else {␊
1042	␉␉return $i;␊
1043	␉}␊
1044	}␊
1045	␊
1046	=item extract_tag($$)␊
1047	␊
1048	This function returns the next tag from the input stream without the beginning␊
1049	and end, in an array form, to maintain the references from the input file. It␊
1050	has two parameters: the type of the tag (as returned by tag_type) and a␊
1051	boolean, that indicates if it should be removed from the input stream.␊
1052	␊
1053	=cut␊
1054	␊
1055	sub extract_tag {␊
1056	␉my ($self,$type,$remove) = (shift,shift,shift);␊
1057	␉my ($match1,$match2) = ($tag_types[$type]->{beginning},$tag_types[$type]->{end});␊
1058	␉my ($eof,@tag);␊
1059	␉if (defined($tag_types[$type]->{f_extract})) {␊
1060	␉␉($eof,@tag) = &{$tag_types[$type]->{f_extract}}($self,$remove);␊
1061	␉} else {␊
1062	␉␉($eof,@tag) = $self->get_string_until($match2.">",{include=>1,remove=>$remove,unquoted=>1});␊
1063	␉}␊
1064	␉$tag[0] =~ /^<\Q$match1\E(.*)$/s;␊
1065	␉$tag[0] = $1;␊
1066	␉$tag[$#tag-1] =~ /^(.*)\Q$match2\E>$/s;␊
1067	␉$tag[$#tag-1] = $1;␊
1068	␉return ($eof,@tag);␊
1069	}␊
1070	␊
1071	=item get_tag_name(@)␊
1072	␊
1073	This function returns the name of the tag passed as an argument, in the array␊
1074	form returned by extract_tag.␊
1075	␊
1076	=cut␊
1077	␊
1078	sub get_tag_name {␊
1079	␉my ($self,@tag)=@_;␊
1080	␉$tag[0] =~ /^(\S*)/;␊
1081	␉return $1;␊
1082	}␊
1083	␊
1084	=item breaking_tag()␊
1085	␊
1086	This function returns a boolean that says if the next tag in the input stream␊
1087	is a breaking tag or not (inline tag). It leaves the input stream intact.␊
1088	␊
1089	=cut␊
1090	␊
1091	sub breaking_tag {␊
1092	␉my $self = shift;␊
1093	␉my $break;␊
1094	␊
1095	␉my $type = $self->tag_type;␊
1096	␉if ($type == -1) { return 0; }␊
1097	␊
1098	#print "TAG TYPE = ".$type."\n";␊
1099	␉$break = $tag_types[$type]->{breaking};␊
1100	␉if (!defined($break)) {␊
1101	␉␉# This tag's breaking depends on its content␊
1102	␉␉my ($eof,@lines) = $self->extract_tag($type,0);␊
1103	␉␉$break = &{$tag_types[$type]->{f_breaking}}($self,@lines);␊
1104	␉}␊
1105	#print "break = ".$break."\n";␊
1106	␉return $break;␊
1107	}␊
1108	␊
1109	=item treat_tag()␊
1110	␊
1111	This function translates the next tag from the input stream. Using each␊
1112	tag type's custom translation functions.␊
1113	␊
1114	=cut␊
1115	␊
1116	sub treat_tag {␊
1117	␉my $self = shift;␊
1118	␉my $type = $self->tag_type;␊
1119	␊
1120	␉my ($match1,$match2) = ($tag_types[$type]->{beginning},$tag_types[$type]->{end});␊
1121	␉my ($eof,@lines) = $self->extract_tag($type,1);␊
1122	␊
1123	␉$lines[0] =~ /^(\s)(.)$/s;␊
1124	␉my $space1 = $1;␊
1125	␉$lines[0] = $2;␊
1126	␉$lines[$#lines-1] =~ /^(.?)(\s)$/s;␊
1127	␉my $space2 = $2;␊
1128	␉$lines[$#lines-1] = $1;␊
1129	␊
1130	␉# Calling this tag type's specific handling (translation of␊
1131	␉# attributes...)␊
1132	␉my $line = &{$tag_types[$type]->{f_translate}}($self,@lines);␊
1133	␉$self->pushline("<".$match1.$space1.$line.$space2.$match2.">");␊
1134	␉return $eof;␊
1135	}␊
1136	␊
1137	=item tag_in_list($@)␊
1138	␊
1139	This function returns a string value that says if the first argument (a tag␊
1140	hierarchy) matches any of the tags from the second argument (a list of tags␊
1141	or tag hierarchies). If it doesn't match, it returns 0. Else, it returns the␊
1142	matched tag's options (the characters in front of the tag) or 1 (if that tag␊
1143	doesn't have options).␊
1144	␊
1145	=back␊
1146	␊
1147	=cut␊
1148	sub tag_in_list ($$$) {␊
1149	␉my ($self,$path,$list) = @_;␊
1150	␉if ($self->{options}{'caseinsensitive'}) {␊
1151	␉␉$path = lc $path;␊
1152	␉}␊
1153	␊
1154	␉while (1) {␊
1155	␉␉if (defined $list->{$path}) {␊
1156	␉␉␉if (length $list->{$path}) {␊
1157	␉␉␉␉return $list->{$path};␊
1158	␉␉␉} else {␊
1159	␉␉␉␉return 1;␊
1160	␉␉␉}␊
1161	␉␉}␊
1162	␉␉last unless ($path =~ m/</);␊
1163	␉␉$path =~ s/^<.*?>//;␊
1164	␉}␊
1165	␊
1166	␉return 0;␊
1167	}␊
1168	␊
1169	=head2 WORKING WITH ATTRIBUTES␊
1170	␊
1171	=over 4␊
1172	␊
1173	=item treat_attributes(@)␊
1174	␊
1175	This function handles the translation of the tags' attributes. It receives the tag␊
1176	without the beginning / end marks, and then it finds the attributes, and it␊
1177	translates the translatable ones (specified by the module option "attributes").␊
1178	This returns a plain string with the translated tag.␊
1179	␊
1180	=back␊
1181	␊
1182	=cut␊
1183	␊
1184	sub treat_attributes {␊
1185	␉my ($self,@tag)=@_;␊
1186	␊
1187	␉$tag[0] =~ /^(\S)(.)/s;␊
1188	␉my $text = $1;␊
1189	␉$tag[0] = $2;␊
1190	␊
1191	␉while (@tag) {␊
1192	␉␉my $complete = 1;␊
1193	␊
1194	␉␉$text .= $self->skip_spaces(\@tag);␊
1195	␉␉if (@tag) {␊
1196	␉␉␉# Get the attribute's name␊
1197	␉␉␉$complete = 0;␊
1198	␊
1199	␉␉␉$tag[0] =~ /^([^\s=]+)(.*)/s;␊
1200	␉␉␉my $name = $1;␊
1201	␉␉␉my $ref = $tag[1];␊
1202	␉␉␉$tag[0] = $2;␊
1203	␉␉␉$text .= $name;␊
1204	␉␉␉$text .= $self->skip_spaces(\@tag);␊
1205	␉␉␉if (@tag) {␊
1206	␉␉␉␉# Get the '='␊
1207	␉␉␉␉if ($tag[0] =~ /^=(.*)/s) {␊
1208	␉␉␉␉␉$tag[0] = $1;␊
1209	␉␉␉␉␉$text .= "=";␊
1210	␉␉␉␉␉$text .= $self->skip_spaces(\@tag);␊
1211	␉␉␉␉␉if (@tag) {␊
1212	␉␉␉␉␉␉# Get the value␊
1213	␉␉␉␉␉␉my $value="";␊
1214	␉␉␉␉␉␉$ref=$tag[1];␊
1215	␉␉␉␉␉␉my $quot=substr($tag[0],0,1);␊
1216	␉␉␉␉␉␉if ($quot ne "\"" and $quot ne "'") {␊
1217	␉␉␉␉␉␉␉# Unquoted value␊
1218	␉␉␉␉␉␉␉$quot="";␊
1219	␉␉␉␉␉␉␉$tag[0] =~ /^(\S+)(.*)/s;␊
1220	␉␉␉␉␉␉␉$value = $1;␊
1221	␉␉␉␉␉␉␉$tag[0] = $2;␊
1222	␉␉␉␉␉␉} else {␊
1223	␉␉␉␉␉␉␉# Quoted value␊
1224	␉␉␉␉␉␉␉$text .= $quot;␊
1225	␉␉␉␉␉␉␉$tag[0] =~ /^\Q$quot\E(.*)/s;␊
1226	␉␉␉␉␉␉␉$tag[0] = $1;␊
1227	␉␉␉␉␉␉␉while ($tag[0] !~ /\Q$quot\E/) {␊
1228	␉␉␉␉␉␉␉␉$value .= $tag[0];␊
1229	␉␉␉␉␉␉␉␉shift @tag;␊
1230	␉␉␉␉␉␉␉␉shift @tag;␊
1231	␉␉␉␉␉␉␉}␊
1232	␉␉␉␉␉␉␉$tag[0] =~ /^(.?)\Q$quot\E(.)/s;␊
1233	␉␉␉␉␉␉␉$value .= $1;␊
1234	␉␉␉␉␉␉␉$tag[0] = $2;␊
1235	␉␉␉␉␉␉}␊
1236	␉␉␉␉␉␉$complete = 1;␊
1237	␉␉␉␉␉␉if ($self->tag_in_list($self->get_path.$name,$self->{attributes})) {␊
1238	␉␉␉␉␉␉␉$text .= $self->found_string($value, $ref, { type=>"attribute", attribute=>$name });␊
1239	␉␉␉␉␉␉} else {␊
1240	␉␉␉␉␉␉␉print wrap_ref_mod($ref, "po4a::xml", dgettext("po4a", "Content of attribute %s excluded: %s"), $self->get_path.$name, $value)␊
1241	␉␉␉␉␉␉␉ if $self->debug();␊
1242	␉␉␉␉␉␉␉$text .= $self->recode_skipped_text($value);␊
1243	␉␉␉␉␉␉}␊
1244	␉␉␉␉␉␉$text .= $quot;␊
1245	␉␉␉␉␉}␊
1246	␉␉␉␉}␊
1247	␉␉␉}␊
1248	␊
1249	␉␉␉unless ($complete) {␊
1250	␉␉␉␉my $ontagerror = $self->{options}{'ontagerror'};␊
1251	␉␉␉␉if ($ontagerror eq "warn") {␊
1252	␉␉␉␉␉warn wrap_ref_mod($ref, "po4a::xml", dgettext ("po4a", "Bad attribute syntax. Continuing..."));␊
1253	␉␉␉␉} elsif ($ontagerror ne "silent") {␊
1254	␉␉␉␉␉die wrap_ref_mod($ref, "po4a::xml", dgettext ("po4a", "Bad attribute syntax"));␊
1255	␉␉␉␉}␊
1256	␉␉␉}␊
1257	␉␉}␊
1258	␉}␊
1259	␉return $text;␊
1260	}␊
1261	␊
1262	# Returns an empty string if the content in the $path should not be␊
1263	# translated.␊
1264	#␊
1265	# Otherwise, returns the set of options for translation:␊
1266	# w: the content shall be re-wrapped␊
1267	# W: the content shall not be re-wrapped␊
1268	# i: the tag shall be inlined␊
1269	# p: a placeholder shall replace the tag (and its content)␊
1270	# n: a custom tag␊
1271	#␊
1272	# A translatable inline tag in an untranslated tag is treated as a translatable breaking tag.␊
1273	sub get_translate_options {␊
1274	␉my $self = shift;␊
1275	␉my $path = shift;␊
1276	␊
1277	␉if (defined $translate_options_cache{$path}) {␊
1278	␉␉return $translate_options_cache{$path};␊
1279	␉}␊
1280	␊
1281	␉my $options = "";␊
1282	␉my $translate = 0;␊
1283	␉my $usedefault = 1;␊
1284	␊
1285	␉my $inlist = 0;␊
1286	␉my $tag = $self->get_tag_from_list($path, $self->{tags});␊
1287	␉if (defined $tag) {␊
1288	␉␉$inlist = 1;␊
1289	␉}␊
1290	␉if ($self->{options}{'tagsonly'} eq $inlist) {␊
1291	␉␉$usedefault = 0;␊
1292	␉␉if (defined $tag) {␊
1293	␉␉␉$options = $tag;␊
1294	␉␉␉$options =~ s/<.*$//;␊
1295	␉␉} else {␊
1296	␉␉␉if ($self->{options}{'wrap'}) {␊
1297	␉␉␉␉$options = "w";␊
1298	␉␉␉} else {␊
1299	␉␉␉␉$options = "W";␊
1300	␉␉␉}␊
1301	␉␉}␊
1302	␉␉$translate = 1;␊
1303	␉}␊
1304	␊
1305	# TODO: a less precise set of tags should not override a more precise one␊
1306	␉# The tags and tagsonly options are deprecated.␊
1307	␉# The translated and untranslated options have an higher priority.␊
1308	␉$tag = $self->get_tag_from_list($path, $self->{translated});␊
1309	␉if (defined $tag) {␊
1310	␉␉$usedefault = 0;␊
1311	␉␉$options = $tag;␊
1312	␉␉$options =~ s/<.*$//;␊
1313	␉␉$translate = 1;␊
1314	␉}␊
1315	␊
1316	␉if ($translate and $options !~ m/w/i) {␊
1317	␉␉$options .= ($self->{options}{'wrap'})?"w":"W";␊
1318	␉}␊
1319	␊
1320	␉if (not defined $tag) {␊
1321	␉␉$tag = $self->get_tag_from_list($path, $self->{untranslated});␊
1322	␉␉if (defined $tag) {␊
1323	␉␉␉$usedefault = 0;␊
1324	␉␉␉$options = "";␊
1325	␉␉␉$translate = 0;␊
1326	␉␉}␊
1327	␉}␊
1328	␊
1329	␉$tag = $self->get_tag_from_list($path, $self->{inline});␊
1330	␉if (defined $tag) {␊
1331	␉␉$usedefault = 0;␊
1332	␉␉$options .= "i";␊
1333	␉} else {␊
1334	␉␉$tag = $self->get_tag_from_list($path, $self->{placeholder});␊
1335	␉␉if (defined $tag) {␊
1336	␉␉␉$usedefault = 0;␊
1337	␉␉␉$options .= "p";␊
1338	␉␉}␊
1339	␉}␊
1340	␊
1341	␉$tag = $self->get_tag_from_list($path, $self->{customtag});␊
1342	␉if (defined $tag) {␊
1343	␉␉$usedefault = 0;␊
1344	␉␉$options = "in"; # This erase any other setting␊
1345	␉}␊
1346	␊
1347	␉if ($usedefault) {␊
1348	␉␉$options = $self->{options}{'defaulttranslateoption'};␊
1349	␉}␊
1350	␊
1351	␉# A translatable inline tag in an untranslated tag is treated as a␊
1352	␉# translatable breaking tag.␊
1353	␉if ($options =~ m/i/) {␊
1354	␉␉my $ppath = $path;␊
1355	␉␉$ppath =~ s/<[^>]*>$//;␊
1356	␉␉my $poptions = $self->get_translate_options ($ppath);␊
1357	␉␉if ($poptions eq "") {␊
1358	␉␉␉$options =~ s/i//;␊
1359	␉␉}␊
1360	␉}␊
1361	␊
1362	␉if ($options =~ m/i/ and $self->{options}{'foldattributes'}) {␊
1363	␉␉$options .= "f";␊
1364	␉}␊
1365	␊
1366	␉$translate_options_cache{$path} = $options;␊
1367	␉return $options;␊
1368	}␊
1369	␊
1370	␊
1371	# Return the tag (or biggest set of tags) of a list which matches with the␊
1372	# given path.␊
1373	#␊
1374	# The tag (or set of tags) is returned with its options.␊
1375	#␊
1376	# If no tags could match the path, undef is returned.␊
1377	sub get_tag_from_list ($$$) {␊
1378	␉my ($self,$path,$list) = @_;␊
1379	␉if ($self->{options}{'caseinsensitive'}) {␊
1380	␉␉$path = lc $path;␊
1381	␉}␊
1382	␊
1383	␉while (1) {␊
1384	␉␉if (defined $list->{$path}) {␊
1385	␉␉␉return $list->{$path}.$path;␊
1386	␉␉}␊
1387	␉␉last unless ($path =~ m/</);␊
1388	␉␉$path =~ s/^<.*?>//;␊
1389	␉}␊
1390	␊
1391	␉return undef;␊
1392	}␊
1393	␊
1394	␊
1395	␊
1396	sub treat_content {␊
1397	␉my $self = shift;␊
1398	␉my $blank="";␊
1399	␉# Indicates if the paragraph will have to be translated␊
1400	␉my $translate = "";␊
1401	␊
1402	␉my ($eof,@paragraph)=$self->get_string_until('<',{remove=>1});␊
1403	␊
1404	␉while (!$eof and !$self->breaking_tag) {␊
1405	␉NEXT_TAG:␊
1406	␉␉my @text;␊
1407	␉␉my $type = $self->tag_type;␊
1408	␉␉my $f_extract = $tag_types[$type]->{'f_extract'};␊
1409	␉␉if ( defined($f_extract)␊
1410	␉␉ and $f_extract eq \&tag_extract_comment) {␊
1411	␉␉␉# Remove the content of the comments␊
1412	␉␉␉($eof, @text) = $self->extract_tag($type,1);␊
1413	␉␉␉$text[$#text-1] .= "\0";␊
1414	␉␉␉if ($tag_types[$type]->{'beginning'} eq "!--#") {␊
1415	␉␉␉␉$text[0] = "#".$text[0];␊
1416	␉␉␉}␊
1417	␉␉␉push @comments, @text;␊
1418	␉␉} else {␊
1419	␉␉␉my ($tmpeof, @tag) = $self->extract_tag($type,0);␊
1420	␉␉␉# Append the found inline tag␊
1421	␉␉␉($eof,@text)=$self->get_string_until('>',␊
1422	␉␉␉ {include=>1,␊
1423	␉␉␉ remove=>1,␊
1424	␉␉␉ unquoted=>1});␊
1425	␉␉␉# Append or remove the opening/closing tag from␊
1426	␉␉␉# the tag path␊
1427	␉␉␉if ($tag_types[$type]->{'end'} eq "") {␊
1428	␉␉␉␉if ($tag_types[$type]->{'beginning'} eq "") {␊
1429	␉␉␉␉␉# Opening inline tag␊
1430	␉␉␉␉␉my $cur_tag_name = $self->get_tag_name(@tag);␊
1431	␉␉␉␉␉my $t_opts = $self->get_translate_options($self->get_path($cur_tag_name));␊
1432	␉␉␉␉␉if ($t_opts =~ m/p/) {␊
1433	␉␉␉␉␉␉# We enter a new holder.␊
1434	␉␉␉␉␉␉# Append a <placeholder ...> tag to the current␊
1435	␉␉␉␉␉␉# paragraph, and save the @paragraph in the␊
1436	␉␉␉␉␉␉# current holder.␊
1437	␉␉␉␉␉␉my $last_holder = $save_holders[$#save_holders];␊
1438	␉␉␉␉␉␉my $placeholder_str = "<placeholder type=\"".$cur_tag_name."\" id=\"".($#{$last_holder->{'sub_translations'}}+1)."\"/>";␊
1439	␉␉␉␉␉␉push @paragraph, ($placeholder_str, $text[1]);␊
1440	␉␉␉␉␉␉my @saved_paragraph = @paragraph;␊
1441	␊
1442	␉␉␉␉␉␉$last_holder->{'paragraph'} = \@saved_paragraph;␊
1443	␊
1444	␉␉␉␉␉␉# Then we must push a new holder␊
1445	␉␉␉␉␉␉my @new_paragraph = ();␊
1446	␉␉␉␉␉␉my @sub_translations = ();␊
1447	␉␉␉␉␉␉my %folded_attributes;␊
1448	␉␉␉␉␉␉my %new_holder = ('paragraph' => \@new_paragraph,␊
1449	␉␉␉␉␉␉ 'open' => $self->join_lines(@text),␊
1450	␉␉␉␉␉␉ 'translation' => "",␊
1451	␉␉␉␉␉␉ 'close' => undef,␊
1452	␉␉␉␉␉␉ 'sub_translations' => \@sub_translations,␊
1453	␉␉␉␉␉␉ 'folded_attributes' => \%folded_attributes);␊
1454	␉␉␉␉␉␉push @save_holders, \%new_holder;␊
1455	␉␉␉␉␉␉@text = ();␊
1456	␊
1457	␉␉␉␉␉␉# The current @paragraph␊
1458	␉␉␉␉␉␉# (for the current holder)␊
1459	␉␉␉␉␉␉# is empty.␊
1460	␉␉␉␉␉␉@paragraph = ();␊
1461	␉␉␉␉␉} elsif ($t_opts =~ m/f/) {␊
1462	␉␉␉␉␉␉my $tag_full = $self->join_lines(@text);␊
1463	␉␉␉␉␉␉my $tag_ref = $text[1];␊
1464	␉␉␉␉␉␉if ($tag_full =~ m/^<\s\S+\s+\S.>$/s) {␊
1465	␉␉␉␉␉␉␉my $holder = $save_holders[$#save_holders];␊
1466	␉␉␉␉␉␉␉my $id = 0;␊
1467	␉␉␉␉␉␉␉foreach (keys %{$holder->{folded_attributes}}) {␊
1468	␉␉␉␉␉␉␉␉$id = $_ + 1 if ($_ >= $id);␊
1469	␉␉␉␉␉␉␉}␊
1470	␉␉␉␉␉␉␉$holder->{folded_attributes}->{$id} = $tag_full;␊
1471	␊
1472	␉␉␉␉␉␉␉@text = ("<$cur_tag_name po4a-id=$id>", $tag_ref);␊
1473	␉␉␉␉␉␉}␊
1474	␉␉␉␉␉}␊
1475	␉␉␉␉␉unless ($t_opts =~ m/n/) {␊
1476	␉␉␉␉␉␉push @path, $cur_tag_name;␊
1477	␉␉␉␉␉}␊
1478	␉␉␉␉} elsif ($tag_types[$type]->{'beginning'} eq "/") {␊
1479	␉␉␉␉␉# Closing inline tag␊
1480	␊
1481	␉␉␉␉␉# Check if this is closing the␊
1482	␉␉␉␉␉# last opening tag we detected.␊
1483	␉␉␉␉␉my $test = pop @path;␊
1484	␉␉␉␉␉my $name = $self->get_tag_name(@tag);␊
1485	␉␉␉␉␉if (!defined($test) \|\|␊
1486	␉␉␉␉␉ $test ne $name ) {␊
1487	␉␉␉␉␉␉my $ontagerror = $self->{options}{'ontagerror'};␊
1488	␉␉␉␉␉␉if ($ontagerror eq "warn") {␊
1489	␉␉␉␉␉␉␉warn wrap_ref_mod($tag[1], "po4a::xml", dgettext("po4a", "Unexpected closing tag </%s> found. The main document may be wrong. Continuing..."), $name);␊
1490	␉␉␉␉␉␉} elsif ($ontagerror ne "silent") {␊
1491	␉␉␉␉␉␉␉die wrap_ref_mod($tag[1], "po4a::xml", dgettext("po4a", "Unexpected closing tag </%s> found. The main document may be wrong."), $name);␊
1492	␉␉␉␉␉␉}␊
1493	␉␉␉␉␉}␊
1494	␊
1495	␉␉␉␉␉if ($self->get_translate_options($self->get_path($self->get_tag_name(@tag))) =~ m/p/) {␊
1496	␉␉␉␉␉␉# This closes the current holder.␊
1497	␊
1498	␉␉␉␉␉␉push @path, $self->get_tag_name(@tag);␊
1499	␉␉␉␉␉␉# Now translate this paragraph if needed.␊
1500	␉␉␉␉␉␉# This will call pushline and append the␊
1501	␉␉␉␉␉␉# translation to the current holder's translation.␊
1502	␉␉␉␉␉␉$self->translate_paragraph(@paragraph);␊
1503	␉␉␉␉␉␉pop @path;␊
1504	␊
1505	␉␉␉␉␉␉# Now that this holder is closed, we can remove␊
1506	␉␉␉␉␉␉# the holder from the stack.␊
1507	␉␉␉␉␉␉my $holder = pop @save_holders;␊
1508	␉␉␉␉␉␉# We need to keep the translation of this holder␊
1509	␉␉␉␉␉␉my $translation = $holder->{'open'}.$holder->{'translation'};␊
1510	␉␉␉␉␉␉$translation .= $self->join_lines(@text);␊
1511	␊
1512	␉␉␉␉␉␉@text = ();␊
1513	␊
1514	␉␉␉␉␉␉# Then we store the translation in the previous␊
1515	␉␉␉␉␉␉# holder's sub_translations array␊
1516	␉␉␉␉␉␉my $previous_holder = $save_holders[$#save_holders];␊
1517	␉␉␉␉␉␉push @{$previous_holder->{'sub_translations'}}, $translation;␊
1518	␉␉␉␉␉␉# We also need to restore the @paragraph array, as␊
1519	␉␉␉␉␉␉# it was before we encountered the holder.␊
1520	␉␉␉␉␉␉@paragraph = @{$previous_holder->{'paragraph'}};␊
1521	␉␉␉␉␉}␊
1522	␉␉␉␉}␊
1523	␉␉␉}␊
1524	␉␉␉push @paragraph, @text;␊
1525	␉␉}␊
1526	␊
1527	␉␉# Next tag␊
1528	␉␉($eof,@text)=$self->get_string_until('<',{remove=>1});␊
1529	␉␉if ($#text > 0) {␊
1530	␉␉␉# Check if text (extracted after the inline tag)␊
1531	␉␉␉# has to be translated␊
1532	␉␉␉push @paragraph, @text;␊
1533	␉␉}␊
1534	␉}␊
1535	␊
1536	␉# This strips the extracted strings␊
1537	␉# (only if you don't specify the 'nostrip' option, and if the␊
1538	␉# paragraph can be re-wrapped)␊
1539	␉$translate = $self->get_translate_options($self->get_path);␊
1540	␉if (!$self->{options}{'nostrip'} and $translate !~ m/W/) {␊
1541	␉␉my $clean = 0;␊
1542	␉␉# Clean the beginning␊
1543	␉␉while (!$clean and $#paragraph > 0) {␊
1544	␉␉␉$paragraph[0] =~ /^(\s)(.)/s;␊
1545	␉␉␉my $match = $1;␊
1546	␉␉␉if ($paragraph[0] eq $match) {␊
1547	␉␉␉␉if ($match ne "") {␊
1548	␉␉␉␉␉$self->pushline($match);␊
1549	␉␉␉␉}␊
1550	␉␉␉␉shift @paragraph;␊
1551	␉␉␉␉shift @paragraph;␊
1552	␉␉␉} else {␊
1553	␉␉␉␉$paragraph[0] = $2;␊
1554	␉␉␉␉if ($match ne "") {␊
1555	␉␉␉␉␉$self->pushline($match);␊
1556	␉␉␉␉}␊
1557	␉␉␉␉$clean = 1;␊
1558	␉␉␉}␊
1559	␉␉}␊
1560	␉␉$clean = 0;␊
1561	␉␉# Clean the end␊
1562	␉␉while (!$clean and $#paragraph > 0) {␊
1563	␉␉␉$paragraph[$#paragraph-1] =~ /^(.?)(\s)$/s;␊
1564	␉␉␉my $match = $2;␊
1565	␉␉␉if ($paragraph[$#paragraph-1] eq $match) {␊
1566	␉␉␉␉if ($match ne "") {␊
1567	␉␉␉␉␉$blank = $match.$blank;␊
1568	␉␉␉␉}␊
1569	␉␉␉␉pop @paragraph;␊
1570	␉␉␉␉pop @paragraph;␊
1571	␉␉␉} else {␊
1572	␉␉␉␉$paragraph[$#paragraph-1] = $1;␊
1573	␉␉␉␉if ($match ne "") {␊
1574	␉␉␉␉␉$blank = $match.$blank;␊
1575	␉␉␉␉}␊
1576	␉␉␉␉$clean = 1;␊
1577	␉␉␉}␊
1578	␉␉}␊
1579	␉}␊
1580	␊
1581	␉# Translate the string when needed␊
1582	␉# This will either push the translation in the translated document or␊
1583	␉# in the current holder translation.␊
1584	␉$self->translate_paragraph(@paragraph);␊
1585	␊
1586	␉# Push the trailing blanks␊
1587	␉if ($blank ne "") {␊
1588	␉␉$self->pushline($blank);␊
1589	␉}␊
1590	␉return $eof;␊
1591	}␊
1592	␊
1593	# Translate a @paragraph array of (string, reference).␊
1594	# The $translate argument indicates if the strings must be translated or␊
1595	# just pushed␊
1596	sub translate_paragraph {␊
1597	␉my $self = shift;␊
1598	␉my @paragraph = @_;␊
1599	␉my $translate = $self->get_translate_options($self->get_path);␊
1600	␊
1601	␉while ( (scalar @paragraph)␊
1602	␉ and ($paragraph[0] =~ m/^\s*\n/s)) {␊
1603	␉␉$self->pushline($paragraph[0]);␊
1604	␉␉shift @paragraph;␊
1605	␉␉shift @paragraph;␊
1606	␉}␊
1607	␊
1608	␉my $comments;␊
1609	␉while (@comments) {␊
1610	␉␉my ($comment,$eoc);␊
1611	␉␉do {␊
1612	␉␉␉my ($t,$l) = (shift @comments, shift @comments);␊
1613	␉␉␉$t =~ s/\n?(\0)?$//;␊
1614	␉␉␉$eoc = $1;␊
1615	␉␉␉$comment .= "\n" if defined $comment;␊
1616	␉␉␉$comment .= $t;␊
1617	␉␉} until ($eoc);␊
1618	␉␉$comments .= "\n" if defined $comments;␊
1619	␉␉$comments .= $comment;␊
1620	␉␉$self->pushline("<!--".$comment."-->\n") if defined $comment;␊
1621	␉}␊
1622	␉@comments = ();␊
1623	␊
1624	␉if ($self->{options}{'cpp'}) {␊
1625	␉␉my @tmp = @paragraph;␊
1626	␉␉@paragraph = ();␊
1627	␉␉while (@tmp) {␊
1628	␉␉␉my ($t,$l) = (shift @tmp, shift @tmp);␊
1629	␉␉␉# #include can be followed by a filename between␊
1630	␉␉␉# <> brackets. In that case, the argument won't be␊
1631	␉␉␉# handled in the same call to translate_paragraph.␊
1632	␉␉␉# Thus do not try to match "include ".␊
1633	␉␉␉if ($t =~ m/^#[ \t]*(if \|endif\|undef \|include\|else\|ifdef \|ifndef \|define )/si) {␊
1634	␉␉␉␉if (@paragraph) {␊
1635	␉␉␉␉␉$self->translate_paragraph(@paragraph);␊
1636	␉␉␉␉␉@paragraph = ();␊
1637	␉␉␉␉␉$self->pushline("\n");␊
1638	␉␉␉␉}␊
1639	␉␉␉␉$self->pushline($t);␊
1640	␉␉␉} else {␊
1641	␉␉␉␉push @paragraph, ($t,$l);␊
1642	␉␉␉}␊
1643	␉␉}␊
1644	␉}␊
1645	␊
1646	␉my $para = $self->join_lines(@paragraph);␊
1647	␉if ( length($para) > 0 ) {␊
1648	␉␉if ($translate ne "") {␊
1649	␉␉␉# This tag should be translated␊
1650	␉␉␉$self->pushline($self->found_string(␊
1651	␉␉␉␉$para,␊
1652	␉␉␉␉$paragraph[1], {␊
1653	␉␉␉␉␉type=>"tag",␊
1654	␉␉␉␉␉tag_options=>$translate,␊
1655	␉␉␉␉␉comments=>$comments␊
1656	␉␉␉␉}));␊
1657	␉␉} else {␊
1658	␉␉␉# Inform that this tag isn't translated in debug mode␊
1659	␉␉␉print wrap_ref_mod($paragraph[1], "po4a::xml", dgettext ("po4a", "Content of tag %s excluded: %s"), $self->get_path, $para)␊
1660	␉␉␉ if $self->debug();␊
1661	␉␉␉$self->pushline($self->recode_skipped_text($para));␊
1662	␉␉}␊
1663	␉}␊
1664	␉# Now the paragraph is fully translated.␊
1665	␉# If we have all the holders' translation, we can replace the␊
1666	␉# placeholders by their translations.␊
1667	␉# We must wait to have all the translations because the holders are␊
1668	␉# numbered.␊
1669	␉{␊
1670	␉␉my $holder = $save_holders[$#save_holders];␊
1671	␉␉my $translation = $holder->{'translation'};␊
1672	␊
1673	␉␉# Count the number of <placeholder ...> in $translation␊
1674	␉␉my $count = 0;␊
1675	␉␉my $str = $translation;␊
1676	␉␉while ( (defined $str)␊
1677	␉␉ and ($str =~ m/^.?<placeholder\s+type="[^"]+"\s+id="(\d+)"\s\/>(.*)$/s)) {␊
1678	␉␉␉$count += 1;␊
1679	␉␉␉$str = $2;␊
1680	␉␉␉if ($holder->{'sub_translations'}->[$1] =~ m/<placeholder\s+type="[^"]+"\s+id="(\d+)"\s*\/>/s) {␊
1681	␉␉␉␉$count = -1;␊
1682	␉␉␉␉last;␊
1683	␉␉␉}␊
1684	␉␉}␊
1685	␊
1686	␉␉if ( (defined $translation)␊
1687	␉␉ and (scalar(@{$holder->{'sub_translations'}}) == $count)) {␊
1688	␉␉␉# OK, all the holders of the current paragraph are␊
1689	␉␉␉# closed (and translated).␊
1690	␉␉␉# Replace them by their translation.␊
1691	␉␉␉while ($translation =~ m/^(.?)<placeholder\s+type="[^"]+"\s+id="(\d+)"\s\/>(.*)$/s) {␊
1692	␉␉␉␉# FIXME: we could also check that␊
1693	␉␉␉␉# * the holder exists␊
1694	␉␉␉␉# * all the holders are used␊
1695	␉␉␉␉$translation = $1.$holder->{'sub_translations'}->[$2].$3;␊
1696	␉␉␉}␊
1697	␉␉␉# We have our translation␊
1698	␉␉␉$holder->{'translation'} = $translation;␊
1699	␉␉␉# And there is no need for any holder in it.␊
1700	␉␉␉my @sub_translations = ();␊
1701	␉␉␉$holder->{'sub_translations'} = \@sub_translations;␊
1702	␉␉}␊
1703	␉}␊
1704	␊
1705	}␊
1706	␊
1707	␊
1708	␊
1709	=head2 WORKING WITH THE MODULE OPTIONS␊
1710	␊
1711	=over 4␊
1712	␊
1713	=item treat_options()␊
1714	␊
1715	This function fills the internal structures that contain the tags, attributes␊
1716	and inline data with the options of the module (specified in the command-line␊
1717	or in the initialize function).␊
1718	␊
1719	=back␊
1720	␊
1721	=cut␊
1722	␊
1723	sub treat_options {␊
1724	␉my $self = shift;␊
1725	␊
1726	␉if ($self->{options}{'caseinsensitive'}) {␊
1727	␉␉$self->{options}{'nodefault'} = lc $self->{options}{'nodefault'};␊
1728	␉␉$self->{options}{'tags'} = lc $self->{options}{'tags'};␊
1729	␉␉$self->{options}{'break'} = lc $self->{options}{'break'};␊
1730	␉␉$self->{options}{'_default_break'} = lc $self->{options}{'_default_break'};␊
1731	␉␉$self->{options}{'translated'} = lc $self->{options}{'translated'};␊
1732	␉␉$self->{options}{'_default_translated'} = lc $self->{options}{'_default_translated'};␊
1733	␉␉$self->{options}{'untranslated'} = lc $self->{options}{'untranslated'};␊
1734	␉␉$self->{options}{'_default_untranslated'} = lc $self->{options}{'_default_untranslated'};␊
1735	␉␉$self->{options}{'attributes'} = lc $self->{options}{'attributes'};␊
1736	␉␉$self->{options}{'_default_attributes'} = lc $self->{options}{'_default_attributes'};␊
1737	␉␉$self->{options}{'inline'} = lc $self->{options}{'inline'};␊
1738	␉␉$self->{options}{'_default_inline'} = lc $self->{options}{'_default_inline'};␊
1739	␉␉$self->{options}{'placeholder'} = lc $self->{options}{'placeholder'};␊
1740	␉␉$self->{options}{'_default_placeholder'} = lc $self->{options}{'_default_placeholder'};␊
1741	␉␉$self->{options}{'customtag'} = lc $self->{options}{'customtag'};␊
1742	␉␉$self->{options}{'_default_customtag'} = lc $self->{options}{'_default_customtag'};␊
1743	␉}␊
1744	␊
1745	␉$self->{options}{'nodefault'} =~ /^\s(.)\s*$/s;␊
1746	␉my %list_nodefault;␊
1747	␉foreach (split(/\s+/s,$1)) {␊
1748	␉␉$list_nodefault{$_} = 1;␊
1749	␉}␊
1750	␉$self->{nodefault} = \%list_nodefault;␊
1751	␊
1752	␉$self->{options}{'tags'} =~ /^\s(.)\s*$/s;␊
1753	␉if (length $self->{options}{'tags'}) {␊
1754	␉␉warn wrap_mod("po4a::xml",␊
1755	␉␉ dgettext("po4a",␊
1756	␉␉ "The '%s' option is deprecated. Please use the translated/untranslated and/or break/inline/placeholder categories."), "tags");␊
1757	␉}␊
1758	␉foreach (split(/\s+/s,$1)) {␊
1759	␉␉$_ =~ m/^(.?)(<.)$/;␊
1760	␉␉$self->{tags}->{$2} = $1 \|\| "";␊
1761	␉}␊
1762	␊
1763	␉if ($self->{options}{'tagsonly'}) {␊
1764	␉␉warn wrap_mod("po4a::xml",␊
1765	␉␉ dgettext("po4a",␊
1766	␉␉ "The '%s' option is deprecated. Please use the translated/untranslated and/or break/inline/placeholder categories."), "tagsonly");␊
1767	␉}␊
1768	␊
1769	␉$self->{options}{'break'} =~ /^\s(.)\s*$/s;␊
1770	␉foreach my $tag (split(/\s+/s,$1)) {␊
1771	␉␉$tag =~ m/^(.?)(<.)$/;␊
1772	␉␉$self->{break}->{$2} = $1 \|\| "";␊
1773	␉}␊
1774	␉$self->{options}{'_default_break'} =~ /^\s(.)\s*$/s;␊
1775	␉foreach my $tag (split(/\s+/s,$1)) {␊
1776	␉␉$tag =~ m/^(.?)(<.)$/;␊
1777	␉␉$self->{break}->{$2} = $1 \|\| ""␊
1778	␉␉␉unless $list_nodefault{$2}␊
1779	␉␉␉ or defined $self->{break}->{$2};␊
1780	␉}␊
1781	␊
1782	␉$self->{options}{'translated'} =~ /^\s(.)\s*$/s;␊
1783	␉foreach my $tag (split(/\s+/s,$1)) {␊
1784	␉␉$tag =~ m/^(.?)(<.)$/;␊
1785	␉␉$self->{translated}->{$2} = $1 \|\| "";␊
1786	␉}␊
1787	␉$self->{options}{'_default_translated'} =~ /^\s(.)\s*$/s;␊
1788	␉foreach my $tag (split(/\s+/s,$1)) {␊
1789	␉␉$tag =~ m/^(.?)(<.)$/;␊
1790	␉␉$self->{translated}->{$2} = $1 \|\| ""␊
1791	␉␉␉unless $list_nodefault{$2}␊
1792	␉␉␉ or defined $self->{translated}->{$2};␊
1793	␉}␊
1794	␊
1795	␉$self->{options}{'untranslated'} =~ /^\s(.)\s*$/s;␊
1796	␉foreach my $tag (split(/\s+/s,$1)) {␊
1797	␉␉$tag =~ m/^(.?)(<.)$/;␊
1798	␉␉$self->{untranslated}->{$2} = $1 \|\| "";␊
1799	␉}␊
1800	␉$self->{options}{'_default_untranslated'} =~ /^\s(.)\s*$/s;␊
1801	␉foreach my $tag (split(/\s+/s,$1)) {␊
1802	␉␉$tag =~ m/^(.?)(<.)$/;␊
1803	␉␉$self->{untranslated}->{$2} = $1 \|\| ""␊
1804	␉␉␉unless $list_nodefault{$2}␊
1805	␉␉␉ or defined $self->{untranslated}->{$2};␊
1806	␉}␊
1807	␊
1808	␉$self->{options}{'attributes'} =~ /^\s(.)\s*$/s;␊
1809	␉foreach my $tag (split(/\s+/s,$1)) {␊
1810	␉␉if ($tag =~ m/^(.?)(<.)$/) {␊
1811	␉␉␉$self->{attributes}->{$2} = $1 \|\| "";␊
1812	␉␉} else {␊
1813	␉␉␉$self->{attributes}->{$tag} = "";␊
1814	␉␉}␊
1815	␉}␊
1816	␉$self->{options}{'_default_attributes'} =~ /^\s(.)\s*$/s;␊
1817	␉foreach my $tag (split(/\s+/s,$1)) {␊
1818	␉␉if ($tag =~ m/^(.?)(<.)$/) {␊
1819	␉␉␉$self->{attributes}->{$2} = $1 \|\| ""␊
1820	␉␉␉␉unless $list_nodefault{$2}␊
1821	␉␉␉␉ or defined $self->{attributes}->{$2};␊
1822	␉␉} else {␊
1823	␉␉␉$self->{attributes}->{$tag} = ""␊
1824	␉␉␉␉unless $list_nodefault{$tag}␊
1825	␉␉␉␉ or defined $self->{attributes}->{$tag};␊
1826	␉␉}␊
1827	␉}␊
1828	␊
1829	␉$self->{options}{'inline'} =~ /^\s(.)\s*$/s;␊
1830	␉foreach my $tag (split(/\s+/s,$1)) {␊
1831	␉␉$tag =~ m/^(.?)(<.)$/;␊
1832	␉␉$self->{inline}->{$2} = $1 \|\| "";␊
1833	␉}␊
1834	␉$self->{options}{'_default_inline'} =~ /^\s(.)\s*$/s;␊
1835	␉foreach my $tag (split(/\s+/s,$1)) {␊
1836	␉␉$tag =~ m/^(.?)(<.)$/;␊
1837	␉␉$self->{inline}->{$2} = $1 \|\| ""␊
1838	␉␉␉unless $list_nodefault{$2}␊
1839	␉␉␉ or defined $self->{inline}->{$2};␊
1840	␉}␊
1841	␊
1842	␉$self->{options}{'placeholder'} =~ /^\s(.)\s*$/s;␊
1843	␉foreach my $tag (split(/\s+/s,$1)) {␊
1844	␉␉$tag =~ m/^(.?)(<.)$/;␊
1845	␉␉$self->{placeholder}->{$2} = $1 \|\| "";␊
1846	␉}␊
1847	␉$self->{options}{'_default_placeholder'} =~ /^\s(.)\s*$/s;␊
1848	␉foreach my $tag (split(/\s+/s,$1)) {␊
1849	␉␉$tag =~ m/^(.?)(<.)$/;␊
1850	␉␉$self->{placeholder}->{$2} = $1 \|\| ""␊
1851	␉␉␉unless $list_nodefault{$2}␊
1852	␉␉␉ or defined $self->{placeholder}->{$2};␊
1853	␉}␊
1854	␊
1855	␉$self->{options}{'customtag'} =~ /^\s(.)\s*$/s;␊
1856	␉foreach my $tag (split(/\s+/s,$1)) {␊
1857	␉␉$tag =~ m/^(.?)(<.)$/;␊
1858	␉␉$self->{customtag}->{$2} = $1 \|\| "";␊
1859	␉}␊
1860	␉$self->{options}{'_default_customtag'} =~ /^\s(.)\s*$/s;␊
1861	␉foreach my $tag (split(/\s+/s,$1)) {␊
1862	␉␉$tag =~ m/^(.?)(<.)$/;␊
1863	␉␉$self->{customtag}->{$2} = $1 \|\| ""␊
1864	␉␉␉unless $list_nodefault{$2}␊
1865	␉␉␉ or defined $self->{customtag}->{$2};␊
1866	␉}␊
1867	␊
1868	␉# There should be no translated and untranslated tags␊
1869	␉foreach my $tag (keys %{$self->{translated}}) {␊
1870	␉␉die wrap_mod("po4a::xml",␊
1871	␉␉ dgettext("po4a",␊
1872	␉␉ "Tag '%s' both in the %s and %s categories."), $tag, "translated", "untranslated")␊
1873	␉␉␉if defined $self->{untranslated}->{$tag};␊
1874	␉}␊
1875	␉# There should be no inline, break, placeholder, and customtag tags␊
1876	␉foreach my $tag (keys %{$self->{inline}}) {␊
1877	␉␉die wrap_mod("po4a::xml",␊
1878	␉␉ dgettext("po4a",␊
1879	␉␉ "Tag '%s' both in the %s and %s categories."), $tag, "inline", "break")␊
1880	␉␉␉if defined $self->{break}->{$tag};␊
1881	␉␉die wrap_mod("po4a::xml",␊
1882	␉␉ dgettext("po4a",␊
1883	␉␉ "Tag '%s' both in the %s and %s categories."), $tag, "inline", "placeholder")␊
1884	␉␉␉if defined $self->{placeholder}->{$tag};␊
1885	␉␉die wrap_mod("po4a::xml",␊
1886	␉␉ dgettext("po4a",␊
1887	␉␉ "Tag '%s' both in the %s and %s categories."), $tag, "inline", "customtag")␊
1888	␉␉␉if defined $self->{customtag}->{$tag};␊
1889	␉}␊
1890	␉foreach my $tag (keys %{$self->{break}}) {␊
1891	␉␉die wrap_mod("po4a::xml",␊
1892	␉␉ dgettext("po4a",␊
1893	␉␉ "Tag '%s' both in the %s and %s categories."), $tag, "break", "placeholder")␊
1894	␉␉␉if defined $self->{placeholder}->{$tag};␊
1895	␉␉die wrap_mod("po4a::xml",␊
1896	␉␉ dgettext("po4a",␊
1897	␉␉ "Tag '%s' both in the %s and %s categories."), $tag, "break", "customtag")␊
1898	␉␉␉if defined $self->{customtag}->{$tag};␊
1899	␉}␊
1900	␉foreach my $tag (keys %{$self->{placeholder}}) {␊
1901	␉␉die wrap_mod("po4a::xml",␊
1902	␉␉ dgettext("po4a",␊
1903	␉␉ "Tag '%s' both in the %s and %s categories."), $tag, "placeholder", "customtag")␊
1904	␉␉␉if defined $self->{customtag}->{$tag};␊
1905	␉}␊
1906	}␊
1907	␊
1908	=head2 GETTING TEXT FROM THE INPUT DOCUMENT␊
1909	␊
1910	=over␊
1911	␊
1912	=item get_string_until($%)␊
1913	␊
1914	This function returns an array with the lines (and references) from the input␊
1915	document until it finds the first argument. The second argument is an options␊
1916	hash. Value 0 means disabled (the default) and 1, enabled.␊
1917	␊
1918	The valid options are:␊
1919	␊
1920	=over 4␊
1921	␊
1922	=item B<include>␊
1923	␊
1924	This makes the returned array to contain the searched text␊
1925	␊
1926	=item B<remove>␊
1927	␊
1928	This removes the returned stream from the input␊
1929	␊
1930	=item B<unquoted>␊
1931	␊
1932	This ensures that the searched text is outside any quotes␊
1933	␊
1934	=back␊
1935	␊
1936	=cut␊
1937	␊
1938	sub get_string_until {␊
1939	␉my ($self,$search) = (shift,shift);␊
1940	␉my $options = shift;␊
1941	␉my ($include,$remove,$unquoted, $regex) = (0,0,0,0);␊
1942	␊
1943	␉if (defined($options->{include})) { $include = $options->{include}; }␊
1944	␉if (defined($options->{remove})) { $remove = $options->{remove}; }␊
1945	␉if (defined($options->{unquoted})) { $unquoted = $options->{unquoted}; }␊
1946	␉if (defined($options->{regex})) { $regex = $options->{regex}; }␊
1947	␊
1948	␉my ($line,$ref) = $self->shiftline();␊
1949	␉my (@text,$paragraph);␊
1950	␉my ($eof,$found) = (0,0);␊
1951	␊
1952	␉$search = "\Q$search\E" unless $regex;␊
1953	␉while (defined($line) and !$found) {␊
1954	␉␉push @text, ($line,$ref);␊
1955	␉␉$paragraph .= $line;␊
1956	␉␉if ($unquoted) {␊
1957	␉␉␉if ( $paragraph =~ /^((\".?\")\|(\'.?\')\|[^\"\'])*$search/s ) {␊
1958	␉␉␉␉$found = 1;␊
1959	␉␉␉}␊
1960	␉␉} else {␊
1961	␉␉␉if ( $paragraph =~ /$search/s ) {␊
1962	␉␉␉␉$found = 1;␊
1963	␉␉␉}␊
1964	␉␉}␊
1965	␉␉if (!$found) {␊
1966	␉␉␉($line,$ref)=$self->shiftline();␊
1967	␉␉}␊
1968	␉}␊
1969	␊
1970	␉if (!defined($line)) { $eof = 1; }␊
1971	␊
1972	␉if ( $found ) {␊
1973	␉␉$line = "";␊
1974	␉␉if($unquoted) {␊
1975	␉␉␉$paragraph =~ /^(?:(?:\".?\")\|(?:\'.?\')\|[^\"\'])?$search(.)$/s;␊
1976	␉␉␉$line = $1;␊
1977	␉␉␉$text[$#text-1] =~ s/\Q$line\E$//s;␊
1978	␉␉} else {␊
1979	␉␉␉$paragraph =~ /$search(.*)$/s;␊
1980	␉␉␉$line = $1;␊
1981	␉␉␉$text[$#text-1] =~ s/\Q$line\E$//s;␊
1982	␉␉}␊
1983	␉␉if(!$include) {␊
1984	␉␉␉$text[$#text-1] =~ /^(.)($search.)$/s;␊
1985	␉␉␉$text[$#text-1] = $1;␊
1986	␉␉␉$line = $2.$line;␊
1987	␉␉}␊
1988	␉␉if (defined($line) and ($line ne "")) {␊
1989	␉␉␉$self->unshiftline ($line,$text[$#text]);␊
1990	␉␉}␊
1991	␉}␊
1992	␉if (!$remove) {␊
1993	␉␉$self->unshiftline (@text);␊
1994	␉}␊
1995	␊
1996	␉#If we get to the end of the file, we return the whole paragraph␊
1997	␉return ($eof,@text);␊
1998	}␊
1999	␊
2000	=item skip_spaces(\@)␊
2001	␊
2002	This function receives as argument the reference to a paragraph (in the format␊
2003	returned by get_string_until), skips his heading spaces and returns them as␊
2004	a simple string.␊
2005	␊
2006	=cut␊
2007	␊
2008	sub skip_spaces {␊
2009	␉my ($self,$pstring)=@_;␊
2010	␉my $space="";␊
2011	␊
2012	␉while (@$pstring and (@$pstring[0] =~ /^(\s+)(.*)$/s or @$pstring[0] eq "")) {␊
2013	␉␉if (@$pstring[0] ne "") {␊
2014	␉␉␉$space .= $1;␊
2015	␉␉␉@$pstring[0] = $2;␊
2016	␉␉}␊
2017	␊
2018	␉␉if (@$pstring[0] eq "") {␊
2019	␉␉␉shift @$pstring;␊
2020	␉␉␉shift @$pstring;␊
2021	␉␉}␊
2022	␉}␊
2023	␉return $space;␊
2024	}␊
2025	␊
2026	=item join_lines(@)␊
2027	␊
2028	This function returns a simple string with the text from the argument array␊
2029	(discarding the references).␊
2030	␊
2031	=cut␊
2032	␊
2033	sub join_lines {␊
2034	␉my ($self,@lines)=@_;␊
2035	␉my ($line,$ref);␊
2036	␉my $text = "";␊
2037	␉while ($#lines > 0) {␊
2038	␉␉($line,$ref) = (shift @lines,shift @lines);␊
2039	␉␉$text .= $line;␊
2040	␉}␊
2041	␉return $text;␊
2042	}␊
2043	␊
2044	=back␊
2045	␊
2046	=head1 STATUS OF THIS MODULE␊
2047	␊
2048	This module can translate tags and attributes.␊
2049	␊
2050	=head1 TODO LIST␊
2051	␊
2052	DOCTYPE (ENTITIES)␊
2053	␊
2054	There is a minimal support for the translation of entities. They are␊
2055	translated as a whole, and tags are not taken into account. Multilines␊
2056	entities are not supported and entities are always rewrapped during the␊
2057	translation.␊
2058	␊
2059	MODIFY TAG TYPES FROM INHERITED MODULES␊
2060	(move the tag_types structure inside the $self hash?)␊
2061	␊
2062	=head1 SEE ALSO␊
2063	␊
2064	L<Locale::Po4a::TransTractor(3pm)\|Locale::Po4a::TransTractor>,␊
2065	L<po4a(7)\|po4a.7>␊
2066	␊
2067	=head1 AUTHORS␊
2068	␊
2069	Jordi Vilalta <jvprat@gmail.com>␊
2070	Nicolas François <nicolas.francois@centraliens.net>␊
2071	␊
2072	=head1 COPYRIGHT AND LICENSE␊
2073	␊
2074	Copyright (c) 2004 by Jordi Vilalta <jvprat@gmail.com>␊
2075	Copyright (c) 2008-2009 by Nicolas François <nicolas.francois@centraliens.net>␊
2076	␊
2077	This program is free software; you may redistribute it and/or modify it␊
2078	under the terms of GPL (see the COPYING file).␊
2079	␊
2080	=cut␊
2081	␊
2082	1;␊
2083

Download this file