trunk/package/bin/po4a/lib/Locale/Po4a/Xml.pm - Chameleon Svn Source Tree - Chameleon open source boot loader project.

Root/trunk/package/bin/po4a/lib/Locale/Po4a/Xml.pm

1	#!/usr/bin/perl␊
2	␊
3	# Po4a::Xml.pm␊
4	#␊
5	# extract and translate translatable strings from XML documents.␊
6	#␊
7	# This code extracts plain text from tags and attributes from generic␊
8	# XML documents, and it can be used as a base to build modules for␊
9	# XML-based documents.␊
10	#␊
11	# Copyright (c) 2004 by Jordi Vilalta <jvprat@gmail.com>␊
12	# Copyright (c) 2008-2009 by Nicolas François <nicolas.francois@centraliens.net>␊
13	#␊
14	# This program is free software; you can redistribute it and/or modify␊
15	# it under the terms of the GNU General Public License as published by␊
16	# the Free Software Foundation; either version 2 of the License, or␊
17	# (at your option) any later version.␊
18	#␊
19	# This program is distributed in the hope that it will be useful,␊
20	# but WITHOUT ANY WARRANTY; without even the implied warranty of␊
21	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the␊
22	# GNU General Public License for more details.␊
23	#␊
24	# You should have received a copy of the GNU General Public License␊
25	# along with this program; if not, write to the Free Software␊
26	# Foundation, Inc.,␊
27	# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA␊
28	#␊
29	########################################################################␊
30	␊
31	=encoding UTF-8␊
32	␊
33	=head1 NAME␊
34	␊
35	Locale::Po4a::Xml - convert XML documents and derivates from/to PO files␊
36	␊
37	=head1 DESCRIPTION␊
38	␊
39	The po4a (PO for anything) project goal is to ease translations (and more␊
40	interestingly, the maintenance of translations) using gettext tools on␊
41	areas where they were not expected like documentation.␊
42	␊
43	Locale::Po4a::Xml is a module to help the translation of XML documents into␊
44	other [human] languages. It can also be used as a base to build modules for␊
45	XML-based documents.␊
46	␊
47	=cut␊
48	␊
49	package Locale::Po4a::Xml;␊
50	␊
51	use 5.006;␊
52	use strict;␊
53	use warnings;␊
54	␊
55	require Exporter;␊
56	use vars qw(@ISA @EXPORT);␊
57	@ISA = qw(Locale::Po4a::TransTractor);␊
58	@EXPORT = qw(new initialize @tag_types);␊
59	␊
60	use Locale::Po4a::TransTractor;␊
61	use Locale::Po4a::Common;␊
62	use Carp qw(croak);␊
63	use File::Basename;␊
64	use File::Spec;␊
65	␊
66	#It will maintain the path from the root tag to the current one␊
67	my @path;␊
68	␊
69	#It will contain a list of external entities and their attached paths␊
70	my %entities;␊
71	␊
72	my @comments;␊
73	my %translate_options_cache;␊
74	␊
# Remembers, across successive shiftline() calls, whether the last line
# handed out ended inside an unterminated XML comment (<!-- ... -->).
my $_shiftline_in_comment = 0;

# _xml_comment_state($text, $in_comment)
#
# Returns 1 if the end of $text lies inside an XML comment, 0 otherwise.
# $in_comment tells whether $text itself starts inside a comment.
sub _xml_comment_state {
    my ($text, $in_comment) = @_;

    if ($in_comment) {
        # Still inside the comment unless a terminator shows up.
        return 1 unless $text =~ m/^.*?-->(.*)$/s;
        $text = $1;
    }
    # Drop every complete comment; whatever opens afterwards is unterminated.
    while ($text =~ m/^.*?<!--.*?-->(.*)$/s) {
        $text = $1;
    }
    return ($text =~ m/<!--/s) ? 1 : 0;
}

# shiftline()
#
# Wrapper around TransTractor's shiftline(). When the 'includeexternal'
# option is set, references (&name;) to the external entities recorded in
# %entities are replaced by the content of the corresponding file, unless
# the reference is located inside an XML comment. The lines of the included
# file are pushed back on the input with "file:line" references.
#
# Returns the (line, reference) pair, or the undefined pair at end of input.
# Croaks if an entity file cannot be opened or closed.
sub shiftline {
    my $self = shift;
    # call Transtractor's shiftline
    my ($line,$ref) = $self->SUPER::shiftline();
    return ($line,$ref) if (not defined $line);

    if ($self->{options}{'includeexternal'}) {
        for my $k (keys %entities) {
            # \Q..\E: entity names may contain '.' or '-', which must be
            # matched literally.
            next unless $line =~ m/^(.*?)&\Q$k\E;(.*)$/s;
            my ($before, $after) = ($1, $2);

            # Do not expand an entity referenced from within a comment.
            next if _xml_comment_state($before, $_shiftline_in_comment);

            my $linenum = 0;
            my @textentries;

            open (my $in, '<', $entities{$k})
                or croak wrap_mod("po4a::xml",
                                  dgettext("po4a", "Can't read from %s: %s"),
                                  $entities{$k}, $!);
            while (defined (my $textline = <$in>)) {
                $linenum++;
                my $textref = $entities{$k}.":$linenum";
                push @textentries, ($textline,$textref);
            }
            close $in
                or croak wrap_mod("po4a::xml",
                                  dgettext("po4a", "Can't close %s after reading: %s"),
                                  $entities{$k}, $!);

            # The text following the reference comes after the included
            # file. The first included line is glued to the text preceding
            # the reference; the rest goes back on the input.
            push @textentries, ($after, $ref);
            $line = $before.(shift @textentries);
            $ref .= " ".(shift @textentries);
            $self->unshiftline(@textentries);
        }

        # Update the comment state for the next call.
        $_shiftline_in_comment =
            _xml_comment_state($line, $_shiftline_in_comment);
    }

    return ($line,$ref);
}
153	␊
# read($filename)
#
# Records $filename in the list of input files that parse() will process
# later, then delegates the actual reading to TransTractor::read().
sub read {
    my $self = shift;
    my ($filename) = @_;

    my $queued = $self->{DOCPOD}{infile} ||= [];
    push @$queued, $filename;

    $self->Locale::Po4a::TransTractor::read($filename);
}
159	␊
# parse()
#
# Runs parse_file() on every input file recorded by read(), in the order
# they were read, and returns the collected results of those calls.
sub parse {
    my ($self) = @_;

    my @infiles = @{$self->{DOCPOD}{infile}};
    my @results;
    foreach my $infile (@infiles) {
        push @results, $self->parse_file($infile);
    }
    return @results;
}
164	␊
165	# @save_holders is a stack of references to ('paragraph', 'translation',␊
166	# 'sub_translations', 'open', 'close', 'folded_attributes') hashes, where:␊
167	# paragraph is a reference to an array (see paragraph in the␊
168	# treat_content() subroutine) of strings followed by␊
169	# references. It contains the @paragraph array as it was␊
170	# before the processing was interrupted by a tag introducing␊
171	# a placeholder.␊
172	# translation is the translation of this level up to now␊
173	# sub_translations is a reference to an array of strings containing the␊
174	# translations which must replace the placeholders.␊
175	# open is the tag which opened the placeholder.␊
176	# close is the tag which closed the placeholder.␊
177	# folded_attributes is an hash of tags with their attributes (<tag attrs=...>␊
178	# strings), referenced by the folded tag id, which should␊
179	# replace the <tag po4a-id=id> strings in the current␊
180	# translation.␊
181	#␊
182	# If @save_holders only has 1 holder, then we are not processing the␊
183	# content of an holder, we are translating the document.␊
184	my @save_holders;␊
185	␊
186	␊
187	# If we are at the bottom of the stack and there is no <placeholder ...> in␊
188	# the current translation, we can push the translation in the translated␊
189	# document.␊
190	# Otherwise, we keep the translation in the current holder.␊
# pushline($line)
#
# Appends $line to the translation of the current holder (top of the
# @save_holders stack), expands folded attributes, and either keeps the
# result in the holder or forwards it to TransTractor's pushline().
#
# Folded attributes: every <tag po4a-id=N> found in the accumulated
# translation is replaced by the original "<tag attrs...>" string stored
# under id N in the holder's folded_attributes hash. Dies if the
# translation references an id which is unknown (or already consumed).
#
# The translation is only pushed to the output document when we are at the
# bottom of the holder stack and no <placeholder .../> remains in it.
sub pushline {
    my ($self, $line) = (shift, shift);

    my $holder = $save_holders[$#save_holders];
    my $translation = $holder->{'translation'};
    $translation .= $line;

    # The greedy (.*) makes each iteration handle the last folded tag.
    while (    %{$holder->{folded_attributes}}
           and $translation =~ m/^(.*)<([^>]+?)\s+po4a-id=([0-9]+)>(.*)$/s) {
        my $begin = $1;
        my $tag = $2;
        my $id = $3;
        my $end = $4;
        if (defined $holder->{folded_attributes}->{$id}) {
            # TODO: check if the tag is the same
            $translation = $begin.$holder->{folded_attributes}->{$id}.$end;
            delete $holder->{folded_attributes}->{$id};
        } else {
            # TODO: It will be hard to identify the location.
            # => find a way to retrieve the reference.
            die wrap_mod("po4a::xml", dgettext("po4a", "'po4a-id=%d' in the translation does not exist in the original string (or 'po4a-id=%d' used twice in the translation)."), $id, $id);
        }
    }
    # TODO: check that %folded_attributes is empty at some time
    # => in translate_paragraph?

    if (   ($#save_holders > 0)
        or ($translation =~ m/<placeholder\s+type="[^"]+"\s+id="(\d+)"\s*\/>/s)) {
        # Inside a holder, or placeholders still pending: keep accumulating.
        $holder->{'translation'} = $translation;
    } else {
        $self->SUPER::pushline($translation);
        $holder->{'translation'} = '';
    }
}
225	␊
226	=head1 TRANSLATING WITH PO4A::XML␊
227	␊
228	This module can be used directly to handle generic XML documents. This will␊
229	extract all tag's content, and no attributes, since it's where the text is␊
230	written in most XML based documents.␊
231	␊
232	There are some options (described in the next section) that can customize␊
233	this behavior. If this doesn't fit to your document format you're encouraged␊
234	to write your own module derived from this, to describe your format's details.␊
235	See the section B<WRITING DERIVATE MODULES> below, for the process description.␊
236	␊
237	=cut␊
238	␊
239	#␊
240	# Parse file and translate it␊
241	#␊
# parse_file($filename)
#
# Main parsing loop: alternately translate the text found up to the next
# breaking (non inline) tag, then treat that tag, until either step
# reports the end of the input.
sub parse_file {
    my ($self,$filename) = @_;

    while (1) {
        # Text up to the next breaking tag
        last if $self->treat_content;
        # The breaking tag itself
        last if $self->treat_tag;
    }
}
256	␊
257	=head1 OPTIONS ACCEPTED BY THIS MODULE␊
258	␊
259	The global debug option causes this module to show the excluded strings, in␊
260	order to see if it skips something important.␊
261	␊
262	These are this module's particular options:␊
263	␊
264	=over 4␊
265	␊
266	=item B<nostrip>␊
267	␊
268	Prevents it from stripping the spaces around the extracted strings.␊
269	␊
270	=item B<wrap>␊
271	␊
272	Canonizes the string to translate, considering that whitespaces are not␊
273	important, and wraps the translated document. This option can be overridden␊
274	by custom tag options. See the "tags" option below.␊
275	␊
276	=item B<caseinsensitive>␊
277	␊
278	It makes the tags and attributes searching to work in a case insensitive␊
279	way. If it's defined, it will treat E<lt>BooKE<gt>laNG and E<lt>BOOKE<gt>Lang as E<lt>bookE<gt>lang.␊
280	␊
281	=item B<includeexternal>␊
282	␊
283	When defined, external entities are included in the generated (translated)␊
284	document, and for the extraction of strings. If it's not defined, you␊
285	will have to translate external entities separately as independent␊
286	documents.␊
287	␊
288	=item B<ontagerror>␊
289	␊
290	This option defines the behavior of the module when it encounters invalid␊
291	XML syntax (a closing tag which does not match the last opening tag, or a␊
292	tag's attribute without value).␊
293	It can take the following values:␊
294	␊
295	=over␊
296	␊
297	=item I<fail>␊
298	␊
299	This is the default value.␊
300	The module will exit with an error.␊
301	␊
302	=item I<warn>␊
303	␊
304	The module will continue, and will issue a warning.␊
305	␊
306	=item I<silent>␊
307	␊
308	The module will continue without any warnings.␊
309	␊
310	=back␊
311	␊
312	Be careful when using this option.␊
313	It is generally recommended to fix the input file.␊
314	␊
315	=item B<tagsonly>␊
316	␊
317	Extracts only the specified tags in the "tags" option. Otherwise, it␊
318	will extract all the tags except the ones specified.␊
319	␊
320	Note: This option is deprecated.␊
321	␊
322	=item B<doctype>␊
323	␊
324	String that will try to match with the first line of the document's doctype␊
325	(if defined). If it doesn't, a warning will indicate that the document␊
326	might be of a bad type.␊
327	␊
328	=item B<addlang>␊
329	␊
330	String indicating the path (e.g. E<lt>bbbE<gt>E<lt>aaaE<gt>) of a tag␊
331	where a lang="..." attribute shall be added. The language will be defined␊
332	as the basename of the PO file without any .po extension.␊
333	␊
334	=item B<tags>␊
335	␊
336	Space-separated list of tags you want to translate or skip. By default,␊
337	the specified tags will be excluded, but if you use the "tagsonly" option,␊
338	the specified tags will be the only ones included. The tags must be in the␊
339	form E<lt>aaaE<gt>, but you can join some (E<lt>bbbE<gt>E<lt>aaaE<gt>) to say that the content of␊
340	the tag E<lt>aaaE<gt> will only be translated when it's into a E<lt>bbbE<gt> tag.␊
341	␊
342	You can also specify some tag options by putting some characters in front of␊
343	the tag hierarchy. For example, you can put 'w' (wrap) or 'W' (don't wrap)␊
344	to override the default behavior specified by the global "wrap" option.␊
345	␊
346	Example: WE<lt>chapterE<gt>E<lt>titleE<gt>␊
347	␊
348	Note: This option is deprecated.␊
349	You should use the B<translated> and B<untranslated> options instead.␊
350	␊
351	=item B<attributes>␊
352	␊
353	Space-separated list of tag's attributes you want to translate. You can␊
354	specify the attributes by their name (for example, "lang"), but you can␊
355	prefix it with a tag hierarchy, to specify that this attribute will only be␊
356	translated when it's into the specified tag. For example: E<lt>bbbE<gt>E<lt>aaaE<gt>lang␊
357	specifies that the lang attribute will only be translated if it's into an␊
358	E<lt>aaaE<gt> tag, and it's into a E<lt>bbbE<gt> tag.␊
359	␊
360	=item B<foldattributes>␊
361	␊
362	Do not translate attributes in inline tags.␊
363	Instead, replace all attributes of a tag by po4a-id=<id>.␊
364	␊
365	This is useful when attributes shall not be translated, as this simplifies the␊
366	strings for translators, and avoids typos.␊
367	␊
368	=item B<customtag>␊
369	␊
370	Space-separated list of tags which should not be treated as tags.␊
371	These tags are treated as inline, and do not need to be closed.␊
372	␊
373	=item B<break>␊
374	␊
375	Space-separated list of tags which should break the sequence.␊
376	By default, all tags break the sequence.␊
377	␊
378	The tags must be in the form <aaa>, but you can join some␊
379	(<bbb><aaa>), if a tag (<aaa>) should only be considered␊
380	when it's into another tag (<bbb>).␊
381	␊
382	=item B<inline>␊
383	␊
384	Space-separated list of tags which should be treated as inline.␊
385	By default, all tags break the sequence.␊
386	␊
387	The tags must be in the form <aaa>, but you can join some␊
388	(<bbb><aaa>), if a tag (<aaa>) should only be considered␊
389	when it's into another tag (<bbb>).␊
390	␊
391	=item B<placeholder>␊
392	␊
393	Space-separated list of tags which should be treated as placeholders.␊
394	Placeholders do not break the sequence, but the content of placeholders is␊
395	translated separately.␊
396	␊
397	The location of the placeholder in its block will be marked with a string␊
398	similar to:␊
399	␊
400	  <placeholder type="footnote" id="0"/>␊
401	␊
402	The tags must be in the form <aaa>, but you can join some␊
403	(<bbb><aaa>), if a tag (<aaa>) should only be considered␊
404	when it's into another tag (<bbb>).␊
405	␊
406	=item B<nodefault>␊
407	␊
408	Space separated list of tags that the module should not try to set by␊
409	default in any category.␊
410	␊
411	=item B<cpp>␊
412	␊
413	Support C preprocessor directives.␊
414	When this option is set, po4a will consider preprocessor directives as␊
415	paragraph separators.␊
416	This is important if the XML file must be preprocessed because otherwise␊
417	the directives may be inserted in the middle of lines if po4a considers they␊
418	belong to the current paragraph, and they won't be recognized by the␊
419	preprocessor.␊
420	Note: the preprocessor directives must only appear between tags␊
421	(they must not break a tag).␊
422	␊
423	=item B<translated>␊
424	␊
425	Space-separated list of tags you want to translate.␊
426	␊
427	The tags must be in the form <aaa>, but you can join some␊
428	(<bbb><aaa>), if a tag (<aaa>) should only be considered␊
429	when it's into another tag (<bbb>).␊
430	␊
431	You can also specify some tag options by putting some characters in front of␊
432	the tag hierarchy. For example, you can put 'w' (wrap) or 'W' (don't wrap)␊
433	to override the default behavior specified by the global "wrap" option.␊
434	␊
435	Example: WE<lt>chapterE<gt>E<lt>titleE<gt>␊
436	␊
437	=item B<untranslated>␊
438	␊
439	Space-separated list of tags you do not want to translate.␊
440	␊
441	The tags must be in the form <aaa>, but you can join some␊
442	(<bbb><aaa>), if a tag (<aaa>) should only be considered␊
443	when it's into another tag (<bbb>).␊
444	␊
445	=item B<defaulttranslateoption>␊
446	␊
447	The default categories for tags that are not in any of the translated,␊
448	untranslated, break, inline, or placeholder.␊
449	␊
450	This is a set of letters:␊
451	␊
452	=over␊
453	␊
454	=item I<w>␊
455	␊
456	Tags should be translated and content can be re-wrapped.␊
457	␊
458	=item I<W>␊
459	␊
460	Tags should be translated and content should not be re-wrapped.␊
461	␊
462	=item I<i>␊
463	␊
464	Tags should be translated inline.␊
465	␊
466	=item I<p>␊
467	␊
468	Tags should be translated as placeholders.␊
469	␊
470	=back␊
471	␊
472	=back␊
473	␊
474	=cut␊
475	# TODO: defaulttranslateoption␊
476	# w => indicate that it is only valid for translatable tags and do not␊
477	# care about inline/break/placeholder?␊
478	# ...␊
479	␊
# initialize(%options)
#
# Resets the parser state (tag path, holder stack, translate options
# cache), installs the default value of every option known by this module,
# applies the options given by the user and calls treat_options().
#
# Dies on a user option (with a true value) which is not a known option.
sub initialize {
    my ($self, %options) = @_;

    # Reset the path
    @path = ();

    # Initialize the stack of holders with a single, empty, holder
    @save_holders = ({
        'paragraph'         => [],
        'translation'       => "",
        'sub_translations'  => [],
        'folded_attributes' => {},
    });

    # Options accepted from the user, with their default value
    my %defaults = (
        'addlang'                => 0,
        'nostrip'                => 0,
        'wrap'                   => 0,
        'caseinsensitive'        => 0,
        'tagsonly'               => 0,
        'tags'                   => '',
        'break'                  => '',
        'translated'             => '',
        'untranslated'           => '',
        'defaulttranslateoption' => '',
        'attributes'             => '',
        'foldattributes'         => 0,
        'inline'                 => '',
        'placeholder'            => '',
        'customtag'              => '',
        'doctype'                => '',
        'nodefault'              => '',
        'includeexternal'        => 0,
        'ontagerror'             => "fail",
        'cpp'                    => 0,
        'verbose'                => '',
        'debug'                  => '',
    );
    while (my ($name, $value) = each %defaults) {
        $self->{options}{$name} = $value;
    }

    # Only options with a true value are checked and taken into account
    foreach my $opt (keys %options) {
        next unless $options{$opt};
        exists $self->{options}{$opt}
            or die wrap_mod("po4a::xml",
                            dgettext("po4a", "Unknown option: %s"), $opt);
        $self->{options}{$opt} = $options{$opt};
    }

    # Default options set by modules. Forbidden for users.
    foreach my $category (qw(translated untranslated break inline
                             placeholder attributes customtag)) {
        $self->{options}{"_default_$category"} = '';
    }

    # Lists maintained by treat_options: the translatable tags (tags,
    # translated, untranslated), the translatable attributes, the breaking,
    # inline, placeholder and customtag tags, and the tags that must not be
    # set in any category by this module or a sub-module (nodefault).
    foreach my $list (qw(tags translated untranslated attributes break
                         inline placeholder customtag nodefault)) {
        $self->{$list} = undef;
    }

    $self->treat_options;

    # Clear cache
    %translate_options_cache = ();
}
561	␊
562	=head1 WRITING DERIVATE MODULES␊
563	␊
564	=head2 DEFINE WHAT TAGS AND ATTRIBUTES TO TRANSLATE␊
565	␊
566	The simplest customization is to define which tags and attributes you want␊
567	the parser to translate. This should be done in the initialize function.␊
568	First you should call the main initialize, to get the command-line options,␊
569	and then, append your custom definitions to the options hash. If you want␊
570	to treat some new options from command line, you should define them before␊
571	calling the main initialize:␊
572	␊
573	  $self->{options}{'new_option'}='';␊
574	  $self->SUPER::initialize(%options);␊
575	  $self->{options}{'_default_translated'}.=' <p> <head><title>';␊
576	  $self->{options}{'attributes'}.=' <p>lang id';␊
577	  $self->{options}{'_default_inline'}.=' <br>';␊
578	  $self->treat_options;␊
579	␊
580	You should use the B<_default_inline>, B<_default_break>,␊
581	B<_default_placeholder>, B<_default_translated>, B<_default_untranslated>,␊
582	and B<_default_attributes> options in derivated modules. This allows users␊
583	to override the default behavior defined in your module with command line␊
584	options.␊
585	␊
586	=head2 OVERRIDING THE found_string FUNCTION␊
587	␊
588	Another simple step is to override the function "found_string", which␊
589	receives the extracted strings from the parser, in order to translate them.␊
590	There you can control which strings you want to translate, and perform␊
591	transformations to them before or after the translation itself.␊
592	␊
593	It receives the extracted text, the reference on where it was, and a hash␊
594	that contains extra information to control what strings to translate, how␊
595	to translate them and to generate the comment.␊
596	␊
597	The content of these options depends on the kind of string it is (specified in an␊
598	entry of this hash):␊
599	␊
600	=over␊
601	␊
602	=item type="tag"␊
603	␊
604	The found string is the content of a translatable tag. The entry "tag_options"␊
605	contains the option characters in front of the tag hierarchy in the module␊
606	"tags" option.␊
607	␊
608	=item type="attribute"␊
609	␊
610	Means that the found string is the value of a translatable attribute. The␊
611	entry "attribute" has the name of the attribute.␊
612	␊
613	=back␊
614	␊
615	It must return the text that will replace the original in the translated␊
616	document. Here's a basic example of this function:␊
617	␊
618	  sub found_string {␊
619	      my ($self,$text,$ref,$options)=@_;␊
620	      $text = $self->translate($text,$ref,"type ".$options->{'type'},␊
621	                               'wrap'=>$self->{options}{'wrap'});␊
622	      return $text;␊
623	  }␊
624	␊
625	There's another simple example in the new Dia module, which only filters␊
626	some strings.␊
627	␊
628	=cut␊
629	␊
# found_string($text, $ref, $options)
#
# Default handler of the strings extracted by the parser. Strings made
# only of whitespace are returned untouched. Other strings are sent to
# translate() with a comment depending on $options->{'type'}:
#   "tag"       content of a tag; the 'w'/'W' characters found in
#               $options->{'tag_options'} force/forbid wrapping
#   "attribute" value of the attribute named $options->{'attribute'}
#   "CDATA"     CDATA section, never wrapped
# Any other type is an internal error (dies).
#
# Returns the translated text.
sub found_string {
    my ($self,$text,$ref,$options)=@_;

    return $text if $text =~ m/^\s*$/s;

    my $type = $options->{'type'};
    my $wrap = $self->{options}{'wrap'};
    my $comment;

    if ($type eq "tag") {
        $comment = "Content of: ".$self->get_path;
        my $tag_options = $options->{'tag_options'};
        # 'W' is tested last: it wins when both letters are present
        $wrap = 1 if $tag_options =~ /w/;
        $wrap = 0 if $tag_options =~ /W/;
    }
    elsif ($type eq "attribute") {
        $comment = "Attribute '".$options->{'attribute'}."' of: ".$self->get_path;
    }
    elsif ($type eq "CDATA") {
        $comment = "CDATA";
        $wrap = 0;
    }
    else {
        die wrap_ref_mod($ref, "po4a::xml", dgettext("po4a", "Internal error: unknown type identifier '%s'."), $options->{'type'});
    }

    return $self->translate($text, $ref, $comment,
                            'wrap' => $wrap,
                            comment => $options->{'comments'});
}
660	␊
661	=head2 MODIFYING TAG TYPES (TODO)␊
662	␊
663	This is a more complex one, but it enables an (almost) total customization.␊
664	It's based on a list of hashes, each one defining a tag type's behavior. The␊
665	list should be sorted so that the most general tags are after the most␊
666	concrete ones (sorted first by the beginning and then by the end keys). To␊
667	define a tag type you'll have to make a hash with the following keys:␊
668	␊
669	=over 4␊
670	␊
671	=item B<beginning>␊
672	␊
673	Specifies the beginning of the tag, after the "E<lt>".␊
674	␊
675	=item B<end>␊
676	␊
677	Specifies the end of the tag, before the "E<gt>".␊
678	␊
679	=item B<breaking>␊
680	␊
681	It says if this is a breaking tag class. A non-breaking (inline) tag is one␊
682	that can be taken as part of the content of another tag. It can take the␊
683	values false (0), true (1) or undefined. If you leave this undefined, you'll␊
684	have to define the f_breaking function that will say whether a concrete tag of␊
685	this class is a breaking tag or not.␊
686	␊
687	=item B<f_breaking>␊
688	␊
689	It's a function that will tell if the next tag is a breaking one or not. It␊
690	should be defined if the B<breaking> option is not.␊
691	␊
692	=item B<f_extract>␊
693	␊
694	If you leave this key undefined, the generic extraction function will have to␊
695	extract the tag itself. It's useful for tags that can have other tags or␊
696	special structures in them, so that the main parser doesn't get mad. This␊
697	function receives a boolean that says if the tag should be removed from the␊
698	input stream or not.␊
699	␊
700	=item B<f_translate>␊
701	␊
702	This function receives the tag (in the get_string_until() format) and returns␊
703	the translated tag (translated attributes or all needed transformations) as a␊
704	single string.␊
705	␊
706	=back␊
707	␊
708	=cut␊
709	␊
710	##### Generic XML tag types #####'␊
711	␊
# Table of the generic XML tag types. The entries are ordered so that the
# most specific beginning/end pairs come before the more general ones; the
# last entry (empty beginning and end) is the catch-all for opening tags.
our @tag_types = (
    # <!--# ... -->
    {
        beginning   => "!--#",
        end         => "--",
        breaking    => 0,
        f_extract   => \&tag_extract_comment,
        f_translate => \&tag_trans_comment,
    },
    # <!-- ... -->
    {
        beginning   => "!--",
        end         => "--",
        breaking    => 0,
        f_extract   => \&tag_extract_comment,
        f_translate => \&tag_trans_comment,
    },
    # <?xml ... ?>
    {
        beginning   => "?xml",
        end         => "?",
        breaking    => 1,
        f_translate => \&tag_trans_xmlhead,
    },
    # <? ... ?>
    {
        beginning   => "?",
        end         => "?",
        breaking    => 1,
        f_translate => \&tag_trans_procins,
    },
    # <!DOCTYPE ... >
    {
        beginning   => "!DOCTYPE",
        end         => "",
        breaking    => 1,
        f_extract   => \&tag_extract_doctype,
        f_translate => \&tag_trans_doctype,
    },
    # <![CDATA[ ... ]]>
    {
        beginning   => "![CDATA[",
        end         => "]]",
        breaking    => 1,
        f_extract   => \&CDATA_extract,
        f_translate => \&CDATA_trans,
    },
    # </tag>
    {
        beginning   => "/",
        end         => "",
        f_breaking  => \&tag_break_close,
        f_translate => \&tag_trans_close,
    },
    # <tag/>
    {
        beginning   => "",
        end         => "/",
        f_breaking  => \&tag_break_alone,
        f_translate => \&tag_trans_alone,
    },
    # <tag>
    {
        beginning   => "",
        end         => "",
        f_breaking  => \&tag_break_open,
        f_translate => \&tag_trans_open,
    },
);
754	␊
# tag_extract_comment($remove)
#
# Extracts a comment: everything up to and including the next '-->'.
# $remove is forwarded to get_string_until() and tells whether the tag is
# removed from the input. Returns the (eof, tag pieces...) list.
sub tag_extract_comment {
    my $self   = shift;
    my $remove = shift;

    my %search = (include => 1, remove => $remove);
    my ($at_eof, @pieces) = $self->get_string_until('-->', \%search);
    return ($at_eof, @pieces);
}
760	␊
# tag_trans_comment(@tag)
#
# Comments are not translated: the pieces of the tag are simply joined.
sub tag_trans_comment {
    my $self   = shift;
    my @pieces = @_;
    return $self->join_lines(@pieces);
}
765	␊
# tag_trans_xmlhead(@tag)
#
# Handles the XML declaration (<?xml ... ?>). Nothing is translated here,
# but the encoding is adjusted: the declared input charset (if any) is
# reported through detected_charset() and replaced by the output charset.
# When no encoding is declared, an encoding="..." attribute is inserted
# before 'standalone' if present, or appended to the tag otherwise.
#
# Returns the tag as a single string.
sub tag_trans_xmlhead {
    my ($self,@tag)=@_;

    # We don't have to translate anything from here: throw away references
    my $tag = $self->join_lines(@tag);

    # The value may be quoted with " or ', or unquoted (ended by a space).
    # Only read the capture when the match succeeded: after a failed match
    # $3 would still hold the value of an earlier, unrelated, match.
    my $in_charset;
    if ($tag =~ /encoding=(("|')|)(.*?)(\s|\2)/s) {
        $in_charset = $3;
    }
    $self->detected_charset($in_charset);
    my $out_charset=$self->get_out_charset;

    if (defined $in_charset) {
        # \Q..\E: the charset name is a literal string, not a pattern
        $tag =~ s/\Q$in_charset\E/$out_charset/;
    } else {
        if ($tag =~ m/standalone/) {
            $tag =~ s/(standalone)/encoding="$out_charset" $1/;
        } else {
            $tag.= " encoding=\"$out_charset\"";
        }
    }

    return $tag;
}
788	␊
# tag_trans_procins(@tag)
#
# Processing instructions (<? ... ?>) are not translated: the pieces of
# the tag are simply joined.
sub tag_trans_procins {
    my $self   = shift;
    my @pieces = @_;
    return $self->join_lines(@pieces);
}
793	␊
# tag_extract_doctype($remove)
#
# Extracts a <!DOCTYPE ...> declaration. The input is first peeked (not
# removed) up to the first unquoted '>' to detect an internal subset
# (declarations between [ and ]): in that case the DOCTYPE only ends at
# the ']' followed by '>', otherwise it ends at the first '>'.
#
# $remove is forwarded to get_string_until() for the real extraction.
# Returns the (eof, tag pieces...) list.
sub tag_extract_doctype {
    my ($self,$remove)=(shift,shift);

    # Check if there is an internal subset (between []).
    my ($eof,@tag)=$self->get_string_until('>',{include=>1,unquoted=>1});

    # @tag alternates strings and references: only keep the strings
    my $paragraph = "";
    for (my $i = 0; $i < @tag; $i += 2) {
        $paragraph .= $tag[$i];
    }

    # An opening '[' followed by another tag start before the first '>'
    if ($paragraph =~ m/<.*\[.*</s) {
        ($eof,@tag)=$self->get_string_until(']\s*>',{include=>1,remove=>$remove,unquoted=>1,regex=>1});
    } else {
        ($eof,@tag)=$self->get_string_until('>',{include=>1,remove=>$remove,unquoted=>1});
    }
    return ($eof,@tag);
}
814	␊
# tag_trans_doctype(@tag)
#
# Handles a <!DOCTYPE ...> declaration (@tag alternates strings and their
# references):
#  - warns when the first line does not match the 'doctype' option. This
#    check is not really reliable. There are system and public
#    identifiers. Only the public one could be checked reliably.
#  - for each <!ENTITY ...> declaration starting one of the strings:
#     * external entities (SYSTEM "file") are recorded in %entities, with
#       a path relative to the directory of the document, when the
#       'includeexternal' option is set;
#     * the quoted value of internal, non-parameter, entities is
#       translated.
#
# Returns the (possibly translated) tag as a single string.
sub tag_trans_doctype {
    my ($self,@tag)=@_;
    if (defined $self->{options}{'doctype'} ) {
        my $doctype = $self->{options}{'doctype'};
        if ( $tag[0] !~ /\Q$doctype\E/i ) {
            warn wrap_ref_mod($tag[1], "po4a::xml", dgettext("po4a", "Bad document type. '%s' expected. You can fix this warning with a -o doctype option, or ignore this check with -o doctype=\"\"."), $doctype);
        }
    }

    # Directory of the document, deduced from the "file:line" reference
    my $basedir = $tag[1];
    $basedir =~ s/:[0-9]+$//;
    $basedir = dirname($basedir);

    for (my $i = 0; $i < $#tag; $i += 2) {
        my ($t, $ref) = @tag[$i, $i+1];
        next unless $t =~ /^(\s*<!ENTITY\s+)(.*)$/is;

        # $part1 accumulates what was already parsed, $part2 is the rest
        my ($part1, $part2) = ($1, $2);
        my $includenow = 0;
        my $file = 0;

        # Parameter entity: <!ENTITY % name ...>
        if ($part2 =~ /^(%\s+)(.*)$/s ) {
            $part1.= $1;
            $part2 = $2;
            $includenow = 1;
        }

        # Entity name. A declaration without anything after the name is
        # left untouched rather than parsed from stale captures.
        next unless $part2 =~ /^(\S+)(\s+)(.*)$/s;
        my $name = $1;
        $part1.= $1.$2;
        $part2 = $3;

        if ( $part2 =~ /^(SYSTEM\s+)(.*)$/is ) {
            $part1.= $1;
            $part2 = $2;
            $file = 1;
            if ($self->{options}{'includeexternal'}) {
                # Strip the quotes and whatever follows the file name
                my $entity_file = $part2;
                $entity_file =~ s/^"?(.*?)".*$/$1/s;
                $entities{$name} = File::Spec->catfile($basedir, $entity_file);
            }
        }

        if (    (not $file) and (not $includenow)
            and $part2 =~ m/^\s*(["'])(.*)\1(\s*>.*)$/s) {
            my ($quote, $text, $rest) = ($1, $2, $3);
            my $comment = "Content of the $name entity";
            $text = $self->translate($text,
                                     $ref,
                                     $comment,
                                     'wrap'=>1);
            $tag[$i] = $part1."$quote$text$quote$rest";
        }
    }
    return $self->join_lines(@tag);
}
880	␊
# tag_break_close(@tag)
#
# Tells whether a closing tag breaks the sequence: returns 0 when the
# translate options of the current path contain 'i' or 'p', 1 otherwise.
sub tag_break_close {
    my ($self,@tag)=@_;

    my $current = $self->get_path;
    my $flags   = $self->get_translate_options($current);
    return ($flags =~ m/[ip]/) ? 0 : 1;
}
891	␊
# tag_trans_close(@tag)
#
# Handles a closing tag: pops the last element of the current path and
# checks that it matches the name of the closing tag. On a mismatch (or
# when no tag is open), the 'ontagerror' option decides what happens:
#   "warn"    a warning is issued and the processing continues
#   "silent"  the processing continues silently
#   otherwise the module dies (this is the "fail" default)
#
# Returns the tag as a single string.
sub tag_trans_close {
    my ($self,@tag)=@_;
    my $name = $self->get_tag_name(@tag);

    my $test = pop @path;
    if (!defined($test) || $test ne $name ) {
        my $ontagerror = $self->{options}{'ontagerror'};
        if ($ontagerror eq "warn") {
            warn wrap_ref_mod($tag[1], "po4a::xml", dgettext("po4a", "Unexpected closing tag </%s> found. The main document may be wrong.  Continuing..."), $name);
        } elsif ($ontagerror ne "silent") {
            die wrap_ref_mod($tag[1], "po4a::xml", dgettext("po4a", "Unexpected closing tag </%s> found. The main document may be wrong."), $name);
        }
    }
    return $self->join_lines(@tag);
}
907	␊
# CDATA_extract($remove)
#
# Extracts a CDATA section: everything up to and including the next ']]>'
# (quotes are not taken into account). $remove is forwarded to
# get_string_until(). Returns the (eof, tag pieces...) list.
sub CDATA_extract {
    my $self   = shift;
    my $remove = shift;

    my %search = (include => 1, unquoted => 0, remove => $remove);
    my ($at_eof, @pieces) = $self->get_string_until(']]>', \%search);

    return ($at_eof, @pieces);
}
914	␊
# CDATA_trans(@tag)
#
# Translates a CDATA section: the joined tag is handed to found_string()
# with the "CDATA" type and the reference of its first line.
sub CDATA_trans {
    my $self = shift;
    my @tag  = @_;

    my %info = ('type' => "CDATA");
    return $self->found_string($self->join_lines(@tag), $tag[1], \%info);
}
921	␊
# tag_break_alone(@tag)
#
# Tells whether a stand-alone tag (<tag/>) breaks the sequence: returns 0
# when the translate options of the path extended with this tag contain
# 'i', 1 otherwise.
sub tag_break_alone {
    my ($self,@tag)=@_;

    my $struct = $self->get_path($self->get_tag_name(@tag));
    my $flags  = $self->get_translate_options($struct);
    return 0 if $flags =~ m/i/;
    return 1;
}
931	␊
# tag_trans_alone(@tag)
#
# Handles a stand-alone tag (<tag/>): its name is only added to the
# current path for the time its attributes are treated. Returns the value
# returned by treat_attributes().
sub tag_trans_alone {
    my ($self,@tag)=@_;

    push @path, scalar $self->get_tag_name(@tag);
    my $translated = $self->treat_attributes(@tag);
    pop @path;

    return $translated;
}
942	␊
# tag_break_open(@tag)
#
# Tells whether an opening tag breaks the sequence: returns 0 when the
# translate options of the path extended with this tag contain 'i' or
# 'p', 1 otherwise.
sub tag_break_open {
    my ($self,@tag)=@_;

    my $struct = $self->get_path($self->get_tag_name(@tag));
    my $flags  = $self->get_translate_options($struct);
    return ($flags =~ m/[ip]/) ? 0 : 1;
}
953	␊
# Translate an opening tag.  The tag name is pushed on @path and stays
# there.  When the 'addlang' option is defined and equals the resulting
# path, a lang="..." attribute built from the input PO's language is
# appended.  Returns the tag text produced by treat_attributes.
sub tag_trans_open {
	my ($self, @tag) = @_;
	my $tag_name = $self->get_tag_name(@tag);
	push @path, $tag_name;

	my $rendered = $self->treat_attributes(@tag);

	my $lang_target = $self->{options}{'addlang'};
	if (defined $lang_target and $self->get_path() eq $lang_target) {
		$rendered .= ' lang="'.$self->{TT}{po_in}->{lang}.'"';
	}

	return $rendered;
}
970	␊
971	##### END of Generic XML tag types #####␊
972	␊
=head1 INTERNAL FUNCTIONS used to write derived parsers
974	␊
975	=head2 WORKING WITH TAGS␊
976	␊
977	=over 4␊
978	␊
979	=item get_path()␊
980	␊
981	This function returns the path to the current tag from the document's root,␊
982	in the form E<lt>htmlE<gt>E<lt>bodyE<gt>E<lt>pE<gt>.␊
983	␊
984	An additional array of tags (without brackets) can be passed as argument.␊
985	These path elements are added to the end of the current path.␊
986	␊
987	=cut␊
988	␊
# Build the textual path "<a><b>..." from @path followed by any extra
# element names given as arguments.  When both are empty, the fixed
# string "outside any tag (error?)" is returned instead.
sub get_path {
	my ($self, @add) = @_;
	my @elements = (@path, @add);
	return "outside any tag (error?)" unless @elements;
	return "<".join("><", @elements).">";
}
998	␊
999	=item tag_type()␊
1000	␊
1001	This function returns the index from the tag_types list that fits to the next␊
1002	tag in the input stream, or -1 if it's at the end of the input file.␊
1003	␊
1004	=cut␊
1005	␊
# Find which entry of @tag_types matches the next tag of the input.
# Returns the index of the first matching entry, or -1 when there is no
# more input.  Dies if no entry matches.
sub tag_type {
	my $self = shift;
	my ($line, $ref) = $self->shiftline();
	return -1 unless defined $line;

	# Only peeking: put the line back, then read (without removing) up to
	# the first unquoted '>' to see how the tag ends.
	$self->unshiftline($line, $ref);
	my ($eof, @lines) = $self->get_string_until(">", {include=>1,unquoted=>1});
	my $whole_tag = $self->join_lines(@lines);

	for my $idx (0 .. $#tag_types) {
		my ($opening, $closing) = ($tag_types[$idx]->{beginning}, $tag_types[$idx]->{end});
		next unless $line =~ /^<\Q$opening\E/;

		# Types with their own extractor are accepted on the beginning alone.
		return $idx if defined($tag_types[$idx]->{f_extract});

		# Otherwise the end marker must also match.
		return $idx if defined($whole_tag) and $whole_tag =~ /\Q$closing\E>$/;
	}

	# It should never get here, unless the most general tag types
	# (as <...>) have been undefined.
	chomp $line;
	die $ref.": Unknown tag type: ".$line."\n";
}
1046	␊
1047	=item extract_tag($$)␊
1048	␊
1049	This function returns the next tag from the input stream without the beginning␊
1050	and end, in an array form, to maintain the references from the input file. It␊
1051	has two parameters: the type of the tag (as returned by tag_type) and a␊
1052	boolean, that indicates if it should be removed from the input stream.␊
1053	␊
1054	=cut␊
1055	␊
# Fetch the next tag of the given type from the input.
#   $type   - index into @tag_types (as returned by tag_type)
#   $remove - passed on to the extractor: whether to consume the input
# The type's beginning marker is stripped from the first line and its end
# marker from the last one.
# Returns the end-of-file flag followed by the (line, reference) list.
sub extract_tag {
	my ($self, $type, $remove) = @_;
	my ($opening, $closing) = ($tag_types[$type]->{beginning}, $tag_types[$type]->{end});

	my ($eof, @tag);
	my $extractor = $tag_types[$type]->{f_extract};
	if (defined($extractor)) {
		($eof, @tag) = $extractor->($self, $remove);
	} else {
		($eof, @tag) = $self->get_string_until($closing.">", {include=>1,remove=>$remove,unquoted=>1});
	}

	# Drop "<" plus the beginning marker from the first text line.
	$tag[0] =~ /^<\Q$opening\E(.*)$/s;
	$tag[0] = $1;

	# Drop the end marker plus ">" from the last text line (the final
	# array slot holds that line's reference).
	my $last_text = $#tag - 1;
	$tag[$last_text] =~ /^(.*)\Q$closing\E>$/s;
	$tag[$last_text] = $1;

	return ($eof, @tag);
}
1071	␊
1072	=item get_tag_name(@)␊
1073	␊
1074	This function returns the name of the tag passed as an argument, in the array␊
1075	form returned by extract_tag.␊
1076	␊
1077	=cut␊
1078	␊
# Return the tag name: the leading run of non-blank characters of the
# first line of a tag given in the array form returned by extract_tag.
sub get_tag_name {
	my ($self, @tag) = @_;
	my ($tag_name) = $tag[0] =~ /^(\S*)/;
	return $tag_name;
}
1084	␊
1085	=item breaking_tag()␊
1086	␊
1087	This function returns a boolean that says if the next tag in the input stream␊
1088	is a breaking tag or not (inline tag). It leaves the input stream intact.␊
1089	␊
1090	=cut␊
1091	␊
# Say whether the next tag of the input is a breaking tag.
# Returns 0 at end of input.  Otherwise returns the tag type's fixed
# 'breaking' value, or, when that is undefined, the result of the type's
# f_breaking callback applied to the tag (extracted without removal).
sub breaking_tag {
	my $self = shift;

	my $type = $self->tag_type;
	return 0 if $type == -1;

	my $break = $tag_types[$type]->{breaking};
	unless (defined($break)) {
		# This tag's breaking depends on its content
		my ($eof, @lines) = $self->extract_tag($type, 0);
		$break = $tag_types[$type]->{f_breaking}->($self, @lines);
	}
	return $break;
}
1109	␊
1110	=item treat_tag()␊
1111	␊
This function translates the next tag from the input stream, using each
tag type's custom translation function.
1114	␊
1115	=cut␊
1116	␊
# Translate the next tag of the input and push the result to the output.
# The tag is extracted (and removed from the input), its leading and
# trailing whitespace are set aside, the tag type's f_translate callback
# is applied to what remains, and the tag is rebuilt with its original
# markers and whitespace.  Returns the end-of-file flag from extract_tag.
sub treat_tag {
	my $self = shift;
	my $type = $self->tag_type;

	my ($match1,$match2) = ($tag_types[$type]->{beginning},$tag_types[$type]->{end});
	my ($eof,@lines) = $self->extract_tag($type,1);

	# Leading whitespace of the first line.  The quantifiers are required:
	# without them the pattern only matches two-character strings and the
	# captures are left stale.
	$lines[0] =~ /^(\s*)(.*)$/s;
	my $space1 = $1;
	$lines[0] = $2;
	# Trailing whitespace of the last text line (the final array slot is
	# that line's reference).
	$lines[$#lines-1] =~ /^(.*?)(\s*)$/s;
	my $space2 = $2;
	$lines[$#lines-1] = $1;

	# Calling this tag type's specific handling (translation of
	# attributes...)
	my $line = &{$tag_types[$type]->{f_translate}}($self,@lines);
	$self->pushline("<".$match1.$space1.$line.$space2.$match2.">");
	return $eof;
}
1137	␊
1138	=item tag_in_list($@)␊
1139	␊
1140	This function returns a string value that says if the first argument (a tag␊
1141	hierarchy) matches any of the tags from the second argument (a list of tags␊
1142	or tag hierarchies). If it doesn't match, it returns 0. Else, it returns the␊
1143	matched tag's options (the characters in front of the tag) or 1 (if that tag␊
1144	doesn't have options).␊
1145	␊
1146	=back␊
1147	␊
1148	=cut␊
# Look a tag path up in a hash of tags / tag hierarchies.
# The full path is tried first, then the path with its leading element
# removed, and so on.  On a hit the stored options are returned, or 1
# when they are the empty string; 0 is returned when nothing matches.
# With the 'caseinsensitive' option the path is lower-cased first.
sub tag_in_list ($$$) {
	my ($self, $path, $list) = @_;
	$path = lc $path if $self->{options}{'caseinsensitive'};

	for (;;) {
		my $stored = $list->{$path};
		if (defined $stored) {
			return length($stored) ? $stored : 1;
		}
		# Nothing left to strip: no match at all.
		return 0 if $path !~ m/</;
		$path =~ s/^<.*?>//;
	}
}
1169	␊
1170	=head2 WORKING WITH ATTRIBUTES␊
1171	␊
1172	=over 4␊
1173	␊
1174	=item treat_attributes(@)␊
1175	␊
1176	This function handles the translation of the tags' attributes. It receives the tag␊
1177	without the beginning / end marks, and then it finds the attributes, and it␊
1178	translates the translatable ones (specified by the module option "attributes").␊
1179	This returns a plain string with the translated tag.␊
1180	␊
1181	=back␊
1182	␊
1183	=cut␊
1184	␊
# Translate the attributes of a tag.
# @tag is the tag in array form (text, reference, text, reference, ...)
# without its beginning/end marks.  The tag name and all whitespace are
# copied as they are.  Each attribute value is passed to found_string
# when the current path followed by the attribute name is found in
# $self->{attributes}; otherwise it is only recoded.
# An attribute with no complete "=value" part is reported according to
# the 'ontagerror' option ("warn" warns, "silent" ignores, anything else
# dies).  Returns the rebuilt tag as a plain string.
sub treat_attributes {
	my ($self,@tag)=@_;

	# Split the tag name from the rest of the first line.  Both
	# quantifiers are required, or only the first two characters match.
	$tag[0] =~ /^(\S*)(.*)/s;
	my $text = $1;
	$tag[0] = $2;

	while (@tag) {
		my $complete = 1;

		$text .= $self->skip_spaces(\@tag);
		if (@tag) {
			# Get the attribute's name
			$complete = 0;

			$tag[0] =~ /^([^\s=]+)(.*)/s;
			my $name = $1;
			my $ref = $tag[1];
			$tag[0] = $2;
			$text .= $name;
			$text .= $self->skip_spaces(\@tag);
			if (@tag) {
				# Get the '='
				if ($tag[0] =~ /^=(.*)/s) {
					$tag[0] = $1;
					$text .= "=";
					$text .= $self->skip_spaces(\@tag);
					if (@tag) {
						# Get the value
						my $value="";
						$ref=$tag[1];
						my $quot=substr($tag[0],0,1);
						# Cleared when a quoted value has no closing quote.
						my $closed = 1;
						if ($quot ne "\"" and $quot ne "'") {
							# Unquoted value
							$quot="";
							$tag[0] =~ /^(\S+)(.*)/s;
							$value = $1;
							$tag[0] = $2;
						} else {
							# Quoted value
							$text .= $quot;
							$tag[0] =~ /^\Q$quot\E(.*)/s;
							$tag[0] = $1;
							# Collect whole lines until one holds the
							# closing quote.  Stop when @tag runs out,
							# or an unterminated quote loops forever.
							while (@tag and $tag[0] !~ /\Q$quot\E/) {
								$value .= $tag[0];
								shift @tag;
								shift @tag;
							}
							if (@tag) {
								$tag[0] =~ /^(.*?)\Q$quot\E(.*)/s;
								$value .= $1;
								$tag[0] = $2;
							} else {
								$closed = 0;
							}
						}
						if ($closed) {
							$complete = 1;
							if ($self->tag_in_list($self->get_path.$name,$self->{attributes})) {
								$text .= $self->found_string($value, $ref, { type=>"attribute", attribute=>$name });
							} else {
								print wrap_ref_mod($ref, "po4a::xml", dgettext("po4a", "Content of attribute %s excluded: %s"), $self->get_path.$name, $value)
								       if $self->debug();
								$text .= $self->recode_skipped_text($value);
							}
							$text .= $quot;
						} else {
							# Unterminated quote: keep the text seen so
							# far untranslated; it is reported below as
							# bad attribute syntax.
							$text .= $self->recode_skipped_text($value);
						}
					}
				}
			}

			unless ($complete) {
				my $ontagerror = $self->{options}{'ontagerror'};
				if ($ontagerror eq "warn") {
					warn wrap_ref_mod($ref, "po4a::xml", dgettext ("po4a", "Bad attribute syntax.  Continuing..."));
				} elsif ($ontagerror ne "silent") {
					die wrap_ref_mod($ref, "po4a::xml", dgettext ("po4a", "Bad attribute syntax"));
				}
			}
		}
	}
	return $text;
}
1262	␊
# Compute the translation options that apply to $path.
#
# An empty string means that the content found at $path is not translated.
# Otherwise the result is a set of option letters:
#   w: the content shall be re-wrapped
#   W: the content shall not be re-wrapped
#   i: the tag shall be inlined
#   p: a placeholder shall replace the tag (and its content)
#   n: a custom tag
#   f: appended to inline tags when the 'foldattributes' option is set
#
# A translatable inline tag in an untranslated tag is treated as a
# translatable breaking tag.
#
# Results are memoized per path in %translate_options_cache.
sub get_translate_options {
	my ($self, $path) = @_;

	my $cached = $translate_options_cache{$path};
	return $cached if defined $cached;

	my $wrap_letter = $self->{options}{'wrap'} ? "w" : "W";

	my $options = "";
	my $translate = 0;
	my $usedefault = 1;

	# Deprecated 'tags' / 'tagsonly' pair.
	my $from_tags = $self->get_tag_from_list($path, $self->{tags});
	my $inlist = defined($from_tags) ? 1 : 0;
	if ($self->{options}{'tagsonly'} eq $inlist) {
		$usedefault = 0;
		$translate = 1;
		if ($inlist) {
			# Keep only the option letters placed in front of the tag.
			($options = $from_tags) =~ s/<.*$//;
		} else {
			$options = $wrap_letter;
		}
	}

# TODO: a less precise set of tags should not override a more precise one
	# The tags and tagsonly options are deprecated.
	# The translated and untranslated options have an higher priority.
	my $from_translated = $self->get_tag_from_list($path, $self->{translated});
	if (defined $from_translated) {
		$usedefault = 0;
		$translate = 1;
		($options = $from_translated) =~ s/<.*$//;
	}

	# Translated content always carries an explicit wrapping letter.
	if ($translate and $options !~ m/w/i) {
		$options .= $wrap_letter;
	}

	# The untranslated list is only consulted when the translated list
	# gave nothing.
	unless (defined $from_translated) {
		if (defined $self->get_tag_from_list($path, $self->{untranslated})) {
			$usedefault = 0;
			$options = "";
			$translate = 0;
		}
	}

	# Inline wins over placeholder.
	if (defined $self->get_tag_from_list($path, $self->{inline})) {
		$usedefault = 0;
		$options .= "i";
	} elsif (defined $self->get_tag_from_list($path, $self->{placeholder})) {
		$usedefault = 0;
		$options .= "p";
	}

	if (defined $self->get_tag_from_list($path, $self->{customtag})) {
		$usedefault = 0;
		$options = "in"; # This erase any other setting
	}

	$options = $self->{options}{'defaulttranslateoption'} if $usedefault;

	# A translatable inline tag in an untranslated tag is treated as a
	# translatable breaking tag: drop the "i" when the parent path has
	# no options.
	if ($options =~ m/i/) {
		(my $parent = $path) =~ s/<[^>]*>$//;
		$options =~ s/i// if $self->get_translate_options($parent) eq "";
	}

	if ($options =~ m/i/ and $self->{options}{'foldattributes'}) {
		$options .= "f";
	}

	$translate_options_cache{$path} = $options;
	return $options;
}
1370	␊
1371	␊
# Find the entry of a list (hash of tags or tag hierarchies) that matches
# the given path, trying the longest suffix of the path first.
#
# On a match, the stored options followed by the matching path are
# returned as one string.  If nothing matches, undef is returned.
# With the 'caseinsensitive' option the path is lower-cased first.
sub get_tag_from_list ($$$) {
	my ($self, $path, $list) = @_;
	$path = lc $path if $self->{options}{'caseinsensitive'};

	for (;;) {
		my $stored = $list->{$path};
		return $stored.$path if defined $stored;

		# No leading element left to remove: give up.
		last if $path !~ m/</;
		$path =~ s/^<.*?>//;
	}

	return undef;
}
1394	␊
1395	␊
1396	␊
# Read and translate the content up to the next breaking tag.
#
# Text and inline tags are accumulated in @paragraph as (string,
# reference) pairs.  Comments met on the way are moved to @comments.
# Opening and closing inline tags update @path.  Placeholder tags ("p"
# option) open and close entries of @save_holders.  With the "f" option,
# an opening tag that has attributes is replaced by a short
# <name po4a-id=N> form and the full tag is remembered in the current
# holder.
# Unless the 'nostrip' option is set or the path's options contain "W",
# leading and trailing whitespace is taken off the paragraph and pushed
# to the output untranslated.
# Returns the end-of-file flag.
sub treat_content {
	my $self = shift;
	my $blank="";
	# Indicates if the paragraph will have to be translated
	my $translate = "";

	my ($eof,@paragraph)=$self->get_string_until('<',{remove=>1});

	while (!$eof and !$self->breaking_tag) {
	NEXT_TAG:
		my @text;
		my $type = $self->tag_type;
		my $f_extract = $tag_types[$type]->{'f_extract'};
		if (    defined($f_extract)
		    and $f_extract eq \&tag_extract_comment) {
			# Remove the content of the comments
			($eof, @text) = $self->extract_tag($type,1);
			# "\0" marks the end of this comment for translate_paragraph.
			$text[$#text-1] .= "\0";
			if ($tag_types[$type]->{'beginning'} eq "!--#") {
				$text[0] = "#".$text[0];
			}
			push @comments, @text;
		} else {
			my ($tmpeof, @tag) = $self->extract_tag($type,0);
			# Append the found inline tag
			($eof,@text)=$self->get_string_until('>',
			                                     {include=>1,
			                                      remove=>1,
			                                      unquoted=>1});
			# Append or remove the opening/closing tag from
			# the tag path
			if ($tag_types[$type]->{'end'} eq "") {
				if ($tag_types[$type]->{'beginning'} eq "") {
					# Opening inline tag
					my $cur_tag_name = $self->get_tag_name(@tag);
					my $t_opts = $self->get_translate_options($self->get_path($cur_tag_name));
					if ($t_opts =~ m/p/) {
						# We enter a new holder.
						# Append a <placeholder ...> tag to the current
						# paragraph, and save the @paragraph in the
						# current holder.
						my $last_holder = $save_holders[$#save_holders];
						my $placeholder_str = "<placeholder type=\"".$cur_tag_name."\" id=\"".($#{$last_holder->{'sub_translations'}}+1)."\"/>";
						push @paragraph, ($placeholder_str, $text[1]);
						my @saved_paragraph = @paragraph;

						$last_holder->{'paragraph'} = \@saved_paragraph;

						# Then we must push a new holder
						my @new_paragraph = ();
						my @sub_translations = ();
						my %folded_attributes;
						my %new_holder = ('paragraph' => \@new_paragraph,
						                  'open' => $self->join_lines(@text),
						                  'translation' => "",
						                  'close' => undef,
						                  'sub_translations' => \@sub_translations,
						                  'folded_attributes' => \%folded_attributes);
						push @save_holders, \%new_holder;
						@text = ();

						# The current @paragraph
						# (for the current holder)
						# is empty.
						@paragraph = ();
					} elsif ($t_opts =~ m/f/) {
						my $tag_full = $self->join_lines(@text);
						my $tag_ref = $text[1];
						# Only fold tags that really carry attributes:
						# "<", name, whitespace, something, ">".
						if ($tag_full =~ m/^<\s*\S+\s+\S.*>$/s) {
							my $holder = $save_holders[$#save_holders];
							# Next free id: one more than the highest
							# id already used in this holder.
							my $id = 0;
							foreach (keys %{$holder->{folded_attributes}}) {
								$id = $_ + 1 if ($_ >= $id);
							}
							$holder->{folded_attributes}->{$id} = $tag_full;

							@text = ("<$cur_tag_name po4a-id=$id>", $tag_ref);
						}
					}
					unless ($t_opts =~ m/n/) {
						push @path, $cur_tag_name;
					}
				} elsif ($tag_types[$type]->{'beginning'} eq "/") {
					# Closing inline tag

					# Check if this is closing the
					# last opening tag we detected.
					my $test = pop @path;
					my $name = $self->get_tag_name(@tag);
					if (!defined($test) ||
					    $test ne $name ) {
						my $ontagerror = $self->{options}{'ontagerror'};
						if ($ontagerror eq "warn") {
							warn wrap_ref_mod($tag[1], "po4a::xml", dgettext("po4a", "Unexpected closing tag </%s> found. The main document may be wrong.  Continuing..."), $name);
						} elsif ($ontagerror ne "silent") {
							die wrap_ref_mod($tag[1], "po4a::xml", dgettext("po4a", "Unexpected closing tag </%s> found. The main document may be wrong."), $name);
						}
					}

					if ($self->get_translate_options($self->get_path($self->get_tag_name(@tag))) =~ m/p/) {
						# This closes the current holder.

						push @path, $self->get_tag_name(@tag);
						# Now translate this paragraph if needed.
						# This will call pushline and append the
						# translation to the current holder's translation.
						$self->translate_paragraph(@paragraph);
						pop @path;

						# Now that this holder is closed, we can remove
						# the holder from the stack.
						my $holder = pop @save_holders;
						# We need to keep the translation of this holder
						my $translation = $holder->{'open'}.$holder->{'translation'};
						$translation .= $self->join_lines(@text);

						@text = ();

						# Then we store the translation in the previous
						# holder's sub_translations array
						my $previous_holder = $save_holders[$#save_holders];
						push @{$previous_holder->{'sub_translations'}}, $translation;
						# We also need to restore the @paragraph array, as
						# it was before we encountered the holder.
						@paragraph = @{$previous_holder->{'paragraph'}};
					}
				}
			}
			push @paragraph, @text;
		}

		# Next tag
		($eof,@text)=$self->get_string_until('<',{remove=>1});
		if ($#text > 0) {
			# Check if text (extracted after the inline tag)
			# has to be translated
			push @paragraph, @text;
		}
	}

	# This strips the extracted strings
	# (only if you don't specify the 'nostrip' option, and if the
	# paragraph can be re-wrapped)
	$translate = $self->get_translate_options($self->get_path);
	if (!$self->{options}{'nostrip'} and $translate !~ m/W/) {
		my $clean = 0;
		# Clean the beginning
		while (!$clean and $#paragraph > 0) {
			# $1: leading whitespace, $2: the rest of the line.
			$paragraph[0] =~ /^(\s*)(.*)/s;
			my $match = $1;
			if ($paragraph[0] eq $match) {
				# The whole line is whitespace: push it untranslated
				# and drop the (line, reference) pair.
				if ($match ne "") {
					$self->pushline($match);
				}
				shift @paragraph;
				shift @paragraph;
			} else {
				$paragraph[0] = $2;
				if ($match ne "") {
					$self->pushline($match);
				}
				$clean = 1;
			}
		}
		$clean = 0;
		# Clean the end
		while (!$clean and $#paragraph > 0) {
			# $1: the line without its trailing whitespace, $2: that
			# trailing whitespace.
			$paragraph[$#paragraph-1] =~ /^(.*?)(\s*)$/s;
			my $match = $2;
			if ($paragraph[$#paragraph-1] eq $match) {
				if ($match ne "") {
					$blank = $match.$blank;
				}
				pop @paragraph;
				pop @paragraph;
			} else {
				$paragraph[$#paragraph-1] = $1;
				if ($match ne "") {
					$blank = $match.$blank;
				}
				$clean = 1;
			}
		}
	}

	# Translate the string when needed
	# This will either push the translation in the translated document or
	# in the current holder translation.
	$self->translate_paragraph(@paragraph);

	# Push the trailing blanks
	if ($blank ne "") {
		$self->pushline($blank);
	}
	return $eof;
}
1593	␊
# Translate a @paragraph array of (string, reference) pairs and push the
# result with pushline.
#
# Whether the text is translated depends on the options of the current
# path: when they are empty the text is only recoded, otherwise it is
# passed to found_string together with any pending comments.
# Pending comments (@comments) are pushed to the output first and the
# list is emptied.  With the 'cpp' option, lines that look like
# preprocessor directives are pushed as they are and split the paragraph.
# Finally, when all the placeholders of the current holder have their
# translation, they are substituted into the holder's translation.
sub translate_paragraph {
	my $self = shift;
	my @paragraph = @_;
	my $translate = $self->get_translate_options($self->get_path);

	# Leading lines made of whitespace ending in a newline are pushed
	# untranslated.
	while (    (scalar @paragraph)
	       and ($paragraph[0] =~ m/^\s*\n/s)) {
		$self->pushline($paragraph[0]);
		shift @paragraph;
		shift @paragraph;
	}

	my $comments;
	while (@comments) {
		my ($comment,$eoc);
		# Gather the lines of one comment; its last line ends with "\0".
		# Also stop when the list runs out, so that a missing terminator
		# cannot loop forever.
		do {
			my ($t,$l) = (shift @comments, shift @comments);
			$t =~ s/\n?(\0)?$//;
			$eoc = $1;
			$comment .= "\n" if defined $comment;
			$comment .= $t;
		} until ($eoc or !@comments);
		$comments .= "\n" if defined $comments;
		$comments .= $comment;
		$self->pushline("<!--".$comment."-->\n") if defined $comment;
	}
	@comments = ();

	if ($self->{options}{'cpp'}) {
		my @tmp = @paragraph;
		@paragraph = ();
		while (@tmp) {
			my ($t,$l) = (shift @tmp, shift @tmp);
			# #include can be followed by a filename between
			# <> brackets. In that case, the argument won't be
			# handled in the same call to translate_paragraph.
			# Thus do not try to match "include ".
			if ($t =~ m/^#[ \t]*(if |endif|undef |include|else|ifdef |ifndef |define )/si) {
				if (@paragraph) {
					$self->translate_paragraph(@paragraph);
					@paragraph = ();
					$self->pushline("\n");
				}
				$self->pushline($t);
			} else {
				push @paragraph, ($t,$l);
			}
		}
	}

	my $para = $self->join_lines(@paragraph);
	if ( length($para) > 0 ) {
		if ($translate ne "") {
			# This tag should be translated
			$self->pushline($self->found_string(
				$para,
				$paragraph[1], {
					type=>"tag",
					tag_options=>$translate,
					comments=>$comments
				}));
		} else {
			# Inform that this tag isn't translated in debug mode
			print wrap_ref_mod($paragraph[1], "po4a::xml", dgettext ("po4a", "Content of tag %s excluded: %s"), $self->get_path, $para)
			       if $self->debug();
			$self->pushline($self->recode_skipped_text($para));
		}
	}
	# Now the paragraph is fully translated.
	# If we have all the holders' translation, we can replace the
	# placeholders by their translations.
	# We must wait to have all the translations because the holders are
	# numbered.
	{
		my $holder = $save_holders[$#save_holders];
		my $translation = $holder->{'translation'};

		# Count the number of <placeholder ...> in $translation
		my $count = 0;
		my $str = $translation;
		while (    (defined $str)
		       and ($str =~ m/^.*?<placeholder\s+type="[^"]+"\s+id="(\d+)"\s*\/>(.*)$/s)) {
			$count += 1;
			$str = $2;
			# A sub-translation that still holds a placeholder is not
			# final: force the count check below to fail.
			if ($holder->{'sub_translations'}->[$1] =~ m/<placeholder\s+type="[^"]+"\s+id="(\d+)"\s*\/>/s) {
				$count = -1;
				last;
			}
		}

		if (    (defined $translation)
		    and (scalar(@{$holder->{'sub_translations'}}) == $count)) {
			# OK, all the holders of the current paragraph are
			# closed (and translated).
			# Replace them by their translation.
			while ($translation =~ m/^(.*?)<placeholder\s+type="[^"]+"\s+id="(\d+)"\s*\/>(.*)$/s) {
				# FIXME: we could also check that
				#          * the holder exists
				#          * all the holders are used
				$translation = $1.$holder->{'sub_translations'}->[$2].$3;
			}
			# We have our translation
			$holder->{'translation'} = $translation;
			# And there is no need for any holder in it.
			my @sub_translations = ();
			$holder->{'sub_translations'} = \@sub_translations;
		}
	}

}
1707	␊
1708	␊
1709	␊
1710	=head2 WORKING WITH THE MODULE OPTIONS␊
1711	␊
1712	=over 4␊
1713	␊
1714	=item treat_options()␊
1715	␊
1716	This function fills the internal structures that contain the tags, attributes␊
1717	and inline data with the options of the module (specified in the command-line␊
1718	or in the initialize function).␊
1719	␊
1720	=back␊
1721	␊
1722	=cut␊
1723	␊
# Fill the internal lookup tables from the module options.
#
# Every list option is a whitespace-separated string of entries of the
# form "[options]<tag>" (or "[options]<tag><subtag>...").  Each entry is
# stored as $self->{CATEGORY}->{"<tag>..."} = "options".  The
# 'attributes' lists also accept bare attribute names, stored with empty
# options.
#
# For each category the user list is loaded first, then the module's
# "_default_CATEGORY" list; a default entry is skipped when its tag is in
# the 'nodefault' list or is already defined by the user.
#
# With the 'caseinsensitive' option all these option strings are
# lower-cased first.  Using the deprecated 'tags' and 'tagsonly' options
# triggers a warning.  Dies when a tag is in two exclusive categories.
sub treat_options {
	my $self = shift;

	my @categories = qw(break translated untranslated attributes
	                    inline placeholder customtag);

	if ($self->{options}{'caseinsensitive'}) {
		foreach my $name ('nodefault', 'tags',
		                  map { ($_, "_default_".$_) } @categories) {
			$self->{options}{$name} = lc $self->{options}{$name};
		}
	}

	# Split an option string into its whitespace-separated entries.
	my $entries_of = sub {
		my ($value) = @_;
		$value = "" unless defined $value;
		$value =~ /^\s*(.*)\s*$/s;
		return split(/\s+/s,$1);
	};

	my %list_nodefault;
	foreach ($entries_of->($self->{options}{'nodefault'})) {
		$list_nodefault{$_} = 1;
	}
	$self->{nodefault} = \%list_nodefault;

	# Load the entries of option $option into $self->{$category}.
	#   $is_default - skip entries listed in 'nodefault' or already set
	#   $bare_ok    - accept entries without any <tag> (attribute names)
	my $load = sub {
		my ($category, $option, $is_default, $bare_ok) = @_;
		foreach my $entry ($entries_of->($self->{options}{$option})) {
			my ($opts, $key);
			if ($entry =~ m/^(.*?)(<.*)$/) {
				($opts, $key) = ($1 || "", $2);
			} elsif ($bare_ok) {
				($opts, $key) = ("", $entry);
			} else {
				# Without a '<' there is no tag to register.  Reusing
				# stale regex captures here would store garbage.
				warn wrap_mod("po4a::xml",
				              dgettext("po4a",
				                       "Ignoring malformed entry '%s' in the '%s' option: a tag such as <name> is expected."), $entry, $option);
				next;
			}
			next if $is_default
			    and ($list_nodefault{$key}
			         or defined $self->{$category}->{$key});
			$self->{$category}->{$key} = $opts;
		}
	};

	if (length $self->{options}{'tags'}) {
		warn wrap_mod("po4a::xml",
		              dgettext("po4a",
		                       "The '%s' option is deprecated. Please use the translated/untranslated and/or break/inline/placeholder categories."), "tags");
	}
	$load->('tags', 'tags', 0, 0);

	if ($self->{options}{'tagsonly'}) {
		warn wrap_mod("po4a::xml",
		              dgettext("po4a",
		                       "The '%s' option is deprecated. Please use the translated/untranslated and/or break/inline/placeholder categories."), "tagsonly");
	}

	foreach my $category (@categories) {
		my $bare_ok = ($category eq 'attributes') ? 1 : 0;
		$load->($category, $category, 0, $bare_ok);
		$load->($category, "_default_".$category, 1, $bare_ok);
	}

	# There should be no translated and untranslated tags.
	# There should be no inline, break, placeholder, and customtag tags.
	# Each rule names a category and the categories it excludes.
	my @exclusive = (
		[ 'translated',  [ 'untranslated' ] ],
		[ 'inline',      [ 'break', 'placeholder', 'customtag' ] ],
		[ 'break',       [ 'placeholder', 'customtag' ] ],
		[ 'placeholder', [ 'customtag' ] ],
	);
	foreach my $rule (@exclusive) {
		my ($first, $others) = @$rule;
		foreach my $tag (keys %{$self->{$first}}) {
			foreach my $other (@$others) {
				die wrap_mod("po4a::xml",
				             dgettext("po4a",
				                      "Tag '%s' both in the %s and %s categories."), $tag, $first, $other)
					if defined $self->{$other}->{$tag};
			}
		}
	}
}
1908	␊
1909	=head2 GETTING TEXT FROM THE INPUT DOCUMENT␊
1910	␊
1911	=over␊
1912	␊
1913	=item get_string_until($%)␊
1914	␊
1915	This function returns an array with the lines (and references) from the input␊
1916	document until it finds the first argument. The second argument is an options␊
1917	hash. Value 0 means disabled (the default) and 1, enabled.␊
1918	␊
1919	The valid options are:␊
1920	␊
1921	=over 4␊
1922	␊
1923	=item B<include>␊
1924	␊
1925	This makes the returned array contain the searched text␊
1926	␊
1927	=item B<remove>␊
1928	␊
1929	This removes the returned stream from the input␊
1930	␊
1931	=item B<unquoted>␊
1932	␊
1933	This ensures that the searched text is outside any quotes␊
1934	␊
1935	=back␊
1936	␊
1937	=cut␊
1938	␊
# Read lines from the input document until $search is found or the input
# runs out.
#
# Arguments:
#   $search  - the text to look for. It is matched literally unless the
#              'regex' option is true, in which case it is used as a
#              regular expression.
#   $options - hash reference of flags; every flag defaults to 0:
#                include  - keep the matched text at the end of the result
#                remove   - do not push the returned lines back onto the input
#                unquoted - only accept a match located outside "..." and '...'
#                regex    - interpret $search as a regular expression
#
# Returns ($eof, @text). $eof is 1 when the input was exhausted before a
# match was found. @text is a flat list of (line, reference) pairs as
# delivered by $self->shiftline().
sub get_string_until {
	my ($self,$search) = (shift,shift);
	my $options = shift;
	my ($include,$remove,$unquoted,$regex) = (0,0,0,0);

	if (defined($options->{include})) { $include = $options->{include}; }
	if (defined($options->{remove})) { $remove = $options->{remove}; }
	if (defined($options->{unquoted})) { $unquoted = $options->{unquoted}; }
	if (defined($options->{regex})) { $regex = $options->{regex}; }

	my ($line,$ref) = $self->shiftline();
	my (@text,$paragraph);
	my ($eof,$found) = (0,0);

	# A literal search string must not be interpreted as a pattern.
	$search = "\Q$search\E" unless $regex;

	# One unit of text that may precede an unquoted match: a complete
	# double-quoted string, a complete single-quoted string, or a single
	# character that is not a quote.
	my $unit = qr/\".*?\"|\'.*?\'|[^\"\']/s;

	while (defined($line) and !$found) {
		push @text, ($line,$ref);
		$paragraph .= $line;
		if ($unquoted) {
			# Everything before the match must be balanced quoted strings
			# or unquoted characters.
			if ( $paragraph =~ /^(?:$unit)*$search/s ) {
				$found = 1;
			}
		} else {
			if ( $paragraph =~ /$search/s ) {
				$found = 1;
			}
		}
		if (!$found) {
			($line,$ref)=$self->shiftline();
		}
	}

	# The loop only leaves $line undefined when shiftline() had nothing left.
	if (!defined($line)) { $eof = 1; }

	if ( $found ) {
		# @text holds (line, reference) pairs, so $text[$#text-1] is the last
		# line read and $text[$#text] is its reference.
		$line = "";
		if($unquoted) {
			# Non-greedy prefix: stop at the first unquoted match and capture
			# whatever follows it.
			$paragraph =~ /^(?:$unit)*?$search(.*)$/s;
			$line = $1;
			$text[$#text-1] =~ s/\Q$line\E$//s;
		} else {
			$paragraph =~ /$search(.*)$/s;
			$line = $1;
			$text[$#text-1] =~ s/\Q$line\E$//s;
		}
		if(!$include) {
			# Move the matched text itself back to the pending input too.
			$text[$#text-1] =~ /^(.*)($search.*)$/s;
			$text[$#text-1] = $1;
			$line = $2.$line;
		}
		# Give the unread remainder of the last line back to the input.
		if (defined($line) and ($line ne "")) {
			$self->unshiftline ($line,$text[$#text]);
		}
	}
	if (!$remove) {
		$self->unshiftline (@text);
	}

	#If we get to the end of the file, we return the whole paragraph
	return ($eof,@text);
}
2000	␊
2001	=item skip_spaces(\@)␊
2002	␊
2003	This function receives as argument a reference to a paragraph (in the format␊
2004	returned by get_string_until), removes its leading whitespace in place and␊
2005	returns that whitespace as a simple string.␊
2006	␊
2007	=cut␊
2008	␊
# Strip leading whitespace from a paragraph given as a reference to a flat
# list of (line, reference) pairs. The paragraph is modified in place: the
# whitespace is removed from the first line, and any pair whose line becomes
# (or already is) empty is dropped. Returns the removed whitespace as one
# string.
sub skip_spaces {
	my ($self, $pstring) = @_;
	my $space = "";

	while (@$pstring) {
		if ($pstring->[0] ne "") {
			# Stop at the first line that starts with a non-space character.
			last unless $pstring->[0] =~ /^(\s+)(.*)$/s;
			$space .= $1;
			$pstring->[0] = $2;
		}

		# The line is now empty: discard it together with its reference.
		if ($pstring->[0] eq "") {
			shift @$pstring;
			shift @$pstring;
		}
	}
	return $space;
}
2026	␊
2027	=item join_lines(@)␊
2028	␊
2029	This function returns a simple string with the text from the argument array␊
2030	(discarding the references).␊
2031	␊
2032	=cut␊
2033	␊
# Concatenate the line parts of a flat list of (line, reference) pairs and
# return the result as a single string. References are ignored, and so is a
# trailing element that has no partner.
sub join_lines {
	my ($self, @lines) = @_;
	my $pair_count = int(@lines / 2);
	return join "", map { $lines[2 * $_] } 0 .. $pair_count - 1;
}
2044	␊
2045	=back␊
2046	␊
2047	=head1 STATUS OF THIS MODULE␊
2048	␊
2049	This module can translate tags and attributes.␊
2050	␊
2051	=head1 TODO LIST␊
2052	␊
2053	DOCTYPE (ENTITIES)␊
2054	␊
2055	There is minimal support for the translation of entities. They are␊
2056	translated as a whole, and tags are not taken into account. Multiline␊
2057	entities are not supported and entities are always rewrapped during␊
2058	translation.␊
2059	␊
2060	MODIFY TAG TYPES FROM INHERITED MODULES␊
2061	(move the tag_types structure inside the $self hash?)␊
2062	␊
2063	=head1 SEE ALSO␊
2064	␊
2065	L<Locale::Po4a::TransTractor(3pm)\|Locale::Po4a::TransTractor>,␊
2066	L<po4a(7)\|po4a.7>␊
2067	␊
2068	=head1 AUTHORS␊
2069	␊
2070	Jordi Vilalta <jvprat@gmail.com>␊
2071	Nicolas François <nicolas.francois@centraliens.net>␊
2072	␊
2073	=head1 COPYRIGHT AND LICENSE␊
2074	␊
2075	Copyright (c) 2004 by Jordi Vilalta <jvprat@gmail.com>␊
2076	Copyright (c) 2008-2009 by Nicolas François <nicolas.francois@centraliens.net>␊
2077	␊
2078	This program is free software; you may redistribute it and/or modify it␊
2079	under the terms of GPL (see the COPYING file).␊
2080	␊
2081	=cut␊
2082	␊
2083	1;␊
2084

Download this file