Root/
Source at commit 2462 created 9 years 8 months ago. By ifabio, Split out states generator from acpi_patcher (Credits to Clover Teams) | |
---|---|
1 | #!/usr/bin/perl␊ |
2 | ␊ |
3 | # Po4a::Xml.pm␊ |
4 | #␊ |
5 | # extract and translate translatable strings from XML documents.␊ |
6 | #␊ |
7 | # This code extracts plain text from tags and attributes from generic␊ |
8 | # XML documents, and it can be used as a base to build modules for␊ |
9 | # XML-based documents.␊ |
10 | #␊ |
11 | # Copyright (c) 2004 by Jordi Vilalta <jvprat@gmail.com>␊ |
12 | # Copyright (c) 2008-2009 by Nicolas François <nicolas.francois@centraliens.net>␊ |
13 | #␊ |
14 | # This program is free software; you can redistribute it and/or modify␊ |
15 | # it under the terms of the GNU General Public License as published by␊ |
16 | # the Free Software Foundation; either version 2 of the License, or␊ |
17 | # (at your option) any later version.␊ |
18 | #␊ |
19 | # This program is distributed in the hope that it will be useful,␊ |
20 | # but WITHOUT ANY WARRANTY; without even the implied warranty of␊ |
21 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the␊ |
22 | # GNU General Public License for more details.␊ |
23 | #␊ |
24 | # You should have received a copy of the GNU General Public License␊ |
25 | # along with this program; if not, write to the Free Software␊ |
26 | # Foundation, Inc.,␊ |
27 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA␊ |
28 | #␊ |
29 | ########################################################################␊ |
30 | ␊ |
31 | =encoding UTF-8␊ |
32 | ␊ |
33 | =head1 NAME␊ |
34 | ␊ |
35 | Locale::Po4a::Xml - convert XML documents and derivates from/to PO files␊ |
36 | ␊ |
37 | =head1 DESCRIPTION␊ |
38 | ␊ |
39 | The po4a (PO for anything) project goal is to ease translations (and more␊ |
40 | interestingly, the maintenance of translations) using gettext tools on␊ |
41 | areas where they were not expected like documentation.␊ |
42 | ␊ |
43 | Locale::Po4a::Xml is a module to help the translation of XML documents into␊ |
44 | other [human] languages. It can also be used as a base to build modules for␊ |
45 | XML-based documents.␊ |
46 | ␊ |
47 | =cut␊ |
48 | ␊ |
49 | package Locale::Po4a::Xml;␊ |
50 | ␊ |
51 | use 5.006;␊ |
52 | use strict;␊ |
53 | use warnings;␊ |
54 | ␊ |
55 | require Exporter;␊ |
56 | use vars qw(@ISA @EXPORT);␊ |
57 | @ISA = qw(Locale::Po4a::TransTractor);␊ |
58 | @EXPORT = qw(new initialize @tag_types);␊ |
59 | ␊ |
60 | use Locale::Po4a::TransTractor;␊ |
61 | use Locale::Po4a::Common;␊ |
62 | use Carp qw(croak);␊ |
63 | use File::Basename;␊ |
64 | use File::Spec;␊ |
65 | ␊ |
66 | #It will mantain the path from the root tag to the current one␊ |
67 | my @path;␊ |
68 | ␊ |
69 | #It will contain a list of external entities and their attached paths␊ |
70 | my %entities;␊ |
71 | ␊ |
72 | my @comments;␊ |
73 | my %translate_options_cache;␊ |
74 | ␊ |
75 | my $_shiftline_in_comment = 0;␊ |
76 | sub shiftline {␊ |
77 | my $self = shift;␊ |
78 | # call Transtractor's shiftline␊ |
79 | my ($line,$ref) = $self->SUPER::shiftline();␊ |
80 | return ($line,$ref) if (not defined $line);␊ |
81 | ␊ |
82 | if ($self->{options}{'includeexternal'}) {␊ |
83 | my $tmp;␊ |
84 | ␊ |
85 | for my $k (keys %entities) {␊ |
86 | if ($line =~ m/^(.*?)&$k;(.*)$/s) {␊ |
87 | my ($before, $after) = ($1, $2);␊ |
88 | my $linenum=0;␊ |
89 | my @textentries;␊ |
90 | ␊ |
91 | $tmp = $before;␊ |
92 | my $tmp_in_comment = 0;␊ |
93 | if ($_shiftline_in_comment) {␊ |
94 | if ($before =~ m/^.*?-->(.*)$/s) {␊ |
95 | $tmp = $1;␊ |
96 | $tmp_in_comment = 0;␊ |
97 | } else {␊ |
98 | $tmp_in_comment = 1;␊ |
99 | }␊ |
100 | }␊ |
101 | if ($tmp_in_comment == 0) {␊ |
102 | while ($tmp =~ m/^.*?<!--.*?-->(.*)$/s) {␊ |
103 | $tmp = $1;␊ |
104 | }␊ |
105 | if ($tmp =~ m/<!--/s) {␊ |
106 | $tmp_in_comment = 1;␊ |
107 | }␊ |
108 | }␊ |
109 | next if ($tmp_in_comment);␊ |
110 | ␊ |
111 | open (my $in, $entities{$k})␊ |
112 | or croak wrap_mod("po4a::xml",␊ |
113 | dgettext("po4a", "Can't read from %s: %s"),␊ |
114 | $entities{$k}, $!);␊ |
115 | while (defined (my $textline = <$in>)) {␊ |
116 | $linenum++;␊ |
117 | my $textref=$entities{$k}.":$linenum";␊ |
118 | push @textentries, ($textline,$textref);␊ |
119 | }␊ |
120 | close $in␊ |
121 | or croak wrap_mod("po4a::xml",␊ |
122 | dgettext("po4a", "Can't close %s after reading: %s"),␊ |
123 | $entities{$k}, $!);␊ |
124 | ␊ |
125 | push @textentries, ($after, $ref);␊ |
126 | $line = $before.(shift @textentries);␊ |
127 | $ref .= " ".(shift @textentries);␊ |
128 | $self->unshiftline(@textentries);␊ |
129 | }␊ |
130 | }␊ |
131 | ␊ |
132 | $tmp = $line;␊ |
133 | if ($_shiftline_in_comment) {␊ |
134 | if ($line =~ m/^.*?-->(.*)$/s) {␊ |
135 | $tmp = $1;␊ |
136 | $_shiftline_in_comment = 0;␊ |
137 | } else {␊ |
138 | $_shiftline_in_comment = 1;␊ |
139 | }␊ |
140 | }␊ |
141 | if ($_shiftline_in_comment == 0) {␊ |
142 | while ($tmp =~ m/^.*?<!--.*?-->(.*)$/s) {␊ |
143 | $tmp = $1;␊ |
144 | }␊ |
145 | if ($tmp =~ m/<!--/s) {␊ |
146 | $_shiftline_in_comment = 1;␊ |
147 | }␊ |
148 | }␊ |
149 | }␊ |
150 | ␊ |
151 | return ($line,$ref);␊ |
152 | }␊ |
153 | ␊ |
154 | sub read {␊ |
155 | ␉my ($self,$filename)=@_;␊ |
156 | ␉push @{$self->{DOCPOD}{infile}}, $filename;␊ |
157 | ␉$self->Locale::Po4a::TransTractor::read($filename);␊ |
158 | }␊ |
159 | ␊ |
160 | sub parse {␊ |
161 | ␉my $self=shift;␊ |
162 | ␉map {$self->parse_file($_)} @{$self->{DOCPOD}{infile}};␊ |
163 | }␊ |
164 | ␊ |
165 | # @save_holders is a stack of references to ('paragraph', 'translation',␊ |
166 | # 'sub_translations', 'open', 'close', 'folded_attributes') hashes, where:␊ |
167 | # paragraph is a reference to an array (see paragraph in the␊ |
168 | # treat_content() subroutine) of strings followed by␊ |
169 | # references. It contains the @paragraph array as it was␊ |
170 | # before the processing was interrupted by a tag instroducing␊ |
171 | # a placeholder.␊ |
172 | # translation is the translation of this level up to now␊ |
173 | # sub_translations is a reference to an array of strings containing the␊ |
174 | # translations which must replace the placeholders.␊ |
175 | # open is the tag which opened the placeholder.␊ |
176 | # close is the tag which closed the placeholder.␊ |
177 | # folded_attributes is an hash of tags with their attributes (<tag attrs=...>␊ |
178 | # strings), referenced by the folded tag id, which should␊ |
179 | # replace the <tag po4a-id=id> strings in the current␊ |
180 | # translation.␊ |
181 | #␊ |
182 | # If @save_holders only has 1 holder, then we are not processing the␊ |
183 | # content of an holder, we are translating the document.␊ |
184 | my @save_holders;␊ |
185 | ␊ |
186 | ␊ |
187 | # If we are at the bottom of the stack and there is no <placeholder ...> in␊ |
188 | # the current translation, we can push the translation in the translated␊ |
189 | # document.␊ |
190 | # Otherwise, we keep the translation in the current holder.␊ |
191 | sub pushline {␊ |
192 | ␉my ($self, $line) = (shift, shift);␊ |
193 | ␊ |
194 | ␉my $holder = $save_holders[$#save_holders];␊ |
195 | ␉my $translation = $holder->{'translation'};␊ |
196 | ␉$translation .= $line;␊ |
197 | ␊ |
198 | ␉while ( %{$holder->{folded_attributes}}␊ |
199 | ␉ and $translation =~ m/^(.*)<([^>]+?)\s+po4a-id=([0-9]+)>(.*)$/s) {␊ |
200 | ␉␉my $begin = $1;␊ |
201 | ␉␉my $tag = $2;␊ |
202 | ␉␉my $id = $3;␊ |
203 | ␉␉my $end = $4;␊ |
204 | ␉␉if (defined $holder->{folded_attributes}->{$id}) {␊ |
205 | ␉␉␉# TODO: check if the tag is the same␊ |
206 | ␉␉␉$translation = $begin.$holder->{folded_attributes}->{$id}.$end;␊ |
207 | ␉␉␉delete $holder->{folded_attributes}->{$id};␊ |
208 | ␉␉} else {␊ |
209 | ␉␉␉# TODO: It will be hard to identify the location.␊ |
210 | ␉␉␉# => find a way to retrieve the reference.␊ |
211 | ␉␉␉die wrap_mod("po4a::xml", dgettext("po4a", "'po4a-id=%d' in the translation does not exist in the original string (or 'po4a-id=%d' used twice in the translation)."), $id, $id);␊ |
212 | ␉␉}␊ |
213 | ␉}␊ |
214 | # TODO: check that %folded_attributes is empty at some time␊ |
215 | # => in translate_paragraph?␊ |
216 | ␊ |
217 | ␉if ( ($#save_holders > 0)␊ |
218 | ␉ or ($translation =~ m/<placeholder\s+type="[^"]+"\s+id="(\d+)"\s*\/>/s)) {␊ |
219 | ␉␉$holder->{'translation'} = $translation;␊ |
220 | ␉} else {␊ |
221 | ␉␉$self->SUPER::pushline($translation);␊ |
222 | ␉␉$holder->{'translation'} = '';␊ |
223 | ␉}␊ |
224 | }␊ |
225 | ␊ |
226 | =head1 TRANSLATING WITH PO4A::XML␊ |
227 | ␊ |
228 | This module can be used directly to handle generic XML documents. This will␊ |
229 | extract all tag's content, and no attributes, since it's where the text is␊ |
230 | written in most XML based documents.␊ |
231 | ␊ |
232 | There are some options (described in the next section) that can customize␊ |
233 | this behavior. If this doesn't fit to your document format you're encouraged␊ |
234 | to write your own module derived from this, to describe your format's details.␊ |
235 | See the section B<WRITING DERIVATE MODULES> below, for the process description.␊ |
236 | ␊ |
237 | =cut␊ |
238 | ␊ |
239 | #␊ |
240 | # Parse file and translate it␊ |
241 | #␊ |
242 | sub parse_file {␊ |
243 | ␉my ($self,$filename) = @_;␊ |
244 | ␉my $eof = 0;␊ |
245 | ␊ |
246 | ␉while (!$eof) {␊ |
247 | ␉␉# We get all the text until the next breaking tag (not␊ |
248 | ␉␉# inline) and translate it␊ |
249 | ␉␉$eof = $self->treat_content;␊ |
250 | ␉␉if (!$eof) {␊ |
251 | ␉␉␉# And then we treat the following breaking tag␊ |
252 | ␉␉␉$eof = $self->treat_tag;␊ |
253 | ␉␉}␊ |
254 | ␉}␊ |
255 | }␊ |
256 | ␊ |
257 | =head1 OPTIONS ACCEPTED BY THIS MODULE␊ |
258 | ␊ |
259 | The global debug option causes this module to show the excluded strings, in␊ |
260 | order to see if it skips something important.␊ |
261 | ␊ |
262 | These are this module's particular options:␊ |
263 | ␊ |
264 | =over 4␊ |
265 | ␊ |
266 | =item B<nostrip>␊ |
267 | ␊ |
268 | Prevents it to strip the spaces around the extracted strings.␊ |
269 | ␊ |
270 | =item B<wrap>␊ |
271 | ␊ |
272 | Canonizes the string to translate, considering that whitespaces are not␊ |
273 | important, and wraps the translated document. This option can be overridden␊ |
274 | by custom tag options. See the "tags" option below.␊ |
275 | ␊ |
276 | =item B<caseinsensitive>␊ |
277 | ␊ |
278 | It makes the tags and attributes searching to work in a case insensitive␊ |
279 | way. If it's defined, it will treat E<lt>BooKE<gt>laNG and E<lt>BOOKE<gt>Lang as E<lt>bookE<gt>lang.␊ |
280 | ␊ |
281 | =item B<includeexternal>␊ |
282 | ␊ |
283 | When defined, external entities are included in the generated (translated)␊ |
284 | document, and for the extraction of strings. If it's not defined, you␊ |
285 | will have to translate external entities separately as independent␊ |
286 | documents.␊ |
287 | ␊ |
288 | =item B<ontagerror>␊ |
289 | ␊ |
290 | This option defines the behavior of the module when it encounter a invalid␊ |
291 | XML syntax (a closing tag which does not match the last opening tag, or a␊ |
292 | tag's attribute without value).␊ |
293 | It can take the following values:␊ |
294 | ␊ |
295 | =over␊ |
296 | ␊ |
297 | =item I<fail>␊ |
298 | ␊ |
299 | This is the default value.␊ |
300 | The module will exit with an error.␊ |
301 | ␊ |
302 | =item I<warn>␊ |
303 | ␊ |
304 | The module will continue, and will issue a warning.␊ |
305 | ␊ |
306 | =item I<silent>␊ |
307 | ␊ |
308 | The module will continue without any warnings.␊ |
309 | ␊ |
310 | =back␊ |
311 | ␊ |
312 | Be careful when using this option.␊ |
313 | It is generally recommended to fix the input file.␊ |
314 | ␊ |
315 | =item B<tagsonly>␊ |
316 | ␊ |
317 | Extracts only the specified tags in the "tags" option. Otherwise, it␊ |
318 | will extract all the tags except the ones specified.␊ |
319 | ␊ |
320 | Note: This option is deprecated.␊ |
321 | ␊ |
322 | =item B<doctype>␊ |
323 | ␊ |
324 | String that will try to match with the first line of the document's doctype␊ |
325 | (if defined). If it doesn't, a warning will indicate that the document␊ |
326 | might be of a bad type.␊ |
327 | ␊ |
328 | =item B<addlang>␊ |
329 | ␊ |
330 | String indicating the path (e.g. E<lt>bbbE<gt>E<lt>aaaE<gt>) of a tag␊ |
331 | where a lang="..." attribute shall be added. The language will be defined␊ |
332 | as the basename of the PO file without any .po extension.␊ |
333 | ␊ |
334 | =item B<tags>␊ |
335 | ␊ |
336 | Space-separated list of tags you want to translate or skip. By default,␊ |
337 | the specified tags will be excluded, but if you use the "tagsonly" option,␊ |
338 | the specified tags will be the only ones included. The tags must be in the␊ |
339 | form E<lt>aaaE<gt>, but you can join some (E<lt>bbbE<gt>E<lt>aaaE<gt>) to say that the content of␊ |
340 | the tag E<lt>aaaE<gt> will only be translated when it's into a E<lt>bbbE<gt> tag.␊ |
341 | ␊ |
342 | You can also specify some tag options putting some characters in front of␊ |
343 | the tag hierarchy. For example, you can put 'w' (wrap) or 'W' (don't wrap)␊ |
344 | to override the default behavior specified by the global "wrap" option.␊ |
345 | ␊ |
346 | Example: WE<lt>chapterE<gt>E<lt>titleE<gt>␊ |
347 | ␊ |
348 | Note: This option is deprecated.␊ |
349 | You should use the B<translated> and B<untranslated> options instead.␊ |
350 | ␊ |
351 | =item B<attributes>␊ |
352 | ␊ |
353 | Space-separated list of tag's attributes you want to translate. You can␊ |
354 | specify the attributes by their name (for example, "lang"), but you can␊ |
355 | prefix it with a tag hierarchy, to specify that this attribute will only be␊ |
356 | translated when it's into the specified tag. For example: E<lt>bbbE<gt>E<lt>aaaE<gt>lang␊ |
357 | specifies that the lang attribute will only be translated if it's into an␊ |
358 | E<lt>aaaE<gt> tag, and it's into a E<lt>bbbE<gt> tag.␊ |
359 | ␊ |
360 | =item B<foldattributes>␊ |
361 | ␊ |
362 | Do not translate attributes in inline tags.␊ |
363 | Instead, replace all attributes of a tag by po4a-id=<id>.␊ |
364 | ␊ |
365 | This is useful when attributes shall not be translated, as this simplifies the␊ |
366 | strings for translators, and avoids typos.␊ |
367 | ␊ |
368 | =item B<customtag>␊ |
369 | ␊ |
370 | Space-separated list of tags which should not be treated as tags.␊ |
371 | These tags are treated as inline, and do not need to be closed.␊ |
372 | ␊ |
373 | =item B<break>␊ |
374 | ␊ |
375 | Space-separated list of tags which should break the sequence.␊ |
376 | By default, all tags break the sequence.␊ |
377 | ␊ |
378 | The tags must be in the form <aaa>, but you can join some␊ |
379 | (<bbb><aaa>), if a tag (<aaa>) should only be considered␊ |
380 | when it's into another tag (<bbb>).␊ |
381 | ␊ |
382 | =item B<inline>␊ |
383 | ␊ |
384 | Space-separated list of tags which should be treated as inline.␊ |
385 | By default, all tags break the sequence.␊ |
386 | ␊ |
387 | The tags must be in the form <aaa>, but you can join some␊ |
388 | (<bbb><aaa>), if a tag (<aaa>) should only be considered␊ |
389 | when it's into another tag (<bbb>).␊ |
390 | ␊ |
391 | =item B<placeholder>␊ |
392 | ␊ |
393 | Space-separated list of tags which should be treated as placeholders.␊ |
394 | Placeholders do not break the sequence, but the content of placeholders is␊ |
395 | translated separately.␊ |
396 | ␊ |
397 | The location of the placeholder in its block will be marked with a string␊ |
398 | similar to:␊ |
399 | ␊ |
400 | <placeholder type=\"footnote\" id=\"0\"/>␊ |
401 | ␊ |
402 | The tags must be in the form <aaa>, but you can join some␊ |
403 | (<bbb><aaa>), if a tag (<aaa>) should only be considered␊ |
404 | when it's into another tag (<bbb>).␊ |
405 | ␊ |
406 | =item B<nodefault>␊ |
407 | ␊ |
408 | Space separated list of tags that the module should not try to set by␊ |
409 | default in any category.␊ |
410 | ␊ |
411 | =item B<cpp>␊ |
412 | ␊ |
413 | Support C preprocessor directives.␊ |
414 | When this option is set, po4a will consider preprocessor directives as␊ |
415 | paragraph separators.␊ |
416 | This is important if the XML file must be preprocessed because otherwise␊ |
417 | the directives may be inserted in the middle of lines if po4a consider it␊ |
418 | belong to the current paragraph, and they won't be recognized by the␊ |
419 | preprocessor.␊ |
420 | Note: the preprocessor directives must only appear between tags␊ |
421 | (they must not break a tag).␊ |
422 | ␊ |
423 | =item B<translated>␊ |
424 | ␊ |
425 | Space-separated list of tags you want to translate.␊ |
426 | ␊ |
427 | The tags must be in the form <aaa>, but you can join some␊ |
428 | (<bbb><aaa>), if a tag (<aaa>) should only be considered␊ |
429 | when it's into another tag (<bbb>).␊ |
430 | ␊ |
431 | You can also specify some tag options putting some characters in front of␊ |
432 | the tag hierarchy. For example, you can put 'w' (wrap) or 'W' (don't wrap)␊ |
433 | to overide the default behavior specified by the global "wrap" option.␊ |
434 | ␊ |
435 | Example: WE<lt>chapterE<gt>E<lt>titleE<gt>␊ |
436 | ␊ |
437 | =item B<untranslated>␊ |
438 | ␊ |
439 | Space-separated list of tags you do not want to translate.␊ |
440 | ␊ |
441 | The tags must be in the form <aaa>, but you can join some␊ |
442 | (<bbb><aaa>), if a tag (<aaa>) should only be considered␊ |
443 | when it's into another tag (<bbb>).␊ |
444 | ␊ |
445 | =item B<defaulttranslateoption>␊ |
446 | ␊ |
447 | The default categories for tags that are not in any of the translated,␊ |
448 | untranslated, break, inline, or placeholder.␊ |
449 | ␊ |
450 | This is a set of letters:␊ |
451 | ␊ |
452 | =over␊ |
453 | ␊ |
454 | =item I<w>␊ |
455 | ␊ |
456 | Tags should be translated and content can be re-wrapped.␊ |
457 | ␊ |
458 | =item I<W>␊ |
459 | ␊ |
460 | Tags should be translated and content should not be re-wrapped.␊ |
461 | ␊ |
462 | =item I<i>␊ |
463 | ␊ |
464 | Tags should be translated inline.␊ |
465 | ␊ |
466 | =item I<p>␊ |
467 | ␊ |
468 | Tags should be translated as placeholders.␊ |
469 | ␊ |
470 | =back␊ |
471 | ␊ |
472 | =back␊ |
473 | ␊ |
474 | =cut␊ |
475 | # TODO: defaulttranslateoption␊ |
476 | # w => indicate that it is only valid for translatable tags and do not␊ |
477 | # care about inline/break/placeholder?␊ |
478 | # ...␊ |
479 | ␊ |
480 | sub initialize {␊ |
481 | ␉my $self = shift;␊ |
482 | ␉my %options = @_;␊ |
483 | ␊ |
484 | ␉# Reset the path␊ |
485 | ␉@path = ();␊ |
486 | ␊ |
487 | ␉# Initialize the stack of holders␊ |
488 | ␉my @paragraph = ();␊ |
489 | ␉my @sub_translations = ();␊ |
490 | ␉my %folded_attributes;␊ |
491 | ␉my %holder = ('paragraph' => \@paragraph,␊ |
492 | ␉ 'translation' => "",␊ |
493 | ␉ 'sub_translations' => \@sub_translations,␊ |
494 | ␉ 'folded_attributes' => \%folded_attributes);␊ |
495 | ␉@save_holders = (\%holder);␊ |
496 | ␊ |
497 | ␉$self->{options}{'addlang'}=0;␊ |
498 | ␉$self->{options}{'nostrip'}=0;␊ |
499 | ␉$self->{options}{'wrap'}=0;␊ |
500 | ␉$self->{options}{'caseinsensitive'}=0;␊ |
501 | ␉$self->{options}{'tagsonly'}=0;␊ |
502 | ␉$self->{options}{'tags'}='';␊ |
503 | ␉$self->{options}{'break'}='';␊ |
504 | ␉$self->{options}{'translated'}='';␊ |
505 | ␉$self->{options}{'untranslated'}='';␊ |
506 | ␉$self->{options}{'defaulttranslateoption'}='';␊ |
507 | ␉$self->{options}{'attributes'}='';␊ |
508 | ␉$self->{options}{'foldattributes'}=0;␊ |
509 | ␉$self->{options}{'inline'}='';␊ |
510 | ␉$self->{options}{'placeholder'}='';␊ |
511 | ␉$self->{options}{'customtag'}='';␊ |
512 | ␉$self->{options}{'doctype'}='';␊ |
513 | ␉$self->{options}{'nodefault'}='';␊ |
514 | ␉$self->{options}{'includeexternal'}=0;␊ |
515 | ␉$self->{options}{'ontagerror'}="fail";␊ |
516 | ␉$self->{options}{'cpp'}=0;␊ |
517 | ␊ |
518 | ␉$self->{options}{'verbose'}='';␊ |
519 | ␉$self->{options}{'debug'}='';␊ |
520 | ␊ |
521 | ␉foreach my $opt (keys %options) {␊ |
522 | ␉␉if ($options{$opt}) {␊ |
523 | ␉␉␉die wrap_mod("po4a::xml",␊ |
524 | ␉␉␉␉dgettext("po4a", "Unknown option: %s"), $opt)␊ |
525 | ␉␉␉␉unless exists $self->{options}{$opt};␊ |
526 | ␉␉␉$self->{options}{$opt} = $options{$opt};␊ |
527 | ␉␉}␊ |
528 | ␉}␊ |
529 | ␉# Default options set by modules. Forbidden for users.␊ |
530 | ␉$self->{options}{'_default_translated'}='';␊ |
531 | ␉$self->{options}{'_default_untranslated'}='';␊ |
532 | ␉$self->{options}{'_default_break'}='';␊ |
533 | ␉$self->{options}{'_default_inline'}='';␊ |
534 | ␉$self->{options}{'_default_placeholder'}='';␊ |
535 | ␉$self->{options}{'_default_attributes'}='';␊ |
536 | ␉$self->{options}{'_default_customtag'}='';␊ |
537 | ␊ |
538 | ␉#It will maintain the list of the translatable tags␊ |
539 | ␉$self->{tags}=();␊ |
540 | ␉$self->{translated}=();␊ |
541 | ␉$self->{untranslated}=();␊ |
542 | ␉#It will maintain the list of the translatable attributes␊ |
543 | ␉$self->{attributes}=();␊ |
544 | ␉#It will maintain the list of the breaking tags␊ |
545 | ␉$self->{break}=();␊ |
546 | ␉#It will maintain the list of the inline tags␊ |
547 | ␉$self->{inline}=();␊ |
548 | ␉#It will maintain the list of the placeholder tags␊ |
549 | ␉$self->{placeholder}=();␊ |
550 | ␉#It will maintain the list of the customtag tags␊ |
551 | ␉$self->{customtag}=();␊ |
552 | ␉#list of the tags that must not be set in the tags or inline category␊ |
553 | ␉#by this module or sub-module (unless specified in an option)␊ |
554 | ␉$self->{nodefault}=();␊ |
555 | ␊ |
556 | ␉$self->treat_options;␊ |
557 | ␊ |
558 | ␉# Clear cache␊ |
559 | ␉%translate_options_cache=();␊ |
560 | }␊ |
561 | ␊ |
562 | =head1 WRITING DERIVATE MODULES␊ |
563 | ␊ |
564 | =head2 DEFINE WHAT TAGS AND ATTRIBUTES TO TRANSLATE␊ |
565 | ␊ |
566 | The simplest customization is to define which tags and attributes you want␊ |
567 | the parser to translate. This should be done in the initialize function.␊ |
568 | First you should call the main initialize, to get the command-line options,␊ |
569 | and then, append your custom definitions to the options hash. If you want␊ |
570 | to treat some new options from command line, you should define them before␊ |
571 | calling the main initialize:␊ |
572 | ␊ |
573 | $self->{options}{'new_option'}='';␊ |
574 | $self->SUPER::initialize(%options);␊ |
575 | $self->{options}{'_default_translated'}.=' <p> <head><title>';␊ |
576 | $self->{options}{'attributes'}.=' <p>lang id';␊ |
577 | $self->{options}{'_default_inline'}.=' <br>';␊ |
578 | $self->treat_options;␊ |
579 | ␊ |
580 | You should use the B<_default_inline>, B<_default_break>,␊ |
581 | B<_default_placeholder>, B<_default_translated>, B<_default_untranslated>,␊ |
582 | and B<_default_attributes> options in derivated modules. This allow users␊ |
583 | to override the default behavior defined in your module with command line␊ |
584 | options.␊ |
585 | ␊ |
586 | =head2 OVERRIDING THE found_string FUNCTION␊ |
587 | ␊ |
588 | Another simple step is to override the function "found_string", which␊ |
589 | receives the extracted strings from the parser, in order to translate them.␊ |
590 | There you can control which strings you want to translate, and perform␊ |
591 | transformations to them before or after the translation itself.␊ |
592 | ␊ |
593 | It receives the extracted text, the reference on where it was, and a hash␊ |
594 | that contains extra information to control what strings to translate, how␊ |
595 | to translate them and to generate the comment.␊ |
596 | ␊ |
597 | The content of these options depends on the kind of string it is (specified in an␊ |
598 | entry of this hash):␊ |
599 | ␊ |
600 | =over␊ |
601 | ␊ |
602 | =item type="tag"␊ |
603 | ␊ |
604 | The found string is the content of a translatable tag. The entry "tag_options"␊ |
605 | contains the option characters in front of the tag hierarchy in the module␊ |
606 | "tags" option.␊ |
607 | ␊ |
608 | =item type="attribute"␊ |
609 | ␊ |
610 | Means that the found string is the value of a translatable attribute. The␊ |
611 | entry "attribute" has the name of the attribute.␊ |
612 | ␊ |
613 | =back␊ |
614 | ␊ |
615 | It must return the text that will replace the original in the translated␊ |
616 | document. Here's a basic example of this function:␊ |
617 | ␊ |
618 | sub found_string {␊ |
619 | my ($self,$text,$ref,$options)=@_;␊ |
620 | $text = $self->translate($text,$ref,"type ".$options->{'type'},␊ |
621 | 'wrap'=>$self->{options}{'wrap'});␊ |
622 | return $text;␊ |
623 | }␊ |
624 | ␊ |
625 | There's another simple example in the new Dia module, which only filters␊ |
626 | some strings.␊ |
627 | ␊ |
628 | =cut␊ |
629 | ␊ |
630 | sub found_string {␊ |
631 | ␉my ($self,$text,$ref,$options)=@_;␊ |
632 | ␊ |
633 | ␉if ($text =~ m/^\s*$/s) {␊ |
634 | ␉␉return $text;␊ |
635 | ␉}␊ |
636 | ␊ |
637 | ␉my $comment;␊ |
638 | ␉my $wrap = $self->{options}{'wrap'};␊ |
639 | ␊ |
640 | ␉if ($options->{'type'} eq "tag") {␊ |
641 | ␉␉$comment = "Content of: ".$self->get_path;␊ |
642 | ␊ |
643 | ␉␉if($options->{'tag_options'} =~ /w/) {␊ |
644 | ␉␉␉$wrap = 1;␊ |
645 | ␉␉}␊ |
646 | ␉␉if($options->{'tag_options'} =~ /W/) {␊ |
647 | ␉␉␉$wrap = 0;␊ |
648 | ␉␉}␊ |
649 | ␉} elsif ($options->{'type'} eq "attribute") {␊ |
650 | ␉␉$comment = "Attribute '".$options->{'attribute'}."' of: ".$self->get_path;␊ |
651 | ␉} elsif ($options->{'type'} eq "CDATA") {␊ |
652 | ␉␉$comment = "CDATA";␊ |
653 | ␉␉$wrap = 0;␊ |
654 | ␉} else {␊ |
655 | ␉␉die wrap_ref_mod($ref, "po4a::xml", dgettext("po4a", "Internal error: unknown type identifier '%s'."), $options->{'type'});␊ |
656 | ␉}␊ |
657 | ␉$text = $self->translate($text,$ref,$comment,'wrap'=>$wrap, comment => $options->{'comments'});␊ |
658 | ␉return $text;␊ |
659 | }␊ |
660 | ␊ |
661 | =head2 MODIFYING TAG TYPES (TODO)␊ |
662 | ␊ |
663 | This is a more complex one, but it enables a (almost) total customization.␊ |
664 | It's based in a list of hashes, each one defining a tag type's behavior. The␊ |
665 | list should be sorted so that the most general tags are after the most␊ |
666 | concrete ones (sorted first by the beginning and then by the end keys). To␊ |
667 | define a tag type you'll have to make a hash with the following keys:␊ |
668 | ␊ |
669 | =over 4␊ |
670 | ␊ |
671 | =item B<beginning>␊ |
672 | ␊ |
673 | Specifies the beginning of the tag, after the "E<lt>".␊ |
674 | ␊ |
675 | =item B<end>␊ |
676 | ␊ |
677 | Specifies the end of the tag, before the "E<gt>".␊ |
678 | ␊ |
679 | =item B<breaking>␊ |
680 | ␊ |
681 | It says if this is a breaking tag class. A non-breaking (inline) tag is one␊ |
682 | that can be taken as part of the content of another tag. It can take the␊ |
683 | values false (0), true (1) or undefined. If you leave this undefined, you'll␊ |
684 | have to define the f_breaking function that will say whether a concrete tag of␊ |
685 | this class is a breaking tag or not.␊ |
686 | ␊ |
687 | =item B<f_breaking>␊ |
688 | ␊ |
689 | It's a function that will tell if the next tag is a breaking one or not. It␊ |
690 | should be defined if the B<breaking> option is not.␊ |
691 | ␊ |
692 | =item B<f_extract>␊ |
693 | ␊ |
694 | If you leave this key undefined, the generic extraction function will have to␊ |
695 | extract the tag itself. It's useful for tags that can have other tags or␊ |
696 | special structures in them, so that the main parser doesn't get mad. This␊ |
697 | function receives a boolean that says if the tag should be removed from the␊ |
698 | input stream or not.␊ |
699 | ␊ |
700 | =item B<f_translate>␊ |
701 | ␊ |
702 | This function receives the tag (in the get_string_until() format) and returns␊ |
703 | the translated tag (translated attributes or all needed transformations) as a␊ |
704 | single string.␊ |
705 | ␊ |
706 | =back␊ |
707 | ␊ |
708 | =cut␊ |
709 | ␊ |
710 | ##### Generic XML tag types #####'␊ |
711 | ␊ |
712 | our @tag_types = (␊ |
713 | ␉{␉beginning␉=> "!--#",␊ |
714 | ␉␉end␉␉=> "--",␊ |
715 | ␉␉breaking␉=> 0,␊ |
716 | ␉␉f_extract␉=> \&tag_extract_comment,␊ |
717 | ␉␉f_translate␉=> \&tag_trans_comment},␊ |
718 | ␉{␉beginning␉=> "!--",␊ |
719 | ␉␉end␉␉=> "--",␊ |
720 | ␉␉breaking␉=> 0,␊ |
721 | ␉␉f_extract␉=> \&tag_extract_comment,␊ |
722 | ␉␉f_translate␉=> \&tag_trans_comment},␊ |
723 | ␉{␉beginning␉=> "?xml",␊ |
724 | ␉␉end␉␉=> "?",␊ |
725 | ␉␉breaking␉=> 1,␊ |
726 | ␉␉f_translate␉=> \&tag_trans_xmlhead},␊ |
727 | ␉{␉beginning␉=> "?",␊ |
728 | ␉␉end␉␉=> "?",␊ |
729 | ␉␉breaking␉=> 1,␊ |
730 | ␉␉f_translate␉=> \&tag_trans_procins},␊ |
731 | ␉{␉beginning␉=> "!DOCTYPE",␊ |
732 | ␉␉end␉␉=> "",␊ |
733 | ␉␉breaking␉=> 1,␊ |
734 | ␉␉f_extract␉=> \&tag_extract_doctype,␊ |
735 | ␉␉f_translate␉=> \&tag_trans_doctype},␊ |
736 | ␉{␉beginning␉=> "![CDATA[",␊ |
737 | ␉␉end␉␉=> "]]",␊ |
738 | ␉␉breaking␉=> 1,␊ |
739 | ␉␉f_extract␉=> \&CDATA_extract,␊ |
740 | ␉␉f_translate␉=> \&CDATA_trans},␊ |
741 | ␉{␉beginning␉=> "/",␊ |
742 | ␉␉end␉␉=> "",␊ |
743 | ␉␉f_breaking␉=> \&tag_break_close,␊ |
744 | ␉␉f_translate␉=> \&tag_trans_close},␊ |
745 | ␉{␉beginning␉=> "",␊ |
746 | ␉␉end␉␉=> "/",␊ |
747 | ␉␉f_breaking␉=> \&tag_break_alone,␊ |
748 | ␉␉f_translate␉=> \&tag_trans_alone},␊ |
749 | ␉{␉beginning␉=> "",␊ |
750 | ␉␉end␉␉=> "",␊ |
751 | ␉␉f_breaking␉=> \&tag_break_open,␊ |
752 | ␉␉f_translate␉=> \&tag_trans_open}␊ |
753 | );␊ |
754 | ␊ |
755 | sub tag_extract_comment {␊ |
756 | ␉my ($self,$remove)=(shift,shift);␊ |
757 | ␉my ($eof,@tag)=$self->get_string_until('-->',{include=>1,remove=>$remove});␊ |
758 | ␉return ($eof,@tag);␊ |
759 | }␊ |
760 | ␊ |
761 | sub tag_trans_comment {␊ |
762 | ␉my ($self,@tag)=@_;␊ |
763 | ␉return $self->join_lines(@tag);␊ |
764 | }␊ |
765 | ␊ |
766 | sub tag_trans_xmlhead {␊ |
767 | ␉my ($self,@tag)=@_;␊ |
768 | ␊ |
769 | ␉# We don't have to translate anything from here: throw away references␊ |
770 | ␉my $tag = $self->join_lines(@tag);␊ |
771 | ␉$tag =~ /encoding=(("|')|)(.*?)(\s|\2)/s;␊ |
772 | ␉my $in_charset=$3;␊ |
773 | ␉$self->detected_charset($in_charset);␊ |
774 | ␉my $out_charset=$self->get_out_charset;␊ |
775 | ␊ |
776 | ␉if (defined $in_charset) {␊ |
777 | ␉␉$tag =~ s/$in_charset/$out_charset/;␊ |
778 | ␉} else {␊ |
779 | ␉␉if ($tag =~ m/standalone/) {␊ |
780 | ␉␉␉$tag =~ s/(standalone)/encoding="$out_charset" $1/;␊ |
781 | ␉␉} else {␊ |
782 | ␉␉␉$tag.= " encoding=\"$out_charset\"";␊ |
783 | ␉␉}␊ |
784 | ␉}␊ |
785 | ␊ |
786 | ␉return $tag;␊ |
787 | }␊ |
788 | ␊ |
789 | sub tag_trans_procins {␊ |
790 | ␉my ($self,@tag)=@_;␊ |
791 | ␉return $self->join_lines(@tag);␊ |
792 | }␊ |
793 | ␊ |
794 | sub tag_extract_doctype {␊ |
795 | ␉my ($self,$remove)=(shift,shift);␊ |
796 | ␊ |
797 | ␉# Check if there is an internal subset (between []).␊ |
798 | ␉my ($eof,@tag)=$self->get_string_until('>',{include=>1,unquoted=>1});␊ |
799 | ␉my $parity = 0;␊ |
800 | ␉my $paragraph = "";␊ |
801 | ␉map { $parity = 1 - $parity; $paragraph.= $parity?$_:""; } @tag;␊ |
802 | ␉my $found = 0;␊ |
803 | ␉if ($paragraph =~ m/<.*\[.*</s) {␊ |
804 | ␉␉$found = 1␊ |
805 | ␉}␊ |
806 | ␊ |
807 | ␉if (not $found) {␊ |
808 | ␉␉($eof,@tag)=$self->get_string_until('>',{include=>1,remove=>$remove,unquoted=>1});␊ |
809 | ␉} else {␊ |
810 | ␉␉($eof,@tag)=$self->get_string_until(']\s*>',{include=>1,remove=>$remove,unquoted=>1,regex=>1});␊ |
811 | ␉}␊ |
812 | ␉return ($eof,@tag);␊ |
813 | }␊ |
814 | ␊ |
815 | sub tag_trans_doctype {␊ |
816 | # This check is not really reliable. There are system and public␊ |
817 | # identifiers. Only the public one could be checked reliably.␊ |
818 | ␉my ($self,@tag)=@_;␊ |
819 | ␉if (defined $self->{options}{'doctype'} ) {␊ |
820 | ␉␉my $doctype = $self->{options}{'doctype'};␊ |
821 | ␉␉if ( $tag[0] !~ /\Q$doctype\E/i ) {␊ |
822 | ␉␉␉warn wrap_ref_mod($tag[1], "po4a::xml", dgettext("po4a", "Bad document type. '%s' expected. You can fix this warning with a -o doctype option, or ignore this check with -o doctype=\"\"."), $doctype);␊ |
823 | ␉␉}␊ |
824 | ␉}␊ |
825 | ␉my $i = 0;␊ |
826 | ␉my $basedir = $tag[1];␊ |
827 | ␉$basedir =~ s/:[0-9]+$//;␊ |
828 | ␉$basedir = dirname($basedir);␊ |
829 | ␊ |
830 | ␉while ( $i < $#tag ) {␊ |
831 | ␉␉my $t = $tag[$i];␊ |
832 | ␉␉my $ref = $tag[$i+1];␊ |
833 | ␉␉if ( $t =~ /^(\s*<!ENTITY\s+)(.*)$/is ) {␊ |
834 | ␉␉␉my $part1 = $1;␊ |
835 | ␉␉␉my $part2 = $2;␊ |
836 | ␉␉␉my $includenow = 0;␊ |
837 | ␉␉␉my $file = 0;␊ |
838 | ␉␉␉my $name = "";␊ |
839 | ␉␉␉if ($part2 =~ /^(%\s+)(.*)$/s ) {␊ |
840 | ␉␉␉␉$part1.= $1;␊ |
841 | ␉␉␉␉$part2 = $2;␊ |
842 | ␉␉␉␉$includenow = 1;␊ |
843 | ␉␉␉}␊ |
844 | ␉␉␉$part2 =~ /^(\S+)(\s+)(.*)$/s;␊ |
845 | ␉␉␉$name = $1;␊ |
846 | ␉␉␉$part1.= $1.$2;␊ |
847 | ␉␉␉$part2 = $3;␊ |
848 | ␉␉␉if ( $part2 =~ /^(SYSTEM\s+)(.*)$/is ) {␊ |
849 | ␉␉␉␉$part1.= $1;␊ |
850 | ␉␉␉␉$part2 = $2;␊ |
851 | ␉␉␉␉$file = 1;␊ |
852 | ␉␉␉␉if ($self->{options}{'includeexternal'}) {␊ |
853 | ␉␉␉␉␉$entities{$name} = $part2;␊ |
854 | ␉␉␉␉␉$entities{$name} =~ s/^"?(.*?)".*$/$1/s;␊ |
855 | ␉␉␉␉␉$entities{$name} = File::Spec->catfile($basedir, $entities{$name});␊ |
856 | ␉␉␉␉}␊ |
857 | ␉␉␉}␊ |
858 | ␉␉␉if ((not $file) and (not $includenow)) {␊ |
859 | ␉␉␉ if ($part2 =~ m/^\s*(["'])(.*)\1(\s*>.*)$/s) {␊ |
860 | ␉␉␉␉my $comment = "Content of the $name entity";␊ |
861 | ␉␉␉␉my $quote = $1;␊ |
862 | ␉␉␉␉my $text = $2;␊ |
863 | ␉␉␉␉$part2 = $3;␊ |
864 | ␉␉␉␉$text = $self->translate($text,␊ |
865 | ␉␉␉␉ $ref,␊ |
866 | ␉␉␉␉ $comment,␊ |
867 | ␉␉␉␉ 'wrap'=>1);␊ |
868 | ␉␉␉␉$t = $part1."$quote$text$quote$part2";␊ |
869 | ␉␉␉ }␊ |
870 | ␉␉␉}␊ |
871 | #␉␉␉print $part1."\n";␊ |
872 | #␉␉␉print $name."\n";␊ |
873 | #␉␉␉print $part2."\n";␊ |
874 | ␉␉}␊ |
875 | ␉␉$tag[$i] = $t;␊ |
876 | ␉␉$i += 2;␊ |
877 | ␉}␊ |
878 | ␉return $self->join_lines(@tag);␊ |
879 | }␊ |
880 | ␊ |
881 | sub tag_break_close {␊ |
882 | ␉my ($self,@tag)=@_;␊ |
883 | ␉my $struct = $self->get_path;␊ |
884 | ␉my $options = $self->get_translate_options($struct);␊ |
885 | ␉if ($options =~ m/[ip]/) {␊ |
886 | ␉␉return 0;␊ |
887 | ␉} else {␊ |
888 | ␉␉return 1;␊ |
889 | ␉}␊ |
890 | }␊ |
891 | ␊ |
892 | sub tag_trans_close {␊ |
893 | ␉my ($self,@tag)=@_;␊ |
894 | ␉my $name = $self->get_tag_name(@tag);␊ |
895 | ␊ |
896 | ␉my $test = pop @path;␊ |
897 | ␉if (!defined($test) || $test ne $name ) {␊ |
898 | ␉␉my $ontagerror = $self->{options}{'ontagerror'};␊ |
899 | ␉␉if ($ontagerror eq "warn") {␊ |
900 | ␉␉␉warn wrap_ref_mod($tag[1], "po4a::xml", dgettext("po4a", "Unexpected closing tag </%s> found. The main document may be wrong. Continuing..."), $name);␊ |
901 | ␉␉} elsif ($ontagerror ne "silent") {␊ |
902 | ␉␉␉die wrap_ref_mod($tag[1], "po4a::xml", dgettext("po4a", "Unexpected closing tag </%s> found. The main document may be wrong."), $name);␊ |
903 | ␉␉}␊ |
904 | ␉}␊ |
905 | ␉return $self->join_lines(@tag);␊ |
906 | }␊ |
907 | ␊ |
908 | sub CDATA_extract {␊ |
909 | ␉my ($self,$remove)=(shift,shift);␊ |
910 | my ($eof, @tag) = $self->get_string_until(']]>',{include=>1,unquoted=>0,remove=>$remove});␊ |
911 | ␊ |
912 | ␉return ($eof, @tag);␊ |
913 | }␊ |
914 | ␊ |
915 | sub CDATA_trans {␊ |
916 | ␉my ($self,@tag)=@_;␊ |
917 | ␉return $self->found_string($self->join_lines(@tag),␊ |
918 | ␉ $tag[1],␊ |
919 | ␉ {'type' => "CDATA"});␊ |
920 | }␊ |
921 | ␊ |
922 | sub tag_break_alone {␊ |
923 | ␉my ($self,@tag)=@_;␊ |
924 | ␉my $struct = $self->get_path($self->get_tag_name(@tag));␊ |
925 | ␉if ($self->get_translate_options($struct) =~ m/i/) {␊ |
926 | ␉␉return 0;␊ |
927 | ␉} else {␊ |
928 | ␉␉return 1;␊ |
929 | ␉}␊ |
930 | }␊ |
931 | ␊ |
932 | sub tag_trans_alone {␊ |
933 | ␉my ($self,@tag)=@_;␊ |
934 | ␉my $name = $self->get_tag_name(@tag);␊ |
935 | ␉push @path, $name;␊ |
936 | ␊ |
937 | ␉$name = $self->treat_attributes(@tag);␊ |
938 | ␊ |
939 | ␉pop @path;␊ |
940 | ␉return $name;␊ |
941 | }␊ |
942 | ␊ |
943 | sub tag_break_open {␊ |
944 | ␉my ($self,@tag)=@_;␊ |
945 | ␉my $struct = $self->get_path($self->get_tag_name(@tag));␊ |
946 | ␉my $options = $self->get_translate_options($struct);␊ |
947 | ␉if ($options =~ m/[ip]/) {␊ |
948 | ␉␉return 0;␊ |
949 | ␉} else {␊ |
950 | ␉␉return 1;␊ |
951 | ␉}␊ |
952 | }␊ |
953 | ␊ |
954 | sub tag_trans_open {␊ |
955 | ␉my ($self,@tag)=@_;␊ |
956 | ␉my $name = $self->get_tag_name(@tag);␊ |
957 | ␉push @path, $name;␊ |
958 | ␊ |
959 | ␉$name = $self->treat_attributes(@tag);␊ |
960 | ␊ |
961 | ␉if (defined $self->{options}{'addlang'}) {␊ |
962 | ␉␉my $struct = $self->get_path();␊ |
963 | ␉␉if ($struct eq $self->{options}{'addlang'}) {␊ |
964 | ␉␉␉$name .= ' lang="'.$self->{TT}{po_in}->{lang}.'"';␊ |
965 | ␉␉}␊ |
966 | ␉}␊ |
967 | ␊ |
968 | ␉return $name;␊ |
969 | }␊ |
970 | ␊ |
971 | ##### END of Generic XML tag types #####␊ |
972 | ␊ |
973 | =head1 INTERNAL FUNCTIONS used to write derivated parsers␊ |
974 | ␊ |
975 | =head2 WORKING WITH TAGS␊ |
976 | ␊ |
977 | =over 4␊ |
978 | ␊ |
979 | =item get_path()␊ |
980 | ␊ |
981 | This function returns the path to the current tag from the document's root,␊ |
982 | in the form E<lt>htmlE<gt>E<lt>bodyE<gt>E<lt>pE<gt>.␊ |
983 | ␊ |
984 | An additional array of tags (without brackets) can be passed as argument.␊ |
985 | These path elements are added to the end of the current path.␊ |
986 | ␊ |
987 | =cut␊ |
988 | ␊ |
989 | sub get_path {␊ |
990 | ␉my $self = shift;␊ |
991 | ␉my @add = @_;␊ |
992 | ␉if ( @path > 0 or @add > 0 ) {␊ |
993 | ␉␉return "<".join("><",@path,@add).">";␊ |
994 | ␉} else {␊ |
995 | ␉␉return "outside any tag (error?)";␊ |
996 | ␉}␊ |
997 | }␊ |
998 | ␊ |
999 | =item tag_type()␊ |
1000 | ␊ |
1001 | This function returns the index from the tag_types list that fits to the next␊ |
1002 | tag in the input stream, or -1 if it's at the end of the input file.␊ |
1003 | ␊ |
1004 | =cut␊ |
1005 | ␊ |
1006 | sub tag_type {␊ |
1007 | ␉my $self = shift;␊ |
1008 | ␉my ($line,$ref) = $self->shiftline();␊ |
1009 | ␉my ($match1,$match2);␊ |
1010 | ␉my $found = 0;␊ |
1011 | ␉my $i = 0;␊ |
1012 | ␊ |
1013 | ␉if (!defined($line)) { return -1; }␊ |
1014 | ␊ |
1015 | ␉$self->unshiftline($line,$ref);␊ |
1016 | ␉my ($eof,@lines) = $self->get_string_until(">",{include=>1,unquoted=>1});␊ |
1017 | ␉my $line2 = $self->join_lines(@lines);␊ |
1018 | ␉while (!$found && $i < @tag_types) {␊ |
1019 | ␉␉($match1,$match2) = ($tag_types[$i]->{beginning},$tag_types[$i]->{end});␊ |
1020 | ␉␉if ($line =~ /^<\Q$match1\E/) {␊ |
1021 | ␉␉␉if (!defined($tag_types[$i]->{f_extract})) {␊ |
1022 | #print substr($line2,length($line2)-1-length($match2),1+length($match2))."\n";␊ |
1023 | ␉␉␉␉if (defined($line2) and $line2 =~ /\Q$match2\E>$/) {␊ |
1024 | ␉␉␉␉␉$found = 1;␊ |
1025 | #print "YES: <".$match1." ".$match2.">\n";␊ |
1026 | ␉␉␉␉} else {␊ |
1027 | #print "NO: <".$match1." ".$match2.">\n";␊ |
1028 | ␉␉␉␉␉$i++;␊ |
1029 | ␉␉␉␉}␊ |
1030 | ␉␉␉} else {␊ |
1031 | ␉␉␉␉$found = 1;␊ |
1032 | ␉␉␉}␊ |
1033 | ␉␉} else {␊ |
1034 | ␉␉␉$i++;␊ |
1035 | ␉␉}␊ |
1036 | ␉}␊ |
1037 | ␉if (!$found) {␊ |
1038 | ␉␉#It should never enter here, unless you undefine the most␊ |
1039 | ␉␉#general tags (as <...>)␊ |
1040 | ␉␉die "po4a::xml: Unknown tag type: ".$line."\n";␊ |
1041 | ␉} else {␊ |
1042 | ␉␉return $i;␊ |
1043 | ␉}␊ |
1044 | }␊ |
1045 | ␊ |
1046 | =item extract_tag($$)␊ |
1047 | ␊ |
1048 | This function returns the next tag from the input stream without the beginning␊ |
1049 | and end, in an array form, to maintain the references from the input file. It␊ |
1050 | has two parameters: the type of the tag (as returned by tag_type) and a␊ |
1051 | boolean, that indicates if it should be removed from the input stream.␊ |
1052 | ␊ |
1053 | =cut␊ |
1054 | ␊ |
1055 | sub extract_tag {␊ |
1056 | ␉my ($self,$type,$remove) = (shift,shift,shift);␊ |
1057 | ␉my ($match1,$match2) = ($tag_types[$type]->{beginning},$tag_types[$type]->{end});␊ |
1058 | ␉my ($eof,@tag);␊ |
1059 | ␉if (defined($tag_types[$type]->{f_extract})) {␊ |
1060 | ␉␉($eof,@tag) = &{$tag_types[$type]->{f_extract}}($self,$remove);␊ |
1061 | ␉} else {␊ |
1062 | ␉␉($eof,@tag) = $self->get_string_until($match2.">",{include=>1,remove=>$remove,unquoted=>1});␊ |
1063 | ␉}␊ |
1064 | ␉$tag[0] =~ /^<\Q$match1\E(.*)$/s;␊ |
1065 | ␉$tag[0] = $1;␊ |
1066 | ␉$tag[$#tag-1] =~ /^(.*)\Q$match2\E>$/s;␊ |
1067 | ␉$tag[$#tag-1] = $1;␊ |
1068 | ␉return ($eof,@tag);␊ |
1069 | }␊ |
1070 | ␊ |
1071 | =item get_tag_name(@)␊ |
1072 | ␊ |
1073 | This function returns the name of the tag passed as an argument, in the array␊ |
1074 | form returned by extract_tag.␊ |
1075 | ␊ |
1076 | =cut␊ |
1077 | ␊ |
1078 | sub get_tag_name {␊ |
1079 | ␉my ($self,@tag)=@_;␊ |
1080 | ␉$tag[0] =~ /^(\S*)/;␊ |
1081 | ␉return $1;␊ |
1082 | }␊ |
1083 | ␊ |
1084 | =item breaking_tag()␊ |
1085 | ␊ |
1086 | This function returns a boolean that says if the next tag in the input stream␊ |
1087 | is a breaking tag or not (inline tag). It leaves the input stream intact.␊ |
1088 | ␊ |
1089 | =cut␊ |
1090 | ␊ |
1091 | sub breaking_tag {␊ |
1092 | ␉my $self = shift;␊ |
1093 | ␉my $break;␊ |
1094 | ␊ |
1095 | ␉my $type = $self->tag_type;␊ |
1096 | ␉if ($type == -1) { return 0; }␊ |
1097 | ␊ |
1098 | #print "TAG TYPE = ".$type."\n";␊ |
1099 | ␉$break = $tag_types[$type]->{breaking};␊ |
1100 | ␉if (!defined($break)) {␊ |
1101 | ␉␉# This tag's breaking depends on its content␊ |
1102 | ␉␉my ($eof,@lines) = $self->extract_tag($type,0);␊ |
1103 | ␉␉$break = &{$tag_types[$type]->{f_breaking}}($self,@lines);␊ |
1104 | ␉}␊ |
1105 | #print "break = ".$break."\n";␊ |
1106 | ␉return $break;␊ |
1107 | }␊ |
1108 | ␊ |
1109 | =item treat_tag()␊ |
1110 | ␊ |
1111 | This function translates the next tag from the input stream. Using each␊ |
1112 | tag type's custom translation functions.␊ |
1113 | ␊ |
1114 | =cut␊ |
1115 | ␊ |
1116 | sub treat_tag {␊ |
1117 | ␉my $self = shift;␊ |
1118 | ␉my $type = $self->tag_type;␊ |
1119 | ␊ |
1120 | ␉my ($match1,$match2) = ($tag_types[$type]->{beginning},$tag_types[$type]->{end});␊ |
1121 | ␉my ($eof,@lines) = $self->extract_tag($type,1);␊ |
1122 | ␊ |
1123 | ␉$lines[0] =~ /^(\s*)(.*)$/s;␊ |
1124 | ␉my $space1 = $1;␊ |
1125 | ␉$lines[0] = $2;␊ |
1126 | ␉$lines[$#lines-1] =~ /^(.*?)(\s*)$/s;␊ |
1127 | ␉my $space2 = $2;␊ |
1128 | ␉$lines[$#lines-1] = $1;␊ |
1129 | ␊ |
1130 | ␉# Calling this tag type's specific handling (translation of␊ |
1131 | ␉# attributes...)␊ |
1132 | ␉my $line = &{$tag_types[$type]->{f_translate}}($self,@lines);␊ |
1133 | ␉$self->pushline("<".$match1.$space1.$line.$space2.$match2.">");␊ |
1134 | ␉return $eof;␊ |
1135 | }␊ |
1136 | ␊ |
1137 | =item tag_in_list($@)␊ |
1138 | ␊ |
1139 | This function returns a string value that says if the first argument (a tag␊ |
1140 | hierarchy) matches any of the tags from the second argument (a list of tags␊ |
1141 | or tag hierarchies). If it doesn't match, it returns 0. Else, it returns the␊ |
1142 | matched tag's options (the characters in front of the tag) or 1 (if that tag␊ |
1143 | doesn't have options).␊ |
1144 | ␊ |
1145 | =back␊ |
1146 | ␊ |
1147 | =cut␊ |
1148 | sub tag_in_list ($$$) {␊ |
1149 | ␉my ($self,$path,$list) = @_;␊ |
1150 | ␉if ($self->{options}{'caseinsensitive'}) {␊ |
1151 | ␉␉$path = lc $path;␊ |
1152 | ␉}␊ |
1153 | ␊ |
1154 | ␉while (1) {␊ |
1155 | ␉␉if (defined $list->{$path}) {␊ |
1156 | ␉␉␉if (length $list->{$path}) {␊ |
1157 | ␉␉␉␉return $list->{$path};␊ |
1158 | ␉␉␉} else {␊ |
1159 | ␉␉␉␉return 1;␊ |
1160 | ␉␉␉}␊ |
1161 | ␉␉}␊ |
1162 | ␉␉last unless ($path =~ m/</);␊ |
1163 | ␉␉$path =~ s/^<.*?>//;␊ |
1164 | ␉}␊ |
1165 | ␊ |
1166 | ␉return 0;␊ |
1167 | }␊ |
1168 | ␊ |
1169 | =head2 WORKING WITH ATTRIBUTES␊ |
1170 | ␊ |
1171 | =over 4␊ |
1172 | ␊ |
1173 | =item treat_attributes(@)␊ |
1174 | ␊ |
1175 | This function handles the translation of the tags' attributes. It receives the tag␊ |
1176 | without the beginning / end marks, and then it finds the attributes, and it␊ |
1177 | translates the translatable ones (specified by the module option "attributes").␊ |
1178 | This returns a plain string with the translated tag.␊ |
1179 | ␊ |
1180 | =back␊ |
1181 | ␊ |
1182 | =cut␊ |
1183 | ␊ |
1184 | sub treat_attributes {␊ |
1185 | ␉my ($self,@tag)=@_;␊ |
1186 | ␊ |
1187 | ␉$tag[0] =~ /^(\S*)(.*)/s;␊ |
1188 | ␉my $text = $1;␊ |
1189 | ␉$tag[0] = $2;␊ |
1190 | ␊ |
1191 | ␉while (@tag) {␊ |
1192 | ␉␉my $complete = 1;␊ |
1193 | ␊ |
1194 | ␉␉$text .= $self->skip_spaces(\@tag);␊ |
1195 | ␉␉if (@tag) {␊ |
1196 | ␉␉␉# Get the attribute's name␊ |
1197 | ␉␉␉$complete = 0;␊ |
1198 | ␊ |
1199 | ␉␉␉$tag[0] =~ /^([^\s=]+)(.*)/s;␊ |
1200 | ␉␉␉my $name = $1;␊ |
1201 | ␉␉␉my $ref = $tag[1];␊ |
1202 | ␉␉␉$tag[0] = $2;␊ |
1203 | ␉␉␉$text .= $name;␊ |
1204 | ␉␉␉$text .= $self->skip_spaces(\@tag);␊ |
1205 | ␉␉␉if (@tag) {␊ |
1206 | ␉␉␉␉# Get the '='␊ |
1207 | ␉␉␉␉if ($tag[0] =~ /^=(.*)/s) {␊ |
1208 | ␉␉␉␉␉$tag[0] = $1;␊ |
1209 | ␉␉␉␉␉$text .= "=";␊ |
1210 | ␉␉␉␉␉$text .= $self->skip_spaces(\@tag);␊ |
1211 | ␉␉␉␉␉if (@tag) {␊ |
1212 | ␉␉␉␉␉␉# Get the value␊ |
1213 | ␉␉␉␉␉␉my $value="";␊ |
1214 | ␉␉␉␉␉␉$ref=$tag[1];␊ |
1215 | ␉␉␉␉␉␉my $quot=substr($tag[0],0,1);␊ |
1216 | ␉␉␉␉␉␉if ($quot ne "\"" and $quot ne "'") {␊ |
1217 | ␉␉␉␉␉␉␉# Unquoted value␊ |
1218 | ␉␉␉␉␉␉␉$quot="";␊ |
1219 | ␉␉␉␉␉␉␉$tag[0] =~ /^(\S+)(.*)/s;␊ |
1220 | ␉␉␉␉␉␉␉$value = $1;␊ |
1221 | ␉␉␉␉␉␉␉$tag[0] = $2;␊ |
1222 | ␉␉␉␉␉␉} else {␊ |
1223 | ␉␉␉␉␉␉␉# Quoted value␊ |
1224 | ␉␉␉␉␉␉␉$text .= $quot;␊ |
1225 | ␉␉␉␉␉␉␉$tag[0] =~ /^\Q$quot\E(.*)/s;␊ |
1226 | ␉␉␉␉␉␉␉$tag[0] = $1;␊ |
1227 | ␉␉␉␉␉␉␉while ($tag[0] !~ /\Q$quot\E/) {␊ |
1228 | ␉␉␉␉␉␉␉␉$value .= $tag[0];␊ |
1229 | ␉␉␉␉␉␉␉␉shift @tag;␊ |
1230 | ␉␉␉␉␉␉␉␉shift @tag;␊ |
1231 | ␉␉␉␉␉␉␉}␊ |
1232 | ␉␉␉␉␉␉␉$tag[0] =~ /^(.*?)\Q$quot\E(.*)/s;␊ |
1233 | ␉␉␉␉␉␉␉$value .= $1;␊ |
1234 | ␉␉␉␉␉␉␉$tag[0] = $2;␊ |
1235 | ␉␉␉␉␉␉}␊ |
1236 | ␉␉␉␉␉␉$complete = 1;␊ |
1237 | ␉␉␉␉␉␉if ($self->tag_in_list($self->get_path.$name,$self->{attributes})) {␊ |
1238 | ␉␉␉␉␉␉␉$text .= $self->found_string($value, $ref, { type=>"attribute", attribute=>$name });␊ |
1239 | ␉␉␉␉␉␉} else {␊ |
1240 | ␉␉␉␉␉␉␉print wrap_ref_mod($ref, "po4a::xml", dgettext("po4a", "Content of attribute %s excluded: %s"), $self->get_path.$name, $value)␊ |
1241 | ␉␉␉␉␉␉␉ if $self->debug();␊ |
1242 | ␉␉␉␉␉␉␉$text .= $self->recode_skipped_text($value);␊ |
1243 | ␉␉␉␉␉␉}␊ |
1244 | ␉␉␉␉␉␉$text .= $quot;␊ |
1245 | ␉␉␉␉␉}␊ |
1246 | ␉␉␉␉}␊ |
1247 | ␉␉␉}␊ |
1248 | ␊ |
1249 | ␉␉␉unless ($complete) {␊ |
1250 | ␉␉␉␉my $ontagerror = $self->{options}{'ontagerror'};␊ |
1251 | ␉␉␉␉if ($ontagerror eq "warn") {␊ |
1252 | ␉␉␉␉␉warn wrap_ref_mod($ref, "po4a::xml", dgettext ("po4a", "Bad attribute syntax. Continuing..."));␊ |
1253 | ␉␉␉␉} elsif ($ontagerror ne "silent") {␊ |
1254 | ␉␉␉␉␉die wrap_ref_mod($ref, "po4a::xml", dgettext ("po4a", "Bad attribute syntax"));␊ |
1255 | ␉␉␉␉}␊ |
1256 | ␉␉␉}␊ |
1257 | ␉␉}␊ |
1258 | ␉}␊ |
1259 | ␉return $text;␊ |
1260 | }␊ |
1261 | ␊ |
1262 | # Returns an empty string if the content in the $path should not be␊ |
1263 | # translated.␊ |
1264 | #␊ |
1265 | # Otherwise, returns the set of options for translation:␊ |
1266 | # w: the content shall be re-wrapped␊ |
1267 | # W: the content shall not be re-wrapped␊ |
1268 | # i: the tag shall be inlined␊ |
1269 | # p: a placeholder shall replace the tag (and its content)␊ |
1270 | # n: a custom tag␊ |
1271 | #␊ |
1272 | # A translatable inline tag in an untranslated tag is treated as a translatable breaking tag.␊ |
1273 | sub get_translate_options {␊ |
1274 | ␉my $self = shift;␊ |
1275 | ␉my $path = shift;␊ |
1276 | ␊ |
1277 | ␉if (defined $translate_options_cache{$path}) {␊ |
1278 | ␉␉return $translate_options_cache{$path};␊ |
1279 | ␉}␊ |
1280 | ␊ |
1281 | ␉my $options = "";␊ |
1282 | ␉my $translate = 0;␊ |
1283 | ␉my $usedefault = 1;␊ |
1284 | ␊ |
1285 | ␉my $inlist = 0;␊ |
1286 | ␉my $tag = $self->get_tag_from_list($path, $self->{tags});␊ |
1287 | ␉if (defined $tag) {␊ |
1288 | ␉␉$inlist = 1;␊ |
1289 | ␉}␊ |
1290 | ␉if ($self->{options}{'tagsonly'} eq $inlist) {␊ |
1291 | ␉␉$usedefault = 0;␊ |
1292 | ␉␉if (defined $tag) {␊ |
1293 | ␉␉␉$options = $tag;␊ |
1294 | ␉␉␉$options =~ s/<.*$//;␊ |
1295 | ␉␉} else {␊ |
1296 | ␉␉␉if ($self->{options}{'wrap'}) {␊ |
1297 | ␉␉␉␉$options = "w";␊ |
1298 | ␉␉␉} else {␊ |
1299 | ␉␉␉␉$options = "W";␊ |
1300 | ␉␉␉}␊ |
1301 | ␉␉}␊ |
1302 | ␉␉$translate = 1;␊ |
1303 | ␉}␊ |
1304 | ␊ |
1305 | # TODO: a less precise set of tags should not override a more precise one␊ |
1306 | ␉# The tags and tagsonly options are deprecated.␊ |
1307 | ␉# The translated and untranslated options have an higher priority.␊ |
1308 | ␉$tag = $self->get_tag_from_list($path, $self->{translated});␊ |
1309 | ␉if (defined $tag) {␊ |
1310 | ␉␉$usedefault = 0;␊ |
1311 | ␉␉$options = $tag;␊ |
1312 | ␉␉$options =~ s/<.*$//;␊ |
1313 | ␉␉$translate = 1;␊ |
1314 | ␉}␊ |
1315 | ␊ |
1316 | ␉if ($translate and $options !~ m/w/i) {␊ |
1317 | ␉␉$options .= ($self->{options}{'wrap'})?"w":"W";␊ |
1318 | ␉}␊ |
1319 | ␊ |
1320 | ␉if (not defined $tag) {␊ |
1321 | ␉␉$tag = $self->get_tag_from_list($path, $self->{untranslated});␊ |
1322 | ␉␉if (defined $tag) {␊ |
1323 | ␉␉␉$usedefault = 0;␊ |
1324 | ␉␉␉$options = "";␊ |
1325 | ␉␉␉$translate = 0;␊ |
1326 | ␉␉}␊ |
1327 | ␉}␊ |
1328 | ␊ |
1329 | ␉$tag = $self->get_tag_from_list($path, $self->{inline});␊ |
1330 | ␉if (defined $tag) {␊ |
1331 | ␉␉$usedefault = 0;␊ |
1332 | ␉␉$options .= "i";␊ |
1333 | ␉} else {␊ |
1334 | ␉␉$tag = $self->get_tag_from_list($path, $self->{placeholder});␊ |
1335 | ␉␉if (defined $tag) {␊ |
1336 | ␉␉␉$usedefault = 0;␊ |
1337 | ␉␉␉$options .= "p";␊ |
1338 | ␉␉}␊ |
1339 | ␉}␊ |
1340 | ␊ |
1341 | ␉$tag = $self->get_tag_from_list($path, $self->{customtag});␊ |
1342 | ␉if (defined $tag) {␊ |
1343 | ␉␉$usedefault = 0;␊ |
1344 | ␉␉$options = "in"; # This erase any other setting␊ |
1345 | ␉}␊ |
1346 | ␊ |
1347 | ␉if ($usedefault) {␊ |
1348 | ␉␉$options = $self->{options}{'defaulttranslateoption'};␊ |
1349 | ␉}␊ |
1350 | ␊ |
1351 | ␉# A translatable inline tag in an untranslated tag is treated as a␊ |
1352 | ␉# translatable breaking tag.␊ |
1353 | ␉if ($options =~ m/i/) {␊ |
1354 | ␉␉my $ppath = $path;␊ |
1355 | ␉␉$ppath =~ s/<[^>]*>$//;␊ |
1356 | ␉␉my $poptions = $self->get_translate_options ($ppath);␊ |
1357 | ␉␉if ($poptions eq "") {␊ |
1358 | ␉␉␉$options =~ s/i//;␊ |
1359 | ␉␉}␊ |
1360 | ␉}␊ |
1361 | ␊ |
1362 | ␉if ($options =~ m/i/ and $self->{options}{'foldattributes'}) {␊ |
1363 | ␉␉$options .= "f";␊ |
1364 | ␉}␊ |
1365 | ␊ |
1366 | ␉$translate_options_cache{$path} = $options;␊ |
1367 | ␉return $options;␊ |
1368 | }␊ |
1369 | ␊ |
1370 | ␊ |
1371 | # Return the tag (or biggest set of tags) of a list which matches with the␊ |
1372 | # given path.␊ |
1373 | #␊ |
1374 | # The tag (or set of tags) is returned with its options.␊ |
1375 | #␊ |
1376 | # If no tags could match the path, undef is returned.␊ |
1377 | sub get_tag_from_list ($$$) {␊ |
1378 | ␉my ($self,$path,$list) = @_;␊ |
1379 | ␉if ($self->{options}{'caseinsensitive'}) {␊ |
1380 | ␉␉$path = lc $path;␊ |
1381 | ␉}␊ |
1382 | ␊ |
1383 | ␉while (1) {␊ |
1384 | ␉␉if (defined $list->{$path}) {␊ |
1385 | ␉␉␉return $list->{$path}.$path;␊ |
1386 | ␉␉}␊ |
1387 | ␉␉last unless ($path =~ m/</);␊ |
1388 | ␉␉$path =~ s/^<.*?>//;␊ |
1389 | ␉}␊ |
1390 | ␊ |
1391 | ␉return undef;␊ |
1392 | }␊ |
1393 | ␊ |
1394 | ␊ |
1395 | ␊ |
1396 | sub treat_content {␊ |
1397 | ␉my $self = shift;␊ |
1398 | ␉my $blank="";␊ |
1399 | ␉# Indicates if the paragraph will have to be translated␊ |
1400 | ␉my $translate = "";␊ |
1401 | ␊ |
1402 | ␉my ($eof,@paragraph)=$self->get_string_until('<',{remove=>1});␊ |
1403 | ␊ |
1404 | ␉while (!$eof and !$self->breaking_tag) {␊ |
1405 | ␉NEXT_TAG:␊ |
1406 | ␉␉my @text;␊ |
1407 | ␉␉my $type = $self->tag_type;␊ |
1408 | ␉␉my $f_extract = $tag_types[$type]->{'f_extract'};␊ |
1409 | ␉␉if ( defined($f_extract)␊ |
1410 | ␉␉ and $f_extract eq \&tag_extract_comment) {␊ |
1411 | ␉␉␉# Remove the content of the comments␊ |
1412 | ␉␉␉($eof, @text) = $self->extract_tag($type,1);␊ |
1413 | ␉␉␉$text[$#text-1] .= "\0";␊ |
1414 | ␉␉␉if ($tag_types[$type]->{'beginning'} eq "!--#") {␊ |
1415 | ␉␉␉␉$text[0] = "#".$text[0];␊ |
1416 | ␉␉␉}␊ |
1417 | ␉␉␉push @comments, @text;␊ |
1418 | ␉␉} else {␊ |
1419 | ␉␉␉my ($tmpeof, @tag) = $self->extract_tag($type,0);␊ |
1420 | ␉␉␉# Append the found inline tag␊ |
1421 | ␉␉␉($eof,@text)=$self->get_string_until('>',␊ |
1422 | ␉␉␉ {include=>1,␊ |
1423 | ␉␉␉ remove=>1,␊ |
1424 | ␉␉␉ unquoted=>1});␊ |
1425 | ␉␉␉# Append or remove the opening/closing tag from␊ |
1426 | ␉␉␉# the tag path␊ |
1427 | ␉␉␉if ($tag_types[$type]->{'end'} eq "") {␊ |
1428 | ␉␉␉␉if ($tag_types[$type]->{'beginning'} eq "") {␊ |
1429 | ␉␉␉␉␉# Opening inline tag␊ |
1430 | ␉␉␉␉␉my $cur_tag_name = $self->get_tag_name(@tag);␊ |
1431 | ␉␉␉␉␉my $t_opts = $self->get_translate_options($self->get_path($cur_tag_name));␊ |
1432 | ␉␉␉␉␉if ($t_opts =~ m/p/) {␊ |
1433 | ␉␉␉␉␉␉# We enter a new holder.␊ |
1434 | ␉␉␉␉␉␉# Append a <placeholder ...> tag to the current␊ |
1435 | ␉␉␉␉␉␉# paragraph, and save the @paragraph in the␊ |
1436 | ␉␉␉␉␉␉# current holder.␊ |
1437 | ␉␉␉␉␉␉my $last_holder = $save_holders[$#save_holders];␊ |
1438 | ␉␉␉␉␉␉my $placeholder_str = "<placeholder type=\"".$cur_tag_name."\" id=\"".($#{$last_holder->{'sub_translations'}}+1)."\"/>";␊ |
1439 | ␉␉␉␉␉␉push @paragraph, ($placeholder_str, $text[1]);␊ |
1440 | ␉␉␉␉␉␉my @saved_paragraph = @paragraph;␊ |
1441 | ␊ |
1442 | ␉␉␉␉␉␉$last_holder->{'paragraph'} = \@saved_paragraph;␊ |
1443 | ␊ |
1444 | ␉␉␉␉␉␉# Then we must push a new holder␊ |
1445 | ␉␉␉␉␉␉my @new_paragraph = ();␊ |
1446 | ␉␉␉␉␉␉my @sub_translations = ();␊ |
1447 | ␉␉␉␉␉␉my %folded_attributes;␊ |
1448 | ␉␉␉␉␉␉my %new_holder = ('paragraph' => \@new_paragraph,␊ |
1449 | ␉␉␉␉␉␉ 'open' => $self->join_lines(@text),␊ |
1450 | ␉␉␉␉␉␉ 'translation' => "",␊ |
1451 | ␉␉␉␉␉␉ 'close' => undef,␊ |
1452 | ␉␉␉␉␉␉ 'sub_translations' => \@sub_translations,␊ |
1453 | ␉␉␉␉␉␉ 'folded_attributes' => \%folded_attributes);␊ |
1454 | ␉␉␉␉␉␉push @save_holders, \%new_holder;␊ |
1455 | ␉␉␉␉␉␉@text = ();␊ |
1456 | ␊ |
1457 | ␉␉␉␉␉␉# The current @paragraph␊ |
1458 | ␉␉␉␉␉␉# (for the current holder)␊ |
1459 | ␉␉␉␉␉␉# is empty.␊ |
1460 | ␉␉␉␉␉␉@paragraph = ();␊ |
1461 | ␉␉␉␉␉} elsif ($t_opts =~ m/f/) {␊ |
1462 | ␉␉␉␉␉␉my $tag_full = $self->join_lines(@text);␊ |
1463 | ␉␉␉␉␉␉my $tag_ref = $text[1];␊ |
1464 | ␉␉␉␉␉␉if ($tag_full =~ m/^<\s*\S+\s+\S.*>$/s) {␊ |
1465 | ␉␉␉␉␉␉␉my $holder = $save_holders[$#save_holders];␊ |
1466 | ␉␉␉␉␉␉␉my $id = 0;␊ |
1467 | ␉␉␉␉␉␉␉foreach (keys %{$holder->{folded_attributes}}) {␊ |
1468 | ␉␉␉␉␉␉␉␉$id = $_ + 1 if ($_ >= $id);␊ |
1469 | ␉␉␉␉␉␉␉}␊ |
1470 | ␉␉␉␉␉␉␉$holder->{folded_attributes}->{$id} = $tag_full;␊ |
1471 | ␊ |
1472 | ␉␉␉␉␉␉␉@text = ("<$cur_tag_name po4a-id=$id>", $tag_ref);␊ |
1473 | ␉␉␉␉␉␉}␊ |
1474 | ␉␉␉␉␉}␊ |
1475 | ␉␉␉␉␉unless ($t_opts =~ m/n/) {␊ |
1476 | ␉␉␉␉␉␉push @path, $cur_tag_name;␊ |
1477 | ␉␉␉␉␉}␊ |
1478 | ␉␉␉␉} elsif ($tag_types[$type]->{'beginning'} eq "/") {␊ |
1479 | ␉␉␉␉␉# Closing inline tag␊ |
1480 | ␊ |
1481 | ␉␉␉␉␉# Check if this is closing the␊ |
1482 | ␉␉␉␉␉# last opening tag we detected.␊ |
1483 | ␉␉␉␉␉my $test = pop @path;␊ |
1484 | ␉␉␉␉␉my $name = $self->get_tag_name(@tag);␊ |
1485 | ␉␉␉␉␉if (!defined($test) ||␊ |
1486 | ␉␉␉␉␉ $test ne $name ) {␊ |
1487 | ␉␉␉␉␉␉my $ontagerror = $self->{options}{'ontagerror'};␊ |
1488 | ␉␉␉␉␉␉if ($ontagerror eq "warn") {␊ |
1489 | ␉␉␉␉␉␉␉warn wrap_ref_mod($tag[1], "po4a::xml", dgettext("po4a", "Unexpected closing tag </%s> found. The main document may be wrong. Continuing..."), $name);␊ |
1490 | ␉␉␉␉␉␉} elsif ($ontagerror ne "silent") {␊ |
1491 | ␉␉␉␉␉␉␉die wrap_ref_mod($tag[1], "po4a::xml", dgettext("po4a", "Unexpected closing tag </%s> found. The main document may be wrong."), $name);␊ |
1492 | ␉␉␉␉␉␉}␊ |
1493 | ␉␉␉␉␉}␊ |
1494 | ␊ |
1495 | ␉␉␉␉␉if ($self->get_translate_options($self->get_path($self->get_tag_name(@tag))) =~ m/p/) {␊ |
1496 | ␉␉␉␉␉␉# This closes the current holder.␊ |
1497 | ␊ |
1498 | ␉␉␉␉␉␉push @path, $self->get_tag_name(@tag);␊ |
1499 | ␉␉␉␉␉␉# Now translate this paragraph if needed.␊ |
1500 | ␉␉␉␉␉␉# This will call pushline and append the␊ |
1501 | ␉␉␉␉␉␉# translation to the current holder's translation.␊ |
1502 | ␉␉␉␉␉␉$self->translate_paragraph(@paragraph);␊ |
1503 | ␉␉␉␉␉␉pop @path;␊ |
1504 | ␊ |
1505 | ␉␉␉␉␉␉# Now that this holder is closed, we can remove␊ |
1506 | ␉␉␉␉␉␉# the holder from the stack.␊ |
1507 | ␉␉␉␉␉␉my $holder = pop @save_holders;␊ |
1508 | ␉␉␉␉␉␉# We need to keep the translation of this holder␊ |
1509 | ␉␉␉␉␉␉my $translation = $holder->{'open'}.$holder->{'translation'};␊ |
1510 | ␉␉␉␉␉␉$translation .= $self->join_lines(@text);␊ |
1511 | ␊ |
1512 | ␉␉␉␉␉␉@text = ();␊ |
1513 | ␊ |
1514 | ␉␉␉␉␉␉# Then we store the translation in the previous␊ |
1515 | ␉␉␉␉␉␉# holder's sub_translations array␊ |
1516 | ␉␉␉␉␉␉my $previous_holder = $save_holders[$#save_holders];␊ |
1517 | ␉␉␉␉␉␉push @{$previous_holder->{'sub_translations'}}, $translation;␊ |
1518 | ␉␉␉␉␉␉# We also need to restore the @paragraph array, as␊ |
1519 | ␉␉␉␉␉␉# it was before we encountered the holder.␊ |
1520 | ␉␉␉␉␉␉@paragraph = @{$previous_holder->{'paragraph'}};␊ |
1521 | ␉␉␉␉␉}␊ |
1522 | ␉␉␉␉}␊ |
1523 | ␉␉␉}␊ |
1524 | ␉␉␉push @paragraph, @text;␊ |
1525 | ␉␉}␊ |
1526 | ␊ |
1527 | ␉␉# Next tag␊ |
1528 | ␉␉($eof,@text)=$self->get_string_until('<',{remove=>1});␊ |
1529 | ␉␉if ($#text > 0) {␊ |
1530 | ␉␉␉# Check if text (extracted after the inline tag)␊ |
1531 | ␉␉␉# has to be translated␊ |
1532 | ␉␉␉push @paragraph, @text;␊ |
1533 | ␉␉}␊ |
1534 | ␉}␊ |
1535 | ␊ |
1536 | ␉# This strips the extracted strings␊ |
1537 | ␉# (only if you don't specify the 'nostrip' option, and if the␊ |
1538 | ␉# paragraph can be re-wrapped)␊ |
1539 | ␉$translate = $self->get_translate_options($self->get_path);␊ |
1540 | ␉if (!$self->{options}{'nostrip'} and $translate !~ m/W/) {␊ |
1541 | ␉␉my $clean = 0;␊ |
1542 | ␉␉# Clean the beginning␊ |
1543 | ␉␉while (!$clean and $#paragraph > 0) {␊ |
1544 | ␉␉␉$paragraph[0] =~ /^(\s*)(.*)/s;␊ |
1545 | ␉␉␉my $match = $1;␊ |
1546 | ␉␉␉if ($paragraph[0] eq $match) {␊ |
1547 | ␉␉␉␉if ($match ne "") {␊ |
1548 | ␉␉␉␉␉$self->pushline($match);␊ |
1549 | ␉␉␉␉}␊ |
1550 | ␉␉␉␉shift @paragraph;␊ |
1551 | ␉␉␉␉shift @paragraph;␊ |
1552 | ␉␉␉} else {␊ |
1553 | ␉␉␉␉$paragraph[0] = $2;␊ |
1554 | ␉␉␉␉if ($match ne "") {␊ |
1555 | ␉␉␉␉␉$self->pushline($match);␊ |
1556 | ␉␉␉␉}␊ |
1557 | ␉␉␉␉$clean = 1;␊ |
1558 | ␉␉␉}␊ |
1559 | ␉␉}␊ |
1560 | ␉␉$clean = 0;␊ |
1561 | ␉␉# Clean the end␊ |
1562 | ␉␉while (!$clean and $#paragraph > 0) {␊ |
1563 | ␉␉␉$paragraph[$#paragraph-1] =~ /^(.*?)(\s*)$/s;␊ |
1564 | ␉␉␉my $match = $2;␊ |
1565 | ␉␉␉if ($paragraph[$#paragraph-1] eq $match) {␊ |
1566 | ␉␉␉␉if ($match ne "") {␊ |
1567 | ␉␉␉␉␉$blank = $match.$blank;␊ |
1568 | ␉␉␉␉}␊ |
1569 | ␉␉␉␉pop @paragraph;␊ |
1570 | ␉␉␉␉pop @paragraph;␊ |
1571 | ␉␉␉} else {␊ |
1572 | ␉␉␉␉$paragraph[$#paragraph-1] = $1;␊ |
1573 | ␉␉␉␉if ($match ne "") {␊ |
1574 | ␉␉␉␉␉$blank = $match.$blank;␊ |
1575 | ␉␉␉␉}␊ |
1576 | ␉␉␉␉$clean = 1;␊ |
1577 | ␉␉␉}␊ |
1578 | ␉␉}␊ |
1579 | ␉}␊ |
1580 | ␊ |
1581 | ␉# Translate the string when needed␊ |
1582 | ␉# This will either push the translation in the translated document or␊ |
1583 | ␉# in the current holder translation.␊ |
1584 | ␉$self->translate_paragraph(@paragraph);␊ |
1585 | ␊ |
1586 | ␉# Push the trailing blanks␊ |
1587 | ␉if ($blank ne "") {␊ |
1588 | ␉␉$self->pushline($blank);␊ |
1589 | ␉}␊ |
1590 | ␉return $eof;␊ |
1591 | }␊ |
1592 | ␊ |
1593 | # Translate a @paragraph array of (string, reference).␊ |
1594 | # The $translate argument indicates if the strings must be translated or␊ |
1595 | # just pushed␊ |
1596 | sub translate_paragraph {␊ |
1597 | ␉my $self = shift;␊ |
1598 | ␉my @paragraph = @_;␊ |
1599 | ␉my $translate = $self->get_translate_options($self->get_path);␊ |
1600 | ␊ |
1601 | ␉while ( (scalar @paragraph)␊ |
1602 | ␉ and ($paragraph[0] =~ m/^\s*\n/s)) {␊ |
1603 | ␉␉$self->pushline($paragraph[0]);␊ |
1604 | ␉␉shift @paragraph;␊ |
1605 | ␉␉shift @paragraph;␊ |
1606 | ␉}␊ |
1607 | ␊ |
1608 | ␉my $comments;␊ |
1609 | ␉while (@comments) {␊ |
1610 | ␉␉my ($comment,$eoc);␊ |
1611 | ␉␉do {␊ |
1612 | ␉␉␉my ($t,$l) = (shift @comments, shift @comments);␊ |
1613 | ␉␉␉$t =~ s/\n?(\0)?$//;␊ |
1614 | ␉␉␉$eoc = $1;␊ |
1615 | ␉␉␉$comment .= "\n" if defined $comment;␊ |
1616 | ␉␉␉$comment .= $t;␊ |
1617 | ␉␉} until ($eoc);␊ |
1618 | ␉␉$comments .= "\n" if defined $comments;␊ |
1619 | ␉␉$comments .= $comment;␊ |
1620 | ␉␉$self->pushline("<!--".$comment."-->\n") if defined $comment;␊ |
1621 | ␉}␊ |
1622 | ␉@comments = ();␊ |
1623 | ␊ |
1624 | ␉if ($self->{options}{'cpp'}) {␊ |
1625 | ␉␉my @tmp = @paragraph;␊ |
1626 | ␉␉@paragraph = ();␊ |
1627 | ␉␉while (@tmp) {␊ |
1628 | ␉␉␉my ($t,$l) = (shift @tmp, shift @tmp);␊ |
1629 | ␉␉␉# #include can be followed by a filename between␊ |
1630 | ␉␉␉# <> brackets. In that case, the argument won't be␊ |
1631 | ␉␉␉# handled in the same call to translate_paragraph.␊ |
1632 | ␉␉␉# Thus do not try to match "include ".␊ |
1633 | ␉␉␉if ($t =~ m/^#[ \t]*(if |endif|undef |include|else|ifdef |ifndef |define )/si) {␊ |
1634 | ␉␉␉␉if (@paragraph) {␊ |
1635 | ␉␉␉␉␉$self->translate_paragraph(@paragraph);␊ |
1636 | ␉␉␉␉␉@paragraph = ();␊ |
1637 | ␉␉␉␉␉$self->pushline("\n");␊ |
1638 | ␉␉␉␉}␊ |
1639 | ␉␉␉␉$self->pushline($t);␊ |
1640 | ␉␉␉} else {␊ |
1641 | ␉␉␉␉push @paragraph, ($t,$l);␊ |
1642 | ␉␉␉}␊ |
1643 | ␉␉}␊ |
1644 | ␉}␊ |
1645 | ␊ |
1646 | ␉my $para = $self->join_lines(@paragraph);␊ |
1647 | ␉if ( length($para) > 0 ) {␊ |
1648 | ␉␉if ($translate ne "") {␊ |
1649 | ␉␉␉# This tag should be translated␊ |
1650 | ␉␉␉$self->pushline($self->found_string(␊ |
1651 | ␉␉␉␉$para,␊ |
1652 | ␉␉␉␉$paragraph[1], {␊ |
1653 | ␉␉␉␉␉type=>"tag",␊ |
1654 | ␉␉␉␉␉tag_options=>$translate,␊ |
1655 | ␉␉␉␉␉comments=>$comments␊ |
1656 | ␉␉␉␉}));␊ |
1657 | ␉␉} else {␊ |
1658 | ␉␉␉# Inform that this tag isn't translated in debug mode␊ |
1659 | ␉␉␉print wrap_ref_mod($paragraph[1], "po4a::xml", dgettext ("po4a", "Content of tag %s excluded: %s"), $self->get_path, $para)␊ |
1660 | ␉␉␉ if $self->debug();␊ |
1661 | ␉␉␉$self->pushline($self->recode_skipped_text($para));␊ |
1662 | ␉␉}␊ |
1663 | ␉}␊ |
1664 | ␉# Now the paragraph is fully translated.␊ |
1665 | ␉# If we have all the holders' translation, we can replace the␊ |
1666 | ␉# placeholders by their translations.␊ |
1667 | ␉# We must wait to have all the translations because the holders are␊ |
1668 | ␉# numbered.␊ |
1669 | ␉{␊ |
1670 | ␉␉my $holder = $save_holders[$#save_holders];␊ |
1671 | ␉␉my $translation = $holder->{'translation'};␊ |
1672 | ␊ |
1673 | ␉␉# Count the number of <placeholder ...> in $translation␊ |
1674 | ␉␉my $count = 0;␊ |
1675 | ␉␉my $str = $translation;␊ |
1676 | ␉␉while ( (defined $str)␊ |
1677 | ␉␉ and ($str =~ m/^.*?<placeholder\s+type="[^"]+"\s+id="(\d+)"\s*\/>(.*)$/s)) {␊ |
1678 | ␉␉␉$count += 1;␊ |
1679 | ␉␉␉$str = $2;␊ |
1680 | ␉␉␉if ($holder->{'sub_translations'}->[$1] =~ m/<placeholder\s+type="[^"]+"\s+id="(\d+)"\s*\/>/s) {␊ |
1681 | ␉␉␉␉$count = -1;␊ |
1682 | ␉␉␉␉last;␊ |
1683 | ␉␉␉}␊ |
1684 | ␉␉}␊ |
1685 | ␊ |
1686 | ␉␉if ( (defined $translation)␊ |
1687 | ␉␉ and (scalar(@{$holder->{'sub_translations'}}) == $count)) {␊ |
1688 | ␉␉␉# OK, all the holders of the current paragraph are␊ |
1689 | ␉␉␉# closed (and translated).␊ |
1690 | ␉␉␉# Replace them by their translation.␊ |
1691 | ␉␉␉while ($translation =~ m/^(.*?)<placeholder\s+type="[^"]+"\s+id="(\d+)"\s*\/>(.*)$/s) {␊ |
1692 | ␉␉␉␉# FIXME: we could also check that␊ |
1693 | ␉␉␉␉# * the holder exists␊ |
1694 | ␉␉␉␉# * all the holders are used␊ |
1695 | ␉␉␉␉$translation = $1.$holder->{'sub_translations'}->[$2].$3;␊ |
1696 | ␉␉␉}␊ |
1697 | ␉␉␉# We have our translation␊ |
1698 | ␉␉␉$holder->{'translation'} = $translation;␊ |
1699 | ␉␉␉# And there is no need for any holder in it.␊ |
1700 | ␉␉␉my @sub_translations = ();␊ |
1701 | ␉␉␉$holder->{'sub_translations'} = \@sub_translations;␊ |
1702 | ␉␉}␊ |
1703 | ␉}␊ |
1704 | ␊ |
1705 | }␊ |
1706 | ␊ |
1707 | ␊ |
1708 | ␊ |
1709 | =head2 WORKING WITH THE MODULE OPTIONS␊ |
1710 | ␊ |
1711 | =over 4␊ |
1712 | ␊ |
1713 | =item treat_options()␊ |
1714 | ␊ |
1715 | This function fills the internal structures that contain the tags, attributes␊ |
1716 | and inline data with the options of the module (specified in the command-line␊ |
1717 | or in the initialize function).␊ |
1718 | ␊ |
1719 | =back␊ |
1720 | ␊ |
1721 | =cut␊ |
1722 | ␊ |
1723 | sub treat_options {␊ |
1724 | ␉my $self = shift;␊ |
1725 | ␊ |
1726 | ␉if ($self->{options}{'caseinsensitive'}) {␊ |
1727 | ␉␉$self->{options}{'nodefault'} = lc $self->{options}{'nodefault'};␊ |
1728 | ␉␉$self->{options}{'tags'} = lc $self->{options}{'tags'};␊ |
1729 | ␉␉$self->{options}{'break'} = lc $self->{options}{'break'};␊ |
1730 | ␉␉$self->{options}{'_default_break'} = lc $self->{options}{'_default_break'};␊ |
1731 | ␉␉$self->{options}{'translated'} = lc $self->{options}{'translated'};␊ |
1732 | ␉␉$self->{options}{'_default_translated'} = lc $self->{options}{'_default_translated'};␊ |
1733 | ␉␉$self->{options}{'untranslated'} = lc $self->{options}{'untranslated'};␊ |
1734 | ␉␉$self->{options}{'_default_untranslated'} = lc $self->{options}{'_default_untranslated'};␊ |
1735 | ␉␉$self->{options}{'attributes'} = lc $self->{options}{'attributes'};␊ |
1736 | ␉␉$self->{options}{'_default_attributes'} = lc $self->{options}{'_default_attributes'};␊ |
1737 | ␉␉$self->{options}{'inline'} = lc $self->{options}{'inline'};␊ |
1738 | ␉␉$self->{options}{'_default_inline'} = lc $self->{options}{'_default_inline'};␊ |
1739 | ␉␉$self->{options}{'placeholder'} = lc $self->{options}{'placeholder'};␊ |
1740 | ␉␉$self->{options}{'_default_placeholder'} = lc $self->{options}{'_default_placeholder'};␊ |
1741 | ␉␉$self->{options}{'customtag'} = lc $self->{options}{'customtag'};␊ |
1742 | ␉␉$self->{options}{'_default_customtag'} = lc $self->{options}{'_default_customtag'};␊ |
1743 | ␉}␊ |
1744 | ␊ |
1745 | ␉$self->{options}{'nodefault'} =~ /^\s*(.*)\s*$/s;␊ |
1746 | ␉my %list_nodefault;␊ |
1747 | ␉foreach (split(/\s+/s,$1)) {␊ |
1748 | ␉␉$list_nodefault{$_} = 1;␊ |
1749 | ␉}␊ |
1750 | ␉$self->{nodefault} = \%list_nodefault;␊ |
1751 | ␊ |
1752 | ␉$self->{options}{'tags'} =~ /^\s*(.*)\s*$/s;␊ |
1753 | ␉if (length $self->{options}{'tags'}) {␊ |
1754 | ␉␉warn wrap_mod("po4a::xml",␊ |
1755 | ␉␉ dgettext("po4a",␊ |
1756 | ␉␉ "The '%s' option is deprecated. Please use the translated/untranslated and/or break/inline/placeholder categories."), "tags");␊ |
1757 | ␉}␊ |
1758 | ␉foreach (split(/\s+/s,$1)) {␊ |
1759 | ␉␉$_ =~ m/^(.*?)(<.*)$/;␊ |
1760 | ␉␉$self->{tags}->{$2} = $1 || "";␊ |
1761 | ␉}␊ |
1762 | ␊ |
1763 | ␉if ($self->{options}{'tagsonly'}) {␊ |
1764 | ␉␉warn wrap_mod("po4a::xml",␊ |
1765 | ␉␉ dgettext("po4a",␊ |
1766 | ␉␉ "The '%s' option is deprecated. Please use the translated/untranslated and/or break/inline/placeholder categories."), "tagsonly");␊ |
1767 | ␉}␊ |
1768 | ␊ |
1769 | ␉$self->{options}{'break'} =~ /^\s*(.*)\s*$/s;␊ |
1770 | ␉foreach my $tag (split(/\s+/s,$1)) {␊ |
1771 | ␉␉$tag =~ m/^(.*?)(<.*)$/;␊ |
1772 | ␉␉$self->{break}->{$2} = $1 || "";␊ |
1773 | ␉}␊ |
1774 | ␉$self->{options}{'_default_break'} =~ /^\s*(.*)\s*$/s;␊ |
1775 | ␉foreach my $tag (split(/\s+/s,$1)) {␊ |
1776 | ␉␉$tag =~ m/^(.*?)(<.*)$/;␊ |
1777 | ␉␉$self->{break}->{$2} = $1 || ""␊ |
1778 | ␉␉␉unless $list_nodefault{$2}␊ |
1779 | ␉␉␉ or defined $self->{break}->{$2};␊ |
1780 | ␉}␊ |
1781 | ␊ |
1782 | ␉$self->{options}{'translated'} =~ /^\s*(.*)\s*$/s;␊ |
1783 | ␉foreach my $tag (split(/\s+/s,$1)) {␊ |
1784 | ␉␉$tag =~ m/^(.*?)(<.*)$/;␊ |
1785 | ␉␉$self->{translated}->{$2} = $1 || "";␊ |
1786 | ␉}␊ |
1787 | ␉$self->{options}{'_default_translated'} =~ /^\s*(.*)\s*$/s;␊ |
1788 | ␉foreach my $tag (split(/\s+/s,$1)) {␊ |
1789 | ␉␉$tag =~ m/^(.*?)(<.*)$/;␊ |
1790 | ␉␉$self->{translated}->{$2} = $1 || ""␊ |
1791 | ␉␉␉unless $list_nodefault{$2}␊ |
1792 | ␉␉␉ or defined $self->{translated}->{$2};␊ |
1793 | ␉}␊ |
1794 | ␊ |
1795 | ␉$self->{options}{'untranslated'} =~ /^\s*(.*)\s*$/s;␊ |
1796 | ␉foreach my $tag (split(/\s+/s,$1)) {␊ |
1797 | ␉␉$tag =~ m/^(.*?)(<.*)$/;␊ |
1798 | ␉␉$self->{untranslated}->{$2} = $1 || "";␊ |
1799 | ␉}␊ |
1800 | ␉$self->{options}{'_default_untranslated'} =~ /^\s*(.*)\s*$/s;␊ |
1801 | ␉foreach my $tag (split(/\s+/s,$1)) {␊ |
1802 | ␉␉$tag =~ m/^(.*?)(<.*)$/;␊ |
1803 | ␉␉$self->{untranslated}->{$2} = $1 || ""␊ |
1804 | ␉␉␉unless $list_nodefault{$2}␊ |
1805 | ␉␉␉ or defined $self->{untranslated}->{$2};␊ |
1806 | ␉}␊ |
1807 | ␊ |
1808 | ␉$self->{options}{'attributes'} =~ /^\s*(.*)\s*$/s;␊ |
1809 | ␉foreach my $tag (split(/\s+/s,$1)) {␊ |
1810 | ␉␉if ($tag =~ m/^(.*?)(<.*)$/) {␊ |
1811 | ␉␉␉$self->{attributes}->{$2} = $1 || "";␊ |
1812 | ␉␉} else {␊ |
1813 | ␉␉␉$self->{attributes}->{$tag} = "";␊ |
1814 | ␉␉}␊ |
1815 | ␉}␊ |
1816 | ␉$self->{options}{'_default_attributes'} =~ /^\s*(.*)\s*$/s;␊ |
1817 | ␉foreach my $tag (split(/\s+/s,$1)) {␊ |
1818 | ␉␉if ($tag =~ m/^(.*?)(<.*)$/) {␊ |
1819 | ␉␉␉$self->{attributes}->{$2} = $1 || ""␊ |
1820 | ␉␉␉␉unless $list_nodefault{$2}␊ |
1821 | ␉␉␉␉ or defined $self->{attributes}->{$2};␊ |
1822 | ␉␉} else {␊ |
1823 | ␉␉␉$self->{attributes}->{$tag} = ""␊ |
1824 | ␉␉␉␉unless $list_nodefault{$tag}␊ |
1825 | ␉␉␉␉ or defined $self->{attributes}->{$tag};␊ |
1826 | ␉␉}␊ |
1827 | ␉}␊ |
1828 | ␊ |
1829 | ␉$self->{options}{'inline'} =~ /^\s*(.*)\s*$/s;␊ |
1830 | ␉foreach my $tag (split(/\s+/s,$1)) {␊ |
1831 | ␉␉$tag =~ m/^(.*?)(<.*)$/;␊ |
1832 | ␉␉$self->{inline}->{$2} = $1 || "";␊ |
1833 | ␉}␊ |
1834 | ␉$self->{options}{'_default_inline'} =~ /^\s*(.*)\s*$/s;␊ |
1835 | ␉foreach my $tag (split(/\s+/s,$1)) {␊ |
1836 | ␉␉$tag =~ m/^(.*?)(<.*)$/;␊ |
1837 | ␉␉$self->{inline}->{$2} = $1 || ""␊ |
1838 | ␉␉␉unless $list_nodefault{$2}␊ |
1839 | ␉␉␉ or defined $self->{inline}->{$2};␊ |
1840 | ␉}␊ |
1841 | ␊ |
1842 | ␉$self->{options}{'placeholder'} =~ /^\s*(.*)\s*$/s;␊ |
1843 | ␉foreach my $tag (split(/\s+/s,$1)) {␊ |
1844 | ␉␉$tag =~ m/^(.*?)(<.*)$/;␊ |
1845 | ␉␉$self->{placeholder}->{$2} = $1 || "";␊ |
1846 | ␉}␊ |
1847 | ␉$self->{options}{'_default_placeholder'} =~ /^\s*(.*)\s*$/s;␊ |
1848 | ␉foreach my $tag (split(/\s+/s,$1)) {␊ |
1849 | ␉␉$tag =~ m/^(.*?)(<.*)$/;␊ |
1850 | ␉␉$self->{placeholder}->{$2} = $1 || ""␊ |
1851 | ␉␉␉unless $list_nodefault{$2}␊ |
1852 | ␉␉␉ or defined $self->{placeholder}->{$2};␊ |
1853 | ␉}␊ |
1854 | ␊ |
1855 | ␉$self->{options}{'customtag'} =~ /^\s*(.*)\s*$/s;␊ |
1856 | ␉foreach my $tag (split(/\s+/s,$1)) {␊ |
1857 | ␉␉$tag =~ m/^(.*?)(<.*)$/;␊ |
1858 | ␉␉$self->{customtag}->{$2} = $1 || "";␊ |
1859 | ␉}␊ |
1860 | ␉$self->{options}{'_default_customtag'} =~ /^\s*(.*)\s*$/s;␊ |
1861 | ␉foreach my $tag (split(/\s+/s,$1)) {␊ |
1862 | ␉␉$tag =~ m/^(.*?)(<.*)$/;␊ |
1863 | ␉␉$self->{customtag}->{$2} = $1 || ""␊ |
1864 | ␉␉␉unless $list_nodefault{$2}␊ |
1865 | ␉␉␉ or defined $self->{customtag}->{$2};␊ |
1866 | ␉}␊ |
1867 | ␊ |
1868 | ␉# There should be no translated and untranslated tags␊ |
1869 | ␉foreach my $tag (keys %{$self->{translated}}) {␊ |
1870 | ␉␉die wrap_mod("po4a::xml",␊ |
1871 | ␉␉ dgettext("po4a",␊ |
1872 | ␉␉ "Tag '%s' both in the %s and %s categories."), $tag, "translated", "untranslated")␊ |
1873 | ␉␉␉if defined $self->{untranslated}->{$tag};␊ |
1874 | ␉}␊ |
1875 | ␉# There should be no inline, break, placeholder, and customtag tags␊ |
1876 | ␉foreach my $tag (keys %{$self->{inline}}) {␊ |
1877 | ␉␉die wrap_mod("po4a::xml",␊ |
1878 | ␉␉ dgettext("po4a",␊ |
1879 | ␉␉ "Tag '%s' both in the %s and %s categories."), $tag, "inline", "break")␊ |
1880 | ␉␉␉if defined $self->{break}->{$tag};␊ |
1881 | ␉␉die wrap_mod("po4a::xml",␊ |
1882 | ␉␉ dgettext("po4a",␊ |
1883 | ␉␉ "Tag '%s' both in the %s and %s categories."), $tag, "inline", "placeholder")␊ |
1884 | ␉␉␉if defined $self->{placeholder}->{$tag};␊ |
1885 | ␉␉die wrap_mod("po4a::xml",␊ |
1886 | ␉␉ dgettext("po4a",␊ |
1887 | ␉␉ "Tag '%s' both in the %s and %s categories."), $tag, "inline", "customtag")␊ |
1888 | ␉␉␉if defined $self->{customtag}->{$tag};␊ |
1889 | ␉}␊ |
1890 | ␉foreach my $tag (keys %{$self->{break}}) {␊ |
1891 | ␉␉die wrap_mod("po4a::xml",␊ |
1892 | ␉␉ dgettext("po4a",␊ |
1893 | ␉␉ "Tag '%s' both in the %s and %s categories."), $tag, "break", "placeholder")␊ |
1894 | ␉␉␉if defined $self->{placeholder}->{$tag};␊ |
1895 | ␉␉die wrap_mod("po4a::xml",␊ |
1896 | ␉␉ dgettext("po4a",␊ |
1897 | ␉␉ "Tag '%s' both in the %s and %s categories."), $tag, "break", "customtag")␊ |
1898 | ␉␉␉if defined $self->{customtag}->{$tag};␊ |
1899 | ␉}␊ |
1900 | ␉foreach my $tag (keys %{$self->{placeholder}}) {␊ |
1901 | ␉␉die wrap_mod("po4a::xml",␊ |
1902 | ␉␉ dgettext("po4a",␊ |
1903 | ␉␉ "Tag '%s' both in the %s and %s categories."), $tag, "placeholder", "customtag")␊ |
1904 | ␉␉␉if defined $self->{customtag}->{$tag};␊ |
1905 | ␉}␊ |
1906 | }␊ |
1907 | ␊ |
1908 | =head2 GETTING TEXT FROM THE INPUT DOCUMENT␊ |
1909 | ␊ |
1910 | =over␊ |
1911 | ␊ |
1912 | =item get_string_until($%)␊ |
1913 | ␊ |
1914 | This function returns an array with the lines (and references) from the input␊ |
1915 | document until it finds the first argument. The second argument is an options␊ |
1916 | hash. Value 0 means disabled (the default) and 1, enabled.␊ |
1917 | ␊ |
1918 | The valid options are:␊ |
1919 | ␊ |
1920 | =over 4␊ |
1921 | ␊ |
1922 | =item B<include>␊ |
1923 | ␊ |
1924 | This makes the returned array to contain the searched text␊ |
1925 | ␊ |
1926 | =item B<remove>␊ |
1927 | ␊ |
1928 | This removes the returned stream from the input␊ |
1929 | ␊ |
1930 | =item B<unquoted>␊ |
1931 | ␊ |
1932 | This ensures that the searched text is outside any quotes␊ |
1933 | ␊ |
1934 | =back␊ |
1935 | ␊ |
1936 | =cut␊ |
1937 | ␊ |
1938 | sub get_string_until {␊ |
1939 | ␉my ($self,$search) = (shift,shift);␊ |
1940 | ␉my $options = shift;␊ |
1941 | ␉my ($include,$remove,$unquoted, $regex) = (0,0,0,0);␊ |
1942 | ␊ |
1943 | ␉if (defined($options->{include})) { $include = $options->{include}; }␊ |
1944 | ␉if (defined($options->{remove})) { $remove = $options->{remove}; }␊ |
1945 | ␉if (defined($options->{unquoted})) { $unquoted = $options->{unquoted}; }␊ |
1946 | ␉if (defined($options->{regex})) { $regex = $options->{regex}; }␊ |
1947 | ␊ |
1948 | ␉my ($line,$ref) = $self->shiftline();␊ |
1949 | ␉my (@text,$paragraph);␊ |
1950 | ␉my ($eof,$found) = (0,0);␊ |
1951 | ␊ |
1952 | ␉$search = "\Q$search\E" unless $regex;␊ |
1953 | ␉while (defined($line) and !$found) {␊ |
1954 | ␉␉push @text, ($line,$ref);␊ |
1955 | ␉␉$paragraph .= $line;␊ |
1956 | ␉␉if ($unquoted) {␊ |
1957 | ␉␉␉if ( $paragraph =~ /^((\".*?\")|(\'.*?\')|[^\"\'])*$search/s ) {␊ |
1958 | ␉␉␉␉$found = 1;␊ |
1959 | ␉␉␉}␊ |
1960 | ␉␉} else {␊ |
1961 | ␉␉␉if ( $paragraph =~ /$search/s ) {␊ |
1962 | ␉␉␉␉$found = 1;␊ |
1963 | ␉␉␉}␊ |
1964 | ␉␉}␊ |
1965 | ␉␉if (!$found) {␊ |
1966 | ␉␉␉($line,$ref)=$self->shiftline();␊ |
1967 | ␉␉}␊ |
1968 | ␉}␊ |
1969 | ␊ |
1970 | ␉if (!defined($line)) { $eof = 1; }␊ |
1971 | ␊ |
1972 | ␉if ( $found ) {␊ |
1973 | ␉␉$line = "";␊ |
1974 | ␉␉if($unquoted) {␊ |
1975 | ␉␉␉$paragraph =~ /^(?:(?:\".*?\")|(?:\'.*?\')|[^\"\'])*?$search(.*)$/s;␊ |
1976 | ␉␉␉$line = $1;␊ |
1977 | ␉␉␉$text[$#text-1] =~ s/\Q$line\E$//s;␊ |
1978 | ␉␉} else {␊ |
1979 | ␉␉␉$paragraph =~ /$search(.*)$/s;␊ |
1980 | ␉␉␉$line = $1;␊ |
1981 | ␉␉␉$text[$#text-1] =~ s/\Q$line\E$//s;␊ |
1982 | ␉␉}␊ |
1983 | ␉␉if(!$include) {␊ |
1984 | ␉␉␉$text[$#text-1] =~ /^(.*)($search.*)$/s;␊ |
1985 | ␉␉␉$text[$#text-1] = $1;␊ |
1986 | ␉␉␉$line = $2.$line;␊ |
1987 | ␉␉}␊ |
1988 | ␉␉if (defined($line) and ($line ne "")) {␊ |
1989 | ␉␉␉$self->unshiftline ($line,$text[$#text]);␊ |
1990 | ␉␉}␊ |
1991 | ␉}␊ |
1992 | ␉if (!$remove) {␊ |
1993 | ␉␉$self->unshiftline (@text);␊ |
1994 | ␉}␊ |
1995 | ␊ |
1996 | ␉#If we get to the end of the file, we return the whole paragraph␊ |
1997 | ␉return ($eof,@text);␊ |
1998 | }␊ |
1999 | ␊ |
2000 | =item skip_spaces(\@)␊ |
2001 | ␊ |
2002 | This function receives as argument the reference to a paragraph (in the format␊ |
2003 | returned by get_string_until), skips his heading spaces and returns them as␊ |
2004 | a simple string.␊ |
2005 | ␊ |
2006 | =cut␊ |
2007 | ␊ |
2008 | sub skip_spaces {␊ |
2009 | ␉my ($self,$pstring)=@_;␊ |
2010 | ␉my $space="";␊ |
2011 | ␊ |
2012 | ␉while (@$pstring and (@$pstring[0] =~ /^(\s+)(.*)$/s or @$pstring[0] eq "")) {␊ |
2013 | ␉␉if (@$pstring[0] ne "") {␊ |
2014 | ␉␉␉$space .= $1;␊ |
2015 | ␉␉␉@$pstring[0] = $2;␊ |
2016 | ␉␉}␊ |
2017 | ␊ |
2018 | ␉␉if (@$pstring[0] eq "") {␊ |
2019 | ␉␉␉shift @$pstring;␊ |
2020 | ␉␉␉shift @$pstring;␊ |
2021 | ␉␉}␊ |
2022 | ␉}␊ |
2023 | ␉return $space;␊ |
2024 | }␊ |
2025 | ␊ |
2026 | =item join_lines(@)␊ |
2027 | ␊ |
2028 | This function returns a simple string with the text from the argument array␊ |
2029 | (discarding the references).␊ |
2030 | ␊ |
2031 | =cut␊ |
2032 | ␊ |
2033 | sub join_lines {␊ |
2034 | ␉my ($self,@lines)=@_;␊ |
2035 | ␉my ($line,$ref);␊ |
2036 | ␉my $text = "";␊ |
2037 | ␉while ($#lines > 0) {␊ |
2038 | ␉␉($line,$ref) = (shift @lines,shift @lines);␊ |
2039 | ␉␉$text .= $line;␊ |
2040 | ␉}␊ |
2041 | ␉return $text;␊ |
2042 | }␊ |
2043 | ␊ |
2044 | =back␊ |
2045 | ␊ |
2046 | =head1 STATUS OF THIS MODULE␊ |
2047 | ␊ |
2048 | This module can translate tags and attributes.␊ |
2049 | ␊ |
2050 | =head1 TODO LIST␊ |
2051 | ␊ |
2052 | DOCTYPE (ENTITIES)␊ |
2053 | ␊ |
2054 | There is a minimal support for the translation of entities. They are␊ |
2055 | translated as a whole, and tags are not taken into account. Multilines␊ |
2056 | entities are not supported and entities are always rewrapped during the␊ |
2057 | translation.␊ |
2058 | ␊ |
2059 | MODIFY TAG TYPES FROM INHERITED MODULES␊ |
2060 | (move the tag_types structure inside the $self hash?)␊ |
2061 | ␊ |
2062 | =head1 SEE ALSO␊ |
2063 | ␊ |
2064 | L<Locale::Po4a::TransTractor(3pm)|Locale::Po4a::TransTractor>,␊ |
2065 | L<po4a(7)|po4a.7>␊ |
2066 | ␊ |
2067 | =head1 AUTHORS␊ |
2068 | ␊ |
2069 | Jordi Vilalta <jvprat@gmail.com>␊ |
2070 | Nicolas François <nicolas.francois@centraliens.net>␊ |
2071 | ␊ |
2072 | =head1 COPYRIGHT AND LICENSE␊ |
2073 | ␊ |
2074 | Copyright (c) 2004 by Jordi Vilalta <jvprat@gmail.com>␊ |
2075 | Copyright (c) 2008-2009 by Nicolas François <nicolas.francois@centraliens.net>␊ |
2076 | ␊ |
2077 | This program is free software; you may redistribute it and/or modify it␊ |
2078 | under the terms of GPL (see the COPYING file).␊ |
2079 | ␊ |
2080 | =cut␊ |
2081 | ␊ |
2082 | 1;␊ |
2083 |