1 | #!/usr/bin/perl -w␊ |
2 | ␊ |
3 | # Copyright (c) 2004, 2005 by Nicolas FRANÇOIS <nicolas.francois@centraliens.net>␊ |
4 | #␊ |
5 | # This file is part of po4a.␊ |
6 | #␊ |
7 | # This program is free software; you can redistribute it and/or modify␊ |
8 | # it under the terms of the GNU General Public License as published by␊ |
9 | # the Free Software Foundation; either version 2 of the License, or␊ |
10 | # (at your option) any later version.␊ |
11 | #␊ |
12 | # This program is distributed in the hope that it will be useful,␊ |
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of␊ |
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the␊ |
15 | # GNU General Public License for more details.␊ |
16 | #␊ |
17 | # You should have received a copy of the GNU General Public License␊ |
18 | # along with po4a; if not, write to the Free Software␊ |
19 | # Foundation, Inc.,␊ |
20 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA␊ |
21 | #␊ |
22 | ########################################################################␊ |
23 | ␊ |
24 | =encoding UTF-8␊ |
25 | ␊ |
26 | =head1 NAME␊ |
27 | ␊ |
28 | Locale::Po4a::TeX - convert TeX documents and derivates from/to PO files␊ |
29 | ␊ |
30 | =head1 DESCRIPTION␊ |
31 | ␊ |
32 | The po4a (PO for anything) project goal is to ease translations (and more␊ |
33 | interestingly, the maintenance of translations) using gettext tools on␊ |
34 | areas where they were not expected like documentation.␊ |
35 | ␊ |
36 | Locale::Po4a::TeX is a module to help the translation of TeX documents into␊ |
37 | other [human] languages. It can also be used as a base to build modules for␊ |
38 | TeX-based documents.␊ |
39 | ␊ |
40 | Users should probably use the LaTeX module, which inherite from the TeX module␊ |
41 | and contains the definitions of common LaTeX commands.␊ |
42 | ␊ |
43 | =head1 TRANSLATING WITH PO4A::TEX␊ |
44 | ␊ |
45 | This module can be used directly to handle generic TeX documents.␊ |
46 | This will split your document in smaller blocks (paragraphs, verbatim␊ |
47 | blocks, or even smaller like titles or indexes).␊ |
48 | ␊ |
49 | There are some options (described in the next section) that can customize␊ |
50 | this behavior. If this doesn't fit to your document format you're encouraged␊ |
51 | to write your own module derived from this, to describe your format's details.␊ |
52 | See the section B<WRITING DERIVATE MODULES> below, for the process description.␊ |
53 | ␊ |
54 | This module can also be customized by lines starting with "% po4a:" in the␊ |
55 | TeX file.␊ |
56 | These customizations are described in the B<INLINE CUSTOMIZATION> section.␊ |
57 | ␊ |
58 | =head1 OPTIONS ACCEPTED BY THIS MODULE␊ |
59 | ␊ |
60 | These are this module's particular options:␊ |
61 | ␊ |
62 | =over 4␊ |
63 | ␊ |
64 | =cut␊ |
65 | ␊ |
66 | package Locale::Po4a::TeX;␊ |
67 | ␊ |
68 | use 5.006;␊ |
69 | use strict;␊ |
70 | use warnings;␊ |
71 | ␊ |
72 | require Exporter;␊ |
73 | use vars qw(@ISA @EXPORT);␊ |
74 | @ISA = qw(Locale::Po4a::TransTractor);␊ |
75 | @EXPORT = qw(%commands %environments␊ |
76 | $RE_ESCAPE $ESCAPE $RE_VERBATIM␊ |
77 | $no_wrap_environments␊ |
78 | $verbatim_environments␊ |
79 | %separated_command␊ |
80 | %separated_environment␊ |
81 | %translate_buffer_env␊ |
82 | &generic_command␊ |
83 | ®ister_generic_command␊ |
84 | ®ister_generic_environment);␊ |
85 | ␊ |
86 | use Locale::Po4a::TransTractor;␊ |
87 | use Locale::Po4a::Common;␊ |
88 | use File::Basename qw(dirname);␊ |
89 | use Carp qw(croak);␊ |
90 | ␊ |
91 | use Encode;␊ |
92 | use Encode::Guess;␊ |
93 | ␊ |
94 | # hash of known commands and environments, with parsing sub.␊ |
95 | # See end of this file␊ |
96 | use vars qw(%commands %environments);␊ |
97 | # hash to describe the number of parameters and which one have to be␊ |
98 | # translated. Used by generic commands␊ |
99 | our %command_parameters = ();␊ |
100 | our %environment_parameters = ();␊ |
101 | # hash to describe the separators of environments.␊ |
102 | our %env_separators =();␊ |
103 | ␊ |
104 | # The escape character used to introduce commands.␊ |
105 | our $RE_ESCAPE = "\\\\";␊ |
106 | our $ESCAPE = "\\";␊ |
107 | # match the beginning of a verbatim block␊ |
108 | our $RE_VERBATIM = "\\\\begin\\{(?:verbatim)\\*?\\}";␊ |
109 | # match the beginning of a comment.␊ |
110 | # NOTE: It must contain a group, with chars preceding the comment␊ |
111 | our $RE_PRE_COMMENT= "(?<!\\\\)(?:\\\\\\\\)*";␊ |
112 | our $RE_COMMENT= "\\\%";␊ |
113 | ␊ |
114 | # Space separated list of environments that should not be re-wrapped.␊ |
115 | our $no_wrap_environments = "verbatim";␊ |
116 | our $verbatim_environments = "verbatim";␊ |
117 | # hash with the commands that have to be separated (or have to be joined).␊ |
118 | # 3 modes are currently used:␊ |
119 | # '*' The command is separated if it appear at an extremity of a␊ |
120 | # paragraph␊ |
121 | # '+' The command is separated, but its arguments are joined together␊ |
122 | # with the command name for the translation␊ |
123 | # '-' The command is not separated, unless it appear alone on a paragraph␊ |
124 | # (e.g. \strong)␊ |
125 | our %separated_command = ();␊ |
126 | our %separated_environment = ();␊ |
127 | ␊ |
128 | =item B<debug>␊ |
129 | ␊ |
130 | Activate debugging for some internal mechanisms of this module.␊ |
131 | Use the source to see which parts can be debugged.␊ |
132 | ␊ |
133 | =item B<no_wrap>␊ |
134 | ␊ |
135 | Comma-separated list of environments which should not be re-wrapped.␊ |
136 | ␊ |
137 | Note that there is a difference between verbatim and no_wrap environments.␊ |
138 | There is no command and comments analysis in verbatim blocks.␊ |
139 | ␊ |
140 | If this environment was not already registered, po4a will consider that␊ |
141 | this environment does not take any parameters.␊ |
142 | ␊ |
143 | =item B<exclude_include>␊ |
144 | ␊ |
145 | Colon-separated list of files that should not be included by \input and␊ |
146 | \include.␊ |
147 | ␊ |
148 | =item B<definitions>␊ |
149 | ␊ |
150 | The name of a file containing definitions for po4a, as defined in the␊ |
151 | B<INLINE CUSTOMIZATION> section.␊ |
152 | You can use this option if it is not possible to put the definitions in␊ |
153 | the document being translated.␊ |
154 | ␊ |
155 | =item B<verbatim>␊ |
156 | ␊ |
157 | Comma-separated list of environments which should be taken as verbatim.␊ |
158 | ␊ |
159 | If this environment was not already registered, po4a will consider that␊ |
160 | this environment does not take any parameters.␊ |
161 | ␊ |
162 | =back␊ |
163 | ␊ |
164 | Using these options permits to override the behaviour of the commands defined␊ |
165 | in the default lists.␊ |
166 | ␊ |
167 | =head1 INLINE CUSTOMIZATION␊ |
168 | ␊ |
169 | The TeX module can be customized with lines starting by B<% po4a:>.␊ |
170 | These lines are interpreted as commands to the parser.␊ |
171 | The following commands are recognized:␊ |
172 | ␊ |
173 | =over 4␊ |
174 | ␊ |
175 | =item B<% po4a: command> I<command1> B<alias> I<command2>␊ |
176 | ␊ |
177 | Indicates that the arguments of the I<command1> command should be␊ |
178 | treated as the arguments of the I<command2> command.␊ |
179 | ␊ |
180 | =item B<% po4a: command> I<command1> I<parameters>␊ |
181 | ␊ |
182 | This permit to describe in detail the parameters of the I<command1>␊ |
183 | command.␊ |
184 | This information will be used to check the number of arguments and their␊ |
185 | types.␊ |
186 | ␊ |
187 | You can precede the I<command1> command by␊ |
188 | ␊ |
189 | =over 4␊ |
190 | ␊ |
191 | =item an asterisk (B<*>)␊ |
192 | ␊ |
193 | po4a will extract this command from paragraphs (if it is located at␊ |
194 | the beginning or the end of a paragraph).␊ |
195 | The translators will then have to translate the parameters that are marked␊ |
196 | as translatable.␊ |
197 | ␊ |
198 | =item a plus (B<+>)␊ |
199 | ␊ |
200 | As for an asterisk, the command will be extracted if it appear at an␊ |
201 | extremity of a block, but the parameters won't be translated separately.␊ |
202 | The translator will have to translate the command concatenated to all its␊ |
203 | parameters.␊ |
204 | This permits to keep more context, and is useful for commands with small␊ |
205 | words in parameter, which can have multiple meanings (and translations).␊ |
206 | ␊ |
207 | Note: In this case you don't have to specify which parameters are␊ |
208 | translatable, but po4a must know the type and number of parameters.␊ |
209 | ␊ |
210 | =item a minus (B<->)␊ |
211 | ␊ |
212 | In this case, the command won't be extracted from any block.␊ |
213 | But if it appears alone on a block, then only the parameters marked as␊ |
214 | translatable will be presented to the translator.␊ |
215 | This is useful for font commands. These commands should generally not be␊ |
216 | separated from their paragraph (to keep the context), but there is no␊ |
217 | reason to annoy the translator with them if a whole string is enclosed in␊ |
218 | such a command.␊ |
219 | ␊ |
220 | =back␊ |
221 | ␊ |
222 | The I<parameters> argument is a set of [] (to indicate an optional␊ |
223 | argument) or {} (to indicate a mandatory argument).␊ |
224 | You can place an underscore (_) between these brackets to indicate that␊ |
225 | the parameter must be translated. For example:␊ |
226 | % po4a: command *chapter [_]{_}␊ |
227 | ␊ |
228 | This indicates that the chapter command has two parameters: an optional␊ |
229 | (short title) and a mandatory one, which must both be translated.␊ |
230 | If you want to specify that the href command has two mandatory parameters,␊ |
231 | that you don't want to translate the URL (first parameter), and that you␊ |
232 | don't want this command to be separated from its paragraph (which allow␊ |
233 | the translator to move the link in the sentence), you can use:␊ |
234 | % po4a: command -href {}{_}␊ |
235 | ␊ |
236 | In this case, the information indicating which arguments must be␊ |
237 | translated is only used if a paragraph is only composed of this href␊ |
238 | command.␊ |
239 | ␊ |
240 | =item B<% po4a: environment> I<env> I<parameters>␊ |
241 | ␊ |
242 | This permits to define the parameters accepted by the I<env> environment.␊ |
243 | This information is latter used to check the number of arguments of the␊ |
244 | \begin command, and permit to specify which one must be translated.␊ |
245 | The syntax of the I<parameters> argument is the same as described for the␊ |
246 | commands.␊ |
247 | The first parameter of the \begin command is the name of the environment.␊ |
248 | This parameter must not be specified in the list of parameters. Here are␊ |
249 | some examples:␊ |
250 | % po4a: environment multicols {}␊ |
251 | % po4a: environment equation␊ |
252 | ␊ |
253 | As for the commands, I<env> can be preceded by a plus (+) to indicate␊ |
254 | that the \begin command must be translated with all its arguments.␊ |
255 | ␊ |
256 | =item B<% po4a: separator> I<env> B<">I<regex>B<">␊ |
257 | ␊ |
258 | Indicates that an environment should be split according to the given␊ |
259 | regular expression.␊ |
260 | ␊ |
261 | The regular expression is delimited by quotes.␊ |
262 | It should not create any backreference.␊ |
263 | You should use (?:) if you need a group.␊ |
264 | It may also need some escapes.␊ |
265 | ␊ |
266 | For example, the LaTeX module uses the "(?:&|\\\\)" regular expression to␊ |
267 | translate separately each cell of a table (lines are separated by '\\' and␊ |
268 | cells by '&').␊ |
269 | ␊ |
270 | The notion of environment is expended to the type displayed in the PO file.␊ |
271 | This can be used to split on "\\\\" in the first mandatory argument of the␊ |
272 | title command. In this case, the environment is title{#1}.␊ |
273 | ␊ |
274 | =item B<% po4a: verbatim environment> I<env>␊ |
275 | ␊ |
276 | Indicate that I<env> is a verbatim environment.␊ |
277 | Comments and commands will be ignored in this environment.␊ |
278 | ␊ |
279 | If this environment was not already registered, po4a will consider that␊ |
280 | this environment does not take any parameters.␊ |
281 | ␊ |
282 | =back␊ |
283 | ␊ |
284 | =cut␊ |
285 | ␊ |
286 | # Directory name of the main file.␊ |
287 | # It is the directory where included files will be searched.␊ |
288 | # See read_file.␊ |
289 | my $my_dirname;␊ |
290 | ␊ |
291 | # Array of files that should not be included by read_file.␊ |
292 | # See read_file.␊ |
293 | our @exclude_include;␊ |
294 | ␊ |
295 | my %type_end=('{'=>'}', '['=>']', ' '=>'');␊ |
296 | ␊ |
297 | #########################␊ |
298 | #### DEBUGGING STUFF ####␊ |
299 | #########################␊ |
300 | my %debug=('pretrans' => 0, # see pre-conditioning of translation␊ |
301 | 'postrans' => 0, # see post-conditioning of translation␊ |
302 | 'translate' => 0, # see translation␊ |
303 | 'extract_commands' => 0, # see commands extraction␊ |
304 | 'commands' => 0, # see command subroutines␊ |
305 | 'environments' => 0, # see environment subroutines␊ |
306 | 'translate_buffer' => 0 # see buffer translation␊ |
307 | );␊ |
308 | ␊ |
309 | =head1 WRITING DERIVATE MODULES␊ |
310 | ␊ |
311 | =over 4␊ |
312 | ␊ |
313 | =item B<pre_trans>␊ |
314 | ␊ |
315 | =cut␊ |
316 | ␊ |
317 | sub pre_trans {␊ |
318 | my ($self,$str,$ref,$type)=@_;␊ |
319 | # Preformatting, so that translators don't see␊ |
320 | # strange chars␊ |
321 | my $origstr=$str;␊ |
322 | print STDERR "pre_trans($str)="␊ |
323 | if ($debug{'pretrans'});␊ |
324 | ␊ |
325 | # Accentuated characters␊ |
326 | # FIXME: only do this if the encoding is UTF-8?␊ |
327 | # $str =~ s/${RE_ESCAPE}`a/à/g;␊ |
328 | ## $str =~ s/${RE_ESCAPE}c{c}/ç/g; # not in texinfo: @,{c}␊ |
329 | # $str =~ s/${RE_ESCAPE}^e/ê/g;␊ |
330 | # $str =~ s/${RE_ESCAPE}'e/é/g;␊ |
331 | # $str =~ s/${RE_ESCAPE}`e/è/g;␊ |
332 | # $str =~ s/${RE_ESCAPE}`u/ù/g;␊ |
333 | # $str =~ s/${RE_ESCAPE}"i/ï/g;␊ |
334 | # # Non breaking space. FIXME: should we change $\sim$ to ~␊ |
335 | # $str =~ s/~/\xA0/g; # FIXME: not in texinfo: @w{ }␊ |
336 | ␊ |
337 | print STDERR "$str\n" if ($debug{'pretrans'});␊ |
338 | return $str;␊ |
339 | }␊ |
340 | ␊ |
341 | =item B<post_trans>␊ |
342 | ␊ |
343 | =cut␊ |
344 | ␊ |
345 | sub post_trans {␊ |
346 | my ($self,$str,$ref,$type)=@_;␊ |
347 | my $transstr=$str;␊ |
348 | ␊ |
349 | print STDERR "post_trans($str)="␊ |
350 | if ($debug{'postrans'});␊ |
351 | ␊ |
352 | # Accentuated characters␊ |
353 | # $str =~ s/à/${ESCAPE}`a/g;␊ |
354 | ## $str =~ s/ç/$ESCAPEc{c}/g; # FIXME: not in texinfo␊ |
355 | # $str =~ s/ê/${ESCAPE}^e/g;␊ |
356 | # $str =~ s/é/${ESCAPE}'e/g;␊ |
357 | # $str =~ s/è/${ESCAPE}`e/g;␊ |
358 | # $str =~ s/ù/${ESCAPE}`u/g;␊ |
359 | # $str =~ s/ï/${ESCAPE}"i/g;␊ |
360 | # # Non breaking space. FIXME: should we change ~ to $\sim$␊ |
361 | # $str =~ s/\xA0/~/g; # FIXME: not in texinfo␊ |
362 | ␊ |
363 | print STDERR "$str\n" if ($debug{'postrans'});␊ |
364 | return $str;␊ |
365 | }␊ |
366 | ␊ |
367 | # Comments are extracted in the parse function.␊ |
368 | # They are stored in the @comments array, and then displayed as a PO␊ |
369 | # comment with the first translated string of the paragraph.␊ |
370 | my @comments = ();␊ |
371 | ␊ |
372 | =item B<translate>␊ |
373 | ␊ |
374 | Wrapper around Transtractor's translate, with pre- and post-processing␊ |
375 | filters.␊ |
376 | ␊ |
377 | Comments of a paragraph are inserted as a PO comment for the first␊ |
378 | translated string of this paragraph.␊ |
379 | ␊ |
380 | =cut␊ |
381 | ␊ |
382 | sub translate {␊ |
383 | my ($self,$str,$ref,$type) = @_;␊ |
384 | my (%options)=@_;␊ |
385 | my $origstr=$str;␊ |
386 | print STDERR "translate($str)="␊ |
387 | if ($debug{'translate'});␊ |
388 | ␊ |
389 | return $str unless (defined $str) && length($str);␊ |
390 | return $str if ($str eq "\n");␊ |
391 | ␊ |
392 | $str=pre_trans($self,$str,$ref||$self->{ref},$type);␊ |
393 | ␊ |
394 | # add comments (if any and not already added to the PO)␊ |
395 | if (@comments) {␊ |
396 | $options{'comment'} .= join('\n', @comments);␊ |
397 | ␊ |
398 | @comments = ();␊ |
399 | }␊ |
400 | ␊ |
401 | # FIXME: translate may append a newline, keep the trailing spaces so we can␊ |
402 | # recover them.␊ |
403 | my $spaces = "";␊ |
404 | if ($options{'wrap'} and $str =~ m/^(.*?)(\s+)$/s) {␊ |
405 | $str = $1;␊ |
406 | $spaces = $2;␊ |
407 | }␊ |
408 | ␊ |
409 | # Translate this␊ |
410 | $str = $self->SUPER::translate($str,␊ |
411 | $ref||$self->{ref},␊ |
412 | $type || $self->{type},␊ |
413 | %options);␊ |
414 | ␊ |
415 | # FIXME: translate may append a newline, see above␊ |
416 | if ($options{'wrap'}) {␊ |
417 | chomp $str;␊ |
418 | $str .= $spaces;␊ |
419 | }␊ |
420 | ␊ |
421 | $str=post_trans($self,$str,$ref||$self->{ref},$type);␊ |
422 | ␊ |
423 | print STDERR "'$str'\n" if ($debug{'translate'});␊ |
424 | return $str;␊ |
425 | }␊ |
426 | ␊ |
427 | ###########################␊ |
428 | ### COMMANDS SEPARATION ###␊ |
429 | ###########################␊ |
430 | ␊ |
431 | =item B<get_leading_command>($buffer)␊ |
432 | ␊ |
433 | This function returns:␊ |
434 | ␊ |
435 | =over 4␊ |
436 | ␊ |
437 | =item A command name␊ |
438 | ␊ |
439 | If no command is found at the beginning of the given buffer, this string␊ |
440 | will be empty. Only commands that can be separated are considered.␊ |
441 | The %separated_command hash contains the list of these commands.␊ |
442 | ␊ |
443 | =item A variant␊ |
444 | ␊ |
445 | This indicates if a variant is used. For example, an asterisk (*) can␊ |
446 | be added at the end of sections command to specify that they should␊ |
447 | not be numbered. In this case, this field will contain "*". If there␊ |
448 | is no variant, the field is an empty string.␊ |
449 | ␊ |
450 | =item An array of tuples (type of argument, argument)␊ |
451 | ␊ |
452 | The type of argument can be either '{' (for mandatory arguments) or '['␊ |
453 | (for optional arguments).␊ |
454 | ␊ |
455 | =item The remaining buffer␊ |
456 | ␊ |
457 | The rest of the buffer after the removal of this leading command and␊ |
458 | its arguments. If no command is found, the original buffer is not␊ |
459 | touched and returned in this field.␊ |
460 | ␊ |
461 | =back␊ |
462 | ␊ |
463 | =cut␊ |
464 | ␊ |
465 | sub get_leading_command {␊ |
466 | my ($self, $buffer) = (shift,shift);␊ |
467 | my $command = ""; # the command name␊ |
468 | my $variant = ""; # a varriant for the command (e.g. an asterisk)␊ |
469 | my @args; # array of arguments␊ |
470 | print STDERR "get_leading_command($buffer)="␊ |
471 | if ($debug{'extract_commands'});␊ |
472 | ␊ |
473 | if ($buffer =~ m/^$RE_ESCAPE([[:alnum:]]+)(\*?)(.*)$/s␊ |
474 | && defined $separated_command{$1}) {␊ |
475 | # The buffer begin by a comand (possibly preceded by some␊ |
476 | # whitespaces).␊ |
477 | $command = $1;␊ |
478 | $variant = $2;␊ |
479 | $buffer = $3;␊ |
480 | # read the arguments (if any)␊ |
481 | while ($buffer =~ m/^\s*$RE_PRE_COMMENT([\[\{])(.*)$/s) {␊ |
482 | my $type = $1;␊ |
483 | my $arg = "";␊ |
484 | my $count = 1;␊ |
485 | $buffer = $2;␊ |
486 | # stop reading the buffer when the number of ] (or }) matches the␊ |
487 | # the number of [ (or {).␊ |
488 | while ($count > 0) {␊ |
489 | if ($buffer =~ m/^(.*?$RE_PRE_COMMENT)([\[\]\{\}])(.*)$/s) {␊ |
490 | $arg .= $1;␊ |
491 | $buffer = $3;␊ |
492 | if ($2 eq $type) {␊ |
493 | $count++;␊ |
494 | } elsif ($2 eq $type_end{$type}) {␊ |
495 | $count--;␊ |
496 | }␊ |
497 | if ($count > 0) {␊ |
498 | $arg .= $2␊ |
499 | }␊ |
500 | } else {␊ |
501 | die wrap_ref_mod($self->{ref},␊ |
502 | "po4a::tex",␊ |
503 | dgettext("po4a", "un-balanced %s in '%s'"),␊ |
504 | $type,␊ |
505 | $buffer);␊ |
506 | }␊ |
507 | }␊ |
508 | push @args, ($type,$arg);␊ |
509 | }␊ |
510 | }␊ |
511 | if (defined $command and length $command) {␊ |
512 | # verify the number of arguments␊ |
513 | my($check,$reason,$remainder) = check_arg_count($self,$command,\@args);␊ |
514 | if (not $check) {␊ |
515 | die wrap_ref_mod($self->{ref}, "po4a::tex",␊ |
516 | dgettext("po4a",␊ |
517 | "Error while checking the number of ".␊ |
518 | "arguments of the '%s' command: %s")."\n",␊ |
519 | $command, $reason);␊ |
520 | }␊ |
521 | ␊ |
522 | if (@$remainder) {␊ |
523 | # FIXME: we should also keep the spaces to be idempotent␊ |
524 | my ($temp,$type,$arg);␊ |
525 | while (@$remainder) {␊ |
526 | $type = shift @$remainder;␊ |
527 | $arg = shift @$remainder;␊ |
528 | $temp .= $type.$arg.$type_end{$type};␊ |
529 | # And remove the same number of arguments from @args␊ |
530 | pop @args;␊ |
531 | pop @args;␊ |
532 | }␊ |
533 | $buffer = $temp.$buffer;␊ |
534 | }␊ |
535 | }␊ |
536 | ␊ |
537 | print STDERR "($command,$variant,@args,$buffer)\n"␊ |
538 | if ($debug{'extract_commands'});␊ |
539 | return ($command,$variant,\@args,$buffer);␊ |
540 | }␊ |
541 | ␊ |
542 | =item B<get_trailing_command>($buffer)␊ |
543 | ␊ |
544 | The same as B<get_leading_command>, but for commands at the end of a buffer.␊ |
545 | ␊ |
546 | =cut␊ |
547 | ␊ |
548 | sub get_trailing_command {␊ |
549 | my ($self, $buffer) = (shift,shift);␊ |
550 | my $orig_buffer = $buffer;␊ |
551 | print STDERR "get_trailing_command($buffer)="␊ |
552 | if ($debug{'extract_commands'});␊ |
553 | ␊ |
554 | my @args;␊ |
555 | my $command = "";␊ |
556 | my $variant = "";␊ |
557 | ␊ |
558 | # While the buffer ends by }, consider it is a mandatory argument␊ |
559 | # and extract this argument.␊ |
560 | while ( $buffer =~ m/^(.*$RE_PRE_COMMENT(\{).*)$RE_PRE_COMMENT\}$/s␊ |
561 | or $buffer =~ m/^(.*$RE_PRE_COMMENT(\[).*)$RE_PRE_COMMENT\]$/s) {␊ |
562 | my $arg = "";␊ |
563 | my $count = 1;␊ |
564 | $buffer = $1;␊ |
565 | my $type = $2;␊ |
566 | # stop reading the buffer when the number of } (or ]) matches the␊ |
567 | # the number of { (or [).␊ |
568 | while ($count > 0) {␊ |
569 | if ($buffer =~ m/^(.*$RE_PRE_COMMENT)([\{\}\[\]])(.*)$/s) {␊ |
570 | $arg = $3.$arg;␊ |
571 | $buffer = $1;␊ |
572 | if ($2 eq $type) {␊ |
573 | $count--;␊ |
574 | } elsif ($2 eq $type_end{$type}) {␊ |
575 | $count++;␊ |
576 | }␊ |
577 | if ($count > 0) {␊ |
578 | $arg = $2.$arg;␊ |
579 | }␊ |
580 | } else {␊ |
581 | die wrap_ref_mod($self->{ref},␊ |
582 | "po4a::tex",␊ |
583 | dgettext("po4a", "un-balanced %s in '%s'"),␊ |
584 | $type_end{$type},␊ |
585 | $buffer);␊ |
586 | }␊ |
587 | }␊ |
588 | unshift @args, ($type,$arg);␊ |
589 | }␊ |
590 | ␊ |
591 | # There should now be a command, maybe followed by an asterisk.␊ |
592 | if ($buffer =~ m/^(.*$RE_PRE_COMMENT)$RE_ESCAPE([[:alnum:]]+)(\*?)\s*$/s␊ |
593 | && defined $separated_command{$2}) {␊ |
594 | $buffer = $1;␊ |
595 | $command = $2;␊ |
596 | $variant = $3;␊ |
597 | my($check,$reason,$remainder) = check_arg_count($self,$command,\@args);␊ |
598 | if (not $check) {␊ |
599 | die wrap_ref_mod($self->{ref}, "po4a::tex",␊ |
600 | dgettext("po4a",␊ |
601 | "Error while checking the number of ".␊ |
602 | "arguments of the '%s' command: %s")."\n",␊ |
603 | $command, $reason);␊ |
604 | }␊ |
605 | if (@$remainder) {␊ |
606 | # There are some arguments after the command.␊ |
607 | # We can't extract this comand.␊ |
608 | $command = "";␊ |
609 | }␊ |
610 | }␊ |
611 | ␊ |
612 | # sanitize return values if no command was found.␊ |
613 | if (!length($command)) {␊ |
614 | $command = "";␊ |
615 | $variant = "";␊ |
616 | undef @args;␊ |
617 | $buffer = $orig_buffer;␊ |
618 | }␊ |
619 | # verify the number of arguments␊ |
620 | ␊ |
621 | print STDERR "($command,$variant,@args,$buffer)\n"␊ |
622 | if ($debug{'extract_commands'});␊ |
623 | return ($command,$variant,\@args,$buffer);␊ |
624 | }␊ |
625 | ␊ |
626 | =item B<translate_buffer>␊ |
627 | ␊ |
628 | Recursively translate a buffer by separating leading and trailing␊ |
629 | commands (those which should be translated separately) from the␊ |
630 | buffer.␊ |
631 | ␊ |
632 | If a function is defined in %translate_buffer_env for the current␊ |
633 | environment, this function will be used to translate the buffer instead of␊ |
634 | translate_buffer().␊ |
635 | ␊ |
636 | =cut␊ |
637 | ␊ |
638 | our %translate_buffer_env = ();␊ |
639 | sub translate_buffer {␊ |
640 | my ($self,$buffer,$no_wrap,@env) = (shift,shift,shift,@_);␊ |
641 | ␊ |
642 | if (@env and defined $translate_buffer_env{$env[-1]}) {␊ |
643 | return &{$translate_buffer_env{$env[-1]}}($self,$buffer,$no_wrap,@env);␊ |
644 | }␊ |
645 | ␊ |
646 | print STDERR "translate_buffer($buffer,$no_wrap,@env)="␊ |
647 | if ($debug{'translate_buffer'});␊ |
648 | ␊ |
649 | my ($command,$variant) = ("","");␊ |
650 | my $args;␊ |
651 | my $translated_buffer = "";␊ |
652 | my $orig_buffer = $buffer;␊ |
653 | my $t = ""; # a temporary string␊ |
654 | ␊ |
655 | if ($buffer =~ /^\s*$/s) {␊ |
656 | print STDERR "($buffer,@env)\n"␊ |
657 | if ($debug{'translate_buffer'});␊ |
658 | return ($buffer, @env);␊ |
659 | }␊ |
660 | # verbatim blocks.␊ |
661 | # Buffers starting by \end{verbatim} are handled after.␊ |
662 | if (in_verbatim(@env) and $buffer !~ m/^\n?\Q$ESCAPE\Eend\{$env[-1]\*?\}/) {␊ |
663 | if($buffer =~ m/^(.*?)(\n?\Q$ESCAPE\Eend\{$env[-1]\*?\}.*)$/s) {␊ |
664 | # end of a verbatim block␊ |
665 | my ($begin, $end) = ($1?$1:"", $2);␊ |
666 | my ($t1, $t2) = ("", "");␊ |
667 | if (defined $begin) {␊ |
668 | $t1 = $self->translate($begin,$self->{ref},␊ |
669 | $env[-1],␊ |
670 | "wrap" => 0);␊ |
671 | }␊ |
672 | ($t2, @env) = translate_buffer($self, $end, $no_wrap, @env);␊ |
673 | print STDERR "($t1$t2,@env)\n"␊ |
674 | if ($debug{'translate_buffer'});␊ |
675 | return ($t1.$t2, @env);␊ |
676 | } else {␊ |
677 | $translated_buffer = $self->translate($buffer,$self->{ref},␊ |
678 | $env[-1],␊ |
679 | "wrap" => 0);␊ |
680 | print STDERR "($translated_buffer,@env)\n"␊ |
681 | if ($debug{'translate_buffer'});␊ |
682 | return ($translated_buffer, @env);␊ |
683 | }␊ |
684 | }␊ |
685 | # early detection of verbatim environment␊ |
686 | if ($buffer =~ /^($RE_VERBATIM\n?)(.*)$/s and length $2) {␊ |
687 | my ($begin, $end) = ($1, $2);␊ |
688 | my ($t1, $t2) = ("", "");␊ |
689 | ($t1, @env) = translate_buffer($self, $begin, $no_wrap, @env);␊ |
690 | ($t2, @env) = translate_buffer($self, $end, $no_wrap, @env);␊ |
691 | ␊ |
692 | print STDERR "($t1$t2,@env)\n"␊ |
693 | if ($debug{'translate_buffer'});␊ |
694 | return ($t1.$t2, @env);␊ |
695 | }␊ |
696 | # detect \begin and \end (if they are not commented)␊ |
697 | if ($buffer =~ /^((?:.*?\n)? # $1 is␊ |
698 | (?:[^%] # either not a %␊ |
699 | | # or␊ |
700 | (?<!\\)(?:\\\\)*\\%)*? # a % preceded by an odd nb of \␊ |
701 | ) # $2 is a \begin{ with the end of the line␊ |
702 | (${RE_ESCAPE}(?:begin|end)\{.*)$/sx␊ |
703 | and length $1) {␊ |
704 | my ($begin, $end) = ($1, $2);␊ |
705 | my ($t1, $t2) = ("", "");␊ |
706 | if (is_closed($begin)) {␊ |
707 | ($t1, @env) = translate_buffer($self, $begin, $no_wrap, @env);␊ |
708 | ($t2, @env) = translate_buffer($self, $end, $no_wrap, @env);␊ |
709 | ␊ |
710 | print STDERR "($t1$t2,@env)\n"␊ |
711 | if ($debug{'translate_buffer'});␊ |
712 | return ($t1.$t2, @env);␊ |
713 | }␊ |
714 | }␊ |
715 | ␊ |
716 | # remove comments from the buffer.␊ |
717 | # Comments are stored in an array and shown as comments in the PO.␊ |
718 | while ($buffer =~ m/($RE_PRE_COMMENT)$RE_COMMENT([^\n]*)(\n[ \t]*)(.*)$/s) {␊ |
719 | my $comment = $2;␊ |
720 | my $end = "";␊ |
721 | if ($4 =~ m/^\n/s and $buffer !~ m/^$RE_COMMENT/s) {␊ |
722 | # a line with comments, followed by an empty line.␊ |
723 | # Keep the empty line, but remove the comment.␊ |
724 | # This is an empirical heuristic, but seems to work;)␊ |
725 | $end = "\n";␊ |
726 | }␊ |
727 | if (defined $comment and $comment !~ /^\s*$/s) {␊ |
728 | push @comments, $comment;␊ |
729 | }␊ |
730 | $buffer =~ s/($RE_PRE_COMMENT)$RE_COMMENT([^\n]*)(\n[ \t]*)/$1$end/s;␊ |
731 | }␊ |
732 | ␊ |
733 | ␊ |
734 | # translate leading commands.␊ |
735 | do {␊ |
736 | # keep the leading space to put them back after the translation of␊ |
737 | # the command.␊ |
738 | my $spaces = "";␊ |
739 | if ($buffer =~ /^(\s+)(.*?)$/s) {␊ |
740 | $spaces = $1;␊ |
741 | # $buffer = $2; # FIXME: this also remove trailing spaces!!␊ |
742 | $buffer =~ s/^\s*//s;␊ |
743 | }␊ |
744 | my $buffer_save = $buffer;␊ |
745 | ($command, $variant, $args, $buffer) =␊ |
746 | get_leading_command($self,$buffer);␊ |
747 | if ( (length $command)␊ |
748 | and (defined $separated_command{$command})␊ |
749 | and ($separated_command{$command} eq '-')␊ |
750 | and ( (not (defined($buffer)))␊ |
751 | or ($buffer !~ m/^\s*$/s) )) {␊ |
752 | # This command can be separated only if alone on a buffer.␊ |
753 | # We need to remove the trailing commands first, and see if it␊ |
754 | # will be alone on this buffer.␊ |
755 | $buffer = $buffer_save;␊ |
756 | $command = "";␊ |
757 | }␊ |
758 | if (length($command)) {␊ |
759 | # call the command subroutine.␊ |
760 | # These command subroutines will probably call translate_buffer␊ |
761 | # with the content of each argument that need a translation.␊ |
762 | if (defined ($commands{$command})) {␊ |
763 | ($t,@env) = &{$commands{$command}}($self,$command,$variant,␊ |
764 | $args,\@env,$no_wrap);␊ |
765 | $translated_buffer .= $spaces.$t;␊ |
766 | # Handle spaces after a command.␊ |
767 | $spaces = "";␊ |
768 | if ($buffer =~ /^(\s+)(.*?)$/s) {␊ |
769 | $spaces = $1;␊ |
770 | # $buffer = $2; # FIXME: this also remove trailing spaces!!␊ |
771 | $buffer =~ s/^\s*//s;␊ |
772 | }␊ |
773 | $translated_buffer .= $spaces;␊ |
774 | } else {␊ |
775 | die wrap_ref_mod($self->{ref},␊ |
776 | "po4a::tex",␊ |
777 | dgettext("po4a", "Unknown command: '%s'"),␊ |
778 | $command);␊ |
779 | }␊ |
780 | } else {␊ |
781 | $buffer = $spaces.$buffer;␊ |
782 | }␊ |
783 | } while (length($command));␊ |
784 | ␊ |
785 | # array of trailing commands, which will be translated later.␊ |
786 | my @trailing_commands = ();␊ |
787 | do {␊ |
788 | my $spaces = "";␊ |
789 | if ($buffer =~ /^(.*?)(\s+)$/s) {␊ |
790 | $buffer = $1;␊ |
791 | $spaces = $2;␊ |
792 | }␊ |
793 | my $buffer_save = $buffer;␊ |
794 | ($command, $variant, $args, $buffer) =␊ |
795 | get_trailing_command($self,$buffer);␊ |
796 | if ( (length $command)␊ |
797 | and (defined $separated_command{$command})␊ |
798 | and ($separated_command{$command} eq '-')␊ |
799 | and ( (not defined $buffer)␊ |
800 | or ($buffer !~ m/^\s*$/s))) {␊ |
801 | # We can extract this command.␊ |
802 | $command = "";␊ |
803 | $buffer = $buffer_save;␊ |
804 | }␊ |
805 | if (length($command)) {␊ |
806 | unshift @trailing_commands, ($command, $variant, $args, $spaces);␊ |
807 | } else {␊ |
808 | $buffer .= $spaces;␊ |
809 | }␊ |
810 | } while (length($command));␊ |
811 | ␊ |
812 | # Now, $buffer is just a block that can be translated.␊ |
813 | ␊ |
814 | # environment specific treatment␊ |
815 | if (@env and defined $env_separators{$env[-1]}) {␊ |
816 | my $re_separator = $env_separators{$env[-1]};␊ |
817 | my $buf_begin = "";␊ |
818 | # FIXME: the separator may have to be translated.␊ |
819 | while ($buffer =~ m/^(.*?)(\s*$re_separator\s*)(.*)$/s) {␊ |
820 | my ($begin, $sep, $end) = ($1, $2, $3);␊ |
821 | $buf_begin .= $begin;␊ |
822 | if (is_closed($buf_begin)) {␊ |
823 | my $t = "";␊ |
824 | ($t, @env) = translate_buffer($self,$buf_begin,$no_wrap,@env);␊ |
825 | $translated_buffer .= $t.$sep;␊ |
826 | $buf_begin = "";␊ |
827 | } else {␊ |
828 | # the command is in a command argument␊ |
829 | $buf_begin .= $sep;␊ |
830 | }␊ |
831 | $buffer = $end;␊ |
832 | }␊ |
833 | $buffer = $buf_begin . $buffer;␊ |
834 | }␊ |
835 | ␊ |
836 | # finally, translate␊ |
837 | if (length($buffer)) {␊ |
838 | my $wrap = 1;␊ |
839 | my ($e1, $e2);␊ |
840 | NO_WRAP_LOOP: foreach $e1 (@env) {␊ |
841 | foreach $e2 (split(' ', $no_wrap_environments)) {␊ |
842 | if ($e1 eq $e2) {␊ |
843 | $wrap = 0;␊ |
844 | last NO_WRAP_LOOP;␊ |
845 | }␊ |
846 | }␊ |
847 | }␊ |
848 | $wrap = 0 if (defined $no_wrap and $no_wrap == 1);␊ |
849 | # Keep spaces at the end of the buffer.␊ |
850 | my $spaces_end = "";␊ |
851 | if ($buffer =~ /^(.*?)(\s+)$/s) {␊ |
852 | $spaces_end = $2;␊ |
853 | $buffer = $1;␊ |
854 | }␊ |
855 | if ($wrap and $buffer =~ s/^(\s+)//s) {␊ |
856 | $translated_buffer .= $1;␊ |
857 | }␊ |
858 | $translated_buffer .= $self->translate($buffer,$self->{ref},␊ |
859 | @env?$env[-1]:"Plain text",␊ |
860 | "wrap" => $wrap);␊ |
861 | # Restore spaces at the end of the buffer.␊ |
862 | $translated_buffer .= $spaces_end;␊ |
863 | }␊ |
864 | ␊ |
865 | # append the translation of the trailing commands␊ |
866 | while (@trailing_commands) {␊ |
867 | my $command = shift @trailing_commands;␊ |
868 | my $variant = shift @trailing_commands;␊ |
869 | my $args = shift @trailing_commands;␊ |
870 | my $spaces = shift @trailing_commands;␊ |
871 | if (defined ($commands{$command})) {␊ |
872 | ($t,@env) = &{$commands{$command}}($self,$command,$variant,␊ |
873 | $args,\@env,$no_wrap);␊ |
874 | $translated_buffer .= $t.$spaces;␊ |
875 | } else {␊ |
876 | die wrap_ref_mod($self->{ref},␊ |
877 | "po4a::tex",␊ |
878 | dgettext("po4a", "Unknown command: '%s'"),␊ |
879 | $command);␊ |
880 | }␊ |
881 | }␊ |
882 | ␊ |
883 | print STDERR "($translated_buffer,@env)\n"␊ |
884 | if ($debug{'translate_buffer'});␊ |
885 | return ($translated_buffer,@env);␊ |
886 | }␊ |
887 | ␊ |
888 | ################################␊ |
889 | #### EXTERNAL CUSTOMIZATION ####␊ |
890 | ################################␊ |
891 | ␊ |
892 | =item B<read>␊ |
893 | ␊ |
894 | Overload Transtractor's read␊ |
895 | ␊ |
896 | =cut␊ |
897 | ␊ |
898 | sub read {␊ |
899 | my $self=shift;␊ |
900 | my $filename=shift;␊ |
901 | ␊ |
902 | # keep the directory name of the main file.␊ |
903 | $my_dirname = dirname($filename);␊ |
904 | ␊ |
905 | push @{$self->{TT}{doc_in}}, read_file($self, $filename);␊ |
906 | }␊ |
907 | ␊ |
908 | =item B<read_file>␊ |
909 | ␊ |
910 | Recursively read a file, appending included files which are not listed in the␊ |
911 | @exclude_include array. Included files are searched using the B<kpsewhich>␊ |
912 | command from the Kpathsea library.␊ |
913 | ␊ |
914 | Except from the file inclusion part, it is a cut and paste from␊ |
915 | Transtractor's read.␊ |
916 | ␊ |
917 | =cut␊ |
918 | ␊ |
919 | # TODO: fix DOS end of lines␊ |
920 | sub read_file {␊ |
921 | my $self=shift;␊ |
922 | my $filename=shift␊ |
923 | or croak wrap_mod("po4a::tex",␊ |
924 | dgettext("po4a", "Can't read from file without having a filename"));␊ |
925 | my $linenum=0;␊ |
926 | my @entries=();␊ |
927 | ␊ |
928 | open (my $in, $filename)␊ |
929 | or croak wrap_mod("po4a::tex",␊ |
930 | dgettext("po4a", "Can't read from %s: %s"), $filename, $!);␊ |
931 | while (defined (my $textline = <$in>)) {␊ |
932 | $linenum++;␊ |
933 | my $ref="$filename:$linenum";␊ |
934 | # TODO: add support for includeonly␊ |
935 | # The next regular expression matches \input or \includes that are␊ |
936 | # not commented (but can be preceded by a \%.␊ |
937 | while ($textline =~ /^((?:[^%]|(?<!\\)(?:\\\\)*\\%)*)␊ |
938 | \\(include|input)␊ |
939 | \{([^\{]*)\}(.*)$/x) {␊ |
940 | my ($begin,$newfilename,$end) = ($1,$3,$4);␊ |
941 | my $tag = $2;␊ |
942 | my $include = 1;␊ |
943 | foreach my $f (@exclude_include) {␊ |
944 | if ($f eq $newfilename) {␊ |
945 | $include = 0;␊ |
946 | $begin .= "\\$tag"."{$newfilename}";␊ |
947 | $textline = $end;␊ |
948 | last;␊ |
949 | }␊ |
950 | }␊ |
951 | if ($include and ($tag eq "include")) {␊ |
952 | $begin .= "\\clearpage";␊ |
953 | }␊ |
954 | if ($begin !~ /^\s*$/) {␊ |
955 | push @entries, ($begin,$ref);␊ |
956 | }␊ |
957 | if ($include) {␊ |
958 | # search the file␊ |
959 | open (KPSEA, "kpsewhich " . $newfilename . " |");␊ |
960 | my $newfilepath = <KPSEA>;␊ |
961 | ␊ |
962 | if ($newfilename ne "" and $newfilepath eq "") {␊ |
963 | die wrap_mod("po4a::tex",␊ |
964 | dgettext("po4a",␊ |
965 | "Can't find %s with kpsewhich"),␊ |
966 | $filename);␊ |
967 | }␊ |
968 | ␊ |
969 | push @entries, read_file($self,␊ |
970 | $newfilepath);␊ |
971 | if ($tag eq "include") {␊ |
972 | $textline = "\\clearpage".$end;␊ |
973 | } else {␊ |
974 | $textline = $end;␊ |
975 | }␊ |
976 | }␊ |
977 | }␊ |
978 | if (length($textline)) {␊ |
979 | my @entry=($textline,$ref);␊ |
980 | push @entries, @entry;␊ |
981 | ␊ |
982 | # Detect if this file has non-ascii characters␊ |
983 | if($self->{TT}{ascii_input}) {␊ |
984 | ␊ |
985 | my $decoder = guess_encoding($textline);␊ |
986 | if (!ref($decoder) or $decoder !~ /Encode::XS=/) {␊ |
987 | # We have detected a non-ascii line␊ |
988 | $self->{TT}{ascii_input} = 0;␊ |
989 | # Save the reference for future error message␊ |
990 | $self->{TT}{non_ascii_ref} ||= $ref;␊ |
991 | }␊ |
992 | }␊ |
993 | }␊ |
994 | }␊ |
995 | close $in␊ |
996 | or croak wrap_mod("po4a::tex",␊ |
997 | dgettext("po4a", "Can't close %s after reading: %s"), $filename, $!);␊ |
998 | ␊ |
999 | return @entries;␊ |
1000 | }␊ |
1001 | ␊ |
1002 | =back␊ |
1003 | ␊ |
1004 | ␊ |
1005 | =over 4␊ |
1006 | ␊ |
1007 | =item B<parse_definition_file>␊ |
1008 | ␊ |
1009 | Subroutine for parsing a file with po4a directives (definitions for␊ |
1010 | new commands).␊ |
1011 | ␊ |
1012 | =cut␊ |
1013 | ␊ |
1014 | sub parse_definition_file {␊ |
1015 | my ($self,$filename,$only_try)=@_;␊ |
1016 | my $filename_org = $filename;␊ |
1017 | ␊ |
1018 | open (KPSEA, "kpsewhich " . $filename . " |");␊ |
1019 | $filename = <KPSEA>;␊ |
1020 | ␊ |
1021 | if (not defined $filename) {␊ |
1022 | warn wrap_mod("po4a::tex",␊ |
1023 | dgettext("po4a", "kpsewhich cannot find %s"), $filename_org);␊ |
1024 | if (defined $only_try && $only_try) {␊ |
1025 | return;␊ |
1026 | } else {␊ |
1027 | exit 1;␊ |
1028 | }␊ |
1029 | }␊ |
1030 | ␊ |
1031 | if (! open (IN,"<$filename")) {␊ |
1032 | warn wrap_mod("po4a::tex",␊ |
1033 | dgettext("po4a", "Can't open %s: %s"), $filename, $!);␊ |
1034 | if (defined $only_try && $only_try) {␊ |
1035 | return;␊ |
1036 | } else {␊ |
1037 | exit 1;␊ |
1038 | }␊ |
1039 | }␊ |
1040 | while (<IN>) {␊ |
1041 | if (/^\s*%\s*po4a\s*:/) {␊ |
1042 | parse_definition_line($self, $_);␊ |
1043 | }␊ |
1044 | }␊ |
1045 | }␊ |
1046 | ␊ |
1047 | =item B<parse_definition_line>␊ |
1048 | ␊ |
1049 | Parse a definition line of the form "% po4a: ".␊ |
1050 | ␊ |
1051 | See the B<INLINE CUSTOMIZATION> section for more details.␊ |
1052 | ␊ |
1053 | =cut␊ |
1054 | ␊ |
1055 | sub parse_definition_line {␊ |
1056 | my ($self,$line)=@_;␊ |
1057 | $line =~ s/^\s*%\s*po4a\s*:\s*//;␊ |
1058 | ␊ |
1059 | if ($line =~ /^command\s+([-*+]?)(\w+)\s+(.*)$/) {␊ |
1060 | my $command = $2;␊ |
1061 | $line = $3;␊ |
1062 | if ($1) {␊ |
1063 | $separated_command{$command} = $1;␊ |
1064 | }␊ |
1065 | if ($line =~ /^alias\s+(\w+)\s*$/) {␊ |
1066 | if (defined ($commands{$1})) {␊ |
1067 | $commands{$command} = $commands{$1};␊ |
1068 | $command_parameters{$command} = $command_parameters{$1};␊ |
1069 | } else {␊ |
1070 | die wrap_mod("po4a::tex",␊ |
1071 | dgettext("po4a", "Cannot use an alias to the unknown command '%s'"),␊ |
1072 | $2);␊ |
1073 | }␊ |
1074 | } elsif ($line =~ /^(-1|\d+),(-1|\d+),(-1|[ 0-9]*),(-1|[ 0-9]*?)\s*$/) {␊ |
1075 | die wrap_ref_mod($self->{ref},␊ |
1076 | "po4a::tex",␊ |
1077 | dgettext("po4a", "You are using the old ".␊ |
1078 | "definitions format (%s). ".␊ |
1079 | "Please update this definition line."),␊ |
1080 | $_[1])␊ |
1081 | } elsif ($line =~ m/^((?:\{_?\}|\[_?\])*)\s*$/) {␊ |
1082 | register_generic_command("$command,$1");␊ |
1083 | }␊ |
1084 | } elsif ($line =~ /^environment\s+([+]?\w+\*?)(.*)$/) {␊ |
1085 | my $env = $1;␊ |
1086 | $line = $2;␊ |
1087 | if ($line =~ m/^\s*((?:\{_?\}|\[_?\])*)\s*$/) {␊ |
1088 | register_generic_environment("$env,$1");␊ |
1089 | }␊ |
1090 | } elsif ($line =~ /^separator\s+(\w+(?:\[#[0-9]+\])?)\s+\"(.*)\"\s*$/) {␊ |
1091 | my $env = $1; # This is not necessarily an environment.␊ |
1092 | # It can also be smth like 'title[#1]'.␊ |
1093 | $env_separators{$env} = $2;␊ |
1094 | } elsif ($line =~ /^verbatim\s+environment\s+(\w+)\s+$/) {␊ |
1095 | register_verbatim_environment($1);␊ |
1096 | }␊ |
1097 | }␊ |
1098 | ␊ |
1099 | =item B<is_closed>␊ |
1100 | ␊ |
1101 | =cut␊ |
1102 | ␊ |
1103 | sub is_closed {␊ |
1104 | my $paragraph = shift;␊ |
1105 | # FIXME: [ and ] are more difficult to handle, because it is not easy to detect if it introduce an optional argument␊ |
1106 | my $tmp = $paragraph;␊ |
1107 | my $closing = 0;␊ |
1108 | my $opening = 0;␊ |
1109 | # FIXME: { and } should not be counted in verbatim blocks␊ |
1110 | # Remove comments␊ |
1111 | $tmp =~ s/$RE_PRE_COMMENT$RE_COMMENT.*//mg;␊ |
1112 | while ($tmp =~ /^.*?$RE_PRE_COMMENT\{(.*)$/s) {␊ |
1113 | $opening += 1;␊ |
1114 | $tmp = $1;␊ |
1115 | }␊ |
1116 | $tmp = $paragraph;␊ |
1117 | # Remove comments␊ |
1118 | $tmp =~ s/$RE_PRE_COMMENT$RE_COMMENT.*//mg;␊ |
1119 | while ($tmp =~ /^.*?$RE_PRE_COMMENT\}(.*)$/s) {␊ |
1120 | $closing += 1;␊ |
1121 | $tmp = $1;␊ |
1122 | }␊ |
1123 | return $opening eq $closing;␊ |
1124 | }␊ |
1125 | ␊ |
1126 | sub in_verbatim {␊ |
1127 | foreach my $e1 (@_) {␊ |
1128 | foreach my $e2 (split(' ', $verbatim_environments)) {␊ |
1129 | if ($e1 eq $e2) {␊ |
1130 | return 1;␊ |
1131 | }␊ |
1132 | }␊ |
1133 | }␊ |
1134 | ␊ |
1135 | return 0;␊ |
1136 | }␊ |
1137 | ␊ |
1138 | #############################␊ |
1139 | #### MAIN PARSE FUNCTION ####␊ |
1140 | #############################␊ |
1141 | =item B<parse>␊ |
1142 | ␊ |
1143 | =cut␊ |
1144 | ␊ |
1145 | sub parse {␊ |
1146 | my $self = shift;␊ |
1147 | my ($line,$ref);␊ |
1148 | my $paragraph = ""; # Buffer where we put the paragraph while building␊ |
1149 | my @env = (); # environment stack␊ |
1150 | my $t = "";␊ |
1151 | ␊ |
1152 | LINE:␊ |
1153 | undef $self->{type};␊ |
1154 | ($line,$ref)=$self->shiftline();␊ |
1155 | ␊ |
1156 | while (defined($line)) {␊ |
1157 | chomp($line);␊ |
1158 | $self->{ref}="$ref";␊ |
1159 | ␊ |
1160 | if ($line =~ /^\s*%\s*po4a\s*:/) {␊ |
1161 | parse_definition_line($self, $line);␊ |
1162 | goto LINE;␊ |
1163 | }␊ |
1164 | ␊ |
1165 | my $closed = is_closed($paragraph);␊ |
1166 | ␊ |
1167 | #FIXME: what happens if a \begin{verbatim} or \end{verbatim} is in the␊ |
1168 | # middle of a line. (This is only an issue if the verbatim␊ |
1169 | # environment contains an un-closed bracket)␊ |
1170 | if ( ($closed and ($line =~ /^\s*$/ or␊ |
1171 | $line =~ /^\s*$RE_VERBATIM\s*$/))␊ |
1172 | or (in_verbatim(@env) and $line =~ /^\s*\Q$ESCAPE\Eend{$env[-1]}\s*$/)␊ |
1173 | ) {␊ |
1174 | # An empty line. This indicates the end of the current␊ |
1175 | # paragraph.␊ |
1176 | $paragraph .= $line."\n";␊ |
1177 | if (length($paragraph)) {␊ |
1178 | ($t, @env) = translate_buffer($self,$paragraph,undef,@env);␊ |
1179 | $self->pushline($t);␊ |
1180 | $paragraph="";␊ |
1181 | @comments = ();␊ |
1182 | }␊ |
1183 | } else {␊ |
1184 | # continue the same paragraph␊ |
1185 | $paragraph .= $line."\n";␊ |
1186 | }␊ |
1187 | ␊ |
1188 | # Reinit the loop␊ |
1189 | ($line,$ref)=$self->shiftline();␊ |
1190 | undef $self->{type};␊ |
1191 | }␊ |
1192 | ␊ |
1193 | if (length($paragraph)) {␊ |
1194 | ($t, @env) = translate_buffer($self,$paragraph,undef,@env);␊ |
1195 | $self->pushline($t);␊ |
1196 | $paragraph="";␊ |
1197 | }␊ |
1198 | } # end of parse␊ |
1199 | ␊ |
1200 | =item B<docheader>␊ |
1201 | ␊ |
1202 | =back␊ |
1203 | ␊ |
1204 | =cut␊ |
1205 | ␊ |
1206 | sub docheader {␊ |
1207 | return "% This file was generated with po4a. Translate the source file.\n".␊ |
1208 | "%\n";␊ |
1209 | }␊ |
1210 | ␊ |
1211 | ␊ |
1212 | ####################################␊ |
1213 | #### DEFINITION OF THE COMMANDS ####␊ |
1214 | ####################################␊ |
1215 | ␊ |
1216 | =head1 INTERNAL FUNCTIONS used to write derivated parsers␊ |
1217 | ␊ |
1218 | Command and environment functions take the following arguments (in␊ |
1219 | addition to the $self object):␊ |
1220 | ␊ |
1221 | =over␊ |
1222 | ␊ |
1223 | =item A command name␊ |
1224 | ␊ |
1225 | =item A variant␊ |
1226 | ␊ |
1227 | =item An array of (type, argument) tuples␊ |
1228 | ␊ |
1229 | =item The current environment␊ |
1230 | ␊ |
1231 | =back␊ |
1232 | ␊ |
1233 | The first 3 arguments are extracted by get_leading_command or␊ |
1234 | get_trailing_command.␊ |
1235 | ␊ |
1236 | Command and environment functions return the translation of the command␊ |
1237 | with its arguments and a new environment.␊ |
1238 | ␊ |
1239 | Environment functions are called when a \begin command is found. They are␊ |
1240 | called with the \begin command and its arguments.␊ |
1241 | ␊ |
1242 | The TeX module only proposes one command function and one environment␊ |
1243 | function: generic_command and generic_environment.␊ |
1244 | ␊ |
1245 | generic_command uses the information specified by␊ |
1246 | register_generic_command or by adding definition to the TeX file:␊ |
1247 | % po4a: command I<command1> I<parameters>␊ |
1248 | ␊ |
1249 | generic_environment uses the information specified by␊ |
1250 | register_generic_environment or by adding definition to the TeX file:␊ |
1251 | % po4a: environment I<env> I<parameters>␊ |
1252 | ␊ |
1253 | Both functions will only translate the parameters that were specified as␊ |
1254 | translatable (with a '_').␊ |
1255 | generic_environment will append the name of the environment to the␊ |
1256 | environment stack and generic_command will append the name of the command␊ |
1257 | followed by an identifier of the parameter (like {#7} or [#2]).␊ |
1258 | ␊ |
1259 | =cut␊ |
1260 | ␊ |
1261 | # definition of environment related commands␊ |
1262 | ␊ |
1263 | $commands{'begin'}= sub {␊ |
1264 | my $self = shift;␊ |
1265 | my ($command,$variant,$args,$env) = (shift,shift,shift,shift);␊ |
1266 | my $no_wrap = shift;␊ |
1267 | print "begin($command,$variant,@$args,@$env,$no_wrap)="␊ |
1268 | if ($debug{'commands'} || $debug{'environments'});␊ |
1269 | my ($t,@e) = ("",());␊ |
1270 | ␊ |
1271 | my $envir = $args->[1];␊ |
1272 | if (defined($envir) and $envir =~ /^(.*)\*$/) {␊ |
1273 | $envir = $1;␊ |
1274 | }␊ |
1275 | ␊ |
1276 | if (defined($envir) && defined($environments{$envir})) {␊ |
1277 | ($t, @e) = &{$environments{$envir}}($self,$command,$variant,␊ |
1278 | $args,$env,$no_wrap);␊ |
1279 | } else {␊ |
1280 | die wrap_ref_mod($self->{ref}, "po4a::tex",␊ |
1281 | dgettext("po4a", "unknown environment: '%s'"),␊ |
1282 | $args->[1]);␊ |
1283 | }␊ |
1284 | ␊ |
1285 | print "($t, @e)\n"␊ |
1286 | if ($debug{'commands'} || $debug{'environments'});␊ |
1287 | return ($t, @e);␊ |
1288 | };␊ |
1289 | # Use register_generic to set the type of arguments. The function is then␊ |
1290 | # overwritten:␊ |
1291 | register_generic_command("*end,{}");␊ |
1292 | $commands{'end'}= sub {␊ |
1293 | my $self = shift;␊ |
1294 | my ($command,$variant,$args,$env) = (shift,shift,shift,shift);␊ |
1295 | my $no_wrap = shift;␊ |
1296 | print "end($command,$variant,@$args,@$env,$no_wrap)="␊ |
1297 | if ($debug{'commands'} || $debug{'environments'});␊ |
1298 | ␊ |
1299 | # verify that this environment was the last pushed environment.␊ |
1300 | if (!@$env || @$env[-1] ne $args->[1]) {␊ |
1301 | # a begin may have been hidden in the middle of a translated␊ |
1302 | # buffer. FIXME: Just warn for now.␊ |
1303 | warn wrap_ref_mod($self->{'ref'}, "po4a::tex",␊ |
1304 | dgettext("po4a", "unmatched end of environment '%s'"),␊ |
1305 | $args->[1]);␊ |
1306 | } else {␊ |
1307 | pop @$env;␊ |
1308 | }␊ |
1309 | ␊ |
1310 | my ($t,@e) = generic_command($self,$command,$variant,$args,$env,$no_wrap);␊ |
1311 | ␊ |
1312 | print "($t, @$env)\n"␊ |
1313 | if ($debug{'commands'} || $debug{'environments'});␊ |
1314 | return ($t, @$env);␊ |
1315 | };␊ |
1316 | $separated_command{'begin'} = '*';␊ |
1317 | ␊ |
1318 | sub generic_command {␊ |
1319 | my $self = shift;␊ |
1320 | my ($command,$variant,$args,$env) = (shift,shift,shift,shift);␊ |
1321 | my $no_wrap = shift;␊ |
1322 | print "generic_command($command,$variant,@$args,@$env,$no_wrap)="␊ |
1323 | if ($debug{'commands'} || $debug{'environments'});␊ |
1324 | ␊ |
1325 | my ($t,@e)=("",());␊ |
1326 | my $translated = "";␊ |
1327 | ␊ |
1328 | # the number of arguments is checked during the extraction of the␊ |
1329 | # arguments␊ |
1330 | ␊ |
1331 | if ( (not (defined $separated_command{$command}))␊ |
1332 | or $separated_command{$command} ne '+') {␊ |
1333 | # Use the information from %command_parameters to only translate␊ |
1334 | # the needed parameters␊ |
1335 | $translated = "$ESCAPE$command$variant";␊ |
1336 | # handle arguments␊ |
1337 | my @arg_types = @{$command_parameters{$command}{'types'}};␊ |
1338 | my @arg_translated = @{$command_parameters{$command}{'translated'}};␊ |
1339 | my ($type, $opt);␊ |
1340 | my @targs = @$args;␊ |
1341 | my $count = 0;␊ |
1342 | while (@targs) {␊ |
1343 | $type = shift @targs;␊ |
1344 | $opt = shift @targs;␊ |
1345 | my $have_to_be_translated = 0;␊ |
1346 | TEST_TYPE:␊ |
1347 | if ($count >= scalar @arg_types) {␊ |
1348 | # The number of arguments does not match,␊ |
1349 | # and a variable number of arguments was not specified␊ |
1350 | die wrap_ref_mod($self->{ref}, "po4a::tex",␊ |
1351 | dgettext("po4a",␊ |
1352 | "Wrong number of arguments for ".␊ |
1353 | "the '%s' command.")."\n",␊ |
1354 | $command);␊ |
1355 | } elsif ($type eq $arg_types[$count]) {␊ |
1356 | $have_to_be_translated = $arg_translated[$count];␊ |
1357 | $count ++;␊ |
1358 | } elsif ($type eq '{' and $arg_types[$count] eq '[') {␊ |
1359 | # an optionnal argument was not provided,␊ |
1360 | # try with the next argument.␊ |
1361 | $count++;␊ |
1362 | goto TEST_TYPE;␊ |
1363 | } else {␊ |
1364 | my $reason = dgettext("po4a",␊ |
1365 | "An optional argument ".␊ |
1366 | "was provided, but a mandatory one ".␊ |
1367 | "is expected.");␊ |
1368 | die wrap_ref_mod($self->{ref}, "po4a::tex",␊ |
1369 | dgettext("po4a", "Command '%s': %s")."\n",␊ |
1370 | $command, $reason);␊ |
1371 | }␊ |
1372 | if ($have_to_be_translated) {␊ |
1373 | ($t, @e) = translate_buffer($self,$opt,$no_wrap,(@$env,$command.$type."#".$count.$type_end{$type}));␊ |
1374 | } else {␊ |
1375 | $t = $opt;␊ |
1376 | }␊ |
1377 | $translated .= $type.$t.$type_end{$type};␊ |
1378 | }␊ |
1379 | } else {␊ |
1380 | # Translate the command with all its arguments joined␊ |
1381 | my $tmp = "$ESCAPE$command$variant";␊ |
1382 | my ($type, $opt);␊ |
1383 | while (@$args) {␊ |
1384 | $type = shift @$args;␊ |
1385 | $opt = shift @$args;␊ |
1386 | $tmp .= $type.$opt.$type_end{$type};␊ |
1387 | }␊ |
1388 | @e = @$env;␊ |
1389 | my $wrap = 1;␊ |
1390 | $wrap = 0 if (defined $no_wrap and $no_wrap == 1);␊ |
1391 | $translated = $self->translate($tmp,$self->{ref},␊ |
1392 | @e?$e[-1]:"Plain text",␊ |
1393 | "wrap" => $wrap);␊ |
1394 | }␊ |
1395 | ␊ |
1396 | print "($translated, @$env)\n"␊ |
1397 | if ($debug{'commands'} || $debug{'environments'});␊ |
1398 | return ($translated, @$env);␊ |
1399 | }␊ |
1400 | ␊ |
1401 | sub register_generic_command {␊ |
1402 | if ($_[0] =~ m/^(.*),((\{_?\}|\[_?\]| _? )*)$/) {␊ |
1403 | my $command = $1;␊ |
1404 | my $arg_types = $2;␊ |
1405 | if ($command =~ /^([-*+])(.*)$/) {␊ |
1406 | $command = $2;␊ |
1407 | $separated_command{$command}=$1;␊ |
1408 | }␊ |
1409 | my @types = ();␊ |
1410 | my @translated = ();␊ |
1411 | while ( defined $arg_types␊ |
1412 | and length $arg_types␊ |
1413 | and $arg_types =~ m/^(?:([\{\[ ])(_?)[\}\] ])(.*)$/) {␊ |
1414 | push @types, $1;␊ |
1415 | push @translated, ($2 eq "_")?1:0;␊ |
1416 | $arg_types = $3;␊ |
1417 | }␊ |
1418 | $command_parameters{$command}{'types'} = \@types;␊ |
1419 | $command_parameters{$command}{'translated'} = \@translated;␊ |
1420 | $command_parameters{$command}{'nb_args'} = "";␊ |
1421 | $commands{$command} = \&generic_command;␊ |
1422 | } else {␊ |
1423 | die wrap_mod("po4a::tex",␊ |
1424 | dgettext("po4a",␊ |
1425 | "register_generic_command: unsupported ".␊ |
1426 | "format: '%s'.")."\n",␊ |
1427 | $_[0]);␊ |
1428 | }␊ |
1429 | }␊ |
1430 | ␊ |
1431 | ########################################␊ |
1432 | #### DEFINITION OF THE ENVIRONMENTS ####␊ |
1433 | ########################################␊ |
1434 | sub generic_environment {␊ |
1435 | my $self = shift;␊ |
1436 | my ($command,$variant,$args,$env) = (shift,shift,shift,shift);␊ |
1437 | my $no_wrap = shift;␊ |
1438 | print "generic_environment($command,$variant,$args,$env,$no_wrap)="␊ |
1439 | if ($debug{'environments'});␊ |
1440 | my ($t,@e)=("",());␊ |
1441 | my $translated = "";␊ |
1442 | ␊ |
1443 | # The first argument (the name of the environment is never translated)␊ |
1444 | # For the others, @types and @translated are used.␊ |
1445 | $translated = "$ESCAPE$command$variant";␊ |
1446 | my @targs = @$args;␊ |
1447 | my $type = shift @targs;␊ |
1448 | my $opt = shift @targs;␊ |
1449 | my $new_env = $opt;␊ |
1450 | $translated .= $type.$new_env.$type_end{$type};␊ |
1451 | if ( (not (defined $separated_environment{$new_env}))␊ |
1452 | or $separated_environment{$new_env} ne '+') {␊ |
1453 | # Use the information from %command_parameters to only translate␊ |
1454 | # the needed parameters␊ |
1455 | my @arg_types = @{$environment_parameters{$new_env}{'types'}};␊ |
1456 | my @arg_translated = @{$environment_parameters{$new_env}{'translated'}};␊ |
1457 | ␊ |
1458 | my $count = 0;␊ |
1459 | while (@targs) {␊ |
1460 | $type = shift @targs;␊ |
1461 | $opt = shift @targs;␊ |
1462 | my $have_to_be_translated = 0;␊ |
1463 | TEST_TYPE:␊ |
1464 | if ($count >= scalar @arg_types) {␊ |
1465 | die wrap_ref_mod($self->{ref}, "po4a::tex",␊ |
1466 | dgettext("po4a",␊ |
1467 | "Wrong number of arguments for ".␊ |
1468 | "the '%s' command.")."\n",␊ |
1469 | $command);␊ |
1470 | } elsif ($type eq $arg_types[$count]) {␊ |
1471 | $have_to_be_translated = $arg_translated[$count];␊ |
1472 | $count ++;␊ |
1473 | } elsif ($type eq '{' and $arg_types[$count] eq '[') {␊ |
1474 | # an optionnal argument was not provided,␊ |
1475 | # try with the next argument.␊ |
1476 | $count++;␊ |
1477 | goto TEST_TYPE;␊ |
1478 | } else {␊ |
1479 | my $reason = dgettext("po4a",␊ |
1480 | "An optional argument ".␊ |
1481 | "was provided, but a mandatory one ".␊ |
1482 | "is expected.");␊ |
1483 | die wrap_ref_mod($self->{ref}, "po4a::tex",␊ |
1484 | dgettext("po4a", "Command '%s': %s")."\n",␊ |
1485 | $command, $reason);␊ |
1486 | }␊ |
1487 | ␊ |
1488 | if ($have_to_be_translated) {␊ |
1489 | ($t, @e) = translate_buffer($self,$opt,$no_wrap,(@$env,$new_env.$type."#".$count.$type_end{$type}));␊ |
1490 | } else {␊ |
1491 | $t = $opt;␊ |
1492 | }␊ |
1493 | $translated .= $type.$t.$type_end{$type};␊ |
1494 | ␊ |
1495 | }␊ |
1496 | } else {␊ |
1497 | # Translate the \begin command with all its arguments joined␊ |
1498 | my ($type, $opt);␊ |
1499 | my $buf = $translated;␊ |
1500 | while (@targs) {␊ |
1501 | $type = shift @targs;␊ |
1502 | $opt = shift @targs;␊ |
1503 | $buf .= $type.$opt.$type_end{$type};␊ |
1504 | }␊ |
1505 | @e = @$env;␊ |
1506 | my $wrap = 1;␊ |
1507 | $wrap = 0 if $no_wrap == 1;␊ |
1508 | $translated = $self->translate($buf,$self->{ref},␊ |
1509 | @e?$e[-1]:"Plain text",␊ |
1510 | "wrap" => $wrap);␊ |
1511 | }␊ |
1512 | @e = (@$env, $new_env);␊ |
1513 | ␊ |
1514 | print "($translated,@e)\n"␊ |
1515 | if ($debug{'environments'});␊ |
1516 | return ($translated,@e);␊ |
1517 | }␊ |
1518 | ␊ |
1519 | ␊ |
1520 | sub check_arg_count {␊ |
1521 | my $self = shift;␊ |
1522 | my $command = shift;␊ |
1523 | my $args = shift;␊ |
1524 | my @targs = @$args;␊ |
1525 | my $check = 1;␊ |
1526 | my @remainder = ();␊ |
1527 | my $reason = "";␊ |
1528 | my ($type, $arg);␊ |
1529 | my @arg_types;␊ |
1530 | ␊ |
1531 | if ($command eq 'begin') {␊ |
1532 | $type = shift @targs;␊ |
1533 | # The name of the environment is mandatory␊ |
1534 | if ( (not defined $type)␊ |
1535 | or ($type ne '{')) {␊ |
1536 | $reason = dgettext("po4a",␊ |
1537 | "The first argument of \\begin is mandatory.");␊ |
1538 | $check = 0;␊ |
1539 | }␊ |
1540 | my $env = shift @targs;␊ |
1541 | if (not defined $environment_parameters{$env}) {␊ |
1542 | die wrap_ref_mod($self->{ref},"po4a::tex",␊ |
1543 | dgettext("po4a", "unknown environment: '%s'"),␊ |
1544 | $env);␊ |
1545 | }␊ |
1546 | @arg_types = @{$environment_parameters{$env}{'types'}};␊ |
1547 | } else {␊ |
1548 | @arg_types = @{$command_parameters{$command}{'types'}};␊ |
1549 | }␊ |
1550 | ␊ |
1551 | my $count = 0;␊ |
1552 | while ($check and @targs) {␊ |
1553 | $type = shift @targs;␊ |
1554 | $arg = shift @targs;␊ |
1555 | TEST_TYPE:␊ |
1556 | if ($count >= scalar @arg_types) {␊ |
1557 | # Too many arguments some will remain␊ |
1558 | @remainder = ($type, $arg, @targs);␊ |
1559 | last;␊ |
1560 | } elsif ($type eq $arg_types[$count]) {␊ |
1561 | $count ++;␊ |
1562 | } elsif ($type eq '{' and $arg_types[$count] eq '[') {␊ |
1563 | # an optionnal argument was not provided,␊ |
1564 | # try with the next argument.␊ |
1565 | $count++;␊ |
1566 | goto TEST_TYPE;␊ |
1567 | } else {␊ |
1568 | $check = 0;␊ |
1569 | $reason = dgettext("po4a",␊ |
1570 | "An optional argument was ".␊ |
1571 | "provided, but a mandatory one is expected.");␊ |
1572 | }␊ |
1573 | }␊ |
1574 | ␊ |
1575 | return ($check, $reason, \@remainder);␊ |
1576 | }␊ |
1577 | ␊ |
1578 | sub register_generic_environment {␊ |
1579 | print "register_generic_environment($_[0])\n"␊ |
1580 | if ($debug{'environments'});␊ |
1581 | if ($_[0] =~ m/^(.*),((?:\{_?\}|\[_?\])*)$/) {␊ |
1582 | my $env = $1;␊ |
1583 | my $arg_types = $2;␊ |
1584 | if ($env =~ /^([+])(.*)$/) {␊ |
1585 | $separated_environment{$2} = $1;␊ |
1586 | $env = $2;␊ |
1587 | }␊ |
1588 | my @types = ();␊ |
1589 | my @translated = ();␊ |
1590 | while ( defined $arg_types␊ |
1591 | and length $arg_types␊ |
1592 | and $arg_types =~ m/^(?:([\{\[])(_?)[\}\]])(.*)$/) {␊ |
1593 | push @types, $1;␊ |
1594 | push @translated, ($2 eq "_")?1:0;␊ |
1595 | $arg_types = $3;␊ |
1596 | }␊ |
1597 | $environment_parameters{$env} = {␊ |
1598 | 'types' => \@types,␊ |
1599 | 'translated' => \@translated␊ |
1600 | };␊ |
1601 | $environments{$env} = \&generic_environment;␊ |
1602 | }␊ |
1603 | }␊ |
1604 | ␊ |
1605 | sub register_verbatim_environment {␊ |
1606 | my $env = shift;␊ |
1607 | $no_wrap_environments .= " $env";␊ |
1608 | $verbatim_environments .= " $env";␊ |
1609 | $RE_VERBATIM = "\\\\begin\\{(?:".␊ |
1610 | join("|", split(/ /, $verbatim_environments)).␊ |
1611 | ")\\*?\\}";␊ |
1612 | register_generic_environment("$env,")␊ |
1613 | unless (defined $environments{$env});␊ |
1614 | }␊ |
1615 | ␊ |
1616 | ####################################␊ |
1617 | ### INITIALIZATION OF THE PARSER ###␊ |
1618 | ####################################␊ |
1619 | sub initialize {␊ |
1620 | my $self = shift;␊ |
1621 | my %options = @_;␊ |
1622 | ␊ |
1623 | $self->{options}{'definitions'}='';␊ |
1624 | $self->{options}{'exclude_include'}='';␊ |
1625 | $self->{options}{'no_wrap'}='';␊ |
1626 | $self->{options}{'verbatim'}='';␊ |
1627 | $self->{options}{'debug'}='';␊ |
1628 | $self->{options}{'verbose'}='';␊ |
1629 | ␊ |
1630 | %debug = ();␊ |
1631 | # FIXME: %commands and %separated_command should also be restored to their␊ |
1632 | # default values.␊ |
1633 | ␊ |
1634 | foreach my $opt (keys %options) {␊ |
1635 | if ($options{$opt}) {␊ |
1636 | die wrap_mod("po4a::tex",␊ |
1637 | dgettext("po4a", "Unknown option: %s"), $opt)␊ |
1638 | unless exists $self->{options}{$opt};␊ |
1639 | $self->{options}{$opt} = $options{$opt};␊ |
1640 | }␊ |
1641 | }␊ |
1642 | ␊ |
1643 | if ($options{'debug'}) {␊ |
1644 | foreach ($options{'debug'}) {␊ |
1645 | $debug{$_} = 1;␊ |
1646 | }␊ |
1647 | }␊ |
1648 | ␊ |
1649 | if ($options{'exclude_include'}) {␊ |
1650 | foreach (split(/:/, $options{'exclude_include'})) {␊ |
1651 | push @exclude_include, $_;␊ |
1652 | }␊ |
1653 | }␊ |
1654 | ␊ |
1655 | if ($options{'no_wrap'}) {␊ |
1656 | foreach (split(/,/, $options{'no_wrap'})) {␊ |
1657 | $no_wrap_environments .= " $_";␊ |
1658 | register_generic_environment("$_,")␊ |
1659 | unless (defined $environments{$_});␊ |
1660 | }␊ |
1661 | }␊ |
1662 | ␊ |
1663 | if ($options{'verbatim'}) {␊ |
1664 | foreach (split(/,/, $options{'verbatim'})) {␊ |
1665 | register_verbatim_environment($_);␊ |
1666 | }␊ |
1667 | }␊ |
1668 | ␊ |
1669 | if ($options{'definitions'}) {␊ |
1670 | $self->parse_definition_file($options{'definitions'})␊ |
1671 | }␊ |
1672 | }␊ |
1673 | ␊ |
1674 | =head1 STATUS OF THIS MODULE␊ |
1675 | ␊ |
1676 | This module needs more tests.␊ |
1677 | ␊ |
1678 | It was tested on a book and with the Python documentation.␊ |
1679 | ␊ |
1680 | =head1 TODO LIST␊ |
1681 | ␊ |
1682 | =over 4␊ |
1683 | ␊ |
1684 | =item Automatic detection of new commands␊ |
1685 | ␊ |
1686 | The TeX module could parse the newcommand arguments and try to guess the␊ |
1687 | number of arguments, their type and whether or not they should be␊ |
1688 | translated.␊ |
1689 | ␊ |
1690 | =item Translation of the environment separator␊ |
1691 | ␊ |
1692 | When \item is used as an environment separator, the item argument is␊ |
1693 | attached to the following string.␊ |
1694 | ␊ |
1695 | =item Some commands should be added to the environment stack␊ |
1696 | ␊ |
1697 | These commands should be specified by couples.␊ |
1698 | This could allow to specify commands beginning or ending a verbatim␊ |
1699 | environment.␊ |
1700 | ␊ |
1701 | =item Others␊ |
1702 | ␊ |
1703 | Various other points are tagged TODO in the source.␊ |
1704 | ␊ |
1705 | =back␊ |
1706 | ␊ |
1707 | =head1 KNOWN BUGS␊ |
1708 | ␊ |
1709 | Various points are tagged FIXME in the source.␊ |
1710 | ␊ |
1711 | =head1 SEE ALSO␊ |
1712 | ␊ |
1713 | L<Locale::Po4a::LaTeX(3pm)|Locale::Po4a::LaTeX>,␊ |
1714 | L<Locale::Po4a::TransTractor(3pm)|Locale::Po4a::TransTractor>,␊ |
1715 | L<po4a(7)|po4a.7>␊ |
1716 | ␊ |
1717 | =head1 AUTHORS␊ |
1718 | ␊ |
1719 | Nicolas François <nicolas.francois@centraliens.net>␊ |
1720 | ␊ |
1721 | =head1 COPYRIGHT AND LICENSE␊ |
1722 | ␊ |
1723 | Copyright 2004, 2005 by Nicolas FRANÇOIS <nicolas.francois@centraliens.net>.␊ |
1724 | ␊ |
1725 | This program is free software; you may redistribute it and/or modify it␊ |
1726 | under the terms of GPL (see the COPYING file).␊ |
1727 | ␊ |
1728 | =cut␊ |
1729 | ␊ |
1730 | 1;␊ |
1731 | |