1 | #!/usr/bin/perl -w␊ |
2 | ␊ |
3 | # Copyright (c) 2004, 2005 by Nicolas FRANÇOIS <nicolas.francois@centraliens.net>␊ |
4 | #␊ |
5 | # This file is part of po4a.␊ |
6 | #␊ |
7 | # This program is free software; you can redistribute it and/or modify␊ |
8 | # it under the terms of the GNU General Public License as published by␊ |
9 | # the Free Software Foundation; either version 2 of the License, or␊ |
10 | # (at your option) any later version.␊ |
11 | #␊ |
12 | # This program is distributed in the hope that it will be useful,␊ |
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of␊ |
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the␊ |
15 | # GNU General Public License for more details.␊ |
16 | #␊ |
17 | # You should have received a copy of the GNU General Public License␊ |
18 | # along with po4a; if not, write to the Free Software␊ |
19 | # Foundation, Inc.,␊ |
20 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA␊ |
21 | #␊ |
22 | ########################################################################␊ |
23 | ␊ |
24 | =encoding UTF-8␊ |
25 | ␊ |
26 | =head1 NAME␊ |
27 | ␊ |
28 | Locale::Po4a::TeX - convert TeX documents and derivates from/to PO files␊ |
29 | ␊ |
30 | =head1 DESCRIPTION␊ |
31 | ␊ |
32 | The po4a (PO for anything) project goal is to ease translations (and more␊ |
33 | interestingly, the maintenance of translations) using gettext tools on␊ |
34 | areas where they were not expected like documentation.␊ |
35 | ␊ |
36 | Locale::Po4a::TeX is a module to help the translation of TeX documents into␊ |
37 | other [human] languages. It can also be used as a base to build modules for␊ |
38 | TeX-based documents.␊ |
39 | ␊ |
40 | Users should probably use the LaTeX module, which inherite from the TeX module␊ |
41 | and contains the definitions of common LaTeX commands.␊ |
42 | ␊ |
43 | =head1 TRANSLATING WITH PO4A::TEX␊ |
44 | ␊ |
45 | This module can be used directly to handle generic TeX documents.␊ |
46 | This will split your document in smaller blocks (paragraphs, verbatim␊ |
47 | blocks, or even smaller like titles or indexes).␊ |
48 | ␊ |
49 | There are some options (described in the next section) that can customize␊ |
50 | this behavior. If this doesn't fit to your document format you're encouraged␊ |
51 | to write your own module derived from this, to describe your format's details.␊ |
52 | See the section B<WRITING DERIVATE MODULES> below, for the process description.␊ |
53 | ␊ |
54 | This module can also be customized by lines starting with "% po4a:" in the␊ |
55 | TeX file.␊ |
56 | These customizations are described in the B<INLINE CUSTOMIZATION> section.␊ |
57 | ␊ |
58 | =head1 OPTIONS ACCEPTED BY THIS MODULE␊ |
59 | ␊ |
60 | These are this module's particular options:␊ |
61 | ␊ |
62 | =over 4␊ |
63 | ␊ |
64 | =cut␊ |
65 | ␊ |
66 | package Locale::Po4a::TeX;␊ |
67 | ␊ |
68 | use 5.006;␊ |
69 | use strict;␊ |
70 | use warnings;␊ |
71 | ␊ |
72 | require Exporter;␊ |
73 | use vars qw(@ISA @EXPORT);␊ |
74 | @ISA = qw(Locale::Po4a::TransTractor);␊ |
75 | @EXPORT = qw(%commands %environments␊ |
76 | $RE_ESCAPE $ESCAPE $RE_VERBATIM␊ |
77 | $no_wrap_environments␊ |
78 | $verbatim_environments␊ |
79 | %separated_command␊ |
80 | %separated_environment␊ |
81 | %translate_buffer_env␊ |
82 | &generic_command␊ |
83 | ®ister_generic_command␊ |
84 | ®ister_generic_environment);␊ |
85 | ␊ |
86 | use Locale::Po4a::TransTractor;␊ |
87 | use Locale::Po4a::Common;␊ |
88 | use File::Basename qw(dirname);␊ |
89 | use Carp qw(croak);␊ |
90 | ␊ |
91 | use Encode;␊ |
92 | use Encode::Guess;␊ |
93 | ␊ |
94 | # hash of known commands and environments, with parsing sub.␊ |
95 | # See end of this file␊ |
96 | use vars qw(%commands %environments);␊ |
97 | # hash to describe the number of parameters and which one have to be␊ |
98 | # translated. Used by generic commands␊ |
99 | our %command_parameters = ();␊ |
100 | our %environment_parameters = ();␊ |
101 | # hash to describe the separators of environments.␊ |
102 | our %env_separators =();␊ |
103 | ␊ |
104 | # The escape character used to introduce commands.␊ |
105 | our $RE_ESCAPE = "\\\\";␊ |
106 | our $ESCAPE = "\\";␊ |
107 | # match the beginning of a verbatim block␊ |
108 | our $RE_VERBATIM = "\\\\begin\\{(?:verbatim)\\*?\\}";␊ |
109 | # match the beginning of a comment.␊ |
110 | # NOTE: It must contain a group, with chars preceding the comment␊ |
111 | our $RE_PRE_COMMENT= "(?<!\\\\)(?:\\\\\\\\)*";␊ |
112 | our $RE_COMMENT= "\\\%";␊ |
113 | ␊ |
114 | # Space separated list of environments that should not be re-wrapped.␊ |
115 | our $no_wrap_environments = "verbatim";␊ |
116 | our $verbatim_environments = "verbatim";␊ |
117 | # hash with the commands that have to be separated (or have to be joined).␊ |
118 | # 3 modes are currently used:␊ |
119 | # '*' The command is separated if it appear at an extremity of a␊ |
120 | # paragraph␊ |
121 | # '+' The command is separated, but its arguments are joined together␊ |
122 | # with the command name for the translation␊ |
123 | # '-' The command is not separated, unless it appear alone on a paragraph␊ |
124 | # (e.g. \strong)␊ |
125 | our %separated_command = ();␊ |
126 | our %separated_environment = ();␊ |
127 | ␊ |
128 | =item B<debug>␊ |
129 | ␊ |
130 | Activate debugging for some internal mechanisms of this module.␊ |
131 | Use the source to see which parts can be debugged.␊ |
132 | ␊ |
133 | =item B<no_wrap>␊ |
134 | ␊ |
135 | Comma-separated list of environments which should not be re-wrapped.␊ |
136 | ␊ |
137 | Note that there is a difference between verbatim and no_wrap environments.␊ |
138 | There is no command and comments analysis in verbatim blocks.␊ |
139 | ␊ |
140 | If this environment was not already registered, po4a will consider that␊ |
141 | this environment does not take any parameters.␊ |
142 | ␊ |
143 | =item B<exclude_include>␊ |
144 | ␊ |
145 | Colon-separated list of files that should not be included by \input and␊ |
146 | \include.␊ |
147 | ␊ |
148 | =item B<definitions>␊ |
149 | ␊ |
150 | The name of a file containing definitions for po4a, as defined in the␊ |
151 | B<INLINE CUSTOMIZATION> section.␊ |
152 | You can use this option if it is not possible to put the definitions in␊ |
153 | the document being translated.␊ |
154 | ␊ |
155 | =item B<verbatim>␊ |
156 | ␊ |
157 | Comma-separated list of environments which should be taken as verbatim.␊ |
158 | ␊ |
159 | If this environment was not already registered, po4a will consider that␊ |
160 | this environment does not take any parameters.␊ |
161 | ␊ |
162 | =back␊ |
163 | ␊ |
164 | Using these options permits to override the behaviour of the commands defined␊ |
165 | in the default lists.␊ |
166 | ␊ |
167 | =head1 INLINE CUSTOMIZATION␊ |
168 | ␊ |
169 | The TeX module can be customized with lines starting by B<% po4a:>.␊ |
170 | These lines are interpreted as commands to the parser.␊ |
171 | The following commands are recognized:␊ |
172 | ␊ |
173 | =over 4␊ |
174 | ␊ |
175 | =item B<% po4a: command> I<command1> B<alias> I<command2>␊ |
176 | ␊ |
177 | Indicates that the arguments of the I<command1> command should be␊ |
178 | treated as the arguments of the I<command2> command.␊ |
179 | ␊ |
180 | =item B<% po4a: command> I<command1> I<parameters>␊ |
181 | ␊ |
182 | This permit to describe in detail the parameters of the I<command1>␊ |
183 | command.␊ |
184 | This information will be used to check the number of arguments and their␊ |
185 | types.␊ |
186 | ␊ |
187 | You can precede the I<command1> command by␊ |
188 | ␊ |
189 | =over 4␊ |
190 | ␊ |
191 | =item an asterisk (B<*>)␊ |
192 | ␊ |
193 | po4a will extract this command from paragraphs (if it is located at␊ |
194 | the beginning or the end of a paragraph).␊ |
195 | The translators will then have to translate the parameters that are marked␊ |
196 | as translatable.␊ |
197 | ␊ |
198 | =item a plus (B<+>)␊ |
199 | ␊ |
200 | As for an asterisk, the command will be extracted if it appear at an␊ |
201 | extremity of a block, but the parameters won't be translated separately.␊ |
202 | The translator will have to translate the command concatenated to all its␊ |
203 | parameters.␊ |
204 | This permits to keep more context, and is useful for commands with small␊ |
205 | words in parameter, which can have multiple meanings (and translations).␊ |
206 | ␊ |
207 | Note: In this case you don't have to specify which parameters are␊ |
208 | translatable, but po4a must know the type and number of parameters.␊ |
209 | ␊ |
210 | =item a minus (B<->)␊ |
211 | ␊ |
212 | In this case, the command won't be extracted from any block.␊ |
213 | But if it appears alone on a block, then only the parameters marked as␊ |
214 | translatable will be presented to the translator.␊ |
215 | This is useful for font commands. These commands should generally not be␊ |
216 | separated from their paragraph (to keep the context), but there is no␊ |
217 | reason to annoy the translator with them if a whole string is enclosed in␊ |
218 | such a command.␊ |
219 | ␊ |
220 | =back␊ |
221 | ␊ |
222 | The I<parameters> argument is a set of [] (to indicate an optional␊ |
223 | argument) or {} (to indicate a mandatory argument).␊ |
224 | You can place an underscore (_) between these brackets to indicate that␊ |
225 | the parameter must be translated. For example:␊ |
226 | % po4a: command *chapter [_]{_}␊ |
227 | ␊ |
228 | This indicates that the chapter command has two parameters: an optional␊ |
229 | (short title) and a mandatory one, which must both be translated.␊ |
230 | If you want to specify that the href command has two mandatory parameters,␊ |
231 | that you don't want to translate the URL (first parameter), and that you␊ |
232 | don't want this command to be separated from its paragraph (which allow␊ |
233 | the translator to move the link in the sentence), you can use:␊ |
234 | % po4a: command -href {}{_}␊ |
235 | ␊ |
236 | In this case, the information indicating which arguments must be␊ |
237 | translated is only used if a paragraph is only composed of this href␊ |
238 | command.␊ |
239 | ␊ |
240 | =item B<% po4a: environment> I<env> I<parameters>␊ |
241 | ␊ |
242 | This permits to define the parameters accepted by the I<env> environment.␊ |
243 | This information is latter used to check the number of arguments of the␊ |
244 | \begin command, and permit to specify which one must be translated.␊ |
245 | The syntax of the I<parameters> argument is the same as described for the␊ |
246 | commands.␊ |
247 | The first parameter of the \begin command is the name of the environment.␊ |
248 | This parameter must not be specified in the list of parameters. Here are␊ |
249 | some examples:␊ |
250 | % po4a: environment multicols {}␊ |
251 | % po4a: environment equation␊ |
252 | ␊ |
253 | As for the commands, I<env> can be preceded by a plus (+) to indicate␊ |
254 | that the \begin command must be translated with all its arguments.␊ |
255 | ␊ |
256 | =item B<% po4a: separator> I<env> B<">I<regex>B<">␊ |
257 | ␊ |
258 | Indicates that an environment should be split according to the given␊ |
259 | regular expression.␊ |
260 | ␊ |
261 | The regular expression is delimited by quotes.␊ |
262 | It should not create any backreference.␊ |
263 | You should use (?:) if you need a group.␊ |
264 | It may also need some escapes.␊ |
265 | ␊ |
266 | For example, the LaTeX module uses the "(?:&|\\\\)" regular expression to␊ |
267 | translate separately each cell of a table (lines are separated by '\\' and␊ |
268 | cells by '&').␊ |
269 | ␊ |
270 | The notion of environment is expended to the type displayed in the PO file.␊ |
271 | This can be used to split on "\\\\" in the first mandatory argument of the␊ |
272 | title command. In this case, the environment is title{#1}.␊ |
273 | ␊ |
274 | =item B<% po4a: verbatim environment> I<env>␊ |
275 | ␊ |
276 | Indicate that I<env> is a verbatim environment.␊ |
277 | Comments and commands will be ignored in this environment.␊ |
278 | ␊ |
279 | If this environment was not already registered, po4a will consider that␊ |
280 | this environment does not take any parameters.␊ |
281 | ␊ |
282 | =back␊ |
283 | ␊ |
284 | =cut␊ |
285 | ␊ |
286 | # Directory name of the main file.␊ |
287 | # It is the directory where included files will be searched.␊ |
288 | # See read_file.␊ |
289 | my $my_dirname;␊ |
290 | ␊ |
291 | # Array of files that should not be included by read_file.␊ |
292 | # See read_file.␊ |
293 | our @exclude_include;␊ |
294 | ␊ |
295 | my %type_end=('{'=>'}', '['=>']', ' '=>'');␊ |
296 | ␊ |
297 | #########################␊ |
298 | #### DEBUGGING STUFF ####␊ |
299 | #########################␊ |
300 | my %debug=('pretrans' => 0, # see pre-conditioning of translation␊ |
301 | 'postrans' => 0, # see post-conditioning of translation␊ |
302 | 'translate' => 0, # see translation␊ |
303 | 'extract_commands' => 0, # see commands extraction␊ |
304 | 'commands' => 0, # see command subroutines␊ |
305 | 'environments' => 0, # see environment subroutines␊ |
306 | 'translate_buffer' => 0 # see buffer translation␊ |
307 | );␊ |
308 | ␊ |
309 | =head1 WRITING DERIVATE MODULES␊ |
310 | ␊ |
311 | =over 4␊ |
312 | ␊ |
313 | =item B<pre_trans>␊ |
314 | ␊ |
315 | =cut␊ |
316 | ␊ |
317 | sub pre_trans {␊ |
318 | my ($self,$str,$ref,$type)=@_;␊ |
319 | # Preformatting, so that translators don't see␊ |
320 | # strange chars␊ |
321 | my $origstr=$str;␊ |
322 | print STDERR "pre_trans($str)="␊ |
323 | if ($debug{'pretrans'});␊ |
324 | ␊ |
325 | # Accentuated characters␊ |
326 | # FIXME: only do this if the encoding is UTF-8?␊ |
327 | # $str =~ s/${RE_ESCAPE}`a/à/g;␊ |
328 | ## $str =~ s/${RE_ESCAPE}c{c}/ç/g; # not in texinfo: @,{c}␊ |
329 | # $str =~ s/${RE_ESCAPE}^e/ê/g;␊ |
330 | # $str =~ s/${RE_ESCAPE}'e/é/g;␊ |
331 | # $str =~ s/${RE_ESCAPE}`e/è/g;␊ |
332 | # $str =~ s/${RE_ESCAPE}`u/ù/g;␊ |
333 | # $str =~ s/${RE_ESCAPE}"i/ï/g;␊ |
334 | # # Non breaking space. FIXME: should we change $\sim$ to ~␊ |
335 | # $str =~ s/~/\xA0/g; # FIXME: not in texinfo: @w{ }␊ |
336 | ␊ |
337 | print STDERR "$str\n" if ($debug{'pretrans'});␊ |
338 | return $str;␊ |
339 | }␊ |
340 | ␊ |
341 | =item B<post_trans>␊ |
342 | ␊ |
343 | =cut␊ |
344 | ␊ |
345 | sub post_trans {␊ |
346 | my ($self,$str,$ref,$type)=@_;␊ |
347 | my $transstr=$str;␊ |
348 | ␊ |
349 | print STDERR "post_trans($str)="␊ |
350 | if ($debug{'postrans'});␊ |
351 | ␊ |
352 | # Accentuated characters␊ |
353 | # $str =~ s/à/${ESCAPE}`a/g;␊ |
354 | ## $str =~ s/ç/$ESCAPEc{c}/g; # FIXME: not in texinfo␊ |
355 | # $str =~ s/ê/${ESCAPE}^e/g;␊ |
356 | # $str =~ s/é/${ESCAPE}'e/g;␊ |
357 | # $str =~ s/è/${ESCAPE}`e/g;␊ |
358 | # $str =~ s/ù/${ESCAPE}`u/g;␊ |
359 | # $str =~ s/ï/${ESCAPE}"i/g;␊ |
360 | # # Non breaking space. FIXME: should we change ~ to $\sim$␊ |
361 | # $str =~ s/\xA0/~/g; # FIXME: not in texinfo␊ |
362 | ␊ |
363 | print STDERR "$str\n" if ($debug{'postrans'});␊ |
364 | return $str;␊ |
365 | }␊ |
366 | ␊ |
367 | # Comments are extracted in the parse function.␊ |
368 | # They are stored in the @comments array, and then displayed as a PO␊ |
369 | # comment with the first translated string of the paragraph.␊ |
370 | my @comments = ();␊ |
371 | ␊ |
372 | =item B<translate>␊ |
373 | ␊ |
374 | Wrapper around Transtractor's translate, with pre- and post-processing␊ |
375 | filters.␊ |
376 | ␊ |
377 | Comments of a paragraph are inserted as a PO comment for the first␊ |
378 | translated string of this paragraph.␊ |
379 | ␊ |
380 | =cut␊ |
381 | ␊ |
382 | sub translate {␊ |
383 | my ($self,$str,$ref,$type) = @_;␊ |
384 | my (%options)=@_;␊ |
385 | my $origstr=$str;␊ |
386 | print STDERR "translate($str)="␊ |
387 | if ($debug{'translate'});␊ |
388 | ␊ |
389 | return $str unless (defined $str) && length($str);␊ |
390 | return $str if ($str eq "\n");␊ |
391 | ␊ |
392 | $str=pre_trans($self,$str,$ref||$self->{ref},$type);␊ |
393 | ␊ |
394 | # add comments (if any and not already added to the PO)␊ |
395 | if (@comments) {␊ |
396 | $options{'comment'} .= join('\n', @comments);␊ |
397 | ␊ |
398 | @comments = ();␊ |
399 | }␊ |
400 | ␊ |
401 | # FIXME: translate may append a newline, keep the trailing spaces so we can␊ |
402 | # recover them.␊ |
403 | my $spaces = "";␊ |
404 | if ($options{'wrap'} and $str =~ m/^(.*?)(\s+)$/s) {␊ |
405 | $str = $1;␊ |
406 | $spaces = $2;␊ |
407 | }␊ |
408 | ␊ |
409 | # Translate this␊ |
410 | $str = $self->SUPER::translate($str,␊ |
411 | $ref||$self->{ref},␊ |
412 | $type || $self->{type},␊ |
413 | %options);␊ |
414 | ␊ |
415 | # FIXME: translate may append a newline, see above␊ |
416 | if ($options{'wrap'}) {␊ |
417 | chomp $str;␊ |
418 | $str .= $spaces;␊ |
419 | }␊ |
420 | ␊ |
421 | $str=post_trans($self,$str,$ref||$self->{ref},$type);␊ |
422 | ␊ |
423 | print STDERR "'$str'\n" if ($debug{'translate'});␊ |
424 | return $str;␊ |
425 | }␊ |
426 | ␊ |
427 | ###########################␊ |
428 | ### COMMANDS SEPARATION ###␊ |
429 | ###########################␊ |
430 | ␊ |
431 | =item B<get_leading_command>($buffer)␊ |
432 | ␊ |
433 | This function returns:␊ |
434 | ␊ |
435 | =over 4␊ |
436 | ␊ |
437 | =item A command name␊ |
438 | ␊ |
439 | If no command is found at the beginning of the given buffer, this string␊ |
440 | will be empty. Only commands that can be separated are considered.␊ |
441 | The %separated_command hash contains the list of these commands.␊ |
442 | ␊ |
443 | =item A variant␊ |
444 | ␊ |
445 | This indicates if a variant is used. For example, an asterisk (*) can␊ |
446 | be added at the end of sections command to specify that they should␊ |
447 | not be numbered. In this case, this field will contain "*". If there␊ |
448 | is no variant, the field is an empty string.␊ |
449 | ␊ |
450 | =item An array of tuples (type of argument, argument)␊ |
451 | ␊ |
452 | The type of argument can be either '{' (for mandatory arguments) or '['␊ |
453 | (for optional arguments).␊ |
454 | ␊ |
455 | =item The remaining buffer␊ |
456 | ␊ |
457 | The rest of the buffer after the removal of this leading command and␊ |
458 | its arguments. If no command is found, the original buffer is not␊ |
459 | touched and returned in this field.␊ |
460 | ␊ |
461 | =back␊ |
462 | ␊ |
463 | =cut␊ |
464 | ␊ |
465 | sub get_leading_command {␊ |
466 | my ($self, $buffer) = (shift,shift);␊ |
467 | my $command = ""; # the command name␊ |
468 | my $variant = ""; # a varriant for the command (e.g. an asterisk)␊ |
469 | my @args; # array of arguments␊ |
470 | print STDERR "get_leading_command($buffer)="␊ |
471 | if ($debug{'extract_commands'});␊ |
472 | ␊ |
473 | if ($buffer =~ m/^$RE_ESCAPE([[:alnum:]]+)(\*?)(.*)$/s␊ |
474 | && defined $separated_command{$1}) {␊ |
475 | # The buffer begin by a comand (possibly preceded by some␊ |
476 | # whitespaces).␊ |
477 | $command = $1;␊ |
478 | $variant = $2;␊ |
479 | $buffer = $3;␊ |
480 | # read the arguments (if any)␊ |
481 | while ($buffer =~ m/^\s*$RE_PRE_COMMENT([\[\{])(.*)$/s) {␊ |
482 | my $type = $1;␊ |
483 | my $arg = "";␊ |
484 | my $count = 1;␊ |
485 | $buffer = $2;␊ |
486 | # stop reading the buffer when the number of ] (or }) matches the␊ |
487 | # the number of [ (or {).␊ |
488 | while ($count > 0) {␊ |
489 | if ($buffer =~ m/^(.*?$RE_PRE_COMMENT)([\[\]\{\}])(.*)$/s) {␊ |
490 | $arg .= $1;␊ |
491 | $buffer = $3;␊ |
492 | if ($2 eq $type) {␊ |
493 | $count++;␊ |
494 | } elsif ($2 eq $type_end{$type}) {␊ |
495 | $count--;␊ |
496 | }␊ |
497 | if ($count > 0) {␊ |
498 | $arg .= $2␊ |
499 | }␊ |
500 | } else {␊ |
501 | die wrap_ref_mod($self->{ref},␊ |
502 | "po4a::tex",␊ |
503 | dgettext("po4a", "un-balanced %s in '%s'"),␊ |
504 | $type,␊ |
505 | $buffer);␊ |
506 | }␊ |
507 | }␊ |
508 | push @args, ($type,$arg);␊ |
509 | }␊ |
510 | }␊ |
511 | if (defined $command and length $command) {␊ |
512 | # verify the number of arguments␊ |
513 | my($check,$reason,$remainder) = check_arg_count($self,$command,\@args);␊ |
514 | if (not $check) {␊ |
515 | die wrap_ref_mod($self->{ref}, "po4a::tex",␊ |
516 | dgettext("po4a",␊ |
517 | "Error while checking the number of ".␊ |
518 | "arguments of the '%s' command: %s")."\n",␊ |
519 | $command, $reason);␊ |
520 | }␊ |
521 | ␊ |
522 | if (@$remainder) {␊ |
523 | # FIXME: we should also keep the spaces to be idempotent␊ |
524 | my ($temp,$type,$arg);␊ |
525 | while (@$remainder) {␊ |
526 | $type = shift @$remainder;␊ |
527 | $arg = shift @$remainder;␊ |
528 | $temp .= $type.$arg.$type_end{$type};␊ |
529 | # And remove the same number of arguments from @args␊ |
530 | pop @args;␊ |
531 | pop @args;␊ |
532 | }␊ |
533 | $buffer = $temp.$buffer;␊ |
534 | }␊ |
535 | }␊ |
536 | ␊ |
537 | print STDERR "($command,$variant,@args,$buffer)\n"␊ |
538 | if ($debug{'extract_commands'});␊ |
539 | return ($command,$variant,\@args,$buffer);␊ |
540 | }␊ |
541 | ␊ |
542 | =item B<get_trailing_command>($buffer)␊ |
543 | ␊ |
544 | The same as B<get_leading_command>, but for commands at the end of a buffer.␊ |
545 | ␊ |
546 | =cut␊ |
547 | ␊ |
548 | sub get_trailing_command {␊ |
549 | my ($self, $buffer) = (shift,shift);␊ |
550 | my $orig_buffer = $buffer;␊ |
551 | print STDERR "get_trailing_command($buffer)="␊ |
552 | if ($debug{'extract_commands'});␊ |
553 | ␊ |
554 | my @args;␊ |
555 | my $command = "";␊ |
556 | my $variant = "";␊ |
557 | ␊ |
558 | # While the buffer ends by }, consider it is a mandatory argument␊ |
559 | # and extract this argument.␊ |
560 | while ( $buffer =~ m/^(.*$RE_PRE_COMMENT(\{).*)$RE_PRE_COMMENT\}$/s␊ |
561 | or $buffer =~ m/^(.*$RE_PRE_COMMENT(\[).*)$RE_PRE_COMMENT\]$/s) {␊ |
562 | my $arg = "";␊ |
563 | my $count = 1;␊ |
564 | $buffer = $1;␊ |
565 | my $type = $2;␊ |
566 | # stop reading the buffer when the number of } (or ]) matches the␊ |
567 | # the number of { (or [).␊ |
568 | while ($count > 0) {␊ |
569 | if ($buffer =~ m/^(.*$RE_PRE_COMMENT)([\{\}\[\]])(.*)$/s) {␊ |
570 | $arg = $3.$arg;␊ |
571 | $buffer = $1;␊ |
572 | if ($2 eq $type) {␊ |
573 | $count--;␊ |
574 | } elsif ($2 eq $type_end{$type}) {␊ |
575 | $count++;␊ |
576 | }␊ |
577 | if ($count > 0) {␊ |
578 | $arg = $2.$arg;␊ |
579 | }␊ |
580 | } else {␊ |
581 | die wrap_ref_mod($self->{ref},␊ |
582 | "po4a::tex",␊ |
583 | dgettext("po4a", "un-balanced %s in '%s'"),␊ |
584 | $type_end{$type},␊ |
585 | $buffer);␊ |
586 | }␊ |
587 | }␊ |
588 | unshift @args, ($type,$arg);␊ |
589 | }␊ |
590 | ␊ |
591 | # There should now be a command, maybe followed by an asterisk.␊ |
592 | if ($buffer =~ m/^(.*$RE_PRE_COMMENT)$RE_ESCAPE([[:alnum:]]+)(\*?)\s*$/s␊ |
593 | && defined $separated_command{$2}) {␊ |
594 | $buffer = $1;␊ |
595 | $command = $2;␊ |
596 | $variant = $3;␊ |
597 | my($check,$reason,$remainder) = check_arg_count($self,$command,\@args);␊ |
598 | if (not $check) {␊ |
599 | die wrap_ref_mod($self->{ref}, "po4a::tex",␊ |
600 | dgettext("po4a",␊ |
601 | "Error while checking the number of ".␊ |
602 | "arguments of the '%s' command: %s")."\n",␊ |
603 | $command, $reason);␊ |
604 | }␊ |
605 | if (@$remainder) {␊ |
606 | # There are some arguments after the command.␊ |
607 | # We can't extract this comand.␊ |
608 | $command = "";␊ |
609 | }␊ |
610 | }␊ |
611 | ␊ |
612 | # sanitize return values if no command was found.␊ |
613 | if (!length($command)) {␊ |
614 | $command = "";␊ |
615 | $variant = "";␊ |
616 | undef @args;␊ |
617 | $buffer = $orig_buffer;␊ |
618 | }␊ |
619 | # verify the number of arguments␊ |
620 | ␊ |
621 | print STDERR "($command,$variant,@args,$buffer)\n"␊ |
622 | if ($debug{'extract_commands'});␊ |
623 | return ($command,$variant,\@args,$buffer);␊ |
624 | }␊ |
625 | ␊ |
626 | =item B<translate_buffer>␊ |
627 | ␊ |
628 | Recursively translate a buffer by separating leading and trailing␊ |
629 | commands (those which should be translated separately) from the␊ |
630 | buffer.␊ |
631 | ␊ |
632 | If a function is defined in %translate_buffer_env for the current␊ |
633 | environment, this function will be used to translate the buffer instead of␊ |
634 | translate_buffer().␊ |
635 | ␊ |
636 | =cut␊ |
637 | ␊ |
638 | our %translate_buffer_env = ();␊ |
639 | sub translate_buffer {␊ |
640 | my ($self,$buffer,$no_wrap,@env) = (shift,shift,shift,@_);␊ |
641 | ␊ |
642 | if (@env and defined $translate_buffer_env{$env[-1]}) {␊ |
643 | return &{$translate_buffer_env{$env[-1]}}($self,$buffer,$no_wrap,@env);␊ |
644 | }␊ |
645 | ␊ |
646 | print STDERR "translate_buffer($buffer,$no_wrap,@env)="␊ |
647 | if ($debug{'translate_buffer'});␊ |
648 | ␊ |
649 | my ($command,$variant) = ("","");␊ |
650 | my $args;␊ |
651 | my $translated_buffer = "";␊ |
652 | my $orig_buffer = $buffer;␊ |
653 | my $t = ""; # a temporary string␊ |
654 | ␊ |
655 | if ($buffer =~ /^\s*$/s) {␊ |
656 | print STDERR "($buffer,@env)\n"␊ |
657 | if ($debug{'translate_buffer'});␊ |
658 | return ($buffer, @env);␊ |
659 | }␊ |
660 | # verbatim blocks.␊ |
661 | # Buffers starting by \end{verbatim} are handled after.␊ |
662 | if (in_verbatim(@env) and $buffer !~ m/^\n?\Q$ESCAPE\Eend\{$env[-1]\*?\}/) {␊ |
663 | if($buffer =~ m/^(.*?)(\n?\Q$ESCAPE\Eend\{$env[-1]\*?\}.*)$/s) {␊ |
664 | # end of a verbatim block␊ |
665 | my ($begin, $end) = ($1?$1:"", $2);␊ |
666 | my ($t1, $t2) = ("", "");␊ |
667 | if (defined $begin) {␊ |
668 | $t1 = $self->translate($begin,$self->{ref},␊ |
669 | $env[-1],␊ |
670 | "wrap" => 0);␊ |
671 | }␊ |
672 | ($t2, @env) = translate_buffer($self, $end, $no_wrap, @env);␊ |
673 | print STDERR "($t1$t2,@env)\n"␊ |
674 | if ($debug{'translate_buffer'});␊ |
675 | return ($t1.$t2, @env);␊ |
676 | } else {␊ |
677 | $translated_buffer = $self->translate($buffer,$self->{ref},␊ |
678 | $env[-1],␊ |
679 | "wrap" => 0);␊ |
680 | print STDERR "($translated_buffer,@env)\n"␊ |
681 | if ($debug{'translate_buffer'});␊ |
682 | return ($translated_buffer, @env);␊ |
683 | }␊ |
684 | }␊ |
685 | # early detection of verbatim environment␊ |
686 | if ($buffer =~ /^($RE_VERBATIM\n?)(.*)$/s and length $2) {␊ |
687 | my ($begin, $end) = ($1, $2);␊ |
688 | my ($t1, $t2) = ("", "");␊ |
689 | ($t1, @env) = translate_buffer($self, $begin, $no_wrap, @env);␊ |
690 | ($t2, @env) = translate_buffer($self, $end, $no_wrap, @env);␊ |
691 | ␊ |
692 | print STDERR "($t1$t2,@env)\n"␊ |
693 | if ($debug{'translate_buffer'});␊ |
694 | return ($t1.$t2, @env);␊ |
695 | }␊ |
696 | # detect \begin and \end (if they are not commented)␊ |
697 | if ($buffer =~ /^((?:.*?\n)? # $1 is␊ |
698 | (?:[^%] # either not a %␊ |
699 | | # or␊ |
700 | (?<!\\)(?:\\\\)*\\%)*? # a % preceded by an odd nb of \␊ |
701 | ) # $2 is a \begin{ with the end of the line␊ |
702 | (${RE_ESCAPE}(?:begin|end)\{.*)$/sx␊ |
703 | and length $1) {␊ |
704 | my ($begin, $end) = ($1, $2);␊ |
705 | my ($t1, $t2) = ("", "");␊ |
706 | if (is_closed($begin)) {␊ |
707 | ($t1, @env) = translate_buffer($self, $begin, $no_wrap, @env);␊ |
708 | ($t2, @env) = translate_buffer($self, $end, $no_wrap, @env);␊ |
709 | ␊ |
710 | print STDERR "($t1$t2,@env)\n"␊ |
711 | if ($debug{'translate_buffer'});␊ |
712 | return ($t1.$t2, @env);␊ |
713 | }␊ |
714 | }␊ |
715 | ␊ |
716 | # remove comments from the buffer.␊ |
717 | # Comments are stored in an array and shown as comments in the PO.␊ |
718 | while ($buffer =~ m/($RE_PRE_COMMENT)$RE_COMMENT([^\n]*)(\n[ \t]*)(.*)$/s) {␊ |
719 | my $comment = $2;␊ |
720 | my $end = "";␊ |
721 | if ($4 =~ m/^\n/s and $buffer !~ m/^$RE_COMMENT/s) {␊ |
722 | # a line with comments, followed by an empty line.␊ |
723 | # Keep the empty line, but remove the comment.␊ |
724 | # This is an empirical heuristic, but seems to work;)␊ |
725 | $end = "\n";␊ |
726 | }␊ |
727 | if (defined $comment and $comment !~ /^\s*$/s) {␊ |
728 | push @comments, $comment;␊ |
729 | }␊ |
730 | $buffer =~ s/($RE_PRE_COMMENT)$RE_COMMENT([^\n]*)(\n[ \t]*)/$1$end/s;␊ |
731 | }␊ |
732 | ␊ |
733 | # translate leading commands.␊ |
734 | do {␊ |
735 | # keep the leading space to put them back after the translation of␊ |
736 | # the command.␊ |
737 | my $spaces = "";␊ |
738 | if ($buffer =~ /^(\s+)(.*?)$/s) {␊ |
739 | $spaces = $1;␊ |
740 | # $buffer = $2; # FIXME: this also remove trailing spaces!!␊ |
741 | $buffer =~ s/^\s*//s;␊ |
742 | }␊ |
743 | my $buffer_save = $buffer;␊ |
744 | ($command, $variant, $args, $buffer) =␊ |
745 | get_leading_command($self,$buffer);␊ |
746 | if ( (length $command)␊ |
747 | and (defined $separated_command{$command})␊ |
748 | and ($separated_command{$command} eq '-')␊ |
749 | and ( (not (defined($buffer)))␊ |
750 | or ($buffer !~ m/^\s*$/s) )) {␊ |
751 | # This command can be separated only if alone on a buffer.␊ |
752 | # We need to remove the trailing commands first, and see if it␊ |
753 | # will be alone on this buffer.␊ |
754 | $buffer = $buffer_save;␊ |
755 | $command = "";␊ |
756 | }␊ |
757 | if (length($command)) {␊ |
758 | # call the command subroutine.␊ |
759 | # These command subroutines will probably call translate_buffer␊ |
760 | # with the content of each argument that need a translation.␊ |
761 | if (defined ($commands{$command})) {␊ |
762 | ($t,@env) = &{$commands{$command}}($self,$command,$variant,␊ |
763 | $args,\@env,$no_wrap);␊ |
764 | $translated_buffer .= $spaces.$t;␊ |
765 | # Handle spaces after a command.␊ |
766 | $spaces = "";␊ |
767 | if ($buffer =~ /^(\s+)(.*?)$/s) {␊ |
768 | $spaces = $1;␊ |
769 | # $buffer = $2; # FIXME: this also remove trailing spaces!!␊ |
770 | $buffer =~ s/^\s*//s;␊ |
771 | }␊ |
772 | $translated_buffer .= $spaces;␊ |
773 | } else {␊ |
774 | die wrap_ref_mod($self->{ref},␊ |
775 | "po4a::tex",␊ |
776 | dgettext("po4a", "Unknown command: '%s'"),␊ |
777 | $command);␊ |
778 | }␊ |
779 | } else {␊ |
780 | $buffer = $spaces.$buffer;␊ |
781 | }␊ |
782 | } while (length($command));␊ |
783 | ␊ |
784 | # array of trailing commands, which will be translated later.␊ |
785 | my @trailing_commands = ();␊ |
786 | do {␊ |
787 | my $spaces = "";␊ |
788 | if ($buffer =~ /^(.*?)(\s+)$/s) {␊ |
789 | $buffer = $1;␊ |
790 | $spaces = $2;␊ |
791 | }␊ |
792 | my $buffer_save = $buffer;␊ |
793 | ($command, $variant, $args, $buffer) =␊ |
794 | get_trailing_command($self,$buffer);␊ |
795 | if ( (length $command)␊ |
796 | and (defined $separated_command{$command})␊ |
797 | and ($separated_command{$command} eq '-')␊ |
798 | and ( (not defined $buffer)␊ |
799 | or ($buffer !~ m/^\s*$/s))) {␊ |
800 | # We can extract this command.␊ |
801 | $command = "";␊ |
802 | $buffer = $buffer_save;␊ |
803 | }␊ |
804 | if (length($command)) {␊ |
805 | unshift @trailing_commands, ($command, $variant, $args, $spaces);␊ |
806 | } else {␊ |
807 | $buffer .= $spaces;␊ |
808 | }␊ |
809 | } while (length($command));␊ |
810 | ␊ |
811 | # Now, $buffer is just a block that can be translated.␊ |
812 | ␊ |
813 | # environment specific treatment␊ |
814 | if (@env and defined $env_separators{$env[-1]}) {␊ |
815 | my $re_separator = $env_separators{$env[-1]};␊ |
816 | my $buf_begin = "";␊ |
817 | # FIXME: the separator may have to be translated.␊ |
818 | while ($buffer =~ m/^(.*?)(\s*$re_separator\s*)(.*)$/s) {␊ |
819 | my ($begin, $sep, $end) = ($1, $2, $3);␊ |
820 | $buf_begin .= $begin;␊ |
821 | if (is_closed($buf_begin)) {␊ |
822 | my $t = "";␊ |
823 | ($t, @env) = translate_buffer($self,$buf_begin,$no_wrap,@env);␊ |
824 | $translated_buffer .= $t.$sep;␊ |
825 | $buf_begin = "";␊ |
826 | } else {␊ |
827 | # the command is in a command argument␊ |
828 | $buf_begin .= $sep;␊ |
829 | }␊ |
830 | $buffer = $end;␊ |
831 | }␊ |
832 | $buffer = $buf_begin . $buffer;␊ |
833 | }␊ |
834 | ␊ |
835 | # finally, translate␊ |
836 | if (length($buffer)) {␊ |
837 | my $wrap = 1;␊ |
838 | my ($e1, $e2);␊ |
839 | NO_WRAP_LOOP: foreach $e1 (@env) {␊ |
840 | foreach $e2 (split(' ', $no_wrap_environments)) {␊ |
841 | if ($e1 eq $e2) {␊ |
842 | $wrap = 0;␊ |
843 | last NO_WRAP_LOOP;␊ |
844 | }␊ |
845 | }␊ |
846 | }␊ |
847 | $wrap = 0 if (defined $no_wrap and $no_wrap == 1);␊ |
848 | # Keep spaces at the end of the buffer.␊ |
849 | my $spaces_end = "";␊ |
850 | if ($buffer =~ /^(.*?)(\s+)$/s) {␊ |
851 | $spaces_end = $2;␊ |
852 | $buffer = $1;␊ |
853 | }␊ |
854 | if ($wrap and $buffer =~ s/^(\s+)//s) {␊ |
855 | $translated_buffer .= $1;␊ |
856 | }␊ |
857 | $translated_buffer .= $self->translate($buffer,$self->{ref},␊ |
858 | @env?$env[-1]:"Plain text",␊ |
859 | "wrap" => $wrap);␊ |
860 | # Restore spaces at the end of the buffer.␊ |
861 | $translated_buffer .= $spaces_end;␊ |
862 | }␊ |
863 | ␊ |
864 | # append the translation of the trailing commands␊ |
865 | while (@trailing_commands) {␊ |
866 | my $command = shift @trailing_commands;␊ |
867 | my $variant = shift @trailing_commands;␊ |
868 | my $args = shift @trailing_commands;␊ |
869 | my $spaces = shift @trailing_commands;␊ |
870 | if (defined ($commands{$command})) {␊ |
871 | ($t,@env) = &{$commands{$command}}($self,$command,$variant,␊ |
872 | $args,\@env,$no_wrap);␊ |
873 | $translated_buffer .= $t.$spaces;␊ |
874 | } else {␊ |
875 | die wrap_ref_mod($self->{ref},␊ |
876 | "po4a::tex",␊ |
877 | dgettext("po4a", "Unknown command: '%s'"),␊ |
878 | $command);␊ |
879 | }␊ |
880 | }␊ |
881 | ␊ |
882 | print STDERR "($translated_buffer,@env)\n"␊ |
883 | if ($debug{'translate_buffer'});␊ |
884 | return ($translated_buffer,@env);␊ |
885 | }␊ |
886 | ␊ |
887 | ################################␊ |
888 | #### EXTERNAL CUSTOMIZATION ####␊ |
889 | ################################␊ |
890 | ␊ |
891 | =item B<read>␊ |
892 | ␊ |
893 | Overload Transtractor's read␊ |
894 | ␊ |
895 | =cut␊ |
896 | ␊ |
897 | sub read {␊ |
898 | my $self=shift;␊ |
899 | my $filename=shift;␊ |
900 | ␊ |
901 | # keep the directory name of the main file.␊ |
902 | $my_dirname = dirname($filename);␊ |
903 | ␊ |
904 | push @{$self->{TT}{doc_in}}, read_file($self, $filename);␊ |
905 | }␊ |
906 | ␊ |
907 | =item B<read_file>␊ |
908 | ␊ |
909 | Recursively read a file, appending included files which are not listed in the␊ |
910 | @exclude_include array. Included files are searched using the B<kpsewhich>␊ |
911 | command from the Kpathsea library.␊ |
912 | ␊ |
913 | Except from the file inclusion part, it is a cut and paste from␊ |
914 | Transtractor's read.␊ |
915 | ␊ |
916 | =cut␊ |
917 | ␊ |
918 | # TODO: fix DOS end of lines␊ |
919 | sub read_file {␊ |
920 | my $self=shift;␊ |
921 | my $filename=shift␊ |
922 | or croak wrap_mod("po4a::tex",␊ |
923 | dgettext("po4a", "Can't read from file without having a filename"));␊ |
924 | my $linenum=0;␊ |
925 | my @entries=();␊ |
926 | ␊ |
927 | open (my $in, $filename)␊ |
928 | or croak wrap_mod("po4a::tex",␊ |
929 | dgettext("po4a", "Can't read from %s: %s"), $filename, $!);␊ |
930 | while (defined (my $textline = <$in>)) {␊ |
931 | $linenum++;␊ |
932 | my $ref="$filename:$linenum";␊ |
933 | # TODO: add support for includeonly␊ |
934 | # The next regular expression matches \input or \includes that are␊ |
935 | # not commented (but can be preceded by a \%.␊ |
936 | while ($textline =~ /^((?:[^%]|(?<!\\)(?:\\\\)*\\%)*)␊ |
937 | \\(include|input)␊ |
938 | \{([^\{]*)\}(.*)$/x) {␊ |
939 | my ($begin,$newfilename,$end) = ($1,$3,$4);␊ |
940 | my $tag = $2;␊ |
941 | my $include = 1;␊ |
942 | foreach my $f (@exclude_include) {␊ |
943 | if ($f eq $newfilename) {␊ |
944 | $include = 0;␊ |
945 | $begin .= "\\$tag"."{$newfilename}";␊ |
946 | $textline = $end;␊ |
947 | last;␊ |
948 | }␊ |
949 | }␊ |
950 | if ($include and ($tag eq "include")) {␊ |
951 | $begin .= "\\clearpage";␊ |
952 | }␊ |
953 | if ($begin !~ /^\s*$/) {␊ |
954 | push @entries, ($begin,$ref);␊ |
955 | }␊ |
956 | if ($include) {␊ |
957 | # search the file␊ |
958 | open (KPSEA, "kpsewhich " . $newfilename . " |");␊ |
959 | my $newfilepath = <KPSEA>;␊ |
960 | ␊ |
961 | if ($newfilename ne "" and $newfilepath eq "") {␊ |
962 | die wrap_mod("po4a::tex",␊ |
963 | dgettext("po4a",␊ |
964 | "Can't find %s with kpsewhich"),␊ |
965 | $filename);␊ |
966 | }␊ |
967 | ␊ |
968 | push @entries, read_file($self,␊ |
969 | $newfilepath);␊ |
970 | if ($tag eq "include") {␊ |
971 | $textline = "\\clearpage".$end;␊ |
972 | } else {␊ |
973 | $textline = $end;␊ |
974 | }␊ |
975 | }␊ |
976 | }␊ |
977 | if (length($textline)) {␊ |
978 | my @entry=($textline,$ref);␊ |
979 | push @entries, @entry;␊ |
980 | ␊ |
981 | # Detect if this file has non-ascii characters␊ |
982 | if($self->{TT}{ascii_input}) {␊ |
983 | ␊ |
984 | my $decoder = guess_encoding($textline);␊ |
985 | if (!ref($decoder) or $decoder !~ /Encode::XS=/) {␊ |
986 | # We have detected a non-ascii line␊ |
987 | $self->{TT}{ascii_input} = 0;␊ |
988 | # Save the reference for future error message␊ |
989 | $self->{TT}{non_ascii_ref} ||= $ref;␊ |
990 | }␊ |
991 | }␊ |
992 | }␊ |
993 | }␊ |
994 | close $in␊ |
995 | or croak wrap_mod("po4a::tex",␊ |
996 | dgettext("po4a", "Can't close %s after reading: %s"), $filename, $!);␊ |
997 | ␊ |
998 | return @entries;␊ |
999 | }␊ |
1000 | ␊ |
1001 | =back␊ |
1002 | ␊ |
1003 | ␊ |
1004 | =over 4␊ |
1005 | ␊ |
1006 | =item B<parse_definition_file>␊ |
1007 | ␊ |
1008 | Subroutine for parsing a file with po4a directives (definitions for␊ |
1009 | new commands).␊ |
1010 | ␊ |
1011 | =cut␊ |
1012 | ␊ |
1013 | sub parse_definition_file {␊ |
1014 | my ($self,$filename,$only_try)=@_;␊ |
1015 | my $filename_org = $filename;␊ |
1016 | ␊ |
1017 | open (KPSEA, "kpsewhich " . $filename . " |");␊ |
1018 | $filename = <KPSEA>;␊ |
1019 | ␊ |
1020 | if (not defined $filename) {␊ |
1021 | warn wrap_mod("po4a::tex",␊ |
1022 | dgettext("po4a", "kpsewhich cannot find %s"), $filename_org);␊ |
1023 | if (defined $only_try && $only_try) {␊ |
1024 | return;␊ |
1025 | } else {␊ |
1026 | exit 1;␊ |
1027 | }␊ |
1028 | }␊ |
1029 | ␊ |
1030 | if (! open (IN,"<$filename")) {␊ |
1031 | warn wrap_mod("po4a::tex",␊ |
1032 | dgettext("po4a", "Can't open %s: %s"), $filename, $!);␊ |
1033 | if (defined $only_try && $only_try) {␊ |
1034 | return;␊ |
1035 | } else {␊ |
1036 | exit 1;␊ |
1037 | }␊ |
1038 | }␊ |
1039 | while (<IN>) {␊ |
1040 | if (/^\s*%\s*po4a\s*:/) {␊ |
1041 | parse_definition_line($self, $_);␊ |
1042 | }␊ |
1043 | }␊ |
1044 | }␊ |
1045 | ␊ |
1046 | =item B<parse_definition_line>␊ |
1047 | ␊ |
1048 | Parse a definition line of the form "% po4a: ".␊ |
1049 | ␊ |
1050 | See the B<INLINE CUSTOMIZATION> section for more details.␊ |
1051 | ␊ |
1052 | =cut␊ |
1053 | ␊ |
1054 | sub parse_definition_line {␊ |
1055 | my ($self,$line)=@_;␊ |
1056 | $line =~ s/^\s*%\s*po4a\s*:\s*//;␊ |
1057 | ␊ |
1058 | if ($line =~ /^command\s+([-*+]?)(\w+)\s+(.*)$/) {␊ |
1059 | my $command = $2;␊ |
1060 | $line = $3;␊ |
1061 | if ($1) {␊ |
1062 | $separated_command{$command} = $1;␊ |
1063 | }␊ |
1064 | if ($line =~ /^alias\s+(\w+)\s*$/) {␊ |
1065 | if (defined ($commands{$1})) {␊ |
1066 | $commands{$command} = $commands{$1};␊ |
1067 | $command_parameters{$command} = $command_parameters{$1};␊ |
1068 | } else {␊ |
1069 | die wrap_mod("po4a::tex",␊ |
1070 | dgettext("po4a", "Cannot use an alias to the unknown command '%s'"),␊ |
1071 | $2);␊ |
1072 | }␊ |
1073 | } elsif ($line =~ /^(-1|\d+),(-1|\d+),(-1|[ 0-9]*),(-1|[ 0-9]*?)\s*$/) {␊ |
1074 | die wrap_ref_mod($self->{ref},␊ |
1075 | "po4a::tex",␊ |
1076 | dgettext("po4a", "You are using the old ".␊ |
1077 | "definitions format (%s). ".␊ |
1078 | "Please update this definition line."),␊ |
1079 | $_[1])␊ |
1080 | } elsif ($line =~ m/^((?:\{_?\}|\[_?\])*)\s*$/) {␊ |
1081 | register_generic_command("$command,$1");␊ |
1082 | }␊ |
1083 | } elsif ($line =~ /^environment\s+([+]?\w+\*?)(.*)$/) {␊ |
1084 | my $env = $1;␊ |
1085 | $line = $2;␊ |
1086 | if ($line =~ m/^\s*((?:\{_?\}|\[_?\])*)\s*$/) {␊ |
1087 | register_generic_environment("$env,$1");␊ |
1088 | }␊ |
1089 | } elsif ($line =~ /^separator\s+(\w+(?:\[#[0-9]+\])?)\s+\"(.*)\"\s*$/) {␊ |
1090 | my $env = $1; # This is not necessarily an environment.␊ |
1091 | # It can also be smth like 'title[#1]'.␊ |
1092 | $env_separators{$env} = $2;␊ |
1093 | } elsif ($line =~ /^verbatim\s+environment\s+(\w+)\s+$/) {␊ |
1094 | register_verbatim_environment($1);␊ |
1095 | }␊ |
1096 | }␊ |
1097 | ␊ |
1098 | =item B<is_closed>␊ |
1099 | ␊ |
1100 | =cut␊ |
1101 | ␊ |
1102 | sub is_closed {␊ |
1103 | my $paragraph = shift;␊ |
1104 | # FIXME: [ and ] are more difficult to handle, because it is not easy to detect if it introduce an optional argument␊ |
1105 | my $tmp = $paragraph;␊ |
1106 | my $closing = 0;␊ |
1107 | my $opening = 0;␊ |
1108 | # FIXME: { and } should not be counted in verbatim blocks␊ |
1109 | # Remove comments␊ |
1110 | $tmp =~ s/$RE_PRE_COMMENT$RE_COMMENT.*//mg;␊ |
1111 | while ($tmp =~ /^.*?$RE_PRE_COMMENT\{(.*)$/s) {␊ |
1112 | $opening += 1;␊ |
1113 | $tmp = $1;␊ |
1114 | }␊ |
1115 | $tmp = $paragraph;␊ |
1116 | # Remove comments␊ |
1117 | $tmp =~ s/$RE_PRE_COMMENT$RE_COMMENT.*//mg;␊ |
1118 | while ($tmp =~ /^.*?$RE_PRE_COMMENT\}(.*)$/s) {␊ |
1119 | $closing += 1;␊ |
1120 | $tmp = $1;␊ |
1121 | }␊ |
1122 | return $opening eq $closing;␊ |
1123 | }␊ |
1124 | ␊ |
1125 | sub in_verbatim {␊ |
1126 | foreach my $e1 (@_) {␊ |
1127 | foreach my $e2 (split(' ', $verbatim_environments)) {␊ |
1128 | if ($e1 eq $e2) {␊ |
1129 | return 1;␊ |
1130 | }␊ |
1131 | }␊ |
1132 | }␊ |
1133 | ␊ |
1134 | return 0;␊ |
1135 | }␊ |
1136 | ␊ |
1137 | #############################␊ |
1138 | #### MAIN PARSE FUNCTION ####␊ |
1139 | #############################␊ |
1140 | =item B<parse>␊ |
1141 | ␊ |
1142 | =cut␊ |
1143 | ␊ |
1144 | sub parse {␊ |
1145 | my $self = shift;␊ |
1146 | my ($line,$ref);␊ |
1147 | my $paragraph = ""; # Buffer where we put the paragraph while building␊ |
1148 | my @env = (); # environment stack␊ |
1149 | my $t = "";␊ |
1150 | ␊ |
1151 | LINE:␊ |
1152 | undef $self->{type};␊ |
1153 | ($line,$ref)=$self->shiftline();␊ |
1154 | ␊ |
1155 | while (defined($line)) {␊ |
1156 | chomp($line);␊ |
1157 | $self->{ref}="$ref";␊ |
1158 | ␊ |
1159 | if ($line =~ /^\s*%\s*po4a\s*:/) {␊ |
1160 | parse_definition_line($self, $line);␊ |
1161 | goto LINE;␊ |
1162 | }␊ |
1163 | ␊ |
1164 | my $closed = is_closed($paragraph);␊ |
1165 | ␊ |
1166 | #FIXME: what happens if a \begin{verbatim} or \end{verbatim} is in the␊ |
1167 | # middle of a line. (This is only an issue if the verbatim␊ |
1168 | # environment contains an un-closed bracket)␊ |
1169 | if ( ($closed and ($line =~ /^\s*$/ or␊ |
1170 | $line =~ /^\s*$RE_VERBATIM\s*$/))␊ |
1171 | or (in_verbatim(@env) and $line =~ /^\s*\Q$ESCAPE\Eend{$env[-1]}\s*$/)␊ |
1172 | ) {␊ |
1173 | # An empty line. This indicates the end of the current␊ |
1174 | # paragraph.␊ |
1175 | $paragraph .= $line."\n";␊ |
1176 | if (length($paragraph)) {␊ |
1177 | ($t, @env) = translate_buffer($self,$paragraph,undef,@env);␊ |
1178 | $self->pushline($t);␊ |
1179 | $paragraph="";␊ |
1180 | @comments = ();␊ |
1181 | }␊ |
1182 | } else {␊ |
1183 | # continue the same paragraph␊ |
1184 | $paragraph .= $line."\n";␊ |
1185 | }␊ |
1186 | ␊ |
1187 | # Reinit the loop␊ |
1188 | ($line,$ref)=$self->shiftline();␊ |
1189 | undef $self->{type};␊ |
1190 | }␊ |
1191 | ␊ |
1192 | if (length($paragraph)) {␊ |
1193 | ($t, @env) = translate_buffer($self,$paragraph,undef,@env);␊ |
1194 | $self->pushline($t);␊ |
1195 | $paragraph="";␊ |
1196 | }␊ |
1197 | } # end of parse␊ |
1198 | ␊ |
1199 | =item B<docheader>␊ |
1200 | ␊ |
1201 | =back␊ |
1202 | ␊ |
1203 | =cut␊ |
1204 | ␊ |
1205 | sub docheader {␊ |
1206 | return "% This file was generated with po4a. Translate the source file.\n".␊ |
1207 | "%\n";␊ |
1208 | }␊ |
1209 | ␊ |
1210 | ␊ |
1211 | ####################################␊ |
1212 | #### DEFINITION OF THE COMMANDS ####␊ |
1213 | ####################################␊ |
1214 | ␊ |
1215 | =head1 INTERNAL FUNCTIONS used to write derivated parsers␊ |
1216 | ␊ |
1217 | Command and environment functions take the following arguments (in␊ |
1218 | addition to the $self object):␊ |
1219 | ␊ |
1220 | =over␊ |
1221 | ␊ |
1222 | =item A command name␊ |
1223 | ␊ |
1224 | =item A variant␊ |
1225 | ␊ |
1226 | =item An array of (type, argument) tuples␊ |
1227 | ␊ |
1228 | =item The current environment␊ |
1229 | ␊ |
1230 | =back␊ |
1231 | ␊ |
1232 | The first 3 arguments are extracted by get_leading_command or␊ |
1233 | get_trailing_command.␊ |
1234 | ␊ |
1235 | Command and environment functions return the translation of the command␊ |
1236 | with its arguments and a new environment.␊ |
1237 | ␊ |
1238 | Environment functions are called when a \begin command is found. They are␊ |
1239 | called with the \begin command and its arguments.␊ |
1240 | ␊ |
1241 | The TeX module only proposes one command function and one environment␊ |
1242 | function: generic_command and generic_environment.␊ |
1243 | ␊ |
1244 | generic_command uses the information specified by␊ |
1245 | register_generic_command or by adding definition to the TeX file:␊ |
1246 | % po4a: command I<command1> I<parameters>␊ |
1247 | ␊ |
1248 | generic_environment uses the information specified by␊ |
1249 | register_generic_environment or by adding definition to the TeX file:␊ |
1250 | % po4a: environment I<env> I<parameters>␊ |
1251 | ␊ |
1252 | Both functions will only translate the parameters that were specified as␊ |
1253 | translatable (with a '_').␊ |
1254 | generic_environment will append the name of the environment to the␊ |
1255 | environment stack and generic_command will append the name of the command␊ |
1256 | followed by an identifier of the parameter (like {#7} or [#2]).␊ |
1257 | ␊ |
1258 | =cut␊ |
1259 | ␊ |
1260 | # definition of environment related commands␊ |
1261 | ␊ |
1262 | $commands{'begin'}= sub {␊ |
1263 | my $self = shift;␊ |
1264 | my ($command,$variant,$args,$env) = (shift,shift,shift,shift);␊ |
1265 | my $no_wrap = shift;␊ |
1266 | print "begin($command,$variant,@$args,@$env,$no_wrap)="␊ |
1267 | if ($debug{'commands'} || $debug{'environments'});␊ |
1268 | my ($t,@e) = ("",());␊ |
1269 | ␊ |
1270 | my $envir = $args->[1];␊ |
1271 | if (defined($envir) and $envir =~ /^(.*)\*$/) {␊ |
1272 | $envir = $1;␊ |
1273 | }␊ |
1274 | ␊ |
1275 | if (defined($envir) && defined($environments{$envir})) {␊ |
1276 | ($t, @e) = &{$environments{$envir}}($self,$command,$variant,␊ |
1277 | $args,$env,$no_wrap);␊ |
1278 | } else {␊ |
1279 | die wrap_ref_mod($self->{ref}, "po4a::tex",␊ |
1280 | dgettext("po4a", "unknown environment: '%s'"),␊ |
1281 | $args->[1]);␊ |
1282 | }␊ |
1283 | ␊ |
1284 | print "($t, @e)\n"␊ |
1285 | if ($debug{'commands'} || $debug{'environments'});␊ |
1286 | return ($t, @e);␊ |
1287 | };␊ |
1288 | # Use register_generic to set the type of arguments. The function is then␊ |
1289 | # overwritten:␊ |
1290 | register_generic_command("*end,{}");␊ |
1291 | $commands{'end'}= sub {␊ |
1292 | my $self = shift;␊ |
1293 | my ($command,$variant,$args,$env) = (shift,shift,shift,shift);␊ |
1294 | my $no_wrap = shift;␊ |
1295 | print "end($command,$variant,@$args,@$env,$no_wrap)="␊ |
1296 | if ($debug{'commands'} || $debug{'environments'});␊ |
1297 | ␊ |
1298 | # verify that this environment was the last pushed environment.␊ |
1299 | if (!@$env || @$env[-1] ne $args->[1]) {␊ |
1300 | # a begin may have been hidden in the middle of a translated␊ |
1301 | # buffer. FIXME: Just warn for now.␊ |
1302 | warn wrap_ref_mod($self->{'ref'}, "po4a::tex",␊ |
1303 | dgettext("po4a", "unmatched end of environment '%s'"),␊ |
1304 | $args->[1]);␊ |
1305 | } else {␊ |
1306 | pop @$env;␊ |
1307 | }␊ |
1308 | ␊ |
1309 | my ($t,@e) = generic_command($self,$command,$variant,$args,$env,$no_wrap);␊ |
1310 | ␊ |
1311 | print "($t, @$env)\n"␊ |
1312 | if ($debug{'commands'} || $debug{'environments'});␊ |
1313 | return ($t, @$env);␊ |
1314 | };␊ |
1315 | $separated_command{'begin'} = '*';␊ |
1316 | ␊ |
1317 | sub generic_command {␊ |
1318 | my $self = shift;␊ |
1319 | my ($command,$variant,$args,$env) = (shift,shift,shift,shift);␊ |
1320 | my $no_wrap = shift;␊ |
1321 | print "generic_command($command,$variant,@$args,@$env,$no_wrap)="␊ |
1322 | if ($debug{'commands'} || $debug{'environments'});␊ |
1323 | ␊ |
1324 | my ($t,@e)=("",());␊ |
1325 | my $translated = "";␊ |
1326 | ␊ |
1327 | # the number of arguments is checked during the extraction of the␊ |
1328 | # arguments␊ |
1329 | ␊ |
1330 | if ( (not (defined $separated_command{$command}))␊ |
1331 | or $separated_command{$command} ne '+') {␊ |
1332 | # Use the information from %command_parameters to only translate␊ |
1333 | # the needed parameters␊ |
1334 | $translated = "$ESCAPE$command$variant";␊ |
1335 | # handle arguments␊ |
1336 | my @arg_types = @{$command_parameters{$command}{'types'}};␊ |
1337 | my @arg_translated = @{$command_parameters{$command}{'translated'}};␊ |
1338 | my ($type, $opt);␊ |
1339 | my @targs = @$args;␊ |
1340 | my $count = 0;␊ |
1341 | while (@targs) {␊ |
1342 | $type = shift @targs;␊ |
1343 | $opt = shift @targs;␊ |
1344 | my $have_to_be_translated = 0;␊ |
1345 | TEST_TYPE:␊ |
1346 | if ($count >= scalar @arg_types) {␊ |
1347 | # The number of arguments does not match,␊ |
1348 | # and a variable number of arguments was not specified␊ |
1349 | die wrap_ref_mod($self->{ref}, "po4a::tex",␊ |
1350 | dgettext("po4a",␊ |
1351 | "Wrong number of arguments for ".␊ |
1352 | "the '%s' command.")."\n",␊ |
1353 | $command);␊ |
1354 | } elsif ($type eq $arg_types[$count]) {␊ |
1355 | $have_to_be_translated = $arg_translated[$count];␊ |
1356 | $count ++;␊ |
1357 | } elsif ($type eq '{' and $arg_types[$count] eq '[') {␊ |
1358 | # an optionnal argument was not provided,␊ |
1359 | # try with the next argument.␊ |
1360 | $count++;␊ |
1361 | goto TEST_TYPE;␊ |
1362 | } else {␊ |
1363 | my $reason = dgettext("po4a",␊ |
1364 | "An optional argument ".␊ |
1365 | "was provided, but a mandatory one ".␊ |
1366 | "is expected.");␊ |
1367 | die wrap_ref_mod($self->{ref}, "po4a::tex",␊ |
1368 | dgettext("po4a", "Command '%s': %s")."\n",␊ |
1369 | $command, $reason);␊ |
1370 | }␊ |
1371 | if ($have_to_be_translated) {␊ |
1372 | ($t, @e) = translate_buffer($self,$opt,$no_wrap,(@$env,$command.$type."#".$count.$type_end{$type}));␊ |
1373 | } else {␊ |
1374 | $t = $opt;␊ |
1375 | }␊ |
1376 | $translated .= $type.$t.$type_end{$type};␊ |
1377 | }␊ |
1378 | } else {␊ |
1379 | # Translate the command with all its arguments joined␊ |
1380 | my $tmp = "$ESCAPE$command$variant";␊ |
1381 | my ($type, $opt);␊ |
1382 | while (@$args) {␊ |
1383 | $type = shift @$args;␊ |
1384 | $opt = shift @$args;␊ |
1385 | $tmp .= $type.$opt.$type_end{$type};␊ |
1386 | }␊ |
1387 | @e = @$env;␊ |
1388 | my $wrap = 1;␊ |
1389 | $wrap = 0 if (defined $no_wrap and $no_wrap == 1);␊ |
1390 | $translated = $self->translate($tmp,$self->{ref},␊ |
1391 | @e?$e[-1]:"Plain text",␊ |
1392 | "wrap" => $wrap);␊ |
1393 | }␊ |
1394 | ␊ |
1395 | print "($translated, @$env)\n"␊ |
1396 | if ($debug{'commands'} || $debug{'environments'});␊ |
1397 | return ($translated, @$env);␊ |
1398 | }␊ |
1399 | ␊ |
1400 | sub register_generic_command {␊ |
1401 | if ($_[0] =~ m/^(.*),((\{_?\}|\[_?\]| _? )*)$/) {␊ |
1402 | my $command = $1;␊ |
1403 | my $arg_types = $2;␊ |
1404 | if ($command =~ /^([-*+])(.*)$/) {␊ |
1405 | $command = $2;␊ |
1406 | $separated_command{$command}=$1;␊ |
1407 | }␊ |
1408 | my @types = ();␊ |
1409 | my @translated = ();␊ |
1410 | while ( defined $arg_types␊ |
1411 | and length $arg_types␊ |
1412 | and $arg_types =~ m/^(?:([\{\[ ])(_?)[\}\] ])(.*)$/) {␊ |
1413 | push @types, $1;␊ |
1414 | push @translated, ($2 eq "_")?1:0;␊ |
1415 | $arg_types = $3;␊ |
1416 | }␊ |
1417 | $command_parameters{$command}{'types'} = \@types;␊ |
1418 | $command_parameters{$command}{'translated'} = \@translated;␊ |
1419 | $command_parameters{$command}{'nb_args'} = "";␊ |
1420 | $commands{$command} = \&generic_command;␊ |
1421 | } else {␊ |
1422 | die wrap_mod("po4a::tex",␊ |
1423 | dgettext("po4a",␊ |
1424 | "register_generic_command: unsupported ".␊ |
1425 | "format: '%s'.")."\n",␊ |
1426 | $_[0]);␊ |
1427 | }␊ |
1428 | }␊ |
1429 | ␊ |
1430 | ########################################␊ |
1431 | #### DEFINITION OF THE ENVIRONMENTS ####␊ |
1432 | ########################################␊ |
1433 | sub generic_environment {␊ |
1434 | my $self = shift;␊ |
1435 | my ($command,$variant,$args,$env) = (shift,shift,shift,shift);␊ |
1436 | my $no_wrap = shift;␊ |
1437 | print "generic_environment($command,$variant,$args,$env,$no_wrap)="␊ |
1438 | if ($debug{'environments'});␊ |
1439 | my ($t,@e)=("",());␊ |
1440 | my $translated = "";␊ |
1441 | ␊ |
1442 | # The first argument (the name of the environment is never translated)␊ |
1443 | # For the others, @types and @translated are used.␊ |
1444 | $translated = "$ESCAPE$command$variant";␊ |
1445 | my @targs = @$args;␊ |
1446 | my $type = shift @targs;␊ |
1447 | my $opt = shift @targs;␊ |
1448 | my $new_env = $opt;␊ |
1449 | $translated .= $type.$new_env.$type_end{$type};␊ |
1450 | if ( (not (defined $separated_environment{$new_env}))␊ |
1451 | or $separated_environment{$new_env} ne '+') {␊ |
1452 | # Use the information from %command_parameters to only translate␊ |
1453 | # the needed parameters␊ |
1454 | my @arg_types = @{$environment_parameters{$new_env}{'types'}};␊ |
1455 | my @arg_translated = @{$environment_parameters{$new_env}{'translated'}};␊ |
1456 | ␊ |
1457 | my $count = 0;␊ |
1458 | while (@targs) {␊ |
1459 | $type = shift @targs;␊ |
1460 | $opt = shift @targs;␊ |
1461 | my $have_to_be_translated = 0;␊ |
1462 | TEST_TYPE:␊ |
1463 | if ($count >= scalar @arg_types) {␊ |
1464 | die wrap_ref_mod($self->{ref}, "po4a::tex",␊ |
1465 | dgettext("po4a",␊ |
1466 | "Wrong number of arguments for ".␊ |
1467 | "the '%s' command.")."\n",␊ |
1468 | $command);␊ |
1469 | } elsif ($type eq $arg_types[$count]) {␊ |
1470 | $have_to_be_translated = $arg_translated[$count];␊ |
1471 | $count ++;␊ |
1472 | } elsif ($type eq '{' and $arg_types[$count] eq '[') {␊ |
1473 | # an optionnal argument was not provided,␊ |
1474 | # try with the next argument.␊ |
1475 | $count++;␊ |
1476 | goto TEST_TYPE;␊ |
1477 | } else {␊ |
1478 | my $reason = dgettext("po4a",␊ |
1479 | "An optional argument ".␊ |
1480 | "was provided, but a mandatory one ".␊ |
1481 | "is expected.");␊ |
1482 | die wrap_ref_mod($self->{ref}, "po4a::tex",␊ |
1483 | dgettext("po4a", "Command '%s': %s")."\n",␊ |
1484 | $command, $reason);␊ |
1485 | }␊ |
1486 | ␊ |
1487 | if ($have_to_be_translated) {␊ |
1488 | ($t, @e) = translate_buffer($self,$opt,$no_wrap,(@$env,$new_env.$type."#".$count.$type_end{$type}));␊ |
1489 | } else {␊ |
1490 | $t = $opt;␊ |
1491 | }␊ |
1492 | $translated .= $type.$t.$type_end{$type};␊ |
1493 | ␊ |
1494 | }␊ |
1495 | } else {␊ |
1496 | # Translate the \begin command with all its arguments joined␊ |
1497 | my ($type, $opt);␊ |
1498 | my $buf = $translated;␊ |
1499 | while (@targs) {␊ |
1500 | $type = shift @targs;␊ |
1501 | $opt = shift @targs;␊ |
1502 | $buf .= $type.$opt.$type_end{$type};␊ |
1503 | }␊ |
1504 | @e = @$env;␊ |
1505 | my $wrap = 1;␊ |
1506 | $wrap = 0 if $no_wrap == 1;␊ |
1507 | $translated = $self->translate($buf,$self->{ref},␊ |
1508 | @e?$e[-1]:"Plain text",␊ |
1509 | "wrap" => $wrap);␊ |
1510 | }␊ |
1511 | @e = (@$env, $new_env);␊ |
1512 | ␊ |
1513 | print "($translated,@e)\n"␊ |
1514 | if ($debug{'environments'});␊ |
1515 | return ($translated,@e);␊ |
1516 | }␊ |
1517 | ␊ |
1518 | ␊ |
1519 | sub check_arg_count {␊ |
1520 | my $self = shift;␊ |
1521 | my $command = shift;␊ |
1522 | my $args = shift;␊ |
1523 | my @targs = @$args;␊ |
1524 | my $check = 1;␊ |
1525 | my @remainder = ();␊ |
1526 | my $reason = "";␊ |
1527 | my ($type, $arg);␊ |
1528 | my @arg_types;␊ |
1529 | ␊ |
1530 | if ($command eq 'begin') {␊ |
1531 | $type = shift @targs;␊ |
1532 | # The name of the environment is mandatory␊ |
1533 | if ( (not defined $type)␊ |
1534 | or ($type ne '{')) {␊ |
1535 | $reason = dgettext("po4a",␊ |
1536 | "The first argument of \\begin is mandatory.");␊ |
1537 | $check = 0;␊ |
1538 | }␊ |
1539 | my $env = shift @targs;␊ |
1540 | if (not defined $environment_parameters{$env}) {␊ |
1541 | die wrap_ref_mod($self->{ref},"po4a::tex",␊ |
1542 | dgettext("po4a", "unknown environment: '%s'"),␊ |
1543 | $env);␊ |
1544 | }␊ |
1545 | @arg_types = @{$environment_parameters{$env}{'types'}};␊ |
1546 | } else {␊ |
1547 | @arg_types = @{$command_parameters{$command}{'types'}};␊ |
1548 | }␊ |
1549 | ␊ |
1550 | my $count = 0;␊ |
1551 | while ($check and @targs) {␊ |
1552 | $type = shift @targs;␊ |
1553 | $arg = shift @targs;␊ |
1554 | TEST_TYPE:␊ |
1555 | if ($count >= scalar @arg_types) {␊ |
1556 | # Too many arguments some will remain␊ |
1557 | @remainder = ($type, $arg, @targs);␊ |
1558 | last;␊ |
1559 | } elsif ($type eq $arg_types[$count]) {␊ |
1560 | $count ++;␊ |
1561 | } elsif ($type eq '{' and $arg_types[$count] eq '[') {␊ |
1562 | # an optionnal argument was not provided,␊ |
1563 | # try with the next argument.␊ |
1564 | $count++;␊ |
1565 | goto TEST_TYPE;␊ |
1566 | } else {␊ |
1567 | $check = 0;␊ |
1568 | $reason = dgettext("po4a",␊ |
1569 | "An optional argument was ".␊ |
1570 | "provided, but a mandatory one is expected.");␊ |
1571 | }␊ |
1572 | }␊ |
1573 | ␊ |
1574 | return ($check, $reason, \@remainder);␊ |
1575 | }␊ |
1576 | ␊ |
1577 | sub register_generic_environment {␊ |
1578 | print "register_generic_environment($_[0])\n"␊ |
1579 | if ($debug{'environments'});␊ |
1580 | if ($_[0] =~ m/^(.*),((?:\{_?\}|\[_?\])*)$/) {␊ |
1581 | my $env = $1;␊ |
1582 | my $arg_types = $2;␊ |
1583 | if ($env =~ /^([+])(.*)$/) {␊ |
1584 | $separated_environment{$2} = $1;␊ |
1585 | $env = $2;␊ |
1586 | }␊ |
1587 | my @types = ();␊ |
1588 | my @translated = ();␊ |
1589 | while ( defined $arg_types␊ |
1590 | and length $arg_types␊ |
1591 | and $arg_types =~ m/^(?:([\{\[])(_?)[\}\]])(.*)$/) {␊ |
1592 | push @types, $1;␊ |
1593 | push @translated, ($2 eq "_")?1:0;␊ |
1594 | $arg_types = $3;␊ |
1595 | }␊ |
1596 | $environment_parameters{$env} = {␊ |
1597 | 'types' => \@types,␊ |
1598 | 'translated' => \@translated␊ |
1599 | };␊ |
1600 | $environments{$env} = \&generic_environment;␊ |
1601 | }␊ |
1602 | }␊ |
1603 | ␊ |
1604 | sub register_verbatim_environment {␊ |
1605 | my $env = shift;␊ |
1606 | $no_wrap_environments .= " $env";␊ |
1607 | $verbatim_environments .= " $env";␊ |
1608 | $RE_VERBATIM = "\\\\begin\\{(?:".␊ |
1609 | join("|", split(/ /, $verbatim_environments)).␊ |
1610 | ")\\*?\\}";␊ |
1611 | register_generic_environment("$env,")␊ |
1612 | unless (defined $environments{$env});␊ |
1613 | }␊ |
1614 | ␊ |
1615 | ####################################␊ |
1616 | ### INITIALIZATION OF THE PARSER ###␊ |
1617 | ####################################␊ |
1618 | sub initialize {␊ |
1619 | my $self = shift;␊ |
1620 | my %options = @_;␊ |
1621 | ␊ |
1622 | $self->{options}{'definitions'}='';␊ |
1623 | $self->{options}{'exclude_include'}='';␊ |
1624 | $self->{options}{'no_wrap'}='';␊ |
1625 | $self->{options}{'verbatim'}='';␊ |
1626 | $self->{options}{'debug'}='';␊ |
1627 | $self->{options}{'verbose'}='';␊ |
1628 | ␊ |
1629 | %debug = ();␊ |
1630 | # FIXME: %commands and %separated_command should also be restored to their␊ |
1631 | # default values.␊ |
1632 | ␊ |
1633 | foreach my $opt (keys %options) {␊ |
1634 | if ($options{$opt}) {␊ |
1635 | die wrap_mod("po4a::tex",␊ |
1636 | dgettext("po4a", "Unknown option: %s"), $opt)␊ |
1637 | unless exists $self->{options}{$opt};␊ |
1638 | $self->{options}{$opt} = $options{$opt};␊ |
1639 | }␊ |
1640 | }␊ |
1641 | ␊ |
1642 | if ($options{'debug'}) {␊ |
1643 | foreach ($options{'debug'}) {␊ |
1644 | $debug{$_} = 1;␊ |
1645 | }␊ |
1646 | }␊ |
1647 | ␊ |
1648 | if ($options{'exclude_include'}) {␊ |
1649 | foreach (split(/:/, $options{'exclude_include'})) {␊ |
1650 | push @exclude_include, $_;␊ |
1651 | }␊ |
1652 | }␊ |
1653 | ␊ |
1654 | if ($options{'no_wrap'}) {␊ |
1655 | foreach (split(/,/, $options{'no_wrap'})) {␊ |
1656 | $no_wrap_environments .= " $_";␊ |
1657 | register_generic_environment("$_,")␊ |
1658 | unless (defined $environments{$_});␊ |
1659 | }␊ |
1660 | }␊ |
1661 | ␊ |
1662 | if ($options{'verbatim'}) {␊ |
1663 | foreach (split(/,/, $options{'verbatim'})) {␊ |
1664 | register_verbatim_environment($_);␊ |
1665 | }␊ |
1666 | }␊ |
1667 | ␊ |
1668 | if ($options{'definitions'}) {␊ |
1669 | $self->parse_definition_file($options{'definitions'})␊ |
1670 | }␊ |
1671 | }␊ |
1672 | ␊ |
1673 | =head1 STATUS OF THIS MODULE␊ |
1674 | ␊ |
1675 | This module needs more tests.␊ |
1676 | ␊ |
1677 | It was tested on a book and with the Python documentation.␊ |
1678 | ␊ |
1679 | =head1 TODO LIST␊ |
1680 | ␊ |
1681 | =over 4␊ |
1682 | ␊ |
1683 | =item Automatic detection of new commands␊ |
1684 | ␊ |
1685 | The TeX module could parse the newcommand arguments and try to guess the␊ |
1686 | number of arguments, their type and whether or not they should be␊ |
1687 | translated.␊ |
1688 | ␊ |
1689 | =item Translation of the environment separator␊ |
1690 | ␊ |
1691 | When \item is used as an environment separator, the item argument is␊ |
1692 | attached to the following string.␊ |
1693 | ␊ |
1694 | =item Some commands should be added to the environment stack␊ |
1695 | ␊ |
1696 | These commands should be specified by couples.␊ |
1697 | This could allow to specify commands beginning or ending a verbatim␊ |
1698 | environment.␊ |
1699 | ␊ |
1700 | =item Others␊ |
1701 | ␊ |
1702 | Various other points are tagged TODO in the source.␊ |
1703 | ␊ |
1704 | =back␊ |
1705 | ␊ |
1706 | =head1 KNOWN BUGS␊ |
1707 | ␊ |
1708 | Various points are tagged FIXME in the source.␊ |
1709 | ␊ |
1710 | =head1 SEE ALSO␊ |
1711 | ␊ |
1712 | L<Locale::Po4a::LaTeX(3pm)|Locale::Po4a::LaTeX>,␊ |
1713 | L<Locale::Po4a::TransTractor(3pm)|Locale::Po4a::TransTractor>,␊ |
1714 | L<po4a(7)|po4a.7>␊ |
1715 | ␊ |
1716 | =head1 AUTHORS␊ |
1717 | ␊ |
1718 | Nicolas François <nicolas.francois@centraliens.net>␊ |
1719 | ␊ |
1720 | =head1 COPYRIGHT AND LICENSE␊ |
1721 | ␊ |
1722 | Copyright 2004, 2005 by Nicolas FRANÇOIS <nicolas.francois@centraliens.net>.␊ |
1723 | ␊ |
1724 | This program is free software; you may redistribute it and/or modify it␊ |
1725 | under the terms of GPL (see the COPYING file).␊ |
1726 | ␊ |
1727 | =cut␊ |
1728 | ␊ |
1729 | 1;␊ |
1730 | |