1 | #!/usr/bin/perl -w␊ |
2 | ␊ |
3 | # Copyright (c) 2004-2008 by Nicolas FRANÇOIS <nicolas.francois@centraliens.net>␊ |
4 | #␊ |
5 | # This file is part of po4a.␊ |
6 | #␊ |
7 | # This program is free software; you can redistribute it and/or modify␊ |
8 | # it under the terms of the GNU General Public License as published by␊ |
9 | # the Free Software Foundation; either version 2 of the License, or␊ |
10 | # (at your option) any later version.␊ |
11 | #␊ |
12 | # This program is distributed in the hope that it will be useful,␊ |
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of␊ |
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the␊ |
15 | # GNU General Public License for more details.␊ |
16 | #␊ |
17 | # You should have received a copy of the GNU General Public License␊ |
18 | # along with po4a; if not, write to the Free Software␊ |
19 | # Foundation, Inc.,␊ |
20 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA␊ |
21 | #␊ |
22 | ########################################################################␊ |
23 | ␊ |
24 | =encoding UTF-8␊ |
25 | ␊ |
26 | =head1 NAME␊ |
27 | ␊ |
28 | Locale::Po4a::Halibut - convert Halibut documents and derivatives from/to PO files␊ |
29 | ␊ |
30 | =head1 DESCRIPTION␊ |
31 | ␊ |
32 | The po4a (PO for anything) project goal is to ease translations (and more␊ |
33 | interestingly, the maintenance of translations) using gettext tools on␊ |
34 | areas where they were not expected like documentation.␊ |
35 | ␊ |
36 | Locale::Po4a::Halibut is a module to help the translation of Halibut documents into␊ |
37 | other [human] languages.␊ |
38 | ␊ |
39 | This module contains the definitions of common Halibut commands and␊ |
40 | environments.␊ |
41 | ␊ |
42 | =head1 STATUS OF THIS MODULE␊ |
43 | ␊ |
44 | This module is still beta.␊ |
45 | Please send feedback and feature requests.␊ |
46 | ␊ |
47 | =head1 CAVEAT␊ |
48 | ␊ |
49 | Some constructs are badly supported. The known ones are documented below.␊ |
50 | ␊ |
51 | =head2 Verbatim blocks␊ |
52 | ␊ |
53 | \c foo␊ |
54 | \c bar␊ |
55 | ␊ |
56 | The verbatim block is not considered as a whole. Each line will be␊ |
57 | translated separately.␊ |
58 | ␊ |
59 | =head1 SEE ALSO␊ |
60 | ␊ |
61 | L<Locale::Po4a::TeX(3pm)|Locale::Po4a::TeX>,␊ |
62 | L<Locale::Po4a::TransTractor(3pm)|Locale::Po4a::TransTractor>,␊ |
63 | L<po4a(7)|po4a.7>␊ |
64 | ␊ |
65 | =head1 AUTHORS␊ |
66 | ␊ |
67 | Nicolas François <nicolas.francois@centraliens.net>␊ |
68 | ␊ |
69 | =head1 COPYRIGHT AND LICENSE␊ |
70 | ␊ |
71 | Copyright 2004-2008 by Nicolas FRANÇOIS <nicolas.francois@centraliens.net>.␊ |
72 | ␊ |
73 | This program is free software; you may redistribute it and/or modify it␊ |
74 | under the terms of GPL (see COPYING file).␊ |
75 | ␊ |
76 | =cut␊ |
77 | ␊ |
78 | package Locale::Po4a::Halibut;␊ |
79 | ␊ |
80 | use 5.006;␊ |
81 | use strict;␊ |
82 | use warnings;␊ |
83 | ␊ |
84 | require Exporter;␊ |
85 | use vars qw($VERSION @ISA @EXPORT);␊ |
86 | $VERSION= $Locale::Po4a::TeX::VERSION;␊ |
87 | @ISA= qw(Locale::Po4a::TeX);␊ |
88 | @EXPORT= qw();␊ |
89 | ␊ |
90 | use Locale::Po4a::Common;␊ |
91 | use Locale::Po4a::TeX;␊ |
# Helper functions borrowed from Locale::Po4a::TeX.
#
# The names are first predeclared with the "subs" pragma (which expects bare
# subroutine names, without a leading "&"), then each one is bound to the
# implementation living in Locale::Po4a::TeX so that the rest of this file
# can call it as a plain function.
use subs qw(parse_definition_file
            register_generic_command is_closed translate_buffer
            register_verbatim_environment
            generic_command
            in_verbatim
            get_leading_command);
*parse_definition_file         = \&Locale::Po4a::TeX::parse_definition_file;
*get_leading_command           = \&Locale::Po4a::TeX::get_leading_command;
*register_generic_command      = \&Locale::Po4a::TeX::register_generic_command;
*register_verbatim_environment = \&Locale::Po4a::TeX::register_verbatim_environment;
*generic_command               = \&Locale::Po4a::TeX::generic_command;
*is_closed                     = \&Locale::Po4a::TeX::is_closed;
*in_verbatim                   = \&Locale::Po4a::TeX::in_verbatim;
*translate_buffer              = \&Locale::Po4a::TeX::translate_buffer;
# Package variables shared with Locale::Po4a::TeX.
#
# Every name declared here is bound, through a typeglob assignment, to the
# variable of the same name in Locale::Po4a::TeX.  Reading or writing one of
# them in this file therefore acts directly on the TeX module's data.
our ($RE_ESCAPE, $ESCAPE,
     $RE_VERBATIM,
     $RE_COMMENT, $RE_PRE_COMMENT,
     $no_wrap_environments, $separated_commands,
     %commands, %environments,
     %command_categories, %separated,
     %env_separators, %debug,
     %translate_buffer_env,
     @exclude_include, @comments);

# Scalars
*RE_ESCAPE            = \$Locale::Po4a::TeX::RE_ESCAPE;
*ESCAPE               = \$Locale::Po4a::TeX::ESCAPE;
*RE_VERBATIM          = \$Locale::Po4a::TeX::RE_VERBATIM;
*RE_COMMENT           = \$Locale::Po4a::TeX::RE_COMMENT;
*RE_PRE_COMMENT       = \$Locale::Po4a::TeX::RE_PRE_COMMENT;
*no_wrap_environments = \$Locale::Po4a::TeX::no_wrap_environments;
*separated_commands   = \$Locale::Po4a::TeX::separated_commands;

# Hashes
*commands             = \%Locale::Po4a::TeX::commands;
*environments         = \%Locale::Po4a::TeX::environments;
*command_categories   = \%Locale::Po4a::TeX::command_categories;
*separated            = \%Locale::Po4a::TeX::separated;
*env_separators       = \%Locale::Po4a::TeX::env_separators;
*debug                = \%Locale::Po4a::TeX::debug;
*translate_buffer_env = \%Locale::Po4a::TeX::translate_buffer_env;

# Arrays
*exclude_include      = \@Locale::Po4a::TeX::exclude_include;
*comments             = \@Locale::Po4a::TeX::comments;

# $ESCAPE and $RE_ESCAPE keep the values provided by Locale::Po4a::TeX.
#
# The verbatim and comment patterns are replaced by placeholder strings.
# NOTE(review): presumably these placeholders never occur in a Halibut
# document, which would disable the TeX-style verbatim/comment detection
# (parse_line() matches lines against $RE_COMMENT) -- confirm.
$RE_VERBATIM = 'PO4A_FAKE_VERBATIM';
$RE_COMMENT  = 'PO4A_FAKE_COMMENT';
138 | ␊ |
# Build the notice stating that a file was generated with po4a.
#
# Returns a single string: one line starting with "\#" that carries the
# notice, followed by one empty line.
sub docheader {
    my @header_lines = (
        "\\# This file was generated with po4a. Translate the source file.",
        "",
    );
    return join("\n", @header_lines) . "\n";
}
143 | ␊ |
# Commands, keyed by name without the leading backslash, that parse_line()
# handles as line commands when they start a line.
my %break_line;

# %translate_line_command indicates whether the arguments of a command
# handled by line_command() should be translated:
#   no entry (undefined): the arguments are not translated
#   0:                    there should be no arguments
#   1:                    the arguments should be translated
my %translate_line_command;
152 | ␊ |
# Main parsing loop of the module.
#
# Reads the input line by line with shiftline(), hands every line to
# parse_line() and pushes whatever output that produces with pushline().
#
# A line matching "\ po4a:" (optional whitespace before the backslash,
# after it, and before the colon) is a po4a definition line: it is given to
# parse_definition_line() and produces no output.
#
# When the input is exhausted, any text still buffered in the current
# paragraph is translated and pushed.
#
# Takes no argument besides the invocant; the return value is not meaningful.
sub parse {
    my $self      = shift;
    my $paragraph = "";    # Buffer where we put the paragraph while building
    my @env       = ();    # environment stack

    while (1) {
        undef $self->{type};
        my ($line, $ref) = $self->shiftline();
        last unless defined $line;

        chomp($line);
        $self->{ref} = "$ref";

        if ($line =~ /^\s*\\\s*po4a\s*:/) {
            # Called as a method on purpose: parse_definition_line is not
            # defined in this package, so a plain function call would die
            # with "Undefined subroutine"; method lookup finds it via @ISA.
            $self->parse_definition_line($line);
            next;
        }

        my $translated;
        ($paragraph, $translated, @env) =
            parse_line($self, $line, $paragraph, \@env);
        $self->pushline($translated);
    }

    # Flush the paragraph that was still being accumulated at end of input.
    if (length($paragraph)) {
        my $translated;
        ($translated, @env) = translate_buffer($self, $paragraph, undef, @env);
        $self->pushline($translated);
    }

    return;
} # end of parse
190 | ␊ |
# Process one input line in the context of the paragraph being built.
#
# Arguments:
#   $self      - the parser object
#   $line      - the line to process, without its trailing newline
#   $paragraph - the text accumulated so far for the current paragraph
#   $env       - reference to the environment stack (it is copied, the
#                caller's array is not modified)
#
# Returns the list ($paragraph, $translated, @env):
#   $paragraph  - the updated paragraph buffer (empty once it was flushed)
#   $translated - the output produced by this line, possibly empty
#   @env        - the updated environment stack
#
# A line either ends the paragraph (blank line), is copied through (\input
# and comment lines), is handled as a line command (see %break_line), or is
# simply appended to the paragraph.
sub parse_line {
    my ($self, $line, $paragraph, $env) = @_;
    my @stack  = @$env;
    my $output = "";

    # Inside a verbatim environment the paragraph is always treated as
    # closed; otherwise is_closed() decides.
    my $closed = in_verbatim(@stack) ? 1 : is_closed($paragraph);

    # An empty line. This indicates the end of the current paragraph.
    if ($closed and $line =~ /^\s*$/) {
        $paragraph .= $line."\n";
        ($output, @stack) = translate_buffer($self, $paragraph, undef, @stack);
        $paragraph = "";
        return ($paragraph, $output, @stack);
    }

    # \input lines are copied unchanged, after flushing the paragraph.
    if ($line =~ m/^\\input /) {
        if (length($paragraph)) {
            ($output, @stack) = translate_buffer($self, $paragraph, undef, @stack);
            $paragraph = "";
        }
        $output .= $line."\n";
        return ($paragraph, $output, @stack);
    }

    # Comment lines are copied unchanged; the paragraph is left untouched.
    if ($line =~ m/^$RE_COMMENT/) {
        $output = $line."\n";
        return ($paragraph, $output, @stack);
    }

    # Does the line start with a command, while nothing is left open?
    # The capturing match comes last so that $name and $rest are only set
    # when the two previous conditions hold.
    my ($name, $rest);
    my $starts_with_command =
           $closed
        && (is_closed($line) || $line =~ /^\\[ce] /)
        && (($name, $rest) = $line =~ /^\\([^ ]*?)( +.*)?$/);

    if (!$starts_with_command) {
        # continue the same paragraph
        $paragraph .= $line."\n";
        return ($paragraph, $output, @stack);
    }

    my ($command, $variant, $args, $buffer);
    if ($break_line{$name}) {
        # Line command: everything after the name is its text.
        ($command, $variant, $args, $buffer) = ($name, "", [], $rest || "");
    }
    else {
        ($command, $variant, $args, $buffer) = get_leading_command($self, $line);
    }

    # NOTE: This is just a workaround: "\c " is a verbatim line
    #       and \c{...} is just a verbatim block.  The same applies to \e.
    #       When \c or \e comes with a braced argument, the line is ordinary
    #       paragraph text.
    if (   !$break_line{$command}
        or (    ($command eq "c" or $command eq "e")
            and defined $args->[0])) {
        # continue the same paragraph
        $paragraph .= $line."\n";
        return ($paragraph, $output, @stack);
    }

    # Line command: flush the pending paragraph, emit the command itself,
    # then the text that follows it on the line.
    my $piece;
    if (length($paragraph)) {
        ($piece, @stack) = translate_buffer($self, $paragraph, undef, @stack);
        $output .= $piece;
        $paragraph = "";
    }
    ($piece, @stack) = generic_command($self, $command, $variant, $args, \@stack);
    $output .= $piece;

    my @line_args = ();
    if (defined $buffer and length $buffer) {
        # FIXME: keep the spaces ?
        (my $trimmed = $buffer) =~ s/\s*$//s;
        @line_args = (" ", $trimmed);
    }
    ($piece, @stack) = line_command($self, $command, "", \@line_args, \@stack, 1);
    $output .= $piece."\n";

    return ($paragraph, $output, @stack);
}
271 | ␊ |
# Produce the output for the text that follows a line command.
#
# Arguments:
#   $self    - the parser object
#   $command - name of the command (without the backslash)
#   $variant - command variant (only used in the debug trace)
#   $args    - array reference; the text of the line is taken from
#              element 1
#   $env     - reference to the environment stack
#   $no_wrap - wrapping flag given by the caller (only used in the debug
#              trace; translation is always requested without wrapping)
#
# Returns the list ($translated, @$env).  The text is translated only when
# %translate_line_command holds a true value for $command; otherwise it is
# copied as is.  Leading whitespace is preserved untranslated.
sub line_command {
    my ($self, $command, $variant, $args, $env, $no_wrap) = @_;

    if ($debug{'commands'}) {
        print "line_command($command,$variant,@$args,@$env,$no_wrap)=";
    }

    my $translated = "";
    my $line       = $args->[1];

    if (defined $line and length $line) {
        if ($translate_line_command{$command}) {
            # Split off the leading whitespace so that only the text itself
            # is sent for translation.
            my ($indent, $text) = $line =~ /^(\s*)(.*)$/s;
            my ($t) = $self->translate_buffer($text, 1, @$env, $command);
            $translated .= $indent.$t;
        }
        else {
            $translated .= $line;
        }
    }

    if ($debug{'commands'}) {
        print "($translated,@$env)\n";
    }
    return ($translated, @$env);
}
300 | ␊ |
301 | ␊ |
# 3.2 Simple inline formatting commands
#
# The commands registered here are inline commands; they are extracted only
# when they stand alone.
#
# Inline commands of this section that need no registration:
#   3.2.4 `\-' and `\_': Non-breaking hyphens and spaces
#   3.2.5 `\date': Automatic date generation
#   3.2.7 `\u': Specifying arbitrary Unicode characters
for my $inline_command (
    '-e,{_}',     # 3.2.1 `\e': Emphasising text
    '-c,{_}',     # 3.2.2 `\c': Displaying computer code inline
    '-cw,{_}',    # 3.2.2 `\cw': Displaying computer code inline
    '-q,{_}',     # 3.2.3 `\q': Quotation marks
    '-W,{_}',     # 3.2.6 `\W': WWW hyperlinks
    # 3.2.8 `\k' and `\K': Cross-references to other sections.
    # They should not be translated.
    # FIXME: they will expand to "Section ..." or "section ...";
    #        "Section" and "section" should be translated.
    '-k,{}',
    '-K,{}',
    # 3.2.9 `\#': Inline comments.
    # Inline, but can be removed from the head or tail.
    '-#,{}',
) {
    register_generic_command($inline_command);
}

# Some of these commands also exist in a line form ("\e text", "\c text",
# "\# text"), handled by parse_line() and line_command().
#
# NOTE: \c and \c{...} differ (and so do \e and \e{...}).
#       \c and \e are marked as break_line commands, but this is reversed
#       in parse_line when the braced form is used.
for my $line_command ('e', 'c', '#') {
    $break_line{$line_command} = 1;
}
$translate_line_command{e}   = 1;
$translate_line_command{c}   = 1;
$translate_line_command{'#'} = 0;
# 3.3 Paragraph-level commands
#
# 3.3.1 `\c': Displaying whole paragraphs of computer code
#       Handled together with the inline form of `\c'.
#
# 3.3.2 `\b', `\n', `\dt', `\dd', `\lcont': Lists
#       FIXME: \n{this-one} not supported?
for my $list_item (qw(b n dd dt)) {
    register_generic_command("*$list_item,");
}
# 3.3.2.4 Continuing list items into further paragraphs
#
# \lcont is registered like any other command, but its handler is then
# replaced by the one below.
register_generic_command('*lcont,{_}');

# Handler for \lcont{...}.
#
# The text of the argument (element 1 of $args) is cut into lines and each
# line goes through parse_line(), exactly like the lines of the main
# document, so that paragraphs and line commands inside the block are
# handled.  The result is wrapped again in "\lcont{" ... "}".
#
# Returns the translated command followed by the environment stack that was
# received (changes made while processing the block are not propagated).
$commands{lcont} = sub {
    my ($self, $command, $variant, $args, $env, $no_wrap) = @_;

    my @stack      = @$env;
    my $translated = $ESCAPE.$command.$variant."{";
    my $paragraph  = "";
    my $piece      = "";

    # Split on newlines, keeping empty lines.  The empty field found after
    # a final newline is not a line of its own and is dropped.
    my @lines = split /\n/, $args->[1], -1;
    pop @lines if @lines and $lines[-1] eq "";

    for my $line (@lines) {
        ($paragraph, $piece, @stack) =
            parse_line($self, $line, $paragraph, \@stack);
        $translated .= $piece;
    }

    # Translate what remains in the paragraph buffer and close the block.
    ($piece, @stack) = translate_buffer($self, $paragraph, $no_wrap, @stack);
    $translated .= $piece . "}";

    return ($translated, @$env);
};
# 3.3.3 `\rule': Horizontal rules
# TODO: to be confirmed: does it break paragraphs?
register_generic_command('rule,');

# 3.3.4 `\quote': Indenting multiple paragraphs as a long quotation
# TODO: to be confirmed
register_generic_command('*quote,{_}');

# 3.3.5 `\C', `\H', `\S', `\A', `\U': Chapter and section headings
# \S0 is a synonym for \H and \S1 a synonym for \S.
# FIXME: What happens if the line is rewrapped?
# FIXME: deeper levels (\S4 and so on) are not registered.
# FIXME: \S{question-about-fish}{Question}
# NOTE: The name of the section is not translated.
for my $heading (qw(C S0 H S S1 S2 S3 A U)) {
    register_generic_command("*$heading,{}");
}

# 3.3.6 `\copyright', `\title', `\versionid': Miscellaneous blurb commands
for my $blurb (qw(title copyright versionid)) {
    register_generic_command("*$blurb,");
}

# 3.4 Creating a bibliography: \nocite, \B and \BR
# FIXME: \BR{freds-book} [Fred1993]
for my $bibliography (qw(nocite B BR)) {
    register_generic_command("*$bibliography,{}");
}

# 3.5 Creating an index
# 3.5.1 Simple indexing
# \i is inline: \i{index} or \i\x{grep}; it is not registered.
register_generic_command('-ii,{_}');
# \IM: inline. Variable number of arguments; also handled as a line command
# whose text is translated.
register_generic_command('*IM,{_}');
$translate_line_command{IM} = 1;
$break_line{IM}             = 1;

# 3.6 Configuring Halibut
# NOTE: the new command is not registered
register_generic_command('+cfg,{}{_}');

# 3.7 Defining macros
# FIXME: line
register_generic_command('*define,{}');
$translate_line_command{define} = 1;
$break_line{define}             = 1;
416 | ␊ |
417 | 1;␊ |
418 | |