1 | #!/usr/bin/perl -w␊ |
2 | ␊ |
3 | # Po4a::Text.pm␊ |
4 | #␊ |
# extract and translate translatable strings from text documents
6 | #␊ |
7 | # This program is free software; you can redistribute it and/or modify␊ |
8 | # it under the terms of the GNU General Public License as published by␊ |
9 | # the Free Software Foundation; either version 2 of the License, or␊ |
10 | # (at your option) any later version.␊ |
11 | #␊ |
12 | # This program is distributed in the hope that it will be useful,␊ |
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of␊ |
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the␊ |
15 | # GNU General Public License for more details.␊ |
16 | #␊ |
17 | # You should have received a copy of the GNU General Public License␊ |
18 | # along with this program; if not, write to the Free Software␊ |
19 | # Foundation, Inc.,␊ |
20 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA␊ |
21 | #␊ |
22 | ########################################################################␊ |
23 | ␊ |
24 | =encoding UTF-8␊ |
25 | ␊ |
26 | =head1 NAME␊ |
27 | ␊ |
28 | Locale::Po4a::Text - convert text documents from/to PO files␊ |
29 | ␊ |
30 | =head1 DESCRIPTION␊ |
31 | ␊ |
32 | The po4a (PO for anything) project goal is to ease translations (and more␊ |
33 | interestingly, the maintenance of translations) using gettext tools on␊ |
34 | areas where they were not expected like documentation.␊ |
35 | ␊ |
36 | Locale::Po4a::Text is a module to help the translation of text documents into␊ |
37 | other [human] languages.␊ |
38 | ␊ |
39 | Paragraphs are split on empty lines (or lines containing only spaces or␊ |
40 | tabulations).␊ |
41 | ␊ |
If a paragraph contains a line starting with a space (or tabulation), this
paragraph won't be rewrapped.
44 | ␊ |
45 | =cut␊ |
46 | ␊ |
47 | package Locale::Po4a::Text;␊ |
48 | ␊ |
49 | use 5.006;␊ |
50 | use strict;␊ |
51 | use warnings;␊ |
52 | ␊ |
53 | require Exporter;␊ |
54 | use vars qw(@ISA @EXPORT);␊ |
55 | @ISA = qw(Locale::Po4a::TransTractor);␊ |
56 | @EXPORT = qw();␊ |
57 | ␊ |
58 | use Locale::Po4a::TransTractor;␊ |
59 | use Locale::Po4a::Common;␊ |
60 | ␊ |
61 | =head1 OPTIONS ACCEPTED BY THIS MODULE␊ |
62 | ␊ |
63 | These are this module's particular options:␊ |
64 | ␊ |
65 | =over␊ |
66 | ␊ |
67 | =item B<nobullets>␊ |
68 | ␊ |
69 | Deactivate detection of bullets.␊ |
70 | ␊ |
71 | By default, when a bullet is detected, the bullet paragraph is not considered␊ |
72 | as a verbatim paragraph (with the no-wrap flag in the PO file), but the module␊ |
73 | rewraps this paragraph in the generated PO file and in the translation.␊ |
74 | ␊ |
75 | =cut␊ |
76 | ␊ |
# Bullet detection switch read by do_paragraph(); initialize() sets it to 0
# when the "nobullets" option is given.  File-scoped, so it is a single
# variable shared by every parser object created in this process.
my $bullets = 1;
78 | ␊ |
79 | =item B<tabs=>I<mode>␊ |
80 | ␊ |
81 | Specify how tabulations shall be handled. The I<mode> can be any of:␊ |
82 | ␊ |
83 | =over␊ |
84 | ␊ |
85 | =item B<split>␊ |
86 | ␊ |
87 | Lines with tabulations introduce breaks in the current paragraph.␊ |
88 | ␊ |
89 | =item B<verbatim>␊ |
90 | ␊ |
Paragraphs containing tabulations will not be re-wrapped.
92 | ␊ |
93 | =back␊ |
94 | ␊ |
95 | By default, tabulations are considered as spaces.␊ |
96 | ␊ |
97 | =cut␊ |
98 | ␊ |
# Value of the "tabs" option as copied by initialize().  parse() compares it
# against "split" and "verbatim"; the empty string selects neither mode.
my $tabs = "";
100 | ␊ |
101 | =item B<breaks=>I<regex>␊ |
102 | ␊ |
103 | A regular expression matching lines which introduce breaks.␊ |
104 | The regular expression will be anchored so that the whole line must match.␊ |
105 | ␊ |
106 | =cut␊ |
107 | ␊ |
# Regular expression from the "breaks" option (undefined when the option is
# not given).  parse() uses it as m/^$breaks$/, i.e. anchored at both ends.
my $breaks;
109 | ␊ |
110 | =item B<debianchangelog>␊ |
111 | ␊ |
Handle the header and footer of
released versions, which only contain non-translatable information.
114 | ␊ |
115 | =cut␊ |
116 | ␊ |
# Set to 1 by initialize() when the "debianchangelog" option is given; makes
# parse() pass changelog header and trailer lines through untranslated.
my $debianchangelog = 0;
118 | ␊ |
119 | =item B<fortunes>␊ |
120 | ␊ |
Handle the fortunes format, which separates fortunes with a line
consisting of '%' or '%%', and uses '%%' as the beginning of a comment.
123 | ␊ |
124 | =cut␊ |
125 | ␊ |
# Set to 1 by initialize() when the "fortunes" option is given; makes parse()
# treat '%' / '%%' lines as separators and strip '%%...' from other lines.
my $fortunes = 0;
127 | ␊ |
128 | =item B<markdown>␊ |
129 | ␊ |
130 | Handle some special markup in Markdown-formatted texts.␊ |
131 | ␊ |
132 | =cut␊ |
133 | ␊ |
# Set to 1 by initialize() when the "markdown" option is given; enables the
# Markdown-specific branches of parse().
my $markdown = 0;
135 | ␊ |
136 | =item B<asciidoc>␊ |
137 | ␊ |
138 | Handle documents in the AsciiDoc format.␊ |
139 | ␊ |
140 | =cut␊ |
141 | ␊ |
# Set to 1 by initialize() when the "asciidoc" option is given; enables the
# AsciiDoc-specific branches of parse().
my $asciidoc = 0;
143 | ␊ |
144 | =item B<control>[B<=>I<taglist>]␊ |
145 | ␊ |
146 | Handle control files.␊ |
147 | A comma-separated list of tags to be translated can be provided.␊ |
148 | ␊ |
149 | =cut␊ |
150 | ␊ |
# Tags selected by the "control" option, stored as hash keys by initialize().
# The key '' stands for "translate every tag".  parse() only takes its
# control-file branches while this hash is non-empty.
my %control = ();
152 | ␊ |
153 | =back␊ |
154 | ␊ |
155 | =cut␊ |
156 | ␊ |
# Configure the parser from the options handed over by the caller.
#
# Usage: $self->initialize(%options)
#
# Registers the option names this module accepts in $self->{options}, dies
# (message built by wrap_mod/dgettext) when %options contains any other
# name, stores the accepted values in $self->{options}, and finally mirrors
# them into the file-scoped settings that parse() and do_paragraph() read.
sub initialize {
    my $self    = shift;
    my %options = @_;

    # Only the names registered here pass the check below.  The values are
    # placeholders; the caller's own values overwrite them in the loop.
    $self->{options}{'control'}         = "";
    $self->{options}{'asciidoc'}        = 1;
    $self->{options}{'breaks'}          = 1;
    $self->{options}{'debianchangelog'} = 1;
    $self->{options}{'debug'}           = 1;
    $self->{options}{'fortunes'}        = 1;
    $self->{options}{'markdown'}        = 1;
    $self->{options}{'nobullets'}       = 1;
    $self->{options}{'tabs'}            = 1;
    $self->{options}{'verbose'}         = 1;

    foreach my $opt (keys %options) {
        die wrap_mod("po4a::text",
                     dgettext("po4a", "Unknown option: %s"), $opt)
            unless exists $self->{options}{$opt};
        $self->{options}{$opt} = $options{$opt};
    }

    # The settings below are file-scoped lexicals, hence shared by every
    # object of this class.  Put them back to their declared defaults first,
    # so that options given to a previously initialized parser do not leak
    # into this one (the code below can only switch features on).
    $bullets         = 1;
    $tabs            = "";
    undef $breaks;
    $debianchangelog = 0;
    $fortunes        = 0;
    $markdown        = 0;
    $asciidoc        = 0;
    %control         = ();

    # Flag-like options: being defined is what counts, not the value.
    $bullets         = 0 if defined $options{'nobullets'};
    $debianchangelog = 1 if defined $options{'debianchangelog'};
    $fortunes        = 1 if defined $options{'fortunes'};
    $markdown        = 1 if defined $options{'markdown'};
    $asciidoc        = 1 if defined $options{'asciidoc'};

    # Valued options are copied as given.
    $tabs   = $options{'tabs'}   if defined $options{'tabs'};
    $breaks = $options{'breaks'} if defined $options{'breaks'};

    if (defined $options{'control'}) {
        if ($options{'control'} eq "1") {
            # Value "1" (option given without a tag list): the '' key makes
            # parse() translate every tag.
            $control{''} = 1;
        } else {
            # Otherwise the value is a comma-separated list of tags.
            foreach my $tag (split(',', $options{'control'})) {
                $control{$tag} = 1;
            }
        }
    }
}
217 | ␊ |
# Read the input document line by line and produce the translated output.
#
# Lines are fetched with $self->shiftline() and accumulated in $paragraph.
# Whenever a line ends the current paragraph, the accumulated text is handed
# to do_paragraph() (which translates and pushes it), and purely structural
# lines are pushed unchanged with $self->pushline().  The long if/elsif
# chain is order-sensitive: the first branch whose condition holds wins.
#
# State kept across lines:
#   $paragraph         text accumulated so far
#   $wrapped_mode      1 if the paragraph may be re-wrapped, 0 for no-wrap
#   $expect_header     1 while a Debian changelog header may appear
#   $end_of_paragraph  set by a branch to flush right after the current line
#   $self->{verbatim}, {type}, {bullet}, {indent}, {controlkey}
#                      block, list and control-file context
sub parse {
    my $self = shift;
    my ($line,$ref);
    my $paragraph="";
    my $wrapped_mode = 1;
    my $expect_header = 1;
    my $end_of_paragraph = 0;
    ($line,$ref)=$self->shiftline();
    # $ref has the form "file:line"; keep only the file part so that a
    # change of input file can be detected below.
    my $file = $ref;
    $file =~ s/:[0-9]+$// if defined($line);
    while (defined($line)) {
        # A new input file starts: flush what was pending and reset state.
        # The capture is only read when the match succeeded; after a failed
        # match $1 would still hold a value left over from an earlier regex.
        if ($ref =~ m/^(.*):[0-9]+$/ and $1 ne $file) {
            $file = $1;
            do_paragraph($self,$paragraph,$wrapped_mode);
            $paragraph="";
            $wrapped_mode = 1;
            $expect_header = 1;
        }

        chomp($line);
        $self->{ref}="$ref";
        if ($debianchangelog and
            $expect_header and
            $line =~ /^(\w[-+0-9a-z.]*)\ \(([^\(\) \t]+)\)  # src, version
                      \s+([-+0-9a-z.]+);                 # distribution
                      \s*urgency\s*\=\s*(.*\S)\s*$/ix) {
            # Debian changelog entry header: pushed as is, not translated.
            do_paragraph($self,$paragraph,$wrapped_mode);
            $paragraph="";
            $self->pushline("$line\n");
            $expect_header=0;
        } elsif ($debianchangelog and
                 $line =~ m/^ \-\- (.*) <(.*)> {1,2}((\w+\,\s*)?\d{1,2}\s+\w+\s+\d{4}\s+\d{1,2}:\d\d:\d\d\s+[-+]\d{4}(\s+\([^\\\(\)]\))?)$/) {
            # Found trailer (" -- name <email>  date").  One or two spaces
            # are accepted before the date; the Debian format uses two.
            do_paragraph($self,$paragraph,$wrapped_mode);
            $paragraph="";
            $self->pushline("$line\n");
            $expect_header=1;
        } elsif ($fortunes and
                 $line =~ m/^%%?\s*$/) {
            # Found end of fortune
            do_paragraph($self,$paragraph,$wrapped_mode);
            # A wrapped paragraph is pushed without its trailing newline,
            # so add one before the separator line.
            $self->pushline("\n") unless (   $wrapped_mode == 0
                                          or $paragraph eq "");
            $paragraph="";
            $wrapped_mode = 1;
            $self->pushline("$line\n");
        } elsif (    $line =~ m/^([^ :]*): *(.*)$/
                 and %control) {
            # Control file field, "Tag: value".
            warn "Unrecognized section: '$paragraph'\n"
                unless $paragraph eq "";
            my $tag = $1;
            my $val = $2;
            my $t;
            # Translate the value only for selected tags ('' selects all).
            if ($control{''} or $control{$tag}) {
                $t = $self->translate($val,
                                      $self->{ref},
                                      $tag.(defined $self->{controlkey}?", ".$self->{controlkey}:""),
                                      "wrap" => 0);
            } else {
                $t = $val;
            }
            # The first field of a stanza identifies it in later type strings.
            if (not defined $self->{controlkey}) {
                $self->{controlkey} = "$tag: $val";
            }
            $self->pushline("$tag: $t\n");
            $paragraph="";
            $wrapped_mode = 1;
            $self->{bullet} = "";
            $self->{indent} = " ";
        } elsif (%control and
                 $line eq " .") {
            # Paragraph separator inside a control file long description.
            do_paragraph($self,$paragraph,$wrapped_mode,
                         "Long Description".(defined $self->{controlkey}?", ".$self->{controlkey}:""));
            $paragraph="";
            $self->pushline($line."\n");
            $self->{bullet} = "";
            $self->{indent} = " ";
        } elsif (%control and
                 $line =~ m/^ Link: +(.*)$/) {
            # " Link: target" line: the label and the target are translated
            # as two separate strings.
            do_paragraph($self,$paragraph,$wrapped_mode,
                         "Long Description".(defined $self->{controlkey}?", ".$self->{controlkey}:""));
            my $link=$1;
            my $t1 = $self->translate("Link: ",
                                      $self->{ref},
                                      "Link",
                                      "wrap" => 0);
            my $t2 = $self->translate($link,
                                      $self->{ref},
                                      "Link".(defined $self->{controlkey}?", ".$self->{controlkey}:""),
                                      "wrap" => 0);
            $self->pushline(" $t1$t2\n");
            $paragraph="";
        } elsif (%control and
                 defined $self->{indent} and
                 $line =~ m/^$self->{indent}\S/) {
            # Continuation line of a control file long description.
            $paragraph .= $line."\n";
            $self->{type} = "Long Description".(defined $self->{controlkey}?", ".$self->{controlkey}:"");
        } elsif (   (defined $self->{verbatim})
                 and ($self->{verbatim} == 2)) {
            # Untranslated blocks
            $self->pushline($line."\n");
            # In AsciiDoc mode a comment ("////") or filter ("~~~~")
            # delimiter line closes the untranslated block.
            if ($asciidoc and
                ($line =~ m/^(\/{4,}|~{4,})$/)) {
                undef $self->{verbatim};
                undef $self->{type};
                $wrapped_mode = 1;
            }
        } elsif (   ($line =~ /^\s*$/)
                 or (    defined $breaks
                     and $line =~ m/^$breaks$/)) {
            # Break paragraphs on lines containing only spaces
            # (or matching the user-supplied "breaks" regex).
            do_paragraph($self,$paragraph,$wrapped_mode);
            $paragraph="";
            $wrapped_mode = 1 unless defined($self->{verbatim});
            $self->pushline($line."\n");
            undef $self->{controlkey};
        } elsif ($asciidoc and (not defined($self->{verbatim})) and
                 ($line =~ m/^(\+|--)$/)) {
            # List Item Continuation or List Block
            do_paragraph($self,$paragraph,$wrapped_mode);
            $paragraph="";
            $self->pushline($line."\n");
        } elsif ($asciidoc and (not defined($self->{verbatim})) and
                 ($line =~ m/^(={4,}|-{4,}|~{4,}|\^{4,}|\+{4,})$/) and
                 (defined($paragraph) )and
                 ($paragraph =~ m/^[^\n]*\n$/s) and
                 (length($paragraph) == (length($line)+1))) {
            # Found title: the pending one-line paragraph is the title text
            # and this line is an underline of exactly the same length.
            $wrapped_mode = 0;
            my $level = $line;
            $level =~ s/^(.).*$/$1/;
            $paragraph =~ s/\n$//s;
            my $t = $self->translate($paragraph,
                                     $self->{ref},
                                     "Title $level",
                                     "wrap" => 0);
            $self->pushline($t."\n");
            $paragraph="";
            $wrapped_mode = 1;
            # Resize the underline to the length of the translated title.
            $self->pushline(($level x (length($t)))."\n");
        } elsif ($asciidoc and
                 ($line =~ m/^(={1,5})( +)(.*?)( +\1)?$/)) {
            my $titlelevel1 = $1;
            my $titlespaces = $2;
            my $title = $3;
            my $titlelevel2 = $4||"";
            # Found one line title
            do_paragraph($self,$paragraph,$wrapped_mode);
            $wrapped_mode = 0;
            $paragraph="";
            my $t = $self->translate($title,
                                     $self->{ref},
                                     "Title $titlelevel1",
                                     "wrap" => 0);
            $self->pushline($titlelevel1.$titlespaces.$t.$titlelevel2."\n");
            $wrapped_mode = 1;
        } elsif ($asciidoc and
                 ($line =~ m/^(\/{4,}|\+{4,}|-{4,}|\.{4,}|\*{4,}|_{4,}|={4,}|~{4,})$/)) {
            # Found one delimited block
            my $t = $line;
            $t =~ s/^(.).*$/$1/;
            my $type = "delimited block $t";
            if (defined $self->{verbatim} and ($self->{type} ne $type)) {
                # Delimiter of another kind inside a verbatim block:
                # it is ordinary content.
                $paragraph .= "$line\n";
            } else {
                do_paragraph($self,$paragraph,$wrapped_mode);
                if (    (defined $self->{type})
                    and ($self->{type} eq $type)) {
                    # Same delimiter as the open block: this closes it.
                    undef $self->{type};
                    undef $self->{verbatim};
                    $wrapped_mode = 1;
                } else {
                    # Opening delimiter: pick the mode from its character.
                    if ($t eq "\/") {
                        # CommentBlock, should not be treated
                        $self->{verbatim} = 2;
                    } elsif ($t eq "+") {
                        # PassthroughBlock
                        $wrapped_mode = 0;
                        $self->{verbatim} = 1;
                    } elsif ($t eq "-") {
                        # ListingBlock
                        $wrapped_mode = 0;
                        $self->{verbatim} = 1;
                    } elsif ($t eq ".") {
                        # LiteralBlock
                        $wrapped_mode = 0;
                        $self->{verbatim} = 1;
                    } elsif ($t eq "*") {
                        # SidebarBlock
                        $wrapped_mode = 1;
                    } elsif ($t eq "_") {
                        # QuoteBlock
                        if (    (defined $self->{type})
                            and ($self->{type} eq "verse")) {
                            $wrapped_mode = 0;
                            $self->{verbatim} = 1;
                        } else {
                            $wrapped_mode = 1;
                        }
                    } elsif ($t eq "=") {
                        # ExampleBlock
                        $wrapped_mode = 1;
                    } elsif ($t eq "~") {
                        # Filter blocks, TBC: not translated
                        $wrapped_mode = 0;
                        $self->{verbatim} = 2;
                    }
                    $self->{type} = $type;
                }
                $paragraph="";
                $self->pushline($line."\n");
            }
        } elsif ($asciidoc and not defined $self->{verbatim} and
                 ($line =~ m/^\[\[([^\]]*)\]\]$/)) {
            # Found BlockId
            do_paragraph($self,$paragraph,$wrapped_mode);
            $paragraph="";
            $wrapped_mode = 1;
            $self->pushline($line."\n");
            undef $self->{bullet};
            undef $self->{indent};
        } elsif ($asciidoc and not defined $self->{verbatim} and
                 ($paragraph eq "") and
                 ($line =~ m/^((?:NOTE|TIP|IMPORTANT|WARNING|CAUTION):\s+)(.*)$/)) {
            # Admonition paragraph: the "NOTE: " style label is pushed
            # untranslated and the rest of the line starts a new paragraph.
            my $type = $1;
            my $text = $2;
            do_paragraph($self,$paragraph,$wrapped_mode);
            $paragraph=$text."\n";
            $wrapped_mode = 1;
            $self->pushline($type);
            undef $self->{bullet};
            undef $self->{indent};
        } elsif ($asciidoc and not defined $self->{verbatim} and
                 ($line =~ m/^\[(NOTE|TIP|IMPORTANT|WARNING|CAUTION|verse|quote)\]$/)) {
            # Bare style attribute line such as "[NOTE]" or "[verse]".
            my $type = $1;
            do_paragraph($self,$paragraph,$wrapped_mode);
            $paragraph="";
            $wrapped_mode = 1;
            $self->pushline($line."\n");
            if ($type eq "verse") {
                $wrapped_mode = 0;
            }
            undef $self->{bullet};
            undef $self->{indent};
        } elsif ($asciidoc and not defined $self->{verbatim} and
                 ($line =~ m/^\[(verse|quote), +(.*)\]$/)) {
            # "[verse, ...]" / "[quote, ...]": the part after the comma is
            # translated.
            my $type = $1;
            my $arg = $2;
            do_paragraph($self,$paragraph,$wrapped_mode);
            $paragraph="";
            my $t = $self->translate($arg,
                                     $self->{ref},
                                     "$type",
                                     "wrap" => 0);
            $self->pushline("[$type, $t]\n");
            $wrapped_mode = 1;
            if ($type eq "verse") {
                $wrapped_mode = 0;
            }
            $self->{type} = $type;
            undef $self->{bullet};
            undef $self->{indent};
        } elsif ($asciidoc and not defined $self->{verbatim} and
                 ($line =~ m/^\[icon="(.*)"\]$/)) {
            # [icon="..."]: the quoted value is translated.
            my $arg = $1;
            do_paragraph($self,$paragraph,$wrapped_mode);
            $paragraph="";
            my $t = $self->translate($arg,
                                     $self->{ref},
                                     "icon",
                                     "wrap" => 0);
            $self->pushline("[icon=\"$t\"]\n");
            $wrapped_mode = 1;
            undef $self->{bullet};
            undef $self->{indent};
        } elsif ($asciidoc and not defined $self->{verbatim} and
                 ($line =~ m/^\[icons=None, +caption="(.*)"\]$/)) {
            # [icons=None, caption="..."]: the caption is translated.
            my $arg = $1;
            do_paragraph($self,$paragraph,$wrapped_mode);
            $paragraph="";
            my $t = $self->translate($arg,
                                     $self->{ref},
                                     "caption",
                                     "wrap" => 0);
            $self->pushline("[icons=None, caption=\"$t\"]\n");
            $wrapped_mode = 1;
            undef $self->{bullet};
            undef $self->{indent};
        } elsif ($asciidoc and not defined $self->{verbatim} and
                 ($line =~ m/^(\s*)([*_+`'#[:alnum:]].*)((?:::|;;|\?\?|:-)(?: *\\)?)$/)) {
            my $indent = $1;
            my $label = $2;
            my $labelend = $3;
            # Found labeled list
            do_paragraph($self,$paragraph,$wrapped_mode);
            $paragraph="";
            $wrapped_mode = 1;
            $self->{bullet} = "";
            $self->{indent} = $indent;
            my $t = $self->translate($label,
                                     $self->{ref},
                                     "Labeled list",
                                     "wrap" => 0);
            $self->pushline("$indent$t$labelend\n");
        } elsif ($asciidoc and not defined $self->{verbatim} and
                 ($line =~ m/^(\s*)(\S.*)((?:::|;;)\s+)(.*)$/)) {
            my $indent = $1;
            my $label = $2;
            my $labelend = $3;
            my $labeltext = $4;
            # Found Horizontal Labeled Lists
            do_paragraph($self,$paragraph,$wrapped_mode);
            # The text following the label starts the next paragraph.
            $paragraph=$labeltext."\n";
            $wrapped_mode = 1;
            $self->{bullet} = "";
            $self->{indent} = $indent;
            my $t = $self->translate($label,
                                     $self->{ref},
                                     "Labeled list",
                                     "wrap" => 0);
            # No newline: the paragraph text follows on the same line.
            $self->pushline("$indent$t$labelend");
        } elsif ($asciidoc and not defined $self->{verbatim} and
                 ($line =~ m/^\:(\S.*?)(:\s*)(.*)$/)) {
            my $attrname = $1;
            my $attrsep = $2;
            my $attrvalue = $3;
            # Found a Attribute entry
            do_paragraph($self,$paragraph,$wrapped_mode);
            $paragraph="";
            $wrapped_mode = 1;
            undef $self->{bullet};
            undef $self->{indent};
            my $t = $self->translate($attrvalue,
                                     $self->{ref},
                                     "Attribute :$attrname:",
                                     "wrap" => 0);
            $self->pushline(":$attrname$attrsep$t\n");
        } elsif ($asciidoc and not defined $self->{verbatim} and
                 ($line !~ m/^\.\./) and ($line =~ m/^\.(\S.*)$/)) {
            my $title = $1;
            # Found block title
            do_paragraph($self,$paragraph,$wrapped_mode);
            $paragraph="";
            $wrapped_mode = 1;
            undef $self->{bullet};
            undef $self->{indent};
            my $t = $self->translate($title,
                                     $self->{ref},
                                     "Block title",
                                     "wrap" => 0);
            $self->pushline(".$t\n");
        } elsif ($asciidoc and not defined $self->{verbatim} and
                 ($line =~ m/^(\s*)((?:[-*o+]|(?:[0-9]+[.\)])|(?:[a-z][.\)])|\([0-9]+\)|\.|\.\.)\s+)(.*)$/)) {
            # Bulleted or numbered list item: remember the bullet and its
            # indentation so that do_paragraph() can put them back.
            my $indent = $1||"";
            my $bullet = $2;
            my $text = $3;
            do_paragraph($self,$paragraph,$wrapped_mode);
            $paragraph = $text."\n";
            $self->{indent} = $indent;
            $self->{bullet} = $bullet;
        } elsif ($asciidoc and not defined $self->{verbatim} and
                 ($line =~ m/^((?:<?[0-9]+)?> +)(.*)$/)) {
            # Item introduced by ">", "N>" or "<N>" followed by spaces.
            my $bullet = $1;
            my $text = $2;
            do_paragraph($self,$paragraph,$wrapped_mode);
            $paragraph = $text."\n";
            $self->{indent} = "";
            $self->{bullet} = $bullet;
        } elsif ($asciidoc and not defined $self->{verbatim} and
                 (defined $self->{bullet} and $line =~ m/^(\s+)(.*)$/)) {
            # Indented line while a list item is open.
            my $indent = $1;
            my $text = $2;
            if (not defined $self->{indent}) {
                $paragraph .= $text."\n";
                $self->{indent} = $indent;
            } elsif (length($paragraph) and (length($self->{bullet}) + length($self->{indent}) == length($indent))) {
                # Aligned with the item text: continuation of the item.
                $paragraph .= $text."\n";
            } else {
                # Different alignment: start a new indented paragraph.
                do_paragraph($self,$paragraph,$wrapped_mode);
                $paragraph = $text."\n";
                $self->{indent} = $indent;
                $self->{bullet} = "";
            }
        } elsif ($markdown and
                 (not defined($self->{verbatim})) and
                 ($line =~ m/^(={4,}|-{4,})$/) and
                 (defined($paragraph) )and
                 ($paragraph =~ m/^[^\n]*\n$/s) and
                 (length($paragraph) == (length($line)+1))) {
            # XXX: There can be any number of underlining according
            #      to the documentation. This detection, which avoid
            #      translating the formatting, is only supported if
            #      the underlining has the same size as the header text.
            # Found title
            $wrapped_mode = 0;
            my $level = $line;
            $level =~ s/^(.).*$/$1/;
            # $paragraph still ends with its newline here, hence the
            # "- 1" when the underline is resized below.
            my $t = $self->translate($paragraph,
                                     $self->{ref},
                                     "Title $level",
                                     "wrap" => 0);
            $self->pushline($t);
            $paragraph="";
            $wrapped_mode = 1;
            $self->pushline(($level x (length($t)-1))."\n");
        } elsif ($markdown and
                 ($line =~ m/^(#{1,6})( +)(.*?)( +\1)?$/)) {
            my $titlelevel1 = $1;
            my $titlespaces = $2;
            my $title = $3;
            my $titlelevel2 = $4||"";
            # Found one line title
            do_paragraph($self,$paragraph,$wrapped_mode);
            $wrapped_mode = 0;
            $paragraph="";
            my $t = $self->translate($title,
                                     $self->{ref},
                                     "Title $titlelevel1",
                                     "wrap" => 0);
            $self->pushline($titlelevel1.$titlespaces.$t.$titlelevel2."\n");
            $wrapped_mode = 1;
        } elsif ($markdown and
                 ($paragraph eq "") and
                 ($line =~ /^((\*\s*){3,}|(-\s*){3,}|(_\s*){3,})$/)) {
            # Horizontal rule
            $wrapped_mode = 1;
            $self->pushline($line."\n");
        } elsif ($line =~ /^-- $/) {
            # Break paragraphs on email signature hint
            do_paragraph($self,$paragraph,$wrapped_mode);
            $paragraph="";
            $wrapped_mode = 1;
            $self->pushline($line."\n");
        } elsif (   $line =~ /^=+$/
                 or $line =~ /^_+$/
                 or $line =~ /^-+$/) {
            # Line made only of '=', '_' or '-': it ends the paragraph and
            # is translated together with it, without wrapping.
            $wrapped_mode = 0;
            $paragraph .= $line."\n";
            do_paragraph($self,$paragraph,$wrapped_mode);
            $paragraph="";
            $wrapped_mode = 1;
        } elsif ($markdown and
                 (   $line =~ /^\s*\[\[\!\S+\s*$/     # macro begin
                  or $line =~ /^\s*"""\s*\]\]\s*$/)) { # """ textblock inside macro end
            # Avoid translating Markdown lines containing only markup
            do_paragraph($self,$paragraph,$wrapped_mode);
            $paragraph="";
            $wrapped_mode = 1;
            $self->pushline("$line\n");
        } elsif ($markdown and
                 (   $line =~ /^#/                            # headline
                  or $line =~ /^\s*\[\[\!\S[^\]]*\]\]\s*$/)) { # sole macro
            # Preserve some Markdown markup as a single line
            do_paragraph($self,$paragraph,$wrapped_mode);
            $paragraph="$line\n";
            $wrapped_mode = 0;
            $end_of_paragraph = 1;
        } elsif ($markdown and
                 (   $line =~ /^"""/)) { # """ textblock inside macro end
            # Markdown markup needing separation _before_ this line
            do_paragraph($self,$paragraph,$wrapped_mode);
            $paragraph="$line\n";
            $wrapped_mode = 1;
        } elsif ($tabs eq "split" and $line =~ m/\t/ and $paragraph !~ m/\t/s) {
            # tabs=split: first line with a tab after tab-free text starts
            # a new, non-wrapped paragraph.
            $wrapped_mode = 0;
            do_paragraph($self,$paragraph,$wrapped_mode);
            $paragraph = "$line\n";
            $wrapped_mode = 0;
        } elsif ($tabs eq "split" and $line !~ m/\t/ and $paragraph =~ m/\t/s) {
            # tabs=split: first tab-free line after tabbed text starts a
            # new, wrapped paragraph.
            do_paragraph($self,$paragraph,$wrapped_mode);
            $paragraph = "$line\n";
            $wrapped_mode = 1;
        } else {
            # Ordinary text line: append it to the current paragraph.
            if ($line =~ /^\s/) {
                # A line starting by a space indicates a non-wrap
                # paragraph
                $wrapped_mode = 0;
            }
            if ($markdown and
                (   $line =~ /\S {2,}$/ # explicit newline (two or more trailing spaces)
                 or $line =~ /"""$/)) { # """ textblock inside macro begin
                # Markdown markup needing separation _after_ this line
                $end_of_paragraph = 1;
            } else {
                undef $self->{bullet};
                undef $self->{indent};
            }
            if ($fortunes) {
                # Drop fortune comments ("%%" up to the end of the line).
                $line =~ s/%%(.*)$//;
            }
            # TODO: comments
            $paragraph .= $line."\n";
        }
        # paragraphs starting by a bullet, or numbered
        # or paragraphs with a line containing three or more consecutive
        # spaces or tabs
        # are considered as verbatim paragraphs
        $wrapped_mode = 0 if (   $paragraph =~ m/^(\*|[0-9]+[.)] )/s
                              or $paragraph =~ m/[ \t][ \t][ \t]/s);
        $wrapped_mode = 0 if (    $tabs eq "verbatim"
                              and $paragraph =~ m/\t/s);
        if ($markdown) {
            # Some Markdown markup can (or might) not survive wrapping
            $wrapped_mode = 0 if (
                   $paragraph =~ /^>/ms                  # blockquote
                or $paragraph =~ /^( {8}|\t)/ms          # monospaced
                or $paragraph =~ /^\$(\S+[{}]\S*\s*)+/ms # Xapian macro
                or $paragraph =~ /<(?![a-z]+[:@])/ms     # maybe html (tags but not wiki <URI>)
                or $paragraph =~ /^[^<]+>/ms             # maybe html (tag with vertical space)
                or $paragraph =~ /\S {2,}$/ms            # explicit newline (two or more trailing spaces)
                or $paragraph =~ /\[\[\!\S[^\]]+$/ms     # macro begin
            );
        }
        # A branch above asked for the paragraph to end with this line.
        if ($end_of_paragraph) {
            do_paragraph($self,$paragraph,$wrapped_mode);
            $paragraph="";
            $wrapped_mode = 1;
            $end_of_paragraph = 0;
        }
        ($line,$ref)=$self->shiftline();
    }
    # Flush whatever is still pending at the end of the input.
    if (length $paragraph) {
        do_paragraph($self,$paragraph,$wrapped_mode);
    }
}
745 | ␊ |
# Translate one accumulated paragraph and push the result to the output.
#
# Called as do_paragraph($self, $paragraph, $wrap [, $type]).
#   $paragraph  text to translate; an empty string is ignored
#   $wrap       true if the text may be re-wrapped
#   $type       type string for the translation; when false, $self->{type}
#               is used, and "Plain text" when that is false as well
sub do_paragraph {
    my ($self, $paragraph, $wrap, $type) = @_;
    $type = $type || $self->{type} || "Plain text";
    return if ($paragraph eq "");

    if ($bullets and not $wrap and not defined $self->{verbatim}) {
        # Bullet detection for no-wrap paragraphs: a marker ("-", "*", "o",
        # "+", "1.", "1)" or "(1)") followed by text, with continuation
        # lines aligned under that text.  Such items are translated one at
        # a time with wrapping enabled.  As soon as the remaining text no
        # longer looks like such an item, leave the loop and translate it
        # as an ordinary paragraph below.
        ITEM:
        while ($paragraph =~ m/^(\s*)((?:[-*o+]|([0-9]+[.\)])|\([0-9]+\))\s+)([^\n]*\n)(.*)$/s) {
            # Copy the captures first; the regexes below overwrite them.
            my ($lead, $marker, $item, $rest) = ($1, $2, $4, $5);
            # Indentation of continuation lines: as wide as lead + marker.
            my $hang = $lead.(' ' x length $marker);

            # Move the aligned continuation lines from $rest to $item.
            while ($rest !~ m/$hang(?:[-*o+]|([0-9]+[.\)])|\([0-9]+\))\s+/
                   and $rest =~ s/^$hang(\S[^\n]*\n)//s) {
                $item .= $1;
            }

            # TODO: detect if a line starts with the same bullet
            # Runs of three or more blanks inside the text: not a plain item.
            last ITEM if $item =~ m/\S[ \t][ \t][ \t]+\S/s;

            # What follows must be nothing, or another item with the same
            # lead and marker (any number where the marker has one).
            my $same_marker = quotemeta($lead.$marker);
            $same_marker =~ s/[0-9]+/\\d\+/;
            last ITEM unless ($rest eq '' or $rest =~ m/^$same_marker\S/s);

            my $translated = $self->translate($item,
                                              $self->{ref},
                                              "Bullet: '$lead$marker'",
                                              "wrap" => 1,
                                              "wrapcol" => - (length $hang));
            # Put the marker back and re-indent the following lines.
            $translated =~ s/^/$lead$marker/s;
            $translated =~ s/\n(.)/\n$hang$1/sg;
            $self->pushline( $translated."\n" );

            return if $rest eq '';

            # Another bullet
            $paragraph = $rest;
        }
    }

    # In wrap mode the trailing newlines are kept out of the translated
    # string and appended again afterwards.
    my $trailing = "";
    if ($wrap) {
        $paragraph =~ s/^(.*?)(\n*)$/$1/s;
        $trailing = $2 || "";
    }
    my $output = $self->translate($paragraph,
                                  $self->{ref},
                                  $type,
                                  "wrap" => $wrap);
    if (defined $self->{bullet}) {
        # A bullet recorded on the object: prefix the first line with it
        # and indent the other lines by the same width.
        my $marker = $self->{bullet};
        my $lead   = $self->{indent};
        my $hang   = $lead.(' ' x length($marker));
        $output =~ s/^/$lead$marker/s;
        $output =~ s/\n(.)/\n$hang$1/sg;
    }
    $self->pushline( $output.$trailing );
}
824 | ␊ |
825 | 1;␊ |
826 | ␊ |
827 | =head1 STATUS OF THIS MODULE␊ |
828 | ␊ |
829 | Tested successfully on simple text files and NEWS.Debian files.␊ |
830 | ␊ |
831 | =head1 AUTHORS␊ |
832 | ␊ |
833 | Nicolas François <nicolas.francois@centraliens.net>␊ |
834 | ␊ |
835 | =head1 COPYRIGHT AND LICENSE␊ |
836 | ␊ |
837 | Copyright 2005-2008 by Nicolas FRANÇOIS <nicolas.francois@centraliens.net>.␊ |
838 | ␊ |
839 | This program is free software; you may redistribute it and/or modify it␊ |
840 | under the terms of GPL (see the COPYING file).␊ |
841 | |