1 | #!/usr/bin/perl -w␊ |
2 | ␊ |
3 | # Po4a::Text.pm␊ |
4 | #␊ |
# extract and translate translatable strings from text documents
6 | #␊ |
7 | # This program is free software; you can redistribute it and/or modify␊ |
8 | # it under the terms of the GNU General Public License as published by␊ |
9 | # the Free Software Foundation; either version 2 of the License, or␊ |
10 | # (at your option) any later version.␊ |
11 | #␊ |
12 | # This program is distributed in the hope that it will be useful,␊ |
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of␊ |
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the␊ |
15 | # GNU General Public License for more details.␊ |
16 | #␊ |
17 | # You should have received a copy of the GNU General Public License␊ |
18 | # along with this program; if not, write to the Free Software␊ |
19 | # Foundation, Inc.,␊ |
20 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA␊ |
21 | #␊ |
22 | ########################################################################␊ |
23 | ␊ |
24 | =encoding UTF-8␊ |
25 | ␊ |
26 | =head1 NAME␊ |
27 | ␊ |
28 | Locale::Po4a::Text - convert text documents from/to PO files␊ |
29 | ␊ |
30 | =head1 DESCRIPTION␊ |
31 | ␊ |
32 | The po4a (PO for anything) project goal is to ease translations (and more␊ |
33 | interestingly, the maintenance of translations) using gettext tools on␊ |
34 | areas where they were not expected like documentation.␊ |
35 | ␊ |
36 | Locale::Po4a::Text is a module to help the translation of text documents into␊ |
37 | other [human] languages.␊ |
38 | ␊ |
39 | Paragraphs are split on empty lines (or lines containing only spaces or␊ |
40 | tabulations).␊ |
41 | ␊ |
If a paragraph contains a line starting with a space (or tabulation), this
paragraph won't be rewrapped.
44 | ␊ |
45 | =cut␊ |
46 | ␊ |
47 | package Locale::Po4a::Text;␊ |
48 | ␊ |
49 | use 5.006;␊ |
50 | use strict;␊ |
51 | use warnings;␊ |
52 | ␊ |
53 | require Exporter;␊ |
54 | use vars qw(@ISA @EXPORT);␊ |
55 | @ISA = qw(Locale::Po4a::TransTractor);␊ |
56 | @EXPORT = qw();␊ |
57 | ␊ |
58 | use Locale::Po4a::TransTractor;␊ |
59 | use Locale::Po4a::Common;␊ |
60 | ␊ |
61 | =head1 OPTIONS ACCEPTED BY THIS MODULE␊ |
62 | ␊ |
63 | These are this module's particular options:␊ |
64 | ␊ |
65 | =over␊ |
66 | ␊ |
67 | =item B<nobullets>␊ |
68 | ␊ |
69 | Deactivate detection of bullets.␊ |
70 | ␊ |
71 | By default, when a bullet is detected, the bullet paragraph is not considered␊ |
72 | as a verbatim paragraph (with the no-wrap flag in the PO file), but the module␊ |
73 | rewraps this paragraph in the generated PO file and in the translation.␊ |
74 | ␊ |
75 | =cut␊ |
76 | ␊ |
77 | my $bullets = 1;␊ |
78 | ␊ |
79 | =item B<tabs=>I<mode>␊ |
80 | ␊ |
81 | Specify how tabulations shall be handled. The I<mode> can be any of:␊ |
82 | ␊ |
83 | =over␊ |
84 | ␊ |
85 | =item B<split>␊ |
86 | ␊ |
87 | Lines with tabulations introduce breaks in the current paragraph.␊ |
88 | ␊ |
89 | =item B<verbatim>␊ |
90 | ␊ |
Paragraphs containing tabulations will not be re-wrapped.
92 | ␊ |
93 | =back␊ |
94 | ␊ |
95 | By default, tabulations are considered as spaces.␊ |
96 | ␊ |
97 | =cut␊ |
98 | ␊ |
99 | my $tabs = "";␊ |
100 | ␊ |
101 | =item B<breaks=>I<regex>␊ |
102 | ␊ |
103 | A regular expression matching lines which introduce breaks.␊ |
104 | The regular expression will be anchored so that the whole line must match.␊ |
105 | ␊ |
106 | =cut␊ |
107 | ␊ |
108 | my $breaks;␊ |
109 | ␊ |
110 | =item B<debianchangelog>␊ |
111 | ␊ |
Handle the header and footer of
released versions, which only contain non-translatable information.
114 | ␊ |
115 | =cut␊ |
116 | ␊ |
117 | my $debianchangelog = 0;␊ |
118 | ␊ |
119 | =item B<fortunes>␊ |
120 | ␊ |
Handle the fortunes format, which separates fortunes with a line
consisting of '%' or '%%', and uses '%%' as the beginning of a comment.
123 | ␊ |
124 | =cut␊ |
125 | ␊ |
126 | my $fortunes = 0;␊ |
127 | ␊ |
128 | =item B<markdown>␊ |
129 | ␊ |
130 | Handle some special markup in Markdown-formatted texts.␊ |
131 | ␊ |
132 | =cut␊ |
133 | ␊ |
134 | my $markdown = 0;␊ |
135 | ␊ |
136 | =item B<asciidoc>␊ |
137 | ␊ |
138 | Handle documents in the AsciiDoc format.␊ |
139 | ␊ |
140 | =cut␊ |
141 | ␊ |
142 | my $asciidoc = 0;␊ |
143 | ␊ |
144 | =item B<control>[B<=>I<taglist>]␊ |
145 | ␊ |
146 | Handle control files.␊ |
147 | A comma-separated list of tags to be translated can be provided.␊ |
148 | ␊ |
149 | =cut␊ |
150 | ␊ |
# Tags whose values are translated by parse_control. initialize fills it from
# the "control" option; the empty-string key means "translate every tag".
my %control = ();

# Per-line handler called by parse for every input line. Defaults to the plain
# text handler; initialize replaces it according to the format options.
my $parse_func = \&parse_fallback;

# Comment lines collected by parse_asciidoc (comment lines and comment
# blocks). They are passed as the "comment" of the next translated string and
# then cleared.
my @comments = ();
156 | ␊ |
157 | =back␊ |
158 | ␊ |
159 | =cut␊ |
160 | ␊ |
# Configure the module for one document.
#
# Arguments: the object, followed by the option hash (name => value).
# Dies (through wrap_mod) when an option name is not one of the known options.
#
# The format settings live in file-level variables shared by every instance
# of this module, so they are reset to their defaults first: otherwise the
# options of a previously processed document (for example "markdown" or
# "nobullets") would silently apply to the next one handled by the same
# process.
sub initialize {
    my $self = shift;
    my %options = @_;

    # Start from a clean module state before applying this document's options.
    $bullets = 1;
    $tabs = "";
    undef $breaks;
    $markdown = 0;
    $asciidoc = 0;
    %control = ();
    $parse_func = \&parse_fallback;
    @comments = ();

    # Register the known option names. The values are placeholders which are
    # overwritten below by whatever the caller provided.
    $self->{options}{'control'} = "";
    $self->{options}{'asciidoc'} = 1;
    $self->{options}{'breaks'} = 1;
    $self->{options}{'debianchangelog'} = 1;
    $self->{options}{'debug'} = 1;
    $self->{options}{'fortunes'} = 1;
    $self->{options}{'markdown'} = 1;
    $self->{options}{'nobullets'} = 1;
    $self->{options}{'tabs'} = 1;
    $self->{options}{'verbose'} = 1;

    foreach my $opt (keys %options) {
        die wrap_mod("po4a::text",
                     dgettext("po4a", "Unknown option: %s"), $opt)
            unless exists $self->{options}{$opt};
        $self->{options}{$opt} = $options{$opt};
    }

    if (defined $options{'nobullets'}) {
        $bullets = 0;
    }

    if (defined $options{'tabs'}) {
        $tabs = $options{'tabs'};
    }

    if (defined $options{'breaks'}) {
        $breaks = $options{'breaks'};
    }

    # The format options each select a line handler. When several are given,
    # the last one tested below wins.
    if (defined $options{'debianchangelog'}) {
        $parse_func = \&parse_debianchangelog;
    }

    if (defined $options{'fortunes'}) {
        $parse_func = \&parse_fortunes;
    }

    if (defined $options{'markdown'}) {
        $parse_func = \&parse_markdown;
        $markdown=1;
    }

    if (defined $options{'asciidoc'}) {
        $parse_func = \&parse_asciidoc;
        $asciidoc=1;
    }

    if (defined $options{'control'}) {
        $parse_func = \&parse_control;
        if ($options{'control'} eq "1") {
            # Option given without a tag list: translate every tag.
            $control{''}=1;
        } else {
            foreach my $tag (split(',',$options{'control'})) {
                $control{$tag}=1;
            }
        }
    }
}
224 | ␊ |
# Generic line handler for plain text; also the last resort of the format
# specific handlers.
#
# Arguments: the object, the current line (without its newline), its
# reference, then the parser state ($paragraph, $wrapped_mode,
# $expect_header, $end_of_paragraph).
# Returns the updated state in that same order.
sub parse_fallback {
    my ($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph) = @_;
    if (   ($line =~ /^\s*$/)
        or (    defined $breaks
            # The user regex is grouped so that the anchors apply to the
            # whole expression even when it contains a top-level
            # alternation ("a|b" must not become "^a" or "b$").
            and $line =~ m/^(?:$breaks)$/)) {
        # Break paragraphs on lines containing only spaces
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph="";
        $wrapped_mode = 1 unless defined($self->{verbatim});
        $self->pushline($line."\n");
        undef $self->{controlkey};
    } elsif ($line =~ /^-- $/) {
        # Break paragraphs on email signature hint
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph="";
        $wrapped_mode = 1;
        $self->pushline($line."\n");
    } elsif (   $line =~ /^=+$/
             or $line =~ /^_+$/
             or $line =~ /^-+$/) {
        # Underline made of '=', '_' or '-': it closes the current paragraph,
        # which is emitted together with the underline and without wrapping.
        $wrapped_mode = 0;
        $paragraph .= $line."\n";
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph="";
        $wrapped_mode = 1;
    } elsif ($tabs eq "split" and $line =~ m/\t/ and $paragraph !~ m/\t/s) {
        # tabs=split: first line with a tabulation starts a new, unwrapped
        # paragraph.
        $wrapped_mode = 0;
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph = "$line\n";
        $wrapped_mode = 0;
    } elsif ($tabs eq "split" and $line !~ m/\t/ and $paragraph =~ m/\t/s) {
        # tabs=split: first line without a tabulation after a tabulated
        # paragraph starts a new, wrapped paragraph.
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph = "$line\n";
        $wrapped_mode = 1;
    } else {
        if ($line =~ /^\s/) {
            # A line starting by a space indicates a non-wrap
            # paragraph
            $wrapped_mode = 0;
        }
        if ($markdown and
            (   $line =~ /\S  $/    # explicit newline
             or $line =~ /"""$/)) { # """ textblock inside macro begin
            # Markdown markup needing separation _after_ this line
            $end_of_paragraph = 1;
        } else {
            undef $self->{bullet};
            undef $self->{indent};
        }
        # TODO: comments
        $paragraph .= $line."\n";
    }
    return ($paragraph,$wrapped_mode,$expect_header,$end_of_paragraph);
}
279 | ␊ |
# Line handler for Debian changelogs.
#
# The header line of an entry (only looked for while $expect_header is true)
# and its trailer line are copied to the output untranslated, after flushing
# the pending paragraph. Every other line is handed to parse_fallback.
# Returns the updated ($paragraph,$wrapped_mode,$expect_header,$end_of_paragraph).
sub parse_debianchangelog {
    my ($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph) = @_;

    my $is_header = 0;
    if ($expect_header and
        $line =~ /^(\w[-+0-9a-z.]*)\ \(([^\(\) \t]+)\)   # src, version
                  \s+([-+0-9a-z.]+);                     # distribution
                  \s*urgency\s*\=\s*(.*\S)\s*$/ix) {     #
        $is_header = 1;
    }

    my $is_trailer = 0;
    if (not $is_header
        and $line =~ m/^ \-\- (.*) <(.*)>  ((\w+\,\s*)?\d{1,2}\s+\w+\s+\d{4}\s+\d{1,2}:\d\d:\d\d\s+[-+]\d{4}(\s+\([^\\\(\)]\))?)$/) {
        $is_trailer = 1;
    }

    # Ordinary changelog text
    return parse_fallback($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph)
        unless ($is_header or $is_trailer);

    # Header or trailer: close the running paragraph and copy the line as is.
    do_paragraph($self,$paragraph,$wrapped_mode);
    $self->pushline("$line\n");

    # After a header no other header is expected until the trailer is seen.
    my $next_expect_header = $is_header ? 0 : 1;
    return ("",$wrapped_mode,$next_expect_header,$end_of_paragraph);
}
301 | ␊ |
# Line handler for fortune files.
#
# A line made only of '%' or '%%' ends the current fortune: the pending
# paragraph is flushed and the separator is copied to the output.
# Any other line has its '%%' comment removed and is then processed as plain
# text by parse_fallback, so that it is accumulated into the paragraph
# (previously such lines were dropped and never reached the translation).
# Returns the updated ($paragraph,$wrapped_mode,$expect_header,$end_of_paragraph).
sub parse_fortunes {
    my ($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph) = @_;
    if ($line =~ m/^%%?\s*$/) {
        # Found end of fortune
        do_paragraph($self,$paragraph,$wrapped_mode);
        $self->pushline("\n") unless (   $wrapped_mode == 0
                                      or $paragraph eq "");
        $paragraph="";
        $wrapped_mode = 1;
        $self->pushline("$line\n");
    } else {
        # Strip the comment, then let the generic handler collect the text.
        $line =~ s/%%(.*)$//;
        return parse_fallback($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph);
    }
    return ($paragraph,$wrapped_mode,$expect_header,$end_of_paragraph);
}
317 | ␊ |
# Line handler for control files ("Tag: value" stanzas with indented
# continuation lines).
#
# The first tag seen in a stanza is remembered in $self->{controlkey} and is
# appended to the type of every string extracted afterwards; parse_fallback
# forgets it on the blank line ending the stanza.
# Returns the updated ($paragraph,$wrapped_mode,$expect_header,$end_of_paragraph).
sub parse_control {
    my ($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph) = @_;

    # Builds "<label>" or "<label>, <controlkey>". It is evaluated at call
    # time because controlkey is set while the stanza is being read.
    my $with_key = sub {
        my $label = shift;
        return $label.(defined $self->{controlkey}?", ".$self->{controlkey}:"");
    };

    if ($line =~ m/^([^ :]*): *(.*)$/) {
        # "Tag: value" line
        my ($tag, $val) = ($1, $2);
        warn "Unrecognized section: '$paragraph'\n"
            unless $paragraph eq "";
        my $t = $val;
        if ($control{''} or $control{$tag}) {
            # Only the selected tags (or all of them) are translated.
            $t = $self->translate($val,
                                  $self->{ref},
                                  $with_key->($tag),
                                  "wrap" => 0);
        }
        $self->{controlkey} = "$tag: $val"
            if not defined $self->{controlkey};
        $self->pushline("$tag: $t\n");
        $self->{bullet} = "";
        $self->{indent} = " ";
        return ("",1,$expect_header,$end_of_paragraph);
    }

    if ($line eq " .") {
        # Paragraph separator inside a long description
        do_paragraph($self,$paragraph,$wrapped_mode,
                     $with_key->("Long Description"));
        $self->pushline($line."\n");
        $self->{bullet} = "";
        $self->{indent} = " ";
        return ("",$wrapped_mode,$expect_header,$end_of_paragraph);
    }

    if ($line =~ m/^ Link: +(.*)$/) {
        # Link line: the label and the target are two separate strings.
        my $link = $1;
        do_paragraph($self,$paragraph,$wrapped_mode,
                     $with_key->("Long Description"));
        my $t1 = $self->translate("Link: ",
                                  $self->{ref},
                                  "Link",
                                  "wrap" => 0);
        my $t2 = $self->translate($link,
                                  $self->{ref},
                                  $with_key->("Link"),
                                  "wrap" => 0);
        $self->pushline(" $t1$t2\n");
        return ("",$wrapped_mode,$expect_header,$end_of_paragraph);
    }

    if (defined $self->{indent} and
        $line =~ m/^$self->{indent}\S/) {
        # Continuation line of the long description
        $paragraph .= $line."\n";
        $self->{type} = $with_key->("Long Description");
        return ($paragraph,$wrapped_mode,$expect_header,$end_of_paragraph);
    }

    return parse_fallback($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph);
}
372 | ␊ |
# Lists of AsciiDoc style names, written as regular expression alternations
# ("a|b|c"). parse_asciidoc interpolates $asciidoc_RE_STYLE_ADMONITION to
# detect admonition paragraphs ("NOTE: ...") and $asciidoc_RE_STYLES to detect
# style lines ("[verse]"). The other variables are only building blocks of
# $asciidoc_RE_STYLES.
my $asciidoc_RE_SECTION_TEMPLATES = "sect1|sect2|sect3|sect4|preface|colophon|dedication|synopsis|index";
my $asciidoc_RE_STYLE_ADMONITION = "TIP|NOTE|IMPORTANT|WARNING|CAUTION";
my $asciidoc_RE_STYLE_PARAGRAPH = "normal|literal|verse|quote|listing|abstract|partintro|comment|example|sidebar|source|music|latex|graphviz";
my $asciidoc_RE_STYLE_NUMBERING = "arabic|loweralpha|upperalpha|lowerroman|upperroman";
my $asciidoc_RE_STYLE_LIST = "appendix|horizontal|qanda|glossary|bibliography";
# Union of all the lists above, plus the "float" style.
my $asciidoc_RE_STYLES = "$asciidoc_RE_SECTION_TEMPLATES|$asciidoc_RE_STYLE_ADMONITION|$asciidoc_RE_STYLE_PARAGRAPH|$asciidoc_RE_STYLE_NUMBERING|$asciidoc_RE_STYLE_LIST|float";
379 | ␊ |
BEGIN {
    # Check once, when the module is compiled, whether Unicode::GCString can
    # be loaded. columns() below relies on this flag.
    my $UnicodeGCString_available = (eval { require Unicode::GCString }) ? 1 : 0;
    eval {
        # columns($text, $encoder, $ref)
        #
        # Returns the display width of $text. When an encoder other than
        # "ascii" is given, $text is decoded with it first.
        # With Unicode::GCString the width is the one computed by that
        # module. Without it, the width is the length of the text (minus a
        # final newline), which is only accepted for undecoded/ascii text:
        # otherwise the sub dies, reporting $ref as the location.
        sub columns($$$) {
            my $text = shift;
            my $encoder = shift;
            my $needs_decoding = (defined($encoder) && $encoder->name ne "ascii");
            $text = $encoder->decode($text) if $needs_decoding;

            return Unicode::GCString->new($text)->columns()
                if $UnicodeGCString_available;

            $text =~ s/\n$//s;
            return length($text) unless $needs_decoding;

            # The remaining argument is the reference of the offending line.
            die wrap_mod("po4a::text",
                         dgettext("po4a", "Detection of two line titles failed at %s\nInstall the Unicode::GCString module!"), shift)
        }
    };
}
399 | ␊ |
# Line handler for AsciiDoc documents.
#
# Arguments: the object, the current line (without its newline), its
# reference, then the parser state ($paragraph, $wrapped_mode,
# $expect_header, $end_of_paragraph).
# Returns the updated state in that same order.
#
# The branches below are tested in order and the first matching one handles
# the line; a line matching none of them is handed to parse_fallback.
#
# State kept in the object between lines:
#   verbatim - undef outside of the special delimited blocks;
#              1 in blocks opened in no-wrap mode (passthrough, listing,
#                table, literal, and quote blocks following a verse style);
#              2 in comment blocks; 3 in filter blocks ("~~~~")
#   type     - type of the delimited block being read ("delimited block X"),
#              or the verse/quote style announced by an attribute line
#   bullet,
#   indent   - list marker and indentation which do_paragraph puts back in
#              front of the translated paragraph
sub parse_asciidoc {
    my ($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph) = @_;
    if ((defined $self->{verbatim}) and ($self->{verbatim} == 3)) {
        # Untranslated blocks: lines are copied as is until the closing
        # "~~~~" delimiter (which is copied too).
        $self->pushline($line."\n");
        if ($line =~ m/^~{4,}$/) {
            undef $self->{verbatim};
            undef $self->{type};
            $wrapped_mode = 1;
        }
    } elsif ((defined $self->{verbatim}) and ($self->{verbatim} == 2)) {
        # CommentBlock: lines are not output; they are kept as comments
        # for the next translated string, until the closing "////".
        if ($line =~ m/^\/{4,}$/) {
            undef $self->{verbatim};
            undef $self->{type};
            $wrapped_mode = 1;
        } else {
            push @comments, $line;
        }
    } elsif ((not defined($self->{verbatim})) and ($line =~ m/^(\+|--)$/)) {
        # List Item Continuation or List Block
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph="";
        $self->pushline($line."\n");
    } elsif ((not defined($self->{verbatim})) and
             ($line =~ m/^(={2,}|-{2,}|~{2,}|\^{2,}|\+{2,})$/) and
             (defined($paragraph) )and
             ($paragraph =~ m/^[^\n]*\n$/s) and
             (columns($paragraph, $self->{TT}{po_in}{encoder}, $ref) == (length($line)))) {
        # Found title (two line form): the pending paragraph is a single
        # line and this underline has exactly its display width.
        $wrapped_mode = 0;
        # The title level is identified by the underline character.
        my $level = $line;
        $level =~ s/^(.).*$/$1/;
        $paragraph =~ s/\n$//s;
        my $t = $self->translate($paragraph,
                                 $self->{ref},
                                 "Title $level",
                                 "comment" => join("\n", @comments),
                                 "wrap" => 0);
        $self->pushline($t."\n");
        $paragraph="";
        @comments=();
        $wrapped_mode = 1;
        # The underline is regenerated with the width of the translation.
        $self->pushline(($level x (columns($t, $self->{TT}{po_in}{encoder}, $ref)))."\n");
    } elsif ($line =~ m/^(={1,5})( +)(.*?)( +\1)?$/) {
        my $titlelevel1 = $1;
        my $titlespaces = $2;
        my $title = $3;
        my $titlelevel2 = $4||"";
        # Found one line title ("== Title" with an optional closing " ==").
        # Only the text is translated; the markers are put back around it.
        do_paragraph($self,$paragraph,$wrapped_mode);
        $wrapped_mode = 0;
        $paragraph="";
        my $t = $self->translate($title,
                                 $self->{ref},
                                 "Title $titlelevel1",
                                 "comment" => join("\n", @comments),
                                 "wrap" => 0);
        $self->pushline($titlelevel1.$titlespaces.$t.$titlelevel2."\n");
        @comments=();
        $wrapped_mode = 1;
    } elsif ($line =~ m/^(\/{4,}|\+{4,}|-{4,}|\.{4,}|\*{4,}|_{4,}|={4,}|~{4,}|\|={4,})$/) {
        # Found one delimited block
        # The block kind is identified by the first character of the
        # delimiter.
        my $t = $line;
        $t =~ s/^(.).*$/$1/;
        my $type = "delimited block $t";
        if (defined $self->{verbatim} and ($self->{type} ne $type)) {
            # A delimiter of another kind inside a verbatim block is part
            # of the content of that block.
            $paragraph .= "$line\n";
        } else {
            do_paragraph($self,$paragraph,$wrapped_mode);
            if (    (defined $self->{type})
                and ($self->{type} eq $type)) {
                # Same delimiter as the one which opened the current
                # block: this is the end of the block.
                undef $self->{type};
                undef $self->{verbatim};
                $wrapped_mode = 1;
            } else {
                # Beginning of a block
                if ($t eq "\/") {
                    # CommentBlock, should not be treated
                    $self->{verbatim} = 2;
                } elsif ($t eq "+") {
                    # PassthroughBlock
                    $wrapped_mode = 0;
                    $self->{verbatim} = 1;
                } elsif ($t eq "-" or $t eq "|") {
                    # ListingBlock
                    $wrapped_mode = 0;
                    $self->{verbatim} = 1;
                } elsif ($t eq ".") {
                    # LiteralBlock
                    $wrapped_mode = 0;
                    $self->{verbatim} = 1;
                } elsif ($t eq "*") {
                    # SidebarBlock
                    $wrapped_mode = 1;
                } elsif ($t eq "_") {
                    # QuoteBlock
                    if (    (defined $self->{type})
                        and ($self->{type} eq "verse")) {
                        # A verse keeps its line breaks.
                        $wrapped_mode = 0;
                        $self->{verbatim} = 1;
                    } else {
                        $wrapped_mode = 1;
                    }
                } elsif ($t eq "=") {
                    # ExampleBlock
                    $wrapped_mode = 1;
                } elsif ($t eq "~") {
                    # Filter blocks, TBC: not translated
                    $wrapped_mode = 0;
                    $self->{verbatim} = 3;
                }
                $self->{type} = $type;
            }
            $paragraph="";
            # The delimiter is copied to the output, except the one opening
            # a comment block.
            $self->pushline($line."\n") unless defined($self->{verbatim}) && $self->{verbatim} == 2;
        }
    } elsif ((not defined($self->{verbatim})) and ($line =~ m/^\/\/(.*)/)) {
        # Comment line: kept for the next translated string, not output
        push @comments, $1;
    } elsif (not defined $self->{verbatim} and
             ($line =~ m/^\[\[([^\]]*)\]\]$/)) {
        # Found BlockId: copied as is
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph="";
        $wrapped_mode = 1;
        $self->pushline($line."\n");
        undef $self->{bullet};
        undef $self->{indent};
    } elsif (not defined $self->{verbatim} and
             ($paragraph eq "") and
             ($line =~ m/^((?:$asciidoc_RE_STYLE_ADMONITION):\s+)(.*)$/)) {
        # Admonition paragraph ("NOTE: text"): the label is output
        # immediately, without a newline, and the text starts a new
        # paragraph which will be output right after it.
        my $type = $1;
        my $text = $2;
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph=$text."\n";
        $wrapped_mode = 1;
        $self->pushline($type);
        undef $self->{bullet};
        undef $self->{indent};
    } elsif (not defined $self->{verbatim} and
             ($line =~ m/^\[($asciidoc_RE_STYLES)\]$/)) {
        # Style line ("[style]"): copied as is
        my $type = $1;
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph="";
        $wrapped_mode = 1;
        $self->pushline($line."\n");
        if ($type eq "verse") {
            $wrapped_mode = 0;
        }
        undef $self->{bullet};
        undef $self->{indent};
    } elsif (not defined $self->{verbatim} and
             ($line =~ m/^\[(['"]?)(verse|quote)\1, +(.*)\]$/)) {
        # Verse or quote style with arguments: only the arguments are
        # translated. The style is remembered in $self->{type}.
        my $quote = $1 || '';
        my $type = $2;
        my $arg = $3;
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph="";
        my $t = $self->translate($arg,
                                 $self->{ref},
                                 "$type",
                                 "comment" => join("\n", @comments),
                                 "wrap" => 0);
        $self->pushline("[$quote$type$quote, $t]\n");
        @comments=();
        $wrapped_mode = 1;
        if ($type eq "verse") {
            $wrapped_mode = 0;
        }
        $self->{type} = $type;
        undef $self->{bullet};
        undef $self->{indent};
    } elsif (not defined $self->{verbatim} and
             ($line =~ m/^\[icon="(.*)"\]$/)) {
        # icon attribute: the value is translated
        my $arg = $1;
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph="";
        my $t = $self->translate($arg,
                                 $self->{ref},
                                 "icon",
                                 "comment" => join("\n", @comments),
                                 "wrap" => 0);
        $self->pushline("[icon=\"$t\"]\n");
        @comments=();
        $wrapped_mode = 1;
        undef $self->{bullet};
        undef $self->{indent};
    } elsif (not defined $self->{verbatim} and
             ($line =~ m/^\[icons=None, +caption="(.*)"\]$/)) {
        # caption attribute: the value is translated
        my $arg = $1;
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph="";
        my $t = $self->translate($arg,
                                 $self->{ref},
                                 "caption",
                                 "comment" => join("\n", @comments),
                                 "wrap" => 0);
        $self->pushline("[icons=None, caption=\"$t\"]\n");
        @comments=();
        $wrapped_mode = 1;
        undef $self->{bullet};
        undef $self->{indent};
    } elsif (not defined $self->{verbatim} and
             ($line =~ m/^(\s*)([*_+`'#[:alnum:]].*)((?:::|;;|\?\?|:-)(?: *\\)?)$/)) {
        my $indent = $1;
        my $label = $2;
        my $labelend = $3;
        # Found labeled list: the label alone on its line is translated,
        # the indentation and the terminator are kept.
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph="";
        $wrapped_mode = 1;
        $self->{bullet} = "";
        $self->{indent} = $indent;
        my $t = $self->translate($label,
                                 $self->{ref},
                                 "Labeled list",
                                 "comment" => join("\n", @comments),
                                 "wrap" => 0);
        $self->pushline("$indent$t$labelend\n");
        @comments=();
    } elsif (not defined $self->{verbatim} and
             ($line =~ m/^(\s*)(\S.*)((?:::|;;)\s+)(.*)$/)) {
        my $indent = $1;
        my $label = $2;
        my $labelend = $3;
        my $labeltext = $4;
        # Found Horizontal Labeled Lists: the label is translated and
        # output without a newline; the text following it on the same line
        # starts a new paragraph.
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph=$labeltext."\n";
        $wrapped_mode = 1;
        $self->{bullet} = "";
        $self->{indent} = $indent;
        my $t = $self->translate($label,
                                 $self->{ref},
                                 "Labeled list",
                                 "comment" => join("\n", @comments),
                                 "wrap" => 0);
        $self->pushline("$indent$t$labelend");
        @comments=();
    } elsif (not defined $self->{verbatim} and
             ($line =~ m/^\:(\S.*?)(:\s*)(.*)$/)) {
        my $attrname = $1;
        my $attrsep = $2;
        my $attrvalue = $3;
        # Found a Attribute entry (":name: value"): only the value is
        # translated.
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph="";
        $wrapped_mode = 1;
        undef $self->{bullet};
        undef $self->{indent};
        my $t = $self->translate($attrvalue,
                                 $self->{ref},
                                 "Attribute :$attrname:",
                                 "comment" => join("\n", @comments),
                                 "wrap" => 0);
        $self->pushline(":$attrname$attrsep$t\n");
        @comments=();
    } elsif (not defined $self->{verbatim} and
             ($line !~ m/^\.\./) and ($line =~ m/^\.(\S.*)$/)) {
        my $title = $1;
        # Found block title (".Title"; lines starting with ".." are
        # excluded)
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph="";
        $wrapped_mode = 1;
        undef $self->{bullet};
        undef $self->{indent};
        my $t = $self->translate($title,
                                 $self->{ref},
                                 "Block title",
                                 "comment" => join("\n", @comments),
                                 "wrap" => 0);
        $self->pushline(".$t\n");
        @comments=();
    } elsif (not defined $self->{verbatim} and
             ($line =~ m/^(\s*)((?:[-*o+]|(?:[0-9]+[.\)])|(?:[a-z][.\)])|\([0-9]+\)|\.|\.\.)\s+)(.*)$/)) {
        # List item: a new paragraph starts with the text of the item. The
        # marker and the indentation are remembered so that do_paragraph
        # can put them back.
        my $indent = $1||"";
        my $bullet = $2;
        my $text = $3;
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph = $text."\n";
        $self->{indent} = $indent;
        $self->{bullet} = $bullet;
    } elsif (not defined $self->{verbatim} and
             ($line =~ m/^((?:<?[0-9]+)?> +)(.*)$/)) {
        # Line starting with ">", "N>" or "<N>" followed by spaces:
        # handled like a list item without indentation.
        my $bullet = $1;
        my $text = $2;
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph = $text."\n";
        $self->{indent} = "";
        $self->{bullet} = $bullet;
    } elsif (not defined $self->{verbatim} and
             (defined $self->{bullet} and $line =~ m/^(\s+)(.*)$/)) {
        # Indented line while a list item is being read
        my $indent = $1;
        my $text = $2;
        if (not defined $self->{indent}) {
            $paragraph .= $text."\n";
            $self->{indent} = $indent;
        } elsif (length($paragraph) and (length($self->{bullet}) + length($self->{indent}) == length($indent))) {
            # Aligned with the text of the item: continuation of the item
            $paragraph .= $text."\n";
        } else {
            # Different alignment: start a new paragraph without marker
            do_paragraph($self,$paragraph,$wrapped_mode);
            $paragraph = $text."\n";
            $self->{indent} = $indent;
            $self->{bullet} = "";
        }
    } else {
        return parse_fallback($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph);
    }
    return ($paragraph,$wrapped_mode,$expect_header,$end_of_paragraph);
}
710 | ␊ |
# Line handler for Markdown documents.
#
# Recognises titles (underlined or starting with '#'), horizontal rules and
# macro markup; every other line is handed to parse_fallback.
# Returns the updated ($paragraph,$wrapped_mode,$expect_header,$end_of_paragraph).
sub parse_markdown {
    my ($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph) = @_;

    # Underlined title. Markdown accepts an underline of any length, but the
    # detection is limited to underlines having exactly the size of the
    # title, which must be the only line of the pending paragraph.
    if (($line =~ m/^(={4,}|-{4,})$/) and
        (defined($paragraph)        ) and
        ($paragraph =~ m/^[^\n]*\n$/s) and
        (length($paragraph) == (length($line)+1))) {
        # The underline character identifies the title level.
        (my $level = $line) =~ s/^(.).*$/$1/;
        my $t = $self->translate($paragraph,
                                 $self->{ref},
                                 "Title $level",
                                 "wrap" => 0);
        $self->pushline($t);
        # Rebuild the underline with the size of the translated title
        # (the translation still ends with its newline).
        $self->pushline(($level x (length($t)-1))."\n");
        return ("",1,$expect_header,$end_of_paragraph);
    }

    # One line title: "# Title" with an optional closing " #"
    if ($line =~ m/^(#{1,6})( +)(.*?)( +\1)?$/) {
        my ($titlelevel1, $titlespaces, $title) = ($1, $2, $3);
        my $titlelevel2 = $4||"";
        do_paragraph($self,$paragraph,$wrapped_mode);
        my $t = $self->translate($title,
                                 $self->{ref},
                                 "Title $titlelevel1",
                                 "wrap" => 0);
        $self->pushline($titlelevel1.$titlespaces.$t.$titlelevel2."\n");
        return ("",1,$expect_header,$end_of_paragraph);
    }

    # Horizontal rule (only when no paragraph is pending)
    if (($paragraph eq "") and
        ($line =~ /^((\*\s*){3,}|(-\s*){3,}|(_\s*){3,})$/)) {
        $self->pushline($line."\n");
        return ($paragraph,1,$expect_header,$end_of_paragraph);
    }

    # Avoid translating Markdown lines containing only markup
    if (   $line =~ /^\s*\[\[\!\S+\s*$/      # macro begin
        or $line =~ /^\s*"""\s*\]\]\s*$/) {  # """ textblock inside macro end
        do_paragraph($self,$paragraph,$wrapped_mode);
        $self->pushline("$line\n");
        return ("",1,$expect_header,$end_of_paragraph);
    }

    # Preserve some Markdown markup as a single line
    if (   $line =~ /^#/                            # headline
        or $line =~ /^\s*\[\[\!\S[^\]]*\]\]\s*$/) { # sole macro
        do_paragraph($self,$paragraph,$wrapped_mode);
        return ("$line\n",0,$expect_header,1);
    }

    # Markdown markup needing separation _before_ this line
    if ($line =~ /^"""/) { # """ textblock inside macro end
        do_paragraph($self,$paragraph,$wrapped_mode);
        return ("$line\n",1,$expect_header,$end_of_paragraph);
    }

    return parse_fallback($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph);
}
777 | ␊ |
# Main loop: reads the input line by line and gathers the lines into
# paragraphs.
#
# Each line is given to the handler selected by initialize ($parse_func)
# together with the parser state; the handler returns the updated state.
# The pending paragraph is flushed with do_paragraph when the input moves to
# another file, when a handler requests it through $end_of_paragraph, and at
# the end of the input.
sub parse {
    my $self = shift;
    my ($line,$ref);
    my $paragraph="";
    my $wrapped_mode = 1;
    my $expect_header = 1;
    my $end_of_paragraph = 0;
    ($line,$ref)=$self->shiftline();
    # References look like "file:line"; $file is the file of the current line.
    my $file = $ref;
    $file =~ s/:[0-9]+$// if defined($line);
    while (defined($line)) {
        # The capture is taken in list context and tested before use, so
        # that a reference without a ":line" suffix is not compared against
        # a stale or undefined $1 (it is treated as belonging to the
        # current file).
        my ($current_file) = ($ref =~ m/^(.*):[0-9]+$/);
        if (defined $current_file and $current_file ne $file) {
            # New input file: do not merge paragraphs across files.
            $file = $current_file;
            do_paragraph($self,$paragraph,$wrapped_mode);
            $paragraph="";
            $wrapped_mode = 1;
            $expect_header = 1;
        }

        chomp($line);
        $self->{ref}="$ref";
        ($paragraph,$wrapped_mode,$expect_header,$end_of_paragraph) = $parse_func->($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph);
        # paragraphs starting by a bullet, or numbered
        # or paragraphs with a line containing many consecutive spaces
        # (more than 3)
        # are considered as verbatim paragraphs
        $wrapped_mode = 0 if (   $paragraph =~ m/^(\*|[0-9]+[.)] )/s
                              or $paragraph =~ m/[ \t][ \t][ \t]/s);
        $wrapped_mode = 0 if (    $tabs eq "verbatim"
                              and $paragraph =~ m/\t/s);
        if ($markdown) {
            # Some Markdown markup can (or might) not survive wrapping
            $wrapped_mode = 0 if (
                   $paragraph =~ /^>/ms                   # blockquote
                or $paragraph =~ /^( {8}|\t)/ms           # monospaced
                or $paragraph =~ /^\$(\S+[{}]\S*\s*)+/ms  # Xapian macro
                or $paragraph =~ /<(?![a-z]+[:@])/ms      # maybe html (tags but not wiki <URI>)
                or $paragraph =~ /^[^<]+>/ms              # maybe html (tag with vertical space)
                or $paragraph =~ /\S  $/ms                # explicit newline
                or $paragraph =~ /\[\[\!\S[^\]]+$/ms      # macro begin
            );
        }
        if ($end_of_paragraph) {
            # The handler asked for the paragraph to end with this line.
            do_paragraph($self,$paragraph,$wrapped_mode);
            $paragraph="";
            $wrapped_mode = 1;
            $end_of_paragraph = 0;
        }
        ($line,$ref)=$self->shiftline();
    }
    # Flush what is left at the end of the input.
    if (length $paragraph) {
        do_paragraph($self,$paragraph,$wrapped_mode);
    }
}
833 | ␊ |
# Translate one paragraph and push the result to the output.
#
# Arguments: the object, the paragraph, the wrap flag (true when the
# paragraph may be rewrapped) and an optional type for the PO entry
# (defaults to $self->{type}, then to "Plain text").
# Nothing is done for an empty paragraph.
sub do_paragraph {
    my ($self, $paragraph, $wrap, $type) = @_;
    $type = $type || $self->{type} || "Plain text";
    return if ($paragraph eq "");

# DEBUG
#    my $b;
#    if (defined $self->{bullet}) {
#            $b = $self->{bullet};
#    } else {
#            $b = "UNDEF";
#    }
#    $type .= " verbatim: '".($self->{verbatim}||"NONE")."' bullet: '$b' indent: '".($self->{indent}||"NONE")."' type: '".($self->{type}||"NONE")."'";

    if ($bullets and not $wrap and not defined $self->{verbatim}) {
        # Detect bullets
        # |        * blah blah
        # |<spaces>  blah
        # |          ^-- aligned
        # <empty line>
        #
        # Other bullets supported:
        # - blah         o blah         + blah
        # 1. blah        1) blah       (1) blah
        #
        # Each pass of the loop handles the item at the beginning of the
        # paragraph and goes on with the rest. As soon as the beginning is
        # not a well formed item, what remains is handled as an ordinary
        # paragraph below.
        BULLET_ITEM:
        while ($paragraph =~ m/^(\s*)((?:[-*o+]|([0-9]+[.\)])|\([0-9]+\))\s+)([^\n]*\n)(.*)$/s) {
            my ($indent1, $bullet, $text, $para) = ($1, $2, $4, $5);
            # Indentation expected for the following lines of the item
            my $indent2 = $indent1.(' ' x length $bullet);

            # Append the lines aligned with the text of the item.
            while (    $para !~ m/$indent2(?:[-*o+]|([0-9]+[.\)])|\([0-9]+\))\s+/
                   and $para =~ s/^$indent2(\S[^\n]*\n)//s) {
                $text .= $1;
            }

            # TODO: detect if a line starts with the same bullet
            # Text with runs of spaces is kept verbatim.
            last BULLET_ITEM if ($text =~ m/\S[ \t][ \t][ \t]+\S/s);

            # The rest must be empty or begin with the same kind of bullet
            # (any number is accepted for numbered bullets).
            my $bullet_regex = quotemeta($indent1.$bullet);
            $bullet_regex =~ s/[0-9]+/\\d\+/;
            last BULLET_ITEM unless ($para eq '' or $para =~ m/^$bullet_regex\S/s);

            my $trans = $self->translate($text,
                                         $self->{ref},
                                         "Bullet: '$indent1$bullet'",
                                         "wrap" => 1,
                                         "wrapcol" => - (length $indent2));
            # Put the bullet back, and indent the following lines.
            $trans =~ s/^/$indent1$bullet/s;
            $trans =~ s/\n(.)/\n$indent2$1/sg;
            $self->pushline( $trans."\n" );

            return if ($para eq '');
            # Another bullet
            $paragraph = $para;
        }
    }

    # Ordinary paragraph. In wrap mode the final newlines are set aside and
    # appended again after the translation.
    my $end = "";
    if ($wrap) {
        $paragraph =~ s/^(.*?)(\n*)$/$1/s;
        $end = $2 || "";
    }
    my $t = $self->translate($paragraph,
                             $self->{ref},
                             $type,
                             "comment" => join("\n", @comments),
                             "wrap" => $wrap);
    # The pending comments have been attached to this string.
    @comments = ();

    if (defined $self->{bullet}) {
        # The line handler recorded a marker for this paragraph: put it
        # back, and indent the following lines accordingly.
        my $bullet = $self->{bullet};
        my $indent1 = $self->{indent};
        my $indent2 = $indent1.(' ' x length($bullet));
        $t =~ s/^/$indent1$bullet/s;
        $t =~ s/\n(.)/\n$indent2$1/sg;
    }
    $self->pushline( $t.$end );
}
914 | ␊ |
915 | 1;␊ |
916 | ␊ |
917 | =head1 STATUS OF THIS MODULE␊ |
918 | ␊ |
919 | Tested successfully on simple text files and NEWS.Debian files.␊ |
920 | ␊ |
921 | =head1 AUTHORS␊ |
922 | ␊ |
923 | Nicolas François <nicolas.francois@centraliens.net>␊ |
924 | ␊ |
925 | =head1 COPYRIGHT AND LICENSE␊ |
926 | ␊ |
927 | Copyright 2005-2008 by Nicolas FRANÇOIS <nicolas.francois@centraliens.net>.␊ |
928 | ␊ |
929 | This program is free software; you may redistribute it and/or modify it␊ |
930 | under the terms of GPL (see the COPYING file).␊ |
931 | |