Root/
Source at commit 2903 created 6 years 8 months ago. By ifabio, Comment out message info. | |
---|---|
1 | #!/usr/bin/perl -w␊ |
2 | ␊ |
3 | # Po4a::Text.pm␊ |
4 | #␊ |
5 | # extract and translate translatable strings from text documents␊ |
6 | #␊ |
7 | # This program is free software; you can redistribute it and/or modify␊ |
8 | # it under the terms of the GNU General Public License as published by␊ |
9 | # the Free Software Foundation; either version 2 of the License, or␊ |
10 | # (at your option) any later version.␊ |
11 | #␊ |
12 | # This program is distributed in the hope that it will be useful,␊ |
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of␊ |
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the␊ |
15 | # GNU General Public License for more details.␊ |
16 | #␊ |
17 | # You should have received a copy of the GNU General Public License␊ |
18 | # along with this program; if not, write to the Free Software␊ |
19 | # Foundation, Inc.,␊ |
20 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA␊ |
21 | #␊ |
22 | ########################################################################␊ |
23 | ␊ |
24 | =encoding UTF-8␊ |
25 | ␊ |
26 | =head1 NAME␊ |
27 | ␊ |
28 | Locale::Po4a::Text - convert text documents from/to PO files␊ |
29 | ␊ |
30 | =head1 DESCRIPTION␊ |
31 | ␊ |
32 | The po4a (PO for anything) project goal is to ease translations (and more␊ |
33 | interestingly, the maintenance of translations) using gettext tools on␊ |
34 | areas where they were not expected like documentation.␊ |
35 | ␊ |
36 | Locale::Po4a::Text is a module to help the translation of text documents into␊ |
37 | other [human] languages.␊ |
38 | ␊ |
39 | Paragraphs are split on empty lines (or lines containing only spaces or␊ |
40 | tabulations).␊ |
41 | ␊ |
42 | If a paragraph contains a line starting with a space (or a tabulation), this␊ |
43 | paragraph won't be rewrapped.␊ |
44 | ␊ |
45 | =cut␊ |
46 | ␊ |
47 | package Locale::Po4a::Text;␊ |
48 | ␊ |
49 | use 5.006;␊ |
50 | use strict;␊ |
51 | use warnings;␊ |
52 | ␊ |
53 | require Exporter;␊ |
54 | use vars qw(@ISA @EXPORT);␊ |
55 | @ISA = qw(Locale::Po4a::TransTractor);␊ |
56 | @EXPORT = qw();␊ |
57 | ␊ |
58 | use Locale::Po4a::TransTractor;␊ |
59 | use Locale::Po4a::Common;␊ |
60 | ␊ |
61 | =head1 OPTIONS ACCEPTED BY THIS MODULE␊ |
62 | ␊ |
63 | These are this module's particular options:␊ |
64 | ␊ |
65 | =over␊ |
66 | ␊ |
67 | =item B<nobullets>␊ |
68 | ␊ |
69 | Deactivate detection of bullets.␊ |
70 | ␊ |
71 | By default, when a bullet is detected, the bullet paragraph is not considered␊ |
72 | as a verbatim paragraph (with the no-wrap flag in the PO file), but the module␊ |
73 | rewraps this paragraph in the generated PO file and in the translation.␊ |
74 | ␊ |
75 | =cut␊ |
76 | ␊ |
77 | my $bullets = 1;␊ |
78 | ␊ |
79 | =item B<tabs=>I<mode>␊ |
80 | ␊ |
81 | Specify how tabulations shall be handled. The I<mode> can be any of:␊ |
82 | ␊ |
83 | =over␊ |
84 | ␊ |
85 | =item B<split>␊ |
86 | ␊ |
87 | Lines with tabulations introduce breaks in the current paragraph.␊ |
88 | ␊ |
89 | =item B<verbatim>␊ |
90 | ␊ |
91 | Paragraph containing tabulations will not be re-wrapped.␊ |
92 | ␊ |
93 | =back␊ |
94 | ␊ |
95 | By default, tabulations are considered as spaces.␊ |
96 | ␊ |
97 | =cut␊ |
98 | ␊ |
99 | my $tabs = "";␊ |
100 | ␊ |
101 | =item B<breaks=>I<regex>␊ |
102 | ␊ |
103 | A regular expression matching lines which introduce breaks.␊ |
104 | The regular expression will be anchored so that the whole line must match.␊ |
105 | ␊ |
106 | =cut␊ |
107 | ␊ |
108 | my $breaks;␊ |
109 | ␊ |
110 | =item B<debianchangelog>␊ |
111 | ␊ |
112 | Handle the header and footer of␊ |
113 | released versions, which only contain non-translatable information.␊ |
114 | ␊ |
115 | =cut␊ |
116 | ␊ |
117 | my $debianchangelog = 0;␊ |
118 | ␊ |
119 | =item B<fortunes>␊ |
120 | ␊ |
121 | Handle the fortunes format, which separates fortunes with a line␊ |
122 | consisting of '%' or '%%', and uses '%%' as the beginning of a comment.␊ |
123 | ␊ |
124 | =cut␊ |
125 | ␊ |
126 | my $fortunes = 0;␊ |
127 | ␊ |
128 | =item B<markdown>␊ |
129 | ␊ |
130 | Handle some special markup in Markdown-formatted texts.␊ |
131 | ␊ |
132 | =cut␊ |
133 | ␊ |
134 | my $markdown = 0;␊ |
135 | ␊ |
136 | =item B<asciidoc>␊ |
137 | ␊ |
138 | Handle documents in the AsciiDoc format.␊ |
139 | ␊ |
140 | =cut␊ |
141 | ␊ |
142 | my $asciidoc = 0;␊ |
143 | ␊ |
144 | =item B<control>[B<=>I<taglist>]␊ |
145 | ␊ |
146 | Handle control files.␊ |
147 | A comma-separated list of tags to be translated can be provided.␊ |
148 | ␊ |
149 | =cut␊ |
150 | ␊ |
# Tags selected through the "control" option.  initialize() stores one key
# per requested tag, or the empty-string key when the option is given
# without a tag list; parse_control() translates a field when either the
# empty key or the field's own tag is present.
my %control = ();

# Reference to the line parser used by parse().  It defaults to the plain
# text parser and is replaced by initialize() according to the options.
my $parse_func = \&parse_fallback;

# Comment lines collected by parse_asciidoc() and not yet emitted.  They
# are passed as the "comment" of the next translated string and then
# cleared.
my @comments = ();
156 | ␊ |
157 | =back␊ |
158 | ␊ |
159 | =cut␊ |
160 | ␊ |
# Configure the module for one document.
#
# Registers the options this module understands, dies with an
# "Unknown option" message for any other option, and then selects the line
# parser and the parsing flags from the options that were passed.
#
# Parameters: $self, followed by a flat list of option => value pairs.
#
# The parser settings are kept in file-scoped variables shared by every
# instance, so they are put back to their defaults first.  Without this, a
# document initialized with no options would silently inherit the parser
# and flags selected for the previous document.
sub initialize {
    my $self    = shift;
    my %options = @_;

    # Back to the defaults declared at file scope.
    $bullets    = 1;
    $tabs       = "";
    undef $breaks;
    $markdown   = 0;
    $asciidoc   = 0;
    %control    = ();
    $parse_func = \&parse_fallback;
    @comments   = ();

    # Options known to this module; the value only marks the key as valid
    # until the caller's value overwrites it below.
    $self->{options}{'control'} = "";
    foreach my $known (qw(asciidoc breaks debianchangelog debug fortunes
                          markdown nobullets tabs verbose)) {
        $self->{options}{$known} = 1;
    }

    foreach my $opt (keys %options) {
        die wrap_mod("po4a::text",
                     dgettext("po4a", "Unknown option: %s"), $opt)
            unless exists $self->{options}{$opt};
        $self->{options}{$opt} = $options{$opt};
    }

    $bullets = 0                  if defined $options{'nobullets'};
    $tabs    = $options{'tabs'}   if defined $options{'tabs'};
    $breaks  = $options{'breaks'} if defined $options{'breaks'};

    # When several format options are given, the last one tested wins.
    if (defined $options{'debianchangelog'}) {
        $parse_func = \&parse_debianchangelog;
    }

    if (defined $options{'fortunes'}) {
        $parse_func = \&parse_fortunes;
    }

    if (defined $options{'markdown'}) {
        $parse_func = \&parse_markdown;
        $markdown   = 1;
    }

    if (defined $options{'asciidoc'}) {
        $parse_func = \&parse_asciidoc;
        $asciidoc   = 1;
        warn wrap_mod("po4a::text",
                      dgettext("po4a", "asciidoc option deprecated, use asciidoc format instead of text"));
    }

    if (defined $options{'control'}) {
        $parse_func = \&parse_control;
        if ($options{'control'} eq "1") {
            # No tag list given: the empty key means "translate every tag".
            $control{''} = 1;
        } else {
            foreach my $tag (split(',', $options{'control'})) {
                # Tolerate "a, b" style lists and stray commas.
                $tag =~ s/^\s+//;
                $tag =~ s/\s+$//;
                next if $tag eq "";
                $control{$tag} = 1;
            }
        }
    }
}
226 | ␊ |
# Generic line parser for plain text; the format-specific parsers delegate
# to it for every line they do not handle themselves.
#
# Parameters:
#   $self              the parser object
#   $line              the current line, without its trailing newline
#   $ref               the reference of the line (not used here)
#   $paragraph         the text accumulated for the current paragraph
#   $wrapped_mode      true when the current paragraph may be rewrapped
#   $expect_header     passed through unchanged
#   $end_of_paragraph  set to 1 when the paragraph must end after this line
#
# Returns the updated ($paragraph, $wrapped_mode, $expect_header,
# $end_of_paragraph).
sub parse_fallback {
    my ($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph) = @_;
    if (   ($line =~ /^\s*$/)
        or (defined $breaks and $line =~ m/^$breaks$/)) {
        # Break paragraphs on lines containing only spaces, or on lines
        # matching the user supplied "breaks" expression
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph = "";
        $wrapped_mode = 1 unless defined($self->{verbatim});
        $self->pushline($line."\n");
        undef $self->{controlkey};
    } elsif ($line =~ /^-- $/) {
        # Break paragraphs on email signature hint
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph = "";
        $wrapped_mode = 1;
        $self->pushline($line."\n");
    } elsif (   $line =~ /^=+$/
             or $line =~ /^_+$/
             or $line =~ /^-+$/) {
        # A ruler line closes the current paragraph and belongs to it; the
        # paragraph is then emitted without rewrapping.
        $wrapped_mode = 0;
        $paragraph .= $line."\n";
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph = "";
        $wrapped_mode = 1;
    } elsif ($tabs eq "split" and $line =~ m/\t/ and $paragraph !~ m/\t/s) {
        # tabs=split: first line with a tabulation starts a new, verbatim
        # paragraph
        $wrapped_mode = 0;
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph = "$line\n";
        $wrapped_mode = 0;
    } elsif ($tabs eq "split" and $line !~ m/\t/ and $paragraph =~ m/\t/s) {
        # tabs=split: first line without a tabulation after a tabulated
        # paragraph starts a new, wrappable paragraph
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph = "$line\n";
        $wrapped_mode = 1;
    } else {
        if ($line =~ /^\s/) {
            # A line starting by a space indicates a non-wrap
            # paragraph
            $wrapped_mode = 0;
        }
        if ($markdown and
            (   $line =~ /\S {2,}$/    # explicit newline
             or $line =~ /"""$/)) {    # """ textblock inside macro begin
            # Markdown markup needing separation _after_ this line.
            # An explicit newline is two or more trailing spaces; a single
            # trailing space is ordinary text and must not split the
            # paragraph.
            $end_of_paragraph = 1;
        } else {
            undef $self->{bullet};
            undef $self->{indent};
        }
        # TODO: comments
        $paragraph .= $line."\n";
    }
    return ($paragraph,$wrapped_mode,$expect_header,$end_of_paragraph);
}
281 | ␊ |
# Line parser for Debian changelogs.
#
# The header line of an entry ("package (version) distribution;
# urgency=...") and its trailer line (" -- Name <email>  date") are copied
# to the output untranslated; every other line goes to parse_fallback().
# $expect_header is cleared once a header has been seen and set again after
# a trailer, so a header is only recognized at the start of an entry.
#
# Parameters and return value are the same as for parse_fallback().
sub parse_debianchangelog {
    my ($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph) = @_;
    if ($expect_header and
        $line =~ /^(\w[-+0-9a-z.]*)\ \(([^\(\) \t]+)\)   # src, version
                   \s+([-+0-9a-z.]+);                      # distribution
                   \s*urgency\s*\=\s*(.*\S)\s*$/ix) {      #
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph = "";
        $self->pushline("$line\n");
        $expect_header = 0;
    } elsif ($line =~ m/^ \-\- (.*) <(.*)>[ ]{1,2}((\w+\,\s*)?\d{1,2}\s+\w+\s+\d{4}\s+\d{1,2}:\d\d:\d\d\s+[-+]\d{4}(\s+\([^\\\(\)]+\))?)\s*$/) {
        # Found trailer.
        # The changelog format puts two spaces between the e-mail address
        # and the date; one is tolerated too.  The optional time zone
        # comment in parentheses may be longer than one character, and
        # trailing whitespace is ignored.
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph = "";
        $self->pushline("$line\n");
        $expect_header = 1;
    } else {
        return parse_fallback($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph);
    }
    return ($paragraph,$wrapped_mode,$expect_header,$end_of_paragraph);
}
303 | ␊ |
# Line parser for fortune files.
#
# A line made of '%' or '%%' (optionally followed by spaces) ends the
# current fortune: the accumulated paragraph is emitted and the separator
# is copied to the output.  On any other line, a '%%' comment is removed
# and what remains is appended to the current paragraph.
#
# Parameters and return value are the same as for parse_fallback().
sub parse_fortunes {
    my ($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph) = @_;
    if ($line =~ m/^%%?\s*$/) {
        # Found end of fortune
        do_paragraph($self,$paragraph,$wrapped_mode);
        # do_paragraph() strips the final newline of a wrapped paragraph;
        # put it back so that the separator starts on its own line.
        $self->pushline("\n") unless (   $wrapped_mode == 0
                                      or $paragraph eq "");
        $paragraph = "";
        $wrapped_mode = 1;
        $self->pushline("$line\n");
    } else {
        my $had_comment = ($line =~ s/%%(.*)$//);
        # The text of the fortune must be accumulated here, otherwise it
        # never reaches do_paragraph().  A line which held nothing but a
        # comment contributes no text.
        $paragraph .= $line."\n" unless ($had_comment and $line eq "");
    }
    return ($paragraph,$wrapped_mode,$expect_header,$end_of_paragraph);
}
319 | ␊ |
# Line parser for control files.
#
# "Tag: value" lines are emitted at once, the value being translated when
# the tag was selected with the "control" option.  The first such line of a
# stanza is remembered in $self->{controlkey} and added to the type of the
# following strings.  " ." lines and " Link:" lines end the current
# paragraph, lines continuing the current indentation are accumulated as
# "Long Description", and anything else is left to parse_fallback().
#
# Parameters and return value are the same as for parse_fallback().
sub parse_control {
    my ($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph) = @_;

    # Context appended to the type of the strings; it is computed from the
    # key known when the line is read, before this line can define it.
    my $key_suffix = defined $self->{controlkey} ? ", ".$self->{controlkey} : "";

    if ($line =~ m/^([^ :]*): *(.*)$/) {
        my ($tag, $val) = ($1, $2);
        warn wrap_mod("po4a::text", dgettext("po4a", "Unrecognized section: %s"), $paragraph)
            unless $paragraph eq "";

        my $t = ($control{''} or $control{$tag})
            ? $self->translate($val,
                               $self->{ref},
                               $tag.$key_suffix,
                               "wrap" => 0)
            : $val;

        $self->{controlkey} = "$tag: $val" unless defined $self->{controlkey};
        $self->pushline("$tag: $t\n");
        $self->{bullet} = "";
        $self->{indent} = " ";
        return ("", 1, $expect_header, $end_of_paragraph);
    }

    if ($line eq " .") {
        # Paragraph separator inside a long description
        do_paragraph($self,$paragraph,$wrapped_mode, "Long Description".$key_suffix);
        $self->pushline($line."\n");
        $self->{bullet} = "";
        $self->{indent} = " ";
        return ("", $wrapped_mode, $expect_header, $end_of_paragraph);
    }

    if ($line =~ m/^ Link: +(.*)$/) {
        my $link = $1;
        do_paragraph($self,$paragraph,$wrapped_mode, "Long Description".$key_suffix);
        # The label and the link itself are two separate strings
        my $label  = $self->translate("Link: ",
                                      $self->{ref},
                                      "Link",
                                      "wrap" => 0);
        my $target = $self->translate($link,
                                      $self->{ref},
                                      "Link".$key_suffix,
                                      "wrap" => 0);
        $self->pushline(" $label$target\n");
        return ("", $wrapped_mode, $expect_header, $end_of_paragraph);
    }

    if (defined $self->{indent} and $line =~ m/^$self->{indent}\S/) {
        # Continuation line of the current field
        $self->{type} = "Long Description".$key_suffix;
        return ($paragraph.$line."\n", $wrapped_mode, $expect_header, $end_of_paragraph);
    }

    return parse_fallback($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph);
}
374 | ␊ |
# Alternations (without enclosing group) of the AsciiDoc style names, meant
# to be interpolated inside a group of a regular expression.
my $asciidoc_RE_SECTION_TEMPLATES = join '|', qw(
    sect1 sect2 sect3 sect4 preface colophon dedication synopsis index
);
my $asciidoc_RE_STYLE_ADMONITION = join '|', qw(
    TIP NOTE IMPORTANT WARNING CAUTION
);
my $asciidoc_RE_STYLE_PARAGRAPH = join '|', qw(
    normal literal verse quote listing abstract partintro comment
    example sidebar source music latex graphviz
);
my $asciidoc_RE_STYLE_NUMBERING = join '|', qw(
    arabic loweralpha upperalpha lowerroman upperroman
);
my $asciidoc_RE_STYLE_LIST = join '|', qw(
    appendix horizontal qanda glossary bibliography
);
# Every style accepted in a "[style]" line
my $asciidoc_RE_STYLES = join '|',
    $asciidoc_RE_SECTION_TEMPLATES,
    $asciidoc_RE_STYLE_ADMONITION,
    $asciidoc_RE_STYLE_PARAGRAPH,
    $asciidoc_RE_STYLE_NUMBERING,
    $asciidoc_RE_STYLE_LIST,
    "float";
381 | ␊ |
BEGIN {
    # Unicode::GCString is optional: remember once whether it could be
    # loaded.
    my $have_gcstring = eval { require Unicode::GCString } ? 1 : 0;
    eval {
        # columns($text, $encoder, $ref)
        #
        # Returns the number of columns taken by $text.  When $encoder is
        # defined and its name is not "ascii", $text is decoded with it
        # first.  With Unicode::GCString the width is computed by that
        # module.  Without it, the length of $text minus one trailing
        # newline is returned, provided no decoding was needed; otherwise
        # the function dies, reporting $ref as the location.
        sub columns($$$) {
            my ($text, $encoder, $where) = @_;
            my $needs_decoding = (defined($encoder) && $encoder->name ne "ascii");

            $text = $encoder->decode($text) if $needs_decoding;
            return Unicode::GCString->new($text)->columns() if $have_gcstring;

            $text =~ s/\n$//s;
            return length($text) unless $needs_decoding;
            die wrap_mod("po4a::text",
                         dgettext("po4a", "Detection of two line titles failed at %s\nInstall the Unicode::GCString module!"), $where)
        }
    };
}
401 | ␊ |
# Line parser for AsciiDoc documents.
#
# The line is tested against a chain of AsciiDoc constructs; the first one
# which matches decides what is translated and what is copied as is.
# Lines matching none of them are handled by parse_fallback().
#
# State kept in $self between lines:
#   {verbatim}  defined while inside a delimited block whose content is
#               kept as is: 1 for blocks emitted without rewrapping,
#               2 for a comment block (its lines are collected in
#               @comments and not output), 3 for a block copied to the
#               output without translation
#   {type}      the type of the current delimited block or style
#   {bullet}    the bullet (or label end) put back in front of the
#   {indent}    translated paragraph by do_paragraph(), and its indentation
#
# Parameters and return value are the same as for parse_fallback().
sub parse_asciidoc {
    my ($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph) = @_;
    if ((defined $self->{verbatim}) and ($self->{verbatim} == 3)) {
        # Untranslated blocks
        $self->pushline($line."\n");
        if ($line =~ m/^~{4,}$/) {
            # Closing delimiter: leave the block
            undef $self->{verbatim};
            undef $self->{type};
            $wrapped_mode = 1;
        }
    } elsif ((defined $self->{verbatim}) and ($self->{verbatim} == 2)) {
        # CommentBlock
        if ($line =~ m/^\/{4,}$/) {
            # Closing delimiter: leave the block
            undef $self->{verbatim};
            undef $self->{type};
            $wrapped_mode = 1;
        } else {
            # Kept as a comment for the next translated string
            push @comments, $line;
        }
    } elsif ((not defined($self->{verbatim})) and ($line =~ m/^(\+|--)$/)) {
        # List Item Continuation or List Block
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph="";
        $self->pushline($line."\n");
    } elsif ((not defined($self->{verbatim})) and
             ($line =~ m/^(={2,}|-{2,}|~{2,}|\^{2,}|\+{2,})$/) and
             (defined($paragraph) ) and
             ($paragraph =~ m/^[^\n]*\n$/s) and
             (columns($paragraph, $self->{TT}{po_in}{encoder}, $ref) == (length($line)))) {
        # Found title
        # (a one-line paragraph followed by an underline of the same width)
        $wrapped_mode = 0;
        # The first character of the underline gives the title level
        my $level = $line;
        $level =~ s/^(.).*$/$1/;
        $paragraph =~ s/\n$//s;
        my $t = $self->translate($paragraph,
                                 $self->{ref},
                                 "Title $level",
                                 "comment" => join("\n", @comments),
                                 "wrap" => 0);
        $self->pushline($t."\n");
        $paragraph="";
        @comments=();
        $wrapped_mode = 1;
        # The underline is rebuilt with the width of the translated title
        $self->pushline(($level x (columns($t, $self->{TT}{po_in}{encoder}, $ref)))."\n");
    } elsif ($line =~ m/^(={1,5})( +)(.*?)( +\1)?$/) {
        my $titlelevel1 = $1;
        my $titlespaces = $2;
        my $title = $3;
        my $titlelevel2 = $4||"";
        # Found one line title
        do_paragraph($self,$paragraph,$wrapped_mode);
        $wrapped_mode = 0;
        $paragraph="";
        my $t = $self->translate($title,
                                 $self->{ref},
                                 "Title $titlelevel1",
                                 "comment" => join("\n", @comments),
                                 "wrap" => 0);
        # Only the title text is translated; the markers are put back
        $self->pushline($titlelevel1.$titlespaces.$t.$titlelevel2."\n");
        @comments=();
        $wrapped_mode = 1;
    } elsif ($line =~ m/^(\/{4,}|\+{4,}|-{4,}|\.{4,}|\*{4,}|_{4,}|={4,}|~{4,}|\|={4,})$/) {
        # Found one delimited block
        # The first character of the delimiter identifies the block type
        my $t = $line;
        $t =~ s/^(.).*$/$1/;
        my $type = "delimited block $t";
        if (defined $self->{verbatim} and ($self->{type} ne $type)) {
            # Delimiter of another type inside a verbatim block: it is
            # part of the content
            $paragraph .= "$line\n";
        } else {
            do_paragraph($self,$paragraph,$wrapped_mode);
            if (    (defined $self->{type})
                and ($self->{type} eq $type)) {
                # Same delimiter as the open block: this closes it
                undef $self->{type};
                undef $self->{verbatim};
                $wrapped_mode = 1;
            } else {
                # Opening delimiter
                if ($t eq "\/") {
                    # CommentBlock, should not be treated
                    $self->{verbatim} = 2;
                } elsif ($t eq "+") {
                    # PassthroughBlock
                    $wrapped_mode = 0;
                    $self->{verbatim} = 1;
                } elsif ($t eq "-" or $t eq "|") {
                    # ListingBlock
                    $wrapped_mode = 0;
                    $self->{verbatim} = 1;
                } elsif ($t eq ".") {
                    # LiteralBlock
                    $wrapped_mode = 0;
                    $self->{verbatim} = 1;
                } elsif ($t eq "*") {
                    # SidebarBlock
                    $wrapped_mode = 1;
                } elsif ($t eq "_") {
                    # QuoteBlock
                    if (    (defined $self->{type})
                        and ($self->{type} eq "verse")) {
                        # A verse keeps its line breaks
                        $wrapped_mode = 0;
                        $self->{verbatim} = 1;
                    } else {
                        $wrapped_mode = 1;
                    }
                } elsif ($t eq "=") {
                    # ExampleBlock
                    $wrapped_mode = 1;
                } elsif ($t eq "~") {
                    # Filter blocks, TBC: not translated
                    $wrapped_mode = 0;
                    $self->{verbatim} = 3;
                }
                $self->{type} = $type;
            }
            $paragraph="";
            # The opening delimiter of a comment block is not output
            $self->pushline($line."\n") unless defined($self->{verbatim}) && $self->{verbatim} == 2;
        }
    } elsif ((not defined($self->{verbatim})) and ($line =~ m/^\/\/(.*)/)) {
        # Comment line
        push @comments, $1;
    } elsif (not defined $self->{verbatim} and
             ($line =~ m/^\[\[([^\]]*)\]\]$/)) {
        # Found BlockId
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph="";
        $wrapped_mode = 1;
        $self->pushline($line."\n");
        undef $self->{bullet};
        undef $self->{indent};
    } elsif (not defined $self->{verbatim} and
             ($paragraph eq "") and
             ($line =~ m/^((?:$asciidoc_RE_STYLE_ADMONITION):\s+)(.*)$/)) {
        # Admonition paragraph: the label is output as is, and the text
        # after it starts a new paragraph
        my $type = $1;
        my $text = $2;
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph=$text."\n";
        $wrapped_mode = 1;
        $self->pushline($type);
        undef $self->{bullet};
        undef $self->{indent};
    } elsif (not defined $self->{verbatim} and
             ($line =~ m/^\[($asciidoc_RE_STYLES)\]$/)) {
        # Style line, copied as is
        my $type = $1;
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph="";
        $wrapped_mode = 1;
        $self->pushline($line."\n");
        if ($type eq "verse") {
            $wrapped_mode = 0;
        }
        undef $self->{bullet};
        undef $self->{indent};
    } elsif (not defined $self->{verbatim} and
             ($line =~ m/^\[(['"]?)(verse|quote)\1, +(.*)\]$/)) {
        # verse or quote style with arguments: the arguments are translated
        my $quote = $1 || '';
        my $type = $2;
        my $arg = $3;
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph="";
        my $t = $self->translate($arg,
                                 $self->{ref},
                                 "$type",
                                 "comment" => join("\n", @comments),
                                 "wrap" => 0);
        $self->pushline("[$quote$type$quote, $t]\n");
        @comments=();
        $wrapped_mode = 1;
        if ($type eq "verse") {
            $wrapped_mode = 0;
        }
        # Remembered for the QuoteBlock delimiter which may follow
        $self->{type} = $type;
        undef $self->{bullet};
        undef $self->{indent};
    } elsif (not defined $self->{verbatim} and
             ($line =~ m/^\[icon="(.*)"\]$/)) {
        # icon attribute: its value is translated
        my $arg = $1;
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph="";
        my $t = $self->translate($arg,
                                 $self->{ref},
                                 "icon",
                                 "comment" => join("\n", @comments),
                                 "wrap" => 0);
        $self->pushline("[icon=\"$t\"]\n");
        @comments=();
        $wrapped_mode = 1;
        undef $self->{bullet};
        undef $self->{indent};
    } elsif (not defined $self->{verbatim} and
             ($line =~ m/^\[icons=None, +caption="(.*)"\]$/)) {
        # caption attribute: its value is translated
        my $arg = $1;
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph="";
        my $t = $self->translate($arg,
                                 $self->{ref},
                                 "caption",
                                 "comment" => join("\n", @comments),
                                 "wrap" => 0);
        $self->pushline("[icons=None, caption=\"$t\"]\n");
        @comments=();
        $wrapped_mode = 1;
        undef $self->{bullet};
        undef $self->{indent};
    } elsif (not defined $self->{verbatim} and
             ($line =~ m/^(\s*)([*_+`'#[:alnum:]].*)((?:::|;;|\?\?|:-)(?: *\\)?)$/)) {
        my $indent = $1;
        my $label = $2;
        my $labelend = $3;
        # Found labeled list
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph="";
        $wrapped_mode = 1;
        $self->{bullet} = "";
        $self->{indent} = $indent;
        my $t = $self->translate($label,
                                 $self->{ref},
                                 "Labeled list",
                                 "comment" => join("\n", @comments),
                                 "wrap" => 0);
        $self->pushline("$indent$t$labelend\n");
        @comments=();
    } elsif (not defined $self->{verbatim} and
             ($line =~ m/^(\s*)(\S.*)((?:::|;;)\s+)(.*)$/)) {
        my $indent = $1;
        my $label = $2;
        my $labelend = $3;
        my $labeltext = $4;
        # Found Horizontal Labeled Lists
        do_paragraph($self,$paragraph,$wrapped_mode);
        # The text following the label starts the next paragraph
        $paragraph=$labeltext."\n";
        $wrapped_mode = 1;
        $self->{bullet} = "";
        $self->{indent} = $indent;
        my $t = $self->translate($label,
                                 $self->{ref},
                                 "Labeled list",
                                 "comment" => join("\n", @comments),
                                 "wrap" => 0);
        # No newline: the paragraph is output on the same line
        $self->pushline("$indent$t$labelend");
        @comments=();
    } elsif (not defined $self->{verbatim} and
             ($line =~ m/^\:(\S.*?)(:\s*)(.*)$/)) {
        my $attrname = $1;
        my $attrsep = $2;
        my $attrvalue = $3;
        # Found a Attribute entry
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph="";
        $wrapped_mode = 1;
        undef $self->{bullet};
        undef $self->{indent};
        my $t = $self->translate($attrvalue,
                                 $self->{ref},
                                 "Attribute :$attrname:",
                                 "comment" => join("\n", @comments),
                                 "wrap" => 0);
        $self->pushline(":$attrname$attrsep$t\n");
        @comments=();
    } elsif (not defined $self->{verbatim} and
             ($line !~ m/^\.\./) and ($line =~ m/^\.(\S.*)$/)) {
        my $title = $1;
        # Found block title
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph="";
        $wrapped_mode = 1;
        undef $self->{bullet};
        undef $self->{indent};
        my $t = $self->translate($title,
                                 $self->{ref},
                                 "Block title",
                                 "comment" => join("\n", @comments),
                                 "wrap" => 0);
        $self->pushline(".$t\n");
        @comments=();
    } elsif (not defined $self->{verbatim} and
             ($line =~ m/^(\s*)((?:[-*o+]|(?:[0-9]+[.\)])|(?:[a-z][.\)])|\([0-9]+\)|\.|\.\.)\s+)(.*)$/)) {
        # Bulleted or numbered list item: the bullet is remembered and the
        # text after it starts a new paragraph
        my $indent = $1||"";
        my $bullet = $2;
        my $text = $3;
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph = $text."\n";
        $self->{indent} = $indent;
        $self->{bullet} = $bullet;
    } elsif (not defined $self->{verbatim} and
             ($line =~ m/^((?:<?[0-9]+)?> +)(.*)$/)) {
        # Line starting with ">", "N>" or "<N>": handled like a bullet
        # without indentation
        my $bullet = $1;
        my $text = $2;
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph = $text."\n";
        $self->{indent} = "";
        $self->{bullet} = $bullet;
    } elsif (not defined $self->{verbatim} and
             (defined $self->{bullet} and $line =~ m/^(\s+)(.*)$/)) {
        # Indented line while a bullet is pending
        my $indent = $1;
        my $text = $2;
        if (not defined $self->{indent}) {
            $paragraph .= $text."\n";
            $self->{indent} = $indent;
        } elsif (length($paragraph) and (length($self->{bullet}) + length($self->{indent}) == length($indent))) {
            # Aligned with the text of the item: continuation of the item
            $paragraph .= $text."\n";
        } else {
            # Different alignment: start a new paragraph with this
            # indentation and no bullet
            do_paragraph($self,$paragraph,$wrapped_mode);
            $paragraph = $text."\n";
            $self->{indent} = $indent;
            $self->{bullet} = "";
        }
    } else {
        return parse_fallback($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph);
    }
    return ($paragraph,$wrapped_mode,$expect_header,$end_of_paragraph);
}
712 | ␊ |
# Line parser for Markdown documents.
#
# Recognizes titles (underlined or starting with '#'), horizontal rules and
# the "[[!...]]" macro markup, so that the markup itself is kept out of the
# translatable strings or kept on a line of its own.  Any other line is
# handled by parse_fallback().
#
# Parameters and return value are the same as for parse_fallback().
sub parse_markdown {
    my ($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph) = @_;

    # XXX: There can be any number of underlining according
    #      to the documentation. This detection, which avoid
    #      translating the formatting, is only supported if
    #      the underlining has the same size as the header text.
    my $is_underline =
           $line =~ m/^(={4,}|-{4,})$/
        && defined($paragraph)
        && $paragraph =~ m/^[^\n]*\n$/s
        && length($paragraph) == length($line) + 1;

    if ($is_underline) {
        # Two line title: the underline is made of a single character,
        # which gives the level
        my $level = substr($line, 0, 1);
        my $t = $self->translate($paragraph,
                                 $self->{ref},
                                 "Title $level",
                                 "wrap" => 0);
        $self->pushline($t);
        # The translation still has its newline, hence the -1
        $self->pushline(($level x (length($t)-1))."\n");
        return ("", 1, $expect_header, $end_of_paragraph);
    }

    if ($line =~ m/^(#{1,6})( +)(.*?)( +\1)?$/) {
        # One line title: only the text between the markers is translated
        my ($opening, $spaces, $title, $closing) = ($1, $2, $3, $4 || "");
        do_paragraph($self,$paragraph,$wrapped_mode);
        my $t = $self->translate($title,
                                 $self->{ref},
                                 "Title $opening",
                                 "wrap" => 0);
        $self->pushline($opening.$spaces.$t.$closing."\n");
        return ("", 1, $expect_header, $end_of_paragraph);
    }

    if (    ($paragraph eq "")
        and ($line =~ /^((\*\s*){3,}|(-\s*){3,}|(_\s*){3,})$/)) {
        # Horizontal rule
        $self->pushline($line."\n");
        return ($paragraph, 1, $expect_header, $end_of_paragraph);
    }

    if (   $line =~ /^\s*\[\[\!\S+\s*$/          # macro begin
        or $line =~ /^\s*"""\s*\]\]\s*$/) {      # """ textblock inside macro end
        # Avoid translating Markdown lines containing only markup
        do_paragraph($self,$paragraph,$wrapped_mode);
        $self->pushline("$line\n");
        return ("", 1, $expect_header, $end_of_paragraph);
    }

    if (   $line =~ /^#/                           # headline
        or $line =~ /^\s*\[\[\!\S[^\]]*\]\]\s*$/) { # sole macro
        # Preserve some Markdown markup as a single line
        do_paragraph($self,$paragraph,$wrapped_mode);
        return ("$line\n", 0, $expect_header, 1);
    }

    if ($line =~ /^"""/) {                         # """ textblock inside macro end
        # Markdown markup needing separation _before_ this line
        do_paragraph($self,$paragraph,$wrapped_mode);
        return ("$line\n", 1, $expect_header, $end_of_paragraph);
    }

    return parse_fallback($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph);
}
779 | ␊ |
# Parse the whole input.
#
# Lines are read with shiftline() and handed, one at a time, to the line
# parser selected by initialize().  The parser returns the updated
# paragraph and state; after each line the wrapping mode is adjusted, and
# the paragraph is emitted with do_paragraph() when the parser asked for
# it.  A change of input file (seen in the line reference) ends the current
# paragraph and resets the state.  Whatever is left at the end of the input
# is emitted as a last paragraph.
sub parse {
    my $self = shift;
    my ($line,$ref);
    my $paragraph = "";
    my $wrapped_mode = 1;
    my $expect_header = 1;
    my $end_of_paragraph = 0;
    ($line,$ref) = $self->shiftline();
    my $file = $ref;
    $file =~ s/:[0-9]+$// if defined($line);
    while (defined($line)) {
        # The file name is the reference without its ":line" suffix.  It
        # is extracted the same way as above, so that a reference without
        # such a suffix is compared as a whole instead of relying on the
        # captures of a match which failed.
        my $current_file = $ref;
        $current_file =~ s/:[0-9]+$//;
        if ($current_file ne $file) {
            # New input file: nothing is carried over
            $file = $current_file;
            do_paragraph($self,$paragraph,$wrapped_mode);
            $paragraph = "";
            $wrapped_mode = 1;
            $expect_header = 1;
        }

        chomp($line);
        $self->{ref} = "$ref";
        ($paragraph,$wrapped_mode,$expect_header,$end_of_paragraph)
            = $parse_func->($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph);
        # paragraphs starting by a bullet, or numbered
        # or paragraphs with a line containing many consecutive spaces
        # (more than 3)
        # are considered as verbatim paragraphs
        $wrapped_mode = 0 if (   $paragraph =~ m/^(\*|[0-9]+[.)] )/s
                              or $paragraph =~ m/[ \t][ \t][ \t]/s);
        $wrapped_mode = 0 if (    $tabs eq "verbatim"
                              and $paragraph =~ m/\t/s);
        if ($markdown) {
            # Some Markdown markup can (or might) not survive wrapping
            $wrapped_mode = 0 if (
                   $paragraph =~ /^>/ms                    # blockquote
                or $paragraph =~ /^( {8}|\t)/ms            # monospaced
                or $paragraph =~ /^\$(\S+[{}]\S*\s*)+/ms   # Xapian macro
                or $paragraph =~ /<(?![a-z]+[:@])/ms       # maybe html (tags but not wiki <URI>)
                or $paragraph =~ /^[^<]+>/ms               # maybe html (tag with vertical space)
                or $paragraph =~ /\S {2,}$/ms              # explicit newline (two or more trailing spaces)
                or $paragraph =~ /\[\[\!\S[^\]]+$/ms       # macro begin
            );
        }
        if ($end_of_paragraph) {
            do_paragraph($self,$paragraph,$wrapped_mode);
            $paragraph = "";
            $wrapped_mode = 1;
            $end_of_paragraph = 0;
        }
        ($line,$ref) = $self->shiftline();
    }
    if (length $paragraph) {
        do_paragraph($self,$paragraph,$wrapped_mode);
    }
}
835 | ␊ |
# Translate a paragraph and push the result to the output.
#
# Parameters:
#   $self       the parser object
#   $paragraph  the text to translate; nothing is done when it is empty
#   $wrap       true when the paragraph may be rewrapped
#   $type       optional type of the string; defaults to $self->{type},
#               then to "Plain text"
#
# When bullet detection is enabled, a paragraph which is not wrapped and
# is not inside a verbatim block is first tested for a bulleted list; each
# item found is translated on its own, with wrapping.  Otherwise the
# paragraph is translated as a whole.  The pending @comments are passed
# with it and cleared, and $self->{bullet} / $self->{indent}, when a bullet
# is defined, are put back in front of the translation.
sub do_paragraph {
    my ($self, $paragraph, $wrap) = (shift, shift, shift);
    my $type = shift || $self->{type} || "Plain text";
    return if ($paragraph eq "");

# DEBUG
#    my $b;
#    if (defined $self->{bullet}) {
#        $b = $self->{bullet};
#    } else {
#        $b = "UNDEF";
#    }
#    $type .= " verbatim: '".($self->{verbatim}||"NONE")."' bullet: '$b' indent: '".($self->{indent}||"NONE")."' type: '".($self->{type}||"NONE")."'";

    if ($bullets and not $wrap and not defined $self->{verbatim}) {
        # Detect bullets
        # |        * blah blah
        # |<spaces>  blah
        # |          ^-- aligned
        # <empty line>
        #
        # Other bullets supported:
        # - blah         o blah         + blah
        # 1. blah       1) blah       (1) blah
TEST_BULLET:
        # Captures: $1 indentation, $2 bullet with the spaces after it,
        # $4 first line of the item, $5 rest of the paragraph
        if ($paragraph =~ m/^(\s*)((?:[-*o+]|([0-9]+[.\)])|\([0-9]+\))\s+)([^\n]*\n)(.*)$/s) {
            my $para = $5;
            my $bullet = $2;
            my $indent1 = $1;
            # Indentation of the continuation lines: aligned with the text
            # following the bullet
            my $indent2 = "$1".(' ' x length $bullet);
            my $text = $4;
            # Move the aligned continuation lines from the rest of the
            # paragraph to the text of the item.
            # NOTE(review): the first test is not anchored to the start of
            # $para, so it looks for an indented bullet anywhere in the
            # rest -- confirm whether a leading ^ was intended.
            while (    $para !~ m/$indent2(?:[-*o+]|([0-9]+[.\)])|\([0-9]+\))\s+/
                   and $para =~ s/^$indent2(\S[^\n]*\n)//s) {
                $text .= $1;
            }
            # TODO: detect if a line starts with the same bullet
            # Items containing a run of three or more blanks between words
            # are left alone
            if ($text !~ m/\S[ \t][ \t][ \t]+\S/s) {
                # Pattern for the next item: same indentation and bullet,
                # with any number in place of the first number
                my $bullet_regex = quotemeta($indent1.$bullet);
                $bullet_regex =~ s/[0-9]+/\\d\+/;
                if ($para eq '' or $para =~ m/^$bullet_regex\S/s) {
                    my $trans = $self->translate($text,
                                                 $self->{ref},
                                                 "Bullet: '$indent1$bullet'",
                                                 "wrap" => 1,
                                                 "wrapcol" => - (length $indent2));
                    # Put the bullet back, and indent the following lines
                    $trans =~ s/^/$indent1$bullet/s;
                    $trans =~ s/\n(.)/\n$indent2$1/sg;
                    $self->pushline( $trans."\n" );
                    if ($para eq '') {
                        return;
                    } else {
                        # Another bullet
                        $paragraph = $para;
                        goto TEST_BULLET;
                    }
                }
            }
        }
    }

    my $end = "";
    if ($wrap) {
        # The trailing newlines are not part of the translated string; they
        # are added again after the translation
        $paragraph =~ s/^(.*?)(\n*)$/$1/s;
        $end = $2 || "";
    }
    my $t = $self->translate($paragraph,
                             $self->{ref},
                             $type,
                             "comment" => join("\n", @comments),
                             "wrap" => $wrap);
    @comments = ();
    if (defined $self->{bullet}) {
        my $bullet = $self->{bullet};
        my $indent1 = $self->{indent};
        my $indent2 = $indent1.(' ' x length($bullet));
        # Bullet in front of the first line, alignment for the others
        $t =~ s/^/$indent1$bullet/s;
        $t =~ s/\n(.)/\n$indent2$1/sg;
    }
    $self->pushline( $t.$end );
}
916 | ␊ |
917 | 1;␊ |
918 | ␊ |
919 | =head1 STATUS OF THIS MODULE␊ |
920 | ␊ |
921 | Tested successfully on simple text files and NEWS.Debian files.␊ |
922 | ␊ |
923 | =head1 AUTHORS␊ |
924 | ␊ |
925 | Nicolas François <nicolas.francois@centraliens.net>␊ |
926 | ␊ |
927 | =head1 COPYRIGHT AND LICENSE␊ |
928 | ␊ |
929 | Copyright 2005-2008 by Nicolas FRANÇOIS <nicolas.francois@centraliens.net>.␊ |
930 | ␊ |
931 | This program is free software; you may redistribute it and/or modify it␊ |
932 | under the terms of GPL (see the COPYING file).␊ |
933 |