Chameleon

Chameleon Svn Source Tree

Root/trunk/package/bin/po4a/lib/Locale/Po4a/Text.pm

1#!/usr/bin/perl -w
2
3# Po4a::Text.pm
4#
5# extract and translate translatable strings from text documents
6#
7# This program is free software; you can redistribute it and/or modify
8# it under the terms of the GNU General Public License as published by
9# the Free Software Foundation; either version 2 of the License, or
10# (at your option) any later version.
11#
12# This program is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15# GNU General Public License for more details.
16#
17# You should have received a copy of the GNU General Public License
18# along with this program; if not, write to the Free Software
19# Foundation, Inc.,
20# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21#
22########################################################################
23
24=encoding UTF-8
25
26=head1 NAME
27
28Locale::Po4a::Text - convert text documents from/to PO files
29
30=head1 DESCRIPTION
31
32The po4a (PO for anything) project goal is to ease translations (and more
33interestingly, the maintenance of translations) using gettext tools on
34areas where they were not expected like documentation.
35
36Locale::Po4a::Text is a module to help the translation of text documents into
37other [human] languages.
38
39Paragraphs are split on empty lines (or lines containing only spaces or
40tabulations).
41
42If a paragraph contains a line starting with a space (or tabulation), this
43paragraph won't be rewrapped.
44
45=cut
46
47package Locale::Po4a::Text;
48
49use 5.006;
50use strict;
51use warnings;
52
53require Exporter;
54use vars qw(@ISA @EXPORT);
55@ISA = qw(Locale::Po4a::TransTractor);
56@EXPORT = qw();
57
58use Locale::Po4a::TransTractor;
59use Locale::Po4a::Common;
60
61=head1 OPTIONS ACCEPTED BY THIS MODULE
62
63These are this module's particular options:
64
65=over
66
67=item B<nobullets>
68
69Deactivate detection of bullets.
70
71By default, when a bullet is detected, the bullet paragraph is not considered
72as a verbatim paragraph (with the no-wrap flag in the PO file), but the module
73rewraps this paragraph in the generated PO file and in the translation.
74
75=cut
76
77my $bullets = 1;
78
79=item B<tabs=>I<mode>
80
81Specify how tabulations shall be handled. The I<mode> can be any of:
82
83=over
84
85=item B<split>
86
87Lines with tabulations introduce breaks in the current paragraph.
88
89=item B<verbatim>
90
91Paragraph containing tabulations will not be re-wrapped.
92
93=back
94
95By default, tabulations are considered as spaces.
96
97=cut
98
99my $tabs = "";
100
101=item B<breaks=>I<regex>
102
103A regular expression matching lines which introduce breaks.
104The regular expression will be anchored so that the whole line must match.
105
106=cut
107
108my $breaks;
109
110=item B<debianchangelog>
111
112Handle the header and footer of
113released versions, which only contain non-translatable information.
114
115=cut
116
117my $debianchangelog = 0;
118
119=item B<fortunes>
120
121Handle the fortunes format, which separates fortunes with a line
122consisting of '%' or '%%', and uses '%%' as the beginning of a comment.
123
124=cut
125
126my $fortunes = 0;
127
128=item B<markdown>
129
130Handle some special markup in Markdown-formatted texts.
131
132=cut
133
134my $markdown = 0;
135
136=item B<asciidoc>
137
138Handle documents in the AsciiDoc format.
139
140=cut
141
142my $asciidoc = 0;
143
144=item B<control>[B<=>I<taglist>]
145
146Handle control files.
147A comma-separated list of tags to be translated can be provided.
148
149=cut
150
151my %control = ();
152
153my $parse_func = \&parse_fallback;
154
155my @comments = ();
156
157=back
158
159=cut
160
# Configure the module for one document.
#
# Arguments: the object itself, followed by the option key/value pairs.
# Registers the option names this module accepts, dies with an
# "Unknown option" message for anything else, and then sets up the
# file-scoped parser state ($bullets, $tabs, $breaks, $markdown,
# $asciidoc, %control and $parse_func) according to the given options.
#
# When several format options are given, the last one tested below wins,
# because each of them overwrites $parse_func.
sub initialize {
    my $self = shift;
    my %options = @_;

    # The parser state is kept in file-scoped variables that are shared
    # by every instance.  Put it back to its defaults first, so that the
    # options of a previously initialized document do not leak into this
    # one.
    $bullets    = 1;
    $tabs       = "";
    $breaks     = undef;
    $markdown   = 0;
    $asciidoc   = 0;
    %control    = ();
    @comments   = ();
    $parse_func = \&parse_fallback;

    # Declare the accepted options.  The values stored here only mark
    # the key as known; they are overwritten by the caller's values.
    $self->{options}{'control'} = "";
    foreach my $known (qw(asciidoc breaks debianchangelog debug fortunes
                          markdown nobullets tabs verbose)) {
        $self->{options}{$known} = 1;
    }

    foreach my $opt (keys %options) {
        die wrap_mod("po4a::text",
                     dgettext("po4a", "Unknown option: %s"), $opt)
            unless exists $self->{options}{$opt};
        $self->{options}{$opt} = $options{$opt};
    }

    if (defined $options{'nobullets'}) {
        $bullets = 0;
    }

    if (defined $options{'tabs'}) {
        $tabs = $options{'tabs'};
    }

    if (defined $options{'breaks'}) {
        $breaks = $options{'breaks'};
    }

    if (defined $options{'debianchangelog'}) {
        $parse_func = \&parse_debianchangelog;
    }

    if (defined $options{'fortunes'}) {
        $parse_func = \&parse_fortunes;
    }

    if (defined $options{'markdown'}) {
        $parse_func = \&parse_markdown;
        $markdown=1;
    }

    if (defined $options{'asciidoc'}) {
        $parse_func = \&parse_asciidoc;
        $asciidoc=1;
        warn wrap_mod("po4a::text",
                      dgettext("po4a", "asciidoc option deprecated, use asciidoc format instead of text"));
    }

    if (defined $options{'control'}) {
        $parse_func = \&parse_control;
        if ($options{'control'} eq "1") {
            # No tag list given: the empty key means "translate all tags".
            $control{''}=1;
        } else {
            foreach my $tag (split(',',$options{'control'})) {
                $control{$tag}=1;
            }
        }
    }
}
226
# Generic line handler, used directly for plain text and as the last
# resort of every format-specific parser.
#
# Takes the object, the current line (already chomped by parse()), its
# reference, and the running state ($paragraph, $wrapped_mode,
# $expect_header, $end_of_paragraph); returns the updated state as the
# list ($paragraph, $wrapped_mode, $expect_header, $end_of_paragraph).
sub parse_fallback {
    my ($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph) = @_;

    # Blank line, or a line matching the user supplied "breaks" regex:
    # flush the paragraph and copy the line through untranslated.
    if (   $line =~ /^\s*$/
        or (defined $breaks and $line =~ m/^$breaks$/)) {
        do_paragraph($self,$paragraph,$wrapped_mode);
        $wrapped_mode = 1 unless defined($self->{verbatim});
        $self->pushline($line."\n");
        undef $self->{controlkey};
        return ("",$wrapped_mode,$expect_header,$end_of_paragraph);
    }

    # Email signature hint: flush and copy the marker through.
    if ($line =~ /^-- $/) {
        do_paragraph($self,$paragraph,$wrapped_mode);
        $self->pushline($line."\n");
        return ("",1,$expect_header,$end_of_paragraph);
    }

    # A ruler made of '=', '_' or '-' closes the paragraph it belongs
    # to; that paragraph is emitted without wrapping.
    if (   $line =~ /^=+$/
        or $line =~ /^_+$/
        or $line =~ /^-+$/) {
        do_paragraph($self,$paragraph.$line."\n",0);
        return ("",1,$expect_header,$end_of_paragraph);
    }

    if ($tabs eq "split") {
        my $line_has_tab = ($line =~ m/\t/);
        my $para_has_tab = ($paragraph =~ m/\t/s);
        if ($line_has_tab and not $para_has_tab) {
            # First tabulated line: the text so far is flushed unwrapped
            # and a new unwrapped paragraph starts here.
            do_paragraph($self,$paragraph,0);
            return ("$line\n",0,$expect_header,$end_of_paragraph);
        }
        if (not $line_has_tab and $para_has_tab) {
            # First line without tabulation after a tabulated paragraph.
            do_paragraph($self,$paragraph,$wrapped_mode);
            return ("$line\n",1,$expect_header,$end_of_paragraph);
        }
    }

    # Ordinary content line.
    # A line starting by a space indicates a non-wrap paragraph.
    $wrapped_mode = 0 if $line =~ /^\s/;

    my $needs_break_after = ($markdown and
                             (   $line =~ /\S $/       # explicit newline
                              or $line =~ /"""$/));    # """ textblock inside macro begin
    if ($needs_break_after) {
        # Markdown markup needing separation _after_ this line
        $end_of_paragraph = 1;
    } else {
        undef $self->{bullet};
        undef $self->{indent};
    }
# TODO: comments
    $paragraph .= $line."\n";
    return ($paragraph,$wrapped_mode,$expect_header,$end_of_paragraph);
}
281
# Line handler for Debian changelogs.
#
# The entry header ("package (version) distribution; urgency=...") and
# the trailer (" -- Name <email> date") are copied through untranslated;
# every other line is handed to parse_fallback().  $expect_header is
# cleared once a header has been seen and set again after a trailer.
# Returns the updated state list, like parse_fallback().
sub parse_debianchangelog {
    my ($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph) = @_;

    my $header_expected_next;
    if ($expect_header and
        $line =~ /^(\w[-+0-9a-z.]*)\ \(([^\(\) \t]+)\)   # src, version
                  \s+([-+0-9a-z.]+);                     # distribution
                  \s*urgency\s*\=\s*(.*\S)\s*$/ix) {     #
        # Found header
        $header_expected_next = 0;
    } elsif ($line =~ m/^ \-\- (.*) <(.*)> ((\w+\,\s*)?\d{1,2}\s+\w+\s+\d{4}\s+\d{1,2}:\d\d:\d\d\s+[-+]\d{4}(\s+\([^\\\(\)]\))?)$/) {
        # Found trailer
        $header_expected_next = 1;
    } else {
        return parse_fallback($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph);
    }

    # Header and trailer are handled alike: flush the pending paragraph
    # and emit the line as is.
    do_paragraph($self,$paragraph,$wrapped_mode);
    $self->pushline("$line\n");
    return ("",$wrapped_mode,$header_expected_next,$end_of_paragraph);
}
303
# Line handler for fortune files.
#
# A line consisting of '%' or '%%' (optionally followed by spaces) ends
# the current fortune: the pending paragraph is flushed and the
# separator is copied through untranslated.  On any other line the text
# from '%%' to the end of the line is removed (it is a comment) and what
# remains is processed like ordinary text by parse_fallback().
#
# Returns the updated state list
# ($paragraph, $wrapped_mode, $expect_header, $end_of_paragraph).
sub parse_fortunes {
    my ($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph) = @_;
    if ($line =~ m/^%%?\s*$/) {
        # Found end of fortune
        do_paragraph($self,$paragraph,$wrapped_mode);
        $self->pushline("\n") unless (   $wrapped_mode == 0
                                      or $paragraph eq "");
        $paragraph="";
        $wrapped_mode = 1;
        $self->pushline("$line\n");
    } else {
        # Strip the comment, then let the generic handler add the line
        # to the paragraph.  Without this step the line would be lost
        # and the fortune text would never reach the translation.
        # Note that a line holding only a comment becomes empty and is
        # therefore handled as a blank line.
        $line =~ s/%%(.*)$//;
        return parse_fallback($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph);
    }
    return ($paragraph,$wrapped_mode,$expect_header,$end_of_paragraph);
}
319
# Line handler for control files ("Tag: value" lines followed by an
# indented long description).
#
# A tag value is translated only when the tag was listed in the
# "control" option (or when no list was given); other values are copied
# as they are.  $self->{controlkey} holds the first "Tag: value" seen
# since it was last cleared and is appended to the translation contexts.
# Returns the updated state list, like parse_fallback().
sub parse_control {
    my ($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph) = @_;

    my $key_suffix = defined $self->{controlkey} ? ", ".$self->{controlkey} : "";
    my $long_desc  = "Long Description".$key_suffix;

    # "Tag: value"
    if (my ($tag, $val) = $line =~ m/^([^ :]*): *(.*)$/) {
        warn wrap_mod("po4a::text", dgettext("po4a", "Unrecognized section: %s"), $paragraph)
            unless $paragraph eq "";
        my $t = $val;
        if ($control{''} or $control{$tag}) {
            $t = $self->translate($val,
                                  $self->{ref},
                                  $tag.$key_suffix,
                                  "wrap" => 0);
        }
        $self->{controlkey} = "$tag: $val" unless defined $self->{controlkey};
        $self->pushline("$tag: $t\n");
        $self->{bullet} = "";
        $self->{indent} = " ";
        return ("",1,$expect_header,$end_of_paragraph);
    }

    # " ." separates the paragraphs of a long description.
    if ($line eq " .") {
        do_paragraph($self,$paragraph,$wrapped_mode,$long_desc);
        $self->pushline($line."\n");
        $self->{bullet} = "";
        $self->{indent} = " ";
        return ("",$wrapped_mode,$expect_header,$end_of_paragraph);
    }

    # " Link: target" inside a long description: the label and the
    # target are translated as two separate strings.
    if (my ($link) = $line =~ m/^ Link: +(.*)$/) {
        do_paragraph($self,$paragraph,$wrapped_mode,$long_desc);
        my $t1 = $self->translate("Link: ",
                                  $self->{ref},
                                  "Link",
                                  "wrap" => 0);
        my $t2 = $self->translate($link,
                                  $self->{ref},
                                  "Link".$key_suffix,
                                  "wrap" => 0);
        $self->pushline(" $t1$t2\n");
        return ("",$wrapped_mode,$expect_header,$end_of_paragraph);
    }

    # Continuation line of the long description.
    if (defined $self->{indent} and
        $line =~ m/^$self->{indent}\S/) {
        $self->{type} = $long_desc;
        return ($paragraph.$line."\n",$wrapped_mode,$expect_header,$end_of_paragraph);
    }

    return parse_fallback($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph);
}
374
# Alternations (without enclosing parentheses) of the AsciiDoc style
# names recognized between square brackets, grouped by kind.  They are
# interpolated into the regular expressions of parse_asciidoc().
my $asciidoc_RE_SECTION_TEMPLATES = join('|', qw(
    sect1 sect2 sect3 sect4 preface colophon dedication synopsis index
));
my $asciidoc_RE_STYLE_ADMONITION = join('|', qw(
    TIP NOTE IMPORTANT WARNING CAUTION
));
my $asciidoc_RE_STYLE_PARAGRAPH = join('|', qw(
    normal literal verse quote listing abstract partintro comment
    example sidebar source music latex graphviz
));
my $asciidoc_RE_STYLE_NUMBERING = join('|', qw(
    arabic loweralpha upperalpha lowerroman upperroman
));
my $asciidoc_RE_STYLE_LIST = join('|', qw(
    appendix horizontal qanda glossary bibliography
));
# Every style name above, plus "float".
my $asciidoc_RE_STYLES = join('|',
    $asciidoc_RE_SECTION_TEMPLATES,
    $asciidoc_RE_STYLE_ADMONITION,
    $asciidoc_RE_STYLE_PARAGRAPH,
    $asciidoc_RE_STYLE_NUMBERING,
    $asciidoc_RE_STYLE_LIST,
    "float",
);
381
BEGIN {
    # True when the optional Unicode::GCString module could be loaded.
    my $have_gcstring = (eval { require Unicode::GCString }) ? 1 : 0;
    eval {
        # columns($text, $encoder, $where)
        #
        # Width of $text, used to compare a title with its underline.
        # $text is first decoded with $encoder when an encoder other
        # than "ascii" is given.  With Unicode::GCString the result is
        # what its columns() method reports; without it, the length of
        # the text (minus one trailing newline) is returned, which is
        # only done when no decoding was needed.  Otherwise the function
        # dies, mentioning $where in the message.
        sub columns($$$) {
            my ($text, $encoder, $where) = @_;
            my $needs_decoding = (defined($encoder) && $encoder->name ne "ascii");
            $text = $encoder->decode($text) if $needs_decoding;

            return Unicode::GCString->new($text)->columns()
                if $have_gcstring;

            $text =~ s/\n$//s;
            return length($text) unless $needs_decoding;
            die wrap_mod("po4a::text",
                         dgettext("po4a", "Detection of two line titles failed at %s\nInstall the Unicode::GCString module!"), $where)
        }
    };
}
401
# Line handler for AsciiDoc documents.
#
# Recognizes, in this order: the content of filter and comment blocks,
# list continuations, two-line titles, one-line titles, delimited block
# markers, comment lines, block ids, admonition paragraphs, style and
# attribute lists, labeled lists, attribute entries, block titles,
# bulleted/numbered items, callout items and indented continuation
# lines.  Anything else is handed to parse_fallback().
#
# $self->{verbatim} is set while inside a delimited block:
#   1 - block translated without wrapping (passthrough, listing, literal,
#       verse quote)
#   2 - comment block (lines are collected in @comments, not output)
#   3 - filter block (lines are copied through untranslated)
# $self->{type} remembers the kind of the open block, $self->{bullet}
# and $self->{indent} the prefix do_paragraph() puts back in front of
# the translated paragraph.
#
# Returns the updated state list
# ($paragraph, $wrapped_mode, $expect_header, $end_of_paragraph).
sub parse_asciidoc {
    my ($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph) = @_;
    if ((defined $self->{verbatim}) and ($self->{verbatim} == 3)) {
        # Untranslated blocks
        $self->pushline($line."\n");
        if ($line =~ m/^~{4,}$/) {
            undef $self->{verbatim};
            undef $self->{type};
            $wrapped_mode = 1;
        }
    } elsif ((defined $self->{verbatim}) and ($self->{verbatim} == 2)) {
        # CommentBlock
        if ($line =~ m/^\/{4,}$/) {
            undef $self->{verbatim};
            undef $self->{type};
            $wrapped_mode = 1;
        } else {
            push @comments, $line;
        }
    } elsif ((not defined($self->{verbatim})) and ($line =~ m/^(\+|--)$/)) {
        # List Item Continuation or List Block
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph="";
        $self->pushline($line."\n");
    } elsif ((not defined($self->{verbatim})) and
             ($line =~ m/^(={2,}|-{2,}|~{2,}|\^{2,}|\+{2,})$/) and
             (defined($paragraph) )and
             ($paragraph =~ m/^[^\n]*\n$/s) and
             (columns($paragraph, $self->{TT}{po_in}{encoder}, $ref) == (length($line)))) {
        # Found title: a one-line paragraph followed by an underline of
        # the same width.
        $wrapped_mode = 0;
        my $level = $line;
        $level =~ s/^(.).*$/$1/;
        $paragraph =~ s/\n$//s;
        my $t = $self->translate($paragraph,
                                 $self->{ref},
                                 "Title $level",
                                 "comment" => join("\n", @comments),
                                 "wrap" => 0);
        $self->pushline($t."\n");
        $paragraph="";
        @comments=();
        $wrapped_mode = 1;
        # The underline is regenerated to fit the translated title.
        $self->pushline(($level x (columns($t, $self->{TT}{po_in}{encoder}, $ref)))."\n");
    } elsif ((not defined($self->{verbatim})) and
             ($line =~ m/^(={1,5})( +)(.*?)( +\1)?$/)) {
        # Found one line title.
        # Like the other markup, this is only looked for outside of
        # verbatim blocks, so that a listing containing a line such as
        # "= x" is not broken up.
        my $titlelevel1 = $1;
        my $titlespaces = $2;
        my $title = $3;
        my $titlelevel2 = $4||"";
        do_paragraph($self,$paragraph,$wrapped_mode);
        $wrapped_mode = 0;
        $paragraph="";
        my $t = $self->translate($title,
                                 $self->{ref},
                                 "Title $titlelevel1",
                                 "comment" => join("\n", @comments),
                                 "wrap" => 0);
        $self->pushline($titlelevel1.$titlespaces.$t.$titlelevel2."\n");
        @comments=();
        $wrapped_mode = 1;
    } elsif ($line =~ m/^(\/{4,}|\+{4,}|-{4,}|\.{4,}|\*{4,}|_{4,}|={4,}|~{4,}|\|={4,})$/) {
        # Found one delimited block
        my $t = $line;
        $t =~ s/^(.).*$/$1/;
        my $type = "delimited block $t";
        if (defined $self->{verbatim} and ($self->{type} ne $type)) {
            # Delimiter of another kind inside a verbatim block: it is
            # part of the block content.
            $paragraph .= "$line\n";
        } else {
            do_paragraph($self,$paragraph,$wrapped_mode);
            if (    (defined $self->{type})
                and ($self->{type} eq $type)) {
                # Closing delimiter of the open block
                undef $self->{type};
                undef $self->{verbatim};
                $wrapped_mode = 1;
            } else {
                if ($t eq "\/") {
                    # CommentBlock, should not be treated
                    $self->{verbatim} = 2;
                } elsif ($t eq "+") {
                    # PassthroughBlock
                    $wrapped_mode = 0;
                    $self->{verbatim} = 1;
                } elsif ($t eq "-" or $t eq "|") {
                    # ListingBlock
                    $wrapped_mode = 0;
                    $self->{verbatim} = 1;
                } elsif ($t eq ".") {
                    # LiteralBlock
                    $wrapped_mode = 0;
                    $self->{verbatim} = 1;
                } elsif ($t eq "*") {
                    # SidebarBlock
                    $wrapped_mode = 1;
                } elsif ($t eq "_") {
                    # QuoteBlock
                    if (    (defined $self->{type})
                        and ($self->{type} eq "verse")) {
                        $wrapped_mode = 0;
                        $self->{verbatim} = 1;
                    } else {
                        $wrapped_mode = 1;
                    }
                } elsif ($t eq "=") {
                    # ExampleBlock
                    $wrapped_mode = 1;
                } elsif ($t eq "~") {
                    # Filter blocks, TBC: not translated
                    $wrapped_mode = 0;
                    $self->{verbatim} = 3;
                }
                $self->{type} = $type;
            }
            $paragraph="";
            # The delimiters of comment blocks are not output.
            $self->pushline($line."\n") unless defined($self->{verbatim}) && $self->{verbatim} == 2;
        }
    } elsif ((not defined($self->{verbatim})) and ($line =~ m/^\/\/(.*)/)) {
        # Comment line
        push @comments, $1;
    } elsif (not defined $self->{verbatim} and
             ($line =~ m/^\[\[([^\]]*)\]\]$/)) {
        # Found BlockId
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph="";
        $wrapped_mode = 1;
        $self->pushline($line."\n");
        undef $self->{bullet};
        undef $self->{indent};
    } elsif (not defined $self->{verbatim} and
             ($paragraph eq "") and
             ($line =~ m/^((?:$asciidoc_RE_STYLE_ADMONITION):\s+)(.*)$/)) {
        # Admonition paragraph: the label is copied through, the text
        # after it starts a new paragraph.
        my $type = $1;
        my $text = $2;
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph=$text."\n";
        $wrapped_mode = 1;
        $self->pushline($type);
        undef $self->{bullet};
        undef $self->{indent};
    } elsif (not defined $self->{verbatim} and
             ($line =~ m/^\[($asciidoc_RE_STYLES)\]$/)) {
        # Style attribute on its own line
        my $type = $1;
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph="";
        $wrapped_mode = 1;
        $self->pushline($line."\n");
        if ($type eq "verse") {
            $wrapped_mode = 0;
        }
        undef $self->{bullet};
        undef $self->{indent};
    } elsif (not defined $self->{verbatim} and
             ($line =~ m/^\[(['"]?)(verse|quote)\1, +(.*)\]$/)) {
        # Verse or quote with attribution: the attribution is translated.
        my $quote = $1 || '';
        my $type = $2;
        my $arg = $3;
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph="";
        my $t = $self->translate($arg,
                                 $self->{ref},
                                 "$type",
                                 "comment" => join("\n", @comments),
                                 "wrap" => 0);
        $self->pushline("[$quote$type$quote, $t]\n");
        @comments=();
        $wrapped_mode = 1;
        if ($type eq "verse") {
            $wrapped_mode = 0;
        }
        $self->{type} = $type;
        undef $self->{bullet};
        undef $self->{indent};
    } elsif (not defined $self->{verbatim} and
             ($line =~ m/^\[icon="(.*)"\]$/)) {
        # Icon attribute: the value is translated.
        my $arg = $1;
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph="";
        my $t = $self->translate($arg,
                                 $self->{ref},
                                 "icon",
                                 "comment" => join("\n", @comments),
                                 "wrap" => 0);
        $self->pushline("[icon=\"$t\"]\n");
        @comments=();
        $wrapped_mode = 1;
        undef $self->{bullet};
        undef $self->{indent};
    } elsif (not defined $self->{verbatim} and
             ($line =~ m/^\[icons=None, +caption="(.*)"\]$/)) {
        # Caption attribute: the caption is translated.
        my $arg = $1;
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph="";
        my $t = $self->translate($arg,
                                 $self->{ref},
                                 "caption",
                                 "comment" => join("\n", @comments),
                                 "wrap" => 0);
        $self->pushline("[icons=None, caption=\"$t\"]\n");
        @comments=();
        $wrapped_mode = 1;
        undef $self->{bullet};
        undef $self->{indent};
    } elsif (not defined $self->{verbatim} and
             ($line =~ m/^(\s*)([*_+`'#[:alnum:]].*)((?:::|;;|\?\?|:-)(?: *\\)?)$/)) {
        my $indent = $1;
        my $label = $2;
        my $labelend = $3;
        # Found labeled list
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph="";
        $wrapped_mode = 1;
        $self->{bullet} = "";
        $self->{indent} = $indent;
        my $t = $self->translate($label,
                                 $self->{ref},
                                 "Labeled list",
                                 "comment" => join("\n", @comments),
                                 "wrap" => 0);
        $self->pushline("$indent$t$labelend\n");
        @comments=();
    } elsif (not defined $self->{verbatim} and
             ($line =~ m/^(\s*)(\S.*)((?:::|;;)\s+)(.*)$/)) {
        my $indent = $1;
        my $label = $2;
        my $labelend = $3;
        my $labeltext = $4;
        # Found Horizontal Labeled Lists
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph=$labeltext."\n";
        $wrapped_mode = 1;
        $self->{bullet} = "";
        $self->{indent} = $indent;
        my $t = $self->translate($label,
                                 $self->{ref},
                                 "Labeled list",
                                 "comment" => join("\n", @comments),
                                 "wrap" => 0);
        # No newline: the text of the item follows on the same line.
        $self->pushline("$indent$t$labelend");
        @comments=();
    } elsif (not defined $self->{verbatim} and
             ($line =~ m/^\:(\S.*?)(:\s*)(.*)$/)) {
        my $attrname = $1;
        my $attrsep = $2;
        my $attrvalue = $3;
        # Found a Attribute entry
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph="";
        $wrapped_mode = 1;
        undef $self->{bullet};
        undef $self->{indent};
        my $t = $self->translate($attrvalue,
                                 $self->{ref},
                                 "Attribute :$attrname:",
                                 "comment" => join("\n", @comments),
                                 "wrap" => 0);
        $self->pushline(":$attrname$attrsep$t\n");
        @comments=();
    } elsif (not defined $self->{verbatim} and
             ($line !~ m/^\.\./) and ($line =~ m/^\.(\S.*)$/)) {
        my $title = $1;
        # Found block title
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph="";
        $wrapped_mode = 1;
        undef $self->{bullet};
        undef $self->{indent};
        my $t = $self->translate($title,
                                 $self->{ref},
                                 "Block title",
                                 "comment" => join("\n", @comments),
                                 "wrap" => 0);
        $self->pushline(".$t\n");
        @comments=();
    } elsif (not defined $self->{verbatim} and
             ($line =~ m/^(\s*)((?:[-*o+]|(?:[0-9]+[.\)])|(?:[a-z][.\)])|\([0-9]+\)|\.|\.\.)\s+)(.*)$/)) {
        # Bulleted or numbered item: the bullet is kept aside and put
        # back by do_paragraph().
        my $indent = $1||"";
        my $bullet = $2;
        my $text = $3;
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph = $text."\n";
        $self->{indent} = $indent;
        $self->{bullet} = $bullet;
    } elsif (not defined $self->{verbatim} and
             ($line =~ m/^((?:<?[0-9]+)?> +)(.*)$/)) {
        # Item introduced by "N> ", "<N> " or "> "
        my $bullet = $1;
        my $text = $2;
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph = $text."\n";
        $self->{indent} = "";
        $self->{bullet} = $bullet;
    } elsif (not defined $self->{verbatim} and
             (defined $self->{bullet} and $line =~ m/^(\s+)(.*)$/)) {
        # Indented line while a bullet is active
        my $indent = $1;
        my $text = $2;
        if (not defined $self->{indent}) {
            $paragraph .= $text."\n";
            $self->{indent} = $indent;
        } elsif (length($paragraph) and (length($self->{bullet}) + length($self->{indent}) == length($indent))) {
            # Aligned with the text of the item: same paragraph
            $paragraph .= $text."\n";
        } else {
            # Different alignment: start a new paragraph at this indent
            do_paragraph($self,$paragraph,$wrapped_mode);
            $paragraph = $text."\n";
            $self->{indent} = $indent;
            $self->{bullet} = "";
        }
    } else {
        return parse_fallback($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph);
    }
    return ($paragraph,$wrapped_mode,$expect_header,$end_of_paragraph);
}
712
# Line handler for Markdown-formatted texts.
#
# Handles underlined titles, '#' titles, horizontal rules and the
# "[[!macro ...]]" / '"""' markup; every other line is handed to
# parse_fallback().  Returns the updated state list
# ($paragraph, $wrapped_mode, $expect_header, $end_of_paragraph).
sub parse_markdown {
    my ($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph) = @_;

    # Found title: a one-line paragraph underlined with '=' or '-'.
    # XXX: There can be any number of underlining according
    #      to the documentation. This detection, which avoid
    #      translating the formatting, is only supported if
    #      the underlining has the same size as the header text.
    if (($line =~ m/^(={4,}|-{4,})$/) and
        (defined($paragraph) ) and
        ($paragraph =~ m/^[^\n]*\n$/s) and
        (length($paragraph) == (length($line)+1))) {
        my $underline_char = substr($line, 0, 1);
        my $translated = $self->translate($paragraph,
                                          $self->{ref},
                                          "Title $underline_char",
                                          "wrap" => 0);
        $self->pushline($translated);
        # The translated title still ends with its newline, hence the -1.
        $self->pushline(($underline_char x (length($translated)-1))."\n");
        return ("",1,$expect_header,$end_of_paragraph);
    }

    # Found one line title: "# Title" with an optional closing "#".
    if ($line =~ m/^(#{1,6})( +)(.*?)( +\1)?$/) {
        my ($opening, $gap, $title, $closing) = ($1, $2, $3, $4||"");
        do_paragraph($self,$paragraph,$wrapped_mode);
        my $translated = $self->translate($title,
                                          $self->{ref},
                                          "Title $opening",
                                          "wrap" => 0);
        $self->pushline($opening.$gap.$translated.$closing."\n");
        return ("",1,$expect_header,$end_of_paragraph);
    }

    # Horizontal rule (only looked for when no paragraph is pending)
    if (($paragraph eq "") and
        ($line =~ /^((\*\s*){3,}|(-\s*){3,}|(_\s*){3,})$/)) {
        $self->pushline($line."\n");
        return ($paragraph,1,$expect_header,$end_of_paragraph);
    }

    # Avoid translating Markdown lines containing only markup
    if (   $line =~ /^\s*\[\[\!\S+\s*$/         # macro begin
        or $line =~ /^\s*"""\s*\]\]\s*$/) {     # """ textblock inside macro end
        do_paragraph($self,$paragraph,$wrapped_mode);
        $self->pushline("$line\n");
        return ("",1,$expect_header,$end_of_paragraph);
    }

    # Preserve some Markdown markup as a single line
    if (   $line =~ /^#/                            # headline
        or $line =~ /^\s*\[\[\!\S[^\]]*\]\]\s*$/) { # sole macro
        do_paragraph($self,$paragraph,$wrapped_mode);
        return ("$line\n",0,$expect_header,1);
    }

    # Markdown markup needing separation _before_ this line
    if ($line =~ /^"""/) {                          # """ textblock inside macro end
        do_paragraph($self,$paragraph,$wrapped_mode);
        return ("$line\n",1,$expect_header,$end_of_paragraph);
    }

    return parse_fallback($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph);
}
779
# Main loop: read the input line by line with shiftline(), feed each
# line to the parser selected by initialize() ($parse_func), and flush
# the paragraphs with do_paragraph().
#
# The state handed to and returned by the parser is:
#   $paragraph         text collected for the current paragraph
#   $wrapped_mode      true when the paragraph may be rewrapped
#   $expect_header     used by the Debian changelog parser
#   $end_of_paragraph  set by a parser to request a flush after the line
sub parse {
    my $self = shift;
    my ($line,$ref);
    my $paragraph="";
    my $wrapped_mode = 1;
    my $expect_header = 1;
    my $end_of_paragraph = 0;
    ($line,$ref)=$self->shiftline();
    my $file = $ref;
    $file =~ s/:[0-9]+$// if defined($line);
    while (defined($line)) {
        # References look like "file:line".  When the file part changes,
        # a new input file has started: flush and reset the state.
        # The capture is only used when the match succeeded; a reference
        # of another form is taken as belonging to the current file.
        if ($ref =~ m/^(.*):[0-9]+$/ and $1 ne $file) {
            $file = $1;
            do_paragraph($self,$paragraph,$wrapped_mode);
            $paragraph="";
            $wrapped_mode = 1;
            $expect_header = 1;
        }

        chomp($line);
        $self->{ref}="$ref";
        ($paragraph,$wrapped_mode,$expect_header,$end_of_paragraph) =
            $parse_func->($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph);
        # paragraphs starting by a bullet, or numbered
        # or paragraphs with a line containing many consecutive spaces
        # (more than 3)
        # are considered as verbatim paragraphs
        $wrapped_mode = 0 if (   $paragraph =~ m/^(\*|[0-9]+[.)] )/s
                              or $paragraph =~ m/[ \t][ \t][ \t]/s);
        $wrapped_mode = 0 if (    $tabs eq "verbatim"
                              and $paragraph =~ m/\t/s);
        if ($markdown) {
            # Some Markdown markup can (or might) not survive wrapping
            $wrapped_mode = 0 if (
                   $paragraph =~ /^>/ms                     # blockquote
                or $paragraph =~ /^( {8}|\t)/ms             # monospaced
                or $paragraph =~ /^\$(\S+[{}]\S*\s*)+/ms    # Xapian macro
                or $paragraph =~ /<(?![a-z]+[:@])/ms        # maybe html (tags but not wiki <URI>)
                or $paragraph =~ /^[^<]+>/ms                # maybe html (tag with vertical space)
                or $paragraph =~ /\S $/ms                   # explicit newline
                or $paragraph =~ /\[\[\!\S[^\]]+$/ms        # macro begin
            );
        }
        if ($end_of_paragraph) {
            do_paragraph($self,$paragraph,$wrapped_mode);
            $paragraph="";
            $wrapped_mode = 1;
            $end_of_paragraph = 0;
        }
        ($line,$ref)=$self->shiftline();
    }
    # Flush what is left at the end of the input.
    if (length $paragraph) {
        do_paragraph($self,$paragraph,$wrapped_mode);
    }
}
835
# Translate one paragraph and push the result to the output.
#
# Arguments: the object, the paragraph text, the wrap flag and an
# optional type used as translation context (defaults to $self->{type},
# then to "Plain text").  Nothing is done for an empty paragraph.
#
# When bullet detection is active and the paragraph is not wrapped and
# not inside a verbatim block, leading bullet items are split off and
# translated one by one, each as a wrapped paragraph.  Whatever is left
# is translated as a whole; if $self->{bullet} is set, that bullet and
# $self->{indent} are put back in front of the translation.
sub do_paragraph {
    my ($self, $paragraph, $wrap, $given_type) = @_;
    my $type = $given_type || $self->{type} || "Plain text";
    return if ($paragraph eq "");

    if ($bullets and not $wrap and not defined $self->{verbatim}) {
        # Detect bullets
        # |        * blah blah
        # |<spaces>  blah
        # |          ^-- aligned
        # <empty line>
        #
        # Other bullets supported:
        # - blah         o blah         + blah
        # 1. blah        1) blah       (1) blah
        #
        # Each pass of the loop handles the first item of $paragraph and
        # goes on with the rest; leaving the loop hands what remains to
        # the generic code below.
        while ($paragraph =~ m/^(\s*)((?:[-*o+]|([0-9]+[.\)])|\([0-9]+\))\s+)([^\n]*\n)(.*)$/s) {
            my ($lead, $bullet, $item, $rest) = ($1, $2, $4, $5);
            my $hang = $lead.(' ' x length($bullet));

            # Collect the continuation lines aligned with the item text.
            while ($rest !~ m/$hang(?:[-*o+]|([0-9]+[.\)])|\([0-9]+\))\s+/
                   and $rest =~ s/^$hang(\S[^\n]*\n)//s) {
                $item .= $1;
            }

            # TODO: detect if a line starts with the same bullet
            # Runs of spaces inside the item: keep the paragraph as is.
            last if $item =~ m/\S[ \t][ \t][ \t]+\S/s;

            # The item is only accepted when nothing follows it, or when
            # the next line starts with the same kind of bullet.
            my $bullet_regex = quotemeta($lead.$bullet);
            $bullet_regex =~ s/[0-9]+/\\d\+/;
            last unless ($rest eq '' or $rest =~ m/^$bullet_regex\S/s);

            my $trans = $self->translate($item,
                                         $self->{ref},
                                         "Bullet: '$lead$bullet'",
                                         "wrap" => 1,
                                         "wrapcol" => - (length $hang));
            $trans = $lead.$bullet.$trans;
            $trans =~ s/\n(.)/\n$hang$1/sg;
            $self->pushline( $trans."\n" );

            return if $rest eq '';
            # Another bullet
            $paragraph = $rest;
        }
    }

    # Generic case.  For wrapped paragraphs the trailing newlines are
    # taken off before the translation and added back afterwards.
    my $end = "";
    if ($wrap) {
        $paragraph =~ s/^(.*?)(\n*)$/$1/s;
        $end = $2 || "";
    }
    my $t = $self->translate($paragraph,
                             $self->{ref},
                             $type,
                             "comment" => join("\n", @comments),
                             "wrap" => $wrap);
    @comments = ();
    if (defined $self->{bullet}) {
        my $bullet = $self->{bullet};
        my $indent1 = $self->{indent};
        my $indent2 = $indent1.(' ' x length($bullet));
        $t = $indent1.$bullet.$t;
        $t =~ s/\n(.)/\n$indent2$1/sg;
    }
    $self->pushline( $t.$end );
}
916
9171;
918
919=head1 STATUS OF THIS MODULE
920
921Tested successfully on simple text files and NEWS.Debian files.
922
923=head1 AUTHORS
924
925 Nicolas François <nicolas.francois@centraliens.net>
926
927=head1 COPYRIGHT AND LICENSE
928
929 Copyright 2005-2008 by Nicolas FRANÇOIS <nicolas.francois@centraliens.net>.
930
931This program is free software; you may redistribute it and/or modify it
932under the terms of GPL (see the COPYING file).
933

Archive Download this file

Revision: 2790