Chameleon

Chameleon Svn Source Tree

Root/trunk/package/bin/po4a/lib/Locale/Po4a/Text.pm

1#!/usr/bin/perl -w
2
3# Po4a::Text.pm
4#
# extract and translate translatable strings from text documents
6#
7# This program is free software; you can redistribute it and/or modify
8# it under the terms of the GNU General Public License as published by
9# the Free Software Foundation; either version 2 of the License, or
10# (at your option) any later version.
11#
12# This program is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15# GNU General Public License for more details.
16#
17# You should have received a copy of the GNU General Public License
18# along with this program; if not, write to the Free Software
19# Foundation, Inc.,
20# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21#
22########################################################################
23
24=encoding UTF-8
25
26=head1 NAME
27
28Locale::Po4a::Text - convert text documents from/to PO files
29
30=head1 DESCRIPTION
31
32The po4a (PO for anything) project goal is to ease translations (and more
33interestingly, the maintenance of translations) using gettext tools on
34areas where they were not expected like documentation.
35
36Locale::Po4a::Text is a module to help the translation of text documents into
37other [human] languages.
38
39Paragraphs are split on empty lines (or lines containing only spaces or
40tabulations).
41
If a paragraph contains a line starting with a space (or tabulation), this
paragraph won't be rewrapped.
44
45=cut
46
47package Locale::Po4a::Text;
48
49use 5.006;
50use strict;
51use warnings;
52
53require Exporter;
54use vars qw(@ISA @EXPORT);
55@ISA = qw(Locale::Po4a::TransTractor);
56@EXPORT = qw();
57
58use Locale::Po4a::TransTractor;
59use Locale::Po4a::Common;
60
61=head1 OPTIONS ACCEPTED BY THIS MODULE
62
63These are this module's particular options:
64
65=over
66
67=item B<nobullets>
68
69Deactivate detection of bullets.
70
71By default, when a bullet is detected, the bullet paragraph is not considered
72as a verbatim paragraph (with the no-wrap flag in the PO file), but the module
73rewraps this paragraph in the generated PO file and in the translation.
74
75=cut
76
77my $bullets = 1;
78
79=item B<tabs=>I<mode>
80
81Specify how tabulations shall be handled. The I<mode> can be any of:
82
83=over
84
85=item B<split>
86
87Lines with tabulations introduce breaks in the current paragraph.
88
89=item B<verbatim>
90
91Paragraph containing tabulations will not be re-wrapped.
92
93=back
94
95By default, tabulations are considered as spaces.
96
97=cut
98
99my $tabs = "";
100
101=item B<breaks=>I<regex>
102
103A regular expression matching lines which introduce breaks.
104The regular expression will be anchored so that the whole line must match.
105
106=cut
107
108my $breaks;
109
110=item B<debianchangelog>
111
Handle the header and footer of
released versions, which only contain non-translatable information.
114
115=cut
116
117my $debianchangelog = 0;
118
119=item B<fortunes>
120
Handle the fortunes format, which separates fortunes with a line
consisting of '%' or '%%', and uses '%%' as the beginning of a comment.
123
124=cut
125
126my $fortunes = 0;
127
128=item B<markdown>
129
130Handle some special markup in Markdown-formatted texts.
131
132=cut
133
134my $markdown = 0;
135
136=item B<asciidoc>
137
138Handle documents in the AsciiDoc format.
139
140=cut
141
142my $asciidoc = 0;
143
144=item B<control>[B<=>I<taglist>]
145
146Handle control files.
147A comma-separated list of tags to be translated can be provided.
148
149=cut
150
151my %control = ();
152
153my $parse_func = \&parse_fallback;
154
155my @comments = ();
156
157=back
158
159=cut
160
# initialize($self, %options)
#
# Registers the options understood by this module, rejects unknown ones
# (dies with "Unknown option: %s"), and then configures the module-level
# state ($bullets, $tabs, $breaks, $markdown, $asciidoc, %control) and the
# line parser ($parse_func) from the options that were actually passed.
# When several format options are given, the one tested last wins.
sub initialize {
    my ($self, %options) = @_;

    # Declare the known option names.  'control' defaults to the empty
    # string, every other option to 1.
    $self->{options}{'control'} = "";
    $self->{options}{$_} = 1
        for qw(asciidoc breaks debianchangelog debug fortunes
               markdown nobullets tabs verbose);

    # Copy the caller's values over the defaults, refusing unknown names.
    for my $name (keys %options) {
        unless (exists $self->{options}{$name}) {
            die wrap_mod("po4a::text",
                         dgettext("po4a", "Unknown option: %s"), $name);
        }
        $self->{options}{$name} = $options{$name};
    }

    # Simple switches and values.
    $bullets = 0                  if defined $options{'nobullets'};
    $tabs    = $options{'tabs'}   if defined $options{'tabs'};
    $breaks  = $options{'breaks'} if defined $options{'breaks'};

    # Format selection: each later test overrides the earlier ones.
    $parse_func = \&parse_debianchangelog
        if defined $options{'debianchangelog'};
    $parse_func = \&parse_fortunes
        if defined $options{'fortunes'};

    if (defined $options{'markdown'}) {
        $parse_func = \&parse_markdown;
        $markdown   = 1;
    }

    if (defined $options{'asciidoc'}) {
        $parse_func = \&parse_asciidoc;
        $asciidoc   = 1;
    }

    if (defined $options{'control'}) {
        $parse_func = \&parse_control;
        if ($options{'control'} eq "1") {
            # No tag list given: the empty key means "translate every tag".
            $control{''} = 1;
        } else {
            $control{$_} = 1 for split(',', $options{'control'});
        }
    }
}
224
# parse_fallback($self, $line, $ref, $paragraph, $wrapped_mode,
#                $expect_header, $end_of_paragraph)
#
# Generic handler for one (already chomped) input line.  The format
# specific parsers delegate to it for every line they do not recognise.
#
# It either appends $line to the paragraph being accumulated, or flushes
# the accumulated paragraph through do_paragraph() and starts a new one.
#
# Returns the updated state as the list
#   ($paragraph, $wrapped_mode, $expect_header, $end_of_paragraph).
sub parse_fallback {
    my ($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph) = @_;
    if (   ($line =~ /^\s*$/)
        or (    defined $breaks
            and $line =~ m/^$breaks$/)) {
        # Break paragraphs on lines containing only spaces, or on lines
        # matching the user supplied (anchored) "breaks" expression.
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph="";
        # Inside a verbatim block the wrapping mode must be left alone.
        $wrapped_mode = 1 unless defined($self->{verbatim});
        $self->pushline($line."\n");
        undef $self->{controlkey};
    } elsif ($line =~ /^-- $/) {
        # Break paragraphs on email signature hint
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph="";
        $wrapped_mode = 1;
        $self->pushline($line."\n");
    } elsif (   $line =~ /^=+$/
             or $line =~ /^_+$/
             or $line =~ /^-+$/) {
        # A ruler line: it is appended to the current paragraph, which is
        # then emitted unwrapped so the ruler stays on its own line.
        $wrapped_mode = 0;
        $paragraph .= $line."\n";
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph="";
        $wrapped_mode = 1;
    } elsif ($tabs eq "split" and $line =~ m/\t/ and $paragraph !~ m/\t/s) {
        # tabs=split: first line with a tabulation after lines without.
        # NOTE: the pending paragraph is flushed unwrapped here.
        $wrapped_mode = 0;
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph = "$line\n";
        $wrapped_mode = 0;
    } elsif ($tabs eq "split" and $line !~ m/\t/ and $paragraph =~ m/\t/s) {
        # tabs=split: first line without a tabulation after lines with.
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph = "$line\n";
        $wrapped_mode = 1;
    } else {
        if ($line =~ /^\s/) {
            # A line starting by a space indicates a non-wrap
            # paragraph
            $wrapped_mode = 0;
        }
        if ($markdown and
            (   $line =~ /\S {2,}$/ # explicit newline: a Markdown hard
                                    # break needs two or more trailing
                                    # spaces, a single one is not markup
             or $line =~ /"""$/)) { # """ textblock inside macro begin
            # Markdown markup needing separation _after_ this line
            $end_of_paragraph = 1;
        } else {
            undef $self->{bullet};
            undef $self->{indent};
        }
# TODO: comments
        $paragraph .= $line."\n";
    }
    return ($paragraph,$wrapped_mode,$expect_header,$end_of_paragraph);
}
279
# parse_debianchangelog($self, $line, $ref, $paragraph, $wrapped_mode,
#                       $expect_header, $end_of_paragraph)
#
# Line handler for Debian changelogs.  The header line of an entry
# ("package (version) distribution; urgency=...") and its trailer line
# (" -- Maintainer <email>  date") are copied to the output untranslated;
# every other line is delegated to parse_fallback().
#
# $expect_header is true until a header has been seen and becomes true
# again after each trailer, so a header is only recognised where one is
# expected.
#
# Returns ($paragraph, $wrapped_mode, $expect_header, $end_of_paragraph).
sub parse_debianchangelog {
    my ($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph) = @_;
    if ($expect_header and
        $line =~ /^(\w[-+0-9a-z.]*)\ \(([^\(\) \t]+)\) # src, version
                  \s+([-+0-9a-z.]+);                   # distribution
                  \s*urgency\s*\=\s*(.*\S)\s*$/ix) {   #
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph="";
        $self->pushline("$line\n");
        $expect_header=0;
    } elsif ($line =~ m/^ \-\- (.*) <(.*)> {1,2}((\w+\,\s*)?\d{1,2}\s+\w+\s+\d{4}\s+\d{1,2}:\d\d:\d\d\s+[-+]\d{4}(\s+\([^\\\(\)]\))?)$/) {
        # Found trailer.  The changelog format separates the maintainer
        # address from the date with two spaces; a single space is still
        # accepted so that input matched before keeps matching.
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph="";
        $self->pushline("$line\n");
        $expect_header=1;
    } else {
        return parse_fallback($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph);
    }
    return ($paragraph,$wrapped_mode,$expect_header,$end_of_paragraph);
}
301
# parse_fortunes($self, $line, $ref, $paragraph, $wrapped_mode,
#                $expect_header, $end_of_paragraph)
#
# Line handler for fortune files.  A line consisting of '%' or '%%'
# (optionally followed by spaces) ends the current fortune: the pending
# paragraph is flushed and the separator is copied to the output.
# On any other line, everything from '%%' to the end of the line is
# removed (it is a comment) and the remaining text is handled by
# parse_fallback(), which accumulates it into the paragraph.
#
# Returns ($paragraph, $wrapped_mode, $expect_header, $end_of_paragraph).
sub parse_fortunes {
    my ($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph) = @_;
    if ($line =~ m/^%%?\s*$/) {
        # Found end of fortune
        do_paragraph($self,$paragraph,$wrapped_mode);
        # A wrapped, non-empty paragraph is pushed without its trailing
        # newline by do_paragraph(), so terminate it here.
        $self->pushline("\n") unless (   $wrapped_mode == 0
                                      or $paragraph eq "");
        $paragraph="";
        $wrapped_mode = 1;
        $self->pushline("$line\n");
    } else {
        # Drop the comment part, then let the generic handler deal with
        # the text.  Without this delegation the line would never be
        # appended to $paragraph and the fortune text would be lost.
        $line =~ s/%%(.*)$//;
        return parse_fallback($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph);
    }
    return ($paragraph,$wrapped_mode,$expect_header,$end_of_paragraph);
}
317
# parse_control($self, $line, $ref, $paragraph, $wrapped_mode,
#               $expect_header, $end_of_paragraph)
#
# Line handler for control files made of "Tag: value" fields followed by
# indented continuation lines.  The value of a field is translated when
# its tag was requested (or when every tag was requested through the
# empty key of %control).  The first field seen since {controlkey} was
# last cleared is remembered there and added to the type of the
# following strings.  Unrecognised lines go to parse_fallback().
#
# Returns ($paragraph, $wrapped_mode, $expect_header, $end_of_paragraph).
sub parse_control {
    my ($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph) = @_;

    # Builds "<label>" or "<label>, <controlkey>" depending on whether a
    # control key is known at the time of the call.
    my $qualified = sub {
        my $label = shift;
        return $label.(defined $self->{controlkey}?", ".$self->{controlkey}:"");
    };

    if ($line =~ m/^([^ :]*): *(.*)$/) {
        # A "Tag: value" field.
        my ($tag, $val) = ($1, $2);
        warn "Unrecognized section: '$paragraph'\n"
            unless $paragraph eq "";

        my $t = $val;
        if ($control{''} or $control{$tag}) {
            $t = $self->translate($val,
                                  $self->{ref},
                                  $qualified->($tag),
                                  "wrap" => 0);
        }
        $self->{controlkey} = "$tag: $val"
            unless defined $self->{controlkey};

        $self->pushline("$tag: $t\n");
        $paragraph = "";
        $wrapped_mode = 1;
        $self->{bullet} = "";
        $self->{indent} = " ";
    } elsif ($line eq " .") {
        # Paragraph separator inside a long description.
        do_paragraph($self,$paragraph,$wrapped_mode,
                     $qualified->("Long Description"));
        $paragraph = "";
        $self->pushline($line."\n");
        $self->{bullet} = "";
        $self->{indent} = " ";
    } elsif ($line =~ m/^ Link: +(.*)$/) {
        # The link target is saved before do_paragraph() can run any
        # other regular expression.
        my $link = $1;
        do_paragraph($self,$paragraph,$wrapped_mode,
                     $qualified->("Long Description"));
        my $t1 = $self->translate("Link: ",
                                  $self->{ref},
                                  "Link",
                                  "wrap" => 0);
        my $t2 = $self->translate($link,
                                  $self->{ref},
                                  $qualified->("Link"),
                                  "wrap" => 0);
        $self->pushline(" $t1$t2\n");
        $paragraph = "";
    } elsif (defined $self->{indent} and
             $line =~ m/^$self->{indent}\S/) {
        # Continuation line of the long description.
        $paragraph .= $line."\n";
        $self->{type} = $qualified->("Long Description");
    } else {
        return parse_fallback($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph);
    }
    return ($paragraph,$wrapped_mode,$expect_header,$end_of_paragraph);
}
372
# Alternations (without enclosing parentheses) of the AsciiDoc style
# names recognised by parse_asciidoc().  They are interpolated into
# regular expressions, hence the '|' separators.
my $asciidoc_RE_SECTION_TEMPLATES = join('|',
    qw(sect1 sect2 sect3 sect4 preface colophon dedication synopsis index));
my $asciidoc_RE_STYLE_ADMONITION = join('|',
    qw(TIP NOTE IMPORTANT WARNING CAUTION));
my $asciidoc_RE_STYLE_PARAGRAPH = join('|',
    qw(normal literal verse quote listing abstract partintro comment
       example sidebar source music latex graphviz));
my $asciidoc_RE_STYLE_NUMBERING = join('|',
    qw(arabic loweralpha upperalpha lowerroman upperroman));
my $asciidoc_RE_STYLE_LIST = join('|',
    qw(appendix horizontal qanda glossary bibliography));
# Every style above, plus "float".
my $asciidoc_RE_STYLES = join('|',
    $asciidoc_RE_SECTION_TEMPLATES,
    $asciidoc_RE_STYLE_ADMONITION,
    $asciidoc_RE_STYLE_PARAGRAPH,
    $asciidoc_RE_STYLE_NUMBERING,
    $asciidoc_RE_STYLE_LIST,
    'float');
379
BEGIN {
    # Probe for Unicode::GCString once, when the module is compiled.
    my $have_gcstring = 0;
    $have_gcstring = 1 if (eval { require Unicode::GCString });

    # columns($text, $encoder, $ref)
    #
    # Returns the display width of $text.  $text is first decoded with
    # $encoder when an encoder is given and is not the "ascii" one.
    # With Unicode::GCString the width is its columns() value; without
    # it, the width is the length of $text minus one trailing newline,
    # which is only done for undecoded (ASCII) text.  Decoded text
    # without Unicode::GCString is a fatal error that mentions $ref.
    sub columns($$$) {
        my ($text, $encoder, $where) = @_;

        my $decoded = (defined($encoder) && $encoder->name ne "ascii");
        $text = $encoder->decode($text) if $decoded;

        return Unicode::GCString->new($text)->columns()
            if $have_gcstring;

        $text =~ s/\n$//s;
        return length($text) unless $decoded;

        die wrap_mod("po4a::text",
            dgettext("po4a", "Detection of two line titles failed at %s\nInstall the Unicode::GCString module!"), $where)
    }
}
399
# parse_asciidoc($self, $line, $ref, $paragraph, $wrapped_mode,
#                $expect_header, $end_of_paragraph)
#
# Line handler for AsciiDoc documents.  Recognises, in this order:
# untranslated and comment blocks in progress, list continuations,
# two-line and one-line titles, delimited blocks, comment lines, block
# ids, admonitions, style lines, quote/verse/icon/caption attribute
# lists, labeled lists, attribute entries, block titles, bullets,
# callouts and indented continuation lines of a list item.  Everything
# else is delegated to parse_fallback().
#
# State kept on $self: {verbatim} (1 = verbatim block, 2 = comment
# block, 3 = untranslated block), {type}, {bullet} and {indent}.
# Comment text is collected in @comments and attached to the next
# translated string.
#
# Returns ($paragraph, $wrapped_mode, $expect_header, $end_of_paragraph).
sub parse_asciidoc {
    my ($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph) = @_;

    # Verbatim state on entry.  Only the branch selection below uses this
    # snapshot; code that runs after {verbatim} was changed reads the
    # live value.
    my $verbatim = $self->{verbatim};
    my $plain    = !defined($verbatim);

    # Translate a one-line string, attaching the pending comments.
    my $translate = sub {
        my ($text, $kind) = @_;
        return $self->translate($text,
                                $self->{ref},
                                $kind,
                                "comment" => join("\n", @comments),
                                "wrap" => 0);
    };
    # Forget the list item (bullet and indentation) being processed.
    my $forget_item = sub {
        undef $self->{bullet};
        undef $self->{indent};
        return;
    };

    if (defined($verbatim) and $verbatim == 3) {
        # Untranslated blocks: copy lines until the closing delimiter.
        $self->pushline($line."\n");
        if ($line =~ m/^~{4,}$/) {
            undef $self->{verbatim};
            undef $self->{type};
            $wrapped_mode = 1;
        }
    } elsif (defined($verbatim) and $verbatim == 2) {
        # CommentBlock: lines become comments, the delimiter closes it.
        if ($line =~ m/^\/{4,}$/) {
            undef $self->{verbatim};
            undef $self->{type};
            $wrapped_mode = 1;
        } else {
            push @comments, $line;
        }
    } elsif ($plain and ($line =~ m/^(\+|--)$/)) {
        # List Item Continuation or List Block
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph = "";
        $self->pushline($line."\n");
    } elsif ($plain and
             ($line =~ m/^(={2,}|-{2,}|~{2,}|\^{2,}|\+{2,})$/) and
             defined($paragraph) and
             ($paragraph =~ m/^[^\n]*\n$/s) and
             (columns($paragraph, $self->{TT}{po_in}{encoder}, $ref) == (length($line)))) {
        # Found title: a single pending line underlined by a ruler of
        # exactly the same width.
        my $level = $line;
        $level =~ s/^(.).*$/$1/;
        $paragraph =~ s/\n$//s;
        my $t = $translate->($paragraph, "Title $level");
        $self->pushline($t."\n");
        $paragraph = "";
        @comments = ();
        $wrapped_mode = 1;
        # Re-create the underline with the width of the translation.
        $self->pushline(($level x (columns($t, $self->{TT}{po_in}{encoder}, $ref)))."\n");
    } elsif ($line =~ m/^(={1,5})( +)(.*?)( +\1)?$/) {
        # Found one line title
        my ($lead, $gap, $title, $tail) = ($1, $2, $3, $4||"");
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph = "";
        my $t = $translate->($title, "Title $lead");
        $self->pushline($lead.$gap.$t.$tail."\n");
        @comments = ();
        $wrapped_mode = 1;
    } elsif ($line =~ m/^(\/{4,}|\+{4,}|-{4,}|\.{4,}|\*{4,}|_{4,}|={4,}|~{4,}|\|={4,})$/) {
        # Found one delimited block
        my $t = $line;
        $t =~ s/^(.).*$/$1/;
        my $type = "delimited block $t";
        if (defined $self->{verbatim} and ($self->{type} ne $type)) {
            # A different delimiter inside a verbatim block is content.
            $paragraph .= "$line\n";
        } else {
            do_paragraph($self,$paragraph,$wrapped_mode);
            if (    (defined $self->{type})
                and ($self->{type} eq $type)) {
                # Closing delimiter of the current block.
                undef $self->{type};
                undef $self->{verbatim};
                $wrapped_mode = 1;
            } else {
                # Opening delimiter: the first character tells the kind.
                if ($t eq "\/") {
                    # CommentBlock, should not be treated
                    $self->{verbatim} = 2;
                } elsif (   $t eq "+"      # PassthroughBlock
                         or $t eq "-"      # ListingBlock
                         or $t eq "|"      # ListingBlock
                         or $t eq ".") {   # LiteralBlock
                    $wrapped_mode = 0;
                    $self->{verbatim} = 1;
                } elsif (   $t eq "*"      # SidebarBlock
                         or $t eq "=") {   # ExampleBlock
                    $wrapped_mode = 1;
                } elsif ($t eq "_") {
                    # QuoteBlock; kept verbatim when it holds a verse
                    if (    (defined $self->{type})
                        and ($self->{type} eq "verse")) {
                        $wrapped_mode = 0;
                        $self->{verbatim} = 1;
                    } else {
                        $wrapped_mode = 1;
                    }
                } elsif ($t eq "~") {
                    # Filter blocks, TBC: not translated
                    $wrapped_mode = 0;
                    $self->{verbatim} = 3;
                }
                $self->{type} = $type;
            }
            $paragraph = "";
            # The opening delimiter of a comment block is not copied.
            $self->pushline($line."\n")
                unless defined($self->{verbatim}) && $self->{verbatim} == 2;
        }
    } elsif ($plain and ($line =~ m/^\/\/(.*)/)) {
        # Comment line
        push @comments, $1;
    } elsif ($plain and
             ($line =~ m/^\[\[([^\]]*)\]\]$/)) {
        # Found BlockId
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph = "";
        $wrapped_mode = 1;
        $self->pushline($line."\n");
        $forget_item->();
    } elsif ($plain and
             ($paragraph eq "") and
             ($line =~ m/^((?:$asciidoc_RE_STYLE_ADMONITION):\s+)(.*)$/)) {
        # Admonition: the label is copied, the text starts a paragraph.
        my ($label, $text) = ($1, $2);
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph = $text."\n";
        $wrapped_mode = 1;
        $self->pushline($label);
        $forget_item->();
    } elsif ($plain and
             ($line =~ m/^\[($asciidoc_RE_STYLES)\]$/)) {
        # Style line; a verse must not be rewrapped.
        my $style = $1;
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph = "";
        $self->pushline($line."\n");
        $wrapped_mode = ($style eq "verse") ? 0 : 1;
        $forget_item->();
    } elsif ($plain and
             ($line =~ m/^\[(['"]?)(verse|quote)\1, +(.*)\]$/)) {
        # Quote or verse with attribution: translate the attribution.
        my ($quote, $style, $arg) = ($1 || '', $2, $3);
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph = "";
        my $t = $translate->($arg, "$style");
        $self->pushline("[$quote$style$quote, $t]\n");
        @comments = ();
        $wrapped_mode = ($style eq "verse") ? 0 : 1;
        $self->{type} = $style;
        $forget_item->();
    } elsif ($plain and
             ($line =~ m/^\[icon="(.*)"\]$/)) {
        my $arg = $1;
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph = "";
        my $t = $translate->($arg, "icon");
        $self->pushline("[icon=\"$t\"]\n");
        @comments = ();
        $wrapped_mode = 1;
        $forget_item->();
    } elsif ($plain and
             ($line =~ m/^\[icons=None, +caption="(.*)"\]$/)) {
        my $arg = $1;
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph = "";
        my $t = $translate->($arg, "caption");
        $self->pushline("[icons=None, caption=\"$t\"]\n");
        @comments = ();
        $wrapped_mode = 1;
        $forget_item->();
    } elsif ($plain and
             ($line =~ m/^(\s*)([*_+`'#[:alnum:]].*)((?:::|;;|\?\?|:-)(?: *\\)?)$/)) {
        # Found labeled list
        my ($indent, $label, $labelend) = ($1, $2, $3);
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph = "";
        $wrapped_mode = 1;
        $self->{bullet} = "";
        $self->{indent} = $indent;
        my $t = $translate->($label, "Labeled list");
        $self->pushline("$indent$t$labelend\n");
        @comments = ();
    } elsif ($plain and
             ($line =~ m/^(\s*)(\S.*)((?:::|;;)\s+)(.*)$/)) {
        # Found Horizontal Labeled Lists: the text after the label
        # starts the next paragraph.
        my ($indent, $label, $labelend, $labeltext) = ($1, $2, $3, $4);
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph = $labeltext."\n";
        $wrapped_mode = 1;
        $self->{bullet} = "";
        $self->{indent} = $indent;
        my $t = $translate->($label, "Labeled list");
        $self->pushline("$indent$t$labelend");
        @comments = ();
    } elsif ($plain and
             ($line =~ m/^\:(\S.*?)(:\s*)(.*)$/)) {
        # Found a Attribute entry
        my ($attrname, $attrsep, $attrvalue) = ($1, $2, $3);
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph = "";
        $wrapped_mode = 1;
        $forget_item->();
        my $t = $translate->($attrvalue, "Attribute :$attrname:");
        $self->pushline(":$attrname$attrsep$t\n");
        @comments = ();
    } elsif ($plain and
             ($line !~ m/^\.\./) and ($line =~ m/^\.(\S.*)$/)) {
        # Found block title
        my $title = $1;
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph = "";
        $wrapped_mode = 1;
        $forget_item->();
        my $t = $translate->($title, "Block title");
        $self->pushline(".$t\n");
        @comments = ();
    } elsif ($plain and
             ($line =~ m/^(\s*)((?:[-*o+]|(?:[0-9]+[.\)])|(?:[a-z][.\)])|\([0-9]+\)|\.|\.\.)\s+)(.*)$/)) {
        # Bulleted or numbered list item
        my ($indent, $bullet, $text) = ($1||"", $2, $3);
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph = $text."\n";
        $self->{indent} = $indent;
        $self->{bullet} = $bullet;
    } elsif ($plain and
             ($line =~ m/^((?:<?[0-9]+)?> +)(.*)$/)) {
        # Callout list item
        my ($bullet, $text) = ($1, $2);
        do_paragraph($self,$paragraph,$wrapped_mode);
        $paragraph = $text."\n";
        $self->{indent} = "";
        $self->{bullet} = $bullet;
    } elsif ($plain and
             (defined $self->{bullet} and $line =~ m/^(\s+)(.*)$/)) {
        # Indented line while a list item is being processed
        my ($indent, $text) = ($1, $2);
        if (not defined $self->{indent}) {
            $paragraph .= $text."\n";
            $self->{indent} = $indent;
        } elsif (length($paragraph) and (length($self->{bullet}) + length($self->{indent}) == length($indent))) {
            # Aligned with the item text: same paragraph.
            $paragraph .= $text."\n";
        } else {
            # Different alignment: start a new, bullet-less paragraph.
            do_paragraph($self,$paragraph,$wrapped_mode);
            $paragraph = $text."\n";
            $self->{indent} = $indent;
            $self->{bullet} = "";
        }
    } else {
        return parse_fallback($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph);
    }
    return ($paragraph,$wrapped_mode,$expect_header,$end_of_paragraph);
}
710
# parse_markdown($self, $line, $ref, $paragraph, $wrapped_mode,
#                $expect_header, $end_of_paragraph)
#
# Line handler for Markdown texts.  Titles (underlined or prefixed with
# '#'), horizontal rules and some macro markup get a dedicated
# treatment; any other line is delegated to parse_fallback().
#
# Returns ($paragraph, $wrapped_mode, $expect_header, $end_of_paragraph).
sub parse_markdown {
    my ($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph) = @_;

    if (($line =~ m/^(={4,}|-{4,})$/) and
        (defined($paragraph) ) and
        ($paragraph =~ m/^[^\n]*\n$/s) and
        (length($paragraph) == (length($line)+1))) {
        # XXX: There can be any number of underlining according
        #      to the documentation. This detection, which avoid
        #      translating the formatting, is only supported if
        #      the underlining has the same size as the header text.
        # Found title
        my $level = $line;
        $level =~ s/^(.).*$/$1/;
        my $t = $self->translate($paragraph,
                                 $self->{ref},
                                 "Title $level",
                                 "wrap" => 0);
        $self->pushline($t);
        # $t still ends with its newline, which must not be underlined.
        $self->pushline(($level x (length($t)-1))."\n");
        return ("",1,$expect_header,$end_of_paragraph);
    }

    if ($line =~ m/^(#{1,6})( +)(.*?)( +\1)?$/) {
        # Found one line title
        my ($lead, $gap, $title, $tail) = ($1, $2, $3, $4||"");
        do_paragraph($self,$paragraph,$wrapped_mode);
        my $t = $self->translate($title,
                                 $self->{ref},
                                 "Title $lead",
                                 "wrap" => 0);
        $self->pushline($lead.$gap.$t.$tail."\n");
        return ("",1,$expect_header,$end_of_paragraph);
    }

    if (($paragraph eq "") and
        ($line =~ /^((\*\s*){3,}|(-\s*){3,}|(_\s*){3,})$/)) {
        # Horizontal rule
        $self->pushline($line."\n");
        return ($paragraph,1,$expect_header,$end_of_paragraph);
    }

    if (   $line =~ /^\s*\[\[\!\S+\s*$/     # macro begin
        or $line =~ /^\s*"""\s*\]\]\s*$/) { # """ textblock inside macro end
        # Avoid translating Markdown lines containing only markup
        do_paragraph($self,$paragraph,$wrapped_mode);
        $self->pushline("$line\n");
        return ("",1,$expect_header,$end_of_paragraph);
    }

    if (   $line =~ /^#/                           # headline
        or $line =~ /^\s*\[\[\!\S[^\]]*\]\]\s*$/) { # sole macro
        # Preserve some Markdown markup as a single line
        do_paragraph($self,$paragraph,$wrapped_mode);
        return ("$line\n",0,$expect_header,1);
    }

    if ($line =~ /^"""/) { # """ textblock inside macro end
        # Markdown markup needing separation _before_ this line
        do_paragraph($self,$paragraph,$wrapped_mode);
        return ("$line\n",1,$expect_header,$end_of_paragraph);
    }

    return parse_fallback($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph);
}
777
# parse($self)
#
# Main loop of the module.  Reads the input line by line with
# shiftline(), hands each chomped line to the parser selected by
# initialize() ($parse_func) and flushes the accumulated paragraph with
# do_paragraph() whenever the parser asks for it, when the input moves
# to another file, and at the end of the input.
#
# Line references are expected to look like "file:NNN".
sub parse {
    my $self = shift;
    my $paragraph = "";
    my $wrapped_mode = 1;
    my $expect_header = 1;
    my $end_of_paragraph = 0;

    my ($line, $ref) = $self->shiftline();
    my $file = $ref;
    $file =~ s/:[0-9]+$// if defined($line);

    while (defined($line)) {
        # Start afresh when the input moves to another file.  The file
        # name is taken from this match only: reading $1 after an
        # unchecked match would reuse a stale capture when the reference
        # does not have the expected form.
        my ($current_file) = $ref =~ m/^(.*):[0-9]+$/;
        if (defined $current_file and $current_file ne $file) {
            $file = $current_file;
            do_paragraph($self,$paragraph,$wrapped_mode);
            $paragraph = "";
            $wrapped_mode = 1;
            $expect_header = 1;
        }

        chomp($line);
        $self->{ref} = "$ref";
        ($paragraph,$wrapped_mode,$expect_header,$end_of_paragraph) =
            $parse_func->($self,$line,$ref,$paragraph,$wrapped_mode,$expect_header,$end_of_paragraph);

        # paragraphs starting by a bullet, or numbered
        # or paragraphs with a line containing many consecutive spaces
        # (more than 3)
        # are considered as verbatim paragraphs
        $wrapped_mode = 0 if (   $paragraph =~ m/^(\*|[0-9]+[.)] )/s
                              or $paragraph =~ m/[ \t][ \t][ \t]/s);
        $wrapped_mode = 0 if (    $tabs eq "verbatim"
                              and $paragraph =~ m/\t/s);
        if ($markdown) {
            # Some Markdown markup can (or might) not survive wrapping
            $wrapped_mode = 0 if (
                   $paragraph =~ /^>/ms                     # blockquote
                or $paragraph =~ /^( {8}|\t)/ms             # monospaced
                or $paragraph =~ /^\$(\S+[{}]\S*\s*)+/ms    # Xapian macro
                or $paragraph =~ /<(?![a-z]+[:@])/ms        # maybe html (tags but not wiki <URI>)
                or $paragraph =~ /^[^<]+>/ms                # maybe html (tag with vertical space)
                or $paragraph =~ /\S {2,}$/ms               # explicit newline (two or more trailing spaces)
                or $paragraph =~ /\[\[\!\S[^\]]+$/ms        # macro begin
            );
        }
        if ($end_of_paragraph) {
            do_paragraph($self,$paragraph,$wrapped_mode);
            $paragraph = "";
            $wrapped_mode = 1;
            $end_of_paragraph = 0;
        }
        ($line,$ref) = $self->shiftline();
    }

    # Flush whatever is left after the last line.
    if (length $paragraph) {
        do_paragraph($self,$paragraph,$wrapped_mode);
    }
}
833
# do_paragraph($self, $paragraph, $wrap [, $type])
#
# Translates one paragraph and pushes the result to the output.
# Nothing is done for an empty paragraph.  $type defaults to
# $self->{type}, then to "Plain text".
#
# When bullet detection is enabled and the paragraph is neither wrapped
# nor inside a verbatim block, leading bullet items are split off and
# translated one by one as wrapped strings:
#
#   |        * blah blah
#   |<spaces>  blah
#   |          ^-- aligned
#   <empty line>
#
# Other bullets supported:
# - blah         o blah         + blah
# 1. blah       1) blah       (1) blah
#
# Whatever remains is translated as a single string, with the pending
# comments attached, and is re-indented when a list item is in progress
# ($self->{bullet} is defined).
sub do_paragraph {
    my ($self, $paragraph, $wrap, $given_type) = @_;
    my $type = $given_type || $self->{type} || "Plain text";
    return if ($paragraph eq "");

    if ($bullets and not $wrap and not defined $self->{verbatim}) {
        # Detect bullets, one item per iteration.
        BULLET:
        while ($paragraph =~ m/^(\s*)((?:[-*o+]|([0-9]+[.\)])|\([0-9]+\))\s+)([^\n]*\n)(.*)$/s) {
            my ($indent1, $bullet, $text, $para) = ($1, $2, $4, $5);
            # Indentation of the continuation lines of this item.
            my $indent2 = $indent1.(' ' x length $bullet);

            # Move the aligned continuation lines from the rest of the
            # paragraph to the text of the item.
            while ($para !~ m/$indent2(?:[-*o+]|([0-9]+[.\)])|\([0-9]+\))\s+/
                   and $para =~ s/^$indent2(\S[^\n]*\n)//s) {
                $text .= $1;
            }

            # TODO: detect if a line starts with the same bullet
            # Runs of spaces inside the text look like hand-made
            # formatting: give up on bullet handling.
            last BULLET if $text =~ m/\S[ \t][ \t][ \t]+\S/s;

            my $bullet_regex = quotemeta($indent1.$bullet);
            $bullet_regex =~ s/[0-9]+/\\d\+/;
            # The rest must be empty or start with a similar bullet.
            last BULLET
                unless ($para eq '' or $para =~ m/^$bullet_regex\S/s);

            my $trans = $self->translate($text,
                                         $self->{ref},
                                         "Bullet: '$indent1$bullet'",
                                         "wrap" => 1,
                                         "wrapcol" => - (length $indent2));
            $trans =~ s/^/$indent1$bullet/s;
            $trans =~ s/\n(.)/\n$indent2$1/sg;
            $self->pushline( $trans."\n" );

            return if $para eq '';
            # Another bullet
            $paragraph = $para;
        }
    }

    # A wrapped paragraph is translated without its trailing newlines,
    # which are put back after the translation.
    my $end = "";
    if ($wrap) {
        $paragraph =~ s/^(.*?)(\n*)$/$1/s;
        $end = $2 || "";
    }
    my $t = $self->translate($paragraph,
                             $self->{ref},
                             $type,
                             "comment" => join("\n", @comments),
                             "wrap" => $wrap);
    @comments = ();

    if (defined $self->{bullet}) {
        # Put the bullet back in front of the first line and indent the
        # following ones accordingly.
        my $bullet  = $self->{bullet};
        my $indent1 = $self->{indent};
        my $indent2 = $indent1.(' ' x length($bullet));
        $t =~ s/^/$indent1$bullet/s;
        $t =~ s/\n(.)/\n$indent2$1/sg;
    }
    $self->pushline( $t.$end );
}
914
9151;
916
917=head1 STATUS OF THIS MODULE
918
919Tested successfully on simple text files and NEWS.Debian files.
920
921=head1 AUTHORS
922
923 Nicolas François <nicolas.francois@centraliens.net>
924
925=head1 COPYRIGHT AND LICENSE
926
927 Copyright 2005-2008 by Nicolas FRANÇOIS <nicolas.francois@centraliens.net>.
928
929This program is free software; you may redistribute it and/or modify it
930under the terms of GPL (see the COPYING file).
931

Archive Download this file

Revision: 2380