Chameleon

Chameleon Svn Source Tree

Root/branches/ErmaC/Enoch_Modules/package/bin/po4a/lib/Locale/Po4a/Text.pm

1#!/usr/bin/perl -w
2
3# Po4a::Text.pm
4#
5# extract and translate translatable strings from text documents
6#
7# This program is free software; you can redistribute it and/or modify
8# it under the terms of the GNU General Public License as published by
9# the Free Software Foundation; either version 2 of the License, or
10# (at your option) any later version.
11#
12# This program is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15# GNU General Public License for more details.
16#
17# You should have received a copy of the GNU General Public License
18# along with this program; if not, write to the Free Software
19# Foundation, Inc.,
20# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21#
22########################################################################
23
24=encoding UTF-8
25
26=head1 NAME
27
28Locale::Po4a::Text - convert text documents from/to PO files
29
30=head1 DESCRIPTION
31
32The po4a (PO for anything) project goal is to ease translations (and more
33interestingly, the maintenance of translations) using gettext tools on
34areas where they were not expected like documentation.
35
36Locale::Po4a::Text is a module to help the translation of text documents into
37other [human] languages.
38
39Paragraphs are split on empty lines (or lines containing only spaces or
40tabulations).
41
42If a paragraph contains a line starting with a space (or tabulation), this
43paragraph won't be rewrapped.
44
45=cut
46
47package Locale::Po4a::Text;
48
49use 5.006;
50use strict;
51use warnings;
52
53require Exporter;
54use vars qw(@ISA @EXPORT);
55@ISA = qw(Locale::Po4a::TransTractor);
56@EXPORT = qw();
57
58use Locale::Po4a::TransTractor;
59use Locale::Po4a::Common;
60
61=head1 OPTIONS ACCEPTED BY THIS MODULE
62
63These are this module's particular options:
64
65=over
66
67=item B<nobullets>
68
69Deactivate detection of bullets.
70
71By default, when a bullet is detected, the bullet paragraph is not considered
72as a verbatim paragraph (with the no-wrap flag in the PO file), but the module
73rewraps this paragraph in the generated PO file and in the translation.
74
75=cut
76
# Bullet detection flag: 1 by default, set to 0 by initialize() when the
# nobullets option is defined; consulted by do_paragraph().
77my $bullets = 1;
78
79=item B<tabs=>I<mode>
80
81Specify how tabulations shall be handled. The I<mode> can be any of:
82
83=over
84
85=item B<split>
86
87Lines with tabulations introduce breaks in the current paragraph.
88
89=item B<verbatim>
90
91Paragraphs containing tabulations will not be re-wrapped.
92
93=back
94
95By default, tabulations are considered as spaces.
96
97=cut
98
# Tab handling mode: empty string by default; initialize() copies the value
# of the tabs option here and parse() compares it with "split" and "verbatim".
99my $tabs = "";
100
101=item B<breaks=>I<regex>
102
103A regular expression matching lines which introduce breaks.
104The regular expression will be anchored so that the whole line must match.
105
106=cut
107
# Regular expression for lines that break paragraphs: undefined unless the
# breaks option is given; parse() matches it anchored as /^$breaks$/.
108my $breaks;
109
110=item B<debianchangelog>
111
112Handle the header and footer of
113released versions, which only contain non-translatable information.
114
115=cut
116
# Set to 1 by initialize() when the debianchangelog option is defined;
# enables the changelog header/trailer branches in parse().
117my $debianchangelog = 0;
118
119=item B<fortunes>
120
121Handle the fortunes format, which separates fortunes with a line which
122consists of '%' or '%%', and uses '%%' as the beginning of a comment.
123
124=cut
125
# Set to 1 by initialize() when the fortunes option is defined; enables the
# '%' / '%%' separator branch and the '%%' stripping in parse().
126my $fortunes = 0;
127
128=item B<markdown>
129
130Handle some special markup in Markdown-formatted texts.
131
132=cut
133
# Set to 1 by initialize() when the markdown option is defined; enables the
# Markdown-specific branches and no-wrap heuristics in parse().
134my $markdown = 0;
135
136=item B<asciidoc>
137
138Handle documents in the AsciiDoc format.
139
140=cut
141
# Set to 1 by initialize() when the asciidoc option is defined; enables the
# AsciiDoc-specific branches in parse().
142my $asciidoc = 0;
143
144=item B<control>[B<=>I<taglist>]
145
146Handle control files.
147A comma-separated list of tags to be translated can be provided.
148
149=cut
150
# Tags of control-file fields to translate, filled by initialize() from the
# control option; the key '' means every tag. parse() only runs its
# control-file branches when this hash is non-empty.
151my %control = ();
152
153=back
154
155=cut
156
# initialize($self, %options)
#
# Registers the option names this module accepts in $self->{options},
# rejects any option that is not registered, and copies the options that
# were given into the file-level variables read by parse() and
# do_paragraph().
#
# Dies (message built by wrap_mod) with "Unknown option: %s" when %options
# contains a key that is not registered.
sub initialize {
    my ($self, %options) = @_;

    # Registering a name here is what makes it acceptable in the check below.
    $self->{options}{$_} = 1
        for qw(asciidoc breaks debianchangelog debug fortunes
               markdown nobullets tabs verbose);
    $self->{options}{'control'} = "";

    for my $name (keys %options) {
        unless (exists $self->{options}{$name}) {
            die wrap_mod("po4a::text",
                         dgettext("po4a", "Unknown option: %s"), $name);
        }
        $self->{options}{$name} = $options{$name};
    }

    # For the switches below any defined value turns the feature on (or, for
    # nobullets, off); the value itself is not looked at.
    $bullets = 0 if defined $options{'nobullets'};

    my %switch_for = (
        debianchangelog => \$debianchangelog,
        fortunes        => \$fortunes,
        markdown        => \$markdown,
        asciidoc        => \$asciidoc,
    );
    for my $name (keys %switch_for) {
        ${ $switch_for{$name} } = 1 if defined $options{$name};
    }

    # These two options carry a value that is stored as given.
    $tabs   = $options{'tabs'}   if defined $options{'tabs'};
    $breaks = $options{'breaks'} if defined $options{'breaks'};

    # control=1 selects every tag (recorded under the '' key); otherwise the
    # value is a comma-separated list of tag names.
    if (defined $options{'control'}) {
        my $spec = $options{'control'};
        my @tags = ($spec eq "1") ? ('') : split(',', $spec);
        $control{$_} = 1 for @tags;
    }
}
217
# parse($self)
#
# Main loop of the module. Reads the input one line at a time with
# shiftline(), classifies each line through one long if/elsif chain (the
# first matching branch wins, so branch order matters), accumulates ordinary
# lines into $paragraph and hands finished paragraphs to do_paragraph().
# Structural lines (separators, block delimiters, headers) are written to
# the output through pushline(), some of them after translating a part of
# the line with translate().
#
# Local state:
#   $paragraph        - text accumulated for the paragraph being built
#   $wrapped_mode     - 1 when the paragraph may be rewrapped, 0 otherwise
#   $expect_header    - debianchangelog: 1 while a header line may come next
#   $end_of_paragraph - set by a branch to flush right after the current line
#   $file             - file-name part of $ref, to notice a change of file
#
# Instance fields read and written: {ref}, {verbatim}, {type}, {bullet},
# {indent}, {controlkey}. {verbatim} == 2 marks a block that is copied to
# the output without translation.
218sub parse {
219 my $self = shift;
220 my ($line,$ref);
221 my $paragraph="";
222 my $wrapped_mode = 1;
223 my $expect_header = 1;
224 my $end_of_paragraph = 0;
 # Fetch the first line; $ref ends with ":<line number>", which is removed
 # to obtain the file name.
225 ($line,$ref)=$self->shiftline();
226 my $file = $ref;
227 $file =~ s/:[0-9]+$// if defined($line);
228 while (defined($line)) {
 # NOTE(review): $1 is read on the next line without checking that this
 # match succeeded.
229 $ref =~ m/^(.*):[0-9]+$/;
230 if ($1 ne $file) {
 # The input moved on to another file: flush and reset the state.
231 $file = $1;
232 do_paragraph($self,$paragraph,$wrapped_mode);
233 $paragraph="";
234 $wrapped_mode = 1;
235 $expect_header = 1;
236 }
237
238 chomp($line);
239 $self->{ref}="$ref";
240 if ($debianchangelog and
241 $expect_header and
242 $line =~ /^(\w[-+0-9a-z.]*)\ \(([^\(\) \t]+)\) # src, version
243 \s+([-+0-9a-z.]+); # distribution
244 \s*urgency\s*\=\s*(.*\S)\s*$/ix) { #
 # Changelog header line: flush, copy the line untranslated and stop
 # expecting a header until a trailer has been seen.
245 do_paragraph($self,$paragraph,$wrapped_mode);
246 $paragraph="";
247 $self->pushline("$line\n");
248 $expect_header=0;
249 } elsif ($debianchangelog and
250 $line =~ m/^ \-\- (.*) <(.*)> ((\w+\,\s*)?\d{1,2}\s+\w+\s+\d{4}\s+\d{1,2}:\d\d:\d\d\s+[-+]\d{4}(\s+\([^\\\(\)]\))?)$/) {
251 # Found trailer
252 do_paragraph($self,$paragraph,$wrapped_mode);
253 $paragraph="";
254 $self->pushline("$line\n");
255 $expect_header=1;
256 } elsif ($fortunes and
257 $line =~ m/^%%?\s*$/) {
258 # Found end of fortune
259 do_paragraph($self,$paragraph,$wrapped_mode);
260 $self->pushline("\n") unless ( $wrapped_mode == 0
261 or $paragraph eq "");
262 $paragraph="";
263 $wrapped_mode = 1;
264 $self->pushline("$line\n");
265 } elsif ( $line =~ m/^([^ :]*): *(.*)$/
266 and %control) {
 # Control-file field "Tag: value". The value is translated when all tags
 # (key '') or this particular tag were requested, otherwise it is copied.
 # The first field seen since the last blank line is remembered in
 # {controlkey} and appended to the type of the following strings.
267 warn "Unrecognized section: '$paragraph'\n"
268 unless $paragraph eq "";
269 my $tag = $1;
270 my $val = $2;
271 my $t;
272 if ($control{''} or $control{$tag}) {
273 $t = $self->translate($val,
274 $self->{ref},
275 $tag.(defined $self->{controlkey}?", ".$self->{controlkey}:""),
276 "wrap" => 0);
277 } else {
278 $t = $val;
279 }
280 if (not defined $self->{controlkey}) {
281 $self->{controlkey} = "$tag: $val";
282 }
283 $self->pushline("$tag: $t\n");
284 $paragraph="";
285 $wrapped_mode = 1;
286 $self->{bullet} = "";
287 $self->{indent} = " ";
288 } elsif (%control and
289 $line eq " .") {
 # Control file: a " ." line ends the current part of a long description;
 # the pending text is flushed as "Long Description" and the line is copied.
290 do_paragraph($self,$paragraph,$wrapped_mode,
291 "Long Description".(defined $self->{controlkey}?", ".$self->{controlkey}:""));
292 $paragraph="";
293 $self->pushline($line."\n");
294 $self->{bullet} = "";
295 $self->{indent} = " ";
296 } elsif (%control and
297 $line =~ m/^ Link: +(.*)$/) {
 # Control file: "Link:" line inside a description; the label and the
 # target are translated as two separate strings.
298 do_paragraph($self,$paragraph,$wrapped_mode,
299 "Long Description".(defined $self->{controlkey}?", ".$self->{controlkey}:""));
300 my $link=$1;
301 my $t1 = $self->translate("Link: ",
302 $self->{ref},
303 "Link",
304 "wrap" => 0);
305 my $t2 = $self->translate($link,
306 $self->{ref},
307 "Link".(defined $self->{controlkey}?", ".$self->{controlkey}:""),
308 "wrap" => 0);
309 $self->pushline(" $t1$t2\n");
310 $paragraph="";
311 } elsif (%control and
312 defined $self->{indent} and
313 $line =~ m/^$self->{indent}\S/) {
 # Control file: line starting with the current indent followed by a
 # non-space character; it is appended to the pending description text.
314 $paragraph .= $line."\n";
315 $self->{type} = "Long Description".(defined $self->{controlkey}?", ".$self->{controlkey}:"");
316 } elsif ( (defined $self->{verbatim})
317 and ($self->{verbatim} == 2)) {
318 # Untranslated blocks
319 $self->pushline($line."\n");
320 if ($asciidoc and
321 ($line =~ m/^(\/{4,}|~{4,})$/)) {
 # A line of four or more '/' or '~' ends the untranslated block.
322 undef $self->{verbatim};
323 undef $self->{type};
324 $wrapped_mode = 1;
325 }
326 } elsif ( ($line =~ /^\s*$/)
327 or ( defined $breaks
328 and $line =~ m/^$breaks$/)) {
329 # Break paragraphs on lines containing only spaces
 # (or on lines matching the user-supplied breaks regex); this also
 # forgets the control-file key.
330 do_paragraph($self,$paragraph,$wrapped_mode);
331 $paragraph="";
332 $wrapped_mode = 1 unless defined($self->{verbatim});
333 $self->pushline($line."\n");
334 undef $self->{controlkey};
335 } elsif ($asciidoc and (not defined($self->{verbatim})) and
336 ($line =~ m/^(\+|--)$/)) {
337 # List Item Continuation or List Block
338 do_paragraph($self,$paragraph,$wrapped_mode);
339 $paragraph="";
340 $self->pushline($line."\n");
341 } elsif ($asciidoc and (not defined($self->{verbatim})) and
342 ($line =~ m/^(={4,}|-{4,}|~{4,}|\^{4,}|\+{4,})$/) and
343 (defined($paragraph) )and
344 ($paragraph =~ m/^[^\n]*\n$/s) and
345 (length($paragraph) == (length($line)+1))) {
346 # Found title
 # (a one-line paragraph underlined by a line of exactly the same length).
 # The underline is regenerated to the length of the translated title.
347 $wrapped_mode = 0;
348 my $level = $line;
349 $level =~ s/^(.).*$/$1/;
350 $paragraph =~ s/\n$//s;
351 my $t = $self->translate($paragraph,
352 $self->{ref},
353 "Title $level",
354 "wrap" => 0);
355 $self->pushline($t."\n");
356 $paragraph="";
357 $wrapped_mode = 1;
358 $self->pushline(($level x (length($t)))."\n");
359 } elsif ($asciidoc and
360 ($line =~ m/^(={1,5})( +)(.*?)( +\1)?$/)) {
361 my $titlelevel1 = $1;
362 my $titlespaces = $2;
363 my $title = $3;
364 my $titlelevel2 = $4||"";
365 # Found one line title
366 do_paragraph($self,$paragraph,$wrapped_mode);
367 $wrapped_mode = 0;
368 $paragraph="";
369 my $t = $self->translate($title,
370 $self->{ref},
371 "Title $titlelevel1",
372 "wrap" => 0);
373 $self->pushline($titlelevel1.$titlespaces.$t.$titlelevel2."\n");
374 $wrapped_mode = 1;
375 } elsif ($asciidoc and
376 ($line =~ m/^(\/{4,}|\+{4,}|-{4,}|\.{4,}|\*{4,}|_{4,}|={4,}|~{4,})$/)) {
377 # Found one delimited block
378 my $t = $line;
379 $t =~ s/^(.).*$/$1/;
380 my $type = "delimited block $t";
381 if (defined $self->{verbatim} and ($self->{type} ne $type)) {
 # Delimiter of another kind inside an open verbatim block: plain content.
382 $paragraph .= "$line\n";
383 } else {
384 do_paragraph($self,$paragraph,$wrapped_mode);
385 if ( (defined $self->{type})
386 and ($self->{type} eq $type)) {
 # Same delimiter as the block that is open: this line closes it.
387 undef $self->{type};
388 undef $self->{verbatim};
389 $wrapped_mode = 1;
390 } else {
 # Opening delimiter: the first character selects the block kind and
 # whether its content is wrapped, kept as-is, or left untranslated.
391 if ($t eq "\/") {
392 # CommentBlock, should not be treated
393 $self->{verbatim} = 2;
394 } elsif ($t eq "+") {
395 # PassthroughBlock
396 $wrapped_mode = 0;
397 $self->{verbatim} = 1;
398 } elsif ($t eq "-") {
399 # ListingBlock
400 $wrapped_mode = 0;
401 $self->{verbatim} = 1;
402 } elsif ($t eq ".") {
403 # LiteralBlock
404 $wrapped_mode = 0;
405 $self->{verbatim} = 1;
406 } elsif ($t eq "*") {
407 # SidebarBlock
408 $wrapped_mode = 1;
409 } elsif ($t eq "_") {
410 # QuoteBlock
411 if ( (defined $self->{type})
412 and ($self->{type} eq "verse")) {
413 $wrapped_mode = 0;
414 $self->{verbatim} = 1;
415 } else {
416 $wrapped_mode = 1;
417 }
418 } elsif ($t eq "=") {
419 # ExampleBlock
420 $wrapped_mode = 1;
421 } elsif ($t eq "~") {
422 # Filter blocks, TBC: not translated
423 $wrapped_mode = 0;
424 $self->{verbatim} = 2;
425 }
426 $self->{type} = $type;
427 }
428 $paragraph="";
429 $self->pushline($line."\n");
430 }
431 } elsif ($asciidoc and not defined $self->{verbatim} and
432 ($line =~ m/^\[\[([^\]]*)\]\]$/)) {
433 # Found BlockId
434 do_paragraph($self,$paragraph,$wrapped_mode);
435 $paragraph="";
436 $wrapped_mode = 1;
437 $self->pushline($line."\n");
438 undef $self->{bullet};
439 undef $self->{indent};
440 } elsif ($asciidoc and not defined $self->{verbatim} and
441 ($paragraph eq "") and
442 ($line =~ m/^((?:NOTE|TIP|IMPORTANT|WARNING|CAUTION):\s+)(.*)$/)) {
 # Paragraph starting with "NOTE: " and the like: the label is copied
 # without a newline and the rest of the line starts a new paragraph.
443 my $type = $1;
444 my $text = $2;
445 do_paragraph($self,$paragraph,$wrapped_mode);
446 $paragraph=$text."\n";
447 $wrapped_mode = 1;
448 $self->pushline($type);
449 undef $self->{bullet};
450 undef $self->{indent};
451 } elsif ($asciidoc and not defined $self->{verbatim} and
452 ($line =~ m/^\[(NOTE|TIP|IMPORTANT|WARNING|CAUTION|verse|quote)\]$/)) {
 # Bracketed style line such as "[NOTE]" or "[verse]": copied as-is;
 # text following "[verse]" is not rewrapped.
453 my $type = $1;
454 do_paragraph($self,$paragraph,$wrapped_mode);
455 $paragraph="";
456 $wrapped_mode = 1;
457 $self->pushline($line."\n");
458 if ($type eq "verse") {
459 $wrapped_mode = 0;
460 }
461 undef $self->{bullet};
462 undef $self->{indent};
463 } elsif ($asciidoc and not defined $self->{verbatim} and
464 ($line =~ m/^\[(verse|quote), +(.*)\]$/)) {
 # "[verse, ...]" / "[quote, ...]": the argument text is translated.
465 my $type = $1;
466 my $arg = $2;
467 do_paragraph($self,$paragraph,$wrapped_mode);
468 $paragraph="";
469 my $t = $self->translate($arg,
470 $self->{ref},
471 "$type",
472 "wrap" => 0);
473 $self->pushline("[$type, $t]\n");
474 $wrapped_mode = 1;
475 if ($type eq "verse") {
476 $wrapped_mode = 0;
477 }
478 $self->{type} = $type;
479 undef $self->{bullet};
480 undef $self->{indent};
481 } elsif ($asciidoc and not defined $self->{verbatim} and
482 ($line =~ m/^\[icon="(.*)"\]$/)) {
 # [icon="..."]: the quoted value is translated.
483 my $arg = $1;
484 do_paragraph($self,$paragraph,$wrapped_mode);
485 $paragraph="";
486 my $t = $self->translate($arg,
487 $self->{ref},
488 "icon",
489 "wrap" => 0);
490 $self->pushline("[icon=\"$t\"]\n");
491 $wrapped_mode = 1;
492 undef $self->{bullet};
493 undef $self->{indent};
494 } elsif ($asciidoc and not defined $self->{verbatim} and
495 ($line =~ m/^\[icons=None, +caption="(.*)"\]$/)) {
 # [icons=None, caption="..."]: the caption is translated.
496 my $arg = $1;
497 do_paragraph($self,$paragraph,$wrapped_mode);
498 $paragraph="";
499 my $t = $self->translate($arg,
500 $self->{ref},
501 "caption",
502 "wrap" => 0);
503 $self->pushline("[icons=None, caption=\"$t\"]\n");
504 $wrapped_mode = 1;
505 undef $self->{bullet};
506 undef $self->{indent};
507 } elsif ($asciidoc and not defined $self->{verbatim} and
508 ($line =~ m/^(\s*)([*_+`'#[:alnum:]].*)((?:::|;;|\?\?|:-)(?: *\\)?)$/)) {
509 my $indent = $1;
510 my $label = $2;
511 my $labelend = $3;
512 # Found labeled list
513 do_paragraph($self,$paragraph,$wrapped_mode);
514 $paragraph="";
515 $wrapped_mode = 1;
516 $self->{bullet} = "";
517 $self->{indent} = $indent;
518 my $t = $self->translate($label,
519 $self->{ref},
520 "Labeled list",
521 "wrap" => 0);
522 $self->pushline("$indent$t$labelend\n");
523 } elsif ($asciidoc and not defined $self->{verbatim} and
524 ($line =~ m/^(\s*)(\S.*)((?:::|;;)\s+)(.*)$/)) {
525 my $indent = $1;
526 my $label = $2;
527 my $labelend = $3;
528 my $labeltext = $4;
529 # Found Horizontal Labeled Lists
 # The label is translated and written without a newline; the text after
 # it starts the next paragraph.
530 do_paragraph($self,$paragraph,$wrapped_mode);
531 $paragraph=$labeltext."\n";
532 $wrapped_mode = 1;
533 $self->{bullet} = "";
534 $self->{indent} = $indent;
535 my $t = $self->translate($label,
536 $self->{ref},
537 "Labeled list",
538 "wrap" => 0);
539 $self->pushline("$indent$t$labelend");
540 } elsif ($asciidoc and not defined $self->{verbatim} and
541 ($line =~ m/^\:(\S.*?)(:\s*)(.*)$/)) {
542 my $attrname = $1;
543 my $attrsep = $2;
544 my $attrvalue = $3;
545 # Found a Attribute entry
546 do_paragraph($self,$paragraph,$wrapped_mode);
547 $paragraph="";
548 $wrapped_mode = 1;
549 undef $self->{bullet};
550 undef $self->{indent};
551 my $t = $self->translate($attrvalue,
552 $self->{ref},
553 "Attribute :$attrname:",
554 "wrap" => 0);
555 $self->pushline(":$attrname$attrsep$t\n");
556 } elsif ($asciidoc and not defined $self->{verbatim} and
557 ($line !~ m/^\.\./) and ($line =~ m/^\.(\S.*)$/)) {
558 my $title = $1;
559 # Found block title
560 do_paragraph($self,$paragraph,$wrapped_mode);
561 $paragraph="";
562 $wrapped_mode = 1;
563 undef $self->{bullet};
564 undef $self->{indent};
565 my $t = $self->translate($title,
566 $self->{ref},
567 "Block title",
568 "wrap" => 0);
569 $self->pushline(".$t\n");
570 } elsif ($asciidoc and not defined $self->{verbatim} and
571 ($line =~ m/^(\s*)((?:[-*o+]|(?:[0-9]+[.\)])|(?:[a-z][.\)])|\([0-9]+\)|\.|\.\.)\s+)(.*)$/)) {
 # Bulleted or numbered list item: the item text starts a new paragraph;
 # {bullet} and {indent} are remembered so that do_paragraph() can put
 # them back in front of the translation.
572 my $indent = $1||"";
573 my $bullet = $2;
574 my $text = $3;
575 do_paragraph($self,$paragraph,$wrapped_mode);
576 $paragraph = $text."\n";
577 $self->{indent} = $indent;
578 $self->{bullet} = $bullet;
579 } elsif ($asciidoc and not defined $self->{verbatim} and
580 ($line =~ m/^((?:<?[0-9]+)?> +)(.*)$/)) {
 # Item introduced by '>' optionally preceded by a number, e.g. "<1> text"
 # (presumably AsciiDoc callout lists -- confirm); handled like a bullet.
581 my $bullet = $1;
582 my $text = $2;
583 do_paragraph($self,$paragraph,$wrapped_mode);
584 $paragraph = $text."\n";
585 $self->{indent} = "";
586 $self->{bullet} = $bullet;
587 } elsif ($asciidoc and not defined $self->{verbatim} and
588 (defined $self->{bullet} and $line =~ m/^(\s+)(.*)$/)) {
 # Indented line while a bullet is active: it continues the item when its
 # indentation equals indent + bullet width, otherwise it starts a new
 # paragraph with that indentation and an empty bullet.
589 my $indent = $1;
590 my $text = $2;
591 if (not defined $self->{indent}) {
592 $paragraph .= $text."\n";
593 $self->{indent} = $indent;
594 } elsif (length($paragraph) and (length($self->{bullet}) + length($self->{indent}) == length($indent))) {
595 $paragraph .= $text."\n";
596 } else {
597
598 do_paragraph($self,$paragraph,$wrapped_mode);
599 $paragraph = $text."\n";
600 $self->{indent} = $indent;
601 $self->{bullet} = "";
602 }
603 } elsif ($markdown and
604 (not defined($self->{verbatim})) and
605 ($line =~ m/^(={4,}|-{4,})$/) and
606 (defined($paragraph) )and
607 ($paragraph =~ m/^[^\n]*\n$/s) and
608 (length($paragraph) == (length($line)+1))) {
609 # XXX: There can be any number of underlining according
610 # to the documentation. This detection, which avoid
611 # translating the formatting, is only supported if
612 # the underlining has the same size as the header text.
613 # Found title
 # Here $paragraph keeps its trailing newline, hence the length($t)-1 below.
614 $wrapped_mode = 0;
615 my $level = $line;
616 $level =~ s/^(.).*$/$1/;
617 my $t = $self->translate($paragraph,
618 $self->{ref},
619 "Title $level",
620 "wrap" => 0);
621 $self->pushline($t);
622 $paragraph="";
623 $wrapped_mode = 1;
624 $self->pushline(($level x (length($t)-1))."\n");
625 } elsif ($markdown and
626 ($line =~ m/^(#{1,6})( +)(.*?)( +\1)?$/)) {
627 my $titlelevel1 = $1;
628 my $titlespaces = $2;
629 my $title = $3;
630 my $titlelevel2 = $4||"";
631 # Found one line title
632 do_paragraph($self,$paragraph,$wrapped_mode);
633 $wrapped_mode = 0;
634 $paragraph="";
635 my $t = $self->translate($title,
636 $self->{ref},
637 "Title $titlelevel1",
638 "wrap" => 0);
639 $self->pushline($titlelevel1.$titlespaces.$t.$titlelevel2."\n");
640 $wrapped_mode = 1;
641 } elsif ($markdown and
642 ($paragraph eq "") and
643 ($line =~ /^((\*\s*){3,}|(-\s*){3,}|(_\s*){3,})$/)) {
644 # Horizontal rule
645 $wrapped_mode = 1;
646 $self->pushline($line."\n");
647 } elsif ($line =~ /^-- $/) {
648 # Break paragraphs on email signature hint
649 do_paragraph($self,$paragraph,$wrapped_mode);
650 $paragraph="";
651 $wrapped_mode = 1;
652 $self->pushline($line."\n");
653 } elsif ( $line =~ /^=+$/
654 or $line =~ /^_+$/
655 or $line =~ /^-+$/) {
 # Line made only of '=', '_' or '-': it is appended to the pending
 # paragraph, which is then flushed without rewrapping.
656 $wrapped_mode = 0;
657 $paragraph .= $line."\n";
658 do_paragraph($self,$paragraph,$wrapped_mode);
659 $paragraph="";
660 $wrapped_mode = 1;
661 } elsif ($markdown and
662 ( $line =~ /^\s*\[\[\!\S+\s*$/ # macro begin
663 or $line =~ /^\s*"""\s*\]\]\s*$/)) { # """ textblock inside macro end
664 # Avoid translating Markdown lines containing only markup
665 do_paragraph($self,$paragraph,$wrapped_mode);
666 $paragraph="";
667 $wrapped_mode = 1;
668 $self->pushline("$line\n");
669 } elsif ($markdown and
670 ( $line =~ /^#/ # headline
671 or $line =~ /^\s*\[\[\!\S[^\]]*\]\]\s*$/)) { # sole macro
672 # Preserve some Markdown markup as a single line
673 do_paragraph($self,$paragraph,$wrapped_mode);
674 $paragraph="$line\n";
675 $wrapped_mode = 0;
676 $end_of_paragraph = 1;
677 } elsif ($markdown and
678 ( $line =~ /^"""/)) { # """ textblock inside macro end
679 # Markdown markup needing separation _before_ this line
680 do_paragraph($self,$paragraph,$wrapped_mode);
681 $paragraph="$line\n";
682 $wrapped_mode = 1;
683 } elsif ($tabs eq "split" and $line =~ m/\t/ and $paragraph !~ m/\t/s) {
 # tabs=split: first line with a tab after text without tabs; the pending
 # text is flushed and a new no-wrap paragraph starts with this line.
684 $wrapped_mode = 0;
685 do_paragraph($self,$paragraph,$wrapped_mode);
686 $paragraph = "$line\n";
687 $wrapped_mode = 0;
688 } elsif ($tabs eq "split" and $line !~ m/\t/ and $paragraph =~ m/\t/s) {
 # tabs=split: first line without a tab after a tabbed paragraph.
689 do_paragraph($self,$paragraph,$wrapped_mode);
690 $paragraph = "$line\n";
691 $wrapped_mode = 1;
692 } else {
 # Ordinary text line: appended to the pending paragraph.
693 if ($line =~ /^\s/) {
694 # A line starting by a space indicates a non-wrap
695 # paragraph
696 $wrapped_mode = 0;
697 }
698 if ($markdown and
699 ( $line =~ /\S $/ # explicit newline
700 or $line =~ /"""$/)) { # """ textblock inside macro begin
701 # Markdown markup needing separation _after_ this line
702 $end_of_paragraph = 1;
703 } else {
704 undef $self->{bullet};
705 undef $self->{indent};
706 }
707 if ($fortunes) {
 # Drop everything from '%%' to the end of the line.
708 $line =~ s/%%(.*)$//;
709 }
710# TODO: comments
711 $paragraph .= $line."\n";
712 }
713 # paragraphs starting by a bullet, or numbered
714 # or paragraphs with a line containing many consecutive spaces
715 # (more than 3)
716 # are considered as verbatim paragraphs
717 $wrapped_mode = 0 if ( $paragraph =~ m/^(\*|[0-9]+[.)] )/s
718 or $paragraph =~ m/[ \t][ \t][ \t]/s);
719 $wrapped_mode = 0 if ( $tabs eq "verbatim"
720 and $paragraph =~ m/\t/s);
721 if ($markdown) {
722 # Some Markdown markup can (or might) not survive wrapping
723 $wrapped_mode = 0 if (
724 $paragraph =~ /^>/ms # blockquote
725 or $paragraph =~ /^( {8}|\t)/ms # monospaced
726 or $paragraph =~ /^\$(\S+[{}]\S*\s*)+/ms # Xapian macro
727 or $paragraph =~ /<(?![a-z]+[:@])/ms # maybe html (tags but not wiki <URI>)
728 or $paragraph =~ /^[^<]+>/ms # maybe html (tag with vertical space)
729 or $paragraph =~ /\S $/ms # explicit newline
730 or $paragraph =~ /\[\[\!\S[^\]]+$/ms # macro begin
731 );
732 }
733 if ($end_of_paragraph) {
 # A branch above asked for the paragraph to end right after this line.
734 do_paragraph($self,$paragraph,$wrapped_mode);
735 $paragraph="";
736 $wrapped_mode = 1;
737 $end_of_paragraph = 0;
738 }
739 ($line,$ref)=$self->shiftline();
740 }
 # End of input: flush whatever is still pending.
741 if (length $paragraph) {
742 do_paragraph($self,$paragraph,$wrapped_mode);
743 }
744}
745
# do_paragraph($self, $paragraph, $wrap [, $type])
#
# Translates one accumulated paragraph and pushes the result to the output.
#
#   $paragraph - text to translate; nothing is done for the empty string
#   $wrap      - true when the text may be rewrapped
#   $type      - optional type passed to translate(); falls back to
#                $self->{type}, then to "Plain text"
#
# When bullet detection is enabled, the paragraph is not wrappable and no
# verbatim block is open, leading list items ("- x", "* x", "o x", "+ x",
# "1. x", "1) x", "(1) x") are peeled off one at a time and translated
# separately as wrappable text, the bullet and hanging indentation being
# restored around each translation. Whatever cannot be handled that way is
# translated as one paragraph; if $self->{bullet} is defined, that bullet
# and $self->{indent} are put back in front of the translation.
sub do_paragraph {
    my ($self, $paragraph, $wrap, $type) = @_;
    $type = $type || $self->{type} || "Plain text";
    return if ($paragraph eq "");

    if ($bullets and not $wrap and not defined $self->{verbatim}) {
        # Detect bullets
        # |        * blah blah
        # |<spaces>  blah
        # |          ^-- aligned
        # <empty line>
        #
        # Other bullets supported:
        # - blah         o blah         + blah
        # 1. blah       1) blah       (1) blah
      ITEM:
        while ($paragraph =~ m/^(\s*)((?:[-*o+]|([0-9]+[.\)])|\([0-9]+\))\s+)([^\n]*\n)(.*)$/s) {
            my ($lead, $mark, $item, $rest) = ($1, $2, $4, $5);
            # Indentation expected on the continuation lines of this item.
            my $hang = $lead.(' ' x length $mark);

            # Pull in the aligned continuation lines, as long as the
            # remaining text holds no bullet at that indentation.
            while ($rest !~ m/$hang(?:[-*o+]|([0-9]+[.\)])|\([0-9]+\))\s+/
                   and $rest =~ s/^$hang(\S[^\n]*\n)//s) {
                $item .= $1;
            }

            # TODO: detect if a line starts with the same bullet
            # Text with runs of three or more blanks between words is not
            # treated as a list item.
            last ITEM if $item =~ m/\S[ \t][ \t][ \t]+\S/s;

            # The remainder must be empty or start with the same kind of
            # bullet (any number accepted in place of the first number).
            my $next_item = quotemeta($lead.$mark);
            $next_item =~ s/[0-9]+/\\d\+/;
            last ITEM unless ($rest eq '' or $rest =~ m/^$next_item\S/s);

            my $trans = $self->translate($item,
                                         $self->{ref},
                                         "Bullet: '$lead$mark'",
                                         "wrap" => 1,
                                         "wrapcol" => - (length $hang));
            $trans =~ s/^/$lead$mark/s;
            $trans =~ s/\n(.)/\n$hang$1/sg;
            $self->pushline( $trans."\n" );

            return if $rest eq '';

            # Another bullet
            $paragraph = $rest;
        }
    }

    # For wrappable text the trailing newlines are set aside and appended
    # again, unchanged, after the translation.
    my $trailer = "";
    if ($wrap) {
        $paragraph =~ s/^(.*?)(\n*)$/$1/s;
        $trailer = $2 || "";
    }

    my $t = $self->translate($paragraph,
                             $self->{ref},
                             $type,
                             "wrap" => $wrap);

    if (defined $self->{bullet}) {
        my $mark = $self->{bullet};
        my $lead = $self->{indent};
        my $hang = $lead.(' ' x length($mark));
        $t =~ s/^/$lead$mark/s;
        $t =~ s/\n(.)/\n$hang$1/sg;
    }

    $self->pushline( $t.$trailer );
}
824
8251;
826
827=head1 STATUS OF THIS MODULE
828
829Tested successfully on simple text files and NEWS.Debian files.
830
831=head1 AUTHORS
832
833 Nicolas François <nicolas.francois@centraliens.net>
834
835=head1 COPYRIGHT AND LICENSE
836
837 Copyright 2005-2008 by Nicolas FRANÇOIS <nicolas.francois@centraliens.net>.
838
839This program is free software; you may redistribute it and/or modify it
840under the terms of GPL (see the COPYING file).
841

Archive Download this file

Revision: 2238