Chameleon

Chameleon Svn Source Tree

Root/branches/JrCs/package/bin/po4a/lib/Locale/Po4a/Po.pm

1# Locale::Po4a::Po -- manipulation of PO files
2#
3# This program is free software; you may redistribute it and/or modify it
4# under the terms of GPL (see COPYING).
5
6############################################################################
7# Modules and declarations
8############################################################################
9
10=encoding UTF-8
11
12=head1 NAME
13
14Locale::Po4a::Po - PO file manipulation module
15
16=head1 SYNOPSIS
17
18 use Locale::Po4a::Po;
19 my $pofile=Locale::Po4a::Po->new();
20
21 # Read PO file
22 $pofile->read('file.po');
23
24 # Add an entry
25 $pofile->push('msgid' => 'Hello', 'msgstr' => 'bonjour',
26 'flags' => "wrap", 'reference'=>'file.c:46');
27
28 # Extract a translation
29 $pofile->gettext("Hello"); # returns 'bonjour'
30
31 # Write back to a file
32 $pofile->write('otherfile.po');
33
34=head1 DESCRIPTION
35
36Locale::Po4a::Po is a module that allows you to manipulate message
37catalogs. You can load and write from/to a file (which extension is often
38I<po>), you can build new entries on the fly or request for the translation
39of a string.
40
41For a more complete description of message catalogs in the PO format and
42their use, please refer to the documentation of the gettext program.
43
44This module is part of the po4a project, which objective is to use PO files
45(designed at origin to ease the translation of program messages) to
46translate everything, including documentation (man page, info manual),
47package description, debconf templates, and everything which may benefit
48from this.
49
50=head1 OPTIONS ACCEPTED BY THIS MODULE
51
52=over 4
53
54=item B<porefs>
55
56This specifies the reference format. It can be one of B<none> to not produce
57any reference, B<noline> to not specify the line number, and B<full> to
58include complete references.
59
60=back
61
62=cut
63
64use IO::File;
65
66
67require Exporter;
68
69package Locale::Po4a::Po;
70use DynaLoader;
71
72use Locale::Po4a::Common qw(wrap_msg wrap_mod wrap_ref_mod dgettext);
73
74use subs qw(makespace);
75use vars qw(@ISA @EXPORT_OK);
76@ISA = qw(Exporter DynaLoader);
77@EXPORT = qw(%debug);
78@EXPORT_OK = qw(&move_po_if_needed);
79
80use Locale::Po4a::TransTractor;
81# Try to use a C extension if present.
82eval("bootstrap Locale::Po4a::Po $Locale::Po4a::TransTractor::VERSION");
83
84use 5.006;
85use strict;
86use warnings;
87
88use Carp qw(croak);
89use File::Basename;
90use File::Path; # mkdir before write
91use File::Copy; # move
92use POSIX qw(strftime floor);
93use Time::Local;
94
95use Encode;
96
97my @known_flags=qw(wrap no-wrap c-format fuzzy);
98
99our %debug=('canonize' => 0,
100 'quote' => 0,
101 'escape' => 0,
102 'encoding' => 0,
103 'filter' => 0);
104
105=head1 Functions about whole message catalogs
106
107=over 4
108
109=item new()
110
111Creates a new message catalog. If an argument is provided, it's the name of
112a PO file we should load.
113
114=cut
115
116sub new {
117 my ($this, $options) = (shift, shift);
118 my $class = ref($this) || $this;
119 my $self = {};
120 bless $self, $class;
121 $self->initialize($options);
122
123 my $filename = shift;
124 $self->read($filename) if defined($filename) && length($filename);
125 return $self;
126}
127
128# Return the numerical timezone (e.g. +0200)
129# Neither the %z nor the %s formats of strftime are portable:
130# '%s' is not supported on Solaris and '%z' indicates
131# "2006-10-25 19:36E. Europe Standard Time" on MS Windows.
132sub timezone {
133 my @g = gmtime();
134 my @l = localtime();
135
136 my $diff;
137 $diff = floor(timelocal(@l)/60 +0.5);
138 $diff -= floor(timelocal(@g)/60 +0.5);
139
140 my $h = floor($diff / 60) + $l[8]; # $l[8] indicates if we are currently
141 # in a daylight saving time zone
142 my $m = $diff%60;
143
144 return sprintf "%+03d%02d\n", $h, $m;
145}
146
147sub initialize {
148 my ($self, $options) = (shift, shift);
149 my $date = strftime("%Y-%m-%d %H:%M", localtime).timezone();
150 chomp $date;
151# $options = ref($options) || $options;
152
153 $self->{options}{'porefs'}= 'full';
154 $self->{options}{'msgid-bugs-address'}= undef;
155 $self->{options}{'copyright-holder'}= "Free Software Foundation, Inc.";
156 $self->{options}{'package-name'}= "PACKAGE";
157 $self->{options}{'package-version'}= "VERSION";
158 foreach my $opt (keys %$options) {
159 if ($options->{$opt}) {
160 die wrap_mod("po4a::po",
161 dgettext ("po4a", "Unknown option: %s"), $opt)
162 unless exists $self->{options}{$opt};
163 $self->{options}{$opt} = $options->{$opt};
164 }
165 }
166 $self->{options}{'porefs'} =~ /^(full|noline|none)$/ ||
167 die wrap_mod("po4a::po",
168 dgettext ("po4a",
169 "Invalid value for option 'porefs' ('%s' is ".
170 "not one of 'full', 'noline' or 'none')"),
171 $self->{options}{'porefs'});
172
173 $self->{po}=();
174 $self->{count}=0; # number of msgids in the PO
175 # count_doc: number of strings in the document
176 # (duplicate strings counted multiple times)
177 $self->{count_doc}=0;
178 $self->{header_comment}=
179 " SOME DESCRIPTIVE TITLE\n"
180 ." Copyright (C) YEAR ".
181 $self->{options}{'copyright-holder'}."\n"
182 ." This file is distributed under the same license ".
183 "as the ".$self->{options}{'package-name'}." package.\n"
184 ." FIRST AUTHOR <EMAIL\@ADDRESS>, YEAR.\n"
185 ."\n"
186 .", fuzzy";
187# $self->header_tag="fuzzy";
188 $self->{header}=escape_text("Project-Id-Version: ".
189 $self->{options}{'package-name'}." ".
190 $self->{options}{'package-version'}."\n".
191 ((defined $self->{options}{'msgid-bugs-address'})?
192 "Report-Msgid-Bugs-To: ".$self->{options}{'msgid-bugs-address'}."\n":
193 "").
194 "POT-Creation-Date: $date\n".
195 "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n".
196 "Last-Translator: FULL NAME <EMAIL\@ADDRESS>\n".
197 "Language-Team: LANGUAGE <LL\@li.org>\n".
198 "Language: \n".
199 "MIME-Version: 1.0\n".
200 "Content-Type: text/plain; charset=CHARSET\n".
201 "Content-Transfer-Encoding: 8bit\n");
202
203 $self->{encoder}=find_encoding("ascii");
204 $self->{footer}=[];
205
206 # To make stats about gettext hits
207 $self->stats_clear();
208}
209
210=item read($)
211
212Reads a PO file (which name is given as argument). Previously existing
213entries in self are not removed, the new ones are added to the end of the
214catalog.
215
216=cut
217
218sub read {
219 my $self=shift;
220 my $filename=shift
221 or croak wrap_mod("po4a::po",
222 dgettext("po4a",
223 "Please provide a non-null filename"));
224
225 my $lang = basename($filename);
226 $lang =~ s/\.po$//;
227 $self->{lang} = $lang;
228
229 my $fh;
230 if ($filename eq '-') {
231 $fh=*STDIN;
232 } else {
233 open $fh,"<$filename"
234 or croak wrap_mod("po4a::po",
235 dgettext("po4a", "Can't read from %s: %s"),
236 $filename, $!);
237 }
238
239 ## Read paragraphs line-by-line
240 my $pofile="";
241 my $textline;
242 while (defined ($textline = <$fh>)) {
243 $pofile .= $textline;
244 }
245# close INPUT
246# or croak (sprintf(dgettext("po4a",
247# "Can't close %s after reading: %s"),
248# $filename,$!)."\n");
249
250 my $linenum=0;
251
252 foreach my $msg (split (/\n\n/,$pofile)) {
253 my ($msgid,$msgstr,$comment,$previous,$automatic,$reference,$flags,$buffer);
254 my ($msgid_plural, $msgstr_plural);
255 if ($msg =~ m/^#~/m) {
256 push(@{$self->{footer}}, $msg);
257 next;
258 }
259 foreach my $line (split (/\n/,$msg)) {
260 $linenum++;
261 if ($line =~ /^#\. ?(.*)$/) { # Automatic comment
262 $automatic .= (defined($automatic) ? "\n" : "").$1;
263
264 } elsif ($line =~ /^#: ?(.*)$/) { # reference
265 $reference .= (defined($reference) ? "\n" : "").$1;
266
267 } elsif ($line =~ /^#, ?(.*)$/) { # flags
268 $flags .= (defined($flags) ? "\n" : "").$1;
269
270 } elsif ($line =~ /^#\| ?(.*)$/) { # previous translation
271 $previous .= (defined($previous) ? "\n" : "").($1||"");
272
273 } elsif ($line =~ /^#(.*)$/) { # Translator comments
274 $comment .= (defined($comment) ? "\n" : "").($1||"");
275
276 } elsif ($line =~ /^msgid (".*")$/) { # begin of msgid
277 $buffer = $1;
278
279 } elsif ($line =~ /^msgid_plural (".*")$/) {
280 # begin of msgid_plural, end of msgid
281
282 $msgid = $buffer;
283 $buffer = $1;
284
285 } elsif ($line =~ /^msgstr (".*")$/) {
286 # begin of msgstr, end of msgid
287
288 $msgid = $buffer;
289 $buffer = "$1";
290
291 } elsif ($line =~ /^msgstr\[([0-9]+)\] (".*")$/) {
292 # begin of msgstr[x], end of msgid_plural or msgstr[x-1]
293
294 # Note: po4a cannot uses plural forms
295 # (no integer to use the plural form)
296 # * drop the msgstr[x] where x >= 2
297 # * use msgstr[0] as the translation of msgid
298 # * use msgstr[1] as the translation of msgid_plural
299
300 if ($1 eq "0") {
301 $msgid_plural = $buffer;
302 $buffer = "$2";
303 } elsif ($1 eq "1") {
304 $msgstr = $buffer;
305 $buffer = "$2";
306 } elsif ($1 eq "2") {
307 $msgstr_plural = $buffer;
308 warn wrap_ref_mod("$filename:$linenum",
309 "po4a::po",
310 dgettext("po4a", "Messages with more than 2 plural forms are not supported."));
311 }
312 } elsif ($line =~ /^(".*")$/) {
313 # continuation of a line
314 $buffer .= "\n$1";
315
316 } else {
317 warn wrap_ref_mod("$filename:$linenum",
318 "po4a::po",
319 dgettext("po4a", "Strange line: -->%s<--"),
320 $line);
321 }
322 }
323 $linenum++;
324 if (defined $msgid_plural) {
325 $msgstr_plural=$buffer;
326
327 $msgid = unquote_text($msgid) if (defined($msgid));
328 $msgstr = unquote_text($msgstr) if (defined($msgstr));
329
330 $self->push_raw ('msgid' => $msgid,
331 'msgstr' => $msgstr,
332 'reference' => $reference,
333 'flags' => $flags,
334 'comment' => $comment,
335 'previous' => $previous,
336 'automatic' => $automatic,
337 'plural' => 0);
338
339 $msgid_plural = unquote_text($msgid_plural)
340 if (defined($msgid_plural));
341 $msgstr_plural = unquote_text($msgstr_plural)
342 if (defined($msgstr_plural));
343
344 $self->push_raw ('msgid' => $msgid_plural,
345 'msgstr' => $msgstr_plural,
346 'reference' => $reference,
347 'flags' => $flags,
348 'comment' => $comment,
349 'previous' => $previous,
350 'automatic' => $automatic,
351 'plural' => 1);
352 } else {
353 $msgstr=$buffer;
354
355 $msgid = unquote_text($msgid) if (defined($msgid));
356 $msgstr = unquote_text($msgstr) if (defined($msgstr));
357
358 $self->push_raw ('msgid' => $msgid,
359 'msgstr' => $msgstr,
360 'reference' => $reference,
361 'flags' => $flags,
362 'comment' => $comment,
363 'previous' => $previous,
364 'automatic' => $automatic);
365 }
366 }
367}
368
369=item write($)
370
371Writes the current catalog to the given file.
372
373=cut
374
375sub write{
376 my $self=shift;
377 my $filename=shift
378 or croak dgettext("po4a","Can't write to a file without filename")."\n";
379
380 my $fh;
381 if ($filename eq '-') {
382 $fh=\*STDOUT;
383 } else {
384 # make sure the directory in which we should write the localized
385 # file exists
386 my $dir = $filename;
387 if ($dir =~ m|/|) {
388 $dir =~ s|/[^/]*$||;
389
390 File::Path::mkpath($dir, 0, 0755) # Croaks on error
391 if (length ($dir) && ! -e $dir);
392 }
393 open $fh,">$filename"
394 or croak wrap_mod("po4a::po",
395 dgettext("po4a", "Can't write to %s: %s"),
396 $filename, $!);
397 }
398
399 print $fh "".format_comment($self->{header_comment},"")
400 if defined($self->{header_comment}) && length($self->{header_comment});
401
402 print $fh "msgid \"\"\n";
403 print $fh "msgstr ".quote_text($self->{header})."\n\n";
404
405
406 my $buf_msgstr_plural; # Used to keep the first msgstr of plural forms
407 my $first=1;
408 foreach my $msgid ( sort { ($self->{po}{"$a"}{'pos'}) <=>
409 ($self->{po}{"$b"}{'pos'})
410 } keys %{$self->{po}}) {
411 my $output="";
412
413 if ($first) {
414 $first=0;
415 } else {
416 $output .= "\n";
417 }
418
419 $output .= format_comment($self->{po}{$msgid}{'comment'},"")
420 if defined($self->{po}{$msgid}{'comment'})
421 && length ($self->{po}{$msgid}{'comment'});
422 if ( defined($self->{po}{$msgid}{'automatic'})
423 && length ($self->{po}{$msgid}{'automatic'})) {
424 foreach my $comment (split(/\\n/,$self->{po}{$msgid}{'automatic'}))
425 {
426 $output .= format_comment($comment, ". ")
427 }
428 }
429 $output .= format_comment($self->{po}{$msgid}{'type'},". type: ")
430 if defined($self->{po}{$msgid}{'type'})
431 && length ($self->{po}{$msgid}{'type'});
432 $output .= format_comment($self->{po}{$msgid}{'reference'},": ")
433 if defined($self->{po}{$msgid}{'reference'})
434 && length ($self->{po}{$msgid}{'reference'});
435 $output .= "#, ". join(", ", sort split(/\s+/,$self->{po}{$msgid}{'flags'}))."\n"
436 if defined($self->{po}{$msgid}{'flags'})
437 && length ($self->{po}{$msgid}{'flags'});
438 $output .= format_comment($self->{po}{$msgid}{'previous'},"| ")
439 if defined($self->{po}{$msgid}{'previous'})
440 && length ($self->{po}{$msgid}{'previous'});
441
442 if (exists $self->{po}{$msgid}{'plural'}) {
443 if ($self->{po}{$msgid}{'plural'} == 0) {
444 if ($self->get_charset =~ /^utf-8$/i) {
445 my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'});
446 $msgid = Encode::decode_utf8($msgid);
447 $output .= Encode::encode_utf8("msgid ".quote_text($msgid)."\n");
448 $buf_msgstr_plural = Encode::encode_utf8("msgstr[0] ".quote_text($msgstr)."\n");
449 } else {
450 $output = "msgid ".quote_text($msgid)."\n";
451 $buf_msgstr_plural = "msgstr[0] ".quote_text($self->{po}{$msgid}{'msgstr'})."\n";
452 }
453 } elsif ($self->{po}{$msgid}{'plural'} == 1) {
454# TODO: there may be only one plural form
455 if ($self->get_charset =~ /^utf-8$/i) {
456 my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'});
457 $msgid = Encode::decode_utf8($msgid);
458 $output = Encode::encode_utf8("msgid_plural ".quote_text($msgid)."\n");
459 $output .= $buf_msgstr_plural;
460 $output .= Encode::encode_utf8("msgstr[1] ".quote_text($msgstr)."\n");
461 $buf_msgstr_plural = "";
462 } else {
463 $output = "msgid_plural ".quote_text($msgid)."\n";
464 $output .= $buf_msgstr_plural;
465 $output .= "msgstr[1] ".quote_text($self->{po}{$msgid}{'msgstr'})."\n";
466 }
467 } else {
468 die wrap_msg(dgettext("po4a","Can't write PO files with more than two plural forms."));
469 }
470 } else {
471 if ($self->get_charset =~ /^utf-8$/i) {
472 my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'});
473 $msgid = Encode::decode_utf8($msgid);
474 $output .= Encode::encode_utf8("msgid ".quote_text($msgid)."\n");
475 $output .= Encode::encode_utf8("msgstr ".quote_text($msgstr)."\n");
476 } else {
477 $output .= "msgid ".quote_text($msgid)."\n";
478 $output .= "msgstr ".quote_text($self->{po}{$msgid}{'msgstr'})."\n";
479 }
480 }
481
482 print $fh $output;
483 }
484 print $fh join("\n\n", @{$self->{footer}}) if scalar @{$self->{footer}};
485
486# print STDERR "$fh";
487# if ($filename ne '-') {
488# close $fh
489# or croak (sprintf(dgettext("po4a",
490# "Can't close %s after writing: %s\n"),
491# $filename,$!));
492# }
493}
494
495=item write_if_needed($$)
496
497Like write, but if the PO or POT file already exists, the object will be
498written in a temporary file which will be compared with the existing file
499to check if the update is needed (this avoids to change a POT just to
500update a line reference or the POT-Creation-Date field).
501
502=cut
503
504sub move_po_if_needed {
505 my ($new_po, $old_po, $backup) = (shift, shift, shift);
506 my $diff;
507
508 if (-e $old_po) {
509 my $diff_ignore = "-I'^#:' "
510 ."-I'^\"POT-Creation-Date:' "
511 ."-I'^\"PO-Revision-Date:'";
512 $diff = qx(diff -q $diff_ignore $old_po $new_po);
513 if ( $diff eq "" ) {
514 unlink $new_po
515 or die wrap_msg(dgettext("po4a","Can't unlink %s: %s."),
516 $new_po, $!);
517 # touch the old PO
518 my ($atime, $mtime) = (time,time);
519 utime $atime, $mtime, $old_po;
520 } else {
521 move $new_po, $old_po
522 or die wrap_msg(dgettext("po4a","Can't move %s to %s: %s."),
523 $new_po, $old_po, $!);
524 }
525 } else {
526 move $new_po, $old_po
527 or die wrap_msg(dgettext("po4a","Can't move %s to %s: %s."),
528 $new_po, $old_po, $!);
529 }
530}
531
532sub write_if_needed {
533 my $self=shift;
534 my $filename=shift
535 or croak dgettext("po4a","Can't write to a file without filename")."\n";
536
537 if (-e $filename) {
538 my ($tmp_filename);
539 (undef,$tmp_filename)=File::Temp->tempfile($filename."XXXX",
540 DIR => "/tmp",
541 OPEN => 0,
542 UNLINK => 0);
543 $self->write($tmp_filename);
544 move_po_if_needed($tmp_filename, $filename);
545 } else {
546 $self->write($filename);
547 }
548}
549
550=item gettextize($$)
551
552This function produces one translated message catalog from two catalogs, an
553original and a translation. This process is described in L<po4a(7)|po4a.7>,
554section I<Gettextization: how does it work?>.
555
556=cut
557
558sub gettextize {
559 my $this = shift;
560 my $class = ref($this) || $this;
561 my ($poorig,$potrans)=(shift,shift);
562
563 my $pores=Locale::Po4a::Po->new();
564
565 my $please_fail = 0;
566 my $toobad = dgettext("po4a",
567 "\nThe gettextization failed (once again). Don't give up, ".
568 "gettextizing is a subtle art, but this is only needed once ".
569 "to convert a project to the gorgeous luxus offered by po4a ".
570 "to translators.".
571 "\nPlease refer to the po4a(7) documentation, the section ".
572 "\"HOWTO convert a pre-existing translation to po4a?\" ".
573 "contains several hints to help you in your task");
574
575 # Don't fail right now when the entry count does not match. Instead, give
576 # it a try so that the user can see where we fail (which is probably where
577 # the problem is).
578 if ($poorig->count_entries_doc() > $potrans->count_entries_doc()) {
579 warn wrap_mod("po4a gettextize", dgettext("po4a",
580 "Original has more strings than the translation (%d>%d). ".
581 "Please fix it by editing the translated version to add ".
582 "some dummy entry."),
583 $poorig->count_entries_doc(),
584 $potrans->count_entries_doc());
585 $please_fail = 1;
586 } elsif ($poorig->count_entries_doc() < $potrans->count_entries_doc()) {
587 warn wrap_mod("po4a gettextize", dgettext("po4a",
588 "Original has less strings than the translation (%d<%d). ".
589 "Please fix it by removing the extra entry from the ".
590 "translated file. You may need an addendum (cf po4a(7)) ".
591 "to reput the chunk in place after gettextization. A ".
592 "possible cause is that a text duplicated in the original ".
593 "is not translated the same way each time. Remove one of ".
594 "the translations, and you're fine."),
595 $poorig->count_entries_doc(),
596 $potrans->count_entries_doc());
597 $please_fail = 1;
598 }
599
600 if ( $poorig->get_charset =~ /^utf-8$/i ) {
601 $potrans->to_utf8;
602 $pores->set_charset("UTF-8");
603 } else {
604 if ($potrans->get_charset eq "CHARSET") {
605 $pores->set_charset("ascii");
606 } else {
607 $pores->set_charset($potrans->get_charset);
608 }
609 }
610 print "Po character sets:\n".
611 " original=".$poorig->get_charset."\n".
612 " translated=".$potrans->get_charset."\n".
613 " result=".$pores->get_charset."\n"
614 if $debug{'encoding'};
615
616 for (my ($o,$t)=(0,0) ;
617 $o<$poorig->count_entries_doc() && $t<$potrans->count_entries_doc();
618 $o++,$t++) {
619 #
620 # Extract some informations
621
622 my ($orig,$trans)=($poorig->msgid_doc($o),$potrans->msgid_doc($t));
623# print STDERR "Matches [[$orig]]<<$trans>>\n";
624
625 my ($reforig,$reftrans)=($poorig->{po}{$orig}{'reference'},
626 $potrans->{po}{$trans}{'reference'});
627 my ($typeorig,$typetrans)=($poorig->{po}{$orig}{'type'},
628 $potrans->{po}{$trans}{'type'});
629
630 #
631 # Make sure the type of both string exist
632 #
633 die wrap_mod("po4a gettextize",
634 "Internal error: type of original string number %s ".
635 "isn't provided", $o)
636 if ($typeorig eq '');
637
638 die wrap_mod("po4a gettextize",
639 "Internal error: type of translated string number %s ".
640 "isn't provided", $o)
641 if ($typetrans eq '');
642
643 #
644 # Make sure both type are the same
645 #
646 if ($typeorig ne $typetrans){
647 $pores->write("gettextization.failed.po");
648 eval {
649 # Recode $trans into current charset, if possible
650 require I18N::Langinfo;
651 I18N::Langinfo->import(qw(langinfo CODESET));
652 my $codeset = langinfo(CODESET());
653 Encode::from_to($trans, $potrans->get_charset, $codeset);
654 };
655 die wrap_msg(dgettext("po4a",
656 "po4a gettextization: Structure disparity between ".
657 "original and translated files:\n".
658 "msgid (at %s) is of type '%s' while\n".
659 "msgstr (at %s) is of type '%s'.\n".
660 "Original text: %s\n".
661 "Translated text: %s\n".
662 "(result so far dumped to gettextization.failed.po)").
663 "%s",
664 $reforig, $typeorig,
665 $reftrans, $typetrans,
666 $orig,
667 $trans,
668 $toobad);
669 }
670
671 #
672 # Push the entry
673 #
674 my $flags;
675 if (defined $poorig->{po}{$orig}{'flags'}) {
676 $flags = $poorig->{po}{$orig}{'flags'}." fuzzy";
677 } else {
678 $flags = "fuzzy";
679 }
680 $pores->push_raw('msgid' => $orig,
681 'msgstr' => $trans,
682 'flags' => $flags,
683 'type' => $typeorig,
684 'reference' => $reforig,
685 'conflict' => 1,
686 'transref' => $potrans->{po}{$trans}{'reference'})
687 unless (defined($pores->{po}{$orig})
688 and ($pores->{po}{$orig}{'msgstr'} eq $trans))
689 # FIXME: maybe we should be smarter about what reference should be
690 # sent to push_raw.
691 }
692
693 # make sure we return a useful error message when entry count differ
694 die "$toobad\n" if $please_fail;
695
696 return $pores;
697}
698
699=item filter($)
700
701This function extracts a catalog from an existing one. Only the entries having
702a reference in the given file will be placed in the resulting catalog.
703
704This function parses its argument, converts it to a Perl function definition,
705evals this definition and filters the fields for which this function returns
706true.
707
708I love Perl sometimes ;)
709
710=cut
711
712sub filter {
713 my $self=shift;
714 our $filter=shift;
715
716 my $res;
717 $res = Locale::Po4a::Po->new();
718
719 # Parse the filter
720 our $code="sub apply { return ";
721 our $pos=0;
722 our $length = length $filter;
723
724 # explode chars to parts. How to subscript a string in Perl?
725 our @filter = split(//,$filter);
726
727 sub gloups {
728 my $fmt=shift;
729 my $space = "";
730 for (1..$pos){
731 $space .= ' ';
732 }
733 die wrap_msg("$fmt\n$filter\n$space^ HERE");
734 }
735 sub showmethecode {
736 return unless $debug{'filter'};
737 my $fmt=shift;
738 my $space="";
739 for (1..$pos){
740 $space .= ' ';
741 }
742 print STDERR "$filter\n$space^ $fmt\n";#"$code\n";
743 }
744
745 # I dream of a lex in perl :-/
746 sub parse_expression {
747 showmethecode("Begin expression")
748 if $debug{'filter'};
749
750 gloups("Begin of expression expected, got '%s'",$filter[$pos])
751 unless ($filter[$pos] eq '(');
752 $pos ++; # pass the '('
753 if ($filter[$pos] eq '&') {
754 # AND
755 $pos++;
756 showmethecode("Begin of AND")
757 if $debug{'filter'};
758 $code .= "(";
759 while (1) {
760 gloups ("Unfinished AND statement.")
761 if ($pos == $length);
762 parse_expression();
763 if ($filter[$pos] eq '(') {
764 $code .= " && ";
765 } elsif ($filter[$pos] eq ')') {
766 last; # do not eat that char
767 } else {
768 gloups("End of AND or begin of sub-expression expected, got '%s'", $filter[$pos]);
769 }
770 }
771 $code .= ")";
772 } elsif ($filter[$pos] eq '|') {
773 # OR
774 $pos++;
775 $code .= "(";
776 while (1) {
777 gloups("Unfinished OR statement.")
778 if ($pos == $length);
779 parse_expression();
780 if ($filter[$pos] eq '(') {
781 $code .= " || ";
782 } elsif ($filter[$pos] eq ')') {
783 last; # do not eat that char
784 } else {
785 gloups("End of OR or begin of sub-expression expected, got '%s'",$filter[$pos]);
786 }
787 }
788 $code .= ")";
789 } elsif ($filter[$pos] eq '!') {
790 # NOT
791 $pos++;
792 $code .= "(!";
793 gloups("Missing sub-expression in NOT statement.")
794 if ($pos == $length);
795 parse_expression();
796 $code .= ")";
797 } else {
798 # must be an equal. Let's get field and argument
799 my ($field,$arg,$done);
800 $field = substr($filter,$pos);
801 gloups("EQ statement contains no '=' or invalid field name")
802 unless ($field =~ /([a-z]*)=/i);
803 $field = lc($1);
804 $pos += (length $field) + 1;
805
806 # check that we've got a valid field name,
807 # and the number it referes to
808 # DO NOT CHANGE THE ORDER
809 my @names=qw(msgid msgstr reference flags comment previous automatic);
810 my $fieldpos;
811 for ($fieldpos = 0;
812 $fieldpos < scalar @names && $field ne $names[$fieldpos];
813 $fieldpos++) {}
814 gloups("Invalid field name: %s",$field)
815 if $fieldpos == scalar @names; # not found
816
817 # Now, get the argument value. It has to be between quotes,
818 # which can be escaped
819 # We point right on the first char of the argument
820 # (first quote already eaten)
821 my $escaped = 0;
822 my $quoted = 0;
823 if ($filter[$pos] eq '"') {
824 $pos++;
825 $quoted = 1;
826 }
827 showmethecode(($quoted?"Quoted":"Unquoted")." argument of field '$field'")
828 if $debug{'filter'};
829
830 while (!$done) {
831 gloups("Unfinished EQ argument.")
832 if ($pos == $length);
833
834 if ($quoted) {
835 if ($filter[$pos] eq '\\') {
836 if ($escaped) {
837 $arg .= '\\';
838 $escaped = 0;
839 } else {
840 $escaped = 1;
841 }
842 } elsif ($escaped) {
843 if ($filter[$pos] eq '"') {
844 $arg .= '"';
845 $escaped = 0;
846 } else {
847 gloups("Invalid escape sequence in argument: '\\%s'",$filter[$pos]);
848 }
849 } else {
850 if ($filter[$pos] eq '"') {
851 $done = 1;
852 } else {
853 $arg .= $filter[$pos];
854 }
855 }
856 } else {
857 if ($filter[$pos] eq ')') {
858 # counter the next ++ since we don't want to eat
859 # this char
860 $pos--;
861 $done = 1;
862 } else {
863 $arg .= $filter[$pos];
864 }
865 }
866 $pos++;
867 }
868 # and now, add the code to check this equality
869 $code .= "(\$_[$fieldpos] =~ m{$arg})";
870
871 }
872 showmethecode("End of expression")
873 if $debug{'filter'};
874 gloups("Unfinished statement.")
875 if ($pos == $length);
876 gloups("End of expression expected, got '%s'",$filter[$pos])
877 unless ($filter[$pos] eq ')');
878 $pos++;
879 }
880 # And now, launch the beast, finish the function and use eval
881 # to construct this function.
882 # Ok, the lack of lexer is a fair price for the eval ;)
883 parse_expression();
884 gloups("Garbage at the end of the expression")
885 if ($pos != $length);
886 $code .= "; }";
887 print STDERR "CODE = $code\n"
888 if $debug{'filter'};
889 eval $code;
890 die wrap_mod("po4a::po", dgettext("po4a", "Eval failure: %s"), $@)
891 if $@;
892
893 for (my $cpt=(0) ;
894 $cpt<$self->count_entries();
895 $cpt++) {
896
897 my ($msgid,$ref,$msgstr,$flags,$type,$comment,$previous,$automatic);
898
899 $msgid = $self->msgid($cpt);
900 $ref=$self->{po}{$msgid}{'reference'};
901
902 $msgstr= $self->{po}{$msgid}{'msgstr'};
903 $flags = $self->{po}{$msgid}{'flags'};
904 $type = $self->{po}{$msgid}{'type'};
905 $comment = $self->{po}{$msgid}{'comment'};
906 $previous = $self->{po}{$msgid}{'previous'};
907 $automatic = $self->{po}{$msgid}{'automatic'};
908
909 # DO NOT CHANGE THE ORDER
910 $res->push_raw('msgid' => $msgid,
911 'msgstr' => $msgstr,
912 'flags' => $flags,
913 'type' => $type,
914 'reference' => $ref,
915 'comment' => $comment,
916 'previous' => $previous,
917 'automatic' => $automatic)
918 if (apply($msgid,$msgstr,$ref,$flags,$comment,$previous,$automatic));
919 }
920 # delete the apply subroutine
921 # otherwise it will be redefined.
922 undef &apply;
923 return $res;
924}
925
926=item to_utf8()
927
928Recodes to UTF-8 the PO's msgstrs. Does nothing if the charset is not
929specified in the PO file ("CHARSET" value), or if it's already UTF-8 or
930ASCII.
931
932=cut
933
934sub to_utf8 {
935 my $this = shift;
936 my $charset = $this->get_charset();
937
938 unless ($charset eq "CHARSET" or
939 $charset =~ /^ascii$/i or
940 $charset =~ /^utf-8$/i) {
941 foreach my $msgid ( keys %{$this->{po}} ) {
942 Encode::from_to($this->{po}{$msgid}{'msgstr'}, $charset, "utf-8");
943 }
944 $this->set_charset("UTF-8");
945 }
946}
947
948=back
949
950=head1 Functions to use a message catalog for translations
951
952=over 4
953
954=item gettext($%)
955
956Request the translation of the string given as argument in the current catalog.
957The function returns the original (untranslated) string if the string was not
958found.
959
960After the string to translate, you can pass a hash of extra
961arguments. Here are the valid entries:
962
963=over
964
965=item B<wrap>
966
967boolean indicating whether we can consider that whitespaces in string are
968not important. If yes, the function canonizes the string before looking for
969a translation, and wraps the result.
970
971=item B<wrapcol>
972
973the column at which we should wrap (default: 76).
974
975=back
976
977=cut
978
979sub gettext {
980 my $self=shift;
981 my $text=shift;
982 my (%opt)=@_;
983 my $res;
984
985 return "" unless defined($text) && length($text); # Avoid returning the header.
986 my $validoption="reference wrap wrapcol";
987 my %validoption;
988
989 map { $validoption{$_}=1 } (split(/ /,$validoption));
990 foreach (keys %opt) {
991 Carp::confess "internal error: unknown arg $_.\n".
992 "Here are the valid options: $validoption.\n"
993 unless $validoption{$_};
994 }
995
996 $text=canonize($text)
997 if ($opt{'wrap'});
998
999 my $esc_text=escape_text($text);
1000
1001 $self->{gettextqueries}++;
1002
1003 if ( defined $self->{po}{$esc_text}
1004 and defined $self->{po}{$esc_text}{'msgstr'}
1005 and length $self->{po}{$esc_text}{'msgstr'}
1006 and ( not defined $self->{po}{$esc_text}{'flags'}
1007 or $self->{po}{$esc_text}{'flags'} !~ /fuzzy/)) {
1008
1009 $self->{gettexthits}++;
1010 $res = unescape_text($self->{po}{$esc_text}{'msgstr'});
1011 if (defined $self->{po}{$esc_text}{'plural'}) {
1012 if ($self->{po}{$esc_text}{'plural'} eq "0") {
1013 warn wrap_mod("po4a gettextize", dgettext("po4a",
1014 "'%s' is the singular form of a message, ".
1015 "po4a will use the msgstr[0] translation (%s)."),
1016 $esc_text, $res);
1017 } else {
1018 warn wrap_mod("po4a gettextize", dgettext("po4a",
1019 "'%s' is the plural form of a message, ".
1020 "po4a will use the msgstr[1] translation (%s)."),
1021 $esc_text, $res);
1022 }
1023 }
1024 } else {
1025 $res = $text;
1026 }
1027
1028 if ($opt{'wrap'}) {
1029 if ($self->get_charset =~ /^utf-8$/i) {
1030 $res=Encode::decode_utf8($res);
1031 $res=wrap ($res, $opt{'wrapcol'} || 76);
1032 $res=Encode::encode_utf8($res);
1033 } else {
1034 $res=wrap ($res, $opt{'wrapcol'} || 76);
1035 }
1036 }
1037# print STDERR "Gettext >>>$text<<<(escaped=$esc_text)=[[[$res]]]\n\n";
1038 return $res;
1039}
1040
1041=item stats_get()
1042
1043Returns statistics about the hit ratio of gettext since the last time that
1044stats_clear() was called. Please note that it's not the same
1045statistics than the one printed by msgfmt --statistic. Here, it's statistics
1046about recent usage of the PO file, while msgfmt reports the status of the
1047file. Example of use:
1048
1049 [some use of the PO file to translate stuff]
1050
1051 ($percent,$hit,$queries) = $pofile->stats_get();
1052 print "So far, we found translations for $percent\% ($hit of $queries) of strings.\n";
1053
1054=cut
1055
1056sub stats_get() {
1057 my $self=shift;
1058 my ($h,$q)=($self->{gettexthits},$self->{gettextqueries});
1059 my $p = ($q == 0 ? 100 : int($h/$q*10000)/100);
1060
1061# $p =~ s/\.00//;
1062# $p =~ s/(\..)0/$1/;
1063
1064 return ( $p,$h,$q );
1065}
1066
1067=item stats_clear()
1068
1069Clears the statistics about gettext hits.
1070
1071=cut
1072
1073sub stats_clear {
1074 my $self = shift;
1075 $self->{gettextqueries} = 0;
1076 $self->{gettexthits} = 0;
1077}
1078
1079=back
1080
1081=head1 Functions to build a message catalog
1082
1083=over 4
1084
1085=item push(%)
1086
1087Push a new entry at the end of the current catalog. The arguments should
1088form a hash table. The valid keys are:
1089
1090=over 4
1091
1092=item B<msgid>
1093
1094the string in original language.
1095
1096=item B<msgstr>
1097
1098the translation.
1099
1100=item B<reference>
1101
1102an indication of where this string was found. Example: file.c:46 (meaning
1103in 'file.c' at line 46). It can be a space-separated list in case of
1104multiple occurrences.
1105
1106=item B<comment>
1107
1108a comment added here manually (by the translators). The format here is free.
1109
1110=item B<automatic>
1111
1112a comment which was automatically added by the string extraction
1113program. See the B<--add-comments> option of the B<xgettext> program for
1114more information.
1115
1116=item B<flags>
1117
1118space-separated list of all defined flags for this entry.
1119
1120Valid flags are: B<c-text>, B<python-text>, B<lisp-text>, B<elisp-text>, B<librep-text>,
1121B<smalltalk-text>, B<java-text>, B<awk-text>, B<object-pascal-text>, B<ycp-text>,
1122B<tcl-text>, B<wrap>, B<no-wrap> and B<fuzzy>.
1123
1124See the gettext documentation for their meaning.
1125
1126=item B<type>
1127
1128this is mostly an internal argument: it is used while gettextizing
1129documents. The idea here is to parse both the original and the translation
1130into a PO object, and merge them, using one's msgid as msgid and the
1131other's msgid as msgstr. To make sure that things get ok, each msgid in PO
1132objects are given a type, based on their structure (like "chapt", "sect1",
1133"p" and so on in DocBook). If the types of strings are not the same, that
1134means that both files do not share the same structure, and the process
1135reports an error.
1136
1137This information is written as automatic comment in the PO file since this
1138gives to translators some context about the strings to translate.
1139
1140=item B<wrap>
1141
1142boolean indicating whether whitespaces can be mangled in cosmetic
1143reformattings. If true, the string is canonized before use.
1144
1145This information is written to the PO file using the B<wrap> or B<no-wrap> flag.
1146
1147=item B<wrapcol>
1148
1149the column at which we should wrap (default: 76).
1150
1151This information is not written to the PO file.
1152
1153=back
1154
1155=cut
1156
1157sub push {
1158 my $self=shift;
1159 my %entry=@_;
1160
1161 my $validoption="wrap wrapcol type msgid msgstr automatic previous flags reference";
1162 my %validoption;
1163
1164 map { $validoption{$_}=1 } (split(/ /,$validoption));
1165 foreach (keys %entry) {
1166 Carp::confess "internal error: unknown arg $_.\n".
1167 "Here are the valid options: $validoption.\n"
1168 unless $validoption{$_};
1169 }
1170
1171 unless ($entry{'wrap'}) {
1172 $entry{'flags'} .= " no-wrap";
1173 }
1174 if (defined ($entry{'msgid'})) {
1175 $entry{'msgid'} = canonize($entry{'msgid'})
1176 if ($entry{'wrap'});
1177
1178 $entry{'msgid'} = escape_text($entry{'msgid'});
1179 }
1180 if (defined ($entry{'msgstr'})) {
1181 $entry{'msgstr'} = canonize($entry{'msgstr'})
1182 if ($entry{'wrap'});
1183
1184 $entry{'msgstr'} = escape_text($entry{'msgstr'});
1185 }
1186
1187 $self->push_raw(%entry);
1188}
1189
1190# The same as push(), but assuming that msgid and msgstr are already escaped
1191sub push_raw {
1192 my $self=shift;
1193 my %entry=@_;
1194 my ($msgid,$msgstr,$reference,$comment,$automatic,$previous,$flags,$type,$transref)=
1195 ($entry{'msgid'},$entry{'msgstr'},
1196 $entry{'reference'},$entry{'comment'},$entry{'automatic'},
1197 $entry{'previous'},$entry{'flags'},$entry{'type'},$entry{'transref'});
1198 my $keep_conflict = $entry{'conflict'};
1199
1200# print STDERR "Push_raw\n";
1201# print STDERR " msgid=>>>$msgid<<<\n" if $msgid;
1202# print STDERR " msgstr=[[[$msgstr]]]\n" if $msgstr;
1203# Carp::cluck " flags=$flags\n" if $flags;
1204
1205 return unless defined($entry{'msgid'});
1206
1207 #no msgid => header definition
1208 unless (length($entry{'msgid'})) {
1209# if (defined($self->{header}) && $self->{header} =~ /\S/) {
1210# warn dgettext("po4a","Redefinition of the header. ".
1211# "The old one will be discarded\n");
1212# } FIXME: do that iff the header isn't the default one.
1213 $self->{header}=$msgstr;
1214 $self->{header_comment}=$comment;
1215 my $charset = $self->get_charset;
1216 if ($charset ne "CHARSET") {
1217 $self->{encoder}=find_encoding($charset);
1218 } else {
1219 $self->{encoder}=find_encoding("ascii");
1220 }
1221 return;
1222 }
1223
1224 if ($self->{options}{'porefs'} eq "none") {
1225 $reference = "";
1226 } elsif ($self->{options}{'porefs'} eq "noline") {
1227 $reference =~ s/:[0-9]*/:1/g;
1228 }
1229
1230 if (defined($self->{po}{$msgid})) {
1231 warn wrap_mod("po4a::po",
1232 dgettext("po4a","msgid defined twice: %s"),
1233 $msgid)
1234 if (0); # FIXME: put a verbose stuff
1235 if ( defined $msgstr
1236 and defined $self->{po}{$msgid}{'msgstr'}
1237 and $self->{po}{$msgid}{'msgstr'} ne $msgstr) {
1238 my $txt=quote_text($msgid);
1239 my ($first,$second)=
1240 (format_comment(". ",$self->{po}{$msgid}{'reference'}).
1241 quote_text($self->{po}{$msgid}{'msgstr'}),
1242
1243 format_comment(". ",$reference).
1244 quote_text($msgstr));
1245
1246 if ($keep_conflict) {
1247 if ($self->{po}{$msgid}{'msgstr'} =~ m/^#-#-#-#-# .* #-#-#-#-#\\n/s) {
1248 $msgstr = $self->{po}{$msgid}{'msgstr'}.
1249 "\\n#-#-#-#-# $transref #-#-#-#-#\\n".
1250 $msgstr;
1251 } else {
1252 $msgstr = "#-#-#-#-# ".
1253 $self->{po}{$msgid}{'transref'}.
1254 " #-#-#-#-#\\n".
1255 $self->{po}{$msgid}{'msgstr'}."\\n".
1256 "#-#-#-#-# $transref #-#-#-#-#\\n".
1257 $msgstr;
1258 }
1259 # Every msgid will have the same list of references.
1260 # Only keep the last list.
1261 $self->{po}{$msgid}{'reference'} = "";
1262 } else {
1263 warn wrap_msg(dgettext("po4a",
1264 "Translations don't match for:\n".
1265 "%s\n".
1266 "-->First translation:\n".
1267 "%s\n".
1268 " Second translation:\n".
1269 "%s\n".
1270 " Old translation discarded."),
1271 $txt,$first,$second);
1272 }
1273 }
1274 }
1275 if (defined $transref) {
1276 $self->{po}{$msgid}{'transref'} = $transref;
1277 }
1278 if (defined $reference) {
1279 if (defined $self->{po}{$msgid}{'reference'}) {
1280 $self->{po}{$msgid}{'reference'} .= " ".$reference;
1281 } else {
1282 $self->{po}{$msgid}{'reference'} = $reference;
1283 }
1284 }
1285 $self->{po}{$msgid}{'msgstr'} = $msgstr;
1286 $self->{po}{$msgid}{'comment'} = $comment;
1287 $self->{po}{$msgid}{'automatic'} = $automatic;
1288 $self->{po}{$msgid}{'previous'} = $previous;
1289 if (defined($self->{po}{$msgid}{'pos_doc'})) {
1290 $self->{po}{$msgid}{'pos_doc'} .= " ".$self->{count_doc}++;
1291 } else {
1292 $self->{po}{$msgid}{'pos_doc'} = $self->{count_doc}++;
1293 }
1294 unless (defined($self->{po}{$msgid}{'pos'})) {
1295 $self->{po}{$msgid}{'pos'} = $self->{count}++;
1296 }
1297 $self->{po}{$msgid}{'type'} = $type;
1298 $self->{po}{$msgid}{'plural'} = $entry{'plural'}
1299 if defined $entry{'plural'};
1300
1301 if (defined($flags)) {
1302 $flags = " $flags ";
1303 $flags =~ s/,/ /g;
1304 foreach my $flag (@known_flags) {
1305 if ($flags =~ /\s$flag\s/) { # if flag to be set
1306 unless ( defined($self->{po}{$msgid}{'flags'})
1307 && $self->{po}{$msgid}{'flags'} =~ /\b$flag\b/) {
1308 # flag not already set
1309 if (defined $self->{po}{$msgid}{'flags'}) {
1310 $self->{po}{$msgid}{'flags'} .= " ".$flag;
1311 } else {
1312 $self->{po}{$msgid}{'flags'} = $flag;
1313 }
1314 }
1315 }
1316 }
1317 }
1318# print STDERR "stored ((($msgid)))=>(((".$self->{po}{$msgid}{'msgstr'}.")))\n\n";
1319
1320}
1321
1322=back
1323
1324=head1 Miscellaneous functions
1325
1326=over 4
1327
1328=item count_entries()
1329
1330Returns the number of entries in the catalog (without the header).
1331
1332=cut
1333
1334sub count_entries($) {
1335 my $self=shift;
1336 return $self->{count};
1337}
1338
1339=item count_entries_doc()
1340
1341Returns the number of entries in document. If a string appears multiple times
1342in the document, it will be counted multiple times
1343
1344=cut
1345
1346sub count_entries_doc($) {
1347 my $self=shift;
1348 return $self->{count_doc};
1349}
1350
1351=item msgid($)
1352
1353Returns the msgid of the given number.
1354
1355=cut
1356
1357sub msgid($$) {
1358 my $self=shift;
1359 my $num=shift;
1360
1361 foreach my $msgid ( keys %{$self->{po}} ) {
1362 return $msgid if ($self->{po}{$msgid}{'pos'} eq $num);
1363 }
1364 return undef;
1365}
1366
1367=item msgid_doc($)
1368
1369Returns the msgid with the given position in the document.
1370
1371=cut
1372
1373sub msgid_doc($$) {
1374 my $self=shift;
1375 my $num=shift;
1376
1377 foreach my $msgid ( keys %{$self->{po}} ) {
1378 foreach my $pos (split / /, $self->{po}{$msgid}{'pos_doc'}) {
1379 return $msgid if ($pos eq $num);
1380 }
1381 }
1382 return undef;
1383}
1384
1385=item get_charset()
1386
1387Returns the character set specified in the PO header. If it hasn't been
1388set, it will return "CHARSET".
1389
1390=cut
1391
1392sub get_charset() {
1393 my $self=shift;
1394
1395 $self->{header} =~ /charset=(.*?)[\s\\]/;
1396
1397 if (defined $1) {
1398 return $1;
1399 } else {
1400 return "CHARSET";
1401 }
1402}
1403
1404=item set_charset($)
1405
1406This sets the character set of the PO header to the value specified in its
1407first argument. If you never call this function (and no file with a specified
1408character set is read), the default value is left to "CHARSET". This value
1409doesn't change the behavior of this module, it's just used to fill that field
1410in the header, and to return it in get_charset().
1411
1412=cut
1413
1414sub set_charset() {
1415 my $self=shift;
1416
1417 my ($newchar,$oldchar);
1418 $newchar = shift;
1419 $oldchar = $self->get_charset();
1420
1421 $self->{header} =~ s/$oldchar/$newchar/;
1422 $self->{encoder}=find_encoding($newchar);
1423}
1424
1425#----[ helper functions ]---------------------------------------------------
1426
1427# transforme the string from its PO file representation to the form which
1428# should be used to print it
1429sub unescape_text {
1430 my $text = shift;
1431
1432 print STDERR "\nunescape [$text]====" if $debug{'escape'};
1433 $text = join("",split(/\n/,$text));
1434 $text =~ s/\\"/"/g;
1435 # unescape newlines
1436 # NOTE on \G:
1437 # The following regular expression introduce newlines.
1438 # Thus, ^ doesn't match all beginnings of lines.
1439 # \G is a zero-width assertion that matches the position
1440 # of the previous substitution with s///g. As every
1441 # substitution ends by a newline, it always matches a
1442 # position just after a newline.
1443 $text =~ s/( # $1:
1444 (\G|[^\\]) # beginning of the line or any char
1445 # different from '\'
1446 (\\\\)* # followed by any even number of '\'
1447 )\\n # and followed by an escaped newline
1448 /$1\n/sgx; # single string, match globally, allow comments
1449 # unescape carriage returns
1450 $text =~ s/( # $1:
1451 (\G|[^\\]) # beginning of the line or any char
1452 # different from '\'
1453 (\\\\)* # followed by any even number of '\'
1454 )\\r # and followed by an escaped carriage return
1455 /$1\r/sgx; # single string, match globally, allow comments
1456 # unescape tabulations
1457 $text =~ s/( # $1:
1458 (\G|[^\\])# beginning of the line or any char
1459 # different from '\'
1460 (\\\\)* # followed by any even number of '\'
1461 )\\t # and followed by an escaped tabulation
1462 /$1\t/mgx; # multilines string, match globally, allow comments
1463 # and unescape the escape character
1464 $text =~ s/\\\\/\\/g;
1465 print STDERR ">$text<\n" if $debug{'escape'};
1466
1467 return $text;
1468}
1469
1470# transform the string to its representation as it should be written in PO
1471# files
1472sub escape_text {
1473 my $text = shift;
1474
1475 print STDERR "\nescape [$text]====" if $debug{'escape'};
1476 $text =~ s/\\/\\\\/g;
1477 $text =~ s/"/\\"/g;
1478 $text =~ s/\n/\\n/g;
1479 $text =~ s/\r/\\r/g;
1480 $text =~ s/\t/\\t/g;
1481 print STDERR ">$text<\n" if $debug{'escape'};
1482
1483 return $text;
1484}
1485
1486# put quotes around the string on each lines (without escaping it)
1487# It does also normalize the text (ie, make sure its representation is wraped
1488# on the 80th char, but without changing the meaning of the string)
1489sub quote_text {
1490 my $string = shift;
1491
1492 return '""' unless defined($string) && length($string);
1493
1494 print STDERR "\nquote [$string]====" if $debug{'quote'};
1495 # break lines on newlines, if any
1496 # see unescape_text for an explanation on \G
1497 $string =~ s/( # $1:
1498 (\G|[^\\]) # beginning of the line or any char
1499 # different from '\'
1500 (\\\\)* # followed by any even number of '\'
1501 \\n) # and followed by an escaped newline
1502 /$1\n/sgx; # single string, match globally, allow comments
1503 $string = wrap($string);
1504 my @string = split(/\n/,$string);
1505 $string = join ("\"\n\"",@string);
1506 $string = "\"$string\"";
1507 if (scalar @string > 1 && $string[0] ne '') {
1508 $string = "\"\"\n".$string;
1509 }
1510
1511 print STDERR ">$string<\n" if $debug{'quote'};
1512 return $string;
1513}
1514
1515# undo the work of the quote_text function
1516sub unquote_text {
1517 my $string = shift;
1518 print STDERR "\nunquote [$string]====" if $debug{'quote'};
1519 $string =~ s/^""\\n//s;
1520 $string =~ s/^"(.*)"$/$1/s;
1521 $string =~ s/"\n"//gm;
1522 # Note: an even number of '\' could precede \\n, but I could not build a
1523 # document to test this
1524 $string =~ s/([^\\])\\n\n/$1!!DUMMYPOPM!!/gm;
1525 $string =~ s|!!DUMMYPOPM!!|\\n|gm;
1526 print STDERR ">$string<\n" if $debug{'quote'};
1527 return $string;
1528}
1529
1530# canonize the string: write it on only one line, changing consecutive
1531# whitespace to only one space.
1532# Warning, it changes the string and should only be called if the string is
1533# plain text
1534sub canonize {
1535 my $text=shift;
1536 print STDERR "\ncanonize [$text]====" if $debug{'canonize'};
1537 $text =~ s/^ *//s;
1538 $text =~ s/^[ \t]+/ /gm;
1539 # if ($text eq "\n"), it messed up the first string (header)
1540 $text =~ s/\n/ /gm if ($text ne "\n");
1541 $text =~ s/([.)]) +/$1 /gm;
1542 $text =~ s/([^.)]) */$1 /gm;
1543 $text =~ s/ *$//s;
1544 print STDERR ">$text<\n" if $debug{'canonize'};
1545 return $text;
1546}
1547
1548# wraps the string. We don't use Text::Wrap since it mangles whitespace at
1549# the end of splited line
1550sub wrap {
1551 my $text=shift;
1552 return "0" if ($text eq '0');
1553 my $col=shift || 76;
1554 my @lines=split(/\n/,"$text");
1555 my $res="";
1556 my $first=1;
1557 while (defined(my $line=shift @lines)) {
1558 if ($first && length($line) > $col - 10) {
1559 unshift @lines,$line;
1560 $first=0;
1561 next;
1562 }
1563 if (length($line) > $col) {
1564 my $pos=rindex($line," ",$col);
1565 while (substr($line,$pos-1,1) eq '.' && $pos != -1) {
1566 $pos=rindex($line," ",$pos-1);
1567 }
1568 if ($pos == -1) {
1569 # There are no spaces in the first $col chars, pick-up the
1570 # first space
1571 $pos = index($line," ");
1572 }
1573 if ($pos != -1) {
1574 my $end=substr($line,$pos+1);
1575 $line=substr($line,0,$pos+1);
1576 if ($end =~ s/^( +)//) {
1577 $line .= $1;
1578 }
1579 unshift @lines,$end;
1580 }
1581 }
1582 $first=0;
1583 $res.="$line\n";
1584 }
1585 # Restore the original trailing spaces
1586 $res =~ s/\s+$//s;
1587 if ($text =~ m/(\s+)$/s) {
1588 $res .= $1;
1589 }
1590 return $res;
1591}
1592
1593# outputs properly a '# ... ' line to be put in the PO file
1594sub format_comment {
1595 my $comment=shift;
1596 my $char=shift;
1597 my $result = "#". $char . $comment;
1598 $result =~ s/\n/\n#$char/gs;
1599 $result =~ s/^#$char$/#/gm;
1600 $result .= "\n";
1601 return $result;
1602}
1603
1604
16051;
1606__END__
1607
1608=back
1609
1610=head1 AUTHORS
1611
1612 Denis Barbier <barbier@linuxfr.org>
1613 Martin Quinson (mquinson#debian.org)
1614
1615=cut
1616

Archive Download this file

Revision: 1854