Chameleon

Chameleon Svn Source Tree

Root/branches/zenith432/package/bin/po4a/lib/Locale/Po4a/Po.pm

1# Locale::Po4a::Po -- manipulation of PO files
2#
3# This program is free software; you may redistribute it and/or modify it
4# under the terms of GPL (see COPYING).
5
6############################################################################
7# Modules and declarations
8############################################################################
9
10=encoding UTF-8
11
12=head1 NAME
13
14Locale::Po4a::Po - PO file manipulation module
15
16=head1 SYNOPSIS
17
18 use Locale::Po4a::Po;
19 my $pofile=Locale::Po4a::Po->new();
20
21 # Read PO file
22 $pofile->read('file.po');
23
24 # Add an entry
25 $pofile->push('msgid' => 'Hello', 'msgstr' => 'bonjour',
26 'flags' => "wrap", 'reference'=>'file.c:46');
27
28 # Extract a translation
29 $pofile->gettext("Hello"); # returns 'bonjour'
30
31 # Write back to a file
32 $pofile->write('otherfile.po');
33
34=head1 DESCRIPTION
35
36Locale::Po4a::Po is a module that allows you to manipulate message
37catalogs. You can load and write from/to a file (which extension is often
38I<po>), you can build new entries on the fly or request for the translation
39of a string.
40
41For a more complete description of message catalogs in the PO format and
42their use, please refer to the documentation of the gettext program.
43
44This module is part of the po4a project, which objective is to use PO files
45(designed at origin to ease the translation of program messages) to
46translate everything, including documentation (man page, info manual),
47package description, debconf templates, and everything which may benefit
48from this.
49
50=head1 OPTIONS ACCEPTED BY THIS MODULE
51
52=over 4
53
54=item B<porefs> I<type>[,B<wrap>|B<nowrap>]
55
56Specify the reference format. Argument I<type> can be one of B<none> to not
57produce any reference, B<noline> to not specify the line number (more
58accurately all line numbers are replaced by 1), B<counter> to replace line
59number by an increasing counter, and B<full> to include complete
60references.
61
62Argument can be followed by a comma and either B<wrap> or B<nowrap> keyword.
63References are written by default on a single line. The B<wrap> option wraps
64references on several lines, to mimic B<gettext> tools (B<xgettext> and
65B<msgmerge>). This option will become the default in a future release, because
66it is more sensible. The B<nowrap> option is available so that users who want
67to keep the old behavior can do so.
68
69=item B<--msgid-bugs-address> I<email@address>
70
71Set the report address for msgid bugs. By default, the created POT files
72have no Report-Msgid-Bugs-To fields.
73
74=item B<--copyright-holder> I<string>
75
76Set the copyright holder in the POT header. The default value is
77"Free Software Foundation, Inc."
78
79=item B<--package-name> I<string>
80
81Set the package name for the POT header. The default is "PACKAGE".
82
83=item B<--package-version> I<string>
84
85Set the package version for the POT header. The default is "VERSION".
86
87=back
88
89=cut
90
91use IO::File;
92
93
94require Exporter;
95
96package Locale::Po4a::Po;
97use DynaLoader;
98
99use Locale::Po4a::Common qw(wrap_msg wrap_mod wrap_ref_mod dgettext);
100
101use subs qw(makespace);
102use vars qw(@ISA @EXPORT_OK);
103@ISA = qw(Exporter DynaLoader);
104@EXPORT = qw(%debug);
105@EXPORT_OK = qw(&move_po_if_needed);
106
107use Locale::Po4a::TransTractor;
108# Try to use a C extension if present.
109eval("bootstrap Locale::Po4a::Po $Locale::Po4a::TransTractor::VERSION");
110
111use 5.006;
112use strict;
113use warnings;
114
115use Carp qw(croak);
116use File::Basename;
117use File::Path; # mkdir before write
118use File::Copy; # move
119use POSIX qw(strftime floor);
120use Time::Local;
121
122use Encode;
123
124my @known_flags=qw(wrap no-wrap c-format fuzzy);
125
126our %debug=('canonize' => 0,
127 'quote' => 0,
128 'escape' => 0,
129 'encoding' => 0,
130 'filter' => 0);
131
132=head1 Functions about whole message catalogs
133
134=over 4
135
136=item new()
137
138Creates a new message catalog. If an argument is provided, it's the name of
139a PO file we should load.
140
141=cut
142
143sub new {
144 my ($this, $options) = (shift, shift);
145 my $class = ref($this) || $this;
146 my $self = {};
147 bless $self, $class;
148 $self->initialize($options);
149
150 my $filename = shift;
151 $self->read($filename) if defined($filename) && length($filename);
152 return $self;
153}
154
155# Return the numerical timezone (e.g. +0200)
156# Neither the %z nor the %s formats of strftime are portable:
157# '%s' is not supported on Solaris and '%z' indicates
158# "2006-10-25 19:36E. Europe Standard Time" on MS Windows.
159sub timezone {
160 my ($time) = @_;
161 my @l = localtime($time);
162
163 my $diff = floor(timegm(@l)/60 +0.5) - floor($time/60 +0.5);
164 my $sign = ($diff >= 0 ? 1 : -1);
165 $diff = abs($diff);
166
167 my $h = $sign * floor($diff / 60);
168 my $m = $diff%60;
169
170 return sprintf "%+03d%02d\n", $h, $m;
171}
172
173sub initialize {
174 my ($self, $options) = (shift, shift);
175 my $time = time;
176 my $date = strftime("%Y-%m-%d %H:%M", localtime($time)) . timezone($time);
177 chomp $date;
178# $options = ref($options) || $options;
179
180 $self->{options}{'porefs'}= 'full,nowrap';
181 $self->{options}{'msgid-bugs-address'}= undef;
182 $self->{options}{'copyright-holder'}= "Free Software Foundation, Inc.";
183 $self->{options}{'package-name'}= "PACKAGE";
184 $self->{options}{'package-version'}= "VERSION";
185 foreach my $opt (keys %$options) {
186 if ($options->{$opt}) {
187 die wrap_mod("po4a::po",
188 dgettext ("po4a", "Unknown option: %s"), $opt)
189 unless exists $self->{options}{$opt};
190 $self->{options}{$opt} = $options->{$opt};
191 }
192 }
193 $self->{options}{'porefs'} =~ /^(full|counter|noline|none)(,(no)?wrap)?$/ ||
194 die wrap_mod("po4a::po",
195 dgettext ("po4a",
196 "Invalid value for option 'porefs' ('%s' is ".
197 "not one of 'full', 'counter', 'noline' or 'none')"),
198 $self->{options}{'porefs'});
199 if ($self->{options}{'porefs'} =~ m/^counter/) {
200 $self->{counter} = {};
201 }
202
203 $self->{po}=();
204 $self->{count}=0; # number of msgids in the PO
205 # count_doc: number of strings in the document
206 # (duplicate strings counted multiple times)
207 $self->{count_doc}=0;
208 $self->{header_comment}=
209 " SOME DESCRIPTIVE TITLE\n"
210 ." Copyright (C) YEAR ".
211 $self->{options}{'copyright-holder'}."\n"
212 ." This file is distributed under the same license ".
213 "as the ".$self->{options}{'package-name'}." package.\n"
214 ." FIRST AUTHOR <EMAIL\@ADDRESS>, YEAR.\n"
215 ."\n"
216 .", fuzzy";
217# $self->header_tag="fuzzy";
218 $self->{header}=escape_text("Project-Id-Version: ".
219 $self->{options}{'package-name'}." ".
220 $self->{options}{'package-version'}."\n".
221 ((defined $self->{options}{'msgid-bugs-address'})?
222 "Report-Msgid-Bugs-To: ".$self->{options}{'msgid-bugs-address'}."\n":
223 "").
224 "POT-Creation-Date: $date\n".
225 "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n".
226 "Last-Translator: FULL NAME <EMAIL\@ADDRESS>\n".
227 "Language-Team: LANGUAGE <LL\@li.org>\n".
228 "Language: \n".
229 "MIME-Version: 1.0\n".
230 "Content-Type: text/plain; charset=CHARSET\n".
231 "Content-Transfer-Encoding: 8bit\n");
232
233 $self->{encoder}=find_encoding("ascii");
234 $self->{footer}=[];
235
236 # To make stats about gettext hits
237 $self->stats_clear();
238}
239
240=item read($)
241
242Reads a PO file (which name is given as argument). Previously existing
243entries in self are not removed, the new ones are added to the end of the
244catalog.
245
246=cut
247
248sub read {
249 my $self=shift;
250 my $filename=shift
251 or croak wrap_mod("po4a::po",
252 dgettext("po4a",
253 "Please provide a non-null filename"));
254
255 my $lang = basename($filename);
256 $lang =~ s/\.po$//;
257 $self->{lang} = $lang;
258
259 my $fh;
260 if ($filename eq '-') {
261 $fh=*STDIN;
262 } else {
263 open $fh,"<$filename"
264 or croak wrap_mod("po4a::po",
265 dgettext("po4a", "Can't read from %s: %s"),
266 $filename, $!);
267 }
268
269 ## Read paragraphs line-by-line
270 my $pofile="";
271 my $textline;
272 while (defined ($textline = <$fh>)) {
273 $pofile .= $textline;
274 }
275# close INPUT
276# or croak (sprintf(dgettext("po4a",
277# "Can't close %s after reading: %s"),
278# $filename,$!)."\n");
279
280 my $linenum=0;
281
282 foreach my $msg (split (/\n\n/,$pofile)) {
283 my ($msgid,$msgstr,$comment,$previous,$automatic,$reference,$flags,$buffer);
284 my ($msgid_plural, $msgstr_plural);
285 if ($msg =~ m/^#~/m) {
286 push(@{$self->{footer}}, $msg);
287 next;
288 }
289 foreach my $line (split (/\n/,$msg)) {
290 $linenum++;
291 if ($line =~ /^#\. ?(.*)$/) { # Automatic comment
292 $automatic .= (defined($automatic) ? "\n" : "").$1;
293
294 } elsif ($line =~ /^#: ?(.*)$/) { # reference
295 $reference .= (defined($reference) ? "\n" : "").$1;
296
297 } elsif ($line =~ /^#, ?(.*)$/) { # flags
298 $flags .= (defined($flags) ? "\n" : "").$1;
299
300 } elsif ($line =~ /^#\| ?(.*)$/) { # previous translation
301 $previous .= (defined($previous) ? "\n" : "").($1||"");
302
303 } elsif ($line =~ /^#(.*)$/) { # Translator comments
304 $comment .= (defined($comment) ? "\n" : "").($1||"");
305
306 } elsif ($line =~ /^msgid (".*")$/) { # begin of msgid
307 $buffer = $1;
308
309 } elsif ($line =~ /^msgid_plural (".*")$/) {
310 # begin of msgid_plural, end of msgid
311
312 $msgid = $buffer;
313 $buffer = $1;
314
315 } elsif ($line =~ /^msgstr (".*")$/) {
316 # begin of msgstr, end of msgid
317
318 $msgid = $buffer;
319 $buffer = "$1";
320
321 } elsif ($line =~ /^msgstr\[([0-9]+)\] (".*")$/) {
322 # begin of msgstr[x], end of msgid_plural or msgstr[x-1]
323
324 # Note: po4a cannot uses plural forms
325 # (no integer to use the plural form)
326 # * drop the msgstr[x] where x >= 2
327 # * use msgstr[0] as the translation of msgid
328 # * use msgstr[1] as the translation of msgid_plural
329
330 if ($1 eq "0") {
331 $msgid_plural = $buffer;
332 $buffer = "$2";
333 } elsif ($1 eq "1") {
334 $msgstr = $buffer;
335 $buffer = "$2";
336 } elsif ($1 eq "2") {
337 $msgstr_plural = $buffer;
338 warn wrap_ref_mod("$filename:$linenum",
339 "po4a::po",
340 dgettext("po4a", "Messages with more than 2 plural forms are not supported."));
341 }
342 } elsif ($line =~ /^(".*")$/) {
343 # continuation of a line
344 $buffer .= "\n$1";
345
346 } else {
347 warn wrap_ref_mod("$filename:$linenum",
348 "po4a::po",
349 dgettext("po4a", "Strange line: -->%s<--"),
350 $line);
351 }
352 }
353 $linenum++;
354 if (defined $msgid_plural) {
355 $msgstr_plural=$buffer;
356
357 $msgid = unquote_text($msgid) if (defined($msgid));
358 $msgstr = unquote_text($msgstr) if (defined($msgstr));
359
360 $self->push_raw ('msgid' => $msgid,
361 'msgstr' => $msgstr,
362 'reference' => $reference,
363 'flags' => $flags,
364 'comment' => $comment,
365 'previous' => $previous,
366 'automatic' => $automatic,
367 'plural' => 0);
368
369 $msgid_plural = unquote_text($msgid_plural)
370 if (defined($msgid_plural));
371 $msgstr_plural = unquote_text($msgstr_plural)
372 if (defined($msgstr_plural));
373
374 $self->push_raw ('msgid' => $msgid_plural,
375 'msgstr' => $msgstr_plural,
376 'reference' => $reference,
377 'flags' => $flags,
378 'comment' => $comment,
379 'previous' => $previous,
380 'automatic' => $automatic,
381 'plural' => 1);
382 } else {
383 $msgstr=$buffer;
384
385 $msgid = unquote_text($msgid) if (defined($msgid));
386 $msgstr = unquote_text($msgstr) if (defined($msgstr));
387
388 $self->push_raw ('msgid' => $msgid,
389 'msgstr' => $msgstr,
390 'reference' => $reference,
391 'flags' => $flags,
392 'comment' => $comment,
393 'previous' => $previous,
394 'automatic' => $automatic);
395 }
396 }
397}
398
399=item write($)
400
401Writes the current catalog to the given file.
402
403=cut
404
405sub write{
406 my $self=shift;
407 my $filename=shift
408 or croak dgettext("po4a","Can't write to a file without filename")."\n";
409
410 my $fh;
411 if ($filename eq '-') {
412 $fh=\*STDOUT;
413 } else {
414 # make sure the directory in which we should write the localized
415 # file exists
416 my $dir = $filename;
417 if ($dir =~ m|/|) {
418 $dir =~ s|/[^/]*$||;
419
420 File::Path::mkpath($dir, 0, 0755) # Croaks on error
421 if (length ($dir) && ! -e $dir);
422 }
423 open $fh,">$filename"
424 or croak wrap_mod("po4a::po",
425 dgettext("po4a", "Can't write to %s: %s"),
426 $filename, $!);
427 }
428
429 print $fh "".format_comment($self->{header_comment},"")
430 if defined($self->{header_comment}) && length($self->{header_comment});
431
432 print $fh "msgid \"\"\n";
433 print $fh "msgstr ".quote_text($self->{header})."\n\n";
434
435
436 my $buf_msgstr_plural; # Used to keep the first msgstr of plural forms
437 my $first=1;
438 foreach my $msgid ( sort { ($self->{po}{"$a"}{'pos'}) <=>
439 ($self->{po}{"$b"}{'pos'})
440 } keys %{$self->{po}}) {
441 my $output="";
442
443 if ($first) {
444 $first=0;
445 } else {
446 $output .= "\n";
447 }
448
449 $output .= format_comment($self->{po}{$msgid}{'comment'},"")
450 if defined($self->{po}{$msgid}{'comment'})
451 && length ($self->{po}{$msgid}{'comment'});
452 if ( defined($self->{po}{$msgid}{'automatic'})
453 && length ($self->{po}{$msgid}{'automatic'})) {
454 foreach my $comment (split(/\\n/,$self->{po}{$msgid}{'automatic'}))
455 {
456 $output .= format_comment($comment, ". ")
457 }
458 }
459 $output .= format_comment($self->{po}{$msgid}{'type'},". type: ")
460 if defined($self->{po}{$msgid}{'type'})
461 && length ($self->{po}{$msgid}{'type'});
462 if ( defined($self->{po}{$msgid}{'reference'})
463 && length ($self->{po}{$msgid}{'reference'})) {
464 my $output_ref = $self->{po}{$msgid}{'reference'};
465 if ($self->{options}{'porefs'} =~ m/,wrap$/) {
466 $output_ref = wrap($output_ref);
467 $output_ref =~ s/\s+$//mg;
468 }
469 $output .= format_comment($output_ref,": ");
470 }
471 $output .= "#, ". join(", ", sort split(/\s+/,$self->{po}{$msgid}{'flags'}))."\n"
472 if defined($self->{po}{$msgid}{'flags'})
473 && length ($self->{po}{$msgid}{'flags'});
474 $output .= format_comment($self->{po}{$msgid}{'previous'},"| ")
475 if defined($self->{po}{$msgid}{'previous'})
476 && length ($self->{po}{$msgid}{'previous'});
477
478 if (exists $self->{po}{$msgid}{'plural'}) {
479 if ($self->{po}{$msgid}{'plural'} == 0) {
480 if ($self->get_charset =~ /^utf-8$/i) {
481 my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'});
482 $msgid = Encode::decode_utf8($msgid);
483 $output .= Encode::encode_utf8("msgid ".quote_text($msgid)."\n");
484 $buf_msgstr_plural = Encode::encode_utf8("msgstr[0] ".quote_text($msgstr)."\n");
485 } else {
486 $output = "msgid ".quote_text($msgid)."\n";
487 $buf_msgstr_plural = "msgstr[0] ".quote_text($self->{po}{$msgid}{'msgstr'})."\n";
488 }
489 } elsif ($self->{po}{$msgid}{'plural'} == 1) {
490# TODO: there may be only one plural form
491 if ($self->get_charset =~ /^utf-8$/i) {
492 my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'});
493 $msgid = Encode::decode_utf8($msgid);
494 $output = Encode::encode_utf8("msgid_plural ".quote_text($msgid)."\n");
495 $output .= $buf_msgstr_plural;
496 $output .= Encode::encode_utf8("msgstr[1] ".quote_text($msgstr)."\n");
497 $buf_msgstr_plural = "";
498 } else {
499 $output = "msgid_plural ".quote_text($msgid)."\n";
500 $output .= $buf_msgstr_plural;
501 $output .= "msgstr[1] ".quote_text($self->{po}{$msgid}{'msgstr'})."\n";
502 }
503 } else {
504 die wrap_msg(dgettext("po4a","Can't write PO files with more than two plural forms."));
505 }
506 } else {
507 if ($self->get_charset =~ /^utf-8$/i) {
508 my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'});
509 $msgid = Encode::decode_utf8($msgid);
510 $output .= Encode::encode_utf8("msgid ".quote_text($msgid)."\n");
511 $output .= Encode::encode_utf8("msgstr ".quote_text($msgstr)."\n");
512 } else {
513 $output .= "msgid ".quote_text($msgid)."\n";
514 $output .= "msgstr ".quote_text($self->{po}{$msgid}{'msgstr'})."\n";
515 }
516 }
517
518 print $fh $output;
519 }
520 print $fh join("\n\n", @{$self->{footer}}) if scalar @{$self->{footer}};
521
522# print STDERR "$fh";
523# if ($filename ne '-') {
524# close $fh
525# or croak (sprintf(dgettext("po4a",
526# "Can't close %s after writing: %s\n"),
527# $filename,$!));
528# }
529}
530
531=item write_if_needed($$)
532
533Like write, but if the PO or POT file already exists, the object will be
534written in a temporary file which will be compared with the existing file
535to check if the update is needed (this avoids to change a POT just to
536update a line reference or the POT-Creation-Date field).
537
538=cut
539
540sub move_po_if_needed {
541 my ($new_po, $old_po, $backup) = (shift, shift, shift);
542 my $diff;
543
544 if (-e $old_po) {
545 my $diff_ignore = "-I'^#:' "
546 ."-I'^\"POT-Creation-Date:' "
547 ."-I'^\"PO-Revision-Date:'";
548 $diff = qx(diff -q $diff_ignore $old_po $new_po);
549 if ( $diff eq "" ) {
550 unlink $new_po
551 or die wrap_msg(dgettext("po4a","Can't unlink %s: %s."),
552 $new_po, $!);
553 # touch the old PO
554 my ($atime, $mtime) = (time,time);
555 utime $atime, $mtime, $old_po;
556 } else {
557 move $new_po, $old_po
558 or die wrap_msg(dgettext("po4a","Can't move %s to %s: %s."),
559 $new_po, $old_po, $!);
560 }
561 } else {
562 move $new_po, $old_po
563 or die wrap_msg(dgettext("po4a","Can't move %s to %s: %s."),
564 $new_po, $old_po, $!);
565 }
566}
567
568sub write_if_needed {
569 my $self=shift;
570 my $filename=shift
571 or croak dgettext("po4a","Can't write to a file without filename")."\n";
572
573 if (-e $filename) {
574 my ($tmp_filename);
575 my $basename = basename($filename);
576 (undef,$tmp_filename)=File::Temp::tempfile($basename."XXXX",
577 DIR => $ENV{TMPDIR} || "/tmp",
578 OPEN => 0,
579 UNLINK => 0);
580 $self->write($tmp_filename);
581 move_po_if_needed($tmp_filename, $filename);
582 } else {
583 $self->write($filename);
584 }
585}
586
587=item gettextize($$)
588
589This function produces one translated message catalog from two catalogs, an
590original and a translation. This process is described in L<po4a(7)|po4a.7>,
591section I<Gettextization: how does it work?>.
592
593=cut
594
595sub gettextize {
596 my $this = shift;
597 my $class = ref($this) || $this;
598 my ($poorig,$potrans)=(shift,shift);
599
600 my $pores=Locale::Po4a::Po->new();
601
602 my $please_fail = 0;
603 my $toobad = dgettext("po4a",
604 "\nThe gettextization failed (once again). Don't give up, ".
605 "gettextizing is a subtle art, but this is only needed once ".
606 "to convert a project to the gorgeous luxus offered by po4a ".
607 "to translators.".
608 "\nPlease refer to the po4a(7) documentation, the section ".
609 "\"HOWTO convert a pre-existing translation to po4a?\" ".
610 "contains several hints to help you in your task");
611
612 # Don't fail right now when the entry count does not match. Instead, give
613 # it a try so that the user can see where we fail (which is probably where
614 # the problem is).
615 if ($poorig->count_entries_doc() > $potrans->count_entries_doc()) {
616 warn wrap_mod("po4a gettextize", dgettext("po4a",
617 "Original has more strings than the translation (%d>%d). ".
618 "Please fix it by editing the translated version to add ".
619 "some dummy entry."),
620 $poorig->count_entries_doc(),
621 $potrans->count_entries_doc());
622 $please_fail = 1;
623 } elsif ($poorig->count_entries_doc() < $potrans->count_entries_doc()) {
624 warn wrap_mod("po4a gettextize", dgettext("po4a",
625 "Original has less strings than the translation (%d<%d). ".
626 "Please fix it by removing the extra entry from the ".
627 "translated file. You may need an addendum (cf po4a(7)) ".
628 "to reput the chunk in place after gettextization. A ".
629 "possible cause is that a text duplicated in the original ".
630 "is not translated the same way each time. Remove one of ".
631 "the translations, and you're fine."),
632 $poorig->count_entries_doc(),
633 $potrans->count_entries_doc());
634 $please_fail = 1;
635 }
636
637 if ( $poorig->get_charset =~ /^utf-8$/i ) {
638 $potrans->to_utf8;
639 $pores->set_charset("UTF-8");
640 } else {
641 if ($potrans->get_charset eq "CHARSET") {
642 $pores->set_charset("ascii");
643 } else {
644 $pores->set_charset($potrans->get_charset);
645 }
646 }
647 print "Po character sets:\n".
648 " original=".$poorig->get_charset."\n".
649 " translated=".$potrans->get_charset."\n".
650 " result=".$pores->get_charset."\n"
651 if $debug{'encoding'};
652
653 for (my ($o,$t)=(0,0) ;
654 $o<$poorig->count_entries_doc() && $t<$potrans->count_entries_doc();
655 $o++,$t++) {
656 #
657 # Extract some informations
658
659 my ($orig,$trans)=($poorig->msgid_doc($o),$potrans->msgid_doc($t));
660# print STDERR "Matches [[$orig]]<<$trans>>\n";
661
662 my ($reforig,$reftrans)=($poorig->{po}{$orig}{'reference'},
663 $potrans->{po}{$trans}{'reference'});
664 my ($typeorig,$typetrans)=($poorig->{po}{$orig}{'type'},
665 $potrans->{po}{$trans}{'type'});
666
667 #
668 # Make sure the type of both string exist
669 #
670 die wrap_mod("po4a gettextize",
671 "Internal error: type of original string number %s ".
672 "isn't provided", $o)
673 if ($typeorig eq '');
674
675 die wrap_mod("po4a gettextize",
676 "Internal error: type of translated string number %s ".
677 "isn't provided", $o)
678 if ($typetrans eq '');
679
680 #
681 # Make sure both type are the same
682 #
683 if ($typeorig ne $typetrans){
684 $pores->write("gettextization.failed.po");
685 eval {
686 # Recode $trans into current charset, if possible
687 require I18N::Langinfo;
688 I18N::Langinfo->import(qw(langinfo CODESET));
689 my $codeset = langinfo(CODESET());
690 Encode::from_to($trans, $potrans->get_charset, $codeset);
691 };
692 die wrap_msg(dgettext("po4a",
693 "po4a gettextization: Structure disparity between ".
694 "original and translated files:\n".
695 "msgid (at %s) is of type '%s' while\n".
696 "msgstr (at %s) is of type '%s'.\n".
697 "Original text: %s\n".
698 "Translated text: %s\n".
699 "(result so far dumped to gettextization.failed.po)").
700 "%s",
701 $reforig, $typeorig,
702 $reftrans, $typetrans,
703 $orig,
704 $trans,
705 $toobad);
706 }
707
708 #
709 # Push the entry
710 #
711 my $flags;
712 if (defined $poorig->{po}{$orig}{'flags'}) {
713 $flags = $poorig->{po}{$orig}{'flags'}." fuzzy";
714 } else {
715 $flags = "fuzzy";
716 }
717 $pores->push_raw('msgid' => $orig,
718 'msgstr' => $trans,
719 'flags' => $flags,
720 'type' => $typeorig,
721 'reference' => $reforig,
722 'conflict' => 1,
723 'transref' => $potrans->{po}{$trans}{'reference'})
724 unless (defined($pores->{po}{$orig})
725 and ($pores->{po}{$orig}{'msgstr'} eq $trans))
726 # FIXME: maybe we should be smarter about what reference should be
727 # sent to push_raw.
728 }
729
730 # make sure we return a useful error message when entry count differ
731 die "$toobad\n" if $please_fail;
732
733 return $pores;
734}
735
736=item filter($)
737
738This function extracts a catalog from an existing one. Only the entries having
739a reference in the given file will be placed in the resulting catalog.
740
741This function parses its argument, converts it to a Perl function definition,
742evals this definition and filters the fields for which this function returns
743true.
744
745I love Perl sometimes ;)
746
747=cut
748
749sub filter {
750 my $self=shift;
751 our $filter=shift;
752
753 my $res;
754 $res = Locale::Po4a::Po->new();
755
756 # Parse the filter
757 our $code="sub apply { return ";
758 our $pos=0;
759 our $length = length $filter;
760
761 # explode chars to parts. How to subscript a string in Perl?
762 our @filter = split(//,$filter);
763
764 sub gloups {
765 my $fmt=shift;
766 my $space = "";
767 for (1..$pos){
768 $space .= ' ';
769 }
770 die wrap_msg("$fmt\n$filter\n$space^ HERE");
771 }
772 sub showmethecode {
773 return unless $debug{'filter'};
774 my $fmt=shift;
775 my $space="";
776 for (1..$pos){
777 $space .= ' ';
778 }
779 print STDERR "$filter\n$space^ $fmt\n";#"$code\n";
780 }
781
782 # I dream of a lex in perl :-/
783 sub parse_expression {
784 showmethecode("Begin expression")
785 if $debug{'filter'};
786
787 gloups("Begin of expression expected, got '%s'",$filter[$pos])
788 unless ($filter[$pos] eq '(');
789 $pos ++; # pass the '('
790 if ($filter[$pos] eq '&') {
791 # AND
792 $pos++;
793 showmethecode("Begin of AND")
794 if $debug{'filter'};
795 $code .= "(";
796 while (1) {
797 gloups ("Unfinished AND statement.")
798 if ($pos == $length);
799 parse_expression();
800 if ($filter[$pos] eq '(') {
801 $code .= " && ";
802 } elsif ($filter[$pos] eq ')') {
803 last; # do not eat that char
804 } else {
805 gloups("End of AND or begin of sub-expression expected, got '%s'", $filter[$pos]);
806 }
807 }
808 $code .= ")";
809 } elsif ($filter[$pos] eq '|') {
810 # OR
811 $pos++;
812 $code .= "(";
813 while (1) {
814 gloups("Unfinished OR statement.")
815 if ($pos == $length);
816 parse_expression();
817 if ($filter[$pos] eq '(') {
818 $code .= " || ";
819 } elsif ($filter[$pos] eq ')') {
820 last; # do not eat that char
821 } else {
822 gloups("End of OR or begin of sub-expression expected, got '%s'",$filter[$pos]);
823 }
824 }
825 $code .= ")";
826 } elsif ($filter[$pos] eq '!') {
827 # NOT
828 $pos++;
829 $code .= "(!";
830 gloups("Missing sub-expression in NOT statement.")
831 if ($pos == $length);
832 parse_expression();
833 $code .= ")";
834 } else {
835 # must be an equal. Let's get field and argument
836 my ($field,$arg,$done);
837 $field = substr($filter,$pos);
838 gloups("EQ statement contains no '=' or invalid field name")
839 unless ($field =~ /([a-z]*)=/i);
840 $field = lc($1);
841 $pos += (length $field) + 1;
842
843 # check that we've got a valid field name,
844 # and the number it referes to
845 # DO NOT CHANGE THE ORDER
846 my @names=qw(msgid msgstr reference flags comment previous automatic);
847 my $fieldpos;
848 for ($fieldpos = 0;
849 $fieldpos < scalar @names && $field ne $names[$fieldpos];
850 $fieldpos++) {}
851 gloups("Invalid field name: %s",$field)
852 if $fieldpos == scalar @names; # not found
853
854 # Now, get the argument value. It has to be between quotes,
855 # which can be escaped
856 # We point right on the first char of the argument
857 # (first quote already eaten)
858 my $escaped = 0;
859 my $quoted = 0;
860 if ($filter[$pos] eq '"') {
861 $pos++;
862 $quoted = 1;
863 }
864 showmethecode(($quoted?"Quoted":"Unquoted")." argument of field '$field'")
865 if $debug{'filter'};
866
867 while (!$done) {
868 gloups("Unfinished EQ argument.")
869 if ($pos == $length);
870
871 if ($quoted) {
872 if ($filter[$pos] eq '\\') {
873 if ($escaped) {
874 $arg .= '\\';
875 $escaped = 0;
876 } else {
877 $escaped = 1;
878 }
879 } elsif ($escaped) {
880 if ($filter[$pos] eq '"') {
881 $arg .= '"';
882 $escaped = 0;
883 } else {
884 gloups("Invalid escape sequence in argument: '\\%s'",$filter[$pos]);
885 }
886 } else {
887 if ($filter[$pos] eq '"') {
888 $done = 1;
889 } else {
890 $arg .= $filter[$pos];
891 }
892 }
893 } else {
894 if ($filter[$pos] eq ')') {
895 # counter the next ++ since we don't want to eat
896 # this char
897 $pos--;
898 $done = 1;
899 } else {
900 $arg .= $filter[$pos];
901 }
902 }
903 $pos++;
904 }
905 # and now, add the code to check this equality
906 $code .= "(\$_[$fieldpos] =~ m{$arg})";
907
908 }
909 showmethecode("End of expression")
910 if $debug{'filter'};
911 gloups("Unfinished statement.")
912 if ($pos == $length);
913 gloups("End of expression expected, got '%s'",$filter[$pos])
914 unless ($filter[$pos] eq ')');
915 $pos++;
916 }
917 # And now, launch the beast, finish the function and use eval
918 # to construct this function.
919 # Ok, the lack of lexer is a fair price for the eval ;)
920 parse_expression();
921 gloups("Garbage at the end of the expression")
922 if ($pos != $length);
923 $code .= "; }";
924 print STDERR "CODE = $code\n"
925 if $debug{'filter'};
926 eval $code;
927 die wrap_mod("po4a::po", dgettext("po4a", "Eval failure: %s"), $@)
928 if $@;
929
930 for (my $cpt=(0) ;
931 $cpt<$self->count_entries();
932 $cpt++) {
933
934 my ($msgid,$ref,$msgstr,$flags,$type,$comment,$previous,$automatic);
935
936 $msgid = $self->msgid($cpt);
937 $ref=$self->{po}{$msgid}{'reference'};
938
939 $msgstr= $self->{po}{$msgid}{'msgstr'};
940 $flags = $self->{po}{$msgid}{'flags'};
941 $type = $self->{po}{$msgid}{'type'};
942 $comment = $self->{po}{$msgid}{'comment'};
943 $previous = $self->{po}{$msgid}{'previous'};
944 $automatic = $self->{po}{$msgid}{'automatic'};
945
946 # DO NOT CHANGE THE ORDER
947 $res->push_raw('msgid' => $msgid,
948 'msgstr' => $msgstr,
949 'flags' => $flags,
950 'type' => $type,
951 'reference' => $ref,
952 'comment' => $comment,
953 'previous' => $previous,
954 'automatic' => $automatic)
955 if (apply($msgid,$msgstr,$ref,$flags,$comment,$previous,$automatic));
956 }
957 # delete the apply subroutine
958 # otherwise it will be redefined.
959 undef &apply;
960 return $res;
961}
962
963=item to_utf8()
964
965Recodes to UTF-8 the PO's msgstrs. Does nothing if the charset is not
966specified in the PO file ("CHARSET" value), or if it's already UTF-8 or
967ASCII.
968
969=cut
970
971sub to_utf8 {
972 my $this = shift;
973 my $charset = $this->get_charset();
974
975 unless ($charset eq "CHARSET" or
976 $charset =~ /^ascii$/i or
977 $charset =~ /^utf-8$/i) {
978 foreach my $msgid ( keys %{$this->{po}} ) {
979 Encode::from_to($this->{po}{$msgid}{'msgstr'}, $charset, "utf-8");
980 }
981 $this->set_charset("UTF-8");
982 }
983}
984
985=back
986
987=head1 Functions to use a message catalog for translations
988
989=over 4
990
991=item gettext($%)
992
993Request the translation of the string given as argument in the current catalog.
994The function returns the original (untranslated) string if the string was not
995found.
996
997After the string to translate, you can pass a hash of extra
998arguments. Here are the valid entries:
999
1000=over
1001
1002=item B<wrap>
1003
1004boolean indicating whether we can consider that whitespaces in string are
1005not important. If yes, the function canonizes the string before looking for
1006a translation, and wraps the result.
1007
1008=item B<wrapcol>
1009
1010the column at which we should wrap (default: 76).
1011
1012=back
1013
1014=cut
1015
1016sub gettext {
1017 my $self=shift;
1018 my $text=shift;
1019 my (%opt)=@_;
1020 my $res;
1021
1022 return "" unless defined($text) && length($text); # Avoid returning the header.
1023 my $validoption="reference wrap wrapcol";
1024 my %validoption;
1025
1026 map { $validoption{$_}=1 } (split(/ /,$validoption));
1027 foreach (keys %opt) {
1028 Carp::confess "internal error: unknown arg $_.\n".
1029 "Here are the valid options: $validoption.\n"
1030 unless $validoption{$_};
1031 }
1032
1033 $text=canonize($text)
1034 if ($opt{'wrap'});
1035
1036 my $esc_text=escape_text($text);
1037
1038 $self->{gettextqueries}++;
1039
1040 if ( defined $self->{po}{$esc_text}
1041 and defined $self->{po}{$esc_text}{'msgstr'}
1042 and length $self->{po}{$esc_text}{'msgstr'}
1043 and ( not defined $self->{po}{$esc_text}{'flags'}
1044 or $self->{po}{$esc_text}{'flags'} !~ /fuzzy/)) {
1045
1046 $self->{gettexthits}++;
1047 $res = unescape_text($self->{po}{$esc_text}{'msgstr'});
1048 if (defined $self->{po}{$esc_text}{'plural'}) {
1049 if ($self->{po}{$esc_text}{'plural'} eq "0") {
1050 warn wrap_mod("po4a gettextize", dgettext("po4a",
1051 "'%s' is the singular form of a message, ".
1052 "po4a will use the msgstr[0] translation (%s)."),
1053 $esc_text, $res);
1054 } else {
1055 warn wrap_mod("po4a gettextize", dgettext("po4a",
1056 "'%s' is the plural form of a message, ".
1057 "po4a will use the msgstr[1] translation (%s)."),
1058 $esc_text, $res);
1059 }
1060 }
1061 } else {
1062 $res = $text;
1063 }
1064
1065 if ($opt{'wrap'}) {
1066 if ($self->get_charset =~ /^utf-8$/i) {
1067 $res=Encode::decode_utf8($res);
1068 $res=wrap ($res, $opt{'wrapcol'} || 76);
1069 $res=Encode::encode_utf8($res);
1070 } else {
1071 $res=wrap ($res, $opt{'wrapcol'} || 76);
1072 }
1073 }
1074# print STDERR "Gettext >>>$text<<<(escaped=$esc_text)=[[[$res]]]\n\n";
1075 return $res;
1076}
1077
1078=item stats_get()
1079
1080Returns statistics about the hit ratio of gettext since the last time that
1081stats_clear() was called. Please note that it's not the same
1082statistics than the one printed by msgfmt --statistic. Here, it's statistics
1083about recent usage of the PO file, while msgfmt reports the status of the
1084file. Example of use:
1085
1086 [some use of the PO file to translate stuff]
1087
1088 ($percent,$hit,$queries) = $pofile->stats_get();
1089 print "So far, we found translations for $percent\% ($hit of $queries) of strings.\n";
1090
1091=cut
1092
1093sub stats_get() {
1094 my $self=shift;
1095 my ($h,$q)=($self->{gettexthits},$self->{gettextqueries});
1096 my $p = ($q == 0 ? 100 : int($h/$q*10000)/100);
1097
1098# $p =~ s/\.00//;
1099# $p =~ s/(\..)0/$1/;
1100
1101 return ( $p,$h,$q );
1102}
1103
1104=item stats_clear()
1105
1106Clears the statistics about gettext hits.
1107
1108=cut
1109
1110sub stats_clear {
1111 my $self = shift;
1112 $self->{gettextqueries} = 0;
1113 $self->{gettexthits} = 0;
1114}
1115
1116=back
1117
1118=head1 Functions to build a message catalog
1119
1120=over 4
1121
1122=item push(%)
1123
1124Push a new entry at the end of the current catalog. The arguments should
1125form a hash table. The valid keys are:
1126
1127=over 4
1128
1129=item B<msgid>
1130
1131the string in original language.
1132
1133=item B<msgstr>
1134
1135the translation.
1136
1137=item B<reference>
1138
1139an indication of where this string was found. Example: file.c:46 (meaning
1140in 'file.c' at line 46). It can be a space-separated list in case of
1141multiple occurrences.
1142
1143=item B<comment>
1144
1145a comment added here manually (by the translators). The format here is free.
1146
1147=item B<automatic>
1148
1149a comment which was automatically added by the string extraction
1150program. See the B<--add-comments> option of the B<xgettext> program for
1151more information.
1152
1153=item B<flags>
1154
1155space-separated list of all defined flags for this entry.
1156
1157Valid flags are: B<c-text>, B<python-text>, B<lisp-text>, B<elisp-text>, B<librep-text>,
1158B<smalltalk-text>, B<java-text>, B<awk-text>, B<object-pascal-text>, B<ycp-text>,
1159B<tcl-text>, B<wrap>, B<no-wrap> and B<fuzzy>.
1160
1161See the gettext documentation for their meaning.
1162
1163=item B<type>
1164
1165this is mostly an internal argument: it is used while gettextizing
1166documents. The idea here is to parse both the original and the translation
1167into a PO object, and merge them, using one's msgid as msgid and the
1168other's msgid as msgstr. To make sure that things get ok, each msgid in PO
1169objects are given a type, based on their structure (like "chapt", "sect1",
1170"p" and so on in DocBook). If the types of strings are not the same, that
1171means that both files do not share the same structure, and the process
1172reports an error.
1173
1174This information is written as automatic comment in the PO file since this
1175gives to translators some context about the strings to translate.
1176
1177=item B<wrap>
1178
1179boolean indicating whether whitespaces can be mangled in cosmetic
1180reformattings. If true, the string is canonized before use.
1181
1182This information is written to the PO file using the B<wrap> or B<no-wrap> flag.
1183
1184=item B<wrapcol>
1185
1186the column at which we should wrap (default: 76).
1187
1188This information is not written to the PO file.
1189
1190=back
1191
1192=cut
1193
1194sub push {
1195 my $self=shift;
1196 my %entry=@_;
1197
1198 my $validoption="wrap wrapcol type msgid msgstr automatic previous flags reference";
1199 my %validoption;
1200
1201 map { $validoption{$_}=1 } (split(/ /,$validoption));
1202 foreach (keys %entry) {
1203 Carp::confess "internal error: unknown arg $_.\n".
1204 "Here are the valid options: $validoption.\n"
1205 unless $validoption{$_};
1206 }
1207
1208 unless ($entry{'wrap'}) {
1209 $entry{'flags'} .= " no-wrap";
1210 }
1211 if (defined ($entry{'msgid'})) {
1212 $entry{'msgid'} = canonize($entry{'msgid'})
1213 if ($entry{'wrap'});
1214
1215 $entry{'msgid'} = escape_text($entry{'msgid'});
1216 }
1217 if (defined ($entry{'msgstr'})) {
1218 $entry{'msgstr'} = canonize($entry{'msgstr'})
1219 if ($entry{'wrap'});
1220
1221 $entry{'msgstr'} = escape_text($entry{'msgstr'});
1222 }
1223
1224 $self->push_raw(%entry);
1225}
1226
1227# The same as push(), but assuming that msgid and msgstr are already escaped
1228sub push_raw {
1229 my $self=shift;
1230 my %entry=@_;
1231 my ($msgid,$msgstr,$reference,$comment,$automatic,$previous,$flags,$type,$transref)=
1232 ($entry{'msgid'},$entry{'msgstr'},
1233 $entry{'reference'},$entry{'comment'},$entry{'automatic'},
1234 $entry{'previous'},$entry{'flags'},$entry{'type'},$entry{'transref'});
1235 my $keep_conflict = $entry{'conflict'};
1236
1237# print STDERR "Push_raw\n";
1238# print STDERR " msgid=>>>$msgid<<<\n" if $msgid;
1239# print STDERR " msgstr=[[[$msgstr]]]\n" if $msgstr;
1240# Carp::cluck " flags=$flags\n" if $flags;
1241
1242 return unless defined($entry{'msgid'});
1243
1244 #no msgid => header definition
1245 unless (length($entry{'msgid'})) {
1246# if (defined($self->{header}) && $self->{header} =~ /\S/) {
1247# warn dgettext("po4a","Redefinition of the header. ".
1248# "The old one will be discarded\n");
1249# } FIXME: do that iff the header isn't the default one.
1250 $self->{header}=$msgstr;
1251 $self->{header_comment}=$comment;
1252 my $charset = $self->get_charset;
1253 if ($charset ne "CHARSET") {
1254 $self->{encoder}=find_encoding($charset);
1255 } else {
1256 $self->{encoder}=find_encoding("ascii");
1257 }
1258 return;
1259 }
1260
1261 if ($self->{options}{'porefs'} =~ m/^none/) {
1262 $reference = "";
1263 } elsif ($self->{options}{'porefs'} =~ m/^counter/) {
1264 if ($reference =~ m/^(.+?)(?=\S+:\d+)/g) {
1265 my $new_ref = $1;
1266 1 while $reference =~ s{ # x modifier is added to add formatting and improve readability
1267 \G(\s*)(\S+):\d+ # \G is the last match in m//g (see also the (?=) syntax above)
1268 # $2 is the file name
1269 }{
1270 $self->{counter}{$2} ||= 0, # each file has its own counter
1271 ++$self->{counter}{$2}, # increment it
1272 $new_ref .= "$1$2:".$self->{counter}{$2} # replace line number by this counter
1273 }gex && pos($reference);
1274 $reference = $new_ref;
1275 }
1276 } elsif ($self->{options}{'porefs'} =~ m/^noline/) {
1277 $reference =~ s/:\d+/:1/g;
1278 }
1279
1280 if (defined($self->{po}{$msgid})) {
1281 warn wrap_mod("po4a::po",
1282 dgettext("po4a","msgid defined twice: %s"),
1283 $msgid)
1284 if (0); # FIXME: put a verbose stuff
1285 if ( defined $msgstr
1286 and defined $self->{po}{$msgid}{'msgstr'}
1287 and $self->{po}{$msgid}{'msgstr'} ne $msgstr) {
1288 my $txt=quote_text($msgid);
1289 my ($first,$second)=
1290 (format_comment(". ",$self->{po}{$msgid}{'reference'}).
1291 quote_text($self->{po}{$msgid}{'msgstr'}),
1292
1293 format_comment(". ",$reference).
1294 quote_text($msgstr));
1295
1296 if ($keep_conflict) {
1297 if ($self->{po}{$msgid}{'msgstr'} =~ m/^#-#-#-#-# .* #-#-#-#-#\\n/s) {
1298 $msgstr = $self->{po}{$msgid}{'msgstr'}.
1299 "\\n#-#-#-#-# $transref #-#-#-#-#\\n".
1300 $msgstr;
1301 } else {
1302 $msgstr = "#-#-#-#-# ".
1303 $self->{po}{$msgid}{'transref'}.
1304 " #-#-#-#-#\\n".
1305 $self->{po}{$msgid}{'msgstr'}."\\n".
1306 "#-#-#-#-# $transref #-#-#-#-#\\n".
1307 $msgstr;
1308 }
1309 # Every msgid will have the same list of references.
1310 # Only keep the last list.
1311 $self->{po}{$msgid}{'reference'} = "";
1312 } else {
1313 warn wrap_msg(dgettext("po4a",
1314 "Translations don't match for:\n".
1315 "%s\n".
1316 "-->First translation:\n".
1317 "%s\n".
1318 " Second translation:\n".
1319 "%s\n".
1320 " Old translation discarded."),
1321 $txt,$first,$second);
1322 }
1323 }
1324 }
1325 if (defined $transref) {
1326 $self->{po}{$msgid}{'transref'} = $transref;
1327 }
1328 if (defined($reference) && length($reference)) {
1329 if (defined $self->{po}{$msgid}{'reference'}) {
1330 $self->{po}{$msgid}{'reference'} .= " ".$reference;
1331 } else {
1332 $self->{po}{$msgid}{'reference'} = $reference;
1333 }
1334 }
1335 $self->{po}{$msgid}{'msgstr'} = $msgstr;
1336 $self->{po}{$msgid}{'comment'} = $comment;
1337 $self->{po}{$msgid}{'automatic'} = $automatic;
1338 $self->{po}{$msgid}{'previous'} = $previous;
1339 if (defined($self->{po}{$msgid}{'pos_doc'})) {
1340 $self->{po}{$msgid}{'pos_doc'} .= " ".$self->{count_doc}++;
1341 } else {
1342 $self->{po}{$msgid}{'pos_doc'} = $self->{count_doc}++;
1343 }
1344 unless (defined($self->{po}{$msgid}{'pos'})) {
1345 $self->{po}{$msgid}{'pos'} = $self->{count}++;
1346 }
1347 $self->{po}{$msgid}{'type'} = $type;
1348 $self->{po}{$msgid}{'plural'} = $entry{'plural'}
1349 if defined $entry{'plural'};
1350
1351 if (defined($flags)) {
1352 $flags = " $flags ";
1353 $flags =~ s/,/ /g;
1354 foreach my $flag (@known_flags) {
1355 if ($flags =~ /\s$flag\s/) { # if flag to be set
1356 unless ( defined($self->{po}{$msgid}{'flags'})
1357 && $self->{po}{$msgid}{'flags'} =~ /\b$flag\b/) {
1358 # flag not already set
1359 if (defined $self->{po}{$msgid}{'flags'}) {
1360 $self->{po}{$msgid}{'flags'} .= " ".$flag;
1361 } else {
1362 $self->{po}{$msgid}{'flags'} = $flag;
1363 }
1364 }
1365 }
1366 }
1367 }
1368# print STDERR "stored ((($msgid)))=>(((".$self->{po}{$msgid}{'msgstr'}.")))\n\n";
1369
1370}
1371
1372=back
1373
1374=head1 Miscellaneous functions
1375
1376=over 4
1377
1378=item count_entries()
1379
1380Returns the number of entries in the catalog (without the header).
1381
1382=cut
1383
1384sub count_entries($) {
1385 my $self=shift;
1386 return $self->{count};
1387}
1388
1389=item count_entries_doc()
1390
1391Returns the number of entries in document. If a string appears multiple times
1392in the document, it will be counted multiple times
1393
1394=cut
1395
1396sub count_entries_doc($) {
1397 my $self=shift;
1398 return $self->{count_doc};
1399}
1400
1401=item msgid($)
1402
1403Returns the msgid of the given number.
1404
1405=cut
1406
1407sub msgid($$) {
1408 my $self=shift;
1409 my $num=shift;
1410
1411 foreach my $msgid ( keys %{$self->{po}} ) {
1412 return $msgid if ($self->{po}{$msgid}{'pos'} eq $num);
1413 }
1414 return undef;
1415}
1416
1417=item msgid_doc($)
1418
1419Returns the msgid with the given position in the document.
1420
1421=cut
1422
1423sub msgid_doc($$) {
1424 my $self=shift;
1425 my $num=shift;
1426
1427 foreach my $msgid ( keys %{$self->{po}} ) {
1428 foreach my $pos (split / /, $self->{po}{$msgid}{'pos_doc'}) {
1429 return $msgid if ($pos eq $num);
1430 }
1431 }
1432 return undef;
1433}
1434
1435=item get_charset()
1436
1437Returns the character set specified in the PO header. If it hasn't been
1438set, it will return "CHARSET".
1439
1440=cut
1441
1442sub get_charset() {
1443 my $self=shift;
1444
1445 $self->{header} =~ /charset=(.*?)[\s\\]/;
1446
1447 if (defined $1) {
1448 return $1;
1449 } else {
1450 return "CHARSET";
1451 }
1452}
1453
1454=item set_charset($)
1455
1456This sets the character set of the PO header to the value specified in its
1457first argument. If you never call this function (and no file with a specified
1458character set is read), the default value is left to "CHARSET". This value
1459doesn't change the behavior of this module, it's just used to fill that field
1460in the header, and to return it in get_charset().
1461
1462=cut
1463
1464sub set_charset() {
1465 my $self=shift;
1466
1467 my ($newchar,$oldchar);
1468 $newchar = shift;
1469 $oldchar = $self->get_charset();
1470
1471 $self->{header} =~ s/$oldchar/$newchar/;
1472 $self->{encoder}=find_encoding($newchar);
1473}
1474
1475#----[ helper functions ]---------------------------------------------------
1476
1477# transforme the string from its PO file representation to the form which
1478# should be used to print it
1479sub unescape_text {
1480 my $text = shift;
1481
1482 print STDERR "\nunescape [$text]====" if $debug{'escape'};
1483 $text = join("",split(/\n/,$text));
1484 $text =~ s/\\"/"/g;
1485 # unescape newlines
1486 # NOTE on \G:
1487 # The following regular expression introduce newlines.
1488 # Thus, ^ doesn't match all beginnings of lines.
1489 # \G is a zero-width assertion that matches the position
1490 # of the previous substitution with s///g. As every
1491 # substitution ends by a newline, it always matches a
1492 # position just after a newline.
1493 $text =~ s/( # $1:
1494 (\G|[^\\]) # beginning of the line or any char
1495 # different from '\'
1496 (\\\\)* # followed by any even number of '\'
1497 )\\n # and followed by an escaped newline
1498 /$1\n/sgx; # single string, match globally, allow comments
1499 # unescape carriage returns
1500 $text =~ s/( # $1:
1501 (\G|[^\\]) # beginning of the line or any char
1502 # different from '\'
1503 (\\\\)* # followed by any even number of '\'
1504 )\\r # and followed by an escaped carriage return
1505 /$1\r/sgx; # single string, match globally, allow comments
1506 # unescape tabulations
1507 $text =~ s/( # $1:
1508 (\G|[^\\])# beginning of the line or any char
1509 # different from '\'
1510 (\\\\)* # followed by any even number of '\'
1511 )\\t # and followed by an escaped tabulation
1512 /$1\t/mgx; # multilines string, match globally, allow comments
1513 # and unescape the escape character
1514 $text =~ s/\\\\/\\/g;
1515 print STDERR ">$text<\n" if $debug{'escape'};
1516
1517 return $text;
1518}
1519
1520# transform the string to its representation as it should be written in PO
1521# files
1522sub escape_text {
1523 my $text = shift;
1524
1525 print STDERR "\nescape [$text]====" if $debug{'escape'};
1526 $text =~ s/\\/\\\\/g;
1527 $text =~ s/"/\\"/g;
1528 $text =~ s/\n/\\n/g;
1529 $text =~ s/\r/\\r/g;
1530 $text =~ s/\t/\\t/g;
1531 print STDERR ">$text<\n" if $debug{'escape'};
1532
1533 return $text;
1534}
1535
1536# put quotes around the string on each lines (without escaping it)
1537# It does also normalize the text (ie, make sure its representation is wraped
1538# on the 80th char, but without changing the meaning of the string)
1539sub quote_text {
1540 my $string = shift;
1541
1542 return '""' unless defined($string) && length($string);
1543
1544 print STDERR "\nquote [$string]====" if $debug{'quote'};
1545 # break lines on newlines, if any
1546 # see unescape_text for an explanation on \G
1547 $string =~ s/( # $1:
1548 (\G|[^\\]) # beginning of the line or any char
1549 # different from '\'
1550 (\\\\)* # followed by any even number of '\'
1551 \\n) # and followed by an escaped newline
1552 /$1\n/sgx; # single string, match globally, allow comments
1553 $string = wrap($string);
1554 my @string = split(/\n/,$string);
1555 $string = join ("\"\n\"",@string);
1556 $string = "\"$string\"";
1557 if (scalar @string > 1 && $string[0] ne '') {
1558 $string = "\"\"\n".$string;
1559 }
1560
1561 print STDERR ">$string<\n" if $debug{'quote'};
1562 return $string;
1563}
1564
1565# undo the work of the quote_text function
1566sub unquote_text {
1567 my $string = shift;
1568 print STDERR "\nunquote [$string]====" if $debug{'quote'};
1569 $string =~ s/^""\\n//s;
1570 $string =~ s/^"(.*)"$/$1/s;
1571 $string =~ s/"\n"//gm;
1572 # Note: an even number of '\' could precede \\n, but I could not build a
1573 # document to test this
1574 $string =~ s/([^\\])\\n\n/$1!!DUMMYPOPM!!/gm;
1575 $string =~ s|!!DUMMYPOPM!!|\\n|gm;
1576 print STDERR ">$string<\n" if $debug{'quote'};
1577 return $string;
1578}
1579
1580# canonize the string: write it on only one line, changing consecutive
1581# whitespace to only one space.
1582# Warning, it changes the string and should only be called if the string is
1583# plain text
1584sub canonize {
1585 my $text=shift;
1586 print STDERR "\ncanonize [$text]====" if $debug{'canonize'};
1587 $text =~ s/^ *//s;
1588 $text =~ s/^[ \t]+/ /gm;
1589 # if ($text eq "\n"), it messed up the first string (header)
1590 $text =~ s/\n/ /gm if ($text ne "\n");
1591 $text =~ s/([.)]) +/$1 /gm;
1592 $text =~ s/([^.)]) */$1 /gm;
1593 $text =~ s/ *$//s;
1594 print STDERR ">$text<\n" if $debug{'canonize'};
1595 return $text;
1596}
1597
1598# wraps the string. We don't use Text::Wrap since it mangles whitespace at
1599# the end of splited line
1600sub wrap {
1601 my $text=shift;
1602 return "0" if ($text eq '0');
1603 my $col=shift || 76;
1604 my @lines=split(/\n/,"$text");
1605 my $res="";
1606 my $first=1;
1607 while (defined(my $line=shift @lines)) {
1608 if ($first && length($line) > $col - 10) {
1609 unshift @lines,$line;
1610 $first=0;
1611 next;
1612 }
1613 if (length($line) > $col) {
1614 my $pos=rindex($line," ",$col);
1615 while (substr($line,$pos-1,1) eq '.' && $pos != -1) {
1616 $pos=rindex($line," ",$pos-1);
1617 }
1618 if ($pos == -1) {
1619 # There are no spaces in the first $col chars, pick-up the
1620 # first space
1621 $pos = index($line," ");
1622 }
1623 if ($pos != -1) {
1624 my $end=substr($line,$pos+1);
1625 $line=substr($line,0,$pos+1);
1626 if ($end =~ s/^( +)//) {
1627 $line .= $1;
1628 }
1629 unshift @lines,$end;
1630 }
1631 }
1632 $first=0;
1633 $res.="$line\n";
1634 }
1635 # Restore the original trailing spaces
1636 $res =~ s/\s+$//s;
1637 if ($text =~ m/(\s+)$/s) {
1638 $res .= $1;
1639 }
1640 return $res;
1641}
1642
1643# outputs properly a '# ... ' line to be put in the PO file
1644sub format_comment {
1645 my $comment=shift;
1646 my $char=shift;
1647 my $result = "#". $char . $comment;
1648 $result =~ s/\n/\n#$char/gs;
1649 $result =~ s/^#$char$/#/gm;
1650 $result .= "\n";
1651 return $result;
1652}
1653
1654
16551;
1656__END__
1657
1658=back
1659
1660=head1 AUTHORS
1661
1662 Denis Barbier <barbier@linuxfr.org>
1663 Martin Quinson (mquinson#debian.org)
1664
1665=cut
1666

Archive Download this file

Revision: 2805