Chameleon

Chameleon Svn Source Tree

Root/branches/chucko/package/bin/po4a/lib/Locale/Po4a/Po.pm

1# Locale::Po4a::Po -- manipulation of PO files
2#
3# This program is free software; you may redistribute it and/or modify it
4# under the terms of GPL (see COPYING).
5
6############################################################################
7# Modules and declarations
8############################################################################
9
10=encoding UTF-8
11
12=head1 NAME
13
14Locale::Po4a::Po - PO file manipulation module
15
16=head1 SYNOPSIS
17
18 use Locale::Po4a::Po;
19 my $pofile=Locale::Po4a::Po->new();
20
21 # Read PO file
22 $pofile->read('file.po');
23
24 # Add an entry
25 $pofile->push('msgid' => 'Hello', 'msgstr' => 'bonjour',
26 'flags' => "wrap", 'reference'=>'file.c:46');
27
28 # Extract a translation
29 $pofile->gettext("Hello"); # returns 'bonjour'
30
31 # Write back to a file
32 $pofile->write('otherfile.po');
33
34=head1 DESCRIPTION
35
36Locale::Po4a::Po is a module that allows you to manipulate message
37catalogs. You can load and write from/to a file (which extension is often
38I<po>), you can build new entries on the fly or request for the translation
39of a string.
40
41For a more complete description of message catalogs in the PO format and
42their use, please refer to the documentation of the gettext program.
43
44This module is part of the po4a project, which objective is to use PO files
45(designed at origin to ease the translation of program messages) to
46translate everything, including documentation (man page, info manual),
47package description, debconf templates, and everything which may benefit
48from this.
49
50=head1 OPTIONS ACCEPTED BY THIS MODULE
51
52=over 4
53
54=item B<porefs> I<type>[,B<wrap>|B<nowrap>]
55
56Specify the reference format. Argument I<type> can be one of B<none> to not
57produce any reference, B<noline> to not specify the line number (more
58accurately all line numbers are replaced by 1), B<counter> to replace line
59number by an increasing counter, and B<full> to include complete
60references.
61
62Argument can be followed by a comma and either B<wrap> or B<nowrap> keyword.
63References are written by default on a single line. The B<wrap> option wraps
64references on several lines, to mimic B<gettext> tools (B<xgettext> and
65B<msgmerge>). This option will become the default in a future release, because
66it is more sensible. The B<nowrap> option is available so that users who want
67to keep the old behavior can do so.
68
69=item B<--msgid-bugs-address> I<email@address>
70
71Set the report address for msgid bugs. By default, the created POT files
72have no Report-Msgid-Bugs-To fields.
73
74=item B<--copyright-holder> I<string>
75
76Set the copyright holder in the POT header. The default value is
77"Free Software Foundation, Inc."
78
79=item B<--package-name> I<string>
80
81Set the package name for the POT header. The default is "PACKAGE".
82
83=item B<--package-version> I<string>
84
85Set the package version for the POT header. The default is "VERSION".
86
87=back
88
89=cut
90
91use IO::File;
92
93
94require Exporter;
95
96package Locale::Po4a::Po;
97use DynaLoader;
98
99use Locale::Po4a::Common qw(wrap_msg wrap_mod wrap_ref_mod dgettext);
100
101use subs qw(makespace);
102use vars qw(@ISA @EXPORT_OK);
103@ISA = qw(Exporter DynaLoader);
104@EXPORT = qw(%debug);
105@EXPORT_OK = qw(&move_po_if_needed);
106
107use Locale::Po4a::TransTractor;
108# Try to use a C extension if present.
109eval("bootstrap Locale::Po4a::Po $Locale::Po4a::TransTractor::VERSION");
110
111use 5.006;
112use strict;
113use warnings;
114
115use Carp qw(croak);
116use File::Basename;
117use File::Path; # mkdir before write
118use File::Copy; # move
119use POSIX qw(strftime floor);
120use Time::Local;
121
122use Encode;
123
124my @known_flags=qw(wrap no-wrap c-format fuzzy);
125
126our %debug=('canonize' => 0,
127 'quote' => 0,
128 'escape' => 0,
129 'encoding' => 0,
130 'filter' => 0);
131
132=head1 Functions about whole message catalogs
133
134=over 4
135
136=item new()
137
138Creates a new message catalog. If an argument is provided, it's the name of
139a PO file we should load.
140
141=cut
142
143sub new {
144 my ($this, $options) = (shift, shift);
145 my $class = ref($this) || $this;
146 my $self = {};
147 bless $self, $class;
148 $self->initialize($options);
149
150 my $filename = shift;
151 $self->read($filename) if defined($filename) && length($filename);
152 return $self;
153}
154
155# Return the numerical timezone (e.g. +0200)
156# Neither the %z nor the %s formats of strftime are portable:
157# '%s' is not supported on Solaris and '%z' indicates
158# "2006-10-25 19:36E. Europe Standard Time" on MS Windows.
159sub timezone {
160 my @g = gmtime();
161 my @l = localtime();
162
163 my $diff;
164 $diff = floor(timelocal(@l)/60 +0.5);
165 $diff -= floor(timelocal(@g)/60 +0.5);
166
167 my $h = floor($diff / 60) + $l[8]; # $l[8] indicates if we are currently
168 # in a daylight saving time zone
169 my $m = $diff%60;
170
171 return sprintf "%+03d%02d\n", $h, $m;
172}
173
174sub initialize {
175 my ($self, $options) = (shift, shift);
176 my $date = strftime("%Y-%m-%d %H:%M", localtime).timezone();
177 chomp $date;
178# $options = ref($options) || $options;
179
180 $self->{options}{'porefs'}= 'full,nowrap';
181 $self->{options}{'msgid-bugs-address'}= undef;
182 $self->{options}{'copyright-holder'}= "Free Software Foundation, Inc.";
183 $self->{options}{'package-name'}= "PACKAGE";
184 $self->{options}{'package-version'}= "VERSION";
185 foreach my $opt (keys %$options) {
186 if ($options->{$opt}) {
187 die wrap_mod("po4a::po",
188 dgettext ("po4a", "Unknown option: %s"), $opt)
189 unless exists $self->{options}{$opt};
190 $self->{options}{$opt} = $options->{$opt};
191 }
192 }
193 $self->{options}{'porefs'} =~ /^(full|counter|noline|none)(,(no)?wrap)?$/ ||
194 die wrap_mod("po4a::po",
195 dgettext ("po4a",
196 "Invalid value for option 'porefs' ('%s' is ".
197 "not one of 'full', 'counter', 'noline' or 'none')"),
198 $self->{options}{'porefs'});
199 if ($self->{options}{'porefs'} =~ m/^counter/) {
200 $self->{counter} = {};
201 }
202
203 $self->{po}=();
204 $self->{count}=0; # number of msgids in the PO
205 # count_doc: number of strings in the document
206 # (duplicate strings counted multiple times)
207 $self->{count_doc}=0;
208 $self->{header_comment}=
209 " SOME DESCRIPTIVE TITLE\n"
210 ." Copyright (C) YEAR ".
211 $self->{options}{'copyright-holder'}."\n"
212 ." This file is distributed under the same license ".
213 "as the ".$self->{options}{'package-name'}." package.\n"
214 ." FIRST AUTHOR <EMAIL\@ADDRESS>, YEAR.\n"
215 ."\n"
216 .", fuzzy";
217# $self->header_tag="fuzzy";
218 $self->{header}=escape_text("Project-Id-Version: ".
219 $self->{options}{'package-name'}." ".
220 $self->{options}{'package-version'}."\n".
221 ((defined $self->{options}{'msgid-bugs-address'})?
222 "Report-Msgid-Bugs-To: ".$self->{options}{'msgid-bugs-address'}."\n":
223 "").
224 "POT-Creation-Date: $date\n".
225 "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n".
226 "Last-Translator: FULL NAME <EMAIL\@ADDRESS>\n".
227 "Language-Team: LANGUAGE <LL\@li.org>\n".
228 "Language: \n".
229 "MIME-Version: 1.0\n".
230 "Content-Type: text/plain; charset=CHARSET\n".
231 "Content-Transfer-Encoding: 8bit\n");
232
233 $self->{encoder}=find_encoding("ascii");
234 $self->{footer}=[];
235
236 # To make stats about gettext hits
237 $self->stats_clear();
238}
239
240=item read($)
241
242Reads a PO file (which name is given as argument). Previously existing
243entries in self are not removed, the new ones are added to the end of the
244catalog.
245
246=cut
247
248sub read {
249 my $self=shift;
250 my $filename=shift
251 or croak wrap_mod("po4a::po",
252 dgettext("po4a",
253 "Please provide a non-null filename"));
254
255 my $lang = basename($filename);
256 $lang =~ s/\.po$//;
257 $self->{lang} = $lang;
258
259 my $fh;
260 if ($filename eq '-') {
261 $fh=*STDIN;
262 } else {
263 open $fh,"<$filename"
264 or croak wrap_mod("po4a::po",
265 dgettext("po4a", "Can't read from %s: %s"),
266 $filename, $!);
267 }
268
269 ## Read paragraphs line-by-line
270 my $pofile="";
271 my $textline;
272 while (defined ($textline = <$fh>)) {
273 $pofile .= $textline;
274 }
275# close INPUT
276# or croak (sprintf(dgettext("po4a",
277# "Can't close %s after reading: %s"),
278# $filename,$!)."\n");
279
280 my $linenum=0;
281
282 foreach my $msg (split (/\n\n/,$pofile)) {
283 my ($msgid,$msgstr,$comment,$previous,$automatic,$reference,$flags,$buffer);
284 my ($msgid_plural, $msgstr_plural);
285 if ($msg =~ m/^#~/m) {
286 push(@{$self->{footer}}, $msg);
287 next;
288 }
289 foreach my $line (split (/\n/,$msg)) {
290 $linenum++;
291 if ($line =~ /^#\. ?(.*)$/) { # Automatic comment
292 $automatic .= (defined($automatic) ? "\n" : "").$1;
293
294 } elsif ($line =~ /^#: ?(.*)$/) { # reference
295 $reference .= (defined($reference) ? "\n" : "").$1;
296
297 } elsif ($line =~ /^#, ?(.*)$/) { # flags
298 $flags .= (defined($flags) ? "\n" : "").$1;
299
300 } elsif ($line =~ /^#\| ?(.*)$/) { # previous translation
301 $previous .= (defined($previous) ? "\n" : "").($1||"");
302
303 } elsif ($line =~ /^#(.*)$/) { # Translator comments
304 $comment .= (defined($comment) ? "\n" : "").($1||"");
305
306 } elsif ($line =~ /^msgid (".*")$/) { # begin of msgid
307 $buffer = $1;
308
309 } elsif ($line =~ /^msgid_plural (".*")$/) {
310 # begin of msgid_plural, end of msgid
311
312 $msgid = $buffer;
313 $buffer = $1;
314
315 } elsif ($line =~ /^msgstr (".*")$/) {
316 # begin of msgstr, end of msgid
317
318 $msgid = $buffer;
319 $buffer = "$1";
320
321 } elsif ($line =~ /^msgstr\[([0-9]+)\] (".*")$/) {
322 # begin of msgstr[x], end of msgid_plural or msgstr[x-1]
323
324 # Note: po4a cannot uses plural forms
325 # (no integer to use the plural form)
326 # * drop the msgstr[x] where x >= 2
327 # * use msgstr[0] as the translation of msgid
328 # * use msgstr[1] as the translation of msgid_plural
329
330 if ($1 eq "0") {
331 $msgid_plural = $buffer;
332 $buffer = "$2";
333 } elsif ($1 eq "1") {
334 $msgstr = $buffer;
335 $buffer = "$2";
336 } elsif ($1 eq "2") {
337 $msgstr_plural = $buffer;
338 warn wrap_ref_mod("$filename:$linenum",
339 "po4a::po",
340 dgettext("po4a", "Messages with more than 2 plural forms are not supported."));
341 }
342 } elsif ($line =~ /^(".*")$/) {
343 # continuation of a line
344 $buffer .= "\n$1";
345
346 } else {
347 warn wrap_ref_mod("$filename:$linenum",
348 "po4a::po",
349 dgettext("po4a", "Strange line: -->%s<--"),
350 $line);
351 }
352 }
353 $linenum++;
354 if (defined $msgid_plural) {
355 $msgstr_plural=$buffer;
356
357 $msgid = unquote_text($msgid) if (defined($msgid));
358 $msgstr = unquote_text($msgstr) if (defined($msgstr));
359
360 $self->push_raw ('msgid' => $msgid,
361 'msgstr' => $msgstr,
362 'reference' => $reference,
363 'flags' => $flags,
364 'comment' => $comment,
365 'previous' => $previous,
366 'automatic' => $automatic,
367 'plural' => 0);
368
369 $msgid_plural = unquote_text($msgid_plural)
370 if (defined($msgid_plural));
371 $msgstr_plural = unquote_text($msgstr_plural)
372 if (defined($msgstr_plural));
373
374 $self->push_raw ('msgid' => $msgid_plural,
375 'msgstr' => $msgstr_plural,
376 'reference' => $reference,
377 'flags' => $flags,
378 'comment' => $comment,
379 'previous' => $previous,
380 'automatic' => $automatic,
381 'plural' => 1);
382 } else {
383 $msgstr=$buffer;
384
385 $msgid = unquote_text($msgid) if (defined($msgid));
386 $msgstr = unquote_text($msgstr) if (defined($msgstr));
387
388 $self->push_raw ('msgid' => $msgid,
389 'msgstr' => $msgstr,
390 'reference' => $reference,
391 'flags' => $flags,
392 'comment' => $comment,
393 'previous' => $previous,
394 'automatic' => $automatic);
395 }
396 }
397}
398
399=item write($)
400
401Writes the current catalog to the given file.
402
403=cut
404
405sub write{
406 my $self=shift;
407 my $filename=shift
408 or croak dgettext("po4a","Can't write to a file without filename")."\n";
409
410 my $fh;
411 if ($filename eq '-') {
412 $fh=\*STDOUT;
413 } else {
414 # make sure the directory in which we should write the localized
415 # file exists
416 my $dir = $filename;
417 if ($dir =~ m|/|) {
418 $dir =~ s|/[^/]*$||;
419
420 File::Path::mkpath($dir, 0, 0755) # Croaks on error
421 if (length ($dir) && ! -e $dir);
422 }
423 open $fh,">$filename"
424 or croak wrap_mod("po4a::po",
425 dgettext("po4a", "Can't write to %s: %s"),
426 $filename, $!);
427 }
428
429 print $fh "".format_comment($self->{header_comment},"")
430 if defined($self->{header_comment}) && length($self->{header_comment});
431
432 print $fh "msgid \"\"\n";
433 print $fh "msgstr ".quote_text($self->{header})."\n\n";
434
435
436 my $buf_msgstr_plural; # Used to keep the first msgstr of plural forms
437 my $first=1;
438 foreach my $msgid ( sort { ($self->{po}{"$a"}{'pos'}) <=>
439 ($self->{po}{"$b"}{'pos'})
440 } keys %{$self->{po}}) {
441 my $output="";
442
443 if ($first) {
444 $first=0;
445 } else {
446 $output .= "\n";
447 }
448
449 $output .= format_comment($self->{po}{$msgid}{'comment'},"")
450 if defined($self->{po}{$msgid}{'comment'})
451 && length ($self->{po}{$msgid}{'comment'});
452 if ( defined($self->{po}{$msgid}{'automatic'})
453 && length ($self->{po}{$msgid}{'automatic'})) {
454 foreach my $comment (split(/\\n/,$self->{po}{$msgid}{'automatic'}))
455 {
456 $output .= format_comment($comment, ". ")
457 }
458 }
459 $output .= format_comment($self->{po}{$msgid}{'type'},". type: ")
460 if defined($self->{po}{$msgid}{'type'})
461 && length ($self->{po}{$msgid}{'type'});
462 if ( defined($self->{po}{$msgid}{'reference'})
463 && length ($self->{po}{$msgid}{'reference'})) {
464 my $output_ref = $self->{po}{$msgid}{'reference'};
465 if ($self->{options}{'porefs'} =~ m/,wrap$/) {
466 $output_ref = wrap($output_ref);
467 $output_ref =~ s/\s+$//mg;
468 }
469 $output .= format_comment($output_ref,": ");
470 }
471 $output .= "#, ". join(", ", sort split(/\s+/,$self->{po}{$msgid}{'flags'}))."\n"
472 if defined($self->{po}{$msgid}{'flags'})
473 && length ($self->{po}{$msgid}{'flags'});
474 $output .= format_comment($self->{po}{$msgid}{'previous'},"| ")
475 if defined($self->{po}{$msgid}{'previous'})
476 && length ($self->{po}{$msgid}{'previous'});
477
478 if (exists $self->{po}{$msgid}{'plural'}) {
479 if ($self->{po}{$msgid}{'plural'} == 0) {
480 if ($self->get_charset =~ /^utf-8$/i) {
481 my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'});
482 $msgid = Encode::decode_utf8($msgid);
483 $output .= Encode::encode_utf8("msgid ".quote_text($msgid)."\n");
484 $buf_msgstr_plural = Encode::encode_utf8("msgstr[0] ".quote_text($msgstr)."\n");
485 } else {
486 $output = "msgid ".quote_text($msgid)."\n";
487 $buf_msgstr_plural = "msgstr[0] ".quote_text($self->{po}{$msgid}{'msgstr'})."\n";
488 }
489 } elsif ($self->{po}{$msgid}{'plural'} == 1) {
490# TODO: there may be only one plural form
491 if ($self->get_charset =~ /^utf-8$/i) {
492 my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'});
493 $msgid = Encode::decode_utf8($msgid);
494 $output = Encode::encode_utf8("msgid_plural ".quote_text($msgid)."\n");
495 $output .= $buf_msgstr_plural;
496 $output .= Encode::encode_utf8("msgstr[1] ".quote_text($msgstr)."\n");
497 $buf_msgstr_plural = "";
498 } else {
499 $output = "msgid_plural ".quote_text($msgid)."\n";
500 $output .= $buf_msgstr_plural;
501 $output .= "msgstr[1] ".quote_text($self->{po}{$msgid}{'msgstr'})."\n";
502 }
503 } else {
504 die wrap_msg(dgettext("po4a","Can't write PO files with more than two plural forms."));
505 }
506 } else {
507 if ($self->get_charset =~ /^utf-8$/i) {
508 my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'});
509 $msgid = Encode::decode_utf8($msgid);
510 $output .= Encode::encode_utf8("msgid ".quote_text($msgid)."\n");
511 $output .= Encode::encode_utf8("msgstr ".quote_text($msgstr)."\n");
512 } else {
513 $output .= "msgid ".quote_text($msgid)."\n";
514 $output .= "msgstr ".quote_text($self->{po}{$msgid}{'msgstr'})."\n";
515 }
516 }
517
518 print $fh $output;
519 }
520 print $fh join("\n\n", @{$self->{footer}}) if scalar @{$self->{footer}};
521
522# print STDERR "$fh";
523# if ($filename ne '-') {
524# close $fh
525# or croak (sprintf(dgettext("po4a",
526# "Can't close %s after writing: %s\n"),
527# $filename,$!));
528# }
529}
530
531=item write_if_needed($$)
532
533Like write, but if the PO or POT file already exists, the object will be
534written in a temporary file which will be compared with the existing file
535to check if the update is needed (this avoids to change a POT just to
536update a line reference or the POT-Creation-Date field).
537
538=cut
539
540sub move_po_if_needed {
541 my ($new_po, $old_po, $backup) = (shift, shift, shift);
542 my $diff;
543
544 if (-e $old_po) {
545 my $diff_ignore = "-I'^#:' "
546 ."-I'^\"POT-Creation-Date:' "
547 ."-I'^\"PO-Revision-Date:'";
548 $diff = qx(diff -q $diff_ignore $old_po $new_po);
549 if ( $diff eq "" ) {
550 unlink $new_po
551 or die wrap_msg(dgettext("po4a","Can't unlink %s: %s."),
552 $new_po, $!);
553 # touch the old PO
554 my ($atime, $mtime) = (time,time);
555 utime $atime, $mtime, $old_po;
556 } else {
557 move $new_po, $old_po
558 or die wrap_msg(dgettext("po4a","Can't move %s to %s: %s."),
559 $new_po, $old_po, $!);
560 }
561 } else {
562 move $new_po, $old_po
563 or die wrap_msg(dgettext("po4a","Can't move %s to %s: %s."),
564 $new_po, $old_po, $!);
565 }
566}
567
568sub write_if_needed {
569 my $self=shift;
570 my $filename=shift
571 or croak dgettext("po4a","Can't write to a file without filename")."\n";
572
573 if (-e $filename) {
574 my ($tmp_filename);
575 (undef,$tmp_filename)=File::Temp->tempfile($filename."XXXX",
576 DIR => "/tmp",
577 OPEN => 0,
578 UNLINK => 0);
579 $self->write($tmp_filename);
580 move_po_if_needed($tmp_filename, $filename);
581 } else {
582 $self->write($filename);
583 }
584}
585
586=item gettextize($$)
587
588This function produces one translated message catalog from two catalogs, an
589original and a translation. This process is described in L<po4a(7)|po4a.7>,
590section I<Gettextization: how does it work?>.
591
592=cut
593
594sub gettextize {
595 my $this = shift;
596 my $class = ref($this) || $this;
597 my ($poorig,$potrans)=(shift,shift);
598
599 my $pores=Locale::Po4a::Po->new();
600
601 my $please_fail = 0;
602 my $toobad = dgettext("po4a",
603 "\nThe gettextization failed (once again). Don't give up, ".
604 "gettextizing is a subtle art, but this is only needed once ".
605 "to convert a project to the gorgeous luxus offered by po4a ".
606 "to translators.".
607 "\nPlease refer to the po4a(7) documentation, the section ".
608 "\"HOWTO convert a pre-existing translation to po4a?\" ".
609 "contains several hints to help you in your task");
610
611 # Don't fail right now when the entry count does not match. Instead, give
612 # it a try so that the user can see where we fail (which is probably where
613 # the problem is).
614 if ($poorig->count_entries_doc() > $potrans->count_entries_doc()) {
615 warn wrap_mod("po4a gettextize", dgettext("po4a",
616 "Original has more strings than the translation (%d>%d). ".
617 "Please fix it by editing the translated version to add ".
618 "some dummy entry."),
619 $poorig->count_entries_doc(),
620 $potrans->count_entries_doc());
621 $please_fail = 1;
622 } elsif ($poorig->count_entries_doc() < $potrans->count_entries_doc()) {
623 warn wrap_mod("po4a gettextize", dgettext("po4a",
624 "Original has less strings than the translation (%d<%d). ".
625 "Please fix it by removing the extra entry from the ".
626 "translated file. You may need an addendum (cf po4a(7)) ".
627 "to reput the chunk in place after gettextization. A ".
628 "possible cause is that a text duplicated in the original ".
629 "is not translated the same way each time. Remove one of ".
630 "the translations, and you're fine."),
631 $poorig->count_entries_doc(),
632 $potrans->count_entries_doc());
633 $please_fail = 1;
634 }
635
636 if ( $poorig->get_charset =~ /^utf-8$/i ) {
637 $potrans->to_utf8;
638 $pores->set_charset("UTF-8");
639 } else {
640 if ($potrans->get_charset eq "CHARSET") {
641 $pores->set_charset("ascii");
642 } else {
643 $pores->set_charset($potrans->get_charset);
644 }
645 }
646 print "Po character sets:\n".
647 " original=".$poorig->get_charset."\n".
648 " translated=".$potrans->get_charset."\n".
649 " result=".$pores->get_charset."\n"
650 if $debug{'encoding'};
651
652 for (my ($o,$t)=(0,0) ;
653 $o<$poorig->count_entries_doc() && $t<$potrans->count_entries_doc();
654 $o++,$t++) {
655 #
656 # Extract some informations
657
658 my ($orig,$trans)=($poorig->msgid_doc($o),$potrans->msgid_doc($t));
659# print STDERR "Matches [[$orig]]<<$trans>>\n";
660
661 my ($reforig,$reftrans)=($poorig->{po}{$orig}{'reference'},
662 $potrans->{po}{$trans}{'reference'});
663 my ($typeorig,$typetrans)=($poorig->{po}{$orig}{'type'},
664 $potrans->{po}{$trans}{'type'});
665
666 #
667 # Make sure the type of both string exist
668 #
669 die wrap_mod("po4a gettextize",
670 "Internal error: type of original string number %s ".
671 "isn't provided", $o)
672 if ($typeorig eq '');
673
674 die wrap_mod("po4a gettextize",
675 "Internal error: type of translated string number %s ".
676 "isn't provided", $o)
677 if ($typetrans eq '');
678
679 #
680 # Make sure both type are the same
681 #
682 if ($typeorig ne $typetrans){
683 $pores->write("gettextization.failed.po");
684 eval {
685 # Recode $trans into current charset, if possible
686 require I18N::Langinfo;
687 I18N::Langinfo->import(qw(langinfo CODESET));
688 my $codeset = langinfo(CODESET());
689 Encode::from_to($trans, $potrans->get_charset, $codeset);
690 };
691 die wrap_msg(dgettext("po4a",
692 "po4a gettextization: Structure disparity between ".
693 "original and translated files:\n".
694 "msgid (at %s) is of type '%s' while\n".
695 "msgstr (at %s) is of type '%s'.\n".
696 "Original text: %s\n".
697 "Translated text: %s\n".
698 "(result so far dumped to gettextization.failed.po)").
699 "%s",
700 $reforig, $typeorig,
701 $reftrans, $typetrans,
702 $orig,
703 $trans,
704 $toobad);
705 }
706
707 #
708 # Push the entry
709 #
710 my $flags;
711 if (defined $poorig->{po}{$orig}{'flags'}) {
712 $flags = $poorig->{po}{$orig}{'flags'}." fuzzy";
713 } else {
714 $flags = "fuzzy";
715 }
716 $pores->push_raw('msgid' => $orig,
717 'msgstr' => $trans,
718 'flags' => $flags,
719 'type' => $typeorig,
720 'reference' => $reforig,
721 'conflict' => 1,
722 'transref' => $potrans->{po}{$trans}{'reference'})
723 unless (defined($pores->{po}{$orig})
724 and ($pores->{po}{$orig}{'msgstr'} eq $trans))
725 # FIXME: maybe we should be smarter about what reference should be
726 # sent to push_raw.
727 }
728
729 # make sure we return a useful error message when entry count differ
730 die "$toobad\n" if $please_fail;
731
732 return $pores;
733}
734
735=item filter($)
736
737This function extracts a catalog from an existing one. Only the entries having
738a reference in the given file will be placed in the resulting catalog.
739
740This function parses its argument, converts it to a Perl function definition,
741evals this definition and filters the fields for which this function returns
742true.
743
744I love Perl sometimes ;)
745
746=cut
747
748sub filter {
749 my $self=shift;
750 our $filter=shift;
751
752 my $res;
753 $res = Locale::Po4a::Po->new();
754
755 # Parse the filter
756 our $code="sub apply { return ";
757 our $pos=0;
758 our $length = length $filter;
759
760 # explode chars to parts. How to subscript a string in Perl?
761 our @filter = split(//,$filter);
762
763 sub gloups {
764 my $fmt=shift;
765 my $space = "";
766 for (1..$pos){
767 $space .= ' ';
768 }
769 die wrap_msg("$fmt\n$filter\n$space^ HERE");
770 }
771 sub showmethecode {
772 return unless $debug{'filter'};
773 my $fmt=shift;
774 my $space="";
775 for (1..$pos){
776 $space .= ' ';
777 }
778 print STDERR "$filter\n$space^ $fmt\n";#"$code\n";
779 }
780
781 # I dream of a lex in perl :-/
782 sub parse_expression {
783 showmethecode("Begin expression")
784 if $debug{'filter'};
785
786 gloups("Begin of expression expected, got '%s'",$filter[$pos])
787 unless ($filter[$pos] eq '(');
788 $pos ++; # pass the '('
789 if ($filter[$pos] eq '&') {
790 # AND
791 $pos++;
792 showmethecode("Begin of AND")
793 if $debug{'filter'};
794 $code .= "(";
795 while (1) {
796 gloups ("Unfinished AND statement.")
797 if ($pos == $length);
798 parse_expression();
799 if ($filter[$pos] eq '(') {
800 $code .= " && ";
801 } elsif ($filter[$pos] eq ')') {
802 last; # do not eat that char
803 } else {
804 gloups("End of AND or begin of sub-expression expected, got '%s'", $filter[$pos]);
805 }
806 }
807 $code .= ")";
808 } elsif ($filter[$pos] eq '|') {
809 # OR
810 $pos++;
811 $code .= "(";
812 while (1) {
813 gloups("Unfinished OR statement.")
814 if ($pos == $length);
815 parse_expression();
816 if ($filter[$pos] eq '(') {
817 $code .= " || ";
818 } elsif ($filter[$pos] eq ')') {
819 last; # do not eat that char
820 } else {
821 gloups("End of OR or begin of sub-expression expected, got '%s'",$filter[$pos]);
822 }
823 }
824 $code .= ")";
825 } elsif ($filter[$pos] eq '!') {
826 # NOT
827 $pos++;
828 $code .= "(!";
829 gloups("Missing sub-expression in NOT statement.")
830 if ($pos == $length);
831 parse_expression();
832 $code .= ")";
833 } else {
834 # must be an equal. Let's get field and argument
835 my ($field,$arg,$done);
836 $field = substr($filter,$pos);
837 gloups("EQ statement contains no '=' or invalid field name")
838 unless ($field =~ /([a-z]*)=/i);
839 $field = lc($1);
840 $pos += (length $field) + 1;
841
842 # check that we've got a valid field name,
843 # and the number it referes to
844 # DO NOT CHANGE THE ORDER
845 my @names=qw(msgid msgstr reference flags comment previous automatic);
846 my $fieldpos;
847 for ($fieldpos = 0;
848 $fieldpos < scalar @names && $field ne $names[$fieldpos];
849 $fieldpos++) {}
850 gloups("Invalid field name: %s",$field)
851 if $fieldpos == scalar @names; # not found
852
853 # Now, get the argument value. It has to be between quotes,
854 # which can be escaped
855 # We point right on the first char of the argument
856 # (first quote already eaten)
857 my $escaped = 0;
858 my $quoted = 0;
859 if ($filter[$pos] eq '"') {
860 $pos++;
861 $quoted = 1;
862 }
863 showmethecode(($quoted?"Quoted":"Unquoted")." argument of field '$field'")
864 if $debug{'filter'};
865
866 while (!$done) {
867 gloups("Unfinished EQ argument.")
868 if ($pos == $length);
869
870 if ($quoted) {
871 if ($filter[$pos] eq '\\') {
872 if ($escaped) {
873 $arg .= '\\';
874 $escaped = 0;
875 } else {
876 $escaped = 1;
877 }
878 } elsif ($escaped) {
879 if ($filter[$pos] eq '"') {
880 $arg .= '"';
881 $escaped = 0;
882 } else {
883 gloups("Invalid escape sequence in argument: '\\%s'",$filter[$pos]);
884 }
885 } else {
886 if ($filter[$pos] eq '"') {
887 $done = 1;
888 } else {
889 $arg .= $filter[$pos];
890 }
891 }
892 } else {
893 if ($filter[$pos] eq ')') {
894 # counter the next ++ since we don't want to eat
895 # this char
896 $pos--;
897 $done = 1;
898 } else {
899 $arg .= $filter[$pos];
900 }
901 }
902 $pos++;
903 }
904 # and now, add the code to check this equality
905 $code .= "(\$_[$fieldpos] =~ m{$arg})";
906
907 }
908 showmethecode("End of expression")
909 if $debug{'filter'};
910 gloups("Unfinished statement.")
911 if ($pos == $length);
912 gloups("End of expression expected, got '%s'",$filter[$pos])
913 unless ($filter[$pos] eq ')');
914 $pos++;
915 }
916 # And now, launch the beast, finish the function and use eval
917 # to construct this function.
918 # Ok, the lack of lexer is a fair price for the eval ;)
919 parse_expression();
920 gloups("Garbage at the end of the expression")
921 if ($pos != $length);
922 $code .= "; }";
923 print STDERR "CODE = $code\n"
924 if $debug{'filter'};
925 eval $code;
926 die wrap_mod("po4a::po", dgettext("po4a", "Eval failure: %s"), $@)
927 if $@;
928
929 for (my $cpt=(0) ;
930 $cpt<$self->count_entries();
931 $cpt++) {
932
933 my ($msgid,$ref,$msgstr,$flags,$type,$comment,$previous,$automatic);
934
935 $msgid = $self->msgid($cpt);
936 $ref=$self->{po}{$msgid}{'reference'};
937
938 $msgstr= $self->{po}{$msgid}{'msgstr'};
939 $flags = $self->{po}{$msgid}{'flags'};
940 $type = $self->{po}{$msgid}{'type'};
941 $comment = $self->{po}{$msgid}{'comment'};
942 $previous = $self->{po}{$msgid}{'previous'};
943 $automatic = $self->{po}{$msgid}{'automatic'};
944
945 # DO NOT CHANGE THE ORDER
946 $res->push_raw('msgid' => $msgid,
947 'msgstr' => $msgstr,
948 'flags' => $flags,
949 'type' => $type,
950 'reference' => $ref,
951 'comment' => $comment,
952 'previous' => $previous,
953 'automatic' => $automatic)
954 if (apply($msgid,$msgstr,$ref,$flags,$comment,$previous,$automatic));
955 }
956 # delete the apply subroutine
957 # otherwise it will be redefined.
958 undef &apply;
959 return $res;
960}
961
962=item to_utf8()
963
964Recodes to UTF-8 the PO's msgstrs. Does nothing if the charset is not
965specified in the PO file ("CHARSET" value), or if it's already UTF-8 or
966ASCII.
967
968=cut
969
970sub to_utf8 {
971 my $this = shift;
972 my $charset = $this->get_charset();
973
974 unless ($charset eq "CHARSET" or
975 $charset =~ /^ascii$/i or
976 $charset =~ /^utf-8$/i) {
977 foreach my $msgid ( keys %{$this->{po}} ) {
978 Encode::from_to($this->{po}{$msgid}{'msgstr'}, $charset, "utf-8");
979 }
980 $this->set_charset("UTF-8");
981 }
982}
983
984=back
985
986=head1 Functions to use a message catalog for translations
987
988=over 4
989
990=item gettext($%)
991
992Request the translation of the string given as argument in the current catalog.
993The function returns the original (untranslated) string if the string was not
994found.
995
996After the string to translate, you can pass a hash of extra
997arguments. Here are the valid entries:
998
999=over
1000
1001=item B<wrap>
1002
1003boolean indicating whether we can consider that whitespaces in string are
1004not important. If yes, the function canonizes the string before looking for
1005a translation, and wraps the result.
1006
1007=item B<wrapcol>
1008
1009the column at which we should wrap (default: 76).
1010
1011=back
1012
1013=cut
1014
1015sub gettext {
1016 my $self=shift;
1017 my $text=shift;
1018 my (%opt)=@_;
1019 my $res;
1020
1021 return "" unless defined($text) && length($text); # Avoid returning the header.
1022 my $validoption="reference wrap wrapcol";
1023 my %validoption;
1024
1025 map { $validoption{$_}=1 } (split(/ /,$validoption));
1026 foreach (keys %opt) {
1027 Carp::confess "internal error: unknown arg $_.\n".
1028 "Here are the valid options: $validoption.\n"
1029 unless $validoption{$_};
1030 }
1031
1032 $text=canonize($text)
1033 if ($opt{'wrap'});
1034
1035 my $esc_text=escape_text($text);
1036
1037 $self->{gettextqueries}++;
1038
1039 if ( defined $self->{po}{$esc_text}
1040 and defined $self->{po}{$esc_text}{'msgstr'}
1041 and length $self->{po}{$esc_text}{'msgstr'}
1042 and ( not defined $self->{po}{$esc_text}{'flags'}
1043 or $self->{po}{$esc_text}{'flags'} !~ /fuzzy/)) {
1044
1045 $self->{gettexthits}++;
1046 $res = unescape_text($self->{po}{$esc_text}{'msgstr'});
1047 if (defined $self->{po}{$esc_text}{'plural'}) {
1048 if ($self->{po}{$esc_text}{'plural'} eq "0") {
1049 warn wrap_mod("po4a gettextize", dgettext("po4a",
1050 "'%s' is the singular form of a message, ".
1051 "po4a will use the msgstr[0] translation (%s)."),
1052 $esc_text, $res);
1053 } else {
1054 warn wrap_mod("po4a gettextize", dgettext("po4a",
1055 "'%s' is the plural form of a message, ".
1056 "po4a will use the msgstr[1] translation (%s)."),
1057 $esc_text, $res);
1058 }
1059 }
1060 } else {
1061 $res = $text;
1062 }
1063
1064 if ($opt{'wrap'}) {
1065 if ($self->get_charset =~ /^utf-8$/i) {
1066 $res=Encode::decode_utf8($res);
1067 $res=wrap ($res, $opt{'wrapcol'} || 76);
1068 $res=Encode::encode_utf8($res);
1069 } else {
1070 $res=wrap ($res, $opt{'wrapcol'} || 76);
1071 }
1072 }
1073# print STDERR "Gettext >>>$text<<<(escaped=$esc_text)=[[[$res]]]\n\n";
1074 return $res;
1075}
1076
1077=item stats_get()
1078
1079Returns statistics about the hit ratio of gettext since the last time that
1080stats_clear() was called. Please note that it's not the same
1081statistics than the one printed by msgfmt --statistic. Here, it's statistics
1082about recent usage of the PO file, while msgfmt reports the status of the
1083file. Example of use:
1084
1085 [some use of the PO file to translate stuff]
1086
1087 ($percent,$hit,$queries) = $pofile->stats_get();
1088 print "So far, we found translations for $percent\% ($hit of $queries) of strings.\n";
1089
1090=cut
1091
1092sub stats_get() {
1093 my $self=shift;
1094 my ($h,$q)=($self->{gettexthits},$self->{gettextqueries});
1095 my $p = ($q == 0 ? 100 : int($h/$q*10000)/100);
1096
1097# $p =~ s/\.00//;
1098# $p =~ s/(\..)0/$1/;
1099
1100 return ( $p,$h,$q );
1101}
1102
1103=item stats_clear()
1104
1105Clears the statistics about gettext hits.
1106
1107=cut
1108
1109sub stats_clear {
1110 my $self = shift;
1111 $self->{gettextqueries} = 0;
1112 $self->{gettexthits} = 0;
1113}
1114
1115=back
1116
1117=head1 Functions to build a message catalog
1118
1119=over 4
1120
1121=item push(%)
1122
1123Push a new entry at the end of the current catalog. The arguments should
1124form a hash table. The valid keys are:
1125
1126=over 4
1127
1128=item B<msgid>
1129
1130the string in original language.
1131
1132=item B<msgstr>
1133
1134the translation.
1135
1136=item B<reference>
1137
1138an indication of where this string was found. Example: file.c:46 (meaning
1139in 'file.c' at line 46). It can be a space-separated list in case of
1140multiple occurrences.
1141
1142=item B<comment>
1143
1144a comment added here manually (by the translators). The format here is free.
1145
1146=item B<automatic>
1147
1148a comment which was automatically added by the string extraction
1149program. See the B<--add-comments> option of the B<xgettext> program for
1150more information.
1151
1152=item B<flags>
1153
1154space-separated list of all defined flags for this entry.
1155
1156Valid flags are: B<c-text>, B<python-text>, B<lisp-text>, B<elisp-text>, B<librep-text>,
1157B<smalltalk-text>, B<java-text>, B<awk-text>, B<object-pascal-text>, B<ycp-text>,
1158B<tcl-text>, B<wrap>, B<no-wrap> and B<fuzzy>.
1159
1160See the gettext documentation for their meaning.
1161
1162=item B<type>
1163
1164this is mostly an internal argument: it is used while gettextizing
1165documents. The idea here is to parse both the original and the translation
1166into a PO object, and merge them, using one's msgid as msgid and the
1167other's msgid as msgstr. To make sure that things get ok, each msgid in PO
1168objects are given a type, based on their structure (like "chapt", "sect1",
1169"p" and so on in DocBook). If the types of strings are not the same, that
1170means that both files do not share the same structure, and the process
1171reports an error.
1172
1173This information is written as automatic comment in the PO file since this
1174gives to translators some context about the strings to translate.
1175
1176=item B<wrap>
1177
1178boolean indicating whether whitespaces can be mangled in cosmetic
1179reformattings. If true, the string is canonized before use.
1180
1181This information is written to the PO file using the B<wrap> or B<no-wrap> flag.
1182
1183=item B<wrapcol>
1184
1185the column at which we should wrap (default: 76).
1186
1187This information is not written to the PO file.
1188
1189=back
1190
1191=cut
1192
1193sub push {
1194 my $self=shift;
1195 my %entry=@_;
1196
1197 my $validoption="wrap wrapcol type msgid msgstr automatic previous flags reference";
1198 my %validoption;
1199
1200 map { $validoption{$_}=1 } (split(/ /,$validoption));
1201 foreach (keys %entry) {
1202 Carp::confess "internal error: unknown arg $_.\n".
1203 "Here are the valid options: $validoption.\n"
1204 unless $validoption{$_};
1205 }
1206
1207 unless ($entry{'wrap'}) {
1208 $entry{'flags'} .= " no-wrap";
1209 }
1210 if (defined ($entry{'msgid'})) {
1211 $entry{'msgid'} = canonize($entry{'msgid'})
1212 if ($entry{'wrap'});
1213
1214 $entry{'msgid'} = escape_text($entry{'msgid'});
1215 }
1216 if (defined ($entry{'msgstr'})) {
1217 $entry{'msgstr'} = canonize($entry{'msgstr'})
1218 if ($entry{'wrap'});
1219
1220 $entry{'msgstr'} = escape_text($entry{'msgstr'});
1221 }
1222
1223 $self->push_raw(%entry);
1224}
1225
1226# The same as push(), but assuming that msgid and msgstr are already escaped
1227sub push_raw {
1228 my $self=shift;
1229 my %entry=@_;
1230 my ($msgid,$msgstr,$reference,$comment,$automatic,$previous,$flags,$type,$transref)=
1231 ($entry{'msgid'},$entry{'msgstr'},
1232 $entry{'reference'},$entry{'comment'},$entry{'automatic'},
1233 $entry{'previous'},$entry{'flags'},$entry{'type'},$entry{'transref'});
1234 my $keep_conflict = $entry{'conflict'};
1235
1236# print STDERR "Push_raw\n";
1237# print STDERR " msgid=>>>$msgid<<<\n" if $msgid;
1238# print STDERR " msgstr=[[[$msgstr]]]\n" if $msgstr;
1239# Carp::cluck " flags=$flags\n" if $flags;
1240
1241 return unless defined($entry{'msgid'});
1242
1243 #no msgid => header definition
1244 unless (length($entry{'msgid'})) {
1245# if (defined($self->{header}) && $self->{header} =~ /\S/) {
1246# warn dgettext("po4a","Redefinition of the header. ".
1247# "The old one will be discarded\n");
1248# } FIXME: do that iff the header isn't the default one.
1249 $self->{header}=$msgstr;
1250 $self->{header_comment}=$comment;
1251 my $charset = $self->get_charset;
1252 if ($charset ne "CHARSET") {
1253 $self->{encoder}=find_encoding($charset);
1254 } else {
1255 $self->{encoder}=find_encoding("ascii");
1256 }
1257 return;
1258 }
1259
1260 if ($self->{options}{'porefs'} =~ m/^none/) {
1261 $reference = "";
1262 } elsif ($self->{options}{'porefs'} =~ m/^counter/) {
1263 if ($reference =~ m/^(.+?)(?=\S+:\d+)/g) {
1264 my $new_ref = $1;
1265 1 while $reference =~ s{ # x modifier is added to add formatting and improve readability
1266 \G(\s*)(\S+):\d+ # \G is the last match in m//g (see also the (?=) syntax above)
1267 # $2 is the file name
1268 }{
1269 $self->{counter}{$2} ||= 0, # each file has its own counter
1270 ++$self->{counter}{$2}, # increment it
1271 $new_ref .= "$1$2:".$self->{counter}{$2} # replace line number by this counter
1272 }gex && pos($reference);
1273 $reference = $new_ref;
1274 }
1275 } elsif ($self->{options}{'porefs'} =~ m/^noline/) {
1276 $reference =~ s/:\d+/:1/g;
1277 }
1278
1279 if (defined($self->{po}{$msgid})) {
1280 warn wrap_mod("po4a::po",
1281 dgettext("po4a","msgid defined twice: %s"),
1282 $msgid)
1283 if (0); # FIXME: put a verbose stuff
1284 if ( defined $msgstr
1285 and defined $self->{po}{$msgid}{'msgstr'}
1286 and $self->{po}{$msgid}{'msgstr'} ne $msgstr) {
1287 my $txt=quote_text($msgid);
1288 my ($first,$second)=
1289 (format_comment(". ",$self->{po}{$msgid}{'reference'}).
1290 quote_text($self->{po}{$msgid}{'msgstr'}),
1291
1292 format_comment(". ",$reference).
1293 quote_text($msgstr));
1294
1295 if ($keep_conflict) {
1296 if ($self->{po}{$msgid}{'msgstr'} =~ m/^#-#-#-#-# .* #-#-#-#-#\\n/s) {
1297 $msgstr = $self->{po}{$msgid}{'msgstr'}.
1298 "\\n#-#-#-#-# $transref #-#-#-#-#\\n".
1299 $msgstr;
1300 } else {
1301 $msgstr = "#-#-#-#-# ".
1302 $self->{po}{$msgid}{'transref'}.
1303 " #-#-#-#-#\\n".
1304 $self->{po}{$msgid}{'msgstr'}."\\n".
1305 "#-#-#-#-# $transref #-#-#-#-#\\n".
1306 $msgstr;
1307 }
1308 # Every msgid will have the same list of references.
1309 # Only keep the last list.
1310 $self->{po}{$msgid}{'reference'} = "";
1311 } else {
1312 warn wrap_msg(dgettext("po4a",
1313 "Translations don't match for:\n".
1314 "%s\n".
1315 "-->First translation:\n".
1316 "%s\n".
1317 " Second translation:\n".
1318 "%s\n".
1319 " Old translation discarded."),
1320 $txt,$first,$second);
1321 }
1322 }
1323 }
1324 if (defined $transref) {
1325 $self->{po}{$msgid}{'transref'} = $transref;
1326 }
1327 if (defined($reference) && length($reference)) {
1328 if (defined $self->{po}{$msgid}{'reference'}) {
1329 $self->{po}{$msgid}{'reference'} .= " ".$reference;
1330 } else {
1331 $self->{po}{$msgid}{'reference'} = $reference;
1332 }
1333 }
1334 $self->{po}{$msgid}{'msgstr'} = $msgstr;
1335 $self->{po}{$msgid}{'comment'} = $comment;
1336 $self->{po}{$msgid}{'automatic'} = $automatic;
1337 $self->{po}{$msgid}{'previous'} = $previous;
1338 if (defined($self->{po}{$msgid}{'pos_doc'})) {
1339 $self->{po}{$msgid}{'pos_doc'} .= " ".$self->{count_doc}++;
1340 } else {
1341 $self->{po}{$msgid}{'pos_doc'} = $self->{count_doc}++;
1342 }
1343 unless (defined($self->{po}{$msgid}{'pos'})) {
1344 $self->{po}{$msgid}{'pos'} = $self->{count}++;
1345 }
1346 $self->{po}{$msgid}{'type'} = $type;
1347 $self->{po}{$msgid}{'plural'} = $entry{'plural'}
1348 if defined $entry{'plural'};
1349
1350 if (defined($flags)) {
1351 $flags = " $flags ";
1352 $flags =~ s/,/ /g;
1353 foreach my $flag (@known_flags) {
1354 if ($flags =~ /\s$flag\s/) { # if flag to be set
1355 unless ( defined($self->{po}{$msgid}{'flags'})
1356 && $self->{po}{$msgid}{'flags'} =~ /\b$flag\b/) {
1357 # flag not already set
1358 if (defined $self->{po}{$msgid}{'flags'}) {
1359 $self->{po}{$msgid}{'flags'} .= " ".$flag;
1360 } else {
1361 $self->{po}{$msgid}{'flags'} = $flag;
1362 }
1363 }
1364 }
1365 }
1366 }
1367# print STDERR "stored ((($msgid)))=>(((".$self->{po}{$msgid}{'msgstr'}.")))\n\n";
1368
1369}
1370
1371=back
1372
1373=head1 Miscellaneous functions
1374
1375=over 4
1376
1377=item count_entries()
1378
1379Returns the number of entries in the catalog (without the header).
1380
1381=cut
1382
1383sub count_entries($) {
1384 my $self=shift;
1385 return $self->{count};
1386}
1387
1388=item count_entries_doc()
1389
1390Returns the number of entries in document. If a string appears multiple times
1391in the document, it will be counted multiple times
1392
1393=cut
1394
1395sub count_entries_doc($) {
1396 my $self=shift;
1397 return $self->{count_doc};
1398}
1399
1400=item msgid($)
1401
1402Returns the msgid of the given number.
1403
1404=cut
1405
1406sub msgid($$) {
1407 my $self=shift;
1408 my $num=shift;
1409
1410 foreach my $msgid ( keys %{$self->{po}} ) {
1411 return $msgid if ($self->{po}{$msgid}{'pos'} eq $num);
1412 }
1413 return undef;
1414}
1415
1416=item msgid_doc($)
1417
1418Returns the msgid with the given position in the document.
1419
1420=cut
1421
1422sub msgid_doc($$) {
1423 my $self=shift;
1424 my $num=shift;
1425
1426 foreach my $msgid ( keys %{$self->{po}} ) {
1427 foreach my $pos (split / /, $self->{po}{$msgid}{'pos_doc'}) {
1428 return $msgid if ($pos eq $num);
1429 }
1430 }
1431 return undef;
1432}
1433
1434=item get_charset()
1435
1436Returns the character set specified in the PO header. If it hasn't been
1437set, it will return "CHARSET".
1438
1439=cut
1440
1441sub get_charset() {
1442 my $self=shift;
1443
1444 $self->{header} =~ /charset=(.*?)[\s\\]/;
1445
1446 if (defined $1) {
1447 return $1;
1448 } else {
1449 return "CHARSET";
1450 }
1451}
1452
1453=item set_charset($)
1454
1455This sets the character set of the PO header to the value specified in its
1456first argument. If you never call this function (and no file with a specified
1457character set is read), the default value is left to "CHARSET". This value
1458doesn't change the behavior of this module, it's just used to fill that field
1459in the header, and to return it in get_charset().
1460
1461=cut
1462
1463sub set_charset() {
1464 my $self=shift;
1465
1466 my ($newchar,$oldchar);
1467 $newchar = shift;
1468 $oldchar = $self->get_charset();
1469
1470 $self->{header} =~ s/$oldchar/$newchar/;
1471 $self->{encoder}=find_encoding($newchar);
1472}
1473
1474#----[ helper functions ]---------------------------------------------------
1475
1476# transforme the string from its PO file representation to the form which
1477# should be used to print it
1478sub unescape_text {
1479 my $text = shift;
1480
1481 print STDERR "\nunescape [$text]====" if $debug{'escape'};
1482 $text = join("",split(/\n/,$text));
1483 $text =~ s/\\"/"/g;
1484 # unescape newlines
1485 # NOTE on \G:
1486 # The following regular expression introduce newlines.
1487 # Thus, ^ doesn't match all beginnings of lines.
1488 # \G is a zero-width assertion that matches the position
1489 # of the previous substitution with s///g. As every
1490 # substitution ends by a newline, it always matches a
1491 # position just after a newline.
1492 $text =~ s/( # $1:
1493 (\G|[^\\]) # beginning of the line or any char
1494 # different from '\'
1495 (\\\\)* # followed by any even number of '\'
1496 )\\n # and followed by an escaped newline
1497 /$1\n/sgx; # single string, match globally, allow comments
1498 # unescape carriage returns
1499 $text =~ s/( # $1:
1500 (\G|[^\\]) # beginning of the line or any char
1501 # different from '\'
1502 (\\\\)* # followed by any even number of '\'
1503 )\\r # and followed by an escaped carriage return
1504 /$1\r/sgx; # single string, match globally, allow comments
1505 # unescape tabulations
1506 $text =~ s/( # $1:
1507 (\G|[^\\])# beginning of the line or any char
1508 # different from '\'
1509 (\\\\)* # followed by any even number of '\'
1510 )\\t # and followed by an escaped tabulation
1511 /$1\t/mgx; # multilines string, match globally, allow comments
1512 # and unescape the escape character
1513 $text =~ s/\\\\/\\/g;
1514 print STDERR ">$text<\n" if $debug{'escape'};
1515
1516 return $text;
1517}
1518
1519# transform the string to its representation as it should be written in PO
1520# files
1521sub escape_text {
1522 my $text = shift;
1523
1524 print STDERR "\nescape [$text]====" if $debug{'escape'};
1525 $text =~ s/\\/\\\\/g;
1526 $text =~ s/"/\\"/g;
1527 $text =~ s/\n/\\n/g;
1528 $text =~ s/\r/\\r/g;
1529 $text =~ s/\t/\\t/g;
1530 print STDERR ">$text<\n" if $debug{'escape'};
1531
1532 return $text;
1533}
1534
1535# put quotes around the string on each lines (without escaping it)
1536# It does also normalize the text (ie, make sure its representation is wraped
1537# on the 80th char, but without changing the meaning of the string)
1538sub quote_text {
1539 my $string = shift;
1540
1541 return '""' unless defined($string) && length($string);
1542
1543 print STDERR "\nquote [$string]====" if $debug{'quote'};
1544 # break lines on newlines, if any
1545 # see unescape_text for an explanation on \G
1546 $string =~ s/( # $1:
1547 (\G|[^\\]) # beginning of the line or any char
1548 # different from '\'
1549 (\\\\)* # followed by any even number of '\'
1550 \\n) # and followed by an escaped newline
1551 /$1\n/sgx; # single string, match globally, allow comments
1552 $string = wrap($string);
1553 my @string = split(/\n/,$string);
1554 $string = join ("\"\n\"",@string);
1555 $string = "\"$string\"";
1556 if (scalar @string > 1 && $string[0] ne '') {
1557 $string = "\"\"\n".$string;
1558 }
1559
1560 print STDERR ">$string<\n" if $debug{'quote'};
1561 return $string;
1562}
1563
1564# undo the work of the quote_text function
1565sub unquote_text {
1566 my $string = shift;
1567 print STDERR "\nunquote [$string]====" if $debug{'quote'};
1568 $string =~ s/^""\\n//s;
1569 $string =~ s/^"(.*)"$/$1/s;
1570 $string =~ s/"\n"//gm;
1571 # Note: an even number of '\' could precede \\n, but I could not build a
1572 # document to test this
1573 $string =~ s/([^\\])\\n\n/$1!!DUMMYPOPM!!/gm;
1574 $string =~ s|!!DUMMYPOPM!!|\\n|gm;
1575 print STDERR ">$string<\n" if $debug{'quote'};
1576 return $string;
1577}
1578
1579# canonize the string: write it on only one line, changing consecutive
1580# whitespace to only one space.
1581# Warning, it changes the string and should only be called if the string is
1582# plain text
1583sub canonize {
1584 my $text=shift;
1585 print STDERR "\ncanonize [$text]====" if $debug{'canonize'};
1586 $text =~ s/^ *//s;
1587 $text =~ s/^[ \t]+/ /gm;
1588 # if ($text eq "\n"), it messed up the first string (header)
1589 $text =~ s/\n/ /gm if ($text ne "\n");
1590 $text =~ s/([.)]) +/$1 /gm;
1591 $text =~ s/([^.)]) */$1 /gm;
1592 $text =~ s/ *$//s;
1593 print STDERR ">$text<\n" if $debug{'canonize'};
1594 return $text;
1595}
1596
1597# wraps the string. We don't use Text::Wrap since it mangles whitespace at
1598# the end of splited line
1599sub wrap {
1600 my $text=shift;
1601 return "0" if ($text eq '0');
1602 my $col=shift || 76;
1603 my @lines=split(/\n/,"$text");
1604 my $res="";
1605 my $first=1;
1606 while (defined(my $line=shift @lines)) {
1607 if ($first && length($line) > $col - 10) {
1608 unshift @lines,$line;
1609 $first=0;
1610 next;
1611 }
1612 if (length($line) > $col) {
1613 my $pos=rindex($line," ",$col);
1614 while (substr($line,$pos-1,1) eq '.' && $pos != -1) {
1615 $pos=rindex($line," ",$pos-1);
1616 }
1617 if ($pos == -1) {
1618 # There are no spaces in the first $col chars, pick-up the
1619 # first space
1620 $pos = index($line," ");
1621 }
1622 if ($pos != -1) {
1623 my $end=substr($line,$pos+1);
1624 $line=substr($line,0,$pos+1);
1625 if ($end =~ s/^( +)//) {
1626 $line .= $1;
1627 }
1628 unshift @lines,$end;
1629 }
1630 }
1631 $first=0;
1632 $res.="$line\n";
1633 }
1634 # Restore the original trailing spaces
1635 $res =~ s/\s+$//s;
1636 if ($text =~ m/(\s+)$/s) {
1637 $res .= $1;
1638 }
1639 return $res;
1640}
1641
1642# outputs properly a '# ... ' line to be put in the PO file
1643sub format_comment {
1644 my $comment=shift;
1645 my $char=shift;
1646 my $result = "#". $char . $comment;
1647 $result =~ s/\n/\n#$char/gs;
1648 $result =~ s/^#$char$/#/gm;
1649 $result .= "\n";
1650 return $result;
1651}
1652
1653
16541;
1655__END__
1656
1657=back
1658
1659=head1 AUTHORS
1660
1661 Denis Barbier <barbier@linuxfr.org>
1662 Martin Quinson (mquinson#debian.org)
1663
1664=cut
1665

Archive Download this file

Revision: 2406