1 | # Locale::Po4a::Po -- manipulation of PO files␊ |
2 | #␊ |
3 | # This program is free software; you may redistribute it and/or modify it␊ |
4 | # under the terms of GPL (see COPYING).␊ |
5 | ␊ |
6 | ############################################################################␊ |
7 | # Modules and declarations␊ |
8 | ############################################################################␊ |
9 | ␊ |
10 | =encoding UTF-8␊ |
11 | ␊ |
12 | =head1 NAME␊ |
13 | ␊ |
14 | Locale::Po4a::Po - PO file manipulation module␊ |
15 | ␊ |
16 | =head1 SYNOPSIS␊ |
17 | ␊ |
18 | use Locale::Po4a::Po;␊ |
19 | my $pofile=Locale::Po4a::Po->new();␊ |
20 | ␊ |
21 | # Read PO file␊ |
22 | $pofile->read('file.po');␊ |
23 | ␊ |
24 | # Add an entry␊ |
25 | $pofile->push('msgid' => 'Hello', 'msgstr' => 'bonjour',␊ |
26 | 'flags' => "wrap", 'reference'=>'file.c:46');␊ |
27 | ␊ |
28 | # Extract a translation␊ |
29 | $pofile->gettext("Hello"); # returns 'bonjour'␊ |
30 | ␊ |
31 | # Write back to a file␊ |
32 | $pofile->write('otherfile.po');␊ |
33 | ␊ |
34 | =head1 DESCRIPTION␊ |
35 | ␊ |
36 | Locale::Po4a::Po is a module that allows you to manipulate message␊ |
37 | catalogs. You can load and write from/to a file (which extension is often␊ |
38 | I<po>), you can build new entries on the fly or request for the translation␊ |
39 | of a string.␊ |
40 | ␊ |
41 | For a more complete description of message catalogs in the PO format and␊ |
42 | their use, please refer to the documentation of the gettext program.␊ |
43 | ␊ |
44 | This module is part of the po4a project, which objective is to use PO files␊ |
45 | (designed at origin to ease the translation of program messages) to␊ |
46 | translate everything, including documentation (man page, info manual),␊ |
47 | package description, debconf templates, and everything which may benefit␊ |
48 | from this.␊ |
49 | ␊ |
50 | =head1 OPTIONS ACCEPTED BY THIS MODULE␊ |
51 | ␊ |
52 | =over 4␊ |
53 | ␊ |
54 | =item B<porefs> I<type>[,B<wrap>|B<nowrap>]␊ |
55 | ␊ |
56 | Specify the reference format. Argument I<type> can be one of B<none> to not␊ |
57 | produce any reference, B<noline> to not specify the line number (more␊ |
58 | accurately all line numbers are replaced by 1), B<counter> to replace line␊ |
59 | number by an increasing counter, and B<full> to include complete␊ |
60 | references.␊ |
61 | ␊ |
62 | Argument can be followed by a comma and either B<wrap> or B<nowrap> keyword.␊ |
63 | References are written by default on a single line. The B<wrap> option wraps␊ |
64 | references on several lines, to mimic B<gettext> tools (B<xgettext> and␊ |
65 | B<msgmerge>). This option will become the default in a future release, because␊ |
66 | it is more sensible. The B<nowrap> option is available so that users who want␊ |
67 | to keep the old behavior can do so.␊ |
68 | ␊ |
69 | =item B<--msgid-bugs-address> I<email@address>␊ |
70 | ␊ |
71 | Set the report address for msgid bugs. By default, the created POT files␊ |
72 | have no Report-Msgid-Bugs-To fields.␊ |
73 | ␊ |
74 | =item B<--copyright-holder> I<string>␊ |
75 | ␊ |
76 | Set the copyright holder in the POT header. The default value is␊ |
77 | "Free Software Foundation, Inc."␊ |
78 | ␊ |
79 | =item B<--package-name> I<string>␊ |
80 | ␊ |
81 | Set the package name for the POT header. The default is "PACKAGE".␊ |
82 | ␊ |
83 | =item B<--package-version> I<string>␊ |
84 | ␊ |
85 | Set the package version for the POT header. The default is "VERSION".␊ |
86 | ␊ |
87 | =back␊ |
88 | ␊ |
89 | =cut␊ |
90 | ␊ |
91 | use IO::File;␊ |
92 | ␊ |
93 | ␊ |
94 | require Exporter;␊ |
95 | ␊ |
96 | package Locale::Po4a::Po;␊ |
97 | use DynaLoader;␊ |
98 | ␊ |
99 | use Locale::Po4a::Common qw(wrap_msg wrap_mod wrap_ref_mod dgettext);␊ |
100 | ␊ |
101 | use subs qw(makespace);␊ |
102 | use vars qw(@ISA @EXPORT_OK);␊ |
103 | @ISA = qw(Exporter DynaLoader);␊ |
104 | @EXPORT = qw(%debug);␊ |
105 | @EXPORT_OK = qw(&move_po_if_needed);␊ |
106 | ␊ |
107 | use Locale::Po4a::TransTractor;␊ |
108 | # Try to use a C extension if present.␊ |
109 | eval("bootstrap Locale::Po4a::Po $Locale::Po4a::TransTractor::VERSION");␊ |
110 | ␊ |
111 | use 5.006;␊ |
112 | use strict;␊ |
113 | use warnings;␊ |
114 | ␊ |
115 | use Carp qw(croak);␊ |
116 | use File::Basename;␊ |
117 | use File::Path; # mkdir before write␊ |
118 | use File::Copy; # move␊ |
119 | use POSIX qw(strftime floor);␊ |
120 | use Time::Local;␊ |
121 | ␊ |
122 | use Encode;␊ |
123 | ␊ |
124 | my @known_flags=qw(wrap no-wrap c-format fuzzy);␊ |
125 | ␊ |
126 | our %debug=('canonize' => 0,␊ |
127 | 'quote' => 0,␊ |
128 | 'escape' => 0,␊ |
129 | 'encoding' => 0,␊ |
130 | 'filter' => 0);␊ |
131 | ␊ |
132 | =head1 Functions about whole message catalogs␊ |
133 | ␊ |
134 | =over 4␊ |
135 | ␊ |
136 | =item new()␊ |
137 | ␊ |
138 | Creates a new message catalog. If an argument is provided, it's the name of␊ |
139 | a PO file we should load.␊ |
140 | ␊ |
141 | =cut␊ |
142 | ␊ |
143 | sub new {␊ |
144 | my ($this, $options) = (shift, shift);␊ |
145 | my $class = ref($this) || $this;␊ |
146 | my $self = {};␊ |
147 | bless $self, $class;␊ |
148 | $self->initialize($options);␊ |
149 | ␊ |
150 | my $filename = shift;␊ |
151 | $self->read($filename) if defined($filename) && length($filename);␊ |
152 | return $self;␊ |
153 | }␊ |
154 | ␊ |
155 | # Return the numerical timezone (e.g. +0200)␊ |
156 | # Neither the %z nor the %s formats of strftime are portable:␊ |
157 | # '%s' is not supported on Solaris and '%z' indicates␊ |
158 | # "2006-10-25 19:36E. Europe Standard Time" on MS Windows.␊ |
159 | sub timezone {␊ |
160 | my @g = gmtime();␊ |
161 | my @l = localtime();␊ |
162 | ␊ |
163 | my $diff;␊ |
164 | $diff = floor(timelocal(@l)/60 +0.5);␊ |
165 | $diff -= floor(timelocal(@g)/60 +0.5);␊ |
166 | ␊ |
167 | my $h = floor($diff / 60) + $l[8]; # $l[8] indicates if we are currently␊ |
168 | # in a daylight saving time zone␊ |
169 | my $m = $diff%60;␊ |
170 | ␊ |
171 | return sprintf "%+03d%02d\n", $h, $m;␊ |
172 | }␊ |
173 | ␊ |
174 | sub initialize {␊ |
175 | my ($self, $options) = (shift, shift);␊ |
176 | my $date = strftime("%Y-%m-%d %H:%M", localtime).timezone();␊ |
177 | chomp $date;␊ |
178 | # $options = ref($options) || $options;␊ |
179 | ␊ |
180 | $self->{options}{'porefs'}= 'full,nowrap';␊ |
181 | $self->{options}{'msgid-bugs-address'}= undef;␊ |
182 | $self->{options}{'copyright-holder'}= "Free Software Foundation, Inc.";␊ |
183 | $self->{options}{'package-name'}= "PACKAGE";␊ |
184 | $self->{options}{'package-version'}= "VERSION";␊ |
185 | foreach my $opt (keys %$options) {␊ |
186 | if ($options->{$opt}) {␊ |
187 | die wrap_mod("po4a::po",␊ |
188 | dgettext ("po4a", "Unknown option: %s"), $opt)␊ |
189 | unless exists $self->{options}{$opt};␊ |
190 | $self->{options}{$opt} = $options->{$opt};␊ |
191 | }␊ |
192 | }␊ |
193 | $self->{options}{'porefs'} =~ /^(full|counter|noline|none)(,(no)?wrap)?$/ ||␊ |
194 | die wrap_mod("po4a::po",␊ |
195 | dgettext ("po4a",␊ |
196 | "Invalid value for option 'porefs' ('%s' is ".␊ |
197 | "not one of 'full', 'counter', 'noline' or 'none')"),␊ |
198 | $self->{options}{'porefs'});␊ |
199 | if ($self->{options}{'porefs'} =~ m/^counter/) {␊ |
200 | $self->{counter} = {};␊ |
201 | }␊ |
202 | ␊ |
203 | $self->{po}=();␊ |
204 | $self->{count}=0; # number of msgids in the PO␊ |
205 | # count_doc: number of strings in the document␊ |
206 | # (duplicate strings counted multiple times)␊ |
207 | $self->{count_doc}=0;␊ |
208 | $self->{header_comment}=␊ |
209 | " SOME DESCRIPTIVE TITLE\n"␊ |
210 | ." Copyright (C) YEAR ".␊ |
211 | $self->{options}{'copyright-holder'}."\n"␊ |
212 | ." This file is distributed under the same license ".␊ |
213 | "as the ".$self->{options}{'package-name'}." package.\n"␊ |
214 | ." FIRST AUTHOR <EMAIL\@ADDRESS>, YEAR.\n"␊ |
215 | ."\n"␊ |
216 | .", fuzzy";␊ |
217 | # $self->header_tag="fuzzy";␊ |
218 | $self->{header}=escape_text("Project-Id-Version: ".␊ |
219 | $self->{options}{'package-name'}." ".␊ |
220 | $self->{options}{'package-version'}."\n".␊ |
221 | ((defined $self->{options}{'msgid-bugs-address'})?␊ |
222 | "Report-Msgid-Bugs-To: ".$self->{options}{'msgid-bugs-address'}."\n":␊ |
223 | "").␊ |
224 | "POT-Creation-Date: $date\n".␊ |
225 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n".␊ |
226 | "Last-Translator: FULL NAME <EMAIL\@ADDRESS>\n".␊ |
227 | "Language-Team: LANGUAGE <LL\@li.org>\n".␊ |
228 | "Language: \n".␊ |
229 | "MIME-Version: 1.0\n".␊ |
230 | "Content-Type: text/plain; charset=CHARSET\n".␊ |
231 | "Content-Transfer-Encoding: 8bit\n");␊ |
232 | ␊ |
233 | $self->{encoder}=find_encoding("ascii");␊ |
234 | $self->{footer}=[];␊ |
235 | ␊ |
236 | # To make stats about gettext hits␊ |
237 | $self->stats_clear();␊ |
238 | }␊ |
239 | ␊ |
240 | =item read($)␊ |
241 | ␊ |
242 | Reads a PO file (which name is given as argument). Previously existing␊ |
243 | entries in self are not removed, the new ones are added to the end of the␊ |
244 | catalog.␊ |
245 | ␊ |
246 | =cut␊ |
247 | ␊ |
248 | sub read {␊ |
249 | my $self=shift;␊ |
250 | my $filename=shift␊ |
251 | or croak wrap_mod("po4a::po",␊ |
252 | dgettext("po4a",␊ |
253 | "Please provide a non-null filename"));␊ |
254 | ␊ |
255 | my $lang = basename($filename);␊ |
256 | $lang =~ s/\.po$//;␊ |
257 | $self->{lang} = $lang;␊ |
258 | ␊ |
259 | my $fh;␊ |
260 | if ($filename eq '-') {␊ |
261 | $fh=*STDIN;␊ |
262 | } else {␊ |
263 | open $fh,"<$filename"␊ |
264 | or croak wrap_mod("po4a::po",␊ |
265 | dgettext("po4a", "Can't read from %s: %s"),␊ |
266 | $filename, $!);␊ |
267 | }␊ |
268 | ␊ |
269 | ## Read paragraphs line-by-line␊ |
270 | my $pofile="";␊ |
271 | my $textline;␊ |
272 | while (defined ($textline = <$fh>)) {␊ |
273 | $pofile .= $textline;␊ |
274 | }␊ |
275 | # close INPUT␊ |
276 | # or croak (sprintf(dgettext("po4a",␊ |
277 | # "Can't close %s after reading: %s"),␊ |
278 | # $filename,$!)."\n");␊ |
279 | ␊ |
280 | my $linenum=0;␊ |
281 | ␊ |
282 | foreach my $msg (split (/\n\n/,$pofile)) {␊ |
283 | my ($msgid,$msgstr,$comment,$previous,$automatic,$reference,$flags,$buffer);␊ |
284 | my ($msgid_plural, $msgstr_plural);␊ |
285 | if ($msg =~ m/^#~/m) {␊ |
286 | push(@{$self->{footer}}, $msg);␊ |
287 | next;␊ |
288 | }␊ |
289 | foreach my $line (split (/\n/,$msg)) {␊ |
290 | $linenum++;␊ |
291 | if ($line =~ /^#\. ?(.*)$/) { # Automatic comment␊ |
292 | $automatic .= (defined($automatic) ? "\n" : "").$1;␊ |
293 | ␊ |
294 | } elsif ($line =~ /^#: ?(.*)$/) { # reference␊ |
295 | $reference .= (defined($reference) ? "\n" : "").$1;␊ |
296 | ␊ |
297 | } elsif ($line =~ /^#, ?(.*)$/) { # flags␊ |
298 | $flags .= (defined($flags) ? "\n" : "").$1;␊ |
299 | ␊ |
300 | } elsif ($line =~ /^#\| ?(.*)$/) { # previous translation␊ |
301 | $previous .= (defined($previous) ? "\n" : "").($1||"");␊ |
302 | ␊ |
303 | } elsif ($line =~ /^#(.*)$/) { # Translator comments␊ |
304 | $comment .= (defined($comment) ? "\n" : "").($1||"");␊ |
305 | ␊ |
306 | } elsif ($line =~ /^msgid (".*")$/) { # begin of msgid␊ |
307 | $buffer = $1;␊ |
308 | ␊ |
309 | } elsif ($line =~ /^msgid_plural (".*")$/) {␊ |
310 | # begin of msgid_plural, end of msgid␊ |
311 | ␊ |
312 | $msgid = $buffer;␊ |
313 | $buffer = $1;␊ |
314 | ␊ |
315 | } elsif ($line =~ /^msgstr (".*")$/) {␊ |
316 | # begin of msgstr, end of msgid␊ |
317 | ␊ |
318 | $msgid = $buffer;␊ |
319 | $buffer = "$1";␊ |
320 | ␊ |
321 | } elsif ($line =~ /^msgstr\[([0-9]+)\] (".*")$/) {␊ |
322 | # begin of msgstr[x], end of msgid_plural or msgstr[x-1]␊ |
323 | ␊ |
324 | # Note: po4a cannot uses plural forms␊ |
325 | # (no integer to use the plural form)␊ |
326 | # * drop the msgstr[x] where x >= 2␊ |
327 | # * use msgstr[0] as the translation of msgid␊ |
328 | # * use msgstr[1] as the translation of msgid_plural␊ |
329 | ␊ |
330 | if ($1 eq "0") {␊ |
331 | $msgid_plural = $buffer;␊ |
332 | $buffer = "$2";␊ |
333 | } elsif ($1 eq "1") {␊ |
334 | $msgstr = $buffer;␊ |
335 | $buffer = "$2";␊ |
336 | } elsif ($1 eq "2") {␊ |
337 | $msgstr_plural = $buffer;␊ |
338 | warn wrap_ref_mod("$filename:$linenum",␊ |
339 | "po4a::po",␊ |
340 | dgettext("po4a", "Messages with more than 2 plural forms are not supported."));␊ |
341 | }␊ |
342 | } elsif ($line =~ /^(".*")$/) {␊ |
343 | # continuation of a line␊ |
344 | $buffer .= "\n$1";␊ |
345 | ␊ |
346 | } else {␊ |
347 | warn wrap_ref_mod("$filename:$linenum",␊ |
348 | "po4a::po",␊ |
349 | dgettext("po4a", "Strange line: -->%s<--"),␊ |
350 | $line);␊ |
351 | }␊ |
352 | }␊ |
353 | $linenum++;␊ |
354 | if (defined $msgid_plural) {␊ |
355 | $msgstr_plural=$buffer;␊ |
356 | ␊ |
357 | $msgid = unquote_text($msgid) if (defined($msgid));␊ |
358 | $msgstr = unquote_text($msgstr) if (defined($msgstr));␊ |
359 | ␊ |
360 | $self->push_raw ('msgid' => $msgid,␊ |
361 | 'msgstr' => $msgstr,␊ |
362 | 'reference' => $reference,␊ |
363 | 'flags' => $flags,␊ |
364 | 'comment' => $comment,␊ |
365 | 'previous' => $previous,␊ |
366 | 'automatic' => $automatic,␊ |
367 | 'plural' => 0);␊ |
368 | ␊ |
369 | $msgid_plural = unquote_text($msgid_plural)␊ |
370 | if (defined($msgid_plural));␊ |
371 | $msgstr_plural = unquote_text($msgstr_plural)␊ |
372 | if (defined($msgstr_plural));␊ |
373 | ␊ |
374 | $self->push_raw ('msgid' => $msgid_plural,␊ |
375 | 'msgstr' => $msgstr_plural,␊ |
376 | 'reference' => $reference,␊ |
377 | 'flags' => $flags,␊ |
378 | 'comment' => $comment,␊ |
379 | 'previous' => $previous,␊ |
380 | 'automatic' => $automatic,␊ |
381 | 'plural' => 1);␊ |
382 | } else {␊ |
383 | $msgstr=$buffer;␊ |
384 | ␊ |
385 | $msgid = unquote_text($msgid) if (defined($msgid));␊ |
386 | $msgstr = unquote_text($msgstr) if (defined($msgstr));␊ |
387 | ␊ |
388 | $self->push_raw ('msgid' => $msgid,␊ |
389 | 'msgstr' => $msgstr,␊ |
390 | 'reference' => $reference,␊ |
391 | 'flags' => $flags,␊ |
392 | 'comment' => $comment,␊ |
393 | 'previous' => $previous,␊ |
394 | 'automatic' => $automatic);␊ |
395 | }␊ |
396 | }␊ |
397 | }␊ |
398 | ␊ |
399 | =item write($)␊ |
400 | ␊ |
401 | Writes the current catalog to the given file.␊ |
402 | ␊ |
403 | =cut␊ |
404 | ␊ |
405 | sub write{␊ |
406 | my $self=shift;␊ |
407 | my $filename=shift␊ |
408 | or croak dgettext("po4a","Can't write to a file without filename")."\n";␊ |
409 | ␊ |
410 | my $fh;␊ |
411 | if ($filename eq '-') {␊ |
412 | $fh=\*STDOUT;␊ |
413 | } else {␊ |
414 | # make sure the directory in which we should write the localized␊ |
415 | # file exists␊ |
416 | my $dir = $filename;␊ |
417 | if ($dir =~ m|/|) {␊ |
418 | $dir =~ s|/[^/]*$||;␊ |
419 | ␊ |
420 | File::Path::mkpath($dir, 0, 0755) # Croaks on error␊ |
421 | if (length ($dir) && ! -e $dir);␊ |
422 | }␊ |
423 | open $fh,">$filename"␊ |
424 | or croak wrap_mod("po4a::po",␊ |
425 | dgettext("po4a", "Can't write to %s: %s"),␊ |
426 | $filename, $!);␊ |
427 | }␊ |
428 | ␊ |
429 | print $fh "".format_comment($self->{header_comment},"")␊ |
430 | if defined($self->{header_comment}) && length($self->{header_comment});␊ |
431 | ␊ |
432 | print $fh "msgid \"\"\n";␊ |
433 | print $fh "msgstr ".quote_text($self->{header})."\n\n";␊ |
434 | ␊ |
435 | ␊ |
436 | my $buf_msgstr_plural; # Used to keep the first msgstr of plural forms␊ |
437 | my $first=1;␊ |
438 | foreach my $msgid ( sort { ($self->{po}{"$a"}{'pos'}) <=>␊ |
439 | ($self->{po}{"$b"}{'pos'})␊ |
440 | } keys %{$self->{po}}) {␊ |
441 | my $output="";␊ |
442 | ␊ |
443 | if ($first) {␊ |
444 | $first=0;␊ |
445 | } else {␊ |
446 | $output .= "\n";␊ |
447 | }␊ |
448 | ␊ |
449 | $output .= format_comment($self->{po}{$msgid}{'comment'},"")␊ |
450 | if defined($self->{po}{$msgid}{'comment'})␊ |
451 | && length ($self->{po}{$msgid}{'comment'});␊ |
452 | if ( defined($self->{po}{$msgid}{'automatic'})␊ |
453 | && length ($self->{po}{$msgid}{'automatic'})) {␊ |
454 | foreach my $comment (split(/\\n/,$self->{po}{$msgid}{'automatic'}))␊ |
455 | {␊ |
456 | $output .= format_comment($comment, ". ")␊ |
457 | }␊ |
458 | }␊ |
459 | $output .= format_comment($self->{po}{$msgid}{'type'},". type: ")␊ |
460 | if defined($self->{po}{$msgid}{'type'})␊ |
461 | && length ($self->{po}{$msgid}{'type'});␊ |
462 | if ( defined($self->{po}{$msgid}{'reference'})␊ |
463 | && length ($self->{po}{$msgid}{'reference'})) {␊ |
464 | my $output_ref = $self->{po}{$msgid}{'reference'};␊ |
465 | if ($self->{options}{'porefs'} =~ m/,wrap$/) {␊ |
466 | $output_ref = wrap($output_ref);␊ |
467 | $output_ref =~ s/\s+$//mg;␊ |
468 | }␊ |
469 | $output .= format_comment($output_ref,": ");␊ |
470 | }␊ |
471 | $output .= "#, ". join(", ", sort split(/\s+/,$self->{po}{$msgid}{'flags'}))."\n"␊ |
472 | if defined($self->{po}{$msgid}{'flags'})␊ |
473 | && length ($self->{po}{$msgid}{'flags'});␊ |
474 | $output .= format_comment($self->{po}{$msgid}{'previous'},"| ")␊ |
475 | if defined($self->{po}{$msgid}{'previous'})␊ |
476 | && length ($self->{po}{$msgid}{'previous'});␊ |
477 | ␊ |
478 | if (exists $self->{po}{$msgid}{'plural'}) {␊ |
479 | if ($self->{po}{$msgid}{'plural'} == 0) {␊ |
480 | if ($self->get_charset =~ /^utf-8$/i) {␊ |
481 | my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'});␊ |
482 | $msgid = Encode::decode_utf8($msgid);␊ |
483 | $output .= Encode::encode_utf8("msgid ".quote_text($msgid)."\n");␊ |
484 | $buf_msgstr_plural = Encode::encode_utf8("msgstr[0] ".quote_text($msgstr)."\n");␊ |
485 | } else {␊ |
486 | $output = "msgid ".quote_text($msgid)."\n";␊ |
487 | $buf_msgstr_plural = "msgstr[0] ".quote_text($self->{po}{$msgid}{'msgstr'})."\n";␊ |
488 | }␊ |
489 | } elsif ($self->{po}{$msgid}{'plural'} == 1) {␊ |
490 | # TODO: there may be only one plural form␊ |
491 | if ($self->get_charset =~ /^utf-8$/i) {␊ |
492 | my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'});␊ |
493 | $msgid = Encode::decode_utf8($msgid);␊ |
494 | $output = Encode::encode_utf8("msgid_plural ".quote_text($msgid)."\n");␊ |
495 | $output .= $buf_msgstr_plural;␊ |
496 | $output .= Encode::encode_utf8("msgstr[1] ".quote_text($msgstr)."\n");␊ |
497 | $buf_msgstr_plural = "";␊ |
498 | } else {␊ |
499 | $output = "msgid_plural ".quote_text($msgid)."\n";␊ |
500 | $output .= $buf_msgstr_plural;␊ |
501 | $output .= "msgstr[1] ".quote_text($self->{po}{$msgid}{'msgstr'})."\n";␊ |
502 | }␊ |
503 | } else {␊ |
504 | die wrap_msg(dgettext("po4a","Can't write PO files with more than two plural forms."));␊ |
505 | }␊ |
506 | } else {␊ |
507 | if ($self->get_charset =~ /^utf-8$/i) {␊ |
508 | my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'});␊ |
509 | $msgid = Encode::decode_utf8($msgid);␊ |
510 | $output .= Encode::encode_utf8("msgid ".quote_text($msgid)."\n");␊ |
511 | $output .= Encode::encode_utf8("msgstr ".quote_text($msgstr)."\n");␊ |
512 | } else {␊ |
513 | $output .= "msgid ".quote_text($msgid)."\n";␊ |
514 | $output .= "msgstr ".quote_text($self->{po}{$msgid}{'msgstr'})."\n";␊ |
515 | }␊ |
516 | }␊ |
517 | ␊ |
518 | print $fh $output;␊ |
519 | }␊ |
520 | print $fh join("\n\n", @{$self->{footer}}) if scalar @{$self->{footer}};␊ |
521 | ␊ |
522 | # print STDERR "$fh";␊ |
523 | # if ($filename ne '-') {␊ |
524 | # close $fh␊ |
525 | # or croak (sprintf(dgettext("po4a",␊ |
526 | # "Can't close %s after writing: %s\n"),␊ |
527 | # $filename,$!));␊ |
528 | # }␊ |
529 | }␊ |
530 | ␊ |
531 | =item write_if_needed($$)␊ |
532 | ␊ |
533 | Like write, but if the PO or POT file already exists, the object will be␊ |
534 | written in a temporary file which will be compared with the existing file␊ |
535 | to check if the update is needed (this avoids to change a POT just to␊ |
536 | update a line reference or the POT-Creation-Date field).␊ |
537 | ␊ |
538 | =cut␊ |
539 | ␊ |
540 | sub move_po_if_needed {␊ |
541 | my ($new_po, $old_po, $backup) = (shift, shift, shift);␊ |
542 | my $diff;␊ |
543 | ␊ |
544 | if (-e $old_po) {␊ |
545 | my $diff_ignore = "-I'^#:' "␊ |
546 | ."-I'^\"POT-Creation-Date:' "␊ |
547 | ."-I'^\"PO-Revision-Date:'";␊ |
548 | $diff = qx(diff -q $diff_ignore $old_po $new_po);␊ |
549 | if ( $diff eq "" ) {␊ |
550 | unlink $new_po␊ |
551 | or die wrap_msg(dgettext("po4a","Can't unlink %s: %s."),␊ |
552 | $new_po, $!);␊ |
553 | # touch the old PO␊ |
554 | my ($atime, $mtime) = (time,time);␊ |
555 | utime $atime, $mtime, $old_po;␊ |
556 | } else {␊ |
557 | move $new_po, $old_po␊ |
558 | or die wrap_msg(dgettext("po4a","Can't move %s to %s: %s."),␊ |
559 | $new_po, $old_po, $!);␊ |
560 | }␊ |
561 | } else {␊ |
562 | move $new_po, $old_po␊ |
563 | or die wrap_msg(dgettext("po4a","Can't move %s to %s: %s."),␊ |
564 | $new_po, $old_po, $!);␊ |
565 | }␊ |
566 | }␊ |
567 | ␊ |
568 | sub write_if_needed {␊ |
569 | my $self=shift;␊ |
570 | my $filename=shift␊ |
571 | or croak dgettext("po4a","Can't write to a file without filename")."\n";␊ |
572 | ␊ |
573 | if (-e $filename) {␊ |
574 | my ($tmp_filename);␊ |
575 | (undef,$tmp_filename)=File::Temp->tempfile($filename."XXXX",␊ |
576 | DIR => "/tmp",␊ |
577 | OPEN => 0,␊ |
578 | UNLINK => 0);␊ |
579 | $self->write($tmp_filename);␊ |
580 | move_po_if_needed($tmp_filename, $filename);␊ |
581 | } else {␊ |
582 | $self->write($filename);␊ |
583 | }␊ |
584 | }␊ |
585 | ␊ |
586 | =item gettextize($$)␊ |
587 | ␊ |
588 | This function produces one translated message catalog from two catalogs, an␊ |
589 | original and a translation. This process is described in L<po4a(7)|po4a.7>,␊ |
590 | section I<Gettextization: how does it work?>.␊ |
591 | ␊ |
592 | =cut␊ |
593 | ␊ |
594 | sub gettextize {␊ |
595 | my $this = shift;␊ |
596 | my $class = ref($this) || $this;␊ |
597 | my ($poorig,$potrans)=(shift,shift);␊ |
598 | ␊ |
599 | my $pores=Locale::Po4a::Po->new();␊ |
600 | ␊ |
601 | my $please_fail = 0;␊ |
602 | my $toobad = dgettext("po4a",␊ |
603 | "\nThe gettextization failed (once again). Don't give up, ".␊ |
604 | "gettextizing is a subtle art, but this is only needed once ".␊ |
605 | "to convert a project to the gorgeous luxus offered by po4a ".␊ |
606 | "to translators.".␊ |
607 | "\nPlease refer to the po4a(7) documentation, the section ".␊ |
608 | "\"HOWTO convert a pre-existing translation to po4a?\" ".␊ |
609 | "contains several hints to help you in your task");␊ |
610 | ␊ |
611 | # Don't fail right now when the entry count does not match. Instead, give␊ |
612 | # it a try so that the user can see where we fail (which is probably where␊ |
613 | # the problem is).␊ |
614 | if ($poorig->count_entries_doc() > $potrans->count_entries_doc()) {␊ |
615 | warn wrap_mod("po4a gettextize", dgettext("po4a",␊ |
616 | "Original has more strings than the translation (%d>%d). ".␊ |
617 | "Please fix it by editing the translated version to add ".␊ |
618 | "some dummy entry."),␊ |
619 | $poorig->count_entries_doc(),␊ |
620 | $potrans->count_entries_doc());␊ |
621 | $please_fail = 1;␊ |
622 | } elsif ($poorig->count_entries_doc() < $potrans->count_entries_doc()) {␊ |
623 | warn wrap_mod("po4a gettextize", dgettext("po4a",␊ |
624 | "Original has less strings than the translation (%d<%d). ".␊ |
625 | "Please fix it by removing the extra entry from the ".␊ |
626 | "translated file. You may need an addendum (cf po4a(7)) ".␊ |
627 | "to reput the chunk in place after gettextization. A ".␊ |
628 | "possible cause is that a text duplicated in the original ".␊ |
629 | "is not translated the same way each time. Remove one of ".␊ |
630 | "the translations, and you're fine."),␊ |
631 | $poorig->count_entries_doc(),␊ |
632 | $potrans->count_entries_doc());␊ |
633 | $please_fail = 1;␊ |
634 | }␊ |
635 | ␊ |
636 | if ( $poorig->get_charset =~ /^utf-8$/i ) {␊ |
637 | $potrans->to_utf8;␊ |
638 | $pores->set_charset("UTF-8");␊ |
639 | } else {␊ |
640 | if ($potrans->get_charset eq "CHARSET") {␊ |
641 | $pores->set_charset("ascii");␊ |
642 | } else {␊ |
643 | $pores->set_charset($potrans->get_charset);␊ |
644 | }␊ |
645 | }␊ |
646 | print "Po character sets:\n".␊ |
647 | " original=".$poorig->get_charset."\n".␊ |
648 | " translated=".$potrans->get_charset."\n".␊ |
649 | " result=".$pores->get_charset."\n"␊ |
650 | if $debug{'encoding'};␊ |
651 | ␊ |
652 | for (my ($o,$t)=(0,0) ;␊ |
653 | $o<$poorig->count_entries_doc() && $t<$potrans->count_entries_doc();␊ |
654 | $o++,$t++) {␊ |
655 | #␊ |
656 | # Extract some informations␊ |
657 | ␊ |
658 | my ($orig,$trans)=($poorig->msgid_doc($o),$potrans->msgid_doc($t));␊ |
659 | # print STDERR "Matches [[$orig]]<<$trans>>\n";␊ |
660 | ␊ |
661 | my ($reforig,$reftrans)=($poorig->{po}{$orig}{'reference'},␊ |
662 | $potrans->{po}{$trans}{'reference'});␊ |
663 | my ($typeorig,$typetrans)=($poorig->{po}{$orig}{'type'},␊ |
664 | $potrans->{po}{$trans}{'type'});␊ |
665 | ␊ |
666 | #␊ |
667 | # Make sure the type of both string exist␊ |
668 | #␊ |
669 | die wrap_mod("po4a gettextize",␊ |
670 | "Internal error: type of original string number %s ".␊ |
671 | "isn't provided", $o)␊ |
672 | if ($typeorig eq '');␊ |
673 | ␊ |
674 | die wrap_mod("po4a gettextize",␊ |
675 | "Internal error: type of translated string number %s ".␊ |
676 | "isn't provided", $o)␊ |
677 | if ($typetrans eq '');␊ |
678 | ␊ |
679 | #␊ |
680 | # Make sure both type are the same␊ |
681 | #␊ |
682 | if ($typeorig ne $typetrans){␊ |
683 | $pores->write("gettextization.failed.po");␊ |
684 | eval {␊ |
685 | # Recode $trans into current charset, if possible␊ |
686 | require I18N::Langinfo;␊ |
687 | I18N::Langinfo->import(qw(langinfo CODESET));␊ |
688 | my $codeset = langinfo(CODESET());␊ |
689 | Encode::from_to($trans, $potrans->get_charset, $codeset);␊ |
690 | };␊ |
691 | die wrap_msg(dgettext("po4a",␊ |
692 | "po4a gettextization: Structure disparity between ".␊ |
693 | "original and translated files:\n".␊ |
694 | "msgid (at %s) is of type '%s' while\n".␊ |
695 | "msgstr (at %s) is of type '%s'.\n".␊ |
696 | "Original text: %s\n".␊ |
697 | "Translated text: %s\n".␊ |
698 | "(result so far dumped to gettextization.failed.po)").␊ |
699 | "%s",␊ |
700 | $reforig, $typeorig,␊ |
701 | $reftrans, $typetrans,␊ |
702 | $orig,␊ |
703 | $trans,␊ |
704 | $toobad);␊ |
705 | }␊ |
706 | ␊ |
707 | #␊ |
708 | # Push the entry␊ |
709 | #␊ |
710 | my $flags;␊ |
711 | if (defined $poorig->{po}{$orig}{'flags'}) {␊ |
712 | $flags = $poorig->{po}{$orig}{'flags'}." fuzzy";␊ |
713 | } else {␊ |
714 | $flags = "fuzzy";␊ |
715 | }␊ |
716 | $pores->push_raw('msgid' => $orig,␊ |
717 | 'msgstr' => $trans,␊ |
718 | 'flags' => $flags,␊ |
719 | 'type' => $typeorig,␊ |
720 | 'reference' => $reforig,␊ |
721 | 'conflict' => 1,␊ |
722 | 'transref' => $potrans->{po}{$trans}{'reference'})␊ |
723 | unless (defined($pores->{po}{$orig})␊ |
724 | and ($pores->{po}{$orig}{'msgstr'} eq $trans))␊ |
725 | # FIXME: maybe we should be smarter about what reference should be␊ |
726 | # sent to push_raw.␊ |
727 | }␊ |
728 | ␊ |
729 | # make sure we return a useful error message when entry count differ␊ |
730 | die "$toobad\n" if $please_fail;␊ |
731 | ␊ |
732 | return $pores;␊ |
733 | }␊ |
734 | ␊ |
735 | =item filter($)␊ |
736 | ␊ |
737 | This function extracts a catalog from an existing one. Only the entries having␊ |
738 | a reference in the given file will be placed in the resulting catalog.␊ |
739 | ␊ |
740 | This function parses its argument, converts it to a Perl function definition,␊ |
741 | evals this definition and filters the fields for which this function returns␊ |
742 | true.␊ |
743 | ␊ |
744 | I love Perl sometimes ;)␊ |
745 | ␊ |
746 | =cut␊ |
747 | ␊ |
748 | sub filter {␊ |
749 | my $self=shift;␊ |
750 | our $filter=shift;␊ |
751 | ␊ |
752 | my $res;␊ |
753 | $res = Locale::Po4a::Po->new();␊ |
754 | ␊ |
755 | # Parse the filter␊ |
756 | our $code="sub apply { return ";␊ |
757 | our $pos=0;␊ |
758 | our $length = length $filter;␊ |
759 | ␊ |
760 | # explode chars to parts. How to subscript a string in Perl?␊ |
761 | our @filter = split(//,$filter);␊ |
762 | ␊ |
763 | sub gloups {␊ |
764 | my $fmt=shift;␊ |
765 | my $space = "";␊ |
766 | for (1..$pos){␊ |
767 | $space .= ' ';␊ |
768 | }␊ |
769 | die wrap_msg("$fmt\n$filter\n$space^ HERE");␊ |
770 | }␊ |
771 | sub showmethecode {␊ |
772 | return unless $debug{'filter'};␊ |
773 | my $fmt=shift;␊ |
774 | my $space="";␊ |
775 | for (1..$pos){␊ |
776 | $space .= ' ';␊ |
777 | }␊ |
778 | print STDERR "$filter\n$space^ $fmt\n";#"$code\n";␊ |
779 | }␊ |
780 | ␊ |
781 | # I dream of a lex in perl :-/␊ |
782 | sub parse_expression {␊ |
783 | showmethecode("Begin expression")␊ |
784 | if $debug{'filter'};␊ |
785 | ␊ |
786 | gloups("Begin of expression expected, got '%s'",$filter[$pos])␊ |
787 | unless ($filter[$pos] eq '(');␊ |
788 | $pos ++; # pass the '('␊ |
789 | if ($filter[$pos] eq '&') {␊ |
790 | # AND␊ |
791 | $pos++;␊ |
792 | showmethecode("Begin of AND")␊ |
793 | if $debug{'filter'};␊ |
794 | $code .= "(";␊ |
795 | while (1) {␊ |
796 | gloups ("Unfinished AND statement.")␊ |
797 | if ($pos == $length);␊ |
798 | parse_expression();␊ |
799 | if ($filter[$pos] eq '(') {␊ |
800 | $code .= " && ";␊ |
801 | } elsif ($filter[$pos] eq ')') {␊ |
802 | last; # do not eat that char␊ |
803 | } else {␊ |
804 | gloups("End of AND or begin of sub-expression expected, got '%s'", $filter[$pos]);␊ |
805 | }␊ |
806 | }␊ |
807 | $code .= ")";␊ |
808 | } elsif ($filter[$pos] eq '|') {␊ |
809 | # OR␊ |
810 | $pos++;␊ |
811 | $code .= "(";␊ |
812 | while (1) {␊ |
813 | gloups("Unfinished OR statement.")␊ |
814 | if ($pos == $length);␊ |
815 | parse_expression();␊ |
816 | if ($filter[$pos] eq '(') {␊ |
817 | $code .= " || ";␊ |
818 | } elsif ($filter[$pos] eq ')') {␊ |
819 | last; # do not eat that char␊ |
820 | } else {␊ |
821 | gloups("End of OR or begin of sub-expression expected, got '%s'",$filter[$pos]);␊ |
822 | }␊ |
823 | }␊ |
824 | $code .= ")";␊ |
825 | } elsif ($filter[$pos] eq '!') {␊ |
826 | # NOT␊ |
827 | $pos++;␊ |
828 | $code .= "(!";␊ |
829 | gloups("Missing sub-expression in NOT statement.")␊ |
830 | if ($pos == $length);␊ |
831 | parse_expression();␊ |
832 | $code .= ")";␊ |
833 | } else {␊ |
834 | # must be an equal. Let's get field and argument␊ |
835 | my ($field,$arg,$done);␊ |
836 | $field = substr($filter,$pos);␊ |
837 | gloups("EQ statement contains no '=' or invalid field name")␊ |
838 | unless ($field =~ /([a-z]*)=/i);␊ |
839 | $field = lc($1);␊ |
840 | $pos += (length $field) + 1;␊ |
841 | ␊ |
842 | # check that we've got a valid field name,␊ |
843 | # and the number it referes to␊ |
844 | # DO NOT CHANGE THE ORDER␊ |
845 | my @names=qw(msgid msgstr reference flags comment previous automatic);␊ |
846 | my $fieldpos;␊ |
847 | for ($fieldpos = 0;␊ |
848 | $fieldpos < scalar @names && $field ne $names[$fieldpos];␊ |
849 | $fieldpos++) {}␊ |
850 | gloups("Invalid field name: %s",$field)␊ |
851 | if $fieldpos == scalar @names; # not found␊ |
852 | ␊ |
853 | # Now, get the argument value. It has to be between quotes,␊ |
854 | # which can be escaped␊ |
855 | # We point right on the first char of the argument␊ |
856 | # (first quote already eaten)␊ |
857 | my $escaped = 0;␊ |
858 | my $quoted = 0;␊ |
859 | if ($filter[$pos] eq '"') {␊ |
860 | $pos++;␊ |
861 | $quoted = 1;␊ |
862 | }␊ |
863 | showmethecode(($quoted?"Quoted":"Unquoted")." argument of field '$field'")␊ |
864 | if $debug{'filter'};␊ |
865 | ␊ |
866 | while (!$done) {␊ |
867 | gloups("Unfinished EQ argument.")␊ |
868 | if ($pos == $length);␊ |
869 | ␊ |
870 | if ($quoted) {␊ |
871 | if ($filter[$pos] eq '\\') {␊ |
872 | if ($escaped) {␊ |
873 | $arg .= '\\';␊ |
874 | $escaped = 0;␊ |
875 | } else {␊ |
876 | $escaped = 1;␊ |
877 | }␊ |
878 | } elsif ($escaped) {␊ |
879 | if ($filter[$pos] eq '"') {␊ |
880 | $arg .= '"';␊ |
881 | $escaped = 0;␊ |
882 | } else {␊ |
883 | gloups("Invalid escape sequence in argument: '\\%s'",$filter[$pos]);␊ |
884 | }␊ |
885 | } else {␊ |
886 | if ($filter[$pos] eq '"') {␊ |
887 | $done = 1;␊ |
888 | } else {␊ |
889 | $arg .= $filter[$pos];␊ |
890 | }␊ |
891 | }␊ |
892 | } else {␊ |
893 | if ($filter[$pos] eq ')') {␊ |
894 | # counter the next ++ since we don't want to eat␊ |
895 | # this char␊ |
896 | $pos--;␊ |
897 | $done = 1;␊ |
898 | } else {␊ |
899 | $arg .= $filter[$pos];␊ |
900 | }␊ |
901 | }␊ |
902 | $pos++;␊ |
903 | }␊ |
904 | # and now, add the code to check this equality␊ |
905 | $code .= "(\$_[$fieldpos] =~ m{$arg})";␊ |
906 | ␊ |
907 | }␊ |
908 | showmethecode("End of expression")␊ |
909 | if $debug{'filter'};␊ |
910 | gloups("Unfinished statement.")␊ |
911 | if ($pos == $length);␊ |
912 | gloups("End of expression expected, got '%s'",$filter[$pos])␊ |
913 | unless ($filter[$pos] eq ')');␊ |
914 | $pos++;␊ |
915 | }␊ |
916 | # And now, launch the beast, finish the function and use eval␊ |
917 | # to construct this function.␊ |
918 | # Ok, the lack of lexer is a fair price for the eval ;)␊ |
919 | parse_expression();␊ |
920 | gloups("Garbage at the end of the expression")␊ |
921 | if ($pos != $length);␊ |
922 | $code .= "; }";␊ |
923 | print STDERR "CODE = $code\n"␊ |
924 | if $debug{'filter'};␊ |
925 | eval $code;␊ |
926 | die wrap_mod("po4a::po", dgettext("po4a", "Eval failure: %s"), $@)␊ |
927 | if $@;␊ |
928 | ␊ |
929 | for (my $cpt=(0) ;␊ |
930 | $cpt<$self->count_entries();␊ |
931 | $cpt++) {␊ |
932 | ␊ |
933 | my ($msgid,$ref,$msgstr,$flags,$type,$comment,$previous,$automatic);␊ |
934 | ␊ |
935 | $msgid = $self->msgid($cpt);␊ |
936 | $ref=$self->{po}{$msgid}{'reference'};␊ |
937 | ␊ |
938 | $msgstr= $self->{po}{$msgid}{'msgstr'};␊ |
939 | $flags = $self->{po}{$msgid}{'flags'};␊ |
940 | $type = $self->{po}{$msgid}{'type'};␊ |
941 | $comment = $self->{po}{$msgid}{'comment'};␊ |
942 | $previous = $self->{po}{$msgid}{'previous'};␊ |
943 | $automatic = $self->{po}{$msgid}{'automatic'};␊ |
944 | ␊ |
945 | # DO NOT CHANGE THE ORDER␊ |
946 | $res->push_raw('msgid' => $msgid,␊ |
947 | 'msgstr' => $msgstr,␊ |
948 | 'flags' => $flags,␊ |
949 | 'type' => $type,␊ |
950 | 'reference' => $ref,␊ |
951 | 'comment' => $comment,␊ |
952 | 'previous' => $previous,␊ |
953 | 'automatic' => $automatic)␊ |
954 | if (apply($msgid,$msgstr,$ref,$flags,$comment,$previous,$automatic));␊ |
955 | }␊ |
956 | # delete the apply subroutine␊ |
957 | # otherwise it will be redefined.␊ |
958 | undef &apply;␊ |
959 | return $res;␊ |
960 | }␊ |
961 | ␊ |
962 | =item to_utf8()␊ |
963 | ␊ |
964 | Recodes to UTF-8 the PO's msgstrs. Does nothing if the charset is not␊ |
965 | specified in the PO file ("CHARSET" value), or if it's already UTF-8 or␊ |
966 | ASCII.␊ |
967 | ␊ |
968 | =cut␊ |
969 | ␊ |
970 | sub to_utf8 {␊ |
971 | my $this = shift;␊ |
972 | my $charset = $this->get_charset();␊ |
973 | ␊ |
974 | unless ($charset eq "CHARSET" or␊ |
975 | $charset =~ /^ascii$/i or␊ |
976 | $charset =~ /^utf-8$/i) {␊ |
977 | foreach my $msgid ( keys %{$this->{po}} ) {␊ |
978 | Encode::from_to($this->{po}{$msgid}{'msgstr'}, $charset, "utf-8");␊ |
979 | }␊ |
980 | $this->set_charset("UTF-8");␊ |
981 | }␊ |
982 | }␊ |
983 | ␊ |
984 | =back␊ |
985 | ␊ |
986 | =head1 Functions to use a message catalog for translations␊ |
987 | ␊ |
988 | =over 4␊ |
989 | ␊ |
990 | =item gettext($%)␊ |
991 | ␊ |
992 | Request the translation of the string given as argument in the current catalog.␊ |
993 | The function returns the original (untranslated) string if the string was not␊ |
994 | found.␊ |
995 | ␊ |
996 | After the string to translate, you can pass a hash of extra␊ |
997 | arguments. Here are the valid entries:␊ |
998 | ␊ |
999 | =over␊ |
1000 | ␊ |
1001 | =item B<wrap>␊ |
1002 | ␊ |
1003 | boolean indicating whether we can consider that whitespaces in string are␊ |
1004 | not important. If yes, the function canonizes the string before looking for␊ |
1005 | a translation, and wraps the result.␊ |
1006 | ␊ |
1007 | =item B<wrapcol>␊ |
1008 | ␊ |
1009 | the column at which we should wrap (default: 76).␊ |
1010 | ␊ |
1011 | =back␊ |
1012 | ␊ |
1013 | =cut␊ |
1014 | ␊ |
1015 | sub gettext {␊ |
1016 | my $self=shift;␊ |
1017 | my $text=shift;␊ |
1018 | my (%opt)=@_;␊ |
1019 | my $res;␊ |
1020 | ␊ |
1021 | return "" unless defined($text) && length($text); # Avoid returning the header.␊ |
1022 | my $validoption="reference wrap wrapcol";␊ |
1023 | my %validoption;␊ |
1024 | ␊ |
1025 | map { $validoption{$_}=1 } (split(/ /,$validoption));␊ |
1026 | foreach (keys %opt) {␊ |
1027 | Carp::confess "internal error: unknown arg $_.\n".␊ |
1028 | "Here are the valid options: $validoption.\n"␊ |
1029 | unless $validoption{$_};␊ |
1030 | }␊ |
1031 | ␊ |
1032 | $text=canonize($text)␊ |
1033 | if ($opt{'wrap'});␊ |
1034 | ␊ |
1035 | my $esc_text=escape_text($text);␊ |
1036 | ␊ |
1037 | $self->{gettextqueries}++;␊ |
1038 | ␊ |
1039 | if ( defined $self->{po}{$esc_text}␊ |
1040 | and defined $self->{po}{$esc_text}{'msgstr'}␊ |
1041 | and length $self->{po}{$esc_text}{'msgstr'}␊ |
1042 | and ( not defined $self->{po}{$esc_text}{'flags'}␊ |
1043 | or $self->{po}{$esc_text}{'flags'} !~ /fuzzy/)) {␊ |
1044 | ␊ |
1045 | $self->{gettexthits}++;␊ |
1046 | $res = unescape_text($self->{po}{$esc_text}{'msgstr'});␊ |
1047 | if (defined $self->{po}{$esc_text}{'plural'}) {␊ |
1048 | if ($self->{po}{$esc_text}{'plural'} eq "0") {␊ |
1049 | warn wrap_mod("po4a gettextize", dgettext("po4a",␊ |
1050 | "'%s' is the singular form of a message, ".␊ |
1051 | "po4a will use the msgstr[0] translation (%s)."),␊ |
1052 | $esc_text, $res);␊ |
1053 | } else {␊ |
1054 | warn wrap_mod("po4a gettextize", dgettext("po4a",␊ |
1055 | "'%s' is the plural form of a message, ".␊ |
1056 | "po4a will use the msgstr[1] translation (%s)."),␊ |
1057 | $esc_text, $res);␊ |
1058 | }␊ |
1059 | }␊ |
1060 | } else {␊ |
1061 | $res = $text;␊ |
1062 | }␊ |
1063 | ␊ |
1064 | if ($opt{'wrap'}) {␊ |
1065 | if ($self->get_charset =~ /^utf-8$/i) {␊ |
1066 | $res=Encode::decode_utf8($res);␊ |
1067 | $res=wrap ($res, $opt{'wrapcol'} || 76);␊ |
1068 | $res=Encode::encode_utf8($res);␊ |
1069 | } else {␊ |
1070 | $res=wrap ($res, $opt{'wrapcol'} || 76);␊ |
1071 | }␊ |
1072 | }␊ |
1073 | # print STDERR "Gettext >>>$text<<<(escaped=$esc_text)=[[[$res]]]\n\n";␊ |
1074 | return $res;␊ |
1075 | }␊ |
1076 | ␊ |
1077 | =item stats_get()␊ |
1078 | ␊ |
1079 | Returns statistics about the hit ratio of gettext since the last time that␊ |
1080 | stats_clear() was called. Please note that it's not the same␊ |
1081 | statistics than the one printed by msgfmt --statistic. Here, it's statistics␊ |
1082 | about recent usage of the PO file, while msgfmt reports the status of the␊ |
1083 | file. Example of use:␊ |
1084 | ␊ |
1085 | [some use of the PO file to translate stuff]␊ |
1086 | ␊ |
1087 | ($percent,$hit,$queries) = $pofile->stats_get();␊ |
1088 | print "So far, we found translations for $percent\% ($hit of $queries) of strings.\n";␊ |
1089 | ␊ |
1090 | =cut␊ |
1091 | ␊ |
1092 | sub stats_get() {␊ |
1093 | my $self=shift;␊ |
1094 | my ($h,$q)=($self->{gettexthits},$self->{gettextqueries});␊ |
1095 | my $p = ($q == 0 ? 100 : int($h/$q*10000)/100);␊ |
1096 | ␊ |
1097 | # $p =~ s/\.00//;␊ |
1098 | # $p =~ s/(\..)0/$1/;␊ |
1099 | ␊ |
1100 | return ( $p,$h,$q );␊ |
1101 | }␊ |
1102 | ␊ |
1103 | =item stats_clear()␊ |
1104 | ␊ |
1105 | Clears the statistics about gettext hits.␊ |
1106 | ␊ |
1107 | =cut␊ |
1108 | ␊ |
1109 | sub stats_clear {␊ |
1110 | my $self = shift;␊ |
1111 | $self->{gettextqueries} = 0;␊ |
1112 | $self->{gettexthits} = 0;␊ |
1113 | }␊ |
1114 | ␊ |
1115 | =back␊ |
1116 | ␊ |
1117 | =head1 Functions to build a message catalog␊ |
1118 | ␊ |
1119 | =over 4␊ |
1120 | ␊ |
1121 | =item push(%)␊ |
1122 | ␊ |
1123 | Push a new entry at the end of the current catalog. The arguments should␊ |
1124 | form a hash table. The valid keys are:␊ |
1125 | ␊ |
1126 | =over 4␊ |
1127 | ␊ |
1128 | =item B<msgid>␊ |
1129 | ␊ |
1130 | the string in original language.␊ |
1131 | ␊ |
1132 | =item B<msgstr>␊ |
1133 | ␊ |
1134 | the translation.␊ |
1135 | ␊ |
1136 | =item B<reference>␊ |
1137 | ␊ |
1138 | an indication of where this string was found. Example: file.c:46 (meaning␊ |
1139 | in 'file.c' at line 46). It can be a space-separated list in case of␊ |
1140 | multiple occurrences.␊ |
1141 | ␊ |
1142 | =item B<comment>␊ |
1143 | ␊ |
1144 | a comment added here manually (by the translators). The format here is free.␊ |
1145 | ␊ |
1146 | =item B<automatic>␊ |
1147 | ␊ |
1148 | a comment which was automatically added by the string extraction␊ |
1149 | program. See the B<--add-comments> option of the B<xgettext> program for␊ |
1150 | more information.␊ |
1151 | ␊ |
1152 | =item B<flags>␊ |
1153 | ␊ |
1154 | space-separated list of all defined flags for this entry.␊ |
1155 | ␊ |
1156 | Valid flags are: B<c-text>, B<python-text>, B<lisp-text>, B<elisp-text>, B<librep-text>,␊ |
1157 | B<smalltalk-text>, B<java-text>, B<awk-text>, B<object-pascal-text>, B<ycp-text>,␊ |
1158 | B<tcl-text>, B<wrap>, B<no-wrap> and B<fuzzy>.␊ |
1159 | ␊ |
1160 | See the gettext documentation for their meaning.␊ |
1161 | ␊ |
1162 | =item B<type>␊ |
1163 | ␊ |
1164 | this is mostly an internal argument: it is used while gettextizing␊ |
1165 | documents. The idea here is to parse both the original and the translation␊ |
1166 | into a PO object, and merge them, using one's msgid as msgid and the␊ |
1167 | other's msgid as msgstr. To make sure that things get ok, each msgid in PO␊ |
1168 | objects are given a type, based on their structure (like "chapt", "sect1",␊ |
1169 | "p" and so on in DocBook). If the types of strings are not the same, that␊ |
1170 | means that both files do not share the same structure, and the process␊ |
1171 | reports an error.␊ |
1172 | ␊ |
1173 | This information is written as automatic comment in the PO file since this␊ |
1174 | gives to translators some context about the strings to translate.␊ |
1175 | ␊ |
1176 | =item B<wrap>␊ |
1177 | ␊ |
1178 | boolean indicating whether whitespaces can be mangled in cosmetic␊ |
1179 | reformattings. If true, the string is canonized before use.␊ |
1180 | ␊ |
1181 | This information is written to the PO file using the B<wrap> or B<no-wrap> flag.␊ |
1182 | ␊ |
1183 | =item B<wrapcol>␊ |
1184 | ␊ |
1185 | the column at which we should wrap (default: 76).␊ |
1186 | ␊ |
1187 | This information is not written to the PO file.␊ |
1188 | ␊ |
1189 | =back␊ |
1190 | ␊ |
1191 | =cut␊ |
1192 | ␊ |
1193 | sub push {␊ |
1194 | my $self=shift;␊ |
1195 | my %entry=@_;␊ |
1196 | ␊ |
1197 | my $validoption="wrap wrapcol type msgid msgstr automatic previous flags reference";␊ |
1198 | my %validoption;␊ |
1199 | ␊ |
1200 | map { $validoption{$_}=1 } (split(/ /,$validoption));␊ |
1201 | foreach (keys %entry) {␊ |
1202 | Carp::confess "internal error: unknown arg $_.\n".␊ |
1203 | "Here are the valid options: $validoption.\n"␊ |
1204 | unless $validoption{$_};␊ |
1205 | }␊ |
1206 | ␊ |
1207 | unless ($entry{'wrap'}) {␊ |
1208 | $entry{'flags'} .= " no-wrap";␊ |
1209 | }␊ |
1210 | if (defined ($entry{'msgid'})) {␊ |
1211 | $entry{'msgid'} = canonize($entry{'msgid'})␊ |
1212 | if ($entry{'wrap'});␊ |
1213 | ␊ |
1214 | $entry{'msgid'} = escape_text($entry{'msgid'});␊ |
1215 | }␊ |
1216 | if (defined ($entry{'msgstr'})) {␊ |
1217 | $entry{'msgstr'} = canonize($entry{'msgstr'})␊ |
1218 | if ($entry{'wrap'});␊ |
1219 | ␊ |
1220 | $entry{'msgstr'} = escape_text($entry{'msgstr'});␊ |
1221 | }␊ |
1222 | ␊ |
1223 | $self->push_raw(%entry);␊ |
1224 | }␊ |
1225 | ␊ |
1226 | # The same as push(), but assuming that msgid and msgstr are already escaped␊ |
1227 | sub push_raw {␊ |
1228 | my $self=shift;␊ |
1229 | my %entry=@_;␊ |
1230 | my ($msgid,$msgstr,$reference,$comment,$automatic,$previous,$flags,$type,$transref)=␊ |
1231 | ($entry{'msgid'},$entry{'msgstr'},␊ |
1232 | $entry{'reference'},$entry{'comment'},$entry{'automatic'},␊ |
1233 | $entry{'previous'},$entry{'flags'},$entry{'type'},$entry{'transref'});␊ |
1234 | my $keep_conflict = $entry{'conflict'};␊ |
1235 | ␊ |
1236 | # print STDERR "Push_raw\n";␊ |
1237 | # print STDERR " msgid=>>>$msgid<<<\n" if $msgid;␊ |
1238 | # print STDERR " msgstr=[[[$msgstr]]]\n" if $msgstr;␊ |
1239 | # Carp::cluck " flags=$flags\n" if $flags;␊ |
1240 | ␊ |
1241 | return unless defined($entry{'msgid'});␊ |
1242 | ␊ |
1243 | #no msgid => header definition␊ |
1244 | unless (length($entry{'msgid'})) {␊ |
1245 | # if (defined($self->{header}) && $self->{header} =~ /\S/) {␊ |
1246 | # warn dgettext("po4a","Redefinition of the header. ".␊ |
1247 | # "The old one will be discarded\n");␊ |
1248 | # } FIXME: do that iff the header isn't the default one.␊ |
1249 | $self->{header}=$msgstr;␊ |
1250 | $self->{header_comment}=$comment;␊ |
1251 | my $charset = $self->get_charset;␊ |
1252 | if ($charset ne "CHARSET") {␊ |
1253 | $self->{encoder}=find_encoding($charset);␊ |
1254 | } else {␊ |
1255 | $self->{encoder}=find_encoding("ascii");␊ |
1256 | }␊ |
1257 | return;␊ |
1258 | }␊ |
1259 | ␊ |
1260 | if ($self->{options}{'porefs'} =~ m/^none/) {␊ |
1261 | $reference = "";␊ |
1262 | } elsif ($self->{options}{'porefs'} =~ m/^counter/) {␊ |
1263 | if ($reference =~ m/^(.+?)(?=\S+:\d+)/g) {␊ |
1264 | my $new_ref = $1;␊ |
1265 | 1 while $reference =~ s{ # x modifier is added to add formatting and improve readability␊ |
1266 | \G(\s*)(\S+):\d+ # \G is the last match in m//g (see also the (?=) syntax above)␊ |
1267 | # $2 is the file name␊ |
1268 | }{␊ |
1269 | $self->{counter}{$2} ||= 0, # each file has its own counter␊ |
1270 | ++$self->{counter}{$2}, # increment it␊ |
1271 | $new_ref .= "$1$2:".$self->{counter}{$2} # replace line number by this counter␊ |
1272 | }gex && pos($reference);␊ |
1273 | $reference = $new_ref;␊ |
1274 | }␊ |
1275 | } elsif ($self->{options}{'porefs'} =~ m/^noline/) {␊ |
1276 | $reference =~ s/:\d+/:1/g;␊ |
1277 | }␊ |
1278 | ␊ |
1279 | if (defined($self->{po}{$msgid})) {␊ |
1280 | warn wrap_mod("po4a::po",␊ |
1281 | dgettext("po4a","msgid defined twice: %s"),␊ |
1282 | $msgid)␊ |
1283 | if (0); # FIXME: put a verbose stuff␊ |
1284 | if ( defined $msgstr␊ |
1285 | and defined $self->{po}{$msgid}{'msgstr'}␊ |
1286 | and $self->{po}{$msgid}{'msgstr'} ne $msgstr) {␊ |
1287 | my $txt=quote_text($msgid);␊ |
1288 | my ($first,$second)=␊ |
1289 | (format_comment(". ",$self->{po}{$msgid}{'reference'}).␊ |
1290 | quote_text($self->{po}{$msgid}{'msgstr'}),␊ |
1291 | ␊ |
1292 | format_comment(". ",$reference).␊ |
1293 | quote_text($msgstr));␊ |
1294 | ␊ |
1295 | if ($keep_conflict) {␊ |
1296 | if ($self->{po}{$msgid}{'msgstr'} =~ m/^#-#-#-#-# .* #-#-#-#-#\\n/s) {␊ |
1297 | $msgstr = $self->{po}{$msgid}{'msgstr'}.␊ |
1298 | "\\n#-#-#-#-# $transref #-#-#-#-#\\n".␊ |
1299 | $msgstr;␊ |
1300 | } else {␊ |
1301 | $msgstr = "#-#-#-#-# ".␊ |
1302 | $self->{po}{$msgid}{'transref'}.␊ |
1303 | " #-#-#-#-#\\n".␊ |
1304 | $self->{po}{$msgid}{'msgstr'}."\\n".␊ |
1305 | "#-#-#-#-# $transref #-#-#-#-#\\n".␊ |
1306 | $msgstr;␊ |
1307 | }␊ |
1308 | # Every msgid will have the same list of references.␊ |
1309 | # Only keep the last list.␊ |
1310 | $self->{po}{$msgid}{'reference'} = "";␊ |
1311 | } else {␊ |
1312 | warn wrap_msg(dgettext("po4a",␊ |
1313 | "Translations don't match for:\n".␊ |
1314 | "%s\n".␊ |
1315 | "-->First translation:\n".␊ |
1316 | "%s\n".␊ |
1317 | " Second translation:\n".␊ |
1318 | "%s\n".␊ |
1319 | " Old translation discarded."),␊ |
1320 | $txt,$first,$second);␊ |
1321 | }␊ |
1322 | }␊ |
1323 | }␊ |
1324 | if (defined $transref) {␊ |
1325 | $self->{po}{$msgid}{'transref'} = $transref;␊ |
1326 | }␊ |
1327 | if (defined($reference) && length($reference)) {␊ |
1328 | if (defined $self->{po}{$msgid}{'reference'}) {␊ |
1329 | $self->{po}{$msgid}{'reference'} .= " ".$reference;␊ |
1330 | } else {␊ |
1331 | $self->{po}{$msgid}{'reference'} = $reference;␊ |
1332 | }␊ |
1333 | }␊ |
1334 | $self->{po}{$msgid}{'msgstr'} = $msgstr;␊ |
1335 | $self->{po}{$msgid}{'comment'} = $comment;␊ |
1336 | $self->{po}{$msgid}{'automatic'} = $automatic;␊ |
1337 | $self->{po}{$msgid}{'previous'} = $previous;␊ |
1338 | if (defined($self->{po}{$msgid}{'pos_doc'})) {␊ |
1339 | $self->{po}{$msgid}{'pos_doc'} .= " ".$self->{count_doc}++;␊ |
1340 | } else {␊ |
1341 | $self->{po}{$msgid}{'pos_doc'} = $self->{count_doc}++;␊ |
1342 | }␊ |
1343 | unless (defined($self->{po}{$msgid}{'pos'})) {␊ |
1344 | $self->{po}{$msgid}{'pos'} = $self->{count}++;␊ |
1345 | }␊ |
1346 | $self->{po}{$msgid}{'type'} = $type;␊ |
1347 | $self->{po}{$msgid}{'plural'} = $entry{'plural'}␊ |
1348 | if defined $entry{'plural'};␊ |
1349 | ␊ |
1350 | if (defined($flags)) {␊ |
1351 | $flags = " $flags ";␊ |
1352 | $flags =~ s/,/ /g;␊ |
1353 | foreach my $flag (@known_flags) {␊ |
1354 | if ($flags =~ /\s$flag\s/) { # if flag to be set␊ |
1355 | unless ( defined($self->{po}{$msgid}{'flags'})␊ |
1356 | && $self->{po}{$msgid}{'flags'} =~ /\b$flag\b/) {␊ |
1357 | # flag not already set␊ |
1358 | if (defined $self->{po}{$msgid}{'flags'}) {␊ |
1359 | $self->{po}{$msgid}{'flags'} .= " ".$flag;␊ |
1360 | } else {␊ |
1361 | $self->{po}{$msgid}{'flags'} = $flag;␊ |
1362 | }␊ |
1363 | }␊ |
1364 | }␊ |
1365 | }␊ |
1366 | }␊ |
1367 | # print STDERR "stored ((($msgid)))=>(((".$self->{po}{$msgid}{'msgstr'}.")))\n\n";␊ |
1368 | ␊ |
1369 | }␊ |
1370 | ␊ |
1371 | =back␊ |
1372 | ␊ |
1373 | =head1 Miscellaneous functions␊ |
1374 | ␊ |
1375 | =over 4␊ |
1376 | ␊ |
1377 | =item count_entries()␊ |
1378 | ␊ |
1379 | Returns the number of entries in the catalog (without the header).␊ |
1380 | ␊ |
1381 | =cut␊ |
1382 | ␊ |
1383 | sub count_entries($) {␊ |
1384 | my $self=shift;␊ |
1385 | return $self->{count};␊ |
1386 | }␊ |
1387 | ␊ |
1388 | =item count_entries_doc()␊ |
1389 | ␊ |
1390 | Returns the number of entries in document. If a string appears multiple times␊ |
1391 | in the document, it will be counted multiple times␊ |
1392 | ␊ |
1393 | =cut␊ |
1394 | ␊ |
1395 | sub count_entries_doc($) {␊ |
1396 | my $self=shift;␊ |
1397 | return $self->{count_doc};␊ |
1398 | }␊ |
1399 | ␊ |
1400 | =item msgid($)␊ |
1401 | ␊ |
1402 | Returns the msgid of the given number.␊ |
1403 | ␊ |
1404 | =cut␊ |
1405 | ␊ |
1406 | sub msgid($$) {␊ |
1407 | my $self=shift;␊ |
1408 | my $num=shift;␊ |
1409 | ␊ |
1410 | foreach my $msgid ( keys %{$self->{po}} ) {␊ |
1411 | return $msgid if ($self->{po}{$msgid}{'pos'} eq $num);␊ |
1412 | }␊ |
1413 | return undef;␊ |
1414 | }␊ |
1415 | ␊ |
1416 | =item msgid_doc($)␊ |
1417 | ␊ |
1418 | Returns the msgid with the given position in the document.␊ |
1419 | ␊ |
1420 | =cut␊ |
1421 | ␊ |
1422 | sub msgid_doc($$) {␊ |
1423 | my $self=shift;␊ |
1424 | my $num=shift;␊ |
1425 | ␊ |
1426 | foreach my $msgid ( keys %{$self->{po}} ) {␊ |
1427 | foreach my $pos (split / /, $self->{po}{$msgid}{'pos_doc'}) {␊ |
1428 | return $msgid if ($pos eq $num);␊ |
1429 | }␊ |
1430 | }␊ |
1431 | return undef;␊ |
1432 | }␊ |
1433 | ␊ |
1434 | =item get_charset()␊ |
1435 | ␊ |
1436 | Returns the character set specified in the PO header. If it hasn't been␊ |
1437 | set, it will return "CHARSET".␊ |
1438 | ␊ |
1439 | =cut␊ |
1440 | ␊ |
1441 | sub get_charset() {␊ |
1442 | my $self=shift;␊ |
1443 | ␊ |
1444 | $self->{header} =~ /charset=(.*?)[\s\\]/;␊ |
1445 | ␊ |
1446 | if (defined $1) {␊ |
1447 | return $1;␊ |
1448 | } else {␊ |
1449 | return "CHARSET";␊ |
1450 | }␊ |
1451 | }␊ |
1452 | ␊ |
1453 | =item set_charset($)␊ |
1454 | ␊ |
1455 | This sets the character set of the PO header to the value specified in its␊ |
1456 | first argument. If you never call this function (and no file with a specified␊ |
1457 | character set is read), the default value is left to "CHARSET". This value␊ |
1458 | doesn't change the behavior of this module, it's just used to fill that field␊ |
1459 | in the header, and to return it in get_charset().␊ |
1460 | ␊ |
1461 | =cut␊ |
1462 | ␊ |
1463 | sub set_charset() {␊ |
1464 | my $self=shift;␊ |
1465 | ␊ |
1466 | my ($newchar,$oldchar);␊ |
1467 | $newchar = shift;␊ |
1468 | $oldchar = $self->get_charset();␊ |
1469 | ␊ |
1470 | $self->{header} =~ s/$oldchar/$newchar/;␊ |
1471 | $self->{encoder}=find_encoding($newchar);␊ |
1472 | }␊ |
1473 | ␊ |
1474 | #----[ helper functions ]---------------------------------------------------␊ |
1475 | ␊ |
1476 | # transforme the string from its PO file representation to the form which␊ |
1477 | # should be used to print it␊ |
1478 | sub unescape_text {␊ |
1479 | my $text = shift;␊ |
1480 | ␊ |
1481 | print STDERR "\nunescape [$text]====" if $debug{'escape'};␊ |
1482 | $text = join("",split(/\n/,$text));␊ |
1483 | $text =~ s/\\"/"/g;␊ |
1484 | # unescape newlines␊ |
1485 | # NOTE on \G:␊ |
1486 | # The following regular expression introduce newlines.␊ |
1487 | # Thus, ^ doesn't match all beginnings of lines.␊ |
1488 | # \G is a zero-width assertion that matches the position␊ |
1489 | # of the previous substitution with s///g. As every␊ |
1490 | # substitution ends by a newline, it always matches a␊ |
1491 | # position just after a newline.␊ |
1492 | $text =~ s/( # $1:␊ |
1493 | (\G|[^\\]) # beginning of the line or any char␊ |
1494 | # different from '\'␊ |
1495 | (\\\\)* # followed by any even number of '\'␊ |
1496 | )\\n # and followed by an escaped newline␊ |
1497 | /$1\n/sgx; # single string, match globally, allow comments␊ |
1498 | # unescape carriage returns␊ |
1499 | $text =~ s/( # $1:␊ |
1500 | (\G|[^\\]) # beginning of the line or any char␊ |
1501 | # different from '\'␊ |
1502 | (\\\\)* # followed by any even number of '\'␊ |
1503 | )\\r # and followed by an escaped carriage return␊ |
1504 | /$1\r/sgx; # single string, match globally, allow comments␊ |
1505 | # unescape tabulations␊ |
1506 | $text =~ s/( # $1:␊ |
1507 | (\G|[^\\])# beginning of the line or any char␊ |
1508 | # different from '\'␊ |
1509 | (\\\\)* # followed by any even number of '\'␊ |
1510 | )\\t # and followed by an escaped tabulation␊ |
1511 | /$1\t/mgx; # multilines string, match globally, allow comments␊ |
1512 | # and unescape the escape character␊ |
1513 | $text =~ s/\\\\/\\/g;␊ |
1514 | print STDERR ">$text<\n" if $debug{'escape'};␊ |
1515 | ␊ |
1516 | return $text;␊ |
1517 | }␊ |
1518 | ␊ |
1519 | # transform the string to its representation as it should be written in PO␊ |
1520 | # files␊ |
1521 | sub escape_text {␊ |
1522 | my $text = shift;␊ |
1523 | ␊ |
1524 | print STDERR "\nescape [$text]====" if $debug{'escape'};␊ |
1525 | $text =~ s/\\/\\\\/g;␊ |
1526 | $text =~ s/"/\\"/g;␊ |
1527 | $text =~ s/\n/\\n/g;␊ |
1528 | $text =~ s/\r/\\r/g;␊ |
1529 | $text =~ s/\t/\\t/g;␊ |
1530 | print STDERR ">$text<\n" if $debug{'escape'};␊ |
1531 | ␊ |
1532 | return $text;␊ |
1533 | }␊ |
1534 | ␊ |
1535 | # put quotes around the string on each lines (without escaping it)␊ |
1536 | # It does also normalize the text (ie, make sure its representation is wraped␊ |
1537 | # on the 80th char, but without changing the meaning of the string)␊ |
1538 | sub quote_text {␊ |
1539 | my $string = shift;␊ |
1540 | ␊ |
1541 | return '""' unless defined($string) && length($string);␊ |
1542 | ␊ |
1543 | print STDERR "\nquote [$string]====" if $debug{'quote'};␊ |
1544 | # break lines on newlines, if any␊ |
1545 | # see unescape_text for an explanation on \G␊ |
1546 | $string =~ s/( # $1:␊ |
1547 | (\G|[^\\]) # beginning of the line or any char␊ |
1548 | # different from '\'␊ |
1549 | (\\\\)* # followed by any even number of '\'␊ |
1550 | \\n) # and followed by an escaped newline␊ |
1551 | /$1\n/sgx; # single string, match globally, allow comments␊ |
1552 | $string = wrap($string);␊ |
1553 | my @string = split(/\n/,$string);␊ |
1554 | $string = join ("\"\n\"",@string);␊ |
1555 | $string = "\"$string\"";␊ |
1556 | if (scalar @string > 1 && $string[0] ne '') {␊ |
1557 | $string = "\"\"\n".$string;␊ |
1558 | }␊ |
1559 | ␊ |
1560 | print STDERR ">$string<\n" if $debug{'quote'};␊ |
1561 | return $string;␊ |
1562 | }␊ |
1563 | ␊ |
1564 | # undo the work of the quote_text function␊ |
1565 | sub unquote_text {␊ |
1566 | my $string = shift;␊ |
1567 | print STDERR "\nunquote [$string]====" if $debug{'quote'};␊ |
1568 | $string =~ s/^""\\n//s;␊ |
1569 | $string =~ s/^"(.*)"$/$1/s;␊ |
1570 | $string =~ s/"\n"//gm;␊ |
1571 | # Note: an even number of '\' could precede \\n, but I could not build a␊ |
1572 | # document to test this␊ |
1573 | $string =~ s/([^\\])\\n\n/$1!!DUMMYPOPM!!/gm;␊ |
1574 | $string =~ s|!!DUMMYPOPM!!|\\n|gm;␊ |
1575 | print STDERR ">$string<\n" if $debug{'quote'};␊ |
1576 | return $string;␊ |
1577 | }␊ |
1578 | ␊ |
1579 | # canonize the string: write it on only one line, changing consecutive␊ |
1580 | # whitespace to only one space.␊ |
1581 | # Warning, it changes the string and should only be called if the string is␊ |
1582 | # plain text␊ |
1583 | sub canonize {␊ |
1584 | my $text=shift;␊ |
1585 | print STDERR "\ncanonize [$text]====" if $debug{'canonize'};␊ |
1586 | $text =~ s/^ *//s;␊ |
1587 | $text =~ s/^[ \t]+/ /gm;␊ |
1588 | # if ($text eq "\n"), it messed up the first string (header)␊ |
1589 | $text =~ s/\n/ /gm if ($text ne "\n");␊ |
1590 | $text =~ s/([.)]) +/$1 /gm;␊ |
1591 | $text =~ s/([^.)]) */$1 /gm;␊ |
1592 | $text =~ s/ *$//s;␊ |
1593 | print STDERR ">$text<\n" if $debug{'canonize'};␊ |
1594 | return $text;␊ |
1595 | }␊ |
1596 | ␊ |
1597 | # wraps the string. We don't use Text::Wrap since it mangles whitespace at␊ |
1598 | # the end of splited line␊ |
1599 | sub wrap {␊ |
1600 | my $text=shift;␊ |
1601 | return "0" if ($text eq '0');␊ |
1602 | my $col=shift || 76;␊ |
1603 | my @lines=split(/\n/,"$text");␊ |
1604 | my $res="";␊ |
1605 | my $first=1;␊ |
1606 | while (defined(my $line=shift @lines)) {␊ |
1607 | if ($first && length($line) > $col - 10) {␊ |
1608 | unshift @lines,$line;␊ |
1609 | $first=0;␊ |
1610 | next;␊ |
1611 | }␊ |
1612 | if (length($line) > $col) {␊ |
1613 | my $pos=rindex($line," ",$col);␊ |
1614 | while (substr($line,$pos-1,1) eq '.' && $pos != -1) {␊ |
1615 | $pos=rindex($line," ",$pos-1);␊ |
1616 | }␊ |
1617 | if ($pos == -1) {␊ |
1618 | # There are no spaces in the first $col chars, pick-up the␊ |
1619 | # first space␊ |
1620 | $pos = index($line," ");␊ |
1621 | }␊ |
1622 | if ($pos != -1) {␊ |
1623 | my $end=substr($line,$pos+1);␊ |
1624 | $line=substr($line,0,$pos+1);␊ |
1625 | if ($end =~ s/^( +)//) {␊ |
1626 | $line .= $1;␊ |
1627 | }␊ |
1628 | unshift @lines,$end;␊ |
1629 | }␊ |
1630 | }␊ |
1631 | $first=0;␊ |
1632 | $res.="$line\n";␊ |
1633 | }␊ |
1634 | # Restore the original trailing spaces␊ |
1635 | $res =~ s/\s+$//s;␊ |
1636 | if ($text =~ m/(\s+)$/s) {␊ |
1637 | $res .= $1;␊ |
1638 | }␊ |
1639 | return $res;␊ |
1640 | }␊ |
1641 | ␊ |
1642 | # outputs properly a '# ... ' line to be put in the PO file␊ |
1643 | sub format_comment {␊ |
1644 | my $comment=shift;␊ |
1645 | my $char=shift;␊ |
1646 | my $result = "#". $char . $comment;␊ |
1647 | $result =~ s/\n/\n#$char/gs;␊ |
1648 | $result =~ s/^#$char$/#/gm;␊ |
1649 | $result .= "\n";␊ |
1650 | return $result;␊ |
1651 | }␊ |
1652 | ␊ |
1653 | ␊ |
1654 | 1;␊ |
1655 | __END__␊ |
1656 | ␊ |
1657 | =back␊ |
1658 | ␊ |
1659 | =head1 AUTHORS␊ |
1660 | ␊ |
1661 | Denis Barbier <barbier@linuxfr.org>␊ |
1662 | Martin Quinson (mquinson#debian.org)␊ |
1663 | ␊ |
1664 | =cut␊ |
1665 | |