1 | # Locale::Po4a::Po -- manipulation of PO files␊ |
2 | #␊ |
3 | # This program is free software; you may redistribute it and/or modify it␊ |
4 | # under the terms of GPL (see COPYING).␊ |
5 | ␊ |
6 | ############################################################################␊ |
7 | # Modules and declarations␊ |
8 | ############################################################################␊ |
9 | ␊ |
10 | =encoding UTF-8␊ |
11 | ␊ |
12 | =head1 NAME␊ |
13 | ␊ |
14 | Locale::Po4a::Po - PO file manipulation module␊ |
15 | ␊ |
16 | =head1 SYNOPSIS␊ |
17 | ␊ |
18 | use Locale::Po4a::Po;␊ |
19 | my $pofile=Locale::Po4a::Po->new();␊ |
20 | ␊ |
21 | # Read PO file␊ |
22 | $pofile->read('file.po');␊ |
23 | ␊ |
24 | # Add an entry␊ |
25 | $pofile->push('msgid' => 'Hello', 'msgstr' => 'bonjour',␊ |
26 | 'flags' => "wrap", 'reference'=>'file.c:46');␊ |
27 | ␊ |
28 | # Extract a translation␊ |
29 | $pofile->gettext("Hello"); # returns 'bonjour'␊ |
30 | ␊ |
31 | # Write back to a file␊ |
32 | $pofile->write('otherfile.po');␊ |
33 | ␊ |
34 | =head1 DESCRIPTION␊ |
35 | ␊ |
36 | Locale::Po4a::Po is a module that allows you to manipulate message␊ |
37 | catalogs. You can load and write from/to a file (which extension is often␊ |
38 | I<po>), you can build new entries on the fly or request for the translation␊ |
39 | of a string.␊ |
40 | ␊ |
41 | For a more complete description of message catalogs in the PO format and␊ |
42 | their use, please refer to the documentation of the gettext program.␊ |
43 | ␊ |
44 | This module is part of the po4a project, which objective is to use PO files␊ |
45 | (designed at origin to ease the translation of program messages) to␊ |
46 | translate everything, including documentation (man page, info manual),␊ |
47 | package description, debconf templates, and everything which may benefit␊ |
48 | from this.␊ |
49 | ␊ |
50 | =head1 OPTIONS ACCEPTED BY THIS MODULE␊ |
51 | ␊ |
52 | =over 4␊ |
53 | ␊ |
54 | =item B<porefs>␊ |
55 | ␊ |
56 | This specifies the reference format. It can be one of B<none> to not produce␊ |
57 | any reference, B<noline> to not specify the line number, and B<full> to␊ |
58 | include complete references.␊ |
59 | ␊ |
60 | =back␊ |
61 | ␊ |
62 | =cut␊ |
63 | ␊ |
64 | use IO::File;␊ |
65 | ␊ |
66 | ␊ |
67 | require Exporter;␊ |
68 | ␊ |
69 | package Locale::Po4a::Po;␊ |
70 | use DynaLoader;␊ |
71 | ␊ |
72 | use Locale::Po4a::Common qw(wrap_msg wrap_mod wrap_ref_mod dgettext);␊ |
73 | ␊ |
74 | use subs qw(makespace);␊ |
75 | use vars qw(@ISA @EXPORT_OK);␊ |
76 | @ISA = qw(Exporter DynaLoader);␊ |
77 | @EXPORT = qw(%debug);␊ |
78 | @EXPORT_OK = qw(&move_po_if_needed);␊ |
79 | ␊ |
80 | use Locale::Po4a::TransTractor;␊ |
81 | # Try to use a C extension if present.␊ |
82 | eval("bootstrap Locale::Po4a::Po $Locale::Po4a::TransTractor::VERSION");␊ |
83 | ␊ |
84 | use 5.006;␊ |
85 | use strict;␊ |
86 | use warnings;␊ |
87 | ␊ |
88 | use Carp qw(croak);␊ |
89 | use File::Basename;␊ |
90 | use File::Path; # mkdir before write␊ |
91 | use File::Copy; # move␊ |
92 | use POSIX qw(strftime floor);␊ |
93 | use Time::Local;␊ |
94 | ␊ |
95 | use Encode;␊ |
96 | ␊ |
97 | my @known_flags=qw(wrap no-wrap c-format fuzzy);␊ |
98 | ␊ |
99 | our %debug=('canonize' => 0,␊ |
100 | 'quote' => 0,␊ |
101 | 'escape' => 0,␊ |
102 | 'encoding' => 0,␊ |
103 | 'filter' => 0);␊ |
104 | ␊ |
105 | =head1 Functions about whole message catalogs␊ |
106 | ␊ |
107 | =over 4␊ |
108 | ␊ |
109 | =item new()␊ |
110 | ␊ |
111 | Creates a new message catalog. If an argument is provided, it's the name of␊ |
112 | a PO file we should load.␊ |
113 | ␊ |
114 | =cut␊ |
115 | ␊ |
116 | sub new {␊ |
117 | my ($this, $options) = (shift, shift);␊ |
118 | my $class = ref($this) || $this;␊ |
119 | my $self = {};␊ |
120 | bless $self, $class;␊ |
121 | $self->initialize($options);␊ |
122 | ␊ |
123 | my $filename = shift;␊ |
124 | $self->read($filename) if defined($filename) && length($filename);␊ |
125 | return $self;␊ |
126 | }␊ |
127 | ␊ |
128 | # Return the numerical timezone (e.g. +0200)␊ |
129 | # Neither the %z nor the %s formats of strftime are portable:␊ |
130 | # '%s' is not supported on Solaris and '%z' indicates␊ |
131 | # "2006-10-25 19:36E. Europe Standard Time" on MS Windows.␊ |
132 | sub timezone {␊ |
133 | my @g = gmtime();␊ |
134 | my @l = localtime();␊ |
135 | ␊ |
136 | my $diff;␊ |
137 | $diff = floor(timelocal(@l)/60 +0.5);␊ |
138 | $diff -= floor(timelocal(@g)/60 +0.5);␊ |
139 | ␊ |
140 | my $h = floor($diff / 60) + $l[8]; # $l[8] indicates if we are currently␊ |
141 | # in a daylight saving time zone␊ |
142 | my $m = $diff%60;␊ |
143 | ␊ |
144 | return sprintf "%+03d%02d\n", $h, $m;␊ |
145 | }␊ |
146 | ␊ |
147 | sub initialize {␊ |
148 | my ($self, $options) = (shift, shift);␊ |
149 | my $date = strftime("%Y-%m-%d %H:%M", localtime).timezone();␊ |
150 | chomp $date;␊ |
151 | # $options = ref($options) || $options;␊ |
152 | ␊ |
153 | $self->{options}{'porefs'}= 'full';␊ |
154 | $self->{options}{'msgid-bugs-address'}= undef;␊ |
155 | $self->{options}{'copyright-holder'}= "Free Software Foundation, Inc.";␊ |
156 | $self->{options}{'package-name'}= "PACKAGE";␊ |
157 | $self->{options}{'package-version'}= "VERSION";␊ |
158 | foreach my $opt (keys %$options) {␊ |
159 | if ($options->{$opt}) {␊ |
160 | die wrap_mod("po4a::po",␊ |
161 | dgettext ("po4a", "Unknown option: %s"), $opt)␊ |
162 | unless exists $self->{options}{$opt};␊ |
163 | $self->{options}{$opt} = $options->{$opt};␊ |
164 | }␊ |
165 | }␊ |
166 | $self->{options}{'porefs'} =~ /^(full|noline|none)$/ ||␊ |
167 | die wrap_mod("po4a::po",␊ |
168 | dgettext ("po4a",␊ |
169 | "Invalid value for option 'porefs' ('%s' is ".␊ |
170 | "not one of 'full', 'noline' or 'none')"),␊ |
171 | $self->{options}{'porefs'});␊ |
172 | ␊ |
173 | $self->{po}=();␊ |
174 | $self->{count}=0; # number of msgids in the PO␊ |
175 | # count_doc: number of strings in the document␊ |
176 | # (duplicate strings counted multiple times)␊ |
177 | $self->{count_doc}=0;␊ |
178 | $self->{header_comment}=␊ |
179 | " SOME DESCRIPTIVE TITLE\n"␊ |
180 | ." Copyright (C) YEAR ".␊ |
181 | $self->{options}{'copyright-holder'}."\n"␊ |
182 | ." This file is distributed under the same license ".␊ |
183 | "as the ".$self->{options}{'package-name'}." package.\n"␊ |
184 | ." FIRST AUTHOR <EMAIL\@ADDRESS>, YEAR.\n"␊ |
185 | ."\n"␊ |
186 | .", fuzzy";␊ |
187 | # $self->header_tag="fuzzy";␊ |
188 | $self->{header}=escape_text("Project-Id-Version: ".␊ |
189 | $self->{options}{'package-name'}." ".␊ |
190 | $self->{options}{'package-version'}."\n".␊ |
191 | ((defined $self->{options}{'msgid-bugs-address'})?␊ |
192 | "Report-Msgid-Bugs-To: ".$self->{options}{'msgid-bugs-address'}."\n":␊ |
193 | "").␊ |
194 | "POT-Creation-Date: $date\n".␊ |
195 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n".␊ |
196 | "Last-Translator: FULL NAME <EMAIL\@ADDRESS>\n".␊ |
197 | "Language-Team: LANGUAGE <LL\@li.org>\n".␊ |
198 | "Language: \n".␊ |
199 | "MIME-Version: 1.0\n".␊ |
200 | "Content-Type: text/plain; charset=CHARSET\n".␊ |
201 | "Content-Transfer-Encoding: 8bit\n");␊ |
202 | ␊ |
203 | $self->{encoder}=find_encoding("ascii");␊ |
204 | $self->{footer}=[];␊ |
205 | ␊ |
206 | # To make stats about gettext hits␊ |
207 | $self->stats_clear();␊ |
208 | }␊ |
209 | ␊ |
210 | =item read($)␊ |
211 | ␊ |
212 | Reads a PO file (which name is given as argument). Previously existing␊ |
213 | entries in self are not removed, the new ones are added to the end of the␊ |
214 | catalog.␊ |
215 | ␊ |
216 | =cut␊ |
217 | ␊ |
218 | sub read {␊ |
219 | my $self=shift;␊ |
220 | my $filename=shift␊ |
221 | or croak wrap_mod("po4a::po",␊ |
222 | dgettext("po4a",␊ |
223 | "Please provide a non-null filename"));␊ |
224 | ␊ |
225 | my $lang = basename($filename);␊ |
226 | $lang =~ s/\.po$//;␊ |
227 | $self->{lang} = $lang;␊ |
228 | ␊ |
229 | my $fh;␊ |
230 | if ($filename eq '-') {␊ |
231 | $fh=*STDIN;␊ |
232 | } else {␊ |
233 | open $fh,"<$filename"␊ |
234 | or croak wrap_mod("po4a::po",␊ |
235 | dgettext("po4a", "Can't read from %s: %s"),␊ |
236 | $filename, $!);␊ |
237 | }␊ |
238 | ␊ |
239 | ## Read paragraphs line-by-line␊ |
240 | my $pofile="";␊ |
241 | my $textline;␊ |
242 | while (defined ($textline = <$fh>)) {␊ |
243 | $pofile .= $textline;␊ |
244 | }␊ |
245 | # close INPUT␊ |
246 | # or croak (sprintf(dgettext("po4a",␊ |
247 | # "Can't close %s after reading: %s"),␊ |
248 | # $filename,$!)."\n");␊ |
249 | ␊ |
250 | my $linenum=0;␊ |
251 | ␊ |
252 | foreach my $msg (split (/\n\n/,$pofile)) {␊ |
253 | my ($msgid,$msgstr,$comment,$previous,$automatic,$reference,$flags,$buffer);␊ |
254 | my ($msgid_plural, $msgstr_plural);␊ |
255 | if ($msg =~ m/^#~/m) {␊ |
256 | push(@{$self->{footer}}, $msg);␊ |
257 | next;␊ |
258 | }␊ |
259 | foreach my $line (split (/\n/,$msg)) {␊ |
260 | $linenum++;␊ |
261 | if ($line =~ /^#\. ?(.*)$/) { # Automatic comment␊ |
262 | $automatic .= (defined($automatic) ? "\n" : "").$1;␊ |
263 | ␊ |
264 | } elsif ($line =~ /^#: ?(.*)$/) { # reference␊ |
265 | $reference .= (defined($reference) ? "\n" : "").$1;␊ |
266 | ␊ |
267 | } elsif ($line =~ /^#, ?(.*)$/) { # flags␊ |
268 | $flags .= (defined($flags) ? "\n" : "").$1;␊ |
269 | ␊ |
270 | } elsif ($line =~ /^#\| ?(.*)$/) { # previous translation␊ |
271 | $previous .= (defined($previous) ? "\n" : "").($1||"");␊ |
272 | ␊ |
273 | } elsif ($line =~ /^#(.*)$/) { # Translator comments␊ |
274 | $comment .= (defined($comment) ? "\n" : "").($1||"");␊ |
275 | ␊ |
276 | } elsif ($line =~ /^msgid (".*")$/) { # begin of msgid␊ |
277 | $buffer = $1;␊ |
278 | ␊ |
279 | } elsif ($line =~ /^msgid_plural (".*")$/) {␊ |
280 | # begin of msgid_plural, end of msgid␊ |
281 | ␊ |
282 | $msgid = $buffer;␊ |
283 | $buffer = $1;␊ |
284 | ␊ |
285 | } elsif ($line =~ /^msgstr (".*")$/) {␊ |
286 | # begin of msgstr, end of msgid␊ |
287 | ␊ |
288 | $msgid = $buffer;␊ |
289 | $buffer = "$1";␊ |
290 | ␊ |
291 | } elsif ($line =~ /^msgstr\[([0-9]+)\] (".*")$/) {␊ |
292 | # begin of msgstr[x], end of msgid_plural or msgstr[x-1]␊ |
293 | ␊ |
294 | # Note: po4a cannot uses plural forms␊ |
295 | # (no integer to use the plural form)␊ |
296 | # * drop the msgstr[x] where x >= 2␊ |
297 | # * use msgstr[0] as the translation of msgid␊ |
298 | # * use msgstr[1] as the translation of msgid_plural␊ |
299 | ␊ |
300 | if ($1 eq "0") {␊ |
301 | $msgid_plural = $buffer;␊ |
302 | $buffer = "$2";␊ |
303 | } elsif ($1 eq "1") {␊ |
304 | $msgstr = $buffer;␊ |
305 | $buffer = "$2";␊ |
306 | } elsif ($1 eq "2") {␊ |
307 | $msgstr_plural = $buffer;␊ |
308 | warn wrap_ref_mod("$filename:$linenum",␊ |
309 | "po4a::po",␊ |
310 | dgettext("po4a", "Messages with more than 2 plural forms are not supported."));␊ |
311 | }␊ |
312 | } elsif ($line =~ /^(".*")$/) {␊ |
313 | # continuation of a line␊ |
314 | $buffer .= "\n$1";␊ |
315 | ␊ |
316 | } else {␊ |
317 | warn wrap_ref_mod("$filename:$linenum",␊ |
318 | "po4a::po",␊ |
319 | dgettext("po4a", "Strange line: -->%s<--"),␊ |
320 | $line);␊ |
321 | }␊ |
322 | }␊ |
323 | $linenum++;␊ |
324 | if (defined $msgid_plural) {␊ |
325 | $msgstr_plural=$buffer;␊ |
326 | ␊ |
327 | $msgid = unquote_text($msgid) if (defined($msgid));␊ |
328 | $msgstr = unquote_text($msgstr) if (defined($msgstr));␊ |
329 | ␊ |
330 | $self->push_raw ('msgid' => $msgid,␊ |
331 | 'msgstr' => $msgstr,␊ |
332 | 'reference' => $reference,␊ |
333 | 'flags' => $flags,␊ |
334 | 'comment' => $comment,␊ |
335 | 'previous' => $previous,␊ |
336 | 'automatic' => $automatic,␊ |
337 | 'plural' => 0);␊ |
338 | ␊ |
339 | $msgid_plural = unquote_text($msgid_plural)␊ |
340 | if (defined($msgid_plural));␊ |
341 | $msgstr_plural = unquote_text($msgstr_plural)␊ |
342 | if (defined($msgstr_plural));␊ |
343 | ␊ |
344 | $self->push_raw ('msgid' => $msgid_plural,␊ |
345 | 'msgstr' => $msgstr_plural,␊ |
346 | 'reference' => $reference,␊ |
347 | 'flags' => $flags,␊ |
348 | 'comment' => $comment,␊ |
349 | 'previous' => $previous,␊ |
350 | 'automatic' => $automatic,␊ |
351 | 'plural' => 1);␊ |
352 | } else {␊ |
353 | $msgstr=$buffer;␊ |
354 | ␊ |
355 | $msgid = unquote_text($msgid) if (defined($msgid));␊ |
356 | $msgstr = unquote_text($msgstr) if (defined($msgstr));␊ |
357 | ␊ |
358 | $self->push_raw ('msgid' => $msgid,␊ |
359 | 'msgstr' => $msgstr,␊ |
360 | 'reference' => $reference,␊ |
361 | 'flags' => $flags,␊ |
362 | 'comment' => $comment,␊ |
363 | 'previous' => $previous,␊ |
364 | 'automatic' => $automatic);␊ |
365 | }␊ |
366 | }␊ |
367 | }␊ |
368 | ␊ |
369 | =item write($)␊ |
370 | ␊ |
371 | Writes the current catalog to the given file.␊ |
372 | ␊ |
373 | =cut␊ |
374 | ␊ |
375 | sub write{␊ |
376 | my $self=shift;␊ |
377 | my $filename=shift␊ |
378 | or croak dgettext("po4a","Can't write to a file without filename")."\n";␊ |
379 | ␊ |
380 | my $fh;␊ |
381 | if ($filename eq '-') {␊ |
382 | $fh=\*STDOUT;␊ |
383 | } else {␊ |
384 | # make sure the directory in which we should write the localized␊ |
385 | # file exists␊ |
386 | my $dir = $filename;␊ |
387 | if ($dir =~ m|/|) {␊ |
388 | $dir =~ s|/[^/]*$||;␊ |
389 | ␊ |
390 | File::Path::mkpath($dir, 0, 0755) # Croaks on error␊ |
391 | if (length ($dir) && ! -e $dir);␊ |
392 | }␊ |
393 | open $fh,">$filename"␊ |
394 | or croak wrap_mod("po4a::po",␊ |
395 | dgettext("po4a", "Can't write to %s: %s"),␊ |
396 | $filename, $!);␊ |
397 | }␊ |
398 | ␊ |
399 | print $fh "".format_comment($self->{header_comment},"")␊ |
400 | if defined($self->{header_comment}) && length($self->{header_comment});␊ |
401 | ␊ |
402 | print $fh "msgid \"\"\n";␊ |
403 | print $fh "msgstr ".quote_text($self->{header})."\n\n";␊ |
404 | ␊ |
405 | ␊ |
406 | my $buf_msgstr_plural; # Used to keep the first msgstr of plural forms␊ |
407 | my $first=1;␊ |
408 | foreach my $msgid ( sort { ($self->{po}{"$a"}{'pos'}) <=>␊ |
409 | ($self->{po}{"$b"}{'pos'})␊ |
410 | } keys %{$self->{po}}) {␊ |
411 | my $output="";␊ |
412 | ␊ |
413 | if ($first) {␊ |
414 | $first=0;␊ |
415 | } else {␊ |
416 | $output .= "\n";␊ |
417 | }␊ |
418 | ␊ |
419 | $output .= format_comment($self->{po}{$msgid}{'comment'},"")␊ |
420 | if defined($self->{po}{$msgid}{'comment'})␊ |
421 | && length ($self->{po}{$msgid}{'comment'});␊ |
422 | if ( defined($self->{po}{$msgid}{'automatic'})␊ |
423 | && length ($self->{po}{$msgid}{'automatic'})) {␊ |
424 | foreach my $comment (split(/\\n/,$self->{po}{$msgid}{'automatic'}))␊ |
425 | {␊ |
426 | $output .= format_comment($comment, ". ")␊ |
427 | }␊ |
428 | }␊ |
429 | $output .= format_comment($self->{po}{$msgid}{'type'},". type: ")␊ |
430 | if defined($self->{po}{$msgid}{'type'})␊ |
431 | && length ($self->{po}{$msgid}{'type'});␊ |
432 | $output .= format_comment($self->{po}{$msgid}{'reference'},": ")␊ |
433 | if defined($self->{po}{$msgid}{'reference'})␊ |
434 | && length ($self->{po}{$msgid}{'reference'});␊ |
435 | $output .= "#, ". join(", ", sort split(/\s+/,$self->{po}{$msgid}{'flags'}))."\n"␊ |
436 | if defined($self->{po}{$msgid}{'flags'})␊ |
437 | && length ($self->{po}{$msgid}{'flags'});␊ |
438 | $output .= format_comment($self->{po}{$msgid}{'previous'},"| ")␊ |
439 | if defined($self->{po}{$msgid}{'previous'})␊ |
440 | && length ($self->{po}{$msgid}{'previous'});␊ |
441 | ␊ |
442 | if (exists $self->{po}{$msgid}{'plural'}) {␊ |
443 | if ($self->{po}{$msgid}{'plural'} == 0) {␊ |
444 | if ($self->get_charset =~ /^utf-8$/i) {␊ |
445 | my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'});␊ |
446 | $msgid = Encode::decode_utf8($msgid);␊ |
447 | $output .= Encode::encode_utf8("msgid ".quote_text($msgid)."\n");␊ |
448 | $buf_msgstr_plural = Encode::encode_utf8("msgstr[0] ".quote_text($msgstr)."\n");␊ |
449 | } else {␊ |
450 | $output = "msgid ".quote_text($msgid)."\n";␊ |
451 | $buf_msgstr_plural = "msgstr[0] ".quote_text($self->{po}{$msgid}{'msgstr'})."\n";␊ |
452 | }␊ |
453 | } elsif ($self->{po}{$msgid}{'plural'} == 1) {␊ |
454 | # TODO: there may be only one plural form␊ |
455 | if ($self->get_charset =~ /^utf-8$/i) {␊ |
456 | my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'});␊ |
457 | $msgid = Encode::decode_utf8($msgid);␊ |
458 | $output = Encode::encode_utf8("msgid_plural ".quote_text($msgid)."\n");␊ |
459 | $output .= $buf_msgstr_plural;␊ |
460 | $output .= Encode::encode_utf8("msgstr[1] ".quote_text($msgstr)."\n");␊ |
461 | $buf_msgstr_plural = "";␊ |
462 | } else {␊ |
463 | $output = "msgid_plural ".quote_text($msgid)."\n";␊ |
464 | $output .= $buf_msgstr_plural;␊ |
465 | $output .= "msgstr[1] ".quote_text($self->{po}{$msgid}{'msgstr'})."\n";␊ |
466 | }␊ |
467 | } else {␊ |
468 | die wrap_msg(dgettext("po4a","Can't write PO files with more than two plural forms."));␊ |
469 | }␊ |
470 | } else {␊ |
471 | if ($self->get_charset =~ /^utf-8$/i) {␊ |
472 | my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'});␊ |
473 | $msgid = Encode::decode_utf8($msgid);␊ |
474 | $output .= Encode::encode_utf8("msgid ".quote_text($msgid)."\n");␊ |
475 | $output .= Encode::encode_utf8("msgstr ".quote_text($msgstr)."\n");␊ |
476 | } else {␊ |
477 | $output .= "msgid ".quote_text($msgid)."\n";␊ |
478 | $output .= "msgstr ".quote_text($self->{po}{$msgid}{'msgstr'})."\n";␊ |
479 | }␊ |
480 | }␊ |
481 | ␊ |
482 | print $fh $output;␊ |
483 | }␊ |
484 | print $fh join("\n\n", @{$self->{footer}}) if scalar @{$self->{footer}};␊ |
485 | ␊ |
486 | # print STDERR "$fh";␊ |
487 | # if ($filename ne '-') {␊ |
488 | # close $fh␊ |
489 | # or croak (sprintf(dgettext("po4a",␊ |
490 | # "Can't close %s after writing: %s\n"),␊ |
491 | # $filename,$!));␊ |
492 | # }␊ |
493 | }␊ |
494 | ␊ |
495 | =item write_if_needed($$)␊ |
496 | ␊ |
497 | Like write, but if the PO or POT file already exists, the object will be␊ |
498 | written in a temporary file which will be compared with the existing file␊ |
499 | to check if the update is needed (this avoids to change a POT just to␊ |
500 | update a line reference or the POT-Creation-Date field).␊ |
501 | ␊ |
502 | =cut␊ |
503 | ␊ |
504 | sub move_po_if_needed {␊ |
505 | my ($new_po, $old_po, $backup) = (shift, shift, shift);␊ |
506 | my $diff;␊ |
507 | ␊ |
508 | if (-e $old_po) {␊ |
509 | my $diff_ignore = "-I'^#:' "␊ |
510 | ."-I'^\"POT-Creation-Date:' "␊ |
511 | ."-I'^\"PO-Revision-Date:'";␊ |
512 | $diff = qx(diff -q $diff_ignore $old_po $new_po);␊ |
513 | if ( $diff eq "" ) {␊ |
514 | unlink $new_po␊ |
515 | or die wrap_msg(dgettext("po4a","Can't unlink %s: %s."),␊ |
516 | $new_po, $!);␊ |
517 | # touch the old PO␊ |
518 | my ($atime, $mtime) = (time,time);␊ |
519 | utime $atime, $mtime, $old_po;␊ |
520 | } else {␊ |
521 | move $new_po, $old_po␊ |
522 | or die wrap_msg(dgettext("po4a","Can't move %s to %s: %s."),␊ |
523 | $new_po, $old_po, $!);␊ |
524 | }␊ |
525 | } else {␊ |
526 | move $new_po, $old_po␊ |
527 | or die wrap_msg(dgettext("po4a","Can't move %s to %s: %s."),␊ |
528 | $new_po, $old_po, $!);␊ |
529 | }␊ |
530 | }␊ |
531 | ␊ |
532 | sub write_if_needed {␊ |
533 | my $self=shift;␊ |
534 | my $filename=shift␊ |
535 | or croak dgettext("po4a","Can't write to a file without filename")."\n";␊ |
536 | ␊ |
537 | if (-e $filename) {␊ |
538 | my ($tmp_filename);␊ |
539 | (undef,$tmp_filename)=File::Temp->tempfile($filename."XXXX",␊ |
540 | DIR => "/tmp",␊ |
541 | OPEN => 0,␊ |
542 | UNLINK => 0);␊ |
543 | $self->write($tmp_filename);␊ |
544 | move_po_if_needed($tmp_filename, $filename);␊ |
545 | } else {␊ |
546 | $self->write($filename);␊ |
547 | }␊ |
548 | }␊ |
549 | ␊ |
550 | =item gettextize($$)␊ |
551 | ␊ |
552 | This function produces one translated message catalog from two catalogs, an␊ |
553 | original and a translation. This process is described in L<po4a(7)|po4a.7>,␊ |
554 | section I<Gettextization: how does it work?>.␊ |
555 | ␊ |
556 | =cut␊ |
557 | ␊ |
558 | sub gettextize {␊ |
559 | my $this = shift;␊ |
560 | my $class = ref($this) || $this;␊ |
561 | my ($poorig,$potrans)=(shift,shift);␊ |
562 | ␊ |
563 | my $pores=Locale::Po4a::Po->new();␊ |
564 | ␊ |
565 | my $please_fail = 0;␊ |
566 | my $toobad = dgettext("po4a",␊ |
567 | "\nThe gettextization failed (once again). Don't give up, ".␊ |
568 | "gettextizing is a subtle art, but this is only needed once ".␊ |
569 | "to convert a project to the gorgeous luxus offered by po4a ".␊ |
570 | "to translators.".␊ |
571 | "\nPlease refer to the po4a(7) documentation, the section ".␊ |
572 | "\"HOWTO convert a pre-existing translation to po4a?\" ".␊ |
573 | "contains several hints to help you in your task");␊ |
574 | ␊ |
575 | # Don't fail right now when the entry count does not match. Instead, give␊ |
576 | # it a try so that the user can see where we fail (which is probably where␊ |
577 | # the problem is).␊ |
578 | if ($poorig->count_entries_doc() > $potrans->count_entries_doc()) {␊ |
579 | warn wrap_mod("po4a gettextize", dgettext("po4a",␊ |
580 | "Original has more strings than the translation (%d>%d). ".␊ |
581 | "Please fix it by editing the translated version to add ".␊ |
582 | "some dummy entry."),␊ |
583 | $poorig->count_entries_doc(),␊ |
584 | $potrans->count_entries_doc());␊ |
585 | $please_fail = 1;␊ |
586 | } elsif ($poorig->count_entries_doc() < $potrans->count_entries_doc()) {␊ |
587 | warn wrap_mod("po4a gettextize", dgettext("po4a",␊ |
588 | "Original has less strings than the translation (%d<%d). ".␊ |
589 | "Please fix it by removing the extra entry from the ".␊ |
590 | "translated file. You may need an addendum (cf po4a(7)) ".␊ |
591 | "to reput the chunk in place after gettextization. A ".␊ |
592 | "possible cause is that a text duplicated in the original ".␊ |
593 | "is not translated the same way each time. Remove one of ".␊ |
594 | "the translations, and you're fine."),␊ |
595 | $poorig->count_entries_doc(),␊ |
596 | $potrans->count_entries_doc());␊ |
597 | $please_fail = 1;␊ |
598 | }␊ |
599 | ␊ |
600 | if ( $poorig->get_charset =~ /^utf-8$/i ) {␊ |
601 | $potrans->to_utf8;␊ |
602 | $pores->set_charset("UTF-8");␊ |
603 | } else {␊ |
604 | if ($potrans->get_charset eq "CHARSET") {␊ |
605 | $pores->set_charset("ascii");␊ |
606 | } else {␊ |
607 | $pores->set_charset($potrans->get_charset);␊ |
608 | }␊ |
609 | }␊ |
610 | print "Po character sets:\n".␊ |
611 | " original=".$poorig->get_charset."\n".␊ |
612 | " translated=".$potrans->get_charset."\n".␊ |
613 | " result=".$pores->get_charset."\n"␊ |
614 | if $debug{'encoding'};␊ |
615 | ␊ |
616 | for (my ($o,$t)=(0,0) ;␊ |
617 | $o<$poorig->count_entries_doc() && $t<$potrans->count_entries_doc();␊ |
618 | $o++,$t++) {␊ |
619 | #␊ |
620 | # Extract some informations␊ |
621 | ␊ |
622 | my ($orig,$trans)=($poorig->msgid_doc($o),$potrans->msgid_doc($t));␊ |
623 | # print STDERR "Matches [[$orig]]<<$trans>>\n";␊ |
624 | ␊ |
625 | my ($reforig,$reftrans)=($poorig->{po}{$orig}{'reference'},␊ |
626 | $potrans->{po}{$trans}{'reference'});␊ |
627 | my ($typeorig,$typetrans)=($poorig->{po}{$orig}{'type'},␊ |
628 | $potrans->{po}{$trans}{'type'});␊ |
629 | ␊ |
630 | #␊ |
631 | # Make sure the type of both string exist␊ |
632 | #␊ |
633 | die wrap_mod("po4a gettextize",␊ |
634 | "Internal error: type of original string number %s ".␊ |
635 | "isn't provided", $o)␊ |
636 | if ($typeorig eq '');␊ |
637 | ␊ |
638 | die wrap_mod("po4a gettextize",␊ |
639 | "Internal error: type of translated string number %s ".␊ |
640 | "isn't provided", $o)␊ |
641 | if ($typetrans eq '');␊ |
642 | ␊ |
643 | #␊ |
644 | # Make sure both type are the same␊ |
645 | #␊ |
646 | if ($typeorig ne $typetrans){␊ |
647 | $pores->write("gettextization.failed.po");␊ |
648 | eval {␊ |
649 | # Recode $trans into current charset, if possible␊ |
650 | require I18N::Langinfo;␊ |
651 | I18N::Langinfo->import(qw(langinfo CODESET));␊ |
652 | my $codeset = langinfo(CODESET());␊ |
653 | Encode::from_to($trans, $potrans->get_charset, $codeset);␊ |
654 | };␊ |
655 | die wrap_msg(dgettext("po4a",␊ |
656 | "po4a gettextization: Structure disparity between ".␊ |
657 | "original and translated files:\n".␊ |
658 | "msgid (at %s) is of type '%s' while\n".␊ |
659 | "msgstr (at %s) is of type '%s'.\n".␊ |
660 | "Original text: %s\n".␊ |
661 | "Translated text: %s\n".␊ |
662 | "(result so far dumped to gettextization.failed.po)").␊ |
663 | "%s",␊ |
664 | $reforig, $typeorig,␊ |
665 | $reftrans, $typetrans,␊ |
666 | $orig,␊ |
667 | $trans,␊ |
668 | $toobad);␊ |
669 | }␊ |
670 | ␊ |
671 | #␊ |
672 | # Push the entry␊ |
673 | #␊ |
674 | my $flags;␊ |
675 | if (defined $poorig->{po}{$orig}{'flags'}) {␊ |
676 | $flags = $poorig->{po}{$orig}{'flags'}." fuzzy";␊ |
677 | } else {␊ |
678 | $flags = "fuzzy";␊ |
679 | }␊ |
680 | $pores->push_raw('msgid' => $orig,␊ |
681 | 'msgstr' => $trans,␊ |
682 | 'flags' => $flags,␊ |
683 | 'type' => $typeorig,␊ |
684 | 'reference' => $reforig,␊ |
685 | 'conflict' => 1,␊ |
686 | 'transref' => $potrans->{po}{$trans}{'reference'})␊ |
687 | unless (defined($pores->{po}{$orig})␊ |
688 | and ($pores->{po}{$orig}{'msgstr'} eq $trans))␊ |
689 | # FIXME: maybe we should be smarter about what reference should be␊ |
690 | # sent to push_raw.␊ |
691 | }␊ |
692 | ␊ |
693 | # make sure we return a useful error message when entry count differ␊ |
694 | die "$toobad\n" if $please_fail;␊ |
695 | ␊ |
696 | return $pores;␊ |
697 | }␊ |
698 | ␊ |
699 | =item filter($)␊ |
700 | ␊ |
701 | This function extracts a catalog from an existing one. Only the entries having␊ |
702 | a reference in the given file will be placed in the resulting catalog.␊ |
703 | ␊ |
704 | This function parses its argument, converts it to a Perl function definition,␊ |
705 | evals this definition and filters the fields for which this function returns␊ |
706 | true.␊ |
707 | ␊ |
708 | I love Perl sometimes ;)␊ |
709 | ␊ |
710 | =cut␊ |
711 | ␊ |
712 | sub filter {␊ |
713 | my $self=shift;␊ |
714 | our $filter=shift;␊ |
715 | ␊ |
716 | my $res;␊ |
717 | $res = Locale::Po4a::Po->new();␊ |
718 | ␊ |
719 | # Parse the filter␊ |
720 | our $code="sub apply { return ";␊ |
721 | our $pos=0;␊ |
722 | our $length = length $filter;␊ |
723 | ␊ |
724 | # explode chars to parts. How to subscript a string in Perl?␊ |
725 | our @filter = split(//,$filter);␊ |
726 | ␊ |
727 | sub gloups {␊ |
728 | my $fmt=shift;␊ |
729 | my $space = "";␊ |
730 | for (1..$pos){␊ |
731 | $space .= ' ';␊ |
732 | }␊ |
733 | die wrap_msg("$fmt\n$filter\n$space^ HERE");␊ |
734 | }␊ |
735 | sub showmethecode {␊ |
736 | return unless $debug{'filter'};␊ |
737 | my $fmt=shift;␊ |
738 | my $space="";␊ |
739 | for (1..$pos){␊ |
740 | $space .= ' ';␊ |
741 | }␊ |
742 | print STDERR "$filter\n$space^ $fmt\n";#"$code\n";␊ |
743 | }␊ |
744 | ␊ |
745 | # I dream of a lex in perl :-/␊ |
746 | sub parse_expression {␊ |
747 | showmethecode("Begin expression")␊ |
748 | if $debug{'filter'};␊ |
749 | ␊ |
750 | gloups("Begin of expression expected, got '%s'",$filter[$pos])␊ |
751 | unless ($filter[$pos] eq '(');␊ |
752 | $pos ++; # pass the '('␊ |
753 | if ($filter[$pos] eq '&') {␊ |
754 | # AND␊ |
755 | $pos++;␊ |
756 | showmethecode("Begin of AND")␊ |
757 | if $debug{'filter'};␊ |
758 | $code .= "(";␊ |
759 | while (1) {␊ |
760 | gloups ("Unfinished AND statement.")␊ |
761 | if ($pos == $length);␊ |
762 | parse_expression();␊ |
763 | if ($filter[$pos] eq '(') {␊ |
764 | $code .= " && ";␊ |
765 | } elsif ($filter[$pos] eq ')') {␊ |
766 | last; # do not eat that char␊ |
767 | } else {␊ |
768 | gloups("End of AND or begin of sub-expression expected, got '%s'", $filter[$pos]);␊ |
769 | }␊ |
770 | }␊ |
771 | $code .= ")";␊ |
772 | } elsif ($filter[$pos] eq '|') {␊ |
773 | # OR␊ |
774 | $pos++;␊ |
775 | $code .= "(";␊ |
776 | while (1) {␊ |
777 | gloups("Unfinished OR statement.")␊ |
778 | if ($pos == $length);␊ |
779 | parse_expression();␊ |
780 | if ($filter[$pos] eq '(') {␊ |
781 | $code .= " || ";␊ |
782 | } elsif ($filter[$pos] eq ')') {␊ |
783 | last; # do not eat that char␊ |
784 | } else {␊ |
785 | gloups("End of OR or begin of sub-expression expected, got '%s'",$filter[$pos]);␊ |
786 | }␊ |
787 | }␊ |
788 | $code .= ")";␊ |
789 | } elsif ($filter[$pos] eq '!') {␊ |
790 | # NOT␊ |
791 | $pos++;␊ |
792 | $code .= "(!";␊ |
793 | gloups("Missing sub-expression in NOT statement.")␊ |
794 | if ($pos == $length);␊ |
795 | parse_expression();␊ |
796 | $code .= ")";␊ |
797 | } else {␊ |
798 | # must be an equal. Let's get field and argument␊ |
799 | my ($field,$arg,$done);␊ |
800 | $field = substr($filter,$pos);␊ |
801 | gloups("EQ statement contains no '=' or invalid field name")␊ |
802 | unless ($field =~ /([a-z]*)=/i);␊ |
803 | $field = lc($1);␊ |
804 | $pos += (length $field) + 1;␊ |
805 | ␊ |
806 | # check that we've got a valid field name,␊ |
807 | # and the number it referes to␊ |
808 | # DO NOT CHANGE THE ORDER␊ |
809 | my @names=qw(msgid msgstr reference flags comment previous automatic);␊ |
810 | my $fieldpos;␊ |
811 | for ($fieldpos = 0;␊ |
812 | $fieldpos < scalar @names && $field ne $names[$fieldpos];␊ |
813 | $fieldpos++) {}␊ |
814 | gloups("Invalid field name: %s",$field)␊ |
815 | if $fieldpos == scalar @names; # not found␊ |
816 | ␊ |
817 | # Now, get the argument value. It has to be between quotes,␊ |
818 | # which can be escaped␊ |
819 | # We point right on the first char of the argument␊ |
820 | # (first quote already eaten)␊ |
821 | my $escaped = 0;␊ |
822 | my $quoted = 0;␊ |
823 | if ($filter[$pos] eq '"') {␊ |
824 | $pos++;␊ |
825 | $quoted = 1;␊ |
826 | }␊ |
827 | showmethecode(($quoted?"Quoted":"Unquoted")." argument of field '$field'")␊ |
828 | if $debug{'filter'};␊ |
829 | ␊ |
830 | while (!$done) {␊ |
831 | gloups("Unfinished EQ argument.")␊ |
832 | if ($pos == $length);␊ |
833 | ␊ |
834 | if ($quoted) {␊ |
835 | if ($filter[$pos] eq '\\') {␊ |
836 | if ($escaped) {␊ |
837 | $arg .= '\\';␊ |
838 | $escaped = 0;␊ |
839 | } else {␊ |
840 | $escaped = 1;␊ |
841 | }␊ |
842 | } elsif ($escaped) {␊ |
843 | if ($filter[$pos] eq '"') {␊ |
844 | $arg .= '"';␊ |
845 | $escaped = 0;␊ |
846 | } else {␊ |
847 | gloups("Invalid escape sequence in argument: '\\%s'",$filter[$pos]);␊ |
848 | }␊ |
849 | } else {␊ |
850 | if ($filter[$pos] eq '"') {␊ |
851 | $done = 1;␊ |
852 | } else {␊ |
853 | $arg .= $filter[$pos];␊ |
854 | }␊ |
855 | }␊ |
856 | } else {␊ |
857 | if ($filter[$pos] eq ')') {␊ |
858 | # counter the next ++ since we don't want to eat␊ |
859 | # this char␊ |
860 | $pos--;␊ |
861 | $done = 1;␊ |
862 | } else {␊ |
863 | $arg .= $filter[$pos];␊ |
864 | }␊ |
865 | }␊ |
866 | $pos++;␊ |
867 | }␊ |
868 | # and now, add the code to check this equality␊ |
869 | $code .= "(\$_[$fieldpos] =~ m{$arg})";␊ |
870 | ␊ |
871 | }␊ |
872 | showmethecode("End of expression")␊ |
873 | if $debug{'filter'};␊ |
874 | gloups("Unfinished statement.")␊ |
875 | if ($pos == $length);␊ |
876 | gloups("End of expression expected, got '%s'",$filter[$pos])␊ |
877 | unless ($filter[$pos] eq ')');␊ |
878 | $pos++;␊ |
879 | }␊ |
880 | # And now, launch the beast, finish the function and use eval␊ |
881 | # to construct this function.␊ |
882 | # Ok, the lack of lexer is a fair price for the eval ;)␊ |
883 | parse_expression();␊ |
884 | gloups("Garbage at the end of the expression")␊ |
885 | if ($pos != $length);␊ |
886 | $code .= "; }";␊ |
887 | print STDERR "CODE = $code\n"␊ |
888 | if $debug{'filter'};␊ |
889 | eval $code;␊ |
890 | die wrap_mod("po4a::po", dgettext("po4a", "Eval failure: %s"), $@)␊ |
891 | if $@;␊ |
892 | ␊ |
893 | for (my $cpt=(0) ;␊ |
894 | $cpt<$self->count_entries();␊ |
895 | $cpt++) {␊ |
896 | ␊ |
897 | my ($msgid,$ref,$msgstr,$flags,$type,$comment,$previous,$automatic);␊ |
898 | ␊ |
899 | $msgid = $self->msgid($cpt);␊ |
900 | $ref=$self->{po}{$msgid}{'reference'};␊ |
901 | ␊ |
902 | $msgstr= $self->{po}{$msgid}{'msgstr'};␊ |
903 | $flags = $self->{po}{$msgid}{'flags'};␊ |
904 | $type = $self->{po}{$msgid}{'type'};␊ |
905 | $comment = $self->{po}{$msgid}{'comment'};␊ |
906 | $previous = $self->{po}{$msgid}{'previous'};␊ |
907 | $automatic = $self->{po}{$msgid}{'automatic'};␊ |
908 | ␊ |
909 | # DO NOT CHANGE THE ORDER␊ |
910 | $res->push_raw('msgid' => $msgid,␊ |
911 | 'msgstr' => $msgstr,␊ |
912 | 'flags' => $flags,␊ |
913 | 'type' => $type,␊ |
914 | 'reference' => $ref,␊ |
915 | 'comment' => $comment,␊ |
916 | 'previous' => $previous,␊ |
917 | 'automatic' => $automatic)␊ |
918 | if (apply($msgid,$msgstr,$ref,$flags,$comment,$previous,$automatic));␊ |
919 | }␊ |
920 | # delete the apply subroutine␊ |
921 | # otherwise it will be redefined.␊ |
922 | undef &apply;␊ |
923 | return $res;␊ |
924 | }␊ |
925 | ␊ |
926 | =item to_utf8()␊ |
927 | ␊ |
928 | Recodes to UTF-8 the PO's msgstrs. Does nothing if the charset is not␊ |
929 | specified in the PO file ("CHARSET" value), or if it's already UTF-8 or␊ |
930 | ASCII.␊ |
931 | ␊ |
932 | =cut␊ |
933 | ␊ |
934 | sub to_utf8 {␊ |
935 | my $this = shift;␊ |
936 | my $charset = $this->get_charset();␊ |
937 | ␊ |
938 | unless ($charset eq "CHARSET" or␊ |
939 | $charset =~ /^ascii$/i or␊ |
940 | $charset =~ /^utf-8$/i) {␊ |
941 | foreach my $msgid ( keys %{$this->{po}} ) {␊ |
942 | Encode::from_to($this->{po}{$msgid}{'msgstr'}, $charset, "utf-8");␊ |
943 | }␊ |
944 | $this->set_charset("UTF-8");␊ |
945 | }␊ |
946 | }␊ |
947 | ␊ |
948 | =back␊ |
949 | ␊ |
950 | =head1 Functions to use a message catalog for translations␊ |
951 | ␊ |
952 | =over 4␊ |
953 | ␊ |
954 | =item gettext($%)␊ |
955 | ␊ |
956 | Request the translation of the string given as argument in the current catalog.␊ |
957 | The function returns the original (untranslated) string if the string was not␊ |
958 | found.␊ |
959 | ␊ |
960 | After the string to translate, you can pass a hash of extra␊ |
961 | arguments. Here are the valid entries:␊ |
962 | ␊ |
963 | =over␊ |
964 | ␊ |
965 | =item B<wrap>␊ |
966 | ␊ |
967 | boolean indicating whether we can consider that whitespaces in string are␊ |
968 | not important. If yes, the function canonizes the string before looking for␊ |
969 | a translation, and wraps the result.␊ |
970 | ␊ |
971 | =item B<wrapcol>␊ |
972 | ␊ |
973 | the column at which we should wrap (default: 76).␊ |
974 | ␊ |
975 | =back␊ |
976 | ␊ |
977 | =cut␊ |
978 | ␊ |
979 | sub gettext {␊ |
980 | my $self=shift;␊ |
981 | my $text=shift;␊ |
982 | my (%opt)=@_;␊ |
983 | my $res;␊ |
984 | ␊ |
985 | return "" unless defined($text) && length($text); # Avoid returning the header.␊ |
986 | my $validoption="reference wrap wrapcol";␊ |
987 | my %validoption;␊ |
988 | ␊ |
989 | map { $validoption{$_}=1 } (split(/ /,$validoption));␊ |
990 | foreach (keys %opt) {␊ |
991 | Carp::confess "internal error: unknown arg $_.\n".␊ |
992 | "Here are the valid options: $validoption.\n"␊ |
993 | unless $validoption{$_};␊ |
994 | }␊ |
995 | ␊ |
996 | $text=canonize($text)␊ |
997 | if ($opt{'wrap'});␊ |
998 | ␊ |
999 | my $esc_text=escape_text($text);␊ |
1000 | ␊ |
1001 | $self->{gettextqueries}++;␊ |
1002 | ␊ |
1003 | if ( defined $self->{po}{$esc_text}␊ |
1004 | and defined $self->{po}{$esc_text}{'msgstr'}␊ |
1005 | and length $self->{po}{$esc_text}{'msgstr'}␊ |
1006 | and ( not defined $self->{po}{$esc_text}{'flags'}␊ |
1007 | or $self->{po}{$esc_text}{'flags'} !~ /fuzzy/)) {␊ |
1008 | ␊ |
1009 | $self->{gettexthits}++;␊ |
1010 | $res = unescape_text($self->{po}{$esc_text}{'msgstr'});␊ |
1011 | if (defined $self->{po}{$esc_text}{'plural'}) {␊ |
1012 | if ($self->{po}{$esc_text}{'plural'} eq "0") {␊ |
1013 | warn wrap_mod("po4a gettextize", dgettext("po4a",␊ |
1014 | "'%s' is the singular form of a message, ".␊ |
1015 | "po4a will use the msgstr[0] translation (%s)."),␊ |
1016 | $esc_text, $res);␊ |
1017 | } else {␊ |
1018 | warn wrap_mod("po4a gettextize", dgettext("po4a",␊ |
1019 | "'%s' is the plural form of a message, ".␊ |
1020 | "po4a will use the msgstr[1] translation (%s)."),␊ |
1021 | $esc_text, $res);␊ |
1022 | }␊ |
1023 | }␊ |
1024 | } else {␊ |
1025 | $res = $text;␊ |
1026 | }␊ |
1027 | ␊ |
1028 | if ($opt{'wrap'}) {␊ |
1029 | if ($self->get_charset =~ /^utf-8$/i) {␊ |
1030 | $res=Encode::decode_utf8($res);␊ |
1031 | $res=wrap ($res, $opt{'wrapcol'} || 76);␊ |
1032 | $res=Encode::encode_utf8($res);␊ |
1033 | } else {␊ |
1034 | $res=wrap ($res, $opt{'wrapcol'} || 76);␊ |
1035 | }␊ |
1036 | }␊ |
1037 | # print STDERR "Gettext >>>$text<<<(escaped=$esc_text)=[[[$res]]]\n\n";␊ |
1038 | return $res;␊ |
1039 | }␊ |
1040 | ␊ |
1041 | =item stats_get()␊ |
1042 | ␊ |
1043 | Returns statistics about the hit ratio of gettext since the last time that␊ |
1044 | stats_clear() was called. Please note that it's not the same␊ |
1045 | statistics than the one printed by msgfmt --statistic. Here, it's statistics␊ |
1046 | about recent usage of the PO file, while msgfmt reports the status of the␊ |
1047 | file. Example of use:␊ |
1048 | ␊ |
1049 | [some use of the PO file to translate stuff]␊ |
1050 | ␊ |
1051 | ($percent,$hit,$queries) = $pofile->stats_get();␊ |
1052 | print "So far, we found translations for $percent\% ($hit of $queries) of strings.\n";␊ |
1053 | ␊ |
1054 | =cut␊ |
1055 | ␊ |
1056 | sub stats_get() {␊ |
1057 | my $self=shift;␊ |
1058 | my ($h,$q)=($self->{gettexthits},$self->{gettextqueries});␊ |
1059 | my $p = ($q == 0 ? 100 : int($h/$q*10000)/100);␊ |
1060 | ␊ |
1061 | # $p =~ s/\.00//;␊ |
1062 | # $p =~ s/(\..)0/$1/;␊ |
1063 | ␊ |
1064 | return ( $p,$h,$q );␊ |
1065 | }␊ |
1066 | ␊ |
1067 | =item stats_clear()␊ |
1068 | ␊ |
1069 | Clears the statistics about gettext hits.␊ |
1070 | ␊ |
1071 | =cut␊ |
1072 | ␊ |
1073 | sub stats_clear {␊ |
1074 | my $self = shift;␊ |
1075 | $self->{gettextqueries} = 0;␊ |
1076 | $self->{gettexthits} = 0;␊ |
1077 | }␊ |
1078 | ␊ |
1079 | =back␊ |
1080 | ␊ |
1081 | =head1 Functions to build a message catalog␊ |
1082 | ␊ |
1083 | =over 4␊ |
1084 | ␊ |
1085 | =item push(%)␊ |
1086 | ␊ |
1087 | Push a new entry at the end of the current catalog. The arguments should␊ |
1088 | form a hash table. The valid keys are:␊ |
1089 | ␊ |
1090 | =over 4␊ |
1091 | ␊ |
1092 | =item B<msgid>␊ |
1093 | ␊ |
1094 | the string in original language.␊ |
1095 | ␊ |
1096 | =item B<msgstr>␊ |
1097 | ␊ |
1098 | the translation.␊ |
1099 | ␊ |
1100 | =item B<reference>␊ |
1101 | ␊ |
1102 | an indication of where this string was found. Example: file.c:46 (meaning␊ |
1103 | in 'file.c' at line 46). It can be a space-separated list in case of␊ |
1104 | multiple occurrences.␊ |
1105 | ␊ |
1106 | =item B<comment>␊ |
1107 | ␊ |
1108 | a comment added here manually (by the translators). The format here is free.␊ |
1109 | ␊ |
1110 | =item B<automatic>␊ |
1111 | ␊ |
1112 | a comment which was automatically added by the string extraction␊ |
1113 | program. See the B<--add-comments> option of the B<xgettext> program for␊ |
1114 | more information.␊ |
1115 | ␊ |
1116 | =item B<flags>␊ |
1117 | ␊ |
1118 | space-separated list of all defined flags for this entry.␊ |
1119 | ␊ |
1120 | Valid flags are: B<c-text>, B<python-text>, B<lisp-text>, B<elisp-text>, B<librep-text>,␊ |
1121 | B<smalltalk-text>, B<java-text>, B<awk-text>, B<object-pascal-text>, B<ycp-text>,␊ |
1122 | B<tcl-text>, B<wrap>, B<no-wrap> and B<fuzzy>.␊ |
1123 | ␊ |
1124 | See the gettext documentation for their meaning.␊ |
1125 | ␊ |
1126 | =item B<type>␊ |
1127 | ␊ |
1128 | this is mostly an internal argument: it is used while gettextizing␊ |
1129 | documents. The idea here is to parse both the original and the translation␊ |
1130 | into a PO object, and merge them, using one's msgid as msgid and the␊ |
1131 | other's msgid as msgstr. To make sure that things get ok, each msgid in PO␊ |
1132 | objects are given a type, based on their structure (like "chapt", "sect1",␊ |
1133 | "p" and so on in DocBook). If the types of strings are not the same, that␊ |
1134 | means that both files do not share the same structure, and the process␊ |
1135 | reports an error.␊ |
1136 | ␊ |
1137 | This information is written as automatic comment in the PO file since this␊ |
1138 | gives to translators some context about the strings to translate.␊ |
1139 | ␊ |
1140 | =item B<wrap>␊ |
1141 | ␊ |
1142 | boolean indicating whether whitespaces can be mangled in cosmetic␊ |
1143 | reformattings. If true, the string is canonized before use.␊ |
1144 | ␊ |
1145 | This information is written to the PO file using the B<wrap> or B<no-wrap> flag.␊ |
1146 | ␊ |
1147 | =item B<wrapcol>␊ |
1148 | ␊ |
1149 | the column at which we should wrap (default: 76).␊ |
1150 | ␊ |
1151 | This information is not written to the PO file.␊ |
1152 | ␊ |
1153 | =back␊ |
1154 | ␊ |
1155 | =cut␊ |
1156 | ␊ |
1157 | sub push {␊ |
1158 | my $self=shift;␊ |
1159 | my %entry=@_;␊ |
1160 | ␊ |
1161 | my $validoption="wrap wrapcol type msgid msgstr automatic previous flags reference";␊ |
1162 | my %validoption;␊ |
1163 | ␊ |
1164 | map { $validoption{$_}=1 } (split(/ /,$validoption));␊ |
1165 | foreach (keys %entry) {␊ |
1166 | Carp::confess "internal error: unknown arg $_.\n".␊ |
1167 | "Here are the valid options: $validoption.\n"␊ |
1168 | unless $validoption{$_};␊ |
1169 | }␊ |
1170 | ␊ |
1171 | unless ($entry{'wrap'}) {␊ |
1172 | $entry{'flags'} .= " no-wrap";␊ |
1173 | }␊ |
1174 | if (defined ($entry{'msgid'})) {␊ |
1175 | $entry{'msgid'} = canonize($entry{'msgid'})␊ |
1176 | if ($entry{'wrap'});␊ |
1177 | ␊ |
1178 | $entry{'msgid'} = escape_text($entry{'msgid'});␊ |
1179 | }␊ |
1180 | if (defined ($entry{'msgstr'})) {␊ |
1181 | $entry{'msgstr'} = canonize($entry{'msgstr'})␊ |
1182 | if ($entry{'wrap'});␊ |
1183 | ␊ |
1184 | $entry{'msgstr'} = escape_text($entry{'msgstr'});␊ |
1185 | }␊ |
1186 | ␊ |
1187 | $self->push_raw(%entry);␊ |
1188 | }␊ |
1189 | ␊ |
1190 | # The same as push(), but assuming that msgid and msgstr are already escaped␊ |
1191 | sub push_raw {␊ |
1192 | my $self=shift;␊ |
1193 | my %entry=@_;␊ |
1194 | my ($msgid,$msgstr,$reference,$comment,$automatic,$previous,$flags,$type,$transref)=␊ |
1195 | ($entry{'msgid'},$entry{'msgstr'},␊ |
1196 | $entry{'reference'},$entry{'comment'},$entry{'automatic'},␊ |
1197 | $entry{'previous'},$entry{'flags'},$entry{'type'},$entry{'transref'});␊ |
1198 | my $keep_conflict = $entry{'conflict'};␊ |
1199 | ␊ |
1200 | # print STDERR "Push_raw\n";␊ |
1201 | # print STDERR " msgid=>>>$msgid<<<\n" if $msgid;␊ |
1202 | # print STDERR " msgstr=[[[$msgstr]]]\n" if $msgstr;␊ |
1203 | # Carp::cluck " flags=$flags\n" if $flags;␊ |
1204 | ␊ |
1205 | return unless defined($entry{'msgid'});␊ |
1206 | ␊ |
1207 | #no msgid => header definition␊ |
1208 | unless (length($entry{'msgid'})) {␊ |
1209 | # if (defined($self->{header}) && $self->{header} =~ /\S/) {␊ |
1210 | # warn dgettext("po4a","Redefinition of the header. ".␊ |
1211 | # "The old one will be discarded\n");␊ |
1212 | # } FIXME: do that iff the header isn't the default one.␊ |
1213 | $self->{header}=$msgstr;␊ |
1214 | $self->{header_comment}=$comment;␊ |
1215 | my $charset = $self->get_charset;␊ |
1216 | if ($charset ne "CHARSET") {␊ |
1217 | $self->{encoder}=find_encoding($charset);␊ |
1218 | } else {␊ |
1219 | $self->{encoder}=find_encoding("ascii");␊ |
1220 | }␊ |
1221 | return;␊ |
1222 | }␊ |
1223 | ␊ |
1224 | if ($self->{options}{'porefs'} eq "none") {␊ |
1225 | $reference = "";␊ |
1226 | } elsif ($self->{options}{'porefs'} eq "noline") {␊ |
1227 | $reference =~ s/:[0-9]*/:1/g;␊ |
1228 | }␊ |
1229 | ␊ |
1230 | if (defined($self->{po}{$msgid})) {␊ |
1231 | warn wrap_mod("po4a::po",␊ |
1232 | dgettext("po4a","msgid defined twice: %s"),␊ |
1233 | $msgid)␊ |
1234 | if (0); # FIXME: put a verbose stuff␊ |
1235 | if ( defined $msgstr␊ |
1236 | and defined $self->{po}{$msgid}{'msgstr'}␊ |
1237 | and $self->{po}{$msgid}{'msgstr'} ne $msgstr) {␊ |
1238 | my $txt=quote_text($msgid);␊ |
1239 | my ($first,$second)=␊ |
1240 | (format_comment(". ",$self->{po}{$msgid}{'reference'}).␊ |
1241 | quote_text($self->{po}{$msgid}{'msgstr'}),␊ |
1242 | ␊ |
1243 | format_comment(". ",$reference).␊ |
1244 | quote_text($msgstr));␊ |
1245 | ␊ |
1246 | if ($keep_conflict) {␊ |
1247 | if ($self->{po}{$msgid}{'msgstr'} =~ m/^#-#-#-#-# .* #-#-#-#-#\\n/s) {␊ |
1248 | $msgstr = $self->{po}{$msgid}{'msgstr'}.␊ |
1249 | "\\n#-#-#-#-# $transref #-#-#-#-#\\n".␊ |
1250 | $msgstr;␊ |
1251 | } else {␊ |
1252 | $msgstr = "#-#-#-#-# ".␊ |
1253 | $self->{po}{$msgid}{'transref'}.␊ |
1254 | " #-#-#-#-#\\n".␊ |
1255 | $self->{po}{$msgid}{'msgstr'}."\\n".␊ |
1256 | "#-#-#-#-# $transref #-#-#-#-#\\n".␊ |
1257 | $msgstr;␊ |
1258 | }␊ |
1259 | # Every msgid will have the same list of references.␊ |
1260 | # Only keep the last list.␊ |
1261 | $self->{po}{$msgid}{'reference'} = "";␊ |
1262 | } else {␊ |
1263 | warn wrap_msg(dgettext("po4a",␊ |
1264 | "Translations don't match for:\n".␊ |
1265 | "%s\n".␊ |
1266 | "-->First translation:\n".␊ |
1267 | "%s\n".␊ |
1268 | " Second translation:\n".␊ |
1269 | "%s\n".␊ |
1270 | " Old translation discarded."),␊ |
1271 | $txt,$first,$second);␊ |
1272 | }␊ |
1273 | }␊ |
1274 | }␊ |
1275 | if (defined $transref) {␊ |
1276 | $self->{po}{$msgid}{'transref'} = $transref;␊ |
1277 | }␊ |
1278 | if (defined $reference) {␊ |
1279 | if (defined $self->{po}{$msgid}{'reference'}) {␊ |
1280 | $self->{po}{$msgid}{'reference'} .= " ".$reference;␊ |
1281 | } else {␊ |
1282 | $self->{po}{$msgid}{'reference'} = $reference;␊ |
1283 | }␊ |
1284 | }␊ |
1285 | $self->{po}{$msgid}{'msgstr'} = $msgstr;␊ |
1286 | $self->{po}{$msgid}{'comment'} = $comment;␊ |
1287 | $self->{po}{$msgid}{'automatic'} = $automatic;␊ |
1288 | $self->{po}{$msgid}{'previous'} = $previous;␊ |
1289 | if (defined($self->{po}{$msgid}{'pos_doc'})) {␊ |
1290 | $self->{po}{$msgid}{'pos_doc'} .= " ".$self->{count_doc}++;␊ |
1291 | } else {␊ |
1292 | $self->{po}{$msgid}{'pos_doc'} = $self->{count_doc}++;␊ |
1293 | }␊ |
1294 | unless (defined($self->{po}{$msgid}{'pos'})) {␊ |
1295 | $self->{po}{$msgid}{'pos'} = $self->{count}++;␊ |
1296 | }␊ |
1297 | $self->{po}{$msgid}{'type'} = $type;␊ |
1298 | $self->{po}{$msgid}{'plural'} = $entry{'plural'}␊ |
1299 | if defined $entry{'plural'};␊ |
1300 | ␊ |
1301 | if (defined($flags)) {␊ |
1302 | $flags = " $flags ";␊ |
1303 | $flags =~ s/,/ /g;␊ |
1304 | foreach my $flag (@known_flags) {␊ |
1305 | if ($flags =~ /\s$flag\s/) { # if flag to be set␊ |
1306 | unless ( defined($self->{po}{$msgid}{'flags'})␊ |
1307 | && $self->{po}{$msgid}{'flags'} =~ /\b$flag\b/) {␊ |
1308 | # flag not already set␊ |
1309 | if (defined $self->{po}{$msgid}{'flags'}) {␊ |
1310 | $self->{po}{$msgid}{'flags'} .= " ".$flag;␊ |
1311 | } else {␊ |
1312 | $self->{po}{$msgid}{'flags'} = $flag;␊ |
1313 | }␊ |
1314 | }␊ |
1315 | }␊ |
1316 | }␊ |
1317 | }␊ |
1318 | # print STDERR "stored ((($msgid)))=>(((".$self->{po}{$msgid}{'msgstr'}.")))\n\n";␊ |
1319 | ␊ |
1320 | }␊ |
1321 | ␊ |
1322 | =back␊ |
1323 | ␊ |
1324 | =head1 Miscellaneous functions␊ |
1325 | ␊ |
1326 | =over 4␊ |
1327 | ␊ |
1328 | =item count_entries()␊ |
1329 | ␊ |
1330 | Returns the number of entries in the catalog (without the header).␊ |
1331 | ␊ |
1332 | =cut␊ |
1333 | ␊ |
1334 | sub count_entries($) {␊ |
1335 | my $self=shift;␊ |
1336 | return $self->{count};␊ |
1337 | }␊ |
1338 | ␊ |
1339 | =item count_entries_doc()␊ |
1340 | ␊ |
1341 | Returns the number of entries in document. If a string appears multiple times␊ |
1342 | in the document, it will be counted multiple times␊ |
1343 | ␊ |
1344 | =cut␊ |
1345 | ␊ |
1346 | sub count_entries_doc($) {␊ |
1347 | my $self=shift;␊ |
1348 | return $self->{count_doc};␊ |
1349 | }␊ |
1350 | ␊ |
1351 | =item msgid($)␊ |
1352 | ␊ |
1353 | Returns the msgid of the given number.␊ |
1354 | ␊ |
1355 | =cut␊ |
1356 | ␊ |
1357 | sub msgid($$) {␊ |
1358 | my $self=shift;␊ |
1359 | my $num=shift;␊ |
1360 | ␊ |
1361 | foreach my $msgid ( keys %{$self->{po}} ) {␊ |
1362 | return $msgid if ($self->{po}{$msgid}{'pos'} eq $num);␊ |
1363 | }␊ |
1364 | return undef;␊ |
1365 | }␊ |
1366 | ␊ |
1367 | =item msgid_doc($)␊ |
1368 | ␊ |
1369 | Returns the msgid with the given position in the document.␊ |
1370 | ␊ |
1371 | =cut␊ |
1372 | ␊ |
1373 | sub msgid_doc($$) {␊ |
1374 | my $self=shift;␊ |
1375 | my $num=shift;␊ |
1376 | ␊ |
1377 | foreach my $msgid ( keys %{$self->{po}} ) {␊ |
1378 | foreach my $pos (split / /, $self->{po}{$msgid}{'pos_doc'}) {␊ |
1379 | return $msgid if ($pos eq $num);␊ |
1380 | }␊ |
1381 | }␊ |
1382 | return undef;␊ |
1383 | }␊ |
1384 | ␊ |
1385 | =item get_charset()␊ |
1386 | ␊ |
1387 | Returns the character set specified in the PO header. If it hasn't been␊ |
1388 | set, it will return "CHARSET".␊ |
1389 | ␊ |
1390 | =cut␊ |
1391 | ␊ |
1392 | sub get_charset() {␊ |
1393 | my $self=shift;␊ |
1394 | ␊ |
1395 | $self->{header} =~ /charset=(.*?)[\s\\]/;␊ |
1396 | ␊ |
1397 | if (defined $1) {␊ |
1398 | return $1;␊ |
1399 | } else {␊ |
1400 | return "CHARSET";␊ |
1401 | }␊ |
1402 | }␊ |
1403 | ␊ |
1404 | =item set_charset($)␊ |
1405 | ␊ |
1406 | This sets the character set of the PO header to the value specified in its␊ |
1407 | first argument. If you never call this function (and no file with a specified␊ |
1408 | character set is read), the default value is left to "CHARSET". This value␊ |
1409 | doesn't change the behavior of this module, it's just used to fill that field␊ |
1410 | in the header, and to return it in get_charset().␊ |
1411 | ␊ |
1412 | =cut␊ |
1413 | ␊ |
1414 | sub set_charset() {␊ |
1415 | my $self=shift;␊ |
1416 | ␊ |
1417 | my ($newchar,$oldchar);␊ |
1418 | $newchar = shift;␊ |
1419 | $oldchar = $self->get_charset();␊ |
1420 | ␊ |
1421 | $self->{header} =~ s/$oldchar/$newchar/;␊ |
1422 | $self->{encoder}=find_encoding($newchar);␊ |
1423 | }␊ |
1424 | ␊ |
1425 | #----[ helper functions ]---------------------------------------------------␊ |
1426 | ␊ |
1427 | # transforme the string from its PO file representation to the form which␊ |
1428 | # should be used to print it␊ |
1429 | sub unescape_text {␊ |
1430 | my $text = shift;␊ |
1431 | ␊ |
1432 | print STDERR "\nunescape [$text]====" if $debug{'escape'};␊ |
1433 | $text = join("",split(/\n/,$text));␊ |
1434 | $text =~ s/\\"/"/g;␊ |
1435 | # unescape newlines␊ |
1436 | # NOTE on \G:␊ |
1437 | # The following regular expression introduce newlines.␊ |
1438 | # Thus, ^ doesn't match all beginnings of lines.␊ |
1439 | # \G is a zero-width assertion that matches the position␊ |
1440 | # of the previous substitution with s///g. As every␊ |
1441 | # substitution ends by a newline, it always matches a␊ |
1442 | # position just after a newline.␊ |
1443 | $text =~ s/( # $1:␊ |
1444 | (\G|[^\\]) # beginning of the line or any char␊ |
1445 | # different from '\'␊ |
1446 | (\\\\)* # followed by any even number of '\'␊ |
1447 | )\\n # and followed by an escaped newline␊ |
1448 | /$1\n/sgx; # single string, match globally, allow comments␊ |
1449 | # unescape carriage returns␊ |
1450 | $text =~ s/( # $1:␊ |
1451 | (\G|[^\\]) # beginning of the line or any char␊ |
1452 | # different from '\'␊ |
1453 | (\\\\)* # followed by any even number of '\'␊ |
1454 | )\\r # and followed by an escaped carriage return␊ |
1455 | /$1\r/sgx; # single string, match globally, allow comments␊ |
1456 | # unescape tabulations␊ |
1457 | $text =~ s/( # $1:␊ |
1458 | (\G|[^\\])# beginning of the line or any char␊ |
1459 | # different from '\'␊ |
1460 | (\\\\)* # followed by any even number of '\'␊ |
1461 | )\\t # and followed by an escaped tabulation␊ |
1462 | /$1\t/mgx; # multilines string, match globally, allow comments␊ |
1463 | # and unescape the escape character␊ |
1464 | $text =~ s/\\\\/\\/g;␊ |
1465 | print STDERR ">$text<\n" if $debug{'escape'};␊ |
1466 | ␊ |
1467 | return $text;␊ |
1468 | }␊ |
1469 | ␊ |
1470 | # transform the string to its representation as it should be written in PO␊ |
1471 | # files␊ |
1472 | sub escape_text {␊ |
1473 | my $text = shift;␊ |
1474 | ␊ |
1475 | print STDERR "\nescape [$text]====" if $debug{'escape'};␊ |
1476 | $text =~ s/\\/\\\\/g;␊ |
1477 | $text =~ s/"/\\"/g;␊ |
1478 | $text =~ s/\n/\\n/g;␊ |
1479 | $text =~ s/\r/\\r/g;␊ |
1480 | $text =~ s/\t/\\t/g;␊ |
1481 | print STDERR ">$text<\n" if $debug{'escape'};␊ |
1482 | ␊ |
1483 | return $text;␊ |
1484 | }␊ |
1485 | ␊ |
1486 | # put quotes around the string on each lines (without escaping it)␊ |
1487 | # It does also normalize the text (ie, make sure its representation is wraped␊ |
1488 | # on the 80th char, but without changing the meaning of the string)␊ |
1489 | sub quote_text {␊ |
1490 | my $string = shift;␊ |
1491 | ␊ |
1492 | return '""' unless defined($string) && length($string);␊ |
1493 | ␊ |
1494 | print STDERR "\nquote [$string]====" if $debug{'quote'};␊ |
1495 | # break lines on newlines, if any␊ |
1496 | # see unescape_text for an explanation on \G␊ |
1497 | $string =~ s/( # $1:␊ |
1498 | (\G|[^\\]) # beginning of the line or any char␊ |
1499 | # different from '\'␊ |
1500 | (\\\\)* # followed by any even number of '\'␊ |
1501 | \\n) # and followed by an escaped newline␊ |
1502 | /$1\n/sgx; # single string, match globally, allow comments␊ |
1503 | $string = wrap($string);␊ |
1504 | my @string = split(/\n/,$string);␊ |
1505 | $string = join ("\"\n\"",@string);␊ |
1506 | $string = "\"$string\"";␊ |
1507 | if (scalar @string > 1 && $string[0] ne '') {␊ |
1508 | $string = "\"\"\n".$string;␊ |
1509 | }␊ |
1510 | ␊ |
1511 | print STDERR ">$string<\n" if $debug{'quote'};␊ |
1512 | return $string;␊ |
1513 | }␊ |
1514 | ␊ |
1515 | # undo the work of the quote_text function␊ |
1516 | sub unquote_text {␊ |
1517 | my $string = shift;␊ |
1518 | print STDERR "\nunquote [$string]====" if $debug{'quote'};␊ |
1519 | $string =~ s/^""\\n//s;␊ |
1520 | $string =~ s/^"(.*)"$/$1/s;␊ |
1521 | $string =~ s/"\n"//gm;␊ |
1522 | # Note: an even number of '\' could precede \\n, but I could not build a␊ |
1523 | # document to test this␊ |
1524 | $string =~ s/([^\\])\\n\n/$1!!DUMMYPOPM!!/gm;␊ |
1525 | $string =~ s|!!DUMMYPOPM!!|\\n|gm;␊ |
1526 | print STDERR ">$string<\n" if $debug{'quote'};␊ |
1527 | return $string;␊ |
1528 | }␊ |
1529 | ␊ |
1530 | # canonize the string: write it on only one line, changing consecutive␊ |
1531 | # whitespace to only one space.␊ |
1532 | # Warning, it changes the string and should only be called if the string is␊ |
1533 | # plain text␊ |
1534 | sub canonize {␊ |
1535 | my $text=shift;␊ |
1536 | print STDERR "\ncanonize [$text]====" if $debug{'canonize'};␊ |
1537 | $text =~ s/^ *//s;␊ |
1538 | $text =~ s/^[ \t]+/ /gm;␊ |
1539 | # if ($text eq "\n"), it messed up the first string (header)␊ |
1540 | $text =~ s/\n/ /gm if ($text ne "\n");␊ |
1541 | $text =~ s/([.)]) +/$1 /gm;␊ |
1542 | $text =~ s/([^.)]) */$1 /gm;␊ |
1543 | $text =~ s/ *$//s;␊ |
1544 | print STDERR ">$text<\n" if $debug{'canonize'};␊ |
1545 | return $text;␊ |
1546 | }␊ |
1547 | ␊ |
1548 | # wraps the string. We don't use Text::Wrap since it mangles whitespace at␊ |
1549 | # the end of splited line␊ |
1550 | sub wrap {␊ |
1551 | my $text=shift;␊ |
1552 | return "0" if ($text eq '0');␊ |
1553 | my $col=shift || 76;␊ |
1554 | my @lines=split(/\n/,"$text");␊ |
1555 | my $res="";␊ |
1556 | my $first=1;␊ |
1557 | while (defined(my $line=shift @lines)) {␊ |
1558 | if ($first && length($line) > $col - 10) {␊ |
1559 | unshift @lines,$line;␊ |
1560 | $first=0;␊ |
1561 | next;␊ |
1562 | }␊ |
1563 | if (length($line) > $col) {␊ |
1564 | my $pos=rindex($line," ",$col);␊ |
1565 | while (substr($line,$pos-1,1) eq '.' && $pos != -1) {␊ |
1566 | $pos=rindex($line," ",$pos-1);␊ |
1567 | }␊ |
1568 | if ($pos == -1) {␊ |
1569 | # There are no spaces in the first $col chars, pick-up the␊ |
1570 | # first space␊ |
1571 | $pos = index($line," ");␊ |
1572 | }␊ |
1573 | if ($pos != -1) {␊ |
1574 | my $end=substr($line,$pos+1);␊ |
1575 | $line=substr($line,0,$pos+1);␊ |
1576 | if ($end =~ s/^( +)//) {␊ |
1577 | $line .= $1;␊ |
1578 | }␊ |
1579 | unshift @lines,$end;␊ |
1580 | }␊ |
1581 | }␊ |
1582 | $first=0;␊ |
1583 | $res.="$line\n";␊ |
1584 | }␊ |
1585 | # Restore the original trailing spaces␊ |
1586 | $res =~ s/\s+$//s;␊ |
1587 | if ($text =~ m/(\s+)$/s) {␊ |
1588 | $res .= $1;␊ |
1589 | }␊ |
1590 | return $res;␊ |
1591 | }␊ |
1592 | ␊ |
1593 | # outputs properly a '# ... ' line to be put in the PO file␊ |
1594 | sub format_comment {␊ |
1595 | my $comment=shift;␊ |
1596 | my $char=shift;␊ |
1597 | my $result = "#". $char . $comment;␊ |
1598 | $result =~ s/\n/\n#$char/gs;␊ |
1599 | $result =~ s/^#$char$/#/gm;␊ |
1600 | $result .= "\n";␊ |
1601 | return $result;␊ |
1602 | }␊ |
1603 | ␊ |
1604 | ␊ |
1605 | 1;␊ |
1606 | __END__␊ |
1607 | ␊ |
1608 | =back␊ |
1609 | ␊ |
1610 | =head1 AUTHORS␊ |
1611 | ␊ |
1612 | Denis Barbier <barbier@linuxfr.org>␊ |
1613 | Martin Quinson (mquinson#debian.org)␊ |
1614 | ␊ |
1615 | =cut␊ |
1616 | |