Root/
Source at commit 2462 created 9 years 8 months ago. By ifabio, Split out states generator from acpi_patcher (Credits to Clover Teams) | |
---|---|
1 | #!/usr/bin/perl␊ |
2 | ␊ |
3 | # Po4a::Html.pm␊ |
4 | #␊ |
5 | # extract and translate translatable strings from a HTML document.␊ |
6 | #␊ |
7 | # This code extracts plain text between HTML tags and some "alt" or␊ |
8 | # "title" attributes.␊ |
9 | #␊ |
10 | # Copyright (c) 2003 by Laurent Hausermann <laurent@hausermann.org>␊ |
11 | #␊ |
12 | # This program is free software; you can redistribute it and/or modify␊ |
13 | # it under the terms of the GNU General Public License as published by␊ |
14 | # the Free Software Foundation; either version 2 of the License, or␊ |
15 | # (at your option) any later version.␊ |
16 | #␊ |
17 | # This program is distributed in the hope that it will be useful,␊ |
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of␊ |
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the␊ |
20 | # GNU General Public License for more details.␊ |
21 | #␊ |
22 | # You should have received a copy of the GNU General Public License␊ |
23 | # along with this program; if not, write to the Free Software␊ |
24 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.␊ |
25 | #␊ |
26 | ########################################################################␊ |
27 | ␊ |
28 | =encoding UTF-8␊ |
29 | ␊ |
30 | =head1 NAME␊ |
31 | ␊ |
32 | Locale::Po4a::Html - convert HTML documents from/to PO files␊ |
33 | ␊ |
34 | =head1 DESCRIPTION␊ |
35 | ␊ |
36 | The po4a (PO for anything) project goal is to ease translations (and more␊ |
37 | interestingly, the maintenance of translations) using gettext tools on␊ |
38 | areas where they were not expected like documentation.␊ |
39 | ␊ |
40 | Locale::Po4a::Html is a module to help the translation of documentation in␊ |
41 | the HTML format into other [human] languages.␊ |
42 | ␊ |
43 | Please note that this module is not distributed with the main po4a archive␊ |
44 | because we don't feel it mature enough for that. If you insist on using it␊ |
45 | anyway, check it from the CVS out.␊ |
46 | ␊ |
47 | =cut␊ |
48 | ␊ |
49 | package Locale::Po4a::Html;␊ |
50 | require Exporter;␊ |
51 | use vars qw(@ISA @EXPORT);␊ |
52 | @ISA = qw(Locale::Po4a::TransTractor);␊ |
53 | @EXPORT = qw(new initialize);␊ |
54 | ␊ |
55 | use Locale::Po4a::TransTractor;␊ |
56 | ␊ |
57 | use strict;␊ |
58 | use warnings;␊ |
59 | ␊ |
60 | use HTML::TokeParser;␊ |
61 | ␊ |
62 | use File::Temp;␊ |
63 | ␊ |
64 | sub initialize {}␊ |
65 | ␊ |
66 | sub read {␊ |
67 | my ($self,$filename)=@_;␊ |
68 | push @{$self->{DOCPOD}{infile}}, $filename;␊ |
69 | $self->Locale::Po4a::TransTractor::read($filename);␊ |
70 | }␊ |
71 | ␊ |
72 | sub parse {␊ |
73 | my $self=shift;␊ |
74 | map {$self->parse_file($_)} @{$self->{DOCPOD}{infile}};␊ |
75 | }␊ |
76 | ␊ |
77 | #␊ |
78 | # Parse file and translate it␊ |
79 | #␊ |
80 | sub parse_file {␊ |
81 | my ($self,$filename)=@_;␊ |
82 | my $stream = HTML::TokeParser->new($filename)␊ |
83 | || die "Couldn't read HTML file $filename : $!";␊ |
84 | ␊ |
85 | $stream->unbroken_text( [1] );␊ |
86 | ␊ |
87 | my @type=();␊ |
88 | NEXT : while (my $token = $stream->get_token) {␊ |
89 | if($token->[0] eq 'T') {␊ |
90 | my $text = $token->[1];␊ |
91 | my ($pre_spaces) = ($text =~ /^(\s*)/);␊ |
92 | my ($post_spaces) = ($text =~ /(\s*)$/);␊ |
93 | $text = trim($text);␊ |
94 | if (notranslation($text) == 1) {␊ |
95 | $self->pushline( get_tag( $token ) );␊ |
96 | next NEXT;␊ |
97 | }␊ |
98 | # FIXME : it should be useful to encode characters␊ |
99 | # in UTF8 in the po, but converting them in HTML::Entities␊ |
100 | # in the doc_out, translate acts both way␊ |
101 | # so we cant do that.␊ |
102 | # use HTML::Entities ();␊ |
103 | # $encoded = HTML::Entities::encode($a);␊ |
104 | # $decoded = HTML::Entities::decode($a);␊ |
105 | #print STDERR $token->[0];␊ |
106 | $self->pushline( $pre_spaces . $self->translate($text,␊ |
107 | "FIXME:0",␊ |
108 | (scalar @type ? $type[scalar @type-1]: "NOTYPE")␊ |
109 | ) . $post_spaces,␊ |
110 | 'wrap' => 1␊ |
111 | );␊ |
112 | next NEXT;␊ |
113 | } elsif ($token->[0] eq 'S') {␊ |
114 | push @type,$token->[1];␊ |
115 | my $text = get_tag( $token );␊ |
116 | my $tag = $token->[1];␊ |
117 | # TODO: It would be nice to support an option to specify these␊ |
118 | # (e.g. a list of tag.attribute)␊ |
119 | my @trans_attr = (( $tag eq 'img' ) || ( $tag eq 'input' ) ||␊ |
120 | ( $tag eq 'area' ) || ( $tag eq 'applet'))␊ |
121 | ? qw/title alt/ : qw/title/;␊ |
122 | my %attr = %{$token->[2]};␊ |
123 | for my $a (@trans_attr) {␊ |
124 | my $content = $attr{$a};␊ |
125 | if (defined $content) {␊ |
126 | $content = trim($content);␊ |
127 | my $translated = $self->translate(␊ |
128 | $content,␊ |
129 | "FIXME:0",␊ |
130 | "${tag}_$a"␊ |
131 | );␊ |
132 | $attr{$a} = $translated;␊ |
133 | }␊ |
134 | }␊ |
135 | my ($closing) = ( $text =~ /(\s*\/?>)/ );␊ |
136 | # reconstruct the tag from scratch␊ |
137 | delete $attr{'/'}; # Parser thinks closing / in XHTML is an attribute␊ |
138 | $text = "<$tag";␊ |
139 | $text .= " $_=\"$attr{$_}\"" foreach keys %attr;␊ |
140 | $text .= $closing;␊ |
141 | $self->pushline( $text );␊ |
142 | } elsif ($token->[0] eq 'E') {␊ |
143 | pop @type;␊ |
144 | $self->pushline( get_tag( $token ) );␊ |
145 | } else {␊ |
146 | $self->pushline( get_tag( $token ) );␊ |
147 | }␊ |
148 | }␊ |
149 | }␊ |
150 | ␊ |
151 | sub get_tag {␊ |
152 | my $token = shift;␊ |
153 | my $tag = "";␊ |
154 | ␊ |
155 | if ($token->[0] eq 'S') {␊ |
156 | $tag = $token->[4];␊ |
157 | }␊ |
158 | if ( ($token->[0] eq 'C') ||␊ |
159 | ($token->[0] eq 'D') ||␊ |
160 | ($token->[0] eq 'T') ) {␊ |
161 | $tag = $token->[1];␊ |
162 | }␊ |
163 | if ( ($token->[0] eq 'E') ||␊ |
164 | ($token->[0] eq 'PI') ) {␊ |
165 | $tag = $token->[2];␊ |
166 | }␊ |
167 | ␊ |
168 | return $tag;␊ |
169 | }␊ |
170 | ␊ |
171 | sub trim {␊ |
172 | my $s=shift;␊ |
173 | $s =~ s/\n/ /g; # remove \n in text␊ |
174 | $s =~ s/\r/ /g; # remove \r in text␊ |
175 | $s =~ s/\t/ /g; # remove tabulations␊ |
176 | $s =~ s/\s+/ /g; # remove multiple spaces␊ |
177 | $s =~ s/^\s*//g; # remove leading spaces␊ |
178 | $s =~ s/\s*$//g; # remove trailing spaces␊ |
179 | return $s;␊ |
180 | }␊ |
181 | ␊ |
182 | #␊ |
183 | # This method says if a string must be␊ |
184 | # translated or not.␊ |
185 | # To be improved with more test or regexp␊ |
186 | # Maybe there is a way to do this in TransTractor␊ |
187 | # for example ::^ should not be translated␊ |
188 | sub notranslation {␊ |
189 | my $s=shift;␊ |
190 | return 1 if ( ($s cmp "") == 0);␊ |
191 | return 1 if ( ($s cmp "-") == 0);␊ |
192 | return 1 if ( ($s cmp "::") == 0);␊ |
193 | return 1 if ( ($s cmp ":") == 0);␊ |
194 | return 1 if ( ($s cmp ".") == 0);␊ |
195 | return 1 if ( ($s cmp "|") == 0);␊ |
196 | return 1 if ( ($s cmp '"') == 0);␊ |
197 | return 1 if ( ($s cmp "'") == 0);␊ |
198 | # don't translate entries composed of one entity␊ |
199 | return 1 if ($s =~ /^&[^;]*;$/);␊ |
200 | ␊ |
201 | # don't translate entries with no letters␊ |
202 | # (happens with e.g. <b>Hello</b>, <i>world</i> )␊ |
203 | # ^^␊ |
204 | # ", " doesn't need translation␊ |
205 | return 1 unless $s =~ /\w/;␊ |
206 | return 0;␊ |
207 | }␊ |
208 | ␊ |
209 | =head1 AUTHORS␊ |
210 | ␊ |
211 | Laurent Hausermann <laurent@hausermann.org>␊ |
212 | ␊ |
213 | =head1 COPYRIGHT AND LICENSE␊ |
214 | ␊ |
215 | Laurent Hausermann <laurent@hausermann.org>␊ |
216 | ␊ |
217 | This program is free software; you may redistribute it and/or modify it␊ |
218 | under the terms of GPL (see the COPYING file).␊ |
219 |