1 | #!/usr/bin/perl␊ |
2 | ␊ |
3 | # Po4a::Xhtml.pm␊ |
4 | #␊ |
5 | # extract and translate translatable strings from XHTML documents.␊ |
6 | #␊ |
7 | # This code extracts plain text from tags and attributes from strict XHTML␊ |
8 | # documents.␊ |
9 | #␊ |
10 | # Copyright (c) 2005 by Yves Rütschlé <po4a@rutschle.net>␊ |
11 | # Copyright (c) 2007-2008 by Nicolas François <nicolas.francois@centraliens.net>␊ |
12 | #␊ |
13 | # This program is free software; you can redistribute it and/or modify␊ |
14 | # it under the terms of the GNU General Public License as published by␊ |
15 | # the Free Software Foundation; either version 2 of the License, or␊ |
16 | # (at your option) any later version.␊ |
17 | #␊ |
18 | # This program is distributed in the hope that it will be useful,␊ |
19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of␊ |
20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the␊ |
21 | # GNU General Public License for more details.␊ |
22 | #␊ |
23 | # You should have received a copy of the GNU General Public License␊ |
24 | # along with this program; if not, write to the Free Software␊ |
25 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.␊ |
26 | #␊ |
27 | ########################################################################␊ |
28 | ␊ |
29 | =encoding UTF-8␊ |
30 | ␊ |
31 | =head1 NAME␊ |
32 | ␊ |
33 | Locale::Po4a::Xhtml - convert XHTML documents from/to PO files␊ |
34 | ␊ |
35 | =head1 DESCRIPTION␊ |
36 | ␊ |
37 | The po4a (PO for anything) project goal is to ease translations (and more␊ |
38 | interestingly, the maintenance of translations) using gettext tools on␊ |
39 | areas where they were not expected like documentation.␊ |
40 | ␊ |
41 | Locale::Po4a::Xhtml is a module to help the translation of XHTML documents into␊ |
42 | other [human] languages.␊ |
43 | ␊ |
44 | =head1 OPTIONS ACCEPTED BY THIS MODULE␊ |
45 | ␊ |
46 | These are this module's particular options:␊ |
47 | ␊ |
48 | =over 4␊ |
49 | ␊ |
50 | =item B<includessi>[B<=>I<rootpath>]␊ |
51 | ␊ |
52 | Include files specified by an include SSI (Server Side Includes) element␊ |
53 | (e.g. <!--#include virtual="/foo/bar.html" -->).␊ |
54 | ␊ |
55 | B<Note:> You should use it only for static files.␊ |
56 | ␊ |
57 | An additional I<rootpath> parameter can be specified. It specifies the root␊ |
58 | path to find files included by a B<virtual> attribute.␊ |
59 | ␊ |
60 | =back␊ |
61 | ␊ |
62 | =head1 STATUS OF THIS MODULE␊ |
63 | ␊ |
This module is fully functional, as it relies on the L<Locale::Po4a::Xml>
65 | module. This only defines the translatable tags and attributes.␊ |
66 | ␊ |
67 | "It works for me", which means I use it successfully on my personal Web site.␊ |
68 | However, YMMV: please let me know if something doesn't work for you. In␊ |
69 | particular, tables are getting no testing whatsoever, as we don't use them.␊ |
70 | ␊ |
71 | =head1 SEE ALSO␊ |
72 | ␊ |
73 | L<Locale::Po4a::TransTractor(3pm)>, L<Locale::Po4a::Xml(3pm)>, L<po4a(7)|po4a.7>␊ |
74 | ␊ |
75 | =head1 AUTHORS␊ |
76 | ␊ |
77 | Yves Rütschlé <po4a@rutschle.net>␊ |
78 | Nicolas François <nicolas.francois@centraliens.net>␊ |
79 | ␊ |
80 | =head1 COPYRIGHT AND LICENSE␊ |
81 | ␊ |
82 | Copyright (c) 2004 by Yves Rütschlé <po4a@rutschle.net>␊ |
83 | Copyright (c) 2007-2008 by Nicolas François <nicolas.francois@centraliens.net>␊ |
84 | ␊ |
85 | This program is free software; you may redistribute it and/or modify it␊ |
86 | under the terms of GPL (see the COPYING file).␊ |
87 | ␊ |
88 | =cut␊ |
89 | ␊ |
90 | package Locale::Po4a::Xhtml;␊ |
91 | ␊ |
92 | use 5.006;␊ |
93 | use strict;␊ |
94 | use warnings;␊ |
95 | ␊ |
96 | use Locale::Po4a::Xml;␊ |
97 | use vars qw(@tag_types);␊ |
98 | *tag_types = \@Locale::Po4a::Xml::tag_types;␊ |
99 | ␊ |
100 | use Locale::Po4a::Common;␊ |
101 | use Carp qw(croak);␊ |
102 | ␊ |
103 | use vars qw(@ISA);␊ |
104 | @ISA = qw(Locale::Po4a::Xml);␊ |
105 | ␊ |
# Extraction handler for SSI tags ("<!--#" ... "-->").
#
# Reads the tag up to its closing "-->" with get_string_until().  When the
# tag is an include directive of the form
#     <!--#include file="PATH" -->    or    <!--#include virtual="PATH" -->
# the directive is consumed from the input, every line of the referenced
# file is pushed back onto the input (each with a "FILE:LINENO" reference),
# and the directive itself is pushed back in front of those lines, rewritten
# as an ordinary comment ("<!-- SSI included by po4a: ...").
#
# Arguments:
#   $self   - the parser object
#   $remove - passed to get_string_until(): true when the tag must be
#             removed from the input while it is read
#
# Returns ($eof, @tag) exactly as obtained from the first get_string_until()
# call (the tag text is returned unmodified).
#
# Croaks when the included file cannot be opened or closed.
sub tag_extract_SSI {
    my ($self, $remove) = @_;

    my ($eof, @tag) = $self->get_string_until(
        "-->",
        {
            include  => 1,
            remove   => $remove,
            unquoted => 1,
        }
    );
    my ($text, $ref) = @tag;

    if (defined $text
        and $text =~ m/<!--#include (file|virtual)="(.*?)"\s-->/s)
    {
        # Save the captures right away: the regexes below would clobber them.
        my ($kind, $path) = ($1, $2);

        # "file" paths are looked up from the current directory, "virtual"
        # paths from the root given with the includessi option.
        my $base = $kind eq "file" ? "." : $self->{options}{'includessi'};
        $base = '' unless defined $base;

        # Only add a separator when neither side provides one, so that paths
        # which already resolved correctly keep the exact same spelling
        # (the path is also used in the references pushed back below).
        my $includefile = $base;
        if (length $base and $base !~ m{/\z} and $path !~ m{\A/}) {
            $includefile .= "/";
        }
        $includefile .= $path;

        # The tag was only looked at so far: consume it for real, since it
        # is about to be replaced in the input.
        if (!$remove) {
            $self->get_string_until(
                "-->",
                {
                    include  => 1,
                    remove   => 1,
                    unquoted => 1,
                }
            );
        }

        # Three-argument open: the path comes from the document and must
        # never be interpreted as an open mode or a command pipe.
        open(my $in, '<', $includefile)
            or croak wrap_mod("po4a::xml",
                              dgettext("po4a", "Can't read from %s: %s"),
                              $includefile, $!);
        my @include;
        my $linenum = 0;
        while (defined(my $includeline = <$in>)) {
            $linenum++;
            push @include, [ $includeline, "$includefile:$linenum" ];
        }
        close $in
            or croak wrap_mod("po4a::xml",
                              dgettext("po4a", "Can't close %s after reading: %s"),
                              $includefile, $!);

        # Push the lines back last-to-first so that they come out of the
        # input in file order.
        foreach my $entry (reverse @include) {
            $self->unshiftline(@{$entry});
        }

        # Neutralise the directive so that it is not expanded a second time.
        $text =~ s/<!--#include/<!-- SSI included by po4a: /;
        $self->unshiftline($text, $ref);
    }

    return ($eof, @tag);
}
153 | ␊ |
# Configure the parser for XHTML documents.
#
# Declares the includessi option (empty by default), hands the caller's
# options to the parent class, sets the wrap option to 1, defaults the
# doctype option to 'html', and appends the XHTML tag and attribute lists to
# the default lists before calling treat_options().  When a non-empty
# includessi option is set, the SSI tag type ("!--#") is additionally routed
# to tag_extract_SSI().
#
# Arguments: the parser object followed by option name/value pairs.
sub initialize {
    my ($self, %options) = @_;

    # Declared before the options are handed to the parent class.
    $self->{options}{'includessi'} = '';

    $self->SUPER::initialize(%options);

    $self->{options}{'wrap'} = 1;
    $self->{options}{'doctype'} ||= 'html';

    # By default a tag is translated with its text rewrapped, and it
    # introduces a break.
    # Tags listed here are translated without rewrapping.
    my @unwrapped_tags = ('W<pre>');
    $self->{options}{'_default_translated'}
        .= join("\n", '', @unwrapped_tags, '');

    # Tags listed here are translated inside the current block, without
    # introducing a break.
    my @inline_tags = qw(
        <a>
        <abbr>
        <acronym>
        <b>
        <big>
        <bdo>
        <button>
        <cite>
        <code>
        <del>
        <dfn>
        <em>
        <i>
        <ins>
        <input>
        <kbd>
        <label>
        <object>
        <q>
        <samp>
        <select>
        <small>
        <span>
        <strong>
        <sub>
        <sup>
        <textarea>
        <tt>
        <u>
        <var>
    );
    $self->{options}{'_default_inline'} .= join("\n", '', @inline_tags, '');

    # <img> is deliberately left out of the inline list.  It is technically
    # an inline tag, but treating it as one is annoying and rarely useful,
    # unless images are used to write text (in which case all the images
    # have to be translated anyway, which is a bigger problem than this
    # module not inlining <img>).
    # When images do get translated, <map> may be worth setting as a
    # placeholder and <area> as inline.

    my @translated_attributes = qw(alt lang title);
    $self->{options}{'_default_attributes'}
        .= join("\n", '', @translated_attributes, '');

    $self->treat_options;

    if (defined $self->{options}{'includessi'}
        and length $self->{options}{'includessi'})
    {
        # Route SSI tags to the handler that expands include directives.
        for my $type (grep { $_->{beginning} eq "!--#" } @tag_types) {
            $type->{f_extract} = \&tag_extract_SSI;
        }

        # FIXME: the directory may be named "1" ;(
        # An includessi value of "1" is replaced by the current directory.
        if ($self->{options}{'includessi'} eq "1") {
            $self->{options}{'includessi'} = ".";
        }
    }
}
238 | |