tags/2.1/package/bin/po4a/lib/Locale/Po4a/Html.pm - Chameleon Svn Source Tree - Chameleon open source boot loader project.

Root/tags/2.1/package/bin/po4a/lib/Locale/Po4a/Html.pm

Source at commit 2462 created 9 years 8 months ago. By ifabio, Split out states generator from acpi_patcher (Credits to Clover Teams)
1	#!/usr/bin/perl␊
2	␊
3	# Po4a::Html.pm␊
4	#␊
5	# extract and translate translatable strings from a HTML document.␊
6	#␊
7	# This code extracts plain text between HTML tags and some "alt" or␊
8	# "title" attributes.␊
9	#␊
10	# Copyright (c) 2003 by Laurent Hausermann <laurent@hausermann.org>␊
11	#␊
12	# This program is free software; you can redistribute it and/or modify␊
13	# it under the terms of the GNU General Public License as published by␊
14	# the Free Software Foundation; either version 2 of the License, or␊
15	# (at your option) any later version.␊
16	#␊
17	# This program is distributed in the hope that it will be useful,␊
18	# but WITHOUT ANY WARRANTY; without even the implied warranty of␊
19	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the␊
20	# GNU General Public License for more details.␊
21	#␊
22	# You should have received a copy of the GNU General Public License␊
23	# along with this program; if not, write to the Free Software␊
24	# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.␊
25	#␊
26	########################################################################␊
27	␊
28	=encoding UTF-8␊
29	␊
30	=head1 NAME␊
31	␊
32	Locale::Po4a::Html - convert HTML documents from/to PO files␊
33	␊
34	=head1 DESCRIPTION␊
35	␊
36	The po4a (PO for anything) project goal is to ease translations (and more␊
37	interestingly, the maintenance of translations) using gettext tools on␊
38	areas where they were not expected like documentation.␊
39	␊
40	Locale::Po4a::Html is a module to help the translation of documentation in␊
41	the HTML format into other [human] languages.␊
42	␊
43	Please note that this module is not distributed with the main po4a archive␊
44	because we don't feel it mature enough for that. If you insist on using it␊
45	anyway, check it from the CVS out.␊
46	␊
47	=cut␊
48	␊
49	package Locale::Po4a::Html;␊
50	require Exporter;␊
51	use vars qw(@ISA @EXPORT);␊
52	@ISA = qw(Locale::Po4a::TransTractor);␊
53	@EXPORT = qw(new initialize);␊
54	␊
55	use Locale::Po4a::TransTractor;␊
56	␊
57	use strict;␊
58	use warnings;␊
59	␊
60	use HTML::TokeParser;␊
61	␊
62	use File::Temp;␊
63	␊
64	sub initialize {}␊
65	␊
66	sub read {␊
67	my ($self,$filename)=@_;␊
68	push @{$self->{DOCPOD}{infile}}, $filename;␊
69	$self->Locale::Po4a::TransTractor::read($filename);␊
70	}␊
71	␊
72	sub parse {␊
73	my $self=shift;␊
74	map {$self->parse_file($_)} @{$self->{DOCPOD}{infile}};␊
75	}␊
76	␊
77	#␊
78	# Parse file and translate it␊
79	#␊
80	sub parse_file {␊
81	my ($self,$filename)=@_;␊
82	my $stream = HTML::TokeParser->new($filename)␊
83	\|\| die "Couldn't read HTML file $filename : $!";␊
84	␊
85	$stream->unbroken_text( [1] );␊
86	␊
87	my @type=();␊
88	NEXT : while (my $token = $stream->get_token) {␊
89	if($token->[0] eq 'T') {␊
90	my $text = $token->[1];␊
91	my ($pre_spaces) = ($text =~ /^(\s*)/);␊
92	my ($post_spaces) = ($text =~ /(\s*)$/);␊
93	$text = trim($text);␊
94	if (notranslation($text) == 1) {␊
95	$self->pushline( get_tag( $token ) );␊
96	next NEXT;␊
97	}␊
98	# FIXME : it should be useful to encode characters␊
99	# in UTF8 in the po, but converting them in HTML::Entities␊
100	# in the doc_out, translate acts both way␊
101	# so we cant do that.␊
102	# use HTML::Entities ();␊
103	# $encoded = HTML::Entities::encode($a);␊
104	# $decoded = HTML::Entities::decode($a);␊
105	#print STDERR $token->[0];␊
106	$self->pushline( $pre_spaces . $self->translate($text,␊
107	"FIXME:0",␊
108	(scalar @type ? $type[scalar @type-1]: "NOTYPE")␊
109	) . $post_spaces,␊
110	'wrap' => 1␊
111	);␊
112	next NEXT;␊
113	} elsif ($token->[0] eq 'S') {␊
114	push @type,$token->[1];␊
115	my $text = get_tag( $token );␊
116	my $tag = $token->[1];␊
117	# TODO: It would be nice to support an option to specify these␊
118	# (e.g. a list of tag.attribute)␊
119	my @trans_attr = (( $tag eq 'img' ) \|\| ( $tag eq 'input' ) \|\|␊
120	( $tag eq 'area' ) \|\| ( $tag eq 'applet'))␊
121	? qw/title alt/ : qw/title/;␊
122	my %attr = %{$token->[2]};␊
123	for my $a (@trans_attr) {␊
124	my $content = $attr{$a};␊
125	if (defined $content) {␊
126	$content = trim($content);␊
127	my $translated = $self->translate(␊
128	$content,␊
129	"FIXME:0",␊
130	"${tag}_$a"␊
131	);␊
132	$attr{$a} = $translated;␊
133	}␊
134	}␊
135	my ($closing) = ( $text =~ /(\s*\/?>)/ );␊
136	# reconstruct the tag from scratch␊
137	delete $attr{'/'}; # Parser thinks closing / in XHTML is an attribute␊
138	$text = "<$tag";␊
139	$text .= " $_=\"$attr{$_}\"" foreach keys %attr;␊
140	$text .= $closing;␊
141	$self->pushline( $text );␊
142	} elsif ($token->[0] eq 'E') {␊
143	pop @type;␊
144	$self->pushline( get_tag( $token ) );␊
145	} else {␊
146	$self->pushline( get_tag( $token ) );␊
147	}␊
148	}␊
149	}␊
150	␊
151	sub get_tag {␊
152	my $token = shift;␊
153	my $tag = "";␊
154	␊
155	if ($token->[0] eq 'S') {␊
156	$tag = $token->[4];␊
157	}␊
158	if ( ($token->[0] eq 'C') \|\|␊
159	($token->[0] eq 'D') \|\|␊
160	($token->[0] eq 'T') ) {␊
161	$tag = $token->[1];␊
162	}␊
163	if ( ($token->[0] eq 'E') \|\|␊
164	($token->[0] eq 'PI') ) {␊
165	$tag = $token->[2];␊
166	}␊
167	␊
168	return $tag;␊
169	}␊
170	␊
171	sub trim {␊
172	my $s=shift;␊
173	$s =~ s/\n/ /g; # remove \n in text␊
174	$s =~ s/\r/ /g; # remove \r in text␊
175	$s =~ s/\t/ /g; # remove tabulations␊
176	$s =~ s/\s+/ /g; # remove multiple spaces␊
177	$s =~ s/^\s*//g; # remove leading spaces␊
178	$s =~ s/\s*$//g; # remove trailing spaces␊
179	return $s;␊
180	}␊
181	␊
182	#␊
183	# This method says if a string must be␊
184	# translated or not.␊
185	# To be improved with more test or regexp␊
186	# Maybe there is a way to do this in TransTractor␊
187	# for example ::^ should not be translated␊
188	sub notranslation {␊
189	my $s=shift;␊
190	return 1 if ( ($s cmp "") == 0);␊
191	return 1 if ( ($s cmp "-") == 0);␊
192	return 1 if ( ($s cmp "::") == 0);␊
193	return 1 if ( ($s cmp ":") == 0);␊
194	return 1 if ( ($s cmp ".") == 0);␊
195	return 1 if ( ($s cmp "\|") == 0);␊
196	return 1 if ( ($s cmp '"') == 0);␊
197	return 1 if ( ($s cmp "'") == 0);␊
198	# don't translate entries composed of one entity␊
199	return 1 if ($s =~ /^&[^;]*;$/);␊
200	␊
201	# don't translate entries with no letters␊
202	# (happens with e.g. <b>Hello</b>, <i>world</i> )␊
203	# ^^␊
204	# ", " doesn't need translation␊
205	return 1 unless $s =~ /\w/;␊
206	return 0;␊
207	}␊
208	␊
209	=head1 AUTHORS␊
210	␊
211	Laurent Hausermann <laurent@hausermann.org>␊
212	␊
213	=head1 COPYRIGHT AND LICENSE␊
214	␊
215	Laurent Hausermann <laurent@hausermann.org>␊
216	␊
217	This program is free software; you may redistribute it and/or modify it␊
218	under the terms of GPL (see the COPYING file).␊
219

Download this file