Root/
Source at commit 1322 created 12 years 9 months ago. By meklort, Add doxygen to utils folder | |
---|---|
1 | /****************************************************************************␊ |
2 | ** ␊ |
3 | **␊ |
4 | ** Implementation of QTextCodec class␊ |
5 | **␊ |
6 | ** Created : 981015␊ |
7 | **␊ |
8 | ** Copyright (C)1998-2000 Trolltech AS. All rights reserved.␊ |
9 | **␊ |
10 | ** This file is part of the tools module of the Qt GUI Toolkit.␊ |
11 | **␊ |
12 | ** This file may be distributed under the terms of the Q Public License␊ |
13 | ** as defined by Trolltech AS of Norway and appearing in the file␊ |
14 | ** LICENSE.QPL included in the packaging of this file.␊ |
15 | **␊ |
16 | ** This file may be distributed and/or modified under the terms of the␊ |
17 | ** GNU General Public License version 2 as published by the Free Software␊ |
18 | ** Foundation and appearing in the file LICENSE.GPL included in the␊ |
19 | ** packaging of this file.␊ |
20 | **␊ |
21 | ** Licensees holding valid Qt Enterprise Edition or Qt Professional Edition␊ |
22 | ** licenses may use this file in accordance with the Qt Commercial License␊ |
23 | ** Agreement provided with the Software.␊ |
24 | **␊ |
25 | ** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE␊ |
26 | ** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.␊ |
27 | **␊ |
28 | ** See http://www.trolltech.com/pricing.html or email sales@trolltech.com for␊ |
29 | ** information about Qt Commercial License Agreements.␊ |
30 | ** See http://www.trolltech.com/qpl/ for QPL licensing information.␊ |
31 | ** See http://www.trolltech.com/gpl/ for GPL licensing information.␊ |
32 | **␊ |
33 | ** Contact info@trolltech.com if any conditions of this licensing are␊ |
34 | ** not clear to you.␊ |
35 | **␊ |
36 | **********************************************************************/␊ |
37 | ␊ |
38 | #include "qtextcodec.h"␊ |
39 | #ifndef QT_NO_TEXTCODEC␊ |
40 | ␊ |
41 | #include "qlist.h"␊ |
42 | #ifndef QT_NO_CODECS␊ |
43 | #include "qutfcodec.h"␊ |
44 | //#include "qgbkcodec.h"␊ |
45 | //#include "qeucjpcodec.h"␊ |
46 | //#include "qjiscodec.h"␊ |
47 | //#include "qsjiscodec.h"␊ |
48 | //#include "qeuckrcodec.h"␊ |
49 | //#include "qbig5codec.h"␊ |
50 | //#include "qrtlcodec.h"␊ |
51 | //#include "qtsciicodec.h"␊ |
52 | #endif␊ |
53 | ␊ |
54 | #include "qfile.h"␊ |
55 | #include "qstrlist.h"␊ |
56 | #include "qstring.h"␊ |
57 | ␊ |
58 | #include <stdlib.h>␊ |
59 | #include <ctype.h>␊ |
60 | #include <locale.h>␊ |
61 | ␊ |
62 | ␊ |
63 | static QList<QTextCodec> * all = 0;␊ |
64 | static bool destroying_is_ok; // starts out as 0␊ |
65 | ␊ |
66 | /*! Deletes all the created codecs.␊ |
67 | ␊ |
68 | \warning Do not call this function.␊ |
69 | ␊ |
70 | QApplication calls this just before exiting, to delete any␊ |
71 | QTextCodec objects that may be lying around. Since various other␊ |
72 | classes hold pointers to QTextCodec objects, it is not safe to call␊ |
73 | this function earlier.␊ |
74 | ␊ |
75 | If you are using the utility classes (like QString) but not using␊ |
76 | QApplication, calling this function at the very end of your␊ |
77 | application can be helpful to chasing down memory leaks, as␊ |
78 | QTextCodec objects will not show up.␊ |
79 | */␊ |
80 | ␊ |
81 | void QTextCodec::deleteAllCodecs()␊ |
82 | {␊ |
83 | if ( !all )␊ |
84 | return;␊ |
85 | ␊ |
86 | destroying_is_ok = TRUE;␊ |
87 | QList<QTextCodec> * ball = all;␊ |
88 | all = 0;␊ |
89 | ball->clear();␊ |
90 | delete ball;␊ |
91 | destroying_is_ok = FALSE;␊ |
92 | }␊ |
93 | ␊ |
94 | ␊ |
95 | static void setupBuiltinCodecs();␊ |
96 | ␊ |
97 | ␊ |
98 | static void realSetup()␊ |
99 | {␊ |
100 | #if defined(CHECK_STATE)␊ |
101 | if ( destroying_is_ok )␊ |
102 | qWarning( "creating new codec during codec cleanup" );␊ |
103 | #endif␊ |
104 | all = new QList<QTextCodec>;␊ |
105 | all->setAutoDelete( TRUE );␊ |
106 | setupBuiltinCodecs();␊ |
107 | }␊ |
108 | ␊ |
109 | ␊ |
110 | static inline void setup()␊ |
111 | {␊ |
112 | if ( !all )␊ |
113 | realSetup();␊ |
114 | }␊ |
115 | ␊ |
116 | ␊ |
117 | class QTextStatelessEncoder: public QTextEncoder {␊ |
118 | const QTextCodec* codec;␊ |
119 | public:␊ |
120 | QTextStatelessEncoder(const QTextCodec*);␊ |
121 | QCString fromUnicode(const QString& uc, int& lenInOut);␊ |
122 | };␊ |
123 | ␊ |
124 | ␊ |
125 | class QTextStatelessDecoder : public QTextDecoder {␊ |
126 | const QTextCodec* codec;␊ |
127 | public:␊ |
128 | QTextStatelessDecoder(const QTextCodec*);␊ |
129 | QString toUnicode(const char* chars, int len);␊ |
130 | };␊ |
131 | ␊ |
132 | QTextStatelessEncoder::QTextStatelessEncoder(const QTextCodec* c) :␊ |
133 | codec(c)␊ |
134 | {␊ |
135 | }␊ |
136 | ␊ |
137 | ␊ |
138 | QCString QTextStatelessEncoder::fromUnicode(const QString& uc, int& lenInOut)␊ |
139 | {␊ |
140 | return codec->fromUnicode(uc,lenInOut);␊ |
141 | }␊ |
142 | ␊ |
143 | ␊ |
144 | QTextStatelessDecoder::QTextStatelessDecoder(const QTextCodec* c) :␊ |
145 | codec(c)␊ |
146 | {␊ |
147 | }␊ |
148 | ␊ |
149 | ␊ |
150 | QString QTextStatelessDecoder::toUnicode(const char* chars, int len)␊ |
151 | {␊ |
152 | return codec->toUnicode(chars,len);␊ |
153 | }␊ |
154 | ␊ |
155 | ␊ |
156 | ␊ |
157 | // NOT REVISED␊ |
158 | /*!␊ |
159 | \class QTextCodec qtextcodec.h␊ |
160 | \brief Provides conversion between text encodings.␊ |
161 | ␊ |
162 | By making objects of subclasses of QTextCodec, support for␊ |
163 | new text encodings can be added to Qt.␊ |
164 | ␊ |
165 | The abstract virtual functions describe the encoder to the␊ |
166 | system and the coder is used as required in the different␊ |
167 | text file formats supported by QTextStream and, under X11, for the␊ |
168 | locale-specific character input and output (under Windows NT␊ |
169 | codecs are not needed for GUI I/O since the system works␊ |
170 | with Unicode already, and Windows 95/98 has built-in convertors␊ |
171 | for the 8-bit local encoding).␊ |
172 | ␊ |
173 | More recently created QTextCodec objects take precedence␊ |
174 | over earlier ones.␊ |
175 | ␊ |
176 | To add support for another 8-bit encoding to Qt, make a subclass␊ |
177 | of QTextCodec and implement at least the following methods:␊ |
178 | <dl>␊ |
179 | <dt>\c const char* name() const␊ |
180 | <dd>Return the official name for the encoding.␊ |
181 | <dt>\c int mibEnum() const␊ |
182 | <dd>Return the MIB enum for the encoding if it is listed in the␊ |
183 | <a href=ftp://ftp.isi.edu/in-notes/iana/assignments/character-sets>␊ |
184 | IANA character-sets encoding file</a>.␊ |
185 | </dl>␊ |
186 | If the encoding is multi-byte then it will have "state"; that is,␊ |
187 | the interpretation of some bytes will be dependent on some preceding␊ |
188 | bytes. For such an encoding, you will need to implement␊ |
189 | <dl>␊ |
190 | <dt> \c QTextDecoder* makeDecoder() const␊ |
191 | <dd>Return a QTextDecoder that remembers incomplete multibyte␊ |
192 | sequence prefixes or other required state.␊ |
193 | </dl>␊ |
194 | If the encoding does \e not require state, you should implement:␊ |
195 | <dl>␊ |
196 | <dt> \c QString toUnicode(const char* chars, int len) const␊ |
197 | <dd>Converts \e len characters from \e chars to Unicode.␊ |
198 | </dl>␊ |
199 | The base QTextCodec class has default implementations of the above␊ |
200 | two functions, <i>but they are mutually recursive</i>, so you must␊ |
201 | re-implement at least one of them, or both for improved efficiency.␊ |
202 | ␊ |
203 | For conversion from Unicode to 8-bit encodings, it is rarely necessary␊ |
204 | to maintain state. However, two functions similar to the two above␊ |
205 | are used for encoding:␊ |
206 | <dl>␊ |
207 | <dt> \c QTextEncoder* makeEncoder() const␊ |
208 | <dd>Return a QTextEncoder.␊ |
209 | <dt> \c QCString fromUnicode(const QString& uc, int& lenInOut ) const;␊ |
210 | <dd>Converts \e lenInOut characters (of type QChar) from the start␊ |
211 | of the string \a uc, returning a QCString result, and also returning␊ |
212 | the \link QCString::length() length\endlink␊ |
213 | of the result in lenInOut.␊ |
214 | </dl>␊ |
215 | Again, these are mutually recursive so only one needs to be implemented,␊ |
216 | or both if better efficiency is possible.␊ |
217 | ␊ |
218 | Finally, you must implement:␊ |
219 | <dl>␊ |
220 | <dt> \c int heuristicContentMatch(const char* chars, int len) const␊ |
221 | <dd>Gives a value indicating how likely it is that \e len characters␊ |
222 | from \e chars are in the encoding.␊ |
223 | </dl>␊ |
224 | A good model for this function is the␊ |
225 | QWindowsLocalCodec::heuristicContentMatch function found in the Qt sources.␊ |
226 | ␊ |
227 | A QTextCodec subclass might have improved performance if you also␊ |
228 | re-implement:␊ |
229 | <dl>␊ |
230 | <dt> \c bool canEncode( QChar ) const␊ |
231 | <dd>Test if a Unicode character can be encoded.␊ |
232 | <dt> \c bool canEncode( const QString& ) const␊ |
233 | <dd>Test if a string of Unicode characters can be encoded.␊ |
234 | <dt> \c int heuristicNameMatch(const char* hint) const␊ |
235 | <dd>Test if a possibly non-standard name is referring to the codec.␊ |
236 | </dl>␊ |
237 | */␊ |
238 | ␊ |
239 | ␊ |
240 | /*!␊ |
241 | Constructs a QTextCodec, making it of highest precedence.␊ |
242 | The QTextCodec should always be constructed on the heap␊ |
243 | (with new), and once constructed it becomes the responsibility␊ |
244 | of Qt to delete it (which is done at QApplication destruction).␊ |
245 | */␊ |
246 | QTextCodec::QTextCodec()␊ |
247 | {␊ |
248 | setup();␊ |
249 | all->insert(0,this);␊ |
250 | }␊ |
251 | ␊ |
252 | ␊ |
253 | /*!␊ |
254 | Destructs the QTextCodec. Note that you should not delete␊ |
255 | codecs yourself - once created they become the responsibility␊ |
256 | of Qt to delete.␊ |
257 | */␊ |
258 | QTextCodec::~QTextCodec()␊ |
259 | {␊ |
260 | if ( !destroying_is_ok )␊ |
261 | qWarning("QTextCodec::~QTextCodec() called by application");␊ |
262 | if ( all )␊ |
263 | all->remove( this );␊ |
264 | }␊ |
265 | ␊ |
266 | ␊ |
267 | /*!␊ |
268 | Returns a value indicating how likely this decoder is␊ |
269 | for decoding some format that has the given name.␊ |
270 | ␊ |
271 | A good match returns a positive number around␊ |
272 | the length of the string. A bad match is negative.␊ |
273 | ␊ |
274 | The default implementation calls simpleHeuristicNameMatch()␊ |
275 | with the name of the codec.␊ |
276 | */␊ |
277 | int QTextCodec::heuristicNameMatch(const char* hint) const␊ |
278 | {␊ |
279 | return simpleHeuristicNameMatch(name(),hint);␊ |
280 | }␊ |
281 | ␊ |
282 | ␊ |
283 | // returns a string cotnaining the letters and numbers from input,␊ |
284 | // with a space separating run of a character class. e.g. "iso8859-1"␊ |
285 | // becomes "iso 8859 1"␊ |
286 | static QString lettersAndNumbers( const char * input )␊ |
287 | {␊ |
288 | QString result;␊ |
289 | QChar c;␊ |
290 | ␊ |
291 | while( input && *input ) {␊ |
292 | c = *input;␊ |
293 | if ( c.isLetter() || c.isNumber() )␊ |
294 | result += c.lower();␊ |
295 | if ( input[1] ) {␊ |
296 | // add space at character class transition, except␊ |
297 | // transition from upper-case to lower-case letter␊ |
298 | QChar n( input[1] );␊ |
299 | if ( c.isLetter() && n.isLetter() ) {␊ |
300 | if ( c == c.lower() && n == n.upper() )␊ |
301 | result += ' ';␊ |
302 | } else if ( c.category() != n.category() ) {␊ |
303 | result += ' ';␊ |
304 | }␊ |
305 | }␊ |
306 | input++;␊ |
307 | }␊ |
308 | return result.simplifyWhiteSpace();␊ |
309 | }␊ |
310 | ␊ |
311 | /*!␊ |
312 | A simple utility function for heuristicNameMatch() - it␊ |
313 | does some very minor character-skipping␊ |
314 | so that almost-exact matches score high.␊ |
315 | */␊ |
316 | int QTextCodec::simpleHeuristicNameMatch(const char* name, const char* hint)␊ |
317 | {␊ |
318 | // if they're the same, return a perfect score.␊ |
319 | if ( name && hint && qstrcmp( name, hint ) == 0 )␊ |
320 | return qstrlen( hint );␊ |
321 | ␊ |
322 | // if the letters and numbers are the same, we have an "almost"␊ |
323 | // perfect match.␊ |
324 | QString h( lettersAndNumbers( hint ) );␊ |
325 | QString n( lettersAndNumbers( name ) );␊ |
326 | if ( h == n )␊ |
327 | return qstrlen( hint )-1;␊ |
328 | ␊ |
329 | if ( h.stripWhiteSpace() == n.stripWhiteSpace() )␊ |
330 | return qstrlen( hint )-2;␊ |
331 | ␊ |
332 | // could do some more here, but I don't think it's worth it␊ |
333 | ␊ |
334 | return 0;␊ |
335 | }␊ |
336 | ␊ |
337 | ␊ |
338 | /*!␊ |
339 | Returns the QTextCodec \a i places from the most recently␊ |
340 | inserted, or NULL if there is no such QTextCodec. Thus,␊ |
341 | codecForIndex(0) returns the most recently created QTextCodec.␊ |
342 | */␊ |
343 | QTextCodec* QTextCodec::codecForIndex(int i)␊ |
344 | {␊ |
345 | setup();␊ |
346 | return (uint)i >= all->count() ? 0 : all->at(i);␊ |
347 | }␊ |
348 | ␊ |
349 | ␊ |
350 | /*!␊ |
351 | Returns the QTextCodec which matches the␊ |
352 | \link QTextCodec::mibEnum() MIBenum\endlink \a mib.␊ |
353 | */␊ |
354 | QTextCodec* QTextCodec::codecForMib(int mib)␊ |
355 | {␊ |
356 | setup();␊ |
357 | QListIterator<QTextCodec> i(*all);␊ |
358 | QTextCodec* result;␊ |
359 | for ( ; (result=i); ++i ) {␊ |
360 | if ( result->mibEnum()==mib )␊ |
361 | break;␊ |
362 | }␊ |
363 | return result;␊ |
364 | }␊ |
365 | ␊ |
366 | ␊ |
367 | ␊ |
368 | ␊ |
369 | ␊ |
370 | #ifdef _OS_WIN32_␊ |
371 | class QWindowsLocalCodec: public QTextCodec␊ |
372 | {␊ |
373 | public:␊ |
374 | QWindowsLocalCodec();␊ |
375 | ~QWindowsLocalCodec();␊ |
376 | ␊ |
377 | QString toUnicode(const char* chars, int len) const;␊ |
378 | QCString fromUnicode(const QString& uc, int& lenInOut ) const;␊ |
379 | ␊ |
380 | const char* name() const;␊ |
381 | int mibEnum() const;␊ |
382 | ␊ |
383 | int heuristicContentMatch(const char* chars, int len) const;␊ |
384 | };␊ |
385 | ␊ |
386 | QWindowsLocalCodec::QWindowsLocalCodec()␊ |
387 | {␊ |
388 | }␊ |
389 | ␊ |
390 | QWindowsLocalCodec::~QWindowsLocalCodec()␊ |
391 | {␊ |
392 | }␊ |
393 | ␊ |
394 | ␊ |
395 | QString QWindowsLocalCodec::toUnicode(const char* chars, int len) const␊ |
396 | {␊ |
397 | if ( len == 1 && chars ) { // Optimization; avoids allocation␊ |
398 | char c[2];␊ |
399 | c[0] = *chars;␊ |
400 | c[1] = 0;␊ |
401 | return qt_winMB2QString( c, 2 );␊ |
402 | }␊ |
403 | if ( len < 0 )␊ |
404 | return qt_winMB2QString( chars );␊ |
405 | QCString s(chars,len+1);␊ |
406 | return qt_winMB2QString(s);␊ |
407 | }␊ |
408 | ␊ |
409 | QCString QWindowsLocalCodec::fromUnicode(const QString& uc, int& lenInOut ) const␊ |
410 | {␊ |
411 | QCString r = qt_winQString2MB( uc, lenInOut );␊ |
412 | lenInOut = r.length();␊ |
413 | return r;␊ |
414 | }␊ |
415 | ␊ |
416 | ␊ |
417 | const char* QWindowsLocalCodec::name() const␊ |
418 | {␊ |
419 | return "System";␊ |
420 | }␊ |
421 | ␊ |
422 | int QWindowsLocalCodec::mibEnum() const␊ |
423 | {␊ |
424 | return 0;␊ |
425 | }␊ |
426 | ␊ |
427 | ␊ |
428 | int QWindowsLocalCodec::heuristicContentMatch(const char* chars, int len) const␊ |
429 | {␊ |
430 | // ### Not a bad default implementation?␊ |
431 | QString t = toUnicode(chars,len);␊ |
432 | int l = t.length();␊ |
433 | QCString mb = fromUnicode(t,l);␊ |
434 | int i=0;␊ |
435 | while ( i < len )␊ |
436 | if ( chars[i] == mb[i] )␊ |
437 | i++;␊ |
438 | return i;␊ |
439 | }␊ |
440 | ␊ |
441 | #else␊ |
442 | ␊ |
/*
  Locale names (mostly copied from XFree86), grouped by the 8-bit
  encoding codecForLocale() selects for them. Every table ends with a
  null pointer.
*/
static const char * const iso8859_2locales[] = {
    "croatian",
    "cs", "cs_CS", "cs_CZ",
    "cz", "cz_CZ", "czech",
    "hr", "hr_HR",
    "hu", "hu_HU", "hungarian",
    "pl", "pl_PL", "polish",
    "ro", "ro_RO", "rumanian",
    "serbocroatian",
    "sh", "sh_SP", "sh_YU",
    "sk", "sk_SK",
    "sl", "sl_CS", "sl_SI",
    "slovak", "slovene",
    "sr_SP",
    0
};

static const char * const iso8859_3locales[] = { "eo", 0 };

static const char * const iso8859_4locales[] = {
    "ee", "ee_EE",
    "lt", "lt_LT",
    "lv", "lv_LV",
    0
};

static const char * const iso8859_5locales[] = {
    "bg", "bg_BG", "bulgarian",
    "mk", "mk_MK",
    "sp", "sp_YU",
    0
};

static const char * const iso8859_6locales[] = { "ar_AA", "ar_SA", "arabic", 0 };

static const char * const iso8859_7locales[] = { "el", "el_GR", "greek", 0 };

static const char * const iso8859_8locales[] = {
    "hebrew",
    "he", "he_IL",
    "iw", "iw_IL",
    0
};

static const char * const iso8859_9locales[] = { "tr", "tr_TR", "turkish", 0 };

static const char * const iso8859_15locales[] = {
    "fr", "fi", "french", "finnish",
    "et", "et_EE",
    0
};

static const char * const koi8_ulocales[] = {
    "uk", "uk_UA", "ru_UA", "ukrainian",
    0
};

static const char * const tis_620locales[] = { "th", "th_TH", "thai", 0 };
480 | ␊ |
481 | ␊ |
482 | static bool try_locale_list( const char * const locale[], const char * lang )␊ |
483 | {␊ |
484 | int i;␊ |
485 | for( i=0; locale[i] && qstrcmp(locale[i], lang); i++ )␊ |
486 | { }␊ |
487 | return locale[i] != 0;␊ |
488 | }␊ |
489 | ␊ |
490 | // For the probably_koi8_locales we have to look. The standard says␊ |
491 | // these are 8859-5, but almost all Russian users use KOI8-R and␊ |
492 | // incorrectly set $LANG to ru_RU. We'll check tolower() to see what␊ |
493 | // tolower() thinks ru_RU means.␊ |
494 | ␊ |
495 | // If you read the history, it seems that many Russians blame ISO and␊ |
496 | // Perestroika for the confusion.␊ |
497 | //␊ |
498 | // The real bug is that some programs break if the user specifies␊ |
499 | // ru_RU.KOI8-R.␊ |
500 | ␊ |
501 | static const char * const probably_koi8_rlocales[] = {␊ |
502 | "ru", "ru_SU", "ru_RU", "russian", 0 };␊ |
503 | ␊ |
504 | // this means ANY of these locale aliases. if they're aliases for␊ |
505 | // different locales, the code breaks.␊ |
506 | static QTextCodec * ru_RU_codec = 0;␊ |
507 | ␊ |
508 | static QTextCodec * ru_RU_hack( const char * i ) {␊ |
509 | if ( ! ru_RU_codec ) {␊ |
510 | QCString origlocale = setlocale( LC_CTYPE, i );␊ |
511 | // unicode koi8r latin5 name␊ |
512 | // 0x044E 0xC0 0xEE CYRILLIC SMALL LETTER YU␊ |
513 | // 0x042E 0xE0 0xCE CYRILLIC CAPITAL LETTER YU␊ |
514 | int latin5 = tolower( 0xCE );␊ |
515 | int koi8r = tolower( 0xE0 );␊ |
516 | if ( koi8r == 0xC0 && latin5 != 0xEE ) {␊ |
517 | ru_RU_codec = QTextCodec::codecForName( "KOI8-R" );␊ |
518 | } else if ( koi8r != 0xC0 && latin5 == 0xEE ) {␊ |
519 | ru_RU_codec = QTextCodec::codecForName( "ISO 8859-5" );␊ |
520 | } else {␊ |
521 | // something else again... let's assume... *throws dice*␊ |
522 | ru_RU_codec = QTextCodec::codecForName( "KOI8-R" );␊ |
523 | qWarning( "QTextCodec: using KOI8-R, probe failed (%02x %02x %s)",␊ |
524 | koi8r, latin5, i );␊ |
525 | }␊ |
526 | setlocale( LC_CTYPE, origlocale.data() );␊ |
527 | }␊ |
528 | return ru_RU_codec;␊ |
529 | }␊ |
530 | ␊ |
531 | #endif␊ |
532 | ␊ |
533 | static QTextCodec * localeMapper = 0;␊ |
534 | ␊ |
535 | void qt_set_locale_codec( QTextCodec *codec )␊ |
536 | {␊ |
537 | localeMapper = codec;␊ |
538 | }␊ |
539 | ␊ |
540 | /*! Returns a pointer to the codec most suitable for this locale. */␊ |
541 | ␊ |
542 | QTextCodec* QTextCodec::codecForLocale()␊ |
543 | {␊ |
544 | if ( localeMapper )␊ |
545 | return localeMapper;␊ |
546 | ␊ |
547 | setup();␊ |
548 | ␊ |
549 | #ifdef _OS_WIN32_␊ |
550 | localeMapper = new QWindowsLocalCodec;␊ |
551 | #else␊ |
552 | // Very poorly defined and followed standards causes lots of code␊ |
553 | // to try to get all the cases...␊ |
554 | ␊ |
555 | char * lang = qstrdup( getenv("LANG") );␊ |
556 | ␊ |
557 | char * p = lang ? strchr( lang, '.' ) : 0;␊ |
558 | if ( !p || *p != '.' ) {␊ |
559 | // Some versions of setlocale return encoding, others not.␊ |
560 | char *ctype = qstrdup( setlocale( LC_CTYPE, 0 ) );␊ |
561 | // Some Linux distributions have broken locales which will return␊ |
562 | // "C" for LC_CTYPE␊ |
563 | if ( qstrcmp( ctype, "C" ) == 0 ) {␊ |
564 | delete [] ctype;␊ |
565 | } else {␊ |
566 | if ( lang )␊ |
567 | delete [] lang;␊ |
568 | lang = ctype;␊ |
569 | p = lang ? strchr( lang, '.' ) : 0;␊ |
570 | }␊ |
571 | }␊ |
572 | ␊ |
573 | if( p && *p == '.' ) {␊ |
574 | // if there is an encoding and we don't know it, we return 0␊ |
575 | // User knows what they are doing. Codecs will believe them.␊ |
576 | localeMapper = codecForName( lang );␊ |
577 | if ( !localeMapper ) {␊ |
578 | // Use or codec disagree.␊ |
579 | localeMapper = codecForName( p+1 );␊ |
580 | }␊ |
581 | }␊ |
582 | if ( !localeMapper || !(p && *p == '.') ) {␊ |
583 | // if there is none, we default to 8859-1␊ |
584 | // We could perhaps default to 8859-15.␊ |
585 | if ( try_locale_list( iso8859_2locales, lang ) )␊ |
586 | localeMapper = codecForName( "ISO 8859-2" );␊ |
587 | else if ( try_locale_list( iso8859_3locales, lang ) )␊ |
588 | localeMapper = codecForName( "ISO 8859-3" );␊ |
589 | else if ( try_locale_list( iso8859_4locales, lang ) )␊ |
590 | localeMapper = codecForName( "ISO 8859-4" );␊ |
591 | else if ( try_locale_list( iso8859_5locales, lang ) )␊ |
592 | localeMapper = codecForName( "ISO 8859-5" );␊ |
593 | else if ( try_locale_list( iso8859_6locales, lang ) )␊ |
594 | localeMapper = codecForName( "ISO 8859-6-I" );␊ |
595 | else if ( try_locale_list( iso8859_7locales, lang ) )␊ |
596 | localeMapper = codecForName( "ISO 8859-7" );␊ |
597 | else if ( try_locale_list( iso8859_8locales, lang ) )␊ |
598 | localeMapper = codecForName( "ISO 8859-8-I" );␊ |
599 | else if ( try_locale_list( iso8859_9locales, lang ) )␊ |
600 | localeMapper = codecForName( "ISO 8859-9" );␊ |
601 | else if ( try_locale_list( iso8859_15locales, lang ) )␊ |
602 | localeMapper = codecForName( "ISO 8859-15" );␊ |
603 | else if ( try_locale_list( tis_620locales, lang ) )␊ |
604 | localeMapper = codecForName( "ISO 8859-11" );␊ |
605 | else if ( try_locale_list( koi8_ulocales, lang ) )␊ |
606 | localeMapper = codecForName( "KOI8-U" );␊ |
607 | else if ( try_locale_list( probably_koi8_rlocales, lang ) )␊ |
608 | localeMapper = ru_RU_hack( lang );␊ |
609 | else if (!lang || !(localeMapper = codecForName(lang) ))␊ |
610 | localeMapper = codecForName( "ISO 8859-1" );␊ |
611 | }␊ |
612 | delete[] lang;␊ |
613 | #endif␊ |
614 | ␊ |
615 | return localeMapper;␊ |
616 | }␊ |
617 | ␊ |
618 | ␊ |
619 | /*!␊ |
620 | Searches all installed QTextCodec objects, returning the one␊ |
621 | which best matches given name. Returns NULL if no codec has␊ |
622 | a match closeness above \a accuracy.␊ |
623 | ␊ |
624 | \sa heuristicNameMatch()␊ |
625 | */␊ |
626 | QTextCodec* QTextCodec::codecForName(const char* hint, int accuracy)␊ |
627 | {␊ |
628 | setup();␊ |
629 | QListIterator<QTextCodec> i(*all);␊ |
630 | QTextCodec* result = 0;␊ |
631 | int best=accuracy;␊ |
632 | for ( QTextCodec* cursor; (cursor=i); ++i ) {␊ |
633 | int s = cursor->heuristicNameMatch(hint);␊ |
634 | if ( s > best ) {␊ |
635 | best = s;␊ |
636 | result = cursor;␊ |
637 | }␊ |
638 | }␊ |
639 | return result;␊ |
640 | }␊ |
641 | ␊ |
642 | ␊ |
643 | /*!␊ |
644 | Searches all installed QTextCodec objects, returning the one␊ |
645 | which most recognizes the given content. May return 0.␊ |
646 | ␊ |
647 | Note that this is often a poor choice, since character␊ |
648 | encodings often use most of the available character sequences,␊ |
649 | and so only by linguistic analysis could a true match be made.␊ |
650 | ␊ |
651 | \sa heuristicContentMatch()␊ |
652 | */␊ |
653 | QTextCodec* QTextCodec::codecForContent(const char* chars, int len)␊ |
654 | {␊ |
655 | setup();␊ |
656 | QListIterator<QTextCodec> i(*all);␊ |
657 | QTextCodec* result = 0;␊ |
658 | int best=0;␊ |
659 | for ( QTextCodec* cursor; (cursor=i); ++i ) {␊ |
660 | int s = cursor->heuristicContentMatch(chars,len);␊ |
661 | if ( s > best ) {␊ |
662 | best = s;␊ |
663 | result = cursor;␊ |
664 | }␊ |
665 | }␊ |
666 | return result;␊ |
667 | }␊ |
668 | ␊ |
669 | ␊ |
670 | /*!␊ |
671 | \fn const char* QTextCodec::name() const␊ |
672 | Subclasses of QTextCodec must reimplement this function. It returns␊ |
673 | the name of the encoding supported by the subclass. When choosing␊ |
674 | a name for an encoding, consider these points:␊ |
675 | <ul>␊ |
676 | <li>On X11, heuristicNameMatch( const char * hint )␊ |
677 | is used to test if the QTextCodec␊ |
678 | can convert between Unicode and the encoding of a font␊ |
679 | with encoding \e hint, such as "iso8859-1" for Latin-1 fonts,␊ |
680 | "koi8-r" for Russian KOI8 fonts.␊ |
681 | The default algorithm of heuristicNameMatch() uses name().␊ |
682 | <li>Some applications may use this function to present␊ |
683 | encodings to the end user.␊ |
684 | </ul>␊ |
685 | */␊ |
686 | ␊ |
687 | /*!␊ |
688 | \fn int QTextCodec::mibEnum() const␊ |
689 | ␊ |
690 | Subclasses of QTextCodec must reimplement this function. It returns the␊ |
691 | MIBenum (see␊ |
692 | <a href="ftp://ftp.isi.edu/in-notes/iana/assignments/character-sets">␊ |
693 | the IANA character-sets encoding file</a> for more information).␊ |
694 | It is important that each QTextCodec subclass return the correct unique␊ |
695 | value for this function.␊ |
696 | */␊ |
697 | ␊ |
698 | ␊ |
699 | /*!␊ |
700 | \fn int QTextCodec::heuristicContentMatch(const char* chars, int len) const␊ |
701 | ␊ |
702 | Subclasses of QTextCodec must reimplement this function. It examines␊ |
703 | the first \a len bytes of \a chars and returns a value indicating how␊ |
704 | likely it is that the string is a prefix of text encoded in the␊ |
705 | encoding of the subclass. Any negative return value indicates that the text␊ |
706 | is detectably not in the encoding (eg. it contains undefined characters).␊ |
707 | A return value of 0 indicates that the text should be decoded with this␊ |
708 | codec rather than as ASCII, but there␊ |
709 | is no particular evidence. The value should range up to \a len. Thus,␊ |
710 | most decoders will return -1, 0, or -\a len.␊ |
711 | ␊ |
712 | The characters are not null terminated.␊ |
713 | ␊ |
714 | \sa codecForContent().␊ |
715 | */␊ |
716 | ␊ |
717 | ␊ |
718 | /*!␊ |
719 | Creates a QTextDecoder which stores enough state to decode chunks␊ |
720 | of char* data to create chunks of Unicode data. The default implementation␊ |
721 | creates a stateless decoder, which is sufficient for only the simplest␊ |
722 | encodings where each byte corresponds to exactly one Unicode character.␊ |
723 | ␊ |
724 | The caller is responsible for deleting the returned object.␊ |
725 | */␊ |
726 | QTextDecoder* QTextCodec::makeDecoder() const␊ |
727 | {␊ |
728 | return new QTextStatelessDecoder(this);␊ |
729 | }␊ |
730 | ␊ |
731 | ␊ |
732 | /*!␊ |
733 | Creates a QTextEncoder which stores enough state to encode chunks␊ |
734 | of Unicode data as char* data. The default implementation␊ |
735 | creates a stateless encoder, which is sufficient for only the simplest␊ |
736 | encodings where each Unicode character corresponds to exactly one char.␊ |
737 | ␊ |
738 | The caller is responsible for deleting the returned object.␊ |
739 | */␊ |
740 | QTextEncoder* QTextCodec::makeEncoder() const␊ |
741 | {␊ |
742 | return new QTextStatelessEncoder(this);␊ |
743 | }␊ |
744 | ␊ |
745 | ␊ |
746 | /*!␊ |
747 | Subclasses of QTextCodec must reimplement this function or␊ |
748 | makeDecoder(). It converts the first \a len characters of \a chars␊ |
749 | to Unicode.␊ |
750 | ␊ |
751 | The default implementation makes a decoder with makeDecoder() and␊ |
752 | converts the input with that. Note that the default makeDecoder()␊ |
753 | implementation makes a decoder that simply calls␊ |
754 | this function, hence subclasses \e must reimplement one function or␊ |
755 | the other to avoid infinite recursion.␊ |
756 | */␊ |
757 | QString QTextCodec::toUnicode(const char* chars, int len) const␊ |
758 | {␊ |
759 | QTextDecoder* i = makeDecoder();␊ |
760 | QString result = i->toUnicode(chars,len);␊ |
761 | delete i;␊ |
762 | return result;␊ |
763 | }␊ |
764 | ␊ |
765 | ␊ |
766 | /*!␊ |
767 | Subclasses of QTextCodec must reimplement either this function or␊ |
768 | makeEncoder(). It converts the first \a lenInOut characters of \a␊ |
769 | uc from Unicode to the encoding of the subclass. If \a lenInOut␊ |
770 | is negative or too large, the length of \a uc is used instead.␊ |
771 | ␊ |
772 | The result is returned by value as a QCString, so the caller has␊ |
773 | nothing to free. The length in bytes of the encoded result␊ |
774 | is returned in \a lenInOut.␊ |
775 | ␊ |
776 | The default implementation makes an encoder with makeEncoder() and␊ |
777 | converts the input with that. Note that the default makeEncoder()␊ |
778 | implementation makes an encoder that simply calls␊ |
779 | this function, hence subclasses \e must reimplement one function or␊ |
780 | the other to avoid infinite recursion.␊ |
781 | */␊ |
782 | ␊ |
783 | QCString QTextCodec::fromUnicode(const QString& uc, int& lenInOut) const␊ |
784 | {␊ |
785 | QTextEncoder* i = makeEncoder();␊ |
786 | QCString result = i->fromUnicode(uc, lenInOut);␊ |
787 | delete i;␊ |
788 | return result;␊ |
789 | }␊ |
790 | ␊ |
791 | /*!␊ |
792 | \overload QCString QTextCodec::fromUnicode(const QString& uc) const␊ |
793 | */␊ |
794 | QCString QTextCodec::fromUnicode(const QString& uc) const␊ |
795 | {␊ |
796 | int l = uc.length();␊ |
797 | return fromUnicode(uc,l);␊ |
798 | }␊ |
799 | ␊ |
800 | /*!␊ |
801 | \overload QString QTextCodec::toUnicode(const QByteArray& a, int len) const␊ |
802 | */␊ |
803 | QString QTextCodec::toUnicode(const QByteArray& a, int len) const␊ |
804 | {␊ |
805 | int l = a.size();␊ |
806 | if( l > 0 && a.data()[l - 1] == '\0' ) l--;␊ |
807 | l = QMIN( l, len );␊ |
808 | return toUnicode( a.data(), l );␊ |
809 | }␊ |
810 | ␊ |
811 | /*!␊ |
812 | \overload QString QTextCodec::toUnicode(const QByteArray& a) const␊ |
813 | */␊ |
814 | QString QTextCodec::toUnicode(const QByteArray& a) const␊ |
815 | {␊ |
816 | int l = a.size();␊ |
817 | if( l > 0 && a.data()[l - 1] == '\0' ) l--;␊ |
818 | return toUnicode( a.data(), l );␊ |
819 | }␊ |
820 | ␊ |
821 | /*!␊ |
822 | \overload QString QTextCodec::toUnicode(const char* chars) const␊ |
823 | */␊ |
824 | QString QTextCodec::toUnicode(const char* chars) const␊ |
825 | {␊ |
826 | return toUnicode(chars,qstrlen(chars));␊ |
827 | }␊ |
828 | ␊ |
829 | /*!␊ |
830 | Returns TRUE if the unicode character \a ch can be fully encoded␊ |
831 | with this codec. The default implementation tests if the result of␊ |
832 | toUnicode(fromUnicode(ch)) is the original \a ch. Subclasses may be␊ |
833 | able to improve the efficiency.␊ |
834 | */␊ |
835 | bool QTextCodec::canEncode( QChar ch ) const␊ |
836 | {␊ |
837 | return toUnicode(fromUnicode(ch)) == ch;␊ |
838 | }␊ |
839 | ␊ |
840 | /*!␊ |
841 | Returns TRUE if the unicode string \a s can be fully encoded␊ |
842 | with this codec. The default implementation tests if the result of␊ |
843 | toUnicode(fromUnicode(s)) is the original \a s. Subclasses may be␊ |
844 | able to improve the efficiency.␊ |
845 | */␊ |
846 | bool QTextCodec::canEncode( const QString& s ) const␊ |
847 | {␊ |
848 | return toUnicode(fromUnicode(s)) == s;␊ |
849 | }␊ |
850 | ␊ |
851 | ␊ |
852 | ␊ |
853 | /*!␊ |
854 | \class QTextEncoder qtextcodec.h␊ |
855 | \brief State-based encoder␊ |
856 | ␊ |
857 | A QTextEncoder converts Unicode into another format, remembering␊ |
858 | any state that is required between calls.␊ |
859 | ␊ |
860 | \sa QTextCodec::makeEncoder()␊ |
861 | */␊ |
862 | ␊ |
863 | /*!␊ |
864 | Destructs the encoder.␊ |
865 | */␊ |
866 | QTextEncoder::~QTextEncoder()␊ |
867 | {␊ |
868 | }␊ |
869 | /*!␊ |
870 | \fn QCString QTextEncoder::fromUnicode(const QString& uc, int& lenInOut)␊ |
871 | ␊ |
872 | Converts \a lenInOut characters (not bytes) from \a uc, producing␊ |
873 | a QCString. \a lenInOut will also be set to the␊ |
874 | \link QCString::length() length\endlink of the result (in bytes).␊ |
875 | ␊ |
876 | The encoder is free to record state to use when subsequent calls are␊ |
877 | made to this function (for example, it might change modes with escape␊ |
878 | sequences if needed during the encoding of one string, then assume that␊ |
879 | mode applies when a subsequent call begins).␊ |
880 | */␊ |
881 | ␊ |
882 | /*!␊ |
883 | \class QTextDecoder qtextcodec.h␊ |
884 | \brief State-based decoder␊ |
885 | ␊ |
886 | A QTextDecoder converts a text format into Unicode, remembering␊ |
887 | any state that is required between calls.␊ |
888 | ␊ |
889 | \sa QTextCodec::makeDecoder()␊ |
890 | */␊ |
891 | ␊ |
892 | ␊ |
893 | /*!␊ |
894 | Destructs the decoder.␊ |
895 | */␊ |
896 | QTextDecoder::~QTextDecoder()␊ |
897 | {␊ |
898 | }␊ |
899 | ␊ |
900 | /*!␊ |
901 | \fn QString QTextDecoder::toUnicode(const char* chars, int len)␊ |
902 | ␊ |
903 | Converts the first \a len bytes at \a chars to Unicode, returning the␊ |
904 | result.␊ |
905 | ␊ |
906 | If not all characters are used (eg. only part of a multi-byte␊ |
907 | encoding is at the end of the characters), the decoder remembers␊ |
908 | enough state to continue with the next call to this function.␊ |
909 | */␊ |
910 | ␊ |
// Value written to QMultiByteUnicodeTable::unicode for an entry that is
// continued in a follow-up table instead of holding a character itself.
#define CHAINED 0xffff

/*
  One entry of a 256-slot, byte-indexed decoding table.

  If multibyte is set, ignore unicode and index into multibyte with the
  next input byte.  Otherwise unicode holds the decoded character; a
  default-constructed entry holds 0xfffd.

  An entry owns the array that multibyte points to and releases it with
  delete [] when destroyed, which in turn destroys any nested tables.
*/
struct QMultiByteUnicodeTable {
    QMultiByteUnicodeTable() : unicode(0xfffd), multibyte(0) { }

    ~QMultiByteUnicodeTable()
    {
	// delete [] on a null pointer does nothing, so no test is needed.
	delete [] multibyte;
    }

    ushort unicode;
    QMultiByteUnicodeTable* multibyte;
};
927 | ␊ |
928 | #ifndef QT_NO_CODECS␊ |
/*
  Parses one escaped byte value of a charmap line.

  On entry \a cursor must point at the escape character (or at the
  terminating NUL, in which case 0 is returned and \a cursor is left
  untouched).  The character following the escape selects the radix:

      <esc>x<hex>      hexadecimal
      <esc>d<decimal>  decimal
      <esc><octal>     octal (the digits start right after the escape)

  On return \a cursor points at the first character that strtol() did
  not consume; it has always advanced by at least one character.  The
  result is reduced to the range 0..255.
*/
static int getByte(char* &cursor)
{
    if ( !*cursor )
	return 0;

    const char radixTag = cursor[1];
    long value;
    if ( radixTag == 'x' ) {
	value = strtol(cursor+2,&cursor,16);
    } else if ( radixTag == 'd' ) {
	value = strtol(cursor+2,&cursor,10);
    } else {
	// Octal values have no radix letter, so parsing must start at the
	// character right after the escape.  Starting at cursor+2 dropped the
	// first octal digit and, for an escape at the very end of the line,
	// read beyond the terminating NUL.
	value = strtol(cursor+1,&cursor,8);
    }
    return (int)(value & 0xff);
}
942 | ␊ |
943 | class QTextCodecFromIOD;␊ |
944 | ␊ |
945 | class QTextCodecFromIODDecoder : public QTextDecoder {␊ |
946 | const QTextCodecFromIOD* codec;␊ |
947 | QMultiByteUnicodeTable* mb;␊ |
948 | public:␊ |
949 | QTextCodecFromIODDecoder(const QTextCodecFromIOD* c);␊ |
950 | QString toUnicode(const char* chars, int len);␊ |
951 | };␊ |
952 | ␊ |
953 | class QTextCodecFromIOD : public QTextCodec {␊ |
954 | friend class QTextCodecFromIODDecoder;␊ |
955 | ␊ |
956 | QCString n;␊ |
957 | ␊ |
958 | // If from_unicode_page[row()][cell()] is 0 and from_unicode_page_multibyte,␊ |
959 | // use from_unicode_page_multibyte[row()][cell()] as string.␊ |
960 | char** from_unicode_page;␊ |
961 | char*** from_unicode_page_multibyte;␊ |
962 | char unkn;␊ |
963 | ␊ |
964 | // Only one of these is used␊ |
965 | ushort* to_unicode;␊ |
966 | QMultiByteUnicodeTable* to_unicode_multibyte;␊ |
967 | int max_bytes_per_char;␊ |
968 | QStrList aliases;␊ |
969 | ␊ |
970 | bool stateless() const { return !to_unicode_multibyte; }␊ |
971 | ␊ |
972 | public:␊ |
973 | QTextCodecFromIOD(QIODevice* iod)␊ |
974 | {␊ |
975 | from_unicode_page = 0;␊ |
976 | to_unicode_multibyte = 0;␊ |
977 | to_unicode = 0;␊ |
978 | from_unicode_page_multibyte = 0;␊ |
979 | max_bytes_per_char = 1;␊ |
980 | ␊ |
981 | const int maxlen=100;␊ |
982 | char line[maxlen];␊ |
983 | char esc='\\';␊ |
984 | char comm='%';␊ |
985 | bool incmap = FALSE;␊ |
986 | while (iod->readLine(line,maxlen) > 0) {␊ |
987 | if (0==qstrnicmp(line,"<code_set_name>",15))␊ |
988 | n = line+15;␊ |
989 | else if (0==qstrnicmp(line,"<escape_char> ",14))␊ |
990 | esc = line[14];␊ |
991 | else if (0==qstrnicmp(line,"<comment_char> ",15))␊ |
992 | comm = line[15];␊ |
993 | else if (line[0]==comm && 0==qstrnicmp(line+1," alias ",7)) {␊ |
994 | aliases.append(line+8);␊ |
995 | } else if (0==qstrnicmp(line,"CHARMAP",7)) {␊ |
996 | if (!from_unicode_page) {␊ |
997 | from_unicode_page = new char*[256];␊ |
998 | for (int i=0; i<256; i++)␊ |
999 | from_unicode_page[i]=0;␊ |
1000 | }␊ |
1001 | if (!to_unicode) {␊ |
1002 | to_unicode = new ushort[256];␊ |
1003 | }␊ |
1004 | incmap = TRUE;␊ |
1005 | } else if (0==qstrnicmp(line,"END CHARMAP",11))␊ |
1006 | break;␊ |
1007 | else if (incmap) {␊ |
1008 | char* cursor = line;␊ |
1009 | int byte=0,unicode=-1;␊ |
1010 | ushort* mb_unicode=0;␊ |
1011 | const int maxmb=8; // more -> we'll need to improve datastructures␊ |
1012 | char mb[maxmb+1];␊ |
1013 | int nmb=0;␊ |
1014 | ␊ |
1015 | while (*cursor) {␊ |
1016 | if (cursor[0]=='<' && cursor[1]=='U' &&␊ |
1017 | cursor[2]>='0' && cursor[2]<='9' &&␊ |
1018 | cursor[3]>='0' && cursor[3]<='9') {␊ |
1019 | ␊ |
1020 | unicode = strtol(cursor+2,&cursor,16);␊ |
1021 | ␊ |
1022 | } else if (*cursor==esc) {␊ |
1023 | ␊ |
1024 | byte = getByte(cursor);␊ |
1025 | ␊ |
1026 | if ( *cursor == esc ) {␊ |
1027 | if ( !to_unicode_multibyte ) {␊ |
1028 | to_unicode_multibyte =␊ |
1029 | new QMultiByteUnicodeTable[256];␊ |
1030 | for (int i=0; i<256; i++) {␊ |
1031 | to_unicode_multibyte[i].unicode =␊ |
1032 | to_unicode[i];␊ |
1033 | to_unicode_multibyte[i].multibyte = 0;␊ |
1034 | }␊ |
1035 | delete [] to_unicode;␊ |
1036 | to_unicode = 0;␊ |
1037 | }␊ |
1038 | QMultiByteUnicodeTable* mbut =␊ |
1039 | to_unicode_multibyte+byte;␊ |
1040 | mb[nmb++] = byte;␊ |
1041 | while ( nmb < maxmb && *cursor == esc ) {␊ |
1042 | // Always at least once␊ |
1043 | ␊ |
1044 | mbut->unicode = CHAINED;␊ |
1045 | byte = getByte(cursor);␊ |
1046 | mb[nmb++] = byte;␊ |
1047 | if (!mbut->multibyte) {␊ |
1048 | mbut->multibyte =␊ |
1049 | new QMultiByteUnicodeTable[256];␊ |
1050 | }␊ |
1051 | mbut = mbut->multibyte+byte;␊ |
1052 | mb_unicode = & mbut->unicode;␊ |
1053 | }␊ |
1054 | ␊ |
1055 | if ( nmb > max_bytes_per_char )␊ |
1056 | max_bytes_per_char = nmb;␊ |
1057 | }␊ |
1058 | } else {␊ |
1059 | cursor++;␊ |
1060 | }␊ |
1061 | }␊ |
1062 | ␊ |
1063 | if (unicode >= 0 && unicode <= 0xffff)␊ |
1064 | {␊ |
1065 | QChar ch((ushort)unicode);␊ |
1066 | if (!from_unicode_page[ch.row()]) {␊ |
1067 | from_unicode_page[ch.row()] = new char[256];␊ |
1068 | for (int i=0; i<256; i++)␊ |
1069 | from_unicode_page[ch.row()][i]=0;␊ |
1070 | }␊ |
1071 | if ( mb_unicode ) {␊ |
1072 | from_unicode_page[ch.row()][ch.cell()] = 0;␊ |
1073 | if (!from_unicode_page_multibyte) {␊ |
1074 | from_unicode_page_multibyte = new char**[256];␊ |
1075 | for (int i=0; i<256; i++)␊ |
1076 | from_unicode_page_multibyte[i]=0;␊ |
1077 | }␊ |
1078 | if (!from_unicode_page_multibyte[ch.row()]) {␊ |
1079 | from_unicode_page_multibyte[ch.row()] = new char*[256];␊ |
1080 | for (int i=0; i<256; i++)␊ |
1081 | from_unicode_page_multibyte[ch.row()][i] = 0;␊ |
1082 | }␊ |
1083 | mb[nmb++] = 0;␊ |
1084 | from_unicode_page_multibyte[ch.row()][ch.cell()]␊ |
1085 | = qstrdup(mb);␊ |
1086 | *mb_unicode = unicode;␊ |
1087 | } else {␊ |
1088 | from_unicode_page[ch.row()][ch.cell()] = (char)byte;␊ |
1089 | if ( to_unicode )␊ |
1090 | to_unicode[byte] = unicode;␊ |
1091 | else␊ |
1092 | to_unicode_multibyte[byte].unicode = unicode;␊ |
1093 | }␊ |
1094 | } else {␊ |
1095 | }␊ |
1096 | }␊ |
1097 | }␊ |
1098 | n = n.stripWhiteSpace();␊ |
1099 | ␊ |
1100 | unkn = '?'; // ##### Might be a bad choice.␊ |
1101 | }␊ |
1102 | ␊ |
1103 | ~QTextCodecFromIOD()␊ |
1104 | {␊ |
1105 | if ( from_unicode_page ) {␊ |
1106 | for (int i=0; i<256; i++)␊ |
1107 | if (from_unicode_page[i])␊ |
1108 | delete [] from_unicode_page[i];␊ |
1109 | }␊ |
1110 | if ( from_unicode_page_multibyte ) {␊ |
1111 | for (int i=0; i<256; i++)␊ |
1112 | if (from_unicode_page_multibyte[i])␊ |
1113 | for (int j=0; j<256; j++)␊ |
1114 | if (from_unicode_page_multibyte[i][j])␊ |
1115 | delete [] from_unicode_page_multibyte[i][j];␊ |
1116 | }␊ |
1117 | if ( to_unicode )␊ |
1118 | delete [] to_unicode;␊ |
1119 | if ( to_unicode_multibyte )␊ |
1120 | delete [] to_unicode_multibyte;␊ |
1121 | }␊ |
1122 | ␊ |
1123 | bool ok() const␊ |
1124 | {␊ |
1125 | return !!from_unicode_page;␊ |
1126 | }␊ |
1127 | ␊ |
1128 | QTextDecoder* makeDecoder() const␊ |
1129 | {␊ |
1130 | if ( stateless() )␊ |
1131 | return QTextCodec::makeDecoder();␊ |
1132 | else␊ |
1133 | return new QTextCodecFromIODDecoder(this);␊ |
1134 | }␊ |
1135 | ␊ |
1136 | const char* name() const␊ |
1137 | {␊ |
1138 | return n;␊ |
1139 | }␊ |
1140 | ␊ |
1141 | int mibEnum() const␊ |
1142 | {␊ |
1143 | return 0; // #### Unknown.␊ |
1144 | }␊ |
1145 | ␊ |
1146 | int heuristicContentMatch(const char*, int) const␊ |
1147 | {␊ |
1148 | return 0;␊ |
1149 | }␊ |
1150 | ␊ |
1151 | int heuristicNameMatch(const char* hint) const␊ |
1152 | {␊ |
1153 | int bestr = QTextCodec::heuristicNameMatch(hint);␊ |
1154 | QStrListIterator it(aliases);␊ |
1155 | char* a;␊ |
1156 | while ((a=it.current())) {␊ |
1157 | ++it;␊ |
1158 | int r = simpleHeuristicNameMatch(a,hint);␊ |
1159 | if (r > bestr)␊ |
1160 | bestr = r;␊ |
1161 | }␊ |
1162 | return bestr;␊ |
1163 | }␊ |
1164 | ␊ |
1165 | QString toUnicode(const char* chars, int len) const␊ |
1166 | {␊ |
1167 | const uchar* uchars = (const uchar*)chars;␊ |
1168 | QString result;␊ |
1169 | QMultiByteUnicodeTable* multibyte=to_unicode_multibyte;␊ |
1170 | if ( multibyte ) {␊ |
1171 | while (len--) {␊ |
1172 | QMultiByteUnicodeTable& mb = multibyte[*uchars];␊ |
1173 | if ( mb.multibyte ) {␊ |
1174 | // Chained multi-byte␊ |
1175 | multibyte = mb.multibyte;␊ |
1176 | } else {␊ |
1177 | result += QChar(mb.unicode);␊ |
1178 | multibyte=to_unicode_multibyte;␊ |
1179 | }␊ |
1180 | uchars++;␊ |
1181 | }␊ |
1182 | } else {␊ |
1183 | while (len--)␊ |
1184 | result += QChar(to_unicode[*uchars++]);␊ |
1185 | }␊ |
1186 | return result;␊ |
1187 | }␊ |
1188 | ␊ |
1189 | QCString fromUnicode(const QString& uc, int& lenInOut) const␊ |
1190 | {␊ |
1191 | if (lenInOut > (int)uc.length())␊ |
1192 | lenInOut = uc.length();␊ |
1193 | int rlen = lenInOut*max_bytes_per_char;␊ |
1194 | QCString rstr(rlen);␊ |
1195 | char* cursor = rstr.data();␊ |
1196 | char* s=0;␊ |
1197 | int l = lenInOut;␊ |
1198 | int lout = 0;␊ |
1199 | for (int i=0; i<l; i++) {␊ |
1200 | QChar ch = uc[i];␊ |
1201 | if ( ch == QChar::null ) {␊ |
1202 | // special␊ |
1203 | *cursor++ = 0;␊ |
1204 | } else if ( from_unicode_page[ch.row()] &&␊ |
1205 | from_unicode_page[ch.row()][ch.cell()] )␊ |
1206 | {␊ |
1207 | *cursor++ = from_unicode_page[ch.row()][ch.cell()];␊ |
1208 | lout++;␊ |
1209 | } else if ( from_unicode_page_multibyte &&␊ |
1210 | from_unicode_page_multibyte[ch.row()] &&␊ |
1211 | (s=from_unicode_page_multibyte[ch.row()][ch.cell()]) )␊ |
1212 | {␊ |
1213 | while (*s) {␊ |
1214 | *cursor++ = *s++;␊ |
1215 | lout++;␊ |
1216 | }␊ |
1217 | } else {␊ |
1218 | *cursor++ = unkn;␊ |
1219 | lout++;␊ |
1220 | }␊ |
1221 | }␊ |
1222 | *cursor = 0;␊ |
1223 | lenInOut = lout;␊ |
1224 | return rstr;␊ |
1225 | }␊ |
1226 | };␊ |
1227 | ␊ |
1228 | QTextCodecFromIODDecoder::QTextCodecFromIODDecoder(const QTextCodecFromIOD* c) :␊ |
1229 | codec(c)␊ |
1230 | {␊ |
1231 | mb = codec->to_unicode_multibyte;␊ |
1232 | }␊ |
1233 | ␊ |
1234 | QString QTextCodecFromIODDecoder::toUnicode(const char* chars, int len)␊ |
1235 | {␊ |
1236 | const uchar* uchars = (const uchar*)chars;␊ |
1237 | QString result;␊ |
1238 | while (len--) {␊ |
1239 | QMultiByteUnicodeTable& t = mb[*uchars];␊ |
1240 | if ( t.multibyte ) {␊ |
1241 | // Chained multi-byte␊ |
1242 | mb = t.multibyte;␊ |
1243 | } else {␊ |
1244 | if ( t.unicode )␊ |
1245 | result += QChar(t.unicode);␊ |
1246 | mb=codec->to_unicode_multibyte;␊ |
1247 | }␊ |
1248 | uchars++;␊ |
1249 | }␊ |
1250 | return result;␊ |
1251 | }␊ |
1252 | ␊ |
1253 | /*!␊ |
1254 | Reads a POSIX2 charmap definition from \a iod.␊ |
1255 | The parser recognizes the following lines:␊ |
1256 | <pre>␊ |
1257 | <code_set_name> <i>name</i>␊ |
1258 | <escape_char> <i>character</i>␊ |
1259 | % alias <i>alias</i>␊ |
1260 | CHARMAP␊ |
1261 | <<i>token</i>> /x<i>hexbyte</i> <U<i>unicode</i>> ...␊ |
1262 | <<i>token</i>> /d<i>decbyte</i> <U<i>unicode</i>> ...␊ |
1263 | <<i>token</i>> /<i>octbyte</i> <U<i>unicode</i>> ...␊ |
1264 | <<i>token</i>> /<i>any</i>/<i>any</i>... <U<i>unicode</i>> ...␊ |
1265 | END CHARMAP␊ |
1266 | </pre>␊ |
1267 | ␊ |
1268 | The resulting QTextCodec is returned (and also added to the␊ |
1269 | global list of codecs). The name() of the result is taken␊ |
1270 | from the code_set_name.␊ |
1271 | ␊ |
1272 | Note that a codec constructed in this way uses much more memory␊ |
1273 | and is slower than a hand-written QTextCodec subclass, since␊ |
1274 | tables in code are in memory shared by all applications simultaneously␊ |
1275 | using Qt.␊ |
1276 | ␊ |
1277 | \sa loadCharmapFile()␊ |
1278 | */␊ |
1279 | QTextCodec* QTextCodec::loadCharmap(QIODevice* iod)␊ |
1280 | {␊ |
1281 | QTextCodecFromIOD* r = new QTextCodecFromIOD(iod);␊ |
1282 | if ( !r->ok() ) {␊ |
1283 | delete r;␊ |
1284 | r = 0;␊ |
1285 | }␊ |
1286 | return r;␊ |
1287 | }␊ |
1288 | ␊ |
1289 | /*!␊ |
1290 | A convenience function for loadCharmap().␊ |
1291 | */␊ |
1292 | QTextCodec* QTextCodec::loadCharmapFile(QString filename)␊ |
1293 | {␊ |
1294 | QFile f(filename);␊ |
1295 | if (f.open(IO_ReadOnly)) {␊ |
1296 | QTextCodecFromIOD* r = new QTextCodecFromIOD(&f);␊ |
1297 | if ( !r->ok() )␊ |
1298 | delete r;␊ |
1299 | else␊ |
1300 | return r;␊ |
1301 | }␊ |
1302 | return 0;␊ |
1303 | }␊ |
1304 | #endif //QT_NO_CODECS␊ |
1305 | ␊ |
1306 | ␊ |
1307 | /*!␊ |
1308 | Returns a string representing the current language.␊ |
1309 | */␊ |
1310 | ␊ |
1311 | const char* QTextCodec::locale()␊ |
1312 | {␊ |
1313 | static QCString lang;␊ |
1314 | if ( lang.isEmpty() ) {␊ |
1315 | lang = getenv( "LANG" ); //########Windows??␊ |
1316 | if ( lang.isEmpty() )␊ |
1317 | lang = "C";␊ |
1318 | }␊ |
1319 | return lang;␊ |
1320 | }␊ |
1321 | ␊ |
1322 | ␊ |
1323 | ␊ |
1324 | #ifndef QT_NO_CODECS␊ |
1325 | ␊ |
1326 | class QSimpleTextCodec: public QTextCodec␊ |
1327 | {␊ |
1328 | public:␊ |
1329 | QSimpleTextCodec( int );␊ |
1330 | ~QSimpleTextCodec();␊ |
1331 | ␊ |
1332 | QString toUnicode(const char* chars, int len) const;␊ |
1333 | QCString fromUnicode(const QString& uc, int& lenInOut ) const;␊ |
1334 | ␊ |
1335 | const char* name() const;␊ |
1336 | int mibEnum() const;␊ |
1337 | ␊ |
1338 | int heuristicContentMatch(const char* chars, int len) const;␊ |
1339 | ␊ |
1340 | int heuristicNameMatch(const char* hint) const;␊ |
1341 | ␊ |
1342 | private:␊ |
1343 | int forwardIndex;␊ |
1344 | };␊ |
1345 | ␊ |
1346 | ␊ |
1347 | #define LAST_MIB 2259␊ |
1348 | ␊ |
1349 | static struct {␊ |
1350 | const char * cs;␊ |
1351 | int mib;␊ |
1352 | Q_UINT16 values[128];␊ |
1353 | } unicodevalues[] = {␊ |
1354 | // from RFC 1489, ftp://ftp.isi.edu/in-notes/rfc1489.txt␊ |
1355 | { "KOI8-R", 2084,␊ |
1356 | { 0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524,␊ |
1357 | 0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590,␊ |
1358 | 0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219/**/, 0x221A, 0x2248,␊ |
1359 | 0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7,␊ |
1360 | 0x2550, 0x2551, 0x2552, 0x0451, 0x2553, 0x2554, 0x2555, 0x2556,␊ |
1361 | 0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x255C, 0x255D, 0x255E,␊ |
1362 | 0x255F, 0x2560, 0x2561, 0x0401, 0x2562, 0x2563, 0x2564, 0x2565,␊ |
1363 | 0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x256B, 0x256C, 0x00A9,␊ |
1364 | 0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433,␊ |
1365 | 0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E,␊ |
1366 | 0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,␊ |
1367 | 0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A,␊ |
1368 | 0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413,␊ |
1369 | 0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E,␊ |
1370 | 0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412,␊ |
1371 | 0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A } },␊ |
1372 | // /**/ - The BULLET OPERATOR is confused. Some people think␊ |
1373 | // it should be 0x2022 (BULLET).␊ |
1374 | ␊ |
1375 | // from RFC 2319, ftp://ftp.isi.edu/in-notes/rfc2319.txt␊ |
1376 | { "KOI8-U", 2088,␊ |
1377 | { 0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524,␊ |
1378 | ␉0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590,␊ |
1379 | ␉0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219, 0x221A, 0x2248,␊ |
1380 | ␉0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7,␊ |
1381 | ␉0x2550, 0x2551, 0x2552, 0x0451, 0x0454, 0x2554, 0x0456, 0x0457,␊ |
1382 | ␉0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x0491, 0x255D, 0x255E,␊ |
1383 | ␉0x255F, 0x2560, 0x2561, 0x0401, 0x0404, 0x2563, 0x0406, 0x0407,␊ |
1384 | ␉0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x0490, 0x256C, 0x00A9,␊ |
1385 | ␉0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433,␊ |
1386 | ␉0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E,␊ |
1387 | ␉0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,␊ |
1388 | ␉0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A,␊ |
1389 | ␉0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413,␊ |
1390 | ␉0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E,␊ |
1391 | ␉0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412,␊ |
1392 | ␉0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A } },␊ |
1393 | ␊ |
1394 | // next bits generated from tables on the Unicode 2.0 CD. we can␊ |
1395 | // use these tables since this is part of the transition to using␊ |
1396 | // unicode everywhere in qt.␊ |
1397 | ␊ |
1398 | // $ for A in 8 9 A B C D E F ; do for B in 0 1 2 3 4 5 6 7 8 9 A B C D E F ; do echo 0x${A}${B} 0xFFFD ; done ; done > /tmp/digits ; for a in 8859-* ; do ( awk '/^0x[89ABCDEF]/{ print $1, $2 }' < $a ; cat /tmp/digits ) | sort | uniq -w4 | cut -c6- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/$a ; done␊ |
1399 | ␊ |
1400 | // then I inserted the files manually.␊ |
1401 | { "ISO 8859-2", 5,␊ |
1402 | { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,␊ |
1403 | 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,␊ |
1404 | 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,␊ |
1405 | 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,␊ |
1406 | 0x00A0, 0x0104, 0x02D8, 0x0141, 0x00A4, 0x013D, 0x015A, 0x00A7,␊ |
1407 | 0x00A8, 0x0160, 0x015E, 0x0164, 0x0179, 0x00AD, 0x017D, 0x017B,␊ |
1408 | 0x00B0, 0x0105, 0x02DB, 0x0142, 0x00B4, 0x013E, 0x015B, 0x02C7,␊ |
1409 | 0x00B8, 0x0161, 0x015F, 0x0165, 0x017A, 0x02DD, 0x017E, 0x017C,␊ |
1410 | 0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7,␊ |
1411 | 0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,␊ |
1412 | 0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,␊ |
1413 | 0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,␊ |
1414 | 0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7,␊ |
1415 | 0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,␊ |
1416 | 0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7,␊ |
1417 | 0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9} },␊ |
1418 | { "ISO 8859-3", 6,␊ |
1419 | { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,␊ |
1420 | 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,␊ |
1421 | 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,␊ |
1422 | 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,␊ |
1423 | 0x00A0, 0x0126, 0x02D8, 0x00A3, 0x00A4, 0xFFFD, 0x0124, 0x00A7,␊ |
1424 | 0x00A8, 0x0130, 0x015E, 0x011E, 0x0134, 0x00AD, 0xFFFD, 0x017B,␊ |
1425 | 0x00B0, 0x0127, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x0125, 0x00B7,␊ |
1426 | 0x00B8, 0x0131, 0x015F, 0x011F, 0x0135, 0x00BD, 0xFFFD, 0x017C,␊ |
1427 | 0x00C0, 0x00C1, 0x00C2, 0xFFFD, 0x00C4, 0x010A, 0x0108, 0x00C7,␊ |
1428 | 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,␊ |
1429 | 0xFFFD, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x0120, 0x00D6, 0x00D7,␊ |
1430 | 0x011C, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x016C, 0x015C, 0x00DF,␊ |
1431 | 0x00E0, 0x00E1, 0x00E2, 0xFFFD, 0x00E4, 0x010B, 0x0109, 0x00E7,␊ |
1432 | 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,␊ |
1433 | 0xFFFD, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x0121, 0x00F6, 0x00F7,␊ |
1434 | 0x011D, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x016D, 0x015D, 0x02D9} },␊ |
1435 | { "ISO 8859-4", 7,␊ |
1436 | { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,␊ |
1437 | 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,␊ |
1438 | 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,␊ |
1439 | 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,␊ |
1440 | 0x00A0, 0x0104, 0x0138, 0x0156, 0x00A4, 0x0128, 0x013B, 0x00A7,␊ |
1441 | 0x00A8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00AD, 0x017D, 0x00AF,␊ |
1442 | 0x00B0, 0x0105, 0x02DB, 0x0157, 0x00B4, 0x0129, 0x013C, 0x02C7,␊ |
1443 | 0x00B8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014A, 0x017E, 0x014B,␊ |
1444 | 0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E,␊ |
1445 | 0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x012A,␊ |
1446 | 0x0110, 0x0145, 0x014C, 0x0136, 0x00D4, 0x00D5, 0x00D6, 0x00D7,␊ |
1447 | 0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x0168, 0x016A, 0x00DF,␊ |
1448 | 0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F,␊ |
1449 | 0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x012B,␊ |
1450 | 0x0111, 0x0146, 0x014D, 0x0137, 0x00F4, 0x00F5, 0x00F6, 0x00F7,␊ |
1451 | 0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x0169, 0x016B, 0x02D9} },␊ |
1452 | { "ISO 8859-5", 8,␊ |
1453 | { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,␊ |
1454 | 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,␊ |
1455 | 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,␊ |
1456 | 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,␊ |
1457 | 0x00A0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,␊ |
1458 | 0x0408, 0x0409, 0x040A, 0x040B, 0x040C, 0x00AD, 0x040E, 0x040F,␊ |
1459 | 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,␊ |
1460 | 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,␊ |
1461 | 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,␊ |
1462 | 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,␊ |
1463 | 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,␊ |
1464 | 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,␊ |
1465 | 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,␊ |
1466 | 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F,␊ |
1467 | 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,␊ |
1468 | 0x0458, 0x0459, 0x045A, 0x045B, 0x045C, 0x00A7, 0x045E, 0x045F} },␊ |
1469 | { "ISO 8859-6-I", 82,␊ |
1470 | { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,␊ |
1471 | 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,␊ |
1472 | 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,␊ |
1473 | 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,␊ |
1474 | 0x00A0, 0xFFFD, 0xFFFD, 0xFFFD, 0x00A4, 0xFFFD, 0xFFFD, 0xFFFD,␊ |
1475 | 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x060C, 0x00AD, 0xFFFD, 0xFFFD,␊ |
1476 | 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,␊ |
1477 | 0xFFFD, 0xFFFD, 0xFFFD, 0x061B, 0xFFFD, 0xFFFD, 0xFFFD, 0x061F,␊ |
1478 | 0xFFFD, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,␊ |
1479 | 0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F,␊ |
1480 | 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,␊ |
1481 | 0x0638, 0x0639, 0x063A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,␊ |
1482 | 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,␊ |
1483 | 0x0648, 0x0649, 0x064A, 0x064B, 0x064C, 0x064D, 0x064E, 0x064F,␊ |
1484 | 0x0650, 0x0651, 0x0652, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,␊ |
1485 | 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} },␊ |
1486 | { "ISO 8859-7", 10,␊ |
1487 | { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,␊ |
1488 | 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,␊ |
1489 | 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,␊ |
1490 | 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,␊ |
1491 | 0x00A0, 0x2018, 0x2019, 0x00A3, 0xFFFD, 0xFFFD, 0x00A6, 0x00A7,␊ |
1492 | 0x00A8, 0x00A9, 0xFFFD, 0x00AB, 0x00AC, 0x00AD, 0xFFFD, 0x2015,␊ |
1493 | 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x0385, 0x0386, 0x00B7,␊ |
1494 | 0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F,␊ |
1495 | 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,␊ |
1496 | 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,␊ |
1497 | 0x03A0, 0x03A1, 0xFFFD, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7,␊ |
1498 | 0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF,␊ |
1499 | 0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7,␊ |
1500 | 0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF,␊ |
1501 | 0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7,␊ |
1502 | 0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0xFFFD} },␊ |
1503 | { "ISO 8859-8-I", 85,␊ |
1504 | { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,␊ |
1505 | 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,␊ |
1506 | 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,␊ |
1507 | 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,␊ |
1508 | 0x00A0, 0xFFFD, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,␊ |
1509 | 0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x203E,␊ |
1510 | 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,␊ |
1511 | 0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0xFFFD,␊ |
1512 | 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,␊ |
1513 | 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,␊ |
1514 | 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,␊ |
1515 | 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2017,␊ |
1516 | 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7,␊ |
1517 | 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF,␊ |
1518 | 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7,␊ |
1519 | 0x05E8, 0x05E9, 0x05EA, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} },␊ |
1520 | { "ISO 8859-9", 12,␊ |
1521 | { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,␊ |
1522 | 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,␊ |
1523 | 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,␊ |
1524 | 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,␊ |
1525 | 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,␊ |
1526 | 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,␊ |
1527 | 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,␊ |
1528 | 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,␊ |
1529 | 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,␊ |
1530 | 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,␊ |
1531 | 0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,␊ |
1532 | 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF,␊ |
1533 | 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,␊ |
1534 | 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,␊ |
1535 | 0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,␊ |
1536 | 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF} },␊ |
1537 | { "ISO 8859-10", 13,␊ |
1538 | { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,␊ |
1539 | 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,␊ |
1540 | 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,␊ |
1541 | 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,␊ |
1542 | 0x00A0, 0x0104, 0x0112, 0x0122, 0x012A, 0x0128, 0x0136, 0x00A7,␊ |
1543 | 0x013B, 0x0110, 0x0160, 0x0166, 0x017D, 0x00AD, 0x016A, 0x014A,␊ |
1544 | 0x00B0, 0x0105, 0x0113, 0x0123, 0x012B, 0x0129, 0x0137, 0x00B7,␊ |
1545 | 0x013C, 0x0111, 0x0161, 0x0167, 0x017E, 0x2015, 0x016B, 0x014B,␊ |
1546 | 0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E,␊ |
1547 | 0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x00CF,␊ |
1548 | 0x00D0, 0x0145, 0x014C, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x0168,␊ |
1549 | 0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,␊ |
1550 | 0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F,␊ |
1551 | 0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x00EF,␊ |
1552 | 0x00F0, 0x0146, 0x014D, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x0169,␊ |
1553 | 0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x0138} },␊ |
1554 | { "ISO 8859-13", 109,␊ |
1555 | { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,␊ |
1556 | 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,␊ |
1557 | 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,␊ |
1558 | 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,␊ |
1559 | 0x00A0, 0x201D, 0x00A2, 0x00A3, 0x00A4, 0x201E, 0x00A6, 0x00A7,␊ |
1560 | 0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6,␊ |
1561 | 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x201C, 0x00B5, 0x00B6, 0x00B7,␊ |
1562 | 0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6,␊ |
1563 | 0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112,␊ |
1564 | 0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B,␊ |
1565 | 0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7,␊ |
1566 | 0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF,␊ |
1567 | 0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113,␊ |
1568 | 0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C,␊ |
1569 | 0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7,␊ |
1570 | 0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x2019} },␊ |
1571 | { "ISO 8859-14", 110,␊ |
1572 | { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,␊ |
1573 | 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,␊ |
1574 | 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,␊ |
1575 | 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,␊ |
1576 | 0x00A0, 0x1E02, 0x1E03, 0x00A3, 0x010A, 0x010B, 0x1E0A, 0x00A7,␊ |
1577 | 0x1E80, 0x00A9, 0x1E82, 0x1E0B, 0x1EF2, 0x00AD, 0x00AE, 0x0178,␊ |
1578 | 0x1E1E, 0x1E1F, 0x0120, 0x0121, 0x1E40, 0x1E41, 0x00B6, 0x1E56,␊ |
1579 | 0x1E81, 0x1E57, 0x1E83, 0x1E60, 0x1EF3, 0x1E84, 0x1E85, 0x1E61,␊ |
1580 | 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,␊ |
1581 | 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,␊ |
1582 | 0x0174, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x1E6A,␊ |
1583 | 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x0176, 0x00DF,␊ |
1584 | 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,␊ |
1585 | 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,␊ |
1586 | 0x0175, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x1E6B,␊ |
1587 | 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x0177, 0x00FF} },␊ |
1588 | { "ISO 8859-15", 111,␊ |
1589 | { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,␊ |
1590 | 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,␊ |
1591 | 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,␊ |
1592 | 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,␊ |
1593 | 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AC, 0x00A5, 0x0160, 0x00A7,␊ |
1594 | 0x0161, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,␊ |
1595 | 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x017D, 0x00B5, 0x00B6, 0x00B7,␊ |
1596 | 0x017E, 0x00B9, 0x00BA, 0x00BB, 0x0152, 0x0153, 0x0178, 0x00BF,␊ |
1597 | 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,␊ |
1598 | 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,␊ |
1599 | 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,␊ |
1600 | 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,␊ |
1601 | 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,␊ |
1602 | 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,␊ |
1603 | 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,␊ |
1604 | 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF} },␊ |
1605 | ␊ |
1606 | // next bits generated again from tables on the Unicode 3.0 CD.␊ |
1607 | ␊ |
1608 | // $ for a in CP* ; do ( awk '/^0x[89ABCDEF]/{ print $1, $2 }' < $a ) | sort | sed -e 's/#UNDEF.*$/0xFFFD/' | cut -c6- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/$a ; done␊ |
1609 | ␊ |
1610 | { "CP 874", 0, //### what is the mib?␊ |
1611 | { 0x20AC, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2026, 0xFFFD, 0xFFFD,␊ |
1612 | 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,␊ |
1613 | 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,␊ |
1614 | 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,␊ |
1615 | 0x00A0, 0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07,␊ |
1616 | 0x0E08, 0x0E09, 0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F,␊ |
1617 | 0x0E10, 0x0E11, 0x0E12, 0x0E13, 0x0E14, 0x0E15, 0x0E16, 0x0E17,␊ |
1618 | 0x0E18, 0x0E19, 0x0E1A, 0x0E1B, 0x0E1C, 0x0E1D, 0x0E1E, 0x0E1F,␊ |
1619 | 0x0E20, 0x0E21, 0x0E22, 0x0E23, 0x0E24, 0x0E25, 0x0E26, 0x0E27,␊ |
1620 | 0x0E28, 0x0E29, 0x0E2A, 0x0E2B, 0x0E2C, 0x0E2D, 0x0E2E, 0x0E2F,␊ |
1621 | 0x0E30, 0x0E31, 0x0E32, 0x0E33, 0x0E34, 0x0E35, 0x0E36, 0x0E37,␊ |
1622 | 0x0E38, 0x0E39, 0x0E3A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x0E3F,␊ |
1623 | 0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0x0E45, 0x0E46, 0x0E47,␊ |
1624 | 0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D, 0x0E4E, 0x0E4F,␊ |
1625 | 0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57,␊ |
1626 | 0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} },␊ |
1627 | { "CP 1250", 2250,␊ |
1628 | { 0x20AC, 0xFFFD, 0x201A, 0xFFFD, 0x201E, 0x2026, 0x2020, 0x2021,␊ |
1629 | 0xFFFD, 0x2030, 0x0160, 0x2039, 0x015A, 0x0164, 0x017D, 0x0179,␊ |
1630 | 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,␊ |
1631 | 0xFFFD, 0x2122, 0x0161, 0x203A, 0x015B, 0x0165, 0x017E, 0x017A,␊ |
1632 | 0x00A0, 0x02C7, 0x02D8, 0x0141, 0x00A4, 0x0104, 0x00A6, 0x00A7,␊ |
1633 | 0x00A8, 0x00A9, 0x015E, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x017B,␊ |
1634 | 0x00B0, 0x00B1, 0x02DB, 0x0142, 0x00B4, 0x00B5, 0x00B6, 0x00B7,␊ |
1635 | 0x00B8, 0x0105, 0x015F, 0x00BB, 0x013D, 0x02DD, 0x013E, 0x017C,␊ |
1636 | 0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7,␊ |
1637 | 0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,␊ |
1638 | 0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,␊ |
1639 | 0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,␊ |
1640 | 0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7,␊ |
1641 | 0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,␊ |
1642 | 0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7,␊ |
1643 | 0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9} },␊ |
1644 | { "CP 1251", 2251,␊ |
1645 | { 0x0402, 0x0403, 0x201A, 0x0453, 0x201E, 0x2026, 0x2020, 0x2021,␊ |
1646 | 0x20AC, 0x2030, 0x0409, 0x2039, 0x040A, 0x040C, 0x040B, 0x040F,␊ |
1647 | 0x0452, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,␊ |
1648 | 0xFFFD, 0x2122, 0x0459, 0x203A, 0x045A, 0x045C, 0x045B, 0x045F,␊ |
1649 | 0x00A0, 0x040E, 0x045E, 0x0408, 0x00A4, 0x0490, 0x00A6, 0x00A7,␊ |
1650 | 0x0401, 0x00A9, 0x0404, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x0407,␊ |
1651 | 0x00B0, 0x00B1, 0x0406, 0x0456, 0x0491, 0x00B5, 0x00B6, 0x00B7,␊ |
1652 | 0x0451, 0x2116, 0x0454, 0x00BB, 0x0458, 0x0405, 0x0455, 0x0457,␊ |
1653 | 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,␊ |
1654 | 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,␊ |
1655 | 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,␊ |
1656 | 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,␊ |
1657 | 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,␊ |
1658 | 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,␊ |
1659 | 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,␊ |
1660 | 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F} },␊ |
1661 | { "CP 1252", 2252,␊ |
1662 | { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,␊ |
1663 | 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD,␊ |
1664 | 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,␊ |
1665 | 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178,␊ |
1666 | 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,␊ |
1667 | 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,␊ |
1668 | 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,␊ |
1669 | 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,␊ |
1670 | 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,␊ |
1671 | 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,␊ |
1672 | 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,␊ |
1673 | 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,␊ |
1674 | 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,␊ |
1675 | 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,␊ |
1676 | 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,␊ |
1677 | 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF} },␊ |
1678 | { "CP 1253", 2253,␊ |
1679 | { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,␊ |
1680 | 0xFFFD, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,␊ |
1681 | 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,␊ |
1682 | 0xFFFD, 0x2122, 0xFFFD, 0x203A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,␊ |
1683 | 0x00A0, 0x0385, 0x0386, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,␊ |
1684 | 0x00A8, 0x00A9, 0xFFFD, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x2015,␊ |
1685 | 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x00B5, 0x00B6, 0x00B7,␊ |
1686 | 0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F,␊ |
1687 | 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,␊ |
1688 | 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,␊ |
1689 | 0x03A0, 0x03A1, 0xFFFD, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7,␊ |
1690 | 0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF,␊ |
1691 | 0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7,␊ |
1692 | 0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF,␊ |
1693 | 0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7,␊ |
1694 | 0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0xFFFD} },␊ |
1695 | { "CP 1254", 2254,␊ |
1696 | { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,␊ |
1697 | 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0xFFFD, 0xFFFD,␊ |
1698 | 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,␊ |
1699 | 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0xFFFD, 0x0178,␊ |
1700 | 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,␊ |
1701 | 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,␊ |
1702 | 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,␊ |
1703 | 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,␊ |
1704 | 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,␊ |
1705 | 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,␊ |
1706 | 0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,␊ |
1707 | 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF,␊ |
1708 | 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,␊ |
1709 | 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,␊ |
1710 | 0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,␊ |
1711 | 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF} },␊ |
1712 | { "CP 1255", 2255,␊ |
1713 | { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,␊ |
1714 | 0x02C6, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,␊ |
1715 | 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,␊ |
1716 | 0x02DC, 0x2122, 0xFFFD, 0x203A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,␊ |
1717 | 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AA, 0x00A5, 0x00A6, 0x00A7,␊ |
1718 | 0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,␊ |
1719 | 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,␊ |
1720 | 0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,␊ |
1721 | 0x05B0, 0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7,␊ |
1722 | 0x05B8, 0x05B9, 0xFFFD, 0x05BB, 0x05BC, 0x05BD, 0x05BE, 0x05BF,␊ |
1723 | 0x05C0, 0x05C1, 0x05C2, 0x05C3, 0x05F0, 0x05F1, 0x05F2, 0x05F3,␊ |
1724 | 0x05F4, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,␊ |
1725 | 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7,␊ |
1726 | 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF,␊ |
1727 | 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7,␊ |
1728 | 0x05E8, 0x05E9, 0x05EA, 0xFFFD, 0xFFFD, 0x200E, 0x200F, 0xFFFD} },␊ |
1729 | { "CP 1256", 2256,␊ |
1730 | { 0x20AC, 0x067E, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,␊ |
1731 | 0x02C6, 0x2030, 0x0679, 0x2039, 0x0152, 0x0686, 0x0698, 0x0688,␊ |
1732 | 0x06AF, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,␊ |
1733 | 0x06A9, 0x2122, 0x0691, 0x203A, 0x0153, 0x200C, 0x200D, 0x06BA,␊ |
1734 | 0x00A0, 0x060C, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,␊ |
1735 | 0x00A8, 0x00A9, 0x06BE, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,␊ |
1736 | 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,␊ |
1737 | 0x00B8, 0x00B9, 0x061B, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x061F,␊ |
1738 | 0x06C1, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,␊ |
1739 | 0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F,␊ |
1740 | 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x00D7,␊ |
1741 | 0x0637, 0x0638, 0x0639, 0x063A, 0x0640, 0x0641, 0x0642, 0x0643,␊ |
1742 | 0x00E0, 0x0644, 0x00E2, 0x0645, 0x0646, 0x0647, 0x0648, 0x00E7,␊ |
1743 | 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x0649, 0x064A, 0x00EE, 0x00EF,␊ |
1744 | 0x064B, 0x064C, 0x064D, 0x064E, 0x00F4, 0x064F, 0x0650, 0x00F7,␊ |
1745 | 0x0651, 0x00F9, 0x0652, 0x00FB, 0x00FC, 0x200E, 0x200F, 0x06D2} },␊ |
1746 | { "CP 1257", 2257,␊ |
1747 | { 0x20AC, 0xFFFD, 0x201A, 0xFFFD, 0x201E, 0x2026, 0x2020, 0x2021,␊ |
1748 | 0xFFFD, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0x00A8, 0x02C7, 0x00B8,␊ |
1749 | 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,␊ |
1750 | 0xFFFD, 0x2122, 0xFFFD, 0x203A, 0xFFFD, 0x00AF, 0x02DB, 0xFFFD,␊ |
1751 | 0x00A0, 0xFFFD, 0x00A2, 0x00A3, 0x00A4, 0xFFFD, 0x00A6, 0x00A7,␊ |
1752 | 0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6,␊ |
1753 | 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,␊ |
1754 | 0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6,␊ |
1755 | 0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112,␊ |
1756 | 0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B,␊ |
1757 | 0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7,␊ |
1758 | 0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF,␊ |
1759 | 0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113,␊ |
1760 | 0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C,␊ |
1761 | 0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7,␊ |
1762 | 0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x02D9} },␊ |
1763 | { "CP 1258", 2258,␊ |
1764 | { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,␊ |
1765 | 0x02C6, 0x2030, 0xFFFD, 0x2039, 0x0152, 0xFFFD, 0xFFFD, 0xFFFD,␊ |
1766 | 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,␊ |
1767 | 0x02DC, 0x2122, 0xFFFD, 0x203A, 0x0153, 0xFFFD, 0xFFFD, 0x0178,␊ |
1768 | 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,␊ |
1769 | 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,␊ |
1770 | 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,␊ |
1771 | 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,␊ |
1772 | 0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x00C5, 0x00C6, 0x00C7,␊ |
1773 | 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x0300, 0x00CD, 0x00CE, 0x00CF,␊ |
1774 | 0x0110, 0x00D1, 0x0309, 0x00D3, 0x00D4, 0x01A0, 0x00D6, 0x00D7,␊ |
1775 | 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x01AF, 0x0303, 0x00DF,␊ |
1776 | 0x00E0, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x00E5, 0x00E6, 0x00E7,␊ |
1777 | 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x0301, 0x00ED, 0x00EE, 0x00EF,␊ |
1778 | 0x0111, 0x00F1, 0x0323, 0x00F3, 0x00F4, 0x01A1, 0x00F6, 0x00F7,␊ |
1779 | 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x01B0, 0x20AB, 0x00FF} },␊ |
1780 | ␊ |
1781 | // this one is generated from the charmap file located in /usr/share/i18n/charmaps␊ |
1782 | // on most Linux distributions. The thai character set tis620 is byte by byte equivalent␊ |
1783 | // to iso8859-11, so we name it 8859-11 here, but recognise the name tis620 too.␊ |
1784 | ␊ |
1785 | // $ for A in 8 9 A B C D E F ; do for B in 0 1 2 3 4 5 6 7 8 9 A B C D E F ; do echo x${A}${B} 0xFFFD ; done ; done > /tmp/digits ; ( cut -c25- < TIS-620 ; cat /tmp/digits ) | awk '/^x[89ABCDEF]/{ print $1, $2 }' | sed -e 's/<U/0x/' -e 's/>//' | sort | uniq -w4 | cut -c5- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/tis-620␊ |
1786 | { "ISO 8859-11", 2259, // Thai character set mib enum taken from tis620 (which is byte by byte equivalent)␊ |
1787 | { 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,␊ |
1788 | 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,␊ |
1789 | 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,␊ |
1790 | 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,␊ |
1791 | 0xFFFD, 0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07,␊ |
1792 | 0x0E08, 0x0E09, 0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F,␊ |
1793 | 0x0E10, 0x0E11, 0x0E12, 0x0E13, 0x0E14, 0x0E15, 0x0E16, 0x0E17,␊ |
1794 | 0x0E18, 0x0E19, 0x0E1A, 0x0E1B, 0x0E1C, 0x0E1D, 0x0E1E, 0x0E1F,␊ |
1795 | 0x0E20, 0x0E21, 0x0E22, 0x0E23, 0x0E24, 0x0E25, 0x0E26, 0x0E27,␊ |
1796 | 0x0E28, 0x0E29, 0x0E2A, 0x0E2B, 0x0E2C, 0x0E2D, 0x0E2E, 0x0E2F,␊ |
1797 | 0x0E30, 0x0E31, 0x0E32, 0x0E33, 0x0E34, 0x0E35, 0x0E36, 0x0E37,␊ |
1798 | 0x0E38, 0x0E39, 0x0E3A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x0E3F,␊ |
1799 | 0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0x0E45, 0x0E46, 0x0E47,␊ |
1800 | 0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D, 0x0E4E, 0x0E4F,␊ |
1801 | 0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57,␊ |
1802 | 0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} },␊ |
1803 | ␊ |
1804 | // change LAST_MIB if you add more, and edit unicodevalues in␊ |
1805 | // kernel/qpsprinter.cpp too.␊ |
1806 | };␊ |
1807 | ␊ |
1808 | ␊ |
1809 | static const QSimpleTextCodec * reverseOwner = 0;␊ |
1810 | static QArray<char> * reverseMap = 0;␊ |
1811 | ␊ |
1812 | ␊ |
1813 | QSimpleTextCodec::QSimpleTextCodec( int i )␊ |
1814 | : QTextCodec(), forwardIndex( i )␊ |
1815 | {␊ |
1816 | }␊ |
1817 | ␊ |
1818 | ␊ |
1819 | QSimpleTextCodec::~QSimpleTextCodec()␊ |
1820 | {␊ |
1821 | if ( reverseOwner == this ) {␊ |
1822 | delete reverseMap;␊ |
1823 | reverseMap = 0;␊ |
1824 | reverseOwner = 0;␊ |
1825 | }␊ |
1826 | }␊ |
1827 | ␊ |
1828 | // what happens if strlen(chars)<len? what happens if !chars? if len<1?␊ |
1829 | QString QSimpleTextCodec::toUnicode(const char* chars, int len) const␊ |
1830 | {␊ |
1831 | if(len <= 0)␊ |
1832 | return QString::null;␊ |
1833 | ␊ |
1834 | int clen = qstrlen(chars);␊ |
1835 | len = QMIN(len, clen); // Note: NUL ends string␊ |
1836 | ␊ |
1837 | QString r;␊ |
1838 | r.setUnicode(0, len);␊ |
1839 | QChar* uc = (QChar*)r.unicode(); // const_cast␊ |
1840 | const unsigned char * c = (const unsigned char *)chars;␊ |
1841 | for( int i=0; i<len; i++ ) {␊ |
1842 | if ( c[i] > 127 )␊ |
1843 | uc[i] = unicodevalues[forwardIndex].values[c[i]-128];␊ |
1844 | else␊ |
1845 | uc[i] = c[i];␊ |
1846 | }␊ |
1847 | return r;␊ |
1848 | }␊ |
1849 | ␊ |
1850 | ␊ |
1851 | QCString QSimpleTextCodec::fromUnicode(const QString& uc, int& len ) const␊ |
1852 | {␊ |
1853 | if ( reverseOwner != this ) {␊ |
1854 | int m = 0;␊ |
1855 | int i = 0;␊ |
1856 | while( i < 128 ) {␊ |
1857 | if ( unicodevalues[forwardIndex].values[i] > m &&␊ |
1858 | unicodevalues[forwardIndex].values[i] < 0xfffd )␊ |
1859 | m = unicodevalues[forwardIndex].values[i];␊ |
1860 | i++;␊ |
1861 | }␊ |
1862 | m++;␊ |
1863 | if ( !reverseMap )␊ |
1864 | reverseMap = new QArray<char>( m );␊ |
1865 | if ( m > (int)(reverseMap->size()) )␊ |
1866 | reverseMap->resize( m );␊ |
1867 | for( i = 0; i < 128 && i < m; i++ )␊ |
1868 | (*reverseMap)[i] = (char)i;␊ |
1869 | for( ;i < m; i++ )␊ |
1870 | (*reverseMap)[i] = '?';␊ |
1871 | for( i=128; i<256; i++ ) {␊ |
1872 | int u = unicodevalues[forwardIndex].values[i-128];␊ |
1873 | if ( u < m )␊ |
1874 | (*reverseMap)[u] = (char)(unsigned char)(i);␊ |
1875 | }␊ |
1876 | reverseOwner = this;␊ |
1877 | }␊ |
1878 | if ( len <0 || len > (int)uc.length() )␊ |
1879 | len = uc.length();␊ |
1880 | QCString r( len+1 );␊ |
1881 | int i = len;␊ |
1882 | int u;␊ |
1883 | const QChar* ucp = uc.unicode();␊ |
1884 | char* rp = r.data();␊ |
1885 | char* rmp = reverseMap->data();␊ |
1886 | int rmsize = (int) reverseMap->size();␊ |
1887 | while( i-- )␊ |
1888 | {␊ |
1889 | u = ucp->unicode();␊ |
1890 | *rp++ = u < 128 ? u : (( u < rmsize ) ? (*(rmp+u)) : '?' );␊ |
1891 | ucp++;␊ |
1892 | }␊ |
1893 | r[len] = 0;␊ |
1894 | return r;␊ |
1895 | }␊ |
1896 | ␊ |
1897 | ␊ |
1898 | const char* QSimpleTextCodec::name() const␊ |
1899 | {␊ |
1900 | return unicodevalues[forwardIndex].cs;␊ |
1901 | }␊ |
1902 | ␊ |
1903 | ␊ |
1904 | int QSimpleTextCodec::mibEnum() const␊ |
1905 | {␊ |
1906 | return unicodevalues[forwardIndex].mib;␊ |
1907 | }␊ |
1908 | ␊ |
1909 | int QSimpleTextCodec::heuristicNameMatch(const char* hint) const␊ |
1910 | {␊ |
1911 | if ( hint[0]=='k' ) {␊ |
1912 | // Help people with messy fonts␊ |
1913 | if ( QCString(hint) == "koi8-1" )␊ |
1914 | return QTextCodec::heuristicNameMatch("koi8-r")-1;␊ |
1915 | if ( QCString(hint) == "koi8-ru" )␊ |
1916 | return QTextCodec::heuristicNameMatch("koi8-r")-1;␊ |
1917 | } else if ( hint[0] == 't' && QCString(name()) == "ISO 8859-11" ) {␊ |
1918 | ␉// 8859-11 and tis620 are byte by bute equivalent␊ |
1919 | ␉int i = simpleHeuristicNameMatch("tis620-0", hint);␊ |
1920 | ␉if( !i )␊ |
1921 | ␉ i = simpleHeuristicNameMatch("tis-620", hint);␊ |
1922 | ␉if( i ) return i;␊ |
1923 | }␊ |
1924 | return QTextCodec::heuristicNameMatch(hint);␊ |
1925 | }␊ |
1926 | ␊ |
1927 | int QSimpleTextCodec::heuristicContentMatch(const char* chars, int len) const␊ |
1928 | {␊ |
1929 | if ( len<1 || !chars )␊ |
1930 | return -1;␊ |
1931 | int i = 0;␊ |
1932 | const uchar * c = (const unsigned char *)chars;␊ |
1933 | int r = 0;␊ |
1934 | while( i<len && c && *c ) {␊ |
1935 | if ( *c >= 128 ) {␊ |
1936 | if ( unicodevalues[forwardIndex].values[(*c)-128] == 0xfffd )␊ |
1937 | return -1;␊ |
1938 | }␊ |
1939 | if ( (*c >= ' ' && *c < 127) ||␊ |
1940 | *c == '\n' || *c == '\t' || *c == '\r' )␊ |
1941 | r++;␊ |
1942 | i++;␊ |
1943 | c++;␊ |
1944 | }␊ |
1945 | if ( mibEnum()==4 )␊ |
1946 | r+=1;␊ |
1947 | return r;␊ |
1948 | }␊ |
1949 | ␊ |
1950 | ␊ |
1951 | #endif // QT_NO_CODECS␊ |
1952 | ␊ |
1953 | class QLatin1Codec: public QTextCodec␊ |
1954 | {␊ |
1955 | public:␊ |
1956 | QLatin1Codec();␊ |
1957 | ~QLatin1Codec();␊ |
1958 | ␊ |
1959 | QString toUnicode(const char* chars, int len) const;␊ |
1960 | QCString fromUnicode(const QString& uc, int& lenInOut ) const;␊ |
1961 | ␊ |
1962 | const char* name() const;␊ |
1963 | int mibEnum() const;␊ |
1964 | ␊ |
1965 | int heuristicContentMatch(const char* chars, int len) const;␊ |
1966 | ␊ |
1967 | int heuristicNameMatch(const char* hint) const;␊ |
1968 | ␊ |
1969 | private:␊ |
1970 | int forwardIndex;␊ |
1971 | };␊ |
1972 | ␊ |
1973 | ␊ |
1974 | QLatin1Codec::QLatin1Codec()␊ |
1975 | : QTextCodec()␊ |
1976 | {␊ |
1977 | }␊ |
1978 | ␊ |
1979 | ␊ |
1980 | QLatin1Codec::~QLatin1Codec()␊ |
1981 | {␊ |
1982 | }␊ |
1983 | ␊ |
1984 | // what happens if strlen(chars)<len? what happens if !chars? if len<1?␊ |
1985 | QString QLatin1Codec::toUnicode(const char* chars, int len) const␊ |
1986 | {␊ |
1987 | if(len <= 0)␊ |
1988 | return QString::null;␊ |
1989 | ␊ |
1990 | return QString::fromLatin1(chars, len);␊ |
1991 | }␊ |
1992 | ␊ |
1993 | ␊ |
1994 | QCString QLatin1Codec::fromUnicode(const QString& uc, int& len ) const␊ |
1995 | {␊ |
1996 | if ( len <0 || len > (int)uc.length() )␊ |
1997 | len = uc.length();␊ |
1998 | QCString r( len+1 );␊ |
1999 | int i = 0;␊ |
2000 | const QChar *ch = uc.unicode();␊ |
2001 | while ( i < len ) {␊ |
2002 | ␉r[i] = ch->row() ? '?' : ch->cell();␊ |
2003 | ␉i++;␊ |
2004 | ␉ch++;␊ |
2005 | }␊ |
2006 | r[len] = 0;␊ |
2007 | return r;␊ |
2008 | }␊ |
2009 | ␊ |
2010 | ␊ |
2011 | const char* QLatin1Codec::name() const␊ |
2012 | {␊ |
2013 | return "ISO 8859-1";␊ |
2014 | }␊ |
2015 | ␊ |
2016 | ␊ |
2017 | int QLatin1Codec::mibEnum() const␊ |
2018 | {␊ |
2019 | return 4;␊ |
2020 | }␊ |
2021 | ␊ |
2022 | int QLatin1Codec::heuristicNameMatch(const char* hint) const␊ |
2023 | {␊ |
2024 | return QTextCodec::heuristicNameMatch(hint);␊ |
2025 | }␊ |
2026 | ␊ |
2027 | int QLatin1Codec::heuristicContentMatch(const char* chars, int len) const␊ |
2028 | {␊ |
2029 | if ( len<1 || !chars )␊ |
2030 | return -1;␊ |
2031 | int i = 0;␊ |
2032 | const uchar * c = (const unsigned char *)chars;␊ |
2033 | int r = 0;␊ |
2034 | while( i<len && c && *c ) {␊ |
2035 | if ( *c >= 0x80 && *c < 0xa0 )␊ |
2036 | return -1;␊ |
2037 | if ( (*c >= ' ' && *c < 127) ||␊ |
2038 | *c == '\n' || *c == '\t' || *c == '\r' )␊ |
2039 | r++;␊ |
2040 | i++;␊ |
2041 | c++;␊ |
2042 | }␊ |
2043 | return r;␊ |
2044 | }␊ |
2045 | ␊ |
2046 | ␊ |
2047 | static void setupBuiltinCodecs()␊ |
2048 | {␊ |
2049 | (void)new QLatin1Codec;␊ |
2050 | ␊ |
2051 | #ifndef QT_NO_CODECS␊ |
2052 | int i = 0;␊ |
2053 | do {␊ |
2054 | (void)new QSimpleTextCodec( i );␊ |
2055 | } while( unicodevalues[i++].mib != LAST_MIB );␊ |
2056 | ␊ |
2057 | //(void)new QEucJpCodec;␊ |
2058 | //(void)new QSjisCodec;␊ |
2059 | //(void)new QJisCodec;␊ |
2060 | //(void)new QEucKrCodec;␊ |
2061 | //(void)new QGbkCodec;␊ |
2062 | //(void)new QBig5Codec;␊ |
2063 | (void)new QUtf8Codec;␊ |
2064 | (void)new QUtf16Codec;␊ |
2065 | //(void)new QHebrewCodec;␊ |
2066 | //(void)new QArabicCodec;␊ |
2067 | //(void)new QTsciiCodec;␊ |
2068 | #endif // QT_NO_CODECS␊ |
2069 | }␊ |
2070 | ␊ |
2071 | #endif // QT_NO_TEXTCODEC␊ |
2072 |