Chameleon

Chameleon Svn Source Tree

Root/branches/xZenu/src/util/doxygen/qtools/qtextcodec.cpp

Source at commit 1322 created 12 years 8 months ago.
By meklort, Add doxygen to utils folder
1/****************************************************************************
2**
3**
4** Implementation of QTextCodec class
5**
6** Created : 981015
7**
8** Copyright (C)1998-2000 Trolltech AS. All rights reserved.
9**
10** This file is part of the tools module of the Qt GUI Toolkit.
11**
12** This file may be distributed under the terms of the Q Public License
13** as defined by Trolltech AS of Norway and appearing in the file
14** LICENSE.QPL included in the packaging of this file.
15**
16** This file may be distributed and/or modified under the terms of the
17** GNU General Public License version 2 as published by the Free Software
18** Foundation and appearing in the file LICENSE.GPL included in the
19** packaging of this file.
20**
21** Licensees holding valid Qt Enterprise Edition or Qt Professional Edition
22** licenses may use this file in accordance with the Qt Commercial License
23** Agreement provided with the Software.
24**
25** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
26** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
27**
28** See http://www.trolltech.com/pricing.html or email sales@trolltech.com for
29** information about Qt Commercial License Agreements.
30** See http://www.trolltech.com/qpl/ for QPL licensing information.
31** See http://www.trolltech.com/gpl/ for GPL licensing information.
32**
33** Contact info@trolltech.com if any conditions of this licensing are
34** not clear to you.
35**
36**********************************************************************/
37
38#include "qtextcodec.h"
39#ifndef QT_NO_TEXTCODEC
40
41#include "qlist.h"
42#ifndef QT_NO_CODECS
43#include "qutfcodec.h"
44//#include "qgbkcodec.h"
45//#include "qeucjpcodec.h"
46//#include "qjiscodec.h"
47//#include "qsjiscodec.h"
48//#include "qeuckrcodec.h"
49//#include "qbig5codec.h"
50//#include "qrtlcodec.h"
51//#include "qtsciicodec.h"
52#endif
53
54#include "qfile.h"
55#include "qstrlist.h"
56#include "qstring.h"
57
58#include <stdlib.h>
59#include <ctype.h>
60#include <locale.h>
61
62
63static QList<QTextCodec> * all = 0;
64static bool destroying_is_ok; // starts out as 0
65
66/*! Deletes all the created codecs.
67
68 \warning Do not call this function.
69
70 QApplication calls this just before exiting, to delete any
71 QTextCodec objects that may be lying around. Since various other
72 classes hold pointers to QTextCodec objects, it is not safe to call
73 this function earlier.
74
75 If you are using the utility classes (like QString) but not using
76 QApplication, calling this function at the very end of your
77 application can be helpful to chasing down memory leaks, as
78 QTextCodec objects will not show up.
79*/
80
81void QTextCodec::deleteAllCodecs()
82{
83 if ( !all )
84 return;
85
86 destroying_is_ok = TRUE;
87 QList<QTextCodec> * ball = all;
88 all = 0;
89 ball->clear();
90 delete ball;
91 destroying_is_ok = FALSE;
92}
93
94
95static void setupBuiltinCodecs();
96
97
98static void realSetup()
99{
100#if defined(CHECK_STATE)
101 if ( destroying_is_ok )
102 qWarning( "creating new codec during codec cleanup" );
103#endif
104 all = new QList<QTextCodec>;
105 all->setAutoDelete( TRUE );
106 setupBuiltinCodecs();
107}
108
109
110static inline void setup()
111{
112 if ( !all )
113 realSetup();
114}
115
116
117class QTextStatelessEncoder: public QTextEncoder {
118 const QTextCodec* codec;
119public:
120 QTextStatelessEncoder(const QTextCodec*);
121 QCString fromUnicode(const QString& uc, int& lenInOut);
122};
123
124
125class QTextStatelessDecoder : public QTextDecoder {
126 const QTextCodec* codec;
127public:
128 QTextStatelessDecoder(const QTextCodec*);
129 QString toUnicode(const char* chars, int len);
130};
131
132QTextStatelessEncoder::QTextStatelessEncoder(const QTextCodec* c) :
133 codec(c)
134{
135}
136
137
138QCString QTextStatelessEncoder::fromUnicode(const QString& uc, int& lenInOut)
139{
140 return codec->fromUnicode(uc,lenInOut);
141}
142
143
144QTextStatelessDecoder::QTextStatelessDecoder(const QTextCodec* c) :
145 codec(c)
146{
147}
148
149
150QString QTextStatelessDecoder::toUnicode(const char* chars, int len)
151{
152 return codec->toUnicode(chars,len);
153}
154
155
156
157// NOT REVISED
158/*!
159 \class QTextCodec qtextcodec.h
160 \brief Provides conversion between text encodings.
161
162 By making objects of subclasses of QTextCodec, support for
163 new text encodings can be added to Qt.
164
165 The abstract virtual functions describe the encoder to the
166 system and the coder is used as required in the different
167 text file formats supported by QTextStream and, under X11, for the
168 locale-specific character input and output (under Windows NT
169 codecs are not needed for GUI I/O since the system works
170 with Unicode already, and Windows 95/98 has built-in convertors
171 for the 8-bit local encoding).
172
173 More recently created QTextCodec objects take precedence
174 over earlier ones.
175
176 To add support for another 8-bit encoding to Qt, make a subclass
177 of QTextCodec and implement at least the following methods:
178 <dl>
179 <dt>\c const char* name() const
180 <dd>Return the official name for the encoding.
181 <dt>\c int mibEnum() const
182 <dd>Return the MIB enum for the encoding if it is listed in the
183 <a href=ftp://ftp.isi.edu/in-notes/iana/assignments/character-sets>
184 IANA character-sets encoding file</a>.
185 </dl>
186 If the encoding is multi-byte then it will have "state"; that is,
187 the interpretation of some bytes will be dependent on some preceding
188 bytes. For such an encoding, you will need to implement
189 <dl>
190 <dt> \c QTextDecoder* makeDecoder() const
191 <dd>Return a QTextDecoder that remembers incomplete multibyte
192 sequence prefixes or other required state.
193 </dl>
194 If the encoding does \e not require state, you should implement:
195 <dl>
196 <dt> \c QString toUnicode(const char* chars, int len) const
197 <dd>Converts \e len characters from \e chars to Unicode.
198 </dl>
199 The base QTextCodec class has default implementations of the above
200 two functions, <i>but they are mutually recursive</i>, so you must
201 re-implement at least one of them, or both for improved efficiency.
202
203 For conversion from Unicode to 8-bit encodings, it is rarely necessary
204 to maintain state. However, two functions similar to the two above
205 are used for encoding:
206 <dl>
207 <dt> \c QTextEncoder* makeEncoder() const
208 <dd>Return a QTextEncoder.
209 <dt> \c QCString fromUnicode(const QString& uc, int& lenInOut ) const;
210 <dd>Converts \e lenInOut characters (of type QChar) from the start
211 of the string \a uc, returning a QCString result, and also returning
212 the \link QCString::length() length\endlink
213 of the result in lenInOut.
214 </dl>
215 Again, these are mutually recursive so only one needs to be implemented,
216 or both if better efficiency is possible.
217
218 Finally, you must implement:
219 <dl>
220 <dt> \c int heuristicContentMatch(const char* chars, int len) const
221 <dd>Gives a value indicating how likely it is that \e len characters
222 from \e chars are in the encoding.
223 </dl>
224 A good model for this function is the
225 QWindowsLocalCodec::heuristicContentMatch function found in the Qt sources.
226
227 A QTextCodec subclass might have improved performance if you also
228 re-implement:
229 <dl>
230 <dt> \c bool canEncode( QChar ) const
231 <dd>Test if a Unicode character can be encoded.
232 <dt> \c bool canEncode( const QString& ) const
233 <dd>Test if a string of Unicode characters can be encoded.
234 <dt> \c int heuristicNameMatch(const char* hint) const
235 <dd>Test if a possibly non-standard name is referring to the codec.
236 </dl>
237*/
238
239
240/*!
241 Constructs a QTextCodec, making it of highest precedence.
242 The QTextCodec should always be constructed on the heap
243 (with new), and once constructed it becomes the responsibility
244 of Qt to delete it (which is done at QApplication destruction).
245*/
246QTextCodec::QTextCodec()
247{
248 setup();
249 all->insert(0,this);
250}
251
252
253/*!
254 Destructs the QTextCodec. Note that you should not delete
255 codecs yourself - once created they become the responsibility
256 of Qt to delete.
257*/
258QTextCodec::~QTextCodec()
259{
260 if ( !destroying_is_ok )
261 qWarning("QTextCodec::~QTextCodec() called by application");
262 if ( all )
263 all->remove( this );
264}
265
266
267/*!
268 Returns a value indicating how likely this decoder is
269 for decoding some format that has the given name.
270
271 A good match returns a positive number around
272 the length of the string. A bad match is negative.
273
274 The default implementation calls simpleHeuristicNameMatch()
275 with the name of the codec.
276*/
277int QTextCodec::heuristicNameMatch(const char* hint) const
278{
279 return simpleHeuristicNameMatch(name(),hint);
280}
281
282
// returns a string containing the letters and numbers from input,
// with a space separating runs of a character class. e.g. "iso8859-1"
// becomes "iso 8859 1"
286static QString lettersAndNumbers( const char * input )
287{
288 QString result;
289 QChar c;
290
291 while( input && *input ) {
292 c = *input;
293 if ( c.isLetter() || c.isNumber() )
294 result += c.lower();
295 if ( input[1] ) {
296 // add space at character class transition, except
297 // transition from upper-case to lower-case letter
298 QChar n( input[1] );
299 if ( c.isLetter() && n.isLetter() ) {
300 if ( c == c.lower() && n == n.upper() )
301 result += ' ';
302 } else if ( c.category() != n.category() ) {
303 result += ' ';
304 }
305 }
306 input++;
307 }
308 return result.simplifyWhiteSpace();
309}
310
311/*!
312 A simple utility function for heuristicNameMatch() - it
313 does some very minor character-skipping
314 so that almost-exact matches score high.
315*/
316int QTextCodec::simpleHeuristicNameMatch(const char* name, const char* hint)
317{
318 // if they're the same, return a perfect score.
319 if ( name && hint && qstrcmp( name, hint ) == 0 )
320 return qstrlen( hint );
321
322 // if the letters and numbers are the same, we have an "almost"
323 // perfect match.
324 QString h( lettersAndNumbers( hint ) );
325 QString n( lettersAndNumbers( name ) );
326 if ( h == n )
327 return qstrlen( hint )-1;
328
329 if ( h.stripWhiteSpace() == n.stripWhiteSpace() )
330 return qstrlen( hint )-2;
331
332 // could do some more here, but I don't think it's worth it
333
334 return 0;
335}
336
337
338/*!
339 Returns the QTextCodec \a i places from the most recently
340 inserted, or NULL if there is no such QTextCodec. Thus,
341 codecForIndex(0) returns the most recently created QTextCodec.
342*/
343QTextCodec* QTextCodec::codecForIndex(int i)
344{
345 setup();
346 return (uint)i >= all->count() ? 0 : all->at(i);
347}
348
349
350/*!
351 Returns the QTextCodec which matches the
352 \link QTextCodec::mibEnum() MIBenum\endlink \a mib.
353*/
354QTextCodec* QTextCodec::codecForMib(int mib)
355{
356 setup();
357 QListIterator<QTextCodec> i(*all);
358 QTextCodec* result;
359 for ( ; (result=i); ++i ) {
360 if ( result->mibEnum()==mib )
361 break;
362 }
363 return result;
364}
365
366
367
368
369
370#ifdef _OS_WIN32_
371class QWindowsLocalCodec: public QTextCodec
372{
373public:
374 QWindowsLocalCodec();
375 ~QWindowsLocalCodec();
376
377 QString toUnicode(const char* chars, int len) const;
378 QCString fromUnicode(const QString& uc, int& lenInOut ) const;
379
380 const char* name() const;
381 int mibEnum() const;
382
383 int heuristicContentMatch(const char* chars, int len) const;
384};
385
386QWindowsLocalCodec::QWindowsLocalCodec()
387{
388}
389
390QWindowsLocalCodec::~QWindowsLocalCodec()
391{
392}
393
394
395QString QWindowsLocalCodec::toUnicode(const char* chars, int len) const
396{
397 if ( len == 1 && chars ) { // Optimization; avoids allocation
398 char c[2];
399 c[0] = *chars;
400 c[1] = 0;
401 return qt_winMB2QString( c, 2 );
402 }
403 if ( len < 0 )
404 return qt_winMB2QString( chars );
405 QCString s(chars,len+1);
406 return qt_winMB2QString(s);
407}
408
409QCString QWindowsLocalCodec::fromUnicode(const QString& uc, int& lenInOut ) const
410{
411 QCString r = qt_winQString2MB( uc, lenInOut );
412 lenInOut = r.length();
413 return r;
414}
415
416
417const char* QWindowsLocalCodec::name() const
418{
419 return "System";
420}
421
422int QWindowsLocalCodec::mibEnum() const
423{
424 return 0;
425}
426
427
428int QWindowsLocalCodec::heuristicContentMatch(const char* chars, int len) const
429{
430 // ### Not a bad default implementation?
431 QString t = toUnicode(chars,len);
432 int l = t.length();
433 QCString mb = fromUnicode(t,l);
434 int i=0;
435 while ( i < len )
436 if ( chars[i] == mb[i] )
437 i++;
438 return i;
439}
440
441#else
442
/* Locale alias tables, mostly copied from XFree86.  Each table is a
   null-terminated list that try_locale_list() scans linearly. */

static const char * const iso8859_2locales[] = {
    "croatian",
    "cs", "cs_CS", "cs_CZ",
    "cz", "cz_CZ", "czech",
    "hr", "hr_HR",
    "hu", "hu_HU", "hungarian",
    "pl", "pl_PL", "polish",
    "ro", "ro_RO", "rumanian",
    "serbocroatian",
    "sh", "sh_SP", "sh_YU",
    "sk", "sk_SK",
    "sl", "sl_CS", "sl_SI",
    "slovak", "slovene",
    "sr_SP",
    0
};

static const char * const iso8859_3locales[] = {
    "eo",
    0
};

static const char * const iso8859_4locales[] = {
    "ee", "ee_EE",
    "lt", "lt_LT",
    "lv", "lv_LV",
    0
};

static const char * const iso8859_5locales[] = {
    "bg", "bg_BG", "bulgarian",
    "mk", "mk_MK",
    "sp", "sp_YU",
    0
};

static const char * const iso8859_6locales[] = {
    "ar_AA", "ar_SA", "arabic",
    0
};

static const char * const iso8859_7locales[] = {
    "el", "el_GR", "greek",
    0
};

static const char * const iso8859_8locales[] = {
    "hebrew",
    "he", "he_IL",
    "iw", "iw_IL",
    0
};

static const char * const iso8859_9locales[] = {
    "tr", "tr_TR", "turkish",
    0
};

static const char * const iso8859_15locales[] = {
    "fr", "fi",
    "french", "finnish",
    "et", "et_EE",
    0
};

static const char * const koi8_ulocales[] = {
    "uk", "uk_UA", "ru_UA", "ukrainian",
    0
};

static const char * const tis_620locales[] = {
    "th", "th_TH", "thai",
    0
};
480
481
482static bool try_locale_list( const char * const locale[], const char * lang )
483{
484 int i;
485 for( i=0; locale[i] && qstrcmp(locale[i], lang); i++ )
486 { }
487 return locale[i] != 0;
488}
489
// For the probably_koi8_locales we have to look. The standard says
// these are 8859-5, but almost all Russian users use KOI8-R and
// incorrectly set $LANG to ru_RU. We'll check tolower() to see what
// tolower() thinks ru_RU means.

// If you read the history, it seems that many Russians blame ISO and
// Perestroika for the confusion.
//
// The real bug is that some programs break if the user specifies
// ru_RU.KOI8-R.
500
// Null-terminated list of locale names that are resolved by probing
// tolower() in ru_RU_hack() instead of by a fixed codec name.
static const char * const probably_koi8_rlocales[] = {
    "ru",
    "ru_SU",
    "ru_RU",
    "russian",
    0
};
503
504// this means ANY of these locale aliases. if they're aliases for
505// different locales, the code breaks.
506static QTextCodec * ru_RU_codec = 0;
507
508static QTextCodec * ru_RU_hack( const char * i ) {
509 if ( ! ru_RU_codec ) {
510 QCString origlocale = setlocale( LC_CTYPE, i );
511 // unicode koi8r latin5 name
512 // 0x044E 0xC0 0xEE CYRILLIC SMALL LETTER YU
513 // 0x042E 0xE0 0xCE CYRILLIC CAPITAL LETTER YU
514 int latin5 = tolower( 0xCE );
515 int koi8r = tolower( 0xE0 );
516 if ( koi8r == 0xC0 && latin5 != 0xEE ) {
517 ru_RU_codec = QTextCodec::codecForName( "KOI8-R" );
518 } else if ( koi8r != 0xC0 && latin5 == 0xEE ) {
519 ru_RU_codec = QTextCodec::codecForName( "ISO 8859-5" );
520 } else {
521 // something else again... let's assume... *throws dice*
522 ru_RU_codec = QTextCodec::codecForName( "KOI8-R" );
523 qWarning( "QTextCodec: using KOI8-R, probe failed (%02x %02x %s)",
524 koi8r, latin5, i );
525 }
526 setlocale( LC_CTYPE, origlocale.data() );
527 }
528 return ru_RU_codec;
529}
530
531#endif
532
533static QTextCodec * localeMapper = 0;
534
535void qt_set_locale_codec( QTextCodec *codec )
536{
537 localeMapper = codec;
538}
539
540/*! Returns a pointer to the codec most suitable for this locale. */
541
542QTextCodec* QTextCodec::codecForLocale()
543{
544 if ( localeMapper )
545 return localeMapper;
546
547 setup();
548
549#ifdef _OS_WIN32_
550 localeMapper = new QWindowsLocalCodec;
551#else
552 // Very poorly defined and followed standards causes lots of code
553 // to try to get all the cases...
554
555 char * lang = qstrdup( getenv("LANG") );
556
557 char * p = lang ? strchr( lang, '.' ) : 0;
558 if ( !p || *p != '.' ) {
559 // Some versions of setlocale return encoding, others not.
560 char *ctype = qstrdup( setlocale( LC_CTYPE, 0 ) );
561 // Some Linux distributions have broken locales which will return
562 // "C" for LC_CTYPE
563 if ( qstrcmp( ctype, "C" ) == 0 ) {
564 delete [] ctype;
565 } else {
566 if ( lang )
567 delete [] lang;
568 lang = ctype;
569 p = lang ? strchr( lang, '.' ) : 0;
570 }
571 }
572
573 if( p && *p == '.' ) {
574 // if there is an encoding and we don't know it, we return 0
575 // User knows what they are doing. Codecs will believe them.
576 localeMapper = codecForName( lang );
577 if ( !localeMapper ) {
578 // Use or codec disagree.
579 localeMapper = codecForName( p+1 );
580 }
581 }
582 if ( !localeMapper || !(p && *p == '.') ) {
583 // if there is none, we default to 8859-1
584 // We could perhaps default to 8859-15.
585 if ( try_locale_list( iso8859_2locales, lang ) )
586 localeMapper = codecForName( "ISO 8859-2" );
587 else if ( try_locale_list( iso8859_3locales, lang ) )
588 localeMapper = codecForName( "ISO 8859-3" );
589 else if ( try_locale_list( iso8859_4locales, lang ) )
590 localeMapper = codecForName( "ISO 8859-4" );
591 else if ( try_locale_list( iso8859_5locales, lang ) )
592 localeMapper = codecForName( "ISO 8859-5" );
593 else if ( try_locale_list( iso8859_6locales, lang ) )
594 localeMapper = codecForName( "ISO 8859-6-I" );
595 else if ( try_locale_list( iso8859_7locales, lang ) )
596 localeMapper = codecForName( "ISO 8859-7" );
597 else if ( try_locale_list( iso8859_8locales, lang ) )
598 localeMapper = codecForName( "ISO 8859-8-I" );
599 else if ( try_locale_list( iso8859_9locales, lang ) )
600 localeMapper = codecForName( "ISO 8859-9" );
601 else if ( try_locale_list( iso8859_15locales, lang ) )
602 localeMapper = codecForName( "ISO 8859-15" );
603 else if ( try_locale_list( tis_620locales, lang ) )
604 localeMapper = codecForName( "ISO 8859-11" );
605 else if ( try_locale_list( koi8_ulocales, lang ) )
606 localeMapper = codecForName( "KOI8-U" );
607 else if ( try_locale_list( probably_koi8_rlocales, lang ) )
608 localeMapper = ru_RU_hack( lang );
609 else if (!lang || !(localeMapper = codecForName(lang) ))
610 localeMapper = codecForName( "ISO 8859-1" );
611 }
612 delete[] lang;
613#endif
614
615 return localeMapper;
616}
617
618
619/*!
620 Searches all installed QTextCodec objects, returning the one
621 which best matches given name. Returns NULL if no codec has
622 a match closeness above \a accuracy.
623
624 \sa heuristicNameMatch()
625*/
626QTextCodec* QTextCodec::codecForName(const char* hint, int accuracy)
627{
628 setup();
629 QListIterator<QTextCodec> i(*all);
630 QTextCodec* result = 0;
631 int best=accuracy;
632 for ( QTextCodec* cursor; (cursor=i); ++i ) {
633 int s = cursor->heuristicNameMatch(hint);
634 if ( s > best ) {
635 best = s;
636 result = cursor;
637 }
638 }
639 return result;
640}
641
642
643/*!
644 Searches all installed QTextCodec objects, returning the one
645 which most recognizes the given content. May return 0.
646
647 Note that this is often a poor choice, since character
648 encodings often use most of the available character sequences,
649 and so only by linguistic analysis could a true match be made.
650
651 \sa heuristicContentMatch()
652*/
653QTextCodec* QTextCodec::codecForContent(const char* chars, int len)
654{
655 setup();
656 QListIterator<QTextCodec> i(*all);
657 QTextCodec* result = 0;
658 int best=0;
659 for ( QTextCodec* cursor; (cursor=i); ++i ) {
660 int s = cursor->heuristicContentMatch(chars,len);
661 if ( s > best ) {
662 best = s;
663 result = cursor;
664 }
665 }
666 return result;
667}
668
669
670/*!
671 \fn const char* QTextCodec::name() const
672 Subclasses of QTextCodec must reimplement this function. It returns
673 the name of the encoding supported by the subclass. When choosing
674 a name for an encoding, consider these points:
675 <ul>
676 <li>On X11, heuristicNameMatch( const char * hint )
677 is used to test if the QTextCodec
678 can convert between Unicode and the encoding of a font
679 with encoding \e hint, such as "iso8859-1" for Latin-1 fonts,
680 "koi8-r" for Russian KOI8 fonts.
681 The default algorithm of heuristicNameMatch() uses name().
682 <li>Some applications may use this function to present
683 encodings to the end user.
684 </ul>
685*/
686
687/*!
688 \fn int QTextCodec::mibEnum() const
689
690 Subclasses of QTextCodec must reimplement this function. It returns the
691 MIBenum (see
692 <a href="ftp://ftp.isi.edu/in-notes/iana/assignments/character-sets">
693 the IANA character-sets encoding file</a> for more information).
694 It is important that each QTextCodec subclass return the correct unique
695 value for this function.
696*/
697
698
699/*!
700 \fn int QTextCodec::heuristicContentMatch(const char* chars, int len) const
701
702 Subclasses of QTextCodec must reimplement this function. It examines
703 the first \a len bytes of \a chars and returns a value indicating how
704 likely it is that the string is a prefix of text encoded in the
705 encoding of the subclass. Any negative return value indicates that the text
706 is detectably not in the encoding (eg. it contains undefined characters).
707 A return value of 0 indicates that the text should be decoded with this
708 codec rather than as ASCII, but there
709 is no particular evidence. The value should range up to \a len. Thus,
710 most decoders will return -1, 0, or -\a len.
711
712 The characters are not null terminated.
713
714 \sa codecForContent().
715*/
716
717
718/*!
719 Creates a QTextDecoder which stores enough state to decode chunks
720 of char* data to create chunks of Unicode data. The default implementation
721 creates a stateless decoder, which is sufficient for only the simplest
722 encodings where each byte corresponds to exactly one Unicode character.
723
724 The caller is responsible for deleting the returned object.
725*/
726QTextDecoder* QTextCodec::makeDecoder() const
727{
728 return new QTextStatelessDecoder(this);
729}
730
731
732/*!
733 Creates a QTextEncoder which stores enough state to encode chunks
734 of Unicode data as char* data. The default implementation
735 creates a stateless encoder, which is sufficient for only the simplest
736 encodings where each Unicode character corresponds to exactly one char.
737
738 The caller is responsible for deleting the returned object.
739*/
740QTextEncoder* QTextCodec::makeEncoder() const
741{
742 return new QTextStatelessEncoder(this);
743}
744
745
746/*!
747 Subclasses of QTextCodec must reimplement this function or
748 makeDecoder(). It converts the first \a len characters of \a chars
749 to Unicode.
750
751 The default implementation makes a decoder with makeDecoder() and
752 converts the input with that. Note that the default makeDecoder()
753 implementation makes a decoder that simply calls
754 this function, hence subclasses \e must reimplement one function or
755 the other to avoid infinite recursion.
756*/
757QString QTextCodec::toUnicode(const char* chars, int len) const
758{
759 QTextDecoder* i = makeDecoder();
760 QString result = i->toUnicode(chars,len);
761 delete i;
762 return result;
763}
764
765
766/*!
767 Subclasses of QTextCodec must reimplement either this function or
768 makeEncoder(). It converts the first \a lenInOut characters of \a
769 uc from Unicode to the encoding of the subclass. If \a lenInOut
770 is negative or too large, the length of \a uc is used instead.
771
772 The result is returned by value as a QCString, so the caller has
773 nothing to delete. The length of the
774 resulting encoded byte sequence is returned in \a lenInOut.
775
776 The default implementation makes an encoder with makeEncoder() and
777 converts the input with that. Note that the default makeEncoder()
778 implementation makes an encoder that simply calls
779 this function, hence subclasses \e must reimplement one function or
780 the other to avoid infinite recursion.
781*/
782
783QCString QTextCodec::fromUnicode(const QString& uc, int& lenInOut) const
784{
785 QTextEncoder* i = makeEncoder();
786 QCString result = i->fromUnicode(uc, lenInOut);
787 delete i;
788 return result;
789}
790
791/*!
792 \overload QCString QTextCodec::fromUnicode(const QString& uc) const
793*/
794QCString QTextCodec::fromUnicode(const QString& uc) const
795{
796 int l = uc.length();
797 return fromUnicode(uc,l);
798}
799
800/*!
801 \overload QString QTextCodec::toUnicode(const QByteArray& a, int len) const
802*/
803QString QTextCodec::toUnicode(const QByteArray& a, int len) const
804{
805 int l = a.size();
806 if( l > 0 && a.data()[l - 1] == '\0' ) l--;
807 l = QMIN( l, len );
808 return toUnicode( a.data(), l );
809}
810
811/*!
812 \overload QString QTextCodec::toUnicode(const QByteArray& a) const
813*/
814QString QTextCodec::toUnicode(const QByteArray& a) const
815{
816 int l = a.size();
817 if( l > 0 && a.data()[l - 1] == '\0' ) l--;
818 return toUnicode( a.data(), l );
819}
820
821/*!
822 \overload QString QTextCodec::toUnicode(const char* chars) const
823*/
824QString QTextCodec::toUnicode(const char* chars) const
825{
826 return toUnicode(chars,qstrlen(chars));
827}
828
829/*!
830 Returns TRUE if the unicode character \a ch can be fully encoded
831 with this codec. The default implementation tests if the result of
832 toUnicode(fromUnicode(ch)) is the original \a ch. Subclasses may be
833 able to improve the efficiency.
834*/
835bool QTextCodec::canEncode( QChar ch ) const
836{
837 return toUnicode(fromUnicode(ch)) == ch;
838}
839
840/*!
841 Returns TRUE if the unicode string \a s can be fully encoded
842 with this codec. The default implementation tests if the result of
843 toUnicode(fromUnicode(s)) is the original \a s. Subclasses may be
844 able to improve the efficiency.
845*/
846bool QTextCodec::canEncode( const QString& s ) const
847{
848 return toUnicode(fromUnicode(s)) == s;
849}
850
851
852
853/*!
854 \class QTextEncoder qtextcodec.h
855 \brief State-based encoder
856
857 A QTextEncoder converts Unicode into another format, remembering
858 any state that is required between calls.
859
860 \sa QTextCodec::makeEncoder()
861*/
862
863/*!
864 Destructs the encoder.
865*/
866QTextEncoder::~QTextEncoder()
867{
868}
869/*!
870 \fn QCString QTextEncoder::fromUnicode(const QString& uc, int& lenInOut)
871
872 Converts \a lenInOut characters (not bytes) from \a uc, producing
873 a QCString. \a lenInOut will also be set to the
874 \link QCString::length() length\endlink of the result (in bytes).
875
876 The encoder is free to record state to use when subsequent calls are
877 made to this function (for example, it might change modes with escape
878 sequences if needed during the encoding of one string, then assume that
879 mode applies when a subsequent call begins).
880*/
881
882/*!
883 \class QTextDecoder qtextcodec.h
884 \brief State-based decoder
885
886 A QTextDecoder converts a text format into Unicode, remembering
887 any state that is required between calls.
888
889 \sa QTextCodec::makeDecoder()
890*/
891
892
893/*!
894 Destructs the decoder.
895*/
896QTextDecoder::~QTextDecoder()
897{
898}
899
900/*!
901 \fn QString QTextDecoder::toUnicode(const char* chars, int len)
902
903 Converts the first \a len bytes at \a chars to Unicode, returning the
904 result.
905
906 If not all characters are used (eg. only part of a multi-byte
907 encoding is at the end of the characters), the decoder remembers
908 enough state to continue with the next call to this function.
909*/
910
911#define CHAINED 0xffff
912
// One entry of a byte-indexed decoding table.
struct QMultiByteUnicodeTable {
    // If multibyte, ignore unicode and index into multibyte
    // with the next character.
    ushort unicode;
    QMultiByteUnicodeTable* multibyte;

    // A fresh entry maps to U+FFFD and has no sub-table.
    QMultiByteUnicodeTable()
	: unicode(0xfffd),
	  multibyte(0)
    {
    }

    // Frees the sub-table array, if any; deleting a null pointer is a
    // no-op, so no explicit check is needed.
    ~QMultiByteUnicodeTable()
    {
	delete [] multibyte;
    }
};
927
928#ifndef QT_NO_CODECS
// Parses one escaped byte constant starting at \a cursor, which must point
// at the escape character, and advances \a cursor past what was consumed.
//
//   <esc>xHH   hexadecimal
//   <esc>dDDD  decimal
//   <esc>OOO   octal (digits start directly after the escape character)
//
// Returns the value masked to 0..255.  An empty string returns 0 and leaves
// \a cursor alone; a lone escape character at the end of the string returns
// 0 and advances \a cursor to the terminating NUL, so the caller's loop
// always makes progress.
static int getByte(char* &cursor)
{
    if ( !*cursor )
	return 0;

    const char kind = cursor[1];
    if ( !kind ) {
	// Escape character is the last character: there is nothing to
	// parse, and cursor+2 would already be past the terminator.
	cursor++;
	return 0;
    }

    long value;
    if ( kind == 'x' )
	value = strtol(cursor+2,&cursor,16);
    else if ( kind == 'd' )
	value = strtol(cursor+2,&cursor,10);
    else
	// The octal form has no type letter, so the first digit is
	// cursor[1]; parsing from cursor+2 dropped that digit.
	value = strtol(cursor+1,&cursor,8);

    return (int)(value & 0xff);
}
942
943class QTextCodecFromIOD;
944
945class QTextCodecFromIODDecoder : public QTextDecoder {
946 const QTextCodecFromIOD* codec;
947 QMultiByteUnicodeTable* mb;
948public:
949 QTextCodecFromIODDecoder(const QTextCodecFromIOD* c);
950 QString toUnicode(const char* chars, int len);
951};
952
953class QTextCodecFromIOD : public QTextCodec {
954 friend class QTextCodecFromIODDecoder;
955
956 QCString n;
957
958 // If from_unicode_page[row()][cell()] is 0 and from_unicode_page_multibyte,
959 // use from_unicode_page_multibyte[row()][cell()] as string.
960 char** from_unicode_page;
961 char*** from_unicode_page_multibyte;
962 char unkn;
963
964 // Only one of these is used
965 ushort* to_unicode;
966 QMultiByteUnicodeTable* to_unicode_multibyte;
967 int max_bytes_per_char;
968 QStrList aliases;
969
970 bool stateless() const { return !to_unicode_multibyte; }
971
972public:
973 QTextCodecFromIOD(QIODevice* iod)
974 {
975 from_unicode_page = 0;
976 to_unicode_multibyte = 0;
977 to_unicode = 0;
978 from_unicode_page_multibyte = 0;
979 max_bytes_per_char = 1;
980
981 const int maxlen=100;
982 char line[maxlen];
983 char esc='\\';
984 char comm='%';
985 bool incmap = FALSE;
986 while (iod->readLine(line,maxlen) > 0) {
987 if (0==qstrnicmp(line,"<code_set_name>",15))
988 n = line+15;
989 else if (0==qstrnicmp(line,"<escape_char> ",14))
990 esc = line[14];
991 else if (0==qstrnicmp(line,"<comment_char> ",15))
992 comm = line[15];
993 else if (line[0]==comm && 0==qstrnicmp(line+1," alias ",7)) {
994 aliases.append(line+8);
995 } else if (0==qstrnicmp(line,"CHARMAP",7)) {
996 if (!from_unicode_page) {
997 from_unicode_page = new char*[256];
998 for (int i=0; i<256; i++)
999 from_unicode_page[i]=0;
1000 }
1001 if (!to_unicode) {
1002 to_unicode = new ushort[256];
1003 }
1004 incmap = TRUE;
1005 } else if (0==qstrnicmp(line,"END CHARMAP",11))
1006 break;
1007 else if (incmap) {
1008 char* cursor = line;
1009 int byte=0,unicode=-1;
1010 ushort* mb_unicode=0;
1011 const int maxmb=8; // more -> we'll need to improve datastructures
1012 char mb[maxmb+1];
1013 int nmb=0;
1014
1015 while (*cursor) {
1016 if (cursor[0]=='<' && cursor[1]=='U' &&
1017 cursor[2]>='0' && cursor[2]<='9' &&
1018 cursor[3]>='0' && cursor[3]<='9') {
1019
1020 unicode = strtol(cursor+2,&cursor,16);
1021
1022 } else if (*cursor==esc) {
1023
1024 byte = getByte(cursor);
1025
1026 if ( *cursor == esc ) {
1027 if ( !to_unicode_multibyte ) {
1028 to_unicode_multibyte =
1029 new QMultiByteUnicodeTable[256];
1030 for (int i=0; i<256; i++) {
1031 to_unicode_multibyte[i].unicode =
1032 to_unicode[i];
1033 to_unicode_multibyte[i].multibyte = 0;
1034 }
1035 delete [] to_unicode;
1036 to_unicode = 0;
1037 }
1038 QMultiByteUnicodeTable* mbut =
1039 to_unicode_multibyte+byte;
1040 mb[nmb++] = byte;
1041 while ( nmb < maxmb && *cursor == esc ) {
1042 // Always at least once
1043
1044 mbut->unicode = CHAINED;
1045 byte = getByte(cursor);
1046 mb[nmb++] = byte;
1047 if (!mbut->multibyte) {
1048 mbut->multibyte =
1049 new QMultiByteUnicodeTable[256];
1050 }
1051 mbut = mbut->multibyte+byte;
1052 mb_unicode = & mbut->unicode;
1053 }
1054
1055 if ( nmb > max_bytes_per_char )
1056 max_bytes_per_char = nmb;
1057 }
1058 } else {
1059 cursor++;
1060 }
1061 }
1062
1063 if (unicode >= 0 && unicode <= 0xffff)
1064 {
1065 QChar ch((ushort)unicode);
1066 if (!from_unicode_page[ch.row()]) {
1067 from_unicode_page[ch.row()] = new char[256];
1068 for (int i=0; i<256; i++)
1069 from_unicode_page[ch.row()][i]=0;
1070 }
1071 if ( mb_unicode ) {
1072 from_unicode_page[ch.row()][ch.cell()] = 0;
1073 if (!from_unicode_page_multibyte) {
1074 from_unicode_page_multibyte = new char**[256];
1075 for (int i=0; i<256; i++)
1076 from_unicode_page_multibyte[i]=0;
1077 }
1078 if (!from_unicode_page_multibyte[ch.row()]) {
1079 from_unicode_page_multibyte[ch.row()] = new char*[256];
1080 for (int i=0; i<256; i++)
1081 from_unicode_page_multibyte[ch.row()][i] = 0;
1082 }
1083 mb[nmb++] = 0;
1084 from_unicode_page_multibyte[ch.row()][ch.cell()]
1085 = qstrdup(mb);
1086 *mb_unicode = unicode;
1087 } else {
1088 from_unicode_page[ch.row()][ch.cell()] = (char)byte;
1089 if ( to_unicode )
1090 to_unicode[byte] = unicode;
1091 else
1092 to_unicode_multibyte[byte].unicode = unicode;
1093 }
1094 } else {
1095 }
1096 }
1097 }
1098 n = n.stripWhiteSpace();
1099
1100 unkn = '?'; // ##### Might be a bad choice.
1101 }
1102
1103 ~QTextCodecFromIOD()
1104 {
1105 if ( from_unicode_page ) {
1106 for (int i=0; i<256; i++)
1107 if (from_unicode_page[i])
1108 delete [] from_unicode_page[i];
1109 }
1110 if ( from_unicode_page_multibyte ) {
1111 for (int i=0; i<256; i++)
1112 if (from_unicode_page_multibyte[i])
1113 for (int j=0; j<256; j++)
1114 if (from_unicode_page_multibyte[i][j])
1115 delete [] from_unicode_page_multibyte[i][j];
1116 }
1117 if ( to_unicode )
1118 delete [] to_unicode;
1119 if ( to_unicode_multibyte )
1120 delete [] to_unicode_multibyte;
1121 }
1122
1123 bool ok() const
1124 {
1125 return !!from_unicode_page;
1126 }
1127
1128 QTextDecoder* makeDecoder() const
1129 {
1130 if ( stateless() )
1131 return QTextCodec::makeDecoder();
1132 else
1133 return new QTextCodecFromIODDecoder(this);
1134 }
1135
1136 const char* name() const
1137 {
1138 return n;
1139 }
1140
1141 int mibEnum() const
1142 {
1143 return 0; // #### Unknown.
1144 }
1145
1146 int heuristicContentMatch(const char*, int) const
1147 {
1148 return 0;
1149 }
1150
1151 int heuristicNameMatch(const char* hint) const
1152 {
1153 int bestr = QTextCodec::heuristicNameMatch(hint);
1154 QStrListIterator it(aliases);
1155 char* a;
1156 while ((a=it.current())) {
1157 ++it;
1158 int r = simpleHeuristicNameMatch(a,hint);
1159 if (r > bestr)
1160 bestr = r;
1161 }
1162 return bestr;
1163 }
1164
1165 QString toUnicode(const char* chars, int len) const
1166 {
1167 const uchar* uchars = (const uchar*)chars;
1168 QString result;
1169 QMultiByteUnicodeTable* multibyte=to_unicode_multibyte;
1170 if ( multibyte ) {
1171 while (len--) {
1172 QMultiByteUnicodeTable& mb = multibyte[*uchars];
1173 if ( mb.multibyte ) {
1174 // Chained multi-byte
1175 multibyte = mb.multibyte;
1176 } else {
1177 result += QChar(mb.unicode);
1178 multibyte=to_unicode_multibyte;
1179 }
1180 uchars++;
1181 }
1182 } else {
1183 while (len--)
1184 result += QChar(to_unicode[*uchars++]);
1185 }
1186 return result;
1187 }
1188
1189 QCString fromUnicode(const QString& uc, int& lenInOut) const
1190 {
1191 if (lenInOut > (int)uc.length())
1192 lenInOut = uc.length();
1193 int rlen = lenInOut*max_bytes_per_char;
1194 QCString rstr(rlen);
1195 char* cursor = rstr.data();
1196 char* s=0;
1197 int l = lenInOut;
1198 int lout = 0;
1199 for (int i=0; i<l; i++) {
1200 QChar ch = uc[i];
1201 if ( ch == QChar::null ) {
1202 // special
1203 *cursor++ = 0;
1204 } else if ( from_unicode_page[ch.row()] &&
1205 from_unicode_page[ch.row()][ch.cell()] )
1206 {
1207 *cursor++ = from_unicode_page[ch.row()][ch.cell()];
1208 lout++;
1209 } else if ( from_unicode_page_multibyte &&
1210 from_unicode_page_multibyte[ch.row()] &&
1211 (s=from_unicode_page_multibyte[ch.row()][ch.cell()]) )
1212 {
1213 while (*s) {
1214 *cursor++ = *s++;
1215 lout++;
1216 }
1217 } else {
1218 *cursor++ = unkn;
1219 lout++;
1220 }
1221 }
1222 *cursor = 0;
1223 lenInOut = lout;
1224 return rstr;
1225 }
1226};
1227
1228QTextCodecFromIODDecoder::QTextCodecFromIODDecoder(const QTextCodecFromIOD* c) :
1229 codec(c)
1230{
1231 mb = codec->to_unicode_multibyte;
1232}
1233
1234QString QTextCodecFromIODDecoder::toUnicode(const char* chars, int len)
1235{
1236 const uchar* uchars = (const uchar*)chars;
1237 QString result;
1238 while (len--) {
1239 QMultiByteUnicodeTable& t = mb[*uchars];
1240 if ( t.multibyte ) {
1241 // Chained multi-byte
1242 mb = t.multibyte;
1243 } else {
1244 if ( t.unicode )
1245 result += QChar(t.unicode);
1246 mb=codec->to_unicode_multibyte;
1247 }
1248 uchars++;
1249 }
1250 return result;
1251}
1252
1253/*!
1254 Reads a POSIX2 charmap definition from \a iod.
1255 The parser recognizes the following lines:
1256<pre>
1257 &lt;code_set_name&gt; <i>name</i>
1258 &lt;escape_char&gt; <i>character</i>
1259 % alias <i>alias</i>
1260 CHARMAP
1261 &lt;<i>token</i>&gt; /x<i>hexbyte</i> &lt;U<i>unicode</i>&gt; ...
1262 &lt;<i>token</i>&gt; /d<i>decbyte</i> &lt;U<i>unicode</i>&gt; ...
1263 &lt;<i>token</i>&gt; /<i>octbyte</i> &lt;U<i>unicode</i>&gt; ...
1264 &lt;<i>token</i>&gt; /<i>any</i>/<i>any</i>... &lt;U<i>unicode</i>&gt; ...
1265 END CHARMAP
1266</pre>
1267
1268 The resulting QTextCodec is returned (and also added to the
1269 global list of codecs). The name() of the result is taken
1270 from the code_set_name.
1271
1272 Note that a codec constructed in this way uses much more memory
1273 and is slower than a hand-written QTextCodec subclass, since
1274 tables in code are in memory shared by all applications simultaneously
1275 using Qt.
1276
1277 \sa loadCharmapFile()
1278*/
1279QTextCodec* QTextCodec::loadCharmap(QIODevice* iod)
1280{
1281 QTextCodecFromIOD* r = new QTextCodecFromIOD(iod);
1282 if ( !r->ok() ) {
1283 delete r;
1284 r = 0;
1285 }
1286 return r;
1287}
1288
1289/*!
1290 A convenience function for loadCharmap().
1291*/
1292QTextCodec* QTextCodec::loadCharmapFile(QString filename)
1293{
1294 QFile f(filename);
1295 if (f.open(IO_ReadOnly)) {
1296 QTextCodecFromIOD* r = new QTextCodecFromIOD(&f);
1297 if ( !r->ok() )
1298 delete r;
1299 else
1300 return r;
1301 }
1302 return 0;
1303}
1304#endif //QT_NO_CODECS
1305
1306
1307/*!
1308 Returns a string representing the current language.
1309*/
1310
1311const char* QTextCodec::locale()
1312{
1313 static QCString lang;
1314 if ( lang.isEmpty() ) {
1315 lang = getenv( "LANG" ); //########Windows??
1316 if ( lang.isEmpty() )
1317 lang = "C";
1318 }
1319 return lang;
1320}
1321
1322
1323
1324#ifndef QT_NO_CODECS
1325
1326class QSimpleTextCodec: public QTextCodec
1327{
1328public:
1329 QSimpleTextCodec( int );
1330 ~QSimpleTextCodec();
1331
1332 QString toUnicode(const char* chars, int len) const;
1333 QCString fromUnicode(const QString& uc, int& lenInOut ) const;
1334
1335 const char* name() const;
1336 int mibEnum() const;
1337
1338 int heuristicContentMatch(const char* chars, int len) const;
1339
1340 int heuristicNameMatch(const char* hint) const;
1341
1342private:
1343 int forwardIndex;
1344};
1345
1346
1347#define LAST_MIB 2259
1348
1349static struct {
1350 const char * cs;
1351 int mib;
1352 Q_UINT16 values[128];
1353} unicodevalues[] = {
1354 // from RFC 1489, ftp://ftp.isi.edu/in-notes/rfc1489.txt
1355 { "KOI8-R", 2084,
1356 { 0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524,
1357 0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590,
1358 0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219/**/, 0x221A, 0x2248,
1359 0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7,
1360 0x2550, 0x2551, 0x2552, 0x0451, 0x2553, 0x2554, 0x2555, 0x2556,
1361 0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x255C, 0x255D, 0x255E,
1362 0x255F, 0x2560, 0x2561, 0x0401, 0x2562, 0x2563, 0x2564, 0x2565,
1363 0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x256B, 0x256C, 0x00A9,
1364 0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433,
1365 0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E,
1366 0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,
1367 0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A,
1368 0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413,
1369 0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E,
1370 0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412,
1371 0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A } },
1372 // /**/ - The BULLET OPERATOR is confused. Some people think
1373 // it should be 0x2022 (BULLET).
1374
1375 // from RFC 2319, ftp://ftp.isi.edu/in-notes/rfc2319.txt
1376 { "KOI8-U", 2088,
1377 { 0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524,
13780x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590,
13790x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219, 0x221A, 0x2248,
13800x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7,
13810x2550, 0x2551, 0x2552, 0x0451, 0x0454, 0x2554, 0x0456, 0x0457,
13820x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x0491, 0x255D, 0x255E,
13830x255F, 0x2560, 0x2561, 0x0401, 0x0404, 0x2563, 0x0406, 0x0407,
13840x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x0490, 0x256C, 0x00A9,
13850x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433,
13860x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E,
13870x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,
13880x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A,
13890x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413,
13900x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E,
13910x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412,
13920x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A } },
1393
1394 // next bits generated from tables on the Unicode 2.0 CD. we can
1395 // use these tables since this is part of the transition to using
1396 // unicode everywhere in qt.
1397
1398 // $ for A in 8 9 A B C D E F ; do for B in 0 1 2 3 4 5 6 7 8 9 A B C D E F ; do echo 0x${A}${B} 0xFFFD ; done ; done > /tmp/digits ; for a in 8859-* ; do ( awk '/^0x[89ABCDEF]/{ print $1, $2 }' < $a ; cat /tmp/digits ) | sort | uniq -w4 | cut -c6- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/$a ; done
1399
1400 // then I inserted the files manually.
1401 { "ISO 8859-2", 5,
1402 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1403 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1404 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1405 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1406 0x00A0, 0x0104, 0x02D8, 0x0141, 0x00A4, 0x013D, 0x015A, 0x00A7,
1407 0x00A8, 0x0160, 0x015E, 0x0164, 0x0179, 0x00AD, 0x017D, 0x017B,
1408 0x00B0, 0x0105, 0x02DB, 0x0142, 0x00B4, 0x013E, 0x015B, 0x02C7,
1409 0x00B8, 0x0161, 0x015F, 0x0165, 0x017A, 0x02DD, 0x017E, 0x017C,
1410 0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7,
1411 0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,
1412 0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,
1413 0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,
1414 0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7,
1415 0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,
1416 0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7,
1417 0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9} },
1418 { "ISO 8859-3", 6,
1419 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1420 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1421 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1422 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1423 0x00A0, 0x0126, 0x02D8, 0x00A3, 0x00A4, 0xFFFD, 0x0124, 0x00A7,
1424 0x00A8, 0x0130, 0x015E, 0x011E, 0x0134, 0x00AD, 0xFFFD, 0x017B,
1425 0x00B0, 0x0127, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x0125, 0x00B7,
1426 0x00B8, 0x0131, 0x015F, 0x011F, 0x0135, 0x00BD, 0xFFFD, 0x017C,
1427 0x00C0, 0x00C1, 0x00C2, 0xFFFD, 0x00C4, 0x010A, 0x0108, 0x00C7,
1428 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
1429 0xFFFD, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x0120, 0x00D6, 0x00D7,
1430 0x011C, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x016C, 0x015C, 0x00DF,
1431 0x00E0, 0x00E1, 0x00E2, 0xFFFD, 0x00E4, 0x010B, 0x0109, 0x00E7,
1432 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
1433 0xFFFD, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x0121, 0x00F6, 0x00F7,
1434 0x011D, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x016D, 0x015D, 0x02D9} },
1435 { "ISO 8859-4", 7,
1436 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1437 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1438 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1439 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1440 0x00A0, 0x0104, 0x0138, 0x0156, 0x00A4, 0x0128, 0x013B, 0x00A7,
1441 0x00A8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00AD, 0x017D, 0x00AF,
1442 0x00B0, 0x0105, 0x02DB, 0x0157, 0x00B4, 0x0129, 0x013C, 0x02C7,
1443 0x00B8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014A, 0x017E, 0x014B,
1444 0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E,
1445 0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x012A,
1446 0x0110, 0x0145, 0x014C, 0x0136, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
1447 0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x0168, 0x016A, 0x00DF,
1448 0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F,
1449 0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x012B,
1450 0x0111, 0x0146, 0x014D, 0x0137, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
1451 0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x0169, 0x016B, 0x02D9} },
1452 { "ISO 8859-5", 8,
1453 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1454 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1455 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1456 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1457 0x00A0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
1458 0x0408, 0x0409, 0x040A, 0x040B, 0x040C, 0x00AD, 0x040E, 0x040F,
1459 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
1460 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
1461 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
1462 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
1463 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
1464 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
1465 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
1466 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F,
1467 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
1468 0x0458, 0x0459, 0x045A, 0x045B, 0x045C, 0x00A7, 0x045E, 0x045F} },
1469 { "ISO 8859-6-I", 82,
1470 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1471 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1472 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1473 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1474 0x00A0, 0xFFFD, 0xFFFD, 0xFFFD, 0x00A4, 0xFFFD, 0xFFFD, 0xFFFD,
1475 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x060C, 0x00AD, 0xFFFD, 0xFFFD,
1476 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1477 0xFFFD, 0xFFFD, 0xFFFD, 0x061B, 0xFFFD, 0xFFFD, 0xFFFD, 0x061F,
1478 0xFFFD, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
1479 0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F,
1480 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
1481 0x0638, 0x0639, 0x063A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1482 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
1483 0x0648, 0x0649, 0x064A, 0x064B, 0x064C, 0x064D, 0x064E, 0x064F,
1484 0x0650, 0x0651, 0x0652, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1485 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} },
1486 { "ISO 8859-7", 10,
1487 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1488 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1489 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1490 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1491 0x00A0, 0x2018, 0x2019, 0x00A3, 0xFFFD, 0xFFFD, 0x00A6, 0x00A7,
1492 0x00A8, 0x00A9, 0xFFFD, 0x00AB, 0x00AC, 0x00AD, 0xFFFD, 0x2015,
1493 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x0385, 0x0386, 0x00B7,
1494 0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F,
1495 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
1496 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,
1497 0x03A0, 0x03A1, 0xFFFD, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7,
1498 0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF,
1499 0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7,
1500 0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF,
1501 0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7,
1502 0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0xFFFD} },
1503 { "ISO 8859-8-I", 85,
1504 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1505 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1506 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1507 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1508 0x00A0, 0xFFFD, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
1509 0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x203E,
1510 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
1511 0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0xFFFD,
1512 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1513 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1514 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1515 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2017,
1516 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7,
1517 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF,
1518 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7,
1519 0x05E8, 0x05E9, 0x05EA, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} },
1520 { "ISO 8859-9", 12,
1521 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1522 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1523 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1524 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1525 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
1526 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
1527 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
1528 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
1529 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
1530 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
1531 0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
1532 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF,
1533 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
1534 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
1535 0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
1536 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF} },
1537 { "ISO 8859-10", 13,
1538 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1539 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1540 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1541 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1542 0x00A0, 0x0104, 0x0112, 0x0122, 0x012A, 0x0128, 0x0136, 0x00A7,
1543 0x013B, 0x0110, 0x0160, 0x0166, 0x017D, 0x00AD, 0x016A, 0x014A,
1544 0x00B0, 0x0105, 0x0113, 0x0123, 0x012B, 0x0129, 0x0137, 0x00B7,
1545 0x013C, 0x0111, 0x0161, 0x0167, 0x017E, 0x2015, 0x016B, 0x014B,
1546 0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E,
1547 0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x00CF,
1548 0x00D0, 0x0145, 0x014C, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x0168,
1549 0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
1550 0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F,
1551 0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x00EF,
1552 0x00F0, 0x0146, 0x014D, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x0169,
1553 0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x0138} },
1554 { "ISO 8859-13", 109,
1555 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1556 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1557 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1558 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1559 0x00A0, 0x201D, 0x00A2, 0x00A3, 0x00A4, 0x201E, 0x00A6, 0x00A7,
1560 0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6,
1561 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x201C, 0x00B5, 0x00B6, 0x00B7,
1562 0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6,
1563 0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112,
1564 0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B,
1565 0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7,
1566 0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF,
1567 0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113,
1568 0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C,
1569 0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7,
1570 0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x2019} },
1571 { "ISO 8859-14", 110,
1572 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1573 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1574 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1575 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1576 0x00A0, 0x1E02, 0x1E03, 0x00A3, 0x010A, 0x010B, 0x1E0A, 0x00A7,
1577 0x1E80, 0x00A9, 0x1E82, 0x1E0B, 0x1EF2, 0x00AD, 0x00AE, 0x0178,
1578 0x1E1E, 0x1E1F, 0x0120, 0x0121, 0x1E40, 0x1E41, 0x00B6, 0x1E56,
1579 0x1E81, 0x1E57, 0x1E83, 0x1E60, 0x1EF3, 0x1E84, 0x1E85, 0x1E61,
1580 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
1581 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
1582 0x0174, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x1E6A,
1583 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x0176, 0x00DF,
1584 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
1585 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
1586 0x0175, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x1E6B,
1587 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x0177, 0x00FF} },
1588 { "ISO 8859-15", 111,
1589 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1590 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1591 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1592 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1593 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AC, 0x00A5, 0x0160, 0x00A7,
1594 0x0161, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
1595 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x017D, 0x00B5, 0x00B6, 0x00B7,
1596 0x017E, 0x00B9, 0x00BA, 0x00BB, 0x0152, 0x0153, 0x0178, 0x00BF,
1597 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
1598 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
1599 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
1600 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
1601 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
1602 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
1603 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
1604 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF} },
1605
1606 // next bits generated again from tables on the Unicode 3.0 CD.
1607
1608 // $ for a in CP* ; do ( awk '/^0x[89ABCDEF]/{ print $1, $2 }' < $a ) | sort | sed -e 's/#UNDEF.*$/0xFFFD/' | cut -c6- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/$a ; done
1609
1610 { "CP 874", 0, //### what is the mib?
1611 { 0x20AC, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2026, 0xFFFD, 0xFFFD,
1612 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1613 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
1614 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1615 0x00A0, 0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07,
1616 0x0E08, 0x0E09, 0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F,
1617 0x0E10, 0x0E11, 0x0E12, 0x0E13, 0x0E14, 0x0E15, 0x0E16, 0x0E17,
1618 0x0E18, 0x0E19, 0x0E1A, 0x0E1B, 0x0E1C, 0x0E1D, 0x0E1E, 0x0E1F,
1619 0x0E20, 0x0E21, 0x0E22, 0x0E23, 0x0E24, 0x0E25, 0x0E26, 0x0E27,
1620 0x0E28, 0x0E29, 0x0E2A, 0x0E2B, 0x0E2C, 0x0E2D, 0x0E2E, 0x0E2F,
1621 0x0E30, 0x0E31, 0x0E32, 0x0E33, 0x0E34, 0x0E35, 0x0E36, 0x0E37,
1622 0x0E38, 0x0E39, 0x0E3A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x0E3F,
1623 0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0x0E45, 0x0E46, 0x0E47,
1624 0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D, 0x0E4E, 0x0E4F,
1625 0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57,
1626 0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} },
1627 { "CP 1250", 2250,
1628 { 0x20AC, 0xFFFD, 0x201A, 0xFFFD, 0x201E, 0x2026, 0x2020, 0x2021,
1629 0xFFFD, 0x2030, 0x0160, 0x2039, 0x015A, 0x0164, 0x017D, 0x0179,
1630 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
1631 0xFFFD, 0x2122, 0x0161, 0x203A, 0x015B, 0x0165, 0x017E, 0x017A,
1632 0x00A0, 0x02C7, 0x02D8, 0x0141, 0x00A4, 0x0104, 0x00A6, 0x00A7,
1633 0x00A8, 0x00A9, 0x015E, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x017B,
1634 0x00B0, 0x00B1, 0x02DB, 0x0142, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
1635 0x00B8, 0x0105, 0x015F, 0x00BB, 0x013D, 0x02DD, 0x013E, 0x017C,
1636 0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7,
1637 0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,
1638 0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,
1639 0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,
1640 0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7,
1641 0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,
1642 0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7,
1643 0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9} },
1644 { "CP 1251", 2251,
1645 { 0x0402, 0x0403, 0x201A, 0x0453, 0x201E, 0x2026, 0x2020, 0x2021,
1646 0x20AC, 0x2030, 0x0409, 0x2039, 0x040A, 0x040C, 0x040B, 0x040F,
1647 0x0452, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
1648 0xFFFD, 0x2122, 0x0459, 0x203A, 0x045A, 0x045C, 0x045B, 0x045F,
1649 0x00A0, 0x040E, 0x045E, 0x0408, 0x00A4, 0x0490, 0x00A6, 0x00A7,
1650 0x0401, 0x00A9, 0x0404, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x0407,
1651 0x00B0, 0x00B1, 0x0406, 0x0456, 0x0491, 0x00B5, 0x00B6, 0x00B7,
1652 0x0451, 0x2116, 0x0454, 0x00BB, 0x0458, 0x0405, 0x0455, 0x0457,
1653 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
1654 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
1655 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
1656 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
1657 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
1658 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
1659 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
1660 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F} },
1661 { "CP 1252", 2252,
1662 { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
1663 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD,
1664 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
1665 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178,
1666 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
1667 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
1668 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
1669 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
1670 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
1671 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
1672 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
1673 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
1674 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
1675 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
1676 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
1677 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF} },
1678 { "CP 1253", 2253,
1679 { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
1680 0xFFFD, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1681 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
1682 0xFFFD, 0x2122, 0xFFFD, 0x203A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1683 0x00A0, 0x0385, 0x0386, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
1684 0x00A8, 0x00A9, 0xFFFD, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x2015,
1685 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x00B5, 0x00B6, 0x00B7,
1686 0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F,
1687 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
1688 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,
1689 0x03A0, 0x03A1, 0xFFFD, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7,
1690 0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF,
1691 0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7,
1692 0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF,
1693 0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7,
1694 0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0xFFFD} },
1695 { "CP 1254", 2254,
1696 { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
1697 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0xFFFD, 0xFFFD,
1698 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
1699 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0xFFFD, 0x0178,
1700 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
1701 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
1702 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
1703 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
1704 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
1705 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
1706 0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
1707 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF,
1708 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
1709 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
1710 0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
1711 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF} },
1712 { "CP 1255", 2255,
1713 { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
1714 0x02C6, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1715 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
1716 0x02DC, 0x2122, 0xFFFD, 0x203A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1717 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AA, 0x00A5, 0x00A6, 0x00A7,
1718 0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
1719 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
1720 0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
1721 0x05B0, 0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7,
1722 0x05B8, 0x05B9, 0xFFFD, 0x05BB, 0x05BC, 0x05BD, 0x05BE, 0x05BF,
1723 0x05C0, 0x05C1, 0x05C2, 0x05C3, 0x05F0, 0x05F1, 0x05F2, 0x05F3,
1724 0x05F4, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1725 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7,
1726 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF,
1727 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7,
1728 0x05E8, 0x05E9, 0x05EA, 0xFFFD, 0xFFFD, 0x200E, 0x200F, 0xFFFD} },
1729 { "CP 1256", 2256,
1730 { 0x20AC, 0x067E, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
1731 0x02C6, 0x2030, 0x0679, 0x2039, 0x0152, 0x0686, 0x0698, 0x0688,
1732 0x06AF, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
1733 0x06A9, 0x2122, 0x0691, 0x203A, 0x0153, 0x200C, 0x200D, 0x06BA,
1734 0x00A0, 0x060C, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
1735 0x00A8, 0x00A9, 0x06BE, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
1736 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
1737 0x00B8, 0x00B9, 0x061B, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x061F,
1738 0x06C1, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
1739 0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F,
1740 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x00D7,
1741 0x0637, 0x0638, 0x0639, 0x063A, 0x0640, 0x0641, 0x0642, 0x0643,
1742 0x00E0, 0x0644, 0x00E2, 0x0645, 0x0646, 0x0647, 0x0648, 0x00E7,
1743 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x0649, 0x064A, 0x00EE, 0x00EF,
1744 0x064B, 0x064C, 0x064D, 0x064E, 0x00F4, 0x064F, 0x0650, 0x00F7,
1745 0x0651, 0x00F9, 0x0652, 0x00FB, 0x00FC, 0x200E, 0x200F, 0x06D2} },
1746 { "CP 1257", 2257,
1747 { 0x20AC, 0xFFFD, 0x201A, 0xFFFD, 0x201E, 0x2026, 0x2020, 0x2021,
1748 0xFFFD, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0x00A8, 0x02C7, 0x00B8,
1749 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
1750 0xFFFD, 0x2122, 0xFFFD, 0x203A, 0xFFFD, 0x00AF, 0x02DB, 0xFFFD,
1751 0x00A0, 0xFFFD, 0x00A2, 0x00A3, 0x00A4, 0xFFFD, 0x00A6, 0x00A7,
1752 0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6,
1753 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
1754 0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6,
1755 0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112,
1756 0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B,
1757 0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7,
1758 0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF,
1759 0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113,
1760 0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C,
1761 0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7,
1762 0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x02D9} },
1763 { "CP 1258", 2258,
1764 { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
1765 0x02C6, 0x2030, 0xFFFD, 0x2039, 0x0152, 0xFFFD, 0xFFFD, 0xFFFD,
1766 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
1767 0x02DC, 0x2122, 0xFFFD, 0x203A, 0x0153, 0xFFFD, 0xFFFD, 0x0178,
1768 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
1769 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
1770 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
1771 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
1772 0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
1773 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x0300, 0x00CD, 0x00CE, 0x00CF,
1774 0x0110, 0x00D1, 0x0309, 0x00D3, 0x00D4, 0x01A0, 0x00D6, 0x00D7,
1775 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x01AF, 0x0303, 0x00DF,
1776 0x00E0, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
1777 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x0301, 0x00ED, 0x00EE, 0x00EF,
1778 0x0111, 0x00F1, 0x0323, 0x00F3, 0x00F4, 0x01A1, 0x00F6, 0x00F7,
1779 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x01B0, 0x20AB, 0x00FF} },
1780
1781 // this one is generated from the charmap file located in /usr/share/i18n/charmaps
1782 // on most Linux distributions. The thai character set tis620 is byte by byte equivalent
1783 // to iso8859-11, so we name it 8859-11 here, but recognise the name tis620 too.
1784
1785 // $ for A in 8 9 A B C D E F ; do for B in 0 1 2 3 4 5 6 7 8 9 A B C D E F ; do echo x${A}${B} 0xFFFD ; done ; done > /tmp/digits ; ( cut -c25- < TIS-620 ; cat /tmp/digits ) | awk '/^x[89ABCDEF]/{ print $1, $2 }' | sed -e 's/<U/0x/' -e 's/>//' | sort | uniq -w4 | cut -c5- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/tis-620
1786 { "ISO 8859-11", 2259, // Thai character set mib enum taken from tis620 (which is byte by byte equivalent)
1787 { 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1788 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1789 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1790 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1791 0xFFFD, 0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07,
1792 0x0E08, 0x0E09, 0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F,
1793 0x0E10, 0x0E11, 0x0E12, 0x0E13, 0x0E14, 0x0E15, 0x0E16, 0x0E17,
1794 0x0E18, 0x0E19, 0x0E1A, 0x0E1B, 0x0E1C, 0x0E1D, 0x0E1E, 0x0E1F,
1795 0x0E20, 0x0E21, 0x0E22, 0x0E23, 0x0E24, 0x0E25, 0x0E26, 0x0E27,
1796 0x0E28, 0x0E29, 0x0E2A, 0x0E2B, 0x0E2C, 0x0E2D, 0x0E2E, 0x0E2F,
1797 0x0E30, 0x0E31, 0x0E32, 0x0E33, 0x0E34, 0x0E35, 0x0E36, 0x0E37,
1798 0x0E38, 0x0E39, 0x0E3A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x0E3F,
1799 0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0x0E45, 0x0E46, 0x0E47,
1800 0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D, 0x0E4E, 0x0E4F,
1801 0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57,
1802 0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} },
1803
1804 // change LAST_MIB if you add more, and edit unicodevalues in
1805 // kernel/qpsprinter.cpp too.
1806};
1807
1808
1809static const QSimpleTextCodec * reverseOwner = 0;
1810static QArray<char> * reverseMap = 0;
1811
1812
1813QSimpleTextCodec::QSimpleTextCodec( int i )
1814 : QTextCodec(), forwardIndex( i )
1815{
1816}
1817
1818
1819QSimpleTextCodec::~QSimpleTextCodec()
1820{
1821 if ( reverseOwner == this ) {
1822 delete reverseMap;
1823 reverseMap = 0;
1824 reverseOwner = 0;
1825 }
1826}
1827
1828// what happens if strlen(chars)<len? what happens if !chars? if len<1?
1829QString QSimpleTextCodec::toUnicode(const char* chars, int len) const
1830{
1831 if(len <= 0)
1832 return QString::null;
1833
1834 int clen = qstrlen(chars);
1835 len = QMIN(len, clen); // Note: NUL ends string
1836
1837 QString r;
1838 r.setUnicode(0, len);
1839 QChar* uc = (QChar*)r.unicode(); // const_cast
1840 const unsigned char * c = (const unsigned char *)chars;
1841 for( int i=0; i<len; i++ ) {
1842 if ( c[i] > 127 )
1843 uc[i] = unicodevalues[forwardIndex].values[c[i]-128];
1844 else
1845 uc[i] = c[i];
1846 }
1847 return r;
1848}
1849
1850
1851QCString QSimpleTextCodec::fromUnicode(const QString& uc, int& len ) const
1852{
1853 if ( reverseOwner != this ) {
1854 int m = 0;
1855 int i = 0;
1856 while( i < 128 ) {
1857 if ( unicodevalues[forwardIndex].values[i] > m &&
1858 unicodevalues[forwardIndex].values[i] < 0xfffd )
1859 m = unicodevalues[forwardIndex].values[i];
1860 i++;
1861 }
1862 m++;
1863 if ( !reverseMap )
1864 reverseMap = new QArray<char>( m );
1865 if ( m > (int)(reverseMap->size()) )
1866 reverseMap->resize( m );
1867 for( i = 0; i < 128 && i < m; i++ )
1868 (*reverseMap)[i] = (char)i;
1869 for( ;i < m; i++ )
1870 (*reverseMap)[i] = '?';
1871 for( i=128; i<256; i++ ) {
1872 int u = unicodevalues[forwardIndex].values[i-128];
1873 if ( u < m )
1874 (*reverseMap)[u] = (char)(unsigned char)(i);
1875 }
1876 reverseOwner = this;
1877 }
1878 if ( len <0 || len > (int)uc.length() )
1879 len = uc.length();
1880 QCString r( len+1 );
1881 int i = len;
1882 int u;
1883 const QChar* ucp = uc.unicode();
1884 char* rp = r.data();
1885 char* rmp = reverseMap->data();
1886 int rmsize = (int) reverseMap->size();
1887 while( i-- )
1888 {
1889 u = ucp->unicode();
1890 *rp++ = u < 128 ? u : (( u < rmsize ) ? (*(rmp+u)) : '?' );
1891 ucp++;
1892 }
1893 r[len] = 0;
1894 return r;
1895}
1896
1897
1898const char* QSimpleTextCodec::name() const
1899{
1900 return unicodevalues[forwardIndex].cs;
1901}
1902
1903
1904int QSimpleTextCodec::mibEnum() const
1905{
1906 return unicodevalues[forwardIndex].mib;
1907}
1908
1909int QSimpleTextCodec::heuristicNameMatch(const char* hint) const
1910{
1911 if ( hint[0]=='k' ) {
1912 // Help people with messy fonts
1913 if ( QCString(hint) == "koi8-1" )
1914 return QTextCodec::heuristicNameMatch("koi8-r")-1;
1915 if ( QCString(hint) == "koi8-ru" )
1916 return QTextCodec::heuristicNameMatch("koi8-r")-1;
1917 } else if ( hint[0] == 't' && QCString(name()) == "ISO 8859-11" ) {
1918// 8859-11 and tis620 are byte by bute equivalent
1919int i = simpleHeuristicNameMatch("tis620-0", hint);
1920if( !i )
1921 i = simpleHeuristicNameMatch("tis-620", hint);
1922if( i ) return i;
1923 }
1924 return QTextCodec::heuristicNameMatch(hint);
1925}
1926
1927int QSimpleTextCodec::heuristicContentMatch(const char* chars, int len) const
1928{
1929 if ( len<1 || !chars )
1930 return -1;
1931 int i = 0;
1932 const uchar * c = (const unsigned char *)chars;
1933 int r = 0;
1934 while( i<len && c && *c ) {
1935 if ( *c >= 128 ) {
1936 if ( unicodevalues[forwardIndex].values[(*c)-128] == 0xfffd )
1937 return -1;
1938 }
1939 if ( (*c >= ' ' && *c < 127) ||
1940 *c == '\n' || *c == '\t' || *c == '\r' )
1941 r++;
1942 i++;
1943 c++;
1944 }
1945 if ( mibEnum()==4 )
1946 r+=1;
1947 return r;
1948}
1949
1950
1951#endif // QT_NO_CODECS
1952
1953class QLatin1Codec: public QTextCodec
1954{
1955public:
1956 QLatin1Codec();
1957 ~QLatin1Codec();
1958
1959 QString toUnicode(const char* chars, int len) const;
1960 QCString fromUnicode(const QString& uc, int& lenInOut ) const;
1961
1962 const char* name() const;
1963 int mibEnum() const;
1964
1965 int heuristicContentMatch(const char* chars, int len) const;
1966
1967 int heuristicNameMatch(const char* hint) const;
1968
1969private:
1970 int forwardIndex;
1971};
1972
1973
1974QLatin1Codec::QLatin1Codec()
1975 : QTextCodec()
1976{
1977}
1978
1979
1980QLatin1Codec::~QLatin1Codec()
1981{
1982}
1983
1984// what happens if strlen(chars)<len? what happens if !chars? if len<1?
1985QString QLatin1Codec::toUnicode(const char* chars, int len) const
1986{
1987 if(len <= 0)
1988 return QString::null;
1989
1990 return QString::fromLatin1(chars, len);
1991}
1992
1993
1994QCString QLatin1Codec::fromUnicode(const QString& uc, int& len ) const
1995{
1996 if ( len <0 || len > (int)uc.length() )
1997 len = uc.length();
1998 QCString r( len+1 );
1999 int i = 0;
2000 const QChar *ch = uc.unicode();
2001 while ( i < len ) {
2002r[i] = ch->row() ? '?' : ch->cell();
2003i++;
2004ch++;
2005 }
2006 r[len] = 0;
2007 return r;
2008}
2009
2010
2011const char* QLatin1Codec::name() const
2012{
2013 return "ISO 8859-1";
2014}
2015
2016
2017int QLatin1Codec::mibEnum() const
2018{
2019 return 4;
2020}
2021
2022int QLatin1Codec::heuristicNameMatch(const char* hint) const
2023{
2024 return QTextCodec::heuristicNameMatch(hint);
2025}
2026
2027int QLatin1Codec::heuristicContentMatch(const char* chars, int len) const
2028{
2029 if ( len<1 || !chars )
2030 return -1;
2031 int i = 0;
2032 const uchar * c = (const unsigned char *)chars;
2033 int r = 0;
2034 while( i<len && c && *c ) {
2035 if ( *c >= 0x80 && *c < 0xa0 )
2036 return -1;
2037 if ( (*c >= ' ' && *c < 127) ||
2038 *c == '\n' || *c == '\t' || *c == '\r' )
2039 r++;
2040 i++;
2041 c++;
2042 }
2043 return r;
2044}
2045
2046
2047static void setupBuiltinCodecs()
2048{
2049 (void)new QLatin1Codec;
2050
2051#ifndef QT_NO_CODECS
2052 int i = 0;
2053 do {
2054 (void)new QSimpleTextCodec( i );
2055 } while( unicodevalues[i++].mib != LAST_MIB );
2056
2057 //(void)new QEucJpCodec;
2058 //(void)new QSjisCodec;
2059 //(void)new QJisCodec;
2060 //(void)new QEucKrCodec;
2061 //(void)new QGbkCodec;
2062 //(void)new QBig5Codec;
2063 (void)new QUtf8Codec;
2064 (void)new QUtf16Codec;
2065 //(void)new QHebrewCodec;
2066 //(void)new QArabicCodec;
2067 //(void)new QTsciiCodec;
2068#endif // QT_NO_CODECS
2069}
2070
2071#endif // QT_NO_TEXTCODEC
2072

Archive Download this file

Revision: 1322