Root/
Source at commit 1322 created 12 years 8 months ago. By meklort, Add doxygen to utils folder | |
---|---|
1 | /*! \file translator.cpp ␊ |
2 | * \brief Implementation of generally used translator methods.␊ |
3 | * ␊ |
4 | * This file contains implementation of the translator methods that␊ |
5 | * are not expected to be reimplemented by derived translator classes.␊ |
6 | * It also contains static data tables used by the methods.␊ |
7 | * ␊ |
8 | */␊ |
9 | #include "translator.h"␊ |
10 | ␊ |
11 | /*! The translation table used by Win1250ToISO88592() method. */␊ |
12 | const char Translator::Win1250ToISO88592Tab[] = ␊ |
13 | {␊ |
14 | '\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87',␊ |
15 | '\x88', '\x89', '\xA9', '\x8B', '\xA6', '\xAB', '\xAE', '\xAC',␊ |
16 | '\x90', '\x91', '\x92', '\x93', '\x94', '\x2E', '\x96', '\x97',␊ |
17 | '\x98', '\x99', '\xB9', '\x9B', '\xB6', '\xBB', '\xBE', '\xBC',␊ |
18 | '\xA0', '\x20', '\x20', '\xA3', '\xA4', '\xA1', '\xA6', '\xA7',␊ |
19 | '\x22', '\xA9', '\xAA', '\x3C', '\xAC', '\x2D', '\xAE', '\xAF',␊ |
20 | '\x2E', '\x2B', '\x20', '\xB3', '\x27', '\x75', '\xB6', '\xB7',␊ |
21 | '\x20', '\xB1', '\xBA', '\x3E', '\xA5', '\x22', '\xB5', '\xBF',␊ |
22 | '\xC0', '\xC1', '\xC2', '\xC3', '\xC4', '\xC5', '\xC6', '\xC7',␊ |
23 | '\xC8', '\xC9', '\xCA', '\xCB', '\xCC', '\xCD', '\xCE', '\xCF',␊ |
24 | '\xD0', '\xD1', '\xD2', '\xD3', '\xD4', '\xD5', '\xD6', '\xD7',␊ |
25 | '\xD8', '\xD9', '\xDA', '\xDB', '\xDC', '\xDD', '\xDE', '\xDF',␊ |
26 | '\xE0', '\xE1', '\xE2', '\xE3', '\xE4', '\xE5', '\xE6', '\xE7',␊ |
27 | '\xE8', '\xE9', '\xEA', '\xEB', '\xEC', '\xED', '\xEE', '\xEF',␊ |
28 | '\xF0', '\xF1', '\xF2', '\xF3', '\xF4', '\xF5', '\xF6', '\x2D',␊ |
29 | '\xF8', '\xF9', '\xFA', '\xFB', '\xFC', '\xFD', '\xFE', '\xFF',␊ |
30 | '\0'␊ |
31 | };␊ |
32 | ␊ |
33 | ␊ |
34 | /*! The translation table used by ISO88592ToWin1250() method. */␊ |
35 | const char Translator::ISO88592ToWin1250Tab[] = {␊ |
36 | '\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87',␊ |
37 | '\x88', '\x89', '\x8A', '\x8B', '\x8C', '\x8D', '\x8E', '\x8F',␊ |
38 | '\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97',␊ |
39 | '\x98', '\x99', '\x9A', '\x9B', '\x9C', '\x9D', '\x9E', '\x9F',␊ |
40 | '\xA0', '\xA5', '\xA2', '\xA3', '\xA4', '\xBC', '\x8C', '\xA7',␊ |
41 | '\xA8', '\x8A', '\xAA', '\x8D', '\x8F', '\xAD', '\x8E', '\xAF',␊ |
42 | '\xB0', '\xB9', '\xB2', '\xB3', '\xB4', '\xBE', '\x9C', '\xB7',␊ |
43 | '\xB8', '\x9A', '\xBA', '\x9D', '\x9F', '\xBD', '\x9E', '\xBF',␊ |
44 | '\xC0', '\xC1', '\xC2', '\xC3', '\xC4', '\xC5', '\xC6', '\xC7',␊ |
45 | '\xC8', '\xC9', '\xCA', '\xCB', '\xCC', '\xCD', '\xCE', '\xCF',␊ |
46 | '\xD0', '\xD1', '\xD2', '\xD3', '\xD4', '\xD5', '\xD6', '\xD7',␊ |
47 | '\xD8', '\xD9', '\xDA', '\xDB', '\xDC', '\xDD', '\xDE', '\xDF',␊ |
48 | '\xE0', '\xE1', '\xE2', '\xE3', '\xE4', '\xE5', '\xE6', '\xE7',␊ |
49 | '\xE8', '\xE9', '\xEA', '\xEB', '\xEC', '\xED', '\xEE', '\xEF',␊ |
50 | '\xF0', '\xF1', '\xF2', '\xF3', '\xF4', '\xF5', '\xF6', '\xF7',␊ |
51 | '\xF8', '\xF9', '\xFA', '\xFB', '\xFC', '\xFD', '\xFE', '\xFF',␊ |
52 | '\0'␊ |
53 | };␊ |
54 | ␊ |
55 | ␊ |
56 | /*! The translation table used by Koi8RToWindows1251() method. */␊ |
57 | const unsigned char Translator::Koi8RToWindows1251Tab[128] =␊ |
58 | { 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,␊ |
59 | 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,␊ |
60 | 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,␊ |
61 | 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,␊ |
62 | 254,224,225,246,228,229,244,227,245,232,233,234,235,236,237,238,␊ |
63 | 239,255,240,241,242,243,230,226,252,251,231,248,253,249,247,250,␊ |
64 | 222,192,193,214,196,197,212,195,213,200,201,202,203,204,205,206,␊ |
65 | 207,223,208,209,210,211,198,194,220,219,199,216,221,217,215,218 ␊ |
66 | };␊ |
67 | ␊ |
68 | ␊ |
69 | /*! The translation table used by Windows1251ToKoi8R() method. */␊ |
70 | const unsigned char Translator::Windows1251ToKoi8RTab[128] =␊ |
71 | { 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,␊ |
72 | 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,␊ |
73 | 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,␊ |
74 | 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,␊ |
75 | 225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,␊ |
76 | 242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,␊ |
77 | 193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,␊ |
78 | 210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209␊ |
79 | };␊ |
80 | ␊ |
81 | /*! Returns the string converted from windows-1250 to iso-8859-2. */␊ |
82 | /* The method was designed initially for translator_cz.h. ␊ |
83 | * It is used for on-line encoding conversion related to␊ |
84 | * conditional compilation in Unix/MS Windows environments␊ |
85 | * (both use different encoding). Later, the translator_hr.h␊ |
86 | * (by Boris Bralo) used and improved the same style. As the␊ |
87 | * method with the translation table was the same, the␊ |
88 | * decision to move it to this base class was made. The same␊ |
89 | * holds for ISO88592ToWin1250() method. ␊ |
90 | * ␊ |
91 | * Alexandr Chelpanov used the same approach for␊ |
92 | * Koi8RToWindows1251() and Windows1251ToKoi8R() methods. Notice,␊ |
93 | * that he uses Unicode tables.␊ |
94 | * ␊ |
95 | * It is recommended for possibly other similar methods in future.␊ |
96 | */␊ |
97 | QCString Translator::Win1250ToISO88592(const QCString & sInput)␊ |
98 | {␊ |
99 | // The conversion table for characters >127␊ |
100 | // ␊ |
101 | ␊ |
102 | QCString result;␊ |
103 | int len = sInput.length();␊ |
104 | ␊ |
105 | for (int i = 0; i < len; ++i)␊ |
106 | {␊ |
107 | unsigned int c = sInput[i]; ␊ |
108 | result += (c > 127) ? Win1250ToISO88592Tab[c & 0x7F] : c;␊ |
109 | }␊ |
110 | return result;␊ |
111 | }␊ |
112 | ␊ |
113 | ␊ |
114 | /*! returns the string converted from iso-8859-2 to windows-1250 */␊ |
115 | /* See the comments of the Win1250ToISO88592() method for details. */␊ |
116 | QCString Translator::ISO88592ToWin1250(const QCString & sInput)␊ |
117 | {␊ |
118 | // The conversion table for characters >127␊ |
119 | // ␊ |
120 | QCString result;␊ |
121 | int len = sInput.length();␊ |
122 | ␊ |
123 | for (int i = 0; i < len; ++i)␊ |
124 | {␊ |
125 | unsigned int c = sInput[i]; ␊ |
126 | result += (c > 127) ? ISO88592ToWin1250Tab[c & 0x7F] : c;␊ |
127 | }␊ |
128 | return result;␊ |
129 | }␊ |
130 | ␊ |
131 | ␊ |
132 | /*! Returns the string converted from koi8-r to windows-1251. */␊ |
133 | /* The method was designed initially for translator_cz.h. ␊ |
134 | It is used for on-line encoding conversion related to conditional␊ |
135 | compilation in Unix/MS Windows environments (both use different␊ |
136 | encoding). Encoding table got from QT:qtextcodec.cpp␊ |
137 | */␊ |
138 | QCString Translator::Koi8RToWindows1251( const QCString & sInput )␊ |
139 | {␊ |
140 | ␊ |
141 | QCString result(sInput);␊ |
142 | int len = sInput.length();␊ |
143 | ␊ |
144 | const unsigned char * c = (const unsigned char *)(const char*)sInput;␊ |
145 | unsigned char *dc = (unsigned char*)(const char*)result;␊ |
146 | for( int i=0; i<len; i++ ) {␊ |
147 | if ( c[i] > 127 )␊ |
148 | dc[i] = Koi8RToWindows1251Tab[c[i]-128];␊ |
149 | }␊ |
150 | return result;␊ |
151 | }␊ |
152 | ␊ |
153 | ␊ |
154 | /*! returns the string converted from Windows-1251 to koi8-r */␊ |
155 | /* See the comments of the Koi8RToWindows1251() method for details.␊ |
156 | Encoding table got from QT:qtextcodec.cpp */␊ |
157 | QCString Translator::Windows1251ToKoi8R( const QCString & sInput )␊ |
158 | {␊ |
159 | QCString result(sInput);␊ |
160 | int len = sInput.length();␊ |
161 | ␊ |
162 | const unsigned char * c = (const unsigned char *)(const char*)sInput;␊ |
163 | unsigned char *dc = (unsigned char*)(const char*)result;␊ |
164 | for( int i=0; i<len; i++ ) {␊ |
165 | if ( c[i] > 127 )␊ |
166 | dc[i] = Windows1251ToKoi8RTab[c[i]-128];␊ |
167 | }␊ |
168 | return result;␊ |
169 | }␊ |
170 | ␊ |
/*! Returns the character converted from hankaku-kana to zenkaku-kana.
 *
 *  \param hankaku code to convert; only values 0xa0..0xdf are mapped.
 *  \return the two-byte code stored in the table for \a hankaku, or 0
 *          when \a hankaku lies outside 0xa0..0xdf.
 *
 *  Thanks Yongmao Ni http://alfin.mine.utsunomiya-u.ac.jp/~niy/algo/
 */
unsigned int hankaku2zen(int hankaku)
{
  // One entry per input code, starting at 0xa0.
  static const unsigned int zenkaku[64] = {
    /* 0xa0 */ 0x2121,0x2123,0x2156,0x2157,0x2122,0x2126,0x2572,0x2521,
    /* 0xa8 */ 0x2523,0x2525,0x2527,0x2529,0x2563,0x2565,0x2567,0x2543,
    /* 0xb0 */ 0x213c,0x2522,0x2524,0x2526,0x2528,0x252a,0x252b,0x252d,
    /* 0xb8 */ 0x252f,0x2531,0x2533,0x2535,0x2537,0x2539,0x253b,0x253d,
    /* 0xc0 */ 0x253f,0x2541,0x2544,0x2546,0x2548,0x254a,0x254b,0x254c,
    /* 0xc8 */ 0x254d,0x254e,0x254f,0x2552,0x2555,0x2558,0x255b,0x255e,
    /* 0xd0 */ 0x255f,0x2560,0x2561,0x2562,0x2564,0x2566,0x2568,0x2569,
    /* 0xd8 */ 0x256a,0x256b,0x256c,0x256d,0x256f,0x2573,0x212b,0x212c
  };

  const bool inTable = (hankaku >= 0xa0) && (hankaku <= 0xdf);
  return inTable ? zenkaku[hankaku - 0xa0] : 0;
}
188 | ␊ |
189 | /*! returns the character converted from japaneseEUC to SJIS␊ |
190 | Thanks Yongmao Ni http://alfin.mine.utsunomiya-u.ac.jp/~niy/algo/ */␊ |
191 | unsigned int euc2sjis(unsigned int euc)␊ |
192 | {␊ |
193 | unsigned int jis;␊ |
194 | unsigned int hib, lob;␊ |
195 | ␊ |
196 | if ((euc & 0xff00) == 0x8e00)␊ |
197 | jis = hankaku2zen(euc & 0xff);␊ |
198 | else jis = euc & ~0x8080;␊ |
199 | ␊ |
200 | hib = (jis >> 8) & 0xff;␊ |
201 | lob = jis & 0xff;␊ |
202 | lob += (hib & 1) ? 0x1f : 0x7d;␊ |
203 | if (lob >= 0x7f) lob++;␊ |
204 | hib = ((hib - 0x21) >> 1) + 0x81;␊ |
205 | if (hib > 0x9f) hib += 0x40;␊ |
206 | ␊ |
207 | return (hib << 8) | lob;␊ |
208 | }␊ |
209 | ␊ |
210 | ␊ |
211 | /*! returns the string converted from Japanese-EUC to SJIS */␊ |
212 | ␊ |
213 | QCString Translator::JapaneseEucToSjis( const QCString & sInput )␊ |
214 | {␊ |
215 | QString result;␊ |
216 | int len = sInput.length();␊ |
217 | int c1,c2,sj;␊ |
218 | ␊ |
219 | result.setUnicode(0, len);␊ |
220 | QChar* uc = (QChar*)result.unicode(); // const_cast␊ |
221 | const unsigned char * c = (const unsigned char *)(const char*)sInput;␊ |
222 | ␊ |
223 | for( int i=0; i<len;)␊ |
224 | {␊ |
225 | c1 = c[i];␊ |
226 | ␊ |
227 | if( c1 == EOF ) break;␊ |
228 | ␊ |
229 | /* if MSB=0 then the character is ascii */␊ |
230 | if(!( c1 & 0x80))␊ |
231 | ␉{␊ |
232 | ␉ uc[i] = c[i];␊ |
233 | ␉ i=i+1;␊ |
234 | ␉}␊ |
235 | else␊ |
236 | ␉{␊ |
237 | ␉ c2 = c[i+1];␊ |
238 | ␉ if( c2 == EOF ) break;␊ |
239 | ␉ sj = euc2sjis( (c1 << 8) + c2 );␊ |
240 | ␉ uc[i] = sj >> 8;␊ |
241 | ␉ uc[i+1] = sj & 0xff;␊ |
242 | ␉ i+=2;␊ |
243 | ␉}␊ |
244 | }␊ |
245 | ␊ |
246 | return result.latin1();␊ |
247 | ␊ |
248 | }␊ |
249 |