Root/
Source at commit 298 created 13 years 11 months ago. By azimutz, Matching trunk rev 295. | |
---|---|
1 | /*␊ |
2 | * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.␊ |
3 | *␊ |
4 | * @APPLE_LICENSE_HEADER_START@␊ |
5 | * ␊ |
6 | * The contents of this file constitute Original Code as defined in and␊ |
7 | * are subject to the Apple Public Source License Version 2.0 (the␊ |
8 | * "License"). You may not use this file except in compliance with the␊ |
9 | * License. Please obtain a copy of the License at␊ |
10 | * http://www.apple.com/publicsource and read it before using this file.␊ |
11 | * ␊ |
12 | * This Original Code and all software distributed under the License are␊ |
13 | * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER␊ |
14 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,␊ |
15 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,␊ |
16 | * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the␊ |
17 | * License for the specific language governing rights and limitations␊ |
18 | * under the License.␊ |
19 | * ␊ |
20 | * @APPLE_LICENSE_HEADER_END@␊ |
21 | */␊ |
22 | /*␊ |
23 | * HFSCompare.c - Functions for working with and comparing HFS nams.␊ |
24 | *␊ |
25 | * Copyright (c) 1999-2000 Apple Computer, Inc.␊ |
26 | *␊ |
27 | * DRI: Josh de Cesare␊ |
28 | */␊ |
29 | ␊ |
30 | #include <sl.h>␊ |
31 | #include "hfs_CaseTables.h"␊ |
32 | ␊ |
33 | #if ! UNCOMPRESSED␊ |
34 | ␊ |
35 | static unsigned short *␊ |
36 | UncompressStructure(struct compressed_block *bp, int count, int size)␊ |
37 | {␊ |
38 | unsigned short *out = malloc(size);␊ |
39 | unsigned short *op = out;␊ |
40 | unsigned short data;␊ |
41 | int i, j;␊ |
42 | ␊ |
43 | for (i=0; i<count; i++, bp++) {␊ |
44 | // If this happens (it shouldn't) please fix size and/or double check that count really is␊ |
45 | // the number of elements in the array.␊ |
46 | // This was a very hard bug to find, so please leave this code here.␊ |
47 | if(out + size <= op + bp->count)␊ |
48 | {␊ |
49 | stop("HFS+ Unicode tables are malformed\n");␊ |
50 | }␊ |
51 | data = bp->data;␊ |
52 | for (j=0; j<bp->count; j++) {␊ |
53 | *op++ = data;␊ |
54 | if (bp->type == kTypeAscending) data++;␊ |
55 | else if (bp->type == kTypeAscending256) data += 256;␊ |
56 | }␊ |
57 | }␊ |
58 | return out;␊ |
59 | }␊ |
60 | ␊ |
61 | static void␊ |
62 | InitCompareTables(void)␊ |
63 | {␊ |
64 | if (gCompareTable == 0) {␊ |
65 | gCompareTable = UncompressStructure(gCompareTableCompressed,␊ |
66 | kCompareTableNBlocks, kCompareTableDataSize);␊ |
67 | gLowerCaseTable = UncompressStructure(gLowerCaseTableCompressed,␊ |
68 | kLowerCaseTableNBlocks, kLowerCaseTableDataSize);␊ |
69 | }␊ |
70 | }␊ |
71 | ␊ |
72 | #endif /* ! UNCOMPRESSED */␊ |
73 | ␊ |
74 | //_______________________________________________________________________␊ |
75 | //␊ |
76 | //␉Routine:␉FastRelString␊ |
77 | //␊ |
78 | //␉Output:␉␉returns -1 if str1 < str2␊ |
79 | //␉␉␉␉returns 1 if str1 > str2␊ |
80 | //␉␉␉␉return␉ 0 if equal␊ |
81 | //␊ |
82 | //_______________________________________________________________________␊ |
83 | ␊ |
84 | int32_t␉FastRelString(u_int8_t * str1, u_int8_t * str2)␊ |
85 | {␊ |
86 | ␉int32_t bestGuess;␊ |
87 | ␉u_int8_t length, length2;␊ |
88 | ␊ |
89 | #if ! UNCOMPRESED␊ |
90 | InitCompareTables();␊ |
91 | #endif␊ |
92 | ␊ |
93 | ␉length = *(str1++);␊ |
94 | ␉length2 = *(str2++);␊ |
95 | ␊ |
96 | ␉if (length == length2)␊ |
97 | ␉␉bestGuess = 0;␊ |
98 | ␉else if (length < length2)␊ |
99 | ␉␉bestGuess = -1;␊ |
100 | ␉else␊ |
101 | ␉{␊ |
102 | ␉␉bestGuess = 1;␊ |
103 | ␉␉length = length2;␊ |
104 | ␉}␊ |
105 | ␊ |
106 | ␉while (length--)␊ |
107 | ␉{␊ |
108 | ␉␉u_int32_t aChar, bChar;␊ |
109 | ␊ |
110 | ␉␉aChar = *(str1++);␊ |
111 | ␉␉bChar = *(str2++);␊ |
112 | ␉␉␊ |
113 | ␉␉if (aChar != bChar)␉/* If they don't match exacly, do case conversion */␊ |
114 | ␉␉{␊ |
115 | ␉␉␉u_int16_t aSortWord, bSortWord;␊ |
116 | ␊ |
117 | ␉␉␉aSortWord = gCompareTable[aChar];␊ |
118 | ␉␉␉bSortWord = gCompareTable[bChar];␊ |
119 | ␊ |
120 | ␉␉␉if (aSortWord > bSortWord)␊ |
121 | ␉␉␉␉return 1;␊ |
122 | ␊ |
123 | ␉␉␉if (aSortWord < bSortWord)␊ |
124 | ␉␉␉␉return -1;␊ |
125 | ␉␉}␊ |
126 | ␉␉␊ |
127 | ␉␉/*␊ |
128 | ␉␉ * If characters match exactly, then go on to next character␊ |
129 | ␉␉ * immediately without doing any extra work.␊ |
130 | ␉␉ */␊ |
131 | ␉}␊ |
132 | ␉␊ |
133 | ␉/* if you got to here, then return bestGuess */␊ |
134 | ␉return bestGuess;␊ |
135 | }␉␊ |
136 | ␊ |
137 | ␊ |
138 | //␊ |
139 | //␉FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering␊ |
140 | //␊ |
141 | //␉ IF␉␉␉␉RESULT␊ |
142 | //␉--------------------------␊ |
143 | //␉str1 < str2␉␉=>␉-1␊ |
144 | //␉str1 = str2␉␉=>␉ 0␊ |
145 | //␉str1 > str2␉␉=>␉+1␊ |
146 | //␊ |
147 | //␉The lower case table starts with 256 entries (one for each of the upper bytes␊ |
148 | //␉of the original Unicode char). If that entry is zero, then all characters with␊ |
149 | //␉that upper byte are already case folded. If the entry is non-zero, then it is␊ |
150 | //␉the _index_ (not byte offset) of the start of the sub-table for the characters␊ |
151 | //␉with that upper byte. All ignorable characters are folded to the value zero.␊ |
152 | //␊ |
153 | //␉In pseudocode:␊ |
154 | //␊ |
155 | //␉␉Let c = source Unicode character␊ |
156 | //␉␉Let table[] = lower case table␊ |
157 | //␊ |
158 | //␉␉lower = table[highbyte(c)]␊ |
159 | //␉␉if (lower == 0)␊ |
160 | //␉␉␉lower = c␊ |
161 | //␉␉else␊ |
162 | //␉␉␉lower = table[lower+lowbyte(c)]␊ |
163 | //␊ |
164 | //␉␉if (lower == 0)␊ |
165 | //␉␉␉ignore this character␊ |
166 | //␊ |
167 | //␉To handle ignorable characters, we now need a loop to find the next valid character.␊ |
168 | //␉Also, we can't pre-compute the number of characters to compare; the string length might␊ |
169 | //␉be larger than the number of non-ignorable characters. Further, we must be able to handle␊ |
170 | //␉ignorable characters at any point in the string, including as the first or last characters.␊ |
171 | //␉We use a zero value as a sentinel to detect both end-of-string and ignorable characters.␊ |
172 | //␉Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename,␊ |
173 | //␉the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is␊ |
174 | //␉an invalid Unicode character).␊ |
175 | //␊ |
176 | //␉Pseudocode:␊ |
177 | //␊ |
178 | //␉␉while (1) {␊ |
179 | //␉␉␉c1 = GetNextValidChar(str1)␉␉␉//␉returns zero if at end of string␊ |
180 | //␉␉␉c2 = GetNextValidChar(str2)␊ |
181 | //␊ |
182 | //␉␉␉if (c1 != c2) break␉␉␉␉␉//␉found a difference␊ |
183 | //␊ |
184 | //␉␉␉if (c1 == 0)␉␉␉␉␉␉//␉reached end of string on both strings at once?␊ |
185 | //␉␉␉␉return 0;␉␉␉␉␉␉//␉yes, so strings are equal␊ |
186 | //␉␉}␊ |
187 | //␊ |
188 | //␉␉// When we get here, c1 != c2. So, we just need to determine which one is less.␊ |
189 | //␉␉if (c1 < c2)␊ |
190 | //␉␉␉return -1;␊ |
191 | //␉␉else␊ |
192 | //␉␉␉return 1;␊ |
193 | //␊ |
194 | ␊ |
195 | int32_t FastUnicodeCompare( u_int16_t * str1, register u_int32_t length1,␊ |
196 | u_int16_t * str2, register u_int32_t length2, int byte_order )␊ |
197 | {␊ |
198 | ␉register u_int16_t c1,c2;␊ |
199 | ␉register u_int16_t temp;␊ |
200 | ␊ |
201 | #if ! UNCOMPRESSED␊ |
202 | InitCompareTables();␊ |
203 | #endif␊ |
204 | ␊ |
205 | ␉while (1) {␊ |
206 | ␉␉/* Set default values for c1, c2 in case there are no more valid chars */␊ |
207 | ␉␉c1 = 0;␊ |
208 | ␉␉c2 = 0;␊ |
209 | ␊ |
210 | ␉␉/* Find next non-ignorable char from str1, or zero if no more */␊ |
211 | ␉␉while (length1 && c1 == 0) {␊ |
212 | ␉␉␉if (byte_order == OSBigEndian)␊ |
213 | ␉␉␉␉c1 = SWAP_BE16(*(str1++));␊ |
214 | ␉␉␉else␊ |
215 | ␉␉␉␉c1 = SWAP_LE16(*(str1++));␊ |
216 | ␉␉␉--length1;␊ |
217 | ␉␉␉if ((temp = gLowerCaseTable[c1>>8]) != 0)␉␉// is there a subtable for this upper byte?␊ |
218 | ␉␉␉␉c1 = gLowerCaseTable[temp + (c1 & 0x00FF)];␉// yes, so fold the char␊ |
219 | ␉␉}␊ |
220 | ␊ |
221 | ␉␉/* Find next non-ignorable char from str2, or zero if no more */␊ |
222 | ␉␉while (length2 && c2 == 0) {␊ |
223 | ␉␉␉if (byte_order == OSBigEndian)␊ |
224 | ␉␉␉␉c2 = SWAP_BE16(*(str2++));␊ |
225 | ␉␉␉else␊ |
226 | ␉␉␉␉c2 = SWAP_LE16(*(str2++));␊ |
227 | ␉␉␉--length2;␊ |
228 | ␉␉␉if ((temp = gLowerCaseTable[c2>>8]) != 0)␉␉// is there a subtable for this upper byte?␊ |
229 | ␉␉␉␉c2 = gLowerCaseTable[temp + (c2 & 0x00FF)];␉// yes, so fold the char␊ |
230 | ␉␉}␊ |
231 | ␊ |
232 | ␉␉if (c1 != c2)␉/* found a difference, so stop looping */␊ |
233 | ␉␉␉break;␊ |
234 | ␉␉␊ |
235 | ␉␉if (c1 == 0)␉␉/* did we reach the end of both strings at the same time? */␊ |
236 | ␉␉␉return 0;␉/* yes, so strings are equal */␊ |
237 | ␉}␊ |
238 | ␉␊ |
239 | ␉if (c1 < c2)␊ |
240 | ␉␉return -1;␊ |
241 | ␉else␊ |
242 | ␉␉return 1;␊ |
243 | }␊ |
244 | ␊ |
245 | ␊ |
246 | //␊ |
247 | // BinaryUnicodeCompare - Compare two Unicode strings; produce a relative ordering␊ |
248 | // Compared using a 16-bit binary comparison (no case folding)␊ |
249 | //␊ |
250 | int32_t BinaryUnicodeCompare (u_int16_t * str1, u_int32_t length1,␊ |
251 | u_int16_t * str2, u_int32_t length2)␊ |
252 | {␊ |
253 | register u_int16_t c1, c2;␊ |
254 | int32_t bestGuess;␊ |
255 | u_int32_t length;␊ |
256 | ␊ |
257 | bestGuess = 0;␊ |
258 | ␊ |
259 | if (length1 < length2) {␊ |
260 | length = length1;␊ |
261 | --bestGuess;␊ |
262 | } else if (length1 > length2) {␊ |
263 | length = length2;␊ |
264 | ++bestGuess;␊ |
265 | } else {␊ |
266 | length = length1;␊ |
267 | }␊ |
268 | ␊ |
269 | while (length--) {␊ |
270 | c1 = *(str1++);␊ |
271 | c2 = *(str2++);␊ |
272 | ␊ |
273 | if (c1 > c2)␊ |
274 | return (1);␊ |
275 | if (c1 < c2)␊ |
276 | return (-1);␊ |
277 | }␊ |
278 | ␊ |
279 | return (bestGuess);␊ |
280 | }␊ |
281 | ␊ |
282 | ␊ |
283 | /*␊ |
284 | * UTF-8 (UCS Transformation Format)␊ |
285 | *␊ |
286 | * The following subset of UTF-8 is used to encode UCS-2 filenames. It␊ |
287 | * requires a maximum of three 3 bytes per UCS-2 character. Only the␊ |
288 | * shortest encoding required to represent the significant UCS-2 bits␊ |
289 | * is legal.␊ |
290 | * ␊ |
291 | * UTF-8 Multibyte Codes␊ |
292 | *␊ |
293 | * Bytes Bits UCS-2 Min UCS-2 Max UTF-8 Byte Sequence (binary)␊ |
294 | * -------------------------------------------------------------------␊ |
295 | * 1 7 0x0000 0x007F 0xxxxxxx␊ |
296 | * 2 11 0x0080 0x07FF 110xxxxx 10xxxxxx␊ |
297 | * 3 16 0x0800 0xFFFF 1110xxxx 10xxxxxx 10xxxxxx␊ |
298 | * -------------------------------------------------------------------␊ |
299 | */␊ |
300 | ␊ |
301 | ␊ |
302 | /*␊ |
303 | * utf_encodestr - Encodes the UCS-2 (Unicode) string at ucsp into a␊ |
304 | * null terminated UTF-8 string at utf8p.␊ |
305 | *␊ |
306 | * ucslen is the number of UCS-2 input characters (not bytes)␊ |
307 | * bufsize is the size of the output buffer in bytes␊ |
308 | */␊ |
309 | void␊ |
310 | utf_encodestr( const u_int16_t * ucsp, int ucslen,␊ |
311 | u_int8_t * utf8p, u_int32_t bufsize, int byte_order )␊ |
312 | {␊ |
313 | ␉u_int8_t *bufend;␊ |
314 | ␉u_int16_t ucs_ch;␊ |
315 | ␊ |
316 | ␉bufend = utf8p + bufsize;␊ |
317 | ␊ |
318 | ␉while (ucslen-- > 0) {␊ |
319 | if (byte_order == OSBigEndian)␊ |
320 | ␉␉ ucs_ch = SWAP_BE16(*ucsp++);␊ |
321 | else␊ |
322 | ucs_ch = SWAP_LE16(*ucsp++);␊ |
323 | ␊ |
324 | ␉␉if (ucs_ch < 0x0080) {␊ |
325 | ␉␉␉if (utf8p >= bufend)␊ |
326 | ␉␉␉␉break;␊ |
327 | ␉␉␉if (ucs_ch == '\0')␊ |
328 | ␉␉␉␉continue;␉/* skip over embedded NULLs */␊ |
329 | ␉␉␉*utf8p++ = ucs_ch;␊ |
330 | ␊ |
331 | ␉␉} else if (ucs_ch < 0x800) {␊ |
332 | ␉␉␉if ((utf8p + 1) >= bufend)␊ |
333 | ␉␉␉␉break;␊ |
334 | ␉␉␉*utf8p++ = (ucs_ch >> 6) | 0xc0;␊ |
335 | ␉␉␉*utf8p++ = (ucs_ch & 0x3f) | 0x80;␊ |
336 | ␊ |
337 | ␉␉} else {␊ |
338 | ␉␉␉if ((utf8p + 2) >= bufend)␊ |
339 | ␉␉␉␉break;␊ |
340 | ␉␉␉*utf8p++ = (ucs_ch >> 12) | 0xe0;␊ |
341 | ␉␉␉*utf8p++ = ((ucs_ch >> 6) & 0x3f) | 0x80;␊ |
342 | ␉␉␉*utf8p++ = ((ucs_ch) & 0x3f) | 0x80;␊ |
343 | ␉␉}␊ |
344 | ␉}␊ |
345 | ␊ |
346 | ␉*utf8p = '\0';␊ |
347 | }␊ |
348 | ␊ |
349 | ␊ |
350 | /*␊ |
351 | * utf_decodestr - Decodes the null terminated UTF-8 string at␊ |
352 | * utf8p into a UCS-2 (Unicode) string at ucsp.␊ |
353 | *␊ |
354 | * ucslen is the number of UCS-2 output characters (not bytes)␊ |
355 | * bufsize is the size of the output buffer in bytes␊ |
356 | */␊ |
357 | void utf_decodestr(const u_int8_t * utf8p, u_int16_t * ucsp, u_int16_t * ucslen, u_int32_t bufsize, int byte_order)␊ |
358 | {␊ |
359 | ␉u_int16_t *bufstart;␊ |
360 | ␉u_int16_t *bufend;␊ |
361 | ␉u_int16_t ucs_ch;␊ |
362 | ␉u_int8_t byte;␊ |
363 | ␊ |
364 | ␉bufstart = ucsp;␊ |
365 | ␉bufend = (u_int16_t *)((u_int8_t *)ucsp + bufsize);␊ |
366 | ␊ |
367 | ␉while ((byte = *utf8p++) != '\0') {␊ |
368 | ␉␉if (ucsp >= bufend)␊ |
369 | ␉␉␉break;␊ |
370 | ␊ |
371 | ␉␉/* check for ascii */␊ |
372 | ␉␉if (byte < 0x80) {␊ |
373 | ␉␉␉ucs_ch = byte;␊ |
374 | ␉␉␉␊ |
375 | if (byte_order == OSBigEndian)␊ |
376 | ␉␉ *ucsp++ = SWAP_BE16(ucs_ch);␊ |
377 | else␊ |
378 | *ucsp++ = SWAP_LE16(ucs_ch);␊ |
379 | ␉␉␉␊ |
380 | ␉␉␉continue;␊ |
381 | ␉␉}␊ |
382 | ␊ |
383 | ␉␉switch (byte & 0xf0) {␊ |
384 | ␉␉/* 2 byte sequence*/␊ |
385 | ␉␉case 0xc0:␊ |
386 | ␉␉case 0xd0:␊ |
387 | ␉␉␉/* extract bits 6 - 10 from first byte */␊ |
388 | ␉␉␉ucs_ch = (byte & 0x1F) << 6; ␊ |
389 | ␉␉␉break;␊ |
390 | ␉␉/* 3 byte sequence*/␊ |
391 | ␉␉case 0xe0:␊ |
392 | ␉␉␉/* extract bits 12 - 15 from first byte */␊ |
393 | ␉␉␉ucs_ch = (byte & 0x0F) << 6;␊ |
394 | ␊ |
395 | ␉␉␉/* extract bits 6 - 11 from second byte */␊ |
396 | ␉␉␉if (((byte = *utf8p++) & 0xc0) != 0x80)␊ |
397 | ␉␉␉␉goto stop;␊ |
398 | ␊ |
399 | ␉␉␉ucs_ch += (byte & 0x3F);␊ |
400 | ␉␉␉ucs_ch <<= 6;␊ |
401 | ␉␉␉break;␊ |
402 | ␉␉default:␊ |
403 | ␉␉␉goto stop;␊ |
404 | ␉␉}␊ |
405 | ␊ |
406 | ␉␉/* extract bits 0 - 5 from final byte */␊ |
407 | ␉␉if (((byte = *utf8p++) & 0xc0) != 0x80)␊ |
408 | ␉␉␉goto stop;␊ |
409 | ␉␉ucs_ch += (byte & 0x3F); ␊ |
410 | ␊ |
411 | if (byte_order == OSBigEndian)␊ |
412 | *ucsp++ = SWAP_BE16(ucs_ch);␊ |
413 | else␊ |
414 | *ucsp++ = SWAP_LE16(ucs_ch);␊ |
415 | ␉}␊ |
416 | stop:␊ |
417 | if (byte_order == OSBigEndian)␊ |
418 | *ucslen = SWAP_BE16(ucsp - bufstart);␊ |
419 | else␊ |
420 | *ucslen = SWAP_LE16(ucsp - bufstart);␊ |
421 | }␊ |
422 |