Chameleon

Chameleon Svn Source Tree

Root/tags/2.3/i386/libsaio/hfs_compare.c

Source at commit 2862 created 7 years 26 days ago.
By ifabio, Tag 2.3 release, bump svn to 2.4
1/*
2 * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 2.0 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 *
22 * HFSCompare.c - Functions for working with and comparing HFS nams.
23 *
24 * Copyright (c) 1999-2000 Apple Computer, Inc.
25 *
26 * DRI: Josh de Cesare
27 */
28
29#include <sl.h>
30#include "hfs_CaseTables.h"
31
32#if ! UNCOMPRESSED
33
34static unsigned short *UncompressStructure(struct compressed_block *bp, int count, int size)
35{
36int i, j;
37
38unsigned short *out = malloc(size);
39
40if (out)
41{
42unsigned short *op = out;
43unsigned short data;
44
45for (i = 0; i < count; i++, bp++)
46{
47// If this happens (it shouldn't) please fix size and/or double check that count really is
48// the number of elements in the array.
49 // This was a very hard bug to find, so please leave this code here.
50if(out + size <= op + bp->count)
51{
52stop("HFS+ Unicode tables are malformed\n");
53}
54
55data = bp->data;
56
57for (j = 0; j < bp->count; j++)
58{
59*op++ = data;
60
61if (bp->type == kTypeAscending)
62{
63data++;
64}
65else if (bp->type == kTypeAscending256)
66{
67data += 256;
68}
69}
70}
71
72return out;
73}
74
75return NULL;
76}
77
78static void InitCompareTables(void)
79{
80if (gCompareTable == 0)
81{
82gCompareTable = UncompressStructure(gCompareTableCompressed,
83 kCompareTableNBlocks, kCompareTableDataSize);
84gLowerCaseTable = UncompressStructure(gLowerCaseTableCompressed,
85 kLowerCaseTableNBlocks, kLowerCaseTableDataSize);
86}
87}
88
89#endif /* ! UNCOMPRESSED */
90
91//
92//
93//Routine:FastRelString
94//
95//Output:returns -1 if str1 < str2
96//returns 1 if str1 > str2
97//return 0 if equal
98//
99//
100
101int32_tFastRelString(u_int8_t * str1, u_int8_t * str2)
102{
103int32_t bestGuess;
104u_int8_t length, length2;
105
106#if ! UNCOMPRESED
107 InitCompareTables();
108#endif
109
110length = *(str1++);
111length2 = *(str2++);
112
113if (length == length2)
114{
115bestGuess = 0;
116}
117else if (length < length2)
118{
119bestGuess = -1;
120}
121else
122{
123bestGuess = 1;
124length = length2;
125}
126
127while (length--)
128{
129u_int32_t aChar, bChar;
130
131aChar = *(str1++);
132bChar = *(str2++);
133
134if (aChar != bChar)/* If they don't match exacly, do case conversion */
135{
136u_int16_t aSortWord, bSortWord;
137
138aSortWord = gCompareTable[aChar];
139bSortWord = gCompareTable[bChar];
140
141if (aSortWord > bSortWord)
142{
143return 1;
144}
145
146if (aSortWord < bSortWord)
147{
148return -1;
149}
150}
151
152/*
153 * If characters match exactly, then go on to next character
154 * immediately without doing any extra work.
155 */
156}
157
158/* If you got to here, then return bestGuess */
159return bestGuess;
160}
161
162
163//
164//FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering
165//
166// IFRESULT
167//--------------------------
168//str1 < str2=>-1
169//str1 = str2=> 0
170//str1 > str2=>+1
171//
172//The lower case table starts with 256 entries (one for each of the upper bytes
173//of the original Unicode char). If that entry is zero, then all characters with
174//that upper byte are already case folded. If the entry is non-zero, then it is
175//the _index_ (not byte offset) of the start of the sub-table for the characters
176//with that upper byte. All ignorable characters are folded to the value zero.
177//
178//In pseudocode:
179//
180//Let c = source Unicode character
181//Let table[] = lower case table
182//
183//lower = table[highbyte(c)]
184//if (lower == 0)
185//lower = c
186//else
187//lower = table[lower+lowbyte(c)]
188//
189//if (lower == 0)
190//ignore this character
191//
192//To handle ignorable characters, we now need a loop to find the next valid character.
193//Also, we can't pre-compute the number of characters to compare; the string length might
194//be larger than the number of non-ignorable characters. Further, we must be able to handle
195//ignorable characters at any point in the string, including as the first or last characters.
196//We use a zero value as a sentinel to detect both end-of-string and ignorable characters.
197//Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename,
198//the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is
199//an invalid Unicode character).
200//
201//Pseudocode:
202//
203//while (1) {
204//c1 = GetNextValidChar(str1)//returns zero if at end of string
205//c2 = GetNextValidChar(str2)
206//
207//if (c1 != c2) break//found a difference
208//
209//if (c1 == 0)//reached end of string on both strings at once?
210//return 0;//yes, so strings are equal
211//}
212//
213//// When we get here, c1 != c2. So, we just need to determine which one is less.
214//if (c1 < c2)
215//return -1;
216//else
217//return 1;
218//
219
220int32_t FastUnicodeCompare( u_int16_t * str1, register u_int32_t length1, u_int16_t * str2, register u_int32_t length2, int byte_order )
221{
222register u_int16_t c1,c2;
223register u_int16_t temp;
224
225#if ! UNCOMPRESSED
226 InitCompareTables();
227#endif
228
229while (1)
230{
231/* Set default values for c1, c2 in case there are no more valid chars */
232c1 = 0;
233c2 = 0;
234
235/* Find next non-ignorable char from str1, or zero if no more */
236while (length1 && c1 == 0)
237{
238if (byte_order == OSBigEndian)
239{
240c1 = SWAP_BE16(*(str1++));
241}
242else
243{
244c1 = SWAP_LE16(*(str1++));
245}
246
247--length1;
248if ((temp = gLowerCaseTable[c1>>8]) != 0)// is there a subtable for this upper byte?
249c1 = gLowerCaseTable[temp + (c1 & 0x00FF)];// yes, so fold the char
250}
251
252/* Find next non-ignorable char from str2, or zero if no more */
253while (length2 && c2 == 0)
254{
255if (byte_order == OSBigEndian)
256{
257c2 = SWAP_BE16(*(str2++));
258}
259else
260{
261c2 = SWAP_LE16(*(str2++));
262}
263--length2;
264if ((temp = gLowerCaseTable[c2>>8]) != 0)// Is there a subtable for this upper byte?
265{
266c2 = gLowerCaseTable[temp + (c2 & 0x00FF)];// Yes, so fold the char
267}
268}
269
270if (c1 != c2)/* Found a difference, so stop looping */
271{
272break;
273}
274
275if (c1 == 0)/* Did we reach the end of both strings at the same time? */
276{
277return 0;/* Yes, so strings are equal */
278}
279}
280
281if (c1 < c2)
282{
283return -1;
284}
285else
286{
287return 1;
288}
289}
290
291
292//
293// BinaryUnicodeCompare - Compare two Unicode strings; produce a relative ordering
294// Compared using a 16-bit binary comparison (no case folding)
295//
296int32_t BinaryUnicodeCompare(u_int16_t * str1, u_int32_t length1, u_int16_t * str2, u_int32_t length2)
297{
298register u_int16_t c1, c2;
299int32_t bestGuess = 0;
300u_int32_t length;
301
302if (length1 < length2)
303{
304length = length1;
305--bestGuess;
306}
307else if (length1 > length2)
308{
309length = length2;
310++bestGuess;
311}
312else
313{
314length = length1;
315}
316
317while (length--)
318{
319c1 = *(str1++);
320c2 = *(str2++);
321
322if (c1 > c2)
323{
324return (1);
325}
326
327if (c1 < c2)
328{
329return (-1);
330}
331}
332
333return (bestGuess);
334}
335
336
337/*
338 * UTF-8 (UCS Transformation Format)
339 *
340 * The following subset of UTF-8 is used to encode UCS-2 filenames. It
341 * requires a maximum of three 3 bytes per UCS-2 character. Only the
342 * shortest encoding required to represent the significant UCS-2 bits
343 * is legal.
344 *
345 * UTF-8 Multibyte Codes
346 *
347 * Bytes Bits UCS-2 Min UCS-2 Max UTF-8 Byte Sequence (binary)
348 * -------------------------------------------------------------------
349 * 1 7 0x0000 0x007F 0xxxxxxx
350 * 2 11 0x0080 0x07FF 110xxxxx 10xxxxxx
351 * 3 16 0x0800 0xFFFF 1110xxxx 10xxxxxx 10xxxxxx
352 * -------------------------------------------------------------------
353 */
354
355
356/*
357 * utf_encodestr - Encodes the UCS-2 (Unicode) string at ucsp into a
358 * null terminated UTF-8 string at utf8p.
359 *
360 * ucslen is the number of UCS-2 input characters (not bytes)
361 * bufsize is the size of the output buffer in bytes
362 */
363void
364utf_encodestr( const u_int16_t * ucsp, int ucslen, u_int8_t * utf8p, u_int32_t bufsize, int byte_order )
365{
366u_int8_t *bufend;
367u_int16_t ucs_ch;
368
369bufend = utf8p + bufsize;
370
371while (ucslen-- > 0)
372{
373 if (byte_order == OSBigEndian)
374{
375 ucs_ch = SWAP_BE16(*ucsp++);
376 }
377else
378{
379 ucs_ch = SWAP_LE16(*ucsp++);
380}
381
382if (ucs_ch < 0x0080)
383{
384if (utf8p >= bufend)
385{
386break;
387}
388
389if (ucs_ch == '\0')
390{
391continue;/* Skip over embedded NULLs */
392}
393
394*utf8p++ = ucs_ch;
395
396}
397else if (ucs_ch < 0x800)
398{
399if ((utf8p + 1) >= bufend)
400{
401break;
402}
403
404*utf8p++ = ((ucs_ch >> 6) | 0xc0);
405*utf8p++ = ((ucs_ch & 0x3f) | 0x80);
406}
407else
408{
409if ((utf8p + 2) >= bufend)
410{
411break;
412}
413
414*utf8p++ = ((ucs_ch >> 12) | 0xe0);
415*utf8p++ = (((ucs_ch >> 6) & 0x3f) | 0x80);
416*utf8p++ = ((ucs_ch & 0x3f) | 0x80);
417}
418}
419
420*utf8p = '\0';
421}
422
423
424/*
425 * utf_decodestr - Decodes the null terminated UTF-8 string at
426 * utf8p into a UCS-2 (Unicode) string at ucsp.
427 *
428 * ucslen is the number of UCS-2 output characters (not bytes)
429 * bufsize is the size of the output buffer in bytes
430 */
431void utf_decodestr(const u_int8_t * utf8p, u_int16_t * ucsp, u_int16_t * ucslen, u_int32_t bufsize, int byte_order)
432{
433u_int16_t *bufstart;
434u_int16_t *bufend;
435u_int16_t ucs_ch;
436u_int8_t byte;
437
438bufstart = ucsp;
439bufend = (u_int16_t *)((u_int8_t *)ucsp + bufsize);
440
441while ((byte = *utf8p++) != '\0')
442{
443if (ucsp >= bufend)
444{
445break;
446}
447
448/* Check for ASCII */
449if (byte < 0x80)
450{
451ucs_ch = byte;
452
453 if (byte_order == OSBigEndian)
454{
455*ucsp++ = SWAP_BE16(ucs_ch);
456 }
457else
458{
459*ucsp++ = SWAP_LE16(ucs_ch);
460}
461
462continue;
463}
464
465switch (byte & 0xf0)
466{
467/* 2 byte sequence */
468case 0xc0:
469case 0xd0:/* Extract bits 6 - 10 from first byte */
470ucs_ch = ((byte & 0x1F) << 6);
471break;
472/* 3 byte sequence */
473case 0xe0:
474/* Extract bits 12 - 15 from first byte */
475ucs_ch = ((byte & 0x0F) << 6);
476
477/* Extract bits 6 - 11 from second byte */
478if (((byte = *utf8p++) & 0xc0) != 0x80)
479{
480goto stop;
481}
482
483ucs_ch += (byte & 0x3F);
484ucs_ch <<= 6;
485break;
486default:
487goto stop;
488}
489
490/* Extract bits 0 - 5 from final byte */
491if (((byte = *utf8p++) & 0xc0) != 0x80)
492{
493goto stop;
494}
495
496ucs_ch += (byte & 0x3F);
497
498 if (byte_order == OSBigEndian)
499{
500*ucsp++ = SWAP_BE16(ucs_ch);
501 }
502else
503{
504*ucsp++ = SWAP_LE16(ucs_ch);
505}
506}
507stop:
508if (byte_order == OSBigEndian)
509{
510*ucslen = SWAP_BE16(ucsp - bufstart);
511}
512else
513{
514*ucslen = SWAP_LE16(ucsp - bufstart);
515}
516}
517

Archive Download this file

Revision: 2862