Chameleon

Chameleon Svn Source Tree

Root/branches/cparm/i386/libsaio/hfs_compare.c

1/*
2 * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 2.0 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22/*
23 * HFSCompare.c - Functions for working with and comparing HFS nams.
24 *
25 * Copyright (c) 1999-2000 Apple Computer, Inc.
26 *
27 * DRI: Josh de Cesare
28 */
29
30#include "sl.h"
31#include "hfs_CaseTables.h"
32#include "hfs.h"
33
34#if ! UNCOMPRESSED
35
36static unsigned short *
37UncompressStructure(struct compressed_block *bp, int count, int size)
38{
39 unsigned short *out = malloc(size);
40 unsigned short *op = out;
41 unsigned short data;
42 int i, j;
43
44 for (i=0; i<count; i++, bp++) {
45 // If this happens (it shouldn't) please fix size and/or double check that count really is
46 // the number of elements in the array.
47 // This was a very hard bug to find, so please leave this code here.
48 if(out + size <= op + bp->count)
49 {
50 stop("HFS+ Unicode tables are malformed\n");
51 return 0;
52 }
53 data = bp->data;
54 for (j=0; j<bp->count; j++) {
55 *op++ = data;
56 if (bp->type == kTypeAscending) data++;
57 else if (bp->type == kTypeAscending256) data += 256;
58 }
59 }
60 return out;
61}
62
63static void
64InitCompareTables(void)
65{
66 if (gCompareTable == 0) {
67 gCompareTable = UncompressStructure(gCompareTableCompressed,
68 kCompareTableNBlocks, kCompareTableDataSize);
69 gLowerCaseTable = UncompressStructure(gLowerCaseTableCompressed,
70 kLowerCaseTableNBlocks, kLowerCaseTableDataSize);
71 }
72}
73
74#endif /* ! UNCOMPRESSED */
75
76//_______________________________________________________________________
77//
78//Routine:FastRelString
79//
80//Output:returns -1 if str1 < str2
81//returns 1 if str1 > str2
82//return 0 if equal
83//
84//_______________________________________________________________________
85
86int32_tFastRelString(u_int8_t * str1, u_int8_t * str2)
87{
88int32_t bestGuess;
89u_int8_t length, length2;
90
91#if ! UNCOMPRESED
92 InitCompareTables();
93#endif
94
95length = *(str1++);
96length2 = *(str2++);
97
98if (length == length2)
99bestGuess = 0;
100else if (length < length2)
101bestGuess = -1;
102else
103{
104bestGuess = 1;
105length = length2;
106}
107
108while (length--)
109{
110u_int32_t aChar, bChar;
111
112aChar = *(str1++);
113bChar = *(str2++);
114
115if (aChar != bChar)/* If they don't match exacly, do case conversion */
116{
117u_int16_t aSortWord, bSortWord;
118
119aSortWord = gCompareTable[aChar];
120bSortWord = gCompareTable[bChar];
121
122if (aSortWord > bSortWord)
123return 1;
124
125if (aSortWord < bSortWord)
126return -1;
127}
128
129/*
130 * If characters match exactly, then go on to next character
131 * immediately without doing any extra work.
132 */
133}
134
135/* if you got to here, then return bestGuess */
136return bestGuess;
137}
138
139
140//
141//FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering
142//
143// IFRESULT
144//--------------------------
145//str1 < str2=>-1
146//str1 = str2=> 0
147//str1 > str2=>+1
148//
149//The lower case table starts with 256 entries (one for each of the upper bytes
150//of the original Unicode char). If that entry is zero, then all characters with
151//that upper byte are already case folded. If the entry is non-zero, then it is
152//the _index_ (not byte offset) of the start of the sub-table for the characters
153//with that upper byte. All ignorable characters are folded to the value zero.
154//
155//In pseudocode:
156//
157//Let c = source Unicode character
158//Let table[] = lower case table
159//
160//lower = table[highbyte(c)]
161//if (lower == 0)
162//lower = c
163//else
164//lower = table[lower+lowbyte(c)]
165//
166//if (lower == 0)
167//ignore this character
168//
169//To handle ignorable characters, we now need a loop to find the next valid character.
170//Also, we can't pre-compute the number of characters to compare; the string length might
171//be larger than the number of non-ignorable characters. Further, we must be able to handle
172//ignorable characters at any point in the string, including as the first or last characters.
173//We use a zero value as a sentinel to detect both end-of-string and ignorable characters.
174//Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename,
175//the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is
176//an invalid Unicode character).
177//
178//Pseudocode:
179//
180//while (1) {
181//c1 = GetNextValidChar(str1)//returns zero if at end of string
182//c2 = GetNextValidChar(str2)
183//
184//if (c1 != c2) break//found a difference
185//
186//if (c1 == 0)//reached end of string on both strings at once?
187//return 0;//yes, so strings are equal
188//}
189//
190//// When we get here, c1 != c2. So, we just need to determine which one is less.
191//if (c1 < c2)
192//return -1;
193//else
194//return 1;
195//
196
197int32_t FastUnicodeCompare( u_int16_t * str1, register u_int32_t length1,
198 u_int16_t * str2, register u_int32_t length2, int byte_order )
199{
200register u_int16_t c1,c2;
201register u_int16_t temp;
202
203#if ! UNCOMPRESSED
204 InitCompareTables();
205#endif
206
207while (1) {
208/* Set default values for c1, c2 in case there are no more valid chars */
209c1 = 0;
210c2 = 0;
211
212/* Find next non-ignorable char from str1, or zero if no more */
213while (length1 && c1 == 0) {
214if (byte_order == OSBigEndian)
215c1 = SWAP_BE16(*(str1++));
216else
217c1 = SWAP_LE16(*(str1++));
218--length1;
219if ((temp = gLowerCaseTable[c1>>8]) != 0)// is there a subtable for this upper byte?
220c1 = gLowerCaseTable[temp + (c1 & 0x00FF)];// yes, so fold the char
221}
222
223/* Find next non-ignorable char from str2, or zero if no more */
224while (length2 && c2 == 0) {
225if (byte_order == OSBigEndian)
226c2 = SWAP_BE16(*(str2++));
227else
228c2 = SWAP_LE16(*(str2++));
229--length2;
230if ((temp = gLowerCaseTable[c2>>8]) != 0)// is there a subtable for this upper byte?
231c2 = gLowerCaseTable[temp + (c2 & 0x00FF)];// yes, so fold the char
232}
233
234if (c1 != c2)/* found a difference, so stop looping */
235break;
236
237if (c1 == 0)/* did we reach the end of both strings at the same time? */
238return 0;/* yes, so strings are equal */
239}
240
241if (c1 < c2)
242return -1;
243else
244return 1;
245}
246
247
248//
249// BinaryUnicodeCompare - Compare two Unicode strings; produce a relative ordering
250// Compared using a 16-bit binary comparison (no case folding)
251//
252int32_t BinaryUnicodeCompare (u_int16_t * str1, u_int32_t length1,
253 u_int16_t * str2, u_int32_t length2)
254{
255 register u_int16_t c1, c2;
256 int32_t bestGuess;
257 u_int32_t length;
258
259 bestGuess = 0;
260
261 if (length1 < length2) {
262 length = length1;
263 --bestGuess;
264 } else if (length1 > length2) {
265 length = length2;
266 ++bestGuess;
267 } else {
268 length = length1;
269 }
270
271 while (length--) {
272 c1 = *(str1++);
273 c2 = *(str2++);
274
275 if (c1 > c2)
276 return (1);
277 if (c1 < c2)
278 return (-1);
279 }
280
281 return (bestGuess);
282}
283
284
285/*
286 * UTF-8 (UCS Transformation Format)
287 *
288 * The following subset of UTF-8 is used to encode UCS-2 filenames. It
289 * requires a maximum of three 3 bytes per UCS-2 character. Only the
290 * shortest encoding required to represent the significant UCS-2 bits
291 * is legal.
292 *
293 * UTF-8 Multibyte Codes
294 *
295 * Bytes Bits UCS-2 Min UCS-2 Max UTF-8 Byte Sequence (binary)
296 * -------------------------------------------------------------------
297 * 1 7 0x0000 0x007F 0xxxxxxx
298 * 2 11 0x0080 0x07FF 110xxxxx 10xxxxxx
299 * 3 16 0x0800 0xFFFF 1110xxxx 10xxxxxx 10xxxxxx
300 * -------------------------------------------------------------------
301 */
302
303
304/*
305 * utf_encodestr - Encodes the UCS-2 (Unicode) string at ucsp into a
306 * null terminated UTF-8 string at utf8p.
307 *
308 * ucslen is the number of UCS-2 input characters (not bytes)
309 * bufsize is the size of the output buffer in bytes
310 */
311void
312utf_encodestr( const u_int16_t * ucsp, int ucslen,
313 u_int8_t * utf8p, u_int32_t bufsize, int byte_order )
314{
315u_int8_t *bufend;
316u_int16_t ucs_ch;
317
318bufend = utf8p + bufsize;
319
320while (ucslen-- > 0) {
321 if (byte_order == OSBigEndian)
322 ucs_ch = SWAP_BE16(*ucsp++);
323 else
324 ucs_ch = SWAP_LE16(*ucsp++);
325
326if (ucs_ch < 0x0080) {
327if (utf8p >= bufend)
328break;
329if (ucs_ch == '\0')
330continue;/* skip over embedded NULLs */
331*utf8p++ = ucs_ch;
332
333} else if (ucs_ch < 0x800) {
334if ((utf8p + 1) >= bufend)
335break;
336*utf8p++ = (ucs_ch >> 6) | 0xc0;
337*utf8p++ = (ucs_ch & 0x3f) | 0x80;
338
339} else {
340if ((utf8p + 2) >= bufend)
341break;
342*utf8p++ = (ucs_ch >> 12) | 0xe0;
343*utf8p++ = ((ucs_ch >> 6) & 0x3f) | 0x80;
344*utf8p++ = ((ucs_ch) & 0x3f) | 0x80;
345}
346}
347
348*utf8p = '\0';
349}
350
351
352/*
353 * utf_decodestr - Decodes the null terminated UTF-8 string at
354 * utf8p into a UCS-2 (Unicode) string at ucsp.
355 *
356 * ucslen is the number of UCS-2 output characters (not bytes)
357 * bufsize is the size of the output buffer in bytes
358 */
359void utf_decodestr(const u_int8_t * utf8p, u_int16_t * ucsp, u_int16_t * ucslen, u_int32_t bufsize, int byte_order)
360{
361u_int16_t *bufstart;
362u_int16_t *bufend;
363u_int16_t ucs_ch;
364u_int8_t byte;
365
366bufstart = ucsp;
367bufend = (u_int16_t *)((u_int8_t *)ucsp + bufsize);
368
369while ((byte = *utf8p++) != '\0') {
370if (ucsp >= bufend)
371break;
372
373/* check for ascii */
374if (byte < 0x80) {
375ucs_ch = byte;
376
377 if (byte_order == OSBigEndian)
378 *ucsp++ = SWAP_BE16(ucs_ch);
379 else
380 *ucsp++ = SWAP_LE16(ucs_ch);
381
382continue;
383}
384
385switch (byte & 0xf0) {
386/* 2 byte sequence*/
387case 0xc0:
388case 0xd0:
389/* extract bits 6 - 10 from first byte */
390ucs_ch = (byte & 0x1F) << 6;
391break;
392/* 3 byte sequence*/
393case 0xe0:
394/* extract bits 12 - 15 from first byte */
395ucs_ch = (byte & 0x0F) << 6;
396
397/* extract bits 6 - 11 from second byte */
398if (((byte = *utf8p++) & 0xc0) != 0x80)
399goto stop;
400
401ucs_ch += (byte & 0x3F);
402ucs_ch <<= 6;
403break;
404default:
405goto stop;
406}
407
408/* extract bits 0 - 5 from final byte */
409if (((byte = *utf8p++) & 0xc0) != 0x80)
410goto stop;
411ucs_ch += (byte & 0x3F);
412
413 if (byte_order == OSBigEndian)
414 *ucsp++ = SWAP_BE16(ucs_ch);
415 else
416 *ucsp++ = SWAP_LE16(ucs_ch);
417}
418stop:
419 if (byte_order == OSBigEndian)
420 *ucslen = SWAP_BE16(ucsp - bufstart);
421 else
422 *ucslen = SWAP_LE16(ucsp - bufstart);
423}
424

Archive Download this file

Revision: 2118