Chameleon

Chameleon Svn Source Tree

Root/branches/cparm/i386/libsaio/hfs_compare.c

1/*
2 * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 2.0 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22/*
23 * HFSCompare.c - Functions for working with and comparing HFS nams.
24 *
25 * Copyright (c) 1999-2000 Apple Computer, Inc.
26 *
27 * DRI: Josh de Cesare
28 */
29
30#include "sl.h"
31#include "hfs_CaseTables.h"
32#include "hfs.h"
33
34#if ! UNCOMPRESSED
35
36static unsigned short *
37UncompressStructure(struct compressed_block *bp, int count, int size)
38{
39 unsigned short *out = malloc(size);
40if (!out)
41{
42stop("UncompressStructure unable to allocate memory\n");
43return 0;
44}
45 unsigned short *op = out;
46 unsigned short data;
47 int i, j;
48
49 for (i=0; i<count; i++, bp++) {
50 // If this happens (it shouldn't) please fix size and/or double check that count really is
51 // the number of elements in the array.
52 // This was a very hard bug to find, so please leave this code here.
53 if(out + size <= op + bp->count)
54 {
55 stop("HFS+ Unicode tables are malformed\n");
56 return 0;
57 }
58 data = bp->data;
59 for (j=0; j<bp->count; j++) {
60 *op++ = data;
61 if (bp->type == kTypeAscending) data++;
62 else if (bp->type == kTypeAscending256) data += 256;
63 }
64 }
65 return out;
66}
67
68static void
69InitCompareTables(void)
70{
71 if (gCompareTable == 0) {
72 gCompareTable = UncompressStructure(gCompareTableCompressed,
73 kCompareTableNBlocks, kCompareTableDataSize);
74 gLowerCaseTable = UncompressStructure(gLowerCaseTableCompressed,
75 kLowerCaseTableNBlocks, kLowerCaseTableDataSize);
76 }
77}
78
79#endif /* ! UNCOMPRESSED */
80
81//_______________________________________________________________________
82//
83//Routine:FastRelString
84//
85//Output:returns -1 if str1 < str2
86//returns 1 if str1 > str2
87//return 0 if equal
88//
89//_______________________________________________________________________
90
91int32_tFastRelString(u_int8_t * str1, u_int8_t * str2)
92{
93int32_t bestGuess;
94u_int8_t length, length2;
95
96#if ! UNCOMPRESED
97 InitCompareTables();
98#endif
99
100length = *(str1++);
101length2 = *(str2++);
102
103if (length == length2)
104bestGuess = 0;
105else if (length < length2)
106bestGuess = -1;
107else
108{
109bestGuess = 1;
110length = length2;
111}
112
113while (length--)
114{
115u_int32_t aChar, bChar;
116
117aChar = *(str1++);
118bChar = *(str2++);
119
120if (aChar != bChar)/* If they don't match exacly, do case conversion */
121{
122u_int16_t aSortWord, bSortWord;
123
124aSortWord = gCompareTable[aChar];
125bSortWord = gCompareTable[bChar];
126
127if (aSortWord > bSortWord)
128return 1;
129
130if (aSortWord < bSortWord)
131return -1;
132}
133
134/*
135 * If characters match exactly, then go on to next character
136 * immediately without doing any extra work.
137 */
138}
139
140/* if you got to here, then return bestGuess */
141return bestGuess;
142}
143
144
145//
146//FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering
147//
148// IFRESULT
149//--------------------------
150//str1 < str2=>-1
151//str1 = str2=> 0
152//str1 > str2=>+1
153//
154//The lower case table starts with 256 entries (one for each of the upper bytes
155//of the original Unicode char). If that entry is zero, then all characters with
156//that upper byte are already case folded. If the entry is non-zero, then it is
157//the _index_ (not byte offset) of the start of the sub-table for the characters
158//with that upper byte. All ignorable characters are folded to the value zero.
159//
160//In pseudocode:
161//
162//Let c = source Unicode character
163//Let table[] = lower case table
164//
165//lower = table[highbyte(c)]
166//if (lower == 0)
167//lower = c
168//else
169//lower = table[lower+lowbyte(c)]
170//
171//if (lower == 0)
172//ignore this character
173//
174//To handle ignorable characters, we now need a loop to find the next valid character.
175//Also, we can't pre-compute the number of characters to compare; the string length might
176//be larger than the number of non-ignorable characters. Further, we must be able to handle
177//ignorable characters at any point in the string, including as the first or last characters.
178//We use a zero value as a sentinel to detect both end-of-string and ignorable characters.
179//Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename,
180//the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is
181//an invalid Unicode character).
182//
183//Pseudocode:
184//
185//while (1) {
186//c1 = GetNextValidChar(str1)//returns zero if at end of string
187//c2 = GetNextValidChar(str2)
188//
189//if (c1 != c2) break//found a difference
190//
191//if (c1 == 0)//reached end of string on both strings at once?
192//return 0;//yes, so strings are equal
193//}
194//
195//// When we get here, c1 != c2. So, we just need to determine which one is less.
196//if (c1 < c2)
197//return -1;
198//else
199//return 1;
200//
201
202int32_t FastUnicodeCompare( u_int16_t * str1, register u_int32_t length1,
203 u_int16_t * str2, register u_int32_t length2, int byte_order )
204{
205register u_int16_t c1,c2;
206register u_int16_t temp;
207
208#if ! UNCOMPRESSED
209 InitCompareTables();
210#endif
211
212while (1) {
213/* Set default values for c1, c2 in case there are no more valid chars */
214c1 = 0;
215c2 = 0;
216
217/* Find next non-ignorable char from str1, or zero if no more */
218while (length1 && c1 == 0) {
219if (byte_order == OSBigEndian)
220c1 = SWAP_BE16(*(str1++));
221else
222c1 = SWAP_LE16(*(str1++));
223--length1;
224if ((temp = gLowerCaseTable[c1>>8]) != 0)// is there a subtable for this upper byte?
225c1 = gLowerCaseTable[temp + (c1 & 0x00FF)];// yes, so fold the char
226}
227
228/* Find next non-ignorable char from str2, or zero if no more */
229while (length2 && c2 == 0) {
230if (byte_order == OSBigEndian)
231c2 = SWAP_BE16(*(str2++));
232else
233c2 = SWAP_LE16(*(str2++));
234--length2;
235if ((temp = gLowerCaseTable[c2>>8]) != 0)// is there a subtable for this upper byte?
236c2 = gLowerCaseTable[temp + (c2 & 0x00FF)];// yes, so fold the char
237}
238
239if (c1 != c2)/* found a difference, so stop looping */
240break;
241
242if (c1 == 0)/* did we reach the end of both strings at the same time? */
243return 0;/* yes, so strings are equal */
244}
245
246if (c1 < c2)
247return -1;
248else
249return 1;
250}
251
252
253//
254// BinaryUnicodeCompare - Compare two Unicode strings; produce a relative ordering
255// Compared using a 16-bit binary comparison (no case folding)
256//
257int32_t BinaryUnicodeCompare (u_int16_t * str1, u_int32_t length1,
258 u_int16_t * str2, u_int32_t length2)
259{
260 register u_int16_t c1, c2;
261 int32_t bestGuess;
262 u_int32_t length;
263
264 bestGuess = 0;
265
266 if (length1 < length2) {
267 length = length1;
268 --bestGuess;
269 } else if (length1 > length2) {
270 length = length2;
271 ++bestGuess;
272 } else {
273 length = length1;
274 }
275
276 while (length--) {
277 c1 = *(str1++);
278 c2 = *(str2++);
279
280 if (c1 > c2)
281 return (1);
282 if (c1 < c2)
283 return (-1);
284 }
285
286 return (bestGuess);
287}
288
289
290/*
291 * UTF-8 (UCS Transformation Format)
292 *
293 * The following subset of UTF-8 is used to encode UCS-2 filenames. It
294 * requires a maximum of three 3 bytes per UCS-2 character. Only the
295 * shortest encoding required to represent the significant UCS-2 bits
296 * is legal.
297 *
298 * UTF-8 Multibyte Codes
299 *
300 * Bytes Bits UCS-2 Min UCS-2 Max UTF-8 Byte Sequence (binary)
301 * -------------------------------------------------------------------
302 * 1 7 0x0000 0x007F 0xxxxxxx
303 * 2 11 0x0080 0x07FF 110xxxxx 10xxxxxx
304 * 3 16 0x0800 0xFFFF 1110xxxx 10xxxxxx 10xxxxxx
305 * -------------------------------------------------------------------
306 */
307
308
309/*
310 * utf_encodestr - Encodes the UCS-2 (Unicode) string at ucsp into a
311 * null terminated UTF-8 string at utf8p.
312 *
313 * ucslen is the number of UCS-2 input characters (not bytes)
314 * bufsize is the size of the output buffer in bytes
315 */
316void
317utf_encodestr( const u_int16_t * ucsp, int ucslen,
318 u_int8_t * utf8p, u_int32_t bufsize, int byte_order )
319{
320u_int8_t *bufend;
321u_int16_t ucs_ch;
322
323bufend = utf8p + bufsize;
324
325while (ucslen-- > 0) {
326 if (byte_order == OSBigEndian)
327 ucs_ch = SWAP_BE16(*ucsp++);
328 else
329 ucs_ch = SWAP_LE16(*ucsp++);
330
331if (ucs_ch < 0x0080) {
332if (utf8p >= bufend)
333break;
334if (ucs_ch == '\0')
335continue;/* skip over embedded NULLs */
336*utf8p++ = ucs_ch;
337
338} else if (ucs_ch < 0x800) {
339if ((utf8p + 1) >= bufend)
340break;
341*utf8p++ = (ucs_ch >> 6) | 0xc0;
342*utf8p++ = (ucs_ch & 0x3f) | 0x80;
343
344} else {
345if ((utf8p + 2) >= bufend)
346break;
347*utf8p++ = (ucs_ch >> 12) | 0xe0;
348*utf8p++ = ((ucs_ch >> 6) & 0x3f) | 0x80;
349*utf8p++ = ((ucs_ch) & 0x3f) | 0x80;
350}
351}
352
353*utf8p = '\0';
354}
355
356
357/*
358 * utf_decodestr - Decodes the null terminated UTF-8 string at
359 * utf8p into a UCS-2 (Unicode) string at ucsp.
360 *
361 * ucslen is the number of UCS-2 output characters (not bytes)
362 * bufsize is the size of the output buffer in bytes
363 */
364void utf_decodestr(const u_int8_t * utf8p, u_int16_t * ucsp, u_int16_t * ucslen, u_int32_t bufsize, int byte_order)
365{
366u_int16_t *bufstart;
367u_int16_t *bufend;
368u_int16_t ucs_ch;
369u_int8_t byte;
370
371bufstart = ucsp;
372bufend = (u_int16_t *)((u_int8_t *)ucsp + bufsize);
373
374while ((byte = *utf8p++) != '\0') {
375if (ucsp >= bufend)
376break;
377
378/* check for ascii */
379if (byte < 0x80) {
380ucs_ch = byte;
381
382 if (byte_order == OSBigEndian)
383 *ucsp++ = SWAP_BE16(ucs_ch);
384 else
385 *ucsp++ = SWAP_LE16(ucs_ch);
386
387continue;
388}
389
390switch (byte & 0xf0) {
391/* 2 byte sequence*/
392case 0xc0:
393case 0xd0:
394/* extract bits 6 - 10 from first byte */
395ucs_ch = (byte & 0x1F) << 6;
396break;
397/* 3 byte sequence*/
398case 0xe0:
399/* extract bits 12 - 15 from first byte */
400ucs_ch = (byte & 0x0F) << 6;
401
402/* extract bits 6 - 11 from second byte */
403if (((byte = *utf8p++) & 0xc0) != 0x80)
404goto stop;
405
406ucs_ch += (byte & 0x3F);
407ucs_ch <<= 6;
408break;
409default:
410goto stop;
411}
412
413/* extract bits 0 - 5 from final byte */
414if (((byte = *utf8p++) & 0xc0) != 0x80)
415goto stop;
416ucs_ch += (byte & 0x3F);
417
418 if (byte_order == OSBigEndian)
419 *ucsp++ = SWAP_BE16(ucs_ch);
420 else
421 *ucsp++ = SWAP_LE16(ucs_ch);
422}
423stop:
424 if (byte_order == OSBigEndian)
425 *ucslen = SWAP_BE16(ucsp - bufstart);
426 else
427 *ucslen = SWAP_LE16(ucsp - bufstart);
428}
429

Archive Download this file

Revision: 2121