Chameleon

Chameleon Svn Source Tree

Root/branches/cparm/i386/libsaio/hfs_compare.c

1/*
2 * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 2.0 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22/*
23 * HFSCompare.c - Functions for working with and comparing HFS nams.
24 *
25 * Copyright (c) 1999-2000 Apple Computer, Inc.
26 *
27 * DRI: Josh de Cesare
28 */
29
30#include "sl.h"
31#include "hfs_CaseTables.h"
32#include "hfs.h"
33
34#if ! UNCOMPRESSED
35
36static unsigned short *
37UncompressStructure(struct compressed_block *bp, int count, int size)
38{
39 unsigned short *out = malloc(size);
40if (!out)
41{
42stop("UncompressStructure unable to allocate memory\n");
43return 0;
44}
45bzero(out,size);
46 unsigned short *op = out;
47 unsigned short data;
48 int i, j;
49
50 for (i=0; i<count; i++, bp++) {
51 // If this happens (it shouldn't) please fix size and/or double check that count really is
52 // the number of elements in the array.
53 // This was a very hard bug to find, so please leave this code here.
54 if(out + size <= op + bp->count)
55 {
56 stop("HFS+ Unicode tables are malformed\n");
57 return 0;
58 }
59 data = bp->data;
60 for (j=0; j<bp->count; j++) {
61 *op++ = data;
62 if (bp->type == kTypeAscending) data++;
63 else if (bp->type == kTypeAscending256) data += 256;
64 }
65 }
66 return out;
67}
68
69static void
70InitCompareTables(void)
71{
72 if (gCompareTable == 0) {
73 gCompareTable = UncompressStructure(gCompareTableCompressed,
74 kCompareTableNBlocks, kCompareTableDataSize);
75 gLowerCaseTable = UncompressStructure(gLowerCaseTableCompressed,
76 kLowerCaseTableNBlocks, kLowerCaseTableDataSize);
77 }
78}
79
80#endif /* ! UNCOMPRESSED */
81
82//_______________________________________________________________________
83//
84//Routine:FastRelString
85//
86//Output:returns -1 if str1 < str2
87//returns 1 if str1 > str2
88//return 0 if equal
89//
90//_______________________________________________________________________
91
92int32_tFastRelString(u_int8_t * str1, u_int8_t * str2)
93{
94int32_t bestGuess;
95u_int8_t length, length2;
96
97#if ! UNCOMPRESED
98InitCompareTables();
99#endif
100
101length = *(str1++);
102length2 = *(str2++);
103
104if (length == length2)
105bestGuess = 0;
106else if (length < length2)
107bestGuess = -1;
108else
109{
110bestGuess = 1;
111length = length2;
112}
113
114while (length--)
115{
116u_int32_t aChar, bChar;
117
118aChar = *(str1++);
119bChar = *(str2++);
120
121if (aChar != bChar)/* If they don't match exacly, do case conversion */
122{
123u_int16_t aSortWord, bSortWord;
124
125aSortWord = gCompareTable[aChar];
126bSortWord = gCompareTable[bChar];
127
128if (aSortWord > bSortWord)
129return 1;
130
131if (aSortWord < bSortWord)
132return -1;
133}
134
135/*
136 * If characters match exactly, then go on to next character
137 * immediately without doing any extra work.
138 */
139}
140
141/* if you got to here, then return bestGuess */
142return bestGuess;
143}
144
145
146//
147//FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering
148//
149// IFRESULT
150//--------------------------
151//str1 < str2=>-1
152//str1 = str2=> 0
153//str1 > str2=>+1
154//
155//The lower case table starts with 256 entries (one for each of the upper bytes
156//of the original Unicode char). If that entry is zero, then all characters with
157//that upper byte are already case folded. If the entry is non-zero, then it is
158//the _index_ (not byte offset) of the start of the sub-table for the characters
159//with that upper byte. All ignorable characters are folded to the value zero.
160//
161//In pseudocode:
162//
163//Let c = source Unicode character
164//Let table[] = lower case table
165//
166//lower = table[highbyte(c)]
167//if (lower == 0)
168//lower = c
169//else
170//lower = table[lower+lowbyte(c)]
171//
172//if (lower == 0)
173//ignore this character
174//
175//To handle ignorable characters, we now need a loop to find the next valid character.
176//Also, we can't pre-compute the number of characters to compare; the string length might
177//be larger than the number of non-ignorable characters. Further, we must be able to handle
178//ignorable characters at any point in the string, including as the first or last characters.
179//We use a zero value as a sentinel to detect both end-of-string and ignorable characters.
180//Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename,
181//the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is
182//an invalid Unicode character).
183//
184//Pseudocode:
185//
186//while (1) {
187//c1 = GetNextValidChar(str1)//returns zero if at end of string
188//c2 = GetNextValidChar(str2)
189//
190//if (c1 != c2) break//found a difference
191//
192//if (c1 == 0)//reached end of string on both strings at once?
193//return 0;//yes, so strings are equal
194//}
195//
196//// When we get here, c1 != c2. So, we just need to determine which one is less.
197//if (c1 < c2)
198//return -1;
199//else
200//return 1;
201//
202
203int32_t FastUnicodeCompare( u_int16_t * str1, register u_int32_t length1,
204 u_int16_t * str2, register u_int32_t length2, int byte_order )
205{
206register u_int16_t c1,c2;
207register u_int16_t temp;
208
209#if ! UNCOMPRESSED
210InitCompareTables();
211#endif
212
213while (1) {
214/* Set default values for c1, c2 in case there are no more valid chars */
215c1 = 0;
216c2 = 0;
217
218/* Find next non-ignorable char from str1, or zero if no more */
219while (length1 && c1 == 0) {
220if (byte_order == OSBigEndian)
221c1 = SWAP_BE16(*(str1++));
222else
223c1 = SWAP_LE16(*(str1++));
224--length1;
225if ((temp = gLowerCaseTable[c1>>8]) != 0)// is there a subtable for this upper byte?
226c1 = gLowerCaseTable[temp + (c1 & 0x00FF)];// yes, so fold the char
227}
228
229/* Find next non-ignorable char from str2, or zero if no more */
230while (length2 && c2 == 0) {
231if (byte_order == OSBigEndian)
232c2 = SWAP_BE16(*(str2++));
233else
234c2 = SWAP_LE16(*(str2++));
235--length2;
236if ((temp = gLowerCaseTable[c2>>8]) != 0)// is there a subtable for this upper byte?
237c2 = gLowerCaseTable[temp + (c2 & 0x00FF)];// yes, so fold the char
238}
239
240if (c1 != c2)/* found a difference, so stop looping */
241break;
242
243if (c1 == 0)/* did we reach the end of both strings at the same time? */
244return 0;/* yes, so strings are equal */
245}
246
247if (c1 < c2)
248return -1;
249else
250return 1;
251}
252
253
254//
255// BinaryUnicodeCompare - Compare two Unicode strings; produce a relative ordering
256// Compared using a 16-bit binary comparison (no case folding)
257//
258int32_t BinaryUnicodeCompare (u_int16_t * str1, u_int32_t length1,
259 u_int16_t * str2, u_int32_t length2)
260{
261register u_int16_t c1, c2;
262int32_t bestGuess;
263u_int32_t length;
264
265bestGuess = 0;
266
267if (length1 < length2) {
268length = length1;
269--bestGuess;
270} else if (length1 > length2) {
271length = length2;
272++bestGuess;
273} else {
274length = length1;
275}
276
277while (length--) {
278c1 = *(str1++);
279c2 = *(str2++);
280
281if (c1 > c2)
282return (1);
283if (c1 < c2)
284return (-1);
285}
286
287return (bestGuess);
288}
289
290
291/*
292 * UTF-8 (UCS Transformation Format)
293 *
294 * The following subset of UTF-8 is used to encode UCS-2 filenames. It
295 * requires a maximum of three 3 bytes per UCS-2 character. Only the
296 * shortest encoding required to represent the significant UCS-2 bits
297 * is legal.
298 *
299 * UTF-8 Multibyte Codes
300 *
301 * Bytes Bits UCS-2 Min UCS-2 Max UTF-8 Byte Sequence (binary)
302 * -------------------------------------------------------------------
303 * 1 7 0x0000 0x007F 0xxxxxxx
304 * 2 11 0x0080 0x07FF 110xxxxx 10xxxxxx
305 * 3 16 0x0800 0xFFFF 1110xxxx 10xxxxxx 10xxxxxx
306 * -------------------------------------------------------------------
307 */
308
309
310/*
311 * utf_encodestr - Encodes the UCS-2 (Unicode) string at ucsp into a
312 * null terminated UTF-8 string at utf8p.
313 *
314 * ucslen is the number of UCS-2 input characters (not bytes)
315 * bufsize is the size of the output buffer in bytes
316 */
317void
318utf_encodestr( const u_int16_t * ucsp, int ucslen,
319 u_int8_t * utf8p, u_int32_t bufsize, int byte_order )
320{
321u_int8_t *bufend;
322u_int16_t ucs_ch;
323
324bufend = utf8p + bufsize;
325
326while (ucslen-- > 0) {
327if (byte_order == OSBigEndian)
328 ucs_ch = SWAP_BE16(*ucsp++);
329else
330ucs_ch = SWAP_LE16(*ucsp++);
331
332if (ucs_ch < 0x0080) {
333if (utf8p >= bufend)
334break;
335if (ucs_ch == '\0')
336continue;/* skip over embedded NULLs */
337*utf8p++ = ucs_ch;
338
339} else if (ucs_ch < 0x800) {
340if ((utf8p + 1) >= bufend)
341break;
342*utf8p++ = (ucs_ch >> 6) | 0xc0;
343*utf8p++ = (ucs_ch & 0x3f) | 0x80;
344
345} else {
346if ((utf8p + 2) >= bufend)
347break;
348*utf8p++ = (ucs_ch >> 12) | 0xe0;
349*utf8p++ = ((ucs_ch >> 6) & 0x3f) | 0x80;
350*utf8p++ = ((ucs_ch) & 0x3f) | 0x80;
351}
352}
353
354*utf8p = '\0';
355}
356
357
358/*
359 * utf_decodestr - Decodes the null terminated UTF-8 string at
360 * utf8p into a UCS-2 (Unicode) string at ucsp.
361 *
362 * ucslen is the number of UCS-2 output characters (not bytes)
363 * bufsize is the size of the output buffer in bytes
364 */
365void utf_decodestr(const u_int8_t * utf8p, u_int16_t * ucsp, u_int16_t * ucslen, u_int32_t bufsize, int byte_order)
366{
367u_int16_t *bufstart;
368u_int16_t *bufend;
369u_int16_t ucs_ch;
370u_int8_t byte;
371
372bufstart = ucsp;
373bufend = (u_int16_t *)((u_int8_t *)ucsp + bufsize);
374
375while ((byte = *utf8p++) != '\0') {
376if (ucsp >= bufend)
377break;
378
379/* check for ascii */
380if (byte < 0x80) {
381ucs_ch = byte;
382
383if (byte_order == OSBigEndian)
384*ucsp++ = SWAP_BE16(ucs_ch);
385else
386*ucsp++ = SWAP_LE16(ucs_ch);
387
388continue;
389}
390
391switch (byte & 0xf0) {
392/* 2 byte sequence*/
393case 0xc0:
394case 0xd0:
395/* extract bits 6 - 10 from first byte */
396ucs_ch = (byte & 0x1F) << 6;
397break;
398/* 3 byte sequence*/
399case 0xe0:
400/* extract bits 12 - 15 from first byte */
401ucs_ch = (byte & 0x0F) << 6;
402
403/* extract bits 6 - 11 from second byte */
404if (((byte = *utf8p++) & 0xc0) != 0x80)
405goto stop;
406
407ucs_ch += (byte & 0x3F);
408ucs_ch <<= 6;
409break;
410default:
411goto stop;
412}
413
414/* extract bits 0 - 5 from final byte */
415if (((byte = *utf8p++) & 0xc0) != 0x80)
416goto stop;
417ucs_ch += (byte & 0x3F);
418
419if (byte_order == OSBigEndian)
420*ucsp++ = SWAP_BE16(ucs_ch);
421else
422*ucsp++ = SWAP_LE16(ucs_ch);
423}
424stop:
425if (byte_order == OSBigEndian)
426*ucslen = SWAP_BE16(ucsp - bufstart);
427else
428*ucslen = SWAP_LE16(ucsp - bufstart);
429}
430

Archive Download this file

Revision: HEAD