Chameleon

Chameleon Svn Source Tree

Root/branches/ErmaC/Trunk/i386/libsaio/hfs_compare.c

1/*
2 * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 2.0 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22/*
23 * HFSCompare.c - Functions for working with and comparing HFS nams.
24 *
25 * Copyright (c) 1999-2000 Apple Computer, Inc.
26 *
27 * DRI: Josh de Cesare
28 */
29
30#include <sl.h>
31#include "hfs_CaseTables.h"
32
33#if ! UNCOMPRESSED
34
35static unsigned short * UncompressStructure(struct compressed_block *bp, int count, int size)
36{
37 unsigned short *out = malloc(size);
38 unsigned short *op = out;
39 unsigned short data;
40 int i, j;
41
42 for (i=0; i<count; i++, bp++) {
43 // If this happens (it shouldn't) please fix size and/or double check that count really is
44 // the number of elements in the array.
45 // This was a very hard bug to find, so please leave this code here.
46 if(out + size <= op + bp->count)
47 {
48 stop("HFS+ Unicode tables are malformed\n");
49 }
50 data = bp->data;
51 for (j=0; j<bp->count; j++) {
52 *op++ = data;
53 if (bp->type == kTypeAscending) data++;
54 else if (bp->type == kTypeAscending256) data += 256;
55 }
56 }
57 return out;
58}
59
60static void InitCompareTables(void)
61{
62 if (gCompareTable == 0) {
63 gCompareTable = UncompressStructure(gCompareTableCompressed,
64 kCompareTableNBlocks, kCompareTableDataSize);
65 gLowerCaseTable = UncompressStructure(gLowerCaseTableCompressed,
66 kLowerCaseTableNBlocks, kLowerCaseTableDataSize);
67 }
68}
69
70#endif /* ! UNCOMPRESSED */
71
72//_______________________________________________________________________
73//
74//Routine:FastRelString
75//
76//Output:returns -1 if str1 < str2
77//returns 1 if str1 > str2
78//return 0 if equal
79//
80//_______________________________________________________________________
81
82int32_tFastRelString(u_int8_t * str1, u_int8_t * str2)
83{
84int32_t bestGuess;
85u_int8_t length, length2;
86
87#if ! UNCOMPRESED
88 InitCompareTables();
89#endif
90
91length = *(str1++);
92length2 = *(str2++);
93
94if (length == length2)
95bestGuess = 0;
96else if (length < length2)
97bestGuess = -1;
98else
99{
100bestGuess = 1;
101length = length2;
102}
103
104while (length--)
105{
106u_int32_t aChar, bChar;
107
108aChar = *(str1++);
109bChar = *(str2++);
110
111if (aChar != bChar)/* If they don't match exacly, do case conversion */
112{
113u_int16_t aSortWord, bSortWord;
114
115aSortWord = gCompareTable[aChar];
116bSortWord = gCompareTable[bChar];
117
118if (aSortWord > bSortWord)
119return 1;
120
121if (aSortWord < bSortWord)
122return -1;
123}
124
125/*
126 * If characters match exactly, then go on to next character
127 * immediately without doing any extra work.
128 */
129}
130
131/* if you got to here, then return bestGuess */
132return bestGuess;
133}
134
135
136//
137//FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering
138//
139// IFRESULT
140//--------------------------
141//str1 < str2=>-1
142//str1 = str2=> 0
143//str1 > str2=>+1
144//
145//The lower case table starts with 256 entries (one for each of the upper bytes
146//of the original Unicode char). If that entry is zero, then all characters with
147//that upper byte are already case folded. If the entry is non-zero, then it is
148//the _index_ (not byte offset) of the start of the sub-table for the characters
149//with that upper byte. All ignorable characters are folded to the value zero.
150//
151//In pseudocode:
152//
153//Let c = source Unicode character
154//Let table[] = lower case table
155//
156//lower = table[highbyte(c)]
157//if (lower == 0)
158//lower = c
159//else
160//lower = table[lower+lowbyte(c)]
161//
162//if (lower == 0)
163//ignore this character
164//
165//To handle ignorable characters, we now need a loop to find the next valid character.
166//Also, we can't pre-compute the number of characters to compare; the string length might
167//be larger than the number of non-ignorable characters. Further, we must be able to handle
168//ignorable characters at any point in the string, including as the first or last characters.
169//We use a zero value as a sentinel to detect both end-of-string and ignorable characters.
170//Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename,
171//the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is
172//an invalid Unicode character).
173//
174//Pseudocode:
175//
176//while (1) {
177//c1 = GetNextValidChar(str1)//returns zero if at end of string
178//c2 = GetNextValidChar(str2)
179//
180//if (c1 != c2) break//found a difference
181//
182//if (c1 == 0)//reached end of string on both strings at once?
183//return 0;//yes, so strings are equal
184//}
185//
186//// When we get here, c1 != c2. So, we just need to determine which one is less.
187//if (c1 < c2)
188//return -1;
189//else
190//return 1;
191//
192
193int32_t FastUnicodeCompare( u_int16_t * str1, register u_int32_t length1,
194 u_int16_t * str2, register u_int32_t length2, int byte_order )
195{
196register u_int16_t c1,c2;
197register u_int16_t temp;
198
199#if ! UNCOMPRESSED
200 InitCompareTables();
201#endif
202
203while (1) {
204/* Set default values for c1, c2 in case there are no more valid chars */
205c1 = 0;
206c2 = 0;
207
208/* Find next non-ignorable char from str1, or zero if no more */
209while (length1 && c1 == 0) {
210if (byte_order == OSBigEndian)
211c1 = SWAP_BE16(*(str1++));
212else
213c1 = SWAP_LE16(*(str1++));
214--length1;
215if ((temp = gLowerCaseTable[c1>>8]) != 0)// is there a subtable for this upper byte?
216c1 = gLowerCaseTable[temp + (c1 & 0x00FF)];// yes, so fold the char
217}
218
219/* Find next non-ignorable char from str2, or zero if no more */
220while (length2 && c2 == 0) {
221if (byte_order == OSBigEndian)
222c2 = SWAP_BE16(*(str2++));
223else
224c2 = SWAP_LE16(*(str2++));
225--length2;
226if ((temp = gLowerCaseTable[c2>>8]) != 0)// is there a subtable for this upper byte?
227c2 = gLowerCaseTable[temp + (c2 & 0x00FF)];// yes, so fold the char
228}
229
230if (c1 != c2)/* found a difference, so stop looping */
231break;
232
233if (c1 == 0)/* did we reach the end of both strings at the same time? */
234return 0;/* yes, so strings are equal */
235}
236
237if (c1 < c2)
238return -1;
239else
240return 1;
241}
242
243
244//
245// BinaryUnicodeCompare - Compare two Unicode strings; produce a relative ordering
246// Compared using a 16-bit binary comparison (no case folding)
247//
248int32_t BinaryUnicodeCompare (u_int16_t * str1, u_int32_t length1,
249 u_int16_t * str2, u_int32_t length2)
250{
251 register u_int16_t c1, c2;
252 int32_t bestGuess;
253 u_int32_t length;
254
255 bestGuess = 0;
256
257 if (length1 < length2) {
258 length = length1;
259 --bestGuess;
260 } else if (length1 > length2) {
261 length = length2;
262 ++bestGuess;
263 } else {
264 length = length1;
265 }
266
267 while (length--) {
268 c1 = *(str1++);
269 c2 = *(str2++);
270
271 if (c1 > c2)
272 return (1);
273 if (c1 < c2)
274 return (-1);
275 }
276
277 return (bestGuess);
278}
279
280
281/*
282 * UTF-8 (UCS Transformation Format)
283 *
284 * The following subset of UTF-8 is used to encode UCS-2 filenames. It
285 * requires a maximum of three 3 bytes per UCS-2 character. Only the
286 * shortest encoding required to represent the significant UCS-2 bits
287 * is legal.
288 *
289 * UTF-8 Multibyte Codes
290 *
291 * Bytes Bits UCS-2 Min UCS-2 Max UTF-8 Byte Sequence (binary)
292 * -------------------------------------------------------------------
293 * 1 7 0x0000 0x007F 0xxxxxxx
294 * 2 11 0x0080 0x07FF 110xxxxx 10xxxxxx
295 * 3 16 0x0800 0xFFFF 1110xxxx 10xxxxxx 10xxxxxx
296 * -------------------------------------------------------------------
297 */
298
299
300/*
301 * utf_encodestr - Encodes the UCS-2 (Unicode) string at ucsp into a
302 * null terminated UTF-8 string at utf8p.
303 *
304 * ucslen is the number of UCS-2 input characters (not bytes)
305 * bufsize is the size of the output buffer in bytes
306 */
307void utf_encodestr( const u_int16_t * ucsp, int ucslen,
308 u_int8_t * utf8p, u_int32_t bufsize, int byte_order )
309{
310u_int8_t *bufend;
311u_int16_t ucs_ch;
312
313bufend = utf8p + bufsize;
314
315while (ucslen-- > 0) {
316 if (byte_order == OSBigEndian)
317 ucs_ch = SWAP_BE16(*ucsp++);
318 else
319 ucs_ch = SWAP_LE16(*ucsp++);
320
321if (ucs_ch < 0x0080) {
322if (utf8p >= bufend)
323break;
324if (ucs_ch == '\0')
325continue;/* skip over embedded NULLs */
326*utf8p++ = ucs_ch;
327
328} else if (ucs_ch < 0x800) {
329if ((utf8p + 1) >= bufend)
330break;
331*utf8p++ = (ucs_ch >> 6) | 0xc0;
332*utf8p++ = (ucs_ch & 0x3f) | 0x80;
333
334} else {
335if ((utf8p + 2) >= bufend)
336break;
337*utf8p++ = (ucs_ch >> 12) | 0xe0;
338*utf8p++ = ((ucs_ch >> 6) & 0x3f) | 0x80;
339*utf8p++ = ((ucs_ch) & 0x3f) | 0x80;
340}
341}
342
343*utf8p = '\0';
344}
345
346
347/*
348 * utf_decodestr - Decodes the null terminated UTF-8 string at
349 * utf8p into a UCS-2 (Unicode) string at ucsp.
350 *
351 * ucslen is the number of UCS-2 output characters (not bytes)
352 * bufsize is the size of the output buffer in bytes
353 */
354void utf_decodestr(const u_int8_t * utf8p, u_int16_t * ucsp, u_int16_t * ucslen, u_int32_t bufsize, int byte_order)
355{
356u_int16_t *bufstart;
357u_int16_t *bufend;
358u_int16_t ucs_ch;
359u_int8_t byte;
360
361bufstart = ucsp;
362bufend = (u_int16_t *)((u_int8_t *)ucsp + bufsize);
363
364while ((byte = *utf8p++) != '\0') {
365if (ucsp >= bufend)
366break;
367
368/* check for ascii */
369if (byte < 0x80) {
370ucs_ch = byte;
371
372 if (byte_order == OSBigEndian)
373 *ucsp++ = SWAP_BE16(ucs_ch);
374 else
375 *ucsp++ = SWAP_LE16(ucs_ch);
376
377continue;
378}
379
380switch (byte & 0xf0) {
381/* 2 byte sequence*/
382case 0xc0:
383case 0xd0:
384/* extract bits 6 - 10 from first byte */
385ucs_ch = (byte & 0x1F) << 6;
386break;
387/* 3 byte sequence*/
388case 0xe0:
389/* extract bits 12 - 15 from first byte */
390ucs_ch = (byte & 0x0F) << 6;
391
392/* extract bits 6 - 11 from second byte */
393if (((byte = *utf8p++) & 0xc0) != 0x80)
394goto stop;
395
396ucs_ch += (byte & 0x3F);
397ucs_ch <<= 6;
398break;
399default:
400goto stop;
401}
402
403/* extract bits 0 - 5 from final byte */
404if (((byte = *utf8p++) & 0xc0) != 0x80)
405goto stop;
406ucs_ch += (byte & 0x3F);
407
408 if (byte_order == OSBigEndian)
409 *ucsp++ = SWAP_BE16(ucs_ch);
410 else
411 *ucsp++ = SWAP_LE16(ucs_ch);
412}
413stop:
414 if (byte_order == OSBigEndian)
415 *ucslen = SWAP_BE16(ucsp - bufstart);
416 else
417 *ucslen = SWAP_LE16(ucsp - bufstart);
418}
419

Archive Download this file

Revision: 2037