Chameleon

Chameleon Svn Source Tree

Root/branches/azimutz/Chazi/i386/libsaio/hfs_compare.c

1/*
2 * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 2.0 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22/*
23 * HFSCompare.c - Functions for working with and comparing HFS nams.
24 *
25 * Copyright (c) 1999-2000 Apple Computer, Inc.
26 *
27 * DRI: Josh de Cesare
28 */
29
30//#include <sl.h>
31#include "sl.h" //Azi:???
32#include "hfs_CaseTables.h"
33
34#if ! UNCOMPRESSED
35
36static unsigned short *
37UncompressStructure(struct compressed_block *bp, int count, int size)
38{
39 unsigned short *out = malloc(size);
40 unsigned short *op = out;
41 unsigned short data;
42 int i, j;
43
44 for (i=0; i<count; i++, bp++) {
45 // If this happens (it shouldn't) please fix size and/or double check that count really is
46 // the number of elements in the array.
47 // This was a very hard bug to find, so please leave this code here.
48 if(out + size <= op + bp->count)
49 {
50 stop("HFS+ Unicode tables are malformed\n");
51 }
52 data = bp->data;
53 for (j=0; j<bp->count; j++) {
54 *op++ = data;
55 if (bp->type == kTypeAscending) data++;
56 else if (bp->type == kTypeAscending256) data += 256;
57 }
58 }
59 return out;
60}
61
62static void
63InitCompareTables(void)
64{
65 if (gCompareTable == 0) {
66 gCompareTable = UncompressStructure(gCompareTableCompressed,
67 kCompareTableNBlocks, kCompareTableDataSize);
68 gLowerCaseTable = UncompressStructure(gLowerCaseTableCompressed,
69 kLowerCaseTableNBlocks, kLowerCaseTableDataSize);
70 }
71}
72
73#endif /* ! UNCOMPRESSED */
74
75//_______________________________________________________________________
76//
77//Routine:FastRelString
78//
79//Output:returns -1 if str1 < str2
80//returns 1 if str1 > str2
81//return 0 if equal
82//
83//_______________________________________________________________________
84
85int32_tFastRelString(u_int8_t * str1, u_int8_t * str2)
86{
87int32_t bestGuess;
88u_int8_t length, length2;
89
90#if ! UNCOMPRESED
91 InitCompareTables();
92#endif
93
94length = *(str1++);
95length2 = *(str2++);
96
97if (length == length2)
98bestGuess = 0;
99else if (length < length2)
100bestGuess = -1;
101else
102{
103bestGuess = 1;
104length = length2;
105}
106
107while (length--)
108{
109u_int32_t aChar, bChar;
110
111aChar = *(str1++);
112bChar = *(str2++);
113
114if (aChar != bChar)/* If they don't match exacly, do case conversion */
115{
116u_int16_t aSortWord, bSortWord;
117
118aSortWord = gCompareTable[aChar];
119bSortWord = gCompareTable[bChar];
120
121if (aSortWord > bSortWord)
122return 1;
123
124if (aSortWord < bSortWord)
125return -1;
126}
127
128/*
129 * If characters match exactly, then go on to next character
130 * immediately without doing any extra work.
131 */
132}
133
134/* if you got to here, then return bestGuess */
135return bestGuess;
136}
137
138
139//
140//FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering
141//
142// IFRESULT
143//--------------------------
144//str1 < str2=>-1
145//str1 = str2=> 0
146//str1 > str2=>+1
147//
148//The lower case table starts with 256 entries (one for each of the upper bytes
149//of the original Unicode char). If that entry is zero, then all characters with
150//that upper byte are already case folded. If the entry is non-zero, then it is
151//the _index_ (not byte offset) of the start of the sub-table for the characters
152//with that upper byte. All ignorable characters are folded to the value zero.
153//
154//In pseudocode:
155//
156//Let c = source Unicode character
157//Let table[] = lower case table
158//
159//lower = table[highbyte(c)]
160//if (lower == 0)
161//lower = c
162//else
163//lower = table[lower+lowbyte(c)]
164//
165//if (lower == 0)
166//ignore this character
167//
168//To handle ignorable characters, we now need a loop to find the next valid character.
169//Also, we can't pre-compute the number of characters to compare; the string length might
170//be larger than the number of non-ignorable characters. Further, we must be able to handle
171//ignorable characters at any point in the string, including as the first or last characters.
172//We use a zero value as a sentinel to detect both end-of-string and ignorable characters.
173//Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename,
174//the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is
175//an invalid Unicode character).
176//
177//Pseudocode:
178//
179//while (1) {
180//c1 = GetNextValidChar(str1)//returns zero if at end of string
181//c2 = GetNextValidChar(str2)
182//
183//if (c1 != c2) break//found a difference
184//
185//if (c1 == 0)//reached end of string on both strings at once?
186//return 0;//yes, so strings are equal
187//}
188//
189//// When we get here, c1 != c2. So, we just need to determine which one is less.
190//if (c1 < c2)
191//return -1;
192//else
193//return 1;
194//
195
196int32_t FastUnicodeCompare( u_int16_t * str1, register u_int32_t length1,
197 u_int16_t * str2, register u_int32_t length2, int byte_order )
198{
199register u_int16_t c1,c2;
200register u_int16_t temp;
201
202#if ! UNCOMPRESSED
203 InitCompareTables();
204#endif
205
206while (1) {
207/* Set default values for c1, c2 in case there are no more valid chars */
208c1 = 0;
209c2 = 0;
210
211/* Find next non-ignorable char from str1, or zero if no more */
212while (length1 && c1 == 0) {
213if (byte_order == OSBigEndian)
214c1 = SWAP_BE16(*(str1++));
215else
216c1 = SWAP_LE16(*(str1++));
217--length1;
218if ((temp = gLowerCaseTable[c1>>8]) != 0)// is there a subtable for this upper byte?
219c1 = gLowerCaseTable[temp + (c1 & 0x00FF)];// yes, so fold the char
220}
221
222/* Find next non-ignorable char from str2, or zero if no more */
223while (length2 && c2 == 0) {
224if (byte_order == OSBigEndian)
225c2 = SWAP_BE16(*(str2++));
226else
227c2 = SWAP_LE16(*(str2++));
228--length2;
229if ((temp = gLowerCaseTable[c2>>8]) != 0)// is there a subtable for this upper byte?
230c2 = gLowerCaseTable[temp + (c2 & 0x00FF)];// yes, so fold the char
231}
232
233if (c1 != c2)/* found a difference, so stop looping */
234break;
235
236if (c1 == 0)/* did we reach the end of both strings at the same time? */
237return 0;/* yes, so strings are equal */
238}
239
240if (c1 < c2)
241return -1;
242else
243return 1;
244}
245
246
247//
248// BinaryUnicodeCompare - Compare two Unicode strings; produce a relative ordering
249// Compared using a 16-bit binary comparison (no case folding)
250//
251int32_t BinaryUnicodeCompare (u_int16_t * str1, u_int32_t length1,
252 u_int16_t * str2, u_int32_t length2)
253{
254 register u_int16_t c1, c2;
255 int32_t bestGuess;
256 u_int32_t length;
257
258 bestGuess = 0;
259
260 if (length1 < length2) {
261 length = length1;
262 --bestGuess;
263 } else if (length1 > length2) {
264 length = length2;
265 ++bestGuess;
266 } else {
267 length = length1;
268 }
269
270 while (length--) {
271 c1 = *(str1++);
272 c2 = *(str2++);
273
274 if (c1 > c2)
275 return (1);
276 if (c1 < c2)
277 return (-1);
278 }
279
280 return (bestGuess);
281}
282
283
284/*
285 * UTF-8 (UCS Transformation Format)
286 *
287 * The following subset of UTF-8 is used to encode UCS-2 filenames. It
288 * requires a maximum of three 3 bytes per UCS-2 character. Only the
289 * shortest encoding required to represent the significant UCS-2 bits
290 * is legal.
291 *
292 * UTF-8 Multibyte Codes
293 *
294 * Bytes Bits UCS-2 Min UCS-2 Max UTF-8 Byte Sequence (binary)
295 * -------------------------------------------------------------------
296 * 1 7 0x0000 0x007F 0xxxxxxx
297 * 2 11 0x0080 0x07FF 110xxxxx 10xxxxxx
298 * 3 16 0x0800 0xFFFF 1110xxxx 10xxxxxx 10xxxxxx
299 * -------------------------------------------------------------------
300 */
301
302
303/*
304 * utf_encodestr - Encodes the UCS-2 (Unicode) string at ucsp into a
305 * null terminated UTF-8 string at utf8p.
306 *
307 * ucslen is the number of UCS-2 input characters (not bytes)
308 * bufsize is the size of the output buffer in bytes
309 */
310void
311utf_encodestr( const u_int16_t * ucsp, int ucslen,
312 u_int8_t * utf8p, u_int32_t bufsize, int byte_order )
313{
314u_int8_t *bufend;
315u_int16_t ucs_ch;
316
317bufend = utf8p + bufsize;
318
319while (ucslen-- > 0) {
320 if (byte_order == OSBigEndian)
321 ucs_ch = SWAP_BE16(*ucsp++);
322 else
323 ucs_ch = SWAP_LE16(*ucsp++);
324
325if (ucs_ch < 0x0080) {
326if (utf8p >= bufend)
327break;
328if (ucs_ch == '\0')
329continue;/* skip over embedded NULLs */
330*utf8p++ = ucs_ch;
331
332} else if (ucs_ch < 0x800) {
333if ((utf8p + 1) >= bufend)
334break;
335*utf8p++ = (ucs_ch >> 6) | 0xc0;
336*utf8p++ = (ucs_ch & 0x3f) | 0x80;
337
338} else {
339if ((utf8p + 2) >= bufend)
340break;
341*utf8p++ = (ucs_ch >> 12) | 0xe0;
342*utf8p++ = ((ucs_ch >> 6) & 0x3f) | 0x80;
343*utf8p++ = ((ucs_ch) & 0x3f) | 0x80;
344}
345}
346
347*utf8p = '\0';
348}
349
350
351/*
352 * utf_decodestr - Decodes the null terminated UTF-8 string at
353 * utf8p into a UCS-2 (Unicode) string at ucsp.
354 *
355 * ucslen is the number of UCS-2 output characters (not bytes)
356 * bufsize is the size of the output buffer in bytes
357 */
358void utf_decodestr(const u_int8_t * utf8p, u_int16_t * ucsp, u_int16_t * ucslen, u_int32_t bufsize, int byte_order)
359{
360u_int16_t *bufstart;
361u_int16_t *bufend;
362u_int16_t ucs_ch;
363u_int8_t byte;
364
365bufstart = ucsp;
366bufend = (u_int16_t *)((u_int8_t *)ucsp + bufsize);
367
368while ((byte = *utf8p++) != '\0') {
369if (ucsp >= bufend)
370break;
371
372/* check for ascii */
373if (byte < 0x80) {
374ucs_ch = byte;
375
376 if (byte_order == OSBigEndian)
377 *ucsp++ = SWAP_BE16(ucs_ch);
378 else
379 *ucsp++ = SWAP_LE16(ucs_ch);
380
381continue;
382}
383
384switch (byte & 0xf0) {
385/* 2 byte sequence*/
386case 0xc0:
387case 0xd0:
388/* extract bits 6 - 10 from first byte */
389ucs_ch = (byte & 0x1F) << 6;
390break;
391/* 3 byte sequence*/
392case 0xe0:
393/* extract bits 12 - 15 from first byte */
394ucs_ch = (byte & 0x0F) << 6;
395
396/* extract bits 6 - 11 from second byte */
397if (((byte = *utf8p++) & 0xc0) != 0x80)
398goto stop;
399
400ucs_ch += (byte & 0x3F);
401ucs_ch <<= 6;
402break;
403default:
404goto stop;
405}
406
407/* extract bits 0 - 5 from final byte */
408if (((byte = *utf8p++) & 0xc0) != 0x80)
409goto stop;
410ucs_ch += (byte & 0x3F);
411
412 if (byte_order == OSBigEndian)
413 *ucsp++ = SWAP_BE16(ucs_ch);
414 else
415 *ucsp++ = SWAP_LE16(ucs_ch);
416}
417stop:
418 if (byte_order == OSBigEndian)
419 *ucslen = SWAP_BE16(ucsp - bufstart);
420 else
421 *ucslen = SWAP_LE16(ucsp - bufstart);
422}
423

Archive Download this file

Revision: 515