Chameleon

Chameleon Svn Source Tree

Root/trunk/i386/libsaio/hfs_compare.c

1/*
2 * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 2.0 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22/*
23 * HFSCompare.c - Functions for working with and comparing HFS nams.
24 *
25 * Copyright (c) 1999-2000 Apple Computer, Inc.
26 *
27 * DRI: Josh de Cesare
28 */
29
30#include <sl.h>
31#include "hfs_CaseTables.h"
32
33#if ! UNCOMPRESSED
34
35static unsigned short *
36UncompressStructure(struct compressed_block *bp, int count, int size)
37{
38 unsigned short *out = malloc(size);
39 unsigned short *op = out;
40 unsigned short data;
41 int i, j;
42
43 for (i=0; i<count; i++, bp++) {
44 // If this happens (it shouldn't) please fix size and/or double check that count really is
45 // the number of elements in the array.
46 // This was a very hard bug to find, so please leave this code here.
47 if(out + size <= op + bp->count)
48 {
49 stop("HFS+ Unicode tables are malformed\n");
50 }
51 data = bp->data;
52 for (j=0; j<bp->count; j++) {
53 *op++ = data;
54 if (bp->type == kTypeAscending) data++;
55 else if (bp->type == kTypeAscending256) data += 256;
56 }
57 }
58 return out;
59}
60
61static void
62InitCompareTables(void)
63{
64 if (gCompareTable == 0) {
65 gCompareTable = UncompressStructure(gCompareTableCompressed,
66 kCompareTableNBlocks, kCompareTableDataSize);
67 gLowerCaseTable = UncompressStructure(gLowerCaseTableCompressed,
68 kLowerCaseTableNBlocks, kLowerCaseTableDataSize);
69 }
70}
71
72#endif /* ! UNCOMPRESSED */
73
74//_______________________________________________________________________
75//
76//Routine:FastRelString
77//
78//Output:returns -1 if str1 < str2
79//returns 1 if str1 > str2
80//return 0 if equal
81//
82//_______________________________________________________________________
83
84int32_tFastRelString(u_int8_t * str1, u_int8_t * str2)
85{
86int32_t bestGuess;
87u_int8_t length, length2;
88
89#if ! UNCOMPRESED
90 InitCompareTables();
91#endif
92
93length = *(str1++);
94length2 = *(str2++);
95
96if (length == length2)
97bestGuess = 0;
98else if (length < length2)
99bestGuess = -1;
100else
101{
102bestGuess = 1;
103length = length2;
104}
105
106while (length--)
107{
108u_int32_t aChar, bChar;
109
110aChar = *(str1++);
111bChar = *(str2++);
112
113if (aChar != bChar)/* If they don't match exacly, do case conversion */
114{
115u_int16_t aSortWord, bSortWord;
116
117aSortWord = gCompareTable[aChar];
118bSortWord = gCompareTable[bChar];
119
120if (aSortWord > bSortWord)
121return 1;
122
123if (aSortWord < bSortWord)
124return -1;
125}
126
127/*
128 * If characters match exactly, then go on to next character
129 * immediately without doing any extra work.
130 */
131}
132
133/* If you got to here, then return bestGuess */
134return bestGuess;
135}
136
137
138//
139//FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering
140//
141// IFRESULT
142//--------------------------
143//str1 < str2=>-1
144//str1 = str2=> 0
145//str1 > str2=>+1
146//
147//The lower case table starts with 256 entries (one for each of the upper bytes
148//of the original Unicode char). If that entry is zero, then all characters with
149//that upper byte are already case folded. If the entry is non-zero, then it is
150//the _index_ (not byte offset) of the start of the sub-table for the characters
151//with that upper byte. All ignorable characters are folded to the value zero.
152//
153//In pseudocode:
154//
155//Let c = source Unicode character
156//Let table[] = lower case table
157//
158//lower = table[highbyte(c)]
159//if (lower == 0)
160//lower = c
161//else
162//lower = table[lower+lowbyte(c)]
163//
164//if (lower == 0)
165//ignore this character
166//
167//To handle ignorable characters, we now need a loop to find the next valid character.
168//Also, we can't pre-compute the number of characters to compare; the string length might
169//be larger than the number of non-ignorable characters. Further, we must be able to handle
170//ignorable characters at any point in the string, including as the first or last characters.
171//We use a zero value as a sentinel to detect both end-of-string and ignorable characters.
172//Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename,
173//the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is
174//an invalid Unicode character).
175//
176//Pseudocode:
177//
178//while (1) {
179//c1 = GetNextValidChar(str1)//returns zero if at end of string
180//c2 = GetNextValidChar(str2)
181//
182//if (c1 != c2) break//found a difference
183//
184//if (c1 == 0)//reached end of string on both strings at once?
185//return 0;//yes, so strings are equal
186//}
187//
188//// When we get here, c1 != c2. So, we just need to determine which one is less.
189//if (c1 < c2)
190//return -1;
191//else
192//return 1;
193//
194
195int32_t FastUnicodeCompare( u_int16_t * str1, register u_int32_t length1,
196 u_int16_t * str2, register u_int32_t length2, int byte_order )
197{
198register u_int16_t c1,c2;
199register u_int16_t temp;
200
201#if ! UNCOMPRESSED
202 InitCompareTables();
203#endif
204
205while (1) {
206/* Set default values for c1, c2 in case there are no more valid chars */
207c1 = 0;
208c2 = 0;
209
210/* Find next non-ignorable char from str1, or zero if no more */
211while (length1 && c1 == 0) {
212if (byte_order == OSBigEndian)
213c1 = SWAP_BE16(*(str1++));
214else
215c1 = SWAP_LE16(*(str1++));
216--length1;
217if ((temp = gLowerCaseTable[c1>>8]) != 0)// is there a subtable for this upper byte?
218c1 = gLowerCaseTable[temp + (c1 & 0x00FF)];// yes, so fold the char
219}
220
221/* Find next non-ignorable char from str2, or zero if no more */
222while (length2 && c2 == 0) {
223if (byte_order == OSBigEndian)
224c2 = SWAP_BE16(*(str2++));
225else
226c2 = SWAP_LE16(*(str2++));
227--length2;
228if ((temp = gLowerCaseTable[c2>>8]) != 0)// is there a subtable for this upper byte?
229c2 = gLowerCaseTable[temp + (c2 & 0x00FF)];// yes, so fold the char
230}
231
232if (c1 != c2)/* found a difference, so stop looping */
233break;
234
235if (c1 == 0)/* did we reach the end of both strings at the same time? */
236return 0;/* yes, so strings are equal */
237}
238
239if (c1 < c2)
240return -1;
241else
242return 1;
243}
244
245
246//
247// BinaryUnicodeCompare - Compare two Unicode strings; produce a relative ordering
248// Compared using a 16-bit binary comparison (no case folding)
249//
250int32_t BinaryUnicodeCompare (u_int16_t * str1, u_int32_t length1,
251 u_int16_t * str2, u_int32_t length2)
252{
253 register u_int16_t c1, c2;
254 int32_t bestGuess;
255 u_int32_t length;
256
257 bestGuess = 0;
258
259 if (length1 < length2) {
260 length = length1;
261 --bestGuess;
262 } else if (length1 > length2) {
263 length = length2;
264 ++bestGuess;
265 } else {
266 length = length1;
267 }
268
269 while (length--) {
270 c1 = *(str1++);
271 c2 = *(str2++);
272
273 if (c1 > c2)
274 return (1);
275 if (c1 < c2)
276 return (-1);
277 }
278
279 return (bestGuess);
280}
281
282
283/*
284 * UTF-8 (UCS Transformation Format)
285 *
286 * The following subset of UTF-8 is used to encode UCS-2 filenames. It
287 * requires a maximum of three 3 bytes per UCS-2 character. Only the
288 * shortest encoding required to represent the significant UCS-2 bits
289 * is legal.
290 *
291 * UTF-8 Multibyte Codes
292 *
293 * Bytes Bits UCS-2 Min UCS-2 Max UTF-8 Byte Sequence (binary)
294 * -------------------------------------------------------------------
295 * 1 7 0x0000 0x007F 0xxxxxxx
296 * 2 11 0x0080 0x07FF 110xxxxx 10xxxxxx
297 * 3 16 0x0800 0xFFFF 1110xxxx 10xxxxxx 10xxxxxx
298 * -------------------------------------------------------------------
299 */
300
301
302/*
303 * utf_encodestr - Encodes the UCS-2 (Unicode) string at ucsp into a
304 * null terminated UTF-8 string at utf8p.
305 *
306 * ucslen is the number of UCS-2 input characters (not bytes)
307 * bufsize is the size of the output buffer in bytes
308 */
309void
310utf_encodestr( const u_int16_t * ucsp, int ucslen,
311 u_int8_t * utf8p, u_int32_t bufsize, int byte_order )
312{
313u_int8_t *bufend;
314u_int16_t ucs_ch;
315
316bufend = utf8p + bufsize;
317
318while (ucslen-- > 0) {
319 if (byte_order == OSBigEndian)
320 ucs_ch = SWAP_BE16(*ucsp++);
321 else
322 ucs_ch = SWAP_LE16(*ucsp++);
323
324if (ucs_ch < 0x0080) {
325if (utf8p >= bufend)
326break;
327if (ucs_ch == '\0')
328continue;/* skip over embedded NULLs */
329*utf8p++ = ucs_ch;
330
331} else if (ucs_ch < 0x800) {
332if ((utf8p + 1) >= bufend)
333break;
334*utf8p++ = (ucs_ch >> 6) | 0xc0;
335*utf8p++ = (ucs_ch & 0x3f) | 0x80;
336
337} else {
338if ((utf8p + 2) >= bufend)
339break;
340*utf8p++ = (ucs_ch >> 12) | 0xe0;
341*utf8p++ = ((ucs_ch >> 6) & 0x3f) | 0x80;
342*utf8p++ = ((ucs_ch) & 0x3f) | 0x80;
343}
344}
345
346*utf8p = '\0';
347}
348
349
350/*
351 * utf_decodestr - Decodes the null terminated UTF-8 string at
352 * utf8p into a UCS-2 (Unicode) string at ucsp.
353 *
354 * ucslen is the number of UCS-2 output characters (not bytes)
355 * bufsize is the size of the output buffer in bytes
356 */
357void utf_decodestr(const u_int8_t * utf8p, u_int16_t * ucsp, u_int16_t * ucslen, u_int32_t bufsize, int byte_order)
358{
359u_int16_t *bufstart;
360u_int16_t *bufend;
361u_int16_t ucs_ch;
362u_int8_t byte;
363
364bufstart = ucsp;
365bufend = (u_int16_t *)((u_int8_t *)ucsp + bufsize);
366
367while ((byte = *utf8p++) != '\0') {
368if (ucsp >= bufend)
369break;
370
371/* check for ascii */
372if (byte < 0x80) {
373ucs_ch = byte;
374
375 if (byte_order == OSBigEndian)
376 *ucsp++ = SWAP_BE16(ucs_ch);
377 else
378 *ucsp++ = SWAP_LE16(ucs_ch);
379
380continue;
381}
382
383switch (byte & 0xf0) {
384/* 2 byte sequence*/
385case 0xc0:
386case 0xd0:
387/* extract bits 6 - 10 from first byte */
388ucs_ch = (byte & 0x1F) << 6;
389break;
390/* 3 byte sequence*/
391case 0xe0:
392/* extract bits 12 - 15 from first byte */
393ucs_ch = (byte & 0x0F) << 6;
394
395/* extract bits 6 - 11 from second byte */
396if (((byte = *utf8p++) & 0xc0) != 0x80)
397goto stop;
398
399ucs_ch += (byte & 0x3F);
400ucs_ch <<= 6;
401break;
402default:
403goto stop;
404}
405
406/* extract bits 0 - 5 from final byte */
407if (((byte = *utf8p++) & 0xc0) != 0x80)
408goto stop;
409ucs_ch += (byte & 0x3F);
410
411 if (byte_order == OSBigEndian)
412 *ucsp++ = SWAP_BE16(ucs_ch);
413 else
414 *ucsp++ = SWAP_LE16(ucs_ch);
415}
416stop:
417 if (byte_order == OSBigEndian)
418 *ucslen = SWAP_BE16(ucsp - bufstart);
419 else
420 *ucslen = SWAP_LE16(ucsp - bufstart);
421}
422

Archive Download this file

Revision: 2425