trunk/i386/libsaio/hfs_compare.c - Chameleon Svn Source Tree - Chameleon open source boot loader project.

Root/trunk/i386/libsaio/hfs_compare.c

1	/*␊
2	* Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.␊
3	*␊
4	* @APPLE_LICENSE_HEADER_START@␊
5	* ␊
6	* The contents of this file constitute Original Code as defined in and␊
7	* are subject to the Apple Public Source License Version 2.0 (the␊
8	* "License"). You may not use this file except in compliance with the␊
9	* License. Please obtain a copy of the License at␊
10	* http://www.apple.com/publicsource and read it before using this file.␊
11	* ␊
12	* This Original Code and all software distributed under the License are␊
13	* distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER␊
14	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,␊
15	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,␊
16	* FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the␊
17	* License for the specific language governing rights and limitations␊
18	* under the License.␊
19	* ␊
20	* @APPLE_LICENSE_HEADER_END@␊
21	*/␊
22	/*␊
23	* HFSCompare.c - Functions for working with and comparing HFS nams.␊
24	*␊
25	* Copyright (c) 1999-2000 Apple Computer, Inc.␊
26	*␊
27	* DRI: Josh de Cesare␊
28	*/␊
29	␊
30	#include <sl.h>␊
31	#include "hfs_CaseTables.h"␊
32	␊
33	#if ! UNCOMPRESSED␊
34	␊
35	static unsigned short *␊
36	UncompressStructure(struct compressed_block *bp, int count, int size)␊
37	{␊
38	unsigned short *out = malloc(size);␊
39	unsigned short *op = out;␊
40	unsigned short data;␊
41	int i, j;␊
42	␊
43	for (i=0; i<count; i++, bp++) {␊
44	// If this happens (it shouldn't) please fix size and/or double check that count really is␊
45	// the number of elements in the array.␊
46	// This was a very hard bug to find, so please leave this code here.␊
47	if(out + size <= op + bp->count)␊
48	{␊
49	stop("HFS+ Unicode tables are malformed\n");␊
50	}␊
51	data = bp->data;␊
52	for (j=0; j<bp->count; j++) {␊
53	*op++ = data;␊
54	if (bp->type == kTypeAscending) data++;␊
55	else if (bp->type == kTypeAscending256) data += 256;␊
56	}␊
57	}␊
58	return out;␊
59	}␊
60	␊
61	static void␊
62	InitCompareTables(void)␊
63	{␊
64	if (gCompareTable == 0) {␊
65	gCompareTable = UncompressStructure(gCompareTableCompressed,␊
66	kCompareTableNBlocks, kCompareTableDataSize);␊
67	gLowerCaseTable = UncompressStructure(gLowerCaseTableCompressed,␊
68	kLowerCaseTableNBlocks, kLowerCaseTableDataSize);␊
69	}␊
70	}␊
71	␊
72	#endif /* ! UNCOMPRESSED */␊
73	␊
74	//_______________________________________________________________________␊
75	//␊
76	//␉Routine:␉FastRelString␊
77	//␊
78	//␉Output:␉␉returns -1 if str1 < str2␊
79	//␉␉␉␉returns 1 if str1 > str2␊
80	//␉␉␉␉return␉ 0 if equal␊
81	//␊
82	//_______________________________________________________________________␊
83	␊
84	int32_t␉FastRelString(u_int8_t * str1, u_int8_t * str2)␊
85	{␊
86	␉int32_t bestGuess;␊
87	␉u_int8_t length, length2;␊
88	␊
89	#if ! UNCOMPRESED␊
90	InitCompareTables();␊
91	#endif␊
92	␊
93	␉length = *(str1++);␊
94	␉length2 = *(str2++);␊
95	␊
96	␉if (length == length2)␊
97	␉␉bestGuess = 0;␊
98	␉else if (length < length2)␊
99	␉␉bestGuess = -1;␊
100	␉else␊
101	␉{␊
102	␉␉bestGuess = 1;␊
103	␉␉length = length2;␊
104	␉}␊
105	␊
106	␉while (length--)␊
107	␉{␊
108	␉␉u_int32_t aChar, bChar;␊
109	␊
110	␉␉aChar = *(str1++);␊
111	␉␉bChar = *(str2++);␊
112	␉␉␊
113	␉␉if (aChar != bChar)␉/* If they don't match exacly, do case conversion */␊
114	␉␉{␊
115	␉␉␉u_int16_t aSortWord, bSortWord;␊
116	␊
117	␉␉␉aSortWord = gCompareTable[aChar];␊
118	␉␉␉bSortWord = gCompareTable[bChar];␊
119	␊
120	␉␉␉if (aSortWord > bSortWord)␊
121	␉␉␉␉return 1;␊
122	␊
123	␉␉␉if (aSortWord < bSortWord)␊
124	␉␉␉␉return -1;␊
125	␉␉}␊
126	␊
127	␉␉/*␊
128	␉␉ * If characters match exactly, then go on to next character␊
129	␉␉ * immediately without doing any extra work.␊
130	␉␉ */␊
131	␉}␊
132	␉␊
133	␉/* If you got to here, then return bestGuess */␊
134	␉return bestGuess;␊
135	}␉␊
136	␊
137	␊
138	//␊
139	//␉FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering␊
140	//␊
141	//␉ IF␉␉␉␉RESULT␊
142	//␉--------------------------␊
143	//␉str1 < str2␉␉=>␉-1␊
144	//␉str1 = str2␉␉=>␉ 0␊
145	//␉str1 > str2␉␉=>␉+1␊
146	//␊
147	//␉The lower case table starts with 256 entries (one for each of the upper bytes␊
148	//␉of the original Unicode char). If that entry is zero, then all characters with␊
149	//␉that upper byte are already case folded. If the entry is non-zero, then it is␊
150	//␉the _index_ (not byte offset) of the start of the sub-table for the characters␊
151	//␉with that upper byte. All ignorable characters are folded to the value zero.␊
152	//␊
153	//␉In pseudocode:␊
154	//␊
155	//␉␉Let c = source Unicode character␊
156	//␉␉Let table[] = lower case table␊
157	//␊
158	//␉␉lower = table[highbyte(c)]␊
159	//␉␉if (lower == 0)␊
160	//␉␉␉lower = c␊
161	//␉␉else␊
162	//␉␉␉lower = table[lower+lowbyte(c)]␊
163	//␊
164	//␉␉if (lower == 0)␊
165	//␉␉␉ignore this character␊
166	//␊
167	//␉To handle ignorable characters, we now need a loop to find the next valid character.␊
168	//␉Also, we can't pre-compute the number of characters to compare; the string length might␊
169	//␉be larger than the number of non-ignorable characters. Further, we must be able to handle␊
170	//␉ignorable characters at any point in the string, including as the first or last characters.␊
171	//␉We use a zero value as a sentinel to detect both end-of-string and ignorable characters.␊
172	//␉Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename,␊
173	//␉the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is␊
174	//␉an invalid Unicode character).␊
175	//␊
176	//␉Pseudocode:␊
177	//␊
178	//␉␉while (1) {␊
179	//␉␉␉c1 = GetNextValidChar(str1)␉␉␉//␉returns zero if at end of string␊
180	//␉␉␉c2 = GetNextValidChar(str2)␊
181	//␊
182	//␉␉␉if (c1 != c2) break␉␉␉␉␉//␉found a difference␊
183	//␊
184	//␉␉␉if (c1 == 0)␉␉␉␉␉␉//␉reached end of string on both strings at once?␊
185	//␉␉␉␉return 0;␉␉␉␉␉␉//␉yes, so strings are equal␊
186	//␉␉}␊
187	//␊
188	//␉␉// When we get here, c1 != c2. So, we just need to determine which one is less.␊
189	//␉␉if (c1 < c2)␊
190	//␉␉␉return -1;␊
191	//␉␉else␊
192	//␉␉␉return 1;␊
193	//␊
194	␊
195	int32_t FastUnicodeCompare( u_int16_t * str1, register u_int32_t length1,␊
196	u_int16_t * str2, register u_int32_t length2, int byte_order )␊
197	{␊
198	␉register u_int16_t c1,c2;␊
199	␉register u_int16_t temp;␊
200	␊
201	#if ! UNCOMPRESSED␊
202	InitCompareTables();␊
203	#endif␊
204	␊
205	␉while (1) {␊
206	␉␉/* Set default values for c1, c2 in case there are no more valid chars */␊
207	␉␉c1 = 0;␊
208	␉␉c2 = 0;␊
209	␊
210	␉␉/* Find next non-ignorable char from str1, or zero if no more */␊
211	␉␉while (length1 && c1 == 0) {␊
212	␉␉␉if (byte_order == OSBigEndian)␊
213	␉␉␉␉c1 = SWAP_BE16(*(str1++));␊
214	␉␉␉else␊
215	␉␉␉␉c1 = SWAP_LE16(*(str1++));␊
216	␉␉␉--length1;␊
217	␉␉␉if ((temp = gLowerCaseTable[c1>>8]) != 0)␉␉// is there a subtable for this upper byte?␊
218	␉␉␉␉c1 = gLowerCaseTable[temp + (c1 & 0x00FF)];␉// yes, so fold the char␊
219	␉␉}␊
220	␊
221	␉␉/* Find next non-ignorable char from str2, or zero if no more */␊
222	␉␉while (length2 && c2 == 0) {␊
223	␉␉␉if (byte_order == OSBigEndian)␊
224	␉␉␉␉c2 = SWAP_BE16(*(str2++));␊
225	␉␉␉else␊
226	␉␉␉␉c2 = SWAP_LE16(*(str2++));␊
227	␉␉␉--length2;␊
228	␉␉␉if ((temp = gLowerCaseTable[c2>>8]) != 0)␉␉// is there a subtable for this upper byte?␊
229	␉␉␉␉c2 = gLowerCaseTable[temp + (c2 & 0x00FF)];␉// yes, so fold the char␊
230	␉␉}␊
231	␊
232	␉␉if (c1 != c2)␉/* found a difference, so stop looping */␊
233	␉␉␉break;␊
234	␉␉␊
235	␉␉if (c1 == 0)␉␉/* did we reach the end of both strings at the same time? */␊
236	␉␉␉return 0;␉/* yes, so strings are equal */␊
237	␉}␊
238	␉␊
239	␉if (c1 < c2)␊
240	␉␉return -1;␊
241	␉else␊
242	␉␉return 1;␊
243	}␊
244	␊
245	␊
246	//␊
247	// BinaryUnicodeCompare - Compare two Unicode strings; produce a relative ordering␊
248	// Compared using a 16-bit binary comparison (no case folding)␊
249	//␊
250	int32_t BinaryUnicodeCompare (u_int16_t * str1, u_int32_t length1,␊
251	u_int16_t * str2, u_int32_t length2)␊
252	{␊
253	register u_int16_t c1, c2;␊
254	int32_t bestGuess;␊
255	u_int32_t length;␊
256	␊
257	bestGuess = 0;␊
258	␊
259	if (length1 < length2) {␊
260	length = length1;␊
261	--bestGuess;␊
262	} else if (length1 > length2) {␊
263	length = length2;␊
264	++bestGuess;␊
265	} else {␊
266	length = length1;␊
267	}␊
268	␊
269	while (length--) {␊
270	c1 = *(str1++);␊
271	c2 = *(str2++);␊
272	␊
273	if (c1 > c2)␊
274	return (1);␊
275	if (c1 < c2)␊
276	return (-1);␊
277	}␊
278	␊
279	return (bestGuess);␊
280	}␊
281	␊
282	␊
283	/*␊
284	* UTF-8 (UCS Transformation Format)␊
285	*␊
286	* The following subset of UTF-8 is used to encode UCS-2 filenames. It␊
287	* requires a maximum of three 3 bytes per UCS-2 character. Only the␊
288	* shortest encoding required to represent the significant UCS-2 bits␊
289	* is legal.␊
290	* ␊
291	* UTF-8 Multibyte Codes␊
292	*␊
293	* Bytes Bits UCS-2 Min UCS-2 Max UTF-8 Byte Sequence (binary)␊
294	* -------------------------------------------------------------------␊
295	* 1 7 0x0000 0x007F 0xxxxxxx␊
296	* 2 11 0x0080 0x07FF 110xxxxx 10xxxxxx␊
297	* 3 16 0x0800 0xFFFF 1110xxxx 10xxxxxx 10xxxxxx␊
298	* -------------------------------------------------------------------␊
299	*/␊
300	␊
301	␊
302	/*␊
303	* utf_encodestr - Encodes the UCS-2 (Unicode) string at ucsp into a␊
304	* null terminated UTF-8 string at utf8p.␊
305	*␊
306	* ucslen is the number of UCS-2 input characters (not bytes)␊
307	* bufsize is the size of the output buffer in bytes␊
308	*/␊
309	void␊
310	utf_encodestr( const u_int16_t * ucsp, int ucslen,␊
311	u_int8_t * utf8p, u_int32_t bufsize, int byte_order )␊
312	{␊
313	␉u_int8_t *bufend;␊
314	␉u_int16_t ucs_ch;␊
315	␊
316	␉bufend = utf8p + bufsize;␊
317	␊
318	␉while (ucslen-- > 0) {␊
319	if (byte_order == OSBigEndian)␊
320	␉␉ ucs_ch = SWAP_BE16(*ucsp++);␊
321	else␊
322	ucs_ch = SWAP_LE16(*ucsp++);␊
323	␊
324	␉␉if (ucs_ch < 0x0080) {␊
325	␉␉␉if (utf8p >= bufend)␊
326	␉␉␉␉break;␊
327	␉␉␉if (ucs_ch == '\0')␊
328	␉␉␉␉continue;␉/* skip over embedded NULLs */␊
329	␉␉␉*utf8p++ = ucs_ch;␊
330	␊
331	␉␉} else if (ucs_ch < 0x800) {␊
332	␉␉␉if ((utf8p + 1) >= bufend)␊
333	␉␉␉␉break;␊
334	␉␉␉*utf8p++ = (ucs_ch >> 6) \| 0xc0;␊
335	␉␉␉*utf8p++ = (ucs_ch & 0x3f) \| 0x80;␊
336	␊
337	␉␉} else {␊
338	␉␉␉if ((utf8p + 2) >= bufend)␊
339	␉␉␉␉break;␊
340	␉␉␉*utf8p++ = (ucs_ch >> 12) \| 0xe0;␊
341	␉␉␉*utf8p++ = ((ucs_ch >> 6) & 0x3f) \| 0x80;␊
342	␉␉␉*utf8p++ = ((ucs_ch) & 0x3f) \| 0x80;␊
343	␉␉}␊
344	␉}␊
345	␊
346	␉*utf8p = '\0';␊
347	}␊
348	␊
349	␊
350	/*␊
351	* utf_decodestr - Decodes the null terminated UTF-8 string at␊
352	* utf8p into a UCS-2 (Unicode) string at ucsp.␊
353	*␊
354	* ucslen is the number of UCS-2 output characters (not bytes)␊
355	* bufsize is the size of the output buffer in bytes␊
356	*/␊
357	void utf_decodestr(const u_int8_t * utf8p, u_int16_t * ucsp, u_int16_t * ucslen, u_int32_t bufsize, int byte_order)␊
358	{␊
359	␉u_int16_t *bufstart;␊
360	␉u_int16_t *bufend;␊
361	␉u_int16_t ucs_ch;␊
362	␉u_int8_t byte;␊
363	␊
364	␉bufstart = ucsp;␊
365	␉bufend = (u_int16_t )((u_int8_t )ucsp + bufsize);␊
366	␊
367	␉while ((byte = *utf8p++) != '\0') {␊
368	␉␉if (ucsp >= bufend)␊
369	␉␉␉break;␊
370	␊
371	␉␉/* check for ascii */␊
372	␉␉if (byte < 0x80) {␊
373	␉␉␉ucs_ch = byte;␊
374	␉␉␉␊
375	if (byte_order == OSBigEndian)␊
376	␉␉ *ucsp++ = SWAP_BE16(ucs_ch);␊
377	else␊
378	*ucsp++ = SWAP_LE16(ucs_ch);␊
379	␉␉␉␊
380	␉␉␉continue;␊
381	␉␉}␊
382	␊
383	␉␉switch (byte & 0xf0) {␊
384	␉␉/* 2 byte sequence*/␊
385	␉␉case 0xc0:␊
386	␉␉case 0xd0:␊
387	␉␉␉/* extract bits 6 - 10 from first byte */␊
388	␉␉␉ucs_ch = (byte & 0x1F) << 6; ␊
389	␉␉␉break;␊
390	␉␉/* 3 byte sequence*/␊
391	␉␉case 0xe0:␊
392	␉␉␉/* extract bits 12 - 15 from first byte */␊
393	␉␉␉ucs_ch = (byte & 0x0F) << 6;␊
394	␊
395	␉␉␉/* extract bits 6 - 11 from second byte */␊
396	␉␉␉if (((byte = *utf8p++) & 0xc0) != 0x80)␊
397	␉␉␉␉goto stop;␊
398	␊
399	␉␉␉ucs_ch += (byte & 0x3F);␊
400	␉␉␉ucs_ch <<= 6;␊
401	␉␉␉break;␊
402	␉␉default:␊
403	␉␉␉goto stop;␊
404	␉␉}␊
405	␊
406	␉␉/* extract bits 0 - 5 from final byte */␊
407	␉␉if (((byte = *utf8p++) & 0xc0) != 0x80)␊
408	␉␉␉goto stop;␊
409	␉␉ucs_ch += (byte & 0x3F); ␊
410	␊
411	if (byte_order == OSBigEndian)␊
412	*ucsp++ = SWAP_BE16(ucs_ch);␊
413	else␊
414	*ucsp++ = SWAP_LE16(ucs_ch);␊
415	␉}␊
416	stop:␊
417	if (byte_order == OSBigEndian)␊
418	*ucslen = SWAP_BE16(ucsp - bufstart);␊
419	else␊
420	*ucslen = SWAP_LE16(ucsp - bufstart);␊
421	}␊
422

Download this file