branches/ErmaC/Trunk/i386/libsaio/hfs_compare.c - Chameleon Svn Source Tree - Chameleon open source boot loader project.

Root/branches/ErmaC/Trunk/i386/libsaio/hfs_compare.c

1	/*␊
2	* Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.␊
3	*␊
4	* @APPLE_LICENSE_HEADER_START@␊
5	* ␊
6	* The contents of this file constitute Original Code as defined in and␊
7	* are subject to the Apple Public Source License Version 2.0 (the␊
8	* "License"). You may not use this file except in compliance with the␊
9	* License. Please obtain a copy of the License at␊
10	* http://www.apple.com/publicsource and read it before using this file.␊
11	* ␊
12	* This Original Code and all software distributed under the License are␊
13	* distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER␊
14	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,␊
15	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,␊
16	* FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the␊
17	* License for the specific language governing rights and limitations␊
18	* under the License.␊
19	* ␊
20	* @APPLE_LICENSE_HEADER_END@␊
21	*/␊
22	/*␊
23	* HFSCompare.c - Functions for working with and comparing HFS nams.␊
24	*␊
25	* Copyright (c) 1999-2000 Apple Computer, Inc.␊
26	*␊
27	* DRI: Josh de Cesare␊
28	*/␊
29	␊
30	#include <sl.h>␊
31	#include "hfs_CaseTables.h"␊
32	␊
33	#if ! UNCOMPRESSED␊
34	␊
35	static unsigned short * UncompressStructure(struct compressed_block *bp, int count, int size)␊
36	{␊
37	unsigned short *out = malloc(size);␊
38	unsigned short *op = out;␊
39	unsigned short data;␊
40	int i, j;␊
41	␊
42	for (i=0; i<count; i++, bp++) {␊
43	// If this happens (it shouldn't) please fix size and/or double check that count really is␊
44	// the number of elements in the array.␊
45	// This was a very hard bug to find, so please leave this code here.␊
46	if(out + size <= op + bp->count)␊
47	{␊
48	stop("HFS+ Unicode tables are malformed\n");␊
49	}␊
50	data = bp->data;␊
51	for (j=0; j<bp->count; j++) {␊
52	*op++ = data;␊
53	if (bp->type == kTypeAscending) data++;␊
54	else if (bp->type == kTypeAscending256) data += 256;␊
55	}␊
56	}␊
57	return out;␊
58	}␊
59	␊
60	static void InitCompareTables(void)␊
61	{␊
62	if (gCompareTable == 0) {␊
63	gCompareTable = UncompressStructure(gCompareTableCompressed,␊
64	kCompareTableNBlocks, kCompareTableDataSize);␊
65	gLowerCaseTable = UncompressStructure(gLowerCaseTableCompressed,␊
66	kLowerCaseTableNBlocks, kLowerCaseTableDataSize);␊
67	}␊
68	}␊
69	␊
70	#endif /* ! UNCOMPRESSED */␊
71	␊
72	//_______________________________________________________________________␊
73	//␊
74	//␉Routine:␉FastRelString␊
75	//␊
76	//␉Output:␉␉returns -1 if str1 < str2␊
77	//␉␉␉␉returns 1 if str1 > str2␊
78	//␉␉␉␉return␉ 0 if equal␊
79	//␊
80	//_______________________________________________________________________␊
81	␊
82	int32_t␉FastRelString(u_int8_t * str1, u_int8_t * str2)␊
83	{␊
84	␉int32_t bestGuess;␊
85	␉u_int8_t length, length2;␊
86	␊
87	#if ! UNCOMPRESED␊
88	InitCompareTables();␊
89	#endif␊
90	␊
91	␉length = *(str1++);␊
92	␉length2 = *(str2++);␊
93	␊
94	␉if (length == length2)␊
95	␉␉bestGuess = 0;␊
96	␉else if (length < length2)␊
97	␉␉bestGuess = -1;␊
98	␉else␊
99	␉{␊
100	␉␉bestGuess = 1;␊
101	␉␉length = length2;␊
102	␉}␊
103	␊
104	␉while (length--)␊
105	␉{␊
106	␉␉u_int32_t aChar, bChar;␊
107	␊
108	␉␉aChar = *(str1++);␊
109	␉␉bChar = *(str2++);␊
110	␉␉␊
111	␉␉if (aChar != bChar)␉/* If they don't match exacly, do case conversion */␊
112	␉␉{␊
113	␉␉␉u_int16_t aSortWord, bSortWord;␊
114	␊
115	␉␉␉aSortWord = gCompareTable[aChar];␊
116	␉␉␉bSortWord = gCompareTable[bChar];␊
117	␊
118	␉␉␉if (aSortWord > bSortWord)␊
119	␉␉␉␉return 1;␊
120	␊
121	␉␉␉if (aSortWord < bSortWord)␊
122	␉␉␉␉return -1;␊
123	␉␉}␊
124	␉␉␊
125	␉␉/*␊
126	␉␉ * If characters match exactly, then go on to next character␊
127	␉␉ * immediately without doing any extra work.␊
128	␉␉ */␊
129	␉}␊
130	␉␊
131	␉/* if you got to here, then return bestGuess */␊
132	␉return bestGuess;␊
133	}␉␊
134	␊
135	␊
136	//␊
137	//␉FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering␊
138	//␊
139	//␉ IF␉␉␉␉RESULT␊
140	//␉--------------------------␊
141	//␉str1 < str2␉␉=>␉-1␊
142	//␉str1 = str2␉␉=>␉ 0␊
143	//␉str1 > str2␉␉=>␉+1␊
144	//␊
145	//␉The lower case table starts with 256 entries (one for each of the upper bytes␊
146	//␉of the original Unicode char). If that entry is zero, then all characters with␊
147	//␉that upper byte are already case folded. If the entry is non-zero, then it is␊
148	//␉the _index_ (not byte offset) of the start of the sub-table for the characters␊
149	//␉with that upper byte. All ignorable characters are folded to the value zero.␊
150	//␊
151	//␉In pseudocode:␊
152	//␊
153	//␉␉Let c = source Unicode character␊
154	//␉␉Let table[] = lower case table␊
155	//␊
156	//␉␉lower = table[highbyte(c)]␊
157	//␉␉if (lower == 0)␊
158	//␉␉␉lower = c␊
159	//␉␉else␊
160	//␉␉␉lower = table[lower+lowbyte(c)]␊
161	//␊
162	//␉␉if (lower == 0)␊
163	//␉␉␉ignore this character␊
164	//␊
165	//␉To handle ignorable characters, we now need a loop to find the next valid character.␊
166	//␉Also, we can't pre-compute the number of characters to compare; the string length might␊
167	//␉be larger than the number of non-ignorable characters. Further, we must be able to handle␊
168	//␉ignorable characters at any point in the string, including as the first or last characters.␊
169	//␉We use a zero value as a sentinel to detect both end-of-string and ignorable characters.␊
170	//␉Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename,␊
171	//␉the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is␊
172	//␉an invalid Unicode character).␊
173	//␊
174	//␉Pseudocode:␊
175	//␊
176	//␉␉while (1) {␊
177	//␉␉␉c1 = GetNextValidChar(str1)␉␉␉//␉returns zero if at end of string␊
178	//␉␉␉c2 = GetNextValidChar(str2)␊
179	//␊
180	//␉␉␉if (c1 != c2) break␉␉␉␉␉//␉found a difference␊
181	//␊
182	//␉␉␉if (c1 == 0)␉␉␉␉␉␉//␉reached end of string on both strings at once?␊
183	//␉␉␉␉return 0;␉␉␉␉␉␉//␉yes, so strings are equal␊
184	//␉␉}␊
185	//␊
186	//␉␉// When we get here, c1 != c2. So, we just need to determine which one is less.␊
187	//␉␉if (c1 < c2)␊
188	//␉␉␉return -1;␊
189	//␉␉else␊
190	//␉␉␉return 1;␊
191	//␊
192	␊
193	int32_t FastUnicodeCompare( u_int16_t * str1, register u_int32_t length1,␊
194	u_int16_t * str2, register u_int32_t length2, int byte_order )␊
195	{␊
196	␉register u_int16_t c1,c2;␊
197	␉register u_int16_t temp;␊
198	␊
199	#if ! UNCOMPRESSED␊
200	InitCompareTables();␊
201	#endif␊
202	␊
203	␉while (1) {␊
204	␉␉/* Set default values for c1, c2 in case there are no more valid chars */␊
205	␉␉c1 = 0;␊
206	␉␉c2 = 0;␊
207	␊
208	␉␉/* Find next non-ignorable char from str1, or zero if no more */␊
209	␉␉while (length1 && c1 == 0) {␊
210	␉␉␉if (byte_order == OSBigEndian)␊
211	␉␉␉␉c1 = SWAP_BE16(*(str1++));␊
212	␉␉␉else␊
213	␉␉␉␉c1 = SWAP_LE16(*(str1++));␊
214	␉␉␉--length1;␊
215	␉␉␉if ((temp = gLowerCaseTable[c1>>8]) != 0)␉␉// is there a subtable for this upper byte?␊
216	␉␉␉␉c1 = gLowerCaseTable[temp + (c1 & 0x00FF)];␉// yes, so fold the char␊
217	␉␉}␊
218	␊
219	␉␉/* Find next non-ignorable char from str2, or zero if no more */␊
220	␉␉while (length2 && c2 == 0) {␊
221	␉␉␉if (byte_order == OSBigEndian)␊
222	␉␉␉␉c2 = SWAP_BE16(*(str2++));␊
223	␉␉␉else␊
224	␉␉␉␉c2 = SWAP_LE16(*(str2++));␊
225	␉␉␉--length2;␊
226	␉␉␉if ((temp = gLowerCaseTable[c2>>8]) != 0)␉␉// is there a subtable for this upper byte?␊
227	␉␉␉␉c2 = gLowerCaseTable[temp + (c2 & 0x00FF)];␉// yes, so fold the char␊
228	␉␉}␊
229	␊
230	␉␉if (c1 != c2)␉/* found a difference, so stop looping */␊
231	␉␉␉break;␊
232	␉␉␊
233	␉␉if (c1 == 0)␉␉/* did we reach the end of both strings at the same time? */␊
234	␉␉␉return 0;␉/* yes, so strings are equal */␊
235	␉}␊
236	␉␊
237	␉if (c1 < c2)␊
238	␉␉return -1;␊
239	␉else␊
240	␉␉return 1;␊
241	}␊
242	␊
243	␊
244	//␊
245	// BinaryUnicodeCompare - Compare two Unicode strings; produce a relative ordering␊
246	// Compared using a 16-bit binary comparison (no case folding)␊
247	//␊
248	int32_t BinaryUnicodeCompare (u_int16_t * str1, u_int32_t length1,␊
249	u_int16_t * str2, u_int32_t length2)␊
250	{␊
251	register u_int16_t c1, c2;␊
252	int32_t bestGuess;␊
253	u_int32_t length;␊
254	␊
255	bestGuess = 0;␊
256	␊
257	if (length1 < length2) {␊
258	length = length1;␊
259	--bestGuess;␊
260	} else if (length1 > length2) {␊
261	length = length2;␊
262	++bestGuess;␊
263	} else {␊
264	length = length1;␊
265	}␊
266	␊
267	while (length--) {␊
268	c1 = *(str1++);␊
269	c2 = *(str2++);␊
270	␊
271	if (c1 > c2)␊
272	return (1);␊
273	if (c1 < c2)␊
274	return (-1);␊
275	}␊
276	␊
277	return (bestGuess);␊
278	}␊
279	␊
280	␊
281	/*␊
282	* UTF-8 (UCS Transformation Format)␊
283	*␊
284	* The following subset of UTF-8 is used to encode UCS-2 filenames. It␊
285	* requires a maximum of three 3 bytes per UCS-2 character. Only the␊
286	* shortest encoding required to represent the significant UCS-2 bits␊
287	* is legal.␊
288	* ␊
289	* UTF-8 Multibyte Codes␊
290	*␊
291	* Bytes Bits UCS-2 Min UCS-2 Max UTF-8 Byte Sequence (binary)␊
292	* -------------------------------------------------------------------␊
293	* 1 7 0x0000 0x007F 0xxxxxxx␊
294	* 2 11 0x0080 0x07FF 110xxxxx 10xxxxxx␊
295	* 3 16 0x0800 0xFFFF 1110xxxx 10xxxxxx 10xxxxxx␊
296	* -------------------------------------------------------------------␊
297	*/␊
298	␊
299	␊
300	/*␊
301	* utf_encodestr - Encodes the UCS-2 (Unicode) string at ucsp into a␊
302	* null terminated UTF-8 string at utf8p.␊
303	*␊
304	* ucslen is the number of UCS-2 input characters (not bytes)␊
305	* bufsize is the size of the output buffer in bytes␊
306	*/␊
307	void utf_encodestr( const u_int16_t * ucsp, int ucslen,␊
308	u_int8_t * utf8p, u_int32_t bufsize, int byte_order )␊
309	{␊
310	␉u_int8_t *bufend;␊
311	␉u_int16_t ucs_ch;␊
312	␊
313	␉bufend = utf8p + bufsize;␊
314	␊
315	␉while (ucslen-- > 0) {␊
316	if (byte_order == OSBigEndian)␊
317	␉␉ ucs_ch = SWAP_BE16(*ucsp++);␊
318	else␊
319	ucs_ch = SWAP_LE16(*ucsp++);␊
320	␊
321	␉␉if (ucs_ch < 0x0080) {␊
322	␉␉␉if (utf8p >= bufend)␊
323	␉␉␉␉break;␊
324	␉␉␉if (ucs_ch == '\0')␊
325	␉␉␉␉continue;␉/* skip over embedded NULLs */␊
326	␉␉␉*utf8p++ = ucs_ch;␊
327	␊
328	␉␉} else if (ucs_ch < 0x800) {␊
329	␉␉␉if ((utf8p + 1) >= bufend)␊
330	␉␉␉␉break;␊
331	␉␉␉*utf8p++ = (ucs_ch >> 6) \| 0xc0;␊
332	␉␉␉*utf8p++ = (ucs_ch & 0x3f) \| 0x80;␊
333	␊
334	␉␉} else {␊
335	␉␉␉if ((utf8p + 2) >= bufend)␊
336	␉␉␉␉break;␊
337	␉␉␉*utf8p++ = (ucs_ch >> 12) \| 0xe0;␊
338	␉␉␉*utf8p++ = ((ucs_ch >> 6) & 0x3f) \| 0x80;␊
339	␉␉␉*utf8p++ = ((ucs_ch) & 0x3f) \| 0x80;␊
340	␉␉}␊
341	␉}␊
342	␊
343	␉*utf8p = '\0';␊
344	}␊
345	␊
346	␊
347	/*␊
348	* utf_decodestr - Decodes the null terminated UTF-8 string at␊
349	* utf8p into a UCS-2 (Unicode) string at ucsp.␊
350	*␊
351	* ucslen is the number of UCS-2 output characters (not bytes)␊
352	* bufsize is the size of the output buffer in bytes␊
353	*/␊
354	void utf_decodestr(const u_int8_t * utf8p, u_int16_t * ucsp, u_int16_t * ucslen, u_int32_t bufsize, int byte_order)␊
355	{␊
356	␉u_int16_t *bufstart;␊
357	␉u_int16_t *bufend;␊
358	␉u_int16_t ucs_ch;␊
359	␉u_int8_t byte;␊
360	␊
361	␉bufstart = ucsp;␊
362	␉bufend = (u_int16_t )((u_int8_t )ucsp + bufsize);␊
363	␊
364	␉while ((byte = *utf8p++) != '\0') {␊
365	␉␉if (ucsp >= bufend)␊
366	␉␉␉break;␊
367	␊
368	␉␉/* check for ascii */␊
369	␉␉if (byte < 0x80) {␊
370	␉␉␉ucs_ch = byte;␊
371	␉␉␉␊
372	if (byte_order == OSBigEndian)␊
373	␉␉ *ucsp++ = SWAP_BE16(ucs_ch);␊
374	else␊
375	*ucsp++ = SWAP_LE16(ucs_ch);␊
376	␉␉␉␊
377	␉␉␉continue;␊
378	␉␉}␊
379	␊
380	␉␉switch (byte & 0xf0) {␊
381	␉␉/* 2 byte sequence*/␊
382	␉␉case 0xc0:␊
383	␉␉case 0xd0:␊
384	␉␉␉/* extract bits 6 - 10 from first byte */␊
385	␉␉␉ucs_ch = (byte & 0x1F) << 6; ␊
386	␉␉␉break;␊
387	␉␉/* 3 byte sequence*/␊
388	␉␉case 0xe0:␊
389	␉␉␉/* extract bits 12 - 15 from first byte */␊
390	␉␉␉ucs_ch = (byte & 0x0F) << 6;␊
391	␊
392	␉␉␉/* extract bits 6 - 11 from second byte */␊
393	␉␉␉if (((byte = *utf8p++) & 0xc0) != 0x80)␊
394	␉␉␉␉goto stop;␊
395	␊
396	␉␉␉ucs_ch += (byte & 0x3F);␊
397	␉␉␉ucs_ch <<= 6;␊
398	␉␉␉break;␊
399	␉␉default:␊
400	␉␉␉goto stop;␊
401	␉␉}␊
402	␊
403	␉␉/* extract bits 0 - 5 from final byte */␊
404	␉␉if (((byte = *utf8p++) & 0xc0) != 0x80)␊
405	␉␉␉goto stop;␊
406	␉␉ucs_ch += (byte & 0x3F); ␊
407	␊
408	if (byte_order == OSBigEndian)␊
409	*ucsp++ = SWAP_BE16(ucs_ch);␊
410	else␊
411	*ucsp++ = SWAP_LE16(ucs_ch);␊
412	␉}␊
413	stop:␊
414	if (byte_order == OSBigEndian)␊
415	*ucslen = SWAP_BE16(ucsp - bufstart);␊
416	else␊
417	*ucslen = SWAP_LE16(ucsp - bufstart);␊
418	}␊
419

Download this file