branches/cparm/i386/libsaio/hfs_compare.c - Chameleon Svn Source Tree - Chameleon open source boot loader project.

Root/branches/cparm/i386/libsaio/hfs_compare.c

1	/*␊
2	* Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.␊
3	*␊
4	* @APPLE_LICENSE_HEADER_START@␊
5	* ␊
6	* The contents of this file constitute Original Code as defined in and␊
7	* are subject to the Apple Public Source License Version 2.0 (the␊
8	* "License"). You may not use this file except in compliance with the␊
9	* License. Please obtain a copy of the License at␊
10	* http://www.apple.com/publicsource and read it before using this file.␊
11	* ␊
12	* This Original Code and all software distributed under the License are␊
13	* distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER␊
14	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,␊
15	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,␊
16	* FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the␊
17	* License for the specific language governing rights and limitations␊
18	* under the License.␊
19	* ␊
20	* @APPLE_LICENSE_HEADER_END@␊
21	*/␊
22	/*␊
23	* HFSCompare.c - Functions for working with and comparing HFS nams.␊
24	*␊
25	* Copyright (c) 1999-2000 Apple Computer, Inc.␊
26	*␊
27	* DRI: Josh de Cesare␊
28	*/␊
29	␊
30	#include "sl.h"␊
31	#include "hfs_CaseTables.h"␊
32	#include "hfs.h"␊
33	␊
34	#if ! UNCOMPRESSED␊
35	␊
36	static unsigned short *␊
37	UncompressStructure(struct compressed_block *bp, int count, int size)␊
38	{␊
39	unsigned short *out = malloc(size);␊
40	␉if (!out) ␊
41	␉{␊
42	␉␉stop("UncompressStructure unable to allocate memory\n");␊
43	␉␉return 0;␊
44	␉}␊
45	unsigned short *op = out;␊
46	unsigned short data;␊
47	int i, j;␊
48	␊
49	for (i=0; i<count; i++, bp++) {␊
50	// If this happens (it shouldn't) please fix size and/or double check that count really is␊
51	// the number of elements in the array.␊
52	// This was a very hard bug to find, so please leave this code here.␊
53	if(out + size <= op + bp->count)␊
54	{␊
55	stop("HFS+ Unicode tables are malformed\n");␊
56	return 0;␊
57	}␊
58	data = bp->data;␊
59	for (j=0; j<bp->count; j++) {␊
60	*op++ = data;␊
61	if (bp->type == kTypeAscending) data++;␊
62	else if (bp->type == kTypeAscending256) data += 256;␊
63	}␊
64	}␊
65	return out;␊
66	}␊
67	␊
68	static void␊
69	InitCompareTables(void)␊
70	{␊
71	if (gCompareTable == 0) {␊
72	gCompareTable = UncompressStructure(gCompareTableCompressed,␊
73	kCompareTableNBlocks, kCompareTableDataSize);␊
74	gLowerCaseTable = UncompressStructure(gLowerCaseTableCompressed,␊
75	kLowerCaseTableNBlocks, kLowerCaseTableDataSize);␊
76	}␊
77	}␊
78	␊
79	#endif /* ! UNCOMPRESSED */␊
80	␊
81	//_______________________________________________________________________␊
82	//␊
83	//␉Routine:␉FastRelString␊
84	//␊
85	//␉Output:␉␉returns -1 if str1 < str2␊
86	//␉␉␉␉returns 1 if str1 > str2␊
87	//␉␉␉␉return␉ 0 if equal␊
88	//␊
89	//_______________________________________________________________________␊
90	␊
91	int32_t␉FastRelString(u_int8_t * str1, u_int8_t * str2)␊
92	{␊
93	␉int32_t bestGuess;␊
94	␉u_int8_t length, length2;␊
95	␊
96	#if ! UNCOMPRESED␊
97	InitCompareTables();␊
98	#endif␊
99	␊
100	␉length = *(str1++);␊
101	␉length2 = *(str2++);␊
102	␊
103	␉if (length == length2)␊
104	␉␉bestGuess = 0;␊
105	␉else if (length < length2)␊
106	␉␉bestGuess = -1;␊
107	␉else␊
108	␉{␊
109	␉␉bestGuess = 1;␊
110	␉␉length = length2;␊
111	␉}␊
112	␊
113	␉while (length--)␊
114	␉{␊
115	␉␉u_int32_t aChar, bChar;␊
116	␊
117	␉␉aChar = *(str1++);␊
118	␉␉bChar = *(str2++);␊
119	␉␉␊
120	␉␉if (aChar != bChar)␉/* If they don't match exacly, do case conversion */␊
121	␉␉{␊
122	␉␉␉u_int16_t aSortWord, bSortWord;␊
123	␊
124	␉␉␉aSortWord = gCompareTable[aChar];␊
125	␉␉␉bSortWord = gCompareTable[bChar];␊
126	␊
127	␉␉␉if (aSortWord > bSortWord)␊
128	␉␉␉␉return 1;␊
129	␊
130	␉␉␉if (aSortWord < bSortWord)␊
131	␉␉␉␉return -1;␊
132	␉␉}␊
133	␉␉␊
134	␉␉/*␊
135	␉␉ * If characters match exactly, then go on to next character␊
136	␉␉ * immediately without doing any extra work.␊
137	␉␉ */␊
138	␉}␊
139	␉␊
140	␉/* if you got to here, then return bestGuess */␊
141	␉return bestGuess;␊
142	}␉␊
143	␊
144	␊
145	//␊
146	//␉FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering␊
147	//␊
148	//␉ IF␉␉␉␉RESULT␊
149	//␉--------------------------␊
150	//␉str1 < str2␉␉=>␉-1␊
151	//␉str1 = str2␉␉=>␉ 0␊
152	//␉str1 > str2␉␉=>␉+1␊
153	//␊
154	//␉The lower case table starts with 256 entries (one for each of the upper bytes␊
155	//␉of the original Unicode char). If that entry is zero, then all characters with␊
156	//␉that upper byte are already case folded. If the entry is non-zero, then it is␊
157	//␉the _index_ (not byte offset) of the start of the sub-table for the characters␊
158	//␉with that upper byte. All ignorable characters are folded to the value zero.␊
159	//␊
160	//␉In pseudocode:␊
161	//␊
162	//␉␉Let c = source Unicode character␊
163	//␉␉Let table[] = lower case table␊
164	//␊
165	//␉␉lower = table[highbyte(c)]␊
166	//␉␉if (lower == 0)␊
167	//␉␉␉lower = c␊
168	//␉␉else␊
169	//␉␉␉lower = table[lower+lowbyte(c)]␊
170	//␊
171	//␉␉if (lower == 0)␊
172	//␉␉␉ignore this character␊
173	//␊
174	//␉To handle ignorable characters, we now need a loop to find the next valid character.␊
175	//␉Also, we can't pre-compute the number of characters to compare; the string length might␊
176	//␉be larger than the number of non-ignorable characters. Further, we must be able to handle␊
177	//␉ignorable characters at any point in the string, including as the first or last characters.␊
178	//␉We use a zero value as a sentinel to detect both end-of-string and ignorable characters.␊
179	//␉Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename,␊
180	//␉the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is␊
181	//␉an invalid Unicode character).␊
182	//␊
183	//␉Pseudocode:␊
184	//␊
185	//␉␉while (1) {␊
186	//␉␉␉c1 = GetNextValidChar(str1)␉␉␉//␉returns zero if at end of string␊
187	//␉␉␉c2 = GetNextValidChar(str2)␊
188	//␊
189	//␉␉␉if (c1 != c2) break␉␉␉␉␉//␉found a difference␊
190	//␊
191	//␉␉␉if (c1 == 0)␉␉␉␉␉␉//␉reached end of string on both strings at once?␊
192	//␉␉␉␉return 0;␉␉␉␉␉␉//␉yes, so strings are equal␊
193	//␉␉}␊
194	//␊
195	//␉␉// When we get here, c1 != c2. So, we just need to determine which one is less.␊
196	//␉␉if (c1 < c2)␊
197	//␉␉␉return -1;␊
198	//␉␉else␊
199	//␉␉␉return 1;␊
200	//␊
201	␊
202	int32_t FastUnicodeCompare( u_int16_t * str1, register u_int32_t length1,␊
203	u_int16_t * str2, register u_int32_t length2, int byte_order )␊
204	{␊
205	␉register u_int16_t c1,c2;␊
206	␉register u_int16_t temp;␊
207	␊
208	#if ! UNCOMPRESSED␊
209	InitCompareTables();␊
210	#endif␊
211	␊
212	␉while (1) {␊
213	␉␉/* Set default values for c1, c2 in case there are no more valid chars */␊
214	␉␉c1 = 0;␊
215	␉␉c2 = 0;␊
216	␊
217	␉␉/* Find next non-ignorable char from str1, or zero if no more */␊
218	␉␉while (length1 && c1 == 0) {␊
219	␉␉␉if (byte_order == OSBigEndian)␊
220	␉␉␉␉c1 = SWAP_BE16(*(str1++));␊
221	␉␉␉else␊
222	␉␉␉␉c1 = SWAP_LE16(*(str1++));␊
223	␉␉␉--length1;␊
224	␉␉␉if ((temp = gLowerCaseTable[c1>>8]) != 0)␉␉// is there a subtable for this upper byte?␊
225	␉␉␉␉c1 = gLowerCaseTable[temp + (c1 & 0x00FF)];␉// yes, so fold the char␊
226	␉␉}␊
227	␊
228	␉␉/* Find next non-ignorable char from str2, or zero if no more */␊
229	␉␉while (length2 && c2 == 0) {␊
230	␉␉␉if (byte_order == OSBigEndian)␊
231	␉␉␉␉c2 = SWAP_BE16(*(str2++));␊
232	␉␉␉else␊
233	␉␉␉␉c2 = SWAP_LE16(*(str2++));␊
234	␉␉␉--length2;␊
235	␉␉␉if ((temp = gLowerCaseTable[c2>>8]) != 0)␉␉// is there a subtable for this upper byte?␊
236	␉␉␉␉c2 = gLowerCaseTable[temp + (c2 & 0x00FF)];␉// yes, so fold the char␊
237	␉␉}␊
238	␊
239	␉␉if (c1 != c2)␉/* found a difference, so stop looping */␊
240	␉␉␉break;␊
241	␉␉␊
242	␉␉if (c1 == 0)␉␉/* did we reach the end of both strings at the same time? */␊
243	␉␉␉return 0;␉/* yes, so strings are equal */␊
244	␉}␊
245	␉␊
246	␉if (c1 < c2)␊
247	␉␉return -1;␊
248	␉else␊
249	␉␉return 1;␊
250	}␊
251	␊
252	␊
253	//␊
254	// BinaryUnicodeCompare - Compare two Unicode strings; produce a relative ordering␊
255	// Compared using a 16-bit binary comparison (no case folding)␊
256	//␊
257	int32_t BinaryUnicodeCompare (u_int16_t * str1, u_int32_t length1,␊
258	u_int16_t * str2, u_int32_t length2)␊
259	{␊
260	register u_int16_t c1, c2;␊
261	int32_t bestGuess;␊
262	u_int32_t length;␊
263	␊
264	bestGuess = 0;␊
265	␊
266	if (length1 < length2) {␊
267	length = length1;␊
268	--bestGuess;␊
269	} else if (length1 > length2) {␊
270	length = length2;␊
271	++bestGuess;␊
272	} else {␊
273	length = length1;␊
274	}␊
275	␊
276	while (length--) {␊
277	c1 = *(str1++);␊
278	c2 = *(str2++);␊
279	␊
280	if (c1 > c2)␊
281	return (1);␊
282	if (c1 < c2)␊
283	return (-1);␊
284	}␊
285	␊
286	return (bestGuess);␊
287	}␊
288	␊
289	␊
290	/*␊
291	* UTF-8 (UCS Transformation Format)␊
292	*␊
293	* The following subset of UTF-8 is used to encode UCS-2 filenames. It␊
294	* requires a maximum of three 3 bytes per UCS-2 character. Only the␊
295	* shortest encoding required to represent the significant UCS-2 bits␊
296	* is legal.␊
297	* ␊
298	* UTF-8 Multibyte Codes␊
299	*␊
300	* Bytes Bits UCS-2 Min UCS-2 Max UTF-8 Byte Sequence (binary)␊
301	* -------------------------------------------------------------------␊
302	* 1 7 0x0000 0x007F 0xxxxxxx␊
303	* 2 11 0x0080 0x07FF 110xxxxx 10xxxxxx␊
304	* 3 16 0x0800 0xFFFF 1110xxxx 10xxxxxx 10xxxxxx␊
305	* -------------------------------------------------------------------␊
306	*/␊
307	␊
308	␊
309	/*␊
310	* utf_encodestr - Encodes the UCS-2 (Unicode) string at ucsp into a␊
311	* null terminated UTF-8 string at utf8p.␊
312	*␊
313	* ucslen is the number of UCS-2 input characters (not bytes)␊
314	* bufsize is the size of the output buffer in bytes␊
315	*/␊
316	void␊
317	utf_encodestr( const u_int16_t * ucsp, int ucslen,␊
318	u_int8_t * utf8p, u_int32_t bufsize, int byte_order )␊
319	{␊
320	␉u_int8_t *bufend;␊
321	␉u_int16_t ucs_ch;␊
322	␊
323	␉bufend = utf8p + bufsize;␊
324	␊
325	␉while (ucslen-- > 0) {␊
326	if (byte_order == OSBigEndian)␊
327	␉␉ ucs_ch = SWAP_BE16(*ucsp++);␊
328	else␊
329	ucs_ch = SWAP_LE16(*ucsp++);␊
330	␊
331	␉␉if (ucs_ch < 0x0080) {␊
332	␉␉␉if (utf8p >= bufend)␊
333	␉␉␉␉break;␊
334	␉␉␉if (ucs_ch == '\0')␊
335	␉␉␉␉continue;␉/* skip over embedded NULLs */␊
336	␉␉␉*utf8p++ = ucs_ch;␊
337	␊
338	␉␉} else if (ucs_ch < 0x800) {␊
339	␉␉␉if ((utf8p + 1) >= bufend)␊
340	␉␉␉␉break;␊
341	␉␉␉*utf8p++ = (ucs_ch >> 6) \| 0xc0;␊
342	␉␉␉*utf8p++ = (ucs_ch & 0x3f) \| 0x80;␊
343	␊
344	␉␉} else {␊
345	␉␉␉if ((utf8p + 2) >= bufend)␊
346	␉␉␉␉break;␊
347	␉␉␉*utf8p++ = (ucs_ch >> 12) \| 0xe0;␊
348	␉␉␉*utf8p++ = ((ucs_ch >> 6) & 0x3f) \| 0x80;␊
349	␉␉␉*utf8p++ = ((ucs_ch) & 0x3f) \| 0x80;␊
350	␉␉}␊
351	␉}␊
352	␊
353	␉*utf8p = '\0';␊
354	}␊
355	␊
356	␊
357	/*␊
358	* utf_decodestr - Decodes the null terminated UTF-8 string at␊
359	* utf8p into a UCS-2 (Unicode) string at ucsp.␊
360	*␊
361	* ucslen is the number of UCS-2 output characters (not bytes)␊
362	* bufsize is the size of the output buffer in bytes␊
363	*/␊
364	void utf_decodestr(const u_int8_t * utf8p, u_int16_t * ucsp, u_int16_t * ucslen, u_int32_t bufsize, int byte_order)␊
365	{␊
366	␉u_int16_t *bufstart;␊
367	␉u_int16_t *bufend;␊
368	␉u_int16_t ucs_ch;␊
369	␉u_int8_t byte;␊
370	␊
371	␉bufstart = ucsp;␊
372	␉bufend = (u_int16_t )((u_int8_t )ucsp + bufsize);␊
373	␊
374	␉while ((byte = *utf8p++) != '\0') {␊
375	␉␉if (ucsp >= bufend)␊
376	␉␉␉break;␊
377	␊
378	␉␉/* check for ascii */␊
379	␉␉if (byte < 0x80) {␊
380	␉␉␉ucs_ch = byte;␊
381	␉␉␉␊
382	if (byte_order == OSBigEndian)␊
383	␉␉ *ucsp++ = SWAP_BE16(ucs_ch);␊
384	else␊
385	*ucsp++ = SWAP_LE16(ucs_ch);␊
386	␉␉␉␊
387	␉␉␉continue;␊
388	␉␉}␊
389	␊
390	␉␉switch (byte & 0xf0) {␊
391	␉␉/* 2 byte sequence*/␊
392	␉␉case 0xc0:␊
393	␉␉case 0xd0:␊
394	␉␉␉/* extract bits 6 - 10 from first byte */␊
395	␉␉␉ucs_ch = (byte & 0x1F) << 6; ␊
396	␉␉␉break;␊
397	␉␉/* 3 byte sequence*/␊
398	␉␉case 0xe0:␊
399	␉␉␉/* extract bits 12 - 15 from first byte */␊
400	␉␉␉ucs_ch = (byte & 0x0F) << 6;␊
401	␊
402	␉␉␉/* extract bits 6 - 11 from second byte */␊
403	␉␉␉if (((byte = *utf8p++) & 0xc0) != 0x80)␊
404	␉␉␉␉goto stop;␊
405	␊
406	␉␉␉ucs_ch += (byte & 0x3F);␊
407	␉␉␉ucs_ch <<= 6;␊
408	␉␉␉break;␊
409	␉␉default:␊
410	␉␉␉goto stop;␊
411	␉␉}␊
412	␊
413	␉␉/* extract bits 0 - 5 from final byte */␊
414	␉␉if (((byte = *utf8p++) & 0xc0) != 0x80)␊
415	␉␉␉goto stop;␊
416	␉␉ucs_ch += (byte & 0x3F); ␊
417	␊
418	if (byte_order == OSBigEndian)␊
419	*ucsp++ = SWAP_BE16(ucs_ch);␊
420	else␊
421	*ucsp++ = SWAP_LE16(ucs_ch);␊
422	␉}␊
423	stop:␊
424	if (byte_order == OSBigEndian)␊
425	*ucslen = SWAP_BE16(ucsp - bufstart);␊
426	else␊
427	*ucslen = SWAP_LE16(ucsp - bufstart);␊
428	}␊
429

Download this file