branches/azimutz/Chazi/i386/libsaio/hfs_compare.c - Chameleon Svn Source Tree - Chameleon open source boot loader project.

Root/branches/azimutz/Chazi/i386/libsaio/hfs_compare.c

1	/*␊
2	* Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.␊
3	*␊
4	* @APPLE_LICENSE_HEADER_START@␊
5	* ␊
6	* The contents of this file constitute Original Code as defined in and␊
7	* are subject to the Apple Public Source License Version 2.0 (the␊
8	* "License"). You may not use this file except in compliance with the␊
9	* License. Please obtain a copy of the License at␊
10	* http://www.apple.com/publicsource and read it before using this file.␊
11	* ␊
12	* This Original Code and all software distributed under the License are␊
13	* distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER␊
14	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,␊
15	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,␊
16	* FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the␊
17	* License for the specific language governing rights and limitations␊
18	* under the License.␊
19	* ␊
20	* @APPLE_LICENSE_HEADER_END@␊
21	*/␊
22	/*␊
23	* HFSCompare.c - Functions for working with and comparing HFS nams.␊
24	*␊
25	* Copyright (c) 1999-2000 Apple Computer, Inc.␊
26	*␊
27	* DRI: Josh de Cesare␊
28	*/␊
29	␊
30	//#include <sl.h>␊
31	#include "sl.h" //Azi:???␊
32	#include "hfs_CaseTables.h"␊
33	␊
34	#if ! UNCOMPRESSED␊
35	␊
36	static unsigned short *␊
37	UncompressStructure(struct compressed_block *bp, int count, int size)␊
38	{␊
39	unsigned short *out = malloc(size);␊
40	unsigned short *op = out;␊
41	unsigned short data;␊
42	int i, j;␊
43	␊
44	for (i=0; i<count; i++, bp++) {␊
45	// If this happens (it shouldn't) please fix size and/or double check that count really is␊
46	// the number of elements in the array.␊
47	// This was a very hard bug to find, so please leave this code here.␊
48	if(out + size <= op + bp->count)␊
49	{␊
50	stop("HFS+ Unicode tables are malformed\n");␊
51	}␊
52	data = bp->data;␊
53	for (j=0; j<bp->count; j++) {␊
54	*op++ = data;␊
55	if (bp->type == kTypeAscending) data++;␊
56	else if (bp->type == kTypeAscending256) data += 256;␊
57	}␊
58	}␊
59	return out;␊
60	}␊
61	␊
62	static void␊
63	InitCompareTables(void)␊
64	{␊
65	if (gCompareTable == 0) {␊
66	gCompareTable = UncompressStructure(gCompareTableCompressed,␊
67	kCompareTableNBlocks, kCompareTableDataSize);␊
68	gLowerCaseTable = UncompressStructure(gLowerCaseTableCompressed,␊
69	kLowerCaseTableNBlocks, kLowerCaseTableDataSize);␊
70	}␊
71	}␊
72	␊
73	#endif /* ! UNCOMPRESSED */␊
74	␊
75	//_______________________________________________________________________␊
76	//␊
77	//␉Routine:␉FastRelString␊
78	//␊
79	//␉Output:␉␉returns -1 if str1 < str2␊
80	//␉␉␉␉returns 1 if str1 > str2␊
81	//␉␉␉␉return␉ 0 if equal␊
82	//␊
83	//_______________________________________________________________________␊
84	␊
85	int32_t␉FastRelString(u_int8_t * str1, u_int8_t * str2)␊
86	{␊
87	␉int32_t bestGuess;␊
88	␉u_int8_t length, length2;␊
89	␊
90	#if ! UNCOMPRESED␊
91	InitCompareTables();␊
92	#endif␊
93	␊
94	␉length = *(str1++);␊
95	␉length2 = *(str2++);␊
96	␊
97	␉if (length == length2)␊
98	␉␉bestGuess = 0;␊
99	␉else if (length < length2)␊
100	␉␉bestGuess = -1;␊
101	␉else␊
102	␉{␊
103	␉␉bestGuess = 1;␊
104	␉␉length = length2;␊
105	␉}␊
106	␊
107	␉while (length--)␊
108	␉{␊
109	␉␉u_int32_t aChar, bChar;␊
110	␊
111	␉␉aChar = *(str1++);␊
112	␉␉bChar = *(str2++);␊
113	␉␉␊
114	␉␉if (aChar != bChar)␉/* If they don't match exacly, do case conversion */␊
115	␉␉{␊
116	␉␉␉u_int16_t aSortWord, bSortWord;␊
117	␊
118	␉␉␉aSortWord = gCompareTable[aChar];␊
119	␉␉␉bSortWord = gCompareTable[bChar];␊
120	␊
121	␉␉␉if (aSortWord > bSortWord)␊
122	␉␉␉␉return 1;␊
123	␊
124	␉␉␉if (aSortWord < bSortWord)␊
125	␉␉␉␉return -1;␊
126	␉␉}␊
127	␉␉␊
128	␉␉/*␊
129	␉␉ * If characters match exactly, then go on to next character␊
130	␉␉ * immediately without doing any extra work.␊
131	␉␉ */␊
132	␉}␊
133	␉␊
134	␉/* if you got to here, then return bestGuess */␊
135	␉return bestGuess;␊
136	}␉␊
137	␊
138	␊
139	//␊
140	//␉FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering␊
141	//␊
142	//␉ IF␉␉␉␉RESULT␊
143	//␉--------------------------␊
144	//␉str1 < str2␉␉=>␉-1␊
145	//␉str1 = str2␉␉=>␉ 0␊
146	//␉str1 > str2␉␉=>␉+1␊
147	//␊
148	//␉The lower case table starts with 256 entries (one for each of the upper bytes␊
149	//␉of the original Unicode char). If that entry is zero, then all characters with␊
150	//␉that upper byte are already case folded. If the entry is non-zero, then it is␊
151	//␉the _index_ (not byte offset) of the start of the sub-table for the characters␊
152	//␉with that upper byte. All ignorable characters are folded to the value zero.␊
153	//␊
154	//␉In pseudocode:␊
155	//␊
156	//␉␉Let c = source Unicode character␊
157	//␉␉Let table[] = lower case table␊
158	//␊
159	//␉␉lower = table[highbyte(c)]␊
160	//␉␉if (lower == 0)␊
161	//␉␉␉lower = c␊
162	//␉␉else␊
163	//␉␉␉lower = table[lower+lowbyte(c)]␊
164	//␊
165	//␉␉if (lower == 0)␊
166	//␉␉␉ignore this character␊
167	//␊
168	//␉To handle ignorable characters, we now need a loop to find the next valid character.␊
169	//␉Also, we can't pre-compute the number of characters to compare; the string length might␊
170	//␉be larger than the number of non-ignorable characters. Further, we must be able to handle␊
171	//␉ignorable characters at any point in the string, including as the first or last characters.␊
172	//␉We use a zero value as a sentinel to detect both end-of-string and ignorable characters.␊
173	//␉Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename,␊
174	//␉the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is␊
175	//␉an invalid Unicode character).␊
176	//␊
177	//␉Pseudocode:␊
178	//␊
179	//␉␉while (1) {␊
180	//␉␉␉c1 = GetNextValidChar(str1)␉␉␉//␉returns zero if at end of string␊
181	//␉␉␉c2 = GetNextValidChar(str2)␊
182	//␊
183	//␉␉␉if (c1 != c2) break␉␉␉␉␉//␉found a difference␊
184	//␊
185	//␉␉␉if (c1 == 0)␉␉␉␉␉␉//␉reached end of string on both strings at once?␊
186	//␉␉␉␉return 0;␉␉␉␉␉␉//␉yes, so strings are equal␊
187	//␉␉}␊
188	//␊
189	//␉␉// When we get here, c1 != c2. So, we just need to determine which one is less.␊
190	//␉␉if (c1 < c2)␊
191	//␉␉␉return -1;␊
192	//␉␉else␊
193	//␉␉␉return 1;␊
194	//␊
195	␊
196	int32_t FastUnicodeCompare( u_int16_t * str1, register u_int32_t length1,␊
197	u_int16_t * str2, register u_int32_t length2, int byte_order )␊
198	{␊
199	␉register u_int16_t c1,c2;␊
200	␉register u_int16_t temp;␊
201	␊
202	#if ! UNCOMPRESSED␊
203	InitCompareTables();␊
204	#endif␊
205	␊
206	␉while (1) {␊
207	␉␉/* Set default values for c1, c2 in case there are no more valid chars */␊
208	␉␉c1 = 0;␊
209	␉␉c2 = 0;␊
210	␊
211	␉␉/* Find next non-ignorable char from str1, or zero if no more */␊
212	␉␉while (length1 && c1 == 0) {␊
213	␉␉␉if (byte_order == OSBigEndian)␊
214	␉␉␉␉c1 = SWAP_BE16(*(str1++));␊
215	␉␉␉else␊
216	␉␉␉␉c1 = SWAP_LE16(*(str1++));␊
217	␉␉␉--length1;␊
218	␉␉␉if ((temp = gLowerCaseTable[c1>>8]) != 0)␉␉// is there a subtable for this upper byte?␊
219	␉␉␉␉c1 = gLowerCaseTable[temp + (c1 & 0x00FF)];␉// yes, so fold the char␊
220	␉␉}␊
221	␊
222	␉␉/* Find next non-ignorable char from str2, or zero if no more */␊
223	␉␉while (length2 && c2 == 0) {␊
224	␉␉␉if (byte_order == OSBigEndian)␊
225	␉␉␉␉c2 = SWAP_BE16(*(str2++));␊
226	␉␉␉else␊
227	␉␉␉␉c2 = SWAP_LE16(*(str2++));␊
228	␉␉␉--length2;␊
229	␉␉␉if ((temp = gLowerCaseTable[c2>>8]) != 0)␉␉// is there a subtable for this upper byte?␊
230	␉␉␉␉c2 = gLowerCaseTable[temp + (c2 & 0x00FF)];␉// yes, so fold the char␊
231	␉␉}␊
232	␊
233	␉␉if (c1 != c2)␉/* found a difference, so stop looping */␊
234	␉␉␉break;␊
235	␉␉␊
236	␉␉if (c1 == 0)␉␉/* did we reach the end of both strings at the same time? */␊
237	␉␉␉return 0;␉/* yes, so strings are equal */␊
238	␉}␊
239	␉␊
240	␉if (c1 < c2)␊
241	␉␉return -1;␊
242	␉else␊
243	␉␉return 1;␊
244	}␊
245	␊
246	␊
247	//␊
248	// BinaryUnicodeCompare - Compare two Unicode strings; produce a relative ordering␊
249	// Compared using a 16-bit binary comparison (no case folding)␊
250	//␊
251	int32_t BinaryUnicodeCompare (u_int16_t * str1, u_int32_t length1,␊
252	u_int16_t * str2, u_int32_t length2)␊
253	{␊
254	register u_int16_t c1, c2;␊
255	int32_t bestGuess;␊
256	u_int32_t length;␊
257	␊
258	bestGuess = 0;␊
259	␊
260	if (length1 < length2) {␊
261	length = length1;␊
262	--bestGuess;␊
263	} else if (length1 > length2) {␊
264	length = length2;␊
265	++bestGuess;␊
266	} else {␊
267	length = length1;␊
268	}␊
269	␊
270	while (length--) {␊
271	c1 = *(str1++);␊
272	c2 = *(str2++);␊
273	␊
274	if (c1 > c2)␊
275	return (1);␊
276	if (c1 < c2)␊
277	return (-1);␊
278	}␊
279	␊
280	return (bestGuess);␊
281	}␊
282	␊
283	␊
284	/*␊
285	* UTF-8 (UCS Transformation Format)␊
286	*␊
287	* The following subset of UTF-8 is used to encode UCS-2 filenames. It␊
288	* requires a maximum of three 3 bytes per UCS-2 character. Only the␊
289	* shortest encoding required to represent the significant UCS-2 bits␊
290	* is legal.␊
291	* ␊
292	* UTF-8 Multibyte Codes␊
293	*␊
294	* Bytes Bits UCS-2 Min UCS-2 Max UTF-8 Byte Sequence (binary)␊
295	* -------------------------------------------------------------------␊
296	* 1 7 0x0000 0x007F 0xxxxxxx␊
297	* 2 11 0x0080 0x07FF 110xxxxx 10xxxxxx␊
298	* 3 16 0x0800 0xFFFF 1110xxxx 10xxxxxx 10xxxxxx␊
299	* -------------------------------------------------------------------␊
300	*/␊
301	␊
302	␊
303	/*␊
304	* utf_encodestr - Encodes the UCS-2 (Unicode) string at ucsp into a␊
305	* null terminated UTF-8 string at utf8p.␊
306	*␊
307	* ucslen is the number of UCS-2 input characters (not bytes)␊
308	* bufsize is the size of the output buffer in bytes␊
309	*/␊
310	void␊
311	utf_encodestr( const u_int16_t * ucsp, int ucslen,␊
312	u_int8_t * utf8p, u_int32_t bufsize, int byte_order )␊
313	{␊
314	␉u_int8_t *bufend;␊
315	␉u_int16_t ucs_ch;␊
316	␊
317	␉bufend = utf8p + bufsize;␊
318	␊
319	␉while (ucslen-- > 0) {␊
320	if (byte_order == OSBigEndian)␊
321	␉␉ ucs_ch = SWAP_BE16(*ucsp++);␊
322	else␊
323	ucs_ch = SWAP_LE16(*ucsp++);␊
324	␊
325	␉␉if (ucs_ch < 0x0080) {␊
326	␉␉␉if (utf8p >= bufend)␊
327	␉␉␉␉break;␊
328	␉␉␉if (ucs_ch == '\0')␊
329	␉␉␉␉continue;␉/* skip over embedded NULLs */␊
330	␉␉␉*utf8p++ = ucs_ch;␊
331	␊
332	␉␉} else if (ucs_ch < 0x800) {␊
333	␉␉␉if ((utf8p + 1) >= bufend)␊
334	␉␉␉␉break;␊
335	␉␉␉*utf8p++ = (ucs_ch >> 6) \| 0xc0;␊
336	␉␉␉*utf8p++ = (ucs_ch & 0x3f) \| 0x80;␊
337	␊
338	␉␉} else {␊
339	␉␉␉if ((utf8p + 2) >= bufend)␊
340	␉␉␉␉break;␊
341	␉␉␉*utf8p++ = (ucs_ch >> 12) \| 0xe0;␊
342	␉␉␉*utf8p++ = ((ucs_ch >> 6) & 0x3f) \| 0x80;␊
343	␉␉␉*utf8p++ = ((ucs_ch) & 0x3f) \| 0x80;␊
344	␉␉}␊
345	␉}␊
346	␊
347	␉*utf8p = '\0';␊
348	}␊
349	␊
350	␊
351	/*␊
352	* utf_decodestr - Decodes the null terminated UTF-8 string at␊
353	* utf8p into a UCS-2 (Unicode) string at ucsp.␊
354	*␊
355	* ucslen is the number of UCS-2 output characters (not bytes)␊
356	* bufsize is the size of the output buffer in bytes␊
357	*/␊
358	void utf_decodestr(const u_int8_t * utf8p, u_int16_t * ucsp, u_int16_t * ucslen, u_int32_t bufsize, int byte_order)␊
359	{␊
360	␉u_int16_t *bufstart;␊
361	␉u_int16_t *bufend;␊
362	␉u_int16_t ucs_ch;␊
363	␉u_int8_t byte;␊
364	␊
365	␉bufstart = ucsp;␊
366	␉bufend = (u_int16_t )((u_int8_t )ucsp + bufsize);␊
367	␊
368	␉while ((byte = *utf8p++) != '\0') {␊
369	␉␉if (ucsp >= bufend)␊
370	␉␉␉break;␊
371	␊
372	␉␉/* check for ascii */␊
373	␉␉if (byte < 0x80) {␊
374	␉␉␉ucs_ch = byte;␊
375	␉␉␉␊
376	if (byte_order == OSBigEndian)␊
377	␉␉ *ucsp++ = SWAP_BE16(ucs_ch);␊
378	else␊
379	*ucsp++ = SWAP_LE16(ucs_ch);␊
380	␉␉␉␊
381	␉␉␉continue;␊
382	␉␉}␊
383	␊
384	␉␉switch (byte & 0xf0) {␊
385	␉␉/* 2 byte sequence*/␊
386	␉␉case 0xc0:␊
387	␉␉case 0xd0:␊
388	␉␉␉/* extract bits 6 - 10 from first byte */␊
389	␉␉␉ucs_ch = (byte & 0x1F) << 6; ␊
390	␉␉␉break;␊
391	␉␉/* 3 byte sequence*/␊
392	␉␉case 0xe0:␊
393	␉␉␉/* extract bits 12 - 15 from first byte */␊
394	␉␉␉ucs_ch = (byte & 0x0F) << 6;␊
395	␊
396	␉␉␉/* extract bits 6 - 11 from second byte */␊
397	␉␉␉if (((byte = *utf8p++) & 0xc0) != 0x80)␊
398	␉␉␉␉goto stop;␊
399	␊
400	␉␉␉ucs_ch += (byte & 0x3F);␊
401	␉␉␉ucs_ch <<= 6;␊
402	␉␉␉break;␊
403	␉␉default:␊
404	␉␉␉goto stop;␊
405	␉␉}␊
406	␊
407	␉␉/* extract bits 0 - 5 from final byte */␊
408	␉␉if (((byte = *utf8p++) & 0xc0) != 0x80)␊
409	␉␉␉goto stop;␊
410	␉␉ucs_ch += (byte & 0x3F); ␊
411	␊
412	if (byte_order == OSBigEndian)␊
413	*ucsp++ = SWAP_BE16(ucs_ch);␊
414	else␊
415	*ucsp++ = SWAP_LE16(ucs_ch);␊
416	␉}␊
417	stop:␊
418	if (byte_order == OSBigEndian)␊
419	*ucslen = SWAP_BE16(ucsp - bufstart);␊
420	else␊
421	*ucslen = SWAP_LE16(ucsp - bufstart);␊
422	}␊
423

Download this file