branches/cparm/i386/libsaio/hfs_compare.c - Chameleon Svn Source Tree - Chameleon open source boot loader project.

Root/branches/cparm/i386/libsaio/hfs_compare.c

1	/*␊
2	* Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.␊
3	*␊
4	* @APPLE_LICENSE_HEADER_START@␊
5	* ␊
6	* The contents of this file constitute Original Code as defined in and␊
7	* are subject to the Apple Public Source License Version 2.0 (the␊
8	* "License"). You may not use this file except in compliance with the␊
9	* License. Please obtain a copy of the License at␊
10	* http://www.apple.com/publicsource and read it before using this file.␊
11	* ␊
12	* This Original Code and all software distributed under the License are␊
13	* distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER␊
14	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,␊
15	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,␊
16	* FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the␊
17	* License for the specific language governing rights and limitations␊
18	* under the License.␊
19	* ␊
20	* @APPLE_LICENSE_HEADER_END@␊
21	*/␊
22	/*␊
23	* HFSCompare.c - Functions for working with and comparing HFS nams.␊
24	*␊
25	* Copyright (c) 1999-2000 Apple Computer, Inc.␊
26	*␊
27	* DRI: Josh de Cesare␊
28	*/␊
29	␊
30	#include "sl.h"␊
31	#include "hfs_CaseTables.h"␊
32	#include "hfs.h"␊
33	␊
34	#if ! UNCOMPRESSED␊
35	␊
36	static unsigned short *␊
37	UncompressStructure(struct compressed_block *bp, int count, int size)␊
38	{␊
39	unsigned short *out = malloc(size);␊
40	␉if (!out) ␊
41	␉{␊
42	␉␉stop("UncompressStructure unable to allocate memory\n");␊
43	␉␉return 0;␊
44	␉}␊
45	␉bzero(out,size);␊
46	unsigned short *op = out;␊
47	unsigned short data;␊
48	int i, j;␊
49	␉␊
50	for (i=0; i<count; i++, bp++) {␊
51	// If this happens (it shouldn't) please fix size and/or double check that count really is␊
52	// the number of elements in the array.␊
53	// This was a very hard bug to find, so please leave this code here.␊
54	if(out + size <= op + bp->count)␊
55	{␊
56	stop("HFS+ Unicode tables are malformed\n");␊
57	return 0;␊
58	}␊
59	data = bp->data;␊
60	for (j=0; j<bp->count; j++) {␊
61	*op++ = data;␊
62	if (bp->type == kTypeAscending) data++;␊
63	else if (bp->type == kTypeAscending256) data += 256;␊
64	}␊
65	}␊
66	return out;␊
67	}␊
68	␊
69	static void␊
70	InitCompareTables(void)␊
71	{␊
72	if (gCompareTable == 0) {␊
73	gCompareTable = UncompressStructure(gCompareTableCompressed,␊
74	kCompareTableNBlocks, kCompareTableDataSize);␊
75	gLowerCaseTable = UncompressStructure(gLowerCaseTableCompressed,␊
76	␉␉␉␉␉␉␉␉␉␉␉ kLowerCaseTableNBlocks, kLowerCaseTableDataSize);␊
77	}␊
78	}␊
79	␊
80	#endif /* ! UNCOMPRESSED */␊
81	␊
82	//_______________________________________________________________________␊
83	//␊
84	//␉Routine:␉FastRelString␊
85	//␊
86	//␉Output:␉␉returns -1 if str1 < str2␊
87	//␉␉␉␉returns 1 if str1 > str2␊
88	//␉␉␉␉return␉ 0 if equal␊
89	//␊
90	//_______________________________________________________________________␊
91	␊
92	int32_t␉FastRelString(u_int8_t * str1, u_int8_t * str2)␊
93	{␊
94	␉int32_t bestGuess;␊
95	␉u_int8_t length, length2;␊
96	␉␊
97	#if ! UNCOMPRESED␊
98	␉InitCompareTables();␊
99	#endif␊
100	␉␊
101	␉length = *(str1++);␊
102	␉length2 = *(str2++);␊
103	␉␊
104	␉if (length == length2)␊
105	␉␉bestGuess = 0;␊
106	␉else if (length < length2)␊
107	␉␉bestGuess = -1;␊
108	␉else␊
109	␉{␊
110	␉␉bestGuess = 1;␊
111	␉␉length = length2;␊
112	␉}␊
113	␉␊
114	␉while (length--)␊
115	␉{␊
116	␉␉u_int32_t aChar, bChar;␊
117	␉␉␊
118	␉␉aChar = *(str1++);␊
119	␉␉bChar = *(str2++);␊
120	␉␉␊
121	␉␉if (aChar != bChar)␉/* If they don't match exacly, do case conversion */␊
122	␉␉{␊
123	␉␉␉u_int16_t aSortWord, bSortWord;␊
124	␉␉␉␊
125	␉␉␉aSortWord = gCompareTable[aChar];␊
126	␉␉␉bSortWord = gCompareTable[bChar];␊
127	␉␉␉␊
128	␉␉␉if (aSortWord > bSortWord)␊
129	␉␉␉␉return 1;␊
130	␉␉␉␊
131	␉␉␉if (aSortWord < bSortWord)␊
132	␉␉␉␉return -1;␊
133	␉␉}␊
134	␉␉␊
135	␉␉/*␊
136	␉␉ * If characters match exactly, then go on to next character␊
137	␉␉ * immediately without doing any extra work.␊
138	␉␉ */␊
139	␉}␊
140	␉␊
141	␉/* if you got to here, then return bestGuess */␊
142	␉return bestGuess;␊
143	}␉␊
144	␊
145	␊
146	//␊
147	//␉FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering␊
148	//␊
149	//␉ IF␉␉␉␉RESULT␊
150	//␉--------------------------␊
151	//␉str1 < str2␉␉=>␉-1␊
152	//␉str1 = str2␉␉=>␉ 0␊
153	//␉str1 > str2␉␉=>␉+1␊
154	//␊
155	//␉The lower case table starts with 256 entries (one for each of the upper bytes␊
156	//␉of the original Unicode char). If that entry is zero, then all characters with␊
157	//␉that upper byte are already case folded. If the entry is non-zero, then it is␊
158	//␉the _index_ (not byte offset) of the start of the sub-table for the characters␊
159	//␉with that upper byte. All ignorable characters are folded to the value zero.␊
160	//␊
161	//␉In pseudocode:␊
162	//␊
163	//␉␉Let c = source Unicode character␊
164	//␉␉Let table[] = lower case table␊
165	//␊
166	//␉␉lower = table[highbyte(c)]␊
167	//␉␉if (lower == 0)␊
168	//␉␉␉lower = c␊
169	//␉␉else␊
170	//␉␉␉lower = table[lower+lowbyte(c)]␊
171	//␊
172	//␉␉if (lower == 0)␊
173	//␉␉␉ignore this character␊
174	//␊
175	//␉To handle ignorable characters, we now need a loop to find the next valid character.␊
176	//␉Also, we can't pre-compute the number of characters to compare; the string length might␊
177	//␉be larger than the number of non-ignorable characters. Further, we must be able to handle␊
178	//␉ignorable characters at any point in the string, including as the first or last characters.␊
179	//␉We use a zero value as a sentinel to detect both end-of-string and ignorable characters.␊
180	//␉Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename,␊
181	//␉the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is␊
182	//␉an invalid Unicode character).␊
183	//␊
184	//␉Pseudocode:␊
185	//␊
186	//␉␉while (1) {␊
187	//␉␉␉c1 = GetNextValidChar(str1)␉␉␉//␉returns zero if at end of string␊
188	//␉␉␉c2 = GetNextValidChar(str2)␊
189	//␊
190	//␉␉␉if (c1 != c2) break␉␉␉␉␉//␉found a difference␊
191	//␊
192	//␉␉␉if (c1 == 0)␉␉␉␉␉␉//␉reached end of string on both strings at once?␊
193	//␉␉␉␉return 0;␉␉␉␉␉␉//␉yes, so strings are equal␊
194	//␉␉}␊
195	//␊
196	//␉␉// When we get here, c1 != c2. So, we just need to determine which one is less.␊
197	//␉␉if (c1 < c2)␊
198	//␉␉␉return -1;␊
199	//␉␉else␊
200	//␉␉␉return 1;␊
201	//␊
202	␊
203	int32_t FastUnicodeCompare( u_int16_t * str1, register u_int32_t length1,␊
204	␉␉␉␉␉␉ u_int16_t * str2, register u_int32_t length2, int byte_order )␊
205	{␊
206	␉register u_int16_t c1,c2;␊
207	␉register u_int16_t temp;␊
208	␉␊
209	#if ! UNCOMPRESSED␊
210	␉InitCompareTables();␊
211	#endif␊
212	␉␊
213	␉while (1) {␊
214	␉␉/* Set default values for c1, c2 in case there are no more valid chars */␊
215	␉␉c1 = 0;␊
216	␉␉c2 = 0;␊
217	␉␉␊
218	␉␉/* Find next non-ignorable char from str1, or zero if no more */␊
219	␉␉while (length1 && c1 == 0) {␊
220	␉␉␉if (byte_order == OSBigEndian)␊
221	␉␉␉␉c1 = SWAP_BE16(*(str1++));␊
222	␉␉␉else␊
223	␉␉␉␉c1 = SWAP_LE16(*(str1++));␊
224	␉␉␉--length1;␊
225	␉␉␉if ((temp = gLowerCaseTable[c1>>8]) != 0)␉␉// is there a subtable for this upper byte?␊
226	␉␉␉␉c1 = gLowerCaseTable[temp + (c1 & 0x00FF)];␉// yes, so fold the char␊
227	␉␉}␊
228	␉␉␊
229	␉␉/* Find next non-ignorable char from str2, or zero if no more */␊
230	␉␉while (length2 && c2 == 0) {␊
231	␉␉␉if (byte_order == OSBigEndian)␊
232	␉␉␉␉c2 = SWAP_BE16(*(str2++));␊
233	␉␉␉else␊
234	␉␉␉␉c2 = SWAP_LE16(*(str2++));␊
235	␉␉␉--length2;␊
236	␉␉␉if ((temp = gLowerCaseTable[c2>>8]) != 0)␉␉// is there a subtable for this upper byte?␊
237	␉␉␉␉c2 = gLowerCaseTable[temp + (c2 & 0x00FF)];␉// yes, so fold the char␊
238	␉␉}␊
239	␉␉␊
240	␉␉if (c1 != c2)␉/* found a difference, so stop looping */␊
241	␉␉␉break;␊
242	␉␉␊
243	␉␉if (c1 == 0)␉␉/* did we reach the end of both strings at the same time? */␊
244	␉␉␉return 0;␉/* yes, so strings are equal */␊
245	␉}␊
246	␉␊
247	␉if (c1 < c2)␊
248	␉␉return -1;␊
249	␉else␊
250	␉␉return 1;␊
251	}␊
252	␊
253	␊
254	//␊
255	// BinaryUnicodeCompare - Compare two Unicode strings; produce a relative ordering␊
256	// Compared using a 16-bit binary comparison (no case folding)␊
257	//␊
258	int32_t BinaryUnicodeCompare (u_int16_t * str1, u_int32_t length1,␊
259	u_int16_t * str2, u_int32_t length2)␊
260	{␊
261	␉register u_int16_t c1, c2;␊
262	␉int32_t bestGuess;␊
263	␉u_int32_t length;␊
264	␉␊
265	␉bestGuess = 0;␊
266	␉␊
267	␉if (length1 < length2) {␊
268	␉␉length = length1;␊
269	␉␉--bestGuess;␊
270	␉} else if (length1 > length2) {␊
271	␉␉length = length2;␊
272	␉␉++bestGuess;␊
273	␉} else {␊
274	␉␉length = length1;␊
275	␉}␊
276	␉␊
277	␉while (length--) {␊
278	␉␉c1 = *(str1++);␊
279	␉␉c2 = *(str2++);␊
280	␉␉␊
281	␉␉if (c1 > c2)␊
282	␉␉␉return (1);␊
283	␉␉if (c1 < c2)␊
284	␉␉␉return (-1);␊
285	␉}␊
286	␉␊
287	␉return (bestGuess);␊
288	}␊
289	␊
290	␊
291	/*␊
292	* UTF-8 (UCS Transformation Format)␊
293	*␊
294	* The following subset of UTF-8 is used to encode UCS-2 filenames. It␊
295	* requires a maximum of three 3 bytes per UCS-2 character. Only the␊
296	* shortest encoding required to represent the significant UCS-2 bits␊
297	* is legal.␊
298	* ␊
299	* UTF-8 Multibyte Codes␊
300	*␊
301	* Bytes Bits UCS-2 Min UCS-2 Max UTF-8 Byte Sequence (binary)␊
302	* -------------------------------------------------------------------␊
303	* 1 7 0x0000 0x007F 0xxxxxxx␊
304	* 2 11 0x0080 0x07FF 110xxxxx 10xxxxxx␊
305	* 3 16 0x0800 0xFFFF 1110xxxx 10xxxxxx 10xxxxxx␊
306	* -------------------------------------------------------------------␊
307	*/␊
308	␊
309	␊
310	/*␊
311	* utf_encodestr - Encodes the UCS-2 (Unicode) string at ucsp into a␊
312	* null terminated UTF-8 string at utf8p.␊
313	*␊
314	* ucslen is the number of UCS-2 input characters (not bytes)␊
315	* bufsize is the size of the output buffer in bytes␊
316	*/␊
317	void␊
318	utf_encodestr( const u_int16_t * ucsp, int ucslen,␊
319	␉␉␉ u_int8_t * utf8p, u_int32_t bufsize, int byte_order )␊
320	{␊
321	␉u_int8_t *bufend;␊
322	␉u_int16_t ucs_ch;␊
323	␉␊
324	␉bufend = utf8p + bufsize;␊
325	␉␊
326	␉while (ucslen-- > 0) {␊
327	␉␉if (byte_order == OSBigEndian)␊
328	␉␉ ucs_ch = SWAP_BE16(*ucsp++);␊
329	␉␉else␊
330	␉␉␉ucs_ch = SWAP_LE16(*ucsp++);␊
331	␉␉␊
332	␉␉if (ucs_ch < 0x0080) {␊
333	␉␉␉if (utf8p >= bufend)␊
334	␉␉␉␉break;␊
335	␉␉␉if (ucs_ch == '\0')␊
336	␉␉␉␉continue;␉/* skip over embedded NULLs */␊
337	␉␉␉*utf8p++ = ucs_ch;␊
338	␉␉␉␊
339	␉␉} else if (ucs_ch < 0x800) {␊
340	␉␉␉if ((utf8p + 1) >= bufend)␊
341	␉␉␉␉break;␊
342	␉␉␉*utf8p++ = (ucs_ch >> 6) \| 0xc0;␊
343	␉␉␉*utf8p++ = (ucs_ch & 0x3f) \| 0x80;␊
344	␉␉␉␊
345	␉␉} else {␊
346	␉␉␉if ((utf8p + 2) >= bufend)␊
347	␉␉␉␉break;␊
348	␉␉␉*utf8p++ = (ucs_ch >> 12) \| 0xe0;␊
349	␉␉␉*utf8p++ = ((ucs_ch >> 6) & 0x3f) \| 0x80;␊
350	␉␉␉*utf8p++ = ((ucs_ch) & 0x3f) \| 0x80;␊
351	␉␉}␊
352	␉}␊
353	␉␊
354	␉*utf8p = '\0';␊
355	}␊
356	␊
357	␊
358	/*␊
359	* utf_decodestr - Decodes the null terminated UTF-8 string at␊
360	* utf8p into a UCS-2 (Unicode) string at ucsp.␊
361	*␊
362	* ucslen is the number of UCS-2 output characters (not bytes)␊
363	* bufsize is the size of the output buffer in bytes␊
364	*/␊
365	void utf_decodestr(const u_int8_t * utf8p, u_int16_t * ucsp, u_int16_t * ucslen, u_int32_t bufsize, int byte_order)␊
366	{␊
367	␉u_int16_t *bufstart;␊
368	␉u_int16_t *bufend;␊
369	␉u_int16_t ucs_ch;␊
370	␉u_int8_t byte;␊
371	␉␊
372	␉bufstart = ucsp;␊
373	␉bufend = (u_int16_t )((u_int8_t )ucsp + bufsize);␊
374	␉␊
375	␉while ((byte = *utf8p++) != '\0') {␊
376	␉␉if (ucsp >= bufend)␊
377	␉␉␉break;␊
378	␉␉␊
379	␉␉/* check for ascii */␊
380	␉␉if (byte < 0x80) {␊
381	␉␉␉ucs_ch = byte;␊
382	␉␉␉␊
383	␉␉␉if (byte_order == OSBigEndian)␊
384	␉␉␉␉*ucsp++ = SWAP_BE16(ucs_ch);␊
385	␉␉␉else␊
386	␉␉␉␉*ucsp++ = SWAP_LE16(ucs_ch);␊
387	␉␉␉␊
388	␉␉␉continue;␊
389	␉␉}␊
390	␉␉␊
391	␉␉switch (byte & 0xf0) {␊
392	␉␉␉␉/* 2 byte sequence*/␊
393	␉␉␉case 0xc0:␊
394	␉␉␉case 0xd0:␊
395	␉␉␉␉/* extract bits 6 - 10 from first byte */␊
396	␉␉␉␉ucs_ch = (byte & 0x1F) << 6; ␊
397	␉␉␉␉break;␊
398	␉␉␉␉/* 3 byte sequence*/␊
399	␉␉␉case 0xe0:␊
400	␉␉␉␉/* extract bits 12 - 15 from first byte */␊
401	␉␉␉␉ucs_ch = (byte & 0x0F) << 6;␊
402	␉␉␉␉␊
403	␉␉␉␉/* extract bits 6 - 11 from second byte */␊
404	␉␉␉␉if (((byte = *utf8p++) & 0xc0) != 0x80)␊
405	␉␉␉␉␉goto stop;␊
406	␉␉␉␉␊
407	␉␉␉␉ucs_ch += (byte & 0x3F);␊
408	␉␉␉␉ucs_ch <<= 6;␊
409	␉␉␉␉break;␊
410	␉␉␉default:␊
411	␉␉␉␉goto stop;␊
412	␉␉}␊
413	␉␉␊
414	␉␉/* extract bits 0 - 5 from final byte */␊
415	␉␉if (((byte = *utf8p++) & 0xc0) != 0x80)␊
416	␉␉␉goto stop;␊
417	␉␉ucs_ch += (byte & 0x3F); ␊
418	␉␉␊
419	␉␉if (byte_order == OSBigEndian)␊
420	␉␉␉*ucsp++ = SWAP_BE16(ucs_ch);␊
421	␉␉else␊
422	␉␉␉*ucsp++ = SWAP_LE16(ucs_ch);␊
423	␉}␊
424	stop:␊
425	␉if (byte_order == OSBigEndian)␊
426	␉␉*ucslen = SWAP_BE16(ucsp - bufstart);␊
427	␉else␊
428	␉␉*ucslen = SWAP_LE16(ucsp - bufstart);␊
429	}␊
430

Download this file