tags/2.3/i386/libsaio/hfs_compare.c - Chameleon Svn Source Tree - Chameleon open source boot loader project.

Root/tags/2.3/i386/libsaio/hfs_compare.c

Source at commit 2862 created 7 years 26 days ago. By ifabio, Tag 2.3 release, bump svn to 2.4
1	/*␊
2	* Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.␊
3	*␊
4	* @APPLE_LICENSE_HEADER_START@␊
5	* ␊
6	* The contents of this file constitute Original Code as defined in and␊
7	* are subject to the Apple Public Source License Version 2.0 (the␊
8	* "License"). You may not use this file except in compliance with the␊
9	* License. Please obtain a copy of the License at␊
10	* http://www.apple.com/publicsource and read it before using this file.␊
11	* ␊
12	* This Original Code and all software distributed under the License are␊
13	* distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER␊
14	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,␊
15	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,␊
16	* FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the␊
17	* License for the specific language governing rights and limitations␊
18	* under the License.␊
19	* ␊
20	* @APPLE_LICENSE_HEADER_END@␊
21	*␊
22	* HFSCompare.c - Functions for working with and comparing HFS nams.␊
23	*␊
24	* Copyright (c) 1999-2000 Apple Computer, Inc.␊
25	*␊
26	* DRI: Josh de Cesare␊
27	*/␊
28	␊
29	#include <sl.h>␊
30	#include "hfs_CaseTables.h"␊
31	␊
32	#if ! UNCOMPRESSED␊
33	␊
34	static unsigned short UncompressStructure(struct compressed_block bp, int count, int size)␊
35	{␊
36	␉int i, j;␊
37	␊
38	␉unsigned short *out = malloc(size);␊
39	␊
40	␉if (out)␊
41	␉{␊
42	␉␉unsigned short *op = out;␊
43	␉␉unsigned short data;␊
44	␊
45	␉␉for (i = 0; i < count; i++, bp++)␊
46	␉␉{␊
47	␉␉␉// If this happens (it shouldn't) please fix size and/or double check that count really is␊
48	␉␉␉// the number of elements in the array.␊
49	␉␉␉ // This was a very hard bug to find, so please leave this code here.␊
50	␉␉␉if(out + size <= op + bp->count)␊
51	␉␉␉{␊
52	␉␉␉␉stop("HFS+ Unicode tables are malformed\n");␊
53	␉␉␉}␊
54	␊
55	␉␉␉data = bp->data;␊
56	␊
57	␉␉␉for (j = 0; j < bp->count; j++)␊
58	␉␉␉{␊
59	␉␉␉␉*op++ = data;␊
60	␊
61	␉␉␉␉if (bp->type == kTypeAscending)␊
62	␉␉␉␉{␊
63	␉␉␉␉␉data++;␊
64	␉␉␉␉}␊
65	␉␉␉␉else if (bp->type == kTypeAscending256)␊
66	␉␉␉␉{␊
67	␉␉␉␉␉data += 256;␊
68	␉␉␉␉}␊
69	␉␉␉}␊
70	␉␉}␊
71	␊
72	␉␉return out;␊
73	␉}␊
74	␊
75	␉return NULL;␊
76	}␊
77	␊
78	static void InitCompareTables(void)␊
79	{␊
80	␉if (gCompareTable == 0)␊
81	␉{␊
82	␉␉gCompareTable = UncompressStructure(gCompareTableCompressed,␊
83	kCompareTableNBlocks, kCompareTableDataSize);␊
84	␉␉gLowerCaseTable = UncompressStructure(gLowerCaseTableCompressed,␊
85	kLowerCaseTableNBlocks, kLowerCaseTableDataSize);␊
86	␉}␊
87	}␊
88	␊
89	#endif /* ! UNCOMPRESSED */␊
90	␊
91	//␊
92	//␊
93	//␉Routine:␉FastRelString␊
94	//␊
95	//␉Output:␉␉returns -1 if str1 < str2␊
96	//␉␉␉␉returns 1 if str1 > str2␊
97	//␉␉␉␉return␉ 0 if equal␊
98	//␊
99	//␊
100	␊
101	int32_t␉FastRelString(u_int8_t * str1, u_int8_t * str2)␊
102	{␊
103	␉int32_t bestGuess;␊
104	␉u_int8_t length, length2;␊
105	␊
106	#if ! UNCOMPRESED␊
107	InitCompareTables();␊
108	#endif␊
109	␊
110	␉length = *(str1++);␊
111	␉length2 = *(str2++);␊
112	␊
113	␉if (length == length2)␊
114	␉{␊
115	␉␉bestGuess = 0;␊
116	␉}␊
117	␉else if (length < length2)␊
118	␉{␊
119	␉␉bestGuess = -1;␊
120	␉}␊
121	␉else␊
122	␉{␊
123	␉␉bestGuess = 1;␊
124	␉␉length = length2;␊
125	␉}␊
126	␊
127	␉while (length--)␊
128	␉{␊
129	␉␉u_int32_t aChar, bChar;␊
130	␊
131	␉␉aChar = *(str1++);␊
132	␉␉bChar = *(str2++);␊
133	␉␉␊
134	␉␉if (aChar != bChar)␉/* If they don't match exacly, do case conversion */␊
135	␉␉{␊
136	␉␉␉u_int16_t aSortWord, bSortWord;␊
137	␊
138	␉␉␉aSortWord = gCompareTable[aChar];␊
139	␉␉␉bSortWord = gCompareTable[bChar];␊
140	␊
141	␉␉␉if (aSortWord > bSortWord)␊
142	␉␉␉{␊
143	␉␉␉␉return 1;␊
144	␉␉␉}␊
145	␊
146	␉␉␉if (aSortWord < bSortWord)␊
147	␉␉␉{␊
148	␉␉␉␉return -1;␊
149	␉␉␉}␊
150	␉␉}␊
151	␊
152	␉␉/*␊
153	␉␉ * If characters match exactly, then go on to next character␊
154	␉␉ * immediately without doing any extra work.␊
155	␉␉ */␊
156	␉}␊
157	␉␊
158	␉/* If you got to here, then return bestGuess */␊
159	␉return bestGuess;␊
160	}␉␊
161	␊
162	␊
163	//␊
164	//␉FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering␊
165	//␊
166	//␉ IF␉␉␉␉RESULT␊
167	//␉--------------------------␊
168	//␉str1 < str2␉␉=>␉-1␊
169	//␉str1 = str2␉␉=>␉ 0␊
170	//␉str1 > str2␉␉=>␉+1␊
171	//␊
172	//␉The lower case table starts with 256 entries (one for each of the upper bytes␊
173	//␉of the original Unicode char). If that entry is zero, then all characters with␊
174	//␉that upper byte are already case folded. If the entry is non-zero, then it is␊
175	//␉the _index_ (not byte offset) of the start of the sub-table for the characters␊
176	//␉with that upper byte. All ignorable characters are folded to the value zero.␊
177	//␊
178	//␉In pseudocode:␊
179	//␊
180	//␉␉Let c = source Unicode character␊
181	//␉␉Let table[] = lower case table␊
182	//␊
183	//␉␉lower = table[highbyte(c)]␊
184	//␉␉if (lower == 0)␊
185	//␉␉␉lower = c␊
186	//␉␉else␊
187	//␉␉␉lower = table[lower+lowbyte(c)]␊
188	//␊
189	//␉␉if (lower == 0)␊
190	//␉␉␉ignore this character␊
191	//␊
192	//␉To handle ignorable characters, we now need a loop to find the next valid character.␊
193	//␉Also, we can't pre-compute the number of characters to compare; the string length might␊
194	//␉be larger than the number of non-ignorable characters. Further, we must be able to handle␊
195	//␉ignorable characters at any point in the string, including as the first or last characters.␊
196	//␉We use a zero value as a sentinel to detect both end-of-string and ignorable characters.␊
197	//␉Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename,␊
198	//␉the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is␊
199	//␉an invalid Unicode character).␊
200	//␊
201	//␉Pseudocode:␊
202	//␊
203	//␉␉while (1) {␊
204	//␉␉␉c1 = GetNextValidChar(str1)␉␉␉//␉returns zero if at end of string␊
205	//␉␉␉c2 = GetNextValidChar(str2)␊
206	//␊
207	//␉␉␉if (c1 != c2) break␉␉␉␉␉//␉found a difference␊
208	//␊
209	//␉␉␉if (c1 == 0)␉␉␉␉␉␉//␉reached end of string on both strings at once?␊
210	//␉␉␉␉return 0;␉␉␉␉␉␉//␉yes, so strings are equal␊
211	//␉␉}␊
212	//␊
213	//␉␉// When we get here, c1 != c2. So, we just need to determine which one is less.␊
214	//␉␉if (c1 < c2)␊
215	//␉␉␉return -1;␊
216	//␉␉else␊
217	//␉␉␉return 1;␊
218	//␊
219	␊
220	int32_t FastUnicodeCompare( u_int16_t * str1, register u_int32_t length1, u_int16_t * str2, register u_int32_t length2, int byte_order )␊
221	{␊
222	␉register u_int16_t c1,c2;␊
223	␉register u_int16_t temp;␊
224	␊
225	#if ! UNCOMPRESSED␊
226	InitCompareTables();␊
227	#endif␊
228	␊
229	␉while (1)␊
230	␉{␊
231	␉␉/* Set default values for c1, c2 in case there are no more valid chars */␊
232	␉␉c1 = 0;␊
233	␉␉c2 = 0;␊
234	␊
235	␉␉/* Find next non-ignorable char from str1, or zero if no more */␊
236	␉␉while (length1 && c1 == 0)␊
237	␉␉{␊
238	␉␉␉if (byte_order == OSBigEndian)␊
239	␉␉␉{␊
240	␉␉␉␉c1 = SWAP_BE16(*(str1++));␊
241	␉␉␉}␊
242	␉␉␉else␊
243	␉␉␉{␊
244	␉␉␉␉c1 = SWAP_LE16(*(str1++));␊
245	␉␉␉}␊
246	␊
247	␉␉␉--length1;␊
248	␉␉␉if ((temp = gLowerCaseTable[c1>>8]) != 0)␉␉// is there a subtable for this upper byte?␊
249	␉␉␉␉c1 = gLowerCaseTable[temp + (c1 & 0x00FF)];␉// yes, so fold the char␊
250	␉␉}␊
251	␊
252	␉␉/* Find next non-ignorable char from str2, or zero if no more */␊
253	␉␉while (length2 && c2 == 0)␊
254	␉␉{␊
255	␉␉␉if (byte_order == OSBigEndian)␊
256	␉␉␉{␊
257	␉␉␉␉c2 = SWAP_BE16(*(str2++));␊
258	␉␉␉}␊
259	␉␉␉else␊
260	␉␉␉{␊
261	␉␉␉␉c2 = SWAP_LE16(*(str2++));␊
262	␉␉␉}␊
263	␉␉␉--length2;␊
264	␉␉␉if ((temp = gLowerCaseTable[c2>>8]) != 0)␉␉// Is there a subtable for this upper byte?␊
265	␉␉␉{␊
266	␉␉␉␉c2 = gLowerCaseTable[temp + (c2 & 0x00FF)];␉// Yes, so fold the char␊
267	␉␉␉}␊
268	␉␉}␊
269	␊
270	␉␉if (c1 != c2)␉/* Found a difference, so stop looping */␊
271	␉␉{␊
272	␉␉␉break;␊
273	␉␉}␊
274	␊
275	␉␉if (c1 == 0)␉␉/* Did we reach the end of both strings at the same time? */␊
276	␉␉{␊
277	␉␉␉return 0;␉/* Yes, so strings are equal */␊
278	␉␉}␊
279	␉}␊
280	␉␊
281	␉if (c1 < c2)␊
282	␉{␊
283	␉␉return -1;␊
284	␉}␊
285	␉else␊
286	␉{␊
287	␉␉return 1;␊
288	␉}␊
289	}␊
290	␊
291	␊
292	//␊
293	// BinaryUnicodeCompare - Compare two Unicode strings; produce a relative ordering␊
294	// Compared using a 16-bit binary comparison (no case folding)␊
295	//␊
296	int32_t BinaryUnicodeCompare(u_int16_t * str1, u_int32_t length1, u_int16_t * str2, u_int32_t length2)␊
297	{␊
298	␉register u_int16_t c1, c2;␊
299	␉int32_t bestGuess = 0;␊
300	␉u_int32_t length;␊
301	␊
302	␉if (length1 < length2)␊
303	␉{␊
304	␉␉length = length1;␊
305	␉␉--bestGuess;␊
306	␉}␊
307	␉else if (length1 > length2)␊
308	␉{␊
309	␉␉length = length2;␊
310	␉␉++bestGuess;␊
311	␉}␊
312	␉else␊
313	␉{␊
314	␉␉length = length1;␊
315	␉}␊
316	␊
317	␉while (length--)␊
318	␉{␊
319	␉␉c1 = *(str1++);␊
320	␉␉c2 = *(str2++);␊
321	␊
322	␉␉if (c1 > c2)␊
323	␉␉{␊
324	␉␉␉return (1);␊
325	␉␉}␊
326	␊
327	␉␉if (c1 < c2)␊
328	␉␉{␊
329	␉␉␉return (-1);␊
330	␉␉}␊
331	␉}␊
332	␊
333	␉return (bestGuess);␊
334	}␊
335	␊
336	␊
337	/*␊
338	* UTF-8 (UCS Transformation Format)␊
339	*␊
340	* The following subset of UTF-8 is used to encode UCS-2 filenames. It␊
341	* requires a maximum of three 3 bytes per UCS-2 character. Only the␊
342	* shortest encoding required to represent the significant UCS-2 bits␊
343	* is legal.␊
344	*␊
345	* UTF-8 Multibyte Codes␊
346	*␊
347	* Bytes Bits UCS-2 Min UCS-2 Max UTF-8 Byte Sequence (binary)␊
348	* -------------------------------------------------------------------␊
349	* 1 7 0x0000 0x007F 0xxxxxxx␊
350	* 2 11 0x0080 0x07FF 110xxxxx 10xxxxxx␊
351	* 3 16 0x0800 0xFFFF 1110xxxx 10xxxxxx 10xxxxxx␊
352	* -------------------------------------------------------------------␊
353	*/␊
354	␊
355	␊
356	/*␊
357	* utf_encodestr - Encodes the UCS-2 (Unicode) string at ucsp into a␊
358	* null terminated UTF-8 string at utf8p.␊
359	*␊
360	* ucslen is the number of UCS-2 input characters (not bytes)␊
361	* bufsize is the size of the output buffer in bytes␊
362	*/␊
363	void␊
364	utf_encodestr( const u_int16_t * ucsp, int ucslen, u_int8_t * utf8p, u_int32_t bufsize, int byte_order )␊
365	{␊
366	␉u_int8_t *bufend;␊
367	␉u_int16_t ucs_ch;␊
368	␊
369	␉bufend = utf8p + bufsize;␊
370	␊
371	␉while (ucslen-- > 0)␊
372	␉{␊
373	if (byte_order == OSBigEndian)␊
374	␉␉{␊
375	␉␉ ucs_ch = SWAP_BE16(*ucsp++);␊
376	}␊
377	␉␉else␊
378	␉␉{␊
379	ucs_ch = SWAP_LE16(*ucsp++);␊
380	␉␉}␊
381	␊
382	␉␉if (ucs_ch < 0x0080)␊
383	␉␉{␊
384	␉␉␉if (utf8p >= bufend)␊
385	␉␉␉{␊
386	␉␉␉␉break;␊
387	␉␉␉}␊
388	␊
389	␉␉␉if (ucs_ch == '\0')␊
390	␉␉␉{␊
391	␉␉␉␉continue;␉/* Skip over embedded NULLs */␊
392	␉␉␉}␊
393	␊
394	␉␉␉*utf8p++ = ucs_ch;␊
395	␊
396	␉␉}␊
397	␉␉else if (ucs_ch < 0x800)␊
398	␉␉{␊
399	␉␉␉if ((utf8p + 1) >= bufend)␊
400	␉␉␉{␊
401	␉␉␉␉break;␊
402	␉␉␉}␊
403	␊
404	␉␉␉*utf8p++ = ((ucs_ch >> 6) \| 0xc0);␊
405	␉␉␉*utf8p++ = ((ucs_ch & 0x3f) \| 0x80);␊
406	␉␉}␊
407	␉␉else␊
408	␉␉{␊
409	␉␉␉if ((utf8p + 2) >= bufend)␊
410	␉␉␉{␊
411	␉␉␉␉break;␊
412	␉␉␉}␊
413	␊
414	␉␉␉*utf8p++ = ((ucs_ch >> 12) \| 0xe0);␊
415	␉␉␉*utf8p++ = (((ucs_ch >> 6) & 0x3f) \| 0x80);␊
416	␉␉␉*utf8p++ = ((ucs_ch & 0x3f) \| 0x80);␊
417	␉␉}␊
418	␉}␊
419	␊
420	␉*utf8p = '\0';␊
421	}␊
422	␊
423	␊
424	/*␊
425	* utf_decodestr - Decodes the null terminated UTF-8 string at␊
426	* utf8p into a UCS-2 (Unicode) string at ucsp.␊
427	*␊
428	* ucslen is the number of UCS-2 output characters (not bytes)␊
429	* bufsize is the size of the output buffer in bytes␊
430	*/␊
431	void utf_decodestr(const u_int8_t * utf8p, u_int16_t * ucsp, u_int16_t * ucslen, u_int32_t bufsize, int byte_order)␊
432	{␊
433	␉u_int16_t *bufstart;␊
434	␉u_int16_t *bufend;␊
435	␉u_int16_t ucs_ch;␊
436	␉u_int8_t byte;␊
437	␊
438	␉bufstart = ucsp;␊
439	␉bufend = (u_int16_t )((u_int8_t )ucsp + bufsize);␊
440	␊
441	␉while ((byte = *utf8p++) != '\0')␊
442	␉{␊
443	␉␉if (ucsp >= bufend)␊
444	␉␉{␊
445	␉␉␉break;␊
446	␉␉}␊
447	␊
448	␉␉/* Check for ASCII */␊
449	␉␉if (byte < 0x80)␊
450	␉␉{␊
451	␉␉␉ucs_ch = byte;␊
452	␊
453	if (byte_order == OSBigEndian)␊
454	␉␉␉{␊
455	␉␉␉␉*ucsp++ = SWAP_BE16(ucs_ch);␊
456	}␊
457	␉␉␉else␊
458	␉␉␉{␊
459	␉␉␉␉*ucsp++ = SWAP_LE16(ucs_ch);␊
460	␉␉␉}␊
461	␊
462	␉␉␉continue;␊
463	␉␉}␊
464	␊
465	␉␉switch (byte & 0xf0)␊
466	␉␉{␊
467	␉␉␉/* 2 byte sequence */␊
468	␉␉␉case 0xc0:␊
469	␉␉␉case 0xd0:␉/* Extract bits 6 - 10 from first byte */␊
470	␉␉␉␉␉␉ucs_ch = ((byte & 0x1F) << 6);␊
471	␉␉␉␉␉␉break;␊
472	␉␉␉/* 3 byte sequence */␊
473	␉␉␉case 0xe0:␊
474	␉␉␉␉␉␉/* Extract bits 12 - 15 from first byte */␊
475	␉␉␉␉␉␉ucs_ch = ((byte & 0x0F) << 6);␊
476	␊
477	␉␉␉␉␉␉/* Extract bits 6 - 11 from second byte */␊
478	␉␉␉␉␉␉if (((byte = *utf8p++) & 0xc0) != 0x80)␊
479	␉␉␉␉␉␉{␊
480	␉␉␉␉␉␉␉goto stop;␊
481	␉␉␉␉␉␉}␊
482	␊
483	␉␉␉␉␉␉ucs_ch += (byte & 0x3F);␊
484	␉␉␉␉␉␉ucs_ch <<= 6;␊
485	␉␉␉␉␉␉break;␊
486	␉␉␉default:␊
487	␉␉␉␉␉␉goto stop;␊
488	␉␉}␊
489	␊
490	␉␉/* Extract bits 0 - 5 from final byte */␊
491	␉␉if (((byte = *utf8p++) & 0xc0) != 0x80)␊
492	␉␉{␊
493	␉␉␉goto stop;␊
494	␉␉}␊
495	␊
496	␉␉ucs_ch += (byte & 0x3F);␊
497	␊
498	if (byte_order == OSBigEndian)␊
499	␉␉{␊
500	␉␉␉*ucsp++ = SWAP_BE16(ucs_ch);␊
501	}␊
502	␉␉else␊
503	␉␉{␊
504	␉␉␉*ucsp++ = SWAP_LE16(ucs_ch);␊
505	␉␉}␊
506	␉}␊
507	stop:␊
508	␉if (byte_order == OSBigEndian)␊
509	␉{␊
510	␉␉*ucslen = SWAP_BE16(ucsp - bufstart);␊
511	␉}␊
512	␉else␊
513	␉{␊
514	␉␉*ucslen = SWAP_LE16(ucsp - bufstart);␊
515	␉}␊
516	}␊
517

Download this file