/*
 * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 * 
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 2.0 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 * 
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 * 
 * @APPLE_LICENSE_HEADER_END@
 *
 * HFSCompare.c - Functions for working with and comparing HFS nams.
 *
 * Copyright (c) 1999-2000 Apple Computer, Inc.
 *
 * DRI: Josh de Cesare
 */

#include <sl.h>
#include "hfs_CaseTables.h"

#if ! UNCOMPRESSED

static unsigned short *UncompressStructure(struct compressed_block *bp, int count, int size)
{
	int i, j;

	unsigned short *out = malloc(size);

	if (out)
	{
		unsigned short *op = out;
		unsigned short data;

		for (i = 0; i < count; i++, bp++)
		{
			// If this happens (it shouldn't) please fix size and/or double check that count really is
			// the number of elements in the array.
			 // This was a very hard bug to find, so please leave this code here.
			if(out + size <= op + bp->count)
			{
				stop("HFS+ Unicode tables are malformed\n");
			}

			data = bp->data;

			for (j = 0; j < bp->count; j++)
			{
				*op++ = data;

				if (bp->type == kTypeAscending)
				{
					data++;
				}
				else if (bp->type == kTypeAscending256)
				{
					data += 256;
				}
			}
		}

		return out;
	}

	return NULL;
}

static void InitCompareTables(void)
{
	if (gCompareTable == 0)
	{
		gCompareTable = UncompressStructure(gCompareTableCompressed,
                                            kCompareTableNBlocks, kCompareTableDataSize);
		gLowerCaseTable = UncompressStructure(gLowerCaseTableCompressed,
                                            kLowerCaseTableNBlocks, kLowerCaseTableDataSize);
	}
}

#endif /* ! UNCOMPRESSED */

//
//
//	Routine:	FastRelString
//
//	Output:		returns -1 if str1 < str2
//				returns  1 if str1 > str2
//				return	 0 if equal
//
//

int32_t	FastRelString(u_int8_t * str1, u_int8_t * str2)
{
	int32_t  bestGuess;
	u_int8_t length, length2;

#if ! UNCOMPRESED
        InitCompareTables();
#endif

	length = *(str1++);
	length2 = *(str2++);

	if (length == length2)
	{
		bestGuess = 0;
	}
	else if (length < length2)
	{
		bestGuess = -1;
	}
	else
	{
		bestGuess = 1;
		length = length2;
	}

	while (length--)
	{
		u_int32_t aChar, bChar;

		aChar = *(str1++);
		bChar = *(str2++);
		
		if (aChar != bChar)	/* If they don't match exacly, do case conversion */
		{
			u_int16_t aSortWord, bSortWord;

			aSortWord = gCompareTable[aChar];
			bSortWord = gCompareTable[bChar];

			if (aSortWord > bSortWord)
			{
				return 1;
			}

			if (aSortWord < bSortWord)
			{
				return -1;
			}
		}

		/*
		 * If characters match exactly, then go on to next character
		 * immediately without doing any extra work.
		 */
	}
	
	/* If you got to here, then return bestGuess */
	return bestGuess;
}	


//
//	FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering
//
//	    IF				RESULT
//	--------------------------
//	str1 < str2		=>	-1
//	str1 = str2		=>	 0
//	str1 > str2		=>	+1
//
//	The lower case table starts with 256 entries (one for each of the upper bytes
//	of the original Unicode char).  If that entry is zero, then all characters with
//	that upper byte are already case folded.  If the entry is non-zero, then it is
//	the _index_ (not byte offset) of the start of the sub-table for the characters
//	with that upper byte.  All ignorable characters are folded to the value zero.
//
//	In pseudocode:
//
//		Let c = source Unicode character
//		Let table[] = lower case table
//
//		lower = table[highbyte(c)]
//		if (lower == 0)
//			lower = c
//		else
//			lower = table[lower+lowbyte(c)]
//
//		if (lower == 0)
//			ignore this character
//
//	To handle ignorable characters, we now need a loop to find the next valid character.
//	Also, we can't pre-compute the number of characters to compare; the string length might
//	be larger than the number of non-ignorable characters.  Further, we must be able to handle
//	ignorable characters at any point in the string, including as the first or last characters.
//	We use a zero value as a sentinel to detect both end-of-string and ignorable characters.
//	Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename,
//	the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is
//	an invalid Unicode character).
//
//	Pseudocode:
//
//		while (1) {
//			c1 = GetNextValidChar(str1)			//	returns zero if at end of string
//			c2 = GetNextValidChar(str2)
//
//			if (c1 != c2) break					//	found a difference
//
//			if (c1 == 0)						//	reached end of string on both strings at once?
//				return 0;						//	yes, so strings are equal
//		}
//
//		// When we get here, c1 != c2.  So, we just need to determine which one is less.
//		if (c1 < c2)
//			return -1;
//		else
//			return 1;
//

int32_t FastUnicodeCompare( u_int16_t * str1, register u_int32_t length1, u_int16_t * str2, register u_int32_t length2, int byte_order )
{
	register u_int16_t c1,c2;
	register u_int16_t temp;

#if ! UNCOMPRESSED
        InitCompareTables();
#endif

	while (1)
	{
		/* Set default values for c1, c2 in case there are no more valid chars */
		c1 = 0;
		c2 = 0;

		/* Find next non-ignorable char from str1, or zero if no more */
		while (length1 && c1 == 0)
		{
			if (byte_order == OSBigEndian)
			{
				c1 = SWAP_BE16(*(str1++));
			}
			else
			{
				c1 = SWAP_LE16(*(str1++));
			}

			--length1;
			if ((temp = gLowerCaseTable[c1>>8]) != 0)		// is there a subtable for this upper byte?
				c1 = gLowerCaseTable[temp + (c1 & 0x00FF)];	// yes, so fold the char
		}

		/* Find next non-ignorable char from str2, or zero if no more */
		while (length2 && c2 == 0)
		{
			if (byte_order == OSBigEndian)
			{
				c2 = SWAP_BE16(*(str2++));
			}
			else
			{
				c2 = SWAP_LE16(*(str2++));
			}
			--length2;
			if ((temp = gLowerCaseTable[c2>>8]) != 0)		// Is there a subtable for this upper byte?
			{
				c2 = gLowerCaseTable[temp + (c2 & 0x00FF)];	// Yes, so fold the char
			}
		}

		if (c1 != c2)	/* Found a difference, so stop looping */
		{
			break;
		}

		if (c1 == 0)		/* Did we reach the end of both strings at the same time? */
		{
			return 0;	/* Yes, so strings are equal */
		}
	}
	
	if (c1 < c2)
	{
		return -1;
	}
	else
	{
		return 1;
	}
}


//
//  BinaryUnicodeCompare - Compare two Unicode strings; produce a relative ordering
//  Compared using a 16-bit binary comparison (no case folding)
//
int32_t BinaryUnicodeCompare(u_int16_t * str1, u_int32_t length1, u_int16_t * str2, u_int32_t length2)
{
	register u_int16_t c1, c2;
	int32_t bestGuess = 0;
	u_int32_t length;

	if (length1 < length2)
	{
		length = length1;
		--bestGuess;
	}
	else if (length1 > length2)
	{
		length = length2;
		++bestGuess;
	}
	else
	{
		length = length1;
	}

	while (length--)
	{
		c1 = *(str1++);
		c2 = *(str2++);

		if (c1 > c2)
		{
			return (1);
		}

		if (c1 < c2)
		{
			return (-1);
		}
	}

	return (bestGuess);
}


/*
 * UTF-8 (UCS Transformation Format)
 *
 * The following subset of UTF-8 is used to encode UCS-2 filenames. It
 * requires a maximum of three 3 bytes per UCS-2 character.  Only the
 * shortest encoding required to represent the significant UCS-2 bits
 * is legal.
 *
 * UTF-8 Multibyte Codes
 *
 * Bytes   Bits   UCS-2 Min   UCS-2 Max   UTF-8 Byte Sequence (binary)
 * -------------------------------------------------------------------
 *   1       7     0x0000      0x007F      0xxxxxxx
 *   2      11     0x0080      0x07FF      110xxxxx 10xxxxxx
 *   3      16     0x0800      0xFFFF      1110xxxx 10xxxxxx 10xxxxxx
 * -------------------------------------------------------------------
 */


/*
 * utf_encodestr - Encodes the UCS-2 (Unicode) string at ucsp into a
 * null terminated UTF-8 string at utf8p.
 *
 * ucslen is the number of UCS-2 input characters (not bytes)
 * bufsize is the size of the output buffer in bytes
 */
void
utf_encodestr( const u_int16_t * ucsp, int ucslen, u_int8_t * utf8p, u_int32_t bufsize, int byte_order )
{
	u_int8_t *bufend;
	u_int16_t ucs_ch;

	bufend = utf8p + bufsize;

	while (ucslen-- > 0)
	{
                if (byte_order == OSBigEndian)
		{
		    ucs_ch = SWAP_BE16(*ucsp++);
                }
		else
		{
                    ucs_ch = SWAP_LE16(*ucsp++);
		}

		if (ucs_ch < 0x0080)
		{
			if (utf8p >= bufend)
			{
				break;
			}

			if (ucs_ch == '\0')
			{
				continue;	/* Skip over embedded NULLs */
			}

			*utf8p++ = ucs_ch;

		}
		else if (ucs_ch < 0x800)
		{
			if ((utf8p + 1) >= bufend)
			{
				break;
			}

			*utf8p++ = ((ucs_ch >> 6) | 0xc0);
			*utf8p++ = ((ucs_ch & 0x3f) | 0x80);
		}
		else
		{
			if ((utf8p + 2) >= bufend)
			{
				break;
			}

			*utf8p++ = ((ucs_ch >> 12) | 0xe0);
			*utf8p++ = (((ucs_ch >> 6) & 0x3f) | 0x80);
			*utf8p++ = ((ucs_ch & 0x3f) | 0x80);
		}
	}

	*utf8p = '\0';
}


/*
 * utf_decodestr - Decodes the null terminated UTF-8 string at
 * utf8p into a UCS-2 (Unicode) string at ucsp.
 *
 * ucslen is the number of UCS-2 output characters (not bytes)
 * bufsize is the size of the output buffer in bytes
 */
void utf_decodestr(const u_int8_t * utf8p, u_int16_t * ucsp, u_int16_t * ucslen, u_int32_t bufsize, int byte_order)
{
	u_int16_t *bufstart;
	u_int16_t *bufend;
	u_int16_t ucs_ch;
	u_int8_t  byte;

	bufstart = ucsp;
	bufend = (u_int16_t *)((u_int8_t *)ucsp + bufsize);

	while ((byte = *utf8p++) != '\0')
	{
		if (ucsp >= bufend)
		{
			break;
		}

		/* Check for ASCII */
		if (byte < 0x80)
		{
			ucs_ch = byte;

                        if (byte_order == OSBigEndian)
			{
				*ucsp++ = SWAP_BE16(ucs_ch);
                        }
			else
			{
				*ucsp++ = SWAP_LE16(ucs_ch);
			}

			continue;
		}

		switch (byte & 0xf0)
		{
			/* 2 byte sequence */
			case 0xc0:
			case 0xd0:	/* Extract bits 6 - 10 from first byte */
						ucs_ch = ((byte & 0x1F) << 6);
						break;
			/* 3 byte sequence */
			case 0xe0:
						/* Extract bits 12 - 15 from first byte */
						ucs_ch = ((byte & 0x0F) << 6);

						/* Extract bits 6 - 11 from second byte */
						if (((byte = *utf8p++) & 0xc0) != 0x80)
						{
							goto stop;
						}

						ucs_ch += (byte & 0x3F);
						ucs_ch <<= 6;
						break;
			default:
						goto stop;
		}

		/* Extract bits 0 - 5 from final byte */
		if (((byte = *utf8p++) & 0xc0) != 0x80)
		{
			goto stop;
		}

		ucs_ch += (byte & 0x3F);

                if (byte_order == OSBigEndian)
		{
			*ucsp++ = SWAP_BE16(ucs_ch);
                }
		else
		{
			*ucsp++ = SWAP_LE16(ucs_ch);
		}
	}
stop:
	if (byte_order == OSBigEndian)
	{
		*ucslen = SWAP_BE16(ucsp - bufstart);
	}
	else
	{
		*ucslen = SWAP_LE16(ucsp - bufstart);
	}
}