Chameleon

Chameleon Svn Source Tree

Root/branches/ErmaC/Enoch/i386/boot2/WKdm.h

1/* direct-mapped partial matching compressor with simple 22/10 split
2 *
3 * Compresses buffers using a dictionary based match and partial match
4 * (high bits only or full match) scheme.
5 *
6 * Paul Wilson -- wilson@cs.utexas.edu
7 * Scott F. Kaplan -- sfkaplan@cs.utexas.edu
8 * September 1997
9 */
10
11/* compressed output format, in memory order
12 * 1. a four-word HEADER containing four one-word values:
13 * i. a one-word code saying what algorithm compressed the data
14 * ii. an integer WORD offset into the page saying
15 * where the queue position area starts
16 * iii. an integer WORD offset into the page saying where
17 * the low-bits area starts
18 * iv. an integer WORD offset into the page saying where the
19 * low-bits area ends
20 *
21 * 2. a 64-word TAGS AREA holding one two-bit tag for each word in
22 * the original (1024-word) page, packed 16 per word
23 *
24 * 3. a variable-sized FULL WORDS AREA (always word aligned and an
25 * integral number of words) holding full-word patterns that
26 * were not in the dictionary when encoded (i.e., dictionary misses)
27 *
28 * 4. a variable-sized QUEUE POSITIONS AREA (always word aligned and
29 * an integral number of words) holding four-bit queue positions,
30 * packed eight per word.
31 *
32 * 5. a variable-sized LOW BITS AREA (always word aligned and an
33 * integral number of words) holding ten-bit low-bit patterns
34 * (from partial matches), packed three per word.
35 */
36
37
38#ifdef __cplusplus
39extern "C" {
40#endif
41
42/* ============================================================ */
43/* Included files */
44
45//#include <stdio.h>
46//#include <unistd.h>
47//#include <math.h>
48//#include <strings.h>
49
50typedef unsigned long WK_word;
51
52/* at the moment we have dependencies on the page size. That should
53 * be changed to work for any power-of-two size that's at least 16
54 * words, or something like that
55 */
56
57#define PAGE_SIZE_IN_WORDS 1024
58#define PAGE_SIZE_IN_BYTES 4096
59
60#define DICTIONARY_SIZE 16
61
62/*
63 * macros defining the basic layout of stuff in a page
64 */
65#define HEADER_SIZE_IN_WORDS 4
66#define TAGS_AREA_OFFSET 4
67#define TAGS_AREA_SIZE 64
68
69/* the next few are used during compression to write the header */
70#define SET_QPOS_AREA_START(compr_dest_buf,qpos_start_addr) \
71 (compr_dest_buf[1] = qpos_start_addr - compr_dest_buf)
72#define SET_LOW_BITS_AREA_START(compr_dest_buf,lb_start_addr) \
73 (compr_dest_buf[2] = lb_start_addr - compr_dest_buf)
74#define SET_LOW_BITS_AREA_END(compr_dest_buf,lb_end_addr) \
75 (compr_dest_buf[3] = lb_end_addr - compr_dest_buf)
76
77/* the next few are only use during decompression to read the header */
78#define TAGS_AREA_START(decomp_src_buf) \
79 (decomp_src_buf + TAGS_AREA_OFFSET)
80#define TAGS_AREA_END(decomp_src_buf) \
81 (TAGS_AREA_START(decomp_src_buf) + TAGS_AREA_SIZE)
82#define FULL_WORD_AREA_START(the_buf) TAGS_AREA_END(the_buf)
83#define QPOS_AREA_START(decomp_src_buf) \
84 (decomp_src_buf + decomp_src_buf[1])
85#define LOW_BITS_AREA_START(decomp_src_buf) \
86 (decomp_src_buf + (decomp_src_buf[2]))
87#define QPOS_AREA_END(the_buf) LOW_BITS_AREA_START(the_buf)
88#define LOW_BITS_AREA_END(decomp_src_buf) \
89 (decomp_src_buf + (decomp_src_buf[3]))
90
91/* ============================================================ */
92/* Types and structures */
93
94/* A structure to store each element of the dictionary. */
95typedef WK_word DictionaryElement;
96
97/* ============================================================ */
98/* Misc constants */
99
100#define BITS_PER_WORD 32
101#define BYTES_PER_WORD 4
102#define NUM_LOW_BITS 10
103#define LOW_BITS_MASK 0x3FF
104#define ALL_ONES_MASK 0xFFFFFFFF
105
106#define TWO_BITS_PACKING_MASK 0x03030303
107#define FOUR_BITS_PACKING_MASK 0x0F0F0F0F
108#define TEN_LOW_BITS_MASK 0x000003FF
109#define TWENTY_TWO_HIGH_BITS_MASK 0xFFFFFC00
110
111/* Tag values. NOTE THAT CODE MAY DEPEND ON THE NUMBERS USED.
112 * Check for conditionals doing arithmetic on these things
113 * before changing them
114 */
115#define ZERO_TAG 0x0
116#define PARTIAL_TAG 0x1
117#define MISS_TAG 0x2
118#define EXACT_TAG 0x3
119
120#define BITS_PER_BYTE 8
121
122/* ============================================================ */
123/* Global macros */
124
125/* Shift out the low bits of a pattern to give the high bits pattern.
126 The stripped patterns are used for initial tests of partial
127 matches. */
128#define HIGH_BITS(word_pattern) (word_pattern >> NUM_LOW_BITS)
129
130/* String the high bits of a pattern so the low order bits can
131 be included in an encoding of a partial match. */
132#define LOW_BITS(word_pattern) (word_pattern & LOW_BITS_MASK)
133
134#if defined DEBUG_WK
135#define DEBUG_PRINT_1(string) printf (string)
136#define DEBUG_PRINT_2(string,value) printf(string, value)
137#else
138#define DEBUG_PRINT_1(string)
139#define DEBUG_PRINT_2(string, value)
140#endif
141
142/* Set up the dictionary before performing compression or
143 decompression. Each element is loaded with some value, the
144 high-bits version of that value, and a next pointer. */
145#define PRELOAD_DICTIONARY { \
146 dictionary[0] = 1; \
147 dictionary[1] = 1; \
148 dictionary[2] = 1; \
149 dictionary[3] = 1; \
150 dictionary[4] = 1; \
151 dictionary[5] = 1; \
152 dictionary[6] = 1; \
153 dictionary[7] = 1; \
154 dictionary[8] = 1; \
155 dictionary[9] = 1; \
156 dictionary[10] = 1; \
157 dictionary[11] = 1; \
158 dictionary[12] = 1; \
159 dictionary[13] = 1; \
160 dictionary[14] = 1; \
161 dictionary[15] = 1; \
162}
163
164/* these are the constants for the hash function lookup table.
165 * Only zero maps to zero. The rest of the tabale is the result
166 * of appending 17 randomizations of the multiples of 4 from
167 * 4 to 56. Generated by a Scheme script in hash.scm.
168 */
169#define HASH_LOOKUP_TABLE_CONTENTS { \
170 0, 52, 8, 56, 16, 12, 28, 20, 4, 36, 48, 24, 44, 40, 32, 60, \
171 8, 12, 28, 20, 4, 60, 16, 36, 24, 48, 44, 32, 52, 56, 40, 12, \
172 8, 48, 16, 52, 60, 28, 56, 32, 20, 24, 36, 40, 44, 4, 8, 40, \
173 60, 32, 20, 44, 4, 36, 52, 24, 16, 56, 48, 12, 28, 16, 8, 40, \
174 36, 28, 32, 12, 4, 44, 52, 20, 24, 48, 60, 56, 40, 48, 8, 32, \
175 28, 36, 4, 44, 20, 56, 60, 24, 52, 16, 12, 12, 4, 48, 20, 8, \
176 52, 16, 60, 24, 36, 44, 28, 56, 40, 32, 36, 20, 24, 60, 40, 44, \
177 52, 16, 32, 4, 48, 8, 28, 56, 12, 28, 32, 40, 52, 36, 16, 20, \
178 48, 8, 4, 60, 24, 56, 44, 12, 8, 36, 24, 28, 16, 60, 20, 56, \
179 32, 40, 48, 12, 4, 44, 52, 44, 40, 12, 56, 8, 36, 24, 60, 28, \
180 48, 4, 32, 20, 16, 52, 60, 12, 24, 36, 8, 4, 16, 56, 48, 44, \
181 40, 52, 32, 20, 28, 32, 12, 36, 28, 24, 56, 40, 16, 52, 44, 4, \
182 20, 60, 8, 48, 48, 52, 12, 20, 32, 44, 36, 28, 4, 40, 24, 8, \
183 56, 60, 16, 36, 32, 8, 40, 4, 52, 24, 44, 20, 12, 28, 48, 56, \
184 16, 60, 4, 52, 60, 48, 20, 16, 56, 44, 24, 8, 40, 12, 32, 28, \
185 36, 24, 32, 12, 4, 20, 16, 60, 36, 28, 8, 52, 40, 48, 44, 56 \
186}
187
188#define HASH_TO_DICT_BYTE_OFFSET(pattern) \
189 (hashLookupTable[((pattern) >> 10) & 0xFF])
190
191extern const char hashLookupTable[];
192
193/* EMIT... macros emit bytes or words into the intermediate arrays
194 */
195
196#define EMIT_BYTE(fill_ptr, byte_value) {*fill_ptr = byte_value; fill_ptr++;}
197#define EMIT_WORD(fill_ptr,word_value) {*fill_ptr = word_value; fill_ptr++;}
198
199/* RECORD... macros record the results of modeling in the intermediate
200 * arrays
201 */
202
203#define RECORD_ZERO { EMIT_BYTE(next_tag,ZERO_TAG); }
204
205#define RECORD_EXACT(queue_posn) EMIT_BYTE(next_tag,EXACT_TAG); \
206 EMIT_BYTE(next_qp,(queue_posn));
207
208#define RECORD_PARTIAL(queue_posn,low_bits_pattern) { \
209 EMIT_BYTE(next_tag,PARTIAL_TAG); \
210 EMIT_BYTE(next_qp,(queue_posn)); \
211 EMIT_WORD(next_low_bits,(low_bits_pattern)) }
212
213#define RECORD_MISS(word_pattern) EMIT_BYTE(next_tag,MISS_TAG); \
214 EMIT_WORD(next_full_patt,(word_pattern));
215
216void
217WKdm_decompress (WK_word* src_buf,
218 WK_word* dest_buf,
219 unsigned int words);
220unsigned int
221WKdm_compress (WK_word* src_buf,
222 WK_word* dest_buf,
223 unsigned int num_input_words);
224
225#ifdef __cplusplus
226} /* extern "C" */
227#endif
228

Archive Download this file

Revision: 2871