Root/
Source at commit 1458 created 12 years 10 months ago. By azimutz, Sync these with trunk (r1457). | |
---|---|
1 | /* direct-mapped partial matching compressor with simple 22/10 split␊ |
2 | *␊ |
3 | * Compresses buffers using a dictionary based match and partial match␊ |
4 | * (high bits only or full match) scheme.␊ |
5 | *␊ |
6 | * Paul Wilson -- wilson@cs.utexas.edu␊ |
7 | * Scott F. Kaplan -- sfkaplan@cs.utexas.edu␊ |
8 | * September 1997␊ |
9 | */␊ |
10 | ␊ |
11 | /* compressed output format, in memory order␊ |
12 | * 1. a four-word HEADER containing four one-word values:␊ |
13 | * i. a one-word code saying what algorithm compressed the data␊ |
14 | * ii. an integer WORD offset into the page saying␊ |
15 | * where the queue position area starts␊ |
16 | * iii. an integer WORD offset into the page saying where␊ |
17 | * the low-bits area starts␊ |
18 | * iv. an integer WORD offset into the page saying where the␊ |
19 | * low-bits area ends␊ |
20 | *␊ |
21 | * 2. a 64-word TAGS AREA holding one two-bit tag for each word in ␊ |
22 | * the original (1024-word) page, packed 16 per word␊ |
23 | *␊ |
24 | * 3. a variable-sized FULL WORDS AREA (always word aligned and an␊ |
25 | * integral number of words) holding full-word patterns that␊ |
26 | * were not in the dictionary when encoded (i.e., dictionary misses)␊ |
27 | *␊ |
28 | * 4. a variable-sized QUEUE POSITIONS AREA (always word aligned and␊ |
29 | * an integral number of words) holding four-bit queue positions,␊ |
30 | * packed eight per word.␊ |
31 | *␊ |
32 | * 5. a variable-sized LOW BITS AREA (always word aligned and an␊ |
33 | * integral number of words) holding ten-bit low-bit patterns␊ |
34 | * (from partial matches), packed three per word. ␊ |
35 | */␊ |
36 | ␊ |
37 | ␊ |
38 | #ifdef __cplusplus␊ |
39 | extern "C" {␊ |
40 | #endif␊ |
41 | ␊ |
42 | /* ============================================================ */␊ |
43 | /* Included files */␊ |
44 | ␊ |
45 | //#include <stdio.h>␊ |
46 | //#include <unistd.h>␊ |
47 | //#include <math.h>␊ |
48 | //#include <strings.h>␊ |
49 | ␊ |
50 | typedef unsigned long WK_word;␊ |
51 | ␊ |
52 | /* at the moment we have dependencies on the page size. That should␊ |
53 | * be changed to work for any power-of-two size that's at least 16␊ |
54 | * words, or something like that␊ |
55 | */␊ |
56 | ␊ |
57 | #define PAGE_SIZE_IN_WORDS 1024␊ |
58 | #define PAGE_SIZE_IN_BYTES 4096␊ |
59 | ␊ |
60 | #define DICTIONARY_SIZE 16␊ |
61 | ␊ |
62 | /*␊ |
63 | * macros defining the basic layout of stuff in a page␊ |
64 | */␊ |
65 | #define HEADER_SIZE_IN_WORDS 4␊ |
66 | #define TAGS_AREA_OFFSET 4␊ |
67 | #define TAGS_AREA_SIZE 64␊ |
68 | ␊ |
69 | /* the next few are used during compression to write the header */␊ |
70 | #define SET_QPOS_AREA_START(compr_dest_buf,qpos_start_addr) \␊ |
71 | (compr_dest_buf[1] = qpos_start_addr - compr_dest_buf)␊ |
72 | #define SET_LOW_BITS_AREA_START(compr_dest_buf,lb_start_addr) \␊ |
73 | (compr_dest_buf[2] = lb_start_addr - compr_dest_buf)␊ |
74 | #define SET_LOW_BITS_AREA_END(compr_dest_buf,lb_end_addr) \␊ |
75 | (compr_dest_buf[3] = lb_end_addr - compr_dest_buf)␊ |
76 | ␊ |
77 | /* the next few are only use during decompression to read the header */␊ |
78 | #define TAGS_AREA_START(decomp_src_buf) \␊ |
79 | (decomp_src_buf + TAGS_AREA_OFFSET)␊ |
80 | #define TAGS_AREA_END(decomp_src_buf) \␊ |
81 | (TAGS_AREA_START(decomp_src_buf) + TAGS_AREA_SIZE)␊ |
82 | #define FULL_WORD_AREA_START(the_buf) TAGS_AREA_END(the_buf)␊ |
83 | #define QPOS_AREA_START(decomp_src_buf) \␊ |
84 | (decomp_src_buf + decomp_src_buf[1]) ␊ |
85 | #define LOW_BITS_AREA_START(decomp_src_buf) \␊ |
86 | (decomp_src_buf + (decomp_src_buf[2]))␊ |
87 | #define QPOS_AREA_END(the_buf) LOW_BITS_AREA_START(the_buf)␊ |
88 | #define LOW_BITS_AREA_END(decomp_src_buf) \␊ |
89 | (decomp_src_buf + (decomp_src_buf[3]))␊ |
90 | ␊ |
91 | /* ============================================================ */␊ |
92 | /* Types and structures */␊ |
93 | ␊ |
94 | /* A structure to store each element of the dictionary. */␊ |
95 | typedef WK_word DictionaryElement;␊ |
96 | ␊ |
97 | /* ============================================================ */␊ |
98 | /* Misc constants */␊ |
99 | ␊ |
100 | #define BITS_PER_WORD 32␊ |
101 | #define BYTES_PER_WORD 4␊ |
102 | #define NUM_LOW_BITS 10␊ |
103 | #define LOW_BITS_MASK 0x3FF␊ |
104 | #define ALL_ONES_MASK 0xFFFFFFFF␊ |
105 | ␊ |
106 | #define TWO_BITS_PACKING_MASK 0x03030303␊ |
107 | #define FOUR_BITS_PACKING_MASK 0x0F0F0F0F␊ |
108 | #define TEN_LOW_BITS_MASK 0x000003FF␊ |
109 | #define TWENTY_TWO_HIGH_BITS_MASK 0xFFFFFC00␊ |
110 | ␊ |
111 | /* Tag values. NOTE THAT CODE MAY DEPEND ON THE NUMBERS USED.␊ |
112 | * Check for conditionals doing arithmetic on these things␊ |
113 | * before changing them␊ |
114 | */␊ |
115 | #define ZERO_TAG 0x0␊ |
116 | #define PARTIAL_TAG 0x1␊ |
117 | #define MISS_TAG 0x2␊ |
118 | #define EXACT_TAG 0x3␊ |
119 | ␊ |
120 | #define BITS_PER_BYTE 8␊ |
121 | ␊ |
122 | /* ============================================================ */␊ |
123 | /* Global macros */␊ |
124 | ␊ |
125 | /* Shift out the low bits of a pattern to give the high bits pattern.␊ |
126 | The stripped patterns are used for initial tests of partial␊ |
127 | matches. */␊ |
128 | #define HIGH_BITS(word_pattern) (word_pattern >> NUM_LOW_BITS)␊ |
129 | ␊ |
130 | /* String the high bits of a pattern so the low order bits can␊ |
131 | be included in an encoding of a partial match. */␊ |
132 | #define LOW_BITS(word_pattern) (word_pattern & LOW_BITS_MASK)␊ |
133 | ␊ |
134 | #if defined DEBUG_WK␊ |
135 | #define DEBUG_PRINT_1(string) printf (string)␊ |
136 | #define DEBUG_PRINT_2(string,value) printf(string, value)␊ |
137 | #else␊ |
138 | #define DEBUG_PRINT_1(string)␊ |
139 | #define DEBUG_PRINT_2(string, value)␊ |
140 | #endif␊ |
141 | ␊ |
142 | /* Set up the dictionary before performing compression or␊ |
143 | decompression. Each element is loaded with some value, the␊ |
144 | high-bits version of that value, and a next pointer. */␊ |
145 | #define PRELOAD_DICTIONARY { \␊ |
146 | dictionary[0] = 1; \␊ |
147 | dictionary[1] = 1; \␊ |
148 | dictionary[2] = 1; \␊ |
149 | dictionary[3] = 1; \␊ |
150 | dictionary[4] = 1; \␊ |
151 | dictionary[5] = 1; \␊ |
152 | dictionary[6] = 1; \␊ |
153 | dictionary[7] = 1; \␊ |
154 | dictionary[8] = 1; \␊ |
155 | dictionary[9] = 1; \␊ |
156 | dictionary[10] = 1; \␊ |
157 | dictionary[11] = 1; \␊ |
158 | dictionary[12] = 1; \␊ |
159 | dictionary[13] = 1; \␊ |
160 | dictionary[14] = 1; \␊ |
161 | dictionary[15] = 1; \␊ |
162 | }␊ |
163 | ␊ |
164 | /* these are the constants for the hash function lookup table.␊ |
165 | * Only zero maps to zero. The rest of the tabale is the result␊ |
166 | * of appending 17 randomizations of the multiples of 4 from␊ |
167 | * 4 to 56. Generated by a Scheme script in hash.scm. ␊ |
168 | */␊ |
169 | #define HASH_LOOKUP_TABLE_CONTENTS { \␊ |
170 | 0, 52, 8, 56, 16, 12, 28, 20, 4, 36, 48, 24, 44, 40, 32, 60, \␊ |
171 | 8, 12, 28, 20, 4, 60, 16, 36, 24, 48, 44, 32, 52, 56, 40, 12, \␊ |
172 | 8, 48, 16, 52, 60, 28, 56, 32, 20, 24, 36, 40, 44, 4, 8, 40, \␊ |
173 | 60, 32, 20, 44, 4, 36, 52, 24, 16, 56, 48, 12, 28, 16, 8, 40, \␊ |
174 | 36, 28, 32, 12, 4, 44, 52, 20, 24, 48, 60, 56, 40, 48, 8, 32, \␊ |
175 | 28, 36, 4, 44, 20, 56, 60, 24, 52, 16, 12, 12, 4, 48, 20, 8, \␊ |
176 | 52, 16, 60, 24, 36, 44, 28, 56, 40, 32, 36, 20, 24, 60, 40, 44, \␊ |
177 | 52, 16, 32, 4, 48, 8, 28, 56, 12, 28, 32, 40, 52, 36, 16, 20, \␊ |
178 | 48, 8, 4, 60, 24, 56, 44, 12, 8, 36, 24, 28, 16, 60, 20, 56, \␊ |
179 | 32, 40, 48, 12, 4, 44, 52, 44, 40, 12, 56, 8, 36, 24, 60, 28, \␊ |
180 | 48, 4, 32, 20, 16, 52, 60, 12, 24, 36, 8, 4, 16, 56, 48, 44, \␊ |
181 | 40, 52, 32, 20, 28, 32, 12, 36, 28, 24, 56, 40, 16, 52, 44, 4, \␊ |
182 | 20, 60, 8, 48, 48, 52, 12, 20, 32, 44, 36, 28, 4, 40, 24, 8, \␊ |
183 | 56, 60, 16, 36, 32, 8, 40, 4, 52, 24, 44, 20, 12, 28, 48, 56, \␊ |
184 | 16, 60, 4, 52, 60, 48, 20, 16, 56, 44, 24, 8, 40, 12, 32, 28, \␊ |
185 | 36, 24, 32, 12, 4, 20, 16, 60, 36, 28, 8, 52, 40, 48, 44, 56 \␊ |
186 | }␊ |
187 | ␊ |
188 | #define HASH_TO_DICT_BYTE_OFFSET(pattern) \␊ |
189 | (hashLookupTable[((pattern) >> 10) & 0xFF])␊ |
190 | ␊ |
191 | extern const char hashLookupTable[];␊ |
192 | ␊ |
193 | /* EMIT... macros emit bytes or words into the intermediate arrays␊ |
194 | */␊ |
195 | ␊ |
196 | #define EMIT_BYTE(fill_ptr, byte_value) {*fill_ptr = byte_value; fill_ptr++;}␊ |
197 | #define EMIT_WORD(fill_ptr,word_value) {*fill_ptr = word_value; fill_ptr++;}␊ |
198 | ␊ |
199 | /* RECORD... macros record the results of modeling in the intermediate␊ |
200 | * arrays␊ |
201 | */␊ |
202 | ␊ |
203 | #define RECORD_ZERO { EMIT_BYTE(next_tag,ZERO_TAG); }␊ |
204 | ␊ |
205 | #define RECORD_EXACT(queue_posn) EMIT_BYTE(next_tag,EXACT_TAG); \␊ |
206 | EMIT_BYTE(next_qp,(queue_posn)); ␊ |
207 | ␊ |
208 | #define RECORD_PARTIAL(queue_posn,low_bits_pattern) { \␊ |
209 | EMIT_BYTE(next_tag,PARTIAL_TAG); \␊ |
210 | EMIT_BYTE(next_qp,(queue_posn)); \␊ |
211 | EMIT_WORD(next_low_bits,(low_bits_pattern)) }␊ |
212 | ␊ |
213 | #define RECORD_MISS(word_pattern) EMIT_BYTE(next_tag,MISS_TAG); \␊ |
214 | EMIT_WORD(next_full_patt,(word_pattern)); ␊ |
215 | ␉␉␉␉ ␊ |
216 | void␊ |
217 | WKdm_decompress (WK_word* src_buf,␊ |
218 | ␉␉ WK_word* dest_buf,␊ |
219 | ␉␉ unsigned int words);␊ |
220 | unsigned int␊ |
221 | WKdm_compress (WK_word* src_buf,␊ |
222 | WK_word* dest_buf,␊ |
223 | ␉ unsigned int num_input_words);␊ |
224 | ␊ |
225 | #ifdef __cplusplus␊ |
226 | } /* extern "C" */␊ |
227 | #endif␊ |
228 |