Root/
Source at commit 1137 created 13 years 7 days ago. By meklort, Applies patch in issue #62 to trunk - Adds skip_partno support for multiboot bootloaders. | |
---|---|
1 | /* inffast.c -- fast decoding␊ |
2 | * Copyright (C) 1995-2008, 2010 Mark Adler␊ |
3 | * For conditions of distribution and use, see copyright notice in zlib.h␊ |
4 | */␊ |
5 | ␊ |
6 | #include "zutil.h"␊ |
7 | #include "inftrees.h"␊ |
8 | #include "inflate.h"␊ |
9 | #include "inffast.h"␊ |
10 | ␊ |
11 | #ifndef ASMINF␊ |
12 | ␊ |
13 | /* Allow machine dependent optimization for post-increment or pre-increment.␊ |
14 | Based on testing to date,␊ |
15 | Pre-increment preferred for:␊ |
16 | - PowerPC G3 (Adler)␊ |
17 | - MIPS R5000 (Randers-Pehrson)␊ |
18 | Post-increment preferred for:␊ |
19 | - none␊ |
20 | No measurable difference:␊ |
21 | - Pentium III (Anderson)␊ |
22 | - M68060 (Nikl)␊ |
23 | */␊ |
24 | #ifdef POSTINC␊ |
25 | # define OFF 0␊ |
26 | # define PUP(a) *(a)++␊ |
27 | #else␊ |
28 | # define OFF 1␊ |
29 | # define PUP(a) *++(a)␊ |
30 | #endif␊ |
31 | ␊ |
32 | /*␊ |
33 | Decode literal, length, and distance codes and write out the resulting␊ |
34 | literal and match bytes until either not enough input or output is␊ |
35 | available, an end-of-block is encountered, or a data error is encountered.␊ |
36 | When large enough input and output buffers are supplied to inflate(), for␊ |
37 | example, a 16K input buffer and a 64K output buffer, more than 95% of the␊ |
38 | inflate execution time is spent in this routine.␊ |
39 | ␊ |
40 | Entry assumptions:␊ |
41 | ␊ |
42 | state->mode == LEN␊ |
43 | strm->avail_in >= 6␊ |
44 | strm->avail_out >= 258␊ |
45 | start >= strm->avail_out␊ |
46 | state->bits < 8␊ |
47 | ␊ |
48 | On return, state->mode is one of:␊ |
49 | ␊ |
50 | LEN -- ran out of enough output space or enough available input␊ |
51 | TYPE -- reached end of block code, inflate() to interpret next block␊ |
52 | BAD -- error in block data␊ |
53 | ␊ |
54 | Notes:␊ |
55 | ␊ |
56 | - The maximum input bits used by a length/distance pair is 15 bits for the␊ |
57 | length code, 5 bits for the length extra, 15 bits for the distance code,␊ |
58 | and 13 bits for the distance extra. This totals 48 bits, or six bytes.␊ |
59 | Therefore if strm->avail_in >= 6, then there is enough input to avoid␊ |
60 | checking for available input while decoding.␊ |
61 | ␊ |
62 | - The maximum bytes that a single length/distance pair can output is 258␊ |
63 | bytes, which is the maximum length that can be coded. inflate_fast()␊ |
64 | requires strm->avail_out >= 258 for each loop to avoid checking for␊ |
65 | output space.␊ |
66 | */␊ |
67 | void ZLIB_INTERNAL inflate_fast(strm, start)␊ |
68 | z_streamp strm;␊ |
69 | unsigned start; /* inflate()'s starting value for strm->avail_out */␊ |
70 | {␊ |
71 | struct inflate_state FAR *state;␊ |
72 | unsigned char FAR *in; /* local strm->next_in */␊ |
73 | unsigned char FAR *last; /* while in < last, enough input available */␊ |
74 | unsigned char FAR *out; /* local strm->next_out */␊ |
75 | unsigned char FAR *beg; /* inflate()'s initial strm->next_out */␊ |
76 | unsigned char FAR *end; /* while out < end, enough space available */␊ |
77 | #ifdef INFLATE_STRICT␊ |
78 | unsigned dmax; /* maximum distance from zlib header */␊ |
79 | #endif␊ |
80 | unsigned wsize; /* window size or zero if not using window */␊ |
81 | unsigned whave; /* valid bytes in the window */␊ |
82 | unsigned wnext; /* window write index */␊ |
83 | unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */␊ |
84 | unsigned long hold; /* local strm->hold */␊ |
85 | unsigned bits; /* local strm->bits */␊ |
86 | code const FAR *lcode; /* local strm->lencode */␊ |
87 | code const FAR *dcode; /* local strm->distcode */␊ |
88 | unsigned lmask; /* mask for first level of length codes */␊ |
89 | unsigned dmask; /* mask for first level of distance codes */␊ |
90 | code here; /* retrieved table entry */␊ |
91 | unsigned op; /* code bits, operation, extra bits, or */␊ |
92 | /* window position, window bytes to copy */␊ |
93 | unsigned len; /* match length, unused bytes */␊ |
94 | unsigned dist; /* match distance */␊ |
95 | unsigned char FAR *from; /* where to copy match from */␊ |
96 | ␊ |
97 | /* copy state to local variables */␊ |
98 | state = (struct inflate_state FAR *)strm->state;␊ |
99 | in = strm->next_in - OFF;␊ |
100 | last = in + (strm->avail_in - 5);␊ |
101 | out = strm->next_out - OFF;␊ |
102 | beg = out - (start - strm->avail_out);␊ |
103 | end = out + (strm->avail_out - 257);␊ |
104 | #ifdef INFLATE_STRICT␊ |
105 | dmax = state->dmax;␊ |
106 | #endif␊ |
107 | wsize = state->wsize;␊ |
108 | whave = state->whave;␊ |
109 | wnext = state->wnext;␊ |
110 | window = state->window;␊ |
111 | hold = state->hold;␊ |
112 | bits = state->bits;␊ |
113 | lcode = state->lencode;␊ |
114 | dcode = state->distcode;␊ |
115 | lmask = (1U << state->lenbits) - 1;␊ |
116 | dmask = (1U << state->distbits) - 1;␊ |
117 | ␊ |
118 | /* decode literals and length/distances until end-of-block or not enough␊ |
119 | input data or output space */␊ |
120 | do {␊ |
121 | if (bits < 15) {␊ |
122 | hold += (unsigned long)(PUP(in)) << bits;␊ |
123 | bits += 8;␊ |
124 | hold += (unsigned long)(PUP(in)) << bits;␊ |
125 | bits += 8;␊ |
126 | }␊ |
127 | here = lcode[hold & lmask];␊ |
128 | dolen:␊ |
129 | op = (unsigned)(here.bits);␊ |
130 | hold >>= op;␊ |
131 | bits -= op;␊ |
132 | op = (unsigned)(here.op);␊ |
133 | if (op == 0) { /* literal */␊ |
134 | Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ?␊ |
135 | "inflate: literal '%c'\n" :␊ |
136 | "inflate: literal 0x%02x\n", here.val));␊ |
137 | PUP(out) = (unsigned char)(here.val);␊ |
138 | }␊ |
139 | else if (op & 16) { /* length base */␊ |
140 | len = (unsigned)(here.val);␊ |
141 | op &= 15; /* number of extra bits */␊ |
142 | if (op) {␊ |
143 | if (bits < op) {␊ |
144 | hold += (unsigned long)(PUP(in)) << bits;␊ |
145 | bits += 8;␊ |
146 | }␊ |
147 | len += (unsigned)hold & ((1U << op) - 1);␊ |
148 | hold >>= op;␊ |
149 | bits -= op;␊ |
150 | }␊ |
151 | Tracevv((stderr, "inflate: length %u\n", len));␊ |
152 | if (bits < 15) {␊ |
153 | hold += (unsigned long)(PUP(in)) << bits;␊ |
154 | bits += 8;␊ |
155 | hold += (unsigned long)(PUP(in)) << bits;␊ |
156 | bits += 8;␊ |
157 | }␊ |
158 | here = dcode[hold & dmask];␊ |
159 | dodist:␊ |
160 | op = (unsigned)(here.bits);␊ |
161 | hold >>= op;␊ |
162 | bits -= op;␊ |
163 | op = (unsigned)(here.op);␊ |
164 | if (op & 16) { /* distance base */␊ |
165 | dist = (unsigned)(here.val);␊ |
166 | op &= 15; /* number of extra bits */␊ |
167 | if (bits < op) {␊ |
168 | hold += (unsigned long)(PUP(in)) << bits;␊ |
169 | bits += 8;␊ |
170 | if (bits < op) {␊ |
171 | hold += (unsigned long)(PUP(in)) << bits;␊ |
172 | bits += 8;␊ |
173 | }␊ |
174 | }␊ |
175 | dist += (unsigned)hold & ((1U << op) - 1);␊ |
176 | #ifdef INFLATE_STRICT␊ |
177 | if (dist > dmax) {␊ |
178 | strm->msg = (char *)"invalid distance too far back";␊ |
179 | state->mode = BAD;␊ |
180 | break;␊ |
181 | }␊ |
182 | #endif␊ |
183 | hold >>= op;␊ |
184 | bits -= op;␊ |
185 | Tracevv((stderr, "inflate: distance %u\n", dist));␊ |
186 | op = (unsigned)(out - beg); /* max distance in output */␊ |
187 | if (dist > op) { /* see if copy from window */␊ |
188 | op = dist - op; /* distance back in window */␊ |
189 | if (op > whave) {␊ |
190 | if (state->sane) {␊ |
191 | strm->msg =␊ |
192 | (char *)"invalid distance too far back";␊ |
193 | state->mode = BAD;␊ |
194 | break;␊ |
195 | }␊ |
196 | #ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR␊ |
197 | if (len <= op - whave) {␊ |
198 | do {␊ |
199 | PUP(out) = 0;␊ |
200 | } while (--len);␊ |
201 | continue;␊ |
202 | }␊ |
203 | len -= op - whave;␊ |
204 | do {␊ |
205 | PUP(out) = 0;␊ |
206 | } while (--op > whave);␊ |
207 | if (op == 0) {␊ |
208 | from = out - dist;␊ |
209 | do {␊ |
210 | PUP(out) = PUP(from);␊ |
211 | } while (--len);␊ |
212 | continue;␊ |
213 | }␊ |
214 | #endif␊ |
215 | }␊ |
216 | from = window - OFF;␊ |
217 | if (wnext == 0) { /* very common case */␊ |
218 | from += wsize - op;␊ |
219 | if (op < len) { /* some from window */␊ |
220 | len -= op;␊ |
221 | do {␊ |
222 | PUP(out) = PUP(from);␊ |
223 | } while (--op);␊ |
224 | from = out - dist; /* rest from output */␊ |
225 | }␊ |
226 | }␊ |
227 | else if (wnext < op) { /* wrap around window */␊ |
228 | from += wsize + wnext - op;␊ |
229 | op -= wnext;␊ |
230 | if (op < len) { /* some from end of window */␊ |
231 | len -= op;␊ |
232 | do {␊ |
233 | PUP(out) = PUP(from);␊ |
234 | } while (--op);␊ |
235 | from = window - OFF;␊ |
236 | if (wnext < len) { /* some from start of window */␊ |
237 | op = wnext;␊ |
238 | len -= op;␊ |
239 | do {␊ |
240 | PUP(out) = PUP(from);␊ |
241 | } while (--op);␊ |
242 | from = out - dist; /* rest from output */␊ |
243 | }␊ |
244 | }␊ |
245 | }␊ |
246 | else { /* contiguous in window */␊ |
247 | from += wnext - op;␊ |
248 | if (op < len) { /* some from window */␊ |
249 | len -= op;␊ |
250 | do {␊ |
251 | PUP(out) = PUP(from);␊ |
252 | } while (--op);␊ |
253 | from = out - dist; /* rest from output */␊ |
254 | }␊ |
255 | }␊ |
256 | while (len > 2) {␊ |
257 | PUP(out) = PUP(from);␊ |
258 | PUP(out) = PUP(from);␊ |
259 | PUP(out) = PUP(from);␊ |
260 | len -= 3;␊ |
261 | }␊ |
262 | if (len) {␊ |
263 | PUP(out) = PUP(from);␊ |
264 | if (len > 1)␊ |
265 | PUP(out) = PUP(from);␊ |
266 | }␊ |
267 | }␊ |
268 | else {␊ |
269 | from = out - dist; /* copy direct from output */␊ |
270 | do { /* minimum length is three */␊ |
271 | PUP(out) = PUP(from);␊ |
272 | PUP(out) = PUP(from);␊ |
273 | PUP(out) = PUP(from);␊ |
274 | len -= 3;␊ |
275 | } while (len > 2);␊ |
276 | if (len) {␊ |
277 | PUP(out) = PUP(from);␊ |
278 | if (len > 1)␊ |
279 | PUP(out) = PUP(from);␊ |
280 | }␊ |
281 | }␊ |
282 | }␊ |
283 | else if ((op & 64) == 0) { /* 2nd level distance code */␊ |
284 | here = dcode[here.val + (hold & ((1U << op) - 1))];␊ |
285 | goto dodist;␊ |
286 | }␊ |
287 | else {␊ |
288 | strm->msg = (char *)"invalid distance code";␊ |
289 | state->mode = BAD;␊ |
290 | break;␊ |
291 | }␊ |
292 | }␊ |
293 | else if ((op & 64) == 0) { /* 2nd level length code */␊ |
294 | here = lcode[here.val + (hold & ((1U << op) - 1))];␊ |
295 | goto dolen;␊ |
296 | }␊ |
297 | else if (op & 32) { /* end-of-block */␊ |
298 | Tracevv((stderr, "inflate: end of block\n"));␊ |
299 | state->mode = TYPE;␊ |
300 | break;␊ |
301 | }␊ |
302 | else {␊ |
303 | strm->msg = (char *)"invalid literal/length code";␊ |
304 | state->mode = BAD;␊ |
305 | break;␊ |
306 | }␊ |
307 | } while (in < last && out < end);␊ |
308 | ␊ |
309 | /* return unused bytes (on entry, bits < 8, so in won't go too far back) */␊ |
310 | len = bits >> 3;␊ |
311 | in -= len;␊ |
312 | bits -= len << 3;␊ |
313 | hold &= (1U << bits) - 1;␊ |
314 | ␊ |
315 | /* update state and return */␊ |
316 | strm->next_in = in + OFF;␊ |
317 | strm->next_out = out + OFF;␊ |
318 | strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));␊ |
319 | strm->avail_out = (unsigned)(out < end ?␊ |
320 | 257 + (end - out) : 257 - (out - end));␊ |
321 | state->hold = hold;␊ |
322 | state->bits = bits;␊ |
323 | return;␊ |
324 | }␊ |
325 | ␊ |
326 | /*␊ |
327 | inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):␊ |
328 | - Using bit fields for code structure␊ |
329 | - Different op definition to avoid & for extra bits (do & for table bits)␊ |
330 | - Three separate decoding do-loops for direct, window, and wnext == 0␊ |
331 | - Special case for distance > 1 copies to do overlapped load and store copy␊ |
332 | - Explicit branch predictions (based on measured branch probabilities)␊ |
333 | - Deferring match copy and interspersed it with decoding subsequent codes␊ |
334 | - Swapping literal/length else␊ |
335 | - Swapping window/direct else␊ |
336 | - Larger unrolled copy loops (three is about right)␊ |
337 | - Moving len -= 3 statement into middle of loop␊ |
338 | */␊ |
339 | ␊ |
340 | #endif /* !ASMINF */␊ |
341 |