1 | /*-␊ |
2 | * Copyright (c) 1992, 1993␊ |
3 | *␉The Regents of the University of California. All rights reserved.␊ |
4 | *␊ |
5 | * This software was developed by the Computer Systems Engineering group␊ |
6 | * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and␊ |
7 | * contributed to Berkeley.␊ |
8 | *␊ |
9 | * Redistribution and use in source and binary forms, with or without␊ |
10 | * modification, are permitted provided that the following conditions␊ |
11 | * are met:␊ |
12 | * 1. Redistributions of source code must retain the above copyright␊ |
13 | * notice, this list of conditions and the following disclaimer.␊ |
14 | * 2. Redistributions in binary form must reproduce the above copyright␊ |
15 | * notice, this list of conditions and the following disclaimer in the␊ |
16 | * documentation and/or other materials provided with the distribution.␊ |
17 | * 3. All advertising materials mentioning features or use of this software␊ |
18 | * must display the following acknowledgement:␊ |
19 | *␉This product includes software developed by the University of␊ |
20 | *␉California, Berkeley and its contributors.␊ |
21 | * 4. Neither the name of the University nor the names of its contributors␊ |
22 | * may be used to endorse or promote products derived from this software␊ |
23 | * without specific prior written permission.␊ |
24 | *␊ |
25 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND␊ |
26 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE␊ |
27 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE␊ |
28 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE␊ |
29 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL␊ |
30 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS␊ |
31 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)␊ |
32 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT␊ |
33 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY␊ |
34 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF␊ |
35 | * SUCH DAMAGE.␊ |
36 | *␊ |
37 | * $FreeBSD: src/sys/libkern/qdivrem.c,v 1.8 1999/08/28 00:46:35 peter Exp $␊ |
38 | */␊ |
39 | ␊ |
40 | /*␊ |
41 | * Multiprecision divide. This algorithm is from Knuth vol. 2 (2nd ed),␊ |
42 | * section 4.3.1, pp. 257--259.␊ |
43 | */␊ |
44 | ␊ |
45 | #include "quad.h"␊ |
46 | ␊ |
47 | #define␉B␉(1 << HALF_BITS)␉/* digit base */␊ |
48 | ␊ |
49 | /* Combine two `digits' to make a single two-digit number. */␊ |
50 | #define␉COMBINE(a, b) (((u_long)(a) << HALF_BITS) | (b))␊ |
51 | ␊ |
52 | /* select a type for digits in base B: use unsigned short if they fit */␊ |
53 | #if ULONG_MAX == 0xffffffff && USHRT_MAX >= 0xffff␊ |
54 | typedef unsigned short digit;␊ |
55 | #else␊ |
56 | typedef u_long digit;␊ |
57 | #endif␊ |
58 | ␊ |
59 | /*␊ |
60 | * Shift p[0]..p[len] left `sh' bits, ignoring any bits that␊ |
61 | * `fall out' the left (there never will be any such anyway).␊ |
62 | * We may assume len >= 0. NOTE THAT THIS WRITES len+1 DIGITS.␊ |
63 | */␊ |
64 | static void␊ |
65 | shl(register digit *p, register int len, register int sh)␊ |
66 | {␊ |
67 | ␉register int i;␊ |
68 | ␊ |
69 | ␉for (i = 0; i < len; i++)␊ |
70 | ␉␉p[i] = LHALF(p[i] << sh) | (p[i + 1] >> (HALF_BITS - sh));␊ |
71 | ␉p[i] = LHALF(p[i] << sh);␊ |
72 | }␊ |
73 | ␊ |
74 | /*␊ |
75 | * __qdivrem(u, v, rem) returns u/v and, optionally, sets *rem to u%v.␊ |
76 | *␊ |
77 | * We do this in base 2-sup-HALF_BITS, so that all intermediate products␊ |
78 | * fit within u_long. As a consequence, the maximum length dividend and␊ |
79 | * divisor are 4 `digits' in this base (they are shorter if they have␊ |
80 | * leading zeros).␊ |
81 | */␊ |
82 | u_quad_t␊ |
83 | __qdivrem(uq, vq, arq)␊ |
84 | u_quad_t uq, vq, *arq;␊ |
85 | {␊ |
86 | ␉union uu tmp;␊ |
87 | ␉digit *u, *v, *q;␊ |
88 | ␉register digit v1, v2;␊ |
89 | ␉u_long qhat, rhat, t;␊ |
90 | ␉int m, n, d, j, i;␊ |
91 | ␉digit uspace[5], vspace[5], qspace[5];␊ |
92 | ␊ |
93 | ␉/*␊ |
94 | ␉ * Take care of special cases: divide by zero, and u < v.␊ |
95 | ␉ */␊ |
96 | ␉if (vq == 0) {␊ |
97 | #if 0 // ugly divide by zero reported by CSA, not sure that it's critical, and not sure that the solution provided is good, but it seems to work␊ |
98 | ␊ |
99 | ␉␉/* divide by zero. */␊ |
100 | ␉␉static volatile const unsigned int zero = 0;␊ |
101 | ␊ |
102 | ␉␉tmp.ul[Hi] = tmp.ul[Lo] = 1 / zero; ␊ |
103 | #else␊ |
104 | ␊ |
105 | tmp.ul[Hi] = tmp.ul[Lo] = ~0; // http://mail-index.netbsd.org/tech-kern/2000/10/12/0001.html␊ |
106 | #endif␊ |
107 | ␊ |
108 | ␉␉if (arq)␊ |
109 | ␉␉␉*arq = uq;␊ |
110 | ␉␉return (tmp.q);␊ |
111 | ␉}␊ |
112 | ␉if (uq < vq) {␊ |
113 | ␉␉if (arq)␊ |
114 | ␉␉␉*arq = uq;␊ |
115 | ␉␉return (0);␊ |
116 | ␉}␊ |
117 | ␉u = &uspace[0];␊ |
118 | ␉v = &vspace[0];␊ |
119 | ␉q = &qspace[0];␊ |
120 | ␊ |
121 | ␉/*␊ |
122 | ␉ * Break dividend and divisor into digits in base B, then␊ |
123 | ␉ * count leading zeros to determine m and n. When done, we␊ |
124 | ␉ * will have:␊ |
125 | ␉ *␉u = (u[1]u[2]...u[m+n]) sub B␊ |
126 | ␉ *␉v = (v[1]v[2]...v[n]) sub B␊ |
127 | ␉ *␉v[1] != 0␊ |
128 | ␉ *␉1 < n <= 4 (if n = 1, we use a different division algorithm)␊ |
129 | ␉ *␉m >= 0 (otherwise u < v, which we already checked)␊ |
130 | ␉ *␉m + n = 4␊ |
131 | ␉ * and thus␊ |
132 | ␉ *␉m = 4 - n <= 2␊ |
133 | ␉ */␊ |
134 | ␉tmp.uq = uq;␊ |
135 | ␉u[0] = 0;␊ |
136 | ␉u[1] = HHALF(tmp.ul[Hi]);␊ |
137 | ␉u[2] = LHALF(tmp.ul[Hi]);␊ |
138 | ␉u[3] = HHALF(tmp.ul[Lo]);␊ |
139 | ␉u[4] = LHALF(tmp.ul[Lo]);␊ |
140 | ␉tmp.uq = vq;␊ |
141 | ␉v[1] = HHALF(tmp.ul[Hi]);␊ |
142 | ␉v[2] = LHALF(tmp.ul[Hi]);␊ |
143 | ␉v[3] = HHALF(tmp.ul[Lo]);␊ |
144 | ␉v[4] = LHALF(tmp.ul[Lo]);␊ |
145 | ␉for (n = 4; v[1] == 0; v++) {␊ |
146 | ␉␉if (--n == 1) {␊ |
147 | ␉␉␉u_long rbj;␉/* r*B+u[j] (not root boy jim) */␊ |
148 | ␉␉␉digit q1, q2, q3, q4;␊ |
149 | ␊ |
150 | ␉␉␉/*␊ |
151 | ␉␉␉ * Change of plan, per exercise 16.␊ |
152 | ␉␉␉ *␉r = 0;␊ |
153 | ␉␉␉ *␉for j = 1..4:␊ |
154 | ␉␉␉ *␉␉q[j] = floor((r*B + u[j]) / v),␊ |
155 | ␉␉␉ *␉␉r = (r*B + u[j]) % v;␊ |
156 | ␉␉␉ * We unroll this completely here.␊ |
157 | ␉␉␉ */␊ |
158 | ␉␉␉t = v[2];␉/* nonzero, by definition */␊ |
159 | ␉␉␉q1 = u[1] / t;␊ |
160 | ␉␉␉rbj = COMBINE(u[1] % t, u[2]);␊ |
161 | ␉␉␉q2 = rbj / t;␊ |
162 | ␉␉␉rbj = COMBINE(rbj % t, u[3]);␊ |
163 | ␉␉␉q3 = rbj / t;␊ |
164 | ␉␉␉rbj = COMBINE(rbj % t, u[4]);␊ |
165 | ␉␉␉q4 = rbj / t;␊ |
166 | ␉␉␉if (arq)␊ |
167 | ␉␉␉␉*arq = rbj % t;␊ |
168 | ␉␉␉tmp.ul[Hi] = COMBINE(q1, q2);␊ |
169 | ␉␉␉tmp.ul[Lo] = COMBINE(q3, q4);␊ |
170 | ␉␉␉return (tmp.q);␊ |
171 | ␉␉}␊ |
172 | ␉}␊ |
173 | ␊ |
174 | ␉/*␊ |
175 | ␉ * By adjusting q once we determine m, we can guarantee that␊ |
176 | ␉ * there is a complete four-digit quotient at &qspace[1] when␊ |
177 | ␉ * we finally stop.␊ |
178 | ␉ */␊ |
179 | ␉for (m = 4 - n; u[1] == 0; u++)␊ |
180 | ␉␉m--;␊ |
181 | ␉for (i = 4 - m; --i >= 0;)␊ |
182 | ␉␉q[i] = 0;␊ |
183 | ␉q += 4 - m;␊ |
184 | ␊ |
185 | ␉/*␊ |
186 | ␉ * Here we run Program D, translated from MIX to C and acquiring␊ |
187 | ␉ * a few minor changes.␊ |
188 | ␉ *␊ |
189 | ␉ * D1: choose multiplier 1 << d to ensure v[1] >= B/2.␊ |
190 | ␉ */␊ |
191 | ␉d = 0;␊ |
192 | ␉for (t = v[1]; t < B / 2; t <<= 1)␊ |
193 | ␉␉d++;␊ |
194 | ␉if (d > 0) {␊ |
195 | ␉␉shl(&u[0], m + n, d);␉␉/* u <<= d */␊ |
196 | ␉␉shl(&v[1], n - 1, d);␉␉/* v <<= d */␊ |
197 | ␉}␊ |
198 | ␉/*␊ |
199 | ␉ * D2: j = 0.␊ |
200 | ␉ */␊ |
201 | ␉j = 0;␊ |
202 | ␉v1 = v[1];␉/* for D3 -- note that v[1..n] are constant */␊ |
203 | ␉v2 = v[2];␉/* for D3 */␊ |
204 | ␉do {␊ |
205 | ␉␉register digit uj0, uj1, uj2;␊ |
206 | ␊ |
207 | ␉␉/*␊ |
208 | ␉␉ * D3: Calculate qhat (\^q, in TeX notation).␊ |
209 | ␉␉ * Let qhat = min((u[j]*B + u[j+1])/v[1], B-1), and␊ |
210 | ␉␉ * let rhat = (u[j]*B + u[j+1]) mod v[1].␊ |
211 | ␉␉ * While rhat < B and v[2]*qhat > rhat*B+u[j+2],␊ |
212 | ␉␉ * decrement qhat and increase rhat correspondingly.␊ |
213 | ␉␉ * Note that if rhat >= B, v[2]*qhat < rhat*B.␊ |
214 | ␉␉ */␊ |
215 | ␉␉uj0 = u[j + 0];␉/* for D3 only -- note that u[j+...] change */␊ |
216 | ␉␉uj1 = u[j + 1];␉/* for D3 only */␊ |
217 | ␉␉uj2 = u[j + 2];␉/* for D3 only */␊ |
218 | ␉␉if (uj0 == v1) {␊ |
219 | ␉␉␉qhat = B;␊ |
220 | ␉␉␉rhat = uj1;␊ |
221 | ␉␉␉goto qhat_too_big;␊ |
222 | ␉␉} else {␊ |
223 | ␉␉␉u_long nn = COMBINE(uj0, uj1);␊ |
224 | ␉␉␉qhat = nn / v1;␊ |
225 | ␉␉␉rhat = nn % v1;␊ |
226 | ␉␉}␊ |
227 | ␉␉while (v2 * qhat > COMBINE(rhat, uj2)) {␊ |
228 | qhat_too_big:␊ |
229 | ␉␉␉qhat--;␊ |
230 | ␉␉␉if ((rhat += v1) >= B)␊ |
231 | ␉␉␉␉break;␊ |
232 | ␉␉}␊ |
233 | ␉␉/*␊ |
234 | ␉␉ * D4: Multiply and subtract.␊ |
235 | ␉␉ * The variable `t' holds any borrows across the loop.␊ |
236 | ␉␉ * We split this up so that we do not require v[0] = 0,␊ |
237 | ␉␉ * and to eliminate a final special case.␊ |
238 | ␉␉ */␊ |
239 | ␉␉for (t = 0, i = n; i > 0; i--) {␊ |
240 | ␉␉␉t = u[i + j] - v[i] * qhat - t;␊ |
241 | ␉␉␉u[i + j] = LHALF(t);␊ |
242 | ␉␉␉t = (B - HHALF(t)) & (B - 1);␊ |
243 | ␉␉}␊ |
244 | ␉␉t = u[j] - t;␊ |
245 | ␉␉u[j] = LHALF(t);␊ |
246 | ␉␉/*␊ |
247 | ␉␉ * D5: test remainder.␊ |
248 | ␉␉ * There is a borrow if and only if HHALF(t) is nonzero;␊ |
249 | ␉␉ * in that (rare) case, qhat was too large (by exactly 1).␊ |
250 | ␉␉ * Fix it by adding v[1..n] to u[j..j+n].␊ |
251 | ␉␉ */␊ |
252 | ␉␉if (HHALF(t)) {␊ |
253 | ␉␉␉qhat--;␊ |
254 | ␉␉␉for (t = 0, i = n; i > 0; i--) { /* D6: add back. */␊ |
255 | ␉␉␉␉t += u[i + j] + v[i];␊ |
256 | ␉␉␉␉u[i + j] = LHALF(t);␊ |
257 | ␉␉␉␉t = HHALF(t);␊ |
258 | ␉␉␉}␊ |
259 | ␉␉␉u[j] = LHALF(u[j] + t);␊ |
260 | ␉␉}␊ |
261 | ␉␉q[j] = qhat;␊ |
262 | ␉} while (++j <= m);␉␉/* D7: loop on j. */␊ |
263 | ␊ |
264 | ␉/*␊ |
265 | ␉ * If caller wants the remainder, we have to calculate it as␊ |
266 | ␉ * u[m..m+n] >> d (this is at most n digits and thus fits in␊ |
267 | ␉ * u[m+1..m+n], but we may need more source digits).␊ |
268 | ␉ */␊ |
269 | ␉if (arq) {␊ |
270 | ␉␉if (d) {␊ |
271 | ␉␉␉for (i = m + n; i > m; --i)␊ |
272 | ␉␉␉␉u[i] = (u[i] >> d) |␊ |
273 | LHALF(u[i - 1] << (HALF_BITS - d));␊ |
274 | ␉␉␉u[i] = 0;␊ |
275 | ␉␉}␊ |
276 | ␉␉tmp.ul[Hi] = COMBINE(uspace[1], uspace[2]);␊ |
277 | ␉␉tmp.ul[Lo] = COMBINE(uspace[3], uspace[4]);␊ |
278 | ␉␉*arq = tmp.q;␊ |
279 | ␉}␊ |
280 | ␊ |
281 | ␉tmp.ul[Hi] = COMBINE(qspace[1], qspace[2]);␊ |
282 | ␉tmp.ul[Lo] = COMBINE(qspace[3], qspace[4]);␊ |
283 | ␉return (tmp.q);␊ |
284 | } |