Root/
Source at commit 1308 created 12 years 8 months ago. By meklort, Add a few placeholders for file io | |
---|---|
1 | /*␊ |
2 | * vsscanf.c␊ |
3 | *␊ |
4 | * vsscanf(), from which the rest of the scanf()␊ |
5 | * family is built␊ |
6 | */␊ |
7 | #include <string.h>␊ |
8 | #include <stdio.h>␊ |
9 | #include <stdarg.h>␊ |
10 | #include <limits.h>␊ |
11 | #include <ctype.h>␊ |
12 | #include <stdint.h>␊ |
13 | ␊ |
/*
 * Numeric parser used for every integer conversion below; defined in
 * another translation unit.  vsscanf() relies on it storing the end of
 * the parsed text through endptr, and treats *endptr == nptr as
 * "nothing converted".
 * NOTE(review): presumably strtoumax() limited to at most n input
 * characters - confirm against its definition.
 */
extern uintmax_t strntoumax(const char *nptr, char **endptr, int base, size_t n);

/* Number of bits in a long, for C libraries whose <limits.h> lacks it */
#ifndef LONG_BIT
#define LONG_BIT (CHAR_BIT*sizeof(long))
#endif
19 | ␊ |
/* Bit flags describing the conversion specification being parsed */
enum flags {
	FL_SPLAT = 1 << 0,	/* '*' seen: parse the field but do not assign */
	FL_INV   = 1 << 1,	/* %[ character set is inverted */
	FL_WIDTH = 1 << 2,	/* An explicit field width was given */
	FL_MINUS = 1 << 3,	/* Negative number */
};
26 | ␊ |
/*
 * Integer "rank" of the destination object.  The values are consecutive
 * so that each 'h' modifier can step down by one and each 'l' modifier
 * can step up by one, starting from rank_int.
 */
enum ranks {
	rank_char = -2,
	rank_short,		/* -1 */
	rank_int,		/*  0 */
	rank_long,		/*  1 */
	rank_longlong,		/*  2 */
	rank_ptr = INT_MAX	/* Out-of-band value used for pointers */
};

/* Limits the accumulated rank is clamped to */
#define MIN_RANK	rank_char
#define MAX_RANK	rank_longlong

/* Ranks selected by the 'j', 'z' and 't' length modifiers */
#define INTMAX_RANK	rank_longlong
#define SIZE_T_RANK	rank_long
#define PTRDIFF_T_RANK	rank_long
42 | ␊ |
/* Reason the scanning loop is being abandoned */
enum bail {
	bail_none = 0,		/* Keep going, nothing went wrong */
	bail_eof  = 1,		/* Input string ran out */
	bail_err  = 2		/* Input did not match the format */
};
48 | ␊ |
/*
 * Return a pointer to the first character at or after p for which
 * isspace() is false.  The terminating NUL is not whitespace, so the
 * scan never runs past the end of the string.
 */
static inline const char *skipspace(const char *p)
{
	for (; isspace((unsigned char)*p); p++)
		;
	return p;
}
55 | ␊ |
56 | #undef set_bit␊ |
57 | static inline void set_bit(unsigned long *bitmap, unsigned int bit)␊ |
58 | {␊ |
59 | ␉bitmap[bit / LONG_BIT] |= 1UL << (bit % LONG_BIT);␊ |
60 | }␊ |
61 | ␊ |
62 | #undef test_bit␊ |
63 | static inline int test_bit(unsigned long *bitmap, unsigned int bit)␊ |
64 | {␊ |
65 | ␉return (int)(bitmap[bit / LONG_BIT] >> (bit % LONG_BIT)) & 1;␊ |
66 | }␊ |
67 | ␊ |
68 | int vsscanf(const char *buffer, const char *format, va_list ap)␊ |
69 | {␊ |
70 | ␉const char *p = format;␊ |
71 | ␉char ch;␊ |
72 | ␉unsigned char uc;␊ |
73 | ␉const char *q = buffer;␊ |
74 | ␉const char *qq;␊ |
75 | ␉uintmax_t val = 0;␊ |
76 | ␉int rank = rank_int;␉/* Default rank */␊ |
77 | ␉unsigned int width = UINT_MAX;␊ |
78 | ␉int base;␊ |
79 | ␉enum flags flags = 0;␊ |
80 | ␉enum {␊ |
81 | ␉␉st_normal,␉/* Ground state */␊ |
82 | ␉␉st_flags,␉/* Special flags */␊ |
83 | ␉␉st_width,␉/* Field width */␊ |
84 | ␉␉st_modifiers,␉/* Length or conversion modifiers */␊ |
85 | ␉␉st_match_init,␉/* Initial state of %[ sequence */␊ |
86 | ␉␉st_match,␉/* Main state of %[ sequence */␊ |
87 | ␉␉st_match_range,␉/* After - in a %[ sequence */␊ |
88 | ␉} state = st_normal;␊ |
89 | ␉char *sarg = NULL;␉/* %s %c or %[ string argument */␊ |
90 | ␉enum bail bail = bail_none;␊ |
91 | ␉int sign;␊ |
92 | ␉int converted = 0;␉/* Successful conversions */␊ |
93 | ␉unsigned long matchmap[((1 << CHAR_BIT) + (LONG_BIT - 1)) / LONG_BIT];␊ |
94 | ␉int matchinv = 0;␉/* Is match map inverted? */␊ |
95 | ␉unsigned char range_start = 0;␊ |
96 | ␊ |
97 | ␉while ((ch = *p++) && !bail) {␊ |
98 | ␉␉switch (state) {␊ |
99 | ␉␉case st_normal:␊ |
100 | ␉␉␉if (ch == '%') {␊ |
101 | ␉␉␉␉state = st_flags;␊ |
102 | ␉␉␉␉flags = 0;␊ |
103 | ␉␉␉␉rank = rank_int;␊ |
104 | ␉␉␉␉width = UINT_MAX;␊ |
105 | ␉␉␉} else if (isspace((unsigned char)ch)) {␊ |
106 | ␉␉␉␉q = skipspace(q);␊ |
107 | ␉␉␉} else {␊ |
108 | ␉␉␉␉if (*q == ch)␊ |
109 | ␉␉␉␉␉q++;␊ |
110 | ␉␉␉␉else␊ |
111 | ␉␉␉␉␉bail = bail_err; /* Match failure */␊ |
112 | ␉␉␉}␊ |
113 | ␉␉␉break;␊ |
114 | ␊ |
115 | ␉␉case st_flags:␊ |
116 | ␉␉␉switch (ch) {␊ |
117 | ␉␉␉case '*':␊ |
118 | ␉␉␉␉flags |= FL_SPLAT;␊ |
119 | ␉␉␉␉break;␊ |
120 | ␉␉␉case '0'...'9':␊ |
121 | ␉␉␉␉width = (ch - '0');␊ |
122 | ␉␉␉␉state = st_width;␊ |
123 | ␉␉␉␉flags |= FL_WIDTH;␊ |
124 | ␉␉␉␉break;␊ |
125 | ␉␉␉default:␊ |
126 | ␉␉␉␉state = st_modifiers;␊ |
127 | ␉␉␉␉p--;␉/* Process this character again */␊ |
128 | ␉␉␉␉break;␊ |
129 | ␉␉␉}␊ |
130 | ␉␉␉break;␊ |
131 | ␊ |
132 | ␉␉case st_width:␊ |
133 | ␉␉␉if (ch >= '0' && ch <= '9') {␊ |
134 | ␉␉␉␉width = width * 10 + (ch - '0');␊ |
135 | ␉␉␉} else {␊ |
136 | ␉␉␉␉state = st_modifiers;␊ |
137 | ␉␉␉␉p--;␉/* Process this character again */␊ |
138 | ␉␉␉}␊ |
139 | ␉␉␉break;␊ |
140 | ␊ |
141 | ␉␉case st_modifiers:␊ |
142 | ␉␉␉switch (ch) {␊ |
143 | ␉␉␉␉/* Length modifiers - nonterminal sequences */␊ |
144 | ␉␉␉case 'h':␊ |
145 | ␉␉␉␉rank--;␉/* Shorter rank */␊ |
146 | ␉␉␉␉break;␊ |
147 | ␉␉␉case 'l':␊ |
148 | ␉␉␉␉rank++;␉/* Longer rank */␊ |
149 | ␉␉␉␉break;␊ |
150 | ␉␉␉case 'j':␊ |
151 | ␉␉␉␉rank = INTMAX_RANK;␊ |
152 | ␉␉␉␉break;␊ |
153 | ␉␉␉case 'z':␊ |
154 | ␉␉␉␉rank = SIZE_T_RANK;␊ |
155 | ␉␉␉␉break;␊ |
156 | ␉␉␉case 't':␊ |
157 | ␉␉␉␉rank = PTRDIFF_T_RANK;␊ |
158 | ␉␉␉␉break;␊ |
159 | ␉␉␉case 'L':␊ |
160 | ␉␉␉case 'q':␊ |
161 | ␉␉␉␉rank = rank_longlong;␉/* long double/long long */␊ |
162 | ␉␉␉␉break;␊ |
163 | ␊ |
164 | ␉␉␉default:␊ |
165 | ␉␉␉␉/* Output modifiers - terminal sequences */␊ |
166 | ␉␉␉␉/* Next state will be normal */␊ |
167 | ␉␉␉␉state = st_normal;␊ |
168 | ␊ |
169 | ␉␉␉␉/* Canonicalize rank */␊ |
170 | ␉␉␉␉if (rank < MIN_RANK)␊ |
171 | ␉␉␉␉␉rank = MIN_RANK;␊ |
172 | ␉␉␉␉else if (rank > MAX_RANK)␊ |
173 | ␉␉␉␉␉rank = MAX_RANK;␊ |
174 | ␊ |
175 | ␉␉␉␉switch (ch) {␊ |
176 | ␉␉␉␉case 'P':␉/* Upper case pointer */␊ |
177 | ␉␉␉␉case 'p':␉/* Pointer */␊ |
178 | ␉␉␉␉␉rank = rank_ptr;␊ |
179 | ␉␉␉␉␉base = 0;␊ |
180 | ␉␉␉␉␉sign = 0;␊ |
181 | ␉␉␉␉␉goto scan_int;␊ |
182 | ␊ |
183 | ␉␉␉␉case 'i':␉/* Base-independent integer */␊ |
184 | ␉␉␉␉␉base = 0;␊ |
185 | ␉␉␉␉␉sign = 1;␊ |
186 | ␉␉␉␉␉goto scan_int;␊ |
187 | ␊ |
188 | ␉␉␉␉case 'd':␉/* Decimal integer */␊ |
189 | ␉␉␉␉␉base = 10;␊ |
190 | ␉␉␉␉␉sign = 1;␊ |
191 | ␉␉␉␉␉goto scan_int;␊ |
192 | ␊ |
193 | ␉␉␉␉case 'o':␉/* Octal integer */␊ |
194 | ␉␉␉␉␉base = 8;␊ |
195 | ␉␉␉␉␉sign = 0;␊ |
196 | ␉␉␉␉␉goto scan_int;␊ |
197 | ␊ |
198 | ␉␉␉␉case 'u':␉/* Unsigned decimal integer */␊ |
199 | ␉␉␉␉␉base = 10;␊ |
200 | ␉␉␉␉␉sign = 0;␊ |
201 | ␉␉␉␉␉goto scan_int;␊ |
202 | ␊ |
203 | ␉␉␉␉case 'x':␉/* Hexadecimal integer */␊ |
204 | ␉␉␉␉case 'X':␊ |
205 | ␉␉␉␉␉base = 16;␊ |
206 | ␉␉␉␉␉sign = 0;␊ |
207 | ␉␉␉␉␉goto scan_int;␊ |
208 | ␊ |
209 | ␉␉␉␉case 'n':␉/* # of characters consumed */␊ |
210 | ␉␉␉␉␉val = (q - buffer);␊ |
211 | ␉␉␉␉␉goto set_integer;␊ |
212 | ␊ |
213 | ␉␉␉␉ scan_int:␊ |
214 | ␉␉␉␉␉q = skipspace(q);␊ |
215 | ␉␉␉␉␉if (!*q) {␊ |
216 | ␉␉␉␉␉␉bail = bail_eof;␊ |
217 | ␉␉␉␉␉␉break;␊ |
218 | ␉␉␉␉␉}␊ |
219 | ␉␉␉␉␉val =␊ |
220 | ␉␉␉␉␉ strntoumax(q, (char **)&qq, base,␊ |
221 | ␉␉␉␉␉␉ width);␊ |
222 | ␉␉␉␉␉if (qq == q) {␊ |
223 | ␉␉␉␉␉␉bail = bail_err;␊ |
224 | ␉␉␉␉␉␉break;␊ |
225 | ␉␉␉␉␉}␊ |
226 | ␉␉␉␉␉q = qq;␊ |
227 | ␉␉␉␉␉if (!(flags & FL_SPLAT))␊ |
228 | ␉␉␉␉␉␉converted++;␊ |
229 | ␉␉␉␉␉/* fall through */␊ |
230 | ␊ |
231 | ␉␉␉␉ set_integer:␊ |
232 | ␉␉␉␉␉if (!(flags & FL_SPLAT)) {␊ |
233 | ␉␉␉␉␉␉switch (rank) {␊ |
234 | ␉␉␉␉␉␉case rank_char:␊ |
235 | ␉␉␉␉␉␉␉*va_arg(ap,␊ |
236 | ␉␉␉␉␉␉␉␉unsigned char *)␊ |
237 | ␉␉␉␉␉␉␉␉= val;␊ |
238 | ␉␉␉␉␉␉␉break;␊ |
239 | ␉␉␉␉␉␉case rank_short:␊ |
240 | ␉␉␉␉␉␉␉*va_arg(ap,␊ |
241 | ␉␉␉␉␉␉␉␉unsigned short␊ |
242 | ␉␉␉␉␉␉␉␉*) = val;␊ |
243 | ␉␉␉␉␉␉␉break;␊ |
244 | ␉␉␉␉␉␉case rank_int:␊ |
245 | ␉␉␉␉␉␉␉*va_arg(ap,␊ |
246 | ␉␉␉␉␉␉␉␉unsigned int *)␊ |
247 | ␉␉␉␉␉␉␉ = val;␊ |
248 | ␉␉␉␉␉␉␉break;␊ |
249 | ␉␉␉␉␉␉case rank_long:␊ |
250 | ␉␉␉␉␉␉␉*va_arg(ap,␊ |
251 | ␉␉␉␉␉␉␉␉unsigned long *)␊ |
252 | ␉␉␉␉␉␉␉␉= val;␊ |
253 | ␉␉␉␉␉␉␉break;␊ |
254 | ␉␉␉␉␉␉case rank_longlong:␊ |
255 | ␉␉␉␉␉␉␉*va_arg(ap,␊ |
256 | ␉␉␉␉␉␉␉␉unsigned long␊ |
257 | ␉␉␉␉␉␉␉␉long *) = val;␊ |
258 | ␉␉␉␉␉␉␉break;␊ |
259 | ␉␉␉␉␉␉case rank_ptr:␊ |
260 | ␉␉␉␉␉␉␉*va_arg(ap, void **) =␊ |
261 | ␉␉␉␉␉␉␉␉(void *)␊ |
262 | ␉␉␉␉␉␉␉␉(uintptr_t)val;␊ |
263 | ␉␉␉␉␉␉␉break;␊ |
264 | ␉␉␉␉␉␉}␊ |
265 | ␉␉␉␉␉}␊ |
266 | ␉␉␉␉␉break;␊ |
267 | ␊ |
268 | ␉␉␉␉case 'c':␉/* Character */␊ |
269 | ␉␉␉␉␉/* Default width == 1 */␊ |
270 | ␉␉␉␉␉width = (flags & FL_WIDTH) ? width : 1;␊ |
271 | ␉␉␉␉␉if (flags & FL_SPLAT) {␊ |
272 | ␉␉␉␉␉␉while (width--) {␊ |
273 | ␉␉␉␉␉␉␉if (!*q) {␊ |
274 | ␉␉␉␉␉␉␉␉bail = bail_eof;␊ |
275 | ␉␉␉␉␉␉␉␉break;␊ |
276 | ␉␉␉␉␉␉␉}␊ |
277 | ␉␉␉␉␉␉}␊ |
278 | ␉␉␉␉␉} else {␊ |
279 | ␉␉␉␉␉␉sarg = va_arg(ap, char *);␊ |
280 | ␉␉␉␉␉␉while (width--) {␊ |
281 | ␉␉␉␉␉␉␉if (!*q) {␊ |
282 | ␉␉␉␉␉␉␉␉bail = bail_eof;␊ |
283 | ␉␉␉␉␉␉␉␉break;␊ |
284 | ␉␉␉␉␉␉␉}␊ |
285 | ␉␉␉␉␉␉␉*sarg++ = *q++;␊ |
286 | ␉␉␉␉␉␉}␊ |
287 | ␉␉␉␉␉␉if (!bail)␊ |
288 | ␉␉␉␉␉␉␉converted++;␊ |
289 | ␉␉␉␉␉}␊ |
290 | ␉␉␉␉␉break;␊ |
291 | ␊ |
292 | ␉␉␉␉case 's':␉/* String */␊ |
293 | ␉␉␉␉␉uc = 1;␉/* Anything nonzero */␊ |
294 | ␉␉␉␉␉if (flags & FL_SPLAT) {␊ |
295 | ␉␉␉␉␉␉while (width-- && (uc = *q) &&␊ |
296 | ␉␉␉␉␉␉ !isspace(uc)) {␊ |
297 | ␉␉␉␉␉␉␉q++;␊ |
298 | ␉␉␉␉␉␉}␊ |
299 | ␉␉␉␉␉} else {␊ |
300 | ␉␉␉␉␉␉char *sp;␊ |
301 | ␉␉␉␉␉␉sp = sarg = va_arg(ap, char *);␊ |
302 | ␉␉␉␉␉␉while (width-- && (uc = *q) &&␊ |
303 | ␉␉␉␉␉␉ !isspace(uc)) {␊ |
304 | ␉␉␉␉␉␉␉*sp++ = uc;␊ |
305 | ␉␉␉␉␉␉␉q++;␊ |
306 | ␉␉␉␉␉␉}␊ |
307 | ␉␉␉␉␉␉if (sarg != sp) {␊ |
308 | ␉␉␉␉␉␉␉/* Terminate output */␊ |
309 | ␉␉␉␉␉␉␉*sp = '\0';␊ |
310 | ␉␉␉␉␉␉␉converted++;␊ |
311 | ␉␉␉␉␉␉}␊ |
312 | ␉␉␉␉␉}␊ |
313 | ␉␉␉␉␉if (!uc)␊ |
314 | ␉␉␉␉␉␉bail = bail_eof;␊ |
315 | ␉␉␉␉␉break;␊ |
316 | ␊ |
317 | ␉␉␉␉case '[':␉/* Character range */␊ |
318 | ␉␉␉␉␉sarg = (flags & FL_SPLAT) ? NULL␊ |
319 | ␉␉␉␉␉␉: va_arg(ap, char *);␊ |
320 | ␉␉␉␉␉state = st_match_init;␊ |
321 | ␉␉␉␉␉matchinv = 0;␊ |
322 | ␉␉␉␉␉memset(matchmap, 0, sizeof matchmap);␊ |
323 | ␉␉␉␉␉break;␊ |
324 | ␊ |
325 | ␉␉␉␉case '%':␉/* %% sequence */␊ |
326 | ␉␉␉␉␉if (*q == '%')␊ |
327 | ␉␉␉␉␉␉q++;␊ |
328 | ␉␉␉␉␉else␊ |
329 | ␉␉␉␉␉␉bail = bail_err;␊ |
330 | ␉␉␉␉␉break;␊ |
331 | ␊ |
332 | ␉␉␉␉default:␉/* Anything else */␊ |
333 | ␉␉␉␉␉/* Unknown sequence */␊ |
334 | ␉␉␉␉␉bail = bail_err;␊ |
335 | ␉␉␉␉␉break;␊ |
336 | ␉␉␉␉}␊ |
337 | ␉␉␉}␊ |
338 | ␉␉␉break;␊ |
339 | ␊ |
340 | ␉␉case st_match_init:␉/* Initial state for %[ match */␊ |
341 | ␉␉␉if (ch == '^' && !(flags & FL_INV)) {␊ |
342 | ␉␉␉␉matchinv = 1;␊ |
343 | ␉␉␉} else {␊ |
344 | ␉␉␉␉set_bit(matchmap, (unsigned char)ch);␊ |
345 | ␉␉␉␉state = st_match;␊ |
346 | ␉␉␉}␊ |
347 | ␉␉␉break;␊ |
348 | ␊ |
349 | ␉␉case st_match:␉/* Main state for %[ match */␊ |
350 | ␉␉␉if (ch == ']') {␊ |
351 | ␉␉␉␉goto match_run;␊ |
352 | ␉␉␉} else if (ch == '-') {␊ |
353 | ␉␉␉␉range_start = (unsigned char)ch;␊ |
354 | ␉␉␉␉state = st_match_range;␊ |
355 | ␉␉␉} else {␊ |
356 | ␉␉␉␉set_bit(matchmap, (unsigned char)ch);␊ |
357 | ␉␉␉}␊ |
358 | ␉␉␉break;␊ |
359 | ␊ |
360 | ␉␉case st_match_range:␉/* %[ match after - */␊ |
361 | ␉␉␉if (ch == ']') {␊ |
362 | ␉␉␉␉/* - was last character */␊ |
363 | ␉␉␉␉set_bit(matchmap, (unsigned char)'-');␊ |
364 | ␉␉␉␉goto match_run;␊ |
365 | ␉␉␉} else {␊ |
366 | ␉␉␉␉int i;␊ |
367 | ␉␉␉␉for (i = range_start; i < (unsigned char)ch;␊ |
368 | ␉␉␉␉ i++)␊ |
369 | ␉␉␉␉␉set_bit(matchmap, i);␊ |
370 | ␉␉␉␉state = st_match;␊ |
371 | ␉␉␉}␊ |
372 | ␉␉␉break;␊ |
373 | ␊ |
374 | ␉␉ match_run:␉/* Match expression finished */␊ |
375 | ␉␉␉qq = q;␊ |
376 | ␉␉␉uc = 1;␉/* Anything nonzero */␊ |
377 | ␉␉␉while (width && (uc = *q)␊ |
378 | ␉␉␉ && test_bit(matchmap, uc)^matchinv) {␊ |
379 | ␉␉␉␉if (sarg)␊ |
380 | ␉␉␉␉␉*sarg++ = uc;␊ |
381 | ␉␉␉␉q++;␊ |
382 | ␉␉␉}␊ |
383 | ␉␉␉if (q != qq && sarg) {␊ |
384 | ␉␉␉␉*sarg = '\0';␊ |
385 | ␉␉␉␉converted++;␊ |
386 | ␉␉␉} else {␊ |
387 | ␉␉␉␉bail = bail_err;␊ |
388 | ␉␉␉}␊ |
389 | ␉␉␉if (!uc)␊ |
390 | ␉␉␉␉bail = bail_eof;␊ |
391 | ␉␉␉break;␊ |
392 | ␉␉}␊ |
393 | ␉}␊ |
394 | ␊ |
395 | ␉if (bail == bail_eof && !converted)␊ |
396 | ␉␉converted = -1;␉/* Return EOF (-1) */␊ |
397 | ␊ |
398 | ␉return converted;␊ |
399 | }␊ |
400 |