Line data Source code
1 : /* Copyright (c) 2003-2004, Roger Dingledine
2 : * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
3 : * Copyright (c) 2007-2021, The Tor Project, Inc. */
4 : /* See LICENSE for licensing information */
5 :
6 : /**
7 : * \file scanf.c
8 : * \brief Locale-independent minimal implementation of sscanf().
9 : **/
10 :
11 : #include "lib/string/scanf.h"
12 : #include "lib/string/compat_ctype.h"
13 : #include "lib/cc/torint.h"
14 : #include "lib/err/torerr.h"
15 :
16 : #include <stdlib.h>
17 :
18 : #define MAX_SCANF_WIDTH 9999
19 :
/** Helper: convert the ASCII decimal digit <b>d</b> ('0' through '9') into
 * the integer it denotes.
 *
 * NOTE: the caller must pass a decimal digit; the result is checked with
 * raw_assert() to lie in the range 0..9. */
static int
digit_to_num(char d)
{
  const int value = (int)d - (int)'0';
  raw_assert(value >= 0 && value <= 9);
  return value;
}
29 :
30 : /** Helper: Read an unsigned int from *<b>bufp</b> of up to <b>width</b>
31 : * characters. (Handle arbitrary width if <b>width</b> is less than 0.) On
32 : * success, store the result in <b>out</b>, advance bufp to the next
33 : * character, and return 0. On failure, return -1. */
34 : static int
35 1171062 : scan_unsigned(const char **bufp, unsigned long *out, int width, unsigned base)
36 : {
37 1171062 : unsigned long result = 0;
38 1171062 : int scanned_so_far = 0;
39 1171062 : const int hex = base==16;
40 1171062 : raw_assert(base == 10 || base == 16);
41 1171062 : if (!bufp || !*bufp || !out)
42 : return -1;
43 1171062 : if (width<0)
44 9680 : width=MAX_SCANF_WIDTH;
45 :
46 3076705 : while (**bufp && (hex?TOR_ISXDIGIT(**bufp):TOR_ISDIGIT(**bufp))
47 5543887 : && scanned_so_far < width) {
48 2186382 : unsigned digit = hex?hex_decode_digit(*(*bufp)++):digit_to_num(*(*bufp)++);
49 : // Check for overflow beforehand, without actually causing any overflow
50 : // This preserves functionality on compilers that don't wrap overflow
51 : // (i.e. that trap or optimise away overflow)
52 : // result * base + digit > ULONG_MAX
53 : // result * base > ULONG_MAX - digit
54 2186382 : if (result > (ULONG_MAX - digit)/base)
55 : return -1; /* Processing this digit would overflow */
56 2186379 : result = result * base + digit;
57 2186379 : ++scanned_so_far;
58 : }
59 :
60 1171059 : if (!scanned_so_far) /* No actual digits scanned */
61 : return -1;
62 :
63 1154281 : *out = result;
64 1154281 : return 0;
65 : }
66 :
67 : /** Helper: Read an signed int from *<b>bufp</b> of up to <b>width</b>
68 : * characters. (Handle arbitrary width if <b>width</b> is less than 0.) On
69 : * success, store the result in <b>out</b>, advance bufp to the next
70 : * character, and return 0. On failure, return -1. */
71 : static int
72 84 : scan_signed(const char **bufp, long *out, int width)
73 : {
74 84 : int neg = 0;
75 84 : unsigned long result = 0;
76 :
77 84 : if (!bufp || !*bufp || !out)
78 : return -1;
79 84 : if (width<0)
80 84 : width=MAX_SCANF_WIDTH;
81 :
82 84 : if (**bufp == '-') {
83 10 : neg = 1;
84 10 : ++*bufp;
85 10 : --width;
86 : }
87 :
88 84 : if (scan_unsigned(bufp, &result, width, 10) < 0)
89 : return -1;
90 :
91 84 : if (neg && result > 0) {
92 10 : if (result > ((unsigned long)LONG_MAX) + 1)
93 : return -1; /* Underflow */
94 8 : else if (result == ((unsigned long)LONG_MAX) + 1)
95 2 : *out = LONG_MIN;
96 : else {
97 : /* We once had a far more clever no-overflow conversion here, but
98 : * some versions of GCC apparently ran it into the ground. Now
99 : * we just check for LONG_MIN explicitly.
100 : */
101 6 : *out = -(long)result;
102 : }
103 : } else {
104 74 : if (result > LONG_MAX)
105 : return -1; /* Overflow */
106 71 : *out = (long)result;
107 : }
108 :
109 : return 0;
110 : }
111 :
112 : /** Helper: Read a decimal-formatted double from *<b>bufp</b> of up to
113 : * <b>width</b> characters. (Handle arbitrary width if <b>width</b> is less
114 : * than 0.) On success, store the result in <b>out</b>, advance bufp to the
115 : * next character, and return 0. On failure, return -1. */
116 : static int
117 5 : scan_double(const char **bufp, double *out, int width)
118 : {
119 5 : int neg = 0;
120 5 : double result = 0;
121 5 : int scanned_so_far = 0;
122 :
123 5 : if (!bufp || !*bufp || !out)
124 : return -1;
125 5 : if (width<0)
126 5 : width=MAX_SCANF_WIDTH;
127 :
128 5 : if (**bufp == '-') {
129 1 : neg = 1;
130 1 : ++*bufp;
131 : }
132 :
133 22 : while (**bufp && TOR_ISDIGIT(**bufp) && scanned_so_far < width) {
134 17 : const int digit = digit_to_num(*(*bufp)++);
135 17 : result = result * 10 + digit;
136 17 : ++scanned_so_far;
137 : }
138 5 : if (**bufp == '.') {
139 4 : double fracval = 0, denominator = 1;
140 4 : ++*bufp;
141 4 : ++scanned_so_far;
142 21 : while (**bufp && TOR_ISDIGIT(**bufp) && scanned_so_far < width) {
143 17 : const int digit = digit_to_num(*(*bufp)++);
144 17 : fracval = fracval * 10 + digit;
145 17 : denominator *= 10;
146 17 : ++scanned_so_far;
147 : }
148 4 : result += fracval / denominator;
149 : }
150 :
151 5 : if (!scanned_so_far) /* No actual digits scanned */
152 : return -1;
153 :
154 4 : *out = neg ? -result : result;
155 4 : return 0;
156 : }
157 :
/** Helper: copy up to <b>width</b> non-space characters from *<b>bufp</b> to
 * <b>out</b>, and nul-terminate <b>out</b>.  Copying stops at the first
 * whitespace character, at the end of the input string, or once
 * <b>width</b> characters have been copied, whichever comes first;
 * *<b>bufp</b> is left pointing at the first character that was not copied.
 * Whitespace is never skipped.
 *
 * <b>out</b> must have room for at least <b>width</b>+1 bytes.
 *
 * Return 0 on success (copying zero characters still counts as success).
 * Return -1 if <b>bufp</b>, *<b>bufp</b> or <b>out</b> is NULL, or if
 * <b>width</b> is negative. */
static int
scan_string(const char **bufp, char *out, int width)
{
  int scanned_so_far = 0;
  /* Check *bufp too, as the numeric scanners do, before dereferencing it. */
  if (!bufp || !*bufp || !out || width < 0)
    return -1;
  while (**bufp && ! TOR_ISSPACE(**bufp) && scanned_so_far < width) {
    *out++ = *(*bufp)++;
    ++scanned_so_far;
  }
  *out = '\0';
  return 0;
}
174 :
175 : /** Locale-independent, minimal, no-surprises scanf variant, accepting only a
176 : * restricted pattern format. For more info on what it supports, see
177 : * tor_sscanf() documentation. */
178 : int
179 300993 : tor_vsscanf(const char *buf, const char *pattern, va_list ap)
180 : {
181 300993 : int n_matched = 0;
182 :
183 2330222 : while (*pattern) {
184 2327208 : if (*pattern != '%') {
185 862741 : if (*buf == *pattern) {
186 862277 : ++buf;
187 862277 : ++pattern;
188 862277 : continue;
189 : } else {
190 464 : return n_matched;
191 : }
192 : } else {
193 1464467 : int width = -1;
194 1464467 : int longmod = 0;
195 1464467 : ++pattern;
196 1464467 : if (TOR_ISDIGIT(*pattern)) {
197 1161793 : width = digit_to_num(*pattern++);
198 1161859 : while (TOR_ISDIGIT(*pattern)) {
199 67 : width *= 10;
200 67 : width += digit_to_num(*pattern++);
201 67 : if (width > MAX_SCANF_WIDTH)
202 : return -1;
203 : }
204 1161792 : if (!width) /* No zero-width things. */
205 : return -1;
206 : }
207 1464466 : if (*pattern == 'l') {
208 81 : longmod = 1;
209 81 : ++pattern;
210 : }
211 1464466 : if (*pattern == 'u' || *pattern == 'x') {
212 1170998 : unsigned long u;
213 1170998 : const int base = (*pattern == 'u') ? 10 : 16;
214 1170998 : if (!*buf)
215 16805 : return n_matched;
216 1170978 : if (scan_unsigned(&buf, &u, width, base)<0)
217 16781 : return n_matched;
218 1154197 : if (longmod) {
219 4 : unsigned long *out = va_arg(ap, unsigned long *);
220 4 : *out = u;
221 : } else {
222 1154193 : unsigned *out = va_arg(ap, unsigned *);
223 1154193 : if (u > UINT_MAX)
224 4 : return n_matched;
225 1154189 : *out = (unsigned) u;
226 : }
227 1154193 : ++pattern;
228 1154193 : ++n_matched;
229 293468 : } else if (*pattern == 'f') {
230 6 : double *d = va_arg(ap, double *);
231 6 : if (!longmod)
232 : return -1; /* float not supported */
233 6 : if (!*buf)
234 1 : return n_matched;
235 5 : if (scan_double(&buf, d, width)<0)
236 1 : return n_matched;
237 4 : ++pattern;
238 4 : ++n_matched;
239 : } else if (*pattern == 'd') {
240 84 : long lng=0;
241 84 : if (scan_signed(&buf, &lng, width)<0)
242 10 : return n_matched;
243 79 : if (longmod) {
244 64 : long *out = va_arg(ap, long *);
245 64 : *out = lng;
246 : } else {
247 15 : int *out = va_arg(ap, int *);
248 : #if LONG_MAX > INT_MAX
249 15 : if (lng < INT_MIN || lng > INT_MAX)
250 5 : return n_matched;
251 : #endif
252 10 : *out = (int)lng;
253 : }
254 74 : ++pattern;
255 74 : ++n_matched;
256 : } else if (*pattern == 's') {
257 481 : char *s = va_arg(ap, char *);
258 481 : if (longmod)
259 : return -1;
260 481 : if (width < 0)
261 : return -1;
262 480 : if (scan_string(&buf, s, width)<0)
263 1 : return n_matched;
264 479 : ++pattern;
265 479 : ++n_matched;
266 : } else if (*pattern == 'c') {
267 292892 : char *ch = va_arg(ap, char *);
268 292892 : if (longmod)
269 : return -1;
270 292892 : if (width != -1)
271 : return -1;
272 292891 : if (!*buf)
273 280691 : return n_matched;
274 12200 : *ch = *buf++;
275 12200 : ++pattern;
276 12200 : ++n_matched;
277 : } else if (*pattern == '%') {
278 4 : if (*buf != '%')
279 2 : return n_matched;
280 2 : if (longmod)
281 : return -1;
282 2 : ++buf;
283 2 : ++pattern;
284 : } else {
285 : return -1; /* Unrecognized pattern component. */
286 : }
287 : }
288 : }
289 :
290 : return n_matched;
291 : }
292 :
/** Minimal sscanf replacement: parse <b>buf</b> according to <b>pattern</b>
 * and store the results through the pointer arguments that follow.  This is
 * a varargs front end for tor_vsscanf() and returns whatever it returns.
 *
 * Differences from sscanf:
 * <ul><li>Only %u, %lu, %x, %lx, %[NUM]s, %d, %ld, %lf, %c and a literal %%
 *         are handled.
 *     <li>%lf only accepts plain decimal input. (12.3, not 1.23e1)
 *     <li>Arbitrarily long widths are not handled.
 *     <li>Numeric conversions (including %u and %x) do not consume any space
 *         characters.
 *     <li>It is locale-independent.
 *     <li>It returns -1 on malformed patterns.</ul>
 *
 * (As with other locale-independent functions, we need this to parse data that
 * is in ASCII without worrying that the C library's locale-handling will make
 * miscellaneous characters look like numbers, spaces, and so on.)
 */
int
tor_sscanf(const char *buf, const char *pattern, ...)
{
  va_list args;
  int n_matched;

  va_start(args, pattern);
  n_matched = tor_vsscanf(buf, pattern, args);
  va_end(args);

  return n_matched;
}
|