Tor  0.4.7.0-alpha-dev
scanf.c
Go to the documentation of this file.
1 /* Copyright (c) 2003-2004, Roger Dingledine
2  * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
3  * Copyright (c) 2007-2021, The Tor Project, Inc. */
4 /* See LICENSE for licensing information */
5 
6 /**
7  * \file scanf.c
8  * \brief Locale-independent minimal implementation of sscanf().
9  **/
10 
11 #include "lib/string/scanf.h"
13 #include "lib/cc/torint.h"
14 #include "lib/err/torerr.h"
15 
16 #include <stdlib.h>
17 
/** Largest field width that a pattern may specify; also used as the
 * effective width when a numeric scanner is called with a negative width. */
#define MAX_SCANF_WIDTH 9999
19 
/** Helper: convert the ASCII decimal digit <b>d</b> to the integer it
 * represents.
 *
 * The caller must pass a character in the range '0'..'9'; anything else
 * trips the assertion below. */
static int
digit_to_num(char d)
{
  int num = (int)d;
  num -= (int)'0';
  raw_assert(num <= 9 && num >= 0);
  return num;
}
29 
30 /** Helper: Read an unsigned int from *<b>bufp</b> of up to <b>width</b>
31  * characters. (Handle arbitrary width if <b>width</b> is less than 0.) On
32  * success, store the result in <b>out</b>, advance bufp to the next
33  * character, and return 0. On failure, return -1. */
34 static int
35 scan_unsigned(const char **bufp, unsigned long *out, int width, unsigned base)
36 {
37  unsigned long result = 0;
38  int scanned_so_far = 0;
39  const int hex = base==16;
40  raw_assert(base == 10 || base == 16);
41  if (!bufp || !*bufp || !out)
42  return -1;
43  if (width<0)
44  width=MAX_SCANF_WIDTH;
45 
46  while (**bufp && (hex?TOR_ISXDIGIT(**bufp):TOR_ISDIGIT(**bufp))
47  && scanned_so_far < width) {
48  unsigned digit = hex?hex_decode_digit(*(*bufp)++):digit_to_num(*(*bufp)++);
49  // Check for overflow beforehand, without actually causing any overflow
50  // This preserves functionality on compilers that don't wrap overflow
51  // (i.e. that trap or optimise away overflow)
52  // result * base + digit > ULONG_MAX
53  // result * base > ULONG_MAX - digit
54  if (result > (ULONG_MAX - digit)/base)
55  return -1; /* Processing this digit would overflow */
56  result = result * base + digit;
57  ++scanned_so_far;
58  }
59 
60  if (!scanned_so_far) /* No actual digits scanned */
61  return -1;
62 
63  *out = result;
64  return 0;
65 }
66 
67 /** Helper: Read an signed int from *<b>bufp</b> of up to <b>width</b>
68  * characters. (Handle arbitrary width if <b>width</b> is less than 0.) On
69  * success, store the result in <b>out</b>, advance bufp to the next
70  * character, and return 0. On failure, return -1. */
71 static int
72 scan_signed(const char **bufp, long *out, int width)
73 {
74  int neg = 0;
75  unsigned long result = 0;
76 
77  if (!bufp || !*bufp || !out)
78  return -1;
79  if (width<0)
80  width=MAX_SCANF_WIDTH;
81 
82  if (**bufp == '-') {
83  neg = 1;
84  ++*bufp;
85  --width;
86  }
87 
88  if (scan_unsigned(bufp, &result, width, 10) < 0)
89  return -1;
90 
91  if (neg && result > 0) {
92  if (result > ((unsigned long)LONG_MAX) + 1)
93  return -1; /* Underflow */
94  else if (result == ((unsigned long)LONG_MAX) + 1)
95  *out = LONG_MIN;
96  else {
97  /* We once had a far more clever no-overflow conversion here, but
98  * some versions of GCC apparently ran it into the ground. Now
99  * we just check for LONG_MIN explicitly.
100  */
101  *out = -(long)result;
102  }
103  } else {
104  if (result > LONG_MAX)
105  return -1; /* Overflow */
106  *out = (long)result;
107  }
108 
109  return 0;
110 }
111 
112 /** Helper: Read a decimal-formatted double from *<b>bufp</b> of up to
113  * <b>width</b> characters. (Handle arbitrary width if <b>width</b> is less
114  * than 0.) On success, store the result in <b>out</b>, advance bufp to the
115  * next character, and return 0. On failure, return -1. */
116 static int
117 scan_double(const char **bufp, double *out, int width)
118 {
119  int neg = 0;
120  double result = 0;
121  int scanned_so_far = 0;
122 
123  if (!bufp || !*bufp || !out)
124  return -1;
125  if (width<0)
126  width=MAX_SCANF_WIDTH;
127 
128  if (**bufp == '-') {
129  neg = 1;
130  ++*bufp;
131  }
132 
133  while (**bufp && TOR_ISDIGIT(**bufp) && scanned_so_far < width) {
134  const int digit = digit_to_num(*(*bufp)++);
135  result = result * 10 + digit;
136  ++scanned_so_far;
137  }
138  if (**bufp == '.') {
139  double fracval = 0, denominator = 1;
140  ++*bufp;
141  ++scanned_so_far;
142  while (**bufp && TOR_ISDIGIT(**bufp) && scanned_so_far < width) {
143  const int digit = digit_to_num(*(*bufp)++);
144  fracval = fracval * 10 + digit;
145  denominator *= 10;
146  ++scanned_so_far;
147  }
148  result += fracval / denominator;
149  }
150 
151  if (!scanned_so_far) /* No actual digits scanned */
152  return -1;
153 
154  *out = neg ? -result : result;
155  return 0;
156 }
157 
/** Helper: copy up to <b>width</b> non-space characters from *<b>bufp</b>
 * into <b>out</b> and nul-terminate <b>out</b>. Copying stops at the first
 * space character, at the end of the input string, or after <b>width</b>
 * characters, whichever comes first; *<b>bufp</b> is left pointing at the
 * first character that was not copied.
 *
 * <b>out</b> must have room for at least <b>width</b>+1 bytes.
 *
 * Return 0 on success (including when zero characters were copied), or -1
 * if an argument is NULL or <b>width</b> is negative. */
static int
scan_string(const char **bufp, char *out, int width)
{
  int scanned_so_far = 0;
  /* Reject a NULL *bufp as well, as the numeric scanners do; the loop
   * below dereferences it unconditionally. */
  if (!bufp || !*bufp || !out || width < 0)
    return -1;
  while (**bufp && ! TOR_ISSPACE(**bufp) && scanned_so_far < width) {
    *out++ = *(*bufp)++;
    ++scanned_so_far;
  }
  *out = '\0';
  return 0;
}
174 
175 /** Locale-independent, minimal, no-surprises scanf variant, accepting only a
176  * restricted pattern format. For more info on what it supports, see
177  * tor_sscanf() documentation. */
178 int
179 tor_vsscanf(const char *buf, const char *pattern, va_list ap)
180 {
181  int n_matched = 0;
182 
183  while (*pattern) {
184  if (*pattern != '%') {
185  if (*buf == *pattern) {
186  ++buf;
187  ++pattern;
188  continue;
189  } else {
190  return n_matched;
191  }
192  } else {
193  int width = -1;
194  int longmod = 0;
195  ++pattern;
196  if (TOR_ISDIGIT(*pattern)) {
197  width = digit_to_num(*pattern++);
198  while (TOR_ISDIGIT(*pattern)) {
199  width *= 10;
200  width += digit_to_num(*pattern++);
201  if (width > MAX_SCANF_WIDTH)
202  return -1;
203  }
204  if (!width) /* No zero-width things. */
205  return -1;
206  }
207  if (*pattern == 'l') {
208  longmod = 1;
209  ++pattern;
210  }
211  if (*pattern == 'u' || *pattern == 'x') {
212  unsigned long u;
213  const int base = (*pattern == 'u') ? 10 : 16;
214  if (!*buf)
215  return n_matched;
216  if (scan_unsigned(&buf, &u, width, base)<0)
217  return n_matched;
218  if (longmod) {
219  unsigned long *out = va_arg(ap, unsigned long *);
220  *out = u;
221  } else {
222  unsigned *out = va_arg(ap, unsigned *);
223  if (u > UINT_MAX)
224  return n_matched;
225  *out = (unsigned) u;
226  }
227  ++pattern;
228  ++n_matched;
229  } else if (*pattern == 'f') {
230  double *d = va_arg(ap, double *);
231  if (!longmod)
232  return -1; /* float not supported */
233  if (!*buf)
234  return n_matched;
235  if (scan_double(&buf, d, width)<0)
236  return n_matched;
237  ++pattern;
238  ++n_matched;
239  } else if (*pattern == 'd') {
240  long lng=0;
241  if (scan_signed(&buf, &lng, width)<0)
242  return n_matched;
243  if (longmod) {
244  long *out = va_arg(ap, long *);
245  *out = lng;
246  } else {
247  int *out = va_arg(ap, int *);
248 #if LONG_MAX > INT_MAX
249  if (lng < INT_MIN || lng > INT_MAX)
250  return n_matched;
251 #endif
252  *out = (int)lng;
253  }
254  ++pattern;
255  ++n_matched;
256  } else if (*pattern == 's') {
257  char *s = va_arg(ap, char *);
258  if (longmod)
259  return -1;
260  if (width < 0)
261  return -1;
262  if (scan_string(&buf, s, width)<0)
263  return n_matched;
264  ++pattern;
265  ++n_matched;
266  } else if (*pattern == 'c') {
267  char *ch = va_arg(ap, char *);
268  if (longmod)
269  return -1;
270  if (width != -1)
271  return -1;
272  if (!*buf)
273  return n_matched;
274  *ch = *buf++;
275  ++pattern;
276  ++n_matched;
277  } else if (*pattern == '%') {
278  if (*buf != '%')
279  return n_matched;
280  if (longmod)
281  return -1;
282  ++buf;
283  ++pattern;
284  } else {
285  return -1; /* Unrecognized pattern component. */
286  }
287  }
288  }
289 
290  return n_matched;
291 }
292 
/** Minimal sscanf replacement: parse <b>buf</b> according to <b>pattern</b>
 * and store each converted field through the corresponding pointer
 * argument. This is the variadic front end to tor_vsscanf(), whose return
 * value it passes through unchanged.
 *
 * Differences from the C library's sscanf:
 * <ul><li>Only %u, %lu, %x, %lx, %[NUM]s, %d, %ld, %lf, %c, and a literal
 *     %% are understood.
 * <li>%lf accepts plain decimal notation only. (12.3, not 1.23e1)
 * <li>Field widths are bounded rather than arbitrarily long.
 * <li>Numeric conversions, including %u and %x, never skip over space
 *     characters.
 * <li>Its behavior does not depend on the locale.
 * <li>A malformed pattern makes it return -1.</ul>
 *
 * (As with other locale-independent functions, we need this to parse data that
 * is in ASCII without worrying that the C library's locale-handling will make
 * miscellaneous characters look like numbers, spaces, and so on.)
 */
int
tor_sscanf(const char *buf, const char *pattern, ...)
{
  va_list args;
  int n_converted;

  va_start(args, pattern);
  n_converted = tor_vsscanf(buf, pattern, args);
  va_end(args);

  return n_converted;
}
Locale-independent character-type inspection (header)
static int hex_decode_digit(char c)
Definition: compat_ctype.h:43
static int scan_double(const char **bufp, double *out, int width)
Definition: scanf.c:117
int tor_sscanf(const char *buf, const char *pattern,...)
Definition: scanf.c:309
static int scan_unsigned(const char **bufp, unsigned long *out, int width, unsigned base)
Definition: scanf.c:35
static int scan_string(const char **bufp, char *out, int width)
Definition: scanf.c:162
int tor_vsscanf(const char *buf, const char *pattern, va_list ap)
Definition: scanf.c:179
static int digit_to_num(char d)
Definition: scanf.c:23
static int scan_signed(const char **bufp, long *out, int width)
Definition: scanf.c:72
Header for scanf.c.
Headers for torerr.c.
Integer definitions used throughout Tor.