LCOV - code coverage report
Current view: top level - lib/string - scanf.c (source / functions) Hit Total Coverage
Test: lcov.info Lines: 160 160 100.0 %
Date: 2021-11-24 03:28:48 Functions: 7 7 100.0 %

          Line data    Source code
       1             : /* Copyright (c) 2003-2004, Roger Dingledine
       2             :  * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
       3             :  * Copyright (c) 2007-2021, The Tor Project, Inc. */
       4             : /* See LICENSE for licensing information */
       5             : 
       6             : /**
       7             :  * \file scanf.c
       8             :  * \brief Locale-independent minimal implementation of sscanf().
       9             :  **/
      10             : 
      11             : #include "lib/string/scanf.h"
      12             : #include "lib/string/compat_ctype.h"
      13             : #include "lib/cc/torint.h"
      14             : #include "lib/err/torerr.h"
      15             : 
      16             : #include <stdlib.h>
      17             : 
      18             : #define MAX_SCANF_WIDTH 9999
      19             : 
      20             : /** Helper: given an ASCII-encoded decimal digit, return its numeric value.
      21             :  * NOTE: requires that its input be in-bounds. */
      22             : static int
      23     3348204 : digit_to_num(char d)
      24             : {
      25     3348204 :   int num = ((int)d) - (int)'0';
      26     3348204 :   raw_assert(num <= 9 && num >= 0);
      27     3348204 :   return num;
      28             : }
      29             : 
      30             : /** Helper: Read an unsigned base-<b>base</b> (10 or 16) integer of up to
      31             :  * <b>width</b> characters from *<b>bufp</b>.  (A negative <b>width</b> means
      32             :  * MAX_SCANF_WIDTH.)  On success, store the result in <b>out</b>, advance
      33             :  * bufp past the digits read, and return 0.  On failure, return -1. */
      34             : static int
      35     1171062 : scan_unsigned(const char **bufp, unsigned long *out, int width, unsigned base)
      36             : {
      37     1171062 :   unsigned long result = 0;
      38     1171062 :   int scanned_so_far = 0;
      39     1171062 :   const int hex = base==16;
      40     1171062 :   raw_assert(base == 10 || base == 16);
      41     1171062 :   if (!bufp || !*bufp || !out)
      42             :     return -1;
      43     1171062 :   if (width<0)
      44        9680 :     width=MAX_SCANF_WIDTH;
      45             : 
      46     3076705 :   while (**bufp && (hex?TOR_ISXDIGIT(**bufp):TOR_ISDIGIT(**bufp))
      47     5543887 :          && scanned_so_far < width) {
      48     2186382 :     unsigned digit = hex?hex_decode_digit(*(*bufp)++):digit_to_num(*(*bufp)++);
      49             :     // Check for overflow beforehand, without actually causing any overflow
      50             :     // This preserves functionality on compilers that don't wrap overflow
      51             :     // (i.e. that trap or optimise away overflow)
      52             :     // result * base + digit > ULONG_MAX
      53             :     // result * base > ULONG_MAX - digit
      54     2186382 :     if (result > (ULONG_MAX - digit)/base)
      55             :       return -1; /* Processing this digit would overflow */
      56     2186379 :     result = result * base + digit;
      57     2186379 :     ++scanned_so_far;
      58             :   }
      59             : 
      60     1171059 :   if (!scanned_so_far) /* No actual digits scanned */
      61             :     return -1;
      62             : 
      63     1154281 :   *out = result;
      64     1154281 :   return 0;
      65             : }
      66             : 
      67             : /** Helper: Read a signed int from *<b>bufp</b> of up to <b>width</b>
      68             :  * characters.  (Handle arbitrary width if <b>width</b> is less than 0.)  On
      69             :  * success, store the result in <b>out</b>, advance bufp to the next
      70             :  * character, and return 0.  On failure, return -1. */
      71             : static int
      72          84 : scan_signed(const char **bufp, long *out, int width)
      73             : {
      74          84 :   int neg = 0;
      75          84 :   unsigned long result = 0;
      76             : 
      77          84 :   if (!bufp || !*bufp || !out)
      78             :     return -1;
      79          84 :   if (width<0)
      80          84 :     width=MAX_SCANF_WIDTH;
      81             : 
      82          84 :   if (**bufp == '-') {
      83          10 :     neg = 1;
      84          10 :     ++*bufp;
      85          10 :     --width;
      86             :   }
      87             : 
      88          84 :   if (scan_unsigned(bufp, &result, width, 10) < 0)
      89             :     return -1;
      90             : 
      91          84 :   if (neg && result > 0) {
      92          10 :     if (result > ((unsigned long)LONG_MAX) + 1)
      93             :       return -1; /* Underflow */
      94           8 :     else if (result == ((unsigned long)LONG_MAX) + 1)
      95           2 :       *out = LONG_MIN;
      96             :     else {
      97             :       /* We once had a far more clever no-overflow conversion here, but
      98             :        * some versions of GCC apparently ran it into the ground.  Now
      99             :        * we just check for LONG_MIN explicitly.
     100             :        */
     101           6 :       *out = -(long)result;
     102             :     }
     103             :   } else {
     104          74 :     if (result > LONG_MAX)
     105             :       return -1; /* Overflow */
     106          71 :     *out = (long)result;
     107             :   }
     108             : 
     109             :   return 0;
     110             : }
     111             : 
     112             : /** Helper: Read a decimal-formatted double from *<b>bufp</b> of up to
     113             :  * <b>width</b> characters.  (Handle arbitrary width if <b>width</b> is less
     114             :  * than 0.)  On success, store the result in <b>out</b>, advance bufp to the
     115             :  * next character, and return 0.  On failure, return -1. */
     116             : static int
     117           5 : scan_double(const char **bufp, double *out, int width)
     118             : {
     119           5 :   int neg = 0;
     120           5 :   double result = 0;
     121           5 :   int scanned_so_far = 0;
     122             : 
     123           5 :   if (!bufp || !*bufp || !out)
     124             :     return -1;
     125           5 :   if (width<0)
     126           5 :     width=MAX_SCANF_WIDTH;
     127             : 
     128           5 :   if (**bufp == '-') {
     129           1 :     neg = 1;
     130           1 :     ++*bufp;
     131             :   }
     132             : 
     133          22 :   while (**bufp && TOR_ISDIGIT(**bufp) && scanned_so_far < width) {
     134          17 :     const int digit = digit_to_num(*(*bufp)++);
     135          17 :     result = result * 10 + digit;
     136          17 :     ++scanned_so_far;
     137             :   }
     138           5 :   if (**bufp == '.') {
     139           4 :     double fracval = 0, denominator = 1;
     140           4 :     ++*bufp;
     141           4 :     ++scanned_so_far;
     142          21 :     while (**bufp && TOR_ISDIGIT(**bufp) && scanned_so_far < width) {
     143          17 :       const int digit = digit_to_num(*(*bufp)++);
     144          17 :       fracval = fracval * 10 + digit;
     145          17 :       denominator *= 10;
     146          17 :       ++scanned_so_far;
     147             :     }
     148           4 :     result += fracval / denominator;
     149             :   }
     150             : 
     151           5 :   if (!scanned_so_far) /* No actual digits scanned */
     152             :     return -1;
     153             : 
     154           4 :   *out = neg ? -result : result;
     155           4 :   return 0;
     156             : }
     157             : 
     158             : /** Helper: copy up to <b>width</b> non-space characters from *<b>bufp</b> to
     159             :  * <b>out</b>, nul-terminate <b>out</b>, and leave *<b>bufp</b> at the first
     160             :  * character not copied.  Return 0 on success, -1 on invalid arguments. */
     161             : static int
     162         480 : scan_string(const char **bufp, char *out, int width)
     163             : {
     164         480 :   int scanned_so_far = 0;
     165         480 :   if (!bufp || !out || width < 0)
     166             :     return -1;
     167        2111 :   while (**bufp && ! TOR_ISSPACE(**bufp) && scanned_so_far < width) {
     168        1632 :     *out++ = *(*bufp)++;
     169        1632 :     ++scanned_so_far;
     170             :   }
     171         479 :   *out = '\0';
     172         479 :   return 0;
     173             : }
     174             : 
     175             : /** Locale-independent, minimal, no-surprises scanf variant, accepting only a
     176             :  * restricted pattern format.  For more info on what it supports, see
     177             :  * tor_sscanf() documentation.  */
     178             : int
     179      300993 : tor_vsscanf(const char *buf, const char *pattern, va_list ap)
     180             : {
     181      300993 :   int n_matched = 0;
     182             : 
     183     2330222 :   while (*pattern) {
     184     2327208 :     if (*pattern != '%') {
     185      862741 :       if (*buf == *pattern) {
     186      862277 :         ++buf;
     187      862277 :         ++pattern;
     188      862277 :         continue;
     189             :       } else {
     190         464 :         return n_matched;
     191             :       }
     192             :     } else {
     193     1464467 :       int width = -1;
     194     1464467 :       int longmod = 0;
     195     1464467 :       ++pattern;
     196     1464467 :       if (TOR_ISDIGIT(*pattern)) {
     197     1161793 :         width = digit_to_num(*pattern++);
     198     1161859 :         while (TOR_ISDIGIT(*pattern)) {
     199          67 :           width *= 10;
     200          67 :           width += digit_to_num(*pattern++);
     201          67 :           if (width > MAX_SCANF_WIDTH)
     202             :             return -1;
     203             :         }
     204     1161792 :         if (!width) /* No zero-width things. */
     205             :           return -1;
     206             :       }
     207     1464466 :       if (*pattern == 'l') {
     208          81 :         longmod = 1;
     209          81 :         ++pattern;
     210             :       }
     211     1464466 :       if (*pattern == 'u' || *pattern == 'x') {
     212     1170998 :         unsigned long u;
     213     1170998 :         const int base = (*pattern == 'u') ? 10 : 16;
     214     1170998 :         if (!*buf)
     215       16805 :           return n_matched;
     216     1170978 :         if (scan_unsigned(&buf, &u, width, base)<0)
     217       16781 :           return n_matched;
     218     1154197 :         if (longmod) {
     219           4 :           unsigned long *out = va_arg(ap, unsigned long *);
     220           4 :           *out = u;
     221             :         } else {
     222     1154193 :           unsigned *out = va_arg(ap, unsigned *);
     223     1154193 :           if (u > UINT_MAX)
     224           4 :             return n_matched;
     225     1154189 :           *out = (unsigned) u;
     226             :         }
     227     1154193 :         ++pattern;
     228     1154193 :         ++n_matched;
     229      293468 :       } else if (*pattern == 'f') {
     230           6 :         double *d = va_arg(ap, double *);
     231           6 :         if (!longmod)
     232             :           return -1; /* float not supported */
     233           6 :         if (!*buf)
     234           1 :           return n_matched;
     235           5 :         if (scan_double(&buf, d, width)<0)
     236           1 :           return n_matched;
     237           4 :         ++pattern;
     238           4 :         ++n_matched;
     239             :       } else if (*pattern == 'd') {
     240          84 :         long lng=0;
     241          84 :         if (scan_signed(&buf, &lng, width)<0)
     242          10 :           return n_matched;
     243          79 :         if (longmod) {
     244          64 :           long *out = va_arg(ap, long *);
     245          64 :           *out = lng;
     246             :         } else {
     247          15 :           int *out = va_arg(ap, int *);
     248             : #if LONG_MAX > INT_MAX
     249          15 :           if (lng < INT_MIN || lng > INT_MAX)
     250           5 :             return n_matched;
     251             : #endif
     252          10 :           *out = (int)lng;
     253             :         }
     254          74 :         ++pattern;
     255          74 :         ++n_matched;
     256             :       } else if (*pattern == 's') {
     257         481 :         char *s = va_arg(ap, char *);
     258         481 :         if (longmod)
     259             :           return -1;
     260         481 :         if (width < 0)
     261             :           return -1;
     262         480 :         if (scan_string(&buf, s, width)<0)
     263           1 :           return n_matched;
     264         479 :         ++pattern;
     265         479 :         ++n_matched;
     266             :       } else if (*pattern == 'c') {
     267      292892 :         char *ch = va_arg(ap, char *);
     268      292892 :         if (longmod)
     269             :           return -1;
     270      292892 :         if (width != -1)
     271             :           return -1;
     272      292891 :         if (!*buf)
     273      280691 :           return n_matched;
     274       12200 :         *ch = *buf++;
     275       12200 :         ++pattern;
     276       12200 :         ++n_matched;
     277             :       } else if (*pattern == '%') {
     278           4 :         if (*buf != '%')
     279           2 :           return n_matched;
     280           2 :         if (longmod)
     281             :           return -1;
     282           2 :         ++buf;
     283           2 :         ++pattern;
     284             :       } else {
     285             :         return -1; /* Unrecognized pattern component. */
     286             :       }
     287             :     }
     288             :   }
     289             : 
     290             :   return n_matched;
     291             : }
     292             : 
     293             : /** Minimal sscanf replacement: parse <b>buf</b> according to <b>pattern</b>
     294             :  * and store the results in the corresponding argument fields.  Differs from
     295             :  * sscanf in that:
     296             :  * <ul><li>It only handles %u, %lu, %x, %lx, %[NUM]s, %d, %ld, %lf, %c, and %%.
     297             :  *     <li>It only handles decimal inputs for %lf. (12.3, not 1.23e1)
     298             :  *     <li>It does not handle arbitrarily long widths.
     299             :  *     <li>Numbers do not consume any space characters.
     300             :  *     <li>It is locale-independent.
     301             :  *     <li>%u and %x do not consume any space.
     302             :  *     <li>It returns -1 on malformed patterns.</ul>
     303             :  *
     304             :  * (As with other locale-independent functions, we need this to parse data that
     305             :  * is in ASCII without worrying that the C library's locale-handling will make
     306             :  * miscellaneous characters look like numbers, spaces, and so on.)
     307             :  */
     308             : int
     309      300993 : tor_sscanf(const char *buf, const char *pattern, ...)
     310             : {
     311      300993 :   int r;
     312      300993 :   va_list ap;
     313      300993 :   va_start(ap, pattern);
     314      300993 :   r = tor_vsscanf(buf, pattern, ap);
     315      300993 :   va_end(ap);
     316      300993 :   return r;
     317             : }

Generated by: LCOV version 1.14