LCOV - code coverage report
Current view: top level - feature/dirparse - parsecommon.c (source / functions) Hit Total Coverage
Test: lcov.info Lines: 209 224 93.3 %
Date: 2021-11-24 03:28:48 Functions: 9 9 100.0 %

          Line data    Source code
       1             : /* Copyright (c) 2016-2021, The Tor Project, Inc. */
       2             : /* See LICENSE for licensing information */
       3             : 
       4             : /**
       5             :  * \file parsecommon.c
       6             :  * \brief Common code to parse and validate various type of descriptors.
       7             :  **/
       8             : 
       9             : #include "feature/dirparse/parsecommon.h"
      10             : #include "lib/log/log.h"
      11             : #include "lib/log/util_bug.h"
      12             : #include "lib/encoding/binascii.h"
      13             : #include "lib/container/smartlist.h"
      14             : #include "lib/string/util_string.h"
      15             : #include "lib/string/printf.h"
      16             : #include "lib/memarea/memarea.h"
      17             : #include "lib/crypt_ops/crypto_rsa.h"
      18             : #include "lib/ctime/di_ops.h"
      19             : 
      20             : #include <string.h>
      21             : 
      22             : #define MIN_ANNOTATION A_PURPOSE
      23             : #define MAX_ANNOTATION A_UNKNOWN_
      24             : 
      25             : #define ALLOC_ZERO(sz) memarea_alloc_zero(area,sz)
      26             : #define ALLOC(sz) memarea_alloc(area,sz)
      27             : #define STRDUP(str) memarea_strdup(area,str)
      28             : #define STRNDUP(str,n) memarea_strndup(area,(str),(n))
      29             : 
      30             : #define RET_ERR(msg)                                               \
      31             :   STMT_BEGIN                                                       \
      32             :     if (tok) token_clear(tok);                                      \
      33             :     tok = ALLOC_ZERO(sizeof(directory_token_t));                   \
      34             :     tok->tp = ERR_;                                                \
      35             :     tok->error = STRDUP(msg);                                      \
      36             :     goto done_tokenizing;                                          \
      37             :   STMT_END
      38             : 
      39             : /** Free all resources allocated for <b>tok</b> */
      40             : void
      41      184120 : token_clear(directory_token_t *tok)
      42             : {
      43      184120 :   if (tok->key)
      44        1428 :     crypto_pk_free(tok->key);
      45      184120 : }
      46             : 
      47             : /** Read all tokens from a string between <b>start</b> and <b>end</b>, and add
      48             :  * them to <b>out</b>.  Parse according to the token rules in <b>table</b>.
      49             :  * Caller must free tokens in <b>out</b>.  If <b>end</b> is NULL, use the
      50             :  * entire string.
      51             :  */
      52             : int
      53        6430 : tokenize_string(memarea_t *area,
      54             :                 const char *start, const char *end, smartlist_t *out,
      55             :                 const token_rule_t *table, int flags)
      56             : {
      57        6430 :   const char **s;
      58        6430 :   directory_token_t *tok = NULL;
      59        6430 :   int counts[NIL_];
      60        6430 :   int i;
      61        6430 :   int first_nonannotation;
      62        6430 :   int prev_len = smartlist_len(out);
      63        6430 :   tor_assert(area);
      64             : 
      65        6430 :   s = &start;
      66        6430 :   if (!end) {
      67           8 :     end = start+strlen(start);
      68             :   } else {
      69             :     /* it's only meaningful to check for nuls if we got an end-of-string ptr */
      70        6422 :     if (memchr(start, '\0', end-start)) {
      71          18 :       log_warn(LD_DIR, "parse error: internal NUL character.");
      72          18 :       return -1;
      73             :     }
      74             :   }
      75      987448 :   for (i = 0; i < NIL_; ++i)
      76      981036 :     counts[i] = 0;
      77             : 
      78        6461 :   SMARTLIST_FOREACH(out, const directory_token_t *, t, ++counts[t->tp]);
      79             : 
      80      134935 :   while (*s < end && (!tok || tok->tp != EOF_)) {
      81      129000 :     tok = get_next_token(area, s, end, table);
      82      129000 :     if (tok->tp == ERR_) {
      83         477 :       log_warn(LD_DIR, "parse error: %s", tok->error);
      84         477 :       token_clear(tok);
      85         477 :       return -1;
      86             :     }
      87      128523 :     ++counts[tok->tp];
      88      128523 :     smartlist_add(out, tok);
      89      128523 :     *s = eat_whitespace_eos(*s, end);
      90             :   }
      91             : 
      92        5935 :   if (flags & TS_NOCHECK)
      93             :     return 0;
      94             : 
      95        5910 :   if ((flags & TS_ANNOTATIONS_OK)) {
      96         109 :     first_nonannotation = -1;
      97         109 :     for (i = 0; i < smartlist_len(out); ++i) {
      98         109 :       tok = smartlist_get(out, i);
      99         109 :       if (tok->tp < MIN_ANNOTATION || tok->tp > MAX_ANNOTATION) {
     100             :         first_nonannotation = i;
     101             :         break;
     102             :       }
     103             :     }
     104          55 :     if (first_nonannotation < 0) {
     105           0 :       log_warn(LD_DIR, "parse error: item contains only annotations");
     106           0 :       return -1;
     107             :     }
     108         776 :     for (i=first_nonannotation;  i < smartlist_len(out); ++i) {
     109         722 :       tok = smartlist_get(out, i);
     110         722 :       if (tok->tp >= MIN_ANNOTATION && tok->tp <= MAX_ANNOTATION) {
     111           1 :         log_warn(LD_DIR, "parse error: Annotations mixed with keywords");
     112           1 :         return -1;
     113             :       }
     114             :     }
     115          54 :     if ((flags & TS_NO_NEW_ANNOTATIONS)) {
     116           1 :       if (first_nonannotation != prev_len) {
     117           0 :         log_warn(LD_DIR, "parse error: Unexpected annotations.");
     118           0 :         return -1;
     119             :       }
     120             :     }
     121             :   } else {
     122      128757 :     for (i=0;  i < smartlist_len(out); ++i) {
     123      122914 :       tok = smartlist_get(out, i);
     124      122914 :       if (tok->tp >= MIN_ANNOTATION && tok->tp <= MAX_ANNOTATION) {
     125          12 :         log_warn(LD_DIR, "parse error: no annotations allowed.");
     126          12 :         return -1;
     127             :       }
     128             :     }
     129             :     first_nonannotation = 0;
     130             :   }
     131       94413 :   for (i = 0; table[i].t; ++i) {
     132       89703 :     if (counts[table[i].v] < table[i].min_cnt) {
     133        1154 :       log_warn(LD_DIR, "Parse error: missing %s element.", table[i].t);
     134        1154 :       return -1;
     135             :     }
     136       88549 :     if (counts[table[i].v] > table[i].max_cnt) {
     137          25 :       log_warn(LD_DIR, "Parse error: too many %s elements.", table[i].t);
     138          25 :       return -1;
     139             :     }
     140       88524 :     if (table[i].pos & AT_START) {
     141        3119 :       if (smartlist_len(out) < 1 ||
     142        3119 :           (tok = smartlist_get(out, first_nonannotation))->tp != table[i].v) {
     143           5 :         log_warn(LD_DIR, "Parse error: first item is not %s.", table[i].t);
     144           5 :         return -1;
     145             :       }
     146             :     }
     147       88519 :     if (table[i].pos & AT_END) {
     148         344 :       if (smartlist_len(out) < 1 ||
     149         344 :           (tok = smartlist_get(out, smartlist_len(out)-1))->tp != table[i].v) {
     150           3 :         log_warn(LD_DIR, "Parse error: last item is not %s.", table[i].t);
     151           3 :         return -1;
     152             :       }
     153             :     }
     154             :   }
     155             :   return 0;
     156             : }
     157             : 
     158             : /** Helper: parse space-separated arguments from the string <b>s</b> ending at
     159             :  * <b>eol</b>, and store them in the args field of <b>tok</b>.  Store the
     160             :  * number of parsed elements into the n_args field of <b>tok</b>.  Allocate
     161             :  * all storage in <b>area</b>.  Return the number of arguments parsed, or
     162             :  * return -1 if there was an insanely high number of arguments. */
     163             : static inline int
     164      118748 : get_token_arguments(memarea_t *area, directory_token_t *tok,
     165             :                     const char *s, const char *eol)
     166             : {
     167             : /** Largest number of arguments we'll accept to any token, ever. */
     168             : #define MAX_ARGS 512
     169      118748 :   char *mem = memarea_strndup(area, s, eol-s);
     170      118748 :   char *cp = mem;
     171      118748 :   int j = 0;
     172      118748 :   char *args[MAX_ARGS];
     173      218885 :   while (*cp) {
     174      198924 :     if (j == MAX_ARGS)
     175             :       return -1;
     176      198917 :     args[j++] = cp;
     177      198917 :     cp = (char*)find_whitespace(cp);
     178      198917 :     if (!cp || !*cp)
     179             :       break; /* End of the line. */
     180      100137 :     *cp++ = '\0';
     181      100137 :     cp = (char*)eat_whitespace(cp);
     182             :   }
     183      118741 :   tok->n_args = j;
     184      118741 :   tok->args = memarea_memdup(area, args, j*sizeof(char*));
     185      118741 :   return j;
     186             : #undef MAX_ARGS
     187             : }
     188             : 
     189             : /** Helper: make sure that the token <b>tok</b> with keyword <b>kwd</b> obeys
     190             :  * the object syntax of <b>o_syn</b>.  Allocate all storage in <b>area</b>.
     191             :  * Return <b>tok</b> on success, or a new ERR_ token if the token didn't
     192             :  * conform to the syntax we wanted.
     193             :  **/
     194             : static inline directory_token_t *
     195      183208 : token_check_object(memarea_t *area, const char *kwd,
     196             :                    directory_token_t *tok, obj_syntax o_syn)
     197             : {
     198      183208 :   char ebuf[128];
     199      183208 :   switch (o_syn) {
     200      133253 :     case NO_OBJ:
     201             :       /* No object is allowed for this token. */
     202      133253 :       if (tok->object_body) {
     203          21 :         tor_snprintf(ebuf, sizeof(ebuf), "Unexpected object for %s", kwd);
     204          21 :         RET_ERR(ebuf);
     205             :       }
     206      133232 :       if (tok->key) {
     207           0 :         tor_snprintf(ebuf, sizeof(ebuf), "Unexpected public key for %s", kwd);
     208           0 :         RET_ERR(ebuf);
     209             :       }
     210             :       break;
     211        4147 :     case NEED_OBJ:
     212             :       /* There must be a (non-key) object. */
     213        4147 :       if (!tok->object_body) {
     214           5 :         tor_snprintf(ebuf, sizeof(ebuf), "Missing object for %s", kwd);
     215           5 :         RET_ERR(ebuf);
     216             :       }
     217             :       break;
     218         607 :     case NEED_KEY_1024: /* There must be a 1024-bit public key. */
     219         607 :       if (tok->key && crypto_pk_num_bits(tok->key) != PK_BYTES*8) {
     220           0 :         tor_snprintf(ebuf, sizeof(ebuf), "Wrong size on key for %s: %d bits",
     221           0 :                      kwd, crypto_pk_num_bits(tok->key));
     222           0 :         RET_ERR(ebuf);
     223             :       }
     224        2477 :       FALLTHROUGH;
     225             :     case NEED_KEY: /* There must be some kind of key. */
     226        2477 :       if (!tok->key) {
     227           8 :         tor_snprintf(ebuf, sizeof(ebuf), "Missing public key for %s", kwd);
     228           8 :         RET_ERR(ebuf);
     229             :       }
     230             : 
     231        2469 :       if (crypto_pk_key_is_private(tok->key)) {
     232           0 :         tor_snprintf(ebuf, sizeof(ebuf),
     233             :                 "Private key given for %s, which wants a public key", kwd);
     234           0 :         RET_ERR(ebuf);
     235             :       }
     236             :       break;
     237             :     case OBJ_OK:
     238             :       /* Anything goes with this token. */
     239             :       break;
     240             :   }
     241             : 
     242      183208 :  done_tokenizing:
     243      183208 :   return tok;
     244             : }
     245             : 
     246             : /** Return true iff the <b>memlen</b>-byte chunk of memory at
     247             :  * <b>memlen</b> is the same length as <b>token</b>, and their
     248             :  * contents are equal. */
     249             : static bool
     250     1949769 : mem_eq_token(const void *mem, size_t memlen, const char *token)
     251             : {
     252     1949769 :   size_t len = strlen(token);
     253     1949769 :   return memlen == len && fast_memeq(mem, token, len);
     254             : }
     255             : 
/** Helper function: read the next token from *<b>s</b> and return it.
 * Input is never read past <b>eos</b>.  The keyword at the start of the
 * line is looked up in <b>table</b>, whose last entry has a NULL keyword.
 * All storage for the token is allocated in <b>area</b>.
 *
 * A token consists of a keyword line (optionally prefixed by "opt"), its
 * arguments, and optionally a following object delimited by
 * "-----BEGIN name-----" / "-----END name-----" lines whose body is
 * base64-decoded.  An object named "RSA PUBLIC KEY" is additionally decoded
 * into tok->key.
 *
 * A line whose keyword is not in <b>table</b> is not an error: it yields an
 * A_UNKNOWN_ token if the line starts with '@' and a K_OPT token otherwise,
 * with the whole line as its single argument and any object accepted.
 *
 * On success, *<b>s</b> is left at the first non-whitespace position after
 * the keyword line when no object follows, or at the end of the object's END
 * line when one was read.  On failure the returned token has type ERR_ and
 * its error field holds a message; no particular position of *<b>s</b>
 * should be relied on in that case.
 */
directory_token_t *
get_next_token(memarea_t *area,
               const char **s, const char *eos, const token_rule_t *table)
{
  /** Reject any object larger than this; it is probably an overflow, an
   * attack, a bug, or some other nonsense. */
#define MAX_UNPARSED_OBJECT_SIZE (128*1024)
  /** Reject any line longer than this; it is probably an overflow, an
   * attack, a bug, or some other nonsense. */
#define MAX_LINE_LENGTH (128*1024)

  const char *next, *eol;
  size_t obname_len;
  int i;
  directory_token_t *tok;
  obj_syntax o_syn = NO_OBJ;
  char ebuf[128];
  const char *kwd = "";

  tor_assert(area);
  tok = ALLOC_ZERO(sizeof(directory_token_t));
  /* ERR_ doubles as "no keyword matched yet" until the table search below. */
  tok->tp = ERR_;

  /* Set *s to first token, eol to end-of-line, next to after first token */
  *s = eat_whitespace_eos(*s, eos); /* eat multi-line whitespace */
  tor_assert(eos >= *s);
  eol = memchr(*s, '\n', eos-*s);
  if (!eol)
    eol = eos;
  if (eol - *s > MAX_LINE_LENGTH) {
    RET_ERR("Line far too long");
  }

  next = find_whitespace_eos(*s, eol);

  if (mem_eq_token(*s, next-*s, "opt")) {
    /* Skip past an "opt" at the start of the line. */
    *s = eat_whitespace_eos_no_nl(next, eol);
    next = find_whitespace_eos(*s, eol);
  } else if (*s == eos) {  /* If no "opt", and end-of-line, line is invalid */
    RET_ERR("Unexpected EOF");
  }

  /* Search the table for the appropriate entry.  (I tried a binary search
   * instead, but it wasn't any faster.) */
  for (i = 0; table[i].t ; ++i) {
    if (mem_eq_token(*s, next-*s, table[i].t)) {
      /* We've found the keyword. */
      kwd = table[i].t;
      tok->tp = table[i].v;
      o_syn = table[i].os;
      *s = eat_whitespace_eos_no_nl(next, eol);
      /* We go ahead whether there are arguments or not, so that tok->args is
       * always set if we want arguments. */
      if (table[i].concat_args) {
        /* The keyword takes the line as a single argument */
        tok->args = ALLOC(sizeof(char*));
        tok->args[0] = STRNDUP(*s,eol-*s); /* Grab everything on line */
        tok->n_args = 1;
      } else {
        /* This keyword takes multiple arguments. */
        if (get_token_arguments(area, tok, *s, eol)<0) {
          tor_snprintf(ebuf, sizeof(ebuf),"Far too many arguments to %s", kwd);
          RET_ERR(ebuf);
        }
        *s = eol;
      }
      /* Enforce the rule's bounds on the number of arguments. */
      if (tok->n_args < table[i].min_args) {
        tor_snprintf(ebuf, sizeof(ebuf), "Too few arguments to %s", kwd);
        RET_ERR(ebuf);
      } else if (tok->n_args > table[i].max_args) {
        tor_snprintf(ebuf, sizeof(ebuf), "Too many arguments to %s", kwd);
        RET_ERR(ebuf);
      }
      break;
    }
  }

  if (tok->tp == ERR_) {
    /* No keyword matched; call it an "K_opt" or "A_unrecognized" */
    if (*s < eol && **s == '@')
      tok->tp = A_UNKNOWN_;
    else
      tok->tp = K_OPT;
    /* Keep the whole line (keyword included) as the single argument. */
    tok->args = ALLOC(sizeof(char*));
    tok->args[0] = STRNDUP(*s, eol-*s);
    tok->n_args = 1;
    o_syn = OBJ_OK;
  }

  /* Check whether there's an object present */
  *s = eat_whitespace_eos(eol, eos);  /* Scan from end of first line */
  tor_assert(eos >= *s);
  eol = memchr(*s, '\n', eos-*s);
  /* 11 is the length of "-----BEGIN ". */
  if (!eol || eol-*s<11 || strcmpstart(*s, "-----BEGIN ")) /* No object. */
    goto check_object;

  /* 16 is the 11-byte "-----BEGIN " prefix plus the 5-byte "-----" suffix,
   * so eol-*s-16 is the length of the object name between them. */
  if (eol - *s <= 16 || memchr(*s+11,'\0',eol-*s-16) || /* no short lines, */
      !mem_eq_token(eol-5, 5, "-----") ||   /* nuls or invalid endings */
      (eol-*s) > MAX_UNPARSED_OBJECT_SIZE) {     /* name too long */
    RET_ERR("Malformed object: bad begin line");
  }
  tok->object_type = STRNDUP(*s+11, eol-*s-16);
  obname_len = eol-*s-16; /* store objname length here to avoid a strlen() */
  *s = eol+1;    /* Set *s to possible start of object data (could be eos) */

  /* Go to the end of the object */
  next = tor_memstr(*s, eos-*s, "-----END ");
  if (!next) {
    RET_ERR("Malformed object: missing object end line");
  }
  tor_assert(eos >= next);
  eol = memchr(next, '\n', eos-next);
  if (!eol)  /* end-of-line marker, or eos if there's no '\n' */
    eol = eos;
  /* Validate the ending tag, which should be 9 + NAME + 5 + eol */
  if ((size_t)(eol-next) != 9+obname_len+5 ||
      !mem_eq_token(next+9, obname_len, tok->object_type) ||
      !mem_eq_token(eol-5, 5, "-----")) {
    tor_snprintf(ebuf, sizeof(ebuf), "Malformed object: mismatched end tag %s",
             tok->object_type);
    ebuf[sizeof(ebuf)-1] = '\0';
    RET_ERR(ebuf);
  }
  /* *s is the start of the encoded body and next is the start of the END
   * line, so next - *s is the size of the still-encoded object. */
  if (next - *s > MAX_UNPARSED_OBJECT_SIZE)
    RET_ERR("Couldn't parse object: missing footer or object much too big.");

  {
    int r;
    size_t maxsize = base64_decode_maxsize(next-*s);
    tok->object_body = ALLOC(maxsize);
    r = base64_decode(tok->object_body, maxsize, *s, next-*s);
    if (r<0)
      RET_ERR("Malformed object: bad base64-encoded data");
    tok->object_size = r;
  }

  if (!strcmp(tok->object_type, "RSA PUBLIC KEY")) { /* If it's a public key */
    /* Only rules that ask for a key, or accept anything, may carry one. */
    if (o_syn != NEED_KEY && o_syn != NEED_KEY_1024 && o_syn != OBJ_OK) {
      RET_ERR("Unexpected public key.");
    }
    tok->key = crypto_pk_asn1_decode(tok->object_body, tok->object_size);
    if (! tok->key)
      RET_ERR("Couldn't parse public key.");
  }
  *s = eol;

 check_object:
  /* May replace tok with an ERR_ token if the object syntax is violated. */
  tok = token_check_object(area, kwd, tok, o_syn);

 done_tokenizing:
  return tok;

  /* These helper macros are not used past this point in the file. */
#undef RET_ERR
#undef ALLOC
#undef ALLOC_ZERO
#undef STRDUP
#undef STRNDUP
}
     419             : 
     420             : /** Find the first token in <b>s</b> whose keyword is <b>keyword</b>; fail
     421             :  * with an assert if no such keyword is found.
     422             :  */
     423             : directory_token_t *
     424       21255 : find_by_keyword_(smartlist_t *s, directory_keyword keyword,
     425             :                  const char *keyword_as_string)
     426             : {
     427       21255 :   directory_token_t *tok = find_opt_by_keyword(s, keyword);
     428       21255 :   if (PREDICT_UNLIKELY(!tok)) {
     429           0 :     log_err(LD_BUG, "Missing %s [%d] in directory object that should have "
     430             :          "been validated. Internal error.", keyword_as_string, (int)keyword);
     431           0 :     tor_assert(tok);
     432             :   }
     433       21255 :   return tok;
     434             : }
     435             : 
     436             : /** Find the first token in <b>s</b> whose keyword is <b>keyword</b>; return
     437             :  * NULL if no such keyword is found.
     438             :  */
     439             : directory_token_t *
     440       50885 : find_opt_by_keyword(const smartlist_t *s, directory_keyword keyword)
     441             : {
     442      693577 :   SMARTLIST_FOREACH(s, directory_token_t *, t, if (t->tp == keyword) return t);
     443             :   return NULL;
     444             : }
     445             : 
     446             : /** If there are any directory_token_t entries in <b>s</b> whose keyword is
     447             :  * <b>k</b>, return a newly allocated smartlist_t containing all such entries,
     448             :  * in the same order in which they occur in <b>s</b>.  Otherwise return
     449             :  * NULL. */
     450             : smartlist_t *
     451        3305 : find_all_by_keyword(const smartlist_t *s, directory_keyword k)
     452             : {
     453        3305 :   smartlist_t *out = NULL;
     454       65427 :   SMARTLIST_FOREACH(s, directory_token_t *, t,
     455             :                     if (t->tp == k) {
     456             :                     if (!out)
     457             :                     out = smartlist_new();
     458             :                     smartlist_add(out, t);
     459             :                     });
     460        3305 :   return out;
     461             : }

Generated by: LCOV version 1.14