Tor  0.4.7.0-alpha-dev
parsecommon.c
Go to the documentation of this file.
1 /* Copyright (c) 2016-2021, The Tor Project, Inc. */
2 /* See LICENSE for licensing information */
3 
4 /**
5  * \file parsecommon.c
6  * \brief Common code to parse and validate various types of descriptors.
7  **/
8 
10 #include "lib/log/log.h"
11 #include "lib/log/util_bug.h"
12 #include "lib/encoding/binascii.h"
14 #include "lib/string/util_string.h"
15 #include "lib/string/printf.h"
16 #include "lib/memarea/memarea.h"
18 #include "lib/ctime/di_ops.h"
19 
20 #include <string.h>
21 
/* Keyword values in the inclusive range [MIN_ANNOTATION, MAX_ANNOTATION]
 * are treated as annotations by the tokenizer's position checks. */
#define MIN_ANNOTATION A_PURPOSE
#define MAX_ANNOTATION A_UNKNOWN_

/* Allocation shorthands.  Each one expands to a memarea call on a variable
 * named <b>area</b>, which must be in scope wherever the macro is used. */
#define ALLOC_ZERO(sz) memarea_alloc_zero(area,sz)
#define ALLOC(sz) memarea_alloc(area,sz)
#define STRDUP(str) memarea_strdup(area,str)
#define STRNDUP(str,n) memarea_strndup(area,(str),(n))

/* Abandon the token being built and report <b>msg</b> instead.
 *
 * Requires, at the point of use: a directory_token_t pointer named
 * <b>tok</b>, a memarea named <b>area</b>, and a label named
 * <b>done_tokenizing</b>.  Any existing token is cleared, <b>tok</b> is
 * replaced by a fresh zeroed token of type ERR_ whose error field is a copy
 * of <b>msg</b>, and control jumps to done_tokenizing. */
#define RET_ERR(msg)                                    \
  STMT_BEGIN                                            \
    if (tok) token_clear(tok);                          \
    tok = ALLOC_ZERO(sizeof(directory_token_t));        \
    tok->tp = ERR_;                                     \
    tok->error = STRDUP(msg);                           \
    goto done_tokenizing;                               \
  STMT_END
38 
39 /** Free all resources allocated for <b>tok</b> */
40 void
42 {
43  if (tok->key)
44  crypto_pk_free(tok->key);
45 }
46 
47 /** Read all tokens from a string between <b>start</b> and <b>end</b>, and add
48  * them to <b>out</b>. Parse according to the token rules in <b>table</b>.
49  * Caller must free tokens in <b>out</b>. If <b>end</b> is NULL, use the
50  * entire string.
51  */
52 int
54  const char *start, const char *end, smartlist_t *out,
55  const token_rule_t *table, int flags)
56 {
57  const char **s;
58  directory_token_t *tok = NULL;
59  int counts[NIL_];
60  int i;
61  int first_nonannotation;
62  int prev_len = smartlist_len(out);
63  tor_assert(area);
64 
65  s = &start;
66  if (!end) {
67  end = start+strlen(start);
68  } else {
69  /* it's only meaningful to check for nuls if we got an end-of-string ptr */
70  if (memchr(start, '\0', end-start)) {
71  log_warn(LD_DIR, "parse error: internal NUL character.");
72  return -1;
73  }
74  }
75  for (i = 0; i < NIL_; ++i)
76  counts[i] = 0;
77 
78  SMARTLIST_FOREACH(out, const directory_token_t *, t, ++counts[t->tp]);
79 
80  while (*s < end && (!tok || tok->tp != EOF_)) {
81  tok = get_next_token(area, s, end, table);
82  if (tok->tp == ERR_) {
83  log_warn(LD_DIR, "parse error: %s", tok->error);
84  token_clear(tok);
85  return -1;
86  }
87  ++counts[tok->tp];
88  smartlist_add(out, tok);
89  *s = eat_whitespace_eos(*s, end);
90  }
91 
92  if (flags & TS_NOCHECK)
93  return 0;
94 
95  if ((flags & TS_ANNOTATIONS_OK)) {
96  first_nonannotation = -1;
97  for (i = 0; i < smartlist_len(out); ++i) {
98  tok = smartlist_get(out, i);
99  if (tok->tp < MIN_ANNOTATION || tok->tp > MAX_ANNOTATION) {
100  first_nonannotation = i;
101  break;
102  }
103  }
104  if (first_nonannotation < 0) {
105  log_warn(LD_DIR, "parse error: item contains only annotations");
106  return -1;
107  }
108  for (i=first_nonannotation; i < smartlist_len(out); ++i) {
109  tok = smartlist_get(out, i);
110  if (tok->tp >= MIN_ANNOTATION && tok->tp <= MAX_ANNOTATION) {
111  log_warn(LD_DIR, "parse error: Annotations mixed with keywords");
112  return -1;
113  }
114  }
115  if ((flags & TS_NO_NEW_ANNOTATIONS)) {
116  if (first_nonannotation != prev_len) {
117  log_warn(LD_DIR, "parse error: Unexpected annotations.");
118  return -1;
119  }
120  }
121  } else {
122  for (i=0; i < smartlist_len(out); ++i) {
123  tok = smartlist_get(out, i);
124  if (tok->tp >= MIN_ANNOTATION && tok->tp <= MAX_ANNOTATION) {
125  log_warn(LD_DIR, "parse error: no annotations allowed.");
126  return -1;
127  }
128  }
129  first_nonannotation = 0;
130  }
131  for (i = 0; table[i].t; ++i) {
132  if (counts[table[i].v] < table[i].min_cnt) {
133  log_warn(LD_DIR, "Parse error: missing %s element.", table[i].t);
134  return -1;
135  }
136  if (counts[table[i].v] > table[i].max_cnt) {
137  log_warn(LD_DIR, "Parse error: too many %s elements.", table[i].t);
138  return -1;
139  }
140  if (table[i].pos & AT_START) {
141  if (smartlist_len(out) < 1 ||
142  (tok = smartlist_get(out, first_nonannotation))->tp != table[i].v) {
143  log_warn(LD_DIR, "Parse error: first item is not %s.", table[i].t);
144  return -1;
145  }
146  }
147  if (table[i].pos & AT_END) {
148  if (smartlist_len(out) < 1 ||
149  (tok = smartlist_get(out, smartlist_len(out)-1))->tp != table[i].v) {
150  log_warn(LD_DIR, "Parse error: last item is not %s.", table[i].t);
151  return -1;
152  }
153  }
154  }
155  return 0;
156 }
157 
158 /** Helper: parse space-separated arguments from the string <b>s</b> ending at
159  * <b>eol</b>, and store them in the args field of <b>tok</b>. Store the
160  * number of parsed elements into the n_args field of <b>tok</b>. Allocate
161  * all storage in <b>area</b>. Return the number of arguments parsed, or
162  * return -1 if there was an insanely high number of arguments. */
163 static inline int
165  const char *s, const char *eol)
166 {
167 /** Largest number of arguments we'll accept to any token, ever. */
168 #define MAX_ARGS 512
169  char *mem = memarea_strndup(area, s, eol-s);
170  char *cp = mem;
171  int j = 0;
172  char *args[MAX_ARGS];
173  while (*cp) {
174  if (j == MAX_ARGS)
175  return -1;
176  args[j++] = cp;
177  cp = (char*)find_whitespace(cp);
178  if (!cp || !*cp)
179  break; /* End of the line. */
180  *cp++ = '\0';
181  cp = (char*)eat_whitespace(cp);
182  }
183  tok->n_args = j;
184  tok->args = memarea_memdup(area, args, j*sizeof(char*));
185  return j;
186 #undef MAX_ARGS
187 }
188 
189 /** Helper: make sure that the token <b>tok</b> with keyword <b>kwd</b> obeys
190  * the object syntax of <b>o_syn</b>. Allocate all storage in <b>area</b>.
191  * Return <b>tok</b> on success, or a new ERR_ token if the token didn't
192  * conform to the syntax we wanted.
193  **/
194 static inline directory_token_t *
195 token_check_object(memarea_t *area, const char *kwd,
196  directory_token_t *tok, obj_syntax o_syn)
197 {
198  char ebuf[128];
199  switch (o_syn) {
200  case NO_OBJ:
201  /* No object is allowed for this token. */
202  if (tok->object_body) {
203  tor_snprintf(ebuf, sizeof(ebuf), "Unexpected object for %s", kwd);
204  RET_ERR(ebuf);
205  }
206  if (tok->key) {
207  tor_snprintf(ebuf, sizeof(ebuf), "Unexpected public key for %s", kwd);
208  RET_ERR(ebuf);
209  }
210  break;
211  case NEED_OBJ:
212  /* There must be a (non-key) object. */
213  if (!tok->object_body) {
214  tor_snprintf(ebuf, sizeof(ebuf), "Missing object for %s", kwd);
215  RET_ERR(ebuf);
216  }
217  break;
218  case NEED_KEY_1024: /* There must be a 1024-bit public key. */
219  if (tok->key && crypto_pk_num_bits(tok->key) != PK_BYTES*8) {
220  tor_snprintf(ebuf, sizeof(ebuf), "Wrong size on key for %s: %d bits",
221  kwd, crypto_pk_num_bits(tok->key));
222  RET_ERR(ebuf);
223  }
224  FALLTHROUGH;
225  case NEED_KEY: /* There must be some kind of key. */
226  if (!tok->key) {
227  tor_snprintf(ebuf, sizeof(ebuf), "Missing public key for %s", kwd);
228  RET_ERR(ebuf);
229  }
230 
231  if (crypto_pk_key_is_private(tok->key)) {
232  tor_snprintf(ebuf, sizeof(ebuf),
233  "Private key given for %s, which wants a public key", kwd);
234  RET_ERR(ebuf);
235  }
236  break;
237  case OBJ_OK:
238  /* Anything goes with this token. */
239  break;
240  }
241 
242  done_tokenizing:
243  return tok;
244 }
245 
/** Return true iff the <b>memlen</b>-byte chunk of memory at <b>mem</b>
 * has the same length as the NUL-terminated string <b>token</b> and the
 * same contents. */
static bool
mem_eq_token(const void *mem, size_t memlen, const char *token)
{
  const size_t token_len = strlen(token);

  /* Different lengths can never match; skip the byte comparison. */
  if (memlen != token_len)
    return false;
  return fast_memeq(mem, token, token_len);
}
255 
256 /** Helper function: read the next token from *s, advance *s to the end of the
257  * token, and return the parsed token. Parse *<b>s</b> according to the list
258  * of tokens in <b>table</b>.
259  */
262  const char **s, const char *eos, const token_rule_t *table)
263 {
264  /** Reject any object at least this big; it is probably an overflow, an
265  * attack, a bug, or some other nonsense. */
266 #define MAX_UNPARSED_OBJECT_SIZE (128*1024)
267  /** Reject any line at least this big; it is probably an overflow, an
268  * attack, a bug, or some other nonsense. */
269 #define MAX_LINE_LENGTH (128*1024)
270 
271  const char *next, *eol;
272  size_t obname_len;
273  int i;
274  directory_token_t *tok;
275  obj_syntax o_syn = NO_OBJ;
276  char ebuf[128];
277  const char *kwd = "";
278 
279  tor_assert(area);
280  tok = ALLOC_ZERO(sizeof(directory_token_t));
281  tok->tp = ERR_;
282 
283  /* Set *s to first token, eol to end-of-line, next to after first token */
284  *s = eat_whitespace_eos(*s, eos); /* eat multi-line whitespace */
285  tor_assert(eos >= *s);
286  eol = memchr(*s, '\n', eos-*s);
287  if (!eol)
288  eol = eos;
289  if (eol - *s > MAX_LINE_LENGTH) {
290  RET_ERR("Line far too long");
291  }
292 
293  next = find_whitespace_eos(*s, eol);
294 
295  if (mem_eq_token(*s, next-*s, "opt")) {
296  /* Skip past an "opt" at the start of the line. */
297  *s = eat_whitespace_eos_no_nl(next, eol);
298  next = find_whitespace_eos(*s, eol);
299  } else if (*s == eos) { /* If no "opt", and end-of-line, line is invalid */
300  RET_ERR("Unexpected EOF");
301  }
302 
303  /* Search the table for the appropriate entry. (I tried a binary search
304  * instead, but it wasn't any faster.) */
305  for (i = 0; table[i].t ; ++i) {
306  if (mem_eq_token(*s, next-*s, table[i].t)) {
307  /* We've found the keyword. */
308  kwd = table[i].t;
309  tok->tp = table[i].v;
310  o_syn = table[i].os;
311  *s = eat_whitespace_eos_no_nl(next, eol);
312  /* We go ahead whether there are arguments or not, so that tok->args is
313  * always set if we want arguments. */
314  if (table[i].concat_args) {
315  /* The keyword takes the line as a single argument */
316  tok->args = ALLOC(sizeof(char*));
317  tok->args[0] = STRNDUP(*s,eol-*s); /* Grab everything on line */
318  tok->n_args = 1;
319  } else {
320  /* This keyword takes multiple arguments. */
321  if (get_token_arguments(area, tok, *s, eol)<0) {
322  tor_snprintf(ebuf, sizeof(ebuf),"Far too many arguments to %s", kwd);
323  RET_ERR(ebuf);
324  }
325  *s = eol;
326  }
327  if (tok->n_args < table[i].min_args) {
328  tor_snprintf(ebuf, sizeof(ebuf), "Too few arguments to %s", kwd);
329  RET_ERR(ebuf);
330  } else if (tok->n_args > table[i].max_args) {
331  tor_snprintf(ebuf, sizeof(ebuf), "Too many arguments to %s", kwd);
332  RET_ERR(ebuf);
333  }
334  break;
335  }
336  }
337 
338  if (tok->tp == ERR_) {
339  /* No keyword matched; call it an "K_opt" or "A_unrecognized" */
340  if (*s < eol && **s == '@')
341  tok->tp = A_UNKNOWN_;
342  else
343  tok->tp = K_OPT;
344  tok->args = ALLOC(sizeof(char*));
345  tok->args[0] = STRNDUP(*s, eol-*s);
346  tok->n_args = 1;
347  o_syn = OBJ_OK;
348  }
349 
350  /* Check whether there's an object present */
351  *s = eat_whitespace_eos(eol, eos); /* Scan from end of first line */
352  tor_assert(eos >= *s);
353  eol = memchr(*s, '\n', eos-*s);
354  if (!eol || eol-*s<11 || strcmpstart(*s, "-----BEGIN ")) /* No object. */
355  goto check_object;
356 
357  if (eol - *s <= 16 || memchr(*s+11,'\0',eol-*s-16) || /* no short lines, */
358  !mem_eq_token(eol-5, 5, "-----") || /* nuls or invalid endings */
359  (eol-*s) > MAX_UNPARSED_OBJECT_SIZE) { /* name too long */
360  RET_ERR("Malformed object: bad begin line");
361  }
362  tok->object_type = STRNDUP(*s+11, eol-*s-16);
363  obname_len = eol-*s-16; /* store objname length here to avoid a strlen() */
364  *s = eol+1; /* Set *s to possible start of object data (could be eos) */
365 
366  /* Go to the end of the object */
367  next = tor_memstr(*s, eos-*s, "-----END ");
368  if (!next) {
369  RET_ERR("Malformed object: missing object end line");
370  }
371  tor_assert(eos >= next);
372  eol = memchr(next, '\n', eos-next);
373  if (!eol) /* end-of-line marker, or eos if there's no '\n' */
374  eol = eos;
375  /* Validate the ending tag, which should be 9 + NAME + 5 + eol */
376  if ((size_t)(eol-next) != 9+obname_len+5 ||
377  !mem_eq_token(next+9, obname_len, tok->object_type) ||
378  !mem_eq_token(eol-5, 5, "-----")) {
379  tor_snprintf(ebuf, sizeof(ebuf), "Malformed object: mismatched end tag %s",
380  tok->object_type);
381  ebuf[sizeof(ebuf)-1] = '\0';
382  RET_ERR(ebuf);
383  }
384  if (next - *s > MAX_UNPARSED_OBJECT_SIZE)
385  RET_ERR("Couldn't parse object: missing footer or object much too big.");
386 
387  {
388  int r;
389  size_t maxsize = base64_decode_maxsize(next-*s);
390  tok->object_body = ALLOC(maxsize);
391  r = base64_decode(tok->object_body, maxsize, *s, next-*s);
392  if (r<0)
393  RET_ERR("Malformed object: bad base64-encoded data");
394  tok->object_size = r;
395  }
396 
397  if (!strcmp(tok->object_type, "RSA PUBLIC KEY")) { /* If it's a public key */
398  if (o_syn != NEED_KEY && o_syn != NEED_KEY_1024 && o_syn != OBJ_OK) {
399  RET_ERR("Unexpected public key.");
400  }
402  if (! tok->key)
403  RET_ERR("Couldn't parse public key.");
404  }
405  *s = eol;
406 
407  check_object:
408  tok = token_check_object(area, kwd, tok, o_syn);
409 
410  done_tokenizing:
411  return tok;
412 
413 #undef RET_ERR
414 #undef ALLOC
415 #undef ALLOC_ZERO
416 #undef STRDUP
417 #undef STRNDUP
418 }
419 
420 /** Find the first token in <b>s</b> whose keyword is <b>keyword</b>; fail
421  * with an assert if no such keyword is found.
422  */
425  const char *keyword_as_string)
426 {
427  directory_token_t *tok = find_opt_by_keyword(s, keyword);
428  if (PREDICT_UNLIKELY(!tok)) {
429  log_err(LD_BUG, "Missing %s [%d] in directory object that should have "
430  "been validated. Internal error.", keyword_as_string, (int)keyword);
431  tor_assert(tok);
432  }
433  return tok;
434 }
435 
436 /** Find the first token in <b>s</b> whose keyword is <b>keyword</b>; return
437  * NULL if no such keyword is found.
438  */
441 {
442  SMARTLIST_FOREACH(s, directory_token_t *, t, if (t->tp == keyword) return t);
443  return NULL;
444 }
445 
446 /** If there are any directory_token_t entries in <b>s</b> whose keyword is
447  * <b>k</b>, return a newly allocated smartlist_t containing all such entries,
448  * in the same order in which they occur in <b>s</b>. Otherwise return
449  * NULL. */
450 smartlist_t *
452 {
453  smartlist_t *out = NULL;
455  if (t->tp == k) {
456  if (!out)
457  out = smartlist_new();
458  smartlist_add(out, t);
459  });
460  return out;
461 }
int base64_decode(char *dest, size_t destlen, const char *src, size_t srclen)
Definition: binascii.c:396
size_t base64_decode_maxsize(size_t srclen)
Definition: binascii.c:187
Header for binascii.c.
static conn_counts_t counts
Definition: connstats.c:72
Headers for crypto_rsa.c.
#define PK_BYTES
Definition: crypto_rsa.h:24
crypto_pk_t * crypto_pk_asn1_decode(const char *str, size_t len)
int crypto_pk_num_bits(crypto_pk_t *env)
int crypto_pk_key_is_private(const crypto_pk_t *key)
Headers for di_ops.c.
#define fast_memeq(a, b, c)
Definition: di_ops.h:35
Headers for log.c.
#define LD_BUG
Definition: log.h:86
#define LD_DIR
Definition: log.h:88
void * memarea_memdup(memarea_t *area, const void *s, size_t n)
Definition: memarea.c:257
char * memarea_strndup(memarea_t *area, const char *s, size_t n)
Definition: memarea.c:273
Header for memarea.c.
void token_clear(directory_token_t *tok)
Definition: parsecommon.c:41
int tokenize_string(memarea_t *area, const char *start, const char *end, smartlist_t *out, const token_rule_t *table, int flags)
Definition: parsecommon.c:53
static int get_token_arguments(memarea_t *area, directory_token_t *tok, const char *s, const char *eol)
Definition: parsecommon.c:164
directory_token_t * find_opt_by_keyword(const smartlist_t *s, directory_keyword keyword)
Definition: parsecommon.c:440
smartlist_t * find_all_by_keyword(const smartlist_t *s, directory_keyword k)
Definition: parsecommon.c:451
directory_token_t * get_next_token(memarea_t *area, const char **s, const char *eos, const token_rule_t *table)
Definition: parsecommon.c:261
static directory_token_t * token_check_object(memarea_t *area, const char *kwd, directory_token_t *tok, obj_syntax o_syn)
Definition: parsecommon.c:195
static bool mem_eq_token(const void *mem, size_t memlen, const char *token)
Definition: parsecommon.c:250
directory_token_t * find_by_keyword_(smartlist_t *s, directory_keyword keyword, const char *keyword_as_string)
Definition: parsecommon.c:424
Header file for parsecommon.c.
obj_syntax
Definition: parsecommon.h:218
@ NEED_KEY
Definition: parsecommon.h:222
@ OBJ_OK
Definition: parsecommon.h:223
@ NO_OBJ
Definition: parsecommon.h:219
@ NEED_OBJ
Definition: parsecommon.h:220
@ NEED_KEY_1024
Definition: parsecommon.h:221
directory_keyword
Definition: parsecommon.h:23
int tor_snprintf(char *str, size_t size, const char *format,...)
Definition: printf.c:27
Header for printf.c.
Header for smartlist.c.
void smartlist_add(smartlist_t *sl, void *element)
#define SMARTLIST_FOREACH(sl, type, var, cmd)
directory_keyword tp
Definition: parsecommon.h:202
struct crypto_pk_t * key
Definition: parsecommon.h:210
obj_syntax os
Definition: parsecommon.h:287
directory_keyword v
Definition: parsecommon.h:278
const char * t
Definition: parsecommon.h:276
Macros to manage assertions, fatal and non-fatal.
#define tor_assert(expr)
Definition: util_bug.h:102
const char * eat_whitespace_eos_no_nl(const char *s, const char *eos)
Definition: util_string.c:342
int strcmpstart(const char *s1, const char *s2)
Definition: util_string.c:215
const char * find_whitespace_eos(const char *s, const char *eos)
Definition: util_string.c:375
const char * find_whitespace(const char *s)
Definition: util_string.c:353
const char * eat_whitespace_eos(const char *s, const char *eos)
Definition: util_string.c:304
const char * eat_whitespace(const char *s)
Definition: util_string.c:277
Header for util_string.c.