Line data Source code
1 : /* Copyright (c) 2016-2021, The Tor Project, Inc. */
2 : /* See LICENSE for licensing information */
3 :
4 : /**
5 : * \file parsecommon.c
6 : * \brief Common code to parse and validate various types of descriptors.
7 : **/
8 :
9 : #include "feature/dirparse/parsecommon.h"
10 : #include "lib/log/log.h"
11 : #include "lib/log/util_bug.h"
12 : #include "lib/encoding/binascii.h"
13 : #include "lib/container/smartlist.h"
14 : #include "lib/string/util_string.h"
15 : #include "lib/string/printf.h"
16 : #include "lib/memarea/memarea.h"
17 : #include "lib/crypt_ops/crypto_rsa.h"
18 : #include "lib/ctime/di_ops.h"
19 :
20 : #include <string.h>
21 :
/* Lowest and highest token types that count as annotations.
 * tokenize_string() treats any token whose type lies in this inclusive range
 * as an annotation. */
#define MIN_ANNOTATION A_PURPOSE
#define MAX_ANNOTATION A_UNKNOWN_

/* Allocation shorthands for the tokenizer.  Each one expands to a memarea
 * call on a variable named <b>area</b>, so a variable with that name must be
 * in scope wherever these macros are used. */
#define ALLOC_ZERO(sz) memarea_alloc_zero(area,sz)
#define ALLOC(sz) memarea_alloc(area,sz)
#define STRDUP(str) memarea_strdup(area,str)
#define STRNDUP(str,n) memarea_strndup(area,(str),(n))

/* Abandon the token being built and report <b>msg</b> instead: clear
 * <b>tok</b> if it is non-NULL, replace it with a newly allocated token whose
 * type is ERR_ and whose error field is a copy of <b>msg</b>, then jump to
 * the label done_tokenizing.  The code using this macro must provide
 * <b>tok</b>, <b>area</b> (via ALLOC_ZERO/STRDUP), and that label. */
#define RET_ERR(msg)                                               \
  STMT_BEGIN                                                       \
    if (tok) token_clear(tok);                                      \
    tok = ALLOC_ZERO(sizeof(directory_token_t));                   \
    tok->tp = ERR_;                                                \
    tok->error = STRDUP(msg);                                      \
    goto done_tokenizing;                                          \
  STMT_END
38 :
39 : /** Free all resources allocated for <b>tok</b> */
40 : void
41 184120 : token_clear(directory_token_t *tok)
42 : {
43 184120 : if (tok->key)
44 1428 : crypto_pk_free(tok->key);
45 184120 : }
46 :
47 : /** Read all tokens from a string between <b>start</b> and <b>end</b>, and add
48 : * them to <b>out</b>. Parse according to the token rules in <b>table</b>.
49 : * Caller must free tokens in <b>out</b>. If <b>end</b> is NULL, use the
50 : * entire string.
51 : */
52 : int
53 6430 : tokenize_string(memarea_t *area,
54 : const char *start, const char *end, smartlist_t *out,
55 : const token_rule_t *table, int flags)
56 : {
57 6430 : const char **s;
58 6430 : directory_token_t *tok = NULL;
59 6430 : int counts[NIL_];
60 6430 : int i;
61 6430 : int first_nonannotation;
62 6430 : int prev_len = smartlist_len(out);
63 6430 : tor_assert(area);
64 :
65 6430 : s = &start;
66 6430 : if (!end) {
67 8 : end = start+strlen(start);
68 : } else {
69 : /* it's only meaningful to check for nuls if we got an end-of-string ptr */
70 6422 : if (memchr(start, '\0', end-start)) {
71 18 : log_warn(LD_DIR, "parse error: internal NUL character.");
72 18 : return -1;
73 : }
74 : }
75 987448 : for (i = 0; i < NIL_; ++i)
76 981036 : counts[i] = 0;
77 :
78 6461 : SMARTLIST_FOREACH(out, const directory_token_t *, t, ++counts[t->tp]);
79 :
80 134935 : while (*s < end && (!tok || tok->tp != EOF_)) {
81 129000 : tok = get_next_token(area, s, end, table);
82 129000 : if (tok->tp == ERR_) {
83 477 : log_warn(LD_DIR, "parse error: %s", tok->error);
84 477 : token_clear(tok);
85 477 : return -1;
86 : }
87 128523 : ++counts[tok->tp];
88 128523 : smartlist_add(out, tok);
89 128523 : *s = eat_whitespace_eos(*s, end);
90 : }
91 :
92 5935 : if (flags & TS_NOCHECK)
93 : return 0;
94 :
95 5910 : if ((flags & TS_ANNOTATIONS_OK)) {
96 109 : first_nonannotation = -1;
97 109 : for (i = 0; i < smartlist_len(out); ++i) {
98 109 : tok = smartlist_get(out, i);
99 109 : if (tok->tp < MIN_ANNOTATION || tok->tp > MAX_ANNOTATION) {
100 : first_nonannotation = i;
101 : break;
102 : }
103 : }
104 55 : if (first_nonannotation < 0) {
105 0 : log_warn(LD_DIR, "parse error: item contains only annotations");
106 0 : return -1;
107 : }
108 776 : for (i=first_nonannotation; i < smartlist_len(out); ++i) {
109 722 : tok = smartlist_get(out, i);
110 722 : if (tok->tp >= MIN_ANNOTATION && tok->tp <= MAX_ANNOTATION) {
111 1 : log_warn(LD_DIR, "parse error: Annotations mixed with keywords");
112 1 : return -1;
113 : }
114 : }
115 54 : if ((flags & TS_NO_NEW_ANNOTATIONS)) {
116 1 : if (first_nonannotation != prev_len) {
117 0 : log_warn(LD_DIR, "parse error: Unexpected annotations.");
118 0 : return -1;
119 : }
120 : }
121 : } else {
122 128757 : for (i=0; i < smartlist_len(out); ++i) {
123 122914 : tok = smartlist_get(out, i);
124 122914 : if (tok->tp >= MIN_ANNOTATION && tok->tp <= MAX_ANNOTATION) {
125 12 : log_warn(LD_DIR, "parse error: no annotations allowed.");
126 12 : return -1;
127 : }
128 : }
129 : first_nonannotation = 0;
130 : }
131 94413 : for (i = 0; table[i].t; ++i) {
132 89703 : if (counts[table[i].v] < table[i].min_cnt) {
133 1154 : log_warn(LD_DIR, "Parse error: missing %s element.", table[i].t);
134 1154 : return -1;
135 : }
136 88549 : if (counts[table[i].v] > table[i].max_cnt) {
137 25 : log_warn(LD_DIR, "Parse error: too many %s elements.", table[i].t);
138 25 : return -1;
139 : }
140 88524 : if (table[i].pos & AT_START) {
141 3119 : if (smartlist_len(out) < 1 ||
142 3119 : (tok = smartlist_get(out, first_nonannotation))->tp != table[i].v) {
143 5 : log_warn(LD_DIR, "Parse error: first item is not %s.", table[i].t);
144 5 : return -1;
145 : }
146 : }
147 88519 : if (table[i].pos & AT_END) {
148 344 : if (smartlist_len(out) < 1 ||
149 344 : (tok = smartlist_get(out, smartlist_len(out)-1))->tp != table[i].v) {
150 3 : log_warn(LD_DIR, "Parse error: last item is not %s.", table[i].t);
151 3 : return -1;
152 : }
153 : }
154 : }
155 : return 0;
156 : }
157 :
158 : /** Helper: parse space-separated arguments from the string <b>s</b> ending at
159 : * <b>eol</b>, and store them in the args field of <b>tok</b>. Store the
160 : * number of parsed elements into the n_args field of <b>tok</b>. Allocate
161 : * all storage in <b>area</b>. Return the number of arguments parsed, or
162 : * return -1 if there was an insanely high number of arguments. */
163 : static inline int
164 118748 : get_token_arguments(memarea_t *area, directory_token_t *tok,
165 : const char *s, const char *eol)
166 : {
167 : /** Largest number of arguments we'll accept to any token, ever. */
168 : #define MAX_ARGS 512
169 118748 : char *mem = memarea_strndup(area, s, eol-s);
170 118748 : char *cp = mem;
171 118748 : int j = 0;
172 118748 : char *args[MAX_ARGS];
173 218885 : while (*cp) {
174 198924 : if (j == MAX_ARGS)
175 : return -1;
176 198917 : args[j++] = cp;
177 198917 : cp = (char*)find_whitespace(cp);
178 198917 : if (!cp || !*cp)
179 : break; /* End of the line. */
180 100137 : *cp++ = '\0';
181 100137 : cp = (char*)eat_whitespace(cp);
182 : }
183 118741 : tok->n_args = j;
184 118741 : tok->args = memarea_memdup(area, args, j*sizeof(char*));
185 118741 : return j;
186 : #undef MAX_ARGS
187 : }
188 :
189 : /** Helper: make sure that the token <b>tok</b> with keyword <b>kwd</b> obeys
190 : * the object syntax of <b>o_syn</b>. Allocate all storage in <b>area</b>.
191 : * Return <b>tok</b> on success, or a new ERR_ token if the token didn't
192 : * conform to the syntax we wanted.
193 : **/
194 : static inline directory_token_t *
195 183208 : token_check_object(memarea_t *area, const char *kwd,
196 : directory_token_t *tok, obj_syntax o_syn)
197 : {
198 183208 : char ebuf[128];
199 183208 : switch (o_syn) {
200 133253 : case NO_OBJ:
201 : /* No object is allowed for this token. */
202 133253 : if (tok->object_body) {
203 21 : tor_snprintf(ebuf, sizeof(ebuf), "Unexpected object for %s", kwd);
204 21 : RET_ERR(ebuf);
205 : }
206 133232 : if (tok->key) {
207 0 : tor_snprintf(ebuf, sizeof(ebuf), "Unexpected public key for %s", kwd);
208 0 : RET_ERR(ebuf);
209 : }
210 : break;
211 4147 : case NEED_OBJ:
212 : /* There must be a (non-key) object. */
213 4147 : if (!tok->object_body) {
214 5 : tor_snprintf(ebuf, sizeof(ebuf), "Missing object for %s", kwd);
215 5 : RET_ERR(ebuf);
216 : }
217 : break;
218 607 : case NEED_KEY_1024: /* There must be a 1024-bit public key. */
219 607 : if (tok->key && crypto_pk_num_bits(tok->key) != PK_BYTES*8) {
220 0 : tor_snprintf(ebuf, sizeof(ebuf), "Wrong size on key for %s: %d bits",
221 0 : kwd, crypto_pk_num_bits(tok->key));
222 0 : RET_ERR(ebuf);
223 : }
224 2477 : FALLTHROUGH;
225 : case NEED_KEY: /* There must be some kind of key. */
226 2477 : if (!tok->key) {
227 8 : tor_snprintf(ebuf, sizeof(ebuf), "Missing public key for %s", kwd);
228 8 : RET_ERR(ebuf);
229 : }
230 :
231 2469 : if (crypto_pk_key_is_private(tok->key)) {
232 0 : tor_snprintf(ebuf, sizeof(ebuf),
233 : "Private key given for %s, which wants a public key", kwd);
234 0 : RET_ERR(ebuf);
235 : }
236 : break;
237 : case OBJ_OK:
238 : /* Anything goes with this token. */
239 : break;
240 : }
241 :
242 183208 : done_tokenizing:
243 183208 : return tok;
244 : }
245 :
/** Return true iff the <b>memlen</b>-byte chunk of memory at <b>mem</b> has
 * the same length as the NUL-terminated string <b>token</b>, and the two
 * have identical contents. */
static bool
mem_eq_token(const void *mem, size_t memlen, const char *token)
{
  const size_t token_len = strlen(token);

  if (memlen != token_len) {
    /* Different lengths can never match; skip the byte comparison. */
    return false;
  }
  return fast_memeq(mem, token, token_len);
}
255 :
/** Helper function: read the next token from *<b>s</b>, advance *<b>s</b>
 * past the token (including any object attached to it), and return the
 * parsed token.  Parse *<b>s</b> according to the list of tokens in
 * <b>table</b>; input ends at <b>eos</b>.
 *
 * The token and everything it points to (other than a decoded key) is
 * allocated from <b>area</b>.  A token is always returned: on any failure
 * the returned token has type ERR_ and its error field holds a description
 * of the problem.
 *
 * A line whose keyword is not in <b>table</b> is not an error: it becomes an
 * A_UNKNOWN_ token if it starts with '@' and a K_OPT token otherwise, with
 * the whole line stored as its single argument.
 *
 * The RET_ERR, ALLOC, ALLOC_ZERO, STRDUP and STRNDUP macros are undefined at
 * the end of this function.
 */
directory_token_t *
get_next_token(memarea_t *area,
               const char **s, const char *eos, const token_rule_t *table)
{
  /** Reject any object bigger than this; it is probably an overflow, an
   * attack, a bug, or some other nonsense. */
#define MAX_UNPARSED_OBJECT_SIZE (128*1024)
  /** Reject any line longer than this; it is probably an overflow, an
   * attack, a bug, or some other nonsense. */
#define MAX_LINE_LENGTH (128*1024)

  const char *next, *eol;
  size_t obname_len;
  int i;
  directory_token_t *tok;
  obj_syntax o_syn = NO_OBJ;
  char ebuf[128];
  const char *kwd = "";

  tor_assert(area);
  /* Start from a zeroed token marked ERR_; the type is overwritten once a
   * keyword has been identified. */
  tok = ALLOC_ZERO(sizeof(directory_token_t));
  tok->tp = ERR_;

  /* Set *s to first token, eol to end-of-line, next to after first token */
  *s = eat_whitespace_eos(*s, eos); /* eat multi-line whitespace */
  tor_assert(eos >= *s);
  eol = memchr(*s, '\n', eos-*s);
  if (!eol)
    eol = eos; /* Last line has no newline: it runs to the end of input. */
  if (eol - *s > MAX_LINE_LENGTH) {
    RET_ERR("Line far too long");
  }

  next = find_whitespace_eos(*s, eol);

  if (mem_eq_token(*s, next-*s, "opt")) {
    /* Skip past an "opt" at the start of the line. */
    *s = eat_whitespace_eos_no_nl(next, eol);
    next = find_whitespace_eos(*s, eol);
  } else if (*s == eos) {  /* If no "opt", and end-of-line, line is invalid */
    RET_ERR("Unexpected EOF");
  }

  /* Search the table for the appropriate entry.  (I tried a binary search
   * instead, but it wasn't any faster.) */
  for (i = 0; table[i].t ; ++i) {
    if (mem_eq_token(*s, next-*s, table[i].t)) {
      /* We've found the keyword. */
      kwd = table[i].t;
      tok->tp = table[i].v;
      o_syn = table[i].os;
      *s = eat_whitespace_eos_no_nl(next, eol);
      /* We go ahead whether there are arguments or not, so that tok->args is
       * always set if we want arguments. */
      if (table[i].concat_args) {
        /* The keyword takes the line as a single argument */
        tok->args = ALLOC(sizeof(char*));
        tok->args[0] = STRNDUP(*s,eol-*s); /* Grab everything on line */
        tok->n_args = 1;
      } else {
        /* This keyword takes multiple arguments. */
        if (get_token_arguments(area, tok, *s, eol)<0) {
          tor_snprintf(ebuf, sizeof(ebuf),"Far too many arguments to %s", kwd);
          RET_ERR(ebuf);
        }
        *s = eol;
      }
      /* Enforce the argument-count limits given by the matching rule. */
      if (tok->n_args < table[i].min_args) {
        tor_snprintf(ebuf, sizeof(ebuf), "Too few arguments to %s", kwd);
        RET_ERR(ebuf);
      } else if (tok->n_args > table[i].max_args) {
        tor_snprintf(ebuf, sizeof(ebuf), "Too many arguments to %s", kwd);
        RET_ERR(ebuf);
      }
      break;
    }
  }

  if (tok->tp == ERR_) {
    /* No keyword matched: treat the line as an A_UNKNOWN_ annotation if it
     * starts with '@', and as a K_OPT token otherwise.  *s still points at
     * the unmatched keyword, so the whole line becomes the one argument. */
    if (*s < eol && **s == '@')
      tok->tp = A_UNKNOWN_;
    else
      tok->tp = K_OPT;
    tok->args = ALLOC(sizeof(char*));
    tok->args[0] = STRNDUP(*s, eol-*s);
    tok->n_args = 1;
    o_syn = OBJ_OK; /* Unrecognized lines may or may not carry an object. */
  }

  /* Check whether there's an object present */
  *s = eat_whitespace_eos(eol, eos);  /* Scan from end of first line */
  tor_assert(eos >= *s);
  eol = memchr(*s, '\n', eos-*s);
  /* 11 is the length of "-----BEGIN ". */
  if (!eol || eol-*s<11 || strcmpstart(*s, "-----BEGIN ")) /* No object. */
    goto check_object;

  /* 16 is the length of "-----BEGIN " (11) plus the closing "-----" (5), so
   * eol-*s-16 is the length of the object name in between; a line of 16
   * bytes or fewer has no name at all. */
  if (eol - *s <= 16 || memchr(*s+11,'\0',eol-*s-16) || /* no short lines, */
      !mem_eq_token(eol-5, 5, "-----") ||   /* nuls or invalid endings */
      (eol-*s) > MAX_UNPARSED_OBJECT_SIZE) {     /* name too long */
    RET_ERR("Malformed object: bad begin line");
  }
  tok->object_type = STRNDUP(*s+11, eol-*s-16);
  obname_len = eol-*s-16; /* store objname length here to avoid a strlen() */
  *s = eol+1;    /* Set *s to possible start of object data (could be eos) */

  /* Go to the end of the object */
  next = tor_memstr(*s, eos-*s, "-----END ");
  if (!next) {
    RET_ERR("Malformed object: missing object end line");
  }
  tor_assert(eos >= next);
  eol = memchr(next, '\n', eos-next);
  if (!eol)  /* end-of-line marker, or eos if there's no '\n' */
    eol = eos;
  /* Validate the ending tag, which should be 9 + NAME + 5 + eol: that is,
   * "-----END " (9 bytes), the same name as on the begin line, and "-----"
   * (5 bytes). */
  if ((size_t)(eol-next) != 9+obname_len+5 ||
      !mem_eq_token(next+9, obname_len, tok->object_type) ||
      !mem_eq_token(eol-5, 5, "-----")) {
    tor_snprintf(ebuf, sizeof(ebuf), "Malformed object: mismatched end tag %s",
                 tok->object_type);
    ebuf[sizeof(ebuf)-1] = '\0';
    RET_ERR(ebuf);
  }
  /* The text between the begin and end lines is the encoded object body. */
  if (next - *s > MAX_UNPARSED_OBJECT_SIZE)
    RET_ERR("Couldn't parse object: missing footer or object much too big.");

  {
    /* Base64-decode the body into a buffer taken from the area. */
    int r;
    size_t maxsize = base64_decode_maxsize(next-*s);
    tok->object_body = ALLOC(maxsize);
    r = base64_decode(tok->object_body, maxsize, *s, next-*s);
    if (r<0)
      RET_ERR("Malformed object: bad base64-encoded data");
    tok->object_size = r;
  }

  if (!strcmp(tok->object_type, "RSA PUBLIC KEY")) { /* If it's a public key */
    /* A key is acceptable only if the rule wants one or accepts anything. */
    if (o_syn != NEED_KEY && o_syn != NEED_KEY_1024 && o_syn != OBJ_OK) {
      RET_ERR("Unexpected public key.");
    }
    tok->key = crypto_pk_asn1_decode(tok->object_body, tok->object_size);
    if (! tok->key)
      RET_ERR("Couldn't parse public key.");
  }
  *s = eol; /* Leave *s at the end of the object's end line. */

 check_object:
  /* Make sure the object (or lack of one) matches what the rule requires;
   * this may replace tok with an ERR_ token. */
  tok = token_check_object(area, kwd, tok, o_syn);

 done_tokenizing:
  return tok;

#undef RET_ERR
#undef ALLOC
#undef ALLOC_ZERO
#undef STRDUP
#undef STRNDUP
}
419 :
420 : /** Find the first token in <b>s</b> whose keyword is <b>keyword</b>; fail
421 : * with an assert if no such keyword is found.
422 : */
423 : directory_token_t *
424 21255 : find_by_keyword_(smartlist_t *s, directory_keyword keyword,
425 : const char *keyword_as_string)
426 : {
427 21255 : directory_token_t *tok = find_opt_by_keyword(s, keyword);
428 21255 : if (PREDICT_UNLIKELY(!tok)) {
429 0 : log_err(LD_BUG, "Missing %s [%d] in directory object that should have "
430 : "been validated. Internal error.", keyword_as_string, (int)keyword);
431 0 : tor_assert(tok);
432 : }
433 21255 : return tok;
434 : }
435 :
436 : /** Find the first token in <b>s</b> whose keyword is <b>keyword</b>; return
437 : * NULL if no such keyword is found.
438 : */
439 : directory_token_t *
440 50885 : find_opt_by_keyword(const smartlist_t *s, directory_keyword keyword)
441 : {
442 693577 : SMARTLIST_FOREACH(s, directory_token_t *, t, if (t->tp == keyword) return t);
443 : return NULL;
444 : }
445 :
446 : /** If there are any directory_token_t entries in <b>s</b> whose keyword is
447 : * <b>k</b>, return a newly allocated smartlist_t containing all such entries,
448 : * in the same order in which they occur in <b>s</b>. Otherwise return
449 : * NULL. */
450 : smartlist_t *
451 3305 : find_all_by_keyword(const smartlist_t *s, directory_keyword k)
452 : {
453 3305 : smartlist_t *out = NULL;
454 65427 : SMARTLIST_FOREACH(s, directory_token_t *, t,
455 : if (t->tp == k) {
456 : if (!out)
457 : out = smartlist_new();
458 : smartlist_add(out, t);
459 : });
460 3305 : return out;
461 : }
|