Line data Source code
1 : /* Copyright (c) 2001 Matej Pfajfar.
2 : * Copyright (c) 2001-2004, Roger Dingledine.
3 : * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
4 : * Copyright (c) 2007-2021, The Tor Project, Inc. */
5 : /* See LICENSE for licensing information */
6 :
7 : /**
8 : * @file unparseable.c
9 : * @brief Dump unparseable objects to disk.
10 : **/
11 :
12 : #define UNPARSEABLE_PRIVATE
13 :
14 : #include "core/or/or.h"
15 : #include "app/config/config.h"
16 : #include "feature/dirparse/unparseable.h"
17 : #include "lib/sandbox/sandbox.h"
18 :
19 : #ifdef HAVE_SYS_STAT_H
20 : #include <sys/stat.h>
21 : #endif
22 :
23 : /* Dump mechanism for unparseable descriptors */
24 :
25 : /** List of dumped descriptors for FIFO cleanup purposes */
26 : STATIC smartlist_t *descs_dumped = NULL;
27 : /** Total size of dumped descriptors for FIFO cleanup */
28 : STATIC uint64_t len_descs_dumped = 0;
29 : /** Flags tracking whether the dump directory exists and whether it is unusable */
30 : static int have_dump_desc_dir = 0;
31 : static int problem_with_dump_desc_dir = 0;
32 :
33 : #define DESC_DUMP_DATADIR_SUBDIR "unparseable-descs"
34 : #define DESC_DUMP_BASE_FILENAME "unparseable-desc"
35 :
36 : /** Find the dump directory and check if we'll be able to create it */
37 : void
38 194 : dump_desc_init(void)
39 : {
40 194 : char *dump_desc_dir;
41 :
42 194 : dump_desc_dir = get_datadir_fname(DESC_DUMP_DATADIR_SUBDIR);
43 :
44 : /*
45 : * We just check for it, don't create it at this point; we'll
46 : * create it when we need it if it isn't already there.
47 : */
48 194 : if (check_private_dir(dump_desc_dir, CPD_CHECK, get_options()->User) < 0) {
49 : /* Error, log and flag it as having a problem */
50 0 : log_notice(LD_DIR,
51 : "Doesn't look like we'll be able to create descriptor dump "
52 : "directory %s; dumps will be disabled.",
53 : dump_desc_dir);
54 0 : problem_with_dump_desc_dir = 1;
55 0 : tor_free(dump_desc_dir);
56 0 : return;
57 : }
58 :
59 : /* Check if it exists */
60 194 : switch (file_status(dump_desc_dir)) {
61 0 : case FN_DIR:
62 : /* We already have a directory */
63 0 : have_dump_desc_dir = 1;
64 0 : break;
65 194 : case FN_NOENT:
66 : /* Nothing, we'll need to create it later */
67 194 : have_dump_desc_dir = 0;
68 194 : break;
69 0 : case FN_ERROR:
70 : /* Log and flag having a problem */
71 0 : log_notice(LD_DIR,
72 : "Couldn't check whether descriptor dump directory %s already"
73 : " exists: %s",
74 : dump_desc_dir, strerror(errno));
75 0 : problem_with_dump_desc_dir = 1;
76 0 : break;
77 0 : case FN_FILE:
78 : case FN_EMPTY:
79 : default:
80 : /* Something else was here! */
81 0 : log_notice(LD_DIR,
82 : "Descriptor dump directory %s already exists and isn't a "
83 : "directory",
84 : dump_desc_dir);
85 0 : problem_with_dump_desc_dir = 1;
86 : }
87 :
88 194 : if (have_dump_desc_dir && !problem_with_dump_desc_dir) {
89 0 : dump_desc_populate_fifo_from_directory(dump_desc_dir);
90 : }
91 :
92 194 : tor_free(dump_desc_dir);
93 : }
94 :
95 : /** Create the dump directory if needed and possible */
96 : static void
97 75 : dump_desc_create_dir(void)
98 : {
99 75 : char *dump_desc_dir;
100 :
101 : /* If the problem flag is set, skip it */
102 75 : if (problem_with_dump_desc_dir) return;
103 :
104 : /* Do we need it? */
105 75 : if (!have_dump_desc_dir) {
106 1 : dump_desc_dir = get_datadir_fname(DESC_DUMP_DATADIR_SUBDIR);
107 :
108 1 : if (check_private_dir(dump_desc_dir, CPD_CREATE,
109 1 : get_options()->User) < 0) {
110 0 : log_notice(LD_DIR,
111 : "Failed to create descriptor dump directory %s",
112 : dump_desc_dir);
113 0 : problem_with_dump_desc_dir = 1;
114 : }
115 :
116 : /* Okay, we created it */
117 1 : have_dump_desc_dir = 1;
118 :
119 1 : tor_free(dump_desc_dir);
120 : }
121 : }
122 :
123 : /** Dump desc FIFO/cleanup; take ownership of the given filename, add it to
124 : * the FIFO, and clean up the oldest entries to the extent they exceed the
125 : * configured cap. If any old entries with a matching hash existed, they
126 : * just got overwritten right before this was called and we should adjust
127 : * the total size counter without deleting them.
128 : */
129 : static void
130 75 : dump_desc_fifo_add_and_clean(char *filename, const uint8_t *digest_sha256,
131 : size_t len)
132 : {
133 75 : dumped_desc_t *ent = NULL, *tmp;
134 75 : uint64_t max_len;
135 :
136 75 : tor_assert(filename != NULL);
137 75 : tor_assert(digest_sha256 != NULL);
138 :
139 75 : if (descs_dumped == NULL) {
140 : /* We better have no length, then */
141 8 : tor_assert(len_descs_dumped == 0);
142 : /* Make a smartlist */
143 8 : descs_dumped = smartlist_new();
144 : }
145 :
146 : /* Make a new entry to put this one in */
147 75 : ent = tor_malloc_zero(sizeof(*ent));
148 75 : ent->filename = filename;
149 75 : ent->len = len;
150 75 : ent->when = time(NULL);
151 75 : memcpy(ent->digest_sha256, digest_sha256, DIGEST256_LEN);
152 :
153 : /* Do we need to do some cleanup? */
154 75 : max_len = get_options()->MaxUnparseableDescSizeToLog;
155 : /* Iterate over the list until we've freed enough space */
156 78 : while (len > max_len - len_descs_dumped &&
157 3 : smartlist_len(descs_dumped) > 0) {
158 : /* Get the oldest thing on the list */
159 3 : tmp = (dumped_desc_t *)(smartlist_get(descs_dumped, 0));
160 :
161 : /*
162 : * Check if it matches the filename we just added, so we don't delete
163 : * something we just emitted if we get repeated identical descriptors.
164 : */
165 3 : if (strcmp(tmp->filename, filename) != 0) {
166 : /* Delete it and adjust the length counter */
167 3 : tor_unlink(tmp->filename);
168 3 : tor_assert(len_descs_dumped >= tmp->len);
169 3 : len_descs_dumped -= tmp->len;
170 3 : log_info(LD_DIR,
171 : "Deleting old unparseable descriptor dump %s due to "
172 : "space limits",
173 : tmp->filename);
174 : } else {
175 : /*
176 : * Don't delete, but do adjust the counter since we will bump it
177 : * later
178 : */
179 0 : tor_assert(len_descs_dumped >= tmp->len);
180 0 : len_descs_dumped -= tmp->len;
181 0 : log_info(LD_DIR,
182 : "Replacing old descriptor dump %s with new identical one",
183 : tmp->filename);
184 : }
185 :
186 : /* Free it and remove it from the list */
187 3 : smartlist_del_keeporder(descs_dumped, 0);
188 3 : tor_free(tmp->filename);
189 3 : tor_free(tmp);
190 : }
191 :
192 : /* Append our entry to the end of the list and bump the counter */
193 75 : smartlist_add(descs_dumped, ent);
194 75 : len_descs_dumped += len;
195 75 : }
196 :
197 : /** Check if we already have a descriptor for this hash and move it to the
198 : * head of the queue if so. Return 1 if one existed and 0 otherwise.
199 : */
200 : static int
201 82 : dump_desc_fifo_bump_hash(const uint8_t *digest_sha256)
202 : {
203 82 : dumped_desc_t *match = NULL;
204 :
205 82 : tor_assert(digest_sha256);
206 :
207 82 : if (descs_dumped) {
208 : /* Find a match if one exists */
209 1819 : SMARTLIST_FOREACH_BEGIN(descs_dumped, dumped_desc_t *, ent) {
210 3504 : if (ent &&
211 1752 : tor_memeq(ent->digest_sha256, digest_sha256, DIGEST256_LEN)) {
212 : /*
213 : * Save a pointer to the match and remove it from its current
214 : * position.
215 : */
216 7 : match = ent;
217 7 : SMARTLIST_DEL_CURRENT_KEEPORDER(descs_dumped, ent);
218 7 : break;
219 : }
220 1745 : } SMARTLIST_FOREACH_END(ent);
221 :
222 74 : if (match) {
223 : /* Update the timestamp */
224 7 : match->when = time(NULL);
225 : /* Add it back at the end of the list */
226 7 : smartlist_add(descs_dumped, match);
227 :
228 : /* Indicate we found one */
229 7 : return 1;
230 : }
231 : }
232 :
233 : return 0;
234 : }
235 :
236 : /** Clean up on exit; just memory, leave the dumps behind
237 : */
238 : void
239 245 : dump_desc_fifo_cleanup(void)
240 : {
241 245 : if (descs_dumped) {
242 : /* Free each descriptor */
243 73 : SMARTLIST_FOREACH_BEGIN(descs_dumped, dumped_desc_t *, ent) {
244 64 : tor_assert(ent);
245 64 : tor_free(ent->filename);
246 64 : tor_free(ent);
247 64 : } SMARTLIST_FOREACH_END(ent);
248 : /* Free the list */
249 9 : smartlist_free(descs_dumped);
250 9 : descs_dumped = NULL;
251 9 : len_descs_dumped = 0;
252 : }
253 245 : }
254 :
255 : /** Handle one file for dump_desc_populate_fifo_from_directory(); make sure
256 : * the filename is sensibly formed and matches the file content, and either
257 : * return a dumped_desc_t for it or remove the file and return NULL.
258 : */
259 9 : MOCK_IMPL(STATIC dumped_desc_t *,
260 : dump_desc_populate_one_file, (const char *dirname, const char *f))
261 : {
262 9 : dumped_desc_t *ent = NULL;
263 9 : char *path = NULL, *desc = NULL;
264 9 : const char *digest_str;
265 9 : char digest[DIGEST256_LEN], content_digest[DIGEST256_LEN];
266 : /* Expected prefix before digest in filenames */
267 9 : const char *f_pfx = DESC_DUMP_BASE_FILENAME ".";
268 : /*
269 : * Stat while reading; this is important in case the file
270 : * contains a NUL character.
271 : */
272 9 : struct stat st;
273 :
274 : /* Sanity-check args */
275 9 : tor_assert(dirname != NULL);
276 9 : tor_assert(f != NULL);
277 :
278 : /* Form the full path */
279 9 : tor_asprintf(&path, "%s" PATH_SEPARATOR "%s", dirname, f);
280 :
281 : /* Check that f has the form DESC_DUMP_BASE_FILENAME.<digest256> */
282 :
283 9 : if (!strcmpstart(f, f_pfx)) {
284 : /* It matches the form, but is the digest parseable as such? */
285 7 : digest_str = f + strlen(f_pfx);
286 7 : if (base16_decode(digest, DIGEST256_LEN,
287 : digest_str, strlen(digest_str)) != DIGEST256_LEN) {
288 : /* We failed to decode it */
289 : digest_str = NULL;
290 : }
291 : } else {
292 : /* No match */
293 : digest_str = NULL;
294 : }
295 :
296 3 : if (!digest_str) {
297 : /* We couldn't get a sensible digest */
298 6 : log_notice(LD_DIR,
299 : "Removing unrecognized filename %s from unparseable "
300 : "descriptors directory", f);
301 6 : tor_unlink(path);
302 : /* We're done */
303 6 : goto done;
304 : }
305 :
306 : /*
307 : * The filename has the form DESC_DUMP_BASE_FILENAME "." <digest256> and
308 : * we've decoded the digest. Next, check that we can read it and the
309 : * content matches this digest. We are relying on the fact that if the
310 : * file contains a '\0', read_file_to_str() will allocate space for and
311 : * read the entire file and return the correct size in st.
312 : */
313 3 : desc = read_file_to_str(path, RFTS_IGNORE_MISSING|RFTS_BIN, &st);
314 3 : if (!desc) {
315 : /* We couldn't read it */
316 1 : log_notice(LD_DIR,
317 : "Failed to read %s from unparseable descriptors directory; "
318 : "attempting to remove it.", f);
319 1 : tor_unlink(path);
320 : /* We're done */
321 1 : goto done;
322 : }
323 :
324 : #if SIZE_MAX > UINT64_MAX
325 : if (BUG((uint64_t)st.st_size > (uint64_t)SIZE_MAX)) {
326 : /* LCOV_EXCL_START
327 : * Should be impossible since RFTS above should have failed to read the
328 : * huge file into RAM. */
329 : goto done;
330 : /* LCOV_EXCL_STOP */
331 : }
332 : #endif /* SIZE_MAX > UINT64_MAX */
333 2 : if (BUG(st.st_size < 0)) {
334 : /* LCOV_EXCL_START
335 : * Should be impossible, since the OS isn't supposed to be b0rken. */
336 : goto done;
337 : /* LCOV_EXCL_STOP */
338 : }
339 : /* (Now we can be sure that st.st_size is safe to cast to a size_t.) */
340 :
341 : /*
342 : * We got one; now compute its digest and check that it matches the
343 : * filename.
344 : */
345 2 : if (crypto_digest256((char *)content_digest, desc, (size_t) st.st_size,
346 : DIGEST_SHA256) < 0) {
347 : /* Weird, but okay */
348 0 : log_info(LD_DIR,
349 : "Unable to hash content of %s from unparseable descriptors "
350 : "directory", f);
351 0 : tor_unlink(path);
352 : /* We're done */
353 0 : goto done;
354 : }
355 :
356 : /* Compare the digests */
357 2 : if (tor_memneq(digest, content_digest, DIGEST256_LEN)) {
358 : /* No match */
359 1 : log_info(LD_DIR,
360 : "Hash of %s from unparseable descriptors directory didn't "
361 : "match its filename; removing it", f);
362 1 : tor_unlink(path);
363 : /* We're done */
364 1 : goto done;
365 : }
366 :
367 : /* Okay, it's a match, we should prepare ent */
368 1 : ent = tor_malloc_zero(sizeof(dumped_desc_t));
369 1 : ent->filename = path;
370 1 : memcpy(ent->digest_sha256, digest, DIGEST256_LEN);
371 1 : ent->len = (size_t) st.st_size;
372 1 : ent->when = st.st_mtime;
373 : /* Null out path so we don't free it out from under ent */
374 1 : path = NULL;
375 :
376 9 : done:
377 : /* Free allocations if we had them */
378 9 : tor_free(desc);
379 9 : tor_free(path);
380 :
381 9 : return ent;
382 : }
383 :
384 : /** Sort helper for dump_desc_populate_fifo_from_directory(); compares
385 : * the when field of dumped_desc_ts in a smartlist to put the FIFO in
386 : * the correct order after reconstructing it from the directory.
387 : */
388 : static int
389 3 : dump_desc_compare_fifo_entries(const void **a_v, const void **b_v)
390 : {
391 3 : const dumped_desc_t **a = (const dumped_desc_t **)a_v;
392 3 : const dumped_desc_t **b = (const dumped_desc_t **)b_v;
393 :
394 3 : if ((a != NULL) && (*a != NULL)) {
395 3 : if ((b != NULL) && (*b != NULL)) {
396 : /* We have sensible dumped_desc_ts to compare */
397 3 : if ((*a)->when < (*b)->when) {
398 : return -1;
399 2 : } else if ((*a)->when == (*b)->when) {
400 : return 0;
401 : } else {
402 2 : return 1;
403 : }
404 : } else {
405 : /*
406 : * We shouldn't see this, but what the hell, NULLs precede everythin
407 : * else
408 : */
409 : return 1;
410 : }
411 : } else {
412 : return -1;
413 : }
414 : }
415 :
416 : /** Scan the contents of the directory, and update FIFO/counters; this will
417 : * consistency-check descriptor dump filenames against hashes of descriptor
418 : * dump file content, and remove any inconsistent/unreadable dumps, and then
419 : * reconstruct the dump FIFO as closely as possible for the last time the
420 : * tor process shut down. If a previous dump was repeated more than once and
421 : * moved ahead in the FIFO, the mtime will not have been updated and the
422 : * reconstructed order will be wrong, but will always be a permutation of
423 : * the original.
424 : */
425 : STATIC void
426 1 : dump_desc_populate_fifo_from_directory(const char *dirname)
427 : {
428 1 : smartlist_t *files = NULL;
429 1 : dumped_desc_t *ent = NULL;
430 :
431 1 : tor_assert(dirname != NULL);
432 :
433 : /* Get a list of files */
434 1 : files = tor_listdir(dirname);
435 1 : if (!files) {
436 0 : log_notice(LD_DIR,
437 : "Unable to get contents of unparseable descriptor dump "
438 : "directory %s",
439 : dirname);
440 0 : return;
441 : }
442 :
443 : /*
444 : * Iterate through the list and decide which files should go in the
445 : * FIFO and which should be purged.
446 : */
447 :
448 4 : SMARTLIST_FOREACH_BEGIN(files, char *, f) {
449 : /* Try to get a FIFO entry */
450 3 : ent = dump_desc_populate_one_file(dirname, f);
451 3 : if (ent) {
452 : /*
453 : * We got one; add it to the FIFO. No need for duplicate checking
454 : * here since we just verified the name and digest match.
455 : */
456 :
457 : /* Make sure we have a list to add it to */
458 3 : if (!descs_dumped) {
459 1 : descs_dumped = smartlist_new();
460 1 : len_descs_dumped = 0;
461 : }
462 :
463 : /* Add it and adjust the counter */
464 3 : smartlist_add(descs_dumped, ent);
465 3 : len_descs_dumped += ent->len;
466 : }
467 : /*
468 : * If we didn't, we will have unlinked the file if necessary and
469 : * possible, and emitted a log message about it, so just go on to
470 : * the next.
471 : */
472 3 : } SMARTLIST_FOREACH_END(f);
473 :
474 : /* Did we get anything? */
475 1 : if (descs_dumped != NULL) {
476 : /* Sort the FIFO in order of increasing timestamp */
477 1 : smartlist_sort(descs_dumped, dump_desc_compare_fifo_entries);
478 :
479 : /* Log some stats */
480 1 : log_info(LD_DIR,
481 : "Reloaded unparseable descriptor dump FIFO with %d dump(s) "
482 : "totaling %"PRIu64 " bytes",
483 : smartlist_len(descs_dumped), (len_descs_dumped));
484 : }
485 :
486 : /* Free the original list */
487 4 : SMARTLIST_FOREACH(files, char *, f, tor_free(f));
488 1 : smartlist_free(files);
489 : }
490 :
491 : /** For debugging purposes, dump unparseable descriptor *<b>desc</b> of
492 : * type *<b>type</b> to file $DATADIR/unparseable-desc. Do not write more
493 : * than one descriptor to disk per minute. If there is already such a
494 : * file in the data directory, overwrite it. */
495 82 : MOCK_IMPL(void,
496 : dump_desc,(const char *desc, const char *type))
497 : {
498 82 : tor_assert(desc);
499 82 : tor_assert(type);
500 : #ifndef TOR_UNIT_TESTS
501 : /* For now, we are disabling this function, since it can be called with
502 : * strings that are far too long. We can turn it back on if we fix it
503 : * someday, but we'd need to give it a length argument. A likelier
504 : * resolution here is simply to remove this module entirely. See tor#40286
505 : * for background. */
506 : if (1)
507 : return;
508 : #endif
509 82 : size_t len;
510 : /* The SHA256 of the string */
511 82 : uint8_t digest_sha256[DIGEST256_LEN];
512 82 : char digest_sha256_hex[HEX_DIGEST256_LEN+1];
513 : /* Filename to log it to */
514 82 : char *debugfile, *debugfile_base;
515 :
516 : /* Get the hash for logging purposes anyway */
517 82 : len = strlen(desc);
518 82 : if (crypto_digest256((char *)digest_sha256, desc, len,
519 : DIGEST_SHA256) < 0) {
520 0 : log_info(LD_DIR,
521 : "Unable to parse descriptor of type %s, and unable to even hash"
522 : " it!", type);
523 0 : goto err;
524 : }
525 :
526 82 : base16_encode(digest_sha256_hex, sizeof(digest_sha256_hex),
527 : (const char *)digest_sha256, sizeof(digest_sha256));
528 :
529 : /*
530 : * We mention type and hash in the main log; don't clutter up the files
531 : * with anything but the exact dump.
532 : */
533 82 : tor_asprintf(&debugfile_base,
534 : DESC_DUMP_BASE_FILENAME ".%s", digest_sha256_hex);
535 82 : debugfile = get_datadir_fname2(DESC_DUMP_DATADIR_SUBDIR, debugfile_base);
536 :
537 : /*
538 : * Check if the sandbox is active or will become active; see comment
539 : * below at the log message for why.
540 : */
541 82 : if (!(sandbox_is_active() || get_options()->Sandbox)) {
542 82 : if (len <= get_options()->MaxUnparseableDescSizeToLog) {
543 82 : if (!dump_desc_fifo_bump_hash(digest_sha256)) {
544 : /* Create the directory if needed */
545 75 : dump_desc_create_dir();
546 : /* Make sure we've got it */
547 75 : if (have_dump_desc_dir && !problem_with_dump_desc_dir) {
548 : /* Write it, and tell the main log about it */
549 75 : write_str_to_file(debugfile, desc, 1);
550 75 : log_info(LD_DIR,
551 : "Unable to parse descriptor of type %s with hash %s and "
552 : "length %lu. See file %s in data directory for details.",
553 : type, digest_sha256_hex, (unsigned long)len,
554 : debugfile_base);
555 75 : dump_desc_fifo_add_and_clean(debugfile, digest_sha256, len);
556 : /* Since we handed ownership over, don't free debugfile later */
557 75 : debugfile = NULL;
558 : } else {
559 : /* Problem with the subdirectory */
560 0 : log_info(LD_DIR,
561 : "Unable to parse descriptor of type %s with hash %s and "
562 : "length %lu. Descriptor not dumped because we had a "
563 : "problem creating the " DESC_DUMP_DATADIR_SUBDIR
564 : " subdirectory",
565 : type, digest_sha256_hex, (unsigned long)len);
566 : /* We do have to free debugfile in this case */
567 : }
568 : } else {
569 : /* We already had one with this hash dumped */
570 7 : log_info(LD_DIR,
571 : "Unable to parse descriptor of type %s with hash %s and "
572 : "length %lu. Descriptor not dumped because one with that "
573 : "hash has already been dumped.",
574 : type, digest_sha256_hex, (unsigned long)len);
575 : /* We do have to free debugfile in this case */
576 : }
577 : } else {
578 : /* Just log that it happened without dumping */
579 0 : log_info(LD_DIR,
580 : "Unable to parse descriptor of type %s with hash %s and "
581 : "length %lu. Descriptor not dumped because it exceeds maximum"
582 : " log size all by itself.",
583 : type, digest_sha256_hex, (unsigned long)len);
584 : /* We do have to free debugfile in this case */
585 : }
586 : } else {
587 : /*
588 : * Not logging because the sandbox is active and seccomp2 apparently
589 : * doesn't have a sensible way to allow filenames according to a pattern
590 : * match. (If we ever figure out how to say "allow writes to /regex/",
591 : * remove this checK).
592 : */
593 0 : log_info(LD_DIR,
594 : "Unable to parse descriptor of type %s with hash %s and "
595 : "length %lu. Descriptor not dumped because the sandbox is "
596 : "configured",
597 : type, digest_sha256_hex, (unsigned long)len);
598 : }
599 :
600 82 : tor_free(debugfile_base);
601 82 : tor_free(debugfile);
602 :
603 82 : err:
604 82 : return;
605 : }
|