Tor  0.4.7.0-alpha-dev
unparseable.c
Go to the documentation of this file.
1 /* Copyright (c) 2001 Matej Pfajfar.
2  * Copyright (c) 2001-2004, Roger Dingledine.
3  * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
4  * Copyright (c) 2007-2021, The Tor Project, Inc. */
5 /* See LICENSE for licensing information */
6 
7 /**
8  * @file unparseable.c
9  * @brief Dump unparseable objects to disk.
10  **/
11 
12 #define UNPARSEABLE_PRIVATE
13 
14 #include "core/or/or.h"
15 #include "app/config/config.h"
17 #include "lib/sandbox/sandbox.h"
18 
19 #ifdef HAVE_SYS_STAT_H
20 #include <sys/stat.h>
21 #endif
22 
23 /* Dump mechanism for unparseable descriptors */
24 
25 /** List of dumped descriptors for FIFO cleanup purposes */
27 /** Total size of dumped descriptors for FIFO cleanup */
28 STATIC uint64_t len_descs_dumped = 0;
29 /** Directory to stash dumps in */
30 static int have_dump_desc_dir = 0;
31 static int problem_with_dump_desc_dir = 0;
32 
33 #define DESC_DUMP_DATADIR_SUBDIR "unparseable-descs"
34 #define DESC_DUMP_BASE_FILENAME "unparseable-desc"
35 
36 /** Find the dump directory and check if we'll be able to create it */
37 void
39 {
40  char *dump_desc_dir;
41 
42  dump_desc_dir = get_datadir_fname(DESC_DUMP_DATADIR_SUBDIR);
43 
44  /*
45  * We just check for it, don't create it at this point; we'll
46  * create it when we need it if it isn't already there.
47  */
48  if (check_private_dir(dump_desc_dir, CPD_CHECK, get_options()->User) < 0) {
49  /* Error, log and flag it as having a problem */
50  log_notice(LD_DIR,
51  "Doesn't look like we'll be able to create descriptor dump "
52  "directory %s; dumps will be disabled.",
53  dump_desc_dir);
54  problem_with_dump_desc_dir = 1;
55  tor_free(dump_desc_dir);
56  return;
57  }
58 
59  /* Check if it exists */
60  switch (file_status(dump_desc_dir)) {
61  case FN_DIR:
62  /* We already have a directory */
64  break;
65  case FN_NOENT:
66  /* Nothing, we'll need to create it later */
68  break;
69  case FN_ERROR:
70  /* Log and flag having a problem */
71  log_notice(LD_DIR,
72  "Couldn't check whether descriptor dump directory %s already"
73  " exists: %s",
74  dump_desc_dir, strerror(errno));
75  problem_with_dump_desc_dir = 1;
76  break;
77  case FN_FILE:
78  case FN_EMPTY:
79  default:
80  /* Something else was here! */
81  log_notice(LD_DIR,
82  "Descriptor dump directory %s already exists and isn't a "
83  "directory",
84  dump_desc_dir);
85  problem_with_dump_desc_dir = 1;
86  }
87 
88  if (have_dump_desc_dir && !problem_with_dump_desc_dir) {
90  }
91 
92  tor_free(dump_desc_dir);
93 }
94 
95 /** Create the dump directory if needed and possible */
96 static void
98 {
99  char *dump_desc_dir;
100 
101  /* If the problem flag is set, skip it */
102  if (problem_with_dump_desc_dir) return;
103 
104  /* Do we need it? */
105  if (!have_dump_desc_dir) {
106  dump_desc_dir = get_datadir_fname(DESC_DUMP_DATADIR_SUBDIR);
107 
108  if (check_private_dir(dump_desc_dir, CPD_CREATE,
109  get_options()->User) < 0) {
110  log_notice(LD_DIR,
111  "Failed to create descriptor dump directory %s",
112  dump_desc_dir);
113  problem_with_dump_desc_dir = 1;
114  }
115 
116  /* Okay, we created it */
117  have_dump_desc_dir = 1;
118 
119  tor_free(dump_desc_dir);
120  }
121 }
122 
123 /** Dump desc FIFO/cleanup; take ownership of the given filename, add it to
124  * the FIFO, and clean up the oldest entries to the extent they exceed the
125  * configured cap. If any old entries with a matching hash existed, they
126  * just got overwritten right before this was called and we should adjust
127  * the total size counter without deleting them.
128  */
129 static void
130 dump_desc_fifo_add_and_clean(char *filename, const uint8_t *digest_sha256,
131  size_t len)
132 {
133  dumped_desc_t *ent = NULL, *tmp;
134  uint64_t max_len;
135 
136  tor_assert(filename != NULL);
137  tor_assert(digest_sha256 != NULL);
138 
139  if (descs_dumped == NULL) {
140  /* We better have no length, then */
142  /* Make a smartlist */
144  }
145 
146  /* Make a new entry to put this one in */
147  ent = tor_malloc_zero(sizeof(*ent));
148  ent->filename = filename;
149  ent->len = len;
150  ent->when = time(NULL);
151  memcpy(ent->digest_sha256, digest_sha256, DIGEST256_LEN);
152 
153  /* Do we need to do some cleanup? */
155  /* Iterate over the list until we've freed enough space */
156  while (len > max_len - len_descs_dumped &&
157  smartlist_len(descs_dumped) > 0) {
158  /* Get the oldest thing on the list */
159  tmp = (dumped_desc_t *)(smartlist_get(descs_dumped, 0));
160 
161  /*
162  * Check if it matches the filename we just added, so we don't delete
163  * something we just emitted if we get repeated identical descriptors.
164  */
165  if (strcmp(tmp->filename, filename) != 0) {
166  /* Delete it and adjust the length counter */
167  tor_unlink(tmp->filename);
168  tor_assert(len_descs_dumped >= tmp->len);
169  len_descs_dumped -= tmp->len;
170  log_info(LD_DIR,
171  "Deleting old unparseable descriptor dump %s due to "
172  "space limits",
173  tmp->filename);
174  } else {
175  /*
176  * Don't delete, but do adjust the counter since we will bump it
177  * later
178  */
179  tor_assert(len_descs_dumped >= tmp->len);
180  len_descs_dumped -= tmp->len;
181  log_info(LD_DIR,
182  "Replacing old descriptor dump %s with new identical one",
183  tmp->filename);
184  }
185 
186  /* Free it and remove it from the list */
188  tor_free(tmp->filename);
189  tor_free(tmp);
190  }
191 
192  /* Append our entry to the end of the list and bump the counter */
194  len_descs_dumped += len;
195 }
196 
197 /** Check if we already have a descriptor for this hash and move it to the
198  * head of the queue if so. Return 1 if one existed and 0 otherwise.
199  */
200 static int
201 dump_desc_fifo_bump_hash(const uint8_t *digest_sha256)
202 {
203  dumped_desc_t *match = NULL;
204 
205  tor_assert(digest_sha256);
206 
207  if (descs_dumped) {
208  /* Find a match if one exists */
209  SMARTLIST_FOREACH_BEGIN(descs_dumped, dumped_desc_t *, ent) {
210  if (ent &&
211  tor_memeq(ent->digest_sha256, digest_sha256, DIGEST256_LEN)) {
212  /*
213  * Save a pointer to the match and remove it from its current
214  * position.
215  */
216  match = ent;
218  break;
219  }
220  } SMARTLIST_FOREACH_END(ent);
221 
222  if (match) {
223  /* Update the timestamp */
224  match->when = time(NULL);
225  /* Add it back at the end of the list */
226  smartlist_add(descs_dumped, match);
227 
228  /* Indicate we found one */
229  return 1;
230  }
231  }
232 
233  return 0;
234 }
235 
236 /** Clean up on exit; just memory, leave the dumps behind
237  */
238 void
240 {
241  if (descs_dumped) {
242  /* Free each descriptor */
243  SMARTLIST_FOREACH_BEGIN(descs_dumped, dumped_desc_t *, ent) {
244  tor_assert(ent);
245  tor_free(ent->filename);
246  tor_free(ent);
247  } SMARTLIST_FOREACH_END(ent);
248  /* Free the list */
249  smartlist_free(descs_dumped);
250  descs_dumped = NULL;
251  len_descs_dumped = 0;
252  }
253 }
254 
255 /** Handle one file for dump_desc_populate_fifo_from_directory(); make sure
256  * the filename is sensibly formed and matches the file content, and either
257  * return a dumped_desc_t for it or remove the file and return NULL.
258  */
259 MOCK_IMPL(STATIC dumped_desc_t *,
260 dump_desc_populate_one_file, (const char *dirname, const char *f))
261 {
262  dumped_desc_t *ent = NULL;
263  char *path = NULL, *desc = NULL;
264  const char *digest_str;
265  char digest[DIGEST256_LEN], content_digest[DIGEST256_LEN];
266  /* Expected prefix before digest in filenames */
267  const char *f_pfx = DESC_DUMP_BASE_FILENAME ".";
268  /*
269  * Stat while reading; this is important in case the file
270  * contains a NUL character.
271  */
272  struct stat st;
273 
274  /* Sanity-check args */
275  tor_assert(dirname != NULL);
276  tor_assert(f != NULL);
277 
278  /* Form the full path */
279  tor_asprintf(&path, "%s" PATH_SEPARATOR "%s", dirname, f);
280 
281  /* Check that f has the form DESC_DUMP_BASE_FILENAME.<digest256> */
282 
283  if (!strcmpstart(f, f_pfx)) {
284  /* It matches the form, but is the digest parseable as such? */
285  digest_str = f + strlen(f_pfx);
286  if (base16_decode(digest, DIGEST256_LEN,
287  digest_str, strlen(digest_str)) != DIGEST256_LEN) {
288  /* We failed to decode it */
289  digest_str = NULL;
290  }
291  } else {
292  /* No match */
293  digest_str = NULL;
294  }
295 
296  if (!digest_str) {
297  /* We couldn't get a sensible digest */
298  log_notice(LD_DIR,
299  "Removing unrecognized filename %s from unparseable "
300  "descriptors directory", f);
301  tor_unlink(path);
302  /* We're done */
303  goto done;
304  }
305 
306  /*
307  * The filename has the form DESC_DUMP_BASE_FILENAME "." <digest256> and
308  * we've decoded the digest. Next, check that we can read it and the
309  * content matches this digest. We are relying on the fact that if the
310  * file contains a '\0', read_file_to_str() will allocate space for and
311  * read the entire file and return the correct size in st.
312  */
313  desc = read_file_to_str(path, RFTS_IGNORE_MISSING|RFTS_BIN, &st);
314  if (!desc) {
315  /* We couldn't read it */
316  log_notice(LD_DIR,
317  "Failed to read %s from unparseable descriptors directory; "
318  "attempting to remove it.", f);
319  tor_unlink(path);
320  /* We're done */
321  goto done;
322  }
323 
324 #if SIZE_MAX > UINT64_MAX
325  if (BUG((uint64_t)st.st_size > (uint64_t)SIZE_MAX)) {
326  /* LCOV_EXCL_START
327  * Should be impossible since RFTS above should have failed to read the
328  * huge file into RAM. */
329  goto done;
330  /* LCOV_EXCL_STOP */
331  }
332 #endif /* SIZE_MAX > UINT64_MAX */
333  if (BUG(st.st_size < 0)) {
334  /* LCOV_EXCL_START
335  * Should be impossible, since the OS isn't supposed to be b0rken. */
336  goto done;
337  /* LCOV_EXCL_STOP */
338  }
339  /* (Now we can be sure that st.st_size is safe to cast to a size_t.) */
340 
341  /*
342  * We got one; now compute its digest and check that it matches the
343  * filename.
344  */
345  if (crypto_digest256((char *)content_digest, desc, (size_t) st.st_size,
346  DIGEST_SHA256) < 0) {
347  /* Weird, but okay */
348  log_info(LD_DIR,
349  "Unable to hash content of %s from unparseable descriptors "
350  "directory", f);
351  tor_unlink(path);
352  /* We're done */
353  goto done;
354  }
355 
356  /* Compare the digests */
357  if (tor_memneq(digest, content_digest, DIGEST256_LEN)) {
358  /* No match */
359  log_info(LD_DIR,
360  "Hash of %s from unparseable descriptors directory didn't "
361  "match its filename; removing it", f);
362  tor_unlink(path);
363  /* We're done */
364  goto done;
365  }
366 
367  /* Okay, it's a match, we should prepare ent */
368  ent = tor_malloc_zero(sizeof(dumped_desc_t));
369  ent->filename = path;
370  memcpy(ent->digest_sha256, digest, DIGEST256_LEN);
371  ent->len = (size_t) st.st_size;
372  ent->when = st.st_mtime;
373  /* Null out path so we don't free it out from under ent */
374  path = NULL;
375 
376  done:
377  /* Free allocations if we had them */
378  tor_free(desc);
379  tor_free(path);
380 
381  return ent;
382 }
383 
384 /** Sort helper for dump_desc_populate_fifo_from_directory(); compares
385  * the when field of dumped_desc_ts in a smartlist to put the FIFO in
386  * the correct order after reconstructing it from the directory.
387  */
388 static int
389 dump_desc_compare_fifo_entries(const void **a_v, const void **b_v)
390 {
391  const dumped_desc_t **a = (const dumped_desc_t **)a_v;
392  const dumped_desc_t **b = (const dumped_desc_t **)b_v;
393 
394  if ((a != NULL) && (*a != NULL)) {
395  if ((b != NULL) && (*b != NULL)) {
396  /* We have sensible dumped_desc_ts to compare */
397  if ((*a)->when < (*b)->when) {
398  return -1;
399  } else if ((*a)->when == (*b)->when) {
400  return 0;
401  } else {
402  return 1;
403  }
404  } else {
405  /*
406  * We shouldn't see this, but what the hell, NULLs precede everythin
407  * else
408  */
409  return 1;
410  }
411  } else {
412  return -1;
413  }
414 }
415 
416 /** Scan the contents of the directory, and update FIFO/counters; this will
417  * consistency-check descriptor dump filenames against hashes of descriptor
418  * dump file content, and remove any inconsistent/unreadable dumps, and then
419  * reconstruct the dump FIFO as closely as possible for the last time the
420  * tor process shut down. If a previous dump was repeated more than once and
421  * moved ahead in the FIFO, the mtime will not have been updated and the
422  * reconstructed order will be wrong, but will always be a permutation of
423  * the original.
424  */
425 STATIC void
427 {
428  smartlist_t *files = NULL;
429  dumped_desc_t *ent = NULL;
430 
431  tor_assert(dirname != NULL);
432 
433  /* Get a list of files */
434  files = tor_listdir(dirname);
435  if (!files) {
436  log_notice(LD_DIR,
437  "Unable to get contents of unparseable descriptor dump "
438  "directory %s",
439  dirname);
440  return;
441  }
442 
443  /*
444  * Iterate through the list and decide which files should go in the
445  * FIFO and which should be purged.
446  */
447 
448  SMARTLIST_FOREACH_BEGIN(files, char *, f) {
449  /* Try to get a FIFO entry */
450  ent = dump_desc_populate_one_file(dirname, f);
451  if (ent) {
452  /*
453  * We got one; add it to the FIFO. No need for duplicate checking
454  * here since we just verified the name and digest match.
455  */
456 
457  /* Make sure we have a list to add it to */
458  if (!descs_dumped) {
460  len_descs_dumped = 0;
461  }
462 
463  /* Add it and adjust the counter */
465  len_descs_dumped += ent->len;
466  }
467  /*
468  * If we didn't, we will have unlinked the file if necessary and
469  * possible, and emitted a log message about it, so just go on to
470  * the next.
471  */
472  } SMARTLIST_FOREACH_END(f);
473 
474  /* Did we get anything? */
475  if (descs_dumped != NULL) {
476  /* Sort the FIFO in order of increasing timestamp */
478 
479  /* Log some stats */
480  log_info(LD_DIR,
481  "Reloaded unparseable descriptor dump FIFO with %d dump(s) "
482  "totaling %"PRIu64 " bytes",
483  smartlist_len(descs_dumped), (len_descs_dumped));
484  }
485 
486  /* Free the original list */
487  SMARTLIST_FOREACH(files, char *, f, tor_free(f));
488  smartlist_free(files);
489 }
490 
491 /** For debugging purposes, dump unparseable descriptor *<b>desc</b> of
492  * type *<b>type</b> to file $DATADIR/unparseable-desc. Do not write more
493  * than one descriptor to disk per minute. If there is already such a
494  * file in the data directory, overwrite it. */
495 MOCK_IMPL(void,
496 dump_desc,(const char *desc, const char *type))
497 {
498  tor_assert(desc);
499  tor_assert(type);
500 #ifndef TOR_UNIT_TESTS
501  /* For now, we are disabling this function, since it can be called with
502  * strings that are far too long. We can turn it back on if we fix it
503  * someday, but we'd need to give it a length argument. A likelier
504  * resolution here is simply to remove this module entirely. See tor#40286
505  * for background. */
506  if (1)
507  return;
508 #endif
509  size_t len;
510  /* The SHA256 of the string */
511  uint8_t digest_sha256[DIGEST256_LEN];
512  char digest_sha256_hex[HEX_DIGEST256_LEN+1];
513  /* Filename to log it to */
514  char *debugfile, *debugfile_base;
515 
516  /* Get the hash for logging purposes anyway */
517  len = strlen(desc);
518  if (crypto_digest256((char *)digest_sha256, desc, len,
519  DIGEST_SHA256) < 0) {
520  log_info(LD_DIR,
521  "Unable to parse descriptor of type %s, and unable to even hash"
522  " it!", type);
523  goto err;
524  }
525 
526  base16_encode(digest_sha256_hex, sizeof(digest_sha256_hex),
527  (const char *)digest_sha256, sizeof(digest_sha256));
528 
529  /*
530  * We mention type and hash in the main log; don't clutter up the files
531  * with anything but the exact dump.
532  */
533  tor_asprintf(&debugfile_base,
534  DESC_DUMP_BASE_FILENAME ".%s", digest_sha256_hex);
535  debugfile = get_datadir_fname2(DESC_DUMP_DATADIR_SUBDIR, debugfile_base);
536 
537  /*
538  * Check if the sandbox is active or will become active; see comment
539  * below at the log message for why.
540  */
541  if (!(sandbox_is_active() || get_options()->Sandbox)) {
542  if (len <= get_options()->MaxUnparseableDescSizeToLog) {
543  if (!dump_desc_fifo_bump_hash(digest_sha256)) {
544  /* Create the directory if needed */
546  /* Make sure we've got it */
547  if (have_dump_desc_dir && !problem_with_dump_desc_dir) {
548  /* Write it, and tell the main log about it */
549  write_str_to_file(debugfile, desc, 1);
550  log_info(LD_DIR,
551  "Unable to parse descriptor of type %s with hash %s and "
552  "length %lu. See file %s in data directory for details.",
553  type, digest_sha256_hex, (unsigned long)len,
554  debugfile_base);
555  dump_desc_fifo_add_and_clean(debugfile, digest_sha256, len);
556  /* Since we handed ownership over, don't free debugfile later */
557  debugfile = NULL;
558  } else {
559  /* Problem with the subdirectory */
560  log_info(LD_DIR,
561  "Unable to parse descriptor of type %s with hash %s and "
562  "length %lu. Descriptor not dumped because we had a "
563  "problem creating the " DESC_DUMP_DATADIR_SUBDIR
564  " subdirectory",
565  type, digest_sha256_hex, (unsigned long)len);
566  /* We do have to free debugfile in this case */
567  }
568  } else {
569  /* We already had one with this hash dumped */
570  log_info(LD_DIR,
571  "Unable to parse descriptor of type %s with hash %s and "
572  "length %lu. Descriptor not dumped because one with that "
573  "hash has already been dumped.",
574  type, digest_sha256_hex, (unsigned long)len);
575  /* We do have to free debugfile in this case */
576  }
577  } else {
578  /* Just log that it happened without dumping */
579  log_info(LD_DIR,
580  "Unable to parse descriptor of type %s with hash %s and "
581  "length %lu. Descriptor not dumped because it exceeds maximum"
582  " log size all by itself.",
583  type, digest_sha256_hex, (unsigned long)len);
584  /* We do have to free debugfile in this case */
585  }
586  } else {
587  /*
588  * Not logging because the sandbox is active and seccomp2 apparently
589  * doesn't have a sensible way to allow filenames according to a pattern
590  * match. (If we ever figure out how to say "allow writes to /regex/",
591  * remove this checK).
592  */
593  log_info(LD_DIR,
594  "Unable to parse descriptor of type %s with hash %s and "
595  "length %lu. Descriptor not dumped because the sandbox is "
596  "configured",
597  type, digest_sha256_hex, (unsigned long)len);
598  }
599 
600  tor_free(debugfile_base);
601  tor_free(debugfile);
602 
603  err:
604  return;
605 }
int base16_decode(char *dest, size_t destlen, const char *src, size_t srclen)
Definition: binascii.c:506
void base16_encode(char *dest, size_t destlen, const char *src, size_t srclen)
Definition: binascii.c:478
const or_options_t * get_options(void)
Definition: config.c:919
Header file for config.c.
#define HEX_DIGEST256_LEN
Definition: crypto_digest.h:37
int crypto_digest256(char *digest, const char *m, size_t len, digest_algorithm_t algorithm)
int tor_memeq(const void *a, const void *b, size_t sz)
Definition: di_ops.c:107
#define tor_memneq(a, b, sz)
Definition: di_ops.h:21
#define DIGEST256_LEN
Definition: digest_sizes.h:23
smartlist_t * tor_listdir(const char *dirname)
Definition: dir.c:307
int check_private_dir(const char *dirname, cpd_check_t check, const char *effective_user)
Definition: dir.c:71
int write_str_to_file(const char *fname, const char *str, int bin)
Definition: files.c:274
int tor_unlink(const char *pathname)
Definition: files.c:154
#define RFTS_IGNORE_MISSING
Definition: files.h:101
file_status_t file_status(const char *filename)
Definition: files.c:212
#define RFTS_BIN
Definition: files.h:99
#define LD_DIR
Definition: log.h:88
#define tor_free(p)
Definition: malloc.h:52
Master header file for Tor-specific functionality.
int tor_asprintf(char **strp, const char *fmt,...)
Definition: printf.c:75
int sandbox_is_active(void)
Definition: sandbox.c:1953
Header file for sandbox.c.
void smartlist_sort(smartlist_t *sl, int(*compare)(const void **a, const void **b))
Definition: smartlist.c:334
smartlist_t * smartlist_new(void)
void smartlist_add(smartlist_t *sl, void *element)
void smartlist_del_keeporder(smartlist_t *sl, int idx)
#define SMARTLIST_FOREACH_BEGIN(sl, type, var)
#define SMARTLIST_FOREACH(sl, type, var, cmd)
#define SMARTLIST_DEL_CURRENT_KEEPORDER(sl, var)
uint64_t MaxUnparseableDescSizeToLog
#define STATIC
Definition: testsupport.h:32
#define MOCK_IMPL(rv, funcname, arglist)
Definition: testsupport.h:133
STATIC void dump_desc_populate_fifo_from_directory(const char *dirname)
Definition: unparseable.c:426
static void dump_desc_fifo_add_and_clean(char *filename, const uint8_t *digest_sha256, size_t len)
Definition: unparseable.c:130
STATIC smartlist_t * descs_dumped
Definition: unparseable.c:26
STATIC dumped_desc_t * dump_desc_populate_one_file(const char *dirname, const char *f)
Definition: unparseable.c:260
void dump_desc_fifo_cleanup(void)
Definition: unparseable.c:239
static void dump_desc_create_dir(void)
Definition: unparseable.c:97
static int have_dump_desc_dir
Definition: unparseable.c:30
void dump_desc(const char *desc, const char *type)
Definition: unparseable.c:496
static int dump_desc_compare_fifo_entries(const void **a_v, const void **b_v)
Definition: unparseable.c:389
STATIC uint64_t len_descs_dumped
Definition: unparseable.c:28
static int dump_desc_fifo_bump_hash(const uint8_t *digest_sha256)
Definition: unparseable.c:201
void dump_desc_init(void)
Definition: unparseable.c:38
Header file for unparseable.c.
#define tor_assert(expr)
Definition: util_bug.h:102
int strcmpstart(const char *s1, const char *s2)
Definition: util_string.c:215