Line data Source code
1 : /* Copyright (c) 2017-2021, The Tor Project, Inc. */
2 : /* See LICENSE for licensing information */
3 :
4 : /**
5 : * \file storagedir.c
6 : *
7 : * \brief An abstraction for a directory full of similar files.
8 : *
9 : * Storagedirs are used by our consensus cache code, and may someday also get
10 : * used for unparseable objects. A large part of the need for this type is to
11 : * work around the limitations in our sandbox code, where all filenames need
12 : * to be registered in advance.
13 : **/
14 :
15 : #include "lib/fs/storagedir.h"
16 :
17 : #include "lib/container/smartlist.h"
18 : #include "lib/encoding/confline.h"
19 : #include "lib/fs/dir.h"
20 : #include "lib/fs/files.h"
21 : #include "lib/fs/mmap.h"
22 : #include "lib/log/escape.h"
23 : #include "lib/log/log.h"
24 : #include "lib/log/util_bug.h"
25 : #include "lib/malloc/malloc.h"
26 : #include "lib/memarea/memarea.h"
27 : #include "lib/sandbox/sandbox.h"
28 : #include "lib/string/printf.h"
29 : #include "lib/string/util_string.h"
30 :
31 : #ifdef HAVE_SYS_TYPES_H
32 : #include <sys/types.h>
33 : #endif
34 : #ifdef HAVE_SYS_STAT_H
35 : #include <sys/stat.h>
36 : #endif
37 : #ifdef HAVE_UNISTD_H
38 : #include <unistd.h>
39 : #endif
40 : #include <stdlib.h>
41 : #include <errno.h>
42 : #include <string.h>
43 :
44 : #define FNAME_MIN_NUM 1000 /* Lowest number used as a filename in a storage directory; names run from here up to FNAME_MIN_NUM + max_files - 1. */
45 :
46 : /** A storage_dir_t represents a directory full of similar cached
47 : * files. Filenames are decimal integers. Files can be cleaned as needed
48 : * to limit total disk usage. */
49 : struct storage_dir_t {
50 : /** Directory holding the files for this storagedir (an owned copy). */
51 : char *directory;
52 : /** Either NULL (no listing cached), or a listing of the directory as a
53 : * smartlist of heap-allocated filename strings owned by this object. */
54 : smartlist_t *contents;
55 : /** The largest number of non-temporary files we'll place in the
56 : * directory. */
57 : int max_files;
58 : /** If true, then 'usage' has been computed. */
59 : int usage_known;
60 : /** Total bytes used in this directory; valid only when usage_known. */
61 : uint64_t usage;
62 : };
63 :
64 : /** Create or open a new storage directory at <b>dirname</b>, with
65 : * capacity for up to <b>max_files</b> files.
66 : */
67 : storage_dir_t *
68 37 : storage_dir_new(const char *dirname, int max_files)
69 : {
70 37 : if (check_private_dir(dirname, CPD_CREATE, NULL) < 0)
71 : return NULL;
72 :
73 36 : storage_dir_t *d = tor_malloc_zero(sizeof(storage_dir_t));
74 36 : d->directory = tor_strdup(dirname);
75 36 : d->max_files = max_files;
76 36 : return d;
77 : }
78 :
79 : /**
80 : * Drop all in-RAM storage for <b>d</b>. Does not delete any files.
81 : */
82 : void
83 32 : storage_dir_free_(storage_dir_t *d)
84 : {
85 32 : if (d == NULL)
86 : return;
87 32 : tor_free(d->directory);
88 32 : if (d->contents) {
89 234 : SMARTLIST_FOREACH(d->contents, char *, cp, tor_free(cp));
90 31 : smartlist_free(d->contents);
91 : }
92 32 : tor_free(d);
93 : }
94 :
95 : /**
96 : * Tell the sandbox (if any) configured by <b>cfg</b> to allow the
97 : * operations that <b>d</b> will need.
98 : *
99 : * The presence of this function is why we need an upper limit on the
100 : * number of files in a storage_dir_t: we need to approve file operations
101 : * one by one.
102 : */
103 : int
104 0 : storage_dir_register_with_sandbox(storage_dir_t *d, sandbox_cfg_t **cfg)
105 : {
106 0 : int problems = 0;
107 0 : int idx;
108 0 : for (idx = FNAME_MIN_NUM; idx < FNAME_MIN_NUM + d->max_files; ++idx) {
109 0 : char *path = NULL, *tmppath = NULL;
110 0 : tor_asprintf(&path, "%s/%d", d->directory, idx);
111 0 : tor_asprintf(&tmppath, "%s/%d.tmp", d->directory, idx);
112 :
113 0 : problems += sandbox_cfg_allow_open_filename(cfg, tor_strdup(path));
114 0 : problems += sandbox_cfg_allow_open_filename(cfg, tor_strdup(tmppath));
115 0 : problems += sandbox_cfg_allow_stat_filename(cfg, tor_strdup(path));
116 0 : problems += sandbox_cfg_allow_stat_filename(cfg, tor_strdup(tmppath));
117 0 : problems += sandbox_cfg_allow_rename(cfg,
118 : tor_strdup(tmppath), tor_strdup(path));
119 :
120 0 : tor_free(path);
121 0 : tor_free(tmppath);
122 : }
123 :
124 0 : return problems ? -1 : 0;
125 : }
126 :
127 : /**
128 : * Remove all files in <b>d</b> whose names end with ".tmp".
129 : *
130 : * Requires that the contents field of <b>d</b> is set.
131 : */
132 : static void
133 41 : storage_dir_clean_tmpfiles(storage_dir_t *d)
134 : {
135 41 : if (!d->contents)
136 : return;
137 140 : SMARTLIST_FOREACH_BEGIN(d->contents, char *, fname) {
138 99 : if (strcmpend(fname, ".tmp"))
139 98 : continue;
140 1 : char *path = NULL;
141 1 : tor_asprintf(&path, "%s/%s", d->directory, fname);
142 1 : if (unlink(sandbox_intern_string(path))) {
143 0 : log_warn(LD_FS, "Unable to unlink %s while cleaning "
144 : "temporary files: %s", escaped(path), strerror(errno));
145 0 : tor_free(path);
146 0 : continue;
147 : }
148 1 : tor_free(path);
149 1 : SMARTLIST_DEL_CURRENT(d->contents, fname);
150 1 : tor_free(fname);
151 99 : } SMARTLIST_FOREACH_END(fname);
152 :
153 41 : d->usage_known = 0;
154 : }
155 :
156 : /**
157 : * Re-scan the directory <b>d</b> to learn its contents.
158 : */
159 : static int
160 41 : storage_dir_rescan(storage_dir_t *d)
161 : {
162 41 : if (d->contents) {
163 46 : SMARTLIST_FOREACH(d->contents, char *, cp, tor_free(cp));
164 6 : smartlist_free(d->contents);
165 : }
166 41 : d->usage = 0;
167 41 : d->usage_known = 0;
168 41 : if (NULL == (d->contents = tor_listdir(d->directory))) {
169 : return -1;
170 : }
171 41 : storage_dir_clean_tmpfiles(d);
172 41 : return 0;
173 : }
174 :
175 : /**
176 : * Return a smartlist containing the filenames within <b>d</b>.
177 : */
178 : const smartlist_t *
179 136 : storage_dir_list(storage_dir_t *d)
180 : {
181 136 : if (! d->contents)
182 31 : storage_dir_rescan(d);
183 136 : return d->contents;
184 : }
185 :
186 : /**
187 : * Return the total number of bytes used for storage in <b>d</b>.
188 : */
189 : uint64_t
190 19 : storage_dir_get_usage(storage_dir_t *d)
191 : {
192 19 : if (d->usage_known)
193 6 : return d->usage;
194 :
195 13 : uint64_t total = 0;
196 59 : SMARTLIST_FOREACH_BEGIN(storage_dir_list(d), const char *, cp) {
197 46 : char *path = NULL;
198 46 : struct stat st;
199 46 : tor_asprintf(&path, "%s/%s", d->directory, cp);
200 46 : if (stat(sandbox_intern_string(path), &st) == 0) {
201 46 : total += st.st_size;
202 : }
203 46 : tor_free(path);
204 46 : } SMARTLIST_FOREACH_END(cp);
205 :
206 13 : d->usage = total;
207 13 : d->usage_known = 1;
208 13 : return d->usage;
209 : }
210 :
211 : /** Mmap a specified file within <b>d</b>.
212 : *
213 : * On failure, return NULL and set errno as for tor_mmap_file(). */
214 : tor_mmap_t *
215 135 : storage_dir_map(storage_dir_t *d, const char *fname)
216 : {
217 135 : char *path = NULL;
218 135 : tor_asprintf(&path, "%s/%s", d->directory, fname);
219 135 : tor_mmap_t *result = tor_mmap_file(path);
220 135 : int errval = errno;
221 135 : tor_free(path);
222 135 : if (result == NULL)
223 0 : errno = errval;
224 135 : return result;
225 : }
226 :
227 : /** Read a file within <b>d</b> into a newly allocated buffer. Set
228 : * *<b>sz_out</b> to its size. */
229 : uint8_t *
230 3 : storage_dir_read(storage_dir_t *d, const char *fname, int bin, size_t *sz_out)
231 : {
232 3 : const int flags = bin ? RFTS_BIN : 0;
233 :
234 3 : char *path = NULL;
235 3 : tor_asprintf(&path, "%s/%s", d->directory, fname);
236 3 : struct stat st;
237 3 : char *contents = read_file_to_str(path, flags, &st);
238 3 : if (contents && sz_out) {
239 : // it fits in RAM, so we know its size is less than SIZE_MAX
240 : #if UINT64_MAX > SIZE_MAX
241 : tor_assert((uint64_t)st.st_size <= SIZE_MAX);
242 : #endif
243 2 : *sz_out = (size_t) st.st_size;
244 : }
245 :
246 3 : tor_free(path);
247 3 : return (uint8_t *) contents;
248 : }
249 :
250 : /** Helper: Find an unused filename within the directory */
251 : static char *
252 205 : find_unused_fname(storage_dir_t *d)
253 : {
254 205 : if (!d->contents) {
255 4 : if (storage_dir_rescan(d) < 0)
256 : return NULL;
257 : }
258 :
259 : char buf[16];
260 : int i;
261 : /* Yuck; this is quadratic. Fortunately, that shouldn't matter much,
262 : * since disk writes are more expensive by a lot. */
263 2059 : for (i = FNAME_MIN_NUM; i < FNAME_MIN_NUM + d->max_files; ++i) {
264 2057 : tor_snprintf(buf, sizeof(buf), "%d", i);
265 2057 : if (!smartlist_contains_string(d->contents, buf)) {
266 203 : return tor_strdup(buf);
267 : }
268 : }
269 : return NULL;
270 : }
271 :
272 : /** Helper: As storage_dir_save_bytes_to_file, but store a smartlist of
273 : * sized_chunk_t rather than a single byte array. */
274 : static int
275 205 : storage_dir_save_chunks_to_file(storage_dir_t *d,
276 : const smartlist_t *chunks,
277 : int binary,
278 : char **fname_out)
279 : {
280 205 : uint64_t total_length = 0;
281 205 : char *fname = find_unused_fname(d);
282 205 : if (!fname)
283 : return -1;
284 :
285 2010 : SMARTLIST_FOREACH(chunks, const sized_chunk_t *, ch,
286 : total_length += ch->len);
287 :
288 203 : char *path = NULL;
289 203 : tor_asprintf(&path, "%s/%s", d->directory, fname);
290 :
291 203 : int r = write_chunks_to_file(path, chunks, binary, 0);
292 203 : if (r == 0) {
293 203 : if (d->usage_known)
294 2 : d->usage += total_length;
295 203 : if (fname_out) {
296 200 : *fname_out = tor_strdup(fname);
297 : }
298 203 : if (d->contents)
299 203 : smartlist_add(d->contents, tor_strdup(fname));
300 : }
301 203 : tor_free(fname);
302 203 : tor_free(path);
303 203 : return r;
304 : }
305 :
306 : /** Try to write the <b>length</b> bytes at <b>data</b> into a new file
307 : * in <b>d</b>. On success, return 0 and set *<b>fname_out</b> to a
308 : * newly allocated string containing the filename. On failure, return
309 : * -1. */
310 : int
311 16 : storage_dir_save_bytes_to_file(storage_dir_t *d,
312 : const uint8_t *data,
313 : size_t length,
314 : int binary,
315 : char **fname_out)
316 : {
317 16 : smartlist_t *chunks = smartlist_new();
318 16 : sized_chunk_t chunk = { (const char *)data, length };
319 16 : smartlist_add(chunks, &chunk);
320 16 : int r = storage_dir_save_chunks_to_file(d, chunks, binary, fname_out);
321 16 : smartlist_free(chunks);
322 16 : return r;
323 : }
324 :
325 : /**
326 : * As storage_dir_save_bytes_to_file, but saves a NUL-terminated string
327 : * <b>str</b>.
328 : */
329 : int
330 14 : storage_dir_save_string_to_file(storage_dir_t *d,
331 : const char *str,
332 : int binary,
333 : char **fname_out)
334 : {
335 14 : return storage_dir_save_bytes_to_file(d,
336 : (const uint8_t*)str, strlen(str), binary, fname_out);
337 : }
338 :
339 : /**
340 : * As storage_dir_save_bytes_to_file, but associates the data with the
341 : * key-value pairs in <b>labels</b>. Files stored in this format can be
342 : * recovered with storage_dir_map_labeled() or storage_dir_read_labeled().
343 : */
344 : int
345 189 : storage_dir_save_labeled_to_file(storage_dir_t *d,
346 : const config_line_t *labels,
347 : const uint8_t *data,
348 : size_t length,
349 : char **fname_out)
350 : {
351 : /*
352 : * The storage format is to prefix the data with the key-value pairs in
353 : * <b>labels</b>, and a single NUL separator. But code outside this module
354 : * MUST NOT rely on that format.
355 : */
356 :
357 189 : smartlist_t *chunks = smartlist_new();
358 189 : memarea_t *area = memarea_new();
359 189 : const config_line_t *line;
360 1793 : for (line = labels; line; line = line->next) {
361 1415 : sized_chunk_t *sz = memarea_alloc(area, sizeof(sized_chunk_t));
362 1415 : sz->len = strlen(line->key) + 1 + strlen(line->value) + 1;
363 1415 : const size_t allocated = sz->len + 1;
364 1415 : char *bytes = memarea_alloc(area, allocated);
365 1415 : tor_snprintf(bytes, allocated, "%s %s\n", line->key, line->value);
366 1415 : sz->bytes = bytes;
367 1415 : smartlist_add(chunks, sz);
368 : }
369 :
370 189 : sized_chunk_t *nul = memarea_alloc(area, sizeof(sized_chunk_t));
371 189 : nul->len = 1;
372 189 : nul->bytes = "\0";
373 189 : smartlist_add(chunks, nul);
374 :
375 189 : sized_chunk_t *datachunk = memarea_alloc(area, sizeof(sized_chunk_t));
376 189 : datachunk->bytes = (const char *)data;
377 189 : datachunk->len = length;
378 189 : smartlist_add(chunks, datachunk);
379 :
380 189 : int r = storage_dir_save_chunks_to_file(d, chunks, 1, fname_out);
381 189 : smartlist_free(chunks);
382 189 : memarea_drop_all(area);
383 189 : return r;
384 : }
385 :
386 : /**
387 : * Map a file that was created with storage_dir_save_labeled_to_file(). On
388 : * failure, return NULL. On success, write a set of newly allocated labels
389 : * into *<b>labels_out</b>, a pointer to the data into *<b>data_out</b>, and
390 : * the data's size into *<b>sz_out</b>. On success, also return a tor_mmap_t
391 : * object whose contents should not be used -- it needs to be kept around,
392 : * though, for as long as <b>data_out</b> is going to be valid.
393 : *
394 : * On failure, set errno as for tor_mmap_file() if the file was missing or
395 : * empty, and set errno to EINVAL if the file was not in the labeled
396 : * format expected.
397 : */
398 : tor_mmap_t *
399 134 : storage_dir_map_labeled(storage_dir_t *dir,
400 : const char *fname,
401 : config_line_t **labels_out,
402 : const uint8_t **data_out,
403 : size_t *sz_out)
404 : {
405 134 : tor_mmap_t *m = storage_dir_map(dir, fname);
406 134 : int errval;
407 134 : if (! m) {
408 0 : errval = errno;
409 0 : goto err;
410 : }
411 134 : const char *nulp = memchr(m->data, '\0', m->size);
412 134 : if (! nulp) {
413 0 : errval = EINVAL;
414 0 : goto err;
415 : }
416 134 : if (labels_out && config_get_lines(m->data, labels_out, 0) < 0) {
417 0 : errval = EINVAL;
418 0 : goto err;
419 : }
420 134 : size_t offset = nulp - m->data + 1;
421 134 : tor_assert(offset <= m->size);
422 134 : *data_out = (const uint8_t *)(m->data + offset);
423 134 : *sz_out = m->size - offset;
424 :
425 134 : return m;
426 0 : err:
427 0 : tor_munmap_file(m);
428 0 : errno = errval;
429 0 : return NULL;
430 : }
431 :
432 : /** As storage_dir_map_labeled, but return a new byte array containing the
433 : * data. */
434 : uint8_t *
435 1 : storage_dir_read_labeled(storage_dir_t *dir,
436 : const char *fname,
437 : config_line_t **labels_out,
438 : size_t *sz_out)
439 : {
440 1 : const uint8_t *data = NULL;
441 1 : tor_mmap_t *m = storage_dir_map_labeled(dir, fname, labels_out,
442 : &data, sz_out);
443 1 : if (m == NULL)
444 : return NULL;
445 1 : uint8_t *result = tor_memdup(data, *sz_out);
446 1 : tor_munmap_file(m);
447 1 : return result;
448 : }
449 :
450 : /* Reduce the cached usage amount in <b>d</b> by <b>removed_file_size</b>.
451 : * This function is a no-op if <b>d->usage_known</b> is 0. */
452 : static void
453 48 : storage_dir_reduce_usage(storage_dir_t *d, uint64_t removed_file_size)
454 : {
455 48 : if (d->usage_known) {
456 9 : if (! BUG(d->usage < removed_file_size)) {
457 : /* This bug can also be triggered if an external process resized a file
458 : * between the call to storage_dir_get_usage() that last checked
459 : * actual usage (rather than relaying on cached usage), and the call to
460 : * this function. */
461 9 : d->usage -= removed_file_size;
462 : } else {
463 : /* If we underflowed the cached directory size, re-check the sizes of all
464 : * the files in the directory. This makes storage_dir_shrink() quadratic,
465 : * but only if a process is continually changing file sizes in the
466 : * storage directory (in which case, we have bigger issues).
467 : *
468 : * We can't just reset usage_known, because storage_dir_shrink() relies
469 : * on knowing the usage. */
470 0 : storage_dir_rescan(d);
471 0 : (void)storage_dir_get_usage(d);
472 : }
473 : }
474 48 : }
475 :
476 : /**
477 : * Remove the file called <b>fname</b> from <b>d</b>.
478 : */
479 : void
480 41 : storage_dir_remove_file(storage_dir_t *d,
481 : const char *fname)
482 : {
483 41 : char *path = NULL;
484 41 : tor_asprintf(&path, "%s/%s", d->directory, fname);
485 41 : const char *ipath = sandbox_intern_string(path);
486 :
487 41 : uint64_t size = 0;
488 41 : if (d->usage_known) {
489 2 : struct stat st;
490 2 : if (stat(ipath, &st) == 0) {
491 1 : size = st.st_size;
492 : }
493 : }
494 41 : if (unlink(ipath) == 0) {
495 40 : storage_dir_reduce_usage(d, size);
496 : } else {
497 1 : log_warn(LD_FS, "Unable to unlink %s while removing file: %s",
498 : escaped(path), strerror(errno));
499 1 : tor_free(path);
500 1 : return;
501 : }
502 40 : if (d->contents) {
503 40 : smartlist_string_remove(d->contents, fname);
504 : }
505 :
506 40 : tor_free(path);
507 : }
508 :
/** Helper type: one file in a storage directory, as seen while shrinking
 * the directory.  Entries are ordered by mtime. */
typedef struct shrinking_dir_entry_t {
  /** Modification time of the file. */
  time_t mtime;
  /** Size of the file, in bytes. */
  uint64_t size;
  /** Full path of the file. */
  char *path;
} shrinking_dir_entry_t;

/** Helper: qsort() comparison function that orders shrinking_dir_entry_t
 * structs by ascending mtime.  Returns -1, 0, or 1. */
static int
shrinking_dir_entry_compare(const void *a_, const void *b_)
{
  const shrinking_dir_entry_t *lhs = a_;
  const shrinking_dir_entry_t *rhs = b_;

  /* Each comparison yields 0 or 1, so the difference is -1, 0, or 1. */
  return (lhs->mtime > rhs->mtime) - (lhs->mtime < rhs->mtime);
}
530 :
531 : /**
532 : * Try to free space by removing the oldest files in <b>d</b>. Delete
533 : * until no more than <b>target_size</b> bytes are left, and at least
534 : * <b>min_to_remove</b> files have been removed... or until there is
535 : * nothing left to remove.
536 : *
537 : * Return 0 on success; -1 on failure.
538 : */
539 : int
540 4 : storage_dir_shrink(storage_dir_t *d,
541 : uint64_t target_size,
542 : int min_to_remove)
543 : {
544 4 : if (d->usage_known && d->usage <= target_size && !min_to_remove) {
545 : /* Already small enough. */
546 : return 0;
547 : }
548 :
549 3 : if (storage_dir_rescan(d) < 0)
550 : return -1;
551 :
552 3 : const uint64_t orig_usage = storage_dir_get_usage(d);
553 3 : if (orig_usage <= target_size && !min_to_remove) {
554 : /* Okay, small enough after rescan! */
555 : return 0;
556 : }
557 :
558 3 : const int n = smartlist_len(d->contents);
559 3 : shrinking_dir_entry_t *ents = tor_calloc(n, sizeof(shrinking_dir_entry_t));
560 23 : SMARTLIST_FOREACH_BEGIN(d->contents, const char *, fname) {
561 20 : shrinking_dir_entry_t *ent = &ents[fname_sl_idx];
562 20 : struct stat st;
563 20 : tor_asprintf(&ent->path, "%s/%s", d->directory, fname);
564 20 : if (stat(sandbox_intern_string(ent->path), &st) == 0) {
565 20 : ent->mtime = st.st_mtime;
566 20 : ent->size = st.st_size;
567 : }
568 20 : } SMARTLIST_FOREACH_END(fname);
569 :
570 3 : qsort(ents, n, sizeof(shrinking_dir_entry_t), shrinking_dir_entry_compare);
571 :
572 3 : int idx = 0;
573 11 : while ((d->usage > target_size || min_to_remove > 0) && idx < n) {
574 8 : if (unlink(sandbox_intern_string(ents[idx].path)) == 0) {
575 8 : storage_dir_reduce_usage(d, ents[idx].size);
576 8 : --min_to_remove;
577 : }
578 8 : ++idx;
579 : }
580 :
581 23 : for (idx = 0; idx < n; ++idx) {
582 20 : tor_free(ents[idx].path);
583 : }
584 3 : tor_free(ents);
585 :
586 3 : storage_dir_rescan(d);
587 :
588 3 : return 0;
589 : }
590 :
591 : /** Remove all files in <b>d</b>. */
592 : int
593 1 : storage_dir_remove_all(storage_dir_t *d)
594 : {
595 1 : return storage_dir_shrink(d, 0, d->max_files);
596 : }
597 :
598 : /**
599 : * Return the largest number of non-temporary files we're willing to
600 : * store in <b>d</b>.
601 : */
602 : int
603 0 : storage_dir_get_max_files(storage_dir_t *d)
604 : {
605 0 : return d->max_files;
606 : }
|