tor  0.4.2.0-alpha-dev
compress.c
Go to the documentation of this file.
1 /* Copyright (c) 2004, Roger Dingledine.
2  * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
3  * Copyright (c) 2007-2019, The Tor Project, Inc. */
4 /* See LICENSE for licensing information */
5 
14 #include "orconfig.h"
15 
16 #include <stdlib.h>
17 #include <stdio.h>
18 #include <string.h>
19 #include "lib/cc/torint.h"
20 
21 #ifdef HAVE_NETINET_IN_H
22 #include <netinet/in.h>
23 #endif
24 
25 #include "lib/log/log.h"
26 #include "lib/log/util_bug.h"
27 #include "lib/arch/bytes.h"
28 #include "lib/ctime/di_ops.h"
29 #include "lib/compress/compress.h"
35 #include "lib/intmath/cmp.h"
36 #include "lib/malloc/malloc.h"
37 #include "lib/subsys/subsys.h"
38 #include "lib/thread/threads.h"
39 
42 
44 /* These macros define the maximum allowable compression factor. Anything of
45  * size greater than CHECK_FOR_COMPRESSION_BOMB_AFTER is not allowed to
46  * have an uncompression factor (uncompressed size:compressed size ratio) of
47  * any greater than MAX_UNCOMPRESSION_FACTOR.
48  *
49  * Picking a value for MAX_UNCOMPRESSION_FACTOR is a trade-off: we want it to
50  * be small to limit the attack multiplier, but we also want it to be large
51  * enough so that no legitimate document --even ones we might invent in the
52  * future -- ever compresses by a factor of greater than
53  * MAX_UNCOMPRESSION_FACTOR. Within those parameters, there's a reasonably
54  * large range of possible values. IMO, anything over 8 is probably safe; IMO
55  * anything under 50 is probably sufficient.
56  */
57 #define MAX_UNCOMPRESSION_FACTOR 25
58 #define CHECK_FOR_COMPRESSION_BOMB_AFTER (1024*64)
59 
64 tor_compress_is_compression_bomb,(size_t size_in, size_t size_out))
65 {
66  if (size_in == 0 || size_out < CHECK_FOR_COMPRESSION_BOMB_AFTER)
67  return 0;
68 
69  return (size_out / size_in > MAX_UNCOMPRESSION_FACTOR);
70 }
71 
74 static size_t
76  compression_level_t compression_level,
77  size_t in_len)
78 {
79  // ignore these for now.
80  (void)compression_level;
81  if (method == NO_METHOD) {
82  /* Guess that we'll need an extra byte, to avoid a needless realloc
83  * for nul-termination */
84  return (in_len < SIZE_MAX) ? in_len + 1 : in_len;
85  }
86 
87  /* Always guess a factor of 2. */
88  if (compress) {
89  in_len /= 2;
90  } else {
91  if (in_len < SIZE_T_CEILING/2)
92  in_len *= 2;
93  }
94  return MAX(in_len, 1024);
95 }
96 
100 static int
101 tor_compress_impl(int compress,
102  char **out, size_t *out_len,
103  const char *in, size_t in_len,
104  compress_method_t method,
105  compression_level_t compression_level,
106  int complete_only,
107  int protocol_warn_level)
108 {
109  tor_compress_state_t *stream;
110  int rv;
111 
112  stream = tor_compress_new(compress, method, compression_level);
113 
114  if (stream == NULL) {
115  log_warn(LD_GENERAL, "NULL stream while %scompressing",
116  compress?"":"de");
117  log_debug(LD_GENERAL, "method: %d level: %d at len: %lu",
118  method, compression_level, (unsigned long)in_len);
119  return -1;
120  }
121 
122  size_t in_len_orig = in_len;
123  size_t out_remaining, out_alloc;
124  char *outptr;
125 
126  out_remaining = out_alloc =
127  guess_compress_size(compress, method, compression_level, in_len);
128  *out = outptr = tor_malloc(out_remaining);
129 
130  const int finish = complete_only || compress;
131 
132  while (1) {
133  switch (tor_compress_process(stream,
134  &outptr, &out_remaining,
135  &in, &in_len, finish)) {
136  case TOR_COMPRESS_DONE:
137  if (in_len == 0 || compress) {
138  goto done;
139  } else {
140  // More data is present, and we're decompressing. So we may need to
141  // reinitialize the stream if we are handling multiple concatenated
142  // inputs.
143  tor_compress_free(stream);
144  stream = tor_compress_new(compress, method, compression_level);
145  if (stream == NULL) {
146  log_warn(LD_GENERAL, "NULL stream while %scompressing",
147  compress?"":"de");
148  goto err;
149  }
150  }
151  break;
152  case TOR_COMPRESS_OK:
153  if (compress || complete_only) {
154  log_fn(protocol_warn_level, LD_PROTOCOL,
155  "Unexpected %s while %scompressing",
156  complete_only?"end of input":"result",
157  compress?"":"de");
158  log_debug(LD_GENERAL, "method: %d level: %d at len: %lu",
159  method, compression_level, (unsigned long)in_len);
160  goto err;
161  } else {
162  if (in_len == 0) {
163  goto done;
164  }
165  }
166  break;
167  case TOR_COMPRESS_BUFFER_FULL: {
168  if (!compress && outptr < *out+out_alloc) {
169  // A buffer error in this case means that we have a problem
170  // with our input.
171  log_fn(protocol_warn_level, LD_PROTOCOL,
172  "Possible truncated or corrupt compressed data");
173  goto err;
174  }
175  if (out_alloc >= SIZE_T_CEILING / 2) {
176  log_warn(LD_GENERAL, "While %scompressing data: ran out of space.",
177  compress?"":"un");
178  goto err;
179  }
180  if (!compress &&
181  tor_compress_is_compression_bomb(in_len_orig, out_alloc)) {
182  // This should already have been caught down in the backend logic.
183  // LCOV_EXCL_START
184  tor_assert_nonfatal_unreached();
185  goto err;
186  // LCOV_EXCL_STOP
187  }
188  const size_t offset = outptr - *out;
189  out_alloc *= 2;
190  *out = tor_realloc(*out, out_alloc);
191  outptr = *out + offset;
192  out_remaining = out_alloc - offset;
193  break;
194  }
195  case TOR_COMPRESS_ERROR:
196  log_fn(protocol_warn_level, LD_GENERAL,
197  "Error while %scompressing data: bad input?",
198  compress?"":"un");
199  goto err; // bad data.
200 
201  // LCOV_EXCL_START
202  default:
203  tor_assert_nonfatal_unreached();
204  goto err;
205  // LCOV_EXCL_STOP
206  }
207  }
208  done:
209  *out_len = outptr - *out;
210  if (compress && tor_compress_is_compression_bomb(*out_len, in_len_orig)) {
211  log_warn(LD_BUG, "We compressed something and got an insanely high "
212  "compression factor; other Tors would think this was a "
213  "compression bomb.");
214  goto err;
215  }
216  if (!compress) {
217  // NUL-terminate our output.
218  if (out_alloc == *out_len)
219  *out = tor_realloc(*out, out_alloc + 1);
220  (*out)[*out_len] = '\0';
221  }
222  rv = 0;
223  goto out;
224 
225  err:
226  tor_free(*out);
227  *out_len = 0;
228  rv = -1;
229  goto out;
230 
231  out:
232  tor_compress_free(stream);
233  return rv;
234 }
235 
241 int
242 tor_compress(char **out, size_t *out_len,
243  const char *in, size_t in_len,
244  compress_method_t method)
245 {
246  return tor_compress_impl(1, out, out_len, in, in_len, method,
247  BEST_COMPRESSION,
248  1, LOG_WARN);
249 }
250 
267 int
268 tor_uncompress(char **out, size_t *out_len,
269  const char *in, size_t in_len,
270  compress_method_t method,
271  int complete_only,
272  int protocol_warn_level)
273 {
274  return tor_compress_impl(0, out, out_len, in, in_len, method,
275  BEST_COMPRESSION,
276  complete_only, protocol_warn_level);
277 }
278 
284 detect_compression_method(const char *in, size_t in_len)
285 {
286  if (in_len > 2 && fast_memeq(in, "\x1f\x8b", 2)) {
287  return GZIP_METHOD;
288  } else if (in_len > 2 && (in[0] & 0x0f) == 8 &&
289  (tor_ntohs(get_uint16(in)) % 31) == 0) {
290  return ZLIB_METHOD;
291  } else if (in_len > 2 &&
292  fast_memeq(in, "\x5d\x00\x00", 3)) {
293  return LZMA_METHOD;
294  } else if (in_len > 3 &&
295  fast_memeq(in, "\x28\xb5\x2f\xfd", 4)) {
296  return ZSTD_METHOD;
297  } else {
298  return UNKNOWN_METHOD;
299  }
300 }
301 
303 int
305 {
306  switch (method) {
307  case GZIP_METHOD:
308  case ZLIB_METHOD:
309  return tor_zlib_method_supported();
310  case LZMA_METHOD:
311  return tor_lzma_method_supported();
312  case ZSTD_METHOD:
313  return tor_zstd_method_supported();
314  case NO_METHOD:
315  return 1;
316  case UNKNOWN_METHOD:
317  default:
318  return 0;
319  }
320 }
321 
327 unsigned
329 {
330  static unsigned supported = 0;
331  if (supported == 0) {
333  for (m = NO_METHOD; m <= UNKNOWN_METHOD; ++m) {
335  supported |= (1u << m);
336  }
337  }
338  }
339  return supported;
340 }
341 
344 static const struct {
345  const char *name;
346  compress_method_t method;
348  { "gzip", GZIP_METHOD },
349  { "deflate", ZLIB_METHOD },
350  // We call this "x-tor-lzma" rather than "x-lzma", because we impose a
351  // lower maximum memory usage on the decoding side.
352  { "x-tor-lzma", LZMA_METHOD },
353  { "x-zstd" , ZSTD_METHOD },
354  { "identity", NO_METHOD },
355 
356  /* Later entries in this table are not canonical; these are recognized but
357  * not emitted. */
358  { "x-gzip", GZIP_METHOD },
359 };
360 
363 const char *
365 {
366  unsigned i;
367  for (i = 0; i < ARRAY_LENGTH(compression_method_names); ++i) {
368  if (method == compression_method_names[i].method)
369  return compression_method_names[i].name;
370  }
371  return NULL;
372 }
373 
375 static const struct {
376  compress_method_t method;
377  const char *name;
379  { NO_METHOD, "uncompressed" },
380  { GZIP_METHOD, "gzipped" },
381  { ZLIB_METHOD, "deflated" },
382  { LZMA_METHOD, "LZMA compressed" },
383  { ZSTD_METHOD, "Zstandard compressed" },
384  { UNKNOWN_METHOD, "unknown encoding" },
385 };
386 
389 const char *
391 {
392  unsigned i;
393  for (i = 0; i < ARRAY_LENGTH(compression_method_human_names); ++i) {
394  if (method == compression_method_human_names[i].method)
395  return compression_method_human_names[i].name;
396  }
397  return NULL;
398 }
399 
404 {
405  unsigned i;
406  for (i = 0; i < ARRAY_LENGTH(compression_method_names); ++i) {
407  if (!strcmp(compression_method_names[i].name, name))
408  return compression_method_names[i].method;
409  }
410  return UNKNOWN_METHOD;
411 }
412 
416 const char *
418 {
419  switch (method) {
420  case GZIP_METHOD:
421  case ZLIB_METHOD:
422  return tor_zlib_get_version_str();
423  case LZMA_METHOD:
424  return tor_lzma_get_version_str();
425  case ZSTD_METHOD:
426  return tor_zstd_get_version_str();
427  case NO_METHOD:
428  case UNKNOWN_METHOD:
429  default:
430  return NULL;
431  }
432 }
433 
437 const char *
439 {
440  switch (method) {
441  case GZIP_METHOD:
442  case ZLIB_METHOD:
444  case LZMA_METHOD:
446  case ZSTD_METHOD:
448  case NO_METHOD:
449  case UNKNOWN_METHOD:
450  default:
451  return NULL;
452  }
453 }
454 
457 size_t
459 {
464 }
465 
471  union {
472  tor_zlib_compress_state_t *zlib_state;
473  tor_lzma_compress_state_t *lzma_state;
474  tor_zstd_compress_state_t *zstd_state;
475  } u;
476 };
477 
481 tor_compress_new(int compress, compress_method_t method,
482  compression_level_t compression_level)
483 {
484  tor_compress_state_t *state;
485 
486  state = tor_malloc_zero(sizeof(tor_compress_state_t));
487  state->method = method;
488 
489  switch (method) {
490  case GZIP_METHOD:
491  case ZLIB_METHOD: {
492  tor_zlib_compress_state_t *zlib_state =
493  tor_zlib_compress_new(compress, method, compression_level);
494 
495  if (zlib_state == NULL)
496  goto err;
497 
498  state->u.zlib_state = zlib_state;
499  break;
500  }
501  case LZMA_METHOD: {
502  tor_lzma_compress_state_t *lzma_state =
503  tor_lzma_compress_new(compress, method, compression_level);
504 
505  if (lzma_state == NULL)
506  goto err;
507 
508  state->u.lzma_state = lzma_state;
509  break;
510  }
511  case ZSTD_METHOD: {
512  tor_zstd_compress_state_t *zstd_state =
513  tor_zstd_compress_new(compress, method, compression_level);
514 
515  if (zstd_state == NULL)
516  goto err;
517 
518  state->u.zstd_state = zstd_state;
519  break;
520  }
521  case NO_METHOD: {
522  break;
523  }
524  case UNKNOWN_METHOD:
525  goto err;
526  }
527 
529  sizeof(tor_compress_state_t));
530  return state;
531 
532  err:
533  tor_free(state);
534  return NULL;
535 }
536 
550  char **out, size_t *out_len,
551  const char **in, size_t *in_len,
552  int finish)
553 {
554  tor_assert(state != NULL);
555  const size_t in_len_orig = *in_len;
556  const size_t out_len_orig = *out_len;
558 
559  if (*out_len == 0 && (*in_len > 0 || finish)) {
560  // If we still have input data, but no space for output data, we might as
561  // well return early and let the caller do the reallocation of the out
562  // variable.
563  return TOR_COMPRESS_BUFFER_FULL;
564  }
565 
566  switch (state->method) {
567  case GZIP_METHOD:
568  case ZLIB_METHOD:
569  rv = tor_zlib_compress_process(state->u.zlib_state,
570  out, out_len, in, in_len,
571  finish);
572  break;
573  case LZMA_METHOD:
574  rv = tor_lzma_compress_process(state->u.lzma_state,
575  out, out_len, in, in_len,
576  finish);
577  break;
578  case ZSTD_METHOD:
579  rv = tor_zstd_compress_process(state->u.zstd_state,
580  out, out_len, in, in_len,
581  finish);
582  break;
583  case NO_METHOD:
584  rv = tor_cnone_compress_process(out, out_len, in, in_len,
585  finish);
586  break;
587  default:
588  case UNKNOWN_METHOD:
589  goto err;
590  }
591  if (BUG((rv == TOR_COMPRESS_OK) &&
592  *in_len == in_len_orig &&
593  *out_len == out_len_orig)) {
594  log_warn(LD_GENERAL,
595  "More info on the bug: method == %s, finish == %d, "
596  " *in_len == in_len_orig == %lu, "
597  "*out_len == out_len_orig == %lu",
599  (unsigned long)in_len_orig, (unsigned long)out_len_orig);
600  return TOR_COMPRESS_ERROR;
601  }
602 
603  return rv;
604  err:
605  return TOR_COMPRESS_ERROR;
606 }
607 
609 void
611 {
612  if (state == NULL)
613  return;
614 
615  switch (state->method) {
616  case GZIP_METHOD:
617  case ZLIB_METHOD:
618  tor_zlib_compress_free(state->u.zlib_state);
619  break;
620  case LZMA_METHOD:
621  tor_lzma_compress_free(state->u.lzma_state);
622  break;
623  case ZSTD_METHOD:
624  tor_zstd_compress_free(state->u.zstd_state);
625  break;
626  case NO_METHOD:
627  break;
628  case UNKNOWN_METHOD:
629  break;
630  }
631 
633  sizeof(tor_compress_state_t));
634  tor_free(state);
635 }
636 
638 size_t
640 {
641  tor_assert(state != NULL);
642 
643  size_t size = sizeof(tor_compress_state_t);
644 
645  switch (state->method) {
646  case GZIP_METHOD:
647  case ZLIB_METHOD:
648  size += tor_zlib_compress_state_size(state->u.zlib_state);
649  break;
650  case LZMA_METHOD:
651  size += tor_lzma_compress_state_size(state->u.lzma_state);
652  break;
653  case ZSTD_METHOD:
654  size += tor_zstd_compress_state_size(state->u.zstd_state);
655  break;
656  case NO_METHOD:
657  case UNKNOWN_METHOD:
658  break;
659  }
660 
661  return size;
662 }
663 
665 int
667 {
669 
670  tor_zlib_init();
671  tor_lzma_init();
672  tor_zstd_init();
673 
674  return 0;
675 }
676 
681 void
683 {
684  // XXXX can we move this into tor_compress_init() after all? log.c queues
685  // XXXX log messages at startup.
687 }
688 
/** Subsystem "initialize" hook for the compress module: run
 * tor_compress_init() and hand its status straight back to the caller. */
static int
subsys_compress_initialize(void)
{
  const int status = tor_compress_init();

  return status;
}
694 
695 const subsys_fns_t sys_compress = {
696  .name = "compress",
697  .supported = true,
698  .level = -70,
699  .initialize = subsys_compress_initialize,
700 };
compress_method_t detect_compression_method(const char *in, size_t in_len)
Definition: compress.c:284
int tor_zstd_method_supported(void)
Definition: compress_zstd.c:60
size_t atomic_counter_get(atomic_counter_t *counter)
tor_compress_state_t * tor_compress_new(int compress, compress_method_t method, compression_level_t compression_level)
Definition: compress.c:481
size_t tor_zlib_compress_state_size(const tor_zlib_compress_state_t *state)
Headers for di_ops.c.
void tor_lzma_init(void)
#define LD_GENERAL
Definition: log.h:59
Macro definitions for MIN, MAX, and CLAMP.
tor_zlib_compress_state_t * tor_zlib_compress_new(int compress_, compress_method_t method, compression_level_t compression_level)
Headers for compress.c.
int tor_compress_supports_method(compress_method_t method)
Definition: compress.c:304
compress_method_t method
Definition: compress.c:469
void atomic_counter_init(atomic_counter_t *counter)
union tor_compress_state_t::@2 u
size_t tor_zstd_compress_state_size(const tor_zstd_compress_state_t *state)
size_t tor_lzma_compress_state_size(const tor_lzma_compress_state_t *state)
unsigned tor_compress_get_supported_method_bitmask(void)
Definition: compress.c:328
int tor_compress_init(void)
Definition: compress.c:666
#define tor_free(p)
Definition: malloc.h:52
Integer definitions used throughout Tor.
Headers for util_malloc.c.
Header for compress_lzma.c.
static atomic_counter_t total_compress_allocation
Definition: compress.c:41
int tor_uncompress(char **out, size_t *out_len, const char *in, size_t in_len, compress_method_t method, int complete_only, int protocol_warn_level)
Definition: compress.c:268
static int tor_compress_impl(int compress, char **out, size_t *out_len, const char *in, size_t in_len, compress_method_t method, compression_level_t compression_level, int complete_only, int protocol_warn_level)
Definition: compress.c:101
tor_compress_output_t tor_lzma_compress_process(tor_lzma_compress_state_t *state, char **out, size_t *out_len, const char **in, size_t *in_len, int finish)
Header for threads.c.
void tor_compress_free_(tor_compress_state_t *state)
Definition: compress.c:610
tor_compress_output_t tor_zstd_compress_process(tor_zstd_compress_state_t *state, char **out, size_t *out_len, const char **in, size_t *in_len, int finish)
size_t tor_zlib_get_total_allocation(void)
const char * compression_method_get_name(compress_method_t method)
Definition: compress.c:364
tor_assert(buffer)
static const struct @0 compression_method_names[]
int tor_compress(char **out, size_t *out_len, const char *in, size_t in_len, compress_method_t method)
Definition: compress.c:242
void tor_compress_log_init_warnings(void)
Definition: compress.c:682
const char * tor_zstd_get_header_version_str(void)
struct tor_compress_state_t tor_compress_state_t
Definition: compress.h:76
tor_compress_output_t tor_zlib_compress_process(tor_zlib_compress_state_t *state, char **out, size_t *out_len, const char **in, size_t *in_len, int finish)
#define SIZE_T_CEILING
Definition: torint.h:126
MOCK_IMPL(int, tor_compress_is_compression_bomb,(size_t size_in, size_t size_out))
Definition: compress.c:63
void atomic_counter_sub(atomic_counter_t *counter, size_t sub)
int tor_zlib_method_supported(void)
Definition: compress_zlib.c:84
static uint16_t get_uint16(const void *cp)
Definition: bytes.h:37
#define LOG_WARN
Definition: log.h:50
void atomic_counter_add(atomic_counter_t *counter, size_t add)
tor_zstd_compress_state_t * tor_zstd_compress_new(int compress, compress_method_t method, compression_level_t level)
const char * tor_zlib_get_version_str(void)
Definition: compress_zlib.c:95
size_t tor_compress_get_total_allocation(void)
Definition: compress.c:458
Header for compress_zstd.c.
const char * tor_compress_header_version_str(compress_method_t method)
Definition: compress.c:438
const char * tor_zlib_get_header_version_str(void)
void tor_zstd_warn_if_version_mismatched(void)
tor_lzma_compress_state_t * tor_lzma_compress_new(int compress, compress_method_t method, compression_level_t level)
size_t tor_compress_state_size(const tor_compress_state_t *state)
Definition: compress.c:639
const char * tor_lzma_get_header_version_str(void)
Header for compress_zlib.c.
Inline functions for reading and writing multibyte values from the middle of strings,...
void tor_zstd_init(void)
#define ARRAY_LENGTH(x)
#define log_fn(severity, domain, args,...)
Definition: log.h:272
static size_t guess_compress_size(int compress, compress_method_t method, compression_level_t compression_level, size_t in_len)
Definition: compress.c:75
const char * name
Definition: subsys.h:28
size_t tor_lzma_get_total_allocation(void)
compression_level_t
Definition: compress.h:35
compress_method_t compression_method_get_by_name(const char *name)
Definition: compress.c:403
Header for compress_none.c.
const char * tor_zstd_get_version_str(void)
Definition: compress_zstd.c:87
#define MAX(a, b)
Definition: cmp.h:22
void tor_zlib_init(void)
compress_method_t
Definition: compress.h:21
Declare subsystem object for the compress module.
const char * tor_lzma_get_version_str(void)
Definition: compress_lzma.c:96
Headers for log.c.
int tor_lzma_method_supported(void)
Definition: compress_lzma.c:84
tor_compress_output_t tor_cnone_compress_process(char **out, size_t *out_len, const char **in, size_t *in_len, int finish)
Definition: compress_none.c:38
tor_compress_output_t
Definition: compress.h:68
Macros to manage assertions, fatal and non-fatal.
#define LD_PROTOCOL
Definition: log.h:69
const char * compression_method_get_human_name(compress_method_t method)
Definition: compress.c:390
const char * tor_compress_version_str(compress_method_t method)
Definition: compress.c:417
size_t tor_zstd_get_total_allocation(void)
static const struct @1 compression_method_human_names[]
#define LD_BUG
Definition: log.h:83
tor_compress_output_t tor_compress_process(tor_compress_state_t *state, char **out, size_t *out_len, const char **in, size_t *in_len, int finish)
Definition: compress.c:549