Tor  0.4.7.0-alpha-dev
bloomfilt.c
Go to the documentation of this file.
1 /* Copyright (c) 2003-2004, Roger Dingledine
2  * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
3  * Copyright (c) 2007-2021, The Tor Project, Inc. */
4 /* See LICENSE for licensing information */
5 
6 /**
7  * \file bloomfilt.c
8  * \brief Uses bitarray_t to implement a bloom filter.
9  **/
10 
11 #include <stdlib.h>
12 
13 #include "lib/malloc/malloc.h"
15 #include "lib/intmath/bits.h"
16 #include "lib/log/util_bug.h"
17 #include "ext/siphash.h"
18 
19 /** How many bloom-filter bits we set per address. This is twice the
20  * BLOOMFILT_N_HASHES value, since we split the siphash output into two 32-bit
21  * values. */
22 #define N_BITS_PER_ITEM (BLOOMFILT_N_HASHES * 2)
23 
24 struct bloomfilt_t {
25  /** siphash keys to make BLOOMFILT_N_HASHES independent hashes for each
26  * items. */
28  bloomfilt_hash_fn hashfn; /**< Function used to generate hashes */
29  uint32_t mask; /**< One less than the number of bits in <b>ba</b>; always
30  * one less than a power of two. */
31  bitarray_t *ba; /**< A bit array to implement the Bloom filter. */
32 };
33 
34 #define BIT(set, n) ((n) & (set)->mask)
35 
36 /** Add the element <b>item</b> to <b>set</b>. */
37 void
39  const void *item)
40 {
41  int i;
42  for (i = 0; i < BLOOMFILT_N_HASHES; ++i) {
43  uint64_t h = set->hashfn(&set->key[i], item);
44  uint32_t high_bits = (uint32_t)(h >> 32);
45  uint32_t low_bits = (uint32_t)(h);
46  bitarray_set(set->ba, BIT(set, high_bits));
47  bitarray_set(set->ba, BIT(set, low_bits));
48  }
49 }
50 
51 /** If <b>item</b> is in <b>set</b>, return nonzero. Otherwise,
52  * <em>probably</em> return zero. */
53 int
55  const void *item)
56 {
57  int i, matches = 0;
58  for (i = 0; i < BLOOMFILT_N_HASHES; ++i) {
59  uint64_t h = set->hashfn(&set->key[i], item);
60  uint32_t high_bits = (uint32_t)(h >> 32);
61  uint32_t low_bits = (uint32_t)(h);
62  // Note that !! is necessary here, since bitarray_is_set does not
63  // necessarily return 1 on true.
64  matches += !! bitarray_is_set(set->ba, BIT(set, high_bits));
65  matches += !! bitarray_is_set(set->ba, BIT(set, low_bits));
66  }
67  return matches == N_BITS_PER_ITEM;
68 }
69 
70 /** Return a newly allocated bloomfilt_t, optimized to hold a total of
71  * <b>max_elements</b> elements with a reasonably low false positive weight.
72  *
73  * Uses the siphash-based function <b>hashfn</b> to compute hard-to-collide
74  * functions of the items, and the key material <b>random_key</b> to
75  * key the hash. There must be BLOOMFILT_KEY_LEN bytes in the supplied key.
76  **/
78 bloomfilt_new(int max_elements,
79  bloomfilt_hash_fn hashfn,
80  const uint8_t *random_key)
81 {
82  /* The probability of false positives is about P=(1 - exp(-kn/m))^k, where k
83  * is the number of hash functions per entry, m is the bits in the array,
84  * and n is the number of elements inserted. For us, k==4, n<=max_elements,
85  * and m==n_bits= approximately max_elements*32. This gives
86  * P<(1-exp(-4*n/(32*n)))^4 == (1-exp(1/-8))^4 == .00019
87  *
88  * It would be more optimal in space vs false positives to get this false
89  * positive rate by going for k==13, and m==18.5n, but we also want to
90  * conserve CPU, and k==13 is pretty big.
91  */
92  int n_bits = 1u << (tor_log2(max_elements)+5);
93  bloomfilt_t *r = tor_malloc(sizeof(bloomfilt_t));
94  r->mask = n_bits - 1;
95  r->ba = bitarray_init_zero(n_bits);
96 
97  tor_assert(sizeof(r->key) == BLOOMFILT_KEY_LEN);
98  memcpy(r->key, random_key, sizeof(r->key));
99 
100  r->hashfn = hashfn;
101 
102  return r;
103 }
104 
105 /** Free all storage held in <b>set</b>. */
106 void
108 {
109  if (!set)
110  return;
111  bitarray_free(set->ba);
112  tor_free(set);
113 }
unsigned int bitarray_t
Definition: bitarray.h:30
static void bitarray_set(bitarray_t *b, int bit)
Definition: bitarray.h:68
static unsigned int bitarray_is_set(bitarray_t *b, int bit)
Definition: bitarray.h:81
static bitarray_t * bitarray_init_zero(unsigned int n_bits)
Definition: bitarray.h:33
int tor_log2(uint64_t u64)
Definition: bits.c:16
Header for bits.c.
void bloomfilt_add(bloomfilt_t *set, const void *item)
Definition: bloomfilt.c:38
#define N_BITS_PER_ITEM
Definition: bloomfilt.c:22
int bloomfilt_probably_contains(const bloomfilt_t *set, const void *item)
Definition: bloomfilt.c:54
bloomfilt_t * bloomfilt_new(int max_elements, bloomfilt_hash_fn hashfn, const uint8_t *random_key)
Definition: bloomfilt.c:78
void bloomfilt_free_(bloomfilt_t *set)
Definition: bloomfilt.c:107
Header for bloomfilt.c.
#define BLOOMFILT_N_HASHES
Definition: bloomfilt.h:23
#define BLOOMFILT_KEY_LEN
Definition: bloomfilt.h:26
Headers for util_malloc.c.
#define tor_free(p)
Definition: malloc.h:52
#define BIT(x)
Definition: protover.c:177
bloomfilt_hash_fn hashfn
Definition: bloomfilt.c:28
struct sipkey key[BLOOMFILT_N_HASHES]
Definition: bloomfilt.c:27
bitarray_t * ba
Definition: bloomfilt.c:31
uint32_t mask
Definition: bloomfilt.c:29
Definition: siphash.h:6
Macros to manage assertions, fatal and non-fatal.
#define tor_assert(expr)
Definition: util_bug.h:102