tor  0.4.1.0-alpha-dev
bloomfilt.c
Go to the documentation of this file.
1 /* Copyright (c) 2003-2004, Roger Dingledine
2  * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
3  * Copyright (c) 2007-2019, The Tor Project, Inc. */
4 /* See LICENSE for licensing information */
5 
11 #include <stdlib.h>
12 
13 #include "lib/malloc/malloc.h"
15 #include "lib/intmath/bits.h"
16 #include "lib/log/util_bug.h"
17 #include "ext/siphash.h"
18 
22 #define N_BITS_PER_ITEM (BLOOMFILT_N_HASHES * 2)
23 
24 struct bloomfilt_t {
27  struct sipkey key[BLOOMFILT_N_HASHES];
28  bloomfilt_hash_fn hashfn;
29  uint32_t mask;
32 };
33 
34 #define BIT(set, n) ((n) & (set)->mask)
35 
37 void
39  const void *item)
40 {
41  int i;
42  for (i = 0; i < BLOOMFILT_N_HASHES; ++i) {
43  uint64_t h = set->hashfn(&set->key[i], item);
44  uint32_t high_bits = (uint32_t)(h >> 32);
45  uint32_t low_bits = (uint32_t)(h);
46  bitarray_set(set->ba, BIT(set, high_bits));
47  bitarray_set(set->ba, BIT(set, low_bits));
48  }
49 }
50 
53 int
55  const void *item)
56 {
57  int i, matches = 0;
58  for (i = 0; i < BLOOMFILT_N_HASHES; ++i) {
59  uint64_t h = set->hashfn(&set->key[i], item);
60  uint32_t high_bits = (uint32_t)(h >> 32);
61  uint32_t low_bits = (uint32_t)(h);
62  // Note that !! is necessary here, since bitarray_is_set does not
63  // necessarily return 1 on true.
64  matches += !! bitarray_is_set(set->ba, BIT(set, high_bits));
65  matches += !! bitarray_is_set(set->ba, BIT(set, low_bits));
66  }
67  return matches == N_BITS_PER_ITEM;
68 }
69 
78 bloomfilt_new(int max_elements,
79  bloomfilt_hash_fn hashfn,
80  const uint8_t *random_key)
81 {
82  /* The probability of false positives is about P=(1 - exp(-kn/m))^k, where k
83  * is the number of hash functions per entry, m is the bits in the array,
84  * and n is the number of elements inserted. For us, k==4, n<=max_elements,
85  * and m==n_bits= approximately max_elements*32. This gives
86  * P<(1-exp(-4*n/(32*n)))^4 == (1-exp(1/-8))^4 == .00019
87  *
88  * It would be more optimal in space vs false positives to get this false
89  * positive rate by going for k==13, and m==18.5n, but we also want to
90  * conserve CPU, and k==13 is pretty big.
91  */
92  int n_bits = 1u << (tor_log2(max_elements)+5);
93  bloomfilt_t *r = tor_malloc(sizeof(bloomfilt_t));
94  r->mask = n_bits - 1;
95  r->ba = bitarray_init_zero(n_bits);
96 
97  tor_assert(sizeof(r->key) == BLOOMFILT_KEY_LEN);
98  memcpy(r->key, random_key, sizeof(r->key));
99 
100  r->hashfn = hashfn;
101 
102  return r;
103 }
104 
106 void
108 {
109  if (!set)
110  return;
111  bitarray_free(set->ba);
112  tor_free(set);
113 }
bitarray_t * ba
Definition: bloomfilt.c:31
unsigned int bitarray_t
Definition: bitarray.h:30
void bloomfilt_add(bloomfilt_t *set, const void *item)
Definition: bloomfilt.c:38
void bloomfilt_free_(bloomfilt_t *set)
Definition: bloomfilt.c:107
#define tor_free(p)
Definition: malloc.h:52
bloomfilt_t * bloomfilt_new(int max_elements, bloomfilt_hash_fn hashfn, const uint8_t *random_key)
Definition: bloomfilt.c:78
#define BLOOMFILT_N_HASHES
Definition: bloomfilt.h:23
Headers for util_malloc.c.
static void bitarray_set(bitarray_t *b, int bit)
Definition: bitarray.h:68
bloomfilt_hash_fn hashfn
Definition: bloomfilt.c:28
tor_assert(buffer)
#define BLOOMFILT_KEY_LEN
Definition: bloomfilt.h:26
Header for bloomfilt.c.
static bitarray_t * bitarray_init_zero(unsigned int n_bits)
Definition: bitarray.h:33
#define N_BITS_PER_ITEM
Definition: bloomfilt.c:22
int tor_log2(uint64_t u64)
Definition: bits.c:16
struct sipkey key[BLOOMFILT_N_HASHES]
Definition: bloomfilt.c:27
uint32_t mask
Definition: bloomfilt.c:29
Header for bits.c.
int bloomfilt_probably_contains(const bloomfilt_t *set, const void *item)
Definition: bloomfilt.c:54
Macros to manage assertions, fatal and non-fatal.
static unsigned int bitarray_is_set(bitarray_t *b, int bit)
Definition: bitarray.h:81