Line data Source code
1 : /* Copyright (c) 2003-2004, Roger Dingledine 2 : * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. 3 : * Copyright (c) 2007-2021, The Tor Project, Inc. */ 4 : /* See LICENSE for licensing information */ 5 : 6 : /** 7 : * \file bloomfilt.c 8 : * \brief Uses bitarray_t to implement a bloom filter. 9 : **/ 10 : 11 : #include <stdlib.h> 12 : 13 : #include "lib/malloc/malloc.h" 14 : #include "lib/container/bloomfilt.h" 15 : #include "lib/intmath/bits.h" 16 : #include "lib/log/util_bug.h" 17 : #include "ext/siphash.h" 18 : 19 : /** How many bloom-filter bits we set per address. This is twice the 20 : * BLOOMFILT_N_HASHES value, since we split the siphash output into two 32-bit 21 : * values. */ 22 : #define N_BITS_PER_ITEM (BLOOMFILT_N_HASHES * 2) 23 : 24 : struct bloomfilt_t { 25 : /** siphash keys to make BLOOMFILT_N_HASHES independent hashes for each 26 : * items. */ 27 : struct sipkey key[BLOOMFILT_N_HASHES]; 28 : bloomfilt_hash_fn hashfn; /**< Function used to generate hashes */ 29 : uint32_t mask; /**< One less than the number of bits in <b>ba</b>; always 30 : * one less than a power of two. */ 31 : bitarray_t *ba; /**< A bit array to implement the Bloom filter. */ 32 : }; 33 : 34 : #define BIT(set, n) ((n) & (set)->mask) 35 : 36 : /** Add the element <b>item</b> to <b>set</b>. */ 37 : void 38 9042 : bloomfilt_add(bloomfilt_t *set, 39 : const void *item) 40 : { 41 9042 : int i; 42 27126 : for (i = 0; i < BLOOMFILT_N_HASHES; ++i) { 43 18084 : uint64_t h = set->hashfn(&set->key[i], item); 44 18084 : uint32_t high_bits = (uint32_t)(h >> 32); 45 18084 : uint32_t low_bits = (uint32_t)(h); 46 18084 : bitarray_set(set->ba, BIT(set, high_bits)); 47 18084 : bitarray_set(set->ba, BIT(set, low_bits)); 48 : } 49 9042 : } 50 : 51 : /** If <b>item</b> is in <b>set</b>, return nonzero. Otherwise, 52 : * <em>probably</em> return zero. */ 53 : int 54 23044 : bloomfilt_probably_contains(const bloomfilt_t *set, 55 : const void *item) 56 : { 57 23044 : int i, matches = 0; 58 69132 : for (i = 0; i < BLOOMFILT_N_HASHES; ++i) { 59 46088 : uint64_t h = set->hashfn(&set->key[i], item); 60 46088 : uint32_t high_bits = (uint32_t)(h >> 32); 61 46088 : uint32_t low_bits = (uint32_t)(h); 62 : // Note that !! is necessary here, since bitarray_is_set does not 63 : // necessarily return 1 on true. 64 46088 : matches += !! bitarray_is_set(set->ba, BIT(set, high_bits)); 65 46088 : matches += !! bitarray_is_set(set->ba, BIT(set, low_bits)); 66 : } 67 23044 : return matches == N_BITS_PER_ITEM; 68 : } 69 : 70 : /** Return a newly allocated bloomfilt_t, optimized to hold a total of 71 : * <b>max_elements</b> elements with a reasonably low false positive weight. 72 : * 73 : * Uses the siphash-based function <b>hashfn</b> to compute hard-to-collide 74 : * functions of the items, and the key material <b>random_key</b> to 75 : * key the hash. There must be BLOOMFILT_KEY_LEN bytes in the supplied key. 76 : **/ 77 : bloomfilt_t * 78 198 : bloomfilt_new(int max_elements, 79 : bloomfilt_hash_fn hashfn, 80 : const uint8_t *random_key) 81 : { 82 : /* The probability of false positives is about P=(1 - exp(-kn/m))^k, where k 83 : * is the number of hash functions per entry, m is the bits in the array, 84 : * and n is the number of elements inserted. For us, k==4, n<=max_elements, 85 : * and m==n_bits= approximately max_elements*32. This gives 86 : * P<(1-exp(-4*n/(32*n)))^4 == (1-exp(1/-8))^4 == .00019 87 : * 88 : * It would be more optimal in space vs false positives to get this false 89 : * positive rate by going for k==13, and m==18.5n, but we also want to 90 : * conserve CPU, and k==13 is pretty big. 91 : */ 92 198 : int n_bits = 1u << (tor_log2(max_elements)+5); 93 198 : bloomfilt_t *r = tor_malloc(sizeof(bloomfilt_t)); 94 198 : r->mask = n_bits - 1; 95 198 : r->ba = bitarray_init_zero(n_bits); 96 : 97 198 : tor_assert(sizeof(r->key) == BLOOMFILT_KEY_LEN); 98 198 : memcpy(r->key, random_key, sizeof(r->key)); 99 : 100 198 : r->hashfn = hashfn; 101 : 102 198 : return r; 103 : } 104 : 105 : /** Free all storage held in <b>set</b>. */ 106 : void 107 258 : bloomfilt_free_(bloomfilt_t *set) 108 : { 109 258 : if (!set) 110 : return; 111 187 : bitarray_free(set->ba); 112 187 : tor_free(set); 113 : }