LCOV - code coverage report
Current view: top level - lib/container - bloomfilt.c (source / functions) Hit Total Coverage
Test: lcov.info Lines: 31 31 100.0 %
Date: 2021-11-24 03:28:48 Functions: 4 4 100.0 %

          Line data    Source code
       1             : /* Copyright (c) 2003-2004, Roger Dingledine
       2             :  * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
       3             :  * Copyright (c) 2007-2021, The Tor Project, Inc. */
       4             : /* See LICENSE for licensing information */
       5             : 
       6             : /**
       7             :  * \file bloomfilt.c
       8             :  * \brief Uses bitarray_t to implement a bloom filter.
       9             :  **/
      10             : 
      11             : #include <stdlib.h>
      12             : 
      13             : #include "lib/malloc/malloc.h"
      14             : #include "lib/container/bloomfilt.h"
      15             : #include "lib/intmath/bits.h"
      16             : #include "lib/log/util_bug.h"
      17             : #include "ext/siphash.h"
      18             : 
      19             : /** How many bloom-filter bits we set per address. This is twice the
      20             :  * BLOOMFILT_N_HASHES value, since we split the siphash output into two 32-bit
      21             :  * values. */
      22             : #define N_BITS_PER_ITEM (BLOOMFILT_N_HASHES * 2)
      23             : 
      24             : struct bloomfilt_t {
      25             :   /** siphash keys to make BLOOMFILT_N_HASHES independent hashes for each
      26             :    * items. */
      27             :   struct sipkey key[BLOOMFILT_N_HASHES];
      28             :   bloomfilt_hash_fn hashfn; /**< Function used to generate hashes */
      29             :   uint32_t mask; /**< One less than the number of bits in <b>ba</b>; always
      30             :                   * one less than a power of two. */
      31             :   bitarray_t *ba; /**< A bit array to implement the Bloom filter. */
      32             : };
      33             : 
      34             : #define BIT(set, n) ((n) & (set)->mask)
      35             : 
      36             : /** Add the element <b>item</b> to <b>set</b>. */
      37             : void
      38        9042 : bloomfilt_add(bloomfilt_t *set,
      39             :               const void *item)
      40             : {
      41        9042 :   int i;
      42       27126 :   for (i = 0; i < BLOOMFILT_N_HASHES; ++i) {
      43       18084 :     uint64_t h = set->hashfn(&set->key[i], item);
      44       18084 :     uint32_t high_bits = (uint32_t)(h >> 32);
      45       18084 :     uint32_t low_bits = (uint32_t)(h);
      46       18084 :     bitarray_set(set->ba, BIT(set, high_bits));
      47       18084 :     bitarray_set(set->ba, BIT(set, low_bits));
      48             :   }
      49        9042 : }
      50             : 
      51             : /** If <b>item</b> is in <b>set</b>, return nonzero.  Otherwise,
      52             :  * <em>probably</em> return zero. */
      53             : int
      54       23044 : bloomfilt_probably_contains(const bloomfilt_t *set,
      55             :                             const void *item)
      56             : {
      57       23044 :   int i, matches = 0;
      58       69132 :   for (i = 0; i < BLOOMFILT_N_HASHES; ++i) {
      59       46088 :     uint64_t h = set->hashfn(&set->key[i], item);
      60       46088 :     uint32_t high_bits = (uint32_t)(h >> 32);
      61       46088 :     uint32_t low_bits = (uint32_t)(h);
      62             :     // Note that !! is necessary here, since bitarray_is_set does not
      63             :     // necessarily return 1 on true.
      64       46088 :     matches += !! bitarray_is_set(set->ba, BIT(set, high_bits));
      65       46088 :     matches += !! bitarray_is_set(set->ba, BIT(set, low_bits));
      66             :   }
      67       23044 :   return matches == N_BITS_PER_ITEM;
      68             : }
      69             : 
      70             : /** Return a newly allocated bloomfilt_t, optimized to hold a total of
      71             :  * <b>max_elements</b> elements with a reasonably low false positive weight.
      72             :  *
      73             :  * Uses the siphash-based function <b>hashfn</b> to compute hard-to-collide
      74             :  * functions of the items, and the key material <b>random_key</b> to
      75             :  * key the hash.  There must be BLOOMFILT_KEY_LEN bytes in the supplied key.
      76             :  **/
      77             : bloomfilt_t *
      78         198 : bloomfilt_new(int max_elements,
      79             :               bloomfilt_hash_fn hashfn,
      80             :               const uint8_t *random_key)
      81             : {
      82             :   /* The probability of false positives is about P=(1 - exp(-kn/m))^k, where k
      83             :    * is the number of hash functions per entry, m is the bits in the array,
      84             :    * and n is the number of elements inserted.  For us, k==4, n<=max_elements,
      85             :    * and m==n_bits= approximately max_elements*32.  This gives
      86             :    *   P<(1-exp(-4*n/(32*n)))^4 == (1-exp(1/-8))^4 == .00019
      87             :    *
      88             :    * It would be more optimal in space vs false positives to get this false
      89             :    * positive rate by going for k==13, and m==18.5n, but we also want to
      90             :    * conserve CPU, and k==13 is pretty big.
      91             :    */
      92         198 :   int n_bits = 1u << (tor_log2(max_elements)+5);
      93         198 :   bloomfilt_t *r = tor_malloc(sizeof(bloomfilt_t));
      94         198 :   r->mask = n_bits - 1;
      95         198 :   r->ba = bitarray_init_zero(n_bits);
      96             : 
      97         198 :   tor_assert(sizeof(r->key) == BLOOMFILT_KEY_LEN);
      98         198 :   memcpy(r->key, random_key, sizeof(r->key));
      99             : 
     100         198 :   r->hashfn = hashfn;
     101             : 
     102         198 :   return r;
     103             : }
     104             : 
     105             : /** Free all storage held in <b>set</b>. */
     106             : void
     107         258 : bloomfilt_free_(bloomfilt_t *set)
     108             : {
     109         258 :   if (!set)
     110             :     return;
     111         187 :   bitarray_free(set->ba);
     112         187 :   tor_free(set);
     113             : }

Generated by: LCOV version 1.14