tor
0.4.2.1alphadev

#include "orconfig.h"
#include "lib/math/prob_distr.h"
#include "lib/crypt_ops/crypto_rand.h"
#include "lib/cc/ctassert.h"
#include "lib/log/util_bug.h"
#include <float.h>
#include <math.h>
#include <stddef.h>
Go to the source code of this file.
Macros  
#define  PROB_DISTR_PRIVATE 
#define  DECLARE_PROB_DISTR_DOWNCAST_FN(name) 
Functions  
static unsigned  bitcount32 (uint32_t x) 
static unsigned  clz32 (uint32_t x) 
STATIC double  logistic (double x) 
STATIC double  logit (double p) 
STATIC double  logithalf (double p0) 
CTASSERT (FLT_RADIX==2)  
CTASSERT (DBL_MIN_EXP<=1021)  
CTASSERT (DBL_MANT_DIG<=53)  
STATIC double  random_uniform_01 (void) 
STATIC double  cdf_logistic (double x, double mu, double sigma) 
STATIC double  sf_logistic (double x, double mu, double sigma) 
STATIC double  icdf_logistic (double p, double mu, double sigma) 
STATIC double  isf_logistic (double p, double mu, double sigma) 
STATIC double  cdf_log_logistic (double x, double alpha, double beta) 
STATIC double  sf_log_logistic (double x, double alpha, double beta) 
STATIC double  icdf_log_logistic (double p, double alpha, double beta) 
STATIC double  isf_log_logistic (double p, double alpha, double beta) 
STATIC double  cdf_weibull (double x, double lambda, double k) 
STATIC double  sf_weibull (double x, double lambda, double k) 
STATIC double  icdf_weibull (double p, double lambda, double k) 
STATIC double  isf_weibull (double p, double lambda, double k) 
STATIC double  cdf_genpareto (double x, double mu, double sigma, double xi) 
STATIC double  sf_genpareto (double x, double mu, double sigma, double xi) 
STATIC double  icdf_genpareto (double p, double mu, double sigma, double xi) 
STATIC double  isf_genpareto (double p, double mu, double sigma, double xi) 
STATIC double  sample_uniform_interval (double p0, double a, double b) 
STATIC double  sample_logistic (uint32_t s, double t, double p0) 
static double  sample_logistic_locscale (uint32_t s, double t, double p0, double mu, double sigma) 
STATIC double  sample_log_logistic (uint32_t s, double p0) 
static double  sample_log_logistic_scaleshape (uint32_t s, double p0, double alpha, double beta) 
static double  sample_exponential (uint32_t s, double p0) 
STATIC double  sample_weibull (uint32_t s, double p0, double lambda, double k) 
STATIC double  sample_genpareto (uint32_t s, double p0, double xi) 
static double  sample_genpareto_locscale (uint32_t s, double p0, double mu, double sigma, double xi) 
static double  sample_geometric (uint32_t s, double p0, double p) 
const char *  dist_name (const struct dist *dist) 
double  dist_sample (const struct dist *dist) 
double  dist_cdf (const struct dist *dist, double x) 
double  dist_sf (const struct dist *dist, double x) 
double  dist_icdf (const struct dist *dist, double p) 
double  dist_isf (const struct dist *dist, double p) 
static double  uniform_sample (const struct dist *dist) 
static double  uniform_cdf (const struct dist *dist, double x) 
static double  uniform_sf (const struct dist *dist, double x) 
static double  uniform_icdf (const struct dist *dist, double p) 
static double  uniform_isf (const struct dist *dist, double p) 
static double  logistic_sample (const struct dist *dist) 
static double  logistic_cdf (const struct dist *dist, double x) 
static double  logistic_sf (const struct dist *dist, double x) 
static double  logistic_icdf (const struct dist *dist, double p) 
static double  logistic_isf (const struct dist *dist, double p) 
static double  log_logistic_sample (const struct dist *dist) 
static double  log_logistic_cdf (const struct dist *dist, double x) 
static double  log_logistic_sf (const struct dist *dist, double x) 
static double  log_logistic_icdf (const struct dist *dist, double p) 
static double  log_logistic_isf (const struct dist *dist, double p) 
static double  weibull_sample (const struct dist *dist) 
static double  weibull_cdf (const struct dist *dist, double x) 
static double  weibull_sf (const struct dist *dist, double x) 
static double  weibull_icdf (const struct dist *dist, double p) 
static double  weibull_isf (const struct dist *dist, double p) 
static double  genpareto_sample (const struct dist *dist) 
static double  genpareto_cdf (const struct dist *dist, double x) 
static double  genpareto_sf (const struct dist *dist, double x) 
static double  genpareto_icdf (const struct dist *dist, double p) 
static double  genpareto_isf (const struct dist *dist, double p) 
static double  geometric_sample (const struct dist *dist) 
static double  geometric_cdf (const struct dist *dist, double x) 
static double  geometric_sf (const struct dist *dist, double x) 
static double  geometric_icdf (const struct dist *dist, double p) 
static double  geometric_isf (const struct dist *dist, double p) 
Variables  
const struct dist_ops  uniform_ops 
const struct dist_ops  logistic_ops 
const struct dist_ops  log_logistic_ops 
const struct dist_ops  weibull_ops 
const struct dist_ops  genpareto_ops 
const struct dist_ops  geometric_ops 
Implements various probability distributions. Almost all code is courtesy of Riastradh.
Here are some details that might help you understand this file:
Throughout this file, ‘eps’ means the largest relative error of a correctly rounded floatingpoint operation, which in binary64 floatingpoint arithmetic is 2^53. Here the relative error of a true value x from a computed value y is x  y/x. This definition of epsilon is conventional for numerical analysts when writing error analyses. (If your libm doesn't provide correctly rounded exp and log, their relative error is usually below 2*2^53 and probably closer to 1.1*2^53 instead.)
The C constant DBL_EPSILON is actually twice this, and should perhaps rather be named ulp(1) – that is, it is the distance from 1 to the next greater floatingpoint number, which is usually of more interest to programmers and hardware engineers.
Since this file is concerned mainly with error bounds rather than with lowlevel bithacking of floatingpoint numbers, we adopt the numerical analysts' definition in the comments, though we do use DBL_EPSILON in a handful of places where it is convenient to use some function of eps = DBL_EPSILON/2 in a case analysis.
Definition in file prob_distr.c.
#define DECLARE_PROB_DISTR_DOWNCAST_FN  (  name  ) 
Declare a function that downcasts from a generic dist struct to the actual subtype probablity distribution it represents.
Definition at line 57 of file prob_distr.c.

static 
Count number of one bits in 32bit word.
Definition at line 75 of file prob_distr.c.
Referenced by clz32().
STATIC double cdf_genpareto  (  double  x, 
double  mu,  
double  sigma,  
double  xi  
) 
Compute the CDF of the GeneralizedPareto(mu, sigma, xi) distribution. Wellconditioned everywhere. For standard distribution (mu=0, sigma=1), condition number
(x/(1 + x xi)) / ((1 + x xi)^{1/xi}  1)
is bounded by 1, attained only at x = 0.
Definition at line 790 of file prob_distr.c.
STATIC double cdf_log_logistic  (  double  x, 
double  alpha,  
double  beta  
) 
Compute the CDF of the LogLogistic(alpha, beta) distribution. Wellconditioned for all x and alpha, and the condition number
beta/[1 + (x/alpha)^{beta}]
grows linearly with beta.
Loosely, the relative error of this implementation is bounded by
4 eps + 2 eps^2 + O(beta eps),
so don't bother trying this for beta anywhere near as large as 1/eps, around which point it levels off at 1.
Definition at line 596 of file prob_distr.c.
STATIC double cdf_logistic  (  double  x, 
double  mu,  
double  sigma  
) 
Compute the CDF of the Logistic(mu, sigma) distribution: the logistic function. Wellconditioned for negative inputs and small positive inputs; illconditioned for large positive inputs.
Definition at line 509 of file prob_distr.c.
References logistic().
STATIC double cdf_weibull  (  double  x, 
double  lambda,  
double  k  
) 
Compute the CDF of the Weibull(lambda, k) distribution. Wellconditioned for small x and k, and for large lambda – condition number
k (x/lambda)^k exp((x/lambda)^k)/[exp((x/lambda)^k)  1]
grows linearly with k, x^k, and lambda^{k}.
Definition at line 709 of file prob_distr.c.

static 
Count leading zeros in 32bit word.
Definition at line 95 of file prob_distr.c.
References bitcount32().
Referenced by random_uniform_01().
double dist_cdf  (  const struct dist *  dist, 
double  x  
) 
Compute the CDF of dist at x.
Definition at line 1341 of file prob_distr.c.
double dist_icdf  (  const struct dist *  dist, 
double  p  
) 
Compute the iCDF (Inverse CDF) of dist at x.
Definition at line 1355 of file prob_distr.c.
double dist_isf  (  const struct dist *  dist, 
double  p  
) 
Compute the iSF (Inverse Survival function) of dist at x.
Definition at line 1362 of file prob_distr.c.
const char* dist_name  (  const struct dist *  dist  ) 
Public API for probability distributions:
These are wrapper functions on top of the various probability distribution operations using the generic dist structure.
These are the functions that should be used by consumers of this API.Returns the name of the distribution in dist.
Definition at line 1327 of file prob_distr.c.
double dist_sf  (  const struct dist *  dist, 
double  x  
) 
Compute the SF (Survival function) of dist at x.
Definition at line 1348 of file prob_distr.c.

static 
Functions for generalized Pareto distributions
Definition at line 1585 of file prob_distr.c.
References crypto_fast_rng_get_u32(), get_thread_fast_rng(), random_uniform_01(), and sample_genpareto_locscale().

static 
Functions for geometric distribution on number of trials before success
Definition at line 1634 of file prob_distr.c.
References crypto_fast_rng_get_u32(), get_thread_fast_rng(), random_uniform_01(), and sample_geometric().
STATIC double icdf_genpareto  (  double  p, 
double  mu,  
double  sigma,  
double  xi  
) 
Compute the inverse of the CDF of the GeneralizedPareto(mu, sigma, xi) distribution. Illconditioned for p near 1; condition number is
xi (p/(1  p))/(1  (1  p)^xi)
Definition at line 851 of file prob_distr.c.
STATIC double icdf_log_logistic  (  double  p, 
double  alpha,  
double  beta  
) 
Compute the inverse of the CDF of the LogLogistic(alpha, beta) distribution. Illconditioned for p near 1 and beta near 0 with condition number 1/[beta (1  p)].
Definition at line 673 of file prob_distr.c.
STATIC double icdf_logistic  (  double  p, 
double  mu,  
double  sigma  
) 
Compute the inverse of the CDF of the Logistic(mu, sigma) distribution: the logit function. Wellconditioned near 0; illconditioned near 1/2 and 1.
Definition at line 532 of file prob_distr.c.
References logit().
STATIC double icdf_weibull  (  double  p, 
double  lambda,  
double  k  
) 
Compute the inverse of the CDF of the Weibull(lambda, k) distribution. Illconditioned for p near 1, and for k near 0; condition number is
(p/(1  p))/(k log(1  p)).
Definition at line 737 of file prob_distr.c.
STATIC double isf_genpareto  (  double  p, 
double  mu,  
double  sigma,  
double  xi  
) 
Compute the inverse of the SF of the GeneralizedPareto(mu, sigma, xi) distribution. Illconditioned for p near 1; conditon number is
xi/(1  p^{xi})
Definition at line 897 of file prob_distr.c.
STATIC double isf_log_logistic  (  double  p, 
double  alpha,  
double  beta  
) 
Compute the inverse of the SF of the LogLogistic(alpha, beta) distribution. Illconditioned for p near 1 and for large beta, with condition number 1/[beta (1  p)].
Definition at line 684 of file prob_distr.c.
STATIC double isf_logistic  (  double  p, 
double  mu,  
double  sigma  
) 
Compute the inverse of the SF of the Logistic(mu, sigma) distribution: the logit function. Wellconditioned near 0; illconditioned near 1/2 and 1.
Definition at line 543 of file prob_distr.c.
References logit().
STATIC double isf_weibull  (  double  p, 
double  lambda,  
double  k  
) 
Compute the inverse of the SF of the Weibull(lambda, k) distribution. Illconditioned for p near 0, and for k near 0; condition number is
1/(k log(p)).
Definition at line 750 of file prob_distr.c.

static 
Functions for loglogistic distribution:
Definition at line 1487 of file prob_distr.c.
References crypto_fast_rng_get_u32(), get_thread_fast_rng(), random_uniform_01(), and sample_log_logistic_scaleshape().
STATIC double logistic  (  double  x  ) 
Compute the logistic function: f(x) = 1/(1 + e^{x}) = e^x/(1 + e^x). Maps a logoddsspace probability in [\infty, +\infty] into a directspace probability in [0,1]. Inverse of logit.
Illconditioned for large x; the identity logistic(x) = 1  logistic(x) and the function logistichalf(x) = logistic(x)  1/2 may help to rearrange a computation.
This implementation gives relative error bounded by 7 eps.
Definition at line 191 of file prob_distr.c.
Referenced by cdf_logistic(), and sf_logistic().

static 
Private functions for each probability distribution. Functions for logistic distribution:
Definition at line 1437 of file prob_distr.c.
References crypto_fast_rng_get_u32(), get_thread_fast_rng(), random_uniform_01(), and sample_logistic_locscale().
STATIC double logit  (  double  p  ) 
Compute the logit function: log p/(1  p). Defined on [0,1]. Maps a directspace probability in [0,1] to a logoddsspace probability in [\infty, +\infty]. Inverse of logistic.
Illconditioned near 1/2 and 1; the identity logit(1  p) = logit(p) and the function logithalf(p0) = logit(1/2 + p0) may help to rearrange a computation for p in [1/(1 + e), 1  1/(1 + e)].
This implementation gives relative error bounded by 10 eps.
Definition at line 278 of file prob_distr.c.
Referenced by icdf_logistic(), isf_logistic(), and sample_logistic().
STATIC double logithalf  (  double  p0  ) 
Compute the logit function, translated in input by 1/2: logithalf(p) = logit(1/2 + p). Defined on [1/2, 1/2]. Inverse of logistichalf.
Illconditioned near +/1/2. If p0 > 1/2  1/(1 + e), it may be better to compute 1/2 + p0 or 1/2  p0 and to use logit instead. This implementation gives relative error bounded by 34 eps.
Definition at line 358 of file prob_distr.c.
Referenced by sample_logistic().
STATIC double random_uniform_01  (  void  ) 
Draw a floatingpoint number in [0, 1] with uniform distribution.
Note that the probability of returning 0 is less than 2^1074, so callers need not check for it. However, callers that cannot handle rounding to 1 must deal with that, because it occurs with probability 2^54, which is small but nonnegligible.
Definition at line 451 of file prob_distr.c.
References clz32(), crypto_fast_rng_get_u32(), and get_thread_fast_rng().
Referenced by genpareto_sample(), geometric_sample(), log_logistic_sample(), logistic_sample(), uniform_sample(), and weibull_sample().

static 
Deterministically sample from the standard exponential distribution, indexed by a uniform random 32bit integer s and a uniform random floatingpoint number p0 in (0, 1].
Definition at line 1194 of file prob_distr.c.
Referenced by sample_genpareto(), and sample_geometric().
STATIC double sample_genpareto  (  uint32_t  s, 
double  p0,  
double  xi  
) 
Deterministically sample from the generalized Pareto distribution with shape xi, indexed by a uniform random 32bit integer s and a uniform random floatingpoint number p0 in (0, 1].
Definition at line 1229 of file prob_distr.c.
References sample_exponential().
Referenced by sample_genpareto_locscale().

static 
Deterministically sample from a generalized Pareto distribution with shape xi, scaled by sigma and translated by mu.
Definition at line 1275 of file prob_distr.c.
References sample_genpareto().
Referenced by genpareto_sample().

static 
Deterministically sample from the geometric distribution with pertrial success probability p.
XXX Quantify the error (KL divergence?) of this ceilingofexponential sampler from a true geometric distribution, which we could get by rejection sampling. Relevant papers:
John F. Monahan, `Accuracy in Random Number Generation', Mathematics of Computation 45(172), October 1984, pp. 559568.
https://pdfs.semanticscholar.org/aca6/74b96da1df77b2224e8cfc5dd6d61a471632.pdf
Karl Bringmann and Tobias Friedrich, `Exact and Efficient Generation of Geometric Random Variates and Random Graphs', in Proceedings of the 40th International Colloaquium on Automata, Languages, and Programming  ICALP 2013, Springer LNCS 7965, pp.267278. https://doi.org/10.1007/9783642392061_23 https://people.mpiinf.mpg.de/~kbringma/paper/2013ICALP1.pdf
Definition at line 1303 of file prob_distr.c.
References sample_exponential().
Referenced by geometric_sample().
STATIC double sample_log_logistic  (  uint32_t  s, 
double  p0  
) 
Deterministically sample from the standard loglogistic distribution, indexed by a uniform random 32bit integer s and a uniform random floatingpoint number p0 in (0, 1].
Definition at line 1158 of file prob_distr.c.
Referenced by sample_log_logistic_scaleshape().

static 
Deterministically sample from the loglogistic distribution with scale alpha and shape beta.
Definition at line 1180 of file prob_distr.c.
References sample_log_logistic().
Referenced by log_logistic_sample().
STATIC double sample_logistic  (  uint32_t  s, 
double  t,  
double  p0  
) 
Deterministically sample from the standard logistic distribution, indexed by a uniform random 32bit integer s and uniform random floatingpoint numbers t and p0 in (0, 1].
Definition at line 1092 of file prob_distr.c.
References logit(), and logithalf().
Referenced by sample_logistic_locscale().

static 
Deterministically sample from the logistic distribution scaled by sigma and translated by mu.
Definition at line 1145 of file prob_distr.c.
References sample_logistic().
Referenced by logistic_sample().
STATIC double sample_uniform_interval  (  double  p0, 
double  a,  
double  b  
) 
Deterministic samplers, parametrized by uniform integer and (0,1] samples. No guarantees are made about which mapping from the integer and (0,1] samples these use; all that is guaranteed is the distribution of the outputs conditioned on a uniform distribution on the inputs. The automatic tests in test_prob_distr.c doublecheck the particular mappings we use.
Beware: Unlike random_uniform_01(), these are not guaranteed to be supported on all possible outputs. See Ilya Mironov, `On the Significance of the Least Significant Bits for Differential Privacy', for an example of what can go wrong if you try to use these to conceal information from an adversary but you expose the specific fullprecision floatingpoint values.
Note: None of these samplers use rejection sampling; they are all essentially inverseCDF transforms with tweaks. If you were to add, say, a Gamma sampler with the MarsagliaTsang method, you would have to parametrize it by a potentially infinite stream of uniform (and perhaps normal) samples rather than a fixed number, which doesn't make for quite as nice automatic testing as for these.Deterministically sample from the interval [a, b], indexed by a uniform random floatingpoint number p0 in (0, 1].
Note that even if p0 is nonzero, the result may be equal to a, if ulp(a)/2 is nonnegligible, e.g. if a = 1. For maximum resolution, arrange a <= b.
Definition at line 939 of file prob_distr.c.
Referenced by uniform_sample().
STATIC double sample_weibull  (  uint32_t  s, 
double  p0,  
double  lambda,  
double  k  
) 
Deterministically sample from a Weibull distribution with scale lambda and shape k – just an exponential with a shape parameter in addition to a scale parameter. (Yes, lambda really is the scale, not the rate.)
Definition at line 1217 of file prob_distr.c.
Referenced by weibull_sample().
STATIC double sf_genpareto  (  double  x, 
double  mu,  
double  sigma,  
double  xi  
) 
Compute the SF of the GeneralizedPareto(mu, sigma, xi) distribution. For standard distribution (mu=0, sigma=1), illconditioned for xi near 0; condition number
x (1 + x xi)^{(1  xi)/xi}/(1 + x xi)^{1/xi} = x (1 + x xi)^{1/xi  1}/(1 + x xi)^{1/xi} = (x/(1 + x xi)) (1 + x xi)^{1/xi}/(1 + x xi)^{1/xi} = x/(1 + x xi)
is bounded by 1/xi.
Definition at line 834 of file prob_distr.c.
STATIC double sf_log_logistic  (  double  x, 
double  alpha,  
double  beta  
) 
Compute the SF of the LogLogistic(alpha, beta) distribution. Wellconditioned for all x and alpha, and the condition number
beta/[1 + (x/alpha)^beta]
grows linearly with beta.
Loosely, the relative error of this implementation is bounded by
4 eps + 2 eps^2 + O(beta eps)
so don't bother trying this for beta anywhere near as large as 1/eps, beyond which point it grows unbounded.
Definition at line 657 of file prob_distr.c.
STATIC double sf_logistic  (  double  x, 
double  mu,  
double  sigma  
) 
Compute the SF of the Logistic(mu, sigma) distribution: the logistic function reflected over the y axis. Wellconditioned for positive inputs and small negative inputs; illconditioned for large negative inputs.
Definition at line 521 of file prob_distr.c.
References logistic().
STATIC double sf_weibull  (  double  x, 
double  lambda,  
double  k  
) 
Compute the SF of the Weibull(lambda, k) distribution. Wellconditioned for small x and k, and for large lambda – condition number
k (x/lambda)^k
grows linearly with k, x^k, and lambda^{k}.
Definition at line 724 of file prob_distr.c.

static 
Functions for uniform distribution
Definition at line 1370 of file prob_distr.c.
References random_uniform_01(), and sample_uniform_interval().

static 
Functions for Weibull distribution
Definition at line 1536 of file prob_distr.c.
References crypto_fast_rng_get_u32(), get_thread_fast_rng(), random_uniform_01(), and sample_weibull().
const struct dist_ops genpareto_ops 
Definition at line 1622 of file prob_distr.c.
const struct dist_ops geometric_ops 
Definition at line 1681 of file prob_distr.c.
const struct dist_ops log_logistic_ops 
Definition at line 1524 of file prob_distr.c.
const struct dist_ops logistic_ops 
Definition at line 1475 of file prob_distr.c.
const struct dist_ops uniform_ops 
Definition at line 1421 of file prob_distr.c.
const struct dist_ops weibull_ops 
Definition at line 1573 of file prob_distr.c.