Tor
0.4.4.0alphadev

Implements various probability distributions. Almost all code is courtesy of Riastradh. More...
#include "orconfig.h"
#include "lib/math/prob_distr.h"
#include "lib/crypt_ops/crypto_rand.h"
#include "lib/cc/ctassert.h"
#include "lib/log/util_bug.h"
#include <float.h>
#include <math.h>
#include <stddef.h>
Go to the source code of this file.
Macros  
#define  PROB_DISTR_PRIVATE 
#define  DECLARE_PROB_DISTR_DOWNCAST_FN(name) 
Functions  
static unsigned  bitcount32 (uint32_t x) 
static unsigned  clz32 (uint32_t x) 
STATIC double  logistic (double x) 
STATIC double  logit (double p) 
STATIC double  logithalf (double p0) 
CTASSERT (FLT_RADIX==2)  
CTASSERT (DBL_MIN_EXP<=1021)  
CTASSERT (DBL_MANT_DIG<=53)  
STATIC double  random_uniform_01 (void) 
STATIC double  cdf_logistic (double x, double mu, double sigma) 
STATIC double  sf_logistic (double x, double mu, double sigma) 
STATIC double  icdf_logistic (double p, double mu, double sigma) 
STATIC double  isf_logistic (double p, double mu, double sigma) 
STATIC double  cdf_log_logistic (double x, double alpha, double beta) 
STATIC double  sf_log_logistic (double x, double alpha, double beta) 
STATIC double  icdf_log_logistic (double p, double alpha, double beta) 
STATIC double  isf_log_logistic (double p, double alpha, double beta) 
STATIC double  cdf_weibull (double x, double lambda, double k) 
STATIC double  sf_weibull (double x, double lambda, double k) 
STATIC double  icdf_weibull (double p, double lambda, double k) 
STATIC double  isf_weibull (double p, double lambda, double k) 
STATIC double  cdf_genpareto (double x, double mu, double sigma, double xi) 
STATIC double  sf_genpareto (double x, double mu, double sigma, double xi) 
STATIC double  icdf_genpareto (double p, double mu, double sigma, double xi) 
STATIC double  isf_genpareto (double p, double mu, double sigma, double xi) 
STATIC double  sample_uniform_interval (double p0, double a, double b) 
STATIC double  sample_logistic (uint32_t s, double t, double p0) 
static double  sample_logistic_locscale (uint32_t s, double t, double p0, double mu, double sigma) 
STATIC double  sample_log_logistic (uint32_t s, double p0) 
static double  sample_log_logistic_scaleshape (uint32_t s, double p0, double alpha, double beta) 
static double  sample_exponential (uint32_t s, double p0) 
STATIC double  sample_weibull (uint32_t s, double p0, double lambda, double k) 
STATIC double  sample_genpareto (uint32_t s, double p0, double xi) 
static double  sample_genpareto_locscale (uint32_t s, double p0, double mu, double sigma, double xi) 
static double  sample_geometric (uint32_t s, double p0, double p) 
const char *  dist_name (const struct dist_t *dist) 
double  dist_sample (const struct dist_t *dist) 
double  dist_cdf (const struct dist_t *dist, double x) 
double  dist_sf (const struct dist_t *dist, double x) 
double  dist_icdf (const struct dist_t *dist, double p) 
double  dist_isf (const struct dist_t *dist, double p) 
static double  uniform_sample (const struct dist_t *dist) 
static double  uniform_cdf (const struct dist_t *dist, double x) 
static double  uniform_sf (const struct dist_t *dist, double x) 
static double  uniform_icdf (const struct dist_t *dist, double p) 
static double  uniform_isf (const struct dist_t *dist, double p) 
static double  logistic_sample (const struct dist_t *dist) 
static double  logistic_cdf (const struct dist_t *dist, double x) 
static double  logistic_sf (const struct dist_t *dist, double x) 
static double  logistic_icdf (const struct dist_t *dist, double p) 
static double  logistic_isf (const struct dist_t *dist, double p) 
static double  log_logistic_sample (const struct dist_t *dist) 
static double  log_logistic_cdf (const struct dist_t *dist, double x) 
static double  log_logistic_sf (const struct dist_t *dist, double x) 
static double  log_logistic_icdf (const struct dist_t *dist, double p) 
static double  log_logistic_isf (const struct dist_t *dist, double p) 
static double  weibull_sample (const struct dist_t *dist) 
static double  weibull_cdf (const struct dist_t *dist, double x) 
static double  weibull_sf (const struct dist_t *dist, double x) 
static double  weibull_icdf (const struct dist_t *dist, double p) 
static double  weibull_isf (const struct dist_t *dist, double p) 
static double  genpareto_sample (const struct dist_t *dist) 
static double  genpareto_cdf (const struct dist_t *dist, double x) 
static double  genpareto_sf (const struct dist_t *dist, double x) 
static double  genpareto_icdf (const struct dist_t *dist, double p) 
static double  genpareto_isf (const struct dist_t *dist, double p) 
static double  geometric_sample (const struct dist_t *dist) 
static double  geometric_cdf (const struct dist_t *dist, double x) 
static double  geometric_sf (const struct dist_t *dist, double x) 
static double  geometric_icdf (const struct dist_t *dist, double p) 
static double  geometric_isf (const struct dist_t *dist, double p) 
Variables  
const struct dist_ops_t  uniform_ops 
const struct dist_ops_t  logistic_ops 
const struct dist_ops_t  log_logistic_ops 
const struct dist_ops_t  weibull_ops 
const struct dist_ops_t  genpareto_ops 
const struct dist_ops_t  geometric_ops 
Implements various probability distributions. Almost all code is courtesy of Riastradh.
Here are some details that might help you understand this file:
Throughout this file, ‘eps’ means the largest relative error of a correctly rounded floatingpoint operation, which in binary64 floatingpoint arithmetic is 2^53. Here the relative error of a true value x from a computed value y is x  y/x. This definition of epsilon is conventional for numerical analysts when writing error analyses. (If your libm doesn't provide correctly rounded exp and log, their relative error is usually below 2*2^53 and probably closer to 1.1*2^53 instead.)
The C constant DBL_EPSILON is actually twice this, and should perhaps rather be named ulp(1) – that is, it is the distance from 1 to the next greater floatingpoint number, which is usually of more interest to programmers and hardware engineers.
Since this file is concerned mainly with error bounds rather than with lowlevel bithacking of floatingpoint numbers, we adopt the numerical analysts' definition in the comments, though we do use DBL_EPSILON in a handful of places where it is convenient to use some function of eps = DBL_EPSILON/2 in a case analysis.
Definition in file prob_distr.c.
#define DECLARE_PROB_DISTR_DOWNCAST_FN  (  name  ) 
Declare a function that downcasts from a generic dist struct to the actual subtype probablity distribution it represents.
Definition at line 58 of file prob_distr.c.

static 
Count number of one bits in 32bit word.
Definition at line 77 of file prob_distr.c.
Referenced by clz32().
STATIC double cdf_genpareto  (  double  x, 
double  mu,  
double  sigma,  
double  xi  
) 
Compute the CDF of the GeneralizedPareto(mu, sigma, xi) distribution. Wellconditioned everywhere. For standard distribution (mu=0, sigma=1), condition number
(x/(1 + x xi)) / ((1 + x xi)^{1/xi}  1)
is bounded by 1, attained only at x = 0.
Definition at line 792 of file prob_distr.c.
STATIC double cdf_log_logistic  (  double  x, 
double  alpha,  
double  beta  
) 
Compute the CDF of the LogLogistic(alpha, beta) distribution. Wellconditioned for all x and alpha, and the condition number
beta/[1 + (x/alpha)^{beta}]
grows linearly with beta.
Loosely, the relative error of this implementation is bounded by
4 eps + 2 eps^2 + O(beta eps),
so don't bother trying this for beta anywhere near as large as 1/eps, around which point it levels off at 1.
Definition at line 598 of file prob_distr.c.
STATIC double cdf_logistic  (  double  x, 
double  mu,  
double  sigma  
) 
Compute the CDF of the Logistic(mu, sigma) distribution: the logistic function. Wellconditioned for negative inputs and small positive inputs; illconditioned for large positive inputs.
Definition at line 511 of file prob_distr.c.
STATIC double cdf_weibull  (  double  x, 
double  lambda,  
double  k  
) 
Compute the CDF of the Weibull(lambda, k) distribution. Wellconditioned for small x and k, and for large lambda – condition number
k (x/lambda)^k exp((x/lambda)^k)/[exp((x/lambda)^k)  1]
grows linearly with k, x^k, and lambda^{k}.
Definition at line 711 of file prob_distr.c.

static 
Count leading zeros in 32bit word.
Definition at line 97 of file prob_distr.c.
Referenced by random_uniform_01().
double dist_cdf  (  const struct dist_t *  dist, 
double  x  
) 
Compute the CDF of dist at x.
Definition at line 1343 of file prob_distr.c.
double dist_icdf  (  const struct dist_t *  dist, 
double  p  
) 
Compute the iCDF (Inverse CDF) of dist at x.
Definition at line 1357 of file prob_distr.c.
double dist_isf  (  const struct dist_t *  dist, 
double  p  
) 
Compute the iSF (Inverse Survival function) of dist at x.
Definition at line 1364 of file prob_distr.c.
const char* dist_name  (  const struct dist_t *  dist  ) 
Public API for probability distributions:
These are wrapper functions on top of the various probability distribution operations using the generic dist structure.
These are the functions that should be used by consumers of this API.Returns the name of the distribution in dist.
Definition at line 1329 of file prob_distr.c.
double dist_sf  (  const struct dist_t *  dist, 
double  x  
) 
Compute the SF (Survival function) of dist at x.
Definition at line 1350 of file prob_distr.c.

static 
Functions for generalized Pareto distributions
Definition at line 1587 of file prob_distr.c.

static 
Functions for geometric distribution on number of trials before success
Definition at line 1636 of file prob_distr.c.
STATIC double icdf_genpareto  (  double  p, 
double  mu,  
double  sigma,  
double  xi  
) 
Compute the inverse of the CDF of the GeneralizedPareto(mu, sigma, xi) distribution. Illconditioned for p near 1; condition number is
xi (p/(1  p))/(1  (1  p)^xi)
Definition at line 853 of file prob_distr.c.
STATIC double icdf_log_logistic  (  double  p, 
double  alpha,  
double  beta  
) 
Compute the inverse of the CDF of the LogLogistic(alpha, beta) distribution. Illconditioned for p near 1 and beta near 0 with condition number 1/[beta (1  p)].
Definition at line 675 of file prob_distr.c.
STATIC double icdf_logistic  (  double  p, 
double  mu,  
double  sigma  
) 
Compute the inverse of the CDF of the Logistic(mu, sigma) distribution: the logit function. Wellconditioned near 0; illconditioned near 1/2 and 1.
Definition at line 534 of file prob_distr.c.
STATIC double icdf_weibull  (  double  p, 
double  lambda,  
double  k  
) 
Compute the inverse of the CDF of the Weibull(lambda, k) distribution. Illconditioned for p near 1, and for k near 0; condition number is
(p/(1  p))/(k log(1  p)).
Definition at line 739 of file prob_distr.c.
STATIC double isf_genpareto  (  double  p, 
double  mu,  
double  sigma,  
double  xi  
) 
Compute the inverse of the SF of the GeneralizedPareto(mu, sigma, xi) distribution. Illconditioned for p near 1; conditon number is
xi/(1  p^{xi})
Definition at line 899 of file prob_distr.c.
STATIC double isf_log_logistic  (  double  p, 
double  alpha,  
double  beta  
) 
Compute the inverse of the SF of the LogLogistic(alpha, beta) distribution. Illconditioned for p near 1 and for large beta, with condition number 1/[beta (1  p)].
Definition at line 686 of file prob_distr.c.
STATIC double isf_logistic  (  double  p, 
double  mu,  
double  sigma  
) 
Compute the inverse of the SF of the Logistic(mu, sigma) distribution: the logit function. Wellconditioned near 0; illconditioned near 1/2 and 1.
Definition at line 545 of file prob_distr.c.
STATIC double isf_weibull  (  double  p, 
double  lambda,  
double  k  
) 
Compute the inverse of the SF of the Weibull(lambda, k) distribution. Illconditioned for p near 0, and for k near 0; condition number is
1/(k log(p)).
Definition at line 752 of file prob_distr.c.

static 
Functions for loglogistic distribution:
Definition at line 1489 of file prob_distr.c.
STATIC double logistic  (  double  x  ) 
Compute the logistic function: f(x) = 1/(1 + e^{x}) = e^x/(1 + e^x). Maps a logoddsspace probability in [infinity, +infinity] into a directspace probability in [0,1]. Inverse of logit.
Illconditioned for large x; the identity logistic(x) = 1  logistic(x) and the function logistichalf(x) = logistic(x)  1/2 may help to rearrange a computation.
This implementation gives relative error bounded by 7 eps.
Definition at line 193 of file prob_distr.c.
Referenced by cdf_logistic(), and sf_logistic().

static 
Private functions for each probability distribution. Functions for logistic distribution:
Definition at line 1439 of file prob_distr.c.
STATIC double logit  (  double  p  ) 
Compute the logit function: log p/(1  p). Defined on [0,1]. Maps a directspace probability in [0,1] to a logoddsspace probability in [infinity, +infinity]. Inverse of logistic.
Illconditioned near 1/2 and 1; the identity logit(1  p) = logit(p) and the function logithalf(p0) = logit(1/2 + p0) may help to rearrange a computation for p in [1/(1 + e), 1  1/(1 + e)].
This implementation gives relative error bounded by 10 eps.
Definition at line 280 of file prob_distr.c.
Referenced by icdf_logistic(), isf_logistic(), and sample_logistic().
STATIC double logithalf  (  double  p0  ) 
Compute the logit function, translated in input by 1/2: logithalf(p) = logit(1/2 + p). Defined on [1/2, 1/2]. Inverse of logistichalf.
Illconditioned near +/1/2. If p0 > 1/2  1/(1 + e), it may be better to compute 1/2 + p0 or 1/2  p0 and to use logit instead. This implementation gives relative error bounded by 34 eps.
Definition at line 360 of file prob_distr.c.
Referenced by sample_logistic().
STATIC double random_uniform_01  (  void  ) 
Draw a floatingpoint number in [0, 1] with uniform distribution.
Note that the probability of returning 0 is less than 2^1074, so callers need not check for it. However, callers that cannot handle rounding to 1 must deal with that, because it occurs with probability 2^54, which is small but nonnegligible.
Definition at line 453 of file prob_distr.c.
Referenced by genpareto_sample(), geometric_sample(), log_logistic_sample(), logistic_sample(), uniform_sample(), and weibull_sample().

static 
Deterministically sample from the standard exponential distribution, indexed by a uniform random 32bit integer s and a uniform random floatingpoint number p0 in (0, 1].
Definition at line 1196 of file prob_distr.c.
Referenced by sample_genpareto(), and sample_geometric().
STATIC double sample_genpareto  (  uint32_t  s, 
double  p0,  
double  xi  
) 
Deterministically sample from the generalized Pareto distribution with shape xi, indexed by a uniform random 32bit integer s and a uniform random floatingpoint number p0 in (0, 1].
Definition at line 1231 of file prob_distr.c.
Referenced by sample_genpareto_locscale().

static 
Deterministically sample from a generalized Pareto distribution with shape xi, scaled by sigma and translated by mu.
Definition at line 1277 of file prob_distr.c.
Referenced by genpareto_sample().

static 
Deterministically sample from the geometric distribution with pertrial success probability p.
XXX Quantify the error (KL divergence?) of this ceilingofexponential sampler from a true geometric distribution, which we could get by rejection sampling. Relevant papers:
John F. Monahan, `Accuracy in Random Number Generation', Mathematics of Computation 45(172), October 1984, pp. 559568.
https://pdfs.semanticscholar.org/aca6/74b96da1df77b2224e8cfc5dd6d61a471632.pdf
Karl Bringmann and Tobias Friedrich, `Exact and Efficient Generation of Geometric Random Variates and Random Graphs', in Proceedings of the 40th International Colloaquium on Automata, Languages, and Programming  ICALP 2013, Springer LNCS 7965, pp.267278. https://doi.org/10.1007/9783642392061_23 https://people.mpiinf.mpg.de/~kbringma/paper/2013ICALP1.pdf
Definition at line 1305 of file prob_distr.c.
Referenced by geometric_sample().
STATIC double sample_log_logistic  (  uint32_t  s, 
double  p0  
) 
Deterministically sample from the standard loglogistic distribution, indexed by a uniform random 32bit integer s and a uniform random floatingpoint number p0 in (0, 1].
Definition at line 1160 of file prob_distr.c.
Referenced by sample_log_logistic_scaleshape().

static 
Deterministically sample from the loglogistic distribution with scale alpha and shape beta.
Definition at line 1182 of file prob_distr.c.
Referenced by log_logistic_sample().
STATIC double sample_logistic  (  uint32_t  s, 
double  t,  
double  p0  
) 
Deterministically sample from the standard logistic distribution, indexed by a uniform random 32bit integer s and uniform random floatingpoint numbers t and p0 in (0, 1].
Definition at line 1094 of file prob_distr.c.
Referenced by sample_logistic_locscale().

static 
Deterministically sample from the logistic distribution scaled by sigma and translated by mu.
Definition at line 1147 of file prob_distr.c.
Referenced by logistic_sample().
STATIC double sample_uniform_interval  (  double  p0, 
double  a,  
double  b  
) 
Deterministic samplers, parametrized by uniform integer and (0,1] samples. No guarantees are made about which mapping from the integer and (0,1] samples these use; all that is guaranteed is the distribution of the outputs conditioned on a uniform distribution on the inputs. The automatic tests in test_prob_distr.c doublecheck the particular mappings we use.
Beware: Unlike random_uniform_01(), these are not guaranteed to be supported on all possible outputs. See Ilya Mironov, `On the Significance of the Least Significant Bits for Differential Privacy', for an example of what can go wrong if you try to use these to conceal information from an adversary but you expose the specific fullprecision floatingpoint values.
Note: None of these samplers use rejection sampling; they are all essentially inverseCDF transforms with tweaks. If you were to add, say, a Gamma sampler with the MarsagliaTsang method, you would have to parametrize it by a potentially infinite stream of uniform (and perhaps normal) samples rather than a fixed number, which doesn't make for quite as nice automatic testing as for these.Deterministically sample from the interval [a, b], indexed by a uniform random floatingpoint number p0 in (0, 1].
Note that even if p0 is nonzero, the result may be equal to a, if ulp(a)/2 is nonnegligible, e.g. if a = 1. For maximum resolution, arrange a <= b.
Definition at line 941 of file prob_distr.c.
Referenced by uniform_sample().
STATIC double sample_weibull  (  uint32_t  s, 
double  p0,  
double  lambda,  
double  k  
) 
Deterministically sample from a Weibull distribution with scale lambda and shape k – just an exponential with a shape parameter in addition to a scale parameter. (Yes, lambda really is the scale, not the rate.)
Definition at line 1219 of file prob_distr.c.
Referenced by weibull_sample().
STATIC double sf_genpareto  (  double  x, 
double  mu,  
double  sigma,  
double  xi  
) 
Compute the SF of the GeneralizedPareto(mu, sigma, xi) distribution. For standard distribution (mu=0, sigma=1), illconditioned for xi near 0; condition number
x (1 + x xi)^{(1  xi)/xi}/(1 + x xi)^{1/xi} = x (1 + x xi)^{1/xi  1}/(1 + x xi)^{1/xi} = (x/(1 + x xi)) (1 + x xi)^{1/xi}/(1 + x xi)^{1/xi} = x/(1 + x xi)
is bounded by 1/xi.
Definition at line 836 of file prob_distr.c.
STATIC double sf_log_logistic  (  double  x, 
double  alpha,  
double  beta  
) 
Compute the SF of the LogLogistic(alpha, beta) distribution. Wellconditioned for all x and alpha, and the condition number
beta/[1 + (x/alpha)^beta]
grows linearly with beta.
Loosely, the relative error of this implementation is bounded by
4 eps + 2 eps^2 + O(beta eps)
so don't bother trying this for beta anywhere near as large as 1/eps, beyond which point it grows unbounded.
Definition at line 659 of file prob_distr.c.
STATIC double sf_logistic  (  double  x, 
double  mu,  
double  sigma  
) 
Compute the SF of the Logistic(mu, sigma) distribution: the logistic function reflected over the y axis. Wellconditioned for positive inputs and small negative inputs; illconditioned for large negative inputs.
Definition at line 523 of file prob_distr.c.
STATIC double sf_weibull  (  double  x, 
double  lambda,  
double  k  
) 
Compute the SF of the Weibull(lambda, k) distribution. Wellconditioned for small x and k, and for large lambda – condition number
k (x/lambda)^k
grows linearly with k, x^k, and lambda^{k}.
Definition at line 726 of file prob_distr.c.

static 
Functions for uniform distribution
Definition at line 1372 of file prob_distr.c.

static 
Functions for Weibull distribution
Definition at line 1538 of file prob_distr.c.
const struct dist_ops_t genpareto_ops 
Definition at line 1624 of file prob_distr.c.
const struct dist_ops_t geometric_ops 
Definition at line 1683 of file prob_distr.c.
const struct dist_ops_t log_logistic_ops 
Definition at line 1526 of file prob_distr.c.
const struct dist_ops_t logistic_ops 
Definition at line 1477 of file prob_distr.c.
const struct dist_ops_t uniform_ops 
Definition at line 1423 of file prob_distr.c.
const struct dist_ops_t weibull_ops 
Definition at line 1575 of file prob_distr.c.