Tor  0.4.7.0-alpha-dev
Macros | Functions | Variables
prob_distr.c File Reference

Implements various probability distributions. Almost all code is courtesy of Riastradh. More...

#include "orconfig.h"
#include "lib/math/prob_distr.h"
#include "lib/crypt_ops/crypto_rand.h"
#include "lib/cc/ctassert.h"
#include "lib/log/util_bug.h"
#include <float.h>
#include <math.h>
#include <stddef.h>

Go to the source code of this file.

Macros

#define PROB_DISTR_PRIVATE
 
#define DECLARE_PROB_DISTR_DOWNCAST_FN(name)
 

Functions

static unsigned bitcount32 (uint32_t x)
 
static unsigned clz32 (uint32_t x)
 
STATIC double logistic (double x)
 
STATIC double logit (double p)
 
STATIC double logithalf (double p0)
 
 CTASSERT (FLT_RADIX==2)
 
 CTASSERT (-DBL_MIN_EXP<=1021)
 
 CTASSERT (DBL_MANT_DIG<=53)
 
STATIC double random_uniform_01 (void)
 
STATIC double cdf_logistic (double x, double mu, double sigma)
 
STATIC double sf_logistic (double x, double mu, double sigma)
 
STATIC double icdf_logistic (double p, double mu, double sigma)
 
STATIC double isf_logistic (double p, double mu, double sigma)
 
STATIC double cdf_log_logistic (double x, double alpha, double beta)
 
STATIC double sf_log_logistic (double x, double alpha, double beta)
 
STATIC double icdf_log_logistic (double p, double alpha, double beta)
 
STATIC double isf_log_logistic (double p, double alpha, double beta)
 
STATIC double cdf_weibull (double x, double lambda, double k)
 
STATIC double sf_weibull (double x, double lambda, double k)
 
STATIC double icdf_weibull (double p, double lambda, double k)
 
STATIC double isf_weibull (double p, double lambda, double k)
 
STATIC double cdf_genpareto (double x, double mu, double sigma, double xi)
 
STATIC double sf_genpareto (double x, double mu, double sigma, double xi)
 
STATIC double icdf_genpareto (double p, double mu, double sigma, double xi)
 
STATIC double isf_genpareto (double p, double mu, double sigma, double xi)
 
STATIC double sample_uniform_interval (double p0, double a, double b)
 
STATIC double sample_logistic (uint32_t s, double t, double p0)
 
static double sample_logistic_locscale (uint32_t s, double t, double p0, double mu, double sigma)
 
STATIC double sample_log_logistic (uint32_t s, double p0)
 
static double sample_log_logistic_scaleshape (uint32_t s, double p0, double alpha, double beta)
 
static double sample_exponential (uint32_t s, double p0)
 
STATIC double sample_weibull (uint32_t s, double p0, double lambda, double k)
 
STATIC double sample_genpareto (uint32_t s, double p0, double xi)
 
static double sample_genpareto_locscale (uint32_t s, double p0, double mu, double sigma, double xi)
 
static double sample_geometric (uint32_t s, double p0, double p)
 
const char * dist_name (const struct dist_t *dist)
 
double dist_sample (const struct dist_t *dist)
 
double dist_cdf (const struct dist_t *dist, double x)
 
double dist_sf (const struct dist_t *dist, double x)
 
double dist_icdf (const struct dist_t *dist, double p)
 
double dist_isf (const struct dist_t *dist, double p)
 
static double uniform_sample (const struct dist_t *dist)
 
static double uniform_cdf (const struct dist_t *dist, double x)
 
static double uniform_sf (const struct dist_t *dist, double x)
 
static double uniform_icdf (const struct dist_t *dist, double p)
 
static double uniform_isf (const struct dist_t *dist, double p)
 
static double logistic_sample (const struct dist_t *dist)
 
static double logistic_cdf (const struct dist_t *dist, double x)
 
static double logistic_sf (const struct dist_t *dist, double x)
 
static double logistic_icdf (const struct dist_t *dist, double p)
 
static double logistic_isf (const struct dist_t *dist, double p)
 
static double log_logistic_sample (const struct dist_t *dist)
 
static double log_logistic_cdf (const struct dist_t *dist, double x)
 
static double log_logistic_sf (const struct dist_t *dist, double x)
 
static double log_logistic_icdf (const struct dist_t *dist, double p)
 
static double log_logistic_isf (const struct dist_t *dist, double p)
 
static double weibull_sample (const struct dist_t *dist)
 
static double weibull_cdf (const struct dist_t *dist, double x)
 
static double weibull_sf (const struct dist_t *dist, double x)
 
static double weibull_icdf (const struct dist_t *dist, double p)
 
static double weibull_isf (const struct dist_t *dist, double p)
 
static double genpareto_sample (const struct dist_t *dist)
 
static double genpareto_cdf (const struct dist_t *dist, double x)
 
static double genpareto_sf (const struct dist_t *dist, double x)
 
static double genpareto_icdf (const struct dist_t *dist, double p)
 
static double genpareto_isf (const struct dist_t *dist, double p)
 
static double geometric_sample (const struct dist_t *dist)
 
static double geometric_cdf (const struct dist_t *dist, double x)
 
static double geometric_sf (const struct dist_t *dist, double x)
 
static double geometric_icdf (const struct dist_t *dist, double p)
 
static double geometric_isf (const struct dist_t *dist, double p)
 

Variables

const struct dist_ops_t uniform_ops
 
const struct dist_ops_t logistic_ops
 
const struct dist_ops_t log_logistic_ops
 
const struct dist_ops_t weibull_ops
 
const struct dist_ops_t genpareto_ops
 
const struct dist_ops_t geometric_ops
 

Detailed Description

Implements various probability distributions. Almost all code is courtesy of Riastradh.

Here are some details that might help you understand this file:

Definition in file prob_distr.c.

Macro Definition Documentation

◆ DECLARE_PROB_DISTR_DOWNCAST_FN

#define DECLARE_PROB_DISTR_DOWNCAST_FN (   name)
Value:
static inline \
const struct name##_t * \
dist_to_const_##name(const struct dist_t *obj) { \
tor_assert(obj->ops == &name##_ops); \
return SUBTYPE_P(obj, struct name ## _t, base); \
}
#define SUBTYPE_P(p, subtype, basemember)
const char * name
Definition: config.c:2434

Declare a function that downcasts from a generic dist struct to the actual subtype probablity distribution it represents.

Definition at line 58 of file prob_distr.c.

Function Documentation

◆ bitcount32()

static unsigned bitcount32 ( uint32_t  x)
static

Count number of one bits in 32-bit word.

Definition at line 77 of file prob_distr.c.

Referenced by clz32().

◆ cdf_genpareto()

STATIC double cdf_genpareto ( double  x,
double  mu,
double  sigma,
double  xi 
)

Compute the CDF of the GeneralizedPareto(mu, sigma, xi) distribution. Well-conditioned everywhere. For standard distribution (mu=0, sigma=1), condition number

 (x/(1 + x xi)) / ((1 + x xi)^{1/xi} - 1)

is bounded by 1, attained only at x = 0.

Definition at line 792 of file prob_distr.c.

◆ cdf_log_logistic()

STATIC double cdf_log_logistic ( double  x,
double  alpha,
double  beta 
)

Compute the CDF of the LogLogistic(alpha, beta) distribution. Well-conditioned for all x and alpha, and the condition number

 -beta/[1 + (x/alpha)^{-beta}]

grows linearly with beta.

Loosely, the relative error of this implementation is bounded by

 4 eps + 2 eps^2 + O(beta eps),

so don't bother trying this for beta anywhere near as large as 1/eps, around which point it levels off at 1.

Definition at line 598 of file prob_distr.c.

◆ cdf_logistic()

STATIC double cdf_logistic ( double  x,
double  mu,
double  sigma 
)

Compute the CDF of the Logistic(mu, sigma) distribution: the logistic function. Well-conditioned for negative inputs and small positive inputs; ill-conditioned for large positive inputs.

Definition at line 511 of file prob_distr.c.

◆ cdf_weibull()

STATIC double cdf_weibull ( double  x,
double  lambda,
double  k 
)

Compute the CDF of the Weibull(lambda, k) distribution. Well-conditioned for small x and k, and for large lambda – condition number

 -k (x/lambda)^k exp(-(x/lambda)^k)/[exp(-(x/lambda)^k) - 1]

grows linearly with k, x^k, and lambda^{-k}.

Definition at line 711 of file prob_distr.c.

◆ clz32()

static unsigned clz32 ( uint32_t  x)
static

Count leading zeros in 32-bit word.

Definition at line 97 of file prob_distr.c.

Referenced by random_uniform_01().

◆ dist_cdf()

double dist_cdf ( const struct dist_t dist,
double  x 
)

Compute the CDF of dist at x.

Definition at line 1345 of file prob_distr.c.

◆ dist_icdf()

double dist_icdf ( const struct dist_t dist,
double  p 
)

Compute the iCDF (Inverse CDF) of dist at x.

Definition at line 1359 of file prob_distr.c.

◆ dist_isf()

double dist_isf ( const struct dist_t dist,
double  p 
)

Compute the iSF (Inverse Survival function) of dist at x.

Definition at line 1366 of file prob_distr.c.

◆ dist_name()

const char* dist_name ( const struct dist_t dist)

Public API for probability distributions:

These are wrapper functions on top of the various probability distribution operations using the generic dist structure.

These are the functions that should be used by consumers of this API. Returns the name of the distribution in dist.

Definition at line 1331 of file prob_distr.c.

◆ dist_sf()

double dist_sf ( const struct dist_t dist,
double  x 
)

Compute the SF (Survival function) of dist at x.

Definition at line 1352 of file prob_distr.c.

◆ genpareto_sample()

static double genpareto_sample ( const struct dist_t dist)
static

Functions for generalized Pareto distributions

Definition at line 1589 of file prob_distr.c.

◆ geometric_sample()

static double geometric_sample ( const struct dist_t dist)
static

Functions for geometric distribution on number of trials before success

Definition at line 1638 of file prob_distr.c.

◆ icdf_genpareto()

STATIC double icdf_genpareto ( double  p,
double  mu,
double  sigma,
double  xi 
)

Compute the inverse of the CDF of the GeneralizedPareto(mu, sigma, xi) distribution. Ill-conditioned for p near 1; condition number is

 xi (p/(1 - p))/(1 - (1 - p)^xi)

Definition at line 853 of file prob_distr.c.

◆ icdf_log_logistic()

STATIC double icdf_log_logistic ( double  p,
double  alpha,
double  beta 
)

Compute the inverse of the CDF of the LogLogistic(alpha, beta) distribution. Ill-conditioned for p near 1 and beta near 0 with condition number 1/[beta (1 - p)].

Definition at line 675 of file prob_distr.c.

◆ icdf_logistic()

STATIC double icdf_logistic ( double  p,
double  mu,
double  sigma 
)

Compute the inverse of the CDF of the Logistic(mu, sigma) distribution: the logit function. Well-conditioned near 0; ill-conditioned near 1/2 and 1.

Definition at line 534 of file prob_distr.c.

◆ icdf_weibull()

STATIC double icdf_weibull ( double  p,
double  lambda,
double  k 
)

Compute the inverse of the CDF of the Weibull(lambda, k) distribution. Ill-conditioned for p near 1, and for k near 0; condition number is

 (p/(1 - p))/(k log(1 - p)).

Definition at line 739 of file prob_distr.c.

◆ isf_genpareto()

STATIC double isf_genpareto ( double  p,
double  mu,
double  sigma,
double  xi 
)

Compute the inverse of the SF of the GeneralizedPareto(mu, sigma, xi) distribution. Ill-conditioned for p near 1; condition number is

 -xi/(1 - p^{-xi})

Definition at line 899 of file prob_distr.c.

◆ isf_log_logistic()

STATIC double isf_log_logistic ( double  p,
double  alpha,
double  beta 
)

Compute the inverse of the SF of the LogLogistic(alpha, beta) distribution. Ill-conditioned for p near 1 and for large beta, with condition number -1/[beta (1 - p)].

Definition at line 686 of file prob_distr.c.

◆ isf_logistic()

STATIC double isf_logistic ( double  p,
double  mu,
double  sigma 
)

Compute the inverse of the SF of the Logistic(mu, sigma) distribution: the -logit function. Well-conditioned near 0; ill-conditioned near 1/2 and 1.

Definition at line 545 of file prob_distr.c.

◆ isf_weibull()

STATIC double isf_weibull ( double  p,
double  lambda,
double  k 
)

Compute the inverse of the SF of the Weibull(lambda, k) distribution. Ill-conditioned for p near 0, and for k near 0; condition number is

 1/(k log(p)).

Definition at line 752 of file prob_distr.c.

◆ log_logistic_sample()

static double log_logistic_sample ( const struct dist_t dist)
static

Functions for log-logistic distribution:

Definition at line 1491 of file prob_distr.c.

◆ logistic()

STATIC double logistic ( double  x)

Compute the logistic function: f(x) = 1/(1 + e^{-x}) = e^x/(1 + e^x). Maps a log-odds-space probability in [-infinity, +infinity] into a direct-space probability in [0,1]. Inverse of logit.

Ill-conditioned for large x; the identity logistic(-x) = 1 - logistic(x) and the function logistichalf(x) = logistic(x) - 1/2 may help to rearrange a computation.

This implementation gives relative error bounded by 7 eps.

Definition at line 193 of file prob_distr.c.

Referenced by cdf_logistic(), and sf_logistic().

◆ logistic_sample()

static double logistic_sample ( const struct dist_t dist)
static

Private functions for each probability distribution. Functions for logistic distribution:

Definition at line 1441 of file prob_distr.c.

◆ logit()

STATIC double logit ( double  p)

Compute the logit function: log p/(1 - p). Defined on [0,1]. Maps a direct-space probability in [0,1] to a log-odds-space probability in [-infinity, +infinity]. Inverse of logistic.

Ill-conditioned near 1/2 and 1; the identity logit(1 - p) = -logit(p) and the function logithalf(p0) = logit(1/2 + p0) may help to rearrange a computation for p in [1/(1 + e), 1 - 1/(1 + e)].

This implementation gives relative error bounded by 10 eps.

Definition at line 280 of file prob_distr.c.

Referenced by icdf_logistic(), isf_logistic(), and sample_logistic().

◆ logithalf()

STATIC double logithalf ( double  p0)

Compute the logit function, translated in input by 1/2: logithalf(p) = logit(1/2 + p). Defined on [-1/2, 1/2]. Inverse of logistichalf.

Ill-conditioned near +/-1/2. If |p0| > 1/2 - 1/(1 + e), it may be better to compute 1/2 + p0 or -1/2 - p0 and to use logit instead. This implementation gives relative error bounded by 34 eps.

Definition at line 360 of file prob_distr.c.

Referenced by sample_logistic().

◆ random_uniform_01()

STATIC double random_uniform_01 ( void  )

Draw a floating-point number in [0, 1] with uniform distribution.

Note that the probability of returning 0 is less than 2^-1074, so callers need not check for it. However, callers that cannot handle rounding to 1 must deal with that, because it occurs with probability 2^-54, which is small but nonnegligible.

Definition at line 453 of file prob_distr.c.

Referenced by genpareto_sample(), geometric_sample(), log_logistic_sample(), logistic_sample(), uniform_sample(), and weibull_sample().

◆ sample_exponential()

static double sample_exponential ( uint32_t  s,
double  p0 
)
static

Deterministically sample from the standard exponential distribution, indexed by a uniform random 32-bit integer s and a uniform random floating-point number p0 in (0, 1].

Definition at line 1196 of file prob_distr.c.

Referenced by sample_genpareto(), and sample_geometric().

◆ sample_genpareto()

STATIC double sample_genpareto ( uint32_t  s,
double  p0,
double  xi 
)

Deterministically sample from the generalized Pareto distribution with shape xi, indexed by a uniform random 32-bit integer s and a uniform random floating-point number p0 in (0, 1].

Definition at line 1231 of file prob_distr.c.

Referenced by sample_genpareto_locscale().

◆ sample_genpareto_locscale()

static double sample_genpareto_locscale ( uint32_t  s,
double  p0,
double  mu,
double  sigma,
double  xi 
)
static

Deterministically sample from a generalized Pareto distribution with shape xi, scaled by sigma and translated by mu.

Definition at line 1277 of file prob_distr.c.

Referenced by genpareto_sample().

◆ sample_geometric()

static double sample_geometric ( uint32_t  s,
double  p0,
double  p 
)
static

Deterministically sample from the geometric distribution with per-trial success probability p.

Definition at line 1307 of file prob_distr.c.

Referenced by geometric_sample().

◆ sample_log_logistic()

STATIC double sample_log_logistic ( uint32_t  s,
double  p0 
)

Deterministically sample from the standard log-logistic distribution, indexed by a uniform random 32-bit integer s and a uniform random floating-point number p0 in (0, 1].

Definition at line 1160 of file prob_distr.c.

Referenced by sample_log_logistic_scaleshape().

◆ sample_log_logistic_scaleshape()

static double sample_log_logistic_scaleshape ( uint32_t  s,
double  p0,
double  alpha,
double  beta 
)
static

Deterministically sample from the log-logistic distribution with scale alpha and shape beta.

Definition at line 1182 of file prob_distr.c.

Referenced by log_logistic_sample().

◆ sample_logistic()

STATIC double sample_logistic ( uint32_t  s,
double  t,
double  p0 
)

Deterministically sample from the standard logistic distribution, indexed by a uniform random 32-bit integer s and uniform random floating-point numbers t and p0 in (0, 1].

Definition at line 1094 of file prob_distr.c.

Referenced by sample_logistic_locscale().

◆ sample_logistic_locscale()

static double sample_logistic_locscale ( uint32_t  s,
double  t,
double  p0,
double  mu,
double  sigma 
)
static

Deterministically sample from the logistic distribution scaled by sigma and translated by mu.

Definition at line 1147 of file prob_distr.c.

Referenced by logistic_sample().

◆ sample_uniform_interval()

STATIC double sample_uniform_interval ( double  p0,
double  a,
double  b 
)

Deterministic samplers, parametrized by uniform integer and (0,1] samples. No guarantees are made about which mapping from the integer and (0,1] samples these use; all that is guaranteed is the distribution of the outputs conditioned on a uniform distribution on the inputs. The automatic tests in test_prob_distr.c double-check the particular mappings we use.

Beware: Unlike random_uniform_01(), these are not guaranteed to be supported on all possible outputs. See Ilya Mironov, `On the Significance of the Least Significant Bits for Differential Privacy', for an example of what can go wrong if you try to use these to conceal information from an adversary but you expose the specific full-precision floating-point values.

Note: None of these samplers use rejection sampling; they are all essentially inverse-CDF transforms with tweaks. If you were to add, say, a Gamma sampler with the Marsaglia-Tsang method, you would have to parametrize it by a potentially infinite stream of uniform (and perhaps normal) samples rather than a fixed number, which doesn't make for quite as nice automatic testing as for these. Deterministically sample from the interval [a, b], indexed by a uniform random floating-point number p0 in (0, 1].

Note that even if p0 is nonzero, the result may be equal to a, if ulp(a)/2 is nonnegligible, e.g. if a = 1. For maximum resolution, arrange |a| <= |b|.

Definition at line 941 of file prob_distr.c.

Referenced by uniform_sample().

◆ sample_weibull()

STATIC double sample_weibull ( uint32_t  s,
double  p0,
double  lambda,
double  k 
)

Deterministically sample from a Weibull distribution with scale lambda and shape k – just an exponential with a shape parameter in addition to a scale parameter. (Yes, lambda really is the scale, not the rate.)

Definition at line 1219 of file prob_distr.c.

Referenced by weibull_sample().

◆ sf_genpareto()

STATIC double sf_genpareto ( double  x,
double  mu,
double  sigma,
double  xi 
)

Compute the SF of the GeneralizedPareto(mu, sigma, xi) distribution. For standard distribution (mu=0, sigma=1), ill-conditioned for xi near 0; condition number

 -x (1 + x xi)^{(-1 - xi)/xi}/(1 + x xi)^{-1/xi}
 = -x (1 + x xi)^{-1/xi - 1}/(1 + x xi)^{-1/xi}
 = -(x/(1 + x xi)) (1 + x xi)^{-1/xi}/(1 + x xi)^{-1/xi}
 = -x/(1 + x xi)

is bounded by 1/xi.

Definition at line 836 of file prob_distr.c.

◆ sf_log_logistic()

STATIC double sf_log_logistic ( double  x,
double  alpha,
double  beta 
)

Compute the SF of the LogLogistic(alpha, beta) distribution. Well-conditioned for all x and alpha, and the condition number

 beta/[1 + (x/alpha)^beta]

grows linearly with beta.

Loosely, the relative error of this implementation is bounded by

 4 eps + 2 eps^2 + O(beta eps)

so don't bother trying this for beta anywhere near as large as 1/eps, beyond which point it grows unbounded.

Definition at line 659 of file prob_distr.c.

◆ sf_logistic()

STATIC double sf_logistic ( double  x,
double  mu,
double  sigma 
)

Compute the SF of the Logistic(mu, sigma) distribution: the logistic function reflected over the y axis. Well-conditioned for positive inputs and small negative inputs; ill-conditioned for large negative inputs.

Definition at line 523 of file prob_distr.c.

◆ sf_weibull()

STATIC double sf_weibull ( double  x,
double  lambda,
double  k 
)

Compute the SF of the Weibull(lambda, k) distribution. Well-conditioned for small x and k, and for large lambda – condition number

 -k (x/lambda)^k

grows linearly with k, x^k, and lambda^{-k}.

Definition at line 726 of file prob_distr.c.

◆ uniform_sample()

static double uniform_sample ( const struct dist_t dist)
static

Functions for uniform distribution

Definition at line 1374 of file prob_distr.c.

◆ weibull_sample()

static double weibull_sample ( const struct dist_t dist)
static

Functions for Weibull distribution

Definition at line 1540 of file prob_distr.c.

Variable Documentation

◆ genpareto_ops

const struct dist_ops_t genpareto_ops
Initial value:
= {
.name = "generalized Pareto",
.sample = genpareto_sample,
.cdf = genpareto_cdf,
.sf = genpareto_sf,
.icdf = genpareto_icdf,
.isf = genpareto_isf,
}
static double genpareto_sample(const struct dist_t *dist)
Definition: prob_distr.c:1589

Definition at line 1620 of file prob_distr.c.

◆ geometric_ops

const struct dist_ops_t geometric_ops
Initial value:
= {
.name = "geometric (1-based)",
.sample = geometric_sample,
.cdf = geometric_cdf,
.sf = geometric_sf,
.icdf = geometric_icdf,
.isf = geometric_isf,
}
static double geometric_sample(const struct dist_t *dist)
Definition: prob_distr.c:1638

Definition at line 1678 of file prob_distr.c.

◆ log_logistic_ops

const struct dist_ops_t log_logistic_ops
Initial value:
= {
.name = "log logistic",
.cdf = log_logistic_cdf,
.sf = log_logistic_sf,
.icdf = log_logistic_icdf,
.isf = log_logistic_isf,
}
static double log_logistic_sample(const struct dist_t *dist)
Definition: prob_distr.c:1491

Definition at line 1522 of file prob_distr.c.

◆ logistic_ops

const struct dist_ops_t logistic_ops
Initial value:
= {
.name = "logistic",
.sample = logistic_sample,
.cdf = logistic_cdf,
.sf = logistic_sf,
.icdf = logistic_icdf,
.isf = logistic_isf,
}
static double logistic_sample(const struct dist_t *dist)
Definition: prob_distr.c:1441

Definition at line 1473 of file prob_distr.c.

◆ uniform_ops

const struct dist_ops_t uniform_ops
Initial value:
= {
.name = "uniform",
.sample = uniform_sample,
.cdf = uniform_cdf,
.sf = uniform_sf,
.icdf = uniform_icdf,
.isf = uniform_isf,
}
static double uniform_sample(const struct dist_t *dist)
Definition: prob_distr.c:1374

Definition at line 1417 of file prob_distr.c.

◆ weibull_ops

const struct dist_ops_t weibull_ops
Initial value:
= {
.name = "Weibull",
.sample = weibull_sample,
.cdf = weibull_cdf,
.sf = weibull_sf,
.icdf = weibull_icdf,
.isf = weibull_isf,
}
static double weibull_sample(const struct dist_t *dist)
Definition: prob_distr.c:1540

Definition at line 1571 of file prob_distr.c.