Tor
0.4.7.0-alpha-dev
|
Implements various probability distributions. Almost all code is courtesy of Riastradh. More...
#include "orconfig.h"
#include "lib/math/prob_distr.h"
#include "lib/crypt_ops/crypto_rand.h"
#include "lib/cc/ctassert.h"
#include "lib/log/util_bug.h"
#include <float.h>
#include <math.h>
#include <stddef.h>
Go to the source code of this file.
Macros | |
#define | PROB_DISTR_PRIVATE |
#define | DECLARE_PROB_DISTR_DOWNCAST_FN(name) |
Functions | |
static unsigned | bitcount32 (uint32_t x) |
static unsigned | clz32 (uint32_t x) |
STATIC double | logistic (double x) |
STATIC double | logit (double p) |
STATIC double | logithalf (double p0) |
CTASSERT (FLT_RADIX==2) | |
CTASSERT (-DBL_MIN_EXP<=1021) | |
CTASSERT (DBL_MANT_DIG<=53) | |
STATIC double | random_uniform_01 (void) |
STATIC double | cdf_logistic (double x, double mu, double sigma) |
STATIC double | sf_logistic (double x, double mu, double sigma) |
STATIC double | icdf_logistic (double p, double mu, double sigma) |
STATIC double | isf_logistic (double p, double mu, double sigma) |
STATIC double | cdf_log_logistic (double x, double alpha, double beta) |
STATIC double | sf_log_logistic (double x, double alpha, double beta) |
STATIC double | icdf_log_logistic (double p, double alpha, double beta) |
STATIC double | isf_log_logistic (double p, double alpha, double beta) |
STATIC double | cdf_weibull (double x, double lambda, double k) |
STATIC double | sf_weibull (double x, double lambda, double k) |
STATIC double | icdf_weibull (double p, double lambda, double k) |
STATIC double | isf_weibull (double p, double lambda, double k) |
STATIC double | cdf_genpareto (double x, double mu, double sigma, double xi) |
STATIC double | sf_genpareto (double x, double mu, double sigma, double xi) |
STATIC double | icdf_genpareto (double p, double mu, double sigma, double xi) |
STATIC double | isf_genpareto (double p, double mu, double sigma, double xi) |
STATIC double | sample_uniform_interval (double p0, double a, double b) |
STATIC double | sample_logistic (uint32_t s, double t, double p0) |
static double | sample_logistic_locscale (uint32_t s, double t, double p0, double mu, double sigma) |
STATIC double | sample_log_logistic (uint32_t s, double p0) |
static double | sample_log_logistic_scaleshape (uint32_t s, double p0, double alpha, double beta) |
static double | sample_exponential (uint32_t s, double p0) |
STATIC double | sample_weibull (uint32_t s, double p0, double lambda, double k) |
STATIC double | sample_genpareto (uint32_t s, double p0, double xi) |
static double | sample_genpareto_locscale (uint32_t s, double p0, double mu, double sigma, double xi) |
static double | sample_geometric (uint32_t s, double p0, double p) |
const char * | dist_name (const struct dist_t *dist) |
double | dist_sample (const struct dist_t *dist) |
double | dist_cdf (const struct dist_t *dist, double x) |
double | dist_sf (const struct dist_t *dist, double x) |
double | dist_icdf (const struct dist_t *dist, double p) |
double | dist_isf (const struct dist_t *dist, double p) |
static double | uniform_sample (const struct dist_t *dist) |
static double | uniform_cdf (const struct dist_t *dist, double x) |
static double | uniform_sf (const struct dist_t *dist, double x) |
static double | uniform_icdf (const struct dist_t *dist, double p) |
static double | uniform_isf (const struct dist_t *dist, double p) |
static double | logistic_sample (const struct dist_t *dist) |
static double | logistic_cdf (const struct dist_t *dist, double x) |
static double | logistic_sf (const struct dist_t *dist, double x) |
static double | logistic_icdf (const struct dist_t *dist, double p) |
static double | logistic_isf (const struct dist_t *dist, double p) |
static double | log_logistic_sample (const struct dist_t *dist) |
static double | log_logistic_cdf (const struct dist_t *dist, double x) |
static double | log_logistic_sf (const struct dist_t *dist, double x) |
static double | log_logistic_icdf (const struct dist_t *dist, double p) |
static double | log_logistic_isf (const struct dist_t *dist, double p) |
static double | weibull_sample (const struct dist_t *dist) |
static double | weibull_cdf (const struct dist_t *dist, double x) |
static double | weibull_sf (const struct dist_t *dist, double x) |
static double | weibull_icdf (const struct dist_t *dist, double p) |
static double | weibull_isf (const struct dist_t *dist, double p) |
static double | genpareto_sample (const struct dist_t *dist) |
static double | genpareto_cdf (const struct dist_t *dist, double x) |
static double | genpareto_sf (const struct dist_t *dist, double x) |
static double | genpareto_icdf (const struct dist_t *dist, double p) |
static double | genpareto_isf (const struct dist_t *dist, double p) |
static double | geometric_sample (const struct dist_t *dist) |
static double | geometric_cdf (const struct dist_t *dist, double x) |
static double | geometric_sf (const struct dist_t *dist, double x) |
static double | geometric_icdf (const struct dist_t *dist, double p) |
static double | geometric_isf (const struct dist_t *dist, double p) |
Variables | |
const struct dist_ops_t | uniform_ops |
const struct dist_ops_t | logistic_ops |
const struct dist_ops_t | log_logistic_ops |
const struct dist_ops_t | weibull_ops |
const struct dist_ops_t | genpareto_ops |
const struct dist_ops_t | geometric_ops |
Implements various probability distributions. Almost all code is courtesy of Riastradh.
Here are some details that might help you understand this file:
Throughout this file, ‘eps’ means the largest relative error of a correctly rounded floating-point operation, which in binary64 floating-point arithmetic is 2^-53. Here the relative error of a true value x from a computed value y is |x - y|/|x|. This definition of epsilon is conventional for numerical analysts when writing error analyses. (If your libm doesn't provide correctly rounded exp and log, their relative error is usually below 2*2^-53 and probably closer to 1.1*2^-53 instead.)
The C constant DBL_EPSILON is actually twice this, and should perhaps rather be named ulp(1) – that is, it is the distance from 1 to the next greater floating-point number, which is usually of more interest to programmers and hardware engineers.
Since this file is concerned mainly with error bounds rather than with low-level bit-hacking of floating-point numbers, we adopt the numerical analysts' definition in the comments, though we do use DBL_EPSILON in a handful of places where it is convenient to use some function of eps = DBL_EPSILON/2 in a case analysis.
Definition in file prob_distr.c.
#define DECLARE_PROB_DISTR_DOWNCAST_FN | ( | name | ) |
Declare a function that downcasts from a generic dist struct to the actual subtype probablity distribution it represents.
Definition at line 58 of file prob_distr.c.
|
static |
Count number of one bits in 32-bit word.
Definition at line 77 of file prob_distr.c.
Referenced by clz32().
STATIC double cdf_genpareto | ( | double | x, |
double | mu, | ||
double | sigma, | ||
double | xi | ||
) |
Compute the CDF of the GeneralizedPareto(mu, sigma, xi) distribution. Well-conditioned everywhere. For standard distribution (mu=0, sigma=1), condition number
(x/(1 + x xi)) / ((1 + x xi)^{1/xi} - 1)
is bounded by 1, attained only at x = 0.
Definition at line 792 of file prob_distr.c.
STATIC double cdf_log_logistic | ( | double | x, |
double | alpha, | ||
double | beta | ||
) |
Compute the CDF of the LogLogistic(alpha, beta) distribution. Well-conditioned for all x and alpha, and the condition number
-beta/[1 + (x/alpha)^{-beta}]
grows linearly with beta.
Loosely, the relative error of this implementation is bounded by
4 eps + 2 eps^2 + O(beta eps),
so don't bother trying this for beta anywhere near as large as 1/eps, around which point it levels off at 1.
Definition at line 598 of file prob_distr.c.
STATIC double cdf_logistic | ( | double | x, |
double | mu, | ||
double | sigma | ||
) |
Compute the CDF of the Logistic(mu, sigma) distribution: the logistic function. Well-conditioned for negative inputs and small positive inputs; ill-conditioned for large positive inputs.
Definition at line 511 of file prob_distr.c.
STATIC double cdf_weibull | ( | double | x, |
double | lambda, | ||
double | k | ||
) |
Compute the CDF of the Weibull(lambda, k) distribution. Well-conditioned for small x and k, and for large lambda – condition number
-k (x/lambda)^k exp(-(x/lambda)^k)/[exp(-(x/lambda)^k) - 1]
grows linearly with k, x^k, and lambda^{-k}.
Definition at line 711 of file prob_distr.c.
|
static |
Count leading zeros in 32-bit word.
Definition at line 97 of file prob_distr.c.
Referenced by random_uniform_01().
double dist_cdf | ( | const struct dist_t * | dist, |
double | x | ||
) |
Compute the CDF of dist at x.
Definition at line 1345 of file prob_distr.c.
double dist_icdf | ( | const struct dist_t * | dist, |
double | p | ||
) |
Compute the iCDF (Inverse CDF) of dist at x.
Definition at line 1359 of file prob_distr.c.
double dist_isf | ( | const struct dist_t * | dist, |
double | p | ||
) |
Compute the iSF (Inverse Survival function) of dist at x.
Definition at line 1366 of file prob_distr.c.
const char* dist_name | ( | const struct dist_t * | dist | ) |
Public API for probability distributions:
These are wrapper functions on top of the various probability distribution operations using the generic dist structure.
These are the functions that should be used by consumers of this API. Returns the name of the distribution in dist.
Definition at line 1331 of file prob_distr.c.
double dist_sf | ( | const struct dist_t * | dist, |
double | x | ||
) |
Compute the SF (Survival function) of dist at x.
Definition at line 1352 of file prob_distr.c.
|
static |
Functions for generalized Pareto distributions
Definition at line 1589 of file prob_distr.c.
|
static |
Functions for geometric distribution on number of trials before success
Definition at line 1638 of file prob_distr.c.
STATIC double icdf_genpareto | ( | double | p, |
double | mu, | ||
double | sigma, | ||
double | xi | ||
) |
Compute the inverse of the CDF of the GeneralizedPareto(mu, sigma, xi) distribution. Ill-conditioned for p near 1; condition number is
xi (p/(1 - p))/(1 - (1 - p)^xi)
Definition at line 853 of file prob_distr.c.
STATIC double icdf_log_logistic | ( | double | p, |
double | alpha, | ||
double | beta | ||
) |
Compute the inverse of the CDF of the LogLogistic(alpha, beta) distribution. Ill-conditioned for p near 1 and beta near 0 with condition number 1/[beta (1 - p)].
Definition at line 675 of file prob_distr.c.
STATIC double icdf_logistic | ( | double | p, |
double | mu, | ||
double | sigma | ||
) |
Compute the inverse of the CDF of the Logistic(mu, sigma) distribution: the logit function. Well-conditioned near 0; ill-conditioned near 1/2 and 1.
Definition at line 534 of file prob_distr.c.
STATIC double icdf_weibull | ( | double | p, |
double | lambda, | ||
double | k | ||
) |
Compute the inverse of the CDF of the Weibull(lambda, k) distribution. Ill-conditioned for p near 1, and for k near 0; condition number is
(p/(1 - p))/(k log(1 - p)).
Definition at line 739 of file prob_distr.c.
STATIC double isf_genpareto | ( | double | p, |
double | mu, | ||
double | sigma, | ||
double | xi | ||
) |
Compute the inverse of the SF of the GeneralizedPareto(mu, sigma, xi) distribution. Ill-conditioned for p near 1; condition number is
-xi/(1 - p^{-xi})
Definition at line 899 of file prob_distr.c.
STATIC double isf_log_logistic | ( | double | p, |
double | alpha, | ||
double | beta | ||
) |
Compute the inverse of the SF of the LogLogistic(alpha, beta) distribution. Ill-conditioned for p near 1 and for large beta, with condition number -1/[beta (1 - p)].
Definition at line 686 of file prob_distr.c.
STATIC double isf_logistic | ( | double | p, |
double | mu, | ||
double | sigma | ||
) |
Compute the inverse of the SF of the Logistic(mu, sigma) distribution: the -logit function. Well-conditioned near 0; ill-conditioned near 1/2 and 1.
Definition at line 545 of file prob_distr.c.
STATIC double isf_weibull | ( | double | p, |
double | lambda, | ||
double | k | ||
) |
Compute the inverse of the SF of the Weibull(lambda, k) distribution. Ill-conditioned for p near 0, and for k near 0; condition number is
1/(k log(p)).
Definition at line 752 of file prob_distr.c.
|
static |
Functions for log-logistic distribution:
Definition at line 1491 of file prob_distr.c.
STATIC double logistic | ( | double | x | ) |
Compute the logistic function: f(x) = 1/(1 + e^{-x}) = e^x/(1 + e^x). Maps a log-odds-space probability in [-infinity, +infinity] into a direct-space probability in [0,1]. Inverse of logit.
Ill-conditioned for large x; the identity logistic(-x) = 1 - logistic(x) and the function logistichalf(x) = logistic(x) - 1/2 may help to rearrange a computation.
This implementation gives relative error bounded by 7 eps.
Definition at line 193 of file prob_distr.c.
Referenced by cdf_logistic(), and sf_logistic().
|
static |
Private functions for each probability distribution. Functions for logistic distribution:
Definition at line 1441 of file prob_distr.c.
STATIC double logit | ( | double | p | ) |
Compute the logit function: log p/(1 - p). Defined on [0,1]. Maps a direct-space probability in [0,1] to a log-odds-space probability in [-infinity, +infinity]. Inverse of logistic.
Ill-conditioned near 1/2 and 1; the identity logit(1 - p) = -logit(p) and the function logithalf(p0) = logit(1/2 + p0) may help to rearrange a computation for p in [1/(1 + e), 1 - 1/(1 + e)].
This implementation gives relative error bounded by 10 eps.
Definition at line 280 of file prob_distr.c.
Referenced by icdf_logistic(), isf_logistic(), and sample_logistic().
STATIC double logithalf | ( | double | p0 | ) |
Compute the logit function, translated in input by 1/2: logithalf(p) = logit(1/2 + p). Defined on [-1/2, 1/2]. Inverse of logistichalf.
Ill-conditioned near +/-1/2. If |p0| > 1/2 - 1/(1 + e), it may be better to compute 1/2 + p0 or -1/2 - p0 and to use logit instead. This implementation gives relative error bounded by 34 eps.
Definition at line 360 of file prob_distr.c.
Referenced by sample_logistic().
STATIC double random_uniform_01 | ( | void | ) |
Draw a floating-point number in [0, 1] with uniform distribution.
Note that the probability of returning 0 is less than 2^-1074, so callers need not check for it. However, callers that cannot handle rounding to 1 must deal with that, because it occurs with probability 2^-54, which is small but nonnegligible.
Definition at line 453 of file prob_distr.c.
Referenced by genpareto_sample(), geometric_sample(), log_logistic_sample(), logistic_sample(), uniform_sample(), and weibull_sample().
|
static |
Deterministically sample from the standard exponential distribution, indexed by a uniform random 32-bit integer s and a uniform random floating-point number p0 in (0, 1].
Definition at line 1196 of file prob_distr.c.
Referenced by sample_genpareto(), and sample_geometric().
STATIC double sample_genpareto | ( | uint32_t | s, |
double | p0, | ||
double | xi | ||
) |
Deterministically sample from the generalized Pareto distribution with shape xi, indexed by a uniform random 32-bit integer s and a uniform random floating-point number p0 in (0, 1].
Definition at line 1231 of file prob_distr.c.
Referenced by sample_genpareto_locscale().
|
static |
Deterministically sample from a generalized Pareto distribution with shape xi, scaled by sigma and translated by mu.
Definition at line 1277 of file prob_distr.c.
Referenced by genpareto_sample().
|
static |
Deterministically sample from the geometric distribution with per-trial success probability p.
Definition at line 1307 of file prob_distr.c.
Referenced by geometric_sample().
STATIC double sample_log_logistic | ( | uint32_t | s, |
double | p0 | ||
) |
Deterministically sample from the standard log-logistic distribution, indexed by a uniform random 32-bit integer s and a uniform random floating-point number p0 in (0, 1].
Definition at line 1160 of file prob_distr.c.
Referenced by sample_log_logistic_scaleshape().
|
static |
Deterministically sample from the log-logistic distribution with scale alpha and shape beta.
Definition at line 1182 of file prob_distr.c.
Referenced by log_logistic_sample().
STATIC double sample_logistic | ( | uint32_t | s, |
double | t, | ||
double | p0 | ||
) |
Deterministically sample from the standard logistic distribution, indexed by a uniform random 32-bit integer s and uniform random floating-point numbers t and p0 in (0, 1].
Definition at line 1094 of file prob_distr.c.
Referenced by sample_logistic_locscale().
|
static |
Deterministically sample from the logistic distribution scaled by sigma and translated by mu.
Definition at line 1147 of file prob_distr.c.
Referenced by logistic_sample().
STATIC double sample_uniform_interval | ( | double | p0, |
double | a, | ||
double | b | ||
) |
Deterministic samplers, parametrized by uniform integer and (0,1] samples. No guarantees are made about which mapping from the integer and (0,1] samples these use; all that is guaranteed is the distribution of the outputs conditioned on a uniform distribution on the inputs. The automatic tests in test_prob_distr.c double-check the particular mappings we use.
Beware: Unlike random_uniform_01(), these are not guaranteed to be supported on all possible outputs. See Ilya Mironov, `On the Significance of the Least Significant Bits for Differential Privacy', for an example of what can go wrong if you try to use these to conceal information from an adversary but you expose the specific full-precision floating-point values.
Note: None of these samplers use rejection sampling; they are all essentially inverse-CDF transforms with tweaks. If you were to add, say, a Gamma sampler with the Marsaglia-Tsang method, you would have to parametrize it by a potentially infinite stream of uniform (and perhaps normal) samples rather than a fixed number, which doesn't make for quite as nice automatic testing as for these. Deterministically sample from the interval [a, b], indexed by a uniform random floating-point number p0 in (0, 1].
Note that even if p0 is nonzero, the result may be equal to a, if ulp(a)/2 is nonnegligible, e.g. if a = 1. For maximum resolution, arrange |a| <= |b|.
Definition at line 941 of file prob_distr.c.
Referenced by uniform_sample().
STATIC double sample_weibull | ( | uint32_t | s, |
double | p0, | ||
double | lambda, | ||
double | k | ||
) |
Deterministically sample from a Weibull distribution with scale lambda and shape k – just an exponential with a shape parameter in addition to a scale parameter. (Yes, lambda really is the scale, not the rate.)
Definition at line 1219 of file prob_distr.c.
Referenced by weibull_sample().
STATIC double sf_genpareto | ( | double | x, |
double | mu, | ||
double | sigma, | ||
double | xi | ||
) |
Compute the SF of the GeneralizedPareto(mu, sigma, xi) distribution. For standard distribution (mu=0, sigma=1), ill-conditioned for xi near 0; condition number
-x (1 + x xi)^{(-1 - xi)/xi}/(1 + x xi)^{-1/xi} = -x (1 + x xi)^{-1/xi - 1}/(1 + x xi)^{-1/xi} = -(x/(1 + x xi)) (1 + x xi)^{-1/xi}/(1 + x xi)^{-1/xi} = -x/(1 + x xi)
is bounded by 1/xi.
Definition at line 836 of file prob_distr.c.
STATIC double sf_log_logistic | ( | double | x, |
double | alpha, | ||
double | beta | ||
) |
Compute the SF of the LogLogistic(alpha, beta) distribution. Well-conditioned for all x and alpha, and the condition number
beta/[1 + (x/alpha)^beta]
grows linearly with beta.
Loosely, the relative error of this implementation is bounded by
4 eps + 2 eps^2 + O(beta eps)
so don't bother trying this for beta anywhere near as large as 1/eps, beyond which point it grows unbounded.
Definition at line 659 of file prob_distr.c.
STATIC double sf_logistic | ( | double | x, |
double | mu, | ||
double | sigma | ||
) |
Compute the SF of the Logistic(mu, sigma) distribution: the logistic function reflected over the y axis. Well-conditioned for positive inputs and small negative inputs; ill-conditioned for large negative inputs.
Definition at line 523 of file prob_distr.c.
STATIC double sf_weibull | ( | double | x, |
double | lambda, | ||
double | k | ||
) |
Compute the SF of the Weibull(lambda, k) distribution. Well-conditioned for small x and k, and for large lambda – condition number
-k (x/lambda)^k
grows linearly with k, x^k, and lambda^{-k}.
Definition at line 726 of file prob_distr.c.
|
static |
Functions for uniform distribution
Definition at line 1374 of file prob_distr.c.
|
static |
Functions for Weibull distribution
Definition at line 1540 of file prob_distr.c.
const struct dist_ops_t genpareto_ops |
Definition at line 1620 of file prob_distr.c.
const struct dist_ops_t geometric_ops |
Definition at line 1678 of file prob_distr.c.
const struct dist_ops_t log_logistic_ops |
Definition at line 1522 of file prob_distr.c.
const struct dist_ops_t logistic_ops |
Definition at line 1473 of file prob_distr.c.
const struct dist_ops_t uniform_ops |
Definition at line 1417 of file prob_distr.c.
const struct dist_ops_t weibull_ops |
Definition at line 1571 of file prob_distr.c.