tor  0.4.2.1-alpha-dev
prob_distr.c
Go to the documentation of this file.
1 /* Copyright (c) 2018-2019, The Tor Project, Inc. */
2 /* See LICENSE for licensing information */
3 
41 #define PROB_DISTR_PRIVATE
42 
43 #include "orconfig.h"
44 
45 #include "lib/math/prob_distr.h"
46 
48 #include "lib/cc/ctassert.h"
49 #include "lib/log/util_bug.h"
50 
51 #include <float.h>
52 #include <math.h>
53 #include <stddef.h>
54 
57 #define DECLARE_PROB_DISTR_DOWNCAST_FN(name) \
58  static inline \
59  const struct name * \
60  dist_to_const_##name(const struct dist *obj) { \
61  tor_assert(obj->ops == &name##_ops); \
62  return SUBTYPE_P(obj, struct name, base); \
63  }
70 
71 
74 static unsigned
75 bitcount32(uint32_t x)
76 {
77 
78  /* Count two-bit groups. */
79  x -= (x >> 1) & UINT32_C(0x55555555);
80 
81  /* Count four-bit groups. */
82  x = ((x >> 2) & UINT32_C(0x33333333)) + (x & UINT32_C(0x33333333));
83 
84  /* Count eight-bit groups. */
85  x = (x + (x >> 4)) & UINT32_C(0x0f0f0f0f);
86 
87  /* Sum all eight-bit groups, and extract the sum. */
88  return (x * UINT32_C(0x01010101)) >> 24;
89 }
90 
94 static unsigned
95 clz32(uint32_t x)
96 {
97 
98  /* Round up to a power of two. */
99  x |= x >> 1;
100  x |= x >> 2;
101  x |= x >> 4;
102  x |= x >> 8;
103  x |= x >> 16;
104 
105  /* Subtract count of one bits from 32. */
106  return (32 - bitcount32(x));
107 }
108 
109 /*
110  * Some lemmas that will be used throughout this file to prove various error
111  * bounds:
112  *
113  * Lemma 1. If |d| <= 1/2, then 1/(1 + d) <= 2.
114  *
115  * Proof. If 0 <= d <= 1/2, then 1 + d >= 1, so that 1/(1 + d) <= 1.
116  * If -1/2 <= d <= 0, then 1 + d >= 1/2, so that 1/(1 + d) <= 2. QED.
117  *
118  * Lemma 2. If b = a*(1 + d)/(1 + d') for |d'| < 1/2 and nonzero a, b,
119  * then b = a*(1 + e) for |e| <= 2|d' - d|.
120  *
121  * Proof. |a - b|/|a|
122  * = |a - a*(1 + d)/(1 + d')|/|a|
123  * = |1 - (1 + d)/(1 + d')|
124  * = |(1 + d' - 1 - d)/(1 + d')|
125  * = |(d' - d)/(1 + d')|
126  * <= 2|d' - d|, by Lemma 1,
127  *
128  * QED.
129  *
130  * Lemma 3. For |d|, |d'| < 1/4,
131  *
132  * |log((1 + d)/(1 + d'))| <= 4|d - d'|.
133  *
134  * Proof. Write
135  *
136  * log((1 + d)/(1 + d'))
137  * = log(1 + (1 + d)/(1 + d') - 1)
138  * = log(1 + (1 + d - 1 - d')/(1 + d')
139  * = log(1 + (d - d')/(1 + d')).
140  *
141  * By Lemma 1, |(d - d')/(1 + d')| < 2|d' - d| < 1, so the Taylor
142  * series of log(1 + x) converges absolutely for (d - d')/(1 + d'),
143  * and thus we have
144  *
145  * |log(1 + (d - d')/(1 + d'))|
146  * = |\sum_{n=1}^\infty ((d - d')/(1 + d'))^n/n|
147  * <= \sum_{n=1}^\infty |(d - d')/(1 + d')|^n/n
148  * <= \sum_{n=1}^\infty |2(d' - d)|^n/n
149  * <= \sum_{n=1}^\infty |2(d' - d)|^n
150  * = 1/(1 - |2(d' - d)|)
151  * <= 4|d' - d|,
152  *
153  * QED.
154  *
155  * Lemma 4. If 1/e <= 1 + x <= e, then
156  *
157  * log(1 + (1 + d) x) = (1 + d') log(1 + x)
158  *
159  * for |d'| < 8|d|.
160  *
161  * Proof. Write
162  *
163  * log(1 + (1 + d) x)
164  * = log(1 + x + x*d)
165  * = log((1 + x) (1 + x + x*d)/(1 + x))
166  * = log(1 + x) + log((1 + x + x*d)/(1 + x))
167  * = log(1 + x) (1 + log((1 + x + x*d)/(1 + x))/log(1 + x)).
168  *
169  * The relative error is bounded by
170  *
171  * |log((1 + x + x*d)/(1 + x))/log(1 + x)|
172  * <= 4|x + x*d - x|/|log(1 + x)|, by Lemma 3,
173  * = 4|x*d|/|log(1 + x)|
174  * < 8|d|,
175  *
176  * since in this range 0 < 1 - 1/e < x/log(1 + x) <= e - 1 < 2. QED.
177  */
178 
190 STATIC double
191 logistic(double x)
192 {
193  if (x <= log(DBL_EPSILON/2)) {
194  /*
195  * If x <= log(DBL_EPSILON/2) = log(eps), then e^x <= eps. In this case
196  * we will approximate the logistic() function with e^x because the
197  * relative error is less than eps. Here is a calculation of the
198  * relative error between the logistic() function and e^x and a proof
199  * that it's less than eps:
200  *
201  * |e^x - e^x/(1 + e^x)|/|e^x/(1 + e^x)|
202  * <= |1 - 1/(1 + e^x)|*|1 + e^x|
203  * = |e^x/(1 + e^x)|*|1 + e^x|
204  * = |e^x|
205  * <= eps.
206  */
207  return exp(x); /* return e^x */
208  } else if (x <= -log(DBL_EPSILON/2)) {
209  /*
210  * e^{-x} > 0, so 1 + e^{-x} > 1, and 0 < 1/(1 +
211  * e^{-x}) < 1; further, since e^{-x} < 1 + e^{-x}, we
212  * also have 0 < 1/(1 + e^{-x}) < 1. Thus, if exp has
213  * relative error d0, + has relative error d1, and /
214  * has relative error d2, then we get
215  *
216  * (1 + d2)/[(1 + (1 + d0) e^{-x})(1 + d1)]
217  * = (1 + d0)/[1 + e^{-x} + d0 e^{-x}
218  * + d1 + d1 e^{-x} + d0 d1 e^{-x}]
219  * = (1 + d0)/[(1 + e^{-x})
220  * * (1 + d0 e^{-x}/(1 + e^{-x})
221  * + d1/(1 + e^{-x})
222  * + d0 d1 e^{-x}/(1 + e^{-x}))].
223  * = (1 + d0)/[(1 + e^{-x})(1 + d')]
224  * = [1/(1 + e^{-x})] (1 + d0)/(1 + d')
225  *
226  * where
227  *
228  * d' = d0 e^{-x}/(1 + e^{-x})
229  * + d1/(1 + e^{-x})
230  * + d0 d1 e^{-x}/(1 + e^{-x}).
231  *
232  * By Lemma 2 this relative error is bounded by
233  *
234  * 2|d0 - d'|
235  * = 2|d0 - d0 e^{-x}/(1 + e^{-x})
236  * - d1/(1 + e^{-x})
237  * - d0 d1 e^{-x}/(1 + e^{-x})|
238  * <= 2|d0| + 2|d0 e^{-x}/(1 + e^{-x})|
239  * + 2|d1/(1 + e^{-x})|
240  * + 2|d0 d1 e^{-x}/(1 + e^{-x})|
241  * <= 2|d0| + 2|d0| + 2|d1| + 2|d0 d1|
242  * <= 4|d0| + 2|d1| + 2|d0 d1|
243  * <= 6 eps + 2 eps^2.
244  */
245  return 1/(1 + exp(-x));
246  } else {
247  /*
248  * e^{-x} <= eps, so the relative error of 1 from 1/(1
249  * + e^{-x}) is
250  *
251  * |1/(1 + e^{-x}) - 1|/|1/(1 + e^{-x})|
252  * = |e^{-x}/(1 + e^{-x})|/|1/(1 + e^{-x})|
253  * = |e^{-x}|
254  * <= eps.
255  *
256  * This computation avoids an intermediate overflow
257  * exception, although the effect on the result is
258  * harmless.
259  *
260  * XXX Should maybe raise inexact here.
261  */
262  return 1;
263  }
264 }
265 
277 STATIC double
278 logit(double p)
279 {
280 
281  /* logistic(-1) <= p <= logistic(+1) */
282  if (1/(1 + exp(1)) <= p && p <= 1/(1 + exp(-1))) {
283  /*
284  * For inputs near 1/2, we want to compute log1p(near
285  * 0) rather than log(near 1), so write this as:
286  *
287  * log(p/(1 - p)) = -log((1 - p)/p)
288  * = -log(1 + (1 - p)/p - 1)
289  * = -log(1 + (1 - p - p)/p)
290  * = -log(1 + (1 - 2p)/p).
291  *
292  * Since p = 2p/2 <= 1 <= 2*2p = 4p, the floating-point
293  * evaluation of 1 - 2p is exact; the only error arises
294  * from division and log1p. First, note that if
295  * logistic(-1) <= p <= logistic(+1), (1 - 2p)/p lies
296  * in the bounds of Lemma 4.
297  *
298  * If division has relative error d0 and log1p has
299  * relative error d1, the outcome is
300  *
301  * -(1 + d1) log(1 + (1 - 2p) (1 + d0)/p)
302  * = -(1 + d1) (1 + d') log(1 + (1 - 2p)/p)
303  * = -(1 + d1 + d' + d1 d') log(1 + (1 - 2p)/p).
304  *
305  * where |d'| < 8|d0| by Lemma 4. The relative error
306  * is then bounded by
307  *
308  * |d1 + d' + d1 d'|
309  * <= |d1| + 8|d0| + 8|d1 d0|
310  * <= 9 eps + 8 eps^2.
311  */
312  return -log1p((1 - 2*p)/p);
313  } else {
314  /*
315  * For inputs near 0, although 1 - p may be rounded to
316  * 1, it doesn't matter much because the magnitude of
317  * the result is so much larger. For inputs near 1, we
318  * can compute 1 - p exactly, although the precision on
319  * the input is limited so we won't ever get more than
320  * about 700 for the output.
321  *
322  * If - has relative error d0, / has relative error d1,
323  * and log has relative error d2, then
324  *
325  * (1 + d2) log((1 + d0) p/[(1 - p)(1 + d1)])
326  * = (1 + d2) [log(p/(1 - p)) + log((1 + d0)/(1 + d1))]
327  * = log(p/(1 - p)) + d2 log(p/(1 - p))
328  * + (1 + d2) log((1 + d0)/(1 + d1))
329  * = log(p/(1 - p))*[1 + d2 +
330  * + (1 + d2) log((1 + d0)/(1 + d1))/log(p/(1 - p))]
331  *
332  * Since 0 <= p < logistic(-1) or logistic(+1) < p <=
333  * 1, we have |log(p/(1 - p))| > 1. Hence this error
334  * is bounded by
335  *
336  * |d2 + (1 + d2) log((1 + d0)/(1 + d1))/log(p/(1 - p))|
337  * <= |d2| + |(1 + d2) log((1 + d0)/(1 + d1))
338  * / log(p/(1 - p))|
339  * <= |d2| + |(1 + d2) log((1 + d0)/(1 + d1))|
340  * <= |d2| + 4|(1 + d2) (d0 - d1)|, by Lemma 3,
341  * <= |d2| + 4|d0 - d1 + d2 d0 - d1 d0|
342  * <= |d2| + 4|d0| + 4|d1| + 4|d2 d0| + 4|d1 d0|
343  * <= 9 eps + 8 eps^2.
344  */
345  return log(p/(1 - p));
346  }
347 }
348 
357 STATIC double
358 logithalf(double p0)
359 {
360 
361  if (fabs(p0) <= 0.5 - 1/(1 + exp(1))) {
362  /*
363  * logit(1/2 + p0)
364  * = log((1/2 + p0)/(1 - (1/2 + p0)))
365  * = log((1/2 + p0)/(1/2 - p0))
366  * = log(1 + (1/2 + p0)/(1/2 - p0) - 1)
367  * = log(1 + (1/2 + p0 - (1/2 - p0))/(1/2 - p0))
368  * = log(1 + (1/2 + p0 - 1/2 + p0)/(1/2 - p0))
369  * = log(1 + 2 p0/(1/2 - p0))
370  *
371  * If the error of subtraction is d0, the error of
372  * division is d1, and the error of log1p is d2, then
373  * what we compute is
374  *
375  * (1 + d2) log(1 + (1 + d1) 2 p0/[(1 + d0) (1/2 - p0)])
376  * = (1 + d2) log(1 + (1 + d') 2 p0/(1/2 - p0))
377  * = (1 + d2) (1 + d'') log(1 + 2 p0/(1/2 - p0))
378  * = (1 + d2 + d'' + d2 d'') log(1 + 2 p0/(1/2 - p0)),
379  *
380  * where |d'| < 2|d0 - d1| <= 4 eps by Lemma 2, and
381  * |d''| < 8|d'| < 32 eps by Lemma 4 since
382  *
383  * 1/e <= 1 + 2*p0/(1/2 - p0) <= e
384  *
385  * when |p0| <= 1/2 - 1/(1 + e). Hence the relative
386  * error is bounded by
387  *
388  * |d2 + d'' + d2 d''|
389  * <= |d2| + |d''| + |d2 d''|
390  * <= |d1| + 32 |d0| + 32 |d1 d0|
391  * <= 33 eps + 32 eps^2.
392  */
393  return log1p(2*p0/(0.5 - p0));
394  } else {
395  /*
396  * We have a choice of computing logit(1/2 + p0) or
397  * -logit(1 - (1/2 + p0)) = -logit(1/2 - p0). It
398  * doesn't matter which way we do this: either way,
399  * since 1/2 p0 <= 1/2 <= 2 p0, the sum and difference
400  * are computed exactly. So let's do the one that
401  * skips the final negation.
402  *
403  * The result is
404  *
405  * (1 + d1) log((1 + d0) (1/2 + p0)/[(1 + d2) (1/2 - p0)])
406  * = (1 + d1) (1 + log((1 + d0)/(1 + d2))
407  * / log((1/2 + p0)/(1/2 - p0)))
408  * * log((1/2 + p0)/(1/2 - p0))
409  * = (1 + d') log((1/2 + p0)/(1/2 - p0))
410  * = (1 + d') logit(1/2 + p0)
411  *
412  * where
413  *
414  * d' = d1 + log((1 + d0)/(1 + d2))/logit(1/2 + p0)
415  * + d1 log((1 + d0)/(1 + d2))/logit(1/2 + p0).
416  *
417  * For |p| > 1/2 - 1/(1 + e), logit(1/2 + p0) > 1.
418  * Provided |d0|, |d2| < 1/4, by Lemma 3 we have
419  *
420  * |log((1 + d0)/(1 + d2))| <= 4|d0 - d2|.
421  *
422  * Hence the relative error is bounded by
423  *
424  * |d'| <= |d1| + 4|d0 - d2| + 4|d1| |d0 - d2|
425  * <= |d1| + 4|d0| + 4|d2| + 4|d1 d0| + 4|d1 d2|
426  * <= 9 eps + 8 eps^2.
427  */
428  return log((0.5 + p0)/(0.5 - p0));
429  }
430 }
431 
432 /*
433  * The following random_uniform_01 is tailored for IEEE 754 binary64
434  * floating-point or smaller. It can be adapted to larger
435  * floating-point formats like i387 80-bit or IEEE 754 binary128, but
436  * it may require sampling more bits.
437  */
438 CTASSERT(FLT_RADIX == 2);
439 CTASSERT(-DBL_MIN_EXP <= 1021);
440 CTASSERT(DBL_MANT_DIG <= 53);
441 
450 STATIC double
452 {
453  uint32_t z, x, hi, lo;
454  double s;
455 
456  /*
457  * Draw an exponent, geometrically distributed, but give up if
458  * we get a run of more than 1088 zeros, which really means the
459  * system is broken.
460  */
461  z = 0;
462  while ((x = crypto_fast_rng_get_u32(get_thread_fast_rng())) == 0) {
463  if (z >= 1088)
464  /* Your bit sampler is broken. Go home. */
465  return 0;
466  z += 32;
467  }
468  z += clz32(x);
469 
470  /*
471  * Pick 32-bit halves of an odd normalized significand.
472  * Picking it odd breaks ties in the subsequent rounding, which
473  * occur only with measure zero in the uniform distribution on
474  * [0, 1].
475  */
476  hi = crypto_fast_rng_get_u32(get_thread_fast_rng()) | UINT32_C(0x80000000);
477  lo = crypto_fast_rng_get_u32(get_thread_fast_rng()) | UINT32_C(0x00000001);
478 
479  /* Round to nearest scaled significand in [2^63, 2^64]. */
480  s = hi*(double)4294967296 + lo;
481 
482  /* Rescale into [1/2, 1] and apply exponent in one swell foop. */
483  return s * ldexp(1, -(64 + z));
484 }
485 
486 /*******************************************************************/
487 
488 /* Functions for specific probability distributions start here: */
489 
490 /*
491  * Logistic(mu, sigma) distribution, supported on (-\infty,+\infty)
492  *
493  * This is the uniform distribution on [0,1] mapped into log-odds
494  * space, scaled by sigma and translated by mu.
495  *
496  * pdf(x) = e^{-(x - mu)/sigma} sigma (1 + e^{-(x - mu)/sigma})^2
497  * cdf(x) = 1/(1 + e^{-(x - mu)/sigma}) = logistic((x - mu)/sigma)
498  * sf(x) = 1 - cdf(x) = 1 - logistic((x - mu)/sigma = logistic(-(x - mu)/sigma)
499  * icdf(p) = mu + sigma log p/(1 - p) = mu + sigma logit(p)
500  * isf(p) = mu + sigma log (1 - p)/p = mu - sigma logit(p)
501  */
502 
508 STATIC double
509 cdf_logistic(double x, double mu, double sigma)
510 {
511  return logistic((x - mu)/sigma);
512 }
513 
520 STATIC double
521 sf_logistic(double x, double mu, double sigma)
522 {
523  return logistic(-(x - mu)/sigma);
524 }
525 
531 STATIC double
532 icdf_logistic(double p, double mu, double sigma)
533 {
534  return mu + sigma*logit(p);
535 }
536 
542 STATIC double
543 isf_logistic(double p, double mu, double sigma)
544 {
545  return mu - sigma*logit(p);
546 }
547 
548 /*
549  * LogLogistic(alpha, beta) distribution, supported on (0, +\infty).
550  *
551  * This is the uniform distribution on [0,1] mapped into odds space,
552  * scaled by positive alpha and shaped by positive beta.
553  *
554  * Equivalent to computing exp of a Logistic(log alpha, 1/beta) sample.
555  * (Name arises because the pdf has LogLogistic(x; alpha, beta) =
556  * Logistic(log x; log alpha, 1/beta) and mathematicians got their
557  * covariance contravariant.)
558  *
559  * pdf(x) = (beta/alpha) (x/alpha)^{beta - 1}/(1 + (x/alpha)^beta)^2
560  * = (1/e^mu sigma) (x/e^mu)^{1/sigma - 1} /
561  * (1 + (x/e^mu)^{1/sigma})^2
562  * cdf(x) = 1/(1 + (x/alpha)^-beta) = 1/(1 + (x/e^mu)^{-1/sigma})
563  * = 1/(1 + (e^{log x}/e^mu)^{-1/sigma})
564  * = 1/(1 + (e^{log x - mu})^{-1/sigma})
565  * = 1/(1 + e^{-(log x - mu)/sigma})
566  * = logistic((log x - mu)/sigma)
567  * = logistic((log x - log alpha)/(1/beta))
568  * sf(x) = 1 - 1/(1 + (x/alpha)^-beta)
569  * = (x/alpha)^-beta/(1 + (x/alpha)^-beta)
570  * = 1/((x/alpha)^beta + 1)
571  * = 1/(1 + (x/alpha)^beta)
572  * icdf(p) = alpha (p/(1 - p))^{1/beta}
573  * = alpha e^{logit(p)/beta}
574  * = e^{mu + sigma logit(p)}
575  * isf(p) = alpha ((1 - p)/p)^{1/beta}
576  * = alpha e^{-logit(p)/beta}
577  * = e^{mu - sigma logit(p)}
578  */
579 
595 STATIC double
596 cdf_log_logistic(double x, double alpha, double beta)
597 {
598  /*
599  * Let d0 be the error of x/alpha; d1, of pow; d2, of +; and
600  * d3, of the final quotient. The exponentiation gives
601  *
602  * ((1 + d0) x/alpha)^{-beta}
603  * = (x/alpha)^{-beta} (1 + d0)^{-beta}
604  * = (x/alpha)^{-beta} (1 + (1 + d0)^{-beta} - 1)
605  * = (x/alpha)^{-beta} (1 + d')
606  *
607  * where d' = (1 + d0)^{-beta} - 1. If y = (x/alpha)^{-beta},
608  * the denominator is
609  *
610  * (1 + d2) (1 + (1 + d1) (1 + d') y)
611  * = (1 + d2) (1 + y + (d1 + d' + d1 d') y)
612  * = 1 + y + (1 + d2) (d1 + d' + d1 d') y
613  * = (1 + y) (1 + (1 + d2) (d1 + d' + d1 d') y/(1 + y))
614  * = (1 + y) (1 + d''),
615  *
616  * where d'' = (1 + d2) (d1 + d' + d1 d') y/(1 + y). The
617  * final result is
618  *
619  * (1 + d3) / [(1 + d2) (1 + d'') (1 + y)]
620  * = (1 + d''') / (1 + y)
621  *
622  * for |d'''| <= 2|d3 - d''| by Lemma 2 as long as |d''| < 1/2
623  * (which may not be the case for very large beta). This
624  * relative error is therefore bounded by
625  *
626  * |d'''|
627  * <= 2|d3 - d''|
628  * <= 2|d3| + 2|(1 + d2) (d1 + d' + d1 d') y/(1 + y)|
629  * <= 2|d3| + 2|(1 + d2) (d1 + d' + d1 d')|
630  * = 2|d3| + 2|d1 + d' + d1 d' + d2 d1 + d2 d' + d2 d1 d'|
631  * <= 2|d3| + 2|d1| + 2|d'| + 2|d1 d'| + 2|d2 d1| + 2|d2 d'|
632  * + 2|d2 d1 d'|
633  * <= 4 eps + 2 eps^2 + (2 + 2 eps + 2 eps^2) |d'|.
634  *
635  * Roughly, |d'| = |(1 + d0)^{-beta} - 1| grows like beta eps,
636  * until it levels off at 1.
637  */
638  return 1/(1 + pow(x/alpha, -beta));
639 }
640 
656 STATIC double
657 sf_log_logistic(double x, double alpha, double beta)
658 {
659  /*
660  * The error analysis here is essentially the same as in
661  * cdf_log_logistic, except that rather than levelling off at
662  * 1, |(1 + d0)^beta - 1| grows unbounded.
663  */
664  return 1/(1 + pow(x/alpha, beta));
665 }
666 
672 STATIC double
673 icdf_log_logistic(double p, double alpha, double beta)
674 {
675  return alpha*pow(p/(1 - p), 1/beta);
676 }
677 
683 STATIC double
684 isf_log_logistic(double p, double alpha, double beta)
685 {
686  return alpha*pow((1 - p)/p, 1/beta);
687 }
688 
689 /*
690  * Weibull(lambda, k) distribution, supported on (0, +\infty).
691  *
692  * pdf(x) = (k/lambda) (x/lambda)^{k - 1} e^{-(x/lambda)^k}
693  * cdf(x) = 1 - e^{-(x/lambda)^k}
694  * icdf(p) = lambda * (-log (1 - p))^{1/k}
695  * sf(x) = e^{-(x/lambda)^k}
696  * isf(p) = lambda * (-log p)^{1/k}
697  */
698 
708 STATIC double
709 cdf_weibull(double x, double lambda, double k)
710 {
711  return -expm1(-pow(x/lambda, k));
712 }
713 
723 STATIC double
724 sf_weibull(double x, double lambda, double k)
725 {
726  return exp(-pow(x/lambda, k));
727 }
728 
736 STATIC double
737 icdf_weibull(double p, double lambda, double k)
738 {
739  return lambda*pow(-log1p(-p), 1/k);
740 }
741 
749 STATIC double
750 isf_weibull(double p, double lambda, double k)
751 {
752  return lambda*pow(-log(p), 1/k);
753 }
754 
755 /*
756  * GeneralizedPareto(mu, sigma, xi), supported on (mu, +\infty) for
757  * nonnegative xi, or (mu, mu - sigma/xi) for negative xi.
758  *
759  * Samples:
760  * = mu - sigma log U, if xi = 0;
761  * = mu + sigma (U^{-xi} - 1)/xi = mu + sigma*expm1(-xi log U)/xi, if xi =/= 0,
762  * where U is uniform on (0,1].
763  * = mu + sigma (e^{xi X} - 1)/xi,
764  * where X has standard exponential distribution.
765  *
766  * pdf(x) = sigma^{-1} (1 + xi (x - mu)/sigma)^{-(1 + 1/xi)}
767  * cdf(x) = 1 - (1 + xi (x - mu)/sigma)^{-1/xi}
768  * = 1 - e^{-log(1 + xi (x - mu)/sigma)/xi}
769  * --> 1 - e^{-(x - mu)/sigma} as xi --> 0
770  * sf(x) = (1 + xi (x - mu)/sigma)^{-1/xi}
771  * --> e^{-(x - mu)/sigma} as xi --> 0
772  * icdf(p) = mu + sigma*(p^{-xi} - 1)/xi
773  * = mu + sigma*expm1(-xi log p)/xi
774  * --> mu + sigma*log p as xi --> 0
775  * isf(p) = mu + sigma*((1 - p)^{xi} - 1)/xi
776  * = mu + sigma*expm1(-xi log1p(-p))/xi
777  * --> mu + sigma*log1p(-p) as xi --> 0
778  */
779 
789 STATIC double
790 cdf_genpareto(double x, double mu, double sigma, double xi)
791 {
792  double x_0 = (x - mu)/sigma;
793 
794  /*
795  * log(1 + xi x_0)/xi
796  * = (-1/xi) \sum_{n=1}^\infty (-xi x_0)^n/n
797  * = (-1/xi) (-xi x_0 + \sum_{n=2}^\infty (-xi x_0)^n/n)
798  * = x_0 - (1/xi) \sum_{n=2}^\infty (-xi x_0)^n/n
799  * = x_0 - x_0 \sum_{n=2}^\infty (-xi x_0)^{n-1}/n
800  * = x_0 (1 - d),
801  *
802  * where d = \sum_{n=2}^\infty (-xi x_0)^{n-1}/n. If |xi| <
803  * eps/4|x_0|, then
804  *
805  * |d| <= \sum_{n=2}^\infty (eps/4)^{n-1}/n
806  * <= \sum_{n=2}^\infty (eps/4)^{n-1}
807  * = \sum_{n=1}^\infty (eps/4)^n
808  * = (eps/4) \sum_{n=0}^\infty (eps/4)^n
809  * = (eps/4)/(1 - eps/4)
810  * < eps/2
811  *
812  * for any 0 < eps < 2. Thus, the relative error of x_0 from
813  * log(1 + xi x_0)/xi is bounded by eps.
814  */
815  if (fabs(xi) < 1e-17/x_0)
816  return -expm1(-x_0);
817  else
818  return -expm1(-log1p(xi*x_0)/xi);
819 }
820 
833 STATIC double
834 sf_genpareto(double x, double mu, double sigma, double xi)
835 {
836  double x_0 = (x - mu)/sigma;
837 
838  if (fabs(xi) < 1e-17/x_0)
839  return exp(-x_0);
840  else
841  return exp(-log1p(xi*x_0)/xi);
842 }
843 
850 STATIC double
851 icdf_genpareto(double p, double mu, double sigma, double xi)
852 {
853  /*
854  * To compute f(xi) = (U^{-xi} - 1)/xi = (e^{-xi log U} - 1)/xi
855  * for xi near zero (note f(xi) --> -log U as xi --> 0), write
856  * the absolutely convergent Taylor expansion
857  *
858  * f(xi) = (1/xi)*(-xi log U + \sum_{n=2}^\infty (-xi log U)^n/n!
859  * = -log U + (1/xi)*\sum_{n=2}^\infty (-xi log U)^n/n!
860  * = -log U + \sum_{n=2}^\infty xi^{n-1} (-log U)^n/n!
861  * = -log U - log U \sum_{n=2}^\infty (-xi log U)^{n-1}/n!
862  * = -log U (1 + \sum_{n=2}^\infty (-xi log U)^{n-1}/n!).
863  *
864  * Let d = \sum_{n=2}^\infty (-xi log U)^{n-1}/n!. What do we
865  * lose if we discard it and use -log U as an approximation to
866  * f(xi)? If |xi| < eps/-4log U, then
867  *
868  * |d| <= \sum_{n=2}^\infty |xi log U|^{n-1}/n!
869  * <= \sum_{n=2}^\infty (eps/4)^{n-1}/n!
870  * <= \sum_{n=1}^\infty (eps/4)^n
871  * = (eps/4) \sum_{n=0}^\infty (eps/4)^n
872  * = (eps/4)/(1 - eps/4)
873  * < eps/2,
874  *
875  * for any 0 < eps < 2. Hence, as long as |xi| < eps/-2log U,
876  * f(xi) = -log U (1 + d) for |d| <= eps/2. |d| is the
877  * relative error of f(xi) from -log U; from this bound, the
878  * relative error of -log U from f(xi) is at most (eps/2)/(1 -
879  * eps/2) = eps/2 + (eps/2)^2 + (eps/2)^3 + ... < eps for 0 <
880  * eps < 1. Since -log U < 1000 for all U in (0, 1] in
881  * binary64 floating-point, we can safely cut xi off at 1e-20 <
882  * eps/4000 and attain <1ulp error from series truncation.
883  */
884  if (fabs(xi) <= 1e-20)
885  return mu - sigma*log1p(-p);
886  else
887  return mu + sigma*expm1(-xi*log1p(-p))/xi;
888 }
889 
896 STATIC double
897 isf_genpareto(double p, double mu, double sigma, double xi)
898 {
899  if (fabs(xi) <= 1e-20)
900  return mu - sigma*log(p);
901  else
902  return mu + sigma*expm1(-xi*log(p))/xi;
903 }
904 
905 /*******************************************************************/
906 
938 STATIC double
939 sample_uniform_interval(double p0, double a, double b)
940 {
941  /*
942  * XXX Prove that the distribution is, in fact, uniform on
943  * [a,b], particularly around p0 = 1, or at least has very
944  * small deviation from uniform, quantified appropriately
945  * (e.g., like in Monahan 1984, or by KL divergence). It
946  * almost certainly does but it would be nice to quantify the
947  * error.
948  */
949  if ((a <= 0 && 0 <= b) || (b <= 0 && 0 <= a)) {
950  /*
951  * When ab < 0, (1 - t) a + t b is monotonic, since for
952  * a <= b it is a sum of nondecreasing functions of t,
953  * and for b <= a, of nonincreasing functions of t.
954  * Further, clearly at 0 and 1 it attains a and b,
955  * respectively. Hence it is bounded within [a, b].
956  */
957  return (1 - p0)*a + p0*b;
958  } else {
959  /*
960  * a + (b - a) t is monotonic -- it is obviously a
961  * nondecreasing function of t for a <= b. Further, it
962  * attains a at 0, and while it may overshoot b at 1,
963  * we have a
964  *
965  * Theorem. If 0 <= t < 1, then the floating-point
966  * evaluation of a + (b - a) t is bounded in [a, b].
967  *
968  * Lemma 1. If 0 <= t < 1 is a floating-point number,
969  * then for any normal floating-point number x except
970  * the smallest in magnitude, |round(x*t)| < |x|.
971  *
972  * Proof. WLOG, assume x >= 0. Since the rounding
973  * function and t |---> x*t are nondecreasing, their
974  * composition t |---> round(x*t) is also
975  * nondecreasing, so it suffices to consider the
976  * largest floating-point number below 1, in particular
977  * t = 1 - ulp(1)/2.
978  *
979  * Case I: If x is a power of two, then the next
980  * floating-point number below x is x - ulp(x)/2 = x -
981  * x*ulp(1)/2 = x*(1 - ulp(1)/2) = x*t, so, since x*t
982  * is a floating-point number, multiplication is exact,
983  * and thus round(x*t) = x*t < x.
984  *
985  * Case II: If x is not a power of two, then the
986  * greatest lower bound of real numbers rounded to x is
987  * x - ulp(x)/2 = x - ulp(T(x))/2 = x - T(x)*ulp(1)/2,
988  * where T(X) is the largest power of two below x.
989  * Anything below this bound is rounded to a
990  * floating-point number smaller than x, and x*t = x*(1
991  * - ulp(1)/2) = x - x*ulp(1)/2 < x - T(x)*ulp(1)/2
992  * since T(x) < x, so round(x*t) < x*t < x. QED.
993  *
994  * Lemma 2. If x and y are subnormal, then round(x +
995  * y) = x + y.
996  *
997  * Proof. It is a matter of adding the significands,
998  * since if we treat subnormals as having an implicit
999  * zero bit before the `binary' point, their exponents
1000  * are all the same. There is at most one carry/borrow
1001  * bit, which can always be acommodated either in a
1002  * subnormal, or, at largest, in the implicit one bit
1003  * of a normal.
1004  *
1005  * Lemma 3. Let x and y be floating-point numbers. If
1006  * round(x - y) is subnormal or zero, then it is equal
1007  * to x - y.
1008  *
1009  * Proof. Case I (equal): round(x - y) = 0 iff x = y;
1010  * hence if round(x - y) = 0, then round(x - y) = 0 = x
1011  * - y.
1012  *
1013  * Case II (subnormal/subnormal): If x and y are both
1014  * subnormal, this follows directly from Lemma 2.
1015  *
1016  * Case IIIa (normal/subnormal): If x is normal and y
1017  * is subnormal, then x and y must share sign, or else
1018  * x - y would be larger than x and thus rounded to
1019  * normal. If s is the smallest normal positive
1020  * floating-point number, |x| < 2s since by
1021  * construction 2s - |y| is normal for all subnormal y.
1022  * This means that x and y must have the same exponent,
1023  * so the difference is the difference of significands,
1024  * which is exact.
1025  *
1026  * Case IIIb (subnormal/normal): Same as case IIIa for
1027  * -(y - x).
1028  *
1029  * Case IV (normal/normal): If x and y are both normal,
1030  * then they must share sign, or else x - y would be
1031  * larger than x and thus rounded to normal. Note that
1032  * |y| < 2|x|, for if |y| >= 2|x|, then |x| - |y| <=
1033  * -|x| but -|x| is normal like x. Also, |x|/2 < |y|:
1034  * if |x|/2 is subnormal, it must hold because y is
1035  * normal; if |x|/2 is normal, then |x|/2 >= s, so
1036  * since |x| - |y| < s,
1037  *
1038  * |x|/2 = |x| - |x|/2 <= |x| - s <= |y|;
1039  *
1040  * that is, |x|/2 < |y| < 2|x|, so by the Sterbenz
1041  * lemma, round(x - y) = x - y. QED.
1042  *
1043  * Proof of theorem. WLOG, assume 0 <= a <= b. Since
1044  * round(a + round(round(b - a)*t) is nondecreasing in
1045  * t and attains a at 0, the lower end of the bound is
1046  * trivial; we must show the upper end of the bound
1047  * strictly. It suffices to show this for the largest
1048  * floating-point number below 1, namely 1 - ulp(1)/2.
1049  *
1050  * Case I: round(b - a) is normal. Then it is at most
1051  * the smallest floating-point number above b - a. By
1052  * Lemma 1, round(round(b - a)*t) < round(b - a).
1053  * Since the inequality is strict, and since
1054  * round(round(b - a)*t) is a floating-point number
1055  * below round(b - a), and since there are no
1056  * floating-point numbers between b - a and round(b -
1057  * a), we must have round(round(b - a)*t) < b - a.
1058  * Then since y |---> round(a + y) is nondecreasing, we
1059  * must have
1060  *
1061  * round(a + round(round(b - a)*t))
1062  * <= round(a + (b - a))
1063  * = round(b) = b.
1064  *
1065  * Case II: round(b - a) is subnormal. In this case,
1066  * Lemma 1 falls apart -- we are not guaranteed the
1067  * strict inequality. However, by Lemma 3, the
1068  * difference is exact: round(b - a) = b - a. Thus,
1069  *
1070  * round(a + round(round(b - a)*t))
1071  * <= round(a + round((b - a)*t))
1072  * <= round(a + (b - a))
1073  * = round(b)
1074  * = b,
1075  *
1076  * QED.
1077  */
1078 
1079  /* p0 is restricted to [0,1], but we use >= to silence -Wfloat-equal. */
1080  if (p0 >= 1)
1081  return b;
1082  return a + (b - a)*p0;
1083  }
1084 }
1085 
1091 STATIC double
1092 sample_logistic(uint32_t s, double t, double p0)
1093 {
1094  double sign = (s & 1) ? -1 : +1;
1095  double r;
1096 
1097  /*
1098  * We carve up the interval (0, 1) into subregions to compute
1099  * the inverse CDF precisely:
1100  *
1101  * A = (0, 1/(1 + e)] ---> (-\infty, -1]
1102  * B = [1/(1 + e), 1/2] ---> [-1, 0]
1103  * C = [1/2, 1 - 1/(1 + e)] ---> [0, 1]
1104  * D = [1 - 1/(1 + e), 1) ---> [1, +\infty)
1105  *
1106  * Cases D and C are mirror images of cases A and B,
1107  * respectively, so we choose between them by the sign chosen
1108  * by a fair coin toss. We choose between cases A and B by a
1109  * coin toss weighted by
1110  *
1111  * 2/(1 + e) = 1 - [1/2 - 1/(1 + e)]/(1/2):
1112  *
1113  * if it comes up heads, scale p0 into a uniform (0, 1/(1 + e)]
1114  * sample p; if it comes up tails, scale p0 into a uniform (0,
1115  * 1/2 - 1/(1 + e)] sample and compute the inverse CDF of p =
1116  * 1/2 - p0.
1117  */
1118  if (t <= 2/(1 + exp(1))) {
1119  /* p uniform in (0, 1/(1 + e)], represented by p. */
1120  p0 /= 1 + exp(1);
1121  r = logit(p0);
1122  } else {
1123  /*
1124  * p uniform in [1/(1 + e), 1/2), actually represented
1125  * by p0 = 1/2 - p uniform in (0, 1/2 - 1/(1 + e)], so
1126  * that p = 1/2 - p.
1127  */
1128  p0 *= 0.5 - 1/(1 + exp(1));
1129  r = logithalf(p0);
1130  }
1131 
1132  /*
1133  * We have chosen from the negative half of the standard
1134  * logistic distribution, which is symmetric with the positive
1135  * half. Now use the sign to choose uniformly between them.
1136  */
1137  return sign*r;
1138 }
1139 
1144 static double
1145 sample_logistic_locscale(uint32_t s, double t, double p0, double mu,
1146  double sigma)
1147 {
1148 
1149  return mu + sigma*sample_logistic(s, t, p0);
1150 }
1151 
1157 STATIC double
1158 sample_log_logistic(uint32_t s, double p0)
1159 {
1160 
1161  /*
1162  * Carve up the interval (0, 1) into (0, 1/2] and [1/2, 1); the
1163  * condition numbers of the icdf and the isf coincide at 1/2.
1164  */
1165  p0 *= 0.5;
1166  if ((s & 1) == 0) {
1167  /* p = p0 in (0, 1/2] */
1168  return p0/(1 - p0);
1169  } else {
1170  /* p = 1 - p0 in [1/2, 1) */
1171  return (1 - p0)/p0;
1172  }
1173 }
1174 
1179 static double
1180 sample_log_logistic_scaleshape(uint32_t s, double p0, double alpha,
1181  double beta)
1182 {
1183  double x = sample_log_logistic(s, p0);
1184 
1185  return alpha*pow(x, 1/beta);
1186 }
1187 
1193 static double
1194 sample_exponential(uint32_t s, double p0)
1195 {
1196  /*
1197  * We would like to evaluate log(p) for p near 0, and log1p(-p)
1198  * for p near 1. Simply carve the interval into (0, 1/2] and
1199  * [1/2, 1) by a fair coin toss.
1200  */
1201  p0 *= 0.5;
1202  if ((s & 1) == 0)
1203  /* p = p0 in (0, 1/2] */
1204  return -log(p0);
1205  else
1206  /* p = 1 - p0 in [1/2, 1) */
1207  return -log1p(-p0);
1208 }
1209 
1216 STATIC double
1217 sample_weibull(uint32_t s, double p0, double lambda, double k)
1218 {
1219 
1220  return lambda*pow(sample_exponential(s, p0), 1/k);
1221 }
1222 
1228 STATIC double
1229 sample_genpareto(uint32_t s, double p0, double xi)
1230 {
1231  double x = sample_exponential(s, p0);
1232 
1233  /*
1234  * Write f(xi) = (e^{xi x} - 1)/xi for xi near zero as the
1235  * absolutely convergent Taylor series
1236  *
1237  * f(x) = (1/xi) (xi x + \sum_{n=2}^\infty (xi x)^n/n!)
1238  * = x + (1/xi) \sum_{n=2}^\inty (xi x)^n/n!
1239  * = x + \sum_{n=2}^\infty xi^{n-1} x^n/n!
1240  * = x + x \sum_{n=2}^\infty (xi x)^{n-1}/n!
1241  * = x (1 + \sum_{n=2}^\infty (xi x)^{n-1}/n!).
1242  *
1243  * d = \sum_{n=2}^\infty (xi x)^{n-1}/n! is the relative error
1244  * of f(x) from x. If |xi| < eps/4x, then
1245  *
1246  * |d| <= \sum_{n=2}^\infty |xi x|^{n-1}/n!
1247  * <= \sum_{n=2}^\infty (eps/4)^{n-1}/n!
1248  * <= \sum_{n=1}^\infty (eps/4)
1249  * = (eps/4) \sum_{n=0}^\infty (eps/4)^n
1250  * = (eps/4)/(1 - eps/4)
1251  * < eps/2,
1252  *
1253  * for any 0 < eps < 2. Hence, as long as |xi| < eps/2x, f(xi)
1254  * = x (1 + d) for |d| <= eps/2, so x = f(xi) (1 + d') for |d'|
1255  * <= eps. What bound should we use for x?
1256  *
1257  * - If x is exponentially distributed, x > 200 with
1258  * probability below e^{-200} << 2^{-256}, i.e. never.
1259  *
1260  * - If x is computed by -log(U) for U in (0, 1], x is
1261  * guaranteed to be below 1000 in IEEE 754 binary64
1262  * floating-point.
1263  *
1264  * We can safely cut xi off at 1e-20 < eps/4000 and attain an
1265  * error bounded by 0.5 ulp for this expression.
1266  */
1267  return (fabs(xi) < 1e-20 ? x : expm1(xi*x)/xi);
1268 }
1269 
1274 static double
1275 sample_genpareto_locscale(uint32_t s, double p0, double mu, double sigma,
1276  double xi)
1277 {
1278 
1279  return mu + sigma*sample_genpareto(s, p0, xi);
1280 }
1281 
1302 static double
1303 sample_geometric(uint32_t s, double p0, double p)
1304 {
1305  double x = sample_exponential(s, p0);
1306 
1307  /* This is actually a check against 1, but we do >= so that the compiler
1308  does not raise a -Wfloat-equal */
1309  if (p >= 1)
1310  return 1;
1311 
1312  return ceil(-x/log1p(-p));
1313 }
1314 
1315 /*******************************************************************/
1316 
1326 const char *
1327 dist_name(const struct dist *dist)
1328 {
1329  return dist->ops->name;
1330 }
1331 
1332 /* Sample a value from <b>dist</b> and return it. */
1333 double
1334 dist_sample(const struct dist *dist)
1335 {
1336  return dist->ops->sample(dist);
1337 }
1338 
1340 double
1341 dist_cdf(const struct dist *dist, double x)
1342 {
1343  return dist->ops->cdf(dist, x);
1344 }
1345 
1347 double
1348 dist_sf(const struct dist *dist, double x)
1349 {
1350  return dist->ops->sf(dist, x);
1351 }
1352 
1354 double
1355 dist_icdf(const struct dist *dist, double p)
1356 {
1357  return dist->ops->icdf(dist, p);
1358 }
1359 
1361 double
1362 dist_isf(const struct dist *dist, double p)
1363 {
1364  return dist->ops->isf(dist, p);
1365 }
1366 
1369 static double
1370 uniform_sample(const struct dist *dist)
1371 {
1372  const struct uniform *U = dist_to_const_uniform(dist);
1373  double p0 = random_uniform_01();
1374 
1375  return sample_uniform_interval(p0, U->a, U->b);
1376 }
1377 
1378 static double
1379 uniform_cdf(const struct dist *dist, double x)
1380 {
1381  const struct uniform *U = dist_to_const_uniform(dist);
1382  if (x < U->a)
1383  return 0;
1384  else if (x < U->b)
1385  return (x - U->a)/(U->b - U->a);
1386  else
1387  return 1;
1388 }
1389 
1390 static double
1391 uniform_sf(const struct dist *dist, double x)
1392 {
1393  const struct uniform *U = dist_to_const_uniform(dist);
1394 
1395  if (x > U->b)
1396  return 0;
1397  else if (x > U->a)
1398  return (U->b - x)/(U->b - U->a);
1399  else
1400  return 1;
1401 }
1402 
1403 static double
1404 uniform_icdf(const struct dist *dist, double p)
1405 {
1406  const struct uniform *U = dist_to_const_uniform(dist);
1407  double w = U->b - U->a;
1408 
1409  return (p < 0.5 ? (U->a + w*p) : (U->b - w*(1 - p)));
1410 }
1411 
1412 static double
1413 uniform_isf(const struct dist *dist, double p)
1414 {
1415  const struct uniform *U = dist_to_const_uniform(dist);
1416  double w = U->b - U->a;
1417 
1418  return (p < 0.5 ? (U->b - w*p) : (U->a + w*(1 - p)));
1419 }
1420 
1421 const struct dist_ops uniform_ops = {
1422  .name = "uniform",
1423  .sample = uniform_sample,
1424  .cdf = uniform_cdf,
1425  .sf = uniform_sf,
1426  .icdf = uniform_icdf,
1427  .isf = uniform_isf,
1428 };
1429 
1430 /*******************************************************************/
1431 
1436 static double
1437 logistic_sample(const struct dist *dist)
1438 {
1439  const struct logistic *L = dist_to_const_logistic(dist);
1441  double t = random_uniform_01();
1442  double p0 = random_uniform_01();
1443 
1444  return sample_logistic_locscale(s, t, p0, L->mu, L->sigma);
1445 }
1446 
1447 static double
1448 logistic_cdf(const struct dist *dist, double x)
1449 {
1450  const struct logistic *L = dist_to_const_logistic(dist);
1451  return cdf_logistic(x, L->mu, L->sigma);
1452 }
1453 
1454 static double
1455 logistic_sf(const struct dist *dist, double x)
1456 {
1457  const struct logistic *L = dist_to_const_logistic(dist);
1458  return sf_logistic(x, L->mu, L->sigma);
1459 }
1460 
1461 static double
1462 logistic_icdf(const struct dist *dist, double p)
1463 {
1464  const struct logistic *L = dist_to_const_logistic(dist);
1465  return icdf_logistic(p, L->mu, L->sigma);
1466 }
1467 
1468 static double
1469 logistic_isf(const struct dist *dist, double p)
1470 {
1471  const struct logistic *L = dist_to_const_logistic(dist);
1472  return isf_logistic(p, L->mu, L->sigma);
1473 }
1474 
1475 const struct dist_ops logistic_ops = {
1476  .name = "logistic",
1477  .sample = logistic_sample,
1478  .cdf = logistic_cdf,
1479  .sf = logistic_sf,
1480  .icdf = logistic_icdf,
1481  .isf = logistic_isf,
1482 };
1483 
1486 static double
1488 {
1489  const struct log_logistic *LL = dist_to_const_log_logistic(dist);
1491  double p0 = random_uniform_01();
1492 
1493  return sample_log_logistic_scaleshape(s, p0, LL->alpha, LL->beta);
1494 }
1495 
1496 static double
1497 log_logistic_cdf(const struct dist *dist, double x)
1498 {
1499  const struct log_logistic *LL = dist_to_const_log_logistic(dist);
1500  return cdf_log_logistic(x, LL->alpha, LL->beta);
1501 }
1502 
1503 static double
1504 log_logistic_sf(const struct dist *dist, double x)
1505 {
1506  const struct log_logistic *LL = dist_to_const_log_logistic(dist);
1507  return sf_log_logistic(x, LL->alpha, LL->beta);
1508 }
1509 
1510 static double
1511 log_logistic_icdf(const struct dist *dist, double p)
1512 {
1513  const struct log_logistic *LL = dist_to_const_log_logistic(dist);
1514  return icdf_log_logistic(p, LL->alpha, LL->beta);
1515 }
1516 
1517 static double
1518 log_logistic_isf(const struct dist *dist, double p)
1519 {
1520  const struct log_logistic *LL = dist_to_const_log_logistic(dist);
1521  return isf_log_logistic(p, LL->alpha, LL->beta);
1522 }
1523 
1524 const struct dist_ops log_logistic_ops = {
1525  .name = "log logistic",
1526  .sample = log_logistic_sample,
1527  .cdf = log_logistic_cdf,
1528  .sf = log_logistic_sf,
1529  .icdf = log_logistic_icdf,
1530  .isf = log_logistic_isf,
1531 };
1532 
1535 static double
1536 weibull_sample(const struct dist *dist)
1537 {
1538  const struct weibull *W = dist_to_const_weibull(dist);
1540  double p0 = random_uniform_01();
1541 
1542  return sample_weibull(s, p0, W->lambda, W->k);
1543 }
1544 
1545 static double
1546 weibull_cdf(const struct dist *dist, double x)
1547 {
1548  const struct weibull *W = dist_to_const_weibull(dist);
1549  return cdf_weibull(x, W->lambda, W->k);
1550 }
1551 
1552 static double
1553 weibull_sf(const struct dist *dist, double x)
1554 {
1555  const struct weibull *W = dist_to_const_weibull(dist);
1556  return sf_weibull(x, W->lambda, W->k);
1557 }
1558 
1559 static double
1560 weibull_icdf(const struct dist *dist, double p)
1561 {
1562  const struct weibull *W = dist_to_const_weibull(dist);
1563  return icdf_weibull(p, W->lambda, W->k);
1564 }
1565 
1566 static double
1567 weibull_isf(const struct dist *dist, double p)
1568 {
1569  const struct weibull *W = dist_to_const_weibull(dist);
1570  return isf_weibull(p, W->lambda, W->k);
1571 }
1572 
1573 const struct dist_ops weibull_ops = {
1574  .name = "Weibull",
1575  .sample = weibull_sample,
1576  .cdf = weibull_cdf,
1577  .sf = weibull_sf,
1578  .icdf = weibull_icdf,
1579  .isf = weibull_isf,
1580 };
1581 
1584 static double
1586 {
1587  const struct genpareto *GP = dist_to_const_genpareto(dist);
1589  double p0 = random_uniform_01();
1590 
1591  return sample_genpareto_locscale(s, p0, GP->mu, GP->sigma, GP->xi);
1592 }
1593 
1594 static double
1595 genpareto_cdf(const struct dist *dist, double x)
1596 {
1597  const struct genpareto *GP = dist_to_const_genpareto(dist);
1598  return cdf_genpareto(x, GP->mu, GP->sigma, GP->xi);
1599 }
1600 
1601 static double
1602 genpareto_sf(const struct dist *dist, double x)
1603 {
1604  const struct genpareto *GP = dist_to_const_genpareto(dist);
1605  return sf_genpareto(x, GP->mu, GP->sigma, GP->xi);
1606 }
1607 
1608 static double
1609 genpareto_icdf(const struct dist *dist, double p)
1610 {
1611  const struct genpareto *GP = dist_to_const_genpareto(dist);
1612  return icdf_genpareto(p, GP->mu, GP->sigma, GP->xi);
1613 }
1614 
1615 static double
1616 genpareto_isf(const struct dist *dist, double p)
1617 {
1618  const struct genpareto *GP = dist_to_const_genpareto(dist);
1619  return isf_genpareto(p, GP->mu, GP->sigma, GP->xi);
1620 }
1621 
1622 const struct dist_ops genpareto_ops = {
1623  .name = "generalized Pareto",
1624  .sample = genpareto_sample,
1625  .cdf = genpareto_cdf,
1626  .sf = genpareto_sf,
1627  .icdf = genpareto_icdf,
1628  .isf = genpareto_isf,
1629 };
1630 
1633 static double
1635 {
1636  const struct geometric *G = dist_to_const_geometric(dist);
1638  double p0 = random_uniform_01();
1639 
1640  return sample_geometric(s, p0, G->p);
1641 }
1642 
1643 static double
1644 geometric_cdf(const struct dist *dist, double x)
1645 {
1646  const struct geometric *G = dist_to_const_geometric(dist);
1647 
1648  if (x < 1)
1649  return 0;
1650  /* 1 - (1 - p)^floor(x) = 1 - e^{floor(x) log(1 - p)} */
1651  return -expm1(floor(x)*log1p(-G->p));
1652 }
1653 
1654 static double
1655 geometric_sf(const struct dist *dist, double x)
1656 {
1657  const struct geometric *G = dist_to_const_geometric(dist);
1658 
1659  if (x < 1)
1660  return 0;
1661  /* (1 - p)^floor(x) = e^{ceil(x) log(1 - p)} */
1662  return exp(floor(x)*log1p(-G->p));
1663 }
1664 
1665 static double
1666 geometric_icdf(const struct dist *dist, double p)
1667 {
1668  const struct geometric *G = dist_to_const_geometric(dist);
1669 
1670  return log1p(-p)/log1p(-G->p);
1671 }
1672 
1673 static double
1674 geometric_isf(const struct dist *dist, double p)
1675 {
1676  const struct geometric *G = dist_to_const_geometric(dist);
1677 
1678  return log(p)/log1p(-G->p);
1679 }
1680 
1681 const struct dist_ops geometric_ops = {
1682  .name = "geometric (1-based)",
1683  .sample = geometric_sample,
1684  .cdf = geometric_cdf,
1685  .sf = geometric_sf,
1686  .icdf = geometric_icdf,
1687  .isf = geometric_isf,
1688 };
STATIC double logithalf(double p0)
Definition: prob_distr.c:358
Common functions for using (pseudo-)random number generators.
STATIC double sf_logistic(double x, double mu, double sigma)
Definition: prob_distr.c:521
STATIC double cdf_genpareto(double x, double mu, double sigma, double xi)
Definition: prob_distr.c:790
STATIC double isf_genpareto(double p, double mu, double sigma, double xi)
Definition: prob_distr.c:897
#define CTASSERT(x)
Definition: ctassert.h:44
static double weibull_sample(const struct dist *dist)
Definition: prob_distr.c:1536
static double sample_exponential(uint32_t s, double p0)
Definition: prob_distr.c:1194
static double uniform_sample(const struct dist *dist)
Definition: prob_distr.c:1370
static double sample_logistic_locscale(uint32_t s, double t, double p0, double mu, double sigma)
Definition: prob_distr.c:1145
STATIC double isf_logistic(double p, double mu, double sigma)
Definition: prob_distr.c:543
static unsigned bitcount32(uint32_t x)
Definition: prob_distr.c:75
STATIC double icdf_genpareto(double p, double mu, double sigma, double xi)
Definition: prob_distr.c:851
STATIC double logit(double p)
Definition: prob_distr.c:278
STATIC double logistic(double x)
Definition: prob_distr.c:191
STATIC double sf_weibull(double x, double lambda, double k)
Definition: prob_distr.c:724
STATIC double sample_log_logistic(uint32_t s, double p0)
Definition: prob_distr.c:1158
STATIC double icdf_log_logistic(double p, double alpha, double beta)
Definition: prob_distr.c:673
STATIC double sf_log_logistic(double x, double alpha, double beta)
Definition: prob_distr.c:657
STATIC double cdf_weibull(double x, double lambda, double k)
Definition: prob_distr.c:709
STATIC double cdf_logistic(double x, double mu, double sigma)
Definition: prob_distr.c:509
STATIC double sf_genpareto(double x, double mu, double sigma, double xi)
Definition: prob_distr.c:834
STATIC double cdf_log_logistic(double x, double alpha, double beta)
Definition: prob_distr.c:596
static double sample_geometric(uint32_t s, double p0, double p)
Definition: prob_distr.c:1303
static double genpareto_sample(const struct dist *dist)
Definition: prob_distr.c:1585
double dist_sf(const struct dist *dist, double x)
Definition: prob_distr.c:1348
double dist_isf(const struct dist *dist, double p)
Definition: prob_distr.c:1362
STATIC double sample_uniform_interval(double p0, double a, double b)
Definition: prob_distr.c:939
static unsigned clz32(uint32_t x)
Definition: prob_distr.c:95
static double geometric_sample(const struct dist *dist)
Definition: prob_distr.c:1634
Header for prob_distr.c.
STATIC double icdf_logistic(double p, double mu, double sigma)
Definition: prob_distr.c:532
#define DECLARE_PROB_DISTR_DOWNCAST_FN(name)
Definition: prob_distr.c:57
STATIC double isf_weibull(double p, double lambda, double k)
Definition: prob_distr.c:750
Compile-time assertions: CTASSERT(expression).
double dist_icdf(const struct dist *dist, double p)
Definition: prob_distr.c:1355
crypto_fast_rng_t * get_thread_fast_rng(void)
STATIC double sample_genpareto(uint32_t s, double p0, double xi)
Definition: prob_distr.c:1229
static double sample_log_logistic_scaleshape(uint32_t s, double p0, double alpha, double beta)
Definition: prob_distr.c:1180
STATIC double random_uniform_01(void)
Definition: prob_distr.c:451
static double sample_genpareto_locscale(uint32_t s, double p0, double mu, double sigma, double xi)
Definition: prob_distr.c:1275
STATIC double sample_logistic(uint32_t s, double t, double p0)
Definition: prob_distr.c:1092
static double logistic_sample(const struct dist *dist)
Definition: prob_distr.c:1437
STATIC double isf_log_logistic(double p, double alpha, double beta)
Definition: prob_distr.c:684
static double log_logistic_sample(const struct dist *dist)
Definition: prob_distr.c:1487
double dist_cdf(const struct dist *dist, double x)
Definition: prob_distr.c:1341
STATIC double sample_weibull(uint32_t s, double p0, double lambda, double k)
Definition: prob_distr.c:1217
Macros to manage assertions, fatal and non-fatal.
STATIC double icdf_weibull(double p, double lambda, double k)
Definition: prob_distr.c:737
uint32_t crypto_fast_rng_get_u32(crypto_fast_rng_t *rng)
const char * dist_name(const struct dist *dist)
Definition: prob_distr.c:1327