LCOV - code coverage report
Current view: top level - test - test_prob_distr.c (source / functions) Hit Total Coverage
Test: lcov.info Lines: 498 519 96.0 %
Date: 2021-11-24 03:28:48 Functions: 28 28 100.0 %

          Line data    Source code
       1             : /* Copyright (c) 2018-2021, The Tor Project, Inc. */
       2             : /* See LICENSE for licensing information */
       3             : 
       4             : /**
       5             :  * \file test_prob_distr.c
       6             :  * \brief Test probability distributions.
       7             :  * \detail
       8             :  *
       9             :  * For each probability distribution we do two kinds of tests:
      10             :  *
      11             :  * a) We do numerical deterministic testing of their cdf/icdf/sf/isf functions
      12             :  *    and the various relationships between them for each distribution. We also
      13             :  *    do deterministic tests on their sampling functions. Test vectors for
      14             :  *    these tests were computed from alternative implementations and were
      15             :  *    eyeballed to make sure they make sense
      16             :  *    (e.g. src/test/prob_distr_mpfr_ref.c computes logit(p) using GNU mpfr
      17             :  *    with 200-bit precision and is then tested in test_logit_logistic()).
      18             :  *
      19             :  * b) We do stochastic hypothesis testing (G-test) to ensure that sampling from
      20             :  *    the given distributions is distributed properly. The stochastic tests are
      21             :  *    slow and their false positive rate is not well suited for CI, so they are
      22             :  *    currently disabled-by-default and put into 'tests-slow'.
      23             :  */
      24             : 
      25             : #define PROB_DISTR_PRIVATE
      26             : 
      27             : #include "orconfig.h"
      28             : 
      29             : #include "test/test.h"
      30             : 
      31             : #include "core/or/or.h"
      32             : 
      33             : #include "lib/math/prob_distr.h"
      34             : #include "lib/math/fp.h"
      35             : #include "lib/crypt_ops/crypto_rand.h"
      36             : #include "test/rng_test_helpers.h"
      37             : 
      38             : #include <float.h>
      39             : #include <math.h>
      40             : #include <stdbool.h>
      41             : #include <stddef.h>
      42             : #include <stdint.h>
      43             : #include <stdio.h>
      44             : #include <stdlib.h>
      45             : 
      46             : /**
      47             :  * Return floor(d) converted to size_t, as a workaround for complaints
      48             :  * under -Wbad-function-cast for (size_t)floor(d).
      49             :  */
      50             : static size_t
      51     2549088 : floor_to_size_t(double d)
      52             : {
      53     2549088 :   double integral_d = floor(d);
      54     2549088 :   return (size_t)integral_d;
      55             : }
      56             : 
      57             : /**
      58             :  * Return ceil(d) converted to size_t, as a workaround for complaints
      59             :  * under -Wbad-function-cast for (size_t)ceil(d).
      60             :  */
      61             : static size_t
      62          26 : ceil_to_size_t(double d)
      63             : {
      64          26 :   double integral_d = ceil(d);
      65          26 :   return (size_t)integral_d;
      66             : }
      67             : 
      68             : /*
      69             :  * Geometric(p) distribution, supported on {1, 2, 3, ...}.
      70             :  *
      71             :  * Compute the probability mass function Geom(n; p) of the number of
      72             :  * trials before the first success when success has probability p.
      73             :  */
      74             : static double
      75         396 : logpmf_geometric(unsigned n, double p)
      76             : {
      77             :   /* This is actually a check against 1, but we do >= so that the compiler
      78             :      does not raise a -Wfloat-equal */
      79         396 :   if (p >= 1) {
      80          99 :     if (n == 1)
      81             :       return 0;
      82             :     else
      83          98 :       return -HUGE_VAL;
      84             :   }
      85         297 :   return (n - 1)*log1p(-p) + log(p);
      86             : }
      87             : 
      88             : /**
      89             :  * Compute the logistic function, translated in output by 1/2:
      90             :  * logistichalf(x) = logistic(x) - 1/2.  Well-conditioned on the entire
      91             :  * real plane, with maximum condition number 1 at 0.
      92             :  *
      93             :  * This implementation gives relative error bounded by 5 eps.
      94             :  */
      95             : static double
      96         116 : logistichalf(double x)
      97             : {
      98             :   /*
      99             :    * Rewrite this with the identity
     100             :    *
     101             :    *  1/(1 + e^{-x}) - 1/2
     102             :    *  = (1 - 1/2 - e^{-x}/2)/(1 + e^{-x})
     103             :    *  = (1/2 - e^{-x}/2)/(1 + e^{-x})
     104             :    *  = (1 - e^{-x})/[2 (1 + e^{-x})]
     105             :    *  = -(e^{-x} - 1)/[2 (1 + e^{-x})],
     106             :    *
     107             :    * which we can evaluate by -expm1(-x)/[2 (1 + exp(-x))].
     108             :    *
     109             :    * Suppose exp has error d0, + has error d1, expm1 has error
     110             :    * d2, and / has error d3, so we evaluate
     111             :    *
     112             :    *  -(1 + d2) (1 + d3) (e^{-x} - 1)
     113             :    *    / [2 (1 + d1) (1 + (1 + d0) e^{-x})].
     114             :    *
     115             :    * In the denominator,
     116             :    *
     117             :    *  1 + (1 + d0) e^{-x}
     118             :    *  = 1 + e^{-x} + d0 e^{-x}
     119             :    *  = (1 + e^{-x}) (1 + d0 e^{-x}/(1 + e^{-x})),
     120             :    *
     121             :    * so the relative error of the numerator is
     122             :    *
     123             :    *  d' = d2 + d3 + d2 d3,
     124             :    * and of the denominator,
     125             :    *  d'' = d1 + d0 e^{-x}/(1 + e^{-x}) + d0 d1 e^{-x}/(1 + e^{-x})
     126             :    *      = d1 + d0 L(-x) + d0 d1 L(-x),
     127             :    *
     128             :    * where L(-x) is logistic(-x).  By Lemma 1 the relative error
     129             :    * of the quotient is bounded by
     130             :    *
     131             :    *  2|d2 + d3 + d2 d3 - d1 - d0 L(x) + d0 d1 L(x)|,
     132             :    *
     133             :    * Since 0 < L(x) < 1, this is bounded by
     134             :    *
     135             :    *  2|d2| + 2|d3| + 2|d2 d3| + 2|d1| + 2|d0| + 2|d0 d1|
     136             :    *  <= 4 eps + 2 eps^2.
     137             :    */
     138         116 :   if (x < log(DBL_EPSILON/8)) {
     139             :     /*
     140             :      * Avoid overflow in e^{-x}.  When x < log(eps/4), we
     141             :      * we further have x < logit(eps/4), so that
     142             :      * logistic(x) < eps/4.  Hence the relative error of
     143             :      * logistic(x) - 1/2 from -1/2 is bounded by eps/2, and
     144             :      * so the relative error of -1/2 from logistic(x) - 1/2
     145             :      * is bounded by eps.
     146             :      */
     147             :     return -0.5;
     148             :   } else {
     149         100 :     return -expm1(-x)/(2*(1 + exp(-x)));
     150             :   }
     151             : }
     152             : 
     153             : /**
     154             :  * Compute the log of the sum of the exps.  Caller should arrange the
     155             :  * array in descending order to minimize error because I don't want to
     156             :  * deal with using temporary space and the one caller in this file
     157             :  * arranges that anyway.
     158             :  *
     159             :  * Warning: This implementation does not handle infinite or NaN inputs
     160             :  * sensibly, because I don't need that here at the moment.  (NaN, or
     161             :  * -inf and +inf together, should yield NaN; +inf and finite should
     162             :  * yield +inf; otherwise all -inf should be ignored because exp(-inf) =
     163             :  * 0.)
     164             :  */
     165             : static double
     166           4 : logsumexp(double *A, size_t n)
     167             : {
     168           4 :   double maximum, sum;
     169           4 :   size_t i;
     170             : 
     171           4 :   if (n == 0)
     172           0 :     return log(0);
     173             : 
     174           4 :   maximum = A[0];
     175         396 :   for (i = 1; i < n; i++) {
     176         392 :     if (A[i] > maximum)
     177           0 :       maximum = A[i];
     178             :   }
     179             : 
     180             :   sum = 0;
     181         400 :   for (i = n; i --> 0;)
     182         396 :     sum += exp(A[i] - maximum);
     183             : 
     184           4 :   return log(sum) + maximum;
     185             : }
     186             : 
     187             : /**
     188             :  * Compute log(1 - e^x).  Defined only for negative x so that e^x < 1.
     189             :  * This is the complement of a probability in log space.
     190             :  */
     191             : static double
     192           4 : log1mexp(double x)
     193             : {
     194             : 
     195             :   /*
     196             :    * We want to compute log on [0, 1/2) but log1p on [1/2, +inf),
     197             :    * so partition x at -log(2) = log(1/2).
     198             :    */
     199           4 :   if (-log(2) < x)
     200           4 :     return log(-expm1(x));
     201             :   else
     202           0 :     return log1p(-exp(x));
     203             : }
     204             : 
     205             : /*
     206             :  * Tests of numerical errors in computing logit, logistic, and the
     207             :  * various cdfs, sfs, icdfs, and isfs.
     208             :  */
     209             : 
     210             : #define arraycount(A) (sizeof(A)/sizeof(A[0]))
     211             : 
     212             : /** Return relative error between <b>actual</b> and <b>expected</b>.
     213             :  *  Special cases: If <b>expected</b> is zero or infinite, return 1 if
     214             :  *  <b>actual</b> is equal to <b>expected</b> and 0 if not, since the
     215             :  *  usual notion of relative error is undefined but we only use this
     216             :  *  for testing relerr(e, a) <= bound.  If either is NaN, return NaN,
     217             :  *  which has the property that NaN <= bound is false no matter what
     218             :  *  bound is.
     219             :  *
     220             :  *  Beware: if you test !(relerr(e, a) > bound), then then the result
     221             :  *  is true when a is NaN because NaN > bound is false too.  See
     222             :  *  CHECK_RELERR for correct use to decide when to report failure.
     223             :  */
     224             : static double
     225       11062 : relerr(double expected, double actual)
     226             : {
     227             :   /*
     228             :    * To silence -Wfloat-equal, we have to test for equality using
     229             :    * inequalities: we have (fabs(expected) <= 0) iff (expected == 0),
     230             :    * and (actual <= expected && actual >= expected) iff actual ==
     231             :    * expected whether expected is zero or infinite.
     232             :    */
     233       11062 :   if (fabs(expected) <= 0 || tor_isinf(expected)) {
     234         358 :     if (actual <= expected && actual >= expected)
     235             :       return 0;
     236             :     else
     237           0 :       return 1;
     238             :   } else {
     239       10704 :     return fabs((expected - actual)/expected);
     240             :   }
     241             : }
     242             : 
     243             : /** Check that relative error of <b>expected</b> and <b>actual</b> is within
     244             :  *  <b>relerr_bound</b>.  Caller must arrange to have i and relerr_bound in
     245             :  *  scope.  */
     246             : #define CHECK_RELERR(expected, actual) do {                                   \
     247             :   double check_expected = (expected);                                         \
     248             :   double check_actual = (actual);                                             \
     249             :   const char *str_expected = #expected;                                       \
     250             :   const char *str_actual = #actual;                                           \
     251             :   double check_relerr = relerr(expected, actual);                             \
     252             :   if (!(relerr(check_expected, check_actual) <= relerr_bound)) {              \
     253             :     log_warn(LD_GENERAL, "%s:%d: case %u: relerr(%s=%.17e, %s=%.17e)"        \
     254             :              " = %.17e > %.17e\n",                                            \
     255             :              __func__, __LINE__, (unsigned) i,                                \
     256             :              str_expected, check_expected,                                    \
     257             :              str_actual, check_actual,                                        \
     258             :              check_relerr, relerr_bound);                                     \
     259             :     ok = false;                                                               \
     260             :   }                                                                           \
     261             : } while (0)
     262             : 
     263             : /* Check that a <= b.
     264             :  * Caller must arrange to have i in scope.  */
     265             : #define CHECK_LE(a, b) do {                                                   \
     266             :   double check_a = (a);                                                       \
     267             :   double check_b = (b);                                                       \
     268             :   const char *str_a = #a;                                                     \
     269             :   const char *str_b = #b;                                                     \
     270             :   if (!(check_a <= check_b)) {                                                \
     271             :     log_warn(LD_GENERAL, "%s:%d: case %u: %s=%.17e > %s=%.17e\n",             \
     272             :              __func__, __LINE__, (unsigned) i,                                \
     273             :              str_a, check_a, str_b, check_b);                                 \
     274             :     ok = false;                                                               \
     275             :   }                                                                           \
     276             : } while (0)
     277             : 
     278             : /**
     279             :  * Test the logit and logistic functions.  Confirm that they agree with
     280             :  * the cdf, sf, icdf, and isf of the standard Logistic distribution.
     281             :  * Confirm that the sampler for the standard logistic distribution maps
     282             :  * [0, 1] into the right subinterval for the inverse transform, for
     283             :  * this implementation.
     284             :  */
     285             : static void
     286           1 : test_logit_logistic(void *arg)
     287             : {
     288           1 :   (void) arg;
     289             : 
     290           1 :   static const struct {
     291             :     double x;                   /* x = logit(p) */
     292             :     double p;                   /* p = logistic(x) */
     293             :     double phalf;               /* p - 1/2 = logistic(x) - 1/2 */
     294             :   } cases[] = {
     295             :     { -HUGE_VAL, 0, -0.5 },
     296             :     { -1000, 0, -0.5 },
     297             :     { -710, 4.47628622567513e-309, -0.5 },
     298             :     { -708, 3.307553003638408e-308, -0.5 },
     299             :     { -2, .11920292202211755, -.3807970779778824 },
     300             :     { -1.0000001, .2689414017088022, -.23105859829119776 },
     301             :     { -1, .2689414213699951, -.23105857863000487 },
     302             :     { -0.9999999, .26894144103118883, -.2310585589688111 },
     303             :     /* see src/test/prob_distr_mpfr_ref.c for computation */
     304             :     { -4.000000000537333e-5, .49999, -1.0000000000010001e-5 },
     305             :     { -4.000000000533334e-5, .49999, -.00001 },
     306             :     { -4.000000108916878e-9, .499999999, -1.0000000272292198e-9 },
     307             :     { -4e-9, .499999999, -1e-9 },
     308             :     { -4e-16, .5, -1e-16 },
     309             :     { -4e-300, .5, -1e-300 },
     310             :     { 0, .5, 0 },
     311             :     { 4e-300, .5, 1e-300 },
     312             :     { 4e-16, .5, 1e-16 },
     313             :     { 3.999999886872274e-9, .500000001, 9.999999717180685e-10 },
     314             :     { 4e-9, .500000001, 1e-9 },
     315             :     { 4.0000000005333336e-5, .50001, .00001 },
     316             :     { 8.000042667076272e-3, .502, .002 },
     317             :     { 0.9999999, .7310585589688111, .2310585589688111 },
     318             :     { 1, .7310585786300049, .23105857863000487 },
     319             :     { 1.0000001, .7310585982911977, .23105859829119774 },
     320             :     { 2, .8807970779778823, .3807970779778824 },
     321             :     { 708, 1, .5 },
     322             :     { 710, 1, .5 },
     323             :     { 1000, 1, .5 },
     324             :     { HUGE_VAL, 1, .5 },
     325             :   };
     326           1 :   double relerr_bound = 3e-15; /* >10eps */
     327           1 :   size_t i;
     328           1 :   bool ok = true;
     329             : 
     330          30 :   for (i = 0; i < arraycount(cases); i++) {
     331          29 :     double x = cases[i].x;
     332          29 :     double p = cases[i].p;
     333          29 :     double phalf = cases[i].phalf;
     334             : 
     335             :     /*
     336             :      * cdf is logistic, icdf is logit, and symmetry for
     337             :      * sf/isf.
     338             :      */
     339          29 :     CHECK_RELERR(logistic(x), cdf_logistic(x, 0, 1));
     340          29 :     CHECK_RELERR(logistic(-x), sf_logistic(x, 0, 1));
     341          29 :     CHECK_RELERR(logit(p), icdf_logistic(p, 0, 1));
     342          29 :     CHECK_RELERR(-logit(p), isf_logistic(p, 0, 1));
     343             : 
     344          29 :     CHECK_RELERR(cdf_logistic(x, 0, 1), cdf_logistic(x*2, 0, 2));
     345          29 :     CHECK_RELERR(sf_logistic(x, 0, 1), sf_logistic(x*2, 0, 2));
     346          29 :     CHECK_RELERR(icdf_logistic(p, 0, 1), icdf_logistic(p, 0, 2)/2);
     347          29 :     CHECK_RELERR(isf_logistic(p, 0, 1), isf_logistic(p, 0, 2)/2);
     348             : 
     349          29 :     CHECK_RELERR(cdf_logistic(x, 0, 1), cdf_logistic(x/2, 0, .5));
     350          29 :     CHECK_RELERR(sf_logistic(x, 0, 1), sf_logistic(x/2, 0, .5));
     351          29 :     CHECK_RELERR(icdf_logistic(p, 0, 1), icdf_logistic(p, 0,.5)*2);
     352          29 :     CHECK_RELERR(isf_logistic(p, 0, 1), isf_logistic(p, 0, .5)*2);
     353             : 
     354          29 :     CHECK_RELERR(cdf_logistic(x, 0, 1), cdf_logistic(x*2 + 1, 1, 2));
     355          29 :     CHECK_RELERR(sf_logistic(x, 0, 1), sf_logistic(x*2 + 1, 1, 2));
     356             : 
     357             :     /*
     358             :      * For p near 0 and p near 1/2, the arithmetic of
     359             :      * translating by 1 loses precision.
     360             :      */
     361          29 :     if (fabs(p) > DBL_EPSILON && fabs(p) < 0.4) {
     362           4 :       CHECK_RELERR(icdf_logistic(p, 0, 1),
     363             :           (icdf_logistic(p, 1, 2) - 1)/2);
     364           4 :       CHECK_RELERR(isf_logistic(p, 0, 1),
     365             :           (isf_logistic(p, 1, 2) - 1)/2);
     366             :     }
     367             : 
     368          29 :     CHECK_RELERR(p, logistic(x));
     369          29 :     CHECK_RELERR(phalf, logistichalf(x));
     370             : 
     371             :     /*
     372             :      * On the interior floating-point numbers, either logit or
     373             :      * logithalf had better give the correct answer.
     374             :      *
     375             :      * For probabilities near 0, we can get much finer resolution with
     376             :      * logit, and for probabilities near 1/2, we can get much finer
     377             :      * resolution with logithalf by representing them using p - 1/2.
     378             :      *
     379             :      * E.g., we can write -.00001 for phalf, and .49999 for p, but the
     380             :      * difference 1/2 - .00001 gives 1.0000000000010001e-5 in binary64
     381             :      * arithmetic.  So test logit(.49999) which should give the same
     382             :      * answer as logithalf(-1.0000000000010001e-5), namely
     383             :      * -4.000000000537333e-5, and also test logithalf(-.00001) which
     384             :      * gives -4.000000000533334e-5 instead -- but don't expect
     385             :      * logit(.49999) to give -4.000000000533334e-5 even though it looks
     386             :      * like 1/2 - .00001.
     387             :      *
     388             :      * A naive implementation of logit will just use log(p/(1 - p)) and
     389             :      * give the answer -4.000000000551673e-05 for .49999, which is
     390             :      * wrong in a lot of digits, which happens because log is
     391             :      * ill-conditioned near 1 and thus amplifies whatever relative
     392             :      * error we made in computing p/(1 - p).
     393             :      */
     394          29 :     if ((0 < p && p < 1) || tor_isinf(x)) {
     395          25 :       if (phalf >= p - 0.5 && phalf <= p - 0.5)
     396          10 :         CHECK_RELERR(x, logit(p));
     397          25 :       if (p >= 0.5 + phalf && p <= 0.5 + phalf)
     398          18 :         CHECK_RELERR(x, logithalf(phalf));
     399             :     }
     400             : 
     401          29 :     CHECK_RELERR(-phalf, logistichalf(-x));
     402          29 :     if (fabs(phalf) < 0.5 || tor_isinf(x))
     403          23 :       CHECK_RELERR(-x, logithalf(-phalf));
     404          29 :     if (p < 1 || tor_isinf(x)) {
     405          26 :       CHECK_RELERR(1 - p, logistic(-x));
     406          26 :       if (p > .75 || tor_isinf(x))
     407           3 :         CHECK_RELERR(-x, logit(1 - p));
     408             :     } else {
     409          29 :       CHECK_LE(logistic(-x), 1e-300);
     410             :     }
     411             :   }
     412             : 
     413         102 :   for (i = 0; i <= 100; i++) {
     414         101 :     double p0 = (double)i/100;
     415             : 
     416         101 :     CHECK_RELERR(logit(p0/(1 + M_E)), sample_logistic(0, 0, p0));
     417         101 :     CHECK_RELERR(-logit(p0/(1 + M_E)), sample_logistic(1, 0, p0));
     418         101 :     CHECK_RELERR(logithalf(p0*(0.5 - 1/(1 + M_E))),
     419             :         sample_logistic(0, 1, p0));
     420         101 :     CHECK_RELERR(-logithalf(p0*(0.5 - 1/(1 + M_E))),
     421             :         sample_logistic(1, 1, p0));
     422             :   }
     423             : 
     424           1 :   if (!ok)
     425           0 :     printf("fail logit/logistic / logistic cdf/sf\n");
     426             : 
     427           1 :   tt_assert(ok);
     428             : 
     429           1 :  done:
     430           1 :   ;
     431           1 : }
     432             : 
     433             : /**
     434             :  * Test the cdf, sf, icdf, and isf of the LogLogistic distribution.
     435             :  */
     436             : static void
     437           1 : test_log_logistic(void *arg)
     438             : {
     439           1 :   (void) arg;
     440             : 
     441           1 :   static const struct {
     442             :     /* x is a point in the support of the LogLogistic distribution */
     443             :     double x;
     444             :     /* 'p' is the probability that a random variable X for a given LogLogistic
     445             :      * probability distribution will take value less-or-equal to x */
     446             :     double p;
     447             :     /* 'np' is the probability that a random variable X for a given LogLogistic
     448             :      * probability distribution will take value greater-or-equal to x. */
     449             :     double np;
     450             :   } cases[] = {
     451             :     { 0, 0, 1 },
     452             :     { 1e-300, 1e-300, 1 },
     453             :     { 1e-17, 1e-17, 1 },
     454             :     { 1e-15, 1e-15, .999999999999999 },
     455             :     { .1, .09090909090909091, .90909090909090909 },
     456             :     { .25, .2, .8 },
     457             :     { .5, .33333333333333333, .66666666666666667 },
     458             :     { .75, .42857142857142855, .5714285714285714 },
     459             :     { .9999, .49997499874993756, .5000250012500626 },
     460             :     { .99999999, .49999999749999996, .5000000025 },
     461             :     { .999999999999999, .49999999999999994, .5000000000000002 },
     462             :     { 1, .5, .5 },
     463             :   };
     464           1 :   double relerr_bound = 3e-15;
     465           1 :   size_t i;
     466           1 :   bool ok = true;
     467             : 
     468          13 :   for (i = 0; i < arraycount(cases); i++) {
     469          12 :     double x = cases[i].x;
     470          12 :     double p = cases[i].p;
     471          12 :     double np = cases[i].np;
     472             : 
     473          12 :     CHECK_RELERR(p, cdf_log_logistic(x, 1, 1));
     474          12 :     CHECK_RELERR(p, cdf_log_logistic(x/2, .5, 1));
     475          12 :     CHECK_RELERR(p, cdf_log_logistic(x*2, 2, 1));
     476          12 :     CHECK_RELERR(p, cdf_log_logistic(sqrt(x), 1, 2));
     477          12 :     CHECK_RELERR(p, cdf_log_logistic(sqrt(x)/2, .5, 2));
     478          12 :     CHECK_RELERR(p, cdf_log_logistic(sqrt(x)*2, 2, 2));
     479          12 :     if (2*sqrt(DBL_MIN) < x) {
     480          10 :       CHECK_RELERR(p, cdf_log_logistic(x*x, 1, .5));
     481          10 :       CHECK_RELERR(p, cdf_log_logistic(x*x/2, .5, .5));
     482          10 :       CHECK_RELERR(p, cdf_log_logistic(x*x*2, 2, .5));
     483             :     }
     484             : 
     485          12 :     CHECK_RELERR(np, sf_log_logistic(x, 1, 1));
     486          12 :     CHECK_RELERR(np, sf_log_logistic(x/2, .5, 1));
     487          12 :     CHECK_RELERR(np, sf_log_logistic(x*2, 2, 1));
     488          12 :     CHECK_RELERR(np, sf_log_logistic(sqrt(x), 1, 2));
     489          12 :     CHECK_RELERR(np, sf_log_logistic(sqrt(x)/2, .5, 2));
     490          12 :     CHECK_RELERR(np, sf_log_logistic(sqrt(x)*2, 2, 2));
     491          12 :     if (2*sqrt(DBL_MIN) < x) {
     492          10 :       CHECK_RELERR(np, sf_log_logistic(x*x, 1, .5));
     493          10 :       CHECK_RELERR(np, sf_log_logistic(x*x/2, .5, .5));
     494          10 :       CHECK_RELERR(np, sf_log_logistic(x*x*2, 2, .5));
     495             :     }
     496             : 
     497          12 :     CHECK_RELERR(np, cdf_log_logistic(1/x, 1, 1));
     498          12 :     CHECK_RELERR(np, cdf_log_logistic(1/(2*x), .5, 1));
     499          12 :     CHECK_RELERR(np, cdf_log_logistic(2/x, 2, 1));
     500          12 :     CHECK_RELERR(np, cdf_log_logistic(1/sqrt(x), 1, 2));
     501          12 :     CHECK_RELERR(np, cdf_log_logistic(1/(2*sqrt(x)), .5, 2));
     502          12 :     CHECK_RELERR(np, cdf_log_logistic(2/sqrt(x), 2, 2));
     503          12 :     if (2*sqrt(DBL_MIN) < x && x < 1/(2*sqrt(DBL_MIN))) {
     504          10 :       CHECK_RELERR(np, cdf_log_logistic(1/(x*x), 1, .5));
     505          10 :       CHECK_RELERR(np, cdf_log_logistic(1/(2*x*x), .5, .5));
     506          10 :       CHECK_RELERR(np, cdf_log_logistic(2/(x*x), 2, .5));
     507             :     }
     508             : 
     509          12 :     CHECK_RELERR(p, sf_log_logistic(1/x, 1, 1));
     510          12 :     CHECK_RELERR(p, sf_log_logistic(1/(2*x), .5, 1));
     511          12 :     CHECK_RELERR(p, sf_log_logistic(2/x, 2, 1));
     512          12 :     CHECK_RELERR(p, sf_log_logistic(1/sqrt(x), 1, 2));
     513          12 :     CHECK_RELERR(p, sf_log_logistic(1/(2*sqrt(x)), .5, 2));
     514          12 :     CHECK_RELERR(p, sf_log_logistic(2/sqrt(x), 2, 2));
     515          12 :     if (2*sqrt(DBL_MIN) < x && x < 1/(2*sqrt(DBL_MIN))) {
     516          10 :       CHECK_RELERR(p, sf_log_logistic(1/(x*x), 1, .5));
     517          10 :       CHECK_RELERR(p, sf_log_logistic(1/(2*x*x), .5, .5));
     518          10 :       CHECK_RELERR(p, sf_log_logistic(2/(x*x), 2, .5));
     519             :     }
     520             : 
     521          12 :     CHECK_RELERR(x, icdf_log_logistic(p, 1, 1));
     522          12 :     CHECK_RELERR(x/2, icdf_log_logistic(p, .5, 1));
     523          12 :     CHECK_RELERR(x*2, icdf_log_logistic(p, 2, 1));
     524          12 :     CHECK_RELERR(x, icdf_log_logistic(p, 1, 1));
     525          12 :     CHECK_RELERR(sqrt(x)/2, icdf_log_logistic(p, .5, 2));
     526          12 :     CHECK_RELERR(sqrt(x)*2, icdf_log_logistic(p, 2, 2));
     527          12 :     CHECK_RELERR(sqrt(x), icdf_log_logistic(p, 1, 2));
     528          12 :     CHECK_RELERR(x*x/2, icdf_log_logistic(p, .5, .5));
     529          12 :     CHECK_RELERR(x*x*2, icdf_log_logistic(p, 2, .5));
     530             : 
     531          12 :     if (np < .9) {
     532           7 :       CHECK_RELERR(x, isf_log_logistic(np, 1, 1));
     533           7 :       CHECK_RELERR(x/2, isf_log_logistic(np, .5, 1));
     534           7 :       CHECK_RELERR(x*2, isf_log_logistic(np, 2, 1));
     535           7 :       CHECK_RELERR(sqrt(x), isf_log_logistic(np, 1, 2));
     536           7 :       CHECK_RELERR(sqrt(x)/2, isf_log_logistic(np, .5, 2));
     537           7 :       CHECK_RELERR(sqrt(x)*2, isf_log_logistic(np, 2, 2));
     538           7 :       CHECK_RELERR(x*x, isf_log_logistic(np, 1, .5));
     539           7 :       CHECK_RELERR(x*x/2, isf_log_logistic(np, .5, .5));
     540           7 :       CHECK_RELERR(x*x*2, isf_log_logistic(np, 2, .5));
     541             : 
     542           7 :       CHECK_RELERR(1/x, icdf_log_logistic(np, 1, 1));
     543           7 :       CHECK_RELERR(1/(2*x), icdf_log_logistic(np, .5, 1));
     544           7 :       CHECK_RELERR(2/x, icdf_log_logistic(np, 2, 1));
     545           7 :       CHECK_RELERR(1/sqrt(x), icdf_log_logistic(np, 1, 2));
     546           7 :       CHECK_RELERR(1/(2*sqrt(x)),
     547             :           icdf_log_logistic(np, .5, 2));
     548           7 :       CHECK_RELERR(2/sqrt(x), icdf_log_logistic(np, 2, 2));
     549           7 :       CHECK_RELERR(1/(x*x), icdf_log_logistic(np, 1, .5));
     550           7 :       CHECK_RELERR(1/(2*x*x), icdf_log_logistic(np, .5, .5));
     551           7 :       CHECK_RELERR(2/(x*x), icdf_log_logistic(np, 2, .5));
     552             :     }
     553             : 
     554          12 :     CHECK_RELERR(1/x, isf_log_logistic(p, 1, 1));
     555          12 :     CHECK_RELERR(1/(2*x), isf_log_logistic(p, .5, 1));
     556          12 :     CHECK_RELERR(2/x, isf_log_logistic(p, 2, 1));
     557          12 :     CHECK_RELERR(1/sqrt(x), isf_log_logistic(p, 1, 2));
     558          12 :     CHECK_RELERR(1/(2*sqrt(x)), isf_log_logistic(p, .5, 2));
     559          12 :     CHECK_RELERR(2/sqrt(x), isf_log_logistic(p, 2, 2));
     560          12 :     CHECK_RELERR(1/(x*x), isf_log_logistic(p, 1, .5));
     561          12 :     CHECK_RELERR(1/(2*x*x), isf_log_logistic(p, .5, .5));
     562          12 :     CHECK_RELERR(2/(x*x), isf_log_logistic(p, 2, .5));
     563             :   }
     564             : 
     565         102 :   for (i = 0; i <= 100; i++) {
     566         101 :     double p0 = (double)i/100;
     567             : 
     568         101 :     CHECK_RELERR(0.5*p0/(1 - 0.5*p0), sample_log_logistic(0, p0));
     569         101 :     CHECK_RELERR((1 - 0.5*p0)/(0.5*p0),
     570             :         sample_log_logistic(1, p0));
     571             :   }
     572             : 
     573           1 :   if (!ok)
     574           0 :     printf("fail log logistic cdf/sf\n");
     575             : 
     576           1 :   tt_assert(ok);
     577             : 
     578           1 :  done:
     579           1 :   ;
     580           1 : }
     581             : 
     582             : /**
     583             :  * Test the cdf, sf, icdf, isf of the Weibull distribution.
     584             :  */
     585             : static void
     586           1 : test_weibull(void *arg)
     587             : {
     588           1 :   (void) arg;
     589             : 
     590           1 :   static const struct {
     591             :     /* x is a point in the support of the Weibull distribution */
     592             :     double x;
     593             :     /* 'p' is the probability that a random variable X for a given Weibull
     594             :      * probability distribution will take value less-or-equal to x */
     595             :     double p;
     596             :     /* 'np' is the probability that a random variable X for a given Weibull
     597             :      * probability distribution will take value greater-or-equal to x. */
     598             :     double np;
     599             :   } cases[] = {
     600             :     { 0, 0, 1 },
     601             :     { 1e-300, 1e-300, 1 },
     602             :     { 1e-17, 1e-17, 1 },
     603             :     { .1, .09516258196404043, .9048374180359595 },
     604             :     { .5, .3934693402873666, .6065306597126334 },
     605             :     { .6931471805599453, .5, .5 },
     606             :     { 1, .6321205588285577, .36787944117144233 },
     607             :     { 10, .9999546000702375, 4.5399929762484854e-5 },
     608             :     { 36, .9999999999999998, 2.319522830243569e-16 },
     609             :     { 37, .9999999999999999, 8.533047625744066e-17 },
     610             :     { 38, 1, 3.1391327920480296e-17 },
     611             :     { 100, 1, 3.720075976020836e-44 },
     612             :     { 708, 1, 3.307553003638408e-308 },
     613             :     { 710, 1, 4.47628622567513e-309 },
     614             :     { 1000, 1, 0 },
     615             :     { HUGE_VAL, 1, 0 },
     616             :   };
     617           1 :   double relerr_bound = 3e-15;
     618           1 :   size_t i;
     619           1 :   bool ok = true;
     620             : 
     621          17 :   for (i = 0; i < arraycount(cases); i++) {
     622          16 :     double x = cases[i].x;
     623          16 :     double p = cases[i].p;
     624          16 :     double np = cases[i].np;
     625             : 
     626          16 :     CHECK_RELERR(p, cdf_weibull(x, 1, 1));
     627          16 :     CHECK_RELERR(p, cdf_weibull(x/2, .5, 1));
     628          16 :     CHECK_RELERR(p, cdf_weibull(x*2, 2, 1));
     629             :     /* For 0 < x < sqrt(DBL_MIN), x^2 loses lots of bits.  */
     630          16 :     if (x <= 0 ||
     631             :         sqrt(DBL_MIN) <= x) {
     632          15 :       CHECK_RELERR(p, cdf_weibull(x*x, 1, .5));
     633          15 :       CHECK_RELERR(p, cdf_weibull(x*x/2, .5, .5));
     634          15 :       CHECK_RELERR(p, cdf_weibull(x*x*2, 2, .5));
     635             :     }
     636          16 :     CHECK_RELERR(p, cdf_weibull(sqrt(x), 1, 2));
     637          16 :     CHECK_RELERR(p, cdf_weibull(sqrt(x)/2, .5, 2));
     638          16 :     CHECK_RELERR(p, cdf_weibull(sqrt(x)*2, 2, 2));
     639          16 :     CHECK_RELERR(np, sf_weibull(x, 1, 1));
     640          16 :     CHECK_RELERR(np, sf_weibull(x/2, .5, 1));
     641          16 :     CHECK_RELERR(np, sf_weibull(x*2, 2, 1));
     642          16 :     CHECK_RELERR(np, sf_weibull(x*x, 1, .5));
     643          16 :     CHECK_RELERR(np, sf_weibull(x*x/2, .5, .5));
     644          16 :     CHECK_RELERR(np, sf_weibull(x*x*2, 2, .5));
     645          16 :     if (x >= 10) {
     646             :       /*
     647             :        * exp amplifies the error of sqrt(x)^2
     648             :        * proportionally to exp(x); for large inputs
     649             :        * this is significant.
     650             :        */
     651           9 :       double t = -expm1(-x*(2*DBL_EPSILON + DBL_EPSILON));
     652           9 :       relerr_bound = t + DBL_EPSILON + t*DBL_EPSILON;
     653           9 :       if (relerr_bound < 3e-15)
     654             :         /*
     655             :          * The tests are written only to 16
     656             :          * decimal places anyway even if your
     657             :          * `double' is, say, i387 binary80, for
     658             :          * whatever reason.
     659             :          */
     660           0 :         relerr_bound = 3e-15;
     661           9 :       CHECK_RELERR(np, sf_weibull(sqrt(x), 1, 2));
     662           9 :       CHECK_RELERR(np, sf_weibull(sqrt(x)/2, .5, 2));
     663           9 :       CHECK_RELERR(np, sf_weibull(sqrt(x)*2, 2, 2));
     664             :     }
     665             : 
     666          16 :     if (p <= 0.75) {
     667             :       /*
     668             :        * For p near 1, not enough precision near 1 to
     669             :        * recover x.
     670             :        */
     671           7 :       CHECK_RELERR(x, icdf_weibull(p, 1, 1));
     672           7 :       CHECK_RELERR(x/2, icdf_weibull(p, .5, 1));
     673           7 :       CHECK_RELERR(x*2, icdf_weibull(p, 2, 1));
     674             :     }
     675          16 :     if (p >= 0.25 && !tor_isinf(x) && np > 0) {
     676             :       /*
     677             :        * For p near 0, not enough precision in np
     678             :        * near 1 to recover x.  For 0, isf gives inf,
     679             :        * even if p is precise enough for the icdf to
     680             :        * work.
     681             :        */
     682          10 :       CHECK_RELERR(x, isf_weibull(np, 1, 1));
     683          10 :       CHECK_RELERR(x/2, isf_weibull(np, .5, 1));
     684          16 :       CHECK_RELERR(x*2, isf_weibull(np, 2, 1));
     685             :     }
     686             :   }
     687             : 
     688         102 :   for (i = 0; i <= 100; i++) {
     689         101 :     double p0 = (double)i/100;
     690             : 
     691         101 :     CHECK_RELERR(3*sqrt(-log(p0/2)), sample_weibull(0, p0, 3, 2));
     692         101 :     CHECK_RELERR(3*sqrt(-log1p(-p0/2)),
     693             :         sample_weibull(1, p0, 3, 2));
     694             :   }
     695             : 
     696           1 :   if (!ok)
     697           0 :     printf("fail Weibull cdf/sf\n");
     698             : 
     699           1 :   tt_assert(ok);
     700             : 
     701           1 :  done:
     702           1 :   ;
     703           1 : }
     704             : 
     705             : /**
     706             :  * Test the cdf, sf, icdf, and isf of the generalized Pareto
     707             :  * distribution.
     708             :  */
     709             : static void
     710           1 : test_genpareto(void *arg)
     711             : {
     712           1 :   (void) arg;
     713             : 
     714           1 :   struct {
     715             :     /* xi is the 'xi' parameter of the generalized Pareto distribution, and the
     716             :      * rest are the same as in the above tests */
     717             :     double xi, x, p, np;
     718           1 :   } cases[] = {
     719             :     { 0, 0, 0, 1 },
     720             :     { 1e-300, .004, 3.992010656008528e-3, .9960079893439915 },
     721             :     { 1e-300, .1, .09516258196404043, .9048374180359595 },
     722             :     { 1e-300, 1, .6321205588285577, .36787944117144233 },
     723             :     { 1e-300, 10, .9999546000702375, 4.5399929762484854e-5 },
     724             :     { 1e-200, 1e-16, 9.999999999999999e-17, .9999999999999999 },
     725             :     { 1e-16, 1e-200, 9.999999999999998e-201, 1 },
     726             :     { 1e-16, 1e-16, 1e-16, 1 },
     727             :     { 1e-16, .004, 3.992010656008528e-3, .9960079893439915 },
     728             :     { 1e-16, .1, .09516258196404043, .9048374180359595 },
     729             :     { 1e-16, 1, .6321205588285577, .36787944117144233 },
     730             :     { 1e-16, 10, .9999546000702375, 4.539992976248509e-5 },
     731             :     { 1e-10, 1e-6, 9.999995000001667e-7, .9999990000005 },
     732             :     { 1e-8, 1e-8, 9.999999950000001e-9, .9999999900000001 },
     733             :     { 1, 1e-300, 1e-300, 1 },
     734             :     { 1, 1e-16, 1e-16, .9999999999999999 },
     735             :     { 1, .1, .09090909090909091, .9090909090909091 },
     736             :     { 1, 1, .5, .5 },
     737             :     { 1, 10, .9090909090909091, .0909090909090909 },
     738             :     { 1, 100, .9900990099009901, .0099009900990099 },
     739             :     { 1, 1000, .999000999000999, 9.990009990009992e-4 },
     740             :     { 10, 1e-300, 1e-300, 1 },
     741             :     { 10, 1e-16, 9.999999999999995e-17, .9999999999999999 },
     742             :     { 10, .1, .06696700846319258, .9330329915368074 },
     743             :     { 10, 1, .21320655780322778, .7867934421967723 },
     744             :     { 10, 10, .3696701667040189, .6303298332959811 },
     745             :     { 10, 100, .49886285755007337, .5011371424499267 },
     746             :     { 10, 1000, .6018968102992647, .3981031897007353 },
     747             :   };
     748           1 :   double xi_array[] = { -1.5, -1, -1e-30, 0, 1e-30, 1, 1.5 };
     749           1 :   size_t i, j;
     750           1 :   double relerr_bound = 3e-15;
     751           1 :   bool ok = true;
     752             : 
     753          29 :   for (i = 0; i < arraycount(cases); i++) {
     754          28 :     double xi = cases[i].xi;
     755          28 :     double x = cases[i].x;
     756          28 :     double p = cases[i].p;
     757          28 :     double np = cases[i].np;
     758             : 
     759          28 :     CHECK_RELERR(p, cdf_genpareto(x, 0, 1, xi));
     760          28 :     CHECK_RELERR(p, cdf_genpareto(x*2, 0, 2, xi));
     761          28 :     CHECK_RELERR(p, cdf_genpareto(x/2, 0, .5, xi));
     762          28 :     CHECK_RELERR(np, sf_genpareto(x, 0, 1, xi));
     763          28 :     CHECK_RELERR(np, sf_genpareto(x*2, 0, 2, xi));
     764          28 :     CHECK_RELERR(np, sf_genpareto(x/2, 0, .5, xi));
     765             : 
     766          28 :     if (p < .5) {
     767          19 :       CHECK_RELERR(x, icdf_genpareto(p, 0, 1, xi));
     768          19 :       CHECK_RELERR(x*2, icdf_genpareto(p, 0, 2, xi));
     769          19 :       CHECK_RELERR(x/2, icdf_genpareto(p, 0, .5, xi));
     770             :     }
     771          28 :     if (np < .5) {
     772           8 :       CHECK_RELERR(x, isf_genpareto(np, 0, 1, xi));
     773           8 :       CHECK_RELERR(x*2, isf_genpareto(np, 0, 2, xi));
     774          28 :       CHECK_RELERR(x/2, isf_genpareto(np, 0, .5, xi));
     775             :     }
     776             :   }
     777             : 
     778           8 :   for (i = 0; i < arraycount(xi_array); i++) {
     779         714 :     for (j = 0; j <= 100; j++) {
     780         707 :       double p0 = (j == 0 ? 2*DBL_MIN : (double)j/100);
     781             : 
     782             :       /* This is actually a check against 0, but we do <= so that the compiler
     783             :          does not raise a -Wfloat-equal */
     784         707 :       if (fabs(xi_array[i]) <= 0) {
     785             :         /*
     786             :          * When xi == 0, the generalized Pareto
     787             :          * distribution reduces to an
     788             :          * exponential distribution.
     789             :          */
     790         101 :         CHECK_RELERR(-log(p0/2),
     791             :             sample_genpareto(0, p0, 0));
     792         101 :         CHECK_RELERR(-log1p(-p0/2),
     793             :             sample_genpareto(1, p0, 0));
     794             :       } else {
     795         606 :         CHECK_RELERR(expm1(-xi_array[i]*log(p0/2))/xi_array[i],
     796             :             sample_genpareto(0, p0, xi_array[i]));
     797         606 :         CHECK_RELERR((j == 0 ? DBL_MIN :
     798             :                 expm1(-xi_array[i]*log1p(-p0/2))/xi_array[i]),
     799             :             sample_genpareto(1, p0, xi_array[i]));
     800             :       }
     801             : 
     802         707 :       CHECK_RELERR(isf_genpareto(p0/2, 0, 1, xi_array[i]),
     803             :           sample_genpareto(0, p0, xi_array[i]));
     804         707 :       CHECK_RELERR(icdf_genpareto(p0/2, 0, 1, xi_array[i]),
     805             :           sample_genpareto(1, p0, xi_array[i]));
     806             :     }
     807             :   }
     808             : 
     809           1 :   tt_assert(ok);
     810             : 
     811           1 :  done:
     812           1 :   ;
     813           1 : }
     814             : 
     815             : /**
     816             :  * Test the deterministic sampler for uniform distribution on [a, b].
     817             :  *
     818             :  * This currently only tests whether the outcome lies within [a, b].
     819             :  */
     820             : static void
     821           1 : test_uniform_interval(void *arg)
     822             : {
     823           1 :   (void) arg;
     824           1 :   struct {
     825             :     /* Sample from a uniform distribution with parameters 'a' and 'b', using
     826             :      * 't' as the sampling index. */
     827             :     double t, a, b;
     828           1 :   } cases[] = {
     829             :     { 0, 0, 0 },
     830             :     { 0, 0, 1 },
     831             :     { 0, 1.0000000000000007, 3.999999999999995 },
     832             :     { 0, 4000, 4000 },
     833             :     { 0.42475836677491291, 4000, 4000 },
     834             :     { 0, -DBL_MAX, DBL_MAX },
     835             :     { 0.25, -DBL_MAX, DBL_MAX },
     836             :     { 0.5, -DBL_MAX, DBL_MAX },
     837             :   };
     838           1 :   size_t i = 0;
     839           1 :   bool ok = true;
     840             : 
     841           9 :   for (i = 0; i < arraycount(cases); i++) {
     842           8 :     double t = cases[i].t;
     843           8 :     double a = cases[i].a;
     844           8 :     double b = cases[i].b;
     845             : 
     846           8 :     CHECK_LE(a, sample_uniform_interval(t, a, b));
     847           8 :     CHECK_LE(sample_uniform_interval(t, a, b), b);
     848             : 
     849           8 :     CHECK_LE(a, sample_uniform_interval(1 - t, a, b));
     850           8 :     CHECK_LE(sample_uniform_interval(1 - t, a, b), b);
     851             : 
     852           8 :     CHECK_LE(sample_uniform_interval(t, -b, -a), -a);
     853           8 :     CHECK_LE(-b, sample_uniform_interval(t, -b, -a));
     854             : 
     855           8 :     CHECK_LE(sample_uniform_interval(1 - t, -b, -a), -a);
     856           8 :     CHECK_LE(-b, sample_uniform_interval(1 - t, -b, -a));
     857             :   }
     858             : 
     859           1 :   tt_assert(ok);
     860             : 
     861           1 :  done:
     862           1 :   ;
     863           1 : }
     864             : 
     865             : /********************** Stochastic tests ****************************/
     866             : 
     867             : /*
     868             :  * Psi test, sometimes also called G-test.  The psi test statistic,
     869             :  * suitably scaled, has chi^2 distribution, but the psi test tends to
     870             :  * have better statistical power in practice to detect deviations than
     871             :  * the chi^2 test does.  (The chi^2 test statistic is the first term of
     872             :  * the Taylor expansion of the psi test statistic.)  The psi test is
     873             :  * generic, for any CDF; particular distributions might have higher-
     874             :  * power tests to distinguish them from predictable deviations or bugs.
     875             :  *
     876             :  * We choose the psi critical value so that a single psi test has
     877             :  * probability below alpha = 1% of spuriously failing even if all the
     878             :  * code is correct.  But the false positive rate for a suite of n tests
     879             :  * is higher: 1 - Binom(0; n, alpha) = 1 - (1 - alpha)^n.  For n = 10,
     880             :  * this is about 10%, and for n = 100 it is well over 50%.
     881             :  *
     882             :  * Given that these tests will run with every CI job, we want to drive down the
     883             :  * false positive rate. We can drive it down by running each test four times,
     884             :  * and accepting it if it passes at least once; in that case, it is as if we
     885             :  * used Binom(4; 2, alpha) = alpha^4 as the false positive rate for each test,
     886             :  * and for n = 10 tests, it would be 9.99999959506e-08. If each CI build has 14
     887             :  * jobs, then the chance of a CI build failing is 1.39999903326e-06, which
     888             :  * means that a CI build will break with probability 50% after about 495106
     889             :  * builds.
     890             :  *
     891             :  * The critical value for a chi^2 distribution with 100 degrees of
     892             :  * freedom and false positive rate alpha = 1% was taken from:
     893             :  *
     894             :  *  NIST/SEMATECH e-Handbook of Statistical Methods, Section
     895             :  *  1.3.6.7.4 `Critical Values of the Chi-Square Distribution',
     896             :  *  <https://www.itl.nist.gov/div898/handbook/eda/section3/eda3674.htm>,
     897             :  *  retrieved 2018-10-28.
     898             :  */
     899             : 
     900             : static const size_t NSAMPLES = 100000;
     901             : /* Number of chances we give to the test to succeed. */
     902             : static const unsigned NTRIALS = 4;
     903             : /* Number of times we want the test to pass per NTRIALS. */
     904             : static const unsigned NPASSES_MIN = 1;
     905             : 
     906             : #define PSI_DF 100                          /* degrees of freedom */
     907             : static const double PSI_CRITICAL = 135.807; /* critical value, alpha = .01 */
     908             : 
     909             : /**
     910             :  * Perform a psi test on an array of sample counts, C, adding up to N
     911             :  * samples, and an array of log expected probabilities, logP,
     912             :  * representing the null hypothesis for the distribution of samples
     913             :  * counted.  Return false if the psi test rejects the null hypothesis,
     914             :  * true if otherwise.
     915             :  */
     916             : static bool
     917          30 : psi_test(const size_t C[PSI_DF], const double logP[PSI_DF], size_t N)
     918             : {
     919          30 :   double psi = 0;
     920          30 :   double c = 0;                 /* Kahan compensation */
     921          30 :   double t, u;
     922          30 :   size_t i;
     923             : 
     924        3030 :   for (i = 0; i < PSI_DF; i++) {
     925             :     /*
     926             :      * c*log(c/(n*p)) = (1/n) * f*log(f/p) where f = c/n is
     927             :      * the frequency, and f*log(f/p) ---> 0 as f ---> 0, so
     928             :      * this is a reasonable choice.  Further, any mass that
     929             :      * _fails_ to turn up in this bin will inflate another
     930             :      * bin instead, so we don't really lose anything by
     931             :      * ignoring empty bins even if they have high
     932             :      * probability.
     933             :      */
     934        3000 :     if (C[i] == 0)
     935         363 :       continue;
     936        2637 :     t = C[i]*(log((double)C[i]/N) - logP[i]) - c;
     937        2637 :     u = psi + t;
     938        2637 :     c = (u - psi) - t;
     939        2637 :     psi = u;
     940             :   }
     941          30 :   psi *= 2;
     942             : 
     943          30 :   return psi <= PSI_CRITICAL;
     944             : }
     945             : 
     946             : static bool
     947           4 : test_stochastic_geometric_impl(double p)
     948             : {
     949           4 :   const struct geometric_t geometric = {
     950             :     .base = GEOMETRIC(geometric),
     951             :     .p = p,
     952             :   };
     953           4 :   double logP[PSI_DF] = {0};
     954           4 :   unsigned ntry = NTRIALS, npass = 0;
     955           4 :   unsigned i;
     956           4 :   size_t j;
     957             : 
     958             :   /* Compute logP[i] = Geom(i + 1; p).  */
     959         400 :   for (i = 0; i < PSI_DF - 1; i++)
     960         396 :     logP[i] = logpmf_geometric(i + 1, p);
     961             : 
     962             :   /* Compute logP[n-1] = log (1 - (P[0] + P[1] + ... + P[n-2])).  */
     963           4 :   logP[PSI_DF - 1] = log1mexp(logsumexp(logP, PSI_DF - 1));
     964             : 
     965           4 :   while (ntry --> 0) {
     966           4 :     size_t C[PSI_DF] = {0};
     967             : 
     968      400004 :     for (j = 0; j < NSAMPLES; j++) {
     969      400000 :       double n_tmp = dist_sample(&geometric.base);
     970             : 
     971             :       /* Must be an integer.  (XXX -Wfloat-equal)  */
     972      400000 :       tor_assert(ceil(n_tmp) <= n_tmp && ceil(n_tmp) >= n_tmp);
     973             : 
     974             :       /* Must be a positive integer.  */
     975      400000 :       tor_assert(n_tmp >= 1);
     976             : 
     977             :       /* Probability of getting a value in the billions is negligible.  */
     978      400000 :       tor_assert(n_tmp <= (double)UINT_MAX);
     979             : 
     980      400000 :       unsigned n = (unsigned) n_tmp;
     981             : 
     982      400000 :       if (n > PSI_DF)
     983             :         n = PSI_DF;
     984      400000 :       C[n - 1]++;
     985             :     }
     986             : 
     987           4 :     if (psi_test(C, logP, NSAMPLES)) {
     988           4 :       if (++npass >= NPASSES_MIN)
     989             :         break;
     990             :     }
     991             :   }
     992             : 
     993           4 :   if (npass >= NPASSES_MIN) {
     994             :     /* printf("pass %s sampler\n", "geometric"); */
     995           4 :     return true;
     996             :   } else {
     997           0 :     printf("fail %s sampler\n", "geometric");
     998           0 :     return false;
     999             :   }
    1000             : }
    1001             : 
    1002             : /**
    1003             :  * Divide the support of <b>dist</b> into histogram bins in <b>logP</b>. Start
    1004             :  * at the 1st percentile and ending at the 99th percentile. Pick the bin
    1005             :  * boundaries using linear interpolation so that they are uniformly spaced.
    1006             :  *
    1007             :  * In each bin logP[i] we insert the expected log-probability that a sampled
    1008             :  * value will fall into that bin. We will use this as the null hypothesis of
    1009             :  * the psi test.
    1010             :  *
    1011             :  * Set logP[i] = log(CDF(x_i) - CDF(x_{i-1})), where x_-1 = -inf, x_n =
    1012             :  * +inf, and x_i = i*(hi - lo)/(n - 2).
    1013             :  */
    1014             : static void
    1015          26 : bin_cdfs(const struct dist_t *dist, double lo, double hi, double *logP,
    1016             :          size_t n)
    1017             : {
    1018             : #define CDF(x)  dist_cdf(dist, x)
    1019             : #define SF(x)   dist_sf(dist, x)
    1020          26 :   const double w = (hi - lo)/(n - 2);
    1021          26 :   double halfway = dist_icdf(dist, 0.5);
    1022          26 :   double x_0, x_1;
    1023          26 :   size_t i;
    1024          26 :   size_t n2 = ceil_to_size_t((halfway - lo)/w);
    1025             : 
    1026          26 :   tor_assert(lo <= halfway);
    1027          26 :   tor_assert(halfway <= hi);
    1028          26 :   tor_assert(n2 <= n);
    1029             : 
    1030          26 :   x_1 = lo;
    1031          26 :   logP[0] = log(CDF(x_1) - 0); /* 0 = CDF(-inf) */
    1032         737 :   for (i = 1; i < n2; i++) {
    1033         711 :     x_0 = x_1;
    1034             :     /* do the linear interpolation */
    1035         711 :     x_1 = (i <= n/2 ? lo + i*w : hi - (n - 2 - i)*w);
    1036             :     /* set the expected log-probability */
    1037         711 :     logP[i] = log(CDF(x_1) - CDF(x_0));
    1038             :   }
    1039          26 :   x_0 = hi;
    1040          26 :   logP[n - 1] = log(SF(x_0) - 0); /* 0 = SF(+inf) = 1 - CDF(+inf) */
    1041             : 
    1042             :   /* In this loop we are filling out the high part of the array. We are using
    1043             :    * SF because in these cases the CDF is near 1 where precision is lower. So
    1044             :    * instead we are using SF near 0 where the precision is higher. We have
    1045             :    * SF(t) = 1 - CDF(t).  */
    1046        1863 :   for (i = 1; i < n - n2; i++) {
    1047        1837 :     x_1 = x_0;
    1048             :     /* do the linear interpolation */
    1049        1837 :     x_0 = (i <= n/2 ? hi - i*w : lo + (n - 2 - i)*w);
    1050             :     /* set the expected log-probability */
    1051        1837 :     logP[n - i - 1] = log(SF(x_0) - SF(x_1));
    1052             :   }
    1053             : #undef SF
    1054             : #undef CDF
    1055          26 : }
    1056             : 
    1057             : /**
    1058             :  * Draw NSAMPLES samples from dist, counting the number of samples x in
    1059             :  * the ith bin C[i] if x_{i-1} <= x < x_i, where x_-1 = -inf, x_n =
    1060             :  * +inf, and x_i = i*(hi - lo)/(n - 2).
    1061             :  */
    1062             : static void
    1063          26 : bin_samples(const struct dist_t *dist, double lo, double hi, size_t *C,
    1064             :             size_t n)
    1065             : {
    1066          26 :   const double w = (hi - lo)/(n - 2);
    1067          26 :   size_t i;
    1068             : 
    1069     2600026 :   for (i = 0; i < NSAMPLES; i++) {
    1070     2600000 :     double x = dist_sample(dist);
    1071     2600000 :     size_t bin;
    1072             : 
    1073     2600000 :     if (x < lo)
    1074             :       bin = 0;
    1075     2574696 :     else if (x < hi)
    1076     2549088 :       bin = 1 + floor_to_size_t((x - lo)/w);
    1077             :     else
    1078       25608 :       bin = n - 1;
    1079     2600000 :     tor_assert(bin < n);
    1080     2600000 :     C[bin]++;
    1081             :   }
    1082          26 : }
    1083             : 
    1084             : /**
    1085             :  * Carry out a Psi test on <b>dist</b>.
    1086             :  *
    1087             :  * Sample NSAMPLES from dist, putting them in bins from -inf to lo to
    1088             :  * hi to +inf, and apply up to two psi tests.  True if at least one psi
    1089             :  * test passes; false if not.  False positive rate should be bounded by
    1090             :  * 0.01^2 = 0.0001.
    1091             :  */
    1092             : static bool
    1093          26 : test_psi_dist_sample(const struct dist_t *dist)
    1094             : {
    1095          26 :   double logP[PSI_DF] = {0};
    1096          26 :   unsigned ntry = NTRIALS, npass = 0;
    1097          26 :   double lo = dist_icdf(dist, 1/(double)(PSI_DF + 2));
    1098          26 :   double hi = dist_isf(dist, 1/(double)(PSI_DF + 2));
    1099             : 
    1100             :   /* Create the null hypothesis in logP */
    1101          26 :   bin_cdfs(dist, lo, hi, logP, PSI_DF);
    1102             : 
    1103             :   /* Now run the test */
    1104          26 :   while (ntry --> 0) {
    1105          26 :     size_t C[PSI_DF] = {0};
    1106          26 :     bin_samples(dist, lo, hi, C, PSI_DF);
    1107          26 :     if (psi_test(C, logP, NSAMPLES)) {
    1108          26 :       if (++npass >= NPASSES_MIN)
    1109             :         break;
    1110             :     }
    1111             :   }
    1112             : 
    1113             :   /* Did we fail or succeed? */
    1114          26 :   if (npass >= NPASSES_MIN) {
    1115             :     /* printf("pass %s sampler\n", dist_name(dist));*/
    1116          26 :     return true;
    1117             :   } else {
    1118           0 :     printf("fail %s sampler\n", dist_name(dist));
    1119           0 :     return false;
    1120             :   }
    1121             : }
    1122             : 
    1123             : static void
    1124           2 : write_stochastic_warning(void)
    1125             : {
    1126           2 :   if (tinytest_cur_test_has_failed()) {
    1127           0 :     printf("\n"
    1128             :          "NOTE: This is a stochastic test, and we expect it to fail from\n"
    1129             :          "time to time, with some low probability. If you see it fail more\n"
    1130             :          "than one trial in 100, though, please tell us.\n\n");
    1131             :   }
    1132           2 : }
    1133             : 
    1134             : static void
    1135           1 : test_stochastic_uniform(void *arg)
    1136             : {
    1137           1 :   (void) arg;
    1138             : 
    1139           1 :   const struct uniform_t uniform01 = {
    1140             :     .base = UNIFORM(uniform01),
    1141             :     .a = 0,
    1142             :     .b = 1,
    1143             :   };
    1144           1 :   const struct uniform_t uniform_pos = {
    1145             :     .base = UNIFORM(uniform_pos),
    1146             :     .a = 1.23,
    1147             :     .b = 4.56,
    1148             :   };
    1149           1 :   const struct uniform_t uniform_neg = {
    1150             :     .base = UNIFORM(uniform_neg),
    1151             :     .a = -10,
    1152             :     .b = -1,
    1153             :   };
    1154           1 :   const struct uniform_t uniform_cross = {
    1155             :     .base = UNIFORM(uniform_cross),
    1156             :     .a = -1.23,
    1157             :     .b = 4.56,
    1158             :   };
    1159           1 :   const struct uniform_t uniform_subnormal = {
    1160             :     .base = UNIFORM(uniform_subnormal),
    1161             :     .a = 4e-324,
    1162             :     .b = 4e-310,
    1163             :   };
    1164           1 :   const struct uniform_t uniform_subnormal_cross = {
    1165             :     .base = UNIFORM(uniform_subnormal_cross),
    1166             :     .a = -4e-324,
    1167             :     .b = 4e-310,
    1168             :   };
    1169           1 :   bool ok = true, tests_failed = true;
    1170             : 
    1171           1 :   testing_enable_reproducible_rng();
    1172             : 
    1173           1 :   ok &= test_psi_dist_sample(&uniform01.base);
    1174           1 :   ok &= test_psi_dist_sample(&uniform_pos.base);
    1175           1 :   ok &= test_psi_dist_sample(&uniform_neg.base);
    1176           1 :   ok &= test_psi_dist_sample(&uniform_cross.base);
    1177           1 :   ok &= test_psi_dist_sample(&uniform_subnormal.base);
    1178           1 :   ok &= test_psi_dist_sample(&uniform_subnormal_cross.base);
    1179             : 
    1180           1 :   tt_assert(ok);
    1181             : 
    1182             :   tests_failed = false;
    1183             : 
    1184             :  done:
    1185           0 :   if (tests_failed) {
    1186           0 :     write_stochastic_warning();
    1187             :   }
    1188           1 :   testing_disable_reproducible_rng();
    1189           1 : }
    1190             : 
    1191             : static bool
    1192           4 : test_stochastic_logistic_impl(double mu, double sigma)
    1193             : {
    1194           4 :   const struct logistic_t dist = {
    1195             :     .base = LOGISTIC(dist),
    1196             :     .mu = mu,
    1197             :     .sigma = sigma,
    1198             :   };
    1199             : 
    1200             :   /* XXX Consider some fancier logistic test.  */
    1201           4 :   return test_psi_dist_sample(&dist.base);
    1202             : }
    1203             : 
    1204             : static bool
    1205           4 : test_stochastic_log_logistic_impl(double alpha, double beta)
    1206             : {
    1207           4 :   const struct log_logistic_t dist = {
    1208             :     .base = LOG_LOGISTIC(dist),
    1209             :     .alpha = alpha,
    1210             :     .beta = beta,
    1211             :   };
    1212             : 
    1213             :   /* XXX Consider some fancier log logistic test.  */
    1214           4 :   return test_psi_dist_sample(&dist.base);
    1215             : }
    1216             : 
    1217             : static bool
    1218           5 : test_stochastic_weibull_impl(double lambda, double k)
    1219             : {
    1220           5 :   const struct weibull_t dist = {
    1221             :     .base = WEIBULL(dist),
    1222             :     .lambda = lambda,
    1223             :     .k = k,
    1224             :   };
    1225             : 
    1226             : // clang-format off
    1227             : /*
    1228             :  * XXX Consider applying a Tiku-Singh test:
    1229             :  *
    1230             :  *    M.L. Tiku and M. Singh, `Testing the two-parameter
    1231             :  *    Weibull distribution', Communications in Statistics --
    1232             :  *    Theory and Methods A10(9), 1981, 907--918.
    1233             : https://www.tandfonline.com/doi/pdf/10.1080/03610928108828082?needAccess=true
    1234             :  */
    1235             : // clang-format on
    1236           5 :   return test_psi_dist_sample(&dist.base);
    1237             : }
    1238             : 
    1239             : static bool
    1240           7 : test_stochastic_genpareto_impl(double mu, double sigma, double xi)
    1241             : {
    1242           7 :   const struct genpareto_t dist = {
    1243             :     .base = GENPARETO(dist),
    1244             :     .mu = mu,
    1245             :     .sigma = sigma,
    1246             :     .xi = xi,
    1247             :   };
    1248             : 
    1249             :   /* XXX Consider some fancier GPD test.  */
    1250           7 :   return test_psi_dist_sample(&dist.base);
    1251             : }
    1252             : 
    1253             : static void
    1254           1 : test_stochastic_genpareto(void *arg)
    1255             : {
    1256           1 :   bool ok = 0;
    1257           1 :   bool tests_failed = true;
    1258           1 :   (void) arg;
    1259             : 
    1260           1 :   testing_enable_reproducible_rng();
    1261             : 
    1262           1 :   ok = test_stochastic_genpareto_impl(0, 1, -0.25);
    1263           1 :   tt_assert(ok);
    1264           1 :   ok = test_stochastic_genpareto_impl(0, 1, -1e-30);
    1265           1 :   tt_assert(ok);
    1266           1 :   ok = test_stochastic_genpareto_impl(0, 1, 0);
    1267           1 :   tt_assert(ok);
    1268           1 :   ok = test_stochastic_genpareto_impl(0, 1, 1e-30);
    1269           1 :   tt_assert(ok);
    1270           1 :   ok = test_stochastic_genpareto_impl(0, 1, 0.25);
    1271           1 :   tt_assert(ok);
    1272           1 :   ok = test_stochastic_genpareto_impl(-1, 1, -0.25);
    1273           1 :   tt_assert(ok);
    1274           1 :   ok = test_stochastic_genpareto_impl(1, 2, 0.25);
    1275           1 :   tt_assert(ok);
    1276             : 
    1277             :   tests_failed = false;
    1278             : 
    1279             :  done:
    1280           0 :   if (tests_failed) {
    1281           0 :     write_stochastic_warning();
    1282             :   }
    1283           1 :   testing_disable_reproducible_rng();
    1284           1 : }
    1285             : 
    1286             : static void
    1287           1 : test_stochastic_geometric(void *arg)
    1288             : {
    1289           1 :   bool ok = 0;
    1290           1 :   bool tests_failed = true;
    1291             : 
    1292           1 :   (void) arg;
    1293             : 
    1294           1 :   testing_enable_reproducible_rng();
    1295             : 
    1296           1 :   ok = test_stochastic_geometric_impl(0.1);
    1297           1 :   tt_assert(ok);
    1298           1 :   ok = test_stochastic_geometric_impl(0.5);
    1299           1 :   tt_assert(ok);
    1300           1 :   ok = test_stochastic_geometric_impl(0.9);
    1301           1 :   tt_assert(ok);
    1302           1 :   ok = test_stochastic_geometric_impl(1);
    1303           1 :   tt_assert(ok);
    1304             : 
    1305             :   tests_failed = false;
    1306             : 
    1307             :  done:
    1308           0 :   if (tests_failed) {
    1309           0 :     write_stochastic_warning();
    1310             :   }
    1311           1 :   testing_disable_reproducible_rng();
    1312           1 : }
    1313             : 
    1314             : static void
    1315           1 : test_stochastic_logistic(void *arg)
    1316             : {
    1317           1 :   bool ok = 0;
    1318           1 :   bool tests_failed = true;
    1319           1 :   (void) arg;
    1320             : 
    1321           1 :   testing_enable_reproducible_rng();
    1322             : 
    1323           1 :   ok = test_stochastic_logistic_impl(0, 1);
    1324           1 :   tt_assert(ok);
    1325           1 :   ok = test_stochastic_logistic_impl(0, 1e-16);
    1326           1 :   tt_assert(ok);
    1327           1 :   ok = test_stochastic_logistic_impl(1, 10);
    1328           1 :   tt_assert(ok);
    1329           1 :   ok = test_stochastic_logistic_impl(-10, 100);
    1330           1 :   tt_assert(ok);
    1331             : 
    1332             :   tests_failed = false;
    1333             : 
    1334             :  done:
    1335           0 :   if (tests_failed) {
    1336           0 :     write_stochastic_warning();
    1337             :   }
    1338           1 :   testing_disable_reproducible_rng();
    1339           1 : }
    1340             : 
    1341             : static void
    1342           1 : test_stochastic_log_logistic(void *arg)
    1343             : {
    1344           1 :   bool ok = 0;
    1345           1 :   (void) arg;
    1346             : 
    1347           1 :   testing_enable_reproducible_rng();
    1348             : 
    1349           1 :   ok = test_stochastic_log_logistic_impl(1, 1);
    1350           1 :   tt_assert(ok);
    1351           1 :   ok = test_stochastic_log_logistic_impl(1, 10);
    1352           1 :   tt_assert(ok);
    1353           1 :   ok = test_stochastic_log_logistic_impl(M_E, 1e-1);
    1354           1 :   tt_assert(ok);
    1355           1 :   ok = test_stochastic_log_logistic_impl(exp(-10), 1e-2);
    1356           1 :   tt_assert(ok);
    1357             : 
    1358           1 :  done:
    1359           1 :   write_stochastic_warning();
    1360           1 :   testing_disable_reproducible_rng();
    1361           1 : }
    1362             : 
    1363             : static void
    1364           1 : test_stochastic_weibull(void *arg)
    1365             : {
    1366           1 :   bool ok = 0;
    1367           1 :   (void) arg;
    1368             : 
    1369           1 :   testing_enable_reproducible_rng();
    1370             : 
    1371           1 :   ok = test_stochastic_weibull_impl(1, 0.5);
    1372           1 :   tt_assert(ok);
    1373           1 :   ok = test_stochastic_weibull_impl(1, 1);
    1374           1 :   tt_assert(ok);
    1375           1 :   ok = test_stochastic_weibull_impl(1, 1.5);
    1376           1 :   tt_assert(ok);
    1377           1 :   ok = test_stochastic_weibull_impl(1, 2);
    1378           1 :   tt_assert(ok);
    1379           1 :   ok = test_stochastic_weibull_impl(10, 1);
    1380           1 :   tt_assert(ok);
    1381             : 
    1382           1 :  done:
    1383           1 :   write_stochastic_warning();
    1384           1 :   testing_disable_reproducible_rng();
    1385           1 :   UNMOCK(crypto_rand);
    1386           1 : }
    1387             : 
    1388             : struct testcase_t prob_distr_tests[] = {
    1389             :   { "logit_logistics", test_logit_logistic, TT_FORK, NULL, NULL },
    1390             :   { "log_logistic", test_log_logistic, TT_FORK, NULL, NULL },
    1391             :   { "weibull", test_weibull, TT_FORK, NULL, NULL },
    1392             :   { "genpareto", test_genpareto, TT_FORK, NULL, NULL },
    1393             :   { "uniform_interval", test_uniform_interval, TT_FORK, NULL, NULL },
    1394             :   END_OF_TESTCASES
    1395             : };
    1396             : 
    1397             : struct testcase_t slow_stochastic_prob_distr_tests[] = {
    1398             :   { "stochastic_genpareto", test_stochastic_genpareto, TT_FORK, NULL, NULL },
    1399             :   { "stochastic_geometric", test_stochastic_geometric, TT_FORK, NULL, NULL },
    1400             :   { "stochastic_uniform", test_stochastic_uniform, TT_FORK, NULL, NULL },
    1401             :   { "stochastic_logistic", test_stochastic_logistic, TT_FORK, NULL, NULL },
    1402             :   { "stochastic_log_logistic", test_stochastic_log_logistic, TT_FORK, NULL,
    1403             :     NULL },
    1404             :   { "stochastic_weibull", test_stochastic_weibull, TT_FORK, NULL, NULL },
    1405             :   END_OF_TESTCASES
    1406             : };

Generated by: LCOV version 1.14