AbstractRealDistribution.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.math3.distribution;

  18. import java.io.Serializable;

  19. import org.apache.commons.math3.analysis.UnivariateFunction;
  20. import org.apache.commons.math3.analysis.solvers.UnivariateSolverUtils;
  21. import org.apache.commons.math3.exception.NotStrictlyPositiveException;
  22. import org.apache.commons.math3.exception.NumberIsTooLargeException;
  23. import org.apache.commons.math3.exception.OutOfRangeException;
  24. import org.apache.commons.math3.exception.util.LocalizedFormats;
  25. import org.apache.commons.math3.random.RandomGenerator;
  26. import org.apache.commons.math3.random.RandomDataImpl;
  27. import org.apache.commons.math3.util.FastMath;

  28. /**
  29.  * Base class for probability distributions on the reals.
  30.  * Default implementations are provided for some of the methods
  31.  * that do not vary from distribution to distribution.
  32.  *
  33.  * @since 3.0
  34.  */
  35. public abstract class AbstractRealDistribution
  36. implements RealDistribution, Serializable {
  37.     /** Default accuracy. */
  38.     public static final double SOLVER_DEFAULT_ABSOLUTE_ACCURACY = 1e-6;
  39.     /** Serializable version identifier */
  40.     private static final long serialVersionUID = -38038050983108802L;
  41.      /**
  42.       * RandomData instance used to generate samples from the distribution.
  43.       * @deprecated As of 3.1, to be removed in 4.0. Please use the
  44.       * {@link #random} instance variable instead.
  45.       */
  46.     @Deprecated
  47.     protected RandomDataImpl randomData = new RandomDataImpl();

  48.     /**
  49.      * RNG instance used to generate samples from the distribution.
  50.      * @since 3.1
  51.      */
  52.     protected final RandomGenerator random;

  53.     /** Solver absolute accuracy for inverse cumulative computation */
  54.     private double solverAbsoluteAccuracy = SOLVER_DEFAULT_ABSOLUTE_ACCURACY;

  55.     /**
  56.      * @deprecated As of 3.1, to be removed in 4.0. Please use
  57.      * {@link #AbstractRealDistribution(RandomGenerator)} instead.
  58.      */
  59.     @Deprecated
  60.     protected AbstractRealDistribution() {
  61.         // Legacy users are only allowed to access the deprecated "randomData".
  62.         // New users are forbidden to use this constructor.
  63.         random = null;
  64.     }
  65.     /**
  66.      * @param rng Random number generator.
  67.      * @since 3.1
  68.      */
  69.     protected AbstractRealDistribution(RandomGenerator rng) {
  70.         random = rng;
  71.     }

  72.     /**
  73.      * {@inheritDoc}
  74.      *
  75.      * The default implementation uses the identity
  76.      * <p>{@code P(x0 < X <= x1) = P(X <= x1) - P(X <= x0)}</p>
  77.      *
  78.      * @deprecated As of 3.1 (to be removed in 4.0). Please use
  79.      * {@link #probability(double,double)} instead.
  80.      */
  81.     @Deprecated
  82.     public double cumulativeProbability(double x0, double x1) throws NumberIsTooLargeException {
  83.         return probability(x0, x1);
  84.     }

  85.     /**
  86.      * For a random variable {@code X} whose values are distributed according
  87.      * to this distribution, this method returns {@code P(x0 < X <= x1)}.
  88.      *
  89.      * @param x0 Lower bound (excluded).
  90.      * @param x1 Upper bound (included).
  91.      * @return the probability that a random variable with this distribution
  92.      * takes a value between {@code x0} and {@code x1}, excluding the lower
  93.      * and including the upper endpoint.
  94.      * @throws NumberIsTooLargeException if {@code x0 > x1}.
  95.      *
  96.      * The default implementation uses the identity
  97.      * {@code P(x0 < X <= x1) = P(X <= x1) - P(X <= x0)}
  98.      *
  99.      * @since 3.1
  100.      */
  101.     public double probability(double x0,
  102.                               double x1) {
  103.         if (x0 > x1) {
  104.             throw new NumberIsTooLargeException(LocalizedFormats.LOWER_ENDPOINT_ABOVE_UPPER_ENDPOINT,
  105.                                                 x0, x1, true);
  106.         }
  107.         return cumulativeProbability(x1) - cumulativeProbability(x0);
  108.     }

  109.     /**
  110.      * {@inheritDoc}
  111.      *
  112.      * The default implementation returns
  113.      * <ul>
  114.      * <li>{@link #getSupportLowerBound()} for {@code p = 0},</li>
  115.      * <li>{@link #getSupportUpperBound()} for {@code p = 1}.</li>
  116.      * </ul>
  117.      */
  118.     public double inverseCumulativeProbability(final double p) throws OutOfRangeException {
  119.         /*
  120.          * IMPLEMENTATION NOTES
  121.          * --------------------
  122.          * Where applicable, use is made of the one-sided Chebyshev inequality
  123.          * to bracket the root. This inequality states that
  124.          * P(X - mu >= k * sig) <= 1 / (1 + k^2),
  125.          * mu: mean, sig: standard deviation. Equivalently
  126.          * 1 - P(X < mu + k * sig) <= 1 / (1 + k^2),
  127.          * F(mu + k * sig) >= k^2 / (1 + k^2).
  128.          *
  129.          * For k = sqrt(p / (1 - p)), we find
  130.          * F(mu + k * sig) >= p,
  131.          * and (mu + k * sig) is an upper-bound for the root.
  132.          *
  133.          * Then, introducing Y = -X, mean(Y) = -mu, sd(Y) = sig, and
  134.          * P(Y >= -mu + k * sig) <= 1 / (1 + k^2),
  135.          * P(-X >= -mu + k * sig) <= 1 / (1 + k^2),
  136.          * P(X <= mu - k * sig) <= 1 / (1 + k^2),
  137.          * F(mu - k * sig) <= 1 / (1 + k^2).
  138.          *
  139.          * For k = sqrt((1 - p) / p), we find
  140.          * F(mu - k * sig) <= p,
  141.          * and (mu - k * sig) is a lower-bound for the root.
  142.          *
  143.          * In cases where the Chebyshev inequality does not apply, geometric
  144.          * progressions 1, 2, 4, ... and -1, -2, -4, ... are used to bracket
  145.          * the root.
  146.          */
  147.         if (p < 0.0 || p > 1.0) {
  148.             throw new OutOfRangeException(p, 0, 1);
  149.         }

  150.         double lowerBound = getSupportLowerBound();
  151.         if (p == 0.0) {
  152.             return lowerBound;
  153.         }

  154.         double upperBound = getSupportUpperBound();
  155.         if (p == 1.0) {
  156.             return upperBound;
  157.         }

  158.         final double mu = getNumericalMean();
  159.         final double sig = FastMath.sqrt(getNumericalVariance());
  160.         final boolean chebyshevApplies;
  161.         chebyshevApplies = !(Double.isInfinite(mu) || Double.isNaN(mu) ||
  162.                              Double.isInfinite(sig) || Double.isNaN(sig));

  163.         if (lowerBound == Double.NEGATIVE_INFINITY) {
  164.             if (chebyshevApplies) {
  165.                 lowerBound = mu - sig * FastMath.sqrt((1. - p) / p);
  166.             } else {
  167.                 lowerBound = -1.0;
  168.                 while (cumulativeProbability(lowerBound) >= p) {
  169.                     lowerBound *= 2.0;
  170.                 }
  171.             }
  172.         }

  173.         if (upperBound == Double.POSITIVE_INFINITY) {
  174.             if (chebyshevApplies) {
  175.                 upperBound = mu + sig * FastMath.sqrt(p / (1. - p));
  176.             } else {
  177.                 upperBound = 1.0;
  178.                 while (cumulativeProbability(upperBound) < p) {
  179.                     upperBound *= 2.0;
  180.                 }
  181.             }
  182.         }

  183.         final UnivariateFunction toSolve = new UnivariateFunction() {

  184.             public double value(final double x) {
  185.                 return cumulativeProbability(x) - p;
  186.             }
  187.         };

  188.         double x = UnivariateSolverUtils.solve(toSolve,
  189.                                                    lowerBound,
  190.                                                    upperBound,
  191.                                                    getSolverAbsoluteAccuracy());

  192.         if (!isSupportConnected()) {
  193.             /* Test for plateau. */
  194.             final double dx = getSolverAbsoluteAccuracy();
  195.             if (x - dx >= getSupportLowerBound()) {
  196.                 double px = cumulativeProbability(x);
  197.                 if (cumulativeProbability(x - dx) == px) {
  198.                     upperBound = x;
  199.                     while (upperBound - lowerBound > dx) {
  200.                         final double midPoint = 0.5 * (lowerBound + upperBound);
  201.                         if (cumulativeProbability(midPoint) < px) {
  202.                             lowerBound = midPoint;
  203.                         } else {
  204.                             upperBound = midPoint;
  205.                         }
  206.                     }
  207.                     return upperBound;
  208.                 }
  209.             }
  210.         }
  211.         return x;
  212.     }

  213.     /**
  214.      * Returns the solver absolute accuracy for inverse cumulative computation.
  215.      * You can override this method in order to use a Brent solver with an
  216.      * absolute accuracy different from the default.
  217.      *
  218.      * @return the maximum absolute error in inverse cumulative probability estimates
  219.      */
  220.     protected double getSolverAbsoluteAccuracy() {
  221.         return solverAbsoluteAccuracy;
  222.     }

  223.     /** {@inheritDoc} */
  224.     public void reseedRandomGenerator(long seed) {
  225.         random.setSeed(seed);
  226.         randomData.reSeed(seed);
  227.     }

  228.     /**
  229.      * {@inheritDoc}
  230.      *
  231.      * The default implementation uses the
  232.      * <a href="http://en.wikipedia.org/wiki/Inverse_transform_sampling">
  233.      * inversion method.
  234.      * </a>
  235.      */
  236.     public double sample() {
  237.         return inverseCumulativeProbability(random.nextDouble());
  238.     }

  239.     /**
  240.      * {@inheritDoc}
  241.      *
  242.      * The default implementation generates the sample by calling
  243.      * {@link #sample()} in a loop.
  244.      */
  245.     public double[] sample(int sampleSize) {
  246.         if (sampleSize <= 0) {
  247.             throw new NotStrictlyPositiveException(LocalizedFormats.NUMBER_OF_SAMPLES,
  248.                     sampleSize);
  249.         }
  250.         double[] out = new double[sampleSize];
  251.         for (int i = 0; i < sampleSize; i++) {
  252.             out[i] = sample();
  253.         }
  254.         return out;
  255.     }

  256.     /**
  257.      * {@inheritDoc}
  258.      *
  259.      * @return zero.
  260.      * @since 3.1
  261.      */
  262.     public double probability(double x) {
  263.         return 0d;
  264.     }

  265.     /**
  266.      * Returns the natural logarithm of the probability density function (PDF) of this distribution
  267.      * evaluated at the specified point {@code x}. In general, the PDF is the derivative of the
  268.      * {@link #cumulativeProbability(double) CDF}. If the derivative does not exist at {@code x},
  269.      * then an appropriate replacement should be returned, e.g. {@code Double.POSITIVE_INFINITY},
  270.      * {@code Double.NaN}, or the limit inferior or limit superior of the difference quotient. Note
  271.      * that due to the floating point precision and under/overflow issues, this method will for some
  272.      * distributions be more precise and faster than computing the logarithm of
  273.      * {@link #density(double)}. The default implementation simply computes the logarithm of
  274.      * {@code density(x)}.
  275.      *
  276.      * @param x the point at which the PDF is evaluated
  277.      * @return the logarithm of the value of the probability density function at point {@code x}
  278.      */
  279.     public double logDensity(double x) {
  280.         return FastMath.log(density(x));
  281.     }
  282. }