Sigmoid.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. package org.apache.commons.math3.analysis.function;

  18. import java.util.Arrays;

  19. import org.apache.commons.math3.analysis.FunctionUtils;
  20. import org.apache.commons.math3.analysis.UnivariateFunction;
  21. import org.apache.commons.math3.analysis.DifferentiableUnivariateFunction;
  22. import org.apache.commons.math3.analysis.ParametricUnivariateFunction;
  23. import org.apache.commons.math3.analysis.differentiation.DerivativeStructure;
  24. import org.apache.commons.math3.analysis.differentiation.UnivariateDifferentiableFunction;
  25. import org.apache.commons.math3.exception.NullArgumentException;
  26. import org.apache.commons.math3.exception.DimensionMismatchException;
  27. import org.apache.commons.math3.util.FastMath;

  28. /**
  29.  * <a href="http://en.wikipedia.org/wiki/Sigmoid_function">
  30.  *  Sigmoid</a> function.
  31.  * It is the inverse of the {@link Logit logit} function.
  32.  * A more flexible version, the generalised logistic, is implemented
  33.  * by the {@link Logistic} class.
  34.  *
  35.  * @since 3.0
  36.  */
  37. public class Sigmoid implements UnivariateDifferentiableFunction, DifferentiableUnivariateFunction {
  38.     /** Lower asymptote. */
  39.     private final double lo;
  40.     /** Higher asymptote. */
  41.     private final double hi;

  42.     /**
  43.      * Usual sigmoid function, where the lower asymptote is 0 and the higher
  44.      * asymptote is 1.
  45.      */
  46.     public Sigmoid() {
  47.         this(0, 1);
  48.     }

  49.     /**
  50.      * Sigmoid function.
  51.      *
  52.      * @param lo Lower asymptote.
  53.      * @param hi Higher asymptote.
  54.      */
  55.     public Sigmoid(double lo,
  56.                    double hi) {
  57.         this.lo = lo;
  58.         this.hi = hi;
  59.     }

  60.     /** {@inheritDoc}
  61.      * @deprecated as of 3.1, replaced by {@link #value(DerivativeStructure)}
  62.      */
  63.     @Deprecated
  64.     public UnivariateFunction derivative() {
  65.         return FunctionUtils.toDifferentiableUnivariateFunction(this).derivative();
  66.     }

  67.     /** {@inheritDoc} */
  68.     public double value(double x) {
  69.         return value(x, lo, hi);
  70.     }

  71.     /**
  72.      * Parametric function where the input array contains the parameters of
  73.      * the {@link Sigmoid#Sigmoid(double,double) sigmoid function}, ordered
  74.      * as follows:
  75.      * <ul>
  76.      *  <li>Lower asymptote</li>
  77.      *  <li>Higher asymptote</li>
  78.      * </ul>
  79.      */
  80.     public static class Parametric implements ParametricUnivariateFunction {
  81.         /**
  82.          * Computes the value of the sigmoid at {@code x}.
  83.          *
  84.          * @param x Value for which the function must be computed.
  85.          * @param param Values of lower asymptote and higher asymptote.
  86.          * @return the value of the function.
  87.          * @throws NullArgumentException if {@code param} is {@code null}.
  88.          * @throws DimensionMismatchException if the size of {@code param} is
  89.          * not 2.
  90.          */
  91.         public double value(double x, double ... param)
  92.             throws NullArgumentException,
  93.                    DimensionMismatchException {
  94.             validateParameters(param);
  95.             return Sigmoid.value(x, param[0], param[1]);
  96.         }

  97.         /**
  98.          * Computes the value of the gradient at {@code x}.
  99.          * The components of the gradient vector are the partial
  100.          * derivatives of the function with respect to each of the
  101.          * <em>parameters</em> (lower asymptote and higher asymptote).
  102.          *
  103.          * @param x Value at which the gradient must be computed.
  104.          * @param param Values for lower asymptote and higher asymptote.
  105.          * @return the gradient vector at {@code x}.
  106.          * @throws NullArgumentException if {@code param} is {@code null}.
  107.          * @throws DimensionMismatchException if the size of {@code param} is
  108.          * not 2.
  109.          */
  110.         public double[] gradient(double x, double ... param)
  111.             throws NullArgumentException,
  112.                    DimensionMismatchException {
  113.             validateParameters(param);

  114.             final double invExp1 = 1 / (1 + FastMath.exp(-x));

  115.             return new double[] { 1 - invExp1, invExp1 };
  116.         }

  117.         /**
  118.          * Validates parameters to ensure they are appropriate for the evaluation of
  119.          * the {@link #value(double,double[])} and {@link #gradient(double,double[])}
  120.          * methods.
  121.          *
  122.          * @param param Values for lower and higher asymptotes.
  123.          * @throws NullArgumentException if {@code param} is {@code null}.
  124.          * @throws DimensionMismatchException if the size of {@code param} is
  125.          * not 2.
  126.          */
  127.         private void validateParameters(double[] param)
  128.             throws NullArgumentException,
  129.                    DimensionMismatchException {
  130.             if (param == null) {
  131.                 throw new NullArgumentException();
  132.             }
  133.             if (param.length != 2) {
  134.                 throw new DimensionMismatchException(param.length, 2);
  135.             }
  136.         }
  137.     }

  138.     /**
  139.      * @param x Value at which to compute the sigmoid.
  140.      * @param lo Lower asymptote.
  141.      * @param hi Higher asymptote.
  142.      * @return the value of the sigmoid function at {@code x}.
  143.      */
  144.     private static double value(double x,
  145.                                 double lo,
  146.                                 double hi) {
  147.         return lo + (hi - lo) / (1 + FastMath.exp(-x));
  148.     }

  149.     /** {@inheritDoc}
  150.      * @since 3.1
  151.      */
  152.     public DerivativeStructure value(final DerivativeStructure t)
  153.         throws DimensionMismatchException {

  154.         double[] f = new double[t.getOrder() + 1];
  155.         final double exp = FastMath.exp(-t.getValue());
  156.         if (Double.isInfinite(exp)) {

  157.             // special handling near lower boundary, to avoid NaN
  158.             f[0] = lo;
  159.             Arrays.fill(f, 1, f.length, 0.0);

  160.         } else {

  161.             // the nth order derivative of sigmoid has the form:
  162.             // dn(sigmoid(x)/dxn = P_n(exp(-x)) / (1+exp(-x))^(n+1)
  163.             // where P_n(t) is a degree n polynomial with normalized higher term
  164.             // P_0(t) = 1, P_1(t) = t, P_2(t) = t^2 - t, P_3(t) = t^3 - 4 t^2 + t...
  165.             // the general recurrence relation for P_n is:
  166.             // P_n(x) = n t P_(n-1)(t) - t (1 + t) P_(n-1)'(t)
  167.             final double[] p = new double[f.length];

  168.             final double inv   = 1 / (1 + exp);
  169.             double coeff = hi - lo;
  170.             for (int n = 0; n < f.length; ++n) {

  171.                 // update and evaluate polynomial P_n(t)
  172.                 double v = 0;
  173.                 p[n] = 1;
  174.                 for (int k = n; k >= 0; --k) {
  175.                     v = v * exp + p[k];
  176.                     if (k > 1) {
  177.                         p[k - 1] = (n - k + 2) * p[k - 2] - (k - 1) * p[k - 1];
  178.                     } else {
  179.                         p[0] = 0;
  180.                     }
  181.                 }

  182.                 coeff *= inv;
  183.                 f[n]   = coeff * v;

  184.             }

  185.             // fix function value
  186.             f[0] += lo;

  187.         }

  188.         return t.compose(f);

  189.     }

  190. }