MillerUpdatingRegression.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.math3.stat.regression;

import java.util.Arrays;
import org.apache.commons.math3.exception.util.LocalizedFormats;
import org.apache.commons.math3.util.FastMath;
import org.apache.commons.math3.util.Precision;
import org.apache.commons.math3.util.MathArrays;

/**
 * This class is a concrete implementation of the {@link UpdatingMultipleLinearRegression} interface.
 *
 * <p>The algorithm is described in: <pre>
 * Algorithm AS 274: Least Squares Routines to Supplement Those of Gentleman
 * Author(s): Alan J. Miller
 * Source: Journal of the Royal Statistical Society.
 * Series C (Applied Statistics), Vol. 41, No. 2
 * (1992), pp. 458-478
 * Published by: Blackwell Publishing for the Royal Statistical Society
 * Stable URL: http://www.jstor.org/stable/2347583 </pre></p>
 *
 * <p>This method for multiple regression forms the solution to the OLS problem
 * by updating the QR decomposition as described by Gentleman.</p>
 *
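 * <p>A minimal usage sketch (illustrative only; the data values are invented
 * for demonstration):</p>
 * <pre>
 * MillerUpdatingRegression mur = new MillerUpdatingRegression(2, true);
 * mur.addObservation(new double[]{1.0, 2.0}, 3.0);
 * mur.addObservation(new double[]{2.0, 5.0}, 8.0);
 * mur.addObservation(new double[]{4.0, 1.0}, 6.0);
 * mur.addObservation(new double[]{5.0, 3.0}, 11.0);
 * RegressionResults result = mur.regress();
 * double[] beta = result.getParameterEstimates(); // beta[0] is the intercept
 * </pre>
 *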
 * @since 3.0
 */
public class MillerUpdatingRegression implements UpdatingMultipleLinearRegression {

    /** number of variables in regression */
    private final int nvars;
    /** diagonals of cross products matrix */
    private final double[] d;
    /** the elements of the R'Y vector (right-hand side of the triangular system) */
    private final double[] rhs;
    /** the off diagonal portion of the R matrix */
    private final double[] r;
    /** the tolerance for each of the variables */
    private final double[] tol;
    /** residual sum of squares for all nested regressions */
    private final double[] rss;
    /** order of the regressors */
    private final int[] vorder;
    /** scratch space for tolerance calc */
    private final double[] work_tolset;
    /** number of observations entered */
    private long nobs = 0;
    /** sum of squared errors of largest regression */
    private double sserr = 0.0;
    /** has rss been called? */
    private boolean rss_set = false;
    /** has the tolerance setting method been called */
    private boolean tol_set = false;
    /** flags for variables with linear dependency problems */
    private final boolean[] lindep;
    /** singular x values */
    private final double[] x_sing;
    /** workspace for singularity method */
    private final double[] work_sing;
    /** summation of Y variable */
    private double sumy = 0.0;
    /** summation of squared Y values */
    private double sumsqy = 0.0;
    /** boolean flag whether a regression constant is added */
    private boolean hasIntercept;
    /** zero tolerance */
    private final double epsilon;

    /**
     * The default constructor is private
     * to prevent inadvertent instantiation.
     */
    @SuppressWarnings("unused")
    private MillerUpdatingRegression() {
        this(-1, false, Double.NaN);
    }

    /**
     * This is the augmented constructor for the MillerUpdatingRegression class.
     *
     * @param numberOfVariables number of regressors to expect, not including constant
     * @param includeConstant include a constant automatically
     * @param errorTolerance  zero tolerance, how machine zero is determined
     * @throws ModelSpecificationException if {@code numberOfVariables} is less than 1
     */
    public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant, double errorTolerance)
    throws ModelSpecificationException {
        if (numberOfVariables < 1) {
            throw new ModelSpecificationException(LocalizedFormats.NO_REGRESSORS);
        }
        if (includeConstant) {
            this.nvars = numberOfVariables + 1;
        } else {
            this.nvars = numberOfVariables;
        }
        this.hasIntercept = includeConstant;
        this.nobs = 0;
        this.d = new double[this.nvars];
        this.rhs = new double[this.nvars];
        this.r = new double[this.nvars * (this.nvars - 1) / 2];
        this.tol = new double[this.nvars];
        this.rss = new double[this.nvars];
        this.vorder = new int[this.nvars];
        this.x_sing = new double[this.nvars];
        this.work_sing = new double[this.nvars];
        this.work_tolset = new double[this.nvars];
        this.lindep = new boolean[this.nvars];
        for (int i = 0; i < this.nvars; i++) {
            vorder[i] = i;
        }
        if (errorTolerance > 0) {
            this.epsilon = errorTolerance;
        } else {
            this.epsilon = -errorTolerance;
        }
    }

    /**
     * Primary constructor for the MillerUpdatingRegression.
     *
     * @param numberOfVariables maximum number of potential regressors
     * @param includeConstant include a constant automatically
     * @throws ModelSpecificationException if {@code numberOfVariables} is less than 1
     */
    public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant)
    throws ModelSpecificationException {
        this(numberOfVariables, includeConstant, Precision.EPSILON);
    }

    /**
     * A getter method which determines whether a constant is included.
     * @return true if the regression has an intercept, false otherwise
     */
    public boolean hasIntercept() {
        return this.hasIntercept;
    }

    /**
     * Gets the number of observations added to the regression model.
     * @return number of observations
     */
    public long getN() {
        return this.nobs;
    }

    /**
     * Adds an observation to the regression model.
     * @param x the array with regressor values
     * @param y the value of the dependent variable given these regressors
     * @exception ModelSpecificationException if the length of {@code x} does not equal
     * the number of independent variables in the model
     */
    public void addObservation(final double[] x, final double y)
    throws ModelSpecificationException {

        if ((!this.hasIntercept && x.length != nvars) ||
               (this.hasIntercept && x.length + 1 != nvars)) {
            throw new ModelSpecificationException(LocalizedFormats.INVALID_REGRESSION_OBSERVATION,
                    x.length, nvars);
        }
        if (!this.hasIntercept) {
            include(MathArrays.copyOf(x, x.length), 1.0, y);
        } else {
            final double[] tmp = new double[x.length + 1];
            System.arraycopy(x, 0, tmp, 1, x.length);
            tmp[0] = 1.0;
            include(tmp, 1.0, y);
        }
        ++nobs;

    }

    /**
     * Adds multiple observations to the model.
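     *
     * <p>For instance (an illustrative sketch, with {@code regression} built
     * for two regressors and no constant), three observations could be added as:</p>
     * <pre>
     * double[][] x = {{1.0, 2.0}, {2.0, 5.0}, {4.0, 1.0}};
     * double[] y = {3.0, 8.0, 6.0};
     * regression.addObservations(x, y); // each row x[i] pairs with y[i]
     * </pre>
     *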
     * @param x observations on the regressors
     * @param y observations on the regressand
     * @throws ModelSpecificationException if {@code x} is not rectangular, does not match
     * the length of {@code y} or does not contain sufficient data to estimate the model
     */
    public void addObservations(double[][] x, double[] y) throws ModelSpecificationException {
        if ((x == null) || (y == null) || (x.length != y.length)) {
            throw new ModelSpecificationException(
                  LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE,
                  (x == null) ? 0 : x.length,
                  (y == null) ? 0 : y.length);
        }
        if (x.length == 0) {  // Must be no y data either
            throw new ModelSpecificationException(
                    LocalizedFormats.NO_DATA);
        }
        if (x[0].length + 1 > x.length) {
            throw new ModelSpecificationException(
                  LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
                  x.length, x[0].length);
        }
        for (int i = 0; i < x.length; i++) {
            addObservation(x[i], y[i]);
        }
    }

    /**
     * The include method is where the QR decomposition occurs. This statement forms all
     * intermediate data which will be used for all derivative measures.
     * As noted in the Miller paper, the original implementation overwrites the x vector.
     * In this implementation, the include method is passed a copy of the
     * original data vector, so there is no contamination of the data. Additionally,
     * this method differs slightly from Gentleman's method in that it assumes
     * dense design matrices; the original Gentleman algorithm retains some advantage
     * on sparse matrices.
     *
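     * <p>In outline, for each regressor i the sweep applies a scale-free
     * Givens update (a sketch of what the loop below computes; primes denote
     * updated values and k ranges over the remaining columns):</p>
     * <pre>
     * d'[i]  = d[i] + w * x[i]^2                         // new row multiplier
     * w'     = d[i] * w / d'[i]                          // deflated weight
     * r'[ik] = (d[i] * r[ik] + w * x[i] * x[k]) / d'[i]  // rotate row of R
     * x'[k]  = x[k] - x[i] * r[ik]                       // residualize x[k]
     * </pre>
     *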
     * @param x observations on the regressors
     * @param wi weight of this observation (-1,1)
     * @param yi observation on the regressand
     */
    private void include(final double[] x, final double wi, final double yi) {
        int nextr = 0;
        double w = wi;
        double y = yi;
        double xi;
        double di;
        double wxi;
        double dpi;
        double xk;
        double _w;
        this.rss_set = false;
        sumy = smartAdd(yi, sumy);
        sumsqy = smartAdd(sumsqy, yi * yi);
        for (int i = 0; i < x.length; i++) {
            if (w == 0.0) {
                return;
            }
            xi = x[i];

            if (xi == 0.0) {
                nextr += nvars - i - 1;
                continue;
            }
            di = d[i];
            wxi = w * xi;
            _w = w;
            if (di != 0.0) {
                dpi = smartAdd(di, wxi * xi);
                final double tmp = wxi * xi / di;
                if (FastMath.abs(tmp) > Precision.EPSILON) {
                    w = (di * w) / dpi;
                }
            } else {
                dpi = wxi * xi;
                w = 0.0;
            }
            d[i] = dpi;
            for (int k = i + 1; k < nvars; k++) {
                xk = x[k];
                x[k] = smartAdd(xk, -xi * r[nextr]);
                if (di != 0.0) {
                    r[nextr] = smartAdd(di * r[nextr], (_w * xi) * xk) / dpi;
                } else {
                    r[nextr] = xk / xi;
                }
                ++nextr;
            }
            xk = y;
            y = smartAdd(xk, -xi * rhs[i]);
            if (di != 0.0) {
                rhs[i] = smartAdd(di * rhs[i], wxi * xk) / dpi;
            } else {
                rhs[i] = xk / xi;
            }
        }
        sserr = smartAdd(sserr, w * y * y);
    }

    /**
     * Adds two numbers such that an addend which is negligibly small relative
     * to the other does not contaminate the sum with numerical noise.
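     *
     * <p>For example (illustrative values): with {@code a = 1.0e16} and
     * {@code b = 1.0}, {@code b} falls below {@code a * Precision.EPSILON}
     * (about 1.1 here), so the method returns {@code a} unchanged; with
     * {@code b = 10.0} the ordinary sum {@code a + b} is returned.</p>
     *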
     * @param a an addend
     * @param b an addend
     * @return the sum of a and b
     */
    private double smartAdd(double a, double b) {
        final double _a = FastMath.abs(a);
        final double _b = FastMath.abs(b);
        if (_a > _b) {
            final double eps = _a * Precision.EPSILON;
            if (_b > eps) {
                return a + b;
            }
            return a;
        } else {
            final double eps = _b * Precision.EPSILON;
            if (_a > eps) {
                return a + b;
            }
            return b;
        }
    }

    /**
     * As the name suggests, clear wipes the internals and reorders everything in the
     * canonical order.
     */
    public void clear() {
        Arrays.fill(this.d, 0.0);
        Arrays.fill(this.rhs, 0.0);
        Arrays.fill(this.r, 0.0);
        Arrays.fill(this.tol, 0.0);
        Arrays.fill(this.rss, 0.0);
        Arrays.fill(this.work_tolset, 0.0);
        Arrays.fill(this.work_sing, 0.0);
        Arrays.fill(this.x_sing, 0.0);
        Arrays.fill(this.lindep, false);
        for (int i = 0; i < nvars; i++) {
            this.vorder[i] = i;
        }
        this.nobs = 0;
        this.sserr = 0.0;
        this.sumy = 0.0;
        this.sumsqy = 0.0;
        this.rss_set = false;
        this.tol_set = false;
    }

    /**
     * This sets up tolerances for singularity testing.
     */
    private void tolset() {
        int pos;
        double total;
        final double eps = this.epsilon;
        for (int i = 0; i < nvars; i++) {
            this.work_tolset[i] = FastMath.sqrt(d[i]);
        }
        tol[0] = eps * this.work_tolset[0];
        for (int col = 1; col < nvars; col++) {
            pos = col - 1;
            total = work_tolset[col];
            for (int row = 0; row < col; row++) {
                total += FastMath.abs(r[pos]) * work_tolset[row];
                pos += nvars - row - 2;
            }
            tol[col] = eps * total;
        }
        tol_set = true;
    }

    /**
     * The regcf method conducts the linear regression and extracts the
     * parameter vector. Notice that the algorithm can do subset regression
     * with no alteration.
     *
     * @param nreq how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return an array with the estimated slope coefficients
     * @throws ModelSpecificationException if {@code nreq} is less than 1
     * or greater than the number of independent variables
     */
    private double[] regcf(int nreq) throws ModelSpecificationException {
        int nextr;
        if (nreq < 1) {
            throw new ModelSpecificationException(LocalizedFormats.NO_REGRESSORS);
        }
        if (nreq > this.nvars) {
            throw new ModelSpecificationException(
                    LocalizedFormats.TOO_MANY_REGRESSORS, nreq, this.nvars);
        }
        if (!this.tol_set) {
            tolset();
        }
        final double[] ret = new double[nreq];
        boolean rankProblem = false;
        for (int i = nreq - 1; i > -1; i--) {
            if (FastMath.sqrt(d[i]) < tol[i]) {
                ret[i] = 0.0;
                d[i] = 0.0;
                rankProblem = true;
            } else {
                ret[i] = rhs[i];
                nextr = i * (nvars + nvars - i - 1) / 2;
                for (int j = i + 1; j < nreq; j++) {
                    ret[i] = smartAdd(ret[i], -r[nextr] * ret[j]);
                    ++nextr;
                }
            }
        }
        if (rankProblem) {
            for (int i = 0; i < nreq; i++) {
                if (this.lindep[i]) {
                    ret[i] = Double.NaN;
                }
            }
        }
        return ret;
    }

    /**
     * The method which checks for singularities and then eliminates the offending
     * columns.
     */
    private void singcheck() {
        int pos;
        for (int i = 0; i < nvars; i++) {
            work_sing[i] = FastMath.sqrt(d[i]);
        }
        for (int col = 0; col < nvars; col++) {
            // Set elements within R to zero if they are less than tol(col) in
            // absolute value after being scaled by the square root of their row
            // multiplier
            final double temp = tol[col];
            pos = col - 1;
            for (int row = 0; row < col - 1; row++) {
                if (FastMath.abs(r[pos]) * work_sing[row] < temp) {
                    r[pos] = 0.0;
                }
                pos += nvars - row - 2;
            }
            // If diagonal element is near zero, set it to zero, set appropriate
            // element of LINDEP, and use INCLUD to augment the projections in
            // the lower rows of the orthogonalization.
            lindep[col] = false;
            if (work_sing[col] < temp) {
                lindep[col] = true;
                if (col < nvars - 1) {
                    Arrays.fill(x_sing, 0.0);
                    int _pi = col * (nvars + nvars - col - 1) / 2;
                    for (int _xi = col + 1; _xi < nvars; _xi++, _pi++) {
                        x_sing[_xi] = r[_pi];
                        r[_pi] = 0.0;
                    }
                    final double y = rhs[col];
                    final double weight = d[col];
                    d[col] = 0.0;
                    rhs[col] = 0.0;
                    this.include(x_sing, weight, y);
                } else {
                    sserr += d[col] * rhs[col] * rhs[col];
                }
            }
        }
    }

    /**
     * Calculates the sum of squared errors for the full regression
     * and all subsets in the following manner: <pre>
     * rss[] ={
     * ResidualSumOfSquares_allNvars,
     * ResidualSumOfSquares_FirstNvars-1,
     * ResidualSumOfSquares_FirstNvars-2,
     * ..., ResidualSumOfSquares_FirstVariable} </pre>
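     *
     * <p>This rests on the identity that dropping the last included regressor i
     * increases the residual sum of squares by {@code d[i] * rhs[i]^2}; the
     * loop below accumulates this correction from the full model downward.</p>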
     */
    private void ss() {
        double total = sserr;
        rss[nvars - 1] = sserr;
        for (int i = nvars - 1; i > 0; i--) {
            total += d[i] * rhs[i] * rhs[i];
            rss[i - 1] = total;
        }
        rss_set = true;
    }

    /**
     * Calculates the cov matrix assuming only the first nreq variables are
     * included in the calculation. The returned array contains a symmetric
     * matrix stored in lower triangular form. The matrix will have
     * ( nreq + 1 ) * nreq / 2 elements. For illustration <pre>
     * cov =
     * {
     *  cov_00,
     *  cov_10, cov_11,
     *  cov_20, cov_21, cov22,
     *  ...
     * } </pre>
     *
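     * <p>In this packing, the covariance of regressors i and j (taking
     * {@code i >= j}) sits at offset {@code i * (i + 1) / 2 + j}; for example,
     * cov_21 is at offset {@code 2 * 3 / 2 + 1 = 4}. The same triangular
     * indexing is used when results are reordered in the regress methods.</p>
     *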
     * @param nreq how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return an array with the variance covariance of the included
     * regressors in lower triangular form
     */
    private double[] cov(int nreq) {
        if (this.nobs <= nreq) {
            return null;
        }
        double rnk = 0.0;
        for (int i = 0; i < nreq; i++) {
            if (!this.lindep[i]) {
                rnk += 1.0;
            }
        }
        final double var = rss[nreq - 1] / (nobs - rnk);
        final double[] rinv = new double[nreq * (nreq - 1) / 2];
        inverse(rinv, nreq);
        final double[] covmat = new double[nreq * (nreq + 1) / 2];
        Arrays.fill(covmat, Double.NaN);
        int pos2;
        int pos1;
        int start = 0;
        double total = 0;
        for (int row = 0; row < nreq; row++) {
            pos2 = start;
            if (!this.lindep[row]) {
                for (int col = row; col < nreq; col++) {
                    if (!this.lindep[col]) {
                        pos1 = start + col - row;
                        if (row == col) {
                            total = 1.0 / d[col];
                        } else {
                            total = rinv[pos1 - 1] / d[col];
                        }
                        for (int k = col + 1; k < nreq; k++) {
                            if (!this.lindep[k]) {
                                total += rinv[pos1] * rinv[pos2] / d[k];
                            }
                            ++pos1;
                            ++pos2;
                        }
                        covmat[(col + 1) * col / 2 + row] = total * var;
                    } else {
                        pos2 += nreq - col - 1;
                    }
                }
            }
            start += nreq - row - 1;
        }
        return covmat;
    }

    /**
     * This internal method calculates the inverse of the upper-triangular portion
     * of the R matrix.
     * @param rinv  the storage for the inverse of r
     * @param nreq how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     */
    private void inverse(double[] rinv, int nreq) {
        int pos = nreq * (nreq - 1) / 2 - 1;
        int pos1 = -1;
        int pos2 = -1;
        double total = 0.0;
        Arrays.fill(rinv, Double.NaN);
        for (int row = nreq - 1; row > 0; --row) {
            if (!this.lindep[row]) {
                final int start = (row - 1) * (nvars + nvars - row) / 2;
                for (int col = nreq; col > row; --col) {
                    pos1 = start;
                    pos2 = pos;
                    total = 0.0;
                    for (int k = row; k < col - 1; k++) {
                        pos2 += nreq - k - 1;
                        if (!this.lindep[k]) {
                            total += -r[pos1] * rinv[pos2];
                        }
                        ++pos1;
                    }
                    rinv[pos] = total - r[pos1];
                    --pos;
                }
            } else {
                pos -= nreq - row;
            }
        }
    }

    /**
     * In the original algorithm only the partial correlations of the regressors
     * are returned to the user. In this implementation, we have <pre>
     * corr =
     * {
     *   corrxx - lower triangular
     *   corrxy - bottom row of the matrix
     * }
     * Replaces subroutines PCORR and COR of:
     * ALGORITHM AS274  APPL. STATIST. (1992) VOL.41, NO. 2 </pre>
     *
     * <p>Calculate partial correlations after the variables in rows
     * 1, 2, ..., IN have been forced into the regression.
     * If IN = 1, and the first row of R represents a constant in the
     * model, then the usual simple correlations are returned.</p>
     *
     * <p>If IN = 0, the value returned in array CORMAT for the correlation
     * of variables Xi & Xj is: <pre>
     * sum ( Xi.Xj ) / Sqrt ( sum (Xi^2) . sum (Xj^2) )</pre></p>
     *
     * <p>On return, array CORMAT contains the upper triangle of the matrix of
     * partial correlations stored by rows, excluding the 1's on the diagonal.
     * e.g. if IN = 2, the consecutive elements returned are:
     * (3,4) (3,5) ... (3,ncol), (4,5) (4,6) ... (4,ncol), etc.
     * Array YCORR stores the partial correlations with the Y-variable
     * starting with YCORR(IN+1) = partial correlation with the variable in
     * position (IN+1). </p>
     *
     * @param in how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return an array with the partial correlations of the remainder of
     * regressors with each other and the regressand, in lower triangular form
     */
    public double[] getPartialCorrelations(int in) {
        // Validate before any allocation or array access that depends on the argument.
        if (in < 0 || in >= nvars) {
            return null;
        }
        final double[] output = new double[(nvars - in + 1) * (nvars - in) / 2];
        int pos;
        int pos1;
        int pos2;
        final int rms_off = -in;
        final int wrk_off = -(in + 1);
        final double[] rms = new double[nvars - in];
        final double[] work = new double[nvars - in - 1];
        double sumxx;
        double sumxy;
        double sumyy;
        final int offXX = (nvars - in) * (nvars - in - 1) / 2;
        final int nvm = nvars - 1;
        final int base_pos = r.length - (nvm - in) * (nvm - in + 1) / 2;
        if (d[in] > 0.0) {
            rms[in + rms_off] = 1.0 / FastMath.sqrt(d[in]);
        }
        for (int col = in + 1; col < nvars; col++) {
            pos = base_pos + col - 1 - in;
            sumxx = d[col];
            for (int row = in; row < col; row++) {
                sumxx += d[row] * r[pos] * r[pos];
                pos += nvars - row - 2;
            }
            if (sumxx > 0.0) {
                rms[col + rms_off] = 1.0 / FastMath.sqrt(sumxx);
            } else {
                rms[col + rms_off] = 0.0;
            }
        }
        sumyy = sserr;
        for (int row = in; row < nvars; row++) {
            sumyy += d[row] * rhs[row] * rhs[row];
        }
        if (sumyy > 0.0) {
            sumyy = 1.0 / FastMath.sqrt(sumyy);
        }
        pos = 0;
        for (int col1 = in; col1 < nvars; col1++) {
            sumxy = 0.0;
            Arrays.fill(work, 0.0);
            pos1 = base_pos + col1 - in - 1;
            for (int row = in; row < col1; row++) {
                pos2 = pos1 + 1;
                for (int col2 = col1 + 1; col2 < nvars; col2++) {
                    work[col2 + wrk_off] += d[row] * r[pos1] * r[pos2];
                    pos2++;
                }
                sumxy += d[row] * r[pos1] * rhs[row];
                pos1 += nvars - row - 2;
            }
            pos2 = pos1 + 1;
            for (int col2 = col1 + 1; col2 < nvars; col2++) {
                work[col2 + wrk_off] += d[col1] * r[pos2];
                ++pos2;
                output[(col2 - 1 - in) * (col2 - in) / 2 + col1 - in] =
                        work[col2 + wrk_off] * rms[col1 + rms_off] * rms[col2 + rms_off];
                ++pos;
            }
            sumxy += d[col1] * rhs[col1];
            output[col1 + rms_off + offXX] = sumxy * rms[col1 + rms_off] * sumyy;
        }

        return output;
    }

    /**
     * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2.
     * Move variable from position FROM to position TO in an
     * orthogonal reduction produced by AS75.1.
     *
     * @param from initial position
     * @param to destination
     */
    private void vmove(int from, int to) {
        double d1;
        double d2;
        double X;
        double d1new;
        double d2new;
        double cbar;
        double sbar;
        double Y;
        int first;
        int inc;
        int m1;
        int m2;
        int mp1;
        int pos;
        boolean bSkipTo40 = false;
        if (from == to) {
            return;
        }
        if (!this.rss_set) {
            ss();
        }
        int count = 0;
        if (from < to) {
            first = from;
            inc = 1;
            count = to - from;
        } else {
            first = from - 1;
            inc = -1;
            count = from - to;
        }

        int m = first;
        int idx = 0;
        while (idx < count) {
            m1 = m * (nvars + nvars - m - 1) / 2;
            m2 = m1 + nvars - m - 1;
            mp1 = m + 1;
            bSkipTo40 = false; // reset per step (mirrors the GOTO 40 target in AS 274)

            d1 = d[m];
            d2 = d[mp1];
            // Special cases.
            if (d1 > this.epsilon || d2 > this.epsilon) {
                X = r[m1];
                if (FastMath.abs(X) * FastMath.sqrt(d1) < tol[mp1]) {
                    X = 0.0;
                }
                if (d1 < this.epsilon || FastMath.abs(X) < this.epsilon) {
                    d[m] = d2;
                    d[mp1] = d1;
                    r[m1] = 0.0;
                    for (int col = m + 2; col < nvars; col++) {
                        ++m1;
                        X = r[m1];
                        r[m1] = r[m2];
                        r[m2] = X;
                        ++m2;
                    }
                    X = rhs[m];
                    rhs[m] = rhs[mp1];
                    rhs[mp1] = X;
                    bSkipTo40 = true;
                    //break;
                } else if (d2 < this.epsilon) {
                    d[m] = d1 * X * X;
                    r[m1] = 1.0 / X;
                    for (int _i = m1 + 1; _i < m1 + nvars - m - 1; _i++) {
                        r[_i] /= X;
                    }
                    rhs[m] /= X;
                    bSkipTo40 = true;
                    //break;
                }
                if (!bSkipTo40) {
                    d1new = d2 + d1 * X * X;
                    cbar = d2 / d1new;
                    sbar = X * d1 / d1new;
                    d2new = d1 * cbar;
                    d[m] = d1new;
                    d[mp1] = d2new;
                    r[m1] = sbar;
                    for (int col = m + 2; col < nvars; col++) {
                        ++m1;
                        Y = r[m1];
                        r[m1] = cbar * r[m2] + sbar * Y;
                        r[m2] = Y - X * r[m2];
                        ++m2;
                    }
                    Y = rhs[m];
                    rhs[m] = cbar * rhs[mp1] + sbar * Y;
                    rhs[mp1] = Y - X * rhs[mp1];
                }
            }
            if (m > 0) {
                pos = m;
                for (int row = 0; row < m; row++) {
                    X = r[pos];
                    r[pos] = r[pos - 1];
                    r[pos - 1] = X;
                    pos += nvars - row - 2;
                }
            }
            // Adjust variable order (VORDER), the tolerances (TOL) and
            // the vector of residual sums of squares (RSS).
            m1 = vorder[m];
            vorder[m] = vorder[mp1];
            vorder[mp1] = m1;
            X = tol[m];
            tol[m] = tol[mp1];
            tol[mp1] = X;
            rss[m] = rss[mp1] + d[mp1] * rhs[mp1] * rhs[mp1];

            m += inc;
            ++idx;
        }
    }

    /**
     * ALGORITHM AS274  APPL. STATIST. (1992) VOL.41, NO. 2
     *
     * <p> Re-order the variables in an orthogonal reduction produced by
     * AS75.1 so that the N variables in LIST start at position POS1,
     * though will not necessarily be in the same order as in LIST.
     * Any variables in VORDER before position POS1 are not moved.
     * Auxiliary routine called: VMOVE. </p>
     *
     * <p>This internal method reorders the regressors.</p>
     *
     * @param list the regressors to move
     * @param pos1 where the list will be placed
     * @return -1 error, 0 everything ok
     */
    private int reorderRegressors(int[] list, int pos1) {
        int next;
        int i;
        int l;
        if (list.length < 1 || list.length > nvars + 1 - pos1) {
            return -1;
        }
        next = pos1;
        i = pos1;
        while (i < nvars) {
            l = vorder[i];
            for (int j = 0; j < list.length; j++) {
                if (l == list[j] && i > next) {
                    this.vmove(i, next);
                    ++next;
                    if (next >= list.length + pos1) {
                        return 0;
                    } else {
                        break;
                    }
                }
            }
            ++i;
        }
        return 0;
    }

    /**
     * Gets the diagonal of the hat matrix (also known as the leverage matrix).
     *
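     * <p>A usage sketch (illustrative; {@code regression} stands for a model
     * that has already been fit with an intercept and two regressors):</p>
     * <pre>
     * double hii = regression.getDiagonalOfHatMatrix(new double[]{2.0, 5.0});
     * </pre>
     * <p>Leverage values close to 1 flag influential observations.</p>
     *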
     * @param row_data the regressor values of the observation whose leverage is computed
     * @return the diagonal element of the hat matrix
     */
    public double getDiagonalOfHatMatrix(double[] row_data) {
        double[] wk = new double[this.nvars];
        int pos;
        double total;

        if (row_data.length > nvars) {
            return Double.NaN;
        }
        double[] xrow;
        if (this.hasIntercept) {
            xrow = new double[row_data.length + 1];
            xrow[0] = 1.0;
            System.arraycopy(row_data, 0, xrow, 1, row_data.length);
        } else {
            xrow = row_data;
        }
        double hii = 0.0;
        for (int col = 0; col < xrow.length; col++) {
            if (FastMath.sqrt(d[col]) < tol[col]) {
                wk[col] = 0.0;
            } else {
                pos = col - 1;
                total = xrow[col];
                for (int row = 0; row < col; row++) {
                    total = smartAdd(total, -wk[row] * r[pos]);
                    pos += nvars - row - 2;
                }
                wk[col] = total;
                hii = smartAdd(hii, (total * total) / d[col]);
            }
        }
        return hii;
    }

    /**
     * Gets the order of the regressors, useful if some type of reordering
     * has occurred. Calling {@code regress(int[])} will trigger
     * a reordering.
     *
     * @return int[] with the current order of the regressors
     */
    public int[] getOrderOfRegressors() {
        return MathArrays.copyOf(vorder);
    }

    /**
     * Conducts a regression on the data in the model, using all regressors.
     *
     * @return RegressionResults the structure holding all regression results
     * @exception ModelSpecificationException if the number of observations is
     * less than the number of variables
     */
    public RegressionResults regress() throws ModelSpecificationException {
        return regress(this.nvars);
    }

    /**
     * Conducts a regression on the data in the model, using a subset of regressors.
     *
     * @param numberOfRegressors how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return RegressionResults the structure holding all regression results
     * @exception ModelSpecificationException if the number of observations is
     * less than the number of variables or the number of regressors requested
     * is greater than the regressors in the model
     */
    public RegressionResults regress(int numberOfRegressors) throws ModelSpecificationException {
        if (this.nobs <= numberOfRegressors) {
           throw new ModelSpecificationException(
                   LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
                   this.nobs, numberOfRegressors);
        }
        if (numberOfRegressors > this.nvars) {
            throw new ModelSpecificationException(
                    LocalizedFormats.TOO_MANY_REGRESSORS, numberOfRegressors, this.nvars);
        }

        tolset();
        singcheck();

        double[] beta = this.regcf(numberOfRegressors);

        ss();

        double[] cov = this.cov(numberOfRegressors);

        int rnk = 0;
        for (int i = 0; i < this.lindep.length; i++) {
            if (!this.lindep[i]) {
                ++rnk;
            }
        }

        boolean needsReorder = false;
        for (int i = 0; i < numberOfRegressors; i++) {
            if (this.vorder[i] != i) {
                needsReorder = true;
                break;
            }
        }
        if (!needsReorder) {
            return new RegressionResults(
                    beta, new double[][]{cov}, true, this.nobs, rnk,
                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
        } else {
            double[] betaNew = new double[beta.length];
            double[] covNew = new double[cov.length];

            int[] newIndices = new int[beta.length];
            for (int i = 0; i < nvars; i++) {
                for (int j = 0; j < numberOfRegressors; j++) {
                    if (this.vorder[j] == i) {
                        betaNew[i] = beta[j];
                        newIndices[i] = j;
                    }
                }
            }

            int idx1 = 0;
            int idx2;
            int _i;
            int _j;
            for (int i = 0; i < beta.length; i++) {
                _i = newIndices[i];
                for (int j = 0; j <= i; j++, idx1++) {
                    _j = newIndices[j];
                    if (_i > _j) {
                        idx2 = _i * (_i + 1) / 2 + _j;
                    } else {
                        idx2 = _j * (_j + 1) / 2 + _i;
                    }
                    covNew[idx1] = cov[idx2];
                }
            }
            return new RegressionResults(
                    betaNew, new double[][]{covNew}, true, this.nobs, rnk,
                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
        }
    }

    /**
     * Conducts a regression on the data in the model, using the regressors in the array.
     * Calling this method will change the internal order of the regressors,
     * and care is required in interpreting the hat matrix.
     *
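     * <p>For example (an illustrative sketch), to fit only regressors 0 and 2
     * of a model built with three regressors:</p>
     * <pre>
     * RegressionResults subset = regression.regress(new int[]{0, 2});
     * </pre>
     *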
     * @param variablesToInclude array of variables to include in regression
     * @return RegressionResults the structure holding all regression results
     * @exception ModelSpecificationException if the number of observations is
     * less than the number of variables, the number of regressors requested
     * is greater than the regressors in the model or a regressor index in the
     * regressor array does not exist
     */
    public RegressionResults regress(int[] variablesToInclude) throws ModelSpecificationException {
        if (variablesToInclude.length > this.nvars) {
            throw new ModelSpecificationException(
                    LocalizedFormats.TOO_MANY_REGRESSORS, variablesToInclude.length, this.nvars);
        }
        if (this.nobs <= this.nvars) {
            throw new ModelSpecificationException(
                    LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
                    this.nobs, this.nvars);
        }
        Arrays.sort(variablesToInclude);
        int iExclude = 0;
        for (int i = 0; i < variablesToInclude.length; i++) {
            // Validate the requested index, not the loop counter.
            if (variablesToInclude[i] >= this.nvars) {
                throw new ModelSpecificationException(
                        LocalizedFormats.INDEX_LARGER_THAN_MAX, variablesToInclude[i], this.nvars);
            }
            if (i > 0 && variablesToInclude[i] == variablesToInclude[i - 1]) {
                variablesToInclude[i] = -1;
                ++iExclude;
            }
        }
        int[] series;
        if (iExclude > 0) {
            int j = 0;
            series = new int[variablesToInclude.length - iExclude];
            for (int i = 0; i < variablesToInclude.length; i++) {
                if (variablesToInclude[i] > -1) {
                    series[j] = variablesToInclude[i];
                    ++j;
                }
            }
        } else {
            series = variablesToInclude;
        }

        reorderRegressors(series, 0);
        tolset();
        singcheck();

        double[] beta = this.regcf(series.length);

        ss();

        double[] cov = this.cov(series.length);

        int rnk = 0;
        for (int i = 0; i < this.lindep.length; i++) {
            if (!this.lindep[i]) {
                ++rnk;
            }
        }

        boolean needsReorder = false;
        // Compare only over the requested subset to avoid reading past series.
        for (int i = 0; i < series.length; i++) {
            if (this.vorder[i] != series[i]) {
                needsReorder = true;
                break;
            }
        }
        if (!needsReorder) {
            return new RegressionResults(
                    beta, new double[][]{cov}, true, this.nobs, rnk,
                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
        } else {
            double[] betaNew = new double[beta.length];
            int[] newIndices = new int[beta.length];
            for (int i = 0; i < series.length; i++) {
                for (int j = 0; j < this.vorder.length; j++) {
                    if (this.vorder[j] == series[i]) {
                        betaNew[i] = beta[j];
                        newIndices[i] = j;
                    }
                }
            }
            double[] covNew = new double[cov.length];
            int idx1 = 0;
            int idx2;
            int _i;
            int _j;
            for (int i = 0; i < beta.length; i++) {
                _i = newIndices[i];
                for (int j = 0; j <= i; j++, idx1++) {
                    _j = newIndices[j];
                    if (_i > _j) {
                        idx2 = _i * (_i + 1) / 2 + _j;
                    } else {
                        idx2 = _j * (_j + 1) / 2 + _i;
                    }
                    covNew[idx1] = cov[idx2];
                }
            }
            return new RegressionResults(
                    betaNew, new double[][]{covNew}, true, this.nobs, rnk,
                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
        }
    }
}