[ VIGRA Homepage | Function Index | Class Index | Namespaces | File List | Main Page ]

vigra/regression.hxx VIGRA

00001 /************************************************************************/
00002 /*                                                                      */
00003 /*                  Copyright 2008 by Ullrich Koethe                    */
00004 /*                                                                      */
00005 /*    This file is part of the VIGRA computer vision library.           */
00006 /*    The VIGRA Website is                                              */
00007 /*        http://hci.iwr.uni-heidelberg.de/vigra/                       */
00008 /*    Please direct questions, bug reports, and contributions to        */
00009 /*        ullrich.koethe@iwr.uni-heidelberg.de    or                    */
00010 /*        vigra@informatik.uni-hamburg.de                               */
00011 /*                                                                      */
00012 /*    Permission is hereby granted, free of charge, to any person       */
00013 /*    obtaining a copy of this software and associated documentation    */
00014 /*    files (the "Software"), to deal in the Software without           */
00015 /*    restriction, including without limitation the rights to use,      */
00016 /*    copy, modify, merge, publish, distribute, sublicense, and/or      */
00017 /*    sell copies of the Software, and to permit persons to whom the    */
00018 /*    Software is furnished to do so, subject to the following          */
00019 /*    conditions:                                                       */
00020 /*                                                                      */
00021 /*    The above copyright notice and this permission notice shall be    */
00022 /*    included in all copies or substantial portions of the             */
00023 /*    Software.                                                         */
00024 /*                                                                      */
00025 /*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND    */
00026 /*    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES   */
00027 /*    OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND          */
00028 /*    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT       */
00029 /*    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,      */
00030 /*    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING      */
00031 /*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR     */
00032 /*    OTHER DEALINGS IN THE SOFTWARE.                                   */
00033 /*                                                                      */
00034 /************************************************************************/
00035 
00036 
00037 #ifndef VIGRA_REGRESSION_HXX
00038 #define VIGRA_REGRESSION_HXX
00039 
00040 #include "matrix.hxx"
00041 #include "linear_solve.hxx"
00042 #include "singular_value_decomposition.hxx"
00043 #include "numerictraits.hxx"
00044 #include "functorexpression.hxx"
00045 
00046 
00047 namespace vigra
00048 {
00049 
00050 namespace linalg
00051 {
00052 
00053 /** \addtogroup Optimization Optimization and Regression
00054  */
00055 //@{
00056    /** Ordinary Least Squares Regression.
00057 
00058        Given a matrix \a A with <tt>m</tt> rows and <tt>n</tt> columns (with <tt>m >= n</tt>),
00059        and a column vector \a b of length <tt>m</tt> rows, this function computes
00060        the column vector \a x of length <tt>n</tt> rows that solves the optimization problem
00061 
00062         \f[ \tilde \textrm{\bf x} = \textrm{argmin}
00063             \left|\left|\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right|\right|_2^2
00064         \f]
00065 
00066        When \a b is a matrix with <tt>k</tt> columns, \a x must also have
00067        <tt>k</tt> columns, which will contain the solutions for the corresponding columns of
00068        \a b. Note that all matrices must already have the correct shape.
00069 
00070        This function is just another name for \ref linearSolve(), perhaps
00071        leading to more readable code when \a A is a rectangular matrix. It returns
00072        <tt>false</tt> when the rank of \a A is less than <tt>n</tt>.
00073        See \ref linearSolve() for more documentation.
00074 
00075     <b>\#include</b> <vigra/regression.hxx>
00076         Namespaces: vigra and vigra::linalg
00077    */
00078 template <class T, class C1, class C2, class C3>
00079 inline bool
00080 leastSquares(MultiArrayView<2, T, C1> const & A,
00081              MultiArrayView<2, T, C2> const &b, MultiArrayView<2, T, C3> &x,
00082              std::string method = "QR")
00083 {
00084     return linearSolve(A, b, x, method);
00085 }
00086 
00087    /** Weighted Least Squares Regression.
00088 
00089        Given a matrix \a A with <tt>m</tt> rows and <tt>n</tt> columns (with <tt>m >= n</tt>),
00090        a vector \a b of length <tt>m</tt>, and a weight vector \a weights of length <tt>m</tt>
00091        with non-negative entries, this function computes the vector \a x of length <tt>n</tt>
00092        that solves the optimization problem
00093 
00094         \f[ \tilde \textrm{\bf x} = \textrm{argmin}
00095             \left(\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right)^T
00096              \textrm{diag}(\textrm{\bf weights})
00097              \left(\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right)
00098         \f]
00099 
00100        where <tt>diag(weights)</tt> creates a diagonal matrix from \a weights.
00101        The algorithm calls \ref leastSquares() on the equivalent problem
00102 
00103         \f[ \tilde \textrm{\bf x} = \textrm{argmin}
00104              \left|\left|\textrm{diag}(\textrm{\bf weights})^{1/2}\textrm{\bf A} \textrm{\bf x} -
00105                   \textrm{diag}(\textrm{\bf weights})^{1/2} \textrm{\bf b}\right|\right|_2^2
00106         \f]
00107 
00108        where the square root of \a weights is just taken element-wise.
00109 
00110        When \a b is a matrix with <tt>k</tt> columns, \a x must also have
00111        <tt>k</tt> columns, which will contain the solutions for the corresponding columns of
00112        \a b. Note that all matrices must already have the correct shape.
00113 
00114        The function returns
00115        <tt>false</tt> when the rank of the weighted matrix \a A is less than <tt>n</tt>.
00116 
00117     <b>\#include</b> <vigra/regression.hxx>
00118         Namespaces: vigra and vigra::linalg
00119    */
00120 template <class T, class C1, class C2, class C3, class C4>
00121 bool
00122 weightedLeastSquares(MultiArrayView<2, T, C1> const & A,
00123              MultiArrayView<2, T, C2> const &b, MultiArrayView<2, T, C3> const &weights,
00124              MultiArrayView<2, T, C4> &x, std::string method = "QR")
00125 {
00126     typedef T Real;
00127 
00128     const unsigned int rows = rowCount(A);
00129     const unsigned int cols = columnCount(A);
00130     const unsigned int rhsCount = columnCount(b);
00131     vigra_precondition(rows >= cols,
00132        "weightedLeastSquares(): Input matrix A must be rectangular with rowCount >= columnCount.");
00133     vigra_precondition(rowCount(b) == rows,
00134        "weightedLeastSquares(): Shape mismatch between matrices A and b.");
00135     vigra_precondition(rowCount(b) == rowCount(weights) && columnCount(weights) == 1,
00136        "weightedLeastSquares(): Weight matrix has wrong shape.");
00137     vigra_precondition(rowCount(x) == cols && columnCount(x) == rhsCount,
00138        "weightedLeastSquares(): Result matrix x has wrong shape.");
00139 
00140     Matrix<T> wa(A.shape()), wb(b.shape());
00141 
00142     for(unsigned int k=0; k<rows; ++k)
00143     {
00144         vigra_precondition(weights(k,0) >= 0,
00145            "weightedLeastSquares(): Weights must be positive.");
00146         T w = std::sqrt(weights(k,0));
00147         for(unsigned int l=0; l<cols; ++l)
00148             wa(k,l) = w * A(k,l);
00149         for(unsigned int l=0; l<rhsCount; ++l)
00150             wb(k,l) = w * b(k,l);
00151     }
00152 
00153     return leastSquares(wa, wb, x, method);
00154 }
00155 
00156    /** Ridge Regression.
00157 
00158        Given a matrix \a A with <tt>m</tt> rows and <tt>n</tt> columns (with <tt>m >= n</tt>),
00159        a vector \a b of length <tt>m</tt>, and a regularization parameter <tt>lambda >= 0.0</tt>,
00160        this function computes the vector \a x of length <tt>n</tt>
00161        that solves the optimization problem
00162 
00163         \f[ \tilde \textrm{\bf x} = \textrm{argmin}
00164             \left|\left|\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right|\right|_2^2 +
00165             \lambda \textrm{\bf x}^T\textrm{\bf x}
00166         \f]
00167 
00168        This is implemented by means of \ref singularValueDecomposition().
00169 
00170        When \a b is a matrix with <tt>k</tt> columns, \a x must also have
00171        <tt>k</tt> columns, which will contain the solutions for the corresponding columns of
00172        \a b. Note that all matrices must already have the correct shape.
00173 
00174        The function returns <tt>false</tt> if the rank of \a A is less than <tt>n</tt>
00175        and <tt>lambda == 0.0</tt>.
00176 
00177     <b>\#include</b> <vigra/regression.hxx>
00178         Namespaces: vigra and vigra::linalg
00179    */
00180 template <class T, class C1, class C2, class C3>
00181 bool
00182 ridgeRegression(MultiArrayView<2, T, C1> const & A,
00183                 MultiArrayView<2, T, C2> const &b, MultiArrayView<2, T, C3> &x, double lambda)
00184 {
00185     typedef T Real;
00186 
00187     const unsigned int rows = rowCount(A);
00188     const unsigned int cols = columnCount(A);
00189     const unsigned int rhsCount = columnCount(b);
00190     vigra_precondition(rows >= cols,
00191        "ridgeRegression(): Input matrix A must be rectangular with rowCount >= columnCount.");
00192     vigra_precondition(rowCount(b) == rows,
00193        "ridgeRegression(): Shape mismatch between matrices A and b.");
00194     vigra_precondition(rowCount(x) == cols && columnCount(x) == rhsCount,
00195        "ridgeRegression(): Result matrix x has wrong shape.");
00196     vigra_precondition(lambda >= 0.0,
00197        "ridgeRegression(): lambda >= 0.0 required.");
00198 
00199     unsigned int m = rows;
00200     unsigned int n = cols;
00201 
00202     Matrix<T> u(m, n), s(n, 1), v(n, n);
00203 
00204     unsigned int rank = singularValueDecomposition(A, u, s, v);
00205     if(rank < n && lambda == 0.0)
00206         return false;
00207 
00208     Matrix<T> t = transpose(u)*b;
00209     for(unsigned int k=0; k<cols; ++k)
00210         for(unsigned int l=0; l<rhsCount; ++l)
00211             t(k,l) *= s(k,0) / (sq(s(k,0)) + lambda);
00212     x = v*t;
00213     return true;
00214 }
00215 
00216    /** Weighted ridge Regression.
00217 
00218        Given a matrix \a A with <tt>m</tt> rows and <tt>n</tt> columns (with <tt>m >= n</tt>),
00219        a vector \a b of length <tt>m</tt>, a weight vector \a weights of length <tt>m</tt>
00220        with non-negative entries, and a regularization parameter <tt>lambda >= 0.0</tt>
00221        this function computes the vector \a x of length <tt>n</tt>
00222        that solves the optimization problem
00223 
00224         \f[ \tilde \textrm{\bf x} = \textrm{argmin}
00225             \left(\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right)^T
00226              \textrm{diag}(\textrm{\bf weights})
00227              \left(\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right) +
00228              \lambda \textrm{\bf x}^T\textrm{\bf x}
00229         \f]
00230 
00231        where <tt>diag(weights)</tt> creates a diagonal matrix from \a weights.
00232        The algorithm calls \ref ridgeRegression() on the equivalent problem
00233 
00234         \f[ \tilde \textrm{\bf x} = \textrm{argmin}
00235             \left|\left|\textrm{diag}(\textrm{\bf weights})^{1/2}\textrm{\bf A} \textrm{\bf x} -
00236                   \textrm{diag}(\textrm{\bf weights})^{1/2} \textrm{\bf b}\right|\right|_2^2 +
00237              \lambda \textrm{\bf x}^T\textrm{\bf x}
00238         \f]
00239 
00240        where the square root of \a weights is just taken element-wise.  This solution is
00241        computed by means of \ref singularValueDecomposition().
00242 
00243        When \a b is a matrix with <tt>k</tt> columns, \a x must also have
00244        <tt>k</tt> columns, which will contain the solutions for the corresponding columns of
00245        \a b. Note that all matrices must already have the correct shape.
00246 
00247        The function returns <tt>false</tt> if the rank of \a A is less than <tt>n</tt>
00248        and <tt>lambda == 0.0</tt>.
00249 
00250     <b>\#include</b> <vigra/regression.hxx>
00251         Namespaces: vigra and vigra::linalg
00252    */
00253 template <class T, class C1, class C2, class C3, class C4>
00254 bool
00255 weightedRidgeRegression(MultiArrayView<2, T, C1> const & A,
00256              MultiArrayView<2, T, C2> const &b, MultiArrayView<2, T, C3> const &weights,
00257              MultiArrayView<2, T, C4> &x, double lambda)
00258 {
00259     typedef T Real;
00260 
00261     const unsigned int rows = rowCount(A);
00262     const unsigned int cols = columnCount(A);
00263     const unsigned int rhsCount = columnCount(b);
00264     vigra_precondition(rows >= cols,
00265        "weightedRidgeRegression(): Input matrix A must be rectangular with rowCount >= columnCount.");
00266     vigra_precondition(rowCount(b) == rows,
00267        "weightedRidgeRegression(): Shape mismatch between matrices A and b.");
00268     vigra_precondition(rowCount(b) == rowCount(weights) && columnCount(weights) == 1,
00269        "weightedRidgeRegression(): Weight matrix has wrong shape.");
00270     vigra_precondition(rowCount(x) == cols && columnCount(x) == rhsCount,
00271        "weightedRidgeRegression(): Result matrix x has wrong shape.");
00272     vigra_precondition(lambda >= 0.0,
00273        "weightedRidgeRegression(): lambda >= 0.0 required.");
00274 
00275     Matrix<T> wa(A.shape()), wb(b.shape());
00276 
00277     for(unsigned int k=0; k<rows; ++k)
00278     {
00279         vigra_precondition(weights(k,0) >= 0,
00280            "weightedRidgeRegression(): Weights must be positive.");
00281         T w = std::sqrt(weights(k,0));
00282         for(unsigned int l=0; l<cols; ++l)
00283             wa(k,l) = w * A(k,l);
00284         for(unsigned int l=0; l<rhsCount; ++l)
00285             wb(k,l) = w * b(k,l);
00286     }
00287 
00288     return ridgeRegression(wa, wb, x, lambda);
00289 }
00290 
00291    /** Ridge Regression with many lambdas.
00292 
00293        This executes \ref ridgeRegression() for a sequence of regularization parameters. This
00294        is implemented so that the \ref singularValueDecomposition() has to be executed only once.
00295        \a lambda must be an array conforming to the <tt>std::vector</tt> interface, i.e. must
00296        support <tt>lambda.size()</tt> and <tt>lambda[k]</tt>. The columns of the matrix \a x
00297        will contain the solutions for the corresponding lambda, so the  number of columns of
00298        the matrix \a x must be equal to <tt>lambda.size()</tt>, and \a b must be a columns vector,
00299        i.e. cannot contain several right hand sides at once.
00300 
00301        The function returns <tt>false</tt> when the matrix \a A is rank deficient. If this
00302        happens, and one of the lambdas is zero, the corresponding column of \a x will be skipped.
00303 
00304     <b>\#include</b> <vigra/regression.hxx>
00305         Namespaces: vigra and vigra::linalg
00306    */
00307 template <class T, class C1, class C2, class C3, class Array>
00308 bool
00309 ridgeRegressionSeries(MultiArrayView<2, T, C1> const & A,
00310           MultiArrayView<2, T, C2> const &b, MultiArrayView<2, T, C3> &x, Array const & lambda)
00311 {
00312     typedef T Real;
00313 
00314     const unsigned int rows = rowCount(A);
00315     const unsigned int cols = columnCount(A);
00316     const unsigned int lambdaCount = lambda.size();
00317     vigra_precondition(rows >= cols,
00318        "ridgeRegressionSeries(): Input matrix A must be rectangular with rowCount >= columnCount.");
00319     vigra_precondition(rowCount(b) == rows && columnCount(b) == 1,
00320        "ridgeRegressionSeries(): Shape mismatch between matrices A and b.");
00321     vigra_precondition(rowCount(x) == cols && columnCount(x) == lambdaCount,
00322        "ridgeRegressionSeries(): Result matrix x has wrong shape.");
00323 
00324     unsigned int m = rows;
00325     unsigned int n = cols;
00326 
00327     Matrix<T> u(m, n), s(n, 1), v(n, n);
00328 
00329     unsigned int rank = singularValueDecomposition(A, u, s, v);
00330 
00331     Matrix<T> xl = transpose(u)*b;
00332     Matrix<T> xt(cols,1);
00333     for(unsigned int i=0; i<lambdaCount; ++i)
00334     {
00335         vigra_precondition(lambda[i] >= 0.0,
00336            "ridgeRegressionSeries(): lambda >= 0.0 required.");
00337         if(lambda[i] == 0.0 && rank < rows)
00338             continue;
00339         for(unsigned int k=0; k<cols; ++k)
00340             xt(k,0) = xl(k,0) * s(k,0) / (sq(s(k,0)) + lambda[i]);
00341         columnVector(x, i) = v*xt;
00342     }
00343     return (rank == n);
00344 }
00345 
00346 /** \brief Pass options to leastAngleRegression().
00347 
00348     <b>\#include</b> <vigra/regression.hxx>
00349         Namespaces: vigra and vigra::linalg
00350 */
00351 class LeastAngleRegressionOptions
00352 {
00353   public:
00354     enum Mode { LARS, LASSO, NNLASSO };
00355 
00356         /** Initialize all options with default values.
00357         */
00358     LeastAngleRegressionOptions()
00359     : max_solution_count(0),
00360       unconstrained_dimension_count(0),
00361       mode(LASSO),
00362       least_squares_solutions(true)
00363     {}
00364 
00365         /** Maximum number of solutions to be computed.
00366 
00367             If \a n is 0 (the default), the number of solutions is determined by the length
00368             of the solution array. Otherwise, the minimum of maxSolutionCount() and that
00369             length is taken.<br>
00370             Default: 0 (use length of solution array)
00371         */
00372     LeastAngleRegressionOptions & maxSolutionCount(unsigned int n)
00373     {
00374         max_solution_count = (int)n;
00375         return *this;
00376     }
00377 
00378         /** Set the mode of the algorithm.
00379 
00380             Mode must be one of "lars", "lasso", "nnlasso". The function just calls
00381             the member function of the corresponding name to set the mode.
00382 
00383             Default: "lasso"
00384         */
00385     LeastAngleRegressionOptions & setMode(std::string mode)
00386     {
00387         mode = tolower(mode);
00388         if(mode == "lars")
00389             this->lars();
00390         else if(mode == "lasso")
00391             this->lasso();
00392         else if(mode == "nnlasso")
00393             this->nnlasso();
00394         else
00395             vigra_fail("LeastAngleRegressionOptions.setMode(): Invalid mode.");
00396         return *this;
00397     }
00398 
00399 
00400         /** Use the plain LARS algorithm.
00401 
00402             Default: inactive
00403         */
00404     LeastAngleRegressionOptions & lars()
00405     {
00406         mode = LARS;
00407         return *this;
00408     }
00409 
00410         /** Use the LASSO modification of the LARS algorithm.
00411 
00412             This allows features to be removed from the active set under certain conditions.<br>
00413             Default: active
00414         */
00415     LeastAngleRegressionOptions & lasso()
00416     {
00417         mode = LASSO;
00418         return *this;
00419     }
00420 
00421         /** Use the non-negative LASSO modification of the LARS algorithm.
00422 
00423             This enforces all non-zero entries in the solution to be positive.<br>
00424             Default: inactive
00425         */
00426     LeastAngleRegressionOptions & nnlasso()
00427     {
00428         mode = NNLASSO;
00429         return *this;
00430     }
00431 
00432         /** Compute least squares solutions.
00433 
00434             Use least angle regression to determine active sets, but
00435             return least squares solutions for the features in each active set,
00436             instead of constrained solutions.<br>
00437             Default: <tt>true</tt>
00438         */
00439     LeastAngleRegressionOptions & leastSquaresSolutions(bool select = true)
00440     {
00441         least_squares_solutions = select;
00442         return *this;
00443     }
00444 
00445     int max_solution_count, unconstrained_dimension_count;
00446     Mode mode;
00447     bool least_squares_solutions;
00448 };
00449 
00450 namespace detail {
00451 
00452 template <class T, class C1, class C2>
00453 struct LarsData
00454 {
00455     typedef typename MultiArrayShape<2>::type Shape;
00456 
00457     int activeSetSize;
00458     MultiArrayView<2, T, C1> A;
00459     MultiArrayView<2, T, C2> b;
00460     Matrix<T> R, qtb, lars_solution, lars_prediction, next_lsq_solution, next_lsq_prediction, searchVector;
00461     ArrayVector<MultiArrayIndex> columnPermutation;
00462 
00463     // init data for a new run
00464     LarsData(MultiArrayView<2, T, C1> const & Ai, MultiArrayView<2, T, C2> const & bi)
00465     : activeSetSize(1),
00466       A(Ai), b(bi), R(A), qtb(b),
00467       lars_solution(A.shape(1), 1), lars_prediction(A.shape(0), 1),
00468       next_lsq_solution(A.shape(1), 1), next_lsq_prediction(A.shape(0), 1), searchVector(A.shape(0), 1),
00469       columnPermutation(A.shape(1))
00470     {
00471         for(unsigned int k=0; k<columnPermutation.size(); ++k)
00472             columnPermutation[k] = k;
00473     }
00474 
00475     // copy data for the recursive call in nnlassolsq
00476     LarsData(LarsData const & d, int asetSize)
00477     : activeSetSize(asetSize),
00478       A(d.R.subarray(Shape(0,0), Shape(d.A.shape(0), activeSetSize))), b(d.qtb), R(A), qtb(b),
00479       lars_solution(d.lars_solution.subarray(Shape(0,0), Shape(activeSetSize, 1))), lars_prediction(d.lars_prediction),
00480       next_lsq_solution(d.next_lsq_solution.subarray(Shape(0,0), Shape(activeSetSize, 1))), 
00481       next_lsq_prediction(d.next_lsq_prediction), searchVector(d.searchVector),
00482       columnPermutation(A.shape(1))
00483     {
00484         for(unsigned int k=0; k<columnPermutation.size(); ++k)
00485             columnPermutation[k] = k;
00486     }
00487 };
00488 
00489 template <class T, class C1, class C2, class Array1, class Array2, class Array3>
00490 unsigned int 
00491 leastAngleRegressionMainLoop(LarsData<T, C1, C2> & d,
00492                              Array1 & activeSets, 
00493                              Array2 * lars_solutions, Array3 * lsq_solutions,
00494                              LeastAngleRegressionOptions const & options)
00495 {
00496     using namespace vigra::functor;
00497 
00498     typedef typename MultiArrayShape<2>::type Shape;
00499     typedef typename Matrix<T>::view_type Subarray;
00500     typedef ArrayVector<MultiArrayIndex> Permutation;
00501     typedef typename Permutation::view_type ColumnSet;
00502 
00503     vigra_precondition(d.activeSetSize > 0,
00504        "leastAngleRegressionMainLoop() must not be called with empty active set.");
00505 
00506     bool enforce_positive = (options.mode == LeastAngleRegressionOptions::NNLASSO);
00507     bool lasso_modification = (options.mode != LeastAngleRegressionOptions::LARS);
00508 
00509     const MultiArrayIndex rows = rowCount(d.R);
00510     const MultiArrayIndex cols = columnCount(d.R);
00511     const MultiArrayIndex maxRank = std::min(rows, cols);
00512 
00513     MultiArrayIndex maxSolutionCount = options.max_solution_count;
00514     if(maxSolutionCount == 0)
00515         maxSolutionCount = lasso_modification
00516                                 ? 10*maxRank
00517                                 : maxRank;
00518 
00519     bool needToRemoveColumn = false;
00520     MultiArrayIndex columnToBeAdded = 0, columnToBeRemoved = 0;
00521     MultiArrayIndex currentSolutionCount = 0;
00522     while(currentSolutionCount < maxSolutionCount)
00523     {
00524         //ColumnSet activeSet = d.columnPermutation.subarray(0, (unsigned int)d.activeSetSize);
00525         ColumnSet inactiveSet = d.columnPermutation.subarray((unsigned int)d.activeSetSize, (unsigned int)cols);
00526 
00527         // find next dimension to be activated
00528         Matrix<T> cLARS = transpose(d.A) * (d.b - d.lars_prediction),      // correlation with LARS residual
00529                   cLSQ  = transpose(d.A) * (d.b - d.next_lsq_prediction);  // correlation with LSQ residual
00530 
00531         // In theory, all vectors in the active set should have the same correlation C, and
00532         // the correlation of all others should not exceed this. In practice, we may find the
00533         // maximum correlation in any variable due to tiny numerical inaccuracies. Therefore, we
00534         // determine C from the entire set of variables.
00535         MultiArrayIndex cmaxIndex = enforce_positive
00536                                       ? argMax(cLARS)
00537                                       : argMax(abs(cLARS));
00538         T C = abs(cLARS(cmaxIndex, 0));
00539 
00540         Matrix<T> ac(cols - d.activeSetSize, 1);
00541         for(MultiArrayIndex k = 0; k<cols-d.activeSetSize; ++k)
00542         {
00543             T rho = cLSQ(inactiveSet[k], 0), 
00544               cc  = C - sign(rho)*cLARS(inactiveSet[k], 0);
00545 
00546             if(rho == 0.0)  // make sure that 0/0 cannot happen in the other cases
00547                 ac(k,0) = 1.0; // variable k is linearly dependent on the active set
00548             else if(rho > 0.0)
00549                 ac(k,0) = cc / (cc + rho); // variable k would enter the active set with positive sign
00550             else if(enforce_positive)
00551                 ac(k,0) = 1.0; // variable k cannot enter the active set because it would be negative
00552             else
00553                 ac(k,0) = cc / (cc - rho); // variable k would enter the active set with negative sign
00554         }
00555 
00556         // in the non-negative case: make sure that a column just removed cannot re-enter right away
00557         // (in standard LASSO, this is allowed, because the variable may re-enter with opposite sign)
00558         if(enforce_positive && needToRemoveColumn)
00559                 ac(columnToBeRemoved-d.activeSetSize,0) = 1.0;
00560 
00561         // find candidate
00562         // Note: R uses Arg1() > epsilon, but this is only possible because it allows several variables to
00563         //       join the active set simultaneously, so that gamma = 0 cannot occur.
00564         columnToBeAdded = argMin(ac);
00565 
00566         // if no new column can be added, we do a full step gamma = 1.0 and then stop, unless a column is removed below
00567         T gamma = (d.activeSetSize == maxRank)
00568                      ? 1.0
00569                      : ac(columnToBeAdded, 0);
00570 
00571         // adjust columnToBeAdded: we skipped the active set
00572         if(columnToBeAdded >= 0)
00573             columnToBeAdded += d.activeSetSize;
00574 
00575         // check whether we have to remove a column from the active set
00576         needToRemoveColumn = false;
00577         if(lasso_modification)
00578         {
00579             // find dimensions whose weight changes sign below gamma*searchDirection
00580             Matrix<T> s(Shape(d.activeSetSize, 1), NumericTraits<T>::max());
00581             for(MultiArrayIndex k=0; k<d.activeSetSize; ++k)
00582             {
00583                 if(( enforce_positive && d.next_lsq_solution(k,0) < 0.0) ||
00584                    (!enforce_positive && sign(d.lars_solution(k,0))*sign(d.next_lsq_solution(k,0)) == -1.0))
00585                         s(k,0) = d.lars_solution(k,0) / (d.lars_solution(k,0) - d.next_lsq_solution(k,0));
00586             }
00587 
00588             columnToBeRemoved = argMinIf(s, Arg1() <= Param(gamma));
00589             if(columnToBeRemoved >= 0)
00590             {
00591                 needToRemoveColumn = true; // remove takes precedence over add
00592                 gamma = s(columnToBeRemoved, 0);
00593             }
00594         }
00595 
00596         // compute the current solutions
00597         d.lars_prediction  = gamma * d.next_lsq_prediction + (1.0 - gamma) * d.lars_prediction;
00598         d.lars_solution    = gamma * d.next_lsq_solution   + (1.0 - gamma) * d.lars_solution;
00599         if(needToRemoveColumn)
00600             d.lars_solution(columnToBeRemoved, 0) = 0.0;  // turn possible epsilon into an exact zero
00601 
00602         // write the current solution
00603         ++currentSolutionCount;
00604         activeSets.push_back(typename Array1::value_type(d.columnPermutation.begin(), d.columnPermutation.begin()+d.activeSetSize));
00605 
00606         if(lsq_solutions != 0)
00607         {
00608             if(enforce_positive)
00609             {
00610                 ArrayVector<Matrix<T> > nnresults;
00611                 ArrayVector<ArrayVector<MultiArrayIndex> > nnactiveSets;
00612                 LarsData<T, C1, C2> nnd(d, d.activeSetSize);
00613 
00614                 leastAngleRegressionMainLoop(nnd, nnactiveSets, &nnresults, (Array3*)0,
00615                                              LeastAngleRegressionOptions().leastSquaresSolutions(false).nnlasso());
00616                 //Matrix<T> nnlsq_solution(d.activeSetSize, 1);
00617                 typename Array2::value_type nnlsq_solution(Shape(d.activeSetSize, 1));
00618                 for(unsigned int k=0; k<nnactiveSets.back().size(); ++k)
00619                 {
00620                     nnlsq_solution(nnactiveSets.back()[k],0) = nnresults.back()[k];
00621                 }
00622                 //lsq_solutions->push_back(nnlsq_solution);
00623                 lsq_solutions->push_back(typename Array3::value_type());
00624                 lsq_solutions->back() = nnlsq_solution;
00625             }
00626             else
00627             {
00628                 //lsq_solutions->push_back(d.next_lsq_solution.subarray(Shape(0,0), Shape(d.activeSetSize, 1)));
00629                 lsq_solutions->push_back(typename Array3::value_type());
00630                 lsq_solutions->back() = d.next_lsq_solution.subarray(Shape(0,0), Shape(d.activeSetSize, 1));
00631             }
00632         }
00633         if(lars_solutions != 0)
00634         {
00635             //lars_solutions->push_back(d.lars_solution.subarray(Shape(0,0), Shape(d.activeSetSize, 1)));
00636             lars_solutions->push_back(typename Array2::value_type());
00637             lars_solutions->back() = d.lars_solution.subarray(Shape(0,0), Shape(d.activeSetSize, 1));
00638         }
00639 
00640         // no further solutions possible
00641         if(gamma == 1.0)
00642             break;
00643 
00644         if(needToRemoveColumn)
00645         {
00646             --d.activeSetSize;
00647             if(columnToBeRemoved != d.activeSetSize)
00648             {
00649                 // remove column 'columnToBeRemoved' and restore triangular form of R
00650                 // note: columnPermutation is automatically swapped here
00651                 detail::upperTriangularSwapColumns(columnToBeRemoved, d.activeSetSize, d.R, d.qtb, d.columnPermutation);
00652 
00653                 // swap solution entries
00654                 std::swap(d.lars_solution(columnToBeRemoved, 0), d.lars_solution(d.activeSetSize,0));
00655                 std::swap(d.next_lsq_solution(columnToBeRemoved, 0), d.next_lsq_solution(d.activeSetSize,0));
00656                 columnToBeRemoved = d.activeSetSize; // keep track of removed column
00657             }
00658             d.lars_solution(d.activeSetSize,0) = 0.0;
00659             d.next_lsq_solution(d.activeSetSize,0) = 0.0;
00660         }
00661         else
00662         {
00663             vigra_invariant(columnToBeAdded >= 0,
00664                 "leastAngleRegression(): internal error (columnToBeAdded < 0)");
00665             // add column 'columnToBeAdded'
00666             if(d.activeSetSize != columnToBeAdded)
00667             {
00668                 std::swap(d.columnPermutation[d.activeSetSize], d.columnPermutation[columnToBeAdded]);
00669                 columnVector(d.R, d.activeSetSize).swapData(columnVector(d.R, columnToBeAdded));
00670                 columnToBeAdded = d.activeSetSize; // keep track of added column
00671             }
00672 
00673             // zero the corresponding entries of the solutions
00674             d.next_lsq_solution(d.activeSetSize,0) = 0.0;
00675             d.lars_solution(d.activeSetSize,0) = 0.0;
00676 
00677             // reduce R (i.e. its newly added column) to triangular form
00678             detail::qrColumnHouseholderStep(d.activeSetSize, d.R, d.qtb);
00679             ++d.activeSetSize;
00680         }
00681 
00682         // compute the LSQ solution of the new active set
00683         Subarray Ractive = d.R.subarray(Shape(0,0), Shape(d.activeSetSize, d.activeSetSize));
00684         Subarray qtbactive = d.qtb.subarray(Shape(0,0), Shape(d.activeSetSize, 1));
00685         Subarray next_lsq_solution_view = d.next_lsq_solution.subarray(Shape(0,0), Shape(d.activeSetSize, 1));
00686         linearSolveUpperTriangular(Ractive, qtbactive, next_lsq_solution_view);
00687 
00688         // compute the LSQ prediction of the new active set
00689         d.next_lsq_prediction.init(0.0);
00690         for(MultiArrayIndex k=0; k<d.activeSetSize; ++k)
00691             d.next_lsq_prediction += next_lsq_solution_view(k,0)*columnVector(d.A, d.columnPermutation[k]);
00692     }
00693 
00694     return (unsigned int)currentSolutionCount;
00695 }
00696 
00697 template <class T, class C1, class C2, class Array1, class Array2>
00698 unsigned int
00699 leastAngleRegressionImpl(MultiArrayView<2, T, C1> const & A, MultiArrayView<2, T, C2> const &b,
00700                          Array1 & activeSets, Array2 * lasso_solutions, Array2 * lsq_solutions,
00701                          LeastAngleRegressionOptions const & options)
00702 {
00703     using namespace vigra::functor;
00704 
00705     const MultiArrayIndex rows = rowCount(A);
00706 
00707     vigra_precondition(rowCount(b) == rows && columnCount(b) == 1,
00708        "leastAngleRegression(): Shape mismatch between matrices A and b.");
00709 
00710     bool enforce_positive = (options.mode == LeastAngleRegressionOptions::NNLASSO);
00711 
00712     detail::LarsData<T, C1, C2> d(A, b);
00713 
00714     // find dimension with largest correlation
00715     Matrix<T> c = transpose(A)*b;
00716     MultiArrayIndex initialColumn = enforce_positive
00717                                        ? argMaxIf(c, Arg1() > Param(0.0))
00718                                        : argMax(abs(c));
00719     if(initialColumn == -1)
00720         return 0; // no solution found
00721 
00722     // prepare initial active set and search direction etc.
00723     std::swap(d.columnPermutation[0], d.columnPermutation[initialColumn]);
00724     columnVector(d.R, 0).swapData(columnVector(d.R, initialColumn));
00725     detail::qrColumnHouseholderStep(0, d.R, d.qtb);
00726     d.next_lsq_solution(0,0) = d.qtb(0,0) / d.R(0,0);
00727     d.next_lsq_prediction = d.next_lsq_solution(0,0) * columnVector(A, d.columnPermutation[0]);
00728     d.searchVector = d.next_lsq_solution(0,0) * columnVector(A, d.columnPermutation[0]);
00729 
00730     return leastAngleRegressionMainLoop(d, activeSets, lasso_solutions, lsq_solutions, options);
00731 }
00732 
00733 } // namespace detail
00734 
00735    /** Least Angle Regression.
00736 
00737     <b>\#include</b> <vigra/regression.hxx>
00738         Namespaces: vigra and vigra::linalg
00739 
00740    <b> Declarations:</b>
00741 
00742     \code
00743     namespace vigra {
00744       namespace linalg {
00745         // compute either LASSO or least squares solutions
00746         template <class T, class C1, class C2, class Array1, class Array2>
00747         unsigned int
00748         leastAngleRegression(MultiArrayView<2, T, C1> const & A, MultiArrayView<2, T, C2> const &b,
00749                              Array1 & activeSets, Array2 & solutions,
00750                              LeastAngleRegressionOptions const & options = LeastAngleRegressionOptions());
00751 
00752         // compute LASSO and least squares solutions
00753         template <class T, class C1, class C2, class Array1, class Array2>
00754         unsigned int
00755         leastAngleRegression(MultiArrayView<2, T, C1> const & A, MultiArrayView<2, T, C2> const &b,
00756                              Array1 & activeSets, Array2 & lasso_solutions, Array2 & lsq_solutions,
00757                              LeastAngleRegressionOptions const & options = LeastAngleRegressionOptions());
00758       }
00759       using linalg::leastAngleRegression;
00760     }
00761     \endcode
00762 
00763        This function implements Least Angle Regression (LARS) as described in
00764 
00765        &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
00766        B.Efron, T.Hastie, I.Johnstone, and R.Tibshirani: <i>"Least Angle Regression"</i>,
00767        Annals of Statistics 32(2):407-499, 2004.
00768 
00769        It is an efficient algorithm to solve the L1-regularized least squares (LASSO) problem
00770 
00771         \f[ \tilde \textrm{\bf x} = \textrm{argmin}
00772              \left|\left|\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right|\right|_2^2
00773            \textrm{ subject to } \left|\left|\textrm{\bf x}\right|\right|_1\le s
00774         \f]
00775 
00776        and the L1-regularized non-negative least squares (NN-LASSO) problem
00777 
00778         \f[ \tilde \textrm{\bf x} = \textrm{argmin} \left|\left|\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right|\right|_2^2
00779            \textrm{ subject to } \left|\left|\textrm{\bf x}\right|\right|_1\le s \textrm{ and } \textrm{\bf x}\ge \textrm{\bf 0}
00780         \f]
00781 
00782        where \a A is a matrix with <tt>m</tt> rows and <tt>n</tt> columns (often with <tt>m < n</tt>),
00783        \a b a vector of length <tt>m</tt>, and a regularization parameter s >= 0.0.
00784        L1-regularization has the desirable effect that it causes the solution \a x to be sparse, i.e. only
00785        the most important variables (called the <em>active set</em>) have non-zero values. The
00786        key insight of the LARS algorithm is the following: When the solution vector is considered
00787        as a function of the regularization parameter s, then <b>x</b>(s) is a piecewise
00788        linear function, i.e. a polyline in n-dimensional space. The knots of the polyline
00789        occur precisely at those values of s where one variable enters or leaves the active set,
00790        and can be efficiently computed.
00791 
00792        Therefore, leastAngleRegression() returns the entire solution path as a sequence of knot points, starting
00793        at \f$\textrm{\bf x}(s=0)\f$ (where the only feasible solution is obviously <b>x</b> = 0) and ending at
00794        \f$\textrm{\bf x}(s=\infty)\f$ (where the solution becomes the ordinary least squares solution). Actually,
00795        the initial null solution is not explicitly returned, i.e. the sequence starts at the first non-zero
00796        solution with one variable in the active set. The function leastAngleRegression() returns the number
00797        of solutions( i.e. knot points) computed.
00798 
00799        The sequences of active sets and corresponding variable weights are returned in \a activeSets and
00800        \a solutions respectively. That is, <tt>activeSets[i]</tt> is an \ref vigra::ArrayVector "ArrayVector<int>"
00801        containing the indices of the variables that are active at the i-th knot, and <tt>solutions</tt> is a
00802        \ref vigra::linalg::Matrix "Matrix<T>" containing the weights of those variables, in the same order (see
00803        example below). Variables not contained in <tt>activeSets[i]</tt> are zero at this solution.
00804 
00805        The behavior of the algorithm can be adapted by \ref vigra::linalg::LeastAngleRegressionOptions
00806        "LeastAngleRegressionOptions":
00807         <DL>
00808         <DT><b>options.lasso()</b> (active by default)
00809                           <DD> Compute the LASSO solution as described above.
00810         <DT><b>options.nnlasso()</b> (inactive by default)
00811                           <DD> Compute non-negative LASSO solutions, i.e. use the additional constraint that
00812                                <b>x</b> >= 0 in all solutions.
00813         <DT><b>options.lars()</b> (inactive by default)
00814                           <DD> Compute a solution path according to the plain LARS rule, i.e. never remove
00815                                a variable from the active set once it entered.
00816         <DT><b>options.leastSquaresSolutions(bool)</b> (default: true)
00817                           <DD> Use the algorithm mode selected above
00818                                to determine the sequence of active sets, but then compute and return an
00819                                ordinary (unconstrained) least squares solution for every active set.<br>
00820                                <b>Note:</b> The second form of leastAngleRegression() ignores this option and
00821                                does always compute both constrained and unconstrained solutions (returned in
00822                                \a lasso_solutions and \a lsq_solutions respectively).
00823         <DT><b>maxSolutionCount(unsigned int n)</b> (default: n = 0, i.e. compute all solutions)
00824                           <DD> Compute at most <tt>n</tt> solutions.
00825         </DL>
00826 
00827         <b>Usage:</b>
00828 
00829         \code
00830         int m = ..., n = ...;
00831         Matrix<double> A(m, n), b(m, 1);
00832         ... // fill A and b
00833 
00834         // normalize the input
00835         Matrix<double> offset(1,n), scaling(1,n);
00836         prepareColumns(A, A, offset, scaling, DataPreparationGoals(ZeroMean|UnitVariance));
00837         prepareColumns(b, b, DataPreparationGoals(ZeroMean));
00838 
00839         // arrays to hold the output
00840         ArrayVector<ArrayVector<int> > activeSets;
00841         ArrayVector<Matrix<double> > solutions;
00842 
00843         // run leastAngleRegression() in non-negative LASSO mode
00844         int numSolutions = leastAngleRegression(A, b, activeSets, solutions,
00845                                     LeastAngleRegressionOptions().nnlasso());
00846 
00847         // print results
00848         Matrix<double> denseSolution(1, n);
00849         for (MultiArrayIndex k = 0; k < numSolutions; ++k)
00850         {
00851             // transform the sparse solution into a dense vector
00852             denseSolution.init(0.0); // ensure that inactive variables are zero
00853             for (unsigned int i = 0; i < activeSets[k].size(); ++i)
00854             {
00855                 // set the values of the active variables;
00856                 // activeSets[k][i] is the true index of the i-th variable in the active set
00857                 denseSolution(0, activeSets[k][i]) = solutions[k](i,0);
00858             }
00859 
00860             // invert the input normalization
00861             denseSolution = denseSolution * pointWise(scaling);
00862 
00863             // output the solution
00864             std::cout << "solution " << k << ":\n" << denseSolution << std::endl;
00865         }
00866         \endcode
00867 
00868         <b>Required Interface:</b>
00869 
00870         <ul>
00871         <li> <tt>T</tt> must be numeric type (compatible to double)
00872         <li> <tt>Array1 a1;</tt><br>
00873              <tt>a1.push_back(ArrayVector<int>());</tt>
00874         <li> <tt>Array2 a2;</tt><br>
00875              <tt>a2.push_back(Matrix<T>());</tt>
00876         </ul>
00877    */
00878 doxygen_overloaded_function(template <...> unsigned int leastAngleRegression)
00879 
00880 template <class T, class C1, class C2, class Array1, class Array2>
00881 inline unsigned int
00882 leastAngleRegression(MultiArrayView<2, T, C1> const & A, MultiArrayView<2, T, C2> const &b,
00883                      Array1 & activeSets, Array2 & solutions,
00884                      LeastAngleRegressionOptions const & options = LeastAngleRegressionOptions())
00885 {
00886     if(options.least_squares_solutions)
00887         return detail::leastAngleRegressionImpl(A, b, activeSets, (Array2*)0, &solutions, options);
00888     else
00889         return detail::leastAngleRegressionImpl(A, b, activeSets, &solutions, (Array2*)0, options);
00890 }
00891 
00892 template <class T, class C1, class C2, class Array1, class Array2>
00893 inline unsigned int
00894 leastAngleRegression(MultiArrayView<2, T, C1> const & A, MultiArrayView<2, T, C2> const &b,
00895                      Array1 & activeSets, Array2 & lasso_solutions, Array2 & lsq_solutions,
00896                      LeastAngleRegressionOptions const & options = LeastAngleRegressionOptions())
00897 {
00898     return detail::leastAngleRegressionImpl(A, b, activeSets, &lasso_solutions, &lsq_solutions, options);
00899 }
00900 
00901    /** Non-negative Least Squares Regression.
00902 
00903        Given a matrix \a A with <tt>m</tt> rows and <tt>n</tt> columns (with <tt>m >= n</tt>),
00904        and a column vector \a b of length <tt>m</tt> rows, this function computes
00905        a column vector \a x of length <tt>n</tt> with <b>non-negative entries</b> that solves the optimization problem
00906 
00907         \f[ \tilde \textrm{\bf x} = \textrm{argmin}
00908             \left|\left|\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right|\right|_2^2
00909             \textrm{ subject to } \textrm{\bf x} \ge \textrm{\bf 0}
00910         \f]
00911 
00912        Both \a b and \a x must be column vectors (i.e. matrices with <tt>1</tt> column).
00913        Note that all matrices must already have the correct shape. The solution is computed by means
00914        of \ref leastAngleRegression() with non-negativity constraint.
00915 
00916     <b>\#include</b> <vigra/regression.hxx>
00917         Namespaces: vigra and vigra::linalg
00918    */
00919 template <class T, class C1, class C2, class C3>
00920 inline void
00921 nonnegativeLeastSquares(MultiArrayView<2, T, C1> const & A,
00922                         MultiArrayView<2, T, C2> const &b, MultiArrayView<2, T, C3> &x)
00923 {
00924     vigra_precondition(columnCount(A) == rowCount(x) && rowCount(A) == rowCount(b),
00925         "nonnegativeLeastSquares(): Matrix shape mismatch.");
00926     vigra_precondition(columnCount(b) == 1 && columnCount(x) == 1,
00927         "nonnegativeLeastSquares(): RHS and solution must be vectors (i.e. columnCount == 1).");
00928 
00929     ArrayVector<ArrayVector<MultiArrayIndex> > activeSets;
00930     ArrayVector<Matrix<T> > results;
00931 
00932     leastAngleRegression(A, b, activeSets, results,
00933                          LeastAngleRegressionOptions().leastSquaresSolutions(false).nnlasso());
00934     x.init(NumericTraits<T>::zero());
00935     if(activeSets.size() > 0)
00936         for(unsigned int k=0; k<activeSets.back().size(); ++k)
00937             x(activeSets.back()[k],0) = results.back()[k];
00938 }
00939 
00940 
00941 //@}
00942 
00943 } // namespace linalg
00944 
00945 using linalg::leastSquares;
00946 using linalg::weightedLeastSquares;
00947 using linalg::ridgeRegression;
00948 using linalg::weightedRidgeRegression;
00949 using linalg::ridgeRegressionSeries;
00950 using linalg::nonnegativeLeastSquares;
00951 using linalg::leastAngleRegression;
00952 using linalg::LeastAngleRegressionOptions;
00953 
00954 } // namespace vigra
00955 
00956 #endif // VIGRA_REGRESSION_HXX

© Ullrich Köthe (ullrich.koethe@iwr.uni-heidelberg.de)
Heidelberg Collaboratory for Image Processing, University of Heidelberg, Germany

html generated using doxygen and Python
vigra 1.9.0 (Tue Nov 6 2012)