Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members  

ImprovedLevenbergMarquardtMinimizer.cpp

Go to the documentation of this file.
00001 // ImprovedLevenbergMarquardtMinimizer.cpp: implementation of the CImprovedLevenbergMarquardtMinimizer class.
00002 //
00004 
00005 #include "ImprovedLevenbergMarquardtMinimizer.h"
00006 
00008 // Construction/Destruction
00010 
00011 CImprovedLevenbergMarquardtMinimizer::CImprovedLevenbergMarquardtMinimizer()
00012 {
00013         m_dLambda = 1.0e-02;
00014         m_dChiSqTol = 1.0e-03;
00015         m_dGradTol = 1.0e-03;
00016         m_dParTol = 1.0e-05;
00017         m_dTau = 1.0e-03;
00018         m_iNIterations = 1000;
00019         m_bPositiveSemiDef = true;
00020         m_bCheckChi = true;
00021         m_bCheckGrad = true;
00022         m_bCheckPar = true;
00023         m_dNu = 10.0;
00024         m_dFuncAccuracy = 1.0e-06;
00025 }
00026 
00027 CImprovedLevenbergMarquardtMinimizer::~CImprovedLevenbergMarquardtMinimizer()
00028 {
00029 
00030 }
00031 
00032 CImprovedLevenbergMarquardtMinimizer::CImprovedLevenbergMarquardtMinimizer(CParameterFilter *pFilter, bool psdFlag, double marquardt, bool checkChi, double chiTol, bool checkGrad, double gradTol, bool checkPar, double parTol, int nIterations, double funcAccuracy)
00033 {
00034         m_pFilter = pFilter;
00035         m_dLambda = marquardt;
00036         m_dChiSqTol = chiTol;
00037         m_bCheckChi = checkChi;
00038         m_dGradTol = gradTol;
00039         m_bCheckGrad = checkGrad;
00040         m_dParTol = parTol;
00041         m_bCheckPar = checkPar;
00042         m_iNIterations = nIterations;
00043         m_bPositiveSemiDef = psdFlag;
00044         m_dFuncAccuracy = funcAccuracy;
00045         m_dTau = 1.0e-03;
00046         m_dNu = 10.0;
00047 }
00048 
00049 double CImprovedLevenbergMarquardtMinimizer::Minimize(double *parameters, Minimizable *minimizable)
00050 {
00051         // loop variables
00052         int nPar,i;
00053         // set to false if the derivatives don't need to be recomputed
00054         bool computeFlag = true;
00055         m_bChiSqFlag = false;
00056         m_bGradFlag = false;
00057         m_bParFlag = false;
00058         // downcast the minimizable, and throw an exception if it is not an NLLSMinimizable 
00059         // (LevenbergMarquardt should only be used on a sum of squares objective function)
00060         NLLSMinimizable *nlls = dynamic_cast<NLLSMinimizable *>(minimizable);
00061         if(0 == nlls)
00062         {
00063                 throw std::runtime_error("Minimizable is not a sum of squares!");
00064         }
00065         
00066         // get the number of parameters and allocate 
00067         nParameters = nlls->GetNParameters();
00068         m_iNResiduals = nlls->GetNResiduals();
00069         
00070         m_pdAlpha = new double *[nParameters];
00071         m_pdAlpha[0] = new double[nParameters*nParameters];
00072         for(nPar = 1; nPar < nParameters; nPar++)
00073         {
00074                 m_pdAlpha[nPar] = &(m_pdAlpha[0][nPar*nParameters]);
00075         }
00076         m_pdGrad = new double[nParameters];
00077         m_pdDiagonal = new double[nParameters];
00078         m_pdForceMatrix = new double *[nParameters];
00079         m_pdForceMatrix[0] = new double[nParameters*m_iNResiduals];
00080         for(nPar = 1; nPar < nParameters; nPar++)
00081         {
00082                 m_pdForceMatrix[nPar] = &(m_pdForceMatrix[0][nPar*m_iNResiduals]);
00083         }
00084 
00085         // non-member data
00086         double *parLambda = new double[nParameters];
00087         double *parLambdaDiv = new double[nParameters];
00088         double *trialPOne = new double[nParameters];
00089         double *trialPTwo = new double[nParameters];
00090 
00091         // matrix/vector manipulations
00092         CMatrixOperations *pMatrixOps = new CMatrixOperations();
00093         
00094         // compute the starting cost
00095         double chiSqOld = nlls->ObjectiveFunction(parameters);
00096         cout << "Starting cost of " << chiSqOld << endl;
00097 
00098         double chiSqLambda = chiSqOld;
00099         double chiSqLambdaDiv = chiSqOld;
00100 
00101         int nIterations = 0;
00102         while( nIterations < m_iNIterations )
00103         {
00104                 // resetting the Marquardt parameter at the beginning seems to help
00105                 // with the number of evaluations sometimes
00106                 // m_dLambda = 1.0e-02;
00107 
00108                 // compute alpha,gradient
00109                 if(computeFlag){ComputeDerivativeInformation(parameters,nlls);}
00110                 
00111                 // solve for phi(lambda)
00112                 RescaleDiagonal(m_dLambda);
00113                 SVDSolveMarquardtSystem(parLambda);
00114                 RestoreDiagonal();
00115 
00116                 // solve for phi(lambda/nu)
00117                 RescaleDiagonal(m_dLambda/m_dNu);
00118                 SVDSolveMarquardtSystem(parLambdaDiv);
00119                 RestoreDiagonal();
00120                 
00121                 ObtainTrialParameters(parameters, parLambda, trialPOne);
00122                 chiSqLambda = nlls->ObjectiveFunction(trialPOne);
00123                 ObtainTrialParameters(parameters, parLambdaDiv, trialPTwo);
00124                 chiSqLambdaDiv = nlls->ObjectiveFunction(trialPTwo);
00125                 
00126                 if(chiSqLambdaDiv <= chiSqOld)  // Test I
00127                 {
00128                         cout << "Test I passed." << endl;
00129                         m_dLambda = m_dLambda/m_dNu;
00130                         // accept move, set recompute flag
00131                         computeFlag = true;
00132                         m_bChiSqFlag = ComputeChiSqTol(chiSqOld,chiSqLambdaDiv);
00133                         m_bGradFlag = ComputeGradTol();
00134                         m_bParFlag = ComputeParTol(parameters,parLambdaDiv);
00135                         AcceptParameters(parameters,trialPTwo,chiSqLambdaDiv);
00136                         chiSqOld = chiSqLambdaDiv;
00137                         cout << "Move accepted, new cost is " << chiSqLambdaDiv << endl;
00138                 }
00139                 else if(chiSqLambda <= chiSqOld) // Test II
00140                 {
00141                         cout << "Test II passed." << endl;
00142                         // Marquardt parameter is unchanged
00143                         // accept move, set recompute flag
00144                         computeFlag = true;
00145                         m_bChiSqFlag = ComputeChiSqTol(chiSqOld,chiSqLambda);
00146                         m_bGradFlag = ComputeGradTol();
00147                         m_bParFlag = ComputeParTol(parameters,parLambda);
00148                         AcceptParameters(parameters,trialPOne,chiSqLambda);
00149                         chiSqOld = chiSqLambda;
00150                         cout << "Move accepted, new cost is  " << chiSqLambda << endl;
00151                 }
00152                 else  // Test III
00153                 {
00154                         double lambdaMult = m_dLambda;
00155                         double chiSqLambdaMult = chiSqLambda;
00156 
00157                         double piOverFour = 0.78539816339744825;
00158 
00159                         while(chiSqLambdaMult > chiSqOld)
00160                         {
00161                                 double numerator = pMatrixOps->DotProduct(m_pdGrad,parLambda,nParameters);
00162                                 double denominator = (pMatrixOps->VectorL2Norm(m_pdGrad,nParameters))*(pMatrixOps->VectorL2Norm(parLambda,nParameters));
00163                                 double gamma = acos(numerator/denominator);
00164 
00165                                 if(gamma > piOverFour)
00166                                 {
00167                                         // increase lambda and try again
00168                                         lambdaMult = lambdaMult*m_dNu;
00169                                         
00170                                         // solve for phi(lambda/nu)
00171                                         RescaleDiagonal(lambdaMult);
00172                                         SVDSolveMarquardtSystem(parLambda);
00173                                         RestoreDiagonal();
00174                 
00175                                         ObtainTrialParameters(parameters, parLambda, trialPOne);
00176                                         chiSqLambdaMult = nlls->ObjectiveFunction(trialPOne);
00177                                 }
00178                                 else
00179                                 {
00180                                         while(chiSqLambdaMult > chiSqOld)
00181                                         {
00182                                                 // rescale the multiple of the parameters we're adding 
00183                                                 // until the cost decreases
00184                                                 for(i = 0; i < nParameters; i++)
00185                                                 {
00186                                                         parLambda[i] = 0.5*parLambda[i];
00187                                                 }
00188                                                 ObtainTrialParameters(parameters,parLambda,trialPOne);
00189                                                 chiSqLambdaMult = nlls->ObjectiveFunction(trialPOne);
00190                                         }
00191                                 }
00192 
00193                         }
00194                         cout << "Test III passed." << endl;
00195                         // accept move, rescale 
00196                         computeFlag = true;
00197                         m_bChiSqFlag = ComputeChiSqTol(chiSqOld,chiSqLambdaMult);
00198                         m_bGradFlag = ComputeGradTol();
00199                         m_bParFlag = ComputeParTol(parameters,parLambda);
00200                         AcceptParameters(parameters,trialPOne,chiSqLambdaMult);
00201                         chiSqOld = chiSqLambdaMult;
00202                         m_dLambda = lambdaMult;
00203                         cout << "Move accepted, new cost is " << chiSqLambdaMult << endl;
00204                 }
00205                 if(m_bChiSqFlag || m_bGradFlag || m_bParFlag)
00206                 {
00207                         if(m_bChiSqFlag)
00208                         {
00209                                 cout << "Convergence on Chi-Squared criterion." << endl;
00210                         }
00211                         if(m_bGradFlag)
00212                         {
00213                                 cout << "Convergence on ||g|| criterion." << endl;
00214                         }
00215                         if(m_bParFlag)
00216                         {
00217                                 cout << "Convergence on parameter criterion." << endl;
00218                         }
00219                         break;
00220                 }
00221 
00222                 nIterations++;
00223         }
00224         
00225 
00226         delete pMatrixOps;
00227         delete [] parLambda;
00228         delete [] parLambdaDiv;
00229         delete [] trialPOne;
00230         delete [] trialPTwo;
00231         delete [] m_pdGrad;
00232         delete [] m_pdDiagonal;
00233         delete [] m_pdAlpha[0];
00234         delete [] m_pdAlpha;
00235         delete [] m_pdForceMatrix[0];
00236         delete [] m_pdForceMatrix;
00237 
00238         return chiSqOld;
00239 }
00240 
00242 // Solves the Levenberg-Marquardt system alpha.deltaP = beta, using 
00243 // LAPACK functions for Cholesky decomposition and backsubstitution.
00244 // On return, deltaP contains the solution, and alpha and beta are 
00245 // not modified.
00247 
00248 
00249 void CImprovedLevenbergMarquardtMinimizer::SVDSolveMarquardtSystem(double *deltaP)
00250 {
00251         // solve the system by SVD decomposition
00252         int i,j,l;
00253         char JOBU = 'A';
00254         char JOBVT = 'A';
00255         int M = nParameters;
00256         int N = nParameters;
00257         
00258         // matrix A in column-major order
00259         double *A = new double[nParameters*nParameters];
00260         int counter = 0;
00261         for(j = 0; j < nParameters; j++)
00262         {
00263                 deltaP[j] = 0.0;
00264                 for(i = 0; i < nParameters; i++)
00265                 {
00266                         A[counter] = m_pdAlpha[i][j];
00267                         counter++;
00268                 }
00269         }
00270         
00271         double *D = new double[nParameters];
00272         double *E = new double[nParameters];
00273         double *TAUQ = new double[nParameters];
00274         double *TAUP = new double[nParameters];
00275 
00276         double *PT = new double[nParameters*nParameters];
00277         double *Q = new double[nParameters*nParameters];
00278         double *ATEMP = new double[nParameters*nParameters];
00279 
00280         int LDA = nParameters;
00281         double *S = new double[nParameters];
00282         int LDU = nParameters;
00283         int LDVT = nParameters;
00284         int LWORK = 5*M;
00285         double *WORK = new double[LWORK];
00286         int INFO = 0;
00287 
00288         // put the matrix into band diagonal form
00289         DGEBRD(&M,&N,A,&LDA,D,E,TAUQ,TAUP,WORK,&LWORK,&INFO);
00290         //cout << "Band diagonal form: INFO = " << INFO << endl;
00291         
00292         // now obtain Q and copy it into our array
00293         for(l = 0; l < nParameters*nParameters; l++)
00294         {
00295                 ATEMP[l] = A[l];
00296         }
00297         char VECT = 'Q';
00298         int K = M;
00299         DORGBR(&VECT,&M,&N,&K,ATEMP,&LDA,TAUQ,WORK,&LWORK,&INFO);
00300         //cout << "Q : INFO = " << INFO << endl;
00301         for(l = 0; l < nParameters*nParameters; l++)
00302         {
00303                 Q[l] = ATEMP[l];
00304         }
00305         // now obtain PT
00306         for(l = 0; l < nParameters*nParameters; l++)
00307         {
00308                 ATEMP[l] = A[l];
00309         }
00310         VECT = 'P';
00311         DORGBR(&VECT,&M,&N,&K,ATEMP,&LDA,TAUP,WORK,&LWORK,&INFO);
00312         //cout << "PT : INFO = " << INFO << endl;
00313         for(l = 0; l < nParameters*nParameters; l++)
00314         {
00315                 PT[l] = ATEMP[l];
00316         }
00317 
00318         // obtain the SVD decomposition of A, using the matrices found above
00319         char UPLO = 'U';
00320         int NCVT = nParameters;
00321         int NRU = nParameters;
00322         int NCC = 0;
00323         double *C = 0;
00324         int LDC = 1;
00325         DBDSQR(&UPLO,&N,&NCVT,&NRU,&NCC,D,E,PT,&LDVT,Q,&LDU,C,&LDC,WORK,&INFO);
00326         cout << "Matrix decomposed. Condition number of Hessian is " << D[0]/D[nParameters-1] << endl;
00327 
00328         // scroll through the singular values and find the ones whose inverses will be 
00329         // huge and set them to zero.  If the problem is underdetermined, we know there 
00330         // MUST be at least M - N singular values that are identically equal to zero 
00331         // (the last M - N in the list) even if numerical slop gives us small but nonzero
00332         // ones in the computed Hessian.  While these may always be so small as to be 
00333         // truncated, it's best to be safe.
00334 
00335         double cutoff = 1.0e-6*D[0];
00336         for(i = 0; i < nParameters; i++)
00337         {
00338                 if(D[i] < cutoff) {D[i] = 0;}
00339                 else {D[i] = 1/D[i];}
00340         }
00341 
00342         // here are the ones that have to be zero
00343         int diff = nParameters - m_iNResiduals;
00344         for(i = 0; i < diff; i++)
00345         {
00346                 D[(nParameters-1) - i] = 0.0;
00347         }
00348 
00349         // now do the multiplications to find the new parameters
00350         // first UT
00351         for(i = 0; i < nParameters; i++)
00352         {
00353                 for(j = 0; j < nParameters; j++)
00354                 {
00355                         deltaP[i] += Q[i*nParameters + j]*m_pdGrad[j];
00356                 }
00357         }
00358         // now the inverse singular values
00359         for(i = 0; i < nParameters; i++)
00360         {
00361                 deltaP[i] = deltaP[i]*D[i];
00362         }
00363         // now VT
00364         for(i = 0; i < nParameters; i++)
00365         {
00366                 S[i] = 0.0;
00367                 for(j = 0; j < nParameters; j++)
00368                 {
00369                         S[i] += PT[i*nParameters + j]*deltaP[j];
00370                 }
00371         }
00372         // copy back into deltaP
00373         for(i = 0; i < nParameters; i++)
00374         {
00375                 deltaP[i] = S[i];
00376         }
00377 
00378         delete [] A;
00379         delete [] ATEMP;
00380         delete [] TAUQ;
00381         delete [] TAUP;
00382         delete [] Q;
00383         delete [] PT;
00384         delete [] D;
00385         delete [] E;
00386         delete [] S;
00387         delete [] WORK;
00388 }
00389 
00390 
00391 
00392 

Generated on Mon Nov 3 09:37:55 2003 by doxygen1.2.14 written by Dimitri van Heesch, © 1997-2002