Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members  

ScaleInvariantMarquardtMinimizer.cpp

Go to the documentation of this file.
00001 // ScaleInvariantMarquardtMinimizer.cpp: implementation of the CScaleInvariantMarquardtMinimizer class.
00002 //
00004 
00005 #include "ScaleInvariantMarquardtMinimizer.h"
00006 
00008 // Construction/Destruction
00010 
00011 CScaleInvariantMarquardtMinimizer::CScaleInvariantMarquardtMinimizer()
00012 {
00013         m_dLambda = 1.0e-02;
00014         m_dChiSqTol = 1.0e-03;
00015         m_dGradTol = 1.0e-03;
00016         m_dParTol = 1.0e-05;
00017         m_dTau = 1.0e-03;
00018         m_iNIterations = 1000;
00019         m_bPositiveSemiDef = true;
00020         m_bCheckChi = true;
00021         m_bCheckGrad = true;
00022         m_bCheckPar = true;
00023         m_dNu = 10.0;
00024         m_dFuncAccuracy = 1.0e-06;
00025 }
00026 
00027 CScaleInvariantMarquardtMinimizer::~CScaleInvariantMarquardtMinimizer()
00028 {
00029 
00030 }
00031 
00032 CScaleInvariantMarquardtMinimizer::CScaleInvariantMarquardtMinimizer(CParameterFilter *pFilter, bool psdFlag, double marquardt, bool checkChi, double chiTol, bool checkGrad, double gradTol, bool checkPar, double parTol, int nIterations, double funcAccuracy)
00033 {
00034         m_pFilter = pFilter;
00035         m_dLambda = marquardt;
00036         m_dChiSqTol = chiTol;
00037         m_bCheckChi = checkChi;
00038         m_dGradTol = gradTol;
00039         m_bCheckGrad = checkGrad;
00040         m_dParTol = parTol;
00041         m_bCheckPar = checkPar;
00042         m_iNIterations = nIterations;
00043         m_bPositiveSemiDef = psdFlag;
00044         m_dFuncAccuracy = funcAccuracy;
00045         m_dTau = 1.0e-03;
00046         m_dNu = 10.0;
00047 }
00048 
00049 double CScaleInvariantMarquardtMinimizer::Minimize(double *parameters, Minimizable *minimizable)
00050 {
00051     //loop variables
00052         int nPar, i;
00053         // set to false if the derivatives don't need to be recomputed
00054         bool computeFlag = true;
00055         m_bChiSqFlag = false;
00056         m_bGradFlag = false;
00057         m_bParFlag = false;
00058         // downcast the minimizable, and throw an exception if it is not an NLLSMinimizable 
00059         // (LevenbergMarquardt should only be used on a sum of squares objective function)
00060         NLLSMinimizable *nlls = dynamic_cast<NLLSMinimizable *>(minimizable);
00061         if(0 == nlls)
00062         {
00063                 throw std::runtime_error("Minimizable is not a sum of squares!");
00064         }
00065         
00066         // get the number of parameters and allocate 
00067         nParameters = nlls->GetNParameters();
00068         m_iNResiduals = nlls->GetNResiduals();
00069         
00070         m_pdAlpha = new double *[nParameters];
00071         m_pdAlpha[0] = new double[nParameters*nParameters];
00072         for(nPar = 1; nPar < nParameters; nPar++)
00073         {
00074                 m_pdAlpha[nPar] = &(m_pdAlpha[0][nPar*nParameters]);
00075         }
00076         m_pdGrad = new double[nParameters];
00077         m_pdDiagonal = new double[nParameters];
00078         m_pdForceMatrix = new double *[nParameters];
00079         m_pdForceMatrix[0] = new double[nParameters*m_iNResiduals];
00080         for(nPar = 1; nPar < nParameters; nPar++)
00081         {
00082                 m_pdForceMatrix[nPar] = &(m_pdForceMatrix[0][nPar*m_iNResiduals]);
00083         }
00084         
00085         // non-member data
00086         double *parLambda = new double[nParameters];
00087         double *parLambdaDiv = new double[nParameters];
00088         double *trialPOne = new double[nParameters];
00089         double *trialPTwo = new double[nParameters];
00090 
00091         // matrix/vector manipulations
00092         CMatrixOperations *pMatrixOps = new CMatrixOperations();
00093         
00094         // compute the starting cost
00095         double chiSqOld = nlls->ObjectiveFunction(parameters);
00096         cout << "Starting cost of " << chiSqOld << endl;
00097         double chiSqLambda = chiSqOld;
00098         double chiSqLambdaDiv = chiSqOld;
00099 
00100         int nIterations = 0;
00101         while( nIterations < m_iNIterations )
00102         {
00103                 // resetting the Marquardt parameter at the beginning seems to help
00104                 // with the number of evaluations sometimes
00105                 // m_dLambda = 1.0e-02;
00106 
00107                 // compute alpha,gradient
00108                 if(computeFlag){
00109                         ComputeDerivativeInformation(parameters,nlls);
00110                         // now rescale both the Hessian and the gradient
00111                         for(i = 0; i < nParameters; i++)
00112                         {
00113                                 m_pdGrad[i] = m_pdGrad[i]/sqrt(m_pdDiagonal[i]);
00114                                 for(int j = 0; j < nParameters; j++)
00115                                 {
00116                                         double sqrt1 = sqrt(m_pdDiagonal[i]);
00117                                         double sqrt2 = sqrt(m_pdDiagonal[j]);
00118                                         m_pdAlpha[i][j] = m_pdAlpha[i][j]/(sqrt1*sqrt2);
00119                                 }
00120                         }
00121                 }
00122                 
00123                 // solve for phi(lambda)
00124                 RescaleDiagonal(m_dLambda);
00125                 SVDSolveMarquardtSystem(parLambda);
00126                 RestoreDiagonal();
00127 
00128                 // solve for phi(lambda/nu)
00129                 RescaleDiagonal(m_dLambda/m_dNu);
00130                 SVDSolveMarquardtSystem(parLambdaDiv);
00131                 RestoreDiagonal();
00132                 
00133                 ObtainTrialParameters(parameters, parLambda, trialPOne);
00134                 chiSqLambda = nlls->ObjectiveFunction(trialPOne);
00135                 ObtainTrialParameters(parameters, parLambdaDiv, trialPTwo);
00136                 chiSqLambdaDiv = nlls->ObjectiveFunction(trialPTwo);
00137                 
00138                 if(chiSqLambdaDiv <= chiSqOld)  // Test I
00139                 {
00140                         cout << "Test I passed." << endl;
00141                         m_dLambda = m_dLambda/m_dNu;
00142                         // accept move, set recompute flag
00143                         computeFlag = true;
00144                         m_bChiSqFlag = ComputeChiSqTol(chiSqOld,chiSqLambdaDiv);
00145                         m_bGradFlag = ComputeGradTol();
00146                         m_bParFlag = ComputeParTol(parameters,parLambdaDiv);
00147                         AcceptParameters(parameters,trialPTwo,chiSqLambdaDiv);
00148                         chiSqOld = chiSqLambdaDiv;
00149                         cout << "Move accepted, new cost is " << chiSqLambdaDiv << endl;
00150                 }
00151                 else if(chiSqLambda <= chiSqOld) // Test II
00152                 {
00153                         cout << "Test II passed." << endl;
00154                         // Marquardt parameter is unchanged
00155                         // accept move, set recompute flag
00156                         computeFlag = true;
00157                         m_bChiSqFlag = ComputeChiSqTol(chiSqOld,chiSqLambda);
00158                         m_bGradFlag = ComputeGradTol();
00159                         m_bParFlag = ComputeParTol(parameters,parLambda);
00160                         AcceptParameters(parameters,trialPOne,chiSqLambda);
00161                         chiSqOld = chiSqLambda;
00162                         cout << "Move accepted, new cost is  " << chiSqLambda << endl;
00163                 }
00164                 else  // Test III
00165                 {
00166                         double lambdaMult = m_dLambda;
00167                         double chiSqLambdaMult = chiSqLambda;
00168 
00169                         double piOverFour = 0.78539816339744825;
00170 
00171                         while(chiSqLambdaMult > chiSqOld)
00172                         {
00173                                 double numerator = pMatrixOps->DotProduct(m_pdGrad,parLambda,nParameters);
00174                                 double denominator = (pMatrixOps->VectorL2Norm(m_pdGrad,nParameters))*(pMatrixOps->VectorL2Norm(parLambda,nParameters));
00175                                 double gamma = acos(numerator/denominator);
00176 
00177                                 if(gamma > piOverFour)
00178                                 {
00179                                         // increase lambda and try again
00180                                         lambdaMult = lambdaMult*m_dNu;
00181                                         
00182                                         // solve for phi(lambda/nu)
00183                                         RescaleDiagonal(lambdaMult);
00184                                         SVDSolveMarquardtSystem(parLambda);
00185                                         RestoreDiagonal();
00186                 
00187                                         ObtainTrialParameters(parameters, parLambda, trialPOne);
00188                                         chiSqLambdaMult = nlls->ObjectiveFunction(trialPOne);
00189                                 }
00190                                 else
00191                                 {
00192                                         while(chiSqLambdaMult > chiSqOld)
00193                                         {
00194                                                 // rescale the multiple of the parameters we're adding 
00195                                                 // until the cost decreases
00196                                                 for(i = 0; i < nParameters; i++)
00197                                                 {
00198                                                         parLambda[i] = 0.5*parLambda[i];
00199                                                 }
00200                                                 ObtainTrialParameters(parameters,parLambda,trialPOne);
00201                                                 chiSqLambdaMult = nlls->ObjectiveFunction(trialPOne);
00202                                         }
00203                                 }
00204 
00205                         }
00206                         cout << "Test III passed." << endl;
00207                         // accept move, rescale 
00208                         computeFlag = true;
00209                         m_bChiSqFlag = ComputeChiSqTol(chiSqOld,chiSqLambdaMult);
00210                         m_bGradFlag = ComputeGradTol();
00211                         m_bParFlag = ComputeParTol(parameters,parLambda);
00212                         AcceptParameters(parameters,trialPOne,chiSqLambdaMult);
00213                         chiSqOld = chiSqLambdaMult;
00214                         m_dLambda = lambdaMult;
00215                         cout << "Move accepted, new cost is " << chiSqLambdaMult << endl;
00216                 }
00217                 if(m_bChiSqFlag || m_bGradFlag || m_bParFlag)
00218                 {
00219                         if(m_bChiSqFlag)
00220                         {
00221                                 cout << "Convergence on Chi-Squared criterion." << endl;
00222                         }
00223                         if(m_bGradFlag)
00224                         {
00225                                 cout << "Convergence on ||g|| criterion." << endl;
00226                         }
00227                         if(m_bParFlag)
00228                         {
00229                                 cout << "Convergence on parameter criterion." << endl;
00230                         }
00231                         break;
00232                 }
00233 
00234                 nIterations++;
00235         }
00236         
00237 
00238         delete pMatrixOps;
00239         delete [] parLambda;
00240         delete [] parLambdaDiv;
00241         delete [] trialPOne;
00242         delete [] trialPTwo;
00243         delete [] m_pdGrad;
00244         delete [] m_pdDiagonal;
00245         delete [] m_pdAlpha[0];
00246         delete [] m_pdAlpha;
00247 
00248         return chiSqOld;
00249 }
00250 
00252 // Solves the Levenberg-Marquardt system alpha.deltaP = beta, using 
00253 // LAPACK functions for Cholesky decomposition and backsubstitution.
00254 // On return, deltaP contains the solution, and alpha and beta are 
00255 // not modified.
00257 
00258 
00259 void CScaleInvariantMarquardtMinimizer::SVDSolveMarquardtSystem(double *deltaP)
00260 {
00261         int i,j,l;
00262         // solve the system by SVD decomposition
00263         char JOBU = 'A';
00264         char JOBVT = 'A';
00265         int M = nParameters;
00266         int N = nParameters;
00267         
00268         // matrix A in column-major order
00269         double *A = new double[nParameters*nParameters];
00270         int counter = 0;
00271         for(j = 0; j < nParameters; j++)
00272         {
00273                 deltaP[j] = 0.0;
00274                 for(i = 0; i < nParameters; i++)
00275                 {
00276                         A[counter] = m_pdAlpha[i][j];
00277                         counter++;
00278                 }
00279         }
00280         
00281         double *D = new double[nParameters];
00282         double *E = new double[nParameters];
00283         double *TAUQ = new double[nParameters];
00284         double *TAUP = new double[nParameters];
00285 
00286         double *PT = new double[nParameters*nParameters];
00287         double *Q = new double[nParameters*nParameters];
00288         double *ATEMP = new double[nParameters*nParameters];
00289 
00290         int LDA = nParameters;
00291         double *S = new double[nParameters];
00292         int LDU = nParameters;
00293         int LDVT = nParameters;
00294         int LWORK = 5*M;
00295         double *WORK = new double[LWORK];
00296         int INFO = 0;
00297 
00298         // put the matrix into band diagonal form
00299         DGEBRD(&M,&N,A,&LDA,D,E,TAUQ,TAUP,WORK,&LWORK,&INFO);
00300         //cout << "Band diagonal form: INFO = " << INFO << endl;
00301         
00302         // now obtain Q and copy it into our array
00303         for(l = 0; l < nParameters*nParameters; l++)
00304         {
00305                 ATEMP[l] = A[l];
00306         }
00307         char VECT = 'Q';
00308         int K = M;
00309         DORGBR(&VECT,&M,&N,&K,ATEMP,&LDA,TAUQ,WORK,&LWORK,&INFO);
00310         //cout << "Q : INFO = " << INFO << endl;
00311         for(l = 0; l < nParameters*nParameters; l++)
00312         {
00313                 Q[l] = ATEMP[l];
00314         }
00315         // now obtain PT
00316         for(l = 0; l < nParameters*nParameters; l++)
00317         {
00318                 ATEMP[l] = A[l];
00319         }
00320         VECT = 'P';
00321         DORGBR(&VECT,&M,&N,&K,ATEMP,&LDA,TAUP,WORK,&LWORK,&INFO);
00322         //cout << "PT : INFO = " << INFO << endl;
00323         for(l = 0; l < nParameters*nParameters; l++)
00324         {
00325                 PT[l] = ATEMP[l];
00326         }
00327 
00328         // obtain the SVD decomposition of A, using the matrices found above
00329         char UPLO = 'U';
00330         int NCVT = nParameters;
00331         int NRU = nParameters;
00332         int NCC = 0;
00333         double *C = 0;
00334         int LDC = 1;
00335         DBDSQR(&UPLO,&N,&NCVT,&NRU,&NCC,D,E,PT,&LDVT,Q,&LDU,C,&LDC,WORK,&INFO);
00336         cout << "Matrix decomposed. Condition number of Hessian is " << D[0]/D[nParameters-1] << endl;
00337 
00338         // scroll through the singular values and find the ones whose inverses will be 
00339         // huge and set them to zero.  If the problem is underdetermined, we know there 
00340         // MUST be at least M - N singular values that are identically equal to zero 
00341         // (the last M - N in the list) even if numerical slop gives us small but nonzero
00342         // ones in the computed Hessian.  While these may always be so small as to be 
00343         // truncated, it's best to be safe.
00344 
00345         double cutoff = 1.0e-6*D[0];
00346         for(i = 0; i < nParameters; i++)
00347         {
00348                 if(D[i] < cutoff) {D[i] = 0;}
00349                 else {D[i] = 1/D[i];}
00350         }
00351 
00352         // here are the ones that have to be zero
00353         int diff = nParameters - m_iNResiduals;
00354         for(i = 0; i < diff; i++)
00355         {
00356                 D[(nParameters-1) - i] = 0.0;
00357         }
00358 
00359         // now do the multiplications to find the new parameters
00360         // first UT
00361         for(i = 0; i < nParameters; i++)
00362         {
00363                 for(int j = 0; j < nParameters; j++)
00364                 {
00365                         deltaP[i] += Q[i*nParameters + j]*m_pdGrad[j];
00366                 }
00367         }
00368         // now the inverse singular values
00369         for(i = 0; i < nParameters; i++)
00370         {
00371                 deltaP[i] = deltaP[i]*D[i];
00372         }
00373         // now VT
00374         for(i = 0; i < nParameters; i++)
00375         {
00376                 S[i] = 0.0;
00377                 for(int j = 0; j < nParameters; j++)
00378                 {
00379                         S[i] += PT[i*nParameters + j]*deltaP[j];
00380                 }
00381         }
00382         // copy back into deltaP with appropriate rescaling
00383         for(i = 0; i < nParameters; i++)
00384         {
00385                 deltaP[i] = S[i]/sqrt(m_pdDiagonal[i]);
00386         }
00387 
00388         delete [] A;
00389         delete [] ATEMP;
00390         delete [] TAUQ;
00391         delete [] TAUP;
00392         delete [] Q;
00393         delete [] PT;
00394         delete [] D;
00395         delete [] E;
00396         delete [] S;
00397         delete [] WORK;
00398 }

Generated on Mon Nov 3 09:38:14 2003 by doxygen1.2.14 written by Dimitri van Heesch, © 1997-2002