Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members  

RobustLevenbergMarquardtMinimizer.cpp

Go to the documentation of this file.
00001 // RobustLevenbergMarquardtMinimizer.cpp: implementation of the CRobustLevenbergMarquardtMinimizer class.
00002 //
00004 
00005 #include "RobustLevenbergMarquardtMinimizer.h"
00006 
00008 // Construction/Destruction
00010 
00011 CRobustLevenbergMarquardtMinimizer::CRobustLevenbergMarquardtMinimizer()
00012 {
00013         m_dMarquardt = 0.00001;
00014         m_dChiSqTol = 0.001;
00015         m_dGradTol = 0.001;
00016         m_iNIterations = 1000;
00017 }
00018 
00019 CRobustLevenbergMarquardtMinimizer::CRobustLevenbergMarquardtMinimizer(CParameterFilter *pFilter, double marquardt, double chiTol, double gradTol, int nIterations)
00020 {
00021         m_dMarquardt = marquardt;
00022         m_dChiSqTol = chiTol;
00023         m_dGradTol = gradTol;
00024         m_iNIterations = nIterations;
00025         m_pFilter = pFilter;
00026 }
00027 
// Destructor.  Intentionally empty: the scratch arrays (m_pdBeta, m_pdAlpha,
// m_pdJacobian, ...) are allocated AND released inside Minimize() itself, so
// there is nothing for the destructor to free.
CRobustLevenbergMarquardtMinimizer::~CRobustLevenbergMarquardtMinimizer()
{

}
00032 
00033 double CRobustLevenbergMarquardtMinimizer::Minimize(double *parameters, Minimizable *minimizable)
00034 {
00035         int i;
00036         // set to false if the derivatives don't need to be recomputed
00037         bool computeFlag = true;
00038         m_bBreakFlag = false;
00039 
00040         // downcast the minimizable, and throw an exception if it is not an NLLSMinimizable 
00041         // (LevenbergMarquardt should only be used on a sum of squares objective function)
00042         NLLSMinimizable *nlls = dynamic_cast<NLLSMinimizable *>(minimizable);
00043         if(0 == nlls)
00044         {
00045                 throw std::runtime_error("Minimizable is not a sum of squares!");
00046         }
00047         
00048         // get the number of parameters and allocate 
00049         nParameters = nlls->GetNParameters();
00050         m_iNResiduals = nlls->GetNResiduals();
00051         
00052         m_pdBeta = new double[nParameters];
00053         m_pdCurrentResiduals = new double[m_iNResiduals];
00054         m_pdAlpha = new double *[nParameters];
00055         m_pdAlpha[0] = new double[nParameters*nParameters];
00056         m_pdJacobian = new double *[m_iNResiduals];
00057         m_pdJacobian[0] = new double[m_iNResiduals*nParameters];
00058         m_pdScalingMatrix = new double[nParameters];
00059         for(int nPar = 1; nPar < nParameters; nPar++)
00060         {
00061                 m_pdAlpha[nPar] = &(m_pdAlpha[0][nPar*nParameters]);
00062         }
00063         for(int nR = 1; nR < m_iNResiduals; nR++)
00064         {
00065                 m_pdJacobian[nR] = &(m_pdJacobian[0][nR*nParameters]);
00066         }
00067         
00068         // non-member data
00069         double *deltaP = new double[nParameters];
00070         double *trialP = new double[nParameters];
00071         
00072         // compute the starting cost
00073         double chiSqOld = nlls->ObjectiveFunction(parameters);
00074         cout << "Starting cost of " << chiSqOld << endl;
00075         double chiSqNew = chiSqOld;
00076 
00077         int nIterations = 0;
00078         while( (nIterations < m_iNIterations) && (!m_bBreakFlag))
00079         {
00080                 // compute alpha,beta
00081                 if(computeFlag){ComputeDerivativeInformation(parameters,nlls);}
00082                 
00083                 // rescale the diagonal elements of the hessian
00084                 for(i = 0; i < nParameters; i++) 
00085                 { 
00086                         m_pdAlpha[i][i] = (1.0 + m_dMarquardt)*m_pdAlpha[i][i];
00087                 }
00088         
00089                 QRSolveMarquardtSystem(deltaP);
00090 
00091                 // undo hessian scaling for next pass (the Marquardt parameter
00092                 // changes regardless)
00093                 for(i = 0; i < nParameters; i++) 
00094                 { 
00095                         m_pdAlpha[i][i] = m_pdAlpha[i][i]/(1.0 + m_dMarquardt);
00096                 }
00097                 
00098                 // now evaluate the cost for the new set of parameters
00099                 m_pFilter->ForwardTransformation(parameters,nParameters);
00100                 for(int j = 0; j < nParameters; j++)
00101                 {
00102                         trialP[j] = parameters[j] + deltaP[j];
00103                 }
00104                 m_pFilter->BackwardTransformation(parameters,nParameters);
00105                 m_pFilter->BackwardTransformation(trialP,nParameters);
00106 
00107                 
00108                 chiSqNew = nlls->ObjectiveFunction(trialP);
00109                 
00110                 
00111                 // Sometimes the trial parameters are too big and we get -1*INF for 
00112                 // the objective function; for a sum-of-squares objective function,
00113                 // we know it is intrinsically positive so we should not accept 
00114                 // that move and hence those parameters.  The "or" statement in the 
00115                 // next line is an attempt to recover from these errors.
00116                 
00117                 if((chiSqNew >= chiSqOld) || (chiSqNew < 0.0))
00118                 {
00119                         m_dMarquardt = 10*m_dMarquardt;
00120                         computeFlag = false;
00121                 }
00122                 else
00123                 {       
00124                         cout << "Move accepted, new cost is " << chiSqNew << endl;
00125                         double fracTol = (chiSqNew - chiSqOld)/chiSqOld;
00126                         // accept new parameters
00127                         for(i = 0; i < nParameters; i++)
00128                         {
00129                                 parameters[i] = trialP[i];
00130                         }                                               
00131                         chiSqOld = chiSqNew;
00132 
00133                         // write the accepted parameters to a file, because numerically 
00134                         // computing the hessian is extremely expensive
00135                         fstream *pftemp = new fstream("martemp.par",ios::out);
00136                         *pftemp << "##################################################################" << endl;
00137                         *pftemp << "# BEST PARAMETERS" << endl;
00138                         *pftemp << "# COST : " << chiSqNew << endl;
00139                         *pftemp << "##################################################################" << endl;
00140                         for(i = 0; i < nParameters; i++)
00141                         {
00142                                 *pftemp << parameters[i] << endl;
00143                         }
00144                         delete pftemp;
00145                         // END WRITE
00146                         
00147                         // update marquardt parameter
00148                         m_dMarquardt = 0.1*m_dMarquardt;
00149                         computeFlag = true;     
00150 
00151                         // quit the minimization if the fractional change in cost is small
00152                         if(fabs(fracTol) < m_dChiSqTol)
00153                         {
00154                                 cout << "Convergence on Chi-Squared criterion." << endl;
00155                                 m_bBreakFlag = true; 
00156                         }
00157                 }
00158 
00159                 nIterations++;
00160         }
00161         
00162 
00163         delete [] deltaP;
00164         delete [] trialP;
00165         delete [] m_pdBeta;
00166         delete [] m_pdAlpha[0];
00167         delete [] m_pdAlpha;
00168         delete [] m_pdJacobian;
00169         delete [] m_pdJacobian[0];
00170         delete [] m_pdScalingMatrix;
00171         delete [] m_pdCurrentResiduals;
00172 
00173         return chiSqOld;
00174 }
00175 
00176 void CRobustLevenbergMarquardtMinimizer::ComputeDerivativeInformation(double *parameters, NLLSMinimizable *nlls)
00177 {
00178         int i;
00179         double transparameter = 0.0;
00180         double saveparameter = 0.0;
00181         double gradNorm = 0.0;
00182         double *fxPlus = new double[m_iNResiduals];
00183         double *fx = new double[m_iNResiduals];
00184 
00185         double chiSq = nlls->ObjectiveFunction(parameters);
00186         for(int k = 0; k < m_iNResiduals; k++)
00187         {
00188                 fx[k] = (nlls->GetResiduals())[k];
00189         }
00190 
00191         // loop over parameters
00192         for(i = 0; i < nParameters; i++)
00193         {
00194                 // transform the parameter and save its old value
00195                 saveparameter = parameters[i];
00196                 transparameter = m_pFilter->Operator(parameters[i]);
00197 
00198                 // if the parameter happens to equal 0.0, the stepsize will then be 
00199                 // zero and give NaN upon division, so we set it to 1.0e-08
00200                 double h = 1.0e-08;
00201                 if(transparameter > 0.0)
00202                 {
00203                         h = pow(DBL_EPSILON,0.333333)*transparameter;
00204                 }
00205                                 
00206                 // forward step
00207                 parameters[i] = m_pFilter->OperatorInverse(transparameter + h);
00208                 double chiSqPlus = nlls->ObjectiveFunction(parameters);
00209                 for(int k = 0; k < m_iNResiduals; k++)
00210                 {
00211                         fxPlus[k] = (nlls->GetResiduals())[k];
00212                 }
00213 
00214                 // compute scaled gradient element
00215                 m_pdBeta[i] = -0.5*((chiSqPlus - chiSq)/h);
00216                 gradNorm += m_pdBeta[i]*m_pdBeta[i];
00217                 
00218                 parameters[i] = saveparameter;
00219 
00220                 // fill in jacobian
00221                 for(int j = 0; j < m_iNResiduals; j++)
00222                 {
00223                         double delta = (fxPlus[j] - fx[j])/h;
00224                         m_pdJacobian[j][i] = -1*delta;
00225                 }
00226         }
00227         
00228         // compute gradient norm and check it
00229         gradNorm = sqrt(gradNorm);
00230         if(gradNorm < m_dGradTol) 
00231         {
00232                 cout << "Convergence on ||grad|| criterion." << endl;
00233                 m_bBreakFlag = true;
00234         }
00235 
00236         // compute the scaled hessian (it is symmetric)
00237         for(i = 0; i < nParameters; i++)
00238         {
00239                 for(int k = i; k < nParameters; k++)
00240                 {
00241                         m_pdAlpha[i][k] = 0.0;
00242                         for(int j = 0; j < m_iNResiduals; j++)
00243                         {
00244                                 m_pdAlpha[i][k] += m_pdJacobian[j][i]*m_pdJacobian[j][k];
00245                         }
00246                         m_pdAlpha[k][i] = m_pdAlpha[i][k];
00247                 }
00248         }
00249 
00250         delete [] fxPlus;
00251         delete [] fx;
00252 
00253         cout << "Hessian calculated . . ." << endl;
00254 
00255         return;
00256 }
00257 
00258 void CRobustLevenbergMarquardtMinimizer::QRSolveMarquardtSystem(double *deltaP)
00259 {
00260         int M = m_iNResiduals;
00261         int N = nParameters;
00262         int minMN = __min(M,N);
00263         int maxMN = __max(M,N);
00264         int INFO = 0;
00265         int LWORK = 4*N;
00266         int LDA = M;
00267         double *A = new double[M*N];
00268         double *WORK = new double[LWORK];
00269         double *TAU = new double[minMN];
00270         int *JPVT = new int[N];
00271 
00272         // copy the jacobian into the temp array, in column-major order
00273         int counter = 0;
00274         for(int j = 0; j < N; j++)
00275         {
00276                 for(int i = 0; i < M; i++)
00277                 {
00278                         A[counter] = m_pdJacobian[i][j];
00279                         counter++;
00280                 }
00281         }
00282 
00283         // decomposition and solution is done slightly differently depending upon
00284         // the shape of J
00285         if( M < N )
00286         {
00287                 // XXX debug
00288                 cout << "Using decomposition procedure for M < N . . . " << endl;
00289                 // QR decompose J
00290                 DGEQPF(&M, &N, A, &LDA, JPVT, TAU, WORK, &INFO);
00291                 if(INFO != 0)
00292                 {
00293                         cout << "ERROR : INFO = " << INFO << " in DGEQPF (JP = QR)" << endl;
00294                 }
00295                 exit(1);
00296         }
00297         else
00298         {
00299 
00300         }
00301 }

Generated on Mon Nov 3 09:38:13 2003 by doxygen 1.2.14, written by Dimitri van Heesch, © 1997-2002