ConjugateGradientMinimizer.cpp Source File

00001 // ConjugateGradientMinimizer.cpp: implementation of the CConjugateGradientMinimizer class.
00002 //
00004 
00005 #include "ConjugateGradientMinimizer.h"
00006 
00008 // Construction/Destruction
00010 
00011 const double CConjugateGradientMinimizer::m_dGOLD = 1.618034;
00012 
00013 
00014 CConjugateGradientMinimizer::CConjugateGradientMinimizer(bool searchFlag, CParameterFilter *pFilter, int nIterations, double funcAccuracy, int nRestarts, double gradTol, double funcTol)
00015 {
00016         m_pFilter = pFilter;
00017         m_iNIterations = nIterations;
00018         m_iNRestarts = nRestarts;
00019         m_dFuncAccuracy = funcAccuracy;
00020         m_dGradTol = gradTol;
00021         m_dFuncTol = funcTol;
00022         m_pMO = new CMatrixOperations();
00023         m_dMinTol = m_dFuncAccuracy;
00024         m_bSearchFlag = searchFlag;
00025 }
00026 
00027 CConjugateGradientMinimizer::~CConjugateGradientMinimizer()
00028 {
00029         delete m_pMO;
00030 }
00031 
00032 double CConjugateGradientMinimizer::Minimize(double *parameters, Minimizable *minimizable)
00033 {
00034         // loop variables
00035         int i,j;
00036         double fbest = 0.0;
00037         double GammaNum = 0.0;
00038         double GammaDen = 0.0;
00039         double Gamma = 0.0;
00040         double xA = 0.0;
00041         double xB = 0.0;
00042         double xC = 0.0;
00043         double xM = 0.0;
00044         double fA = 0.0;
00045         double fB = 0.0;
00046         double fC = 0.0;
00047         double fM = 0.0;
00048         double supNorm = 0.0;
00049         // set local minimizable pointer
00050         m_pMinimizable = minimizable;
00051         // set number of parameters
00052         nParameters = m_pMinimizable->GetNParameters();
00053         // allocate
00054         // all parameters/gradients/etc. are stored in their transformed (filtered) form
00055         m_pdCurrentParameters = new double[nParameters];
00056         m_pdSearchDirection = new double[nParameters];
00057         m_pdGradient = new double[nParameters];
00058         double *oldGrad = new double[nParameters];
00059         m_pdForceVector = new double[nParameters];
00060         
00061         // copy filtered parameters into current p's
00062         m_pFilter->ForwardTransformation(parameters,nParameters);
00063         m_pMO->ElementCopy(parameters,m_pdCurrentParameters,nParameters);
00064         m_pFilter->BackwardTransformation(parameters,nParameters);
00065 
00066         
00067         // now get the gradient at the initial point
00068         this->ComputeGradient(m_pdCurrentParameters);
00069         
00070         // search direction = -g if none is provided
00071         if(m_bSearchFlag == false)
00072         {
00073                 // set the initial search direction equal to -g
00074                 m_pMO->ElementCopy(m_pdGradient,m_pdSearchDirection,nParameters);
00075                 m_pMO->RescaleVector(m_pdSearchDirection,-1.0,nParameters);
00076         }
00077         else // search direction is read from a file
00078         {
00079                 ParameterReader *pReader = new ParameterReader("search.par");
00080                 for(j = 0; j < nParameters; j++)
00081                 {
00082                         m_pdSearchDirection[j] = pReader->ReadParameter();
00083                 }
00084                 delete pReader;
00085         }
00086         
00087         xA = 0.0;
00088         // use 1/(sup norm) for the first iteration
00089         supNorm = m_pMO->VectorSupNorm(m_pdSearchDirection,nParameters);
00090         xB = 1.0/supNorm;
00091         
00092         int restartCount = 0;
00093         bool restartFlag = false;
00094         // now iterate
00095         for(i = 0; i < m_iNIterations; i++)
00096         {
00097                 // evaluate f at p and p + 1.0*s
00098                 fA = this->LineEvaluate(xA);
00099                 cout << "Cost at beginning of iteration " << i << " is " << fA << endl;
00100 
00101                 fB = this->LineEvaluate(xB);
00102                 cout << "Cost in conjugate direction is " << fB << endl;
00103 
00104                 // bracket the minimum
00105                 this->BracketMinimum(&xA,&fA,&xB,&fB,&xC,&fC);
00106                 // now line minimize in the bracket to find the minimum
00107                 this->BrentLine(xA,xB,xC,&xM,&fM);
00108                 // set the current set of parameters equal to p + xM*s
00109                 m_pMO->TwoVectorLC(1.0,m_pdCurrentParameters,xM,m_pdSearchDirection,m_pdCurrentParameters,nParameters);
00110                 fbest = fM;
00111 
00112                 // write the accepted parameters to a file so we can save
00113                 // incremental progress
00114                 fstream *pftemp = new fstream("cgtemp.par",ios::out);
00115                 *pftemp << "##################################################################" << endl;
00116                 *pftemp << "# BEST PARAMETERS" << endl;
00117                 *pftemp << "# COST : " << fbest << endl;
00118                 *pftemp << "##################################################################" << endl;
00119                 int pcount;
00120                 for(pcount = 0; pcount < nParameters; pcount++)
00121                 {
00122                         *pftemp << setprecision(10) << m_pFilter->OperatorInverse(m_pdCurrentParameters[pcount]) << endl;
00123                 }
00124                 delete pftemp;
00125                 // END WRITE
00126 
00127                 // save the old gradient and compute the new gradient
00128                 m_pMO->ElementCopy(m_pdGradient,oldGrad,nParameters);
00129                 this->ComputeGradient(m_pdCurrentParameters);
00130                 // now compute gamma
00131                 GammaNum = m_pMO->DotProduct(m_pdGradient,m_pdGradient,nParameters);
00132                 // if the magnitude of the new gradient is small, terminate
00133                 if(this->CheckGradTol(sqrt(GammaNum)) ) {break;}
00134                 if(this->CheckFuncTol(fA - fM)) 
00135                 {
00136                         // convergence suspected. do a restart
00137                         if(restartCount < m_iNRestarts)
00138                         {
00139                                 cout << "Restart number " << restartCount << endl;
00140                                 // this combined with later statements restarts along the
00141                                 // gradient direction
00142                                 GammaNum = 0.0;
00143                                 GammaDen = 1.0;
00144                                 restartCount++;
00145                                 restartFlag = true;
00146                         }
00147                         else
00148                         {
00149                                 // out of restarts
00150                                 break;
00151                         }
00152                 }
00153                 else
00154                 {
00155                         // continue computing the new search direction
00156                         GammaNum -= m_pMO->DotProduct(oldGrad,m_pdGradient,nParameters);
00157                         GammaDen = m_pMO->DotProduct(oldGrad,oldGrad,nParameters);
00158                 }       
00159                 Gamma = GammaNum/GammaDen;
00160                 // set the new search direction
00161                 m_pMO->TwoVectorLC(-1.0,m_pdGradient,Gamma,m_pdSearchDirection,m_pdSearchDirection,nParameters);
00162                 
00163                 // write the new search direction to a file
00164                 fstream *pfsrch = new fstream("searchtemp.par",ios::out);
00165                 *pfsrch << "##################################################################" << endl;
00166                 *pfsrch << "# SEARCH DIRECTION" << endl;
00167                 *pfsrch << "##################################################################" << endl;
00168                 for(pcount = 0; pcount < nParameters; pcount++)
00169                 {
00170                         *pfsrch << setprecision(10) << m_pdSearchDirection[pcount] << endl;
00171                 }
00172                 delete pfsrch;
00173                 
00174                 xA = 0.0;
00175                 // for the step in the next iteration, use either the sup norm or the distance to 
00176                 // the previous minimium, whichever is smaller
00177                 supNorm = m_pMO->VectorSupNorm(m_pdSearchDirection,nParameters);
00178                 if( (fabs(xM) < 1.0/supNorm) && (!restartFlag) )
00179                 {
00180                         xB = fabs(xM);
00181                         cout << "Distance to last minimum used to set scale of next step." << endl;
00182                 }
00183                 else
00184                 {
00185                         xB = 1.0/supNorm;
00186                         cout << "Sup norm used to set scale of next step." << endl;
00187                         restartFlag = false;
00188                 }
00189         }
00190 
00191         // return best parameters (which are the current ones) in parameter array,
00192         // remembering to transform
00193         m_pFilter->BackwardTransformation(m_pdCurrentParameters,nParameters);
00194         m_pMO->ElementCopy(m_pdCurrentParameters,parameters,nParameters);
00195 
00196         // cleanup
00197         delete [] m_pdCurrentParameters;
00198         delete [] m_pdSearchDirection;
00199         delete [] m_pdGradient;
00200         delete [] oldGrad;
00201 
00202         return fbest;
00203 }
00204 
00205 void CConjugateGradientMinimizer::ComputeGradient(double *parameters)
00206 {
00207         int i;
00208         double saveparameter = 0.0;
00209         double fPlus = 0.0;
00210         double fMinus = 0.0;
00211         
00212         // loop over parameters
00213         for(i = 0; i < nParameters; i++)
00214         {
00215                 // parameters are already transformed, so just save the old value
00216                 saveparameter = parameters[i];
00217 
00218                 double h = pow(m_dFuncAccuracy,0.333333)*fabs(parameters[i]);
00219                 // if statement generates warning if step is really small
00220                 if(h < DBL_EPSILON)
00221                 {
00222                         cout << "WARNING:Parmeter step small in ConjugateGradientMinimizer::ComputeGradient!" << endl;
00223                         if(fabs(parameters[i]) < DBL_MIN)
00224                         {
00225                                 cout << "WARNING:Zero parameter detected, truncating . . ." << endl;
00226                                 // pretend like the parameter was really small
00227                                 double changedpar = m_pFilter->OperatorInverse(parameters[i]) + DBL_EPSILON;
00228                                 h = pow(m_dFuncAccuracy,0.333333)*fabs(m_pFilter->Operator(changedpar));
00229                         }
00230                         
00231                 }
00232                                 
00233                 // forward step
00234                 parameters[i] += h;
00235                 m_pFilter->BackwardTransformation(parameters,nParameters);
00236                 fPlus = m_pMinimizable->ObjectiveFunction(parameters);
00237                 m_pFilter->ForwardTransformation(parameters,nParameters);
00238                 parameters[i] = saveparameter;
00239                 // backward step
00240                 parameters[i] -= h;
00241                 m_pFilter->BackwardTransformation(parameters,nParameters);
00242                 fMinus = m_pMinimizable->ObjectiveFunction(parameters);
00243                 m_pFilter->ForwardTransformation(parameters,nParameters);
00244 
00245                 // restore original parameter           
00246                 parameters[i] = saveparameter;
00247 
00248                 // fill in the appropriate piece of the gradient
00249                 m_pdGradient[i] = (fPlus - fMinus)/(2*h);
00250                 m_pdForceVector[i] = -1.0*m_pdGradient[i];
00251         }
00252 
00253     //print vector of forces
00254         Notify();
00255 
00256         cout << "Gradient calculated . . . " << endl;
00257 
00258         return;
00259 }
00260 
00261 void CConjugateGradientMinimizer::BracketMinimum(double *xA, double *fA, double *xB, double *fB, double *xC, double *fC)
00262 {
00263         double xU,fU,r,q,xUlim;
00264         double glim = 100.0;
00265 
00266         // if f(B) > f(A), reverse the roles of A and B
00267         if(*fB > *fA)
00268         {
00269                 double temp;
00270                 temp = *xA;
00271                 *xA = *xB;
00272                 *xB = temp;
00273                 temp = *fA;
00274                 *fA = *fB;
00275                 *fB = temp;
00276         }
00277         // initial guess
00278         *xC = *xB + m_dGOLD*(*xB - *xA);
00279         *fC = this->LineEvaluate(*xC);
00280         // now loop
00281         while(*fB > *fC)
00282         {
00283                 r = (*xB - *xA)*(*fB - *fC);
00284                 q = (*xB - *xC)*(*fB - *fA);
00285                 xU = *xB - ((*xB - *xC)*q - (*xB - *xA)*r)/(2.0*(q-r));
00286                 // largest value that we're willing to attempt
00287                 xUlim = *xB + glim*(*xC - *xB);
00288                 
00289                 if((*xB - xU)*(xU - *xC) > 0.0 )  // parabolic u is in (b,c)
00290                 {
00291                         // get f(U)
00292                         fU = this->LineEvaluate(xU);
00293                         if(fU < *fC)  // got a min. between b and c
00294                         {
00295                                 *xA = *xB;
00296                                 *xB = xU;
00297                                 *fA = *fB;
00298                                 *fB = fU;
00299                                 return;
00300                         }
00301                         else if(fU > *fC)       // got one between a and u
00302                         {
00303                                 *xC = xU;
00304                                 *fC = fU;
00305                                 return;
00306                         }
00307 
00308                         // parabolic fit was useless, take another step forward
00309                         xU = *xC + m_dGOLD*(*xB - *xC);
00310                         fU = this->LineEvaluate(xU);
00311                 }
00312                 else if((*xC - xU)*(xU - xUlim) > 0.0) // parabolic u is in (c,ulim)
00313                 {
00314                         fU = this->LineEvaluate(xU);
00315                         if(fU < *fC)
00316                         {
00317                                 *xB = *xC;
00318                                 *xC = xU;
00319                                 xU = *xC + m_dGOLD*(*xC - *xB);
00320                                 *fB = *fC;
00321                                 *fC = fU;
00322                                 fU = this->LineEvaluate(xU);
00323                         }
00324                 }
00325                 else if((xU - xUlim)*(xUlim - *xC) >= 0.0 ) // limit parabolic u to its max
00326                 {
00327                         xU = xUlim;
00328                         fU = this->LineEvaluate(xU);
00329                 }
00330                 else    // reject parabolic u entirely, use default magnification
00331                 {
00332                         xU = *xC + m_dGOLD*(*xC - *xB);
00333                         fU = this->LineEvaluate(xU);
00334                 }
00335                 // dump the oldest point and continue
00336                 *xA = *xB;
00337                 *xB = *xC;
00338                 *xC = xU;
00339                 *fA = *fB;
00340                 *fB = *fC;
00341                 *fC = fU;
00342         }       // end while loop
00343 }
00344 
00345 void CConjugateGradientMinimizer::BrentLine(double xA, double xB, double xC, double *xM, double *fM)
00346 {
00347         int i;
00348         double u,x,w,v,xm;
00349         double fu,fx,fw,fv;
00350         double a,b;
00351         double p,q,r,d,etemp;
00352         double tol1,tol2;
00353         double ZEPS = 1.0e-10;
00354         double RATIO = 1.0 - (1.0/m_dGOLD);
00355 
00356         double e = 0.0;
00357         
00358         // a and b need to be in ascending order, but the input abcissas need 
00359         // not be
00360         if(xA < xC)
00361         {
00362                 a = xA;
00363                 b = xC;
00364         }
00365         else
00366         {
00367                 a = xC;
00368                 b = xA;
00369         }
00370         x = xB;
00371         w = xB;
00372         v = xB;
00373         fx = this->LineEvaluate(x);
00374         fw = fx;
00375         fv = fx;
00376 
00377         for(i = 0; i < 100; i++)
00378         {
00379                 xm = 0.5*(a + b);
00380                 tol1 = m_dMinTol*fabs(x) + ZEPS;
00381                 tol2 = 2.0*tol1;
00382                 
00383                 if(fabs(x - xm) <= (tol2 - 0.5*(b - a)) ) // test for termination
00384                 {
00385                         *xM = x;
00386                         *fM = fx;
00387                         return;
00388                 }
00389                 if(fabs(e) > tol1) // construct a trial parabolic fit
00390                 {
00391                         r = (x - w)*(fx - fv);
00392                         q = (x - v)*(fx - fw);
00393                         p = (x - v)*q - (x - w)*r;
00394                         q = 2.0*(q - r);
00395                         if(q > 0.0) {p = -1.0*p;}
00396                         q = fabs(q);
00397                         etemp = e;
00398                         e = d;
00399                         // determine the acceptability of the parabolic fit
00400                         if( (fabs(p) >= fabs(0.5*q*etemp)) || (p <= q*(a-x)) || (p >= q*(b-x)) )
00401                         {
00402                                 if(x >= xm)
00403                                 {
00404                                         e = a - x;
00405                                 }
00406                                 else
00407                                 {
00408                                         e = b - x;
00409                                 }
00410                                 d = RATIO*e;
00411                         }
00412                         else  // take golden section step into the larger of the two segments
00413                         {
00414                                 d = p/q;
00415                                 u = x + d;
00416                                 if( ((u - a) < tol2) || ((b - u) < tol2) )
00417                                 {
00418                                         d = SgnProduct(tol1,xm - x);
00419                                 }
00420                         }
00421                 } // end if
00422                 else
00423                 {
00424                         if(x >= xm)
00425                         {
00426                                 e = a - x;
00427                         }
00428                         else
00429                         {
00430                                 e = b - x;
00431                         }
00432                         d = RATIO*e;
00433                 }
00434                 
00435                 if(fabs(d) >= tol1)
00436                 {
00437                         u = x + d;
00438                 }
00439                 else
00440                 {
00441                         u = x + SgnProduct(tol1,d);
00442                 }
00443                 fu = this->LineEvaluate(u);
00444 
00445                 // now decide what to do with u
00446                 if(fu <= fx)
00447                 {
00448                         if(u >= x)
00449                         {
00450                                 a = x;
00451                         }
00452                         else
00453                         {
00454                                 b = x;
00455                         }
00456                         v = w;
00457                         w = x;
00458                         x = u;
00459                         fv = fw;
00460                         fw = fx;
00461                         fx = fu;
00462                 }
00463                 else
00464                 {
00465                         if(u < x)
00466                         {
00467                                 a = u;
00468                         }
00469                         else
00470                         {
00471                                 b = u;
00472                         }
00473 
00474                         if((fu <= fw) || (w == x))
00475                         {
00476                                 v = w;
00477                                 w = u;
00478                                 fv = fw;
00479                                 fw = fu;
00480                         }
00481                         else if((fu <= fv) || (v == x) || (v == w))
00482                         {
00483                                 v = u;
00484                                 fv = fu;
00485                         }
00486                 }
00487         
00488         
00489         } // end for loop
00490 
00491         cout << "WARNING : Number of iterations in BrentLine() exceeded." << endl;
00492 
00493         return;
00494 }
00495 
00496 bool CConjugateGradientMinimizer::CheckGradTol(double norm)
00497 {
00498         if(norm < m_dGradTol)
00499         {
00500                 cout << "||g|| < eps_g, terminating . . . " << endl;
00501                 return true;
00502         }
00503         else
00504         {
00505                 return false;
00506         }
00507 }
00508 
00509 bool CConjugateGradientMinimizer::CheckFuncTol(double diff)
00510 {
00511         if(diff < m_dFuncTol)
00512         {
00513                 cout << "f_(k) - f_(k+1) < eps_f, convergence suspected . . . " << endl;
00514                 return true;
00515         }
00516         else
00517         {
00518                 return false;
00519         }
00520 }
00521 
00522 double CConjugateGradientMinimizer::LineEvaluate(double alpha)
00523 {
00524         double funcVal = 0.0;
00525         double *z = new double[nParameters];
00526         // form the linear combination
00527         m_pMO->TwoVectorLC(1.0,m_pdCurrentParameters,alpha,m_pdSearchDirection,z,nParameters);
00528         // back transform
00529         m_pFilter->BackwardTransformation(z,nParameters);
00530         // evaluate
00531         funcVal = m_pMinimizable->ObjectiveFunction(z);
00532         // cleanup
00533         delete [] z;
00534 
00535         return funcVal;
00536 }
00537 
00538 double CConjugateGradientMinimizer::SgnProduct(double a, double b)
00539 {
00540         int sign;
00541         if(b >= 0.0) {sign = 1;}
00542         else {sign = -1;}
00543         return sign*fabs(a);
00544 }