Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members  

TranslationModelMinimizable.cpp

Go to the documentation of this file.
00001 #include "TranslationModelMinimizable.h"
00002 #include <iostream>
00003 #include <fstream>
00004 #include <math.h>
00005 
00006 const double TranslationModelMinimizable::TINY = 1.0e-16;
00007 
00008 const int TranslationModelMinimizable::MAX_RNA_LENGTH = 4000;
00009 
00010 const int TranslationModelMinimizable::MAX_RNA_MOLS = 2000;
00011 
00012 const int TranslationModelMinimizable::MAX_STRING_LENGTH = 100;
00013 
00014 TranslationModelMinimizable::TranslationModelMinimizable(LocateInputFileDirectory *locate, const char* infilename, int ribosome_length)
00015 {
00016   rib_size = ribosome_length;
00017 
00018   // open file of genes and RNA and protein data
00019         ifstream input;
00020         input.open(infilename);
00021 
00022 //  ifstream input(infilename,ios::nocreate,filebuf::sh_read);
00023   if (!input)
00024   {
00025           std::cout << "1 Error opening file " << infilename << std::endl;
00026     return;
00027   }
00028 
00029   input >> num_genes;  // number of genes to be used
00030   input >> num_data_sets;  // number of sets of fold change data
00031                            // (1 less than number of expt conditions)
00032 
00033   fm = new double*[num_data_sets];
00034   fp = new double*[num_data_sets];
00035   fmerr = new double*[num_data_sets];
00036   fperr = new double*[num_data_sets];
00037 
00038   for (int i=0; i<num_data_sets; i++)
00039   {
00040     fm[i] = new double[num_genes];
00041     fp[i] = new double[num_genes];
00042     fmerr[i] = new double[num_genes];
00043     fperr[i] = new double[num_genes];
00044   }
00045 
00046   geneticcode = new GeneticCode;
00047 
00048   char** seqfilename;
00049   seqfilename = new char*[MAX_RNA_MOLS]; // hold filenames of seq. files
00050 
00051   // get experimental data from file
00052   for (int i=0; i<num_genes; i++)
00053   {
00054     seqfilename[i] = new char[MAX_STRING_LENGTH];
00055     input >> seqfilename[i];
00056 
00057     for (int j=0; j<num_data_sets; j++)
00058     {
00059       input >> fm[j][i];
00060       input >> fmerr[j][i];
00061     }
00062     for (int j=0; j<num_data_sets; j++)
00063     {
00064       input >> fp[j][i];
00065       input >> fperr[j][i];
00066     }
00067   }
00068 
00069   RNAseq = new Codon*[num_genes];
00070 
00071   // for now, set all efficiencies to 1
00072   // These will correspond to enum Codon
00073   efficiency = new double[geneticcode->GetNumCodons()];
00074   for (int i=0; i<geneticcode->GetNumCodons(); i++)
00075     efficiency[i]=1;
00076 
00077   Codon* temp_seq_array;
00078   temp_seq_array = new Codon[MAX_RNA_LENGTH+1];
00079 
00080   length = new int[num_genes];
00081 
00082   // begin opening sequence files and reading
00083   for (int i=0; i<num_genes; i++)
00084   {
00085     // open a sequence file
00086     ifstream inputseq;
00087         char fileName[1000];
00088 
00089         sprintf(fileName, "%s%s", locate->Get("Sequences/"), seqfilename[i]);
00090         inputseq.open(fileName);
00091 
00092 // cout << "opened inputseq file " << seqfilename[i] << endl;
00093 
00094     if (!inputseq)
00095     {
00096                 std::cout << "2 Error opening file " << seqfilename[i] << std::endl;
00097       return;
00098     }
00099 
00100     eatwhite(inputseq);
00101 
00102     // RNA sequence to a temporary array
00103     int RNAlength=0;  // length of RNA being read in
00104     char codonstring[6];
00105     Codon tempcodon;
00106     Anticodon temptRNA = uuu;
00107 
00108     while (temptRNA!=stop)
00109     {
00110       inputseq.get(codonstring, 4);
00111       tempcodon = geneticcode->read_seq(codonstring);
00112       temp_seq_array[RNAlength]=tempcodon;
00113 
00114       temptRNA = geneticcode->get_anticodon(tempcodon);
00115 
00116       RNAlength++;
00117     }
00118 
00119     // transfer seqs to appropriate sequence array
00120     length[i]=RNAlength;
00121 // cout << seqfilename[i] << '\t' << length[i] << endl;
00122     RNAseq[i] = new Codon[length[i]];
00123 
00124     for (int j=0; j<RNAlength; j++)
00125       RNAseq[i][j]=temp_seq_array[j];
00126   }
00127   // end opening sequence files and reading
00128 
00129   delete [] temp_seq_array;
00130   for (int i=0; i<num_genes; i++)
00131     delete [] seqfilename[i];
00132   delete [] seqfilename;  
00133 
00134   // compute number of residuals
00135   int num_resid=num_data_sets*num_genes;
00136   SetNResiduals(num_resid);
00137 
00138   // compute number of fit parameters
00139   num_params=0;
00140   num_params+=num_data_sets;  // ribosome concs., where Rn:=1
00141   num_params+=(geneticcode->GetNumAnticodons()-1)*(num_data_sets+1);  // tRNA elong. rates
00142   num_params+=num_genes-1;  // init. rates, where kinit0:=1
00143   num_params+=1;  // term. rate
00144 
00145 /* fit parameters ordered as follows:
00146 kelong[0]:  0,...,NUM_ANTICODONS-2
00147 R0:  NUM_ANTICODONS-1
00148 kelong[1]:  NUM_ANTICODONS,...,2*NUM_ANTICODONS-2
00149 R1:  2*NUM_ANTICODONS-1
00150 ...
00151 kelong[num_data_sets]:  num_data_sets*NUM_ANTICODONS,...,
00152                                 (num_data_sets+1)*NUM_ANTICODONS-2
00153 kterm:  (num_data_sets+1)*NUM_ANTICODONS-1
00154 kinit[1,...,num_genes-1]:  (num_data_sets+1)*NUM_ANTICODONS,...,
00155                              (num_data_sets+1)*NUM_ANTICODONS+num_genes-2
00156 */
00157 
00158   // allocate space for things that will be computed
00159   Allocate(nResiduals);
00160 }
00161 
00162 TranslationModelMinimizable::~TranslationModelMinimizable()
00163 {
00164   delete geneticcode;
00165   delete [] efficiency;
00166   delete [] length;
00167 
00168   for (int i=0; i<num_genes; i++)
00169     delete [] RNAseq[i];
00170   delete [] RNAseq;
00171 
00172   for (int i=0; i<num_data_sets; i++)
00173   {
00174     delete [] fm[i];
00175     delete [] fp[i];
00176     delete [] fmerr[i];
00177     delete [] fperr[i];
00178   }
00179 
00180   delete [] fm;
00181   delete [] fp;
00182   delete [] fmerr;
00183   delete [] fperr;
00184 }
00185 
00186 double TranslationModelMinimizable::EntropyShift(double T)
00187 {
00188   return 0.0;
00189 }
00190 
00191 double TranslationModelMinimizable::F(double *parameters, double T)
00192 {
00193   double objFuncVal = 0.0;
00194   objFuncVal = ComputeResiduals(parameters);
00195   return objFuncVal;
00196 }
00197 
00198 double TranslationModelMinimizable::F0(double *parameters, double T)
00199 {
00200   return 0.0;
00201 }
00202 
00203 int TranslationModelMinimizable::GetNParameters()
00204 {
00205   return num_params;
00206 }
00207 
00208 double TranslationModelMinimizable::ComputeResiduals(double *parameters)
00209 {
00210   double* k = new double[MAX_RNA_LENGTH];  // holds translation rates
00211                           // k[0]=init rate
00212                           // k[1]=rate for 1st codon after AUG
00213                           // k[length-1]=term rate
00214 
00215   // compute prot prod rates for reference state
00216   double* P0 = new double[num_genes];
00217   for (int i=0; i<num_genes; i++)
00218   {
00219     // find initiation rates
00220     if (i==0)
00221       k[0]=1*parameters[geneticcode->GetNumAnticodons()-1];  // kinit[0]*R0
00222     else
00223       k[0]=parameters[((num_data_sets+1)*geneticcode->GetNumAnticodons()-1+i)]
00224                 * parameters[geneticcode->GetNumAnticodons()-1];  // kinit[i]*R0
00225 
00226 
00227     // find elongation rates
00228     for (int j=1; j<length[i]-1; j++)
00229     {
00230       int tRNA = int(geneticcode->get_anticodon(RNAseq[i][j]));
00231       k[j]=parameters[tRNA];
00232     }
00233     // find termination rate
00234     k[length[i]-1]=parameters[(num_data_sets+1)*geneticcode->GetNumAnticodons()-1];
00235 
00236     // compute prot prod rate
00237     P0[i]=compute_current(k,length[i],rib_size);
00238 // cout << i << '\t' << P0[i] << endl;
00239 /*
00240 if (i==0)
00241 {
00242   cout << "state 0" << endl;
00243   for (int q=0; q<=length[i]-1; q++)
00244     cout << k[q] << endl;
00245 }
00246 */
00247   }
00248 
00249   for (int i=0; i<num_data_sets; i++)
00250   {
00251     for (int j=0; j<num_genes; j++)
00252     {
00253       // work with gene j in dataset i
00254 
00255       // find initiation rate
00256       double R;  // ribosome conc
00257       if (i==num_data_sets-1)
00258         R=1;
00259       else
00260         R=parameters[(i+2)*geneticcode->GetNumAnticodons()-1];
00261       double kinit;
00262       if (j==0)
00263         kinit=1;
00264       else
00265         kinit=parameters[((num_data_sets+1)*geneticcode->GetNumAnticodons()-1+j)];
00266       k[0]=kinit*R;
00267 
00268       // find elongation rates
00269       for (int q=1; q<length[j]-1; q++)
00270       {
00271         int tRNA = int(geneticcode->get_anticodon(RNAseq[j][q]));
00272         k[q]=parameters[(i+1)*geneticcode->GetNumAnticodons()+tRNA];
00273       }
00274       // find termination rate
00275       k[length[j]-1]=parameters[(num_data_sets+1)*geneticcode->GetNumAnticodons()-1];
00276 
00277       // compute prot prod rate
00278       double Pij=compute_current(k,length[j],rib_size);
00279 // cout << j << '\t' << Pij << endl;
00280 /*
00281 if (j==0)
00282 {
00283   cout << "state " << i+1 << endl;
00284   for (int q=0; q<=length[j]-1; q++)
00285     cout << k[q] << endl;
00286 }
00287 */
00288       // compute residual
00289       double fpij=Pij/P0[j]*fm[i][j];
00290 
00291       // compute residual for ln(fp)
00292       residuals[i*num_genes+j]=(log(fpij)-log(fp[i][j]))/sqrt(pow(fperr[i][j]/fp[i][j],2)+pow(fmerr[i][j]/fm[i][j],2));
00293     }
00294   }
00295 
00296   delete [] k;
00297   delete [] P0;
00298 
00299   return SumOfSquares();
00300 }
00301 
00302 
00303 /* fit parameters ordered as follows:
00304 kelong[0]:  0,...,NUM_ANTICODONS-2
00305 R0:  NUM_ANTICODONS-1
00306 kelong[1]:  NUM_ANTICODONS,...,2*NUM_ANTICODONS-2
00307 R1:  2*NUM_ANTICODONS-1
00308 ...
00309 kelong[num_data_sets]:  num_data_sets*NUM_ANTICODONS,...,
00310                                 (num_data_sets+1)*NUM_ANTICODONS-2
00311 kterm:  (num_data_sets+1)*NUM_ANTICODONS-1
00312 kinit[1,...,num_genes-1]:  (num_data_sets+1)*NUM_ANTICODONS,...,
00313                              (num_data_sets+1)*NUM_ANTICODONS+num_genes-2
00314 */
00315 
00316 // N is length of RNA
00317 // L is ribosome size
00318 double TranslationModelMinimizable::compute_current(double* k, int N, int L)
00319 {
00320 /*
00321 cout << endl << endl;
00322 for (int i=0; i<N; i++)
00323   cout << k[i] << endl;
00324 cout << endl << endl;
00325 */
00326   // find upper and lower bounds on current
00327   double kmin=k[0];
00328   double* kinv = new double[N];
00329   double* KL = new double[N];
00330   for (int i=0; i<N; i++)
00331   {
00332     if (k[i]<kmin)
00333       kmin=k[i];
00334     kinv[i]=1/k[i];
00335   }
00336   double Jmin=kmin/((1+sqrt((double)L))*(1+sqrt((double)L)));
00337   for (int i=L-1; i<N; i++)
00338   {
00339     double sum=0;
00340     for (int j=0; j<L; j++)
00341       sum+=kinv[i-j];
00342     KL[i]=1/sum;
00343   }
00344   double Jmax=KL[L-1];
00345   for (int i=L; i<N; i++)
00346     if (KL[i]<Jmax)
00347       Jmax=KL[i];
00348 
00349   // allocate space for ribosome densities
00350   double* n = new double[N];
00351 
00352   // find current
00353   double Jub=Jmax;
00354   double Jlb=Jmin;
00355 //cout << "Jub=" << Jub << " Jlb=" << Jlb << endl;
00356   // start bisection
00357   int done=0;
00358   while (Jub-Jlb>TINY && done==0)
00359   {
00360     double Jnew=(Jlb+Jub)/2;
00361 
00362     update_dens(n, k, Jnew, N-1, L);
00363 
00364     int bisected=0;
00365     // check for negative ribosome densities
00366     int i=N-1;
00367     while (i>=0 && bisected==0)
00368     {
00369       if (n[i]<0)  // negative rib dens--Jnew is too large
00370       {
00371         Jub=Jnew;
00372 //cout << "neg n; new Jub=" << Jub << endl;
00373         bisected=1;
00374       }
00375       i--;
00376     }
00377     if (bisected==0)
00378     {
00379       if (n[0]>k[0])
00380       {
00381         Jub=Jnew;
00382 //cout << "pred kinit too big; new Jub=" << Jub << endl;
00383         bisected=1;
00384       }
00385       else if (n[0]<k[0])
00386       {
00387         Jlb=Jnew;
00388 //cout << "pred kinit too low; new Jlb=" << Jlb << endl;
00389         bisected=1;
00390       }
00391       else
00392       {
00393         done=1;
00394         Jlb=Jnew;
00395         Jub=Jnew;
00396 //cout << "kinit fine; J="  << Jlb << endl;
00397       }
00398     }
00399   }
00400   // end bisecting current
00401 
00402   delete [] n;
00403   delete [] kinv;
00404   delete [] KL;
00405 
00406   return ((Jlb+Jub)/2);
00407 }
00408 
00409 // N is coordinate of final ribosome density, so array length is N+1
00410 void TranslationModelMinimizable::update_dens(double* n, double* k, double J, int N, 
00411 int L)
00412 {
00413   for (int i=N; i>=N-L+1; i--)
00414     n[i]=J/k[i];
00415   for (int i=N-L; i>=1; i--)
00416   {
00417     double sum=0;
00418     for (int j=1; j<=L; j++)
00419       sum+=n[i+j];
00420     n[i]=(J/k[i])*(1-sum+n[i+L])/(1-sum);
00421   }
00422 
00423   // store predicted kinit in n[0]
00424   double sum=0;
00425   for (int j=1; j<=L; j++)
00426     sum+=n[j];
00427   n[0]=J/(1-sum);
00428 }
00429 
00430 // from Stroustrup's book
00431 ifstream& TranslationModelMinimizable::eatwhite(ifstream& is)
00432 {
00433   char c;
00434   while (is.get(c))
00435   {
00436     if (!isspace(c)) // is c a whitespace character?
00437     {
00438       is.putback(c);  // put c back into the input buffer
00439       break;
00440     }
00441   }
00442   return is;
00443 }

Generated on Mon Nov 3 09:38:19 2003 by doxygen1.2.14 written by Dimitri van Heesch, © 1997-2002