00001 #include "TranslationModelMinimizable.h"
00002 #include <iostream>
00003 #include <fstream>
00004 #include <math.h>
00005
00006 const double TranslationModelMinimizable::TINY = 1.0e-16;
00007
00008 const int TranslationModelMinimizable::MAX_RNA_LENGTH = 4000;
00009
00010 const int TranslationModelMinimizable::MAX_RNA_MOLS = 2000;
00011
00012 const int TranslationModelMinimizable::MAX_STRING_LENGTH = 100;
00013
00014 TranslationModelMinimizable::TranslationModelMinimizable(LocateInputFileDirectory *locate, const char* infilename, int ribosome_length)
00015 {
00016 rib_size = ribosome_length;
00017
00018
00019 ifstream input;
00020 input.open(infilename);
00021
00022
00023 if (!input)
00024 {
00025 std::cout << "1 Error opening file " << infilename << std::endl;
00026 return;
00027 }
00028
00029 input >> num_genes;
00030 input >> num_data_sets;
00031
00032
00033 fm = new double*[num_data_sets];
00034 fp = new double*[num_data_sets];
00035 fmerr = new double*[num_data_sets];
00036 fperr = new double*[num_data_sets];
00037
00038 for (int i=0; i<num_data_sets; i++)
00039 {
00040 fm[i] = new double[num_genes];
00041 fp[i] = new double[num_genes];
00042 fmerr[i] = new double[num_genes];
00043 fperr[i] = new double[num_genes];
00044 }
00045
00046 geneticcode = new GeneticCode;
00047
00048 char** seqfilename;
00049 seqfilename = new char*[MAX_RNA_MOLS];
00050
00051
00052 for (int i=0; i<num_genes; i++)
00053 {
00054 seqfilename[i] = new char[MAX_STRING_LENGTH];
00055 input >> seqfilename[i];
00056
00057 for (int j=0; j<num_data_sets; j++)
00058 {
00059 input >> fm[j][i];
00060 input >> fmerr[j][i];
00061 }
00062 for (int j=0; j<num_data_sets; j++)
00063 {
00064 input >> fp[j][i];
00065 input >> fperr[j][i];
00066 }
00067 }
00068
00069 RNAseq = new Codon*[num_genes];
00070
00071
00072
00073 efficiency = new double[geneticcode->GetNumCodons()];
00074 for (int i=0; i<geneticcode->GetNumCodons(); i++)
00075 efficiency[i]=1;
00076
00077 Codon* temp_seq_array;
00078 temp_seq_array = new Codon[MAX_RNA_LENGTH+1];
00079
00080 length = new int[num_genes];
00081
00082
00083 for (int i=0; i<num_genes; i++)
00084 {
00085
00086 ifstream inputseq;
00087 char fileName[1000];
00088
00089 sprintf(fileName, "%s%s", locate->Get("Sequences/"), seqfilename[i]);
00090 inputseq.open(fileName);
00091
00092
00093
00094 if (!inputseq)
00095 {
00096 std::cout << "2 Error opening file " << seqfilename[i] << std::endl;
00097 return;
00098 }
00099
00100 eatwhite(inputseq);
00101
00102
00103 int RNAlength=0;
00104 char codonstring[6];
00105 Codon tempcodon;
00106 Anticodon temptRNA = uuu;
00107
00108 while (temptRNA!=stop)
00109 {
00110 inputseq.get(codonstring, 4);
00111 tempcodon = geneticcode->read_seq(codonstring);
00112 temp_seq_array[RNAlength]=tempcodon;
00113
00114 temptRNA = geneticcode->get_anticodon(tempcodon);
00115
00116 RNAlength++;
00117 }
00118
00119
00120 length[i]=RNAlength;
00121
00122 RNAseq[i] = new Codon[length[i]];
00123
00124 for (int j=0; j<RNAlength; j++)
00125 RNAseq[i][j]=temp_seq_array[j];
00126 }
00127
00128
00129 delete [] temp_seq_array;
00130 for (int i=0; i<num_genes; i++)
00131 delete [] seqfilename[i];
00132 delete [] seqfilename;
00133
00134
00135 int num_resid=num_data_sets*num_genes;
00136 SetNResiduals(num_resid);
00137
00138
00139 num_params=0;
00140 num_params+=num_data_sets;
00141 num_params+=(geneticcode->GetNumAnticodons()-1)*(num_data_sets+1);
00142 num_params+=num_genes-1;
00143 num_params+=1;
00144
00145
00146
00147
00148
00149
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159 Allocate(nResiduals);
00160 }
00161
00162 TranslationModelMinimizable::~TranslationModelMinimizable()
00163 {
00164 delete geneticcode;
00165 delete [] efficiency;
00166 delete [] length;
00167
00168 for (int i=0; i<num_genes; i++)
00169 delete [] RNAseq[i];
00170 delete [] RNAseq;
00171
00172 for (int i=0; i<num_data_sets; i++)
00173 {
00174 delete [] fm[i];
00175 delete [] fp[i];
00176 delete [] fmerr[i];
00177 delete [] fperr[i];
00178 }
00179
00180 delete [] fm;
00181 delete [] fp;
00182 delete [] fmerr;
00183 delete [] fperr;
00184 }
00185
00186 double TranslationModelMinimizable::EntropyShift(double T)
00187 {
00188 return 0.0;
00189 }
00190
00191 double TranslationModelMinimizable::F(double *parameters, double T)
00192 {
00193 double objFuncVal = 0.0;
00194 objFuncVal = ComputeResiduals(parameters);
00195 return objFuncVal;
00196 }
00197
00198 double TranslationModelMinimizable::F0(double *parameters, double T)
00199 {
00200 return 0.0;
00201 }
00202
00203 int TranslationModelMinimizable::GetNParameters()
00204 {
00205 return num_params;
00206 }
00207
00208 double TranslationModelMinimizable::ComputeResiduals(double *parameters)
00209 {
00210 double* k = new double[MAX_RNA_LENGTH];
00211
00212
00213
00214
00215
00216 double* P0 = new double[num_genes];
00217 for (int i=0; i<num_genes; i++)
00218 {
00219
00220 if (i==0)
00221 k[0]=1*parameters[geneticcode->GetNumAnticodons()-1];
00222 else
00223 k[0]=parameters[((num_data_sets+1)*geneticcode->GetNumAnticodons()-1+i)]
00224 * parameters[geneticcode->GetNumAnticodons()-1];
00225
00226
00227
00228 for (int j=1; j<length[i]-1; j++)
00229 {
00230 int tRNA = int(geneticcode->get_anticodon(RNAseq[i][j]));
00231 k[j]=parameters[tRNA];
00232 }
00233
00234 k[length[i]-1]=parameters[(num_data_sets+1)*geneticcode->GetNumAnticodons()-1];
00235
00236
00237 P0[i]=compute_current(k,length[i],rib_size);
00238
00239
00240
00241
00242
00243
00244
00245
00246
00247 }
00248
00249 for (int i=0; i<num_data_sets; i++)
00250 {
00251 for (int j=0; j<num_genes; j++)
00252 {
00253
00254
00255
00256 double R;
00257 if (i==num_data_sets-1)
00258 R=1;
00259 else
00260 R=parameters[(i+2)*geneticcode->GetNumAnticodons()-1];
00261 double kinit;
00262 if (j==0)
00263 kinit=1;
00264 else
00265 kinit=parameters[((num_data_sets+1)*geneticcode->GetNumAnticodons()-1+j)];
00266 k[0]=kinit*R;
00267
00268
00269 for (int q=1; q<length[j]-1; q++)
00270 {
00271 int tRNA = int(geneticcode->get_anticodon(RNAseq[j][q]));
00272 k[q]=parameters[(i+1)*geneticcode->GetNumAnticodons()+tRNA];
00273 }
00274
00275 k[length[j]-1]=parameters[(num_data_sets+1)*geneticcode->GetNumAnticodons()-1];
00276
00277
00278 double Pij=compute_current(k,length[j],rib_size);
00279
00280
00281
00282
00283
00284
00285
00286
00287
00288
00289 double fpij=Pij/P0[j]*fm[i][j];
00290
00291
00292 residuals[i*num_genes+j]=(log(fpij)-log(fp[i][j]))/sqrt(pow(fperr[i][j]/fp[i][j],2)+pow(fmerr[i][j]/fm[i][j],2));
00293 }
00294 }
00295
00296 delete [] k;
00297 delete [] P0;
00298
00299 return SumOfSquares();
00300 }
00301
00302
00303
00304
00305
00306
00307
00308
00309
00310
00311
00312
00313
00314
00315
00316
00317
00318 double TranslationModelMinimizable::compute_current(double* k, int N, int L)
00319 {
00320
00321
00322
00323
00324
00325
00326
00327 double kmin=k[0];
00328 double* kinv = new double[N];
00329 double* KL = new double[N];
00330 for (int i=0; i<N; i++)
00331 {
00332 if (k[i]<kmin)
00333 kmin=k[i];
00334 kinv[i]=1/k[i];
00335 }
00336 double Jmin=kmin/((1+sqrt((double)L))*(1+sqrt((double)L)));
00337 for (int i=L-1; i<N; i++)
00338 {
00339 double sum=0;
00340 for (int j=0; j<L; j++)
00341 sum+=kinv[i-j];
00342 KL[i]=1/sum;
00343 }
00344 double Jmax=KL[L-1];
00345 for (int i=L; i<N; i++)
00346 if (KL[i]<Jmax)
00347 Jmax=KL[i];
00348
00349
00350 double* n = new double[N];
00351
00352
00353 double Jub=Jmax;
00354 double Jlb=Jmin;
00355
00356
00357 int done=0;
00358 while (Jub-Jlb>TINY && done==0)
00359 {
00360 double Jnew=(Jlb+Jub)/2;
00361
00362 update_dens(n, k, Jnew, N-1, L);
00363
00364 int bisected=0;
00365
00366 int i=N-1;
00367 while (i>=0 && bisected==0)
00368 {
00369 if (n[i]<0)
00370 {
00371 Jub=Jnew;
00372
00373 bisected=1;
00374 }
00375 i--;
00376 }
00377 if (bisected==0)
00378 {
00379 if (n[0]>k[0])
00380 {
00381 Jub=Jnew;
00382
00383 bisected=1;
00384 }
00385 else if (n[0]<k[0])
00386 {
00387 Jlb=Jnew;
00388
00389 bisected=1;
00390 }
00391 else
00392 {
00393 done=1;
00394 Jlb=Jnew;
00395 Jub=Jnew;
00396
00397 }
00398 }
00399 }
00400
00401
00402 delete [] n;
00403 delete [] kinv;
00404 delete [] KL;
00405
00406 return ((Jlb+Jub)/2);
00407 }
00408
00409
00410 void TranslationModelMinimizable::update_dens(double* n, double* k, double J, int N,
00411 int L)
00412 {
00413 for (int i=N; i>=N-L+1; i--)
00414 n[i]=J/k[i];
00415 for (int i=N-L; i>=1; i--)
00416 {
00417 double sum=0;
00418 for (int j=1; j<=L; j++)
00419 sum+=n[i+j];
00420 n[i]=(J/k[i])*(1-sum+n[i+L])/(1-sum);
00421 }
00422
00423
00424 double sum=0;
00425 for (int j=1; j<=L; j++)
00426 sum+=n[j];
00427 n[0]=J/(1-sum);
00428 }
00429
00430
00431 ifstream& TranslationModelMinimizable::eatwhite(ifstream& is)
00432 {
00433 char c;
00434 while (is.get(c))
00435 {
00436 if (!isspace(c))
00437 {
00438 is.putback(c);
00439 break;
00440 }
00441 }
00442 return is;
00443 }