libsequence  1.9.5
RedundancyCom95.cc
1 /*
2 
3  Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
4 
5  Remove the brackets to email me.
6 
7  This file is part of libsequence.
8 
9  libsequence is free software: you can redistribute it and/or modify
10  it under the terms of the GNU General Public License as published by
11  the Free Software Foundation, either version 3 of the License, or
12  (at your option) any later version.
13 
14  libsequence is distributed in the hope that it will be useful,
15  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  GNU General Public License for more details.
18 
19  You should have received a copy of the GNU General Public License
20  long with libsequence. If not, see <http://www.gnu.org/licenses/>.
21 
22 */
23 
24 #include <cassert>
25 #include <map>
26 #include <cctype>
27 #include <cstring>
28 #include <algorithm>
29 #include <functional>
30 #include <Sequence/SeqEnums.hpp>
31 #include <Sequence/Translate.hpp>
34 #include <Sequence/Comparisons.hpp>
35 
36 namespace Sequence
37 {
38 #ifndef DOXYGEN_SKIP
39  struct RedundancyCom95impl
40  {
41  explicit RedundancyCom95impl(const GeneticCodes code);
42 
43  const GeneticCodes genetic_code;
44 
45  //matrices to be filled on the fly
46  double firstNon[4][4][4];
47  double first2S[4][4][4];
48  double first2V[4][4][4];
49  double thirdFour[4][4][4];
50  double thirdNon[4][4][4];
51  double third2S[4][4][4];
52  double third2V[4][4][4];
53  double l0_vals[4][4][4];
54  double l2S_vals[4][4][4];
55  double l2V_vals[4][4][4];
56  double l4_vals[4][4][4];
57  void FillFirstPositionCounts ();
58  void FillThirdPositionCounts ();
59  void FillLValues ();
60  void codonPrecondition(const std::string & codon);
61  };
62 
63  RedundancyCom95impl::RedundancyCom95impl(const GeneticCodes code) : genetic_code(code)
64  {
65  FillFirstPositionCounts ();
66  FillThirdPositionCounts ();
67  FillLValues ();
68  }
69 
70  void RedundancyCom95impl::codonPrecondition(const std::string & codon)
71 
72 
78  {
79  if ( codon.length() != 3 ||
80  std::find_if(codon.begin(),codon.end(),ambiguousNucleotide()) != codon.end() )
81  {
82  throw(std::runtime_error("Sequence::RedundancyCom95 -- precondition failed, invalid codon"));
83  }
84  }
85 
86  void
87  RedundancyCom95impl::FillFirstPositionCounts ()
88  {
89  std::string codon,mutation;
90  int numN, numTsS, numTvS; //numbers of changes of various types, N = nonsynon, S = synon
91  unsigned codonState;
92  int numPossTs, numPossTv;
93  double FirstN, First2S, First2V;
94  std::string codonTrans;
95  std::string mutationTrans;
96 
97  bool S, N;
98  //make every codon
99  codon.resize(3);
100  mutation.resize(3);
101  for( unsigned i = 0 ; i < 4 ; ++i )
102  {
103  for( unsigned j = 0 ; j < 4 ; ++j )
104  {
105  for( unsigned k = 0 ; k < 4 ; ++k )
106  {
107  numN = numTsS = numTvS = 0; //initialize
108  numPossTs = numPossTv = 0;
109  S = N = 0;
110 
111  codon[0] = dna_alphabet[i];
112  codon[1] = dna_alphabet[j];
113  codon[2] = dna_alphabet[k];
114 
115  //now make all possible mutations at first position
116  codonState = i;
117  for( unsigned l = 0 ; l < 4 ; ++l )
118  {
119  if (l != codonState)
120  { //only include mutations
121  mutation[0] = dna_alphabet[l];
122  mutation[1] = codon[1];
123  mutation[2] = codon[2]; //the mutant codon is now defined
124  auto type = TsTv(dna_alphabet[codonState],mutation[0]);
125  codonTrans = Translate (codon.begin(),
126  codon.end(), genetic_code);
127  mutationTrans = Translate (mutation.begin(),
128  mutation.end(),genetic_code);
129 
130  //if neither codon is a stop
131  if ((codonTrans)[0] != '*'
132  && (mutationTrans)[0] != '*')
133  {
134  if(codonTrans == mutationTrans)
135  { //synonymous change
136  S = 1;
137  N = 0;
138  }
139  else
140  { //nonsynonymous change
141  S = 0;
142  N = 1;
143  }
144  if (type == Mutations::Ts)
145  ++numPossTs;
146  else if (type == Mutations::Tv)
147  ++numPossTv;
148  if (N == 1) //if change is nonsynonymous
149  ++numN;
150  else if (S == 1 && type == Mutations::Ts)
151  //if change is synonymous transition
152  ++numTsS;
153  else if (S == 1 && type == Mutations::Tv)
154  //if change is synonymous transversion
155  ++numTvS;
156  }
157  }
158  }
159  //process here
160  if ((numPossTs + numPossTv) == 0)
161  { //stop codons
162  First2S = 0;
163  First2V = 0;
164  FirstN = 0;
165  }
166  else if ((numTsS + numTvS) == 0) //non-degenerate sites
167  {
168  First2S = 0;
169  First2V = 0;
170  FirstN = 1.0;
171  }
172  else if (numPossTs != 0 && numPossTv != 0)
173  { //special cases
174  if (double (numTsS) /
175  double (numPossTs) != 1.0
176  && double (numTvS) /
177  double (numPossTv) != 1.0)
178  { //fractional redundancy
179  First2S =
180  double (numTsS) /
181  double (numPossTs +
182  numPossTv);
183  First2V =
184  double (numTvS) /
185  double (numPossTs +
186  numPossTv);
187  FirstN = 1.0 - First2S -
188  First2V;
189  }
190  else
191  { //odd types of degeneracy
192  First2S =
193  double (numTsS) /
194  double (numPossTs);
195  First2V =
196  double (numTvS) /
197  double (numPossTv);
198  FirstN = 1.0 - First2S -
199  First2V;
200  }
201  }
202  else
203  {
204  if (numPossTs > 0)
205  First2S =
206  double (numTsS) /
207  double (numPossTs + numPossTv);
208  else
209  First2S = 0.0;
210 
211  if (numPossTv > 0)
212  First2V =
213  double (numTvS) /
214  double (numPossTv + numPossTs);
215  else
216  First2V = 0.0;
217  FirstN = 1.0 - First2S - First2V;
218  }
219  //fill array
220  firstNon[i][j][k]=FirstN;
221  first2S[i][j][k]=First2S;
222  first2V[i][j][k]=First2V;
223  }
224  }
225  }
226  }
227 
228  void
229  RedundancyCom95impl::FillThirdPositionCounts ()
230  {
231  std::string codon, mutation; //the two codons
232  int numN, numTsS, numTvS; //numbers of changes of various types, N = nonsynon, S = synon
233  unsigned codonState;
234  int numPossTs, numPossTv;
235  double ThirdN, Third2S, Third2V, Third4;
236  std::string codonTrans;
237  std::string mutationTrans;
238  bool S, N;
239  //make every codon
240  codon.resize(3);
241  mutation.resize(3);
242  for( unsigned i = 0 ; i < 4 ; ++i )
243  {
244  for( unsigned j = 0 ; j < 4 ; ++j )
245  {
246  for( unsigned k = 0 ; k < 4 ; ++k )
247  {
248  numN = numTsS = numTvS = 0; //initialize
249  numPossTs = numPossTv = 0;
250  S = N = 0;
251  codon[0] = dna_alphabet[i];
252  codon[1] = dna_alphabet[j];
253  codon[2] = dna_alphabet[k];
254  //now make all possible mutations at first position
255  codonState = k;
256 
257  for( unsigned l = 0 ; l < 4 ; ++l )
258  {
259  if (l != codonState)
260  { //only include mutations
261  mutation[0] = codon[0];
262  mutation[1] = codon[1];
263  mutation[2] = dna_alphabet[l];
264  auto type = TsTv( dna_alphabet[codonState],mutation[2] );
265  codonTrans = Translate (codon.begin(),
266  codon.end(),genetic_code);
267  mutationTrans = Translate (mutation.begin(),
268  mutation.end(),genetic_code);
269 
270  if ((codonTrans)[0] != '*'
271  && (mutationTrans)[0] != '*')
272  {
273  if(codonTrans==mutationTrans)
274  {
275  S = 1;
276  N = 0;
277  }
278  else
279  {
280  S = 0;
281  N = 1;
282  }
283  if (type == Mutations::Ts)
284  ++numPossTs;
285  else if (type == Mutations::Tv)
286  ++numPossTv;
287  if (N)
288  ++numN;
289  else if (S && type == Mutations::Ts)
290  ++numTsS;
291  else if (S && type == Mutations::Tv)
292  ++numTvS;
293  }
294  }
295  }
296  //process here
297  ThirdN = Third2S = Third2V = Third4 = 0.0;
298  if (numPossTs + numPossTv == 0)
299  { //stop codons
300  ThirdN = 0.0;
301  Third2S = 0.0;
302  Third2V = 0.0;
303  Third4 = 0.0;
304  }
305  else if (numTsS + numTvS == 0)
306  { //nondegenerate
307  ThirdN = 1.0;
308  Third2S = 0.0;
309  Third2V = 0.0;
310  Third4 = 0.0;
311  }
312  else if (numTsS + numTvS == 3)
313  { //4fold degenerate
314  ThirdN = 0.0;
315  Third2S = 0.0;
316  Third2V = 0.0;
317  Third4 = 1.0;
318  }
319  else if (numTsS == 0 || numTvS == 0)
320  { //if there is non-degeneracy for one type
321  if (numPossTs > 0)
322  Third2S =
323  double (numTsS) /
324  double (numPossTs);
325  else
326  Third2S = 0.0;
327  if (numPossTv > 0)
328  Third2V =
329  double (numTvS) /
330  double (numPossTv);
331  else
332  Third2V = 0.0;
333  ThirdN = 1.0 - Third2S - Third2V;
334  Third4 = 0.0;
335  }
336  else if (numTsS > 0 && numTvS > 0
337  && (double (numTsS) /
338  double (numPossTs) != 1.0
339  || double (numTvS) /
340  double (numPossTv) != 1.0))
341  { //fractional degeneracy
342  Third2S = 1.0 / 3.0;
343  Third2V = 1.0 / 3.0;
344  ThirdN = 1.0 / 3.0;
345  Third4 = 0.0;
346  }
347  thirdNon[i][j][k]=ThirdN;
348  third2S[i][j][k]=Third2S;
349  third2V[i][j][k]=Third2V;
350  thirdFour[i][j][k]=Third4;
351  }
352  }
353  }
354  }
355 
356  //the L values are just the sums of the site degeneracy values for each codon
357  void
358  RedundancyCom95impl::FillLValues ()
359  {
360  std::string trans;
361 
362  std::string codon(3,'A');
363  for(unsigned i = 0 ; i < 4 ; ++i )
364  for(unsigned j = 0 ; j < 4 ; ++j )
365  for(unsigned k = 0 ; k < 4 ; ++k )
366  {
367  codon[0] = dna_alphabet[i];
368  codon[1] = dna_alphabet[j];
369  codon[2] = dna_alphabet[k];
370 
371  trans =Translate (codon.begin(),codon.end(), genetic_code);
372  if(strcmp(trans.c_str(),"*")!=0)
373  { //if not a stop codon
374  l0_vals[i][j][k] = 1. + firstNon[i][j][k] + thirdNon[i][j][k];
375  l2S_vals[i][j][k] = first2S[i][j][k] + third2S[i][j][k];
376  l2V_vals[i][j][k] = first2V[i][j][k] + third2V[i][j][k];
377  l4_vals[i][j][k] = thirdFour[i][j][k];
378  }
379  else
380  {
381  l0_vals[i][j][k] = l2S_vals[i][j][k] = l2V_vals[i][j][k] = l4_vals[i][j][k] = 0.;
382  }
383  }
384  }
385 #endif
387  impl(std::unique_ptr<RedundancyCom95impl>(new RedundancyCom95impl(code)))
391  {
392  }
393 
394  RedundancyCom95::~RedundancyCom95(void)
395  {
396  }
397 
398 
399  //the functions below are klugdy, but there is no real loss of efficieny,
400  //so it is left alone for now
401  double
402  RedundancyCom95::FirstNon (const std::string & codon) const
403 
409  {
410  impl->codonPrecondition(codon);
411  auto i = std::distance( dna_alphabet.begin(),
412  std::find(dna_alphabet.begin(),
413  dna_alphabet.end(),codon[0]) ),
414  j = std::distance( dna_alphabet.begin(),
415  std::find(dna_alphabet.begin(),
416  dna_alphabet.end(),codon[1]) ),
417  k = std::distance( dna_alphabet.begin(),
418  std::find(dna_alphabet.begin(),
419  dna_alphabet.end(),codon[2]) );
420  return impl->firstNon[i][j][k];
421  }
422 
423  double
424  RedundancyCom95::First2S (const std::string & codon) const
425 
432  {
433  impl->codonPrecondition(codon);
434  auto i = std::distance( dna_alphabet.begin(),
435  std::find(dna_alphabet.begin(),
436  dna_alphabet.end(),codon[0]) ),
437  j = std::distance( dna_alphabet.begin(),
438  std::find(dna_alphabet.begin(),
439  dna_alphabet.end(),codon[1]) ),
440  k = std::distance( dna_alphabet.begin(),
441  std::find(dna_alphabet.begin(),
442  dna_alphabet.end(),codon[2]) );
443  return impl->first2S[i][j][k];
444  }
445 
446  double
447  RedundancyCom95::First2V (const std::string & codon) const
448 
454  {
455  impl->codonPrecondition(codon);
456  auto i = std::distance( dna_alphabet.begin(),
457  std::find(dna_alphabet.begin(),
458  dna_alphabet.end(),codon[0]) ),
459  j = std::distance( dna_alphabet.begin(),
460  std::find(dna_alphabet.begin(),
461  dna_alphabet.end(),codon[1]) ),
462  k = std::distance( dna_alphabet.begin(),
463  std::find(dna_alphabet.begin(),
464  dna_alphabet.end(),codon[2]) );
465  return impl->first2V[i][j][k];
466  }
467 
468  double
469  RedundancyCom95::ThirdNon (const std::string & codon) const
470 
476  {
477  impl->codonPrecondition(codon);
478  auto i = std::distance( dna_alphabet.begin(),
479  std::find(dna_alphabet.begin(),
480  dna_alphabet.end(),codon[0]) ),
481  j = std::distance( dna_alphabet.begin(),
482  std::find(dna_alphabet.begin(),
483  dna_alphabet.end(),codon[1]) ),
484  k = std::distance( dna_alphabet.begin(),
485  std::find(dna_alphabet.begin(),
486  dna_alphabet.end(),codon[2]) );
487  return impl->thirdNon[i][j][k];
488  }
489 
490  double
491  RedundancyCom95::ThirdFour (const std::string & codon) const
492 
498  {
499  impl->codonPrecondition(codon);
500  auto i = std::distance( dna_alphabet.begin(),
501  std::find(dna_alphabet.begin(),
502  dna_alphabet.end(),codon[0]) ),
503  j = std::distance( dna_alphabet.begin(),
504  std::find(dna_alphabet.begin(),
505  dna_alphabet.end(),codon[1]) ),
506  k = std::distance( dna_alphabet.begin(),
507  std::find(dna_alphabet.begin(),
508  dna_alphabet.end(),codon[2]) );
509  return impl->thirdFour[i][j][k];
510  }
511 
512  double
513  RedundancyCom95::Third2S (const std::string & codon) const
514 
520  {
521  impl->codonPrecondition(codon);
522  auto i = std::distance( dna_alphabet.begin(),
523  std::find(dna_alphabet.begin(),
524  dna_alphabet.end(),codon[0]) ),
525  j = std::distance( dna_alphabet.begin(),
526  std::find(dna_alphabet.begin(),
527  dna_alphabet.end(),codon[1]) ),
528  k = std::distance( dna_alphabet.begin(),
529  std::find(dna_alphabet.begin(),
530  dna_alphabet.end(),codon[2]) );
531  return impl->third2S[i][j][k];
532  }
533 
534  double
535  RedundancyCom95::Third2V (const std::string & codon) const
536 
542  {
543  impl->codonPrecondition(codon);
544  auto i = std::distance( dna_alphabet.begin(),
545  std::find(dna_alphabet.begin(),
546  dna_alphabet.end(),codon[0]) ),
547  j = std::distance( dna_alphabet.begin(),
548  std::find(dna_alphabet.begin(),
549  dna_alphabet.end(),codon[1]) ),
550  k = std::distance( dna_alphabet.begin(),
551  std::find(dna_alphabet.begin(),
552  dna_alphabet.end(),codon[2]) );
553  return impl->third2V[i][j][k];
554  }
555 
556  double
557  RedundancyCom95::L0_vals (const std::string & codon) const
558 
565  {
566  impl->codonPrecondition(codon);
567  auto i = std::distance( dna_alphabet.begin(),
568  std::find(dna_alphabet.begin(),
569  dna_alphabet.end(),codon[0]) ),
570  j = std::distance( dna_alphabet.begin(),
571  std::find(dna_alphabet.begin(),
572  dna_alphabet.end(),codon[1]) ),
573  k = std::distance( dna_alphabet.begin(),
574  std::find(dna_alphabet.begin(),
575  dna_alphabet.end(),codon[2]) );
576  return impl->l0_vals[i][j][k];
577  }
578 
579  double
580  RedundancyCom95::L2S_vals (const std::string & codon) const
581 
588  {
589  impl->codonPrecondition(codon);
590  auto i = std::distance( dna_alphabet.begin(),
591  std::find(dna_alphabet.begin(),
592  dna_alphabet.end(),codon[0]) ),
593  j = std::distance( dna_alphabet.begin(),
594  std::find(dna_alphabet.begin(),
595  dna_alphabet.end(),codon[1]) ),
596  k = std::distance( dna_alphabet.begin(),
597  std::find(dna_alphabet.begin(),
598  dna_alphabet.end(),codon[2]) );
599  return impl->l2S_vals[i][j][k];
600  }
601 
602  double
603  RedundancyCom95::L2V_vals (const std::string & codon) const
604 
611  {
612  impl->codonPrecondition(codon);
613  auto i = std::distance( dna_alphabet.begin(),
614  std::find(dna_alphabet.begin(),
615  dna_alphabet.end(),codon[0]) ),
616  j = std::distance( dna_alphabet.begin(),
617  std::find(dna_alphabet.begin(),
618  dna_alphabet.end(),codon[1]) ),
619  k = std::distance( dna_alphabet.begin(),
620  std::find(dna_alphabet.begin(),
621  dna_alphabet.end(),codon[2]) );
622  return impl->l2V_vals[i][j][k];
623  }
624 
625  double
626  RedundancyCom95::L4_vals (const std::string & codon) const
627 
634  {
635  impl->codonPrecondition(codon);
636  auto i = std::distance( dna_alphabet.begin(),
637  std::find(dna_alphabet.begin(),
638  dna_alphabet.end(),codon[0]) ),
639  j = std::distance( dna_alphabet.begin(),
640  std::find(dna_alphabet.begin(),
641  dna_alphabet.end(),codon[1]) ),
642  k = std::distance( dna_alphabet.begin(),
643  std::find(dna_alphabet.begin(),
644  dna_alphabet.end(),codon[2]) );
645  return impl->l4_vals[i][j][k];
646  }
647 
648  GeneticCodes RedundancyCom95::gencode()const
649  {
650  return impl->genetic_code;
651  }
652 }
Mutations TsTv(const char &i, const char &j)
Definition: Comparisons.cc:35
double i
Position of site i.
std::string Translate(std::string::const_iterator beg, std::string::const_iterator end, Sequence::GeneticCodes genetic_code=GeneticCodes::UNIVERSAL, const char &gapchar='-')
Definition: Translate.cc:223
double L2S_vals(const std::string &codon) const
STL namespace.
double j
Position of site j.
The namespace in which this library resides.
const alphabet_t dna_alphabet
Alphabet for DNA sequences Valid DNA characters. Upper-case only. Only - is accepted as gap character...
Definition: SeqAlphabets.cc:8
double First2V(const std::string &codon) const
double FirstNon(const std::string &codon) const
double ThirdFour(const std::string &codon) const
declares Sequence::Translate,a function to translate CDS sequences into peptide sequences ...
class Sequence::RedundancyCom95
double L2V_vals(const std::string &codon) const
double ThirdNon(const std::string &codon) const
double Third2V(const std::string &codon) const
double L4_vals(const std::string &codon) const
double First2S(const std::string &codon) const
delcaration of routines for comparing DNA sequences This file declares a set of functions useful for ...
double Third2S(const std::string &codon) const
Definition of enumeration types.
double L0_vals(const std::string &codon) const
RedundancyCom95(Sequence::GeneticCodes genetic_code=GeneticCodes::UNIVERSAL)