libsequence  1.9.5
Sites.cc
1 /*
2 
3  Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
4 
5  Remove the brackets to email me.
6 
7  This file is part of libsequence.
8 
9  libsequence is free software: you can redistribute it and/or modify
10  it under the terms of the GNU General Public License as published by
11  the Free Software Foundation, either version 3 of the License, or
12  (at your option) any later version.
13 
14  libsequence is distributed in the hope that it will be useful,
15  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  GNU General Public License for more details.
18 
19  You should have received a copy of the GNU General Public License
20  long with libsequence. If not, see <http://www.gnu.org/licenses/>.
21 
22 */
23 
24 #ifndef NDEBUG
25 #include <cmath>
26 #include <cfloat>
27 #endif
28 #include <cassert>
29 #include <algorithm>
30 #include <cctype>
31 #include <Sequence/Seq.hpp>
32 #include <Sequence/Translate.hpp>
34 #include <Sequence/Comparisons.hpp>
36 #include <Sequence/Sites.hpp>
37 //divergence statistics for a pair of sequences
38 
39 namespace Sequence
40 {
41  struct Sites::SitesImpl
42  {
43  double _L0;
44  double _L2S;
45  double _L2V;
46  double _L4;
47  void siteinc (const RedundancyCom95 & sitesObj,
48  const std::string & codon1,const std::string &codon2);
49  void count_sites (const Sequence::Seq & sequence1,
50  const Sequence::Seq & sequence2,
51  const RedundancyCom95 & sitesObj,
52  const int maxdiffs);
53  SitesImpl() :_L0(0.),_L2S(0.),_L2V(0.0),_L4(0.0)
54  {
55  }
56  void zero_out() { _L0=_L2S=_L2V=_L4=0.; }
57  };
58 
59  void
60  Sites::SitesImpl::count_sites (const Sequence::Seq & sequence1,
61  const Sequence::Seq & sequence2,
62  const RedundancyCom95 & sitesObj,
63  const int maxdiffs)
64  { //now, it is correct
65  size_t i = 0, j = 0;
66  std::string codon1,codon2;
67  codon1.resize(3);
68  codon2.resize(3);
69  for (i = 0; i <= sequence1.length() - 3; i += 3)
70  {
71  for (j = 0; j <= 2; ++j)
72  {
73  codon1[j] = char(std::toupper(sequence1[i + j]));
74  codon2[j] = char(std::toupper(sequence2[i + j]));
75  }
76  //We won't check the r.v. of NumDiffs here b/c we have made the 2 seqs the
77  //same lenght.
78  decltype(NumDiffs(codon1,codon2)) nc = NumDiffs (codon1, codon2);
79 
80  if (nc == 0) //still need to count if there are 0 changes
81  siteinc (sitesObj,codon1, codon2);
82  else if (maxdiffs <= 3 && nc == 1)
83  siteinc (sitesObj,codon1, codon2);
84  else if (maxdiffs == 2 && nc <= 2)
85  siteinc (sitesObj,codon1, codon2);
86  else if (maxdiffs == 3 && nc <= 3)
87  siteinc (sitesObj,codon1, codon2);
88  }
89  }
90 
91  void
92  Sites::SitesImpl::siteinc (const RedundancyCom95 & sitesObj,
93  const std::string & codon1,
94  const std::string & codon2)
95  {
96  //skip ambiguous
97 
98  if ( std::find_if(codon1.begin(),codon1.end(),ambiguousNucleotide()) == codon1.end() &&
99  std::find_if(codon2.begin(),codon2.end(),ambiguousNucleotide()) == codon2.end() )
100  {
101  _L0 += (sitesObj.L0_vals(codon1) + sitesObj.L0_vals(codon2))/2.0;
102  _L2S += (sitesObj.L2S_vals(codon1) + sitesObj.L2S_vals(codon2))/2.0;
103  _L2V += (sitesObj.L2V_vals(codon1) + sitesObj.L2V_vals(codon2))/2.0;
104  _L4 += (sitesObj.L4_vals(codon1) + sitesObj.L4_vals(codon2))/2.0;
105  }
106  }
107 
108  Sites::Sites(const Sequence::Seq & seq1,
109  const Sequence::Seq & seq2,
110  const RedundancyCom95 & sitesObj,
111  int maxdiffs) : impl(std::unique_ptr<SitesImpl>(new SitesImpl()))
112  {
113  this->operator()(seq1,seq2,sitesObj,maxdiffs);
114  }
115 
116  Sites::Sites () :
117  impl(std::unique_ptr<SitesImpl>(new SitesImpl()))
118  {
119  }
120 
121  Sites::~Sites (void)
122  {}
123 
124  void Sites::operator()(const Sequence::Seq & seq1,
125  const Sequence::Seq & seq2,
126  const RedundancyCom95 & sitesObj,
127  int maxdiffs)
128  {
129  impl->zero_out();
130  impl->count_sites(seq1,seq2,sitesObj,maxdiffs);
131  }
132 
133  double Sites::L0(void) const
137  {
138  return impl->_L0;
139  }
140  double Sites::L2S(void) const
144  {
145  return impl->_L2S;
146  }
147  double Sites::L2V(void) const
151  {
152  return impl->_L2V;
153  }
154  double Sites::L4(void) const
158  {
159  return impl->_L4;
160  }
161 
162 }
class Sequence::Seq, an abstract base class for Sequences
Abstract interface to sequence objects.
Definition: Seq.hpp:46
double L2V(void) const
Definition: Sites.cc:147
class Sequence::Sites calculates the length of a pairwise alignment between coding sequences in terms...
STL namespace.
The namespace in which this library resides.
int NumDiffs(const std::string &seq1, const std::string &seq2, const bool &skip_missing=true, const bool &nucleic_acid=true)
Definition: Comparisons.cc:141
declares Sequence::Translate,a function to translate CDS sequences into peptide sequences ...
class Sequence::RedundancyCom95
double L4(void) const
Definition: Sites.cc:154
double L0(void) const
Definition: Sites.cc:133
size_type length(void) const
Definition: Seq.cc:58
double L2S(void) const
Definition: Sites.cc:140
delcaration of routines for comparing DNA sequences This file declares a set of functions useful for ...
void operator()(const RedundancyCom95 &sitesObj, const std::string &cod1, const std::string &cod2)
Definition: SingleSub.cc:55