libsequence  1.9.5
Kimura80.cc
1 /*
2 
3 Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
4 
5 Remove the brackets to email me.
6 
7 This file is part of libsequence.
8 
9 libsequence is free software: you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation, either version 3 of the License, or
12 (at your option) any later version.
13 
14 libsequence is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18 
19 You should have received a copy of the GNU General Public License
20 long with libsequence. If not, see <http://www.gnu.org/licenses/>.
21 
22 */
23 
24 //Kimura80.cc Copyright 2001 Kevin Thornton, k-thornton@uchicago.edu
25 #include <cmath>
26 #include <cfloat>
27 #include <cctype>
28 #include <limits>
29 #include <stdexcept>
30 #include <Sequence/Seq.hpp>
31 #include <Sequence/Comparisons.hpp>
32 #include <Sequence/SeqEnums.hpp>
33 #include <Sequence/Kimura80.hpp>
34 
39 namespace Sequence
40  {
41  Kimura80::Kimura80 (const Sequence::Seq * seqa, const Sequence::Seq * seqb):
42  seqlen (seqa->length())
48  {
49  if (seqa->length () != seqb->length ())
50  throw std::runtime_error ("Sequence::Kimura80::Kimura80(): constructor called with two sequence objects of unequal length");
51  num_Ts = 0;
52  num_Tv = 0;
53  divergence = 0.0;
54  P = 0.0;
55  Q = 0.0;
56  sites_compared = 0;
57  Compute (seqa,seqb);
58  }
59 
60  void Kimura80::Compute (const Sequence::Seq *seq1, const Sequence::Seq *seq2)
61  {
62  unsigned i;
63 
64  unsigned ungapped_sites = 0;
65  for (i = 0; i < seqlen; ++i) //iterate over the sequence
66  {
67  if (NotAGap((*seq1)[i]) && NotAGap((*seq2)[i]))
68  {
69  ++ungapped_sites;
70  if (std::toupper((*seq1)[i]) !=
71  std::toupper((*seq2)[i])) //if the sites differ at that position
72  {
73  auto type = TsTv ((*seq1)[i], (*seq2)[i]); //check if difference is Ts or Tv
74 
75  if (type == Mutations::Ts)
76  { //Ts
77  ++num_Ts;
78  }
79  else if (type == Mutations::Tv)
80  { //Tv
81  ++num_Tv;
82  }
83  }
84  }
85  }
86  //make sure we use the right denominator
87 
88  sites_compared = (ungapped_sites < seqlen) ? ungapped_sites : seqlen;
89  //P and Q are the proportions of Ts and Tv changes observed
90  P = double (num_Ts) / double (sites_compared);
91  Q = double (num_Tv) / double (sites_compared);
92 
93  //Kimura's formula
94  if (fabs(1.0 - 2.0 * P - Q) > DBL_EPSILON)
95  {
96  divergence = -1.0 * 0.5 * std::log ((1.0 - 2.0 * P - Q)
97  * pow ((1 - 2.0 * Q), 0.5));
98  }
99  else
100  {
101  divergence = 0.;
102  }
103  //a correction for extremely low observed values
104  if (divergence <= 0.0-DBL_EPSILON)
105  divergence = 0.0;
106  }
107 
108  double
109  Kimura80::K (void) const
116  {
117  if (!std::isfinite(divergence))
118  return std::numeric_limits<double>::quiet_NaN();
119  return (divergence);
120  }
121 
122  size_t
123  Kimura80::sites (void) const
127  {
128  return (sites_compared);
129  }
130 }
Kimura80(const Sequence::Seq *seqa, const Sequence::Seq *seqb)
Definition: Kimura80.cc:41
Mutations TsTv(const char &i, const char &j)
Definition: Comparisons.cc:35
declaration of Sequence::Kimura80
class Sequence::Seq, an abstract base class for Sequences
Abstract interface to sequence objects.
Definition: Seq.hpp:46
The namespace in which this library resides.
size_t sites(void) const
Definition: Kimura80.cc:123
size_type length(void) const
Definition: Seq.cc:58
bool NotAGap(const char &c)
Definition: Comparisons.cc:196
double K() const
Definition: Kimura80.cc:109
delcaration of routines for comparing DNA sequences This file declares a set of functions useful for ...
Definition of enumeration types.