libsequence  1.9.5
thetah_thetal.cc
1 #include <cstdint>
2 #include <vector>
3 #include <limits>
4 #include <cmath>
5 #include <algorithm>
6 #include <stdexcept>
7 #include <Sequence/AlleleCountMatrix.hpp>
8 
9 namespace
10 {
11  inline double
12  process_row(const Sequence::AlleleCountMatrix& ac, const std::size_t i,
13  const std::size_t refindex, const double power)
14  {
15  std::int32_t nsam = 0;
16  std::int32_t nnonref = 0;
17  double temp = 0.0;
18  bool ref_seen = false;
19  for (std::size_t j = i; j < i + ac.ncol; ++j)
20  {
21  if (ac.counts[j])
22  {
23  nsam += ac.counts[j];
24  if (j - i != refindex)
25  {
26  ++nnonref;
27  temp += std::pow(ac.counts[j], power);
28  }
29  else
30  {
31  ref_seen = true;
32  }
33  }
34  }
35  if (nnonref > 1)
36  {
37  throw std::runtime_error(
38  "site has more than one derived state");
39  }
40  if (ref_seen)
41  {
42  //double nnm1 = static_cast<double>(nsam * (nsam - 1));
43  //return temp / nnm1;
44  auto x = (power == 1.0) ? 1./static_cast<double>(nsam-1) :
45  2.0/static_cast<double>(nsam*(nsam-1));
46  return temp * x;
47  }
48  return 0.0;
49  }
50 
51  inline double
52  calc_stat(const Sequence::AlleleCountMatrix& ac,
53  const std::vector<std::int8_t>& refstates, const double power)
54  {
55  double rv = 0.0;
56  if (ac.counts.empty())
57  {
58  return rv;
59  }
60  if (refstates.size() != ac.counts.size() / ac.ncol)
61  {
62  throw std::invalid_argument(
63  "incorrect number of reference states");
64  }
65  if (std::all_of(refstates.begin(), refstates.end(),
66  [](const std::int8_t i) { return i < 0; }))
67  {
68  throw std::invalid_argument(
69  "all reference states encoded as missing");
70  }
71  std::size_t rstate = 0;
72  for (std::size_t i = 0; i < ac.counts.size();
73  i += ac.ncol, ++rstate)
74  {
75  if (refstates[rstate] >= 0)
76  {
77  auto refindex
78  = static_cast<std::size_t>(refstates[rstate]);
79  if (refindex >= ac.ncol)
80  {
81  throw std::invalid_argument(
82  "reference state greater than max allelic "
83  "state");
84  }
85  rv += process_row(ac, i, refindex, power);
86  }
87  }
88  return rv;
89  }
90 
91  inline double
92  calc_stat(const Sequence::AlleleCountMatrix& ac,
93  const std::int8_t refstate, const double power)
94  {
95  double rv = 0.0;
96  if (ac.counts.empty())
97  {
98  return rv;
99  }
100  auto refindex = static_cast<std::size_t>(refstate);
101  if (refindex >= ac.ncol)
102  {
103  throw std::invalid_argument(
104  "reference state greater than max allelic state");
105  }
106  for (std::size_t i = 0; i < ac.counts.size(); i += ac.ncol)
107  {
108  rv += process_row(ac, i, refindex, power);
109  }
110  return rv;
111  }
112 } // namespace
113 
114 namespace Sequence
115 {
116  double
117  thetah(const AlleleCountMatrix& ac, const std::int8_t refstate)
118  {
119  return calc_stat(ac, refstate, 2.0);
120  }
121 
122  double
124  const std::vector<std::int8_t>& refstates)
125  {
126  return calc_stat(ac, refstates, 2.0);
127  }
128 
129  double
130  thetal(const AlleleCountMatrix& ac, const std::int8_t refstate)
131  {
132  return calc_stat(ac, refstate, 1.0);
133  }
134 
135  double
137  const std::vector<std::int8_t>& refstates)
138  {
139  return calc_stat(ac, refstates, 1.0);
140  }
141 
142 } // namespace Sequence
double thetal(const AlleleCountMatrix &ac, const std::int8_t refstate)
Zeng et al. .
double thetah(const AlleleCountMatrix &ac, const std::int8_t refstate)
Fay and Wu&#39;s .
The namespace in which this library resides.
Matrix representation of allele counts in a VariantMatrix To be constructed.