libsequence  1.9.5
hprime.cc
1 #include <cmath>
2 #include <functional>
3 #include <Sequence/AlleleCountMatrix.hpp>
4 #include <Sequence/summstats/auxillary.hpp>
5 #include "hprime_faywuh_aggregator.hpp"
6 
7 namespace
8 {
9  double
10  hprime_common(const std::uint32_t nsam, const unsigned S, const double tp,
11  const double tl)
12  {
13  using namespace Sequence;
14  if (tp == 0.0)
15  {
16  return std::numeric_limits<double>::quiet_NaN();
17  }
18  auto a = summstats_aux::a_sub_n(static_cast<std::uint32_t>(nsam));
19  auto b = summstats_aux::b_sub_n(static_cast<std::uint32_t>(nsam));
20  auto b1
21  = summstats_aux::b_sub_n_plus1(static_cast<std::uint32_t>(nsam));
22 
23  double tw
24  = static_cast<double>(S) / a; //TODO: replace with call to thetaw
25  double tsq = S * (S - 1) / (a * a + b);
26  double n = static_cast<double>(nsam);
27 
28  double vThetal
29  = (n * tw) / (2.0 * (n - 1.0))
30  + (2.0 * std::pow(n / (n - 1.0), 2.0) * (b1 - 1.0) - 1.0) * tsq;
31  double vPi = (3.0 * n * (n + 1.0) * tw + 2.0 * (n * n + n + 3.0) * tsq)
32  / (9 * n * (n - 1.0));
33  double cov
34  = ((n + 1.0) / (3.0 * (n - 1.0))) * tw
35  + ((7.0 * n * n + 3.0 * n - 2.0 - 4.0 * n * (n + 1.0) * b1)
36  / (2.0 * std::pow((n - 1.0), 2.0)))
37  * tsq;
38  return (tp - tl) / std::pow(vThetal + vPi - 2.0 * cov, 0.5);
39  }
40 } // namespace
41 
42 namespace Sequence
43 {
44  double
45  hprime(const AlleleCountMatrix &ac, const std::int8_t refstate)
46  {
47  if (ac.counts.empty())
48  {
49  return std::numeric_limits<double>::quiet_NaN();
50  }
51  auto refindex = static_cast<std::size_t>(refstate);
52  if (refindex >= ac.ncol)
53  {
54  throw std::invalid_argument(
55  "reference state greater than max allelic state");
56  }
57  detail::hprime_faywuh_row_processor rp;
58  for (std::size_t i = 0; i < ac.counts.size(); i += ac.ncol)
59  {
60  rp(ac, i, refindex, detail::stat_is_hprime());
61  }
62  return hprime_common(static_cast<std::uint32_t>(ac.nsam), rp.S, rp.pi,
63  rp.theta);
64  }
65 
66  double
68  const std::vector<std::int8_t> &refstates)
69  {
70  if (ac.counts.empty())
71  {
72  return std::numeric_limits<double>::quiet_NaN();
73  }
74  if (refstates.size() != ac.counts.size() / ac.ncol)
75  {
76  throw std::invalid_argument(
77  "incorrect number of reference states");
78  }
79  if (std::all_of(refstates.begin(), refstates.end(),
80  [](const std::int8_t i) { return i < 0; }))
81  {
82  throw std::invalid_argument(
83  "all reference states encoded as missing");
84  }
85 
86  detail::hprime_faywuh_row_processor rp;
87  std::size_t rstate = 0;
88  for (std::size_t i = 0; i < ac.counts.size(); i += ac.ncol, ++rstate)
89  {
90  if (refstates[rstate] >= 0)
91  {
92  auto refindex
93  = static_cast<std::size_t>(refstates[rstate]);
94  if (refindex >= ac.ncol)
95  {
96  throw std::invalid_argument(
97  "reference state greater than max allelic "
98  "state");
99  }
100  rp(ac, i, refindex, detail::stat_is_hprime());
101  }
102  }
103  return hprime_common(static_cast<std::uint32_t>(ac.nsam), rp.S, rp.pi,
104  rp.theta);
105  }
106 } // namespace Sequence
double hprime(const AlleleCountMatrix &ac, const std::int8_t refstate)
Definition: hprime.cc:45
The namespace in which this library resides.
Matrix representation of allele counts in a VariantMatrix To be constructed.