libsequence  1.9.5
garud.cc
1 #include <cstdint>
2 #include <limits>
3 #include <vector>
4 #include <algorithm>
5 #include <unordered_map>
8 #include <Sequence/VariantMatrix.hpp>
10 
11 namespace Sequence
12 {
13  GarudStats::GarudStats()
14  : H1(1.), H12(std::numeric_limits<double>::quiet_NaN()),
15  H2H1(std::numeric_limits<double>::quiet_NaN())
16  {
17  }
18 
19  GarudStats::GarudStats(const double __h1, const double __h12,
20  const double __h2h1)
21  : H1(__h1), H12(__h12), H2H1(__h2h1)
22  {
23  }
24 
27  {
28  GarudStats rv;
29  if (m.data.empty() || !m.nsam)
30  {
31  return rv;
32  }
33  // Although one of the stats is haplotype diversity,
34  // w
35  auto labels = label_haplotypes(m);
36  std::unordered_map<std::int32_t, std::int32_t> counts;
37  std::size_t nmissing = 0;
38  for (auto l : labels)
39  {
40  if (l < 0)
41  {
42  ++nmissing;
43  }
44  else
45  {
46  counts[l]++;
47  }
48  }
49  if (counts.size() < 2)
50  {
51  return rv;
52  }
53  rv.H1 = 1.0 - diversity_from_counts(counts, m.nsam - nmissing);
54  std::vector<std::pair<std::int32_t, std::int32_t>> vcounts(
55  counts.begin(), counts.end());
56  std::sort(vcounts.begin(), vcounts.end(),
57  [](const std::pair<std::int32_t, std::int32_t>& a,
58  const std::pair<std::int32_t, std::int32_t>& b) {
59  return a.second > b.second;
60  });
61  double nsam
62  = static_cast<double>(m.nsam) - static_cast<double>(nmissing);
63  rv.H12 = rv.H1
64  + 2. * static_cast<double>(vcounts[0].second)
65  * static_cast<double>(vcounts[1].second)
66  / (nsam * (nsam - 1.0));
67  rv.H2H1 = (rv.H1
68  - static_cast<double>(vcounts[0].second
69  * (vcounts[0].second - 1))
70  / (nsam * (nsam - 1)))
71  / rv.H1;
72  return rv;
73  }
74 } // namespace Sequence
GarudStats
Definition: Garud.cc:22
double diversity_from_counts(const std::unordered_map< std::int32_t, std::int32_t > &counts, const std::size_t nsam)
Calculate heterozygosity/diversity from count data.
Definition: generic.cc:10
GarudStats garud_statistics(const VariantMatrix &m)
Calculate H1, H12, and H2/H1.
Definition: garud.cc:26
STL namespace.
The namespace in which this library resides.
H1, H12, and H2/H1 stats.
std::vector< std::int32_t > label_haplotypes(const VariantMatrix &m)
Assign a unique label to each haplotype.
Generic utilities for calculating summary statistics.
"Classic" summaries of variation data.
Matrix representation of variation data.
std::size_t nsam
Sample size of data set.
std::vector< std::int8_t > data
Data stored in matrix form with rows as sites.