libsequence  1.9.5
StateCounts.cc
1 #include <Sequence/StateCounts.hpp>
2 #include <algorithm>
3 #include <stdexcept>
4 
5 namespace Sequence
6 {
7  StateCounts::StateCounts()
8  : counts(std::vector<std::int32_t>(max_allele + 1, 0)),
9  max_allele_idx{ 0 }, n{ 0u }, refstate(-1)
10  {
11  }
12 
13  StateCounts::StateCounts(const std::int8_t refstate_)
14  : counts(std::vector<std::int32_t>(max_allele + 1, 0)),
15  max_allele_idx{ 0 }, n{ 0u }, refstate(refstate_)
16  {
17  }
18 
19  void
20  StateCounts::operator()(ConstRowView& row)
21  {
22  std::fill(counts.begin(), counts.begin() + max_allele_idx + 1, 0);
23  n = 0;
24  max_allele_idx = 0;
25  for (std::size_t i = 0; i < row.size(); ++i)
26  {
27  auto ri = row[i];
28  if (ri >= 0)
29  {
30  ++n;
31  ++counts[static_cast<std::size_t>(ri)];
32  max_allele_idx = std::max(
33  max_allele_idx, static_cast<std::size_t>(ri));
34  }
35  else if (ri == VariantMatrix::mask)
36  {
37  throw std::invalid_argument(
38  "reserved value encountered");
39  }
40  }
41  }
42 
43  void
44  StateCounts::operator()(const RowView& row)
45  {
46  std::fill(counts.begin(), counts.begin() + max_allele_idx + 1, 0);
47  n = 0;
48  max_allele_idx = 0;
49  for (std::size_t i = 0; i < row.size(); ++i)
50  {
51  auto ri = row[i];
52  if (ri >= 0)
53  {
54  ++n;
55  ++counts[static_cast<std::size_t>(ri)];
56  max_allele_idx = std::max(
57  max_allele_idx, static_cast<std::size_t>(ri));
58  }
59  else if (ri == VariantMatrix::mask)
60  {
61  throw std::invalid_argument(
62  "reserved value encountered");
63  }
64  }
65  }
66 
67  std::vector<StateCounts>
69  const std::vector<std::int8_t>& refstates)
70  {
71  if (std::any_of(
72  refstates.begin(), refstates.end(),
73  [](const std::int8_t x) { return x == VariantMatrix::mask; }))
74  {
75  throw std::invalid_argument("reserved value encountered");
76  }
77  if (refstates.size() != m.nsites)
78  {
79  throw std::invalid_argument("refstates.size() != m.nsites");
80  }
81  std::vector<StateCounts> rv;
82  rv.reserve(m.nsites);
83  for (std::size_t i = 0; i < m.nsites; ++i)
84  {
85  StateCounts c(refstates[i]);
86  auto r = get_RowView(m, i);
87  c(r);
88  rv.emplace_back(std::move(c));
89  }
90  return rv;
91  }
92 
93  std::vector<StateCounts>
94  process_variable_sites(const VariantMatrix& m, const std::int8_t refstate)
95  {
96  if (refstate == Sequence::VariantMatrix::mask)
97  {
98  throw std::invalid_argument("reserved value encountered");
99  }
100  std::vector<StateCounts> rv;
101  rv.reserve(m.nsites);
102  for (std::size_t i = 0; i < m.nsites; ++i)
103  {
104  StateCounts c(refstate);
105  auto r = get_RowView(m, i);
106  c(r);
107  rv.emplace_back(std::move(c));
108  }
109  return rv;
110  }
111 
112  std::vector<StateCounts>
114  {
115  return process_variable_sites(m, -1);
116  }
117 } // namespace Sequence
Implementation details for Sequence::RowView and Sequence::ConstRowView.
Track character state occurrence at a site in a VariantMatrix.
Definition: StateCounts.hpp:11
std::size_t size() const
Number of elements.
std::vector< StateCounts > process_variable_sites(const VariantMatrix &m, const std::vector< std::int8_t > &refstates)
Definition: StateCounts.cc:68
std::size_t max_allele_idx
The max allelic value seen.
Definition: StateCounts.hpp:29
std::vector< std::int32_t > counts
Keep track of (state, count) pairs.
Definition: StateCounts.hpp:27
STL namespace.
The namespace in which this library resides.
ConstRowView get_RowView(const VariantMatrix &m, const std::size_t row)
Return a ConstRowView from VariantMatrix m at row row. std::out_of_range is thrown if row is out of r...
std::size_t nsites
Number of sites in data set.
std::uint32_t n
The sample size at this site. Excluded missing data.
Definition: StateCounts.hpp:31
Matrix representation of variation data.
static const std::int8_t mask
Reserved value for masked data.