libsequence  1.9.5
nsl.cc
1 #include <iostream>
2 #include <algorithm>
3 #include <cmath>
4 #include <cstdint>
5 #include <Sequence/VariantMatrix.hpp>
6 #include <Sequence/VariantMatrixViews.hpp>
8 
10 namespace
11 {
12  std::int64_t
13  get_left(const Sequence::ConstColView& sample_i,
14  const Sequence::ConstColView& sample_j, const std::size_t core)
15  {
16  std::int64_t left = static_cast<std::int64_t>(core) - 1;
17  std::size_t left_index;
18 
19  while (left >= 0)
20  {
21  left_index = static_cast<std::size_t>(left);
22  if (sample_i[left_index] >= 0 && sample_j[left_index] >= 0
23  && sample_i[left_index] != sample_j[left_index])
24  {
25  break;
26  }
27  --left;
28  }
29  return left;
30  }
31 
32  std::int64_t
33  get_right(const Sequence::ConstColView& sample_i,
34  const Sequence::ConstColView& sample_j, const std::size_t core,
35  const std::int64_t nsites)
36  {
37  std::int64_t right = static_cast<std::int64_t>(core) + 1;
38  std::size_t right_index;
39  while (right < nsites)
40  {
41  right_index = static_cast<std::size_t>(right);
42  if (sample_i[right_index] >= 0 && sample_j[right_index] >= 0
43  && sample_i[right_index] != sample_j[right_index])
44  {
45  break;
46  }
47  ++right;
48  }
49  return right;
50  }
51 
52  void
53  update_counts(double nsl_values[2], double ihs_values[2], int counts[2],
54  const std::size_t nsites,
55  const std::vector<double>& positions,
56  const std::size_t index, const std::int64_t left,
57  const std::int64_t right)
58  {
59  if (left >= 0 && static_cast<std::size_t>(right) < nsites)
60  //Then there are SNPs differentiating
61  //i and j within the region
62  {
63  nsl_values[index] += static_cast<double>(right - left);
64  //TODO: check if we need to add one?
65  ihs_values[index]
66  += positions[static_cast<std::size_t>(left)]
67  - positions[static_cast<std::size_t>(right)];
68  counts[index]++;
69  }
70  }
71 } // namespace
72 
73 namespace Sequence
74 {
75  nSLiHS
76  nsl(const VariantMatrix& m, const std::size_t core,
77  const std::int8_t refstate)
90  {
91  auto core_view = get_ConstRowView(m, core);
92  // Keep track of distances from core site
93  // for nsl and ihs separately
94  double nsl_values[2] = { 0, 0 };
95  double ihs_values[2] = { 0, 0 };
96  //Count sample size for non-ref and ref alleles at core site contributing to nSL
97  int counts[2] = { 0, 0 };
98  for (std::size_t i = 0; i < m.nsam - 1; ++i)
99  {
100  auto sample_i = get_ConstColView(m, i);
101  for (std::size_t j = i + 1; j < m.nsam; ++j)
102  {
103  if (core_view[i] == core_view[j])
104  {
105  auto sample_j = get_ConstColView(m, j);
106  //Find where samples i and j differ
107  auto left = get_left(sample_i, sample_j, core);
108  auto right = get_right(
109  sample_i, sample_j, core,
110  static_cast<std::int64_t>(m.nsites));
111  update_counts(nsl_values, ihs_values, counts,
112  m.nsites, m.positions,
113  static_cast<std::size_t>(
114  core_view[i] == refstate),
115  left, right);
116  }
117  }
118  }
119  double nSL_den = nsl_values[0] / static_cast<double>(counts[0]);
120  double nSL_num = nsl_values[1] / static_cast<double>(counts[1]);
121  double iHS_den = nsl_values[0] / static_cast<double>(counts[0]);
122  double iHS_num = nsl_values[1] / static_cast<double>(counts[1]);
123  auto nonrefcount = static_cast<std::int32_t>(
124  std::count_if(core_view.begin(), core_view.end(),
125  [refstate](const std::int8_t i) {
126  return i >= 0 && i != refstate;
127  }));
128  return nSLiHS{ std::log(nSL_num) - std::log(nSL_den),
129  std::log(iHS_num) - std::log(iHS_den), nonrefcount };
130  }
131 
132  nSLiHS
133  nslx(const VariantMatrix& m, const std::size_t core,
134  const std::int8_t refstate, const int x)
135  {
136  //Need to get indexes of all x-tons.
137  //Then, if two seqs differ at an x-ton,
138  //the stats get updated.
139  }
140 } // namespace Sequence
The namespace in which this library resides.
nSLiHS nsl(const VariantMatrix &m, const std::size_t core, const std::int8_t refstate)
nSL and iHS statistics
Definition: nsl.cc:76
std::size_t nsites
Number of sites in data set.
std::vector< double > positions
Position of sites.
ConstRowView get_ConstRowView(const VariantMatrix &m, const std::size_t row)
Return a ConstRowView from VariantMatrix m at row row. std::out_of_range is thrown if row is out of r...
Matrix representation of variation data.
std::size_t nsam
Sample size of data set.
ConstColView get_ConstColView(VariantMatrix &m, const std::size_t col)
Return a ConstColView from VariantMatrix m at col col. std::out_of_range is thcoln if col is out of r...
Implementation details for Sequence::ColView and Sequence::ConstColView.
nSL and iHS