libsequence  1.9.5
VariantMatrixTest.cc
Go to the documentation of this file.
1 
3 #include <Sequence/VariantMatrix.hpp>
4 #include <Sequence/AlleleCountMatrix.hpp>
5 #include <Sequence/VariantMatrixViews.hpp>
6 #include <Sequence/variant_matrix/windows.hpp>
7 #include <boost/test/unit_test.hpp>
8 #include <algorithm>
9 #include <numeric> //for std::iota
10 #include <iterator>
11 #include "VariantMatrixFixture.hpp"
12 
13 struct vmatrix_fixture
14 {
15  std::vector<std::int8_t> input_data;
16  std::vector<double> input_pos;
19  vmatrix_fixture()
20  : input_data(make_input_data()), input_pos(make_intput_pos()),
21  m(input_data, input_pos), m2(input_data, input_pos), c{ m }, c2{ m2 }
22  {
23  // The two VariantMatrix objects
24  // have same data, but different internal
25  // dimensions
26  std::swap(m2.nsites, m2.nsam);
27  m2.positions.resize(m2.nsites);
28  std::iota(std::begin(m2.positions), std::end(m2.positions), 0.);
29  }
30 
31  std::vector<std::int8_t>
32  make_input_data()
33  {
34  int nsam = 20;
35  int nsites = 5;
36  std::vector<std::int8_t> rv;
37  for (int i = 0; i < nsites; ++i)
38  {
39  for (int j = 0; j < nsam; ++j)
40  {
41  std::int8_t state = (j % 2 == 0.) ? 1 : 0;
42  rv.push_back(state);
43  }
44  }
45  return rv;
46  }
47 
48  std::vector<double>
49  make_intput_pos()
50  {
51  std::vector<double> rv;
52  rv.resize(5);
53  std::iota(rv.begin(), rv.end(), 0.);
54  return rv;
55  }
56 };
57 
58 BOOST_AUTO_TEST_SUITE(BasicVariantMatrixTests)
59 
60 BOOST_AUTO_TEST_CASE(construct_empty_VariantMatrix_from_move)
61 {
62  std::vector<std::int8_t> d{};
63  std::vector<double> p{};
64  Sequence::VariantMatrix m(std::move(d), std::move(p));
65  BOOST_REQUIRE_EQUAL(m.nsam, 0);
66  BOOST_REQUIRE_EQUAL(m.nsites, 0);
67 }
68 
69 BOOST_AUTO_TEST_CASE(construct_empty_VariantMatrix_from_init_lists)
70 {
71  Sequence::VariantMatrix m(std::vector<std::int8_t>{},
72  std::vector<double>{});
73  BOOST_REQUIRE_EQUAL(m.nsam, 0);
74  BOOST_REQUIRE_EQUAL(m.nsites, 0);
75 }
76 
77 BOOST_AUTO_TEST_SUITE_END()
78 
79 BOOST_FIXTURE_TEST_SUITE(VariantMatrixTest, vmatrix_fixture)
80 
81 BOOST_AUTO_TEST_CASE(test_construction)
82 {
83  Sequence::VariantMatrix m(std::vector<std::int8_t>(100, 1),
84  std::vector<double>(5, 0.0));
85  BOOST_REQUIRE_EQUAL(m.nsites, 5);
86  BOOST_REQUIRE_EQUAL(m.nsam, 20);
87  BOOST_REQUIRE_EQUAL(m.max_allele, 1);
88 }
89 
90 BOOST_AUTO_TEST_CASE(test_max_allele)
91 {
92  m.data[3] = 5;
94  BOOST_CHECK_EQUAL(vm.max_allele, 5);
96  BOOST_REQUIRE_EQUAL(vmc.ncol, 6);
97 }
98 
99 BOOST_AUTO_TEST_CASE(test_range_exceptions)
100 {
101  BOOST_REQUIRE_THROW(m.at(m.nsites + 1, 0), std::out_of_range);
102  BOOST_REQUIRE_THROW(m.at(0, m.nsam + 1), std::out_of_range);
103 }
104 
105 BOOST_AUTO_TEST_CASE(test_iteration)
106 {
107  for (std::size_t i = 0; i < m.nsam; ++i)
108  {
109  for (std::size_t j = 0; j < m.nsites; ++j)
110  {
111  auto x = m.get(j, i);
112  std::int8_t ex = (i % 2 == 0.) ? 1 : 0;
113  BOOST_REQUIRE_EQUAL(static_cast<int>(x),
114  static_cast<int>(ex));
115  }
116  }
117 }
118 
119 BOOST_AUTO_TEST_CASE(test_bad_row_swap)
120 {
121  auto a = Sequence::get_RowView(m, 0);
122  auto b = Sequence::get_RowView(m2, 0);
123  BOOST_REQUIRE_THROW(swap(a, b), std::invalid_argument);
124 }
125 
126 BOOST_AUTO_TEST_CASE(test_bad_column_swap)
127 {
128  auto a = Sequence::get_ColView(m, 0);
129  auto b = Sequence::get_ColView(m2, 0);
130  BOOST_REQUIRE_THROW(swap(a, b), std::invalid_argument);
131 }
132 
133 BOOST_AUTO_TEST_CASE(test_row_views)
134 {
135  for (std::size_t i = 0; i < m.nsites; ++i)
136  {
137  auto x = Sequence::get_RowView(m, i);
138  for (auto j = x.begin(); j != x.end(); ++j)
139  {
140  std::int8_t ex
141  = (std::distance(x.begin(), j) % 2 == 0.0) ? 1 : 0;
142  BOOST_REQUIRE_EQUAL(static_cast<int>(*j),
143  static_cast<int>(ex));
144  }
145  for (auto j = x.cbegin(); j != x.cend(); ++j)
146  {
147  std::int8_t ex
148  = (std::distance(x.cbegin(), j) % 2 == 0.0) ? 1 : 0;
149  BOOST_REQUIRE_EQUAL(static_cast<int>(*j),
150  static_cast<int>(ex));
151  }
152  for (auto j = std::begin(x); j != std::end(x); ++j)
153  {
154  std::int8_t ex
155  = (std::distance(x.begin(), j) % 2 == 0.0) ? 1 : 0;
156  BOOST_REQUIRE_EQUAL(static_cast<int>(*j),
157  static_cast<int>(ex));
158  }
159  for (std::size_t j = 0; j < x.size(); ++j)
160  {
161  std::int8_t ex = (j % 2 == 0.) ? 1 : 0;
162  BOOST_REQUIRE_EQUAL(static_cast<int>(x[j]),
163  static_cast<int>(ex));
164  }
165  std::size_t j = 0;
166  for (auto xj : x)
167  {
168  std::int8_t ex = (j % 2 == 0.) ? 1 : 0;
169  BOOST_REQUIRE_EQUAL(static_cast<int>(xj),
170  static_cast<int>(ex));
171  ++j;
172  }
173  }
174 }
175 
176 BOOST_AUTO_TEST_CASE(test_const_row_views)
177 {
178  for (std::size_t i = 0; i < m.nsites; ++i)
179  {
180  auto x = Sequence::get_ConstRowView(m, i);
181 
182  for (auto j = x.begin(); j != x.end(); ++j)
183  {
184  std::int8_t ex
185  = (std::distance(x.begin(), j) % 2 == 0.0) ? 1 : 0;
186  BOOST_REQUIRE_EQUAL(static_cast<int>(*j),
187  static_cast<int>(ex));
188  }
189  for (auto j = x.cbegin(); j != x.cend(); ++j)
190  {
191  std::int8_t ex
192  = (std::distance(x.cbegin(), j) % 2 == 0.0) ? 1 : 0;
193  BOOST_REQUIRE_EQUAL(static_cast<int>(*j),
194  static_cast<int>(ex));
195  }
196  for (auto j = std::begin(x); j != std::end(x); ++j)
197  {
198  std::int8_t ex
199  = (std::distance(x.begin(), j) % 2 == 0.0) ? 1 : 0;
200  BOOST_REQUIRE_EQUAL(static_cast<int>(*j),
201  static_cast<int>(ex));
202  }
203  for (std::size_t j = 0; j < x.size(); ++j)
204  {
205  std::int8_t ex = (j % 2 == 0.) ? 1 : 0;
206  BOOST_REQUIRE_EQUAL(static_cast<int>(x[j]),
207  static_cast<int>(ex));
208  }
209  std::size_t j = 0;
210  for (auto xj : x)
211  {
212  std::int8_t ex = (j % 2 == 0.) ? 1 : 0;
213  BOOST_REQUIRE_EQUAL(static_cast<int>(xj),
214  static_cast<int>(ex));
215  ++j;
216  }
217  }
218 }
219 
220 BOOST_AUTO_TEST_CASE(test_row_view_exceptions)
221 {
222  BOOST_REQUIRE_THROW(Sequence::get_RowView(m, m.nsites + 1),
223  std::exception);
224  BOOST_REQUIRE_THROW(Sequence::get_RowView(m, m.nsites + 1),
225  std::out_of_range);
226 
227  auto r = Sequence::get_RowView(m, 0);
228  BOOST_REQUIRE_THROW(r.at(m.nsam + 1), std::exception);
229  BOOST_REQUIRE_THROW(r.at(m.nsam + 1), std::out_of_range);
230 }
231 
232 BOOST_AUTO_TEST_CASE(test_const_row_view_exceptions)
233 {
234  BOOST_REQUIRE_THROW(Sequence::get_ConstRowView(m, m.nsites + 1),
235  std::exception);
236  BOOST_REQUIRE_THROW(Sequence::get_ConstRowView(m, m.nsites + 1),
237  std::out_of_range);
238 
239  auto r = Sequence::get_ConstRowView(m, 0);
240  BOOST_REQUIRE_THROW(r.at(m.nsam + 1), std::exception);
241  BOOST_REQUIRE_THROW(r.at(m.nsam + 1), std::out_of_range);
242 }
243 
244 BOOST_AUTO_TEST_CASE(test_row_view_iterators)
245 {
246  for (std::size_t i = 0; i < m.nsites; ++i)
247  {
248  auto row = Sequence::get_RowView(m, i);
249  BOOST_REQUIRE_EQUAL(std::distance(row.begin(), row.end()), m.nsam);
250  BOOST_REQUIRE_EQUAL(std::distance(row.cbegin(), row.cend()),
251  m.nsam);
252  }
253 }
254 
255 BOOST_AUTO_TEST_CASE(test_column_views)
256 {
257  for (std::size_t i = 0; i < m.nsam; ++i)
258  {
259  auto col = Sequence::get_ColView(m, i);
260  std::int8_t state = (i % 2 == 0) ? 1 : 0;
261  BOOST_REQUIRE_EQUAL(
262  std::count(std::begin(col), std::end(col), !state), 0);
263  BOOST_REQUIRE_EQUAL(std::count(col.rbegin(), col.rend(), !state),
264  0);
265  BOOST_REQUIRE_EQUAL(std::count(col.cbegin(), col.cend(), !state),
266  0);
267  BOOST_REQUIRE_EQUAL(std::count(col.crbegin(), col.crend(), !state),
268  0);
269 
270  BOOST_REQUIRE_EQUAL(std::distance(std::begin(col), std::end(col)),
271  m.nsites);
272  BOOST_REQUIRE_EQUAL(std::distance(col.rbegin(), col.rend()),
273  m.nsites);
274 
275  // Check that iterators and reverse iterators have the expected
276  // relationships:
277  auto fwd = col.begin();
278  auto rev = col.rbegin();
279  for (; rev < col.rend(); ++rev)
280  {
281  auto rf = std::distance(fwd, rev.base());
282  auto rb = std::distance(rev, col.rend());
283  BOOST_REQUIRE_EQUAL(rf, rb);
284  }
285 
286  auto cfwd = col.cbegin();
287  auto crev = col.crbegin();
288  for (; crev < col.crend(); ++crev)
289  {
290  auto rf = std::distance(cfwd, crev.base());
291  auto rb = std::distance(crev, col.crend());
292  BOOST_REQUIRE_EQUAL(rf, rb);
293  }
294  }
295 }
296 
297 BOOST_AUTO_TEST_CASE(tesl_col_view_iterator_increment)
298 {
299  auto x = Sequence::get_ConstColView(m, 0);
300  auto b = x.begin();
301  unsigned num_increments = 0;
302  while (b < x.end())
303  {
304  b = b + 2;
305  ++num_increments;
306  }
307  BOOST_REQUIRE_EQUAL(num_increments, 3);
308 }
309 
310 BOOST_AUTO_TEST_CASE(test_column_view_invalid_compare)
311 {
312  auto c0 = Sequence::get_ConstColView(m, 0);
313  auto c1 = Sequence::get_ConstColView(m, 1);
314  BOOST_REQUIRE_NO_THROW(std::distance(c0.begin(), c0.end()));
315  BOOST_REQUIRE_THROW(std::distance(c0.begin(), c1.begin()),
316  std::invalid_argument);
317 }
318 
319 // The remaining tests apply STL algorithms to column iterators,
320 // which is a good stress test. We've already done count above.
321 
322 BOOST_AUTO_TEST_CASE(test_accumulate)
323 {
324  auto c = Sequence::get_ConstColView(m, 0);
325  int sum = static_cast<int>(std::accumulate(c.cbegin(), c.cend(), 0));
326  BOOST_REQUIRE_EQUAL(sum, static_cast<int>(m.nsites));
327  c = Sequence::get_ConstColView(m, 1);
328  sum = static_cast<int>(std::accumulate(c.cbegin(), c.cend(), 0));
329  BOOST_REQUIRE_EQUAL(sum, 0);
330 }
331 BOOST_AUTO_TEST_SUITE_END()
332 
333 BOOST_FIXTURE_TEST_SUITE(VariantMatrixWindowTest, dataset)
334 
335 BOOST_AUTO_TEST_CASE(tests_windows_size_0)
336 {
337  auto w = Sequence::make_window(m, 10, 10);
338  BOOST_REQUIRE_EQUAL(w.nsam, 0);
339  BOOST_REQUIRE_EQUAL(w.nsites, 0);
340 }
341 
342 BOOST_AUTO_TEST_CASE(tests_windows_size_1)
343 {
344  for (std::size_t i = 0; i < m.positions.size(); ++i)
345  {
346  auto w = Sequence::make_window(m, m.positions[i], m.positions[i]);
347  BOOST_REQUIRE_EQUAL(w.positions[0], m.positions[i]);
348  auto r = Sequence::get_ConstRowView(m, i);
349  BOOST_REQUIRE_EQUAL(
350  std::mismatch(w.data.begin(), w.data.end(), r.begin()).first
351  == w.data.end(),
352  true);
353  }
354 }
355 
356 BOOST_AUTO_TEST_CASE(test_windows_multi_site)
357 {
358  auto w = Sequence::make_window(m, 0.11, 0.29);
359  BOOST_REQUIRE_EQUAL(w.nsites, 1);
360  BOOST_REQUIRE_EQUAL(w.positions[0], 0.2);
361 }
362 
363 BOOST_AUTO_TEST_SUITE_END()
364 
365 BOOST_FIXTURE_TEST_SUITE(test_AlleleCountMatrixOperations, dataset)
366 
367 BOOST_AUTO_TEST_CASE(test_slice)
368 {
370  auto w = Sequence::make_window(m, 0, 0.25);
372  std::vector<int> c;
373  for (std::size_t i = 0; i < 2; ++i)
374  {
375  auto r = am.row(i);
376  c.insert(c.end(), r.first, r.second);
377  }
378  Sequence::AlleleCountMatrix amslice(std::move(c), 2, 2, m.nsam);
379  BOOST_REQUIRE_EQUAL(amw.counts == amslice.counts, true);
380  BOOST_REQUIRE_EQUAL(amw.ncol, amslice.ncol);
381  BOOST_REQUIRE_EQUAL(amw.nrow, amslice.nrow);
382  BOOST_REQUIRE_EQUAL(amw.nsam, amslice.nsam);
383 }
384 
385 BOOST_AUTO_TEST_SUITE_END()
const std::int8_t max_allele
Max allelic value stored in matrix.
VariantMatrix make_window(const VariantMatrix &m, const double beg, const double end)
Return a window from a VariantMatrix.
Definition: windows.cc:6
std::int8_t & at(const std::size_t site, const std::size_t haplotype)
Get data from marker site and haplotype haplotype. std::out_of_range is thrown if indexes are invalid...
ConstRowView get_RowView(const VariantMatrix &m, const std::size_t row)
Return a ConstRowView from VariantMatrix m at row row. std::out_of_range is thrown if row is out of r...
std::size_t nsites
Number of sites in data set.
std::vector< double > positions
Position of sites.
ConstRowView get_ConstRowView(const VariantMatrix &m, const std::size_t row)
Return a ConstRowView from VariantMatrix m at row row. std::out_of_range is thrown if row is out of r...
Matrix representation of variation data.
ColView get_ColView(VariantMatrix &m, const std::size_t col)
Return a ColView from VariantMatrix m at col col. std::out_of_range is thcoln if col is out of range...
std::size_t nsam
Sample size of data set.
std::int8_t & get(const std::size_t site, const std::size_t haplotype)
Get data from marker site and haplotype haplotype. No range-checking is done.
ConstColView get_ConstColView(VariantMatrix &m, const std::size_t col)
Return a ConstColView from VariantMatrix m at col col. std::out_of_range is thcoln if col is out of r...
std::vector< std::int8_t > data
Data stored in matrix form with rows as sites.
Matrix representation of allele counts in a VariantMatrix To be constructed.