9 #include <Sequence/VariantMatrix.hpp> 10 #include <Sequence/VariantMatrixViews.hpp> 12 #include "VariantMatrixFixture.hpp" 13 #include <boost/test/unit_test.hpp> 19 for (std::size_t i = 0; i < m.
nsites; ++i)
22 int ndiffs = 0, ncomps = 0;
23 for (std::size_t j = 0; j < m.
nsam - 1; ++j)
25 for (std::size_t k = j + 1; k < m.
nsam; ++k)
27 if (r[j] >= 0 && r[k] >= 0)
38 +=
static_cast<double>(ndiffs) / static_cast<double>(ncomps);
46 double nnm1 =
static_cast<double>(m.
nsam * (m.
nsam - 1));
48 for (std::size_t i = 0; i < m.
nsites; ++i)
59 h += std::pow(nnonref, 2.0);
65 BOOST_FIXTURE_TEST_SUITE(test_classic_stats, dataset)
67 BOOST_AUTO_TEST_CASE(test_thetapi)
70 auto manual = manual_pi(m);
73 BOOST_CHECK_CLOSE(pi, manual, 1e-6);
76 BOOST_AUTO_TEST_CASE(test_thetapi_with_mising_data)
83 for (
int i = 1; i < static_cast<int>(m.
nsites); i += 2)
89 auto manual = manual_pi(m);
92 BOOST_CHECK_CLOSE(pi, manual, 1e-6);
95 BOOST_AUTO_TEST_CASE(test_num_poly_sites)
99 BOOST_REQUIRE_EQUAL(m.
nsites, S);
103 BOOST_REQUIRE_EQUAL(m.
nsites, tm);
107 std::fill(r.begin(), r.end(), 0);
110 BOOST_REQUIRE_EQUAL(S, m.
nsites - 1);
112 BOOST_REQUIRE_EQUAL(m.
nsites - 1, tm);
115 BOOST_AUTO_TEST_CASE(test_total_num_mutations)
123 BOOST_REQUIRE_EQUAL(tm, m.
nsites + 1);
126 BOOST_AUTO_TEST_CASE(test_nbiallelic_sites)
129 BOOST_REQUIRE_EQUAL(S2, m.
nsites);
136 BOOST_REQUIRE_EQUAL(S2, m.
nsites - 1);
139 BOOST_AUTO_TEST_CASE(test_count_alleles)
151 BOOST_REQUIRE_EQUAL(S2, nb);
154 BOOST_AUTO_TEST_CASE(test_thetaw)
163 for (
int i = 1; i < m.
nsam; ++i)
165 d += 1. /
static_cast<double>(i);
168 BOOST_CHECK_CLOSE(w, manual, 1e-6);
171 BOOST_AUTO_TEST_CASE(test_thetah)
176 auto m0 = manual_thetah(m, 0);
177 auto m1 = manual_thetah(m, 1);
178 BOOST_CHECK_CLOSE(h0, m0, 1e-6);
179 BOOST_CHECK_CLOSE(h1, m1, 1e-6);
182 BOOST_AUTO_TEST_CASE(test_faywuh)
192 BOOST_CHECK_EQUAL(fwh + h, pi);
193 BOOST_REQUIRE_EQUAL(pi - h, fwh);
196 BOOST_AUTO_TEST_CASE(test_thetah_multiple_derived_states)
200 for (std::size_t i = 0; i < f.
size(); ++i)
202 f[i] = (i % 2 == 0.0) ? 2 : 3;
211 BOOST_REQUIRE_THROW(
auto h
214 for (std::size_t i = 0; i < f.
size(); ++i)
219 BOOST_REQUIRE_NO_THROW(
223 BOOST_AUTO_TEST_CASE(test_num_haplotypes)
226 BOOST_REQUIRE_EQUAL(nh, 5);
229 BOOST_AUTO_TEST_CASE(test_unique_hap_at_any_index)
231 for (std::size_t i = 0; i < m.
nsam; ++i)
242 std::vector<std::string> haps;
243 for (std::size_t j = 0; j < m2.nsam; ++j)
247 for (
auto state : cvj)
249 haps.push_back(std::move(h));
251 std::set<std::string> uhaps(haps.begin(), haps.end());
252 BOOST_REQUIRE_EQUAL(uhaps.size(), nh);
256 BOOST_AUTO_TEST_CASE(test_haplotype_diversity)
266 std::vector<std::vector<std::int8_t>> haps;
267 for (std::size_t i = 0; i < m.
nsam; ++i)
271 std::vector<std::int8_t>(col.begin(), col.end()));
273 decltype(haps) uhaps;
276 if (std::find(uhaps.begin(), uhaps.end(), h) == uhaps.end())
282 for (
auto& uh : uhaps)
284 auto c = std::count(haps.begin(), haps.end(), uh);
285 mhd +=
static_cast<double>(c * (m.
nsam - c));
287 mhd /=
static_cast<double>(m.
nsam * (m.
nsam - 1));
289 BOOST_CHECK_CLOSE(hd, mhd, 1e-6);
292 BOOST_AUTO_TEST_CASE(test_rmin)
295 BOOST_REQUIRE_EQUAL(rm, 1);
298 BOOST_AUTO_TEST_SUITE_END()
std::vector< AlleleCounts > allele_counts(const AlleleCountMatrix &m)
Count number of alleles at each site.
double thetah(const AlleleCountMatrix &ac, const std::int8_t refstate)
Fay and Wu's .
std::int32_t number_of_haplotypes(const VariantMatrix &m)
Calculate the number of haplotypes in a sample.
size_type size(void) const
std::uint32_t total_number_of_mutations(const AlleleCountMatrix &m)
Total number of mutations in the sample.
ConstRowView get_RowView(const VariantMatrix &m, const std::size_t row)
Return a ConstRowView from VariantMatrix m at row row. std::out_of_range is thrown if row is out of r...
std::uint32_t nvariable_sites(const AlleleCountMatrix &m)
Number of polymorphic sites.
std::size_t nsites
Number of sites in data set.
"Classic" summaries of variation data.
double thetapi(const AlleleCountMatrix &ac)
Mean pairwise differences.
std::vector< double > positions
Position of sites.
double faywuh(const AlleleCountMatrix &ac, const std::int8_t refstate)
Fay and Wu's H.
double thetaw(const AlleleCountMatrix &ac)
Watterson's theta.
ConstRowView get_ConstRowView(const VariantMatrix &m, const std::size_t row)
Return a ConstRowView from VariantMatrix m at row row. std::out_of_range is thrown if row is out of r...
std::int32_t rmin(const VariantMatrix &m)
Matrix representation of variation data.
std::uint32_t nbiallelic_sites(const AlleleCountMatrix &m)
Number of bi-allelic sites.
ColView get_ColView(VariantMatrix &m, const std::size_t col)
Return a ColView from VariantMatrix m at col col. std::out_of_range is thcoln if col is out of range...
std::size_t nsam
Sample size of data set.
ConstColView get_ConstColView(VariantMatrix &m, const std::size_t col)
Return a ConstColView from VariantMatrix m at col col. std::out_of_range is thcoln if col is out of r...
std::vector< std::int8_t > data
Data stored in matrix form with rows as sites.
double haplotype_diversity(const VariantMatrix &m)
Calculate the haplotype diversity of a sample.
Matrix representation of allele counts in a VariantMatrix To be constructed.