38 PairwiseLDstats::PairwiseLDstats()
39 : i(
std::numeric_limits<double>::quiet_NaN()), j(i), rsq(i), D(i),
40 Dprime(i), skipped(true)
43 namespace Recombination
58 const bool &haveOutgroup,
59 const unsigned &outgroup,
60 const int &totsam,
const int &ss);
62 const bool &haveOutgroup,
63 const unsigned &outgroup,
const int &totsam,
64 vector<int> &list,
const int &ss);
65 void get_h_vals(
const int &ss,
const int &totsam,
67 const bool &haveOutgroup,
const unsigned &outgroup,
68 double *sumhi,
double *sumhisq);
69 double chatsub(
const int &totsam,
const double &sksq,
70 const double &sumhi,
const double &sumhisq);
71 double solveit(
const int &nsam,
const double &stat);
72 double g(
const int &nsam,
const double &c);
76 const bool &haveOutgroup,
77 const unsigned &outgroup,
const int &totsam,
86 double Svar = 0.0, meandiffs = 0.0;
87 double nsam = double(totsam);
88 vector<int> pairdiffs;
89 GetPairDiffs(data, haveOutgroup, outgroup, totsam, pairdiffs,
93 for (i = 0; unsigned(i) < pairdiffs.size(); ++i)
95 meandiffs += double(pairdiffs[i]);
97 meandiffs /= pow(nsam, 2.0);
98 for (i = 0; unsigned(i) < pairdiffs.size(); ++i)
99 Svar += pow((
double(pairdiffs[i]) - meandiffs), 2);
100 Svar /= pow(nsam, 2.);
106 const bool &haveOutgroup,
const unsigned &outgroup,
107 const int &totsam, vector<int> &list,
const int &ss)
116 for (i = 0; int(i) < totsam; ++i)
118 for (j = 0; int(j) < totsam; ++j)
120 if ((!(haveOutgroup))
121 || (haveOutgroup && i != unsigned(outgroup)
122 && j != unsigned(outgroup)))
127 for (k = 0; k < unsigned(ss);
149 list.push_back(ndiffs);
159 get_h_vals(
const int &ss,
const int &totsam,
161 const bool &haveOutgroup,
const unsigned &outgroup,
162 double *sumhi,
double *sumhisq)
181 for (
int i = 0; i < ss; ++i)
186 int samplesize = totsam;
187 for (
unsigned j = 0; j < data->size(); ++j)
189 if ((!(haveOutgroup))
191 && j != unsigned(outgroup)))
201 (*data)[j][
unsigned(i)]);
207 double denom = (double(samplesize)
208 * (double(samplesize) - 1.0));
209 SSH += (Counts.a > 0)
211 * double(Counts.a - 1) / denom
213 SSH += (Counts.g > 0)
215 * double(Counts.g - 1) / denom
217 SSH += (Counts.c > 0)
219 * double(Counts.c - 1) / denom
221 SSH += (Counts.t > 0)
223 * double(Counts.t - 1) / denom
225 SSH += (Counts.zero > 0)
226 ?
double(Counts.zero)
227 * double(Counts.zero - 1)
230 SSH += (Counts.one > 0)
232 * double(Counts.one - 1)
235 *sumhi += (1.0 - SSH);
236 *sumhisq += pow(1.0 - SSH, 2.0);
242 chatsub(
const int &totsam,
const double &sksq,
const double &sumhi,
243 const double &sumhisq)
249 double stat, thetahat, estimate;
251 stat = (sksq - sumhi + sumhisq) / (thetahat * thetahat);
252 estimate = solveit(totsam, stat);
257 solveit(
const int &totsam,
const double &stat)
267 xsmall = std::numeric_limits<float>::epsilon();
269 if (fabs(g(totsam, xsmall)) - stat
270 <= std::numeric_limits<double>::epsilon())
273 while (fabs(g(totsam, xbig)) - stat
274 >= std::numeric_limits<double>::epsilon()
278 && fabs(g(totsam, xbig) - stat)
279 > std::numeric_limits<double>::epsilon())
284 while ((xbig - xsmall) > std::numeric_limits<float>::epsilon())
286 double xtemp = (xbig + xsmall) / 2.;
287 if (fabs(g(totsam, xtemp)) - stat
288 > std::numeric_limits<double>::epsilon())
293 return ((xbig + xsmall) / 2.);
297 g(
const int &totsam,
const double &c)
305 double r97, i1, i2, sumpd, n, b1, b2, b3;
312 i2 = (log(((2. * c + y) * x) / ((2. * c + x) * y))) / r97;
313 i1 = (0.5) * log((c * c + 13. * c + 18.) / 18.)
316 = (-c + (c - 1.) * i1 + 2. * (7. * c + 9.) * i2) / (c * c);
317 b1 = 1. / 2. + 1. / c
318 + ((5. - c) * i1 - 18. * (c + 1.) * i2) / (c * c);
319 b2 = -1. / 2. + 2. / c
320 - 2. * ((c + 9.) * i1 + 2. * (2. * c + 9.) * i2)
323 + 2. * ((c + 7.) * i1 + 6. * (c + 3.) * i2) / (c * c);
324 esk = sumpd + b1 / n + b2 / (n * n) + b3 / (n * n * n);
330 const unsigned &outgroup)
340 int totsam = int(data->size());
343 int ss = int((*data)[0].length());
344 double sumhi = 0.0, sumhisq = 0.0;
345 get_h_vals(ss, totsam, data, haveOutgroup, outgroup, &sumhi,
347 double sksq = CalcSamplingVariance(data, haveOutgroup, outgroup,
349 return (chatsub(totsam, sksq, sumhi, sumhisq));
352 std::vector<PairwiseLDstats>
354 const bool &haveOutgroup,
const unsigned &outgroup,
355 const unsigned &mincount,
const double max_distance)
357 using return_type = std::vector<PairwiseLDstats>;
358 if (data->empty() || data->numsites() < 2)
359 return return_type();
361 auto nsites = data->numsites();
364 for (std::size_t i = 0; i < nsites - 1; ++i)
366 for (std::size_t j = i + 1; j < nsites; ++i)
368 rv.push_back(
PairwiseLD(data, i, j, haveOutgroup,
378 const bool &haveOutgroup,
const unsigned &outgroup,
379 const unsigned &mincount,
const double max_distance)
381 typedef Sequence::PolyTable::size_type USIZE;
382 unsigned ss = data->numsites();
385 throw std::runtime_error(
"site index i is out of range, " 386 + std::string(__FILE__) +
", line" 387 + std::to_string(__LINE__));
391 throw std::runtime_error(
"j must be > i, " 392 + std::string(__FILE__) +
", line" 393 + std::to_string(__LINE__));
397 const char _alphabet[10]
398 = {
'A',
'G',
'C',
'T',
'0',
'1',
'a',
'g',
'c',
't' };
399 static const unsigned alphsize = 6;
400 PolyTable::const_site_iterator beg = data->sbegin();
401 const double pos1 = (beg + i)->first, pos2 = (beg + j)->first;
403 const double op1 = pos1, op2 = pos2;
405 const USIZE datasize = data->size();
406 const USIZE nsam = datasize - USIZE(haveOutgroup);
413 std::pair<char, char> chars1, chars2;
419 std::pair<USIZE, USIZE> counts1, counts2;
426 unsigned states1 = 0;
427 std::string site1((beg + i)->second);
428 for (
unsigned letter = 0; letter < alphsize; ++letter)
430 std::transform(site1.begin(), site1.end(), site1.begin(),
432 if (std::find(site1.begin(), site1.end(),
436 if (chars1.first ==
'Z')
439 std::toupper(_alphabet[letter]));
444 chars1.second = char(
445 std::toupper(_alphabet[letter]));
446 if (chars1.first != chars1.second)
458 site1.replace(outgroup, 1,
"");
460 if (std::abs(data->position(j) - data->position(i))
463 std::string site2((beg + j)->second);
464 std::transform(site2.begin(), site2.end(),
465 site2.begin(), ::toupper);
471 unsigned states2 = 0;
472 for (
unsigned letter = 0; letter < alphsize;
475 if (std::find(site2.begin(), site2.end(),
479 if (chars2.first ==
'Z')
507 site2.replace(outgroup, 1,
"");
511 if (std::find(site1.begin(), site1.end(),
514 && std::find(site2.begin(),
533 if (haveOutgroup ==
false)
539 = unsigned(std::count(
547 = unsigned(std::count(
600 else if (haveOutgroup ==
true)
609 ==
char(std::toupper(
618 = unsigned(std::count(
623 = nsam - counts1.first;
632 ==
char(std::toupper(
641 = unsigned(std::count(
646 = nsam - counts2.first;
654 if (counts1.first >= mincount
655 && counts2.first >= mincount)
664 unsigned counts00 = 0,
709 = double(counts1.first)
716 = double(counts2.first)
719 rv.
D = double(counts11)
722 rv.
rsq = (rv.
D * rv.
D)
725 double dmin = std::max(
727 double dmax = std::min(
The base class for polymorphism tables.
keep track of state counts at a site in an alignment or along a sequence
double i
Position of site i.
double j
Position of site j.
Sequence::PolyTable, a virtual base class for polymorphism tables.
The namespace in which this library resides.
double rsq
$r^2$ between sites i and j
std::vector< PairwiseLDstats > Disequilibrium(const Sequence::PolyTable *data, const bool &haveOutgroup=false, const unsigned &outgroup=0, const unsigned &mincount=1, const double max_distance=std::numeric_limits< double >::max()) __attribute__((deprecated))
Calculate pairwise LD for a Sequence::PolyTable.
Pairwise linkage disequilibrium (LD) stats .
double HudsonsC(const Sequence::PolyTable *data, const bool &haveOutgroup, const unsigned &outgroup) __attribute__((deprecated))
namespace Sequence::Recombination
Declaration of Sequence::SimData, a class representing polymorphism data from coalescent simulations ...
double D
The D statistics.
declaration of Sequence::stateCounter, a class to keep track of nucleotide counts either at a site in...
PairwiseLDstats PairwiseLD(const Sequence::PolyTable *data, unsigned i, unsigned j, const bool &haveOutgroup=false, const unsigned &outgroup=0, const unsigned &mincount=1, const double max_distance=std::numeric_limits< double >::max()) __attribute__((deprecated))
bool skipped
If the site fails and filters, this is true.