libsequence  1.9.5
PolySIM.cc
1 /*
2 
3 Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
4 
5 Remove the brackets to email me.
6 
7 This file is part of libsequence.
8 
9 libsequence is free software: you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation, either version 3 of the License, or
12 (at your option) any later version.
13 
14 libsequence is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18 
19 You should have received a copy of the GNU General Public License
20 long with libsequence. If not, see <http://www.gnu.org/licenses/>.
21 
22 */
23 
24 #include <Sequence/PolySIM.hpp>
25 #include <Sequence/PolySNPimpl.hpp>
28 #include <Sequence/SimData.hpp>
29 #include <cassert>
30 #include <cfloat>
31 #include <cmath>
32 #include <cstdlib>
33 #include <limits>
38 using namespace Sequence::Recombination;
39 
40 namespace Sequence
41 {
42  PolySIM::PolySIM(const Sequence::SimData *data)
43  : PolySNP(data, false, 0, true)
47  {
48  rep->_NumPoly = NumPoly();
49  }
50 
51  PolySIM::~PolySIM(void) {}
52 
53  double
54  PolySIM::ThetaPi(void) const
59  {
60  std::lock_guard<std::mutex> lock(rep->instance_lock);
61  if (rep->_know_pi == false)
62  {
63  double Pi = 0.0, nsam = double(rep->_nsam);
64  for (std::vector<stateCounter>::const_iterator i
65  = rep->_counts.begin();
66  i != rep->_counts.end(); ++i)
67  {
68  Pi += (2.0 * i->one * (nsam - i->one))
69  / (nsam * (nsam - 1.));
70  }
71  rep->_pi = Pi;
72  rep->_know_pi = true;
73  }
74  return rep->_pi;
75  }
76 
77  double
78  PolySIM::ThetaW(void) const
84  {
85  return (rep->_NumPoly > 0) ? (double(rep->_NumPoly) / a_sub_n()) : 0.;
86  }
87 
88  double
89  PolySIM::ThetaH(void) const
95  {
96  double H = 0.0, nsam = double(rep->_nsam);
97 
98  for (std::vector<stateCounter>::const_iterator i
99  = rep->_counts.begin();
100  i != rep->_counts.end(); ++i)
101  {
102  H += (i->one < nsam)
103  ? (2.0 * i->one * i->one) / (nsam * (nsam - 1.0))
104  : 0.;
105  }
106  return H;
107  }
108 
109  double
110  PolySIM::ThetaL(void) const
116  {
117  if (rep->_NumPoly == 0)
118  return 0.;
119  const char state = '1';
120  unsigned site;
121  unsigned seq;
122  int num_changes;
123  double thetal = 0.0, nsam = double(rep->_nsam);
124 
125  for (site = 0; site < rep->_data->numsites(); ++site)
126  {
127  for (seq = 0, num_changes = 0; seq < unsigned(nsam); ++seq)
128  {
129  num_changes
130  += (*rep->_data)[seq][site] == state ? 1 : 0;
131  }
132  if (num_changes < nsam)
133  {
134  double nc = double(num_changes);
135  thetal += nc / (nsam - 1.0);
136  }
137  }
138  return thetal;
139  }
140 
141  double
142  PolySIM::TajimasD(void) const
143  {
144  if (rep->_NumPoly == 0)
145  return std::numeric_limits<double>::quiet_NaN();
146  double Pi = ThetaPi();
147  double W = ThetaW();
148  return ((Pi - W) / Dnominator());
149  }
150 
151  double
152  PolySIM::Hprime(const bool &likeThorntonAndolfatto) const
157  {
158  if (rep->_NumPoly == 0)
159  return std::numeric_limits<double>::quiet_NaN();
160  double pi = ThetaPi();
161  double theta = ThetaW();
162  double thetal = ThetaL();
163  double a = a_sub_n();
164  double b = b_sub_n();
165  double b_n_plus1 = b_sub_n_plus1();
166  double S = rep->_NumPoly;
167  double thetasq = (likeThorntonAndolfatto == false)
168  ? S * (S - 1) / (a * a + b)
169  : theta * theta;
170 
171  double vThetal = (rep->_nsam * theta) / (2.0 * (rep->_nsam - 1.0))
172  + (2.0 * pow(rep->_nsam / (rep->_nsam - 1.0), 2.0)
173  * (b_n_plus1 - 1.0)
174  - 1.0)
175  * thetasq;
176 
177  double vPi
178  = (3.0 * rep->_nsam * (rep->_nsam + 1.0) * theta
179  + 2.0 * (rep->_nsam * rep->_nsam + rep->_nsam + 3.0) * thetasq)
180  / (9 * rep->_nsam * (rep->_nsam - 1.0));
181 
182  double cov = (rep->_nsam + 1.0) * theta / (3.0 * (rep->_nsam - 1.0))
183  + (7.0 * rep->_nsam * rep->_nsam + 3.0 * rep->_nsam - 2.0
184  - 4.0 * rep->_nsam * (rep->_nsam + 1.0) * b_n_plus1)
185  * thetasq / (2.0 * pow((rep->_nsam - 1.0), 2.0));
186 
187  double Hpr = pi - thetal;
188  Hpr /= pow((vThetal + vPi - 2.0 * cov), 0.5);
189  return (Hpr);
190  }
191 
192  double
193  PolySIM::Dnominator(void) const
194  {
195  if (rep->_NumPoly == 0)
196  return std::numeric_limits<double>::quiet_NaN();
197  double S = rep->_NumPoly;
198  double a1, a2, b1, b2, c1, c2, e1, e2;
199 
200  a1 = a_sub_n();
201  a2 = b_sub_n();
202  b1 = (rep->_nsam + 1.0) / (3.0 * (rep->_nsam - 1.0));
203  b2 = (2.0 * (pow(rep->_nsam, 2.0) + rep->_nsam + 3.0))
204  / (9.0 * rep->_nsam * (rep->_nsam - 1.0));
205  c1 = b1 - 1.0 / a1;
206  c2 = b2 - (rep->_nsam + 2.0) / (a1 * rep->_nsam) + a2 / pow(a1, 2.0);
207  e1 = c1 / a1;
208  e2 = c2 / (pow(a1, 2.0) + a2);
209  double denominator = pow((e1 * S + e2 * S * (S - 1.0)), 0.5);
210  return (denominator);
211  }
212 
213  int
214  PolySIM::HudsonsHaplotypeTest(const int &subsize, const int &subss) const
225  {
226  int *subslist, i, seq;
227  bool sflag, flag;
228  flag = 0;
229  sflag = 1;
230 
231  subslist = new int[subsize];
232 
233  for (i = 0; i < subsize; ++i)
234  subslist[i] = i;
235 
236  while (sflag)
237  {
238  int npoly
239  = poly(subslist, int(rep->_nsites), subsize, subss, &seq);
240  if (npoly > subss)
241  sflag
242  = nextsample(subslist, subsize, int(rep->_nsam), seq);
243  else
244  {
245  flag = 1;
246  break;
247  }
248  }
249  if (flag == 1)
250  {
251  delete[] subslist;
252  return (1);
253  }
254  else
255  {
256  delete[] subslist;
257  return (0);
258  }
259  }
260 
261  int
262  PolySIM::poly(int *subslist, const int &ss, const int &subsize,
263  const int &subss, int *seq) const
272  {
273  int npoly = 0, i, j, *polyvector;
274 
275  polyvector = new int[ss];
276  for (i = 0; i < ss; ++i)
277  polyvector[i] = 0;
278 
279  for (i = 1; i < subsize; ++i)
280  for (j = 0; j < ss; ++j)
281  if ((*rep->_data)[PolyTable::size_type(subslist[i])]
282  [std::string::size_type(j)]
283  != (*rep->_data)[PolyTable::size_type(subslist[0])]
284  [std::string::size_type(j)])
285  {
286  if (polyvector[j] == 0)
287  {
288  polyvector[j] = 1;
289  npoly++;
290  if (npoly == subss + 1)
291  *seq = i;
292  }
293  }
294  delete[] polyvector;
295  return (npoly);
296  }
297 
298  int
299  PolySIM::nextsample(int *subslist, const int &subsize, const int &nsam,
300  int seq) const
309  {
310  int i;
311  subslist[seq]++;
312  if (subslist[seq] > nsam - subsize + seq)
313  {
314  seq--;
315 
316  if (seq < 0)
317  return (0);
318 
319  return (nextsample(subslist, subsize, nsam, seq));
320  }
321 
322  for (i = seq + 1; i < subsize; i++)
323  subslist[i] = subslist[i - 1] + 1;
324  return (1);
325  }
326 
327  double
328  PolySIM::FuLiD(void) const
329  {
330  if (rep->_NumPoly == 0)
331  return std::numeric_limits<double>::quiet_NaN();
332  double ExternalMutations = double(NumExternalMutations());
333  double NumMut = double(NumMutations());
334  double a = a_sub_n();
335  double b = b_sub_n();
336  double c = c_sub_n();
337 
338  double vD = 1.0 + (pow(a, 2.0) / (b + pow(a, 2.0))
339  * (c - (rep->_nsam + 1.0) / (rep->_nsam - 1.0)));
340  double uD = a - 1.0 - vD;
341  double D = NumMut - a * double(ExternalMutations);
342  D /= pow((uD * NumMut + vD * pow(NumMut, 2.0)), 0.5);
343  return (D);
344  }
345 
346  double
347  PolySIM::FuLiF(void) const
348  {
349  if (rep->_NumPoly == 0)
350  return std::numeric_limits<double>::quiet_NaN();
351  double Pi = ThetaPi();
352  double NumMut = double(NumMutations());
353  double ExternalMutations = double(NumExternalMutations());
354  double a = a_sub_n();
355  double a_n_plus1 = a_sub_n_plus1();
356  double b = b_sub_n();
357  double c = c_sub_n();
358  double vF = c
359  + 2.0 * (pow(rep->_nsam, 2.0) + rep->_nsam + 3.0)
360  / (9.0 * rep->_nsam * (double(rep->_nsam - 1.0)));
361  vF -= (2.0 / (rep->_nsam - 1.0));
362  vF /= (pow(a, 2.0) + b);
363 
364  double uF
365  = 1.0 + (rep->_nsam + 1.0) / (3.0 * (double(rep->_nsam - 1.0)));
366  uF -= 4.0 * ((rep->_nsam + 1.0) / (pow(rep->_nsam - 1.0, 2.0)))
367  * (a_n_plus1 - 2.0 * rep->_nsam / (rep->_nsam + 1.0));
368  uF /= a;
369  uF -= vF;
370 
371  double F = Pi - ExternalMutations;
372  F /= pow(uF * NumMut + vF * pow(NumMut, 2.0), 0.5);
373  return (F);
374  }
375 
376  double
377  PolySIM::FuLiDStar(void) const
378  {
379  if (rep->_NumPoly == 0)
380  return std::numeric_limits<double>::quiet_NaN();
381  double Singletons = double(NumSingletons());
382  double NumMut = double(NumMutations());
383 
384  double a = a_sub_n();
385  double b = b_sub_n();
386  double d = d_sub_n();
387 
388  double vD = pow(rep->_nsam / (rep->_nsam - 1.0), 2.0) * b;
389  vD += pow(a, 2.0) * d;
390  vD -= 2.0 * (rep->_nsam * a * (a + 1.0))
391  / (pow(double(rep->_nsam - 1.0), 2.0));
392  vD /= (pow(a, 2.0) + b);
393 
394  double uD = (rep->_nsam / (rep->_nsam - 1.0))
395  * (a - (rep->_nsam / (rep->_nsam - 1.0)))
396  - vD;
397 
398  double DStar = (rep->_nsam / (rep->_nsam - 1.0)) * NumMut
399  - a * double(Singletons);
400  DStar /= pow(uD * NumMut + vD * pow(NumMut, 2.0), 0.5);
401  return (DStar);
402  }
403 
404  double
405  PolySIM::FuLiFStar(void) const
406  {
407  if (rep->_NumPoly == 0)
408  return std::numeric_limits<double>::quiet_NaN();
409  double Singletons = double(NumSingletons());
410  double Pi = ThetaPi();
411  double NumMut = double(NumMutations());
412 
413  double a = a_sub_n();
414  double a_n_plus1 = a_sub_n_plus1();
415  double b = b_sub_n();
416  // vF is taken from the correction published by
417  // Simonsen et al. (1995) Genetics 141: 413, eqn A5
418  double vF = 2.0 * pow(rep->_nsam, 3.0) + 110.0 * pow(rep->_nsam, 2.0)
419  - 255.0 * rep->_nsam + 153.0;
420  vF /= (9.0 * pow(rep->_nsam, 2.0) * (rep->_nsam - 1.0));
421  vF += (((2.0 * (rep->_nsam - 1.0) * a) / pow(rep->_nsam, 2.0))
422  - (8.0 * b / rep->_nsam));
423  vF /= (pow(a, 2.0) + b);
424 
425  double uF = (4.0 * pow(rep->_nsam, 2.0) + 19.0 * rep->_nsam + 3.0
426  - 12.0 * (rep->_nsam + 1.0) * a_n_plus1);
427  uF /= (3.0 * rep->_nsam * (rep->_nsam - 1.0));
428  uF /= a;
429  uF -= vF;
430  double FStar
431  = Pi - (((rep->_nsam - 1.0) / rep->_nsam)) * double(Singletons);
432  FStar /= pow((uF * NumMut + vF * pow(NumMut, 2.0)), 0.5);
433  return (FStar);
434  }
435 
436  unsigned
437  PolySIM::NumMutations(void) const
441  {
442  return rep->_NumPoly;
443  }
444 
445  // count the number of singletons
446  // only works for infinite sites
447  unsigned
448  PolySIM::NumSingletons(void) const
455  {
456  if (rep->_counted_singletons == false)
457  {
458  unsigned singletonCount = 0;
459  singletonCount = 0;
460  for (std::vector<stateCounter>::const_iterator i
461  = rep->_counts.begin();
462  i != rep->_counts.end(); ++i)
463  {
464  singletonCount
465  += (i->one == 1 || i->zero == 1) ? 1u : 0u;
466  }
467  rep->_counted_singletons = true;
468  rep->_singletons = singletonCount;
469  return (singletonCount);
470  }
471  else
472  return rep->_singletons;
473 
474  return rep->_singletons;
475  }
476 
477  unsigned
478  PolySIM::NumExternalMutations(void) const
488  {
489  unsigned numExternal = 0;
490  for (std::vector<stateCounter>::const_iterator i
491  = rep->_counts.begin();
492  i != rep->_counts.end(); ++i)
493  {
494  if (i->one == 1)
495  {
496  ++numExternal;
497  }
498  }
499  return (numExternal);
500  }
501 
502  unsigned
503  PolySIM::Minrec(void) const
510  {
511  if (rep->_NumPoly < 2)
512  return SEQMAXUNSIGNED;
513  unsigned a, b, e, numgametes, Rmin = 0, x = 0;
514  bool flag = false;
515 
516  if (rep->_NumPoly < 2 || x >= (rep->_NumPoly - 1))
517  return (0);
518  for (a = x + 1; a < rep->_nsites; ++a)
519  {
520  for (b = (flag == false) ? x : a - 1; b < a; ++b)
521  {
522  flag = false;
523  numgametes = 0;
524  for (e = 0; e < rep->_nsam; ++e)
525  {
526  if ((*rep->_data)[e][b] == '0'
527  && (*rep->_data)[e][a] == '0')
528  {
529  ++numgametes;
530  break;
531  }
532  }
533  for (e = 0; e < rep->_nsam; ++e)
534  {
535  if ((*rep->_data)[e][b] == '0'
536  && (*rep->_data)[e][a] == '1')
537  {
538  ++numgametes;
539  break;
540  }
541  }
542  for (e = 0; e < rep->_nsam; ++e)
543  {
544  if ((*rep->_data)[e][b] == '1'
545  && (*rep->_data)[e][a] == '0')
546  {
547  ++numgametes;
548  break;
549  }
550  }
551  for (e = 0; e < rep->_nsam; ++e)
552  {
553  if ((*rep->_data)[e][b] == '1'
554  && (*rep->_data)[e][a] == '1')
555  {
556  ++numgametes;
557  break;
558  }
559  }
560  if (numgametes == 4)
561  {
562  ++Rmin;
563  flag = true;
564  break;
565  }
566  }
567  if (flag == true)
568  {
569  x = a;
570  }
571  }
572  return Rmin;
573  }
574 
575  void
576  PolySIM::WallStats(void) const
577  {
578  std::lock_guard<std::mutex> lock(rep->instance_lock);
579  if (!rep->_calculated_wall_stats)
580  {
581  unsigned S = rep->_NumPoly;
582  if (S > 1)
583  {
584  unsigned n00, n01, n10, n11;
585  unsigned nhap_curr, nhap_left;
586  unsigned n0site1, n0site2;
587  nhap_left = SEQMAXUNSIGNED;
588  unsigned A = 0; // number of partitions with D' = 1
589  // (see Wall 1999)
590  for (unsigned site1 = 0; site1 < rep->_nsites - 1;
591  ++site1)
592  // iterate over sites (actually, adjacent pairs of
593  // sites)
594  {
595  for (unsigned site2 = site1 + 1;
596  site2 < rep->_nsites; ++site2)
597  {
598  nhap_curr = 0;
599  n00 = n01 = n10 = n11 = n0site1
600  = n0site2 = 0;
601  for (unsigned i = 0; i < rep->_nsam;
602  ++i)
603  {
604  auto site2state
605  = (*rep->_data)[i][site2];
606  switch (
607  (*rep->_data)[i][site1])
608  {
609  case '0':
610  ++n0site1;
611  switch (site2state)
612  {
613  case '0':
614  ++n0site2;
615  ++n00;
616  break;
617  case '1':
618  ++n01;
619  break;
620  }
621  break;
622  case '1':
623  switch (site2state)
624  {
625  case '0':
626  ++n0site2;
627  ++n10;
628  break;
629  case '1':
630  ++n11;
631  break;
632  }
633  break;
634  }
635  }
636  // the if statement checks to make
637  // sure that both sites are polymorphic
638  if ((n0site1 > 0
639  && n0site1 < rep->_nsam)
640  && (n0site2 > 0
641  && n0site2 < rep->_nsam))
642  {
643  nhap_curr += (n00 > 0) ? 1 : 0;
644  nhap_curr += (n01 > 0) ? 1 : 0;
645  nhap_curr += (n10 > 0) ? 1 : 0;
646  nhap_curr += (n11 > 0) ? 1 : 0;
647  if (site1 == 0)
648  {
649  if (nhap_curr == 2)
650  {
651  ++rep->_walls_Bprime;
652  ++A;
653  }
654  }
655  else
656  {
657  if (nhap_curr == 2)
658  ++rep->_walls_Bprime;
659  if (nhap_curr == 2
660  && nhap_left != 2)
661  ++A;
662  }
663  site1 = site2;
664  }
665  nhap_left = nhap_curr;
666  }
667  }
668  rep->_walls_B
669  = double(rep->_walls_Bprime) / (double(S - 1));
670  rep->_walls_Q
671  = (rep->_walls_Bprime + double(A)) / (double(S));
672  }
673  else
674  {
675  rep->_walls_B
676  = std::numeric_limits<double>::quiet_NaN();
677  rep->_walls_Bprime = 0;
678  rep->_walls_Q
679  = std::numeric_limits<double>::quiet_NaN();
680  }
681  }
682  rep->_calculated_wall_stats = true;
683  }
684 
685  double
686  PolySIM::WallsB(void) const
687  {
688  WallStats();
689  return rep->_walls_B;
690  }
691  unsigned
692  PolySIM::WallsBprime(void) const
693  {
694  WallStats();
695  return rep->_walls_Bprime;
696  }
697  double
698  PolySIM::WallsQ(void) const
699  {
700  WallStats();
701  return rep->_walls_Q;
702  }
703 }
double thetal(const AlleleCountMatrix &ac, const std::int8_t refstate)
Zeng et al. .
Methods dealing with recombination.
The namespace in which this library resides.
const unsigned SEQMAXUNSIGNED
Definition: SeqConstants.cc:32
declaration of Sequence::PolySIM, a class to analyze coalescent simulation data
namespace Sequence::Recombination
Declaration of Sequence::SimData, a class representing polymorphism data from coalescent simulations ...
Data from coalescent simulations.