libsequence  1.9.5
PolySNP.cc
1 /*
2 
3 Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
4 
5 Remove the brackets to email me.
6 
7 This file is part of libsequence.
8 
9 libsequence is free software: you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation, either version 3 of the License, or
12 (at your option) any later version.
13 
14 libsequence is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18 
19 You should have received a copy of the GNU General Public License
20 long with libsequence. If not, see <http://www.gnu.org/licenses/>.
21 
22 */
23 
24 #include <cmath>
25 #include <cfloat>
26 #include <cassert>
27 #include <utility>
28 #include <cstdlib>
29 #include <cctype>
30 #include <set>
31 #include <limits>
32 #include <algorithm>
33 #include <functional>
34 #include <Sequence/PolyTable.hpp>
35 #include <Sequence/Comparisons.hpp>
37 #include <Sequence/PolySNP.hpp>
40 #include <Sequence/PolySNPimpl.hpp>
41 
42 using std::string;
43 using namespace Sequence::Recombination;
44 
45 namespace Sequence
46 {
47  struct uniqueSeq
48  : public std::binary_function<std::string, std::string, bool>
49  {
50  inline bool
51  operator()(const std::string &l, const std::string &r) const
52  {
53  // use Sequence::Different to prevent missing sites
54  // causing 2 sequences to be labelled as distinct
55  // return ( Different(l,r) &&
56  // std::lexicographical_compare(l.begin(),l.end(),r.begin(),r.end(),lt_nocase())
57  // );
58  return (Different(l, r)
59  && std::lexicographical_compare(
60  l.begin(), l.end(), r.begin(), r.end(),
61  [](const char &__a, const char __b) {
62  return (std::toupper(
63  static_cast<unsigned char>(__a))
64  < std::toupper(
65  static_cast<unsigned char>(__b)));
66  }));
67  }
68  };
69  _PolySNPImpl::_PolySNPImpl(const Sequence::PolyTable *data,
70  const bool &haveOutgroup,
71  const unsigned &outgroup, const bool &totMuts)
72  : _data(data), _nsites(data->numsites()),
73  _nsam(unsigned(data->size())), _outgroup(outgroup),
74  _haveOutgroup(haveOutgroup), _totMuts(totMuts),
75  _totsam(unsigned(data->size())), _DVK(0), _DVH(1.0),
76  _counted_singletons(false), _know_pi(false), _CalculatedDandV(false),
77  _pi(0.), _singletons(0), _walls_Bprime(0), _NumPoly(0), _walls_B(0.),
78  _walls_Q(0.), _calculated_wall_stats(false),
79  _counts(std::vector<Sequence::stateCounter>(
80  _nsites, Sequence::stateCounter('-'))),
81  _derivedCounts(std::vector<std::pair<bool, stateCounter>>(
82  _nsites,
83  std::make_pair<bool, stateCounter>(true, stateCounter('-')))),
84  _preprocessed(false)
85  {
86  if (haveOutgroup)
87  --_totsam; // because one sequence in data is an outgroup!
88  preprocess();
89  }
90 
91  void
108  {
109  if (!_preprocessed)
110  {
111  for (unsigned site = 0; site < _nsites; ++site)
112  {
113  for (unsigned seq = 0; seq < _nsam; ++seq)
114  {
115  // process counts w/o respect to
116  // ancestral or derived
117  if (!_haveOutgroup
118  || (_haveOutgroup && seq != _outgroup))
119  {
120  _counts[site]((*_data)[seq][site]);
121  }
122  // process derived states if outgroup is
123  // present
124  if (_haveOutgroup == true)
125  {
126  // if outgroup state is missing data
127  // or a gap
128  // set the bool for that site to false
129  if (std::toupper(
130  (*_data)[_outgroup][site])
131  == 'N'
132  || (*_data)[_outgroup][site]
133  == '-')
134  {
135  _derivedCounts[site].first
136  = false;
137  }
138  else
139  {
140  // tabulate the derived state
141  _derivedCounts[site].first
142  = true;
143  if (seq != _outgroup
144  && (*_data)[seq][site]
145  != (*_data)
146  [_outgroup]
147  [site])
148  {
149  _derivedCounts[site]
150  .second((
151  *_data)[seq]
152  [site]);
153  }
154  }
155  }
156  else
157  {
158  // if no outgroup, set bool
159  // for site to false
160  _derivedCounts[site].first = false;
161  }
162  }
163  if (_counts[site].nStates() > 1
164  && _counts[site].gap == 0)
165  ++_NumPoly;
166  }
167  _preprocessed = true;
168  }
169  }
170 
171  PolySNP::PolySNP(const Sequence::PolyTable *data, bool haveOutgroup,
172  unsigned outgroup, bool totMuts)
173  : rep(std::unique_ptr<_PolySNPImpl>(
174  new _PolySNPImpl(data, haveOutgroup, outgroup, totMuts)))
187  {
188  }
189 
190  PolySNP::~PolySNP(void) {}
191 
192  double
193  PolySNP::ThetaPi(void) const
228  {
229  assert(rep->_preprocessed);
230  std::lock_guard<std::mutex> lock(rep->instance_lock);
231  if (rep->_know_pi == false)
232  {
233  double Pi = 0.0;
234  for (unsigned i = 0; i < rep->_nsites; ++i)
235  { // iterate over sites
236  if (rep->_counts[i].gap == 0
237  && rep->_counts[i].nStates() > 1)
238  {
239  unsigned samplesize = rep->_totsam;
240  samplesize
241  -= rep->_counts[i].n; // adjust sample size
242  // for missing data
243  if (samplesize > 1)
244  {
245  double SSH
246  = 0.0; // sum of site homozygosity
247  double denom
248  = (double(samplesize)
249  * (double(samplesize) - 1.0));
250  SSH += (rep->_counts[i].a > 0)
251  ? double(rep->_counts[i].a)
252  * double(
253  rep->_counts[i]
254  .a
255  - 1)
256  / denom
257  : 0.;
258  SSH += (rep->_counts[i].g > 0)
259  ? double(rep->_counts[i].g)
260  * double(
261  rep->_counts[i]
262  .g
263  - 1)
264  / denom
265  : 0.;
266  SSH += (rep->_counts[i].c > 0)
267  ? double(rep->_counts[i].c)
268  * double(
269  rep->_counts[i]
270  .c
271  - 1)
272  / denom
273  : 0.;
274  SSH += (rep->_counts[i].t > 0)
275  ? double(rep->_counts[i].t)
276  * double(
277  rep->_counts[i]
278  .t
279  - 1)
280  / denom
281  : 0.;
282  SSH += (rep->_counts[i].zero > 0)
283  ? double(
284  rep->_counts[i].zero)
285  * double(
286  rep->_counts[i]
287  .zero
288  - 1)
289  / denom
290  : 0.;
291  SSH += (rep->_counts[i].one > 0)
292  ? double(
293  rep->_counts[i].one)
294  * double(
295  rep->_counts[i]
296  .one
297  - 1)
298  / denom
299  : 0.;
300  Pi += (1.0 - SSH);
301  }
302  }
303  }
304  rep->_pi = Pi;
305  rep->_know_pi = true;
306  return rep->_pi;
307  }
308  else
309  return rep->_pi;
310  return rep->_pi;
311  }
312 
313  double
314  PolySNP::ThetaW(void) const
338  {
339  assert(rep->_preprocessed);
340  double W = 0.0;
341  for (unsigned i = 0; i < rep->_nsites; ++i)
342  { // iterate over sitesvv
343  if (rep->_counts[i].gap == 0)
344  {
345  unsigned nstates = rep->_counts[i].nStates();
346  unsigned nsam_site = rep->_totsam - rep->_counts[i].n;
347  double denom = 0.0;
348  if (rep->_totMuts == true && nstates >= 2)
349  {
350  for (unsigned i = 1; i < nsam_site; ++i)
351  denom += 1.0 / double(i);
352  W += double(nstates - 1) / denom;
353  }
354  else if (rep->_totMuts == false && nstates >= 2)
355  {
356  for (unsigned i = 1; i < nsam_site; ++i)
357  denom += 1.0 / double(i);
358  W += 1.0 / denom;
359  }
360  }
361  }
362  return (W);
363  }
364 
365  double
366  PolySNP::ThetaH(void) const
402  {
403  assert(rep->_preprocessed);
404  if (rep->_NumPoly == 0)
405  return 0.;
406  if (!rep->_haveOutgroup)
407  return std::numeric_limits<double>::quiet_NaN();
408  double H = 0.0;
409  bool anc_is_present = 0; // is ancestral state present in the ingroup?
410 
411  for (unsigned i = 0; i < rep->_nsites; ++i)
412  { // iterate over sites
413  if (rep->_derivedCounts[i].second.gap == 0)
414  {
415  unsigned samplesize
416  = rep->_totsam; // sample size per site
417  unsigned sumDerCounts = 0;
418  sumDerCounts += rep->_derivedCounts[i].second.a;
419  sumDerCounts += rep->_derivedCounts[i].second.g;
420  sumDerCounts += rep->_derivedCounts[i].second.c;
421  sumDerCounts += rep->_derivedCounts[i].second.t;
422  sumDerCounts += rep->_derivedCounts[i].second.zero;
423  sumDerCounts += rep->_derivedCounts[i].second.one;
424  unsigned ancestralCounts
425  = samplesize - sumDerCounts
426  - rep->_derivedCounts[i].second.n;
427 
428  // if the ancestral state is not missing data, and
429  // the sum of the derived counts + and missing data in
430  // the ingroup
431  // does not equal the sample size, then the ancestral
432  // state is present
433  // at least once in the ingroup, and anc_is_present =
434  // true
435  anc_is_present = (rep->_derivedCounts[i].first == true
436  && ancestralCounts > 0)
437  ? true
438  : false;
439  if (anc_is_present) // ancestral state must be present
440  {
441  // number of derived states seen at this site
442  unsigned numDer
443  = rep->_derivedCounts[i].second.nStates();
444  if (numDer == 1)
445  { // simple if there is only one derived
446  // state inferred
447  samplesize
448  -= rep->_derivedCounts[i].second.n;
449  double denom
450  = (double(samplesize)
451  * (double(samplesize) - 1.0));
452  H += (rep->_derivedCounts[i].second.a
453  > 0)
454  ? (2.0
455  * pow(double(
456  rep->_derivedCounts
457  [i]
458  .second
459  .a),
460  2.0))
461  / denom
462  : 0.;
463  H += (rep->_derivedCounts[i].second.g
464  > 0)
465  ? (2.0
466  * pow(double(
467  rep->_derivedCounts
468  [i]
469  .second
470  .g),
471  2.0))
472  / denom
473  : 0.;
474  H += (rep->_derivedCounts[i].second.c
475  > 0)
476  ? (2.0
477  * pow(double(
478  rep->_derivedCounts
479  [i]
480  .second
481  .c),
482  2.0))
483  / denom
484  : 0.;
485  H += (rep->_derivedCounts[i].second.t
486  > 0)
487  ? (2.0
488  * pow(double(
489  rep->_derivedCounts
490  [i]
491  .second
492  .t),
493  2.0))
494  / denom
495  : 0.;
496  H += (rep->_derivedCounts[i]
497  .second.zero
498  > 0)
499  ? (2.0
500  * pow(double(
501  rep->_derivedCounts
502  [i]
503  .second
504  .zero),
505  2.0))
506  / denom
507  : 0.;
508  H += (rep->_derivedCounts[i].second.one
509  > 0)
510  ? (2.0
511  * pow(double(
512  rep->_derivedCounts
513  [i]
514  .second
515  .one),
516  2.0))
517  / denom
518  : 0.;
519  }
520  else if (numDer == 2
521  && rep->_haveOutgroup) // MUST have
522  // outgroup--else
523  // can't proceed
524  { // use a "missing data" scheme if there
525  // is >1 derived state
526  // iterate over derived states
527  unsigned config[2];
528  unsigned k = 0;
529  if (rep->_derivedCounts[i].second.a
530  > 0)
531  {
532  config[k++]
533  = rep->_derivedCounts[i]
534  .second.a;
535  }
536  if (rep->_derivedCounts[i].second.g
537  > 0)
538  {
539  config[k++]
540  = rep->_derivedCounts[i]
541  .second.g;
542  }
543  if (rep->_derivedCounts[i].second.c
544  > 0)
545  {
546  config[k++]
547  = rep->_derivedCounts[i]
548  .second.c;
549  }
550  if (rep->_derivedCounts[i].second.t
551  > 0)
552  {
553  config[k++]
554  = rep->_derivedCounts[i]
555  .second.t;
556  }
557  if (rep->_derivedCounts[i].second.zero
558  > 0)
559  {
560  config[k++]
561  = rep->_derivedCounts[i]
562  .second.zero;
563  }
564  if (rep->_derivedCounts[i].second.one
565  > 0)
566  {
567  config[k++]
568  = rep->_derivedCounts[i]
569  .second.one;
570  }
571  for (int i = 0; i < 2; ++i)
572  {
573  double sample_size_adjust
574  = (i == 0)
575  ? double(config[1])
576  : double(config[0]);
577  H += (2.0
578  * pow(double(config[i]),
579  2.0))
580  / ((double(samplesize)
581  - sample_size_adjust)
582  * (double(samplesize)
583  - sample_size_adjust
584  - 1.0));
585  }
586  }
587  }
588  }
589  }
590  return H;
591  }
592 
593  double
594  PolySNP::ThetaL(void) const
633  {
634  if (!rep->_haveOutgroup)
635  {
636  return std::numeric_limits<double>::quiet_NaN();
637  }
638  assert(rep->_preprocessed);
639  double thetal = 0.0;
640  if (rep->_NumPoly == 0)
641  return thetal;
642  bool anc_is_present = 0; // is ancestral state present in the ingroup?
643 
644  for (unsigned i = 0; i < rep->_nsites; ++i)
645  { // iterate over sites
646  if (rep->_derivedCounts[i].first == true
647  && rep->_derivedCounts[i].second.gap == 0)
648  {
649  unsigned samplesize
650  = rep->_totsam; // sample size per site
651  unsigned sumDerCounts = 0;
652  sumDerCounts += rep->_derivedCounts[i].second.a;
653  sumDerCounts += rep->_derivedCounts[i].second.g;
654  sumDerCounts += rep->_derivedCounts[i].second.c;
655  sumDerCounts += rep->_derivedCounts[i].second.t;
656  sumDerCounts += rep->_derivedCounts[i].second.zero;
657  sumDerCounts += rep->_derivedCounts[i].second.one;
658  unsigned ancestralCounts
659  = samplesize - sumDerCounts
660  - rep->_derivedCounts[i].second.n;
661 
662  // if the ancestral state is not missing data, and
663  // the sum of the derived counts + and missing data in
664  // the ingroup
665  // does not equal the sample size, then the ancestral
666  // state is present
667  // at least once in the ingroup, and anc_is_present =
668  // true
669  anc_is_present = (rep->_derivedCounts[i].first == true
670  && ancestralCounts > 0)
671  ? true
672  : false;
673  if (anc_is_present) // ancestral state must be present
674  {
675  // number of derived states seen at this site
676  unsigned numDer
677  = rep->_derivedCounts[i].second.nStates();
678  if (numDer == 1)
679  { // simple if there is only one derived
680  // state inferred
681  samplesize
682  -= rep->_derivedCounts[i].second.n;
683  double denom
684  = (double(samplesize) - 1.0);
685  thetal
686  += (rep->_derivedCounts[i].second.a
687  > 0)
688  ? double(
689  rep->_derivedCounts[i]
690  .second.a)
691  / denom
692  : 0.;
693  thetal
694  += (rep->_derivedCounts[i].second.g
695  > 0)
696  ? double(
697  rep->_derivedCounts[i]
698  .second.g)
699  / denom
700  : 0.;
701  thetal
702  += (rep->_derivedCounts[i].second.c
703  > 0)
704  ? double(
705  rep->_derivedCounts[i]
706  .second.c)
707  / denom
708  : 0.;
709  thetal
710  += (rep->_derivedCounts[i].second.t
711  > 0)
712  ? double(
713  rep->_derivedCounts[i]
714  .second.t)
715  / denom
716  : 0.;
717  thetal
718  += (rep->_derivedCounts[i]
719  .second.zero
720  > 0)
721  ? double(
722  rep->_derivedCounts[i]
723  .second.zero)
724  / denom
725  : 0.;
726  thetal
727  += (rep->_derivedCounts[i]
728  .second.one
729  > 0)
730  ? double(
731  rep->_derivedCounts[i]
732  .second.one)
733  / denom
734  : 0.;
735  }
736  else if (numDer == 2
737  && rep->_haveOutgroup) // MUST have
738  // outgroup--else
739  // can't proceed
740  { // use a "missing data" scheme if there
741  // is >1 derived state
742  // iterate over derived states
743  unsigned config[2];
744  unsigned k = 0;
745  if (rep->_derivedCounts[i].second.a
746  > 0)
747  {
748  config[k++]
749  = rep->_derivedCounts[i]
750  .second.a;
751  }
752  if (rep->_derivedCounts[i].second.g
753  > 0)
754  {
755  config[k++]
756  = rep->_derivedCounts[i]
757  .second.g;
758  }
759  if (rep->_derivedCounts[i].second.c
760  > 0)
761  {
762  config[k++]
763  = rep->_derivedCounts[i]
764  .second.c;
765  }
766  if (rep->_derivedCounts[i].second.t
767  > 0)
768  {
769  config[k++]
770  = rep->_derivedCounts[i]
771  .second.t;
772  }
773  if (rep->_derivedCounts[i].second.zero
774  > 0)
775  {
776  config[k++]
777  = rep->_derivedCounts[i]
778  .second.zero;
779  }
780  if (rep->_derivedCounts[i].second.one
781  > 0)
782  {
783  config[k++]
784  = rep->_derivedCounts[i]
785  .second.one;
786  }
787  for (int i = 0; i < 2; ++i)
788  {
789  double sample_size_adjust
790  = (i == 0)
791  ? double(config[1])
792  : double(config[0]);
793  thetal
794  += (double(config[i]))
795  / (double(samplesize)
796  - sample_size_adjust
797  - 1.0);
798  }
799  }
800  }
801  }
802  }
803  return thetal;
804  }
805 
806  unsigned
807  PolySNP::NumPoly(void) const
811  {
812  assert(rep->_preprocessed);
813  unsigned npoly = 0;
814  for (unsigned i = 0; i < rep->_nsites; ++i)
815  { // iterate over sites
816  if (rep->_counts[i].nStates() > 1 && rep->_counts[i].gap == 0)
817  ++npoly;
818  }
819  return npoly;
820  }
821 
822  unsigned
828  {
829  assert(rep->_preprocessed);
830  unsigned nmut = 0;
831 
832  for (unsigned i = 0; i < rep->_nsites; ++i)
833  { // iterate over sites
834  unsigned nstates = (rep->_counts[i].gap == 0)
835  ? rep->_counts[i].nStates()
836  : 0;
837  if (nstates > 1)
838  nmut += nstates - 1;
839  }
840  return nmut;
841  }
842 
843  unsigned
849  {
850  assert(rep->_preprocessed);
851  unsigned nsing = 0;
852  for (unsigned i = 0; i < rep->_nsites; ++i)
853  { // iterate over sites
854  unsigned curr_nsing = 0;
855  unsigned nstates = rep->_counts[i].nStates();
856  if (rep->_counts[i].gap == 0 && nstates > 1)
857  {
858  unsigned nsam = rep->_totsam - rep->_counts[i].n;
859  if (nsam == 2 && nstates == 2) // if n = 2 and there
860  // are 2 states, there
861  // must be 1 singleton
862  curr_nsing = 1;
863  else
864  {
865  curr_nsing
866  += (rep->_counts[i].a == 1) ? 1u : 0u;
867  curr_nsing
868  += (rep->_counts[i].g == 1) ? 1u : 0u;
869  curr_nsing
870  += (rep->_counts[i].c == 1) ? 1u : 0u;
871  curr_nsing
872  += (rep->_counts[i].t == 1) ? 1u : 0u;
873  curr_nsing
874  += (rep->_counts[i].zero == 1) ? 1u : 0u;
875  curr_nsing
876  += (rep->_counts[i].one == 1) ? 1u : 0u;
877  }
878  }
879  nsing += curr_nsing;
880  }
881  return nsing;
882  }
883 
884  unsigned
891  {
892  if (!rep->_haveOutgroup)
893  return SEQMAXUNSIGNED;
894  assert(rep->_preprocessed);
895  unsigned next = 0;
896  for (unsigned i = 0; i < rep->_nsites; ++i)
897  { // iterate over sites
898  unsigned nsam = rep->_totsam;
899  unsigned curr_next = 0;
900  if (rep->_derivedCounts[i].first == true
901  && rep->_derivedCounts[i].second.gap == 0)
902  {
903  nsam -= rep->_derivedCounts[i].second.n;
904  curr_next += (rep->_derivedCounts[i].second.a == 1)
905  ? 1u
906  : 0u;
907  curr_next += (rep->_derivedCounts[i].second.g == 1)
908  ? 1u
909  : 0u;
910  curr_next += (rep->_derivedCounts[i].second.c == 1)
911  ? 1u
912  : 0u;
913  curr_next += (rep->_derivedCounts[i].second.t == 1)
914  ? 1u
915  : 0u;
916  curr_next += (rep->_derivedCounts[i].second.zero == 1)
917  ? 1u
918  : 0u;
919  curr_next += (rep->_derivedCounts[i].second.one == 1)
920  ? 1u
921  : 0u;
922  }
923  next += (nsam > 1) ? curr_next : 0u;
924  }
925  return next;
926  }
927 
928  double
929  PolySNP::TajimasD(void) const
937  {
938  assert(rep->_preprocessed);
939  if (rep->_NumPoly == 0)
940  return std::numeric_limits<double>::quiet_NaN();
941  double D = 0.0;
942  double Pi = ThetaPi();
943  double W = ThetaW();
944  if (fabs(Pi - 0.) <= DBL_EPSILON && fabs(W - 0.) <= DBL_EPSILON)
945  D = 0.0;
946  else
947  D = (Pi - W) / Dnominator();
948  return D;
949  }
950 
951  double
952  PolySNP::Hprime(const bool &likeThorntonAndolfatto) const
972  {
973  assert(rep->_preprocessed);
974  if (rep->_NumPoly == 0)
975  return std::numeric_limits<double>::quiet_NaN();
976  assert(rep->_haveOutgroup == true);
977  double a = a_sub_n();
978  double b = b_sub_n();
979  double pi = ThetaPi();
980  double theta = ThetaW();
981 
982  double thetal = ThetaL();
983  double b_n_plus1 = b_sub_n_plus1();
984  double S = (rep->_totMuts) ? NumMutations() : NumPoly();
985  double thetasq = (likeThorntonAndolfatto == false)
986  ? S * (S - 1) / (a * a + b)
987  : theta * theta;
988 
989  double vThetal = (rep->_totsam * theta) / (2.0 * (rep->_totsam - 1.0))
990  + (2.0 * pow(rep->_totsam / (rep->_totsam - 1.0), 2.0)
991  * (b_n_plus1 - 1.0)
992  - 1.0)
993  * thetasq;
994 
995  double vPi
996  = (3.0 * rep->_totsam * (rep->_totsam + 1.0) * theta
997  + 2.0 * (rep->_totsam * rep->_totsam + rep->_totsam + 3.0)
998  * thetasq)
999  / (9 * rep->_totsam * (rep->_totsam - 1.0));
1000 
1001  double cov
1002  = ((rep->_totsam + 1.0) / (3.0 * (rep->_totsam - 1.0))) * theta
1003  + ((7.0 * rep->_totsam * rep->_totsam + 3.0 * rep->_totsam - 2.0
1004  - 4.0 * rep->_totsam * (rep->_totsam + 1.0) * b_n_plus1)
1005  / (2.0 * pow((rep->_totsam - 1.0), 2.0)))
1006  * thetasq;
1007 
1008  double Hpr = pi - thetal;
1009  Hpr /= pow((vThetal + vPi - 2.0 * cov), 0.5);
1010  return (Hpr);
1011  }
1012 
1013  double
1020  {
1021  assert(rep->_preprocessed);
1022  if (rep->_NumPoly == 0)
1023  return std::numeric_limits<double>::quiet_NaN();
1024  double S = 0.0;
1025  if (rep->_totMuts)
1026  {
1027  S = double(NumMutations());
1028  }
1029  else if (!(rep->_totMuts))
1030  {
1031  S = double(NumPoly());
1032  }
1033  double a1, a2, b1, b2, c1, c2, e1, e2;
1034 
1035  a1 = a_sub_n();
1036  a2 = b_sub_n();
1037  b1 = (rep->_totsam + 1.0) / (3.0 * (rep->_totsam - 1.0));
1038  b2 = (2.0 * (pow(rep->_totsam, 2.0) + rep->_totsam + 3.0))
1039  / (9.0 * rep->_totsam * (rep->_totsam - 1.0));
1040  c1 = b1 - 1.0 / a1;
1041  c2 = b2 - (rep->_totsam + 2.0) / (a1 * rep->_totsam)
1042  + a2 / pow(a1, 2.0);
1043  e1 = c1 / a1;
1044  e2 = c2 / (pow(a1, 2.0) + a2);
1045  double denominator = pow((e1 * S + e2 * S * (S - 1.0)), 0.5);
1046  return (denominator);
1047  }
1048 
1049  void
1064  {
1065  assert(rep->_preprocessed);
1066  std::lock_guard<std::mutex> lock(rep->instance_lock);
1067  if (!(rep->_CalculatedDandV))
1068  {
1069  if (rep->_NumPoly == 0)
1070  {
1071  rep->_DVK = 1;
1072  rep->_DVH = 0.;
1073  return;
1074  }
1075  if (rep->_data->size() > 0)
1076  {
1077  // step 1 : determine which sequences are unique in the
1078  // data,
1079  // exluding missing data
1080  std::set<string, uniqueSeq> unique_haplotypes;
1081  if (rep->_haveOutgroup)
1082  {
1083  unique_haplotypes.insert(rep->_data->begin(),
1084  rep->_data->begin()
1085  + rep->_outgroup);
1086  unique_haplotypes.insert(
1087  rep->_data->begin() + rep->_outgroup + 1,
1088  rep->_data->end());
1089  }
1090  else
1091  {
1092  unique_haplotypes.insert(rep->_data->begin(),
1093  rep->_data->end());
1094  }
1095  std::vector<std::string> vuhaps(
1096  unique_haplotypes.size());
1097  rep->_DVK = unsigned(unique_haplotypes.size());
1098  std::move(unique_haplotypes.begin(),
1099  unique_haplotypes.end(), vuhaps.begin());
1100  double homozygosity = 0.0;
1101  for (auto &uh : vuhaps)
1102  {
1103  double c = 0.0;
1104  if (rep->_haveOutgroup)
1105  {
1106  c = static_cast<double>(std::count_if(
1107  rep->_data->begin(),
1108  rep->_data->begin()
1109  + rep->_outgroup,
1110  [&uh](const std::string &__s) {
1111  return !Different(__s, uh,
1112  false, true);
1113  }));
1114  c += static_cast<double>(std::count_if(
1115  rep->_data->begin()
1116  + rep->_outgroup + 1,
1117  rep->_data->end(),
1118  [&uh](const std::string &__s) {
1119  return !Different(__s, uh,
1120  false, true);
1121  }));
1122  }
1123  else
1124  {
1125  c = static_cast<double>(std::count_if(
1126  rep->_data->begin(),
1127  rep->_data->end(),
1128  [&uh](const std::string &__s) {
1129  return !Different(__s, uh,
1130  false, true);
1131  }));
1132  }
1133  c /= static_cast<double>(rep->_totsam);
1134  homozygosity += pow(c, 2.);
1135  }
1136  rep->_DVH -= homozygosity;
1137  rep->_DVH *= rep->_totsam / (rep->_totsam - 1.0);
1138  rep->_CalculatedDandV = 1;
1139  }
1140  }
1141  }
1142 
1143  double
1144  PolySNP::WallsB(void) const
1150  {
1151  assert(rep->_preprocessed);
1152  WallStats();
1153  return rep->_walls_B;
1154  }
1155 
1156  void
1157  PolySNP::WallStats(void) const
1158  {
1159  assert(rep->_preprocessed);
1160  if (!rep->_calculated_wall_stats)
1161  {
1162  unsigned S = 0;
1163  // explicity count # of bi-allelic sites,
1164  // since that's the proper denominator
1165  for (std::vector<stateCounter>::const_iterator itr
1166  = rep->_counts.begin();
1167  itr < rep->_counts.end(); ++itr)
1168  {
1169  if (itr->nStates() == 2 && itr->gap == 0)
1170  ++S;
1171  }
1172  if (S > 1)
1173  {
1174  // std::ptrdiff_t nhap_curr, nhap_left;
1175  std::set<std::basic_string<char>,
1176  Sequence::uniqueSeq>::size_type nhap_curr,
1177  nhap_left;
1178 
1179  nhap_left = SEQMAXUNSIGNED;
1180 
1181  unsigned A = 0; // number of partitions with D' = 1
1182  // (see Wall 1999)
1183  // iterate over sites (actually, adjacent pairs of
1184  // sites)
1185  for (unsigned site1 = 0; site1 < rep->_nsites - 1;
1186  ++site1)
1187  {
1188  for (unsigned site2 = site1 + 1;
1189  site2 < rep->_nsites; ++site2)
1190  {
1191  if (rep->_counts[site1].nStates() == 2
1192  && rep->_counts[site2].nStates()
1193  == 2)
1194  {
1195  std::string config;
1196  config.resize(2);
1197  std::set<string, uniqueSeq>
1198  unique_haplotypes;
1199  nhap_curr = 0;
1200  for (unsigned i = 0;
1201  i < rep->_nsam; ++i)
1202  {
1203  if ((!rep->_haveOutgroup)
1204  || (rep->_haveOutgroup
1205  && i != rep->_outgroup))
1206  {
1207  config[0]
1208  = (*rep->_data)
1209  [i]
1210  [site1];
1211  config[1]
1212  = (*rep->_data)
1213  [i]
1214  [site2];
1215  unique_haplotypes
1216  .insert(
1217  config);
1218  }
1219  }
1220  nhap_curr
1221  = unique_haplotypes.size();
1222  if (site1 == 0)
1223  {
1224  if (nhap_curr == 2)
1225  {
1226  ++rep->_walls_Bprime;
1227  ++A;
1228  }
1229  }
1230  else
1231  {
1232  if (nhap_curr == 2)
1233  ++rep->_walls_Bprime;
1234  if (nhap_curr == 2
1235  && nhap_left != 2)
1236  ++A;
1237  }
1238  nhap_left = nhap_curr;
1239  site1 = site2;
1240  }
1241  }
1242  }
1243  rep->_walls_B
1244  = double(rep->_walls_Bprime) / (double(S - 1));
1245  rep->_walls_Q
1246  = (double(rep->_walls_Bprime) + double(A))
1247  / (double(S));
1248  }
1249  else
1250  {
1251  rep->_walls_B
1252  = std::numeric_limits<double>::quiet_NaN();
1253  rep->_walls_Bprime = 0;
1254  rep->_walls_Q
1255  = std::numeric_limits<double>::quiet_NaN();
1256  }
1257  }
1258  rep->_calculated_wall_stats = true;
1259  }
1260 
1261  unsigned
1268  {
1269  assert(rep->_preprocessed);
1270  WallStats();
1271  return rep->_walls_Bprime;
1272  }
1273 
1274  double
1275  PolySNP::WallsQ(void) const
1281  {
1282  assert(rep->_preprocessed);
1283  WallStats();
1284  return rep->_walls_Q;
1285  }
1286 
1287  double
1288  PolySNP::VarPi(void) const
1294  {
1295  if (rep->_data->empty() || !NumPoly())
1296  return std::numeric_limits<double>::quiet_NaN();
1297  double Pi = ThetaPi();
1298  double variance = 3.0 * rep->_totsam * (rep->_totsam + 1.0) * Pi
1299  + 2.0 * (pow(rep->_totsam, 2.0) + rep->_totsam + 3.0)
1300  * pow(Pi, 2.0);
1301  variance /= (11.0 * pow(rep->_totsam, 2.0) - 7.0 * rep->_totsam + 6.0);
1302  return (variance);
1303  }
1304 
1305  double
1312  {
1313  if (rep->_data->empty() || !NumPoly())
1314  return std::numeric_limits<double>::quiet_NaN();
1315  double Pi = ThetaPi();
1316  double variance
1317  = (3.0 * pow(rep->_totsam, 2.0) - 3.0 * rep->_totsam + 2.0) * Pi
1318  + 2.0 * rep->_totsam * (rep->_totsam - 1.0) * pow(Pi, 2.0);
1319  variance /= (11.0 * pow(rep->_totsam, 2.0) - 7.0 * rep->_totsam + 6.0);
1320  return (variance);
1321  }
1322 
1323  double
1330  {
1331  if (rep->_data->empty() || !NumPoly())
1332  return std::numeric_limits<double>::quiet_NaN();
1333  double Pi = ThetaPi();
1334  double variance = 2.0 * (3.0 * rep->_totsam - 1.0) * Pi
1335  + 2.0 * (2.0 * rep->_totsam + 3.0) * pow(Pi, 2.0);
1336  variance /= (11.0 * pow(rep->_totsam, 2.0) - 7.0 * rep->_totsam + 6.0);
1337  return (variance);
1338  }
1339 
1340  double
1346  {
1347  if (rep->_data->empty() || !NumPoly())
1348  return std::numeric_limits<double>::quiet_NaN();
1349  double a1 = a_sub_n();
1350  double a2 = b_sub_n();
1351  double S = (rep->_totMuts) ? NumMutations() : NumPoly();
1352  double variance = pow(a1, 2.0) * S + a2 * pow(S, 2.0);
1353  variance /= pow(a1, 2.0) * (pow(a1, 2.0) + a2);
1354  return (variance);
1355  }
1356 
1357  // correct
1358  double
1359  PolySNP::FuLiD(void) const
1367  {
1368  assert(rep->_preprocessed);
1369  // assert(rep->_haveOutgroup == true);
1370  if (rep->_NumPoly == 0 || !rep->_haveOutgroup)
1371  return std::numeric_limits<double>::quiet_NaN();
1372  double ExternalMutations = double(NumExternalMutations());
1373  double NumMut = double(NumMutations());
1374  double a = a_sub_n();
1375  double b = b_sub_n();
1376  double c = c_sub_n();
1377  double vD = 1.0
1378  + (pow(a, 2.0) / (b + pow(a, 2.0))
1379  * (c - (rep->_totsam + 1.0) / (rep->_totsam - 1.0)));
1380  double uD = a - 1.0 - vD;
1381  double D = NumMut - a * double(ExternalMutations);
1382  D /= pow((uD * NumMut + vD * pow(NumMut, 2.0)), 0.5);
1383  return (D);
1384  }
1385 
1386  // correct
1387  double
1388  PolySNP::FuLiF(void) const
1395  {
1396  assert(rep->_preprocessed);
1397  if (rep->_NumPoly == 0 || !rep->_haveOutgroup)
1398  return std::numeric_limits<double>::quiet_NaN();
1399  double Pi = ThetaPi();
1400  double NumMut = double(NumMutations());
1401  double ExternalMutations = double(NumExternalMutations());
1402  double a = a_sub_n();
1403  double a_n_plus1 = a_sub_n_plus1();
1404  double b = b_sub_n();
1405  double c = c_sub_n();
1406  double vF
1407  = c
1408  + 2.0 * (pow(rep->_totsam, 2.0) + rep->_totsam + 3.0)
1409  / (9.0 * rep->_totsam * (double(rep->_totsam - 1.0)));
1410  vF -= (2.0 / (rep->_totsam - 1.0));
1411  vF /= (pow(a, 2.0) + b);
1412 
1413  double uF
1414  = 1.0
1415  + (rep->_totsam + 1.0) / (3.0 * (double(rep->_totsam - 1.0)));
1416  uF -= 4.0 * ((rep->_totsam + 1.0) / (pow(rep->_totsam - 1.0, 2.0)))
1417  * (a_n_plus1 - 2.0 * rep->_totsam / (rep->_totsam + 1.0));
1418  uF /= a;
1419  uF -= vF;
1420 
1421  double F = Pi - ExternalMutations;
1422  F /= pow(uF * NumMut + vF * pow(NumMut, 2.0), 0.5);
1423  return (F);
1424  }
1425 
1426  // correct
1427  double
1433  {
1434  assert(rep->_preprocessed);
1435  if (rep->_NumPoly == 0)
1436  return std::numeric_limits<double>::quiet_NaN();
1437  double Singletons = double(NumSingletons());
1438  double NumMut = double(NumMutations());
1439 
1440  double a = a_sub_n();
1441  double b = b_sub_n();
1442  double d = d_sub_n();
1443 
1444  double vD = pow(rep->_totsam / (rep->_totsam - 1.0), 2.0) * b;
1445  vD += pow(a, 2.0) * d;
1446  vD -= 2.0 * (rep->_totsam * a * (a + 1.0))
1447  / (pow(double(rep->_totsam - 1.0), 2.0));
1448  vD /= (pow(a, 2.0) + b);
1449 
1450  double uD = (rep->_totsam / (rep->_totsam - 1.0))
1451  * (a - (rep->_totsam / (rep->_totsam - 1.0)))
1452  - vD;
1453 
1454  double DStar = (rep->_totsam / (rep->_totsam - 1.0)) * NumMut
1455  - a * double(Singletons);
1456  DStar /= pow(uD * NumMut + vD * pow(NumMut, 2.0), 0.5);
1457  return (DStar);
1458  }
1459 
1460  // correct
1461  double
1470  {
1471  assert(rep->_preprocessed);
1472  if (rep->_NumPoly == 0)
1473  return std::numeric_limits<double>::quiet_NaN();
1474  double Singletons = double(NumSingletons());
1475  double Pi = ThetaPi();
1476  double NumMut = double(NumMutations());
1477 
1478  double a = a_sub_n();
1479  double a_n_plus1 = a_sub_n_plus1();
1480  double b = b_sub_n();
1481  // vF is taken from the correction published by
1482  // Simonsen et al. (1995) Genetics 141: 413, eqn A5
1483  double vF = 2.0 * pow(rep->_totsam, 3.0)
1484  + 110.0 * pow(rep->_totsam, 2.0) - 255.0 * rep->_totsam
1485  + 153.0;
1486  vF /= (9.0 * pow(rep->_totsam, 2.0) * (rep->_totsam - 1.0));
1487  vF += (((2.0 * (rep->_totsam - 1.0) * a) / pow(rep->_totsam, 2.0))
1488  - (8.0 * b / rep->_totsam));
1489  vF /= (pow(a, 2.0) + b);
1490 
1491  double uF = (4.0 * pow(rep->_totsam, 2.0) + 19.0 * rep->_totsam + 3.0
1492  - 12.0 * (rep->_totsam + 1.0) * a_n_plus1);
1493  uF /= (3.0 * rep->_totsam * (rep->_totsam - 1.0));
1494  uF /= a;
1495  uF -= vF;
1496  double FStar
1497  = Pi
1498  - (((rep->_totsam - 1.0) / rep->_totsam)) * double(Singletons);
1499  FStar /= pow((uF * NumMut + vF * pow(NumMut, 2.0)), 0.5);
1500  return (FStar);
1501  }
1502 
1503  double
1504  PolySNP::a_sub_n(void) const
1510  {
1511  assert(rep->_preprocessed);
1512  int i;
1513  double a = 0.0;
1514  for (i = 1; i < int(rep->_totsam); ++i)
1515  a += 1. / double(i);
1516  return a;
1517  }
1518 
1519  double
1525  { // used by Fu and Li tests
1526  assert(rep->_preprocessed);
1527  int i;
1528  double a = 0.0;
1529  for (i = 1; i < int(rep->_totsam) + 1; ++i)
1530  {
1531  a += 1. / double(i);
1532  }
1533  return (a);
1534  }
1535 
1536  double
1537  PolySNP::b_sub_n(void) const
1542  { // sum of 1/i^2
1543  assert(rep->_preprocessed);
1544  int i;
1545  double b = 0.0;
1546  for (i = 1; i < int(rep->_totsam); ++i)
1547  b += 1. / (pow(double(i), 2.0));
1548  return b;
1549  }
1550 
1551  double
1558  { // sum of 1/i^2
1559  assert(rep->_preprocessed);
1560  int i;
1561  double b = 0.0;
1562  for (i = 1; i < int(rep->_totsam) + 1; ++i)
1563  b += 1. / (pow(double(i), 2.0));
1564  return b;
1565  }
1566 
1567  double
1568  PolySNP::c_sub_n(void) const
1579  { // from Fu and Li 93
1580  assert(rep->_preprocessed);
1581  double c = 0.0, a = a_sub_n();
1582  if (fabs(rep->_totsam - 2.) <= DBL_EPSILON)
1583  {
1584  c = 1.0;
1585  }
1586  else
1587  {
1588  c = 2.0 * (rep->_totsam * a - 2.0 * (rep->_totsam - 1.0));
1589  c /= ((rep->_totsam - 1.0) * (rep->_totsam - 2.0));
1590  }
1591  return c;
1592  }
1593 
1594  double
1595  PolySNP::d_sub_n(void) const
1601  { // from Fu and Li 93
1602  assert(rep->_preprocessed);
1603  double a_n_plus1, c, d;
1604  a_n_plus1 = a_sub_n_plus1();
1605  c = c_sub_n();
1606  d = c + (rep->_totsam - 2.0) / (pow(rep->_totsam - 1.0, 2.0));
1607  d += (2.0 / (rep->_totsam - 1.0))
1608  * (1.5 - ((2.0 * a_n_plus1 - 3.0) / (rep->_totsam - 2.0))
1609  - 1.0 / rep->_totsam);
1610  return d;
1611  }
1612 
1613  double
1614  PolySNP::DandVH(void) const
1623  {
1624  if (!(rep->_CalculatedDandV))
1626 
1627  return rep->_DVH;
1628  }
1629 
1630  unsigned
1631  PolySNP::DandVK(void) const
1640  {
1641  if (!(rep->_CalculatedDandV))
1643 
1644  return rep->_DVK;
1645  }
1646 
1647  double
1648  PolySNP::HudsonsC(void) const
1656  {
1657  assert(rep->_preprocessed);
1658  if (rep->_NumPoly == 0)
1659  return std::numeric_limits<double>::quiet_NaN();
1660  return (Recombination::HudsonsC(rep->_data, rep->_haveOutgroup,
1661  rep->_outgroup));
1662  }
1663 
1664  unsigned
1665  PolySNP::Minrec(void) const
1672  {
1673  assert(rep->_preprocessed);
1674  if (rep->_NumPoly < 2)
1675  return SEQMAXUNSIGNED;
1676  unsigned a, b, e, numgametes, Rmin = 0, x = 0;
1677  bool flag = false;
1678 
1679  for (a = x + 1; a < rep->_nsites; ++a)
1680  {
1681  char c11, c12;
1682  unsigned states1 = 0;
1683  c11 = c12 = 'Z'; // Z is a dummy value
1684  // count # states in site a
1685  states1 = rep->_counts[a].nStates();
1686 
1687  c11 = (c11 == 'Z' && rep->_counts[a].a > 0) ? 'A' : 'Z';
1688  c11 = (c11 == 'Z' && rep->_counts[a].g > 0) ? 'G' : c11;
1689  c11 = (c11 == 'Z' && rep->_counts[a].c > 0) ? 'C' : c11;
1690  c11 = (c11 == 'Z' && rep->_counts[a].t > 0) ? 'T' : c11;
1691  c11 = (c11 == 'Z' && rep->_counts[a].zero > 0) ? '0' : c11;
1692  c11 = (c11 == 'Z' && rep->_counts[a].one > 0) ? '1' : c11;
1693 
1694  c12 = (c12 == 'Z' && c11 != 'A' && rep->_counts[a].a > 0)
1695  ? 'A'
1696  : 'Z';
1697  c12 = (c12 == 'Z' && c11 != 'G' && rep->_counts[a].g > 0)
1698  ? 'G'
1699  : c12;
1700  c12 = (c12 == 'Z' && c11 != 'C' && rep->_counts[a].c > 0)
1701  ? 'C'
1702  : c12;
1703  c12 = (c12 == 'Z' && c11 != 'T' && rep->_counts[a].t > 0)
1704  ? 'T'
1705  : c12;
1706  c12 = (c12 == 'Z' && c11 != '0' && rep->_counts[a].zero > 0)
1707  ? '0'
1708  : c12;
1709  c12 = (c12 == 'Z' && c11 != '1' && rep->_counts[a].one > 0)
1710  ? '1'
1711  : c12;
1712 
1713  for (b = (flag == false) ? x : a - 1; b < a; ++b)
1714  {
1715  flag = false;
1716  numgametes = 0;
1717  unsigned states2 = rep->_counts[b].nStates();
1718  // need to skip sites with > 2 states
1719  if (states1 == 2 && states2 == 2)
1720  {
1721  char c21 = 'Z', c22 = 'Z';
1722  c21 = (c21 == 'Z' && rep->_counts[b].a > 0)
1723  ? 'A'
1724  : 'Z';
1725  c21 = (c21 == 'Z' && rep->_counts[b].g > 0)
1726  ? 'G'
1727  : c21;
1728  c21 = (c21 == 'Z' && rep->_counts[b].c > 0)
1729  ? 'C'
1730  : c21;
1731  c21 = (c21 == 'Z' && rep->_counts[b].t > 0)
1732  ? 'T'
1733  : c21;
1734  c21 = (c21 == 'Z' && rep->_counts[b].zero > 0)
1735  ? '0'
1736  : c21;
1737  c21 = (c21 == 'Z' && rep->_counts[b].one > 0)
1738  ? '1'
1739  : c21;
1740 
1741  c22 = (c22 == 'Z' && c21 != 'A'
1742  && rep->_counts[b].a > 0)
1743  ? 'A'
1744  : 'Z';
1745  c22 = (c22 == 'Z' && c21 != 'G'
1746  && rep->_counts[b].g > 0)
1747  ? 'G'
1748  : c22;
1749  c22 = (c22 == 'Z' && c21 != 'C'
1750  && rep->_counts[b].c > 0)
1751  ? 'C'
1752  : c22;
1753  c22 = (c22 == 'Z' && c21 != 'T'
1754  && rep->_counts[b].t > 0)
1755  ? 'T'
1756  : c22;
1757  c22 = (c22 == 'Z' && c21 != '0'
1758  && rep->_counts[b].zero > 0)
1759  ? '0'
1760  : c22;
1761  c22 = (c22 == 'Z' && c21 != '1'
1762  && rep->_counts[b].one > 0)
1763  ? '1'
1764  : c22;
1765 
1766  for (e = 0; e < rep->_nsam; ++e)
1767  {
1768  if (!rep->_haveOutgroup
1769  || (rep->_haveOutgroup
1770  && e != rep->_outgroup))
1771  if (toupper((*rep->_data)[e][a])
1772  == c11
1773  && toupper((*rep->_data)[e][b])
1774  == c21)
1775  {
1776  ++numgametes;
1777  break;
1778  }
1779  }
1780  for (e = 0; e < rep->_nsam; ++e)
1781  {
1782  if (!rep->_haveOutgroup
1783  || (rep->_haveOutgroup
1784  && e != rep->_outgroup))
1785  if (toupper((*rep->_data)[e][a])
1786  == c11
1787  && toupper((*rep->_data)[e][b])
1788  == c22)
1789  {
1790  ++numgametes;
1791  break;
1792  }
1793  }
1794  for (e = 0; e < rep->_nsam; ++e)
1795  {
1796  if (!rep->_haveOutgroup
1797  || (rep->_haveOutgroup
1798  && e != rep->_outgroup))
1799  if (toupper((*rep->_data)[e][a])
1800  == c12
1801  && toupper((*rep->_data)[e][b])
1802  == c21)
1803  {
1804  ++numgametes;
1805  break;
1806  }
1807  }
1808  for (e = 0; e < rep->_nsam; ++e)
1809  {
1810  if (!rep->_haveOutgroup
1811  || (rep->_haveOutgroup
1812  && e != rep->_outgroup))
1813  if (toupper((*rep->_data)[e][a])
1814  == c12
1815  && toupper((*rep->_data)[e][b])
1816  == c22)
1817  {
1818  ++numgametes;
1819  break;
1820  }
1821  }
1822  if (numgametes == 4)
1823  {
1824  ++Rmin;
1825  flag = true;
1826  break;
1827  }
1828  }
1829  }
1830  if (flag == true)
1831  x = a;
1832  }
1833  return Rmin;
1834  }
1835 
1836  std::vector<PairwiseLDstats>
1837  PolySNP::Disequilibrium(const unsigned &mincount,
1838  const double &max_marker_distance) const
1854  {
1855  assert(rep->_preprocessed);
1856  if (rep->_NumPoly < 2)
1857  return std::vector<PairwiseLDstats>();
1858  return Recombination::Disequilibrium(rep->_data, rep->_haveOutgroup,
1859  rep->_outgroup, mincount,
1860  max_marker_distance);
1861  }
1862 } // namespace Sequence
The base class for polymorphism tables.
bool operator()(const std::pair< key, value > &l, const std::pair< key, value > &r) const
virtual double FuLiF(void) const
Definition: PolySNP.cc:1388
double thetal(const AlleleCountMatrix &ac, const std::int8_t refstate)
Zeng et al. .
double VarPi(void) const
Definition: PolySNP.cc:1288
Methods dealing with recombination.
void preprocess(void)
Definition: PolySNP.cc:92
virtual unsigned NumMutations(void) const
Definition: PolySNP.cc:823
double DandVH(void) const
Definition: PolySNP.cc:1614
virtual unsigned NumSingletons(void) const
Definition: PolySNP.cc:844
virtual double ThetaH(void) const
Definition: PolySNP.cc:366
double HudsonsC(void) const
Definition: PolySNP.cc:1648
STL namespace.
unsigned NumPoly(void) const
Definition: PolySNP.cc:807
virtual double TajimasD(void) const
Definition: PolySNP.cc:929
Sequence::PolyTable, a virtual base class for polymorphism tables.
The namespace in which this library resides.
virtual double ThetaW(void) const
Definition: PolySNP.cc:314
double b_sub_n(void) const
Definition: PolySNP.cc:1537
virtual unsigned Minrec(void) const
Definition: PolySNP.cc:1665
double VarThetaW(void) const
Definition: PolySNP.cc:1341
unsigned DandVK(void) const
Definition: PolySNP.cc:1631
declaration of Sequence::PolySNP, a class to analyze SNP data
void DepaulisVeuilleStatistics(void) const
Definition: PolySNP.cc:1050
double b_sub_n_plus1(void) const
Definition: PolySNP.cc:1552
double SamplingVarPi(void) const
Definition: PolySNP.cc:1324
virtual double WallsB(void) const
Definition: PolySNP.cc:1144
double a_sub_n_plus1(void) const
Definition: PolySNP.cc:1520
std::vector< PairwiseLDstats > Disequilibrium(const Sequence::PolyTable *data, const bool &haveOutgroup=false, const unsigned &outgroup=0, const unsigned &mincount=1, const double max_distance=std::numeric_limits< double >::max()) __attribute__((deprecated))
Calculate pairwise LD for a Sequence::PolyTable.
PolySNP(const Sequence::PolyTable *data, bool haveOutgroup=false, unsigned outgroup=0, bool totMuts=true)
Definition: PolySNP.cc:171
virtual double FuLiFStar(void) const
Definition: PolySNP.cc:1462
virtual unsigned NumExternalMutations(void) const
Definition: PolySNP.cc:885
const unsigned SEQMAXUNSIGNED
Definition: SeqConstants.cc:32
virtual double Dnominator(void) const
Definition: PolySNP.cc:1014
virtual double ThetaL(void) const
Definition: PolySNP.cc:594
double variance(iterator beg, iterator end)
virtual double ThetaPi(void) const
Definition: PolySNP.cc:193
double c_sub_n(void) const
Definition: PolySNP.cc:1568
double HudsonsC(const Sequence::PolyTable *data, const bool &haveOutgroup, const unsigned &outgroup) __attribute__((deprecated))
virtual unsigned WallsBprime(void) const
Definition: PolySNP.cc:1262
virtual double Hprime(const bool &likeThorntonAndolfatto=false) const
Definition: PolySNP.cc:952
namespace Sequence::Recombination
double a_sub_n(void) const
Definition: PolySNP.cc:1504
std::vector< PairwiseLDstats > Disequilibrium(const unsigned &mincount=1, const double &max_marker_distance=std::numeric_limits< double >::max()) const
Definition: PolySNP.cc:1837
virtual double WallsQ(void) const
Definition: PolySNP.cc:1275
bool Different(const std::string &seq1, const std::string &seq2, const bool &skip_missing=true, const bool &nucleic_acid=true)
Definition: Comparisons.cc:98
double d_sub_n(void) const
Definition: PolySNP.cc:1595
double StochasticVarPi(void) const
Definition: PolySNP.cc:1306
virtual double FuLiDStar(void) const
Definition: PolySNP.cc:1428
declaration of Sequence::stateCounter, a class to keep track of nucleotide counts either at a site in...
delcaration of routines for comparing DNA sequences This file declares a set of functions useful for ...
virtual double FuLiD(void) const
Definition: PolySNP.cc:1359