libsequence  1.9.5
HKA.cc
1 /*
2 
3 Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
4 
5 Remove the brackets to email me.
6 
7 This file is part of libsequence.
8 
9 libsequence is free software: you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation, either version 3 of the License, or
12 (at your option) any later version.
13 
14 libsequence is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18 
19 You should have received a copy of the GNU General Public License
20 long with libsequence. If not, see <http://www.gnu.org/licenses/>.
21 
22 */
23 
24 #include <Sequence/HKA.hpp>
25 #include <cmath>
26 
27 namespace
28 {
29  double Cn(unsigned n)
30  {
31  double cn=0.;
32  for(unsigned i=1;i<n;++i)
33  {
34  cn += 1./double(i);
35  }
36  return cn;
37  }
38 
39  double Cnsq(unsigned n)
40  {
41  double cn=0.;
42  for(unsigned i=1;i<n;++i)
43  {
44  cn += 1./double(i*i);
45  }
46  return cn;
47  }
48 }
49 
50 namespace Sequence
51 {
52  HKAdata::HKAdata() : SA(0),SB(0),D(0),nA(0),nB(0)
56  {
57  }
58 
59  HKAdata::HKAdata(const HKAdata & d) :
60  SA(d.SA),SB(d.SB),D(d.D),nA(d.nA),nB(d.nB)
64  {
65  }
66  HKAdata::HKAdata(unsigned sa,unsigned sb,double d,
67  unsigned na,unsigned nb) :
68  SA(sa),SB(sb),D(d),nA(na),nB(nb)
77  {
78  }
79 
80  HKAresults::HKAresults (const std::vector<double> & _thetas,
81  const std::vector< chisq_tuple > & _chisquareds,
82  const double & _fhat, const double & _That,
83  const double & _xsq,
84  const double & _xsqA,
85  const double & _xsqB)
86  : thetas(_thetas),chisquareds(_chisquareds),fhat(_fhat),
87  That(_That),xsq(_xsq),xsqA(_xsqA),xsqB(_xsqB)
97  {
98  }
99 
100  HKAresults calcHKA ( const std::vector< HKAdata > & data )
109  {
110  double sumD=0.;
111  //get some summary stats that we need
112  double sumTheta = 0.;
113  double fhat = 0.;
114  double cna,cnb=0.;
115  double that=0.;
116  unsigned n=0,sumSb=0;
117  for(unsigned i=0;i<data.size();++i)
118  {
119  sumD += data[i].D; // sum of divergence accross loci
120  cna = Cn(data[i].nA);
121  cnb += (data[i].nB > 1) ? Cn(data[i].nB) : 0.;
122  sumTheta += double(data[i].SA)/cna;
123  sumSb += data[i].SB;
124  n += (data[i].nB>1)?1u:0u;
125  //double f = (data[i].nB>1 && data[i].SA>0) ? (double(data[i].SB)*cna)/(double(data[i].SA)*cnb): 1.;
126  // n += (data[i].nB>=1 && data[i].SA>0) ? 1 : 0;
127  // fhat += f;
128  }
129  cnb /= n;
130  fhat = ( std::isfinite(cnb) && cnb > 0. ) ? double(sumSb)/(sumTheta*cnb) : 1.;
131  that = double(sumD)/sumTheta - (0.5*(1.+fhat));
132  std::vector<double> thetas(data.size());
133  std::vector< HKAresults::chisq_tuple > chisquareds;
134  double xsq = 0.,xsqA = 0., xsqB = 0;
135  //now, get the theta estimates for each locus,
136  //and count up the xsq statistic
137 
138  for(unsigned i=0;i<data.size();++i)
139  {
140  cna = Cn(data[i].nA);
141  cnb = Cn(data[i].nB);
142  thetas[i] = double(double(data[i].SA) +
143  double(data[i].SB)+
144  data[i].D)/
145  (that+0.5*(1+fhat)+cna+fhat*cnb);
146 
147  double ESA = thetas[i]*cna;
148  double VSA = ESA + thetas[i]*thetas[i]*Cnsq(data[i].nA);
149  double ESB = fhat*thetas[i]*cnb;
150  double VSB = ESB + fhat*fhat*thetas[i]*thetas[i]*Cnsq(data[i].nB);
151  double ED = thetas[i]*(that + 0.5*(1.+fhat));
152  double VD = ED + (thetas[i]*0.5*(1.+fhat))*(thetas[i]*0.5*(1.+fhat));
153  double xsq_t=0.,xsqA_t=0.,xsqB_t=0.;
154  const double spA = (double(data[i].SA)-ESA)*(double(data[i].SA)-ESA)/VSA;
155  xsq_t += spA;
156  xsqA_t += spA;
157  double xsqpoly = xsqA_t;
158  const double sp2 = (double(data[i].SB)-ESB)*(double(data[i].SB)-ESB)/VSB;
159  if (std::isfinite(sp2))
160  {
161  xsq_t += sp2;
162  xsqpoly += sp2;
163  }
164  xsqB_t += sp2; //this will be not finite if n=1 in species 2, which is the desired behavior
165  const double d = (data[i].D-ED)*(data[i].D-ED)/VD;
166  xsq_t += d;
167  chisquareds.push_back( std::make_tuple(xsqpoly,d,xsqA_t,xsqB_t) );
168  xsq += xsq_t;
169  xsqA_t += d;
170  xsqB_t += d;
171  xsqA += xsqA_t;
172  xsqB += xsqB_t;
173  }
174  return HKAresults( thetas,chisquareds,fhat,that,xsq,xsqA,xsqB );
175  }
176 }
const double fhat
Definition: HKA.hpp:105
Calculations related to the HKA test.
The namespace in which this library resides.
results of calculations of the HKA test
Definition: HKA.hpp:69
Data from a single locus for an HKA test.
Definition: HKA.hpp:39
const double xsqA
Definition: HKA.hpp:119
std::vector< chisq_tuple > chisquareds
Definition: HKA.hpp:100
HKAresults(const std::vector< double > &_thetas, const std::vector< chisq_tuple > &_chisquareds, const double &_fhat, const double &_That, const double &_xsq, const double &_xsqA, const double &_xsqB)
Definition: HKA.cc:80
const unsigned SA
Definition: HKA.hpp:45
const unsigned nA
Definition: HKA.hpp:54
const double D
Definition: HKA.hpp:50
const double xsq
Definition: HKA.hpp:114
HKAresults calcHKA(const std::vector< HKAdata > &data)
Definition: HKA.cc:100
const std::vector< double > thetas
Definition: HKA.hpp:76
const double xsqB
Definition: HKA.hpp:124