40 using std::accumulate;
46 #ifndef DOXYGEN_SKIP //doxygen should skip this 50 typedef std::vector<Sequence::stateCounter> _vCounts;
51 typedef std::vector< _vCounts > _vvCounts;
53 unsigned _nsam, _nsites, _npop;
54 double _piB, _piT, _piS, _piD;
55 vector<unsigned> _config;
56 vector<double> _weights;
57 vector< pair<unsigned,unsigned> > _indexes;
60 typedef std::pair< std::set<char>, std::set<char> > PopStateSets;
61 PopStateSets getPopStateSets(
const unsigned &i,
63 const unsigned & site);
64 FSTimpl(
const PolyTable *data,
unsigned npop,
const unsigned *config,
65 const double *weights,
bool haveOutgroup,
unsigned outgroup);
69 FSTimpl::FSTimpl(
const PolyTable *data,
unsigned npop,
const unsigned *config,
70 const double *weights,
bool haveOutgroup,
unsigned outgroup) :
71 _Counts( _vvCounts(npop,_vCounts(data->numsites()))),
72 _nsam(unsigned(data->size())),
73 _nsites(data->numsites()),_npop(npop),
74 _piB(0.), _piT(0.), _piS(0.), _piD(0.),
75 _indexes( vector< pair<unsigned,unsigned> >(npop) ),
81 throw std::runtime_error(
"Seqence::FST -- config vector is NULL");
85 _config.assign(config,config+npop);
86 if(accumulate(_config.begin(),_config.end(),
87 unsigned(0))+
unsigned(haveOutgroup) != _nsam)
89 throw std::runtime_error(
"Seqence::FST -- sum of elements in config does not equal the sample size stored in data");
95 _weights.assign(_npop, 1./
double(_npop));
99 _weights.assign(weights,weights+npop);
100 double sum = accumulate(_weights.begin(),_weights.end(),0.);
101 if ( std::fabs(sum-1.) > std::numeric_limits<double>::epsilon() )
103 throw std::runtime_error(
"Seqence::FST -- weights do not sum to 1");
107 _indexes[0] = pair<unsigned,unsigned>(0,_config[0]);
108 for(
unsigned i = 1 ; i < _npop ; ++i)
110 _indexes[i] = pair<unsigned,unsigned>(_indexes[i-1].second,
111 _indexes[i-1].second+_config[i]);
113 for (
unsigned i = 0 ; i < _npop ; ++i)
115 for(
unsigned j = 0 ; j < _nsites ; ++j)
117 _Counts[i][j] = stateCounter(
'-');
118 for(
unsigned k = _indexes[i].first ; k < _indexes[i].second ; ++k)
120 if (haveOutgroup ==
false ||
121 (haveOutgroup ==
true && k != outgroup))
122 _Counts[i][j]((*data)[k][j]);
127 if (haveOutgroup ==
true)
130 for(
unsigned i=0;i<pv.size();++i)
132 pv[i].second.erase(outgroup,1);
138 FSTimpl::PopStateSets FSTimpl::getPopStateSets(
const unsigned &pop1,
139 const unsigned &pop2,
140 const unsigned & site)
142 typedef std::string::difference_type UTYPE;
143 UTYPE beg1 = std::accumulate(_config.begin(),
144 _config.begin()+pop1,
146 UTYPE size1 = *(_config.begin()+pop1);
147 UTYPE beg2 = std::accumulate(_config.begin(),
148 _config.begin()+pop2,
150 UTYPE size2 = *(_config.begin()+pop2);
153 vector<char> ch1(pv[site].second.begin()+beg1,
154 pv[site].second.begin()+beg1+size1),
155 ch2(pv[site].second.begin()+beg2,
156 pv[site].second.begin()+beg2+size2);
157 ch1.erase( std::remove(ch1.begin(),ch1.end(),
'N'),ch1.end() );
158 ch2.erase( std::remove(ch2.begin(),ch2.end(),
'N'),ch2.end() );
159 std::set<char> states1(ch1.begin(),ch1.end()),
160 states2(ch2.begin(),ch2.end());
161 return std::make_pair(states1,states2);
164 FST::FST(
const PolyTable *data,
unsigned npop,
const unsigned *config,
165 const double *weights,
bool haveOutgroup,
unsigned outgroup)
187 impl = std::unique_ptr<FSTimpl>(
new FSTimpl(data,npop,config,weights,haveOutgroup,outgroup));
189 catch(std::runtime_error &e)
195 throw (std::runtime_error(
"Sequence::FST : unknown exception caught by constructor"));
204 void FSTimpl::doCalcs(
void)
207 double w_ii_sq = 0., weighted_Pi_ii = 0.;
209 for (
unsigned i = 0 ; i < _npop ; ++i)
212 w_ii_sq += _weights[i]*_weights[i];
213 for(
unsigned j = 0 ; j < _nsites ; ++j)
215 unsigned n = _config[i];
217 n -= _Counts[i][j].n;
218 double denom = (double(n)* (double(n) - 1.0));
219 SSH += (_Counts[i][j].a > 0) ?
double(_Counts[i][j].a) *
220 double (_Counts[i][j].a-1) /denom : 0. ;
221 SSH += (_Counts[i][j].g > 0) ?
double(_Counts[i][j].g) *
222 double (_Counts[i][j].g-1) /denom : 0. ;
223 SSH += (_Counts[i][j].c > 0) ?
double(_Counts[i][j].c) *
224 double (_Counts[i][j].c-1) /denom : 0. ;
225 SSH += (_Counts[i][j].t > 0) ?
double(_Counts[i][j].t) *
226 double (_Counts[i][j].t-1) /denom : 0. ;
227 SSH += (_Counts[i][j].zero > 0) ?
double(_Counts[i][j].zero) *
228 double (_Counts[i][j].zero-1) /denom : 0. ;
229 SSH += (_Counts[i][j].one > 0) ?
double(_Counts[i][j].one) *
230 double (_Counts[i][j].one-1) /denom : 0. ;
233 weighted_Pi_ii += _weights[i]*_weights[i]*Pi;
238 double sum_wi_wj = 0., weighted_Pi_ij = 0.;
239 for(
unsigned i = 0 ; i < _npop - 1 ; ++i)
241 for(
unsigned j = i+1 ; j < _npop ; ++j)
244 sum_wi_wj += _weights[i]*_weights[j];
245 for(
unsigned k = 0 ; k < _nsites ; ++k)
247 unsigned ni = _config[i],nj = _config[j];
248 ni -= _Counts[i][k].n;
249 nj -= _Counts[j][k].n;
250 Pi_i_j += (_Counts[i][k].a > 0) ?
251 (
double(_Counts[i][k].a)/double(ni))*
252 (
double(nj-_Counts[j][k].a)/double(nj)):0.;
253 Pi_i_j += (_Counts[i][k].g > 0) ?
254 (
double(_Counts[i][k].g)/double(ni))*
255 (
double(nj-_Counts[j][k].g)/double(nj)):0.;
256 Pi_i_j += (_Counts[i][k].c > 0) ?
257 (
double(_Counts[i][k].c)/double(ni))*
258 (
double(nj-_Counts[j][k].c)/double(nj)):0.;
259 Pi_i_j += (_Counts[i][k].t > 0) ?
260 (
double(_Counts[i][k].t)/double(ni))*
261 (
double(nj-_Counts[j][k].t)/double(nj)):0.;
262 Pi_i_j += (_Counts[i][k].zero > 0) ?
263 (
double(_Counts[i][k].zero)/double(ni))*
264 (
double(nj-_Counts[j][k].zero)/double(nj)):0.;
265 Pi_i_j += (_Counts[i][k].one > 0) ?
266 (
double(_Counts[i][k].one)/double(ni))*
267 (
double(nj-_Counts[j][k].one)/double(nj)):0.;
269 weighted_Pi_ij += _weights[i]*_weights[j]*Pi_i_j;
274 _piT = weighted_Pi_ii + 2.*weighted_Pi_ij;
275 _piS = weighted_Pi_ii / w_ii_sq ;
276 _piB = weighted_Pi_ij / sum_wi_wj;
277 _piD = (_piT - _piS)/(2. * sum_wi_wj);
282 std::set<double> FST::shared(
unsigned pop1,
unsigned pop2)
const 290 if (pop1 > impl->_npop-1 || pop2 > impl->_npop-1)
291 throw std::out_of_range(
"Seqence::FST -- indexes out of range");
293 std::set<double> sharedList;
295 for(
unsigned site = 0 ; site < impl->_nsites ; ++site)
297 if (impl->_Counts[pop1][site].gap == 0 &&
298 impl->_Counts[pop2][site].gap == 0)
300 FSTimpl::PopStateSets states = impl->getPopStateSets(pop1,pop2,site);
301 if (states.first.size() > 1 && states.second.size() > 1)
303 vector<char> overlap(states.first.size()+states.second.size());
304 vector<char>::iterator itr = std::set_intersection(states.first.begin(),
306 states.second.begin(),
309 if ( itr - overlap.begin() > 0)
311 sharedList.insert(impl->pv[site].first);
319 std::set<double> FST::fixed(
unsigned pop1,
unsigned pop2)
const 327 if (pop1 > impl->_npop-1 || pop2 > impl->_npop-1)
328 throw std::out_of_range(
"Seqence::FST -- indexes out of range");
329 std::set<double> fixedList;
330 for(
unsigned site = 0 ; site < impl->_nsites ; ++site)
332 if (impl->_Counts[pop1][site].gap == 0 &&
333 impl->_Counts[pop2][site].gap == 0)
335 FSTimpl::PopStateSets states = impl->getPopStateSets(pop1,pop2,site);
336 vector<char> overlap(states.first.size()+states.second.size());
337 vector<char>::iterator itr = std::set_intersection(states.first.begin(),
339 states.second.begin(),
342 if ( itr - overlap.begin() == 0)
344 fixedList.insert(impl->pv[site].first);
351 std::pair< std::set<double>,std::set<double> >
352 FST::Private(
unsigned pop1,
unsigned pop2)
const 363 if (pop1 > impl->_npop-1 || pop2 > impl->_npop-1)
364 throw std::out_of_range(
"Seqence::FST -- indexes out of range");
366 std::set<double> p1,p2;
367 for(
unsigned site = 0 ; site < impl->_nsites ; ++site)
369 if (impl->_Counts[pop1][site].gap == 0 &&
370 impl->_Counts[pop2][site].gap == 0)
372 FSTimpl::PopStateSets states = impl->getPopStateSets(pop1,pop2,site);
375 vector<char> priv1(states.first.size()),priv2(states.second.size());
377 vector<char>::iterator itr1 = std::set_difference(states.first.begin(),states.first.end(),
378 states.second.begin(),states.second.end(),
381 vector<char>::iterator itr2 = std::set_difference(states.second.begin(),states.second.end(),
382 states.first.begin(),states.first.end(),
386 if ( (itr1-priv1.begin())>0 && states.first.size()>1)
388 p1.insert(impl->pv[site].first);
390 if ( (itr2-priv2.begin())>0 && states.second.size()>1)
392 p2.insert(impl->pv[site].first);
396 return std::make_pair(p1,p2);
399 double FST::HSM(
void)
const 406 return impl->_piD/(impl->_piS+impl->_piD);
409 double FST::Slatkin(
void)
const 416 return impl->_piD/(2.*impl->_piS + impl->_piD);
419 double FST::HBK(
void)
const 427 return 1.-(impl->_piS/impl->_piT);
430 double FST::piB(
void)
const 439 double FST::piT(
void)
const 448 double FST::piS(
void)
const 457 double FST::piD(
void)
const polySiteVector make_polySiteVector(const Sequence::PolyTable &data) __attribute__((deprecated))
delcaration of a class (Sequence::FST) to analyze population structure
Sequence::PolyTable, a virtual base class for polymorphism tables.
The namespace in which this library resides.
std::vector< polymorphicSite > polySiteVector
Site-major variation tables in ASCII format.