libsequence  1.9.5
CoalescentTreeOperations.cc
1 /*
2 
3  Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
4 
5  Remove the brackets to email me.
6 
7  This file is part of libsequence.
8 
9  libsequence is free software: you can redistribute it and/or modify
10  it under the terms of the GNU General Public License as published by
11  the Free Software Foundation, either version 3 of the License, or
12  (at your option) any later version.
13 
14  libsequence is distributed in the hope that it will be useful,
15  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  GNU General Public License for more details.
18 
19  You should have received a copy of the GNU General Public License
20  long with libsequence. If not, see <http://www.gnu.org/licenses/>.
21 
22 */
23 
25 #include <cassert>
26 #include <cmath>
27 #include <limits>
28 #include <stdexcept>
29 namespace Sequence
30 {
31  namespace coalsim {
32  double total_time( const marginal::const_iterator beg,
33  const int & nsam )
43  {
44  double t = (beg+2*nsam-2)->time;
45  int i = nsam;
46  while( i < 2*nsam-1 )
47  {
48  t += (beg+i)->time;
49  ++i;
50  }
51  return t;
52  }
53 
54  int pick_branch(marginal::const_iterator beg,
55  const int & nsam,
56  const double & rtime)
66  {
67  double time=0.;
68  int i=0;
69  //we iterate over all-but-the-last branch
70  //this is a guard in case rtime == ttime,
71  //which can cause numerical issues in
72  //the internal if statement
73  for(i=0;i<2*nsam-3;i++)
74  {
75  time += (beg + (beg+i)->abv)->time - (beg+i)->time;
76  if(time >= rtime) return i;
77  }
78  return i;
79  }
80 
81  std::vector<int> get_all_descendants (marginal::const_iterator beg,
82  const int & nsam,
83  const int & branch)
93  {
94  assert( branch <= 2*nsam-2 );
95  std::vector<int> desc;
96  int j;
97  for(int i=0;i<nsam;++i)
98  {
99  for(j=i;j<branch;j=(beg+j)->abv){};
100  if(j==branch) desc.push_back(i);
101  }
102  return desc;
103  }
104 
105  bool is_descendant( marginal::const_iterator beg,
106  const int & ind,
107  const int & branch )
120  {
121  int i=ind;
122  while(i<branch)
123  {
124  assert( (beg+i)->abv != -1 );
125  i = (beg+i)->abv;
126  }
127  return i==branch;
128  }
129 
130  double total_time_on_arg( const Sequence::coalsim::arg & sample_history,
131  const int & total_number_of_sites )
141  {
142 
143  size_t seg=0,nsegs=sample_history.size();
144  Sequence::coalsim::arg::const_iterator i = sample_history.begin(),j=i;
145  ++j;
146 
147  double tt = 0.;
148 
149  for( ; seg < nsegs ; ++seg,++i,++j )
150  {
151  //check that things are sane
152  assert( i->beg < total_number_of_sites );
153  if( i->beg > total_number_of_sites )
154  {
155  throw std::runtime_error("Sequence::coalsim::total_time_on_arg - beginning of marginal tree > number of sites simulated");
156  }
157  //get the beginning and end of the current marginal tree
158  int end = (seg<nsegs-1) ? j->beg : total_number_of_sites;
159  int beg = i->beg;
160 
161  //increment total time on tree as a weighted average
162  //of the times on all the marginals
163  tt += Sequence::coalsim::total_time(i->begin(),i->nsam) *
164  double(end-beg)/double(total_number_of_sites);
165  }
166  return tt;
167  }
168 
169  void minimize_arg( Sequence::coalsim::arg * sample_history )
176  {
177  arg::iterator i = sample_history->begin(),
178  j=i;
179  ++j;
180  assert(j->beg > i->beg);
181  size_t nsegs = sample_history->size();
182  for(size_t seg=0;seg<nsegs-1;++seg,++i,++j)
183  {
184  bool same_tree = true;
185  for( marginal::iterator mi = i->end()-1,
186  mj=j->end()-1 ;
187  same_tree == true && (mi >= i->begin() && mj >= j->begin()) ;
188  --mi,--mj )
189  {
190  if(std::fabs( mi->time - mj->time ) > std::numeric_limits<double>::epsilon() ||
191  mi->abv != mj->abv )
192  {
193  same_tree = false;
194  }
195  }
196  if(same_tree == true)
197  {
198  sample_history->erase(j);
199  nsegs--;
200  j=i;
201  --i;
202  seg--;
203  }
204  }
205  }
206 
207 #ifndef DOXYGEN_SKIP
208  class sfs_times_impl
209  {
210  public:
211  std::vector<double> times;
212  double tt;
213  size_t nbins;
214  sfs_times_impl() : times(std::vector<double>()),tt(0.),nbins(0){}
215  sfs_times_impl( const sfs_times_impl & s ) :
216  times(s.times),tt(s.tt),nbins(s.nbins) {}
217  sfs_times_impl(arg::const_iterator & sample_history_beg,
218  const arg::size_type & nsegs,
219  const int & total_nsites_simulated,
220  bool folded);
221  inline bool operator==(const sfs_times_impl & rhs) const
222  {
223  return (this->times==rhs.times &&
224  std::fabs(this->tt-rhs.tt)<=std::numeric_limits<double>::epsilon() &&
225  this->nbins == rhs.nbins);
226  }
227  };
228 
229  sfs_times_impl::sfs_times_impl(arg::const_iterator & sample_history_beg,
230  const arg::size_type & nsegs,
231  const int & total_nsites_simulated,
232  bool folded)
233  : times(std::vector<double>(std::vector<double>::size_type((folded==false) ? sample_history_beg->nsam-1 :
234  (sample_history_beg->nsam/2)),0.)),
235  tt(0.),
236  nbins(size_t((folded==false) ? sample_history_beg->nsam-1 :
237  (sample_history_beg->nsam/2)))
238  {
239  arg::const_iterator i = sample_history_beg,j=i;
240  ++j;
241  double t;
242  for(unsigned seg = 0; seg < nsegs ; ++seg,++i,++j)
243  {
244  int end = (seg<nsegs-1) ? j->beg : total_nsites_simulated;
245  int beg = i->beg;
246  const double scale = double(end-beg)/double(total_nsites_simulated);
247  //iterate over tips
248  const marginal::const_iterator treebeg = i->begin();
249  for(int tip = 0 ; tip < i->nsam ; ++tip)
250  {
251  //tips lead to singletons
252  t = ( (treebeg+((treebeg+tip )->abv))->time -
253  (treebeg+tip)->time)*scale;
254  tt += t;
255  times[0] += t;
256  }
257  //iterate over internal nodes;
258  for(int node = i->nsam ; node < 2*(i->nsam)-2 ; ++node)
259  {
260  std::vector<int> descendants = get_all_descendants(treebeg,i->nsam,node);
261  t = ( (treebeg+((treebeg+node )->abv))->time -
262  (treebeg+node)->time)*scale;
263  std::vector<int>::size_type index = (folded==false) ? descendants.size()-1 :
264  std::min(descendants.size(),std::vector<int>::size_type(i->nsam)-descendants.size())-1;
265  times[index] += t;
266  tt+=t;
267  }
268  }
269  }
270 #endif
271 
272  sfs_times::sfs_times() :
273  impl(std::unique_ptr<sfs_times_impl>(new sfs_times_impl()))
275  {
276  }
277 
278  sfs_times::sfs_times(const sfs_times & sfst) : impl(new sfs_times_impl(*(sfst.impl)))
282  {
283  }
284 
285  sfs_times::sfs_times(arg::const_iterator sample_history_beg,
286  const arg::size_type & nsegs,
287  const int & total_nsites_simulated,
288  bool folded) :
289  impl(new sfs_times_impl(sample_history_beg,nsegs,total_nsites_simulated,folded))
298  {
299  }
300 
301  sfs_times::~sfs_times()
302  {
303  }
304 
305  double sfs_times::operator[](std::vector<double>::size_type const & i) const
311  {
312  assert(i>0 && i<=impl->nbins);
313  return impl->times[i-1];
314  }
315 
316  sfs_times & sfs_times::operator=(const sfs_times & rhs)
320  {
321  if(*this==rhs) return *this;
322  this->impl->times = rhs.impl->times;
323  this->impl->tt = rhs.impl->tt;
324  this->impl->nbins = rhs.impl->nbins;
325  return *this;
326  }
327 
328  bool sfs_times::operator==(const sfs_times & rhs) const
332  {
333  return ( *(this->impl) == *(rhs.impl) );
334  }
335 
336  double sfs_times::ttime() const
340  {
341  return impl->tt;
342  }
343 
344  size_t sfs_times::size() const
349  {
350  return impl->nbins;
351  }
352 
353  sfs_times::const_iterator sfs_times::begin() const
357  {
358  return impl->times.begin();
359  }
360 
361  sfs_times::const_iterator sfs_times::end() const
365  {
366  return impl->times.end();
367  }
368  }
369 }
std::list< marginal > arg
Ancestral Recombination Graph.
Definition: SimTypes.hpp:217
A point on a marginal tree at which a coalescent event occurs.
Definition: SimTypes.hpp:151
int pick_branch(marginal::const_iterator beg, const int &nsam, const double &rtime)
pick a random branch of a marginal tree
void minimize_arg(arg *sample_history)
The namespace in which this library resides.
double total_time(const marginal::const_iterator beg, const int &nsam)
Calculate total time on a marginal tree.
double total_time_on_arg(const Sequence::coalsim::arg &sample_history, const int &total_number_of_sites)
Returns the total time on an ancestral recombination graph.
std::vector< int > get_all_descendants(marginal::const_iterator beg, const int &nsam, const int &branch)
Find all the descendants of a branch on a marginal tree.
bool is_descendant(marginal::const_iterator beg, const int &ind, const int &branch)
Ask if a tip of a tree is a descendant of a particular branch.