libsequence  1.9.5
DemographicModels.tcc
1 // -*- C++ -*-
2 #ifndef __SEQUENCE_COALESCENT_BITS_DEMOGRAPHIC_MODELS_HPP__
3 #define __SEQUENCE_COALESCENT_BITS_DEMOGRAPHIC_MODELS_HPP__
4 
5 #include <Sequence/Coalescent/Coalesce.hpp>
6 #include <Sequence/Coalescent/Recombination.hpp>
7 #include <Sequence/SeqConstants.hpp>
8 #include <functional>
9 #include <limits>
10 #include <cmath>
11 #include <cassert>
12 
13 namespace Sequence
14 {
15  namespace coalsim {
16 #ifndef DOXYGEN_SKIP
17  template<typename uniform_generator,
18  typename uniform01_generator,
19  typename exponential_generator>
20  arg bottleneck_details( uniform_generator & uni,
21  uniform01_generator & uni01,
22  exponential_generator & expo,
23  const std::vector<chromosome> & initialized_sample,
24  const marginal & initialized_marginal,
25  const double & tr,
26  const double & d,
27  const double & f,
28  const double & rho,
29  const bool & exponential_recovery,
30  const double & recovered_size )
31  {
32  assert( d > 0. );
33  assert( tr > 0. );
34  assert( f > 0. );
35  assert( rho >= 0. );
36  assert( recovered_size > 0. );
37  assert( initialized_marginal.nsam == int(initialized_sample.size()) );
38 
39  arg sample_history(1,initialized_marginal);
40  std::vector<chromosome> sample(initialized_sample);
41  int nsam=sample.size(),NSAM = sample.size();
42  const int nsites = sample[0].last()+1;
43  const double littler = rho/double(nsites-1);
44  int nlinks = NSAM*(nsites-1);
45  double t = 0.;
46  const double G = (std::log(recovered_size)-std::log(f))/d;
47  double tcoal,rcoal,trec,rrec,tmin;
48  bool event;
49  std::pair<int,int> two;
50  while( NSAM > 1 )
51  {
52  rcoal = double(NSAM*(NSAM-1));
53  rrec = littler*(nlinks);
54  if ( exponential_recovery )
55  {
56  assert(rcoal>0.);
57  if(t<tr)
58  {
59  tcoal = expo(recovered_size/rcoal);
60  }
61  if(t >= tr && t < (tr+d))
62  {
63  //uni01() must return an rv ~ U[0,1), so we must use 1-rv for logs...
64  double temp = 1.-G*recovered_size*std::exp(-G*(t-tr))*std::log(1.-uni01())/rcoal;
65  assert(temp >= 0.);
66  tcoal = std::log(temp)/G;
67  assert(std::isfinite(tcoal));
68  }
69  else
70  {
71  tcoal = expo(1./rcoal);
72  }
73  }
74  else
75  {
76  assert(rcoal > 0.);
77  if(t<tr)
78  {
79  tcoal = expo(recovered_size/rcoal);
80  }
81  if(t >= tr && t < (tr+d))
82  {
83  tcoal = expo(f/rcoal);
84  }
85  else
86  {
87  tcoal = expo(1./rcoal);
88  }
89  }
90  trec = (rrec>0.) ? expo(1./rrec) : SEQMAXDOUBLE;
91  assert(std::isfinite(trec));
92  tmin = std::min(tcoal,trec);
93  event = true;
94  if( t < tr && (t+tmin) >= tr )
95  {
96  t = tr;
97  event=false;
98  }
99  else if ( t < (tr+d) && (t+tmin) >= (tr+d) )
100  {
101  t = tr+d;
102  event=false;
103  }
104  if(event)
105  {
106  if (tcoal < trec)
107  {
108  t += tmin;
109  two = pick2(uni,NSAM);
110  NSAM -= coalesce(t,nsam,NSAM,two.first,two.second,nsites,
111  &nlinks,&sample,&sample_history);
112  }
113  else
114  {
115  t += tmin;
116  two = pick_uniform_spot(uni01(),
117  nlinks,
118  sample.begin(),NSAM);
119  nlinks -= crossover(NSAM,two.first,two.second,
120  &sample,&sample_history);
121  NSAM++;
122  }
123  if(NSAM < int(sample.size())/5)
124  sample.erase(sample.begin()+NSAM+1,sample.end());
125  }
126  }
127  return sample_history;
128  }
129 
130  //works and tested against ms
131  template<typename uniform_generator,
132  typename uniform01_generator,
133  typename exponential_generator>
134  arg exponential_change_details( uniform_generator & uni,
135  uniform01_generator & uni01,
136  exponential_generator & expo,
137  const std::vector<chromosome> & initialized_sample,
138  const marginal & initialized_marginal,
139  const double & G,
140  const double & t_begin,
141  const double & t_end,
142  const double & rho,
143  const double & size_at_end)
144  {
145  assert( t_begin >= 0. );
146  assert( t_end >= 0. );
147  assert( t_end >= t_begin );
148  assert( rho >= 0. );
149  assert( initialized_marginal.nsam == int(initialized_sample.size()) );
150  arg sample_history(1,initialized_marginal);
151  std::vector<chromosome> sample(initialized_sample);
152  int nsam=sample.size(),NSAM = sample.size();
153  const int nsites = sample[0].last()+1;
154  const double littler = rho/double(nsites-1);
155  int nlinks = NSAM*(nsites-1);
156  double t = 0.,tcoal,trec,rrec,rcoal,tmin;
157  bool event;
158  std::pair<int,int> two;
159  double temp;
160  while( NSAM > 1 )
161  {
162  assert( std::isfinite(t) );
163  rcoal = double(NSAM*(NSAM-1));
164  rrec = littler*(nlinks);
165  trec = (rrec>0.) ? expo((1./rrec)) : SEQMAXDOUBLE;
166  assert(std::isfinite(trec));
167  if ( t < t_begin || G == 0.)
168  {
169  tcoal = expo((1./rcoal));
170  assert( std::isfinite(tcoal) );
171  }
172  else if ( t >= t_begin && t < t_end )
173  {
174  temp = 1.-G*std::exp(-G*(t-t_begin))*std::log(1.-uni01())/rcoal;
175  assert( temp >= 0. );
176  tcoal = std::log(temp)/G;
177  assert( std::isfinite(tcoal) );
178  }
179  else
180  {
181  tcoal = (size_at_end > 0.) ? expo((size_at_end/rcoal)):
182  expo((std::exp(-G*(t_end-t_begin))/rcoal));
183  assert( std::isfinite(tcoal) );
184  }
185  tmin = std::min(tcoal,trec);
186  assert( std::isfinite(tmin) );
187  event = true;
188  if( t < t_begin && (t+tmin) >= t_begin)
189  {
190  t=t_begin;
191  event=false;
192  }
193  else if ( t<t_end && (t+tmin) >= t_end )
194  {
195  t=t_end;
196  event=false;
197  }
198  if(event)
199  {
200  if (tcoal < trec)
201  {
202  t += tmin;
203  two = pick2(uni,NSAM);
204  NSAM -= coalesce(t,nsam,NSAM,two.first,two.second,nsites,
205  &nlinks,&sample,&sample_history);
206  }
207  else
208  {
209  t += tmin;
210  two = pick_uniform_spot(uni01(),
211  nlinks,
212  sample.begin(),NSAM);
213  nlinks -= crossover(NSAM,two.first,two.second,
214  &sample,&sample_history);
215  NSAM++;
216  }
217  }
218  if(NSAM < int(sample.size())/5)
219  sample.erase(sample.begin()+NSAM+1,sample.end());
220  }
221  return sample_history;
222  }
223 #endif
224 
225  template<typename uniform_generator,
226  typename uniform01_generator,
227  typename exponential_generator>
228  arg bottleneck( uniform_generator & uni,
229  uniform01_generator & uni01,
230  exponential_generator & expo,
231  const std::vector<chromosome> & initialized_sample,
232  const marginal & initialized_marginal,
233  const double & tr,
234  const double & d,
235  const double & f,
236  const double & rho,
237  const bool & exponential_recovery,
238  const double & recovered_size )
239  /*!
240  @brief Coalescent simulation of a population bottleneck
241  Simulate a single, bottlenecked, population according to the Wright-Fisher model without selection. The population can recover from the bottleneck either instantaneously ("stepwise bottleneck"), or according to an exponential growth model. For the case of a stepwise bottleneck, this function is equivalent to the following options in Dick Hudson's program "ms": -eN 0 recovered_size -eN tr f -eN (tr+d) 1. For the case where recovery from the bottleneck is by exponential growth, the equivalent "ms" options are: -eN 0 recovered_size -eG tr (log(recovered_size)-log(f))/d -eG (tr+d) 0 -eN (tr+d) 1.
242  \param uni A binary function object (or equivalent) that returns a random deviate between a and b such that a <= x < b. a and b are the arguments to operator() of \a uni
243  \param uni01 A function object (or equivalent) whose operator() takes no arguments and returns a random deviate 0 <= x < 1.
244  \param expo A unary function object whose operator() takes the mean of an exponential process as an argument and returns a deviate from an exponential distribution with that mean
245  \param initialized_sample An initialized vector of chromosomes for a single population. For example, this may be the return value of init_sample. This object is used to copy-construct a non-const sample for the simulation
246  \param initialized_marginal An initialized marginal tree of the appropriate sample size for the simulation. For example, the return value of init_marginal.
247  \param tr The time at which the population recovers from the bottleneck. In units of 4N0 generations, where N0 is the effective size before the bottleneck.
248  \param d The duration of the bottleneck, in units of 4N0 generations, where N0 is the effective size before the bottleneck.
249  \param f Bottleneck severity. Define Nb as the effective size during the bottleneck, and N0 the effective size prior to the bottleneck. f=Nb/N0.
250  \param rho The population recombination rate 4N0r. The number of "sites" simulated is not neccesary, as it can be obtained from initialized_sample[0].last()+1.
251  \param exponential_recovery If true, the population recovers from the bottleneck according to an exponential growth model. If false, a stepwise bottleneck is assumed.
252  \param recovered_size If 1, the population recovers to N0 at time \a tr. If 0.5, the population recovers to 1/2 the pre-bottleneck size, etc.
253  \return The ancestral recombination graph (arg) describing the sample history.
254  \pre d>0 and tr>0 and f>0 and rho>=0 and recovered_size>0 and initialized_marginal.nsam == initialized_sample.size()
255  \note Preconditions are checked by the assert macro, and are therefore disabled when compiling with -DNDEBUG. No checks or warnings are otherwised performed nor given.
256  \ingroup coalescent
257  */
258  {
259  return bottleneck_details(uni,uni01,expo,initialized_sample,initialized_marginal,tr,d,f,rho,exponential_recovery,recovered_size);
260  }
261 
262  template<typename uniform_generator,
263  typename uniform01_generator,
264  typename exponential_generator>
265  arg bottleneck( const uniform_generator & uni,
266  const uniform01_generator & uni01,
267  const exponential_generator & expo,
268  const std::vector<chromosome> & initialized_sample,
269  const marginal & initialized_marginal,
270  const double & tr,
271  const double & d,
272  const double & f,
273  const double & rho,
274  const bool & exponential_recovery,
275  const double & recovered_size)
276  /*!
277  @brief Coalescent simulation of a population bottleneck
278  Simulate a single, bottlenecked, population according to the Wright-Fisher model without selection. The population can recover from the bottleneck either instantaneously ("stepwise bottleneck"), or according to an exponential growth model. For the case of a stepwise bottleneck, this function is equivalent to the following options in Dick Hudson's program "ms": -eN 0 recovered_size -eN tr f -eN (tr+d) 1. For the case where recovery from the bottleneck is by exponential growth, the equivalent "ms" options are: -eN 0 recovered_size -eG tr (log(recovered_size)-log(f))/d -eG (tr+d) 0 -eN (tr+d) 1.
279  \param uni A binary function object (or equivalent) that returns a random deviate between a and b such that a <= x < b. a and b are the arguments to operator() of \a uni
280  \param uni01 A function object (or equivalent) whose operator() takes no arguments and returns a random deviate 0 <= x < 1.
281  \param expo A unary function object whose operator() takes the mean of an exponential process as an argument and returns a deviate from an exponential distribution with that mean
282  \param initialized_sample An initialized vector of chromosomes for a single population. For example, this may be the return value of init_sample. This object is used to copy-construct a non-const sample for the simulation
283  \param initialized_marginal An initialized marginal tree of the appropriate sample size for the simulation. For example, the return value of init_marginal.
284  \param tr The time at which the population recovers from the bottleneck. In units of 4N0 generations, where N0 is the effective size before the bottleneck.
285  \param d The duration of the bottleneck, in units of 4N0 generations, where N0 is the effective size before the bottleneck.
286  \param f Bottleneck severity. Define Nb as the effective size during the bottleneck, and N0 the effective size prior to the bottleneck. f=Nb/N0.
287  \param rho The population recombination rate 4N0r. The number of "sites" simulated is not neccesary, as it can be obtained from initialized_sample[0].last()+1.
288  \param exponential_recovery If true, the population recovers from the bottleneck according to an exponential growth model. If false, a stepwise bottleneck is assumed.
289  \param recovered_size If 1, the population recovers to N0 at time \a tr. If 0.5, the population recovers to 1/2 the pre-bottleneck size, etc.
290  \return The ancestral recombination graph (arg) describing the sample history.
291  \pre d>0 and tr>0 and f>0 and rho>=0 and recovered_size>0 and initialized_marginal.nsam == initialized_sample.size()
292  \note Preconditions are checked by the assert macro, and are therefore disabled when compiling with -DNDEBUG. No checks or warnings are otherwised performed nor given.
293  \ingroup coalescent
294  */
295  {
296  return bottleneck_details(uni,uni01,expo,initialized_sample,initialized_marginal,tr,d,f,rho,exponential_recovery,recovered_size);
297  }
298 
299  template<typename uniform_generator,
300  typename uniform01_generator,
301  typename exponential_generator>
302  arg exponential_change( uniform_generator & uni,
303  uniform01_generator & uni01,
304  exponential_generator & expo,
305  const std::vector<chromosome> & initialized_sample,
306  const marginal & initialized_marginal,
307  const double & G,
308  const double & t_begin,
309  const double & t_end,
310  const double & rho,
311  const double & size_at_end)
312  /*!
313  @brief Coalescent simulation of exponential change in population size
314  Simulate a single population whose size changes exponentially during some period of time. The relevant command line options for Hudson's program "ms" would be: -eG t_begin G -eG t_end 0. -eN t_end size_at_end.
315  \param uni A binary function object (or equivalent) that returns a random deviate between a and b such that a <= x < b. a and b are the arguments to operator() of \a uni
316  \param uni01 A function object (or equivalent) whose operator() takes no arguments and returns a random deviate 0 <= x < 1.
317  \param expo A unary function object whose operator() takes the mean of an exponential process as an argument and returns a deviate from an exponential distribution with that mean
318  \param initialized_sample An initialized vector of chromosomes for a single population. For example, this may be the return value of init_sample. This object is used to copy-construct a non-const sample for the simulation
319  \param initialized_marginal An initialized marginal tree of the appropriate sample size for the simulation. For example, the return value of init_marginal.
320  \param G The rate of exponential change in effective size. If G>0, the population grows exponentially (forwards in time). If G<0, it shrinks (again, forwards in time).
321  \param t_begin The time in the past (in units of 4Ne generations) at which population size change begins (i.e., ends, moving forward in time)
322  \param t_end The time in the past (in units of 4Ne generations) at which populations size change ends (begins forward in time)
323  \param rho The population recombination rate 4N0r. The number of "sites" simulated is not neccesary, as it can be obtained from initialized_sample[0].last()+1.
324  \param size_at_end At time \a t_end in the past, the population size is set to \a size_at_end. If \a size_at_end = 1, the population is set to the same size that is was at t=0 (i.e. the beginning of the simulation). If \a size_at_and < 0, the population size is not adjusted at \a t_end. In other words, it is left at whatever it grew or shrank to.
325  \return The ancestral recombination graph (arg) describing the sample history.
326  \pre t_begin>=0 and t_end>=0 and t_end>=t_begin and rho>=0 and initialized_marginal.nsam == initialized_sample.size()
327  \note Preconditions are checked by the assert macro, and are therefore disabled when compiling with -DNDEBUG. No checks or warnings are otherwised performed nor given.
328  \ingroup coalescent
329  */
330  {
331  return exponential_change_details(uni,uni01,expo,initialized_sample,initialized_marginal,
332  G,t_begin,t_end,rho,size_at_end);
333  }
334 
335  template<typename uniform_generator,
336  typename uniform01_generator,
337  typename exponential_generator>
338  arg exponential_change( const uniform_generator & uni,
339  const uniform01_generator & uni01,
340  const exponential_generator & expo,
341  const std::vector<chromosome> & initialized_sample,
342  const marginal & initialized_marginal,
343  const double & G,
344  const double & t_begin,
345  const double & t_end,
346  const double & rho,
347  const double & size_at_end)
348  /*!
349  @brief Coalescent simulation of exponential change in population size
350  Simulate a single population whose size changes exponentially during some period of time. The relevant command line options for Hudson's program "ms" would be: -eG t_begin -eG t_end 0. -eN t_end size_at_end.
351  \param uni A binary function object (or equivalent) that returns a random deviate between a and b such that a <= x < b. a and b are the arguments to operator() of \a uni
352  \param uni01 A function object (or equivalent) whose operator() takes no arguments and returns a random deviate 0 <= x < 1.
353  \param expo A unary function object whose operator() takes the mean of an exponential process as an argument and returns a deviate from an exponential distribution with that mean
354  \param initialized_sample An initialized vector of chromosomes for a single population. For example, this may be the return value of init_sample. This object is used to copy-construct a non-const sample for the simulation
355  \param initialized_marginal An initialized marginal tree of the appropriate sample size for the simulation. For example, the return value of init_marginal.
356  \param G The rate of exponential change in effective size. If G>0, the population grows exponentially (forwards in time). If G<0, it shrinks (again, forwards in time).
357  \param t_begin The time in the past (in units of 4Ne generations) at which population size change begins (i.e., ends, moving forward in time)
358  \param t_end The time in the past (in units of 4Ne generations) at which populations size change ends (begins forward in time)
359  \param rho The population recombination rate 4N0r. The number of "sites" simulated is not neccesary, as it can be obtained from initialized_sample[0].last()+1.
360  \param size_at_end At time \a t_end in the past, the population size is set to \a size_at_end. If \a size_at_end = 1, the population is set to the same size that is was at t=0 (i.e. the beginning of the simulation). If \a size_at_and < 0, the population size is not adjusted at \a t_end. In other words, it is left at whatever it grew or shrank to.
361  \return The ancestral recombination graph (arg) describing the sample history.
362  \pre t_begin>=0 and t_end>=0 and t_end>=t_begin and rho>=0 and initialized_marginal.nsam == initialized_sample.size()
363  \note Preconditions are checked by the assert macro, and are therefore disabled when compiling with -DNDEBUG. No checks or warnings are otherwised performed nor given.
364  \ingroup coalescent
365  */
366  {
367  return exponential_change_details(uni,uni01,expo,initialized_sample,initialized_marginal,
368  G,t_begin,t_end,rho,size_at_end);
369  }
370 
371  template<typename uniform_generator,
372  typename uniform01_generator,
373  typename exponential_generator>
374  arg snm( uniform_generator & uni,
375  uniform01_generator & uni01,
376  exponential_generator & expo,
377  const std::vector<chromosome> & initialized_sample,
378  const marginal & initialized_marginal,
379  const double & rho)
380  {
381  return exponential_change_details(uni,uni01,expo,initialized_sample,initialized_marginal,
382  0.,0.,0.,rho,1.);
383  }
384 
385  template<typename uniform_generator,
386  typename uniform01_generator,
387  typename exponential_generator>
388  arg snm( const uniform_generator & uni,
389  const uniform01_generator & uni01,
390  const exponential_generator & expo,
391  const std::vector<chromosome> & initialized_sample,
392  const marginal & initialized_marginal,
393  const double & rho)
394  {
395  return exponential_change_details(uni,uni01,expo,initialized_sample,initialized_marginal,
396  0.,0.,0.,rho,1.);
397  }
398  }
399 } //namespace Sequence
400 
401 #endif //include guard