libsequence  1.9.5
Mutation.tcc
1 // Code for the -*- C++ -*-
2 #ifndef __SEQUENCE_COALESCENT_BITS_MUTATION_TCC__
3 #define __SEQUENCE_COALESCENT_BITS_MUTATION_TCC__
4 
5 #include <Sequence/Coalescent/TreeOperations.hpp>
6 #include <algorithm>
7 #include <numeric>
8 #include <functional>
9 #include <cassert>
10 #include <string>
11 
12 namespace Sequence
13 {
14  namespace coalsim {
15 #ifndef DOXYGEN_SKIP
16  template<typename uniform_generator>
17  void add_S_inf_sites_details( uniform_generator & uni ,
18  marginal::const_iterator history,
19  const double & tt,
20  const int & beg, const int & end,
21  const int & nsam, const int & nsites,
22  const int & S ,
23  const int & first_snp_index,
24  gamete_storage_type * gametes )
25  {
26  assert(S>=0);
27  assert(tt >= 0.);
28  assert(nsam > 0);
29  assert(nsites > 0);
30  for(std::vector<std::string>::size_type snp = std::vector<std::string>::size_type(first_snp_index) ;
31  snp < std::vector<std::string>::size_type(first_snp_index+S) ; ++snp)
32  {
33  double pos = uni(beg,end)/double(nsites);
34  gametes->first[snp]=pos;
35  int branch = pick_branch(history,nsam,uni(0.,tt));
36  for(std::vector<std::string>::size_type ind=0;
37  ind<std::vector<std::string>::size_type(nsam);
38  ++ind)
39  {
40  if( is_descendant(history,int(ind),branch) )
41  {
42  gametes->second[ind][snp] = '1';
43  }
44  else
45  {
46  gametes->second[ind][snp] = '0';
47  }
48  }
49  }
50  }
51 
52  template<typename poisson_generator,
53  typename uniform_generator>
54  std::vector<std::string>::size_type infinite_sites_details( poisson_generator & poiss,
55  uniform_generator & uni,
56  gamete_storage_type * gametes,
57  const int & nsites,
58  const arg & history,
59  const double & theta)
60  {
61  typedef std::vector<std::string>::size_type RTYPE;
62  assert(theta >= 0.);
63  int snp_index_i=0;
64  RTYPE ttlS=0;
65  arg::size_type seg=0,nsegs=history.size();
66  arg::const_iterator i = history.begin(),j=i;
67  ++j;
68  for( ; seg < nsegs ; ++i,++j,++seg)
69  {
70  double tt = total_time(i->begin(),i->nsam);
71  int end = (seg<nsegs-1) ? j->beg : nsites;
72  int beg = i->beg;
73  double mean = double(end-beg)*theta*tt/double(nsites);
74  int S = poiss(mean);
75  if(S>0)
76  {
77  ttlS+=RTYPE(S);
78  bool add=false;
79  while(ttlS >= MAX_SEGSITES)
80  {
81  MAX_SEGSITES += MAX_SEGS_INC;
82  add = true;
83  }
84  if(add)
85  {
86  gametes->first.resize(MAX_SEGSITES);
87  for(typename std::vector<std::string>::iterator itr =
88  gametes->second.begin() ;
89  itr < gametes->second.end() ;
90  ++itr)
91  {
92  itr->resize(MAX_SEGSITES);
93  }
94  }
95  add_S_inf_sites(uni,i->begin(),tt,beg,end,i->nsam,nsites,
96  S,snp_index_i,gametes);
97  snp_index_i+=S;
98  }
99  }
100  std::sort(gametes->first.begin(),
101  gametes->first.begin()+std::vector<std::string>::difference_type(ttlS));
102  return ttlS;
103  }
104 
105  template<typename uniform_generator>
106  std::vector<std::string>::size_type infinite_sites_details( uniform_generator & uni,
107  gamete_storage_type * gametes,
108  const int & nsites,
109  const arg & history,
110  const double * total_times,
111  const unsigned * segsites )
112  {
113  typedef std::vector<std::string>::size_type RTYPE;
114  arg::size_type seg=0,nsegs = history.size();
115  arg::const_iterator i=history.begin(),
116  j = history.begin();
117  ++j;
118  RTYPE ttlS = 0;
119  for( ; seg < nsegs ; ++seg,++i,++j )
120  {
121  if( *(segsites+seg) > 0 )
122  {
123  bool add = false;
124  while(ttlS + RTYPE(*(segsites+seg)) > MAX_SEGSITES)
125  {
126  MAX_SEGSITES += MAX_SEGS_INC;
127  add=true;
128  }
129  if(add)
130  {
131  gametes->first.resize(MAX_SEGSITES);
132  for(typename std::vector<std::string>::iterator itr =
133  gametes->second.begin() ;
134  itr < gametes->second.end() ;
135  ++itr)
136  {
137  itr->resize(MAX_SEGSITES);
138  }
139  }
140  int end = (seg<nsegs-1) ? j->beg : nsites;
141  int beg = i->beg;
142  add_S_inf_sites(uni,i->begin(),*(total_times+seg),
143  beg,end,i->nsam,nsites,
144  *(segsites+seg),ttlS,gametes);
145  }
146  ttlS += *(segsites+seg);
147  }
148  std::sort(gametes->first.begin(),
149  gametes->first.begin()+std::vector<std::string>::difference_type(ttlS));
150  return ttlS;
151  }
152 
153 
154 
155  template<typename poisson_generator,
156  typename uniform_generator>
157  SimData infinite_sites_sim_data_details( poisson_generator & poiss,
158  uniform_generator & uni,
159  const int & nsites,
160  const arg & history,
161  const double & theta)
162  {
163  std::vector<unsigned> segsites(history.size());
164  std::vector<double> total_times(history.size());
165  arg::const_iterator i = history.begin(),j=i;
166  ++j;
167  size_t nsegs = history.size();
168  for(size_t seg = 0 ; seg < nsegs ; ++seg,++i,++j)
169  {
170  int end = (seg<nsegs-1) ? j->beg : nsites;
171  int beg = i->beg;
172  total_times[seg] = total_time(i->begin(),i->nsam);
173  //segsites[seg] = unsigned(poiss(std::cref(total_times[seg]*theta*double(end-beg)/double(nsites))));
174  segsites[seg] = unsigned(poiss(total_times[seg]*theta*double(end-beg)/double(nsites)));
175  }
176  return infinite_sites_sim_data(uni,nsites,history,&total_times[0],&segsites[0]);
177  }
178 
179  template<typename uniform_generator>
180  SimData infinite_sites_sim_data_details( uniform_generator & uni,
181  const int & nsites,
182  const arg & history,
183  const double * total_times,
184  const unsigned * segsites )
185  {
186  assert(total_times != NULL);
187  assert(segsites != NULL);
188  const unsigned S = std::accumulate(segsites,segsites+history.size(),0u);
189  if(S==0)
190  return SimData();
191  unsigned nsam = unsigned(history.begin()->nsam);
192  std::vector<std::string> d(nsam,std::string(S,' '));
193  std::vector<double> pos(S);
194  auto pos_i = pos.begin();
195  unsigned seg=0,nsegs = unsigned(history.size());
196  arg::const_iterator i=history.begin(),
197  j = history.begin();
198  ++j;
199  int ttlS = 0;
200  //const double zero=0.;
201  for( ; seg < nsegs ; ++seg,++i,++j )
202  {
203  if( *(segsites+seg) > 0 )
204  {
205  int end = (seg<nsegs-1) ? j->beg : nsites;
206  int beg = i->beg;
207  const double tt = *(total_times+seg);
208  for(unsigned snp = unsigned(ttlS) ; snp < unsigned(ttlS)+*(segsites+seg) ; ++snp)
209  {
210  double pos = uni(beg,end)/double(nsites);
211  *(pos_i+snp)=pos;
212  int branch = pick_branch(i->begin(),nsam,uni(0.,tt));
213  for(unsigned ind=0;ind<nsam;++ind)
214  {
215  if( is_descendant(i->begin(),int(ind),branch) )
216  {
217  d[ind][snp] = '1';
218  }
219  else
220  {
221  d[ind][snp] = '0';
222  }
223  }
224  }
225  ttlS += *(segsites+seg);
226  }
227  }
228  std::sort(pos.begin(),pos.end());
229  return SimData(std::move(pos),std::move(d));
230  }
231 #endif
232 
233  template<typename uniform_generator>
234  void add_S_inf_sites ( uniform_generator & uni ,
235  marginal::const_iterator history,
236  const double & tt,
237  const int & beg, const int & end,
238  const int & nsam,
239  const int & nsites,
240  const int & S ,
241  const int & first_snp_index,
242  gamete_storage_type * gametes )
243  /*!
244  @brief Add S segregating sites to sample with a particular marginal history, according
245  to the infinitely-many sites model.
246 
247  This routine places a fixed number of segregating sites on a marginal history that
248  begins at position \a beg and ends at position \a end-1.
249  Mutations are assigned positions randomly on the continuous, half-open interval
250  [ beg/L,end/L ), where L = nsites-1.
251 
252  \param uni a uniform random number generator that takes two doubles as arguments
253  \param history the history onto which mutations will be placed
254  \param tt the total time on \a history
255  \param beg the first site in the history (0 <= beg < nsites)
256  \param end the last site in the history ( beg < end < nsites )
257  \param nsam the total sample size being simulated
258  \param nsites the number of mutational sites simulated
259  \param S the number of mutations to drop on \a history
260  \param first_snp_index the index at which mutations will begin to be added into \a gametes
261  \param gametes a container in which you are storing the mutations.
262  Must be allocated in the calling environment.
263  \ingroup coalescent
264  */
265  {
266  return add_S_inf_sites_details(uni,history,tt,beg,end,nsam,nsites,S,first_snp_index,gametes);
267  }
268 
269  template<typename uniform_generator>
270  void add_S_inf_sites ( const uniform_generator & uni ,
271  marginal::const_iterator history,
272  const double & tt,
273  const int & beg, const int & end,
274  const int & nsam,
275  const int & nsites,
276  const int & S ,
277  const int & first_snp_index,
278  gamete_storage_type * gametes )
279  /*!
280  @brief Add S segregating sites to sample with a particular marginal history, according
281  to the infinitely-many sites model.
282 
283  This routine places a fixed number of segregating sites on a marginal history that
284  begins at position \a beg and ends at position \a end-1.
285  Mutations are assigned positions randomly on the continuous, half-open interval
286  [ beg/L,end/L ), where L = nsites-1.
287 
288  \param uni a uniform random number generator that takes two doubles as arguments
289  \param history the history onto which mutations will be placed
290  \param tt the total time on \a history
291  \param beg the first site in the history (0 <= beg < nsites)
292  \param end the last site in the history ( beg < end < nsites )
293  \param nsam the total sample size being simulated
294  \param nsites the number of mutational sites simulated
295  \param S the number of mutations to drop on \a history
296  \param first_snp_index the index at which mutations will begin to be added into \a gametes
297  \param gametes a container in which you are storing the mutations.
298  Must be allocated in the calling environment.
299  \ingroup coalescent
300  */
301  {
302  return add_S_inf_sites_details(uni,history,tt,beg,end,nsam,nsites,S,first_snp_index,gametes);
303  }
304 
305  template<typename poisson_generator,
306  typename uniform_generator>
307  int infinite_sites( poisson_generator & poiss,
308  uniform_generator & uni,
309  gamete_storage_type * gametes,
310  const int & nsites,
311  const arg & history,
312  const double & theta )
313  /*!
314  @brief Apply the infinitely-many sites mutation model to an ancetral recombination graph
315  \param poiss a Poisson random number generator which takes the mean of the poisson as an argument
316  \param uni a uniform random number generator that takes two doubles as an argument
317  \param gametes object in which to store the simulated gametes
318  \param nsites the length of the region begin simulated
319  \param history the list of marginal histories for the sample
320  \param theta the coalescent-scaled mutation rate
321  \return the number of mutations placed on the tree
322  \ingroup coalescent
323  */
324  {
325  return infinite_sites_details(poiss,uni,gametes,nsites,history,theta);
326  }
327 
328  template<typename poisson_generator,
329  typename uniform_generator>
330  int infinite_sites( const poisson_generator & poiss,
331  const uniform_generator & uni,
332  gamete_storage_type * gametes,
333  const int & nsites,
334  const arg & history,
335  const double & theta )
336  /*!
337  @brief Apply the infinitely-many sites mutation model to an ancetral recombination graph
338  \param poiss a Poisson random number generator which takes the mean of the poisson as an argument
339  \param uni a uniform random number generator that takes two doubles as an argument
340  \param gametes object in which to store the simulated gametes
341  \param nsites the length of the region begin simulated
342  \param history the list of marginal histories for the sample
343  \param theta the coalescent-scaled mutation rate
344  \return the number of mutations placed on the tree
345  \ingroup coalescent
346  */
347  {
348  return infinite_sites_details(poiss,uni,gametes,nsites,history,theta);
349  }
350 
351  template<typename uniform_generator>
352  int infinite_sites( uniform_generator & uni,
353  gamete_storage_type * gametes,
354  const int & nsites,
355  const arg & history,
356  const double * total_times,
357  const unsigned * segsites )
358  /*!
359  @brief Apply the infinitely-many sites mutation model to an ancetral recombination graph with
360  a fixed number of segregating sites
361  \param uni a uniform random number generator that takes two doubles as an argument
362  \param gametes object in which to store the simulated gametes
363  \param nsites the length of the region begin simulated
364  \param history the list of marginal histories for the sample
365  \param total_times the total times on each marginal tree in \a history
366  \param segsites the number of segregating sites to place on each tree
367  \return the number of mutations placed on the tree
368  \note \a total_times and \a segsites must contain a number of elements equal to history.size()
369  \ingroup coalescent
370  */
371  {
372  return infinite_sites_details(uni,gametes,nsites,history,total_times,segsites);
373  }
374 
375  template<typename uniform_generator>
376  int infinite_sites( const uniform_generator & uni,
377  gamete_storage_type * gametes,
378  const int & nsites,
379  const arg & history,
380  const double * total_times,
381  const unsigned * segsites )
382  /*!
383  @brief Apply the infinitely-many sites mutation model to an ancetral recombination graph with
384  a fixed number of segregating sites
385  \param uni a uniform random number generator that takes two doubles as an argument
386  \param gametes object in which to store the simulated gametes
387  \param nsites the length of the region begin simulated
388  \param history the list of marginal histories for the sample
389  \param total_times the total times on each marginal tree in \a history
390  \param segsites the number of segregating sites to place on each tree
391  \return the number of mutations placed on the tree
392  \note \a total_times and \a segsites must contain a number of elements equal to history.size()
393  \ingroup coalescent
394  */
395 
396  { return infinite_sites_details(uni,gametes,nsites,history,total_times,segsites);
397  }
398 
399  template<typename poisson_generator,
400  typename uniform_generator>
401  SimData infinite_sites_sim_data( poisson_generator & poiss,
402  uniform_generator & uni,
403  const int & nsites,
404  const arg & history,
405  const double & theta)
406  /*!
407  @brief Apply the infinitely-many sites mutation model to an ancetral recombination graph
408  \param poiss a Poisson random number generator which takes the mean of the poisson as an argument
409  \param uni a uniform random number generator that takes two doubles as an argument
410  \param nsites the length of the region begin simulated
411  \param history the list of marginal histories for the sample
412  \param theta the coalescent-scaled mutation rate
413  \return an object of type SimData that represent the sample. (the gametes are also
414  stored in \a gametes). The SimData object can be passed directly into class PolySIM
415  for analysis
416  \ingroup coalescent
417  */
418  {
419  return infinite_sites_sim_data_details(poiss,uni,nsites,history,theta);
420  }
421 
422  template<typename poisson_generator,
423  typename uniform_generator>
424  SimData infinite_sites_sim_data( const poisson_generator & poiss,
425  const uniform_generator & uni,
426  const int & nsites,
427  const arg & history,
428  const double & theta)
429  /*!
430  @brief Apply the infinitely-many sites mutation model to an ancetral recombination graph
431  \param poiss a Poisson random number generator which takes the mean of the poisson as an argument
432  \param uni a uniform random number generator that takes two doubles as an argument
433  \param nsites the length of the region begin simulated
434  \param history the list of marginal histories for the sample
435  \param theta the coalescent-scaled mutation rate
436  \return an object of type SimData that represent the sample. (the gametes are also
437  stored in \a gametes). The SimData object can be passed directly into class PolySIM
438  for analysis
439  \ingroup coalescent
440  */
441  {
442  return infinite_sites_sim_data_details(poiss,uni,nsites,history,theta);
443  }
444 
445  template<typename uniform_generator>
446  SimData infinite_sites_sim_data( uniform_generator & uni,
447  const int & nsites,
448  const arg & history,
449  const double * total_times,
450  const unsigned * segsites)
451  /*!
452  @brief Apply the infinitely-many sites mutation model to an ancetral recombination graph with
453  a fixed number of segregating sites
454  \param uni a uniform random number generator that takes two doubles as an argument
455  \param nsites the length of the region begin simulated
456  \param history the list of marginal histories for the sample
457  \param total_times the total times on each marginal tree in \a history
458  \param segsites the number of segregating sites to place on each tree
459  \return an object of type SimData that represent the sample.
460  for analysis
461  \note \a total_times and \a segsites must contain a number of elements equal to history.size()
462  \ingroup coalescent
463  */
464  {
465  return infinite_sites_sim_data_details(uni,nsites,history,total_times,segsites);
466  }
467 
468  template<typename uniform_generator>
469  SimData infinite_sites_sim_data( const uniform_generator & uni,
470  const int & nsites,
471  const arg & history,
472  const double * total_times,
473  const unsigned * segsites)
474  /*!
475  @brief Apply the infinitely-many sites mutation model to an ancetral recombination graph with
476  a fixed number of segregating sites
477  \param uni a uniform random number generator that takes two doubles as an argument
478  \param nsites the length of the region begin simulated
479  \param history the list of marginal histories for the sample
480  \param total_times the total times on each marginal tree in \a history
481  \param segsites the number of segregating sites to place on each tree
482  \return an object of type SimData that represent the sample.
483  for analysis
484  \note \a total_times and \a segsites must contain a number of elements equal to history.size()
485  \ingroup coalescent
486  */
487  {
488  return infinite_sites_sim_data_details(uni,nsites,history,total_times,segsites);
489  }
490  }
491 } //ns sequence
492 #endif