libsequence  1.9.5
CoalescentFragmentsRescaling.cc
1 /*
2 
3  Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
4 
5  Remove the brackets to email me.
6 
7  This file is part of libsequence.
8 
9  libsequence is free software: you can redistribute it and/or modify
10  it under the terms of the GNU General Public License as published by
11  the Free Software Foundation, either version 3 of the License, or
12  (at your option) any later version.
13 
14  libsequence is distributed in the hope that it will be useful,
15  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  GNU General Public License for more details.
18 
19  You should have received a copy of the GNU General Public License
20  long with libsequence. If not, see <http://www.gnu.org/licenses/>.
21 
22 */
23 
25 #include <Sequence/SimData.hpp>
26 #include <cassert>
27 #include <numeric>
28 
29 namespace Sequence
30 {
31  namespace coalsim {
32  int sample_length( const std::vector< std::pair<int,int> > & fragments )
38  {
39  int sum = 0;
40  for( std::vector< std::pair<int,int> >::const_iterator i = fragments.begin() ;
41  i < fragments.end() ; ++i )
42  {
43  sum += i->second;
44  }
45  return sum;
46  }
47 
48  int total_length( const std::vector< std::pair<int,int> > & fragments )
54  {
55  int sum = 0;
56  for( std::vector< std::pair<int,int> >::const_iterator i = fragments.begin() ;
57  i < fragments.end() ; ++i )
58  {
59  sum += (i->first+i->second);
60  }
61  return sum;
62  }
63 
64  void calculate_scales(const std::vector< std::pair<int,int> > & fragments,
65  std::vector< std::pair<double,double> > * sample_scale,
66  std::vector< std::pair<double,double> > * mutation_scale )
82  {
83  sample_scale->resize(fragments.size());
84  mutation_scale->resize(fragments.size());
85  //1. need to get the total length of the region simulated
86  int ttl_len = total_length(fragments);
87  int ttl_sample_len = sample_length(fragments);
88 
89  //2. need to convert the positions in fragments into scaled units
90  //we'll store this as a pair of [beg,end) rather than distance, length
91  double dummy = 0.;
92  int ttl = 0;
93  std::vector< std::pair<double,double> >::iterator si = sample_scale->begin(),
94  mi = mutation_scale->begin();
95  for( std::vector<std::pair<int,int> >::const_iterator i = fragments.begin() ;
96  i < fragments.end() ; ++i,++si,++mi )
97  {
98  si->first = dummy;
99  si->second = double(i->second)/double(ttl_sample_len);
100  dummy += double(i->second)/double(ttl_sample_len);
101  mi->first = double(ttl)/double(ttl_len) + double(i->first)/double(ttl_len);
102  mi->second = double(i->second)/double(ttl_len);
103  ttl += i->first + i->second;
104  }
105  }
106 
108  const std::vector< std::pair<double,double> > & sample_scale,
109  const std::vector< std::pair<double,double> > & mutation_scale )
116  {
117  assert(mutation_scale.size()==sample_scale.size());
118  typedef std::vector<std::pair<double,double> >::const_iterator ci;
119  for(Sequence::SimData::pos_iterator p = d->pbegin() ; p < d->pend() ; ++p)
120  {
121  for( ci ss=sample_scale.begin(),ms=mutation_scale.begin() ;
122  ss < sample_scale.end() && ms < mutation_scale.end() ; ++ss,++ms )
123  {
124  if( *p >= ss->first &&
125  *p < (ss->first+ss->second) )
126  {
127  //is in fragment i
128  double delta = (*p-ss->first)/(ss->second);
129  *p = ms->first + delta*ms->second;
130  break;
131  }
132  }
133  }
134  }
135 
136  void rescale_arg( arg * sample_history,
137  const std::vector< std::pair<int,int> > & fragments )
143  {
144  for(arg::iterator ai = sample_history->begin() ; ai != sample_history->end() ; ++ai)
145  {
146  int ttl_len = 0;
147  int ttl_sample_len=0;
148  for( std::vector< std::pair<int,int> >::const_iterator f = fragments.begin() ;
149  f < fragments.end() ; ++f )
150  {
151  ttl_len += f->first;
152  if( ai->beg >= ttl_sample_len && ai->beg < (ttl_sample_len+f->second) )
153  //the i-th marginal is in fragment f
154  {
155  double delta = double(ai->beg-ttl_sample_len)/double(f->second);
156  ai->beg = ttl_len + int(delta*double(f->second));
157  break;
158  }
159  else
160  {
161  ttl_sample_len += f->second;
162  ttl_len+=f->second;
163  }
164  }
165  }
166  }
167 
168  double integrate_genetic_map( const std::vector<chromosome> & sample,
169  const int & current_nsam,
170  const std::vector<double> & genetic_map,
171  std::vector<double> * reclens)
189  {
190  assert(current_nsam > 0);
191  assert(!genetic_map.empty());
192  reclens->resize(std::vector<double>::size_type(current_nsam));
193  std::vector<double>::iterator ri = reclens->begin();
194  double rrec=0.;
195  for(std::vector<chromosome>::const_iterator chrom = sample.begin() ;
196  chrom < (sample.begin()+current_nsam) ; ++chrom,++ri)
197  {
198  int beg = chrom->first(), end = chrom->last();
199  assert(beg<=end);
200  double rho_chrom = std::accumulate(genetic_map.begin()+beg,genetic_map.begin()+end,0.);
201  *ri = rho_chrom;
202  rrec += rho_chrom;
203  }
204  return rrec;
205  }
206  }
207 } //ns Sequence
void calculate_scales(const std::vector< std::pair< int, int > > &fragments, std::vector< std::pair< double, double > > *sample_scale, std::vector< std::pair< double, double > > *mutation_scale)
This is a helper function that rescales physical distance in base pairs to continuous distance on the...
int total_length(const std::vector< std::pair< int, int > > &fragments)
When simulating partially linked regions, return the total length of the region.
std::list< marginal > arg
Ancestral Recombination Graph.
Definition: SimTypes.hpp:217
The namespace in which this library resides.
iterator begin()
Definition: Seq.cc:189
iterator end()
Definition: Seq.cc:198
Helper functions for simulating partially linked fragments One often wants to simulate partially link...
void rescale_mutation_positions(Sequence::SimData *d, const std::vector< std::pair< double, double > > &sample_scale, const std::vector< std::pair< double, double > > &mutation_scale) __attribute__((deprecated))
Rescales the positions of the mutations in d from the scale given in sample_scale to that given in mu...
double integrate_genetic_map(const std::vector< chromosome > &sample, const int &current_nsam, const std::vector< double > &genetic_map, std::vector< double > *reclens)
When simulating non-uniform recombination rates, the probability of recombination at each point in th...
Declaration of Sequence::SimData, a class representing polymorphism data from coalescent simulations ...
Data from coalescent simulations.
int sample_length(const std::vector< std::pair< int, int > > &fragments)
When simulating partially linked regions, return the total length of sample material that we are simu...
void rescale_arg(arg *sample_history, const std::vector< std::pair< int, int > > &fragments)
Rescales the beginnings of marginal trees in an ancestral recombination graph from a genetic scale to...