libsequence  1.9.5
PolyTableIterators.cc
1 
3 /*
4  Example of how to use the iterators
5  of class Sequence::PolyTable. In
6  this example, Sequence::PolySites is used,
7  which is derived from Sequence::PolyTable
8 */
9 #include <Sequence/Fasta.hpp>
10 #include <Sequence/Alignment.hpp>
11 #include <Sequence/PolySites.hpp>
12 #include <Sequence/PolySNP.hpp>
14 #include <iostream>
15 #include <algorithm>
16 #include <iterator>
17 
18 using namespace std;
19 
20 
21 int main(int argc, char **argv)
22 {
23  const char *infilename = argv[1];
24 
25  vector<Sequence::Fasta> data;
26 
27  Sequence::Alignment::GetData(data,infilename);
28 
29  if ( Sequence::Alignment::IsAlignment(data) &&
30  Sequence::Alignment::validForPolyAnalysis(data.begin(),data.end()) )
31  {
32  Sequence::PolySites SNPs(data);
33 
34  //1. use PolyTable::pos_iterator to access site positions
35 
36  //Print positions of segregating sites
37  cout << "Original position order:\n";
38  copy(SNPs.pbegin(),SNPs.pend(),
39  ostream_iterator<double>(cout," "));
40  cout << endl;
41 
42  //2. PolyTable::pos_iterator can be accessed in const-
43  // and non-const contexts, allowing us to do things like
44  // permute site positions. Note that this only permutes
45  // the positions, not the states associated with them.
46  // This allows, amongst other things, to calculate the
47  // significance of linkage-disequilibrium measures by
48  // a permutation test.
49  random_shuffle(SNPs.pbegin(),SNPs.pend());
50 
51  cout << "Permuted positions:\n";
52  copy(SNPs.pbegin(),SNPs.pend(),
53  ostream_iterator<double>(cout," "));
54  cout <<'\n'<< endl;
55 
56  //3. Access the individuals using iterators.
57  //This iterator type is PolyTable::data_iterator,
58  //which can be accessed in both const- and non-const
59  //contexts.
60  cout << "Original data:\n";
61  copy(SNPs.begin(),SNPs.end(),ostream_iterator<string>(cout,"\n"));
62  cout <<'\n'<< endl;
63 
64  //4. We can use PolyTable::data_iterator in
65  //non-const contexts to do things like permute
66  //the order of the haplotypes. One application
67  //of this would be to assess the significance of
68  //population structure statistics (Fst and the like)
69  //by permutation tests
70  random_shuffle(SNPs.begin(),SNPs.end());
71  cout << "Permuted data:\n";
72  copy(SNPs.begin(),SNPs.end(),ostream_iterator<string>(cout,"\n"));
73  cout << endl;
74 
75  //5. There is a special iterator, PolyTable::const_site_iterator,
76  //which gives access to a single SNP in a const-context. So far,
77  //only const access is supported. The value type of this iterator
78  //is equivalent to std::pair<double,std::string>. We will use this
79  //iterator to calculate nucleotide diversity for the sample. The
80  //defnitiion of nucleotide diversity does not depend on either the order
81  //of the SNPs nor the arrangement of haplotypes, so we're able to do the
82  //calculation on the data that we've permuted.
83  //We handle missing data by adjusting the sample size for each site.
84  //This implementation is contrived, and we'd really use
85  //Sequence::stateCounter or Sequence::makeCountList to handle the
86  //counting of the nucleotides for us.
87  double pi = 0.;
88  Sequence::PolySites::const_site_iterator itr = SNPs.sbegin();
89  while( itr < SNPs.send() )
90  {
91  //count up numbers of A,G,C,T, and N using std::count
92  unsigned A = count(itr->second.begin(),itr->second.end(),'A');
93  unsigned G = count(itr->second.begin(),itr->second.end(),'G');
94  unsigned C = count(itr->second.begin(),itr->second.end(),'C');
95  unsigned T = count(itr->second.begin(),itr->second.end(),'T');
96  unsigned N = count(itr->second.begin(),itr->second.end(),'N');
97  double SH = 1.; //SH = site heterozygosity
98 
99  //sample size at this site, w/o missing data
100  unsigned n = itr->second.length() - N;
101 
102  //For each character state, the homozygosity is the probability
103  //of sampling the observed count for that state (k) out of n alleles times the
104  //probability of sampling it again out of n-1 alleles, given that you've sampled
105  //it once (i.e. (k-1)/(n-1).
106  //Pi is the sum of 1-homozygosity accross all segregating sites
107  SH -= (A>0) ? (double(A)/double(n))*(double(A-1)/double(n-1)) : 0.;
108  SH -= (G>0) ? (double(G)/double(n))*(double(G-1)/double(n-1)) : 0.;
109  SH -= (C>0) ? (double(C)/double(n))*(double(C-1)/double(n-1)) : 0.;
110  SH -= (T>0) ? (double(T)/double(n))*(double(T-1)/double(n-1)) : 0.;
111  pi += SH;
112  ++itr;
113  }
114  cout << "Pi (for the entire region) = "<< pi << endl;
115  }
116 }
Polymorphism tables for sequence data.
Definition: PolySites.hpp:33
STL namespace.
declaration of Sequence::PolySNP, a class to analyze SNP data
Declaration of namespace Sequence::Alignment.
Sequence::PolySites, generates polymorphism tables from data.
Declaration of Sequence::Fasta streams.