libsequence  1.9.5
PolyTableIterators.cc
/*
Example of how to use the iterators
of class Sequence::PolyTable. In
this example, Sequence::PolySites is used,
which is derived from Sequence::PolyTable
*/
#include <iostream>
#include <algorithm>
#include <iterator>
using namespace std;
int main(int argc, char **argv)
{
const char *infilename = argv[1];
vector<Sequence::Fasta> data;
Sequence::Alignment::GetData(data,infilename);
Sequence::Alignment::validForPolyAnalysis(data.begin(),data.end()) )
{
Sequence::PolySites SNPs(data);
//1. use PolyTable::pos_iterator to access site positions
//Print positions of segregating sites
cout << "Original position order:\n";
copy(SNPs.pbegin(),SNPs.pend(),
ostream_iterator<double>(cout," "));
cout << endl;
//2. PolyTable::pos_iterator can be accessed in const-
// and non-const contexts, allowing us to do things like
// permute site positions. Note that this only permutes
// the positions, not the states associated with them.
// This allows, amongst other things, to calculate the
// significance of linkage-disequilibrium measures by
// a permutation test.
random_shuffle(SNPs.pbegin(),SNPs.pend());
cout << "Permuted positions:\n";
copy(SNPs.pbegin(),SNPs.pend(),
ostream_iterator<double>(cout," "));
cout <<'\n'<< endl;
//3. Access the individuals using iterators.
//This iterator type is PolyTable::data_iterator,
//which can be accessed in both const- and non-const
//contexts.
cout << "Original data:\n";
copy(SNPs.begin(),SNPs.end(),ostream_iterator<string>(cout,"\n"));
cout <<'\n'<< endl;
//4. We can use PolyTable::data_iterator in
//non-const contexts to do things like permute
//the order of the haplotypes. One application
//of this would be to assess the significance of
//population structure statistics (Fst and the like)
//by permutation tests
random_shuffle(SNPs.begin(),SNPs.end());
cout << "Permuted data:\n";
copy(SNPs.begin(),SNPs.end(),ostream_iterator<string>(cout,"\n"));
cout << endl;
//5. There is a special iterator, PolyTable::const_site_iterator,
//which gives access to a single SNP in a const-context. So far,
//only const access is supported. The value type of this iterator
//is equivalent to std::pair<double,std::string>. We will use this
//iterator to calculate nucleotide diversity for the sample. The
//defnitiion of nucleotide diversity does not depend on either the order
//of the SNPs nor the arrangement of haplotypes, so we're able to do the
//calculation on the data that we've permuted.
//We handle missing data by adjusting the sample size for each site.
//This implementation is contrived, and we'd really use
//Sequence::stateCounter or Sequence::makeCountList to handle the
//counting of the nucleotides for us.
double pi = 0.;
Sequence::PolySites::const_site_iterator itr = SNPs.sbegin();
while( itr < SNPs.send() )
{
//count up numbers of A,G,C,T, and N using std::count
unsigned A = count(itr->second.begin(),itr->second.end(),'A');
unsigned G = count(itr->second.begin(),itr->second.end(),'G');
unsigned C = count(itr->second.begin(),itr->second.end(),'C');
unsigned T = count(itr->second.begin(),itr->second.end(),'T');
unsigned N = count(itr->second.begin(),itr->second.end(),'N');
double SH = 1.; //SH = site heterozygosity
//sample size at this site, w/o missing data
unsigned n = itr->second.length() - N;
//For each character state, the homozygosity is the probability
//of sampling the observed count for that state (k) out of n alleles times the
//probability of sampling it again out of n-1 alleles, given that you've sampled
//it once (i.e. (k-1)/(n-1).
//Pi is the sum of 1-homozygosity accross all segregating sites
SH -= (A>0) ? (double(A)/double(n))*(double(A-1)/double(n-1)) : 0.;
SH -= (G>0) ? (double(G)/double(n))*(double(G-1)/double(n-1)) : 0.;
SH -= (C>0) ? (double(C)/double(n))*(double(C-1)/double(n-1)) : 0.;
SH -= (T>0) ? (double(T)/double(n))*(double(T-1)/double(n-1)) : 0.;
pi += SH;
++itr;
}
cout << "Pi (for the entire region) = "<< pi << endl;
}
}