doc/html/PolyTableIterators_8cc_source.html

 /*
   Example of how to use the iterators
   of class Sequence::PolyTable. In
   this example, Sequence::PolySites is used,
   which is derived from Sequence::PolyTable
 */
 #include <Sequence/Fasta.hpp>
 #include <Sequence/Alignment.hpp>
 #include <Sequence/PolySites.hpp>
 #include <Sequence/PolySNP.hpp>
 #include <Sequence/PolyTableSlice.hpp>
 #include <iostream>
 #include <algorithm>
 #include <iterator>

 using namespace std;


 int main(int argc, char **argv)
 {
   const char *infilename = argv[1];

   vector<Sequence::Fasta> data;

   Sequence::Alignment::GetData(data,infilename);

   if ( Sequence::Alignment::IsAlignment(data) &&
        Sequence::Alignment::validForPolyAnalysis(data.begin(),data.end()) )
     {
       Sequence::PolySites SNPs(data);

       //1. use PolyTable::pos_iterator to access site positions

       //Print positions of segregating sites
       cout << "Original position order:\n";
       copy(SNPs.pbegin(),SNPs.pend(),
            ostream_iterator<double>(cout," "));
       cout << endl;

       //2. PolyTable::pos_iterator can be accessed in const-
       // and non-const contexts, allowing us to do things like
       // permute site positions.  Note that this only permutes
       // the positions, not the states associated with them.
       // This allows, amongst other things, to calculate the
       // significance of linkage-disequilibrium measures by
       // a permutation test.
       random_shuffle(SNPs.pbegin(),SNPs.pend());

       cout << "Permuted positions:\n";
       copy(SNPs.pbegin(),SNPs.pend(),
            ostream_iterator<double>(cout," "));
       cout <<'\n'<< endl;

       //3. Access the individuals using iterators.
       //This iterator type is PolyTable::data_iterator,
       //which can be accessed in both const- and non-const
       //contexts.
       cout << "Original data:\n";
       copy(SNPs.begin(),SNPs.end(),ostream_iterator<string>(cout,"\n"));
       cout <<'\n'<< endl;

       //4. We can use PolyTable::data_iterator in
       //non-const contexts to do things like permute
       //the order of the haplotypes.  One application
       //of this would be to assess the significance of
       //population structure statistics (Fst and the like)
       //by permutation tests
       random_shuffle(SNPs.begin(),SNPs.end());
       cout << "Permuted data:\n";
       copy(SNPs.begin(),SNPs.end(),ostream_iterator<string>(cout,"\n"));
       cout << endl;

       //5. There is a special iterator, PolyTable::const_site_iterator,
       //which gives access to a single SNP in a const-context.  So far,
       //only const access is supported.  The value type of this iterator
       //is equivalent to std::pair<double,std::string>.  We will use this
       //iterator to calculate nucleotide diversity for the sample.  The
       //defnitiion of nucleotide diversity does not depend on either the order
       //of the SNPs nor the arrangement of haplotypes, so we're able to do the
       //calculation on the data that we've permuted.
       //We handle missing data by adjusting the sample size for each site.
       //This implementation is contrived, and we'd really use
       //Sequence::stateCounter or Sequence::makeCountList to handle the
       //counting of the nucleotides for us.
       double pi = 0.;
       Sequence::PolySites::const_site_iterator itr = SNPs.sbegin();
       while( itr < SNPs.send() )
         {
           //count up numbers of A,G,C,T, and N using std::count
           unsigned A = count(itr->second.begin(),itr->second.end(),'A');
           unsigned G = count(itr->second.begin(),itr->second.end(),'G');
           unsigned C = count(itr->second.begin(),itr->second.end(),'C');
           unsigned T = count(itr->second.begin(),itr->second.end(),'T');
           unsigned N = count(itr->second.begin(),itr->second.end(),'N');
           double SH = 1.; //SH = site heterozygosity

           //sample size at this site, w/o missing data
           unsigned n = itr->second.length() - N;

           //For each character state, the homozygosity is the probability
           //of sampling the observed count for that state (k) out of n alleles times the
           //probability of sampling it again out of n-1 alleles, given that you've sampled
           //it once (i.e. (k-1)/(n-1).
           //Pi is the sum of 1-homozygosity accross all segregating sites
           SH -= (A>0) ? (double(A)/double(n))*(double(A-1)/double(n-1)) : 0.;
           SH -= (G>0) ? (double(G)/double(n))*(double(G-1)/double(n-1)) : 0.;
           SH -= (C>0) ? (double(C)/double(n))*(double(C-1)/double(n-1)) : 0.;
           SH -= (T>0) ? (double(T)/double(n))*(double(T-1)/double(n-1)) : 0.;
           pi += SH;
           ++itr;
         }
       cout << "Pi (for the entire region) = "<< pi << endl;
     }
 }
Sequence::PolySites
Polymorphism tables for sequence data.
Definition: PolySites.hpp:33

std
STL namespace.

PolySNP.hpp
declaration of Sequence::PolySNP, a class to analyze SNP data

Alignment.hpp
Declaration of namespace Sequence::Alignment.

PolySites.hpp
Sequence::PolySites, generates polymorphism tables from data.

PolyTableSlice.hpp

Fasta.hpp
Declaration of Sequence::Fasta streams.