libsequence  1.9.5
polySiteVector_test.cc
Go to the documentation of this file.
1 
6 #include <Sequence/PolySites.hpp>
8 #include <iostream>
9 #include <algorithm>
10 #include <limits>
11 
12 using namespace std;
13 using namespace Sequence;
14 
15 //uses the "classic" stateCounter to determine MAF
16 double local_maf( const polymorphicSite & p )
17 {
18  stateCounter sc = for_each(p.second.begin(),
19  p.second.end(),stateCounter());
20  unsigned mcount = numeric_limits<unsigned>::max();
21  mcount = sc.a ? min(mcount,sc.a) : mcount;
22  mcount = sc.g ? min(mcount,sc.g) : mcount;
23  mcount = sc.c ? min(mcount,sc.c) : mcount;
24  mcount = sc.t ? min(mcount,sc.t) : mcount;
25  return double(mcount)/double(p.second.size()-sc.n);
26 }
27 
28 std::vector<std::string::size_type> local_has_missing( const polymorphicSite & p )
29 //case-insensitive!
30 {
31  typedef std::vector<std::string::size_type> rvtype;
32  rvtype rv;
33 
34  string::size_type f = p.second.find('N');
35  while (f != string::npos)
36  {
37  rv.push_back(f);
38  f = p.second.find('N',f+1);
39  }
40  return rv;
41 }
42 
43 int main( int argc, char ** argv )
44 {
45  //Can use C++11 initializer lists
46  //polymorphicSite is a typedef for pair<double,string>
47  polySiteVector x{ polymorphicSite(1.,"AAAT"),polymorphicSite(2.,"GANG") };
48 
49  //Print the data
50  cout << "The data:\n";
51  for( auto _x : x )
52  {
53  cout << _x.first << ' ' << _x.second << '\n';
54  }
55 
56  //Calculate MAFs
57  vector<double> mafs;
58  for_each( x.begin(),x.end(), [&mafs](const polymorphicSite & __p){ mafs.push_back(local_maf(__p)); } );
59  cout << "The minor allele frequencies:\n";
60  for( auto __maf : mafs )
61  {
62  cout << __maf << '\n';
63  }
64 
65  //Reverse the order of the segregating sites by sort.
66  //With "classic" PolyTables, this could not be done, b/c access to polymorphicSites was const-only
67  sort(x.begin(),x.end(),[](const polymorphicSite & lhs,
68  const polymorphicSite & rhs) { return lhs.first > rhs.first; });
69 
70  cout << "The data after sorting:\n";
71  for( auto _x : x )
72  {
73  cout << _x.first << ' ' << _x.second << '\n';
74  }
75 
76  //Who has missing data?
77  std::vector<std::string::size_type> missing;
78  for_each(x.begin(),x.end(),[&missing](const polymorphicSite & __p) {
79  std::vector<std::string::size_type> __temp = local_has_missing(__p);
80  copy(__temp.begin(),__temp.end(),back_inserter(missing));
81  });
82 
83  cout << "Index of individuals with missing data:\n";
84  for( auto __m : missing )
85  {
86  cout << __m << '\n';
87  }
88 
89 
90  //Delete sites with MAF <= 0.25
91  x.erase( remove_if(x.begin(),x.end(),[](const polymorphicSite & __p){ return local_maf(__p) <= 0.25; } ),x.end() );
92 
93  cout << "The data after applying MAF filter:\n";
94  for( auto _x : x )
95  {
96  cout << _x.first << ' ' << _x.second << '\n';
97  }
98 
99  //Convert to a PolySites
100  PolySites ps(x.begin(),x.end());
101 
102  cout << "Print out the classic object:\n"
103  << ps << '\n';
104 }
keep track of state counts at a site in an alignment or along a sequence
Polymorphism tables for sequence data.
Definition: PolySites.hpp:33
STL namespace.
The namespace in which this library resides.
std::vector< polymorphicSite > polySiteVector
Site-major variation tables in ASCII format.
Sequence::PolySites, generates polymorphism tables from data.
declaration of Sequence::stateCounter, a class to keep track of nucleotide counts either at a site in...
std::pair< double, std::string > polymorphicSite