libsequence  1.9.5
codons.cc
1 
4 //C++ standard library headers
5 #include<string>
6 #include<iostream>
7 #include<fstream>
8 #include<algorithm>
9 #include<functional>
10 #include<utility>
11 #include<vector>
12 
13 //libsequence headers
14 #include <Sequence/Fasta.hpp>
15 #include <Sequence/CodonTable.hpp>
17 using namespace std;
18 using namespace Sequence;
19 
20 typedef string::size_type sst;
21 
22 //predicate function to count GC content
23 struct GorC : public unary_function< char, bool >
24 {
25  inline bool operator()(const char &ch) const
26  {
27  return ( ch == 'G' || ch == 'C' );
28  }
29 };
30 
31 int main(int argc, char *argv[])
32 {
33  Fasta sequence;
34  CodonUsageTable totalCodonUsage;
35  while (! cin.fail())
36  {
37  cin >> sequence;
38  unsigned length = sequence.length();
39 
40  //Sequence::Seq has standard iterators pointing to the beginning
41  //and end of a sequence. The functions are Seq::begin() and Seq::end().
42  //The iterators are of type std::string::iterator
43  //(and std::string::const_iterator is provided for const Sequence::Seq objects)
44 
45  //count the # of gap characters in the sequence
46  unsigned numgaps = count(sequence.begin(),sequence.end(),'-');
47  //count the # of missing characters in the sequence
48  unsigned num_missing = count(sequence.begin(),sequence.end(),'N');
49 
50  unsigned datalen = length-numgaps-num_missing;
51 
52  //calculate GC content for the whole gene
53  unsigned G_or_C = 0;
54  G_or_C = count_if(sequence.begin(),sequence.end(),GorC());
55  double GC = double(G_or_C)/double(datalen);
56 
57  //count codon usage
58  CodonUsageTable UsageTable= makeCodonUsageTable(&sequence);
59 
60  //Use a template operator += from Sequence/CountingOperators.hpp
61  //to keep a running total of codon usage
62  totalCodonUsage += UsageTable;
63  //The class Sequence::Seq has a member function GetSeq(void)
64  //which returns the sequence as a std::string. This allows
65  //one to use the rich set of C++ functions for finding
66  //substrings (string::find(), string::rfind(), etc)
67  //without having to write wrappers for these functions in the
68  //Seq class
69  string seq = sequence.GetSeq();
70 
71  //use std::string functions to count GC content at third positions (GC3)
72  sst pos = 0;
73  unsigned G3=0,C3=0;
74  while ( (pos = seq.find("G",pos)) != string::npos)
75  {
76  //need to check against pos + 1 because pos is an index,
77  //so adding 1 has the effect of counting positions starting
78  //from 1, so that 3rd positions are 3,6,9,... instead
79  //of 2,5,8,...
80  if (unsigned(pos+1) % 3 == 0.)
81  {
82  ++G3;
83  }
84  ++pos;
85  }
86 
87  pos = 0;
88  while ( (pos = seq.find("C",pos)) != string::npos )
89  {
90  if (unsigned(pos+1) % 3 == 0.)
91  {
92  ++C3;
93  }
94  ++pos;
95  }
96 
97  //divide sum by # of third positions
98  double GC3 = double(G3+C3)/( double(datalen) / 3. );
99 
100  //output
101  cout << "Codon Usage Table for: "<< sequence.GetName() << endl;
102  cout << "Sequence length is: "<< sequence.length()<<endl;
103  cout << "Length exluding gap characters and missing data is: "<<datalen<<endl;
104  cout << "GC content of coding sequence is: "<<GC<<endl;
105  cout << "GC3 content of coding sequence is: "<<GC3<<endl;
106  unsigned pretty = 1;
107  for(unsigned i = 0 ; i < UsageTable.size() ; ++i)
108  {
109  if (pretty < 8)
110  {
111  cout << UsageTable[i].first << '\t' << UsageTable[i].second << '\t';
112  }
113  else
114  {
115  cout << UsageTable[i].first << '\t' << UsageTable[i].second << endl;
116  pretty = 0;
117  }
118  ++pretty;
119  }
120  cout << "//"<<endl;
121  }
122  cout << "Codon Usage Table for all regions\n";
123  unsigned pretty = 1;
124  for(unsigned i = 0 ; i < totalCodonUsage.size() ; ++i)
125  {
126  if (pretty < 8)
127  {
128  cout << totalCodonUsage[i].first << '\t' << totalCodonUsage[i].second << '\t';
129  }
130  else
131  {
132  cout << totalCodonUsage[i].first << '\t' << totalCodonUsage[i].second << endl;
133  pretty = 0;
134  }
135  ++pretty;
136  }
137  cout << "//\n";
138 }
bool operator()(const std::pair< key, value > &l, const std::pair< key, value > &r) const
FASTA sequence stream.
Definition: Fasta.hpp:49
STL namespace.
The namespace in which this library resides.
std::vector< std::pair< std::string, int > > CodonUsageTable
Definition: typedefs.hpp:43
facility to count codons in CDS sequence, function Sequence::makeCodonUsageTable
size_type length(void) const
Definition: Seq.cc:58
Declaration of Sequence::Fasta streams.
Declarations of operators to add associative containers together.
CodonUsageTable makeCodonUsageTable(const Seq *sequence)
Definition: CodonTable.cc:81