libsequence  1.9.5
SeqAlphabets.cc
Go to the documentation of this file.
3 #include <algorithm>
4 #include <cctype>
5 #include <array>
6 
7 namespace Sequence {
8  const alphabet_t dna_alphabet{ {'A','C','G','T',
9  'R','Y','S','W',
10  'K','M','B','D',
11  'H','V','N','-'} };
12 
13  const alphabet_t dna_poly_alphabet{ {'A','C','G','T', //0-3
14  '0','1','-','N', //4-7
15  '\0', //8
16  } };
17 
18  const alphabet_t::size_type NOTPOLYCHAR = dna_poly_alphabet.size();
19 
20  const alphabet_t::size_type POLYEOS = alphabet_t::size_type( std::distance(dna_poly_alphabet.begin(),
21  std::find(dna_poly_alphabet.begin(),
22  dna_poly_alphabet.end(),
23  '\0')
24  ) );
25  bool isDNA( const char & ch)
26  {
27  return std::find( dna_alphabet.begin(),
28  dna_alphabet.end(),
29  std::toupper(ch) ) != dna_alphabet.end();
30  }
31 
32  bool ambiguousNucleotide::operator()(const char & c) const
33  {
34  return std::distance( dna_alphabet.begin(),
35  std::find(dna_alphabet.begin(),
36  dna_alphabet.end(),
37  std::toupper(c)) ) > 3;
38  /*
39  const char ch = char(std::toupper(c));
40  return (ch != 'A' &&
41  ch != 'G' &&
42  ch != 'T' &&
43  ch != 'C' );
44  */
45  }
46 
47  bool invalidPolyChar::operator()(const char & nucleotide) const
48  {
49  auto itr = std::find(dna_poly_alphabet.begin(),
50  dna_poly_alphabet.end(),
51  std::toupper(nucleotide));
52  if(itr == dna_poly_alphabet.end()) return 1;
53  auto d = std::distance( dna_alphabet.begin(),
54  itr );
55  return ( d > 3 && d < 14 );
56  }
57 }
bool isDNA(const char &ch)
test if character is part of Sequence::dna_alphabet
Definition: SeqAlphabets.cc:25
std::array< const char, 16 > alphabet_t
Container type for nucleotide alphabets.
The namespace in which this library resides.
const alphabet_t dna_alphabet
Alphabet for DNA sequences. Valid DNA characters, upper-case only. Only '-' is accepted as the gap character...
Definition: SeqAlphabets.cc:8
bool operator()(const char &c) const
Definition: SeqAlphabets.cc:32
const alphabet_t::size_type NOTPOLYCHAR
An index into dna_poly_alphabet that is >= this value does not correspond to a valid character for variation analysis.
Definition: SeqAlphabets.cc:18
const alphabet_t dna_poly_alphabet
Alphabet for polymorphism (SNP) analysis. 16 characters are used so that we may encode 2 nucleotides ...
Definition: SeqAlphabets.cc:13
const alphabet_t::size_type POLYEOS
The value that terminates an encoded string of SNP data.
Definition: SeqAlphabets.cc:20
bool operator()(const char &nucleotide) const
Definition: SeqAlphabets.cc:47