libsequence  1.9.5
Comparisons.cc
1 /*
2 
3 Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
4 
5 Remove the brackets to email me.
6 
7 This file is part of libsequence.
8 
9 libsequence is free software: you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation, either version 3 of the License, or
12 (at your option) any later version.
13 
14 libsequence is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18 
19 You should have received a copy of the GNU General Public License
20 long with libsequence. If not, see <http://www.gnu.org/licenses/>.
21 
22 */
23 
24 #include <Sequence/SeqEnums.hpp>
26 #include <algorithm>
27 #include <iterator>
28 #include <cassert>
29 #include <string>
30 #include <cctype>
31 #include <stdexcept>
32 
33 namespace Sequence
34  {
35  Mutations TsTv (const char & i,const char & j)
40  {
41  auto k = std::distance( dna_alphabet.begin(),
42  std::find( dna_alphabet.begin(),
43  dna_alphabet.end(),
44  char(std::toupper(i)) ) );
45  if ( k > 3 )
46  {
47  std::string message("Sequence::TsTv error: ");
48  message += i;
49  message += " is not A,G,C, nor T.";
50  throw std::runtime_error(message.c_str());
51  }
52  auto l = std::distance( dna_alphabet.begin(),
53  std::find( dna_alphabet.begin(),
54  dna_alphabet.end(),
55  char(std::toupper(j)) ) );
56  if ( l > 3 )
57  {
58  std::string message("Sequence::TsTv error: ");
59  message += j;
60  message += " is not A,G,C, nor T.";
61  throw std::runtime_error(message.c_str());
62  }
63  auto type = k + l;
64  if (type%2 != 0) //if odd
65  return (Mutations::Tv); //a transversion
66  else if (type%2==0) //if even
67  return (Mutations::Ts); //a transition
68  return (Mutations::Unknown); //can be used for error checking
69  }
70 
71  Mutations TsTv (const int & i, const int & j)
76  {
77  assert( std::distance(dna_alphabet.begin(),
78  std::find( dna_alphabet.begin(),
79  dna_alphabet.end(),
80  std::toupper(i))) < 4 );
81  assert( std::distance(dna_alphabet.begin(),
82  std::find( dna_alphabet.begin(),
83  dna_alphabet.end(),
84  std::toupper(j))) < 4 );
85  int type = i + j;
86  if (type%2!=0.) //if odd
87  {
88  return (Mutations::Tv); //a transversion
89  }
90  else if (type%2==0.) //if even
91  {
92  return (Mutations::Ts); //a transition
93  }
94 
95  return (Mutations::Unknown);
96  }
97 
98  bool Different (const std::string & seq1,
99  const std::string & seq2,
100  const bool & skip_missing,
101  const bool & nucleic_acid)
112  {
113  if(! (seq1.length () == seq2.length ()))
114  return true;
115  if (skip_missing)
116  {
117  char MISSING = 'N';
118  if (!nucleic_acid)
119  MISSING = 'X';//for peptide sequences
120  for (unsigned i = 0 ; i < seq1.length() ; ++i)
121  {
122  const char _ch1 = char(std::toupper(seq1[i])),
123  _ch2 = char(std::toupper(seq2[i]));
124  if( (_ch1 != MISSING && _ch2 != MISSING) &&
125  _ch1 != _ch2 )
126  return 1;
127  }
128  }
129  else
130  {
131  for (unsigned i = 0 ; i < seq1.length() ; ++i)
132  {
133  if ( char(std::toupper(seq1[i])) != char(std::toupper(seq2[i])) )
134  return 1;
135  }
136  }
137  return 0;
138  }
139 
140 
141  int NumDiffs (const std::string & seq1,
142  const std::string & seq2,
143  const bool & skip_missing,
144  const bool & nucleic_acid)
154  {
155  int ndiff = 0;
156  size_t len = seq1.length();
157  if (seq1.length() != seq2.length())
158  {
159  return -1;
160  }
161  char MISSING = 'N';
162  if (!nucleic_acid)
163  MISSING = 'X';//for peptide sequences
164  for (unsigned i = 0; i < len; ++i)
165  {
166  const char _ch1 = char(std::toupper(seq1[i])),
167  _ch2 = char(std::toupper(seq2[i]));
168  if(skip_missing == true)
169  {
170  if( (_ch1 != MISSING &&
171  _ch2 != MISSING) &&
172  (_ch1 != _ch2 ) )
173  ++ndiff;
174  }
175  else
176  {
177  if (_ch1 != _ch2)
178  ++ndiff;
179  }
180  }
181  return ndiff;
182  }
183 
184  bool Gapped(const std::string &s)
192  {
193  return (s.find('-') != std::string::npos);
194  }
195 
196  bool NotAGap(const char &c)
201  {
202  return c != '-';
203  }
204 }
205 
Mutations TsTv(const char &i, const char &j)
Definition: Comparisons.cc:35
The namespace in which this library resides.
const alphabet_t dna_alphabet
Alphabet for DNA sequences Valid DNA characters. Upper-case only. Only - is accepted as gap character...
Definition: SeqAlphabets.cc:8
int NumDiffs(const std::string &seq1, const std::string &seq2, const bool &skip_missing=true, const bool &nucleic_acid=true)
Definition: Comparisons.cc:141
bool Different(const std::string &seq1, const std::string &seq2, const bool &skip_missing=true, const bool &nucleic_acid=true)
Definition: Comparisons.cc:98
bool NotAGap(const char &c)
Definition: Comparisons.cc:196
bool Gapped(const std::string &s)
Definition: Comparisons.cc:184
Definition of enumeration types.