libsequence  1.9.5
shortestPath.cc
1 /*
2 
3  Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
4 
5  Remove the brackets to email me.
6 
7  This file is part of libsequence.
8 
9  libsequence is free software: you can redistribute it and/or modify
10  it under the terms of the GNU General Public License as published by
11  the Free Software Foundation, either version 3 of the License, or
12  (at your option) any later version.
13 
14  libsequence is distributed in the hope that it will be useful,
15  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  GNU General Public License for more details.
18 
19  You should have received a copy of the GNU General Public License
20  long with libsequence. If not, see <http://www.gnu.org/licenses/>.
21 
22 */
23 
26 #include <Sequence/Translate.hpp>
27 #include <Sequence/Grantham.hpp>
30 #include <Sequence/Comparisons.hpp>
31 #include <algorithm>
32 
33 namespace Sequence
34 {
35 #ifndef DOXYGEN_SKIP //doxygen should skip this
36  class shortestPath::shortestPathImpl
37  {
38  public:
40  Sequence::Grantham distances;
41  double _distance;
42  std::string t1,t2;
43  std::vector<std::string> _path;
44  std::pair<double,shortestPath::pathType>process_path(const std::string &intermediate,
45  const Sequence::GeneticCodes & code);
46  std::pair<double,shortestPath::pathType>process_path2(const std::string &intermediate1,
47  const std::string &intermediate2,
48  const Sequence::GeneticCodes & code);
49  shortestPathImpl(const std::string &codon1,
50  const std::string &codon2,
51  const Sequence::GeneticCodes & code);
52  };
53 
54  shortestPath::shortestPathImpl::shortestPathImpl(const std::string &codon1,
55  const std::string &codon2,
56  const Sequence::GeneticCodes & code):
57  _type(shortestPath::pathType::AMBIG),
58  distances(Sequence::Grantham()),
59  _distance(0.)
60  {
61  if(codon1.length()!=3 || codon2.length() != 3)
62  {
63  throw std::runtime_error("Codons are not both of length 3");
64  }
65  t1 = Sequence::Translate(codon1.begin(),codon1.end(),code);
66  t2 = Sequence::Translate(codon2.begin(),codon2.end(),code);
67 
68  _path.push_back(codon1);
69 
70  //We have already thrown an exception if codons are not the same length
71  decltype(Sequence::NumDiffs(codon1,codon2)) ndiffs = Sequence::NumDiffs(codon1,codon2);
72 
73  if (ndiffs == 0)
74  {
75  _type = shortestPath::pathType::NONE;
76  }
77  else if (t1 != "X" && t2 != "X" &&
78  (std::find_if(codon1.begin(),codon1.end(),
79  ambiguousNucleotide()) == codon1.end()) &&
80  (std::find_if(codon2.begin(),codon2.end(),
81  ambiguousNucleotide()) == codon2.end()) )
82  {
83  if (ndiffs == 1)
84  {
85  _distance = distances(t1[0],t2[0]);
86  if(t1 != t2)
87  {
88  _type = shortestPath::pathType::N;
89  }
90  else
91  {
92  _type = shortestPath::pathType::S;
93  }
94 
95  }
96  else if (ndiffs == 2)
97  {
98  auto intermediates = Sequence::Intermediates2(codon1,codon2);
99 
100  std::vector< std::pair<double, shortestPath::pathType> >
101  paths(2,std::pair<double, shortestPath::pathType>(0.,shortestPath::pathType::AMBIG));
102 
103  //update pathway lengths
104  paths[0] = process_path(intermediates[0],code);
105  paths[1] = process_path(intermediates[1],code);
106  if (paths[0].first < paths[1].first)
107  {
108  _type = paths[0].second;
109  _distance = paths[0].first;
110  _path.push_back(intermediates[0]);
111  }
112  else
113  {
114  _type = paths[1].second;
115  _distance = paths[1].first;
116  _path.push_back(intermediates[1]);
117  }
118  }
119  else if (ndiffs == 3)
120  {
121  auto intermediates = Sequence::Intermediates3(codon1,codon2);
122  std::vector< std::pair<double, shortestPath::pathType> >
123  paths(6,std::pair<double, shortestPath::pathType>(0.,shortestPath::pathType::AMBIG));
124  //there are 6 paths b/w codons that differ at all 3 sites
125  //the indexing of array intermediates is describing in the documentation
126  //for class Sequence::ThreeSubs in the manual
127  paths[0] = process_path2(intermediates[0],intermediates[1],code);
128  paths[1] = process_path2(intermediates[0],intermediates[2],code);
129  paths[2] = process_path2(intermediates[3],intermediates[4],code);
130  paths[3] = process_path2(intermediates[3],intermediates[5],code);
131  paths[4] = process_path2(intermediates[6],intermediates[7],code);
132  paths[5] = process_path2(intermediates[6],intermediates[8],code);
133 
134  size_t shortest = 0;
135  double shortest_path = paths[0].first;
136  for(unsigned i = 0 ; i < 6 ; ++i)
137  {
138  if (paths[i].first < shortest_path)
139  {
140  shortest=i;
141  shortest_path=paths[i].first;
142  }
143  }
144  _type = paths[shortest].second;
145  _distance = paths[shortest].first;
146  switch (shortest)
147  {
148  case 0:
149  _path.push_back(intermediates[0]);
150  _path.push_back(intermediates[1]);
151  break;
152  case 1:
153  _path.push_back(intermediates[0]);
154  _path.push_back(intermediates[2]);
155  break;
156  case 2:
157  _path.push_back(intermediates[3]);
158  _path.push_back(intermediates[4]);
159  break;
160  case 3:
161  _path.push_back(intermediates[3]);
162  _path.push_back(intermediates[5]);
163  break;
164  case 4:
165  _path.push_back(intermediates[6]);
166  _path.push_back(intermediates[7]);
167  break;
168  case 5:
169  _path.push_back(intermediates[6]);
170  _path.push_back(intermediates[8]);
171  break;
172  }
173  }
174  }
175  else
176  {
177  _type = shortestPath::pathType::AMBIG;
178  }
179  _path.push_back(codon2);
180  }
181 
182  std::pair<double,shortestPath::pathType>
183  shortestPath::shortestPathImpl::process_path(const std::string &intermediate,
184  const Sequence::GeneticCodes & code)
185  {
186  std::string tint = Sequence::Translate(intermediate.begin(),intermediate.end(),
187  code);
188  shortestPath::pathType t = shortestPath::pathType::AMBIG;
189  double d = ( distances(t1[0],tint[0]) + distances(tint[0],t2[0]) );
190  if ( (t1 != tint) && (tint != t2) )
191  {
192  t = shortestPath::pathType::NN;
193  }
194  else if ( ((t1 != tint) && (tint == t2)) ||
195  ((t1 == tint) && (tint != t2)) )
196  {
197  t = shortestPath::pathType::SN;
198  }
199  else if ( t1==tint && tint==t2 )
200  {
201  t = shortestPath::pathType::SS;
202  }
203  return std::make_pair(d,t);
204  }
205 
206  std::pair<double,shortestPath::pathType>
207  shortestPath::shortestPathImpl::process_path2(const std::string &intermediate1,
208  const std::string &intermediate2,
209  const Sequence::GeneticCodes & code)
210  {
211  std::string tint1 = Sequence::Translate(intermediate1.begin(),
212  intermediate1.end(),code);
213  std::string tint2 = Sequence::Translate(intermediate2.begin(),
214  intermediate2.end(),code);
215  shortestPath::pathType t = shortestPath::pathType::AMBIG;
216  double d = (distances(t1[0],tint1[0]) +
217  distances(tint1[0],tint2[0]) +
218  distances(tint2[0],t2[0]));
219 
220  bool a = (t1==tint1),
221  b = (tint1==tint2),
222  c = (tint2==t2);
223 
224  if ( !a && !b && !c )
225  {
226  t = shortestPath::pathType::NNN;
227  }
228  else if ( (a && b && !c) ||
229  (a && !b && c) ||
230  (!a && b && c) )
231  {
232  t = shortestPath::pathType::SSN;
233  }
234  else if ( (!a && !b && c) ||
235  (!a && b && !c) ||
236  ( a && !b && !c) )
237  {
238  t = shortestPath::pathType::SNN;
239  }
240 
241  else if (a && b && c)
242  {
243  t = shortestPath::pathType::SSS;
244  }
245  return std::make_pair(d,t);
246  }
247 #endif
248 
249  shortestPath::shortestPath(const std::string &codon1,
250  const std::string &codon2,
251  const Sequence::GeneticCodes & code)
252 
253 
254 
262  {
263  try
264  {
265  impl = std::unique_ptr<shortestPath::shortestPathImpl>(new shortestPath::shortestPathImpl(codon1,codon2,code));
266  }
267  catch(std::runtime_error &e)
268  {
269  throw;
270  }
271  catch(std::exception &e)
272  {
273  throw (std::runtime_error(e.what()));
274  }
275  catch (...)
276  {
277  throw (std::runtime_error("caught exception of unknown type"));
278  }
279  }
280 
281  shortestPath::~shortestPath()
282  {
283  }
284 
290  {
291  return impl->_type;
292  }
293 
294  shortestPath::const_iterator shortestPath::begin() const
300  {
301  return impl->_path.begin();
302  }
303 
304  shortestPath::const_iterator shortestPath::end() const
310  {
311  return impl->_path.end();
312  }
313 
318  {
319  return impl->_distance;
320  }
321 
322  std::pair<unsigned,unsigned> mutsShortestPath(const std::string &codon1,
323  const std::string &codon2,
324  const Sequence::GeneticCodes & code)
325 
326 
327 
352  {
353  std::pair<unsigned,unsigned> rv;
354  try
355  {
356  shortestPath sp(codon1,codon2,code);
358  switch (type)
359  {
360  case shortestPath::pathType::S :
361  {
362  rv = std::make_pair(1,0);
363  break;
364  }
365  case shortestPath::pathType::N :
366  {
367  rv = std::make_pair(0,1);
368  break;
369  }
370  case shortestPath::pathType::SS :
371  {
372  rv = std::make_pair(2,0);
373  break;
374  }
375  case shortestPath::pathType::SN :
376  {
377  rv = std::make_pair(1,1);
378  break;
379  }
380  case shortestPath::pathType::NN :
381  {
382  rv = std::make_pair(0,2);
383  break;
384  }
385  case shortestPath::pathType::SSS :
386  {
387  rv = std::make_pair(3,0);
388  break;
389  }
390  case shortestPath::pathType::SSN :
391  {
392  rv = std::make_pair(2,1);
393  break;
394  }
395  case shortestPath::pathType::SNN :
396  {
397  rv = std::make_pair(1,2);
398  break;
399  }
400  case shortestPath::pathType::NNN :
401  {
402  rv = std::make_pair(0,3);
403  break;
404  }
405  case shortestPath::pathType::NONE :
406  {
407  rv = std::make_pair(0,0);
408  break;
409  }
410  case shortestPath::pathType::AMBIG :
411  {
412  rv = std::make_pair(SEQMAXUNSIGNED,SEQMAXUNSIGNED);
413  break;
414  }
415  }
416  return std::make_pair(SEQMAXUNSIGNED,SEQMAXUNSIGNED);
417  }
418  catch (std::runtime_error &e)
419  {
420  throw;
421  }
422  return rv;
423  }
424 
425  std::pair<unsigned,shortestPath::pathType> diffType(const std::string &codon1,
426  const std::string &codon2,
427  const Sequence::GeneticCodes & code)
428 
429 
430 
446  {
447  try
448  {
449  shortestPath sp(codon1,codon2,code);
451  //check preconditions
452  if (type == shortestPath::pathType::AMBIG)
453  {
454  return std::make_pair(SEQMAXUNSIGNED,type);
455  }
456  unsigned site=0,ndiffs=0;
457  for(unsigned i = 0 ; i < 3 ; ++i)
458  {
459  if (codon1[i] != codon2[i])
460  {
461  ++ndiffs;
462  site = i;
463  }
464  }
465  if( ndiffs != 1 )
466  {
467  return std::make_pair(SEQMAXUNSIGNED,type);
468  }
469  return std::make_pair(site,type);
470  }
471  catch (std::runtime_error &e)
472  {
473  throw;
474  }
475  }
476 
477  std::tuple<shortestPath::pathType,shortestPath::pathType,shortestPath::pathType>
478  diffTypeMulti(const std::string &codon1,
479  const std::string &codon2,
480  const Sequence::GeneticCodes &code)
481 
482 
483 
495  {
496  try
497  {
498  shortestPath::pathType p[3]; //one for each codon position
499  std::string t1(codon1),t2(codon2);
500  for(unsigned i = 0; i < 3 ; ++i)
501  {
502  //make codons that only differ at one position each
503  std::swap(t1[i],t2[i]);
504  std::pair<unsigned,shortestPath::pathType> dt1 = diffType(t1,codon1,code);
505  std::pair<unsigned,shortestPath::pathType> dt2 = diffType(t2,codon2,code);
506  if (dt1 == dt2)
507  p[i] = dt1.second;
508  else
509  p[i] = shortestPath::pathType::AMBIG;
510  //swap back...
511  std::swap(t1[i],t2[i]);
512  }
513  return std::make_tuple(p[0],p[1],p[2]);
514  }
515  catch(std::runtime_error &e)
516  {
517  throw;
518  }
519  }
520 }
std::string Translate(std::string::const_iterator beg, std::string::const_iterator end, Sequence::GeneticCodes genetic_code=GeneticCodes::UNIVERSAL, const char &gapchar='-')
Definition: Translate.cc:223
std::pair< unsigned, shortestPath::pathType > diffType(const std::string &codon1, const std::string &codon2, const Sequence::GeneticCodes &code=GeneticCodes::UNIVERSAL)
const_iterator begin() const
double path_distance() const
The namespace in which this library resides.
int NumDiffs(const std::string &seq1, const std::string &seq2, const bool &skip_missing=true, const bool &nucleic_acid=true)
Definition: Comparisons.cc:141
shortestPath(const std::string &codon1, const std::string &codon2, const Sequence::GeneticCodes &code=GeneticCodes::UNIVERSAL)
Inter2_t Intermediates2(const std::string &codon1, const std::string &codon2)
Calculate the intermediate codons between a pair of codons diverged at 2 positions.
declares Sequence::Translate,a function to translate CDS sequences into peptide sequences ...
Grantham&#39;s distances (Sequence::Grantham)
const unsigned SEQMAXUNSIGNED
Definition: SeqConstants.cc:32
declarations of Sequence::Intermediates2 and Sequence::Intermediates3
Calculate shortest path between 2 codons.
Grantham&#39;s distances.
Definition: Grantham.hpp:40
const_iterator end() const
std::pair< unsigned, unsigned > mutsShortestPath(const std::string &codon1, const std::string &codon2, const Sequence::GeneticCodes &code=GeneticCodes::UNIVERSAL)
Inter3_t Intermediates3(const std::string &codon1, const std::string &codon2)
Calculate the intermediate codons between a pair of codons diverged at 3 positions.
delcaration of routines for comparing DNA sequences This file declares a set of functions useful for ...
std::tuple< shortestPath::pathType, shortestPath::pathType, shortestPath::pathType > diffTypeMulti(const std::string &codon1, const std::string &codon2, const Sequence::GeneticCodes &code=GeneticCodes::UNIVERSAL)
Routines to find the shortest distance between any 2 codons, using Grantham&#39;s distance. Declares the class Sequence::shortestPath, and the functions Sequence::mutsShortestPath and Sequence::diffType.
pathType type() const