libsequence  1.9.5
Clustalw.tcc
1 // Code for the -*- C++ -*- namespace Sequence::ClustalW<T>
2 
3 /*
4 
5 Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
6 
7 Remove the brackets to email me.
8 
9 This file is part of libsequence.
10 
11 libsequence is free software: you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation, either version 3 of the License, or
14 (at your option) any later version.
15 
16 libsequence is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
20 
21 You should have received a copy of the GNU General Public License
22 along with libsequence. If not, see <http://www.gnu.org/licenses/>.
23 
24 */
25 
26 /*! \file Clustalw.tcc
27  @brief code for Clustalw.hpp
28 */
29 #include <map>
30 #include <Sequence/AlignStream.hpp>
31 #include <iterator>
32 #include <algorithm>
33 #include <cassert>
34 
35 namespace Sequence
36 {
37  template <typename T>
38  std::istream &
39  ClustalW<T>::read(std::istream &s)
40  /*!
41  Calls to Sequence::operator>> into objects of type ClustalW<T>
42  results in a call to this function, which reads the alignment in
43  from the stream.
44  */
45  {
46  std::string clustalw;
47  char ch;
48  std::map<std::string, std::string> seqs;
49  std::map<std::string, int> order;
50  std::size_t nseqs = 0;
51  s >> clustalw >> std::ws;
52  if (clustalw != "CLUSTAL")
53  {
54  throw std::runtime_error("Sequence::ClustalW::read() : input "
55  "stream does not appear to be in "
56  "CLUSTALW format");
57  }
58  else
59  {
60  ReadThroughLine(s);
61  }
62  std::string temp, temp2;
63  while (!s.eof())
64  {
65  s.get(ch);
66  bool putback = 0;
67  if (ch == '\n' || ch == ' ' || ch == '*')
68  {
69  s.putback(ch);
70  ReadThroughLine(s);
71  putback = 1;
72  }
73  else
74  {
75  if (!putback)
76  s.putback(ch);
77  s >> temp;
78  auto iter = seqs.find(temp);
79  if (iter != seqs.end())
80  {
81  s >> temp2 >> std::ws;
82  seqs[(*iter).first] += temp2;
83  }
84  else
85  {
86  s >> temp2 >> std::ws;
87  seqs[temp] = temp2;
88  order[temp] = nseqs++;
89  }
90  }
91  s >> std::ws;
92  }
93 
94  typename std::vector<T> _data;
95  for (int i = 0; i < nseqs; ++i)
96  {
97  auto iter = seqs.begin(), iter_end = seqs.end();
98  bool found = 0;
99  while (iter != iter_end)
100  {
101  if (order[(*iter).first] == i)
102  {
103  T t;
104  t.name = iter->first;
105  t.seq = std::move(iter->second);
106  _data.emplace_back(std::move(t));
107  //_data[i].name = std::move(iter->first);
108  //_data[i].seq = std::move(iter->second);
109  iter = iter_end;
110  found = 1;
111  }
112  if (!found)
113  ++iter;
114  }
115  }
116  if (_data.size() != nseqs)
117  {
118  throw std::runtime_error("fatal error converting input data");
119  }
120  this->assign(std::move(_data));
121  return s;
122  }
123 
124  template <typename T>
125  std::ostream &
126  ClustalW<T>::print(std::ostream &s) const
127  {
128  typename ClustalW<T>::const_iterator i = this->begin(),
129  j = this->end();
130  unsigned k = 0, len = unsigned(i->seq.length());
131  s << "CLUSTAL W"
132  << "\n\n";
133  while (k < len)
134  {
135  unsigned offset = (k + 60 < len) ? k + 60 : k + (len - k);
136  for (i = this->begin(); i < j; ++i)
137  {
138  s << i->name << '\t';
139  std::copy(i->seq.begin()
140  + std::string::difference_type(k),
141  i->seq.begin()
142  + std::string::difference_type(offset),
143  std::ostream_iterator<char>(s, ""));
144  s << '\n';
145  }
146  s << '\n';
147  k = offset;
148  }
149  return s;
150  }
151 
152  template <typename T>
153  std::istream &
154  ClustalW<T>::ReadThroughLine(std::istream &s)
155  {
156  char ch;
157  while (s.get(ch))
158  {
159  if (ch == '\n')
160  return s;
161  }
162  return s;
163  }
164 }