libsequence  1.9.5
SimpleSNP.cc
1 /*
2 
3 Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
4 
5 Remove the brackets to email me.
6 
7 This file is part of libsequence.
8 
9 libsequence is free software: you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation, either version 3 of the License, or
12 (at your option) any later version.
13 
14 libsequence is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18 
19 You should have received a copy of the GNU General Public License
20 long with libsequence. If not, see <http://www.gnu.org/licenses/>.
21 
22 */
23 
24 #include <Sequence/SimpleSNP.hpp>
26 #include <cstring>
27 #include <cctype>
28 #include <cassert>
29 #include <iostream>
30 #include <sstream>
31 #include <stdexcept>
32 
33 using std::vector;
34 using std::string;
35 
36 namespace Sequence
37 {
38 
39  SimpleSNP::SimpleSNP(SimpleSNP && s) : PolyTable(std::move(s))
40  {
41  }
42 
43  SimpleSNP & SimpleSNP::operator=(SimpleSNP && pt)
44  {
45  PolyTable::operator=(std::move(pt));
46  return *this;
47  }
48 
49  std::istream & SimpleSNP::read (std::istream & s)
50 
51 
54  {
55  unsigned nsam, nsites, i, j;
56  char ch;
57  if(!(s >> nsam >> nsites))
58  {
59  throw std::runtime_error("SimpleSNP.cc: file did not start with nsam nsites");
60  }
61  if (Diploid)
62  nsam *= 2;
63  std::vector<double> _positions(nsites);
64 
65  for (i = 0; i < nsites; ++i)
66  {
67  if (! (s >> _positions[i] >> std::ws))
68  throw std::runtime_error("SimpleSNP.cc: error in processing site positions");
69  }
70  std::string outgroup,temp,temp2;
71  std::getline(s,temp);
72  std::istringstream check(temp),check2(temp);
73  unsigned nc = 0;
74  while (! check.eof() )
75  {
76  check >> temp2 >> std::ws;
77  ++nc;
78  }
79  bool haveOGlabel = (nc == nsites + 1) ? true : false;
80  _names.resize(nsam+1);
81  if(haveOGlabel)
82  check2 >> _names[0];
83  else
84  _names[0]="anc";
85  outgroup.resize(nsites);
86  for (i = 0; i < nsites; ++i)
87  {
88  if(!(check2 >> ch))
89  {
90  throw std::runtime_error("SimpleSNP.cc: error reading in seg. sites");
91  }
92  ch = char(toupper(ch));
93  switch (ch)
94  {
95  case '?':
96  outgroup[i] = 'N';
97  break;
98  default:
99  outgroup[i] = ch;
100  break;
101  }
102  }
103  outgroup[i] = '\0';
104  unsigned numn = 0;
105  //count # of ambiguous bases in outgroup
106  for (i = 0; i < outgroup.length(); ++i)
107  {
108  if (char(std::toupper(outgroup[i])) == 'N')
109  ++numn;
110  }
111  unsigned haveOG = 0;
112 
113  std::vector<std::string> _data;
114  if(numn == outgroup.length())
115  //if all outgroup characters are ambiguous,
116  //then we have no outgroup, so we don't include
117  //it in the data vector, and so we don't allocate space
118  {
119  _data.resize(nsam);
120  for (i = 0 ; i < nsam ; ++i)
121  _data[i].resize(nsites);
122  }
123  else
124  //otherwise,we have an outgroup and need to allocate space
125  {
126  _data.resize(nsam+1);
127  for (i = 0 ; i < nsam+1 ; ++i)
128  _data[i].resize(nsites);
129 
130  _data[0] = outgroup;
131  haveOG=1;
132  haveOutgroup = true;
133  }
134 
135  for (i = 0 + haveOG; i < nsam + haveOG; ++i)
136  {
137  string name; //don't store the name
138  if(!(s >> name))
139  throw std::runtime_error("SimpleSNP.cc: error processing sequences");
140  //_names[i-(haveOG+haveOGlabel)] = name;
141  _names[i-haveOG+1] = name;
142  char *temp = new char[nsites+1];
143  char *temp2 = nullptr;
144  if (Diploid)
145  {
146  //_names[i-(haveOG+haveOGlabel)+1] = name;
147  _names[i-haveOG+2] = name;
148  temp2 = new char[nsites+1];
149  }
150  for (j = 0; j < nsites; ++j)
151  {
152  if(!(s >> ch))
153  {
154  delete [] temp;
155  if(temp2 != nullptr) delete [] temp2;
156  throw std::runtime_error("SimpleSNP.cc: error processing sequenes");
157  }
158  ch = char(toupper(ch));
159  if (Diploid)
160  {
161  switch (char(std::toupper(ch))) //use IUPAC ambiguity symbols
162  {
163  case '?':
164  temp[j] = 'N';
165  temp2[j] = 'N';
166  break;
167  case 'M':
168  temp[j] = 'A';
169  temp2[j] = 'C';
170  break;
171  case 'R':
172  temp[j] = 'A';
173  temp2[j] = 'G';
174  break;
175  case 'W':
176  temp[j] = 'A';
177  temp2[j] = 'T';
178  break;
179  case 'S':
180  temp[j] = 'C';
181  temp2[j] = 'G';
182  break;
183  case 'Y':
184  temp[j] = 'C';
185  temp2[j] = 'T';
186  break;
187  case 'K':
188  temp[j] = 'G';
189  temp2[j] = 'T';
190  break;
191  default:
192  temp[j] = ch;
193  temp2[j] = ch;
194  }
195  }
196  else if (isoFemale)
197  {
198  //not implemented yet!!
199  }
200  else
201  {
202  switch (ch)
203  {
204  case '?':
205  temp[j] = 'N';
206  break;
207  default:
208  temp[j] = ch;
209  break;
210  }
211  }
212  }
213  temp[j] = '\0';
214  if (Diploid)
215  {
216  temp2[j] = '\0';
217  _data[i++] = temp;
218  _data[i] = temp2;
219  delete [] temp2;
220  }
221  else
222  _data[i] = temp;
223 
224  delete [] temp;
225  }
226  //assign the data to the base class
227  if (_data.size() != nsam+haveOG)
228  {
229  throw (std::runtime_error("SimpleSNP::read() -- number of sequences does not match input value"));
230  }
231  this->assign(std::move(_positions),std::move(_data));
232  return s;
233  }
234 
235  std::ostream & SimpleSNP::print(std::ostream &o) const
236  {
237  o << this->size()-this->haveOutgroup <<'\t' <<this->numsites() << '\n';
238 
239  for(unsigned i = 0 ; i < this->numsites()-1 ; ++i)
240  {
241  o << this->position(i) << '\t';
242  }
243  o << this->position(this->numsites()-1) << '\n';
244 
245  if (haveOutgroup == true)
246  {
247  if(_names.empty())
248  {
249  o << "anc ";
250  }
251  else
252  {
253  o << _names[0];
254  }
255  for(unsigned i = 0 ; i < this->numsites() ; ++i)
256  {
257  o << '\t' << (*this)[0][i];
258  }
259  o << '\n';
260  for(unsigned i = 1 ; i < this->size() ; ++i)
261  {
262  if (_names.empty())
263  {
264  o << "seq"<<i;
265  }
266  else
267  {
268  o << _names[i];
269  }
270  for(unsigned j = 0 ; j < this->numsites() ; ++j)
271  {
272  o << '\t' << (*this)[i][j];
273  }
274  if (i < this->size()-1)
275  o << '\n';
276  }
277  }
278  else
279  {
280  o << "anc ";
281  for(unsigned i = 0 ; i < this->numsites() ; ++i)
282  {
283  o << '\t' << 'N';
284  }
285  o << '\n';
286  for(unsigned i = 0 ; i < this->size() ; ++i)
287  {
288  if (_names.empty())
289  {
290  o << "seq"<<i;
291  }
292  else
293  o << _names[i];
294  for(unsigned j = 0 ; j < this->numsites() ; ++j)
295  {
296  o << '\t' << (*this)[i][j];
297  }
298  if (i < this->size()-1)
299  o << '\n';
300  }
301  }
302  return o;
303  }
304 
305  std::string SimpleSNP::label(unsigned i) const
309  {
310  if(i >= this->size() )
311  {
312  throw std::out_of_range("Sequence::SimpleSNP::label(), i out of range");
313  }
314  if (haveOutgroup && i==0)
315  {
316  return string("anc");
317  }
318  if( _names.empty() )
319  {
320  return string("seq" + std::to_string(i));
321  }
322  return _names[i-unsigned(haveOutgroup)];
323  }
324 
325  bool SimpleSNP::outgroup(void) const
330  {
331  return haveOutgroup;
332  }
333 
334  void SimpleSNP::set_outgroup(const bool & b)
335  /*
336  Tells the object whether or not the first sequence
337  is an outgroup sequence
338  */
339  {
340  haveOutgroup = b;
341  }
342 
343 }
STL namespace.
The namespace in which this library resides.
Declaration of Sequence::SimpleSNP, a polymorphism table stream in a "spreadsheet" format...
Operations on non-const Sequence::PolyTable objects.