libsequence  1.9.5
Specializations.cc
Go to the documentation of this file.
1 /*
2 
3 Copyright (C) 2003-2009 Kevin Thornton, krthornt[]@[]uci.edu
4 
5 Remove the brackets to email me.
6 
7 This file is part of libsequence.
8 
9 libsequence is free software: you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation, either version 3 of the License, or
12 (at your option) any later version.
13 
14 libsequence is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with libsequence. If not, see <http://www.gnu.org/licenses/>.
21 
22 */
23 
24 //Code for the -*- C++ -*- Template Specializations for libsequence
25 #include <Sequence/Alignment.hpp>
26 
31 namespace Sequence
32 {
33  namespace Alignment
34  {
35  template<> bool Gapped(const std::vector<std::string> &data)
39  {
40  for (std::vector<std::string>::size_type i = 0; i < data.size (); ++i)
41  //iterate over sequences
42  {
43  if( data[i].find('-') != std::string::npos )
44  return true;
45  }
46 
47  return false;
48  }
49 
50  template <> bool IsAlignment(const std::vector<std::string> &data)
54  {
55  for (std::vector<std::string>::size_type i = 0; i < data.size (); ++i)
56  if (data[i].length () != data[0].length ())
57  return 0;
58 
59  return 1;
60  }
61 
62  template<>
63  bool validForPolyAnalysis( std::vector<std::string>::const_iterator beg,
64  std::vector<std::string>::const_iterator end )
68  {
69  while(beg < end)
70  {
71  if (std::find_if(beg->begin(),beg->end(),
73  != beg->end())
74  {
75  return false;
76  }
77  ++beg;
78  }
79  return true;
80  }
81 
82  template<>
83  bool validForPolyAnalysis( std::vector<std::string>::iterator beg,
84  std::vector<std::string>::iterator end )
85  {
86  while(beg < end)
87  {
88  if (std::find_if(beg->begin(),beg->end(),
90  != beg->end())
91  {
92  return false;
93  }
94  ++beg;
95  }
96  return true;
97  }
98 
99  template <>
100  unsigned UnGappedLength(const std::vector <std::string>&data)
101 
102 
105  {
106  unsigned len = 0;
107  if (!IsAlignment(data))
109 
110  for (std::string::size_type j = 0; j < data[0].length (); ++j)
111  {
112  bool site_gapped = 0;
113  for (std::vector<std::string>::size_type i = 0; i < data.size (); ++i)
114  {
115  if (data[i][j] == '-')
116  {
117  site_gapped = 1;
118  i = data.size();
119  }
120  }
121  if (!(site_gapped))
122  ++len;
123  }
124  return len;
125  }
126 
127  template<>
128  void RemoveGaps (std::vector <std::string> &data)
132  {
133  size_t i, j;
134  size_t length = data[0].length ();
135  std::vector < std::string > ungapped_sequences(data.size());
136  bool site_is_gapped;
137  for (i = 0; i < length; ++i)
138  { //iterate over sites
139  for ( j = 0, site_is_gapped = 0;
140  j < data.size(); ++j)
141  {
142  if (data[j][i] == '-')
143  {
144  site_is_gapped = 1;
145  j = data.size();
146  }
147  }
148  if (!(site_is_gapped))
149  {
150  for ( j = 0 ; j != data.size(); ++j)
151  ungapped_sequences[j] += data[j][i];
152  }
153  }
154  //redo the data
155  data = std::move( ungapped_sequences );
156  }
157 
158  template<>
159  void RemoveTerminalGaps (std::vector <std::string>&data)
163  {
164  size_t i, j, leftmost, rightmost, numUngapped,offset;
165  size_t length = data[0].length(); //how much we have to iterate over
166  std::vector < std::string > trimmed_sequences;
167  size_t size = data.size();
168 
169  leftmost = SEQMAXUNSIGNED;
170  rightmost = length + 1; //offset by one b/c its an array...
171 
172  //find the leftmost site where all sites in the alignment are ungapped
173  for (i = 0; i < length; ++i)
174  { //iterate over sites
175  for (numUngapped = 0, j = 0; j != data.size (); ++j)
176  {
177  if (data[j][i] != '-')
178  ++numUngapped;
179  }
180  if (numUngapped == size)
181  {
182  leftmost = i;
183  i = length + 1;
184  }
185  }
186 
187  //find the rigthmost site where all sites in the alignment are ungapped
188  bool exit_condition = false;
189  for (i = length - 1; i < data[0].length() && exit_condition == false; --i)
190  {
191  for (numUngapped = 0, j = 0; j != data.size (); ++j)
192  {
193  if (data[j][i] != '-')
194  ++numUngapped;
195  }
196  if (numUngapped == size)
197  {
198  rightmost = i;
199  exit_condition = true;
200  }
201  }
202 
203  //now, fill the array of trimmed sequences
204  offset = rightmost - leftmost + 1;
205  for (j = 0; j != data.size (); ++j)
206  trimmed_sequences.push_back (data[j].substr (leftmost, offset));
207 
208  //now, redo the seq array for the current object
209  data.assign(trimmed_sequences.begin(),trimmed_sequences.end());
210  }
211 
212  template <>
213  void RemoveFixedOutgroupInsertions( std::vector<std::string> & data,
214  unsigned site,
215  const unsigned & ref )
216  {
217  const size_t nsam = data.size()-1;
218  if ( site < data[0].length() )
219  {
220  unsigned ngap=0;
221  for(unsigned ind=0;ind<data.size();++ind)
222  {
223  if (ind != ref)
224  {
225  ngap += (data[ind][site] == '-') ? 1u : 0u;
226  }
227  }
228  if(ngap==nsam)
229  {
230  for(unsigned ind=0;ind<data.size();++ind)
231  {
232  data[ind].erase(site,1);
233  }
234  RemoveFixedOutgroupInsertions(data,site,ref);
235  }
236  RemoveFixedOutgroupInsertions(data,site++,ref);
237  }
238  }
239 
240  template<>
241  std::vector <std::string>Trim (const std::vector <std::string >&data,
242  const std::vector <int> &sites)
243 
244 
245 
248  {
249  size_t i, j, numseqs = data.size (), numIntervals = sites.size ();
250  std::string::size_type start, stop;
251  std::vector < std::string >trimmedData(numseqs);
252  std::vector < std::string > trimmedTemp(numseqs);
253  if (sites.empty ())
254  {
255  throw std::runtime_error ("Sequence::Alignment::Trim(): empty vector of positions passed to function");
256  }
257  if (numIntervals % 2 != 0)
258  {
259  throw std::runtime_error ("Sequence::Alignment::Trim(): odd number of positions passed");
260  }
261 
262  for (i = 0; i < numIntervals; i += 2)
263  {
264  start = std::string::size_type(sites[i]);
265  stop = std::string::size_type(sites[i + 1]);
266  for (j = 0; j < numseqs; ++j)
267  {
268  trimmedTemp[j] += data[j].substr (start, stop - start + 1);
269  }
270  }
271  trimmedData.assign(trimmedTemp.begin(),trimmedTemp.end());
272  return trimmedData;
273  }
274 
275  template<>
276  std::vector <std::string> TrimComplement (const std::vector <std::string> & data,
277  const std::vector < int > &sites)
278 
279 
280 
283  {
284  std::vector < int >newSites;
285  size_t i, j, numseqs = data.size (), numIntervals = sites.size (), lastval;
286 
287  if (sites.empty ())
288  {
289  throw std::runtime_error ("Sequence::Alignment::TrimComplement(): empty vector of positions passed to function");
290  }
291  if (sites.size() % 2 != 0)
292  {
293  throw std::runtime_error ("Sequence::Alignment::TrimComplement(): odd number of positions passed to function");
294  }
295 
296  std::vector < std::string > trimmedTemp(numseqs);
297 
298  size_t odd_even;
299  if (sites[0] == 0)
300  {
301  for (i = 1; i < numIntervals; ++i)
302  {
303  odd_even = i+1;
304  if (odd_even%2==0)
305  {
306  newSites.push_back (sites[i] + 1);
307  }
308  else if (odd_even%2!=0)
309  {
310  newSites.push_back (sites[i] - 1);
311  }
312  }
313  }
314  else if (sites[0] > 0)
315  {
316  newSites.push_back (0);
317  for (i = 0; i < numIntervals; ++i)
318  {
319  odd_even = i+1;
320  if (odd_even%2==0)
321  {
322  newSites.push_back (sites[i] + 1);
323  }
324  else if (odd_even%2!=0)
325  {
326  newSites.push_back (sites[i] - 1);
327  }
328  }
329  }
330 
331  lastval = size_t(newSites[newSites.size () - 1]);
332  newSites.pop_back ();
333  numIntervals = newSites.size ();
334  for (i = 0; i < numIntervals; i += 2)
335  {
336  size_t start = size_t(newSites[i]);
337  size_t stop = size_t(newSites[i + 1]);
338  for (j = 0; j < numseqs; ++j)
339  {
340  trimmedTemp[j] +=
341  data[j].
342  substr (start, stop - start + 1);
343  }
344  }
345 
346  for (j = 0; j < numseqs; ++j)
347  {
348  trimmedTemp[j] +=data[j].substr (lastval);
349  }
350 
351  return trimmedTemp;
352  }
353  }
354 }
355 
The namespace in which this library resides.
This functor can be used to determine if a range contains characters that the SNP analysis routines i...
Declaration of namespace Sequence::Alignment.
const unsigned SEQMAXUNSIGNED
Definition: SeqConstants.cc:32