libsequence  1.9.5
AlignmentTest.cc
Go to the documentation of this file.
1 
12 #include <Sequence/Fasta.hpp>
13 #include <Sequence/Alignment.hpp>
14 #include <boost/test/unit_test.hpp>
15 #include <vector>
16 #include <fstream>
17 #include <iterator>
18 #include <algorithm>
19 #include <unistd.h>
20 
21 BOOST_AUTO_TEST_SUITE(AlignmentTest)
22 
23 BOOST_AUTO_TEST_CASE(IsAlignmentFasta)
24 {
25  std::vector<Sequence::Fasta> vf
26  = { Sequence::Fasta("seq1", "ATG"), Sequence::Fasta("seq2", "AGC"),
27  Sequence::Fasta("seq3", "GTC"), Sequence::Fasta("seq4", "ATT"),
28  Sequence::Fasta("seq5", "AAC") };
29 
30  BOOST_REQUIRE_EQUAL(Sequence::Alignment::IsAlignment(vf), true);
31  vf[0][1] = '-';
32  BOOST_REQUIRE_EQUAL(Sequence::Alignment::IsAlignment(vf), true);
33  //pull the gaps out in a way that doesn't preserve the alignment
34  vf[0].seq.erase(std::remove(vf[0].seq.begin(), vf[0].seq.end(), '-'),
35  vf[0].seq.end());
36  BOOST_REQUIRE_EQUAL(Sequence::Alignment::IsAlignment(vf), false);
37 }
38 
39 BOOST_AUTO_TEST_CASE(IsAlignmentString)
40 {
41  std::vector<std::string> vf
42  = { std::string("ATG"), std::string("AGC"), std::string("GTC"),
43  std::string("ATT"), std::string("AAC") };
44 
45  BOOST_REQUIRE_EQUAL(Sequence::Alignment::IsAlignment(vf), true);
46  vf[0][1] = '-';
47  BOOST_REQUIRE_EQUAL(Sequence::Alignment::IsAlignment(vf), true);
48  //pull the gaps out in a way that doesn't preserve the alignment
49  vf[0].erase(std::remove(vf[0].begin(), vf[0].end(), '-'), vf[0].end());
50  BOOST_REQUIRE_EQUAL(Sequence::Alignment::IsAlignment(vf), false);
51 }
52 
53 BOOST_AUTO_TEST_CASE(GetDataFasta)
54 {
55  const char* fn = "GetDataText.txt";
56 
57  std::vector<Sequence::Fasta> vf
58  = { Sequence::Fasta("seq1", "ATG"), Sequence::Fasta("seq2", "ATC") },
59  vf2;
60 
61  BOOST_REQUIRE_NO_THROW(
62  std::ofstream o(fn);
63  std::copy(vf.begin(), vf.end(),
64  std::ostream_iterator<Sequence::Fasta>(o, "\n"));
65  o.close();
66 
67  Sequence::Alignment::GetData(vf2, fn); BOOST_REQUIRE(vf == vf2);
68  BOOST_REQUIRE(Sequence::Alignment::IsAlignment(vf2)); unlink(fn););
69 }
70 
71 BOOST_AUTO_TEST_CASE(GetDataFastaStream)
72 {
73  const char* fn = "GetDataText.txt";
74 
75  std::vector<Sequence::Fasta> vf
76  = { Sequence::Fasta("seq1", "ATG"), Sequence::Fasta("seq2", "ATC") },
77  vf2;
78 
79  BOOST_REQUIRE_NO_THROW(
80  std::ofstream o(fn);
81  std::copy(vf.begin(), vf.end(),
82  std::ostream_iterator<Sequence::Fasta>(o, "\n"));
83  o.close();
84 
85  std::ifstream in(fn); Sequence::Alignment::GetData(vf2, in);
86  in.close(); BOOST_REQUIRE(vf == vf2);
87  BOOST_REQUIRE(Sequence::Alignment::IsAlignment(vf2)); unlink(fn););
88 }
89 
90 BOOST_AUTO_TEST_CASE(GetDataString)
91 {
92  const char* fn = "GetDataText.txt";
93 
94  std::vector<std::string> vf = { "ATG", "ATC" }, vf2;
95 
96  BOOST_REQUIRE_NO_THROW(
97  std::ofstream o(fn); std::copy(
98  vf.begin(), vf.end(), std::ostream_iterator<std::string>(o, "\n"));
99  o.close(); Sequence::Alignment::GetData(vf2, fn);
100  BOOST_REQUIRE(vf == vf2);
101  BOOST_REQUIRE(Sequence::Alignment::IsAlignment(vf2)); unlink(fn););
102 }
103 
104 BOOST_AUTO_TEST_CASE(GetDataStringStream)
105 {
106  const char* fn = "GetDataText.txt";
107 
108  std::vector<std::string> vf = { "ATG", "ATC" }, vf2;
109 
110  BOOST_REQUIRE_NO_THROW(
111  std::ofstream o(fn); std::copy(
112  vf.begin(), vf.end(), std::ostream_iterator<std::string>(o, "\n"));
113  o.close(); std::ifstream in(fn); Sequence::Alignment::GetData(vf2, in);
114  in.close(); BOOST_REQUIRE(vf == vf2);
115  BOOST_REQUIRE(Sequence::Alignment::IsAlignment(vf2)); unlink(fn););
116 }
117 
118 BOOST_AUTO_TEST_CASE(ReadNFasta)
119 {
120  const char* fn = "GetDataText.txt";
121 
122  std::vector<Sequence::Fasta> vf
123  = { Sequence::Fasta("seq1", "ATG"), Sequence::Fasta("seq2", "AGC"),
124  Sequence::Fasta("seq3", "GTC"), Sequence::Fasta("seq4", "ATT"),
125  Sequence::Fasta("seq5", "AAC") },
126  vf2;
127 
128  BOOST_REQUIRE_NO_THROW(
129  std::ofstream o(fn);
130  std::copy(vf.begin(), vf.end(),
131  std::ostream_iterator<Sequence::Fasta>(o, "\n"));
132  o.close();
133 
134  for (decltype(vf.size()) n = 0; n < vf.size(); ++n) {
135  vf2.clear();
136  std::ifstream in(fn);
137  Sequence::Alignment::ReadNObjects(vf2, n, in);
138  BOOST_REQUIRE_EQUAL(vf2.size(), n);
139  } unlink(fn););
140 
141  //What happens if you try to read too much in?
142  std::ofstream o(fn);
143  std::copy(vf.begin(), vf.end(),
144  std::ostream_iterator<Sequence::Fasta>(o, "\n"));
145  o.close();
146  vf2.clear();
147  std::ifstream in(fn);
148  BOOST_CHECK_NO_THROW(
149  Sequence::Alignment::ReadNObjects(vf2, vf.size() + 1, in));
150  BOOST_CHECK_EQUAL(vf2.size(), vf.size());
151  in.close();
152  unlink(fn);
153 }
154 
155 BOOST_AUTO_TEST_CASE(ReadNString)
156 {
157  const char* fn = "GetDataText.txt";
158 
159  std::vector<std::string> vf
160  = { std::string("ATG"), std::string("AGC"), std::string("GTC"),
161  std::string("ATT"), std::string("AAC") },
162  vf2;
163 
164  BOOST_REQUIRE_NO_THROW(
165  std::ofstream o(fn); std::copy(
166  vf.begin(), vf.end(), std::ostream_iterator<std::string>(o, "\n"));
167  o.close();
168 
169  for (decltype(vf.size()) n = 0; n < vf.size(); ++n) {
170  vf2.clear();
171  std::ifstream in(fn);
172  Sequence::Alignment::ReadNObjects(vf2, n, in);
173  BOOST_REQUIRE_EQUAL(vf2.size(), n);
174  } unlink(fn););
175 
176  //What happens if you try to read too much in?
177  std::ofstream o(fn);
178  std::copy(vf.begin(), vf.end(),
179  std::ostream_iterator<std::string>(o, "\n"));
180  o.close();
181  vf2.clear();
182  std::ifstream in(fn);
183  BOOST_CHECK_NO_THROW(
184  Sequence::Alignment::ReadNObjects(vf2, vf.size() + 1, in));
185  BOOST_CHECK_EQUAL(vf2.size(), vf.size());
186  in.close();
187  unlink(fn);
188 }
189 
190 BOOST_AUTO_TEST_CASE(GappedFasta)
191 {
192  std::vector<Sequence::Fasta> vf
193  = { Sequence::Fasta("seq1", "ATG"), Sequence::Fasta("seq2", "AGC"),
194  Sequence::Fasta("seq3", "GTC"), Sequence::Fasta("seq4", "ATT"),
195  Sequence::Fasta("seq5", "AAC") };
196 
197  BOOST_REQUIRE_EQUAL(Sequence::Alignment::Gapped(vf), false);
198  vf[0][1] = '-';
199  BOOST_REQUIRE_EQUAL(Sequence::Alignment::Gapped(vf), true);
200 }
201 
202 BOOST_AUTO_TEST_CASE(GappedString)
203 {
204  std::vector<std::string> vf
205  = { std::string("ATG"), std::string("AGC"), std::string("GTC"),
206  std::string("ATT"), std::string("AAC") };
207 
208  BOOST_REQUIRE_EQUAL(Sequence::Alignment::Gapped(vf), false);
209  vf[0][1] = '-';
210  BOOST_REQUIRE_EQUAL(Sequence::Alignment::Gapped(vf), true);
211 }
212 
213 BOOST_AUTO_TEST_CASE(UnGappedLengthFasta)
214 {
215  std::vector<Sequence::Fasta> vf
216  = { Sequence::Fasta("seq1", "ATG"), Sequence::Fasta("seq2", "AGC"),
217  Sequence::Fasta("seq3", "GTC"), Sequence::Fasta("seq4", "ATT"),
218  Sequence::Fasta("seq5", "AAC") };
219 
220  BOOST_REQUIRE_EQUAL(Sequence::Alignment::UnGappedLength(vf), 3);
221  vf[0][1] = '-';
222  BOOST_REQUIRE_EQUAL(Sequence::Alignment::UnGappedLength(vf), 2);
223 }
224 
225 BOOST_AUTO_TEST_CASE(UnGappedLengthString)
226 {
227  std::vector<std::string> vf
228  = { std::string("ATG"), std::string("AGC"), std::string("GTC"),
229  std::string("ATT"), std::string("AAC") };
230 
231  BOOST_REQUIRE_EQUAL(Sequence::Alignment::UnGappedLength(vf), 3);
232  vf[0][1] = '-';
233  BOOST_REQUIRE_EQUAL(Sequence::Alignment::UnGappedLength(vf), 2);
234 }
235 
236 BOOST_AUTO_TEST_CASE(RemoveGapsFasta)
237 {
238  std::vector<Sequence::Fasta> vf
239  = { Sequence::Fasta("seq1", "ATG"), Sequence::Fasta("seq2", "AGC"),
240  Sequence::Fasta("seq3", "GTC"), Sequence::Fasta("seq4", "ATT"),
241  Sequence::Fasta("seq5", "AAC") };
242  auto vf2 = vf;
243 
244  vf2[0][1] = '-';
245  BOOST_REQUIRE_EQUAL(Sequence::Alignment::UnGappedLength(vf2), 2);
247  BOOST_REQUIRE_EQUAL(Sequence::Alignment::UnGappedLength(vf2), 2);
248  BOOST_REQUIRE_EQUAL(Sequence::Alignment::UnGappedLength(vf2),
249  vf2[0].length());
250 
251  for (decltype(vf.size()) i = 0; i < vf.size(); ++i)
252  {
253  //Base 0 should be the same in orig data and copy
254  BOOST_REQUIRE_EQUAL(vf[i][0], vf2[i][0]);
255  //The gap removal will cause a shift, hence 2 should == 1.
256  BOOST_REQUIRE_EQUAL(vf[i][2], vf2[i][1]);
257  }
258 }
259 
260 BOOST_AUTO_TEST_CASE(RemoveGapsString)
261 {
262  std::vector<std::string> vf
263  = { std::string("ATG"), std::string("AGC"), std::string("GTC"),
264  std::string("ATT"), std::string("AAC") };
265  auto vf2 = vf;
266 
267  vf2[0][1] = '-';
268  BOOST_REQUIRE_EQUAL(Sequence::Alignment::UnGappedLength(vf2), 2);
270  BOOST_REQUIRE_EQUAL(Sequence::Alignment::UnGappedLength(vf2), 2);
271  BOOST_REQUIRE_EQUAL(Sequence::Alignment::UnGappedLength(vf2),
272  vf2[0].length());
273 
274  for (decltype(vf.size()) i = 0; i < vf.size(); ++i)
275  {
276  //Base 0 should be the same in orig data and copy
277  BOOST_REQUIRE_EQUAL(vf[i][0], vf2[i][0]);
278  //The gap removal will cause a shift, hence 2 should == 1.
279  BOOST_REQUIRE_EQUAL(vf[i][2], vf2[i][1]);
280  }
281 }
282 
283 BOOST_AUTO_TEST_CASE(RemoveTerminalGapsFasta)
284 {
285  std::vector<Sequence::Fasta> vf
286  = { Sequence::Fasta("seq1", "A-TG"), Sequence::Fasta("seq2", "ACGC"),
287  Sequence::Fasta("seq3", "GCTC"), Sequence::Fasta("seq4", "ACTT"),
288  Sequence::Fasta("seq5", "ACA-") };
289  auto vf2 = vf;
290 
291  BOOST_REQUIRE_EQUAL(Sequence::Alignment::UnGappedLength(vf2), 2);
293  BOOST_REQUIRE_EQUAL(Sequence::Alignment::UnGappedLength(vf2), 2);
294 
295  //now, vf and vf2 must be the same for the name 3 positions:
296  for (decltype(vf.size()) i = 0; i < vf.size(); ++i)
297  {
298  BOOST_REQUIRE_EQUAL(vf[i].substr(0, 3), vf2[i].seq);
299  //While we're at it: let's test the explicit type case of a Sequence::Seq to std::string
300  BOOST_REQUIRE_EQUAL(vf[i].substr(0, 3), std::string(vf2[i]));
301  }
302 }
303 
304 BOOST_AUTO_TEST_CASE(RemoveTerminalGapString)
305 {
306  std::vector<std::string> vf
307  = { std::string("A-TG"), std::string("ACGC"), std::string("GCTC"),
308  std::string("ACTT"), std::string("ACA-") };
309  auto vf2 = vf;
310 
311  BOOST_REQUIRE_EQUAL(Sequence::Alignment::UnGappedLength(vf2), 2);
313  BOOST_REQUIRE_EQUAL(Sequence::Alignment::UnGappedLength(vf2), 2);
314 
315  //now, vf and vf2 must be the same for the name 3 positions:
316  for (decltype(vf.size()) i = 0; i < vf.size(); ++i)
317  {
318  BOOST_REQUIRE_EQUAL(vf[i].substr(0, 3), vf2[i]);
319  }
320 }
321 
322 BOOST_AUTO_TEST_CASE(TrimFasta)
323 {
324  std::vector<Sequence::Fasta> vf
325  = { Sequence::Fasta("seq1", "A-TG"), Sequence::Fasta("seq2", "ACGC"),
326  Sequence::Fasta("seq3", "GCTC"), Sequence::Fasta("seq4", "ACTT"),
327  Sequence::Fasta("seq5", "ACA-") };
328 
329  BOOST_REQUIRE_EQUAL(Sequence::Alignment::UnGappedLength(vf), 2);
330  //We will extract sites 2 and 3 from the alignment using Trim:
331  auto vf2 = Sequence::Alignment::Trim(vf, std::vector<int>{ 1, 2 });
332  BOOST_REQUIRE_EQUAL(Sequence::Alignment::UnGappedLength(vf2), 1);
333  //now, vf and vf2 must be the same for the middle 2 positions of vf:
334  for (decltype(vf.size()) i = 0; i < vf.size(); ++i)
335  {
336  BOOST_REQUIRE_EQUAL(vf[i].seq.substr(1, 2), vf2[i].seq);
337  }
338 }
339 
340 BOOST_AUTO_TEST_CASE(TrimString)
341 {
342  std::vector<std::string> vf
343  = { std::string("A-TG"), std::string("ACGC"), std::string("GCTC"),
344  std::string("ACTT"), std::string("ACA-") };
345 
346  BOOST_REQUIRE_EQUAL(Sequence::Alignment::UnGappedLength(vf), 2);
347  //We will extract sites 2 and 3 from the alignment using Trim:
348  auto vf2 = Sequence::Alignment::Trim(vf, std::vector<int>{ 1, 2 });
349  BOOST_REQUIRE_EQUAL(Sequence::Alignment::UnGappedLength(vf2), 1);
350  //now, vf and vf2 must be the same for the middle 2 positions of vf:
351  for (decltype(vf.size()) i = 0; i < vf.size(); ++i)
352  {
353  BOOST_REQUIRE_EQUAL(vf[i].substr(1, 2), vf2[i]);
354  }
355 }
356 
357 BOOST_AUTO_TEST_CASE(TrimComplementFasta)
358 {
359  std::vector<Sequence::Fasta> vf
360  = { Sequence::Fasta("seq1", "A-TG"), Sequence::Fasta("seq2", "ACGC"),
361  Sequence::Fasta("seq3", "GCTC"), Sequence::Fasta("seq4", "ACTT"),
362  Sequence::Fasta("seq5", "ACA-") };
363 
364  BOOST_REQUIRE_EQUAL(Sequence::Alignment::UnGappedLength(vf), 2);
365  //We will remove sites 2 and 3 from the alignment using Trim:
366  auto vf2
367  = Sequence::Alignment::TrimComplement(vf, std::vector<int>{ 1, 2 });
368  BOOST_REQUIRE_EQUAL(Sequence::Alignment::UnGappedLength(vf2), 1);
369  //now, vf and vf2 must be the same for the name and last positions of vf
370  for (decltype(vf.size()) i = 0; i < vf.size(); ++i)
371  {
372  BOOST_REQUIRE_EQUAL(vf[i][0], vf2[i][0]);
373  BOOST_REQUIRE_EQUAL(vf[i][3], vf2[i][1]);
374  }
375 }
376 
377 BOOST_AUTO_TEST_CASE(TrimComplementString)
378 {
379  std::vector<std::string> vf
380  = { std::string("A-TG"), std::string("ACGC"), std::string("GCTC"),
381  std::string("ACTT"), std::string("ACA-") };
382 
383  BOOST_REQUIRE_EQUAL(Sequence::Alignment::UnGappedLength(vf), 2);
384  //We will remove sites 2 and 3 from the alignment using Trim:
385  auto vf2
386  = Sequence::Alignment::TrimComplement(vf, std::vector<int>{ 1, 2 });
387  BOOST_REQUIRE_EQUAL(Sequence::Alignment::UnGappedLength(vf2), 1);
388  //now, vf and vf2 must be the same for the name and last positions of vf
389  for (decltype(vf.size()) i = 0; i < vf.size(); ++i)
390  {
391  BOOST_REQUIRE_EQUAL(vf[i][0], vf2[i][0]);
392  BOOST_REQUIRE_EQUAL(vf[i][3], vf2[i][1]);
393  }
394 }
395 
396 BOOST_AUTO_TEST_CASE(TrimComplementSameResults)
397 {
398  std::vector<Sequence::Fasta> vf
399  = { Sequence::Fasta("seq1", "A-TG"), Sequence::Fasta("seq2", "ACGC"),
400  Sequence::Fasta("seq3", "GCTC"), Sequence::Fasta("seq4", "ACTT"),
401  Sequence::Fasta("seq5", "ACA-") };
402  std::vector<std::string> vs;
403  //The magic below works b/c if implicit conversion of Sequence::Seq to std::string
404  std::copy(vf.begin(), vf.end(), std::back_inserter(vs));
405 
406  auto vf2
407  = Sequence::Alignment::TrimComplement(vf, std::vector<int>{ 1, 2 });
408  auto vs2
409  = Sequence::Alignment::TrimComplement(vs, std::vector<int>{ 1, 2 });
410 
411  for (decltype(vf2.size()) i = 0; i < vf2.size(); ++i)
412  {
413  BOOST_REQUIRE_EQUAL(std::string(vf2[i]), vs2[i]);
414  BOOST_REQUIRE_EQUAL(vf2[i].seq, vs2[i]);
415  }
416 }
417 BOOST_AUTO_TEST_SUITE_END()
void RemoveTerminalGaps(std::vector< std::string > &data)
FASTA sequence stream.
Definition: Fasta.hpp:49
std::vector< std::string > TrimComplement(const std::vector< std::string > &data, const std::vector< int > &sites)
Declaration of namespace Sequence::Alignment.
void RemoveGaps(std::vector< std::string > &data)
std::vector< std::string > Trim(const std::vector< std::string > &data, const std::vector< int > &sites)
Declaration of Sequence::Fasta streams.