Skip to content

Commit b1ca0eb

Browse files
committed
[test] add map_io_reader_test.cpp
1 parent a9402fc commit b1ca0eb

File tree

4 files changed

+321
-0
lines changed

4 files changed

+321
-0
lines changed

include/bio/map_io/reader_options.hpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
#include <vector>
1919

2020
#include <seqan3/alphabet/nucleotide/dna5.hpp>
21+
#include <seqan3/alphabet/quality/phred42.hpp>
2122
#include <seqan3/alphabet/views/char_to.hpp>
2223
#include <seqan3/utility/type_list/traits.hpp>
2324

test/unit/map_io/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1 +1,2 @@
11
bio_test(header_test.cpp)
2+
bio_test(map_io_reader_test.cpp)

test/unit/map_io/data.hpp

Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,33 @@
1+
// -----------------------------------------------------------------------------------------------------
2+
// Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin
3+
// Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik
4+
// Copyright (c) 2020-2021, deCODE Genetics
5+
// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
6+
// shipped with this file and also available at: https://github.com/seqan/b.i.o./blob/master/LICENSE
7+
// -----------------------------------------------------------------------------------------------------
8+
9+
#include <string_view>
10+
11+
// Plain-text SAM fixture: an @HD header line (VN:1.6), one @SQ line (SN:ref, LN:34)
// and three alignment records whose names are read1, read2 and read3.
inline constexpr std::string_view input =
12+
R"(@HD VN:1.6
13+
@SQ SN:ref LN:34
14+
read1 41 ref 1 61 1S1M1D1M1I ref 10 300 ACGT !##$ AS:i:2 NM:i:7
15+
read2 42 ref 2 62 1H7M1D1M1S2H ref 10 300 AGGCTGNAG !##$&'()* xy:B:S,3,4,5
16+
read3 43 ref 3 63 1S1M1P1M1I1M1I1D1M1S ref 10 300 GGAGTATA !!*+,-./
17+
)";
18+
19+
// Compressed byte blob; it starts with the gzip magic bytes 0x1f 0x8b.
// The payload contains embedded NUL bytes, so the length is passed explicitly
// (217 = 13 rows of 16 bytes + 9 bytes in the last row) instead of relying on a terminator.
// NOTE(review): presumably the BGZF-compressed form of the plain-text `input` fixture — confirm.
inline constexpr std::string_view input_bgzipped{
20+
"\x1f\x8b\x08\x04\x00\x00\x00\x00\x00\xff\x06\x00\x42\x43\x02\x00"
21+
"\xbc\x00\x55\x8e\xcb\x0a\xc2\x30\x14\x44\xd7\xd3\xbf\x28\x15\x1f"
22+
"\xb5\xd6\xde\x24\xa6\x90\x55\x63\x0b\xa9\x60\x83\x92\xe0\x5e\xb0"
23+
"\x82\xdb\xae\xf4\xef\x6d\xd3\x8d\x2e\x86\x0b\xc3\x3d\x87\xa9\xda"
24+
"\x06\x37\xab\x28\x97\x51\xe5\xae\x70\x56\x0d\xfd\x13\x67\xab\xb8"
25+
"\x88\x86\xfe\xfe\x20\x08\xc2\x54\x11\x24\x81\x1c\x75\xd4\x8c\x39"
26+
"\xcd\x5d\x01\x5e\x14\xd0\xb5\xf1\x88\x93\x64\x01\xed\xd4\x4b\x31"
27+
"\xd8\x6e\x3c\x65\xe0\x19\x04\x0b\xbf\x0c\x92\x81\xda\x72\xe6\x1d"
28+
"\x6b\xff\x0c\xc6\xd4\xde\x58\x6d\x82\x66\xb9\x5a\x6f\x52\xbc\x3f"
29+
"\xea\xa8\x5c\xc6\x33\x91\x1d\x82\x8a\x43\xf0\x00\x71\x48\x3e\x4f"
30+
"\xb9\x4c\x53\x42\x82\xf4\x57\x69\x8c\x36\x5e\x7b\x8d\x38\x4e\xb7"
31+
"\xd9\x2e\xdf\x47\x5f\x01\x82\x81\x27\xeb\x00\x00\x00\x1f\x8b\x08"
32+
"\x04\x00\x00\x00\x00\x00\xff\x06\x00\x42\x43\x02\x00\x1b\x00\x03"
33+
"\x00\x00\x00\x00\x00\x00\x00\x00\x00", 217};
test/unit/map_io/map_io_reader_test.cpp

Lines changed: 286 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,286 @@
1+
// -----------------------------------------------------------------------------------------------------
2+
// Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin
3+
// Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik
4+
// Copyright (c) 2020-2021, deCODE Genetics
5+
// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
6+
// shipped with this file and also available at: https://github.com/seqan/b.i.o./blob/master/LICENSE
7+
// -----------------------------------------------------------------------------------------------------
8+
9+
#include <algorithm>
10+
#include <sstream>
11+
12+
#include <gtest/gtest.h>
13+
14+
#include <seqan3/alphabet/nucleotide/dna5.hpp>
15+
#include <seqan3/test/expect_range_eq.hpp>
16+
#include <seqan3/test/expect_same_type.hpp>
17+
#include <seqan3/test/tmp_filename.hpp>
18+
19+
#include <bio/map_io/reader.hpp>
20+
21+
#include "data.hpp"
22+
23+
// Range-concept check: a mutable reader models std::ranges::input_range,
// a const-qualified reader does not.
TEST(map_io_reader, concepts)
{
    using reader_t       = bio::map_io::reader<>;
    using const_reader_t = reader_t const;

    EXPECT_TRUE((std::ranges::input_range<reader_t>));

    // the reader cannot be iterated through a const handle
    EXPECT_FALSE((std::ranges::input_range<const_reader_t>));
}
32+
33+
void map_io_reader_filename_constructor(bool ext_check, auto &&... args)
34+
{
35+
/* just the filename */
36+
{
37+
seqan3::test::tmp_filename filename{"map_io_reader_constructor.sam"};
38+
std::ofstream filecreator{filename.get_path(), std::ios::out | std::ios::binary};
39+
40+
EXPECT_NO_THROW((bio::map_io::reader{filename.get_path(), std::forward<decltype(args)>(args)...}));
41+
}
42+
43+
// correct format check is done by tests of that format
44+
45+
/* non-existent file */
46+
{
47+
EXPECT_THROW((bio::map_io::reader{"/dev/nonexistant/foobarOOO", std::forward<decltype(args)>(args)...}),
48+
bio::file_open_error);
49+
}
50+
51+
/* wrong extension */
52+
if (ext_check)
53+
{
54+
seqan3::test::tmp_filename filename{"map_io_reader_constructor.xyz"};
55+
std::ofstream filecreator{filename.get_path(), std::ios::out | std::ios::binary};
56+
EXPECT_THROW((bio::map_io::reader{filename.get_path(), std::forward<decltype(args)>(args)...}),
57+
bio::unhandled_extension_error);
58+
}
59+
}
60+
61+
// Filename-only construction: runs the shared filename checks (including the
// extension check) and verifies the deduced reader type is the default one.
TEST(map_io_reader, constructor1_just_filename)
{
    map_io_reader_filename_constructor(true);

    using deduced_t = decltype(bio::map_io::reader{""});
    EXPECT_TRUE((std::same_as<deduced_t, bio::map_io::reader<>>));
}
66+
67+
// Filename + options construction: runs the shared filename checks and verifies
// that the option's field types show up in the deduced reader type.
TEST(map_io_reader, constructor1_with_opts)
{
    bio::map_io::reader_options options{.field_types = bio::map_io::field_types_sam<>};

    using field_ids_t   = std::remove_cvref_t<decltype(bio::map_io::default_field_ids)>;
    using field_types_t = std::remove_cvref_t<decltype(bio::map_io::field_types_sam<>)>;
    using expected_t    = bio::map_io::reader<field_ids_t, field_types_t, seqan3::type_list<bio::sam>>;

    map_io_reader_filename_constructor(true, std::move(options));

    // decltype is an unevaluated context, so naming `options` after the move is harmless
    using deduced_t = decltype(bio::map_io::reader{"", options});
    EXPECT_TRUE((std::same_as<deduced_t, expected_t>));
}
77+
78+
// Filename + explicit format tag: the extension check is skipped because the
// format is given directly; the deduced type must be the default reader.
TEST(map_io_reader, constructor2_just_filename_direct_format)
{
    map_io_reader_filename_constructor(false, bio::sam{});

    using deduced_t = decltype(bio::map_io::reader{"", bio::sam{}});
    EXPECT_TRUE((std::same_as<deduced_t, bio::map_io::reader<>>));
}
83+
84+
// Filename + explicit format tag + options: shared filename checks without the
// extension check, plus verification of the deduced reader type.
TEST(map_io_reader, constructor2_with_opts_direct_format)
{
    bio::map_io::reader_options options{.field_types = bio::map_io::field_types_sam<>};

    using field_ids_t   = std::remove_cvref_t<decltype(bio::map_io::default_field_ids)>;
    using field_types_t = std::remove_cvref_t<decltype(bio::map_io::field_types_sam<>)>;
    using expected_t    = bio::map_io::reader<field_ids_t, field_types_t, seqan3::type_list<bio::sam>>;

    map_io_reader_filename_constructor(false, bio::sam{}, std::move(options));

    // decltype is an unevaluated context, so naming `options` after the move is harmless
    using deduced_t = decltype(bio::map_io::reader{"", bio::sam{}, options});
    EXPECT_TRUE((std::same_as<deduced_t, expected_t>));
}
94+
95+
// Filename + format passed as a std::variant of format tags: shared filename
// checks without the extension check; the deduced type must be the default reader.
TEST(map_io_reader, constructor2_just_filename_format_variant)
{
    std::variant<bio::sam> format{};

    map_io_reader_filename_constructor(false, format);

    using deduced_t = decltype(bio::map_io::reader{"", format});
    EXPECT_TRUE((std::same_as<deduced_t, bio::map_io::reader<>>));
}
102+
103+
// Filename + format variant + options: shared filename checks without the
// extension check, plus verification of the deduced reader type.
TEST(map_io_reader, constructor2_with_opts_format_variant)
{
    std::variant<bio::sam>      format{};
    bio::map_io::reader_options options{.field_types = bio::map_io::field_types_sam<>};

    using field_ids_t   = std::remove_cvref_t<decltype(bio::map_io::default_field_ids)>;
    using field_types_t = std::remove_cvref_t<decltype(bio::map_io::field_types_sam<>)>;
    using expected_t    = bio::map_io::reader<field_ids_t, field_types_t, seqan3::type_list<bio::sam>>;

    map_io_reader_filename_constructor(false, format, std::move(options));

    // decltype is an unevaluated context, so the second std::move never executes
    using deduced_t = decltype(bio::map_io::reader{"", format, std::move(options)});
    EXPECT_TRUE((std::same_as<deduced_t, expected_t>));
}
114+
115+
// Construction from an lvalue stream plus an explicit format tag.
TEST(map_io_reader, constructor3)
{
    std::istringstream stream;

    EXPECT_NO_THROW((bio::map_io::reader{stream, bio::sam{}}));

    using deduced_t = decltype(bio::map_io::reader{stream, bio::sam{}});
    EXPECT_TRUE((std::same_as<deduced_t, bio::map_io::reader<>>));
}
122+
123+
// Construction from an lvalue stream, an explicit format tag and options.
TEST(map_io_reader, constructor3_with_opts)
{
    std::istringstream          stream;
    bio::map_io::reader_options options{.field_types = bio::map_io::field_types_sam<>};

    using field_ids_t   = std::remove_cvref_t<decltype(bio::map_io::default_field_ids)>;
    using field_types_t = std::remove_cvref_t<decltype(bio::map_io::field_types_sam<>)>;
    using expected_t    = bio::map_io::reader<field_ids_t, field_types_t, seqan3::type_list<bio::sam>>;

    EXPECT_NO_THROW((bio::map_io::reader{stream, bio::sam{}, options}));

    using deduced_t = decltype(bio::map_io::reader{stream, bio::sam{}, options});
    EXPECT_TRUE((std::same_as<deduced_t, expected_t>));
}
134+
135+
// Construction from an rvalue (moved) stream plus an explicit format tag.
TEST(map_io_reader, constructor4)
{
    std::istringstream stream;

    EXPECT_NO_THROW((bio::map_io::reader{std::move(stream), bio::sam{}}));

    // decltype is an unevaluated context, so `stream` is not moved a second time
    using deduced_t = decltype(bio::map_io::reader{std::move(stream), bio::sam{}});
    EXPECT_TRUE((std::same_as<deduced_t, bio::map_io::reader<>>));
}
142+
143+
// Construction from an rvalue (moved) stream, an explicit format tag and options.
TEST(map_io_reader, constructor4_with_opts)
{
    std::istringstream          stream;
    bio::map_io::reader_options options{.field_types = bio::map_io::field_types_sam<>};

    using field_ids_t   = std::remove_cvref_t<decltype(bio::map_io::default_field_ids)>;
    using field_types_t = std::remove_cvref_t<decltype(bio::map_io::field_types_sam<>)>;
    using expected_t    = bio::map_io::reader<field_ids_t, field_types_t, seqan3::type_list<bio::sam>>;

    EXPECT_NO_THROW((bio::map_io::reader{std::move(stream), bio::sam{}, options}));

    // decltype is an unevaluated context, so `stream` is not moved a second time
    using deduced_t = decltype(bio::map_io::reader{std::move(stream), bio::sam{}, options});
    EXPECT_TRUE((std::same_as<deduced_t, expected_t>));
}
154+
155+
// Iterates the plain-text SAM fixture twice: once through std::ranges::distance
// and once through a range-based for loop. Both must see exactly three records.
TEST(map_io_reader, iteration)
{
    // record count via the range interface (distance yields a signed difference type)
    {
        std::istringstream  str{static_cast<std::string>(input)};
        bio::map_io::reader reader{str, bio::sam{}};

        EXPECT_EQ(std::ranges::distance(reader), 3);
    }

    // record count and a basic ID check via a range-based for loop
    {
        std::istringstream  str{static_cast<std::string>(input)};
        bio::map_io::reader reader{str, bio::sam{}};

        std::size_t count = 0;
        for (auto & rec : reader)
        {
            ++count;
            EXPECT_TRUE(rec.id().starts_with("read"));
            // only very basic check here, rest in format test
        }
        // unsigned literal: keeps the comparison inside EXPECT_EQ free of a signed/unsigned mix
        EXPECT_EQ(count, 3u);
    }
}
178+
179+
// A reader on an existing but empty file constructs fine; calling begin()
// is expected to throw bio::file_open_error.
TEST(map_io_reader, empty_file)
{
    seqan3::test::tmp_filename empty_path{"map_io_reader_constructor.sam"};
    std::ofstream              touch{empty_path.get_path(), std::ios::out | std::ios::binary};

    bio::map_io::reader empty_reader{empty_path.get_path()};

    EXPECT_THROW(empty_reader.begin(), bio::file_open_error);
}
190+
191+
// A reader on an empty stream constructs fine; calling begin()
// is expected to throw bio::file_open_error.
TEST(map_io_reader, empty_stream)
{
    std::istringstream  empty_stream{""};
    bio::map_io::reader empty_reader{empty_stream, bio::sam{}};

    EXPECT_THROW(empty_reader.begin(), bio::file_open_error);
}
200+
201+
// TEST(map_io_reader, custom_field_types)
202+
// {
203+
// bio::map_io::reader_options opt{.field_types = bio::map_io::field_types<bio::ownership::deep>};
204+
205+
// std::istringstream str{static_cast<std::string>(input)};
206+
// bio::map_io::reader reader{str, bio::sam{}, opt};
207+
208+
// EXPECT_TRUE((std::same_as<decltype(reader.front().seq()), std::vector<seqan3::dna5> &>));
209+
// EXPECT_TRUE((std::same_as<decltype(reader.front().id()), std::string &>));
210+
// }
211+
212+
// Restricts the record to two fields (seq, id — both as std::string) and checks
// that records can be decomposed with structured bindings in that order.
TEST(map_io_reader, custom_field_ids_structured_bindings)
{
    bio::map_io::reader_options options{.field_ids   = bio::vtag<bio::field::seq, bio::field::id>,
                                        .field_types = bio::ttag<std::string, std::string>};

    std::istringstream  stream{static_cast<std::string>(input)};
    bio::map_io::reader reader{stream, bio::sam{}, options};

    for (auto & [sequence, name] : reader)
    {
        EXPECT_TRUE(name.starts_with("read"));
    }
}
223+
224+
// Writes the compressed fixture to a temporary ".sam.gz" file, opens it by
// filename only, and expects three records whose IDs start with "read".
TEST(map_io_reader, decompression_filename)
{
    seqan3::test::tmp_filename filename{"map_io_reader.sam.gz"};

    // scoped so the output stream is destroyed before the reader opens the file
    {
        std::ofstream                         filecreator{filename.get_path(), std::ios::out | std::ios::binary};
        bio::detail::fast_ostreambuf_iterator it{filecreator};
        it.write_range(input_bgzipped);
    }

    bio::map_io::reader reader{filename.get_path()};

    std::size_t count = 0;
    for (auto & rec : reader)
    {
        ++count;
        EXPECT_TRUE(rec.id().starts_with("read"));
        // only very basic check here, rest in format test
    }
    // unsigned literal: keeps the comparison inside EXPECT_EQ free of a signed/unsigned mix
    EXPECT_EQ(count, 3u);
}
245+
246+
// Feeds the compressed fixture through a string stream with an explicit SAM
// format tag and expects three records whose IDs start with "read".
TEST(map_io_reader, decompression_stream)
{
    std::istringstream str{static_cast<std::string>(input_bgzipped)};

    bio::map_io::reader reader{str, bio::sam{}};

    std::size_t count = 0;
    for (auto & rec : reader)
    {
        ++count;
        EXPECT_TRUE(rec.id().starts_with("read"));
        // only very basic check here, rest in format test
    }
    // unsigned literal: keeps the comparison inside EXPECT_EQ free of a signed/unsigned mix
    EXPECT_EQ(count, 3u);
}
261+
262+
// The header must be reachable both before and after iteration has started,
// and must report format version "1.6" in either case.
TEST(map_io_reader, get_header)
{
    // header requested before begin() was ever called
    {
        std::istringstream  stream{static_cast<std::string>(input)};
        bio::map_io::reader reader{stream, bio::sam{}};

        bio::map_io::header const & header = reader.header();
        EXPECT_EQ(header.format_version, "1.6");
    }

    // header requested after begin() already produced the first record
    {
        std::istringstream  stream{static_cast<std::string>(input)};
        bio::map_io::reader reader{stream, bio::sam{}};

        auto first = reader.begin();
        EXPECT_EQ(first->id(), "read1");

        bio::map_io::header const & header = reader.header();
        EXPECT_EQ(header.format_version, "1.6");
    }
}

0 commit comments

Comments
 (0)