Skip to content

Commit a94ac99

Browse files
authored
Merge pull request #11 from h-2/wip
[feature] seq_io::reader and lots of required code
2 parents 3b2e3be + 29d46d4 commit a94ac99

26 files changed

+1650
-53
lines changed
Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
// -----------------------------------------------------------------------------------------------------
2+
// Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin
3+
// Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik
4+
// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5+
// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6+
// -----------------------------------------------------------------------------------------------------
7+
8+
/*!\file
9+
* \brief Provides the seqan3::detail::in_file_iterator class template.
10+
* \author Hannes Hauswedell <hannes.hauswedell AT fu-berlin.de>
11+
*/
12+
13+
#pragma once
14+
15+
#include <cassert>
#include <cstddef>
#include <iterator>
#include <ranges>
#include <type_traits>

#include <bio/platform.hpp>
19+
20+
namespace bio::detail
21+
{
22+
23+
/*!\brief Input iterator that provides a range-like interface for readers.
24+
* \tparam file_type The data structure on which the iterator operates.
25+
* \implements std::input_Iterator
26+
* \ingroup bio
27+
*
28+
* This iterator is a single-pass input iterator for input files. All member types are resolved
29+
* via `file_type`'s member types, dereference is implemented via file's `front()` member
30+
* function, and increment calls the `buffer_next_record()` member of file.
31+
*
32+
* Note that since this is a single-pass input iterator, post-increment returns void because
33+
* previous iterators are always invalid (all iterators point to the current position in single-pass
34+
* ranges).
35+
*
36+
* This iterator may be compared against std::default_sentinel_t, this check delegates to
37+
* calling the `eof()` member function on the file's stream.
38+
*/
39+
template <typename file_type>
40+
class in_file_iterator
41+
{
42+
static_assert(!std::is_const_v<file_type>,
43+
"You cannot iterate over const files, because the iterator changes the file.");
44+
45+
public:
46+
/*!\name Member types
47+
* \brief The associated types are derived from the `file_type`.
48+
* \{
49+
*/
50+
51+
//!\brief The value type.
52+
using value_type = typename file_type::record_type;
53+
//!\brief The reference type.
54+
using reference = typename file_type::record_type &;
55+
//!\brief The const reference type.
56+
using const_reference = typename file_type::record_type &;
57+
//!\brief The size type.
58+
using size_type = size_t;
59+
//!\brief The difference type. A signed integer type, usually std::ptrdiff_t.
60+
using difference_type = ptrdiff_t;
61+
//!\brief The pointer type.
62+
using pointer = typename file_type::record_type *;
63+
//!\brief Tag this class as an input iterator.
64+
using iterator_category = std::input_iterator_tag;
65+
//!\}
66+
67+
/*!\name Constructors, destructor and assignment.
68+
* \{
69+
*/
70+
in_file_iterator() = default; //!< Defaulted.
71+
in_file_iterator(in_file_iterator const &) = default; //!< Defaulted.
72+
in_file_iterator(in_file_iterator &&) = default; //!< Defaulted.
73+
~in_file_iterator() = default; //!< Defaulted.
74+
in_file_iterator & operator=(in_file_iterator const &) = default; //!< Defaulted.
75+
in_file_iterator & operator=(in_file_iterator &&) = default; //!< Defaulted.
76+
77+
//!\brief Construct with reference to host.
78+
in_file_iterator(file_type & _host) noexcept : host{&_host} {}
79+
//!\}
80+
81+
/*!\name Iterator operations
82+
* \{
83+
*/
84+
//!\brief Move to the next record in the file and return a reference to it.
85+
in_file_iterator & operator++()
86+
{
87+
assert(host != nullptr);
88+
host->read_next_record();
89+
return *this;
90+
}
91+
92+
//!\brief Post-increment is the same as pre-increment, but returns void.
93+
void operator++(int)
94+
{
95+
assert(host != nullptr);
96+
++(*this);
97+
}
98+
99+
//!\brief Dereference returns the currently buffered record.
100+
reference operator*() noexcept
101+
{
102+
assert(host != nullptr);
103+
return host->record_buffer;
104+
}
105+
106+
//!\brief Dereference returns the currently buffered record.
107+
reference operator*() const noexcept
108+
{
109+
assert(host != nullptr);
110+
return host->record_buffer;
111+
}
112+
113+
//!\brief Dereference returns the currently buffered record.
114+
value_type * operator->() noexcept
115+
{
116+
assert(host != nullptr);
117+
return &host->record_buffer;
118+
}
119+
120+
//!\brief Dereference returns the currently buffered record.
121+
value_type const * operator->() const noexcept
122+
{
123+
assert(host != nullptr);
124+
return &host->record_buffer;
125+
}
126+
127+
//!\}
128+
129+
/*!\name Comparison operators
130+
* \brief Only (in-)equality comparison of iterator with end() is supported.
131+
* \{
132+
*/
133+
134+
//!\brief Checks whether `*this` is equal to the sentinel.
135+
constexpr bool operator==(std::default_sentinel_t const &) const noexcept
136+
{
137+
assert(host != nullptr);
138+
return host->at_end;
139+
}
140+
141+
//!\brief Checks whether `*this` is not equal to the sentinel.
142+
constexpr bool operator!=(std::default_sentinel_t const &) const noexcept
143+
{
144+
assert(host != nullptr);
145+
return !host->at_end;
146+
}
147+
148+
//!\brief Checks whether `it` is equal to the sentinel.
149+
constexpr friend bool operator==(std::default_sentinel_t const &, in_file_iterator const & it) noexcept
150+
{
151+
return (it == std::default_sentinel);
152+
}
153+
154+
//!\brief Checks whether `it` is not equal to the sentinel.
155+
constexpr friend bool operator!=(std::default_sentinel_t const &, in_file_iterator const & it) noexcept
156+
{
157+
return (it != std::default_sentinel);
158+
}
159+
//!\}
160+
161+
private:
162+
//!\brief Pointer to file host.
163+
file_type * host{};
164+
};
165+
166+
} // namespace bio::detail

include/bio/detail/misc.hpp

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
// -----------------------------------------------------------------------------------------------------
2+
// Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin
3+
// Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik
4+
// Copyright (c) 2020-2021, deCODE Genetics
5+
// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
6+
// shipped with this file and also available at: https://github.com/seqan/bio/blob/master/LICENSE.md
7+
// -----------------------------------------------------------------------------------------------------
8+
9+
/*!\file
10+
* \brief Provides miscellaneous utilities.
11+
* \author Hannes Hauswedell <hannes.hauswedell AT decode.is>
12+
*/
13+
14+
#pragma once
15+
16+
#include <algorithm>
17+
#include <concepts>
18+
#include <filesystem>
19+
#include <ranges>
20+
#include <string>
21+
22+
#include <seqan3/core/detail/template_inspection.hpp>
23+
#include <seqan3/utility/type_list/detail/type_list_algorithm.hpp>
24+
#include <seqan3/utility/type_list/type_list.hpp>
25+
26+
#include <bio/exception.hpp>
27+
28+
namespace bio::detail
29+
{
30+
31+
/*!\addtogroup bio
32+
* \{
33+
*/
34+
35+
/*!\brief Sets the file format according to the file name extension.
36+
* \param[out] format The format to set.
37+
* \param[in] file_name The file name to extract the extension from.
38+
*
39+
* \throws seqan3::unhandled_extension_error If the extension in file_name does
40+
* not occur in any valid extensions of the formats specified in the
41+
* \p format_variant_type template argument list.
42+
*/
43+
void set_format(auto & format, std::filesystem::path const & file_name)
44+
{
45+
using format_variant_type = std::remove_cvref_t<decltype(format)>;
46+
using valid_formats = seqan3::detail::transfer_template_args_onto_t<format_variant_type, seqan3::type_list>;
47+
48+
bool format_found = false;
49+
std::string extension = file_name.extension().string();
50+
if (extension.size() > 1)
51+
{
52+
extension = extension.substr(1); // drop leading "."
53+
seqan3::detail::for_each<valid_formats>(
54+
[&](auto fmt)
55+
{
56+
using fm_type = typename decltype(fmt)::type; // remove type_identity wrapper
57+
58+
for (auto const & ext : fm_type::file_extensions)
59+
{
60+
if (std::ranges::equal(ext, extension))
61+
{
62+
format.template emplace<fm_type>();
63+
format_found = true;
64+
return;
65+
}
66+
}
67+
});
68+
}
69+
70+
if (!format_found)
71+
throw unhandled_extension_error("No valid format found for this extension.");
72+
}
73+
74+
//!\}
75+
76+
} // namespace bio::detail

include/bio/detail/range.hpp

Lines changed: 56 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,10 @@
1313

1414
#pragma once
1515

16+
#include <ranges>
17+
#include <span>
18+
1619
#include <seqan3/alphabet/concept.hpp>
17-
#include <seqan3/utility/concept/container.hpp>
1820

1921
#include <bio/platform.hpp>
2022

@@ -25,6 +27,57 @@ namespace bio::detail
2527
* \{
2628
*/
2729

30+
// ----------------------------------------------------------------------------
31+
// concepts
32+
// ----------------------------------------------------------------------------
33+
34+
/*!\interface bio::detail::back_insertable_with <>
35+
* \extends std::ranges::output_range
36+
* \tparam rng_t The container type.
37+
* \tparam val_t The type to append to the container.
38+
* \brief Describes range types that can grow in amortised constant time by appending an element of type val_t.
39+
*/
40+
//!\cond
41+
template <typename rng_t, typename val_t>
42+
concept back_insertable_with = std::ranges::output_range<rng_t, val_t> && requires(rng_t & v)
43+
{
44+
v.push_back(std::declval<val_t>());
45+
};
46+
//!\endcond
47+
48+
/*!\interface bio::detail::back_insertable <>
49+
* \extends std::ranges::output_range
50+
* \extends std::ranges::input_range
51+
* \tparam rng_t The container type.
52+
* \brief Describes range types that can grow in amortised constant time by appending an element.
53+
*/
54+
//!\cond
55+
template <typename rng_t>
56+
concept back_insertable =
57+
std::ranges::input_range<rng_t> && back_insertable_with<rng_t, std::ranges::range_reference_t<rng_t>>;
58+
//!\endcond
59+
60+
//!\brief A seqan3::alphabet that is **not** a character or number (any std::integral).
61+
template <typename t>
62+
concept deliberate_alphabet = seqan3::alphabet<t> && !std::integral<std::remove_cvref_t<t>>;
63+
64+
//!\brief A range whose value type is `char`.
65+
template <typename t>
66+
concept char_range = std::ranges::range<t> && std::same_as<char, std::remove_cvref_t<std::ranges::range_value_t<t>>>;
67+
68+
//!\brief A range whose value type is an integral type other than `char`.
69+
template <typename t>
70+
concept int_range = std::ranges::range<t> && std::integral<std::remove_cvref_t<std::ranges::range_value_t<t>>> &&
71+
!std::same_as<char, std::remove_cvref_t<std::ranges::range_value_t<t>>>;
72+
73+
//!\brief A type that is not std::span<std::byte const>.
74+
template <typename t>
75+
concept not_a_byte_span = !std::same_as<t, std::span<std::byte const>>;
76+
77+
// ----------------------------------------------------------------------------
78+
// copy functions
79+
// ----------------------------------------------------------------------------
80+
2881
/*!\brief Copy elements from the first range into the second range.
2982
* \param[in] in The range to copy from.
3083
* \param[out] out The range to copy to.
@@ -36,8 +89,8 @@ namespace bio::detail
3689
* If the input range is sized and the target range offers a `.resize()` member, this function uses
3790
* resize and assignment instead of back-insertion.
3891
*/
39-
void sized_range_copy(std::ranges::input_range auto && in,
40-
seqan3::back_insertable_with<std::ranges::range_reference_t<decltype(in)>> auto && out)
92+
void sized_range_copy(std::ranges::input_range auto && in,
93+
back_insertable_with<std::ranges::range_reference_t<decltype(in)>> auto && out)
4194
{
4295
using in_t = decltype(in);
4396
using out_t = decltype(out);
@@ -64,23 +117,6 @@ void string_copy(std::string_view const in, auto & out)
64117
sized_range_copy(in, out);
65118
}
66119

67-
//!\brief A seqan3::alphabet that is **not** a character or number (any std::integral).
68-
template <typename t>
69-
concept deliberate_alphabet = seqan3::alphabet<t> && !std::integral<std::remove_cvref_t<t>>;
70-
71-
//!\brief A range whose value type is `char`.
72-
template <typename t>
73-
concept char_range = std::ranges::range<t> && std::same_as<char, std::remove_cvref_t<std::ranges::range_value_t<t>>>;
74-
75-
//!\brief A range whose value type is an integral type other than `char`.
76-
template <typename t>
77-
concept int_range = std::ranges::range<t> && std::integral<std::remove_cvref_t<std::ranges::range_value_t<t>>> &&
78-
!std::same_as<char, std::remove_cvref_t<std::ranges::range_value_t<t>>>;
79-
80-
//!\brief A type that is not std::span<std::byte const>.
81-
template <typename t>
82-
concept not_a_byte_span = !std::same_as<t, std::span<std::byte const>>;
83-
84120
//!\}
85121

86122
} // namespace bio::detail

0 commit comments

Comments
 (0)