Skip to content

Commit 54b1649

Browse files
committed
Switch to BufferedWriter for write_rds() and related functions.
This eliminates the need for a separate buffer argument. We also allow users to control the buffer size and whether or not the write is parallelized.
1 parent 0f34da7 commit 54b1649

19 files changed

+355
-374
lines changed

examples/src/write_dgCMatrix.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,6 @@ int main () {
4747
auto factors = new rds2cpp::GenericVector;
4848
obj.attributes.values.emplace_back(factors);
4949

50-
rds2cpp::write_rds(file_info, "my_matrix.rds");
50+
rds2cpp::write_rds(file_info, "my_matrix.rds", {});
5151
return 0;
5252
}

include/rds2cpp/SharedParseInfo.hpp

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -29,8 +29,7 @@ struct SharedParseInfo {
2929

3030
private:
3131
std::size_t compute_reference_index(const Header& header) const {
32-
// Shouldn't matter that we use a signed integer here,
33-
// as the left-shifts should never get to the sign bit.
32+
// Shouldn't matter that we use a signed integer here, as the left-shifts should never get to the sign bit.
3433
std::int32_t index = 0;
3534
for (int i = 0; i < 3; ++i) {
3635
index <<= 8;

include/rds2cpp/SharedWriteInfo.hpp

Lines changed: 46 additions & 70 deletions
Original file line number | Diff line number | Diff line change
@@ -19,8 +19,8 @@ namespace rds2cpp {
1919

2020
struct SharedWriteInfo;
2121

22-
template<class Writer>
23-
void write_object(const RObject* object, Writer& writer, std::vector<unsigned char>& buffer, SharedWriteInfo& shared);
22+
template<class BufferedWriter_>
23+
void write_object(const RObject* object, BufferedWriter_& bufwriter, SharedWriteInfo& shared);
2424

2525
struct SharedWriteInfo {
2626
std::size_t reference_count;
@@ -52,34 +52,32 @@ struct SharedWriteInfo {
5252
{}
5353

5454
private:
55-
template<class Writer>
56-
static void write_reference(std::size_t ref, Writer& writer, std::vector<unsigned char>& buffer) {
57-
buffer.resize(4);
58-
buffer[2] = ref & 255;
59-
ref >>= 8;
60-
buffer[1] = ref & 255;
61-
ref >>= 8;
62-
buffer[0] = ref & 255;
63-
buffer[3] = static_cast<unsigned char>(SEXPType::REF);
64-
writer.write(buffer.data(), buffer.size());
65-
return;
55+
template<class BufferedWriter_>
56+
static void write_reference(std::size_t ref, BufferedWriter_& bufwriter) {
57+
Header details;
58+
59+
// Opposite of SharedParseInfo::compute_reference_index.
60+
for (int i = 0; i < 3; ++i) {
61+
details[2 - i] = ref & 255;
62+
ref >>= 8;
63+
}
64+
65+
details[3] = static_cast<unsigned char>(SEXPType::REF);
66+
bufwriter.write(details.data(), details.size());
6667
}
6768

6869
public:
69-
template<class Writer>
70-
std::size_t write_symbol(const std::string& value, StringEncoding encoding, Writer& writer, std::vector<unsigned char>& buffer) {
70+
template<class BufferedWriter_>
71+
std::size_t write_symbol(const std::string& value, StringEncoding encoding, BufferedWriter_& bufwriter) {
7172
auto& host = symbol_mappings[static_cast<int>(encoding)];
7273
auto it = host.find(value);
7374
if (it != host.end()) {
74-
write_reference(it->second, writer, buffer);
75+
write_reference(it->second, bufwriter);
7576
return it->second;
7677
}
7778

78-
buffer.clear();
79-
inject_header(SEXPType::SYM, buffer);
80-
writer.write(buffer.data(), buffer.size());
81-
82-
write_single_string(value, encoding, false, writer, buffer);
79+
inject_header(SEXPType::SYM, bufwriter);
80+
write_single_string(value, encoding, false, bufwriter);
8381

8482
const auto old_reference_count = reference_count;
8583
host[value] = reference_count;
@@ -88,26 +86,26 @@ struct SharedWriteInfo {
8886
return old_reference_count;
8987
}
9088

91-
template<class Writer>
92-
void write_symbol(const RObject* obj, Writer& writer, std::vector<unsigned char>& buffer) {
89+
template<class BufferedWriter_>
90+
void write_symbol(const RObject* obj, BufferedWriter_& bufwriter) {
9391
auto ptr = static_cast<const SymbolIndex*>(obj);
94-
auto index = ptr->index;
92+
const auto index = ptr->index;
9593
if (index >= known_symbol_mappings.size()) {
9694
throw std::runtime_error("symbol index out of range for supplied Symbol objects");
9795
}
9896

9997
auto& candidate = known_symbol_mappings[index];
10098
if (candidate != 0) {
101-
write_reference(candidate, writer, buffer);
99+
write_reference(candidate, bufwriter);
102100
} else {
103101
const auto& sym = (*known_symbols)[index];
104-
candidate = write_symbol(sym.name, sym.encoding, writer, buffer);
102+
candidate = write_symbol(sym.name, sym.encoding, bufwriter);
105103
}
106104
}
107105

108106
public:
109-
template<class Writer>
110-
void write_external_pointer(const RObject* obj, Writer& writer, std::vector<unsigned char>& buffer) {
107+
template<class BufferedWriter_>
108+
void write_external_pointer(const RObject* obj, BufferedWriter_& bufwriter) {
111109
auto ptr = static_cast<const ExternalPointerIndex*>(obj);
112110
auto index = ptr->index;
113111
if (index >= known_external_pointer_mappings.size()) {
@@ -116,36 +114,28 @@ struct SharedWriteInfo {
116114

117115
auto& candidate = known_external_pointer_mappings[index];
118116
if (candidate != 0) {
119-
write_reference(candidate, writer, buffer);
117+
write_reference(candidate, bufwriter);
120118
return;
121119
}
122120
candidate = reference_count;
123121
reference_count = sanisizer::sum<I<decltype(reference_count)> >(reference_count, 1); // safely incrementing this count.
124122

125123
const auto& ext = (*known_external_pointers)[index];
126-
127-
buffer.clear();
128-
inject_header(SEXPType::EXTPTR, ext.attributes, buffer);
129-
writer.write(buffer.data(), buffer.size());
130-
131-
write_object(ext.protection.get(), writer, buffer, *this);
132-
write_object(ext.tag.get(), writer, buffer, *this);
133-
write_attributes(ext.attributes, writer, buffer, *this);
134-
135-
return;
124+
inject_header(SEXPType::EXTPTR, ext.attributes, bufwriter);
125+
write_object(ext.protection.get(), bufwriter, *this);
126+
write_object(ext.tag.get(), bufwriter, *this);
127+
write_attributes(ext.attributes, bufwriter, *this);
136128
}
137129

138130
public:
139-
template<class Writer>
140-
void write_environment(const RObject* obj, Writer& writer, std::vector<unsigned char>& buffer) {
131+
template<class BufferedWriter_>
132+
void write_environment(const RObject* obj, BufferedWriter_& bufwriter) {
141133
auto ptr = static_cast<const EnvironmentIndex*>(obj);
142134
auto index = ptr->index;
143135
auto env_type = ptr->env_type;
144136

145137
if (env_type == SEXPType::GLOBALENV_ || env_type == SEXPType::BASEENV_ || env_type == SEXPType::EMPTYENV_) {
146-
buffer.clear();
147-
inject_header(env_type, buffer);
148-
writer.write(buffer.data(), buffer.size());
138+
inject_header(env_type, bufwriter);
149139
return;
150140
}
151141

@@ -155,25 +145,20 @@ struct SharedWriteInfo {
155145

156146
auto& candidate = known_environment_mappings[index];
157147
if (candidate != 0) {
158-
write_reference(candidate, writer, buffer);
148+
write_reference(candidate, bufwriter);
159149
return;
160150
}
161-
162151
candidate = reference_count++;
163-
const auto& env = (*known_environments)[index];
164152

165-
buffer.clear();
166-
inject_header(SEXPType::ENV, env.attributes, buffer);
167-
168-
buffer.insert(buffer.end(), 3, 0);
169-
buffer.push_back(env.locked);
170-
writer.write(buffer.data(), buffer.size());
153+
const auto& env = (*known_environments)[index];
154+
inject_header(SEXPType::ENV, env.attributes, bufwriter);
155+
inject_integer<std::int32_t, std::int32_t>(env.locked, bufwriter);
171156

172157
{
173158
EnvironmentIndex parent;
174159
parent.index = env.parent;
175160
parent.env_type = env.parent_type;
176-
write_environment(&parent, writer, buffer);
161+
write_environment(&parent, bufwriter);
177162
}
178163

179164
const auto& names = env.variable_names;
@@ -184,31 +169,22 @@ struct SharedWriteInfo {
184169
if (len) {
185170
// Creating a tagged pairlist per element.
186171
for (I<decltype(len)> i = 0; i < len; ++i) {
187-
buffer.clear();
188-
inject_next_pairlist_header(true, buffer);
189-
writer.write(buffer.data(), buffer.size());
190-
191-
write_symbol(names[i], encodings[i], writer, buffer);
192-
write_object(values[i].get(), writer, buffer, *this);
172+
inject_next_pairlist_header(true, bufwriter);
173+
write_symbol(names[i], encodings[i], bufwriter);
174+
write_object(values[i].get(), bufwriter, *this);
193175
}
194176
}
195177

196178
// Terminating the pairlist.
197-
buffer.clear();
198-
inject_header(SEXPType::NILVALUE_, buffer);
199-
writer.write(buffer.data(), buffer.size());
179+
inject_header(SEXPType::NILVALUE_, bufwriter);
200180

201181
// We're not saving a hash table, because I don't want to have to
202182
// reproduce R's environment hashing logic.
203-
buffer.clear();
204-
inject_header(SEXPType::NILVALUE_, buffer);
205-
writer.write(buffer.data(), buffer.size());
183+
inject_header(SEXPType::NILVALUE_, bufwriter);
206184

207-
if (!write_attributes(env.attributes, writer, buffer, *this)) {
185+
if (!write_attributes(env.attributes, bufwriter, *this)) {
208186
// Finishing with NULL if there aren't any attributes.
209-
buffer.clear();
210-
inject_header(SEXPType::NILVALUE_, buffer);
211-
writer.write(buffer.data(), buffer.size());
187+
inject_header(SEXPType::NILVALUE_, bufwriter);
212188
}
213189
}
214190
};

include/rds2cpp/utils_write.hpp

Lines changed: 48 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -5,42 +5,33 @@
55
#include <vector>
66
#include <cstdint>
77
#include <cstddef>
8+
#include <type_traits>
89

910
#include "utils_parse.hpp"
1011

1112
namespace rds2cpp {
1213

13-
inline void inject_integer(std::int32_t value, std::vector<unsigned char>& buffer) {
14+
template<typename Target_, typename Type_, class BufferedWriter_>
15+
void inject_integer(Type_ value, BufferedWriter_& bufwriter) {
16+
static_assert(std::is_same<Target_, Type_>::value); // force users to set the type to avoid implicit typing.
1417
auto ptr = reinterpret_cast<unsigned char*>(&value);
15-
constexpr std::size_t width = 4;
18+
constexpr std::size_t width = sizeof(Target_);
1619
if (little_endian()) {
1720
std::reverse(ptr, ptr + width);
1821
}
19-
buffer.insert(buffer.end(), ptr, ptr + width);
22+
bufwriter.write(ptr, width);
2023
}
2124

22-
inline void inject_length(std::size_t value, std::vector<unsigned char>& buffer) {
25+
template<class BufferedWriter_>
26+
void inject_length(std::size_t value, BufferedWriter_& bufwriter) {
2327
if (value <= 2147483647) {
24-
inject_integer(value, buffer);
25-
return;
26-
}
27-
28-
inject_integer(-1, buffer);
29-
uint64_t big = value;
30-
31-
auto ptr = reinterpret_cast<unsigned char*>(&big);
32-
constexpr std::size_t width = 8;
33-
if (little_endian()) {
34-
std::reverse(ptr, ptr + width/2);
35-
std::reverse(ptr + width/2, ptr + width);
28+
inject_integer<std::int32_t, std::int32_t>(value, bufwriter);
29+
} else {
30+
// See get_length() for the inverse logic.
31+
inject_integer<std::int32_t, std::int32_t>(-1, bufwriter);
32+
inject_integer<std::uint32_t>(sanisizer::cast<std::uint32_t>(value >> 32), bufwriter);
33+
inject_integer<std::uint32_t>(static_cast<std::uint32_t>(value & 0xFFFFFFFF), bufwriter); // must fit in a uint32 as we're taking the lowest 32 bits.
3634
}
37-
38-
buffer.insert(buffer.end(), ptr, ptr + width);
39-
}
40-
41-
inline void inject_string(const char* ptr, std::size_t n, std::vector<unsigned char>& buffer) {
42-
auto p = reinterpret_cast<const unsigned char*>(ptr);
43-
buffer.insert(buffer.end(), p, p + n);
4435
}
4536

4637
template<class Object, typename = int>
@@ -70,40 +61,50 @@ inline unsigned char inject_attribute_header(Attributes& attributes) {
7061
return bit;
7162
}
7263

73-
template<class Object>
74-
void inject_header(Object& vec, std::vector<unsigned char>& buffer) {
75-
buffer.insert(buffer.end(), 2, 0);
64+
template<class Object_, class BufferedWriter_>
65+
void inject_header(Object_& vec, BufferedWriter_& bufwriter) {
66+
Header details;
67+
details[0] = 0;
68+
details[1] = 0;
7669

77-
if constexpr(has_attributes_for_writing<Object>::value) {
78-
buffer.push_back(inject_attribute_header(vec.attributes));
70+
if constexpr(has_attributes_for_writing<Object_>::value) {
71+
details[2] = inject_attribute_header(vec.attributes);
7972
} else {
80-
buffer.push_back(0);
73+
details[2] = 0;
8174
}
8275

83-
// cast from enum should be safe, as SEXPTypes are also unsigned chars.
84-
buffer.push_back(static_cast<unsigned char>(vec.type()));
85-
return;
76+
details[3] = static_cast<unsigned char>(vec.type()); // Cast from enum should be safe, as SEXPTypes are also unsigned chars.
77+
bufwriter.write(details.data(), details.size());
8678
}
8779

88-
inline void inject_header(SEXPType type, std::vector<unsigned char>& buffer) {
89-
buffer.insert(buffer.end(), 3, 0);
90-
buffer.push_back(static_cast<unsigned char>(type));
91-
return;
80+
template<class BufferedWriter_>
81+
void inject_header(SEXPType type, BufferedWriter_& bufwriter) {
82+
Header details;
83+
details[0] = 0;
84+
details[1] = 0;
85+
details[2] = 0;
86+
details[3] = static_cast<unsigned char>(type);
87+
bufwriter.write(details.data(), details.size());
9288
}
9389

94-
template<class Attributes>
95-
void inject_header(SEXPType type, Attributes& attributes, std::vector<unsigned char>& buffer) {
96-
buffer.insert(buffer.end(), 2, 0);
97-
buffer.push_back(inject_attribute_header(attributes));
98-
buffer.push_back(static_cast<unsigned char>(type));
99-
return;
90+
template<class Attributes_, class BufferedWriter_>
91+
void inject_header(SEXPType type, Attributes_& attributes, BufferedWriter_& bufwriter) {
92+
Header details;
93+
details[0] = 0;
94+
details[1] = 0;
95+
details[2] = inject_attribute_header(attributes);
96+
details[3] = static_cast<unsigned char>(type);
97+
bufwriter.write(details.data(), details.size());
10098
}
10199

102-
inline void inject_next_pairlist_header(bool tagged, std::vector<unsigned char>& buffer) {
103-
buffer.insert(buffer.end(), 2, 0);
104-
buffer.push_back(tagged ? 0x4 : 0); // has tag.
105-
buffer.push_back(static_cast<unsigned char>(SEXPType::LIST));
106-
return;
100+
template<class BufferedWriter_>
101+
void inject_next_pairlist_header(bool tagged, BufferedWriter_& bufwriter) {
102+
Header details;
103+
details[0] = 0;
104+
details[1] = 0;
105+
details[2] = (tagged ? 0x4 : 0); // has tag.
106+
details[3] = static_cast<unsigned char>(SEXPType::LIST);
107+
bufwriter.write(details.data(), details.size());
107108
}
108109

109110
}

0 commit comments

Comments
 (0)