-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.cpp
More file actions
165 lines (151 loc) · 4.86 KB
/
Copy pathmain.cpp
File metadata and controls
165 lines (151 loc) · 4.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <exception>
#include <fstream>
#include <iostream>
#include <limits>
#include <memory>
#include <sstream>
#include <string>
// Capacity in bytes of the stdin read buffer handed between the parse_* functions (8192 * 8).
// Declared as a typed constant rather than a macro so it obeys normal scope and type rules.
constexpr std::size_t BUFFER_SIZE = 8192 * 8;
// Number of leading characters of each sequence line that are walked into the trie as the
// barcode; assigned once in main from the command-line argument.
unsigned int cbc_length;
/**
 * One node of a prefix tree (trie) over the four bases 'A', 'C', 'G' and 'T'.
 *
 * Every node keeps a counter that is bumped by increment_count() and owns up to four
 * children, one per base. Children are created lazily the first time get_child() asks
 * for them and are released automatically when their parent is destroyed.
 */
class Node {
private:
    // Owned child per base; an empty pointer means that base has not been requested yet.
    std::unique_ptr<Node> A, C, T, G;
    unsigned int count = 0;

    // Returns the node stored in `slot`, allocating it first when the slot is still empty.
    static Node* child_or_create(std::unique_ptr<Node>& slot) {
        if (!slot)
            slot.reset(new Node());
        return slot.get();
    }

public:
    /**
     * Returns the child for `base`, creating it on first use.
     *
     * @param base one of 'A', 'C', 'G', 'T' (upper case only).
     * @return non-owning pointer to the child, or nullptr for any other character.
     */
    Node* get_child(char base) {
        switch (base) {
            case 'A': return child_or_create(A);
            case 'C': return child_or_create(C);
            case 'G': return child_or_create(G);
            case 'T': return child_or_create(T);
            default:  return nullptr;
        }
    }

    /** Adds one to this node's counter. */
    void increment_count() {
        ++count;
    }

    /**
     * Writes to std::cout, one per line, the counter of every existing node that lies
     * exactly `depth_to_be_printed` levels below this node (0 prints this node itself).
     * Children are visited in the order A, C, G, T; paths that end before the requested
     * depth produce no output.
     */
    void print(unsigned int depth_to_be_printed) const {
        if (depth_to_be_printed == 0) {
            std::cout << count << "\n";
            return;
        }
        const unsigned int remaining = depth_to_be_printed - 1;
        if (A) A->print(remaining);
        if (C) C->print(remaining);
        if (G) G->print(remaining);
        if (T) T->print(remaining);
    }
};
// Root of the barcode trie; filled by parse_barcode_and_add_to_tree and printed by main.
Node root;
/**
 * Skips forward to just past the next '\n', refilling the buffer from stdin as needed.
 *
 * @param buffer     read buffer shared by the parse_* functions.
 * @param i          cursor into buffer; on success it indexes the byte after the newline.
 * @param valid_size number of valid bytes in buffer; updated on every refill.
 * @return EXIT_SUCCESS once a newline has been consumed, EXIT_FAILURE (after writing a
 *         message to std::cerr) when fread delivers no more bytes before one is found.
 */
int parse_line(char (&buffer)[BUFFER_SIZE], size_t &i, size_t &valid_size){
    for (;;) {
        while (i < valid_size) {
            if (buffer[i++] == '\n')
                return EXIT_SUCCESS;
        }
        // fread returns an unsigned count, so 0 is the only "nothing read" result.
        valid_size = fread(buffer, 1, sizeof(buffer), stdin);
        if (valid_size == 0) {
            std::cerr << "premature end of file" << '\n';
            return EXIT_FAILURE;
        }
        i = 0;
    }
}
/**
 * Consumes one line that must begin with '+'.
 *
 * Refills the buffer from stdin first when the cursor has reached the end of the valid
 * data, checks the first character, then delegates to parse_line to skip the rest.
 *
 * @param buffer     read buffer shared by the parse_* functions.
 * @param i          cursor into buffer; advanced past the consumed line on success.
 * @param valid_size number of valid bytes in buffer; updated on refill.
 * @return EXIT_SUCCESS when the line was consumed; EXIT_FAILURE (with a message on
 *         std::cerr) when the input ends here or the line does not start with '+'.
 */
int parse_plus_line(char (&buffer)[BUFFER_SIZE], size_t &i, size_t &valid_size){
    if (i >= valid_size) {
        valid_size = fread(buffer, 1, sizeof(buffer), stdin);
        if (valid_size == 0) {
            // Report the truncated input instead of failing silently, as parse_line does.
            std::cerr << "premature end of file" << '\n';
            return EXIT_FAILURE;
        }
        i = 0;
    }
    if (buffer[i++] != '+') {
        std::cerr << "line does not start with '+'" << std::endl;
        return EXIT_FAILURE;
    }
    return parse_line(buffer, i, valid_size);
}
/**
 * Walks the first cbc_length characters of the current line into the trie rooted at
 * `root`, incrementing the counter of the root and of every node visited, then skips the
 * remainder of the line via parse_line.
 *
 * An 'N' inside the barcode stops the walk and the rest of the line is skipped; nodes
 * already visited for this line keep their increments. Any other character that
 * Node::get_child rejects is reported on std::cerr as an error.
 *
 * @param buffer     read buffer shared by the parse_* functions.
 * @param i          cursor into buffer; advanced past the consumed line on success.
 * @param valid_size number of valid bytes in buffer; updated on every refill.
 * @return the result of parse_line when the barcode was read completely or contained
 *         'N'; EXIT_FAILURE on an unexpected character or when the input ends first.
 */
int parse_barcode_and_add_to_tree(char (&buffer)[BUFFER_SIZE], size_t &i, size_t &valid_size){
    Node *node = &root;
    node->increment_count();
    unsigned int remaining = cbc_length;
    for (;;) {
        while (i < valid_size) {
            const char base = buffer[i];
            node = node->get_child(base);
            if (node == nullptr) {
                if (base == 'N')
                    return parse_line(buffer, i, valid_size);
                std::cerr << "barcode contains: " << base << std::endl;
                return EXIT_FAILURE;
            }
            node->increment_count();
            // The cursor stays on the last barcode character; parse_line scans on from there.
            if (--remaining == 0)
                return parse_line(buffer, i, valid_size);
            ++i;
        }
        // Barcode straddles the end of the buffer: refill and continue from its start.
        i = 0;
        valid_size = fread(buffer, 1, sizeof(buffer), stdin);
        if (valid_size == 0) {
            std::cerr << "premature end of file" << '\n';
            return EXIT_FAILURE;
        }
    }
}
/**
 * Consumes one line that must begin with '@'.
 *
 * Refills the buffer from stdin first when the cursor has reached the end of the valid
 * data, checks the first character, then delegates to parse_line to skip the rest.
 *
 * @param buffer     read buffer shared by the parse_* functions.
 * @param i          cursor into buffer; advanced past the consumed line on success.
 * @param valid_size number of valid bytes in buffer; updated on refill.
 * @return EXIT_SUCCESS when the line was consumed; EXIT_FAILURE (with a message on
 *         std::cerr) when the input ends here or the line does not start with '@'.
 */
int parse_at_line(char (&buffer)[BUFFER_SIZE], size_t &i, size_t &valid_size){
    if (i >= valid_size) {
        i = 0;
        valid_size = fread(buffer, 1, sizeof(buffer), stdin);
        if (valid_size == 0) {
            // Report the missing record header instead of failing silently.
            std::cerr << "premature end of file" << '\n';
            return EXIT_FAILURE;
        }
    }
    if (buffer[i++] != '@') {
        std::cerr << "line does not start with '@'" << std::endl;
        return EXIT_FAILURE;
    }
    return parse_line(buffer, i, valid_size);
}
/**
 * Tests whether the input is exhausted.
 *
 * Note the return convention: EXIT_SUCCESS means "end of input reached", EXIT_FAILURE
 * means "more bytes are available". When the buffer has been fully consumed this
 * function refills it from stdin, resetting the cursor to 0.
 *
 * @param buffer     read buffer shared by the parse_* functions.
 * @param i          cursor into buffer; reset to 0 when a refill is attempted.
 * @param valid_size number of valid bytes in buffer; updated when a refill is attempted.
 */
int parse_eof(char (&buffer)[BUFFER_SIZE], size_t &i, size_t &valid_size){
    // Unconsumed bytes are still buffered, so the input has not ended.
    if (i < valid_size)
        return EXIT_FAILURE;
    i = 0;
    valid_size = fread(buffer, 1, sizeof(buffer), stdin);
    return valid_size > 0 ? EXIT_FAILURE : EXIT_SUCCESS;
}
/**
 * Reads four-line records ('@' line, sequence line, '+' line, one more line) from stdin,
 * counts the leading cbc_length characters of every sequence line in the trie, and prints
 * the count of each distinct barcode to stdout.
 *
 * Expects exactly one argument: the barcode length, an integer in [1, 1000].
 *
 * @return EXIT_SUCCESS when all input was parsed; EXIT_FAILURE on a usage error or as
 *         soon as a record fails to parse. Counts gathered before a parse failure are
 *         still printed.
 */
int main(int argc, char* argv[]) {
    try {
        // Check argc before touching argv[1]: it is a null pointer when no argument was
        // given, and constructing a std::string from null is undefined behaviour.
        if (argc != 2) throw std::exception();
        const std::string arg = argv[1];
        std::size_t pos = 0;
        const int x = std::stoi(arg, &pos);
        if (pos < arg.size()) throw std::exception();   // trailing characters after the number
        if (x <= 0 || x > 1000) throw std::exception();
        cbc_length = static_cast<unsigned int>(x);
    } catch (...) {
        // Also reached when std::stoi rejects the argument.
        const char* program = (argc > 0 && argv[0] != nullptr) ? argv[0] : "<program>";
        std::cerr << "Usage: <cat input.fastq> | " << program << " <length of cell barcode (16 for 10X chromium v2)>" << '\n';
        return EXIT_FAILURE;
    }
    char buffer[BUFFER_SIZE];
    size_t valid_size = 0; // size of the valid part of the buffer
    size_t i = 0; // cursor (current position in buffer)
    int ret = EXIT_SUCCESS;
    // parse_eof returns EXIT_FAILURE while input remains.
    while (parse_eof(buffer, i, valid_size) == EXIT_FAILURE) {
        const bool failed =
            parse_at_line(buffer, i, valid_size) ||                 // @ID....
            parse_barcode_and_add_to_tree(buffer, i, valid_size) || // ATTCGC...
            parse_plus_line(buffer, i, valid_size) ||               // +
            parse_line(buffer, i, valid_size);                      // ///777788899 ....
        if (failed) {
            // Stop at the first bad record: the cursor is no longer aligned to a record
            // boundary, and a later record must not overwrite the failure status.
            ret = EXIT_FAILURE;
            break;
        }
    }
    root.print(cbc_length);
    return ret;
}