-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtext.cpp
More file actions
153 lines (124 loc) · 3.86 KB
/
text.cpp
File metadata and controls
153 lines (124 loc) · 3.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
// Copyright 2012 Florian Petran
#include"text.h"
#include<cctype>
#include<cstdlib>
#include<string>
#include<list>
#include<vector>
#include<fstream>
#include<ostream>
#include<stdexcept>
#include<utility>
#include"string_impl.h"
using std::string;
using std::list;
using std::ifstream;
using std::pair;
using std::out_of_range;
using std::runtime_error;
namespace Align {
//////////////////////////////// base Word ////////////////////////////////////
// Constructs a Word that refers to the given Text.
// Only the pointer is stored; the Text itself is not copied here.
Word::Word(const Text* text)
    : _text(text) {}
// Returns a reference to the Text this Word points to.
// The stored pointer is dereferenced without a null check.
const Text& Word::get_text() const {
    const Text& owner = *_text;
    return owner;
}
/////////////////////////////// WordToken /////////////////////////////////////
// Builds a token belonging to `text`.
//   seqs - pointer to the list of sequences associated with this token
//   s    - pointer to the string this token is realized as
//   pos  - position of the token
//   type - pointer to the WordType this token is an instance of
// All pointers are stored exactly as passed in; nothing is copied here.
WordToken::WordToken(const Text* text,
                     list<Sequence*> * seqs,
                     const string_impl* s,
                     const int pos, const WordType* type)
    : Word(text) {
    this->_sequences = seqs;
    this->_string_realization = s;
    this->_type = type;
    this->_position = pos;
}
// Two tokens compare equal when their _type members are equal and they
// have the same position.
bool WordToken::operator==(const WordToken& other) const {
    if (!(_type == other._type))
        return false;
    return _position == other._position;
}
void WordToken::remove_from(const Sequence* seq) const {
auto si = _sequences->begin();
while (si != _sequences->end())
if (*si == seq)
si = _sequences->erase(si);
else
++si;
}
// Tells whether `other` is close to this token.
//
// Returns true when `other` is at a different position and the absolute
// distance between the two positions is at most Params::get().closeness().
// When Params::get().monotony() is set, `other` must additionally be at a
// greater position than this token.
bool WordToken::close_to(const WordToken& other) const {
    // std::abs (from <cstdlib>) is used instead of the unqualified C abs so
    // the call does not depend on a transitive include and always picks the
    // overload matching the position type.
    const bool result = this->_position != other._position
        && std::abs(this->_position - other._position)
            <= Params::get().closeness();
    if (Params::get().monotony())
        return result && this->_position < other._position;
    return result;
}
/////////////////////////////// WordType //////////////////////////////////////
WordType::WordType(const Text* text)
: Word(text), _frequency(0) {}
// Appends a copy of `token` to this type's token collection and increases
// the frequency counter by one. Returns a reference to this type.
const WordType& WordType::add_token(const WordToken& token) {
    _tokens.emplace_back(token);
    _frequency += 1;
    return *this;
}
// Two types compare equal when at least one token of this type has the
// same get_str() value as at least one token of `other`. If either side
// has no tokens, the result is false.
bool WordType::operator==(const WordType& other) const {
    for (const auto& mine : _tokens) {
        for (const auto& theirs : other._tokens) {
            if (mine.get_str() == theirs.get_str()) {
                return true;
            }
        }
    }
    return false;
}
////////////////////////////////// Text ///////////////////////////////////////
// Unchecked element access: forwards to the operator[] of the
// std::vector<WordToken> base.
const WordToken& Text::operator[](int index) const {
    const std::vector<WordToken>& tokens = *this;
    return tokens[index];
}
// Checked element access.
// Throws std::out_of_range when `index` is negative or not below _length;
// otherwise returns the token at `index` via operator[].
const WordToken& Text::at(int index) const {
    const bool in_range = index >= 0 && index < _length;
    if (!in_range)
        throw out_of_range("Text lookup out of range");
    return (*this)[index];
}
// Constructs a Text by reading the file named `fname` through open().
// Any exception thrown by open() propagates to the caller.
Text::Text(const string& fname) {
    this->open(fname);
}
// Frees everything open() allocated with new: the string_impl objects held
// in string_ptrs, the sequence list of every token registered with a type,
// and the WordType objects themselves.
Text::~Text() {
    for (auto& str_entry : string_ptrs)
        delete str_entry.second;
    for (auto& type_entry : _types) {
        WordType* const type = type_entry.second;
        // The sequence lists are reached through the type's tokens, so
        // they have to be deleted before the type is.
        for (const auto& token : type->get_tokens())
            delete token.get_sequences();
        delete type;
    }
}
void Text::open(const string& fname) {
ifstream file;
file.open(fname);
if (!file.is_open())
throw runtime_error(string("Text file not found: ") + fname);
int pos = 0;
char c_line[256];
while (!file.eof()) {
file.getline(c_line, 256);
string_impl line = c_line;
lower_case(&line);
if (_types.find(line) == _types.end())
_types[line] = new WordType(this);
if (string_ptrs.find(line) == string_ptrs.end())
string_ptrs[line] = new string_impl(line);
list<Sequence*> *seqs = new list<Sequence*>();
WordToken tok = WordToken(this,
seqs,
string_ptrs[line],
pos,
_types[line]);
_types[line]->add_token(tok);
this->push_back(tok);
++pos;
}
_length = pos;
_fname = fname;
file.clear();
file.close();
}
} // namespace Align