Skip to content

Commit b99c2a2

Browse files
authored
Merge pull request #4 from ceerRep/master
cR's Homework
2 parents 9d92f35 + f81e06f commit b99c2a2

20 files changed

Lines changed: 1412 additions & 0 deletions

week_1/2018202133HPY/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
main
2+
download/*
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
{
2+
"files.associations": {
3+
"mutex": "cpp",
4+
"array": "cpp",
5+
"*.tcc": "cpp",
6+
"cctype": "cpp",
7+
"chrono": "cpp",
8+
"clocale": "cpp",
9+
"cmath": "cpp",
10+
"condition_variable": "cpp",
11+
"cstdint": "cpp",
12+
"cstdio": "cpp",
13+
"cstdlib": "cpp",
14+
"ctime": "cpp",
15+
"cwchar": "cpp",
16+
"cwctype": "cpp",
17+
"unordered_map": "cpp",
18+
"vector": "cpp",
19+
"exception": "cpp",
20+
"fstream": "cpp",
21+
"functional": "cpp",
22+
"initializer_list": "cpp",
23+
"iosfwd": "cpp",
24+
"iostream": "cpp",
25+
"istream": "cpp",
26+
"limits": "cpp",
27+
"memory": "cpp",
28+
"new": "cpp",
29+
"optional": "cpp",
30+
"ostream": "cpp",
31+
"ratio": "cpp",
32+
"sstream": "cpp",
33+
"stdexcept": "cpp",
34+
"streambuf": "cpp",
35+
"string_view": "cpp",
36+
"system_error": "cpp",
37+
"thread": "cpp",
38+
"type_traits": "cpp",
39+
"tuple": "cpp",
40+
"typeinfo": "cpp",
41+
"utility": "cpp",
42+
"cstdarg": "cpp",
43+
"list": "cpp",
44+
"numeric": "cpp",
45+
"bitset": "cpp",
46+
"cstring": "cpp",
47+
"deque": "cpp",
48+
"algorithm": "cpp",
49+
"codecvt": "cpp"
50+
}
51+
}

week_1/2018202133HPY/Makefile

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Build script for the crawler: compiles every src/*.cpp into tmp/*.o and links ./main.
# The compiler is forced to clang++ even if CXX is given on the command line.
override CXX=clang++

SRC = ${wildcard src/*.cpp}
OBJ = ${patsubst src/%.cpp, tmp/%.o, $(SRC)}
DEPEND = ${OBJ:tmp/%.o=tmp/%.d}

CXXFLAGS = -std=gnu++17 -g -O2
LDFLAGS = -lcurl -lstdc++fs -lpthread

all: main

# Each tmp/%.d carries both the header dependencies and the compile recipe for
# its object file. Skip the include when only cleaning, so that `make clean`
# does not first regenerate every dependency file.
ifeq (,$(filter clean clean_depend,$(MAKECMDGOALS)))
include $(DEPEND)
endif

#tmp/%.o : src/%.cpp
#	$(CXX) $(CXXFLAGS) -c $< -o $@

# Generate the dependency fragment for one source file.
#  - mkdir -p: tmp/ may not exist in a fresh checkout.
#  - -MT names the object target directly instead of rewriting the output with
#    sed (the previous expression relied on the GNU-only `\S`).
#  - printf instead of `echo -e`: with a POSIX /bin/sh such as dash, `echo -e`
#    prints a literal "-e", which corrupts the generated fragment.
tmp/%.d: src/%.cpp
	@mkdir -p $(@D)
	$(CXX) -MM -MT $(@:.d=.o) $< > $@
	printf '\t$$(CXX) $$(CXXFLAGS) -c %s -o %s\n' '$<' '$(@:.d=.o)' >> $@

main: $(OBJ)
	$(CXX) $(OBJ) -o main $(LDFLAGS)

clean_depend:
	$(RM) $(DEPEND)

clean:
	$(RM) depend tmp/*.o main tmp/*.d

.PHONY: all clean clean_depend

week_1/2018202133HPY/README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Simple crawler
2+
---
3+
## Usage
4+
```
5+
$ make
6+
$ ./main
7+
```

week_1/2018202133HPY/src/Curl.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#include <curl/curl.h>
2+
3+
class CurlInit {
4+
CurlInit()
5+
{
6+
curl_global_init(CURL_GLOBAL_ALL);
7+
}
8+
~CurlInit()
9+
{
10+
curl_global_cleanup();
11+
}
12+
static void _()
13+
{
14+
static CurlInit __curl_init;
15+
}
16+
};
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
#include "CurlEasyHandle.hpp"
2+
3+
#include <mutex>
4+
#include <sstream>
5+
#include <string>
6+
7+
size_t CurlEasyHandle::curlHeaderHandleFunc(void* buffer, size_t size, size_t nmemb, void* userp)
8+
{
9+
auto that = (CurlEasyHandle*)userp;
10+
11+
std::lock_guard lock_that { that->lock };
12+
13+
std::string key, val;
14+
std::stringstream ss;
15+
16+
ss.write((const char*)buffer, nmemb);
17+
18+
if (!(ss >> key)) {
19+
that->on_header_receive(*that);
20+
} else if (key.substr(0, 4) == "HTTP") {
21+
that->env["HTTP_VER"] = key.substr(5);
22+
23+
ss >> val;
24+
25+
that->env["STATUS_CODE"] = val;
26+
27+
getline(ss, val);
28+
val.erase(0, val.find_first_not_of(" \r\n\t"));
29+
val.erase(val.find_last_not_of(" \r\n\t") + 1);
30+
that->env["REASON_PHRASE"] = val;
31+
} else {
32+
key.erase(key.find_last_of(':'));
33+
std::transform(key.begin(), key.end(), key.begin(), ::tolower);
34+
35+
getline(ss, val);
36+
val.erase(0, val.find_first_not_of(" \r\n\t"));
37+
val.erase(val.find_last_not_of(" \r\n\t") + 1);
38+
that->env[key] = val;
39+
}
40+
return nmemb;
41+
}
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
#ifndef _COWR_CURLEASYHANDLE
2+
3+
#define _COWR_CURLEASYHANDLE
4+
5+
#include <curl/curl.h>
6+
7+
#include <functional>
8+
#include <iostream>
9+
#include <map>
10+
#include <mutex>
11+
12+
class CurlEasyHandle {
13+
protected:
14+
std::recursive_mutex lock;
15+
CURL* handle;
16+
17+
std::map<std::string, std::string> env;
18+
19+
std::function<void(void*, size_t, size_t, CurlEasyHandle&)> on_data_receive = curlDefaultDataReceiveHandler;
20+
std::function<void(CurlEasyHandle&)> on_header_receive = curlDefaultHeaderReceiveHandler;
21+
22+
public:
23+
CurlEasyHandle()
24+
{
25+
handle = curl_easy_init();
26+
27+
curl_easy_setopt(handle, CURLOPT_WRITEFUNCTION, curlDataHandleFunc);
28+
curl_easy_setopt(handle, CURLOPT_WRITEDATA, this);
29+
curl_easy_setopt(handle, CURLOPT_HEADERFUNCTION, curlHeaderHandleFunc);
30+
curl_easy_setopt(handle, CURLOPT_HEADERDATA, this);
31+
curl_easy_setopt(handle, CURLOPT_USERAGENT, "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36");
32+
33+
setCanRedirect(true);
34+
}
35+
36+
~CurlEasyHandle()
37+
{
38+
if (handle)
39+
curl_easy_cleanup(handle);
40+
}
41+
42+
CurlEasyHandle(const CurlEasyHandle&) = delete;
43+
44+
CurlEasyHandle(CurlEasyHandle&& r)
45+
{
46+
std::lock_guard lock_r { r.lock };
47+
env = std::move(r.env);
48+
on_data_receive = std::move(r.on_data_receive);
49+
on_header_receive = std::move(r.on_header_receive);
50+
handle = r.handle;
51+
r.handle = nullptr;
52+
}
53+
54+
template <typename T>
55+
CURLcode setOption(CURLoption option, T args)
56+
{
57+
std::lock_guard lock_this { lock };
58+
return curl_easy_setopt(handle, option, args);
59+
}
60+
61+
CURLcode setURL(std::string URI)
62+
{
63+
std::lock_guard lock_this { lock };
64+
return curl_easy_setopt(handle, CURLOPT_URL, URI.c_str());
65+
}
66+
67+
CURLcode setDataReceiveHandler(std::function<void(void*, size_t, size_t, CurlEasyHandle&)> handler)
68+
{
69+
std::lock_guard lock_this { lock };
70+
on_data_receive = handler;
71+
return CURLE_OK;
72+
}
73+
74+
CURLcode setHeaderReceiveHandler(std::function<void(CurlEasyHandle&)> handler)
75+
{
76+
std::lock_guard lock_this { lock };
77+
on_header_receive = handler;
78+
return CURLE_OK;
79+
}
80+
81+
CURLcode setCanRedirect(bool redirect)
82+
{
83+
std::lock_guard lock_this { lock };
84+
return curl_easy_setopt(handle, CURLOPT_FOLLOWLOCATION, long(redirect));
85+
}
86+
87+
std::string getURL()
88+
{
89+
std::lock_guard lock_this { lock };
90+
char* url = NULL;
91+
curl_easy_getinfo(handle, CURLINFO_EFFECTIVE_URL, &url);
92+
return url;
93+
}
94+
95+
virtual CURLcode perform()
96+
{
97+
std::lock_guard lock_this { lock };
98+
return curl_easy_perform(handle);
99+
}
100+
101+
operator CURL*()
102+
{
103+
return handle;
104+
}
105+
106+
auto getenv() const
107+
{
108+
std::lock_guard lock_this { const_cast<std::recursive_mutex&>(lock) };
109+
return env;
110+
}
111+
112+
static size_t curlDefaultDataReceiveHandler(void* buffer, size_t size, size_t nmemb, CurlEasyHandle& easy_handle)
113+
{
114+
std::cout.write((const char*)buffer, nmemb);
115+
std::cout.flush();
116+
return nmemb;
117+
}
118+
119+
static void curlDefaultHeaderReceiveHandler(CurlEasyHandle& easy_handle)
120+
{
121+
}
122+
123+
static size_t curlDataHandleFunc(void* buffer, size_t size, size_t nmemb, void* userp)
124+
{
125+
((CurlEasyHandle*)userp)->on_data_receive(buffer, size, nmemb, *(CurlEasyHandle*)userp);
126+
return nmemb;
127+
}
128+
129+
static size_t curlHeaderHandleFunc(void* buffer, size_t size, size_t nmemb, void* userp);
130+
};
131+
132+
#endif
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
#include "CurlMultiHandle.hpp"
2+
3+
int CurlMultiHandle::perform()
4+
{
5+
int running;
6+
7+
curl_multi_perform(multi_handle, &running);
8+
9+
struct CURLMsg* m;
10+
11+
for (auto it = easy_handles.begin(); it != easy_handles.end();) {
12+
if ((*it)->can_remove()) {
13+
curl_multi_remove_handle(multi_handle, **it);
14+
15+
lookup.erase(**it);
16+
it = easy_handles.erase(it);
17+
} else {
18+
it++;
19+
}
20+
}
21+
22+
if (std::unique_lock lock_this { lock_queue }; easy_handles.size() < max_num && wait_queue.size() > 0) {
23+
auto handle = wait_queue.front();
24+
25+
handle->start();
26+
easy_handles.insert(handle);
27+
lookup[*handle] = handle;
28+
curl_multi_add_handle(multi_handle, *handle);
29+
30+
wait_queue.pop_front();
31+
running = 1;
32+
}
33+
34+
do {
35+
int msgq = 0;
36+
m = curl_multi_info_read(multi_handle, &msgq);
37+
if (m && (m->msg == CURLMSG_DONE)) {
38+
CURL* e = m->easy_handle;
39+
auto it = easy_handles.find(std::shared_ptr<SingleHandle>(lookup[e]));
40+
41+
removeHandle(*it);
42+
}
43+
44+
} while (m);
45+
46+
return running;
47+
}
48+
49+
int CurlMultiHandle::SingleHandle::curlProgressCallback(void* clientp, curl_off_t dltotal, curl_off_t dlnow, curl_off_t ultotal, curl_off_t ulnow)
50+
{
51+
auto psingle = reinterpret_cast<SingleHandle*>(clientp);
52+
53+
if (!psingle->header_received) {
54+
if (getNowTime() - psingle->start_time > (psingle->header_received ? 5 * timeout_ms : timeout_ms)) {
55+
psingle->TLE = true;
56+
return 1;
57+
}
58+
}
59+
60+
return 0;
61+
}

0 commit comments

Comments
 (0)