-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy path360daily.py
More file actions
79 lines (63 loc) · 2.17 KB
/
360daily.py
File metadata and controls
79 lines (63 loc) · 2.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# This project aims to crawl information that is of interest to hackers.
#-*- coding:utf-8 -*-
import requests
import datetime
import re
import sys
# Python 2 only: interpreter start-up removes sys.setdefaultencoding from the
# sys module, and reloading the module makes the function available again.
# (The reload builtin does not exist in Python 3.)
reload(sys)
# Use UTF-8 as the default codec for implicit str <-> unicode conversions.
sys.setdefaultencoding('utf-8')
def _strip_report_markup(fragment):
    """Reduce one report-item HTML fragment to its bare ``title,href`` text."""
    # The replacements are order-sensitive and are applied exactly in this
    # sequence.
    for old, new in (
        ('<div class="report-item">', ""),
        ("\n", ""),
        ('<div class="report-title">', ""),
        ("</div> ", ""),
        # NOTE(review): as written this removes every space, so the two later
        # patterns that contain a space ('<div class="report-link"><a href="'
        # and '" target="_blank">') can no longer match afterwards.  The
        # literal may originally have been a wider whitespace run -- confirm
        # against the upstream file and the live page before changing it.
        (" ", ""),
        ('<div class="report-link"><a href="', ","),
    ):
        fragment = fragment.replace(old, new)
    # Drop everything from the closing quote of the href to the end of the link.
    return re.sub('" target="_blank">.*?</div>', "", fragment)


def crawlknowledge(url, timeout=30):
    """Download a report page and return one text line per report item.

    Every ``<div class="report-item">...</a></div>`` fragment of the page is
    stripped of its markup.  When the result splits into exactly two
    comma-separated fields (title and link), a HEAD request is sent to the
    link and the entry becomes ``title,<Location header>`` (the text ``None``
    when the response has no ``Location`` header).  When the split or the
    HEAD request fails, the stripped fragment is kept unchanged instead.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait on each HTTP request before giving up, so a
            stalled server cannot hang the crawl forever.

    Returns:
        A list of strings, one per report item, in page order.

    Raises:
        requests.RequestException: If downloading ``url`` itself fails.
    """
    resp = requests.get(url, timeout=timeout)
    # NOTE: the str patterns are applied to the raw body (resp.content), which
    # is a byte string; that only works on Python 2, where bytes and str are
    # the same type.
    fragments = re.findall(
        '<div class="report-item">.*?</a></div>', resp.content, re.S
    )

    knowledgelink = []
    for fragment in fragments:
        stripped = _strip_report_markup(fragment)
        try:
            # ValueError here means the fragment did not yield exactly
            # "title,link" (no comma, or extra commas in the title).
            title, realurl = stripped.split(",")
            head = requests.head(realurl, timeout=timeout)
        except (ValueError, requests.RequestException):
            # Best effort: keep the stripped fragment rather than lose the item.
            knowledgelink.append(stripped)
        else:
            target = head.headers.get("location")
            knowledgelink.append(title + "," + str(target))
    return knowledgelink
def generaldate(start, end, step=1, format="%Y-%m-%d"):
    """Return the dates from ``start`` up to, but not including, ``end``.

    Args:
        start: First date of the range, written according to ``format``.
        end: Date at which the range stops; it is NOT part of the result.
        step: Number of days between consecutive entries.
        format: ``strptime``/``strftime`` pattern used both to parse the two
            bounds and to render every returned date.

    Returns:
        A list of date strings in ``format``.  The list is empty when ``end``
        is not later than ``start`` (for a positive ``step``).

    Raises:
        ValueError: If ``start`` or ``end`` does not match ``format``, or if
            ``step`` is 0.
    """
    # Parse the start date once instead of once per generated day.
    first = datetime.datetime.strptime(start, format)
    span = (datetime.datetime.strptime(end, format) - first).days
    # range() works on both Python 2 and Python 3, unlike xrange().
    return [
        (first + datetime.timedelta(days=offset)).strftime(format)
        for offset in range(0, span, step)
    ]
def main(start="2018-03-30", end="2018-06-07"):
    """Crawl the cert.360.cn daily page for each date and append it to disk.

    For every date produced by ``generaldate(start, end)`` the page
    ``https://cert.360.cn/daily?date=<date>`` is crawled.  On success the date
    line followed by one line per result is appended to ``360daily.txt``.
    When the page request fails, its URL is appended to ``360error.txt`` and
    the crawl continues with the next date.

    Args:
        start: First date to crawl, formatted as ``YYYY-MM-DD``.
        end: Date at which to stop, formatted as ``YYYY-MM-DD``; it is not
            itself crawled.
    """
    # The context managers close both files even if the crawl raises part-way.
    with open('360error.txt', 'a') as errorurl, open('360daily.txt', 'a') as f:
        for day in generaldate(start, end):
            url = 'https://cert.360.cn/daily?date=' + day
            try:
                results = crawlknowledge(url)
            except requests.RequestException:
                # One unreachable page must not abort the whole crawl; record
                # it so it can be retried later.
                errorurl.write(url + '\n')
                continue
            f.write(day + '\n')
            f.writelines(result + '\n' for result in results)
# Start the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()