-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathsplitinfo.py
More file actions
52 lines (52 loc) · 1.61 KB
/
splitinfo.py
File metadata and controls
52 lines (52 loc) · 1.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import DGStorage as DG;
import urllib.parse;
# Copy the first few linked entries of every record in the source collection
# into the 'knowledgebase' collection, one new record per link.
#
# For each source record, one specific line of its "content" is taken, split
# on the bullet character, and each resulting piece is parsed as an
# `<a href="....html">title</a>` anchor.

PAGE_SIZE = 20           # records requested per fetch() call
LISTING_LINE = 38        # index of the content line holding the bullet-separated links
FULL_LISTING_PARTS = 31  # piece count meaning "more than one page": last piece is pagination
MAX_ENTRIES = 4          # links kept per record; otherwise there would be too many questions

kb_type = 'biology'  # renamed from `type` so the builtin is not shadowed

source_store = DG.DGStorage()
source_store.select(kb_type)
kb_store = DG.DGStorage()
kb_store.select('knowledgebase')

# Page through the source collection until an empty page comes back.
# The original computed the offset as (i - 1) * 20 starting from i = 0, so the
# very first request used offset -20; paging now starts at offset 0.
# NOTE(review): assumes the second fetch() argument is a 0-based skip count -
# confirm against DGStorage.
offset = 0
while True:
    res = source_store.fetch(PAGE_SIZE, offset)
    if not res:
        break
    offset += PAGE_SIZE

    for item in res:
        # May raise IndexError if the content has fewer lines than expected
        # (same as the original).
        pieces = item["content"].split('\n')[LISTING_LINE].split('●')

        # The piece before the first bullet is never a link, so drop it.
        # When the listing spans more than one page, the last piece is the
        # pagination control, so drop that too.
        if len(pieces) == FULL_LISTING_PARTS:
            entries = pieces[1:-1]
        else:
            entries = pieces[1:]
        entries = entries[:MAX_ENTRIES]

        for entry in entries:
            anchor = entry.split('</a>')[0]
            href_pos = anchor.find('<a href=')
            html_pos = anchor.find('.html">')

            # Skip `<a href="` (9 chars) and keep everything through `.html`.
            url = anchor[href_pos + 9:html_pos + 5]
            # Skip `.html">` (7 chars). The original end index was
            # find('</a>') - 1, but '</a>' has already been split off, so
            # find() always returned -1 and the slice always ended at -2.
            # That behaviour is kept as-is.
            # NOTE(review): this drops the last two characters of the link
            # text - confirm that is intended for the source markup.
            title = anchor[html_pos + 7:-2]

            kb_store.add(url, '', {
                "content": title,
                "type": kb_type,
                "kbname": item["prop"]["name"],
                "kb": item["uid"],
            })
            print('add ' + item["uid"])

        # NOTE: the original also built a comma-joined, quote_plus-encoded
        # list of the titles, intended for a setprop(item["uid"], "content", ...)
        # write-back on the source store. That write-back was commented out,
        # so the unused computation has been removed.