Skip to content

Commit 0d6239f

Browse files
committed
Update spider.py
1 parent 063e668 commit 0d6239f

1 file changed

Lines changed: 30 additions & 0 deletions

File tree

crawler/av-spider/spider.py

Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -74,6 +74,9 @@ def start(self):
7474
data = json.loads(r.text)
7575
self.ids = data.get('ids')
7676
self.stars = data.get('stars')
77+
self.studios = data.get('studios')
78+
self.series = data.get('series')
79+
self.genres = data.get('genres')
7780

7881
self.parseList(self.startUrl)
7982

@@ -86,6 +89,33 @@ def start(self):
8689
url = self.host + '/star/' + star['id']
8790
self.parseList(url)
8891

92+
for studio in self.studios:
93+
if studio['status'] < 1:
94+
continue
95+
if len(studio['id']) > 6:
96+
continue
97+
print('开始爬取片商 ' + studio['name'])
98+
url = self.host + '/studio/' + studio['id']
99+
self.parseList(url)
100+
101+
for series in self.series:
102+
if series['status'] < 1:
103+
continue
104+
if len(series['id']) > 6:
105+
continue
106+
print('开始爬取系列 ' + series['name'])
107+
url = self.host + '/series/' + series['id']
108+
self.parseList(url)
109+
110+
for genre in self.genres:
111+
if genre['status'] < 1:
112+
continue
113+
if len(genre['id']) > 6:
114+
continue
115+
print('开始爬取类别 ' + genre['name'])
116+
url = self.host + '/genre/' + genre['id']
117+
self.parseList(url)
118+
89119
def parseList(self, url):
90120
r = self.request(url)
91121
if r == False:

0 commit comments

Comments
 (0)