-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgetFlags.py
More file actions
34 lines (30 loc) · 986 Bytes
/
getFlags.py
File metadata and controls
34 lines (30 loc) · 986 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
#-*- coding:utf-8 -*-
#rate: 0.1
#burst: 3
# Scrapes the national flag images of the countries listed on the example web-scraping site.
import urllib, urllib2, re
from bs4 import BeautifulSoup
import time
def getHtml(url):
    """Download ``url`` and return its body parsed as a BeautifulSoup tree.

    The request is sent with a desktop Firefox User-Agent header instead of
    the urllib2 default.

    Args:
        url: Address of the page to fetch.

    Returns:
        A BeautifulSoup object built from the response body using the
        'html.parser' backend.

    Raises:
        Any exception raised by ``urllib2.urlopen`` or by reading the
        response is propagated unchanged to the caller.
    """
    header = {'User-Agent':'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:55.0) Gecko/20100101 Firefox/55.0'}
    request = urllib2.Request(url, headers=header)
    response = urllib2.urlopen(request)
    try:
        html = response.read()
    finally:
        # Close explicitly so the connection is released even when read()
        # raises; the caller fetches many pages in a loop.
        response.close()
    return BeautifulSoup(html, 'html.parser')
def getFlags(html, a, base_url='http://example.webscraping.com',
             dest_dir='/home/xpl/图片/Flags'):
    """Download every image referenced by ``html`` into numbered PNG files.

    Each ``<img>`` tag's ``src`` is appended to ``base_url`` to form the
    download address, and the image is saved as ``<dest_dir>/<a>.png``, with
    ``a`` incremented after every saved file.

    Args:
        html: Parsed page exposing ``find_all('img')``; each returned tag
            must expose ``get('src')``.
        a: Integer used as the file name of the first image saved.
        base_url: Prefix prepended to each ``src`` value. Defaults to the
            site root the script was written for.
        dest_dir: Directory that receives the files (no trailing slash).
            Defaults to the original hard-coded location.

    Returns:
        The next unused file number, i.e. ``a`` plus the number of images
        that were downloaded.

    Raises:
        Any exception raised by ``urllib.urlretrieve`` is propagated
        unchanged to the caller.
    """
    for img in html.find_all('img'):
        src = img.get('src')
        if not src:
            # A tag without a usable src has nothing to download; skipping it
            # avoids concatenating None to the base URL, and it does not
            # consume a file number.
            continue
        urllib.urlretrieve(base_url + src, '%s/%d.png' % (dest_dir, a))
        a += 1
    return a
if __name__ == '__main__':
    # Script entry point: visit index pages 0 through 24 of the example site
    # and save the images found on each page.
    next_number = 1  # file number assigned to the next saved image
    for page in range(25):
        # Wait two seconds before every request.
        time.sleep(2)
        page_url = 'http://example.webscraping.com/places/default/index/%d' % page
        # getFlags returns the next free number, so numbering continues
        # across pages.
        next_number = getFlags(getHtml(page_url), next_number)