-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
40 lines (33 loc) · 881 Bytes
/
main.py
File metadata and controls
40 lines (33 loc) · 881 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import requests
from bs4 import BeautifulSoup
import re
# Scrape a single web page and print every absolute-https image source and
# every hyperlink target found on it, first one per line and then again as
# Python lists.

# Change the URL to the page you want to scrape.
webpage = 'URL'

# Without a timeout, requests waits forever on an unresponsive server.
response = requests.get(webpage, timeout=30)
# Fail loudly on 4xx/5xx instead of silently scraping an error page.
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

# <img> tags whose src starts with "https://"; relative, http:// and data:
# sources are intentionally left out by the pattern.
images = soup.find_all('img', attrs={'src': re.compile(r"^https://")})
# <a> tags that actually carry an href attribute.
links = soup.find_all("a", href=True)

# Collect the attribute values once so both output formats share them.
srcs = [image['src'] for image in images]
links_as_list = [link['href'] for link in links]

# Print images one-by-one.
print('images: ')
print('')
for src in srcs:
    print(src)

# Print links one-by-one.
print('***************************************************************************************************************')
print('links:')
print('')
for href in links_as_list:
    print(href)

# Print images as a list.
print('')
print('images list: ', srcs)

# Print links as a list.
print('links list:', links_as_list)