forked from mahmutovichana/Google-Image-Scraper
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgoogleImageScraper.py
More file actions
71 lines (58 loc) · 2.29 KB
/
googleImageScraper.py
File metadata and controls
71 lines (58 loc) · 2.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import os
import io
import time
import base64
import requests
from PIL import Image
from urllib.parse import quote
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
# Script body: open a Google Images search for `query` in Edge, scroll to
# load more results, then save every image element found into a folder
# named after the query.

# Search term sent to Google; also used as the output folder name.
query = "plastic"
# Percent-encode the query so it can be embedded in the search URL.
query_url = quote(query)
# Destination folder (hard-coded desktop path) named after the query.
folder_name = os.path.join('C:\\Users\\mahmu\\OneDrive\\Desktop', query)
try:
    # exist_ok=True: re-running the script with the same query must not
    # report an error just because the folder is already there.
    os.makedirs(folder_name, exist_ok=True)
except OSError as e:
    # Best effort, as before: report the problem and carry on.
    print(f"An error occurred: {str(e)}")

# Start the Edge browser through the driver executable at this path.
# NOTE(review): the positional driver-path argument depends on the installed
# Selenium release accepting it -- confirm against the version in use.
driver = webdriver.Edge(r"C:\Users\mahmu\MicrosoftWebDriver.exe")
try:
    # URL for the Google Images search (tbm=isch selects image results).
    url = f"https://www.google.com/search?q={query_url}&tbm=isch"
    driver.get(url)

    # Scroll to the bottom repeatedly so the page loads more images.
    for _ in range(10):  # Adjust the number based on the number of images wanted
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)  # Wait for images to load

    try:
        # Locator-based lookup: the find_elements_by_* helpers are not
        # available on newer Selenium releases, while find_elements(By...)
        # works on old and new ones alike.
        img_elements = driver.find_elements(By.CSS_SELECTOR, 'img.rg_i')
        print(img_elements)
    except Exception as e:
        print(f"An error occurred: {str(e)}")
        # Keep the name bound so the download loop below is simply skipped
        # instead of failing with NameError.
        img_elements = []

    # Download and save images; files are numbered by element position.
    for i, img in enumerate(img_elements):
        img_url = img.get_attribute("src")
        img_name = f"{i + 1}.jpg"
        img_path = os.path.join(folder_name, img_name)
        if img_url and img_url.startswith('http'):
            try:
                # A timeout prevents one stalled download from hanging the
                # whole run; raise_for_status keeps HTTP error pages from
                # being written to disk as if they were images.
                img_response = requests.get(img_url, timeout=30)
                img_response.raise_for_status()
            except requests.RequestException as e:
                print(f"An error occurred: {str(e)}")
                continue
            # Save the downloaded bytes to disk unchanged.
            with open(img_path, "wb") as img_file:
                img_file.write(img_response.content)
        elif img_url and img_url.startswith('data:image/jpeg;base64'):
            # Inline data URL: decode the base64 payload and let PIL write it.
            img_data = img_url.split('base64,')[1]
            decoded_image = Image.open(io.BytesIO(base64.b64decode(img_data)))
            decoded_image.save(img_path)

    print(f"Images have been downloaded and saved in the folder: {folder_name}")
finally:
    # Always close the browser, even if something above raised, so no
    # browser/driver process is left running.
    driver.quit()