Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 45 additions & 23 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,25 @@
# -*- coding: utf-8 -*-
"""
Created on Sun Jul 12 11:02:06 2020

@author: OHyic

Google Image Scraper
A script to download images from Google Images based on search terms.
"""
#Import libraries

# Import required libraries
import os
import concurrent.futures
import chromedriver_autoinstaller
from GoogleImageScraper import GoogleImageScraper
from patch import webdriver_executable


def worker_thread(search_key):
    """
    Scrape and save images for a single search term.

    Intended to run inside a thread pool: one thread per search term.
    Reads the scraper configuration (webdriver_path, image_path,
    number_of_images, headless, min_resolution, max_resolution,
    max_missed, keep_filenames) from module-level globals set in the
    __main__ block.

    Args:
        search_key (str): The search term to scrape images for.
    """
    # Build a scraper for this one search term.
    # NOTE(review): the middle arguments were collapsed in the diff view;
    # this argument order follows the surrounding configuration names —
    # confirm against GoogleImageScraper.__init__.
    image_scraper = GoogleImageScraper(
        webdriver_path,
        image_path,
        search_key,
        number_of_images,
        headless,
        min_resolution,
        max_resolution,
        max_missed)

    # Collect the image URLs, then download them to image_path
    image_urls = image_scraper.find_image_urls()
    image_scraper.save_images(image_urls, keep_filenames)

    # Release the scraper (and its underlying browser resources)
    del image_scraper

if __name__ == "__main__":
    # Auto-install a chromedriver matching the locally installed Chrome
    # version and use its path for the scraper's webdriver.
    webdriver_path = chromedriver_autoinstaller.install()

    # Create the output directory for downloaded images if it doesn't exist
    image_path = os.path.normpath(os.path.join(os.getcwd(), 'photos'))
    os.makedirs(image_path, exist_ok=True)

    # Collect search terms from user input, dropping empty entries
    user_input = input("Enter search terms separated by commas: ")
    search_keys = [term.strip() for term in user_input.split(',') if term.strip()]

    # Exit early if no valid search terms were provided
    if not search_keys:
        print("No valid search terms provided. Exiting...")
        exit()

    print(f"Will search for: {', '.join(search_keys)}")

    # Configuration parameters — read by worker_thread as module globals
    number_of_images = 10                           # Images to download per search term
    headless = False                                # Run Chrome without a visible window
    min_resolution = (0, 0)                         # Minimum accepted (width, height)
    max_resolution = (9999, 9999)                   # Maximum accepted (width, height)
    max_missed = 10                                 # Consecutive failures before giving up
    number_of_workers = min(len(search_keys), 4)    # Parallel threads (capped at 4)
    keep_filenames = False                          # Keep original filenames from URLs

    # Run each search term in its own thread; the context manager
    # blocks until every worker has finished.
    with concurrent.futures.ThreadPoolExecutor(max_workers=number_of_workers) as executor:
        executor.map(worker_thread, search_keys)
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
selenium==3.141.0
requests==2.25.1
pillow==9.0.1
chromedriver_autoinstaller