diff --git a/uk_bin_collection/uk_bin_collection/councils/EnfieldCouncil.py b/uk_bin_collection/uk_bin_collection/councils/EnfieldCouncil.py index 195400893e..5f524b79ed 100644 --- a/uk_bin_collection/uk_bin_collection/councils/EnfieldCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/EnfieldCouncil.py @@ -1,224 +1,62 @@ -import time - -from bs4 import BeautifulSoup -from selenium.webdriver.common.by import By -from selenium.webdriver.support import expected_conditions as EC -from selenium.webdriver.support.ui import Select, WebDriverWait -import pdb +from datetime import datetime +import curl_cffi #better impersonation than manual headers from uk_bin_collection.uk_bin_collection.common import * from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass -# import the wonderful Beautiful Soup and the URL grabber class CouncilClass(AbstractGetBinDataClass): - """ - Concrete classes have to implement all abstract operations of the - base class. They can also override some operations with a default - implementation. - """ def parse_data(self, page: str, **kwargs) -> dict: - driver = None - try: - user_postcode = kwargs.get("postcode") - if not user_postcode: - raise ValueError("No postcode provided.") - check_postcode(user_postcode) - - user_paon = kwargs.get("paon") - check_paon(user_paon) - headless = kwargs.get("headless") - web_driver = kwargs.get("web_driver") - # Use a realistic user agent to help bypass Cloudflare - user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" - driver = create_webdriver(web_driver, headless, user_agent, __name__) - page = "https://www.enfield.gov.uk/services/rubbish-and-recycling/find-my-collection-day" - driver.get(page) - - # Wait for Cloudflare challenge to complete - print("Waiting for page to load (Cloudflare check)...") - max_attempts = 3 - for attempt in range(max_attempts): - try: - WebDriverWait(driver, 60).until( - lambda d: "Just a moment" not in d.title and d.title != "" and len(d.find_elements(By.TAG_NAME, "input")) > 1 - ) - print(f"Page loaded: {driver.title}") - break - except: - print(f"Attempt {attempt + 1}: Timeout waiting for page load. Current title: {driver.title}") - if attempt < max_attempts - 1: - time.sleep(10) - driver.refresh() - else: - print("Failed to bypass Cloudflare after multiple attempts") - - time.sleep(8) - - try: - accept_cookies = WebDriverWait(driver, timeout=10).until( - EC.presence_of_element_located((By.ID, "ccc-notify-reject")) - ) - accept_cookies.click() - except: - print( - "Accept cookies banner not found or clickable within the specified time." - ) - pass - - # Check for multiple iframes and find the correct one - try: - iframes = driver.find_elements(By.TAG_NAME, "iframe") - - # Try each iframe to find the one with the bin collection form - for i, iframe in enumerate(iframes): - try: - driver.switch_to.frame(iframe) - - # Check if this iframe has the postcode input - time.sleep(2) - inputs = driver.find_elements(By.TAG_NAME, "input") - - # Look for address-related inputs - for inp in inputs: - aria_label = inp.get_attribute('aria-label') or '' - placeholder = inp.get_attribute('placeholder') or '' - if 'address' in aria_label.lower() or 'postcode' in placeholder.lower(): - break - else: - # This iframe doesn't have the form, try the next one - driver.switch_to.default_content() - continue - - # Found the right iframe, break out of the loop - break - except Exception as e: - driver.switch_to.default_content() - continue - else: - # No suitable iframe found, stay in main content - driver.switch_to.default_content() - except Exception as e: - pass - - # Try multiple selectors for the postcode input - postcode_input = None - selectors = [ - '[aria-label="Enter your address"]', - 'input[placeholder*="postcode"]', - 'input[placeholder*="address"]', - 'input[type="text"]' - ] - - for selector in selectors: - try: - postcode_input = WebDriverWait(driver, 5).until( - EC.element_to_be_clickable((By.CSS_SELECTOR, selector)) - ) - break - except: - continue - - if not postcode_input: - raise ValueError("Could not find postcode input field") - - postcode_input.send_keys(user_postcode) - - find_address_button = WebDriverWait(driver, 10).until( - EC.element_to_be_clickable((By.ID, "submitButton0")) + uprn = kwargs.get("uprn") + check_uprn(uprn) + + r = curl_cffi.requests.get( + f"https://www.enfield.gov.uk/_design/integrations/bartec/find-my-collection/rest/schedule?uprn={uprn}", + headers={ + "Accept": "*/*", + "Referer": "https://www.enfield.gov.uk/services/rubbish-and-recycling/find-my-collection-day", + "Connection": "keep-alive", + "Sec-Fetch-Mode": "cors", + "Sec-Fetch-Site": "same-origin", + }, + impersonate="firefox", + timeout=30 + ) + + r.raise_for_status() + + text = r.text.lstrip() + if text.startswith("