Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 60 additions & 65 deletions uk_bin_collection/uk_bin_collection/councils/ArunCouncil.py
Original file line number Diff line number Diff line change
@@ -1,98 +1,93 @@
import time

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select, WebDriverWait

from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass


# import the wonderful Beautiful Soup and the URL grabber
class CouncilClass(AbstractGetBinDataClass):
"""
Concrete classes have to implement all abstract operations of the
base class. They can also override some operations with a default
implementation.
"""

def parse_data(self, page: str, **kwargs) -> dict:
def parse_data(self, page: str = None, **kwargs) -> dict:
try:
# Make a BS4 object
data = {"bins": []}

user_paon = kwargs.get("paon")
user_postcode = kwargs.get("postcode")
headless = kwargs.get("headless")
web_driver = kwargs.get("web_driver")
driver = create_webdriver(web_driver, headless, None, __name__)
page = "https://www1.arun.gov.uk/when-are-my-bins-collected/"
check_paon(user_paon)
user_uprn = kwargs.get("uprn")

check_postcode(user_postcode)
driver.get(page)
check_uprn(user_uprn)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

# Find the common.py file
fd common.py

Repository: robbrad/UKBinCollectionData

Length of output: 116


🏁 Script executed:

# Search for check_uprn function definition
rg -A 20 "def check_uprn" --type py

Repository: robbrad/UKBinCollectionData

Length of output: 1542


🏁 Script executed:

# Check ArunCouncil.py to see how check_uprn is used
rg -B 5 -A 5 "check_uprn" uk_bin_collection/uk_bin_collection/councils/ArunCouncil.py

Repository: robbrad/UKBinCollectionData

Length of output: 426


check_uprn silently swallows validation errors and execution continues.

The check_uprn function in common.py catches all exceptions, prints an error message, but does not re-raise the exception or return a failure indicator. On line 16, the return value is not checked, so invalid UPRNs result only in a printed message while execution continues with potentially bad data.

Either:

  1. Fix check_uprn in common.py to re-raise the exception (preferred)
  2. Check the return value here: if not check_uprn(user_uprn): raise ValueError("Invalid UPRN")
🧰 Tools
🪛 Ruff (0.14.11)

16-16: check_uprn may be undefined, or defined from star imports

(F405)

🤖 Prompt for AI Agents
In `@uk_bin_collection/uk_bin_collection/councils/ArunCouncil.py` at line 16, The
call to check_uprn(user_uprn) in ArunCouncil.py currently ignores validation
failures because check_uprn in common.py catches all exceptions and only prints
an error; either make the preferred fix in common.py by removing the swallow—log
the error then re-raise the exception (i.e., avoid a bare except and raise the
caught exception) so callers like the call in ArunCouncil.py will get an
exception, or if you can't change common.py right now, change the call in
ArunCouncil.py to explicitly check the result (e.g., if not
check_uprn(user_uprn): raise ValueError("Invalid UPRN")) so invalid UPRNs stop
execution; reference check_uprn and the caller in ArunCouncil.py when making the
change.


start_now_button = WebDriverWait(driver, timeout=15).until(
EC.presence_of_element_located((By.LINK_TEXT, "Start now"))
BASE = "https://www1.arun.gov.uk"
UA = (
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:146.0) "
"Gecko/20100101 Firefox/146.0"
)
start_now_button.click()

# Wait for the postcode field to appear then populate it
input_element_postcode = WebDriverWait(driver, 30).until(
EC.presence_of_element_located((By.ID, "postcode"))
s = requests.Session()
s.headers.update(
{
"User-Agent": UA,
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-GB,en;q=0.5",
}
)
input_element_postcode.send_keys(user_postcode)

continue_button = WebDriverWait(driver, timeout=15).until(
EC.presence_of_element_located((By.CLASS_NAME, "govuk-button"))
s.get(f"{BASE}/when-are-my-bins-collected")

s.post(
f"{BASE}/when-are-my-bins-collected/postcode",
data={"postcode": user_postcode},
headers={
"Referer": f"{BASE}/when-are-my-bins-collected",
"Origin": BASE,
},
)
continue_button.click()

address_selection_menu = Select(driver.find_element(By.ID, "address"))
for idx, addr_option in enumerate(address_selection_menu.options):
option_name = addr_option.text[0 : len(user_paon)]
if option_name == user_paon:
selected_address = addr_option
break
address_selection_menu.select_by_visible_text(selected_address.text)

continue_button = WebDriverWait(driver, timeout=15).until(
EC.presence_of_element_located((By.CLASS_NAME, "govuk-button"))

s.post(
f"{BASE}/when-are-my-bins-collected/select",
data={"address": user_uprn},
headers={
"Referer": f"{BASE}/when-are-my-bins-collected/postcode",
"Origin": BASE,
},
)
continue_button.click()
# Check for text saying "Next collection dates"
WebDriverWait(driver, 30).until(
EC.presence_of_element_located(
(By.XPATH, "//*[contains(text(), 'Next collection dates')]")
)

r = s.get(
f"{BASE}/when-are-my-bins-collected/collections",
headers={
"Referer": f"{BASE}/when-are-my-bins-collected/select",
},
)
Comment on lines +33 to 58
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

HTTP responses are not validated for success.

None of the requests check for HTTP errors. If the server returns 4xx/5xx (e.g., invalid postcode, rate limiting, server error), the code silently continues and fails later with a confusing error parsing the response.

Add raise_for_status() after critical requests, or at minimum check the final response before parsing.

Suggested fix
             s.get(f"{BASE}/when-are-my-bins-collected")

-            s.post(
+            resp = s.post(
                 f"{BASE}/when-are-my-bins-collected/postcode",
                 data={"postcode": user_postcode},
                 headers={
                     "Referer": f"{BASE}/when-are-my-bins-collected",
                     "Origin": BASE,
                 },
             )
+            resp.raise_for_status()

-            s.post(
+            resp = s.post(
                 f"{BASE}/when-are-my-bins-collected/select",
                 data={"address": user_uprn},
                 headers={
                     "Referer": f"{BASE}/when-are-my-bins-collected/postcode",
                     "Origin": BASE,
                 },
             )
+            resp.raise_for_status()

             r = s.get(
                 f"{BASE}/when-are-my-bins-collected/collections",
                 headers={
                     "Referer": f"{BASE}/when-are-my-bins-collected/select",
                 },
             )
+            r.raise_for_status()
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
s.get(f"{BASE}/when-are-my-bins-collected")
s.post(
f"{BASE}/when-are-my-bins-collected/postcode",
data={"postcode": user_postcode},
headers={
"Referer": f"{BASE}/when-are-my-bins-collected",
"Origin": BASE,
},
)
continue_button.click()
address_selection_menu = Select(driver.find_element(By.ID, "address"))
for idx, addr_option in enumerate(address_selection_menu.options):
option_name = addr_option.text[0 : len(user_paon)]
if option_name == user_paon:
selected_address = addr_option
break
address_selection_menu.select_by_visible_text(selected_address.text)
continue_button = WebDriverWait(driver, timeout=15).until(
EC.presence_of_element_located((By.CLASS_NAME, "govuk-button"))
s.post(
f"{BASE}/when-are-my-bins-collected/select",
data={"address": user_uprn},
headers={
"Referer": f"{BASE}/when-are-my-bins-collected/postcode",
"Origin": BASE,
},
)
continue_button.click()
# Check for text saying "Next collection dates"
WebDriverWait(driver, 30).until(
EC.presence_of_element_located(
(By.XPATH, "//*[contains(text(), 'Next collection dates')]")
)
r = s.get(
f"{BASE}/when-are-my-bins-collected/collections",
headers={
"Referer": f"{BASE}/when-are-my-bins-collected/select",
},
)
s.get(f"{BASE}/when-are-my-bins-collected")
resp = s.post(
f"{BASE}/when-are-my-bins-collected/postcode",
data={"postcode": user_postcode},
headers={
"Referer": f"{BASE}/when-are-my-bins-collected",
"Origin": BASE,
},
)
resp.raise_for_status()
resp = s.post(
f"{BASE}/when-are-my-bins-collected/select",
data={"address": user_uprn},
headers={
"Referer": f"{BASE}/when-are-my-bins-collected/postcode",
"Origin": BASE,
},
)
resp.raise_for_status()
r = s.get(
f"{BASE}/when-are-my-bins-collected/collections",
headers={
"Referer": f"{BASE}/when-are-my-bins-collected/select",
},
)
r.raise_for_status()
🤖 Prompt for AI Agents
In `@uk_bin_collection/uk_bin_collection/councils/ArunCouncil.py` around lines 33
- 58, The HTTP requests in ArunCouncil.py (the session calls using s.get and
s.post and the final response stored in r) aren't validated, so add response
checks: call raise_for_status() on each response returned by s.get/ s.post (the
calls to f"{BASE}/when-are-my-bins-collected",
f"{BASE}/when-are-my-bins-collected/postcode",
f"{BASE}/when-are-my-bins-collected/select") and on the final r from
s.get(f"{BASE}/when-are-my-bins-collected/collections") before any parsing;
ensure you capture each request's return value (e.g., resp = s.post(...)) then
call resp.raise_for_status() so HTTP 4xx/5xx are surfaced immediately.


soup = BeautifulSoup(driver.page_source, "html.parser")
page = r.text


soup = BeautifulSoup(page, "html.parser")
soup.prettify()

table = soup.find("table", class_="govuk-table")
if not table:
raise ValueError("Bin collection table not found")

for row in table.find("tbody").find_all("tr"):
# Extract the type of collection and the date of next collection
collection_type = (
row.find("th", class_="govuk-table__header").text.strip().split(" ")
)[0]
collection_date = row.find(
"td", class_="govuk-table__cell"
).text.strip()
row.find("th", class_="govuk-table__header")
.text.strip()
.split(" ")[0]
)

collection_date = (
row.find("td", class_="govuk-table__cell")
.text.strip()
)

# Append the information to the data structure
data["bins"].append(
{"type": collection_type, "collectionDate": collection_date}
{
"type": collection_type,
"collectionDate": collection_date,
}
)

except Exception as e:
# Here you can log the exception if needed
print(f"An error occurred: {e}")
# Optionally, re-raise the exception if you want it to propagate
raise
finally:
# This block ensures that the driver is closed regardless of an exception.
if driver:
driver.quit()

return data
Loading