Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 90 additions & 27 deletions uk_bin_collection/uk_bin_collection/councils/CumberlandCouncil.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@
from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass

# Module-level constant so the month list is defined once and never duplicated.
_MONTH_NAMES = [
"January", "February", "March", "April", "May", "June",
"July", "August", "September", "October", "November", "December",
]


class CouncilClass(AbstractGetBinDataClass):
"""
Expand All @@ -18,7 +24,10 @@ def parse_data(self, page: str, **kwargs) -> dict:
bindata = {"bins": []}

# Direct URL to the bin collection schedule using UPRN
url = f"https://www.cumberland.gov.uk/bins-recycling-and-street-cleaning/waste-collections/bin-collection-schedule/view/{user_uprn}"
url = (
f"https://www.cumberland.gov.uk/bins-recycling-and-street-cleaning/"
f"waste-collections/bin-collection-schedule/view/{user_uprn}"
)

# Fetch the page
response = requests.get(url)
Expand All @@ -31,62 +40,116 @@ def parse_data(self, page: str, **kwargs) -> dict:

# Parse the text content to extract collection dates
text_content = content_region.get_text()
lines = [line.strip() for line in text_content.split('\n') if line.strip()]

lines = [line.strip() for line in text_content.split("\n") if line.strip()]

# ------------------------------------------------------------------ #
# The heading is split across multiple lines, e.g.:
# "Collection calendar:"
# "February"
# "to"
# "August"
# "2026"
#
# We find "Collection calendar:" then scan the following lines to
# extract the start month, end month, and year.
#
# For same-year calendars (start month <= end month, e.g. Feb-Aug 2026)
# every month gets calendar_year.
#
# For cross-year calendars (start month > end month, e.g. Nov-Mar 2026)
# months >= start_month_num get (calendar_year - 1) and months
# < start_month_num get calendar_year.
# ------------------------------------------------------------------ #
calendar_year = None
start_month_num = None
end_month_num = None

for i, line in enumerate(lines):
if line.strip().startswith("Collection calendar"):
for j in range(i + 1, min(i + 6, len(lines))):
if lines[j] in _MONTH_NAMES:
if start_month_num is None:
start_month_num = _MONTH_NAMES.index(lines[j]) + 1
else:
end_month_num = _MONTH_NAMES.index(lines[j]) + 1
if lines[j].isdigit() and len(lines[j]) == 4:
calendar_year = int(lines[j])
break

if calendar_year is None:
raise ValueError(
"Could not determine collection year from 'Collection calendar' heading. "
"Page format may have changed."
)

is_same_year = (
start_month_num is None
or end_month_num is None
or end_month_num >= start_month_num
)
Comment on lines +85 to +89
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Silent same-year fallback when month names cannot be parsed.

is_same_year evaluates to True whenever start_month_num or end_month_num is None. This means a page whose heading contains the year but not parseable month names will silently produce wrong years for a cross-year calendar instead of surfacing the problem. Per the project's explicit-failure preference, consider raising here (or at least guarding the cross-year path) so a structural change in the heading is caught immediately.

Based on learnings from the UKBinCollectionData project: prefer explicit failures (raise exceptions on unexpected formats) over silent defaults, to ensure format changes are detected early.

🛡️ Proposed fix
+        if start_month_num is None or end_month_num is None:
+            raise ValueError(
+                "Could not determine start/end months from 'Collection calendar' heading. "
+                "Page format may have changed."
+            )
+
         is_same_year = (
-            start_month_num is None
-            or end_month_num is None
-            or end_month_num >= start_month_num
+            end_month_num >= start_month_num
         )
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
is_same_year = (
start_month_num is None
or end_month_num is None
or end_month_num >= start_month_num
)
if start_month_num is None or end_month_num is None:
raise ValueError(
"Could not determine start/end months from 'Collection calendar' heading. "
"Page format may have changed."
)
is_same_year = (
end_month_num >= start_month_num
)
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@uk_bin_collection/uk_bin_collection/councils/CumberlandCouncil.py` around
lines 85 - 89, The current is_same_year logic silently treats unparsed months as
same-year because it returns True when start_month_num or end_month_num is None;
update the code around is_same_year (the variables start_month_num,
end_month_num, and is_same_year) to fail explicitly instead: if either month
number is None, raise a clear parsing/format exception (e.g., ValueError with
context including the raw heading) or skip the cross-year branch and raise when
cross-year handling would be needed, so that unparseable headings are surfaced
rather than defaulting to same-year.


current_month = None
current_year = None
current_year = calendar_year
i = 0

# Determine the year range from the page header
year_2026 = "2026" in text_content


while i < len(lines):
line = lines[i]

# Check if this is a month name
if line in ["January", "February", "March", "April", "May", "June",
"July", "August", "September", "October", "November", "December"]:
current_month = line
# Determine year based on month and context
if year_2026:
current_year = "2026" if line in ["January", "February"] else "2025"
if line in _MONTH_NAMES:
month_num = datetime.strptime(line, "%B").month
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

🏁 Script executed:

# First, find the file
fd -t f "CumberlandCouncil.py"

Repository: robbrad/UKBinCollectionData

Length of output: 136


🏁 Script executed:

# Check the import statements and lines mentioned
cat -n uk_bin_collection/uk_bin_collection/councils/CumberlandCouncil.py | head -20

Repository: robbrad/UKBinCollectionData

Length of output: 853


🏁 Script executed:

# Check around the lines mentioned (100, 136, 140, 153)
sed -n '95,160p' uk_bin_collection/uk_bin_collection/councils/CumberlandCouncil.py | cat -n

Repository: robbrad/UKBinCollectionData

Length of output: 2671


🏁 Script executed:

# Check what's exported from common.py
cat -n uk_bin_collection/uk_bin_collection/common.py | head -100

Repository: robbrad/UKBinCollectionData

Length of output: 3297


🏁 Script executed:

# Check if datetime is imported in common.py
rg -n "^import datetime|^from datetime" uk_bin_collection/uk_bin_collection/common.py

Repository: robbrad/UKBinCollectionData

Length of output: 113


Replace star import with explicit imports of datetime and date_format.

Lines 100, 136, 140, and 153 use datetime and date_format from the star import. Explicit imports remove the Ruff F405 ambiguity and clarify dependencies.

Also applies to: 136-136, 140-140, 153-153

🧰 Tools
🪛 Ruff (0.15.1)

[error] 100-100: datetime may be undefined, or defined from star imports

(F405)

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@uk_bin_collection/uk_bin_collection/councils/CumberlandCouncil.py` at line
100, Replace the star import in CumberlandCouncil.py with explicit imports for
the used names: import datetime (or from datetime import datetime as needed) and
import date_format from its defining module; then update usages like month_num =
datetime.strptime(line, "%B").month and any other references at the locations
that previously relied on the star import (the occurrences of datetime and
date_format) so they resolve to the explicitly imported symbols and avoid the
Ruff F405 ambiguity.


if is_same_year:
current_year = calendar_year
else:
current_year = str(datetime.now().year)
# Cross-year: months on or after the start month belong to
# the year before the heading year
current_year = (
calendar_year - 1
if month_num >= start_month_num
else calendar_year
)

current_month = line
i += 1
continue

# Check if this is a day number (1-31)
if line.isdigit() and 1 <= int(line) <= 31 and current_month:
day = line
# Next line should be the bin type

if i + 1 < len(lines):
bin_type = lines[i + 1]

# Skip the subtype line (Refuse/Recycling detail)
if i + 2 < len(lines) and lines[i + 2] in ["Refuse", "Recycling"]:

# Skip the subtype line (e.g. Paper, Recycling, Refuse, Green).
# A subtype is any line that is neither a digit nor a month name.
if (
i + 2 < len(lines)
and not lines[i + 2].isdigit()
and lines[i + 2] not in _MONTH_NAMES
):
i += 1

# Parse the date
try:
date_str = f"{day} {current_month} {current_year}"
collection_date = datetime.strptime(date_str, "%d %B %Y")

dict_data = {
"type": bin_type,
"collectionDate": collection_date.strftime(date_format),
}
bindata["bins"].append(dict_data)
except ValueError:
pass
Comment on lines 134 to 144
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Silent except ValueError: pass masks format regressions.

The bare pass means any date-construction failure (including a silently wrong year assignment that produces an out-of-range date) is swallowed with no signal. Per the project's coding preference, this should propagate or at minimum log the failure so format changes are detected immediately rather than producing an empty result set.

Based on learnings from the UKBinCollectionData project: in uk_bin_collection/**/*.py, prefer explicit failures (raise exceptions on unexpected formats) over silent defaults or swallowed errors, and use clear exception types so format changes are detected early.

🛡️ Proposed fix
                    try:
                        date_str = f"{day} {current_month} {current_year}"
                        collection_date = datetime.strptime(date_str, "%d %B %Y")
-                    except ValueError:
-                        pass
+                    except ValueError as e:
+                        raise ValueError(
+                            f"Failed to parse collection date '{date_str}': {e}"
+                        ) from e
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
try:
date_str = f"{day} {current_month} {current_year}"
collection_date = datetime.strptime(date_str, "%d %B %Y")
dict_data = {
"type": bin_type,
"collectionDate": collection_date.strftime(date_format),
}
bindata["bins"].append(dict_data)
except ValueError:
pass
try:
date_str = f"{day} {current_month} {current_year}"
collection_date = datetime.strptime(date_str, "%d %B %Y")
dict_data = {
"type": bin_type,
"collectionDate": collection_date.strftime(date_format),
}
bindata["bins"].append(dict_data)
except ValueError as e:
raise ValueError(
f"Failed to parse collection date '{date_str}': {e}"
) from e
🧰 Tools
🪛 Ruff (0.15.1)

[error] 136-136: datetime may be undefined, or defined from star imports

(F405)


[error] 140-140: date_format may be undefined, or defined from star imports

(F405)

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@uk_bin_collection/uk_bin_collection/councils/CumberlandCouncil.py` around
lines 134 - 144, The try/except block around building date_str and parsing it
(using date_str, collection_date, current_month, current_year, date_format and
appending dict_data to bindata["bins"]) silently swallows ValueError; change it
to surface the failure by either logging the failing date string and bin_type
then re-raising a ValueError or simply re-raising with contextual information
(e.g. include date_str and bin_type), so format regressions in
CumberlandCouncil.py are not ignored; do not keep a bare `except ValueError:
pass`.


i += 2
continue

i += 1

# Sort by collection date
bindata["bins"].sort(
key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
)

return bindata