From f2b6ef93a339e1bbeea5ae28eee55bb5604cc7fe Mon Sep 17 00:00:00 2001 From: makemelegal Date: Thu, 19 Feb 2026 18:36:58 +0000 Subject: [PATCH 1/2] Update CumberlandCouncil.py fix: CumberlandCouncil - correct year assignment for all months. The previous version accidentally hardcoded the year as 2025 for all months except Jan/Feb. --- .../councils/CumberlandCouncil.py | 74 ++++++++++++------- 1 file changed, 48 insertions(+), 26 deletions(-) diff --git a/uk_bin_collection/uk_bin_collection/councils/CumberlandCouncil.py b/uk_bin_collection/uk_bin_collection/councils/CumberlandCouncil.py index a665024cea..bbe926635a 100644 --- a/uk_bin_collection/uk_bin_collection/councils/CumberlandCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/CumberlandCouncil.py @@ -1,6 +1,5 @@ import requests from bs4 import BeautifulSoup - from uk_bin_collection.uk_bin_collection.common import * from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass @@ -31,46 +30,70 @@ def parse_data(self, page: str, **kwargs) -> dict: # Parse the text content to extract collection dates text_content = content_region.get_text() - lines = [line.strip() for line in text_content.split('\n') if line.strip()] - + lines = [line.strip() for line in text_content.split("\n") if line.strip()] + current_month = None - current_year = None + current_year = datetime.now().year + previous_month_num = 0 i = 0 - - # Determine the year range from the page header - year_2026 = "2026" in text_content - + + # Determine the base year from the page heading, e.g. + # "Collection calendar: February to August 2026" + # This is more reliable than checking whether "2026" appears anywhere + # in the page, which broke the year assignment for all non-Jan/Feb months.
+ for line in lines: + if "Collection calendar" in line: + for word in reversed(line.split()): + if word.isdigit() and len(word) == 4: + current_year = int(word) + break + break + while i < len(lines): line = lines[i] - + # Check if this is a month name - if line in ["January", "February", "March", "April", "May", "June", - "July", "August", "September", "October", "November", "December"]: + if line in [ + "January", "February", "March", "April", "May", "June", + "July", "August", "September", "October", "November", "December", + ]: + month_num = datetime.strptime(line, "%B").month + + # If months go backwards (e.g. December -> January), + # we have crossed into the next year + if month_num < previous_month_num: + current_year += 1 + + previous_month_num = month_num current_month = line - # Determine year based on month and context - if year_2026: - current_year = "2026" if line in ["January", "February"] else "2025" - else: - current_year = str(datetime.now().year) i += 1 continue - + # Check if this is a day number (1-31) if line.isdigit() and 1 <= int(line) <= 31 and current_month: day = line + # Next line should be the bin type if i + 1 < len(lines): bin_type = lines[i + 1] - - # Skip the subtype line (Refuse/Recycling detail) - if i + 2 < len(lines) and lines[i + 2] in ["Refuse", "Recycling"]: + + # Skip the subtype line (e.g. 
Refuse, Recycling, Paper, Green) + # A subtype is any line that is neither a digit nor a month name + if ( + i + 2 < len(lines) + and not lines[i + 2].isdigit() + and lines[i + 2] not in [ + "January", "February", "March", "April", "May", "June", + "July", "August", "September", "October", "November", "December", + ] + ): i += 1 - + # Parse the date try: date_str = f"{day} {current_month} {current_year}" collection_date = datetime.strptime(date_str, "%d %B %Y") - + dict_data = { "type": bin_type, "collectionDate": collection_date.strftime(date_format), @@ -78,15 +101,14 @@ def parse_data(self, page: str, **kwargs) -> dict: bindata["bins"].append(dict_data) except ValueError: pass - + i += 2 continue - + i += 1 # Sort by collection date bindata["bins"].sort( - key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y") + key=lambda x: datetime.strptime(x.get("collectionDate"), date_format) ) - return bindata From 7e83de200eb33302319721e5288149bb3e3bb2db Mon Sep 17 00:00:00 2001 From: makemelegal Date: Thu, 19 Feb 2026 20:47:00 +0000 Subject: [PATCH 2/2] Update CumberlandCouncil.py Address further feedback from AI agent. This fixes the issues flagged and builds on the previous commit. --- .../councils/CumberlandCouncil.py | 103 ++++++++++++------ 1 file changed, 72 insertions(+), 31 deletions(-) diff --git a/uk_bin_collection/uk_bin_collection/councils/CumberlandCouncil.py b/uk_bin_collection/uk_bin_collection/councils/CumberlandCouncil.py index bbe926635a..8ef94a0d5f 100644 --- a/uk_bin_collection/uk_bin_collection/councils/CumberlandCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/CumberlandCouncil.py @@ -1,8 +1,15 @@ import requests from bs4 import BeautifulSoup + from uk_bin_collection.uk_bin_collection.common import * from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass +# Module-level constant so the month list is defined once and never duplicated.
+_MONTH_NAMES = [ + "January", "February", "March", "April", "May", "June", + "July", "August", "September", "October", "November", "December", +] + class CouncilClass(AbstractGetBinDataClass): """ @@ -17,7 +24,10 @@ def parse_data(self, page: str, **kwargs) -> dict: bindata = {"bins": []} # Direct URL to the bin collection schedule using UPRN - url = f"https://www.cumberland.gov.uk/bins-recycling-and-street-cleaning/waste-collections/bin-collection-schedule/view/{user_uprn}" + url = ( + f"https://www.cumberland.gov.uk/bins-recycling-and-street-cleaning/" + f"waste-collections/bin-collection-schedule/view/{user_uprn}" + ) # Fetch the page response = requests.get(url) @@ -32,39 +42,74 @@ def parse_data(self, page: str, **kwargs) -> dict: text_content = content_region.get_text() lines = [line.strip() for line in text_content.split("\n") if line.strip()] + # ------------------------------------------------------------------ # + # The heading is split across multiple lines, e.g.: + # "Collection calendar:" + # "February" + # "to" + # "August" + # "2026" + # + # We find "Collection calendar:" then scan the following lines to + # extract the start month, end month, and year. + # + # For same-year calendars (start month <= end month, e.g. Feb-Aug 2026) + # every month gets calendar_year. + # + # For cross-year calendars (start month > end month, e.g. Nov-Mar 2026) + # months >= start_month_num get (calendar_year - 1) and months + # < start_month_num get calendar_year. 
+ # ------------------------------------------------------------------ # + calendar_year = None + start_month_num = None + end_month_num = None + + for i, line in enumerate(lines): + if line.strip().startswith("Collection calendar"): + for j in range(i + 1, min(i + 6, len(lines))): + if lines[j] in _MONTH_NAMES: + if start_month_num is None: + start_month_num = _MONTH_NAMES.index(lines[j]) + 1 + else: + end_month_num = _MONTH_NAMES.index(lines[j]) + 1 + if lines[j].isdigit() and len(lines[j]) == 4: + calendar_year = int(lines[j]) + break + + if calendar_year is None: + raise ValueError( + "Could not determine collection year from 'Collection calendar' heading. " + "Page format may have changed." + ) + + is_same_year = ( + start_month_num is None + or end_month_num is None + or end_month_num >= start_month_num + ) + current_month = None - current_year = datetime.now().year - previous_month_num = 0 + current_year = calendar_year i = 0 - # Determine the base year from the page heading, e.g. - # "Collection calendar: February to August 2026" - # This is more reliable than checking whether "2026" appears anywhere - # in the page, which broke the year assignment for all non-Jan/Feb months. - for line in lines: - if "Collection calendar" in line: - for word in reversed(line.split()): - if word.isdigit() and len(word) == 4: - current_year = int(word) - break - break - while i < len(lines): line = lines[i] # Check if this is a month name - if line in [ - "January", "February", "March", "April", "May", "June", - "July", "August", "September", "October", "November", "December", - ]: + if line in _MONTH_NAMES: month_num = datetime.strptime(line, "%B").month - # If months go backwards (e.g. 
December -> January), - # we have crossed into the next year - if month_num < previous_month_num: - current_year += 1 + if is_same_year: + current_year = calendar_year + else: + # Cross-year: months on or after the start month belong to + # the year before the heading year + current_year = ( + calendar_year - 1 + if month_num >= start_month_num + else calendar_year + ) - previous_month_num = month_num current_month = line i += 1 continue @@ -73,19 +118,15 @@ def parse_data(self, page: str, **kwargs) -> dict: if line.isdigit() and 1 <= int(line) <= 31 and current_month: day = line - # Next line should be the bin type if i + 1 < len(lines): bin_type = lines[i + 1] - # Skip the subtype line (e.g. Refuse, Recycling, Paper, Green) - # A subtype is any line that is neither a digit nor a month name + # Skip the subtype line (e.g. Paper, Recycling, Refuse, Green). + # A subtype is any line that is neither a digit nor a month name. if ( i + 2 < len(lines) and not lines[i + 2].isdigit() - and lines[i + 2] not in [ - "January", "February", "March", "April", "May", "June", - "July", "August", "September", "October", "November", "December", - ] + and lines[i + 2] not in _MONTH_NAMES ): i += 1