From f2b6ef93a339e1bbeea5ae28eee55bb5604cc7fe Mon Sep 17 00:00:00 2001
From: makemelegal <makemelegal@hotmail.com>
Date: Thu, 19 Feb 2026 18:36:58 +0000
Subject: [PATCH 1/2] Update CumberlandCouncil.py

fix: CumberlandCouncil - correct year assignment for all months. Whenever "2026" appeared anywhere on the page, the previous code hardcoded the year as 2025 for every month except January and February.
---
 .../councils/CumberlandCouncil.py             | 74 ++++++++++++-------
 1 file changed, 48 insertions(+), 26 deletions(-)

diff --git a/uk_bin_collection/uk_bin_collection/councils/CumberlandCouncil.py b/uk_bin_collection/uk_bin_collection/councils/CumberlandCouncil.py
index a665024cea..bbe926635a 100644
--- a/uk_bin_collection/uk_bin_collection/councils/CumberlandCouncil.py
+++ b/uk_bin_collection/uk_bin_collection/councils/CumberlandCouncil.py
@@ -1,6 +1,5 @@
 import requests
 from bs4 import BeautifulSoup
-
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
@@ -31,46 +30,70 @@ def parse_data(self, page: str, **kwargs) -> dict:
 
         # Parse the text content to extract collection dates
         text_content = content_region.get_text()
-        lines = [line.strip() for line in text_content.split('\n') if line.strip()]
-        
+        lines = [line.strip() for line in text_content.split("\n") if line.strip()]
+
         current_month = None
-        current_year = None
+        current_year = datetime.now().year
+        previous_month_num = 0
         i = 0
-        
-        # Determine the year range from the page header
-        year_2026 = "2026" in text_content
-        
+
+        # Determine the base year from the page heading, e.g.
+        # "Collection calendar: February to August 2026"
+        # This is more reliable than checking whether "2026" appears anywhere
+        # in the page, which broke the year assignment for all non-Jan/Feb months.
+        for line in lines:
+            if "Collection calendar" in line:
+                for word in reversed(line.split()):
+                    if word.isdigit() and len(word) == 4:
+                        current_year = int(word)
+                        break
+                break
+
         while i < len(lines):
             line = lines[i]
-            
+
             # Check if this is a month name
-            if line in ["January", "February", "March", "April", "May", "June", 
-                       "July", "August", "September", "October", "November", "December"]:
+            if line in [
+                "January", "February", "March", "April", "May", "June",
+                "July", "August", "September", "October", "November", "December",
+            ]:
+                month_num = datetime.strptime(line, "%B").month
+
+                # If months go backwards (e.g. December -> January),
+                # we have crossed into the next year
+                if month_num < previous_month_num:
+                    current_year += 1
+
+                previous_month_num = month_num
                 current_month = line
-                # Determine year based on month and context
-                if year_2026:
-                    current_year = "2026" if line in ["January", "February"] else "2025"
-                else:
-                    current_year = str(datetime.now().year)
                 i += 1
                 continue
-            
+
             # Check if this is a day number (1-31)
             if line.isdigit() and 1 <= int(line) <= 31 and current_month:
                 day = line
+
                 # Next line should be the bin type
                 if i + 1 < len(lines):
                     bin_type = lines[i + 1]
-                    
-                    # Skip the subtype line (Refuse/Recycling detail)
-                    if i + 2 < len(lines) and lines[i + 2] in ["Refuse", "Recycling"]:
+
+                    # Skip the subtype line (e.g. Refuse, Recycling, Paper, Green)
+                    # A subtype is any line that is neither a digit nor a month name
+                    if (
+                        i + 2 < len(lines)
+                        and not lines[i + 2].isdigit()
+                        and lines[i + 2] not in [
+                            "January", "February", "March", "April", "May", "June",
+                            "July", "August", "September", "October", "November", "December",
+                        ]
+                    ):
                         i += 1
-                    
+
                     # Parse the date
                     try:
                         date_str = f"{day} {current_month} {current_year}"
                         collection_date = datetime.strptime(date_str, "%d %B %Y")
-                        
+
                         dict_data = {
                             "type": bin_type,
                             "collectionDate": collection_date.strftime(date_format),
@@ -78,15 +101,14 @@ def parse_data(self, page: str, **kwargs) -> dict:
                         bindata["bins"].append(dict_data)
                     except ValueError:
                         pass
-                    
+
                     i += 2
                     continue
-            
+
             i += 1
 
         # Sort by collection date
         bindata["bins"].sort(
-            key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
+            key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
         )
-
         return bindata

From 7e83de200eb33302319721e5288149bb3e3bb2db Mon Sep 17 00:00:00 2001
From: makemelegal <makemelegal@hotmail.com>
Date: Thu, 19 Feb 2026 20:47:00 +0000
Subject: [PATCH 2/2] Update CumberlandCouncil.py

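Address further review feedback from the AI agent, building on the previous commit:
- define the month names once as the module-level constant _MONTH_NAMES
- read the start month, end month and year from the "Collection calendar" heading, which is split across several lines
- raise ValueError when the year cannot be determined from the heading
- for cross-year calendars, assign months on or after the start month to the year before the heading year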
---
 .../councils/CumberlandCouncil.py             | 103 ++++++++++++------
 1 file changed, 72 insertions(+), 31 deletions(-)

diff --git a/uk_bin_collection/uk_bin_collection/councils/CumberlandCouncil.py b/uk_bin_collection/uk_bin_collection/councils/CumberlandCouncil.py
index bbe926635a..8ef94a0d5f 100644
--- a/uk_bin_collection/uk_bin_collection/councils/CumberlandCouncil.py
+++ b/uk_bin_collection/uk_bin_collection/councils/CumberlandCouncil.py
@@ -1,8 +1,15 @@
 import requests
 from bs4 import BeautifulSoup
+
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
+# Module-level constant so the month list is defined once and never duplicated.
+_MONTH_NAMES = [
+    "January", "February", "March", "April", "May", "June",
+    "July", "August", "September", "October", "November", "December",
+]
+
 
 class CouncilClass(AbstractGetBinDataClass):
     """
@@ -17,7 +24,10 @@ def parse_data(self, page: str, **kwargs) -> dict:
         bindata = {"bins": []}
 
         # Direct URL to the bin collection schedule using UPRN
-        url = f"https://www.cumberland.gov.uk/bins-recycling-and-street-cleaning/waste-collections/bin-collection-schedule/view/{user_uprn}"
+        url = (
+            f"https://www.cumberland.gov.uk/bins-recycling-and-street-cleaning/"
+            f"waste-collections/bin-collection-schedule/view/{user_uprn}"
+        )
 
         # Fetch the page
         response = requests.get(url)
@@ -32,39 +42,74 @@ def parse_data(self, page: str, **kwargs) -> dict:
         text_content = content_region.get_text()
         lines = [line.strip() for line in text_content.split("\n") if line.strip()]
 
+        # ------------------------------------------------------------------ #
+        # The heading is split across multiple lines, e.g.:
+        #   "Collection calendar:"
+        #   "February"
+        #   "to"
+        #   "August"
+        #   "2026"
+        #
+        # We find "Collection calendar:" then scan the following lines to
+        # extract the start month, end month, and year.
+        #
+        # For same-year calendars (start month <= end month, e.g. Feb-Aug 2026)
+        # every month gets calendar_year.
+        #
+        # For cross-year calendars (start month > end month, e.g. a heading of
+        # "November to March 2026" covering Nov 2025 - Mar 2026), months
+        # >= start_month_num get (calendar_year - 1) and the rest get calendar_year.
+        # ------------------------------------------------------------------ #
+        calendar_year = None
+        start_month_num = None
+        end_month_num = None
+
+        for i, line in enumerate(lines):
+            if line.strip().startswith("Collection calendar"):
+                for j in range(i + 1, min(i + 6, len(lines))):
+                    if lines[j] in _MONTH_NAMES:
+                        if start_month_num is None:
+                            start_month_num = _MONTH_NAMES.index(lines[j]) + 1
+                        else:
+                            end_month_num = _MONTH_NAMES.index(lines[j]) + 1
+                    if lines[j].isdigit() and len(lines[j]) == 4:
+                        calendar_year = int(lines[j])
+                break
+
+        if calendar_year is None:
+            raise ValueError(
+                "Could not determine collection year from 'Collection calendar' heading. "
+                "Page format may have changed."
+            )
+
+        is_same_year = (
+            start_month_num is None
+            or end_month_num is None
+            or end_month_num >= start_month_num
+        )
+
         current_month = None
-        current_year = datetime.now().year
-        previous_month_num = 0
+        current_year = calendar_year
         i = 0
 
-        # Determine the base year from the page heading, e.g.
-        # "Collection calendar: February to August 2026"
-        # This is more reliable than checking whether "2026" appears anywhere
-        # in the page, which broke the year assignment for all non-Jan/Feb months.
-        for line in lines:
-            if "Collection calendar" in line:
-                for word in reversed(line.split()):
-                    if word.isdigit() and len(word) == 4:
-                        current_year = int(word)
-                        break
-                break
-
         while i < len(lines):
             line = lines[i]
 
             # Check if this is a month name
-            if line in [
-                "January", "February", "March", "April", "May", "June",
-                "July", "August", "September", "October", "November", "December",
-            ]:
+            if line in _MONTH_NAMES:
                 month_num = datetime.strptime(line, "%B").month
 
-                # If months go backwards (e.g. December -> January),
-                # we have crossed into the next year
-                if month_num < previous_month_num:
-                    current_year += 1
+                if is_same_year:
+                    current_year = calendar_year
+                else:
+                    # Cross-year: months on or after the start month belong to
+                    # the year before the heading year
+                    current_year = (
+                        calendar_year - 1
+                        if month_num >= start_month_num
+                        else calendar_year
+                    )
 
-                previous_month_num = month_num
                 current_month = line
                 i += 1
                 continue
@@ -73,19 +118,15 @@ def parse_data(self, page: str, **kwargs) -> dict:
             if line.isdigit() and 1 <= int(line) <= 31 and current_month:
                 day = line
 
-                # Next line should be the bin type
                 if i + 1 < len(lines):
                     bin_type = lines[i + 1]
 
-                    # Skip the subtype line (e.g. Refuse, Recycling, Paper, Green)
-                    # A subtype is any line that is neither a digit nor a month name
+                    # Skip the subtype line (e.g. Paper, Recycling, Refuse, Green).
+                    # A subtype is any line that is neither a digit nor a month name.
                     if (
                         i + 2 < len(lines)
                         and not lines[i + 2].isdigit()
-                        and lines[i + 2] not in [
-                            "January", "February", "March", "April", "May", "June",
-                            "July", "August", "September", "October", "November", "December",
-                        ]
+                        and lines[i + 2] not in _MONTH_NAMES
                     ):
                         i += 1