Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
161 changes: 161 additions & 0 deletions src/open_data_pvnet/scripts/fetch_elia_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
import logging
from typing import Optional, List

import pandas as pd
import requests
import xarray as xr

logger = logging.getLogger(__name__)


class EliaData:
    """
    Client for the Elia (Belgium TSO) Open Data API.

    Elia publishes public solar generation data via the Opendatasoft
    platform; no API key is required.

    Reference: https://opendata.elia.be/explore/dataset/ods087/
    """

    # Maximum page size accepted by the Opendatasoft records endpoint.
    _MAX_PAGE_SIZE = 100

    def __init__(self) -> None:
        self.base_url = (
            "https://opendata.elia.be/api/explore/v2.1/catalog/datasets"
        )
        # Single source of truth for the default dataset (solar PV).
        self.default_dataset = "ods087"

    def get_data(
        self,
        start_date: str,
        end_date: str,
        dataset: Optional[str] = None,
        limit: int = 100,
    ) -> Optional[pd.DataFrame]:
        """
        Fetch solar generation data from the Elia Open Data API.

        Automatically paginates through all available results for the
        requested date range.

        Args:
            start_date: Start date string (YYYY-MM-DD)
            end_date: End date string (YYYY-MM-DD)
            dataset: Elia dataset identifier; falls back to
                ``self.default_dataset`` (ods087, solar PV) when None
            limit: Number of records per API page; clamped to 1..100,
                the maximum the API accepts

        Returns:
            pd.DataFrame with solar generation records, or None if error/empty
        """
        if dataset is None:
            dataset = self.default_dataset
        # The API rejects page sizes above 100; clamp so the pagination
        # logic below (compare page length against `limit`) stays reliable.
        limit = max(1, min(limit, self._MAX_PAGE_SIZE))

        url = f"{self.base_url}/{dataset}/records"

        where_clause = (
            f"datetime >= '{start_date}T00:00:00Z' "
            f"AND datetime <= '{end_date}T23:59:59Z'"
        )

        params = {
            "where": where_clause,
            "order_by": "datetime ASC",
            "limit": limit,
            "offset": 0,
        }

        all_data: List[dict] = []
        current_offset = 0
        # Initialised up-front so the except block can safely log the
        # response body without a fragile `in locals()` check.
        response = None

        try:
            while True:
                # Create a fresh copy to avoid mutating the original params
                request_params = params.copy()
                request_params["offset"] = current_offset

                logger.info(
                    "Fetching data from %s, offset=%s...", url, current_offset
                )
                # Explicit timeout so a stalled server cannot hang the caller.
                response = requests.get(url, params=request_params, timeout=30)
                response.raise_for_status()

                payload = response.json()
                results = payload.get("results", [])

                if not results:
                    logger.info("No more data returned from API.")
                    break

                all_data.extend(results)

                # A short page means this was the final page.
                if len(results) < limit:
                    break

                current_offset += limit

            if not all_data:
                logger.warning("No data retrieved.")
                return None

            return pd.DataFrame(all_data)

        except requests.exceptions.RequestException as e:
            logger.error("Request failed: %s", e)
            if response is not None:
                logger.error("Response: %s", response.text)
            return None

    def get_dataset(
        self,
        start_date: str,
        end_date: str,
        dataset: Optional[str] = None,
    ) -> Optional[xr.Dataset]:
        """
        Fetch data and convert to xarray Dataset compatible with ocf-data-sampler.

        Args:
            start_date: Start date string (YYYY-MM-DD)
            end_date: End date string (YYYY-MM-DD)
            dataset: Elia dataset identifier; falls back to
                ``self.default_dataset`` when None

        Returns:
            xr.Dataset with datetime_utc index, or None if no data
        """
        df = self.get_data(
            start_date=start_date,
            end_date=end_date,
            dataset=dataset,
        )

        if df is None or df.empty:
            return None

        # Normalise the API's `datetime` field to a tz-aware UTC column.
        if "datetime" in df.columns:
            df["datetime_utc"] = pd.to_datetime(df["datetime"], utc=True)
            df = df.drop(columns=["datetime"])

        # Columns excluded from numeric coercion. The live API field is
        # "mostrecentforecast"; "mostrecent" is kept for safety.
        non_value_cols = (
            "datetime_utc",
            "resolutioncode",
            "mostrecent",
            "mostrecentforecast",
        )
        value_cols = [c for c in df.columns if c not in non_value_cols]

        # Coerce measurement columns to numeric; unparseable values become NaN.
        for col in value_cols:
            df[col] = pd.to_numeric(df[col], errors="coerce")

        # Drop duplicate timestamps and index by UTC datetime for xarray.
        df = df.drop_duplicates(subset=["datetime_utc"])
        df = df.set_index("datetime_utc")

        ds = xr.Dataset.from_dataframe(df)

        return ds


if __name__ == "__main__":
    # Minimal smoke entry point: configure logging, construct the client,
    # and print a usage hint.
    logging.basicConfig(level=logging.INFO)
    client = EliaData()
    usage_hint = (
        "EliaData initialized. "
        "Call get_data(start_date, end_date) to fetch Belgium solar data."
    )
    print(usage_hint)
177 changes: 177 additions & 0 deletions tests/test_elia.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
import pytest
import pandas as pd
from unittest.mock import Mock, patch

from open_data_pvnet.scripts.fetch_elia_data import EliaData


@pytest.fixture
def mock_response():
    """Fixture to mock a successful Elia API response."""
    records = []
    for ts, measured, forecast in (
        ("2024-06-15T12:00:00+00:00", 2500.0, 2450.0),
        ("2024-06-15T12:15:00+00:00", 2520.0, 2460.0),
    ):
        records.append(
            {
                "datetime": ts,
                "measured": measured,
                "mostrecentforecast": forecast,
                "monitoredcapacity": 7500.0,
                "resolutioncode": "PT15M",
            }
        )

    mock = Mock()
    mock.json.return_value = {"results": records}
    mock.raise_for_status.return_value = None
    return mock


def test_init():
    """EliaData should initialize without any API key."""
    elia = EliaData()
    expected_base = (
        "https://opendata.elia.be/api/explore/v2.1/catalog/datasets"
    )
    assert elia.base_url == expected_base
    assert elia.default_dataset == "ods087"


def test_get_data_success(mock_response):
    """Should return a DataFrame with solar generation data."""
    with patch("requests.get", return_value=mock_response) as mock_get:
        elia = EliaData()

        df = elia.get_data(
            start_date="2024-06-15",
            end_date="2024-06-15",
        )

        assert isinstance(df, pd.DataFrame)
        assert len(df) == 2
        assert "measured" in df.columns
        assert "datetime" in df.columns

        # Verify API call was made against the default dataset's endpoint.
        # The dataset id lives in the URL, not the where clause, so assert
        # on the positional URL argument directly.
        mock_get.assert_called_once()
        args, kwargs = mock_get.call_args
        assert "ods087" in args[0]
        # The where clause carries only the date filtering.
        assert "2024-06-15" in kwargs["params"]["where"]
Comment on lines +60 to +61
Copy link

Copilot AI Feb 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This assertion mixes checking the dataset ID in the URL with checking it in the where clause, but the dataset ID shouldn’t appear in where. As an OR, it also weakens the test (it could pass even if the URL is wrong). Consider asserting explicitly on the requested URL/path (and keep where assertions focused on date filtering).

Suggested change
_, kwargs = mock_get.call_args
assert "ods087" in kwargs["params"]["where"] or "ods087" in _[0]
args, kwargs = mock_get.call_args
assert "ods087" in args[0]

Copilot uses AI. Check for mistakes.


def test_get_data_custom_dataset(mock_response):
    """Should use the specified dataset ID in the API URL."""
    with patch("requests.get", return_value=mock_response) as mock_get:
        client = EliaData()
        client.get_data("2024-01-01", "2024-01-01", dataset="ods088")

        requested_url = mock_get.call_args[0][0]
        assert "ods088" in requested_url


def test_get_data_empty_response():
    """Should return None when no data is available."""
    empty_response = Mock()
    empty_response.json.return_value = {"results": []}
    empty_response.raise_for_status.return_value = None

    with patch("requests.get", return_value=empty_response):
        result = EliaData().get_data("2024-06-15", "2024-06-15")

    assert result is None


def test_get_data_api_error():
    """Should return None on API errors."""
    import requests as req_lib

    failing_response = Mock()
    failing_response.raise_for_status.side_effect = (
        req_lib.exceptions.HTTPError("API Error")
    )

    with patch("requests.get", return_value=failing_response):
        result = EliaData().get_data("2024-06-15", "2024-06-15")

    assert result is None


def test_get_data_pagination():
    """Should auto-paginate through all available data."""

    def _page(rows):
        # Build a mock response whose JSON payload carries the given rows.
        resp = Mock()
        resp.json.return_value = {"results": rows}
        resp.raise_for_status.return_value = None
        return resp

    first_page = _page(
        [
            {"datetime": "2024-06-15T12:00:00+00:00", "measured": 2500.0},
            {"datetime": "2024-06-15T12:15:00+00:00", "measured": 2520.0},
        ]
    )
    second_page = _page(
        [{"datetime": "2024-06-15T12:30:00+00:00", "measured": 2510.0}]
    )

    with patch(
        "requests.get", side_effect=[first_page, second_page]
    ) as mock_get:
        df = EliaData().get_data("2024-06-15", "2024-06-15", limit=2)

        assert len(df) == 3
        assert mock_get.call_count == 2

        offsets = [
            call[1]["params"]["offset"] for call in mock_get.call_args_list
        ]
        assert offsets == [0, 2]


def test_get_dataset_success(mock_response):
    """Should return an xarray Dataset with datetime_utc index."""
    import xarray as xr

    with patch("requests.get", return_value=mock_response):
        ds = EliaData().get_dataset(
            start_date="2024-06-15",
            end_date="2024-06-15",
        )

    assert isinstance(ds, xr.Dataset)
    has_time_index = (
        "datetime_utc" in ds.coords or "datetime_utc" in ds.indexes
    )
    assert has_time_index
    assert "measured" in ds.data_vars
    assert len(ds.datetime_utc) == 2


def test_get_dataset_empty():
    """Should return None when no data is available."""
    no_data_response = Mock()
    no_data_response.json.return_value = {"results": []}
    no_data_response.raise_for_status.return_value = None

    with patch("requests.get", return_value=no_data_response):
        result = EliaData().get_dataset("2024-06-15", "2024-06-15")

    assert result is None


def test_get_data_date_filtering(mock_response):
    """Should pass correct date range in the API where clause."""
    with patch("requests.get", return_value=mock_response) as mock_get:
        EliaData().get_data("2024-06-15", "2024-06-16")

    where_clause = mock_get.call_args[1]["params"]["where"]
    assert "2024-06-15" in where_clause
    assert "2024-06-16" in where_clause
Loading