Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
161 changes: 161 additions & 0 deletions src/open_data_pvnet/scripts/fetch_elia_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
import logging
from typing import Optional, List

import pandas as pd
import requests
import xarray as xr

logger = logging.getLogger(__name__)


class EliaData:
    """
    Client for the Elia (Belgium TSO) Open Data API.

    Elia publishes public solar generation data via the Opendatasoft
    platform; no API key is required.

    Reference: https://opendata.elia.be/explore/dataset/ods087/
    """

    # Maximum page size accepted by the Opendatasoft records endpoint.
    _MAX_PAGE_SIZE = 100

    def __init__(self) -> None:
        self.base_url = (
            "https://opendata.elia.be/api/explore/v2.1/catalog/datasets"
        )
        # Single source of truth for the default dataset (solar PV).
        self.default_dataset = "ods087"

    def get_data(
        self,
        start_date: str,
        end_date: str,
        dataset: Optional[str] = None,
        limit: int = 100,
    ) -> Optional[pd.DataFrame]:
        """
        Fetch solar generation data from the Elia Open Data API.

        Automatically paginates through all available results for the
        requested date range.

        Args:
            start_date: Start date string (YYYY-MM-DD)
            end_date: End date string (YYYY-MM-DD)
            dataset: Elia dataset identifier; falls back to
                ``self.default_dataset`` (ods087, solar PV) when None
            limit: Number of records per API page; clamped to 1..100,
                the maximum the API accepts

        Returns:
            pd.DataFrame with solar generation records, or None if error/empty
        """
        if dataset is None:
            dataset = self.default_dataset
        # The API rejects page sizes above 100; clamp so the pagination
        # logic below (compare page length against `limit`) stays reliable.
        limit = max(1, min(limit, self._MAX_PAGE_SIZE))

        url = f"{self.base_url}/{dataset}/records"

        where_clause = (
            f"datetime >= '{start_date}T00:00:00Z' "
            f"AND datetime <= '{end_date}T23:59:59Z'"
        )

        params = {
            "where": where_clause,
            "order_by": "datetime ASC",
            "limit": limit,
            "offset": 0,
        }

        all_data: List[dict] = []
        current_offset = 0
        # Initialised up-front so the except block can safely log the
        # response body without a fragile `in locals()` check.
        response = None

        try:
            while True:
                # Create a fresh copy to avoid mutating the original params
                request_params = params.copy()
                request_params["offset"] = current_offset

                logger.info(
                    "Fetching data from %s, offset=%s...", url, current_offset
                )
                # Explicit timeout so a stalled server cannot hang the caller.
                response = requests.get(url, params=request_params, timeout=30)
                response.raise_for_status()

                payload = response.json()
                results = payload.get("results", [])

                if not results:
                    logger.info("No more data returned from API.")
                    break

                all_data.extend(results)

                # A short page means this was the final page.
                if len(results) < limit:
                    break

                current_offset += limit

            if not all_data:
                logger.warning("No data retrieved.")
                return None

            return pd.DataFrame(all_data)

        except requests.exceptions.RequestException as e:
            logger.error("Request failed: %s", e)
            if response is not None:
                logger.error("Response: %s", response.text)
            return None

    def get_dataset(
        self,
        start_date: str,
        end_date: str,
        dataset: Optional[str] = None,
    ) -> Optional[xr.Dataset]:
        """
        Fetch data and convert to xarray Dataset compatible with ocf-data-sampler.

        Args:
            start_date: Start date string (YYYY-MM-DD)
            end_date: End date string (YYYY-MM-DD)
            dataset: Elia dataset identifier; falls back to
                ``self.default_dataset`` when None

        Returns:
            xr.Dataset with datetime_utc index, or None if no data
        """
        df = self.get_data(
            start_date=start_date,
            end_date=end_date,
            dataset=dataset,
        )

        if df is None or df.empty:
            return None

        # Normalise the API's `datetime` field to a tz-aware UTC column.
        if "datetime" in df.columns:
            df["datetime_utc"] = pd.to_datetime(df["datetime"], utc=True)
            df = df.drop(columns=["datetime"])

        # Columns excluded from numeric coercion. The live API field is
        # "mostrecentforecast"; "mostrecent" is kept for safety.
        non_value_cols = (
            "datetime_utc",
            "resolutioncode",
            "mostrecent",
            "mostrecentforecast",
        )
        value_cols = [c for c in df.columns if c not in non_value_cols]

        # Coerce measurement columns to numeric; unparseable values become NaN.
        for col in value_cols:
            df[col] = pd.to_numeric(df[col], errors="coerce")

        # Drop duplicate timestamps and index by UTC datetime for xarray.
        df = df.drop_duplicates(subset=["datetime_utc"])
        df = df.set_index("datetime_utc")

        ds = xr.Dataset.from_dataframe(df)

        return ds


if __name__ == "__main__":
    # Minimal smoke entry point: configure logging, construct the client,
    # and print a usage hint.
    logging.basicConfig(level=logging.INFO)
    client = EliaData()
    usage_hint = (
        "EliaData initialized. "
        "Call get_data(start_date, end_date) to fetch Belgium solar data."
    )
    print(usage_hint)
177 changes: 177 additions & 0 deletions tests/test_elia.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
import pytest
import pandas as pd
from unittest.mock import Mock, patch

from open_data_pvnet.scripts.fetch_elia_data import EliaData


@pytest.fixture
def mock_response():
    """Fixture to mock a successful Elia API response."""
    records = []
    for ts, measured, forecast in (
        ("2024-06-15T12:00:00+00:00", 2500.0, 2450.0),
        ("2024-06-15T12:15:00+00:00", 2520.0, 2460.0),
    ):
        records.append(
            {
                "datetime": ts,
                "measured": measured,
                "mostrecentforecast": forecast,
                "monitoredcapacity": 7500.0,
                "resolutioncode": "PT15M",
            }
        )

    mock = Mock()
    mock.json.return_value = {"results": records}
    mock.raise_for_status.return_value = None
    return mock


def test_init():
    """EliaData should initialize without any API key."""
    elia = EliaData()
    expected_base = (
        "https://opendata.elia.be/api/explore/v2.1/catalog/datasets"
    )
    assert elia.base_url == expected_base
    assert elia.default_dataset == "ods087"


def test_get_data_success(mock_response):
    """Should return a DataFrame with solar generation data."""
    with patch("requests.get", return_value=mock_response) as mock_get:
        elia = EliaData()

        df = elia.get_data(
            start_date="2024-06-15",
            end_date="2024-06-15",
        )

        assert isinstance(df, pd.DataFrame)
        assert len(df) == 2
        assert "measured" in df.columns
        assert "datetime" in df.columns

        # Verify API call was made against the default dataset's endpoint.
        # The dataset id lives in the URL, not the where clause, so assert
        # on the positional URL argument directly.
        mock_get.assert_called_once()
        args, kwargs = mock_get.call_args
        assert "ods087" in args[0]
        # The where clause carries only the date filtering.
        assert "2024-06-15" in kwargs["params"]["where"]
Comment on lines +60 to +61
Copy link

Copilot AI Feb 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This assertion mixes checking the dataset ID in the URL with checking it in the where clause, but the dataset ID shouldn’t appear in where. As an OR, it also weakens the test (it could pass even if the URL is wrong). Consider asserting explicitly on the requested URL/path (and keep where assertions focused on date filtering).

Suggested change
_, kwargs = mock_get.call_args
assert "ods087" in kwargs["params"]["where"] or "ods087" in _[0]
args, kwargs = mock_get.call_args
assert "ods087" in args[0]

Copilot uses AI. Check for mistakes.


def test_get_data_custom_dataset(mock_response):
    """Should use the specified dataset ID in the API URL."""
    with patch("requests.get", return_value=mock_response) as mock_get:
        client = EliaData()
        client.get_data("2024-01-01", "2024-01-01", dataset="ods088")

        requested_url = mock_get.call_args[0][0]
        assert "ods088" in requested_url


def test_get_data_empty_response():
    """Should return None when no data is available."""
    empty_response = Mock()
    empty_response.json.return_value = {"results": []}
    empty_response.raise_for_status.return_value = None

    with patch("requests.get", return_value=empty_response):
        result = EliaData().get_data("2024-06-15", "2024-06-15")

    assert result is None


def test_get_data_api_error():
    """Should return None on API errors."""
    import requests as req_lib

    failing_response = Mock()
    failing_response.raise_for_status.side_effect = (
        req_lib.exceptions.HTTPError("API Error")
    )

    with patch("requests.get", return_value=failing_response):
        result = EliaData().get_data("2024-06-15", "2024-06-15")

    assert result is None


def test_get_data_pagination():
    """Should auto-paginate through all available data."""

    def _page(rows):
        # Build a mock response whose JSON payload carries the given rows.
        resp = Mock()
        resp.json.return_value = {"results": rows}
        resp.raise_for_status.return_value = None
        return resp

    first_page = _page(
        [
            {"datetime": "2024-06-15T12:00:00+00:00", "measured": 2500.0},
            {"datetime": "2024-06-15T12:15:00+00:00", "measured": 2520.0},
        ]
    )
    second_page = _page(
        [{"datetime": "2024-06-15T12:30:00+00:00", "measured": 2510.0}]
    )

    with patch(
        "requests.get", side_effect=[first_page, second_page]
    ) as mock_get:
        df = EliaData().get_data("2024-06-15", "2024-06-15", limit=2)

        assert len(df) == 3
        assert mock_get.call_count == 2

        offsets = [
            call[1]["params"]["offset"] for call in mock_get.call_args_list
        ]
        assert offsets == [0, 2]


def test_get_dataset_success(mock_response):
    """Should return an xarray Dataset with datetime_utc index."""
    import xarray as xr

    with patch("requests.get", return_value=mock_response):
        ds = EliaData().get_dataset(
            start_date="2024-06-15",
            end_date="2024-06-15",
        )

    assert isinstance(ds, xr.Dataset)
    has_time_index = (
        "datetime_utc" in ds.coords or "datetime_utc" in ds.indexes
    )
    assert has_time_index
    assert "measured" in ds.data_vars
    assert len(ds.datetime_utc) == 2


def test_get_dataset_empty():
    """Should return None when no data is available."""
    no_data_response = Mock()
    no_data_response.json.return_value = {"results": []}
    no_data_response.raise_for_status.return_value = None

    with patch("requests.get", return_value=no_data_response):
        result = EliaData().get_dataset("2024-06-15", "2024-06-15")

    assert result is None


def test_get_data_date_filtering(mock_response):
    """Should pass correct date range in the API where clause."""
    with patch("requests.get", return_value=mock_response) as mock_get:
        EliaData().get_data("2024-06-15", "2024-06-16")

    where_clause = mock_get.call_args[1]["params"]["where"]
    assert "2024-06-15" in where_clause
    assert "2024-06-16" in where_clause
Loading