diff --git a/backend/ibex/core/data_manipulation_methods.py b/backend/ibex/core/data_manipulation_methods.py new file mode 100644 index 00000000..74696422 --- /dev/null +++ b/backend/ibex/core/data_manipulation_methods.py @@ -0,0 +1,223 @@ +from typing import Optional +from enum import Enum +from pydantic import BaseModel + + +class InterpolationMethod(str, Enum): + EXACT_VALUE = "exact_value" + LINEAR = "linear" + NEAREST = "nearest" + + +class SmoothingMethod(str, Enum): + GAUSSIAN_FILTER = "gaussian_filter" + SAVITZKY_GOLAY_FILTER = "savitzky-golay_filter" + + +class AdditionalParameter(BaseModel): + """ + Parameters for specific data manipulation methods. E.g. sigma -> for Gaussian smoothing + """ + + name: str + human_readable_name: str + description: str + possible_values: Optional[list[str]] = None + + +class PossibleValue(BaseModel): + """ + Possible value for data manipulation method parameter. E.g. Gaussian smoothing + """ + + value: str + description: str + additional_parameters: Optional[list[AdditionalParameter]] = None + + +class DataManipulationParameter(BaseModel): + """ + E.g. interpolate_over, type of filtering, binary operation + """ + + human_readable_name: str + name: str + description: str + type: str + default: Optional[str] = None + possible_values: Optional[list[PossibleValue]] = None + group_label: Optional[str] = None + fields: Optional[list["DataManipulationParameter"]] = None + + +class DataManipulationOperation(BaseModel): + """ + Type of data manipulation method. E.g. 
data-smoothing, data-interpolation, binary-operation + """ + + name: str + description: str + method_parameters: list[DataManipulationParameter] + + +class DataManipulationMethodsResponse(BaseModel): + data_manipulation_methods: list[DataManipulationOperation] + + +DataManipulationParameter.model_rebuild() +available_methods = DataManipulationMethodsResponse(data_manipulation_methods=[]) + +# ====================== DATA INTERPOLATION ====================== + +data_interpolation_description = DataManipulationOperation( + name="Data interpolation", + description="Operation performed in order to represent dataset over different set of coordinates", + method_parameters=[], +) + +# ====================== DATA INTERPOLATION PARAMETERS ====================== + +data_interpolation_interpolate_over_parameter = DataManipulationParameter( + human_readable_name="Interpolate over", + name="interpolate_over", + description="List of URIs to gather coordinates from, for interpolation", + type="list[string]", +) + +data_interpolation_method_parameter = DataManipulationParameter( + human_readable_name="Interpolation method", + name="interpolation_method", + description="Method used during data interpolation", + type="string", + default=InterpolationMethod.EXACT_VALUE, + possible_values=[ + PossibleValue( + value=InterpolationMethod.EXACT_VALUE, + description="values are present only on data points where they were originally. 
Rest of the data grid is filled with NaNs", + ), + PossibleValue( + value=InterpolationMethod.LINEAR, + description="see scipy.interpolate.RegularGridInterpolator documentation", + ), + PossibleValue( + value=InterpolationMethod.NEAREST, + description="see scipy.interpolate.RegularGridInterpolator documentation", + ), + ], +) + +data_interpolation_description.method_parameters.append(data_interpolation_interpolate_over_parameter) +data_interpolation_description.method_parameters.append(data_interpolation_method_parameter) +available_methods.data_manipulation_methods.append(data_interpolation_description) + +# ====================== DATA SMOOTHING ====================== + +data_smoothing_description = DataManipulationOperation( + name="Data smoothing/denoising", + description="Operation performed in order to eliminate noise from data", + method_parameters=[], +) + +# ====================== DATA SMOOTHING PARAMETERS ====================== + +data_smoothing_method_parameter = DataManipulationParameter( + human_readable_name="Smoothing method", + name="smoothing_method", + description="Method to be used in data smoothing process", + type="string", + possible_values=[ + PossibleValue( + value=SmoothingMethod.GAUSSIAN_FILTER, + description="see scipy.ndimage.gaussian_filter documentation", + additional_parameters=[ + AdditionalParameter( + name="gaussian_smoothing_sigma", + human_readable_name="Sigma", + description="Standard deviation for Gaussian kernel.", + ) + ], + ), + PossibleValue( + value=SmoothingMethod.SAVITZKY_GOLAY_FILTER, + description="see scipy.signal.savgol_filter documentation", + additional_parameters=[ + AdditionalParameter( + name="savgol_smoothing_window_length", + human_readable_name="Window length", + description="The length of the filter window (i.e., the number of coefficients). 
If mode is ‘interp’, window_length must be less than or equal to the size of x.", + ), + AdditionalParameter( + name="savgol_smoothing_polyorder", + human_readable_name="Polyorder", + description="The order of the polynomial used to fit the samples. polyorder must be less than window_length.", + ), + AdditionalParameter( + name="savgol_smoothing_deriv", + human_readable_name="Deriv", + description="The order of the derivative to compute. This must be a nonnegative integer. The default is 0, which means to filter the data without differentiating.", + ), + AdditionalParameter( + name="savgol_smoothing_delta", + human_readable_name="Window delta", + description="The spacing of the samples to which the filter will be applied. This is only used if deriv > 0. Default is 1.0.", + ), + AdditionalParameter( + name="savgol_smoothing_mode", + human_readable_name="Mode", + description="This determines the type of extension to use for the padded signal to which the filter is applied.", + possible_values=["mirror", "constant", "nearest", "wrap", "interp"], + ), + AdditionalParameter( + name="savgol_smoothing_cval", + human_readable_name="C-Val", + description="Value to fill past the edges of the input if mode is ‘constant’. Default is 0.0.", + ), + ], + ), + ], +) + +data_smoothing_description.method_parameters.append(data_smoothing_method_parameter) +available_methods.data_manipulation_methods.append(data_smoothing_description) + +# ====================== SIMPLE DATA OPERATIONS ====================== + +simple_data_operations_description = DataManipulationOperation( + name="Simple Data Operations", + description="Ordered list of scalar operations applied to the dataset. 
" + "Execution order is determined by the order of parameters in the request.", + method_parameters=[], +) + +data_operations_parameter = DataManipulationParameter( + human_readable_name="Operations", + name="operations", + description="Ordered list of scalar operations applied to every data point.", + type="list[object]", + group_label="Operation", + fields=[ + DataManipulationParameter( + human_readable_name="Type", + name="operation_type", + description="Type of operation", + type="string", + possible_values=[ + PossibleValue(value="add", description="Addition"), + PossibleValue(value="sub", description="Subtraction"), + PossibleValue(value="mul", description="Multiplication"), + PossibleValue(value="div", description="Division"), + PossibleValue(value="pow", description="Exponentiation"), + PossibleValue(value="root", description="Nth root"), + ], + ), + DataManipulationParameter( + human_readable_name="Value", + name="operation_value", + description="Scalar value for the operation", + type="number", + ), + ], +) + +simple_data_operations_description.method_parameters.append(data_operations_parameter) +available_methods.data_manipulation_methods.append(simple_data_operations_description) diff --git a/backend/ibex/core/ibex_service.py b/backend/ibex/core/ibex_service.py index 63c287de..b1354e39 100644 --- a/backend/ibex/core/ibex_service.py +++ b/backend/ibex/core/ibex_service.py @@ -3,11 +3,12 @@ import time from pathlib import Path from functools import wraps # for measure_execution_time() -from typing import Any, Callable, Optional, Sequence, List +from typing import Any, Callable, Optional, Sequence from ibex.data_source.imas_python_source import IMASPythonSource from ibex.data_source.exception import CannotGenerateUriException from ibex.core.utils import IMAS_URI +from ibex.endpoints.schemas.request_data_schemas import PlotDataRequestModel # helper decorator used during development @@ -114,21 +115,5 @@ def get_multiple_node_data(uri: str) -> dict: ) -def 
get_plot_data( - uri: str, - interpolate_over: List[str] | None, - interpolation_method: str | None, - downsampling_method: str | None, - downsampled_size: int, -) -> dict: - uri_obj = IMAS_URI(uri) - return data_source.get_plot_data( - uri=uri_obj.uri_entry_identifiers, - ids=uri_obj.ids_name, - node_path=uri_obj.node_path, - occurrence=uri_obj.occurrence, - interpolate_over=interpolate_over, - interpolation_method=interpolation_method, - downsampling_method=downsampling_method, - downsampled_size=downsampled_size, - ) +def get_plot_data(plot_data_query: PlotDataRequestModel) -> dict: + return data_source.get_plot_data(plot_data_query) diff --git a/backend/ibex/data_source/data_source_interface.py b/backend/ibex/data_source/data_source_interface.py index e457258d..7c8f6219 100644 --- a/backend/ibex/data_source/data_source_interface.py +++ b/backend/ibex/data_source/data_source_interface.py @@ -1,7 +1,8 @@ """Interface for all data sources""" from abc import ABC, abstractmethod -from typing import Sequence, Optional, List +from typing import Sequence, Optional +from ibex.endpoints.schemas.request_data_schemas import PlotDataRequestModel class DataSourceInterface(ABC): @@ -123,28 +124,12 @@ def list_db_entries( """ ... - def get_plot_data( - self, - uri: str, - ids: str, - node_path: str, - occurrence: int = 0, - interpolate_over: List[str] | None = None, - interpolation_method: str | None = None, - downsampling_method: str | None = None, - downsampled_size: int = 1000, - ) -> dict: + def get_plot_data(self, plot_data_query: PlotDataRequestModel) -> dict: """ Returns all data used to plot selected quantity. Result contains data values, metadata and coordinates. - :param uri: imas URI - :param ids: name of ids e.g. core_profiles - :param node_path: path to ids node e.g. 
ids_properties/version_put - :param occurrence: ids occurrence number - :param interpolate_over: list of uris used in interpolation - :param interpolation_method: method to be used in data interpolation; one from scipy.interpolate.RegularGridInterpolator or 'exact_value' - :param downsampling_method: one of the downsampling metods returend by :func:`~ibex.endpoints.info.downsampling_methods` endpoint, or None - :param downsampled_size: target size of downsampled data + :param plot_data_query: See :class:`ibex.endpoints.schemas.request_data_schemas.PlotDataRequestModel` + :type plot_data_query: :class:`ibex.endpoints.schemas.request_data_schemas.PlotDataRequestModel` :return: Dictionary containing data values, metadata and coordinates. """ ... diff --git a/backend/ibex/data_source/imas_python_source.py b/backend/ibex/data_source/imas_python_source.py index c16921ba..6a77c6ad 100644 --- a/backend/ibex/data_source/imas_python_source.py +++ b/backend/ibex/data_source/imas_python_source.py @@ -5,6 +5,7 @@ import imas # type: ignore import numpy as np # type: ignore import re # type: ignore +from copy import copy # type: ignore from idstools.database import DBMaster # type: ignore from imas.ids_metadata import IDSMetadata # type: ignore from imas.ids_primitive import ( @@ -46,7 +47,12 @@ flatten, expand, calculate_coordinate_shapes, + apply_savgol_filter, + apply_gaussian_filter, + apply_simple_operations, ) +from ibex.core.data_manipulation_methods import SmoothingMethod, InterpolationMethod +from ibex.endpoints.schemas.request_data_schemas import PlotDataRequestModel class IMASPythonSource(DataSourceInterface): @@ -592,31 +598,21 @@ def _check_data_is_leaf_node(self, data) -> None: elif isinstance(data, IDSStructure): raise NotALeafNodeException("Cannot serialize non-leaf node") - def get_plot_data( - self, - uri: str, - ids: str, - node_path: str, - occurrence: int = 0, - interpolate_over: List[str] | None = None, - interpolation_method: str | None = None, - 
downsampling_method: str | None = None, - downsampled_size: int = 1000, - ) -> dict: + def get_plot_data(self, plot_data_query: PlotDataRequestModel) -> dict: """ Returns all data used to plot selected quantity. Result contains data values, metadata and coordinates. - :param uri: imas URI - :param ids: name of ids e.g. core_profiles - :param node_path: path to ids node e.g. ids_properties/version_put - :param occurrence: ids occurrence number - :param interpolate_over: list of uris used in interpolation - :param interpolation_method: method to be used in data interpolation; one from scipy.interpolate.RegularGridInterpolator or 'exact_value' - :param downsampling_method: one of the downsampling metods returend by :func:`~ibex.endpoints.info.downsampling_methods` endpoint, or None - :param downsampled_size: target size of downsampled data + :param plot_data_query: See :class:`ibex.endpoints.schemas.request_data_schemas.PlotDataRequestModel` + :type plot_data_query: :class:`ibex.endpoints.schemas.request_data_schemas.PlotDataRequestModel` :return: Dictionary containing data values, metadata and coordinates. 
""" + uri_obj = IMAS_URI(plot_data_query.uri) + uri = uri_obj.uri_entry_identifiers + ids = uri_obj.ids_name + node_path = uri_obj.node_path + occurrence = uri_obj.occurrence + with self._open_entry(uri) as entry: ids_obj = self._get_ids_from_entry(entry, ids, occurrence) @@ -790,6 +786,41 @@ def get_plot_data( # FE expects data's first dimension to be connected with second dimension, thus this transformation data_to_be_returned = transform_2D_data(data_to_be_returned) + # ============= BEGIN simple operations ============ + + if plot_data_query.operations is not None: + data_to_be_returned = apply_simple_operations(data_to_be_returned, plot_data_query.operations) + + # ============= END simple operations ============= + + # ============= BEGIN data smoothing ============ + + if plot_data_query.smoothing_method is not None: + if first_value.metadata.ndim != 1: + raise InvalidParametersException("Data smoothing is only supported for 1D data") + if not coordinates_to_be_returned or coordinates_to_be_returned[0]["name"] != "time": + raise InvalidParametersException( + "Data smoothing is only supported when the first coordinate is time" + ) + + if plot_data_query.smoothing_method == SmoothingMethod.SAVITZKY_GOLAY_FILTER: + data_to_be_returned = apply_savgol_filter( + data_to_be_returned, + window_length=plot_data_query.savgol_smoothing_window_length, + polyorder=plot_data_query.savgol_smoothing_polyorder, + deriv=plot_data_query.savgol_smoothing_deriv, + delta=plot_data_query.savgol_smoothing_delta, + mode=plot_data_query.savgol_smoothing_mode, + cval=plot_data_query.savgol_smoothing_cval, + ) + + elif plot_data_query.smoothing_method == SmoothingMethod.GAUSSIAN_FILTER: + data_to_be_returned = apply_gaussian_filter( + data_to_be_returned, sigma=plot_data_query.gaussian_smoothing_sigma + ) + + # ============= END data smoothing ============= + # ============= BEGIN resample data onto new time vector ============= def convert_to_lists(data): @@ -800,7 +831,7 @@ def 
convert_to_lists(data): else: return data - if interpolate_over: + if plot_data_query.interpolate_over: # =================== GATHER ALL COORDINATES =================== original_coord_values = [] new_common_coords = coordinates_to_be_returned @@ -809,7 +840,7 @@ def convert_to_lists(data): original_coord_values.append(sorted(set(flatten(c["value"])))) original_coord_values.reverse() - for _uri in interpolate_over: + for _uri in plot_data_query.interpolate_over: _uri_obj = IMAS_URI(_uri) if _uri_obj.ids_name != ids or _uri_obj.node_path != node_path: @@ -817,14 +848,11 @@ def convert_to_lists(data): "IDS name and node path should be the same for source and target URI when interpolating data" ) - interpolate_to_coordinates = self.get_plot_data( - uri=_uri_obj.uri_entry_identifiers, - ids=_uri_obj.ids_name, - node_path=_uri_obj.node_path, - occurrence=_uri_obj.occurrence, - downsampling_method=downsampling_method, - downsampled_size=downsampled_size, - )["data"]["coordinates"] + new_plot_data_query = copy(plot_data_query) + new_plot_data_query.uri = _uri + new_plot_data_query.interpolate_over = None + new_plot_data_query.smoothing_method = None + interpolate_to_coordinates = self.get_plot_data(new_plot_data_query)["data"]["coordinates"] if len(interpolate_to_coordinates) != len(coordinates_to_be_returned): message = "Interpolation error. Source and target nodes have different number of coordinates." 
@@ -846,7 +874,10 @@ def convert_to_lists(data): data_to_be_returned = pad_to_rectangular(data_to_be_returned) # === run interpolation === - if interpolation_method == "exact_value" or not interpolation_method: + if ( + plot_data_query.interpolation_method == InterpolationMethod.EXACT_VALUE + or not plot_data_query.interpolation_method + ): data_to_be_returned = resample_data_without_interpolation( tuple(original_coord_values), data_to_be_returned, tuple(common_coords_values) ) @@ -855,7 +886,7 @@ def convert_to_lists(data): tuple(original_coord_values), data_to_be_returned, tuple(common_coords_values), - interpolation_method=interpolation_method, + interpolation_method=plot_data_query.interpolation_method, ) new_coordinate_shapes = calculate_coordinate_shapes( @@ -881,15 +912,17 @@ def convert_to_lists(data): # If coordinate targets node -> downsample coordinate as well coordinates_to_be_returned[0]["value"], data_to_be_returned = downsample_data( data_to_be_returned, - target_size=downsampled_size, - method=downsampling_method, + target_size=plot_data_query.downsampled_size, + method=plot_data_query.downsampling_method, x=coordinates_to_be_returned[0]["value"], single_x_axis=(coordinates_to_be_returned[0]["path"] == f"#{ids}/time"), ) else: _, data_to_be_returned = downsample_data( - data_to_be_returned, target_size=downsampled_size, method=downsampling_method + data_to_be_returned, + target_size=plot_data_query.downsampled_size, + method=plot_data_query.downsampling_method, ) # serialize coordinates and update shapes (they could be changed by downsampling) for c in coordinates_to_be_returned: diff --git a/backend/ibex/data_source/imas_python_source_utils.py b/backend/ibex/data_source/imas_python_source_utils.py index e43b29f3..4e22c52c 100644 --- a/backend/ibex/data_source/imas_python_source_utils.py +++ b/backend/ibex/data_source/imas_python_source_utils.py @@ -3,9 +3,108 @@ import numpy as np from imas.ids_primitive import IDSNumericArray from 
scipy.interpolate import RegularGridInterpolator +from scipy.ndimage import gaussian_filter +from scipy.signal import savgol_filter from ibex.data_source.exception import InvalidParametersException +def apply_savgol_filter( + data: list | np.ndarray, + window_length: int | None, + polyorder: int | None, + deriv: int | None, + delta: float | None, + mode: str | None, + cval: float | None, +): + """ + Apply Savitzky-Golay filter to data + :param data: The input array. + :param window_length: The length of the filter window (i.e., the number of coefficients). If mode is ‘interp’, window_length must be less than or equal to the size of x. + :param polyorder: The order of the polynomial used to fit the samples. polyorder must be less than window_length. + :param deriv: The order of the derivative to compute. This must be a nonnegative integer. The default is 0, which means to filter the data without differentiating. + :param delta: The spacing of the samples to which the filter will be applied. This is only used if deriv > 0. Default is 1.0. + :param mode: Must be ‘mirror’, ‘constant’, ‘nearest’, ‘wrap’ or ‘interp’. + :param cval: Value to fill past the edges of the input if mode is ‘constant’. Default is 0.0. + + :return: Data with filter applied + """ + + params = { + "window_length": window_length, + "polyorder": polyorder, + "deriv": deriv, + "delta": delta, + "mode": mode, + "cval": cval, + } + non_empty_params = {k: v for k, v in params.items() if v is not None} + + if isinstance(data, list): + return [apply_savgol_filter(x, **params) for x in data] + elif isinstance(data, (np.ndarray, IDSNumericArray)): + return savgol_filter(data, **non_empty_params) + else: + msg = "Smoothing can be executed only on numeric arrays, not single values or strings." + raise InvalidParametersException(msg) + + +def apply_gaussian_filter(data: list | np.ndarray, sigma): + """ + Apply Gaussian filter to data + :param data: The input array.
+ :param sigma: Standard deviation for Gaussian kernel. The standard deviations of the Gaussian filter are given for each axis as a sequence, or as a single number, in which case it is equal for all axes. + :return: Data with filter applied + """ + if isinstance(data, list): + return [apply_gaussian_filter(x, sigma) for x in data] + elif isinstance(data, (np.ndarray, IDSNumericArray)): + return gaussian_filter(data, sigma=sigma) + else: + msg = "Smoothing can be executed only on numeric arrays, not single values or strings." + raise InvalidParametersException(msg) + + +def _safe_division(data, divisor): + if divisor == 0: + raise InvalidParametersException("Division by zero is not allowed") + return data / divisor + + +def apply_simple_operations(data: list | np.ndarray, operations: list[str]): + """ + Apply simple scalar operations to data in the order given. + Each operation is a string in the format 'type:value', e.g. 'add:10', 'mul:5'. + :param data: Input data + :param operations: List of operations and operands divided by colon (:); supported types are add, sub, mul, div, pow and root, and a zero operand is rejected for div and root + :return: Data after operation + """ + _OP_FUNCS = { + "add": lambda r, v: r + v, + "sub": lambda r, v: r - v, + "mul": lambda r, v: r * v, + "div": lambda r, v: _safe_division(r, v), + "pow": lambda r, v: np.power(r, v), + "root": lambda r, v: np.power(r, _safe_division(1.0, v)), + } + + if isinstance(data, list): + return [apply_simple_operations(x, operations) for x in data] + elif isinstance(data, (np.ndarray, IDSNumericArray)): + result = data + for op_str in operations: + op_type, value_str = op_str.split(":", 1) + value = float(value_str) + func = _OP_FUNCS.get(op_type) + if func is None: + raise InvalidParametersException(f"Unknown operation type: {op_type}") + result = func(result, value) + return result + else: + msg = "Simple operations can be executed only on numeric arrays, not single values or strings."
+ raise InvalidParametersException(msg) + + def union_arrays(data: list): return reduce(np.union1d, data) @@ -62,7 +161,7 @@ def expand(data: list, grid_shape: list): :param data: 1D input array of shape (N,) :param grid_shape: target grid shape (e.g. [4, 3, 5]) - :return: broadcasted array of shape (*grid_shape, N) + :return: broadcasted array of shape ``(*grid_shape, N)`` :raises ValueError: if input data is not 1-dimensional """ diff --git a/backend/ibex/endpoints/data.py b/backend/ibex/endpoints/data.py index 031f9830..05930d35 100644 --- a/backend/ibex/endpoints/data.py +++ b/backend/ibex/endpoints/data.py @@ -1,13 +1,14 @@ """Endpoints extracting data from data source""" import orjson -from typing import List, Any, Optional +from typing import Any, Annotated from fastapi import APIRouter, Query # type: ignore from fastapi.responses import ORJSONResponse # type: ignore from ibex.core import ibex_service -from ibex.endpoints.schemas.data_schemas import FieldValueResponse, PlotDataResponse +from ibex.endpoints.schemas.request_data_schemas import PlotDataRequestModel +from ibex.endpoints.schemas.response_data_schemas import FieldValueResponse, PlotDataResponse router = APIRouter() @@ -69,17 +70,12 @@ def field_value( 200: {"description": "Plot data returned successfully"}, 404: {"description": "Data node not found"}, 464: {"description": "Given data node is empty"}, + 466: {"description": "Invalid parameters for requested data manipulation"}, }, description="Returns single (or tensorized) data node value with detailed parameters used to plot the data", ) @ibex_service.measure_execution_time -def plot_data( - uri: str, - interpolate_over: Optional[List[str]] = Query(None), - interpolation_method: Optional[str] = Query(None), - downsampling_method: str | None = Query(None), - downsampled_size: int = 1000, -) -> Any: +def plot_data(plot_data_query: Annotated[PlotDataRequestModel, Query()]) -> CustomORJSONResponse: """ IBEX endpoint. 
Prepares and returns full information about data node and it's coordinates. @@ -113,20 +109,12 @@ def plot_data( | } | } - :param uri: IMAS URI with the path to leaf node - :param interpolate_over: list of IMAS URIs used in interpolation. E.g. imas:hdf5?path=/home/ITER/wasikj/Desktop/work/IBEX/testdb2#equilibrium/time_slice[:]/profiles_2d[:]/psi - :param interpolation_method: method of interpolation; one of the possible parameters provided from /info/data_manipulation_methods - :param downsampling_method: one of the downsampling metods returend by :func:`~ibex.endpoints.info.downsampling_methods` endpoint, or None - :param downsampled_size: target size of downsampled data + :param plot_data_query: See :class:`ibex.endpoints.schemas.request_data_schemas.PlotDataRequestModel` + :type plot_data_query: :class:`ibex.endpoints.schemas.request_data_schemas.PlotDataRequestModel` + :rtype: CustomORJSONResponse :return: JSON response + """ - return CustomORJSONResponse( - ibex_service.get_plot_data( - uri=uri.strip(), - interpolate_over=interpolate_over, - interpolation_method=interpolation_method, - downsampling_method=downsampling_method, - downsampled_size=downsampled_size, - ) - ) + + return CustomORJSONResponse(ibex_service.get_plot_data(plot_data_query.model_copy(update={"uri": plot_data_query.uri.strip()}))) diff --git a/backend/ibex/endpoints/data_entry.py b/backend/ibex/endpoints/data_entry.py index ff09bd56..bd42e342 100644 --- a/backend/ibex/endpoints/data_entry.py +++ b/backend/ibex/endpoints/data_entry.py @@ -5,7 +5,7 @@ from fastapi import APIRouter # type: ignore from ibex.core import ibex_service -from ibex.endpoints.schemas.data_entry_schemas import ( +from ibex.endpoints.schemas.response_data_entry_schemas import ( UriFromPathResponse, ExistsResponse, ListIdsesResponse, diff --git a/backend/ibex/endpoints/ids_info.py b/backend/ibex/endpoints/ids_info.py index 594399ae..f8c0ad05 100644 --- a/backend/ibex/endpoints/ids_info.py +++ b/backend/ibex/endpoints/ids_info.py @@ -3,7 +3,7 @@
from fastapi import APIRouter # type: ignore from ibex.core import ibex_service -from ibex.endpoints.schemas.ids_info_schemas import NodeInfoResponse, FindPathsResponse, ArraySummaryResponse +from ibex.endpoints.schemas.response_ids_info_schemas import NodeInfoResponse, FindPathsResponse, ArraySummaryResponse router = APIRouter() diff --git a/backend/ibex/endpoints/info.py b/backend/ibex/endpoints/info.py index e6ce21a3..214b918c 100644 --- a/backend/ibex/endpoints/info.py +++ b/backend/ibex/endpoints/info.py @@ -5,11 +5,12 @@ from ibex.core import ibex_service from ibex.core.utils import DownsamplingMethods from ibex import __version__ -from ibex.endpoints.schemas.info_schemas import ( +from ibex.endpoints.schemas.response_info_schemas import ( VersionResponse, DownsamplingMethodsResponse, DataManipulationMethodsResponse, ) +from ibex.core.data_manipulation_methods import available_methods router = APIRouter() @@ -94,54 +95,5 @@ def data_manipulation_methods() -> dict: :return: JSON response """ - res = { - "data_manipulation_methods": [ - { - "name": "Data interpolation", - "description": "Operation performed in order to represent dataset over different set of coordinates", - "method_parameters": [ - { - "human_readable_name": "Interpolate over", - "name": "interpolate_over", - "description": "List of URIs to gather coordinates from, for interpolation", - }, - { - "human_readable_name": "Data interpolation method", - "name": "interpolation_method", - "description": "Method used during data interpolation. All possible for scipy.interpolate.RegularGridInterpolator 'method' parameter or 'exact'", - "possible_values": [ - { - "value": "exact_value", - "description": "values are present only on data points where they were originally. 
Rest of the data grid is filled with NaNs", - }, - { - "value": "linear", - "description": "see scipy.interpolate.RegularGridInterpolator documentation", - }, - { - "value": "nearest", - "description": "see scipy.interpolate.RegularGridInterpolator documentation", - }, - { - "value": "slinear", - "description": "see scipy.interpolate.RegularGridInterpolator documentation", - }, - { - "value": "cubic", - "description": "see scipy.interpolate.RegularGridInterpolator documentation", - }, - { - "value": "quintic", - "description": "see scipy.interpolate.RegularGridInterpolator documentation", - }, - { - "value": "pchip", - "description": "see scipy.interpolate.RegularGridInterpolator documentation", - }, - ], - }, - ], - } - ] - } - return res + + return available_methods diff --git a/backend/ibex/endpoints/schemas/request_data_schemas.py b/backend/ibex/endpoints/schemas/request_data_schemas.py new file mode 100644 index 00000000..4c0bc2a6 --- /dev/null +++ b/backend/ibex/endpoints/schemas/request_data_schemas.py @@ -0,0 +1,122 @@ +from pydantic import BaseModel, Field, model_validator +from typing import Optional, List +from ibex.core.data_manipulation_methods import available_methods, SmoothingMethod +from enum import Enum + + +def _get_parameter_possible_values(parameter_name: str) -> list[str]: + """ + Helper function. Return allowed values declared for a top-level data manipulation parameter. + """ + for method in available_methods.data_manipulation_methods: + for method_parameter in method.method_parameters: + if method_parameter.name == parameter_name: + return [possible_value.value for possible_value in (method_parameter.possible_values or [])] + return [] + + +def _get_connected_parameter_possible_values(parameter_name: str) -> list[str]: + """ + Helper function. Returns allowed values for a connected parameter referenced by name. 
+ :param parameter_name: name of the additional (connected) parameter to look up, e.g. 'savgol_smoothing_mode' + :return: possible values declared for that parameter (empty list when it declares none); ValueError is raised when no additional parameter has that name + """ + for method in available_methods.data_manipulation_methods: + for method_parameter in method.method_parameters: + if not method_parameter.possible_values: + continue + + for possible_value in method_parameter.possible_values: + if not possible_value.additional_parameters: + continue + + for connected_parameter in possible_value.additional_parameters: + if connected_parameter.name == parameter_name: + return connected_parameter.possible_values or [] + + raise ValueError(f"Possible values for '{parameter_name}' not found") + + +# Create ENUM from savgol smoothing mode (for validation) +SavgolSmoothingMode = Enum( + "SavgolSmoothingMode", + {value.upper(): value for value in _get_connected_parameter_possible_values("savgol_smoothing_mode")}, + type=str, +) + +# ========== PLOT DATA ========== + + +class SavgolSmoothingParameters(BaseModel): + savgol_smoothing_window_length: int | None = Field( + default=None, + description="The length of the filter window (i.e., the number of coefficients). If mode is 'interp', window_length must be less than or equal to the size of x.", + ) + savgol_smoothing_polyorder: int | None = Field( + default=None, + description="The order of the polynomial used to fit the samples. polyorder must be less than window_length.", + ) + savgol_smoothing_deriv: int | None = Field( + default=None, + description="The order of the derivative to compute. This must be a nonnegative integer. The default is 0, which means to filter the data without differentiating.", + ) + savgol_smoothing_delta: float | None = Field( + default=None, + description="The spacing of the samples to which the filter will be applied. This is only used if deriv > 0.
Default is 1.0.", + ) + savgol_smoothing_mode: SavgolSmoothingMode = Field( + default=SavgolSmoothingMode.INTERP, + description="Must be 'mirror', 'constant', 'nearest', 'wrap' or 'interp' (default).", + ) + savgol_smoothing_cval: float | None = Field( + default=None, + description="Value to fill past the edges of the input if mode is 'constant'. Default is 0.0.", + ) + + +class GaussianSmoothingParameters(BaseModel): + gaussian_smoothing_sigma: float | None = Field( + default=None, + description="Standard deviation for Gaussian kernel.", + ) + + +class PlotDataBasicParameters(BaseModel): + """...""" + + uri: str = Field(description="IMAS URI") + interpolate_over: Optional[List[str]] = Field( + default=None, description="List of IMAS URIs to be used in data interpolation" + ) + interpolation_method: str | None = Field(default=None, description="Interpolation method to be used") + downsampling_method: str | None = Field(default=None, description="Downsampling method to be used") + downsampled_size: int = Field(default=1000, description="Desired size of the data after downsampling") + smoothing_method: SmoothingMethod | None = Field(default=None, description="Smoothing method to be used") + operations: Optional[List[str]] = Field( + default=None, + description="Ordered list of scalar operations in format 'type:value' e.g. 
'add:10'", + ) + + +class PlotDataRequestModel( + PlotDataBasicParameters, + SavgolSmoothingParameters, + GaussianSmoothingParameters, +): + @model_validator(mode="after") + def validate_gaussian_smoothing_parameters(self) -> "PlotDataRequestModel": + if self.smoothing_method == SmoothingMethod.GAUSSIAN_FILTER and self.gaussian_smoothing_sigma is None: + raise ValueError("gaussian_smoothing_sigma is required when smoothing_method is 'gaussian_filter'") + + if self.smoothing_method == SmoothingMethod.SAVITZKY_GOLAY_FILTER: + if self.savgol_smoothing_window_length is None: + raise ValueError( + "savgol_smoothing_window_length is required when smoothing_method is 'savitzky-golay_filter'" + ) + + if self.savgol_smoothing_polyorder is None: + raise ValueError( + "savgol_smoothing_polyorder is required when smoothing_method is 'savitzky-golay_filter'" + ) + + return self diff --git a/backend/ibex/endpoints/schemas/data_entry_schemas.py b/backend/ibex/endpoints/schemas/response_data_entry_schemas.py similarity index 100% rename from backend/ibex/endpoints/schemas/data_entry_schemas.py rename to backend/ibex/endpoints/schemas/response_data_entry_schemas.py diff --git a/backend/ibex/endpoints/schemas/data_schemas.py b/backend/ibex/endpoints/schemas/response_data_schemas.py similarity index 100% rename from backend/ibex/endpoints/schemas/data_schemas.py rename to backend/ibex/endpoints/schemas/response_data_schemas.py diff --git a/backend/ibex/endpoints/schemas/ids_info_schemas.py b/backend/ibex/endpoints/schemas/response_ids_info_schemas.py similarity index 100% rename from backend/ibex/endpoints/schemas/ids_info_schemas.py rename to backend/ibex/endpoints/schemas/response_ids_info_schemas.py diff --git a/backend/ibex/endpoints/schemas/response_info_schemas.py b/backend/ibex/endpoints/schemas/response_info_schemas.py new file mode 100644 index 00000000..c6bd4e02 --- /dev/null +++ b/backend/ibex/endpoints/schemas/response_info_schemas.py @@ -0,0 +1,28 @@ +from pydantic
import BaseModel, Field +from ibex.core import data_manipulation_methods + + +# ========== VERSION ========== +class VersionResponse(BaseModel): + """Response for /info/version endpoint""" + + version: str = Field(description="IBEX version", examples=["0.0.1", "1.0.2"]) + + +# ========== DOWNSAMPLING METHODS ========== +class DownsamplingMethodModel(BaseModel): + """Intermediate model for /info/downsampling_methods endpoint""" + + name: str = Field(description="Method name", examples=["STEP", "STEP_AVERAGE"]) + description: str = Field(description="Method description", examples=["Simple step algorithm"]) + + +class DownsamplingMethodsResponse(BaseModel): + """Response for /info/downsampling_methods endpoint""" + + downsampling_methods: list[DownsamplingMethodModel] = Field(description="Available downsampling methods") + + +# ========== DATA MANIPULATION METHODS ========== + +DataManipulationMethodsResponse = data_manipulation_methods.DataManipulationMethodsResponse diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py index 1bedf148..bdfcaeab 100644 --- a/backend/tests/conftest.py +++ b/backend/tests/conftest.py @@ -82,6 +82,9 @@ def entry_path(tmp_path_factory): profiles_2d.grid.dim2 = np.array([0, 1, 2]) i += 10 + # ===== for data smoothing (must be time-based) ===== + core_profiles.global_quantities.ip = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) + entry.put(core_profiles) entry.close() diff --git a/backend/tests/test_data_endpoints.py b/backend/tests/test_data_endpoints.py index 5ddd8f51..791a325e 100644 --- a/backend/tests/test_data_endpoints.py +++ b/backend/tests/test_data_endpoints.py @@ -69,6 +69,77 @@ def test_plot_data(entry_path): assert time_coordinate["description"] == "Generic time" +def test_plot_data_with_gaussian_smoothing(entry_path): + parameters = { + "uri": f"imas:hdf5?path={entry_path}#core_profiles/global_quantities/ip", + "smoothing_method": "gaussian_filter", + "gaussian_smoothing_sigma": 1, + } + response = 
pytest.test_client.get("/data/plot_data", params=parameters) + assert response.status_code == 200 + + response_body = response.json() + assert response_body["data"]["value"] == pytest.approx([1.42, 2.06, 3.0, 3.93, 4.57], 0.1) + + +def test_plot_data_with_savgol_smoothing(entry_path): + parameters = { + "uri": f"imas:hdf5?path={entry_path}#core_profiles/global_quantities/ip", + "smoothing_method": "savitzky-golay_filter", + "savgol_smoothing_window_length": 5, + "savgol_smoothing_polyorder": 2, + } + response = pytest.test_client.get("/data/plot_data", params=parameters) + assert response.status_code == 200 + + response_body = response.json() + assert response_body["data"]["value"] == pytest.approx([0.99, 2.0, 3.0, 4.0, 5.0], 0.1) + + +def test_plot_data_with_simple_operations(entry_path): + # core_profiles.time = [1,2,3,4,5] (float) + cases = [ + ( + {"operations": ["add:2", "mul:3"]}, + [9.0, 12.0, 15.0, 18.0, 21.0], + ), + ( + {"operations": ["mul:3", "add:2"]}, + [5.0, 8.0, 11.0, 14.0, 17.0], + ), + ( + {"operations": ["div:2"]}, + [0.5, 1.0, 1.5, 2.0, 2.5], + ), + ( + {"operations": ["pow:2"]}, + [1.0, 4.0, 9.0, 16.0, 25.0], + ), + ( + {"operations": ["root:2"]}, + [1.0, 1.41421356237, 1.73205080757, 2.0, 2.2360679775], + ), + ] + + for params, expected in cases: + parameters = {"uri": f"imas:hdf5?path={entry_path}#core_profiles/time", **params} + response = pytest.test_client.get("/data/plot_data", params=parameters) + assert response.status_code == 200 + + response_body = response.json() + assert response_body["data"]["value"] == pytest.approx(expected) + + +def test_plot_data_smoothing_with_wrong_target_node(entry_path): + parameters = { + "uri": f"imas:hdf5?path={entry_path}#core_profiles/time", # targeted quantity must be time-based + "smoothing_method": "gaussian_filter", + "gaussian_smoothing_sigma": 1, + } + response = pytest.test_client.get("/data/plot_data", params=parameters) + assert response.status_code == 466 + + def
test_plot_data_2d(entry_path): parameters = { "uri": f"imas:hdf5?path={entry_path}#core_profiles/profiles_2d[:]/ion[:]/temperature", @@ -122,3 +193,32 @@ def test_plot_data_1_N_coord(entry_path): assert numeric_coordinate["ndim"] == 1 assert numeric_coordinate["path"] == "" assert numeric_coordinate["description"] == "1...N" + + +def test_plot_data_requires_gaussian_sigma(entry_path): + parameters = { + "uri": f"imas:hdf5?path={entry_path}#core_profiles/profiles_1d[:]/time", + "smoothing_method": "gaussian_filter", + } + response = pytest.test_client.get("/data/plot_data", params=parameters) + + assert response.status_code == 422 + assert "gaussian_smoothing_sigma is required" in response.text + + +def test_plot_data_requires_savgol_window_length_and_polyorder(entry_path): + base_parameters = { + "uri": f"imas:hdf5?path={entry_path}#core_profiles/profiles_1d[:]/time", + "smoothing_method": "savitzky-golay_filter", + } + + response = pytest.test_client.get("/data/plot_data", params=base_parameters) + assert response.status_code == 422 + assert "savgol_smoothing_window_length is required" in response.text + + response = pytest.test_client.get( + "/data/plot_data", + params={**base_parameters, "savgol_smoothing_window_length": 5}, + ) + assert response.status_code == 422 + assert "savgol_smoothing_polyorder is required" in response.text diff --git a/backend/tests/test_data_manipulation.py b/backend/tests/test_data_manipulation.py new file mode 100644 index 00000000..78ca8633 --- /dev/null +++ b/backend/tests/test_data_manipulation.py @@ -0,0 +1,88 @@ +import numpy as np +import pytest +from ibex.data_source.exception import InvalidParametersException +from ibex.data_source.imas_python_source_utils import ( + apply_gaussian_filter, + apply_savgol_filter, + apply_simple_operations, +) + + +def test_apply_gaussian_smoothing(): + data = np.array([10.25, 12.8, 15.4, 18.15, 21.0, 24.35, 27.6, 30.2, 33.75, 36.1]) + + expected_sigma_1 = [11.343, 13.002, 15.479, 18.228, 21.18, 
24.309, 27.414, 30.417, 33.211, 35.019] + expected_sigma_2 = [13.286, 14.311, 16.176, 18.615, 21.385, 24.291, 27.152, 29.743, 31.761, 32.881] + + assert expected_sigma_1 == pytest.approx(apply_gaussian_filter(data, sigma=1), 0.1) + assert expected_sigma_2 == pytest.approx(apply_gaussian_filter(data, sigma=2), 0.1) + + +def test_apply_savitzky_golay_smoothing(): + data = np.array([10.25, 12.8, 15.4, 18.15, 21.0, 24.35, 27.6, 30.2, 33.75, 36.1]) + + expected_window_5_poly_2 = [10.257, 12.781, 15.413, 18.111, 21.086, 24.346, 27.416, 30.521, 33.426, 36.209] + expected_window_7_poly_3_deriv_1 = [2.533, 4.694, 5.717, 5.69, 6.303, 6.242, 6.338, 6.442, 5.184, 2.735] + + assert expected_window_5_poly_2 == pytest.approx( + apply_savgol_filter( + data, + window_length=5, + polyorder=2, + deriv=0, + delta=1.0, + mode="interp", + cval=0.0, + ), + 0.1, + ) + + assert expected_window_7_poly_3_deriv_1 == pytest.approx( + apply_savgol_filter( + data, + window_length=7, + polyorder=3, + deriv=1, + delta=0.5, + mode="nearest", + cval=0.0, + ), + 0.1, + ) + + +@pytest.mark.parametrize( + ("operations", "data", "expected"), + [ + (["add:2"], np.array([1.0, 2.0, 3.0]), np.array([3.0, 4.0, 5.0])), + (["sub:1"], np.array([3.0, 4.0, 5.0]), np.array([2.0, 3.0, 4.0])), + (["mul:3"], np.array([1.0, 2.0, 3.0]), np.array([3.0, 6.0, 9.0])), + (["div:2"], np.array([2.0, 4.0, 6.0]), np.array([1.0, 2.0, 3.0])), + (["pow:2"], np.array([2.0, 3.0, 4.0]), np.array([4.0, 9.0, 16.0])), + (["root:2"], np.array([1.0, 4.0, 9.0]), np.array([1.0, 2.0, 3.0])), + ], +) +def test_apply_simple_operations(operations, data, expected): + assert np.asarray(expected) == pytest.approx(apply_simple_operations(data, operations)) + + +def test_apply_simple_operations_recurses_over_lists(): + data = [np.array([1.0, 2.0]), np.array([3.0, 4.0])] + result = apply_simple_operations(data, ["add:1", "mul:2", "add:3"]) + assert np.asarray(result[0]) == pytest.approx([7.0, 9.0]) + assert np.asarray(result[1]) == 
pytest.approx([11.0, 13.0]) + + +def test_apply_simple_operations_rejects_division_by_zero(): + with pytest.raises(InvalidParametersException, match="Division by zero is not allowed"): + apply_simple_operations(np.array([1.0]), ["div:0"]) + + +def test_apply_simple_operations_uses_order(): + data = np.array([5.0]) + # mul then add -> (5*2)+1 = 11 + result = apply_simple_operations(data, ["mul:2", "add:1"]) + assert result == pytest.approx([11.0]) + # add then mul -> (5+1)*2 = 12 + result = apply_simple_operations(data, ["add:1", "mul:2"]) + assert result == pytest.approx([12.0]) diff --git a/docs/source/_templates/custom-class-template.rst b/docs/source/_templates/custom-class-template.rst index b29757c5..0ad8c217 100644 --- a/docs/source/_templates/custom-class-template.rst +++ b/docs/source/_templates/custom-class-template.rst @@ -8,14 +8,14 @@ :inherited-members: {% block methods %} - .. automethod:: __init__ - {% if methods %} .. rubric:: {{ _('Methods') }} .. autosummary:: {% for item in methods %} + {% if item != '__init__' %} ~{{ name }}.{{ item }} + {% endif %} {%- endfor %} {% endif %} {% endblock %} diff --git a/docs/source/_templates/custom-module-template.rst b/docs/source/_templates/custom-module-template.rst index bd69dd82..89a9dbdc 100644 --- a/docs/source/_templates/custom-module-template.rst +++ b/docs/source/_templates/custom-module-template.rst @@ -51,15 +51,39 @@ {% endif %} {% endblock %} -{% block modules %} -{% if modules %} -.. rubric:: Modules + {% block modules %} + {% if modules %} + .. rubric:: Modules -.. 
autosummary:: - :toctree: - :template: custom-module-template.rst -{% for item in modules | reject("equalto", "test") %} - {{ item }} -{%- endfor %} -{% endif %} -{% endblock %} + {% set schema_modules = [] %} + {% set regular_modules = [] %} + {% for item in modules | reject("equalto", "test") %} + {% if item == "ibex.endpoints.schemas" %} + {% set _ = schema_modules.append(item) %} + {% else %} + {% set _ = regular_modules.append(item) %} + {% endif %} + {% endfor %} + + {% if regular_modules %} + .. autosummary:: + :toctree: + :template: custom-module-template.rst + :recursive: + {% for item in regular_modules %} + {{ item }} + {%- endfor %} + {% endif %} + + {% if schema_modules %} + .. autosummary:: + :toctree: + :template: custom-pydantic-model-template.rst + :recursive: + {% for item in schema_modules %} + {{ item }} + {%- endfor %} + {% endif %} + + {% endif %} + {% endblock %} diff --git a/docs/source/_templates/custom-pydantic-model-template.rst b/docs/source/_templates/custom-pydantic-model-template.rst new file mode 100644 index 00000000..fe699057 --- /dev/null +++ b/docs/source/_templates/custom-pydantic-model-template.rst @@ -0,0 +1,47 @@ +{{ fullname | escape | underline}} + +{% if objtype in ['module', 'package'] %} +.. automodule:: {{ fullname }} + + {% if modules %} + .. rubric:: Modules + + .. autosummary:: + :toctree: + :template: custom-pydantic-model-template.rst + :recursive: + {% for item in modules %} + {{ item }} + {%- endfor %} + {% endif %} + + {% set public_members = [] %} + {% for item in members %} + {% if not item.startswith('_') %} + {% set _ = public_members.append(item) %} + {% endif %} + {% endfor %} + + {% if public_members %} + .. rubric:: Models + + .. autosummary:: + :toctree: + :template: custom-pydantic-model-template.rst + {% for item in public_members %} + {{ fullname }}.{{ item }} + {%- endfor %} + {% endif %} + +{% elif objtype == 'pydantic_model' %} +.. currentmodule:: {{ module }} + +.. 
autopydantic_model:: {{ objname }} + :members: + :undoc-members: + :model-summary-list-order: bysource + :model-show-validator-members: False + :model-show-validator-summary: False + :model-show-config-summary: False + :model-show-json: False +{% endif %} \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index 3860d5a5..ed8bbfe2 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -294,8 +294,8 @@ # Configuration of sphinx.ext.mathjax # https://www.sphinx-doc.org/en/master/usage/extensions/math.html#module-sphinx.ext.mathjax -autodoc_pydantic_model_show_json = True -autodoc_pydantic_model_show_config_summary = True +autodoc_pydantic_model_show_json = False +autodoc_pydantic_model_show_config_summary = False def escape_underscores(string): diff --git a/docs/source/developers_manual/backend_development/adding_new_data_manipulation_method.rst b/docs/source/developers_manual/backend_development/adding_new_data_manipulation_method.rst new file mode 100644 index 00000000..287aefde --- /dev/null +++ b/docs/source/developers_manual/backend_development/adding_new_data_manipulation_method.rst @@ -0,0 +1,51 @@ +.. _`Adding new data manipulation method`: + + +Adding a new data manipulation method +-------------------------------------- + +Adding a new data manipulation operation requires updates in three places: the request model, the operation description registry, and the backend execution path. + +Request model +~~~~~~~~~~~~~~ + +Expose the new operation through the request schema in ``backend/ibex/endpoints/schemas/request_data_schemas.py``. 
+ +In practice this usually means: + +* adding a new top-level selector field to ``PlotDataBasicParameters`` if the operation introduces a new method family +* adding a dedicated parameter model when the operation needs extra configuration fields +* extending ``PlotDataRequestModel`` so the new parameters are accepted by ``/data/plot_data/`` +* adding validation in a ``model_validator`` when some parameters are required only for specific operation modes + +This is the layer that defines which query parameters are accepted and how they are validated before the request reaches the data source. + +Operation description registry +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Register the operation in ``backend/ibex/core/data_manipulation_methods.py``. + +This file provides the metadata returned by ``/info/data_manipulation_methods/``, so every new operation should be described there using: + +* ``DataManipulationOperation`` for the operation itself +* ``DataManipulationParameter`` for top-level request parameters +* ``PossibleValue`` for supported modes or variants +* ``AdditionalParameter`` for parameters that are only relevant to a specific mode + +This description should match the request schema exactly. +If a parameter is accepted by ``PlotDataRequestModel``, it should also be reflected here so that the API can describe it consistently. + +Backend execution +~~~~~~~~~~~~~~~~~~ + +Implement the actual operation in ``backend/ibex/data_source/imas_python_source.py``. + +This is where the backend transforms the numerical data returned from the IDS source. + +When adding a new operation: + +* read the parameters from ``plot_data_query`` +* apply the transformation to ``data_to_be_returned`` + +If the logic becomes substantial or reusable, the numerical transformation itself should be extracted into a helper function and then called from the data source flow. 
+ diff --git a/docs/source/developers_manual/backend_development/backend_development.rst b/docs/source/developers_manual/backend_development/backend_development.rst index 6dc22a94..1a06293d 100644 --- a/docs/source/developers_manual/backend_development/backend_development.rst +++ b/docs/source/developers_manual/backend_development/backend_development.rst @@ -6,6 +6,8 @@ Backend development backend_development_introduction data_interpolation + data_manipulation + adding_new_data_manipulation_method adding_new_data_source adding_new_downsampling_method benchmarking diff --git a/docs/source/developers_manual/backend_development/data_manipulation.rst b/docs/source/developers_manual/backend_development/data_manipulation.rst new file mode 100644 index 00000000..39263c23 --- /dev/null +++ b/docs/source/developers_manual/backend_development/data_manipulation.rst @@ -0,0 +1,164 @@ +.. _`Data manipulation`: + +====================== +Data manipulation +====================== + +Introduction +------------- + +The IBEX backend provides a range of data manipulation techniques that directly affect the shape and appearance of the resulting plots. +These operations are applied as part of the ``/data/plot_data/`` request flow and allow the backend to transform datasets before they are returned to the frontend. +The backend applies the manipulation stages in this order: + +1. simple data operations +2. data smoothing +3. data interpolation +4. downsampling + +This means later stages operate on the output of earlier ones when the corresponding request parameters are enabled. + +Data smoothing +--------------- + +IBEX supports smoothing and denoising of returned datasets. +This functionality is intended for cases where the raw signal contains high-frequency noise and a filtered representation is preferred for visualization or analysis. + +Configuration +~~~~~~~~~~~~~~ + +Data smoothing is configured through the ``smoothing_method`` parameter of the ``/data/plot_data/`` endpoint. 
+It is only accepted for nodes whose first coordinate is ``time``. +If a different first coordinate is used, the backend rejects the request with an invalid-parameters error. + +At the moment, the backend supports the following smoothing methods: + +* ``gaussian_filter`` +* ``savitzky-golay_filter`` + +The full list of available methods and their parameters can be retrieved from the ``/info/data_manipulation_methods/`` endpoint. + +Gaussian smoothing +~~~~~~~~~~~~~~~~~~~ + +The ``gaussian_filter`` method applies a Gaussian kernel to the returned data. +It requires the ``gaussian_smoothing_sigma`` parameter, which defines the standard deviation of the Gaussian kernel. + + +Savitzky-Golay smoothing +~~~~~~~~~~~~~~~~~~~~~~~~~ + +The ``savitzky-golay_filter`` method applies a Savitzky-Golay filter to the returned data. +This approach smooths the data while preserving local shape better than a simple Gaussian filter in many cases. + +The following parameters are supported: + +* ``savgol_smoothing_window_length``: required +* ``savgol_smoothing_polyorder``: required +* ``savgol_smoothing_deriv``: optional +* ``savgol_smoothing_delta``: optional +* ``savgol_smoothing_mode``: optional, one of ``mirror``, ``constant``, ``nearest``, ``wrap``, ``interp`` +* ``savgol_smoothing_cval``: optional + + +Implementation +~~~~~~~~~~~~~~~ + +Data smoothing is applied in the backend after the raw IDS data has been converted to a NumPy array and after the internal 2D data transformation step, when applicable. + +The current implementation uses SciPy-based smoothing routines: + +* Gaussian smoothing is applied with a Gaussian filter implementation. +* Savitzky-Golay smoothing is applied with a Savitzky-Golay filter implementation. + +Because smoothing modifies the returned numerical values, it should be treated as a visualization-oriented transformation and not as a lossless representation of the original dataset.
+ +Example usage +~~~~~~~~~~~~~~ + +The following examples demonstrate how smoothing can be enabled for testing purposes. + +Gaussian smoothing: + +.. code-block:: bash + + curl -X 'GET' \ + '/data/plot_data?uri=&smoothing_method=gaussian_filter&gaussian_smoothing_sigma=1.0' \ + -H 'accept: application/json' + +Savitzky-Golay smoothing: + +.. code-block:: bash + + curl -X 'GET' \ + '/data/plot_data?uri=&smoothing_method=savitzky-golay_filter&savgol_smoothing_window_length=5&savgol_smoothing_polyorder=2' \ + -H 'accept: application/json' + + +Simple data operations +------------------------ + +IBEX supports a sequence of scalar operations that can be applied element-wise to every data point in the returned dataset. +Simple data operations use fixed numeric values. + +The following operation types are supported: + +* ``add`` — addition +* ``sub`` — subtraction +* ``mul`` — multiplication +* ``div`` — division +* ``pow`` — exponentiation +* ``root`` — Nth root + +Operations are applied **in the order they appear** in the request. +The backend executes them sequentially on the numerical data **before** smoothing, interpolation, or downsampling. + +Configuration +~~~~~~~~~~~~~~ + +Simple data operations are configured through the ``operations`` parameter of the ``/data/plot_data/`` endpoint. +It accepts a list of strings in the format ``type:value``, for example ``add:10`` or ``mul:2.5``. + +The full list of available operations can be retrieved from the ``/info/data_manipulation_methods/`` endpoint. + +Division by zero is rejected by the backend with an ``InvalidParametersException`` ("Division by zero is not allowed"). + +Implementation +~~~~~~~~~~~~~~~ + +Simple data operations are applied in ``apply_simple_operations()`` in +``backend/ibex/data_source/imas_python_source_utils.py``. + +The function iterates over the list of operation strings in order, splitting each on the colon to extract the operation type and the scalar value.
+For each operation the corresponding arithmetic lambda is applied element-wise to the data array. + +The implementation handles both flat arrays and nested lists of arrays (higher-dimensional data) recursively. + +Example usage +~~~~~~~~~~~~~~ + +The following examples demonstrate how simple data operations can be enabled for testing purposes. + +Single operation (addition by 2): + +.. code-block:: bash + + curl -X 'GET' \ + '/data/plot_data?uri=&operations=add:2' \ + -H 'accept: application/json' + +Two chained operations (add then multiply): + +.. code-block:: bash + + curl -X 'GET' \ + '/data/plot_data?uri=&operations=add:2&operations=mul:3' \ + -H 'accept: application/json' + +Order matters (multiply then add yields different result): + +.. code-block:: bash + + curl -X 'GET' \ + '/data/plot_data?uri=&operations=mul:3&operations=add:2' \ + -H 'accept: application/json'