diff --git a/backend/ibex/core/data_manipulation_methods.py b/backend/ibex/core/data_manipulation_methods.py new file mode 100644 index 00000000..b35e330e --- /dev/null +++ b/backend/ibex/core/data_manipulation_methods.py @@ -0,0 +1,172 @@ +from typing import Optional +from enum import Enum +from pydantic import BaseModel + + +class InterpolationMethod(str, Enum): + EXACT_VALUE = "exact_value" + LINEAR = "linear" + NEAREST = "nearest" + + +class SmoothingMethod(str, Enum): + GAUSSIAN_FILTER = "gaussian_filter" + SAVITZKY_GOLAY_FILTER = "savitzky-golay_filter" + + +class AdditionalParameter(BaseModel): + """ + Parameters for specific data manipulation methods. E.g. sigma -> for Gaussian smoothing + """ + + name: str + human_readable_name: str + description: str + possible_values: Optional[list[str]] = None + + +class PossibleValue(BaseModel): + """ + Possible value for data manipulation method parameter. E.g. Gaussian smoothing + """ + + value: str + description: str + additional_parameters: Optional[list[AdditionalParameter]] = None + + +class DataManipulationParameter(BaseModel): + """ + E.g. interpolate_over, type of filtering, binary operation + """ + + human_readable_name: str + name: str + description: str + possible_values: Optional[list[PossibleValue]] = None + + +class DataManipulationOperation(BaseModel): + """ + Type of data manipulation method. E.g. 
data-smoothing, data-interpolation, binary-operation + """ + + name: str + description: str + method_parameters: list[DataManipulationParameter] + + +class DataManipulationMethodsResponse(BaseModel): + data_manipulation_methods: list[DataManipulationOperation] + + +available_methods = DataManipulationMethodsResponse(data_manipulation_methods=[]) + +# ====================== DATA INTERPOLATION ====================== + +data_interpolation_description = DataManipulationOperation( + name="Data interpolation", + description="Operation performed in order to represent dataset over different set of coordinates", + method_parameters=[], +) + +# ====================== DATA INTERPOLATION PARAMETERS ====================== + +data_interpolation_interpolate_over_parameter = DataManipulationParameter( + human_readable_name="Interpolate over", + name="interpolate_over", + description="List of URIs to gather coordinates from, for interpolation", +) + +data_interpolation_method_parameter = DataManipulationParameter( + human_readable_name="Interpolation method", + name="interpolation_method", + description="Method used during data interpolation", + possible_values=[ + PossibleValue( + value=InterpolationMethod.EXACT_VALUE, + description="values are present only on data points where they were originally. 
Rest of the data grid is filled with NaNs", + ), + PossibleValue( + value=InterpolationMethod.LINEAR, + description="see scipy.interpolate.RegularGridInterpolator documentation", + ), + PossibleValue( + value=InterpolationMethod.NEAREST, + description="see scipy.interpolate.RegularGridInterpolator documentation", + ), + ], +) + +data_interpolation_description.method_parameters.append(data_interpolation_interpolate_over_parameter) +data_interpolation_description.method_parameters.append(data_interpolation_method_parameter) +available_methods.data_manipulation_methods.append(data_interpolation_description) + +# ====================== DATA SMOOTHING ====================== + +data_smoothing_description = DataManipulationOperation( + name="Data smoothing/denoising", + description="Operation performed in order to eliminate noise from data", + method_parameters=[], +) + +# ====================== DATA SMOOTHING PARAMETERS ====================== + +data_smoothing_method_parameter = DataManipulationParameter( + human_readable_name="Smoothing method", + name="smoothing_method", + description="Method to be used in data smoothing process", + possible_values=[ + PossibleValue( + value=SmoothingMethod.GAUSSIAN_FILTER, + description="see scipy.ndimage.gaussian_filter documentation", + additional_parameters=[ + AdditionalParameter( + name="gaussian_smoothing_sigma", + human_readable_name="Sigma", + description="Standard deviation for Gaussian kernel.", + ) + ], + ), + PossibleValue( + value=SmoothingMethod.SAVITZKY_GOLAY_FILTER, + description="see scipy.signal.savgol_filter documentation", + additional_parameters=[ + AdditionalParameter( + name="savgol_smoothing_window_length", + human_readable_name="Window length", + description="The length of the filter window (i.e., the number of coefficients). 
If mode is ‘interp’, window_length must be less than or equal to the size of x.", + ), + AdditionalParameter( + name="savgol_smoothing_polyorder", + human_readable_name="Polyorder", + description="The order of the polynomial used to fit the samples. polyorder must be less than window_length.", + ), + AdditionalParameter( + name="savgol_smoothing_deriv", + human_readable_name="Deriv", + description="The order of the derivative to compute. This must be a nonnegative integer. The default is 0, which means to filter the data without differentiating.", + ), + AdditionalParameter( + name="savgol_smoothing_delta", + human_readable_name="Window delta", + description="The spacing of the samples to which the filter will be applied. This is only used if deriv > 0. Default is 1.0.", + ), + AdditionalParameter( + name="savgol_smoothing_mode", + human_readable_name="Mode", + description="This determines the type of extension to use for the padded signal to which the filter is applied.", + possible_values=["mirror", "constant", "nearest", "wrap", "interp"], + ), + AdditionalParameter( + name="savgol_smoothing_cval", + human_readable_name="C-Val", + description="Value to fill past the edges of the input if mode is ‘constant’. 
Default is 0.0.", + ), + ], + ), + ], +) + +data_smoothing_description.method_parameters.append(data_smoothing_method_parameter) +available_methods.data_manipulation_methods.append(data_smoothing_description) diff --git a/backend/ibex/core/ibex_service.py b/backend/ibex/core/ibex_service.py index f645863e..ce0187bb 100644 --- a/backend/ibex/core/ibex_service.py +++ b/backend/ibex/core/ibex_service.py @@ -3,11 +3,12 @@ import time from pathlib import Path from functools import wraps # for measure_execution_time() -from typing import Any, Callable, Optional, Sequence, List +from typing import Any, Callable, Optional, Sequence from ibex.data_source.imas_python_source import IMASPythonSource from ibex.data_source.exception import CannotGenerateUriException from ibex.core.utils import IMAS_URI +from ibex.endpoints.schemas.request_data_schemas import PlotDataRequestModel # helper decorator used during development @@ -116,21 +117,5 @@ def get_multiple_node_data(uri: str) -> dict: ) -def get_plot_data( - uri: str, - interpolate_over: List[str] | None, - interpolation_method: str | None, - downsampling_method: str | None, - downsampled_size: int, -) -> dict: - uri_obj = IMAS_URI(uri) - return data_source.get_plot_data( - uri=uri_obj.uri_entry_identifiers, - ids=uri_obj.ids_name, - node_path=uri_obj.node_path, - occurrence=uri_obj.occurrence, - interpolate_over=interpolate_over, - interpolation_method=interpolation_method, - downsampling_method=downsampling_method, - downsampled_size=downsampled_size, - ) +def get_plot_data(plot_data_query: PlotDataRequestModel) -> dict: + return data_source.get_plot_data(plot_data_query) diff --git a/backend/ibex/data_source/data_source_interface.py b/backend/ibex/data_source/data_source_interface.py index e457258d..7c8f6219 100644 --- a/backend/ibex/data_source/data_source_interface.py +++ b/backend/ibex/data_source/data_source_interface.py @@ -1,7 +1,8 @@ """Interface for all data sources""" from abc import ABC, abstractmethod -from 
typing import Sequence, Optional, List +from typing import Sequence, Optional +from ibex.endpoints.schemas.request_data_schemas import PlotDataRequestModel class DataSourceInterface(ABC): @@ -123,28 +124,12 @@ def list_db_entries( """ ... - def get_plot_data( - self, - uri: str, - ids: str, - node_path: str, - occurrence: int = 0, - interpolate_over: List[str] | None = None, - interpolation_method: str | None = None, - downsampling_method: str | None = None, - downsampled_size: int = 1000, - ) -> dict: + def get_plot_data(self, plot_data_query: PlotDataRequestModel) -> dict: """ Returns all data used to plot selected quantity. Result contains data values, metadata and coordinates. - :param uri: imas URI - :param ids: name of ids e.g. core_profiles - :param node_path: path to ids node e.g. ids_properties/version_put - :param occurrence: ids occurrence number - :param interpolate_over: list of uris used in interpolation - :param interpolation_method: method to be used in data interpolation; one from scipy.interpolate.RegularGridInterpolator or 'exact_value' - :param downsampling_method: one of the downsampling metods returend by :func:`~ibex.endpoints.info.downsampling_methods` endpoint, or None - :param downsampled_size: target size of downsampled data + :param plot_data_query: See :class:`ibex.endpoints.schemas.request_data_schemas.PlotDataRequestModel` + :type plot_data_query: :class:`ibex.endpoints.schemas.request_data_schemas.PlotDataRequestModel` :return: Dictionary containing data values, metadata and coordinates. """ ... 
diff --git a/backend/ibex/data_source/imas_python_source.py b/backend/ibex/data_source/imas_python_source.py index 91d57203..8743bc0c 100644 --- a/backend/ibex/data_source/imas_python_source.py +++ b/backend/ibex/data_source/imas_python_source.py @@ -5,6 +5,7 @@ import imas # type: ignore import numpy as np # type: ignore import re # type: ignore +from copy import copy # type: ignore from idstools.database import DBMaster # type: ignore from imas.ids_metadata import IDSMetadata # type: ignore from imas.ids_primitive import ( @@ -47,7 +48,11 @@ flatten, expand, calculate_coordinate_shapes, + apply_savgol_filter, + apply_gaussian_filter, ) +from ibex.core.data_manipulation_methods import SmoothingMethod, InterpolationMethod +from ibex.endpoints.schemas.request_data_schemas import PlotDataRequestModel class IMASPythonSource(DataSourceInterface): @@ -628,31 +633,37 @@ def _check_data_is_leaf_node(self, data) -> None: elif isinstance(data, IDSStructure): raise NotALeafNodeException("Cannot serialize non-leaf node") - def get_plot_data( - self, - uri: str, - ids: str, - node_path: str, - occurrence: int = 0, - interpolate_over: List[str] | None = None, - interpolation_method: str | None = None, - downsampling_method: str | None = None, - downsampled_size: int = 1000, - ) -> dict: + def _leaf_node_coordinates_contain_time(self, leaf_node_path: str, coordinates_to_be_returned: list[dict]) -> bool: + """ + Returns True when the leaf-node coordinates contain a time coordinate. 
+ + :param leaf_node_path: path to the tested leaf node + :param coordinates_to_be_returned: list of dicts of coordinates from get_plot_data() method + :return: True or False + """ + if not coordinates_to_be_returned: + return False + for coordinate in coordinates_to_be_returned: + if coordinate["name"] == "time" and coordinate["target"] == leaf_node_path: + return True + + return False + + def get_plot_data(self, plot_data_query: PlotDataRequestModel) -> dict: """ Returns all data used to plot selected quantity. Result contains data values, metadata and coordinates. - :param uri: imas URI - :param ids: name of ids e.g. core_profiles - :param node_path: path to ids node e.g. ids_properties/version_put - :param occurrence: ids occurrence number - :param interpolate_over: list of uris used in interpolation - :param interpolation_method: method to be used in data interpolation; one from scipy.interpolate.RegularGridInterpolator or 'exact_value' - :param downsampling_method: one of the downsampling metods returend by :func:`~ibex.endpoints.info.downsampling_methods` endpoint, or None - :param downsampled_size: target size of downsampled data + :param plot_data_query: See :class:`ibex.endpoints.schemas.request_data_schemas.PlotDataRequestModel` + :type plot_data_query: :class:`ibex.endpoints.schemas.request_data_schemas.PlotDataRequestModel` :return: Dictionary containing data values, metadata and coordinates. 
""" + uri_obj = IMAS_URI(plot_data_query.uri.strip()) + uri = uri_obj.uri_entry_identifiers + ids = uri_obj.ids_name + node_path = uri_obj.node_path + occurrence = uri_obj.occurrence + with self._open_entry(uri) as entry: ids_obj = self._get_ids_from_entry(entry, ids, occurrence) @@ -671,8 +682,8 @@ def get_plot_data( for _node_path, _coordinate_path_list in coordinates_dict.items(): _new_coordinate_path_list = [] for _coordinate_path in _coordinate_path_list: - if _coordinate_path == "1...N": - _new_coordinate_path_list.append("1...N") + if _coordinate_path.startswith("1..."): # 1...N, 1...2, 1...3 etc. + _new_coordinate_path_list.append(_coordinate_path) continue _new_coordinate_path = "" @@ -705,10 +716,8 @@ def get_plot_data( shapes_dimension = not bool(re.search(r"\[\d+\]$", str(target))) for coord in coord_list: - if coord == "1...N": - # 1...N coords are targeting AoS + if coord.startswith("1..."): # remove last array operator ([]) from path and save it as target_str - splitted_target = str(target).split("/") splitted_target[-1] = re.sub(r"[\[\(](.*?)[\]\)]", "", splitted_target[-1]) target_str = "/".join([x for x in splitted_target]) @@ -719,7 +728,7 @@ def get_plot_data( coord_target_objects = self._get_raw_data(ids_obj, path_elements) self._check_data_is_leaf_node(coord_target_objects) - # collect labels for 1...N coordinates + # collect labels for 1... coordinates labels = [] try: for element in coord_target_objects: @@ -758,7 +767,7 @@ def get_plot_data( # (otherwise coordinate name would be the same as target node name) coordinate_name = splitted_target[-1] if f"{target}" == f"{node_path}": - coordinate_name = "1...N" + coordinate_name = coord try: coord_data_shape = np.asarray(coord_values).shape @@ -820,11 +829,40 @@ def get_plot_data( first_value = find_first_value_in_list(ids_data) data_to_be_returned = convert_ids_data_into_numpy_array(ids_data) - if first_value.metadata.ndim == 2: - # Transform 2D arrays. 
- # By default first dimension of 2D has coordinate that is second on the list - # FE expects data's first dimension to be connected with second dimension, thus this transformation - data_to_be_returned = transform_2D_data(data_to_be_returned) + # ============= BEGIN data smoothing ============ + if plot_data_query.smoothing_method is not None: + if not self._leaf_node_coordinates_contain_time(f"#{ids}/{node_path}", coordinates_to_be_returned): + raise InvalidParametersException( + "Data smoothing is only supported when leaf-node coordinates contain time" + ) + + if plot_data_query.smoothing_method == SmoothingMethod.SAVITZKY_GOLAY_FILTER: + if first_value.metadata.ndim != 1: + message = f"Savitzky-Golay filter supports only 1D smoothing. Selected data node is {first_value.metadata.ndim}D." + raise InvalidParametersException(message) + data_to_be_returned = apply_savgol_filter( + data_to_be_returned, + window_length=plot_data_query.savgol_smoothing_window_length, + polyorder=plot_data_query.savgol_smoothing_polyorder, + deriv=plot_data_query.savgol_smoothing_deriv, + delta=plot_data_query.savgol_smoothing_delta, + mode=plot_data_query.savgol_smoothing_mode, + cval=plot_data_query.savgol_smoothing_cval, + ) + + elif plot_data_query.smoothing_method == SmoothingMethod.GAUSSIAN_FILTER: + time_coordinate_axis = None + if first_value.metadata.ndim == 2: + time_coordinate_axis = next( + (i for i, d in enumerate(coordinates_to_be_returned) if d.get("name") == "time"), None + ) + data_to_be_returned = apply_gaussian_filter( + data_to_be_returned, + sigma=plot_data_query.gaussian_smoothing_sigma, + axis=time_coordinate_axis, + ) + + # ============= END data smoothing ============= # ============= BEGIN resample data onto new time vector ============= @@ -836,7 +874,7 @@ def convert_to_lists(data): else: return data - if interpolate_over: + if plot_data_query.interpolate_over: # =================== GATHER ALL COORDINATES =================== original_coord_values = [] 
new_common_coords = coordinates_to_be_returned @@ -845,7 +883,7 @@ def convert_to_lists(data): original_coord_values.append(sorted(set(flatten(c["value"])))) original_coord_values.reverse() - for _uri in interpolate_over: + for _uri in plot_data_query.interpolate_over: _uri_obj = IMAS_URI(_uri) if _uri_obj.ids_name != ids or _uri_obj.node_path != node_path: @@ -857,14 +895,11 @@ def convert_to_lists(data): "IDS name and node path should be the same for source and target URI when interpolating data" ) - interpolate_to_coordinates = self.get_plot_data( - uri=_uri_obj.uri_entry_identifiers, - ids=_uri_obj.ids_name, - node_path=_uri_obj.node_path, - occurrence=_uri_obj.occurrence, - downsampling_method=downsampling_method, - downsampled_size=downsampled_size, - )["data"]["coordinates"] + new_plot_data_query = copy(plot_data_query) + new_plot_data_query.uri = _uri + new_plot_data_query.interpolate_over = None + new_plot_data_query.smoothing_method = None + interpolate_to_coordinates = self.get_plot_data(new_plot_data_query)["data"]["coordinates"] if len(interpolate_to_coordinates) != len(coordinates_to_be_returned): message = "Interpolation error. Source and target nodes have different number of coordinates." 
@@ -886,7 +921,10 @@ def convert_to_lists(data): data_to_be_returned = pad_to_rectangular(data_to_be_returned) # === run interpolation === - if interpolation_method == "exact_value" or not interpolation_method: + if ( + plot_data_query.interpolation_method == InterpolationMethod.EXACT_VALUE + or not plot_data_query.interpolation_method + ): data_to_be_returned = resample_data_without_interpolation( tuple(original_coord_values), data_to_be_returned, tuple(common_coords_values) ) @@ -895,7 +933,7 @@ def convert_to_lists(data): tuple(original_coord_values), data_to_be_returned, tuple(common_coords_values), - interpolation_method=interpolation_method, + interpolation_method=plot_data_query.interpolation_method, ) new_coordinate_shapes = calculate_coordinate_shapes( @@ -911,6 +949,13 @@ def convert_to_lists(data): # ============= END resample data onto new time vector ============= + if first_value.metadata.ndim == 2: + # Transform 2D arrays. + # By default first dimension of 2D has coordinate that is second on the list + # FE expects data's first dimension to be connected with second dimension, thus this transformation + + data_to_be_returned = transform_2D_data(data_to_be_returned) + try: original_data_shape = np.asarray(data_to_be_returned).shape except ValueError: @@ -921,15 +966,17 @@ def convert_to_lists(data): # If coordinate targets node -> downsample coordinate as well coordinates_to_be_returned[0]["value"], data_to_be_returned = downsample_data( data_to_be_returned, - target_size=downsampled_size, - method=downsampling_method, + target_size=plot_data_query.downsampled_size, + method=plot_data_query.downsampling_method, x=coordinates_to_be_returned[0]["value"], single_x_axis=(coordinates_to_be_returned[0]["path"] == f"#{ids}/time"), ) else: _, data_to_be_returned = downsample_data( - data_to_be_returned, target_size=downsampled_size, method=downsampling_method + data_to_be_returned, + target_size=plot_data_query.downsampled_size, + 
method=plot_data_query.downsampling_method, ) # serialize coordinates and update shapes (they could be changed by downsampling) for c in coordinates_to_be_returned: diff --git a/backend/ibex/data_source/imas_python_source_utils.py b/backend/ibex/data_source/imas_python_source_utils.py index 94bf7723..261b99f8 100644 --- a/backend/ibex/data_source/imas_python_source_utils.py +++ b/backend/ibex/data_source/imas_python_source_utils.py @@ -4,6 +4,8 @@ import numpy as np from imas.ids_primitive import IDSNumericArray from scipy.interpolate import RegularGridInterpolator +from scipy.ndimage import gaussian_filter1d +from scipy.signal import savgol_filter from ibex.data_source.exception import InvalidParametersException @@ -32,6 +34,66 @@ def path_in_filled_paths(node_path: str, filled_paths: List[str]): return False +def apply_savgol_filter( + data: list | np.ndarray, + window_length: int | None, + polyorder: int | None, + deriv: int | None, + delta: float | None, + mode: str | None, + cval: float | None, +): + """ + Apply Savitzky-Golay filter to data + :param data: The input array. + :param window_length: The length of the filter window (i.e., the number of coefficients). If mode is ‘interp’, window_length must be less than or equal to the size of x. + :param polyorder: The order of the polynomial used to fit the samples. polyorder must be less than window_length. + :param deriv: The order of the derivative to compute. This must be a nonnegative integer. The default is 0, which means to filter the data without differentiating. + :param delta: The spacing of the samples to which the filter will be applied. This is only used if deriv > 0. Default is 1.0. + :param mode: Must be ‘mirror’, ‘constant’, ‘nearest’, ‘wrap’ or ‘interp’. + :param cval: Value to fill past the edges of the input if mode is ‘constant’. Default is 0.0. 
+ + :return: Data with filter applied + """ + + params = { + "window_length": window_length, + "polyorder": polyorder, + "deriv": deriv, + "delta": delta, + "mode": mode, + "cval": cval, + } + non_empty_params = {k: v for k, v in params.items() if v is not None} + + if isinstance(data, list): + return [apply_savgol_filter(x, **params) for x in data] + elif isinstance(data, (np.ndarray, IDSNumericArray)): + return savgol_filter(data, **non_empty_params) + else: + msg = "Smoothing can be executed only on numeric arrays, not single values or strings." + raise InvalidParametersException(msg) + + +def apply_gaussian_filter(data: list | np.ndarray, sigma, axis: int | None = None): + """ + Apply Gaussian filter to data + :param data: The input array. + :param sigma: Standard deviation for Gaussian kernel, given as a single number. The filter is one-dimensional (scipy.ndimage.gaussian_filter1d) and is applied along ``axis``, or along the last axis when ``axis`` is None. + :return: Data with filter applied + """ + if isinstance(data, list): + return [apply_gaussian_filter(x, sigma) for x in data] + elif isinstance(data, (np.ndarray, IDSNumericArray)): + if axis is None: + return gaussian_filter1d(data, sigma=sigma) + else: + return gaussian_filter1d(data, sigma=sigma, axis=axis) + else: + msg = "Smoothing can be executed only on numeric arrays, not single values or strings." + raise InvalidParametersException(msg) + + def union_arrays(data: list): return reduce(np.union1d, data) @@ -88,7 +150,7 @@ def expand(data: list, grid_shape: list): :param data: 1D input array of shape (N,) :param grid_shape: target grid shape (e.g. 
[4, 3, 5]) - :return: broadcasted array of shape (*grid_shape, N) + :return: broadcasted array of shape ``(*grid_shape, N)`` :raises ValueError: if input data is not 1-dimensional """ diff --git a/backend/ibex/endpoints/data.py b/backend/ibex/endpoints/data.py index 031f9830..05930d35 100644 --- a/backend/ibex/endpoints/data.py +++ b/backend/ibex/endpoints/data.py @@ -1,13 +1,14 @@ """Endpoints extracting data from data source""" import orjson -from typing import List, Any, Optional +from typing import Any, Annotated from fastapi import APIRouter, Query # type: ignore from fastapi.responses import ORJSONResponse # type: ignore from ibex.core import ibex_service -from ibex.endpoints.schemas.data_schemas import FieldValueResponse, PlotDataResponse +from ibex.endpoints.schemas.request_data_schemas import PlotDataRequestModel +from ibex.endpoints.schemas.response_data_schemas import FieldValueResponse, PlotDataResponse router = APIRouter() @@ -69,17 +70,12 @@ def field_value( 200: {"description": "Plot data returned successfully"}, 404: {"description": "Data node not found"}, 464: {"description": "Given data node is empty"}, + 466: {"description": "Invalid parameters for requested data manipulation"}, }, description="Returns single (or tensorized) data node value with detailed parameters used to plot the data", ) @ibex_service.measure_execution_time -def plot_data( - uri: str, - interpolate_over: Optional[List[str]] = Query(None), - interpolation_method: Optional[str] = Query(None), - downsampling_method: str | None = Query(None), - downsampled_size: int = 1000, -) -> Any: +def plot_data(plot_data_query: Annotated[PlotDataRequestModel, Query()]) -> CustomORJSONResponse: """ IBEX endpoint. Prepares and returns full information about data node and it's coordinates. @@ -113,20 +109,12 @@ def plot_data( | } | } - :param uri: IMAS URI with the path to leaf node - :param interpolate_over: list of IMAS URIs used in interpolation. E.g. 
imas:hdf5?path=/home/ITER/wasikj/Desktop/work/IBEX/testdb2#equilibrium/time_slice[:]/profiles_2d[:]/psi - :param interpolation_method: method of interpolation; one of the possible parameters provided from /info/data_manipulation_methods - :param downsampling_method: one of the downsampling metods returend by :func:`~ibex.endpoints.info.downsampling_methods` endpoint, or None - :param downsampled_size: target size of downsampled data + :param plot_data_query: See :class:`ibex.endpoints.schemas.request_data_schemas.PlotDataRequestModel` + :type plot_data_query: :class:`ibex.endpoints.schemas.request_data_schemas.PlotDataRequestModel` + :rtype: dict (automatically converted to JSON by FastAPI) :return: JSON response + """ - return CustomORJSONResponse( - ibex_service.get_plot_data( - uri=uri.strip(), - interpolate_over=interpolate_over, - interpolation_method=interpolation_method, - downsampling_method=downsampling_method, - downsampled_size=downsampled_size, - ) - ) + + return CustomORJSONResponse(ibex_service.get_plot_data(plot_data_query)) diff --git a/backend/ibex/endpoints/data_entry.py b/backend/ibex/endpoints/data_entry.py index ff09bd56..bd42e342 100644 --- a/backend/ibex/endpoints/data_entry.py +++ b/backend/ibex/endpoints/data_entry.py @@ -5,7 +5,7 @@ from fastapi import APIRouter # type: ignore from ibex.core import ibex_service -from ibex.endpoints.schemas.data_entry_schemas import ( +from ibex.endpoints.schemas.response_data_entry_schemas import ( UriFromPathResponse, ExistsResponse, ListIdsesResponse, diff --git a/backend/ibex/endpoints/ids_info.py b/backend/ibex/endpoints/ids_info.py index 60edaf55..93c65a14 100644 --- a/backend/ibex/endpoints/ids_info.py +++ b/backend/ibex/endpoints/ids_info.py @@ -3,7 +3,7 @@ from fastapi import APIRouter # type: ignore from ibex.core import ibex_service -from ibex.endpoints.schemas.ids_info_schemas import NodeInfoResponse, FindPathsResponse, ArraySummaryResponse +from ibex.endpoints.schemas.response_ids_info_schemas 
import NodeInfoResponse, FindPathsResponse, ArraySummaryResponse router = APIRouter() diff --git a/backend/ibex/endpoints/info.py b/backend/ibex/endpoints/info.py index e6ce21a3..214b918c 100644 --- a/backend/ibex/endpoints/info.py +++ b/backend/ibex/endpoints/info.py @@ -5,11 +5,12 @@ from ibex.core import ibex_service from ibex.core.utils import DownsamplingMethods from ibex import __version__ -from ibex.endpoints.schemas.info_schemas import ( +from ibex.endpoints.schemas.response_info_schemas import ( VersionResponse, DownsamplingMethodsResponse, DataManipulationMethodsResponse, ) +from ibex.core.data_manipulation_methods import available_methods router = APIRouter() @@ -94,54 +95,5 @@ def data_manipulation_methods() -> dict: :return: JSON response """ - res = { - "data_manipulation_methods": [ - { - "name": "Data interpolation", - "description": "Operation performed in order to represent dataset over different set of coordinates", - "method_parameters": [ - { - "human_readable_name": "Interpolate over", - "name": "interpolate_over", - "description": "List of URIs to gather coordinates from, for interpolation", - }, - { - "human_readable_name": "Data interpolation method", - "name": "interpolation_method", - "description": "Method used during data interpolation. All possible for scipy.interpolate.RegularGridInterpolator 'method' parameter or 'exact'", - "possible_values": [ - { - "value": "exact_value", - "description": "values are present only on data points where they were originally. 
Rest of the data grid is filled with NaNs", - }, - { - "value": "linear", - "description": "see scipy.interpolate.RegularGridInterpolator documentation", - }, - { - "value": "nearest", - "description": "see scipy.interpolate.RegularGridInterpolator documentation", - }, - { - "value": "slinear", - "description": "see scipy.interpolate.RegularGridInterpolator documentation", - }, - { - "value": "cubic", - "description": "see scipy.interpolate.RegularGridInterpolator documentation", - }, - { - "value": "quintic", - "description": "see scipy.interpolate.RegularGridInterpolator documentation", - }, - { - "value": "pchip", - "description": "see scipy.interpolate.RegularGridInterpolator documentation", - }, - ], - }, - ], - } - ] - } - return res + + return available_methods diff --git a/backend/ibex/endpoints/schemas/request_data_schemas.py b/backend/ibex/endpoints/schemas/request_data_schemas.py new file mode 100644 index 00000000..f4b93643 --- /dev/null +++ b/backend/ibex/endpoints/schemas/request_data_schemas.py @@ -0,0 +1,114 @@ +from pydantic import BaseModel, Field, model_validator +from typing import Optional, List +from ibex.core.data_manipulation_methods import available_methods, SmoothingMethod +from enum import Enum + + +def _get_parameter_possible_values(parameter_name: str) -> list[str]: + """ + Helper function. Return allowed values declared for a top-level data manipulation parameter. + """ + for method in available_methods.data_manipulation_methods: + for method_parameter in method.method_parameters: + if method_parameter.name == parameter_name: + return [possible_value.value for possible_value in (method_parameter.possible_values or [])] + return [] + + +def _get_connected_parameter_possible_values(parameter_name: str) -> list[str]: + """ + Helper function. Returns allowed values for a connected parameter referenced by name. 
+ :param parameter_name: + :return: + """ + for method in available_methods.data_manipulation_methods: + for method_parameter in method.method_parameters: + if not method_parameter.possible_values: + continue + + for possible_value in method_parameter.possible_values: + if not possible_value.additional_parameters: + continue + + for connected_parameter in possible_value.additional_parameters: + if connected_parameter.name == parameter_name: + return connected_parameter.possible_values or [] + + raise ValueError(f"Possible values for '{parameter_name}' not found") + + +# Create ENUM from savgol smoothing mode (for validation) +SavgolSmoothingMode = Enum( + "SavgolSmoothingMode", + {value.upper(): value for value in _get_connected_parameter_possible_values("savgol_smoothing_mode")}, + type=str, +) + +# ========== PLOT DATA ========== + + +class SavgolSmoothingParameters(BaseModel): + savgol_smoothing_window_length: int | None = Field( + default=None, + description="The length of the filter window (i.e., the number of coefficients). If mode is 'interp', window_length must be less than or equal to the size of x.", + ) + savgol_smoothing_polyorder: int | None = Field( + default=None, + description="The order of the polynomial used to fit the samples. polyorder must be less than window_length.", + ) + savgol_smoothing_deriv: int | None = Field( + default=None, + description="The order of the derivative to compute. This must be a nonnegative integer. The default is 0, which means to filter the data without differentiating.", + ) + savgol_smoothing_delta: float | None = Field( + default=None, + description="The spacing of the samples to which the filter will be applied. This is only used if deriv > 0. 
Default is 1.0.", + ) + savgol_smoothing_mode: SavgolSmoothingMode = Field( + default=SavgolSmoothingMode.INTERP, + description="Must be 'mirror', 'constant', 'nearest', 'wrap' or 'interp' (default).", + ) + savgol_smoothing_cval: float | None = Field( + default=None, + description="Value to fill past the edges of the input if mode is 'constant'. Default is 0.0.", + ) + + +class GaussianSmoothingParameters(BaseModel): + gaussian_smoothing_sigma: float | None = Field( + default=None, + description="Standard deviation for Gaussian kernel.", + ) + + +class PlotDataBasicParameters(BaseModel): + """...""" + + uri: str = Field(description="IMAS URI") + interpolate_over: Optional[List[str]] = Field( + default=None, description="List of IMAS URIs to be used in data interpolation" + ) + interpolation_method: str | None = Field(default=None, description="Interpolation method to be used") + downsampling_method: str | None = Field(default=None, description="Downsampling method to be used") + downsampled_size: int = Field(default=1000, description="Desired size of the data after downsampling") + smoothing_method: SmoothingMethod | None = Field(default=None, description="Smoothing method to be used") + + +class PlotDataRequestModel(PlotDataBasicParameters, SavgolSmoothingParameters, GaussianSmoothingParameters): + @model_validator(mode="after") + def validate_gaussian_smoothing_parameters(self) -> "PlotDataRequestModel": + if self.smoothing_method == SmoothingMethod.GAUSSIAN_FILTER and self.gaussian_smoothing_sigma is None: + raise ValueError("gaussian_smoothing_sigma is required when smoothing_method is 'gaussian_filter'") + + if self.smoothing_method == SmoothingMethod.SAVITZKY_GOLAY_FILTER: + if self.savgol_smoothing_window_length is None: + raise ValueError( + "savgol_smoothing_window_length is required when smoothing_method is 'savitzky_golay_filter'" + ) + + if self.savgol_smoothing_polyorder is None: + raise ValueError( + "savgol_smoothing_polyorder is required when 
smoothing_method is 'savitzky_golay_filter'" + ) + + return self diff --git a/backend/ibex/endpoints/schemas/data_entry_schemas.py b/backend/ibex/endpoints/schemas/response_data_entry_schemas.py similarity index 100% rename from backend/ibex/endpoints/schemas/data_entry_schemas.py rename to backend/ibex/endpoints/schemas/response_data_entry_schemas.py diff --git a/backend/ibex/endpoints/schemas/data_schemas.py b/backend/ibex/endpoints/schemas/response_data_schemas.py similarity index 100% rename from backend/ibex/endpoints/schemas/data_schemas.py rename to backend/ibex/endpoints/schemas/response_data_schemas.py diff --git a/backend/ibex/endpoints/schemas/ids_info_schemas.py b/backend/ibex/endpoints/schemas/response_ids_info_schemas.py similarity index 100% rename from backend/ibex/endpoints/schemas/ids_info_schemas.py rename to backend/ibex/endpoints/schemas/response_ids_info_schemas.py diff --git a/backend/ibex/endpoints/schemas/response_info_schemas.py b/backend/ibex/endpoints/schemas/response_info_schemas.py new file mode 100644 index 00000000..c6bd4e02 --- /dev/null +++ b/backend/ibex/endpoints/schemas/response_info_schemas.py @@ -0,0 +1,28 @@ +from pydantic import BaseModel, Field +from ibex.core import data_manipulation_methods + + +# ========== VERSION ========== +class VersionResponse(BaseModel): + """Response for /info/version endpoint""" + + version: str = Field(description="IBEX version", examples=["0.0.1", "1.0.2"]) + + +# ========== DOWNSAMPLING METHODS ========== +class DownsamplingMethodModel(BaseModel): + """Intermediate model for /info/downsampling_methods endpoint""" + + name: str = Field(description="Method name", examples=["STEP", "STEP_AVERAGE"]) + description: str = Field(description="Method description", examples=["Simple step algorithm"]) + + +class DownsamplingMethodsResponse(BaseModel): + """Response for /info/downsampling_methods endpoint""" + + downsampling_methods: list[DownsamplingMethodModel] = Field(description="Available 
downsampling methods") + + +# ========== DATA MANIPULATION METHODS ========== + +DataManipulationMethodsResponse = data_manipulation_methods.DataManipulationMethodsResponse diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py index 220783e1..bb49da26 100644 --- a/backend/tests/conftest.py +++ b/backend/tests/conftest.py @@ -26,8 +26,8 @@ def interpolation_entry_path_directory(tmp_path_factory): ts.profiles_2d.resize(2) for p2d in ts.profiles_2d: p2d.psi = np.asarray(np.random.rand(3, 3)) - p2d.grid.dim1 = np.asarray([1.0, 2.0, 3.0]) - p2d.grid.dim2 = np.asarray([1.0, 2.0, 3.0]) + p2d.grid.dim1 = np.asarray([1.0, 2.0, 3.0], dtype=float) + p2d.grid.dim2 = np.asarray([1.0, 2.0, 3.0], dtype=float) entry.put(eq) with imas.DBEntry(f"imas:hdf5?path={tmp_path}/interpolation_db_2", mode="w") as entry: @@ -44,8 +44,8 @@ def interpolation_entry_path_directory(tmp_path_factory): ts.profiles_2d.resize(4) for p2d in ts.profiles_2d: p2d.psi = np.asarray(np.random.rand(9, 3)) - p2d.grid.dim1 = np.asarray([0.3, 0.6, 0.9, 1.2, 1.5, 1.8, 2.1, 2.4, 2.7]) - p2d.grid.dim2 = np.asarray([1.0, 2.0, 3.0]) + p2d.grid.dim1 = np.asarray([0.3, 0.6, 0.9, 1.2, 1.5, 1.8, 2.1, 2.4, 2.7], dtype=float) + p2d.grid.dim2 = np.asarray([1.0, 2.0, 3.0], dtype=float) entry.put(eq) return tmp_path @@ -85,12 +85,32 @@ def entry_path(tmp_path_factory): for ion in profiles_2d.ion: ion.name = f"random ion name {i}" - ion.temperature = np.array([[i, +1, i + 2], [i + 10, i + 11, i + 12], [i + 20, i + 21, i + 32]]) - profiles_2d.grid.dim1 = np.array([0, 1, 2]) - profiles_2d.grid.dim2 = np.array([0, 1, 2]) + ion.temperature = np.array( + [[i, +1, i + 2], [i + 10, i + 11, i + 12], [i + 20, i + 21, i + 32]], dtype=float + ) + profiles_2d.grid.dim1 = np.array([0, 1, 2], dtype=float) + profiles_2d.grid.dim2 = np.array([0, 1, 2], dtype=float) i += 10 + # ===== for data smoothing (must be time-based) ===== + core_profiles.global_quantities.ip = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) + entry.put(core_profiles) - 
entry.close() + # ===== for data smoothing 2D (one of coordinates is time) ===== + + wall = entry.factory.wall() + wall.ids_properties.homogeneous_time = 1 + + wall.time = np.array(range(1, 6), dtype=float) + wall.global_quantities.electrons.particle_flux_from_wall = np.array( + [ + [1, 3, 2, 4, 3], + [1, 1, 1, 1, 1], + [1, 1, 1, 1, 1], + ] + ) + entry.put(wall) + + entry.close() return tmp_path diff --git a/backend/tests/test_data_endpoints.py b/backend/tests/test_data_endpoints.py index 5ddd8f51..dde9b446 100644 --- a/backend/tests/test_data_endpoints.py +++ b/backend/tests/test_data_endpoints.py @@ -1,4 +1,5 @@ import pytest +import numpy as np def test_status_codes(entry_path): @@ -69,6 +70,59 @@ def test_plot_data(entry_path): assert time_coordinate["description"] == "Generic time" +def test_plot_data_with_gaussian_smoothing(entry_path): + parameters = { + "uri": f"imas:hdf5?path={entry_path}#core_profiles/global_quantities/ip", + "smoothing_method": "gaussian_filter", + "gaussian_smoothing_sigma": 1, + } + response = pytest.test_client.get("/data/plot_data", params=parameters) + assert response.status_code == 200 + + response_body = response.json() + assert response_body["data"]["value"] == pytest.approx([1.42, 2.06, 3.0, 3.93, 4.57], 0.1) + + +def test_plot_data_with_gaussian_smoothing_2d(entry_path): + parameters = { + "uri": f"imas:hdf5?path={entry_path}#wall/global_quantities/electrons/particle_flux_from_wall", + "smoothing_method": "gaussian_filter", + "gaussian_smoothing_sigma": 1, + } + response = pytest.test_client.get("/data/plot_data", params=parameters) + assert response.status_code == 200 + + response_body = response.json() + + assert np.array(response_body["data"]["value"]) == pytest.approx( + np.array([[1.6, 1.0, 1.0], [2.2, 1.0, 1.0], [2.7, 1.0, 1.0], [3.1, 1.0, 1.0], [3.2, 1.0, 1.0]]), 0.1 + ) + + +def test_plot_data_with_savgol_smoothing(entry_path): + parameters = { + "uri": f"imas:hdf5?path={entry_path}#core_profiles/global_quantities/ip", 
+ "smoothing_method": "savitzky-golay_filter", + "savgol_smoothing_window_length": 5, + "savgol_smoothing_polyorder": 2, + } + response = pytest.test_client.get("/data/plot_data", params=parameters) + assert response.status_code == 200 + + response_body = response.json() + assert response_body["data"]["value"] == pytest.approx([0.99, 2.0, 3.0, 4.0, 5.0], 0.1) + + +def test_plot_data_smoothing_with_wrong_target_node(entry_path): + parameters = { + "uri": f"imas:hdf5?path={entry_path}#core_profiles/time", # targetet quantity must be time-based + "smoothing_method": "gaussian_filter", + "gaussian_smoothing_sigma": 1, + } + response = pytest.test_client.get("/data/plot_data", params=parameters) + assert response.status_code == 466 + + def test_plot_data_2d(entry_path): parameters = { "uri": f"imas:hdf5?path={entry_path}#core_profiles/profiles_2d[:]/ion[:]/temperature", @@ -122,3 +176,32 @@ def test_plot_data_1_N_coord(entry_path): assert numeric_coordinate["ndim"] == 1 assert numeric_coordinate["path"] == "" assert numeric_coordinate["description"] == "1...N" + + +def test_plot_data_requires_gaussian_sigma(entry_path): + parameters = { + "uri": f"imas:hdf5?path={entry_path}#core_profiles/profiles_1d[:]/time", + "smoothing_method": "gaussian_filter", + } + response = pytest.test_client.get("/data/plot_data", params=parameters) + + assert response.status_code == 422 + assert "gaussian_smoothing_sigma is required" in response.text + + +def test_plot_data_requires_savgol_window_length_and_polyorder(entry_path): + base_parameters = { + "uri": f"imas:hdf5?path={entry_path}#core_profiles/profiles_1d[:]/time", + "smoothing_method": "savitzky-golay_filter", + } + + response = pytest.test_client.get("/data/plot_data", params=base_parameters) + assert response.status_code == 422 + assert "savgol_smoothing_window_length is required" in response.text + + response = pytest.test_client.get( + "/data/plot_data", + params={**base_parameters, "savgol_smoothing_window_length": 5}, + ) + 
assert response.status_code == 422 + assert "savgol_smoothing_polyorder is required" in response.text diff --git a/backend/tests/test_data_manipulation.py b/backend/tests/test_data_manipulation.py new file mode 100644 index 00000000..c987bc9f --- /dev/null +++ b/backend/tests/test_data_manipulation.py @@ -0,0 +1,49 @@ +import numpy as np +import pytest +from ibex.data_source.imas_python_source_utils import ( + apply_gaussian_filter, + apply_savgol_filter, +) + + +def test_apply_gaussian_smoothing(): + data = np.array([10.25, 12.8, 15.4, 18.15, 21.0, 24.35, 27.6, 30.2, 33.75, 36.1]) + + expected_sigma_1 = [11.343, 13.002, 15.479, 18.228, 21.18, 24.309, 27.414, 30.417, 33.211, 35.019] + expected_sigma_2 = [13.286, 14.311, 16.176, 18.615, 21.385, 24.291, 27.152, 29.743, 31.761, 32.881] + + assert expected_sigma_1 == pytest.approx(apply_gaussian_filter(data, sigma=1), 0.1) + assert expected_sigma_2 == pytest.approx(apply_gaussian_filter(data, sigma=2), 0.1) + + +def test_apply_savitzky_golay_smoothing(): + data = np.array([10.25, 12.8, 15.4, 18.15, 21.0, 24.35, 27.6, 30.2, 33.75, 36.1]) + + expected_window_5_poly_2 = [10.257, 12.781, 15.413, 18.111, 21.086, 24.346, 27.416, 30.521, 33.426, 36.209] + expected_window_7_poly_3_deriv_1 = [2.533, 4.694, 5.717, 5.69, 6.303, 6.242, 6.338, 6.442, 5.184, 2.735] + + assert expected_window_5_poly_2 == pytest.approx( + apply_savgol_filter( + data, + window_length=5, + polyorder=2, + deriv=0, + delta=1.0, + mode="interp", + cval=0.0, + ), + 0.1, + ) + + assert expected_window_7_poly_3_deriv_1 == pytest.approx( + apply_savgol_filter( + data, + window_length=7, + polyorder=3, + deriv=1, + delta=0.5, + mode="nearest", + cval=0.0, + ), + 0.1, + ) diff --git a/backend/tests/test_ids_info_endpoints.py b/backend/tests/test_ids_info_endpoints.py index 5905a6cd..94fe8b48 100644 --- a/backend/tests/test_ids_info_endpoints.py +++ b/backend/tests/test_ids_info_endpoints.py @@ -91,6 +91,9 @@ def test_find_paths(entry_path): {"path": 
"#core_profiles/ids_properties/version_put/data_dictionary", "has_data": has_data_true}, {"path": "#core_profiles/ids_properties/version_put/access_layer", "has_data": has_data_true}, {"path": "#core_profiles/ids_properties/version_put/access_layer_language", "has_data": has_data_true}, + {"path": "#wall/ids_properties/version_put/data_dictionary", "has_data": has_data_true}, + {"path": "#wall/ids_properties/version_put/access_layer", "has_data": has_data_true}, + {"path": "#wall/ids_properties/version_put/access_layer_language", "has_data": has_data_true}, ] diff --git a/docs/source/_templates/custom-class-template.rst b/docs/source/_templates/custom-class-template.rst index b29757c5..0ad8c217 100644 --- a/docs/source/_templates/custom-class-template.rst +++ b/docs/source/_templates/custom-class-template.rst @@ -8,14 +8,14 @@ :inherited-members: {% block methods %} - .. automethod:: __init__ - {% if methods %} .. rubric:: {{ _('Methods') }} .. autosummary:: {% for item in methods %} + {% if item != '__init__' %} ~{{ name }}.{{ item }} + {% endif %} {%- endfor %} {% endif %} {% endblock %} diff --git a/docs/source/_templates/custom-module-template.rst b/docs/source/_templates/custom-module-template.rst index bd69dd82..89a9dbdc 100644 --- a/docs/source/_templates/custom-module-template.rst +++ b/docs/source/_templates/custom-module-template.rst @@ -51,15 +51,39 @@ {% endif %} {% endblock %} -{% block modules %} -{% if modules %} -.. rubric:: Modules + {% block modules %} + {% if modules %} + .. rubric:: Modules -.. 
autosummary:: - :toctree: - :template: custom-module-template.rst -{% for item in modules | reject("equalto", "test") %} - {{ item }} -{%- endfor %} -{% endif %} -{% endblock %} + {% set schema_modules = [] %} + {% set regular_modules = [] %} + {% for item in modules | reject("equalto", "test") %} + {% if item == "ibex.endpoints.schemas" %} + {% set _ = schema_modules.append(item) %} + {% else %} + {% set _ = regular_modules.append(item) %} + {% endif %} + {% endfor %} + + {% if regular_modules %} + .. autosummary:: + :toctree: + :template: custom-module-template.rst + :recursive: + {% for item in regular_modules %} + {{ item }} + {%- endfor %} + {% endif %} + + {% if schema_modules %} + .. autosummary:: + :toctree: + :template: custom-pydantic-model-template.rst + :recursive: + {% for item in schema_modules %} + {{ item }} + {%- endfor %} + {% endif %} + + {% endif %} + {% endblock %} diff --git a/docs/source/_templates/custom-pydantic-model-template.rst b/docs/source/_templates/custom-pydantic-model-template.rst new file mode 100644 index 00000000..fe699057 --- /dev/null +++ b/docs/source/_templates/custom-pydantic-model-template.rst @@ -0,0 +1,47 @@ +{{ fullname | escape | underline}} + +{% if objtype in ['module', 'package'] %} +.. automodule:: {{ fullname }} + + {% if modules %} + .. rubric:: Modules + + .. autosummary:: + :toctree: + :template: custom-pydantic-model-template.rst + :recursive: + {% for item in modules %} + {{ item }} + {%- endfor %} + {% endif %} + + {% set public_members = [] %} + {% for item in members %} + {% if not item.startswith('_') %} + {% set _ = public_members.append(item) %} + {% endif %} + {% endfor %} + + {% if public_members %} + .. rubric:: Models + + .. autosummary:: + :toctree: + :template: custom-pydantic-model-template.rst + {% for item in public_members %} + {{ fullname }}.{{ item }} + {%- endfor %} + {% endif %} + +{% elif objtype == 'pydantic_model' %} +.. currentmodule:: {{ module }} + +.. 
autopydantic_model:: {{ objname }} + :members: + :undoc-members: + :model-summary-list-order: bysource + :model-show-validator-members: False + :model-show-validator-summary: False + :model-show-config-summary: False + :model-show-json: False +{% endif %} \ No newline at end of file diff --git a/docs/source/developers_manual/backend_development/adding_new_data_manipulation_method.rst b/docs/source/developers_manual/backend_development/adding_new_data_manipulation_method.rst new file mode 100644 index 00000000..287aefde --- /dev/null +++ b/docs/source/developers_manual/backend_development/adding_new_data_manipulation_method.rst @@ -0,0 +1,51 @@ +.. _`Adding new data manipulation method`: + + +Adding a new data manipulation method +-------------------------------------- + +Adding a new data manipulation operation requires updates in three places: the request model, the operation description registry, and the backend execution path. + +Request model +~~~~~~~~~~~~~~ + +Expose the new operation through the request schema in ``backend/ibex/endpoints/schemas/request_data_schemas.py``. + +In practice this usually means: + +* adding a new top-level selector field to ``PlotDataBasicParameters`` if the operation introduces a new method family +* adding a dedicated parameter model when the operation needs extra configuration fields +* extending ``PlotDataRequestModel`` so the new parameters are accepted by ``/data/plot_data/`` +* adding validation in a ``model_validator`` when some parameters are required only for specific operation modes + +This is the layer that defines which query parameters are accepted and how they are validated before the request reaches the data source. + +Operation description registry +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Register the operation in ``backend/ibex/core/data_manipulation_methods.py``. 
+ +This file provides the metadata returned by ``/info/data_manipulation_methods/``, so every new operation should be described there using: + +* ``DataManipulationOperation`` for the operation itself +* ``DataManipulationParameter`` for top-level request parameters +* ``PossibleValue`` for supported modes or variants +* ``AdditionalParameter`` for parameters that are only relevant to a specific mode + +This description should match the request schema exactly. +If a parameter is accepted by ``PlotDataRequestModel``, it should also be reflected here so that the API can describe it consistently. + +Backend execution +~~~~~~~~~~~~~~~~~~ + +Implement the actual operation in ``backend/ibex/data_source/imas_python_source.py``. + +This is where the backend transforms the numerical data returned from the IDS source. + +When adding a new operation: + +* read the parameters from ``plot_data_query`` +* apply the transformation to ``data_to_be_returned`` + +If the logic becomes substantial or reusable, the numerical transformation itself should be extracted into a helper function and then called from the data source flow. 
+ diff --git a/docs/source/developers_manual/backend_development/backend_development.rst b/docs/source/developers_manual/backend_development/backend_development.rst index 6dc22a94..1a06293d 100644 --- a/docs/source/developers_manual/backend_development/backend_development.rst +++ b/docs/source/developers_manual/backend_development/backend_development.rst @@ -6,6 +6,8 @@ Backend development backend_development_introduction data_interpolation + data_manipulation + adding_new_data_manipulation_method adding_new_data_source adding_new_downsampling_method benchmarking diff --git a/docs/source/developers_manual/backend_development/data_manipulation.rst b/docs/source/developers_manual/backend_development/data_manipulation.rst new file mode 100644 index 00000000..dc08be09 --- /dev/null +++ b/docs/source/developers_manual/backend_development/data_manipulation.rst @@ -0,0 +1,87 @@ +.. _`Data manipulation`: + +====================== +Data manipulation +====================== + +Introduction +------------- + +The IBEX backend provides a range of data manipulation techniques that directly affect the shape and appearance of the resulting plots. +These operations are applied as part of the ``/data/plot_data/`` request flow and allow the backend to transform datasets before they are returned to the frontend. + +Data smoothing +--------------- + +IBEX supports smoothing and denoising of returned datasets. +This functionality is intended for cases where the raw signal contains high-frequency noise and a filtered representation is preferred for visualization or analysis. + +Configuration +~~~~~~~~~~~~~~ + +Data smoothing is configured through the ``smoothing_method`` parameter of the ``/data/plot_data/`` endpoint. +It is only accepted for nodes whose first coordinate is ``time``. +If a different first coordinate is used, the backend rejects the request with an invalid-parameters error. 
+ +At the moment, the backend supports the following smoothing methods: + +* ``gaussian_filter`` +* ``savitzky-golay_filter`` + +The full list of available methods and their parameters can be retrieved from the ``/info/data_manipulation_methods/`` endpoint. + +Gaussian smoothing +~~~~~~~~~~~~~~~~~~~ + +The ``gaussian_filter`` method applies a Gaussian kernel to the returned data. +It requires the ``gaussian_smoothing_sigma`` parameter, which defines the standard deviation of the Gaussian kernel. + + +Savitzky-Golay smoothing +~~~~~~~~~~~~~~~~~~~~~~~~~ + +The ``savitzky-golay_filter`` method applies a Savitzky-Golay filter to the returned data. +This approach smooths the data while preserving local shape better than a simple Gaussian filter in many cases. + +The following parameters are supported: + +* ``savgol_smoothing_window_length``: required +* ``savgol_smoothing_polyorder``: required +* ``savgol_smoothing_deriv``: optional +* ``savgol_smoothing_delta``: optional +* ``savgol_smoothing_mode``: optional, one of ``mirror``, ``constant``, ``nearest``, ``wrap``, ``interp`` +* ``savgol_smoothing_cval``: optional + + +Implementation +~~~~~~~~~~~~~~~ + +Data smoothing is applied in the backend after the raw IDS data has been converted to a NumPy array and after the internal 2D data transformation step, when applicable. + +The current implementation uses SciPy-based smoothing routines: + +* Gaussian smoothing is applied with a Gaussian filter implementation. +* Savitzky-Golay smoothing is applied with a Savitzky-Golay filter implementation. + +Because smoothing modifies the returned numerical values, it should be treated as a visualization-oriented transformation and not as a lossless representation of the original dataset. + +Example usage +~~~~~~~~~~~~~~ + +The following examples demonstrate how smoothing can be enabled for testing purposes. + +Gaussian smoothing: + +.. 
code-block:: bash + + curl -X 'GET' \ + '/data/plot_data?uri=&smoothing_method=gaussian_filter&gaussian_smoothing_sigma=1.0' \ + -H 'accept: application/json' + +Savitzky-Golay smoothing: + +.. code-block:: bash + + curl -X 'GET' \ + '/data/plot_data?uri=&smoothing_method=savitzky-golay_filter&savgol_smoothing_window_length=5&savgol_smoothing_polyorder=2' \ + -H 'accept: application/json'