Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
7284f0d
Improve pydantic models #520
joshdimanteto Jan 9, 2026
24be733
Merge branch 'upgrade-to-pydantic-V2-#520' into refactor-dg-api-model…
joshdimanteto Jan 9, 2026
f0b4bf6
refactor: datagateway api model to use pydantic V2 #522
joshdimanteto Jan 9, 2026
d9bd46f
use endpoint_dict for swagger #522
joshdimanteto Jan 11, 2026
d53c565
Auto generated pydantic using ICAT entity type #519
joshdimanteto Jan 15, 2026
2492b49
add docstring to build_models #522
joshdimanteto Jan 16, 2026
cc387dc
Fix minor bugs
joshdimanteto Jan 16, 2026
6a04939
Merge branch 'upgrade-to-pydantic-V2-#520' into refactor-dg-api-model…
joshdimanteto Feb 25, 2026
6896015
make the relational fields optional on schemas #522
joshdimanteto Feb 25, 2026
1f4707f
Merge branch 'upgrade-to-pydantic-V2-#520' into refactor-dg-api-model…
joshdimanteto Feb 25, 2026
57cb6fb
fix linting and failing integration test #522
joshdimanteto Feb 25, 2026
6153cf2
Make all field value schema optional #522
joshdimanteto Feb 25, 2026
7a007ee
fix linting #522
joshdimanteto Feb 25, 2026
edfcce6
Merge branch 'upgrade-to-pydantic-V2-#520' into refactor-dg-api-model…
joshdimanteto Mar 2, 2026
bf4bd8a
Address review comments #522
joshdimanteto Mar 2, 2026
40b50da
Merge branch 'upgrade-to-pydantic-V2-#520' into refactor-dg-api-model…
joshdimanteto Mar 2, 2026
e8e0b9f
Merge branch 'upgrade-to-pydantic-V2-#520' into refactor-dg-api-model…
joshdimanteto Mar 12, 2026
bf6083f
Merge branch 'upgrade-to-pydantic-V2-#520' into refactor-dg-api-model…
joshdimanteto Mar 13, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 3 additions & 10 deletions datagateway_api/src/api_start_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@
from flask_swagger_ui import get_swaggerui_blueprint

from datagateway_api.src.common.config import Config
from datagateway_api.src.datagateway_api.icat.icat_client_pool import create_client_pool
from datagateway_api.src.datagateway_api.icat.python_icat import PythonICAT
from datagateway_api.src.resources.entities.entity_endpoint import (
get_count_endpoint,
get_endpoint,
Expand Down Expand Up @@ -102,15 +100,15 @@ def create_search_api_spec():
)


def create_app_infrastructure(flask_app):
def create_app_infrastructure(flask_app, datagateway_model_list):
CORS(flask_app)
flask_app.url_map.strict_slashes = False
api = CustomErrorHandledApi(flask_app)
specs = []
if Config.config.datagateway_api is not None:
configure_datagateway_api_swaggerui_blueprint(flask_app)
datagateway_api_spec = create_datagateway_api_spec()
initialise_datagateway_api_spec(datagateway_api_spec)
initialise_datagateway_api_spec(datagateway_api_spec, datagateway_model_list)
specs.append(datagateway_api_spec)
if Config.config.search_api is not None:
configure_search_api_swaggerui_blueprint(flask_app)
Expand All @@ -121,19 +119,14 @@ def create_app_infrastructure(flask_app):
return api, specs


def create_api_endpoints(flask_app, api, specs):
def create_api_endpoints(flask_app, api, specs, python_icat, icat_client_pool):
# DataGateway API endpoints
if Config.config.datagateway_api is not None:
datagateway_api_spec = next(
(spec for spec in specs if spec.title == "DataGateway API"),
None,
)

python_icat = PythonICAT()

# Create client pool
icat_client_pool = create_client_pool()

datagateway_api_extension = Config.config.datagateway_api.extension
for entity_name in endpoints:
get_endpoint_resource = get_endpoint(
Expand Down
11 changes: 10 additions & 1 deletion datagateway_api/src/common/date_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,22 @@ class DateHandler:
def is_str_a_date(potential_date):
"""
This function identifies if a string contains a date. This function doesn't
detect which format the date is, just if there's a date or not.
detect which format the date is, just if there's a date or not. An additional
check is performed to ensure purely numeric strings (e.g. "5", "20200101") are
not incorrectly treated as dates.


:param potential_date: String data that could contain a date of any format
:type potential_date: :class:`str`
:return: Boolean to signify whether `potential_date` is a date or not
"""

text = potential_date.strip()

# Reject if the string is just digits (like "5" or "20200101")
if text.isdigit():
return False

try:
# Disabled fuzzy to avoid picking up dates in things like descriptions etc.
parse(potential_date, fuzzy=False)
Expand Down
31 changes: 3 additions & 28 deletions datagateway_api/src/common/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@

from flask import request
from flask_restful import reqparse
from pydantic import ValidationError
import requests
from sqlalchemy.exc import IntegrityError


from datagateway_api.src.common.date_handler import DateHandler
from datagateway_api.src.common.exceptions import (
Expand All @@ -16,8 +17,6 @@
FilterError,
MissingCredentialsError,
)
from datagateway_api.src.datagateway_api.icat import models
from datagateway_api.src.resources.entities.entity_endpoint_dict import endpoints

log = logging.getLogger()

Expand All @@ -43,7 +42,7 @@ def wrapper_gets_records(*args, **kwargs):
except TypeError as e:
log.exception(e.args)
raise BadRequestError() from e
except IntegrityError as e:
except ValidationError as e:
log.exception(e.args)
raise BadRequestError() from e

Expand Down Expand Up @@ -129,30 +128,6 @@ def get_filters_from_query_string(api_type, entity_name=None):
raise FilterError(e) from e


def get_entity_object_from_name(entity_name):
"""
From an entity name, this function gets a Python version of that entity for the
Python ICAT

:param entity_name: Name of the entity to fetch a version from this model
:type entity_name: :class:`str`
:return: Object of the entity requested (e.g.
:class:`.datagateway_api.icat.models.INVESTIGATIONINSTRUMENT`)
:raises: KeyError: If an entity model cannot be found as a class in this model
"""
try:
# If a plural is given, fetch the singular field name
if entity_name[-1] == "s":
entity_name = entity_name[0].upper() + entity_name[1:]
entity_name = endpoints[entity_name]

return getattr(models, entity_name.upper())
except KeyError as e:
raise ApiError(
f"Entity class cannot be found, missing class for {entity_name}",
) from e


def get_icat_properties(icat_url, icat_check_cert):
"""
ICAT properties can be retrieved using Python ICAT's client object, however this
Expand Down
165 changes: 165 additions & 0 deletions datagateway_api/src/datagateway_api/build_models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
from datetime import datetime
import logging
from typing import Annotated, List, Optional, Union

from icat.exception import ICATError
from pydantic import BaseModel, create_model, Field

from datagateway_api.src.common.exceptions import PythonICATError
from datagateway_api.src.datagateway_api.icat.helpers import get_cached_client

log = logging.getLogger()


# Maps the attribute type names reported on ICAT entity fields (`field.type`) to
# the Python types used in the generated Pydantic models. Type names missing from
# this mapping fall back to `str` in `build_datagateway_api_model`.
# NOTE(review): "Date" maps to `str` here, whereas `ICATBaseEntity` types its time
# fields as `datetime` - confirm this difference is intentional.
TYPE_MAP = {
    "String": str,
    "Long": int,
    "Date": str,
    "Boolean": bool,
    "Double": float,
}

# Field names skipped by `build_datagateway_api_model` when it generates the
# per-entity fields. The same names are declared (as aliases) on `ICATId` and
# `ICATBaseEntity`, so models built on those bases still carry them.
SYSTEM_FIELDS = {
    "id",
    "createId",
    "modId",
    "createTime",
    "modTime",
}


# Pydantic model holding only the ICAT object identifier. It is the base class of
# `ICATBaseEntity` and of the generated `<Entity>Patch` models.
class ICATId(BaseModel):
    # Optional identifier, defaulting to None and exposed under the alias "id"
    id_: Annotated[Optional[int], Field(None, alias="id")]


# Base class of every generated entity model: extends `ICATId` with the
# create/modify audit fields. Each field is optional, defaults to None and is
# exposed under its camelCase alias.
class ICATBaseEntity(ICATId):
    # Alias "createId"
    create_id: Annotated[Optional[str], Field(None, alias="createId")]
    # Alias "createTime"
    create_time: Annotated[Optional[datetime], Field(None, alias="createTime")]
    # Alias "modId"
    mod_id: Annotated[Optional[str], Field(None, alias="modId")]
    # Alias "modTime"
    mod_time: Annotated[Optional[datetime], Field(None, alias="modTime")]


def build_datagateway_api_model(**kwargs):
    """
    Dynamically construct Pydantic models for all ICAT entities exposed by the
    connected ICAT server.

    The ICAT server is queried for its entity names and, for each entity, its
    field information. Three models are generated per entity:

    - The entity model (e.g. `Investigation`), based on `ICATBaseEntity`
    - A POST model (e.g. `InvestigationPost`), based on plain `BaseModel`
    - A PATCH model (e.g. `InvestigationPatch`), based on `ICATId`

    Attribute fields are mapped to Python types using `TYPE_MAP` (unknown ICAT
    types fall back to `str`). Relationship fields are typed as follows:

    - Entity model: a forward reference to the related entity model, wrapped in
      `List[...]` when the relationship type is "MANY"
    - POST/PATCH models: `List['<Related>Post']` for "MANY" relationships,
      otherwise an `int`

    Every generated field is optional with a default of `None`; nullability
    reported by ICAT is deliberately not enforced because values support the
    distinct filter operator, which may request one or many values from a given
    object. The field's `comment` attribute, when present, becomes the field
    description.

    All generated models are finally rebuilt (`model_rebuild`) using the full
    model namespace so that forward references between models resolve correctly.

    Parameters
    ----------
    **kwargs :
        Optional configuration parameters. Expected keys:
        - `client_pool`: A pool or cache of ICAT clients, passed into
          `get_cached_client`.

    Returns
    -------
    dict
        A dictionary mapping model names (e.g. `"Investigation"`,
        `"InvestigationPost"`, `"InvestigationPatch"`) to their corresponding
        dynamically generated Pydantic model classes.

    Raises
    ------
    PythonICATError
        If the ICAT server reports an error while fetching entity names or
        entity schema information.

    Notes
    -----
    - Fields named in `SYSTEM_FIELDS` are not generated per entity; entity models
      inherit them from `ICATBaseEntity` and PATCH models inherit `id` from
      `ICATId`.
    - The POST and PATCH models share the same generated fields; they differ only
      in their base class.
    """

    log.info("Building datagateway models")

    client = get_cached_client(None, kwargs.get("client_pool"))

    try:
        entity_names = client.getEntityNames()
    except ICATError as e:
        raise PythonICATError(e) from e

    datagateway_api_models = {}

    for name in entity_names:
        # Convert ICAT failures here too, so callers only ever see the documented
        # PythonICATError regardless of which schema call failed
        try:
            info = client.getEntityInfo(name)
        except ICATError as e:
            raise PythonICATError(e) from e

        fields = {}
        post_fields = {}

        for field in info.fields:
            if field.name in SYSTEM_FIELDS:
                continue

            field_metadata = Field(description=getattr(field, "comment", None))

            if field.relType == "ATTRIBUTE":
                entity_type = Optional[TYPE_MAP.get(field.type, str)]
                post_type = entity_type
            elif field.relType == "MANY":
                # Related models may not exist yet, so they are referenced by name
                # and resolved by `model_rebuild` once every model has been built
                entity_type = f"Optional[List['{field.type}']]"  # noqa: B907
                post_type = Optional[f"List['{field.type}Post']"]  # noqa: B907
            else:
                entity_type = f"Optional['{field.type}']"  # noqa: B907
                post_type = Optional[int]

            fields[field.name] = (Annotated[entity_type, field_metadata], None)
            post_fields[field.name] = (Annotated[post_type, field_metadata], None)

        datagateway_api_models[name] = create_model(
            name, __base__=ICATBaseEntity, **fields,
        )
        datagateway_api_models[f"{name}Post"] = create_model(
            f"{name}Post", **post_fields,
        )
        datagateway_api_models[f"{name}Patch"] = create_model(
            f"{name}Patch", __base__=ICATId, **post_fields,
        )

    # The namespace is identical for every model, so build it once
    types_namespace = {
        **datagateway_api_models,
        "List": List,
        "Optional": Optional,
        "Union": Union,
    }
    for model in datagateway_api_models.values():
        model.model_rebuild(_types_namespace=types_namespace)

    log.info("Finished building all datagateway models")
    return datagateway_api_models
17 changes: 17 additions & 0 deletions datagateway_api/src/datagateway_api/icat/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -573,6 +573,23 @@ def create_entities(client, entity_type, data):
else:
# This means the attribute has a relationship with another object
try:
# TODO:
# The field "value" can be either List[TYPE] or int,
# but only the single-object case works correctly.
#
# When a field requires a list of objects, the API fails
# because the list type is not handled during creation.
#
# Even when forcing it to work by using the wrong type, the
# GET request still does not return the one-to-one related
# values (e.g. "Facility f INCLUDE f.parameterTypes").
#
# After attempting to fix the GET behaviour, the create
# operation now throws a duplicate reference error when
# saving related entities.
#
# Fix list handling, one-to-one include behaviour, and
# duplicate reference errors.
Comment on lines +576 to +592
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is fixed in #519

related_object = client.get(entity_info.type, value)
except ICATNoObjectError as e:
raise BadRequestError(e) from e
Expand Down
Loading
Loading