diff --git a/src/uipath_langchain/agent/react/json_utils.py b/src/uipath_langchain/agent/react/json_utils.py index 021a90f45..2b0e82799 100644 --- a/src/uipath_langchain/agent/react/json_utils.py +++ b/src/uipath_langchain/agent/react/json_utils.py @@ -50,10 +50,11 @@ def _recursive_search( for field_name, field_info in current_model.model_fields.items(): annotation = field_info.annotation + json_key = _json_key(field_name, field_info) if current_path: - field_path = f"{current_path}.{field_name}" + field_path = f"{current_path}.{json_key}" else: - field_path = f"$.{field_name}" + field_path = f"$.{json_key}" annotation = _unwrap_optional(annotation) origin = get_origin(annotation) @@ -117,7 +118,7 @@ def extract_values_by_paths( >>> _extract_values_by_paths(obj, paths) [{'id': '123'}, {'id': '456'}, {'id': '789'}] """ - data = obj.model_dump() if isinstance(obj, BaseModel) else obj + data = obj.model_dump(by_alias=True) if isinstance(obj, BaseModel) else obj results = [] for json_path in json_paths: @@ -204,5 +205,15 @@ def _unwrap_lists(annotation: Any) -> tuple[Any, str]: return annotation, suffix +def _json_key(field_name: str, field_info: Any) -> str: + """Get the JSON property name for a field, accounting for aliases. + + When fields are renamed for Pydantic compatibility (e.g. ``_hidden`` → ``hidden_``), + the serialization alias holds the original JSON Schema property name. JSONPath + expressions must use that original name so they match keys in serialized dicts. + """ + return field_info.serialization_alias or field_info.alias or field_name + + def _is_pydantic_model(annotation: Any) -> bool: return isinstance(annotation, type) and issubclass(annotation, BaseModel) diff --git a/src/uipath_langchain/agent/react/jsonschema_pydantic_converter.py b/src/uipath_langchain/agent/react/jsonschema_pydantic_converter.py index c45829a67..95bc650bd 100644 --- a/src/uipath_langchain/agent/react/jsonschema_pydantic_converter.py +++ b/src/uipath_langchain/agent/react/jsonschema_pydantic_converter.py @@ -5,14 +5,17 @@ from jsonschema_pydantic_converter import transform_with_modules from pydantic import BaseModel -from uipath.runtime.errors import UiPathErrorCategory - -from uipath_langchain.agent.exceptions import AgentStartupError, AgentStartupErrorCode # Shared pseudo-module for all dynamically created types # This allows get_type_hints() to resolve forward references _DYNAMIC_MODULE_NAME = "jsonschema_pydantic_converter._dynamic" +# Field names that shadow BaseModel attributes and must be renamed. +# Computed from BaseModel's public interface to stay future-proof across Pydantic versions. +_RESERVED_FIELD_NAMES: frozenset[str] = frozenset( + name for name in dir(BaseModel) if not name.startswith("_") +) + def _get_or_create_dynamic_module() -> ModuleType: """Get or create the shared pseudo-module for dynamic types.""" @@ -25,18 +28,144 @@ def _get_or_create_dynamic_module() -> ModuleType: return sys.modules[_DYNAMIC_MODULE_NAME] +def _needs_rename(name: str) -> bool: + """Check if a JSON Schema property name needs renaming for Pydantic compatibility.""" + return name.startswith("_") or name in _RESERVED_FIELD_NAMES + + +def _safe_field_name( + original: str, existing_keys: set[str], used_keys: set[str] +) -> str: + """Generate a Pydantic-safe field name from a JSON Schema property name. + + Strips leading underscores and avoids collisions with BaseModel attributes + and other property names (both original and already-renamed). + """ + name = original.lstrip("_") or "field" + if name in _RESERVED_FIELD_NAMES: + name += "_" + while name in existing_keys or name in used_keys: + name += "_" + return name + + +def _rename_reserved_properties( + schema: dict[str, Any], +) -> tuple[dict[str, Any], dict[str, str]]: + """Rename JSON Schema properties that are invalid as Pydantic field names. + + Handles two cases: + - Properties starting with ``_`` (Pydantic treats these as private attributes) + - Properties that shadow ``BaseModel`` attributes (e.g. ``schema``, ``copy``) + + Returns: + Tuple of (modified schema copy, {new_field_name: original_name}). + """ + renames: dict[str, str] = {} + + def _process(s: dict[str, Any]) -> dict[str, Any]: + result = s.copy() + + if "properties" in result: + existing_keys = set(result["properties"].keys()) + used_keys: set[str] = set() + new_props: dict[str, Any] = {} + + for key, value in result["properties"].items(): + if _needs_rename(key): + new_key = _safe_field_name(key, existing_keys, used_keys) + renames[new_key] = key + else: + new_key = key + + used_keys.add(new_key) + new_props[new_key] = ( + _process(value) if isinstance(value, dict) else value + ) + result["properties"] = new_props + + if "required" in result: + # Build a lookup from original→renamed for this level only + local_renames = {v: k for k, v in renames.items() if v in existing_keys} + result["required"] = [ + local_renames.get(name, name) for name in result["required"] + ] + + for defs_key in ("$defs", "definitions"): + if defs_key in result: + result[defs_key] = { + k: (_process(v) if isinstance(v, dict) else v) + for k, v in result[defs_key].items() + } + + if "items" in result and isinstance(result["items"], dict): + result["items"] = _process(result["items"]) + + for keyword in ("allOf", "anyOf", "oneOf"): + if keyword in result: + result[keyword] = [ + _process(sub) if isinstance(sub, dict) else sub + for sub in result[keyword] + ] + + if "not" in result and isinstance(result["not"], dict): + result["not"] = _process(result["not"]) + + for keyword in ("if", "then", "else"): + if keyword in result and isinstance(result[keyword], dict): + result[keyword] = _process(result[keyword]) + + return result + + modified = _process(schema) + return modified, renames + + +def _apply_field_aliases( + model: Type[BaseModel], + namespace: dict[str, Any], + renames: dict[str, str], +) -> None: + """Add aliases to renamed fields so serialization/validation uses original names. + + Iterates the root model and all nested models from the namespace. For any + field whose name appears in ``renames``, sets alias/validation_alias/ + serialization_alias to the original property name and enables + ``populate_by_name`` + ``serialize_by_alias`` in the model config. + """ + if not renames: + return + + all_models = [model] + for v in namespace.values(): + if inspect.isclass(v) and issubclass(v, BaseModel): + all_models.append(v) + + for m in all_models: + needs_rebuild = False + for field_name, field_info in m.model_fields.items(): + if field_name in renames: + original_name = renames[field_name] + field_info.alias = original_name + field_info.validation_alias = original_name + field_info.serialization_alias = original_name + needs_rebuild = True + + if needs_rebuild: + m.model_config = { + **m.model_config, + "populate_by_name": True, + "serialize_by_alias": True, + } + m.model_rebuild(force=True) + + def create_model( schema: dict[str, Any], ) -> Type[BaseModel]: - if has_underscore_fields(schema): - raise AgentStartupError( - code=AgentStartupErrorCode.UNDERSCORE_SCHEMA, - title="Schema contains properties starting with '_'", - detail="Schema properties starting with '_' are currently not supported. If they are unavoidable, please contact UiPath Support", - category=UiPathErrorCategory.USER, - ) - - model, namespace = transform_with_modules(schema) + processed_schema, renames = _rename_reserved_properties(schema) + model, namespace = transform_with_modules(processed_schema) + _apply_field_aliases(model, namespace, renames) corrected_namespace: dict[str, Any] = {} def collect_types(annotation: Any) -> None: diff --git a/tests/agent/react/test_json_utils.py b/tests/agent/react/test_json_utils.py index da7397544..3d4404acc 100644 --- a/tests/agent/react/test_json_utils.py +++ b/tests/agent/react/test_json_utils.py @@ -1,11 +1,12 @@ -from typing import Optional +from typing import Any, Optional -from pydantic import BaseModel, RootModel +from pydantic import BaseModel, ConfigDict, Field, RootModel from uipath_langchain.agent.react.json_utils import ( extract_values_by_paths, get_json_paths_by_type, ) +from uipath_langchain.agent.react.jsonschema_pydantic_converter import create_model class Target(BaseModel): @@ -151,3 +152,176 @@ def test_extract_no_paths(self): def test_extract_path_not_found(self): values = extract_values_by_paths({"a": 1}, ["$.missing"]) assert values == [] + + +# -- aliased fields (renamed by create_model) --------------------------------- + + +class TestJsonPathsWithAliasedFields: + """Verify that JSONPath extraction works correctly when fields have been + renamed by create_model (underscore-prefixed or reserved names).""" + + def test_underscore_field_jsonpath_uses_alias(self): + """JSONPath must use the original '_attachment' name, not the Python 'attachment'.""" + + class Attachment(BaseModel): + id: str + + class ModelWithAlias(BaseModel): + model_config = ConfigDict(populate_by_name=True, serialize_by_alias=True) + + attachment: Attachment = Field( + alias="_attachment", serialization_alias="_attachment" + ) + + paths = get_json_paths_by_type(ModelWithAlias, "Attachment") + assert paths == ["$._attachment"] + + def test_reserved_field_jsonpath_uses_alias(self): + """JSONPath must use the original 'schema' alias, not the Python 'schema_'.""" + + class Attachment(BaseModel): + id: str + + class ModelWithReserved(BaseModel): + model_config = ConfigDict(populate_by_name=True, serialize_by_alias=True) + + schema_: Attachment = Field(alias="schema", serialization_alias="schema") + + paths = get_json_paths_by_type(ModelWithReserved, "Attachment") + assert paths == ["$.schema"] + + def test_extract_values_from_dict_with_alias_keys(self): + """extract_values_by_paths must find values using alias-keyed dicts.""" + + class Attachment(BaseModel): + id: str + + class ModelWithAlias(BaseModel): + model_config = ConfigDict(populate_by_name=True, serialize_by_alias=True) + + attachment: Attachment = Field( + alias="_attachment", serialization_alias="_attachment" + ) + + paths = get_json_paths_by_type(ModelWithAlias, "Attachment") + # Dict uses original/alias key names (as LLM or API would produce) + data = {"_attachment": {"id": "abc-123"}} + values = extract_values_by_paths(data, paths) + assert values == [{"id": "abc-123"}] + + def test_extract_values_from_model_with_alias(self): + """extract_values_by_paths on a BaseModel must dump with aliases.""" + + class Attachment(BaseModel): + id: str + + class ModelWithAlias(BaseModel): + model_config = ConfigDict(populate_by_name=True, serialize_by_alias=True) + + attachment: Attachment = Field( + alias="_attachment", serialization_alias="_attachment" + ) + + paths = get_json_paths_by_type(ModelWithAlias, "Attachment") + obj = ModelWithAlias.model_validate({"_attachment": {"id": "abc-123"}}) + values = extract_values_by_paths(obj, paths) + assert values == [{"id": "abc-123"}] + + def test_create_model_with_underscore_attachment_field(self): + """End-to-end: create_model + JSONPath for an underscore attachment field. + + Uses 'job-attachment' definition name (production format) which the library + converts internally to namespace key '__Job_attachment'. + """ + schema: dict[str, Any] = { + "type": "object", + "title": "Input", + "properties": { + "_file": {"$ref": "#/definitions/job-attachment"}, + "name": {"type": "string"}, + }, + "definitions": { + "job-attachment": { + "type": "object", + "properties": { + "ID": {"type": "string"}, + "full_name": {"type": "string"}, + }, + } + }, + } + model = create_model(schema) + + # JSONPath should use the original "_file" name (the alias) + paths = get_json_paths_by_type(model, "__Job_attachment") + assert paths == ["$._file"] + + # Extract from a dict with original keys (as LLM would produce) + data = {"_file": {"ID": "uuid-1", "full_name": "report.pdf"}, "name": "test"} + values = extract_values_by_paths(data, paths) + assert len(values) == 1 + assert values[0]["ID"] == "uuid-1" + + def test_create_model_with_reserved_attachment_field(self): + """End-to-end: create_model + JSONPath for a reserved-name attachment field.""" + schema: dict[str, Any] = { + "type": "object", + "title": "Input", + "properties": { + "copy": {"$ref": "#/definitions/job-attachment"}, + }, + "definitions": { + "job-attachment": { + "type": "object", + "properties": { + "ID": {"type": "string"}, + "full_name": {"type": "string"}, + }, + } + }, + } + model = create_model(schema) + + paths = get_json_paths_by_type(model, "__Job_attachment") + assert paths == ["$.copy"] + + data = {"copy": {"ID": "uuid-2", "full_name": "backup.zip"}} + values = extract_values_by_paths(data, paths) + assert len(values) == 1 + assert values[0]["ID"] == "uuid-2" + + def test_create_model_attachment_list_with_underscore(self): + """End-to-end: underscore attachment field inside a list.""" + schema: dict[str, Any] = { + "type": "object", + "title": "Input", + "properties": { + "_files": { + "type": "array", + "items": {"$ref": "#/definitions/job-attachment"}, + }, + }, + "definitions": { + "job-attachment": { + "type": "object", + "properties": { + "ID": {"type": "string"}, + "full_name": {"type": "string"}, + }, + } + }, + } + model = create_model(schema) + + paths = get_json_paths_by_type(model, "__Job_attachment") + assert paths == ["$._files[*]"] + + data = { + "_files": [ + {"ID": "uuid-a", "full_name": "a.pdf"}, + {"ID": "uuid-b", "full_name": "b.pdf"}, + ] + } + values = extract_values_by_paths(data, paths) + assert len(values) == 2 diff --git a/tests/agent/react/test_schemas.py b/tests/agent/react/test_schemas.py index 33fdc22c5..245945599 100644 --- a/tests/agent/react/test_schemas.py +++ b/tests/agent/react/test_schemas.py @@ -3,9 +3,11 @@ from typing import Any import pytest +from pydantic import BaseModel -from uipath_langchain.agent.exceptions import AgentStartupError, AgentStartupErrorCode from uipath_langchain.agent.react.jsonschema_pydantic_converter import ( + _RESERVED_FIELD_NAMES, + _rename_reserved_properties, create_model, has_underscore_fields, ) @@ -275,37 +277,362 @@ def test_returns_false(self, schema: dict[str, Any]) -> None: assert has_underscore_fields(schema) is False -class TestCreateModelRejectsUnderscoreFields: - def test_top_level_underscore_field(self) -> None: +class TestCreateModelWithUnderscoreFields: + """Tests for create_model aliasing of underscore-prefixed fields.""" + + def test_underscore_field_creates_valid_model(self) -> None: schema = { "title": "Input", "type": "object", "properties": { + "_hidden": {"type": "string"}, "name": {"type": "string"}, + }, + } + model = create_model(schema) + + assert issubclass(model, BaseModel) + assert callable(model.model_json_schema) + + def test_underscore_field_validate_and_dump(self) -> None: + schema = { + "title": "Input", + "type": "object", + "properties": { "_hidden": {"type": "string"}, + "name": {"type": "string"}, }, } - with pytest.raises(AgentStartupError) as exc_info: - create_model(schema) - assert exc_info.value.error_info.code == AgentStartupError.full_code( - AgentStartupErrorCode.UNDERSCORE_SCHEMA - ) + model = create_model(schema) + + instance = model.model_validate({"_hidden": "secret", "name": "alice"}) + dumped = instance.model_dump() + + assert dumped == {"_hidden": "secret", "name": "alice"} + + def test_underscore_field_json_schema_shows_original(self) -> None: + schema = { + "title": "Input", + "type": "object", + "properties": { + "_hidden": {"type": "string"}, + }, + } + model = create_model(schema) + + json_schema = model.model_json_schema() + assert "_hidden" in json_schema["properties"] - def test_nested_underscore_field(self) -> None: + def test_underscore_field_that_would_also_shadow_basemodel(self) -> None: + """'_schema' strips to 'schema' which is reserved — should still work.""" schema = { "title": "Input", "type": "object", "properties": { - "outer": { + "_schema": {"type": "string"}, + "name": {"type": "string"}, + }, + } + model = create_model(schema) + + instance = model.model_validate({"_schema": "val", "name": "n"}) + assert instance.model_dump() == {"_schema": "val", "name": "n"} + + def test_underscore_field_collision_with_stripped_name(self) -> None: + """Schema has both '_hidden' and 'hidden' — no collision after rename.""" + schema = { + "title": "Input", + "type": "object", + "properties": { + "_hidden": {"type": "string"}, + "hidden": {"type": "string"}, + }, + } + model = create_model(schema) + + instance = model.model_validate({"_hidden": "a", "hidden": "b"}) + dumped = instance.model_dump() + + assert dumped["_hidden"] == "a" + assert dumped["hidden"] == "b" + + +class TestRenameReservedProperties: + """Tests for _rename_reserved_properties schema pre-processing.""" + + def test_renames_schema_field(self) -> None: + schema = { + "type": "object", + "properties": { + "schema": {"type": "string"}, + "name": {"type": "string"}, + }, + "required": ["schema", "name"], + } + modified, renames = _rename_reserved_properties(schema) + + assert "schema_" in modified["properties"] + assert "schema" not in modified["properties"] + assert "name" in modified["properties"] + assert renames == {"schema_": "schema"} + assert modified["required"] == ["schema_", "name"] + + def test_renames_multiple_reserved_fields(self) -> None: + schema = { + "type": "object", + "properties": { + "schema": {"type": "string"}, + "copy": {"type": "string"}, + "validate": {"type": "string"}, + "name": {"type": "string"}, + }, + } + modified, renames = _rename_reserved_properties(schema) + + assert "schema_" in modified["properties"] + assert "copy_" in modified["properties"] + assert "validate_" in modified["properties"] + assert "name" in modified["properties"] + assert len(renames) == 3 + + def test_handles_collision_with_existing_field(self) -> None: + """When 'schema_' already exists, 'schema' should become 'schema__'.""" + schema = { + "type": "object", + "properties": { + "schema": {"type": "string"}, + "schema_": {"type": "string"}, + }, + } + modified, renames = _rename_reserved_properties(schema) + + assert "schema__" in modified["properties"] + assert "schema_" in modified["properties"] + assert renames["schema__"] == "schema" + + def test_renames_in_defs(self) -> None: + schema = { + "type": "object", + "properties": {"name": {"type": "string"}}, + "$defs": { + "Inner": { "type": "object", "properties": { - "_secret": {"type": "integer"}, + "schema": {"type": "string"}, }, + "required": ["schema"], }, }, } - with pytest.raises(AgentStartupError) as exc_info: - create_model(schema) - assert exc_info.value.error_info.code == AgentStartupError.full_code( - AgentStartupErrorCode.UNDERSCORE_SCHEMA + modified, renames = _rename_reserved_properties(schema) + + inner = modified["$defs"]["Inner"] + assert "schema_" in inner["properties"] + assert inner["required"] == ["schema_"] + + def test_does_not_modify_original_schema(self) -> None: + schema = { + "type": "object", + "properties": {"schema": {"type": "string"}}, + } + _rename_reserved_properties(schema) + + assert "schema" in schema["properties"] + + def test_no_renames_for_normal_fields(self) -> None: + schema = { + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + }, + } + modified, renames = _rename_reserved_properties(schema) + + assert renames == {} + assert modified["properties"] == schema["properties"] + + def test_renames_underscore_field(self) -> None: + schema = { + "type": "object", + "properties": { + "_hidden": {"type": "string"}, + "name": {"type": "string"}, + }, + "required": ["_hidden", "name"], + } + modified, renames = _rename_reserved_properties(schema) + + assert "hidden" in modified["properties"] + assert "_hidden" not in modified["properties"] + assert renames == {"hidden": "_hidden"} + assert modified["required"] == ["hidden", "name"] + + def test_underscore_field_stripped_to_reserved_name(self) -> None: + """'_schema' strips to 'schema' which is reserved — gets extra '_'.""" + schema = { + "type": "object", + "properties": { + "_schema": {"type": "string"}, + }, + } + modified, renames = _rename_reserved_properties(schema) + + assert "schema_" in modified["properties"] + assert renames == {"schema_": "_schema"} + + def test_underscore_field_collision_with_existing(self) -> None: + """'_hidden' strips to 'hidden', but 'hidden' already exists.""" + schema = { + "type": "object", + "properties": { + "_hidden": {"type": "string"}, + "hidden": {"type": "string"}, + }, + } + modified, renames = _rename_reserved_properties(schema) + + assert "hidden" in modified["properties"] + assert "hidden_" in modified["properties"] + assert renames["hidden_"] == "_hidden" + + def test_mixed_underscore_and_reserved(self) -> None: + schema = { + "type": "object", + "properties": { + "_secret": {"type": "string"}, + "schema": {"type": "string"}, + "name": {"type": "string"}, + }, + } + modified, renames = _rename_reserved_properties(schema) + + assert "secret" in modified["properties"] + assert "schema_" in modified["properties"] + assert "name" in modified["properties"] + assert renames == {"secret": "_secret", "schema_": "schema"} + + +class TestCreateModelWithReservedFields: + """Tests for create_model handling of reserved field names.""" + + def test_schema_field_creates_valid_model(self) -> None: + schema = { + "title": "Input", + "type": "object", + "properties": { + "schema": {"type": "string"}, + "name": {"type": "string"}, + }, + } + model = create_model(schema) + + assert issubclass(model, BaseModel) + # BaseModel methods should still work + assert callable(model.model_json_schema) + assert callable(model.model_validate) + + def test_model_validate_accepts_original_names(self) -> None: + schema = { + "title": "Input", + "type": "object", + "properties": { + "schema": {"type": "string"}, + "name": {"type": "string"}, + }, + } + model = create_model(schema) + + instance = model.model_validate({"schema": "test_val", "name": "alice"}) + assert instance is not None + + def test_model_dump_outputs_original_names(self) -> None: + schema = { + "title": "Input", + "type": "object", + "properties": { + "schema": {"type": "string"}, + "name": {"type": "string"}, + }, + } + model = create_model(schema) + + instance = model.model_validate({"schema": "test_val", "name": "alice"}) + dumped = instance.model_dump() + + assert dumped == {"schema": "test_val", "name": "alice"} + + def test_model_dump_json_mode_outputs_original_names(self) -> None: + schema = { + "title": "Input", + "type": "object", + "properties": { + "schema": {"type": "string"}, + }, + } + model = create_model(schema) + + instance = model.model_validate({"schema": "val"}) + dumped = instance.model_dump(mode="json") + + assert dumped == {"schema": "val"} + + def test_model_json_schema_shows_original_names(self) -> None: + schema = { + "title": "Input", + "type": "object", + "properties": { + "schema": {"type": "string"}, + "copy": {"type": "integer"}, + }, + } + model = create_model(schema) + + json_schema = model.model_json_schema() + assert "schema" in json_schema["properties"] + assert "copy" in json_schema["properties"] + assert "schema_" not in json_schema["properties"] + assert "copy_" not in json_schema["properties"] + + def test_multiple_reserved_fields(self) -> None: + schema = { + "title": "Input", + "type": "object", + "properties": { + "schema": {"type": "string"}, + "copy": {"type": "string"}, + "validate": {"type": "string"}, + "name": {"type": "string"}, + }, + } + model = create_model(schema) + + instance = model.model_validate( + {"schema": "s", "copy": "c", "validate": "v", "name": "n"} ) + dumped = instance.model_dump() + + assert dumped == {"schema": "s", "copy": "c", "validate": "v", "name": "n"} + + def test_model_fields_field_does_not_shadow(self) -> None: + """'model_fields' is in Pydantic's protected namespace — must not crash.""" + schema = { + "title": "Input", + "type": "object", + "properties": { + "model_fields": {"type": "string"}, + "name": {"type": "string"}, + }, + } + model = create_model(schema) + + # model.model_fields should still be the Pydantic descriptor, not a field value + assert isinstance(model.model_fields, dict) + instance = model.model_validate({"model_fields": "test", "name": "n"}) + assert instance.model_dump() == {"model_fields": "test", "name": "n"} + + def test_reserved_field_names_constant_contains_known_problematic_names( + self, + ) -> None: + known_problematic = {"schema", "copy", "validate", "dict", "json", "construct"} + assert known_problematic.issubset(_RESERVED_FIELD_NAMES)