Skip to content

Commit adc3f20

Browse files
committed
Speed up validate() with validator caching and optional schema-check fast path
1 parent dd5d24c commit adc3f20

File tree

3 files changed

+198
-16
lines changed

3 files changed

+198
-16
lines changed

docs/contributing.rst

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,3 +74,22 @@ To run all checks on all files, enter:
7474
pre-commit run --all-files
7575
7676
Pre-commit check results are also attached to your PR through the GitHub Actions integration.
77+
78+
Performance benchmark
79+
^^^^^^^^^^^^^^^^^^^^^
80+
81+
To collect a local benchmark report for validation performance, run:
82+
83+
.. code-block:: console
84+
85+
poetry run python benchmarks/run.py --output reports/benchmarks/current.json
86+
87+
To compare two benchmark reports and optionally fail on regressions, run:
88+
89+
.. code-block:: console
90+
91+
poetry run python benchmarks/compare.py \
92+
--baseline reports/benchmarks/baseline.json \
93+
--candidate reports/benchmarks/current.json \
94+
--regression-threshold 5 \
95+
--fail-on-regression

openapi_schema_validator/shortcuts.py

Lines changed: 128 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,10 @@
1+
from __future__ import annotations
2+
3+
from collections import OrderedDict
4+
from dataclasses import dataclass
5+
from threading import RLock
16
from typing import Any
7+
from typing import Hashable
28
from typing import Mapping
39
from typing import cast
410

@@ -11,14 +17,79 @@
1117
from openapi_schema_validator.validators import OAS32Validator
1218
from openapi_schema_validator.validators import check_openapi_schema
1319

20+
_VALIDATOR_CACHE_MAX_SIZE = 128
21+
22+
23+
@dataclass
24+
class _CachedValidator:
25+
validator: Any
26+
schema_checked: bool
27+
28+
29+
# Validators keyed by their cache key. Insertion order doubles as recency
# order: hits are moved to the end and eviction pops from the front.
_validator_cache: OrderedDict[Hashable, _CachedValidator] = OrderedDict()
# Serializes reads and writes of ``_validator_cache``.
_validator_cache_lock = RLock()
# Shared empty registry used as the default ``registry`` when remote
# references are not allowed, instead of building a new one per call.
_LOCAL_ONLY_REGISTRY = Registry()
32+
33+
34+
def _freeze_value(value: Any) -> Hashable:
35+
if isinstance(value, dict):
36+
return tuple(
37+
sorted((str(k), _freeze_value(v)) for k, v in value.items())
38+
)
39+
if isinstance(value, list):
40+
return tuple(_freeze_value(item) for item in value)
41+
if isinstance(value, tuple):
42+
return tuple(_freeze_value(item) for item in value)
43+
if isinstance(value, set):
44+
return tuple(
45+
sorted(
46+
(_freeze_value(item) for item in value),
47+
key=repr,
48+
)
49+
)
50+
if isinstance(value, (str, bytes, int, float, bool, type(None))):
51+
return value
52+
return ("id", id(value))
53+
54+
55+
def _schema_fingerprint(schema: Mapping[str, Any]) -> Hashable:
    """
    Return a hashable fingerprint of ``schema`` for use in a cache key.

    Args:
        schema: Schema mapping to fingerprint.

    Returns:
        The frozen, hashable form of the schema's contents.
    """
    # Copy into a plain ``dict`` first: ``_freeze_value`` only recurses into
    # real ``dict`` instances, not arbitrary ``Mapping`` implementations.
    schema_as_dict = dict(schema)
    return _freeze_value(schema_as_dict)
57+
58+
59+
def _cache_key(
    schema: Mapping[str, Any],
    cls: type[Validator],
    args: tuple[Any, ...],
    kwargs: Mapping[str, Any],
    allow_remote_references: bool,
) -> Hashable:
    """
    Build the validator-cache key for one ``validate`` call.

    Args:
        schema: Schema the validator is built for.
        cls: Validator class being instantiated.
        args: Positional arguments forwarded to the ``cls`` constructor.
        kwargs: Keyword arguments forwarded to the ``cls`` constructor.
        allow_remote_references: Remote-reference flag of the call.

    Returns:
        A hashable tuple combining the validator class, the
        remote-reference flag, and the frozen schema, positional and
        keyword arguments.
    """
    frozen_schema = _schema_fingerprint(schema)
    frozen_args = _freeze_value(args)
    frozen_kwargs = _freeze_value(dict(kwargs))
    return (
        cls,
        allow_remote_references,
        frozen_schema,
        frozen_args,
        frozen_kwargs,
    )
73+
74+
75+
def _prune_cache_if_needed() -> None:
    """
    Evict the oldest cache entries until the cache fits its size limit.

    The function does not take the cache lock itself; the caller is
    expected to hold it.
    """
    overflow = len(_validator_cache) - _VALIDATOR_CACHE_MAX_SIZE
    # ``range`` is empty for zero or negative overflow, so nothing is
    # removed while the cache is within its limit.
    for _ in range(overflow):
        # ``last=False`` pops from the front, i.e. the oldest entry.
        _validator_cache.popitem(last=False)
78+
79+
80+
def _clear_validate_cache() -> None:
    """Remove every cached validator while holding the cache lock."""
    with _validator_cache_lock:
        _validator_cache.clear()
83+
1484

1585
def validate(
1686
instance: Any,
1787
schema: Mapping[str, Any],
1888
cls: type[Validator] = OAS32Validator,
1989
*args: Any,
2090
allow_remote_references: bool = False,
21-
**kwargs: Any
91+
check_schema: bool = True,
92+
**kwargs: Any,
2293
) -> None:
2394
"""
2495
Validate an instance against a given schema using the specified
@@ -38,6 +109,9 @@ def validate(
38109
allow_remote_references: If ``True`` and no explicit ``registry`` is
39110
provided, allow jsonschema's default remote reference retrieval
40111
behavior.
112+
check_schema: If ``True`` (default), validate the provided schema
113+
before validating ``instance``. If ``False``, skip schema
114+
validation and run instance validation directly.
41115
**kwargs: Keyword arguments forwarded to ``cls`` constructor
42116
(for example ``registry`` and ``format_checker``). If omitted,
43117
a local-only empty ``Registry`` is used to avoid implicit remote
@@ -49,25 +123,63 @@ def validate(
49123
"""
50124
schema_dict = cast(dict[str, Any], schema)
51125

52-
meta_schema = getattr(cls, "META_SCHEMA", None)
53-
# jsonschema's default check_schema path does not accept a custom
54-
# registry, so for OAS dialects we use the package registry
55-
# explicitly to keep metaschema resolution local and deterministic.
56-
if isinstance(meta_schema, dict) and meta_schema.get("$id") in (
57-
OAS31_BASE_DIALECT_ID,
58-
OAS32_BASE_DIALECT_ID,
59-
):
60-
check_openapi_schema(cls, schema_dict)
61-
else:
62-
cls.check_schema(schema_dict)
63-
64126
validator_kwargs = kwargs.copy()
65127
if not allow_remote_references:
66-
validator_kwargs.setdefault("registry", Registry())
128+
validator_kwargs.setdefault("registry", _LOCAL_ONLY_REGISTRY)
129+
130+
key = _cache_key(
131+
schema=schema_dict,
132+
cls=cls,
133+
args=args,
134+
kwargs=validator_kwargs,
135+
allow_remote_references=allow_remote_references,
136+
)
137+
138+
with _validator_cache_lock:
139+
cached = _validator_cache.get(key)
140+
141+
if cached is None:
142+
if check_schema:
143+
meta_schema = getattr(cls, "META_SCHEMA", None)
144+
# jsonschema's default check_schema path does not accept a custom
145+
# registry, so for OAS dialects we use the package registry
146+
# explicitly to keep metaschema resolution local and deterministic.
147+
if isinstance(meta_schema, dict) and meta_schema.get("$id") in (
148+
OAS31_BASE_DIALECT_ID,
149+
OAS32_BASE_DIALECT_ID,
150+
):
151+
check_openapi_schema(cls, schema_dict)
152+
else:
153+
cls.check_schema(schema_dict)
154+
155+
validator = cls(schema_dict, *args, **validator_kwargs)
156+
cached = _CachedValidator(
157+
validator=validator,
158+
schema_checked=check_schema,
159+
)
160+
with _validator_cache_lock:
161+
_validator_cache[key] = cached
162+
_validator_cache.move_to_end(key)
163+
_prune_cache_if_needed()
164+
elif check_schema and not cached.schema_checked:
165+
meta_schema = getattr(cls, "META_SCHEMA", None)
166+
if isinstance(meta_schema, dict) and meta_schema.get("$id") in (
167+
OAS31_BASE_DIALECT_ID,
168+
OAS32_BASE_DIALECT_ID,
169+
):
170+
check_openapi_schema(cls, schema_dict)
171+
else:
172+
cls.check_schema(schema_dict)
173+
174+
with _validator_cache_lock:
175+
cached.schema_checked = True
176+
_validator_cache.move_to_end(key)
177+
else:
178+
with _validator_cache_lock:
179+
_validator_cache.move_to_end(key)
67180

68-
validator = cls(schema_dict, *args, **validator_kwargs)
69181
error = best_match(
70-
validator.evolve(schema=schema_dict).iter_errors(instance)
182+
cached.validator.evolve(schema=schema_dict).iter_errors(instance)
71183
)
72184
if error is not None:
73185
raise error

tests/unit/test_shortcut.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,16 @@
11
import inspect
2+
import re
23
from unittest.mock import patch
34

45
import pytest
6+
from jsonschema.exceptions import SchemaError
57
from referencing import Registry
68
from referencing import Resource
79

810
from openapi_schema_validator import OAS32Validator
911
from openapi_schema_validator import validate
12+
from openapi_schema_validator._regex import has_ecma_regex
13+
from openapi_schema_validator.shortcuts import _clear_validate_cache
1014

1115

1216
@pytest.fixture(scope="function")
@@ -23,6 +27,13 @@ def schema():
2327
}
2428

2529

30+
@pytest.fixture(autouse=True)
def clear_validate_cache_fixture():
    """
    Reset the validator cache around every test so validators cached by
    one test cannot affect another.
    """
    _clear_validate_cache()  # start the test with an empty cache
    yield
    _clear_validate_cache()  # drop whatever the test cached
35+
36+
2637
def test_validate_does_not_add_nullable_to_schema(schema):
2738
"""
2839
Verify that calling validate does not add the 'nullable' key to the schema
@@ -118,3 +129,43 @@ def test_validate_can_allow_implicit_remote_references():
118129
validate({}, schema, allow_remote_references=True)
119130

120131
assert urlopen.called
132+
133+
134+
def test_validate_skip_schema_check():
    """
    Verify that ``check_schema=False`` skips the up-front schema check for
    a schema whose ``pattern`` is not a valid regex.
    """
    invalid_pattern_schema = {"type": "string", "pattern": "["}

    # By default the malformed pattern is rejected as a schema error.
    with pytest.raises(SchemaError, match="is not a 'regex'"):
        validate("foo", invalid_pattern_schema)

    # With the schema check skipped the call is still expected to raise;
    # the exact exception type depends on the available regex backend.
    expected_error = Exception if has_ecma_regex() else re.error
    with pytest.raises(expected_error):
        validate("foo", invalid_pattern_schema, check_schema=False)
146+
147+
148+
def test_validate_cache_avoids_rechecking_schema(schema):
    """
    Verify that validating twice with the same schema and validator class
    runs the schema check only once.
    """
    patch_target = "openapi_schema_validator.shortcuts.check_openapi_schema"
    instance = {"email": "foo@bar.com"}

    with patch(patch_target) as check_schema_mock:
        for _ in range(2):
            validate(instance, schema, cls=OAS32Validator)

    check_schema_mock.assert_called_once()
156+
157+
158+
def test_validate_cache_promotes_unchecked_validator(schema):
    """
    Verify that a validator cached with ``check_schema=False`` gets its
    schema checked exactly once by later calls that request the check.
    """
    patch_target = "openapi_schema_validator.shortcuts.check_openapi_schema"
    instance = {"email": "foo@bar.com"}

    with patch(patch_target) as check_schema_mock:
        # First call caches the validator without checking the schema.
        validate(instance, schema, cls=OAS32Validator, check_schema=False)
        # The next two calls request the check; only one may run it.
        for _ in range(2):
            validate(instance, schema, cls=OAS32Validator)

    check_schema_mock.assert_called_once()

0 commit comments

Comments
 (0)