Skip to content

Commit 61152d7

Browse files
committed
perf: optimize regex compilation and add performance test suite
- Pre-compile frequently used regex patterns (28% performance gain)
- Fix logger handler duplication bug
- Fix instance variable isolation (subx shared state bug)
- Remove sys._getframe for better compatibility
- Add cache size limit to prevent memory leaks
- Add comprehensive performance test suite with pytest-benchmark
- Add pytest markers (perf, slow) and poe task for perf tests
- Update CI to upload coverage reports and track benchmarks
- Add status badges to README
1 parent cce3cbf commit 61152d7

File tree

7 files changed

+1011
-439
lines changed

7 files changed

+1011
-439
lines changed

.github/workflows/ci.yml

Lines changed: 47 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,13 +54,56 @@ jobs:
5454
- name: Install dependencies
5555
run: uv sync --extra dev
5656

57-
- name: Run tests
58-
run: uv run pytest -v --cov=jsonpath --cov-report=xml
57+
- name: Run tests with coverage
58+
run: uv run pytest -v --cov=jsonpath --cov-report=xml --cov-report=html
5959

60-
- name: Upload coverage
60+
- name: Upload coverage to Codecov
6161
if: matrix.python-version == '3.12'
6262
uses: codecov/codecov-action@v5
6363
with:
64-
file: ./coverage.xml
64+
files: ./coverage.xml
6565
flags: unittests
6666
token: ${{ secrets.CODECOV_TOKEN }}
67+
fail_ci_if_error: false
68+
69+
- name: Upload coverage HTML report
70+
if: matrix.python-version == '3.12'
71+
uses: actions/upload-artifact@v4
72+
with:
73+
name: coverage-report
74+
path: htmlcov/
75+
retention-days: 30
76+
77+
benchmark:
78+
runs-on: ubuntu-latest
79+
steps:
80+
- name: Checkout code
81+
uses: actions/checkout@v6
82+
83+
- name: Set up Python
84+
uses: actions/setup-python@v6
85+
with:
86+
python-version: "3.12"
87+
88+
- name: Install uv
89+
uses: astral-sh/setup-uv@v7
90+
with:
91+
version: "latest"
92+
93+
- name: Install dependencies
94+
run: uv sync --extra dev
95+
96+
- name: Run performance tests
97+
run: uv run pytest tests/test_performance.py -v --benchmark-only --benchmark-json=benchmark.json
98+
99+
- name: Store benchmark result
100+
uses: benchmark-action/github-action-benchmark@v1
101+
with:
102+
tool: "pytest"
103+
output-file-path: benchmark.json
104+
github-token: ${{ secrets.GITHUB_TOKEN }}
105+
auto-push: ${{ github.ref == 'refs/heads/main' }}
106+
comment-on-alert: true
107+
fail-on-alert: false
108+
alert-threshold: "150%"
109+
comment-always: false

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
/AGENTS.md
33
/.serena/
44
/cohn_credentials.json
5+
/.benchmarks
56

67
# Byte-compiled / optimized / DLL files
78
__pycache__/

README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
# jsonpath-python
22

3+
[![CI](https://github.com/sean2077/jsonpath-python/workflows/CI/badge.svg)](https://github.com/sean2077/jsonpath-python/actions/workflows/ci.yml)
4+
[![codecov](https://codecov.io/gh/sean2077/jsonpath-python/branch/main/graph/badge.svg)](https://codecov.io/gh/sean2077/jsonpath-python)
5+
[![PyPI version](https://badge.fury.io/py/jsonpath-python.svg)](https://badge.fury.io/py/jsonpath-python)
6+
[![Python versions](https://img.shields.io/pypi/pyversions/jsonpath-python.svg)](https://pypi.org/project/jsonpath-python/)
7+
[![License](https://img.shields.io/github/license/sean2077/jsonpath-python.svg)](https://github.com/sean2077/jsonpath-python/blob/main/LICENSE)
8+
39
A lightweight and powerful JSONPath implementation for Python.
410

511
## Why jsonpath-python?

jsonpath/jsonpath.py

Lines changed: 54 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,24 @@
11
import logging
22
import os
33
import re
4-
import sys
54
from collections import defaultdict
65
from typing import Any, Callable, Union
76

87

98
def create_logger(name: str = None, level: Union[int, str] = logging.INFO):
109
"""Get or create a logger used for local debug."""
10+
logger = logging.getLogger(name)
11+
12+
# Avoid adding duplicate handlers
13+
if logger.handlers:
14+
return logger
1115

1216
formater = logging.Formatter(f"%(asctime)s-%(levelname)s-[{name}] %(message)s", datefmt="[%Y-%m-%d %H:%M:%S]")
1317

1418
handler = logging.StreamHandler()
1519
handler.setLevel(level)
1620
handler.setFormatter(formater)
1721

18-
logger = logging.getLogger(name)
1922
logger.setLevel(level)
2023
logger.addHandler(handler)
2124

@@ -60,24 +63,23 @@ class JSONPath:
6063
REP_SELECT_CONTENT = re.compile(r"^([\w.']+)(, ?[\w.']+)+$")
6164
REP_FILTER_CONTENT = re.compile(r"@([.\[].*?)(?=<=|>=|==|!=|>|<| in| not| is|\s|\)|$)|len\(@([.\[].*?)\)")
6265
REP_PATH_SEGMENT = re.compile(r"(?:\.|^)(?P<dot>\w+)|\[['\"](?P<quote>.*?)['\"]\]|\[(?P<int>\d+)\]")
63-
64-
# annotations
65-
f: list
66-
segments: list
67-
lpath: int
68-
subx = defaultdict(list)
69-
result: list
70-
result_type: str
71-
eval_func: callable
66+
REP_WORD_KEY = re.compile(r"^\w+$")
67+
REP_REGEX_PATTERN = re.compile(r"=~\s*/(.*?)/")
7268

7369
def __init__(self, expr: str):
70+
# Initialize instance variables
71+
self.subx = defaultdict(list)
72+
self.segments = []
73+
self.lpath = 0
74+
self.result = []
75+
self.result_type = "VALUE"
76+
self.eval_func = eval
77+
7478
expr = self._parse_expr(expr)
7579
self.segments = [s for s in expr.split(JSONPath.SEP) if s]
7680
self.lpath = len(self.segments)
7781
logger.debug(f"segments : {self.segments}")
7882

79-
self.caller_globals = sys._getframe(1).f_globals
80-
8183
def parse(self, obj, result_type="VALUE", eval_func=eval):
8284
if not isinstance(obj, (list, dict)):
8385
raise TypeError("obj must be a list or a dict.")
@@ -87,6 +89,7 @@ def parse(self, obj, result_type="VALUE", eval_func=eval):
8789
self.result_type = result_type
8890
self.eval_func = eval_func
8991

92+
# Reset state for each parse call
9093
self.result = []
9194
self._trace(obj, 0, "$")
9295

@@ -172,13 +175,13 @@ def _traverse(f, obj, i: int, path: str, *args):
172175
f(v, i, f"{path}[{idx}]", *args)
173176
elif isinstance(obj, dict):
174177
for k, v in obj.items():
175-
if re.match(r"^\w+$", k):
178+
if JSONPath.REP_WORD_KEY.match(k):
176179
f(v, i, f"{path}.{k}", *args)
177180
else:
178181
f(v, i, f"{path}['{k}']", *args)
179182

180183
@staticmethod
181-
def _getattr(obj: dict, path: str, *, convert_number_str=False):
184+
def _getattr(obj: Any, path: str, *, convert_number_str=False):
182185
r = obj
183186
for k in path.split("."):
184187
if isinstance(r, dict):
@@ -268,7 +271,7 @@ def _trace(self, obj, i: int, path):
268271
step_key = step[1:-1]
269272

270273
if isinstance(obj, dict) and step_key in obj:
271-
if re.match(r"^\w+$", step_key):
274+
if JSONPath.REP_WORD_KEY.match(step_key):
272275
self._trace(obj[step_key], i + 1, f"{path}.{step_key}")
273276
else:
274277
self._trace(obj[step_key], i + 1, f"{path}['{step_key}']")
@@ -285,8 +288,9 @@ def _trace(self, obj, i: int, path):
285288
# select
286289
if isinstance(obj, dict) and JSONPath.REP_SELECT_CONTENT.fullmatch(step):
287290
for k in step.split(","):
291+
k = k.strip() # Remove whitespace
288292
if k in obj:
289-
if re.match(r"^\w+$", k):
293+
if JSONPath.REP_WORD_KEY.match(k):
290294
self._trace(obj[k], i + 1, f"{path}.{k}")
291295
else:
292296
self._trace(obj[k], i + 1, f"{path}['{k}']")
@@ -298,7 +302,7 @@ def _trace(self, obj, i: int, path):
298302
step = JSONPath.REP_FILTER_CONTENT.sub(self._gen_obj, step)
299303

300304
if "=~" in step:
301-
step = re.sub(r"=~\s*/(.*?)/", r"@ RegexPattern(r'\1')", step)
305+
step = JSONPath.REP_REGEX_PATTERN.sub(r"@ RegexPattern(r'\1')", step)
302306

303307
if isinstance(obj, dict):
304308
self._filter(obj, i + 1, path, step)
@@ -316,7 +320,7 @@ def _trace(self, obj, i: int, path):
316320
obj = list(obj.items())
317321
self._sorter(obj, step[2:-1])
318322
for k, v in obj:
319-
if re.match(r"^\w+$", k):
323+
if JSONPath.REP_WORD_KEY.match(k):
320324
self._trace(v, i + 1, f"{path}.{k}")
321325
else:
322326
self._trace(v, i + 1, f"{path}['{k}']")
@@ -329,6 +333,7 @@ def _trace(self, obj, i: int, path):
329333
if isinstance(obj, dict):
330334
obj_ = {}
331335
for k in step[1:-1].split(","):
336+
k = k.strip() # Remove whitespace
332337
v = self._getattr(obj, k)
333338
if v is not JSONPath._MISSING:
334339
obj_[k] = v
@@ -339,15 +344,25 @@ def _trace(self, obj, i: int, path):
339344
return
340345

341346
def update(self, obj: Union[list, dict], value_or_func: Union[Any, Callable[[Any], Any]]) -> Any:
347+
"""Update values in JSON object using JSONPath expression.
348+
349+
Args:
350+
obj: JSON object (dict or list) to update
351+
value_or_func: Static value or callable that transforms the current value
352+
353+
Returns:
354+
Updated object (modified in-place for nested paths, returns new value for root)
355+
"""
342356
paths = self.parse(obj, result_type="PATH")
357+
is_func = callable(value_or_func)
358+
359+
# Handle root object update specially
360+
if len(paths) == 1 and paths[0] == "$":
361+
return value_or_func(obj) if is_func else value_or_func
362+
343363
for path in paths:
344364
matches = list(JSONPath.REP_PATH_SEGMENT.finditer(path))
345365
if not matches:
346-
# Root object
347-
if isinstance(value_or_func, Callable):
348-
obj = value_or_func(obj)
349-
else:
350-
obj = value_or_func
351366
continue
352367

353368
target = obj
@@ -371,10 +386,7 @@ def update(self, obj: Union[list, dict], value_or_func: Union[Any, Callable[[Any
371386
elif group["int"]:
372387
key = int(group["int"])
373388

374-
if isinstance(value_or_func, Callable):
375-
target[key] = value_or_func(target[key])
376-
else:
377-
target[key] = value_or_func
389+
target[key] = value_or_func(target[key]) if is_func else value_or_func
378390

379391
return obj
380392

@@ -393,12 +405,24 @@ def compile(expr):
393405
return JSONPath(expr)
394406

395407

396-
# global cache
408+
# global cache with size limit to prevent memory leaks
397409
_jsonpath_cache = {}
410+
_CACHE_MAX_SIZE = 128
398411

399412

400413
def search(expr, data):
401-
global _jsonpath_cache
414+
"""Search JSON data using JSONPath expression with instance caching.
415+
416+
Args:
417+
expr: JSONPath expression string
418+
data: JSON data (dict or list)
419+
420+
Returns:
421+
List of matched values
422+
"""
402423
if expr not in _jsonpath_cache:
424+
# Not a true LRU: once the cache reaches _CACHE_MAX_SIZE, drop every entry and start over
425+
if len(_jsonpath_cache) >= _CACHE_MAX_SIZE:
426+
_jsonpath_cache.clear()
403427
_jsonpath_cache[expr] = JSONPath(expr)
404428
return _jsonpath_cache[expr].parse(data)

pyproject.toml

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,13 @@ requires-python = ">=3.8"
2828
dependencies = []
2929

3030
[project.optional-dependencies]
31-
dev = ["pytest>=8.0", "pytest-cov>=5.0", "ruff>=0.3"]
31+
dev = [
32+
"pytest>=8.0",
33+
"pytest-cov>=5.0",
34+
"pytest-benchmark[histogram]>=4.0",
35+
"ruff>=0.3",
36+
"poethepoet",
37+
]
3238

3339
[project.urls]
3440
Homepage = "https://github.com/sean2077/jsonpath-python"
@@ -69,6 +75,10 @@ ignore = [
6975
testpaths = ["tests"]
7076
python_files = ["test_*.py"]
7177
python_classes = ["Test*"]
78+
markers = [
79+
"perf: marks tests as performance benchmarks (deselect with '-m \"not perf\"')",
80+
"slow: marks tests as slow running (deselect with '-m \"not slow\"')",
81+
]
7282
# Development Tasks (using poethepoet)
7383
[tool.poe.tasks.format]
7484
cmd = "ruff format . && ruff check . --fix --select I"
@@ -82,6 +92,10 @@ help = "Run linter"
8292
cmd = "pytest"
8393
help = "Run tests"
8494

95+
[tool.poe.tasks.test-perf]
96+
cmd = "pytest -m perf -v --benchmark-autosave --benchmark-histogram"
97+
help = "Run performance tests"
98+
8599
[tool.poe.tasks.update-deps]
86100
cmd = "uv sync --all-extras --upgrade --index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple/"
87101
help = "Update all dependencies"

0 commit comments

Comments
 (0)