From 60d462e5960e7743df6ab65ee9340296ed0eff14 Mon Sep 17 00:00:00 2001 From: Gwen Reusing Date: Thu, 30 Oct 2025 01:20:03 -0400 Subject: [PATCH] Parsing LLM response with JSON parsing Library Adding dirtyjson to parse LLM Response and use regex search as fallback. Added pbr dependency to bandit --- .pre-commit-config.yaml | 2 +- poetry.lock | 111 ++++++++++++++++---------------- pyproject.toml | 11 +++- src/shardguard/core/planning.py | 34 +++++----- 4 files changed, 85 insertions(+), 73 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a8debb6..2334f6c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -30,7 +30,7 @@ repos: hooks: - id: bandit args: ["-c", "pyproject.toml"] - additional_dependencies: ["bandit[toml]"] + additional_dependencies: ["bandit[toml]", "pbr"] # Markdown linting - repo: https://github.com/igorshubovych/markdownlint-cli diff --git a/poetry.lock b/poetry.lock index 154f59a..e578523 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand. [[package]] name = "annotated-types" @@ -55,15 +55,14 @@ tests-mypy = ["mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" a [[package]] name = "bandit" -version = "1.8.5" +version = "1.8.6" description = "Security oriented static analyser for python code." -optional = true +optional = false python-versions = ">=3.9" -groups = ["main"] -markers = "extra == \"dev\"" +groups = ["main", "dev"] files = [ - {file = "bandit-1.8.5-py3-none-any.whl", hash = "sha256:cb2e57524e99e33ced48833c6cc9c12ac78ae970bb6a450a83c4b506ecc1e2f9"}, - {file = "bandit-1.8.5.tar.gz", hash = "sha256:db812e9c39b8868c0fed5278b77fffbbaba828b4891bc80e34b9c50373201cfd"}, + {file = "bandit-1.8.6-py3-none-any.whl", hash = "sha256:3348e934d736fcdb68b6aa4030487097e23a501adf3e7827b63658df464dddd0"}, + {file = "bandit-1.8.6.tar.gz", hash = "sha256:dbfe9c25fc6961c2078593de55fd19f2559f9e45b99f1272341f5b95dea4e56b"}, ] [package.dependencies] @@ -107,10 +106,9 @@ files = [ name = "cfgv" version = "3.4.0" description = "Validate configuration and produce human readable error messages." -optional = true +optional = false python-versions = ">=3.8" -groups = ["main"] -markers = "extra == \"dev\"" +groups = ["main", "dev"] files = [ {file = "cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9"}, {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"}, @@ -239,12 +237,12 @@ version = "0.4.6" description = "Cross-platform colored terminal text." optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" -groups = ["main"] -markers = "platform_system == \"Windows\" or sys_platform == \"win32\"" +groups = ["main", "dev"] files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +markers = {main = "platform_system == \"Windows\" or sys_platform == \"win32\"", dev = "platform_system == \"Windows\""} [[package]] name = "coverage" @@ -327,14 +325,25 @@ files = [ [package.extras] toml = ["tomli ; python_full_version <= \"3.11.0a6\""] +[[package]] +name = "dirtyjson" +version = "1.0.8" +description = "JSON decoder for Python that can extract data from the muck" +optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "dirtyjson-1.0.8-py3-none-any.whl", hash = "sha256:125e27248435a58acace26d5c2c4c11a1c0de0a9c5124c5a94ba78e517d74f53"}, + {file = "dirtyjson-1.0.8.tar.gz", hash = "sha256:90ca4a18f3ff30ce849d100dcf4a003953c79d3a2348ef056f1d9c22231a25fd"}, +] + [[package]] name = "distlib" version = "0.3.9" description = "Distribution utilities" -optional = true +optional = false python-versions = "*" -groups = ["main"] -markers = "extra == \"dev\"" +groups = ["main", "dev"] files = [ {file = "distlib-0.3.9-py2.py3-none-any.whl", hash = "sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87"}, {file = "distlib-0.3.9.tar.gz", hash = "sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403"}, @@ -344,10 +353,9 @@ files = [ name = "filelock" version = "3.18.0" description = "A platform independent file lock." -optional = true +optional = false python-versions = ">=3.9" -groups = ["main"] -markers = "extra == \"dev\"" +groups = ["main", "dev"] files = [ {file = "filelock-3.18.0-py3-none-any.whl", hash = "sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de"}, {file = "filelock-3.18.0.tar.gz", hash = "sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2"}, @@ -719,10 +727,9 @@ files = [ name = "identify" version = "2.6.12" description = "File identification library for Python" -optional = true +optional = false python-versions = ">=3.9" -groups = ["main"] -markers = "extra == \"dev\"" +groups = ["main", "dev"] files = [ {file = "identify-2.6.12-py2.py3-none-any.whl", hash = "sha256:ad9672d5a72e0d2ff7c5c8809b62dfa60458626352fb0eb7b55e69bdc45334a2"}, {file = "identify-2.6.12.tar.gz", hash = "sha256:d8de45749f1efb108badef65ee8386f0f7bb19a7f26185f74de6367bffbaf0e6"}, @@ -821,7 +828,7 @@ version = "3.0.0" description = "Python port of markdown-it. Markdown parsing, done right!" optional = false python-versions = ">=3.8" -groups = ["main"] +groups = ["main", "dev"] files = [ {file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"}, {file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"}, @@ -875,7 +882,7 @@ version = "0.1.2" description = "Markdown URL utilities" optional = false python-versions = ">=3.7" -groups = ["main"] +groups = ["main", "dev"] files = [ {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"}, {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, @@ -953,10 +960,9 @@ files = [ name = "nodeenv" version = "1.9.1" description = "Node.js virtual environment builder" -optional = true +optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" -groups = ["main"] -markers = "extra == \"dev\"" +groups = ["main", "dev"] files = [ {file = "nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9"}, {file = "nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f"}, @@ -990,15 +996,14 @@ files = [ [[package]] name = "pbr" -version = "6.1.1" +version = "7.0.1" description = "Python Build Reasonableness" -optional = true +optional = false python-versions = ">=2.6" -groups = ["main"] -markers = "extra == \"dev\"" +groups = ["main", "dev"] files = [ - {file = "pbr-6.1.1-py2.py3-none-any.whl", hash = "sha256:38d4daea5d9fa63b3f626131b9d34947fd0c8be9b05a29276870580050a25a76"}, - {file = "pbr-6.1.1.tar.gz", hash = "sha256:93ea72ce6989eb2eed99d0f75721474f69ad88128afdef5ac377eb797c4bf76b"}, + {file = "pbr-7.0.1-py2.py3-none-any.whl", hash = "sha256:32df5156fbeccb6f8a858d1ebc4e465dcf47d6cc7a4895d5df9aa951c712fc35"}, + {file = "pbr-7.0.1.tar.gz", hash = "sha256:3ecbcb11d2b8551588ec816b3756b1eb4394186c3b689b17e04850dfc20f7e57"}, ] [package.dependencies] @@ -1008,10 +1013,9 @@ setuptools = "*" name = "platformdirs" version = "4.3.8" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." -optional = true +optional = false python-versions = ">=3.9" -groups = ["main"] -markers = "extra == \"dev\"" +groups = ["main", "dev"] files = [ {file = "platformdirs-4.3.8-py3-none-any.whl", hash = "sha256:ff7059bb7eb1179e2685604f4aaf157cfd9535242bd23742eadc3c13542139b4"}, {file = "platformdirs-4.3.8.tar.gz", hash = "sha256:3d512d96e16bcb959a814c9f348431070822a6496326a4be0911c40b5a74c2bc"}, @@ -1041,15 +1045,14 @@ testing = ["coverage", "pytest", "pytest-benchmark"] [[package]] name = "pre-commit" -version = "4.2.0" +version = "4.3.0" description = "A framework for managing and maintaining multi-language pre-commit hooks." -optional = true +optional = false python-versions = ">=3.9" -groups = ["main"] -markers = "extra == \"dev\"" +groups = ["main", "dev"] files = [ - {file = "pre_commit-4.2.0-py2.py3-none-any.whl", hash = "sha256:a009ca7205f1eb497d10b845e52c838a98b6cdd2102a6c8e4540e94ee75c58bd"}, - {file = "pre_commit-4.2.0.tar.gz", hash = "sha256:601283b9757afd87d40c4c4a9b2b5de9637a8ea02eaff7adc2d0fb4e04841146"}, + {file = "pre_commit-4.3.0-py2.py3-none-any.whl", hash = "sha256:2b0747ad7e6e967169136edffee14c16e148a778a54e4f967921aa1ebf2308d8"}, + {file = "pre_commit-4.3.0.tar.gz", hash = "sha256:499fe450cc9d42e9d58e606262795ecb64dd05438943c62b66f6a8673da30b16"}, ] [package.dependencies] @@ -1308,7 +1311,7 @@ version = "2.19.2" description = "Pygments is a syntax highlighting package written in Python." optional = false python-versions = ">=3.8" -groups = ["main"] +groups = ["main", "dev"] files = [ {file = "pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b"}, {file = "pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887"}, @@ -1427,10 +1430,9 @@ files = [ name = "pyyaml" version = "6.0.2" description = "YAML parser and emitter for Python" -optional = true +optional = false python-versions = ">=3.8" -groups = ["main"] -markers = "extra == \"dev\"" +groups = ["main", "dev"] files = [ {file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"}, {file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"}, @@ -1531,7 +1533,7 @@ version = "14.0.0" description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" optional = false python-versions = ">=3.8.0" -groups = ["main"] +groups = ["main", "dev"] files = [ {file = "rich-14.0.0-py3-none-any.whl", hash = "sha256:1c9491e1951aac09caffd42f448ee3d04e58923ffe14993f6e83068dc395d7e0"}, {file = "rich-14.0.0.tar.gz", hash = "sha256:82f1bc23a6a21ebca4ae0c45af9bdbc492ed20231dcb63f297d6d1021a9d5725"}, @@ -1746,10 +1748,9 @@ files = [ name = "setuptools" version = "80.9.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" -optional = true +optional = false python-versions = ">=3.9" -groups = ["main"] -markers = "extra == \"dev\"" +groups = ["main", "dev"] files = [ {file = "setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922"}, {file = "setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c"}, @@ -1844,10 +1845,9 @@ full = ["httpx (>=0.27.0,<0.29.0)", "itsdangerous", "jinja2", "python-multipart name = "stevedore" version = "5.4.1" description = "Manage dynamic plugins for Python applications" -optional = true +optional = false python-versions = ">=3.9" -groups = ["main"] -markers = "extra == \"dev\"" +groups = ["main", "dev"] files = [ {file = "stevedore-5.4.1-py3-none-any.whl", hash = "sha256:d10a31c7b86cba16c1f6e8d15416955fc797052351a56af15e608ad20811fcfe"}, {file = "stevedore-5.4.1.tar.gz", hash = "sha256:3135b5ae50fe12816ef291baff420acb727fcd356106e3e9cbfa9e5985cd6f4b"}, @@ -1977,10 +1977,9 @@ standard = ["colorama (>=0.4) ; sys_platform == \"win32\"", "httptools (>=0.6.3) name = "virtualenv" version = "20.31.2" description = "Virtual Python Environment builder" -optional = true +optional = false python-versions = ">=3.8" -groups = ["main"] -markers = "extra == \"dev\"" +groups = ["main", "dev"] files = [ {file = "virtualenv-20.31.2-py3-none-any.whl", hash = "sha256:36efd0d9650ee985f0cad72065001e66d49a6f24eb44d98980f630686243cf11"}, {file = "virtualenv-20.31.2.tar.gz", hash = "sha256:e10c0a9d02835e592521be48b332b6caee6887f332c111aa79a09b9e79efc2af"}, @@ -2017,4 +2016,4 @@ dev = ["bandit", "mypy", "pre-commit", "pydocstyle", "pytest", "pytest-asyncio", [metadata] lock-version = "2.1" python-versions = ">=3.13,<4.0" -content-hash = "be3a1e393ac71b774a0d287dca1d8981e88106f8fd0e315aaca3890b5e8ab93c" +content-hash = "5226c2860f0f908a0f820ab523851665341132e71a2b14ba6c2a3a52747375b5" diff --git a/pyproject.toml b/pyproject.toml index ba12b19..766c4d1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,9 @@ dependencies = [ "loguru>=0.7.3,<0.8.0", "mcp>=1.0.0", "google-generativeai>=0.8.0,<1.0.0", - "python-dotenv>=1.0.0,<2.0.0" + "python-dotenv>=1.0.0,<2.0.0", + "dirtyjson (>=1.0.8,<2.0.0)", + "pre-commit (>=4.3.0,<5.0.0)" ] [project.scripts] @@ -132,3 +134,10 @@ inherit = false match = "(?!test_).*\\.py" match-dir = "(?!tests|migrations|__pycache__).*" ignore = "D100,D104" # Ignore missing docstrings in modules and packages for now + +[dependency-groups] +dev = [ + "pre-commit (>=4.3.0,<5.0.0)", + "bandit (>=1.8.6,<2.0.0)", + "pbr (>=7.0.1,<8.0.0)" +] diff --git a/src/shardguard/core/planning.py b/src/shardguard/core/planning.py index aa1cbf0..4468bb2 100644 --- a/src/shardguard/core/planning.py +++ b/src/shardguard/core/planning.py @@ -5,6 +5,8 @@ import re from typing import Protocol +import dirtyjson + from .llm_providers import LLMProviderFactory from .mcp_integration import MCPClient @@ -71,21 +73,23 @@ async def get_available_tools_description(self) -> str: def _extract_json_from_response(self, response: str) -> str: """Extract JSON from LLM response that might contain extra text.""" - # Try to find JSON block enclosed in curly braces - matches = re.findall(r"\{.*\}", response, re.DOTALL) - - if matches: - # Return the longest JSON-like match - json_candidate = max(matches, key=len) - # Validate that it's actually valid JSON - try: - json.loads(json_candidate) - return json_candidate - except json.JSONDecodeError: - pass - - # If no valid JSON found, return the original response - return response + try: + obj = dirtyjson.loads(response, search_for_first_object=True) + return json.dumps(obj) + except Exception: + # Try to find a JSON block enclosed in curly braces as a fallback + matches = re.findall(r"\{.*\}", response, re.DOTALL) + if matches: + # Return the longest JSON-like match + json_candidate = max(matches, key=len) + # Validate that it's actually valid JSON + try: + json.loads(json_candidate) + return json_candidate + except json.JSONDecodeError: + pass + # If no valid JSON is found, return the original response + return response def _create_fallback_response(self, prompt: str, error: str) -> str: """Create a fallback response when plan generation fails."""