Skip to content

Commit 5fbd520

Browse files
fix(deps): resolve CVEs across pip and npm dependencies with langchain 1.x migration
- pyproject.toml: tighten minimum version constraints for all vulnerable direct/transitive pip deps; add [tool.uv] override-dependencies to force requests>=2.33.0 past pysonar pin; relax streamlit from ==1.50.0 to >=1.51.0 - uv.lock: regenerated — notable bumps: langchain-core 0.3.80→1.2.23, langsmith 0.4.49→0.7.22, tornado 6.5.2→6.5.5, pyopenssl 25.3.0→26.0.0, streamlit 1.50.0→1.55.0, requests 2.32.5→2.33.0 - streamlit_app/requirements.txt: relax streamlit pin to >=1.51.0 - docsite/package.json: add npm overrides for node-forge ^1.4.0, serialize-javascript ^7.0.5, brace-expansion, path-to-regexp, picomatch - docsite/package-lock.json: regenerated — node-forge 1.3.2→1.4.0, serialize-javascript 7.0.4→7.0.5; zero npm vulnerabilities remaining - migrate 6 source files from removed langchain.output_parsers/langchain.schema to langchain_classic (langchain 1.x breaking change)
1 parent 0437589 commit 5fbd520

11 files changed

Lines changed: 596 additions & 510 deletions

File tree

docsite/package-lock.json

Lines changed: 25 additions & 34 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docsite/package.json

Lines changed: 5 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -31,7 +31,11 @@
3131
},
3232
"overrides": {
3333
"svgo": "^3.3.3",
34-
"serialize-javascript": "^7.0.3",
34+
"serialize-javascript": "^7.0.5",
35+
"node-forge": "^1.4.0",
36+
"brace-expansion": "^1.1.13",
37+
"path-to-regexp": "^0.1.13",
38+
"picomatch": "^4.0.0",
3539
"minimatch": "^3.1.4",
3640
"ajv": "^8.18.0",
3741
"qs": "^6.14.2",

pyproject.toml

Lines changed: 22 additions & 7 deletions
Original file line number · Diff line number · Diff line change
@@ -27,8 +27,8 @@ classifiers = [
2727
dependencies = [
2828
"langchain-community>=0.4.0",
2929
"langchain-openai>=1.0.0",
30-
"langgraph>=1.1.1,<2.0.0",
31-
"nltk>=3.9.3",
30+
"langgraph>=1.1.3,<2.0.0",
31+
"nltk>=3.9.4",
3232
"numpy<=2.3.0",
3333
"asyncpg>=0.30.0",
3434
"fastapi[standard]>=0.116.1",
@@ -54,10 +54,19 @@ dependencies = [
5454
"aiofiles>=23.2.1",
5555
"tavily-python>=0.1.11",
5656
"pillow>=12.1.1",
57-
"cryptography>=46.0.5",
58-
"filelock>=3.20.3",
59-
"PyJWT>=2.12.0",
60-
"orjson>=3.11.6",
57+
"cryptography>=46.0.6",
58+
"filelock>=3.25.0",
59+
"PyJWT>=2.12.1",
60+
"orjson>=3.11.7",
61+
"tornado>=6.5.3",
62+
"protobuf>=6.33.0",
63+
"pyasn1>=0.6.3",
64+
"marshmallow>=3.26.2",
65+
"pygments>=2.20.0",
66+
"pyopenssl>=26.0.0",
67+
"langsmith>=0.4.50",
68+
"fonttools>=4.62.1",
69+
"requests>=2.33.0",
6170
]
6271

6372
[project.optional-dependencies]
@@ -99,7 +108,7 @@ oracle = [
99108
]
100109

101110
streamlit = [
102-
"streamlit==1.50.0",
111+
"streamlit>=1.51.0",
103112
"pyngrok==7.4.0",
104113
"python-dotenv==1.1.1",
105114
"xlsxwriter==3.2.9",
@@ -144,6 +153,12 @@ dev = [
144153
"twine>=6.1.0",
145154
]
146155

156+
[tool.uv]
157+
override-dependencies = [
158+
# pysonar (dev-only) pins requests==2.32.5 which conflicts; override to fix CVE
159+
"requests>=2.33.0",
160+
]
161+
147162
[tool.ruff]
148163
src = ["src"]
149164

src/intugle/core/conceptual_search/agent/tools/web_tools.py

Lines changed: 1 addition & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -1,6 +1,6 @@
11
import logging
22

3-
from langchain.schema import Document
3+
from langchain_classic.schema import Document
44
from langchain_community.tools.tavily_search import TavilySearchResults
55
from langchain_core.tools import tool
66

src/intugle/core/llms/chat.py

Lines changed: 5 additions & 11 deletions
Original file line number · Diff line number · Diff line change
@@ -3,12 +3,12 @@
33
from typing import TYPE_CHECKING, Optional
44

55
from langchain.chat_models import init_chat_model
6-
from langchain.output_parsers import (
6+
from langchain_classic.output_parsers import (
77
ResponseSchema,
88
RetryWithErrorOutputParser,
99
StructuredOutputParser,
1010
)
11-
from langchain.prompts import BaseChatPromptTemplate, ChatPromptTemplate
11+
from langchain_core.prompts import BaseChatPromptTemplate, ChatPromptTemplate
1212
from langchain_core.rate_limiters import InMemoryRateLimiter
1313

1414
from intugle.core import settings
@@ -53,9 +53,7 @@ def __init__(
5353
self.prompt_template: BaseChatPromptTemplate = prompt_template # prompt template
5454

5555
self.output_parser = (
56-
self.__output_parser_builder__(response_schemas=response_schemas)
57-
if response_schemas is not None
58-
else None
56+
self.__output_parser_builder__(response_schemas=response_schemas) if response_schemas is not None else None
5957
) # the built output parser
6058

6159
self.format_instructions = (
@@ -74,9 +72,7 @@ def __output_parser_builder__(self, response_schemas: list[ResponseSchema] = Non
7472
for building the corresponding output paraser from the given ResponseSchema
7573
"""
7674
parser = self.parser.from_response_schemas(response_schemas=response_schemas)
77-
retry_parser = RetryWithErrorOutputParser.from_llm(
78-
parser=parser, llm=self.model, max_retries=self.MAX_RETRIES
79-
)
75+
retry_parser = RetryWithErrorOutputParser.from_llm(parser=parser, llm=self.model, max_retries=self.MAX_RETRIES)
8076
return retry_parser
8177

8278
@classmethod
@@ -97,9 +93,7 @@ def invoke(self, *args, **kwargs):
9793

9894
sucessfull_parsing = False
9995

100-
prompt_value = self.llm_prompt.format_prompt(
101-
format_instructions=self.format_instructions, **kwargs
102-
)
96+
prompt_value = self.llm_prompt.format_prompt(format_instructions=self.format_instructions, **kwargs)
10397
messages = prompt_value.to_messages()
10498
_message = messages
10599
response = ""

src/intugle/core/pipeline/business_glossary/prompts.py

Lines changed: 21 additions & 16 deletions
Original file line number · Diff line number · Diff line change
@@ -1,12 +1,18 @@
1-
from langchain.output_parsers import ResponseSchema
1+
from langchain_classic.output_parsers import ResponseSchema
22

33
table_glossary = [ResponseSchema(name="table_glossary", description=" single-sentence business glossary definition")]
4-
column_glossary = [ResponseSchema(name="column_glossary", description="precise, single-sentence and non-technical business glossary definition")]
5-
column_tag_glossary = [ResponseSchema(name="column_tag_glossary", description="three precise and distinct business tags", type="list[str]")]
4+
column_glossary = [
5+
ResponseSchema(
6+
name="column_glossary", description="precise, single-sentence and non-technical business glossary definition"
7+
)
8+
]
9+
column_tag_glossary = [
10+
ResponseSchema(name="column_tag_glossary", description="three precise and distinct business tags", type="list[str]")
11+
]
612

713
BUSINESS_GLOSSARY_PROMPTS = {
8-
"gpt-4o": {
9-
"TABLE_GLOSSARY_TEMPLATE": """You are responsible for Data Governance in {domain},
14+
"gpt-4o": {
15+
"TABLE_GLOSSARY_TEMPLATE": """You are responsible for Data Governance in {domain},
1016
generate a concise, non-technical business glossary definition for the table on a provided DDL statement.
1117
The definition should be written as a single sentence and clearly describe the business purpose or function.\n
1218
# Instructions
@@ -23,7 +29,7 @@
2329
# Output
2430
{format_instructions}
2531
""",
26-
"BUSINESS_GLOSSARY_TEMPLATE": """You are responsible for Data Governance in {domain},
32+
"BUSINESS_GLOSSARY_TEMPLATE": """You are responsible for Data Governance in {domain},
2733
generate a concise single-sentence business glossary definition for each column mentioned in the DDL statement.\n
2834
The definition should clearly describe the business purpose or function.\n
2935
@@ -38,7 +44,7 @@
3844
{create_statements}\n
3945
{format_instructions}
4046
""",
41-
"BUSINESS_TAGS_TEMPLATE": """You are responsible for Data Governance in {domain}, your task is to generate three business tags for a column based on the DDL statements of a table given below.
47+
"BUSINESS_TAGS_TEMPLATE": """You are responsible for Data Governance in {domain}, your task is to generate three business tags for a column based on the DDL statements of a table given below.
4248
Use the column's context within the DDL statement (e.g., its name, type, and table name) to infer relevant business tags. Focus on generating concise, domain-relevant,
4349
and meaningful tags that align with the potential business use of the column.
4450
@@ -65,10 +71,10 @@
6571
# Additional Context:
6672
{additional_context}\n
6773
{format_instructions}
68-
"""
69-
},
70-
"gpt-4o-mini": {
71-
"TABLE_GLOSSARY_TEMPLATE": """
74+
""",
75+
},
76+
"gpt-4o-mini": {
77+
"TABLE_GLOSSARY_TEMPLATE": """
7278
Role: You are responsible for Data Governance in the {domain}.\n
7379
Task: You will be given a SQL DDL statement how `{table}` table is structured. Generate a concise, non-technical business glossary definition for `{table}` that clearly describe the business purpose or function.\n
7480
@@ -89,7 +95,7 @@
8995
\n\n
9096
{format_instructions}
9197
""",
92-
"BUSINESS_GLOSSARY_TEMPLATE": """
98+
"BUSINESS_GLOSSARY_TEMPLATE": """
9399
Role: You are responsible for Data Governance in the {domain}.\n
94100
Task: You will be given a SQL DDL statement how the attribute `{column}` is structured.\n
95101
@@ -111,8 +117,7 @@
111117
{additional_context}\n\n
112118
{format_instructions}
113119
""",
114-
"BUSINESS_TAGS_TEMPLATE":
115-
"""
120+
"BUSINESS_TAGS_TEMPLATE": """
116121
Role: You are responsible for Data Governance in the {domain}.\n
117122
Task: You will be given a SQL DDL statement how the attribute `{column}` is structured.\n
118123
@@ -131,6 +136,6 @@
131136
# Additional Context:
132137
{additional_context}\n\n
133138
{format_instructions}
134-
"""
139+
""",
140+
},
135141
}
136-
}

src/intugle/core/pipeline/business_glossary/utils.py

Lines changed: 2 additions & 5 deletions
Original file line number · Diff line number · Diff line change
@@ -3,7 +3,7 @@
33

44
import pandas as pd
55

6-
from langchain.output_parsers import RetryOutputParser
6+
from langchain_classic.output_parsers import RetryOutputParser
77
from langchain_core.prompt_values import StringPromptValue
88

99
from intugle.core import settings
@@ -56,11 +56,8 @@ def get_additional_context(table_name: str, global_additional_context: str = "",
5656

5757

5858
def preprocess_profiling_df(profiling_data: pd.DataFrame):
59-
6059
profiling_data = preprocess_profiling_data(
61-
profiling_data=profiling_data,
62-
sample_limit=settings.STRATA_SAMPLE_LIMIT,
63-
dtypes_to_filter=None
60+
profiling_data=profiling_data, sample_limit=settings.STRATA_SAMPLE_LIMIT, dtypes_to_filter=None
6461
)
6562

6663
return profiling_data

src/intugle/core/pipeline/datatype_identification/l2_model.py

Lines changed: 2 additions & 5 deletions
Original file line number · Diff line number · Diff line change
@@ -8,7 +8,7 @@
88

99
import pandas as pd
1010

11-
from langchain.output_parsers import ResponseSchema
11+
from langchain_classic.output_parsers import ResponseSchema
1212
from tqdm.auto import tqdm
1313

1414
from intugle.core.llms.chat import ChatModelLLM
@@ -82,9 +82,7 @@ def __classify_dim_measure__(self, table: str, column_name: str) -> str:
8282
def __call__(self, row) -> str:
8383
column_name = row["column_name"]
8484

85-
sample_data = adjust_sample(
86-
sample_data=row["sample_data"], expected_size=settings.L2_SAMPLE_LIMIT
87-
)
85+
sample_data = adjust_sample(sample_data=row["sample_data"], expected_size=settings.L2_SAMPLE_LIMIT)
8886

8987
table = pd.DataFrame(sample_data, columns=[column_name])
9088

@@ -101,7 +99,6 @@ def __call__(
10199
self,
102100
l1_pred: pd.DataFrame,
103101
):
104-
105102
l1_pred["predicted_datatype_l2"] = l1_pred.progress_apply(
106103
self.__model,
107104
axis=1,

0 commit comments

Comments (0)