From 536995177dc3538e0e91721209dafc07a64bd5ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E1=84=87=E1=85=A1=E1=86=A8=E1=84=80=E1=85=A7=E1=86=BC?= =?UTF-8?q?=E1=84=90=E1=85=A2?= Date: Sun, 27 Apr 2025 23:41:15 +0900 Subject: [PATCH 1/8] =?UTF-8?q?fix:=20pre-commit=EC=97=90=20isort,=20flake?= =?UTF-8?q?8=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .pre-commit-config.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 30c473d..87dca01 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,5 +1,13 @@ repos: + - repo: https://github.com/pycqa/isort + rev: 6.0.0 + hooks: + - id: isort - repo: https://github.com/psf/black rev: 25.1.0 hooks: - id: black + - repo: https://github.com/pycqa/flake8 + rev: 7.1.2 + hooks: + - id: flake8 \ No newline at end of file From d3edc1fa96cd2b904a3356e7a56ef6d267e1cf08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E1=84=87=E1=85=A1=E1=86=A8=E1=84=80=E1=85=A7=E1=86=BC?= =?UTF-8?q?=E1=84=90=E1=85=A2?= Date: Mon, 28 Apr 2025 00:01:19 +0900 Subject: [PATCH 2/8] =?UTF-8?q?fix:=20=ED=98=84=EC=9E=AC=20=EC=83=81?= =?UTF-8?q?=ED=83=9C=EC=97=90=EC=84=9C=20=EB=AC=B4=EC=8B=9C=ED=95=B4?= =?UTF-8?q?=EC=95=BC=ED=95=98=EB=8A=94=20=EA=B7=9C=EC=B9=99=20=EC=B6=94?= =?UTF-8?q?=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .pre-commit-config.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 87dca01..327a986 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -10,4 +10,7 @@ repos: - repo: https://github.com/pycqa/flake8 rev: 7.1.2 hooks: - - id: flake8 \ No newline at end of file + - id: flake8 + args: + - --ignore=E501,E722,E262,F401,F841 + - --max-line-length=88 \ No newline at end of file From b1eabad36c59fd03cb8a1da04c165390cc3c468d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E1=84=87=E1=85=A1=E1=86=A8=E1=84=80=E1=85=A7=E1=86=BC?= =?UTF-8?q?=E1=84=90=E1=85=A2?= Date: Mon, 28 Apr 2025 00:07:34 +0900 Subject: [PATCH 3/8] =?UTF-8?q?fix:=20W503,=20E226=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 327a986..463dc09 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -12,5 +12,5 @@ repos: hooks: - id: flake8 args: - - --ignore=E501,E722,E262,F401,F841 + - --ignore=E501,E722,E262,F401,F841,W503,E226 - --max-line-length=88 \ No newline at end of file From e7b67c334b28ea8ff28164fe2f0b06daeae4acb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E1=84=87=E1=85=A1=E1=86=A8=E1=84=80=E1=85=A7=E1=86=BC?= =?UTF-8?q?=E1=84=90=E1=85=A2?= Date: Mon, 28 Apr 2025 00:15:28 +0900 Subject: [PATCH 4/8] =?UTF-8?q?fix:=20pre-commit=20CICD=20=EC=88=98?= =?UTF-8?q?=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/pre-commit.yml | 37 ++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index a73aeca..76961c2 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -1,4 +1,4 @@ -name: Pre-commit Black Check +name: Pre-commit Auto-fix and Comment on: push: @@ -10,12 +10,14 @@ on: jobs: pre-commit: - name: Run Pre-commit Hooks + name: Run Pre-commit Hooks, Auto-fix, and Comment runs-on: ubuntu-latest steps: - name: Checkout repository uses: actions/checkout@v4 + with: + token: ${{ secrets.GITHUB_TOKEN }} - name: Set up Python uses: actions/setup-python@v4 @@ -25,5 +27,32 @@ jobs: - name: Install pre-commit run: pip install pre-commit - - name: Run pre-commit - run: pre-commit run --all-files --verbose + - name: Run pre-commit and fix files + id: precommit_run + run: pre-commit run --all-files --verbose --hook-stage manual + + - name: Commit and push changes if any + id: commit_push + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git add . + if git diff-index --quiet HEAD; then + echo "no_changes=true" >> $GITHUB_ENV + else + git commit -m "style: Apply pre-commit fixes (black, isort)" + git push + echo "no_changes=false" >> $GITHUB_ENV + fi + + - name: Comment on PR if changes were pushed + if: env.no_changes == 'false' && github.event_name == 'pull_request' + uses: actions/github-script@v7 + with: + script: | + github.rest.issues.createComment({ + issue_number: context.issue.number, + owner: context.repo.owner, + repo: context.repo.repo, + body: "✅ Pre-commit hooks automatically fixed some issues (black, isort) and changes were pushed!" + }) \ No newline at end of file From ad39794a8764b507358c5a40c06559157d145862 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E1=84=87=E1=85=A1=E1=86=A8=E1=84=80=E1=85=A7=E1=86=BC?= =?UTF-8?q?=E1=84=90=E1=85=A2?= Date: Mon, 28 Apr 2025 00:20:46 +0900 Subject: [PATCH 5/8] =?UTF-8?q?fix:=20=EC=88=98=EC=A0=95=ED=95=A0=20?= =?UTF-8?q?=EC=88=98=20=EC=9E=88=EB=8A=94=20=EA=B6=8C=ED=95=9C=20=EB=B6=80?= =?UTF-8?q?=EC=97=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/pre-commit.yml | 44 ++++++++++++++++---------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 76961c2..c13ea5b 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -2,19 +2,19 @@ name: Pre-commit Auto-fix and Comment on: push: - branches: - - master + branches: [ master ] pull_request: - branches: - - master + branches: [ master ] jobs: pre-commit: - name: Run Pre-commit Hooks, Auto-fix, and Comment runs-on: ubuntu-latest + permissions: + contents: write + pull-requests: write steps: - - name: Checkout repository + - name: Checkout uses: actions/checkout@v4 with: token: ${{ secrets.GITHUB_TOKEN }} @@ -25,34 +25,34 @@ jobs: python-version: "3.x" - name: Install pre-commit - run: pip install pre-commit + run: python -m pip install --upgrade pre-commit - - name: Run pre-commit and fix files + - name: Run pre-commit (auto-fix) id: precommit_run - run: pre-commit run --all-files --verbose --hook-stage manual + continue-on-error: true + run: | + pre-commit run --all-files --verbose --hook-stage manual - - name: Commit and push changes if any + - name: Commit & push if changed id: commit_push run: | - git config user.name "github-actions[bot]" + git config user.name "github-actions[bot]" git config user.email "github-actions[bot]@users.noreply.github.com" - git add . - if git diff-index --quiet HEAD; then - echo "no_changes=true" >> $GITHUB_ENV - else - git commit -m "style: Apply pre-commit fixes (black, isort)" + if ! git diff --quiet; then + git add -u + git commit -m "style: apply pre-commit fixes (black, isort)" || true git push - echo "no_changes=false" >> $GITHUB_ENV + echo "FIXES_PUSHED=true" >> $GITHUB_ENV fi - - name: Comment on PR if changes were pushed - if: env.no_changes == 'false' && github.event_name == 'pull_request' + - name: Comment on PR + if: env.FIXES_PUSHED == 'true' && github.event_name == 'pull_request' uses: actions/github-script@v7 with: script: | github.rest.issues.createComment({ - issue_number: context.issue.number, owner: context.repo.owner, repo: context.repo.repo, - body: "✅ Pre-commit hooks automatically fixed some issues (black, isort) and changes were pushed!" - }) \ No newline at end of file + issue_number: context.issue.number, + body: "✅ Pre-commit hooks fixed formatting (black, isort) and the bot pushed the changes." + }) From 756ebc6a06a70b2d4cc0f7ae06000590968b97d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E1=84=87=E1=85=A1=E1=86=A8=E1=84=80=E1=85=A7=E1=86=BC?= =?UTF-8?q?=E1=84=90=E1=85=A2?= Date: Mon, 28 Apr 2025 00:32:57 +0900 Subject: [PATCH 6/8] =?UTF-8?q?fix:=20=EC=9E=91=EC=97=85=20=EC=A4=91?= =?UTF-8?q?=EC=9D=B8=20=EB=B8=8C=EB=9E=9C=EC=B9=98=20=EC=83=81=ED=83=9C?= =?UTF-8?q?=EC=97=90=EC=84=9C=20push?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/pre-commit.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index c13ea5b..242b833 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -14,10 +14,12 @@ jobs: pull-requests: write steps: - - name: Checkout + - name: Checkout repository uses: actions/checkout@v4 with: token: ${{ secrets.GITHUB_TOKEN }} + fetch-depth: 0 + ref: ${{ github.head_ref || github.ref_name }} - name: Set up Python uses: actions/setup-python@v4 @@ -34,7 +36,6 @@ jobs: pre-commit run --all-files --verbose --hook-stage manual - name: Commit & push if changed - id: commit_push run: | git config user.name "github-actions[bot]" git config user.email "github-actions[bot]@users.noreply.github.com" From f92afd3071e9ff1412d666f35af676fca9d84333 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sun, 27 Apr 2025 15:33:29 +0000 Subject: [PATCH 7/8] style: apply pre-commit fixes (black, isort) --- cli/__init__.py | 4 +++- data_utils/datahub_source.py | 15 ++++++++++----- evaluation/gen_answer.py | 7 +++---- evaluation/gen_persona.py | 8 ++++---- evaluation/gen_question.py | 8 ++++---- evaluation/persona_class.py | 3 ++- evaluation/utils.py | 5 +++-- interface/lang2sql.py | 12 ++++++------ interface/viz_eval.py | 6 +++--- llm_utils/chains.py | 7 ++++--- llm_utils/connect_db.py | 1 + llm_utils/graph.py | 15 ++++++--------- llm_utils/llm_factory.py | 6 +++--- llm_utils/prompts_class.py | 2 +- llm_utils/tools.py | 6 +++--- setup.py | 2 +- 16 files changed, 57 insertions(+), 50 deletions(-) diff --git a/cli/__init__.py b/cli/__init__.py index 1fd9fec..278d951 100644 --- a/cli/__init__.py +++ b/cli/__init__.py @@ -1,5 +1,7 @@ -import click import subprocess + +import click + from llm_utils.tools import set_gms_server diff --git a/data_utils/datahub_source.py b/data_utils/datahub_source.py index 9be2c7d..e91e0bc 100644 --- a/data_utils/datahub_source.py +++ b/data_utils/datahub_source.py @@ -1,13 +1,18 @@ -from datahub.metadata.schema_classes import DatasetPropertiesClass, SchemaMetadataClass -from datahub.emitter.rest_emitter import DatahubRestEmitter -from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph -from datahub.metadata.schema_classes import UpstreamLineageClass from collections import defaultdict + import requests +from datahub.emitter.rest_emitter import DatahubRestEmitter +from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph +from datahub.metadata.schema_classes import ( + DatasetPropertiesClass, + SchemaMetadataClass, + UpstreamLineageClass, +) + from data_utils.queries import ( - ROOT_GLOSSARY_NODES_QUERY, GLOSSARY_NODE_QUERY, LIST_QUERIES_QUERY, + ROOT_GLOSSARY_NODES_QUERY, ) diff --git a/evaluation/gen_answer.py b/evaluation/gen_answer.py index 65feb91..9848bb6 100644 --- a/evaluation/gen_answer.py +++ b/evaluation/gen_answer.py @@ -1,10 +1,9 @@ +import uuid from argparse import ArgumentParser -from langchain_core.messages import HumanMessage - -from utils import load_question_json, save_answer_json +from langchain_core.messages import HumanMessage from tqdm import tqdm -import uuid +from utils import load_question_json, save_answer_json from llm_utils.graph import builder diff --git a/evaluation/gen_persona.py b/evaluation/gen_persona.py index 4884d63..52ad642 100644 --- a/evaluation/gen_persona.py +++ b/evaluation/gen_persona.py @@ -1,12 +1,12 @@ import os +from argparse import ArgumentParser -from utils import save_persona_json, pretty_print_persona +from langchain_core.prompts import ChatPromptTemplate +from langchain_openai.chat_models import ChatOpenAI from persona_class import PersonaList +from utils import pretty_print_persona, save_persona_json from llm_utils.tools import _get_table_info -from langchain_openai.chat_models import ChatOpenAI -from langchain_core.prompts import ChatPromptTemplate -from argparse import ArgumentParser def get_table_des_string(tables_desc): diff --git a/evaluation/gen_question.py b/evaluation/gen_question.py index 25f9c66..202810f 100644 --- a/evaluation/gen_question.py +++ b/evaluation/gen_question.py @@ -1,10 +1,10 @@ -from utils import load_persona_json, save_question_json +import os +from argparse import ArgumentParser + from langchain_core.prompts import ChatPromptTemplate from langchain_openai.chat_models import ChatOpenAI from tqdm import tqdm - -from argparse import ArgumentParser -import os +from utils import load_persona_json, save_question_json def get_persona_prompt(persona): diff --git a/evaluation/persona_class.py b/evaluation/persona_class.py index d276558..6b34628 100644 --- a/evaluation/persona_class.py +++ b/evaluation/persona_class.py @@ -1,6 +1,7 @@ -from pydantic import BaseModel from typing import List +from pydantic import BaseModel + class Persona(BaseModel): name: str diff --git a/evaluation/utils.py b/evaluation/utils.py index f569d7e..9b448d9 100644 --- a/evaluation/utils.py +++ b/evaluation/utils.py @@ -1,7 +1,8 @@ import json -from persona_class import PersonaList -from glob import glob import os +from glob import glob + +from persona_class import PersonaList def save_persona_json(data, filepath): diff --git a/interface/lang2sql.py b/interface/lang2sql.py index 6a80e1c..c97ede6 100644 --- a/interface/lang2sql.py +++ b/interface/lang2sql.py @@ -1,15 +1,15 @@ -import streamlit as st -from langchain_core.messages import HumanMessage -from llm_utils.graph import builder -from langchain.chains.sql_database.prompt import SQL_PROMPTS import os from typing import Union -import pandas as pd +import pandas as pd +import streamlit as st from clickhouse_driver import Client -from llm_utils.connect_db import ConnectDB from dotenv import load_dotenv +from langchain.chains.sql_database.prompt import SQL_PROMPTS +from langchain_core.messages import HumanMessage +from llm_utils.connect_db import ConnectDB +from llm_utils.graph import builder # Clickhouse 연결 db = ConnectDB() diff --git a/interface/viz_eval.py b/interface/viz_eval.py index 2989ac8..01556ce 100644 --- a/interface/viz_eval.py +++ b/interface/viz_eval.py @@ -1,9 +1,9 @@ -import streamlit as st -import json import glob -import pandas as pd +import json import os +import pandas as pd +import streamlit as st st.set_page_config(layout="wide", page_title="Lang2SQL 평가 시각화") diff --git a/llm_utils/chains.py b/llm_utils/chains.py index 81d957e..319cc12 100644 --- a/llm_utils/chains.py +++ b/llm_utils/chains.py @@ -1,15 +1,16 @@ import os + +from dotenv import load_dotenv from langchain_core.prompts import ( ChatPromptTemplate, MessagesPlaceholder, SystemMessagePromptTemplate, ) -from .llm_factory import get_llm - -from dotenv import load_dotenv from prompt.template_loader import get_prompt_template +from .llm_factory import get_llm + env_path = os.path.join(os.getcwd(), ".env") if os.path.exists(env_path): diff --git a/llm_utils/connect_db.py b/llm_utils/connect_db.py index aa2c099..afea7c9 100644 --- a/llm_utils/connect_db.py +++ b/llm_utils/connect_db.py @@ -1,5 +1,6 @@ import os from typing import Union + import pandas as pd from clickhouse_driver import Client from dotenv import load_dotenv diff --git a/llm_utils/graph.py b/llm_utils/graph.py index a6f5137..cad94f0 100644 --- a/llm_utils/graph.py +++ b/llm_utils/graph.py @@ -1,20 +1,17 @@ -import os import json +import os -from typing_extensions import TypedDict, Annotated +from langchain.chains.sql_database.prompt import SQL_PROMPTS from langgraph.graph import END, StateGraph from langgraph.graph.message import add_messages -from langchain.chains.sql_database.prompt import SQL_PROMPTS from pydantic import BaseModel, Field -from .llm_factory import get_llm - -from llm_utils.chains import ( - query_refiner_chain, - query_maker_chain, -) +from typing_extensions import Annotated, TypedDict +from llm_utils.chains import query_maker_chain, query_refiner_chain from llm_utils.tools import get_info_from_db +from .llm_factory import get_llm + # 노드 식별자 정의 QUERY_REFINER = "query_refiner" GET_TABLE_INFO = "get_table_info" diff --git a/llm_utils/llm_factory.py b/llm_utils/llm_factory.py index bdb4d64..d0117e3 100644 --- a/llm_utils/llm_factory.py +++ b/llm_utils/llm_factory.py @@ -4,7 +4,8 @@ from dotenv import load_dotenv from langchain.llms.base import BaseLanguageModel -from langchain_aws import ChatBedrockConverse, BedrockEmbeddings +from langchain_aws import BedrockEmbeddings, ChatBedrockConverse +from langchain_community.llms.bedrock import Bedrock from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings from langchain_huggingface import ( ChatHuggingFace, @@ -13,12 +14,11 @@ ) from langchain_ollama import ChatOllama, OllamaEmbeddings from langchain_openai import ( + AzureChatOpenAI, AzureOpenAIEmbeddings, ChatOpenAI, - AzureChatOpenAI, OpenAIEmbeddings, ) -from langchain_community.llms.bedrock import Bedrock # .env 파일 로딩 load_dotenv() diff --git a/llm_utils/prompts_class.py b/llm_utils/prompts_class.py index ceeadbd..dc57647 100644 --- a/llm_utils/prompts_class.py +++ b/llm_utils/prompts_class.py @@ -1,6 +1,6 @@ -from langchain.chains.sql_database.prompt import SQL_PROMPTS import os +from langchain.chains.sql_database.prompt import SQL_PROMPTS from langchain_core.prompts import load_prompt diff --git a/llm_utils/tools.py b/llm_utils/tools.py index 31c2a09..5ccf077 100644 --- a/llm_utils/tools.py +++ b/llm_utils/tools.py @@ -1,11 +1,11 @@ import os -from typing import List, Dict, Optional, TypeVar, Callable, Iterable, Any +from concurrent.futures import ThreadPoolExecutor +from typing import Any, Callable, Dict, Iterable, List, Optional, TypeVar from langchain.schema import Document +from tqdm import tqdm from data_utils.datahub_source import DatahubMetadataFetcher -from tqdm import tqdm -from concurrent.futures import ThreadPoolExecutor T = TypeVar("T") R = TypeVar("R") diff --git a/setup.py b/setup.py index 71a31ac..9851412 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,5 @@ # setup.py -from setuptools import setup, find_packages +from setuptools import find_packages, setup with open("docs/README.md", "r", encoding="utf-8") as fh: long_description = fh.read() From 52d9204c3cdc68175317e8e1bf943c04dcedc54b Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 29 Apr 2025 14:19:53 +0000 Subject: [PATCH 8/8] style: apply pre-commit fixes (black, isort) --- cli/__init__.py | 1 + interface/lang2sql.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/cli/__init__.py b/cli/__init__.py index 5c7e980..65d9db5 100644 --- a/cli/__init__.py +++ b/cli/__init__.py @@ -1,6 +1,7 @@ """ Datahub GMS 서버 URL을 설정하고, 필요 시 Streamlit 인터페이스를 실행하는 CLI 프로그램입니다. """ + import subprocess import click diff --git a/interface/lang2sql.py b/interface/lang2sql.py index 6a1370a..368de32 100644 --- a/interface/lang2sql.py +++ b/interface/lang2sql.py @@ -22,6 +22,8 @@ "show_table": "Show Table", "show_chart": "Show Chart", } + + def summarize_total_tokens(data: list) -> int: """ 메시지 데이터에서 총 토큰 사용량을 집계합니다.