From 685003608fc75717ad91832ca87c328390551ae5 Mon Sep 17 00:00:00 2001 From: pomegranar Date: Sun, 5 Apr 2026 06:10:17 +0000 Subject: [PATCH 1/2] added .python-version file to pin the Python version for uv workflows. python 3.11 is the industry standard --- .python-version | 1 + 1 file changed, 1 insertion(+) create mode 100644 .python-version diff --git a/.python-version b/.python-version new file mode 100644 index 00000000..2c073331 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.11 From 6a0cd39dd8ec54f67f41e72f88dc902184a7cb61 Mon Sep 17 00:00:00 2001 From: pomegranar Date: Sun, 5 Apr 2026 06:20:01 +0000 Subject: [PATCH 2/2] renamed some variables in local_ingest to better reflect actual logic. --- .../core/tools/syllabi_tool/local_ingest.py | 22 ++++++++----------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/chatdku/core/tools/syllabi_tool/local_ingest.py b/chatdku/core/tools/syllabi_tool/local_ingest.py index d3b6f680..dad934d0 100644 --- a/chatdku/core/tools/syllabi_tool/local_ingest.py +++ b/chatdku/core/tools/syllabi_tool/local_ingest.py @@ -2,7 +2,7 @@ """ Local PDF/DOCX Ingestor A command line utility that extracts structured data from PDFs and DOCX files -using SGLang hosted Qwen3 model and stores results in PostgreSQL database. +using a large language model and stores the results in a PostgreSQL database. 
""" import argparse @@ -60,7 +60,7 @@ def __init__(self, args): self.args = args self.setup_logging() self.setup_database_connection() - self.setup_sglang_client() + self.setup_llm() self.load_schema() self.logger.info("Creating cursor.") self.cursor = self.conn.cursor() @@ -117,9 +117,7 @@ def setup_database_connection(self): self.logger.error(f"Failed to connect to database: {e}") sys.exit(1) - def setup_sglang_client(self): - """Setup SGLang client for Qwen3 model""" - # SGLang serves models via OpenAI-compatible API + def setup_llm(self): lm = dspy.LM( model="openai/" + config.backup_llm, api_base=config.backup_llm_url, @@ -129,7 +127,7 @@ def setup_sglang_client(self): temperature=config.llm_temperature, ) dspy.configure(lm=lm) - self.logger.info(f"SGLang client configured for: {self.args.sglang_url}") + self.logger.info(f"LLM client configured for: {self.args.llm_url}") def load_schema(self): """Load and validate JSON schema""" @@ -236,7 +234,7 @@ def extract_docx_content(self, file_path: Path) -> str: def extract_structured_data( self, content: str, file_name: str ) -> Optional[Dict[str, Any]]: - """Use SGLang + Qwen3 to extract structured data from content""" + """Use LLM to extract structured data from content""" # Create prompt for structured extraction based on schema schema_description = json.dumps(self.schema, indent=2) @@ -339,7 +337,6 @@ def process_file(self, file_path: Path): self.logger.error(f"No content extracted from {file_path.name}") return - # Extract structured data using SGLang + Qwen3 structured_data = self.extract_structured_data(content, file_path.name) if structured_data: @@ -429,7 +426,7 @@ def create_default_schema(): def main(): """Main entry point""" parser = argparse.ArgumentParser( - description="Extract structured data from PDFs and DOCX files using SGLang + Qwen3", + description="Extract structured data from PDFs and DOCX files.", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" Examples: @@ -478,17 +475,16 @@ 
def main(): help="Database table name for storing extracted data (default: documents)", ) - # SGLang configuration parser.add_argument( - "--sglang-url", + "--llm-url", default="http://localhost:8000/v1", - help="SGLang server URL (default: http://localhost:8000/v1)", + help="LLM server URL (default: http://localhost:8000/v1)", ) parser.add_argument( "--model-name", default="Qwen/Qwen3-8B", - help="Model name for SGLang (default: Qwen/Qwen3-8B)", + help="Model name (default: Qwen/Qwen3-8B)", ) # Utility arguments