From c173c1ca260feb5b8279c1a2505b192839906050 Mon Sep 17 00:00:00 2001 From: denkv Date: Tue, 2 Jun 2026 14:14:02 +0200 Subject: [PATCH 01/14] Configure qdrant location instead of host and port --- learn2rag/pipeline/qdrant.py | 4 +--- learn2rag/ui/templates/compose/pipelines/continuous.yml | 2 +- learn2rag/ui/templates/compose/pipelines/import.yml | 2 +- learn2rag/ui/templates/compose/pipelines/pipeline.yml | 2 +- 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/learn2rag/pipeline/qdrant.py b/learn2rag/pipeline/qdrant.py index 4939d4a..2a65fd2 100644 --- a/learn2rag/pipeline/qdrant.py +++ b/learn2rag/pipeline/qdrant.py @@ -9,10 +9,8 @@ class Qdrant: client = QdrantClient( - host="localhost", - port=int(os.environ.get('QDRANT__SERVICE__HTTP_PORT', 6336)), + location=os.environ.get('QDRANT_LOCATION', 'http://localhost:6336'), api_key=os.environ.get('QDRANT__SERVICE__API_KEY'), - https=False, ) def __init__(self, collection_name: str, opt_config: dict[str, Any]) -> None: diff --git a/learn2rag/ui/templates/compose/pipelines/continuous.yml b/learn2rag/ui/templates/compose/pipelines/continuous.yml index 8ec4bbb..08aeb6d 100644 --- a/learn2rag/ui/templates/compose/pipelines/continuous.yml +++ b/learn2rag/ui/templates/compose/pipelines/continuous.yml @@ -203,7 +203,7 @@ services: environment: LEARN2RAG_PATH: '{{learn2rag_path}}' LEARN2RAG_PIPELINE_PORT: '{{ports.pipeline}}' - QDRANT__SERVICE__HTTP_PORT: '{{ports.qdrant_http}}' + QDRANT_LOCATION: '{{ "http://localhost:" ~ ports.qdrant_http }}' QDRANT__SERVICE__API_KEY: '{{qdrant_api_key}}' PIPELINE_USER_CONFIG: '{{storage_path}}/basic_user_config.json' IMPORTER_CONFIG: '{{storage_path}}/importer_config.json' diff --git a/learn2rag/ui/templates/compose/pipelines/import.yml b/learn2rag/ui/templates/compose/pipelines/import.yml index 29acb44..2ebe1ae 100644 --- a/learn2rag/ui/templates/compose/pipelines/import.yml +++ b/learn2rag/ui/templates/compose/pipelines/import.yml @@ -118,7 +118,7 @@ services: 
environment: LEARN2RAG_PATH: '{{learn2rag_path}}' STORAGE_PATH: '{{storage_path}}' - QDRANT__SERVICE__HTTP_PORT: '{{ports.qdrant_http}}' + QDRANT_LOCATION: '{{ "http://localhost:" ~ ports.qdrant_http }}' QDRANT__SERVICE__API_KEY: '{{qdrant_api_key}}' PIPELINE_USER_CONFIG: '{{storage_path}}/basic_user_config.json' IMPORTER_CONFIG: '{{storage_path}}/importer_config.json' diff --git a/learn2rag/ui/templates/compose/pipelines/pipeline.yml b/learn2rag/ui/templates/compose/pipelines/pipeline.yml index 6460ead..01ef1f3 100644 --- a/learn2rag/ui/templates/compose/pipelines/pipeline.yml +++ b/learn2rag/ui/templates/compose/pipelines/pipeline.yml @@ -175,7 +175,7 @@ services: environment: LEARN2RAG_PATH: '{{learn2rag_path}}' LEARN2RAG_PIPELINE_PORT: '{{ports.pipeline}}' - QDRANT__SERVICE__HTTP_PORT: '{{ports.qdrant_http}}' + QDRANT_LOCATION: '{{ "http://localhost:" ~ ports.qdrant_http }}' QDRANT__SERVICE__API_KEY: '{{qdrant_api_key}}' PIPELINE_USER_CONFIG: '{{storage_path}}/basic_user_config.json' IMPORTER_CONFIG: '{{storage_path}}/importer_config.json' From 65dffa9ddfca100ec816859fc1e4270dcc886bec Mon Sep 17 00:00:00 2001 From: denkv Date: Tue, 2 Jun 2026 14:15:05 +0200 Subject: [PATCH 02/14] Add a possibility to redefine qdrant location per pipeline Skip running qdrant if location is defined this way. --- learn2rag/ui/templates/compose/pipelines/continuous.yml | 4 +++- learn2rag/ui/templates/compose/pipelines/import.yml | 4 +++- learn2rag/ui/templates/compose/pipelines/pipeline.yml | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/learn2rag/ui/templates/compose/pipelines/continuous.yml b/learn2rag/ui/templates/compose/pipelines/continuous.yml index 08aeb6d..d92fb9d 100644 --- a/learn2rag/ui/templates/compose/pipelines/continuous.yml +++ b/learn2rag/ui/templates/compose/pipelines/continuous.yml @@ -161,12 +161,14 @@ services: healthcheck: # TODO: We only support ['CMD', 'curl', '-f', ...] 
test: ['CMD', 'curl', '-f', '{{learn2rag_scheme}}://localhost:{{ports.ui}}/health'] + #!!! {% if not pipeline.qdrant_location %} qdrant: working_dir: '{{storage_path}}' command: - '{{learn2rag_path}}/services/qdrant/qdrant{% if is_windows %}.exe{% endif %}' - '--config-path' - '{{storage_path}}/qdrant_config.yml' + #!!! {% endif %} import: working_dir: '{{storage_path}}' command: @@ -203,7 +205,7 @@ services: environment: LEARN2RAG_PATH: '{{learn2rag_path}}' LEARN2RAG_PIPELINE_PORT: '{{ports.pipeline}}' - QDRANT_LOCATION: '{{ "http://localhost:" ~ ports.qdrant_http }}' + QDRANT_LOCATION: '{{ pipeline.qdrant_location or "http://localhost:" ~ ports.qdrant_http }}' QDRANT__SERVICE__API_KEY: '{{qdrant_api_key}}' PIPELINE_USER_CONFIG: '{{storage_path}}/basic_user_config.json' IMPORTER_CONFIG: '{{storage_path}}/importer_config.json' diff --git a/learn2rag/ui/templates/compose/pipelines/import.yml b/learn2rag/ui/templates/compose/pipelines/import.yml index 2ebe1ae..459225a 100644 --- a/learn2rag/ui/templates/compose/pipelines/import.yml +++ b/learn2rag/ui/templates/compose/pipelines/import.yml @@ -99,12 +99,14 @@ files: content: '' services: + #!!! {% if not pipeline.qdrant_location %} qdrant: working_dir: '{{storage_path}}' command: - '{{learn2rag_path}}/services/qdrant/qdrant{% if is_windows %}.exe{% endif %}' - '--config-path' - '{{storage_path}}/qdrant_config.yml' + #!!! 
{% endif %} main: working_dir: '{{storage_path}}' command: @@ -118,7 +120,7 @@ services: environment: LEARN2RAG_PATH: '{{learn2rag_path}}' STORAGE_PATH: '{{storage_path}}' - QDRANT_LOCATION: '{{ "http://localhost:" ~ ports.qdrant_http }}' + QDRANT_LOCATION: '{{ pipeline.qdrant_location or "http://localhost:" ~ ports.qdrant_http }}' QDRANT__SERVICE__API_KEY: '{{qdrant_api_key}}' PIPELINE_USER_CONFIG: '{{storage_path}}/basic_user_config.json' IMPORTER_CONFIG: '{{storage_path}}/importer_config.json' diff --git a/learn2rag/ui/templates/compose/pipelines/pipeline.yml b/learn2rag/ui/templates/compose/pipelines/pipeline.yml index 01ef1f3..674ec9e 100644 --- a/learn2rag/ui/templates/compose/pipelines/pipeline.yml +++ b/learn2rag/ui/templates/compose/pipelines/pipeline.yml @@ -159,12 +159,14 @@ services: healthcheck: # TODO: We only support ['CMD', 'curl', '-f', ...] test: ['CMD', 'curl', '-f', '{{learn2rag_scheme}}://localhost:{{ports.ui}}/health'] + #!!! {% if not pipeline.qdrant_location %} qdrant: working_dir: '{{storage_path}}' command: - '{{learn2rag_path}}/services/qdrant/qdrant{% if is_windows %}.exe{% endif %}' - '--config-path' - '{{storage_path}}/qdrant_config.yml' + #!!! 
{% endif %} main: working_dir: '{{storage_path}}' command: @@ -175,7 +177,7 @@ services: environment: LEARN2RAG_PATH: '{{learn2rag_path}}' LEARN2RAG_PIPELINE_PORT: '{{ports.pipeline}}' - QDRANT_LOCATION: '{{ "http://localhost:" ~ ports.qdrant_http }}' + QDRANT_LOCATION: '{{ pipeline.qdrant_location or "http://localhost:" ~ ports.qdrant_http }}' QDRANT__SERVICE__API_KEY: '{{qdrant_api_key}}' PIPELINE_USER_CONFIG: '{{storage_path}}/basic_user_config.json' IMPORTER_CONFIG: '{{storage_path}}/importer_config.json' From 0b86c9ffe743406c0eeed3f261a9f57e46a27a03 Mon Sep 17 00:00:00 2001 From: denkv Date: Tue, 2 Jun 2026 16:07:55 +0200 Subject: [PATCH 03/14] Allow LABEL=None for LLMClient and exclude them from the interface --- learn2rag/pipeline/llm.py | 11 ++++++++--- learn2rag/ui/templates/models_add.html | 2 ++ learn2rag/ui/templates/models_list.html | 4 ++++ 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/learn2rag/pipeline/llm.py b/learn2rag/pipeline/llm.py index 18cc061..570cce4 100644 --- a/learn2rag/pipeline/llm.py +++ b/learn2rag/pipeline/llm.py @@ -10,10 +10,15 @@ class LLMClient(): - # ID is used as a key to store in user data, should not be changed ID: str - # LABEL is a display label for user interface - LABEL: str + '''A key stored in user data, must not be changed''' + + LABEL: str | None + ''' + A display label for the interface. + If None, the option would be excluded from the interface. + ''' + chat_model: BaseChatModel diff --git a/learn2rag/ui/templates/models_add.html b/learn2rag/ui/templates/models_add.html index f17d093..15fce6e 100644 --- a/learn2rag/ui/templates/models_add.html +++ b/learn2rag/ui/templates/models_add.html @@ -16,12 +16,14 @@

diff --git a/learn2rag/ui/templates/models_list.html b/learn2rag/ui/templates/models_list.html index 4743fc9..782a921 100644 --- a/learn2rag/ui/templates/models_list.html +++ b/learn2rag/ui/templates/models_list.html @@ -15,9 +15,13 @@ {{ model.label }} {% if model.api in llm.llms %} + {% if llm.llms[model.api].LABEL %} {{ llm.llms[model.api].LABEL }} {% else %} {{ model.api }} + {% endif %} + {% else %} + {{ model.api }} ⚠️ {% endif %} From e0cf5894ca053c89381dfc44553467f0746e0950 Mon Sep 17 00:00:00 2001 From: denkv Date: Tue, 2 Jun 2026 16:08:56 +0200 Subject: [PATCH 04/14] Add a mock LLM client for tests --- learn2rag/pipeline/llm.py | 45 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/learn2rag/pipeline/llm.py b/learn2rag/pipeline/llm.py index 570cce4..0060049 100644 --- a/learn2rag/pipeline/llm.py +++ b/learn2rag/pipeline/llm.py @@ -2,8 +2,11 @@ import os from pydantic import SecretStr from langchain_core.language_models.chat_models import BaseChatModel +from langchain_core.messages import AIMessage, BaseMessage, SystemMessage +from langchain_core.outputs import ChatGeneration, ChatResult from langchain_ollama import ChatOllama from langchain_openai import ChatOpenAI +from typing import Any, ClassVar logger = logging.getLogger(__name__) @@ -23,6 +26,9 @@ class LLMClient(): llms = {} +'''A dict holding supported LLM client classes''' + + def llm_client(cls: type[LLMClient]) -> type[LLMClient]: llms[cls.ID] = cls; return cls @@ -30,6 +36,7 @@ def llm_client(cls: type[LLMClient]) -> type[LLMClient]: # First @llm_client would be the default in UI when adding an external model @llm_client class OpenAIClient(LLMClient): + '''A LLM client based on OpenAI API''' ID = 'ChatOpenAI' LABEL = 'OpenAI' @@ -44,6 +51,7 @@ def __init__(self, *, url: str, token: SecretStr, model: str, proxy: str | None) @llm_client class OllamaClient(LLMClient): + '''A LLM client based on Ollama API''' ID = 'ChatOllama' LABEL = 'Ollama' @@ -59,7 +67,44 @@ 
class TestFakeChatModel(BaseChatModel):
    '''
    A mock BaseChatModel implementation for tests.

    It produces a single generation whose text is the class-level ``hint``
    followed by a space and the content of the first message, which must be
    a SystemMessage.
    '''
    # Fixed prefix put in front of every reply produced by this model.
    hint: ClassVar[str] = 'This is an internal model used for testing only.'

    @property
    def _llm_type(self) -> str:
        '''Return the identifier string of this chat model type.'''
        return 'test_fake_chat_model'

    def _generate(
        self,
        messages: list[BaseMessage],
        stop: list[str] | None = None,
        run_manager: Any = None,
        **kwargs: Any
    ) -> ChatResult:
        '''
        Build the reply from the first message only.

        ``stop``, ``run_manager`` and ``kwargs`` are accepted but not used.
        An empty ``messages`` list fails with IndexError on ``messages[0]``.
        '''
        # Sanity check: the conversation must start with the system prompt.
        assert isinstance(messages[0], SystemMessage)
        # Only the first message is read; later messages are ignored.
        content = f'{self.hint} {messages[0].content}'
        return ChatResult(
            generations=[
                ChatGeneration(message=AIMessage(content=content)),
            ],
        )


@llm_client
class FakeClient(LLMClient):
    '''
    A mock LLM client to use only in tests.

    LABEL is None; the constructor arguments are accepted but ignored.
    '''
    ID = 'ChatFake'
    LABEL = None

    def __init__(self, *, url: str, token: str | None, model: str, proxy: str | None) -> None:
        # None of the connection parameters are used: the fake model is created as is.
        self.chat_model = TestFakeChatModel()
b/learn2rag/ui/__init__.py @@ -4,13 +4,10 @@ import logging import math import os -import platform -import xdg.BaseDirectory import secrets import shutil import signal import socket -import subprocess import threading import time from typing import Any @@ -29,6 +26,12 @@ from learn2rag.compose import Project import learn2rag.data import learn2rag.pipeline.llm +from ..utils import ( + is_windows, + normalize_path, + open_web_browser, + save_data_path, +) from datetime import datetime # <-- ADD THIS @@ -37,10 +40,6 @@ logging.getLogger().setLevel(logging.DEBUG) -def expand_path(path: Path) -> Path: - return Path(path).expanduser().absolute() - - import werkzeug def redirect(url: str) -> 'werkzeug.wrappers.response.Response': if 'HX-Boosted' in request.headers: @@ -53,14 +52,14 @@ def redirect(url: str) -> 'werkzeug.wrappers.response.Response': def start_project(name: str, template_file: Path, storage_path: Path, render_context: dict[str, Any]={}) -> Project: logging.debug('UI starting project: %s', name) - storage_path = expand_path(storage_path) + storage_path = normalize_path(storage_path) logging.debug('Storage path: %s', storage_path) storage_path.mkdir(parents=True, exist_ok=True) project_file = storage_path / 'compose.yml' template = jinja2.Template(template_file.read_text()) project_file.write_text(template.render(render_context | { - 'is_windows': platform.system() == 'Windows', + 'is_windows': is_windows(), 'learn2rag_path': Path('.').absolute(), 'storage_path': storage_path, })) @@ -126,14 +125,9 @@ def merge(source: dict[str, Any], destination: dict[str, Any]) -> dict[str, Any] def create_app(config: dict[str, Any]={}) -> Flask: # create and configure the app - if platform.system() == 'Windows': - windows_app_data = os.getenv('LOCALAPPDATA') - assert windows_app_data is not None - default_instance_path = windows_app_data + '/Learn2RAG/instance' - else: - default_instance_path = xdg.BaseDirectory.save_data_path('Learn2RAG/instance') + default_instance_path 
= save_data_path('Learn2RAG', 'instance') - example_local_path = r'C:\Users\User\Documents' if platform.system() == 'Windows' else '/home/user/Documents' + example_local_path = r'C:\Users\User\Documents' if is_windows() else '/home/user/Documents' app = Flask( __name__, instance_path=config.get('flask', {}).get('instance_path', default_instance_path), @@ -220,7 +214,7 @@ def inject_current_year() -> dict[str, Any]: def remove_pipeline_storage_directory(storage_path: Path) -> bool: try: - storage_path = expand_path(storage_path) + storage_path = normalize_path(storage_path) shutil.rmtree(storage_path) flash(pgettext('flash', 'Directory removed: %(path)s', path=storage_path)) except FileNotFoundError: @@ -423,7 +417,7 @@ def start_pipeline(name: str, pipeline: dict[str, Any], template_name: str) -> N sources = learn2rag.data.get_entries(app.instance_path, 'sources', pipeline['sources']) for path_name, source in sources.items(): if 'path' in source: - source['path'] = str(expand_path(source['path'])) + source['path'] = str(normalize_path(source['path'])) # Fetch the language model configuration first let see if it works language_model = learn2rag.data.get_entry(app.instance_path, 'models', pipeline['language_model']) @@ -520,7 +514,7 @@ def pipeline_logs(name: str, file: str) -> 'str | werkzeug.wrappers.response.Res if pipeline is None: flash(pgettext('flash', 'The requested pipeline is not found'), 'error') elif file in ['debug.log', 'error.log']: - storage_path = expand_path(pipeline['storage_path']) + storage_path = normalize_path(pipeline['storage_path']) log_file = storage_path / 'logs' / file try: content = log_file.read_text() @@ -578,18 +572,6 @@ def shutdown() -> None: os.kill(os.getpid(), signal.SIGTERM) -def webbrowser_open(url: str) -> None: - try: - if platform.system() == 'Windows': - subprocess.Popen(['explorer', url]) - else: - subprocess.Popen(['xdg-open', url]) - except FileNotFoundError: - pass - except Exception as e: - print(e) - - def 
main(config: dict[str, Any]) -> None: app = create_app(config=config) @@ -616,7 +598,7 @@ def main(config: dict[str, Any]) -> None: protocol = 'https' if use_https else 'http' url = f"{protocol}://localhost:{port}" - webbrowser_open(url) + open_web_browser(url) logging.info('*' * 40) logging.info('Learn2RAG: ' + url) logging.info('*' * 40) diff --git a/learn2rag/utils/__init__.py b/learn2rag/utils/__init__.py new file mode 100644 index 0000000..cd3d167 --- /dev/null +++ b/learn2rag/utils/__init__.py @@ -0,0 +1,42 @@ +''' +Utilities which do not depend on Learn2RAG. +''' +import logging +import platform +import os +import subprocess +from pathlib import Path + +import xdg.BaseDirectory + + +def is_windows() -> bool: + return platform.system() == 'Windows' + + +def normalize_path(path: Path) -> Path: + 'Expand ~ and ~user constructs; make the path absolute, resolving symlinks' + return Path(path).expanduser().resolve() + + +def open_web_browser(url: str) -> None: + 'Tries to open the specified URL in a web browser' + try: + if not is_windows(): + subprocess.Popen(['xdg-open', url]) + else: + subprocess.Popen(['explorer', url]) + except FileNotFoundError: + pass + except Exception: + logging.error('Unable to open the web browser', exc_info=True) + + +def save_data_path(*resource: str) -> str: + 'Returns the application data path for the specified resource' + if not is_windows(): + return xdg.BaseDirectory.save_data_path(*resource) + else: + windows_app_data = os.getenv('LOCALAPPDATA') + assert windows_app_data is not None + return os.path.join(windows_app_data, *resource) diff --git a/learn2rag/utils/tests/__init__.py b/learn2rag/utils/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/learn2rag/utils/tests/test_utils.py b/learn2rag/utils/tests/test_utils.py new file mode 100644 index 0000000..92d9f88 --- /dev/null +++ b/learn2rag/utils/tests/test_utils.py @@ -0,0 +1,25 @@ +import unittest +from getpass import getuser +from pathlib import Path 
class UtilsTestCase(unittest.TestCase):
    '''Tests for the helpers imported from the parent package.'''

    @unittest.skipIf(is_windows(), 'This test is not adapted for Windows')
    def test_normalize_path(self) -> None:
        '''Normalized paths are absolute and contain no parent references.'''
        username = getuser()
        # ~user must expand to an absolute home directory.
        self.assertTrue(str(normalize_path(Path('~' + username))).startswith('/'))
        # A relative path must be made absolute.
        self.assertTrue(str(normalize_path(Path('.'))).startswith('/'))
        # '..' must be resolved away rather than kept in the result.
        self.assertNotIn('..', str(normalize_path(Path('..'))))

    def test_save_data_path(self) -> None:
        '''The returned data path is an existing, writable directory.'''
        path = Path(save_data_path('Learn2RAG', 'tests'))
        self.assertTrue(path.exists())
        self.assertTrue(path.is_dir())
        marker = path / 'writeable'
        marker.touch()
        # Do not leave the marker file behind in the user's data directory.
        self.addCleanup(marker.unlink, missing_ok=True)
00:00:00 2001 From: denkv Date: Wed, 3 Jun 2026 15:45:04 +0200 Subject: [PATCH 07/14] Move templating into compose submodule --- learn2rag/compose/__init__.py | 16 +++++++++++++--- learn2rag/ui/__init__.py | 17 +++++++---------- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/learn2rag/compose/__init__.py b/learn2rag/compose/__init__.py index b735789..688c969 100644 --- a/learn2rag/compose/__init__.py +++ b/learn2rag/compose/__init__.py @@ -10,6 +10,7 @@ import urllib.request from typing import Any, Optional +import jinja2 import psutil import yaml @@ -109,9 +110,18 @@ class Project(): content: dict[str, Any] @staticmethod - def create(project_file: str | Path, name: str) -> 'Project | None': - with open(project_file) as f: - content = yaml.safe_load(f) + def create( + compose_file: str | Path, + name: str, + *, + template: bool = False, + template_context: dict[str, Any] = {}, + ) -> 'Project | None': + if template: + content = yaml.safe_load(jinja2.Template(Path(compose_file).read_text()).render(template_context)) + else: + with open(compose_file) as f: + content = yaml.safe_load(f) assert len(content['services']) > 0 cur = con.cursor() cur.execute('BEGIN EXCLUSIVE') diff --git a/learn2rag/ui/__init__.py b/learn2rag/ui/__init__.py index f9e8250..15248d2 100644 --- a/learn2rag/ui/__init__.py +++ b/learn2rag/ui/__init__.py @@ -17,7 +17,6 @@ from flask import Flask, flash, redirect as flask_redirect, render_template, request, make_response, url_for from flask_babel import Babel, gettext, ngettext, pgettext # type: ignore[import-untyped] import flask.logging -import jinja2 import ollama import uvicorn import yaml @@ -54,20 +53,18 @@ def start_project(name: str, template_file: Path, storage_path: Path, render_con logging.debug('UI starting project: %s', name) storage_path = normalize_path(storage_path) logging.debug('Storage path: %s', storage_path) + project = None + if project := Project.get(name): + assert not project.running + project.remove() + 
storage_path.mkdir(parents=True, exist_ok=True) - project_file = storage_path / 'compose.yml' - template = jinja2.Template(template_file.read_text()) - project_file.write_text(template.render(render_context | { + project = Project.create(template_file, name, template=True, template_context=render_context | { 'is_windows': is_windows(), 'learn2rag_path': Path('.').absolute(), 'storage_path': storage_path, - })) - project = None - if project := Project.get(name): - assert not project.running - project.remove() - project = Project.create(project_file, name) + }) assert project is not None, 'project should not be None' project.start() return project From 492f40965c47ad3788f501f3331ba942be7248b6 Mon Sep 17 00:00:00 2001 From: denkv Date: Wed, 3 Jun 2026 15:48:52 +0200 Subject: [PATCH 08/14] Move path normalization --- learn2rag/ui/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/learn2rag/ui/__init__.py b/learn2rag/ui/__init__.py index 15248d2..b235a00 100644 --- a/learn2rag/ui/__init__.py +++ b/learn2rag/ui/__init__.py @@ -51,7 +51,6 @@ def redirect(url: str) -> 'werkzeug.wrappers.response.Response': def start_project(name: str, template_file: Path, storage_path: Path, render_context: dict[str, Any]={}) -> Project: logging.debug('UI starting project: %s', name) - storage_path = normalize_path(storage_path) logging.debug('Storage path: %s', storage_path) project = None if project := Project.get(name): @@ -466,7 +465,7 @@ def start_pipeline(name: str, pipeline: dict[str, Any], template_name: str) -> N ports = find_free_ports(len(port_names), configured_ports=configured_ports, preferred_ports=app.config.get('PREFERRED_PORTS', range(9001, 9011))) render_context['ports'] = dict(zip(port_names, ports)) - storage_path = Path(pipeline['storage_path']) + storage_path = normalize_path(pipeline['storage_path']) try: project = start_project(name, template_file, storage_path, render_context) From 898815ed71785861a29fc42ef8a14cf3655fdb10 Mon Sep 17 
00:00:00 2001 From: denkv Date: Mon, 8 Jun 2026 14:27:47 +0200 Subject: [PATCH 09/14] Refactor qdrant initialization --- learn2rag/pipeline/qdrant.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/learn2rag/pipeline/qdrant.py b/learn2rag/pipeline/qdrant.py index 2a65fd2..1297e75 100644 --- a/learn2rag/pipeline/qdrant.py +++ b/learn2rag/pipeline/qdrant.py @@ -6,11 +6,14 @@ from .config import user_config +api_key = os.environ.get('QDRANT__SERVICE__API_KEY') +location = os.environ.get('QDRANT_LOCATION', 'http://localhost:6336') + class Qdrant: client = QdrantClient( - location=os.environ.get('QDRANT_LOCATION', 'http://localhost:6336'), - api_key=os.environ.get('QDRANT__SERVICE__API_KEY'), + location=location, + api_key=api_key, ) def __init__(self, collection_name: str, opt_config: dict[str, Any]) -> None: From bbc8368c8a804b355025a846ac37cef728005d33 Mon Sep 17 00:00:00 2001 From: denkv Date: Mon, 8 Jun 2026 14:36:46 +0200 Subject: [PATCH 10/14] Allow to use qdrant with persistence path --- learn2rag/pipeline/qdrant.py | 4 +++- learn2rag/ui/templates/compose/pipelines/continuous.yml | 2 +- learn2rag/ui/templates/compose/pipelines/import.yml | 3 ++- learn2rag/ui/templates/compose/pipelines/pipeline.yml | 3 ++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/learn2rag/pipeline/qdrant.py b/learn2rag/pipeline/qdrant.py index 1297e75..510507e 100644 --- a/learn2rag/pipeline/qdrant.py +++ b/learn2rag/pipeline/qdrant.py @@ -7,13 +7,15 @@ from .config import user_config api_key = os.environ.get('QDRANT__SERVICE__API_KEY') -location = os.environ.get('QDRANT_LOCATION', 'http://localhost:6336') +path = os.environ.get('QDRANT_PATH') or None +location = None if path else os.environ.get('QDRANT_LOCATION', 'http://localhost:6336') class Qdrant: client = QdrantClient( location=location, api_key=api_key, + path=path, ) def __init__(self, collection_name: str, opt_config: dict[str, Any]) -> None: diff --git 
a/learn2rag/ui/templates/compose/pipelines/continuous.yml b/learn2rag/ui/templates/compose/pipelines/continuous.yml index d92fb9d..11c3875 100644 --- a/learn2rag/ui/templates/compose/pipelines/continuous.yml +++ b/learn2rag/ui/templates/compose/pipelines/continuous.yml @@ -161,7 +161,7 @@ services: healthcheck: # TODO: We only support ['CMD', 'curl', '-f', ...] test: ['CMD', 'curl', '-f', '{{learn2rag_scheme}}://localhost:{{ports.ui}}/health'] - #!!! {% if not pipeline.qdrant_location %} + #!!! {% if not pipeline.qdrant_location and not pipeline.qdrant_path %} qdrant: working_dir: '{{storage_path}}' command: diff --git a/learn2rag/ui/templates/compose/pipelines/import.yml b/learn2rag/ui/templates/compose/pipelines/import.yml index 459225a..fe9302b 100644 --- a/learn2rag/ui/templates/compose/pipelines/import.yml +++ b/learn2rag/ui/templates/compose/pipelines/import.yml @@ -99,7 +99,7 @@ files: content: '' services: - #!!! {% if not pipeline.qdrant_location %} + #!!! {% if not pipeline.qdrant_location and not pipeline.qdrant_path %} qdrant: working_dir: '{{storage_path}}' command: @@ -121,6 +121,7 @@ services: LEARN2RAG_PATH: '{{learn2rag_path}}' STORAGE_PATH: '{{storage_path}}' QDRANT_LOCATION: '{{ pipeline.qdrant_location or "http://localhost:" ~ ports.qdrant_http }}' + QDRANT_PATH: '{{ pipeline.qdrant_path }}' QDRANT__SERVICE__API_KEY: '{{qdrant_api_key}}' PIPELINE_USER_CONFIG: '{{storage_path}}/basic_user_config.json' IMPORTER_CONFIG: '{{storage_path}}/importer_config.json' diff --git a/learn2rag/ui/templates/compose/pipelines/pipeline.yml b/learn2rag/ui/templates/compose/pipelines/pipeline.yml index 674ec9e..9d9fbeb 100644 --- a/learn2rag/ui/templates/compose/pipelines/pipeline.yml +++ b/learn2rag/ui/templates/compose/pipelines/pipeline.yml @@ -159,7 +159,7 @@ services: healthcheck: # TODO: We only support ['CMD', 'curl', '-f', ...] test: ['CMD', 'curl', '-f', '{{learn2rag_scheme}}://localhost:{{ports.ui}}/health'] - #!!! 
{% if not pipeline.qdrant_location %} + #!!! {% if not pipeline.qdrant_location and not pipeline.qdrant_path %} qdrant: working_dir: '{{storage_path}}' command: @@ -178,6 +178,7 @@ services: LEARN2RAG_PATH: '{{learn2rag_path}}' LEARN2RAG_PIPELINE_PORT: '{{ports.pipeline}}' QDRANT_LOCATION: '{{ pipeline.qdrant_location or "http://localhost:" ~ ports.qdrant_http }}' + QDRANT_PATH: '{{ pipeline.qdrant_path }}' QDRANT__SERVICE__API_KEY: '{{qdrant_api_key}}' PIPELINE_USER_CONFIG: '{{storage_path}}/basic_user_config.json' IMPORTER_CONFIG: '{{storage_path}}/importer_config.json' From 7a1df06dbb00e38f3d1a4de6328643a57234aacd Mon Sep 17 00:00:00 2001 From: denkv Date: Mon, 8 Jun 2026 14:29:17 +0200 Subject: [PATCH 11/14] Use the same format for all configurations --- learn2rag/ui/templates/compose/pipelines/pipeline.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/learn2rag/ui/templates/compose/pipelines/pipeline.yml b/learn2rag/ui/templates/compose/pipelines/pipeline.yml index 9d9fbeb..2e61204 100644 --- a/learn2rag/ui/templates/compose/pipelines/pipeline.yml +++ b/learn2rag/ui/templates/compose/pipelines/pipeline.yml @@ -14,7 +14,7 @@ files: service: api_key: '{{qdrant_api_key}}' grpc_port: null - http_port: '{{ports.qdrant_http}}' + http_port: {{ports.qdrant_http}} host: '127.0.0.1' telemetry_disabled: true - path: '{{storage_path}}/basic_user_config.json' From 2719adf3b6a6ab10cf76ec61f35cf44af0419659 Mon Sep 17 00:00:00 2001 From: denkv Date: Mon, 8 Jun 2026 14:31:32 +0200 Subject: [PATCH 12/14] Do not run open-webui if port is not set --- learn2rag/ui/templates/compose/pipelines/continuous.yml | 2 ++ learn2rag/ui/templates/compose/pipelines/pipeline.yml | 2 ++ 2 files changed, 4 insertions(+) diff --git a/learn2rag/ui/templates/compose/pipelines/continuous.yml b/learn2rag/ui/templates/compose/pipelines/continuous.yml index 11c3875..bd4254f 100644 --- a/learn2rag/ui/templates/compose/pipelines/continuous.yml +++ 
b/learn2rag/ui/templates/compose/pipelines/continuous.yml @@ -102,6 +102,7 @@ files: content: '' services: + #!!! {% if ports.ui %} open-webui: working_dir: '{{storage_path}}' command: @@ -161,6 +162,7 @@ services: healthcheck: # TODO: We only support ['CMD', 'curl', '-f', ...] test: ['CMD', 'curl', '-f', '{{learn2rag_scheme}}://localhost:{{ports.ui}}/health'] + #!!! {% endif %} #!!! {% if not pipeline.qdrant_location and not pipeline.qdrant_path %} qdrant: working_dir: '{{storage_path}}' diff --git a/learn2rag/ui/templates/compose/pipelines/pipeline.yml b/learn2rag/ui/templates/compose/pipelines/pipeline.yml index 2e61204..1c77961 100644 --- a/learn2rag/ui/templates/compose/pipelines/pipeline.yml +++ b/learn2rag/ui/templates/compose/pipelines/pipeline.yml @@ -100,6 +100,7 @@ files: content: '' services: + #!!! {% if ports.ui %} open-webui: working_dir: '{{storage_path}}' command: @@ -159,6 +160,7 @@ services: healthcheck: # TODO: We only support ['CMD', 'curl', '-f', ...] test: ['CMD', 'curl', '-f', '{{learn2rag_scheme}}://localhost:{{ports.ui}}/health'] + #!!! {% endif %} #!!! 
# adapted from pytestqt
def waitUntil(
    callback: Callable[[], Optional[bool]],
    *,
    timeout: int = 5000,
    interval: int = 10,
) -> None:
    """
    Poll ``callback`` until it reports success or ``timeout`` is exceeded.

    Two callback styles are supported:

    * "assert" form: the callback raises ``AssertionError`` while the
      condition is not met and returns ``None`` once it is.

      .. code-block:: python

          def view_updated():
              assert view_model.count() > 10

          waitUntil(view_updated)

    * "True/False" form: the callback returns ``True`` when the condition is
      met and ``False`` otherwise. The timeout error is less informative in
      this form because there is no assertion message to chain.

      .. code-block:: python

          waitUntil(lambda: view_model.count() > 10)

    The return value is checked with ``in (None, True, False)``, i.e. by
    equality, so other "falsy" values such as an empty list are rejected.

    :param callback: callable that will be called periodically.
    :param timeout: timeout value in ms.
    :param interval: pause between two calls of the callback, in ms.
    :raises TimeoutError: if the condition is not met within ``timeout``; it
        is chained to the last ``AssertionError`` in the "assert" form.
    :raises ValueError: if the callback returns anything other than ``None``,
        ``True`` or ``False``.
    """
    # pytest convention: hide this frame from failure tracebacks.
    __tracebackhide__ = True
    import time

    # A monotonic clock is immune to wall-clock adjustments during the wait.
    start = time.monotonic()

    def timed_out() -> bool:
        elapsed_ms = (time.monotonic() - start) * 1000
        return elapsed_ms > timeout

    timeout_msg = f"waitUntil timed out in {timeout} milliseconds"

    while True:
        try:
            result = callback()
        except AssertionError as e:
            if timed_out():
                raise TimeoutError(timeout_msg) from e
        else:
            if result not in (None, True, False):
                # An f-string is used because %-formatting breaks on tuples.
                raise ValueError(
                    "waitUntil() callback must return None, True or False, "
                    f"returned {result!r}"
                )

            # 'assert' form succeeded (None) or 'True/False' form returned True
            if result is None or result:
                return
            if timed_out():
                raise TimeoutError(timeout_msg)
        # The interval is in milliseconds, time.sleep() takes seconds.
        time.sleep(interval / 1000)
--- /dev/null +++ b/learn2rag/tests/data/rabbits.txt @@ -0,0 +1,7 @@ +Rabbits or bunnies are small mammals in the family Leporidae (which also includes the hares), which is in the order Lagomorpha (which also includes pikas). They are familiar throughout the world as a small herbivore, a prey animal, a domesticated form of livestock, and a pet, having a widespread effect on ecologies and cultures. The most widespread rabbit genera are Oryctolagus and Sylvilagus. The former, Oryctolagus, includes the European rabbit, Oryctolagus cuniculus, which is the ancestor of the hundreds of breeds of domestic rabbit and has been introduced on every continent except Antarctica. The latter, Sylvilagus, includes over 13 wild rabbit species, among them the cottontails and tapetis. Wild rabbits not included in Oryctolagus and Sylvilagus include several species of limited distribution, including the pygmy rabbit, volcano rabbit, and Sumatran striped rabbit. + +Rabbits are a paraphyletic grouping, and do not constitute a clade, as hares (belonging to the genus Lepus) are nested within the Leporidae clade and are not described as rabbits. Although once considered rodents, lagomorphs diverged earlier and have a number of traits rodents lack, including two extra incisors. Similarities between rabbits and rodents were once attributed to convergent evolution, but studies in molecular biology have found a common ancestor between lagomorphs and rodents and place them in the clade Glires. + +Rabbit physiology is suited to escaping predators and surviving in various habitats, living either alone or in groups in nests or burrows. As prey animals, rabbits are constantly aware of their surroundings, having a wide field of vision and ears with high surface area to detect potential predators. The ears of a rabbit are essential for thermoregulation and contain a high density of blood vessels. 
The bone structure of a rabbit's hind legs, which is longer than that of the fore legs, allows for quick hopping, which is beneficial for escaping predators and can provide powerful kicks if captured. Rabbits are typically nocturnal and often sleep with their eyes open. They reproduce quickly, having short pregnancies, large litters of four to twelve kits, and no particular mating season; however, the mortality rate of rabbit embryos is high, and there exist several widespread diseases that affect rabbits, such as rabbit hemorrhagic disease and myxomatosis. In some regions, especially Australia, rabbits have caused ecological problems and are regarded as a pest. + +Humans have used rabbits as livestock since at least the first century BC in ancient Rome, raising them for their meat, fur and wool. The various breeds of the European rabbit have been developed to suit each of these products; the practice of raising and breeding rabbits as livestock is known as cuniculture. Rabbits are seen in human culture globally, appearing as a symbol of fertility, cunning, and innocence in major religions, historical and contemporary art. 
class Learn2RAGTestCase(TestCase):
    """End-to-end test: run the import project, then query the RAG pipeline.

    Each test starts from a clean slate: any project left over under
    ``project_name`` is stopped and removed in ``setUp``, and both the project
    and the scratch storage directory are removed again in ``tearDown``.
    """

    # OpenAI-compatible client pointed at the local pipeline endpoint.
    openai_client: Any
    # Name under which the compose project is created and looked up.
    project_name: str
    # Port the pipeline is told to use (``ports.pipeline``) and the client connects to.
    rag_port: int
    # Scratch directory handed to the templates as ``storage_path``.
    storage_path: Path

    def setUp(self) -> None:
        """Create the scratch directory and the client, drop any stale project."""
        self.project_name = 'test'
        self.rag_port = 5002
        self.storage_path = Path(save_data_path('Learn2RAG', 'tests'))
        self.storage_path.mkdir(parents=True, exist_ok=True)
        self.openai_client = OpenAI(
            api_key='',
            base_url=f'http://localhost:{self.rag_port}',
            # Retrying is done by waitUntil(); fail fast on each attempt.
            max_retries=0,
        )
        self._remove_project()

    def tearDown(self) -> None:
        """Stop and remove the project, then delete the scratch directory.

        The project is shut down first so nothing is still using files inside
        ``storage_path`` (e.g. ``qdrant_persistence``) while they are deleted.
        The directory is removed even if shutting the project down fails.
        """
        try:
            self._remove_project()
        finally:
            shutil.rmtree(self.storage_path, ignore_errors=True)

    def _remove_project(self) -> None:
        """Stop (if running) and remove the project named ``project_name``, if any."""
        if project := Project.get(self.project_name):
            if project.running:
                project.stop()
            project.remove()

    def test_learn2rag(self) -> None:
        """Import ``data_dir`` and check that a chat completion contains its text."""
        template_context = {
            'is_windows': is_windows(),
            'learn2rag_path': Path('.').absolute(),
            'storage_path': self.storage_path,
            # No 'ui' port: pipeline.yml wraps the open-webui service in
            # ``{% if ports.ui %}``.
            'ports': {
                'pipeline': self.rag_port,
            },
            'qdrant_api_key': '',
            'language_model': {'api': 'ChatFake'},
            # With ``qdrant_path`` set, pipeline.yml skips the qdrant service
            # (``{% if not pipeline.qdrant_location and not pipeline.qdrant_path %}``).
            'pipeline': {
                'qdrant_path': self.storage_path / 'qdrant_persistence',
            },
            'import_config': {
                'loaders': [
                    {
                        'loader_id': 'local_test',
                        'loader_type': 'DirectoryLoader',
                        'recursive': 'True',
                        'path': str(data_dir),
                    },
                ],
            },
        }

        # Phase 1: run the import project until it stops running.
        project = Project.create(
            template_dir / 'import.yml',
            self.project_name,
            template=True,
            template_context=template_context,
        )
        assert project is not None, 'project should not be None'
        project.start()
        assert project.running

        def check_import() -> None:
            # Look the project up again on every poll instead of reusing the
            # outer object.
            current = Project.get(self.project_name)
            assert current is not None
            assert not current.running

        waitUntil(check_import, timeout=1 * 60 * 1000)

        project.remove()

        # Phase 2: start the RAG pipeline under the same project name.
        project = Project.create(
            template_dir / 'pipeline.yml',
            self.project_name,
            template=True,
            template_context=template_context,
        )
        assert project is not None, 'project should not be None'
        project.start()
        assert project.running

        def check_rag() -> None:
            try:
                completion = self.openai_client.chat.completions.create(
                    model='learn2rag',
                    messages=[
                        {'role': 'user', 'content': 'What are rabbits?'},
                    ],
                )
            except APIConnectionError as e:
                # Endpoint not reachable yet: an explicit raise (unlike
                # ``assert False``) still makes waitUntil() retry under ``-O``.
                raise AssertionError('RAG endpoint is not reachable yet') from e
            content = completion.choices[-1].message.content
            assert content is not None, 'completion has content'
            assert 'for testing only' in content, 'contains test marker'
            assert "Information:\\n" in content, 'contains the prompt'
            assert not content.endswith("Information:\\n"), 'contains any document chunks in the prompt'
            assert 'Lagomorpha' in content, 'specific text from a test file'

        waitUntil(check_rag, timeout=1 * 60 * 1000)