diff --git a/README.md b/README.md index 44bacdfc0..18f025630 100644 --- a/README.md +++ b/README.md @@ -125,6 +125,8 @@ user_data_collection: feedback_storage: "/tmp/data/feedback" transcripts_enabled: true transcripts_storage: "/tmp/data/transcripts" + config_enabled: true + config_storage: "/tmp/data/config" ``` ### MCP Server and Tool Configuration @@ -241,6 +243,8 @@ user_data_collection: feedback_storage: "/tmp/data/feedback" transcripts_enabled: true transcripts_storage: "/tmp/data/transcripts" + config_enabled: true + config_storage: "/tmp/data/config" ``` ## User data collection @@ -255,6 +259,8 @@ user_data_collection: feedback_storage: "/tmp/data/feedback" transcripts_enabled: true transcripts_storage: "/tmp/data/transcripts" + config_enabled: true + config_storage: "/tmp/data/config" ``` **Configuration options:** @@ -263,6 +269,8 @@ user_data_collection: - `feedback_storage`: Directory path where feedback JSON files are stored - `transcripts_enabled`: Enable/disable collection of conversation transcripts - `transcripts_storage`: Directory path where transcript JSON files are stored +- `config_enabled`: Enable/disable collection of service configuration at startup +- `config_storage`: Directory path where configuration JSON files are stored > **Note**: The data collection system is designed to be extensible. Additional data types can be configured and collected as needed for your specific use case. @@ -650,6 +658,8 @@ The Lightspeed Core Stack integrates with the [lightspeed-to-dataverse-exporter] feedback_storage: "/shared/data/feedback" transcripts_enabled: true transcripts_storage: "/shared/data/transcripts" + config_enabled: true + config_storage: "/shared/data/config" ``` 2. 
**Deploy the exporter service** pointing to the same data directories
diff --git a/docs/deployment_guide.md b/docs/deployment_guide.md
index b3467eef4..afb9d1db9 100644
--- a/docs/deployment_guide.md
+++ b/docs/deployment_guide.md
@@ -1099,6 +1099,8 @@ user_data_collection:
   feedback_storage: "/tmp/data/feedback"
   transcripts_enabled: true
   transcripts_storage: "/tmp/data/transcripts"
+  config_enabled: true
+  config_storage: "/tmp/data/config"
 
 authentication:
   module: "noop"
@@ -1254,6 +1256,8 @@ user_data_collection:
   feedback_storage: "/tmp/data/feedback"
   transcripts_enabled: true
   transcripts_storage: "/tmp/data/transcripts"
+  config_enabled: true
+  config_storage: "/tmp/data/config"
 
 authentication:
   module: "noop"
diff --git a/docs/getting_started.md b/docs/getting_started.md
index cdc5e9d08..c8f9d0cf7 100644
--- a/docs/getting_started.md
+++ b/docs/getting_started.md
@@ -264,6 +264,8 @@ user_data_collection:
   feedback_storage: "/tmp/data/feedback"
   transcripts_enabled: true
   transcripts_storage: "/tmp/data/transcripts"
+  config_enabled: true
+  config_storage: "/tmp/data/config"
 
 authentication:
   module: "noop"
diff --git a/examples/lightspeed-stack-lls-external.yaml b/examples/lightspeed-stack-lls-external.yaml
index 35c022e9f..38ff18262 100644
--- a/examples/lightspeed-stack-lls-external.yaml
+++ b/examples/lightspeed-stack-lls-external.yaml
@@ -15,6 +15,8 @@ user_data_collection:
   feedback_storage: "/tmp/data/feedback"
   transcripts_enabled: true
   transcripts_storage: "/tmp/data/transcripts"
+  config_enabled: true
+  config_storage: "/tmp/data/config"
   data_collector:
     enabled: false
     ingress_server_url: null
diff --git a/examples/lightspeed-stack-lls-library.yaml b/examples/lightspeed-stack-lls-library.yaml
index 984c7c032..7310b8895 100644
--- a/examples/lightspeed-stack-lls-library.yaml
+++ b/examples/lightspeed-stack-lls-library.yaml
@@ -14,6 +14,8 @@ user_data_collection:
   feedback_storage: "/tmp/data/feedback"
   transcripts_enabled: true
   transcripts_storage: "/tmp/data/transcripts"
+  config_enabled: true
+  config_storage: "/tmp/data/config"
   data_collector:
     enabled: false
     ingress_server_url: null
diff --git a/lightspeed-stack.yaml b/lightspeed-stack.yaml
index d7d3f571d..fcd3abcf4 100644
--- a/lightspeed-stack.yaml
+++ b/lightspeed-stack.yaml
@@ -20,6 +20,8 @@ user_data_collection:
   feedback_storage: "/tmp/data/feedback"
   transcripts_enabled: true
   transcripts_storage: "/tmp/data/transcripts"
+  config_enabled: true
+  config_storage: "/tmp/data/config"
 
 authentication:
   module: "noop"
diff --git a/src/app/endpoints/config.py b/src/app/endpoints/config.py
index fec854294..67f4d4425 100644
--- a/src/app/endpoints/config.py
+++ b/src/app/endpoints/config.py
@@ -40,6 +40,8 @@
                 "feedback_storage": "/tmp/data/feedback",
                 "transcripts_enabled": False,
                 "transcripts_storage": None,
+                "config_enabled": True,
+                "config_storage": "/tmp/data/config",
             },
             "mcp_servers": [
                 {"name": "server1", "provider_id": "provider1", "url": "http://url.com:1"},
diff --git a/src/lightspeed_stack.py b/src/lightspeed_stack.py
index cf47c2f91..b314e006e 100644
--- a/src/lightspeed_stack.py
+++ b/src/lightspeed_stack.py
@@ -6,12 +6,18 @@
 
 from argparse import ArgumentParser
 import asyncio
+import json
 import logging
+from datetime import datetime, timezone
+from pathlib import Path
 
 from rich.logging import RichHandler
+
 from runners.uvicorn import start_uvicorn
 from configuration import configuration
 from client import AsyncLlamaStackClientHolder
+from utils import suid
+import version
 
 FORMAT = "%(message)s"
 logging.basicConfig(
@@ -21,6 +27,42 @@
 logger = logging.getLogger(__name__)
 
 
+def store_config(cfg_file: str) -> None:
+    """Persist the startup configuration as a JSON record on local disk.
+
+    The raw text of the configuration file is captured once per service
+    start, so individual transcript and feedback records do not have to
+    repeat configuration data that stays constant for a deployment.
+
+    Args:
+        cfg_file: Path to the configuration file the service was started with.
+
+    Raises:
+        ValueError: If no config storage directory is configured.
+    """
+    raw_config = Path(cfg_file).read_text(encoding="utf-8")
+
+    record = {
+        "metadata": {
+            "timestamp": datetime.now(timezone.utc).isoformat(),
+            "service_version": version.__version__,
+            "config_file_path": cfg_file,
+        },
+        "configuration": raw_config,
+    }
+
+    # Defensive re-check: the UserDataCollection validator should already enforce this
+    target_dir = configuration.user_data_collection_configuration.config_storage
+    if target_dir is None:
+        raise ValueError("config_storage must be set when config collection is enabled")
+    output_dir = Path(target_dir)
+    output_dir.mkdir(parents=True, exist_ok=True)
+    output_file = output_dir / f"{suid.get_suid()}.json"
+    output_file.write_text(json.dumps(record, indent=2), encoding="utf-8")
+
+    logger.info("Service configuration stored in '%s'", output_file)
+
+
 def create_argument_parser() -> ArgumentParser:
     """Create and configure argument parser object."""
     parser = ArgumentParser()
@@ -62,6 +104,13 @@ def main() -> None:
     logger.info(
         "Llama stack configuration: %s", configuration.llama_stack_configuration
     )
+
+    # store service configuration if enabled
+    if configuration.user_data_collection_configuration.config_enabled:
+        store_config(args.config_file)
+    else:
+        logger.debug("Config collection is disabled in configuration")
+
     logger.info("Creating AsyncLlamaStackClient")
     asyncio.run(
         AsyncLlamaStackClientHolder().load(configuration.configuration.llama_stack)
diff --git a/src/models/config.py b/src/models/config.py
index bda9699ad..124f75885 100644
--- a/src/models/config.py
+++ b/src/models/config.py
@@ -205,6 +205,8 @@ class UserDataCollection(BaseModel):
     feedback_storage: Optional[str] = None
     transcripts_enabled: bool = False
     transcripts_storage: Optional[str] = None
+    config_enabled: bool = False
+    config_storage: Optional[str] = None
 
     @model_validator(mode="after")
     def
check_storage_location_is_set_when_needed(self) -> Self:
@@ -215,6 +217,10 @@ def check_storage_location_is_set_when_needed(self) -> Self:
             raise ValueError(
                 "transcripts_storage is required when transcripts is enabled"
             )
+        if self.config_enabled and self.config_storage is None:
+            raise ValueError(
+                "config_storage is required when config collection is enabled"
+            )
         return self
 
 
diff --git a/tests/configuration/lightspeed-stack.yaml b/tests/configuration/lightspeed-stack.yaml
index d2b4ab1fa..69934304d 100644
--- a/tests/configuration/lightspeed-stack.yaml
+++ b/tests/configuration/lightspeed-stack.yaml
@@ -32,6 +32,10 @@ llama_stack:
 user_data_collection:
   feedback_enabled: true
   feedback_storage: "/tmp/data/feedback"
+  transcripts_enabled: true
+  transcripts_storage: "/tmp/data/transcripts"
+  config_enabled: true
+  config_storage: "/tmp/data/config"
 mcp_servers:
   - name: "server1"
     provider_id: "provider1"
diff --git a/tests/unit/models/test_config.py b/tests/unit/models/test_config.py
index 02a976c68..fc2629409 100644
--- a/tests/unit/models/test_config.py
+++ b/tests/unit/models/test_config.py
@@ -598,6 +598,8 @@ def test_dump_configuration(tmp_path) -> None:
                 "feedback_storage": None,
                 "transcripts_enabled": False,
                 "transcripts_storage": None,
+                "config_enabled": False,
+                "config_storage": None,
             },
             "mcp_servers": [],
             "authentication": {
diff --git a/tests/unit/test_lightspeed_stack.py b/tests/unit/test_lightspeed_stack.py
index 6f6ed41d7..3d6718462 100644
--- a/tests/unit/test_lightspeed_stack.py
+++ b/tests/unit/test_lightspeed_stack.py
@@ -1,6 +1,13 @@
 """Unit tests for functions defined in src/lightspeed_stack.py."""
 
-from lightspeed_stack import create_argument_parser
+import json
+import tempfile
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from lightspeed_stack import create_argument_parser, store_config
 
 
 def test_create_argument_parser():
@@ -8,3 +15,223 @@ def test_create_argument_parser():
     arg_parser = create_argument_parser()
     # nothing more to test w/o actual parsing is done
     assert arg_parser is not None
+
+
+@pytest.fixture
+def config_storage_path(tmpdir):
+    """Fixture provides a temporary config storage location."""
+    return (tmpdir / "config").strpath
+
+
+@pytest.fixture
+def mock_configuration(config_storage_path):  # pylint: disable=redefined-outer-name
+    """Fixture provides a mock configuration object for testing."""
+    mock_config = MagicMock()
+    mock_config.user_data_collection_configuration.config_storage = config_storage_path
+    return mock_config
+
+
+@pytest.fixture
+def sample_config_file():
+    """Yield the path of a temporary YAML config file, deleting it afterwards."""
+    config_content = """# Sample configuration
+name: Lightspeed Core Service (LCS)
+service:
+  host: localhost
+  port: 8080
+  auth_enabled: false
+  workers: 1
+  color_log: true
+  access_log: true
+llama_stack:
+  use_as_library_client: true
+  library_client_config_path: run.yaml
+user_data_collection:
+  feedback_enabled: true
+  feedback_storage: "/tmp/data/feedback"
+  transcripts_enabled: true
+  transcripts_storage: "/tmp/data/transcripts"
+  config_enabled: true
+  config_storage: "/tmp/data/config"
+authentication:
+  module: "noop"
+"""
+    with tempfile.NamedTemporaryFile(
+        mode="w", suffix=".yaml", delete=False, encoding="utf-8"
+    ) as f:
+        f.write(config_content)
+    yield f.name  # handle already closed, so the content is fully on disk
+    Path(f.name).unlink(missing_ok=True)
+
+
+@patch("lightspeed_stack.configuration")
+def test_store_config_enabled(
+    mock_configuration_module,
+    config_storage_path,
+    sample_config_file,
+    mock_configuration,
+):  # pylint: disable=redefined-outer-name
+    """Test that config is stored when enabled."""
+    mock_configuration_module.user_data_collection_configuration = (
+        mock_configuration.user_data_collection_configuration
+    )
+
+    store_config(sample_config_file)
+
+    # Verify that a config file was created
+    config_files = list(Path(config_storage_path).glob("*.json"))
+    assert len(config_files) == 1, f"Expected 1 config file, found {len(config_files)}"
+
+    # Verify the content
+    with open(config_files[0], "r", encoding="utf-8") as f:
+        stored_data = json.load(f)
+
+    assert "metadata" in stored_data
+    assert "configuration" in stored_data
+    assert "timestamp" in stored_data["metadata"]
+    assert "service_version" in stored_data["metadata"]
+    assert "config_file_path" in stored_data["metadata"]
+
+    assert stored_data["metadata"]["config_file_path"] == sample_config_file
+    assert "name: Lightspeed Core Service (LCS)" in stored_data["configuration"]
+    assert "user_data_collection:" in stored_data["configuration"]
+
+
+@patch("lightspeed_stack.configuration")
+def test_store_config_creates_directory(
+    mock_configuration_module, tmpdir, sample_config_file
+):  # pylint: disable=redefined-outer-name
+    """Test that config storage creates directory if it doesn't exist."""
+    # Use a non-existent nested directory path
+    nested_path = tmpdir / "nested" / "config" / "storage"
+    full_path = nested_path.strpath
+
+    # Create a mock config with the nested path
+    mock_config = MagicMock()
+    mock_config.user_data_collection_configuration.config_storage = full_path
+    mock_configuration_module.user_data_collection_configuration = (
+        mock_config.user_data_collection_configuration
+    )
+
+    # Directory shouldn't exist initially
+    assert not Path(full_path).exists()
+
+    # Call store_config
+    store_config(sample_config_file)
+
+    # Directory should be created
+    assert Path(full_path).exists()
+    assert Path(full_path).is_dir()
+
+    # Config file should be stored
+    config_files = list(Path(full_path).glob("*.json"))
+    assert len(config_files) == 1
+
+
+@patch("lightspeed_stack.configuration")
+def test_store_config_unique_filenames(
+    mock_configuration_module,
+    config_storage_path,
+    sample_config_file,
+    mock_configuration,
+):  # pylint: disable=redefined-outer-name
+    """Test that multiple calls create files with unique names."""
+    mock_configuration_module.user_data_collection_configuration = (
+        mock_configuration.user_data_collection_configuration
+    )
+
+    # Call store_config multiple times
+    store_config(sample_config_file)
+    store_config(sample_config_file)
+    store_config(sample_config_file)
+
+    # Should have 3 unique files
+    config_files = list(Path(config_storage_path).glob("*.json"))
+    assert len(config_files) == 3
+
+    # All filenames should be unique
+    filenames = [f.name for f in config_files]
+    assert len(set(filenames)) == 3
+
+
+@patch("lightspeed_stack.version.__version__", "1.2.3-test")
+@patch("lightspeed_stack.configuration")
+def test_store_config_includes_version(
+    mock_configuration_module,
+    config_storage_path,
+    sample_config_file,
+    mock_configuration,
+):  # pylint: disable=redefined-outer-name
+    """Test that stored config includes the service version."""
+    mock_configuration_module.user_data_collection_configuration = (
+        mock_configuration.user_data_collection_configuration
+    )
+
+    store_config(sample_config_file)
+
+    config_files = list(Path(config_storage_path).glob("*.json"))
+    with open(config_files[0], "r", encoding="utf-8") as f:
+        stored_data = json.load(f)
+
+    assert stored_data["metadata"]["service_version"] == "1.2.3-test"
+
+
+@patch("lightspeed_stack.configuration")
+def test_store_config_preserves_yaml_content(
+    mock_configuration_module,
+    config_storage_path,
+    sample_config_file,
+    mock_configuration,
+):  # pylint: disable=redefined-outer-name
+    """Test that original YAML content is preserved exactly."""
+    mock_configuration_module.user_data_collection_configuration = (
+        mock_configuration.user_data_collection_configuration
+    )
+
+    # Read the original content
+    with open(sample_config_file, "r", encoding="utf-8") as f:
+        original_content = f.read()
+
+    store_config(sample_config_file)
+
+    config_files = list(Path(config_storage_path).glob("*.json"))
+    with open(config_files[0], "r", encoding="utf-8") as f:
+        stored_data = json.load(f)
+
+    # The stored configuration should match the original exactly
+    assert stored_data["configuration"] == original_content
+
+
+@patch("lightspeed_stack.configuration")
+def test_store_config_json_format(
+    mock_configuration_module,
+    config_storage_path,
+    sample_config_file,
+    mock_configuration,
+):  # pylint: disable=redefined-outer-name
+    """Test that stored file is valid JSON with proper structure."""
+    mock_configuration_module.user_data_collection_configuration = (
+        mock_configuration.user_data_collection_configuration
+    )
+
+    store_config(sample_config_file)
+
+    config_files = list(Path(config_storage_path).glob("*.json"))
+
+    # Should be valid JSON
+    with open(config_files[0], "r", encoding="utf-8") as f:
+        stored_data = json.load(f)  # This will raise if invalid JSON
+
+    # Should have expected structure
+    expected_keys = {"metadata", "configuration"}
+    assert set(stored_data.keys()) == expected_keys
+
+    expected_metadata_keys = {
+        "timestamp",
+        "service_version",
+        "config_file_path",
+    }
+    assert set(stored_data["metadata"].keys()) == expected_metadata_keys
+
+    # Configuration should be a string (YAML content)
+    assert isinstance(stored_data["configuration"], str)