
Commit 9622b91

Feat cortexcpp e2e cortexllamacpp nightly (#1744)
1 parent 0cec2fe commit 9622b91

File tree

5 files changed: +239 −0 lines changed

.github/workflows/cortex-cpp-quality-gate.yml
engine/e2e-test/cortex-llamacpp-e2e-nightly.py
engine/e2e-test/test_api_engine_install_nightly.py
engine/e2e-test/test_cli_engine_install_nightly.py
engine/e2e-test/test_runner.py

.github/workflows/cortex-cpp-quality-gate.yml

Lines changed: 46 additions & 0 deletions
@@ -5,6 +5,8 @@ on:
     types: [opened, synchronize, reopened, ready_for_review]
     paths: ["engine/**", ".github/workflows/cortex-cpp-quality-gate.yml"]
   workflow_dispatch:
+  schedule:
+    - cron: '0 22 * * *'
 
 env:
   LLM_MODEL_URL: https://delta.jan.ai/tinyllama-1.1b-chat-v0.3.Q2_K.gguf
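
The new `schedule` trigger fires the workflow daily at 22:00 UTC (`'0 22 * * *'` is standard five-field cron: minute 0, hour 22, every day). The nightly-only steps added further down are gated on `github.event_name == 'schedule'`, so pull-request and manual runs skip them.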

@@ -149,6 +151,34 @@ jobs:
         env:
           GITHUB_TOKEN: ${{ secrets.PAT_SERVICE_ACCOUNT }}
 
+      - name: Run e2e tests
+        if: github.event_name == 'schedule' && runner.os != 'Windows' && github.event.pull_request.draft == false
+        run: |
+          cd engine
+          cp build/cortex build/cortex-nightly
+          cp build/cortex build/cortex-beta
+          python -m pip install --upgrade pip
+          python -m pip install -r e2e-test/requirements.txt
+          python e2e-test/cortex-llamacpp-e2e-nightly.py
+          rm build/cortex-nightly
+          rm build/cortex-beta
+        env:
+          GITHUB_TOKEN: ${{ secrets.PAT_SERVICE_ACCOUNT }}
+
+      - name: Run e2e tests
+        if: github.event_name == 'schedule' && runner.os == 'Windows' && github.event.pull_request.draft == false
+        run: |
+          cd engine
+          cp build/cortex.exe build/cortex-nightly.exe
+          cp build/cortex.exe build/cortex-beta.exe
+          python -m pip install --upgrade pip
+          python -m pip install -r e2e-test/requirements.txt
+          python e2e-test/cortex-llamacpp-e2e-nightly.py
+          rm build/cortex-nightly.exe
+          rm build/cortex-beta.exe
+        env:
+          GITHUB_TOKEN: ${{ secrets.PAT_SERVICE_ACCOUNT }}
+
       - name: Pre-package
         run: |
           cd engine
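
Both steps copy the freshly built binary to `cortex-nightly` and `cortex-beta` names before running the suite and delete the copies afterwards; presumably the updater test (`test_cortex_update` in the suite below) exercises those channel-named binaries, leaving the original `cortex` build untouched.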

@@ -197,12 +227,28 @@ jobs:
           submodules: 'recursive'
 
       - name: Run Docker
+        if: github.event_name != 'schedule'
+        run: |
+          docker build \
+            --build-arg REMOTE_CACHE_URL="${{ secrets.MINIO_ENDPOINT }}/vcpkg-cache" \
+            --build-arg MINIO_ENDPOINT_URL="${{ secrets.MINIO_ENDPOINT }}" \
+            --build-arg MINIO_ACCESS_KEY="${{ secrets.MINIO_ACCESS_KEY_ID }}" \
+            --build-arg MINIO_SECRET_KEY="${{ secrets.MINIO_SECRET_ACCESS_KEY }}" \
+            -t menloltd/cortex:test -f docker/Dockerfile.cache .
+          docker run -it -d -p 3928:39281 --name cortex menloltd/cortex:test
+          sleep 20
+
+      - name: Run Docker
+        if: github.event_name == 'schedule'
         run: |
+          latest_prerelease=$(curl -s https://api.github.com/repos/cortexcpp/cortex.cpp/releases | jq -r '.[] | select(.prerelease == true) | .tag_name' | head -n 1)
+          echo "cortex.llamacpp latest release: $latest_prerelease"
           docker build \
             --build-arg REMOTE_CACHE_URL="${{ secrets.MINIO_ENDPOINT }}/vcpkg-cache" \
             --build-arg MINIO_ENDPOINT_URL="${{ secrets.MINIO_ENDPOINT }}" \
             --build-arg MINIO_ACCESS_KEY="${{ secrets.MINIO_ACCESS_KEY_ID }}" \
             --build-arg MINIO_SECRET_KEY="${{ secrets.MINIO_SECRET_ACCESS_KEY }}" \
+            --build-arg CORTEX_CPP_VERSION="${latest_prerelease}" \
             -t menloltd/cortex:test -f docker/Dockerfile.cache .
          docker run -it -d -p 3928:39281 --name cortex menloltd/cortex:test
           sleep 20
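
Two details worth noting here: `-p 3928:39281` maps container port 39281 to host port 3928, which is why the e2e tests target `http://localhost:3928`; and on scheduled runs the newest pre-release tag is resolved from the GitHub releases API and passed into the image build as the `CORTEX_CPP_VERSION` build arg, so the nightly image is built against that pre-release.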

engine/e2e-test/cortex-llamacpp-e2e-nightly.py

Lines changed: 33 additions & 0 deletions
@@ -0,0 +1,33 @@
+import pytest
+import sys
+
+### e2e tests are expensive; keep the engine tests in order
+from test_api_engine_list import TestApiEngineList
+from test_api_engine_install_nightly import TestApiEngineInstall
+from test_api_engine_get import TestApiEngineGet
+
+### model tests, kept in order; note that the engine is only uninstalled after all model tests finish
+from test_api_model_pull_direct_url import TestApiModelPullDirectUrl
+from test_api_model_start import TestApiModelStart
+from test_api_model_stop import TestApiModelStop
+from test_api_model_get import TestApiModelGet
+from test_api_model_list import TestApiModelList
+from test_api_model_update import TestApiModelUpdate
+from test_api_model_delete import TestApiModelDelete
+from test_api_model_import import TestApiModelImport
+from test_api_engine_uninstall import TestApiEngineUninstall
+
+### CLI tests
+from test_cli_engine_get import TestCliEngineGet
+from test_cli_engine_install_nightly import TestCliEngineInstall
+from test_cli_engine_list import TestCliEngineList
+from test_cli_model_delete import TestCliModelDelete
+from test_cli_model_pull_direct_url import TestCliModelPullDirectUrl
+from test_cli_server_start import TestCliServerStart
+from test_cortex_update import TestCortexUpdate
+from test_create_log_folder import TestCreateLogFolder
+from test_cli_model_import import TestCliModelImport
+from test_cli_engine_uninstall import TestCliEngineUninstall
+
+if __name__ == "__main__":
+    sys.exit(pytest.main([__file__, "-v"]))
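
This single entry point works because pytest collects `Test*` classes from a module's namespace in insertion order, so importing the classes in the sequence above pins the execution order across files, which is what the comments about expensive e2e tests rely on. The workflow invokes it from the `engine` directory as `python e2e-test/cortex-llamacpp-e2e-nightly.py`.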

engine/e2e-test/test_api_engine_install_nightly.py

Lines changed: 37 additions & 0 deletions
@@ -0,0 +1,37 @@
+import pytest
+import requests
+from test_runner import start_server, stop_server, get_latest_pre_release_tag
+
+latest_pre_release_tag = get_latest_pre_release_tag("janhq", "cortex.llamacpp")
+
+class TestApiEngineInstall:
+
+    @pytest.fixture(autouse=True)
+    def setup_and_teardown(self):
+        # Setup
+        success = start_server()
+        if not success:
+            raise Exception("Failed to start server")
+
+        yield
+
+        # Teardown
+        stop_server()
+
+    def test_engines_install_llamacpp_should_be_successful(self):
+        response = requests.post("http://localhost:3928/v1/engines/llama-cpp/install")
+        assert response.status_code == 200
+
+    def test_engines_install_llamacpp_specific_version_and_variant(self):
+        data = {"version": latest_pre_release_tag, "variant": "linux-amd64-avx-cuda-11-7"}
+        response = requests.post(
+            "http://localhost:3928/v1/engines/llama-cpp/install", json=data
+        )
+        assert response.status_code == 200
+
+    def test_engines_install_llamacpp_specific_version_and_null_variant(self):
+        data = {"version": latest_pre_release_tag}
+        response = requests.post(
+            "http://localhost:3928/v1/engines/llama-cpp/install", json=data
+        )
+        assert response.status_code == 200
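
For reference outside pytest, a minimal sketch of the install call these tests exercise; the tag value is a placeholder (a real one comes from `get_latest_pre_release_tag`), and the server is assumed to be listening on port 3928 as in the tests:

    import requests

    # "v0.1.0" is an illustrative placeholder tag, not a real release;
    # resolve a real one with get_latest_pre_release_tag("janhq", "cortex.llamacpp").
    data = {"version": "v0.1.0", "variant": "linux-amd64-avx-cuda-11-7"}
    response = requests.post(
        "http://localhost:3928/v1/engines/llama-cpp/install", json=data
    )
    assert response.status_code == 200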

engine/e2e-test/test_cli_engine_install_nightly.py

Lines changed: 89 additions & 0 deletions
@@ -0,0 +1,89 @@
+import platform
+import tempfile
+
+import pytest
+import requests
+from test_runner import run, start_server, stop_server, get_latest_pre_release_tag
+
+latest_pre_release_tag = get_latest_pre_release_tag("janhq", "cortex.llamacpp")
+
+class TestCliEngineInstall:
+    @pytest.fixture(autouse=True)
+    def setup_and_teardown(self):
+        # Setup
+        stop_server()
+        success = start_server()
+        if not success:
+            raise Exception("Failed to start server")
+
+        yield
+
+        # Teardown
+        stop_server()
+
+    def test_engines_install_llamacpp_should_be_successful(self):
+        exit_code, output, error = run(
+            "Install Engine",
+            ["engines", "install", "llama-cpp"],
+            timeout=None,
+            capture=False,
+        )
+        response = requests.get("http://127.0.0.1:3928/v1/engines/llama-cpp")
+        assert len(response.json()) > 0
+        assert exit_code == 0, f"Install engine failed with error: {error}"
+
+    @pytest.mark.skipif(platform.system() != "Darwin", reason="macOS-specific test")
+    def test_engines_install_onnx_on_macos_should_be_failed(self):
+        exit_code, output, error = run(
+            "Install Engine", ["engines", "install", "onnxruntime"]
+        )
+        assert "is not supported on" in output, "Should display error message"
+        assert exit_code == 0, f"Install engine failed with error: {error}"
+
+    @pytest.mark.skipif(platform.system() != "Darwin", reason="macOS-specific test")
+    def test_engines_install_tensorrt_on_macos_should_be_failed(self):
+        exit_code, output, error = run(
+            "Install Engine", ["engines", "install", "tensorrt-llm"]
+        )
+        assert "is not supported on" in output, "Should display error message"
+        assert exit_code == 0, f"Install engine failed with error: {error}"
+
+    def test_engines_should_fallback_to_download_llamacpp_engine_if_not_exists(self):
+        exit_code, output, error = run(
+            "Install Engine",
+            ["engines", "install", "llama-cpp", "-s", tempfile.gettempdir()],
+            timeout=None,
+        )
+        assert "downloaded successfully" in output
+        assert exit_code == 0, f"Install engine failed with error: {error}"
+
+    def test_engines_should_not_perform_with_dummy_path(self):
+        exit_code, output, error = run(
+            "Install Engine",
+            ["engines", "install", "llama-cpp", "-s", "abcpod"],
+            timeout=None,
+        )
+        assert "Folder does not exist" in output, "Should display error"
+        assert exit_code == 0, f"Install engine failed with error: {error}"
+
+    def test_engines_install_pre_release_llamacpp(self):
+        engine_version = latest_pre_release_tag
+        exit_code, output, error = run(
+            "Install Engine",
+            ["engines", "install", "llama-cpp", "-v", engine_version],
+            timeout=None,
+            capture=False,
+        )
+        response = requests.get("http://127.0.0.1:3928/v1/engines/llama-cpp")
+        assert len(response.json()) > 0
+        is_engine_version_exist = False
+        for item in response.json():
+            # Check if the 'version' key exists and matches the target
+            if "version" in item and item["version"] == engine_version:
+                is_engine_version_exist = True
+                break
+
+        # Expect the target version among the installed engines
+        assert is_engine_version_exist, f"Engine version {engine_version} is not found"
+        assert exit_code == 0, f"Install engine failed with error: {error}"
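
These tests drive the CLI through the `run` helper from `test_runner`, which returns an `(exit_code, output, error)` tuple; the flags exercised are `-v` for a specific engine version and `-s` for an install folder. Note that even the negative cases assert `exit_code == 0`: the CLI reports unsupported engines and bad paths as messages in the output rather than as non-zero exit codes.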

engine/e2e-test/test_runner.py

Lines changed: 34 additions & 0 deletions
@@ -6,6 +6,7 @@
 import subprocess
 import threading
 import time
+import requests
 from typing import List
 
 import websockets
@@ -187,3 +188,36 @@ async def receive_until_success():
 
     except asyncio.TimeoutError:
         raise TimeoutError("Timeout waiting for DownloadSuccess event")
+
+
+def get_latest_pre_release_tag(repo_owner, repo_name):
+    # URL for the GitHub API to fetch all releases of the repository
+    url = f"https://api.github.com/repos/{repo_owner}/{repo_name}/releases"
+
+    # Headers to pin the API version
+    headers = {
+        "Accept": "application/vnd.github.v3+json"
+    }
+
+    # Send a GET request to the GitHub API
+    response = requests.get(url, headers=headers)
+
+    # Check the response status; raise an error if the request failed
+    if response.status_code != 200:
+        raise Exception(f"Failed to fetch releases: {response.status_code}, {response.text}")
+
+    # Parse the JSON response into a list of releases
+    releases = response.json()
+
+    # Keep only the pre-releases
+    pre_releases = [release for release in releases if release.get("prerelease")]
+
+    # If no pre-releases are found, raise an exception
+    if not pre_releases:
+        raise Exception("No pre-releases found")
+
+    # Sort the pre-releases by creation date, newest first
+    pre_releases.sort(key=lambda x: x["created_at"], reverse=True)
+
+    # Return the tag name of the latest pre-release
+    return pre_releases[0]["tag_name"]
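
A minimal usage sketch of the new helper (requires network access to the GitHub API; the returned tag is simply whichever pre-release is newest at call time). Sorting on the `created_at` strings works because ISO-8601 timestamps order lexicographically in chronological order:

    from test_runner import get_latest_pre_release_tag

    # Resolve the newest pre-release tag of janhq/cortex.llamacpp,
    # exactly as the nightly test modules do at import time.
    tag = get_latest_pre_release_tag("janhq", "cortex.llamacpp")
    print(tag)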
