Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 5 additions & 57 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -89,63 +89,11 @@ jobs:
python -m pip install --upgrade pip
pip install -c backend/constraints-${{ matrix.lancedb }}.txt \
-r backend/requirements.txt
pip install httpx # Required for TestClient
# httpx is required by TestClient; <0.28 keeps the app= kwarg
# that the starlette version pinned by fastapi 0.104 still uses
pip install pytest "httpx<0.28"

- name: Debug dependency versions
- name: Run API endpoint tests
run: |
cd backend
python -c "
import lancedb
import pyarrow
import fastapi
import starlette
from fastapi.testclient import TestClient
import inspect

print(f'=== Lance {lancedb.__version__} Dependencies ===')
print(f'LanceDB: {lancedb.__version__}')
print(f'PyArrow: {pyarrow.__version__}')
print(f'FastAPI: {fastapi.__version__}')
print(f'Starlette: {starlette.__version__}')

print(f'\\n=== TestClient signature ===')
sig = inspect.signature(TestClient.__init__)
print(f'TestClient.__init__{sig}')

print(f'\\n=== App module structure ===')
import app
print(f'app module type: {type(app)}')
if hasattr(app, 'app'):
print(f'app.app type: {type(app.app)}')
print(f'app.app class: {app.app.__class__.__name__}')
else:
print('No app.app attribute found')
"

- name: Test API endpoints
run: |
cd backend
python -c "
import app
import lancedb
import pyarrow
from fastapi.testclient import TestClient

# Print version information first
print(f'Testing with LanceDB {lancedb.__version__}, PyArrow {pyarrow.__version__}')

# Test health endpoint only - skip TestClient for now
# response = client.get('/healthz')
# assert response.status_code == 200
# assert response.json()['ok'] == True
print('✓ Health check skipped (debugging TestClient)')

# Test datasets endpoint (will fail without data but should not crash)
# try:
# response = client.get('/datasets')
# print('✓ Datasets endpoint accessible')
# except Exception as e:
# print(f'✓ Datasets endpoint handled error gracefully: {e}')

print('✓ Debug completed - TestClient investigation needed')
"
python -m pytest tests/ -v
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added
- API endpoint test suite (pytest + FastAPI TestClient) covering all six endpoints, pagination, column filtering, value serialization, and corrupted-dataset handling. The CI test job now runs it against every supported Lance version (#28).

### Fixed
- Frontend fetch calls now check `response.ok` before parsing JSON, so HTTP error responses surface as error states instead of being parsed as data (#27).
- `/rows` with unknown column names and `/vector/preview` with a missing or non-vector column now return 400 as intended; the error was previously masked as a generic 500 (#28).

## [0.2.0] - 2026-04-16

Expand Down
4 changes: 4 additions & 0 deletions backend/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,8 @@ async def get_dataset_rows(
"offset": offset
}

except HTTPException:
raise
except Exception as e:
logger.error(f"Error getting rows for {dataset_name}: {e}")
raise HTTPException(status_code=500, detail="Failed to get dataset rows")
Expand Down Expand Up @@ -392,6 +394,8 @@ async def get_vector_preview(

return {"stats": stats, "preview": preview}

except HTTPException:
raise
except Exception as e:
logger.error(f"Error getting vector preview for {dataset_name}.{column}: {e}")
raise HTTPException(status_code=500, detail="Failed to get vector preview")
Expand Down
84 changes: 84 additions & 0 deletions backend/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
"""Shared fixtures: a temporary Lance database and a FastAPI test client.

Run from the backend directory:

python -m pytest tests/ -v

The sample data is written once per session with whatever lancedb/pyarrow
versions are installed, so the same tests run against every constraints file.
"""

import os
import sys
from pathlib import Path

import lancedb
import pyarrow as pa
import pytest

# Prepend the directory two levels up from this file (the parent of the
# tests directory) to the import path, so the plain `import app` used by the
# `client` fixture resolves.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))

# Number of rows generated for the "sample" table.
ROWS = 10
# NOTE(review): presumably the length of each "vec" entry (the generated rows
# each hold four floats); nothing visible in this file reads it - confirm.
VEC_DIM = 4
# Fixed list size of the "embedding" column in the sample table.
CLIP_DIM = 512


def _sample_table() -> pa.Table:
    """Build the Arrow table that backs the ``sample`` dataset.

    The table has ``ROWS`` rows and six columns:

    * ``id``        - int64, 0..ROWS-1
    * ``text``      - string, ``"row <i>"`` with a null at row 3
    * ``score``     - float64, ``i * 1.5``
    * ``blob``      - binary, alternating ASCII and non-UTF-8 payloads
    * ``vec``       - variable-size float32 list with a null at row 5
    * ``embedding`` - fixed-size float32 list of ``CLIP_DIM`` equal
      components, each ``1 / sqrt(CLIP_DIM)``
    """
    row_ids = list(range(ROWS))
    component = 1.0 / CLIP_DIM ** 0.5

    text_values = [f"row {i}" if i != 3 else None for i in row_ids]
    score_values = [i * 1.5 for i in row_ids]
    # Odd rows carry bytes that are not valid UTF-8.
    blob_values = [b"\xff\xfe\x01\x02" if i % 2 else b"hello" for i in row_ids]
    vec_values = [[float(i), -1.0, 0.5, 2.0] if i != 5 else None for i in row_ids]
    embedding_values = [[component] * CLIP_DIM for _ in row_ids]

    columns = {
        "id": pa.array(row_ids, type=pa.int64()),
        "text": pa.array(text_values, type=pa.string()),
        "score": pa.array(score_values, type=pa.float64()),
        "blob": pa.array(blob_values, type=pa.binary()),
        "vec": pa.array(vec_values, type=pa.list_(pa.float32())),
        "embedding": pa.array(
            embedding_values, type=pa.list_(pa.float32(), CLIP_DIM)
        ),
    }
    return pa.table(columns)


def _corrupt_table(db_dir: Path, name: str) -> None:
    """Replace the contents of a table's data files with junk bytes.

    Every regular file matching ``*.lance`` anywhere beneath
    ``<db_dir>/<name>.lance`` is overwritten. Files with other names are
    left alone; the intent is that the manifest survives, so
    ``open_table()`` still succeeds while reading the data fails.

    Raises:
        AssertionError: if no matching data file exists under the table
            directory.
    """
    root = db_dir / f"{name}.lance"

    targets = []
    for candidate in root.rglob("*.lance"):
        # Directories can match the pattern too; only files are rewritten.
        if candidate.is_file():
            targets.append(candidate)

    assert targets, f"no data files found under {root}"

    for target in targets:
        target.write_bytes(b"not a lance data file")


@pytest.fixture(scope="session")
def data_dir(tmp_path_factory):
    """Create a temporary Lance database once per test session.

    The database holds two tables: ``sample``, filled from
    ``_sample_table()``, and ``broken``, a three-row table that is passed
    to ``_corrupt_table()`` right after creation.

    Returns:
        The path of the temporary database directory.
    """
    root = tmp_path_factory.mktemp("lance-data")
    connection = lancedb.connect(str(root))

    connection.create_table("sample", _sample_table())

    broken_rows = pa.table({"id": pa.array([1, 2, 3], type=pa.int64())})
    connection.create_table("broken", broken_rows)
    _corrupt_table(root, "broken")

    return root


@pytest.fixture(scope="session")
def vec_nulls_preserved(data_dir):
    """Report whether the installed lancedb round-trips a null list as null.

    Lance format v1 (lancedb 0.3.x/0.5) stores a null list as an empty
    list, so the stored ``sample`` table is read back and row 5 of the
    ``vec`` column (written as null) is inspected. Tests use the result to
    assert the serialization that matches the installed version.

    Returns:
        True if the value read back is ``None``, False otherwise.
    """
    sample = lancedb.connect(str(data_dir)).open_table("sample")
    stored_vecs = sample.to_arrow().column("vec").to_pylist()
    return stored_vecs[5] is None


@pytest.fixture(scope="session")
def client(data_dir):
    """Yield a FastAPI ``TestClient`` bound to the temporary database.

    The client is used as a context manager, so it stays open for the whole
    session and is closed when the session ends.
    """
    # The app reads DATA_PATH at import time, so the environment variable
    # has to be in place before the module is first imported.
    os.environ["DATA_PATH"] = str(data_dir)

    from fastapi.testclient import TestClient

    import app as app_module

    # Also set the module attribute directly, in addition to the environment
    # variable above.
    app_module.DATA_PATH = data_dir

    with TestClient(app_module.app) as http:
        yield http
Loading
Loading