Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .lfsconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[lfs]
url = https://lfs.dimensionalos.com/dimensionalOS/dimos
85 changes: 85 additions & 0 deletions dimos/utils/test_lfs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# Copyright 2025-2026 Dimensional Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Smoke tests for the dimos Git LFS server (lfs.dimensionalos.com).

These talk to the LFS batch API directly (no `git lfs` toolchain), so when CI
fails the diagnostic is "the LFS server returned X" rather than the looser
"git lfs pull failed". The end-to-end smudge test lives in test_data.py.
"""

import hashlib
import urllib.request

import pytest
import requests

# Base URL of the LFS server under test; the batch endpoint used by the tests
# is f"{LFS_URL}/objects/batch". Keep in sync with `url` in .lfsconfig.
LFS_URL = "https://lfs.dimensionalos.com/dimensionalOS/dimos"

# Known fixture: cafe.jpg.tar.gz pointer in dev. SHA + size match the LFS
# pointer file at data/.lfs/cafe.jpg.tar.gz.
# KNOWN_OID is compared against the SHA-256 hex digest of the downloaded
# object, and KNOWN_SIZE against its length in bytes.
KNOWN_OID = "b8cf30439b41033ccb04b09b9fc8388d18fb544d55b85c155dbf85700b9e7603"
KNOWN_SIZE = 136165


def _batch(operation: str, oid: str, size: int, *, auth=None):
    """POST a single-object request to the LFS batch endpoint.

    Args:
        operation: Value sent as the batch "operation" field; the tests in
            this file pass "download" or "upload".
        oid: Object ID placed in the request's single "objects" entry.
        size: Object size sent alongside ``oid``.
        auth: Forwarded unchanged to ``requests.post(auth=...)``. The default
            ``None`` supplies no explicit credentials.

    Returns:
        The ``requests`` response, unchecked: callers inspect the status code
        or call ``raise_for_status()`` themselves.
    """
    # Review note (P2): no test visible in this file passes `auth`, so the
    # authenticated-push path has no coverage here. The reviewer suggested a
    # pytest.mark.slow test that uses a test PAT stored as a CI secret to
    # exercise at least the upload batch request.
    lfs_media_type = "application/vnd.git-lfs+json"
    payload = {
        "operation": operation,
        "transfers": ["basic"],
        "objects": [{"oid": oid, "size": size}],
    }
    return requests.post(
        f"{LFS_URL}/objects/batch",
        json=payload,
        headers={"Accept": lfs_media_type, "Content-Type": lfs_media_type},
        auth=auth,
        timeout=15,
    )


@pytest.mark.slow
def test_anonymous_download_returns_presigned_url():
    """An unauthenticated batch download request returns a presigned S3 URL."""
    reply = _batch("download", KNOWN_OID, KNOWN_SIZE)
    reply.raise_for_status()

    # Exactly one object was requested, so only the first entry is inspected.
    entry = reply.json()["objects"][0]
    assert entry["oid"] == KNOWN_OID
    # Fail with the whole entry in the message rather than a bare KeyError
    # from the lookup below.
    assert "actions" in entry, f"no download action — server response: {entry}"

    download_url = entry["actions"]["download"]["href"]
    assert download_url.startswith("https://dimos-github-lfs.s3"), download_url
Comment on lines +63 to +64
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 The startswith("https://dimos-github-lfs.s3") check couples the test to the virtual-hosted S3 URL style. AWS presigned URLs can also be path-style (https://s3.amazonaws.com/dimos-github-lfs/…), and the format may also include a regional subdomain variant. Checking for the bucket name anywhere in the URL is less brittle while still confirming the object came from the right bucket.

Suggested change
href = obj["actions"]["download"]["href"]
assert href.startswith("https://dimos-github-lfs.s3"), href
href = obj["actions"]["download"]["href"]
assert "dimos-github-lfs" in href, href



@pytest.mark.slow
def test_anonymous_upload_is_forbidden():
    """An unauthenticated upload returns 403 — only repo collaborators can push."""
    # Open review question on this test: what is the normal dev process for a
    # third-party contributor?
    # All-zero placeholder OID, 64 characters like a SHA-256 hex digest; the
    # request is expected to be refused.
    placeholder_oid = "0" * 64
    reply = _batch("upload", placeholder_oid, 1)
    assert reply.status_code == 403, reply.text
Comment on lines +67 to +71
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 The test pins 403, but HTTP semantics permit a server to return 401 for unauthenticated requests (signalling "please authenticate") rather than 403 ("authenticated but not permitted"). Giftless may return either code depending on configuration. Accepting both makes the test resilient to that variation without weakening the assertion that anonymous upload is rejected.

Suggested change
@pytest.mark.slow
def test_anonymous_upload_is_forbidden():
"""An unauthenticated upload returns 403 — only repo collaborators can push."""
response = _batch("upload", "0" * 64, 1)
assert response.status_code == 403, response.text
@pytest.mark.slow
def test_anonymous_upload_is_forbidden():
"""An unauthenticated upload returns 401/403 — only repo collaborators can push."""
response = _batch("upload", "0" * 64, 1)
assert response.status_code in (401, 403), response.text



@pytest.mark.slow
def test_known_object_roundtrip():
    """Fetching the known fixture via giftless yields bytes whose SHA matches the pointer."""
    batch_reply = _batch("download", KNOWN_OID, KNOWN_SIZE)
    batch_reply.raise_for_status()
    first_object = batch_reply.json()["objects"][0]
    download_url = first_object["actions"]["download"]["href"]

    # The returned href is fetched with urllib rather than through requests.
    with urllib.request.urlopen(download_url, timeout=30) as remote:
        payload = remote.read()

    digest = hashlib.sha256(payload).hexdigest()
    assert digest == KNOWN_OID
    assert len(payload) == KNOWN_SIZE
Loading