-
Notifications
You must be signed in to change notification settings - Fork 498
feat: Add video upload and list_objects functionality to ObjectStore #1195
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The submodule should not be updated until the PR in NeMo-Agent-Toolkit-UI is merged. We never allow non- |
| +16 −1 | components/Chatbar/components/ChatbarSettings.tsx | |
| +155 −0 | components/Media/VideoLibrary.tsx | |
| +30 −0 | components/Media/VideoLibraryModal.tsx | |
| +272 −0 | components/Media/VideoUpload.tsx | |
| +1 −1 | components/Settings/SettingDialog.tsx | |
| +6 −0 | constants/index.d.ts | |
| +9 −0 | constants/index.js | |
| +143 −0 | utils/api/videoUpload.ts |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,12 +17,14 @@ | |
|
|
||
| import aioboto3 | ||
| from botocore.client import BaseClient | ||
| from botocore.client import Config | ||
| from botocore.exceptions import ClientError | ||
|
|
||
| from nat.data_models.object_store import KeyAlreadyExistsError | ||
| from nat.data_models.object_store import NoSuchKeyError | ||
| from nat.object_store.interfaces import ObjectStore | ||
| from nat.object_store.models import ObjectStoreItem | ||
| from nat.object_store.models import ObjectStoreListItem | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
@@ -55,6 +57,10 @@ def __init__(self, | |
| self._client_args["region_name"] = region | ||
| if endpoint_url: | ||
| self._client_args["endpoint_url"] = endpoint_url | ||
| # Use path-style addressing for non-AWS endpoints (MinIO, etc.) to avoid | ||
| # DNS-based virtual host lookups that fail for local endpoints | ||
| if 'amazonaws.com' not in endpoint_url.lower(): | ||
| self._client_args["config"] = Config(s3={'addressing_style': 'path'}) | ||
|
|
||
| async def __aenter__(self) -> "S3ObjectStore": | ||
|
|
||
|
|
@@ -164,3 +170,52 @@ async def delete_object(self, key: str) -> None: | |
|
|
||
| if results.get('DeleteMarker', False): | ||
| raise NoSuchKeyError(key=key, additional_message="Object was a delete marker") | ||
|
|
||
| async def list_objects(self, prefix: str | None = None) -> list[ObjectStoreListItem]: | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Every other object store would need to have a This is why I made my comment above related to introducing an attack vector. I feel like the UI should own/maintain the video list itself rather than expect the toolkit to do so. |
||
| """ | ||
| List objects in the S3 bucket, optionally filtered by key prefix. | ||
| """ | ||
| if self._client is None: | ||
| raise RuntimeError("Connection not established") | ||
|
|
||
| objects = [] | ||
|
|
||
| try: | ||
| paginator = self._client.get_paginator('list_objects_v2') | ||
|
|
||
| pagination_args = {"Bucket": self.bucket_name} | ||
| if prefix is not None: | ||
| pagination_args["Prefix"] = prefix | ||
|
|
||
| async for page in paginator.paginate(**pagination_args): | ||
|
|
||
| if 'Contents' not in page: | ||
| continue | ||
|
|
||
| for obj in page['Contents']: | ||
| key = obj['Key'] | ||
|
|
||
| if key.endswith('/'): | ||
| continue | ||
|
|
||
| try: | ||
| head_response = await self._client.head_object(Bucket=self.bucket_name, Key=key) | ||
| content_type = head_response.get('ContentType') | ||
| metadata = head_response.get('Metadata', {}) | ||
| except ClientError as e: | ||
| logger.warning(f"Failed to get metadata for {key}: {e}") | ||
| content_type = None | ||
| metadata = {} | ||
|
|
||
| objects.append( | ||
| ObjectStoreListItem(key=key, | ||
| size=obj.get('Size', 0), | ||
| content_type=content_type, | ||
| metadata=metadata if metadata else None, | ||
| last_modified=obj.get('LastModified'))) | ||
|
|
||
| except ClientError as e: | ||
| logger.error(f"Error listing objects with prefix '{prefix}': {e}", exc_info=True) | ||
| raise RuntimeError(f"Failed to list objects: {str(e)}") from e | ||
|
|
||
| return objects | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,55 @@ | ||
| # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| """Pytest fixtures for S3 integration tests.""" | ||
|
|
||
| import socket | ||
|
|
||
| import pytest | ||
|
|
||
|
|
||
| def is_port_open(host: str, port: int) -> bool: | ||
| """Check if a port is open on the given host.""" | ||
| try: | ||
| with socket.create_connection((host, port), timeout=1): | ||
| return True | ||
| except (TimeoutError, ConnectionRefusedError, OSError): | ||
| return False | ||
|
|
||
|
|
||
| @pytest.fixture(scope="session") | ||
| def minio_server(): | ||
| """ | ||
| Fixture that checks if MinIO is running on localhost:9000. | ||
|
|
||
| To run S3 integration tests, start MinIO with: | ||
|
|
||
| docker run --rm -p 9000:9000 -p 9001:9001 \\ | ||
| -e MINIO_ROOT_USER=minioadmin \\ | ||
| -e MINIO_ROOT_PASSWORD=minioadmin \\ | ||
| minio/minio server /data --console-address ":9001" | ||
|
|
||
| Then run tests with: | ||
| pytest packages/nvidia_nat_s3/tests/test_s3_object_store.py --run_integration -v | ||
| """ | ||
| if not is_port_open("localhost", 9000): | ||
| pytest.skip("MinIO not running on localhost:9000. " | ||
| "Start MinIO to run S3 integration tests.") | ||
|
Comment on lines
+22
to
+48
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We already have e2e integration for bringing up a minio server as part of CI. |
||
|
|
||
| return { | ||
| "bucket_name": "test-bucket", | ||
| "endpoint_url": "http://localhost:9000", | ||
| "aws_access_key_id": "minioadmin", | ||
| "aws_secret_access_key": "minioadmin", | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -24,6 +24,7 @@ | |
| from nat.data_models.object_store import NoSuchKeyError | ||
| from nat.object_store.interfaces import ObjectStore | ||
| from nat.object_store.models import ObjectStoreItem | ||
| from nat.object_store.models import ObjectStoreListItem | ||
|
|
||
|
|
||
| @pytest.mark.asyncio(loop_scope="class") | ||
|
|
@@ -115,3 +116,63 @@ async def test_delete_object(self, store: ObjectStore): | |
| # Try to delete the object again | ||
| with pytest.raises(NoSuchKeyError): | ||
| await store.delete_object(key) | ||
|
|
||
| async def test_list_objects(self, store: ObjectStore): | ||
| """Test listing objects with and without prefix filtering""" | ||
|
|
||
| test_id = str(uuid.uuid4())[:8] | ||
|
|
||
| test_objects = { | ||
| f"videos/{test_id}/video1.mp4": | ||
| ObjectStoreItem(data=b"video1_data", content_type="video/mp4", metadata={"title": "Video 1"}), | ||
| f"videos/{test_id}/video2.mp4": | ||
| ObjectStoreItem(data=b"video2_data", content_type="video/mp4", metadata={"title": "Video 2"}), | ||
| f"images/{test_id}/image1.png": | ||
| ObjectStoreItem(data=b"image1_data", content_type="image/png", metadata={"title": "Image 1"}), | ||
| f"docs/{test_id}/doc1.txt": | ||
| ObjectStoreItem(data=b"doc1_data", content_type="text/plain") | ||
| } | ||
|
|
||
| for key, item in test_objects.items(): | ||
| await store.put_object(key, item) | ||
|
|
||
| # Test 1: List all objects (no prefix) | ||
| all_objects = await store.list_objects() | ||
| all_keys = {obj.key for obj in all_objects} | ||
|
|
||
| for key in test_objects.keys(): | ||
| assert key in all_keys, f"Expected key {key} not found in all_objects" | ||
|
|
||
| # Test 2: List with videos prefix | ||
| video_objects = await store.list_objects(prefix=f"videos/{test_id}/") | ||
| assert len(video_objects) == 2, f"Expected 2 video objects, got {len(video_objects)}" | ||
|
|
||
| video_keys = {obj.key for obj in video_objects} | ||
| assert f"videos/{test_id}/video1.mp4" in video_keys | ||
| assert f"videos/{test_id}/video2.mp4" in video_keys | ||
|
|
||
| for obj in video_objects: | ||
| assert isinstance(obj, ObjectStoreListItem) | ||
| assert obj.key.startswith(f"videos/{test_id}/") | ||
| assert obj.size > 0 | ||
| assert obj.content_type == "video/mp4" | ||
| assert obj.metadata is not None | ||
| assert "title" in obj.metadata | ||
|
|
||
| # Test 3: List with images prefix | ||
| image_objects = await store.list_objects(prefix=f"images/{test_id}/") | ||
| assert len(image_objects) == 1 | ||
| assert image_objects[0].key == f"images/{test_id}/image1.png" | ||
| assert image_objects[0].content_type == "image/png" | ||
|
|
||
| # Test 4: List with non-existent prefix | ||
| empty_objects = await store.list_objects(prefix=f"nonexistent/{test_id}/") | ||
| assert len(empty_objects) == 0 | ||
|
|
||
| # Test 5: List with partial prefix | ||
| all_test_objects = await store.list_objects(prefix=f"videos/{test_id}") | ||
| assert len(all_test_objects) >= 2 # At least our video objects | ||
|
|
||
| # Cleanup | ||
| for key in test_objects.keys(): | ||
| await store.delete_object(key) | ||
|
Comment on lines
+176
to
+178
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If any assertions above fail, then data will never be cleaned up. This should be part of a finally block |
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't see how this can safely be generalized across all object stores without introducing an attack vector.