Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions dandi/dandiapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
REQUEST_RETRIES,
RETRY_STATUSES,
ZARR_DELETE_BATCH_SIZE,
ZARR_EXTENSIONS,
DandiInstance,
EmbargoStatus,
)
Expand Down Expand Up @@ -1251,6 +1252,35 @@ def get_asset_by_path(self, path: str) -> RemoteAsset:
else:
return asset

def get_asset_with_subpath(self, path: str) -> RemoteAsset | ZarrWithPrefix:
    """
    Resolve *path*, which may name an asset directly or an entry inside a
    Zarr asset.

    Walk the components of *path* left to right.  Whenever a non-final
    component looks like a Zarr directory name (a recognized Zarr
    extension preceded by a nonempty stem), try fetching the asset at the
    prefix accumulated so far.  If that asset is a Zarr, return it paired
    with the remaining components as an entry prefix; if it is a blob,
    nothing can exist beneath it, so raise ``NotFoundError``.  If no such
    split resolves, fall back to looking up the whole *path* as an asset.

    :raises NotFoundError: if *path* cannot be resolved to an asset or to
        an entry within a Zarr asset
    """

    def looks_like_zarr(name: str) -> bool:
        # A component only counts as a Zarr when something precedes the
        # extension — a bare ".zarr" is not a Zarr directory name.
        return any(
            name.endswith(ext) and name != ext for ext in ZARR_EXTENSIONS
        )

    parts = PurePosixPath(path).parts
    prefix_path = PurePosixPath()
    last = len(parts) - 1
    for index, component in enumerate(parts):
        prefix_path = prefix_path / component
        # Only a non-final Zarr-looking component can have an entry
        # prefix after it.
        if index == last or not looks_like_zarr(component):
            continue
        try:
            found = self.get_asset_by_path(str(prefix_path))
        except NotFoundError:
            continue
        if isinstance(found, RemoteZarrAsset):
            return ZarrWithPrefix(
                zarr=found, prefix="/".join(parts[index + 1 :])
            )
        # A blob is not a folder, so no Zarr path can exist under it.
        raise NotFoundError(
            f"{path!r} is not a Zarr path with entry prefix"
        )
    return self.get_asset_by_path(path)

def download_directory(
self,
assets_dirpath: str,
Expand Down Expand Up @@ -1934,6 +1964,12 @@ def set_raw_metadata(self, metadata: dict[str, Any]) -> None:
self._metadata = data["metadata"]


@dataclass
class ZarrWithPrefix:
    """
    The result of resolving a path that points inside a Zarr asset: the
    Zarr asset itself together with the path of the requested entry
    within it.
    """

    # The Zarr asset containing the requested entry
    zarr: RemoteZarrAsset
    # Forward-slash-separated path of the entry within the Zarr
    prefix: str


@dataclass
class RemoteZarrEntry:
"""
Expand Down
14 changes: 12 additions & 2 deletions dandi/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ class Downloader:
url: ParsedDandiURL
output_dir: InitVar[str | Path]
output_prefix: Path = field(init=False)
#: just a convenience combination of output_dir and output_prefix
output_path: Path = field(init=False)
existing: DownloadExisting
get_metadata: bool
Expand Down Expand Up @@ -333,6 +334,12 @@ def download_generator(self) -> Iterator[dict]:
asset.path,
)
mtime = asset.modified
if asset.subpath:
lgr.warning(
"No downloading of subpaths within blobs yet. Got %s for %s",
asset.subpath,
asset.path,
)
_download_generator = _download_file(
asset.get_download_file_iter(),
download_path,
Expand All @@ -352,7 +359,8 @@ def download_generator(self) -> Iterator[dict]:
), f"Asset {asset.path} is neither blob nor Zarr"
_download_generator = _download_zarr(
asset,
download_path,
prefix=asset.subpath,
download_path=download_path,
toplevel_path=self.output_path,
existing=self.existing,
jobs=self.jobs_per_zarr,
Expand Down Expand Up @@ -812,6 +820,7 @@ def _download_file(
lgr.warning("downloader logic: We should not be here!")

final_digest = None

if downloaded_digest and not resuming:
assert downloaded_digest is not None
final_digest = downloaded_digest.hexdigest() # we care only about hex
Expand Down Expand Up @@ -977,6 +986,7 @@ def _download_zarr(
toplevel_path: str | Path,
existing: DownloadExisting,
lock: Lock,
prefix: str | None = None,
jobs: int | None = None,
) -> Iterator[dict]:
# Avoid heavy import by importing within function:
Expand All @@ -993,7 +1003,7 @@ def digest_callback(path: str, algoname: str, d: str) -> None:
digests[path] = d

def downloads_gen():
for entry in asset.iterfiles():
for entry in asset.iterfiles(prefix=prefix):
entries.append(entry)
etag = entry.digest
assert etag.algorithm is DigestType.md5
Expand Down
Loading