From 9e02239c8ac5c0be851b35358b5b58d3ba065182 Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Wed, 28 Jan 2026 01:49:59 -0500 Subject: [PATCH 1/9] Initial version of OZX support, with Claude --- fileglancer/ozxzip.py | 678 ++++++++++++++++++ .../__tests__/unitTests/ozxDetection.test.ts | 170 +++++ frontend/src/queries/ozxQueries.ts | 343 +++++++++ frontend/src/utils/ozxDetection.ts | 63 ++ tests/test_ozxzip.py | 576 +++++++++++++++ 5 files changed, 1830 insertions(+) create mode 100644 fileglancer/ozxzip.py create mode 100644 frontend/src/__tests__/unitTests/ozxDetection.test.ts create mode 100644 frontend/src/queries/ozxQueries.ts create mode 100644 frontend/src/utils/ozxDetection.ts create mode 100644 tests/test_ozxzip.py diff --git a/fileglancer/ozxzip.py b/fileglancer/ozxzip.py new file mode 100644 index 00000000..3778ff3f --- /dev/null +++ b/fileglancer/ozxzip.py @@ -0,0 +1,678 @@ +"""RFC-9 compliant reader for .ozx (Zipped OME-Zarr) files. + +RFC-9 Spec: https://ngff.openmicroscopy.org/rfc/9/index.html + +This module provides functionality to read OME-Zarr data from ZIP archives +with support for: +- Partial central directory parsing (jsonFirst optimization) +- ZIP64 format for large files +- Range request streaming for chunks +""" + +import struct +import json +import zlib +from dataclasses import dataclass, field +from typing import Optional, Dict, Generator, BinaryIO, List +from io import BytesIO + +from loguru import logger + +# ZIP signatures +ZIP_LOCAL_HEADER_SIG = b'\x50\x4b\x03\x04' +ZIP_CD_SIG = b'\x50\x4b\x01\x02' +ZIP_EOCD_SIG = b'\x50\x4b\x05\x06' +ZIP_EOCD64_SIG = b'\x50\x4b\x06\x06' +ZIP_EOCD64_LOC_SIG = b'\x50\x4b\x06\x07' + +# Compression methods +COMPRESSION_STORED = 0 +COMPRESSION_DEFLATE = 8 + +# ZIP64 marker value +ZIP64_MARKER = 0xFFFFFFFF +ZIP64_MARKER_16 = 0xFFFF + +# Extra field header IDs +ZIP64_EXTRA_ID = 0x0001 + +# Default buffer size for streaming +DEFAULT_BUFFER_SIZE = 8192 + +# Maximum EOCD search size (65KB comment + 22 byte EOCD header) +MAX_EOCD_SEARCH_SIZE = 65536 + 22 + + +@dataclass +class OZXMetadata: + """Parsed metadata from ZIP comment (RFC-9 format).""" + version: str + json_first: bool = False + raw_comment: Optional[str] = None + + +@dataclass +class ZipEntry: + """A file entry from the ZIP central directory.""" + filename: str + compressed_size: int + uncompressed_size: int + compression_method: int # 0=STORE, 8=DEFLATE + local_header_offset: int + crc32: int + extra_field: bytes = field(default_factory=bytes, repr=False) + + @property + def is_directory(self) -> bool: + """Check if this entry represents a directory.""" + return self.filename.endswith('/') + + @property + def is_json_file(self) -> bool: + """Check if this is a JSON metadata file (for jsonFirst optimization).""" + name = self.filename.lower() + return (name.endswith('.json') or + name.endswith('.zattrs') or + name.endswith('.zarray') or + name.endswith('.zgroup')) + + +class OZXReaderError(Exception): + """Base exception for OZX reader errors.""" + pass + + +class InvalidZipError(OZXReaderError): + """Raised when the ZIP file is invalid or corrupted.""" + pass + + +class InvalidOZXError(OZXReaderError): + """Raised when the file is not a valid OZX file.""" + pass + + +class OZXReader: + """ + RFC-9 compliant reader for .ozx files. 
+ + Supports: + - Partial central directory parsing (jsonFirst optimization) + - ZIP64 format for large files + - Range requests for streaming chunks + + Usage: + with OZXReader('/path/to/file.ozx') as reader: + metadata = reader.get_metadata() + entries = reader.parse_central_directory(json_only=metadata.json_first) + content = reader.read_file('path/in/archive.json') + """ + + def __init__(self, file_path: str): + """Initialize the OZX reader. + + Args: + file_path: Path to the .ozx file + """ + self.file_path = file_path + self._fh: Optional[BinaryIO] = None + self._file_size: int = 0 + self._metadata: Optional[OZXMetadata] = None + self._entries: Dict[str, ZipEntry] = {} + self._cd_offset: int = 0 + self._cd_size: int = 0 + self._cd_entries_count: int = 0 + self._is_zip64: bool = False + self._cd_parsed: bool = False + + def open(self) -> 'OZXReader': + """Open the file and parse EOCD. + + Returns: + Self for method chaining + + Raises: + FileNotFoundError: If the file doesn't exist + InvalidZipError: If the file is not a valid ZIP + """ + import os + self._fh = open(self.file_path, 'rb') + self._file_size = os.fstat(self._fh.fileno()).st_size + self._parse_eocd() + return self + + def close(self): + """Close the file handle.""" + if self._fh: + self._fh.close() + self._fh = None + + def __enter__(self) -> 'OZXReader': + return self.open() + + def __exit__(self, *args): + self.close() + + @property + def file_size(self) -> int: + """Get the size of the OZX file.""" + return self._file_size + + @property + def is_zip64(self) -> bool: + """Check if this is a ZIP64 format archive.""" + return self._is_zip64 + + def get_metadata(self) -> Optional[OZXMetadata]: + """Get parsed OME metadata from ZIP comment. + + Returns: + OZXMetadata if valid OME metadata found, None otherwise + """ + return self._metadata + + def parse_central_directory(self, json_only: bool = False) -> Dict[str, ZipEntry]: + """ + Parse the central directory. + + Args: + json_only: If True and jsonFirst=True in metadata, stop parsing + after the last JSON file. This is the RFC-9 optimization + for metadata discovery. 
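+
+        Example (a minimal illustrative sketch using this module's own API):
+
+            with OZXReader('data.ozx') as reader:
+                meta = reader.get_metadata()
+                entries = reader.parse_central_directory(
+                    json_only=bool(meta and meta.json_first))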
+
+        Returns:
+            Dictionary mapping filenames to ZipEntry objects
+
+        Raises:
+            InvalidZipError: If central directory is corrupted
+        """
+        if self._fh is None:
+            raise OZXReaderError("File not opened")
+
+        if self._cd_parsed and not json_only:
+            return self._entries
+
+        self._fh.seek(self._cd_offset)
+        entries: Dict[str, ZipEntry] = {}
+
+        for i in range(self._cd_entries_count):
+            # Read CD file header (46 bytes minimum)
+            header = self._fh.read(46)
+            if len(header) < 46 or header[:4] != ZIP_CD_SIG:
+                raise InvalidZipError(f"Invalid central directory entry at index {i}")
+
+            # Parse header fields
+            (version_made, version_needed, flags, compression,
+             mod_time, mod_date, crc32, comp_size, uncomp_size,
+             name_len, extra_len, comment_len, disk_start,
+             internal_attr, external_attr, local_offset) = struct.unpack(
+                '<HHHHHHLLLHHHHHLL', header[4:46])
+
+            # Read filename and extra field
+            filename = self._fh.read(name_len).decode('utf-8', errors='replace')
+            extra = self._fh.read(extra_len) if extra_len > 0 else b''
+
+            # Skip comment
+            if comment_len > 0:
+                self._fh.seek(comment_len, 1)
+
+            # Handle ZIP64 extra field if needed
+            if comp_size == ZIP64_MARKER or uncomp_size == ZIP64_MARKER or local_offset == ZIP64_MARKER:
+                comp_size, uncomp_size, local_offset = self._parse_zip64_extra(
+                    extra, comp_size, uncomp_size, local_offset)
+
+            entry = ZipEntry(
+                filename=filename,
+                compressed_size=comp_size,
+                uncompressed_size=uncomp_size,
+                compression_method=compression,
+                local_header_offset=local_offset,
+                crc32=crc32,
+                extra_field=extra
+            )
+
+            entries[filename] = entry
+
+            # jsonFirst optimization: stop early if we've hit non-JSON files
+            if json_only and self._metadata and self._metadata.json_first:
+                if not entry.is_directory and not entry.is_json_file:
+                    logger.debug(f"jsonFirst optimization: stopping at {filename}")
+                    break
+
+        self._entries.update(entries)
+        if not json_only:
+            self._cd_parsed = True
+
+        return entries
+
+    def list_files(self, prefix: str = "") -> List[str]:
+        """List files in archive, optionally filtered by prefix.
+
+        Args:
+            prefix: Only return files starting with this prefix
+
+        Returns:
+            List of filenames matching the prefix
+        """
+        if not self._cd_parsed:
+            self.parse_central_directory()
+
+        if prefix:
+            return [name for name in self._entries.keys()
+                    if name.startswith(prefix) and not self._entries[name].is_directory]
+        return [name for name in self._entries.keys()
+                if not self._entries[name].is_directory]
+
+    def get_entry(self, path: str) -> Optional[ZipEntry]:
+        """Get info about a specific file in the archive.
+
+        Args:
+            path: Path within the archive
+
+        Returns:
+            ZipEntry if found, None otherwise
+        """
+        if not self._cd_parsed:
+            self.parse_central_directory()
+        return self._entries.get(path)
+
+    def read_file(self, path: str) -> bytes:
+        """Read entire file from archive.
+
+        Args:
+            path: Path within the archive
+
+        Returns:
+            File contents as bytes
+
+        Raises:
+            FileNotFoundError: If path not found in archive
+            InvalidZipError: If decompression fails
+        """
+        return b''.join(self.stream_file(path))
+
+    def stream_file(self, path: str, buffer_size: int = DEFAULT_BUFFER_SIZE) -> Generator[bytes, None, None]:
+        """Stream file content from archive.
+
+        Args:
+            path: Path within the archive
+            buffer_size: Size of chunks to yield
+
+        Yields:
+            Chunks of file content
+
+        Raises:
+            FileNotFoundError: If path not found in archive
+        """
+        if self._fh is None:
+            raise OZXReaderError("File not opened")
+
+        entry = self.get_entry(path)
+        if entry is None:
+            raise FileNotFoundError(f"File not found in archive: {path}")
+
+        # Seek to local file header and skip it
+        self._fh.seek(entry.local_header_offset)
+        local_header = self._fh.read(30)
+        if local_header[:4] != ZIP_LOCAL_HEADER_SIG:
+            raise InvalidZipError(f"Invalid local header for {path}")
+
+        # Get local header name and extra lengths, then skip to the file data
+        name_len, extra_len = struct.unpack('<HH', local_header[26:30])
+        self._fh.seek(name_len + extra_len, 1)
+
+        if entry.compression_method == COMPRESSION_STORED:
+            # Uncompressed - read directly
+            remaining = entry.compressed_size
+            while remaining > 0:
+                chunk_size = min(buffer_size, remaining)
+                chunk = self._fh.read(chunk_size)
+                if not chunk:
+                    break
+                yield chunk
+                remaining -= len(chunk)
+
+        elif entry.compression_method == COMPRESSION_DEFLATE:
+            # Compressed - need to decompress
+            decompressor = zlib.decompressobj(-zlib.MAX_WBITS)
+            remaining = entry.compressed_size
+
+            while remaining > 0:
+                chunk_size = min(buffer_size, remaining)
+                compressed_chunk = self._fh.read(chunk_size)
+                if not compressed_chunk:
+                    break
+                remaining -= len(compressed_chunk)
+
+                decompressed = decompressor.decompress(compressed_chunk)
+                if decompressed:
+                    yield decompressed
+
+            # Flush any remaining data
+            final = decompressor.flush()
+            if final:
+                yield final
+        else:
+            raise InvalidZipError(f"Unsupported compression method: {entry.compression_method}")
+
+    def stream_file_range(self, path: str, start: int, end: int,
+                          buffer_size: int = DEFAULT_BUFFER_SIZE) -> Generator[bytes, None, None]:
+        """Stream a byte range of uncompressed file content.
+
+        Note: For DEFLATE compressed files, this must decompress from the
+        beginning to reach the desired offset.
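+
+        Example (illustrative):
+
+            # Read bytes 100-199 of a chunk entry without reading the rest
+            data = b''.join(reader.stream_file_range('0/c/0/0/0', 100, 199))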
+
+        Args:
+            path: Path within the archive
+            start: Start byte offset (inclusive)
+            end: End byte offset (inclusive)
+            buffer_size: Size of chunks to yield
+
+        Yields:
+            Chunks of file content within the specified range
+
+        Raises:
+            FileNotFoundError: If path not found in archive
+            ValueError: If range is invalid
+        """
+        if self._fh is None:
+            raise OZXReaderError("File not opened")
+
+        entry = self.get_entry(path)
+        if entry is None:
+            raise FileNotFoundError(f"File not found in archive: {path}")
+
+        if start < 0:
+            raise ValueError("Start position cannot be negative")
+        if end < start:
+            raise ValueError("End position cannot be less than start position")
+        if start >= entry.uncompressed_size:
+            return  # Nothing to return
+
+        # Clamp end to file size
+        end = min(end, entry.uncompressed_size - 1)
+        range_length = end - start + 1
+
+        # Seek to local file header and skip it
+        self._fh.seek(entry.local_header_offset)
+        local_header = self._fh.read(30)
+        if local_header[:4] != ZIP_LOCAL_HEADER_SIG:
+            raise InvalidZipError(f"Invalid local header for {path}")
+
+        name_len, extra_len = struct.unpack('<HH', local_header[26:30])
+        self._fh.seek(name_len + extra_len, 1)
+
+        if entry.compression_method == COMPRESSION_STORED:
+            # Uncompressed - seek directly to the requested offset
+            self._fh.seek(start, 1)
+            remaining = range_length
+            while remaining > 0:
+                chunk_size = min(buffer_size, remaining)
+                chunk = self._fh.read(chunk_size)
+                if not chunk:
+                    break
+                yield chunk
+                remaining -= len(chunk)
+
+        elif entry.compression_method == COMPRESSION_DEFLATE:
+            # For compressed files, we need to decompress from the start
+            # and skip to the desired offset
+            decompressor = zlib.decompressobj(-zlib.MAX_WBITS)
+            compressed_remaining = entry.compressed_size
+            decompressed_pos = 0
+            output_remaining = range_length
+
+            while compressed_remaining > 0 and output_remaining > 0:
+                chunk_size = min(buffer_size, compressed_remaining)
+                compressed_chunk = self._fh.read(chunk_size)
+                if not compressed_chunk:
+                    break
+                compressed_remaining -= len(compressed_chunk)
+
+                decompressed = decompressor.decompress(compressed_chunk)
+                if not decompressed:
+                    continue
+
+                # Handle the decompressed chunk
+                chunk_start = 0
+                chunk_len = len(decompressed)
+
+                # Skip data before our range
+                if decompressed_pos + chunk_len <= start:
+                    decompressed_pos += chunk_len
+                    continue
+
+                # Calculate how much of this chunk to skip
+                if decompressed_pos < start:
+                    chunk_start = start - decompressed_pos
+
+                # Calculate how much of this chunk to output
+                output_bytes = min(chunk_len - chunk_start, output_remaining)
+
+                if output_bytes > 0:
+                    yield decompressed[chunk_start:chunk_start + output_bytes]
+                    output_remaining -= output_bytes
+
+                decompressed_pos += chunk_len
+
+            # Flush and handle remaining
+            if output_remaining > 0:
+                final = decompressor.flush()
+                if final:
+                    # Apply same range logic to final chunk
+                    chunk_len = len(final)
+                    if decompressed_pos + chunk_len > start:
+                        chunk_start = max(0, start - decompressed_pos)
+                        output_bytes = min(chunk_len - chunk_start, output_remaining)
+                        if output_bytes > 0:
+                            yield final[chunk_start:chunk_start + output_bytes]
+        else:
+            raise InvalidZipError(f"Unsupported compression method: {entry.compression_method}")
+
+    def _parse_eocd(self):
+        """Parse End of Central Directory record.
+
+        Raises:
+            InvalidZipError: If EOCD not found or invalid
+        """
+        if self._fh is None:
+            raise OZXReaderError("File not opened")
+
+        # Search backwards from end of file for EOCD signature
+        search_size = min(MAX_EOCD_SEARCH_SIZE, self._file_size)
+        self._fh.seek(self._file_size - search_size)
+        data = self._fh.read(search_size)
+
+        # Find EOCD signature (searching from end)
+        eocd_pos = data.rfind(ZIP_EOCD_SIG)
+        if eocd_pos == -1:
+            raise InvalidZipError("End of Central Directory not found")
+
+        # Position in file
+        eocd_file_pos = self._file_size - search_size + eocd_pos
+
+        # Parse EOCD (22 bytes minimum)
+        eocd = data[eocd_pos:eocd_pos + 22]
+        if len(eocd) < 22:
+            raise InvalidZipError("Truncated EOCD record")
+
+        (disk_num, cd_disk, cd_entries_this_disk, cd_entries_total,
+         cd_size, cd_offset, comment_len) = struct.unpack(
+            '<HHHHLLH', eocd[4:22])
+
+        # Read the ZIP comment, which may carry RFC-9 OME metadata
+        comment = ''
+        if comment_len > 0:
+            comment_data = data[eocd_pos + 22:eocd_pos + 22 + comment_len]
+            if len(comment_data) == comment_len:
+                comment = comment_data.decode('utf-8', errors='replace')
+
+        # Check for ZIP64
+        if (cd_offset == ZIP64_MARKER or cd_size == ZIP64_MARKER or
+                cd_entries_total == ZIP64_MARKER_16):
+            self._is_zip64 = True
+            self._parse_zip64_eocd(eocd_file_pos)
+        else:
+            self._cd_offset = cd_offset
+            self._cd_size = cd_size
+            self._cd_entries_count = cd_entries_total
+
+        # Parse ZIP comment for OZX metadata
+        self._metadata = self._parse_zip_comment(comment)
+
+    def _parse_zip64_eocd(self, eocd_pos: int):
+        """Parse ZIP64 End of Central Directory records.
+
+        Args:
+            eocd_pos: Position of standard EOCD in file
+
+        Raises:
+            InvalidZipError: If ZIP64 records not found or invalid
+        """
+        if self._fh is None:
+            raise OZXReaderError("File not opened")
+
+        # Look for ZIP64 EOCD Locator (20 bytes before EOCD)
+        loc_pos = eocd_pos - 20
+        if loc_pos < 0:
+            raise InvalidZipError("ZIP64 EOCD Locator not found")
+
+        self._fh.seek(loc_pos)
+        locator = self._fh.read(20)
+
+        if locator[:4] != ZIP_EOCD64_LOC_SIG:
+            raise InvalidZipError("Invalid ZIP64 EOCD Locator")
+
+        # Parse locator to get ZIP64 EOCD offset
+        (zip64_disk, zip64_eocd_offset, total_disks) = struct.unpack(
+            '<LQL', locator[4:20])
+
+        # Read and parse the ZIP64 EOCD record itself
+        self._fh.seek(zip64_eocd_offset)
+        eocd64 = self._fh.read(56)
+        if len(eocd64) < 56 or eocd64[:4] != ZIP_EOCD64_SIG:
+            raise InvalidZipError("Invalid ZIP64 EOCD record")
+
+        (record_size, version_made, version_needed,
+         disk_num, cd_disk, cd_entries_this_disk, cd_entries_total,
+         cd_size, cd_offset) = struct.unpack('<QHHLLQQQQ', eocd64[4:56])
+
+        self._cd_offset = cd_offset
+        self._cd_size = cd_size
+        self._cd_entries_count = cd_entries_total
+
+    def _parse_zip_comment(self, comment: str) -> Optional[OZXMetadata]:
+        """Parse ZIP comment for RFC-9 OME metadata.
+
+        RFC-9 comment format:
+        {
+            "ome": {
+                "version": "0.5",
+                "zipFile": {
+                    "centralDirectory": {
+                        "jsonFirst": true
+                    }
+                }
+            }
+        }
+
+        Args:
+            comment: ZIP file comment string
+
+        Returns:
+            OZXMetadata if valid, None otherwise
+        """
+        if not comment:
+            return None
+
+        try:
+            data = json.loads(comment)
+            if not isinstance(data, dict) or 'ome' not in data:
+                logger.debug("ZIP comment is not OME metadata")
+                return None
+
+            ome = data['ome']
+            if not isinstance(ome, dict) or 'version' not in ome:
+                logger.debug("Invalid OME metadata structure")
+                return None
+
+            version = str(ome['version'])
+
+            # Check for jsonFirst flag
+            json_first = False
+            zip_file = ome.get('zipFile', {})
+            if isinstance(zip_file, dict):
+                cd = zip_file.get('centralDirectory', {})
+                if isinstance(cd, dict):
+                    json_first = bool(cd.get('jsonFirst', False))
+
+            logger.debug(f"Parsed OZX metadata: version={version}, jsonFirst={json_first}")
+            return OZXMetadata(
+                version=version,
+                json_first=json_first,
+                raw_comment=comment
+            )
+
+        except json.JSONDecodeError as e:
+            logger.debug(f"Failed to parse ZIP comment as JSON: {e}")
+            return None
+
+    def _parse_zip64_extra(self, extra: bytes, comp_size: int,
+                           uncomp_size: int, local_offset: int) -> tuple:
+        """Parse ZIP64 extra field to get actual values.
+
+        Args:
+            extra: Extra field data
+            comp_size: Compressed size from CD (may be 0xFFFFFFFF)
+            uncomp_size: Uncompressed size from CD (may be 0xFFFFFFFF)
+            local_offset: Local header offset from CD (may be 0xFFFFFFFF)
+
+        Returns:
+            Tuple of (actual_comp_size, actual_uncomp_size, actual_local_offset)
+        """
+        offset = 0
+        while offset + 4 <= len(extra):
+            header_id, data_size = struct.unpack('<HH', extra[offset:offset + 4])
+            pos = offset + 4
+
+            if header_id == ZIP64_EXTRA_ID:
+                # ZIP64 values appear in order, but only for the fields that
+                # are set to the 0xFFFFFFFF marker in the central directory
+                if uncomp_size == ZIP64_MARKER and pos + 8 <= len(extra):
+                    uncomp_size = struct.unpack('<Q', extra[pos:pos + 8])[0]
+                    pos += 8
+                if comp_size == ZIP64_MARKER and pos + 8 <= len(extra):
+                    comp_size = struct.unpack('<Q', extra[pos:pos + 8])[0]
+                    pos += 8
+                if local_offset == ZIP64_MARKER and pos + 8 <= len(extra):
+                    local_offset = struct.unpack('<Q', extra[pos:pos + 8])[0]
+                break
+
+            offset += 4 + data_size
+
+        return comp_size, uncomp_size, local_offset
+
+
+def is_ozx_file(filename: str) -> bool:
+    """Check if a filename has the .ozx extension.
+
+    Args:
+        filename: Filename to check
+
+    Returns:
+        True if the file has a .ozx extension
+    """
+    return filename.lower().endswith('.ozx')
diff --git a/frontend/src/__tests__/unitTests/ozxDetection.test.ts b/frontend/src/__tests__/unitTests/ozxDetection.test.ts
new file mode 100644
index 00000000..58b5aa91
--- /dev/null
+++ b/frontend/src/__tests__/unitTests/ozxDetection.test.ts
@@ -0,0 +1,170 @@
+import { describe, it, expect } from 'vitest';
+import {
+  isOzxFile,
+  isOzxFilename,
+  hasOzxFiles,
+  getOzxFiles,
+  getOzxFilePath
+} from '@/utils/ozxDetection';
+import { detectOzxZarrVersions } from '@/queries/zarrQueries';
+import type { FileOrFolder } from '@/shared.types';
+
+// Helper to create minimal FileOrFolder objects for testing
+const createFile = (name: string, path?: string): FileOrFolder => ({
+  name,
+  path: path ?? `/${name}`,
+  size: 1000,
+  is_dir: false,
+  permissions: 'rw-r--r--',
+  owner: 'test',
+  group: 'test',
+  last_modified: Date.now()
+});
+
+const createDir = (name: string, path?: string): FileOrFolder => ({
+  name,
+  path: path ?? `/${name}`,
+  size: 0,
+  is_dir: true,
+  permissions: 'rwxr-xr-x',
+  owner: 'test',
+  group: 'test',
+  last_modified: Date.now()
+});
+
+describe('isOzxFile', () => {
+  it('should return true for files with .ozx extension', () => {
+    expect(isOzxFile(createFile('image.ozx'))).toBe(true);
+    expect(isOzxFile(createFile('data.OZX'))).toBe(true);
+    expect(isOzxFile(createFile('sample.Ozx'))).toBe(true);
+  });
+
+  it('should return false for non-ozx files', () => {
+    expect(isOzxFile(createFile('image.zarr'))).toBe(false);
+    expect(isOzxFile(createFile('data.zip'))).toBe(false);
+    expect(isOzxFile(createFile('file.txt'))).toBe(false);
+    expect(isOzxFile(createFile('ozx'))).toBe(false);
+    expect(isOzxFile(createFile('.ozx'))).toBe(true); // Hidden file with .ozx extension
+  });
+
+  it('should return false for directories', () => {
+    expect(isOzxFile(createDir('folder.ozx'))).toBe(false);
+  });
+});
+
+describe('isOzxFilename', () => {
+  it('should return true for filenames with .ozx extension', () => {
+    expect(isOzxFilename('image.ozx')).toBe(true);
+    expect(isOzxFilename('data.OZX')).toBe(true);
+    expect(isOzxFilename('/path/to/file.ozx')).toBe(true);
+  });
+
+  it('should return false for non-ozx filenames', () => {
+    expect(isOzxFilename('image.zarr')).toBe(false);
+    expect(isOzxFilename('data.zip')).toBe(false);
+  });
+});
+
+describe('hasOzxFiles', () => {
+  it('should return true if any file is an OZX file', () => {
+    const files = [
+      createFile('image.zarr'),
+      createFile('data.ozx'),
+      createFile('text.txt')
+    ];
+    expect(hasOzxFiles(files)).toBe(true);
+  });
+
+  it('should return false if no OZX files exist', () => {
+    const files = [
+      createFile('image.zarr'),
+      createFile('data.zip'),
+      createFile('text.txt')
+    ];
+    expect(hasOzxFiles(files)).toBe(false);
+  });
+
+  it('should return false for empty array', () => {
+    expect(hasOzxFiles([])).toBe(false);
+  });
+});
+
+describe('getOzxFiles', () => {
+  it('should return only OZX files', () => {
+    const files = [
+      
createFile('image.zarr'), + createFile('data1.ozx'), + createFile('text.txt'), + createFile('data2.ozx') + ]; + const result = getOzxFiles(files); + expect(result).toHaveLength(2); + expect(result[0].name).toBe('data1.ozx'); + expect(result[1].name).toBe('data2.ozx'); + }); + + it('should return empty array if no OZX files', () => { + const files = [createFile('image.zarr'), createFile('text.txt')]; + expect(getOzxFiles(files)).toEqual([]); + }); +}); + +describe('getOzxFilePath', () => { + it('should return path without leading slash', () => { + const file = createFile('data.ozx', '/path/to/data.ozx'); + expect(getOzxFilePath(file)).toBe('path/to/data.ozx'); + }); + + it('should return path unchanged if no leading slash', () => { + const file = createFile('data.ozx', 'path/to/data.ozx'); + expect(getOzxFilePath(file)).toBe('path/to/data.ozx'); + }); +}); + +describe('detectOzxZarrVersions', () => { + it('should detect zarr v3 when zarr.json exists at root', () => { + const files = ['zarr.json', '0/zarr.json', '0/c/0/0/0']; + expect(detectOzxZarrVersions(files)).toEqual(['v3']); + }); + + it('should detect zarr v2 when .zarray exists at root', () => { + const files = ['.zarray', '.zattrs', '0/0']; + expect(detectOzxZarrVersions(files)).toEqual(['v2']); + }); + + it('should detect zarr v2 when .zattrs exists at root', () => { + const files = ['.zattrs', '0/.zarray', '0/0/0']; + expect(detectOzxZarrVersions(files)).toEqual(['v2']); + }); + + it('should detect both versions when both exist', () => { + const files = ['zarr.json', '.zarray', '0/c/0/0/0']; + expect(detectOzxZarrVersions(files)).toEqual(['v2', 'v3']); + }); + + it('should return empty array when no zarr files', () => { + const files = ['data.txt', 'image.png']; + expect(detectOzxZarrVersions(files)).toEqual([]); + }); + + it('should return empty array for empty file list', () => { + expect(detectOzxZarrVersions([])).toEqual([]); + }); + + it('should detect version from nested paths', () => { + const files = ['folder/zarr.json', 'folder/.zattrs']; + // Nested paths are detected because we check for files ending with /name + // This allows detection of zarr data at any level in the archive + const result = detectOzxZarrVersions(files); + expect(result).toContain('v3'); // folder/zarr.json + expect(result).toContain('v2'); // folder/.zattrs + }); + + it('should detect from paths ending with marker files', () => { + const files = ['root/zarr.json', 'root/.zattrs']; + // Paths ending with /zarr.json should be detected + const result = detectOzxZarrVersions(files); + expect(result).toContain('v3'); + expect(result).toContain('v2'); + }); +}); diff --git a/frontend/src/queries/ozxQueries.ts b/frontend/src/queries/ozxQueries.ts new file mode 100644 index 00000000..e89dafd3 --- /dev/null +++ b/frontend/src/queries/ozxQueries.ts @@ -0,0 +1,343 @@ +/** + * OZX (Zipped OME-Zarr) query hooks and store implementation. + * + * RFC-9 Spec: https://ngff.openmicroscopy.org/rfc/9/index.html + */ + +import { useQuery } from '@tanstack/react-query'; +import type { UseQueryResult } from '@tanstack/react-query'; +import { default as log } from '@/logger'; +import { buildUrl, sendFetchRequest } from '@/utils'; +import { sendRequestAndThrowForNotOk } from './queryUtils'; + +/** + * Metadata response from the OZX metadata endpoint. + */ +export type OzxMetadataResponse = { + version: string | null; + json_first: boolean; + file_count: number; + is_zip64: boolean; +}; + +/** + * Build URL for accessing content within an OZX file. 
+ *
+ * @param fspName - The file share path name
+ * @param ozxFilePath - Path to the OZX file within the FSP
+ * @param internalPath - Path within the OZX archive
+ * @returns Properly encoded URL for the OZX content endpoint
+ */
+export function buildOzxContentUrl(
+  fspName: string,
+  ozxFilePath: string,
+  internalPath: string
+): string {
+  // Build the path segment: fspName/ozxFilePath
+  const pathSegment = `${fspName}/${ozxFilePath}`;
+  return buildUrl('/api/ozx-content/', pathSegment, { subpath: internalPath });
+}
+
+/**
+ * Build full URL for accessing content within an OZX file.
+ * Returns absolute URL suitable for external use (e.g., zarrita stores).
+ *
+ * @param fspName - The file share path name
+ * @param ozxFilePath - Path to the OZX file within the FSP
+ * @param internalPath - Path within the OZX archive
+ * @returns Absolute URL
+ */
+export function getOzxContentUrl(
+  fspName: string,
+  ozxFilePath: string,
+  internalPath: string
+): string {
+  const relativePath = buildOzxContentUrl(fspName, ozxFilePath, internalPath);
+  return new URL(relativePath, window.location.origin).href;
+}
+
+/**
+ * Build URL for the OZX metadata endpoint.
+ */
+export function buildOzxMetadataUrl(
+  fspName: string,
+  ozxFilePath: string
+): string {
+  const pathSegment = `${fspName}/${ozxFilePath}`;
+  return buildUrl('/api/ozx-metadata/', pathSegment, null);
+}
+
+/**
+ * Build URL for listing files in an OZX archive.
+ */
+export function buildOzxListUrl(
+  fspName: string,
+  ozxFilePath: string,
+  prefix?: string
+): string {
+  const pathSegment = `${fspName}/${ozxFilePath}`;
+  const params = prefix ? { prefix } : null;
+  return buildUrl('/api/ozx-list/', pathSegment, params);
+}
+
+/**
+ * Fetch OZX metadata from the backend.
+ */
+async function fetchOzxMetadata(
+  fspName: string,
+  ozxFilePath: string
+): Promise<OzxMetadataResponse> {
+  const url = buildOzxMetadataUrl(fspName, ozxFilePath);
+  const response = (await sendRequestAndThrowForNotOk(
+    url,
+    'GET'
+  )) as OzxMetadataResponse;
+  return response;
+}
+
+/**
+ * Hook to fetch OZX archive metadata.
+ *
+ * @param fspName - The file share path name
+ * @param ozxFilePath - Path to the OZX file within the FSP
+ * @param enabled - Whether the query should be enabled
+ */
+export function useOzxMetadataQuery(
+  fspName: string | undefined,
+  ozxFilePath: string | undefined,
+  enabled: boolean = true
+): UseQueryResult<OzxMetadataResponse> {
+  return useQuery({
+    queryKey: ['ozx', 'metadata', fspName || '', ozxFilePath || ''],
+    queryFn: async () => {
+      if (!fspName || !ozxFilePath) {
+        throw new Error('fspName and ozxFilePath are required');
+      }
+      return await fetchOzxMetadata(fspName, ozxFilePath);
+    },
+    enabled: enabled && !!fspName && !!ozxFilePath,
+    staleTime: 5 * 60 * 1000 // 5 minutes - OZX metadata doesn't change often
+  });
+}
+
+/**
+ * Fetch list of files in an OZX archive.
+ */
+async function fetchOzxFileList(
+  fspName: string,
+  ozxFilePath: string,
+  prefix?: string
+): Promise<string[]> {
+  const url = buildOzxListUrl(fspName, ozxFilePath, prefix);
+  const response = (await sendRequestAndThrowForNotOk(url, 'GET')) as {
+    files: string[];
+  };
+  return response.files;
+}
+
+/**
+ * Hook to fetch list of files in an OZX archive.
+ *
+ * @param fspName - The file share path name
+ * @param ozxFilePath - Path to the OZX file within the FSP
+ * @param prefix - Optional prefix to filter files
+ * @param enabled - Whether the query should be enabled
+ */
+export function useOzxFileListQuery(
+  fspName: string | undefined,
+  ozxFilePath: string | undefined,
+  prefix?: string,
+  enabled: boolean = true
+): UseQueryResult<string[]> {
+  return useQuery({
+    queryKey: ['ozx', 'files', fspName || '', ozxFilePath || '', prefix || ''],
+    queryFn: async () => {
+      if (!fspName || !ozxFilePath) {
+        throw new Error('fspName and ozxFilePath are required');
+      }
+      return await fetchOzxFileList(fspName, ozxFilePath, prefix);
+    },
+    enabled: enabled && !!fspName && !!ozxFilePath,
+    staleTime: 5 * 60 * 1000
+  });
+}
+
+/**
+ * Fetch content from within an OZX file.
+ * Supports optional range requests.
+ */
+export async function fetchOzxContent(
+  fspName: string,
+  ozxFilePath: string,
+  internalPath: string,
+  options?: {
+    signal?: AbortSignal;
+    rangeStart?: number;
+    rangeEnd?: number;
+  }
+): Promise<Uint8Array> {
+  const url = buildOzxContentUrl(fspName, ozxFilePath, internalPath);
+
+  const headers: HeadersInit = {};
+  if (options?.rangeStart !== undefined && options?.rangeEnd !== undefined) {
+    headers['Range'] = `bytes=${options.rangeStart}-${options.rangeEnd}`;
+  }
+
+  const response = await fetch(url, {
+    method: 'GET',
+    credentials: 'include',
+    headers,
+    signal: options?.signal
+  });
+
+  if (!response.ok && response.status !== 206) {
+    throw new Error(`Failed to fetch OZX content: ${response.status}`);
+  }
+
+  return new Uint8Array(await response.arrayBuffer());
+}
+
+/**
+ * A store implementation compatible with zarrita that reads from OZX archives
+ * via the Fileglancer OZX API endpoints.
+ *
+ * This allows existing zarrita-based code to transparently read from OZX files.
+ */
+export class OzxFetchStore {
+  private fspName: string;
+  private ozxPath: string;
+  private baseUrl: string;
+
+  /**
+   * Create a new OzxFetchStore.
+   *
+   * @param fspName - The file share path name
+   * @param ozxPath - Path to the OZX file within the FSP
+   */
+  constructor(fspName: string, ozxPath: string) {
+    this.fspName = fspName;
+    this.ozxPath = ozxPath;
+    // Compute base URL for logging
+    this.baseUrl = getOzxContentUrl(fspName, ozxPath, '');
+    log.debug('Created OzxFetchStore for', this.baseUrl);
+  }
+
+  /**
+   * Get full content of a file within the OZX archive.
+   *
+   * @param key - Path within the archive (e.g., "zarr.json", "0/c/0/0/0")
+   * @returns File content as Uint8Array, or undefined if not found
+   */
+  async get(key: string): Promise<Uint8Array | undefined> {
+    try {
+      const url = buildOzxContentUrl(this.fspName, this.ozxPath, key);
+      const response = await sendFetchRequest(url, 'GET');
+
+      if (!response.ok) {
+        if (response.status === 404) {
+          return undefined;
+        }
+        throw new Error(`Failed to fetch ${key}: ${response.status}`);
+      }
+
+      return new Uint8Array(await response.arrayBuffer());
+    } catch (error) {
+      log.debug(`OzxFetchStore.get(${key}) error:`, error);
+      return undefined;
+    }
+  }
+
+  /**
+   * Get a byte range from a file within the OZX archive.
+   * This is the key method for efficient chunk access.
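+   *
+   * @example
+   * // Illustrative: read the first kilobyte of a chunk entry
+   * const bytes = await store.getRange('0/c/0/0/0', 0, 1024);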
+   *
+   * @param key - Path within the archive
+   * @param offset - Starting byte offset
+   * @param length - Number of bytes to read
+   * @returns File content range as Uint8Array, or undefined if not found
+   */
+  async getRange(
+    key: string,
+    offset: number,
+    length: number
+  ): Promise<Uint8Array | undefined> {
+    try {
+      const url = buildOzxContentUrl(this.fspName, this.ozxPath, key);
+      const response = await fetch(url, {
+        method: 'GET',
+        credentials: 'include',
+        headers: {
+          Range: `bytes=${offset}-${offset + length - 1}`
+        }
+      });
+
+      if (!response.ok && response.status !== 206) {
+        if (response.status === 404) {
+          return undefined;
+        }
+        throw new Error(
+          `Failed to fetch range from ${key}: ${response.status}`
+        );
+      }
+
+      return new Uint8Array(await response.arrayBuffer());
+    } catch (error) {
+      log.debug(
+        `OzxFetchStore.getRange(${key}, ${offset}, ${length}) error:`,
+        error
+      );
+      return undefined;
+    }
+  }
+
+  /**
+   * Check if a file exists in the OZX archive.
+   *
+   * @param key - Path within the archive
+   * @returns True if the file exists
+   */
+  async has(key: string): Promise<boolean> {
+    try {
+      const url = buildOzxContentUrl(this.fspName, this.ozxPath, key);
+      const response = await fetch(url, {
+        method: 'HEAD',
+        credentials: 'include'
+      });
+      return response.ok;
+    } catch {
+      return false;
+    }
+  }
+
+  /**
+   * List files in the OZX archive with optional prefix filter.
+   *
+   * @param prefix - Optional prefix to filter files
+   * @returns Array of file paths
+   */
+  async list(prefix?: string): Promise<string[]> {
+    return await fetchOzxFileList(this.fspName, this.ozxPath, prefix);
+  }
+
+  /**
+   * Get the base URL for this store (for debugging/logging).
+   */
+  getBaseUrl(): string {
+    return this.baseUrl;
+  }
+}
+
+/**
+ * Create an OzxFetchStore for the given file.
+ * This is a factory function for creating stores.
+ *
+ * @param fspName - The file share path name
+ * @param ozxFilePath - Path to the OZX file within the FSP
+ * @returns OzxFetchStore instance
+ */
+export function createOzxStore(
+  fspName: string,
+  ozxFilePath: string
+): OzxFetchStore {
+  return new OzxFetchStore(fspName, ozxFilePath);
+}
diff --git a/frontend/src/utils/ozxDetection.ts b/frontend/src/utils/ozxDetection.ts
new file mode 100644
index 00000000..2f9c8b68
--- /dev/null
+++ b/frontend/src/utils/ozxDetection.ts
@@ -0,0 +1,63 @@
+/**
+ * OZX (Zipped OME-Zarr) file detection utilities.
+ *
+ * RFC-9 Spec: https://ngff.openmicroscopy.org/rfc/9/index.html
+ */
+
+import type { FileOrFolder } from '@/shared.types';
+
+/**
+ * Check if a file is an OZX (Zipped OME-Zarr) file by extension.
+ *
+ * @param file - The file to check
+ * @returns True if the file has a .ozx extension
+ */
+export function isOzxFile(file: FileOrFolder): boolean {
+  return !file.is_dir && file.name.toLowerCase().endsWith('.ozx');
+}
+
+/**
+ * Check if a filename has the .ozx extension.
+ *
+ * @param filename - The filename to check
+ * @returns True if the filename ends with .ozx
+ */
+export function isOzxFilename(filename: string): boolean {
+  return filename.toLowerCase().endsWith('.ozx');
+}
+
+/**
+ * Check if a list of files contains any OZX files.
+ *
+ * @param files - Array of files to check
+ * @returns True if at least one file is an OZX file
+ */
+export function hasOzxFiles(files: FileOrFolder[]): boolean {
+  return files.some(isOzxFile);
+}
+
+/**
+ * Get all OZX files from a list of files.
+ * + * @param files - Array of files to filter + * @returns Array containing only the OZX files + */ +export function getOzxFiles(files: FileOrFolder[]): FileOrFolder[] { + return files.filter(isOzxFile); +} + +/** + * Extract the path from a file for OZX API calls. + * Removes leading slashes and normalizes the path. + * + * @param file - The file to get the path from + * @returns Normalized path suitable for API calls + */ +export function getOzxFilePath(file: FileOrFolder): string { + let path = file.path; + // Remove leading slash if present + if (path.startsWith('/')) { + path = path.slice(1); + } + return path; +} diff --git a/tests/test_ozxzip.py b/tests/test_ozxzip.py new file mode 100644 index 00000000..2edc42fc --- /dev/null +++ b/tests/test_ozxzip.py @@ -0,0 +1,576 @@ +"""Tests for the OZX ZIP reader module.""" + +import os +import io +import struct +import json +import zlib +import tempfile +import pytest + +from fileglancer.ozxzip import ( + OZXReader, + OZXMetadata, + ZipEntry, + OZXReaderError, + InvalidZipError, + InvalidOZXError, + is_ozx_file, + ZIP_LOCAL_HEADER_SIG, + ZIP_CD_SIG, + ZIP_EOCD_SIG, + ZIP_EOCD64_SIG, + ZIP_EOCD64_LOC_SIG, + COMPRESSION_STORED, + COMPRESSION_DEFLATE, +) + + +def create_zip_local_header(filename: bytes, data: bytes, compression: int = COMPRESSION_STORED) -> bytes: + """Create a ZIP local file header.""" + crc = zlib.crc32(data) & 0xFFFFFFFF + + if compression == COMPRESSION_DEFLATE: + compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -zlib.MAX_WBITS) + compressed = compressor.compress(data) + compressor.flush() + comp_size = len(compressed) + data_to_write = compressed + else: + comp_size = len(data) + data_to_write = data + + uncomp_size = len(data) + + header = struct.pack( + '<4sHHHHHLLLHH', + ZIP_LOCAL_HEADER_SIG, + 20, # version needed + 0, # flags + compression, + 0, # mod time + 0, # mod date + crc, + comp_size, + uncomp_size, + len(filename), + 0 # extra field length + ) + return header + filename + data_to_write, crc, comp_size, uncomp_size + + +def create_zip_cd_entry(filename: bytes, crc: int, comp_size: int, uncomp_size: int, + local_offset: int, compression: int = COMPRESSION_STORED) -> bytes: + """Create a ZIP central directory entry.""" + header = struct.pack( + '<4sHHHHHHLLLHHHHHLL', + ZIP_CD_SIG, + 20, # version made by + 20, # version needed + 0, # flags + compression, + 0, # mod time + 0, # mod date + crc, + comp_size, + uncomp_size, + len(filename), + 0, # extra field length + 0, # comment length + 0, # disk number start + 0, # internal attributes + 0, # external attributes + local_offset + ) + return header + filename + + +def create_zip_eocd(cd_entries: int, cd_size: int, cd_offset: int, comment: bytes = b'') -> bytes: + """Create a ZIP end of central directory record.""" + return struct.pack( + '<4sHHHHLLH', + ZIP_EOCD_SIG, + 0, # disk number + 0, # disk with CD + cd_entries, + cd_entries, + cd_size, + cd_offset, + len(comment) + ) + comment + + +def create_simple_ozx(files: dict, comment: str = None) -> bytes: + """Create a simple OZX (ZIP) file with the given files. 
+ + Args: + files: Dictionary mapping filenames to file contents + comment: Optional ZIP comment (for OZX metadata) + + Returns: + bytes: Complete ZIP file data + """ + data = io.BytesIO() + cd_entries = [] + local_offsets = [] + + # Write local file headers and data + for filename, content in files.items(): + filename_bytes = filename.encode('utf-8') + offset = data.tell() + local_offsets.append(offset) + + local_data, crc, comp_size, uncomp_size = create_zip_local_header( + filename_bytes, content.encode('utf-8') if isinstance(content, str) else content + ) + data.write(local_data) + cd_entries.append((filename_bytes, crc, comp_size, uncomp_size, offset)) + + # Write central directory + cd_start = data.tell() + for filename_bytes, crc, comp_size, uncomp_size, offset in cd_entries: + cd_entry = create_zip_cd_entry(filename_bytes, crc, comp_size, uncomp_size, offset) + data.write(cd_entry) + cd_size = data.tell() - cd_start + + # Write EOCD + comment_bytes = comment.encode('utf-8') if comment else b'' + eocd = create_zip_eocd(len(files), cd_size, cd_start, comment_bytes) + data.write(eocd) + + return data.getvalue() + + +@pytest.fixture +def temp_ozx_file(): + """Create a temporary OZX file for testing.""" + files = { + 'zarr.json': '{"zarr_format": 3, "node_type": "group"}', + '0/zarr.json': '{"zarr_format": 3, "node_type": "array"}', + '0/c/0/0/0': b'\x00' * 100, # Binary chunk data + } + comment = json.dumps({ + "ome": { + "version": "0.5", + "zipFile": { + "centralDirectory": { + "jsonFirst": True + } + } + } + }) + + zip_data = create_simple_ozx(files, comment) + + with tempfile.NamedTemporaryFile(suffix='.ozx', delete=False) as f: + f.write(zip_data) + temp_path = f.name + + yield temp_path + + # Cleanup + if os.path.exists(temp_path): + os.unlink(temp_path) + + +@pytest.fixture +def temp_ozx_no_metadata(): + """Create a temporary OZX file without OME metadata.""" + files = { + 'data.txt': 'Hello, World!', + 'folder/nested.txt': 'Nested content', + } + + zip_data = create_simple_ozx(files) + + with tempfile.NamedTemporaryFile(suffix='.ozx', delete=False) as f: + f.write(zip_data) + temp_path = f.name + + yield temp_path + + if os.path.exists(temp_path): + os.unlink(temp_path) + + +class TestOZXReaderBasics: + """Test basic OZXReader functionality.""" + + def test_is_ozx_file(self): + """Test is_ozx_file helper function.""" + assert is_ozx_file('test.ozx') is True + assert is_ozx_file('test.OZX') is True + assert is_ozx_file('path/to/file.ozx') is True + assert is_ozx_file('test.zip') is False + assert is_ozx_file('test.zarr') is False + assert is_ozx_file('ozx') is False + + def test_open_close(self, temp_ozx_file): + """Test opening and closing OZX file.""" + reader = OZXReader(temp_ozx_file) + reader.open() + assert reader._fh is not None + assert reader.file_size > 0 + reader.close() + assert reader._fh is None + + def test_context_manager(self, temp_ozx_file): + """Test using OZXReader as context manager.""" + with OZXReader(temp_ozx_file) as reader: + assert reader._fh is not None + assert reader.file_size > 0 + assert reader._fh is None + + def test_file_not_found(self): + """Test opening non-existent file.""" + reader = OZXReader('/nonexistent/path/file.ozx') + with pytest.raises(FileNotFoundError): + reader.open() + + +class TestOZXMetadataParsing: + """Test OZX metadata parsing from ZIP comment.""" + + def test_parse_ome_metadata(self, temp_ozx_file): + """Test parsing OME metadata from ZIP comment.""" + with OZXReader(temp_ozx_file) as reader: + metadata = 
reader.get_metadata() + assert metadata is not None + assert metadata.version == "0.5" + assert metadata.json_first is True + + def test_no_metadata(self, temp_ozx_no_metadata): + """Test OZX file without OME metadata.""" + with OZXReader(temp_ozx_no_metadata) as reader: + metadata = reader.get_metadata() + assert metadata is None + + def test_invalid_json_comment(self): + """Test OZX file with invalid JSON comment.""" + files = {'test.txt': 'content'} + zip_data = create_simple_ozx(files, "not valid json") + + with tempfile.NamedTemporaryFile(suffix='.ozx', delete=False) as f: + f.write(zip_data) + temp_path = f.name + + try: + with OZXReader(temp_path) as reader: + metadata = reader.get_metadata() + assert metadata is None + finally: + os.unlink(temp_path) + + def test_json_without_ome_key(self): + """Test OZX file with JSON comment but no 'ome' key.""" + files = {'test.txt': 'content'} + comment = json.dumps({"other": "data"}) + zip_data = create_simple_ozx(files, comment) + + with tempfile.NamedTemporaryFile(suffix='.ozx', delete=False) as f: + f.write(zip_data) + temp_path = f.name + + try: + with OZXReader(temp_path) as reader: + metadata = reader.get_metadata() + assert metadata is None + finally: + os.unlink(temp_path) + + +class TestCentralDirectory: + """Test central directory parsing.""" + + def test_parse_central_directory(self, temp_ozx_file): + """Test parsing central directory.""" + with OZXReader(temp_ozx_file) as reader: + entries = reader.parse_central_directory() + assert 'zarr.json' in entries + assert '0/zarr.json' in entries + assert '0/c/0/0/0' in entries + + def test_entry_properties(self, temp_ozx_file): + """Test ZipEntry properties.""" + with OZXReader(temp_ozx_file) as reader: + entries = reader.parse_central_directory() + + json_entry = entries['zarr.json'] + assert json_entry.is_json_file is True + assert json_entry.is_directory is False + + chunk_entry = entries['0/c/0/0/0'] + assert chunk_entry.is_json_file is False + assert chunk_entry.uncompressed_size == 100 + + def test_json_first_optimization(self, temp_ozx_file): + """Test jsonFirst optimization stops at first non-JSON file.""" + with OZXReader(temp_ozx_file) as reader: + metadata = reader.get_metadata() + assert metadata.json_first is True + + # Parse with json_only=True + entries = reader.parse_central_directory(json_only=True) + + # Should have stopped before the binary chunk + # The exact behavior depends on the order in the central directory + assert 'zarr.json' in entries + + def test_list_files(self, temp_ozx_file): + """Test listing files in archive.""" + with OZXReader(temp_ozx_file) as reader: + files = reader.list_files() + assert 'zarr.json' in files + assert '0/zarr.json' in files + assert '0/c/0/0/0' in files + + def test_list_files_with_prefix(self, temp_ozx_file): + """Test listing files with prefix filter.""" + with OZXReader(temp_ozx_file) as reader: + files = reader.list_files(prefix='0/') + assert '0/zarr.json' in files + assert '0/c/0/0/0' in files + assert 'zarr.json' not in files + + def test_get_entry(self, temp_ozx_file): + """Test getting specific entry.""" + with OZXReader(temp_ozx_file) as reader: + entry = reader.get_entry('zarr.json') + assert entry is not None + assert entry.filename == 'zarr.json' + + missing = reader.get_entry('nonexistent.txt') + assert missing is None + + +class TestFileReading: + """Test reading files from archive.""" + + def test_read_file(self, temp_ozx_file): + """Test reading entire file.""" + with OZXReader(temp_ozx_file) as reader: + content = 
reader.read_file('zarr.json') + data = json.loads(content.decode('utf-8')) + assert data['zarr_format'] == 3 + assert data['node_type'] == 'group' + + def test_read_binary_file(self, temp_ozx_file): + """Test reading binary file.""" + with OZXReader(temp_ozx_file) as reader: + content = reader.read_file('0/c/0/0/0') + assert len(content) == 100 + assert content == b'\x00' * 100 + + def test_read_nonexistent_file(self, temp_ozx_file): + """Test reading nonexistent file.""" + with OZXReader(temp_ozx_file) as reader: + with pytest.raises(FileNotFoundError): + reader.read_file('nonexistent.txt') + + def test_stream_file(self, temp_ozx_file): + """Test streaming file content.""" + with OZXReader(temp_ozx_file) as reader: + chunks = list(reader.stream_file('zarr.json', buffer_size=10)) + content = b''.join(chunks) + data = json.loads(content.decode('utf-8')) + assert data['zarr_format'] == 3 + + +class TestRangeRequests: + """Test range request functionality.""" + + def test_stream_file_range(self, temp_ozx_no_metadata): + """Test streaming a range of file content.""" + with OZXReader(temp_ozx_no_metadata) as reader: + # "Hello, World!" = 13 bytes + # Get bytes 0-4 = "Hello" + content = b''.join(reader.stream_file_range('data.txt', 0, 4)) + assert content == b'Hello' + + def test_stream_file_range_middle(self, temp_ozx_no_metadata): + """Test streaming from middle of file.""" + with OZXReader(temp_ozx_no_metadata) as reader: + # Get bytes 7-11 = "World" + content = b''.join(reader.stream_file_range('data.txt', 7, 11)) + assert content == b'World' + + def test_stream_file_range_full(self, temp_ozx_no_metadata): + """Test streaming full file via range.""" + with OZXReader(temp_ozx_no_metadata) as reader: + content = b''.join(reader.stream_file_range('data.txt', 0, 12)) + assert content == b'Hello, World!' + + def test_stream_file_range_past_end(self, temp_ozx_no_metadata): + """Test range extending past end of file.""" + with OZXReader(temp_ozx_no_metadata) as reader: + # Request beyond file size - should clamp to file end + content = b''.join(reader.stream_file_range('data.txt', 7, 100)) + assert content == b'World!' + + def test_stream_file_range_invalid(self, temp_ozx_no_metadata): + """Test invalid range requests.""" + with OZXReader(temp_ozx_no_metadata) as reader: + with pytest.raises(ValueError): + list(reader.stream_file_range('data.txt', -1, 5)) + + with pytest.raises(ValueError): + list(reader.stream_file_range('data.txt', 10, 5)) + + +class TestCompression: + """Test handling of compressed files.""" + + def test_deflate_compression(self): + """Test reading DEFLATE compressed files.""" + # Create a ZIP with compressed content + content = b'Hello, this is some test content that should compress well. 
' * 10 + filename = b'compressed.txt' + + data = io.BytesIO() + + # Write local header with compression + local_data, crc, comp_size, uncomp_size = create_zip_local_header( + filename, content, compression=COMPRESSION_DEFLATE + ) + data.write(local_data) + + # Write central directory + cd_start = data.tell() + cd_entry = create_zip_cd_entry(filename, crc, comp_size, uncomp_size, 0, compression=COMPRESSION_DEFLATE) + data.write(cd_entry) + cd_size = data.tell() - cd_start + + # Write EOCD + eocd = create_zip_eocd(1, cd_size, cd_start) + data.write(eocd) + + with tempfile.NamedTemporaryFile(suffix='.ozx', delete=False) as f: + f.write(data.getvalue()) + temp_path = f.name + + try: + with OZXReader(temp_path) as reader: + read_content = reader.read_file('compressed.txt') + assert read_content == content + + # Test streaming too + streamed = b''.join(reader.stream_file('compressed.txt')) + assert streamed == content + finally: + os.unlink(temp_path) + + +class TestZipEntry: + """Test ZipEntry dataclass.""" + + def test_is_directory(self): + """Test is_directory property.""" + dir_entry = ZipEntry( + filename='folder/', + compressed_size=0, + uncompressed_size=0, + compression_method=0, + local_header_offset=0, + crc32=0 + ) + assert dir_entry.is_directory is True + + file_entry = ZipEntry( + filename='file.txt', + compressed_size=100, + uncompressed_size=100, + compression_method=0, + local_header_offset=0, + crc32=123456 + ) + assert file_entry.is_directory is False + + def test_is_json_file(self): + """Test is_json_file property.""" + test_cases = [ + ('zarr.json', True), + ('.zarray', True), + ('.zattrs', True), + ('.zgroup', True), + ('data/zarr.JSON', True), # case insensitive + ('data.txt', False), + ('image.png', False), + ('c/0/0/0', False), + ] + + for filename, expected in test_cases: + entry = ZipEntry( + filename=filename, + compressed_size=0, + uncompressed_size=0, + compression_method=0, + local_header_offset=0, + crc32=0 + ) + assert entry.is_json_file is expected, f"Failed for {filename}" + + +class TestOZXMetadata: + """Test OZXMetadata dataclass.""" + + def test_metadata_creation(self): + """Test creating OZXMetadata.""" + metadata = OZXMetadata(version="0.5", json_first=True) + assert metadata.version == "0.5" + assert metadata.json_first is True + + def test_metadata_defaults(self): + """Test default values.""" + metadata = OZXMetadata(version="0.4") + assert metadata.json_first is False + assert metadata.raw_comment is None + + +class TestEdgeCases: + """Test edge cases and error handling.""" + + def test_empty_archive(self): + """Test handling of empty archive.""" + files = {} + zip_data = create_simple_ozx(files) + + with tempfile.NamedTemporaryFile(suffix='.ozx', delete=False) as f: + f.write(zip_data) + temp_path = f.name + + try: + with OZXReader(temp_path) as reader: + entries = reader.parse_central_directory() + assert len(entries) == 0 + files = reader.list_files() + assert len(files) == 0 + finally: + os.unlink(temp_path) + + def test_reader_not_opened(self): + """Test error when reader not opened.""" + reader = OZXReader('/some/path.ozx') + with pytest.raises(OZXReaderError): + reader.parse_central_directory() + + def test_unicode_filenames(self): + """Test handling of Unicode filenames.""" + files = { + 'data/ζ—₯本θͺž.txt': 'Japanese text', + 'data/emoji_πŸŽ‰.txt': 'Party!', + } + zip_data = create_simple_ozx(files) + + with tempfile.NamedTemporaryFile(suffix='.ozx', delete=False) as f: + f.write(zip_data) + temp_path = f.name + + try: + with OZXReader(temp_path) 
as reader: + entries = reader.parse_central_directory() + assert 'data/ζ—₯本θͺž.txt' in entries + assert 'data/emoji_πŸŽ‰.txt' in entries + + content = reader.read_file('data/ζ—₯本θͺž.txt') + assert content.decode('utf-8') == 'Japanese text' + finally: + os.unlink(temp_path) From 33baf099850c9a84824a8e7369b456604a88cf74 Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Wed, 28 Jan 2026 02:09:21 -0500 Subject: [PATCH 2/9] Remove Zarr v2 support from OZX --- docs/RFC-9-OZX-Implementation.md | 285 ++++++++++++++++++ fileglancer/app.py | 273 +++++++++++++++++ fileglancer/model.py | 38 +++ .../__tests__/unitTests/ozxDetection.test.ts | 34 +-- frontend/src/queries/zarrQueries.ts | 203 ++++++++++++- 5 files changed, 813 insertions(+), 20 deletions(-) create mode 100644 docs/RFC-9-OZX-Implementation.md diff --git a/docs/RFC-9-OZX-Implementation.md b/docs/RFC-9-OZX-Implementation.md new file mode 100644 index 00000000..c539e537 --- /dev/null +++ b/docs/RFC-9-OZX-Implementation.md @@ -0,0 +1,285 @@ +# RFC-9 Zipped OME-Zarr (.ozx) Implementation + +**Date**: 2026-01-28 +**RFC Spec**: https://ngff.openmicroscopy.org/rfc/9/index.html +**OME-Zarr v0.5 Spec**: https://ngff.openmicroscopy.org/0.5/index.html + +## Overview + +This document describes the implementation of RFC-9 support for reading OME-Zarr data from ZIP archives (`.ozx` files) in Fileglancer. The implementation allows users to browse, preview, and access OME-Zarr imaging data stored in compressed ZIP archives without extracting them. + +**Important**: RFC-9 is designed specifically for OME-Zarr v0.5, which is built on **Zarr v3 only**. This implementation does not support Zarr v2 within OZX files. + +## Architecture + +``` +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Frontend β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ ozxDetection.ts │───▢│ OzxFetchStore│───▢│ zarrita/ome-zarrβ”‚ β”‚ +β”‚ β”‚ (detection) β”‚ β”‚ (custom store)β”‚ β”‚ (existing) β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β”‚ β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”‚β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ HTTP + Range requests + β–Ό +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Backend β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ /api/ozx-content/ │───▢│ OZXReader (ozxzip.py) β”‚ β”‚ +β”‚ β”‚ /api/ozx-metadata/ β”‚ β”‚ - ZIP64 support β”‚ β”‚ +β”‚ β”‚ /api/ozx-list/ β”‚ β”‚ - Partial CD parsing β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ - Range request streaming β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ 
+β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` + +## Files Created/Modified + +### Backend (Python) + +| File | Action | Description | +| ----------------------- | ---------- | ---------------------------------------- | +| `fileglancer/ozxzip.py` | **CREATE** | RFC-9 ZIP reader with partial CD parsing | +| `fileglancer/app.py` | MODIFY | Add `/api/ozx-*` endpoints | +| `fileglancer/model.py` | MODIFY | Add OZX Pydantic models | +| `tests/test_ozxzip.py` | **CREATE** | Unit tests for OZXReader (31 tests) | + +### Frontend (TypeScript) + +| File | Action | Description | +| ------------------------------------------------------- | ---------- | -------------------------------------- | +| `frontend/src/utils/ozxDetection.ts` | **CREATE** | `.ozx` file detection utilities | +| `frontend/src/queries/ozxQueries.ts` | **CREATE** | TanStack Query hooks and OzxFetchStore | +| `frontend/src/queries/zarrQueries.ts` | MODIFY | OZX detection integration | +| `frontend/src/__tests__/unitTests/ozxDetection.test.ts` | **CREATE** | Frontend detection tests (20 tests) | + +## Backend Implementation Details + +### OZXReader (`fileglancer/ozxzip.py`) + +The core ZIP reader implements: + +1. **EOCD Parsing**: Locates End of Central Directory record by scanning backwards from file end +2. **ZIP64 Support**: Handles large archives with ZIP64 extended fields +3. **OME Metadata**: Parses ZIP comment for RFC-9 OME metadata JSON +4. **jsonFirst Optimization**: When `jsonFirst=true` in metadata, stops parsing central directory after last JSON file +5. **Compression**: Supports STORE (uncompressed) and DEFLATE compression methods +6. **Range Streaming**: Efficient byte-range streaming for HTTP Range requests + +Key classes: + +- `OZXReader`: Main reader class with context manager support +- `OZXMetadata`: Parsed OME metadata from ZIP comment +- `ZipEntry`: Individual file entry from central directory + +### API Endpoints + +#### `GET /api/ozx-content/{path_name:path}?subpath={internal_path}` + +Streams file content from within an OZX archive. Supports HTTP Range requests for efficient chunk access. + +**Response Headers**: + +- `Accept-Ranges: bytes` +- `Content-Length: {size}` +- `Content-Range: bytes {start}-{end}/{total}` (for 206 responses) + +#### `HEAD /api/ozx-content/{path_name:path}?subpath={internal_path}` + +Returns file metadata without content body. + +#### `GET /api/ozx-metadata/{path_name:path}` + +Returns OZX archive metadata: + +```json +{ + "version": "0.5", + "json_first": true, + "file_count": 42, + "is_zip64": false +} +``` + +#### `GET /api/ozx-list/{path_name:path}?prefix={optional_prefix}` + +Lists files in the OZX archive: + +```json +{ + "files": ["zarr.json", "0/zarr.json", "0/c/0/0/0", ...] 
+}
+```
+
+## Frontend Implementation Details
+
+### Detection Utilities (`ozxDetection.ts`)
+
+```typescript
+// Check if a file is an OZX file
+isOzxFile(file: FileOrFolder): boolean
+
+// Check filename extension
+isOzxFilename(filename: string): boolean
+
+// Check if array contains OZX files
+hasOzxFiles(files: FileOrFolder[]): boolean
+
+// Filter to get only OZX files
+getOzxFiles(files: FileOrFolder[]): FileOrFolder[]
+```
+
+### OzxFetchStore (`ozxQueries.ts`)
+
+A zarrita-compatible store that reads from OZX archives via the API:
+
+```typescript
+class OzxFetchStore {
+  constructor(fspName: string, ozxPath: string);
+
+  // Get full file content
+  async get(key: string): Promise<Uint8Array | undefined>;
+
+  // Get byte range (for efficient chunk access)
+  async getRange(
+    key: string,
+    offset: number,
+    length: number
+  ): Promise<Uint8Array | undefined>;
+
+  // Check if file exists
+  async has(key: string): Promise<boolean>;
+
+  // List files with optional prefix
+  async list(prefix?: string): Promise<string[]>;
+}
+```
+
+### Query Hooks
+
+```typescript
+// Fetch OZX archive metadata
+useOzxMetadataQuery(fspName, ozxFilePath, enabled?)
+
+// Fetch list of files in OZX
+useOzxFileListQuery(fspName, ozxFilePath, prefix?, enabled?)
+
+// Fetch Zarr v3 metadata from OZX file (RFC-9 requires Zarr v3)
+useOzxZarrMetadataQuery({ fspName, ozxFile })
+```
+
+### Zarr Version Detection
+
+```typescript
+// Detects Zarr v3 in OZX archives (RFC-9 requires Zarr v3 only)
+detectOzxZarrVersions(files: string[]): ('v3')[]
+```
+
+Note: Unlike regular Zarr directories, which can be v2 or v3, OZX files per RFC-9 only support Zarr v3 (OME-Zarr v0.5). The detection function only looks for `zarr.json` files and ignores Zarr v2 markers (`.zarray`, `.zattrs`, `.zgroup`).
+
+## RFC-9 ZIP Comment Format
+
+The OZX file's ZIP comment contains OME metadata:
+
+```json
+{
+  "ome": {
+    "version": "0.5",
+    "zipFile": {
+      "centralDirectory": {
+        "jsonFirst": true
+      }
+    }
+  }
+}
+```
+
+When `jsonFirst` is true, JSON metadata files (`.json`, `.zattrs`, `.zarray`, `.zgroup`) are sorted first in the central directory, allowing partial parsing for metadata discovery.
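+
+As a quick way to inspect this comment outside of Fileglancer, it can be read
+with Python's standard `zipfile` module alone (a minimal sketch; `data.ozx` is
+a placeholder path, and the nested keys are assumed to be present):
+
+```python
+import json
+import zipfile
+
+# RFC-9 stores the OME metadata in the ZIP archive comment
+with zipfile.ZipFile('data.ozx') as zf:
+    meta = json.loads(zf.comment.decode('utf-8'))
+
+print(meta['ome']['version'],
+      meta['ome']['zipFile']['centralDirectory']['jsonFirst'])
+```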
+
+## Testing
+
+### Backend Tests
+
+```bash
+pixi run -e test pytest tests/test_ozxzip.py -v
+```
+
+Tests cover:
+
+- Basic reader operations (open, close, context manager)
+- Metadata parsing (valid, missing, invalid JSON)
+- Central directory parsing and jsonFirst optimization
+- File reading (text, binary, compressed)
+- Range request streaming
+- ZIP64 handling
+- Unicode filenames
+- Edge cases (empty archive, unopened reader)
+
+### Frontend Tests
+
+```bash
+pixi run test-frontend -- src/__tests__/unitTests/ozxDetection.test.ts
+```
+
+Tests cover:
+
+- File detection (extension matching, directories)
+- Array filtering functions
+- Path handling
+- Zarr version detection within OZX
+
+## Usage Example
+
+### Reading OZX in Frontend
+
+```typescript
+import { isOzxFile } from '@/utils/ozxDetection';
+import { useOzxZarrMetadataQuery } from '@/queries/zarrQueries';
+
+function ZarrViewer({ file, fspName }) {
+  // The hook disables itself unless `file` is an OZX file, so it is safe
+  // to call unconditionally (React hooks must not be called conditionally).
+  const { data, isLoading } = useOzxZarrMetadataQuery({
+    fspName,
+    ozxFile: file
+  });
+
+  if (isOzxFile(file) && data?.metadata) {
+    // Use data.metadata for display
+    // data.omeZarrUrl can be passed to viewers
+    // data.store provides the OzxFetchStore for chunk access
+  }
+}
+```
+
+### Direct API Access
+
+```bash
+# Get archive metadata
+curl http://localhost:7878/api/ozx-metadata/myFSP/path/to/data.ozx
+
+# List files
+curl http://localhost:7878/api/ozx-list/myFSP/path/to/data.ozx
+
+# Get file content (quote URLs containing "?" so the shell does not glob them)
+curl 'http://localhost:7878/api/ozx-content/myFSP/path/to/data.ozx?subpath=zarr.json'
+
+# Get range (for chunk access)
+curl -H "Range: bytes=0-1023" \
+  'http://localhost:7878/api/ozx-content/myFSP/path/to/data.ozx?subpath=0/c/0/0/0'
+```
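+
+The same Range semantics apply to programmatic clients. For example, a Python client can pull just the first kilobyte of a chunk without downloading the archive (a sketch; the host, FSP name, and chunk path are the placeholder values from the curl examples above):
+
+```python
+import requests
+
+# Request an inclusive byte range of one file inside the archive
+resp = requests.get(
+    'http://localhost:7878/api/ozx-content/myFSP/path/to/data.ozx',
+    params={'subpath': '0/c/0/0/0'},
+    headers={'Range': 'bytes=0-1023'},
+)
+assert resp.status_code == 206  # Partial Content
+chunk = resp.content            # 1024 bytes, as advertised by Content-Range
+```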
+
+## Future Enhancements
+
+1. **Write Support**: Currently read-only; could add the ability to update OZX files
+2. **Caching**: Add server-side caching of the central directory for frequently accessed archives
+3. **Thumbnail Generation**: Integrate with the existing thumbnail generation for OZX OME-Zarr
+4. **Neuroglancer Integration**: Generate Neuroglancer URLs pointing to OZX content
+
+## Related Documentation
+
+- [RFC-9 Specification](https://ngff.openmicroscopy.org/rfc/9/index.html)
+- [OME-NGFF Specification](https://ngff.openmicroscopy.org/)
+- [Zarr v3 Specification](https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html)
diff --git a/fileglancer/app.py b/fileglancer/app.py
index b346b005..9f0276be 100644
--- a/fileglancer/app.py
+++ b/fileglancer/app.py
@@ -34,6 +34,7 @@
 from fileglancer.utils import format_timestamp, guess_content_type, parse_range_header
 from fileglancer.user_context import UserContext, EffectiveUserContext, CurrentUserContext, UserContextConfigurationError
 from fileglancer.filestore import Filestore, RootCheckError
+from fileglancer.ozxzip import OZXReader, OZXReaderError, InvalidZipError, is_ozx_file
 from fileglancer.log import AccessLogMiddleware
 from x2s3.utils import get_read_access_acl, get_nosuchbucket_response, get_error_response
 
@@ -1214,6 +1215,286 @@ async def get_file_content(request: Request, path_name: str, subpath: Optional[s
         )
 
 
+    @app.head("/api/ozx-content/{path_name:path}")
+    async def head_ozx_file_content(
+        path_name: str,
+        subpath: str = Query(..., description="Path within the OZX file"),
+        username: str = Depends(get_current_user)
+    ):
+        """HEAD request for OZX file content (returns size and advertises Range support)."""
+
+        filestore_name, _, ozx_subpath = path_name.partition('/')
+
+        with _get_user_context(username):
+            filestore, error = _get_filestore(filestore_name)
+            if filestore is None:
+                raise HTTPException(status_code=404 if "not found" in error else 500, detail=error)
+
+            try:
+                ozx_file_path = filestore._check_path_in_root(ozx_subpath)
+            except RootCheckError as e:
+                raise HTTPException(status_code=400, detail=str(e))
+
+            if not is_ozx_file(ozx_file_path):
+                raise HTTPException(status_code=400, detail="Not an OZX file")
+
+            try:
+                reader = OZXReader(ozx_file_path)
+                reader.open()
+            except FileNotFoundError:
+                raise HTTPException(status_code=404, detail="OZX file not found")
+            except (InvalidZipError, OZXReaderError) as e:
+                raise HTTPException(status_code=400, detail=f"Invalid OZX file: {e}")
+
+        # Parse central directory and get entry (outside user context)
+        try:
+            reader.parse_central_directory()
+            entry = reader.get_entry(subpath)
+            if entry is None:
+                reader.close()
+                raise HTTPException(status_code=404, detail="File not found in OZX archive")
+
+            file_size = entry.uncompressed_size
+            content_type = guess_content_type(subpath)
+            file_name = subpath.split('/')[-1] if subpath else ''
+
+            headers = {
+                'Accept-Ranges': 'bytes',
+                'Content-Length': str(file_size),
+            }
+
+            if content_type == 'application/octet-stream' and file_name:
+                headers['Content-Disposition'] = f'attachment; filename="{file_name}"'
+
+            reader.close()
+            return Response(status_code=200, headers=headers, media_type=content_type)
+
+        except HTTPException:
+            # Propagate intended status codes (e.g. the 404 above) instead of remapping them to 500
+            reader.close()
+            raise
+        except Exception as e:
+            reader.close()
+            raise HTTPException(status_code=500, detail=str(e))
+
+
+    @app.get("/api/ozx-content/{path_name:path}")
+    async def get_ozx_file_content(
+        request: Request,
+        path_name: str,
+        subpath: str = Query(..., description="Path within the OZX file"),
+        username: str = Depends(get_current_user)
+    ):
+        """
+        Stream file content from within an OZX archive.
+        Supports HTTP Range requests for efficient chunk access.
+        """
+
+        filestore_name, _, ozx_subpath = path_name.partition('/')
+
+        with _get_user_context(username):
+            filestore, error = _get_filestore(filestore_name)
+            if filestore is None:
+                raise HTTPException(status_code=404 if "not found" in error else 500, detail=error)
+
+            try:
+                ozx_file_path = filestore._check_path_in_root(ozx_subpath)
+            except RootCheckError as e:
+                raise HTTPException(status_code=400, detail=str(e))
+
+            if not is_ozx_file(ozx_file_path):
+                raise HTTPException(status_code=400, detail="Not an OZX file")
+
+            try:
+                reader = OZXReader(ozx_file_path)
+                reader.open()
+            except FileNotFoundError:
+                raise HTTPException(status_code=404, detail="OZX file not found")
+            except (InvalidZipError, OZXReaderError) as e:
+                raise HTTPException(status_code=400, detail=f"Invalid OZX file: {e}")
+
+        # Parse central directory and get entry (outside user context)
+        # The file handle retains access rights
+        try:
+            reader.parse_central_directory()
+            entry = reader.get_entry(subpath)
+            if entry is None:
+                reader.close()
+                raise HTTPException(status_code=404, detail="File not found in OZX archive")
+
+            content_type = guess_content_type(subpath)
+            file_size = entry.uncompressed_size
+            file_name = subpath.split('/')[-1] if subpath else ''
+            range_header = request.headers.get('Range')
+
+            if range_header:
+                # Handle Range request (HTTP 206)
+                range_result = parse_range_header(range_header, file_size)
+                if range_result is None:
+                    reader.close()
+                    return Response(status_code=416, headers={'Content-Range': f'bytes */{file_size}'})
+
+                start, end = range_result
+
+                async def stream_range():
+                    try:
+                        for chunk in reader.stream_file_range(subpath, start, end):
+                            yield chunk
+                    finally:
+                        reader.close()
+
+                headers = {
+                    'Accept-Ranges': 'bytes',
+                    'Content-Length': str(end - start + 1),
+                    'Content-Range': f'bytes {start}-{end}/{file_size}',
+                }
+
+                if content_type == 'application/octet-stream' and file_name:
+                    headers['Content-Disposition'] = f'attachment; filename="{file_name}"'
+
+                return StreamingResponse(
+                    stream_range(),
+                    status_code=206,
+                    headers=headers,
+                    media_type=content_type
+                )
+            else:
+                # Full file (HTTP 200)
+                async def stream_full():
+                    try:
+                        for chunk in reader.stream_file(subpath):
+                            yield chunk
+                    finally:
+                        reader.close()
+
+                headers = {
+                    'Accept-Ranges': 'bytes',
+                    'Content-Length': str(file_size),
+                }
+
+                if content_type == 'application/octet-stream' and file_name:
+                    headers['Content-Disposition'] = f'attachment; filename="{file_name}"'
+
+                return StreamingResponse(
+                    stream_full(),
+                    status_code=200,
+                    headers=headers,
+                    media_type=content_type
+                )
+
+        except HTTPException:
+            # Let intended statuses (e.g. the 404 above) pass through untouched
+            reader.close()
+            raise
+        except FileNotFoundError:
+            reader.close()
+            raise HTTPException(status_code=404, detail="File not found in OZX archive")
+        except Exception as e:
+            reader.close()
+            logger.exception(f"Error reading OZX content: {e}")
+            raise HTTPException(status_code=500, detail=str(e))
+
+
+    @app.get("/api/ozx-metadata/{path_name:path}")
+    async def get_ozx_metadata(
+        path_name: str,
+        username: str = Depends(get_current_user)
+    ):
+        """
+        Get metadata about an OZX archive.
+        Returns OME version, jsonFirst flag, file count, and ZIP64 status.
+ """ + + filestore_name, _, ozx_subpath = path_name.partition('/') + + with _get_user_context(username): + filestore, error = _get_filestore(filestore_name) + if filestore is None: + raise HTTPException(status_code=404 if "not found" in error else 500, detail=error) + + try: + ozx_file_path = filestore._check_path_in_root(ozx_subpath) + except RootCheckError as e: + raise HTTPException(status_code=400, detail=str(e)) + + if not is_ozx_file(ozx_file_path): + raise HTTPException(status_code=400, detail="Not an OZX file") + + try: + reader = OZXReader(ozx_file_path) + reader.open() + except FileNotFoundError: + raise HTTPException(status_code=404, detail="OZX file not found") + except (InvalidZipError, OZXReaderError) as e: + raise HTTPException(status_code=400, detail=f"Invalid OZX file: {e}") + + # Get metadata outside user context + try: + metadata = reader.get_metadata() + entries = reader.parse_central_directory(json_only=metadata.json_first if metadata else False) + + result = { + "version": metadata.version if metadata else None, + "json_first": metadata.json_first if metadata else False, + "file_count": len(entries), + "is_zip64": reader.is_zip64 + } + + reader.close() + return result + + except Exception as e: + reader.close() + logger.exception(f"Error reading OZX metadata: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + + @app.get("/api/ozx-list/{path_name:path}") + async def list_ozx_files( + path_name: str, + prefix: str = Query('', description="Filter files by prefix"), + username: str = Depends(get_current_user) + ): + """ + List files in an OZX archive. + Optionally filter by path prefix. + """ + + filestore_name, _, ozx_subpath = path_name.partition('/') + + with _get_user_context(username): + filestore, error = _get_filestore(filestore_name) + if filestore is None: + raise HTTPException(status_code=404 if "not found" in error else 500, detail=error) + + try: + ozx_file_path = filestore._check_path_in_root(ozx_subpath) + except RootCheckError as e: + raise HTTPException(status_code=400, detail=str(e)) + + if not is_ozx_file(ozx_file_path): + raise HTTPException(status_code=400, detail="Not an OZX file") + + try: + reader = OZXReader(ozx_file_path) + reader.open() + except FileNotFoundError: + raise HTTPException(status_code=404, detail="OZX file not found") + except (InvalidZipError, OZXReaderError) as e: + raise HTTPException(status_code=400, detail=f"Invalid OZX file: {e}") + + # List files outside user context + try: + files = reader.list_files(prefix) + reader.close() + return {"files": files} + + except Exception as e: + reader.close() + logger.exception(f"Error listing OZX files: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + @app.get("/api/files/{path_name}") async def get_file_metadata(path_name: str, subpath: Optional[str] = Query(''), username: str = Depends(get_current_user)): diff --git a/fileglancer/model.py b/fileglancer/model.py index ffe5330e..52b645ad 100644 --- a/fileglancer/model.py +++ b/fileglancer/model.py @@ -303,3 +303,41 @@ class NeuroglancerShortLinkResponse(BaseModel): links: List[NeuroglancerShortLink] = Field( description="A list of stored Neuroglancer short links" ) + + +class OZXFileEntry(BaseModel): + """A file entry within an OZX archive""" + filename: str = Field( + description="The path of the file within the OZX archive" + ) + compressed_size: int = Field( + description="The compressed size of the file in bytes" + ) + uncompressed_size: int = Field( + description="The uncompressed size of the file in bytes" 
+ ) + compression_method: int = Field( + description="The compression method (0=STORE, 8=DEFLATE)" + ) + is_directory: bool = Field( + description="Whether this entry is a directory" + ) + + +class OZXMetadataResponse(BaseModel): + """Metadata about an OZX archive""" + version: Optional[str] = Field( + description="The OME version from the ZIP comment", + default=None + ) + json_first: bool = Field( + description="Whether JSON files are sorted first in the central directory", + default=False + ) + file_count: int = Field( + description="Number of files in the archive" + ) + is_zip64: bool = Field( + description="Whether the archive uses ZIP64 format", + default=False + ) diff --git a/frontend/src/__tests__/unitTests/ozxDetection.test.ts b/frontend/src/__tests__/unitTests/ozxDetection.test.ts index 58b5aa91..2418290d 100644 --- a/frontend/src/__tests__/unitTests/ozxDetection.test.ts +++ b/frontend/src/__tests__/unitTests/ozxDetection.test.ts @@ -122,27 +122,26 @@ describe('getOzxFilePath', () => { }); describe('detectOzxZarrVersions', () => { + // RFC-9 OZX is for OME-Zarr v0.5 which requires Zarr v3 only + it('should detect zarr v3 when zarr.json exists at root', () => { const files = ['zarr.json', '0/zarr.json', '0/c/0/0/0']; expect(detectOzxZarrVersions(files)).toEqual(['v3']); }); - it('should detect zarr v2 when .zarray exists at root', () => { + it('should NOT detect zarr v2 - RFC-9 requires Zarr v3', () => { + // .zarray and .zattrs are Zarr v2 markers, not supported in RFC-9 OZX const files = ['.zarray', '.zattrs', '0/0']; - expect(detectOzxZarrVersions(files)).toEqual(['v2']); - }); - - it('should detect zarr v2 when .zattrs exists at root', () => { - const files = ['.zattrs', '0/.zarray', '0/0/0']; - expect(detectOzxZarrVersions(files)).toEqual(['v2']); + expect(detectOzxZarrVersions(files)).toEqual([]); }); - it('should detect both versions when both exist', () => { + it('should only detect v3 even when v2 markers also exist', () => { + // RFC-9 OZX is Zarr v3 only, so v2 markers are ignored const files = ['zarr.json', '.zarray', '0/c/0/0/0']; - expect(detectOzxZarrVersions(files)).toEqual(['v2', 'v3']); + expect(detectOzxZarrVersions(files)).toEqual(['v3']); }); - it('should return empty array when no zarr files', () => { + it('should return empty array when no zarr.json files', () => { const files = ['data.txt', 'image.png']; expect(detectOzxZarrVersions(files)).toEqual([]); }); @@ -151,20 +150,17 @@ describe('detectOzxZarrVersions', () => { expect(detectOzxZarrVersions([])).toEqual([]); }); - it('should detect version from nested paths', () => { + it('should detect zarr.json from nested paths', () => { const files = ['folder/zarr.json', 'folder/.zattrs']; - // Nested paths are detected because we check for files ending with /name - // This allows detection of zarr data at any level in the archive + // Nested zarr.json is detected, .zattrs is ignored (v2 only) const result = detectOzxZarrVersions(files); - expect(result).toContain('v3'); // folder/zarr.json - expect(result).toContain('v2'); // folder/.zattrs + expect(result).toEqual(['v3']); }); - it('should detect from paths ending with marker files', () => { + it('should detect zarr.json from paths ending with /zarr.json', () => { const files = ['root/zarr.json', 'root/.zattrs']; - // Paths ending with /zarr.json should be detected + // Only zarr.json is detected for RFC-9 OZX const result = detectOzxZarrVersions(files); - expect(result).toContain('v3'); - expect(result).toContain('v2'); + expect(result).toEqual(['v3']); }); 
}); diff --git a/frontend/src/queries/zarrQueries.ts b/frontend/src/queries/zarrQueries.ts index 67f71de2..5bf42b6a 100644 --- a/frontend/src/queries/zarrQueries.ts +++ b/frontend/src/queries/zarrQueries.ts @@ -8,7 +8,13 @@ import { import type { Metadata } from '@/omezarr-helper'; import { getFileURL } from '@/utils'; import { fetchFileAsJson } from './queryUtils'; -import { FileOrFolder } from '@/shared.types'; +import { isOzxFile } from '@/utils/ozxDetection'; +import { + OzxFetchStore, + getOzxContentUrl, + useOzxFileListQuery +} from './ozxQueries'; +import type { FileOrFolder } from '@/shared.types'; export type OpenWithToolUrls = { copy: string; @@ -319,3 +325,198 @@ export function useOmeZarrThumbnailQuery( retry: false }); } + +// OZX (Zipped OME-Zarr) types +type OzxZarrMetadataQueryParams = { + fspName: string | undefined; + ozxFile: FileOrFolder | undefined | null; +}; + +type OzxZarrMetadataResult = { + metadata: ZarrMetadata; + omeZarrUrl: string | null; + availableVersions: ('v2' | 'v3')[]; + store: OzxFetchStore | null; +}; + +/** + * Detects if an OZX archive contains Zarr v3 data. + * RFC-9 OZX files are specifically for OME-Zarr v0.5 which requires Zarr v3. + * @param files - Array of file paths within the OZX archive + * @returns Array containing ['v3'] if zarr.json found, empty array otherwise + */ +export function detectOzxZarrVersions(files: string[]): 'v3'[] { + if (!files || files.length === 0) { + return []; + } + + // RFC-9 OZX is for OME-Zarr v0.5 which is Zarr v3 only + // Check for zarr.json at root or in subdirectories + const hasZarrJson = files.some( + f => f === 'zarr.json' || f.endsWith('/zarr.json') + ); + + return hasZarrJson ? ['v3'] : []; +} + +/** + * Fetches Zarr metadata from an OZX archive. + * Uses OzxFetchStore to read files from within the ZIP archive. 
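+ * The returned store is zarrita-compatible, so viewers can route chunk reads
+ * through OzxFetchStore.getRange(), which maps to HTTP Range requests on the
+ * archive content endpoint.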
+ */ +async function fetchOzxZarrMetadata( + fspName: string, + ozxFilePath: string, + files: string[] +): Promise { + const store = new OzxFetchStore(fspName, ozxFilePath); + const availableVersions = detectOzxZarrVersions(files); + + // Get the base URL for OME-Zarr viewers (using empty internal path) + const baseUrl = getOzxContentUrl(fspName, ozxFilePath, ''); + + // Default to Zarr v3 when available + if (availableVersions.includes('v3')) { + const zarrJsonContent = await store.get('zarr.json'); + if (!zarrJsonContent) { + log.warn('Could not read zarr.json from OZX'); + return { + metadata: null, + omeZarrUrl: null, + availableVersions, + store + }; + } + + const attrs = JSON.parse( + new TextDecoder().decode(zarrJsonContent) + ) as ZarrV3Attrs; + + if (attrs.node_type === 'array') { + log.info('Getting Zarr array from OZX with Zarr version 3'); + // For OZX arrays, we need a custom store - use baseUrl which routes through OZX API + const arr = await getZarrArray(baseUrl, 3); + const shapes = [arr.shape]; + return { + metadata: { + arr, + shapes, + multiscale: undefined, + scales: undefined, + omero: undefined, + labels: undefined, + zarrVersion: 3 + }, + omeZarrUrl: null, + availableVersions, + store + }; + } else if (attrs.node_type === 'group') { + if (attrs.attributes?.ome?.multiscales) { + log.info('Getting OME-Zarr metadata from OZX with Zarr version 3'); + // Use the OZX content URL as the base for OME-Zarr + const metadata = await getOmeZarrMetadata(baseUrl); + + // Check for labels + try { + const labelsContent = await store.get('labels/zarr.json'); + if (labelsContent) { + const labelsAttrs = JSON.parse( + new TextDecoder().decode(labelsContent) + ) as ZarrV3Attrs; + metadata.labels = labelsAttrs?.attributes?.ome?.labels; + if (metadata.labels) { + log.info('OME-Zarr Labels found in OZX: ', metadata.labels); + } + } + } catch (error) { + log.trace('Could not fetch labels attrs from OZX: ', error); + } + + return { + metadata, + omeZarrUrl: baseUrl, + availableVersions, + store + }; + } else { + log.info('OZX Zarrv3 group has no multiscales', attrs.attributes); + return { + metadata: null, + omeZarrUrl: null, + availableVersions, + store + }; + } + } else { + log.warn('Unknown OZX Zarrv3 node type', attrs.node_type); + return { + metadata: null, + omeZarrUrl: null, + availableVersions, + store + }; + } + } + + // RFC-9 OZX is for OME-Zarr v0.5 which requires Zarr v3 + // If we reach here, no valid zarr.json was found + log.debug('No Zarr v3 data detected in OZX (RFC-9 requires Zarr v3)'); + return { + metadata: null, + omeZarrUrl: null, + availableVersions: [], + store + }; +} + +/** + * Hook to fetch Zarr metadata from an OZX (Zipped OME-Zarr) file. + * This hook handles: + * 1. Listing files within the OZX archive + * 2. Detecting Zarr version + * 3. Reading metadata + * 4. 
Providing an OzxFetchStore for chunk access + */ +export function useOzxZarrMetadataQuery( + params: OzxZarrMetadataQueryParams +): UseQueryResult { + const { fspName, ozxFile } = params; + + // First, get the file list from the OZX + const fileListQuery = useOzxFileListQuery( + fspName, + ozxFile?.path, + undefined, + !!fspName && !!ozxFile && isOzxFile(ozxFile) + ); + + return useQuery({ + queryKey: ['ozx', 'zarr', 'metadata', fspName || '', ozxFile?.path || ''], + queryFn: async () => { + if (!fspName || !ozxFile) { + throw new Error('fspName and ozxFile are required'); + } + if (!fileListQuery.data) { + throw new Error('File list not available'); + } + return await fetchOzxZarrMetadata( + fspName, + ozxFile.path, + fileListQuery.data + ); + }, + enabled: + !!fspName && + !!ozxFile && + isOzxFile(ozxFile) && + !!fileListQuery.data && + fileListQuery.data.length > 0 && + detectOzxZarrVersions(fileListQuery.data).length > 0, + staleTime: 5 * 60 * 1000, + retry: false + }); +} + +// Re-export OZX detection utilities for convenience +export { isOzxFile } from '@/utils/ozxDetection'; +export { OzxFetchStore, getOzxContentUrl } from './ozxQueries'; From d9313d69f22c71d58587ecd2f1bf3e56e58efdc8 Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Wed, 28 Jan 2026 02:39:46 -0500 Subject: [PATCH 3/9] Factor out generic Zip support, improve stop condition --- fileglancer/ozxzip.py | 635 +++++++---------------------------------- fileglancer/zipread.py | 601 ++++++++++++++++++++++++++++++++++++++ tests/test_ozxzip.py | 29 +- tests/test_zipread.py | 451 +++++++++++++++++++++++++++++ 4 files changed, 1161 insertions(+), 555 deletions(-) create mode 100644 fileglancer/zipread.py create mode 100644 tests/test_zipread.py diff --git a/fileglancer/ozxzip.py b/fileglancer/ozxzip.py index 3778ff3f..d600ad68 100644 --- a/fileglancer/ozxzip.py +++ b/fileglancer/ozxzip.py @@ -1,111 +1,90 @@ """RFC-9 compliant reader for .ozx (Zipped OME-Zarr) files. 
RFC-9 Spec: https://ngff.openmicroscopy.org/rfc/9/index.html +OME-Zarr v0.5 Spec: https://ngff.openmicroscopy.org/0.5/index.html -This module provides functionality to read OME-Zarr data from ZIP archives -with support for: -- Partial central directory parsing (jsonFirst optimization) -- ZIP64 format for large files -- Range request streaming for chunks +This module extends the generic ZipReader with OZX-specific functionality: +- OME metadata parsing from ZIP comment +- jsonFirst optimization for partial central directory parsing """ -import struct import json -import zlib -from dataclasses import dataclass, field -from typing import Optional, Dict, Generator, BinaryIO, List -from io import BytesIO +from dataclasses import dataclass +from typing import Optional, Dict from loguru import logger -# ZIP signatures -ZIP_LOCAL_HEADER_SIG = b'\x50\x4b\x03\x04' -ZIP_CD_SIG = b'\x50\x4b\x01\x02' -ZIP_EOCD_SIG = b'\x50\x4b\x05\x06' -ZIP_EOCD64_SIG = b'\x50\x4b\x06\x06' -ZIP_EOCD64_LOC_SIG = b'\x50\x4b\x06\x07' - -# Compression methods -COMPRESSION_STORED = 0 -COMPRESSION_DEFLATE = 8 - -# ZIP64 marker value -ZIP64_MARKER = 0xFFFFFFFF -ZIP64_MARKER_16 = 0xFFFF - -# Extra field header IDs -ZIP64_EXTRA_ID = 0x0001 - -# Default buffer size for streaming -DEFAULT_BUFFER_SIZE = 8192 - -# Maximum EOCD search size (65KB comment + 22 byte EOCD header) -MAX_EOCD_SEARCH_SIZE = 65536 + 22 +from .zipread import ZipReader, ZipEntry, ZipReaderError, InvalidZipError @dataclass class OZXMetadata: - """Parsed metadata from ZIP comment (RFC-9 format).""" + """Parsed OME metadata from ZIP comment (RFC-9 format). + + RFC-9 defines the ZIP comment format as: + { + "ome": { + "version": "0.5", + "zipFile": { + "centralDirectory": { + "jsonFirst": true + } + } + } + } + """ version: str json_first: bool = False raw_comment: Optional[str] = None -@dataclass -class ZipEntry: - """A file entry from the ZIP central directory.""" - filename: str - compressed_size: int - uncompressed_size: int - compression_method: int # 0=STORE, 8=DEFLATE - local_header_offset: int - crc32: int - extra_field: bytes = field(default_factory=bytes, repr=False) - - @property - def is_directory(self) -> bool: - """Check if this entry represents a directory.""" - return self.filename.endswith('/') - - @property - def is_json_file(self) -> bool: - """Check if this is a JSON metadata file (for jsonFirst optimization).""" - name = self.filename.lower() - return (name.endswith('.json') or - name.endswith('.zattrs') or - name.endswith('.zarray') or - name.endswith('.zgroup')) - - -class OZXReaderError(Exception): +class OZXReaderError(ZipReaderError): """Base exception for OZX reader errors.""" pass -class InvalidZipError(OZXReaderError): - """Raised when the ZIP file is invalid or corrupted.""" - pass - - class InvalidOZXError(OZXReaderError): """Raised when the file is not a valid OZX file.""" pass -class OZXReader: +def is_json_metadata_file(filename: str) -> bool: + """Check if a filename is a JSON metadata file. + + Used for the jsonFirst optimization - these files are sorted + first in the central directory when jsonFirst=True. + + Args: + filename: The filename to check + + Returns: + True if this is a JSON metadata file + """ + name = filename.lower() + return (name.endswith('.json') or + name.endswith('.zattrs') or + name.endswith('.zarray') or + name.endswith('.zgroup')) + + +class OZXReader(ZipReader): """ - RFC-9 compliant reader for .ozx files. + RFC-9 compliant reader for .ozx (Zipped OME-Zarr) files. 
- Supports: - - Partial central directory parsing (jsonFirst optimization) - - ZIP64 format for large files - - Range requests for streaming chunks + Extends ZipReader with OZX-specific functionality: + - Parses OME metadata from ZIP comment + - Supports jsonFirst optimization for partial central directory parsing + + Note: RFC-9 is for OME-Zarr v0.5 which requires Zarr v3 only. Usage: with OZXReader('/path/to/file.ozx') as reader: - metadata = reader.get_metadata() - entries = reader.parse_central_directory(json_only=metadata.json_first) - content = reader.read_file('path/in/archive.json') + metadata = reader.get_ome_metadata() + if metadata and metadata.json_first: + entries = reader.parse_central_directory(json_only=True) + else: + entries = reader.parse_central_directory() + content = reader.read_file('zarr.json') """ def __init__(self, file_path: str): @@ -114,19 +93,11 @@ def __init__(self, file_path: str): Args: file_path: Path to the .ozx file """ - self.file_path = file_path - self._fh: Optional[BinaryIO] = None - self._file_size: int = 0 - self._metadata: Optional[OZXMetadata] = None - self._entries: Dict[str, ZipEntry] = {} - self._cd_offset: int = 0 - self._cd_size: int = 0 - self._cd_entries_count: int = 0 - self._is_zip64: bool = False - self._cd_parsed: bool = False + super().__init__(file_path) + self._ome_metadata: Optional[OZXMetadata] = None def open(self) -> 'OZXReader': - """Open the file and parse EOCD. + """Open the file, parse EOCD, and extract OME metadata. Returns: Self for method chaining @@ -135,50 +106,32 @@ def open(self) -> 'OZXReader': FileNotFoundError: If the file doesn't exist InvalidZipError: If the file is not a valid ZIP """ - import os - self._fh = open(self.file_path, 'rb') - self._file_size = os.fstat(self._fh.fileno()).st_size - self._parse_eocd() + super().open() + # Parse OME metadata from ZIP comment + self._ome_metadata = self._parse_ome_comment(self.comment) return self - def close(self): - """Close the file handle.""" - if self._fh: - self._fh.close() - self._fh = None - - def __enter__(self) -> 'OZXReader': - return self.open() - - def __exit__(self, *args): - self.close() - - @property - def file_size(self) -> int: - """Get the size of the OZX file.""" - return self._file_size - - @property - def is_zip64(self) -> bool: - """Check if this is a ZIP64 format archive.""" - return self._is_zip64 - - def get_metadata(self) -> Optional[OZXMetadata]: + def get_ome_metadata(self) -> Optional[OZXMetadata]: """Get parsed OME metadata from ZIP comment. Returns: OZXMetadata if valid OME metadata found, None otherwise """ - return self._metadata + return self._ome_metadata + + # Alias for backward compatibility + def get_metadata(self) -> Optional[OZXMetadata]: + """Alias for get_ome_metadata() for backward compatibility.""" + return self.get_ome_metadata() def parse_central_directory(self, json_only: bool = False) -> Dict[str, ZipEntry]: """ - Parse the central directory. + Parse the central directory with optional jsonFirst optimization. Args: json_only: If True and jsonFirst=True in metadata, stop parsing - after the last JSON file. This is the RFC-9 optimization - for metadata discovery. + after the last JSON metadata file. This is the RFC-9 + optimization for efficient metadata discovery. 
Returns: Dictionary mapping filenames to ZipEntry objects @@ -186,389 +139,18 @@ def parse_central_directory(self, json_only: bool = False) -> Dict[str, ZipEntry Raises: InvalidZipError: If central directory is corrupted """ - if self._fh is None: - raise OZXReaderError("File not opened") - - if self._cd_parsed and not json_only: - return self._entries - - self._fh.seek(self._cd_offset) - entries: Dict[str, ZipEntry] = {} - - for i in range(self._cd_entries_count): - # Read CD file header (46 bytes minimum) - header = self._fh.read(46) - if len(header) < 46 or header[:4] != ZIP_CD_SIG: - raise InvalidZipError(f"Invalid central directory entry at index {i}") - - # Parse header fields - (version_made, version_needed, flags, compression, - mod_time, mod_date, crc32, comp_size, uncomp_size, - name_len, extra_len, comment_len, disk_start, - internal_attr, external_attr, local_offset) = struct.unpack( - ' 0 else b'' - - # Skip comment - if comment_len > 0: - self._fh.seek(comment_len, 1) - - # Handle ZIP64 extra field if needed - if comp_size == ZIP64_MARKER or uncomp_size == ZIP64_MARKER or local_offset == ZIP64_MARKER: - comp_size, uncomp_size, local_offset = self._parse_zip64_extra( - extra, comp_size, uncomp_size, local_offset) - - entry = ZipEntry( - filename=filename, - compressed_size=comp_size, - uncompressed_size=uncomp_size, - compression_method=compression, - local_header_offset=local_offset, - crc32=crc32, - extra_field=extra - ) - - entries[filename] = entry - - # jsonFirst optimization: stop early if we've hit non-JSON files - if json_only and self._metadata and self._metadata.json_first: - if not entry.is_directory and not entry.is_json_file: - logger.debug(f"jsonFirst optimization: stopping at {filename}") - break - - self._entries.update(entries) - if not json_only: - self._cd_parsed = True - - return entries - - def list_files(self, prefix: str = "") -> List[str]: - """List files in archive, optionally filtered by prefix. - - Args: - prefix: Only return files starting with this prefix - - Returns: - List of filenames matching the prefix - """ - if not self._cd_parsed: - self.parse_central_directory() - - if prefix: - return [name for name in self._entries.keys() - if name.startswith(prefix) and not self._entries[name].is_directory] - return [name for name in self._entries.keys() - if not self._entries[name].is_directory] - - def get_entry(self, path: str) -> Optional[ZipEntry]: - """Get info about a specific file in the archive. - - Args: - path: Path within the archive - - Returns: - ZipEntry if found, None otherwise - """ - if not self._cd_parsed: - self.parse_central_directory() - return self._entries.get(path) - - def read_file(self, path: str) -> bytes: - """Read entire file from archive. - - Args: - path: Path within the archive - - Returns: - File contents as bytes - - Raises: - FileNotFoundError: If path not found in archive - InvalidZipError: If decompression fails - """ - return b''.join(self.stream_file(path)) - - def stream_file(self, path: str, buffer_size: int = DEFAULT_BUFFER_SIZE) -> Generator[bytes, None, None]: - """Stream file content from archive. 
- - Args: - path: Path within the archive - buffer_size: Size of chunks to yield - - Yields: - Chunks of file content - - Raises: - FileNotFoundError: If path not found in archive - """ - if self._fh is None: - raise OZXReaderError("File not opened") - - entry = self.get_entry(path) - if entry is None: - raise FileNotFoundError(f"File not found in archive: {path}") - - # Seek to local file header and skip it - self._fh.seek(entry.local_header_offset) - local_header = self._fh.read(30) - if local_header[:4] != ZIP_LOCAL_HEADER_SIG: - raise InvalidZipError(f"Invalid local header for {path}") - - # Get local header name and extra lengths - name_len, extra_len = struct.unpack(' 0: - chunk_size = min(buffer_size, remaining) - chunk = self._fh.read(chunk_size) - if not chunk: - break - yield chunk - remaining -= len(chunk) - - elif entry.compression_method == COMPRESSION_DEFLATE: - # Compressed - need to decompress - decompressor = zlib.decompressobj(-zlib.MAX_WBITS) - remaining = entry.compressed_size - - while remaining > 0: - chunk_size = min(buffer_size, remaining) - compressed_chunk = self._fh.read(chunk_size) - if not compressed_chunk: - break - remaining -= len(compressed_chunk) - - decompressed = decompressor.decompress(compressed_chunk) - if decompressed: - yield decompressed - - # Flush any remaining data - final = decompressor.flush() - if final: - yield final + if json_only and self._ome_metadata and self._ome_metadata.json_first: + # Use the stop condition to implement jsonFirst optimization + def stop_at_non_json(entry: ZipEntry, index: int) -> bool: + if entry.is_directory: + return False + return not is_json_metadata_file(entry.filename) + + return super().parse_central_directory(stop_condition=stop_at_non_json) else: - raise InvalidZipError(f"Unsupported compression method: {entry.compression_method}") - - def stream_file_range(self, path: str, start: int, end: int, - buffer_size: int = DEFAULT_BUFFER_SIZE) -> Generator[bytes, None, None]: - """Stream a byte range of uncompressed file content. - - Note: For DEFLATE compressed files, this must decompress from the - beginning to reach the desired offset. 
- - Args: - path: Path within the archive - start: Start byte offset (inclusive) - end: End byte offset (inclusive) - buffer_size: Size of chunks to yield - - Yields: - Chunks of file content within the specified range - - Raises: - FileNotFoundError: If path not found in archive - ValueError: If range is invalid - """ - if self._fh is None: - raise OZXReaderError("File not opened") - - entry = self.get_entry(path) - if entry is None: - raise FileNotFoundError(f"File not found in archive: {path}") - - if start < 0: - raise ValueError("Start position cannot be negative") - if end < start: - raise ValueError("End position cannot be less than start position") - if start >= entry.uncompressed_size: - return # Nothing to return - - # Clamp end to file size - end = min(end, entry.uncompressed_size - 1) - range_length = end - start + 1 - - # Seek to local file header and skip it - self._fh.seek(entry.local_header_offset) - local_header = self._fh.read(30) - if local_header[:4] != ZIP_LOCAL_HEADER_SIG: - raise InvalidZipError(f"Invalid local header for {path}") - - name_len, extra_len = struct.unpack(' 0: - chunk_size = min(buffer_size, remaining) - chunk = self._fh.read(chunk_size) - if not chunk: - break - yield chunk - remaining -= len(chunk) - - elif entry.compression_method == COMPRESSION_DEFLATE: - # For compressed files, we need to decompress from the start - # and skip to the desired offset - decompressor = zlib.decompressobj(-zlib.MAX_WBITS) - compressed_remaining = entry.compressed_size - decompressed_pos = 0 - output_remaining = range_length - buffer = BytesIO() - - while compressed_remaining > 0 and output_remaining > 0: - chunk_size = min(buffer_size, compressed_remaining) - compressed_chunk = self._fh.read(chunk_size) - if not compressed_chunk: - break - compressed_remaining -= len(compressed_chunk) - - decompressed = decompressor.decompress(compressed_chunk) - if not decompressed: - continue - - # Handle the decompressed chunk - chunk_start = 0 - chunk_len = len(decompressed) - - # Skip data before our range - if decompressed_pos + chunk_len <= start: - decompressed_pos += chunk_len - continue - - # Calculate how much of this chunk to skip - if decompressed_pos < start: - chunk_start = start - decompressed_pos - - # Calculate how much of this chunk to output - output_bytes = min(chunk_len - chunk_start, output_remaining) - - if output_bytes > 0: - yield decompressed[chunk_start:chunk_start + output_bytes] - output_remaining -= output_bytes - - decompressed_pos += chunk_len - - # Flush and handle remaining - if output_remaining > 0: - final = decompressor.flush() - if final: - # Apply same range logic to final chunk - chunk_len = len(final) - if decompressed_pos + chunk_len > start: - chunk_start = max(0, start - decompressed_pos) - output_bytes = min(chunk_len - chunk_start, output_remaining) - if output_bytes > 0: - yield final[chunk_start:chunk_start + output_bytes] - else: - raise InvalidZipError(f"Unsupported compression method: {entry.compression_method}") - - def _parse_eocd(self): - """Parse End of Central Directory record. 
- - Raises: - InvalidZipError: If EOCD not found or invalid - """ - if self._fh is None: - raise OZXReaderError("File not opened") - - # Search backwards from end of file for EOCD signature - search_size = min(MAX_EOCD_SEARCH_SIZE, self._file_size) - self._fh.seek(self._file_size - search_size) - data = self._fh.read(search_size) - - # Find EOCD signature (searching from end) - eocd_pos = data.rfind(ZIP_EOCD_SIG) - if eocd_pos == -1: - raise InvalidZipError("End of Central Directory not found") - - # Position in file - eocd_file_pos = self._file_size - search_size + eocd_pos - - # Parse EOCD (22 bytes minimum) - eocd = data[eocd_pos:eocd_pos + 22] - if len(eocd) < 22: - raise InvalidZipError("Truncated EOCD record") - - (disk_num, cd_disk, cd_entries_this_disk, cd_entries_total, - cd_size, cd_offset, comment_len) = struct.unpack(' 0: - comment_data = data[eocd_pos + 22:eocd_pos + 22 + comment_len] - if len(comment_data) == comment_len: - comment = comment_data.decode('utf-8', errors='replace') - - # Check for ZIP64 - if (cd_offset == ZIP64_MARKER or cd_size == ZIP64_MARKER or - cd_entries_total == ZIP64_MARKER_16): - self._is_zip64 = True - self._parse_zip64_eocd(eocd_file_pos) - else: - self._cd_offset = cd_offset - self._cd_size = cd_size - self._cd_entries_count = cd_entries_total - - # Parse ZIP comment for OZX metadata - self._metadata = self._parse_zip_comment(comment) - - def _parse_zip64_eocd(self, eocd_pos: int): - """Parse ZIP64 End of Central Directory records. - - Args: - eocd_pos: Position of standard EOCD in file + return super().parse_central_directory() - Raises: - InvalidZipError: If ZIP64 records not found or invalid - """ - if self._fh is None: - raise OZXReaderError("File not opened") - - # Look for ZIP64 EOCD Locator (20 bytes before EOCD) - loc_pos = eocd_pos - 20 - if loc_pos < 0: - raise InvalidZipError("ZIP64 EOCD Locator not found") - - self._fh.seek(loc_pos) - locator = self._fh.read(20) - - if locator[:4] != ZIP_EOCD64_LOC_SIG: - raise InvalidZipError("Invalid ZIP64 EOCD Locator") - - # Parse locator to get ZIP64 EOCD offset - (zip64_disk, zip64_eocd_offset, total_disks) = struct.unpack( - ' Optional[OZXMetadata]: + def _parse_ome_comment(self, comment: str) -> Optional[OZXMetadata]: """Parse ZIP comment for RFC-9 OME metadata. RFC-9 comment format: @@ -624,47 +206,6 @@ def _parse_zip_comment(self, comment: str) -> Optional[OZXMetadata]: logger.debug(f"Failed to parse ZIP comment as JSON: {e}") return None - def _parse_zip64_extra(self, extra: bytes, comp_size: int, - uncomp_size: int, local_offset: int) -> tuple: - """Parse ZIP64 extra field to get actual values. - - Args: - extra: Extra field data - comp_size: Compressed size from CD (may be 0xFFFFFFFF) - uncomp_size: Uncompressed size from CD (may be 0xFFFFFFFF) - local_offset: Local header offset from CD (may be 0xFFFFFFFF) - - Returns: - Tuple of (actual_comp_size, actual_uncomp_size, actual_local_offset) - """ - offset = 0 - while offset + 4 <= len(extra): - header_id, data_size = struct.unpack(' bool: """Check if a filename has the .ozx extension. 
@@ -676,3 +217,19 @@ def is_ozx_file(filename: str) -> bool: True if the file has a .ozx extension """ return filename.lower().endswith('.ozx') + + +# Re-export commonly used items from zipread for convenience +__all__ = [ + 'OZXReader', + 'OZXMetadata', + 'OZXReaderError', + 'InvalidOZXError', + 'is_ozx_file', + 'is_json_metadata_file', + # Re-exports from zipread + 'ZipReader', + 'ZipEntry', + 'ZipReaderError', + 'InvalidZipError', +] diff --git a/fileglancer/zipread.py b/fileglancer/zipread.py new file mode 100644 index 00000000..bd5f82cd --- /dev/null +++ b/fileglancer/zipread.py @@ -0,0 +1,601 @@ +"""Generic ZIP file reader with streaming support. + +This module provides functionality to read ZIP archives with support for: +- ZIP64 format for large files +- STORE and DEFLATE compression methods +- Range request streaming for efficient chunk access +""" + +import struct +import zlib +from dataclasses import dataclass, field +from typing import Optional, Dict, Generator, BinaryIO, List, Callable +from io import BytesIO + +from loguru import logger + +# ZIP signatures +ZIP_LOCAL_HEADER_SIG = b'\x50\x4b\x03\x04' +ZIP_CD_SIG = b'\x50\x4b\x01\x02' +ZIP_EOCD_SIG = b'\x50\x4b\x05\x06' +ZIP_EOCD64_SIG = b'\x50\x4b\x06\x06' +ZIP_EOCD64_LOC_SIG = b'\x50\x4b\x06\x07' + +# Compression methods +COMPRESSION_STORED = 0 +COMPRESSION_DEFLATE = 8 + +# ZIP64 marker value +ZIP64_MARKER = 0xFFFFFFFF +ZIP64_MARKER_16 = 0xFFFF + +# Extra field header IDs +ZIP64_EXTRA_ID = 0x0001 + +# Default buffer size for streaming +DEFAULT_BUFFER_SIZE = 8192 + +# Maximum EOCD search size (65KB comment + 22 byte EOCD header) +MAX_EOCD_SEARCH_SIZE = 65536 + 22 + + +@dataclass +class ZipEntry: + """A file entry from the ZIP central directory.""" + filename: str + compressed_size: int + uncompressed_size: int + compression_method: int # 0=STORE, 8=DEFLATE + local_header_offset: int + crc32: int + extra_field: bytes = field(default_factory=bytes, repr=False) + + @property + def is_directory(self) -> bool: + """Check if this entry represents a directory.""" + return self.filename.endswith('/') + + +class ZipReaderError(Exception): + """Base exception for ZIP reader errors.""" + pass + + +class InvalidZipError(ZipReaderError): + """Raised when the ZIP file is invalid or corrupted.""" + pass + + +class ZipReader: + """ + Generic ZIP file reader with streaming support. + + Supports: + - ZIP64 format for large files + - STORE and DEFLATE compression + - Range requests for streaming chunks + - Custom comment parsing via callback + + Usage: + with ZipReader('/path/to/file.zip') as reader: + entries = reader.parse_central_directory() + content = reader.read_file('path/in/archive.txt') + """ + + def __init__(self, file_path: str): + """Initialize the ZIP reader. + + Args: + file_path: Path to the ZIP file + """ + self.file_path = file_path + self._fh: Optional[BinaryIO] = None + self._file_size: int = 0 + self._comment: str = "" + self._entries: Dict[str, ZipEntry] = {} + self._cd_offset: int = 0 + self._cd_size: int = 0 + self._cd_entries_count: int = 0 + self._is_zip64: bool = False + self._cd_parsed: bool = False + + def open(self) -> 'ZipReader': + """Open the file and parse EOCD. 
+ + Returns: + Self for method chaining + + Raises: + FileNotFoundError: If the file doesn't exist + InvalidZipError: If the file is not a valid ZIP + """ + import os + self._fh = open(self.file_path, 'rb') + self._file_size = os.fstat(self._fh.fileno()).st_size + self._parse_eocd() + return self + + def close(self): + """Close the file handle.""" + if self._fh: + self._fh.close() + self._fh = None + + def __enter__(self) -> 'ZipReader': + return self.open() + + def __exit__(self, *args): + self.close() + + @property + def file_size(self) -> int: + """Get the size of the ZIP file.""" + return self._file_size + + @property + def is_zip64(self) -> bool: + """Check if this is a ZIP64 format archive.""" + return self._is_zip64 + + @property + def comment(self) -> str: + """Get the ZIP file comment.""" + return self._comment + + @property + def entries(self) -> Dict[str, ZipEntry]: + """Get the parsed entries dictionary.""" + return self._entries + + @property + def cd_entries_count(self) -> int: + """Get the number of entries in the central directory.""" + return self._cd_entries_count + + def parse_central_directory( + self, + stop_condition: Optional[Callable[[ZipEntry, int], bool]] = None, + max_entries: Optional[int] = None + ) -> Dict[str, ZipEntry]: + """ + Parse the central directory. + + Args: + stop_condition: Optional callback that receives each ZipEntry and its + 0-based index. If it returns True, parsing stops early. + Useful for optimizations like stopping after metadata files. + max_entries: Optional maximum number of entries to parse. If specified, + parsing stops after this many entries are processed. + + Returns: + Dictionary mapping filenames to ZipEntry objects + + Raises: + InvalidZipError: If central directory is corrupted + """ + if self._fh is None: + raise ZipReaderError("File not opened") + + if self._cd_parsed: + return self._entries + + self._fh.seek(self._cd_offset) + entries: Dict[str, ZipEntry] = {} + + # Determine the maximum entries to process + entries_to_process = self._cd_entries_count + if max_entries is not None: + entries_to_process = min(entries_to_process, max_entries) + + for i in range(self._cd_entries_count): + # Check max_entries limit + if max_entries is not None and i >= max_entries: + logger.debug(f"Reached max_entries limit ({max_entries})") + break + + # Read CD file header (46 bytes minimum) + header = self._fh.read(46) + if len(header) < 46 or header[:4] != ZIP_CD_SIG: + raise InvalidZipError(f"Invalid central directory entry at index {i}") + + # Parse header fields + (version_made, version_needed, flags, compression, + mod_time, mod_date, crc32, comp_size, uncomp_size, + name_len, extra_len, comment_len, disk_start, + internal_attr, external_attr, local_offset) = struct.unpack( + ' 0 else b'' + + # Skip comment + if comment_len > 0: + self._fh.seek(comment_len, 1) + + # Handle ZIP64 extra field if needed + if comp_size == ZIP64_MARKER or uncomp_size == ZIP64_MARKER or local_offset == ZIP64_MARKER: + comp_size, uncomp_size, local_offset = self._parse_zip64_extra( + extra, comp_size, uncomp_size, local_offset) + + entry = ZipEntry( + filename=filename, + compressed_size=comp_size, + uncompressed_size=uncomp_size, + compression_method=compression, + local_header_offset=local_offset, + crc32=crc32, + extra_field=extra + ) + + entries[filename] = entry + + # Check stop condition + if stop_condition and stop_condition(entry, i): + logger.debug(f"Stop condition met at index {i}, filename: {filename}") + break + + self._entries.update(entries) + if 
stop_condition is None and max_entries is None: + self._cd_parsed = True + + return entries + + def list_files(self, prefix: str = "") -> List[str]: + """List files in archive, optionally filtered by prefix. + + Args: + prefix: Only return files starting with this prefix + + Returns: + List of filenames matching the prefix + """ + if not self._cd_parsed: + self.parse_central_directory() + + if prefix: + return [name for name in self._entries.keys() + if name.startswith(prefix) and not self._entries[name].is_directory] + return [name for name in self._entries.keys() + if not self._entries[name].is_directory] + + def get_entry(self, path: str) -> Optional[ZipEntry]: + """Get info about a specific file in the archive. + + Args: + path: Path within the archive + + Returns: + ZipEntry if found, None otherwise + """ + if not self._cd_parsed: + self.parse_central_directory() + return self._entries.get(path) + + def read_file(self, path: str) -> bytes: + """Read entire file from archive. + + Args: + path: Path within the archive + + Returns: + File contents as bytes + + Raises: + FileNotFoundError: If path not found in archive + InvalidZipError: If decompression fails + """ + return b''.join(self.stream_file(path)) + + def stream_file(self, path: str, buffer_size: int = DEFAULT_BUFFER_SIZE) -> Generator[bytes, None, None]: + """Stream file content from archive. + + Args: + path: Path within the archive + buffer_size: Size of chunks to yield + + Yields: + Chunks of file content + + Raises: + FileNotFoundError: If path not found in archive + """ + if self._fh is None: + raise ZipReaderError("File not opened") + + entry = self.get_entry(path) + if entry is None: + raise FileNotFoundError(f"File not found in archive: {path}") + + # Seek to local file header and skip it + self._fh.seek(entry.local_header_offset) + local_header = self._fh.read(30) + if local_header[:4] != ZIP_LOCAL_HEADER_SIG: + raise InvalidZipError(f"Invalid local header for {path}") + + # Get local header name and extra lengths + name_len, extra_len = struct.unpack(' 0: + chunk_size = min(buffer_size, remaining) + chunk = self._fh.read(chunk_size) + if not chunk: + break + yield chunk + remaining -= len(chunk) + + elif entry.compression_method == COMPRESSION_DEFLATE: + # Compressed - need to decompress + decompressor = zlib.decompressobj(-zlib.MAX_WBITS) + remaining = entry.compressed_size + + while remaining > 0: + chunk_size = min(buffer_size, remaining) + compressed_chunk = self._fh.read(chunk_size) + if not compressed_chunk: + break + remaining -= len(compressed_chunk) + + decompressed = decompressor.decompress(compressed_chunk) + if decompressed: + yield decompressed + + # Flush any remaining data + final = decompressor.flush() + if final: + yield final + else: + raise InvalidZipError(f"Unsupported compression method: {entry.compression_method}") + + def stream_file_range(self, path: str, start: int, end: int, + buffer_size: int = DEFAULT_BUFFER_SIZE) -> Generator[bytes, None, None]: + """Stream a byte range of uncompressed file content. + + Note: For DEFLATE compressed files, this must decompress from the + beginning to reach the desired offset. 
+ + Args: + path: Path within the archive + start: Start byte offset (inclusive) + end: End byte offset (inclusive) + buffer_size: Size of chunks to yield + + Yields: + Chunks of file content within the specified range + + Raises: + FileNotFoundError: If path not found in archive + ValueError: If range is invalid + """ + if self._fh is None: + raise ZipReaderError("File not opened") + + entry = self.get_entry(path) + if entry is None: + raise FileNotFoundError(f"File not found in archive: {path}") + + if start < 0: + raise ValueError("Start position cannot be negative") + if end < start: + raise ValueError("End position cannot be less than start position") + if start >= entry.uncompressed_size: + return # Nothing to return + + # Clamp end to file size + end = min(end, entry.uncompressed_size - 1) + range_length = end - start + 1 + + # Seek to local file header and skip it + self._fh.seek(entry.local_header_offset) + local_header = self._fh.read(30) + if local_header[:4] != ZIP_LOCAL_HEADER_SIG: + raise InvalidZipError(f"Invalid local header for {path}") + + name_len, extra_len = struct.unpack(' 0: + chunk_size = min(buffer_size, remaining) + chunk = self._fh.read(chunk_size) + if not chunk: + break + yield chunk + remaining -= len(chunk) + + elif entry.compression_method == COMPRESSION_DEFLATE: + # For compressed files, we need to decompress from the start + # and skip to the desired offset + decompressor = zlib.decompressobj(-zlib.MAX_WBITS) + compressed_remaining = entry.compressed_size + decompressed_pos = 0 + output_remaining = range_length + + while compressed_remaining > 0 and output_remaining > 0: + chunk_size = min(buffer_size, compressed_remaining) + compressed_chunk = self._fh.read(chunk_size) + if not compressed_chunk: + break + compressed_remaining -= len(compressed_chunk) + + decompressed = decompressor.decompress(compressed_chunk) + if not decompressed: + continue + + # Handle the decompressed chunk + chunk_start = 0 + chunk_len = len(decompressed) + + # Skip data before our range + if decompressed_pos + chunk_len <= start: + decompressed_pos += chunk_len + continue + + # Calculate how much of this chunk to skip + if decompressed_pos < start: + chunk_start = start - decompressed_pos + + # Calculate how much of this chunk to output + output_bytes = min(chunk_len - chunk_start, output_remaining) + + if output_bytes > 0: + yield decompressed[chunk_start:chunk_start + output_bytes] + output_remaining -= output_bytes + + decompressed_pos += chunk_len + + # Flush and handle remaining + if output_remaining > 0: + final = decompressor.flush() + if final: + # Apply same range logic to final chunk + chunk_len = len(final) + if decompressed_pos + chunk_len > start: + chunk_start = max(0, start - decompressed_pos) + output_bytes = min(chunk_len - chunk_start, output_remaining) + if output_bytes > 0: + yield final[chunk_start:chunk_start + output_bytes] + else: + raise InvalidZipError(f"Unsupported compression method: {entry.compression_method}") + + def _parse_eocd(self): + """Parse End of Central Directory record. 
+ + Raises: + InvalidZipError: If EOCD not found or invalid + """ + if self._fh is None: + raise ZipReaderError("File not opened") + + # Search backwards from end of file for EOCD signature + search_size = min(MAX_EOCD_SEARCH_SIZE, self._file_size) + self._fh.seek(self._file_size - search_size) + data = self._fh.read(search_size) + + # Find EOCD signature (searching from end) + eocd_pos = data.rfind(ZIP_EOCD_SIG) + if eocd_pos == -1: + raise InvalidZipError("End of Central Directory not found") + + # Position in file + eocd_file_pos = self._file_size - search_size + eocd_pos + + # Parse EOCD (22 bytes minimum) + eocd = data[eocd_pos:eocd_pos + 22] + if len(eocd) < 22: + raise InvalidZipError("Truncated EOCD record") + + (disk_num, cd_disk, cd_entries_this_disk, cd_entries_total, + cd_size, cd_offset, comment_len) = struct.unpack(' 0: + comment_data = data[eocd_pos + 22:eocd_pos + 22 + comment_len] + if len(comment_data) == comment_len: + self._comment = comment_data.decode('utf-8', errors='replace') + + # Check for ZIP64 + if (cd_offset == ZIP64_MARKER or cd_size == ZIP64_MARKER or + cd_entries_total == ZIP64_MARKER_16): + self._is_zip64 = True + self._parse_zip64_eocd(eocd_file_pos) + else: + self._cd_offset = cd_offset + self._cd_size = cd_size + self._cd_entries_count = cd_entries_total + + def _parse_zip64_eocd(self, eocd_pos: int): + """Parse ZIP64 End of Central Directory records. + + Args: + eocd_pos: Position of standard EOCD in file + + Raises: + InvalidZipError: If ZIP64 records not found or invalid + """ + if self._fh is None: + raise ZipReaderError("File not opened") + + # Look for ZIP64 EOCD Locator (20 bytes before EOCD) + loc_pos = eocd_pos - 20 + if loc_pos < 0: + raise InvalidZipError("ZIP64 EOCD Locator not found") + + self._fh.seek(loc_pos) + locator = self._fh.read(20) + + if locator[:4] != ZIP_EOCD64_LOC_SIG: + raise InvalidZipError("Invalid ZIP64 EOCD Locator") + + # Parse locator to get ZIP64 EOCD offset + (zip64_disk, zip64_eocd_offset, total_disks) = struct.unpack( + ' tuple: + """Parse ZIP64 extra field to get actual values. 
+
+        Args:
+            extra: Extra field data
+            comp_size: Compressed size from CD (may be 0xFFFFFFFF)
+            uncomp_size: Uncompressed size from CD (may be 0xFFFFFFFF)
+            local_offset: Local header offset from CD (may be 0xFFFFFFFF)
+
+        Returns:
+            Tuple of (actual_comp_size, actual_uncomp_size, actual_local_offset)
+        """
+        offset = 0
+        while offset + 4 <= len(extra):
+            header_id, data_size = struct.unpack('<HH', extra[offset:offset + 4])
+            if header_id == ZIP64_EXTRA_ID:
+                # ZIP64 values appear in spec order, and only for the fields
+                # whose 32-bit value carries the 0xFFFFFFFF marker
+                pos = offset + 4
+                if uncomp_size == ZIP64_MARKER and pos + 8 <= len(extra):
+                    uncomp_size = struct.unpack('<Q', extra[pos:pos + 8])[0]
+                    pos += 8
+                if comp_size == ZIP64_MARKER and pos + 8 <= len(extra):
+                    comp_size = struct.unpack('<Q', extra[pos:pos + 8])[0]
+                    pos += 8
+                if local_offset == ZIP64_MARKER and pos + 8 <= len(extra):
+                    local_offset = struct.unpack('<Q', extra[pos:pos + 8])[0]
+                break
+            offset += 4 + data_size
+
+        return comp_size, uncomp_size, local_offset
diff --git a/tests/test_zipread.py b/tests/test_zipread.py
new file mode 100644
--- /dev/null
+++ b/tests/test_zipread.py
@@ -0,0 +1,451 @@
+"""Tests for the generic ZipReader in fileglancer.zipread."""
+
+import io
+import os
+import struct
+import tempfile
+import zlib
+
+import pytest
+
+from fileglancer.zipread import (
+    ZipReader,
+    COMPRESSION_STORED,
+    COMPRESSION_DEFLATE,
+    ZIP_LOCAL_HEADER_SIG,
+    ZIP_CD_SIG,
+    ZIP_EOCD_SIG,
+)
+
+
+def create_zip_local_header(filename: bytes, data: bytes,
+                            compression: int = COMPRESSION_STORED) -> tuple:
+    """Create a ZIP local file header followed by the (optionally compressed) data.
+
+    Returns:
+        Tuple of (header_and_data_bytes, crc, comp_size, uncomp_size)
+    """
+    crc = zlib.crc32(data) & 0xFFFFFFFF
+
+    if compression == COMPRESSION_DEFLATE:
+        compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -zlib.MAX_WBITS)
+        compressed = compressor.compress(data) + compressor.flush()
+        comp_size = len(compressed)
+        data_to_write = compressed
+    else:
+        comp_size = len(data)
+        data_to_write = data
+
+    uncomp_size = len(data)
+
+    header = struct.pack(
+        '<4sHHHHHLLLHH',
+        ZIP_LOCAL_HEADER_SIG,
+        20, # version needed
+        0, # flags
+        compression,
+        0, # mod time
+        0, # mod date
+        crc,
+        comp_size,
+        uncomp_size,
+        len(filename),
+        0 # extra field length
+    )
+    return header + filename + data_to_write, crc, comp_size, uncomp_size
+
+
+def create_zip_cd_entry(filename: bytes, crc: int, comp_size: int, uncomp_size: int,
+                        local_offset: int, compression: int = COMPRESSION_STORED) -> bytes:
+    """Create a ZIP central directory entry."""
+    header = struct.pack(
+        '<4sHHHHHHLLLHHHHHLL',
+        ZIP_CD_SIG,
+        20, # version made by
+        20, # version needed
+        0, # flags
+        compression,
+        0, # mod time
+        0, # mod date
+        crc,
+        comp_size,
+        uncomp_size,
+        len(filename),
+        0, # extra field length
+        0, # comment length
+        0, # disk number start
+        0, # internal attributes
+        0, # external attributes
+        local_offset
+    )
+    return header + filename
+
+
+def create_zip_eocd(cd_entries: int, cd_size: int, cd_offset: int, comment: bytes = b'') -> bytes:
+    """Create a ZIP end of central directory record."""
+    return struct.pack(
+        '<4sHHHHLLH',
+        ZIP_EOCD_SIG,
+        0, # disk number
+        0, # disk with CD
+        cd_entries,
+        cd_entries,
+        cd_size,
+        cd_offset,
+        len(comment)
+    ) + comment
+
+
+def create_simple_zip(files: dict, comment: str = None) -> bytes:
+    """Create a simple ZIP file with the given files.
+ + Args: + files: Dictionary mapping filenames to file contents + comment: Optional ZIP comment + + Returns: + bytes: Complete ZIP file data + """ + data = io.BytesIO() + cd_entries = [] + + # Write local file headers and data + for filename, content in files.items(): + filename_bytes = filename.encode('utf-8') + offset = data.tell() + + content_bytes = content.encode('utf-8') if isinstance(content, str) else content + local_data, crc, comp_size, uncomp_size = create_zip_local_header( + filename_bytes, content_bytes + ) + data.write(local_data) + cd_entries.append((filename_bytes, crc, comp_size, uncomp_size, offset)) + + # Write central directory + cd_start = data.tell() + for filename_bytes, crc, comp_size, uncomp_size, offset in cd_entries: + cd_entry = create_zip_cd_entry(filename_bytes, crc, comp_size, uncomp_size, offset) + data.write(cd_entry) + cd_size = data.tell() - cd_start + + # Write EOCD + comment_bytes = comment.encode('utf-8') if comment else b'' + eocd = create_zip_eocd(len(files), cd_size, cd_start, comment_bytes) + data.write(eocd) + + return data.getvalue() + + +@pytest.fixture +def temp_zip_file(): + """Create a temporary ZIP file for testing.""" + files = { + 'readme.txt': 'This is a test file.', + 'data/file1.txt': 'File 1 content', + 'data/file2.txt': 'File 2 content', + } + comment = "Test ZIP archive" + + zip_data = create_simple_zip(files, comment) + + with tempfile.NamedTemporaryFile(suffix='.zip', delete=False) as f: + f.write(zip_data) + temp_path = f.name + + yield temp_path + + if os.path.exists(temp_path): + os.unlink(temp_path) + + +class TestZipReaderBasics: + """Test basic ZipReader functionality.""" + + def test_open_close(self, temp_zip_file): + """Test opening and closing ZIP file.""" + reader = ZipReader(temp_zip_file) + reader.open() + assert reader._fh is not None + assert reader.file_size > 0 + reader.close() + assert reader._fh is None + + def test_context_manager(self, temp_zip_file): + """Test using ZipReader as context manager.""" + with ZipReader(temp_zip_file) as reader: + assert reader._fh is not None + assert reader.file_size > 0 + assert reader._fh is None + + def test_file_not_found(self): + """Test opening non-existent file.""" + reader = ZipReader('/nonexistent/path/file.zip') + with pytest.raises(FileNotFoundError): + reader.open() + + def test_comment_property(self, temp_zip_file): + """Test accessing ZIP comment.""" + with ZipReader(temp_zip_file) as reader: + assert reader.comment == "Test ZIP archive" + + +class TestCentralDirectory: + """Test central directory parsing.""" + + def test_parse_central_directory(self, temp_zip_file): + """Test parsing central directory.""" + with ZipReader(temp_zip_file) as reader: + entries = reader.parse_central_directory() + assert 'readme.txt' in entries + assert 'data/file1.txt' in entries + assert 'data/file2.txt' in entries + + def test_entry_properties(self, temp_zip_file): + """Test ZipEntry properties.""" + with ZipReader(temp_zip_file) as reader: + entries = reader.parse_central_directory() + + readme = entries['readme.txt'] + assert readme.is_directory is False + assert readme.uncompressed_size == len('This is a test file.') + + def test_list_files(self, temp_zip_file): + """Test listing files in archive.""" + with ZipReader(temp_zip_file) as reader: + files = reader.list_files() + assert 'readme.txt' in files + assert 'data/file1.txt' in files + assert 'data/file2.txt' in files + + def test_list_files_with_prefix(self, temp_zip_file): + """Test listing files with prefix filter.""" + 
with ZipReader(temp_zip_file) as reader: + files = reader.list_files(prefix='data/') + assert 'data/file1.txt' in files + assert 'data/file2.txt' in files + assert 'readme.txt' not in files + + def test_get_entry(self, temp_zip_file): + """Test getting specific entry.""" + with ZipReader(temp_zip_file) as reader: + entry = reader.get_entry('readme.txt') + assert entry is not None + assert entry.filename == 'readme.txt' + + missing = reader.get_entry('nonexistent.txt') + assert missing is None + + def test_stop_condition(self, temp_zip_file): + """Test parsing with stop condition.""" + with ZipReader(temp_zip_file) as reader: + # Stop after first file using index + # Note: stop condition is checked AFTER adding the entry, + # so returning True at index 0 stops after the first entry + def stop_after_one(entry, index): + return index >= 0 # Stop after processing first entry (index 0) + + entries = reader.parse_central_directory(stop_condition=stop_after_one) + # Should have stopped after first entry + assert len(entries) == 1 + + def test_stop_condition_with_index(self, temp_zip_file): + """Test that stop condition receives correct index.""" + with ZipReader(temp_zip_file) as reader: + indices_seen = [] + + def track_indices(entry, index): + indices_seen.append(index) + return False # Don't stop + + reader.parse_central_directory(stop_condition=track_indices) + assert indices_seen == [0, 1, 2] # 3 files in temp_zip_file + + def test_max_entries(self, temp_zip_file): + """Test limiting entries with max_entries parameter.""" + with ZipReader(temp_zip_file) as reader: + entries = reader.parse_central_directory(max_entries=2) + assert len(entries) == 2 + + def test_max_entries_zero(self, temp_zip_file): + """Test max_entries=0 returns no entries.""" + with ZipReader(temp_zip_file) as reader: + entries = reader.parse_central_directory(max_entries=0) + assert len(entries) == 0 + + def test_max_entries_exceeds_total(self, temp_zip_file): + """Test max_entries larger than total entries.""" + with ZipReader(temp_zip_file) as reader: + # Archive has 3 files, requesting 100 + entries = reader.parse_central_directory(max_entries=100) + assert len(entries) == 3 + + def test_max_entries_with_stop_condition(self, temp_zip_file): + """Test that stop_condition and max_entries work together.""" + with ZipReader(temp_zip_file) as reader: + # Stop condition would stop at index 2, but max_entries=1 should stop first + def stop_at_two(entry, index): + return index >= 2 + + entries = reader.parse_central_directory( + stop_condition=stop_at_two, + max_entries=1 + ) + assert len(entries) == 1 + + +class TestFileReading: + """Test reading files from archive.""" + + def test_read_file(self, temp_zip_file): + """Test reading entire file.""" + with ZipReader(temp_zip_file) as reader: + content = reader.read_file('readme.txt') + assert content == b'This is a test file.' + + def test_read_nonexistent_file(self, temp_zip_file): + """Test reading nonexistent file.""" + with ZipReader(temp_zip_file) as reader: + with pytest.raises(FileNotFoundError): + reader.read_file('nonexistent.txt') + + def test_stream_file(self, temp_zip_file): + """Test streaming file content.""" + with ZipReader(temp_zip_file) as reader: + chunks = list(reader.stream_file('readme.txt', buffer_size=5)) + content = b''.join(chunks) + assert content == b'This is a test file.' 
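+
+# Not part of the original patch: a minimal sketch of the range semantics that
+# TestRangeRequests below relies on. Offsets are inclusive byte positions into
+# the uncompressed content, so bytes (10, 13) of "This is a test file." are
+# b'test' (temp_zip_path is an illustrative name):
+#
+#     with ZipReader(temp_zip_path) as reader:
+#         assert b''.join(reader.stream_file_range('readme.txt', 10, 13)) == b'test'
+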
+ + +class TestRangeRequests: + """Test range request functionality.""" + + def test_stream_file_range(self, temp_zip_file): + """Test streaming a range of file content.""" + with ZipReader(temp_zip_file) as reader: + # "This is a test file." - get bytes 0-3 = "This" + content = b''.join(reader.stream_file_range('readme.txt', 0, 3)) + assert content == b'This' + + def test_stream_file_range_middle(self, temp_zip_file): + """Test streaming from middle of file.""" + with ZipReader(temp_zip_file) as reader: + # Get "test" + content = b''.join(reader.stream_file_range('readme.txt', 10, 13)) + assert content == b'test' + + def test_stream_file_range_invalid(self, temp_zip_file): + """Test invalid range requests.""" + with ZipReader(temp_zip_file) as reader: + with pytest.raises(ValueError): + list(reader.stream_file_range('readme.txt', -1, 5)) + + with pytest.raises(ValueError): + list(reader.stream_file_range('readme.txt', 10, 5)) + + +class TestCompression: + """Test handling of compressed files.""" + + def test_deflate_compression(self): + """Test reading DEFLATE compressed files.""" + content = b'Hello, this is some test content that should compress well. ' * 10 + filename = b'compressed.txt' + + data = io.BytesIO() + + # Write local header with compression + local_data, crc, comp_size, uncomp_size = create_zip_local_header( + filename, content, compression=COMPRESSION_DEFLATE + ) + data.write(local_data) + + # Write central directory + cd_start = data.tell() + cd_entry = create_zip_cd_entry(filename, crc, comp_size, uncomp_size, 0, compression=COMPRESSION_DEFLATE) + data.write(cd_entry) + cd_size = data.tell() - cd_start + + # Write EOCD + eocd = create_zip_eocd(1, cd_size, cd_start) + data.write(eocd) + + with tempfile.NamedTemporaryFile(suffix='.zip', delete=False) as f: + f.write(data.getvalue()) + temp_path = f.name + + try: + with ZipReader(temp_path) as reader: + read_content = reader.read_file('compressed.txt') + assert read_content == content + + # Test streaming too + streamed = b''.join(reader.stream_file('compressed.txt')) + assert streamed == content + finally: + os.unlink(temp_path) + + +class TestZipEntry: + """Test ZipEntry dataclass.""" + + def test_is_directory(self): + """Test is_directory property.""" + dir_entry = ZipEntry( + filename='folder/', + compressed_size=0, + uncompressed_size=0, + compression_method=0, + local_header_offset=0, + crc32=0 + ) + assert dir_entry.is_directory is True + + file_entry = ZipEntry( + filename='file.txt', + compressed_size=100, + uncompressed_size=100, + compression_method=0, + local_header_offset=0, + crc32=123456 + ) + assert file_entry.is_directory is False + + +class TestEdgeCases: + """Test edge cases and error handling.""" + + def test_empty_archive(self): + """Test handling of empty archive.""" + files = {} + zip_data = create_simple_zip(files) + + with tempfile.NamedTemporaryFile(suffix='.zip', delete=False) as f: + f.write(zip_data) + temp_path = f.name + + try: + with ZipReader(temp_path) as reader: + entries = reader.parse_central_directory() + assert len(entries) == 0 + files = reader.list_files() + assert len(files) == 0 + finally: + os.unlink(temp_path) + + def test_reader_not_opened(self): + """Test error when reader not opened.""" + reader = ZipReader('/some/path.zip') + with pytest.raises(ZipReaderError): + reader.parse_central_directory() + + def test_entries_property(self, temp_zip_file): + """Test entries property.""" + with ZipReader(temp_zip_file) as reader: + # Before parsing + assert reader.entries == {} + + # 
After parsing + reader.parse_central_directory() + assert len(reader.entries) == 3 + + def test_cd_entries_count(self, temp_zip_file): + """Test cd_entries_count property.""" + with ZipReader(temp_zip_file) as reader: + assert reader.cd_entries_count == 3 From 703e8a92df4d879fbb317a3060f7e991c48d953f Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Wed, 28 Jan 2026 02:47:14 -0500 Subject: [PATCH 4/9] Update OZX implementation documentation --- docs/RFC-9-OZX-Implementation.md | 174 +++++++++++++++++++++++++++---- 1 file changed, 151 insertions(+), 23 deletions(-) diff --git a/docs/RFC-9-OZX-Implementation.md b/docs/RFC-9-OZX-Implementation.md index c539e537..7408815a 100644 --- a/docs/RFC-9-OZX-Implementation.md +++ b/docs/RFC-9-OZX-Implementation.md @@ -26,10 +26,14 @@ This document describes the implementation of RFC-9 support for reading OME-Zarr β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ Backend β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ -β”‚ β”‚ /api/ozx-content/ │───▢│ OZXReader (ozxzip.py) β”‚ β”‚ -β”‚ β”‚ /api/ozx-metadata/ β”‚ β”‚ - ZIP64 support β”‚ β”‚ -β”‚ β”‚ /api/ozx-list/ β”‚ β”‚ - Partial CD parsing β”‚ β”‚ -β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ - Range request streaming β”‚ β”‚ +β”‚ β”‚ /api/ozx-content/ β”‚ β”‚ OZXReader (ozxzip.py) β”‚ β”‚ +β”‚ β”‚ /api/ozx-metadata/ │───▢│ - OME metadata parsing β”‚ β”‚ +β”‚ β”‚ /api/ozx-list/ β”‚ β”‚ - jsonFirst optimization β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ β”‚ +β”‚ β”‚ ZipReader (zipread.py) β”‚ β”‚ +β”‚ β”‚ - ZIP64 support β”‚ β”‚ +β”‚ β”‚ - Partial CD parsing β”‚ β”‚ +β”‚ β”‚ - Range request streaming β”‚ β”‚ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ ``` @@ -38,12 +42,14 @@ This document describes the implementation of RFC-9 support for reading OME-Zarr ### Backend (Python) -| File | Action | Description | -| ----------------------- | ---------- | ---------------------------------------- | -| `fileglancer/ozxzip.py` | **CREATE** | RFC-9 ZIP reader with partial CD parsing | -| `fileglancer/app.py` | MODIFY | Add `/api/ozx-*` endpoints | -| `fileglancer/model.py` | MODIFY | Add OZX Pydantic models | -| `tests/test_ozxzip.py` | **CREATE** | Unit tests for OZXReader (31 tests) | +| File | Action | Description | +| ------------------------ | ---------- | --------------------------------------------- | +| `fileglancer/zipread.py` | **CREATE** | Generic ZIP reader with streaming support | +| `fileglancer/ozxzip.py` | **CREATE** | RFC-9 OZX layer extending ZipReader | +| `fileglancer/app.py` | MODIFY | Add `/api/ozx-*` endpoints | +| `fileglancer/model.py` | MODIFY | Add OZX Pydantic models | +| `tests/test_zipread.py` | **CREATE** | Unit tests for generic ZipReader (27 tests) | +| `tests/test_ozxzip.py` | **CREATE** | Unit tests for OZXReader (31 tests) | ### Frontend (TypeScript) @@ -56,22 +62,95 
@@ This document describes the implementation of RFC-9 support for reading OME-Zarr ## Backend Implementation Details -### OZXReader (`fileglancer/ozxzip.py`) +The backend uses a two-layer architecture separating generic ZIP functionality from OZX-specific features. + +### ZipReader (`fileglancer/zipread.py`) -The core ZIP reader implements: +Generic ZIP file reader providing: 1. **EOCD Parsing**: Locates End of Central Directory record by scanning backwards from file end 2. **ZIP64 Support**: Handles large archives with ZIP64 extended fields -3. **OME Metadata**: Parses ZIP comment for RFC-9 OME metadata JSON -4. **jsonFirst Optimization**: When `jsonFirst=true` in metadata, stops parsing central directory after last JSON file -5. **Compression**: Supports STORE (uncompressed) and DEFLATE compression methods -6. **Range Streaming**: Efficient byte-range streaming for HTTP Range requests +3. **Compression**: Supports STORE (uncompressed) and DEFLATE compression methods +4. **Range Streaming**: Efficient byte-range streaming for HTTP Range requests +5. **Flexible Parsing**: Supports `stop_condition` callback and `max_entries` limit -Key classes: +Key classes and functions: -- `OZXReader`: Main reader class with context manager support -- `OZXMetadata`: Parsed OME metadata from ZIP comment +- `ZipReader`: Generic ZIP reader with context manager support - `ZipEntry`: Individual file entry from central directory +- `ZipReaderError`, `InvalidZipError`: Exception classes + +#### Central Directory Parsing API + +```python +def parse_central_directory( + self, + stop_condition: Optional[Callable[[ZipEntry, int], bool]] = None, + max_entries: Optional[int] = None +) -> Dict[str, ZipEntry]: + """ + Parse the central directory. + + Args: + stop_condition: Optional callback receiving (entry, index). + Returns True to stop parsing after the current entry. + max_entries: Optional maximum number of entries to parse. + + Returns: + Dictionary mapping filenames to ZipEntry objects + """ +``` + +**Examples**: + +```python +# Parse all entries +entries = reader.parse_central_directory() + +# Stop after 100 entries +entries = reader.parse_central_directory(max_entries=100) + +# Stop when finding a specific file +def stop_at_target(entry, index): + return entry.filename == "target.json" +entries = reader.parse_central_directory(stop_condition=stop_at_target) + +# Stop after processing 5 JSON files +json_count = [0] +def stop_after_5_json(entry, index): + if entry.filename.endswith('.json'): + json_count[0] += 1 + return json_count[0] >= 5 +entries = reader.parse_central_directory(stop_condition=stop_after_5_json) +``` + +### OZXReader (`fileglancer/ozxzip.py`) + +Extends `ZipReader` with RFC-9 OZX-specific functionality: + +1. **OME Metadata**: Parses ZIP comment for RFC-9 OME metadata JSON +2. **jsonFirst Optimization**: When `jsonFirst=true` in metadata, stops parsing central directory after last JSON metadata file +3. 
**Metadata File Detection**: Identifies `.json`, `.zattrs`, `.zarray`, `.zgroup` files + +Key classes and functions: + +- `OZXReader`: Extends ZipReader with OZX-specific methods +- `OZXMetadata`: Parsed OME metadata from ZIP comment +- `is_json_metadata_file()`: Check if filename is a JSON metadata file +- `is_ozx_file()`: Check if filename has `.ozx` extension + +#### jsonFirst Optimization + +```python +with OZXReader(path) as reader: + metadata = reader.get_ome_metadata() + + # Parse only JSON metadata files (efficient for large archives) + if metadata and metadata.json_first: + entries = reader.parse_central_directory(json_only=True) + else: + entries = reader.parse_central_directory() +``` ### API Endpoints @@ -178,6 +257,33 @@ detectOzxZarrVersions(files: string[]): ('v3')[] Note: Unlike regular Zarr directories which can be v2 or v3, OZX files per RFC-9 only support Zarr v3 (OME-Zarr v0.5). The detection function only looks for `zarr.json` files and ignores Zarr v2 markers (`.zarray`, `.zattrs`, `.zgroup`). +## Modular Architecture + +The implementation separates generic ZIP functionality from OZX-specific features: + +``` +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ OZXReader β”‚ +β”‚ - OME metadata parsing β”‚ +β”‚ - jsonFirst optimization β”‚ +β”‚ - is_json_metadata_file() β”‚ +β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ +β”‚ ZipReader β”‚ +β”‚ - EOCD/ZIP64 parsing β”‚ +β”‚ - Central directory parsing β”‚ +β”‚ - stop_condition & max_entries β”‚ +β”‚ - File streaming & range requests β”‚ +β”‚ - STORE/DEFLATE compression β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` + +**Benefits**: + +1. **Reusability**: `ZipReader` can be used for any ZIP file, not just OZX +2. **Testability**: Each layer has focused unit tests +3. **Extensibility**: New ZIP-based formats can extend `ZipReader` +4. 
**Separation of Concerns**: Generic ZIP logic is decoupled from OME-specific features + ## RFC-9 ZIP Comment Format The OZX file's ZIP comment contains OME metadata: @@ -202,19 +308,41 @@ When `jsonFirst` is true, JSON metadata files (.json, .zattrs, .zarray, .zgroup) ### Backend Tests ```bash +# Run all ZIP/OZX tests +pixi run -e test pytest tests/test_zipread.py tests/test_ozxzip.py -v + +# Run only generic ZIP tests +pixi run -e test pytest tests/test_zipread.py -v + +# Run only OZX-specific tests pixi run -e test pytest tests/test_ozxzip.py -v ``` +#### Generic ZipReader Tests (`test_zipread.py`) + Tests cover: - Basic reader operations (open, close, context manager) -- Metadata parsing (valid, missing, invalid JSON) -- Central directory parsing and jsonFirst optimization +- Central directory parsing +- `stop_condition` callback with index parameter +- `max_entries` limit parameter +- Combined `stop_condition` and `max_entries` +- File reading and streaming +- Range request streaming +- DEFLATE compression +- Edge cases (empty archive, unopened reader) + +#### OZX-Specific Tests (`test_ozxzip.py`) + +Tests cover: + +- OZX file detection utilities +- OME metadata parsing (valid, missing, invalid JSON) +- jsonFirst optimization - File reading (text, binary, compressed) - Range request streaming -- ZIP64 handling - Unicode filenames -- Edge cases (empty archive, unopened reader) +- Edge cases ### Frontend Tests From 58fdc8dceed3fb85f4f8eea9a95451f15afdf059 Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Wed, 28 Jan 2026 03:48:01 -0500 Subject: [PATCH 5/9] Add central directory caching --- docs/RFC-9-OZX-Implementation.md | 52 +++++++++++------------ fileglancer/ozxzip.py | 17 ++++++-- fileglancer/zipread.py | 67 ++++++++++++++++++++---------- tests/test_zipread.py | 26 ++++++------ tests/test_zipread_resume.py | 71 ++++++++++++++++++++++++++++++++ 5 files changed, 170 insertions(+), 63 deletions(-) create mode 100644 tests/test_zipread_resume.py diff --git a/docs/RFC-9-OZX-Implementation.md b/docs/RFC-9-OZX-Implementation.md index 7408815a..4f6ef0ef 100644 --- a/docs/RFC-9-OZX-Implementation.md +++ b/docs/RFC-9-OZX-Implementation.md @@ -14,27 +14,27 @@ This document describes the implementation of RFC-9 support for reading OME-Zarr ``` β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ Frontend β”‚ -β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ Frontend β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ β”‚ ozxDetection.ts │───▢│ OzxFetchStore│───▢│ zarrita/ome-zarrβ”‚ β”‚ -β”‚ β”‚ (detection) β”‚ β”‚ (custom store)β”‚ β”‚ (existing) β”‚ β”‚ -β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ -β”‚ β”‚ β”‚ -β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”‚β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +β”‚ β”‚ (detection) β”‚ β”‚(custom store)β”‚ β”‚ (existing) β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ 
β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β”‚ β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”‚β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ HTTP + Range requests β–Ό β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ Backend β”‚ -β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ -β”‚ β”‚ /api/ozx-content/ β”‚ β”‚ OZXReader (ozxzip.py) β”‚ β”‚ -β”‚ β”‚ /api/ozx-metadata/ │───▢│ - OME metadata parsing β”‚ β”‚ -β”‚ β”‚ /api/ozx-list/ β”‚ β”‚ - jsonFirst optimization β”‚ β”‚ -β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ β”‚ -β”‚ β”‚ ZipReader (zipread.py) β”‚ β”‚ -β”‚ β”‚ - ZIP64 support β”‚ β”‚ -β”‚ β”‚ - Partial CD parsing β”‚ β”‚ -β”‚ β”‚ - Range request streaming β”‚ β”‚ -β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ Backend β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ /api/ozx-content/ β”‚ β”‚ OZXReader (ozxzip.py) β”‚ β”‚ +β”‚ β”‚ /api/ozx-metadata/ │───▢│ - OME metadata parsing β”‚ β”‚ +β”‚ β”‚ /api/ozx-list/ β”‚ β”‚ - jsonFirst optimization β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ β”‚ +β”‚ β”‚ ZipReader (zipread.py) β”‚ β”‚ +β”‚ β”‚ - ZIP64 support β”‚ β”‚ +β”‚ β”‚ - Partial CD parsing β”‚ β”‚ +β”‚ β”‚ - Range request streaming β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ ``` @@ -72,7 +72,7 @@ Generic ZIP file reader providing: 2. **ZIP64 Support**: Handles large archives with ZIP64 extended fields 3. **Compression**: Supports STORE (uncompressed) and DEFLATE compression methods 4. **Range Streaming**: Efficient byte-range streaming for HTTP Range requests -5. **Flexible Parsing**: Supports `stop_condition` callback and `max_entries` limit +5. **Flexible Parsing**: Supports `stop_condition` callback and `max_new_entries` limit Key classes and functions: @@ -86,7 +86,7 @@ Key classes and functions: def parse_central_directory( self, stop_condition: Optional[Callable[[ZipEntry, int], bool]] = None, - max_entries: Optional[int] = None + max_new_entries: Optional[int] = None ) -> Dict[str, ZipEntry]: """ Parse the central directory. @@ -94,7 +94,7 @@ def parse_central_directory( Args: stop_condition: Optional callback receiving (entry, index). Returns True to stop parsing after the current entry. - max_entries: Optional maximum number of entries to parse. + max_new_entries: Optional maximum number of entries to parse. 
Returns: Dictionary mapping filenames to ZipEntry objects @@ -108,7 +108,7 @@ def parse_central_directory( entries = reader.parse_central_directory() # Stop after 100 entries -entries = reader.parse_central_directory(max_entries=100) +entries = reader.parse_central_directory(max_new_entries=100) # Stop when finding a specific file def stop_at_target(entry, index): @@ -263,15 +263,15 @@ The implementation separates generic ZIP functionality from OZX-specific feature ``` β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ OZXReader β”‚ +β”‚ OZXReader β”‚ β”‚ - OME metadata parsing β”‚ β”‚ - jsonFirst optimization β”‚ β”‚ - is_json_metadata_file() β”‚ β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ -β”‚ ZipReader β”‚ +β”‚ ZipReader β”‚ β”‚ - EOCD/ZIP64 parsing β”‚ β”‚ - Central directory parsing β”‚ -β”‚ - stop_condition & max_entries β”‚ +β”‚ - stop_condition & max_new_entries β”‚ β”‚ - File streaming & range requests β”‚ β”‚ - STORE/DEFLATE compression β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ @@ -325,8 +325,8 @@ Tests cover: - Basic reader operations (open, close, context manager) - Central directory parsing - `stop_condition` callback with index parameter -- `max_entries` limit parameter -- Combined `stop_condition` and `max_entries` +- `max_new_entries` limit parameter +- Combined `stop_condition` and `max_new_entries` - File reading and streaming - Range request streaming - DEFLATE compression diff --git a/fileglancer/ozxzip.py b/fileglancer/ozxzip.py index d600ad68..4cd3e71e 100644 --- a/fileglancer/ozxzip.py +++ b/fileglancer/ozxzip.py @@ -124,7 +124,12 @@ def get_metadata(self) -> Optional[OZXMetadata]: """Alias for get_ome_metadata() for backward compatibility.""" return self.get_ome_metadata() - def parse_central_directory(self, json_only: bool = False) -> Dict[str, ZipEntry]: + def parse_central_directory( + self, + json_only: bool = False, + stop_condition: Optional[Callable[[ZipEntry, int], bool]] = None, + max_new_entries: Optional[int] = None + ) -> Dict[str, ZipEntry]: """ Parse the central directory with optional jsonFirst optimization. @@ -132,6 +137,8 @@ def parse_central_directory(self, json_only: bool = False) -> Dict[str, ZipEntry json_only: If True and jsonFirst=True in metadata, stop parsing after the last JSON metadata file. This is the RFC-9 optimization for efficient metadata discovery. + stop_condition: Optional callback (passed to parent). + max_new_entries: Optional maximum number of entries to parse (passed to parent). 
Returns: Dictionary mapping filenames to ZipEntry objects @@ -142,13 +149,17 @@ def parse_central_directory(self, json_only: bool = False) -> Dict[str, ZipEntry if json_only and self._ome_metadata and self._ome_metadata.json_first: # Use the stop condition to implement jsonFirst optimization def stop_at_non_json(entry: ZipEntry, index: int) -> bool: + # Check user's stop condition first + if stop_condition and stop_condition(entry, index): + return True + if entry.is_directory: return False return not is_json_metadata_file(entry.filename) - return super().parse_central_directory(stop_condition=stop_at_non_json) + return super().parse_central_directory(stop_condition=stop_at_non_json, max_new_entries=max_new_entries) else: - return super().parse_central_directory() + return super().parse_central_directory(stop_condition=stop_condition, max_new_entries=max_new_entries) def _parse_ome_comment(self, comment: str) -> Optional[OZXMetadata]: """Parse ZIP comment for RFC-9 OME metadata. diff --git a/fileglancer/zipread.py b/fileglancer/zipread.py index bd5f82cd..c9ae5ece 100644 --- a/fileglancer/zipread.py +++ b/fileglancer/zipread.py @@ -98,6 +98,8 @@ def __init__(self, file_path: str): self._cd_entries_count: int = 0 self._is_zip64: bool = False self._cd_parsed: bool = False + self._cd_next_offset: int = 0 + self._cd_entries_read_count: int = 0 def open(self) -> 'ZipReader': """Open the file and parse EOCD. @@ -155,20 +157,23 @@ def cd_entries_count(self) -> int: def parse_central_directory( self, stop_condition: Optional[Callable[[ZipEntry, int], bool]] = None, - max_entries: Optional[int] = None + max_new_entries: Optional[int] = None ) -> Dict[str, ZipEntry]: """ Parse the central directory. + Supports partial parsing and resuming. If called multiple times, + it resumes from where it left off, unless already fully parsed. + Args: stop_condition: Optional callback that receives each ZipEntry and its 0-based index. If it returns True, parsing stops early. Useful for optimizations like stopping after metadata files. - max_entries: Optional maximum number of entries to parse. If specified, - parsing stops after this many entries are processed. + max_new_entries: Optional maximum number of entries to parse in this call. + If specified, parsing stops after this many NEW entries are processed. 
Returns: - Dictionary mapping filenames to ZipEntry objects + Dictionary mapping filenames to ZipEntry objects (accumulated) Raises: InvalidZipError: If central directory is corrupted @@ -179,19 +184,19 @@ def parse_central_directory( if self._cd_parsed: return self._entries - self._fh.seek(self._cd_offset) - entries: Dict[str, ZipEntry] = {} + self._fh.seek(self._cd_next_offset) - # Determine the maximum entries to process - entries_to_process = self._cd_entries_count - if max_entries is not None: - entries_to_process = min(entries_to_process, max_entries) + start_index = self._cd_entries_read_count + remaining_entries = self._cd_entries_count - start_index - for i in range(self._cd_entries_count): - # Check max_entries limit - if max_entries is not None and i >= max_entries: - logger.debug(f"Reached max_entries limit ({max_entries})") - break + entries_to_read = remaining_entries + if max_new_entries is not None: + entries_to_read = min(remaining_entries, max_new_entries) + + entries_read_this_call = 0 + + while entries_read_this_call < entries_to_read: + i = start_index + entries_read_this_call # Read CD file header (46 bytes minimum) header = self._fh.read(46) @@ -215,6 +220,9 @@ def parse_central_directory( if comment_len > 0: self._fh.seek(comment_len, 1) + # Update next offset + self._cd_next_offset = self._fh.tell() + # Handle ZIP64 extra field if needed if comp_size == ZIP64_MARKER or uncomp_size == ZIP64_MARKER or local_offset == ZIP64_MARKER: comp_size, uncomp_size, local_offset = self._parse_zip64_extra( @@ -230,18 +238,19 @@ def parse_central_directory( extra_field=extra ) - entries[filename] = entry + self._entries[filename] = entry + self._cd_entries_read_count += 1 + entries_read_this_call += 1 # Check stop condition if stop_condition and stop_condition(entry, i): logger.debug(f"Stop condition met at index {i}, filename: {filename}") break - self._entries.update(entries) - if stop_condition is None and max_entries is None: + if self._cd_entries_read_count >= self._cd_entries_count: self._cd_parsed = True - return entries + return self._entries def list_files(self, prefix: str = "") -> List[str]: """List files in archive, optionally filtered by prefix. @@ -270,8 +279,20 @@ def get_entry(self, path: str) -> Optional[ZipEntry]: Returns: ZipEntry if found, None otherwise """ - if not self._cd_parsed: - self.parse_central_directory() + # Check if we already have it + if path in self._entries: + return self._entries[path] + + # If fully parsed and not found, it doesn't exist + if self._cd_parsed: + return None + + # Scan forward until we find it or finish + def stop_on_find(entry, idx): + return entry.filename == path + + self.parse_central_directory(stop_condition=stop_on_find) + return self._entries.get(path) def read_file(self, path: str) -> bytes: @@ -516,6 +537,10 @@ def _parse_eocd(self): self._cd_size = cd_size self._cd_entries_count = cd_entries_total + # Initialize partial parsing state + self._cd_next_offset = self._cd_offset + self._cd_entries_read_count = 0 + def _parse_zip64_eocd(self, eocd_pos: int): """Parse ZIP64 End of Central Directory records. 
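The resumable parser above keeps `_cd_next_offset` and `_cd_entries_read_count` between calls, so a caller can walk a huge central directory in slices without re-reading earlier entries. A minimal usage sketch against the `ZipReader` API from this patch (the archive path is illustrative):

```python
from fileglancer.zipread import ZipReader

with ZipReader("/data/huge-archive.zip") as reader:
    parsed = {}
    while len(parsed) < reader.cd_entries_count:
        # Each call resumes where the previous one stopped and returns
        # the accumulated entries parsed so far.
        parsed = reader.parse_central_directory(max_new_entries=500)
        print(f"parsed {len(parsed)}/{reader.cd_entries_count} entries")
```
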
diff --git a/tests/test_zipread.py b/tests/test_zipread.py index 2611270b..1e8c8fe7 100644 --- a/tests/test_zipread.py +++ b/tests/test_zipread.py @@ -257,35 +257,35 @@ def track_indices(entry, index): reader.parse_central_directory(stop_condition=track_indices) assert indices_seen == [0, 1, 2] # 3 files in temp_zip_file - def test_max_entries(self, temp_zip_file): - """Test limiting entries with max_entries parameter.""" + def test_max_new_entries(self, temp_zip_file): + """Test limiting entries with max_new_entries parameter.""" with ZipReader(temp_zip_file) as reader: - entries = reader.parse_central_directory(max_entries=2) + entries = reader.parse_central_directory(max_new_entries=2) assert len(entries) == 2 - def test_max_entries_zero(self, temp_zip_file): - """Test max_entries=0 returns no entries.""" + def test_max_new_entries_zero(self, temp_zip_file): + """Test max_new_entries=0 returns no entries.""" with ZipReader(temp_zip_file) as reader: - entries = reader.parse_central_directory(max_entries=0) + entries = reader.parse_central_directory(max_new_entries=0) assert len(entries) == 0 - def test_max_entries_exceeds_total(self, temp_zip_file): - """Test max_entries larger than total entries.""" + def test_max_new_entries_exceeds_total(self, temp_zip_file): + """Test max_new_entries larger than total entries.""" with ZipReader(temp_zip_file) as reader: # Archive has 3 files, requesting 100 - entries = reader.parse_central_directory(max_entries=100) + entries = reader.parse_central_directory(max_new_entries=100) assert len(entries) == 3 - def test_max_entries_with_stop_condition(self, temp_zip_file): - """Test that stop_condition and max_entries work together.""" + def test_max_new_entries_with_stop_condition(self, temp_zip_file): + """Test that stop_condition and max_new_entries work together.""" with ZipReader(temp_zip_file) as reader: - # Stop condition would stop at index 2, but max_entries=1 should stop first + # Stop condition would stop at index 2, but max_new_entries=1 should stop first def stop_at_two(entry, index): return index >= 2 entries = reader.parse_central_directory( stop_condition=stop_at_two, - max_entries=1 + max_new_entries=1 ) assert len(entries) == 1 diff --git a/tests/test_zipread_resume.py b/tests/test_zipread_resume.py new file mode 100644 index 00000000..e324eeb0 --- /dev/null +++ b/tests/test_zipread_resume.py @@ -0,0 +1,71 @@ +import zipfile +import pytest +from fileglancer.zipread import ZipReader + +@pytest.fixture +def sample_zip(tmp_path): + zip_path = tmp_path / "test.zip" + with zipfile.ZipFile(zip_path, 'w') as zf: + zf.writestr("file1.txt", "content1") + zf.writestr("file2.txt", "content2") + zf.writestr("file3.txt", "content3") + zf.writestr("file4.txt", "content4") + return str(zip_path) + +def test_resume_parsing(sample_zip): + with ZipReader(sample_zip) as reader: + # Read first 2 entries + entries = reader.parse_central_directory(max_new_entries=2) + assert len(entries) == 2 + assert "file1.txt" in entries + assert "file2.txt" in entries + assert "file3.txt" not in entries + + # Read next 1 entry + entries = reader.parse_central_directory(max_new_entries=1) + assert len(entries) == 3 + assert "file3.txt" in entries + + # Read rest + entries = reader.parse_central_directory() + assert len(entries) == 4 + assert "file4.txt" in entries + + # Check parsed flag + assert reader._cd_parsed + +def test_lazy_get_entry(sample_zip): + with ZipReader(sample_zip) as reader: + # We haven't parsed anything yet + assert len(reader._entries) == 0 + + # 
Request file3.txt + entry = reader.get_entry("file3.txt") + assert entry is not None + assert entry.filename == "file3.txt" + + # It should have parsed at least 3 entries + assert len(reader._entries) >= 3 + assert "file1.txt" in reader._entries + + # file4 shouldn't be parsed yet + assert "file4.txt" not in reader._entries + assert not reader._cd_parsed + +def test_resume_with_stop_condition(sample_zip): + with ZipReader(sample_zip) as reader: + # Stop at file2 + def stop_at_2(entry, idx): + return entry.filename == "file2.txt" + + entries = reader.parse_central_directory(stop_condition=stop_at_2) + assert "file2.txt" in entries + assert "file3.txt" not in entries + + # Resume and stop at file3 + def stop_at_3(entry, idx): + return entry.filename == "file3.txt" + + entries = reader.parse_central_directory(stop_condition=stop_at_3) + assert "file3.txt" in entries + assert "file4.txt" not in entries From f719b9857141610fefade31ce64b3062d165dc4a Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Wed, 28 Jan 2026 07:24:30 -0500 Subject: [PATCH 6/9] Add frontend components to view within zip or ozx archives --- fileglancer/app.py | 36 +- fileglancer/ozxzip.py | 14 + .../components/ui/BrowsePage/FileViewer.tsx | 314 +++++++++++++++++- .../src/components/ui/Table/TableCard.tsx | 2 +- frontend/src/queries/fileContentQueries.ts | 5 +- frontend/src/queries/ozxQueries.ts | 104 +++++- frontend/src/utils/ozxDetection.ts | 20 ++ 7 files changed, 478 insertions(+), 17 deletions(-) diff --git a/fileglancer/app.py b/fileglancer/app.py index 9f0276be..fa3ab2d8 100644 --- a/fileglancer/app.py +++ b/fileglancer/app.py @@ -34,7 +34,7 @@ from fileglancer.utils import format_timestamp, guess_content_type, parse_range_header from fileglancer.user_context import UserContext, EffectiveUserContext, CurrentUserContext, UserContextConfigurationError from fileglancer.filestore import Filestore, RootCheckError -from fileglancer.ozxzip import OZXReader, OZXReaderError, InvalidZipError, is_ozx_file +from fileglancer.ozxzip import OZXReader, OZXReaderError, InvalidZipError, is_ozx_file, is_zip_file from fileglancer.log import AccessLogMiddleware from x2s3.utils import get_read_access_acl, get_nosuchbucket_response, get_error_response @@ -1235,7 +1235,7 @@ async def head_ozx_file_content( except RootCheckError as e: raise HTTPException(status_code=400, detail=str(e)) - if not is_ozx_file(ozx_file_path): + if not is_zip_file(ozx_file_path): raise HTTPException(status_code=400, detail="Not an OZX file") try: @@ -1298,7 +1298,7 @@ async def get_ozx_file_content( except RootCheckError as e: raise HTTPException(status_code=400, detail=str(e)) - if not is_ozx_file(ozx_file_path): + if not is_zip_file(ozx_file_path): raise HTTPException(status_code=400, detail="Not an OZX file") try: @@ -1409,7 +1409,7 @@ async def get_ozx_metadata( except RootCheckError as e: raise HTTPException(status_code=400, detail=str(e)) - if not is_ozx_file(ozx_file_path): + if not is_zip_file(ozx_file_path): raise HTTPException(status_code=400, detail="Not an OZX file") try: @@ -1445,11 +1445,13 @@ async def get_ozx_metadata( async def list_ozx_files( path_name: str, prefix: str = Query('', description="Filter files by prefix"), + details: bool = Query(False, description="Include file details (size, compression)"), username: str = Depends(get_current_user) ): """ List files in an OZX archive. Optionally filter by path prefix. + If details=True, returns full file entry information including size. 
""" filestore_name, _, ozx_subpath = path_name.partition('/') @@ -1464,7 +1466,7 @@ async def list_ozx_files( except RootCheckError as e: raise HTTPException(status_code=400, detail=str(e)) - if not is_ozx_file(ozx_file_path): + if not is_zip_file(ozx_file_path): raise HTTPException(status_code=400, detail="Not an OZX file") try: @@ -1477,9 +1479,27 @@ async def list_ozx_files( # List files outside user context try: - files = reader.list_files(prefix) - reader.close() - return {"files": files} + if details: + # Return full file entry details + reader.parse_central_directory() + entries = [] + for filename, entry in reader.entries.items(): + if prefix and not filename.startswith(prefix): + continue + entries.append({ + "filename": entry.filename, + "compressed_size": entry.compressed_size, + "uncompressed_size": entry.uncompressed_size, + "compression_method": entry.compression_method, + "is_directory": entry.is_directory + }) + reader.close() + return {"entries": entries} + else: + # Return just filenames for backward compatibility + files = reader.list_files(prefix) + reader.close() + return {"files": files} except Exception as e: reader.close() diff --git a/fileglancer/ozxzip.py b/fileglancer/ozxzip.py index 4cd3e71e..39b2b00a 100644 --- a/fileglancer/ozxzip.py +++ b/fileglancer/ozxzip.py @@ -230,6 +230,19 @@ def is_ozx_file(filename: str) -> bool: return filename.lower().endswith('.ozx') +def is_zip_file(filename: str) -> bool: + """Check if a filename has a .zip or .ozx extension. + + Args: + filename: Filename to check + + Returns: + True if the file has a .zip or .ozx extension + """ + name = filename.lower() + return name.endswith('.zip') or name.endswith('.ozx') + + # Re-export commonly used items from zipread for convenience __all__ = [ 'OZXReader', @@ -237,6 +250,7 @@ def is_ozx_file(filename: str) -> bool: 'OZXReaderError', 'InvalidOZXError', 'is_ozx_file', + 'is_zip_file', 'is_json_metadata_file', # Re-exports from zipread 'ZipReader', diff --git a/frontend/src/components/ui/BrowsePage/FileViewer.tsx b/frontend/src/components/ui/BrowsePage/FileViewer.tsx index 0fd1e82f..d9e1d2d9 100644 --- a/frontend/src/components/ui/BrowsePage/FileViewer.tsx +++ b/frontend/src/components/ui/BrowsePage/FileViewer.tsx @@ -1,5 +1,11 @@ -import { useEffect, useState } from 'react'; -import { Switch, Typography } from '@material-tailwind/react'; +import { useEffect, useState, useMemo } from 'react'; +import { Switch, Typography, IconButton } from '@material-tailwind/react'; +import { + HiOutlineFolder, + HiOutlineDocument, + HiArrowLeft, + HiOutlineDownload +} from 'react-icons/hi'; import { Prism as SyntaxHighlighter } from 'react-syntax-highlighter'; import { materialDark, @@ -10,11 +16,308 @@ import { useFileBrowserContext } from '@/contexts/FileBrowserContext'; import { formatFileSize, formatUnixTimestamp } from '@/utils'; import type { FileOrFolder } from '@/shared.types'; import { useFileContentQuery } from '@/queries/fileContentQueries'; +import { + useOzxFileEntriesQuery, + useOzxFileContentQuery, + buildOzxContentUrl +} from '@/queries/ozxQueries'; +import { isAnyZipFile, getOzxFilePath } from '@/utils/ozxDetection'; type FileViewerProps = { readonly file: FileOrFolder; }; +const InternalFileViewer = ({ + fspName, + ozxPath, + internalPath, + onBack +}: { + readonly fspName: string; + readonly ozxPath: string; + readonly internalPath: string; + readonly onBack: () => void; +}) => { + const { data, isLoading, error } = useOzxFileContentQuery( + fspName, + ozxPath, + internalPath + ); + 
const [isDarkMode, setIsDarkMode] = useState(false);
+
+  useEffect(() => {
+    const checkDarkMode = () =>
+      setIsDarkMode(document.documentElement.classList.contains('dark'));
+    checkDarkMode();
+    const observer = new MutationObserver(checkDarkMode);
+    observer.observe(document.documentElement, {
+      attributes: true,
+      attributeFilter: ['class']
+    });
+    return () => observer.disconnect();
+  }, []);
+
+  if (isLoading) {
+    return <div>Loading content...</div>;
+  }
+  if (error) {
+    return <div>Error: {error.message}</div>;
+  }
+
+  const content = data ? new TextDecoder().decode(data) : '';
+  const language = getLanguageFromExtension(internalPath);
+
+  return (
+    <div>
+      <div>
+        <IconButton onClick={onBack} size="sm" variant="ghost">
+          <HiArrowLeft />
+        </IconButton>
+        <Typography>{internalPath}</Typography>
+      </div>
+      <SyntaxHighlighter
+        language={language}
+        style={isDarkMode ? materialDark : materialLight}
+      >
+        {content}
+      </SyntaxHighlighter>
+    </div>
+  );
+};
+
+type ZipBrowserItem = {
+  name: string;
+  path: string;
+  isDir: boolean;
+  size: number;
+};
+
+const ZipBrowser = ({ file }: { readonly file: FileOrFolder }) => {
+  const { fspName } = useFileBrowserContext();
+  const ozxPath = getOzxFilePath(file);
+  const {
+    data: allEntries,
+    isLoading,
+    error
+  } = useOzxFileEntriesQuery(fspName, ozxPath);
+  const [internalPath, setInternalPath] = useState('');
+  const [selectedFile, setSelectedFile] = useState<string | null>(null);
+
+  const items = useMemo(() => {
+    if (!allEntries) {
+      return [];
+    }
+
+    const folders = new Map(); // path -> total size of contents
+    const files: ZipBrowserItem[] = [];
+
+    allEntries.forEach(entry => {
+      const filename = entry.filename;
+      if (!filename.startsWith(internalPath)) {
+        return;
+      }
+
+      const relative = filename.slice(internalPath.length);
+      const slashIndex = relative.indexOf('/');
+
+      if (slashIndex === -1) {
+        // Direct file in current directory
+        if (relative !== '' && !entry.is_directory) {
+          files.push({
+            name: relative,
+            path: filename,
+            isDir: false,
+            size: entry.uncompressed_size
+          });
+        }
+      } else {
+        // File in a subdirectory - track the folder
+        const folderPath = internalPath + relative.slice(0, slashIndex + 1);
+        const currentSize = folders.get(folderPath) || 0;
+        folders.set(folderPath, currentSize + entry.uncompressed_size);
+      }
+    });
+
+    const folderItems: ZipBrowserItem[] = Array.from(folders.entries())
+      .sort(([a], [b]) => a.localeCompare(b))
+      .map(([path, size]) => ({
+        name: path.slice(internalPath.length).replace(/\/$/, ''),
+        path,
+        isDir: true,
+        size
+      }));
+
+    const fileItems = files.sort((a, b) => a.name.localeCompare(b.name));
+
+    return [...folderItems, ...fileItems];
+  }, [allEntries, internalPath]);
+
+  if (isLoading) {
+    return (
+      <div>
+        <Typography>Loading archive contents...</Typography>
+      </div>
+    );
+  }
+
+  if (error) {
+    return <div>Error: {error.message}</div>;
+  }
+
+  if (selectedFile && fspName) {
+    return (
+      <InternalFileViewer
+        fspName={fspName}
+        internalPath={selectedFile}
+        onBack={() => setSelectedFile(null)}
+        ozxPath={ozxPath}
+      />
+    );
+  }
+
+  const navigateUp = () => {
+    const parts = internalPath.split('/').filter(Boolean);
+    parts.pop();
+    setInternalPath(parts.length > 0 ? parts.join('/') + '/' : '');
+  };
+
+  const handleDownload = (itemPath: string, itemName: string) => {
+    if (!fspName) {
+      return;
+    }
+    const url = buildOzxContentUrl(fspName, ozxPath, itemPath);
+    const link = document.createElement('a');
+    link.href = url;
+    link.download = itemName;
+    document.body.appendChild(link);
+    link.click();
+    document.body.removeChild(link);
+  };
+
+  return (
+    <div>
+      {/* Breadcrumb header */}
+      <div>
+        {internalPath ? (
+          <IconButton onClick={navigateUp} size="sm" variant="ghost">
+            <HiArrowLeft />
+          </IconButton>
+        ) : null}
+        <Typography>
+          {file.name}/{internalPath}
+        </Typography>
+      </div>
+
+      {/* Table view */}
+      <table>
+        <thead>
+          <tr>
+            <th>Name</th>
+            <th>Type</th>
+            <th>Size</th>
+            <th>Actions</th>
+          </tr>
+        </thead>
+        <tbody>
+          {items.map(item => (
+            <tr
+              key={item.path}
+              onClick={() => {
+                if (item.isDir) {
+                  setInternalPath(item.path);
+                } else {
+                  setSelectedFile(item.path);
+                }
+              }}
+            >
+              <td>
+                {item.isDir ? <HiOutlineFolder /> : <HiOutlineDocument />}
+                <Typography>{item.name}</Typography>
+              </td>
+              <td>
+                <Typography>{item.isDir ? 'Folder' : 'File'}</Typography>
+              </td>
+              <td>
+                <Typography>{formatFileSize(item.size)}</Typography>
+              </td>
+              <td>
+                {!item.isDir ? (
+                  <IconButton
+                    onClick={e => {
+                      e.stopPropagation();
+                      handleDownload(item.path, item.name);
+                    }}
+                    size="sm"
+                    variant="ghost"
+                  >
+                    <HiOutlineDownload />
+                  </IconButton>
+                ) : null}
+              </td>
+            </tr>
+          ))}
+          {items.length === 0 ? (
+            <tr>
+              <td colSpan={4}>This folder is empty</td>
+            </tr>
+          ) : null}
+        </tbody>
+      </table>
+    </div>
+  );
+};
+
 // Map file extensions to syntax highlighter languages
 const getLanguageFromExtension = (filename: string): string => {
   const extension = filename.split('.').pop()?.toLowerCase() || '';
@@ -80,7 +383,8 @@ export default function FileViewer({ file }: FileViewerProps) {
   const [isDarkMode, setIsDarkMode] = useState(false);
   const [formatJson, setFormatJson] = useState(true);
 
-  const contentQuery = useFileContentQuery(fspName, file.path);
+  const isZip = isAnyZipFile(file);
+  const contentQuery = useFileContentQuery(fspName, file.path, !isZip);
 
   const language = getLanguageFromExtension(file.name);
   const isJsonFile = language === 'json';
@@ -101,6 +405,10 @@ export default function FileViewer({ file }: FileViewerProps) {
   }, []);
 
   const renderViewer = () => {
+    if (isAnyZipFile(file)) {
+      return <ZipBrowser file={file} />;
+    }
+
     if (contentQuery.isLoading) {
       return (

[... rest of this FileViewer.tsx hunk was lost in extraction ...]
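The `items` memo in ZipBrowser above groups the flat archive entry list into a single directory level, rolling file sizes up into their immediate subfolder. The same algorithm as a standalone Python sketch (`group_entries` and its inputs are illustrative, not part of the patch):

```python
def group_entries(entries, prefix=""):
    """Group flat (path, size) pairs into one directory level, ZipBrowser-style."""
    folders, files = {}, []
    for path, size in entries:
        if not path.startswith(prefix):
            continue
        rest = path[len(prefix):]
        head, sep, _ = rest.partition('/')
        if sep:
            # Entry lives in a subdirectory: accumulate its size on that folder
            key = prefix + head + '/'
            folders[key] = folders.get(key, 0) + size
        elif rest:
            files.append((rest, size))
    # Folders first, then files, each sorted by name (as in the component)
    return sorted(folders.items()) + sorted(files)

# group_entries([("a/x.bin", 5), ("a/y.bin", 7), ("top.json", 2)])
# -> [("a/", 12), ("top.json", 2)]
```
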
diff --git a/frontend/src/components/ui/Table/TableCard.tsx b/frontend/src/components/ui/Table/TableCard.tsx
index a367645a..cfbcd017 100644
--- a/frontend/src/components/ui/Table/TableCard.tsx
+++ b/frontend/src/components/ui/Table/TableCard.tsx
@@ -62,7 +62,7 @@ declare module '@tanstack/react-table' {
       data: CellContextMenuData
     ) => void;
   }
-  // eslint-disable-next-line @typescript-eslint/no-unused-vars
+
   interface ColumnMeta<TData extends RowData, TValue> {
     // Optional function to extract searchable values from a cell
     // Used by globalFilterFn to allow columns to define custom search behavior
diff --git a/frontend/src/queries/fileContentQueries.ts b/frontend/src/queries/fileContentQueries.ts
index 2e71fb7c..fb69b405 100644
--- a/frontend/src/queries/fileContentQueries.ts
+++ b/frontend/src/queries/fileContentQueries.ts
@@ -48,7 +48,8 @@ async function fetchFileWithTextDetection(
 
 export function useFileContentQuery(
   fspName: string | undefined,
-  filePath: string
+  filePath: string,
+  enabled: boolean = true
 ): UseQueryResult {
   return useQuery({
     queryKey: fileContentQueryKeys.detail(fspName || '', filePath),
@@ -58,7 +59,7 @@ export function useFileContentQuery(
       });
       return content;
     },
-    enabled: !!fspName && !!filePath,
+    enabled: enabled && !!fspName && !!filePath,
     retry: (failureCount, error) => {
       // Do not retry on permission errors
       if (
diff --git a/frontend/src/queries/ozxQueries.ts b/frontend/src/queries/ozxQueries.ts
index e89dafd3..e973fca3 100644
--- a/frontend/src/queries/ozxQueries.ts
+++ b/frontend/src/queries/ozxQueries.ts
@@ -20,6 +20,17 @@ export type OzxMetadataResponse = {
   is_zip64: boolean;
 };
 
+/**
+ * A file entry within an OZX archive with full details.
+ */
+export type OzxFileEntry = {
+  filename: string;
+  compressed_size: number;
+  uncompressed_size: number;
+  compression_method: number;
+  is_directory: boolean;
+};
+
 /**
  * Build URL for accessing content within an OZX file.
  *
@@ -73,11 +84,22 @@ export function buildOzxMetadataUrl(
 export function buildOzxListUrl(
   fspName: string,
   ozxFilePath: string,
-  prefix?: string
+  prefix?: string,
+  details?: boolean
 ): string {
   const pathSegment = `${fspName}/${ozxFilePath}`;
-  const params = prefix ? { prefix } : null;
-  return buildUrl('/api/ozx-list/', pathSegment, params);
+  const params: Record<string, string> = {};
+  if (prefix) {
+    params.prefix = prefix;
+  }
+  if (details) {
+    params.details = 'true';
+  }
+  return buildUrl(
+    '/api/ozx-list/',
+    pathSegment,
+    Object.keys(params).length > 0 ? params : null
+  );
 }
 
 /**
@@ -135,6 +157,21 @@ async function fetchOzxFileList(
   return response.files;
 }
 
+/**
+ * Fetch detailed file entries from an OZX archive.
+ */
+async function fetchOzxFileEntries(
+  fspName: string,
+  ozxFilePath: string,
+  prefix?: string
+): Promise<OzxFileEntry[]> {
+  const url = buildOzxListUrl(fspName, ozxFilePath, prefix, true);
+  const response = (await sendRequestAndThrowForNotOk(url, 'GET')) as {
+    entries: OzxFileEntry[];
+  };
+  return response.entries;
+}
+
 /**
  * Hook to fetch list of files in an OZX archive.
  *
@@ -162,6 +199,39 @@ export function useOzxFileListQuery(
   });
 }
 
+/**
+ * Hook to fetch detailed file entries from an OZX archive.
+ *
+ * @param fspName - The file share path name
+ * @param ozxFilePath - Path to the OZX file within the FSP
+ * @param prefix - Optional prefix to filter files
+ * @param enabled - Whether the query should be enabled
+ */
+export function useOzxFileEntriesQuery(
+  fspName: string | undefined,
+  ozxFilePath: string | undefined,
+  prefix?: string,
+  enabled: boolean = true
+): UseQueryResult<OzxFileEntry[], Error> {
+  return useQuery({
+    queryKey: [
+      'ozx',
+      'entries',
+      fspName || '',
+      ozxFilePath || '',
+      prefix || ''
+    ],
+    queryFn: async () => {
+      if (!fspName || !ozxFilePath) {
+        throw new Error('fspName and ozxFilePath are required');
+      }
+      return await fetchOzxFileEntries(fspName, ozxFilePath, prefix);
+    },
+    enabled: enabled && !!fspName && !!ozxFilePath,
+    staleTime: 5 * 60 * 1000
+  });
+}
+
 /**
  * Fetch content from within an OZX file.
  * Supports optional range requests.
@@ -327,6 +397,34 @@ export class OzxFetchStore {
   }
 }
 
+/**
+ * Hook to fetch content of a file within an OZX archive.
+ */
+export function useOzxFileContentQuery(
+  fspName: string | undefined,
+  ozxFilePath: string | undefined,
+  internalPath: string | undefined,
+  enabled: boolean = true
+): UseQueryResult<ArrayBuffer, Error> {
+  return useQuery({
+    queryKey: [
+      'ozx',
+      'content',
+      fspName || '',
+      ozxFilePath || '',
+      internalPath || ''
+    ],
+    queryFn: async () => {
+      if (!fspName || !ozxFilePath || !internalPath) {
+        throw new Error('fspName, ozxFilePath, and internalPath are required');
+      }
+      return await fetchOzxContent(fspName, ozxFilePath, internalPath);
+    },
+    enabled: enabled && !!fspName && !!ozxFilePath && !!internalPath,
+    staleTime: 5 * 60 * 1000
+  });
+}
+
 /**
  * Create an OzxFetchStore for the given file.
  * This is a factory function for creating stores.
diff --git a/frontend/src/utils/ozxDetection.ts b/frontend/src/utils/ozxDetection.ts
index 2f9c8b68..c721596b 100644
--- a/frontend/src/utils/ozxDetection.ts
+++ b/frontend/src/utils/ozxDetection.ts
@@ -26,6 +26,26 @@ export function isOzxFilename(filename: string): boolean {
   return filename.toLowerCase().endsWith('.ozx');
 }
 
+/**
+ * Check if a file is a regular ZIP file by extension.
+ *
+ * @param file - The file to check
+ * @returns True if the file has a .zip extension
+ */
+export function isZipFile(file: FileOrFolder): boolean {
+  return !file.is_dir && file.name.toLowerCase().endsWith('.zip');
+}
+
+/**
+ * Check if a file is either an OZX or a ZIP file.
+ *
+ * @param file - The file to check
+ * @returns True if the file is an OZX or a ZIP file
+ */
+export function isAnyZipFile(file: FileOrFolder): boolean {
+  return isOzxFile(file) || isZipFile(file);
+}
+
 /**
  * Check if a list of files contains any OZX files.
  *
From fb5ecc7b499012f39c2201b59f1455f02651d024 Mon Sep 17 00:00:00 2001
From: Mark Kittisopikul
Date: Wed, 28 Jan 2026 09:04:31 -0500
Subject: [PATCH 7/9] Add progressive loading for ZIP/OZX archive entries

Large archives with thousands of entries can be slow to load. This adds
pagination support to load entries incrementally:

- Backend: /api/ozx-list now accepts offset/limit params and returns
  total_count, has_more for pagination
- Frontend: New useOzxFileEntriesInfiniteQuery hook with TanStack Query
- ZipBrowser: Shows "Load more" button and entry count progress

Initial load fetches 100 entries, with more loaded on demand.
Co-Authored-By: Claude Opus 4.5 --- fileglancer/app.py | 64 +++++++++-- fileglancer/ozxzip.py | 2 +- .../components/ui/BrowsePage/FileViewer.tsx | 56 +++++++-- frontend/src/queries/ozxQueries.ts | 107 +++++++++++++++++- 4 files changed, 207 insertions(+), 22 deletions(-) diff --git a/fileglancer/app.py b/fileglancer/app.py index fa3ab2d8..9f1f77a7 100644 --- a/fileglancer/app.py +++ b/fileglancer/app.py @@ -1446,12 +1446,24 @@ async def list_ozx_files( path_name: str, prefix: str = Query('', description="Filter files by prefix"), details: bool = Query(False, description="Include file details (size, compression)"), + offset: int = Query(0, ge=0, description="Number of entries to skip"), + limit: int = Query(100, ge=1, le=1000, description="Maximum entries to return"), username: str = Depends(get_current_user) ): """ - List files in an OZX archive. + List files in an OZX archive with pagination support. Optionally filter by path prefix. If details=True, returns full file entry information including size. + + Pagination: + - offset: Number of entries to skip (default 0) + - limit: Maximum entries to return (default 100, max 1000) + + Response includes: + - total_count: Total number of entries in the archive + - offset: Current offset + - limit: Current limit + - has_more: Whether more entries exist beyond this page """ filestore_name, _, ozx_subpath = path_name.partition('/') @@ -1479,13 +1491,29 @@ async def list_ozx_files( # List files outside user context try: + # Get total count from central directory metadata (available after open) + total_count = reader.cd_entries_count + + # Parse entries up to offset + limit + reader.parse_central_directory(max_new_entries=offset + limit) + + # Get all parsed entries as a list (preserves CD order) + all_entries = list(reader.entries.values()) + + # Apply offset and limit + paginated_entries = all_entries[offset:offset + limit] + + # Calculate has_more + has_more = offset + limit < total_count + if details: - # Return full file entry details - reader.parse_central_directory() + # Apply prefix filter if specified + if prefix: + paginated_entries = [e for e in paginated_entries if e.filename.startswith(prefix)] + + # Return full file entry details with pagination info entries = [] - for filename, entry in reader.entries.items(): - if prefix and not filename.startswith(prefix): - continue + for entry in paginated_entries: entries.append({ "filename": entry.filename, "compressed_size": entry.compressed_size, @@ -1494,12 +1522,28 @@ async def list_ozx_files( "is_directory": entry.is_directory }) reader.close() - return {"entries": entries} + return { + "entries": entries, + "total_count": total_count, + "offset": offset, + "limit": limit, + "has_more": has_more + } else: - # Return just filenames for backward compatibility - files = reader.list_files(prefix) + # Apply prefix filter if specified + if prefix: + paginated_entries = [e for e in paginated_entries if e.filename.startswith(prefix)] + + # Return just filenames with pagination info + files = [e.filename for e in paginated_entries if not e.is_directory] reader.close() - return {"files": files} + return { + "files": files, + "total_count": total_count, + "offset": offset, + "limit": limit, + "has_more": has_more + } except Exception as e: reader.close() diff --git a/fileglancer/ozxzip.py b/fileglancer/ozxzip.py index 39b2b00a..1172b650 100644 --- a/fileglancer/ozxzip.py +++ b/fileglancer/ozxzip.py @@ -10,7 +10,7 @@ import json from dataclasses import dataclass -from typing import Optional, Dict +from 
typing import Optional, Dict, Callable from loguru import logger diff --git a/frontend/src/components/ui/BrowsePage/FileViewer.tsx b/frontend/src/components/ui/BrowsePage/FileViewer.tsx index d9e1d2d9..f831ef87 100644 --- a/frontend/src/components/ui/BrowsePage/FileViewer.tsx +++ b/frontend/src/components/ui/BrowsePage/FileViewer.tsx @@ -17,10 +17,11 @@ import { formatFileSize, formatUnixTimestamp } from '@/utils'; import type { FileOrFolder } from '@/shared.types'; import { useFileContentQuery } from '@/queries/fileContentQueries'; import { - useOzxFileEntriesQuery, + useOzxFileEntriesInfiniteQuery, useOzxFileContentQuery, buildOzxContentUrl } from '@/queries/ozxQueries'; +import type { OzxFileEntry } from '@/queries/ozxQueries'; import { isAnyZipFile, getOzxFilePath } from '@/utils/ozxDetection'; type FileViewerProps = { @@ -107,15 +108,30 @@ const ZipBrowser = ({ file }: { readonly file: FileOrFolder }) => { const { fspName } = useFileBrowserContext(); const ozxPath = getOzxFilePath(file); const { - data: allEntries, + data, isLoading, - error - } = useOzxFileEntriesQuery(fspName, ozxPath); + error, + fetchNextPage, + hasNextPage, + isFetchingNextPage + } = useOzxFileEntriesInfiniteQuery(fspName, ozxPath, 100); const [internalPath, setInternalPath] = useState(''); const [selectedFile, setSelectedFile] = useState(null); + // Flatten all pages into a single array of entries + const allEntries = useMemo(() => { + if (!data?.pages) { + return []; + } + return data.pages.flatMap(page => page.entries); + }, [data]); + + // Get total count from the first page (same across all pages) + const totalCount = data?.pages[0]?.total_count ?? 0; + const loadedCount = allEntries.length; + const items = useMemo(() => { - if (!allEntries) { + if (!allEntries.length) { return []; } @@ -213,7 +229,7 @@ const ZipBrowser = ({ file }: { readonly file: FileOrFolder }) => { return (
-        {/* Breadcrumb header */}
+        {/* Breadcrumb header with progress indicator */}
{internalPath ? ( { ) : null} - + {file.name}/{internalPath} + {totalCount > 0 ? ( + + {loadedCount} of {totalCount} entries + + ) : null}
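// For orientation, a minimal sketch of the header rendered above, with the
// progress indicator wired to the infinite-query state. The component name
// (<Typography>) and classNames are assumptions, not the patch's actual markup:
<div className="flex items-baseline gap-2">
  <Typography className="font-semibold">
    {file.name}/{internalPath}
  </Typography>
  {totalCount > 0 ? (
    <Typography className="text-sm">
      {loadedCount} of {totalCount} entries
    </Typography>
  ) : null}
</div>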
{/* Table view */} @@ -304,7 +328,7 @@ const ZipBrowser = ({ file }: { readonly file: FileOrFolder }) => { ))} - {items.length === 0 ? ( + {items.length === 0 && !hasNextPage ? ( This folder is empty @@ -313,6 +337,22 @@ const ZipBrowser = ({ file }: { readonly file: FileOrFolder }) => { ) : null} + + {/* Load more button */} + {hasNextPage ? ( +
+ +
+ ) : null}
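// A minimal sketch of the "Load more" control added above. Only the
// onClick/disabled wiring comes from the patch (fetchNextPage and
// isFetchingNextPage from the infinite query); the <Button> component and
// surrounding markup are assumptions:
{hasNextPage ? (
  <div className="flex justify-center p-2">
    <Button onClick={() => fetchNextPage()} disabled={isFetchingNextPage}>
      {isFetchingNextPage ? 'Loading...' : 'Load more'}
    </Button>
  </div>
) : null}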
); diff --git a/frontend/src/queries/ozxQueries.ts b/frontend/src/queries/ozxQueries.ts index e973fca3..5ee02ae2 100644 --- a/frontend/src/queries/ozxQueries.ts +++ b/frontend/src/queries/ozxQueries.ts @@ -4,8 +4,12 @@ * RFC-9 Spec: https://ngff.openmicroscopy.org/rfc/9/index.html */ -import { useQuery } from '@tanstack/react-query'; -import type { UseQueryResult } from '@tanstack/react-query'; +import { useQuery, useInfiniteQuery } from '@tanstack/react-query'; +import type { + UseQueryResult, + UseInfiniteQueryResult, + InfiniteData +} from '@tanstack/react-query'; import { default as log } from '@/logger'; import { buildUrl, sendFetchRequest } from '@/utils'; import { sendRequestAndThrowForNotOk } from './queryUtils'; @@ -31,6 +35,17 @@ export type OzxFileEntry = { is_directory: boolean; }; +/** + * Paginated response for file entries from OZX archive. + */ +export type OzxFileEntriesPage = { + entries: OzxFileEntry[]; + total_count: number; + offset: number; + limit: number; + has_more: boolean; +}; + /** * Build URL for accessing content within an OZX file. * @@ -80,12 +95,21 @@ export function buildOzxMetadataUrl( /** * Build URL for listing files in an OZX archive. + * + * @param fspName - The file share path name + * @param ozxFilePath - Path to the OZX file within the FSP + * @param prefix - Optional prefix to filter files + * @param details - If true, include full file entry details + * @param offset - Number of entries to skip (for pagination) + * @param limit - Maximum entries to return (for pagination) */ export function buildOzxListUrl( fspName: string, ozxFilePath: string, prefix?: string, - details?: boolean + details?: boolean, + offset?: number, + limit?: number ): string { const pathSegment = `${fspName}/${ozxFilePath}`; const params: Record = {}; @@ -95,6 +119,12 @@ export function buildOzxListUrl( if (details) { params.details = 'true'; } + if (offset !== undefined) { + params.offset = String(offset); + } + if (limit !== undefined) { + params.limit = String(limit); + } return buildUrl( '/api/ozx-list/', pathSegment, @@ -232,6 +262,77 @@ export function useOzxFileEntriesQuery( }); } +/** + * Fetch a page of detailed file entries from an OZX archive. + */ +async function fetchOzxFileEntriesPage( + fspName: string, + ozxFilePath: string, + offset: number, + limit: number, + prefix?: string +): Promise { + const url = buildOzxListUrl( + fspName, + ozxFilePath, + prefix, + true, + offset, + limit + ); + const response = (await sendRequestAndThrowForNotOk( + url, + 'GET' + )) as OzxFileEntriesPage; + return response; +} + +/** + * Hook to fetch detailed file entries from an OZX archive with infinite scrolling. + * Loads entries progressively as user requests more. 
+ * + * @param fspName - The file share path name + * @param ozxFilePath - Path to the OZX file within the FSP + * @param pageSize - Number of entries per page (default 100) + * @param enabled - Whether the query should be enabled + */ +export function useOzxFileEntriesInfiniteQuery( + fspName: string | undefined, + ozxFilePath: string | undefined, + pageSize: number = 100, + enabled: boolean = true +): UseInfiniteQueryResult, Error> { + return useInfiniteQuery({ + queryKey: [ + 'ozx', + 'entries-infinite', + fspName || '', + ozxFilePath || '', + pageSize + ], + queryFn: async ({ pageParam = 0 }) => { + if (!fspName || !ozxFilePath) { + throw new Error('fspName and ozxFilePath are required'); + } + return await fetchOzxFileEntriesPage( + fspName, + ozxFilePath, + pageParam, + pageSize + ); + }, + initialPageParam: 0, + getNextPageParam: lastPage => { + if (lastPage.has_more) { + return lastPage.offset + lastPage.limit; + } + return undefined; + }, + enabled: enabled && !!fspName && !!ozxFilePath, + staleTime: 5 * 60 * 1000 + }); +} + /** * Fetch content from within an OZX file. * Supports optional range requests. From 1a904d9db42d554bac472c856945fb12376c1601 Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Wed, 28 Jan 2026 10:09:41 -0500 Subject: [PATCH 8/9] Rename generic ozx components to zip if not ozx specific --- docs/RFC-9-OZX-Implementation.md | 16 +- fileglancer/app.py | 74 +++---- .../__tests__/unitTests/ozxDetection.test.ts | 8 +- .../components/ui/BrowsePage/FileViewer.tsx | 20 +- frontend/src/queries/ozxQueries.ts | 188 +++++++++--------- frontend/src/queries/zarrQueries.ts | 10 +- frontend/src/utils/ozxDetection.ts | 4 +- 7 files changed, 160 insertions(+), 160 deletions(-) diff --git a/docs/RFC-9-OZX-Implementation.md b/docs/RFC-9-OZX-Implementation.md index 4f6ef0ef..d3233796 100644 --- a/docs/RFC-9-OZX-Implementation.md +++ b/docs/RFC-9-OZX-Implementation.md @@ -26,9 +26,9 @@ This document describes the implementation of RFC-9 support for reading OME-Zarr β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ Backend β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ -β”‚ β”‚ /api/ozx-content/ β”‚ β”‚ OZXReader (ozxzip.py) β”‚ β”‚ +β”‚ β”‚ /api/zip-content/ β”‚ β”‚ OZXReader (ozxzip.py) β”‚ β”‚ β”‚ β”‚ /api/ozx-metadata/ │───▢│ - OME metadata parsing β”‚ β”‚ -β”‚ β”‚ /api/ozx-list/ β”‚ β”‚ - jsonFirst optimization β”‚ β”‚ +β”‚ β”‚ /api/zip-list/ β”‚ β”‚ - jsonFirst optimization β”‚ β”‚ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ β”‚ β”‚ β”‚ ZipReader (zipread.py) β”‚ β”‚ β”‚ β”‚ - ZIP64 support β”‚ β”‚ @@ -154,7 +154,7 @@ with OZXReader(path) as reader: ### API Endpoints -#### `GET /api/ozx-content/{path_name:path}?subpath={internal_path}` +#### `GET /api/zip-content/{path_name:path}?subpath={internal_path}` Streams file content from within an OZX archive. Supports HTTP Range requests for efficient chunk access. @@ -164,7 +164,7 @@ Streams file content from within an OZX archive. 
Supports HTTP Range requests fo - `Content-Length: {size}` - `Content-Range: bytes {start}-{end}/{total}` (for 206 responses) -#### `HEAD /api/ozx-content/{path_name:path}?subpath={internal_path}` +#### `HEAD /api/zip-content/{path_name:path}?subpath={internal_path}` Returns file metadata without content body. @@ -181,7 +181,7 @@ Returns OZX archive metadata: } ``` -#### `GET /api/ozx-list/{path_name:path}?prefix={optional_prefix}` +#### `GET /api/zip-list/{path_name:path}?prefix={optional_prefix}` Lists files in the OZX archive: @@ -389,14 +389,14 @@ function ZarrViewer({ file, fspName }) { curl http://localhost:7878/api/ozx-metadata/myFSP/path/to/data.ozx # List files -curl http://localhost:7878/api/ozx-list/myFSP/path/to/data.ozx +curl http://localhost:7878/api/zip-list/myFSP/path/to/data.ozx # Get file content -curl http://localhost:7878/api/ozx-content/myFSP/path/to/data.ozx?subpath=zarr.json +curl http://localhost:7878/api/zip-content/myFSP/path/to/data.ozx?subpath=zarr.json # Get range (for chunk access) curl -H "Range: bytes=0-1023" \ - http://localhost:7878/api/ozx-content/myFSP/path/to/data.ozx?subpath=0/c/0/0/0 + http://localhost:7878/api/zip-content/myFSP/path/to/data.ozx?subpath=0/c/0/0/0 ``` ## Future Enhancements diff --git a/fileglancer/app.py b/fileglancer/app.py index 9f1f77a7..a8c67010 100644 --- a/fileglancer/app.py +++ b/fileglancer/app.py @@ -1215,15 +1215,15 @@ async def get_file_content(request: Request, path_name: str, subpath: Optional[s ) - @app.head("/api/ozx-content/{path_name:path}") - async def head_ozx_file_content( + @app.head("/api/zip-content/{path_name:path}") + async def head_zip_file_content( path_name: str, - subpath: str = Query(..., description="Path within the OZX file"), + subpath: str = Query(..., description="Path within the ZIP file"), username: str = Depends(get_current_user) ): - """HEAD request for OZX file content (returns size, supports Range).""" + """HEAD request for ZIP file content (returns size, supports Range).""" - filestore_name, _, ozx_subpath = path_name.partition('/') + filestore_name, _, zip_subpath = path_name.partition('/') with _get_user_context(username): filestore, error = _get_filestore(filestore_name) @@ -1231,20 +1231,20 @@ async def head_ozx_file_content( raise HTTPException(status_code=404 if "not found" in error else 500, detail=error) try: - ozx_file_path = filestore._check_path_in_root(ozx_subpath) + zip_file_path = filestore._check_path_in_root(zip_subpath) except RootCheckError as e: raise HTTPException(status_code=400, detail=str(e)) - if not is_zip_file(ozx_file_path): - raise HTTPException(status_code=400, detail="Not an OZX file") + if not is_zip_file(zip_file_path): + raise HTTPException(status_code=400, detail="Not a ZIP file") try: - reader = OZXReader(ozx_file_path) + reader = OZXReader(zip_file_path) reader.open() except FileNotFoundError: - raise HTTPException(status_code=404, detail="OZX file not found") + raise HTTPException(status_code=404, detail="ZIP file not found") except (InvalidZipError, OZXReaderError) as e: - raise HTTPException(status_code=400, detail=f"Invalid OZX file: {e}") + raise HTTPException(status_code=400, detail=f"Invalid ZIP file: {e}") # Parse central directory and get entry (outside user context) try: @@ -1252,7 +1252,7 @@ async def head_ozx_file_content( entry = reader.get_entry(subpath) if entry is None: reader.close() - raise HTTPException(status_code=404, detail="File not found in OZX archive") + raise HTTPException(status_code=404, detail="File not found in ZIP archive") 
file_size = entry.uncompressed_size content_type = guess_content_type(subpath) @@ -1274,19 +1274,19 @@ async def head_ozx_file_content( raise HTTPException(status_code=500, detail=str(e)) - @app.get("/api/ozx-content/{path_name:path}") - async def get_ozx_file_content( + @app.get("/api/zip-content/{path_name:path}") + async def get_zip_file_content( request: Request, path_name: str, - subpath: str = Query(..., description="Path within the OZX file"), + subpath: str = Query(..., description="Path within the ZIP file"), username: str = Depends(get_current_user) ): """ - Stream file content from within an OZX archive. + Stream file content from within a ZIP archive. Supports HTTP Range requests for efficient chunk access. """ - filestore_name, _, ozx_subpath = path_name.partition('/') + filestore_name, _, zip_subpath = path_name.partition('/') with _get_user_context(username): filestore, error = _get_filestore(filestore_name) @@ -1294,20 +1294,20 @@ async def get_ozx_file_content( raise HTTPException(status_code=404 if "not found" in error else 500, detail=error) try: - ozx_file_path = filestore._check_path_in_root(ozx_subpath) + zip_file_path = filestore._check_path_in_root(zip_subpath) except RootCheckError as e: raise HTTPException(status_code=400, detail=str(e)) - if not is_zip_file(ozx_file_path): - raise HTTPException(status_code=400, detail="Not an OZX file") + if not is_zip_file(zip_file_path): + raise HTTPException(status_code=400, detail="Not a ZIP file") try: - reader = OZXReader(ozx_file_path) + reader = OZXReader(zip_file_path) reader.open() except FileNotFoundError: - raise HTTPException(status_code=404, detail="OZX file not found") + raise HTTPException(status_code=404, detail="ZIP file not found") except (InvalidZipError, OZXReaderError) as e: - raise HTTPException(status_code=400, detail=f"Invalid OZX file: {e}") + raise HTTPException(status_code=400, detail=f"Invalid ZIP file: {e}") # Parse central directory and get entry (outside user context) # The file handle retains access rights @@ -1316,7 +1316,7 @@ async def get_ozx_file_content( entry = reader.get_entry(subpath) if entry is None: reader.close() - raise HTTPException(status_code=404, detail="File not found in OZX archive") + raise HTTPException(status_code=404, detail="File not found in ZIP archive") content_type = guess_content_type(subpath) file_size = entry.uncompressed_size @@ -1380,10 +1380,10 @@ async def stream_full(): except FileNotFoundError: reader.close() - raise HTTPException(status_code=404, detail="File not found in OZX archive") + raise HTTPException(status_code=404, detail="File not found in ZIP archive") except Exception as e: reader.close() - logger.exception(f"Error reading OZX content: {e}") + logger.exception(f"Error reading ZIP content: {e}") raise HTTPException(status_code=500, detail=str(e)) @@ -1441,8 +1441,8 @@ async def get_ozx_metadata( raise HTTPException(status_code=500, detail=str(e)) - @app.get("/api/ozx-list/{path_name:path}") - async def list_ozx_files( + @app.get("/api/zip-list/{path_name:path}") + async def list_zip_files( path_name: str, prefix: str = Query('', description="Filter files by prefix"), details: bool = Query(False, description="Include file details (size, compression)"), @@ -1451,7 +1451,7 @@ async def list_ozx_files( username: str = Depends(get_current_user) ): """ - List files in an OZX archive with pagination support. + List files in a ZIP archive with pagination support. Optionally filter by path prefix. 
If details=True, returns full file entry information including size. @@ -1466,7 +1466,7 @@ async def list_ozx_files( - has_more: Whether more entries exist beyond this page """ - filestore_name, _, ozx_subpath = path_name.partition('/') + filestore_name, _, zip_subpath = path_name.partition('/') with _get_user_context(username): filestore, error = _get_filestore(filestore_name) @@ -1474,20 +1474,20 @@ async def list_ozx_files( raise HTTPException(status_code=404 if "not found" in error else 500, detail=error) try: - ozx_file_path = filestore._check_path_in_root(ozx_subpath) + zip_file_path = filestore._check_path_in_root(zip_subpath) except RootCheckError as e: raise HTTPException(status_code=400, detail=str(e)) - if not is_zip_file(ozx_file_path): - raise HTTPException(status_code=400, detail="Not an OZX file") + if not is_zip_file(zip_file_path): + raise HTTPException(status_code=400, detail="Not a ZIP file") try: - reader = OZXReader(ozx_file_path) + reader = OZXReader(zip_file_path) reader.open() except FileNotFoundError: - raise HTTPException(status_code=404, detail="OZX file not found") + raise HTTPException(status_code=404, detail="ZIP file not found") except (InvalidZipError, OZXReaderError) as e: - raise HTTPException(status_code=400, detail=f"Invalid OZX file: {e}") + raise HTTPException(status_code=400, detail=f"Invalid ZIP file: {e}") # List files outside user context try: @@ -1547,7 +1547,7 @@ async def list_ozx_files( except Exception as e: reader.close() - logger.exception(f"Error listing OZX files: {e}") + logger.exception(f"Error listing ZIP files: {e}") raise HTTPException(status_code=500, detail=str(e)) diff --git a/frontend/src/__tests__/unitTests/ozxDetection.test.ts b/frontend/src/__tests__/unitTests/ozxDetection.test.ts index 2418290d..62a9e36e 100644 --- a/frontend/src/__tests__/unitTests/ozxDetection.test.ts +++ b/frontend/src/__tests__/unitTests/ozxDetection.test.ts @@ -4,7 +4,7 @@ import { isOzxFilename, hasOzxFiles, getOzxFiles, - getOzxFilePath + getZipFilePath } from '@/utils/ozxDetection'; import { detectOzxZarrVersions } from '@/queries/zarrQueries'; import type { FileOrFolder } from '@/shared.types'; @@ -109,15 +109,15 @@ describe('getOzxFiles', () => { }); }); -describe('getOzxFilePath', () => { +describe('getZipFilePath', () => { it('should return path without leading slash', () => { const file = createFile('data.ozx', '/path/to/data.ozx'); - expect(getOzxFilePath(file)).toBe('path/to/data.ozx'); + expect(getZipFilePath(file)).toBe('path/to/data.ozx'); }); it('should return path unchanged if no leading slash', () => { const file = createFile('data.ozx', 'path/to/data.ozx'); - expect(getOzxFilePath(file)).toBe('path/to/data.ozx'); + expect(getZipFilePath(file)).toBe('path/to/data.ozx'); }); }); diff --git a/frontend/src/components/ui/BrowsePage/FileViewer.tsx b/frontend/src/components/ui/BrowsePage/FileViewer.tsx index f831ef87..7ae68a24 100644 --- a/frontend/src/components/ui/BrowsePage/FileViewer.tsx +++ b/frontend/src/components/ui/BrowsePage/FileViewer.tsx @@ -17,12 +17,12 @@ import { formatFileSize, formatUnixTimestamp } from '@/utils'; import type { FileOrFolder } from '@/shared.types'; import { useFileContentQuery } from '@/queries/fileContentQueries'; import { - useOzxFileEntriesInfiniteQuery, - useOzxFileContentQuery, - buildOzxContentUrl + useZipFileEntriesInfiniteQuery, + useZipFileContentQuery, + buildZipContentUrl } from '@/queries/ozxQueries'; -import type { OzxFileEntry } from '@/queries/ozxQueries'; -import { isAnyZipFile, getOzxFilePath } 
from '@/utils/ozxDetection'; +import type { ZipFileEntry } from '@/queries/ozxQueries'; +import { isAnyZipFile, getZipFilePath } from '@/utils/ozxDetection'; type FileViewerProps = { readonly file: FileOrFolder; @@ -39,7 +39,7 @@ const InternalFileViewer = ({ readonly internalPath: string; readonly onBack: () => void; }) => { - const { data, isLoading, error } = useOzxFileContentQuery( + const { data, isLoading, error } = useZipFileContentQuery( fspName, ozxPath, internalPath @@ -106,7 +106,7 @@ type ZipBrowserItem = { const ZipBrowser = ({ file }: { readonly file: FileOrFolder }) => { const { fspName } = useFileBrowserContext(); - const ozxPath = getOzxFilePath(file); + const ozxPath = getZipFilePath(file); const { data, isLoading, @@ -114,12 +114,12 @@ const ZipBrowser = ({ file }: { readonly file: FileOrFolder }) => { fetchNextPage, hasNextPage, isFetchingNextPage - } = useOzxFileEntriesInfiniteQuery(fspName, ozxPath, 100); + } = useZipFileEntriesInfiniteQuery(fspName, ozxPath, 100); const [internalPath, setInternalPath] = useState(''); const [selectedFile, setSelectedFile] = useState(null); // Flatten all pages into a single array of entries - const allEntries = useMemo(() => { + const allEntries = useMemo(() => { if (!data?.pages) { return []; } @@ -218,7 +218,7 @@ const ZipBrowser = ({ file }: { readonly file: FileOrFolder }) => { if (!fspName) { return; } - const url = buildOzxContentUrl(fspName, ozxPath, itemPath); + const url = buildZipContentUrl(fspName, ozxPath, itemPath); const link = document.createElement('a'); link.href = url; link.download = itemName; diff --git a/frontend/src/queries/ozxQueries.ts b/frontend/src/queries/ozxQueries.ts index 5ee02ae2..742a3a5b 100644 --- a/frontend/src/queries/ozxQueries.ts +++ b/frontend/src/queries/ozxQueries.ts @@ -25,9 +25,9 @@ export type OzxMetadataResponse = { }; /** - * A file entry within an OZX archive with full details. + * A file entry within a ZIP archive with full details. */ -export type OzxFileEntry = { +export type ZipFileEntry = { filename: string; compressed_size: number; uncompressed_size: number; @@ -36,10 +36,10 @@ export type OzxFileEntry = { }; /** - * Paginated response for file entries from OZX archive. + * Paginated response for file entries from a ZIP archive. */ -export type OzxFileEntriesPage = { - entries: OzxFileEntry[]; +export type ZipFileEntriesPage = { + entries: ZipFileEntry[]; total_count: number; offset: number; limit: number; @@ -47,38 +47,38 @@ export type OzxFileEntriesPage = { }; /** - * Build URL for accessing content within an OZX file. + * Build URL for accessing content within a ZIP file. 
* * @param fspName - The file share path name - * @param ozxFilePath - Path to the OZX file within the FSP - * @param internalPath - Path within the OZX archive - * @returns Properly encoded URL for the OZX content endpoint + * @param zipFilePath - Path to the ZIP file within the FSP + * @param internalPath - Path within the ZIP archive + * @returns Properly encoded URL for the ZIP content endpoint */ -export function buildOzxContentUrl( +export function buildZipContentUrl( fspName: string, - ozxFilePath: string, + zipFilePath: string, internalPath: string ): string { - // Build the path segment: fspName/ozxFilePath - const pathSegment = `${fspName}/${ozxFilePath}`; - return buildUrl('/api/ozx-content/', pathSegment, { subpath: internalPath }); + // Build the path segment: fspName/zipFilePath + const pathSegment = `${fspName}/${zipFilePath}`; + return buildUrl('/api/zip-content/', pathSegment, { subpath: internalPath }); } /** - * Build full URL for accessing content within an OZX file. + * Build full URL for accessing content within a ZIP file. * Returns absolute URL suitable for external use (e.g., zarrita stores). * * @param fspName - The file share path name - * @param ozxFilePath - Path to the OZX file within the FSP - * @param internalPath - Path within the OZX archive + * @param zipFilePath - Path to the ZIP file within the FSP + * @param internalPath - Path within the ZIP archive * @returns Absolute URL */ -export function getOzxContentUrl( +export function getZipContentUrl( fspName: string, - ozxFilePath: string, + zipFilePath: string, internalPath: string ): string { - const relativePath = buildOzxContentUrl(fspName, ozxFilePath, internalPath); + const relativePath = buildZipContentUrl(fspName, zipFilePath, internalPath); return new URL(relativePath, window.location.origin).href; } @@ -94,24 +94,24 @@ export function buildOzxMetadataUrl( } /** - * Build URL for listing files in an OZX archive. + * Build URL for listing files in a ZIP archive. * * @param fspName - The file share path name - * @param ozxFilePath - Path to the OZX file within the FSP + * @param zipFilePath - Path to the ZIP file within the FSP * @param prefix - Optional prefix to filter files * @param details - If true, include full file entry details * @param offset - Number of entries to skip (for pagination) * @param limit - Maximum entries to return (for pagination) */ -export function buildOzxListUrl( +export function buildZipListUrl( fspName: string, - ozxFilePath: string, + zipFilePath: string, prefix?: string, details?: boolean, offset?: number, limit?: number ): string { - const pathSegment = `${fspName}/${ozxFilePath}`; + const pathSegment = `${fspName}/${zipFilePath}`; const params: Record = {}; if (prefix) { params.prefix = prefix; @@ -126,7 +126,7 @@ export function buildOzxListUrl( params.limit = String(limit); } return buildUrl( - '/api/ozx-list/', + '/api/zip-list/', pathSegment, Object.keys(params).length > 0 ? params : null ); @@ -173,14 +173,14 @@ export function useOzxMetadataQuery( } /** - * Fetch list of files in an OZX archive. + * Fetch list of files in a ZIP archive. 
*/ -async function fetchOzxFileList( +async function fetchZipFileList( fspName: string, - ozxFilePath: string, + zipFilePath: string, prefix?: string ): Promise { - const url = buildOzxListUrl(fspName, ozxFilePath, prefix); + const url = buildZipListUrl(fspName, zipFilePath, prefix); const response = (await sendRequestAndThrowForNotOk(url, 'GET')) as { files: string[]; }; @@ -188,93 +188,93 @@ async function fetchOzxFileList( } /** - * Fetch detailed file entries from an OZX archive. + * Fetch detailed file entries from a ZIP archive. */ -async function fetchOzxFileEntries( +async function fetchZipFileEntries( fspName: string, - ozxFilePath: string, + zipFilePath: string, prefix?: string -): Promise { - const url = buildOzxListUrl(fspName, ozxFilePath, prefix, true); +): Promise { + const url = buildZipListUrl(fspName, zipFilePath, prefix, true); const response = (await sendRequestAndThrowForNotOk(url, 'GET')) as { - entries: OzxFileEntry[]; + entries: ZipFileEntry[]; }; return response.entries; } /** - * Hook to fetch list of files in an OZX archive. + * Hook to fetch list of files in a ZIP archive. * * @param fspName - The file share path name - * @param ozxFilePath - Path to the OZX file within the FSP + * @param zipFilePath - Path to the ZIP file within the FSP * @param prefix - Optional prefix to filter files * @param enabled - Whether the query should be enabled */ -export function useOzxFileListQuery( +export function useZipFileListQuery( fspName: string | undefined, - ozxFilePath: string | undefined, + zipFilePath: string | undefined, prefix?: string, enabled: boolean = true ): UseQueryResult { return useQuery({ - queryKey: ['ozx', 'files', fspName || '', ozxFilePath || '', prefix || ''], + queryKey: ['zip', 'files', fspName || '', zipFilePath || '', prefix || ''], queryFn: async () => { - if (!fspName || !ozxFilePath) { - throw new Error('fspName and ozxFilePath are required'); + if (!fspName || !zipFilePath) { + throw new Error('fspName and zipFilePath are required'); } - return await fetchOzxFileList(fspName, ozxFilePath, prefix); + return await fetchZipFileList(fspName, zipFilePath, prefix); }, - enabled: enabled && !!fspName && !!ozxFilePath, + enabled: enabled && !!fspName && !!zipFilePath, staleTime: 5 * 60 * 1000 }); } /** - * Hook to fetch detailed file entries from an OZX archive. + * Hook to fetch detailed file entries from a ZIP archive. 
* * @param fspName - The file share path name - * @param ozxFilePath - Path to the OZX file within the FSP + * @param zipFilePath - Path to the ZIP file within the FSP * @param prefix - Optional prefix to filter files * @param enabled - Whether the query should be enabled */ -export function useOzxFileEntriesQuery( +export function useZipFileEntriesQuery( fspName: string | undefined, - ozxFilePath: string | undefined, + zipFilePath: string | undefined, prefix?: string, enabled: boolean = true -): UseQueryResult { +): UseQueryResult { return useQuery({ queryKey: [ - 'ozx', + 'zip', 'entries', fspName || '', - ozxFilePath || '', + zipFilePath || '', prefix || '' ], queryFn: async () => { - if (!fspName || !ozxFilePath) { - throw new Error('fspName and ozxFilePath are required'); + if (!fspName || !zipFilePath) { + throw new Error('fspName and zipFilePath are required'); } - return await fetchOzxFileEntries(fspName, ozxFilePath, prefix); + return await fetchZipFileEntries(fspName, zipFilePath, prefix); }, - enabled: enabled && !!fspName && !!ozxFilePath, + enabled: enabled && !!fspName && !!zipFilePath, staleTime: 5 * 60 * 1000 }); } /** - * Fetch a page of detailed file entries from an OZX archive. + * Fetch a page of detailed file entries from a ZIP archive. */ -async function fetchOzxFileEntriesPage( +async function fetchZipFileEntriesPage( fspName: string, - ozxFilePath: string, + zipFilePath: string, offset: number, limit: number, prefix?: string -): Promise { - const url = buildOzxListUrl( +): Promise { + const url = buildZipListUrl( fspName, - ozxFilePath, + zipFilePath, prefix, true, offset, @@ -283,40 +283,40 @@ async function fetchOzxFileEntriesPage( const response = (await sendRequestAndThrowForNotOk( url, 'GET' - )) as OzxFileEntriesPage; + )) as ZipFileEntriesPage; return response; } /** - * Hook to fetch detailed file entries from an OZX archive with infinite scrolling. + * Hook to fetch detailed file entries from a ZIP archive with infinite scrolling. * Loads entries progressively as user requests more. * * @param fspName - The file share path name - * @param ozxFilePath - Path to the OZX file within the FSP + * @param zipFilePath - Path to the ZIP file within the FSP * @param pageSize - Number of entries per page (default 100) * @param enabled - Whether the query should be enabled */ -export function useOzxFileEntriesInfiniteQuery( +export function useZipFileEntriesInfiniteQuery( fspName: string | undefined, - ozxFilePath: string | undefined, + zipFilePath: string | undefined, pageSize: number = 100, enabled: boolean = true -): UseInfiniteQueryResult, Error> { +): UseInfiniteQueryResult, Error> { return useInfiniteQuery({ queryKey: [ - 'ozx', + 'zip', 'entries-infinite', fspName || '', - ozxFilePath || '', + zipFilePath || '', pageSize ], queryFn: async ({ pageParam = 0 }) => { - if (!fspName || !ozxFilePath) { - throw new Error('fspName and ozxFilePath are required'); + if (!fspName || !zipFilePath) { + throw new Error('fspName and zipFilePath are required'); } - return await fetchOzxFileEntriesPage( + return await fetchZipFileEntriesPage( fspName, - ozxFilePath, + zipFilePath, pageParam, pageSize ); @@ -328,18 +328,18 @@ export function useOzxFileEntriesInfiniteQuery( } return undefined; }, - enabled: enabled && !!fspName && !!ozxFilePath, + enabled: enabled && !!fspName && !!zipFilePath, staleTime: 5 * 60 * 1000 }); } /** - * Fetch content from within an OZX file. + * Fetch content from within a ZIP file. * Supports optional range requests. 
*/ -export async function fetchOzxContent( +export async function fetchZipContent( fspName: string, - ozxFilePath: string, + zipFilePath: string, internalPath: string, options?: { signal?: AbortSignal; @@ -347,7 +347,7 @@ export async function fetchOzxContent( rangeEnd?: number; } ): Promise { - const url = buildOzxContentUrl(fspName, ozxFilePath, internalPath); + const url = buildZipContentUrl(fspName, zipFilePath, internalPath); const headers: HeadersInit = {}; if (options?.rangeStart !== undefined && options?.rangeEnd !== undefined) { @@ -362,7 +362,7 @@ export async function fetchOzxContent( }); if (!response.ok && response.status !== 206) { - throw new Error(`Failed to fetch OZX content: ${response.status}`); + throw new Error(`Failed to fetch ZIP content: ${response.status}`); } return new Uint8Array(await response.arrayBuffer()); @@ -389,7 +389,7 @@ export class OzxFetchStore { this.fspName = fspName; this.ozxPath = ozxPath; // Compute base URL for logging - this.baseUrl = getOzxContentUrl(fspName, ozxPath, ''); + this.baseUrl = getZipContentUrl(fspName, ozxPath, ''); log.debug('Created OzxFetchStore for', this.baseUrl); } @@ -401,7 +401,7 @@ export class OzxFetchStore { */ async get(key: string): Promise { try { - const url = buildOzxContentUrl(this.fspName, this.ozxPath, key); + const url = buildZipContentUrl(this.fspName, this.ozxPath, key); const response = await sendFetchRequest(url, 'GET'); if (!response.ok) { @@ -433,7 +433,7 @@ export class OzxFetchStore { length: number ): Promise { try { - const url = buildOzxContentUrl(this.fspName, this.ozxPath, key); + const url = buildZipContentUrl(this.fspName, this.ozxPath, key); const response = await fetch(url, { method: 'GET', credentials: 'include', @@ -469,7 +469,7 @@ export class OzxFetchStore { */ async has(key: string): Promise { try { - const url = buildOzxContentUrl(this.fspName, this.ozxPath, key); + const url = buildZipContentUrl(this.fspName, this.ozxPath, key); const response = await fetch(url, { method: 'HEAD', credentials: 'include' @@ -487,7 +487,7 @@ export class OzxFetchStore { * @returns Array of file paths */ async list(prefix?: string): Promise { - return await fetchOzxFileList(this.fspName, this.ozxPath, prefix); + return await fetchZipFileList(this.fspName, this.ozxPath, prefix); } /** @@ -499,29 +499,29 @@ export class OzxFetchStore { } /** - * Hook to fetch content of a file within an OZX archive. + * Hook to fetch content of a file within a ZIP archive. 
*/ -export function useOzxFileContentQuery( +export function useZipFileContentQuery( fspName: string | undefined, - ozxFilePath: string | undefined, + zipFilePath: string | undefined, internalPath: string | undefined, enabled: boolean = true ): UseQueryResult { return useQuery({ queryKey: [ - 'ozx', + 'zip', 'content', fspName || '', - ozxFilePath || '', + zipFilePath || '', internalPath || '' ], queryFn: async () => { - if (!fspName || !ozxFilePath || !internalPath) { - throw new Error('fspName, ozxFilePath, and internalPath are required'); + if (!fspName || !zipFilePath || !internalPath) { + throw new Error('fspName, zipFilePath, and internalPath are required'); } - return await fetchOzxContent(fspName, ozxFilePath, internalPath); + return await fetchZipContent(fspName, zipFilePath, internalPath); }, - enabled: enabled && !!fspName && !!ozxFilePath && !!internalPath, + enabled: enabled && !!fspName && !!zipFilePath && !!internalPath, staleTime: 5 * 60 * 1000 }); } diff --git a/frontend/src/queries/zarrQueries.ts b/frontend/src/queries/zarrQueries.ts index 5bf42b6a..a4d8960f 100644 --- a/frontend/src/queries/zarrQueries.ts +++ b/frontend/src/queries/zarrQueries.ts @@ -11,8 +11,8 @@ import { fetchFileAsJson } from './queryUtils'; import { isOzxFile } from '@/utils/ozxDetection'; import { OzxFetchStore, - getOzxContentUrl, - useOzxFileListQuery + getZipContentUrl, + useZipFileListQuery } from './ozxQueries'; import type { FileOrFolder } from '@/shared.types'; @@ -372,7 +372,7 @@ async function fetchOzxZarrMetadata( const availableVersions = detectOzxZarrVersions(files); // Get the base URL for OME-Zarr viewers (using empty internal path) - const baseUrl = getOzxContentUrl(fspName, ozxFilePath, ''); + const baseUrl = getZipContentUrl(fspName, ozxFilePath, ''); // Default to Zarr v3 when available if (availableVersions.includes('v3')) { @@ -483,7 +483,7 @@ export function useOzxZarrMetadataQuery( const { fspName, ozxFile } = params; // First, get the file list from the OZX - const fileListQuery = useOzxFileListQuery( + const fileListQuery = useZipFileListQuery( fspName, ozxFile?.path, undefined, @@ -519,4 +519,4 @@ export function useOzxZarrMetadataQuery( // Re-export OZX detection utilities for convenience export { isOzxFile } from '@/utils/ozxDetection'; -export { OzxFetchStore, getOzxContentUrl } from './ozxQueries'; +export { OzxFetchStore, getZipContentUrl } from './ozxQueries'; diff --git a/frontend/src/utils/ozxDetection.ts b/frontend/src/utils/ozxDetection.ts index c721596b..dda44a21 100644 --- a/frontend/src/utils/ozxDetection.ts +++ b/frontend/src/utils/ozxDetection.ts @@ -67,13 +67,13 @@ export function getOzxFiles(files: FileOrFolder[]): FileOrFolder[] { } /** - * Extract the path from a file for OZX API calls. + * Extract the path from a file for ZIP/OZX API calls. * Removes leading slashes and normalizes the path. 
 *
 * @param file - The file to get the path from
 * @returns Normalized path suitable for API calls
 */
-export function getOzxFilePath(file: FileOrFolder): string {
+export function getZipFilePath(file: FileOrFolder): string {
   let path = file.path;
   // Remove leading slash if present
   if (path.startsWith('/')) {

From 11045d05fce0f460d3a98886fc70ff783ed72529 Mon Sep 17 00:00:00 2001
From: Mark Kittisopikul
Date: Wed, 28 Jan 2026 15:39:12 -0500
Subject: [PATCH 9/9] Fix JSON formatting for files in an OZX archive

---
 .../components/ui/BrowsePage/FileViewer.tsx | 46 ++++++++++++++++---
 1 file changed, 40 insertions(+), 6 deletions(-)

diff --git a/frontend/src/components/ui/BrowsePage/FileViewer.tsx b/frontend/src/components/ui/BrowsePage/FileViewer.tsx
index 7ae68a24..a0e48ee6 100644
--- a/frontend/src/components/ui/BrowsePage/FileViewer.tsx
+++ b/frontend/src/components/ui/BrowsePage/FileViewer.tsx
@@ -45,6 +45,7 @@ const InternalFileViewer = ({
     internalPath
   );
   const [isDarkMode, setIsDarkMode] = useState(false);
+  const [formatJson, setFormatJson] = useState(true);

   useEffect(() => {
     const checkDarkMode = () =>
@@ -67,10 +68,23 @@ const InternalFileViewer = ({
   const content = data ? new TextDecoder().decode(data) : '';
   const language = getLanguageFromExtension(internalPath);
+  const isJsonFile = language === 'json';
+
+  // Format JSON if toggle is enabled and content is valid JSON
+  let displayContent = content;
+  if (isJsonFile && formatJson && content) {
+    try {
+      const parsed = JSON.parse(content);
+      displayContent = JSON.stringify(parsed, null, 2);
+    } catch {
+      // If JSON parsing fails, show original content
+      displayContent = content;
+    }
+  }

   return (
-
-
+
+
- + {internalPath} + {isJsonFile ? ( +
+ + Format JSON + + setFormatJson(!formatJson)} + /> +
+ ) : null}
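// A minimal sketch of the Format JSON toggle added above. The checked/onChange
// wiring follows the formatJson state from the patch; the <Switch> component
// and surrounding markup are assumptions:
{isJsonFile ? (
  <label className="flex items-center gap-2">
    <span className="text-sm">Format JSON</span>
    <Switch
      checked={formatJson}
      onChange={() => setFormatJson(!formatJson)}
    />
  </label>
) : null}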
-
+
-            {content}
+            {displayContent}
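For reference, a minimal sketch of walking the paginated `/api/zip-list/` endpoint outside of React Query. The response fields (`entries`, `total_count`, `offset`, `limit`, `has_more`) match the docstring and types introduced above; the raw URL construction and `credentials: 'include'` are assumptions (the frontend goes through `buildZipListUrl` and `sendRequestAndThrowForNotOk` instead), and path segments are not URL-encoded here:

```typescript
// Sketch: fetch every page of zip entries using offset/limit pagination.
// The offset advance mirrors getNextPageParam: next page starts at
// offset + limit, and iteration stops once has_more is false.
type ZipFileEntry = {
  filename: string;
  compressed_size: number;
  uncompressed_size: number;
  compression_method: number;
  is_directory: boolean;
};

type ZipFileEntriesPage = {
  entries: ZipFileEntry[];
  total_count: number;
  offset: number;
  limit: number;
  has_more: boolean;
};

async function listAllZipEntries(
  fspName: string,
  zipFilePath: string,
  limit = 100
): Promise<ZipFileEntry[]> {
  const all: ZipFileEntry[] = [];
  let offset = 0;
  let hasMore = true;
  while (hasMore) {
    const url =
      `/api/zip-list/${fspName}/${zipFilePath}` +
      `?details=true&offset=${offset}&limit=${limit}`;
    const response = await fetch(url, { credentials: 'include' });
    if (!response.ok) {
      throw new Error(`zip-list failed: ${response.status}`);
    }
    const page = (await response.json()) as ZipFileEntriesPage;
    all.push(...page.entries);
    offset = page.offset + page.limit;
    hasMore = page.has_more;
  }
  return all;
}
```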