Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

import { useIsBinaryFile } from '@studio/components/filesets/hooks/useIsBinaryFile';
import { useQuery, UseQueryResult } from '@tanstack/react-query';

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

📐 Maintainability & Code Quality | 🟡 Minor | ⚡ Quick win

Separate type-only imports.

UseQueryResult is only used as a type. Per coding guidelines, use import type for type-only imports.

📦 Suggested fix
-import { useQuery, UseQueryResult } from '`@tanstack/react-query`';
+import { useQuery } from '`@tanstack/react-query`';
+import type { UseQueryResult } from '`@tanstack/react-query`';
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
import { useQuery, UseQueryResult } from '@tanstack/react-query';
import { useQuery } from '`@tanstack/react-query`';
import type { UseQueryResult } from '`@tanstack/react-query`';
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@web/packages/studio/src/components/filesets/hooks/useIsBinaryFile.test.ts` at
line 5, The import statement on line 5 mixes value and type imports from
`@tanstack/react-query`. Separate UseQueryResult into a type-only import using
import type syntax, keeping useQuery in the regular import statement. This
follows coding guidelines by explicitly marking type-only imports and allows the
compiler to properly tree-shake unused types.

Source: Coding guidelines

import { renderHook } from '@testing-library/react';

vi.mock('@tanstack/react-query');
vi.mock('@nemo/sdk/generated/fetchers/platform');
vi.mock('@nemo/sdk/generated/platform/api', () => ({
getFilesDownloadFileQueryKey: vi.fn(() => ['/files/download']),
}));
vi.mock('axios', () => {
const mockInstance = {
head: vi.fn(),
interceptors: {
request: { use: vi.fn() },
response: { use: vi.fn() },
},
};
return { default: mockInstance };
});

const mockUseQuery = vi.mocked(useQuery);

describe('useIsBinaryFile', () => {
beforeEach(() => {
vi.clearAllMocks();
mockUseQuery.mockReturnValue({
data: false,
isPending: false,
refetch: vi.fn(),
isFetching: false,
isStale: false,
isError: false,
error: null,
failureCount: 0,
failureReason: null,
isFetchNextPageEnabled: undefined,
dataUpdateCount: 0,
dataUpdatedAt: 0,
errorUpdateCount: 0,
errorUpdatedAt: 0,
fetchStatus: 'idle',
status: 'success',
variables: undefined,
queryKey: [],
queryHash: '',
queryFn: undefined,
queryKeyHashFn: undefined,
meta: undefined,
isLoading: false,
isLoadingError: false,
isRefetchError: false,
isSuccess: true,
isObservingMisorderedIndex: false,
} as unknown as UseQueryResult<boolean, Error>);
});

it('returns isBinary=false for .jsonl files without a HEAD request', () => {
const { result } = renderHook(() =>
useIsBinaryFile('default', 'test-dataset', 'data/test.jsonl')
);

expect(result.current).toEqual({ isBinary: false, isLoading: false });
// Query should be disabled for known text extensions
expect(mockUseQuery).toHaveBeenCalledWith(expect.objectContaining({ enabled: false }));
});

it('returns isBinary=false for .json files without a HEAD request', () => {
const { result } = renderHook(() => useIsBinaryFile('default', 'test-dataset', 'config.json'));

expect(result.current).toEqual({ isBinary: false, isLoading: false });
});

it('returns isBinary=false for .csv files without a HEAD request', () => {
const { result } = renderHook(() => useIsBinaryFile('default', 'test-dataset', 'data.csv'));

expect(result.current).toEqual({ isBinary: false, isLoading: false });
});

it('returns isBinary=false for .py files without a HEAD request', () => {
const { result } = renderHook(() => useIsBinaryFile('default', 'test-dataset', 'script.py'));

expect(result.current).toEqual({ isBinary: false, isLoading: false });
});

it('returns isBinary=false for .yaml and .yml files', () => {
const { result } = renderHook(() => useIsBinaryFile('default', 'test-dataset', 'config.yaml'));

expect(result.current).toEqual({ isBinary: false, isLoading: false });
});

it('returns isBinary=false for .md files', () => {
const { result } = renderHook(() => useIsBinaryFile('default', 'test-dataset', 'README.md'));

expect(result.current).toEqual({ isBinary: false, isLoading: false });
});

it('returns isBinary=false when filePath is undefined', () => {
const { result } = renderHook(() => useIsBinaryFile('default', 'test-dataset', undefined));

expect(result.current).toEqual({ isBinary: false, isLoading: false });
});

it('returns isBinary=true for .png files (binary blocklist)', () => {
const { result } = renderHook(() => useIsBinaryFile('default', 'test-dataset', 'image.png'));

expect(result.current).toEqual({ isBinary: true, isLoading: false });
});

it('returns isBinary=true for .zip files (binary blocklist)', () => {
const { result } = renderHook(() => useIsBinaryFile('default', 'test-dataset', 'archive.zip'));

expect(result.current).toEqual({ isBinary: true, isLoading: false });
});

it('enables the HEAD query for unknown extensions', () => {
renderHook(() => useIsBinaryFile('default', 'test-dataset', 'data.unknown'));

expect(mockUseQuery).toHaveBeenCalledWith(expect.objectContaining({ enabled: true }));
});
});
Original file line number Diff line number Diff line change
Expand Up @@ -9,37 +9,104 @@ import { isBinaryExtension } from '@studio/util/binaryFile';
import { useQuery } from '@tanstack/react-query';
import axios from 'axios';

/**
* Extensions that are unambiguously text. The backend HEAD endpoint returns
* `application/octet-stream` for all files, so Content-Type-based detection
* fails for these. This allowlist short-circuits the HEAD request.
*/
const KNOWN_TEXT_EXTENSIONS = new Set([
// Data
'json',
'jsonl',
'csv',
'tsv',
// Code
'py',
'js',
'jsx',
'ts',
'tsx',
'java',
'c',
'cpp',
'h',
'go',
'rs',
'rb',
'php',
'swift',
'kt',
'scala',
'r',
'm',
'sh',
'bash',
'zsh',
'fish',
// Markup / Config
'html',
'htm',
'xml',
'yaml',
'yml',
'toml',
'ini',
'cfg',
'conf',
'jsonc',
'env',
// Text
'txt',
'md',
'rst',
'log',
'diff',
'patch',
// Other
'sql',
'graphql',
'proto',
'dockerfile',
'makefile',
]);

function isKnownTextExtension(path: string): boolean {
const ext = path.split('.').at(-1)?.toLowerCase();
return ext !== undefined && KNOWN_TEXT_EXTENSIONS.has(ext);
}

/**
* Determine whether a fileset file should be treated as binary (no text preview).
*
* Strategy (three tiers):
* 1. Extension in `BINARY_FILE_EXTENSIONS` → binary immediately, no network.
* 2. Extension not in blocklist → HEAD request; `Content-Type` header decides.
* Strategy (four tiers):
* 1. Extension in `KNOWN_TEXT_EXTENSIONS` → text immediately, no network.
* 2. Extension in `BINARY_FILE_EXTENSIONS` → binary immediately, no network.
* 3. Extension not in either list → HEAD request; `Content-Type` decides.
* - `text/*` or known text application types → not binary.
* - Everything else → binary.
* 3. HEAD fails / no Content-Type → assume text (fail-open for preview).
* 4. HEAD fails / no Content-Type → assume text (fail-open for preview).
*
* Returns `{ isBinary, isLoading }`. `isLoading` is true only during the HEAD
* request (tier-2 path); tier-1 resolves synchronously.
* request (tier-3 path); tiers 1-2 resolve synchronously.
*/
export function useIsBinaryFile(
workspace: string,
filesetName: string,
filePath: string | undefined
): { isBinary: boolean; isLoading: boolean } {
const knownText = filePath !== undefined && isKnownTextExtension(filePath);
const blocklisted = filePath !== undefined && isBinaryExtension(filePath);

const { data: headBinary, isPending } = useQuery({
queryKey: ['file-content-type', workspace, filesetName, filePath],
queryFn: async (): Promise<boolean> => {
if (!filePath) return false;
try {
// axios.head() is auth-aware via the interceptor registered when
// '@nemo/sdk/generated/fetchers/platform' is imported above.
const [fileUrl] = getFilesDownloadFileQueryKey(
encodeURIComponent(workspace),
encodeURIComponent(filesetName),
encodeURIComponent(filePath)
encodeURIComponent(filePath!)
);
const res = await axios.head(fileUrl);
const ct = String(res.headers['content-type'] ?? '');
Expand All @@ -48,13 +115,14 @@ export function useIsBinaryFile(
return false; // fail-open: assume text
}
},
enabled: !!filePath && !blocklisted,
enabled: !!filePath && !knownText && !blocklisted,
staleTime: Infinity,
retry: false,
});

if (blocklisted) return { isBinary: true, isLoading: false };
if (!filePath) return { isBinary: false, isLoading: false };
if (knownText) return { isBinary: false, isLoading: false };
if (blocklisted) return { isBinary: true, isLoading: false };
return { isBinary: headBinary ?? false, isLoading: isPending };
}

Expand Down