Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions packages/lib/src/__tests__/sheet.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,16 @@ describe('adjustFormulaReferences', () => {
it('clamps to row/column zero instead of going negative', () => {
  // A1 is already the top-left cell, so a negative shift must leave it as-is.
  const shifted = adjustFormulaReferences('=A1', -5, -5);

  expect(shifted).toBe('=A1');
});

it('handles long uppercase runs without regex backtracking', () => {
  // A very long run of column letters that never ends in a row number: the
  // formula contains no cell reference, so it must come back unchanged, and
  // it must do so quickly.
  const formula = `=${'A'.repeat(80_000)}!`;

  // performance.now() is monotonic, so a wall-clock adjustment during the run
  // cannot distort the measured duration the way Date.now() could.
  const startedAt = performance.now();
  const adjusted = adjustFormulaReferences(formula, 1, 1);
  const elapsedMs = performance.now() - startedAt;

  expect(adjusted).toBe(formula);
  expect(elapsedMs).toBeLessThan(1000);
});
});

describe('sheet sanitisation', () => {
Expand Down
137 changes: 137 additions & 0 deletions packages/lib/src/services/__tests__/drive-search-service.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
import { beforeEach, describe, expect, it, vi } from 'vitest';

// Mock functions are created through vi.hoisted so that they can be referenced
// from inside the vi.mock factories below.
const mockTransaction = vi.hoisted(() => vi.fn());
const mockGetUserAccessLevel = vi.hoisted(() => vi.fn());

// Stub of '@pagespace/db': the only database entry point exposed is
// db.transaction (backed by mockTransaction). Table columns are plain strings
// and the query operators return plain objects that record their arguments.
vi.mock('@pagespace/db', () => ({
db: {
transaction: mockTransaction,
},
pages: {
id: 'pages.id',
title: 'pages.title',
type: 'pages.type',
parentId: 'pages.parentId',
content: 'pages.content',
driveId: 'pages.driveId',
isTrashed: 'pages.isTrashed',
},
drives: {
id: 'drives.id',
slug: 'drives.slug',
name: 'drives.name',
},
eq: vi.fn((a, b) => ({ op: 'eq', a, b })),
and: vi.fn((...args) => ({ op: 'and', args })),
inArray: vi.fn((a, b) => ({ op: 'inArray', a, b })),
// Tagged-template stub: captures the literal chunks and interpolated values.
sql: vi.fn((strings: TemplateStringsArray, ...values: unknown[]) => ({
strings: Array.from(strings),
values,
})),
}));

// Stub of the permissions module; each test controls getUserAccessLevel
// through mockGetUserAccessLevel.
vi.mock('../../permissions/permissions', () => ({
getUserAccessLevel: mockGetUserAccessLevel,
getUserDriveAccess: vi.fn(),
}));

import { regexSearchPages } from '../drive-search-service';

/** Shape of a page row as returned by the mocked select query in these tests. */
interface SearchRow {
  id: string;
  title: string;
  type: string;
  parentId: string | null;
  content: string;
}

/**
 * Wires mockTransaction so that the callback passed to db.transaction receives
 * a fake transaction whose select().from().where().limit() chain resolves to
 * `rows` and whose execute() resolves to undefined.
 *
 * @param rows - Rows the mocked query chain should resolve with.
 * @returns Every mock in the chain, so tests can assert on individual calls.
 */
function setupTransactionResult(rows: SearchRow[]) {
  // Build the query chain from its terminal call backwards.
  const limit = vi.fn().mockResolvedValue(rows);
  const where = vi.fn().mockReturnValue({ limit });
  const from = vi.fn().mockReturnValue({ where });
  const select = vi.fn().mockReturnValue({ from });
  const execute = vi.fn().mockResolvedValue(undefined);

  const fakeTx = { execute, select };
  const runInFakeTransaction = async (
    callback: (tx: unknown) => Promise<SearchRow[]>
  ) => {
    return callback(fakeTx);
  };
  mockTransaction.mockImplementation(runInFakeTransaction);

  return { execute, select, from, where, limit };
}

/**
 * Security-oriented tests for regexSearchPages:
 *  - literal patterns produce line previews without compiling a user regex,
 *  - non-literal patterns skip line previews entirely,
 *  - a PostgreSQL statement-timeout cancellation yields a stable empty response.
 */
describe('regexSearchPages security behavior', () => {
  beforeEach(() => {
    // clearAllMocks wipes call history only; implementations are re-installed
    // per test (here and via setupTransactionResult).
    vi.clearAllMocks();
    mockGetUserAccessLevel.mockResolvedValue({ canView: true });
  });

  it('returns line previews for literal patterns without compiling user regex', async () => {
    const rows: SearchRow[] = [
      {
        id: 'page-1',
        title: 'Alpha',
        type: 'DOCUMENT',
        parentId: null,
        // Case-sensitive: only 'hello' matches, not 'HELLO' (aligns with PostgreSQL ~ operator)
        content: 'hello world\nnothing here\nhello again',
      },
    ];

    const { execute } = setupTransactionResult(rows);
    const response = await regexSearchPages('drive-1', 'user-1', 'hello', null, {
      searchIn: 'content',
      maxResults: 50,
    });

    // One execute call: the statement-timeout setup inside the transaction.
    expect(execute).toHaveBeenCalledTimes(1);
    expect(response.totalResults).toBe(1);
    expect(response.results[0]?.matchingLines).toEqual([
      { lineNumber: 1, content: 'hello world' },
      { lineNumber: 3, content: 'hello again' },
    ]);
    expect(response.results[0]?.totalMatches).toBe(2);
  });

  it('skips line previews for non-literal regex patterns', async () => {
    const rows: SearchRow[] = [
      {
        id: 'page-1',
        title: 'ReDoS Test',
        type: 'DOCUMENT',
        parentId: null,
        content: `${'A'.repeat(60)}!`,
      },
    ];

    setupTransactionResult(rows);
    // '(A+)+$' is a classic catastrophic-backtracking pattern; the page is still
    // reported (the database matched it) but no JS-side matching is attempted.
    const response = await regexSearchPages('drive-1', 'user-1', '(A+)+$', null, {
      searchIn: 'content',
      maxResults: 50,
    });

    expect(response.totalResults).toBe(1);
    expect(response.results[0]?.matchingLines).toEqual([]);
    expect(response.results[0]?.totalMatches).toBe(0);
  });

  it('returns a stable response when PostgreSQL cancels due to statement timeout', async () => {
    // Reject once only: vi.clearAllMocks() does not reset implementations, so a
    // persistent rejection would leak into any test that runs after this one.
    mockTransaction.mockRejectedValueOnce({
      code: '57014',
      message: 'canceling statement due to statement timeout',
    });

    const response = await regexSearchPages('drive-1', 'user-1', '(A+)+$', null, {
      searchIn: 'content',
      maxResults: 50,
    });

    expect(response.totalResults).toBe(0);
    expect(response.results).toEqual([]);
    expect(response.summary).toContain('timed out');
    expect(response.stats).toEqual({
      pagesScanned: 0,
      pagesWithAccess: 0,
      documentTypes: [],
    });
  });
});
178 changes: 139 additions & 39 deletions packages/lib/src/services/drive-search-service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,24 @@ export interface RegexSearchResponse {
nextSteps: string[];
}

/**
 * Security constants for regex search to prevent ReDoS attacks.
 *
 * MAX_REGEX_PATTERN_LENGTH: Limits user input length to bound worst-case matching time.
 * MAX_REGEX_RESULTS: Upper bound applied to the caller-supplied maxResults.
 * MAX_REGEX_LINE_PREVIEWS: Maximum number of matching-line previews returned per page.
 * MAX_REGEX_LINE_CONTENT_LENGTH: Maximum number of characters kept from each previewed line.
 * REGEX_QUERY_TIMEOUT_MS: PostgreSQL statement timeout to abort runaway regex queries.
 *   Set to 3 seconds as a balance between allowing complex legitimate searches and
 *   preventing denial-of-service from pathological patterns.
 * POSTGRES_STATEMENT_TIMEOUT_CODE: Error code used to recognise a query that was
 *   cancelled because the statement timeout elapsed.
 * REGEX_META_CHARS: Identifies patterns containing regex metacharacters; only literal
 *   patterns are safe for JavaScript-side line extraction.
 */
const MAX_REGEX_PATTERN_LENGTH = 500;
const MAX_REGEX_RESULTS = 100;
const MAX_REGEX_LINE_PREVIEWS = 5;
const MAX_REGEX_LINE_CONTENT_LENGTH = 200;
const REGEX_QUERY_TIMEOUT_MS = 3000;
const POSTGRES_STATEMENT_TIMEOUT_CODE = '57014';
// No `g` flag, so .test() keeps no state between calls.
const REGEX_META_CHARS = /[\\^$.*+?()[\]{}|]/;

// ============================================================================
// Service Functions
// ============================================================================
Expand Down Expand Up @@ -241,6 +259,81 @@ export async function globSearchPages(
};
}

/**
 * Reports whether `error` represents a PostgreSQL statement-timeout cancellation.
 *
 * An error qualifies when it, or any error reachable through its `cause` chain,
 * either carries the statement-timeout error code or has a message containing
 * "statement timeout" (case-insensitive).
 *
 * The `cause` chain is followed because query layers may wrap the driver error
 * in their own error type and expose the original only as `cause`; checking just
 * the outer error would then turn a timeout into an unhandled failure.
 * NOTE(review): confirm whether the database layer in use wraps driver errors.
 *
 * @param error - Value caught from the failed query; may be anything.
 * @returns true when a statement timeout is detected, false otherwise.
 */
function isRegexQueryTimeoutError(error: unknown): boolean {
  // Bounded walk so a self-referencing `cause` cannot loop forever.
  const maxCauseDepth = 5;
  let current: unknown = error;

  for (let depth = 0; depth <= maxCauseDepth; depth++) {
    if (!current || typeof current !== 'object') {
      return false;
    }

    const candidate = current as { code?: unknown; message?: unknown; cause?: unknown };
    if (candidate.code === POSTGRES_STATEMENT_TIMEOUT_CODE) {
      return true;
    }

    if (
      typeof candidate.message === 'string'
      && candidate.message.toLowerCase().includes('statement timeout')
    ) {
      return true;
    }

    current = candidate.cause;
  }

  return false;
}

/**
 * Tells whether `pattern` is a plain literal, i.e. contains none of the
 * characters matched by REGEX_META_CHARS.
 *
 * @param pattern - User-supplied search pattern.
 * @returns true when no regex metacharacter is present (including for '').
 */
function isLiteralRegexPattern(pattern: string): boolean {
  const containsMetaChar = REGEX_META_CHARS.test(pattern);
  return !containsMetaChar;
}

/**
 * Builds the empty response returned when the regex query was cancelled by the
 * statement timeout: no results, zeroed stats, and hints for a cheaper search.
 *
 * @param driveSlug - Drive slug echoed back in the response (may be null).
 * @param pattern - The pattern that was searched, echoed back unchanged.
 * @param searchIn - The search scope that was requested, echoed back unchanged.
 * @returns A RegexSearchResponse describing the timeout.
 */
function buildRegexTimeoutResponse(
  driveSlug: string | null,
  pattern: string,
  searchIn: 'content' | 'title' | 'both'
): RegexSearchResponse {
  const emptyStats = {
    pagesScanned: 0,
    pagesWithAccess: 0,
    documentTypes: [],
  };

  const recoveryHints = [
    'Use a less complex regex pattern',
    `Keep patterns short (under ${MAX_REGEX_PATTERN_LENGTH} characters)`,
    'Narrow your search scope with searchIn=title',
  ];

  return {
    driveSlug,
    pattern,
    searchIn,
    results: [],
    totalResults: 0,
    summary: 'Regex search timed out. Try a simpler pattern.',
    stats: emptyStats,
    nextSteps: recoveryHints,
  };
}

/**
 * Finds the lines of `content` that contain `literalPattern` as a plain
 * substring. No regular expression is built from the pattern.
 *
 * Lines are separated by LF or CRLF; the line terminator is never part of a
 * returned preview, so CRLF content does not leak a trailing '\r'.
 *
 * @param content - Full page content to scan.
 * @param literalPattern - Text to look for, matched case-sensitively.
 * @returns `matchingLines`: at most MAX_REGEX_LINE_PREVIEWS previews, each with a
 *   1-based line number and the line cut to MAX_REGEX_LINE_CONTENT_LENGTH
 *   characters; `totalMatches`: the number of matching lines, including those
 *   beyond the preview cap. An empty pattern yields no matches.
 */
function extractLiteralMatchingLines(
  content: string,
  literalPattern: string
): { matchingLines: Array<{ lineNumber: number; content: string }>; totalMatches: number } {
  if (literalPattern.length === 0) {
    return { matchingLines: [], totalMatches: 0 };
  }

  const matchingLines: Array<{ lineNumber: number; content: string }> = [];
  let totalMatches = 0;

  // Accept both LF and CRLF so previews never end with a stray carriage return.
  const lines = content.split(/\r?\n/);
  for (const [index, line] of lines.entries()) {
    // Case-sensitive matching to align with PostgreSQL ~ operator
    if (!line.includes(literalPattern)) {
      continue;
    }

    // Every matching line is counted, but only the first few are previewed.
    totalMatches += 1;
    if (matchingLines.length < MAX_REGEX_LINE_PREVIEWS) {
      matchingLines.push({
        lineNumber: index + 1,
        content: line.substring(0, MAX_REGEX_LINE_CONTENT_LENGTH),
      });
    }
  }

  return { matchingLines, totalMatches };
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

/**
* Perform regex search on pages in a drive
*/
Expand All @@ -252,19 +345,19 @@ export async function regexSearchPages(
options: RegexSearchOptions = {}
): Promise<RegexSearchResponse> {
const { searchIn = 'content', maxResults = 50 } = options;
const effectiveMaxResults = Math.min(maxResults, 100);
const effectiveMaxResults = Math.min(maxResults, MAX_REGEX_RESULTS);

// Validate and limit pattern length to prevent ReDoS
if (pattern.length > 500) {
if (pattern.length > MAX_REGEX_PATTERN_LENGTH) {
return {
driveSlug,
pattern,
searchIn,
results: [],
totalResults: 0,
summary: 'Pattern too long (max 500 characters)',
summary: `Pattern too long (max ${MAX_REGEX_PATTERN_LENGTH} characters)`,
stats: { pagesScanned: 0, pagesWithAccess: 0, documentTypes: [] },
nextSteps: ['Shorten your regex pattern to under 500 characters'],
nextSteps: [`Shorten your regex pattern to under ${MAX_REGEX_PATTERN_LENGTH} characters`],
};
}

Expand All @@ -289,32 +382,50 @@ export async function regexSearchPages(
whereConditions = and(
eq(pages.driveId, driveId),
eq(pages.isTrashed, false),
sql`${pages.content} ~ ${pgPattern} OR ${pages.title} ~ ${pgPattern}`
sql`(${pages.content} ~ ${pgPattern} OR ${pages.title} ~ ${pgPattern})`
);
}

// Build and execute query
const matchingPages = await db
.select({
id: pages.id,
title: pages.title,
type: pages.type,
parentId: pages.parentId,
content: pages.content,
})
.from(pages)
.where(whereConditions)
.limit(effectiveMaxResults);
let matchingPages: Array<{
id: string;
title: string;
type: string;
parentId: string | null;
content: string;
}>;

// Use original pattern for line-level matching — consistent with PG regex semantics.
// Pattern is length-checked (≤500 chars) and applied per-line (not concatenated).
let lineRegex: RegExp | null = null;
try {
lineRegex = new RegExp(pattern, 'gi');
} catch {
// PG regex syntax may differ from JS — skip line extraction
matchingPages = await db.transaction(async (tx) => {
await tx.execute(
sql`SELECT set_config('statement_timeout', ${String(REGEX_QUERY_TIMEOUT_MS)}, true)`
);
return tx
.select({
id: pages.id,
title: pages.title,
type: pages.type,
parentId: pages.parentId,
content: pages.content,
})
.from(pages)
.where(whereConditions)
.limit(effectiveMaxResults);
});
Comment thread
coderabbitai[bot] marked this conversation as resolved.
} catch (error) {
if (isRegexQueryTimeoutError(error)) {
console.warn('[drive-search] Regex query timed out', {
driveId,
patternLength: pattern.length,
searchIn,
});
return buildRegexTimeoutResponse(driveSlug, pattern, searchIn);
}
throw error;
}

// Only perform line previews for literal patterns to avoid user-controlled regex execution.
const canExtractLiteralLineMatches = searchIn !== 'title' && isLiteralRegexPattern(pattern);

// Filter by permissions and build results
const results: RegexSearchResult[] = [];
for (const page of matchingPages) {
Expand Down Expand Up @@ -350,28 +461,17 @@ export async function regexSearchPages(
const semanticPath = `/${[...pathParts, ...parentChain, page.title].join('/')}`;

// Extract matching lines if searching content
const matchingLines: Array<{ lineNumber: number; content: string }> = [];
if (searchIn !== 'title' && lineRegex) {
const lines = page.content.split('\n');
lines.forEach((line, index) => {
// Reset lastIndex for global regex on each line
lineRegex!.lastIndex = 0;
if (lineRegex!.test(line)) {
matchingLines.push({
lineNumber: index + 1,
content: line.substring(0, 200), // Truncate long lines
});
}
});
}
const { matchingLines, totalMatches } = canExtractLiteralLineMatches
? extractLiteralMatchingLines(page.content, pattern)
: { matchingLines: [], totalMatches: 0 };

results.push({
pageId: page.id,
title: page.title,
type: page.type,
semanticPath,
matchingLines: matchingLines.slice(0, 5), // Limit to first 5 matches
totalMatches: matchingLines.length,
matchingLines,
totalMatches,
});
}

Expand Down
Loading