Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions src/core/ai-index.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ const baseConfig: ResolvedAeoConfig = {
aiIndex: true,
schema: true,
},
aiIndex: {
maxChunkLength: 2000,
maxKeywords: 10,
},
robots: { allow: ['/'], disallow: [], crawlDelay: 0, sitemap: '' },
widget: {
enabled: true,
Expand Down Expand Up @@ -137,6 +141,59 @@ describe('generateAIIndex', () => {
expect(entry?.keywords).not.toContain('ux');
});

// Verifies that aiIndex.maxChunkLength from the resolved config reaches the chunker:
// with a 20-character limit, the three-paragraph page below is expected to yield
// three entries, numbered 0..2 in metadata.chunkIndex.
it('should use configured max chunk length', () => {
  const content = [
    'First paragraph content.',
    'Second paragraph content.',
    'Third paragraph content.',
  ].join('\n\n');

  const config: ResolvedAeoConfig = {
    ...baseConfig,
    aiIndex: { ...baseConfig.aiIndex, maxChunkLength: 20 },
    pages: [{ pathname: '/chunked', title: 'Chunked', content }],
  };

  const index = JSON.parse(generateAIIndex(config));

  // Collect only the entries that belong to the page under test.
  const pageEntries: any[] = [];
  for (const candidate of index.entries) {
    if (candidate.url === 'https://example.com/chunked') {
      pageEntries.push(candidate);
    }
  }
  // Order by chunk index so the assertion does not depend on emission order.
  pageEntries.sort((left: any, right: any) => left.metadata.chunkIndex - right.metadata.chunkIndex);

  expect(pageEntries).toHaveLength(3);
  const chunkIndexes = pageEntries.map((item: any) => item.metadata.chunkIndex);
  expect(chunkIndexes).toEqual([0, 1, 2]);
});

// Verifies that aiIndex.maxKeywords caps the keyword list: with a limit of 2,
// only the two most frequent words of the page content are expected.
it('should use configured max keywords', () => {
  const keywordsPage = {
    pathname: '/keywords',
    title: 'Keywords',
    content: 'alpha alpha alpha beta beta gamma delta epsilon',
  };

  const config: ResolvedAeoConfig = {
    ...baseConfig,
    aiIndex: { ...baseConfig.aiIndex, maxKeywords: 2 },
    pages: [{ ...keywordsPage }],
  };

  const parsed = JSON.parse(generateAIIndex(config));
  const match = parsed.entries.find(
    (candidate: any) => candidate.url === 'https://example.com/keywords',
  );

  expect(match?.keywords).toEqual(['alpha', 'beta']);
});

it('should handle pages without content', () => {
const result = generateAIIndex(baseConfig);
const index = JSON.parse(result);
Expand Down
16 changes: 9 additions & 7 deletions src/core/ai-index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ import { createHash } from 'crypto';
import type { ResolvedAeoConfig, AIIndexEntry } from '../types';
import { parseFrontmatter, extractTitle } from './utils';

function extractKeywords(content: string): string[] {
function extractKeywords(content: string, maxKeywords: number): string[] {
if (maxKeywords < 1) return [];

const words = content
.normalize('NFC')
.toLowerCase()
Expand All @@ -22,11 +24,11 @@ function extractKeywords(content: string): string[] {

return Object.entries(wordCount)
.sort((a, b) => b[1] - a[1])
.slice(0, 10)
.slice(0, maxKeywords)
.map(([word]) => word);
}

function chunkContent(content: string, maxLength: number = 2000): string[] {
function chunkContent(content: string, maxLength: number): string[] {
const chunks: string[] = [];
const paragraphs = content.split('\n\n');

Expand Down Expand Up @@ -66,9 +68,9 @@ function collectAIIndexEntries(dir: string, config: ResolvedAeoConfig, base: str
const urlPath = relativePath.replace(/\.mdx?$/, '');
const url = `${config.url}/${urlPath}`;

const chunks = chunkContent(mainContent);
const chunks = chunkContent(mainContent, config.aiIndex.maxChunkLength);
const title = frontmatter.title || extractTitle(mainContent);
const keywords = extractKeywords(mainContent);
const keywords = extractKeywords(mainContent, config.aiIndex.maxKeywords);

chunks.forEach((chunk, index) => {
const id = createHash('sha256')
Expand Down Expand Up @@ -115,8 +117,8 @@ export function generateAIIndex(config: ResolvedAeoConfig): string {
const content = page.content || '';

if (content) {
const chunks = chunkContent(content);
const keywords = extractKeywords(content);
const chunks = chunkContent(content, config.aiIndex.maxChunkLength);
const keywords = extractKeywords(content, config.aiIndex.maxKeywords);

chunks.forEach((chunk, index) => {
const id = createHash('sha256')
Expand Down
1 change: 1 addition & 0 deletions src/core/audit.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ function makeConfig(overrides: Partial<ResolvedAeoConfig> = {}): ResolvedAeoConf
outDir: './out',
contentDir: '',
generators: { robotsTxt: true, llmsTxt: true, llmsFullTxt: true, rawMarkdown: true, manifest: true, sitemap: true, aiIndex: true, schema: true },
aiIndex: { maxChunkLength: 2000, maxKeywords: 10 },
robots: { allow: ['/'], disallow: [], crawlDelay: 0, sitemap: '/sitemap.xml' },
schema: {
enabled: true,
Expand Down
4 changes: 4 additions & 0 deletions src/core/generate-wrapper.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ const baseConfig: ResolvedAeoConfig = {
aiIndex: true,
schema: true,
},
aiIndex: {
maxChunkLength: 2000,
maxKeywords: 10,
},
robots: { allow: ['/'], disallow: [], crawlDelay: 0, sitemap: '' },
widget: {
enabled: true,
Expand Down
4 changes: 4 additions & 0 deletions src/core/llms-full.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ const baseConfig: ResolvedAeoConfig = {
aiIndex: true,
schema: true,
},
aiIndex: {
maxChunkLength: 2000,
maxKeywords: 10,
},
robots: { allow: ['/'], disallow: [], crawlDelay: 0, sitemap: '' },
widget: {
enabled: true,
Expand Down
4 changes: 4 additions & 0 deletions src/core/llms-txt.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ const baseConfig: ResolvedAeoConfig = {
aiIndex: true,
schema: true,
},
aiIndex: {
maxChunkLength: 2000,
maxKeywords: 10,
},
robots: { allow: ['/'], disallow: [], crawlDelay: 0, sitemap: '' },
widget: {
enabled: true,
Expand Down
4 changes: 4 additions & 0 deletions src/core/manifest.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ const baseConfig: ResolvedAeoConfig = {
aiIndex: true,
schema: true,
},
aiIndex: {
maxChunkLength: 2000,
maxKeywords: 10,
},
robots: { allow: ['/'], disallow: [], crawlDelay: 0, sitemap: '' },
widget: {
enabled: true,
Expand Down
4 changes: 4 additions & 0 deletions src/core/raw-markdown.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@ const createConfig = (overrides = {}): ResolvedAeoConfig => ({
aiIndex: true,
schema: true,
},
aiIndex: {
maxChunkLength: 2000,
maxKeywords: 10,
},
robots: { allow: ['/'], disallow: [], crawlDelay: 0, sitemap: '' },
widget: {
enabled: true,
Expand Down
1 change: 1 addition & 0 deletions src/core/report.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ function makeConfig(): ResolvedAeoConfig {
outDir: './out',
contentDir: '',
generators: { robotsTxt: true, llmsTxt: true, llmsFullTxt: true, rawMarkdown: true, manifest: true, sitemap: true, aiIndex: true, schema: true },
aiIndex: { maxChunkLength: 2000, maxKeywords: 10 },
robots: { allow: ['/'], disallow: [], crawlDelay: 0, sitemap: '/sitemap.xml' },
schema: { enabled: true, organization: { name: 'Test Co', url: 'https://test.com', logo: '', sameAs: [] }, defaultType: 'WebPage' },
og: { enabled: true, image: '', twitterHandle: '', type: 'website' },
Expand Down
6 changes: 5 additions & 1 deletion src/core/robots.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ describe('generateRobotsTxt', () => {
aiIndex: true,
schema: true,
},
aiIndex: {
maxChunkLength: 2000,
maxKeywords: 10,
},
robots: { allow: ['/'], disallow: [], crawlDelay: 0, sitemap: '' },
widget: {
enabled: true,
Expand Down Expand Up @@ -104,4 +108,4 @@ describe('generateRobotsTxt', () => {
expect(bingbotMatches.length).toBe(1)
expect(semrushMatches.length).toBe(1)
})
})
})
6 changes: 5 additions & 1 deletion src/core/sitemap.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ describe('generateSitemap', () => {
aiIndex: true,
schema: true,
},
aiIndex: {
maxChunkLength: 2000,
maxKeywords: 10,
},
robots: { allow: ['/'], disallow: [], crawlDelay: 0, sitemap: '' },
widget: {
enabled: true,
Expand Down Expand Up @@ -281,4 +285,4 @@ describe('generateSitemap', () => {
expect(sitemap).toContain('<loc>https://example.com/docs/guide</loc>');
expect(sitemap).toContain('<loc>https://example.com/docs/api/reference</loc>');
});
});
});
50 changes: 50 additions & 0 deletions src/core/utils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import {
parseFrontmatter,
bumpHeadings,
extractTitle,
validateConfig,
} from './utils';

vi.mock('./detect', () => ({
Expand All @@ -24,6 +25,10 @@ describe('utils', () => {
expect(result.url).toBe('https://example.com');
expect(result.generators.robotsTxt).toBe(true);
expect(result.generators.llmsTxt).toBe(true);
expect(result.aiIndex).toEqual({
maxChunkLength: 2000,
maxKeywords: 10,
});
expect(result.widget.enabled).toBe(true);
expect(result.widget.position).toBe('bottom-right');
});
Expand Down Expand Up @@ -54,6 +59,17 @@ describe('utils', () => {
expect(result.widget.theme.background).toBe('rgba(18, 18, 24, 0.9)');
});

// A partially specified aiIndex block keeps the supplied value and falls back
// to the default (2000) for the omitted maxChunkLength.
it('should handle partial aiIndex config', () => {
  const { aiIndex } = resolveConfig({ aiIndex: { maxKeywords: 5 } });

  expect(aiIndex.maxKeywords).toBe(5);
  expect(aiIndex.maxChunkLength).toBe(2000);
});

it('should resolve robots config', () => {
const result = resolveConfig({
robots: { disallow: ['/admin'], crawlDelay: 5 },
Expand All @@ -65,6 +81,40 @@ describe('utils', () => {
});
});

// Covers the aiIndex checks in validateConfig: non-positive limits must produce
// a warning naming the offending option, while valid or omitted values must not.
describe('validateConfig', () => {
  it('warns when aiIndex.maxChunkLength is zero or negative', () => {
    expect(validateConfig({ aiIndex: { maxChunkLength: 0 } })).toEqual(
      expect.arrayContaining([expect.stringContaining('aiIndex.maxChunkLength')])
    );
    expect(validateConfig({ aiIndex: { maxChunkLength: -1 } })).toEqual(
      expect.arrayContaining([expect.stringContaining('aiIndex.maxChunkLength')])
    );
  });

  it('warns when aiIndex.maxKeywords is zero or negative', () => {
    expect(validateConfig({ aiIndex: { maxKeywords: 0 } })).toEqual(
      expect.arrayContaining([expect.stringContaining('aiIndex.maxKeywords')])
    );
    // The title promises the negative case too; exercise it like the maxChunkLength test does.
    expect(validateConfig({ aiIndex: { maxKeywords: -1 } })).toEqual(
      expect.arrayContaining([expect.stringContaining('aiIndex.maxKeywords')])
    );
  });

  it('does not warn for valid aiIndex values', () => {
    const warnings = validateConfig({
      title: 'My Site',
      url: 'https://mysite.com',
      aiIndex: { maxChunkLength: 1500, maxKeywords: 8 },
    });
    expect(warnings.some((w) => w.includes('aiIndex'))).toBe(false);
  });

  it('does not warn when aiIndex is omitted', () => {
    const warnings = validateConfig({
      title: 'My Site',
      url: 'https://mysite.com',
    });
    expect(warnings.some((w) => w.includes('aiIndex'))).toBe(false);
  });
});

describe('parseFrontmatter', () => {
it('should extract frontmatter from markdown', () => {
const input = '---\ntitle: My Title\ndescription: My Desc\n---\n# Content';
Expand Down
14 changes: 13 additions & 1 deletion src/core/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,14 @@ export function validateConfig(config: AeoConfig): string[] {
warnings.push('robots.crawlDelay should be a positive number');
}

if (config.aiIndex?.maxChunkLength !== undefined && config.aiIndex.maxChunkLength <= 0) {
warnings.push('aiIndex.maxChunkLength should be a positive number — sub-1 values produce one chunk per paragraph');
}

if (config.aiIndex?.maxKeywords !== undefined && config.aiIndex.maxKeywords <= 0) {
warnings.push('aiIndex.maxKeywords should be a positive number — sub-1 values produce an empty keywords array');
}

return warnings;
}

Expand All @@ -54,6 +62,10 @@ export function resolveConfig(config: AeoConfig = {}): ResolvedAeoConfig {
aiIndex: config.generators?.aiIndex !== false,
schema: config.generators?.schema !== false,
},
aiIndex: {
maxChunkLength: config.aiIndex?.maxChunkLength ?? 2000,
maxKeywords: config.aiIndex?.maxKeywords ?? 10,
},
Comment thread
greptile-apps[bot] marked this conversation as resolved.
robots: {
allow: config.robots?.allow || ['/'],
disallow: config.robots?.disallow || [],
Expand Down Expand Up @@ -199,4 +211,4 @@ export function getAllMarkdownFiles(

scanDirectory(projectRoot);
return files;
}
}
10 changes: 9 additions & 1 deletion src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ export interface AeoConfig {
aiIndex?: boolean;
schema?: boolean;
};
aiIndex?: {
maxChunkLength?: number;
maxKeywords?: number;
};
robots?: {
allow?: string[];
disallow?: string[];
Expand Down Expand Up @@ -77,6 +81,10 @@ export interface ResolvedAeoConfig {
aiIndex: boolean;
schema: boolean;
};
aiIndex: {
maxChunkLength: number;
maxKeywords: number;
};
robots: {
allow: string[];
disallow: string[];
Expand Down Expand Up @@ -172,4 +180,4 @@ export interface FrameworkInfo {
framework: FrameworkType;
contentDir: string;
outDir: string;
}
}
13 changes: 13 additions & 0 deletions website/src/content/docs/reference/configuration.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ export default defineConfig({
schema: true,
},

// Configure ai-index.json generation
aiIndex: {
maxChunkLength: 2000,
maxKeywords: 10,
},

// Customize robots.txt
robots: {
allow: ['/'],
Expand Down Expand Up @@ -103,6 +109,13 @@ export default defineConfig({
| `aiIndex` | `boolean` | `true` | Generate `ai-index.json` |
| `schema` | `boolean` | `true` | Generate JSON-LD structured data |

### `aiIndex`

| Option | Type | Default | Description |
Comment thread
greptile-apps[bot] marked this conversation as resolved.
|--------|------|---------|-------------|
| `maxChunkLength` | `number` | `2000` | Target chunk length (soft limit). Chunks split on `\n\n` paragraph boundaries, so a single long paragraph can exceed this value. Set with embedding-model token limits in mind. |
| `maxKeywords` | `number` | `10` | Maximum number of keywords extracted per `ai-index.json` entry. |

### `robots`

| Option | Type | Default | Description |
Expand Down
Loading