Skip to content

Commit c65081b

Browse files
vlussenburglb-vincentlactions-uservim-zz
authored
Add readMarkdownWithLinks filter (#825)
* Add readMarkdownWithLinks filter * added test to main file * Update README.md * Remove self-referencing link test case Removed test for self-referencing link in readMarkdown function. * Update read_markdown_with_links.cm --------- Co-authored-by: Vincent Lussenburg <vincent@linearb.io> Co-authored-by: GitHub Actions Bot <actions@github.com> Co-authored-by: Ofer Affias <ofer@linearb.io>
1 parent d5b272d commit c65081b

File tree

3 files changed

+407
-0
lines changed

3 files changed

+407
-0
lines changed
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
??? note "Plugin Code: readMarkdownWithLinks"
2+
```javascript
3+
--8<-- "plugins/filters/readMarkdownWithLinks/index.js"
4+
```
5+
<div class="result" markdown>
6+
<span>
7+
</span>
8+
</div>
9+
10+
The main use case for this plugin is enhancing LinearB AI code reviews with comprehensive documentation context.
11+
12+
### Basic Usage
13+
```yaml
14+
guidelines: |
15+
{{ "REVIEW_RULES.md" | readMarkdownWithLinks | dump }}
16+
17+
Additional Context:
18+
{{ "README.md" | readMarkdownWithLinks(maxDepth=2) | dump }}
19+
```
20+
21+
## Configuration Options
22+
23+
- `followLinks` (boolean, default: `true`): Whether to follow internal markdown links
24+
- `maxDepth` (number, default: `3`): Maximum depth to follow links to prevent excessive recursion
25+
26+
## API
27+
28+
### `readMarkdownWithLinks(filePath, options)`
29+
30+
Returns the combined content of the main file and all linked files as a formatted string.
31+
32+
### `readMarkdown(filePath, options)`
33+
34+
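Internal helper (not exported by the plugin); its result is what `readMarkdownWithLinks` returns when called with `structured=true`. Returns a structured object containing: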
35+
- `path`: Absolute path to the file
36+
- `content`: File content
37+
- `error`: Error message if the file could not be read or a circular reference was detected, otherwise `null`
38+
- `linkedFiles`: Array of linked file objects with the same structure
39+
40+
## Example Output
41+
```
42+
=== main.md ===
43+
# Main Document
44+
Content of main document...
45+
46+
=== related.md ===
47+
# Related Document
48+
Content of related document...
49+
50+
=== subdoc.md ===
51+
# Sub Document
52+
Content of sub document...
53+
```
54+
55+
56+
??? example "gitStream CM Example: readMarkdownWithLinks"
57+
```yaml+jinja
58+
--8<-- "plugins/filters/readMarkdownWithLinks/read_markdown_with_links.cm"
59+
```
60+
<div class="result" markdown>
61+
<span>
62+
</span>
63+
</div>
64+
65+
[Download Source Code](https://github.com/linear-b/gitstream/tree/main/plugins/filters/readMarkdownWithLinks)
Lines changed: 270 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,270 @@
1+
const fs = require('fs');
2+
const path = require('path');
3+
4+
/**
 * Safely read a file with path traversal protection.
 *
 * The path is normalized, then rejected when it is absolute (POSIX or Windows
 * form) or when any of its segments is exactly `..`. Only whole segments are
 * checked, so file names that merely contain two dots (e.g. `notes..v2.md`)
 * are still readable. Symlinks are not resolved by this check.
 *
 * @param {string} filePath - Relative path to the file to read
 * @returns {string|null} UTF-8 file content, or null if the path is rejected
 *   or the file cannot be read
 */
function readFile(filePath) {
  if (typeof filePath !== 'string') {
    console.log(`Invalid path: ${filePath}`);
    return null;
  }

  const normalizedPath = path.normalize(filePath);

  // Check both flavours so a Windows-style absolute path is rejected even
  // when running on POSIX (where path.isAbsolute would not recognise it).
  const isAbsolutePath =
    path.isAbsolute(normalizedPath) || path.win32.isAbsolute(normalizedPath);

  // After normalization any remaining `..` segment points outside the
  // current directory. Split on both separators to stay conservative.
  const escapesBaseDir = normalizedPath.split(/[\\/]+/).includes('..');

  if (isAbsolutePath || escapesBaseDir) {
    console.log(`Invalid path: ${filePath}`);
    return null;
  }

  try {
    return fs.readFileSync(normalizedPath, 'utf8');
  } catch (error) {
    // Best effort: callers treat null as "file not available".
    console.log(`Error reading file ${filePath}: ${error.message}`);
    return null;
  }
}
25+
26+
/**
 * Extract internal markdown links from content.
 *
 * Matches inline links such as [text](./file.md), [text](../file.md),
 * [text](file.md), [text](file.md#section) and [text](file.md "Title").
 * A link is internal when its target has no URL scheme (http:, https:,
 * mailto:, ...), is not protocol-relative (//host/...), and points at a
 * `.md` file once any title and `#fragment` are removed.
 *
 * @param {string} content - The markdown content to scan for links
 * @param {string} basePath - Base directory path for resolving relative links
 * @returns {Array} Array of link objects: `text` (link label), `path` (target
 *   exactly as written in the markdown) and `resolvedPath` (file path joined
 *   onto basePath, without title or fragment)
 */
function extractInternalLinks(content, basePath) {
  if (typeof content !== 'string') {
    return [];
  }

  const linkRegex = /\[([^\]]+)\]\(([^)]+)\)/g;
  const trailingTitle = /\s+(?:"[^"]*"|'[^']*')\s*$/;
  // A scheme ("https:", "mailto:") or a protocol-relative "//" prefix marks
  // the target as external. Testing for a real scheme instead of the "http"
  // prefix keeps local files such as "http-notes.md" internal.
  const externalTarget = /^(?:[a-z][a-z0-9+.-]*:|\/\/)/i;

  const internalLinks = [];

  for (const [, linkText, linkPath] of content.matchAll(linkRegex)) {
    // Drop an optional link title, then the in-page anchor, to get the file.
    const filePart = linkPath.trim().replace(trailingTitle, '').split('#')[0];

    if (externalTarget.test(filePart) || !filePart.endsWith('.md')) {
      continue;
    }

    internalLinks.push({
      text: linkText,
      path: linkPath,
      resolvedPath: path.join(basePath, filePart),
    });
  }

  return internalLinks;
}
55+
56+
/**
 * Read a markdown file and recursively follow its internal links.
 *
 * Each link branch receives its own copy of the visited set, so cycle
 * detection applies along a single chain of links; a file reachable through
 * two different branches is included once per branch.
 *
 * @param {string} filePath - Path to the markdown file
 * @param {Object} options - Configuration options
 * @param {boolean} options.followLinks - Whether to follow internal links (default: true)
 * @param {number} options.maxDepth - Maximum depth to follow links (default: 3)
 * @param {Set} options.visited - Internal set to track visited files (prevent cycles)
 * @param {number} options.currentDepth - Current depth (internal)
 * @returns {Object} Object with `path` (normalized path), `content` (string or
 *   null), `error` (message or null), `linkedFiles` (array of results with the
 *   same shape plus `linkText` and `originalPath`), and `depthLimitReached: true`
 *   when the file sits at the depth limit and its links were not followed
 */
function readMarkdown(filePath, options = {}) {
  const {
    followLinks = true,
    maxDepth = 3,
    visited = new Set(),
    currentDepth = 0,
  } = options;

  const normalizedPath = path.normalize(filePath);

  // Check if we've already visited this file on the current chain (prevent cycles)
  if (visited.has(normalizedPath)) {
    return {
      path: normalizedPath,
      content: null,
      error: 'Circular reference detected',
      linkedFiles: [],
    };
  }

  const depthLimitReached = currentDepth >= maxDepth;

  // Files at the depth limit are leaves: they are read but never expanded,
  // so they do not need to be recorded as visited.
  if (!depthLimitReached) {
    visited.add(normalizedPath);
  }

  const content = readFile(normalizedPath);

  const result = {
    path: normalizedPath,
    content,
    // Report read failures on every path, including at the depth limit,
    // so a missing file never looks like a successful read.
    error: content === null ? 'File not found or could not be read' : null,
    linkedFiles: [],
  };

  if (depthLimitReached) {
    result.depthLimitReached = true;
    return result;
  }

  if (content === null || !followLinks) {
    return result;
  }

  // Links are resolved relative to the directory of the file containing them.
  const basePath = path.dirname(normalizedPath);

  for (const link of extractInternalLinks(content, basePath)) {
    const linkedFileResult = readMarkdown(link.resolvedPath, {
      followLinks,
      maxDepth,
      visited: new Set(visited), // Create a new set for each branch
      currentDepth: currentDepth + 1,
    });

    result.linkedFiles.push({
      linkText: link.text,
      originalPath: link.path,
      ...linkedFileResult,
    });
  }

  return result;
}
141+
142+
/**
 * @module readMarkdownWithLinks
 * @description Reads a markdown file and follows internal links to create a comprehensive document view.
 * Prevents circular references and supports configurable depth limits.
 * @param {string} filePath - Path to the markdown file to read
 * @param {Object} [options={}] - Configuration options for link following
 * @param {boolean} [options.followLinks=true] - Whether to follow internal links
 * @param {number} [options.maxDepth=3] - Maximum depth to follow links
 * @param {boolean} [options.structured=false] - Return structured data instead of combined text
 * @returns {string|Object} Combined content of the file and all linked files with headers,
 *   or the structured result tree when `structured` is true
 * @example {{ "docs/README.md" | readMarkdownWithLinks }}
 * @example {{ "docs/README.md" | readMarkdownWithLinks(maxDepth=2) }}
 * @license MIT
 */
function readMarkdownWithLinks(filePath, options = {}) {
  const { followLinks = true, maxDepth = 3, structured = false } = options;

  // Always start from a fresh traversal state, whatever the caller passed.
  const tree = readMarkdown(filePath, {
    followLinks,
    maxDepth,
    visited: new Set(),
    currentDepth: 0,
  });

  if (structured) {
    return tree;
  }

  // Depth-first rendering: a file's own section first, then each linked file
  // one level deeper. Entries without content (errors, empty files) emit no
  // section of their own, but their linked files are still visited.
  const render = (node, depth = 0) => {
    const pieces = [];

    if (node.content) {
      const header = `${' '.repeat(depth)}=== ${path.basename(node.path)} ===\n`;
      pieces.push(header, `${node.content}\n\n`);
    }

    for (const child of node.linkedFiles || []) {
      pieces.push(render(child, depth + 1));
    }

    return pieces.join('');
  };

  return render(tree);
}
196+
197+
module.exports = readMarkdownWithLinks;
198+
199+
200+
201+
202+
// ============================================================================
// TESTS (for local development only)
// ============================================================================
// Runs only when this file is executed directly (node index.js), never when
// it is loaded as a plugin. Fixtures are written to ./test-files and are
// always removed afterwards, including when an assertion fails.
if (require.main === module) {
  // Throws on failure so the surrounding try/finally can still clean up.
  const assert = (condition, message) => {
    if (!condition) {
      throw new Error(message);
    }
    console.log(`✅ ${message}`);
  };

  try {
    // Setup
    fs.mkdirSync('./test-files/sub', { recursive: true });
    fs.writeFileSync('./test-files/main.md', '# Main\n[Related](./related.md)\n[Another](./another.md)\n[External](https://example.com)');
    fs.writeFileSync('./test-files/related.md', '# Related\n[Sub](./sub/subdoc.md)');
    fs.writeFileSync('./test-files/another.md', '# Another');
    fs.writeFileSync('./test-files/sub/subdoc.md', '# Sub\n[Main](../main.md)');

    console.log('🧪 Running tests\n');

    // Test 1: Basic reading
    let r = readMarkdown('./test-files/main.md', { followLinks: false });
    assert(r.content?.includes('# Main'), 'Basic file reading');

    // Test 2: Link following
    r = readMarkdown('./test-files/main.md', { maxDepth: 2 });
    assert(r.linkedFiles.length === 2, 'Follows 2 links');
    assert(r.linkedFiles[0].linkedFiles.length === 1, 'Nested link following');

    // Test 3: Circular reference (main -> related -> subdoc -> main)
    r = readMarkdown('./test-files/main.md', { maxDepth: 5 });
    const circularRef = r.linkedFiles[0].linkedFiles[0].linkedFiles[0];
    assert(circularRef?.error === 'Circular reference detected', 'Circular reference detection');

    // Test 4: Depth limit
    r = readMarkdown('./test-files/main.md', { maxDepth: 1 });
    assert(r.linkedFiles[0].linkedFiles.length === 0, 'Depth limit respected');

    // Test 5: Non-existent file
    r = readMarkdown('./test-files/missing.md');
    assert(r.error === 'File not found or could not be read', 'Non-existent file handling');

    // Test 6: Combined output
    const combined = readMarkdownWithLinks('./test-files/main.md', { maxDepth: 1 });
    assert(combined.includes('=== main.md ==='), 'Combined format includes headers');
    assert(combined.includes(' === related.md ==='), 'Nested files indented');

    // Test 7: Path traversal blocked
    r = readMarkdown('../../../etc/passwd');
    assert(r.content === null, 'Path traversal blocked');
    assert(r.error === 'File not found or could not be read', 'Path traversal returns error');

    // Test 8: Absolute path blocked
    const content1 = readFile('/etc/passwd');
    assert(content1 === null, 'Absolute Unix path blocked');

    const content2 = readFile('C:\\Windows\\System32\\config');
    assert(content2 === null, 'Absolute Windows path blocked');

    // Test 9: Empty file handling
    fs.writeFileSync('./test-files/empty.md', '');
    r = readMarkdown('./test-files/empty.md');
    assert(r.content === '', 'Empty file handled');
    assert(r.linkedFiles.length === 0, 'Empty file has no links');

    console.log('\n🎉 All tests passed!');
  } catch (error) {
    console.error(`❌ ${error.message}`);
    // Signal failure without exiting immediately, so the cleanup below still runs.
    process.exitCode = 1;
  } finally {
    fs.rmSync('./test-files', { recursive: true, force: true });
  }
}

0 commit comments

Comments
 (0)