Skip to content

Commit b921e87

Browse files
committed
feat(skill): validate links, strip images, include openapi.json and changelog
- Add post-processing step to rewrite Docusaurus site-root paths (e.g. /developer/foo) to proper relative .md paths within the skill - Strip markdown and HTML images from all generated files since assets are not bundled with the skill - Copy hindsight-docs/static/openapi.json into references/openapi.json and map /api-reference links to it - Include changelog.md from src/pages/ alongside faq and best-practices - Add final validation step that fails the build if any link still points outside the skill directory
1 parent c761e3a commit b921e87

2 files changed

Lines changed: 8625 additions & 1 deletion

File tree

scripts/generate-docs-skill.sh

Lines changed: 113 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ done
159159

160160
# Process standalone pages (e.g. best-practices, faq) from src/pages/
161161
print_info "Processing standalone pages..."
162-
for page in best-practices faq; do
162+
for page in best-practices faq changelog; do
163163
for ext in md mdx; do
164164
src="$PAGES_DIR/$page.$ext"
165165
if [ -f "$src" ]; then
@@ -175,6 +175,15 @@ for page in best-practices faq; do
175175
done
176176
done
177177

178+
# Copy OpenAPI spec into the skill
# The link-rewrite step further down maps /api-reference links to
# $REFS_DIR/openapi.json and only rewrites them when that file exists, so a
# skipped copy leaves any such link as a site-root path, which the final link
# validation then reports as pointing outside the skill.
179+
OPENAPI_SRC="$ROOT_DIR/hindsight-docs/static/openapi.json"
180+
if [ -f "$OPENAPI_SRC" ]; then
181+
cp "$OPENAPI_SRC" "$REFS_DIR/openapi.json"
182+
print_info "Included: openapi.json"
183+
else
184+
print_warn "openapi.json not found at $OPENAPI_SRC — skipping"
185+
fi
186+
178187
# Generate SKILL.md
179188
print_info "Generating SKILL.md..."
180189
cat > "$SKILL_DIR/SKILL.md" <<'EOF'
@@ -208,6 +217,8 @@ All documentation is in `references/` organized by category:
208217
references/
209218
├── best-practices.md # START HERE — missions, tags, formats, anti-patterns
210219
├── faq.md # Common questions and decisions
220+
├── changelog.md # Release history and version changes
221+
├── openapi.json # Full OpenAPI spec — endpoint schemas, request/response models
211222
├── developer/
212223
│ ├── api/ # Core operations: retain, recall, reflect, memory banks
213224
│ └── *.md # Architecture, configuration, deployment, performance
@@ -302,6 +313,107 @@ print_info "✓ Generated skill at: $SKILL_DIR"
302313
print_info "✓ Documentation files: $(find "$REFS_DIR" -type f | wc -l | tr -d ' ')"
303314
print_info "✓ SKILL.md created with search guidance"
304315

316+
# Rewrite Docusaurus absolute paths (e.g. /developer/foo) to relative paths
317+
print_info "Rewriting Docusaurus absolute paths to relative paths..."
318+
python3 - "$REFS_DIR" <<'PYTHON'
319+
import sys
320+
import re
321+
import os
322+
from pathlib import Path
323+
324+
# Docusaurus site-root routes that have no same-named Markdown file in the
# skill. Values are paths relative to the references directory.
SPECIAL_MAPPINGS = {
    '/api-reference': 'openapi.json',
}

# Inline Markdown links whose target starts with "/": [text](/some/path).
link_pattern = re.compile(r'\[([^\]]*)\]\((/[^)]*)\)')
# Markdown images ![alt](src) and HTML <img ...> tags.
image_pattern = re.compile(r'!\[[^\]]*\]\([^)]*\)')
html_img_pattern = re.compile(r'<img\b[^>]*/?>', re.IGNORECASE)


def try_resolve(url_path, refs_dir):
    """Find the file in refs_dir for a Docusaurus absolute path like /developer/foo.

    Args:
        url_path: Site-root path without its "#fragment". A trailing slash is
            ignored, so "/developer/foo/" resolves like "/developer/foo".
        refs_dir: Path of the skill's references directory.

    Returns:
        The first existing candidate among "<path>.md", "<path>/index.md" and
        "<path>" itself (which may be a directory), or the SPECIAL_MAPPINGS
        target when the route is listed there and that target exists.
        None when nothing matches or the path is empty after normalization.
    """
    route = url_path.rstrip('/')
    if route in SPECIAL_MAPPINGS:
        candidate = refs_dir / SPECIAL_MAPPINGS[route]
        return candidate if candidate.exists() else None
    doc_path = route.lstrip('/')
    if not doc_path:
        # "/" (site root) has no counterpart inside the skill.
        return None
    for candidate in (
        refs_dir / (doc_path + '.md'),
        refs_dir / doc_path / 'index.md',
        refs_dir / doc_path,
    ):
        if candidate.exists():
            return candidate
    return None


def strip_images(text):
    """Return text with Markdown images and HTML <img> tags removed."""
    return html_img_pattern.sub('', image_pattern.sub('', text))


def rewrite_links(text, md_file, refs_dir):
    """Return text with resolvable site-root links made relative to md_file.

    Links that cannot be resolved inside refs_dir are left untouched so the
    later validation step can report them.
    """
    def rewrite(match):
        label = match.group(1)
        # Keep the "#fragment" aside; only the path part is resolved.
        url, sep, frag = match.group(2).partition('#')
        anchor = sep + frag
        if not url or url == '/':
            return match.group(0)
        resolved = try_resolve(url, refs_dir)
        if resolved is None:
            return match.group(0)
        rel = os.path.relpath(resolved, md_file.parent)
        return f'[{label}]({rel}{anchor})'

    return link_pattern.sub(rewrite, text)


def process_file(md_file, refs_dir):
    """Strip images and rewrite site-root links in one Markdown file.

    The file is written back whenever the result differs from what was read,
    so image stripping is persisted even when no link was rewritten.

    Returns:
        A tuple (images_stripped, links_rewritten) of booleans.
    """
    # Explicit UTF-8: the locale default may not decode the docs.
    original = md_file.read_text(encoding='utf-8')
    stripped = strip_images(original)
    rewritten = rewrite_links(stripped, md_file, refs_dir)
    if rewritten != original:
        md_file.write_text(rewritten, encoding='utf-8')
    return stripped != original, rewritten != stripped


def main():
    """Process every *.md file under the directory given as sys.argv[1]."""
    refs_dir = Path(sys.argv[1]).resolve()
    changed = 0
    images_removed = 0
    for md_file in refs_dir.rglob("*.md"):
        had_images, had_links = process_file(md_file, refs_dir)
        images_removed += had_images
        changed += had_links

    print(f"[INFO] Rewrote Docusaurus links in {changed} file(s)")
    print(f"[INFO] Stripped images from {images_removed} file(s)")


# A script fed to "python3 -" on stdin runs with __name__ == "__main__".
if __name__ == "__main__":
    main()
378+
PYTHON
379+
380+
# Validate: no links point outside the skill directory
381+
print_info "Validating links in generated skill files..."
382+
python3 - "$SKILL_DIR" <<'PYTHON'
383+
import sys
384+
import re
385+
from pathlib import Path
386+
387+
# Inline Markdown links: [text](url). Image syntax ![alt](url) also matches
# from its "[" onward, so image targets are validated as well.
link_pattern = re.compile(r'\[([^\]]*)\]\(([^)]+)\)')

# Link targets with these prefixes are external and never checked on disk.
EXTERNAL_PREFIXES = ("http://", "https://", "mailto:", "ftp://")


def find_outside_links(skill_dir):
    """Collect links in *.md files under skill_dir that resolve outside it.

    Args:
        skill_dir: Root directory of the generated skill.

    Returns:
        A list of formatted messages, one per offending link, in sorted file
        order. Empty when every link is external, anchor-only, or resolves to
        a path inside skill_dir.
    """
    skill_dir = Path(skill_dir).resolve()
    errors = []
    for md_file in sorted(skill_dir.rglob("*.md")):
        # Explicit UTF-8: the locale default may not decode the docs.
        content = md_file.read_text(encoding="utf-8")
        for match in link_pattern.finditer(content):
            url = match.group(2).split("#")[0].strip()  # strip anchors
            if not url:
                # Anchor-only link: stays within the same file.
                continue
            if url.startswith(EXTERNAL_PREFIXES):
                continue
            # Resolve relative to the file's directory. A target starting
            # with "/" replaces the base entirely and so lands outside.
            resolved = (md_file.parent / url).resolve()
            # Compare path components, not string prefixes, so a sibling
            # such as "<skill_dir>-other" is not mistaken for the skill.
            if resolved != skill_dir and skill_dir not in resolved.parents:
                errors.append(f"  {md_file.relative_to(skill_dir)}: '{url}' -> {resolved}")
    return errors


def main():
    """Validate the skill directory given as sys.argv[1]; exit 1 on failure."""
    skill_dir = Path(sys.argv[1]).resolve()
    errors = find_outside_links(skill_dir)

    if errors:
        print("ERROR: The following links point outside the skill directory.")
        print("All links must be absolute URLs or relative paths within the skill.")
        for e in errors:
            print(e)
        sys.exit(1)

    print(f"[INFO] Link validation passed ({skill_dir})")


# A script fed to "python3 -" on stdin runs with __name__ == "__main__".
if __name__ == "__main__":
    main()
415+
PYTHON
416+
305417
echo ""
306418
print_info "Usage:"
307419
echo " - Agents can use Glob to find files: references/developer/api/*.md"

0 commit comments

Comments
 (0)