From 3323feabd39fc21470d19601eb11e9cd15d3f489 Mon Sep 17 00:00:00 2001
From: shrwnsan <38465+shrwnsan@users.noreply.github.com>
Date: Mon, 25 May 2026 18:51:42 +0000
Subject: [PATCH 1/7] feat: add generate-report.py script for inventory.ndjson
Generates a self-contained HTML report from bumblebee scan output.
Features:
- 7 ordered sections: Ecosystems, Lifecycle Scripts, Version Sprawl,
Top Projects, Confidence Levels, Detection Sources, Scan Roots
- Dark/light theme toggle with localStorage persistence
- Scroll-spy page outline (Notion-style mini-TOC)
- Scroll progress indicator
- Responsive layout (mobile through desktop)
- Animated section reveals and hover interactions
- Zero external dependencies (Python 3.10+ stdlib only)
Usage:
python3 scripts/generate-report.py inventory.ndjson
python3 scripts/generate-report.py -o report.html
---
.gitignore | 1 +
scripts/generate-report.py | 1392 ++++++++++++++++++++++++++++++++++++
2 files changed, 1393 insertions(+)
create mode 100755 scripts/generate-report.py
diff --git a/.gitignore b/.gitignore
index 2100afd..f6391ea 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@
*.test
*.out
*.ndjson
+report.html
.idea/
.vscode/
diff --git a/scripts/generate-report.py b/scripts/generate-report.py
new file mode 100755
index 0000000..605e018
--- /dev/null
+++ b/scripts/generate-report.py
@@ -0,0 +1,1392 @@
+#!/usr/bin/env python3
+"""Generate an HTML report from bumblebee inventory.ndjson.
+
+Requires Python 3.10+.
+
+Usage:
+ python3 scripts/generate-report.py # defaults: inventory.ndjson -> report.html
+ python3 scripts/generate-report.py inventory.ndjson # explicit input
+ python3 scripts/generate-report.py -o report.html # explicit output
+ python3 scripts/generate-report.py scan.ndjson -o out.html # both
+"""
+
+import json
+import collections
+import html as html_mod
+import sys
+import argparse
+
+# ── Ecosystem colours ──
+# Hex colour per ecosystem name. Lookups in generate_report() go through
+# ECO_COLORS.get(name, "#6B7280"), so a name missing from this table gets the
+# same grey as the "unknown" entry.
+ECO_COLORS = {
+ "go": "#00ADD8",
+ "npm": "#CB3837",
+ "pypi": "#306998",
+ "rubygems": "#CC342D",
+ "browser-extension": "#FF9500",
+ "editor-extension": "#8B5CF6",
+ "mcp": "#10B981",
+ "unknown": "#6B7280",
+}
+
+# ── Helpers ──
+
+def esc(s):
+    """Return the ``str()`` form of ``s`` with HTML special characters escaped."""
+    text = str(s)
+    return html_mod.escape(text)
+
+
+def bar_html(pct, color, min_pct=2.5):
+ # Builds the string placed in the "Distribution" column of the report tables.
+ # NOTE(review): the markup inside the f-string below is not visible in this
+ # view of the patch, so how ``pct`` and ``color`` are interpolated cannot be
+ # confirmed from here.
+ # Raise the value to at least ``min_pct`` (presumably so very small shares
+ # still produce a visible bar — confirm against the template).
+ pct = max(pct, min_pct)
+ return (
+ f'
"
+ )
+
+
+# ── Main generator ──
+
+def generate_report(ndjson_path: str, output_path: str) -> None:
+    """Render the bumblebee inventory at ``ndjson_path`` as an HTML report.
+
+    Reads newline-delimited JSON, keeps every ``package`` record and the last
+    ``scan_summary`` record, aggregates counts per ecosystem, source type,
+    project and confidence level, and writes the assembled page to
+    ``output_path``.
+
+    Blank lines and records without a ``record_type`` are ignored. The
+    process exits with status 1 (message on stderr) when the input contains
+    no ``scan_summary`` record or no ``package`` records. A line that is not
+    valid JSON raises ``json.JSONDecodeError``.
+
+    NOTE(review): the HTML markup of the templates below is not visible in
+    this view of the patch; those template lines are reproduced unchanged.
+    """
+    # ── Load data ──
+    packages: list[dict] = []
+    summary: dict | None = None
+
+    # Read as UTF-8 explicitly instead of relying on the locale's default
+    # encoding.
+    with open(ndjson_path, encoding="utf-8") as f:
+        for line in f:
+            if not line.strip():
+                continue  # blank line: nothing to parse
+            r = json.loads(line)
+            if not isinstance(r, dict):
+                continue  # only JSON objects can be records
+            record_type = r.get("record_type")
+            if record_type == "package":
+                packages.append(r)
+            elif record_type == "scan_summary":
+                summary = r
+
+    if not summary:
+        print("Error: no scan_summary record found", file=sys.stderr)
+        sys.exit(1)
+
+    if not packages:
+        print("Error: no package records found", file=sys.stderr)
+        sys.exit(1)
+
+    # ── Compute aggregates ──
+    ecosystems = collections.Counter()
+    source_types = collections.Counter()
+    projects = collections.Counter()
+    confidence_levels = collections.Counter()
+    direct_deps = 0
+    lifecycle_script_pkgs: list[dict] = []
+    unique_by_eco: dict[str, set[str]] = collections.defaultdict(set)
+    # (ecosystem, normalized name) -> version -> projects using that version
+    multi_version: dict[tuple, dict[str, list]] = collections.defaultdict(
+        lambda: collections.defaultdict(list)
+    )
+
+    for p in packages:
+        eco = p.get("ecosystem", "unknown")
+        ecosystems[eco] += 1
+        source_types[p.get("source_type", "unknown")] += 1
+
+        # Abbreviate home-directory prefixes: "/Users/" and "/home/" become "~/".
+        proj = p.get("project_path", "unknown")
+        proj = proj.replace("/Users/", "~/").replace("/home/", "~/")
+        projects[proj] += 1
+
+        confidence_levels[p.get("confidence", "unknown")] += 1
+
+        if p.get("direct_dependency"):
+            direct_deps += 1
+
+        if p.get("has_lifecycle_scripts"):
+            lifecycle_script_pkgs.append(
+                {
+                    "name": p.get("package_name"),
+                    "ecosystem": eco,
+                    "project": proj,
+                    # A null "lifecycle_scripts" value is treated as "no scripts".
+                    "scripts": p.get("lifecycle_scripts") or [],
+                }
+            )
+
+        name = p.get("normalized_name", "")
+        unique_by_eco[eco].add(name)
+
+        ver = p.get("version", "?")
+        multi_version[(eco, name)][ver].append(proj)
+
+    # Packages seen in more than one version, most versions first (top 25).
+    multi_pkgs = {k: v for k, v in multi_version.items() if len(v) > 1}
+    top_multi = sorted(multi_pkgs.items(), key=lambda x: -len(x[1]))[:25]
+    eco_order = [e for e, _ in ecosystems.most_common()]
+
+    # ── Group scan roots by kind ──
+    root_kinds_map: dict[str, list[str]] = collections.OrderedDict()
+    for root in summary["roots"]:
+        rk = root["kind"]
+        path = root["path"].replace("/Users/", "~/").replace("/home/", "~/")
+        root_kinds_map.setdefault(rk, []).append(path)
+
+    # ── Build table rows ──
+    # Bars are scaled relative to the largest count in each table.
+
+    # Ecosystems
+    max_eco = max(ecosystems.values())
+    eco_rows = ""
+    for eco in eco_order:
+        cnt = ecosystems[eco]
+        unique = len(unique_by_eco[eco])
+        pct = cnt / max_eco * 100
+        color = ECO_COLORS.get(eco, "#6B7280")
+        eco_rows += f"""
+
+ | {esc(eco)} |
+ {cnt:,} |
+ {unique:,} |
+ {bar_html(pct, color)} |
+
"""
+
+    # Source types
+    src_rows = ""
+    max_src = max(source_types.values())
+    for src, cnt in source_types.most_common():
+        pct = cnt / max_src * 100
+        src_rows += f"""
+
+ {esc(src)} |
+ {cnt:,} |
+ {bar_html(pct, "#64748b")} |
+
"""
+
+    # Confidence
+    conf_rows = ""
+    max_conf = max(confidence_levels.values())
+    conf_colors = {"high": "#10b981", "medium": "#f59e0b", "low": "#ef4444"}
+    for lvl, cnt in confidence_levels.most_common():
+        color = conf_colors.get(lvl, "#6B7280")
+        pct = cnt / max_conf * 100
+        conf_rows += f"""
+
+ | {esc(lvl)} |
+ {cnt:,} |
+ {cnt / len(packages) * 100:.1f}% |
+ {bar_html(pct, color)} |
+
"""
+
+    # Top projects
+    proj_rows = ""
+    max_proj = max(projects.values())
+    for proj, cnt in projects.most_common(25):
+        pct = cnt / max_proj * 100
+        # Paths longer than 60 characters are shown by their last component.
+        short = proj.split("/")[-1] if len(proj) > 60 else proj
+        proj_rows += f"""
+
+ |
+ {esc(short)}
+ {esc(proj)}
+ |
+ {cnt:,} |
+ {bar_html(pct, "#d97706")} |
+
"""
+
+    # Multi-version
+    multi_rows = ""
+    for (eco, name), versions in top_multi:
+        # Show at most three sample versions, then a "+N more" suffix.
+        ver_list = sorted(versions.keys())[:3]
+        ver_str = ", ".join(ver_list)
+        if len(versions) > 3:
+            ver_str += f" … +{len(versions) - 3} more"
+        color = ECO_COLORS.get(eco, "#6B7280")
+        multi_rows += f"""
+
+ | {esc(eco)} |
+ {esc(name)} |
+ {len(versions)} |
+ {esc(ver_str)} |
+
"""
+
+    # Lifecycle scripts
+    lifecycle_rows = ""
+    # "name" is None when a record has no package_name; sort those as "" so
+    # the comparison never mixes None with str.
+    for pkg in sorted(lifecycle_script_pkgs, key=lambda x: x["name"] or ""):
+        color = ECO_COLORS.get(pkg["ecosystem"], "#6B7280")
+        scripts_html = " ".join(
+            f'{esc(s)}' for s in pkg["scripts"]
+        )
+        lifecycle_rows += f"""
+
+ | {esc(pkg["ecosystem"])} |
+ {esc(pkg["name"])} |
+ {scripts_html} |
+ {esc(pkg["project"].split("/")[-1])}{esc(pkg["project"])} |
+
"""
+
+    # Scan roots
+    root_icons = {
+        "user_package_root": "📦",
+        "editor_extension_root": "🧩",
+        "mcp_config_root": "🔌",
+        "browser_extension_root": "🌐",
+        "homebrew_root": "🍺",
+    }
+    scan_roots_html = ""
+    for rk, paths in root_kinds_map.items():
+        icon = root_icons.get(rk, "📁")
+        items = "".join(f"{esc(p)}" for p in paths)
+        scan_roots_html += f"""
+ """
+
+    # Scan duration in seconds (the summary stores milliseconds).
+    duration_s = summary["duration_ms"] / 1000
+
+    # ── Assemble HTML ──
+    html = f"""
+
+
+
+
+Bumblebee Inventory Report — {esc(summary["endpoint"]["hostname"])}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ {len(packages):,}
+ Total Packages
+
+
+ {sum(len(v) for v in unique_by_eco.values()):,}
+ Unique Names
+
+
+ {len(ecosystems)}
+ Ecosystems
+
+
+ {len(projects)}
+ Projects
+
+
+ {direct_deps:,}
+ Direct Deps
+
+
+ {len(lifecycle_script_pkgs)}
+ Lifecycle Scripts
+
+
+ {len(multi_pkgs)}
+ Multi-Version
+
+
+ {summary["files_considered"]:,}
+ Files Scanned
+
+
+
+
+
+
+
01
+
+
Packages by Ecosystem
+ {len(ecosystems)} ecosystems
+
+
+
+ What’s on this machine — a breakdown of every package discovered, grouped by language and runtime.
+
+ | Ecosystem | Total | Unique | Distribution |
+ {eco_rows}
+
+
+
+
+
+
+
02
+
+
Lifecycle Scripts
+ {len(lifecycle_script_pkgs)} packages with install-time hooks
+
+
+
+ What’s dangerous — packages that run arbitrary code when installed or updated.
+
+ ⚠
+ These packages execute arbitrary code at install time (preinstall, postinstall, prepare). Review them for supply-chain risk.
+
+
+ | Ecosystem | Package | Scripts | Project |
+ {lifecycle_rows}
+
+
+
+
+
+
+
03
+
+
Version Sprawl
+ {len(multi_pkgs)} packages with multiple versions
+
+
+
+ What’s outdated — packages pinned to many different versions across projects, increasing patching burden.
+
+ | Ecosystem | Package | Versions | Sample |
+ {multi_rows}
+
+
+
+
+
+
+
04
+
+
Top Projects
+ {len(projects)} total
+
+
+
+ Where complexity concentrates — projects with the deepest dependency trees.
+
+ | Project | Packages | Distribution |
+ {proj_rows}
+
+
+
+
+
+
+
05
+
+
Confidence Levels
+
+
+
+ How reliable is this data — detection confidence assigned to each package record.
+
+ | Level | Count | Share | Distribution |
+ {conf_rows}
+
+
+
+
+
+
+
06
+
+
Detection Sources
+ {len(source_types)} sources
+
+
+
+ How packages were found — lockfiles, module caches, manifests, and extension metadata.
+
+ | Source | Count | Distribution |
+ {src_rows}
+
+
+
+
+
+
+
07
+
+
Scan Roots
+ {len(summary["roots"])} directories
+
+
+
+ Reference — every directory bumblebee crawled during this scan.
+ {scan_roots_html}
+
+
+
+
+
+
+
+
+
+
+
+
+"""
+
+    # The page contains non-ASCII text (emoji, dashes); write it as UTF-8 so
+    # the output does not depend on the locale's default encoding.
+    with open(output_path, "w", encoding="utf-8") as f:
+        f.write(html)
+    # Report the encoded size: len(html) would count characters, not bytes.
+    size_bytes = len(html.encode("utf-8"))
+    print(f"Report written to {output_path} ({size_bytes:,} bytes)")
+
+
+# ── CLI entry point ──
+if __name__ == "__main__":
+    # The positional input and -o/--output are both optional; each falls back
+    # to the default named in its help text.
+    cli = argparse.ArgumentParser(
+        description="Generate an HTML report from bumblebee inventory.ndjson"
+    )
+    cli.add_argument(
+        "input", nargs="?", default="inventory.ndjson",
+        help="Path to inventory.ndjson (default: inventory.ndjson)",
+    )
+    cli.add_argument(
+        "-o", "--output", default="report.html",
+        help="Output HTML file path (default: report.html)",
+    )
+    opts = cli.parse_args()
+    generate_report(opts.input, opts.output)
From 61cb55b845aab777965b0821d68ba2e903ea7961 Mon Sep 17 00:00:00 2001
From: shrwnsan <38465+shrwnsan@users.noreply.github.com>
Date: Mon, 25 May 2026 19:08:21 +0000
Subject: [PATCH 2/7] style: adopt Perplexity editorial design system
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Dark mode: Midnight Parchment
- Deep forest-teal base (#091717) with ivory ink text
- Teal accent (#20808d) for numbering and decoration
- Subtle teal hex-grid background texture
Light mode: Perplexity Editorial
- Warm parchment cream (#fbfaf4) from Perplexity blog
- Deep teal-black text (#13343b)
- Electric blue link gradient in header
Typography:
- Newsreader (serif) for display headings — editorial authority
- Outfit (geometric sans) for body text
- JetBrains Mono for code and data
Visual refinements:
- Sharper border-radius (4px/8px, down from 8px/14px)
- Lighter hex-grid strokes (0.5px weight)
- Badge and header adapt to both themes
- Section numbering: teal in dark, dark-teal in light
---
scripts/generate-report.py | 110 ++++++++++++++++++++-----------------
1 file changed, 59 insertions(+), 51 deletions(-)
diff --git a/scripts/generate-report.py b/scripts/generate-report.py
index 605e018..bafa722 100755
--- a/scripts/generate-report.py
+++ b/scripts/generate-report.py
@@ -246,82 +246,84 @@ def generate_report(ndjson_path: str, output_path: str) -> None:
Bumblebee Inventory Report — {esc(summary["endpoint"]["hostname"])}
-
+