From e9c29093c299ee63dcfb686d82bffa9c835885e1 Mon Sep 17 00:00:00 2001 From: zilbonn <114275603+zilbonn@users.noreply.github.com> Date: Fri, 29 May 2026 16:30:04 -0400 Subject: [PATCH] Add Ruby gRPC and Falcon matcher coverage --- docs/supported-tech.md | 26 +++- .../src/__tests__/prompt-assemble.test.ts | 22 +++- .../processor/src/prompt/file-language.ts | 1 + packages/processor/src/prompt/highlights.ts | 35 +++++ packages/processor/src/prompt/slug-notes.ts | 8 ++ .../scanner/src/__tests__/detect-tech.test.ts | 48 +++++++ .../src/__tests__/framework-matchers.test.ts | 61 +++++++++ .../src/__tests__/ruby-async-scan.test.ts | 124 ++++++++++++++++++ packages/scanner/src/detect-tech.ts | 38 ++++++ packages/scanner/src/index.ts | 2 +- packages/scanner/src/matchers/index.ts | 6 + .../matchers/rb-async-websocket-handler.ts | 49 +++++++ .../src/matchers/rb-falcon-rack-app.ts | 50 +++++++ .../scanner/src/matchers/rb-grpc-service.ts | 70 ++++++++++ packages/scanner/src/matchers/utils.ts | 29 ++++ prompt-samples/07-overflow-fallback.md | 4 +- 16 files changed, 566 insertions(+), 7 deletions(-) create mode 100644 packages/scanner/src/__tests__/ruby-async-scan.test.ts create mode 100644 packages/scanner/src/matchers/rb-async-websocket-handler.ts create mode 100644 packages/scanner/src/matchers/rb-falcon-rack-app.ts create mode 100644 packages/scanner/src/matchers/rb-grpc-service.ts diff --git a/docs/supported-tech.md b/docs/supported-tech.md index 58bc428..07c32e7 100644 --- a/docs/supported-tech.md +++ b/docs/supported-tech.md @@ -122,7 +122,21 @@ that single signal. strong-params bypasses, `raw`/`html_safe` XSS, raw SQL, open redirect. ### Other Ruby detected -`sinatra`, `grape`, `hanami`, `roda`. Roadmap. +`sinatra`, `grape`, `hanami`, `roda`. + +### Ruby gRPC / async (`grpc-ruby`, `async-grpc`, `falcon-ruby`, `async-websocket`) +- **Sentinel detection:** root `Gemfile` / `Gemfile.lock` exact gems: + `grpc`, `async-grpc`, `falcon`, `async-websocket`. `async-grpc` + also emits the shared `grpc` tag. +- **Matchers:** `rb-grpc-service`, `rb-async-websocket-handler`, + `rb-falcon-rack-app` (gated). Their gates also use recursive + `Gemfile` / `Gemfile.lock` / `.ru` sentinels so nested Ruby services + can still produce `.rb` candidates. +- **Prompt highlights:** per-RPC interceptor auth, `call.metadata` + trust, Falcon/Rack async service boundaries, WebSocket handshake and + per-message authorization. +- **Proto files:** `proto-rpc-surface` activates via the shared `grpc` + tag when root detection sees Ruby gRPC. ## Go @@ -152,11 +166,17 @@ that single signal. ### Generic Go (`go`) Always-on Go matchers regardless of framework: `go-http-handler`, `go-ssrf`, `go-command-injection`, `go-embed-asset`, -`connectrpc-handler-impl`, `proto-rpc-surface`, `unix-socket-listener`. +`connectrpc-handler-impl`, `unix-socket-listener`. + +### Protobuf / gRPC (`grpc`) +`proto-rpc-surface` is cross-language and activates for projects tagged +`grpc` or `connectrpc`. It flags `.proto` service/message definitions as +wire-format trust boundaries. ### Other Go detected `gorilla`, `buffalo`, `grpc`, `connectrpc`, `cobra`. Roadmap for -dedicated matchers (gRPC service impl already partially covered). +dedicated Go matchers (gRPC wire formats are covered by +`proto-rpc-surface`). ## Rust diff --git a/packages/processor/src/__tests__/prompt-assemble.test.ts b/packages/processor/src/__tests__/prompt-assemble.test.ts index 2885d82..e833bc9 100644 --- a/packages/processor/src/__tests__/prompt-assemble.test.ts +++ b/packages/processor/src/__tests__/prompt-assemble.test.ts @@ -1,5 +1,6 @@ import { describe, expect, it } from "vitest"; -import { assemblePrompt, CORE_PROMPT, TECH_HIGHLIGHTS } from "../prompt/index.js"; +import { languagesForBatch } from "../prompt/file-language.js"; +import { assemblePrompt, CORE_PROMPT, noteForSlug, TECH_HIGHLIGHTS } from "../prompt/index.js"; describe("assemblePrompt", () => { it("returns just the core prompt when no tech is detected and no batch slugs", () => { @@ -43,6 +44,25 @@ describe("assemblePrompt", () => { expect(meta.slugsWithNotes).toBe(2); }); + it("maps Rack config files to Ruby for batch-scoped highlights", () => { + expect(languagesForBatch(["config.ru"])).toEqual(["ruby"]); + }); + + it("uses Ruby-specific gRPC guidance without Go implementation terms", () => { + const { prompt } = assemblePrompt({ + detectedTags: ["grpc-ruby"], + batchSlugs: ["rb-grpc-service", "proto-rpc-surface"], + batchLanguages: ["ruby"], + }); + + expect(prompt).toContain("GRPC::ServerInterceptor"); + expect(prompt).toContain("call.metadata"); + expect(prompt).toContain("::Service"); + expect(prompt).not.toMatch(/context\.Context|connect\.Request|net\/http/); + expect(noteForSlug("proto-rpc-surface")).toMatch(/Wire-format boundary/); + expect(noteForSlug("proto-rpc-surface")).not.toMatch(/Go|Ruby|context\.Context/); + }); + it("appends INFO.md and promptAppend at the end, both after the framework section", () => { const { prompt } = assemblePrompt({ detectedTags: ["nextjs"], diff --git a/packages/processor/src/prompt/file-language.ts b/packages/processor/src/prompt/file-language.ts index 437f8b9..7926e6d 100644 --- a/packages/processor/src/prompt/file-language.ts +++ b/packages/processor/src/prompt/file-language.ts @@ -21,6 +21,7 @@ const EXT_TO_LANGUAGE: Record = { ".mjs": "javascript", ".py": "python", ".rb": "ruby", + ".ru": "ruby", ".php": "php", ".go": "go", ".rs": "rust", diff --git a/packages/processor/src/prompt/highlights.ts b/packages/processor/src/prompt/highlights.ts index ccb5706..a1d4858 100644 --- a/packages/processor/src/prompt/highlights.ts +++ b/packages/processor/src/prompt/highlights.ts @@ -175,6 +175,41 @@ export const TECH_HIGHLIGHTS: TechHighlight[] = [ "`redirect_to params[:return_to]` is an open redirect; check for an allowlist", ], }, + { + tag: "grpc-ruby", + title: "Ruby gRPC", + languages: ["ruby"], + bullets: [ + "`class X < Some::Service` and `GRPC::GenericService` define public RPC methods; each method needs explicit auth + authorization", + "`GRPC::ServerInterceptor` is the per-RPC gate — confirm it wraps every method and streaming mode", + "`call.metadata` carries attacker-controlled headers/tokens; verify JWT/API keys before using request fields", + "`request` message fields are untrusted even when protobuf-typed — check SQL, subprocess, filesystem, and HTTP sinks", + "`add_http2_port`, `server.handle`, and `run_till_terminated` mark the exposed server boundary", + "`async-grpc` runs work in fibers; avoid shared mutable auth/session state across concurrent RPCs", + ], + }, + { + tag: "falcon-ruby", + title: "Ruby Falcon", + languages: ["ruby"], + bullets: [ + "Falcon exposes Rack/async-http apps; `config.ru` is the deployment boundary but auth still belongs in the app path", + "`Async::HTTP`, `Async::Container`, and `Async::Service` bootstrap long-lived services — verify what is externally reachable", + "Per-route or per-RPC auth must run inside the app, not only in a front proxy or service manager", + "Fiber concurrency makes globals/class vars risky for tenant, user, or request-scoped state", + ], + }, + { + tag: "async-websocket", + title: "Ruby async-websocket", + languages: ["ruby"], + bullets: [ + "`Async::WebSocket::Adapters::Rack.open` upgrades to a long-lived public connection; authenticate during the handshake", + "`connection.read` / `message.buffer` are attacker-controlled per-message inputs — validate every message shape", + "Authorization must be checked for each privileged message/action, not just once at connection open", + "WebSocket loops need rate limits and close/error handling to avoid unbounded work or leaked exceptions", + ], + }, // --- Go frameworks --- { diff --git a/packages/processor/src/prompt/slug-notes.ts b/packages/processor/src/prompt/slug-notes.ts index 8839d9d..31e89b2 100644 --- a/packages/processor/src/prompt/slug-notes.ts +++ b/packages/processor/src/prompt/slug-notes.ts @@ -64,6 +64,8 @@ const SLUG_NOTES: Record = { "Confirm the endpoint truly has no auth (not just a permissive guard) and that it returns sensitive data.", "service-entry-point": "Coarse flag — verify there's an actual auth gap, not just an internal-only handler reachable via service mesh.", + "proto-rpc-surface": + "Wire-format boundary — treat RPC request fields as untrusted and trace sensitive fields to auth, validation, and sink use.", "object-injection": "User-controlled keys into `obj[x] = v` without an allowlist enable prototype-pollution / overwriting safe defaults.", "spread-operator-injection": @@ -184,6 +186,12 @@ const SLUG_NOTES: Record = { "Weak entry-point candidate — confirm a `before` callback or middleware enforces auth on this Action class.", "rb-roda-route": "Weak entry-point candidate — auth must wrap the tree node, not just the leaf; confirm scope.", + "rb-grpc-service": + "Ruby gRPC entry-point candidate — confirm an interceptor or method-level check authenticates `call.metadata` and authorizes each RPC.", + "rb-async-websocket-handler": + "Async WebSocket entry-point candidate — authenticate the handshake and validate/authorize every `connection.read` message.", + "rb-falcon-rack-app": + "Falcon/Rack bootstrap candidate — confirm exposed async services route requests through app-level auth, not only deployment config.", "go-gorilla-route": "Weak entry-point candidate — confirm `router.Use(auth)` covers this subrouter; `PathPrefix(...).Handler(other)` doesn't inherit.", diff --git a/packages/scanner/src/__tests__/detect-tech.test.ts b/packages/scanner/src/__tests__/detect-tech.test.ts index d48cb26..ad7c0ad 100644 --- a/packages/scanner/src/__tests__/detect-tech.test.ts +++ b/packages/scanner/src/__tests__/detect-tech.test.ts @@ -87,6 +87,54 @@ describe("detectTech", () => { expect(detectTech(tmpRoot).tags).toContain("rails"); }); + it("detects async-grpc Ruby stacks from Gemfile", () => { + write("Gemfile", `source "https://rubygems.org"\ngem "async-grpc", "~> 0.3"\n`); + const tags = detectTech(tmpRoot).tags; + expect(tags).toEqual(expect.arrayContaining(["ruby", "async-grpc", "grpc-ruby", "grpc"])); + }); + + it("detects async-grpc Ruby stacks from Gemfile.lock", () => { + // Bundler spec entries are indented with exactly four spaces. + write("Gemfile.lock", `GEM\n specs:\n async-grpc (1.2.3)\n`); + const tags = detectTech(tmpRoot).tags; + expect(tags).toEqual(expect.arrayContaining(["ruby", "async-grpc", "grpc-ruby", "grpc"])); + }); + + it("detects plain Ruby grpc without async-grpc", () => { + write("Gemfile", `source "https://rubygems.org"\ngem "grpc", "~> 1.60"\n`); + const tags = detectTech(tmpRoot).tags; + expect(tags).toEqual(expect.arrayContaining(["ruby", "grpc-ruby", "grpc"])); + expect(tags).not.toContain("async-grpc"); + }); + + it("detects Ruby async-websocket from Gemfile", () => { + write("Gemfile", `source "https://rubygems.org"\ngem "async-websocket", "~> 0.30"\n`); + const tags = detectTech(tmpRoot).tags; + expect(tags).toEqual(expect.arrayContaining(["ruby", "async-websocket"])); + }); + + it("does not detect gRPC from commented gems or similarly named gems", () => { + write("Gemfile", `source "https://rubygems.org"\n# gem "grpc"\ngem "grpc-tools", "~> 1.60"\n`); + const tags = detectTech(tmpRoot).tags; + expect(tags).toContain("ruby"); + expect(tags).not.toContain("grpc"); + expect(tags).not.toContain("grpc-ruby"); + }); + + it("keeps Python Falcon and Ruby Falcon as distinct tags", () => { + write("pyproject.toml", `[project]\nname = "x"\ndependencies = ["falcon"]\n`); + let tags = detectTech(tmpRoot).tags; + expect(tags).toContain("falcon"); + expect(tags).not.toContain("falcon-ruby"); + + fs.rmSync(tmpRoot, { recursive: true, force: true }); + tmpRoot = fs.mkdtempSync(path.join(os.tmpdir(), "deepsec-detect-tech-")); + write("Gemfile", `source "https://rubygems.org"\ngem "falcon", "~> 0.50"\n`); + tags = detectTech(tmpRoot).tags; + expect(tags).toContain("falcon-ruby"); + expect(tags).not.toContain("falcon"); + }); + it("detects Gin from go.mod", () => { write("go.mod", `module example.com/x\n\nrequire (\n github.com/gin-gonic/gin v1.10.0\n)\n`); const tags = detectTech(tmpRoot).tags; diff --git a/packages/scanner/src/__tests__/framework-matchers.test.ts b/packages/scanner/src/__tests__/framework-matchers.test.ts index 0deb089..dd21ad4 100644 --- a/packages/scanner/src/__tests__/framework-matchers.test.ts +++ b/packages/scanner/src/__tests__/framework-matchers.test.ts @@ -11,6 +11,9 @@ import { phpLaravelRouteMatcher } from "../matchers/php-laravel-route.js"; import { pyDjangoViewMatcher } from "../matchers/py-django-view.js"; import { pyFastapiRouteMatcher } from "../matchers/py-fastapi-route.js"; import { pyFlaskRouteMatcher } from "../matchers/py-flask-route.js"; +import { rbAsyncWebSocketHandlerMatcher } from "../matchers/rb-async-websocket-handler.js"; +import { rbFalconRackAppMatcher } from "../matchers/rb-falcon-rack-app.js"; +import { rbGrpcServiceMatcher } from "../matchers/rb-grpc-service.js"; import { rbRailsControllerMatcher } from "../matchers/rb-rails-controller.js"; describe("framework entry-point matchers", () => { @@ -135,6 +138,64 @@ end expect(matches.length).toBeGreaterThan(0); }); + it("rb-grpc-service detects service classes, methods, metadata, interceptors, and server bootstrap", () => { + const src = ` +class GreeterServer < Helloworld::Greeter::Service + def lookup(request, call) + token = call.metadata["authorization"] + end +end + +class AuthInterceptor < GRPC::ServerInterceptor + def request_response(request: nil, call: nil, method: nil) + end +end + +server.handle(GreeterServer) +server.add_http2_port("0.0.0.0:50051", :this_port_is_insecure) +server.run_till_terminated +`; + const matches = rbGrpcServiceMatcher.match(src, "lib/greeter_server.rb"); + expect(matches.length).toBeGreaterThanOrEqual(5); + }); + + it("rb-grpc-service ignores generated and vendored Ruby files", () => { + const src = ` +# Generated by the protocol buffer compiler. DO NOT EDIT! +class Greeter < Helloworld::Greeter::Service +end +`; + expect(rbGrpcServiceMatcher.match(src, "lib/helloworld_services_pb.rb")).toEqual([]); + expect(rbGrpcServiceMatcher.match(src, "vendor/bundle/ruby/greeter.rb")).toEqual([]); + expect(rbGrpcServiceMatcher.match(src, "spec/greeter_server.rb")).toEqual([]); + expect(rbGrpcServiceMatcher.match(src, "lib/greeter_server.rb")).toEqual([]); + }); + + it("rb-async-websocket-handler detects async websocket receive loops", () => { + const src = ` +Async::WebSocket::Adapters::Rack.open(env) do |connection| + while message = connection.read + handle_message(message.buffer) + end +end +`; + const matches = rbAsyncWebSocketHandlerMatcher.match(src, "lib/stream_handler.rb"); + expect(matches.length).toBeGreaterThanOrEqual(3); + }); + + it("rb-falcon-rack-app detects strong Falcon and Async service signals", () => { + const src = ` +require "falcon" +service = Falcon::Service.new +endpoint = Async::HTTP::Endpoint.parse("https://example.com") +`; + const matches = rbFalconRackAppMatcher.match(src, "lib/server.rb"); + expect(matches.length).toBeGreaterThanOrEqual(3); + expect(rbFalconRackAppMatcher.match(`run App.new\nmap "/api" do\nend\n`, "config.ru")).toEqual( + [], + ); + }); + it("php-laravel-route detects Route::get and DB::raw", () => { const src = ` void> = []; + +afterEach(() => { + for (const cleanup of cleanups.reverse()) cleanup(); + cleanups = []; +}); + +function makeProject(files: Record): { root: string; projectId: string } { + const root = fs.mkdtempSync(path.join(os.tmpdir(), "deepsec-ruby-async-")); + const dataRoot = fs.mkdtempSync(path.join(os.tmpdir(), "deepsec-ruby-async-data-")); + const previousDataRoot = process.env.DEEPSEC_DATA_ROOT; + process.env.DEEPSEC_DATA_ROOT = dataRoot; + + for (const [rel, content] of Object.entries(files)) { + const abs = path.join(root, rel); + fs.mkdirSync(path.dirname(abs), { recursive: true }); + fs.writeFileSync(abs, content); + } + + const projectId = `ruby-${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 8)}`; + cleanups.push(() => { + fs.rmSync(root, { recursive: true, force: true }); + fs.rmSync(dataRoot, { recursive: true, force: true }); + if (previousDataRoot === undefined) delete process.env.DEEPSEC_DATA_ROOT; + else process.env.DEEPSEC_DATA_ROOT = previousDataRoot; + }); + return { root, projectId }; +} + +describe("Ruby async matcher gates", () => { + it("activates Ruby gRPC and websocket matchers from nested Gemfiles", () => { + const { root } = makeProject({ + "apps/foo/Gemfile": `source "https://rubygems.org"\ngem "async-grpc"\ngem "async-websocket"\n`, + }); + const detected = detectTech(root); + + expect(evaluateGate(rbGrpcServiceMatcher.requires, detected, root)).toBe(true); + expect(evaluateGate(rbAsyncWebSocketHandlerMatcher.requires, detected, root)).toBe(true); + }); + + it("activates the Falcon matcher from nested Gemfiles and root config.ru", () => { + let project = makeProject({ + "apps/foo/Gemfile": `source "https://rubygems.org"\ngem "falcon"\n`, + }); + let detected = detectTech(project.root); + expect(evaluateGate(rbFalconRackAppMatcher.requires, detected, project.root)).toBe(true); + + project = makeProject({ + "config.ru": `run App.new\n`, + }); + detected = detectTech(project.root); + expect(evaluateGate(rbFalconRackAppMatcher.requires, detected, project.root)).toBe(true); + }); +}); + +describe("Ruby async full scans", () => { + it("creates Ruby service and proto records for root-detected async-grpc projects", async () => { + const { root, projectId } = makeProject({ + Gemfile: `source "https://rubygems.org"\ngem "async-grpc"\ngem "falcon"\n`, + "config.ru": `run App.new\n`, + "proto/foo.proto": `syntax = "proto3";\nservice Example { rpc Lookup (LookupRequest) returns (LookupResponse); }\nmessage LookupRequest { string path = 1; }\nmessage LookupResponse { string value = 1; }\n`, + "lib/example_service.rb": ` +class ExampleService < ExampleGen::Service + def lookup(request, call) + BackendClient.new(request.path, call.metadata["authorization"]).run + end +end +`, + }); + + const result = await scan({ projectId, root }); + expect(result.activeMatchers).toEqual( + expect.arrayContaining(["rb-grpc-service", "proto-rpc-surface"]), + ); + + const records = loadAllFileRecords(projectId); + const service = records.find((r) => r.filePath === "lib/example_service.rb"); + expect(service?.candidates.map((c) => c.vulnSlug)).toContain("rb-grpc-service"); + + const proto = records.find((r) => r.filePath === "proto/foo.proto"); + expect(proto?.candidates.map((c) => c.vulnSlug)).toContain("proto-rpc-surface"); + }); + + it("creates Ruby service records for nested async-grpc projects via sentinel gates", async () => { + const { root, projectId } = makeProject({ + "apps/api/Gemfile": `source "https://rubygems.org"\ngem "async-grpc"\n`, + "apps/api/lib/example_service.rb": ` +class ExampleService < ExampleGen::Service + def lookup(request, call) + call.metadata["authorization"] + end +end +`, + }); + + const result = await scan({ projectId, root }); + expect(result.activeMatchers).toContain("rb-grpc-service"); + + const records = loadAllFileRecords(projectId); + const service = records.find((r) => r.filePath === "apps/api/lib/example_service.rb"); + expect(service?.candidates.map((c) => c.vulnSlug)).toContain("rb-grpc-service"); + }); + + it("counts .ru files as Ruby in language stats", async () => { + const { root, projectId } = makeProject({ + "config.ru": `run App.new\n`, + }); + + const result = await scan({ projectId, root, matcherSlugs: ["xss"] }); + const ruby = result.languageStats.find((s) => s.language === "ruby"); + expect(ruby?.scannedFiles).toBe(1); + }); +}); diff --git a/packages/scanner/src/detect-tech.ts b/packages/scanner/src/detect-tech.ts index 0e22daa..dbfb25f 100644 --- a/packages/scanner/src/detect-tech.ts +++ b/packages/scanner/src/detect-tech.ts @@ -57,6 +57,31 @@ function listDir(rootPath: string, rel: string): string[] { */ type Detector = (rootPath: string, cache: Map) => string[]; +function escapeRegExp(value: string): string { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} + +function gemfileHas(content: string | null, gemName: string): boolean { + if (!content) return false; + const gem = escapeRegExp(gemName); + const gemLine = new RegExp(`^\\s*gem\\s+["']${gem}["'](?:\\s|,|\\)|$)`); + + for (const rawLine of content.split("\n")) { + const trimmed = rawLine.trim(); + if (!trimmed || trimmed.startsWith("#")) continue; + const line = rawLine.replace(/#.*$/, ""); + if (gemLine.test(line)) return true; + } + + return false; +} + +function lockfileHas(content: string | null, gemName: string): boolean { + if (!content) return false; + const gem = escapeRegExp(gemName); + return new RegExp(`^ {4}${gem} \\(\\d`, "m").test(content); +} + const detectors: Detector[] = [ // --- Node / TS / JS ecosystems --- (root, cache) => { @@ -187,6 +212,18 @@ const detectors: Detector[] = [ if (/\bgrape\b/.test(haystack)) tags.push("grape"); if (/\bhanami\b/.test(haystack)) tags.push("hanami"); if (/\broda\b/.test(haystack)) tags.push("roda"); + if (gemfileHas(gemfile, "async-grpc") || lockfileHas(lock, "async-grpc")) { + tags.push("async-grpc", "grpc-ruby", "grpc"); + } + if (gemfileHas(gemfile, "grpc") || lockfileHas(lock, "grpc")) { + tags.push("grpc-ruby", "grpc"); + } + if (gemfileHas(gemfile, "falcon") || lockfileHas(lock, "falcon")) { + tags.push("falcon-ruby"); + } + if (gemfileHas(gemfile, "async-websocket") || lockfileHas(lock, "async-websocket")) { + tags.push("async-websocket"); + } return tags; }, @@ -401,6 +438,7 @@ export function detectTech(rootPath: string): DetectedTech { "Pipfile", "Gemfile", "Gemfile.lock", + "config.ru", "config/routes.rb", "bin/rails", "go.mod", diff --git a/packages/scanner/src/index.ts b/packages/scanner/src/index.ts index b4f8630..37cb01d 100644 --- a/packages/scanner/src/index.ts +++ b/packages/scanner/src/index.ts @@ -386,7 +386,7 @@ const LANGUAGE_EXTENSIONS: Record = { typescript: [".ts", ".tsx", ".cts", ".mts"], javascript: [".js", ".jsx", ".cjs", ".mjs"], python: [".py"], - ruby: [".rb"], + ruby: [".rb", ".ru"], php: [".php"], go: [".go"], rust: [".rs"], diff --git a/packages/scanner/src/matchers/index.ts b/packages/scanner/src/matchers/index.ts index e86ba78..5d8b7a5 100644 --- a/packages/scanner/src/matchers/index.ts +++ b/packages/scanner/src/matchers/index.ts @@ -158,7 +158,10 @@ import { pySqlRawMatcher } from "./py-sql-raw.js"; import { pyStarletteRouteMatcher } from "./py-starlette-route.js"; import { pyTornadoHandlerMatcher } from "./py-tornado-handler.js"; import { rateLimitBypassMatcher } from "./rate-limit-bypass.js"; +import { rbAsyncWebSocketHandlerMatcher } from "./rb-async-websocket-handler.js"; +import { rbFalconRackAppMatcher } from "./rb-falcon-rack-app.js"; import { rbGrapeEndpointMatcher } from "./rb-grape-endpoint.js"; +import { rbGrpcServiceMatcher } from "./rb-grpc-service.js"; import { rbHanamiActionMatcher } from "./rb-hanami-action.js"; import { rbRailsControllerMatcher } from "./rb-rails-controller.js"; import { rbRodaRouteMatcher } from "./rb-roda-route.js"; @@ -407,6 +410,9 @@ export function createDefaultRegistry(): MatcherRegistry { registry.register(rbGrapeEndpointMatcher); registry.register(rbHanamiActionMatcher); registry.register(rbRodaRouteMatcher); + registry.register(rbGrpcServiceMatcher); + registry.register(rbAsyncWebSocketHandlerMatcher); + registry.register(rbFalconRackAppMatcher); // Go registry.register(goGinRouteMatcher); registry.register(goEchoRouteMatcher); diff --git a/packages/scanner/src/matchers/rb-async-websocket-handler.ts b/packages/scanner/src/matchers/rb-async-websocket-handler.ts new file mode 100644 index 0000000..fb2629e --- /dev/null +++ b/packages/scanner/src/matchers/rb-async-websocket-handler.ts @@ -0,0 +1,49 @@ +import type { MatcherPlugin } from "../types.js"; +import { + isSkippableRubyGeneratedFile, + regexMatcher, + rubyGemfileHas, + rubyLockfileHas, +} from "./utils.js"; + +function hasAsyncWebSocketSentinel(_path: string, content: string): boolean { + return rubyGemfileHas(content, "async-websocket") || rubyLockfileHas(content, "async-websocket"); +} + +export const rbAsyncWebSocketHandlerMatcher: MatcherPlugin = { + noiseTier: "noisy" as const, + slug: "rb-async-websocket-handler", + description: "Ruby async-websocket handlers and message loops (gated on async-websocket)", + filePatterns: ["**/*.rb", "**/*.ru"], + requires: { + tech: ["async-websocket"], + sentinelFiles: ["**/Gemfile", "**/Gemfile.lock"], + sentinelContains: hasAsyncWebSocketSentinel, + }, + examples: [ + `Async::WebSocket::Adapters::Rack.open(env) do |connection|`, + `message = connection.read`, + `payload = message.buffer`, + `while message = connection.read`, + ], + match(content, filePath) { + if (isSkippableRubyGeneratedFile(filePath, content)) return []; + + return regexMatcher( + "rb-async-websocket-handler", + [ + { + regex: /\bAsync::WebSocket::Adapters::Rack\.open\b/, + label: "Async::WebSocket::Adapters::Rack.open handler", + }, + { regex: /\bconnection\.read\b/, label: "connection.read message receive" }, + { regex: /\bmessage\.buffer\b/, label: "message.buffer untrusted payload" }, + { + regex: /while\s+\w+\s*=\s*connection\.read\b/, + label: "websocket read loop", + }, + ], + content, + ); + }, +}; diff --git a/packages/scanner/src/matchers/rb-falcon-rack-app.ts b/packages/scanner/src/matchers/rb-falcon-rack-app.ts new file mode 100644 index 0000000..11bc69f --- /dev/null +++ b/packages/scanner/src/matchers/rb-falcon-rack-app.ts @@ -0,0 +1,50 @@ +import type { MatcherPlugin } from "../types.js"; +import { + isSkippableRubyGeneratedFile, + regexMatcher, + rubyGemfileHas, + rubyLockfileHas, +} from "./utils.js"; + +function hasFalconRubySentinel(filePath: string, content: string): boolean { + return ( + filePath.endsWith(".ru") || + rubyGemfileHas(content, "falcon") || + rubyLockfileHas(content, "falcon") + ); +} + +export const rbFalconRackAppMatcher: MatcherPlugin = { + noiseTier: "noisy" as const, + slug: "rb-falcon-rack-app", + description: "Ruby Falcon / async Rack app bootstrap surfaces (gated on Falcon)", + filePatterns: ["**/*.rb", "**/*.ru"], + requires: { + tech: ["falcon-ruby"], + sentinelFiles: ["**/Gemfile", "**/Gemfile.lock", "**/*.ru"], + sentinelContains: hasFalconRubySentinel, + }, + examples: [ + `require "falcon"`, + `service = Falcon::Service.new`, + `endpoint = Async::HTTP::Endpoint.parse(url)`, + `container = Async::Container.new`, + `service = Async::Service.new`, + ], + match(content, filePath) { + if (isSkippableRubyGeneratedFile(filePath, content)) return []; + + return regexMatcher( + "rb-falcon-rack-app", + [ + { regex: /require\s+["']falcon["']/, label: "require 'falcon'" }, + { regex: /\bFalcon::\w+/, label: "Falcon::* app/server surface" }, + { + regex: /\bAsync::(?:HTTP|Container|Service)\b/, + label: "Async HTTP/container/service bootstrap", + }, + ], + content, + ); + }, +}; diff --git a/packages/scanner/src/matchers/rb-grpc-service.ts b/packages/scanner/src/matchers/rb-grpc-service.ts new file mode 100644 index 0000000..058c322 --- /dev/null +++ b/packages/scanner/src/matchers/rb-grpc-service.ts @@ -0,0 +1,70 @@ +import type { MatcherPlugin } from "../types.js"; +import { + isSkippableRubyGeneratedFile, + regexMatcher, + rubyGemfileHas, + rubyLockfileHas, +} from "./utils.js"; + +function hasGrpcRubySentinel(_path: string, content: string): boolean { + return ( + rubyGemfileHas(content, "grpc") || + rubyGemfileHas(content, "async-grpc") || + rubyLockfileHas(content, "grpc") || + rubyLockfileHas(content, "async-grpc") + ); +} + +export const rbGrpcServiceMatcher: MatcherPlugin = { + noiseTier: "noisy" as const, + slug: "rb-grpc-service", + description: "Ruby gRPC service implementations and interceptors (gated on grpc/async-grpc)", + filePatterns: ["**/*.rb"], + requires: { + tech: ["grpc-ruby", "async-grpc"], + sentinelFiles: ["**/Gemfile", "**/Gemfile.lock"], + sentinelContains: hasGrpcRubySentinel, + }, + examples: [ + `class GreeterServer < Helloworld::Greeter::Service`, + `include GRPC::GenericService`, + `rpc :Lookup, LookupRequest, LookupResponse`, + `def lookup(request, call)`, + `token = call.metadata["authorization"]`, + `class AuthInterceptor < GRPC::ServerInterceptor`, + `def request_response(request: nil, call: nil, method: nil)`, + `server.add_http2_port("0.0.0.0:50051", :this_port_is_insecure)`, + `server.handle(ExampleService)`, + `server.run_till_terminated`, + ], + match(content, filePath) { + if (isSkippableRubyGeneratedFile(filePath, content)) return []; + + return regexMatcher( + "rb-grpc-service", + [ + { + regex: /^\s*class\s+\w+(?:::\w+)*\s*<\s*(?:::)?[\w:]+::Service\b/m, + label: "Ruby gRPC service implementation subclass", + }, + { regex: /\bGRPC::GenericService\b/, label: "GRPC::GenericService definition" }, + { regex: /^\s*rpc\s+:\w+/m, label: "rpc :Method declaration" }, + { + regex: /^\s*def\s+\w+[!?]?\s*\(\s*\w+\s*,\s*_?call\s*\)/m, + label: "RPC method(request, call)", + }, + { regex: /\bcall\.metadata\b/, label: "call.metadata auth/header boundary" }, + { regex: /\bGRPC::ServerInterceptor\b/, label: "GRPC::ServerInterceptor" }, + { + regex: /^\s*def\s+(?:request_response|client_streamer|server_streamer|bidi_streamer)\b/m, + label: "gRPC interceptor hook", + }, + { + regex: /\bserver\.handle\b|\b(?:add_http2_port|run_till_terminated)\b/, + label: "gRPC server bootstrap", + }, + ], + content, + ); + }, +}; diff --git a/packages/scanner/src/matchers/utils.ts b/packages/scanner/src/matchers/utils.ts index 1897586..6669c75 100644 --- a/packages/scanner/src/matchers/utils.ts +++ b/packages/scanner/src/matchers/utils.ts @@ -37,3 +37,32 @@ export function regexMatcher( return matches; } + +function escapeRegExp(value: string): string { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} + +export function rubyGemfileHas(content: string, gemName: string): boolean { + const gem = escapeRegExp(gemName); + const gemLine = new RegExp(`^\\s*gem\\s+["']${gem}["'](?:\\s|,|\\)|$)`); + + for (const rawLine of content.split("\n")) { + const trimmed = rawLine.trim(); + if (!trimmed || trimmed.startsWith("#")) continue; + const line = rawLine.replace(/#.*$/, ""); + if (gemLine.test(line)) return true; + } + + return false; +} + +export function rubyLockfileHas(content: string, gemName: string): boolean { + const gem = escapeRegExp(gemName); + return new RegExp(`^ {4}${gem} \\(\\d`, "m").test(content); +} + +export function isSkippableRubyGeneratedFile(filePath: string, content: string): boolean { + if (/(^|\/)(?:test|tests|spec|vendor|generated)(?:\/|$)/i.test(filePath)) return true; + if (/(?:_pb|_services_pb)\.rb$/i.test(filePath)) return true; + return /Generated by the protocol buffer compiler|DO NOT EDIT.*protoc|@generated/i.test(content); +} diff --git a/prompt-samples/07-overflow-fallback.md b/prompt-samples/07-overflow-fallback.md index c42c3fc..0402ee9 100644 --- a/prompt-samples/07-overflow-fallback.md +++ b/prompt-samples/07-overflow-fallback.md @@ -1,7 +1,7 @@