From 28d7a4619bd824dee3bea74b002d42e33537f97f Mon Sep 17 00:00:00 2001
From: "Chris (ChrisJr404)" <11917633+ChrisJr404@users.noreply.github.com>
Date: Mon, 4 May 2026 14:36:54 -0400
Subject: [PATCH] fix(parsers/javascript): extract Express anonymous route handler callbacks (#21)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The analyzer never walked into call-expression argument lists, so when a
route was defined idiomatically as

    router.post('/orders', authenticateToken, async (req, res) => { ... });

the only thing it picked up was the named middleware reference. The actual
handler body — where most of the application's business logic and
vulnerabilities live — was invisible to every downstream stage, causing
reachability_filter to drop almost everything as "unreachable from input."

Adds an _extractRouteHandlerCallbacks pass that walks every CallExpression
in the file, detects the Express verb shape
(`.{get,post,put,patch,delete,options,head,all,use}(...)`), and extracts
each ArrowFunction / FunctionExpression argument as a unit with:

- synthetic name `<METHOD> <path>` (e.g. `POST /orders`) when the path is a
  string literal — matches the "method and path as metadata" expectation in
  the issue.
- `unitType: "route_handler"` so the existing classifier downstream doesn't
  have to re-derive it from the body.
- `isEntryPoint: true` so reachability_filter treats it as a request-data
  entry the way it already treats named (req, res) middleware.
- `httpMethod` / `httpPath` properties carried through for any follow-up
  steps that want to render route info.

Multi-callback registrations (chained middleware + final handler) get
suffixed with their post-path arg index so they don't collide.

`SyntaxKind.CallExpression` is resolved dynamically off the typescript dep
at call time — its numeric value drifts between typescript releases (213 in
older versions, 214 in 5.x).
Smoke-tested against the issue's example file plus three additional shapes
(`router.get(path, handler)`, `app.use(path, mw)`,
`router.delete(path, mw1, mw2, handler)`); all four units extracted with
correct method / path / index. The named middleware references
(`authenticateToken`) correctly stay out of the unit list since they parse
as Identifiers, not ArrowFunctions.

Hapi / Koa / Fastify use a different shape (object literal with a `handler`
property) and are out of scope for this fix.

Review follow-ups: call expressions are now matched by kind name instead
of a numeric SyntaxKind taken from a separately resolved `typescript`
package, and a callback whose id is already owned by a different callback
(same route registered twice, several path-less `app.use(...)` calls) is
suffixed with its start line instead of being dropped.
---
 .../parsers/javascript/typescript_analyzer.js | 157 ++++++++++++++++++
 1 file changed, 157 insertions(+)

diff --git a/libs/openant-core/parsers/javascript/typescript_analyzer.js b/libs/openant-core/parsers/javascript/typescript_analyzer.js
index a41a80d..1497d51 100644
--- a/libs/openant-core/parsers/javascript/typescript_analyzer.js
+++ b/libs/openant-core/parsers/javascript/typescript_analyzer.js
@@ -240,6 +240,163 @@ class TypeScriptAnalyzer {
     // Extract functions from module.exports.propertyName = function() {...}
     // Pattern used by DVNA and similar CommonJS codebases
     this._extractModuleExportsPropertyFunctions(sourceFile, relativePath);
+
+    // Extract anonymous arrow / function-expression callbacks passed to
+    // Express.js style route registrations. Without this pass the parser
+    // misses the actual handler bodies whenever a codebase uses the
+    // idiomatic `router.post('/x', handler, async (req, res) => {...})`
+    // pattern, which is the bug reported in
+    // https://github.com/knostic/OpenAnt/issues/21.
+    this._extractRouteHandlerCallbacks(sourceFile, relativePath);
+  }
+
+  /**
+   * Express verbs that take a path-string and one or more handler
+   * callbacks. `use` is included because `app.use('/api', (req, res, next) => …)`
+   * is a common middleware mounting pattern. `all` is the Express
+   * "match every method" wildcard.
+   *
+   * Hapi / Koa / Fastify use a different shape (object literal with a
+   * `handler` property rather than a positional callback) and would need
+   * separate detection — out of scope for the #21 fix.
+   */
+  static _expressRouteVerbs() {
+    return new Set([
+      "get",
+      "post",
+      "put",
+      "patch",
+      "delete",
+      "options",
+      "head",
+      "all",
+      "use",
+    ]);
+  }
+
+  /**
+   * Walk every call expression in the file and, when it looks like
+   * `<obj>.<verb>(<path>, ...callbacks)` for an Express verb, treat
+   * each arrow / function-expression argument as a route handler unit.
+   *
+   * Each extracted unit gets:
+   *   - a synthetic name in the shape `<METHOD> <path>` (e.g.
+   *     `POST /orders`) when the path is a string literal — matches the
+   *     "method and path as metadata" expectation in the issue.
+   *   - `isEntryPoint: true` since these directly receive HTTP request
+   *     data, which is what the reachability_filter looks for.
+   *   - `unitType: "route_handler"` so the existing classifier logic
+   *     downstream doesn't have to re-derive it.
+   *
+   * If multiple callbacks are passed (middleware chain plus the final
+   * handler), each one becomes its own unit suffixed with its 0-based
+   * argument index, so they don't collide. When a different callback in
+   * the same file already owns the resulting id (the same route registered
+   * twice, or several path-less `app.use(...)` calls), the later unit is
+   * suffixed with its start line instead of being dropped.
+   *
+   * @param {object} sourceFile - Parsed source file node to scan.
+   * @param {string} relativePath - File path used as the unit id prefix.
+   */
+  _extractRouteHandlerCallbacks(sourceFile, relativePath) {
+    const verbs = TypeScriptAnalyzer._expressRouteVerbs();
+
+    // Match call expressions by kind *name*, like every other kind check
+    // in this method. A numeric SyntaxKind read from a separately
+    // resolved `typescript` package is only meaningful if that package is
+    // the exact compiler build that produced this AST; the numbers shift
+    // between releases, so a mismatch would match the wrong node kind.
+    const callExprs = [];
+    sourceFile.forEachDescendant((node) => {
+      if (node.getKindName() === "CallExpression") {
+        callExprs.push(node);
+      }
+    });
+
+    for (const callExpr of callExprs) {
+      const expression = callExpr.getExpression();
+      if (!expression || expression.getKindName() !== "PropertyAccessExpression") {
+        continue;
+      }
+
+      const verb = expression.getName ? expression.getName() : null;
+      if (!verb || !verbs.has(verb.toLowerCase())) {
+        continue;
+      }
+
+      const args = callExpr.getArguments();
+      if (args.length === 0) {
+        continue;
+      }
+
+      // Path is the first arg if it's a string literal. Some patterns
+      // pass a regex or omit the path entirely (e.g. `app.use(middleware)`),
+      // in which case we fall back to a bare `<METHOD>` label.
+      const first = args[0];
+      const firstKind = first.getKindName();
+      const firstIsString =
+        firstKind === "StringLiteral" ||
+        firstKind === "NoSubstitutionTemplateLiteral";
+      // .getLiteralText() returns the unquoted value
+      const pathLiteral =
+        firstIsString && first.getLiteralText ? first.getLiteralText() : null;
+
+      // Iterate the *callback* arguments — skip the path arg if present.
+      const startIdx =
+        firstIsString || firstKind === "RegularExpressionLiteral" ? 1 : 0;
+
+      const verbUpper = verb.toUpperCase();
+      const baseName = pathLiteral ? `${verbUpper} ${pathLiteral}` : verbUpper;
+
+      for (let i = startIdx; i < args.length; i++) {
+        const arg = args[i];
+        const argKind = arg.getKindName();
+        if (argKind !== "ArrowFunction" && argKind !== "FunctionExpression") {
+          continue;
+        }
+
+        // Suffix with the post-path argument index so the callbacks of one
+        // registration don't collide. The first callback gets no suffix
+        // to keep the common "single handler" case readable.
+        const argIdx = i - startIdx;
+        let name = argIdx === 0 ? baseName : `${baseName} [${argIdx}]`;
+        let functionId = `${relativePath}:${name}`;
+
+        const code = arg.getFullText();
+        const startLine = arg.getStartLineNumber();
+        const endLine = arg.getEndLineNumber();
+
+        const isSameUnit = (unit) =>
+          unit.startLine === startLine &&
+          unit.endLine === endLine &&
+          unit.code === code;
+
+        let existing = this.functions[functionId];
+        if (existing && !isSameUnit(existing)) {
+          // A different callback already owns this id. Disambiguate by
+          // start line rather than silently losing the handler body.
+          name = `${name} @L${startLine}`;
+          functionId = `${relativePath}:${name}`;
+          existing = this.functions[functionId];
+        }
+        if (existing) {
+          // Already recorded (e.g. the same file was analyzed twice).
+          continue;
+        }
+
+        this.functions[functionId] = {
+          name: name,
+          code: code,
+          isExported: false,
+          unitType: "route_handler",
+          startLine: startLine,
+          endLine: endLine,
+          isEntryPoint: true,
+          httpMethod: verbUpper,
+          httpPath: pathLiteral || null,
+        };
+      }
+    }
   }
 
   /**