From 05dcc76af6ebeed5173fad5f086511ebcae71ec1 Mon Sep 17 00:00:00 2001 From: Kevin Gilpin Date: Wed, 7 Sep 2022 08:34:27 -0400 Subject: [PATCH 1/4] wip: Search AppMaps for scope and stack --- packages/cli/src/cli.ts | 2 + packages/cli/src/cmds/search/search.ts | 98 ++++++++++++++++++++++++++ packages/cli/src/search/findStack.ts | 93 ++++++++++++++++++++++++ 3 files changed, 193 insertions(+) create mode 100644 packages/cli/src/cmds/search/search.ts create mode 100644 packages/cli/src/search/findStack.ts diff --git a/packages/cli/src/cli.ts b/packages/cli/src/cli.ts index 6eead7d969..6aa01290f9 100755 --- a/packages/cli/src/cli.ts +++ b/packages/cli/src/cli.ts @@ -24,6 +24,7 @@ const InventoryCommand = require('./inventoryCommand'); const OpenCommand = require('./cmds/open/open'); const InspectCommand = require('./cmds/inspect/inspect'); const RecordCommand = require('./cmds/record/record'); +const SearchCommand = require('./cmds/search/search'); import InstallCommand from './cmds/agentInstaller/install-agent'; import StatusCommand from './cmds/agentInstaller/status'; import PruneCommand from './cmds/prune/prune'; @@ -451,6 +452,7 @@ yargs(process.argv.slice(2)) .command(RecordCommand) .command(StatusCommand) .command(InspectCommand) + .command(SearchCommand) .command(PruneCommand) .strict() .demandCommand() diff --git a/packages/cli/src/cmds/search/search.ts b/packages/cli/src/cmds/search/search.ts new file mode 100644 index 0000000000..ffb3f59503 --- /dev/null +++ b/packages/cli/src/cmds/search/search.ts @@ -0,0 +1,98 @@ +import yargs, { number } from 'yargs'; +import readline from 'readline'; +import { handleWorkingDirectory } from '../../lib/handleWorkingDirectory'; +import { locateAppMapDir } from '../../lib/locateAppMapDir'; +import FindCodeObjects from '../../search/findCodeObjects'; +import FindEvents from '../../search/findEvents'; +import { verbose } from '../../utils'; +import FindStack, { FindStackMatch } from '../../search/findStack'; + +export const 
command = 'search'; +export const describe = + 'Search AppMaps for references to a code objects (package, function, line, class, query, route, etc)'; + +export const builder = (args) => { + args.option('directory', { + describe: 'program working directory', + type: 'string', + alias: 'd', + }); + args.option('appmap-dir', { + describe: 'directory to recursively inspect for AppMaps', + }); + args.option('route', { + describe: 'a route which all matches must contain', + }); + args.option('limit', { + describe: 'number of top matches to print', + type: number, + default: 20, + }); + return args.strict(); +}; + +export const handler = async (argv) => { + verbose(argv.verbose); + handleWorkingDirectory(argv.directory); + const appmapDir = await locateAppMapDir(argv.appmapDir); + const route = argv.route; + const limit = argv.limit; + + if (!route) yargs.exit(1, new Error(`No route was provided`)); + + const routeParam = `route:${route}`; + let stack: string; + + if (process.stdin.isTTY) { + const rl = readline.createInterface({ + input: process.stdin, + output: process.stdout, + }); + stack = await new Promise((resolve) => { + rl.question(`Enter a stack trace to search for: `, resolve); + }); + } else { + const result: Buffer[] = []; + let length = 0; + for await (const chunk of process.stdin) { + result.push(chunk); + length += chunk.length; + } + stack = Buffer.concat(result, length).toString('utf8'); + } + + if (!stack) yargs.exit(1, new Error(`No stack was provided`)); + const stackLines = stack + .split('\n') + .map((line) => line.trim()) + .filter((line) => line !== ''); + + const finder = new FindCodeObjects(appmapDir, routeParam); + const codeObjectMatches = await finder.find( + (count) => {}, + () => {} + ); + + if (codeObjectMatches?.length === 0) { + return yargs.exit(1, new Error(`Code object '${routeParam}' not found`)); + } + + const result: FindStackMatch[] = []; + await Promise.all( + codeObjectMatches.map(async (codeObjectMatch) => { + const findStack = 
new FindStack(codeObjectMatch.appmap, stackLines); + const matches = await findStack.match(); + result.push(...matches); + }) + ); + + result + .sort((a, b) => b.score - a.score) + .slice(0, limit) + .forEach((match) => { + console.log( + `Event: ${match.appmap}.appmap.json:${match.eventId}, Code Object: ${match.codeObjectId}, score: ${match.score}` + ); + console.log('\n'); + }); +}; diff --git a/packages/cli/src/search/findStack.ts b/packages/cli/src/search/findStack.ts new file mode 100644 index 0000000000..9635501073 --- /dev/null +++ b/packages/cli/src/search/findStack.ts @@ -0,0 +1,93 @@ +import { buildAppMap, Event } from '@appland/models'; +import { readFile } from 'fs/promises'; + +export type FindStackMatch = { + appmap: string; + codeObjectId: string; + eventId: number; + score: number; +}; + +export default class FindStack { + constructor(public appMapName: string, public stackLines: string[]) {} + + async match(): Promise { + const appmapFile = [this.appMapName, 'appmap.json'].join('.'); + + let appmapData: string; + try { + appmapData = JSON.parse(await readFile(appmapFile, 'utf-8')); + } catch (e) { + console.log((e as any).code); + console.warn(`Error loading ${appmapFile}: ${e}`); + return []; + } + + const appmap = buildAppMap(appmapData).build(); + const locationStack = [...this.stackLines]; + const result: FindStackMatch[] = []; + let score: number[] = []; + + const enter = (event: Event): boolean | undefined => { + const eventLocation = [event?.path, event?.lineno] + .filter(Boolean) + .join(':'); + let matchIndex: number | undefined; + if (eventLocation && eventLocation !== '') { + for ( + let i = 0; + matchIndex === undefined && i < locationStack.length; + i++ + ) { + const stackLine = locationStack[i]; + if (eventLocation === stackLine) { + matchIndex = i; + } + } + } + + if (matchIndex !== undefined) { + locationStack.splice(0, matchIndex + 1); + score.push(1); + if (locationStack.length === 0) { + return true; + } + } else { + 
score.push(0); + } + }; + + const leave = () => { + score.pop(); + }; + + for (let i = 0; i < appmap.events.length; ) { + const event = appmap.events[i]; + if (event.isCall()) { + const isFullMatch = enter(event); + const isLeaf = event.children.length === 0; + if (isFullMatch || isLeaf) { + const total = score.reduce((sum, n) => (n ? sum + n : sum)); + if (total > 0) { + result.push({ + appmap: this.appMapName, + eventId: event.id, + codeObjectId: event.codeObject.fqid, + score: total, + }); + } + } + if (isFullMatch) { + for (++i; appmap.events[i] !== event.returnEvent; ++i) {} + } else { + ++i; + } + } else { + ++i; + leave(); + } + } + + return result; + } +} From eddd50dcaa975913eb876b71f7532d597470664b Mon Sep 17 00:00:00 2001 From: Kevin Gilpin Date: Wed, 7 Sep 2022 13:12:51 -0400 Subject: [PATCH 2/4] wip Filter out duplicates by hash More flexible stack parsing --- packages/cli/src/cmds/search/sanitizeStack.ts | 26 +++++++++ packages/cli/src/cmds/search/search.ts | 20 +++++-- packages/cli/src/search/findStack.ts | 39 +++++++++---- packages/cli/src/search/searchResultHashV2.ts | 58 +++++++++++++++++++ 4 files changed, 127 insertions(+), 16 deletions(-) create mode 100644 packages/cli/src/cmds/search/sanitizeStack.ts create mode 100644 packages/cli/src/search/searchResultHashV2.ts diff --git a/packages/cli/src/cmds/search/sanitizeStack.ts b/packages/cli/src/cmds/search/sanitizeStack.ts new file mode 100644 index 0000000000..cc7ddcad62 --- /dev/null +++ b/packages/cli/src/cmds/search/sanitizeStack.ts @@ -0,0 +1,26 @@ +/** + * Process and sanitize a raw user-input stack trace. The output is an array of strings, each of the + * form `path/to/file.ext(:)?`, where lineno is an optional line number. The first (0th) + * entry in the input stack is expected to be the deepest function. The output stack lines are in + * the reverse order. + * + * @param stack raw stack trace input from the user. 
+ */ +export function sanitizeStack(stack: string): string[] { + const isNumeric = (n: string): boolean => + !isNaN(parseFloat(n)) && isFinite(parseFloat(n)); + + const sanitize = (line: string): string => { + const [path, lineno] = line.split(':', 2); + const result = [path]; + if (isNumeric(lineno)) result.push(lineno); + return result.join(':'); + }; + + return stack + .split('\n') + .map((line) => line.trim()) + .filter((line) => line !== '') + .map(sanitize) + .reverse(); +} diff --git a/packages/cli/src/cmds/search/search.ts b/packages/cli/src/cmds/search/search.ts index ffb3f59503..35ccd1512e 100644 --- a/packages/cli/src/cmds/search/search.ts +++ b/packages/cli/src/cmds/search/search.ts @@ -6,6 +6,7 @@ import FindCodeObjects from '../../search/findCodeObjects'; import FindEvents from '../../search/findEvents'; import { verbose } from '../../utils'; import FindStack, { FindStackMatch } from '../../search/findStack'; +import { sanitizeStack } from './sanitizeStack'; export const command = 'search'; export const describe = @@ -62,10 +63,7 @@ export const handler = async (argv) => { } if (!stack) yargs.exit(1, new Error(`No stack was provided`)); - const stackLines = stack - .split('\n') - .map((line) => line.trim()) - .filter((line) => line !== ''); + const stackLines = sanitizeStack(stack); const finder = new FindCodeObjects(appmapDir, routeParam); const codeObjectMatches = await finder.find( @@ -86,13 +84,23 @@ export const handler = async (argv) => { }) ); + let duplicateCount = 0; + const hashes = new Set(); result .sort((a, b) => b.score - a.score) .slice(0, limit) .forEach((match) => { + if (hashes.has(match.hash_v2)) { + duplicateCount += 1; + return; + } + hashes.add(match.hash_v2); console.log( - `Event: ${match.appmap}.appmap.json:${match.eventId}, Code Object: ${match.codeObjectId}, score: ${match.score}` + `${match.appmap}.appmap.json:${ + match.eventIds[match.eventIds.length - 1] + } (score=${match.score})` ); - console.log('\n'); }); + 
console.log(); + console.log(`Suppressed printing of ${duplicateCount} duplicates`); }; diff --git a/packages/cli/src/search/findStack.ts b/packages/cli/src/search/findStack.ts index 9635501073..956225e8e9 100644 --- a/packages/cli/src/search/findStack.ts +++ b/packages/cli/src/search/findStack.ts @@ -1,11 +1,14 @@ import { buildAppMap, Event } from '@appland/models'; import { readFile } from 'fs/promises'; +import { inspect } from 'util'; +import { verbose } from '../utils'; +import SearchResultHashV2 from './searchResultHashV2'; export type FindStackMatch = { appmap: string; - codeObjectId: string; - eventId: number; + eventIds: number[]; score: number; + hash_v2: string; }; export default class FindStack { @@ -25,27 +28,39 @@ export default class FindStack { const appmap = buildAppMap(appmapData).build(); const locationStack = [...this.stackLines]; + if (verbose()) + console.log(`Searching for stack: ${inspect(locationStack)}`); const result: FindStackMatch[] = []; let score: number[] = []; + let stack: Event[] = []; const enter = (event: Event): boolean | undefined => { - const eventLocation = [event?.path, event?.lineno] - .filter(Boolean) - .join(':'); + // TODO: Match by path and score by proximity to lineno let matchIndex: number | undefined; - if (eventLocation && eventLocation !== '') { + if (event.path && event.lineno) { + if (verbose()) + console.log( + `${stack.map((_) => ' ').join('')} ${event.path}:${event.lineno}` + ); for ( let i = 0; matchIndex === undefined && i < locationStack.length; i++ ) { - const stackLine = locationStack[i]; - if (eventLocation === stackLine) { + const [stackLinePath, stackLineLineno] = locationStack[i].split( + ':', + 2 + ); + if ( + stackLinePath === event.path && + Math.abs(event.lineno - parseFloat(stackLineLineno)) < 5 + ) { matchIndex = i; } } } + stack.push(event); if (matchIndex !== undefined) { locationStack.splice(0, matchIndex + 1); score.push(1); @@ -58,6 +73,7 @@ export default class FindStack { }; const leave 
= () => { + stack.pop(); score.pop(); }; @@ -69,10 +85,13 @@ export default class FindStack { if (isFullMatch || isLeaf) { const total = score.reduce((sum, n) => (n ? sum + n : sum)); if (total > 0) { + const hash = new SearchResultHashV2(stack); result.push({ appmap: this.appMapName, - eventId: event.id, - codeObjectId: event.codeObject.fqid, + eventIds: stack + .filter((_, index) => score[index]) + .map((e) => e.id), + hash_v2: hash.digest(), score: total, }); } diff --git a/packages/cli/src/search/searchResultHashV2.ts b/packages/cli/src/search/searchResultHashV2.ts new file mode 100644 index 0000000000..e1ea83acdf --- /dev/null +++ b/packages/cli/src/search/searchResultHashV2.ts @@ -0,0 +1,58 @@ +import { Event } from '@appland/models'; +import { createHash, Hash } from 'crypto'; + +function hashEvent(entries: string[], prefix: string, event: Event): void { + Object.keys(event.stableProperties) + .sort() + .forEach((key) => + entries.push( + [[prefix, key].join('.'), event.stableProperties[key].toString()].join( + '=' + ) + ) + ); +} + +/** + * Builds a hash (digest) of a search result. The digest is constructed by first building a canonical + * string of the search result, of the form: + * + * ``` + * [ + * algorithmVersion=2 + * stack[1].<key1>=value1 + * ... + * stack[1].<keyN>=valueN + * ... + * stack[3].<key1>=value1 + * ... 
+ * stack[3].<keyN>=valueN + * ] + * ``` + */ +export default class SearchResultHashV2 { + private hashEntries: string[] = []; + private hash: Hash; + + constructor(stack: Event[]) { + this.hash = createHash('sha256'); + + // Algorithm version is 2 because it closely matches the Findings hash algorithm v2 + const hashEntries = [['algorithmVersion', '2']].map((e) => e.join('=')); + this.hashEntries = hashEntries; + + stack.forEach((event, index) => + hashEvent(hashEntries, `stack[${index + 1}]`, event) + ); + + hashEntries.forEach((e) => this.hash.update(e)); + } + + get canonicalString(): string { + return this.hashEntries.join('\n'); + } + + digest(): string { + return this.hash.digest('hex'); + } +} From e22acf7fbe16bb64f049a2e8f4631ebe0e72e00e Mon Sep 17 00:00:00 2001 From: Kevin Gilpin Date: Wed, 7 Sep 2022 15:27:40 -0400 Subject: [PATCH 3/4] wip Determine whether stack occurs within function body --- packages/cli/src/cmds/search/sanitizeStack.ts | 5 +- packages/cli/src/lib/isNumeric.ts | 3 + packages/cli/src/search/findStack.ts | 39 ++++++++-- packages/cli/src/search/locationMap.ts | 76 +++++++++++++++++++ 4 files changed, 112 insertions(+), 11 deletions(-) create mode 100644 packages/cli/src/lib/isNumeric.ts create mode 100644 packages/cli/src/search/locationMap.ts diff --git a/packages/cli/src/cmds/search/sanitizeStack.ts b/packages/cli/src/cmds/search/sanitizeStack.ts index cc7ddcad62..f6fd552a52 100644 --- a/packages/cli/src/cmds/search/sanitizeStack.ts +++ b/packages/cli/src/cmds/search/sanitizeStack.ts @@ -1,3 +1,5 @@ +import isNumeric from "../../lib/isNumeric"; + /** * Process and sanitize a raw user-input stack trace. The output is an array of strings, each of the * form `path/to/file.ext(:)?`, where lineno is an optional line number. The first (0th) @@ -7,9 +9,6 @@ * @param stack raw stack trace input from the user. 
*/ export function sanitizeStack(stack: string): string[] { - const isNumeric = (n: string): boolean => - !isNaN(parseFloat(n)) && isFinite(parseFloat(n)); - const sanitize = (line: string): string => { const [path, lineno] = line.split(':', 2); const result = [path]; diff --git a/packages/cli/src/lib/isNumeric.ts b/packages/cli/src/lib/isNumeric.ts new file mode 100644 index 0000000000..de7ee68510 --- /dev/null +++ b/packages/cli/src/lib/isNumeric.ts @@ -0,0 +1,3 @@ +export default function isNumeric(n: string): boolean { + return !isNaN(parseFloat(n)) && isFinite(parseFloat(n)); +} diff --git a/packages/cli/src/search/findStack.ts b/packages/cli/src/search/findStack.ts index 956225e8e9..8dfb3c468e 100644 --- a/packages/cli/src/search/findStack.ts +++ b/packages/cli/src/search/findStack.ts @@ -1,7 +1,8 @@ -import { buildAppMap, Event } from '@appland/models'; +import { buildAppMap, CodeObject, Event } from '@appland/models'; import { readFile } from 'fs/promises'; import { inspect } from 'util'; import { verbose } from '../utils'; +import LocationMap from './locationMap'; import SearchResultHashV2 from './searchResultHashV2'; export type FindStackMatch = { @@ -27,6 +28,8 @@ export default class FindStack { } const appmap = buildAppMap(appmapData).build(); + const locationMap = new LocationMap(appmap.classMap); + const locationStack = [...this.stackLines]; if (verbose()) console.log(`Searching for stack: ${inspect(locationStack)}`); @@ -35,12 +38,11 @@ export default class FindStack { let stack: Event[] = []; const enter = (event: Event): boolean | undefined => { - // TODO: Match by path and score by proximity to lineno let matchIndex: number | undefined; if (event.path && event.lineno) { if (verbose()) console.log( - `${stack.map((_) => ' ').join('')} ${event.path}:${event.lineno}` + `${stack.map((_) => ' ').join('')}${event.path}:${event.lineno}` ); for ( let i = 0; @@ -51,9 +53,21 @@ export default class FindStack { ':', 2 ); + + if (stackLinePath === 
event.path) + locationMap.functionContainsLine( + stackLinePath, + event.lineno, + parseFloat(stackLineLineno) + ); + if ( stackLinePath === event.path && - Math.abs(event.lineno - parseFloat(stackLineLineno)) < 5 + locationMap.functionContainsLine( + stackLinePath, + event.lineno, + parseFloat(stackLineLineno) + ) ) { matchIndex = i; } @@ -62,7 +76,16 @@ export default class FindStack { stack.push(event); if (matchIndex !== undefined) { + if (verbose()) + console.log( + `${stack.map((_) => ' ').join('')}Matched ${ + locationStack[matchIndex] + } at event ${event.id} (${event.codeObject.fqid})` + ); locationStack.splice(0, matchIndex + 1); + if (verbose()) + console.log(`Now matching stack: ${inspect(locationStack)}`); + score.push(1); if (locationStack.length === 0) { return true; @@ -85,12 +108,12 @@ export default class FindStack { if (isFullMatch || isLeaf) { const total = score.reduce((sum, n) => (n ? sum + n : sum)); if (total > 0) { - const hash = new SearchResultHashV2(stack); + const matchStack = stack.filter((_, index) => score[index]); + const hash = new SearchResultHashV2(matchStack); + if (verbose()) console.log(`Match hash: ${hash.canonicalString}`); result.push({ appmap: this.appMapName, - eventIds: stack - .filter((_, index) => score[index]) - .map((e) => e.id), + eventIds: matchStack.map((e) => e.id), hash_v2: hash.digest(), score: total, }); diff --git a/packages/cli/src/search/locationMap.ts b/packages/cli/src/search/locationMap.ts new file mode 100644 index 0000000000..f702402c95 --- /dev/null +++ b/packages/cli/src/search/locationMap.ts @@ -0,0 +1,76 @@ +import { AppMap, ClassMap, CodeObject } from '@appland/models'; +import isNumeric from '../lib/isNumeric'; + +export const Threshold = 20; + +export default class LocationMap { + lineNumbers: Map; + + constructor(public classMap: ClassMap) { + const lineNumbers: Map = new Map(); + classMap.visit((co: CodeObject) => { + if (!co.location) return; + + const [path, lineno] = co.location.split(':', 
2); + if (!path || !isNumeric(lineno)) return; + + if (!lineNumbers.get(path)) lineNumbers.set(path, []); + + lineNumbers.get(path)!.push(parseFloat(lineno)); + }); + + this.lineNumbers = new Map(); + for (const entry of lineNumbers) { + this.lineNumbers.set(entry[0], [...entry[1].sort((a, b) => a - b)]); + } + } + + /** + * Tests whether a test line number is contained within the function that starts at a given + * line number. + * + * If the test line number is greater than the given line number and less than the + * next known function line, returns true. + * + * If the test line number is greater than the given line number and greater than the + * next known function line, returns false. + * + * If the test line number is greater than the given line number, and there is no greater + * known function line, returns true if the test line number is within a threshold of + * the given line number. + * + * @param path file path containing the function to test + * @param lineNumber start line of the function + * @param testLineNumber line number to test + * @param threshold used if the testLineNumber is greater than the lineNumber, and there is + * no known larger function line number in the code file (path). 
+ */ + functionContainsLine( + path: string, + lineNumber: number, + testLineNumber: number, + threshold = Threshold + ): boolean { + if (testLineNumber < lineNumber) return false; + + const lineNumbers = this.lineNumbers.get(path); + if (!lineNumbers) return false; + + const lastLineNumber = lineNumbers[lineNumbers.length - 1]; + + let containingFunctionLine: number | undefined; + for (let i = 0; i < lineNumbers.length; i++) { + const line = lineNumbers[i]; + if (line <= testLineNumber) containingFunctionLine = line; + else break; + } + + if (lineNumber === containingFunctionLine) { + if (containingFunctionLine === lastLineNumber) + return testLineNumber - lastLineNumber < threshold; + else return true; + } + + return false; + } +} From 4bbc2addfaf63a249351f042ef7ef1bade6a5253 Mon Sep 17 00:00:00 2001 From: Kevin Gilpin Date: Wed, 7 Sep 2022 17:04:51 -0400 Subject: [PATCH 4/4] wip Don't emit redundant events --- packages/cli/src/search/findStack.ts | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/packages/cli/src/search/findStack.ts b/packages/cli/src/search/findStack.ts index 8dfb3c468e..b5c15c3787 100644 --- a/packages/cli/src/search/findStack.ts +++ b/packages/cli/src/search/findStack.ts @@ -100,6 +100,7 @@ export default class FindStack { score.pop(); }; + const eventsEmitted = new Set(); for (let i = 0; i < appmap.events.length; ) { const event = appmap.events[i]; if (event.isCall()) { @@ -109,14 +110,17 @@ export default class FindStack { const total = score.reduce((sum, n) => (n ? 
sum + n : sum)); if (total > 0) { const matchStack = stack.filter((_, index) => score[index]); - const hash = new SearchResultHashV2(matchStack); - if (verbose()) console.log(`Match hash: ${hash.canonicalString}`); - result.push({ - appmap: this.appMapName, - eventIds: matchStack.map((e) => e.id), - hash_v2: hash.digest(), - score: total, - }); + if (!eventsEmitted.has(matchStack[matchStack.length - 1].id)) { + eventsEmitted.add(matchStack[matchStack.length - 1].id); + const hash = new SearchResultHashV2(matchStack); + if (verbose()) console.log(`Match hash: ${hash.canonicalString}`); + result.push({ + appmap: this.appMapName, + eventIds: matchStack.map((e) => e.id), + hash_v2: hash.digest(), + score: total, + }); + } } } if (isFullMatch) {