From 3ea47dcaaaa713bd14b6f02b8f29ea73aac0ead1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9e=20Kooi?= Date: Sun, 26 Nov 2017 16:03:28 +0100 Subject: [PATCH 1/7] Add fast version using Acorn tokenizer --- find-fast.js | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++ index.js | 16 ++++++---- 2 files changed, 95 insertions(+), 5 deletions(-) create mode 100644 find-fast.js diff --git a/find-fast.js b/find-fast.js new file mode 100644 index 000000000..da2a79e76 --- /dev/null +++ b/find-fast.js @@ -0,0 +1,84 @@ +var acorn = require('acorn'); +var defined = require('defined'); + +var ST_NONE = 0; +var ST_SAW_NAME = 1; +var ST_INSIDE_CALL = 2; +var ST_MEMBER_EXPRESSION = 3; + +module.exports = function detective (src, opts) { + opts = opts || {}; + src = String(src) + + if (!opts.word) opts.word = 'require' + + var tokenizer = acorn.tokenizer(src, opts.parse); + var token; + var any = {}; + var search = [ + acorn.tokTypes.name, + acorn.tokTypes.parenL, + any, + acorn.tokTypes.parenR + ]; + var state = ST_NONE; + var opener; + var args = []; + + var modules = { strings: [], expressions: [] }; + if (opts.nodes) modules.nodes = []; + + while ((token = tokenizer.getToken()) && token.type !== acorn.tokTypes.eof) { + if (state !== ST_INSIDE_CALL && token.type === acorn.tokTypes.dot) { + state = ST_MEMBER_EXPRESSION; + } else if (state === ST_NONE && token.type === acorn.tokTypes.name && mayBeRequire(token)) { + state = ST_SAW_NAME; + opener = token; + } else if (state === ST_SAW_NAME && token.type === acorn.tokTypes.parenL) { + state = ST_INSIDE_CALL; + args = [] + } else if (state === ST_INSIDE_CALL) { + if (token.type === acorn.tokTypes.parenR) { // End of fn() call + var node; + // When a custom `isRequire` is passed, we need to parse the entire CallExpression and pass it to the function. + if (opts.nodes || opts.isRequire) { + // Cut `src` at the end of this call, so that parseExpressionAt doesn't consider the `.abc` in + // `require('xyz').abc` + var chunk = src.slice(0, token.end); + node = acorn.parseExpressionAt(chunk, opener.start, opts.parse); + } + + if (opts.isRequire && !opts.isRequire(node)) { + state = ST_NONE; + continue; + } + + if (args.length === 1 && args[0].type === acorn.tokTypes.string) { + modules.strings.push(args[0].value); + } else if (args.length > 0) { + modules.expressions.push(src.slice(args[0].start, args[args.length - 1].end)); + } + + if (opts.nodes) { + modules.nodes.push(node); + } + + state = ST_NONE; + } else { + args.push(token) + } + } else { + state = ST_NONE; + } + } + return modules; + + function mayBeRequire (token) { + if (opts.isRequire) { + // We'll parse all callexpressions in this case. + return token.type === acorn.tokTypes.name; + } + return token.type === acorn.tokTypes.name && + token.value === opts.word; + } +} diff --git a/index.js b/index.js index 382d701a9..5632eb195 100644 --- a/index.js +++ b/index.js @@ -1,12 +1,13 @@ var acorn = require('acorn-node'); var walk = require('acorn-node/walk'); var defined = require('defined'); +var fastFind = require('./find-fast'); var requireRe = /\brequire\b/; -function parse (src, opts) { - if (!opts) opts = {}; - return acorn.parse(src, { +function getParseOpts (opts) { + opts = opts || {}; + return { ecmaVersion: defined(opts.ecmaVersion, 9), sourceType: defined(opts.sourceType, 'script'), ranges: defined(opts.ranges, opts.range), @@ -19,7 +20,7 @@ function parse (src, opts) { opts.allowImportExportEverywhere, true ), allowHashBang: defined(opts.allowHashBang, true) - }); + }; } var exports = module.exports = function (src, opts) { @@ -28,6 +29,11 @@ var exports = module.exports = function (src, opts) { exports.find = function (src, opts) { if (!opts) opts = {}; + opts = Object.assign({}, opts, { parse: getParseOpts(opts.parse) }); + + if (!opts.isRequire && !opts.fullParse) { + return fastFind(src, opts); + } var word = opts.word === undefined ? 'require' : opts.word; if (typeof src !== 'string') src = String(src); @@ -44,7 +50,7 @@ exports.find = function (src, opts) { var wordRe = word === 'require' ? requireRe : RegExp('\\b' + word + '\\b'); if (!wordRe.test(src)) return modules; - var ast = parse(src, opts.parse); + var ast = acorn.parse(src, opts.parse); function visit(node, st, c) { var hasRequire = wordRe.test(src.slice(node.start, node.end)); From 5dd1fb1a0374625033c93dfe6580971baa3fbad8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9e=20Kooi?= Date: Sun, 26 Nov 2017 16:05:39 +0100 Subject: [PATCH 2/7] formatting --- find-fast.js | 121 ++++++++++++++++++++++++--------------------------- 1 file changed, 57 insertions(+), 64 deletions(-) diff --git a/find-fast.js b/find-fast.js index da2a79e76..88d04c073 100644 --- a/find-fast.js +++ b/find-fast.js @@ -6,79 +6,72 @@ var ST_SAW_NAME = 1; var ST_INSIDE_CALL = 2; var ST_MEMBER_EXPRESSION = 3; -module.exports = function detective (src, opts) { - opts = opts || {}; - src = String(src) +module.exports = function findFast(src, opts) { + if (!opts) opts = {}; + if (typeof src !== 'string') src = String(src); + if (opts.word === undefined) opts.word = 'require'; - if (!opts.word) opts.word = 'require' + var tokenizer = acorn.tokenizer(src, opts.parse); + var token; + var state = ST_NONE; - var tokenizer = acorn.tokenizer(src, opts.parse); - var token; - var any = {}; - var search = [ - acorn.tokTypes.name, - acorn.tokTypes.parenL, - any, - acorn.tokTypes.parenR - ]; - var state = ST_NONE; - var opener; - var args = []; + var opener; + var args = []; - var modules = { strings: [], expressions: [] }; - if (opts.nodes) modules.nodes = []; + var modules = { strings: [], expressions: [] }; + if (opts.nodes) modules.nodes = []; - while ((token = tokenizer.getToken()) && token.type !== acorn.tokTypes.eof) { - if (state !== ST_INSIDE_CALL && token.type === acorn.tokTypes.dot) { - state = ST_MEMBER_EXPRESSION; - } else if (state === ST_NONE && token.type === acorn.tokTypes.name && mayBeRequire(token)) { - state = ST_SAW_NAME; - opener = token; - } else if (state === ST_SAW_NAME && token.type === acorn.tokTypes.parenL) { - state = ST_INSIDE_CALL; - args = [] - } else if (state === ST_INSIDE_CALL) { - if (token.type === acorn.tokTypes.parenR) { // End of fn() call - var node; - // When a custom `isRequire` is passed, we need to parse the entire CallExpression and pass it to the function. - if (opts.nodes || opts.isRequire) { - // Cut `src` at the end of this call, so that parseExpressionAt doesn't consider the `.abc` in - // `require('xyz').abc` - var chunk = src.slice(0, token.end); - node = acorn.parseExpressionAt(chunk, opener.start, opts.parse); - } + while ((token = tokenizer.getToken()) && token.type !== acorn.tokTypes.eof) { + if (state !== ST_INSIDE_CALL && token.type === acorn.tokTypes.dot) { + state = ST_MEMBER_EXPRESSION; + } else if (state === ST_NONE && token.type === acorn.tokTypes.name && mayBeRequire(token)) { + state = ST_SAW_NAME; + opener = token; + } else if (state === ST_SAW_NAME && token.type === acorn.tokTypes.parenL) { + state = ST_INSIDE_CALL; + args = []; + } else if (state === ST_INSIDE_CALL) { + if (token.type === acorn.tokTypes.parenR) { // End of fn() call + var node; + // When a custom `isRequire` is passed, we need to parse the entire CallExpression and pass it to the function. + if (opts.nodes || opts.isRequire) { + // Cut `src` at the end of this call, so that parseExpressionAt doesn't consider the `.abc` in + // `require('xyz').abc` + var chunk = src.slice(0, token.end); + node = acorn.parseExpressionAt(chunk, opener.start, opts.parse); + } - if (opts.isRequire && !opts.isRequire(node)) { - state = ST_NONE; - continue; - } + if (opts.isRequire && !opts.isRequire(node)) { + state = ST_NONE; + continue; + } - if (args.length === 1 && args[0].type === acorn.tokTypes.string) { - modules.strings.push(args[0].value); - } else if (args.length > 0) { - modules.expressions.push(src.slice(args[0].start, args[args.length - 1].end)); - } + if (args.length === 1 && args[0].type === acorn.tokTypes.string) { + modules.strings.push(args[0].value); + } else if (args.length > 0) { + modules.expressions.push(src.slice(args[0].start, args[args.length - 1].end)); + } - if (opts.nodes) { - modules.nodes.push(node); - } + if (opts.nodes) { + modules.nodes.push(node); + } - state = ST_NONE; - } else { - args.push(token) - } - } else { - state = ST_NONE; + state = ST_NONE; + } else { + args.push(token); + } + } else { + state = ST_NONE; + } } - } - return modules; + return modules; - function mayBeRequire (token) { - if (opts.isRequire) { - // We'll parse all callexpressions in this case. - return token.type === acorn.tokTypes.name; + function mayBeRequire(token) { + if (opts.isRequire) { + // We'll parse all callexpressions in this case. + return token.type === acorn.tokTypes.name; + } + return token.type === acorn.tokTypes.name && + token.value === opts.word; } - return token.type === acorn.tokTypes.name && - token.value === opts.word; - } } From 453ca4e725eca912f6da954c86f5a7a2d8a9c139 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9e=20Kooi?= Date: Sun, 26 Nov 2017 16:06:17 +0100 Subject: [PATCH 3/7] Remove `isRequire` stuff from find-fast --- find-fast.js | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/find-fast.js b/find-fast.js index 88d04c073..bd937ee38 100644 --- a/find-fast.js +++ b/find-fast.js @@ -32,20 +32,6 @@ module.exports = function findFast(src, opts) { args = []; } else if (state === ST_INSIDE_CALL) { if (token.type === acorn.tokTypes.parenR) { // End of fn() call - var node; - // When a custom `isRequire` is passed, we need to parse the entire CallExpression and pass it to the function. - if (opts.nodes || opts.isRequire) { - // Cut `src` at the end of this call, so that parseExpressionAt doesn't consider the `.abc` in - // `require('xyz').abc` - var chunk = src.slice(0, token.end); - node = acorn.parseExpressionAt(chunk, opener.start, opts.parse); - } - - if (opts.isRequire && !opts.isRequire(node)) { - state = ST_NONE; - continue; - } - if (args.length === 1 && args[0].type === acorn.tokTypes.string) { modules.strings.push(args[0].value); } else if (args.length > 0) { @@ -53,6 +39,10 @@ module.exports = function findFast(src, opts) { } if (opts.nodes) { + // Cut `src` at the end of this call, so that parseExpressionAt doesn't consider the `.abc` in + // `require('xyz').abc` + var chunk = src.slice(0, token.end); + var node = acorn.parseExpressionAt(chunk, opener.start, opts.parse); modules.nodes.push(node); } @@ -67,10 +57,6 @@ module.exports = function findFast(src, opts) { return modules; function mayBeRequire(token) { - if (opts.isRequire) { - // We'll parse all callexpressions in this case. - return token.type === acorn.tokTypes.name; - } return token.type === acorn.tokTypes.name && token.value === opts.word; } From a7df66bac0daebf482c12c0f8b37f95536af735a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9e=20Kooi?= Date: Sun, 26 Nov 2017 16:19:11 +0100 Subject: [PATCH 4/7] add comment test --- test/comment.js | 11 +++++++++++ test/files/comment.js | 5 +++++ 2 files changed, 16 insertions(+) create mode 100644 test/comment.js create mode 100644 test/files/comment.js diff --git a/test/comment.js b/test/comment.js new file mode 100644 index 000000000..985ec5d61 --- /dev/null +++ b/test/comment.js @@ -0,0 +1,11 @@ +var test = require('tap').test; +var detective = require('../'); +var fs = require('fs'); +var src = fs.readFileSync(__dirname + '/files/comment.js'); + +test('comment', function (t) { + var modules = detective.find(src); + t.deepEqual(modules.strings, [ 'beep' ]); + t.notOk(modules.nodes, 'has no nodes'); + t.end(); +}); diff --git a/test/files/comment.js b/test/files/comment.js new file mode 100644 index 000000000..7664ebeb0 --- /dev/null +++ b/test/files/comment.js @@ -0,0 +1,5 @@ +var x = require /* idk */ +// whatever +( +'beep' // boop +) From 575f3e1dd8ec6dbc0941a75a1743557d16db5cb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9e=20Kooi?= Date: Sun, 26 Nov 2017 16:29:17 +0100 Subject: [PATCH 5/7] shallow-copy --- index.js | 4 +++- package.json | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/index.js b/index.js index 5632eb195..5f4181b12 100644 --- a/index.js +++ b/index.js @@ -1,5 +1,6 @@ var acorn = require('acorn-node'); var walk = require('acorn-node/walk'); +var copy = require('shallow-copy'); var defined = require('defined'); var fastFind = require('./find-fast'); @@ -29,7 +30,8 @@ var exports = module.exports = function (src, opts) { exports.find = function (src, opts) { if (!opts) opts = {}; - opts = Object.assign({}, opts, { parse: getParseOpts(opts.parse) }); + else opts = copy(opts); + opts.parse = getParseOpts(opts.parse); if (!opts.isRequire && !opts.fullParse) { return fastFind(src, opts); diff --git a/package.json b/package.json index 86eeb286b..0ba65a6e6 100644 --- a/package.json +++ b/package.json @@ -9,7 +9,8 @@ }, "dependencies": { "acorn-node": "^1.3.0", - "defined": "^1.0.0" + "defined": "^1.0.0", + "shallow-copy": "0.0.1" }, "devDependencies": { "tap": "^10.7.3" From 038c74c1200e8e77696e955d54c80b12de86e8e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9e=20Kooi?= Date: Mon, 25 Dec 2017 10:39:09 +0100 Subject: [PATCH 6/7] support template strings in the token based finder --- find-fast.js | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/find-fast.js b/find-fast.js index bd937ee38..b0465d51e 100644 --- a/find-fast.js +++ b/find-fast.js @@ -34,6 +34,11 @@ module.exports = function findFast(src, opts) { if (token.type === acorn.tokTypes.parenR) { // End of fn() call if (args.length === 1 && args[0].type === acorn.tokTypes.string) { modules.strings.push(args[0].value); + } else if (args.length === 3 // A template string without any expressions + && args[0].type === acorn.tokTypes.backQuote + && args[1].type === acorn.tokTypes.template + && args[2].type === acorn.tokTypes.backQuote) { + modules.strings.push(args[1].value); } else if (args.length > 0) { modules.expressions.push(src.slice(args[0].start, args[args.length - 1].end)); } From 796bdae27445e4cda6c17bce647ceab884ab6823 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9e=20Kooi?= Date: Mon, 12 Feb 2018 20:31:12 +0100 Subject: [PATCH 7/7] ignore require() calls inside already browserified bundles If a function expression declares a `require` parameter, its body is not analyzed for `require()` calls. This is mostly helpful for bundles that were already browserified, `detective` will ignore the calls to browser-pack's require runtime. --- index.js | 13 +++++++++++++ test/files/scope.js | 8 ++++++++ test/scope.js | 9 +++++++++ 3 files changed, 30 insertions(+) create mode 100644 test/files/scope.js create mode 100644 test/scope.js diff --git a/index.js b/index.js index 3b568c420..651afdc9f 100644 --- a/index.js +++ b/index.js @@ -50,6 +50,7 @@ exports.find = function (src, opts) { function visit(node, st, c) { var hasRequire = wordRe.test(src.slice(node.start, node.end)); if (!hasRequire) return; + if (redefinesRequire(node)) return; walk.base[node.type](node, st, c); if (node.type !== 'CallExpression') return; if (isRequire(node)) { @@ -76,6 +77,18 @@ exports.find = function (src, opts) { Statement: visit, Expression: visit }); + + // Detect `require` redefinitions in function parameter lists, like + // in `[function(require,module,exports){` generated by browser-pack. + // This is a simple way to address the 99% case without doing full scope analysis + function redefinesRequire(node) { + if (node.type === 'FunctionExpression') { + return node.params.some(function (param) { + return param.type === 'Identifier' && param.name === word; + }); + } + return false; + } return modules; }; diff --git a/test/files/scope.js b/test/files/scope.js new file mode 100644 index 000000000..66ca0aed5 --- /dev/null +++ b/test/files/scope.js @@ -0,0 +1,8 @@ +(function(modules){ + modules[1](function(i){return modules[i]()}) +})({1:[function (require,module,exports) { + require('./y') +},{'./y':2}],2:function(require,module,exports){ + console.log("abc") +}}) +require('./z') diff --git a/test/scope.js b/test/scope.js new file mode 100644 index 000000000..e6b29bbb9 --- /dev/null +++ b/test/scope.js @@ -0,0 +1,9 @@ +var test = require('tap').test; +var detective = require('../'); +var fs = require('fs'); +var src = fs.readFileSync(__dirname + '/files/scope.js'); + +test('scope', function (t) { + t.plan(1); + t.deepEqual(detective(src), [ './z' ]); +});