diff --git a/find-fast.js b/find-fast.js new file mode 100644 index 000000000..bc206dcdc --- /dev/null +++ b/find-fast.js @@ -0,0 +1,113 @@ +var acorn = require('acorn-node'); +var defined = require('defined'); + +var ST_NONE = 0; // Default. +var ST_SAW_NAME = 1; // Saw a `require` identifier. +var ST_INSIDE_CALL = 2; // Found a `require(` sequence; if followed by a string, that is a dependency. +var ST_MEMBER_EXPRESSION = 3; // Saw a `.`; if followed by a `require` identifier that should be ignored. +var ST_REDEF_PATTERN = 4; // Currently in progress detecting a redefinition pattern: `{0:[function(require` +var ST_REDEFINED = 5; // Currently inside a scope with a redefined `require` identifier. + +var REQUIRE_REDEF_PATTERN = [ + function (token) { return token.type === acorn.tokTypes.braceL; }, // { + function (token) { return token.type === acorn.tokTypes.num || token.type === acorn.tokTypes.string; }, // 0 + function (token) { return token.type === acorn.tokTypes.colon; }, // : + function (token) { return token.type === acorn.tokTypes.bracketL; }, // [ + function (token) { return token.type === acorn.tokTypes._function; }, // function + function (token) { return token.type === acorn.tokTypes.parenL; }, // ( + function (token, opts) { return token.type === acorn.tokTypes.name && token.value === opts.word; }, // require +]; + +module.exports = function findFast(src, opts) { + if (!opts) opts = {}; + if (typeof src !== 'string') src = String(src); + if (opts.word === undefined) opts.word = 'require'; + + var tokenizer = acorn.tokenizer(src, opts.parse); + var token; + var state = ST_NONE; + // Current index in the require redefinition pattern. + var redefIndex = 0; + // Block scope depth when require was redefined. This is used to match the + // correct } with the opening { after the redefining function parameter list. + var redefDepth = 0; + + var opener; + var args = []; + + var modules = { strings: [], expressions: [] }; + if (opts.nodes) modules.nodes = []; + + while ((token = tokenizer.getToken()) && token.type !== acorn.tokTypes.eof) { + if (state === ST_REDEFINED) { + if (token.type === acorn.tokTypes.braceL) redefDepth++; + if (token.type === acorn.tokTypes.braceR) redefDepth--; + if (redefDepth === 0) { + state = ST_NONE; + } + continue; + } + if (state === ST_REDEF_PATTERN) { + if (redefIndex >= REQUIRE_REDEF_PATTERN.length) { + // the { after the function() parameter list + if (token.type === acorn.tokTypes.braceL) { + state = ST_REDEFINED; + redefDepth = 1; + } + continue; + } else if (REQUIRE_REDEF_PATTERN[redefIndex](token, opts)) { + redefIndex++; + continue; + } else { + redefIndex = 0; + state = ST_NONE; + } + } + + if (state !== ST_INSIDE_CALL && token.type === acorn.tokTypes.dot) { + state = ST_MEMBER_EXPRESSION; + } else if (state === ST_NONE && token.type === acorn.tokTypes.name && mayBeRequire(token)) { + state = ST_SAW_NAME; + opener = token; + } else if (state === ST_SAW_NAME && token.type === acorn.tokTypes.parenL) { + state = ST_INSIDE_CALL; + args = []; + } else if (state === ST_INSIDE_CALL) { + if (token.type === acorn.tokTypes.parenR) { // End of fn() call + if (args.length === 1 && args[0].type === acorn.tokTypes.string) { + modules.strings.push(args[0].value); + } else if (args.length === 3 // A template string without any expressions + && args[0].type === acorn.tokTypes.backQuote + && args[1].type === acorn.tokTypes.template + && args[2].type === acorn.tokTypes.backQuote) { + modules.strings.push(args[1].value); + } else if (args.length > 0) { + modules.expressions.push(src.slice(args[0].start, args[args.length - 1].end)); + } + + if (opts.nodes) { + // Cut `src` at the end of this call, so that parseExpressionAt doesn't consider the `.abc` in + // `require('xyz').abc` + var chunk = src.slice(0, token.end); + var node = acorn.parseExpressionAt(chunk, opener.start, opts.parse); + modules.nodes.push(node); + } + + state = ST_NONE; + } else { + args.push(token); + } + } else if (REQUIRE_REDEF_PATTERN[0](token)) { + state = ST_REDEF_PATTERN; + redefIndex = 1; + } else { + state = ST_NONE; + } + } + return modules; + + function mayBeRequire(token) { + return token.type === acorn.tokTypes.name && + token.value === opts.word; + } +} diff --git a/index.js b/index.js index 382d701a9..b72fcf88f 100644 --- a/index.js +++ b/index.js @@ -1,12 +1,14 @@ var acorn = require('acorn-node'); var walk = require('acorn-node/walk'); +var copy = require('shallow-copy'); var defined = require('defined'); +var fastFind = require('./find-fast'); var requireRe = /\brequire\b/; -function parse (src, opts) { - if (!opts) opts = {}; - return acorn.parse(src, { +function getParseOpts (opts) { + opts = opts || {}; + return { ecmaVersion: defined(opts.ecmaVersion, 9), sourceType: defined(opts.sourceType, 'script'), ranges: defined(opts.ranges, opts.range), @@ -19,7 +21,7 @@ function parse (src, opts) { opts.allowImportExportEverywhere, true ), allowHashBang: defined(opts.allowHashBang, true) - }); + }; } var exports = module.exports = function (src, opts) { @@ -28,6 +30,12 @@ var exports = module.exports = function (src, opts) { exports.find = function (src, opts) { if (!opts) opts = {}; + else opts = copy(opts); + opts.parse = getParseOpts(opts.parse); + + if (!opts.isRequire && !opts.fullParse) { + return fastFind(src, opts); + } var word = opts.word === undefined ? 'require' : opts.word; if (typeof src !== 'string') src = String(src); @@ -44,11 +52,12 @@ exports.find = function (src, opts) { var wordRe = word === 'require' ? requireRe : RegExp('\\b' + word + '\\b'); if (!wordRe.test(src)) return modules; - var ast = parse(src, opts.parse); + var ast = acorn.parse(src, opts.parse); function visit(node, st, c) { var hasRequire = wordRe.test(src.slice(node.start, node.end)); if (!hasRequire) return; + if (isBundledDefinition(node)) return; walk.base[node.type](node, st, c); if (node.type !== 'CallExpression') return; if (isRequire(node)) { @@ -75,6 +84,20 @@ exports.find = function (src, opts) { Statement: visit, Expression: visit }); + + // Detect `require` redefinitions in function parameter lists, like + // in `{0:[function(require,module,exports){` generated by browser-pack. + // This is a simple way to address the 99% case without doing full scope analysis + function isBundledDefinition(node) { + if (node.type !== 'ObjectExpression') return false; + if (node.properties.length < 1) return false; + var arr = node.properties[0].value; + if (arr.type !== 'ArrayExpression') return false; + if (arr.elements.length < 2) return false; + if (arr.elements[0].type !== 'FunctionExpression') return false; + var fn = arr.elements[0]; + return fn.params.length > 0 && fn.params[0].type === 'Identifier' && fn.params[0].name === word; + } return modules; }; diff --git a/package.json b/package.json index 380171ab1..ff576e7fd 100644 --- a/package.json +++ b/package.json @@ -11,7 +11,8 @@ "dependencies": { "acorn-node": "^1.3.0", "defined": "^1.0.0", - "minimist": "^1.1.1" + "minimist": "^1.1.1", + "shallow-copy": "0.0.1" }, "devDependencies": { "tap": "^10.7.3" diff --git a/test/both.js b/test/both.js index f09f1f854..29cd32894 100644 --- a/test/both.js +++ b/test/both.js @@ -11,6 +11,14 @@ test('both', function (t) { t.end(); }); +test('both fullParse', function (t) { + var modules = detective.find(src, { fullParse: true }); + t.deepEqual(modules.strings, [ 'a', 'b' ]); + t.deepEqual(modules.expressions, [ "'c' + x", "'d' + y" ]); + t.notOk(modules.nodes, 'has no nodes'); + t.end(); +}); + test('both with nodes specified in opts', function (t) { var modules = detective.find(src, { nodes: true }); t.deepEqual(modules.strings, [ 'a', 'b' ]); @@ -24,3 +32,17 @@ test('both with nodes specified in opts', function (t) { 'has a node for each require'); t.end(); }); + +test('both with nodes and fullParse', function (t) { + var modules = detective.find(src, { nodes: true, fullParse: true }); + t.deepEqual(modules.strings, [ 'a', 'b' ]); + t.deepEqual(modules.expressions, [ "'c' + x", "'d' + y" ]); + t.deepEqual( + modules.nodes.map(function (n) { + var arg = n.arguments[0]; + return arg.value || arg.left.value; + }), + [ 'a', 'b', 'c', 'd' ], + 'has a node for each require'); + t.end(); +}); diff --git a/test/chained.js b/test/chained.js index 307c20150..bb370d692 100644 --- a/test/chained.js +++ b/test/chained.js @@ -5,5 +5,6 @@ var src = fs.readFileSync(__dirname + '/files/chained.js'); test('chained', function (t) { t.deepEqual(detective(src), [ 'c', 'b', 'a' ]); + t.deepEqual(detective(src, { fullParse: true }), [ 'c', 'b', 'a' ]); t.end(); }); diff --git a/test/comment.js b/test/comment.js new file mode 100644 index 000000000..d20f62a4b --- /dev/null +++ b/test/comment.js @@ -0,0 +1,18 @@ +var test = require('tap').test; +var detective = require('../'); +var fs = require('fs'); +var src = fs.readFileSync(__dirname + '/files/comment.js'); + +test('comment', function (t) { + var modules = detective.find(src); + t.deepEqual(modules.strings, [ 'beep' ]); + t.notOk(modules.nodes, 'has no nodes'); + t.end(); +}); + +test('comment fullParse', function (t) { + var modules = detective.find(src, { fullParse: true }); + t.deepEqual(modules.strings, [ 'beep' ]); + t.notOk(modules.nodes, 'has no nodes'); + t.end(); +}); diff --git a/test/complicated.js b/test/complicated.js index af402e18c..b08eab96e 100644 --- a/test/complicated.js +++ b/test/complicated.js @@ -51,8 +51,9 @@ var sources = [ ]; test('complicated', function (t) { - t.plan(sources.length); + t.plan(sources.length * 2); sources.forEach(function(src) { t.deepEqual(detective(src), [ 'a' ]); + t.deepEqual(detective(src, { fullParse: true }), [ 'a' ]); }); }); diff --git a/test/es6-module.js b/test/es6-module.js index 379c89cc3..57d20bcf3 100644 --- a/test/es6-module.js +++ b/test/es6-module.js @@ -4,6 +4,7 @@ var fs = require('fs'); var src = fs.readFileSync(__dirname + '/files/es6-module.js'); test('es6-module', function (t) { - t.plan(1); + t.plan(2); t.deepEqual(detective(src, {parse: {sourceType: 'module'}}), [ 'a', 'b' ]); + t.deepEqual(detective(src, {parse: {sourceType: 'module'}, fullParse: true}), [ 'a', 'b' ]); }); diff --git a/test/files/comment.js b/test/files/comment.js new file mode 100644 index 000000000..7664ebeb0 --- /dev/null +++ b/test/files/comment.js @@ -0,0 +1,5 @@ +var x = require /* idk */ +// whatever +( +'beep' // boop +) diff --git a/test/files/scope.js b/test/files/scope.js new file mode 100644 index 000000000..b94d787ac --- /dev/null +++ b/test/files/scope.js @@ -0,0 +1,12 @@ +(function(modules){ + modules[1](function(i){return modules[i]()}) +})({1:[function (require,module,exports) { + require('./y') // inside a bundle; should not be detected +},{'./y':2}],2:function(require,module,exports){ + console.log("abc") +}}) + +(function (require) { + require('./x'); // not inside a bundle; should be detected +}(require)); // (because someone might do this) +require('./z') diff --git a/test/generators.js b/test/generators.js index c16d53466..b3c099385 100644 --- a/test/generators.js +++ b/test/generators.js @@ -4,6 +4,7 @@ var fs = require('fs'); var src = fs.readFileSync(__dirname + '/files/generators.js'); test('generators', function (t) { - t.plan(1); + t.plan(2); t.deepEqual(detective(src), [ 'a', 'b' ]); + t.deepEqual(detective(src, { fullParse: true }), [ 'a', 'b' ]); }); diff --git a/test/nested.js b/test/nested.js index d688c0f80..6b694e4f9 100644 --- a/test/nested.js +++ b/test/nested.js @@ -5,5 +5,6 @@ var src = fs.readFileSync(__dirname + '/files/nested.js'); test('nested', function (t) { t.deepEqual(detective(src), [ 'a', 'b', 'c' ]); + t.deepEqual(detective(src, { fullParse: true }), [ 'a', 'b', 'c' ]); t.end(); }); diff --git a/test/noargs.js b/test/noargs.js index 4871b60be..8dc7ac6cd 100644 --- a/test/noargs.js +++ b/test/noargs.js @@ -7,8 +7,9 @@ var fs = require('fs'); var src = [ 'fn();', 'otherfn();', 'fn();' ].join('\n') test('noargs', function (t) { - t.plan(1); + t.plan(2); t.deepEqual(detective(src, { word: 'fn' }).length, 0, 'finds no arg id'); + t.deepEqual(detective(src, { word: 'fn', fullParse: true }).length, 0, 'finds no arg id'); }); test('find noargs with nodes', function (t) { @@ -24,3 +25,17 @@ test('find noargs with nodes', function (t) { 'all matches are correct' ); }); + +test('find noargs with nodes and fullParse', function (t) { + t.plan(4); + var modules = detective.find(src, { word: 'fn', nodes: true, fullParse: true }); + t.equal(modules.strings.length, 0, 'finds no arg id'); + t.equal(modules.expressions.length, 0, 'finds no expressions'); + t.equal(modules.nodes.length, 2, 'finds a node for each matching function call'); + t.equal( + modules.nodes.filter(function (x) { + return x.callee.name === 'fn' + }).length, 2, + 'all matches are correct' + ); +}); diff --git a/test/return.js b/test/return.js index c2da016bb..a23f92958 100644 --- a/test/return.js +++ b/test/return.js @@ -4,6 +4,7 @@ var fs = require('fs'); var src = [ 'require("a")\nreturn' ]; test('return', function (t) { - t.plan(1); + t.plan(2); t.deepEqual(detective(src), [ 'a' ]); + t.deepEqual(detective(src, { fullParse: true }), [ 'a' ]); }); diff --git a/test/scope.js b/test/scope.js new file mode 100644 index 000000000..1c3e735f8 --- /dev/null +++ b/test/scope.js @@ -0,0 +1,10 @@ +var test = require('tap').test; +var detective = require('../'); +var fs = require('fs'); +var src = fs.readFileSync(__dirname + '/files/scope.js'); + +test('scope', function (t) { + t.plan(2); + t.deepEqual(detective(src), [ './x', './z' ]); + t.deepEqual(detective(src, { fullParse: true }), [ './x', './z' ]); +}); diff --git a/test/set-in-object-pattern.js b/test/set-in-object-pattern.js index 4787b1edb..3d418b4c9 100644 --- a/test/set-in-object-pattern.js +++ b/test/set-in-object-pattern.js @@ -8,5 +8,9 @@ test('set in object pattern', function (t) { detective(src, { word : 'load' }), [ 'a', 'b', 'c', 'tt' ] ); + t.deepEqual( + detective(src, { word : 'load', fullParse: true }), + [ 'a', 'b', 'c', 'tt' ] + ); t.end(); -}); \ No newline at end of file +}); diff --git a/test/shebang.js b/test/shebang.js index b662ea241..92c1395da 100644 --- a/test/shebang.js +++ b/test/shebang.js @@ -4,6 +4,7 @@ var fs = require('fs'); var src = fs.readFileSync(__dirname + '/files/shebang.js'); test('shebang', function (t) { - t.plan(1); + t.plan(2); t.deepEqual(detective(src), [ 'a', 'b', 'c' ]); + t.deepEqual(detective(src, { fullParse: true }), [ 'a', 'b', 'c' ]); }); diff --git a/test/sparse-array.js b/test/sparse-array.js index f64f3593c..7cdbc4a37 100644 --- a/test/sparse-array.js +++ b/test/sparse-array.js @@ -8,6 +8,9 @@ test('sparse-array', function (t) { t.doesNotThrow(function () { detective(src) }) + t.doesNotThrow(function () { + detective(src, { fullParse: true }) + }) t.end(); }); diff --git a/test/strings.js b/test/strings.js index 3b5e7d821..3a87aca9d 100644 --- a/test/strings.js +++ b/test/strings.js @@ -5,5 +5,6 @@ var src = fs.readFileSync(__dirname + '/files/strings.js'); test('single', function (t) { t.deepEqual(detective(src), [ 'a', 'b', 'c', 'events', 'doom', 'y', 'events2' ]); + t.deepEqual(detective(src, { fullParse: true }), [ 'a', 'b', 'c', 'events', 'doom', 'y', 'events2' ]); t.end(); }); diff --git a/test/word.js b/test/word.js index cf5397d85..d9951640a 100644 --- a/test/word.js +++ b/test/word.js @@ -8,5 +8,9 @@ test('word', function (t) { detective(src, { word : 'load' }), [ 'a', 'b', 'c', 'events', 'doom', 'y', 'events2' ] ); + t.deepEqual( + detective(src, { word : 'load', fullParse: true }), + [ 'a', 'b', 'c', 'events', 'doom', 'y', 'events2' ] + ); t.end(); }); diff --git a/test/yield.js b/test/yield.js index 85560ab7a..d6b06dfd0 100644 --- a/test/yield.js +++ b/test/yield.js @@ -4,6 +4,7 @@ var fs = require('fs'); var src = fs.readFileSync(__dirname + '/files/yield.js'); test('yield', function (t) { - t.plan(1); + t.plan(2); t.deepEqual(detective(src), [ 'a', 'c' ]); + t.deepEqual(detective(src, { fullParse: true }), [ 'a', 'c' ]); });