Skip to content

Commit d0617e3

Browse files
committed
- add aura prefetch command for native prefetching of files into cache
- remove old r2c integration that is no longer working/updated
1 parent d4acae8 commit d0617e3

File tree

7 files changed

+18
-132
lines changed

7 files changed

+18
-132
lines changed

aura/cli.py

Lines changed: 4 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -231,23 +231,10 @@ def find_typosquatting(out, max_distance, limit=None):
231231
commands.generate_typosquatting(out=out, distance=max_distance, limit=limit)
232232

233233

234-
@cli.group("r2c")
235-
def r2c():
236-
pass
237-
238-
239-
@r2c.command(name="generate_input")
240-
@click.argument("out_file", metavar="<OUTPUT FILE>", type=click.File("w"))
241-
def generate_input(out_file):
242-
commands.generate_r2c_input(out_file)
243-
244-
245-
@r2c.command(name="scan")
246-
@click.option("--out", default="/analysis/output/output.json", type=click.File("w"))
247-
@click.option("--mode", default="generic")
248-
@click.argument("source", nargs=-1, type=click.Path())
249-
def run_r2c_analyzer(source, out, mode):
250-
commands.r2c_scan(source=source, out_file=out, mode=mode)
234+
@cli.command()
235+
@click.argument("uris", nargs=-1, metavar="<URI 1>, <URI 2>, ...")
236+
def prefetch(uris):
237+
commands.prefetch(*uris)
251238

252239

253240
@cli.command(name="check_requirement")

aura/commands.py

Lines changed: 7 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import json
66
import time
77
import traceback
8+
from concurrent import futures
89
from pathlib import Path
910
from functools import partial
1011
from typing import Union, Optional, Tuple, Generator, List
@@ -223,94 +224,10 @@ def generate_typosquatting(out, distance=2, limit=None):
223224
out.write(json.dumps({"original": x, "typosquatting": y}) + "\n")
224225

225226

226-
def generate_r2c_input(out_file):
227-
inputs = []
228-
229-
for pkg_name in PypiPackage.list_packages():
230-
try:
231-
pkg = PypiPackage.from_pypi(pkg_name)
232-
except exceptions.NoSuchPackage:
233-
continue
234-
targets = []
235-
236-
input_definition = {
237-
"metadata": {"package": pkg_name},
238-
"input_type": "AuraInput",
239-
}
240-
241-
for url in pkg.info["urls"]:
242-
targets.append({"url": url["url"], "metadata": url})
243-
244-
input_definition["targets"] = json.dumps(targets)
245-
inputs.append(input_definition)
246-
247-
out_file.write(
248-
json.dumps(
249-
{
250-
"name": "aura",
251-
"version": "0.0.1",
252-
"description": "This is a set of all PyPI packages",
253-
"inputs": inputs,
254-
}
255-
)
256-
)
257-
258-
259-
def r2c_scan(source, out_file, mode="generic"):
260-
out = {"results": [], "errors": []}
261-
262-
pkg_metadata = {}
263-
264-
metadata = {"format": "none"}
265-
266-
if mode == "pypi":
267-
logger.info("R2C mode set to PyPI")
268-
assert len(source) == 1
269-
location = Path(source[0])
270-
271-
meta_loc = location / "metadata.json"
272-
if meta_loc.is_file():
273-
with open(location / "metadata.json", "r") as fd:
274-
pkg_metadata = json.loads(fd.read())
275-
metadata.update(
276-
{
277-
"package_type": pkg_metadata.get("packagetype"),
278-
"package_name": pkg_metadata.get("name"),
279-
"python_version": pkg_metadata.get("python_version"),
280-
}
281-
)
282-
source = [
283-
os.fspath(x.absolute())
284-
for x in location.iterdir()
285-
if x.name != "metadata.json"
286-
]
287-
else:
288-
logger.info("R2C mode set to generic")
289-
290-
for src in source:
291-
logger.info(f"Enumerating {src} with metadata: {metadata}")
292-
293-
try:
294-
data = scan_uri(src, metadata=metadata)
295-
296-
for loc in data:
297-
for hit in loc["hits"]:
298-
rhit = {"check_id": hit.pop("type"), "extra": hit}
299-
if "line_no" in hit:
300-
rhit["start"] = {"line": hit["line_no"]}
301-
rhit["path"] = os.path.relpath(hit["location"], source[0])
302-
303-
out["results"].append(rhit)
304-
305-
except Exception as exc:
306-
exc_tb = sys.exc_info()[-1]
307-
308-
out["errors"].append(
309-
{
310-
"message": f"[{exc_tb.tb_lineno}] An exception occurred: {str(exc)}",
311-
"data": {"path": str(src)},
312-
}
313-
)
227+
def prefetch(*uris):
228+
with futures.ThreadPoolExecutor() as executor:
229+
for uri in uris:
230+
handler = URIHandler.from_uri(uri)
231+
executor.submit(lambda :list(handler.get_paths()))
314232

315-
pprint(out)
316-
out_file.write(json.dumps(out, default=utils.json_encoder))
233+
executor.shutdown(wait=True)

aura/uri_handlers/base.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ def exists(self) -> bool:
9494
return True
9595

9696
@abstractmethod
97-
def get_paths(self, metadata: Optional[dict]) -> Generator[ScanLocation, None, None]:
97+
def get_paths(self, metadata: Optional[dict]=None) -> Generator[ScanLocation, None, None]:
9898
...
9999

100100
def get_diff_paths(self, other: URIHandler) -> Generator[Tuple[ScanLocation, ScanLocation], None, None]:
@@ -156,7 +156,6 @@ def __post_init__(self):
156156
except PythonExecutorError:
157157
pass
158158

159-
160159
def __compute_hashes(self):
161160
tl = tlsh.Tlsh()
162161
md5 = hashlib.md5()
@@ -186,7 +185,6 @@ def __compute_hashes(self):
186185
self.metadata["sha256"] = sha256.hexdigest()
187186
self.metadata["sha512"] = sha512.hexdigest()
188187

189-
190188
def __str__(self):
191189
return self.strip(self.str_location)
192190

aura/uri_handlers/git.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import pathlib
33
import shutil
44
from urllib.parse import ParseResult
5+
from typing import Optional
56

67
from .. import config
78
from ..exceptions import PluginDisabled
@@ -41,7 +42,7 @@ def metadata(self):
4142
}
4243
return m
4344

44-
def get_paths(self, metadata: dict=None):
45+
def get_paths(self, metadata: Optional[dict]=None):
4546
if self.opts.get('download_dir') is None:
4647
p = tempfile.mkdtemp(prefix="aura_git_repo_clone_")
4748
self.opts["download_dir"] = p

aura/uri_handlers/mirror.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# -*- coding: utf-8 -*-
2-
import typing
32
from urllib.parse import urlparse, ParseResult, parse_qs
3+
from typing import Optional, Generator
44

55
from .base import URIHandler, ScanLocation
66
from .. import cache
@@ -30,7 +30,7 @@ def __init__(self, uri: ParseResult):
3030
def metadata(self):
3131
return {"package": self.package_name, "package_opts": self.opts}
3232

33-
def get_paths(self, metadata: dict=None, package=None) -> typing.Generator[ScanLocation, None, None]:
33+
def get_paths(self, metadata: Optional[dict]=None, package=None) -> Generator[ScanLocation, None, None]:
3434
if package is None:
3535
package = self.package
3636

aura/uri_handlers/pypi.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import tempfile
55
import pathlib
66
import urllib.parse
7-
from typing import Generator, Tuple
7+
from typing import Generator, Tuple, Optional
88

99
from .base import URIHandler, PackageProvider, ScanLocation
1010
from ..exceptions import UnsupportedDiffLocation
@@ -53,7 +53,7 @@ def metadata(self):
5353
}
5454
return m
5555

56-
def get_paths(self, metadata: dict=None):
56+
def get_paths(self, metadata: Optional[dict]=None):
5757
if self.opts.get("download_dir") is None:
5858
self.opts["download_dir"] = pathlib.Path(
5959
tempfile.mkdtemp(prefix="aura_pypi_download_")

tests/test_cli.py

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -158,23 +158,6 @@ def test_ast_parser(fixtures):
158158
assert "adalaraoawa aoalalaeaH" in result.stdout
159159

160160

161-
def test_r2c_integration():
162-
f_name = 'r2c_test_output.json'
163-
runner = CliRunner()
164-
with runner.isolated_filesystem():
165-
result = runner.invoke(cli.cli, ['r2c', 'scan', '--out', f_name])
166-
167-
if result.exception:
168-
raise result.exception
169-
170-
assert result.exit_code == 0
171-
with open(f_name, 'r') as fd:
172-
data = json.loads(fd.read())
173-
174-
assert 'results' in data
175-
assert 'errors' in data
176-
177-
178161
def test_async_cleanup(fixtures):
179162
from aura.uri_handlers import base
180163

0 commit comments

Comments
 (0)