|
5 | 5 | import json |
6 | 6 | import time |
7 | 7 | import traceback |
| 8 | +from concurrent import futures |
8 | 9 | from pathlib import Path |
9 | 10 | from functools import partial |
10 | 11 | from typing import Union, Optional, Tuple, Generator, List |
@@ -223,94 +224,10 @@ def generate_typosquatting(out, distance=2, limit=None): |
223 | 224 | out.write(json.dumps({"original": x, "typosquatting": y}) + "\n") |
224 | 225 |
|
225 | 226 |
|
def generate_r2c_input(out_file):
    """
    Write a JSON input-set manifest covering the packages listed on PyPI.

    Every name yielded by ``PypiPackage.list_packages()`` is looked up with
    ``PypiPackage.from_pypi``; names for which that lookup raises
    ``exceptions.NoSuchPackage`` are left out. Each remaining package
    contributes one input definition whose ``targets`` value is a
    JSON-encoded *string* built from the entries of ``pkg.info["urls"]``.

    :param out_file: object exposing ``write(str)``; receives the whole
        manifest in a single call
    """
    definitions = []

    for name in PypiPackage.list_packages():
        try:
            package = PypiPackage.from_pypi(name)
        except exceptions.NoSuchPackage:
            # The lookup reported the package as missing: skip it.
            continue

        release_files = [
            {"url": entry["url"], "metadata": entry}
            for entry in package.info["urls"]
        ]

        definitions.append(
            {
                "metadata": {"package": name},
                "input_type": "AuraInput",
                # Serialized on its own, so the manifest carries it as a string.
                "targets": json.dumps(release_files),
            }
        )

    manifest = {
        "name": "aura",
        "version": "0.0.1",
        "description": "This is a set of all PyPI packages",
        "inputs": definitions,
    }
    out_file.write(json.dumps(manifest))
257 | | - |
258 | | - |
def r2c_scan(source, out_file, mode="generic"):
    """
    Scan the given sources and write the findings as one JSON document.

    In ``"pypi"`` mode ``source`` must hold exactly one directory. If that
    directory contains a ``metadata.json`` file, its ``packagetype``,
    ``name`` and ``python_version`` values are added to the metadata handed
    to the scanner. Every other entry of the directory then becomes a scan
    source. In any other mode the sources are scanned as given.

    The document has two keys: ``results`` (one item per hit) and ``errors``
    (one item per source whose scan raised). It is pretty-printed and then
    written to ``out_file``.

    :param source: sequence of locations to scan
    :param out_file: object exposing ``write(str)``
    :param mode: ``"pypi"`` or anything else for the generic behaviour
    """
    report = {"results": [], "errors": []}
    pkg_metadata = {}
    metadata = {"format": "none"}

    if mode == "pypi":
        logger.info("R2C mode set to PyPI")
        assert len(source) == 1
        location = Path(source[0])

        meta_loc = location / "metadata.json"
        if meta_loc.is_file():
            with open(meta_loc, "r") as fd:
                pkg_metadata = json.load(fd)

            metadata.update(
                {
                    "package_type": pkg_metadata.get("packagetype"),
                    "package_name": pkg_metadata.get("name"),
                    "python_version": pkg_metadata.get("python_version"),
                }
            )

        # Scan everything in the directory except the metadata file itself.
        source = [
            os.fspath(child.absolute())
            for child in location.iterdir()
            if child.name != "metadata.json"
        ]
    else:
        logger.info("R2C mode set to generic")

    for src in source:
        logger.info(f"Enumerating {src} with metadata: {metadata}")

        try:
            for loc in scan_uri(src, metadata=metadata):
                for hit in loc["hits"]:
                    # "type" is moved out of the hit; the rest travels as "extra".
                    rhit = {"check_id": hit.pop("type"), "extra": hit}

                    if "line_no" in hit:
                        rhit["start"] = {"line": hit["line_no"]}
                        # Paths are made relative to the first scan source.
                        rhit["path"] = os.path.relpath(hit["location"], source[0])

                    report["results"].append(rhit)
        except Exception as exc:
            # Record the failure for this source and carry on with the rest.
            exc_tb = exc.__traceback__
            report["errors"].append(
                {
                    "message": f"[{exc_tb.tb_lineno}] An exception occurred: {str(exc)}",
                    "data": {"path": str(src)},
                }
            )

    pprint(report)
    out_file.write(json.dumps(report, default=utils.json_encoder))
def prefetch(*uris):
    """
    Consume the paths of every given URI concurrently on a thread pool.

    A handler is created for each URI with ``URIHandler.from_uri`` in the
    calling thread, and a worker then exhausts that handler's ``get_paths()``
    iterator. The collected paths are discarded; the call is made only for
    whatever side effects ``get_paths()`` has (presumably fetching or caching
    the content -- confirm against the handler implementations).

    The function returns once all workers have finished. Exceptions raised
    inside a worker stay on its discarded future and are not re-raised here;
    an exception from ``URIHandler.from_uri`` propagates to the caller.

    :param uris: URIs to prefetch
    """

    def _drain(handler):
        # Materialize the iterator so all of the handler's work actually runs.
        return list(handler.get_paths())

    # Leaving the ``with`` block waits for every submitted task, so no
    # explicit ``shutdown(wait=True)`` is needed.
    with futures.ThreadPoolExecutor() as executor:
        for uri in uris:
            # Pass the handler as an argument so each task is bound to its own
            # handler. A closure over the loop variable would be evaluated
            # when the worker runs and could see a later URI's handler.
            executor.submit(_drain, URIHandler.from_uri(uri))
0 commit comments