Skip to content

Commit 4dc7d03

Browse files
committed
refactor the aura update to work with the new dataset release process
update docs with env config options related to dataset; add integration status to the `aura info`
1 parent d621ba6 commit 4dc7d03

File tree

7 files changed

+87
-59
lines changed

7 files changed

+87
-59
lines changed

aura/config.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
except ImportError:
2323
import json
2424

25+
from .exceptions import InvalidConfiguration
26+
2527

2628
CFG: Optional[dict] = None
2729
CFG_PATH = None
@@ -168,7 +170,7 @@ def get_file_location(location: str, base_path: Optional[str]=None) -> str:
168170
return str(pth)
169171

170172
# TODO: use custom exception here so we can log as fatal and sys.exit(1)
171-
raise ValueError(f"Can't find configuration file `{location}` using base path `{base_path}`")
173+
raise InvalidConfiguration(f"Can't find configuration file `{location}` using base path `{base_path}`")
172174

173175

174176
def get_file_content(location: str, base_path: Optional[str]=None) -> str:
@@ -242,6 +244,11 @@ def get_pypi_stats_path() -> Path:
242244
return Path(get_file_location(pth, CFG_PATH))
243245

244246

247+
def get_reverse_dependencies_path() -> Path:
248+
pth = os.environ.get("AURA_REVERSE_DEPENDENCIES", None) or CFG["aura"]["reverse_dependencies"]
249+
return Path(get_file_location(pth, CFG_PATH))
250+
251+
245252
def iter_pypi_stats() -> Generator[dict, None, None]:
246253
pth = get_pypi_stats_path()
247254
with pth.open() as fd:

aura/data/aura_config.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ aura: &aura_config
2020
# This file is needed for typosquatting detections
2121
pypi_stats: &pypi_stats pypi_stats.json
2222

23+
reverse_dependencies: &reverse_dependencies reverse_dependencies.json
24+
2325
# Threshold for package download after which the package is considered not legitimate
2426
pypi_download_threshold: 10000
2527

aura/exceptions.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,10 @@ class NoSuchRepository(InvalidLocation):
2525
pass
2626

2727

28+
class InvalidConfiguration(AuraException, ValueError):
29+
pass
30+
31+
2832
class ASTNodeRewrite(AuraException):
2933
pass
3034

aura/info.py

Lines changed: 31 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
jsonschema = None
1414

1515
from . import __version__ as version
16+
from .exceptions import InvalidConfiguration
1617
from .uri_handlers.base import URIHandler
1718
from . import plugins
1819
from . import config
@@ -27,16 +28,35 @@ def get_analyzer_description(analyzer) -> str:
2728

2829

2930
def check_pypi_stats() -> dict:
30-
if config.get_pypi_stats_path(): # Put into try except
31-
return {
32-
"enabled": True,
33-
"description": "PyPI typosquatting protection enabled"
34-
}
35-
else:
36-
return {
37-
"enabled": False,
38-
"description": "PyPI download stats not found, typosquatting protection is disabled. Run `aura fetch-pypi-stats` to download"
39-
}
31+
try:
32+
if config.get_pypi_stats_path(): # Put into try except
33+
return {
34+
"enabled": True,
35+
"description": "PyPI typosquatting protection enabled"
36+
}
37+
except InvalidConfiguration:
38+
pass
39+
40+
return {
41+
"enabled": False,
42+
"description": "PyPI download stats not found, typosquatting protection is disabled. Run `aura update` to download"
43+
}
44+
45+
46+
def check_reverse_dependencies() -> dict:
47+
try:
48+
if config.get_reverse_dependencies_path():
49+
return {
50+
"enabled": True,
51+
"description": "Reverse dependencies dataset present. Package scoring feature is fully enabled"
52+
}
53+
except InvalidConfiguration:
54+
pass
55+
56+
return {
57+
"enabled": False,
58+
"description": "Reverse dependencies dataset not found, package scoring may be affected. Run `aura update` to download"
59+
}
4060

4161

4262
def check_git() -> dict:
@@ -90,7 +110,6 @@ def gather_aura_information() -> dict:
90110
}
91111

92112
for k, v in analyzers["disabled"]:
93-
#doc = get_analyzer_description(v)
94113
info["analyzers"][k] = {
95114
"enabled": False,
96115
"description": v
@@ -105,6 +124,7 @@ def gather_aura_information() -> dict:
105124
info["uri_handlers"][k] = {"enabled": True}
106125

107126
info["integrations"]["pypi_stats"] = check_pypi_stats()
127+
info["integrations"]["reverse_dependencies"] = check_reverse_dependencies()
108128
info["integrations"]["git"] = check_git()
109129

110130
return info

aura/output/text.py

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,10 @@
5252
}
5353

5454

55+
OK = '\u2713'
56+
NOK = '\u2717'
57+
58+
5559

5660
class PrettyReport:
5761
ANSI_RE = re.compile(r"""
@@ -311,10 +315,14 @@ class TextInfoOutput(InfoOutputBase):
311315
def protocol(cls) -> str:
312316
return "text"
313317

314-
def output_info_data(self, data):
315-
OK = '\u2713'
316-
NOK = '\u2717'
318+
def get_feature_status(self, fmt, name, status):
319+
enabled = status.get("enabled", True)
320+
mark = OK if enabled else NOK
321+
description = status.get("description", "Description N/A")
322+
s = {"fg": ("bright_green" if enabled else "bright_red")}
323+
return style(fmt.format(mark=mark, status=status, name=name, enabled=enabled, description=description), **s)
317324

325+
def output_info_data(self, data):
318326
out = PrettyReport()
319327

320328
# Left hand side of the table contains logo and basic project information
@@ -343,22 +351,15 @@ def output_info_data(self, data):
343351
rhs_lines.append("Installed analyzers:")
344352

345353
for name, i in data["analyzers"].items():
346-
if i["enabled"]:
347-
mark = OK
348-
s = {"fg": "bright_green"}
349-
else:
350-
mark = NOK
351-
s = {"fg": "bright_red"}
354+
rhs_lines.append(self.get_feature_status(fmt=" {mark} {name}: {description}", name=name, status=i))
352355

353-
rhs_lines.append(style(f" {mark} {name}: {i['description']}", **s))
356+
rhs_lines.append("Integrations:")
357+
for name, i in data["integrations"].items():
358+
rhs_lines.append(self.get_feature_status(fmt=" {mark} {name}: {description}", name=name, status=i))
354359

355360
rhs_lines.append("Installed URI handlers:")
356361
for name, i in data["uri_handlers"].items():
357-
enabled = i.get("enabled", True)
358-
mark = OK if enabled else NOK
359-
s = {"fg": ("bright_green" if enabled else "bright_red") }
360-
361-
rhs_lines.append(style(f" {mark} `{name}://` - {i.get('description', 'Description N/A')}", **s))
362+
rhs_lines.append(self.get_feature_status(fmt=" {mark} `{name}://` - {description}", name=name, status=i))
362363

363364
rhs_size = max(len(x) for x in rhs_lines)
364365

aura/update.py

Lines changed: 13 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
import os
22
import shutil
3+
import tarfile
4+
import tempfile
35
from pathlib import Path
46

57
from click import secho
68

79
from . import utils
810

911

10-
STATS_CDN_URL = "https://cdn.sourcecode.ai/aura/pypi_stats.json"
11-
REVERSE_CDN_URL = "https://cdn.sourcecode.ai/aura/reverse_dependencies.json"
12+
DATASET_CDN_URL = "https://cdn.sourcecode.ai/aura/aura_dataset.tgz"
1213

1314

1415
def backup_file(file_path):
@@ -20,28 +21,19 @@ def backup_file(file_path):
2021
shutil.copyfile(file_path, f"{file_path}.bak")
2122

2223

23-
def update_pypi_stats(outfile=None):
24-
if outfile is None:
25-
outfile = Path("pypi_stats.json")
24+
def update_dataset():
25+
cwd = Path.cwd()
2626

27-
backup_file(outfile)
28-
secho("Downloading latest pypi download stats dataset")
27+
secho("Downloading latest aura dataset files")
28+
with tempfile.NamedTemporaryFile(prefix="aura_dataset_update_", suffix=".tgz", mode="wb") as fd:
29+
utils.download_file(DATASET_CDN_URL, fd)
2930

30-
fd = outfile.open("wb")
31-
utils.download_file(STATS_CDN_URL, fd)
31+
archive = tarfile.open(fd.name, "r:*")
3232

33-
34-
def update_reverse_dependencies(outfile=None):
35-
if outfile is None:
36-
outfile = Path("reverse_dependencies.json")
37-
38-
backup_file(outfile)
39-
secho("Downloading latest reverse PyPI dependencies dataset")
40-
41-
fd = outfile.open("wb")
42-
utils.download_file(REVERSE_CDN_URL, fd)
33+
for f in ("reverse_dependencies.json", "pypi_stats.json"):
34+
backup_file(cwd/f)
35+
archive.extract(f, path=cwd)
4336

4437

4538
def update_all():
46-
update_pypi_stats()
47-
update_reverse_dependencies()
39+
update_dataset()

docs/source/configuration.rst

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -115,14 +115,16 @@ Environment config options
115115

116116
The following environment variable configuration options can be used to configure the aura behaviour:
117117

118-
======================= =============================================================
119-
Environment variable Explanation
120-
======================= =============================================================
121-
AURA_CFG Overwrite the path to the main configuration file
122-
AURA_SIGNATURES Overwrite the path to the configuration file for signatures/patterns
123-
AURA_MIRROR_PATH Location to the local pypi mirror repository
124-
AURA_LOG_LEVEL Output log level
125-
AURA_NO_BLOBS Disable extraction of data blobs for further analysis
126-
AURA_NO_PROGRESS Disable cli progress bar, useful when redirecting stderr and stdout
127-
AURA_DEBUG_LINES List of line numbers separated by ``,``. Aura will then call ``breakpoint()`` when traversing AST tree and it visits a node located on those specific line numbers
128-
======================= =============================================================
118+
=========================== =============================================================
119+
Environment variable Explanation
120+
=========================== =============================================================
121+
AURA_CFG Overwrite the path to the main configuration file
122+
AURA_SIGNATURES Overwrite the path to the configuration file for signatures/patterns
123+
AURA_MIRROR_PATH Location to the local pypi mirror repository
124+
AURA_PYPI_STATS Overwrite the path to the aura ``pypi_stats`` dataset
125+
AURA_REVERSE_DEPENDENCIES Overwrite the path to the aura ``reverse_dependencies`` dataset
126+
AURA_LOG_LEVEL Output log level
127+
AURA_NO_BLOBS Disable extraction of data blobs for further analysis
128+
AURA_NO_PROGRESS Disable cli progress bar, useful when redirecting stderr and stdout
129+
AURA_DEBUG_LINES List of line numbers separated by ``,``. Aura will then call ``breakpoint()`` when traversing AST tree and it visits a node located on those specific line numbers
130+
=========================== =============================================================

0 commit comments

Comments
 (0)