Skip to content

Commit ff68a5d

Browse files
committed
- refactor typosquatting for better text output
- various bugfixes and code enhancements
1 parent 8f2339d commit ff68a5d

File tree

13 files changed

+383
-160
lines changed

13 files changed

+383
-160
lines changed

aura/analyzers/data_finder.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,10 @@ def __init__(self, *args, **kwargs):
3232

3333
self._no_blobs = False
3434

35-
if "AURA_NO_BLOB" in os.environ:
35+
if "AURA_NO_BLOBS" in os.environ:
3636
self._no_blobs = True
3737

3838
self.__min_blob_size = self.get_min_size()
39-
pass
4039

4140
@classmethod
4241
def get_min_size(cls) -> int:

aura/cache.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,12 @@ def proxy_url(cls, *, url, fd, cache_id=None):
4343
return utils.download_file(url, fd=fd)
4444

4545
if cache_id is None:
46-
cache_id = hashlib.md5(url).hexdigest()
46+
if isinstance(url, str):
47+
burl = url.encode()
48+
else:
49+
burl = url
50+
51+
cache_id = hashlib.md5(burl).hexdigest()
4752

4853
cache_id = f"url_{cache_id}"
4954
cache_pth: Path = cls.get_location()/cache_id

aura/cli.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -223,12 +223,13 @@ def update_aura():
223223
@click.option("-o", "--out", default="-", type=click.File("w"))
224224
@click.option("-m", "--max-distance", default=2, type=click.IntRange(min=0, max=10))
225225
@click.option("-l", "--limit", default=100, type=click.INT)
226-
def find_typosquatting(out, max_distance, limit=100):
226+
@click.argument("pkg", nargs=-1)
227+
def find_typosquatting(out, max_distance, limit=100, pkg=None):
227228
if limit <= 0:
228229
click.secho("Invalid value for limit", file=sys.stderr)
229230
sys.exit(1)
230231

231-
commands.generate_typosquatting(out=out, distance=max_distance, limit=limit)
232+
commands.generate_typosquatting(out=out, distance=max_distance, limit=limit, pkgs=pkg)
232233

233234

234235
@cli.command()

aura/commands.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -214,13 +214,28 @@ def show_info():
214214
formatter.output_info_data(info_data)
215215

216216

217-
def generate_typosquatting(out, distance=2, limit=None):
217+
def generate_typosquatting(out, distance=2, limit=None, pkgs=None):
218+
from .output.text import PrettyReport
219+
220+
p = PrettyReport() # TODO: convert into plugin system
221+
222+
if not pkgs:
223+
pkgs = typos.get_popular_packages()
224+
218225
f = partial(typos.damerau_levenshtein, max_distance=distance)
219-
for num, (x, y) in enumerate(typos.enumerator(typos.generate_popular(), f)):
220-
if limit and num >= limit:
226+
combinations = typos.generate_combinations(left=pkgs)
227+
228+
for idx, data in enumerate(typos.enumerator(combinations, f)):
229+
if limit and idx >= limit:
221230
break
222231

223-
out.write(json.dumps({"original": x, "typosquatting": y}) + "\n")
232+
try:
233+
diff_table = data["orig_pkg"]._cmp_info(data["typo_pkg"])
234+
t1 = data["orig_score"].get_score_table()
235+
t2 = data["typo_score"].get_score_table()
236+
p.print_tables(t1, t2, diff_table)
237+
except exceptions.NoSuchPackage:
238+
continue
224239

225240

226241
def prefetch(*uris):

aura/diff.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import tempfile
99
import shutil
1010
import pprint
11-
from typing import Union, Optional, List
11+
from typing import Union, Optional, List, Tuple
1212
from pathlib import Path
1313
from collections import defaultdict
1414
from dataclasses import dataclass
@@ -358,6 +358,8 @@ def pprint(self):
358358

359359

360360
class DiffDetections:
361+
ANALYSIS_CACHE = {}
362+
361363
def __init__(self, file_diffs: List[Diff], a_location: ScanLocation, b_location: ScanLocation):
362364
self.file_diffs: List[Diff] = file_diffs
363365
self.a_loc: ScanLocation = a_location
@@ -384,9 +386,12 @@ def __init__(self, file_diffs: List[Diff], a_location: ScanLocation, b_location:
384386
b_detections = b_pairs.get(d.b_ref, [])
385387
d.add_detections(a_detections, b_detections)
386388

387-
def scan_location(self, location):
388-
sandbox = Analyzer(location=location)
389-
return tuple(sandbox.run())
389+
def scan_location(self, location) -> Tuple[Detection]:
390+
if location.location not in self.ANALYSIS_CACHE:
391+
sandbox = Analyzer(location=location)
392+
detections = tuple(sandbox.run())
393+
self.ANALYSIS_CACHE[location.location] = detections
394+
return self.ANALYSIS_CACHE[location.location]
390395

391396
def pair_hits(self, diff_refs, hits):
392397
orphans = []

aura/output/table.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from click import style
2+
13
from dataclasses import dataclass, field
24
from typing import Any, List, Iterable, Optional
35

@@ -33,6 +35,10 @@ def style(self) -> dict:
3335

3436
return s
3537

38+
@property
39+
def pretty(self) -> str:
40+
return style(str(self), **self.style)
41+
3642
def asdict(self) -> dict:
3743
data = {
3844
"value": self.value
@@ -81,6 +87,11 @@ def __iadd__(self, other):
8187
def __iter__(self):
8288
yield from self.rows
8389

90+
@property
91+
def width(self) -> int:
92+
title_width = len(self.metadata.get("title", ""))
93+
return max(sum(self.col_len)+3, title_width)
94+
8495
def asdict(self) -> dict:
8596
d = {"rows": [
8697
[c.asdict() for c in row] for row in self.rows
@@ -89,3 +100,27 @@ def asdict(self) -> dict:
89100
d["metadata"] = self.metadata
90101

91102
return d
103+
104+
def pprint(self, preport=None):
105+
from .text import PrettyReport
106+
107+
if preport is None:
108+
preport = PrettyReport()
109+
110+
preport.print_tables(self, self)
111+
112+
preport.print_top_separator()
113+
114+
if self.metadata.get("title"):
115+
preport.print_heading(self.metadata["title"])
116+
117+
for row in self:
118+
cols = []
119+
for idx, col in enumerate(row):
120+
text = preport._align_text(style(str(col), **col.style), width=self.col_len[idx])
121+
cols.append(text)
122+
123+
preport.align(" \u2502 ".join(cols))
124+
125+
preport.print_bottom_separator()
126+

aura/output/text.py

Lines changed: 62 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import re
22
import os
33
import sys
4+
import itertools
45
from shutil import get_terminal_size
56
from dataclasses import dataclass
67
from textwrap import wrap
@@ -69,40 +70,54 @@ class PrettyReport:
6970
""", re.VERBOSE)
7071

7172
def __init__(self, fd=None):
72-
width = config.get_settings("aura.text-output-width", "auto")
7373
self.term_width = get_terminal_size(fallback=(120, 24))[0]
7474

75-
if width == "auto":
76-
self.width = self.term_width
75+
if "AURA_TERM_WIDTH" in os.environ:
76+
self.width = int(os.environ["AURA_TERM_WIDTH"])
7777
else:
78-
self.width = int(width or 120)
78+
width = config.get_settings("aura.text-output-width", "auto")
79+
if width == "auto":
80+
self.width = self.term_width
81+
else:
82+
self.width = int(width or 120)
7983

8084
self.fd = fd
8185

8286
@classmethod
8387
def ansi_length(cls, line:str):
8488
return len(cls.ANSI_RE.sub("", line))
8589

86-
def print_separator(self, sep="\u2504", left="\u251C", right="\u2524"):
87-
secho(f"{left}{sep*(self.width-2)}{right}", file=self.fd, color=TTY_COLORS)
90+
def print_separator(self, sep="\u2504", left="\u251C", right="\u2524", width=None):
91+
if width is None:
92+
width = self.width
93+
94+
secho(f"{left}{sep*(width-2)}{right}", file=self.fd, color=TTY_COLORS)
8895

8996
def print_thick_separator(self):
9097
self.print_separator(left="\u255E", sep="\u2550", right="\u2561")
9198

92-
def print_top_separator(self):
93-
self.print_separator(left="\u2552", sep="\u2550", right="\u2555")
99+
def print_top_separator(self, **kwargs):
100+
self.print_separator(left="\u2552", sep="\u2550", right="\u2555", **kwargs)
94101

95-
def print_bottom_separator(self):
96-
self.print_separator(left="\u2558", sep="\u2550", right="\u255B")
102+
def print_bottom_separator(self, **kwargs):
103+
self.print_separator(left="\u2558", sep="\u2550", right="\u255B", **kwargs)
104+
105+
def generate_heading(self, text, left="\u251C", right="\u2524", infill="\u2591", width=None):
106+
if width is None:
107+
width = self.width - len(left) - len(right) - 2
97108

98-
def print_heading(self, text, left="\u251C", right="\u2524", infill="\u2591"):
99109
text_len = self.ansi_length(text)
100-
ljust = (self.width-4-text_len)//2
101-
rjust = self.width-4-text_len-ljust
102-
secho(f"{left}{infill*ljust} {text} {infill*rjust}{right}", file=self.fd, color=TTY_COLORS)
110+
ljust = (width - text_len) // 2
111+
rjust = width - text_len - ljust
112+
return f"{left}{infill*ljust} {text} {infill*rjust}{right}"
113+
114+
def print_heading(self, *args, **kwargs):
115+
secho(self.generate_heading(*args, **kwargs), file=self.fd, color=TTY_COLORS)
103116

104-
def align(self, line, pos=-1, left="\u2502 ", right=" \u2502"):
105-
line = self._align_text(line, self.width - len(left) - len(right), pos=pos)
117+
def align(self, line, pos=-1, left="\u2502 ", right=" \u2502", width=None):
118+
if width is None:
119+
width = self.width
120+
line = self._align_text(line, width - len(left) - len(right), pos=pos)
106121
secho(f"{left}{line}{right}", file=self.fd, color=TTY_COLORS)
107122

108123
def wrap(self, text, left="\u2502 ", right=" \u2502"):
@@ -136,6 +151,34 @@ def _align_text(self, text, width, pos=-1):
136151
else:
137152
return " " * (remaining_len - content_len) + text
138153

154+
def print_tables(self, *tables):
155+
table_widths = [t.width+2 for t in tables]
156+
157+
self.print_top_separator()
158+
159+
titles = [t.metadata.get("title", "N/A") for t in tables]
160+
tparts = [tuple(self.generate_heading(title, width=w, left="", right="") for w, title in zip(table_widths, titles))]
161+
162+
for idx, rows in enumerate(itertools.zip_longest(*tables, fillvalue="")):
163+
164+
full_row = []
165+
166+
for ridx, row in enumerate(rows):
167+
text = " \u2506 ".join(self._align_text(c.pretty, width=tables[ridx].col_len[cidx]) for cidx, c in enumerate(row))
168+
text = self._align_text(text, width=table_widths[ridx]+2)
169+
170+
full_row.append(text)
171+
172+
tparts.append(full_row)
173+
174+
for idx, tpart in enumerate(tparts):
175+
self.align(" \u2551 ".join(tpart))
176+
if idx == 0:
177+
self.print_thick_separator()
178+
179+
self.print_bottom_separator()
180+
181+
139182

140183
@dataclass()
141184
class TextBase:
@@ -224,7 +267,7 @@ def imports_to_tree(self, items: list) -> dict:
224267

225268
return root
226269

227-
def output_table(self, table):
270+
def output_table(self, table: Table):
228271
out = PrettyReport(fd=self._fd)
229272
out.print_top_separator()
230273

@@ -405,8 +448,8 @@ def __exit__(self, exc_type, exc_val, exc_tb):
405448
def output_diff(self, diff_analyzer):
406449
out = PrettyReport(fd=self._fd)
407450

408-
for table in diff_analyzer.tables:
409-
self.output_table(table)
451+
if diff_analyzer.tables:
452+
out.print_tables(*diff_analyzer.tables)
410453

411454
for diff in self.filtered(diff_analyzer.diffs):
412455
out.print_separator(left="\u2552", sep="\u2550", right="\u2555")

0 commit comments

Comments
 (0)