Skip to content

Commit 21b7450

Browse files
committed
Handle paths with pathlib.Path #2402
* Remove unnecessary tests

Signed-off-by: Jono Yang <jyang@nexb.com>
1 parent 7e6317f commit 21b7450

File tree

2 files changed

+11
-33
lines changed

2 files changed

+11
-33
lines changed

src/textcode/gibberish.py

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -11,24 +11,24 @@
1111

1212
import math
1313
import pickle
14-
import os
14+
from pathlib import Path
1515

16-
data_dir = os.path.dirname(os.path.abspath(__file__)) + '/data/gibberish/'
17-
model_path = data_dir + 'gib_model.pki'
16+
data_dir = Path(__file__).parent / 'data' / 'gibberish'
17+
model_path = data_dir / 'gib_model.pki'
18+
big_file_path = data_dir / 'big.txt'
19+
good_file_path = data_dir / 'good.txt'
20+
bad_file_path = data_dir / 'bad.txt'
1821

1922
accepted_chars = 'abcdefghijklmnopqrstuvwxyz0123456789- '
2023
pos = dict([(char, idx) for idx, char in enumerate(accepted_chars)])
2124

2225

2326
class Gibberish(object):
2427
def __init__(self):
25-
self.train_if_necessary()
26-
27-
def train_if_necessary(self):
28-
if not os.path.isfile(model_path):
29-
self.train()
30-
else:
28+
if model_path.exists():
3129
self.load_persisted_model()
30+
else:
31+
self.train()
3232

3333
def persist_model(self):
3434
with open(model_path, 'wb') as f:
@@ -62,8 +62,8 @@ def avg_transition_prob(self, l, log_prob_mat):
6262
# The exponentiation translates from log probs to probs.
6363
return math.exp(log_prob / (transition_ct or 1))
6464

65-
def train(self, bigfile=data_dir + 'big.txt', goodfile=data_dir + 'good.txt',
66-
badfile=data_dir + 'bad.txt'):
65+
def train(self, bigfile=big_file_path, goodfile=good_file_path,
66+
badfile=bad_file_path):
6767
""" Write a simple model as a pickle file """
6868
k = len(accepted_chars)
6969
# Assume we have seen 10 of each character pair. This acts as a kind of
@@ -103,9 +103,7 @@ def train(self, bigfile=data_dir + 'big.txt', goodfile=data_dir + 'good.txt',
103103
self.persist_model()
104104

105105
def detect_gibberish(self, text):
106-
107106
text = ''.join(self.normalize(text))
108-
109107
return self.avg_transition_prob(text, self.mat) < self.thresh
110108

111109
def percent_gibberish(self, text):

tests/cluecode/test_copyrights_basic.py

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -249,26 +249,6 @@ def test_is_candidate_should_not_select_line_with_junk_hex(self):
249249
line = prepare_text_line(line)
250250
assert not copyrights.is_candidate(line)
251251

252-
def test_is_candidate_should_select_line_with_a_trailing_years(self):
253-
line = '01061C3F5280CD4AC504152B81E452BD820154 2014\n'
254-
line = prepare_text_line(line)
255-
assert copyrights.is_candidate(line)
256-
257-
def test_is_candidate_should_select_line_with_proper_years(self):
258-
line = '01061C3F5280CD4AC504152B81E452BD820154 2014-'
259-
line = prepare_text_line(line)
260-
assert copyrights.is_candidate(line)
261-
262-
def test_is_candidate_should_select_line_with_proper_years2(self):
263-
line = '01061C3F5280CD4,2016 152B81E452BD820154'
264-
line = prepare_text_line(line)
265-
assert copyrights.is_candidate(line)
266-
267-
def test_is_candidate_should_select_line_with_dashed_year(self):
268-
line = 'pub 1024D/CCD6F801 2006-11-15'
269-
line = prepare_text_line(line)
270-
assert copyrights.is_candidate(line)
271-
272252
def test_is_candidate_should_select_line_with_iso_date_year(self):
273253
line = 'sig 3 ccd6f801 2006-11-15 nathan mittler <nathan.mittler@gmail.com>'
274254
line = prepare_text_line(line)

0 commit comments

Comments
 (0)