Skip to content

Commit 5d0f4b5

Browse files
Merge branch 'feat/multiref' into fix/cva16-build-files
2 parents d60f549 + 33c14bd commit 5d0f4b5

2 files changed

Lines changed: 367 additions & 89 deletions

File tree

scripts/lib/minimizer.py

Lines changed: 49 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -139,5 +139,54 @@ def serialize_ref_search_index(index):
139139
return index
140140

141141

142+
def deserialize_ref_search_index(data: dict) -> dict:
    """Return a copy of a serialized search index with runtime-friendly types.

    The input mapping is deep-copied, so the caller's object is never
    modified. In the copy:

    * ``"minimizers"`` keys are converted to ``int`` (values are kept as-is);
    * ``"normalization"`` is wrapped in a NumPy array.

    All other entries are carried over unchanged.

    NOTE(review): presumably the inverse of ``serialize_ref_search_index``;
    confirm against that function.
    """
    restored = copy.deepcopy(data)
    string_keyed = restored["minimizers"]
    restored.update(
        minimizers={int(key): refs for key, refs in string_keyed.items()},
        normalization=np.array(restored["normalization"]),
    )
    return restored
147+
148+
149+
def search_one_query(
    index: dict,
    qry: SeqRecord,
) -> tuple[np.ndarray, np.ndarray]:
    """Score one query sequence against every reference in the index.

    Args:
        index: Search index providing ``"references"`` (only its length is
            used here), ``"minimizers"`` (a mapping looked up by query
            minimizer) and ``"normalization"`` (a per-reference factor).
        qry: Query record, passed to ``get_ref_search_minimizers`` and
            ``preprocess_seq``.

    Returns:
        ``(normalized_hits, hit_count)``: two arrays with one entry per
        reference. ``hit_count`` is the raw ``int32`` hit tally and
        ``normalized_hits`` is ``normalization * hit_count / seq_len``, where
        ``seq_len`` is the length of the preprocessed query. For a query whose
        preprocessed length is zero, ``normalized_hits`` is all zeros.
    """
    n_refs = len(index["references"])
    minimizers = get_ref_search_minimizers(qry)
    hit_count = np.zeros(n_refs, dtype=np.int32)

    minimizer_refs = index["minimizers"]
    for m in minimizers:
        # Single lookup instead of `in` followed by indexing.
        # NOTE(review): values are presumably lists of reference indices;
        # confirm against the index builder.
        refs = minimizer_refs.get(m)
        if refs is not None:
            hit_count[refs] += 1

    seq_len = len(preprocess_seq(qry))
    if seq_len == 0:
        # Dividing by a zero length would yield NaN/inf scores and a NumPy
        # RuntimeWarning; an empty query simply matches nothing.
        return np.zeros(n_refs, dtype=np.float64), hit_count

    normalized_hits = index["normalization"] * hit_count / seq_len
    return normalized_hits, hit_count
162+
163+
164+
def filter_matches(
    normalized_hits: np.ndarray,
    hit_count: np.ndarray,
    min_score: float,
    min_hits: int,
    max_score_gap: float,
    all_matches: bool,
) -> list[tuple[int, float, int]]:
    """Pick the references whose scores pass the thresholds, best first.

    Args:
        normalized_hits: Per-reference score, indexed in parallel with
            ``hit_count``.
        hit_count: Per-reference raw hit count.
        min_score: Minimum score a reference needs to be reported.
        min_hits: Minimum number of hits summed over *all* references; below
            this nothing is reported.
        max_score_gap: Largest allowed drop in score between two consecutive
            reported references; the first larger drop ends the list.
        all_matches: If false, at most the single best reference is returned.

    Returns:
        A list of ``(reference index, score, hit count)`` tuples in descending
        score order. The list is empty when there are no references, when the
        best score is below ``min_score``, or when the total hit count is
        below ``min_hits``.
    """
    # np.max raises ValueError on a zero-size array, so an index without any
    # references is answered here instead of crashing.
    if len(normalized_hits) == 0:
        return []

    total_hits = int(np.sum(hit_count))
    max_score = float(np.max(normalized_hits))
    if max_score < min_score or total_hits < min_hits:
        return []

    # Reference indices ordered from highest to lowest score.
    order = np.argsort(normalized_hits)[::-1]
    matches: list[tuple[int, float, int]] = []
    for idx in order:
        score = float(normalized_hits[idx])
        if score < min_score:
            break
        # The gap is measured against the previously accepted match, not
        # against the overall best score.
        if matches and matches[-1][1] - score > max_score_gap:
            break
        matches.append((int(idx), score, int(hit_count[idx])))
        if not all_matches:
            break
    return matches
189+
190+
142191
def to_bitstring(arr) -> str:
    """Concatenate the ``str()`` form of every element of ``arr`` into one string."""
    return "".join(map(str, arr))

0 commit comments

Comments
 (0)