@@ -139,5 +139,54 @@ def serialize_ref_search_index(index):
139139 return index
140140
141141
def deserialize_ref_search_index(data: dict) -> dict:
    """Rebuild an in-memory search index from its serialized form.

    The input is left untouched: all work happens on a deep copy.

    Args:
        data: Serialized index. Must contain a ``"minimizers"`` mapping
            whose keys are convertible with ``int()`` and a
            ``"normalization"`` entry accepted by ``np.array``.

    Returns:
        A deep copy of ``data`` in which the ``"minimizers"`` keys are
        ``int`` and ``"normalization"`` is a NumPy array.
    """
    restored = copy.deepcopy(data)

    # Keys arrive in a non-int form (presumably strings after a JSON
    # round-trip -- confirm against the serializer); lookups need ints.
    int_keyed = {}
    for raw_key, ref_ids in restored["minimizers"].items():
        int_keyed[int(raw_key)] = ref_ids
    restored["minimizers"] = int_keyed

    restored["normalization"] = np.array(restored["normalization"])
    return restored
147+
148+
def search_one_query(
    index: dict,
    qry: SeqRecord,
) -> tuple[np.ndarray, np.ndarray]:
    """Count minimizer hits of one query against every indexed reference.

    Args:
        index: Search index providing ``"references"`` (its length fixes
            the number of references), ``"minimizers"`` (minimizer ->
            reference index/indices) and ``"normalization"`` (per-reference
            scaling factors).
        qry: Query record, passed to ``get_ref_search_minimizers`` and
            ``preprocess_seq``.

    Returns:
        ``(normalized_hits, hit_count)``. ``hit_count`` is an ``int32``
        array with one entry per reference. ``normalized_hits`` is
        ``index["normalization"] * hit_count / len(preprocess_seq(qry))``.
        If the preprocessed query is empty, ``normalized_hits`` is all
        zeros instead of the NaN/inf values a division by zero would give.
    """
    n_refs = len(index["references"])
    minimizer_to_refs = index["minimizers"]  # hoisted: invariant in the loop

    hit_count = np.zeros(n_refs, dtype=np.int32)
    for m in get_ref_search_minimizers(qry):
        if m in minimizer_to_refs:
            # The stored value is used directly as a NumPy index, so one
            # query minimizer bumps every reference it maps to.
            hit_count[minimizer_to_refs[m]] += 1

    seq_len = len(preprocess_seq(qry))
    if seq_len == 0:
        # An empty query cannot be normalized by its length; report no
        # score rather than propagating NaN/inf to downstream filtering.
        return np.zeros(n_refs, dtype=np.float64), hit_count

    normalized_hits = index["normalization"] * hit_count / seq_len
    return normalized_hits, hit_count
162+
163+
def filter_matches(
    normalized_hits: np.ndarray,
    hit_count: np.ndarray,
    min_score: float,
    min_hits: int,
    max_score_gap: float,
    all_matches: bool,
) -> list[tuple[int, float, int]]:
    """Select the references that pass the score and hit thresholds.

    Args:
        normalized_hits: Per-reference normalized score.
        hit_count: Per-reference raw hit count, aligned with
            ``normalized_hits``.
        min_score: Minimum normalized score a reference needs to be kept.
        min_hits: Minimum number of hits summed over *all* references;
            below this nothing is reported.
        max_score_gap: Largest allowed score drop between two consecutive
            reported matches. The gap is measured against the previously
            accepted match, not against the best one.
        all_matches: If false, report at most the single best match.

    Returns:
        ``(reference_index, score, hits)`` tuples in descending score
        order. Empty if there are no references, if the best score is
        below ``min_score``, or if the total hit count is below
        ``min_hits``.
    """
    # np.max raises ValueError on a zero-size array; an index without
    # references simply has no matches.
    if np.size(normalized_hits) == 0:
        return []

    total_hits = int(np.sum(hit_count))
    max_score = float(np.max(normalized_hits))
    if max_score < min_score or total_hits < min_hits:
        return []

    order = np.argsort(normalized_hits)[::-1]  # best score first
    matches = []
    for idx in order:
        score = float(normalized_hits[idx])
        if score < min_score:
            # Sorted descending, so every remaining score is too low as well.
            break
        if matches and matches[-1][1] - score > max_score_gap:
            # Score dropped too far below the previously accepted match.
            break
        matches.append((int(idx), score, int(hit_count[idx])))
        if not all_matches:
            break
    return matches
189+
190+
def to_bitstring(arr) -> str:
    """Concatenate the ``str()`` form of each element of ``arr`` into one string."""
    return "".join(map(str, arr))
0 commit comments