1010#include < limits>
1111#include < vector>
1212#include < algorithm>
13+ #include < iterator>
1314#include < unordered_map>
1415#include < fstream>
1516#include < zlib.h>
@@ -356,17 +357,49 @@ namespace skch
356357 // Filter over reference axis and report the mappings
357358 if (param.filterMode == filter::ONETOONE)
358359 {
359- skch::Filter::ref::filterMappings (allReadMappings, this ->refSketch ,
360- param.numMappingsForSegment - 1
361- // (input->len < param.segLength ? param.shortSecondaryToKeep : param.secondaryToKeep)
362- );
360+ // how many secondary mappings to keep
361+ int n_mappings = param.numMappingsForSegment - 1 ;
362+
363+ // Group sequences by query prefix, then pass to ref filter
364+ auto subrange_begin = allReadMappings.begin ();
365+ auto subrange_end = allReadMappings.begin ();
366+ MappingResultsVector_t tmpMappings;
367+ MappingResultsVector_t filteredMappings;
368+
369+ while (subrange_end != allReadMappings.end ())
370+ {
371+ if (param.skip_prefix )
372+ {
373+ int currGroup = this ->getRefGroup (qmetadata[subrange_begin->querySeqId ].name );
374+ subrange_end = std::find_if_not (subrange_begin, allReadMappings.end (), [this , currGroup] (const auto & allReadMappings_candidate) {
375+ return currGroup == this ->getRefGroup (this ->qmetadata [allReadMappings_candidate.querySeqId ].name );
376+ });
377+ }
378+ else
379+ {
380+ subrange_end = allReadMappings.end ();
381+ }
382+ tmpMappings.insert (
383+ tmpMappings.end (),
384+ std::make_move_iterator (subrange_begin),
385+ std::make_move_iterator (subrange_end));
386+
387+ // tmpMappings now contains mappings from one group of query sequences to all reference groups
388+ // we now run filterByGroup, which filters based on the reference group.
389+ filterByGroup (tmpMappings, filteredMappings, n_mappings, true );
390+ tmpMappings.clear ();
391+ subrange_begin = subrange_end;
392+ }
393+ allReadMappings = std::move (filteredMappings);
363394
364395 // Re-sort mappings by input order of query sequences
365396 // This order may be needed for any post analysis of output
366- std::sort (allReadMappings.begin (), allReadMappings.end (), [](const MappingResult &a, const MappingResult &b)
367- {
368- return (a.querySeqId < b.querySeqId );
369- });
397+ std::sort (
398+ allReadMappings.begin (), allReadMappings.end (),
399+ [](const MappingResult &a, const MappingResult &b) {
400+ return std::tie (a.querySeqId , a.queryStartPos , a.refSeqId , a.refStartPos )
401+ < std::tie (b.querySeqId , b.queryStartPos , b.refSeqId , b.refStartPos );
402+ });
370403
371404 reportReadMappings (allReadMappings, " " , outstrm);
372405 }
@@ -460,17 +493,19 @@ namespace skch
460493 }
461494
462495 /* *
463- * @brief helper to main filtering function
464- * @details filters mappings by group
465- * @param[in] input unfiltered mappings
466- * @param[in] output filtered mappings
467- * @param[in] input num mappings per segment
468- * @return void
496+ * @brief helper to main filtering function
497+ * @details filters mappings by group
498+ * @param[in] input unfiltered mappings
499+ * @param[in] output filtered mappings
500+ * @param[in] n_mappings num mappings per segment
501+ * @param[in] filter_ref use Filter::ref instead of Filter::query
502+ * @return void
469503 */
470504 void filterByGroup (
471505 MappingResultsVector_t &unfilteredMappings,
472506 MappingResultsVector_t &filteredMappings,
473- int n_mappings)
507+ int n_mappings,
508+ bool filter_ref)
474509 {
475510 filteredMappings.reserve (unfilteredMappings.size ());
476511
@@ -480,6 +515,7 @@ namespace skch
480515 auto subrange_end = unfilteredMappings.begin ();
481516 if (param.filterMode == filter::MAP || param.filterMode == filter::ONETOONE)
482517 {
518+ std::vector<skch::MappingResult> tmpMappings;
483519 while (subrange_end != unfilteredMappings.end ())
484520 {
485521 if (param.skip_prefix )
@@ -493,13 +529,25 @@ namespace skch
493529 {
494530 subrange_end = unfilteredMappings.end ();
495531 }
496- // TODO why are we filtering these before merging?
497- std::vector<skch::MappingResult> tmpMappings (std::distance (subrange_begin, subrange_end));
498- std::move (subrange_begin, subrange_end, tmpMappings.begin ());
532+ tmpMappings.insert (
533+ tmpMappings.end (),
534+ std::make_move_iterator (subrange_begin),
535+ std::make_move_iterator (subrange_end));
499536 std::sort (tmpMappings.begin (), tmpMappings.end (), [](const auto & a, const auto & b)
500537 { return std::tie (a.queryStartPos , a.refSeqId , a.refStartPos ) < std::tie (b.queryStartPos , b.refSeqId , b.refStartPos ); });
501- skch::Filter::query::filterMappings (tmpMappings, n_mappings);
502- std::move (tmpMappings.begin (), tmpMappings.end (), std::back_inserter (filteredMappings));
538+ if (filter_ref)
539+ {
540+ skch::Filter::ref::filterMappings (tmpMappings, this ->refSketch , n_mappings);
541+ }
542+ else
543+ {
544+ skch::Filter::query::filterMappings (tmpMappings, n_mappings);
545+ }
546+ filteredMappings.insert (
547+ filteredMappings.end (),
548+ std::make_move_iterator (tmpMappings.begin ()),
549+ std::make_move_iterator (tmpMappings.end ()));
550+ tmpMappings.clear ();
503551 subrange_begin = subrange_end;
504552 }
505553 }
@@ -509,8 +557,6 @@ namespace skch
509557 [](const MappingResult &a, const MappingResult &b) {
510558 return std::tie (a.queryStartPos , a.refSeqId , a.refStartPos )
511559 < std::tie (b.queryStartPos , b.refSeqId , b.refStartPos );
512- // return std::tie(a.refSeqId, a.refStartPos, a.queryStartPos)
513- // < std::tie(b.refSeqId, b.refStartPos, b.queryStartPos);
514560 });
515561 }
516562
@@ -644,10 +690,10 @@ namespace skch
644690 }
645691
646692 if (param.filterMode == filter::MAP || param.filterMode == filter::ONETOONE) {
647- MappingResultsVector_t tempMappings ;
648- tempMappings .reserve (output->readMappings .size ());
649- filterByGroup (unfilteredMappings, tempMappings , n_mappings);
650- std::swap (tempMappings, unfilteredMappings );
693+ MappingResultsVector_t tmpMappings ;
694+ tmpMappings .reserve (output->readMappings .size ());
695+ filterByGroup (unfilteredMappings, tmpMappings , n_mappings, false );
696+ unfilteredMappings = std::move (tmpMappings );
651697 }
652698
653699 std::swap (output->readMappings , unfilteredMappings);
0 commit comments