Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion cpp/Averager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,17 @@ void Averager::find_ERs(double threshold, int min_length)
unsigned int i = next_index++;
if (i >= chroms.size()) break;
const std::string& chrom = chroms[i];
const std::vector<BedGraphRow>& intervals = mean_intervals[chrom];
// Read-only lookup: operator[] would silently insert a default
// entry under concurrent access from worker threads, which is
// undefined behavior on a shared map.
const auto intervals_it = mean_intervals.find(chrom);
if (intervals_it == mean_intervals.end())
{
std::cerr << "[ERROR] Missing mean_intervals entry for chromosome: "
<< chrom << std::endl;
continue;
}
const std::vector<BedGraphRow>& intervals = intervals_it->second;

std::vector<BedGraphRow> chrom_ers;

Expand Down
25 changes: 15 additions & 10 deletions cpp/Integrator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,29 +154,34 @@ void Integrator::stitch_one_strand(const std::string& chrom,

void Integrator::stitch_up(std::unordered_map<std::string, std::vector<BedGraphRow>>& expressed_regions, const std::unordered_map<std::string, std::vector<uint32_t>>& mm_chrom_sj, const std::vector<SJRow>& rr_all_sj)
{
// Strand-aware stitching. For each chromosome:
// Strand-aware stitching. For each chromosome with at least one ER:
// 1. Bucket SJs by strand (SJRow.strand: true -> '+', false -> '-').
// A chromosome with no SJs at all skips both strand passes and
// every ER is emitted as a single-ER StitchedER with strand '.'.
// 2. Run stitch_one_strand for each non-empty bucket. The shared
// consumed_indices set ensures an ER can be in at most one chain.
// 3. Emit any ER not pulled into a chain as a single-ER StitchedER
// with strand '.'.
// 4. Sort the chromosome's StitchedERs by start so write_to_gtf reads
// them in genomic order even when chains came from different passes.
for (const auto& sjs : mm_chrom_sj)
for (const auto& chrom_ers : expressed_regions)
{
const std::string& chrom = sjs.first;
if (expressed_regions.find(chrom) == expressed_regions.end()) continue;
const auto& ers = expressed_regions.at(chrom);
const std::string& chrom = chrom_ers.first;
const auto& ers = chrom_ers.second;
if (ers.empty()) continue;

std::vector<uint32_t> plus_sjs;
std::vector<uint32_t> minus_sjs;
plus_sjs.reserve(sjs.second.size());
minus_sjs.reserve(sjs.second.size());
for (uint32_t sj_id : sjs.second)
const auto sjs_it = mm_chrom_sj.find(chrom);
if (sjs_it != mm_chrom_sj.end())
{
if (sj_id == 0 || sj_id - 1 >= rr_all_sj.size()) continue;
(rr_all_sj[sj_id - 1].strand ? plus_sjs : minus_sjs).emplace_back(sj_id);
plus_sjs.reserve(sjs_it->second.size());
minus_sjs.reserve(sjs_it->second.size());
for (uint32_t sj_id : sjs_it->second)
{
if (sj_id == 0 || sj_id - 1 >= rr_all_sj.size()) continue;
(rr_all_sj[sj_id - 1].strand ? plus_sjs : minus_sjs).emplace_back(sj_id);
}
}

std::unordered_set<int> consumed;
Expand Down
24 changes: 24 additions & 0 deletions tests/UnitTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -898,6 +898,30 @@ TEST(Integrator, StitchUpEachERAppearsExactlyOnceWithBothStrands)
EXPECT_EQ(unstranded_count, 1);
}

// A chromosome with expressed regions but no entry in mm_chrom_sj must still
// have its ERs emitted as single-ER unstranded StitchedERs. The previous
// implementation iterated only over mm_chrom_sj keys and silently dropped
// ERs from chromosomes with no junctions.
TEST(Integrator, StitchUpEmitsERsOnChromosomesWithNoSJs)
{
std::vector<SJRow> rr_all_sj = {};
std::unordered_map<std::string, std::vector<BedGraphRow>> expressed_regions;
expressed_regions["chr1"] = {
BedGraphRow("chr1", 10000, 10500, 100.0),
BedGraphRow("chr1", 11000, 12500, 101.0),
};
std::unordered_map<std::string, std::vector<uint32_t>> mm_chrom_sj;

Integrator integrator(0.1, 5);
integrator.stitch_up(expressed_regions, mm_chrom_sj, rr_all_sj);

ASSERT_EQ(integrator.stitched_ERs.size(), 2u);
for (const auto& ser : integrator.stitched_ERs) {
EXPECT_EQ(ser.strand, '.');
EXPECT_EQ(ser.er_ids.size(), 1u);
}
}

// GTFRow must propagate the StitchedER strand into column 7 of the output
// row. Without this, gffcompare can never match transcripts or exons since
// it requires strand agreement at those levels.
Expand Down
Loading