diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..4e74f0d --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,40 @@ +# Changelog + +All notable changes to fastder are recorded in this file. + +## [0.1.0] - 2026-05-19 + +### Fixed + +- GTF output is now 1-based. fastder's internal coordinates are 0-based + half-open (the BedGraph and BigWig convention), but the GTF writer emitted + the start column without converting it, so every gene, transcript and exon + start was one base too low. The start is now shifted by one on output. The + end needs no shift, because a 0-based half-open end equals the 1-based + inclusive end of the same interval. +- RR splice junction coordinates are converted to 0-based on read. The RR + file carries 1-based inclusive intron coordinates (the STAR SJ.out.tab + convention), but fastder works in 0-based half-open coordinates like the + coverage. The mismatch left junction-snapped exon ends one base too long. + The RR parser now shifts the junction start by one on read, and rejects a + junction start of 0, which is malformed under the 1-based convention, + rather than letting the unsigned subtraction wrap. + +### Changed + +- Stitching is no longer gated on coverage similarity. The default + `--coverage-tolerance` is raised to 1000, which makes the gate effectively + off. A splice junction is the evidence that two regions belong to one + transcript, and exon coverage varies widely within a transcript, so the + previous default (0.8) wrongly rejected legitimate stitches. The flag is + kept as a knob. +- Exon boundaries are snapped to splice junctions. When a stitched chain + crosses a junction, the exon edges on either side are set to the junction's + donor and acceptor coordinates instead of the coverage-derived expressed- + region edges. Edges with no adjacent junction, namely the outer ends of a + chain and both ends of a monoexonic region, keep the coverage extent. + +## [Legacy] + +Baseline forked from the upstream fastder repository, which carried no tagged +release. Versioned history begins at 0.1.0 above. diff --git a/CMakeLists.txt b/CMakeLists.txt index c203de8..79cd70b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -58,8 +58,8 @@ endif() set(CPACK_PACKAGE_NAME "fastder") set(CPACK_PACKAGE_VENDOR "Martina Lehmann") set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "fastder: A Fast, Agnostic, Splice-Junction-Based Tool for the Detection of Expressed Regions") -set(CPACK_PACKAGE_VERSION "0.0.1") -set(CPACK_PACKAGE_CONTACT "martina.lavanya@gmail.com") +set(CPACK_PACKAGE_VERSION "0.1.0") +set(CPACK_PACKAGE_CONTACT "martina.lavanya@gmail.com, izaskun.mallona.work@gmail.com") # Debian-specific packaging settings set(CPACK_DEBIAN_PACKAGE_MAINTAINER "Martina Lehmann") diff --git a/README.md b/README.md index 854d6e8..eaa0925 100644 --- a/README.md +++ b/README.md @@ -91,8 +91,7 @@ that the tool will only output expressed regions on chromosome 1, and will ignor - `--min-length 5` describes the minimum length (in bp) that an ER must have. For instance, three consecutive base pairs with coverage > 0.25 CPM will be ignored if the min length is set to 5 bp. - `--position-tolerance 5` describes the maximum permitted offset of the end position of an exon and the starting position of a splice junction. If this tolerance is set to 5, an ER with end position = 1000 bp and a splice junction with start position = 1005 bp will be stitched together (if the coverage and end junction match). - - `--coverage-tolerance 0.1` describes the maximum permitted coverage deviation between two ER that are separated by a spliced region. For a coverage tolerance of 0.1, - two ERs with coverage = 10 CPM and 11 CPM will be stitched together (if there is a matching splice junction). + - `--coverage-tolerance 1000` describes the permitted coverage deviation between two ERs separated by a spliced region. It defaults to 1000, effectively off, so stitching is driven by splice junctions rather than coverage similarity. A visualization of the different parameters is provided below. @@ -103,13 +102,14 @@ Usage: --dir ... \ [--chr ...] \ [--min-coverage ] \ + [--min-length ] \ [--position-tolerance ] \ [--coverage-tolerance ] \ - [--help] + [--cores ] Required inputs: - --dir ... Relative path from the build directory to the directory containing the input files. + --dir ... Relative path from the build directory, or an absolute path, to the directory containing the input files. Example: --dir ../../data/test_exon_skipping Optional inputs: @@ -118,24 +118,26 @@ Optional inputs: Default: all (chr1-chr22, chrX) Example: --chr chr1 chr2 chr3 - --min-length Minimum length [#bp] required for a region to qualify as an expressed region (ER). - Default: 5 bp - Example: --min-length 5 + --min-length Minimum length [#bp] required for a region to qualify as an expressed region (ER). + Default: 10 bp + Example: --min-length 10 --min-coverage Minimum coverage [CPM] required for a region to qualify as an ER. Normalized in-place by library size. - Default: 0.25 CPM + Default: 0.05 CPM Example: --min-coverage 0.25 --position-tolerance Maximum allowed positional deviation between splice junction and ER coordinates [bp]. Default: 5 bp Example: --position-tolerance 5 - --coverage-tolerance Allowed relative deviation in coverage between stitched ERs (e.g. 0.1 = 10%). - Default: 0.1 - Example: --coverage-tolerance 0.1 + --coverage-tolerance Permitted coverage deviation between stitched ERs, as a proportion of the running average. + Default: 1000 (gate effectively off; stitching is junction-driven). + Example: --coverage-tolerance 2.0 - --help Show this help message. + --cores Number of cores fastder may use. + Default: 10 + Example: --cores 23 Example: @@ -144,9 +146,9 @@ Example: --dir ../../data/input \ --chr chr1 chr2 \ --position-tolerance 5 \ - --min-length 5 \ - --min-coverage 0.25 \ - --coverage-tolerance 0.1 + --min-length 10 \ + --min-coverage 0.05 \ + --cores 10 ``` diff --git a/cpp/GTFRow.h b/cpp/GTFRow.h index 714378c..aa69813 100644 --- a/cpp/GTFRow.h +++ b/cpp/GTFRow.h @@ -28,7 +28,11 @@ class GTFRow // overload output operator for GTFRow friend std::ostream& operator<< (std::ostream& os, const GTFRow& row) { - return os << row.seqname << "\t" << row.source << "\t" << row.feature << "\t" << row.start << "\t" << row.end << "\t" << + // Internal coordinates are 0-based half-open (BedGraph / BigWig + // convention). GTF is 1-based inclusive, so the start is shifted by + // one. The end needs no shift: a 0-based half-open end equals the + // 1-based inclusive end of the same interval. + return os << row.seqname << "\t" << row.source << "\t" << row.feature << "\t" << (row.start + 1) << "\t" << row.end << "\t" << row.score << "\t" << row.strand << "\t" << row.frame << "\t" << row.attribute; } diff --git a/cpp/Integrator.cpp b/cpp/Integrator.cpp index f6a1853..0550e8a 100644 --- a/cpp/Integrator.cpp +++ b/cpp/Integrator.cpp @@ -123,11 +123,23 @@ void Integrator::stitch_one_strand(const std::string& chrom, } const auto sj_to_check = (sj_it == strand_sjs.end()) ? std::prev(strand_sjs.end()) : sj_it; + // TODO reject geometrically inconsistent stitches here, rather than + // only repairing them in write_to_gtf. When position_tolerance is + // larger than a short ER, the junctions matched to its two edges can + // snap past each other and imply a negative-length exon. write_to_gtf + // currently falls back to that ER's coverage extent; a cleaner fix is + // to refuse the stitch when position_tolerance exceeds the ER length + // or when the flanking junctions would produce a non-positive exon. if (is_similar(candidate, expressed_region, rr_all_sj[*sj_to_check - 1])) { + const SJRow& used_sj = rr_all_sj[*sj_to_check - 1]; uint64_t sj_length = expressed_region.start - ers[candidate.er_ids.back()].end; - candidate.append(-1, sj_length, 0.0); - candidate.append(i, expressed_region.length, expressed_region.coverage); + // record the splice junction's [donor, acceptor] for the spliced + // region, and the ER's coverage extent for the appended exon, so + // write_to_gtf can snap exon edges to the splice sites. + candidate.append(-1, sj_length, 0.0, used_sj.start, used_sj.end); + candidate.append(i, expressed_region.length, expressed_region.coverage, + expressed_region.start, expressed_region.end); int er_count = 0; for (int id : candidate.er_ids) if (id >= 0) ++er_count; if (er_count > max_chain_len) max_chain_len = er_count; @@ -240,28 +252,57 @@ void Integrator::write_to_gtf(const std::string& output_path) for (unsigned int i = 0; i < this->stitched_ERs.size(); ++i) { - // each stitched_er is both a gene and a transcript - GTFRow gtf_row = GTFRow(stitched_ERs[i], "gene", i + 1); + const StitchedER& ser = stitched_ERs[i]; + + // Resolve each exon's [start, end]. The default is the ER's + // coverage-derived extent; an edge that abuts a splice junction is + // snapped to the junction coordinate (the donor for an exon end, the + // acceptor for the next exon start), since the junction marks the + // exact splice site. Edges with no adjacent junction, namely the + // outer ends of a chain and both ends of a monoexonic ER, keep the + // coverage extent. + std::vector> exons; + std::vector exon_scores; + for (unsigned int k = 0; k < ser.er_ids.size(); ++k) + { + if (ser.er_ids.at(k) == -1) continue; // spliced region, not an exon + const uint64_t cov_start = ser.er_bounds.at(k).first; + const uint64_t cov_end = ser.er_bounds.at(k).second; + uint64_t ex_start = cov_start; + uint64_t ex_end = cov_end; + if (k > 0 && ser.er_ids.at(k - 1) == -1) + ex_start = ser.er_bounds.at(k - 1).second; // splice acceptor + if (k + 1 < ser.er_ids.size() && ser.er_ids.at(k + 1) == -1) + ex_end = ser.er_bounds.at(k + 1).first; // splice donor + // Keep the snapped edges only if they still form a valid exon. + // With a large position_tolerance two junctions flanking a short + // expressed region can snap past each other (start >= end); fall + // back to the coverage extent, which is always a valid interval. + if (ex_start >= ex_end) + { + ex_start = cov_start; + ex_end = cov_end; + } + exons.emplace_back(ex_start, ex_end); + exon_scores.push_back(ser.all_coverages.at(k).second); + } + if (exons.empty()) continue; + + // each stitched_er is both a gene and a transcript; their span runs + // from the first exon start to the last exon end + GTFRow gtf_row = GTFRow(ser, "gene", i + 1); + gtf_row.start = exons.front().first; + gtf_row.end = exons.back().second; out << gtf_row << std::endl; gtf_row.change_feature("transcript", i + 1, 0); out << gtf_row << std::endl; - int exon_nr = 1; - // add the ERs within the stitched_er - for (unsigned int k = 0; k < stitched_ERs[i].er_ids.size(); ++k) + for (unsigned int e = 0; e < exons.size(); ++e) { - if (stitched_ERs[i].er_ids.at(k) != -1){ - gtf_row.change_feature("exon", i + 1, exon_nr); - // need to include SJ length as well - gtf_row.end = gtf_row.start + stitched_ERs.at(i).all_coverages.at(k).first; // start + length = end - gtf_row.score = stitched_ERs.at(i).all_coverages.at(k).second; // use the per-exon average coverage here instead of the overall coverage - out << gtf_row << std::endl; - gtf_row.start = gtf_row.end; - ++exon_nr; - } - else - { - gtf_row.start += stitched_ERs.at(i).all_coverages.at(k).first; // add length of the SJ - } + gtf_row.change_feature("exon", i + 1, e + 1); + gtf_row.start = exons.at(e).first; + gtf_row.end = exons.at(e).second; + gtf_row.score = exon_scores.at(e); + out << gtf_row << std::endl; } } out.close(); diff --git a/cpp/SJRow.h b/cpp/SJRow.h index c8d17ea..b2ccbae 100644 --- a/cpp/SJRow.h +++ b/cpp/SJRow.h @@ -40,6 +40,20 @@ class SJRow return is; } row.strand = (strand_ == '+'); // 1 if + + // The RR file gives 1-based inclusive intron coordinates (STAR + // SJ.out.tab convention). fastder works in 0-based half-open + // coordinates, like the BedGraph / BigWig coverage, so shift the + // start by one; the end already equals the 0-based half-open end. + // This keeps junction-to-ER comparisons and the exon-boundary + // snapping consistent with the coverage coordinates. + // A valid 1-based start is at least 1; a 0 here means malformed or + // truncated input. Reject the row instead of underflowing uint64_t. + if (row.start == 0) + { + is.setstate(std::ios::failbit); + return is; + } + row.start -= 1; return is; } diff --git a/cpp/StitchedER.cpp b/cpp/StitchedER.cpp index 869bb45..b4d1caa 100644 --- a/cpp/StitchedER.cpp +++ b/cpp/StitchedER.cpp @@ -14,6 +14,7 @@ StitchedER::StitchedER(const BedGraphRow& expressed_region, int er_id) er_ids = {er_id}; across_er_coverage = expressed_region.coverage; all_coverages = {std::make_pair(expressed_region.length, expressed_region.coverage)}; + er_bounds = {std::make_pair(expressed_region.start, expressed_region.end)}; total_length = expressed_region.length; } @@ -39,11 +40,14 @@ double StitchedER::get_avg_coverage() const return sum / full_length; } -// note that er_id is 0-indexed -void StitchedER::append(int er_id, unsigned int length, double coverage) +// note that er_id is 0-indexed. lo/hi is the ER's coverage extent, or the +// splice junction's [donor, acceptor] when er_id is -1 (a spliced region). +void StitchedER::append(int er_id, unsigned int length, double coverage, + uint64_t lo, uint64_t hi) { er_ids.emplace_back(er_id); // -1 for spliced regions all_coverages.push_back({std::make_pair(length, coverage)}); + er_bounds.push_back(std::make_pair(lo, hi)); // only update avg coverage if an exon was added if (er_id != -1) { diff --git a/cpp/StitchedER.h b/cpp/StitchedER.h index 9ee8147..aa0ff77 100644 --- a/cpp/StitchedER.h +++ b/cpp/StitchedER.h @@ -16,7 +16,8 @@ class StitchedER StitchedER() = default; StitchedER(const BedGraphRow& expressed_region, int er_id); - void append(int er_id, unsigned int length, double coverage); + void append(int er_id, unsigned int length, double coverage, + uint64_t lo, uint64_t hi); double get_avg_coverage() const; //bool is_similar(double val1, double val2); @@ -28,6 +29,12 @@ class StitchedER // example: stitched_ER consists of er_ids 45, 46, 47, 49 == vector indices of expressed_regions double across_er_coverage; // avg (weighted) coverage of all exons that are part of the stitched ER so far std::vector> all_coverages; // stores a pair of er length (= weight) + normalized average coverage of the er + // Genomic [start, end) of each entry in er_ids, aligned by index. For a + // real ER it is the coverage-derived extent; for a spliced region (-1) it + // is the splice junction's [donor, acceptor]. write_to_gtf snaps an exon + // edge to the adjacent junction coordinate and falls back to the coverage + // extent where an edge has no junction. + std::vector> er_bounds; unsigned int total_length; // combined length of all ERs (excluding spliced regions) uint64_t start; uint64_t end; diff --git a/cpp/main.cpp b/cpp/main.cpp index 50c6071..ede0787 100644 --- a/cpp/main.cpp +++ b/cpp/main.cpp @@ -18,7 +18,12 @@ int main(int argc, char* argv[]) { // default values (if not provided by user) int position_tolerance = 5; int min_length = 10; - double coverage_tolerance = 0.8; + // coverage_tolerance gates stitching on how similar two regions' coverage + // is. A splice junction is the real evidence that two exons belong to one + // transcript, and the coverage of exons within a transcript varies widely, + // so this gate defaults effectively off (any fold-difference passes) and + // is left as a knob only for deliberate experiments. + double coverage_tolerance = 1000.0; double min_coverage = 0.05; std::string directory; int cores = 10; @@ -43,16 +48,21 @@ int main(int argc, char* argv[]) { " Normalization is done in-place by library size. \n" " Default = 0.05 CPM.\n" << " Example: --min-coverage 0.25\n\n" + << " --min-length Minimum length, in [nt], for a region to qualify as an expressed region (ER). Default = 10 nt.\n" + << " Example: --min-length 10\n\n" << " --position-tolerance Maximum permitted position deviation of splice junction and ER coordinates, in [nt]. Default = 5 nt.\n" << " Example: --position-tolerance 5\n\n" - << " --coverage-tolerance Permitted coverage deviation within a stitched ER, as a proportion (e.g. 0.1 = 10 %).\n" - << " The value is not strictly bound in (0,1). Default = 0.7.\n" - << " Example: --coverage-tolerance 0.8\n\n" + << " --coverage-tolerance Permitted coverage deviation when stitching, as a proportion: a\n" + << " neighbour passes if its coverage is within (1 +/- tolerance)\n" + << " of the running average. Not bound to (0,1); 2.0 allows a 3x\n" + << " spread. Default = 1000 (the gate is effectively off, stitching\n" + << " is driven by splice junctions).\n" + << " Example: --coverage-tolerance 2.0\n\n" << " --cores Number of cores that fastder may use. Default = 10 cores.\n" << " Example: --cores 23\n\n" << "Example:\n" << " ./fastder --dir ../data --chr chr1 chr2 --position-tolerance 5 " - "--min-coverage 0.05 --coverage-tolerance 0.7 --cores 23\n" + "--min-coverage 0.05 --coverage-tolerance 2.0 --cores 23\n" << std::endl; // parse command-line arguments diff --git a/tests/UnitTests.cpp b/tests/UnitTests.cpp index 68d971b..6129a58 100644 --- a/tests/UnitTests.cpp +++ b/tests/UnitTests.cpp @@ -400,7 +400,8 @@ TEST(SJRow, ParsesAndDiscardsUnusedRRColumns) EXPECT_TRUE(iss.good() || iss.eof()); EXPECT_EQ(row.chrom, "chr1"); - EXPECT_EQ(row.start, 143465342u); + // The RR start is 1-based; the parser shifts it to 0-based half-open. + EXPECT_EQ(row.start, 143465341u); EXPECT_EQ(row.end, 143470614u); EXPECT_EQ(row.length, 5273u); EXPECT_FALSE(row.strand); // '-' -> false @@ -422,7 +423,8 @@ TEST(SJRow, ParsesPlaceholderAnnotationColumns) SJRow row; iss >> row; EXPECT_EQ(row.chrom, "chr21"); - EXPECT_EQ(row.start, 100u); + // 1-based RR start shifted to 0-based half-open by the parser. + EXPECT_EQ(row.start, 99u); EXPECT_EQ(row.end, 200u); EXPECT_TRUE(row.strand); // '+' EXPECT_FALSE(row.annotated); @@ -471,8 +473,9 @@ TEST(Parser, ChrFilterRetainsOnlyRequestedRows) EXPECT_EQ(parser.sj_id_remap[2], 2u); ASSERT_EQ(parser.rr_all_sj.size(), 2u); EXPECT_EQ(parser.rr_all_sj[0].chrom, "chr1"); - EXPECT_EQ(parser.rr_all_sj[0].start, 100u); - EXPECT_EQ(parser.rr_all_sj[1].start, 500u); + // RR starts are 1-based; the parser shifts them to 0-based half-open. + EXPECT_EQ(parser.rr_all_sj[0].start, 99u); + EXPECT_EQ(parser.rr_all_sj[1].start, 499u); fs::remove_all(tmp); } @@ -962,6 +965,124 @@ TEST(GTFRow, SerializesMinusStrandFromStitchedER) } +// operator<< converts the 0-based half-open internal coordinates to GTF's +// 1-based inclusive convention: the start column is start + 1, the end +// column equals end unchanged. +TEST(GTFRow, SerializesOneBasedInclusiveCoordinates) +{ + BedGraphRow er("chr1", 10000, 10500, 100.0); + StitchedER s(er, 0); + GTFRow row(s, "exon", 1); + row.start = 10000; + row.end = 10500; + + std::ostringstream os; + os << row; + std::istringstream is(os.str()); + std::vector cols; + std::string field; + while (std::getline(is, field, '\t')) cols.push_back(field); + ASSERT_GE(cols.size(), 5u); + EXPECT_EQ(cols[3], "10001"); // start + 1 + EXPECT_EQ(cols[4], "10500"); // end unchanged +} + + +// Helper: read a GTF written by write_to_gtf and return the exon rows as +// (start, end) pairs in file order. Columns 4 and 5 are 1-based inclusive. +static std::vector> read_gtf_exons(const std::string& path) +{ + std::vector> exons; + std::ifstream in(path); + std::string line; + while (std::getline(in, line)) + { + if (line.empty() || line[0] == '#') continue; + std::istringstream is(line); + std::vector cols; + std::string field; + while (std::getline(is, field, '\t')) cols.push_back(field); + if (cols.size() < 5 || cols[2] != "exon") continue; + exons.emplace_back(std::stoull(cols[3]), std::stoull(cols[4])); + } + return exons; +} + + +// write_to_gtf snaps exon edges to the flanking splice junctions. In a +// three-ER chain the internal edges must equal the junction donor / acceptor +// coordinates rather than the coverage extent; the outer edges keep coverage. +TEST(IntegratorWriteGtf, ExonEdgesSnapToSpliceJunctions) +{ + // SJ coordinates are 0-based half-open here, as fastder holds them after + // parsing; the SJRow constructor stores them verbatim. + std::vector rr_all_sj = { + SJRow("chr1", 10500, 11000, 500, '-', false), + SJRow("chr1", 13000, 14000, 1000, '-', false) + }; + std::unordered_map> expressed_regions; + expressed_regions["chr1"] = { + BedGraphRow("chr1", 10000, 10498, 100), + BedGraphRow("chr1", 11002, 12999, 101), + BedGraphRow("chr1", 14003, 14540, 30) + }; + std::unordered_map> mm_chrom_sj; + mm_chrom_sj["chr1"] = {1, 2}; + // coverage_tolerance off so the chain forms regardless of exon coverage. + Integrator integrator = Integrator(1000.0, 5); + integrator.stitch_up(expressed_regions, mm_chrom_sj, rr_all_sj); + const std::string path = "../../tests/gtfs/write_gtf_snap_test.gtf"; + integrator.write_to_gtf(path); + + auto exons = read_gtf_exons(path); + ASSERT_EQ(exons.size(), 3u); + // Internal edges snap to the junctions, serialized 1-based inclusive. + EXPECT_EQ(exons[0].second, 10500u); // first exon end -> donor 10500 + EXPECT_EQ(exons[1].first, 11001u); // second exon start -> acceptor 11000 + EXPECT_EQ(exons[1].second, 13000u); // second exon end -> donor 13000 + EXPECT_EQ(exons[2].first, 14001u); // third exon start -> acceptor 14000 +} + + +// When the junctions flanking a short middle exon snap past each other, so +// the snapped start lands at or beyond the snapped end, write_to_gtf must +// fall back to that ER's coverage extent so the emitted exon stays valid. +TEST(IntegratorWriteGtf, FallsBackToCoverageExtentWhenJunctionsSnapPast) +{ + // SJ coordinates are 0-based half-open. The right junction's donor (11004) + // lies before the left junction's acceptor (11008), so snapping the middle + // exon to them would invert it; the coverage extent is used instead. + std::vector rr_all_sj = { + SJRow("chr1", 10500, 11008, 508, '-', false), + SJRow("chr1", 11004, 12000, 996, '-', false) + }; + std::unordered_map> expressed_regions; + expressed_regions["chr1"] = { + BedGraphRow("chr1", 10000, 10500, 100), + BedGraphRow("chr1", 11000, 11012, 100), + BedGraphRow("chr1", 12000, 12500, 100) + }; + std::unordered_map> mm_chrom_sj; + mm_chrom_sj["chr1"] = {1, 2}; + Integrator integrator = Integrator(1000.0, 50); + integrator.stitch_up(expressed_regions, mm_chrom_sj, rr_all_sj); + const std::string path = "../../tests/gtfs/write_gtf_fallback_test.gtf"; + integrator.write_to_gtf(path); + + auto exons = read_gtf_exons(path); + ASSERT_EQ(exons.size(), 3u); + // Every emitted exon is a valid interval. + for (const auto& ex : exons) + { + EXPECT_LE(ex.first, ex.second); + } + // The middle exon falls back to its coverage extent 11000..11012, + // serialized 1-based inclusive as 11001..11012. + EXPECT_EQ(exons[1].first, 11001u); + EXPECT_EQ(exons[1].second, 11012u); +} + + // ===================================================================== // libBigWig integration (gated). These run only when fastder is built with // -DFASTDER_USE_LIBBIGWIG=ON; otherwise they GTEST_SKIP so the unbuilt code diff --git a/tests/gtfs/parser_test3.gtf b/tests/gtfs/parser_test3.gtf index 5cdb4ea..1c4ee1b 100644 --- a/tests/gtfs/parser_test3.gtf +++ b/tests/gtfs/parser_test3.gtf @@ -2,18 +2,18 @@ #provider: FASTDER #contact: martina.lavanya@gmail.com #format: gtf -#date: 2026-4-28 -chr1 fastder gene 10000 10500 225.982 . . gene_id "gene1"; gene_name "faster_gene1"; -chr1 fastder transcript 10000 10500 225.982 . . gene_id "gene1"; transcript_id "tx1"; -chr1 fastder exon 10000 10500 225.982 . . gene_id "gene1"; transcript_id "tx1"; exon_number "1"; -chr1 fastder gene 11000 12500 216.566 . . gene_id "gene2"; gene_name "faster_gene2"; -chr1 fastder transcript 11000 12500 216.566 . . gene_id "gene2"; transcript_id "tx2"; -chr1 fastder exon 11000 12500 216.566 . . gene_id "gene2"; transcript_id "tx2"; exon_number "1"; -chr1 fastder gene 12861 14540 183.631 . . gene_id "gene3"; gene_name "faster_gene3"; -chr1 fastder transcript 12861 14540 183.631 . . gene_id "gene3"; transcript_id "tx3"; -chr1 fastder exon 12861 12999 172.148 . . gene_id "gene3"; transcript_id "tx3"; exon_number "1"; -chr1 fastder exon 14001 14540 186.572 . . gene_id "gene3"; transcript_id "tx3"; exon_number "2"; -chr2 fastder gene 10000 12500 218.92 . . gene_id "gene4"; gene_name "faster_gene4"; -chr2 fastder transcript 10000 12500 218.92 . . gene_id "gene4"; transcript_id "tx4"; -chr2 fastder exon 10000 10500 225.982 . . gene_id "gene4"; transcript_id "tx4"; exon_number "1"; -chr2 fastder exon 11000 12500 216.566 . . gene_id "gene4"; transcript_id "tx4"; exon_number "2"; +#date: 2026-5-18 +chr1 fastder gene 10001 10500 225.982 . . gene_id "gene1"; gene_name "faster_gene1"; +chr1 fastder transcript 10001 10500 225.982 . . gene_id "gene1"; transcript_id "tx1"; +chr1 fastder exon 10001 10500 225.982 . . gene_id "gene1"; transcript_id "tx1"; exon_number "1"; +chr1 fastder gene 11001 12500 216.566 . . gene_id "gene2"; gene_name "faster_gene2"; +chr1 fastder transcript 11001 12500 216.566 . . gene_id "gene2"; transcript_id "tx2"; +chr1 fastder exon 11001 12500 216.566 . . gene_id "gene2"; transcript_id "tx2"; exon_number "1"; +chr1 fastder gene 12862 14540 183.631 - . gene_id "gene3"; gene_name "faster_gene3"; +chr1 fastder transcript 12862 14540 183.631 - . gene_id "gene3"; transcript_id "tx3"; +chr1 fastder exon 12862 12999 172.148 - . gene_id "gene3"; transcript_id "tx3"; exon_number "1"; +chr1 fastder exon 14001 14540 186.572 - . gene_id "gene3"; transcript_id "tx3"; exon_number "2"; +chr2 fastder gene 10001 12500 218.92 - . gene_id "gene4"; gene_name "faster_gene4"; +chr2 fastder transcript 10001 12500 218.92 - . gene_id "gene4"; transcript_id "tx4"; +chr2 fastder exon 10001 10499 225.982 - . gene_id "gene4"; transcript_id "tx4"; exon_number "1"; +chr2 fastder exon 11001 12500 216.566 - . gene_id "gene4"; transcript_id "tx4"; exon_number "2"; diff --git a/tests/gtfs/splicing_scenarios_test1.gtf b/tests/gtfs/splicing_scenarios_test1.gtf index 235eeef..04034ac 100644 --- a/tests/gtfs/splicing_scenarios_test1.gtf +++ b/tests/gtfs/splicing_scenarios_test1.gtf @@ -2,12 +2,12 @@ #provider: FASTDER #contact: martina.lavanya@gmail.com #format: gtf -#date: 2026-4-28 -chr1 fastder gene 10000 12500 100.75 . . gene_id "gene1"; gene_name "faster_gene1"; -chr1 fastder transcript 10000 12500 100.75 . . gene_id "gene1"; transcript_id "tx1"; -chr1 fastder exon 10000 10500 100 . . gene_id "gene1"; transcript_id "tx1"; exon_number "1"; -chr1 fastder exon 11000 12500 101 . . gene_id "gene1"; transcript_id "tx1"; exon_number "2"; -chr1 fastder gene 12861 14540 29.7962 . . gene_id "gene2"; gene_name "faster_gene2"; -chr1 fastder transcript 12861 14540 29.7962 . . gene_id "gene2"; transcript_id "tx2"; -chr1 fastder exon 12861 12999 29 . . gene_id "gene2"; transcript_id "tx2"; exon_number "1"; -chr1 fastder exon 14001 14540 30 . . gene_id "gene2"; transcript_id "tx2"; exon_number "2"; +#date: 2026-5-18 +chr1 fastder gene 10001 12500 100.75 - . gene_id "gene1"; gene_name "faster_gene1"; +chr1 fastder transcript 10001 12500 100.75 - . gene_id "gene1"; transcript_id "tx1"; +chr1 fastder exon 10001 10500 100 - . gene_id "gene1"; transcript_id "tx1"; exon_number "1"; +chr1 fastder exon 11001 12500 101 - . gene_id "gene1"; transcript_id "tx1"; exon_number "2"; +chr1 fastder gene 12862 14540 29.7962 - . gene_id "gene2"; gene_name "faster_gene2"; +chr1 fastder transcript 12862 14540 29.7962 - . gene_id "gene2"; transcript_id "tx2"; +chr1 fastder exon 12862 13000 29 - . gene_id "gene2"; transcript_id "tx2"; exon_number "1"; +chr1 fastder exon 14001 14540 30 - . gene_id "gene2"; transcript_id "tx2"; exon_number "2"; diff --git a/tests/gtfs/splicing_scenarios_test2.gtf b/tests/gtfs/splicing_scenarios_test2.gtf index 5ffeb25..c270bb0 100644 --- a/tests/gtfs/splicing_scenarios_test2.gtf +++ b/tests/gtfs/splicing_scenarios_test2.gtf @@ -2,13 +2,13 @@ #provider: FASTDER #contact: martina.lavanya@gmail.com #format: gtf -#date: 2026-4-28 -chr1 fastder gene 10000 12500 100.75 . . gene_id "gene1"; gene_name "faster_gene1"; -chr1 fastder transcript 10000 12500 100.75 . . gene_id "gene1"; transcript_id "tx1"; -chr1 fastder exon 10000 10500 100 . . gene_id "gene1"; transcript_id "tx1"; exon_number "1"; -chr1 fastder exon 11000 12500 101 . . gene_id "gene1"; transcript_id "tx1"; exon_number "2"; -chr1 fastder gene 12861 15300 29.784 . . gene_id "gene2"; gene_name "faster_gene2"; -chr1 fastder transcript 12861 15300 29.784 . . gene_id "gene2"; transcript_id "tx2"; -chr1 fastder exon 12861 12999 29 . . gene_id "gene2"; transcript_id "tx2"; exon_number "1"; -chr1 fastder exon 14001 14201 30 . . gene_id "gene2"; transcript_id "tx2"; exon_number "2"; -chr1 fastder exon 14999 15300 30 . . gene_id "gene2"; transcript_id "tx2"; exon_number "3"; +#date: 2026-5-18 +chr1 fastder gene 10001 12500 100.75 - . gene_id "gene1"; gene_name "faster_gene1"; +chr1 fastder transcript 10001 12500 100.75 - . gene_id "gene1"; transcript_id "tx1"; +chr1 fastder exon 10001 10500 100 - . gene_id "gene1"; transcript_id "tx1"; exon_number "1"; +chr1 fastder exon 11001 12500 101 - . gene_id "gene1"; transcript_id "tx1"; exon_number "2"; +chr1 fastder gene 12862 15300 29.784 - . gene_id "gene2"; gene_name "faster_gene2"; +chr1 fastder transcript 12862 15300 29.784 - . gene_id "gene2"; transcript_id "tx2"; +chr1 fastder exon 12862 13000 29 - . gene_id "gene2"; transcript_id "tx2"; exon_number "1"; +chr1 fastder exon 14001 14200 30 - . gene_id "gene2"; transcript_id "tx2"; exon_number "2"; +chr1 fastder exon 15001 15300 30 - . gene_id "gene2"; transcript_id "tx2"; exon_number "3"; diff --git a/tests/gtfs/splicing_scenarios_test3.gtf b/tests/gtfs/splicing_scenarios_test3.gtf index a9e68b4..5177cae 100644 --- a/tests/gtfs/splicing_scenarios_test3.gtf +++ b/tests/gtfs/splicing_scenarios_test3.gtf @@ -2,16 +2,16 @@ #provider: FASTDER #contact: martina.lavanya@gmail.com #format: gtf -#date: 2026-4-28 -chr1 fastder gene 10000 12500 100.75 . . gene_id "gene1"; gene_name "faster_gene1"; -chr1 fastder transcript 10000 12500 100.75 . . gene_id "gene1"; transcript_id "tx1"; -chr1 fastder exon 10000 10500 100 . . gene_id "gene1"; transcript_id "tx1"; exon_number "1"; -chr1 fastder exon 11000 12500 101 . . gene_id "gene1"; transcript_id "tx1"; exon_number "2"; -chr1 fastder gene 12861 15300 29.784 . . gene_id "gene2"; gene_name "faster_gene2"; -chr1 fastder transcript 12861 15300 29.784 . . gene_id "gene2"; transcript_id "tx2"; -chr1 fastder exon 12861 12999 29 . . gene_id "gene2"; transcript_id "tx2"; exon_number "1"; -chr1 fastder exon 14001 14201 30 . . gene_id "gene2"; transcript_id "tx2"; exon_number "2"; -chr1 fastder exon 14999 15300 30 . . gene_id "gene2"; transcript_id "tx2"; exon_number "3"; -chr1 fastder gene 15300 15600 37 . . gene_id "gene3"; gene_name "faster_gene3"; -chr1 fastder transcript 15300 15600 37 . . gene_id "gene3"; transcript_id "tx3"; -chr1 fastder exon 15300 15600 37 . . gene_id "gene3"; transcript_id "tx3"; exon_number "1"; +#date: 2026-5-18 +chr1 fastder gene 10001 12500 100.75 - . gene_id "gene1"; gene_name "faster_gene1"; +chr1 fastder transcript 10001 12500 100.75 - . gene_id "gene1"; transcript_id "tx1"; +chr1 fastder exon 10001 10500 100 - . gene_id "gene1"; transcript_id "tx1"; exon_number "1"; +chr1 fastder exon 11001 12500 101 - . gene_id "gene1"; transcript_id "tx1"; exon_number "2"; +chr1 fastder gene 12862 15300 29.784 - . gene_id "gene2"; gene_name "faster_gene2"; +chr1 fastder transcript 12862 15300 29.784 - . gene_id "gene2"; transcript_id "tx2"; +chr1 fastder exon 12862 13000 29 - . gene_id "gene2"; transcript_id "tx2"; exon_number "1"; +chr1 fastder exon 14001 14200 30 - . gene_id "gene2"; transcript_id "tx2"; exon_number "2"; +chr1 fastder exon 15001 15300 30 - . gene_id "gene2"; transcript_id "tx2"; exon_number "3"; +chr1 fastder gene 15301 15600 37 . . gene_id "gene3"; gene_name "faster_gene3"; +chr1 fastder transcript 15301 15600 37 . . gene_id "gene3"; transcript_id "tx3"; +chr1 fastder exon 15301 15600 37 . . gene_id "gene3"; transcript_id "tx3"; exon_number "1"; diff --git a/tests/gtfs/splicing_scenarios_test4.gtf b/tests/gtfs/splicing_scenarios_test4.gtf index 174d888..40845c5 100644 --- a/tests/gtfs/splicing_scenarios_test4.gtf +++ b/tests/gtfs/splicing_scenarios_test4.gtf @@ -2,19 +2,19 @@ #provider: FASTDER #contact: martina.lavanya@gmail.com #format: gtf -#date: 2026-4-28 -chr1 fastder gene 10000 12500 100.75 . . gene_id "gene1"; gene_name "faster_gene1"; -chr1 fastder transcript 10000 12500 100.75 . . gene_id "gene1"; transcript_id "tx1"; -chr1 fastder exon 10000 10500 100 . . gene_id "gene1"; transcript_id "tx1"; exon_number "1"; -chr1 fastder exon 11000 12500 101 . . gene_id "gene1"; transcript_id "tx1"; exon_number "2"; -chr1 fastder gene 12861 15300 29.784 . . gene_id "gene2"; gene_name "faster_gene2"; -chr1 fastder transcript 12861 15300 29.784 . . gene_id "gene2"; transcript_id "tx2"; -chr1 fastder exon 12861 12999 29 . . gene_id "gene2"; transcript_id "tx2"; exon_number "1"; -chr1 fastder exon 14001 14201 30 . . gene_id "gene2"; transcript_id "tx2"; exon_number "2"; -chr1 fastder exon 14999 15300 30 . . gene_id "gene2"; transcript_id "tx2"; exon_number "3"; -chr1 fastder gene 15300 15600 37 . . gene_id "gene3"; gene_name "faster_gene3"; -chr1 fastder transcript 15300 15600 37 . . gene_id "gene3"; transcript_id "tx3"; -chr1 fastder exon 15300 15600 37 . . gene_id "gene3"; transcript_id "tx3"; exon_number "1"; -chr1 fastder gene 15600 15800 87 . . gene_id "gene4"; gene_name "faster_gene4"; -chr1 fastder transcript 15600 15800 87 . . gene_id "gene4"; transcript_id "tx4"; -chr1 fastder exon 15600 15800 87 . . gene_id "gene4"; transcript_id "tx4"; exon_number "1"; +#date: 2026-5-18 +chr1 fastder gene 10001 12500 100.75 - . gene_id "gene1"; gene_name "faster_gene1"; +chr1 fastder transcript 10001 12500 100.75 - . gene_id "gene1"; transcript_id "tx1"; +chr1 fastder exon 10001 10500 100 - . gene_id "gene1"; transcript_id "tx1"; exon_number "1"; +chr1 fastder exon 11001 12500 101 - . gene_id "gene1"; transcript_id "tx1"; exon_number "2"; +chr1 fastder gene 12862 15300 29.784 - . gene_id "gene2"; gene_name "faster_gene2"; +chr1 fastder transcript 12862 15300 29.784 - . gene_id "gene2"; transcript_id "tx2"; +chr1 fastder exon 12862 13000 29 - . gene_id "gene2"; transcript_id "tx2"; exon_number "1"; +chr1 fastder exon 14001 14200 30 - . gene_id "gene2"; transcript_id "tx2"; exon_number "2"; +chr1 fastder exon 15001 15300 30 - . gene_id "gene2"; transcript_id "tx2"; exon_number "3"; +chr1 fastder gene 15301 15600 37 . . gene_id "gene3"; gene_name "faster_gene3"; +chr1 fastder transcript 15301 15600 37 . . gene_id "gene3"; transcript_id "tx3"; +chr1 fastder exon 15301 15600 37 . . gene_id "gene3"; transcript_id "tx3"; exon_number "1"; +chr1 fastder gene 15601 15800 87 . . gene_id "gene4"; gene_name "faster_gene4"; +chr1 fastder transcript 15601 15800 87 . . gene_id "gene4"; transcript_id "tx4"; +chr1 fastder exon 15601 15800 87 . . gene_id "gene4"; transcript_id "tx4"; exon_number "1"; diff --git a/tests/gtfs/splicing_scenarios_test5.gtf b/tests/gtfs/splicing_scenarios_test5.gtf index b981f8d..07cefdf 100644 --- a/tests/gtfs/splicing_scenarios_test5.gtf +++ b/tests/gtfs/splicing_scenarios_test5.gtf @@ -2,25 +2,25 @@ #provider: FASTDER #contact: martina.lavanya@gmail.com #format: gtf -#date: 2026-4-28 -chr1 fastder gene 10000 10500 100 . . gene_id "gene1"; gene_name "faster_gene1"; -chr1 fastder transcript 10000 10500 100 . . gene_id "gene1"; transcript_id "tx1"; -chr1 fastder exon 10000 10500 100 . . gene_id "gene1"; transcript_id "tx1"; exon_number "1"; -chr1 fastder gene 11000 12500 101 . . gene_id "gene2"; gene_name "faster_gene2"; -chr1 fastder transcript 11000 12500 101 . . gene_id "gene2"; transcript_id "tx2"; -chr1 fastder exon 11000 12500 101 . . gene_id "gene2"; transcript_id "tx2"; exon_number "1"; -chr1 fastder gene 12861 12999 29 . . gene_id "gene3"; gene_name "faster_gene3"; -chr1 fastder transcript 12861 12999 29 . . gene_id "gene3"; transcript_id "tx3"; -chr1 fastder exon 12861 12999 29 . . gene_id "gene3"; transcript_id "tx3"; exon_number "1"; -chr1 fastder gene 14001 14201 30 . . gene_id "gene4"; gene_name "faster_gene4"; -chr1 fastder transcript 14001 14201 30 . . gene_id "gene4"; transcript_id "tx4"; -chr1 fastder exon 14001 14201 30 . . gene_id "gene4"; transcript_id "tx4"; exon_number "1"; -chr1 fastder gene 14999 15300 30 . . gene_id "gene5"; gene_name "faster_gene5"; -chr1 fastder transcript 14999 15300 30 . . gene_id "gene5"; transcript_id "tx5"; -chr1 fastder exon 14999 15300 30 . . gene_id "gene5"; transcript_id "tx5"; exon_number "1"; -chr1 fastder gene 15300 15600 37 . . gene_id "gene6"; gene_name "faster_gene6"; -chr1 fastder transcript 15300 15600 37 . . gene_id "gene6"; transcript_id "tx6"; -chr1 fastder exon 15300 15600 37 . . gene_id "gene6"; transcript_id "tx6"; exon_number "1"; -chr1 fastder gene 15600 15800 87 . . gene_id "gene7"; gene_name "faster_gene7"; -chr1 fastder transcript 15600 15800 87 . . gene_id "gene7"; transcript_id "tx7"; -chr1 fastder exon 15600 15800 87 . . gene_id "gene7"; transcript_id "tx7"; exon_number "1"; +#date: 2026-5-18 +chr1 fastder gene 10001 10500 100 . . gene_id "gene1"; gene_name "faster_gene1"; +chr1 fastder transcript 10001 10500 100 . . gene_id "gene1"; transcript_id "tx1"; +chr1 fastder exon 10001 10500 100 . . gene_id "gene1"; transcript_id "tx1"; exon_number "1"; +chr1 fastder gene 11001 12500 101 . . gene_id "gene2"; gene_name "faster_gene2"; +chr1 fastder transcript 11001 12500 101 . . gene_id "gene2"; transcript_id "tx2"; +chr1 fastder exon 11001 12500 101 . . gene_id "gene2"; transcript_id "tx2"; exon_number "1"; +chr1 fastder gene 12862 12999 29 . . gene_id "gene3"; gene_name "faster_gene3"; +chr1 fastder transcript 12862 12999 29 . . gene_id "gene3"; transcript_id "tx3"; +chr1 fastder exon 12862 12999 29 . . gene_id "gene3"; transcript_id "tx3"; exon_number "1"; +chr1 fastder gene 14002 14201 30 . . gene_id "gene4"; gene_name "faster_gene4"; +chr1 fastder transcript 14002 14201 30 . . gene_id "gene4"; transcript_id "tx4"; +chr1 fastder exon 14002 14201 30 . . gene_id "gene4"; transcript_id "tx4"; exon_number "1"; +chr1 fastder gene 15000 15300 30 . . gene_id "gene5"; gene_name "faster_gene5"; +chr1 fastder transcript 15000 15300 30 . . gene_id "gene5"; transcript_id "tx5"; +chr1 fastder exon 15000 15300 30 . . gene_id "gene5"; transcript_id "tx5"; exon_number "1"; +chr1 fastder gene 15301 15600 37 . . gene_id "gene6"; gene_name "faster_gene6"; +chr1 fastder transcript 15301 15600 37 . . gene_id "gene6"; transcript_id "tx6"; +chr1 fastder exon 15301 15600 37 . . gene_id "gene6"; transcript_id "tx6"; exon_number "1"; +chr1 fastder gene 15601 15800 87 . . gene_id "gene7"; gene_name "faster_gene7"; +chr1 fastder transcript 15601 15800 87 . . gene_id "gene7"; transcript_id "tx7"; +chr1 fastder exon 15601 15800 87 . . gene_id "gene7"; transcript_id "tx7"; exon_number "1"; diff --git a/tests/gtfs/splicing_scenarios_test6.gtf b/tests/gtfs/splicing_scenarios_test6.gtf index 410d536..a19baf5 100644 --- a/tests/gtfs/splicing_scenarios_test6.gtf +++ b/tests/gtfs/splicing_scenarios_test6.gtf @@ -2,21 +2,21 @@ #provider: FASTDER #contact: martina.lavanya@gmail.com #format: gtf -#date: 2026-4-28 -chr1 fastder gene 10000 12500 100.75 . . gene_id "gene1"; gene_name "faster_gene1"; -chr1 fastder transcript 10000 12500 100.75 . . gene_id "gene1"; transcript_id "tx1"; -chr1 fastder exon 10000 10500 100 . . gene_id "gene1"; transcript_id "tx1"; exon_number "1"; -chr1 fastder exon 11000 12500 101 . . gene_id "gene1"; transcript_id "tx1"; exon_number "2"; -chr1 fastder gene 12861 12999 29 . . gene_id "gene2"; gene_name "faster_gene2"; -chr1 fastder transcript 12861 12999 29 . . gene_id "gene2"; transcript_id "tx2"; -chr1 fastder exon 12861 12999 29 . . gene_id "gene2"; transcript_id "tx2"; exon_number "1"; -chr1 fastder gene 14001 15300 30 . . gene_id "gene3"; gene_name "faster_gene3"; -chr1 fastder transcript 14001 15300 30 . . gene_id "gene3"; transcript_id "tx3"; -chr1 fastder exon 14001 14201 30 . . gene_id "gene3"; transcript_id "tx3"; exon_number "1"; -chr1 fastder exon 14999 15300 30 . . gene_id "gene3"; transcript_id "tx3"; exon_number "2"; -chr1 fastder gene 15300 15600 37 . . gene_id "gene4"; gene_name "faster_gene4"; -chr1 fastder transcript 15300 15600 37 . . gene_id "gene4"; transcript_id "tx4"; -chr1 fastder exon 15300 15600 37 . . gene_id "gene4"; transcript_id "tx4"; exon_number "1"; -chr1 fastder gene 15600 15800 87 . . gene_id "gene5"; gene_name "faster_gene5"; -chr1 fastder transcript 15600 15800 87 . . gene_id "gene5"; transcript_id "tx5"; -chr1 fastder exon 15600 15800 87 . . gene_id "gene5"; transcript_id "tx5"; exon_number "1"; +#date: 2026-5-18 +chr1 fastder gene 10001 12500 100.75 - . gene_id "gene1"; gene_name "faster_gene1"; +chr1 fastder transcript 10001 12500 100.75 - . gene_id "gene1"; transcript_id "tx1"; +chr1 fastder exon 10001 10500 100 - . gene_id "gene1"; transcript_id "tx1"; exon_number "1"; +chr1 fastder exon 11001 12500 101 - . gene_id "gene1"; transcript_id "tx1"; exon_number "2"; +chr1 fastder gene 12862 12999 29 . . gene_id "gene2"; gene_name "faster_gene2"; +chr1 fastder transcript 12862 12999 29 . . gene_id "gene2"; transcript_id "tx2"; +chr1 fastder exon 12862 12999 29 . . gene_id "gene2"; transcript_id "tx2"; exon_number "1"; +chr1 fastder gene 14002 15300 30 - . gene_id "gene3"; gene_name "faster_gene3"; +chr1 fastder transcript 14002 15300 30 - . gene_id "gene3"; transcript_id "tx3"; +chr1 fastder exon 14002 14200 30 - . gene_id "gene3"; transcript_id "tx3"; exon_number "1"; +chr1 fastder exon 15001 15300 30 - . gene_id "gene3"; transcript_id "tx3"; exon_number "2"; +chr1 fastder gene 15301 15600 37 . . gene_id "gene4"; gene_name "faster_gene4"; +chr1 fastder transcript 15301 15600 37 . . gene_id "gene4"; transcript_id "tx4"; +chr1 fastder exon 15301 15600 37 . . gene_id "gene4"; transcript_id "tx4"; exon_number "1"; +chr1 fastder gene 15601 15800 87 . . gene_id "gene5"; gene_name "faster_gene5"; +chr1 fastder transcript 15601 15800 87 . . gene_id "gene5"; transcript_id "tx5"; +chr1 fastder exon 15601 15800 87 . . gene_id "gene5"; transcript_id "tx5"; exon_number "1"; diff --git a/tests/gtfs/splicing_scenarios_test7.gtf b/tests/gtfs/splicing_scenarios_test7.gtf index f90eb85..f0e8112 100644 --- a/tests/gtfs/splicing_scenarios_test7.gtf +++ b/tests/gtfs/splicing_scenarios_test7.gtf @@ -2,15 +2,15 @@ #provider: FASTDER #contact: martina.lavanya@gmail.com #format: gtf -#date: 2026-4-28 -chr1 fastder gene 9000 9200 2 . . gene_id "gene1"; gene_name "faster_gene1"; -chr1 fastder transcript 9000 9200 2 . . gene_id "gene1"; transcript_id "tx1"; -chr1 fastder exon 9000 9200 2 . . gene_id "gene1"; transcript_id "tx1"; exon_number "1"; -chr1 fastder gene 10000 12500 100.75 . . gene_id "gene2"; gene_name "faster_gene2"; -chr1 fastder transcript 10000 12500 100.75 . . gene_id "gene2"; transcript_id "tx2"; -chr1 fastder exon 10000 10500 100 . . gene_id "gene2"; transcript_id "tx2"; exon_number "1"; -chr1 fastder exon 11000 12500 101 . . gene_id "gene2"; transcript_id "tx2"; exon_number "2"; -chr1 fastder gene 14001 15300 30 . . gene_id "gene3"; gene_name "faster_gene3"; -chr1 fastder transcript 14001 15300 30 . . gene_id "gene3"; transcript_id "tx3"; -chr1 fastder exon 14001 14201 30 . . gene_id "gene3"; transcript_id "tx3"; exon_number "1"; -chr1 fastder exon 14999 15300 30 . . gene_id "gene3"; transcript_id "tx3"; exon_number "2"; +#date: 2026-5-18 +chr1 fastder gene 9001 9200 2 . . gene_id "gene1"; gene_name "faster_gene1"; +chr1 fastder transcript 9001 9200 2 . . gene_id "gene1"; transcript_id "tx1"; +chr1 fastder exon 9001 9200 2 . . gene_id "gene1"; transcript_id "tx1"; exon_number "1"; +chr1 fastder gene 10001 12500 100.75 - . gene_id "gene2"; gene_name "faster_gene2"; +chr1 fastder transcript 10001 12500 100.75 - . gene_id "gene2"; transcript_id "tx2"; +chr1 fastder exon 10001 10500 100 - . gene_id "gene2"; transcript_id "tx2"; exon_number "1"; +chr1 fastder exon 11001 12500 101 - . gene_id "gene2"; transcript_id "tx2"; exon_number "2"; +chr1 fastder gene 14002 15300 30 - . gene_id "gene3"; gene_name "faster_gene3"; +chr1 fastder transcript 14002 15300 30 - . gene_id "gene3"; transcript_id "tx3"; +chr1 fastder exon 14002 14200 30 - . gene_id "gene3"; transcript_id "tx3"; exon_number "1"; +chr1 fastder exon 15001 15300 30 - . gene_id "gene3"; transcript_id "tx3"; exon_number "2"; diff --git a/tests/gtfs/splicing_scenarios_test8.gtf b/tests/gtfs/splicing_scenarios_test8.gtf index 0850c2e..f781ce8 100644 --- a/tests/gtfs/splicing_scenarios_test8.gtf +++ b/tests/gtfs/splicing_scenarios_test8.gtf @@ -2,16 +2,16 @@ #provider: FASTDER #contact: martina.lavanya@gmail.com #format: gtf -#date: 2026-4-28 -chr1 fastder gene 9000 9200 2 . . gene_id "gene1"; gene_name "faster_gene1"; -chr1 fastder transcript 9000 9200 2 . . gene_id "gene1"; transcript_id "tx1"; -chr1 fastder exon 9000 9200 2 . . gene_id "gene1"; transcript_id "tx1"; exon_number "1"; -chr1 fastder gene 10000 15300 30.9503 . . gene_id "gene2"; gene_name "faster_gene2"; -chr1 fastder transcript 10000 15300 30.9503 . . gene_id "gene2"; transcript_id "tx2"; -chr1 fastder exon 10000 10500 30 . . gene_id "gene2"; transcript_id "tx2"; exon_number "1"; -chr1 fastder exon 11000 14201 31 . . gene_id "gene2"; transcript_id "tx2"; exon_number "2"; -chr1 fastder exon 14999 15300 32 . . gene_id "gene2"; transcript_id "tx2"; exon_number "3"; -chr2 fastder gene 1501 4000 10.1188 . . gene_id "gene3"; gene_name "faster_gene3"; -chr2 fastder transcript 1501 4000 10.1188 . . gene_id "gene3"; transcript_id "tx3"; -chr2 fastder exon 1501 2999 10 . . gene_id "gene3"; transcript_id "tx3"; exon_number "1"; -chr2 fastder exon 3798 4000 11 . . gene_id "gene3"; transcript_id "tx3"; exon_number "2"; +#date: 2026-5-18 +chr1 fastder gene 9001 9200 2 . . gene_id "gene1"; gene_name "faster_gene1"; +chr1 fastder transcript 9001 9200 2 . . gene_id "gene1"; transcript_id "tx1"; +chr1 fastder exon 9001 9200 2 . . gene_id "gene1"; transcript_id "tx1"; exon_number "1"; +chr1 fastder gene 10001 15300 30.9503 - . gene_id "gene2"; gene_name "faster_gene2"; +chr1 fastder transcript 10001 15300 30.9503 - . gene_id "gene2"; transcript_id "tx2"; +chr1 fastder exon 10001 10500 30 - . gene_id "gene2"; transcript_id "tx2"; exon_number "1"; +chr1 fastder exon 11001 14200 31 - . gene_id "gene2"; transcript_id "tx2"; exon_number "2"; +chr1 fastder exon 15001 15300 32 - . gene_id "gene2"; transcript_id "tx2"; exon_number "3"; +chr2 fastder gene 1502 4000 10.1188 - . gene_id "gene3"; gene_name "faster_gene3"; +chr2 fastder transcript 1502 4000 10.1188 - . gene_id "gene3"; transcript_id "tx3"; +chr2 fastder exon 1502 3000 10 - . gene_id "gene3"; transcript_id "tx3"; exon_number "1"; +chr2 fastder exon 3801 4000 11 - . gene_id "gene3"; transcript_id "tx3"; exon_number "2"; diff --git a/tests/gtfs/write_gtf_fallback_test.gtf b/tests/gtfs/write_gtf_fallback_test.gtf new file mode 100644 index 0000000..6b00bc4 --- /dev/null +++ b/tests/gtfs/write_gtf_fallback_test.gtf @@ -0,0 +1,10 @@ +#description: expressed region annotation of the genome based on BedGraph and MM / RR splice junction information. +#provider: FASTDER +#contact: martina.lavanya@gmail.com +#format: gtf +#date: 2026-5-19 +chr1 fastder gene 10001 12500 100 - . gene_id "gene1"; gene_name "faster_gene1"; +chr1 fastder transcript 10001 12500 100 - . gene_id "gene1"; transcript_id "tx1"; +chr1 fastder exon 10001 10500 100 - . gene_id "gene1"; transcript_id "tx1"; exon_number "1"; +chr1 fastder exon 11001 11012 100 - . gene_id "gene1"; transcript_id "tx1"; exon_number "2"; +chr1 fastder exon 12001 12500 100 - . gene_id "gene1"; transcript_id "tx1"; exon_number "3"; diff --git a/tests/gtfs/write_gtf_snap_test.gtf b/tests/gtfs/write_gtf_snap_test.gtf new file mode 100644 index 0000000..ed2a5a4 --- /dev/null +++ b/tests/gtfs/write_gtf_snap_test.gtf @@ -0,0 +1,10 @@ +#description: expressed region annotation of the genome based on BedGraph and MM / RR splice junction information. +#provider: FASTDER +#contact: martina.lavanya@gmail.com +#format: gtf +#date: 2026-5-19 +chr1 fastder gene 10001 14540 88.2609 - . gene_id "gene1"; gene_name "faster_gene1"; +chr1 fastder transcript 10001 14540 88.2609 - . gene_id "gene1"; transcript_id "tx1"; +chr1 fastder exon 10001 10500 100 - . gene_id "gene1"; transcript_id "tx1"; exon_number "1"; +chr1 fastder exon 11001 13000 101 - . gene_id "gene1"; transcript_id "tx1"; exon_number "2"; +chr1 fastder exon 14001 14540 30 - . gene_id "gene1"; transcript_id "tx1"; exon_number "3";