diff --git a/CHANGELOG.md b/CHANGELOG.md index c0a4afa..c156d6e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,26 @@ All notable changes to this project are documented here. This project adheres to [Semantic Versioning](https://semver.org/). -## [2.1.0] - unreleased +## [2.1.1] - unreleased + +### Fixed +- `--out_filtered_bam <file>` with a single BAM and a bare filename (no directory + prefix, e.g. `--out_filtered_bam filtered.bam`) created a **directory** named after + the output (`filtered.bam/`) and wrote the filtered BAM *inside* it, instead of + writing the file. 2.1.0 corrected output-path generation but missed two inline + sites that run first and create the directory: the auto-built bowtie2 index + directory and the temporary spurious-junction BED directory both treated a bare + output filename as a directory. Creating that directory also flipped the + file/directory heuristic (which checks `fs::is_directory`), defeating the 2.1.0 + change. Now a bare single-file `--out_filtered_bam` writes exactly that file (and + `--removed_alignments_bam` writes `<name>_removed_alignments.bam` next to it), with + no stray directory. For a bare filename the auto-built bowtie2 index is written to + its own `<ref>_bt2_idx/` directory next to the reference FASTA (matching the + default when no output path is given). +- The temporary spurious-junction BED now uses a process-unique name, so concurrent + EASTR runs sharing an output directory no longer clobber each other's temp file. + +## [2.1.0] - 2026-06-08 ### Added - `--version` flag that prints the version and exits 0. The version is generated @@ -31,8 +50,10 @@ All notable changes to this project are documented here. This project adheres to `--out_kept_junctions`. It previously behaved as a directory for BAM input and was a silent no-op for GTF/BED. Directory mode is still used for multi-input runs. 
(Issues #1, #3) -- `--out_filtered_bam` now accepts a **file path** for a single BAM input (writes the - filtered BAM directly), keeping directory mode for BAM lists. (Issue #4) +- `--out_filtered_bam` accepts a **file path** for a single BAM input (writes the + filtered BAM directly), keeping directory mode for BAM lists. (Issue #4) **Note:** + this was only partially fixed in 2.1.0 — a bare filename still created a directory; + fully fixed in 2.1.1. ### Fixed - Bare output filenames without a directory prefix (e.g. `out.bed`, `out.bam`) were diff --git a/CMakeLists.txt b/CMakeLists.txt index 19d2bca..ded8da3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.16) -project(eastr VERSION 2.1.0 LANGUAGES CXX C) +project(eastr VERSION 2.1.1 LANGUAGES CXX C) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) diff --git a/README.md b/README.md index 926e23a..8dea63f 100644 --- a/README.md +++ b/README.md @@ -100,6 +100,8 @@ eastr --gtf annotation.gtf \ ``` > **Note:** The `-i` (bowtie2 index) flag is optional. If omitted, EASTR will automatically build an index from the reference FASTA, which adds a few minutes for large genomes. Providing a pre-built index speeds up repeated runs. +> +> The auto-built index is written to its own `<ref>_bt2_idx/` directory (the index is made up of several `.bt2` files) next to the reference FASTA — or inside the `--out_filtered_bam` directory when that is a directory — and is preserved for reuse. If the reference directory is read-only, pass a pre-built index with `-i`. 
### Save removed alignments for inspection diff --git a/conda/meta.yaml b/conda/meta.yaml index 71908f6..80b564b 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -1,5 +1,5 @@ {% set name = "eastr-cpp" %} -{% set version = "2.1.0" %} +{% set version = "2.1.1" %} package: name: {{ name|lower }} diff --git a/include/eastr/path_utils.hpp b/include/eastr/path_utils.hpp index 122c62b..1fe3762 100644 --- a/include/eastr/path_utils.hpp +++ b/include/eastr/path_utils.hpp @@ -18,4 +18,24 @@ std::string path_basename(const std::string& path); // Directory component of `path`, or "." if `path` has no separator. std::string path_parent_dir(const std::string& path); +// Directory in which to place build artifacts (e.g. the auto-built bowtie2 +// index) for the output `out_path`. A bare filename returns "" — the sentinel +// meaning "no explicit directory; use the default location". Crucially this +// never treats a single output *file* as a directory. +// "" -> "" +// "filtered.bam" -> "" (bare file: default / next to the reference) +// "d/filt.bam" -> "d" +// "outdir" -> "outdir" (no extension: a directory) +std::string index_output_dir(const std::string& out_path); + +// A real, never-empty directory in which to place a sidecar/temp file next to +// the output `out_path` (e.g. the temporary spurious-junction BED). Like +// index_output_dir but a bare filename resolves to "." (the current directory) +// rather than the empty sentinel, since a temp file needs a concrete location. +// "" -> "." +// "filtered.bam" -> "." 
+// "d/filt.bam" -> "d" +// "outdir" -> "outdir" +std::string sidecar_dir(const std::string& out_path); + } // namespace eastr diff --git a/src/main.cpp b/src/main.cpp index 650d9dc..9d5b612 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -335,13 +335,14 @@ int main(int argc, char* argv[]) { // Index reference if needed eastr::FastaIndex::ensure_indexed(config.reference_fasta); - // Determine output directory for index + // Determine output directory for the auto-built index. A bare output + // filename resolves to "" so the index is built in the default location + // (next to the reference) rather than in a directory named after the file. std::string output_dir; if (!config.out_filtered_bam.empty()) { - output_dir = fs::path(config.out_filtered_bam).parent_path().string(); - if (output_dir.empty()) output_dir = config.out_filtered_bam; + output_dir = eastr::index_output_dir(config.out_filtered_bam); } else if (!config.out_removed_junctions.empty() && config.out_removed_junctions != "stdout") { - output_dir = fs::path(config.out_removed_junctions).parent_path().string(); + output_dir = eastr::index_output_dir(config.out_removed_junctions); } // Build or locate bowtie2 index @@ -510,11 +511,14 @@ int main(int argc, char* argv[]) { // If output is "stdout", we need a temp file for BAM filtering bool using_temp_bed = (spurious_bed_path == "stdout" || spurious_bed_path.empty()); if (using_temp_bed) { - // Create temp file in the filtered BAM output directory - fs::path temp_dir = fs::path(config.out_filtered_bam).parent_path(); - if (temp_dir.empty()) temp_dir = config.out_filtered_bam; + // Place the temp BED in the filtered BAM's directory. sidecar_dir + // never treats a single output file as a directory (so a bare + // "filtered.bam" resolves to "." rather than creating filtered.bam/). + // The name is pid-unique to avoid collisions between concurrent runs. 
+ fs::path temp_dir = eastr::sidecar_dir(config.out_filtered_bam); fs::create_directories(temp_dir); - spurious_bed_path = (temp_dir / ".eastr_spurious_temp.bed").string(); + std::string temp_name = ".eastr_spurious_temp." + std::to_string(getpid()) + ".bed"; + spurious_bed_path = (temp_dir / temp_name).string(); } else { // Create output directory if needed fs::path dir = fs::path(spurious_bed_path).parent_path(); diff --git a/src/path_utils.cpp b/src/path_utils.cpp index 5ba2ce7..baf65f8 100644 --- a/src/path_utils.cpp +++ b/src/path_utils.cpp @@ -54,4 +54,33 @@ std::string path_parent_dir(const std::string& path) { return path.substr(0, last_slash); } +std::string index_output_dir(const std::string& out_path) { + if (out_path.empty()) { + return std::string(); + } + if (is_file_path(out_path)) { + // Place artifacts beside the output file; "" for a bare filename so the + // caller falls back to its default location (next to the reference). + size_t last_slash = out_path.find_last_of("/\\"); + return (last_slash == std::string::npos) ? std::string() + : out_path.substr(0, last_slash); + } + // A directory output: use it directly. + return out_path; +} + +std::string sidecar_dir(const std::string& out_path) { + if (out_path.empty()) { + return std::string("."); + } + if (is_file_path(out_path)) { + // Parent of the output file, or "." for a bare filename — never the + // file path itself (which would create a directory named after it). + size_t last_slash = out_path.find_last_of("/\\"); + return (last_slash == std::string::npos) ? 
std::string(".") + : out_path.substr(0, last_slash); + } + return out_path; +} + } // namespace eastr diff --git a/tests/test_path_utils.cpp b/tests/test_path_utils.cpp index bda6bf1..cb66297 100644 --- a/tests/test_path_utils.cpp +++ b/tests/test_path_utils.cpp @@ -39,6 +39,31 @@ TEST_CASE("path_parent_dir returns the directory component", "[path_utils]") { REQUIRE(path_parent_dir("sample.bam") == "."); } +// NOTE: index_output_dir/sidecar_dir call is_file_path, which inspects the +// filesystem (fs::is_directory). These cases use paths that do NOT exist on +// disk; do not create fixtures named "filtered.bam"/"outdir" or results flip. + +TEST_CASE("index_output_dir: bare file -> default (empty), never a directory", + "[path_utils]") { + // Regression (2.1.1): a bare "filtered.bam" must NOT be used as a directory. + // Empty result => build the index in the default location (next to reference). + REQUIRE(index_output_dir("") == ""); + REQUIRE(index_output_dir("filtered.bam") == ""); + REQUIRE(index_output_dir("out/filtered.bam") == "out"); + REQUIRE(index_output_dir("a/b/c/f.bam") == "a/b/c"); + REQUIRE(index_output_dir("outdir") == "outdir"); // no extension => a directory +} + +TEST_CASE("sidecar_dir: bare file -> '.', never a directory", "[path_utils]") { + // Regression (2.1.1): a bare "filtered.bam" must resolve to "." (cwd), not + // create a directory named "filtered.bam/". + REQUIRE(sidecar_dir("") == "."); + REQUIRE(sidecar_dir("filtered.bam") == "."); + REQUIRE(sidecar_dir("out/filtered.bam") == "out"); + REQUIRE(sidecar_dir("a/b/filt.bam") == "a/b"); + REQUIRE(sidecar_dir("outdir") == "outdir"); +} + TEST_CASE("generate_junction_output_paths: single input + file path", "[output_writer]") { // Single input, bare filename -> use the path as-is (issue #3 core). 
auto paths = OutputWriter::generate_junction_output_paths( @@ -71,6 +96,14 @@ TEST_CASE("generate_bam_output_paths: single input + file path", "[output_writer REQUIRE(paths[0] == "out.bam"); } +TEST_CASE("generate_bam_output_paths: single input + directory", "[output_writer]") { + // Single BAM + a directory path -> one per-sample file inside the directory. + auto paths = OutputWriter::generate_bam_output_paths( + {"/d/sample.bam"}, "outdir", "_EASTR_filtered"); + REQUIRE(paths.size() == 1); + REQUIRE(paths[0] == "outdir/sample_EASTR_filtered.bam"); +} + TEST_CASE("generate_bam_output_paths: multi input uses directory", "[output_writer]") { auto paths = OutputWriter::generate_bam_output_paths( {"a.bam", "b.bam"}, "outdir", "_EASTR_filtered");