diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index f49b35f..61e407e 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -18,6 +18,8 @@ jobs:
       run: cargo build --verbose --package simd-minimizers
     - name: Run tests
       run: cargo test --verbose --package simd-minimizers
+    - name: Run release tests
+      run: cargo test --release --verbose --package simd-minimizers

   test-mac:
     runs-on: macos-latest
@@ -27,3 +29,5 @@ jobs:
       run: cargo build --verbose --package simd-minimizers
     - name: Run tests
       run: cargo test --verbose --package simd-minimizers
+    - name: Run release tests
+      run: cargo test --release --verbose --package simd-minimizers
diff --git a/README.md b/README.md
index 0563335..547bb12 100644
--- a/README.md
+++ b/README.md
@@ -57,6 +57,14 @@ let minimizer_vals: Vec<u64> = canonical_minimizers(k, w)
     .run(packed_seq.as_slice(), &mut minimizer_positions)
     .values_u64()
     .collect();
+
+// Compute _closed syncmer_ positions and values instead:
+let mut syncmer_positions = Vec::new();
+// List of (k+w-1)-mer values.
+let syncmer_vals: Vec<u64> = canonical_closed_syncmers(k, w)
+    .run(packed_seq.as_slice(), &mut syncmer_positions)
+    .values_u64()
+    .collect();
 ```

 ## Benchmarks
diff --git a/src/collect.rs b/src/collect.rs
index d7b4287..e906433 100644
--- a/src/collect.rs
+++ b/src/collect.rs
@@ -34,6 +34,7 @@ pub fn collect_and_dedup_into_scalar(mut it: impl Iterator<Item = u32>, out_vec:
     });
     out_vec.truncate(idx + 1);
 }
+
 pub fn collect_and_dedup_with_index_into_scalar(
     it: impl Iterator<Item = u32>,
     out_vec: &mut Vec<u32>,
@@ -111,7 +112,7 @@ pub trait CollectAndDedup: Sized {
     /// Works by taking 8 elements from each stream, and then transposing the SIMD-matrix before writing out the results.
     ///
     /// By default (when `SUPER` is false), the deduplicated input values are written in `out_vec`.
-    /// When `SUPER` is true, the index of the stream in which the input value first appeared, i.e., the start of its super-k-mer, is additionale written in `idx_vec`.
+    /// When `SUPER` is true, the index in the stream where the input value first appeared, i.e., the start of its super-k-mer, is additionally written in `idx_vec`.
     fn collect_and_dedup_into_impl(
         self,
         out_vec: &mut Vec<u32>,
diff --git a/src/intrinsics/dedup.rs b/src/intrinsics/dedup.rs
index 60e7caf..0f9472f 100644
--- a/src/intrinsics/dedup.rs
+++ b/src/intrinsics/dedup.rs
@@ -3,15 +3,24 @@ use crate::minimizers::SIMD_SKIPPED;
 use core::mem::transmute;
 use packed_seq::L;

-#[cfg(target_feature = "neon")]
-const OFFSET: S = S::new([0x03_02_01_00; 8]);
-#[cfg(target_feature = "neon")]
-const MASK: S = S::new([0x04_04_04_04; 8]);
+/// Append the values of `vals` where `mask` is *false* to `v`.
+#[cfg(not(any(target_feature = "avx2", target_feature = "neon")))]
+#[inline(always)]
+pub unsafe fn append_filtered_vals(vals: S, mask: S, v: &mut [u32], write_idx: &mut usize) {
+    unsafe {
+        for i in 0..L {
+            if mask.as_array()[i] == 0 {
+                v.as_mut_ptr().add(*write_idx).write(vals.as_array()[i]);
+                *write_idx += 1;
+            }
+        }
+    }
+}

 /// Dedup adjacent `new` values (starting with the last element of `old`).
 /// If an element is different from the preceding element, append the corresponding element of `vals` to `v[write_idx]`.
-#[inline(always)]
 #[cfg(not(any(target_feature = "avx2", target_feature = "neon")))]
+#[inline(always)]
 pub unsafe fn append_unique_vals(
     old: S,
     new: S,
@@ -36,8 +45,8 @@ pub unsafe fn append_unique_vals(
 /// Dedup adjacent `new` values (starting with the last element of `old`).
 /// If an element is different from the preceding element, append the corresponding element of `vals` to `v[write_idx]` and `vals2` to `v2[write_idx]`.
-#[inline(always)]
 #[cfg(not(any(target_feature = "avx2", target_feature = "neon")))]
+#[inline(always)]
 pub unsafe fn append_unique_vals_2(
     old: S,
     new: S,
@@ -64,6 +73,51 @@ pub unsafe fn append_unique_vals_2(
     }
 }

+/// Append the values of `vals` where `mask` is *false* to `v`.
+#[cfg(target_feature = "avx2")]
+#[inline(always)]
+pub unsafe fn append_filtered_vals(vals: S, mask: S, v: &mut [u32], write_idx: &mut usize) {
+    unsafe {
+        use core::arch::x86_64::*;
+        let mask = _mm256_movemask_ps(transmute(mask)) as usize;
+        let numberofnewvalues = L - mask.count_ones() as usize;
+        let key = UNIQSHUF[mask];
+        append_filtered_vals_from_key(vals, key, v, write_idx);
+        *write_idx += numberofnewvalues;
+    }
+}
+
+/// Like [`append_filtered_vals`], but additionally appends the corresponding elements of `vals2` to `v2`.
+#[cfg(target_feature = "avx2")]
+#[inline(always)]
+pub unsafe fn append_filtered_vals_2(
+    vals: S,
+    vals2: S,
+    mask: S,
+    v: &mut [u32],
+    v2: &mut [u32],
+    write_idx: &mut usize,
+) {
+    unsafe {
+        use core::arch::x86_64::*;
+        let mask = _mm256_movemask_ps(transmute(mask)) as usize;
+        let numberofnewvalues = L - mask.count_ones() as usize;
+        let key = UNIQSHUF[mask];
+        append_filtered_vals_from_key(vals, key, v, write_idx);
+        append_filtered_vals_from_key(vals2, key, v2, write_idx);
+        *write_idx += numberofnewvalues;
+    }
+}
+
+/// Shuffle `vals` by `key` and unconditionally store all 8 lanes at `v[write_idx..]`; the caller advances `write_idx`.
+#[cfg(target_feature = "avx2")]
+#[inline(always)]
+unsafe fn append_filtered_vals_from_key(vals: S, key: S, v: &mut [u32], write_idx: &mut usize) {
+    unsafe {
+        use core::arch::x86_64::*;
+        let val = _mm256_permutevar8x32_epi32(transmute(vals), transmute(key));
+        _mm256_storeu_si256(v.as_mut_ptr().add(*write_idx) as *mut __m256i, val);
+    }
+}
+
 /// Dedup adjacent `new` values (starting with the last element of `old`).
 /// If an element is different from the preceding element, append the corresponding element of `vals` to `v[write_idx]`.
 ///
@@ -83,23 +137,18 @@ pub unsafe fn append_unique_vals(
         use core::arch::x86_64::*;

         let old = transmute(old);
-        let vals = transmute(vals);

         let recon = _mm256_blend_epi32(old, transmute(new), 0b01111111);
         let movebyone_mask = _mm256_set_epi32(6, 5, 4, 3, 2, 1, 0, 7); // rotate shuffle
         let vec_tmp: S = transmute(_mm256_permutevar8x32_epi32(recon, movebyone_mask));

-        let mut m = vec_tmp.cmp_eq(new);
+        let mut mask = vec_tmp.cmp_eq(new);
         if SKIP_MAX {
             // skip everything equal to prev, or equal to MAX.
-            m |= new.cmp_eq(SIMD_SKIPPED);
+            mask |= new.cmp_eq(SIMD_SKIPPED);
         }

-        let m = _mm256_movemask_ps(transmute(m)) as usize;
-        let numberofnewvalues = L - m.count_ones() as usize;
-        let key = transmute(UNIQSHUF[m]);
-        let val = _mm256_permutevar8x32_epi32(vals, key);
-        _mm256_storeu_si256(v.as_mut_ptr().add(*write_idx) as *mut __m256i, val);
-        *write_idx += numberofnewvalues;
+
+        append_filtered_vals(vals, mask, v, write_idx);
     }
 }

@@ -125,24 +174,89 @@ pub unsafe fn append_unique_vals_2(
         let old = transmute(old);
         let new = transmute(new);
-        let vals = transmute(vals);
-        let vals2 = transmute(vals2);

         let recon = _mm256_blend_epi32(old, new, 0b01111111);
         let movebyone_mask = _mm256_set_epi32(6, 5, 4, 3, 2, 1, 0, 7); // rotate shuffle
         let vec_tmp = _mm256_permutevar8x32_epi32(recon, movebyone_mask);
+        let mask = transmute(_mm256_cmpeq_epi32(vec_tmp, new));

-        let m = _mm256_movemask_ps(transmute(_mm256_cmpeq_epi32(vec_tmp, new))) as usize;
-        let numberofnewvalues = L - m.count_ones() as usize;
-        let key = transmute(UNIQSHUF[m]);
-        let val = _mm256_permutevar8x32_epi32(vals, key);
-        _mm256_storeu_si256(v.as_mut_ptr().add(*write_idx) as *mut __m256i, val);
-        let val2 = _mm256_permutevar8x32_epi32(vals2, key);
-        _mm256_storeu_si256(v2.as_mut_ptr().add(*write_idx) as *mut __m256i, val2);
+        append_filtered_vals_2(vals, vals2, mask, v, v2, write_idx);
+    }
+}
+
+/// Powers of two used to emulate `movemask` on NEON.
+#[cfg(target_feature = "neon")]
+const POW1: wide::u32x4 = wide::u32x4::new([1, 2, 4, 8]);
+#[cfg(target_feature = "neon")]
+const POW2: wide::u32x4 = wide::u32x4::new([16, 32, 64, 128]);
+/// Blend mask that takes lanes 0-6 from `new` and lane 7 from `old`.
+#[cfg(target_feature = "neon")]
+const NEW_OLD_MASK: S = S::new([!0, !0, !0, !0, !0, !0, !0, 0]);
+/// Byte-shuffle indices that rotate the 8 u32 lanes right by one, i.e., lane order [7, 0, 1, 2, 3, 4, 5, 6].
+#[cfg(target_feature = "neon")]
+const I1: core::arch::aarch64::uint8x16_t =
+    unsafe { transmute([0x1F1E1D1Cu32, 0x03020100, 0x07060504, 0x0B0A0908]) };
+#[cfg(target_feature = "neon")]
+const I2: core::arch::aarch64::uint8x16_t =
+    unsafe { transmute([0x0F0E0D0Cu32, 0x13121110, 0x17161514, 0x1B1A1918]) };
+
+/// Append the values of `vals` where `mask` is *false* to `v`.
+#[cfg(target_feature = "neon")] +#[inline(always)] +pub unsafe fn append_filtered_vals(vals: S, mask: S, v: &mut [u32], write_idx: &mut usize) { + unsafe { + use core::arch::aarch64::vaddvq_u32; + use wide::u32x4; + + let (m1, m2): (u32x4, u32x4) = transmute(mask); + let m1 = vaddvq_u32(transmute(m1 & POW1)); + let m2 = vaddvq_u32(transmute(m2 & POW2)); + let mask = (m1 | m2) as usize; + let numberofnewvalues = L - mask.count_ones() as usize; + let key = UNIQSHUF_NEON[mask]; + append_filtered_vals_from_key(vals, key, v, write_idx); *write_idx += numberofnewvalues; } } +#[cfg(target_feature = "neon")] +#[inline(always)] +pub unsafe fn append_filtered_vals_2( + vals: S, + vals2: S, + mask: S, + v: &mut [u32], + v2: &mut [u32], + write_idx: &mut usize, +) { + unsafe { + use core::arch::aarch64::vaddvq_u32; + use wide::u32x4; + + let (m1, m2): (u32x4, u32x4) = transmute(mask); + let m1 = vaddvq_u32(transmute(m1 & POW1)); + let m2 = vaddvq_u32(transmute(m2 & POW2)); + let mask = (m1 | m2) as usize; + let numberofnewvalues = L - mask.count_ones() as usize; + let key = UNIQSHUF_NEON[mask]; + append_filtered_vals_from_key(vals, key, v, write_idx); + append_filtered_vals_from_key(vals2, key, v2, write_idx); + *write_idx += numberofnewvalues; + } +} + +#[cfg(target_feature = "neon")] +#[inline(always)] +unsafe fn append_filtered_vals_from_key(vals: S, key: S, v: &mut [u32], write_idx: &mut usize) { + unsafe { + use core::arch::aarch64::{vqtbl2q_u8, vst1_u32_x4}; + + let (i1, i2) = transmute(key); + let t = transmute(vals); + let r1 = vqtbl2q_u8(t, i1); + let r2 = vqtbl2q_u8(t, i2); + let val: S = transmute((r1, r2)); + vst1_u32_x4(v.as_mut_ptr().add(*write_idx), transmute(val)); + } +} + /// Dedup adjacent `new` values (starting with the last element of `old`). /// If an element is different from the preceding element, append the corresponding element of `vals` to `v[write_idx]`. 
/// @@ -159,61 +273,18 @@ pub unsafe fn append_unique_vals( write_idx: &mut usize, ) { unsafe { - use core::arch::aarch64::{vaddvq_u32, vqtbl2q_u8, vst1_u32_x4}; - use wide::u32x4; + use core::arch::aarch64::vqtbl2q_u8; - let new_old_mask = S::new([ - u32::MAX, - u32::MAX, - u32::MAX, - u32::MAX, - u32::MAX, - u32::MAX, - u32::MAX, - 0, - ]); - let recon = new_old_mask.blend(new, old); - - // let rotate_idx = S::new([7, 0, 1, 2, 3, 4, 5, 6]); - // let idx = rotate_idx * S::splat(0x04_04_04_04) + S::splat(0x03_02_01_00); - // let (i1, i2) = transmute(idx); - let (i1, i2) = transmute([ - 0x1F1E1D1Cu32, - 0x03020100, - 0x07060504, - 0x0B0A0908, - 0x0F0E0D0C, - 0x13121110, - 0x17161514, - 0x1B1A1918, - ]); + let recon = NEW_OLD_MASK.blend(new, old); let t = transmute(recon); - let r1 = vqtbl2q_u8(t, i1); - let r2 = vqtbl2q_u8(t, i2); + let r1 = vqtbl2q_u8(t, I1); + let r2 = vqtbl2q_u8(t, I2); let prec: S = transmute((r1, r2)); - let mut dup = prec.cmp_eq(new); if SKIP_MAX { dup |= new.cmp_eq(SIMD_SKIPPED); } - // emulate movemask - let (d1, d2): (u32x4, u32x4) = transmute(dup); - let pow1 = u32x4::new([1, 2, 4, 8]); - let pow2 = u32x4::new([16, 32, 64, 128]); - let m1 = vaddvq_u32(transmute(d1 & pow1)); - let m2 = vaddvq_u32(transmute(d2 & pow2)); - let m = (m1 | m2) as usize; - - let numberofnewvalues = L - m.count_ones() as usize; - let key = UNIQSHUF[m]; - let idx = key * MASK + OFFSET; - let (i1, i2) = transmute(idx); - let t = transmute(vals); - let r1 = vqtbl2q_u8(t, i1); - let r2 = vqtbl2q_u8(t, i2); - let val: S = transmute((r1, r2)); - vst1_u32_x4(v.as_mut_ptr().add(*write_idx), transmute(val)); - *write_idx += numberofnewvalues; + append_filtered_vals(vals, dup, v, write_idx); } } @@ -235,67 +306,21 @@ pub unsafe fn append_unique_vals_2( write_idx: &mut usize, ) { unsafe { - use core::arch::aarch64::{vaddvq_u32, vqtbl2q_u8, vst1_u32_x4}; - use wide::u32x4; + use core::arch::aarch64::vqtbl2q_u8; - let new_old_mask = S::new([ - u32::MAX, - u32::MAX, - u32::MAX, - u32::MAX, - u32::MAX, - u32::MAX, - u32::MAX, - 0, - ]); - let recon = new_old_mask.blend(new, old); - - // let rotate_idx = S::new([7, 0, 1, 2, 3, 4, 5, 6]); - // let idx = rotate_idx * S::splat(0x04_04_04_04) + S::splat(0x03_02_01_00); - // let (i1, i2) = transmute(idx); - let (i1, i2) = transmute([ - 0x1F1E1D1Cu32, - 0x03020100, - 0x07060504, - 0x0B0A0908, - 0x0F0E0D0C, - 0x13121110, - 0x17161514, - 0x1B1A1918, - ]); + let recon = NEW_OLD_MASK.blend(new, old); let t = transmute(recon); - let r1 = vqtbl2q_u8(t, i1); - let r2 = vqtbl2q_u8(t, i2); + let r1 = vqtbl2q_u8(t, I1); + let r2 = vqtbl2q_u8(t, I2); let prec: S = transmute((r1, r2)); - let dup = prec.cmp_eq(new); - let (d1, d2): (u32x4, u32x4) = transmute(dup); - let pow1 = u32x4::new([1, 2, 4, 8]); - let pow2 = u32x4::new([16, 32, 64, 128]); - let m1 = vaddvq_u32(transmute(d1 & pow1)); - let m2 = vaddvq_u32(transmute(d2 & pow2)); - let m = (m1 | m2) as usize; - - let numberofnewvalues = L - m.count_ones() as usize; - let key = UNIQSHUF[m]; - let idx = key * MASK + OFFSET; - let (i1, i2) = transmute(idx); - let t = transmute(vals); - let r1 = vqtbl2q_u8(t, i1); - let r2 = vqtbl2q_u8(t, i2); - let val: S = transmute((r1, r2)); - vst1_u32_x4(v.as_mut_ptr().add(*write_idx), transmute(val)); - let t = transmute(vals2); - let r1 = vqtbl2q_u8(t, i1); - let r2 = vqtbl2q_u8(t, i2); - let val2: S = transmute((r1, r2)); - vst1_u32_x4(v2.as_mut_ptr().add(*write_idx), transmute(val2)); - *write_idx += numberofnewvalues; + append_filtered_vals_2(vals, vals2, dup, v, v2, write_idx); 
} } /// For each of 256 masks of which elements are different than their predecessor, /// a shuffle that sends those new elements to the beginning. +#[cfg(target_feature = "avx2")] #[rustfmt::skip] const UNIQSHUF: [S; 256] = unsafe {transmute([ 0,1,2,3,4,5,6,7, @@ -556,6 +581,272 @@ const UNIQSHUF: [S; 256] = unsafe {transmute([ 0,0,0,0,0,0,0,0, ])}; +#[cfg(target_feature = "neon")] +#[allow(clippy::erasing_op, clippy::identity_op)] +#[rustfmt::skip] +const UNIQSHUF_NEON: [wide::u32x8; 256] = unsafe { +const M: u32 = 0x04_04_04_04; +const O: u32 = 0x03_02_01_00; +transmute([ +0*M+O,1*M+O,2*M+O,3*M+O,4*M+O,5*M+O,6*M+O,7*M+O, +1*M+O,2*M+O,3*M+O,4*M+O,5*M+O,6*M+O,7*M+O,0*M+O, +0*M+O,2*M+O,3*M+O,4*M+O,5*M+O,6*M+O,7*M+O,0*M+O, +2*M+O,3*M+O,4*M+O,5*M+O,6*M+O,7*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,3*M+O,4*M+O,5*M+O,6*M+O,7*M+O,0*M+O, +1*M+O,3*M+O,4*M+O,5*M+O,6*M+O,7*M+O,0*M+O,0*M+O, +0*M+O,3*M+O,4*M+O,5*M+O,6*M+O,7*M+O,0*M+O,0*M+O, +3*M+O,4*M+O,5*M+O,6*M+O,7*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,2*M+O,4*M+O,5*M+O,6*M+O,7*M+O,0*M+O, +1*M+O,2*M+O,4*M+O,5*M+O,6*M+O,7*M+O,0*M+O,0*M+O, +0*M+O,2*M+O,4*M+O,5*M+O,6*M+O,7*M+O,0*M+O,0*M+O, +2*M+O,4*M+O,5*M+O,6*M+O,7*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,4*M+O,5*M+O,6*M+O,7*M+O,0*M+O,0*M+O, +1*M+O,4*M+O,5*M+O,6*M+O,7*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,4*M+O,5*M+O,6*M+O,7*M+O,0*M+O,0*M+O,0*M+O, +4*M+O,5*M+O,6*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,2*M+O,3*M+O,5*M+O,6*M+O,7*M+O,0*M+O, +1*M+O,2*M+O,3*M+O,5*M+O,6*M+O,7*M+O,0*M+O,0*M+O, +0*M+O,2*M+O,3*M+O,5*M+O,6*M+O,7*M+O,0*M+O,0*M+O, +2*M+O,3*M+O,5*M+O,6*M+O,7*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,3*M+O,5*M+O,6*M+O,7*M+O,0*M+O,0*M+O, +1*M+O,3*M+O,5*M+O,6*M+O,7*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,3*M+O,5*M+O,6*M+O,7*M+O,0*M+O,0*M+O,0*M+O, +3*M+O,5*M+O,6*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,2*M+O,5*M+O,6*M+O,7*M+O,0*M+O,0*M+O, +1*M+O,2*M+O,5*M+O,6*M+O,7*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,2*M+O,5*M+O,6*M+O,7*M+O,0*M+O,0*M+O,0*M+O, +2*M+O,5*M+O,6*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,5*M+O,6*M+O,7*M+O,0*M+O,0*M+O,0*M+O, +1*M+O,5*M+O,6*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,5*M+O,6*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +5*M+O,6*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,2*M+O,3*M+O,4*M+O,6*M+O,7*M+O,0*M+O, +1*M+O,2*M+O,3*M+O,4*M+O,6*M+O,7*M+O,0*M+O,0*M+O, +0*M+O,2*M+O,3*M+O,4*M+O,6*M+O,7*M+O,0*M+O,0*M+O, +2*M+O,3*M+O,4*M+O,6*M+O,7*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,3*M+O,4*M+O,6*M+O,7*M+O,0*M+O,0*M+O, +1*M+O,3*M+O,4*M+O,6*M+O,7*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,3*M+O,4*M+O,6*M+O,7*M+O,0*M+O,0*M+O,0*M+O, +3*M+O,4*M+O,6*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,2*M+O,4*M+O,6*M+O,7*M+O,0*M+O,0*M+O, +1*M+O,2*M+O,4*M+O,6*M+O,7*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,2*M+O,4*M+O,6*M+O,7*M+O,0*M+O,0*M+O,0*M+O, +2*M+O,4*M+O,6*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,4*M+O,6*M+O,7*M+O,0*M+O,0*M+O,0*M+O, +1*M+O,4*M+O,6*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,4*M+O,6*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +4*M+O,6*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,2*M+O,3*M+O,6*M+O,7*M+O,0*M+O,0*M+O, +1*M+O,2*M+O,3*M+O,6*M+O,7*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,2*M+O,3*M+O,6*M+O,7*M+O,0*M+O,0*M+O,0*M+O, +2*M+O,3*M+O,6*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,3*M+O,6*M+O,7*M+O,0*M+O,0*M+O,0*M+O, +1*M+O,3*M+O,6*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,3*M+O,6*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +3*M+O,6*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,2*M+O,6*M+O,7*M+O,0*M+O,0*M+O,0*M+O, +1*M+O,2*M+O,6*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,2*M+O,6*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O, 
+2*M+O,6*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,6*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +1*M+O,6*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,6*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +6*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,2*M+O,3*M+O,4*M+O,5*M+O,7*M+O,0*M+O, +1*M+O,2*M+O,3*M+O,4*M+O,5*M+O,7*M+O,0*M+O,0*M+O, +0*M+O,2*M+O,3*M+O,4*M+O,5*M+O,7*M+O,0*M+O,0*M+O, +2*M+O,3*M+O,4*M+O,5*M+O,7*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,3*M+O,4*M+O,5*M+O,7*M+O,0*M+O,0*M+O, +1*M+O,3*M+O,4*M+O,5*M+O,7*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,3*M+O,4*M+O,5*M+O,7*M+O,0*M+O,0*M+O,0*M+O, +3*M+O,4*M+O,5*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,2*M+O,4*M+O,5*M+O,7*M+O,0*M+O,0*M+O, +1*M+O,2*M+O,4*M+O,5*M+O,7*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,2*M+O,4*M+O,5*M+O,7*M+O,0*M+O,0*M+O,0*M+O, +2*M+O,4*M+O,5*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,4*M+O,5*M+O,7*M+O,0*M+O,0*M+O,0*M+O, +1*M+O,4*M+O,5*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,4*M+O,5*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +4*M+O,5*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,2*M+O,3*M+O,5*M+O,7*M+O,0*M+O,0*M+O, +1*M+O,2*M+O,3*M+O,5*M+O,7*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,2*M+O,3*M+O,5*M+O,7*M+O,0*M+O,0*M+O,0*M+O, +2*M+O,3*M+O,5*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,3*M+O,5*M+O,7*M+O,0*M+O,0*M+O,0*M+O, +1*M+O,3*M+O,5*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,3*M+O,5*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +3*M+O,5*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,2*M+O,5*M+O,7*M+O,0*M+O,0*M+O,0*M+O, +1*M+O,2*M+O,5*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,2*M+O,5*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +2*M+O,5*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,5*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +1*M+O,5*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,5*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +5*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,2*M+O,3*M+O,4*M+O,7*M+O,0*M+O,0*M+O, +1*M+O,2*M+O,3*M+O,4*M+O,7*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,2*M+O,3*M+O,4*M+O,7*M+O,0*M+O,0*M+O,0*M+O, +2*M+O,3*M+O,4*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,3*M+O,4*M+O,7*M+O,0*M+O,0*M+O,0*M+O, +1*M+O,3*M+O,4*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,3*M+O,4*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +3*M+O,4*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,2*M+O,4*M+O,7*M+O,0*M+O,0*M+O,0*M+O, +1*M+O,2*M+O,4*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,2*M+O,4*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +2*M+O,4*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,4*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +1*M+O,4*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,4*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +4*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,2*M+O,3*M+O,7*M+O,0*M+O,0*M+O,0*M+O, +1*M+O,2*M+O,3*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,2*M+O,3*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +2*M+O,3*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,3*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +1*M+O,3*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,3*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +3*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,2*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +1*M+O,2*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,2*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +2*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +1*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,7*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +7*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,2*M+O,3*M+O,4*M+O,5*M+O,6*M+O,0*M+O, +1*M+O,2*M+O,3*M+O,4*M+O,5*M+O,6*M+O,0*M+O,0*M+O, 
+0*M+O,2*M+O,3*M+O,4*M+O,5*M+O,6*M+O,0*M+O,0*M+O, +2*M+O,3*M+O,4*M+O,5*M+O,6*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,3*M+O,4*M+O,5*M+O,6*M+O,0*M+O,0*M+O, +1*M+O,3*M+O,4*M+O,5*M+O,6*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,3*M+O,4*M+O,5*M+O,6*M+O,0*M+O,0*M+O,0*M+O, +3*M+O,4*M+O,5*M+O,6*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,2*M+O,4*M+O,5*M+O,6*M+O,0*M+O,0*M+O, +1*M+O,2*M+O,4*M+O,5*M+O,6*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,2*M+O,4*M+O,5*M+O,6*M+O,0*M+O,0*M+O,0*M+O, +2*M+O,4*M+O,5*M+O,6*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,4*M+O,5*M+O,6*M+O,0*M+O,0*M+O,0*M+O, +1*M+O,4*M+O,5*M+O,6*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,4*M+O,5*M+O,6*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +4*M+O,5*M+O,6*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,2*M+O,3*M+O,5*M+O,6*M+O,0*M+O,0*M+O, +1*M+O,2*M+O,3*M+O,5*M+O,6*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,2*M+O,3*M+O,5*M+O,6*M+O,0*M+O,0*M+O,0*M+O, +2*M+O,3*M+O,5*M+O,6*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,3*M+O,5*M+O,6*M+O,0*M+O,0*M+O,0*M+O, +1*M+O,3*M+O,5*M+O,6*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,3*M+O,5*M+O,6*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +3*M+O,5*M+O,6*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,2*M+O,5*M+O,6*M+O,0*M+O,0*M+O,0*M+O, +1*M+O,2*M+O,5*M+O,6*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,2*M+O,5*M+O,6*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +2*M+O,5*M+O,6*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,5*M+O,6*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +1*M+O,5*M+O,6*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,5*M+O,6*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +5*M+O,6*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,2*M+O,3*M+O,4*M+O,6*M+O,0*M+O,0*M+O, +1*M+O,2*M+O,3*M+O,4*M+O,6*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,2*M+O,3*M+O,4*M+O,6*M+O,0*M+O,0*M+O,0*M+O, +2*M+O,3*M+O,4*M+O,6*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,3*M+O,4*M+O,6*M+O,0*M+O,0*M+O,0*M+O, +1*M+O,3*M+O,4*M+O,6*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,3*M+O,4*M+O,6*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +3*M+O,4*M+O,6*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,2*M+O,4*M+O,6*M+O,0*M+O,0*M+O,0*M+O, +1*M+O,2*M+O,4*M+O,6*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,2*M+O,4*M+O,6*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +2*M+O,4*M+O,6*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,4*M+O,6*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +1*M+O,4*M+O,6*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,4*M+O,6*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +4*M+O,6*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,2*M+O,3*M+O,6*M+O,0*M+O,0*M+O,0*M+O, +1*M+O,2*M+O,3*M+O,6*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,2*M+O,3*M+O,6*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +2*M+O,3*M+O,6*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,3*M+O,6*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +1*M+O,3*M+O,6*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,3*M+O,6*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +3*M+O,6*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,2*M+O,6*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +1*M+O,2*M+O,6*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,2*M+O,6*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +2*M+O,6*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,6*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +1*M+O,6*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,6*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +6*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,2*M+O,3*M+O,4*M+O,5*M+O,0*M+O,0*M+O, +1*M+O,2*M+O,3*M+O,4*M+O,5*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,2*M+O,3*M+O,4*M+O,5*M+O,0*M+O,0*M+O,0*M+O, +2*M+O,3*M+O,4*M+O,5*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,3*M+O,4*M+O,5*M+O,0*M+O,0*M+O,0*M+O, +1*M+O,3*M+O,4*M+O,5*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,3*M+O,4*M+O,5*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +3*M+O,4*M+O,5*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,2*M+O,4*M+O,5*M+O,0*M+O,0*M+O,0*M+O, 
+1*M+O,2*M+O,4*M+O,5*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,2*M+O,4*M+O,5*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +2*M+O,4*M+O,5*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,4*M+O,5*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +1*M+O,4*M+O,5*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,4*M+O,5*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +4*M+O,5*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,2*M+O,3*M+O,5*M+O,0*M+O,0*M+O,0*M+O, +1*M+O,2*M+O,3*M+O,5*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,2*M+O,3*M+O,5*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +2*M+O,3*M+O,5*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,3*M+O,5*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +1*M+O,3*M+O,5*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,3*M+O,5*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +3*M+O,5*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,2*M+O,5*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +1*M+O,2*M+O,5*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,2*M+O,5*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +2*M+O,5*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,5*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +1*M+O,5*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,5*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +5*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,2*M+O,3*M+O,4*M+O,0*M+O,0*M+O,0*M+O, +1*M+O,2*M+O,3*M+O,4*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,2*M+O,3*M+O,4*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +2*M+O,3*M+O,4*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,3*M+O,4*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +1*M+O,3*M+O,4*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,3*M+O,4*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +3*M+O,4*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,2*M+O,4*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +1*M+O,2*M+O,4*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,2*M+O,4*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +2*M+O,4*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,4*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +1*M+O,4*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,4*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +4*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,2*M+O,3*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +1*M+O,2*M+O,3*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,2*M+O,3*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +2*M+O,3*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,3*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +1*M+O,3*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,3*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +3*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,2*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +1*M+O,2*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,2*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +2*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,1*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +1*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O,0*M+O, +]) +}; + #[cfg(test)] mod test { use super::*; diff --git a/src/lib.rs b/src/lib.rs index ccac38c..a3dc7b9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,6 +3,7 @@ //! The main functions are: //! - [`minimizer_positions`]: compute the positions of all minimizers of a sequence. //! - [`canonical_minimizer_positions`]: compute the positions of all _canonical_ minimizers of a sequence. +//! //! Adjacent equal positions are deduplicated, but since the canonical minimizer is _not_ _forward_, a position could appear more than once. //! //! The implementation uses SIMD by splitting each sequence into 8 chunks and processing those in parallel. @@ -11,8 +12,7 @@ //! //! 
The minimizer of a single window can be found using [`one_minimizer`] and [`one_canonical_minimizer`], but note that these functions are not nearly as efficient.
 //!
-//! The [`scalar`] versions are mostly for testing only, and basically always slower.
-//! Only for short sequences with length up to 100 is [`scalar::minimizer_positions_scalar`] faster than the SIMD version.
+//! The `scalar` versions are mostly for testing only, and basically always slower.
 //!
 //! ## Minimizers
 //!
@@ -44,11 +44,19 @@
 //! 3. Compute the 'preferred' strand of the current window as the one with more `TG` characters. This requires `l=w+k-1` to be odd for proper tie-breaking.
 //! 4. Return either the leftmost or rightmost smallest k-mer, depending on the preferred strand.
 //!
+//! ## Syncmers
+//!
+//! _Syncmers_ are (in our notation) windows of length `l = w + k - 1` characters where the minimizer k-mer is a prefix or suffix.
+//! (Or, in classical notation, `k`-mers with the smallest `s`-mer as prefix or suffix.)
+//! These can be computed by using [`closed_syncmers`] or [`canonical_closed_syncmers`] instead of [`minimizers`] or [`canonical_minimizers`]; [`open_syncmers`] and [`canonical_open_syncmers`] instead select windows whose minimizer is exactly in the middle.
+//!
+//! Note that canonical syncmer values are chosen as the minimum of the forward and reverse-complement k-mer representations.
+//!
 //! ## Input types
 //!
-//! This crate depends on [`packed-seq`] to handle generic types of input sequences.
+//! This crate depends on [`packed_seq`] to handle generic types of input sequences.
 //! Most commonly, one should use [`packed_seq::PackedSeqVec`] for packed DNA sequences, but one can also simply wrap a sequence of `ACTGactg` characters in [`packed_seq::AsciiSeqVec`].
-//! Additionally, [`simd-minimizers`] works on general (ASCII) `&[u8]` text.
+//! Additionally, `simd_minimizers` works on general (ASCII) `&[u8]` text.
 //!
 //! The main function provided by [`packed_seq`] is [`packed_seq::Seq::iter_bp`], which splits the input into 8 chunks and iterates them in parallel using SIMD.
 //!
@@ -153,9 +161,10 @@ mod canonical;
 pub mod collect;
 mod minimizers;
 mod sliding_min;
+pub mod syncmers;

 mod intrinsics {
     mod dedup;
-    pub use dedup::{append_unique_vals, append_unique_vals_2};
+    pub use dedup::{append_filtered_vals, append_unique_vals, append_unique_vals_2};
 }

 #[cfg(test)]
@@ -197,12 +206,18 @@ use seq_hash::KmerHasher;
 pub use minimizers::one_minimizer;
 use seq_hash::NtHasher;
 pub use sliding_min::Cache;
+use syncmers::CollectSyncmers;
+use syncmers::collect_syncmers_scalar;

 thread_local! {
     static CACHE: std::cell::RefCell<(Cache, Vec<u32>, Vec<u32>)> = std::cell::RefCell::new(Default::default());
 }

-pub struct Builder<'h, const CANONICAL: bool, H: KmerHasher, SkPos> {
+/// `CANONICAL`: true for canonical minimizers.
+/// `H`: the k-mer hasher to use.
+/// `SkPos`: type of super-k-mer position storage. Use `()` to disable super-k-mers.
+/// `SYNCMERS`: 0 for minimizers, 1 for closed syncmers, 2 for open syncmers.
+pub struct Builder<'h, const CANONICAL: bool, H: KmerHasher, SkPos, const SYNCMERS: u8> {
     k: usize,
     w: usize,
     hasher: Option<&'h H>,
@@ -210,13 +225,46 @@ pub struct Builder<'h, const CANONICAL: bool, H: KmerHasher, SkPos> {
 }

 pub struct Output<'o, const CANONICAL: bool, S> {
-    k: usize,
+    /// k for minimizers, k+w-1 for syncmers
+    len: usize,
     seq: S,
     min_pos: &'o Vec<u32>,
 }

 #[must_use]
-pub fn minimizers(k: usize, w: usize) -> Builder<'static, false, NtHasher, ()> {
+pub const fn minimizers(k: usize, w: usize) -> Builder<'static, false, NtHasher, (), 0> {
+    Builder {
+        k,
+        w,
+        hasher: None,
+        sk_pos: (),
+    }
+}
+
+#[must_use]
+pub const fn canonical_minimizers(
+    k: usize,
+    w: usize,
+) -> Builder<'static, true, NtHasher, (), 0> {
+    Builder {
+        k,
+        w,
+        hasher: None,
+        sk_pos: (),
+    }
+}
+
+/// Return positions/values of *closed* syncmers of length `k+w-1`.
+///
+/// These are windows with the minimizer at the start or end of the window.
+///
+/// `k` here corresponds to `s` in original syncmer notation: the minimizer length.
+/// `k+w-1` corresponds to `k` in original syncmer notation: the length of the extracted string.
+#[must_use]
+pub const fn closed_syncmers(
+    k: usize,
+    w: usize,
+) -> Builder<'static, false, NtHasher, (), 1> {
     Builder {
         k,
         w,
@@ -226,7 +274,10 @@ pub fn minimizers(k: usize, w: usize) -> Builder<'static, false, NtHasher, ()> {
 }

+/// Canonical version of [`closed_syncmers`].
 #[must_use]
-pub fn canonical_minimizers(k: usize, w: usize) -> Builder<'static, true, NtHasher, ()> {
+pub const fn canonical_closed_syncmers(
+    k: usize,
+    w: usize,
+) -> Builder<'static, true, NtHasher, (), 1> {
     Builder {
         k,
         w,
@@ -235,9 +286,43 @@ pub fn canonical_minimizers(k: usize, w: usize) -> Builder<'static, true, NtHasher, ()> {
     }
 }

-impl<const CANONICAL: bool> Builder<'static, CANONICAL, NtHasher, ()> {
+/// Return positions/values of *open* syncmers of length `k+w-1`.
+///
+/// These are windows with the minimizer in the middle of the window. This requires `w` to be odd.
+///
+/// `k` here corresponds to `s` in original syncmer notation: the minimizer length.
+/// `k+w-1` corresponds to `k` in original syncmer notation: the length of the extracted string.
+#[must_use]
+pub const fn open_syncmers(k: usize, w: usize) -> Builder<'static, false, NtHasher, (), 2> {
+    Builder {
+        k,
+        w,
+        hasher: None,
+        sk_pos: (),
+    }
+}
+
+/// Canonical version of [`open_syncmers`].
+#[must_use]
+pub const fn canonical_open_syncmers(
+    k: usize,
+    w: usize,
+) -> Builder<'static, true, NtHasher, (), 2> {
+    Builder {
+        k,
+        w,
+        hasher: None,
+        sk_pos: (),
+    }
+}
+
+impl<const CANONICAL: bool, const SYNCMERS: u8>
+    Builder<'static, CANONICAL, NtHasher, (), SYNCMERS>
+{
     #[must_use]
-    pub fn hasher<'h, H2: KmerHasher>(&self, hasher: &'h H2) -> Builder<'h, CANONICAL, H2, ()> {
+    pub const fn hasher<'h, H2: KmerHasher>(
+        &self,
+        hasher: &'h H2,
+    ) -> Builder<'h, CANONICAL, H2, (), SYNCMERS> {
         Builder {
             k: self.k,
             w: self.w,
@@ -246,23 +331,25 @@ impl<const CANONICAL: bool> Builder<'static, CANONICAL, NtHasher, ()> {
         }
     }
 }
-impl<'h, const CANONICAL: bool, H: KmerHasher> Builder<'h, CANONICAL, H, ()> {
+impl<'h, const CANONICAL: bool, H: KmerHasher> Builder<'h, CANONICAL, H, (), 0> {
     #[must_use]
-    pub fn super_kmers<'o2>(
+    pub const fn super_kmers<'o2>(
         &self,
         sk_pos: &'o2 mut Vec<u32>,
-    ) -> Builder<'h, CANONICAL, H, &'o2 mut Vec<u32>> {
+    ) -> Builder<'h, CANONICAL, H, &'o2 mut Vec<u32>, 0> {
         Builder {
             k: self.k,
             w: self.w,
             hasher: self.hasher,
-            sk_pos: sk_pos,
+            sk_pos,
         }
     }
 }

 /// Without-superkmer version
-impl<'h, const CANONICAL: bool, H: KmerHasher> Builder<'h, CANONICAL, H, ()> {
+impl<'h, const CANONICAL: bool, H: KmerHasher, const SYNCMERS: u8>
+    Builder<'h, CANONICAL, H, (), SYNCMERS>
+{
     pub fn run_scalar_once<'s, SEQ: Seq<'s>>(&self, seq: SEQ) -> Vec<u32> {
         let mut min_pos = vec![];
         self.run_impl::<false>(seq, &mut min_pos);
@@ -301,30 +388,63 @@ impl<'h, const CANONICAL: bool, H: KmerHasher> Builder<'h, CANONICAL, H, ()> {
             .hasher
             .unwrap_or_else(|| default_hasher.as_ref().unwrap());

-        CACHE.with_borrow_mut(|cache| match (SIMD, CANONICAL) {
-            (false, false) => collect_and_dedup_into_scalar(
+        CACHE.with_borrow_mut(|cache| match (SIMD, CANONICAL, SYNCMERS) {
+            (false, false, 0) => collect_and_dedup_into_scalar(
+                minimizers_seq_scalar(seq, hasher, self.w, &mut cache.0),
+                min_pos,
+            ),
+            (false, false, 1) => collect_syncmers_scalar::<false>(
+                self.w,
                 minimizers_seq_scalar(seq, hasher, self.w, &mut cache.0),
                 min_pos,
             ),
-            (false, true) => collect_and_dedup_into_scalar(
+            (false, false, 2) => collect_syncmers_scalar::<true>(
+                self.w,
+                minimizers_seq_scalar(seq, hasher, self.w, &mut cache.0),
+                min_pos,
+            ),
+            (false, true, 0) => collect_and_dedup_into_scalar(
+                canonical_minimizers_seq_scalar(seq, hasher, self.w, &mut cache.0),
+                min_pos,
+            ),
+            (false, true, 1) => collect_syncmers_scalar::<false>(
+                self.w,
+                canonical_minimizers_seq_scalar(seq, hasher, self.w, &mut cache.0),
+                min_pos,
+            ),
+            (false, true, 2) => collect_syncmers_scalar::<true>(
+                self.w,
                 canonical_minimizers_seq_scalar(seq, hasher, self.w, &mut cache.0),
                 min_pos,
             ),
-            (true, false) => minimizers_seq_simd(seq, hasher, self.w, &mut cache.0)
+            (true, false, 0) => minimizers_seq_simd(seq, hasher, self.w, &mut cache.0)
                 .collect_and_dedup_into::<false>(min_pos),
-            (true, true) => canonical_minimizers_seq_simd(seq, hasher, self.w, &mut cache.0)
+            (true, false, 1) => minimizers_seq_simd(seq, hasher, self.w, &mut cache.0)
+                .collect_syncmers_into::<false>(self.w, min_pos),
+            (true, false, 2) => minimizers_seq_simd(seq, hasher, self.w, &mut cache.0)
+                .collect_syncmers_into::<true>(self.w, min_pos),
+            (true, true, 0) => canonical_minimizers_seq_simd(seq, hasher, self.w, &mut cache.0)
                 .collect_and_dedup_into::<false>(min_pos),
+            (true, true, 1) => canonical_minimizers_seq_simd(seq, hasher, self.w, &mut cache.0)
+                .collect_syncmers_into::<false>(self.w, min_pos),
+            (true, true, 2) => canonical_minimizers_seq_simd(seq, hasher, self.w, &mut cache.0)
+                .collect_syncmers_into::<true>(self.w, min_pos),
+            _ => unreachable!("SYNCMERS generic must be 0 (no syncmers), 1 (closed syncmers), or 2 (open syncmers)."),
         });

         Output {
-            k: self.k,
+            len: if SYNCMERS != 0 {
+                self.k + self.w - 1
+            } else {
+                self.k
+            },
             seq,
             min_pos,
         }
     }
 }

-impl<'h, H: KmerHasher> Builder<'h, true, H, ()> {
-    pub fn run_skip_ambiguous_windows_once<'s, 'o>(&self, nseq: PackedNSeq<'s>) -> Vec<u32> {
+impl<'h, H: KmerHasher, const SYNCMERS: u8> Builder<'h, true, H, (), SYNCMERS> {
+    pub fn run_skip_ambiguous_windows_once<'s>(&self, nseq: PackedNSeq<'s>) -> Vec<u32> {
         let mut min_pos = vec![];
         self.run_skip_ambiguous_windows(nseq, &mut min_pos);
         min_pos
@@ -347,10 +467,23 @@ impl<'h, H: KmerHasher> Builder<'h, true, H, ()> {
         let hasher = self
             .hasher
             .unwrap_or_else(|| default_hasher.as_ref().unwrap());
-        canonical_minimizers_skip_ambiguous_windows(nseq, hasher, self.w, cache)
-            .collect_and_dedup_into::<true>(min_pos);
+        match SYNCMERS {
+            0 => canonical_minimizers_skip_ambiguous_windows(nseq, hasher, self.w, cache)
+                .collect_and_dedup_into::<true>(min_pos),
+            1 => canonical_minimizers_skip_ambiguous_windows(nseq, hasher, self.w, cache)
+                .collect_syncmers_into::<false>(self.w, min_pos),
+            2 => canonical_minimizers_skip_ambiguous_windows(nseq, hasher, self.w, cache)
+                .collect_syncmers_into::<true>(self.w, min_pos),
+            _ => panic!(
+                "SYNCMERS generic must be 0 (no syncmers), 1 (closed syncmers), or 2 (open syncmers)."
+            ),
+        }
         Output {
-            k: self.k,
+            len: if SYNCMERS != 0 {
+                self.k + self.w - 1
+            } else {
+                self.k
+            },
             seq: nseq.seq,
             min_pos,
         }
     }
 }

 /// With-superkmer version
-impl<'h, 'o2, const CANONICAL: bool, H: KmerHasher> Builder<'h, CANONICAL, H, &'o2 mut Vec<u32>> {
+///
+/// (does not work in combination with syncmers)
+impl<'h, 'o2, const CANONICAL: bool, H: KmerHasher>
+    Builder<'h, CANONICAL, H, &'o2 mut Vec<u32>, 0>
+{
     pub fn run_scalar_once<'s, SEQ: Seq<'s>>(self, seq: SEQ) -> Vec<u32> {
         let mut min_pos = vec![];
         self.run_scalar(seq, &mut min_pos);
@@ -388,7 +525,7 @@ impl<'h, 'o2, const CANONICAL: bool, H: KmerHasher> Builder<'h, CANONICAL, H, &'o2 mut Vec<u32>> {
             ),
         });
         Output {
-            k: self.k,
+            len: self.k,
             seq,
             min_pos,
         }
@@ -427,7 +564,7 @@ impl<'h, 'o2, const CANONICAL: bool, H: KmerHasher> Builder<'h, CANONICAL, H, &'o2 mut Vec<u32>> {
                 .collect_and_dedup_with_index_into(min_pos, self.sk_pos),
         };
         Output {
-            k: self.k,
+            len: self.k,
             seq,
             min_pos,
         }
@@ -452,11 +589,11 @@ impl<'s, 'o, const CANONICAL: bool, SEQ: Seq<'s>> Output<'o, CANONICAL, SEQ> {
             #[inline(always)]
             move |&pos| {
                 let val = if CANONICAL {
-                    let a = self.seq.read_kmer(self.k, pos as usize);
-                    let b = self.seq.read_revcomp_kmer(self.k, pos as usize);
+                    let a = self.seq.read_kmer(self.len, pos as usize);
+                    let b = self.seq.read_revcomp_kmer(self.len, pos as usize);
                     core::cmp::min(a, b)
                 } else {
-                    self.seq.read_kmer(self.k, pos as usize)
+                    self.seq.read_kmer(self.len, pos as usize)
                 };
                 (pos, val)
             },
@@ -469,11 +606,11 @@ impl<'s, 'o, const CANONICAL: bool, SEQ: Seq<'s>> Output<'o, CANONICAL, SEQ> {
             #[inline(always)]
             move |&pos| {
                 let val = if CANONICAL {
-                    let a = self.seq.read_kmer_u128(self.k, pos as usize);
-                    let b = self.seq.read_revcomp_kmer_u128(self.k, pos as usize);
+                    let a = self.seq.read_kmer_u128(self.len, pos as usize);
+                    let b = self.seq.read_revcomp_kmer_u128(self.len, pos as usize);
                     core::cmp::min(a, b)
                 } else {
-                    self.seq.read_kmer_u128(self.k, pos as usize)
+                    self.seq.read_kmer_u128(self.len, pos as usize)
                 };
                 (pos, val)
             },
diff --git a/src/syncmers.rs b/src/syncmers.rs
new file mode 100644
index 0000000..ec3fec3
--- /dev/null
+++ b/src/syncmers.rs
@@ -0,0 +1,171 @@
+//! Collect syncmer positions from iterators of minimizer positions into a flat `Vec<u32>`.
+
+#![allow(clippy::uninit_vec)]
+
+use std::{
+    array::{self, from_fn},
+    cell::RefCell,
+};
+
+use crate::{S, minimizers::SKIPPED};
+use packed_seq::{ChunkIt, L, PaddedIt, intrinsics::transpose};
+use wide::u32x8;
+
+/// Collect positions of all syncmers.
+/// `OPEN`:
+/// - `false`: closed syncmers
+/// - `true`: open syncmers
+pub fn collect_syncmers_scalar<const OPEN: bool>(
+    w: usize,
+    it: impl Iterator<Item = u32>,
+    out_vec: &mut Vec<u32>,
+) {
+    if OPEN {
+        assert!(
+            w % 2 == 1,
+            "Open syncmers require an odd window size, so that there is a unique middle element."
+        );
+    }
+    unsafe { out_vec.set_len(out_vec.capacity()) };
+    let mut idx = 0;
+    it.enumerate().for_each(|(i, min_pos)| {
+        let is_syncmer = if OPEN {
+            min_pos as usize == i + w / 2
+        } else {
+            min_pos as usize == i || min_pos as usize == i + w - 1
+        };
+        if is_syncmer {
+            if idx == out_vec.len() {
+                out_vec.reserve(1);
+                unsafe { out_vec.set_len(out_vec.capacity()) };
+            }
+            *unsafe { out_vec.get_unchecked_mut(idx) } = i as u32;
+            idx += 1;
+        }
+    });
+    out_vec.truncate(idx);
+}
+
+pub trait CollectSyncmers: Sized {
+    /// Collect all indices where syncmers start.
+    ///
+    /// Automatically skips `SIMD_SKIPPED` values for ambiguous windows, for sequences shorter than 2^32-2 or so.
+    fn collect_syncmers<const OPEN: bool>(self, w: usize) -> Vec<u32> {
+        let mut v = vec![];
+        self.collect_syncmers_into::<OPEN>(w, &mut v);
+        v
+    }
+
+    /// Collect all indices where syncmers start into `out_vec`.
+    ///
+    /// Automatically skips `SIMD_SKIPPED` values for ambiguous windows, for sequences shorter than 2^32-2 or so.
+    fn collect_syncmers_into<const OPEN: bool>(self, w: usize, out_vec: &mut Vec<u32>);
+}
+
+thread_local! {
+    static CACHE: RefCell<[Vec<u32>; 8]> = RefCell::new(array::from_fn(|_| Vec::new()));
+}
+
+impl<It: ChunkIt<S>> CollectSyncmers for PaddedIt<It> {
+    // mostly copied from `Collect::collect_minimizers_into`
+    #[inline(always)]
+    fn collect_syncmers_into<const OPEN: bool>(self, w: usize, out_vec: &mut Vec<u32>) {
+        let Self { it, padding } = self;
+        CACHE.with(
+            #[inline(always)]
+            |v| {
+                let mut v = v.borrow_mut();
+
+                let mut write_idx = [0; 8];
+
+                let len = it.len();
+                let mut lane_offsets: u32x8 = u32x8::from(from_fn(|i| (i * len) as u32));
+
+                let mut mask = u32x8::ZERO;
+                let mut padding_i = 0;
+                let mut padding_idx = 0;
+                assert!(padding <= L * len, "padding {padding} <= L {L} * len {len}");
+                let mut remaining_padding = padding;
+                for i in (0..8).rev() {
+                    if remaining_padding >= len {
+                        mask.as_array_mut()[i] = u32::MAX;
+                        remaining_padding -= len;
+                        continue;
+                    }
+                    padding_i = len - remaining_padding;
+                    padding_idx = i;
+                    break;
+                }
+
+                // FIXME: Is this one slow?
+                let mut m = [u32x8::ZERO; 8];
+                let mut i = 0;
+                it.for_each(
+                    #[inline(always)]
+                    |x| {
+                        if i == padding_i {
+                            mask.as_array_mut()[padding_idx] = u32::MAX;
+                        }
+                        let x = x | mask;
+
+                        // Every non-syncmer minimizer pos is masked out.
+                        let is_syncmer = if OPEN {
+                            x.cmp_eq(lane_offsets + S::splat((w / 2) as u32))
+                        } else {
+                            x.cmp_eq(lane_offsets) | x.cmp_eq(lane_offsets + S::splat(w as u32 - 1))
+                        };
+                        // current window position if syncmer, else u32::MAX
+                        let y = is_syncmer.blend(lane_offsets, u32x8::MAX);
+
+                        m[i % 8] = y;
+                        if i % 8 == 7 {
+                            let t = transpose(m);
+                            for j in 0..8 {
+                                let lane = t[j];
+                                if write_idx[j] + 8 > v[j].len() {
+                                    v[j].reserve(8);
+                                    unsafe {
+                                        let new_len = v[j].capacity();
+                                        v[j].set_len(new_len);
+                                    }
+                                }
+                                unsafe {
+                                    crate::intrinsics::append_filtered_vals(
+                                        lane,
+                                        // skip masked out values
+                                        lane.cmp_eq(u32x8::MAX),
+                                        &mut v[j],
+                                        &mut write_idx[j],
+                                    );
+                                }
+                            }
+                        }
+                        i += 1;
+                        lane_offsets += S::ONE;
+                    },
+                );
+
+                for j in 0..8 {
+                    v[j].truncate(write_idx[j]);
+                }
+
+                // Manually write the unfinished parts of length k=i%8.
+                let t = transpose(m);
+                let k = i % 8;
+                for j in 0..8 {
+                    let lane = t[j].as_array_ref();
+                    for &x in lane.iter().take(k) {
+                        if x < SKIPPED {
+                            v[j].push(x);
+                        }
+                    }
+                }
+
+                // Flatten v.
+                for lane in v.iter() {
+                    out_vec.extend_from_slice(lane.as_slice());
+                }
+            },
+        )
+    }
+}
diff --git a/src/test.rs b/src/test.rs
index 9a8c7b4..ac17c8f 100644
--- a/src/test.rs
+++ b/src/test.rs
@@ -50,6 +50,7 @@ fn test_on_inputs(mut f: impl FnMut(usize, usize, &[u8], AsciiSeq, PackedSeq)) {
     }
 }

+#[cfg(not(debug_assertions))]
 #[test]
 fn minimizers_fwd() {
     fn f<H: KmerHasher>(hasher: impl Fn(usize) -> H) {
@@ -103,9 +104,9 @@ fn minimizers_canonical() {
             assert_eq!(scalar_ascii, simd_packed, "k={k}, w={w}, len={len}");
         });
     }
-    f(|k| NtHasher::<true>::new(k));
-    f(|k| MulHasher::<true>::new(k));
-    f(|k| AntiLexHasher::<true>::new(k));
+    f(NtHasher::<true>::new);
+    f(MulHasher::<true>::new);
+    f(AntiLexHasher::<true>::new);
 }

 #[test]
@@ -285,9 +286,8 @@ fn _builder<'s>(
 ) {
     let hasher = &<MulHasher>::new_with_seed(k, 1234);

-    // warning: unused
-    // minimizers(k, w);
-    // minimizers(k, w).hasher(hasher);
+    let _ = minimizers(k, w);
+    let _ = minimizers(k, w).hasher(hasher);

     minimizers(k, w).run(seq, min_pos);
     canonical_minimizers(k, w).run(seq, min_pos);
@@ -314,6 +314,21 @@ fn _builder<'s>(
     for _ in 0..10 {
         m.super_kmers(sk_pos).run(seq, min_pos);
     }
+    // syncmers
+    let _ = closed_syncmers(k, w).run(seq, min_pos);
+    let _ = closed_syncmers(k, w).run_once(seq);
+    let _ = closed_syncmers(k, w).run_scalar_once(seq);
+    let _ = canonical_closed_syncmers(k, w)
+        .run(seq, min_pos)
+        .pos_and_values_u64()
+        .collect_vec();
+    let _ = open_syncmers(k, w).run(seq, min_pos);
+    let _ = open_syncmers(k, w).run_once(seq);
+    let _ = open_syncmers(k, w).run_scalar_once(seq);
+    let _ = canonical_open_syncmers(k, w)
+        .run(seq, min_pos)
+        .pos_and_values_u64()
+        .collect_vec();
 }

 #[test]
@@ -465,3 +480,229 @@ fn skip_ambiguous() {
         }
     }
 }
+
+#[test]
+fn closed_syncmers_scalar() {
+    // Left-syncmers are selected.
+    let min_pos = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
+    let mut out = vec![];
+    collect_syncmers_scalar::<false>(5, min_pos.into_iter(), &mut out);
+    assert_eq!(out, vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
+    // Right-syncmers are selected.
+    let min_pos = vec![4, 5, 6, 7, 8, 9, 10, 11, 12, 13];
+    let mut out = vec![];
+    collect_syncmers_scalar::<false>(5, min_pos.into_iter(), &mut out);
+    assert_eq!(out, vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
+    // In-between are not selected.
+    let min_pos = vec![1, 2, 5, 5, 5, 8, 7, 10, 10, 10];
+    let mut out = vec![];
+    collect_syncmers_scalar::<false>(5, min_pos.into_iter(), &mut out);
+    assert_eq!(out, vec![]);
+}
+
+#[test]
+fn open_syncmers_scalar() {
+    // Middle are selected.
+    let min_pos = vec![2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
+    let mut out = vec![];
+    collect_syncmers_scalar::<true>(5, min_pos.into_iter(), &mut out);
+    assert_eq!(out, vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
+    // Left/Right/other
+    let min_pos = vec![0, 1, 6, 7, 7, 6, 6, 8, 11, 10];
+    let mut out = vec![];
+    collect_syncmers_scalar::<true>(5, min_pos.into_iter(), &mut out);
+    assert_eq!(out, vec![]);
+}
+
+#[cfg(not(debug_assertions))]
+#[test]
+fn syncmers_simd_fwd() {
+    // Closed
+    test_on_inputs(|k, w, _slice, ascii_seq, packed_seq| {
+        let hasher = <NtHasher<false>>::new(k);
+
+        let min_pos = ascii_seq
+            .0
+            .windows(w + k - 1)
+            .enumerate()
+            .map(|(pos, seq)| (pos + one_minimizer(AsciiSeq(seq), &hasher)) as u32)
+            .collect::<Vec<_>>();
+        let mut naive = vec![];
+        collect_syncmers_scalar::<false>(w, min_pos.into_iter(), &mut naive);
+
+        let m = closed_syncmers(k, w);
+        let scalar_ascii = m.run_scalar_once(ascii_seq);
+        let scalar_packed = m.run_scalar_once(packed_seq);
+        let simd_ascii = m.run_once(ascii_seq);
+        let simd_packed = m.run_once(packed_seq);
+
+        let len = ascii_seq.len();
+        assert_eq!(naive, scalar_ascii, "k={k}, w={w}, len={len}");
+        assert_eq!(naive, scalar_packed, "k={k}, w={w}, len={len}");
+        assert_eq!(naive, simd_ascii, "k={k}, w={w}, len={len}");
+        assert_eq!(naive, simd_packed, "k={k}, w={w}, len={len}");
+    });

+    // Open
+    test_on_inputs(|k, w, _slice, ascii_seq, packed_seq| {
+        if w % 2 == 0 {
+            return;
+        }
+        let hasher = <NtHasher<false>>::new(k);
+
+        let min_pos = ascii_seq
+            .0
+            .windows(w + k - 1)
+            .enumerate()
+            .map(|(pos, seq)| (pos + one_minimizer(AsciiSeq(seq), &hasher)) as u32)
+            .collect::<Vec<_>>();
+        let mut naive = vec![];
+        collect_syncmers_scalar::<true>(w, min_pos.into_iter(), &mut naive);
+
+        let m = open_syncmers(k, w);
+        let scalar_ascii = m.run_scalar_once(ascii_seq);
+        let scalar_packed = m.run_scalar_once(packed_seq);
+        let simd_ascii = m.run_once(ascii_seq);
+        let simd_packed = m.run_once(packed_seq);
+
+        let len = ascii_seq.len();
+        assert_eq!(naive, scalar_ascii, "k={k}, w={w}, len={len}");
+        assert_eq!(naive, scalar_packed, "k={k}, w={w}, len={len}");
+        assert_eq!(naive, simd_ascii, "k={k}, w={w}, len={len}");
+        assert_eq!(naive, simd_packed, "k={k}, w={w}, len={len}");
+    });
+}
+
+// Only for closed syncmers, since equal minimizers lead to 0 open syncmers.
+#[test]
+fn closed_syncmer_values() {
+    // Test on an all-`G` sequence, which packs to all 3s, so that each syncmer value is 4^(k+w-1)-1.
+    let n = 100;
+    let ascii_seq = vec![b'G'; n];
+    let packed_seq = PackedSeqVec::from_ascii(&ascii_seq);
+    let packed_seq = packed_seq.as_slice();
+    for k in 1..10 {
+        for w in 1..10 {
+            let pos = &mut vec![];
+            let m = closed_syncmers(k, w);
+            let out = m.run(packed_seq, pos);
+
+            let values = out.values_u64().collect_vec();
+            assert_eq!(values.len(), n - (k + w - 1) + 1);
+            for x in values {
+                assert_eq!(x, (1u64 << (2 * (k + w - 1))) - 1);
+            }
+        }
+    }
+}
+
+#[test]
+fn syncmers_canonical() {
+    // Closed
+    test_on_inputs(|k, w, _slice, ascii_seq, packed_seq| {
+        if (k + w - 1) % 2 == 0 {
+            return;
+        }
+        let m = canonical_closed_syncmers(k, w);
+
+        let scalar_ascii = m.run_scalar_once(ascii_seq);
+        let scalar_packed = m.run_scalar_once(packed_seq);
+        let simd_ascii = m.run_once(ascii_seq);
+        let simd_packed = m.run_once(packed_seq);
+
+        let len = ascii_seq.len();
+        assert_eq!(scalar_ascii, scalar_packed, "k={k}, w={w}, len={len}");
+        assert_eq!(scalar_ascii, simd_ascii, "k={k}, w={w}, len={len}");
+        assert_eq!(scalar_ascii, simd_packed, "k={k}, w={w}, len={len}");
+    });
+
+    // Open
+    test_on_inputs(|k, w, _slice, ascii_seq, packed_seq| {
+        if w % 2 == 0 {
+            return;
+        }
+        if (k + w - 1) % 2 == 0 {
+            return;
+        }
+        let m = canonical_open_syncmers(k, w);
+
+        let scalar_ascii = m.run_scalar_once(ascii_seq);
+        let scalar_packed = m.run_scalar_once(packed_seq);
+        let simd_ascii = m.run_once(ascii_seq);
+        let simd_packed = m.run_once(packed_seq);
+
+        let len = ascii_seq.len();
+        assert_eq!(scalar_ascii, scalar_packed, "k={k}, w={w}, len={len}");
+        assert_eq!(scalar_ascii, simd_ascii, "k={k}, w={w}, len={len}");
+        assert_eq!(scalar_ascii, simd_packed, "k={k}, w={w}, len={len}");
+    });
+}
+
+#[test]
+fn canonical_syncmers_positions_and_values() {
+    fn test<const OPEN: bool>() {
+        test_on_inputs(|k, w, _slice, ascii_seq, packed_seq| {
+            if k + w - 1 > 32 {
+                return;
+            }
+            if w % 2 == 0 {
+                return;
+            }
+            if (k + w - 1) % 2 == 0 {
+                return;
+            }
+
+            let packed_seq_rc = packed_seq.to_revcomp();
+            let packed_seq_rc = packed_seq_rc.as_slice();
+
+            let mut fwd_positions = vec![];
+            let mut rc_positions = vec![];
+            let fwd_values;
+            let mut rc_values;
+
+            if OPEN {
+                let m = canonical_open_syncmers(k, w);
+                fwd_values = m
+                    .run(packed_seq, &mut fwd_positions)
+                    .values_u64()
+                    .collect_vec();
+                rc_values = m
+                    .run(packed_seq_rc, &mut rc_positions)
+                    .values_u64()
+                    .collect_vec();
+            } else {
+                let m = canonical_closed_syncmers(k, w);
+                fwd_values = m
+                    .run(packed_seq, &mut fwd_positions)
+                    .values_u64()
+                    .collect_vec();
+                rc_values = m
+                    .run(packed_seq_rc, &mut rc_positions)
+                    .values_u64()
+                    .collect_vec();
+            }
+
+            // Check that positions are symmetric.
+            let len = ascii_seq.len();
+            for (&x, &y) in fwd_positions.iter().zip(rc_positions.iter().rev()) {
+                assert_eq!(
+                    (x + y) as usize,
+                    len - (k + w - 1),
+                    "k={k}, w={w}, fwd={x}, rc={y}"
+                );
+            }
+
+            // Check that values are the same.
+            rc_values.reverse();
+            assert_eq!(
+                fwd_values,
+                rc_values,
+                "k={k}, w={w}, len={}",
+                ascii_seq.len()
+            );
+        });
+    }
+
+    test::<false>();
+    test::<true>();
+}
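Usage note (not part of the patch): a minimal sketch of the syncmer API added above. It assumes the crate-root exports from `src/lib.rs` in this diff (`closed_syncmers`, `open_syncmers`, and the builder's `run_once`) together with the `packed_seq` helpers already used in the README example and tests (`PackedSeqVec::from_ascii`, `as_slice`); the sample sequence and the final disjointness assertion are illustrative only.

```rust
use packed_seq::{PackedSeqVec, SeqVec};
use simd_minimizers::{closed_syncmers, open_syncmers};

fn main() {
    // `w` must be odd for open syncmers, so that the middle position is unique.
    let (k, w) = (5, 7);
    let seq = PackedSeqVec::from_ascii(b"ACGTGGCATCAGCATTACGTCAGCATAGCT");

    // Start positions of (k+w-1)-mer windows whose minimizer k-mer is a prefix or suffix.
    let closed: Vec<u32> = closed_syncmers(k, w).run_once(seq.as_slice());

    // Start positions of windows whose minimizer k-mer sits exactly in the middle.
    let open: Vec<u32> = open_syncmers(k, w).run_once(seq.as_slice());

    // For w >= 3 the two sets are disjoint: a window's (unique) minimizer position
    // cannot be both at an end and in the middle.
    assert!(open.iter().all(|p| !closed.contains(p)));
    println!("closed: {closed:?}");
    println!("open: {open:?}");
}
```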