diff --git a/benches/simd_math_remaining_baseline/inverse_hyperbolic.rs b/benches/simd_math_remaining_baseline/inverse_hyperbolic.rs
index c8d3734..2ea9d50 100644
--- a/benches/simd_math_remaining_baseline/inverse_hyperbolic.rs
+++ b/benches/simd_math_remaining_baseline/inverse_hyperbolic.rs
@@ -1,5 +1,5 @@
 use criterion::Criterion;
-use simdeez::math::SimdMathF32InverseHyperbolic;
+use simdeez::math::{SimdMathF32InverseHyperbolic, SimdMathF64InverseHyperbolic};
 use simdeez::{prelude::*, simd_unsafe_generate_all};
 
 use crate::shared::{self, INPUT_LEN};
@@ -19,6 +19,21 @@ fn scalar_atanh_sum(input: &[f32]) -> f32 {
     input.iter().copied().map(f32::atanh).sum()
 }
 
+#[inline(never)]
+fn scalar_asinh_sum_f64(input: &[f64]) -> f64 {
+    input.iter().copied().map(f64::asinh).sum()
+}
+
+#[inline(never)]
+fn scalar_acosh_sum_f64(input: &[f64]) -> f64 {
+    input.iter().copied().map(f64::acosh).sum()
+}
+
+#[inline(never)]
+fn scalar_atanh_sum_f64(input: &[f64]) -> f64 {
+    input.iter().copied().map(f64::atanh).sum()
+}
+
 simd_unsafe_generate_all!(
     fn simdeez_asinh_sum(input: &[f32]) -> f32 {
         shared::simdeez_unary_sum_impl::<S>(input, |v| v.asinh_u35())
@@ -37,11 +52,35 @@ simd_unsafe_generate_all!(
     }
 );
 
+simd_unsafe_generate_all!(
+    fn simdeez_asinh_sum_f64(input: &[f64]) -> f64 {
+        shared::simdeez_unary_sum_impl_f64::<S>(input, |v| v.asinh_u35())
+    }
+);
+
+simd_unsafe_generate_all!(
+    fn simdeez_acosh_sum_f64(input: &[f64]) -> f64 {
+        shared::simdeez_unary_sum_impl_f64::<S>(input, |v| v.acosh_u35())
+    }
+);
+
+simd_unsafe_generate_all!(
+    fn simdeez_atanh_sum_f64(input: &[f64]) -> f64 {
+        shared::simdeez_unary_sum_impl_f64::<S>(input, |v| v.atanh_u35())
+    }
+);
+
 pub fn register(c: &mut Criterion) {
     let asinh_inputs = shared::make_unary_inputs(INPUT_LEN, 0xDEADB001, -16_384.0..16_384.0);
     let acosh_inputs = shared::make_positive_inputs(INPUT_LEN, 0xDEADB002, 1.0, 16_384.0);
     let atanh_inputs = shared::make_unary_inputs(INPUT_LEN, 0xDEADB003, -0.999_999..0.999_999);
 
+    let asinh_inputs_f64 =
+        shared::make_unary_inputs_f64(INPUT_LEN, 0xDEADB101, -16_384.0..16_384.0);
+    let acosh_inputs_f64 = shared::make_positive_inputs_f64(INPUT_LEN, 0xDEADB102, 1.0, 16_384.0);
+    let atanh_inputs_f64 =
+        shared::make_unary_inputs_f64(INPUT_LEN, 0xDEADB103, -0.999_999_999_999..0.999_999_999_999);
+
     shared::bench_unary(
         c,
         "simd_math_baseline/f32/asinh_u35",
@@ -65,4 +104,28 @@ pub fn register(c: &mut Criterion) {
         scalar_atanh_sum,
         simdeez_atanh_sum,
     );
+
+    shared::bench_unary_f64(
+        c,
+        "simd_math_baseline/f64/asinh_u35",
+        &asinh_inputs_f64,
+        scalar_asinh_sum_f64,
+        simdeez_asinh_sum_f64,
+    );
+
+    shared::bench_unary_f64(
+        c,
+        "simd_math_baseline/f64/acosh_u35",
+        &acosh_inputs_f64,
+        scalar_acosh_sum_f64,
+        simdeez_acosh_sum_f64,
+    );
+
+    shared::bench_unary_f64(
+        c,
+        "simd_math_baseline/f64/atanh_u35",
+        &atanh_inputs_f64,
+        scalar_atanh_sum_f64,
+        simdeez_atanh_sum_f64,
+    );
 }
diff --git a/benches/simd_math_remaining_baseline/shared.rs b/benches/simd_math_remaining_baseline/shared.rs
index de3e87e..14ef80a 100644
--- a/benches/simd_math_remaining_baseline/shared.rs
+++ b/benches/simd_math_remaining_baseline/shared.rs
@@ -16,6 +16,16 @@ pub fn make_positive_inputs(len: usize, seed: u64, min: f32, max: f32) -> Vec<f3
     (0..len).map(|_| rng.gen_range(min..max)).collect()
 }
 
+pub fn make_unary_inputs_f64(len: usize, seed: u64, range: core::ops::Range<f64>) -> Vec<f64> {
+    let mut rng = ChaCha8Rng::seed_from_u64(seed);
+    (0..len).map(|_| rng.gen_range(range.clone())).collect()
+}
+
+pub fn make_positive_inputs_f64(len: usize, seed: u64, min: f64, max: f64) -> Vec<f64> {
+    let mut rng = ChaCha8Rng::seed_from_u64(seed);
+    (0..len).map(|_| rng.gen_range(min..max)).collect()
+}
+
 pub fn make_binary_inputs(
     len: usize,
     seed: u64,
@@ -40,6 +50,18 @@ pub fn simdeez_unary_sum_impl<S: Simd>(input: &[f32], op: impl Fn(S::Vf32) -> S:
     sum
 }
 
+#[inline(always)]
+pub fn simdeez_unary_sum_impl_f64<S: Simd>(input: &[f64], op: impl Fn(S::Vf64) -> S::Vf64) -> f64 {
+    let mut sum = 0.0f64;
+    let mut i = 0;
+    while i + S::Vf64::WIDTH <= input.len() {
+        let v = S::Vf64::load_from_slice(&input[i..]);
+        sum += op(v).horizontal_add();
+        i += S::Vf64::WIDTH;
+    }
+    sum
+}
+
 #[inline(always)]
 pub fn simdeez_binary_sum_impl<S: Simd>(
     a: &[f32],
@@ -75,6 +97,24 @@ pub fn bench_unary(
     group.finish();
 }
 
+pub fn bench_unary_f64(
+    c: &mut Criterion,
+    name: &str,
+    input: &[f64],
+    scalar: fn(&[f64]) -> f64,
+    simd: fn(&[f64]) -> f64,
+) {
+    let mut group = c.benchmark_group(name);
+    group.throughput(Throughput::Elements(input.len() as u64));
+    group.bench_function("scalar-native", |b| {
+        b.iter(|| black_box(scalar(black_box(input))))
+    });
+    group.bench_function("simdeez-runtime", |b| {
+        b.iter(|| black_box(simd(black_box(input))))
+    });
+    group.finish();
+}
+
 pub fn bench_binary(
     c: &mut Criterion,
     name: &str,
diff --git a/src/math/f64/inverse_hyperbolic.rs b/src/math/f64/inverse_hyperbolic.rs
index 503988c..1ddde48 100644
--- a/src/math/f64/inverse_hyperbolic.rs
+++ b/src/math/f64/inverse_hyperbolic.rs
@@ -1,12 +1,73 @@
-use crate::math::{map, scalar};
-use crate::SimdFloat64;
+use crate::math::{f64, scalar};
+use crate::{Simd, SimdBaseIo, SimdBaseOps, SimdConsts, SimdFloat64};
+
+type SimdI64<V> = <<V as SimdConsts>::Engine as Simd>::Vi64;
+
+#[inline(always)]
+fn any_lane_nonzero<V>(mask: SimdI64<V>) -> bool
+where
+    V: SimdFloat64,
+{
+    unsafe {
+        let lanes = mask.as_array();
+        for lane in 0..V::WIDTH {
+            if lanes[lane] != 0 {
+                return true;
+            }
+        }
+    }
+
+    false
+}
+
+#[inline(always)]
+fn patch_exceptional_lanes<V>(
+    input: V,
+    output: V,
+    exceptional_mask: SimdI64<V>,
+    scalar_fallback: fn(f64) -> f64,
+) -> V
+where
+    V: SimdFloat64,
+{
+    if !any_lane_nonzero::<V>(exceptional_mask) {
+        return output;
+    }
+
+    unsafe {
+        let input_lanes = input.as_array();
+        let mask_lanes = exceptional_mask.as_array();
+        let mut output_lanes = output.as_array();
+
+        for lane in 0..V::WIDTH {
+            if mask_lanes[lane] != 0 {
+                output_lanes[lane] = scalar_fallback(input_lanes[lane]);
+            }
+        }
+
+        V::load_from_ptr_unaligned(&output_lanes as *const V::ArrayRepresentation as *const f64)
+    }
+}
 
 #[inline(always)]
 pub(crate) fn asinh_u35<V>(input: V) -> V
 where
     V: SimdFloat64,
 {
-    map::unary_f64(input, scalar::asinh_u35_f64)
+    let finite_mask = input.cmp_eq(input).bitcast_i64();
+    let abs_x = input.abs();
+    let tiny_mask = abs_x.cmp_lt(V::set1(1.0)).bitcast_i64();
+    let large_mask = abs_x.cmp_gt(V::set1(1.0e150)).bitcast_i64();
+    let zero_mask = input.cmp_eq(V::zeroes()).bitcast_i64();
+    let exceptional_mask =
+        finite_mask.cmp_eq(SimdI64::<V>::zeroes()) | tiny_mask | large_mask | zero_mask;
+
+    let radicand = (abs_x * abs_x) + V::set1(1.0);
+    let magnitude = f64::ln_u35(abs_x + radicand.sqrt());
+    let negative_mask = input.cmp_lt(V::zeroes());
+    let fast = negative_mask.blendv(magnitude, -magnitude);
+
+    patch_exceptional_lanes(input, fast, exceptional_mask, scalar::asinh_u35_f64)
 }
 
 #[inline(always)]
@@ -14,7 +75,15 @@ pub(crate) fn acosh_u35<V>(input: V) -> V
 where
     V: SimdFloat64,
 {
-    map::unary_f64(input, scalar::acosh_u35_f64)
+    let finite_mask = input.cmp_eq(input).bitcast_i64();
+    let in_domain_mask = input.cmp_gte(V::set1(1.0)).bitcast_i64();
+    let fast_mask = finite_mask & in_domain_mask;
+    let exceptional_mask = fast_mask.cmp_eq(SimdI64::<V>::zeroes());
+
+    let root_term = ((input - V::set1(1.0)).sqrt()) * ((input + V::set1(1.0)).sqrt());
+    let fast = f64::ln_u35(input + root_term);
+
+    patch_exceptional_lanes(input, fast, exceptional_mask, scalar::acosh_u35_f64)
 }
 
 #[inline(always)]
@@ -22,5 +91,19 @@ pub(crate) fn atanh_u35<V>(input: V) -> V
 where
     V: SimdFloat64,
 {
-    map::unary_f64(input, scalar::atanh_u35_f64)
+    let finite_mask = input.cmp_eq(input).bitcast_i64();
+    let abs_x = input.abs();
+    let strict_domain_mask = abs_x.cmp_lt(V::set1(1.0)).bitcast_i64();
+    let non_zero_mask = input.cmp_neq(V::zeroes()).bitcast_i64();
+    let stable_range_mask = abs_x.cmp_lte(V::set1(0.99)).bitcast_i64();
+    let away_from_zero_mask = abs_x.cmp_gte(V::set1(0.9)).bitcast_i64();
+    let fast_mask =
+        finite_mask & strict_domain_mask & non_zero_mask & stable_range_mask & away_from_zero_mask;
+    let exceptional_mask = fast_mask.cmp_eq(SimdI64::<V>::zeroes());
+
+    let one = V::set1(1.0);
+    let ratio = (one + input) / (one - input);
+    let fast = f64::ln_u35(ratio) * V::set1(0.5);
+
+    patch_exceptional_lanes(input, fast, exceptional_mask, scalar::atanh_u35_f64)
 }
diff --git a/src/tests/simd_math_targeted_edges/inverse_hyperbolic.rs b/src/tests/simd_math_targeted_edges/inverse_hyperbolic.rs
index b5390c4..c07c793 100644
--- a/src/tests/simd_math_targeted_edges/inverse_hyperbolic.rs
+++ b/src/tests/simd_math_targeted_edges/inverse_hyperbolic.rs
@@ -1,5 +1,5 @@
 use super::*;
-use crate::math::SimdMathF32InverseHyperbolic;
+use crate::math::{SimdMathF32InverseHyperbolic, SimdMathF64InverseHyperbolic};
 
 fn run_f32_inverse_hyperbolic_domain_edges<S: Simd>() {
     let acosh_inputs = [
@@ -135,3 +135,138 @@ simd_math_targeted_all_backends!(
     f32_inverse_hyperbolic_mixed_lanes,
     run_f32_inverse_hyperbolic_mixed_lanes
 );
+
+fn run_f64_inverse_hyperbolic_domain_edges<S: Simd>() {
+    let acosh_inputs = [
+        0.0f64,
+        0.5,
+        1.0 - f64::EPSILON,
+        1.0,
+        1.0 + f64::EPSILON,
+        1.000_000_000_001,
+        2.0,
+        10.0,
+        f64::INFINITY,
+        f64::NAN,
+    ];
+    check_targeted_unary_f64::<S>(
+        "acosh_u35",
+        &acosh_inputs,
+        contracts::ACOSH_U35_F64_MAX_ULP,
+        |v| v.acosh_u35(),
+        f64::acosh,
+    );
+
+    let atanh_inputs = [
+        -1.0f64,
+        -1.0 + f64::EPSILON,
+        -0.999_999_999_999,
+        -0.5,
+        -0.0,
+        0.0,
+        0.5,
+        0.999_999_999_999,
+        1.0 - f64::EPSILON,
+        1.0,
+        1.0 + f64::EPSILON,
+        f64::NAN,
+    ];
+    check_targeted_unary_f64::<S>(
+        "atanh_u35",
+        &atanh_inputs,
+        contracts::ATANH_U35_F64_MAX_ULP,
+        |v| v.atanh_u35(),
+        f64::atanh,
+    );
+
+    let asinh_inputs = [
+        f64::NEG_INFINITY,
+        -1.0e300,
+        -1000.0,
+        -1.0,
+        -f64::MIN_POSITIVE,
+        -0.0,
+        0.0,
+        f64::MIN_POSITIVE,
+        1.0,
+        1000.0,
+        1.0e300,
+        f64::INFINITY,
+        f64::NAN,
+    ];
+    check_targeted_unary_f64::<S>(
+        "asinh_u35",
+        &asinh_inputs,
+        contracts::ASINH_U35_F64_MAX_ULP,
+        |v| v.asinh_u35(),
+        f64::asinh,
+    );
+}
+
+fn run_f64_inverse_hyperbolic_mixed_lanes<S: Simd>() {
+    let mut lanes = vec![0.0f64; S::Vf64::WIDTH];
+    let pattern = [
+        -0.75f64,
+        -0.0,
+        0.0,
+        0.75,
+        0.999_999_999_999,
+        -0.999_999_999_999,
+        1.0,
+        -1.0,
+        1.0 + f64::EPSILON,
+        f64::NAN,
+        f64::INFINITY,
+        f64::NEG_INFINITY,
+        1.0,
+        2.0,
+        10.0,
+        0.5,
+    ];
+    for (i, lane) in lanes.iter_mut().enumerate() {
+        *lane = pattern[i % pattern.len()];
+    }
+
+    let input = S::Vf64::load_from_slice(&lanes);
+    let asinh = input.asinh_u35();
+    let acosh = input.acosh_u35();
+    let atanh = input.atanh_u35();
+
+    for (lane, &x) in lanes.iter().enumerate() {
+        assert_f64_contract(
+            "asinh_u35",
+            x,
+            asinh[lane],
+            x.asinh(),
+            contracts::ASINH_U35_F64_MAX_ULP,
+        )
+        .unwrap_or_else(|e| panic!("lane {lane}: {e}"));
+
+        assert_f64_contract(
+            "acosh_u35",
+            x,
+            acosh[lane],
+            x.acosh(),
+            contracts::ACOSH_U35_F64_MAX_ULP,
+        )
+        .unwrap_or_else(|e| panic!("lane {lane}: {e}"));
+
+        assert_f64_contract(
+            "atanh_u35",
+            x,
+            atanh[lane],
+            x.atanh(),
+            contracts::ATANH_U35_F64_MAX_ULP,
+        )
+        .unwrap_or_else(|e| panic!("lane {lane}: {e}"));
+    }
+}
+
+simd_math_targeted_all_backends!(
+    f64_inverse_hyperbolic_domain_edges,
+    run_f64_inverse_hyperbolic_domain_edges
+);
+simd_math_targeted_all_backends!(
+    f64_inverse_hyperbolic_mixed_lanes,
+    run_f64_inverse_hyperbolic_mixed_lanes
+);
diff --git a/src/tests/simd_math_targeted_edges/mod.rs b/src/tests/simd_math_targeted_edges/mod.rs
index babe6b3..fe2f727 100644
--- a/src/tests/simd_math_targeted_edges/mod.rs
+++ b/src/tests/simd_math_targeted_edges/mod.rs
@@ -13,7 +13,7 @@ use crate::engines::wasm32::Wasm;
 use crate::engines::{avx2::Avx2, sse2::Sse2, sse41::Sse41};
 
 use crate::math::SimdMathF32Core;
-use crate::math::{contracts, SimdMathF32};
+use crate::math::{contracts, SimdMathF32, SimdMathF64};
 use crate::{Simd, SimdBaseIo, SimdConsts};
 
 fn assert_f32_contract(
@@ -89,6 +89,79 @@ fn check_targeted_unary_f32<S: Simd>(
     }
 }
 
+fn assert_f64_contract(
+    fn_name: &str,
+    input: f64,
+    actual: f64,
+    expected: f64,
+    max_ulp: u64,
+) -> Result<(), String> {
+    if expected.is_nan() {
+        if actual.is_nan() {
+            return Ok(());
+        }
+        return Err(format!("{fn_name}({input:?}) expected NaN, got {actual:?}"));
+    }
+
+    if expected.is_infinite() {
+        if actual.to_bits() == expected.to_bits() {
+            return Ok(());
+        }
+        return Err(format!(
+            "{fn_name}({input:?}) expected {:?}, got {:?}",
+            expected, actual
+        ));
+    }
+
+    if expected == 0.0 {
+        if actual.to_bits() == expected.to_bits() {
+            return Ok(());
+        }
+        return Err(format!(
+            "{fn_name}({input:?}) expected signed zero bits {:016x}, got {:016x}",
+            expected.to_bits(),
+            actual.to_bits()
+        ));
+    }
+
+    if actual.is_nan() || actual.is_infinite() {
+        return Err(format!(
+            "{fn_name}({input:?}) expected finite {expected:?}, got {actual:?}"
+        ));
+    }
+
+    let ulp = ulp_distance_f64(actual, expected)
+        .ok_or_else(|| format!("{fn_name}({input:?}) failed to compute f64 ULP distance"))?;
+    if ulp > max_ulp {
+        return Err(format!(
+            "{fn_name}({input:?}) ULP distance {ulp} exceeds max {max_ulp} (actual={actual:?}, expected={expected:?})"
+        ));
+    }
+
+    Ok(())
+}
+
+fn check_targeted_unary_f64<S: Simd>(
+    fn_name: &str,
+    inputs: &[f64],
+    max_ulp: u64,
+    simd_fn: impl Fn(S::Vf64) -> S::Vf64,
+    scalar_ref: impl Fn(f64) -> f64,
+) {
+    for chunk in inputs.chunks(S::Vf64::WIDTH) {
+        let input = S::Vf64::load_from_slice(chunk);
+        let output = simd_fn(input);
+
+        for (lane, &x) in chunk.iter().enumerate() {
+            let actual = output[lane];
+            let expected = scalar_ref(x);
+            if let Err(err) = assert_f64_contract(fn_name, x, actual, expected, max_ulp) {
+                panic!("{err}");
+            }
+        }
+    }
+}
+
 #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
 fn run_log2_u35_vector_apply_avx2(input: &[f32], output: &mut [f32]) {
     assert_eq!(input.len(), output.len());