Merge pull request #26 from asgr/copilot/add-rcpp-dispersion-speedup

asgr · web-flow · commit 25b75d0410f3 · 2026-02-23T21:32:30.000+08:00
Add Rcpp speedup for `disp_stars` convolution in `SFHfunc()` and `SFHburst()`
diff --git a/R/RcppExports.R b/R/RcppExports.R
@@ -1,6 +1,10 @@
 # Generated by using Rcpp::compileAttributes() -> do not edit by hand
 # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 
+.disp_stars_cpp <- function(wave_log, lum_log, z_disp, grid, weights, res) {
+    .Call(`_ProSpect_disp_stars_cpp`, wave_log, lum_log, z_disp, grid, weights, res)
+}
+
 .colSums_wt_cpp <- function(mat, vec_wt = 1L) {
     .Call(`_ProSpect_colSums_wt_cpp`, mat, vec_wt)
 }
diff --git a/R/SFH.R b/R/SFH.R
@@ -349,20 +349,7 @@ SFHfunc = function(massfunc = massfunc_b5,
 
     z_disp = sqrt(veldisp^2 + vel_LSF^2)/(.c_to_mps/1000) #this will be a vector of length wave_lum
 
-    lum_conv = numeric(length(lum))
-
-    for(i in seq_along(grid)){
-      z_seq_log = log10(1 + grid[i]*z_disp)
-      new_wave_log = wave_lum_log + z_seq_log
-      new_lum = lum_log - z_seq_log
-      new_lum = 10^approx(x=new_wave_log, y=new_lum, xout=wave_lum_log, rule=2, yleft=new_lum[1], yright=new_lum[length(new_lum)])$y
-      new_lum = new_lum*weights[i]
-      #lum_conv = lum_conv + new_lum
-      .vec_add_cpp(lum_conv, new_lum)
-    }
-
-    lum = lum_conv*res
-    rm(lum_conv)
+    lum = .disp_stars_cpp(wave_lum_log, lum_log, z_disp, grid, weights, res)
   }
 
   if (emission) {
@@ -973,7 +960,7 @@ SFHburst = function(burstmass = 1e8,
         vel_LSF = LSF(wave_lum*(1 + z)) #to get LSF dispersion in km/s into z in the oberved frame
       }else if(is.matrix(LSF) | is.data.frame(LSF)){
         vel_LSF = approx(x=log10(LSF[,1]), y=LSF[,2], xout=log10(wave_lum*(1 + z)), rule=2)$y
-      }else if(length(LSF == 1)){
+      }else if(is.numeric(LSF) && length(LSF) == 1){
         vel_LSF = rep(LSF, length(wave_lum))
       }else{
         stop('LSF is in the wrong format!')
@@ -984,20 +971,7 @@ SFHburst = function(burstmass = 1e8,
 
     z_disp = sqrt(veldisp^2 + vel_LSF^2)/(.c_to_mps/1000) #this will be a vector of length wave_lum
 
-    lum_conv = numeric(length(lum))
-
-    for(i in seq_along(grid)){
-      z_seq = grid[i]*z_disp
-      new_wave_log = wave_lum_log + log10(1 + z_seq)
-      new_lum = lum_log - log10(1 + z_seq)
-      new_lum = 10^approx(x=new_wave_log, y=new_lum, xout=wave_lum_log, rule=2, yleft=new_lum[1], yright=new_lum[length(new_lum)])$y
-      new_lum = new_lum*weights[i]
-      #lum_conv = lum_conv + new_lum
-      .vec_add_cpp(lum_conv, new_lum)
-    }
-
-    lum = lum_conv*res
-    rm(lum_conv)
+    lum = .disp_stars_cpp(wave_lum_log, lum_log, z_disp, grid, weights, res)
   }
 
   if (emission & burstage < 1e7) {
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
@@ -10,6 +10,22 @@ Rcpp::Rostream<true>&  Rcpp::Rcout = Rcpp::Rcpp_cout_get();
 Rcpp::Rostream<false>& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
 #endif
 
+// disp_stars_cpp
+NumericVector disp_stars_cpp(NumericVector wave_log, NumericVector lum_log, NumericVector z_disp, NumericVector grid, NumericVector weights, double res);
+RcppExport SEXP _ProSpect_disp_stars_cpp(SEXP wave_logSEXP, SEXP lum_logSEXP, SEXP z_dispSEXP, SEXP gridSEXP, SEXP weightsSEXP, SEXP resSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< NumericVector >::type wave_log(wave_logSEXP);
+    Rcpp::traits::input_parameter< NumericVector >::type lum_log(lum_logSEXP);
+    Rcpp::traits::input_parameter< NumericVector >::type z_disp(z_dispSEXP);
+    Rcpp::traits::input_parameter< NumericVector >::type grid(gridSEXP);
+    Rcpp::traits::input_parameter< NumericVector >::type weights(weightsSEXP);
+    Rcpp::traits::input_parameter< double >::type res(resSEXP);
+    rcpp_result_gen = Rcpp::wrap(disp_stars_cpp(wave_log, lum_log, z_disp, grid, weights, res));
+    return rcpp_result_gen;
+END_RCPP
+}
 // colSums_wt_cpp
 NumericVector colSums_wt_cpp(NumericMatrix mat, NumericVector vec_wt);
 RcppExport SEXP _ProSpect_colSums_wt_cpp(SEXP matSEXP, SEXP vec_wtSEXP) {
@@ -101,6 +117,7 @@ END_RCPP
 }
 
 static const R_CallMethodDef CallEntries[] = {
+    {"_ProSpect_disp_stars_cpp", (DL_FUNC) &_ProSpect_disp_stars_cpp, 6},
     {"_ProSpect_colSums_wt_cpp", (DL_FUNC) &_ProSpect_colSums_wt_cpp, 2},
     {"_ProSpect_mat_vec_mult_col", (DL_FUNC) &_ProSpect_mat_vec_mult_col, 3},
     {"_ProSpect_mat_vec_mult_row", (DL_FUNC) &_ProSpect_mat_vec_mult_row, 3},
diff --git a/src/disp_stars.cpp b/src/disp_stars.cpp
@@ -0,0 +1,96 @@
+#include <Rcpp.h>
+#include <cmath>
+using namespace Rcpp;
+
+// disp_stars_cpp: Rcpp implementation of the stellar dispersion convolution.
+//
+// Equivalent to the R loop in SFHfunc()/SFHburst() when disp_stars=TRUE:
+//   for(i in seq_along(grid)){
+//     z_seq_log = log10(1 + grid[i]*z_disp)
+//     new_wave_log = wave_log + z_seq_log
+//     new_lum = lum_log - z_seq_log
+//     new_lum = 10^approx(x=new_wave_log, y=new_lum, xout=wave_log,
+//                         rule=2, yleft=new_lum[1], yright=new_lum[n])$y
+//     lum_conv = lum_conv + weights[i] * new_lum
+//   }
+//   return(lum_conv * res)
+//
+// Inputs:
+//   wave_log  - log10 wavelengths (monotone increasing, length n)
+//   lum_log   - log10 luminosity (length n)
+//   z_disp    - wavelength-dependent velocity dispersion in z units (length n)
+//   grid      - Gaussian quadrature grid points, e.g. seq(-range, range, by=res)
+//   weights   - dnorm(grid)
+//   res       - grid spacing (multiplied into final result)
+//
+// [[Rcpp::export(".disp_stars_cpp")]]
+NumericVector disp_stars_cpp(NumericVector wave_log,
+                              NumericVector lum_log,
+                              NumericVector z_disp,
+                              NumericVector grid,
+                              NumericVector weights,
+                              double res) {
+  int n  = wave_log.size();
+  int ng = grid.size();
+
+  if (lum_log.size() != n)  stop("lum_log must have same length as wave_log");
+  if (z_disp.size()  != n)  stop("z_disp must have same length as wave_log");
+  if (weights.size() != ng) stop("weights must have same length as grid");
+
+  NumericVector out(n, 0.0);
+  // Reusable buffers for the shifted wavelength / luminosity grids
+  NumericVector x_src(n);
+  NumericVector y_src(n);
+
+  for (int gi = 0; gi < ng; gi++) {
+    double g = grid[gi];
+    double w = weights[gi];
+
+    // Build shifted grids: x_src = wave_log + log10(1 + g*z_disp)
+    //                      y_src = lum_log  - log10(1 + g*z_disp)
+    for (int j = 0; j < n; j++) {
+      double val = 1.0 + g * z_disp[j];
+      if (val <= 0.0) {
+        stop("1 + grid[i]*z_disp <= 0: cannot take log10. "
+             "Reduce veldisp or the grid range.");
+      }
+      double lv = std::log10(val);
+      x_src[j] = wave_log[j] + lv;
+      y_src[j] = lum_log[j]  - lv;
+    }
+
+    // Linear interpolation of y_src(x_src) onto wave_log.
+    // Uses rule=2: extrapolate with boundary values y_src[0] / y_src[n-1].
+    // Two-pointer sweep works because wave_log is monotone increasing and
+    // x_src remains monotone for the small shifts typical of veldisp.
+    // lo is intentionally reset to 0 for each grid point gi: since wave_log
+    // is scanned left-to-right each iteration, lo must start fresh.
+    int lo = 0;
+    for (int j = 0; j < n; j++) {
+      double xq = wave_log[j];
+      double y_interp;
+
+      if (xq <= x_src[0]) {
+        y_interp = y_src[0];
+      } else if (xq >= x_src[n - 1]) {
+        y_interp = y_src[n - 1];
+      } else {
+        // Advance lo so that x_src[lo] <= xq < x_src[lo+1]
+        while (lo < n - 2 && x_src[lo + 1] <= xq) {
+          lo++;
+        }
+        double t = (xq - x_src[lo]) / (x_src[lo + 1] - x_src[lo]);
+        y_interp = y_src[lo] + t * (y_src[lo + 1] - y_src[lo]);
+      }
+
+      out[j] += w * std::pow(10.0, y_interp);
+    }
+  }
+
+  // Scale by the grid spacing
+  for (int j = 0; j < n; j++) {
+    out[j] *= res;
+  }
+
+  return out;
+}
diff --git a/tests/test_disp_stars_cpp.R b/tests/test_disp_stars_cpp.R
@@ -0,0 +1,70 @@
+# Regression test: verify that .disp_stars_cpp() matches the original R loop
+# to within a tight numerical tolerance (relative error < 1e-10).
+#
+# Run via: Rscript tests/test_disp_stars_cpp.R
+# (after the package has been installed or loaded with devtools::load_all())
+
+library(ProSpect)
+
+# R reference implementation of the disp_stars loop (identical to pre-Rcpp SFH.R)
+disp_stars_R <- function(wave_lum_log, lum_log, z_disp, grid, weights, res) {
+  lum_conv <- numeric(length(lum_log))
+  for (i in seq_along(grid)) {
+    z_seq_log  <- log10(1 + grid[i] * z_disp)
+    new_wave   <- wave_lum_log + z_seq_log
+    new_lum    <- lum_log - z_seq_log
+    new_lum    <- 10^approx(x = new_wave, y = new_lum,
+                            xout  = wave_lum_log,
+                            rule  = 2,
+                            yleft = new_lum[1],
+                            yright = new_lum[length(new_lum)])$y
+    lum_conv <- lum_conv + weights[i] * new_lum
+  }
+  lum_conv * res
+}
+
+set.seed(42)
+n    <- 500
+wave <- seq(3000, 10000, length.out = n)
+wave_log <- log10(wave)
+lum      <- runif(n, 1e3, 1e8)
+lum_log  <- log10(lum)
+
+# Several test cases with different dispersion and grid settings
+test_cases <- list(
+  list(veldisp = 100,  range = 3, res = 0.1),
+  list(veldisp = 250,  range = 3, res = 0.1),
+  list(veldisp = 50,   range = 5, res = 0.05),
+  list(veldisp = 300,  range = 3, res = 0.2)
+)
+
+c_to_mps <- 299792458  # speed of light in m/s (matches ProSpect:::.c_to_mps)
+
+all_passed <- TRUE
+
+for (tc in test_cases) {
+  veldisp <- tc$veldisp
+  range   <- tc$range
+  res     <- tc$res
+
+  grid    <- seq(-range, range, by = res)
+  weights <- dnorm(grid)
+  z_disp  <- rep(veldisp / (c_to_mps / 1000), n)  # constant z_disp for simplicity
+
+  ref <- disp_stars_R(wave_log, lum_log, z_disp, grid, weights, res)
+  got <- .disp_stars_cpp(wave_log, lum_log, z_disp, grid, weights, res)
+
+  rel_err <- max(abs(ref - got) / pmax(abs(ref), 1e-300))
+
+  if (rel_err > 1e-10) {
+    cat(sprintf("FAIL: veldisp=%g range=%g res=%g => max rel error = %e\n",
+                veldisp, range, res, rel_err))
+    all_passed <- FALSE
+  } else {
+    cat(sprintf("PASS: veldisp=%g range=%g res=%g => max rel error = %e\n",
+                veldisp, range, res, rel_err))
+  }
+}
+
+if (!all_passed) stop("One or more disp_stars_cpp regression tests failed!")
+cat("All disp_stars_cpp regression tests passed.\n")