Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 32 additions & 8 deletions cxx/isce3/cuda/focus/Backproject.cu
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ using DeviceDEMInterpolator = isce3::cuda::geometry::gpuDEMInterpolator;
using DeviceOrbitView = isce3::cuda::core::OrbitView;
using DeviceRadarGeometry = isce3::cuda::container::RadarGeometry;

using HostWindow = isce3::core::ChebyKernel<float>;
using DeviceWindow= isce3::cuda::core::ChebyKernel<float>;
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
using DeviceWindow= isce3::cuda::core::ChebyKernel<float>;
using DeviceWindow = isce3::cuda::core::ChebyKernel<float>;

using DeviceWindowView = isce3::cuda::core::ChebyKernelView<float>;

template<typename T>
using DeviceLUT2d = isce3::cuda::core::gpuLUT2d<T>;

Expand Down Expand Up @@ -406,7 +410,8 @@ __global__ void sumCoherentBatch(
const double* __restrict__ tau_atm_in,
const int* __restrict__ kstart_in, const int* __restrict__ kstop_in,
const size_t n, const double fc, const Kernel kernel,
const int batch_start, const int batch_stop)
const int batch_start, const int batch_stop,
const std::optional<DeviceWindowView> window = std::nullopt)
{
// thread index (1d grid of 1d blocks)
const auto tid = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
Expand Down Expand Up @@ -434,6 +439,9 @@ __global__ void sumCoherentBatch(
const double dtau = sampling_window.spacing();
const auto samples = static_cast<size_t>(sampling_window.size());

// scale pulse index to unit range
const float kscale = 1.0f / (kstop - kstart - 1);

// loop over lines in batch
thrust::complex<double> batch_sum = {0., 0.};
for (int k = batch_start; k < batch_stop; ++k) {
Expand All @@ -456,7 +464,13 @@ __global__ void sumCoherentBatch(
::sincospi(2. * fc * tau, &sin_phi, &cos_phi);
z *= thrust::complex<double>(cos_phi, sin_phi);

batch_sum += z;
// assume branch prediction works better than an unnecessary multiply
if (window.has_value()) {
const auto x = (k - kstart) * kscale - 0.5f;
batch_sum += window.value()(x) * z;
Comment on lines +469 to +470
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not obvious to me that the support of the window function is necessarily $[-\frac{1}{2}, \frac{1}{2}]$.

Based on reading the implementation of ChebyKernel, it seems like the support domain is $[-\frac{w}{2}, \frac{w}{2}]$, where $w$ is the width of the kernel. Is that correct?

In that case, should we define kscale like this instead?

-    const float kscale = 1.0f / (kstop - kstart - 1);
+    const float w = window.has_value() ? window.value().width() : 1.0f;
+    const float kscale = w / (kstop - kstart - 1);

And then define x as:

-            const auto x = (k - kstart) * kscale - 0.5f;
+            const auto x = (k - kstart) * kscale - window.value().halfwidth();

(Note that we'd have to add the halfwidth() member function to Kernel.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point. How about making width==1 part of the interface and throwing an exception in the host code if it's not satisfied?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, I guess that'd be OK with me if that's your preference.

I'm not sure why it'd be preferable to only support kernel width == 1, though. It doesn't seem like it'd be significantly less work to implement. Are you just thinking that the code will be cleaner if we require width == 1?

} else {
batch_sum += z;
}
Comment on lines +467 to +473
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This implementation seems reasonable to me but FYI GPUs don't have branch predictors.

https://chatgpt.com/share/68d426b9-befc-8011-9e95-9c34d82a22b1

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we want to over-engineer this to avoid a conditional inside the loop, I could imagine a possible approach that templates sumCoherentBatch on the window type, kinda like this:

struct NullWindow {};

template<class DeviceWindowView>
__global__ void sumCoherentBatch(..., DeviceWindowView window, ...)
{
    ...
    for (int k = batch_start; k < batch_stop; ++k) {
        ...
        if constexpr (std::is_same_v<DeviceWindowView, NullWindow>) {
            batch_sum += z;
        } else {
            const auto x = (k - kstart) * kscale - 0.5f;
            batch_sum += window.value()(x) * z;
        }
    }
    ...
}

Then, on the host side, you'd have to call it in a conditional like this:

ErrorCode backproject(..., const std::optional<HostWindow> h_window, ...)
{
    ...
    if (h_window.has_value()) {
        const auto d_window = DeviceWindow(h_window);
        const auto dv_window = DeviceWindowView(d_window);
        sumCoherentBatch<<<...>>>(..., dv_window, ...);
    } else {
        sumCoherentBatch<<<...>>>(..., NullWindow{}, ...);
    }
    ...
}

Not sure if that'd cause a noticeable improvement in runtime to be worth the additional code complexity.

}

// add batch sum to total
Expand All @@ -474,6 +488,7 @@ ErrorCode backproject(std::complex<float>* out,
const Kernel& kernel, DryTroposphereModel dry_tropo_model,
const Rdr2GeoBracketParams& rdr2geo_params,
const Geo2RdrBracketParams& geo2rdr_params, int batch,
const std::optional<DeviceWindowView> window,
float* height)
{
// XXX input reference epoch must match output reference epoch
Expand Down Expand Up @@ -681,7 +696,7 @@ ErrorCode backproject(std::complex<float>* out,
img.data().get(), rc.data().get(), pos.data().get(),
vel.data().get(), sampling_window, x.data().get(),
tau_atm.data().get(), kstart.data().get(), kstop.data().get(),
out_grid_size, fc, kernel, k, k + curr_batch);
out_grid_size, fc, kernel, k, k + curr_batch, window);

checkCudaErrors(cudaPeekAtLastError());
checkCudaErrors(cudaStreamSynchronize(cudaStreamDefault));
Expand All @@ -704,48 +719,57 @@ ErrorCode backproject(std::complex<float>* out,
DryTroposphereModel dry_tropo_model,
const Rdr2GeoBracketParams& rdr2geo_params,
const Geo2RdrBracketParams& geo2rdr_params, int batch,
const std::optional<HostWindow> window,
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

General guidance is to pass inputs by const reference unless they're cheap to copy (typically 2-3 words in size).

https://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines#f16-for-in-parameters-pass-cheaply-copied-types-by-value-and-others-by-reference-to-const

HostWindow stores a std::vector internally so it's potentially expensive to copy. Let's pass it by const reference here.

Suggested change
const std::optional<HostWindow> window,
const std::optional<HostWindow>& window,

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree it'd be nice to avoid a copy. I would've thought you'd want an optional reference std::optional<HostWindow&> window rather than a reference to optional, but apparently that's not allowed. I guess I need to spend some time learning about optional semantics (and other C++ nuances).

float* height)
{
// copy inputs to device
const DeviceRadarGeometry d_out_geometry(out_geometry);
const DeviceRadarGeometry d_in_geometry(in_geometry);
DeviceDEMInterpolator d_dem(dem);
std::optional<DeviceWindow> d_window;
std::optional<DeviceWindowView> dv_window = std::nullopt;
if (window.has_value()) {
// make a device copy held in function scope
d_window.emplace(DeviceWindow(window.value()));
// get a non-owning view
dv_window.emplace(d_window.value());
}
ErrorCode ec;

if (typeid(kernel) == typeid(HostBartlettKernel<float>)) {
const DeviceBartlettKernel<float> d_kernel(
dynamic_cast<const HostBartlettKernel<float>&>(kernel));
ec = backproject(out, d_out_geometry, in, d_in_geometry, d_dem, fc, ds,
d_kernel, dry_tropo_model, rdr2geo_params,
geo2rdr_params, batch, height);
geo2rdr_params, batch, dv_window, height);
}
else if (typeid(kernel) == typeid(HostLinearKernel<float>)) {
const DeviceLinearKernel<float> d_kernel(
dynamic_cast<const HostLinearKernel<float>&>(kernel));
ec = backproject(out, d_out_geometry, in, d_in_geometry, d_dem, fc, ds,
d_kernel, dry_tropo_model, rdr2geo_params,
geo2rdr_params, batch, height);
geo2rdr_params, batch, dv_window, height);
}
else if (typeid(kernel) == typeid(HostKnabKernel<float>)) {
const DeviceKnabKernel<float> d_kernel(
dynamic_cast<const HostKnabKernel<float>&>(kernel));
ec = backproject(out, d_out_geometry, in, d_in_geometry, d_dem, fc, ds,
d_kernel, dry_tropo_model, rdr2geo_params,
geo2rdr_params, batch, height);
geo2rdr_params, batch, dv_window, height);
}
else if (typeid(kernel) == typeid(HostTabulatedKernel<float>)) {
const DeviceTabulatedKernel<float> d_kernel(
dynamic_cast<const HostTabulatedKernel<float>&>(kernel));
ec = backproject(out, d_out_geometry, in, d_in_geometry, d_dem, fc, ds,
d_kernel, dry_tropo_model, rdr2geo_params,
geo2rdr_params, batch, height);
geo2rdr_params, batch, dv_window, height);
}
else if (typeid(kernel) == typeid(HostChebyKernel<float>)) {
const DeviceChebyKernel<float> d_kernel(
dynamic_cast<const HostChebyKernel<float>&>(kernel));
ec = backproject(out, d_out_geometry, in, d_in_geometry, d_dem, fc, ds,
d_kernel, dry_tropo_model, rdr2geo_params,
geo2rdr_params, batch, height);
geo2rdr_params, batch, dv_window, height);
}
else {
throw isce3::except::RuntimeError(ISCE_SRCINFO(), "not implemented");
Expand Down
11 changes: 9 additions & 2 deletions cxx/isce3/cuda/focus/Backproject.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <isce3/geometry/forward.h>

#include <complex>
#include <optional>

#include <isce3/core/Kernels.h>
#include <isce3/error/ErrorCode.h>
Expand Down Expand Up @@ -35,6 +36,7 @@ using isce3::geometry::detail::Rdr2GeoBracketParams;
* \param[in] rdr2geo_params rdr2geo configuration parameters
* \param[in] geo2rdr_params geo2rdr configuration parameters
* \param[in] batch Number of range-compressed data lines per batch
* \param[in] window Fit to apodization window on interval [-0.5, 0.5]
* \param[out] height Height of each pixel in meters above ellipsoid
*
* \returns Non-zero error code if geometry fails to converge for any pixel,
Expand All @@ -53,7 +55,9 @@ backproject(std::complex<float>* out,
DryTroposphereModel dry_tropo_model = DryTroposphereModel::TSX,
const Rdr2GeoBracketParams& rdr2geo_params = {},
const Geo2RdrBracketParams& geo2rdr_params = {},
int batch = 1024, float* height = nullptr);
int batch = 1024,
const std::optional<isce3::core::ChebyKernel<float>> window = std::nullopt,
Copy link
Copy Markdown
Member

@gmgunter gmgunter Sep 25, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, I'm not seeing the same change in the corresponding .cu file. This overload seems to take a const std::optional<isce3::cuda::core::ChebyKernelView<float>> in that file, which doesn't match the type of the argument here in the header file. Did I miss something?

const std::optional<DeviceWindowView> window,

float* height = nullptr);

/**
* Focus in azimuth via time-domain backprojection
Expand All @@ -70,6 +74,7 @@ backproject(std::complex<float>* out,
* \param[in] rdr2geo_params rdr2geo configuration parameters
* \param[in] geo2rdr_params geo2rdr configuration parameters
* \param[in] batch Number of range-compressed data lines per batch
* \param[in] window Fit to apodization window on interval [-0.5, 0.5]
* \param[out] height Height of each pixel in meters above ellipsoid
*
* \returns Non-zero error code if geometry fails to converge for any pixel,
Expand All @@ -85,6 +90,8 @@ backproject(std::complex<float>* out,
DryTroposphereModel dry_tropo_model = DryTroposphereModel::TSX,
const Rdr2GeoBracketParams& rdr2geo_params = {},
const Geo2RdrBracketParams& geo2rdr_params = {},
int batch = 1024, float* height = nullptr);
int batch = 1024,
const std::optional<isce3::core::ChebyKernel<float>> window = std::nullopt,
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As mentioned in a previous comment, let's pass this by const ref.

Suggested change
const std::optional<isce3::core::ChebyKernel<float>> window = std::nullopt,
const std::optional<isce3::core::ChebyKernel<float>>& window = std::nullopt,

float* height = nullptr);

}}} // namespace isce3::cuda::focus
24 changes: 19 additions & 5 deletions cxx/isce3/focus/Backproject.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <isce3/geometry/geometry.h>
#include <isce3/geometry/rdr2geo_roots.h>
#include <isce3/geometry/geo2rdr_roots.h>
#include <isce3/math/complexOperations.h>
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, why didn't we need this in the CUDA backprojection code?

#include <limits>
#include <string>
#include <vector>
Expand All @@ -23,6 +24,7 @@ using namespace isce3::geometry;
using isce3::error::ErrorCode;

using isce3::container::RadarGeometry;
using Window = isce3::core::ChebyKernel<float>;

namespace isce3 {
namespace focus {
Expand All @@ -35,8 +37,13 @@ inline std::complex<float> sumCoherent(const std::complex<float>* data,
double fc,
double tau_atm,
const Kernel<float>& kernel,
int kstart, int kstop)
int kstart, int kstop,
const std::optional<Window> window = std::nullopt)
{
using namespace isce3::math::complex_operations;

const float kscale = 1.0f / (kstop - kstart - 1);

// loop over pulses within integration window
std::complex<double> sum(0., 0.);
for (int k = kstart; k < kstop; ++k) {
Expand All @@ -54,9 +61,15 @@ inline std::complex<float> sumCoherent(const std::complex<float>* data,
double phi = 2. * M_PI * fc * tau;
s *= std::complex<double>(std::cos(phi), std::sin(phi));

// worst-case numerical error increases linearly, accumulate using
// double precision to mitigate errors
sum += s;
// assume branch prediction works better than an unnecessary multiply
if (window.has_value()) {
const auto x = (k - kstart) * kscale - 0.5f;
sum += window.value()(x) * s;
} else {
// worst-case numerical error increases linearly, accumulate using
// double precision to mitigate errors
sum += s;
}
}

return std::complex<float>(sum);
Expand All @@ -69,6 +82,7 @@ backproject(std::complex<float>* out, const RadarGeometry& out_geometry,
const Kernel<float>& kernel, DryTroposphereModel dry_tropo_model,
const isce3::geometry::detail::Rdr2GeoBracketParams& r2g_params,
const isce3::geometry::detail::Geo2RdrBracketParams& g2r_params,
const std::optional<Window> window,
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
const std::optional<Window> window,
const std::optional<Window>& window,

float* height)
{
static constexpr double c = isce3::core::speed_of_light;
Expand Down Expand Up @@ -201,7 +215,7 @@ backproject(std::complex<float>* out, const RadarGeometry& out_geometry,
// integrate pulses
out[j * out_geometry.gridWidth() + i] =
sumCoherent(in, sampling_window, pos, vel, x, fc, tau_atm,
kernel, kstart, kstop);
kernel, kstart, kstop, window);
}
}

Expand Down
4 changes: 4 additions & 0 deletions cxx/isce3/focus/Backproject.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@

#include <isce3/container/forward.h>
#include <isce3/core/forward.h>
#include <isce3/core/Kernels.h>
#include <isce3/geometry/forward.h>

#include <complex>
#include <optional>

#include <isce3/error/ErrorCode.h>
#include <isce3/geometry/detail/Geo2Rdr.h>
Expand All @@ -29,6 +31,7 @@ namespace focus {
* \param[in] dry_tropo_model Dry troposphere path delay model
* \param[in] r2g_params rdr2geo configuration parameters
* \param[in] g2r_params geo2rdr configuration parameters
* \param[in] window Fit to apodization window on interval [-0.5, 0.5]
* \param[out] height Height of each pixel in meters above ellipsoid
*
* \returns Non-zero error code if geometry fails to converge for any pixel,
Expand All @@ -44,6 +47,7 @@ backproject(std::complex<float>* out,
DryTroposphereModel dry_tropo_model = DryTroposphereModel::TSX,
const isce3::geometry::detail::Rdr2GeoBracketParams& r2g_params = {},
const isce3::geometry::detail::Geo2RdrBracketParams& g2r_params = {},
const std::optional<isce3::core::ChebyKernel<float>> window = std::nullopt,
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
const std::optional<isce3::core::ChebyKernel<float>> window = std::nullopt,
const std::optional<isce3::core::ChebyKernel<float>>& window = std::nullopt,

float* height = nullptr);

} // namespace focus
Expand Down
4 changes: 3 additions & 1 deletion python/extensions/pybind_isce3/cuda/focus/Backproject.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ void addbinding_cuda_backproject(py::module& m)
py::dict rdr2geo_params,
py::dict geo2rdr_params,
int batch,
const std::optional<isce3::core::ChebyKernel<float>> window,
std::optional<py::array_t<float, py::array::c_style>> height) {

if (out.ndim() != 2) {
Expand Down Expand Up @@ -93,7 +94,7 @@ void addbinding_cuda_backproject(py::module& m)
py::gil_scoped_release release;
err = backproject(out_data, out_geometry, in_data, in_geometry,
dem, fc, ds, kernel, atm, r2gparams, g2rparams, batch,
height_data);
window, height_data);
}
// TODO bind ErrorCode class. For now return nonzero on failure.
return err != ErrorCode::Success;
Expand All @@ -113,5 +114,6 @@ void addbinding_cuda_backproject(py::module& m)
py::arg("rdr2geo_params") = py::dict(),
py::arg("geo2rdr_params") = py::dict(),
py::arg("batch") = 1024,
py::arg("window") = py::none(),
py::arg("height") = py::none());
}
4 changes: 3 additions & 1 deletion python/extensions/pybind_isce3/focus/Backproject.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ void addbinding_backproject(py::module& m)
const std::string& dry_tropo_model,
py::dict rdr2geo_params,
py::dict geo2rdr_params,
const std::optional<isce3::core::ChebyKernel<float>> window,
std::optional<py::array_t<float, py::array::c_style>> height) {

if (out.ndim() != 2) {
Expand Down Expand Up @@ -143,7 +144,7 @@ void addbinding_backproject(py::module& m)
{
py::gil_scoped_release release;
err = backproject(out_data, out_geometry, in_data, in_geometry,
dem, fc, ds, kernel, atm, r2gparams, g2rparams,
dem, fc, ds, kernel, atm, r2gparams, g2rparams, window,
height_data);
}
// TODO bind ErrorCode class. For now return nonzero on failure.
Expand All @@ -163,5 +164,6 @@ void addbinding_backproject(py::module& m)
py::arg("dry_tropo_model") = "tsx",
py::arg("rdr2geo_params") = py::dict(),
py::arg("geo2rdr_params") = py::dict(),
py::arg("window") = py::none(),
py::arg("height") = py::none());
}
1 change: 1 addition & 0 deletions python/packages/isce3/focus/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@
from .valid_regions import (RadarPoint, RadarBoundingBox,
get_focused_sub_swaths, fill_gaps)
from .calibration_luts import make_los_luts, make_cal_luts
from .cheby_windows import get_window_approximation, WindowKind
from .notch import Notch, FrequencyDomain
70 changes: 70 additions & 0 deletions python/packages/isce3/focus/cheby_windows.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
from enum import Enum, unique
import numpy as np
from isce3.core import KernelF32, ChebyKernelF32


# helper classes for fitting

class CosineWindowF32(KernelF32):
def __init__(self, pedestal_height: float = 1.0):
width = 1.0
KernelF32.__init__(self, width)
self.pedestal_height = pedestal_height
self.a0 = (1.0 + pedestal_height) / 2
self.a1 = 1.0 - self.a0
self.freq = 2 * np.pi

def __call__(self, t: float) -> np.float32:
if not (-0.5 <= t <= 0.5):
return np.float32(0)
return np.float32(self.a0 - self.a1 * np.cos(self.freq * (t + 0.5)))


class KaiserWindowF32(KernelF32):
def __init__(self, beta: float = 0.0):
width = 1.0
KernelF32.__init__(self, width)
self.beta = beta
self.scale = 1.0 / np.i0(beta)

def __call__(self, t: float) -> np.float32:
if not (-0.5 <= t <= 0.5):
return np.float32(0)
x = self.beta * np.sqrt(1 - 4 * t * t)
return np.float32(np.i0(x) * self.scale)


@unique
class WindowKind(str, Enum):
KAISER = "kaiser"
COSINE = "cosine"
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NBD, but I like to add something like this to str/Enum classes so that they behave a little bit more like Python 3.11 StrEnums.

Suggested change
COSINE = "cosine"
COSINE = "cosine"
def __str__(self) -> str:
return self.value

Without overriding the __str__ method, you'll get something like this when converting the enum to a string:

>>> class WindowKind(str, Enum):
...     KAISER = "kaiser"
...     COSINE = "cosine"
...
>>> str(WindowKind.KAISER)
'WindowKind.KAISER'

With this change, you'll instead get this:

>>> class WindowKind(str, Enum):
...     KAISER = "kaiser"
...     COSINE = "cosine"
...
...     def __str__(self):
...         return self.value
...
>>> str(WindowKind.KAISER)
'kaiser'



def get_window_approximation(kind: WindowKind, shape, n=8) -> ChebyKernelF32:
"""
Generate a Chebyshev polynomial that approximates an apodization window.

Parameters
----------
kind : str or WindowKind
Either "kaiser" or "cosine" window
shape : float
Shape parameter of the window. Pedestal height for raised cosine
window or beta for Kaiser window.
n : int
Number of coefficients in the polynomial.

Returns
-------
window : isce3.core.ChebyKernelF32
An approximation to the desired apodization window scaled to the
domain [-0.5, 0.5].
"""
kind = WindowKind(kind)
if kind == "kaiser":
win = KaiserWindowF32(shape)
elif kind == "cosine":
win = CosineWindowF32(shape)
else:
assert False, "unhandled WindowKind"
return ChebyKernelF32(win, n)
Loading