From 34ece86ddf11e01e3dc324a4438a648ebbc71bb6 Mon Sep 17 00:00:00 2001 From: Brian Hawkins <1729052+bhawkins@users.noreply.github.com> Date: Thu, 4 Dec 2025 18:16:40 -0800 Subject: [PATCH 01/13] Use consistent X and Y indexing. --- cxx/isce3/cuda/matchtemplate/pycuampcor/cuDeramp.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cxx/isce3/cuda/matchtemplate/pycuampcor/cuDeramp.cu b/cxx/isce3/cuda/matchtemplate/pycuampcor/cuDeramp.cu index cd46e7893..25be2e59f 100644 --- a/cxx/isce3/cuda/matchtemplate/pycuampcor/cuDeramp.cu +++ b/cxx/isce3/cuda/matchtemplate/pycuampcor/cuDeramp.cu @@ -105,8 +105,8 @@ __global__ void cuDerampMethod1_kernel(float2 *images, const int imageNX, int co for (int i = tid; i < imageSize; i += nthreads) { - pixelIdxX = i%imageNY; - pixelIdxY = i/imageNY; + pixelIdxX = i / imageNY; + pixelIdxY = i % imageNY; float phase = pixelIdxX*phaseX + pixelIdxY*phaseY; float2 phase_factor = make_float2(cosf(phase), sinf(phase)); image[i] *= phase_factor; From d66149ca7a12835f3d642e787c0c2fc2b7ed8dcd Mon Sep 17 00:00:00 2001 From: Brian Hawkins <1729052+bhawkins@users.noreply.github.com> Date: Thu, 4 Dec 2025 19:53:23 -0800 Subject: [PATCH 02/13] Ditto in CPU version. --- cxx/isce3/matchtemplate/pycuampcor/cuDeramp.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cxx/isce3/matchtemplate/pycuampcor/cuDeramp.cpp b/cxx/isce3/matchtemplate/pycuampcor/cuDeramp.cpp index a1d015395..fa8b2482a 100644 --- a/cxx/isce3/matchtemplate/pycuampcor/cuDeramp.cpp +++ b/cxx/isce3/matchtemplate/pycuampcor/cuDeramp.cpp @@ -52,8 +52,8 @@ static void cuDerampMethod1_kernel(float2 *images, const int imageNX, int const float phaseY = atan2f(phaseDiffY.y, phaseDiffY.x); for (int i = 0; i < imageSize; i++) { - const int pixelIdxX = i%imageNY; - const int pixelIdxY = i/imageNY; + const int pixelIdxX = i / imageNY; + const int pixelIdxY = i % imageNY; float phase = pixelIdxX*phaseX + pixelIdxY*phaseY; float2 phase_factor = make_float2(cosf(phase), sinf(phase)); image[i] *= phase_factor; From 70ef962a8567acf52dda3753376b5c6c1c1c0d7d Mon Sep 17 00:00:00 2001 From: Brian Hawkins <1729052+bhawkins@users.noreply.github.com> Date: Fri, 5 Dec 2025 13:32:05 -0800 Subject: [PATCH 03/13] update golden data --- .../ovs128-rho0.8/golden/correlation_peak | Bin 624 -> 624 bytes .../ovs128-rho0.8/golden/dense_offsets | Bin 1248 -> 1248 bytes 2 files changed, 0 insertions(+), 0 deletions(-) diff --git a/tests/data/ampcor/accuracy-testdata/ovs128-rho0.8/golden/correlation_peak b/tests/data/ampcor/accuracy-testdata/ovs128-rho0.8/golden/correlation_peak index f71dbd19c66aa4d5119b3d522220f88f11594853..2d13833a8d0b5a7c09a3958e194580c245fc9d4c 100644 GIT binary patch literal 624 zcmWN_dn^=i0KoBHnvlpNne~UQx{0PTS>yY&T9KNXn)imxA8n0Z^GCBX{Si%Vnbl@7 zkA}F+*m+(O?%WYa?zlVe$Mv{8dT=34eg6DhFzaaOwiQkeqd0g^;}>YezeCT=u4Ymb z6x6Lxv0gTUo8K03vs+8RF+E9UGj_W-X^-9`BvLC;d&N!UY#+iy_JzpNX6h^IQJq&& z?k*?7p^1ynErd%G9PjI) zQ>Wp3@G#e=28c*g5>TS0A<0Cgmy1|=YAbwFS4gy$3zNjiql973MGa#~86YdiMf9CC z@Nr|5z_Df$-_BwkU?N@8L2_d)eTVl8YuhEAOMQGV)j>vTrp9^J+)sAl0&*tNp0OACh9){w%ej}UAVcpWLf(|HsL`?zr{HRf zn(SLsI2Rjn%dO+U+#u6#bKI5vzdbsnrR|7A3xquw=Z24gOPR~8MfDQvx=KjC9?y6E z*vm(GAzLDApcV586PuSO@J&&2driShk59<+mSJ`tn4~Hhob1MqBh)w z=c^W?6>WIN4Y7DkwYc1qP;mo@U&u?GJmoxE0U V5;^>ns`6>74r;J=9V1!V#Xo!IF`obc literal 624 zcmV~$dn^=i0KoAhkJ@5p9_zK43Yn=!rs(@ClUCEB(IiHh*h(vH(Xged^+(fMjb-N5 zwMjbCYV)S!+~f{-PCI9GoX0UbHP50xA5WV_($ZC=l*~}jxW-aMHO*FY3{ST*9ql5&PPq|&hxufP(q$u3_{aUkdq8w;~saCQ;DUs?;VX*GpK zdIGwdk(B9qJfbB0q(n&l){w@!3#WVQ`1?AFO1YUxnX8bSTS@-pAY4n#d|;f~hidM$ z$XTpbl9M~e>qkyvrMCv@b0xC&Vcd+J9KSe^By*V?SFA*8`2_t(jI`g^kY4X5{2!Vq z@X!$#-Av>!HId!pbWK^OAA#VO}yHx#VuRU;G0oOcN&nyuM=utMwsn5 zYRWN24R9-B5Q(aX@Yoqb@|HN9IEH=UI!RV5fz=Exb#p34l8gklYDkuU<;58jUos4=q)iZ1 ztEWh(r?+?#Lu@|{6$1pGlT+fYVfONO9ymA&$1Nk(NeW_Y+t@zUhjXTsk5_xBowZO? zxy5I1E!#|e$eySO%{NftzKZqHX6jY#_~iFe@U*GoW>hEb(ev2* SOt2iQBUm=diMCPt8R0*0vn`JR diff --git a/tests/data/ampcor/accuracy-testdata/ovs128-rho0.8/golden/dense_offsets b/tests/data/ampcor/accuracy-testdata/ovs128-rho0.8/golden/dense_offsets index 93a7ba97f558029e629b40257a948919f178383d..9b1a3d0ad363943e0cadcd30fa2c1f10dde6ce40 100644 GIT binary patch literal 1248 zcmY+Bfo;P;3gzSe{zTU_oy>vRu)dIsgqj*eY; zXx}qB_hyeS8qU!?KWO&&_3D}2;H~Nb@7wjN)pNjkF3!&SQk>%h-8oNXCTD8X=YAii z^PBU|lQ}*)%Guv$yyAnFS9|E$5i%#YcO}BRO9p$-(cn4Q-@7Mkw4&KR==6BM`jp7d z{mYqlUbJ%>nA+s1?DtD+et0~3Eip5=^neF=r#<_QybJW3QzE09-3&ina$a|;uEQb QXYj3+(CZF7Y5l?a1IDTD`v3p{ literal 1248 zcmYk3(T#&Z3`G5TWl#u6ltL+#kP=b|ciIxJq`Wt*BP?0AXU1bTsWQ&1YLK;&zf~`) z^SR1aEnp>2s6CjYe_(Gg^$WYEI(K&P_SHvxpfl_bwCW7`Ay3K~YV~yvMsFNw^$zA9 zxLCZ=$`#f4YMAYp=GL2UP3}CfuxNX~nzLTyoaK|=r}wrs--0aPGO=UUc+Wk3)FN7k`l@&#WdpV)Ww9!Km%4)`(smsaFC{{seQ+|IJx#%_?r} z?ue1cs5LZ;d%yhR}Xpn_e1BLTUJt?m^rRH^NtOSS#N9ZQTFN8SSq{j zmkIAqJQ`#4Hyh5^{r+W}x0sE-pF8e@>utI>sQoSRM Date: Thu, 11 Dec 2025 01:36:28 -0800 Subject: [PATCH 04/13] deramp improvement 1) fix the pixelIdx error (thanks to @bhawkins) 2) use double precision for phase correction 3) add derampAxis parameter for options to deramp along certain direction 4) move cuDeramp from oversampler to chunk processor; making the code more readable --- .../matchtemplate/pycuampcor/cuAmpcorChunk.cu | 35 +++- .../pycuampcor/cuAmpcorParameter.cu | 3 +- .../pycuampcor/cuAmpcorParameter.h | 1 + .../matchtemplate/pycuampcor/cuAmpcorUtil.h | 4 +- .../cuda/matchtemplate/pycuampcor/cuDeramp.cu | 149 ++++++++++-------- .../matchtemplate/pycuampcor/cuOverSampler.cu | 26 ++- .../matchtemplate/pycuampcor/cuOverSampler.h | 2 +- .../pycuampcor/cuAmpcorChunk.cpp | 34 +++- .../pycuampcor/cuAmpcorParameter.cpp | 1 + .../pycuampcor/cuAmpcorParameter.h | 1 + .../matchtemplate/pycuampcor/cuAmpcorUtil.h | 4 +- .../matchtemplate/pycuampcor/cuDeramp.cpp | 61 ++++--- .../pycuampcor/cuOverSampler.cpp | 24 ++- .../matchtemplate/pycuampcor/cuOverSampler.h | 2 +- .../cuda/matchtemplate/pycuampcor.cpp | 1 + .../pybind_isce3/matchtemplate/pycuampcor.cpp | 1 + .../packages/nisar/workflows/dense_offsets.py | 16 +- 17 files changed, 224 insertions(+), 141 deletions(-) diff --git a/cxx/isce3/cuda/matchtemplate/pycuampcor/cuAmpcorChunk.cu b/cxx/isce3/cuda/matchtemplate/pycuampcor/cuAmpcorChunk.cu index 8f35fa075..5825ad37d 100644 --- a/cxx/isce3/cuda/matchtemplate/pycuampcor/cuAmpcorChunk.cu +++ b/cxx/isce3/cuda/matchtemplate/pycuampcor/cuAmpcorChunk.cu @@ -76,7 +76,7 @@ void cuAmpcorChunk::run(int idxDown_, int idxAcross_) cuArraysSumCorr(r_corrBatchRawZoomIn, i_corrBatchZoomInValid, r_corrBatchSum, i_corrBatchValidCount, stream); #ifdef CUAMPCOR_DEBUG - r_maxval->outputToFile("r_maxval", stream); + r_maxval->outputToFile("r_corrBatchRawMaxVal", stream); r_corrBatchRawZoomIn->outputToFile("r_corrBatchRawStatZoomIn", stream); i_corrBatchZoomInValid->outputToFile("i_corrBatchZoomInValid", stream); r_corrBatchSum->outputToFile("r_corrBatchSum", stream); @@ -107,9 +107,17 @@ void cuAmpcorChunk::run(int idxDown_, int idxAcross_) maxLocShift->outputToFile("i_maxLocShift", stream); #endif + // deramp reference + cuDeramp(param->derampMethod, c_referenceBatchRaw, param->derampAxis, stream); + +#ifdef CUAMPCOR_DEBUG + // dump the deramped reference image(s) + c_referenceBatchRaw->outputToFile("c_referenceBatchRawDeramped", stream); +#endif + // oversample reference - // (deramping included in oversampler) - referenceBatchOverSampler->execute(c_referenceBatchRaw, c_referenceBatchOverSampled, param->derampMethod); + referenceBatchOverSampler->execute(c_referenceBatchRaw, c_referenceBatchOverSampled); + // take amplitudes cuArraysAbs(c_referenceBatchOverSampled, r_referenceBatchOverSampled, stream); @@ -127,15 +135,28 @@ void cuAmpcorChunk::run(int idxDown_, int idxAcross_) r_referenceBatchOverSampled->outputToFile("r_referenceBatchOverSampledSubMean",stream); #endif - // extract secondary and oversample + // extract secondary images around the max location with a smaller search range cuArraysCopyExtract(c_secondaryBatchRaw, c_secondaryBatchZoomIn, offsetInit, stream); - secondaryBatchOverSampler->execute(c_secondaryBatchZoomIn, c_secondaryBatchOverSampled, param->derampMethod); + +#ifdef CUAMPCOR_DEBUG + // dump the extracted raw secondary image + c_secondaryBatchZoomIn->outputToFile("c_secondaryBatchZoomInRaw", stream); +#endif + + // deramp secondary + cuDeramp(param->derampMethod, c_secondaryBatchZoomIn, param->derampAxis, stream); + +#ifdef CUAMPCOR_DEBUG + // dump the deramped secondary image(s) + c_secondaryBatchZoomIn->outputToFile("c_secondaryBatchZoomInDeramped", stream); +#endif + + // oversample secondary + secondaryBatchOverSampler->execute(c_secondaryBatchZoomIn, c_secondaryBatchOverSampled); // take amplitudes cuArraysAbs(c_secondaryBatchOverSampled, r_secondaryBatchOverSampled, stream); #ifdef CUAMPCOR_DEBUG - // dump the extracted raw secondary image - c_secondaryBatchZoomIn->outputToFile("c_secondaryBatchZoomIn", stream); // dump the oversampled secondary image(s) c_secondaryBatchOverSampled->outputToFile("c_secondaryBatchOverSampled", stream); r_secondaryBatchOverSampled->outputToFile("r_secondaryBatchOverSampled", stream); diff --git a/cxx/isce3/cuda/matchtemplate/pycuampcor/cuAmpcorParameter.cu b/cxx/isce3/cuda/matchtemplate/pycuampcor/cuAmpcorParameter.cu index 9acc6b294..6e058df0c 100644 --- a/cxx/isce3/cuda/matchtemplate/pycuampcor/cuAmpcorParameter.cu +++ b/cxx/isce3/cuda/matchtemplate/pycuampcor/cuAmpcorParameter.cu @@ -22,7 +22,8 @@ cuAmpcorParameter::cuAmpcorParameter() algorithm = 0; //0 freq; 1 time deviceID = 0; nStreams = 1; - derampMethod = 1; + derampMethod = 1; // average deramp + derampAxis = 2; // both directions windowSizeWidthRaw = 64; windowSizeHeightRaw = 64; diff --git a/cxx/isce3/cuda/matchtemplate/pycuampcor/cuAmpcorParameter.h b/cxx/isce3/cuda/matchtemplate/pycuampcor/cuAmpcorParameter.h index 357f36ba9..6c5a01b49 100644 --- a/cxx/isce3/cuda/matchtemplate/pycuampcor/cuAmpcorParameter.h +++ b/cxx/isce3/cuda/matchtemplate/pycuampcor/cuAmpcorParameter.h @@ -44,6 +44,7 @@ class cuAmpcorParameter{ int deviceID; ///< Targeted GPU device ID: use -1 to auto select int nStreams; ///< Number of streams to asynchonize data transfers and compute kernels int derampMethod; ///< Method for deramping 0=None, 1=average + int derampAxis; ///< Axis for deramping 0=down (azimuth) 1=across (range), 2=both axes // chip or window size for raw data int windowSizeHeightRaw; ///< Template window height (original size) diff --git a/cxx/isce3/cuda/matchtemplate/pycuampcor/cuAmpcorUtil.h b/cxx/isce3/cuda/matchtemplate/pycuampcor/cuAmpcorUtil.h index d56a01c67..59be4cbc0 100644 --- a/cxx/isce3/cuda/matchtemplate/pycuampcor/cuAmpcorUtil.h +++ b/cxx/isce3/cuda/matchtemplate/pycuampcor/cuAmpcorUtil.h @@ -46,8 +46,8 @@ void cuArraysC2R(cuArrays *image1, cuArrays *image2, cudaStream_t void cuArraysAbs(cuArrays *image1, cuArrays *image2, cudaStream_t stream); // cuDeramp.cu: deramping phase -void cuDeramp(int method, cuArrays *images, cudaStream_t stream); -void cuDerampMethod1(cuArrays *images, cudaStream_t stream); +void cuDeramp(const int method, cuArrays *images, const int axis, cudaStream_t stream); +void cuLinearDeramp(cuArrays *images, const int axis, cudaStream_t stream); // cuArraysPadding.cu: various utilities for oversampling padding void cuArraysPadding(cuArrays *image1, cuArrays *image2, cudaStream_t stream); diff --git a/cxx/isce3/cuda/matchtemplate/pycuampcor/cuDeramp.cu b/cxx/isce3/cuda/matchtemplate/pycuampcor/cuDeramp.cu index 25be2e59f..786b9d4dd 100644 --- a/cxx/isce3/cuda/matchtemplate/pycuampcor/cuDeramp.cu +++ b/cxx/isce3/cuda/matchtemplate/pycuampcor/cuDeramp.cu @@ -11,9 +11,9 @@ * Method 0 or else: skip deramping * */ - -#include "cuArrays.h" -#include "float2.h" + +#include "cuArrays.h" +#include "float2.h" #include #include "cudaError.h" #include "cudaUtil.h" @@ -27,14 +27,14 @@ // cuda does not have a good support on volatile vector struct, e.g. float2 // have to use regular float type for shared memory (volatile) data // the following methods are defined to operate float2/complex objects through float -inline static __device__ void copyToShared(volatile float *s, const int i, const float2 x, const int block) +inline static __device__ void copyToShared(volatile float *s, const int i, const float2 x, const int block) { s[i] = x.x; s[i+block] = x.y; } -inline static __device__ void copyFromShared(float2 &x, volatile float *s, const int i, const int block) +inline static __device__ void copyFromShared(float2 &x, volatile float *s, const int i, const int block) { x.x = s[i]; x.y = s[i+block]; } -inline static __device__ void addInShared(volatile float *s, const int i, const int j, const int block) +inline static __device__ void addInShared(volatile float *s, const int i, const int j, const int block) { s[i] += s[i+j]; s[i+block] += s[i+j+block];} @@ -45,72 +45,87 @@ __device__ void complexSumReduceBlock(float2& sum, volatile float *shmem) const int tid = threadIdx.x; copyToShared(shmem, tid, sum, nthreads); __syncthreads(); - + if (nthreads >=1024) { if (tid < 512) { addInShared(shmem, tid, 512, nthreads); } __syncthreads(); } if (nthreads >= 512) { if (tid < 256) { addInShared(shmem, tid, 256, nthreads); } __syncthreads(); } if (nthreads >= 256) { if (tid < 128) { addInShared(shmem, tid, 128, nthreads); } __syncthreads(); } if (nthreads >= 128) { if (tid < 64) { addInShared(shmem, tid, 64, nthreads); } __syncthreads(); } if (tid < 32) - { + { addInShared(shmem, tid, 32, nthreads); addInShared(shmem, tid, 16, nthreads); addInShared(shmem, tid, 8, nthreads); addInShared(shmem, tid, 4, nthreads); addInShared(shmem, tid, 2, nthreads); - addInShared(shmem, tid, 1, nthreads); + addInShared(shmem, tid, 1, nthreads); } __syncthreads(); copyFromShared(sum, shmem, 0, nthreads); } -// cuda kernel for cuDerampMethod1 +// cuda kernel for cuLinearDeramp with Method 1 template -__global__ void cuDerampMethod1_kernel(float2 *images, const int imageNX, int const imageNY, - const int imageSize, const int nImages, const float normCoef) +__global__ void cuLinearDeramp_kernel(float2 *images, const int imageNX, int const imageNY, + const int imageSize, const int nImages, const float normCoef, const int axis) { __shared__ float shmem[2*nthreads]; int pixelIdx, pixelIdxX, pixelIdxY; - - const int bid = blockIdx.x; + + const int bid = blockIdx.x; if(bid >= nImages) return; float2 *image = images+ bid*imageSize; - const int tid = threadIdx.x; - float2 phaseDiffY = make_float2(0.0f, 0.0f); - for (int i = tid; i < imageSize; i += nthreads) { - pixelIdxY = i % imageNY; - if(pixelIdxY < imageNY -1) { - pixelIdx = i; - float2 cprod = complexMulConj( image[pixelIdx], image[pixelIdx+1]); - phaseDiffY += cprod; - } - } - complexSumReduceBlock(phaseDiffY, shmem); - //phaseDiffY *= normCoef; - float phaseY=atan2f(phaseDiffY.y, phaseDiffY.x); - - float2 phaseDiffX = make_float2(0.0f, 0.0f); - for (int i = tid; i < imageSize; i += nthreads) { - pixelIdxX = i / imageNY; - if(pixelIdxX < imageNX -1) { - pixelIdx = i; - float2 cprod = complexMulConj(image[i], image[i+imageNY]); - phaseDiffX += cprod; + const int tid = threadIdx.x; + + // average phase ramp along row/range direction + double phaseY = 0.0; + if (axis != 0) + { + float2 phaseDiffY = make_float2(0.0f, 0.0f); + for (int i = tid; i < imageSize; i += nthreads) { + pixelIdxY = i % imageNY; + if(pixelIdxY < imageNY -1) { + pixelIdx = i; + float2 cprod = complexMulConj( image[pixelIdx], image[pixelIdx+1]); + phaseDiffY += cprod; + } } - } - - complexSumReduceBlock(phaseDiffX, shmem); - - //phaseDiffX *= normCoef; - float phaseX = atan2f(phaseDiffX.y, phaseDiffX.x); //+FLT_EPSILON - + complexSumReduceBlock(phaseDiffY, shmem); + //phaseDiffY *= normCoef; + phaseY=atan2(phaseDiffY.y, phaseDiffY.x); + } + + // average phase ramp along column/azimuth direction + double phaseX = 0.0; + if (axis != 1) + { + float2 phaseDiffX = make_float2(0.0f, 0.0f); + for (int i = tid; i < imageSize; i += nthreads) { + pixelIdxX = i / imageNY; + if(pixelIdxX < imageNX -1) { + pixelIdx = i; + float2 cprod = complexMulConj(image[i], image[i+imageNY]); + phaseDiffX += cprod; + } + } + + complexSumReduceBlock(phaseDiffX, shmem); + + //phaseDiffX *= normCoef; + phaseX = atan2(phaseDiffX.y, phaseDiffX.x); //+FLT_EPSILON + } + // deramp with the estimated phase ramps for (int i = tid; i < imageSize; i += nthreads) - { + { pixelIdxX = i / imageNY; pixelIdxY = i % imageNY; - float phase = pixelIdxX*phaseX + pixelIdxY*phaseY; - float2 phase_factor = make_float2(cosf(phase), sinf(phase)); - image[i] *= phase_factor; - } + // use double to improve accuracy + double phase = pixelIdxX*phaseX + pixelIdxY*phaseY; + double phase_cos = cos(phase); + double phase_sin = sin(phase); + image[i] = make_float2( + image[i].x*phase_cos - image[i].y*phase_sin, + image[i].x*phase_sin + image[i].y*phase_cos); + } } /** @@ -120,38 +135,38 @@ __global__ void cuDerampMethod1_kernel(float2 *images, const int imageNX, int co * @param[in,out] images input/output complex signals * @param[in] stream cuda stream */ -void cuDerampMethod1(cuArrays *images, cudaStream_t stream) +void cuLinearDeramp(cuArrays *images, const int axis, cudaStream_t stream) { - + const dim3 grid(images->count); const int imageSize = images->width*images->height; const float invSize = 1.0f/imageSize; if(imageSize <=64) { - cuDerampMethod1_kernel<64> <<>> - (images->devData, images->height, images->width, - imageSize, images->count, invSize); } - else if(imageSize <=128) { - cuDerampMethod1_kernel<128> <<>> - (images->devData, images->height, images->width, - imageSize, images->count, invSize); } - else if(imageSize <=256) { - cuDerampMethod1_kernel<256> <<>> - (images->devData, images->height, images->width, - imageSize, images->count, invSize); } + cuLinearDeramp_kernel<64> <<>> + (images->devData, images->height, images->width, + imageSize, images->count, invSize, axis); } + else if(imageSize <=128) { + cuLinearDeramp_kernel<128> <<>> + (images->devData, images->height, images->width, + imageSize, images->count, invSize, axis); } + else if(imageSize <=256) { + cuLinearDeramp_kernel<256> <<>> + (images->devData, images->height, images->width, + imageSize, images->count, invSize, axis); } else { - cuDerampMethod1_kernel<512> <<>> - (images->devData, images->height, images->width, - imageSize, images->count, invSize); } - getLastCudaError("cuDerampMethod1 kernel error\n"); + cuLinearDeramp_kernel<512> <<>> + (images->devData, images->height, images->width, + imageSize, images->count, invSize, axis); } + getLastCudaError("cuLinearDeramp kernel error\n"); } - -void cuDeramp(int method, cuArrays *images, cudaStream_t stream) + +void cuDeramp(const int method, cuArrays *images, const int axis, cudaStream_t stream) { switch(method) { case 1: - cuDerampMethod1(images, stream); + cuLinearDeramp(images, axis, stream); break; default: break; diff --git a/cxx/isce3/cuda/matchtemplate/pycuampcor/cuOverSampler.cu b/cxx/isce3/cuda/matchtemplate/pycuampcor/cuOverSampler.cu index 1b6ab6267..d60399023 100644 --- a/cxx/isce3/cuda/matchtemplate/pycuampcor/cuOverSampler.cu +++ b/cxx/isce3/cuda/matchtemplate/pycuampcor/cuOverSampler.cu @@ -1,4 +1,4 @@ -/* +/* * @file cuOverSampler.cu * @brief Implementations of cuOverSamplerR2R (C2C) class */ @@ -22,17 +22,17 @@ */ cuOverSamplerC2C::cuOverSamplerC2C(int inNX, int inNY, int outNX, int outNY, int nImages, cudaStream_t stream_) { - + int inNXp2 = inNX; int inNYp2 = inNY; int outNXp2 = outNX; int outNYp2 = outNY; - + /* if expanded to 2^n int inNXp2 = nextpower2(inNX); int inNYp2 = nextpower2(inNY); int outNXp2 = inNXp2*outNX/inNX; - int outNYp2 = inNYp2*outNY/inNY; + int outNYp2 = inNYp2*outNY/inNY; */ // set up work arrays @@ -67,25 +67,23 @@ void cuOverSamplerC2C::setStream(cudaStream_t stream_) * Execute fft oversampling * @param[in] imagesIn input batch of images * @param[out] imagesOut output batch of images - * @param[in] method phase deramping method */ -void cuOverSamplerC2C::execute(cuArrays *imagesIn, cuArrays *imagesOut, int method) -{ - cuDeramp(method, imagesIn, stream); +void cuOverSamplerC2C::execute(cuArrays *imagesIn, cuArrays *imagesOut) +{ cufft_Error(cufftExecC2C(forwardPlan, imagesIn->devData, workIn->devData, CUFFT_INVERSE )); cuArraysPaddingMany(workIn, workOut, stream); cufft_Error(cufftExecC2C(backwardPlan, workOut->devData, imagesOut->devData, CUFFT_FORWARD)); } /// destructor -cuOverSamplerC2C::~cuOverSamplerC2C() +cuOverSamplerC2C::~cuOverSamplerC2C() { // destroy fft handles cufft_Error(cufftDestroy(forwardPlan)); cufft_Error(cufftDestroy(backwardPlan)); // deallocate work arrays delete(workIn); - delete(workOut); + delete(workOut); } // end of cuOverSamplerC2C @@ -99,7 +97,7 @@ cuOverSamplerC2C::~cuOverSamplerC2C() */ cuOverSamplerR2R::cuOverSamplerR2R(int inNX, int inNY, int outNX, int outNY, int nImages, cudaStream_t stream) { - + int inNXp2 = inNX; int inNYp2 = inNY; int outNXp2 = outNX; @@ -144,14 +142,14 @@ void cuOverSamplerR2R::execute(cuArrays *imagesIn, cuArrays *image cufft_Error(cufftExecC2C(forwardPlan, workSizeIn->devData, workSizeIn->devData, CUFFT_INVERSE)); cuArraysPaddingMany(workSizeIn, workSizeOut, stream); cufft_Error(cufftExecC2C(backwardPlan, workSizeOut->devData, workSizeOut->devData,CUFFT_FORWARD )); - cuArraysCopyExtract(workSizeOut, imagesOut, make_int2(0,0), stream); + cuArraysCopyExtract(workSizeOut, imagesOut, make_int2(0,0), stream); } /// destructor -cuOverSamplerR2R::~cuOverSamplerR2R() +cuOverSamplerR2R::~cuOverSamplerR2R() { cufft_Error(cufftDestroy(forwardPlan)); - cufft_Error(cufftDestroy(backwardPlan)); + cufft_Error(cufftDestroy(backwardPlan)); workSizeIn->deallocate(); workSizeOut->deallocate(); } diff --git a/cxx/isce3/cuda/matchtemplate/pycuampcor/cuOverSampler.h b/cxx/isce3/cuda/matchtemplate/pycuampcor/cuOverSampler.h index 9ddce96b2..a3154b613 100644 --- a/cxx/isce3/cuda/matchtemplate/pycuampcor/cuOverSampler.h +++ b/cxx/isce3/cuda/matchtemplate/pycuampcor/cuOverSampler.h @@ -31,7 +31,7 @@ class cuOverSamplerC2C // set cuda stream void setStream(cudaStream_t stream_); // execute oversampling - void execute(cuArrays *imagesIn, cuArrays *imagesOut, int deramp_method=0); + void execute(cuArrays *imagesIn, cuArrays *imagesOut); // destructor ~cuOverSamplerC2C(); }; diff --git a/cxx/isce3/matchtemplate/pycuampcor/cuAmpcorChunk.cpp b/cxx/isce3/matchtemplate/pycuampcor/cuAmpcorChunk.cpp index 7e8b6fb98..8f5fb65ad 100644 --- a/cxx/isce3/matchtemplate/pycuampcor/cuAmpcorChunk.cpp +++ b/cxx/isce3/matchtemplate/pycuampcor/cuAmpcorChunk.cpp @@ -85,7 +85,7 @@ void cuAmpcorChunk::run(int idxDown_, int idxAcross_) cuArraysSumCorr(r_corrBatchRawZoomIn, i_corrBatchZoomInValid, r_corrBatchSum, i_corrBatchValidCount); #ifdef CUAMPCOR_DEBUG - r_maxval->outputToFile("r_maxval"); + r_maxval->outputToFile("r_corrBatchRawMaxVal"); r_corrBatchRawZoomIn->outputToFile("r_corrBatchRawStatZoomIn"); i_corrBatchZoomInValid->outputToFile("i_corrBatchZoomInValid"); r_corrBatchSum->outputToFile("r_corrBatchSum"); @@ -115,9 +115,16 @@ void cuAmpcorChunk::run(int idxDown_, int idxAcross_) maxLocShift->outputToFile("i_maxLocShift"); #endif + // deramp reference + cuDeramp(param->derampMethod, c_referenceBatchRaw, param->derampAxis); + +#ifdef CUAMPCOR_DEBUG + // dump the deramped reference image(s) + c_referenceBatchRaw->outputToFile("c_referenceBatchRawDeramped"); +#endif + // oversample reference - // (deramping included in oversampler) - referenceBatchOverSampler->execute(c_referenceBatchRaw, c_referenceBatchOverSampled, param->derampMethod); + referenceBatchOverSampler->execute(c_referenceBatchRaw, c_referenceBatchOverSampled); // take amplitudes cuArraysAbs(c_referenceBatchOverSampled, r_referenceBatchOverSampled); @@ -135,15 +142,28 @@ void cuAmpcorChunk::run(int idxDown_, int idxAcross_) r_referenceBatchOverSampled->outputToFile("r_referenceBatchOverSampledSubMean"); #endif - // extract secondary and oversample + // extract secondary for smaller search window cuArraysCopyExtract(c_secondaryBatchRaw, c_secondaryBatchZoomIn, offsetInit); - secondaryBatchOverSampler->execute(c_secondaryBatchZoomIn, c_secondaryBatchOverSampled, param->derampMethod); - // take amplitudes - cuArraysAbs(c_secondaryBatchOverSampled, r_secondaryBatchOverSampled); #ifdef CUAMPCOR_DEBUG // dump the extracted raw secondary image c_secondaryBatchZoomIn->outputToFile("c_secondaryBatchZoomIn"); +#endif + + // deramp secondary + cuDeramp(param->derampMethod, c_secondaryBatchZoomIn, param->derampAxis); +#ifdef CUAMPCOR_DEBUG + // dump the deramped secondary image(s) + c_secondaryBatchZoomIn->outputToFile("c_secondaryBatchZoomInDeramped"); +#endif + + // oversample secondary + secondaryBatchOverSampler->execute(c_secondaryBatchZoomIn, c_secondaryBatchOverSampled); + + // take amplitudes + cuArraysAbs(c_secondaryBatchOverSampled, r_secondaryBatchOverSampled); + +#ifdef CUAMPCOR_DEBUG // dump the oversampled secondary image(s) c_secondaryBatchOverSampled->outputToFile("c_secondaryBatchOverSampled"); r_secondaryBatchOverSampled->outputToFile("r_secondaryBatchOverSampled"); diff --git a/cxx/isce3/matchtemplate/pycuampcor/cuAmpcorParameter.cpp b/cxx/isce3/matchtemplate/pycuampcor/cuAmpcorParameter.cpp index e10e38fc1..b82313fc9 100644 --- a/cxx/isce3/matchtemplate/pycuampcor/cuAmpcorParameter.cpp +++ b/cxx/isce3/matchtemplate/pycuampcor/cuAmpcorParameter.cpp @@ -25,6 +25,7 @@ cuAmpcorParameter::cuAmpcorParameter() deviceID = 0; nStreams = 1; derampMethod = 1; + derampAxis = 2; // both directions windowSizeWidthRaw = 64; windowSizeHeightRaw = 64; diff --git a/cxx/isce3/matchtemplate/pycuampcor/cuAmpcorParameter.h b/cxx/isce3/matchtemplate/pycuampcor/cuAmpcorParameter.h index 5c9258d04..ce6d095bd 100644 --- a/cxx/isce3/matchtemplate/pycuampcor/cuAmpcorParameter.h +++ b/cxx/isce3/matchtemplate/pycuampcor/cuAmpcorParameter.h @@ -46,6 +46,7 @@ class cuAmpcorParameter{ int deviceID; ///< Targeted GPU device ID: use -1 to auto select int nStreams; ///< Number of streams to asynchonize data transfers and compute kernels int derampMethod; ///< Method for deramping 0=None, 1=average + int derampAxis; ///< Axis for deramping 0=down (azimuth) 1=across (range), 2=both axes // chip or window size for raw data int windowSizeHeightRaw; ///< Template window height (original size) diff --git a/cxx/isce3/matchtemplate/pycuampcor/cuAmpcorUtil.h b/cxx/isce3/matchtemplate/pycuampcor/cuAmpcorUtil.h index be318fc9a..7a1b6d373 100644 --- a/cxx/isce3/matchtemplate/pycuampcor/cuAmpcorUtil.h +++ b/cxx/isce3/matchtemplate/pycuampcor/cuAmpcorUtil.h @@ -42,8 +42,8 @@ void cuArraysCopyPadded(cuArrays *imageIn, cuArrays *imageOut); void cuArraysAbs(cuArrays *image1, cuArrays *image2); // cuDeramp.cu: deramping phase -void cuDeramp(int method, cuArrays *images); -void cuDerampMethod1(cuArrays *images); +void cuDeramp(const int method, cuArrays *images, const int axis); +void cuLinearDeramp(cuArrays *images, const int axis); // cuArraysPadding.cu: various utilities for oversampling padding void cuArraysPaddingMany(cuArrays *image1, cuArrays *image2); diff --git a/cxx/isce3/matchtemplate/pycuampcor/cuDeramp.cpp b/cxx/isce3/matchtemplate/pycuampcor/cuDeramp.cpp index fa8b2482a..6924818a2 100644 --- a/cxx/isce3/matchtemplate/pycuampcor/cuDeramp.cpp +++ b/cxx/isce3/matchtemplate/pycuampcor/cuDeramp.cpp @@ -23,40 +23,51 @@ namespace isce3::matchtemplate::pycuampcor { -// kernel for cuDerampMethod1 -static void cuDerampMethod1_kernel(float2 *images, const int imageNX, int const imageNY, - const int imageSize, const int nImages, const float normCoef) +// kernel for linear deramping +static void cuLinearDeramp_kernel(float2 *images, const int imageNX, const int imageNY, + const int imageSize, const int nImages, const float normCoef, const int axis) { for (int k = 0; k < nImages; k++) { float2* image = images + k * imageSize; - double2 phaseDiffY = make_double2(0.0, 0.0); - for (int j = 0; j < imageNX; j++) { - for (int i = 0; i < imageNY - 1; i++) { - const int pixelIdx = j * imageNY + i; - float2 cprod = complexMulConj(image[pixelIdx], image[pixelIdx+1]); - phaseDiffY += cprod; + double phaseY = 0.0; + if(axis != 0) + { + double2 phaseDiffY = make_double2(0.0, 0.0); + for (int j = 0; j < imageNX; j++) { + for (int i = 0; i < imageNY - 1; i++) { + const int pixelIdx = j * imageNY + i; + float2 cprod = complexMulConj(image[pixelIdx], image[pixelIdx+1]); + phaseDiffY += cprod; + } } + phaseY = atan2(phaseDiffY.y, phaseDiffY.x); } - double2 phaseDiffX = make_double2(0.0, 0.0); - for (int j = 0; j < imageNX - 1; j++) { - for (int i = 0; i < imageNY; i++) { - const int pixelIdx = j * imageNY + i; - float2 cprod = complexMulConj(image[pixelIdx], image[pixelIdx+imageNY]); - phaseDiffX += cprod; + double phaseX = 0.0; + if(axis != 1) + { + double2 phaseDiffX = make_double2(0.0, 0.0); + for (int j = 0; j < imageNX - 1; j++) { + for (int i = 0; i < imageNY; i++) { + const int pixelIdx = j * imageNY + i; + float2 cprod = complexMulConj(image[pixelIdx], image[pixelIdx+imageNY]); + phaseDiffX += cprod; + } } + phaseX = atan2(phaseDiffX.y, phaseDiffX.x); } - float phaseX = atan2f(phaseDiffX.y, phaseDiffX.x); - float phaseY = atan2f(phaseDiffY.y, phaseDiffY.x); for (int i = 0; i < imageSize; i++) { const int pixelIdxX = i / imageNY; const int pixelIdxY = i % imageNY; - float phase = pixelIdxX*phaseX + pixelIdxY*phaseY; - float2 phase_factor = make_float2(cosf(phase), sinf(phase)); - image[i] *= phase_factor; + double phase = pixelIdxX*phaseX + pixelIdxY*phaseY; + double phase_cos = cos(phase); + double phase_sin = sin(phase); + image[i] = make_float2( + image[i].x*phase_cos - image[i].y*phase_sin, + image[i].x*phase_sin + image[i].y*phase_cos); } } } @@ -67,20 +78,20 @@ static void cuDerampMethod1_kernel(float2 *images, const int imageNX, int const * and the average phase shift is obtained as atan(\sum imag / \sum real). * @param[inout] images input/output complex signals */ -void cuDerampMethod1(cuArrays *images) +void cuLinearDeramp(cuArrays *images, const int axis) { const int imageSize = images->width*images->height; const float invSize = 1.0f/imageSize; - cuDerampMethod1_kernel(images->devData, images->height, images->width, - imageSize, images->count, invSize); + cuLinearDeramp_kernel(images->devData, images->height, images->width, + imageSize, images->count, invSize, axis); } -void cuDeramp(int method, cuArrays *images) +void cuDeramp(const int method, cuArrays *images, const int axis) { switch(method) { case 1: - cuDerampMethod1(images); + cuLinearDeramp(images, axis); break; default: break; diff --git a/cxx/isce3/matchtemplate/pycuampcor/cuOverSampler.cpp b/cxx/isce3/matchtemplate/pycuampcor/cuOverSampler.cpp index 4b84e6aa6..e94de10ff 100644 --- a/cxx/isce3/matchtemplate/pycuampcor/cuOverSampler.cpp +++ b/cxx/isce3/matchtemplate/pycuampcor/cuOverSampler.cpp @@ -1,4 +1,4 @@ -/* +/* * @file cuOverSampler.cu * @brief Implementations of cuOverSamplerR2R (C2C) class */ @@ -23,17 +23,17 @@ cuOverSamplerC2C::cuOverSamplerC2C( cuArrays *imagesIn, cuArrays *imagesOut, int inNX, int inNY, int outNX, int outNY, int nImages) { - + int inNXp2 = inNX; int inNYp2 = inNY; int outNXp2 = outNX; int outNYp2 = outNY; - + /* if expanded to 2^n int inNXp2 = nextpower2(inNX); int inNYp2 = nextpower2(inNY); int outNXp2 = inNXp2*outNX/inNX; - int outNYp2 = inNYp2*outNY/inNY; + int outNYp2 = inNYp2*outNY/inNY; */ // set up work arrays @@ -70,25 +70,23 @@ cuOverSamplerC2C::cuOverSamplerC2C( * Execute fft oversampling * @param[in] imagesIn input batch of images * @param[out] imagesOut output batch of images - * @param[in] method phase deramping method */ -void cuOverSamplerC2C::execute(cuArrays *imagesIn, cuArrays *imagesOut, int method) -{ - cuDeramp(method, imagesIn); +void cuOverSamplerC2C::execute(cuArrays *imagesIn, cuArrays *imagesOut) +{ fftwf_execute(forwardPlan); cuArraysPaddingMany(workIn, workOut); fftwf_execute(backwardPlan); } /// destructor -cuOverSamplerC2C::~cuOverSamplerC2C() +cuOverSamplerC2C::~cuOverSamplerC2C() { // destroy fft handles fftwf_destroy_plan(forwardPlan); fftwf_destroy_plan(backwardPlan); // deallocate work arrays delete(workIn); - delete(workOut); + delete(workOut); } // end of cuOverSamplerC2C @@ -101,7 +99,7 @@ cuOverSamplerC2C::~cuOverSamplerC2C() */ cuOverSamplerR2R::cuOverSamplerR2R(int inNX, int inNY, int outNX, int outNY, int nImages) { - + int inNXp2 = inNX; int inNYp2 = inNY; int outNXp2 = outNX; @@ -151,11 +149,11 @@ void cuOverSamplerR2R::execute(cuArrays *imagesIn, cuArrays *image fftwf_execute(forwardPlan); cuArraysPaddingMany(workSizeIn, workSizeOut); fftwf_execute(backwardPlan); - cuArraysCopyExtract(workSizeOut, imagesOut, make_int2(0,0)); + cuArraysCopyExtract(workSizeOut, imagesOut, make_int2(0,0)); } /// destructor -cuOverSamplerR2R::~cuOverSamplerR2R() +cuOverSamplerR2R::~cuOverSamplerR2R() { fftwf_destroy_plan(forwardPlan); fftwf_destroy_plan(backwardPlan); diff --git a/cxx/isce3/matchtemplate/pycuampcor/cuOverSampler.h b/cxx/isce3/matchtemplate/pycuampcor/cuOverSampler.h index 2b79a67b2..6a57b9509 100644 --- a/cxx/isce3/matchtemplate/pycuampcor/cuOverSampler.h +++ b/cxx/isce3/matchtemplate/pycuampcor/cuOverSampler.h @@ -33,7 +33,7 @@ class cuOverSamplerC2C cuOverSamplerC2C(cuArrays *imagesIn, cuArrays *imagesOut, int inNX, int inNY, int outNX, int outNY, int nImages); // execute oversampling - void execute(cuArrays *imagesIn, cuArrays *imagesOut, int deramp_method=0); + void execute(cuArrays *imagesIn, cuArrays *imagesOut); // destructor ~cuOverSamplerC2C(); }; diff --git a/python/extensions/pybind_isce3/cuda/matchtemplate/pycuampcor.cpp b/python/extensions/pybind_isce3/cuda/matchtemplate/pycuampcor.cpp index 7e884ba45..036376c56 100644 --- a/python/extensions/pybind_isce3/cuda/matchtemplate/pycuampcor.cpp +++ b/python/extensions/pybind_isce3/cuda/matchtemplate/pycuampcor.cpp @@ -32,6 +32,7 @@ void addbinding_pycuampcor(pybind11::module& m) .DEF_PARAM(int, deviceID) .DEF_PARAM(int, nStreams) .DEF_PARAM(int, derampMethod) + .DEF_PARAM(int, derampAxis) .DEF_PARAM(str, referenceImageName) .DEF_PARAM(int, referenceImageHeight) diff --git a/python/extensions/pybind_isce3/matchtemplate/pycuampcor.cpp b/python/extensions/pybind_isce3/matchtemplate/pycuampcor.cpp index d4ff33b32..f783ae709 100644 --- a/python/extensions/pybind_isce3/matchtemplate/pycuampcor.cpp +++ b/python/extensions/pybind_isce3/matchtemplate/pycuampcor.cpp @@ -32,6 +32,7 @@ void addbinding_pycuampcor_cpu(pybind11::module& m) .DEF_PARAM(int, deviceID) .DEF_PARAM(int, nStreams) .DEF_PARAM(int, derampMethod) + .DEF_PARAM(int, derampAxis) .DEF_PARAM(str, referenceImageName) .DEF_PARAM(int, referenceImageHeight) diff --git a/python/packages/nisar/workflows/dense_offsets.py b/python/packages/nisar/workflows/dense_offsets.py index e897b19cd..54c785323 100644 --- a/python/packages/nisar/workflows/dense_offsets.py +++ b/python/packages/nisar/workflows/dense_offsets.py @@ -202,7 +202,21 @@ def set_optional_attributes(ampcor_obj, cfg, length, width): if cfg['deramping_method'] is not None: deramp = cfg['deramping_method'] - ampcor_obj.derampMethod = 0 if deramp == "magnitude" else 1 + if deramp == "magnitude": + ampcor_obj.derampMethod = 0 + elif deramp == "linear": + ampcor_obj.derampMethod = 1 + else: # skip deramping + ampcor_obj.derampMethod = 2 + + if cfg['deramping_axis'] is not None: + deramp_axis = cfg['deramping_axis'] + if deramp_axis == "azimuth": + ampcor_obj.derampAxis = 0 + elif deramp_axis == "range": + ampcor_obj.derampAxis = 1 + else: # both directions + ampcor_obj.derampAxis = 2 if cfg['correlation_statistics_zoom'] is not None: ampcor_obj.corrStatWindowSize = cfg['correlation_statistics_zoom'] From 619df3a063a606e60c227cf7fc2214e6989ae1de Mon Sep 17 00:00:00 2001 From: Xiaodong Huang Date: Thu, 11 Dec 2025 19:50:29 +0000 Subject: [PATCH 05/13] update the runconfig --- .../packages/nisar/workflows/dense_offsets.py | 2 +- .../nisar/workflows/offsets_product.py | 21 +++++++++++++++++-- share/nisar/defaults/insar.yaml | 2 ++ share/nisar/schemas/insar.yaml | 3 +++ 4 files changed, 25 insertions(+), 3 deletions(-) diff --git a/python/packages/nisar/workflows/dense_offsets.py b/python/packages/nisar/workflows/dense_offsets.py index 54c785323..4a1feddd3 100644 --- a/python/packages/nisar/workflows/dense_offsets.py +++ b/python/packages/nisar/workflows/dense_offsets.py @@ -204,7 +204,7 @@ def set_optional_attributes(ampcor_obj, cfg, length, width): deramp = cfg['deramping_method'] if deramp == "magnitude": ampcor_obj.derampMethod = 0 - elif deramp == "linear": + elif deramp == "complex": ampcor_obj.derampMethod = 1 else: # skip deramping ampcor_obj.derampMethod = 2 diff --git a/python/packages/nisar/workflows/offsets_product.py b/python/packages/nisar/workflows/offsets_product.py index ed5ef8358..724517b35 100644 --- a/python/packages/nisar/workflows/offsets_product.py +++ b/python/packages/nisar/workflows/offsets_product.py @@ -256,8 +256,25 @@ def set_ampcor_params(cfg, ampcor_obj): ampcor_obj.algorithm = 0 if cfg['cross_correlation_domain'] == \ 'frequency' else 1 ampcor_obj.rawDataOversamplingFactor = cfg['slc_oversampling_factor'] - ampcor_obj.derampMethod = 0 if cfg['deramping_method'] == \ - 'magnitude' else 1 + + if cfg['deramping_method'] is not None: + deramp = cfg['deramping_method'] + if deramp == "magnitude": + ampcor_obj.derampMethod = 0 + elif deramp == "complex": + ampcor_obj.derampMethod = 1 + else: # skip deramping + ampcor_obj.derampMethod = 2 + + if cfg['deramping_axis'] is not None: + deramp_axis = cfg['deramping_axis'] + if deramp_axis == "azimuth": + ampcor_obj.derampAxis = 0 + elif deramp_axis == "range": + ampcor_obj.derampAxis = 1 + else: # both directions + ampcor_obj.derampAxis = 2 + ampcor_obj.corrStatWindowSize = cfg['correlation_statistics_zoom'] ampcor_obj.corrSurfaceZoomInWindow = cfg['correlation_surface_zoom'] ampcor_obj.corrSurfaceOverSamplingFactor = cfg[ diff --git a/share/nisar/defaults/insar.yaml b/share/nisar/defaults/insar.yaml index 9280e7604..815987953 100644 --- a/share/nisar/defaults/insar.yaml +++ b/share/nisar/defaults/insar.yaml @@ -410,6 +410,8 @@ runconfig: slc_oversampling_factor: 2 # Deramp data prior to FFT: magnitude or complex (linear phase ramp) deramping_method: 'complex' + # Deramp data axis prior to FFT: range, azimuth, or both + deramping_axis: 'azimuth' # Flag to use constant range/azimuth offsets in dense offsets estimation use_gross_offsets: True # Constant offset along slant range to guide dense offset estimation diff --git a/share/nisar/schemas/insar.yaml b/share/nisar/schemas/insar.yaml index 999dc97ad..c8455d154 100644 --- a/share/nisar/schemas/insar.yaml +++ b/share/nisar/schemas/insar.yaml @@ -332,6 +332,9 @@ dense_offsets_options: # Deramp data prior to FFT: magnitude or complex (linear phase ramp) deramping_method: enum('magnitude', 'complex', required=False) + # Deramp data axis prior to FFT: range, azimuth, or both + deramping_axis: enum('range', 'azimuth', 'both', required=False) + # Flag to use constant range/azimuth offsets in dense offsets estimation use_gross_offsets: bool(required=False) From 8bc8712e270e60f0c79b86a887f3d3c4546da502 Mon Sep 17 00:00:00 2001 From: Xiaodong Huang Date: Wed, 17 Dec 2025 19:57:09 +0000 Subject: [PATCH 06/13] add the complex_no_deramp option --- share/nisar/defaults/insar.yaml | 6 ++++-- share/nisar/schemas/insar.yaml | 11 +++++++---- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/share/nisar/defaults/insar.yaml b/share/nisar/defaults/insar.yaml index 815987953..a4ef7a5ef 100644 --- a/share/nisar/defaults/insar.yaml +++ b/share/nisar/defaults/insar.yaml @@ -408,7 +408,7 @@ runconfig: # Anti-aliasing oversampling factor to apply to reference/secondary SLC # prior to cross-correlation computation slc_oversampling_factor: 2 - # Deramp data prior to FFT: magnitude or complex (linear phase ramp) + # Deramp data prior to FFT: magnitude , complex (linear phase ramp), or complex without deramp deramping_method: 'complex' # Deramp data axis prior to FFT: range, azimuth, or both deramping_axis: 'azimuth' @@ -461,8 +461,10 @@ runconfig: start_pixel_azimuth: # Cross-correlation domain cross_correlation_domain: 'frequency' - # Deramp data prior to FFT: magnitude or complex (linear phase ramp) + # Deramp data prior to FFT: magnitude , complex (linear phase ramp), or complex without deramp deramping_method: 'complex' + # Deramp data axis prior to FFT: range, azimuth, or both + deramping_axis: 'azimuth' # Anti-aliasing oversampling factor to apply to reference/secondary SLC # prior to cross-correlation computation slc_oversampling_factor: 2 diff --git a/share/nisar/schemas/insar.yaml b/share/nisar/schemas/insar.yaml index c8455d154..6381ccdc7 100644 --- a/share/nisar/schemas/insar.yaml +++ b/share/nisar/schemas/insar.yaml @@ -329,8 +329,8 @@ dense_offsets_options: # prior to cross-correlation computation slc_oversampling_factor: int(min=2, max=5, required=False) - # Deramp data prior to FFT: magnitude or complex (linear phase ramp) - deramping_method: enum('magnitude', 'complex', required=False) + # Deramp data prior to FFT: magnitude , complex (linear phase ramp), or complex without deramp + deramping_method: enum('magnitude', 'complex', 'complex_no_deramp', required=False) # Deramp data axis prior to FFT: range, azimuth, or both deramping_axis: enum('range', 'azimuth', 'both', required=False) @@ -412,8 +412,11 @@ offsets_product_options: # prior to cross-correlation computation slc_oversampling_factor: int(min=2, max=5, required=False) - # Deramp data prior to FFT: magnitude or complex (linear phase ramp) - deramping_method: enum('magnitude', 'complex', required=False) + # Deramp data prior to FFT: magnitude , complex (linear phase ramp), or complex without deramp + deramping_method: enum('magnitude', 'complex', 'complex_no_deramp', required=False) + + # Deramp data axis prior to FFT: range, azimuth, or both + deramping_axis: enum('range', 'azimuth', 'both', required=False) # Flag to use constant range/azimuth offsets in dense offsets estimation use_gross_offsets: bool(required=False) From e84ec704dc1c2bba0edfdf10ddd8b46ea921a92d Mon Sep 17 00:00:00 2001 From: Lijun Zhu <38738201+lijun99@users.noreply.github.com> Date: Wed, 14 Jan 2026 16:28:36 -0800 Subject: [PATCH 07/13] Apply suggestions from code review from @bhawkins : - commenting and value checks on deramp_method and deramp_axis - use explicit sincos for potential performance boost Co-authored-by: Brian Hawkins <1729052+bhawkins@users.noreply.github.com> --- cxx/isce3/cuda/matchtemplate/pycuampcor/cuAmpcorUtil.h | 3 +++ cxx/isce3/cuda/matchtemplate/pycuampcor/cuDeramp.cu | 8 +++++--- python/packages/nisar/workflows/offsets_product.py | 4 +++- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/cxx/isce3/cuda/matchtemplate/pycuampcor/cuAmpcorUtil.h b/cxx/isce3/cuda/matchtemplate/pycuampcor/cuAmpcorUtil.h index 59be4cbc0..3339d82b0 100644 --- a/cxx/isce3/cuda/matchtemplate/pycuampcor/cuAmpcorUtil.h +++ b/cxx/isce3/cuda/matchtemplate/pycuampcor/cuAmpcorUtil.h @@ -46,6 +46,9 @@ void cuArraysC2R(cuArrays *image1, cuArrays *image2, cudaStream_t void cuArraysAbs(cuArrays *image1, cuArrays *image2, cudaStream_t stream); // cuDeramp.cu: deramping phase +// `cuDeramp` calls a deramp implementation (or does nothing) based on the value of `method`: +// `method=1` for cuLinearDeramp, any other value for no-op +// `cuLinearDeramp` Estimates the phase gradient over the chip and removes it. void cuDeramp(const int method, cuArrays *images, const int axis, cudaStream_t stream); void cuLinearDeramp(cuArrays *images, const int axis, cudaStream_t stream); diff --git a/cxx/isce3/cuda/matchtemplate/pycuampcor/cuDeramp.cu b/cxx/isce3/cuda/matchtemplate/pycuampcor/cuDeramp.cu index 786b9d4dd..81d5d2733 100644 --- a/cxx/isce3/cuda/matchtemplate/pycuampcor/cuDeramp.cu +++ b/cxx/isce3/cuda/matchtemplate/pycuampcor/cuDeramp.cu @@ -120,8 +120,8 @@ __global__ void cuLinearDeramp_kernel(float2 *images, const int imageNX, int con pixelIdxY = i % imageNY; // use double to improve accuracy double phase = pixelIdxX*phaseX + pixelIdxY*phaseY; - double phase_cos = cos(phase); - double phase_sin = sin(phase); + double phase_sin, phase_cos; + sincos(phase, &phase_sin, &phase_cos); image[i] = make_float2( image[i].x*phase_cos - image[i].y*phase_sin, image[i].x*phase_sin + image[i].y*phase_cos); @@ -137,7 +137,9 @@ __global__ void cuLinearDeramp_kernel(float2 *images, const int imageNX, int con */ void cuLinearDeramp(cuArrays *images, const int axis, cudaStream_t stream) { - + if ((axis < 0) or (axis > 2)) { + throw std::invalid_argument("deramp axis must be 0, 1, or 2"); + } const dim3 grid(images->count); const int imageSize = images->width*images->height; const float invSize = 1.0f/imageSize; diff --git a/python/packages/nisar/workflows/offsets_product.py b/python/packages/nisar/workflows/offsets_product.py index 724517b35..5b29ac94e 100644 --- a/python/packages/nisar/workflows/offsets_product.py +++ b/python/packages/nisar/workflows/offsets_product.py @@ -272,8 +272,10 @@ def set_ampcor_params(cfg, ampcor_obj): ampcor_obj.derampAxis = 0 elif deramp_axis == "range": ampcor_obj.derampAxis = 1 - else: # both directions + elif deramp_axis == "both": ampcor_obj.derampAxis = 2 + else: + raise ValueError(f"invalid {deramp_axis=}") ampcor_obj.corrStatWindowSize = cfg['correlation_statistics_zoom'] ampcor_obj.corrSurfaceZoomInWindow = cfg['correlation_surface_zoom'] From 75b7f18266073e8fcb0bb7889ec6156a52344356 Mon Sep 17 00:00:00 2001 From: Lijun Zhu Date: Wed, 14 Jan 2026 16:34:45 -0800 Subject: [PATCH 08/13] use explicit sincos for ampcor cpu module as well --- cxx/isce3/matchtemplate/pycuampcor/cuDeramp.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cxx/isce3/matchtemplate/pycuampcor/cuDeramp.cpp b/cxx/isce3/matchtemplate/pycuampcor/cuDeramp.cpp index 6924818a2..0a993d1f4 100644 --- a/cxx/isce3/matchtemplate/pycuampcor/cuDeramp.cpp +++ b/cxx/isce3/matchtemplate/pycuampcor/cuDeramp.cpp @@ -63,8 +63,8 @@ static void cuLinearDeramp_kernel(float2 *images, const int imageNX, const int i const int pixelIdxX = i / imageNY; const int pixelIdxY = i % imageNY; double phase = pixelIdxX*phaseX + pixelIdxY*phaseY; - double phase_cos = cos(phase); - double phase_sin = sin(phase); + double phase_sin, phase_cos; + sincos(phase, &phase_sin, &phase_cos); image[i] = make_float2( image[i].x*phase_cos - image[i].y*phase_sin, image[i].x*phase_sin + image[i].y*phase_cos); From ed931b504a50fbe150bce8768ee33f99548b7748 Mon Sep 17 00:00:00 2001 From: Lijun Zhu Date: Wed, 14 Jan 2026 16:48:50 -0800 Subject: [PATCH 09/13] add comments on deramp_method in runconfig --- share/nisar/defaults/insar.yaml | 4 ++-- share/nisar/schemas/insar.yaml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/share/nisar/defaults/insar.yaml b/share/nisar/defaults/insar.yaml index a4ef7a5ef..1072ccf3e 100644 --- a/share/nisar/defaults/insar.yaml +++ b/share/nisar/defaults/insar.yaml @@ -408,7 +408,7 @@ runconfig: # Anti-aliasing oversampling factor to apply to reference/secondary SLC # prior to cross-correlation computation slc_oversampling_factor: 2 - # Deramp data prior to FFT: magnitude , complex (linear phase ramp), or complex without deramp + # Deramp data prior to FFT: 'magnitude', 'complex' (linear phase ramp), or 'complex_no_deramp' deramping_method: 'complex' # Deramp data axis prior to FFT: range, azimuth, or both deramping_axis: 'azimuth' @@ -461,7 +461,7 @@ runconfig: start_pixel_azimuth: # Cross-correlation domain cross_correlation_domain: 'frequency' - # Deramp data prior to FFT: magnitude , complex (linear phase ramp), or complex without deramp + # Deramp data prior to FFT: 'magnitude', 'complex' (linear phase ramp), or 'complex_no_deramp' deramping_method: 'complex' # Deramp data axis prior to FFT: range, azimuth, or both deramping_axis: 'azimuth' diff --git a/share/nisar/schemas/insar.yaml b/share/nisar/schemas/insar.yaml index 6381ccdc7..c9e4439e9 100644 --- a/share/nisar/schemas/insar.yaml +++ b/share/nisar/schemas/insar.yaml @@ -329,7 +329,7 @@ dense_offsets_options: # prior to cross-correlation computation slc_oversampling_factor: int(min=2, max=5, required=False) - # Deramp data prior to FFT: magnitude , complex (linear phase ramp), or complex without deramp + # Deramp data prior to FFT: 'magnitude' , 'complex' (linear phase ramp), or 'complex_no_deramp' deramping_method: enum('magnitude', 'complex', 'complex_no_deramp', required=False) # Deramp data axis prior to FFT: range, azimuth, or both @@ -412,7 +412,7 @@ offsets_product_options: # prior to cross-correlation computation slc_oversampling_factor: int(min=2, max=5, required=False) - # Deramp data prior to FFT: magnitude , complex (linear phase ramp), or complex without deramp + # Deramp data prior to FFT: 'magnitude', 'complex' (linear phase ramp), or 'complex_no_deramp' deramping_method: enum('magnitude', 'complex', 'complex_no_deramp', required=False) # Deramp data axis prior to FFT: range, azimuth, or both From aa8d2bb7d0aa17697dbcbab4f7ee3d05879c1f43 Mon Sep 17 00:00:00 2001 From: Lijun Zhu Date: Wed, 14 Jan 2026 17:07:01 -0800 Subject: [PATCH 10/13] add math.h for sincos --- cxx/isce3/matchtemplate/pycuampcor/cuDeramp.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/cxx/isce3/matchtemplate/pycuampcor/cuDeramp.cpp b/cxx/isce3/matchtemplate/pycuampcor/cuDeramp.cpp index 0a993d1f4..a5dc67808 100644 --- a/cxx/isce3/matchtemplate/pycuampcor/cuDeramp.cpp +++ b/cxx/isce3/matchtemplate/pycuampcor/cuDeramp.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include namespace isce3::matchtemplate::pycuampcor { From 1abc35ecd8786e019c4c7589bbe759651a650e11 Mon Sep 17 00:00:00 2001 From: Lijun Zhu Date: Thu, 15 Jan 2026 13:18:41 -0800 Subject: [PATCH 11/13] update ampcor test golden data and adjust the tolerance for correlation peak --- .../ovs128-rho0.8/golden/correlation_peak | Bin 624 -> 624 bytes .../ovs128-rho0.8/golden/dense_offsets | Bin 1248 -> 1248 bytes .../isce3/matchtemplate/test_ampcor.py | 4 +++- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/data/ampcor/accuracy-testdata/ovs128-rho0.8/golden/correlation_peak b/tests/data/ampcor/accuracy-testdata/ovs128-rho0.8/golden/correlation_peak index 2d13833a8d0b5a7c09a3958e194580c245fc9d4c..30107f3642f7f6f77fee81f937271c7c0f5c336b 100644 GIT binary patch literal 624 zcmWNOZAepL0EHv$!-CxA?!9~8d+#a)^sa z%?e5?5KAdB&BExz3^V&tfsmP@4$@!|64}BavhM$LIOjY^?$kM#>85VTB=U=(N1mj+#lhn!8*jcUMEE5qlO6Pxfu)! zJ|0o2eV|ZZ9Yy69jpus=HEx}f!*0T}Ch5I`vh@au#gajrWU5rB^PQJqfk{EXp#Pi6 zqaTurg$Cn|lHqAd$5w-pRVHgv1iABc`Yd0Y)R+huY>M;{(IwgcO49yT(2B|XwO&T^ z4Net$84p-24{0=&IBCp_Wi}kcu%%7*D{P2Uh`y^4Xcn|PZIq-$;b~FWy+Wh-ydKKr}fdy+%uT6eD|$B@#_@Q1{FkxO3p7yDAmQtr)ZLJQGQv{ sQ|Y2%R5D;OD!O5P5VBLf*v52)gR5aXvGE!w4+yeCZnA$W^p`pK58;5+t^fc4 literal 624 zcmWN_dn^=i0KoBHnvlpNne~UQx{0PTS>yY&T9KNXn)imxA8n0Z^GCBX{Si%Vnbl@7 zkA}F+*m+(O?%WYa?zlVe$Mv{8dT=34eg6DhFzaaOwiQkeqd0g^;}>YezeCT=u4Ymb z6x6Lxv0gTUo8K03vs+8RF+E9UGj_W-X^-9`BvLC;d&N!UY#+iy_JzpNX6h^IQJq&& z?k*?7p^1ynErd%G9PjI) zQ>Wp3@G#e=28c*g5>TS0A<0Cgmy1|=YAbwFS4gy$3zNjiql973MGa#~86YdiMf9CC z@Nr|5z_Df$-_BwkU?N@8L2_d)eTVl8YuhEAOMQGV)j>vTrp9^J+)sAl0&*tNp0OACh9){w%ej}UAVcpWLf(|HsL`?zr{HRf zn(SLsI2Rjn%dO+U+#u6#bKI5vzdbsnrR|7A3xquw=Z24gOPR~8MfDQvx=KjC9?y6E z*vm(GAzLDApcV586PuSO@J&&2driShk59<+mSJ`tn4~Hhob1MqBh)w z=c^W?6>WIN4Y7DkwYc1qP;mo@U&u?GJmoxE0U V5;^>ns`6>74r;J=9V1!V#Xo!IF`obc diff --git a/tests/data/ampcor/accuracy-testdata/ovs128-rho0.8/golden/dense_offsets b/tests/data/ampcor/accuracy-testdata/ovs128-rho0.8/golden/dense_offsets index 9b1a3d0ad363943e0cadcd30fa2c1f10dde6ce40..b2398d3f347741b35d307677dce36aebdc69d6f5 100644 GIT binary patch literal 1248 zcmZ9Kfo;Pu3D~&r$omv%P9~e;&89Zy%*Vk00+~_YQh@cerO%?~d$gu261~M(aU| z;>B?GRWEDa=#6N;LycaHPU-hhH|1J)c07D^w>EEH&D8%f2fe?o_MY2e@xtRfFpC?utGi8uFqa oX#QX5$qhLo`>DgD|FA3f^gS}V{^qLAZs*BwzccaoPVl_`0YOCJUH||9 literal 1248 zcmY+Bfo;P;3gzSe{zTU_oy>vRu)dIsgqj*eY; zXx}qB_hyeS8qU!?KWO&&_3D}2;H~Nb@7wjN)pNjkF3!&SQk>%h-8oNXCTD8X=YAii z^PBU|lQ}*)%Guv$yyAnFS9|E$5i%#YcO}BRO9p$-(cn4Q-@7Mkw4&KR==6BM`jp7d z{mYqlUbJ%>nA+s1?DtD+et0~3Eip5=^neF=r#<_QybJW3QzE09-3&ina$a|;uEQb QXYj3+(CZF7Y5l?a1IDTD`v3p{ diff --git a/tests/python/packages/isce3/matchtemplate/test_ampcor.py b/tests/python/packages/isce3/matchtemplate/test_ampcor.py index ea01ff724..09e75fd2b 100644 --- a/tests/python/packages/isce3/matchtemplate/test_ampcor.py +++ b/tests/python/packages/isce3/matchtemplate/test_ampcor.py @@ -83,6 +83,7 @@ def test_ampcor(): ampcor.algorithm = 0 # frequency ampcor.corrSurfaceOverSamplingMethod = ovs ampcor.derampMethod = 1 + ampcor.derampAxis = 0 ampcor.corrStatWindowSize = 21 ampcor.corrSurfaceZoomInWindow = 8 @@ -159,7 +160,8 @@ def test_ampcor(): meantol = 2e-2 tol = 1e-1 elif fname == "correlation_peak": - meantol = 1e-2 + meantol = 2e-2 + tol = 5e-2 else: meantol = 1 / 64 / 5 tol = 1 / 64 From b44b6fd715a1523017eea15e9fe4ebaf38fcc179 Mon Sep 17 00:00:00 2001 From: Lijun Zhu Date: Thu, 15 Jan 2026 14:21:52 -0800 Subject: [PATCH 12/13] fix sincos for macOS --- cxx/isce3/matchtemplate/pycuampcor/cuDeramp.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cxx/isce3/matchtemplate/pycuampcor/cuDeramp.cpp b/cxx/isce3/matchtemplate/pycuampcor/cuDeramp.cpp index a5dc67808..d041d8fae 100644 --- a/cxx/isce3/matchtemplate/pycuampcor/cuDeramp.cpp +++ b/cxx/isce3/matchtemplate/pycuampcor/cuDeramp.cpp @@ -19,7 +19,6 @@ #include #include #include -#include #include namespace isce3::matchtemplate::pycuampcor { @@ -65,7 +64,11 @@ static void cuLinearDeramp_kernel(float2 *images, const int imageNX, const int i const int pixelIdxY = i % imageNY; double phase = pixelIdxX*phaseX + pixelIdxY*phaseY; double phase_sin, phase_cos; +#if defined(__APPLE__) || defined(__MACOSX__) + __sincos(phase, &phase_sin, &phase_cos); +#else sincos(phase, &phase_sin, &phase_cos); +#endif image[i] = make_float2( image[i].x*phase_cos - image[i].y*phase_sin, image[i].x*phase_sin + image[i].y*phase_cos); From a6b0e9b575ca4bc1d2b3506c7812d37aecda4763 Mon Sep 17 00:00:00 2001 From: Lijun Zhu Date: Fri, 16 Jan 2026 14:18:25 -0800 Subject: [PATCH 13/13] revert sincos in the cpu module to separate calls (optimize by -ffast-math) --- cxx/isce3/matchtemplate/pycuampcor/cuDeramp.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/cxx/isce3/matchtemplate/pycuampcor/cuDeramp.cpp b/cxx/isce3/matchtemplate/pycuampcor/cuDeramp.cpp index d041d8fae..6924818a2 100644 --- a/cxx/isce3/matchtemplate/pycuampcor/cuDeramp.cpp +++ b/cxx/isce3/matchtemplate/pycuampcor/cuDeramp.cpp @@ -63,12 +63,8 @@ static void cuLinearDeramp_kernel(float2 *images, const int imageNX, const int i const int pixelIdxX = i / imageNY; const int pixelIdxY = i % imageNY; double phase = pixelIdxX*phaseX + pixelIdxY*phaseY; - double phase_sin, phase_cos; -#if defined(__APPLE__) || defined(__MACOSX__) - __sincos(phase, &phase_sin, &phase_cos); -#else - sincos(phase, &phase_sin, &phase_cos); -#endif + double phase_cos = cos(phase); + double phase_sin = sin(phase); image[i] = make_float2( image[i].x*phase_cos - image[i].y*phase_sin, image[i].x*phase_sin + image[i].y*phase_cos);