diff options
author | Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl> | 2021-06-02 11:44:01 +0200 |
---|---|---|
committer | Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl> | 2021-11-16 17:30:12 +0100 |
commit | e99c6a75bada269381b247c555786dda8b390b7a (patch) | |
tree | ec21ba3db1dc7da5b7eeef5621b9a54e72dbf868 /cuda/3d/par3d_fp.cu | |
parent | ea9703e63e9d3976e89bc1d81bdd1ec3e76b68b6 (diff) | |
download | astra-e99c6a75bada269381b247c555786dda8b390b7a.tar.gz astra-e99c6a75bada269381b247c555786dda8b390b7a.tar.bz2 astra-e99c6a75bada269381b247c555786dda8b390b7a.tar.xz astra-e99c6a75bada269381b247c555786dda8b390b7a.zip |
Fix non-padded GPULink memory handling in FP3D kernels
This would fail silently if the output projection data object was not
padded to a multiple of 32 pixels, potentially corrupting the start of
projection rows.
3D GPU memory allocated by ASTRA itself is always padded by cudaMalloc3D
and therefore not affected. GPULink allows bypassing this, possibly
triggering this bug.
Diffstat (limited to 'cuda/3d/par3d_fp.cu')
-rw-r--r-- | cuda/3d/par3d_fp.cu | 7 |
1 files changed, 7 insertions, 0 deletions
diff --git a/cuda/3d/par3d_fp.cu b/cuda/3d/par3d_fp.cu index cf8336c..e1c82c3 100644 --- a/cuda/3d/par3d_fp.cu +++ b/cuda/3d/par3d_fp.cu @@ -175,6 +175,8 @@ __global__ void par3D_FP_t(float* D_projData, unsigned int projPitch, const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x; + if (detectorU >= dims.iProjU) + return; const int startDetectorV = (blockIdx.x/((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockV; int endDetectorV = startDetectorV + g_detBlockV; if (endDetectorV > dims.iProjV) @@ -251,7 +253,10 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch, const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); const float fDistCorr = sc.scale(a1, a2); + const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x; + if (detectorU >= dims.iProjU) + return; const int startDetectorV = (blockIdx.x/((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockV; int endDetectorV = startDetectorV + g_detBlockV; if (endDetectorV > dims.iProjV) @@ -359,6 +364,8 @@ __global__ void par3D_FP_SumSqW_t(float* D_projData, unsigned int projPitch, const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x; + if (detectorU >= dims.iProjU) + return; const int startDetectorV = (blockIdx.x/((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockV; int endDetectorV = startDetectorV + g_detBlockV; if (endDetectorV > dims.iProjV) |