diff options
author | Suren A. Chilingaryan <csa@ipecompute4.ands.kit.edu> | 2022-07-26 23:32:36 +0200 |
---|---|---|
committer | Suren A. Chilingaryan <csa@ipecompute4.ands.kit.edu> | 2022-07-26 23:32:36 +0200 |
commit | accc4439d9dd035765ed77d94a0ceece3270cc0b (patch) | |
tree | 886507357f95680e8a6a7dc80aefe04ca42c5b2d /cuda/3d | |
parent | bfceef4da377c32cd59d1e51efb9aa1a21c7f4bd (diff) | |
download | astra-tnv.tar.gz astra-tnv.tar.bz2 astra-tnv.tar.xz astra-tnv.zip |
Half-precision back-/forward-projection for parallel geometrytnv
Diffstat (limited to 'cuda/3d')
-rw-r--r-- | cuda/3d/par3d_bp.cu | 21 | ||||
-rw-r--r-- | cuda/3d/par3d_fp.cu | 19 | ||||
-rw-r--r-- | cuda/3d/rounding.h | 11 |
3 files changed, 47 insertions, 4 deletions
diff --git a/cuda/3d/par3d_bp.cu b/cuda/3d/par3d_bp.cu index e43479a..75cbf2b 100644 --- a/cuda/3d/par3d_bp.cu +++ b/cuda/3d/par3d_bp.cu @@ -75,7 +75,7 @@ static bool bindProjDataTexture(const cudaArray* array) return true; } - +#include "rounding.h" __global__ void dev_par3D_BP(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, float fOutputScale) { float* volData = (float*)D_volData; @@ -122,8 +122,23 @@ __global__ void dev_par3D_BP(void* D_volData, unsigned int volPitch, int startAn float fV = fCv.w + fX * fCv.x + fY * fCv.y + fZ * fCv.z; for (int idx = 0; idx < ZSIZE; ++idx) { - - float fVal = tex3D(gT_par3DProjTexture, fU, fAngle, fV); + float fVal; + textype h5 = texto(0.5f); + textype fU_ = texto(fU); + textype fUf_ = texto(floor(fU)); + float fUf = floor(fU); + + if ((fU - fUf) < 0.5f) { + textype fVal1 = texto(tex3D(gT_par3DProjTexture, fUf - 0.5f, fAngle, fV)); + textype fVal2 = texto(tex3D(gT_par3DProjTexture, fUf + 0.5f, fAngle, fV)); + fVal = texfrom(fVal1 + (fU_ + h5 - fUf_) * (fVal2 - fVal1)); + } else { + textype fVal1 = texto(tex3D(gT_par3DProjTexture, fUf + 0.5f, fAngle, fV)); + textype fVal2 = texto(tex3D(gT_par3DProjTexture, fUf + 1.5f, fAngle, fV)); + fVal = texfrom(fVal1 + (fU_ - h5 - fUf_) * (fVal2 - fVal1)); + } + +// float fVal = tex3D(gT_par3DProjTexture, fU, fAngle, fV); Z[idx] += fVal; fU += fCu.z; diff --git a/cuda/3d/par3d_fp.cu b/cuda/3d/par3d_fp.cu index a99308f..3ad9f0d 100644 --- a/cuda/3d/par3d_fp.cu +++ b/cuda/3d/par3d_fp.cu @@ -146,6 +146,7 @@ struct SCALE_NONCUBE { // blockIdx: x = u/v detector // y = angle block +#include "rounding.h" template<class COORD, class SCALE> __global__ void par3D_FP_t(float* D_projData, unsigned int projPitch, @@ -212,7 +213,23 @@ __global__ void par3D_FP_t(float* D_projData, unsigned int projPitch, for (int s = startSlice; s < endSlice; ++s) { - fVal += c.tex(f0, f1, f2); + textype h5 = texto(0.5f); + textype f1_ = texto(f1); + textype f1f_ = texto(floor(f1)); + float f1f = floor(f1); + + if ((f1 - f1f) < 0.5f) { + textype fVal1 = texto(c.tex(f0, f1f - 0.5f, f2)); + textype fVal2 = texto(c.tex(f0, f1f + 0.5f, f2)); + fVal += texfrom(fVal1 + (f1_ + h5 - f1f_) * (fVal2 - fVal1)); +// fVal += texfrom(__hfma(__hadd(h5,__hsub(f1_, f1f_)), __hsub(fVal2, fVal1), fVal1)); + } else { + textype fVal1 = texto(c.tex(f0, f1f + 0.5f, f2)); + textype fVal2 = texto(c.tex(f0, f1f + 1.5f, f2)); + fVal += texfrom(fVal1 + (f1_ - h5 - f1f_) * (fVal2 - fVal1)); + } + +// fVal += c.tex(f0, f1, f2); f0 += 1.0f; f1 += a1; f2 += a2; diff --git a/cuda/3d/rounding.h b/cuda/3d/rounding.h new file mode 100644 index 0000000..b7ccc1c --- /dev/null +++ b/cuda/3d/rounding.h @@ -0,0 +1,11 @@ +#include <cuda_fp16.h> + +#define texto __float2half +#define texfrom __half2float +#define textype half + + +//#define texto +//#define texfrom +//#define textype float + |