From accc4439d9dd035765ed77d94a0ceece3270cc0b Mon Sep 17 00:00:00 2001 From: "Suren A. Chilingaryan" Date: Tue, 26 Jul 2022 23:32:36 +0200 Subject: Half-precision back-/forward-projection for parallel geometry --- cuda/3d/par3d_fp.cu | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'cuda/3d/par3d_fp.cu') diff --git a/cuda/3d/par3d_fp.cu b/cuda/3d/par3d_fp.cu index a99308f..3ad9f0d 100644 --- a/cuda/3d/par3d_fp.cu +++ b/cuda/3d/par3d_fp.cu @@ -146,6 +146,7 @@ struct SCALE_NONCUBE { // blockIdx: x = u/v detector // y = angle block +#include "rounding.h" template __global__ void par3D_FP_t(float* D_projData, unsigned int projPitch, @@ -212,7 +213,23 @@ __global__ void par3D_FP_t(float* D_projData, unsigned int projPitch, for (int s = startSlice; s < endSlice; ++s) { - fVal += c.tex(f0, f1, f2); + textype h5 = texto(0.5f); + textype f1_ = texto(f1); + textype f1f_ = texto(floor(f1)); + float f1f = floor(f1); + + if ((f1 - f1f) < 0.5f) { + textype fVal1 = texto(c.tex(f0, f1f - 0.5f, f2)); + textype fVal2 = texto(c.tex(f0, f1f + 0.5f, f2)); + fVal += texfrom(fVal1 + (f1_ + h5 - f1f_) * (fVal2 - fVal1)); +// fVal += texfrom(__hfma(__hadd(h5,__hsub(f1_, f1f_)), __hsub(fVal2, fVal1), fVal1)); + } else { + textype fVal1 = texto(c.tex(f0, f1f + 0.5f, f2)); + textype fVal2 = texto(c.tex(f0, f1f + 1.5f, f2)); + fVal += texfrom(fVal1 + (f1_ - h5 - f1f_) * (fVal2 - fVal1)); + } + +// fVal += c.tex(f0, f1, f2); f0 += 1.0f; f1 += a1; f2 += a2; -- cgit v1.2.3