summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorWillem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>2016-02-05 17:17:17 +0100
committerWillem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>2016-04-18 11:49:16 +0200
commit5768229672bffc93b5158da4bef893f40fe76ea1 (patch)
tree6cd677e2335511b88331bc87367a1c155759126c
parent9479de2c72d147adf7e57e84fe181408b9582bcd (diff)
downloadastra-5768229672bffc93b5158da4bef893f40fe76ea1.tar.gz
astra-5768229672bffc93b5158da4bef893f40fe76ea1.tar.bz2
astra-5768229672bffc93b5158da4bef893f40fe76ea1.tar.xz
astra-5768229672bffc93b5158da4bef893f40fe76ea1.zip
Move detector-independent vars out of loop
-rw-r--r--cuda/3d/par3d_fp.cu26
1 files changed, 12 insertions, 14 deletions
diff --git a/cuda/3d/par3d_fp.cu b/cuda/3d/par3d_fp.cu
index c6c4fda..6f9ce40 100644
--- a/cuda/3d/par3d_fp.cu
+++ b/cuda/3d/par3d_fp.cu
@@ -174,6 +174,9 @@ __global__ void par3D_FP_t(float* D_projData, unsigned int projPitch,
const float fDetSY = gC_DetSY[angle] + 0.5f * fDetUY + 0.5f * fDetVY;
const float fDetSZ = gC_DetSZ[angle] + 0.5f * fDetUZ + 0.5f * fDetVZ;
+ const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ);
+ const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ);
+ const float fDistCorr = sc.scale(a1, a2);
const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x;
@@ -199,13 +202,9 @@ __global__ void par3D_FP_t(float* D_projData, unsigned int projPitch,
/* ray: (y) = (ay) * x + (by) */
/* (z) (az) (bz) */
- const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ);
- const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ);
const float b1 = c.c1(fDetX,fDetY,fDetZ) - a1 * c.c0(fDetX,fDetY,fDetZ);
const float b2 = c.c2(fDetX,fDetY,fDetZ) - a2 * c.c0(fDetX,fDetY,fDetZ);
- const float fDistCorr = sc.scale(a1, a2);
-
float fVal = 0.0f;
float f0 = startSlice + 0.5f;
@@ -253,7 +252,9 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch,
const float fDetSY = gC_DetSY[angle] + 0.5f * fDetUY + 0.5f * fDetVY;
const float fDetSZ = gC_DetSZ[angle] + 0.5f * fDetUZ + 0.5f * fDetVZ;
-
+ const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ);
+ const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ);
+ const float fDistCorr = sc.scale(a1, a2);
const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x;
const int startDetectorV = (blockIdx.x/((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockV;
@@ -288,12 +289,9 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch,
/* ray: (y) = (ay) * x + (by) */
/* (z) (az) (bz) */
- const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ);
- const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ);
const float b1 = c.c1(fDetX,fDetY,fDetZ) - a1 * c.c0(fDetX,fDetY,fDetZ);
const float b2 = c.c2(fDetX,fDetY,fDetZ) - a2 * c.c0(fDetX,fDetY,fDetZ);
- const float fDistCorr = sc.scale(a1, a2);
float fVal = 0.0f;
@@ -309,12 +307,12 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch,
f2 += a2;
}
- fVal *= fDistCorr;
fV += fVal;
}
}
+ fV *= fDistCorr;
D_projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] += fV / (iRaysPerDetDim * iRaysPerDetDim);
}
}
@@ -360,6 +358,9 @@ __global__ void par3D_FP_SumSqW_t(float* D_projData, unsigned int projPitch,
const float fDetSY = gC_DetSY[angle] + 0.5f * fDetUY + 0.5f * fDetVY;
const float fDetSZ = gC_DetSZ[angle] + 0.5f * fDetUZ + 0.5f * fDetVZ;
+ const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ);
+ const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ);
+ const float fDistCorr = sc.scale(a1, a2);
const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x;
@@ -385,13 +386,9 @@ __global__ void par3D_FP_SumSqW_t(float* D_projData, unsigned int projPitch,
/* ray: (y) = (ay) * x + (by) */
/* (z) (az) (bz) */
- const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ);
- const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ);
const float b1 = c.c1(fDetX,fDetY,fDetZ) - a1 * c.c0(fDetX,fDetY,fDetZ);
const float b2 = c.c2(fDetX,fDetY,fDetZ) - a2 * c.c0(fDetX,fDetY,fDetZ);
- const float fDistCorr = sc.scale(a1, a2);
-
float fVal = 0.0f;
float f0 = startSlice + 0.5f;
@@ -400,12 +397,13 @@ __global__ void par3D_FP_SumSqW_t(float* D_projData, unsigned int projPitch,
for (int s = startSlice; s < endSlice; ++s)
{
- fVal += dirWeights(f1, c.nDim1(dims)) * dirWeights(f2, c.nDim2(dims)) * fDistCorr * fDistCorr;
+ fVal += dirWeights(f1, c.nDim1(dims)) * dirWeights(f2, c.nDim2(dims));
f0 += 1.0f;
f1 += a1;
f2 += a2;
}
+ fVal *= fDistCorr * fDistCorr;
D_projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] += fVal;
}
}