summaryrefslogtreecommitdiffstats
path: root/cuda/3d/fdk.cu
diff options
context:
space:
mode:
Diffstat (limited to 'cuda/3d/fdk.cu')
-rw-r--r--cuda/3d/fdk.cu289
1 files changed, 103 insertions, 186 deletions
diff --git a/cuda/3d/fdk.cu b/cuda/3d/fdk.cu
index 4899ad1..bbac0be 100644
--- a/cuda/3d/fdk.cu
+++ b/cuda/3d/fdk.cu
@@ -41,174 +41,26 @@ $Id$
#include "dims3d.h"
#include "arith3d.h"
+#include "cone_bp.h"
#include "../2d/fft.h"
-typedef texture<float, 3, cudaReadModeElementType> texture3D;
-
-static texture3D gT_coneProjTexture;
+#include "../../include/astra/Logging.h"
namespace astraCUDA3d {
-static const unsigned int g_volBlockZ = 16;
-
-static const unsigned int g_anglesPerBlock = 64;
-static const unsigned int g_volBlockX = 32;
-static const unsigned int g_volBlockY = 16;
-
static const unsigned int g_anglesPerWeightBlock = 16;
static const unsigned int g_detBlockU = 32;
static const unsigned int g_detBlockV = 32;
-static const unsigned g_MaxAngles = 2048;
+static const unsigned g_MaxAngles = 12000;
-__constant__ float gC_angle_sin[g_MaxAngles];
-__constant__ float gC_angle_cos[g_MaxAngles];
__constant__ float gC_angle[g_MaxAngles];
// per-detector u/v shifts?
-static bool bindProjDataTexture(const cudaArray* array)
-{
- cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
-
- gT_coneProjTexture.addressMode[0] = cudaAddressModeBorder;
- gT_coneProjTexture.addressMode[1] = cudaAddressModeBorder;
- gT_coneProjTexture.addressMode[2] = cudaAddressModeBorder;
- gT_coneProjTexture.filterMode = cudaFilterModeLinear;
- gT_coneProjTexture.normalized = false;
-
- cudaBindTextureToArray(gT_coneProjTexture, array, channelDesc);
- // TODO: error value?
-
- return true;
-}
-
-
-__global__ void devBP_FDK(void* D_volData, unsigned int volPitch, int startAngle, float fSrcOrigin, float fDetOrigin, float fSrcZ, float fDetZ, float fInvDetUSize, float fInvDetVSize, const SDimensions3D dims)
-{
- float* volData = (float*)D_volData;
-
- int endAngle = startAngle + g_anglesPerBlock;
- if (endAngle > dims.iProjAngles)
- endAngle = dims.iProjAngles;
-
- // threadIdx: x = rel x
- // y = rel y
-
- // blockIdx: x = x + y
- // y = z
-
-
- // TO TRY: precompute part of detector intersection formulas in shared mem?
- // TO TRY: inner loop over z, gather ray values in shared mem
-
- const int X = blockIdx.x % ((dims.iVolX+g_volBlockX-1)/g_volBlockX) * g_volBlockX + threadIdx.x;
- const int Y = blockIdx.x / ((dims.iVolX+g_volBlockX-1)/g_volBlockX) * g_volBlockY + threadIdx.y;
-
- if (X > dims.iVolX)
- return;
- if (Y > dims.iVolY)
- return;
-
- const int startZ = blockIdx.y * g_volBlockZ;
- int endZ = startZ + g_volBlockZ;
- if (endZ > dims.iVolZ)
- endZ = dims.iVolZ;
-
- float fX = X - 0.5f*dims.iVolX + 0.5f;
- float fY = Y - 0.5f*dims.iVolY + 0.5f;
- float fZ = startZ - 0.5f*dims.iVolZ + 0.5f - fSrcZ;
-
- const float fU_base = 0.5f*dims.iProjU - 0.5f + 0.5f;
- const float fV_base = 0.5f*dims.iProjV - 0.5f + 0.5f + (fDetZ-fSrcZ);
-
- // Note re. fZ/rV_base: the computations below are all relative to the
- // optical axis, so we do the Z-adjustments beforehand.
-
- for (int Z = startZ; Z < endZ; ++Z, fZ += 1.0f)
- {
-
- float fVal = 0.0f;
- float fAngle = startAngle + 0.5f;
-
- for (int angle = startAngle; angle < endAngle; ++angle, fAngle += 1.0f)
- {
-
- const float cos_theta = gC_angle_cos[angle];
- const float sin_theta = gC_angle_sin[angle];
-
- const float fR = fSrcOrigin;
- const float fD = fR - fX * sin_theta + fY * cos_theta;
- float fWeight = fR / fD;
- fWeight *= fWeight;
-
- const float fScaleFactor = (fR + fDetOrigin) / fD;
- const float fU = fU_base + (fX*cos_theta+fY*sin_theta) * fScaleFactor * fInvDetUSize;
- const float fV = fV_base + fZ * fScaleFactor * fInvDetVSize;
-
- fVal += tex3D(gT_coneProjTexture, fU, fAngle, fV);
-
- }
-
- volData[(Z*dims.iVolY+Y)*volPitch+X] += fVal;
-// projData[(angle*dims.iProjV+detectorV)*projPitch+detectorU] = 10.0f;
-// if (threadIdx.x == 0 && threadIdx.y == 0) { printf("%d,%d,%d [%d / %d] -> %f\n", angle, detectorU, detectorV, (angle*dims.iProjV+detectorV)*projPitch+detectorU, projPitch, projData[(angle*dims.iProjV+detectorV)*projPitch+detectorU]); }
- }
-
-}
-
-
-bool FDK_BP(cudaPitchedPtr D_volumeData,
- cudaPitchedPtr D_projData,
- float fSrcOrigin, float fDetOrigin,
- float fSrcZ, float fDetZ, float fDetUSize, float fDetVSize,
- const SDimensions3D& dims, const float* angles)
-{
- // transfer projections to array
-
- cudaArray* cuArray = allocateProjectionArray(dims);
- transferProjectionsToArray(D_projData, cuArray, dims);
-
- bindProjDataTexture(cuArray);
-
- float* angle_sin = new float[dims.iProjAngles];
- float* angle_cos = new float[dims.iProjAngles];
-
- for (unsigned int i = 0; i < dims.iProjAngles; ++i) {
- angle_sin[i] = sinf(angles[i]);
- angle_cos[i] = cosf(angles[i]);
- }
- cudaError_t e1 = cudaMemcpyToSymbol(gC_angle_sin, angle_sin, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
- cudaError_t e2 = cudaMemcpyToSymbol(gC_angle_cos, angle_cos, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
- assert(e1 == cudaSuccess);
- assert(e2 == cudaSuccess);
-
- delete[] angle_sin;
- delete[] angle_cos;
-
- dim3 dimBlock(g_volBlockX, g_volBlockY);
-
- dim3 dimGrid(((dims.iVolX+g_volBlockX-1)/g_volBlockX)*((dims.iVolY+g_volBlockY-1)/g_volBlockY), (dims.iVolZ+g_volBlockZ-1)/g_volBlockZ);
-
- // timeval t;
- // tic(t);
-
- for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) {
- devBP_FDK<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, fSrcOrigin, fDetOrigin, fSrcZ, fDetZ, 1.0f / fDetUSize, 1.0f / fDetVSize, dims);
- }
-
- cudaTextForceKernelsCompletion();
-
- cudaFreeArray(cuArray);
-
- // printf("%f\n", toc(t));
-
- return true;
-}
-
-__global__ void devFDK_preweight(void* D_projData, unsigned int projPitch, unsigned int startAngle, unsigned int endAngle, float fSrcOrigin, float fDetOrigin, float fSrcZ, float fDetZ, float fDetUSize, float fDetVSize, const SDimensions3D dims)
+__global__ void devFDK_preweight(void* D_projData, unsigned int projPitch, unsigned int startAngle, unsigned int endAngle, float fSrcOrigin, float fDetOrigin, float fZShift, float fDetUSize, float fDetVSize, const SDimensions3D dims)
{
float* projData = (float*)D_projData;
int angle = startAngle + blockIdx.y * g_anglesPerWeightBlock + threadIdx.y;
@@ -230,21 +82,26 @@ __global__ void devFDK_preweight(void* D_projData, unsigned int projPitch, unsig
const float fT = fCentralRayLength * fCentralRayLength + fU * fU;
- float fV = (startDetectorV - 0.5f*dims.iProjV + 0.5f) * fDetVSize + fDetZ - fSrcZ;
+ float fV = (startDetectorV - 0.5f*dims.iProjV + 0.5f) * fDetVSize + fZShift;
+
+ //const float fW = fCentralRayLength;
+ //const float fW = fCentralRayLength * (M_PI / 2.0f) / (float)dims.iProjAngles;
+ const float fW1 = fSrcOrigin * fDetUSize * fDetVSize;
+ const float fW = fCentralRayLength * fW1 * fW1 * (M_PI / 2.0f) / (float)dims.iProjAngles;
for (int detectorV = startDetectorV; detectorV < endDetectorV; ++detectorV)
{
const float fRayLength = sqrtf(fT + fV * fV);
- const float fWeight = fCentralRayLength / fRayLength;
+ const float fWeight = fW / fRayLength;
projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] *= fWeight;
- fV += 1.0f;
+ fV += fDetVSize;
}
}
-__global__ void devFDK_ParkerWeight(void* D_projData, unsigned int projPitch, unsigned int startAngle, unsigned int endAngle, float fSrcOrigin, float fDetOrigin, float fSrcZ, float fDetZ, float fDetUSize, float fCentralFanAngle, const SDimensions3D dims)
+__global__ void devFDK_ParkerWeight(void* D_projData, unsigned int projPitch, unsigned int startAngle, unsigned int endAngle, float fSrcOrigin, float fDetOrigin, float fDetUSize, float fCentralFanAngle, const SDimensions3D dims)
{
float* projData = (float*)D_projData;
int angle = startAngle + blockIdx.y * g_anglesPerWeightBlock + threadIdx.y;
@@ -305,7 +162,7 @@ __global__ void devFDK_ParkerWeight(void* D_projData, unsigned int projPitch, un
// Perform the FDK pre-weighting and filtering
bool FDK_PreWeight(cudaPitchedPtr D_projData,
float fSrcOrigin, float fDetOrigin,
- float fSrcZ, float fDetZ,
+ float fZShift,
float fDetUSize, float fDetVSize, bool bShortScan,
const SDimensions3D& dims, const float* angles)
{
@@ -318,23 +175,57 @@ bool FDK_PreWeight(cudaPitchedPtr D_projData,
int projPitch = D_projData.pitch/sizeof(float);
- devFDK_preweight<<<dimGrid, dimBlock>>>(D_projData.ptr, projPitch, 0, dims.iProjAngles, fSrcOrigin, fDetOrigin, fSrcZ, fDetZ, fDetUSize, fDetVSize, dims);
+ devFDK_preweight<<<dimGrid, dimBlock>>>(D_projData.ptr, projPitch, 0, dims.iProjAngles, fSrcOrigin, fDetOrigin, fZShift, fDetUSize, fDetVSize, dims);
cudaTextForceKernelsCompletion();
- if (bShortScan) {
+ if (bShortScan && dims.iProjAngles > 1) {
+ ASTRA_DEBUG("Doing Parker weighting");
// We do short-scan Parker weighting
- cudaError_t e1 = cudaMemcpyToSymbol(gC_angle, angles,
+ // First, determine (in a very basic way) the interval that's
+ // been scanned. We assume angles[0] is one of the endpoints of the
+ // range.
+ float fdA = angles[1] - angles[0];
+
+ while (fdA < -M_PI)
+ fdA += 2*M_PI;
+ while (fdA >= M_PI)
+ fdA -= 2*M_PI;
+
+ float fAngleBase;
+ if (fdA >= 0.0f) {
+ // going up from angles[0]
+ fAngleBase = angles[dims.iProjAngles - 1];
+ } else {
+ // going down from angles[0]
+ fAngleBase = angles[0];
+ }
+
+ // We pick the highest end of the range, and then
+ // move all angles so they fall in (-2pi,0]
+
+ float *fRelAngles = new float[dims.iProjAngles];
+ for (unsigned int i = 0; i < dims.iProjAngles; ++i) {
+ float f = angles[i] - fAngleBase;
+ while (f > 0)
+ f -= 2*M_PI;
+ while (f <= -2*M_PI)
+ f += 2*M_PI;
+ fRelAngles[i] = f;
+
+ }
+
+ cudaError_t e1 = cudaMemcpyToSymbol(gC_angle, fRelAngles,
dims.iProjAngles*sizeof(float), 0,
cudaMemcpyHostToDevice);
assert(!e1);
+ delete[] fRelAngles;
- // TODO: detector pixel size!
- float fCentralFanAngle = atanf((dims.iProjU*0.5f) /
+ float fCentralFanAngle = atanf(fDetUSize * (dims.iProjU*0.5f) /
(fSrcOrigin + fDetOrigin));
- devFDK_ParkerWeight<<<dimGrid, dimBlock>>>(D_projData.ptr, projPitch, 0, dims.iProjAngles, fSrcOrigin, fDetOrigin, fSrcZ, fDetZ, fDetUSize, fCentralFanAngle, dims);
+ devFDK_ParkerWeight<<<dimGrid, dimBlock>>>(D_projData.ptr, projPitch, 0, dims.iProjAngles, fSrcOrigin, fDetOrigin, fDetUSize, fCentralFanAngle, dims);
}
@@ -344,10 +235,7 @@ bool FDK_PreWeight(cudaPitchedPtr D_projData,
bool FDK_Filter(cudaPitchedPtr D_projData,
cufftComplex * D_filter,
- float fSrcOrigin, float fDetOrigin,
- float fSrcZ, float fDetZ,
- float fDetUSize, float fDetVSize, bool bShortScan,
- const SDimensions3D& dims, const float* angles)
+ const SDimensions3D& dims)
{
// The filtering is a regular ramp filter per detector line.
@@ -392,10 +280,9 @@ bool FDK_Filter(cudaPitchedPtr D_projData,
bool FDK(cudaPitchedPtr D_volumeData,
cudaPitchedPtr D_projData,
- float fSrcOrigin, float fDetOrigin,
- float fSrcZ, float fDetZ, float fDetUSize, float fDetVSize,
- const SDimensions3D& dims, const float* angles, bool bShortScan,
- const float* filter)
+ const SConeProjection* angles,
+ const SDimensions3D& dims, SProjectorParams3D params, bool bShortScan,
+ const float* pfFilter)
{
bool ok;
// Generate filter
@@ -404,20 +291,53 @@ bool FDK(cudaPitchedPtr D_volumeData,
int iPaddedDetCount = calcNextPowerOfTwo(2 * dims.iProjU);
int iHalfFFTSize = calcFFTFourSize(iPaddedDetCount);
+
+ // NB: We don't support arbitrary cone_vec geometries here.
+ // Only those that are vertical sub-geometries
+ // (cf. CompositeGeometryManager) of regular cone geometries.
+ assert(dims.iProjAngles > 0);
+ const SConeProjection& p0 = angles[0];
+
+ // assuming U is in the XY plane, V is parallel to Z axis
+ float fDetCX = p0.fDetSX + 0.5*dims.iProjU*p0.fDetUX;
+ float fDetCY = p0.fDetSY + 0.5*dims.iProjU*p0.fDetUY;
+ float fDetCZ = p0.fDetSZ + 0.5*dims.iProjV*p0.fDetVZ;
+
+ float fSrcOrigin = sqrt(p0.fSrcX*p0.fSrcX + p0.fSrcY*p0.fSrcY);
+ float fDetOrigin = sqrt(fDetCX*fDetCX + fDetCY*fDetCY);
+ float fDetUSize = sqrt(p0.fDetUX*p0.fDetUX + p0.fDetUY*p0.fDetUY);
+ float fDetVSize = abs(p0.fDetVZ);
+
+ float fZShift = fDetCZ - p0.fSrcZ;
+
+ float *pfAngles = new float[dims.iProjAngles];
+ for (unsigned int i = 0; i < dims.iProjAngles; ++i) {
+ // FIXME: Sign/order
+ pfAngles[i] = -atan2(angles[i].fSrcX, angles[i].fSrcY) + M_PI;
+ }
+
+
+#if 1
ok = FDK_PreWeight(D_projData, fSrcOrigin, fDetOrigin,
- fSrcZ, fDetZ, fDetUSize, fDetVSize,
- bShortScan, dims, angles);
+ fZShift, fDetUSize, fDetVSize,
+ bShortScan, dims, pfAngles);
+#else
+ ok = true;
+#endif
+ delete[] pfAngles;
+
if (!ok)
return false;
+#if 1
cufftComplex *pHostFilter = new cufftComplex[dims.iProjAngles * iHalfFFTSize];
memset(pHostFilter, 0, sizeof(cufftComplex) * dims.iProjAngles * iHalfFFTSize);
- if (filter==NULL){
+ if (pfFilter == 0){
genFilter(FILTER_RAMLAK, 1.0f, dims.iProjAngles, pHostFilter, iPaddedDetCount, iHalfFFTSize);
- }else{
- for(int i=0;i<dims.iProjAngles * iHalfFFTSize;i++){
- pHostFilter[i].x = filter[i];
+ } else {
+ for (int i = 0; i < dims.iProjAngles * iHalfFFTSize; i++) {
+ pHostFilter[i].x = pfFilter[i];
pHostFilter[i].y = 0;
}
}
@@ -433,28 +353,25 @@ bool FDK(cudaPitchedPtr D_volumeData,
- ok = FDK_Filter(D_projData, D_filter, fSrcOrigin, fDetOrigin,
- fSrcZ, fDetZ, fDetUSize, fDetVSize,
- bShortScan, dims, angles);
+ ok = FDK_Filter(D_projData, D_filter, dims);
// Clean up filter
freeComplexOnDevice(D_filter);
-
+#endif
if (!ok)
return false;
// Perform BP
- ok = FDK_BP(D_volumeData, D_projData, fSrcOrigin, fDetOrigin, fSrcZ, fDetZ,
- fDetUSize, fDetVSize, dims, angles);
+ params.bFDKWeighting = true;
+
+ //ok = FDK_BP(D_volumeData, D_projData, fSrcOrigin, fDetOrigin, 0.0f, 0.0f, fDetUSize, fDetVSize, dims, pfAngles);
+ ok = ConeBP(D_volumeData, D_projData, dims, angles, params);
if (!ok)
return false;
- processVol3D<opMul>(D_volumeData,
- (M_PI / 2.0f) / (float)dims.iProjAngles, dims);
-
return true;
}