summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--cuda/2d/astra.cu2
-rw-r--r--cuda/2d/fft.cu45
-rw-r--r--cuda/3d/astra3d.cu80
-rw-r--r--cuda/3d/astra3d.h7
-rw-r--r--cuda/3d/fdk.cu250
-rw-r--r--cuda/3d/fdk.h8
-rw-r--r--cuda/3d/mem3d.cu28
-rw-r--r--cuda/3d/mem3d.h1
-rw-r--r--include/astra/CompositeGeometryManager.h9
-rw-r--r--src/CompositeGeometryManager.cpp50
-rw-r--r--src/CudaFDKAlgorithm3D.cpp9
11 files changed, 240 insertions, 249 deletions
diff --git a/cuda/2d/astra.cu b/cuda/2d/astra.cu
index 7317d69..b56ddf9 100644
--- a/cuda/2d/astra.cu
+++ b/cuda/2d/astra.cu
@@ -343,7 +343,7 @@ bool AstraFBP::run()
dims3d.iProjV = 1;
astraCUDA3d::FDK_PreWeight(tmp, pData->fOriginSourceDistance,
- pData->fOriginDetectorDistance, 0.0f, 0.0f,
+ pData->fOriginDetectorDistance, 0.0f,
pData->dims.fDetScale, 1.0f, // TODO: Are these correct?
pData->bShortScan, dims3d, pData->angles);
}
diff --git a/cuda/2d/fft.cu b/cuda/2d/fft.cu
index 2bfd493..a84b2cc 100644
--- a/cuda/2d/fft.cu
+++ b/cuda/2d/fft.cu
@@ -35,6 +35,7 @@ $Id$
#include <fstream>
#include "../../include/astra/Logging.h"
+#include "../../include/astra/Fourier.h"
using namespace astra;
@@ -303,6 +304,8 @@ void genFilter(E_FBPFILTER _eFilter, float _fD, int _iProjectionCount,
float * pfFilt = new float[_iFFTFourierDetectorCount];
float * pfW = new float[_iFFTFourierDetectorCount];
+#if 1
+
for(int iDetectorIndex = 0; iDetectorIndex < _iFFTFourierDetectorCount; iDetectorIndex++)
{
float fRelIndex = (float)iDetectorIndex / (float)_iFFTRealDetectorCount;
@@ -314,6 +317,48 @@ void genFilter(E_FBPFILTER _eFilter, float _fD, int _iProjectionCount,
// w = 2*pi*(0:size(filt,2)-1)/order
pfW[iDetectorIndex] = 3.1415f * 2.0f * fRelIndex;
}
+#else
+
+ float *pfRealIn = new float[_iFFTRealDetectorCount];
+ float *pfImagIn = new float[_iFFTRealDetectorCount];
+ float *pfRealOut = new float[_iFFTRealDetectorCount];
+ float *pfImagOut = new float[_iFFTRealDetectorCount];
+
+ for (int i = 0; i < _iFFTRealDetectorCount; ++i) {
+ pfImagIn[i] = 0.0f;
+ pfRealOut[i] = 0.0f;
+ pfImagOut[i] = 0.0f;
+
+ if (i & 1) {
+ int j = i;
+ if (2*j > _iFFTRealDetectorCount)
+ j = _iFFTRealDetectorCount - j;
+ float f = M_PI * j;
+ pfRealIn[i] = -1 / (f*f);
+ } else {
+ pfRealIn[i] = 0.0f;
+ }
+ }
+
+ pfRealIn[0] = 0.25f;
+
+ fastTwoPowerFourierTransform1D(_iFFTRealDetectorCount, pfRealIn, pfImagIn,
+ pfRealOut, pfImagOut, 1, 1, false);
+
+ for(int iDetectorIndex = 0; iDetectorIndex < _iFFTFourierDetectorCount; iDetectorIndex++)
+ {
+ float fRelIndex = (float)iDetectorIndex / (float)_iFFTRealDetectorCount;
+
+ pfFilt[iDetectorIndex] = 2.0f * pfRealOut[iDetectorIndex];
+ pfW[iDetectorIndex] = M_PI * 2.0f * fRelIndex;
+ }
+
+ delete[] pfRealIn;
+ delete[] pfImagIn;
+ delete[] pfRealOut;
+ delete[] pfImagOut;
+
+#endif
switch(_eFilter)
{
diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu
index dd6d551..91f4530 100644
--- a/cuda/3d/astra3d.cu
+++ b/cuda/3d/astra3d.cu
@@ -1291,84 +1291,4 @@ bool astraCudaBP_SIRTWeighted(float* pfVolume,
-bool astraCudaFDK(float* pfVolume, const float* pfProjections,
- const CVolumeGeometry3D* pVolGeom,
- const CConeProjectionGeometry3D* pProjGeom,
- bool bShortScan,
- int iGPUIndex, int iVoxelSuperSampling)
-{
- SDimensions3D dims;
- SProjectorParams3D params;
-
- params.iRaysPerVoxelDim = iVoxelSuperSampling;
-
- bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims);
-
- // TODO: Check that pVolGeom is normalized, since we don't support
- // other volume geometries yet
-
- if (!ok)
- return false;
-
-
- if (iVoxelSuperSampling == 0)
- return false;
-
- if (iGPUIndex != -1) {
- cudaSetDevice(iGPUIndex);
- cudaError_t err = cudaGetLastError();
-
- // Ignore errors caused by calling cudaSetDevice multiple times
- if (err != cudaSuccess && err != cudaErrorSetOnActiveProcess)
- return false;
- }
-
- cudaPitchedPtr D_volumeData = allocateVolumeData(dims);
- ok = D_volumeData.ptr;
- if (!ok)
- return false;
-
- cudaPitchedPtr D_projData = allocateProjectionData(dims);
- ok = D_projData.ptr;
- if (!ok) {
- cudaFree(D_volumeData.ptr);
- return false;
- }
-
- ok &= copyProjectionsToDevice(pfProjections, D_projData, dims, dims.iProjU);
-
- ok &= zeroVolumeData(D_volumeData, dims);
-
- if (!ok) {
- cudaFree(D_volumeData.ptr);
- cudaFree(D_projData.ptr);
- return false;
- }
-
- float fOriginSourceDistance = pProjGeom->getOriginSourceDistance();
- float fOriginDetectorDistance = pProjGeom->getOriginDetectorDistance();
- float fDetUSize = pProjGeom->getDetectorSpacingX();
- float fDetVSize = pProjGeom->getDetectorSpacingY();
- const float *pfAngles = pProjGeom->getProjectionAngles();
-
-
- // TODO: Offer interface for SrcZ, DetZ
- // TODO: Voxel scaling
- ok &= FDK(D_volumeData, D_projData, fOriginSourceDistance,
- fOriginDetectorDistance, 0, 0, fDetUSize, fDetVSize,
- dims, pfAngles, bShortScan);
-
- ok &= copyVolumeFromDevice(pfVolume, D_volumeData, dims, dims.iVolX);
-
-
- cudaFree(D_volumeData.ptr);
- cudaFree(D_projData.ptr);
-
- return ok;
-
-}
-
-
-
-
}
diff --git a/cuda/3d/astra3d.h b/cuda/3d/astra3d.h
index 3345ab8..93bf576 100644
--- a/cuda/3d/astra3d.h
+++ b/cuda/3d/astra3d.h
@@ -309,13 +309,6 @@ _AstraExport bool astraCudaBP_SIRTWeighted(float* pfVolume, const float* pfProje
const CProjectionGeometry3D* pProjGeom,
int iGPUIndex, int iVoxelSuperSampling);
-_AstraExport bool astraCudaFDK(float* pfVolume, const float* pfProjections,
- const CVolumeGeometry3D* pVolGeom,
- const CConeProjectionGeometry3D* pProjGeom,
- bool bShortScan,
- int iGPUIndex, int iVoxelSuperSampling);
-
-
}
diff --git a/cuda/3d/fdk.cu b/cuda/3d/fdk.cu
index 0e13be1..d847eee 100644
--- a/cuda/3d/fdk.cu
+++ b/cuda/3d/fdk.cu
@@ -41,8 +41,11 @@ $Id$
#include "dims3d.h"
#include "arith3d.h"
+#include "cone_bp.h"
#include "../2d/fft.h"
+#include "../../include/astra/Logging.h"
+
typedef texture<float, 3, cudaReadModeElementType> texture3D;
static texture3D gT_coneProjTexture;
@@ -86,129 +89,7 @@ static bool bindProjDataTexture(const cudaArray* array)
}
-__global__ void devBP_FDK(void* D_volData, unsigned int volPitch, int startAngle, float fSrcOrigin, float fDetOrigin, float fSrcZ, float fDetZ, float fInvDetUSize, float fInvDetVSize, const SDimensions3D dims)
-{
- float* volData = (float*)D_volData;
-
- int endAngle = startAngle + g_anglesPerBlock;
- if (endAngle > dims.iProjAngles)
- endAngle = dims.iProjAngles;
-
- // threadIdx: x = rel x
- // y = rel y
-
- // blockIdx: x = x + y
- // y = z
-
-
- // TO TRY: precompute part of detector intersection formulas in shared mem?
- // TO TRY: inner loop over z, gather ray values in shared mem
-
- const int X = blockIdx.x % ((dims.iVolX+g_volBlockX-1)/g_volBlockX) * g_volBlockX + threadIdx.x;
- const int Y = blockIdx.x / ((dims.iVolX+g_volBlockX-1)/g_volBlockX) * g_volBlockY + threadIdx.y;
-
- if (X > dims.iVolX)
- return;
- if (Y > dims.iVolY)
- return;
-
- const int startZ = blockIdx.y * g_volBlockZ;
- int endZ = startZ + g_volBlockZ;
- if (endZ > dims.iVolZ)
- endZ = dims.iVolZ;
-
- float fX = X - 0.5f*dims.iVolX + 0.5f;
- float fY = Y - 0.5f*dims.iVolY + 0.5f;
- float fZ = startZ - 0.5f*dims.iVolZ + 0.5f - fSrcZ;
-
- const float fU_base = 0.5f*dims.iProjU - 0.5f + 0.5f;
- const float fV_base = 0.5f*dims.iProjV - 0.5f + 0.5f + (fDetZ-fSrcZ);
-
- // Note re. fZ/rV_base: the computations below are all relative to the
- // optical axis, so we do the Z-adjustments beforehand.
-
- for (int Z = startZ; Z < endZ; ++Z, fZ += 1.0f)
- {
-
- float fVal = 0.0f;
- float fAngle = startAngle + 0.5f;
-
- for (int angle = startAngle; angle < endAngle; ++angle, fAngle += 1.0f)
- {
-
- const float cos_theta = gC_angle_cos[angle];
- const float sin_theta = gC_angle_sin[angle];
-
- const float fR = fSrcOrigin;
- const float fD = fR - fX * sin_theta + fY * cos_theta;
- float fWeight = fR / fD;
- fWeight *= fWeight;
-
- const float fScaleFactor = (fR + fDetOrigin) / fD;
- const float fU = fU_base + (fX*cos_theta+fY*sin_theta) * fScaleFactor * fInvDetUSize;
- const float fV = fV_base + fZ * fScaleFactor * fInvDetVSize;
-
- fVal += tex3D(gT_coneProjTexture, fU, fAngle, fV);
-
- }
-
- volData[(Z*dims.iVolY+Y)*volPitch+X] += fVal;
-// projData[(angle*dims.iProjV+detectorV)*projPitch+detectorU] = 10.0f;
-// if (threadIdx.x == 0 && threadIdx.y == 0) { printf("%d,%d,%d [%d / %d] -> %f\n", angle, detectorU, detectorV, (angle*dims.iProjV+detectorV)*projPitch+detectorU, projPitch, projData[(angle*dims.iProjV+detectorV)*projPitch+detectorU]); }
- }
-
-}
-
-
-bool FDK_BP(cudaPitchedPtr D_volumeData,
- cudaPitchedPtr D_projData,
- float fSrcOrigin, float fDetOrigin,
- float fSrcZ, float fDetZ, float fDetUSize, float fDetVSize,
- const SDimensions3D& dims, const float* angles)
-{
- // transfer projections to array
-
- cudaArray* cuArray = allocateProjectionArray(dims);
- transferProjectionsToArray(D_projData, cuArray, dims);
-
- bindProjDataTexture(cuArray);
-
- float* angle_sin = new float[dims.iProjAngles];
- float* angle_cos = new float[dims.iProjAngles];
-
- for (unsigned int i = 0; i < dims.iProjAngles; ++i) {
- angle_sin[i] = sinf(angles[i]);
- angle_cos[i] = cosf(angles[i]);
- }
- cudaError_t e1 = cudaMemcpyToSymbol(gC_angle_sin, angle_sin, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
- cudaError_t e2 = cudaMemcpyToSymbol(gC_angle_cos, angle_cos, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
- assert(e1 == cudaSuccess);
- assert(e2 == cudaSuccess);
-
- delete[] angle_sin;
- delete[] angle_cos;
-
- dim3 dimBlock(g_volBlockX, g_volBlockY);
-
- dim3 dimGrid(((dims.iVolX+g_volBlockX-1)/g_volBlockX)*((dims.iVolY+g_volBlockY-1)/g_volBlockY), (dims.iVolZ+g_volBlockZ-1)/g_volBlockZ);
-
- // timeval t;
- // tic(t);
-
- for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) {
- devBP_FDK<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, fSrcOrigin, fDetOrigin, fSrcZ, fDetZ, 1.0f / fDetUSize, 1.0f / fDetVSize, dims);
- }
-
- cudaTextForceKernelsCompletion();
-
- cudaFreeArray(cuArray);
-
- // printf("%f\n", toc(t));
-
- return true;
-}
-
-__global__ void devFDK_preweight(void* D_projData, unsigned int projPitch, unsigned int startAngle, unsigned int endAngle, float fSrcOrigin, float fDetOrigin, float fSrcZ, float fDetZ, float fDetUSize, float fDetVSize, const SDimensions3D dims)
+__global__ void devFDK_preweight(void* D_projData, unsigned int projPitch, unsigned int startAngle, unsigned int endAngle, float fSrcOrigin, float fDetOrigin, float fZShift, float fDetUSize, float fDetVSize, const SDimensions3D dims)
{
float* projData = (float*)D_projData;
int angle = startAngle + blockIdx.y * g_anglesPerWeightBlock + threadIdx.y;
@@ -230,21 +111,26 @@ __global__ void devFDK_preweight(void* D_projData, unsigned int projPitch, unsig
const float fT = fCentralRayLength * fCentralRayLength + fU * fU;
- float fV = (startDetectorV - 0.5f*dims.iProjV + 0.5f) * fDetVSize + fDetZ - fSrcZ;
+ float fV = (startDetectorV - 0.5f*dims.iProjV + 0.5f) * fDetVSize + fZShift;
+
+ //const float fW = fCentralRayLength;
+ //const float fW = fCentralRayLength * (M_PI / 2.0f) / (float)dims.iProjAngles;
+ const float fW1 = fSrcOrigin * fDetUSize * fDetVSize;
+ const float fW = fCentralRayLength * fW1 * fW1 * (M_PI / 2.0f) / (float)dims.iProjAngles;
for (int detectorV = startDetectorV; detectorV < endDetectorV; ++detectorV)
{
const float fRayLength = sqrtf(fT + fV * fV);
- const float fWeight = fCentralRayLength / fRayLength;
+ const float fWeight = fW / fRayLength;
projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] *= fWeight;
- fV += 1.0f;
+ fV += fDetVSize;
}
}
-__global__ void devFDK_ParkerWeight(void* D_projData, unsigned int projPitch, unsigned int startAngle, unsigned int endAngle, float fSrcOrigin, float fDetOrigin, float fSrcZ, float fDetZ, float fDetUSize, float fCentralFanAngle, const SDimensions3D dims)
+__global__ void devFDK_ParkerWeight(void* D_projData, unsigned int projPitch, unsigned int startAngle, unsigned int endAngle, float fSrcOrigin, float fDetOrigin, float fDetUSize, float fCentralFanAngle, const SDimensions3D dims)
{
float* projData = (float*)D_projData;
int angle = startAngle + blockIdx.y * g_anglesPerWeightBlock + threadIdx.y;
@@ -305,7 +191,7 @@ __global__ void devFDK_ParkerWeight(void* D_projData, unsigned int projPitch, un
// Perform the FDK pre-weighting and filtering
bool FDK_PreWeight(cudaPitchedPtr D_projData,
float fSrcOrigin, float fDetOrigin,
- float fSrcZ, float fDetZ,
+ float fZShift,
float fDetUSize, float fDetVSize, bool bShortScan,
const SDimensions3D& dims, const float* angles)
{
@@ -318,23 +204,57 @@ bool FDK_PreWeight(cudaPitchedPtr D_projData,
int projPitch = D_projData.pitch/sizeof(float);
- devFDK_preweight<<<dimGrid, dimBlock>>>(D_projData.ptr, projPitch, 0, dims.iProjAngles, fSrcOrigin, fDetOrigin, fSrcZ, fDetZ, fDetUSize, fDetVSize, dims);
+ devFDK_preweight<<<dimGrid, dimBlock>>>(D_projData.ptr, projPitch, 0, dims.iProjAngles, fSrcOrigin, fDetOrigin, fZShift, fDetUSize, fDetVSize, dims);
cudaTextForceKernelsCompletion();
- if (bShortScan) {
+ if (bShortScan && dims.iProjAngles > 1) {
+ ASTRA_DEBUG("Doing Parker weighting");
// We do short-scan Parker weighting
- cudaError_t e1 = cudaMemcpyToSymbol(gC_angle, angles,
+ // First, determine (in a very basic way) the interval that's
+ // been scanned. We assume angles[0] is one of the endpoints of the
+ // range.
+ float fdA = angles[1] - angles[0];
+
+ while (fdA < -M_PI)
+ fdA += 2*M_PI;
+ while (fdA >= M_PI)
+ fdA -= 2*M_PI;
+
+ float fAngleBase;
+ if (fdA >= 0.0f) {
+ // going up from angles[0]
+ fAngleBase = angles[dims.iProjAngles - 1];
+ } else {
+ // going down from angles[0]
+ fAngleBase = angles[0];
+ }
+
+ // We pick the highest end of the range, and then
+ // move all angles so they fall in (-2pi,0]
+
+ float *fRelAngles = new float[dims.iProjAngles];
+ for (unsigned int i = 0; i < dims.iProjAngles; ++i) {
+ float f = angles[i] - fAngleBase;
+ while (f > 0)
+ f -= 2*M_PI;
+ while (f <= -2*M_PI)
+ f += 2*M_PI;
+ fRelAngles[i] = f;
+
+ }
+
+ cudaError_t e1 = cudaMemcpyToSymbol(gC_angle, fRelAngles,
dims.iProjAngles*sizeof(float), 0,
cudaMemcpyHostToDevice);
assert(!e1);
+ delete[] fRelAngles;
- // TODO: detector pixel size!
- float fCentralFanAngle = atanf((dims.iProjU*0.5f) /
+ float fCentralFanAngle = atanf(fDetUSize * (dims.iProjU*0.5f) /
(fSrcOrigin + fDetOrigin));
- devFDK_ParkerWeight<<<dimGrid, dimBlock>>>(D_projData.ptr, projPitch, 0, dims.iProjAngles, fSrcOrigin, fDetOrigin, fSrcZ, fDetZ, fDetUSize, fCentralFanAngle, dims);
+ devFDK_ParkerWeight<<<dimGrid, dimBlock>>>(D_projData.ptr, projPitch, 0, dims.iProjAngles, fSrcOrigin, fDetOrigin, fDetUSize, fCentralFanAngle, dims);
}
@@ -344,10 +264,7 @@ bool FDK_PreWeight(cudaPitchedPtr D_projData,
bool FDK_Filter(cudaPitchedPtr D_projData,
cufftComplex * D_filter,
- float fSrcOrigin, float fDetOrigin,
- float fSrcZ, float fDetZ,
- float fDetUSize, float fDetVSize, bool bShortScan,
- const SDimensions3D& dims, const float* angles)
+ const SDimensions3D& dims)
{
// The filtering is a regular ramp filter per detector line.
@@ -392,9 +309,8 @@ bool FDK_Filter(cudaPitchedPtr D_projData,
bool FDK(cudaPitchedPtr D_volumeData,
cudaPitchedPtr D_projData,
- float fSrcOrigin, float fDetOrigin,
- float fSrcZ, float fDetZ, float fDetUSize, float fDetVSize,
- const SDimensions3D& dims, const float* angles, bool bShortScan)
+ const SConeProjection* angles,
+ const SDimensions3D& dims, SProjectorParams3D params, bool bShortScan)
{
bool ok;
// Generate filter
@@ -403,12 +319,45 @@ bool FDK(cudaPitchedPtr D_volumeData,
int iPaddedDetCount = calcNextPowerOfTwo(2 * dims.iProjU);
int iHalfFFTSize = calcFFTFourSize(iPaddedDetCount);
+
+ // NB: We don't support arbitrary cone_vec geometries here.
+ // Only those that are vertical sub-geometries
+ // (cf. CompositeGeometryManager) of regular cone geometries.
+ assert(dims.iProjAngles > 0);
+ const SConeProjection& p0 = angles[0];
+
+ // assuming U is in the XY plane, V is parallel to Z axis
+ float fDetCX = p0.fDetSX + 0.5*dims.iProjU*p0.fDetUX;
+ float fDetCY = p0.fDetSY + 0.5*dims.iProjU*p0.fDetUY;
+ float fDetCZ = p0.fDetSZ + 0.5*dims.iProjV*p0.fDetVZ;
+
+ float fSrcOrigin = sqrt(p0.fSrcX*p0.fSrcX + p0.fSrcY*p0.fSrcY);
+ float fDetOrigin = sqrt(fDetCX*fDetCX + fDetCY*fDetCY);
+ float fDetUSize = sqrt(p0.fDetUX*p0.fDetUX + p0.fDetUY*p0.fDetUY);
+ float fDetVSize = abs(p0.fDetVZ);
+
+ float fZShift = fDetCZ - p0.fSrcZ;
+
+ float *pfAngles = new float[dims.iProjAngles];
+ for (unsigned int i = 0; i < dims.iProjAngles; ++i) {
+ // FIXME: Sign/order
+ pfAngles[i] = -atan2(angles[i].fSrcX, angles[i].fSrcY) + M_PI;
+ }
+
+
+#if 1
ok = FDK_PreWeight(D_projData, fSrcOrigin, fDetOrigin,
- fSrcZ, fDetZ, fDetUSize, fDetVSize,
- bShortScan, dims, angles);
+ fZShift, fDetUSize, fDetVSize,
+ bShortScan, dims, pfAngles);
+#else
+ ok = true;
+#endif
+ delete[] pfAngles;
+
if (!ok)
return false;
+#if 1
cufftComplex *pHostFilter = new cufftComplex[dims.iProjAngles * iHalfFFTSize];
memset(pHostFilter, 0, sizeof(cufftComplex) * dims.iProjAngles * iHalfFFTSize);
@@ -425,28 +374,25 @@ bool FDK(cudaPitchedPtr D_volumeData,
- ok = FDK_Filter(D_projData, D_filter, fSrcOrigin, fDetOrigin,
- fSrcZ, fDetZ, fDetUSize, fDetVSize,
- bShortScan, dims, angles);
+ ok = FDK_Filter(D_projData, D_filter, dims);
// Clean up filter
freeComplexOnDevice(D_filter);
-
+#endif
if (!ok)
return false;
// Perform BP
- ok = FDK_BP(D_volumeData, D_projData, fSrcOrigin, fDetOrigin, fSrcZ, fDetZ,
- fDetUSize, fDetVSize, dims, angles);
+ params.bFDKWeighting = true;
+
+ //ok = FDK_BP(D_volumeData, D_projData, fSrcOrigin, fDetOrigin, 0.0f, 0.0f, fDetUSize, fDetVSize, dims, pfAngles);
+ ok = ConeBP(D_volumeData, D_projData, dims, angles, params);
if (!ok)
return false;
- processVol3D<opMul>(D_volumeData,
- (M_PI / 2.0f) / (float)dims.iProjAngles, dims);
-
return true;
}
diff --git a/cuda/3d/fdk.h b/cuda/3d/fdk.h
index de7fe53..8d2a128 100644
--- a/cuda/3d/fdk.h
+++ b/cuda/3d/fdk.h
@@ -35,16 +35,14 @@ namespace astraCUDA3d {
bool FDK_PreWeight(cudaPitchedPtr D_projData,
float fSrcOrigin, float fDetOrigin,
- float fSrcZ, float fDetZ,
+ float fZShift,
float fDetUSize, float fDetVSize, bool bShortScan,
const SDimensions3D& dims, const float* angles);
bool FDK(cudaPitchedPtr D_volumeData,
cudaPitchedPtr D_projData,
- float fSrcOrigin, float fDetOrigin,
- float fSrcZ, float fDetZ, float fDetUSize, float fDetVSize,
- const SDimensions3D& dims, const float* angles, bool bShortScan);
-
+ const SConeProjection* angles,
+ const SDimensions3D& dims, SProjectorParams3D params, bool bShortScan);
}
diff --git a/cuda/3d/mem3d.cu b/cuda/3d/mem3d.cu
index 9a239da..4fb30a2 100644
--- a/cuda/3d/mem3d.cu
+++ b/cuda/3d/mem3d.cu
@@ -38,6 +38,7 @@ $Id$
#include "cone_bp.h"
#include "par3d_fp.h"
#include "par3d_bp.h"
+#include "fdk.h"
#include "astra/Logging.h"
@@ -278,6 +279,33 @@ bool BP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, con
}
+bool FDK(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, bool bShortScan)
+{
+ SDimensions3D dims;
+ SProjectorParams3D params;
+
+ bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims);
+ if (!ok)
+ return false;
+
+ SPar3DProjection* pParProjs;
+ SConeProjection* pConeProjs;
+
+ ok = convertAstraGeometry(pVolGeom, pProjGeom,
+ pParProjs, pConeProjs,
+ params);
+
+ if (!ok || !pConeProjs)
+ return false;
+
+ ok &= FDK(volData.d->ptr, projData.d->ptr, pConeProjs, dims, params, bShortScan);
+
+ return ok;
+
+
+
+}
+
diff --git a/cuda/3d/mem3d.h b/cuda/3d/mem3d.h
index 6fff80b..2d0fb27 100644
--- a/cuda/3d/mem3d.h
+++ b/cuda/3d/mem3d.h
@@ -95,6 +95,7 @@ bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, con
bool BP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iVoxelSuperSampling);
+bool FDK(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, bool bShortScan);
}
diff --git a/include/astra/CompositeGeometryManager.h b/include/astra/CompositeGeometryManager.h
index 18dd72f..064370a 100644
--- a/include/astra/CompositeGeometryManager.h
+++ b/include/astra/CompositeGeometryManager.h
@@ -55,6 +55,10 @@ struct SGPUParams {
size_t memory;
};
+struct SFDKSettings {
+ bool bShortScan;
+};
+
class _AstraExport CCompositeGeometryManager {
public:
@@ -127,9 +131,10 @@ public:
CProjector3D *pProjector; // For a `global' geometry. It will not match
// the geometries of the input and output.
+ SFDKSettings FDKSettings;
enum {
- JOB_FP, JOB_BP, JOB_NOP
+ JOB_FP, JOB_BP, JOB_FDK, JOB_NOP
} eType;
enum {
MODE_ADD, MODE_SET
@@ -155,6 +160,8 @@ public:
CFloat32ProjectionData3DMemory *pProjData);
bool doBP(CProjector3D *pProjector, CFloat32VolumeData3DMemory *pVolData,
CFloat32ProjectionData3DMemory *pProjData);
+ bool doFDK(CProjector3D *pProjector, CFloat32VolumeData3DMemory *pVolData,
+ CFloat32ProjectionData3DMemory *pProjData, bool bShortScan);
bool doFP(CProjector3D *pProjector, const std::vector<CFloat32VolumeData3DMemory *>& volData, const std::vector<CFloat32ProjectionData3DMemory *>& projData);
bool doBP(CProjector3D *pProjector, const std::vector<CFloat32VolumeData3DMemory *>& volData, const std::vector<CFloat32ProjectionData3DMemory *>& projData);
diff --git a/src/CompositeGeometryManager.cpp b/src/CompositeGeometryManager.cpp
index 084ba8c..c5b4d3b 100644
--- a/src/CompositeGeometryManager.cpp
+++ b/src/CompositeGeometryManager.cpp
@@ -146,6 +146,7 @@ bool CCompositeGeometryManager::splitJobs(TJobSet &jobs, size_t maxSize, int div
newjob.eType = j->eType;
newjob.eMode = j->eMode;
newjob.pProjector = j->pProjector;
+ newjob.FDKSettings = j->FDKSettings;
CPart* input = job.pInput->reduce(outputPart.get());
@@ -992,6 +993,25 @@ bool CCompositeGeometryManager::doBP(CProjector3D *pProjector, CFloat32VolumeDat
return doJobs(L);
}
+
+bool CCompositeGeometryManager::doFDK(CProjector3D *pProjector, CFloat32VolumeData3DMemory *pVolData,
+ CFloat32ProjectionData3DMemory *pProjData, bool bShortScan)
+{
+ if (!dynamic_cast<CConeProjectionGeometry3D*>(pProjData->getGeometry())) {
+ ASTRA_ERROR("CCompositeGeometryManager::doFDK: cone geometry required");
+ return false;
+ }
+
+ SJob job = createJobBP(pProjector, pVolData, pProjData);
+ job.eType = SJob::JOB_FDK;
+ job.FDKSettings.bShortScan = bShortScan;
+
+ TJobList L;
+ L.push_back(job);
+
+ return doJobs(L);
+}
+
bool CCompositeGeometryManager::doFP(CProjector3D *pProjector, const std::vector<CFloat32VolumeData3DMemory *>& volData, const std::vector<CFloat32ProjectionData3DMemory *>& projData)
{
ASTRA_DEBUG("CCompositeGeometryManager::doFP, multi-volume");
@@ -1185,7 +1205,9 @@ static bool doJob(const CCompositeGeometryManager::TJobSet::const_iterator& iter
ok = astraCUDA3d::copyToGPUMemory(src, inputMem, srcdims);
if (!ok) ASTRA_ERROR("Error copying input data to GPU");
- if (j.eType == CCompositeGeometryManager::SJob::JOB_FP) {
+ switch (j.eType) {
+ case CCompositeGeometryManager::SJob::JOB_FP:
+ {
assert(dynamic_cast<CCompositeGeometryManager::CVolumePart*>(j.pInput.get()));
assert(dynamic_cast<CCompositeGeometryManager::CProjectionPart*>(j.pOutput.get()));
@@ -1194,7 +1216,10 @@ static bool doJob(const CCompositeGeometryManager::TJobSet::const_iterator& iter
ok = astraCUDA3d::FP(((CCompositeGeometryManager::CProjectionPart*)j.pOutput.get())->pGeom, outputMem, ((CCompositeGeometryManager::CVolumePart*)j.pInput.get())->pGeom, inputMem, detectorSuperSampling, projKernel);
if (!ok) ASTRA_ERROR("Error performing sub-FP");
ASTRA_DEBUG("CCompositeGeometryManager::doJobs: FP done");
- } else if (j.eType == CCompositeGeometryManager::SJob::JOB_BP) {
+ }
+ break;
+ case CCompositeGeometryManager::SJob::JOB_BP:
+ {
assert(dynamic_cast<CCompositeGeometryManager::CVolumePart*>(j.pOutput.get()));
assert(dynamic_cast<CCompositeGeometryManager::CProjectionPart*>(j.pInput.get()));
@@ -1203,7 +1228,26 @@ static bool doJob(const CCompositeGeometryManager::TJobSet::const_iterator& iter
ok = astraCUDA3d::BP(((CCompositeGeometryManager::CProjectionPart*)j.pInput.get())->pGeom, inputMem, ((CCompositeGeometryManager::CVolumePart*)j.pOutput.get())->pGeom, outputMem, voxelSuperSampling);
if (!ok) ASTRA_ERROR("Error performing sub-BP");
ASTRA_DEBUG("CCompositeGeometryManager::doJobs: BP done");
- } else {
+ }
+ break;
+ case CCompositeGeometryManager::SJob::JOB_FDK:
+ {
+ assert(dynamic_cast<CCompositeGeometryManager::CVolumePart*>(j.pOutput.get()));
+ assert(dynamic_cast<CCompositeGeometryManager::CProjectionPart*>(j.pInput.get()));
+
+ if (srcdims.subx || srcdims.suby) {
+ ASTRA_ERROR("CCompositeGeometryManager::doJobs: data too large for FDK");
+ ok = false;
+ } else {
+ ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing FDK");
+
+ ok = astraCUDA3d::FDK(((CCompositeGeometryManager::CProjectionPart*)j.pInput.get())->pGeom, inputMem, ((CCompositeGeometryManager::CVolumePart*)j.pOutput.get())->pGeom, outputMem, j.FDKSettings.bShortScan);
+ if (!ok) ASTRA_ERROR("Error performing sub-FDK");
+ ASTRA_DEBUG("CCompositeGeometryManager::doJobs: FDK done");
+ }
+ }
+ break;
+ default:
assert(false);
}
diff --git a/src/CudaFDKAlgorithm3D.cpp b/src/CudaFDKAlgorithm3D.cpp
index e101a42..c7c8ed5 100644
--- a/src/CudaFDKAlgorithm3D.cpp
+++ b/src/CudaFDKAlgorithm3D.cpp
@@ -32,6 +32,7 @@ $Id$
#include "astra/CudaProjector3D.h"
#include "astra/ConeProjectionGeometry3D.h"
+#include "astra/CompositeGeometryManager.h"
#include "astra/Logging.h"
@@ -206,6 +207,7 @@ void CCudaFDKAlgorithm3D::run(int _iNrIterations)
ASTRA_ASSERT(pReconMem);
+#if 0
bool ok = true;
ok = astraCudaFDK(pReconMem->getData(), pSinoMem->getDataConst(),
@@ -213,6 +215,13 @@ void CCudaFDKAlgorithm3D::run(int _iNrIterations)
m_bShortScan, m_iGPUIndex, m_iVoxelSuperSampling);
ASTRA_ASSERT(ok);
+#endif
+
+ CCompositeGeometryManager cgm;
+
+ cgm.doFDK(m_pProjector, pReconMem, pSinoMem, m_bShortScan);
+
+
}
//----------------------------------------------------------------------------------------