summaryrefslogtreecommitdiffstats
path: root/cuda
diff options
context:
space:
mode:
authorWillem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>2018-01-09 14:09:40 +0100
committerWillem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>2018-01-09 14:09:40 +0100
commitde27e439a0c59fade175fba4e0b4a1e0c84b933d (patch)
tree8d724e10b35291f5bfd63174eeeca95e52a863df /cuda
parent324611ce98a82944def875e61cb93dd98ced9c79 (diff)
parent84da1d5e27abadf28e97695e88494c58bf1c2697 (diff)
downloadastra-de27e439a0c59fade175fba4e0b4a1e0c84b933d.tar.gz
astra-de27e439a0c59fade175fba4e0b4a1e0c84b933d.tar.bz2
astra-de27e439a0c59fade175fba4e0b4a1e0c84b933d.tar.xz
astra-de27e439a0c59fade175fba4e0b4a1e0c84b933d.zip
Merge branch 'parallel_vec'
Diffstat (limited to 'cuda')
-rw-r--r--cuda/2d/algo.cu62
-rw-r--r--cuda/2d/algo.h22
-rw-r--r--cuda/2d/astra.cu731
-rw-r--r--cuda/2d/astra.h155
-rw-r--r--cuda/2d/cgls.cu36
-rw-r--r--cuda/2d/dims.h5
-rw-r--r--cuda/2d/em.cu28
-rw-r--r--cuda/2d/fbp.cu347
-rw-r--r--cuda/2d/fbp.h98
-rw-r--r--cuda/2d/fbp_filters.h4
-rw-r--r--cuda/2d/fft.cu12
-rw-r--r--cuda/2d/fft.h8
-rw-r--r--cuda/2d/par_bp.cu166
-rw-r--r--cuda/2d/par_bp.h6
-rw-r--r--cuda/2d/par_fp.cu358
-rw-r--r--cuda/2d/par_fp.h4
-rw-r--r--cuda/2d/sart.cu8
-rw-r--r--cuda/2d/sirt.cu45
-rw-r--r--cuda/3d/fdk.cu22
19 files changed, 771 insertions, 1346 deletions
diff --git a/cuda/2d/algo.cu b/cuda/2d/algo.cu
index 0e5209f..3275f6d 100644
--- a/cuda/2d/algo.cu
+++ b/cuda/2d/algo.cu
@@ -34,13 +34,13 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
#include "fan_bp.h"
#include "util.h"
#include "arith.h"
+#include "astra.h"
namespace astraCUDA {
ReconAlgo::ReconAlgo()
{
- angles = 0;
- TOffsets = 0;
+ parProjs = 0;
fanProjs = 0;
shouldAbort = false;
@@ -65,8 +65,7 @@ ReconAlgo::~ReconAlgo()
void ReconAlgo::reset()
{
- delete[] angles;
- delete[] TOffsets;
+ delete[] parProjs;
delete[] fanProjs;
if (freeGPUMemory) {
@@ -76,8 +75,7 @@ void ReconAlgo::reset()
cudaFree(D_volumeData);
}
- angles = 0;
- TOffsets = 0;
+ parProjs = 0;
fanProjs = 0;
shouldAbort = false;
@@ -123,46 +121,40 @@ bool ReconAlgo::enableSinogramMask()
}
-bool ReconAlgo::setGeometry(const SDimensions& _dims, const float* _angles)
+bool ReconAlgo::setGeometry(const astra::CVolumeGeometry2D* pVolGeom,
+ const astra::CProjectionGeometry2D* pProjGeom)
{
- dims = _dims;
+ bool ok;
- angles = new float[dims.iProjAngles];
+ ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims);
- memcpy(angles, _angles, sizeof(angles[0]) * dims.iProjAngles);
+ if (!ok)
+ return false;
+ delete[] parProjs;
+ parProjs = 0;
delete[] fanProjs;
fanProjs = 0;
- return true;
-}
-
-bool ReconAlgo::setFanGeometry(const SDimensions& _dims,
- const SFanProjection* _projs)
-{
- dims = _dims;
- fanProjs = new SFanProjection[dims.iProjAngles];
-
- memcpy(fanProjs, _projs, sizeof(fanProjs[0]) * dims.iProjAngles);
-
- delete[] angles;
- angles = 0;
+ fOutputScale = 1.0f;
+ ok = convertAstraGeometry(pVolGeom, pProjGeom, parProjs, fanProjs, fOutputScale);
+ if (!ok)
+ return false;
return true;
}
-
-bool ReconAlgo::setTOffsets(const float* _TOffsets)
+bool ReconAlgo::setSuperSampling(int raysPerDet, int raysPerPixelDim)
{
- // TODO: determine if they're all zero?
- TOffsets = new float[dims.iProjAngles];
- memcpy(TOffsets, _TOffsets, sizeof(angles[0]) * dims.iProjAngles);
+ if (raysPerDet <= 0 || raysPerPixelDim <= 0)
+ return false;
+
+ dims.iRaysPerDet = raysPerDet;
+ dims.iRaysPerPixelDim = raysPerPixelDim;
return true;
}
-
-
bool ReconAlgo::setVolumeMask(float* _D_maskData, unsigned int _maskPitch)
{
assert(useVolumeMask);
@@ -323,14 +315,14 @@ bool ReconAlgo::callFP(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
float outputScale)
{
- if (angles) {
+ if (parProjs) {
assert(!fanProjs);
return FP(D_volumeData, volumePitch, D_projData, projPitch,
- dims, angles, TOffsets, outputScale);
+ dims, parProjs, fOutputScale * outputScale);
} else {
assert(fanProjs);
return FanFP(D_volumeData, volumePitch, D_projData, projPitch,
- dims, fanProjs, outputScale);
+ dims, fanProjs, fOutputScale * outputScale);
}
}
@@ -338,10 +330,10 @@ bool ReconAlgo::callBP(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
float outputScale)
{
- if (angles) {
+ if (parProjs) {
assert(!fanProjs);
return BP(D_volumeData, volumePitch, D_projData, projPitch,
- dims, angles, TOffsets, outputScale);
+ dims, parProjs, outputScale);
} else {
assert(fanProjs);
return FanBP(D_volumeData, volumePitch, D_projData, projPitch,
diff --git a/cuda/2d/algo.h b/cuda/2d/algo.h
index 4a75907..d70859f 100644
--- a/cuda/2d/algo.h
+++ b/cuda/2d/algo.h
@@ -31,6 +31,17 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
#include "astra/Globals.h"
#include "dims.h"
+namespace astra {
+
+class CParallelProjectionGeometry2D;
+class CParallelVecProjectionGeometry2D;
+class CFanFlatProjectionGeometry2D;
+class CFanFlatVecProjectionGeometry2D;
+class CVolumeGeometry2D;
+class CProjectionGeometry2D;
+
+}
+
namespace astraCUDA {
class _AstraExport ReconAlgo {
@@ -40,11 +51,10 @@ public:
bool setGPUIndex(int iGPUIndex);
- bool setGeometry(const SDimensions& dims, const float* angles);
- bool setFanGeometry(const SDimensions& dims, const SFanProjection* projs);
+ bool setGeometry(const astra::CVolumeGeometry2D* pVolGeom,
+ const astra::CProjectionGeometry2D* pProjGeom);
- // setTOffsets should (optionally) be called after setGeometry
- bool setTOffsets(const float* TOffsets);
+ bool setSuperSampling(int raysPerDet, int raysPerPixelDim);
void signalAbort() { shouldAbort = true; }
@@ -123,9 +133,9 @@ protected:
SDimensions dims;
- float* angles;
- float* TOffsets;
+ SParProjection* parProjs;
SFanProjection* fanProjs;
+ float fOutputScale;
volatile bool shouldAbort;
diff --git a/cuda/2d/astra.cu b/cuda/2d/astra.cu
index c0132b2..81459de 100644
--- a/cuda/2d/astra.cu
+++ b/cuda/2d/astra.cu
@@ -41,8 +41,10 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
#include <fstream>
#include <cuda.h>
+#include "../../include/astra/GeometryUtil2D.h"
#include "../../include/astra/VolumeGeometry2D.h"
#include "../../include/astra/ParallelProjectionGeometry2D.h"
+#include "../../include/astra/ParallelVecProjectionGeometry2D.h"
#include "../../include/astra/FanFlatProjectionGeometry2D.h"
#include "../../include/astra/FanFlatVecProjectionGeometry2D.h"
@@ -63,514 +65,6 @@ enum CUDAProjectionType {
};
-class AstraFBP_internal {
-public:
- SDimensions dims;
- float* angles;
- float* TOffsets;
- astraCUDA::SFanProjection* fanProjections;
-
- float fOriginSourceDistance;
- float fOriginDetectorDistance;
-
- float fPixelSize;
-
- bool bFanBeam;
- bool bShortScan;
-
- bool initialized;
- bool setStartReconstruction;
-
- float* D_sinoData;
- unsigned int sinoPitch;
-
- float* D_volumeData;
- unsigned int volumePitch;
-
- cufftComplex * m_pDevFilter;
-};
-
-AstraFBP::AstraFBP()
-{
- pData = new AstraFBP_internal();
-
- pData->angles = 0;
- pData->fanProjections = 0;
- pData->TOffsets = 0;
- pData->D_sinoData = 0;
- pData->D_volumeData = 0;
-
- pData->dims.iVolWidth = 0;
- pData->dims.iProjAngles = 0;
- pData->dims.fDetScale = 1.0f;
- pData->dims.iRaysPerDet = 1;
- pData->dims.iRaysPerPixelDim = 1;
-
- pData->initialized = false;
- pData->setStartReconstruction = false;
-
- pData->m_pDevFilter = NULL;
-}
-
-AstraFBP::~AstraFBP()
-{
- delete[] pData->angles;
- pData->angles = 0;
-
- delete[] pData->TOffsets;
- pData->TOffsets = 0;
-
- delete[] pData->fanProjections;
- pData->fanProjections = 0;
-
- cudaFree(pData->D_sinoData);
- pData->D_sinoData = 0;
-
- cudaFree(pData->D_volumeData);
- pData->D_volumeData = 0;
-
- if(pData->m_pDevFilter != NULL)
- {
- freeComplexOnDevice(pData->m_pDevFilter);
- pData->m_pDevFilter = NULL;
- }
-
- delete pData;
- pData = 0;
-}
-
-bool AstraFBP::setReconstructionGeometry(unsigned int iVolWidth,
- unsigned int iVolHeight,
- float fPixelSize)
-{
- if (pData->initialized)
- return false;
-
- pData->dims.iVolWidth = iVolWidth;
- pData->dims.iVolHeight = iVolHeight;
-
- pData->fPixelSize = fPixelSize;
-
- return (iVolWidth > 0 && iVolHeight > 0 && fPixelSize > 0.0f);
-}
-
-bool AstraFBP::setProjectionGeometry(unsigned int iProjAngles,
- unsigned int iProjDets,
- const float* pfAngles,
- float fDetSize)
-{
- if (pData->initialized)
- return false;
-
- pData->dims.iProjAngles = iProjAngles;
- pData->dims.iProjDets = iProjDets;
- pData->dims.fDetScale = fDetSize / pData->fPixelSize;
-
- if (iProjAngles == 0 || iProjDets == 0 || pfAngles == 0)
- return false;
-
- pData->angles = new float[iProjAngles];
- memcpy(pData->angles, pfAngles, iProjAngles * sizeof(pfAngles[0]));
-
- pData->bFanBeam = false;
-
- return true;
-}
-
-bool AstraFBP::setFanGeometry(unsigned int iProjAngles,
- unsigned int iProjDets,
- const astraCUDA::SFanProjection *fanProjs,
- const float* pfAngles,
- float fOriginSourceDistance,
- float fOriginDetectorDistance,
- float fDetSize,
- bool bShortScan)
-{
- // Slightly abusing setProjectionGeometry for this...
- if (!setProjectionGeometry(iProjAngles, iProjDets, pfAngles, fDetSize))
- return false;
-
- pData->fOriginSourceDistance = fOriginSourceDistance;
- pData->fOriginDetectorDistance = fOriginDetectorDistance;
-
- pData->fanProjections = new astraCUDA::SFanProjection[iProjAngles];
- memcpy(pData->fanProjections, fanProjs, iProjAngles * sizeof(fanProjs[0]));
-
- pData->bFanBeam = true;
- pData->bShortScan = bShortScan;
-
- return true;
-}
-
-
-bool AstraFBP::setPixelSuperSampling(unsigned int iPixelSuperSampling)
-{
- if (pData->initialized)
- return false;
-
- if (iPixelSuperSampling == 0)
- return false;
-
- pData->dims.iRaysPerPixelDim = iPixelSuperSampling;
-
- return true;
-}
-
-
-bool AstraFBP::setTOffsets(const float* pfTOffsets)
-{
- if (pData->initialized)
- return false;
-
- if (pfTOffsets == 0)
- return false;
-
- pData->TOffsets = new float[pData->dims.iProjAngles];
- memcpy(pData->TOffsets, pfTOffsets, pData->dims.iProjAngles * sizeof(pfTOffsets[0]));
-
- return true;
-}
-
-bool AstraFBP::init(int iGPUIndex)
-{
- if (pData->initialized)
- {
- return false;
- }
-
- if (pData->dims.iProjAngles == 0 || pData->dims.iVolWidth == 0)
- {
- return false;
- }
-
- if (iGPUIndex != -1) {
- cudaSetDevice(iGPUIndex);
- cudaError_t err = cudaGetLastError();
-
- // Ignore errors caused by calling cudaSetDevice multiple times
- if (err != cudaSuccess && err != cudaErrorSetOnActiveProcess)
- {
- return false;
- }
- }
-
- bool ok = allocateVolumeData(pData->D_volumeData, pData->volumePitch, pData->dims);
- if (!ok)
- {
- return false;
- }
-
- ok = allocateProjectionData(pData->D_sinoData, pData->sinoPitch, pData->dims);
- if (!ok)
- {
- cudaFree(pData->D_volumeData);
- pData->D_volumeData = 0;
- return false;
- }
-
- pData->initialized = true;
-
- return true;
-}
-
-bool AstraFBP::setSinogram(const float* pfSinogram,
- unsigned int iSinogramPitch)
-{
- if (!pData->initialized)
- return false;
- if (!pfSinogram)
- return false;
-
- bool ok = copySinogramToDevice(pfSinogram, iSinogramPitch,
- pData->dims,
- pData->D_sinoData, pData->sinoPitch);
- if (!ok)
- return false;
-
- // rescale sinogram to adjust for pixel size
- processSino<opMul>(pData->D_sinoData,
- 1.0f/(pData->fPixelSize*pData->fPixelSize),
- pData->sinoPitch, pData->dims);
-
- pData->setStartReconstruction = false;
-
- return true;
-}
-
-static int calcNextPowerOfTwo(int _iValue)
-{
- int iOutput = 1;
-
- while(iOutput < _iValue)
- {
- iOutput *= 2;
- }
-
- return iOutput;
-}
-
-bool AstraFBP::run()
-{
- if (!pData->initialized)
- {
- return false;
- }
-
- zeroVolumeData(pData->D_volumeData, pData->volumePitch, pData->dims);
-
- bool ok = false;
-
- if (pData->bFanBeam) {
- // Call FDK_PreWeight to handle fan beam geometry. We treat
- // this as a cone beam setup of a single slice:
-
- // TODO: TOffsets affects this preweighting...
-
- // We create a fake cudaPitchedPtr
- cudaPitchedPtr tmp;
- tmp.ptr = pData->D_sinoData;
- tmp.pitch = pData->sinoPitch * sizeof(float);
- tmp.xsize = pData->dims.iProjDets;
- tmp.ysize = pData->dims.iProjAngles;
- // and a fake Dimensions3D
- astraCUDA3d::SDimensions3D dims3d;
- dims3d.iVolX = pData->dims.iVolWidth;
- dims3d.iVolY = pData->dims.iVolHeight;
- dims3d.iVolZ = 1;
- dims3d.iProjAngles = pData->dims.iProjAngles;
- dims3d.iProjU = pData->dims.iProjDets;
- dims3d.iProjV = 1;
-
- astraCUDA3d::FDK_PreWeight(tmp, pData->fOriginSourceDistance,
- pData->fOriginDetectorDistance, 0.0f,
- pData->dims.fDetScale, 1.0f, // TODO: Are these correct?
- pData->bShortScan, dims3d, pData->angles);
- }
-
- if (pData->m_pDevFilter) {
-
- int iFFTRealDetCount = calcNextPowerOfTwo(2 * pData->dims.iProjDets);
- int iFFTFourDetCount = calcFFTFourSize(iFFTRealDetCount);
-
- cufftComplex * pDevComplexSinogram = NULL;
-
- allocateComplexOnDevice(pData->dims.iProjAngles, iFFTFourDetCount, &pDevComplexSinogram);
-
- runCudaFFT(pData->dims.iProjAngles, pData->D_sinoData, pData->sinoPitch, pData->dims.iProjDets, iFFTRealDetCount, iFFTFourDetCount, pDevComplexSinogram);
-
- applyFilter(pData->dims.iProjAngles, iFFTFourDetCount, pDevComplexSinogram, pData->m_pDevFilter);
-
- runCudaIFFT(pData->dims.iProjAngles, pDevComplexSinogram, pData->D_sinoData, pData->sinoPitch, pData->dims.iProjDets, iFFTRealDetCount, iFFTFourDetCount);
-
- freeComplexOnDevice(pDevComplexSinogram);
-
- }
-
- float fOutputScale = (M_PI / 2.0f) / (float)pData->dims.iProjAngles;
-
- if (pData->bFanBeam) {
- ok = FanBP_FBPWeighted(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->fanProjections, fOutputScale);
-
- } else {
- ok = BP(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->angles, pData->TOffsets, fOutputScale);
- }
- if(!ok)
- {
- return false;
- }
-
- return true;
-}
-
-bool AstraFBP::getReconstruction(float* pfReconstruction, unsigned int iReconstructionPitch) const
-{
- if (!pData->initialized)
- return false;
-
- bool ok = copyVolumeFromDevice(pfReconstruction, iReconstructionPitch,
- pData->dims,
- pData->D_volumeData, pData->volumePitch);
- if (!ok)
- return false;
-
- return true;
-}
-
-int AstraFBP::calcFourierFilterSize(int _iDetectorCount)
-{
- int iFFTRealDetCount = calcNextPowerOfTwo(2 * _iDetectorCount);
- int iFreqBinCount = calcFFTFourSize(iFFTRealDetCount);
-
- // CHECKME: Matlab makes this at least 64. Do we also need to?
- return iFreqBinCount;
-}
-
-bool AstraFBP::setFilter(E_FBPFILTER _eFilter, const float * _pfHostFilter /* = NULL */, int _iFilterWidth /* = 0 */, float _fD /* = 1.0f */, float _fFilterParameter /* = -1.0f */)
-{
- if(pData->m_pDevFilter != 0)
- {
- freeComplexOnDevice(pData->m_pDevFilter);
- pData->m_pDevFilter = 0;
- }
-
- if (_eFilter == FILTER_NONE)
- return true; // leave pData->m_pDevFilter set to 0
-
-
- int iFFTRealDetCount = calcNextPowerOfTwo(2 * pData->dims.iProjDets);
- int iFreqBinCount = calcFFTFourSize(iFFTRealDetCount);
-
- cufftComplex * pHostFilter = new cufftComplex[pData->dims.iProjAngles * iFreqBinCount];
- memset(pHostFilter, 0, sizeof(cufftComplex) * pData->dims.iProjAngles * iFreqBinCount);
-
- allocateComplexOnDevice(pData->dims.iProjAngles, iFreqBinCount, &(pData->m_pDevFilter));
-
- switch(_eFilter)
- {
- case FILTER_NONE:
- // handled above
- break;
- case FILTER_RAMLAK:
- case FILTER_SHEPPLOGAN:
- case FILTER_COSINE:
- case FILTER_HAMMING:
- case FILTER_HANN:
- case FILTER_TUKEY:
- case FILTER_LANCZOS:
- case FILTER_TRIANGULAR:
- case FILTER_GAUSSIAN:
- case FILTER_BARTLETTHANN:
- case FILTER_BLACKMAN:
- case FILTER_NUTTALL:
- case FILTER_BLACKMANHARRIS:
- case FILTER_BLACKMANNUTTALL:
- case FILTER_FLATTOP:
- case FILTER_PARZEN:
- {
- genFilter(_eFilter, _fD, pData->dims.iProjAngles, pHostFilter, iFFTRealDetCount, iFreqBinCount, _fFilterParameter);
- uploadComplexArrayToDevice(pData->dims.iProjAngles, iFreqBinCount, pHostFilter, pData->m_pDevFilter);
-
- break;
- }
- case FILTER_PROJECTION:
- {
- // make sure the offered filter has the correct size
- assert(_iFilterWidth == iFreqBinCount);
-
- for(int iFreqBinIndex = 0; iFreqBinIndex < iFreqBinCount; iFreqBinIndex++)
- {
- float fValue = _pfHostFilter[iFreqBinIndex];
-
- for(int iProjectionIndex = 0; iProjectionIndex < (int)pData->dims.iProjAngles; iProjectionIndex++)
- {
- pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].x = fValue;
- pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].y = 0.0f;
- }
- }
- uploadComplexArrayToDevice(pData->dims.iProjAngles, iFreqBinCount, pHostFilter, pData->m_pDevFilter);
- break;
- }
- case FILTER_SINOGRAM:
- {
- // make sure the offered filter has the correct size
- assert(_iFilterWidth == iFreqBinCount);
-
- for(int iFreqBinIndex = 0; iFreqBinIndex < iFreqBinCount; iFreqBinIndex++)
- {
- for(int iProjectionIndex = 0; iProjectionIndex < (int)pData->dims.iProjAngles; iProjectionIndex++)
- {
- float fValue = _pfHostFilter[iFreqBinIndex + iProjectionIndex * _iFilterWidth];
-
- pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].x = fValue;
- pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].y = 0.0f;
- }
- }
- uploadComplexArrayToDevice(pData->dims.iProjAngles, iFreqBinCount, pHostFilter, pData->m_pDevFilter);
- break;
- }
- case FILTER_RPROJECTION:
- {
- int iProjectionCount = pData->dims.iProjAngles;
- int iRealFilterElementCount = iProjectionCount * iFFTRealDetCount;
- float * pfHostRealFilter = new float[iRealFilterElementCount];
- memset(pfHostRealFilter, 0, sizeof(float) * iRealFilterElementCount);
-
- int iUsedFilterWidth = min(_iFilterWidth, iFFTRealDetCount);
- int iStartFilterIndex = (_iFilterWidth - iUsedFilterWidth) / 2;
- int iMaxFilterIndex = iStartFilterIndex + iUsedFilterWidth;
-
- int iFilterShiftSize = _iFilterWidth / 2;
-
- for(int iDetectorIndex = iStartFilterIndex; iDetectorIndex < iMaxFilterIndex; iDetectorIndex++)
- {
- int iFFTInFilterIndex = (iDetectorIndex + iFFTRealDetCount - iFilterShiftSize) % iFFTRealDetCount;
- float fValue = _pfHostFilter[iDetectorIndex];
-
- for(int iProjectionIndex = 0; iProjectionIndex < (int)pData->dims.iProjAngles; iProjectionIndex++)
- {
- pfHostRealFilter[iFFTInFilterIndex + iProjectionIndex * iFFTRealDetCount] = fValue;
- }
- }
-
- float* pfDevRealFilter = NULL;
- cudaMalloc((void **)&pfDevRealFilter, sizeof(float) * iRealFilterElementCount); // TODO: check for errors
- cudaMemcpy(pfDevRealFilter, pfHostRealFilter, sizeof(float) * iRealFilterElementCount, cudaMemcpyHostToDevice);
- delete[] pfHostRealFilter;
-
- runCudaFFT(iProjectionCount, pfDevRealFilter, iFFTRealDetCount, iFFTRealDetCount, iFFTRealDetCount, iFreqBinCount, pData->m_pDevFilter);
-
- cudaFree(pfDevRealFilter);
-
- break;
- }
- case FILTER_RSINOGRAM:
- {
- int iProjectionCount = pData->dims.iProjAngles;
- int iRealFilterElementCount = iProjectionCount * iFFTRealDetCount;
- float* pfHostRealFilter = new float[iRealFilterElementCount];
- memset(pfHostRealFilter, 0, sizeof(float) * iRealFilterElementCount);
-
- int iUsedFilterWidth = min(_iFilterWidth, iFFTRealDetCount);
- int iStartFilterIndex = (_iFilterWidth - iUsedFilterWidth) / 2;
- int iMaxFilterIndex = iStartFilterIndex + iUsedFilterWidth;
-
- int iFilterShiftSize = _iFilterWidth / 2;
-
- for(int iDetectorIndex = iStartFilterIndex; iDetectorIndex < iMaxFilterIndex; iDetectorIndex++)
- {
- int iFFTInFilterIndex = (iDetectorIndex + iFFTRealDetCount - iFilterShiftSize) % iFFTRealDetCount;
-
- for(int iProjectionIndex = 0; iProjectionIndex < (int)pData->dims.iProjAngles; iProjectionIndex++)
- {
- float fValue = _pfHostFilter[iDetectorIndex + iProjectionIndex * _iFilterWidth];
- pfHostRealFilter[iFFTInFilterIndex + iProjectionIndex * iFFTRealDetCount] = fValue;
- }
- }
-
- float* pfDevRealFilter = NULL;
- cudaMalloc((void **)&pfDevRealFilter, sizeof(float) * iRealFilterElementCount); // TODO: check for errors
- cudaMemcpy(pfDevRealFilter, pfHostRealFilter, sizeof(float) * iRealFilterElementCount, cudaMemcpyHostToDevice);
- delete[] pfHostRealFilter;
-
- runCudaFFT(iProjectionCount, pfDevRealFilter, iFFTRealDetCount, iFFTRealDetCount, iFFTRealDetCount, iFreqBinCount, pData->m_pDevFilter);
-
- cudaFree(pfDevRealFilter);
-
- break;
- }
- default:
- {
- ASTRA_ERROR("AstraFBP::setFilter: Unknown filter type requested");
- delete [] pHostFilter;
- return false;
- }
- }
-
- delete [] pHostFilter;
-
- return true;
-}
-
BPalgo::BPalgo()
{
@@ -615,18 +109,17 @@ float BPalgo::computeDiffNorm()
bool astraCudaFP(const float* pfVolume, float* pfSinogram,
unsigned int iVolWidth, unsigned int iVolHeight,
unsigned int iProjAngles, unsigned int iProjDets,
- const float *pfAngles, const float *pfOffsets,
- float fDetSize, unsigned int iDetSuperSampling,
+ const SParProjection *pAngles,
+ unsigned int iDetSuperSampling,
float fOutputScale, int iGPUIndex)
{
SDimensions dims;
- if (iProjAngles == 0 || iProjDets == 0 || pfAngles == 0)
+ if (iProjAngles == 0 || iProjDets == 0 || pAngles == 0)
return false;
dims.iProjAngles = iProjAngles;
dims.iProjDets = iProjDets;
- dims.fDetScale = fDetSize;
if (iDetSuperSampling == 0)
return false;
@@ -676,7 +169,7 @@ bool astraCudaFP(const float* pfVolume, float* pfSinogram,
}
zeroProjectionData(D_sinoData, sinoPitch, dims);
- ok = FP(D_volumeData, volumePitch, D_sinoData, sinoPitch, dims, pfAngles, pfOffsets, fOutputScale);
+ ok = FP(D_volumeData, volumePitch, D_sinoData, sinoPitch, dims, pAngles, fOutputScale);
if (!ok) {
cudaFree(D_volumeData);
cudaFree(D_sinoData);
@@ -711,7 +204,6 @@ bool astraCudaFanFP(const float* pfVolume, float* pfSinogram,
dims.iProjAngles = iProjAngles;
dims.iProjDets = iProjDets;
- dims.fDetScale = 1.0f; // TODO?
if (iDetSuperSampling == 0)
return false;
@@ -787,123 +279,99 @@ bool astraCudaFanFP(const float* pfVolume, float* pfSinogram,
}
-bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,
- const CParallelProjectionGeometry2D* pProjGeom,
- float*& detectorOffsets, float*& projectionAngles,
- float& detSize, float& outputScale)
+// adjust pProjs to normalize volume geometry
+template<typename ProjectionT>
+static bool convertAstraGeometry_internal(const CVolumeGeometry2D* pVolGeom,
+ unsigned int iProjectionAngleCount,
+ ProjectionT*& pProjs,
+ float& fOutputScale)
{
- assert(pVolGeom);
- assert(pProjGeom);
- assert(pProjGeom->getProjectionAngles());
-
+ // TODO: Make EPS relative
const float EPS = 0.00001f;
- int nth = pProjGeom->getProjectionAngleCount();
-
// Check if pixels are square
if (abs(pVolGeom->getPixelLengthX() - pVolGeom->getPixelLengthY()) > EPS)
return false;
+ float dx = -(pVolGeom->getWindowMinX() + pVolGeom->getWindowMaxX()) / 2;
+ float dy = -(pVolGeom->getWindowMinY() + pVolGeom->getWindowMaxY()) / 2;
- // Scale volume pixels to 1x1
- detSize = pProjGeom->getDetectorWidth() / pVolGeom->getPixelLengthX();
+ float factor = 1.0f / pVolGeom->getPixelLengthX();
- // Copy angles
- float *angles = new float[nth];
- for (int i = 0; i < nth; ++i)
- angles[i] = pProjGeom->getProjectionAngles()[i];
- projectionAngles = angles;
-
- // Check if we need to translate
- bool offCenter = false;
- if (abs(pVolGeom->getWindowMinX() + pVolGeom->getWindowMaxX()) > EPS ||
- abs(pVolGeom->getWindowMinY() + pVolGeom->getWindowMaxY()) > EPS)
- {
- offCenter = true;
+ for (int i = 0; i < iProjectionAngleCount; ++i) {
+ // CHECKME: Order of scaling and translation
+ pProjs[i].translate(dx, dy);
+ pProjs[i].scale(factor);
}
+ // CHECKME: Check factor
+ fOutputScale *= pVolGeom->getPixelLengthX() * pVolGeom->getPixelLengthY();
- // If there are existing detector offsets, or if we need to translate,
- // we need to return offsets
- if (pProjGeom->getExtraDetectorOffset() || offCenter)
- {
- float* offset = new float[nth];
-
- if (pProjGeom->getExtraDetectorOffset()) {
- for (int i = 0; i < nth; ++i)
- offset[i] = pProjGeom->getExtraDetectorOffset()[i];
- } else {
- for (int i = 0; i < nth; ++i)
- offset[i] = 0.0f;
- }
+ return true;
+}
- if (offCenter) {
- float dx = (pVolGeom->getWindowMinX() + pVolGeom->getWindowMaxX()) / 2;
- float dy = (pVolGeom->getWindowMinY() + pVolGeom->getWindowMaxY()) / 2;
- // CHECKME: Is d in pixels or in units?
- for (int i = 0; i < nth; ++i) {
- float d = dx * cos(angles[i]) + dy * sin(angles[i]);
- offset[i] += d;
- }
- }
+bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,
+ const CParallelProjectionGeometry2D* pProjGeom,
+ SParProjection*& pProjs,
+ float& fOutputScale)
+{
+ assert(pVolGeom);
+ assert(pProjGeom);
+ assert(pProjGeom->getProjectionAngles());
- // CHECKME: Order of scaling and translation
+ int nth = pProjGeom->getProjectionAngleCount();
- // Scale volume pixels to 1x1
- for (int i = 0; i < nth; ++i) {
- //offset[i] /= pVolGeom->getPixelLengthX();
- //offset[i] *= detSize;
- }
+ pProjs = genParProjections(nth,
+ pProjGeom->getDetectorCount(),
+ pProjGeom->getDetectorWidth(),
+ pProjGeom->getProjectionAngles(), 0);
+ bool ok;
+ fOutputScale = 1.0f;
- detectorOffsets = offset;
- } else {
- detectorOffsets = 0;
- }
+ ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale);
- outputScale = pVolGeom->getPixelLengthX();
- outputScale *= outputScale;
+ if (!ok) {
+ delete[] pProjs;
+ pProjs = 0;
+ }
- return true;
+ return ok;
}
-static void convertAstraGeometry_internal(const CVolumeGeometry2D* pVolGeom,
- unsigned int iProjectionAngleCount,
- astraCUDA::SFanProjection*& pProjs,
- float& outputScale)
+bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,
+ const CParallelVecProjectionGeometry2D* pProjGeom,
+ SParProjection*& pProjs,
+ float& fOutputScale)
{
- // Translate
- float dx = (pVolGeom->getWindowMinX() + pVolGeom->getWindowMaxX()) / 2;
- float dy = (pVolGeom->getWindowMinY() + pVolGeom->getWindowMaxY()) / 2;
+ assert(pVolGeom);
+ assert(pProjGeom);
+ assert(pProjGeom->getProjectionVectors());
- for (int i = 0; i < iProjectionAngleCount; ++i) {
- pProjs[i].fSrcX -= dx;
- pProjs[i].fSrcY -= dy;
- pProjs[i].fDetSX -= dx;
- pProjs[i].fDetSY -= dy;
+ int nth = pProjGeom->getProjectionAngleCount();
+
+ pProjs = new SParProjection[nth];
+
+ for (int i = 0; i < nth; ++i) {
+ pProjs[i] = pProjGeom->getProjectionVectors()[i];
}
- // CHECKME: Order of scaling and translation
+ bool ok;
+ fOutputScale = 1.0f;
- // Scale
- float factor = 1.0f / pVolGeom->getPixelLengthX();
- for (int i = 0; i < iProjectionAngleCount; ++i) {
- pProjs[i].fSrcX *= factor;
- pProjs[i].fSrcY *= factor;
- pProjs[i].fDetSX *= factor;
- pProjs[i].fDetSY *= factor;
- pProjs[i].fDetUX *= factor;
- pProjs[i].fDetUY *= factor;
+ ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale);
+ if (!ok) {
+ delete[] pProjs;
+ pProjs = 0;
}
- // CHECKME: Check factor
- outputScale = pVolGeom->getPixelLengthX();
-// outputScale *= outputScale;
+ return ok;
}
+
bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,
const CFanFlatProjectionGeometry2D* pProjGeom,
astraCUDA::SFanProjection*& pProjs,
@@ -913,6 +381,7 @@ bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,
assert(pProjGeom);
assert(pProjGeom->getProjectionAngles());
+ // TODO: Make EPS relative
const float EPS = 0.00001f;
int nth = pProjGeom->getProjectionAngleCount();
@@ -931,23 +400,9 @@ bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,
float fDetSize = pProjGeom->getDetectorWidth();
const float *pfAngles = pProjGeom->getProjectionAngles();
- pProjs = new SFanProjection[nth];
-
- float fSrcX0 = 0.0f;
- float fSrcY0 = -fOriginSourceDistance;
- float fDetUX0 = fDetSize;
- float fDetUY0 = 0.0f;
- float fDetSX0 = pProjGeom->getDetectorCount() * fDetUX0 / -2.0f;
- float fDetSY0 = fOriginDetectorDistance;
-
-#define ROTATE0(name,i,alpha) do { pProjs[i].f##name##X = f##name##X0 * cos(alpha) - f##name##Y0 * sin(alpha); pProjs[i].f##name##Y = f##name##X0 * sin(alpha) + f##name##Y0 * cos(alpha); } while(0)
- for (int i = 0; i < nth; ++i) {
- ROTATE0(Src, i, pfAngles[i]);
- ROTATE0(DetS, i, pfAngles[i]);
- ROTATE0(DetU, i, pfAngles[i]);
- }
-
-#undef ROTATE0
+ pProjs = genFanProjections(nth, pProjGeom->getDetectorCount(),
+ fOriginSourceDistance, fOriginDetectorDistance,
+ fDetSize, pfAngles);
convertAstraGeometry_internal(pVolGeom, nth, pProjs, outputScale);
@@ -964,6 +419,7 @@ bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,
assert(pProjGeom);
assert(pProjGeom->getProjectionVectors());
+ // TODO: Make EPS relative
const float EPS = 0.00001f;
int nx = pVolGeom->getGridColCount();
@@ -985,6 +441,51 @@ bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,
return true;
}
+bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,
+ const CProjectionGeometry2D* pProjGeom,
+ astraCUDA::SParProjection*& pParProjs,
+ astraCUDA::SFanProjection*& pFanProjs,
+ float& outputScale)
+{
+ const CParallelProjectionGeometry2D* parProjGeom = dynamic_cast<const CParallelProjectionGeometry2D*>(pProjGeom);
+ const CParallelVecProjectionGeometry2D* parVecProjGeom = dynamic_cast<const CParallelVecProjectionGeometry2D*>(pProjGeom);
+ const CFanFlatProjectionGeometry2D* fanProjGeom = dynamic_cast<const CFanFlatProjectionGeometry2D*>(pProjGeom);
+ const CFanFlatVecProjectionGeometry2D* fanVecProjGeom = dynamic_cast<const CFanFlatVecProjectionGeometry2D*>(pProjGeom);
+
+ bool ok = false;
+
+ if (parProjGeom) {
+ ok = convertAstraGeometry(pVolGeom, parProjGeom, pParProjs, outputScale);
+ } else if (parVecProjGeom) {
+ ok = convertAstraGeometry(pVolGeom, parVecProjGeom, pParProjs, outputScale);
+ } else if (fanProjGeom) {
+ ok = convertAstraGeometry(pVolGeom, fanProjGeom, pFanProjs, outputScale);
+ } else if (fanVecProjGeom) {
+ ok = convertAstraGeometry(pVolGeom, fanVecProjGeom, pFanProjs, outputScale);
+ } else {
+ ok = false;
+ }
+
+ return ok;
+}
+
+bool convertAstraGeometry_dims(const CVolumeGeometry2D* pVolGeom,
+ const CProjectionGeometry2D* pProjGeom,
+ SDimensions& dims)
+{
+ dims.iVolWidth = pVolGeom->getGridColCount();
+ dims.iVolHeight = pVolGeom->getGridRowCount();
+
+ dims.iProjAngles = pProjGeom->getProjectionAngleCount();
+ dims.iProjDets = pProjGeom->getDetectorCount();
+
+ dims.iRaysPerDet = 1;
+ dims.iRaysPerPixelDim = 1;
+
+ return true;
+}
+
+
}
diff --git a/cuda/2d/astra.h b/cuda/2d/astra.h
index e4cefac..9355cb4 100644
--- a/cuda/2d/astra.h
+++ b/cuda/2d/astra.h
@@ -41,140 +41,12 @@ enum Cuda2DProjectionKernel {
};
class CParallelProjectionGeometry2D;
+class CParallelVecProjectionGeometry2D;
class CFanFlatProjectionGeometry2D;
class CFanFlatVecProjectionGeometry2D;
class CVolumeGeometry2D;
+class CProjectionGeometry2D;
-class AstraFBP_internal;
-
-class _AstraExport AstraFBP {
-public:
- // Constructor
- AstraFBP();
-
- // Destructor
- ~AstraFBP();
-
- // Set the size of the reconstruction rectangle.
- // Volume pixels are currently assumed to be 1x1 squares.
- bool setReconstructionGeometry(unsigned int iVolWidth,
- unsigned int iVolHeight,
- float fPixelSize = 1.0f);
-
- // Set the projection angles and number of detector pixels per angle.
- // pfAngles must be a float array of length iProjAngles.
- // fDetSize indicates the size of a detector pixel compared to a
- // volume pixel edge.
- //
- // pfAngles will only be read from during this call.
- bool setProjectionGeometry(unsigned int iProjAngles,
- unsigned int iProjDets,
- const float *pfAngles,
- float fDetSize = 1.0f);
- // Set the projection angles and number of detector pixels per angle.
- // pfAngles must be a float array of length iProjAngles.
- // fDetSize indicates the size of a detector pixel compared to a
- // volume pixel edge.
- //
- // pfAngles, fanProjs will only be read from during this call.
- bool setFanGeometry(unsigned int iProjAngles,
- unsigned int iProjDets,
- const astraCUDA::SFanProjection *fanProjs,
- const float *pfAngles,
- float fOriginSourceDistance,
- float fOriginDetectorDistance,
- float fDetSize = 1.0f,
- bool bShortScan = false);
-
- // Set linear supersampling factor for the BP.
- // (The number of rays is the square of this)
- //
- // This may optionally be called before init().
- bool setPixelSuperSampling(unsigned int iPixelSuperSampling);
-
- // Set per-detector shifts.
- //
- // pfTOffsets will only be read from during this call.
- bool setTOffsets(const float *pfTOffsets);
-
- // Returns the required size of a filter in the fourier domain
- // when multiplying it with the fft of the projection data.
- // Its value is equal to the smallest power of two larger than
- // or equal to twice the number of detectors in the spatial domain.
- //
- // _iDetectorCount is the number of detectors in the spatial domain.
- static int calcFourierFilterSize(int _iDetectorCount);
-
- // Sets the filter type. Some filter types require the user to supply an
- // array containing the filter.
- // The number of elements in a filter in the fourier domain should be equal
- // to the value returned by calcFourierFilterSize().
- // The following types require a filter:
- //
- // - FILTER_PROJECTION:
- // The filter size should be equal to the output of
- // calcFourierFilterSize(). The filtered sinogram is
- // multiplied with the supplied filter.
- //
- // - FILTER_SINOGRAM:
- // Same as FILTER_PROJECTION, but now the filter should contain a row for
- // every projection direction.
- //
- // - FILTER_RPROJECTION:
- // The filter should now contain one kernel (= ifft of filter), with the
- // peak in the center. The filter width
- // can be any value. If odd, the peak is assumed to be in the center, if
- // even, it is assumed to be at floor(filter-width/2).
- //
- // - FILTER_RSINOGRAM
- // Same as FILTER_RPROJECTION, but now the supplied filter should contain a
- // row for every projection direction.
- //
- // A large number of other filters (FILTER_RAMLAK, FILTER_SHEPPLOGAN,
- // FILTER_COSINE, FILTER_HAMMING, and FILTER_HANN)
- // have a D variable, which gives the cutoff point in the frequency domain.
- // Setting this value to 1.0 will include the whole filter
- bool setFilter(E_FBPFILTER _eFilter,
- const float * _pfHostFilter = NULL,
- int _iFilterWidth = 0, float _fD = 1.0f, float _fFilterParameter = -1.0f);
-
- // Initialize CUDA, allocate GPU buffers and
- // precompute geometry-specific data.
- //
- // CUDA is set up to use GPU number iGPUIndex.
- //
- // This must be called after calling setReconstructionGeometry() and
- // setProjectionGeometry().
- bool init(int iGPUIndex = 0);
-
- // Setup input sinogram for a slice.
- // pfSinogram must be a float array of size iProjAngles*iSinogramPitch.
- // NB: iSinogramPitch is measured in floats, not in bytes.
- //
- // This must be called after init(), and before iterate(). It may be
- // called again after iterate()/getReconstruction() to start a new slice.
- //
- // pfSinogram will only be read from during this call.
- bool setSinogram(const float* pfSinogram, unsigned int iSinogramPitch);
-
- // Runs an FBP reconstruction.
- // This must be called after setSinogram().
- //
- // run can be called before setFilter, but will then use the default Ram-Lak filter
- bool run();
-
- // Get the reconstructed slice.
- // pfReconstruction must be a float array of size
- // iVolHeight*iReconstructionPitch.
- // NB: iReconstructionPitch is measured in floats, not in bytes.
- //
- // This may be called after run().
- bool getReconstruction(float* pfReconstruction,
- unsigned int iReconstructionPitch) const;
-
-private:
- AstraFBP_internal* pData;
-};
class _AstraExport BPalgo : public astraCUDA::ReconAlgo {
public:
@@ -197,8 +69,8 @@ public:
_AstraExport bool astraCudaFP(const float* pfVolume, float* pfSinogram,
unsigned int iVolWidth, unsigned int iVolHeight,
unsigned int iProjAngles, unsigned int iProjDets,
- const float *pfAngles, const float *pfOffsets,
- float fDetSize = 1.0f, unsigned int iDetSuperSampling = 1,
+ const SParProjection *pAngles,
+ unsigned int iDetSuperSampling = 1,
float fOutputScale = 1.0f, int iGPUIndex = 0);
_AstraExport bool astraCudaFanFP(const float* pfVolume, float* pfSinogram,
@@ -211,8 +83,14 @@ _AstraExport bool astraCudaFanFP(const float* pfVolume, float* pfSinogram,
_AstraExport bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,
const CParallelProjectionGeometry2D* pProjGeom,
- float*& pfDetectorOffsets, float*& pfProjectionAngles,
- float& fDetSize, float& fOutputScale);
+ astraCUDA::SParProjection*& pProjs,
+ float& fOutputScale);
+
+_AstraExport bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,
+ const CParallelVecProjectionGeometry2D* pProjGeom,
+ astraCUDA::SParProjection*& pProjs,
+ float& fOutputScale);
+
_AstraExport bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,
const CFanFlatProjectionGeometry2D* pProjGeom,
@@ -224,6 +102,15 @@ _AstraExport bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,
astraCUDA::SFanProjection*& pProjs,
float& outputScale);
+_AstraExport bool convertAstraGeometry_dims(const CVolumeGeometry2D* pVolGeom,
+ const CProjectionGeometry2D* pProjGeom,
+ astraCUDA::SDimensions& dims);
+
+_AstraExport bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,
+ const CProjectionGeometry2D* pProjGeom,
+ astraCUDA::SParProjection*& pParProjs,
+ astraCUDA::SFanProjection*& pFanProjs,
+ float& outputScale);
}
namespace astraCUDA {
diff --git a/cuda/2d/cgls.cu b/cuda/2d/cgls.cu
index 6962d81..1a31438 100644
--- a/cuda/2d/cgls.cu
+++ b/cuda/2d/cgls.cu
@@ -206,42 +206,6 @@ float CGLS::computeDiffNorm()
return sqrt(s);
}
-bool doCGLS(float* D_volumeData, unsigned int volumePitch,
- float* D_sinoData, unsigned int sinoPitch,
- const SDimensions& dims, /*const SAugmentedData& augs,*/
- const float* angles, const float* TOffsets, unsigned int iterations)
-{
- CGLS cgls;
- bool ok = true;
-
- ok &= cgls.setGeometry(dims, angles);
-#if 0
- if (D_maskData)
- ok &= cgls.enableVolumeMask();
-#endif
- if (TOffsets)
- ok &= cgls.setTOffsets(TOffsets);
-
- if (!ok)
- return false;
-
- ok = cgls.init();
- if (!ok)
- return false;
-
-#if 0
- if (D_maskData)
- ok &= cgls.setVolumeMask(D_maskData, maskPitch);
-#endif
-
- ok &= cgls.setBuffers(D_volumeData, volumePitch, D_sinoData, sinoPitch);
- if (!ok)
- return false;
-
- ok = cgls.iterate(iterations);
-
- return ok;
-}
}
diff --git a/cuda/2d/dims.h b/cuda/2d/dims.h
index afb997a..10eaabf 100644
--- a/cuda/2d/dims.h
+++ b/cuda/2d/dims.h
@@ -33,9 +33,11 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
namespace astraCUDA {
+using astra::SParProjection;
using astra::SFanProjection;
+
struct SDimensions {
// Width, height of reconstruction volume
unsigned int iVolWidth;
@@ -47,9 +49,6 @@ struct SDimensions {
// Number of detector pixels
unsigned int iProjDets;
- // size of detector compared to volume pixels
- float fDetScale;
-
// in FP, number of rays to trace per detector pixel.
// This should usually be set to 1.
// If fDetScale > 1, this should be set to an integer of roughly
diff --git a/cuda/2d/em.cu b/cuda/2d/em.cu
index 11ed45b..03cc652 100644
--- a/cuda/2d/em.cu
+++ b/cuda/2d/em.cu
@@ -169,34 +169,6 @@ float EM::computeDiffNorm()
}
-bool doEM(float* D_volumeData, unsigned int volumePitch,
- float* D_sinoData, unsigned int sinoPitch,
- const SDimensions& dims, const float* angles,
- const float* TOffsets, unsigned int iterations)
-{
- EM em;
- bool ok = true;
-
- ok &= em.setGeometry(dims, angles);
- if (TOffsets)
- ok &= em.setTOffsets(TOffsets);
-
- if (!ok)
- return false;
-
- ok = em.init();
- if (!ok)
- return false;
-
- ok &= em.setBuffers(D_volumeData, volumePitch, D_sinoData, sinoPitch);
- if (!ok)
- return false;
-
- ok = em.iterate(iterations);
-
- return ok;
-}
-
}
#ifdef STANDALONE
diff --git a/cuda/2d/fbp.cu b/cuda/2d/fbp.cu
new file mode 100644
index 0000000..365fc88
--- /dev/null
+++ b/cuda/2d/fbp.cu
@@ -0,0 +1,347 @@
+/*
+-----------------------------------------------------------------------
+Copyright: 2010-2015, iMinds-Vision Lab, University of Antwerp
+ 2014-2015, CWI, Amsterdam
+
+Contact: astra@uantwerpen.be
+Website: http://sf.net/projects/astra-toolbox
+
+This file is part of the ASTRA Toolbox.
+
+
+The ASTRA Toolbox is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+The ASTRA Toolbox is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
+
+-----------------------------------------------------------------------
+$Id$
+*/
+
+#include "fbp.h"
+#include "fft.h"
+#include "par_bp.h"
+#include "fan_bp.h"
+#include "util.h"
+
+// For fan-beam preweighting
+#include "../3d/fdk.h"
+
+#include "astra/Logging.h"
+
+#include <cuda.h>
+
+namespace astraCUDA {
+
+
+
+static int calcNextPowerOfTwo(int n)
+{
+ int x = 1;
+ while (x < n)
+ x *= 2;
+
+ return x;
+}
+
+// static
+int FBP::calcFourierFilterSize(int _iDetectorCount)
+{
+ int iFFTRealDetCount = calcNextPowerOfTwo(2 * _iDetectorCount);
+ int iFreqBinCount = calcFFTFourierSize(iFFTRealDetCount);
+
+ // CHECKME: Matlab makes this at least 64. Do we also need to?
+ return iFreqBinCount;
+}
+
+
+
+
+FBP::FBP() : ReconAlgo()
+{
+ D_filter = 0;
+
+}
+
+FBP::~FBP()
+{
+ reset();
+}
+
+void FBP::reset()
+{
+ if (D_filter) {
+ freeComplexOnDevice((cufftComplex *)D_filter);
+ D_filter = 0;
+ }
+}
+
+bool FBP::init()
+{
+ return true;
+}
+
+bool FBP::setFilter(astra::E_FBPFILTER _eFilter, const float * _pfHostFilter /* = NULL */, int _iFilterWidth /* = 0 */, float _fD /* = 1.0f */, float _fFilterParameter /* = -1.0f */)
+{
+ if (D_filter)
+ {
+ freeComplexOnDevice((cufftComplex*)D_filter);
+ D_filter = 0;
+ }
+
+ if (_eFilter == astra::FILTER_NONE)
+ return true; // leave D_filter set to 0
+
+
+ int iFFTRealDetCount = calcNextPowerOfTwo(2 * dims.iProjDets);
+ int iFreqBinCount = calcFFTFourierSize(iFFTRealDetCount);
+
+ cufftComplex * pHostFilter = new cufftComplex[dims.iProjAngles * iFreqBinCount];
+ memset(pHostFilter, 0, sizeof(cufftComplex) * dims.iProjAngles * iFreqBinCount);
+
+ allocateComplexOnDevice(dims.iProjAngles, iFreqBinCount, (cufftComplex**)&D_filter);
+
+ switch(_eFilter)
+ {
+ case astra::FILTER_NONE:
+ // handled above
+ break;
+ case astra::FILTER_RAMLAK:
+ case astra::FILTER_SHEPPLOGAN:
+ case astra::FILTER_COSINE:
+ case astra::FILTER_HAMMING:
+ case astra::FILTER_HANN:
+ case astra::FILTER_TUKEY:
+ case astra::FILTER_LANCZOS:
+ case astra::FILTER_TRIANGULAR:
+ case astra::FILTER_GAUSSIAN:
+ case astra::FILTER_BARTLETTHANN:
+ case astra::FILTER_BLACKMAN:
+ case astra::FILTER_NUTTALL:
+ case astra::FILTER_BLACKMANHARRIS:
+ case astra::FILTER_BLACKMANNUTTALL:
+ case astra::FILTER_FLATTOP:
+ case astra::FILTER_PARZEN:
+ {
+ genFilter(_eFilter, _fD, dims.iProjAngles, pHostFilter, iFFTRealDetCount, iFreqBinCount, _fFilterParameter);
+ uploadComplexArrayToDevice(dims.iProjAngles, iFreqBinCount, pHostFilter, (cufftComplex*)D_filter);
+
+ break;
+ }
+ case astra::FILTER_PROJECTION:
+ {
+ // make sure the offered filter has the correct size
+ assert(_iFilterWidth == iFreqBinCount);
+
+ for(int iFreqBinIndex = 0; iFreqBinIndex < iFreqBinCount; iFreqBinIndex++)
+ {
+ float fValue = _pfHostFilter[iFreqBinIndex];
+
+ for(int iProjectionIndex = 0; iProjectionIndex < (int)dims.iProjAngles; iProjectionIndex++)
+ {
+ pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].x = fValue;
+ pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].y = 0.0f;
+ }
+ }
+ uploadComplexArrayToDevice(dims.iProjAngles, iFreqBinCount, pHostFilter, (cufftComplex*)D_filter);
+ break;
+ }
+ case astra::FILTER_SINOGRAM:
+ {
+ // make sure the offered filter has the correct size
+ assert(_iFilterWidth == iFreqBinCount);
+
+ for(int iFreqBinIndex = 0; iFreqBinIndex < iFreqBinCount; iFreqBinIndex++)
+ {
+ for(int iProjectionIndex = 0; iProjectionIndex < (int)dims.iProjAngles; iProjectionIndex++)
+ {
+ float fValue = _pfHostFilter[iFreqBinIndex + iProjectionIndex * _iFilterWidth];
+
+ pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].x = fValue;
+ pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].y = 0.0f;
+ }
+ }
+ uploadComplexArrayToDevice(dims.iProjAngles, iFreqBinCount, pHostFilter, (cufftComplex*)D_filter);
+ break;
+ }
+ case astra::FILTER_RPROJECTION:
+ {
+ int iProjectionCount = dims.iProjAngles;
+ int iRealFilterElementCount = iProjectionCount * iFFTRealDetCount;
+ float * pfHostRealFilter = new float[iRealFilterElementCount];
+ memset(pfHostRealFilter, 0, sizeof(float) * iRealFilterElementCount);
+
+ int iUsedFilterWidth = min(_iFilterWidth, iFFTRealDetCount);
+ int iStartFilterIndex = (_iFilterWidth - iUsedFilterWidth) / 2;
+ int iMaxFilterIndex = iStartFilterIndex + iUsedFilterWidth;
+
+ int iFilterShiftSize = _iFilterWidth / 2;
+
+ for(int iDetectorIndex = iStartFilterIndex; iDetectorIndex < iMaxFilterIndex; iDetectorIndex++)
+ {
+ int iFFTInFilterIndex = (iDetectorIndex + iFFTRealDetCount - iFilterShiftSize) % iFFTRealDetCount;
+ float fValue = _pfHostFilter[iDetectorIndex];
+
+ for(int iProjectionIndex = 0; iProjectionIndex < (int)dims.iProjAngles; iProjectionIndex++)
+ {
+ pfHostRealFilter[iFFTInFilterIndex + iProjectionIndex * iFFTRealDetCount] = fValue;
+ }
+ }
+
+ float* pfDevRealFilter = NULL;
+ cudaMalloc((void **)&pfDevRealFilter, sizeof(float) * iRealFilterElementCount); // TODO: check for errors
+ cudaMemcpy(pfDevRealFilter, pfHostRealFilter, sizeof(float) * iRealFilterElementCount, cudaMemcpyHostToDevice);
+ delete[] pfHostRealFilter;
+
+ runCudaFFT(iProjectionCount, pfDevRealFilter, iFFTRealDetCount, iFFTRealDetCount, iFFTRealDetCount, iFreqBinCount, (cufftComplex*)D_filter);
+
+ cudaFree(pfDevRealFilter);
+
+ break;
+ }
+ case astra::FILTER_RSINOGRAM:
+ {
+ int iProjectionCount = dims.iProjAngles;
+ int iRealFilterElementCount = iProjectionCount * iFFTRealDetCount;
+ float* pfHostRealFilter = new float[iRealFilterElementCount];
+ memset(pfHostRealFilter, 0, sizeof(float) * iRealFilterElementCount);
+
+ int iUsedFilterWidth = min(_iFilterWidth, iFFTRealDetCount);
+ int iStartFilterIndex = (_iFilterWidth - iUsedFilterWidth) / 2;
+ int iMaxFilterIndex = iStartFilterIndex + iUsedFilterWidth;
+
+ int iFilterShiftSize = _iFilterWidth / 2;
+
+ for(int iDetectorIndex = iStartFilterIndex; iDetectorIndex < iMaxFilterIndex; iDetectorIndex++)
+ {
+ int iFFTInFilterIndex = (iDetectorIndex + iFFTRealDetCount - iFilterShiftSize) % iFFTRealDetCount;
+
+ for(int iProjectionIndex = 0; iProjectionIndex < (int)dims.iProjAngles; iProjectionIndex++)
+ {
+ float fValue = _pfHostFilter[iDetectorIndex + iProjectionIndex * _iFilterWidth];
+ pfHostRealFilter[iFFTInFilterIndex + iProjectionIndex * iFFTRealDetCount] = fValue;
+ }
+ }
+
+ float* pfDevRealFilter = NULL;
+ cudaMalloc((void **)&pfDevRealFilter, sizeof(float) * iRealFilterElementCount); // TODO: check for errors
+ cudaMemcpy(pfDevRealFilter, pfHostRealFilter, sizeof(float) * iRealFilterElementCount, cudaMemcpyHostToDevice);
+ delete[] pfHostRealFilter;
+
+ runCudaFFT(iProjectionCount, pfDevRealFilter, iFFTRealDetCount, iFFTRealDetCount, iFFTRealDetCount, iFreqBinCount, (cufftComplex*)D_filter);
+
+ cudaFree(pfDevRealFilter);
+
+ break;
+ }
+ default:
+ {
+ ASTRA_ERROR("FBP::setFilter: Unknown filter type requested");
+ delete [] pHostFilter;
+ return false;
+ }
+ }
+
+ delete [] pHostFilter;
+
+ return true;
+}
+
+bool FBP::iterate(unsigned int iterations)
+{
+ zeroVolumeData(D_volumeData, volumePitch, dims);
+
+ bool ok = false;
+
+ if (fanProjs) {
+ // Call FDK_PreWeight to handle fan beam geometry. We treat
+ // this as a cone beam setup of a single slice:
+
+ // TODO: TOffsets affects this preweighting...
+
+ // TODO: We take the fan parameters from the last projection here
+ // without checking if they're the same in all projections
+
+ float *pfAngles = new float[dims.iProjAngles];
+
+ float fOriginSource, fOriginDetector, fDetSize, fOffset;
+ for (unsigned int i = 0; i < dims.iProjAngles; ++i) {
+ bool ok = astra::getFanParameters(fanProjs[i], dims.iProjDets,
+ pfAngles[i],
+ fOriginSource, fOriginDetector,
+ fDetSize, fOffset);
+ if (!ok) {
+ ASTRA_ERROR("FBP_CUDA: Failed to extract circular fan beam parameters from fan beam geometry");
+ return false;
+ }
+ }
+
+ // We create a fake cudaPitchedPtr
+ cudaPitchedPtr tmp;
+ tmp.ptr = D_sinoData;
+ tmp.pitch = sinoPitch * sizeof(float);
+ tmp.xsize = dims.iProjDets;
+ tmp.ysize = dims.iProjAngles;
+ // and a fake Dimensions3D
+ astraCUDA3d::SDimensions3D dims3d;
+ dims3d.iVolX = dims.iVolWidth;
+ dims3d.iVolY = dims.iVolHeight;
+ dims3d.iVolZ = 1;
+ dims3d.iProjAngles = dims.iProjAngles;
+ dims3d.iProjU = dims.iProjDets;
+ dims3d.iProjV = 1;
+
+ astraCUDA3d::FDK_PreWeight(tmp, fOriginSource,
+ fOriginDetector, 0.0f,
+ fDetSize, 1.0f,
+ m_bShortScan, dims3d, pfAngles);
+ } else {
+ // TODO: How should different detector pixel size in different
+ // projections be handled?
+ }
+
+ if (D_filter) {
+
+ int iFFTRealDetCount = calcNextPowerOfTwo(2 * dims.iProjDets);
+ int iFFTFourDetCount = calcFFTFourierSize(iFFTRealDetCount);
+
+ cufftComplex * pDevComplexSinogram = NULL;
+
+ allocateComplexOnDevice(dims.iProjAngles, iFFTFourDetCount, &pDevComplexSinogram);
+
+ runCudaFFT(dims.iProjAngles, D_sinoData, sinoPitch, dims.iProjDets, iFFTRealDetCount, iFFTFourDetCount, pDevComplexSinogram);
+
+ applyFilter(dims.iProjAngles, iFFTFourDetCount, pDevComplexSinogram, (cufftComplex*)D_filter);
+
+ runCudaIFFT(dims.iProjAngles, pDevComplexSinogram, D_sinoData, sinoPitch, dims.iProjDets, iFFTRealDetCount, iFFTFourDetCount);
+
+ freeComplexOnDevice(pDevComplexSinogram);
+
+ }
+
+ float fOutputScale = (M_PI / 2.0f) / (float)dims.iProjAngles;
+
+ if (fanProjs) {
+ ok = FanBP_FBPWeighted(D_volumeData, volumePitch, D_sinoData, sinoPitch, dims, fanProjs, fOutputScale);
+
+ } else {
+ ok = BP(D_volumeData, volumePitch, D_sinoData, sinoPitch, dims, parProjs, fOutputScale);
+ }
+ if(!ok)
+ {
+ return false;
+ }
+
+ return true;
+}
+
+
+}
diff --git a/cuda/2d/fbp.h b/cuda/2d/fbp.h
new file mode 100644
index 0000000..8b4d64d
--- /dev/null
+++ b/cuda/2d/fbp.h
@@ -0,0 +1,98 @@
+/*
+-----------------------------------------------------------------------
+Copyright: 2010-2015, iMinds-Vision Lab, University of Antwerp
+ 2014-2015, CWI, Amsterdam
+
+Contact: astra@uantwerpen.be
+Website: http://sf.net/projects/astra-toolbox
+
+This file is part of the ASTRA Toolbox.
+
+
+The ASTRA Toolbox is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+The ASTRA Toolbox is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
+
+-----------------------------------------------------------------------
+$Id$
+*/
+
+#include "algo.h"
+#include "fbp_filters.h"
+
+namespace astraCUDA {
+
+class _AstraExport FBP : public ReconAlgo {
+public:
+ FBP();
+ ~FBP();
+
+ virtual bool useSinogramMask() { return false; }
+ virtual bool useVolumeMask() { return false; }
+
+ // Returns the required size of a filter in the fourier domain
+ // when multiplying it with the fft of the projection data.
+ // Its value is equal to the smallest power of two larger than
+ // or equal to twice the number of detectors in the spatial domain.
+ //
+ // _iDetectorCount is the number of detectors in the spatial domain.
+ static int calcFourierFilterSize(int _iDetectorCount);
+
+ // Sets the filter type. Some filter types require the user to supply an
+ // array containing the filter.
+ // The number of elements in a filter in the fourier domain should be equal
+ // to the value returned by calcFourierFilterSize().
+ // The following types require a filter:
+ //
+ // - FILTER_PROJECTION:
+ // The filter size should be equal to the output of
+ // calcFourierFilterSize(). The filtered sinogram is
+ // multiplied with the supplied filter.
+ //
+ // - FILTER_SINOGRAM:
+ // Same as FILTER_PROJECTION, but now the filter should contain a row for
+ // every projection direction.
+ //
+ // - FILTER_RPROJECTION:
+ // The filter should now contain one kernel (= ifft of filter), with the
+ // peak in the center. The filter width
+ // can be any value. If odd, the peak is assumed to be in the center, if
+ // even, it is assumed to be at floor(filter-width/2).
+ //
+ // - FILTER_RSINOGRAM
+ // Same as FILTER_RPROJECTION, but now the supplied filter should contain a
+ // row for every projection direction.
+ //
+ // A large number of other filters (FILTER_RAMLAK, FILTER_SHEPPLOGAN,
+ // FILTER_COSINE, FILTER_HAMMING, and FILTER_HANN)
+ // have a D variable, which gives the cutoff point in the frequency domain.
+ // Setting this value to 1.0 will include the whole filter
+ bool setFilter(astra::E_FBPFILTER _eFilter,
+ const float * _pfHostFilter = NULL,
+ int _iFilterWidth = 0, float _fD = 1.0f, float _fFilterParameter = -1.0f);
+
+ bool setShortScan(bool ss) { m_bShortScan = ss; return true; }
+
+ virtual bool init();
+
+ virtual bool iterate(unsigned int iterations);
+
+ virtual float computeDiffNorm() { return 0.0f; } // TODO
+
+protected:
+ void reset();
+
+ void* D_filter; // cufftComplex*
+ bool m_bShortScan;
+};
+
+}
diff --git a/cuda/2d/fbp_filters.h b/cuda/2d/fbp_filters.h
index f1223c5..bf1342d 100644
--- a/cuda/2d/fbp_filters.h
+++ b/cuda/2d/fbp_filters.h
@@ -28,6 +28,8 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
#ifndef FBP_FILTERS_H
#define FBP_FILTERS_H
+namespace astra {
+
enum E_FBPFILTER
{
FILTER_NONE, //< no filter (regular BP)
@@ -54,4 +56,6 @@ enum E_FBPFILTER
FILTER_RSINOGRAM, //< sinogram filter in real space
};
+}
+
#endif /* FBP_FILTERS_H */
diff --git a/cuda/2d/fft.cu b/cuda/2d/fft.cu
index 654bbae..5500e14 100644
--- a/cuda/2d/fft.cu
+++ b/cuda/2d/fft.cu
@@ -63,6 +63,8 @@ using namespace astra;
} } while (0)
+namespace astraCUDA {
+
__global__ static void applyFilter_kernel(int _iProjectionCount,
int _iFreqBinCount,
cufftComplex * _pSinogram,
@@ -275,11 +277,11 @@ bool runCudaIFFT(int _iProjectionCount, const cufftComplex* _pDevSourceComplex,
// Because the input is real, the Fourier transform is symmetric.
// CUFFT only outputs the first half (ignoring the redundant second half),
// and expects the same as input for the IFFT.
-int calcFFTFourSize(int _iFFTRealSize)
+int calcFFTFourierSize(int _iFFTRealSize)
{
- int iFFTFourSize = _iFFTRealSize / 2 + 1;
+ int iFFTFourierSize = _iFFTRealSize / 2 + 1;
- return iFFTFourSize;
+ return iFFTFourierSize;
}
void genIdenFilter(int _iProjectionCount, cufftComplex * _pFilter,
@@ -694,6 +696,10 @@ void genFilter(E_FBPFILTER _eFilter, float _fD, int _iProjectionCount,
delete[] pfW;
}
+
+}
+
+
#ifdef STANDALONE
__global__ static void doubleFourierOutput_kernel(int _iProjectionCount,
diff --git a/cuda/2d/fft.h b/cuda/2d/fft.h
index 91f05f2..3e91043 100644
--- a/cuda/2d/fft.h
+++ b/cuda/2d/fft.h
@@ -33,6 +33,8 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
#include "fbp_filters.h"
+namespace astraCUDA {
+
bool allocateComplexOnDevice(int _iProjectionCount,
int _iDetectorCount,
cufftComplex ** _ppDevComplex);
@@ -56,13 +58,15 @@ bool runCudaIFFT(int _iProjectionCount, const cufftComplex* _pDevSourceComplex,
void applyFilter(int _iProjectionCount, int _iFreqBinCount,
cufftComplex * _pSinogram, cufftComplex * _pFilter);
-int calcFFTFourSize(int _iFFTRealSize);
+int calcFFTFourierSize(int _iFFTRealSize);
-void genFilter(E_FBPFILTER _eFilter, float _fD, int _iProjectionCount,
+void genFilter(astra::E_FBPFILTER _eFilter, float _fD, int _iProjectionCount,
cufftComplex * _pFilter, int _iFFTRealDetectorCount,
int _iFFTFourierDetectorCount, float _fParameter = -1.0f);
void genIdenFilter(int _iProjectionCount, cufftComplex * _pFilter,
int _iFFTRealDetectorCount, int _iFFTFourierDetectorCount);
+}
+
#endif /* FFT_H */
diff --git a/cuda/2d/par_bp.cu b/cuda/2d/par_bp.cu
index dfdd193..307c561 100644
--- a/cuda/2d/par_bp.cu
+++ b/cuda/2d/par_bp.cu
@@ -52,8 +52,8 @@ const unsigned int g_blockSlices = 16;
const unsigned int g_MaxAngles = 2560;
-__constant__ float gC_angle_sin[g_MaxAngles];
-__constant__ float gC_angle_cos[g_MaxAngles];
+__constant__ float gC_angle_scaled_sin[g_MaxAngles];
+__constant__ float gC_angle_scaled_cos[g_MaxAngles];
__constant__ float gC_angle_offset[g_MaxAngles];
static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int width, unsigned int height, cudaTextureAddressMode mode = cudaAddressModeBorder)
@@ -72,7 +72,7 @@ static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int wi
return true;
}
-__global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, bool offsets, const SDimensions dims, float fOutputScale)
+__global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale)
{
const int relX = threadIdx.x;
const int relY = threadIdx.y;
@@ -86,47 +86,30 @@ __global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int star
if (X >= dims.iVolWidth || Y >= dims.iVolHeight)
return;
- const float fX = ( X - 0.5f*dims.iVolWidth + 0.5f ) / dims.fDetScale;
- const float fY = ( Y - 0.5f*dims.iVolHeight + 0.5f ) / dims.fDetScale;
+ const float fX = ( X - 0.5f*dims.iVolWidth + 0.5f );
+ const float fY = ( Y - 0.5f*dims.iVolHeight + 0.5f );
float* volData = (float*)D_volData;
float fVal = 0.0f;
float fA = startAngle + 0.5f;
- const float fT_base = 0.5f*dims.iProjDets - 0.5f + 0.5f;
- if (offsets) {
-
- for (int angle = startAngle; angle < endAngle; ++angle)
- {
- const float cos_theta = gC_angle_cos[angle];
- const float sin_theta = gC_angle_sin[angle];
- const float TOffset = gC_angle_offset[angle];
-
- const float fT = fT_base + fX * cos_theta - fY * sin_theta + TOffset;
- fVal += tex2D(gT_projTexture, fT, fA);
- fA += 1.0f;
- }
-
- } else {
-
- for (int angle = startAngle; angle < endAngle; ++angle)
- {
- const float cos_theta = gC_angle_cos[angle];
- const float sin_theta = gC_angle_sin[angle];
-
- const float fT = fT_base + fX * cos_theta - fY * sin_theta;
- fVal += tex2D(gT_projTexture, fT, fA);
- fA += 1.0f;
- }
+ for (int angle = startAngle; angle < endAngle; ++angle)
+ {
+ const float scaled_cos_theta = gC_angle_scaled_cos[angle];
+ const float scaled_sin_theta = gC_angle_scaled_sin[angle];
+ const float TOffset = gC_angle_offset[angle];
+ const float fT = fX * scaled_cos_theta - fY * scaled_sin_theta + TOffset;
+ fVal += tex2D(gT_projTexture, fT, fA);
+ fA += 1.0f;
}
volData[Y*volPitch+X] += fVal * fOutputScale;
}
// supersampling version
-__global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, bool offsets, const SDimensions dims, float fOutputScale)
+__global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale)
{
const int relX = threadIdx.x;
const int relY = threadIdx.y;
@@ -140,61 +123,35 @@ __global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int s
if (X >= dims.iVolWidth || Y >= dims.iVolHeight)
return;
- const float fX = ( X - 0.5f*dims.iVolWidth + 0.5f - 0.5f + 0.5f/dims.iRaysPerPixelDim) / dims.fDetScale;
- const float fY = ( Y - 0.5f*dims.iVolHeight + 0.5f - 0.5f + 0.5f/dims.iRaysPerPixelDim) / dims.fDetScale;
+ const float fX = ( X - 0.5f*dims.iVolWidth + 0.5f - 0.5f + 0.5f/dims.iRaysPerPixelDim);
+ const float fY = ( Y - 0.5f*dims.iVolHeight + 0.5f - 0.5f + 0.5f/dims.iRaysPerPixelDim);
- const float fSubStep = 1.0f/(dims.iRaysPerPixelDim * dims.fDetScale);
+ const float fSubStep = 1.0f/(dims.iRaysPerPixelDim); // * dims.fDetScale);
float* volData = (float*)D_volData;
float fVal = 0.0f;
float fA = startAngle + 0.5f;
- const float fT_base = 0.5f*dims.iProjDets - 0.5f + 0.5f;
fOutputScale /= (dims.iRaysPerPixelDim * dims.iRaysPerPixelDim);
- if (offsets) {
-
- for (int angle = startAngle; angle < endAngle; ++angle)
- {
- const float cos_theta = gC_angle_cos[angle];
- const float sin_theta = gC_angle_sin[angle];
- const float TOffset = gC_angle_offset[angle];
-
- float fT = fT_base + fX * cos_theta - fY * sin_theta + TOffset;
-
- for (int iSubX = 0; iSubX < dims.iRaysPerPixelDim; ++iSubX) {
- float fTy = fT;
- fT += fSubStep * cos_theta;
- for (int iSubY = 0; iSubY < dims.iRaysPerPixelDim; ++iSubY) {
- fVal += tex2D(gT_projTexture, fTy, fA);
- fTy -= fSubStep * sin_theta;
- }
- }
- fA += 1.0f;
- }
-
- } else {
+ for (int angle = startAngle; angle < endAngle; ++angle)
+ {
+ const float cos_theta = gC_angle_scaled_cos[angle];
+ const float sin_theta = gC_angle_scaled_sin[angle];
+ const float TOffset = gC_angle_offset[angle];
- for (int angle = startAngle; angle < endAngle; ++angle)
- {
- const float cos_theta = gC_angle_cos[angle];
- const float sin_theta = gC_angle_sin[angle];
+ float fT = fX * cos_theta - fY * sin_theta + TOffset;
- float fT = fT_base + fX * cos_theta - fY * sin_theta;
-
- for (int iSubX = 0; iSubX < dims.iRaysPerPixelDim; ++iSubX) {
- float fTy = fT;
- fT += fSubStep * cos_theta;
- for (int iSubY = 0; iSubY < dims.iRaysPerPixelDim; ++iSubY) {
- fVal += tex2D(gT_projTexture, fTy, fA);
- fTy -= fSubStep * sin_theta;
- }
+ for (int iSubX = 0; iSubX < dims.iRaysPerPixelDim; ++iSubX) {
+ float fTy = fT;
+ fT += fSubStep * cos_theta;
+ for (int iSubY = 0; iSubY < dims.iRaysPerPixelDim; ++iSubY) {
+ fVal += tex2D(gT_projTexture, fTy, fA);
+ fTy -= fSubStep * sin_theta;
}
- fA += 1.0f;
-
}
-
+ fA += 1.0f;
}
volData[Y*volPitch+X] += fVal * fOutputScale;
@@ -211,12 +168,10 @@ __global__ void devBP_SART(float* D_volData, unsigned int volPitch, float offset
if (X >= dims.iVolWidth || Y >= dims.iVolHeight)
return;
- const float fX = ( X - 0.5f*dims.iVolWidth + 0.5f ) / dims.fDetScale;
- const float fY = ( Y - 0.5f*dims.iVolHeight + 0.5f ) / dims.fDetScale;
-
- const float fT_base = 0.5f*dims.iProjDets - 0.5f + 0.5f;
+ const float fX = ( X - 0.5f*dims.iVolWidth + 0.5f );
+ const float fY = ( Y - 0.5f*dims.iVolHeight + 0.5f );
- const float fT = fT_base + fX * angle_cos - fY * angle_sin + offset;
+ const float fT = fX * angle_cos - fY * angle_sin + offset;
const float fVal = tex2D(gT_projTexture, fT, 0.5f);
D_volData[Y*volPitch+X] += fVal * fOutputScale;
@@ -225,32 +180,35 @@ __global__ void devBP_SART(float* D_volData, unsigned int volPitch, float offset
bool BP_internal(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
- const SDimensions& dims, const float* angles, const float* TOffsets, float fOutputScale)
+ const SDimensions& dims, const SParProjection* angles,
+ float fOutputScale)
{
- // TODO: process angles block by block
assert(dims.iProjAngles <= g_MaxAngles);
- float* angle_sin = new float[dims.iProjAngles];
- float* angle_cos = new float[dims.iProjAngles];
+ float* angle_scaled_sin = new float[dims.iProjAngles];
+ float* angle_scaled_cos = new float[dims.iProjAngles];
+ float* angle_offset = new float[dims.iProjAngles];
bindProjDataTexture(D_projData, projPitch, dims.iProjDets, dims.iProjAngles);
for (unsigned int i = 0; i < dims.iProjAngles; ++i) {
- angle_sin[i] = sinf(angles[i]);
- angle_cos[i] = cosf(angles[i]);
+ double d = angles[i].fDetUX * angles[i].fRayY - angles[i].fDetUY * angles[i].fRayX;
+ angle_scaled_cos[i] = angles[i].fRayY / d;
+ angle_scaled_sin[i] = -angles[i].fRayX / d; // TODO: Check signs
+ angle_offset[i] = (angles[i].fDetSY * angles[i].fRayX - angles[i].fDetSX * angles[i].fRayY) / d;
}
- cudaError_t e1 = cudaMemcpyToSymbol(gC_angle_sin, angle_sin, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
- cudaError_t e2 = cudaMemcpyToSymbol(gC_angle_cos, angle_cos, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
+
+ cudaError_t e1 = cudaMemcpyToSymbol(gC_angle_scaled_sin, angle_scaled_sin, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
+ cudaError_t e2 = cudaMemcpyToSymbol(gC_angle_scaled_cos, angle_scaled_cos, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
+ cudaError_t e3 = cudaMemcpyToSymbol(gC_angle_offset, angle_offset, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
assert(e1 == cudaSuccess);
assert(e2 == cudaSuccess);
+ assert(e3 == cudaSuccess);
- if (TOffsets) {
- cudaError_t e3 = cudaMemcpyToSymbol(gC_angle_offset, TOffsets, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
- assert(e3 == cudaSuccess);
- }
- delete[] angle_sin;
- delete[] angle_cos;
+ delete[] angle_scaled_sin;
+ delete[] angle_scaled_cos;
+ delete[] angle_offset;
dim3 dimBlock(g_blockSlices, g_blockSliceSize);
dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices,
@@ -262,9 +220,9 @@ bool BP_internal(float* D_volumeData, unsigned int volumePitch,
for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) {
if (dims.iRaysPerPixelDim > 1)
- devBP_SS<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, (TOffsets != 0), dims, fOutputScale);
+ devBP_SS<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale);
else
- devBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, (TOffsets != 0), dims, fOutputScale);
+ devBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale);
}
cudaThreadSynchronize();
@@ -277,7 +235,7 @@ bool BP_internal(float* D_volumeData, unsigned int volumePitch,
bool BP(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
- const SDimensions& dims, const float* angles, const float* TOffsets, float fOutputScale)
+ const SDimensions& dims, const SParProjection* angles, float fOutputScale)
{
for (unsigned int iAngle = 0; iAngle < dims.iProjAngles; iAngle += g_MaxAngles) {
SDimensions subdims = dims;
@@ -289,9 +247,7 @@ bool BP(float* D_volumeData, unsigned int volumePitch,
bool ret;
ret = BP_internal(D_volumeData, volumePitch,
D_projData + iAngle * projPitch, projPitch,
- subdims, angles + iAngle,
- TOffsets ? TOffsets + iAngle : 0,
- fOutputScale);
+ subdims, angles + iAngle, fOutputScale);
if (!ret)
return false;
}
@@ -302,25 +258,23 @@ bool BP(float* D_volumeData, unsigned int volumePitch,
bool BP_SART(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
unsigned int angle, const SDimensions& dims,
- const float* angles, const float* TOffsets, float fOutputScale)
+ const SParProjection* angles, float fOutputScale)
{
// Only one angle.
// We need to Clamp to the border pixels instead of to zero, because
// SART weights with ray length.
bindProjDataTexture(D_projData, projPitch, dims.iProjDets, 1, cudaAddressModeClamp);
- float angle_sin = sinf(angles[angle]);
- float angle_cos = cosf(angles[angle]);
-
- float offset = 0.0f;
- if (TOffsets)
- offset = TOffsets[angle];
+ double d = angles[angle].fDetUX * angles[angle].fRayY - angles[angle].fDetUY * angles[angle].fRayX;
+ float angle_scaled_cos = angles[angle].fRayY / d;
+ float angle_scaled_sin = -angles[angle].fRayX / d; // TODO: Check signs
+ float angle_offset = (angles[angle].fDetSY * angles[angle].fRayX - angles[angle].fDetSX * angles[angle].fRayY) / d;
dim3 dimBlock(g_blockSlices, g_blockSliceSize);
dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices,
(dims.iVolHeight+g_blockSliceSize-1)/g_blockSliceSize);
- devBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, offset, angle_sin, angle_cos, dims, fOutputScale);
+ devBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, angle_offset, angle_scaled_sin, angle_scaled_cos, dims, fOutputScale);
cudaThreadSynchronize();
cudaTextForceKernelsCompletion();
diff --git a/cuda/2d/par_bp.h b/cuda/2d/par_bp.h
index 0720030..3fa8a28 100644
--- a/cuda/2d/par_bp.h
+++ b/cuda/2d/par_bp.h
@@ -34,13 +34,13 @@ namespace astraCUDA {
_AstraExport bool BP(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
- const SDimensions& dims, const float* angles,
- const float* TOffsets, float fOutputScale);
+ const SDimensions& dims, const SParProjection* angles,
+ float fOutputScale);
_AstraExport bool BP_SART(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
unsigned int angle, const SDimensions& dims,
- const float* angles, const float* TOffsets, float fOutputScale);
+ const SParProjection* angles, float fOutputScale);
}
diff --git a/cuda/2d/par_fp.cu b/cuda/2d/par_fp.cu
index f58a643..a0b04ee 100644
--- a/cuda/2d/par_fp.cu
+++ b/cuda/2d/par_fp.cu
@@ -29,6 +29,7 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
#include <cassert>
#include <iostream>
#include <list>
+#include <cmath>
#include "util.h"
#include "arith.h"
@@ -50,6 +51,7 @@ namespace astraCUDA {
static const unsigned g_MaxAngles = 2560;
__constant__ float gC_angle[g_MaxAngles];
__constant__ float gC_angle_offset[g_MaxAngles];
+__constant__ float gC_angle_detsize[g_MaxAngles];
// optimization parameters
@@ -62,10 +64,6 @@ static const unsigned int g_blockSlices = 64;
#define iPREC_FACTOR 16
-// if necessary, a buffer of zeroes of size g_MaxAngles
-static float* g_pfZeroes = 0;
-
-
static bool bindVolumeDataTexture(float* data, cudaArray*& dataArray, unsigned int pitch, unsigned int width, unsigned int height)
{
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
@@ -88,9 +86,10 @@ static bool bindVolumeDataTexture(float* data, cudaArray*& dataArray, unsigned i
return true;
}
+
// projection for angles that are roughly horizontal
-// theta between 45 and 135 degrees
-__global__ void FPhorizontal(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, int regionOffset, const SDimensions dims, float outputScale)
+// (detector roughly vertical)
+__global__ void FPhorizontal_simple(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, const SDimensions dims, float outputScale)
{
const int relDet = threadIdx.x;
const int relAngle = threadIdx.y;
@@ -105,33 +104,7 @@ __global__ void FPhorizontal(float* D_projData, unsigned int projPitch, unsigned
const float sin_theta = __sinf(theta);
// compute start detector for this block/angle:
- // (The same for all threadIdx.x)
- // -------------------------------------
-
- const int midSlice = startSlice + g_blockSlices / 2;
-
- // ASSUMPTION: fDetScale >= 1.0f
- // problem: detector regions get skipped because slice blocks aren't large
- // enough
- const unsigned int g_blockSliceSize = g_detBlockSize;
-
- // project (midSlice,midRegion) on this thread's detector
-
- const float fBase = 0.5f*dims.iProjDets - 0.5f +
- (
- (midSlice - 0.5f*dims.iVolWidth + 0.5f) * cos_theta
- - (g_blockSliceSize/2 - 0.5f*dims.iVolHeight + 0.5f) * sin_theta
- + gC_angle_offset[angle]
- ) / dims.fDetScale;
- int iBase = (int)floorf(fBase * fPREC_FACTOR);
- int iInc = (int)floorf(g_blockSliceSize * sin_theta / dims.fDetScale * -fPREC_FACTOR);
-
- // ASSUMPTION: 16 > regionOffset / fDetScale
- const int detRegion = (iBase + (blockIdx.y - regionOffset)*iInc + 16*iPREC_FACTOR*g_detBlockSize) / (iPREC_FACTOR * g_detBlockSize) - 16;
- const int detPrevRegion = (iBase + (blockIdx.y - regionOffset - 1)*iInc + 16*iPREC_FACTOR*g_detBlockSize) / (iPREC_FACTOR * g_detBlockSize) - 16;
-
- if (blockIdx.y > 0 && detRegion == detPrevRegion)
- return;
+ const int detRegion = blockIdx.y;
const int detector = detRegion * g_detBlockSize + relDet;
@@ -141,7 +114,7 @@ __global__ void FPhorizontal(float* D_projData, unsigned int projPitch, unsigned
if (detector < 0 || detector >= dims.iProjDets)
return;
- const float fDetStep = -dims.fDetScale / sin_theta;
+ const float fDetStep = -gC_angle_detsize[angle] / sin_theta;
float fSliceStep = cos_theta / sin_theta;
float fDistCorr;
if (sin_theta > 0.0f)
@@ -191,9 +164,10 @@ __global__ void FPhorizontal(float* D_projData, unsigned int projPitch, unsigned
D_projData[angle*projPitch+detector] += fVal * fDistCorr;
}
+
// projection for angles that are roughly vertical
-// theta between 0 and 45, or 135 and 180 degrees
-__global__ void FPvertical(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, int regionOffset, const SDimensions dims, float outputScale)
+// (detector roughly horizontal)
+__global__ void FPvertical_simple(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, const SDimensions dims, float outputScale)
{
const int relDet = threadIdx.x;
const int relAngle = threadIdx.y;
@@ -208,33 +182,7 @@ __global__ void FPvertical(float* D_projData, unsigned int projPitch, unsigned i
const float sin_theta = __sinf(theta);
// compute start detector for this block/angle:
- // (The same for all threadIdx.x)
- // -------------------------------------
-
- const int midSlice = startSlice + g_blockSlices / 2;
-
- // project (midSlice,midRegion) on this thread's detector
-
- // ASSUMPTION: fDetScale >= 1.0f
- // problem: detector regions get skipped because slice blocks aren't large
- // enough
- const unsigned int g_blockSliceSize = g_detBlockSize;
-
- const float fBase = 0.5f*dims.iProjDets - 0.5f +
- (
- (g_blockSliceSize/2 - 0.5f*dims.iVolWidth + 0.5f) * cos_theta
- - (midSlice - 0.5f*dims.iVolHeight + 0.5f) * sin_theta
- + gC_angle_offset[angle]
- ) / dims.fDetScale;
- int iBase = (int)floorf(fBase * fPREC_FACTOR);
- int iInc = (int)floorf(g_blockSliceSize * cos_theta / dims.fDetScale * fPREC_FACTOR);
-
- // ASSUMPTION: 16 > regionOffset / fDetScale
- const int detRegion = (iBase + (blockIdx.y - regionOffset)*iInc + 16*iPREC_FACTOR*g_detBlockSize) / (iPREC_FACTOR * g_detBlockSize) - 16;
- const int detPrevRegion = (iBase + (blockIdx.y - regionOffset-1)*iInc + 16*iPREC_FACTOR*g_detBlockSize) / (iPREC_FACTOR * g_detBlockSize) - 16;
-
- if (blockIdx.y > 0 && detRegion == detPrevRegion)
- return;
+ const int detRegion = blockIdx.y;
const int detector = detRegion * g_detBlockSize + relDet;
@@ -244,7 +192,7 @@ __global__ void FPvertical(float* D_projData, unsigned int projPitch, unsigned i
if (detector < 0 || detector >= dims.iProjDets)
return;
- const float fDetStep = dims.fDetScale / cos_theta;
+ const float fDetStep = gC_angle_detsize[angle] / cos_theta;
float fSliceStep = sin_theta / cos_theta;
float fDistCorr;
if (cos_theta < 0.0f)
@@ -292,183 +240,45 @@ __global__ void FPvertical(float* D_projData, unsigned int projPitch, unsigned i
D_projData[angle*projPitch+detector] += fVal * fDistCorr;
}
-// projection for angles that are roughly horizontal
-// (detector roughly vertical)
-__global__ void FPhorizontal_simple(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, const SDimensions dims, float outputScale)
-{
- const int relDet = threadIdx.x;
- const int relAngle = threadIdx.y;
-
- int angle = startAngle + blockIdx.x * g_anglesPerBlock + relAngle;
-
- if (angle >= endAngle)
- return;
- const float theta = gC_angle[angle];
- const float cos_theta = __cosf(theta);
- const float sin_theta = __sinf(theta);
- // compute start detector for this block/angle:
- const int detRegion = blockIdx.y;
-
- const int detector = detRegion * g_detBlockSize + relDet;
-
- // Now project the part of the ray to angle,detector through
- // slices startSlice to startSlice+g_blockSlices-1
- if (detector < 0 || detector >= dims.iProjDets)
- return;
+// Coordinates of center of detector pixel number t:
+// x = (t - 0.5*nDets + 0.5 - fOffset) * fSize * cos(fAngle)
+// y = - (t - 0.5*nDets + 0.5 - fOffset) * fSize * sin(fAngle)
- const float fDetStep = -dims.fDetScale / sin_theta;
- float fSliceStep = cos_theta / sin_theta;
- float fDistCorr;
- if (sin_theta > 0.0f)
- fDistCorr = -fDetStep;
- else
- fDistCorr = fDetStep;
- fDistCorr *= outputScale;
- float fVal = 0.0f;
- // project detector on slice
- float fP = (detector - 0.5f*dims.iProjDets + 0.5f - gC_angle_offset[angle]) * fDetStep + (startSlice - 0.5f*dims.iVolWidth + 0.5f) * fSliceStep + 0.5f*dims.iVolHeight - 0.5f + 0.5f;
- float fS = startSlice + 0.5f;
- int endSlice = startSlice + g_blockSlices;
- if (endSlice > dims.iVolWidth)
- endSlice = dims.iVolWidth;
-
- if (dims.iRaysPerDet > 1) {
-
- fP += (-0.5f*dims.iRaysPerDet + 0.5f)/dims.iRaysPerDet * fDetStep;
- const float fSubDetStep = fDetStep / dims.iRaysPerDet;
- fDistCorr /= dims.iRaysPerDet;
-
- fSliceStep -= dims.iRaysPerDet * fSubDetStep;
-
- for (int slice = startSlice; slice < endSlice; ++slice)
- {
- for (int iSubT = 0; iSubT < dims.iRaysPerDet; ++iSubT) {
- fVal += tex2D(gT_volumeTexture, fS, fP);
- fP += fSubDetStep;
- }
- fP += fSliceStep;
- fS += 1.0f;
- }
-
- } else {
-
- for (int slice = startSlice; slice < endSlice; ++slice)
- {
- fVal += tex2D(gT_volumeTexture, fS, fP);
- fP += fSliceStep;
- fS += 1.0f;
- }
-
-
- }
-
- D_projData[angle*projPitch+detector] += fVal * fDistCorr;
-}
-
-
-// projection for angles that are roughly vertical
-// (detector roughly horizontal)
-__global__ void FPvertical_simple(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, const SDimensions dims, float outputScale)
+static void convertAndUploadAngles(const SParProjection *projs, unsigned int nth, unsigned int ndets)
{
- const int relDet = threadIdx.x;
- const int relAngle = threadIdx.y;
-
- int angle = startAngle + blockIdx.x * g_anglesPerBlock + relAngle;
-
- if (angle >= endAngle)
- return;
-
- const float theta = gC_angle[angle];
- const float cos_theta = __cosf(theta);
- const float sin_theta = __sinf(theta);
-
- // compute start detector for this block/angle:
- const int detRegion = blockIdx.y;
-
- const int detector = detRegion * g_detBlockSize + relDet;
-
- // Now project the part of the ray to angle,detector through
- // slices startSlice to startSlice+g_blockSlices-1
-
- if (detector < 0 || detector >= dims.iProjDets)
- return;
-
- const float fDetStep = dims.fDetScale / cos_theta;
- float fSliceStep = sin_theta / cos_theta;
- float fDistCorr;
- if (cos_theta < 0.0f)
- fDistCorr = -fDetStep;
- else
- fDistCorr = fDetStep;
- fDistCorr *= outputScale;
-
- float fVal = 0.0f;
- float fP = (detector - 0.5f*dims.iProjDets + 0.5f - gC_angle_offset[angle]) * fDetStep + (startSlice - 0.5f*dims.iVolHeight + 0.5f) * fSliceStep + 0.5f*dims.iVolWidth - 0.5f + 0.5f;
- float fS = startSlice+0.5f;
- int endSlice = startSlice + g_blockSlices;
- if (endSlice > dims.iVolHeight)
- endSlice = dims.iVolHeight;
-
- if (dims.iRaysPerDet > 1) {
-
- fP += (-0.5f*dims.iRaysPerDet + 0.5f)/dims.iRaysPerDet * fDetStep;
- const float fSubDetStep = fDetStep / dims.iRaysPerDet;
- fDistCorr /= dims.iRaysPerDet;
-
- fSliceStep -= dims.iRaysPerDet * fSubDetStep;
-
- for (int slice = startSlice; slice < endSlice; ++slice)
- {
- for (int iSubT = 0; iSubT < dims.iRaysPerDet; ++iSubT) {
- fVal += tex2D(gT_volumeTexture, fP, fS);
- fP += fSubDetStep;
- }
- fP += fSliceStep;
- fS += 1.0f;
- }
-
- } else {
+ float *angles = new float[nth];
+ float *offsets = new float[nth];
+ float *detsizes = new float[nth];
- for (int slice = startSlice; slice < endSlice; ++slice)
- {
- fVal += tex2D(gT_volumeTexture, fP, fS);
- fP += fSliceStep;
- fS += 1.0f;
- }
-
- }
+ for (int i = 0; i < nth; ++i)
+ getParParameters(projs[i], ndets, angles[i], detsizes[i], offsets[i]);
- D_projData[angle*projPitch+detector] += fVal * fDistCorr;
+ cudaMemcpyToSymbol(gC_angle, angles, nth*sizeof(float), 0, cudaMemcpyHostToDevice);
+ cudaMemcpyToSymbol(gC_angle_offset, offsets, nth*sizeof(float), 0, cudaMemcpyHostToDevice);
+ cudaMemcpyToSymbol(gC_angle_detsize, detsizes, nth*sizeof(float), 0, cudaMemcpyHostToDevice);
}
bool FP_simple_internal(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
- const SDimensions& dims, const float* angles,
- const float* TOffsets, float outputScale)
+ const SDimensions& dims, const SParProjection* angles,
+ float outputScale)
{
- // TODO: load angles into constant memory in smaller blocks
assert(dims.iProjAngles <= g_MaxAngles);
+ assert(angles);
+
cudaArray* D_dataArray;
bindVolumeDataTexture(D_volumeData, D_dataArray, volumePitch, dims.iVolWidth, dims.iVolHeight);
- cudaMemcpyToSymbol(gC_angle, angles, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
- if (TOffsets) {
- cudaMemcpyToSymbol(gC_angle_offset, TOffsets, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
- } else {
- if (!g_pfZeroes) {
- g_pfZeroes = new float[g_MaxAngles];
- memset(g_pfZeroes, 0, g_MaxAngles * sizeof(float));
- }
- cudaMemcpyToSymbol(gC_angle_offset, g_pfZeroes, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
- }
+ convertAndUploadAngles(angles, dims.iProjAngles, dims.iProjDets);
+
dim3 dimBlock(g_detBlockSize, g_anglesPerBlock); // detector block size, angles
@@ -491,7 +301,7 @@ bool FP_simple_internal(float* D_volumeData, unsigned int volumePitch,
// Maybe we should detect corner cases and put them in the optimal
// group of angles.
if (a != dims.iProjAngles)
- vertical = (fabsf(sinf(angles[a])) <= fabsf(cosf(angles[a])));
+ vertical = (fabsf(angles[a].fRayX) <= fabsf(angles[a].fRayY));
if (a == dims.iProjAngles || vertical != blockVertical) {
// block done
@@ -535,8 +345,8 @@ bool FP_simple_internal(float* D_volumeData, unsigned int volumePitch,
bool FP_simple(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
- const SDimensions& dims, const float* angles,
- const float* TOffsets, float outputScale)
+ const SDimensions& dims, const SParProjection* angles,
+ float outputScale)
{
for (unsigned int iAngle = 0; iAngle < dims.iProjAngles; iAngle += g_MaxAngles) {
SDimensions subdims = dims;
@@ -549,7 +359,7 @@ bool FP_simple(float* D_volumeData, unsigned int volumePitch,
ret = FP_simple_internal(D_volumeData, volumePitch,
D_projData + iAngle * projPitch, projPitch,
subdims, angles + iAngle,
- TOffsets ? TOffsets + iAngle : 0, outputScale);
+ outputScale);
if (!ret)
return false;
}
@@ -558,106 +368,12 @@ bool FP_simple(float* D_volumeData, unsigned int volumePitch,
bool FP(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
- const SDimensions& dims, const float* angles,
- const float* TOffsets, float outputScale)
+ const SDimensions& dims, const SParProjection* angles,
+ float outputScale)
{
return FP_simple(D_volumeData, volumePitch, D_projData, projPitch,
- dims, angles, TOffsets, outputScale);
+ dims, angles, outputScale);
- // TODO: Fix bug in this non-simple FP with large detscale and TOffsets
-#if 0
-
- // TODO: load angles into constant memory in smaller blocks
- assert(dims.iProjAngles <= g_MaxAngles);
-
- // TODO: compute region size dynamically to resolve these two assumptions
- // ASSUMPTION: 16 > regionOffset / fDetScale
- const unsigned int g_blockSliceSize = g_detBlockSize;
- assert(16 > (g_blockSlices / g_blockSliceSize) / dims.fDetScale);
- // ASSUMPTION: fDetScale >= 1.0f
- assert(dims.fDetScale > 0.9999f);
-
- cudaArray* D_dataArray;
- bindVolumeDataTexture(D_volumeData, D_dataArray, volumePitch, dims.iVolWidth, dims.iVolHeight);
-
- cudaMemcpyToSymbol(gC_angle, angles, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
-
- if (TOffsets) {
- cudaMemcpyToSymbol(gC_angle_offset, TOffsets, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
- } else {
- if (!g_pfZeroes) {
- g_pfZeroes = new float[g_MaxAngles];
- memset(g_pfZeroes, 0, g_MaxAngles * sizeof(float));
- }
- cudaMemcpyToSymbol(gC_angle_offset, g_pfZeroes, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
- }
-
- int regionOffset = g_blockSlices / g_blockSliceSize;
-
- dim3 dimBlock(g_detBlockSize, g_anglesPerBlock); // region size, angles
-
- std::list<cudaStream_t> streams;
-
-
- // Run over all angles, grouping them into groups of the same
- // orientation (roughly horizontal vs. roughly vertical).
- // Start a stream of grids for each such group.
-
- // TODO: Check if it's worth it to store this info instead
- // of recomputing it every FP.
-
- unsigned int blockStart = 0;
- unsigned int blockEnd = 0;
- bool blockVertical = false;
- for (unsigned int a = 0; a <= dims.iProjAngles; ++a) {
- bool vertical;
- // TODO: Having <= instead of < below causes a 5% speedup.
- // Maybe we should detect corner cases and put them in the optimal
- // group of angles.
- if (a != dims.iProjAngles)
- vertical = (fabsf(sinf(angles[a])) <= fabsf(cosf(angles[a])));
- if (a == dims.iProjAngles || vertical != blockVertical) {
- // block done
-
- blockEnd = a;
- if (blockStart != blockEnd) {
- unsigned int length = dims.iVolHeight;
- if (blockVertical)
- length = dims.iVolWidth;
- dim3 dimGrid((blockEnd-blockStart+g_anglesPerBlock-1)/g_anglesPerBlock,
- (length+g_blockSliceSize-1)/g_blockSliceSize+2*regionOffset); // angle blocks, regions
- // TODO: check if we can't immediately
- // destroy the stream after use
- cudaStream_t stream;
- cudaStreamCreate(&stream);
- streams.push_back(stream);
- //printf("angle block: %d to %d, %d\n", blockStart, blockEnd, blockVertical);
- if (!blockVertical)
- for (unsigned int i = 0; i < dims.iVolWidth; i += g_blockSlices)
- FPhorizontal<<<dimGrid, dimBlock, 0, stream>>>(D_projData, projPitch, i, blockStart, blockEnd, regionOffset, dims, outputScale);
- else
- for (unsigned int i = 0; i < dims.iVolHeight; i += g_blockSlices)
- FPvertical<<<dimGrid, dimBlock, 0, stream>>>(D_projData, projPitch, i, blockStart, blockEnd, regionOffset, dims, outputScale);
- }
- blockVertical = vertical;
- blockStart = a;
- }
- }
-
- for (std::list<cudaStream_t>::iterator iter = streams.begin(); iter != streams.end(); ++iter)
- cudaStreamDestroy(*iter);
-
- streams.clear();
-
- cudaThreadSynchronize();
-
- cudaTextForceKernelsCompletion();
-
- cudaFreeArray(D_dataArray);
-
-
- return true;
-#endif
}
diff --git a/cuda/2d/par_fp.h b/cuda/2d/par_fp.h
index 4830c1e..1f0ff69 100644
--- a/cuda/2d/par_fp.h
+++ b/cuda/2d/par_fp.h
@@ -32,8 +32,8 @@ namespace astraCUDA {
_AstraExport bool FP(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
- const SDimensions& dims, const float* angles,
- const float* TOffsets, float fOutputScale);
+ const SDimensions& dims, const SParProjection* angles,
+ float fOutputScale);
}
diff --git a/cuda/2d/sart.cu b/cuda/2d/sart.cu
index f444b63..3a3c7e6 100644
--- a/cuda/2d/sart.cu
+++ b/cuda/2d/sart.cu
@@ -253,10 +253,10 @@ bool SART::callFP_SART(float* D_volumeData, unsigned int volumePitch,
{
SDimensions d = dims;
d.iProjAngles = 1;
- if (angles) {
+ if (parProjs) {
assert(!fanProjs);
return FP(D_volumeData, volumePitch, D_projData, projPitch,
- d, &angles[angle], TOffsets, outputScale);
+ d, &parProjs[angle], outputScale);
} else {
assert(fanProjs);
return FanFP(D_volumeData, volumePitch, D_projData, projPitch,
@@ -268,10 +268,10 @@ bool SART::callBP_SART(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
unsigned int angle, float outputScale)
{
- if (angles) {
+ if (parProjs) {
assert(!fanProjs);
return BP_SART(D_volumeData, volumePitch, D_projData, projPitch,
- angle, dims, angles, TOffsets, outputScale);
+ angle, dims, parProjs, outputScale);
} else {
assert(fanProjs);
return FanBP_SART(D_volumeData, volumePitch, D_projData, projPitch,
diff --git a/cuda/2d/sirt.cu b/cuda/2d/sirt.cu
index 0079717..b393d7f 100644
--- a/cuda/2d/sirt.cu
+++ b/cuda/2d/sirt.cu
@@ -151,7 +151,7 @@ bool SIRT::precomputeWeights()
bool SIRT::doSlabCorrections()
{
// This function compensates for effectively infinitely large slab-like
- // objects of finite thickness 1.
+ // objects of finite thickness 1 in a parallel beam geometry.
// Each ray through the object has an intersection of length d/cos(alpha).
// The length of the ray actually intersecting the reconstruction volume is
@@ -169,6 +169,10 @@ bool SIRT::doSlabCorrections()
if (useVolumeMask || useSinogramMask)
return false;
+ // Parallel-beam only
+ if (!parProjs)
+ return false;
+
// multiply by line weights
processSino<opDiv>(D_sinoData, D_lineWeight, projPitch, dims);
@@ -180,7 +184,9 @@ bool SIRT::doSlabCorrections()
float bound = cosf(1.3963f);
float* t = (float*)D_sinoData;
for (int i = 0; i < dims.iProjAngles; ++i) {
- float f = fabs(cosf(angles[i]));
+ float angle, detsize, offset;
+ getParParameters(parProjs[i], dims.iProjDets, angle, detsize, offset);
+ float f = fabs(cosf(angle));
if (f < bound)
f = bound;
@@ -188,7 +194,6 @@ bool SIRT::doSlabCorrections()
processSino<opMul>(t, f, sinoPitch, subdims);
t += sinoPitch;
}
-
return true;
}
@@ -298,40 +303,6 @@ float SIRT::computeDiffNorm()
}
-bool doSIRT(float* D_volumeData, unsigned int volumePitch,
- float* D_sinoData, unsigned int sinoPitch,
- float* D_maskData, unsigned int maskPitch,
- const SDimensions& dims, const float* angles,
- const float* TOffsets, unsigned int iterations)
-{
- SIRT sirt;
- bool ok = true;
-
- ok &= sirt.setGeometry(dims, angles);
- if (D_maskData)
- ok &= sirt.enableVolumeMask();
- if (TOffsets)
- ok &= sirt.setTOffsets(TOffsets);
-
- if (!ok)
- return false;
-
- ok = sirt.init();
- if (!ok)
- return false;
-
- if (D_maskData)
- ok &= sirt.setVolumeMask(D_maskData, maskPitch);
-
- ok &= sirt.setBuffers(D_volumeData, volumePitch, D_sinoData, sinoPitch);
- if (!ok)
- return false;
-
- ok = sirt.iterate(iterations);
-
- return ok;
-}
-
}
#ifdef STANDALONE
diff --git a/cuda/3d/fdk.cu b/cuda/3d/fdk.cu
index 27357ad..91b0219 100644
--- a/cuda/3d/fdk.cu
+++ b/cuda/3d/fdk.cu
@@ -233,7 +233,7 @@ bool FDK_Filter(cudaPitchedPtr D_projData,
// The filtering is a regular ramp filter per detector line.
int iPaddedDetCount = calcNextPowerOfTwo(2 * dims.iProjU);
- int iHalfFFTSize = calcFFTFourSize(iPaddedDetCount);
+ int iHalfFFTSize = astraCUDA::calcFFTFourierSize(iPaddedDetCount);
int projPitch = D_projData.pitch/sizeof(float);
@@ -241,22 +241,22 @@ bool FDK_Filter(cudaPitchedPtr D_projData,
float* D_sinoData = (float*)D_projData.ptr;
cufftComplex * D_sinoFFT = NULL;
- allocateComplexOnDevice(dims.iProjAngles, iHalfFFTSize, &D_sinoFFT);
+ astraCUDA::allocateComplexOnDevice(dims.iProjAngles, iHalfFFTSize, &D_sinoFFT);
bool ok = true;
for (int v = 0; v < dims.iProjV; ++v) {
- ok = runCudaFFT(dims.iProjAngles, D_sinoData, projPitch,
+ ok = astraCUDA::runCudaFFT(dims.iProjAngles, D_sinoData, projPitch,
dims.iProjU, iPaddedDetCount, iHalfFFTSize,
D_sinoFFT);
if (!ok) break;
- applyFilter(dims.iProjAngles, iHalfFFTSize, D_sinoFFT, D_filter);
+ astraCUDA::applyFilter(dims.iProjAngles, iHalfFFTSize, D_sinoFFT, D_filter);
- ok = runCudaIFFT(dims.iProjAngles, D_sinoFFT, D_sinoData, projPitch,
+ ok = astraCUDA::runCudaIFFT(dims.iProjAngles, D_sinoFFT, D_sinoData, projPitch,
dims.iProjU, iPaddedDetCount, iHalfFFTSize);
if (!ok) break;
@@ -264,7 +264,7 @@ bool FDK_Filter(cudaPitchedPtr D_projData,
D_sinoData += (dims.iProjAngles * projPitch);
}
- freeComplexOnDevice(D_sinoFFT);
+ astraCUDA::freeComplexOnDevice(D_sinoFFT);
return ok;
}
@@ -281,7 +281,7 @@ bool FDK(cudaPitchedPtr D_volumeData,
// TODO: Check errors
cufftComplex * D_filter;
int iPaddedDetCount = calcNextPowerOfTwo(2 * dims.iProjU);
- int iHalfFFTSize = calcFFTFourSize(iPaddedDetCount);
+ int iHalfFFTSize = astraCUDA::calcFFTFourierSize(iPaddedDetCount);
// NB: We don't support arbitrary cone_vec geometries here.
@@ -326,7 +326,7 @@ bool FDK(cudaPitchedPtr D_volumeData,
memset(pHostFilter, 0, sizeof(cufftComplex) * dims.iProjAngles * iHalfFFTSize);
if (pfFilter == 0){
- genFilter(FILTER_RAMLAK, 1.0f, dims.iProjAngles, pHostFilter, iPaddedDetCount, iHalfFFTSize);
+ astraCUDA::genFilter(astra::FILTER_RAMLAK, 1.0f, dims.iProjAngles, pHostFilter, iPaddedDetCount, iHalfFFTSize);
} else {
for (int i = 0; i < dims.iProjAngles * iHalfFFTSize; i++) {
pHostFilter[i].x = pfFilter[i];
@@ -335,8 +335,8 @@ bool FDK(cudaPitchedPtr D_volumeData,
}
- allocateComplexOnDevice(dims.iProjAngles, iHalfFFTSize, &D_filter);
- uploadComplexArrayToDevice(dims.iProjAngles, iHalfFFTSize, pHostFilter, D_filter);
+ astraCUDA::allocateComplexOnDevice(dims.iProjAngles, iHalfFFTSize, &D_filter);
+ astraCUDA::uploadComplexArrayToDevice(dims.iProjAngles, iHalfFFTSize, pHostFilter, D_filter);
delete [] pHostFilter;
@@ -348,7 +348,7 @@ bool FDK(cudaPitchedPtr D_volumeData,
ok = FDK_Filter(D_projData, D_filter, dims);
// Clean up filter
- freeComplexOnDevice(D_filter);
+ astraCUDA::freeComplexOnDevice(D_filter);
#endif
if (!ok)