diff options
Diffstat (limited to 'cuda/2d')
| -rw-r--r-- | cuda/2d/algo.cu | 62 | ||||
| -rw-r--r-- | cuda/2d/algo.h | 22 | ||||
| -rw-r--r-- | cuda/2d/astra.cu | 731 | ||||
| -rw-r--r-- | cuda/2d/astra.h | 155 | ||||
| -rw-r--r-- | cuda/2d/cgls.cu | 36 | ||||
| -rw-r--r-- | cuda/2d/dims.h | 5 | ||||
| -rw-r--r-- | cuda/2d/em.cu | 28 | ||||
| -rw-r--r-- | cuda/2d/fbp.cu | 347 | ||||
| -rw-r--r-- | cuda/2d/fbp.h | 98 | ||||
| -rw-r--r-- | cuda/2d/fbp_filters.h | 4 | ||||
| -rw-r--r-- | cuda/2d/fft.cu | 12 | ||||
| -rw-r--r-- | cuda/2d/fft.h | 8 | ||||
| -rw-r--r-- | cuda/2d/par_bp.cu | 166 | ||||
| -rw-r--r-- | cuda/2d/par_bp.h | 6 | ||||
| -rw-r--r-- | cuda/2d/par_fp.cu | 358 | ||||
| -rw-r--r-- | cuda/2d/par_fp.h | 4 | ||||
| -rw-r--r-- | cuda/2d/sart.cu | 8 | ||||
| -rw-r--r-- | cuda/2d/sirt.cu | 45 | 
18 files changed, 760 insertions, 1335 deletions
diff --git a/cuda/2d/algo.cu b/cuda/2d/algo.cu index 0e5209f..3275f6d 100644 --- a/cuda/2d/algo.cu +++ b/cuda/2d/algo.cu @@ -34,13 +34,13 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.  #include "fan_bp.h"  #include "util.h"  #include "arith.h" +#include "astra.h"  namespace astraCUDA {  ReconAlgo::ReconAlgo()  { -	angles = 0; -	TOffsets = 0; +	parProjs = 0;  	fanProjs = 0;  	shouldAbort = false; @@ -65,8 +65,7 @@ ReconAlgo::~ReconAlgo()  void ReconAlgo::reset()  { -	delete[] angles; -	delete[] TOffsets; +	delete[] parProjs;  	delete[] fanProjs;  	if (freeGPUMemory) { @@ -76,8 +75,7 @@ void ReconAlgo::reset()  		cudaFree(D_volumeData);  	} -	angles = 0; -	TOffsets = 0; +	parProjs = 0;  	fanProjs = 0;  	shouldAbort = false; @@ -123,46 +121,40 @@ bool ReconAlgo::enableSinogramMask()  } -bool ReconAlgo::setGeometry(const SDimensions& _dims, const float* _angles) +bool ReconAlgo::setGeometry(const astra::CVolumeGeometry2D* pVolGeom, +                            const astra::CProjectionGeometry2D* pProjGeom)  { -	dims = _dims; +	bool ok; -	angles = new float[dims.iProjAngles]; +	ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims); -	memcpy(angles, _angles, sizeof(angles[0]) * dims.iProjAngles); +	if (!ok) +		return false; +	delete[] parProjs; +	parProjs = 0;  	delete[] fanProjs;  	fanProjs = 0; -	return true; -} - -bool ReconAlgo::setFanGeometry(const SDimensions& _dims, -                               const SFanProjection* _projs) -{ -	dims = _dims; -	fanProjs = new SFanProjection[dims.iProjAngles]; - -	memcpy(fanProjs, _projs, sizeof(fanProjs[0]) * dims.iProjAngles); - -	delete[] angles; -	angles = 0; +	fOutputScale = 1.0f; +	ok = convertAstraGeometry(pVolGeom, pProjGeom, parProjs, fanProjs, fOutputScale); +	if (!ok) +		return false;  	return true;  } - -bool ReconAlgo::setTOffsets(const float* _TOffsets) +bool ReconAlgo::setSuperSampling(int raysPerDet, int raysPerPixelDim)  { -	// TODO: determine if they're all zero? -	TOffsets = new float[dims.iProjAngles]; -	memcpy(TOffsets, _TOffsets, sizeof(angles[0]) * dims.iProjAngles); +	if (raysPerDet <= 0 || raysPerPixelDim <= 0) +		return false; + +	dims.iRaysPerDet = raysPerDet; +	dims.iRaysPerPixelDim = raysPerPixelDim;  	return true;  } - -  bool ReconAlgo::setVolumeMask(float* _D_maskData, unsigned int _maskPitch)  {  	assert(useVolumeMask); @@ -323,14 +315,14 @@ bool ReconAlgo::callFP(float* D_volumeData, unsigned int volumePitch,                         float* D_projData, unsigned int projPitch,                         float outputScale)  { -	if (angles) { +	if (parProjs) {  		assert(!fanProjs);  		return FP(D_volumeData, volumePitch, D_projData, projPitch, -		          dims, angles, TOffsets, outputScale); +		          dims, parProjs, fOutputScale * outputScale);  	} else {  		assert(fanProjs);  		return FanFP(D_volumeData, volumePitch, D_projData, projPitch, -		             dims, fanProjs, outputScale); +		             dims, fanProjs, fOutputScale * outputScale);  	}  } @@ -338,10 +330,10 @@ bool ReconAlgo::callBP(float* D_volumeData, unsigned int volumePitch,                         float* D_projData, unsigned int projPitch,                         float outputScale)  { -	if (angles) { +	if (parProjs) {  		assert(!fanProjs);  		return BP(D_volumeData, volumePitch, D_projData, projPitch, -		          dims, angles, TOffsets, outputScale); +		          dims, parProjs, outputScale);  	} else {  		assert(fanProjs);  		return FanBP(D_volumeData, volumePitch, D_projData, projPitch, diff --git a/cuda/2d/algo.h b/cuda/2d/algo.h index 4a75907..d70859f 100644 --- a/cuda/2d/algo.h +++ b/cuda/2d/algo.h @@ -31,6 +31,17 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.  #include "astra/Globals.h"  #include "dims.h" +namespace astra { + +class CParallelProjectionGeometry2D; +class CParallelVecProjectionGeometry2D; +class CFanFlatProjectionGeometry2D; +class CFanFlatVecProjectionGeometry2D; +class CVolumeGeometry2D; +class CProjectionGeometry2D; + +} +  namespace astraCUDA {  class _AstraExport ReconAlgo { @@ -40,11 +51,10 @@ public:  	bool setGPUIndex(int iGPUIndex); -	bool setGeometry(const SDimensions& dims, const float* angles); -	bool setFanGeometry(const SDimensions& dims, const SFanProjection* projs); +	bool setGeometry(const astra::CVolumeGeometry2D* pVolGeom, +	                 const astra::CProjectionGeometry2D* pProjGeom); -	// setTOffsets should (optionally) be called after setGeometry -	bool setTOffsets(const float* TOffsets); +	bool setSuperSampling(int raysPerDet, int raysPerPixelDim);  	void signalAbort() { shouldAbort = true; } @@ -123,9 +133,9 @@ protected:  	SDimensions dims; -	float* angles; -	float* TOffsets; +	SParProjection* parProjs;  	SFanProjection* fanProjs; +	float fOutputScale;  	volatile bool shouldAbort; diff --git a/cuda/2d/astra.cu b/cuda/2d/astra.cu index c0132b2..81459de 100644 --- a/cuda/2d/astra.cu +++ b/cuda/2d/astra.cu @@ -41,8 +41,10 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.  #include <fstream>  #include <cuda.h> +#include "../../include/astra/GeometryUtil2D.h"  #include "../../include/astra/VolumeGeometry2D.h"  #include "../../include/astra/ParallelProjectionGeometry2D.h" +#include "../../include/astra/ParallelVecProjectionGeometry2D.h"  #include "../../include/astra/FanFlatProjectionGeometry2D.h"  #include "../../include/astra/FanFlatVecProjectionGeometry2D.h" @@ -63,514 +65,6 @@ enum CUDAProjectionType {  }; -class AstraFBP_internal { -public: -	SDimensions dims; -	float* angles; -	float* TOffsets; -	astraCUDA::SFanProjection* fanProjections; - -	float fOriginSourceDistance; -	float fOriginDetectorDistance; - -	float fPixelSize; - -	bool bFanBeam; -	bool bShortScan; - -	bool initialized; -	bool setStartReconstruction; - -	float* D_sinoData; -	unsigned int sinoPitch; - -	float* D_volumeData; -	unsigned int volumePitch; - -	cufftComplex * m_pDevFilter; -}; - -AstraFBP::AstraFBP() -{ -	pData = new AstraFBP_internal(); - -	pData->angles = 0; -	pData->fanProjections = 0; -	pData->TOffsets = 0; -	pData->D_sinoData = 0; -	pData->D_volumeData = 0; - -	pData->dims.iVolWidth = 0; -	pData->dims.iProjAngles = 0; -	pData->dims.fDetScale = 1.0f; -	pData->dims.iRaysPerDet = 1; -	pData->dims.iRaysPerPixelDim = 1; - -	pData->initialized = false; -	pData->setStartReconstruction = false; - -	pData->m_pDevFilter = NULL; -} - -AstraFBP::~AstraFBP() -{ -	delete[] pData->angles; -	pData->angles = 0; - -	delete[] pData->TOffsets; -	pData->TOffsets = 0; - -	delete[] pData->fanProjections; -	pData->fanProjections = 0; - -	cudaFree(pData->D_sinoData); -	pData->D_sinoData = 0; - -	cudaFree(pData->D_volumeData); -	pData->D_volumeData = 0; - -	if(pData->m_pDevFilter != NULL) -	{ -		freeComplexOnDevice(pData->m_pDevFilter); -		pData->m_pDevFilter = NULL; -	} - -	delete pData; -	pData = 0; -} - -bool AstraFBP::setReconstructionGeometry(unsigned int iVolWidth, -                                          unsigned int iVolHeight, -                                          float fPixelSize) -{ -	if (pData->initialized) -		return false; - -	pData->dims.iVolWidth = iVolWidth; -	pData->dims.iVolHeight = iVolHeight; - -	pData->fPixelSize = fPixelSize; - -	return (iVolWidth > 0 && iVolHeight > 0 && fPixelSize > 0.0f); -} - -bool AstraFBP::setProjectionGeometry(unsigned int iProjAngles, -                                      unsigned int iProjDets, -                                      const float* pfAngles, -                                      float fDetSize) -{ -	if (pData->initialized) -		return false; - -	pData->dims.iProjAngles = iProjAngles; -	pData->dims.iProjDets = iProjDets; -	pData->dims.fDetScale = fDetSize / pData->fPixelSize; - -	if (iProjAngles == 0 || iProjDets == 0 || pfAngles == 0) -		return false; - -	pData->angles = new float[iProjAngles]; -	memcpy(pData->angles, pfAngles, iProjAngles * sizeof(pfAngles[0])); - -	pData->bFanBeam = false; - -	return true; -} - -bool AstraFBP::setFanGeometry(unsigned int iProjAngles, -                              unsigned int iProjDets, -                              const astraCUDA::SFanProjection *fanProjs, -                              const float* pfAngles, -                              float fOriginSourceDistance, -                              float fOriginDetectorDistance, -                              float fDetSize, -                              bool bShortScan) -{ -	// Slightly abusing setProjectionGeometry for this... -	if (!setProjectionGeometry(iProjAngles, iProjDets, pfAngles, fDetSize)) -		return false; - -	pData->fOriginSourceDistance = fOriginSourceDistance; -	pData->fOriginDetectorDistance = fOriginDetectorDistance; - -	pData->fanProjections = new astraCUDA::SFanProjection[iProjAngles]; -	memcpy(pData->fanProjections, fanProjs, iProjAngles * sizeof(fanProjs[0])); - -	pData->bFanBeam = true; -	pData->bShortScan = bShortScan; - -	return true; -} - - -bool AstraFBP::setPixelSuperSampling(unsigned int iPixelSuperSampling) -{ -	if (pData->initialized) -		return false; - -	if (iPixelSuperSampling == 0) -		return false; - -	pData->dims.iRaysPerPixelDim = iPixelSuperSampling; - -	return true; -} - - -bool AstraFBP::setTOffsets(const float* pfTOffsets) -{ -	if (pData->initialized) -		return false; - -	if (pfTOffsets == 0) -		return false; - -	pData->TOffsets = new float[pData->dims.iProjAngles]; -	memcpy(pData->TOffsets, pfTOffsets, pData->dims.iProjAngles * sizeof(pfTOffsets[0])); - -	return true; -} - -bool AstraFBP::init(int iGPUIndex) -{ -	if (pData->initialized) -	{ -		return false; -	} - -	if (pData->dims.iProjAngles == 0 || pData->dims.iVolWidth == 0) -	{ -		return false; -	} - -	if (iGPUIndex != -1) { -		cudaSetDevice(iGPUIndex); -		cudaError_t err = cudaGetLastError(); - -		// Ignore errors caused by calling cudaSetDevice multiple times -		if (err != cudaSuccess && err != cudaErrorSetOnActiveProcess) -		{ -			return false; -		} -	} - -	bool ok = allocateVolumeData(pData->D_volumeData, pData->volumePitch, pData->dims); -	if (!ok) -	{ -		return false; -	} - -	ok = allocateProjectionData(pData->D_sinoData, pData->sinoPitch, pData->dims); -	if (!ok) -	{ -		cudaFree(pData->D_volumeData); -		pData->D_volumeData = 0; -		return false; -	} - -	pData->initialized = true; - -	return true; -} - -bool AstraFBP::setSinogram(const float* pfSinogram, -                            unsigned int iSinogramPitch) -{ -	if (!pData->initialized) -		return false; -	if (!pfSinogram) -		return false; - -	bool ok = copySinogramToDevice(pfSinogram, iSinogramPitch, -	                               pData->dims, -	                               pData->D_sinoData, pData->sinoPitch); -	if (!ok) -		return false; - -	// rescale sinogram to adjust for pixel size -	processSino<opMul>(pData->D_sinoData, -	                       1.0f/(pData->fPixelSize*pData->fPixelSize), -	                       pData->sinoPitch, pData->dims); - -	pData->setStartReconstruction = false; - -	return true; -} - -static int calcNextPowerOfTwo(int _iValue) -{ -	int iOutput = 1; - -	while(iOutput < _iValue) -	{ -		iOutput *= 2; -	} - -	return iOutput; -} - -bool AstraFBP::run() -{ -	if (!pData->initialized) -	{ -		return false; -	} - -	zeroVolumeData(pData->D_volumeData, pData->volumePitch, pData->dims); - -	bool ok = false; - -	if (pData->bFanBeam) { -		// Call FDK_PreWeight to handle fan beam geometry. We treat -		// this as a cone beam setup of a single slice: - -		// TODO: TOffsets affects this preweighting... - -		// We create a fake cudaPitchedPtr -		cudaPitchedPtr tmp; -		tmp.ptr = pData->D_sinoData; -		tmp.pitch = pData->sinoPitch * sizeof(float); -		tmp.xsize = pData->dims.iProjDets; -		tmp.ysize = pData->dims.iProjAngles; -		// and a fake Dimensions3D -		astraCUDA3d::SDimensions3D dims3d; -		dims3d.iVolX = pData->dims.iVolWidth; -		dims3d.iVolY = pData->dims.iVolHeight; -		dims3d.iVolZ = 1; -		dims3d.iProjAngles = pData->dims.iProjAngles; -		dims3d.iProjU = pData->dims.iProjDets; -		dims3d.iProjV = 1; - -		astraCUDA3d::FDK_PreWeight(tmp, pData->fOriginSourceDistance, -		              pData->fOriginDetectorDistance, 0.0f, -		              pData->dims.fDetScale, 1.0f, // TODO: Are these correct? -		              pData->bShortScan, dims3d, pData->angles); -	} - -	if (pData->m_pDevFilter) { - -		int iFFTRealDetCount = calcNextPowerOfTwo(2 * pData->dims.iProjDets); -		int iFFTFourDetCount = calcFFTFourSize(iFFTRealDetCount); - -		cufftComplex * pDevComplexSinogram = NULL; - -		allocateComplexOnDevice(pData->dims.iProjAngles, iFFTFourDetCount, &pDevComplexSinogram); - -		runCudaFFT(pData->dims.iProjAngles, pData->D_sinoData, pData->sinoPitch, pData->dims.iProjDets, iFFTRealDetCount, iFFTFourDetCount, pDevComplexSinogram); - -		applyFilter(pData->dims.iProjAngles, iFFTFourDetCount, pDevComplexSinogram, pData->m_pDevFilter); - -		runCudaIFFT(pData->dims.iProjAngles, pDevComplexSinogram, pData->D_sinoData, pData->sinoPitch, pData->dims.iProjDets, iFFTRealDetCount, iFFTFourDetCount); - -		freeComplexOnDevice(pDevComplexSinogram); - -	} - -	float fOutputScale = (M_PI / 2.0f) / (float)pData->dims.iProjAngles; - -	if (pData->bFanBeam) { -		ok = FanBP_FBPWeighted(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->fanProjections, fOutputScale); - -	} else { -		ok = BP(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->angles, pData->TOffsets, fOutputScale); -	} -	if(!ok) -	{ -		return false; -	} - -	return true; -} - -bool AstraFBP::getReconstruction(float* pfReconstruction, unsigned int iReconstructionPitch) const -{ -	if (!pData->initialized) -		return false; - -	bool ok = copyVolumeFromDevice(pfReconstruction, iReconstructionPitch, -	                               pData->dims, -	                               pData->D_volumeData, pData->volumePitch); -	if (!ok) -		return false; - -	return true; -} - -int AstraFBP::calcFourierFilterSize(int _iDetectorCount) -{ -	int iFFTRealDetCount = calcNextPowerOfTwo(2 * _iDetectorCount); -	int iFreqBinCount = calcFFTFourSize(iFFTRealDetCount); - -	// CHECKME: Matlab makes this at least 64. Do we also need to? -	return iFreqBinCount; -} - -bool AstraFBP::setFilter(E_FBPFILTER _eFilter, const float * _pfHostFilter /* = NULL */, int _iFilterWidth /* = 0 */, float _fD /* = 1.0f */, float _fFilterParameter /* = -1.0f */) -{ -	if(pData->m_pDevFilter != 0) -	{ -		freeComplexOnDevice(pData->m_pDevFilter); -		pData->m_pDevFilter = 0; -	} - -	if (_eFilter == FILTER_NONE) -		return true; // leave pData->m_pDevFilter set to 0 - - -	int iFFTRealDetCount = calcNextPowerOfTwo(2 * pData->dims.iProjDets); -	int iFreqBinCount = calcFFTFourSize(iFFTRealDetCount); - -	cufftComplex * pHostFilter = new cufftComplex[pData->dims.iProjAngles * iFreqBinCount]; -	memset(pHostFilter, 0, sizeof(cufftComplex) * pData->dims.iProjAngles * iFreqBinCount); - -	allocateComplexOnDevice(pData->dims.iProjAngles, iFreqBinCount, &(pData->m_pDevFilter)); - -	switch(_eFilter) -	{ -		case FILTER_NONE: -			// handled above -			break; -		case FILTER_RAMLAK: -		case FILTER_SHEPPLOGAN: -		case FILTER_COSINE: -		case FILTER_HAMMING: -		case FILTER_HANN: -		case FILTER_TUKEY: -		case FILTER_LANCZOS: -		case FILTER_TRIANGULAR: -		case FILTER_GAUSSIAN: -		case FILTER_BARTLETTHANN: -		case FILTER_BLACKMAN: -		case FILTER_NUTTALL: -		case FILTER_BLACKMANHARRIS: -		case FILTER_BLACKMANNUTTALL: -		case FILTER_FLATTOP: -		case FILTER_PARZEN: -		{ -			genFilter(_eFilter, _fD, pData->dims.iProjAngles, pHostFilter, iFFTRealDetCount, iFreqBinCount, _fFilterParameter); -			uploadComplexArrayToDevice(pData->dims.iProjAngles, iFreqBinCount, pHostFilter, pData->m_pDevFilter); - -			break; -		} -		case FILTER_PROJECTION: -		{ -			// make sure the offered filter has the correct size -			assert(_iFilterWidth == iFreqBinCount); - -			for(int iFreqBinIndex = 0; iFreqBinIndex < iFreqBinCount; iFreqBinIndex++) -			{ -				float fValue = _pfHostFilter[iFreqBinIndex]; - -				for(int iProjectionIndex = 0; iProjectionIndex < (int)pData->dims.iProjAngles; iProjectionIndex++) -				{ -					pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].x = fValue; -					pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].y = 0.0f; -				} -			} -			uploadComplexArrayToDevice(pData->dims.iProjAngles, iFreqBinCount, pHostFilter, pData->m_pDevFilter); -			break; -		} -		case FILTER_SINOGRAM: -		{ -			// make sure the offered filter has the correct size -			assert(_iFilterWidth == iFreqBinCount); - -			for(int iFreqBinIndex = 0; iFreqBinIndex < iFreqBinCount; iFreqBinIndex++) -			{ -				for(int iProjectionIndex = 0; iProjectionIndex < (int)pData->dims.iProjAngles; iProjectionIndex++) -				{ -					float fValue = _pfHostFilter[iFreqBinIndex + iProjectionIndex * _iFilterWidth]; - -					pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].x = fValue; -					pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].y = 0.0f; -				} -			} -			uploadComplexArrayToDevice(pData->dims.iProjAngles, iFreqBinCount, pHostFilter, pData->m_pDevFilter); -			break; -		} -		case FILTER_RPROJECTION: -		{ -			int iProjectionCount = pData->dims.iProjAngles; -			int iRealFilterElementCount = iProjectionCount * iFFTRealDetCount; -			float * pfHostRealFilter = new float[iRealFilterElementCount]; -			memset(pfHostRealFilter, 0, sizeof(float) * iRealFilterElementCount); - -			int iUsedFilterWidth = min(_iFilterWidth, iFFTRealDetCount); -			int iStartFilterIndex = (_iFilterWidth - iUsedFilterWidth) / 2; -			int iMaxFilterIndex = iStartFilterIndex + iUsedFilterWidth; - -			int iFilterShiftSize = _iFilterWidth / 2; - -			for(int iDetectorIndex = iStartFilterIndex; iDetectorIndex < iMaxFilterIndex; iDetectorIndex++) -			{ -				int iFFTInFilterIndex = (iDetectorIndex + iFFTRealDetCount - iFilterShiftSize) % iFFTRealDetCount; -				float fValue = _pfHostFilter[iDetectorIndex]; - -				for(int iProjectionIndex = 0; iProjectionIndex < (int)pData->dims.iProjAngles; iProjectionIndex++) -				{ -					pfHostRealFilter[iFFTInFilterIndex + iProjectionIndex * iFFTRealDetCount] = fValue; -				} -			} - -			float* pfDevRealFilter = NULL; -			cudaMalloc((void **)&pfDevRealFilter, sizeof(float) * iRealFilterElementCount); // TODO: check for errors -			cudaMemcpy(pfDevRealFilter, pfHostRealFilter, sizeof(float) * iRealFilterElementCount, cudaMemcpyHostToDevice); -			delete[] pfHostRealFilter; - -			runCudaFFT(iProjectionCount, pfDevRealFilter, iFFTRealDetCount, iFFTRealDetCount, iFFTRealDetCount, iFreqBinCount, pData->m_pDevFilter); - -			cudaFree(pfDevRealFilter); - -			break; -		} -		case FILTER_RSINOGRAM: -		{ -			int iProjectionCount = pData->dims.iProjAngles; -			int iRealFilterElementCount = iProjectionCount * iFFTRealDetCount; -			float* pfHostRealFilter = new float[iRealFilterElementCount]; -			memset(pfHostRealFilter, 0, sizeof(float) * iRealFilterElementCount); - -			int iUsedFilterWidth = min(_iFilterWidth, iFFTRealDetCount); -			int iStartFilterIndex = (_iFilterWidth - iUsedFilterWidth) / 2; -			int iMaxFilterIndex = iStartFilterIndex + iUsedFilterWidth; - -			int iFilterShiftSize = _iFilterWidth / 2; - -			for(int iDetectorIndex = iStartFilterIndex; iDetectorIndex < iMaxFilterIndex; iDetectorIndex++) -			{ -				int iFFTInFilterIndex = (iDetectorIndex + iFFTRealDetCount - iFilterShiftSize) % iFFTRealDetCount; - -				for(int iProjectionIndex = 0; iProjectionIndex < (int)pData->dims.iProjAngles; iProjectionIndex++) -				{ -					float fValue = _pfHostFilter[iDetectorIndex + iProjectionIndex * _iFilterWidth]; -					pfHostRealFilter[iFFTInFilterIndex + iProjectionIndex * iFFTRealDetCount] = fValue; -				} -			} - -			float* pfDevRealFilter = NULL; -			cudaMalloc((void **)&pfDevRealFilter, sizeof(float) * iRealFilterElementCount); // TODO: check for errors -			cudaMemcpy(pfDevRealFilter, pfHostRealFilter, sizeof(float) * iRealFilterElementCount, cudaMemcpyHostToDevice); -			delete[] pfHostRealFilter; - -			runCudaFFT(iProjectionCount, pfDevRealFilter, iFFTRealDetCount, iFFTRealDetCount, iFFTRealDetCount, iFreqBinCount, pData->m_pDevFilter); - -			cudaFree(pfDevRealFilter); - -			break; -		} -		default: -		{ -			ASTRA_ERROR("AstraFBP::setFilter: Unknown filter type requested"); -			delete [] pHostFilter; -			return false; -		} -	} - -	delete [] pHostFilter; - -	return true; -} -  BPalgo::BPalgo()  { @@ -615,18 +109,17 @@ float BPalgo::computeDiffNorm()  bool astraCudaFP(const float* pfVolume, float* pfSinogram,                   unsigned int iVolWidth, unsigned int iVolHeight,                   unsigned int iProjAngles, unsigned int iProjDets, -                 const float *pfAngles, const float *pfOffsets, -                 float fDetSize, unsigned int iDetSuperSampling, +                 const SParProjection *pAngles, +                 unsigned int iDetSuperSampling,                   float fOutputScale, int iGPUIndex)  {  	SDimensions dims; -	if (iProjAngles == 0 || iProjDets == 0 || pfAngles == 0) +	if (iProjAngles == 0 || iProjDets == 0 || pAngles == 0)  		return false;  	dims.iProjAngles = iProjAngles;  	dims.iProjDets = iProjDets; -	dims.fDetScale = fDetSize;  	if (iDetSuperSampling == 0)  		return false; @@ -676,7 +169,7 @@ bool astraCudaFP(const float* pfVolume, float* pfSinogram,  	}  	zeroProjectionData(D_sinoData, sinoPitch, dims); -	ok = FP(D_volumeData, volumePitch, D_sinoData, sinoPitch, dims, pfAngles, pfOffsets, fOutputScale); +	ok = FP(D_volumeData, volumePitch, D_sinoData, sinoPitch, dims, pAngles, fOutputScale);  	if (!ok) {  		cudaFree(D_volumeData);  		cudaFree(D_sinoData); @@ -711,7 +204,6 @@ bool astraCudaFanFP(const float* pfVolume, float* pfSinogram,  	dims.iProjAngles = iProjAngles;  	dims.iProjDets = iProjDets; -	dims.fDetScale = 1.0f; // TODO?  	if (iDetSuperSampling == 0)  		return false; @@ -787,123 +279,99 @@ bool astraCudaFanFP(const float* pfVolume, float* pfSinogram,  } -bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom, -                          const CParallelProjectionGeometry2D* pProjGeom, -                          float*& detectorOffsets, float*& projectionAngles, -                          float& detSize, float& outputScale) +// adjust pProjs to normalize volume geometry +template<typename ProjectionT> +static bool convertAstraGeometry_internal(const CVolumeGeometry2D* pVolGeom, +                          unsigned int iProjectionAngleCount, +                          ProjectionT*& pProjs, +                          float& fOutputScale)  { -	assert(pVolGeom); -	assert(pProjGeom); -	assert(pProjGeom->getProjectionAngles()); - +	// TODO: Make EPS relative  	const float EPS = 0.00001f; -	int nth = pProjGeom->getProjectionAngleCount(); -  	// Check if pixels are square  	if (abs(pVolGeom->getPixelLengthX() - pVolGeom->getPixelLengthY()) > EPS)  		return false; +	float dx = -(pVolGeom->getWindowMinX() + pVolGeom->getWindowMaxX()) / 2; +	float dy = -(pVolGeom->getWindowMinY() + pVolGeom->getWindowMaxY()) / 2; -	// Scale volume pixels to 1x1 -	detSize = pProjGeom->getDetectorWidth() / pVolGeom->getPixelLengthX(); +	float factor = 1.0f / pVolGeom->getPixelLengthX(); -	// Copy angles -	float *angles = new float[nth]; -	for (int i = 0; i < nth; ++i) -		angles[i] = pProjGeom->getProjectionAngles()[i]; -	projectionAngles = angles; - -	// Check if we need to translate -	bool offCenter = false; -	if (abs(pVolGeom->getWindowMinX() + pVolGeom->getWindowMaxX()) > EPS || -	    abs(pVolGeom->getWindowMinY() + pVolGeom->getWindowMaxY()) > EPS) -	{ -		offCenter = true; +	for (int i = 0; i < iProjectionAngleCount; ++i) { +		// CHECKME: Order of scaling and translation +		pProjs[i].translate(dx, dy); +		pProjs[i].scale(factor);  	} +	// CHECKME: Check factor +	fOutputScale *= pVolGeom->getPixelLengthX() * pVolGeom->getPixelLengthY(); -	// If there are existing detector offsets, or if we need to translate, -	// we need to return offsets -	if (pProjGeom->getExtraDetectorOffset() || offCenter) -	{ -		float* offset = new float[nth]; - -		if (pProjGeom->getExtraDetectorOffset()) { -			for (int i = 0; i < nth; ++i) -				offset[i] = pProjGeom->getExtraDetectorOffset()[i]; -		} else { -			for (int i = 0; i < nth; ++i) -				offset[i] = 0.0f; -		} +	return true; +} -		if (offCenter) { -			float dx = (pVolGeom->getWindowMinX() + pVolGeom->getWindowMaxX()) / 2; -			float dy = (pVolGeom->getWindowMinY() + pVolGeom->getWindowMaxY()) / 2; -			// CHECKME: Is d in pixels or in units? -			for (int i = 0; i < nth; ++i) { -				float d = dx * cos(angles[i]) + dy * sin(angles[i]); -				offset[i] += d; -			} -		} +bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom, +                          const CParallelProjectionGeometry2D* pProjGeom, +                          SParProjection*& pProjs, +                          float& fOutputScale) +{ +	assert(pVolGeom); +	assert(pProjGeom); +	assert(pProjGeom->getProjectionAngles()); -		// CHECKME: Order of scaling and translation +	int nth = pProjGeom->getProjectionAngleCount(); -		// Scale volume pixels to 1x1 -		for (int i = 0; i < nth; ++i) { -			//offset[i] /= pVolGeom->getPixelLengthX(); -			//offset[i] *= detSize; -		} +	pProjs = genParProjections(nth, +	                           pProjGeom->getDetectorCount(), +	                           pProjGeom->getDetectorWidth(), +	                           pProjGeom->getProjectionAngles(), 0); +	bool ok; +	fOutputScale = 1.0f; -		detectorOffsets = offset; -	} else { -		detectorOffsets = 0; -	} +	ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale); -	outputScale = pVolGeom->getPixelLengthX(); -	outputScale *= outputScale; +	if (!ok) { +		delete[] pProjs; +		pProjs = 0; +	} -	return true; +	return ok;  } -static void convertAstraGeometry_internal(const CVolumeGeometry2D* pVolGeom, -                          unsigned int iProjectionAngleCount, -                          astraCUDA::SFanProjection*& pProjs, -                          float& outputScale) +bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom, +                          const CParallelVecProjectionGeometry2D* pProjGeom, +                          SParProjection*& pProjs, +                          float& fOutputScale)  { -	// Translate -	float dx = (pVolGeom->getWindowMinX() + pVolGeom->getWindowMaxX()) / 2; -	float dy = (pVolGeom->getWindowMinY() + pVolGeom->getWindowMaxY()) / 2; +	assert(pVolGeom); +	assert(pProjGeom); +	assert(pProjGeom->getProjectionVectors()); -	for (int i = 0; i < iProjectionAngleCount; ++i) { -		pProjs[i].fSrcX -= dx; -		pProjs[i].fSrcY -= dy; -		pProjs[i].fDetSX -= dx; -		pProjs[i].fDetSY -= dy; +	int nth = pProjGeom->getProjectionAngleCount(); + +	pProjs = new SParProjection[nth]; + +	for (int i = 0; i < nth; ++i) { +		pProjs[i] = pProjGeom->getProjectionVectors()[i];  	} -	// CHECKME: Order of scaling and translation +	bool ok; +	fOutputScale = 1.0f; -	// Scale -	float factor = 1.0f / pVolGeom->getPixelLengthX(); -	for (int i = 0; i < iProjectionAngleCount; ++i) { -		pProjs[i].fSrcX *= factor; -		pProjs[i].fSrcY *= factor; -		pProjs[i].fDetSX *= factor; -		pProjs[i].fDetSY *= factor; -		pProjs[i].fDetUX *= factor; -		pProjs[i].fDetUY *= factor; +	ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale); +	if (!ok) { +		delete[] pProjs; +		pProjs = 0;  	} -	// CHECKME: Check factor -	outputScale = pVolGeom->getPixelLengthX(); -//	outputScale *= outputScale; +	return ok;  } +  bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,                            const CFanFlatProjectionGeometry2D* pProjGeom,                            astraCUDA::SFanProjection*& pProjs, @@ -913,6 +381,7 @@ bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,  	assert(pProjGeom);  	assert(pProjGeom->getProjectionAngles()); +	// TODO: Make EPS relative  	const float EPS = 0.00001f;  	int nth = pProjGeom->getProjectionAngleCount(); @@ -931,23 +400,9 @@ bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,  	float fDetSize = pProjGeom->getDetectorWidth();  	const float *pfAngles = pProjGeom->getProjectionAngles(); -	pProjs = new SFanProjection[nth]; - -	float fSrcX0 = 0.0f; -	float fSrcY0 = -fOriginSourceDistance; -	float fDetUX0 = fDetSize; -	float fDetUY0 = 0.0f; -	float fDetSX0 = pProjGeom->getDetectorCount() * fDetUX0 / -2.0f; -	float fDetSY0 = fOriginDetectorDistance; - -#define ROTATE0(name,i,alpha) do { pProjs[i].f##name##X = f##name##X0 * cos(alpha) - f##name##Y0 * sin(alpha); pProjs[i].f##name##Y = f##name##X0 * sin(alpha) + f##name##Y0 * cos(alpha); } while(0) -	for (int i = 0; i < nth; ++i) { -		ROTATE0(Src, i, pfAngles[i]); -		ROTATE0(DetS, i, pfAngles[i]); -		ROTATE0(DetU, i, pfAngles[i]); -	} - -#undef ROTATE0 +	pProjs = genFanProjections(nth, pProjGeom->getDetectorCount(), +                               fOriginSourceDistance, fOriginDetectorDistance, +	                           fDetSize, pfAngles);  	convertAstraGeometry_internal(pVolGeom, nth, pProjs, outputScale); @@ -964,6 +419,7 @@ bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,  	assert(pProjGeom);  	assert(pProjGeom->getProjectionVectors()); +	// TODO: Make EPS relative  	const float EPS = 0.00001f;  	int nx = pVolGeom->getGridColCount(); @@ -985,6 +441,51 @@ bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,  	return true;  } +bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom, +                          const CProjectionGeometry2D* pProjGeom, +                          astraCUDA::SParProjection*& pParProjs, +                          astraCUDA::SFanProjection*& pFanProjs, +                          float& outputScale) +{ +	const CParallelProjectionGeometry2D* parProjGeom = dynamic_cast<const CParallelProjectionGeometry2D*>(pProjGeom); +	const CParallelVecProjectionGeometry2D* parVecProjGeom = dynamic_cast<const CParallelVecProjectionGeometry2D*>(pProjGeom); +	const CFanFlatProjectionGeometry2D* fanProjGeom = dynamic_cast<const CFanFlatProjectionGeometry2D*>(pProjGeom); +	const CFanFlatVecProjectionGeometry2D* fanVecProjGeom = dynamic_cast<const CFanFlatVecProjectionGeometry2D*>(pProjGeom); + +	bool ok = false; + +	if (parProjGeom) { +		ok = convertAstraGeometry(pVolGeom, parProjGeom, pParProjs, outputScale); +	} else if (parVecProjGeom) { +		ok = convertAstraGeometry(pVolGeom, parVecProjGeom, pParProjs, outputScale); +	} else if (fanProjGeom) { +		ok = convertAstraGeometry(pVolGeom, fanProjGeom, pFanProjs, outputScale); +	} else if (fanVecProjGeom) { +		ok = convertAstraGeometry(pVolGeom, fanVecProjGeom, pFanProjs, outputScale); +	} else { +		ok = false; +	} + +	return ok; +} + +bool convertAstraGeometry_dims(const CVolumeGeometry2D* pVolGeom, +                               const CProjectionGeometry2D* pProjGeom, +                               SDimensions& dims) +{ +	dims.iVolWidth = pVolGeom->getGridColCount(); +	dims.iVolHeight = pVolGeom->getGridRowCount(); + +	dims.iProjAngles = pProjGeom->getProjectionAngleCount(); +	dims.iProjDets = pProjGeom->getDetectorCount(); + +	dims.iRaysPerDet = 1; +	dims.iRaysPerPixelDim = 1; + +	return true; +} + +  } diff --git a/cuda/2d/astra.h b/cuda/2d/astra.h index e4cefac..9355cb4 100644 --- a/cuda/2d/astra.h +++ b/cuda/2d/astra.h @@ -41,140 +41,12 @@ enum Cuda2DProjectionKernel {  };  class CParallelProjectionGeometry2D; +class CParallelVecProjectionGeometry2D;  class CFanFlatProjectionGeometry2D;  class CFanFlatVecProjectionGeometry2D;  class CVolumeGeometry2D; +class CProjectionGeometry2D; -class AstraFBP_internal; - -class _AstraExport AstraFBP { -public: -	// Constructor -	AstraFBP(); - -	// Destructor -	~AstraFBP(); - -	// Set the size of the reconstruction rectangle. -	// Volume pixels are currently assumed to be 1x1 squares. -	bool setReconstructionGeometry(unsigned int iVolWidth, -	                               unsigned int iVolHeight, -	                               float fPixelSize = 1.0f); - -	// Set the projection angles and number of detector pixels per angle. -	// pfAngles must be a float array of length iProjAngles. -	// fDetSize indicates the size of a detector pixel compared to a -	// volume pixel edge. -	// -	// pfAngles will only be read from during this call. -	bool setProjectionGeometry(unsigned int iProjAngles, -	                           unsigned int iProjDets, -	                           const float *pfAngles, -	                           float fDetSize = 1.0f); -	// Set the projection angles and number of detector pixels per angle. -	// pfAngles must be a float array of length iProjAngles. -	// fDetSize indicates the size of a detector pixel compared to a -	// volume pixel edge. -	// -	// pfAngles, fanProjs will only be read from during this call. -	bool setFanGeometry(unsigned int iProjAngles, -	                    unsigned int iProjDets, -	                    const astraCUDA::SFanProjection *fanProjs, -	                    const float *pfAngles, -	                    float fOriginSourceDistance, -	                    float fOriginDetectorDistance, -	                    float fDetSize = 1.0f, -	                    bool bShortScan = false); - -	// Set linear supersampling factor for the BP. -	// (The number of rays is the square of this) -	// -	// This may optionally be called before init(). -	bool setPixelSuperSampling(unsigned int iPixelSuperSampling); - -	// Set per-detector shifts. -	// -	// pfTOffsets will only be read from during this call. -	bool setTOffsets(const float *pfTOffsets); - -	// Returns the required size of a filter in the fourier domain -	// when multiplying it with the fft of the projection data. -	// Its value is equal to the smallest power of two larger than -	// or equal to twice the number of detectors in the spatial domain. -	// -	// _iDetectorCount is the number of detectors in the spatial domain. -	static int calcFourierFilterSize(int _iDetectorCount); - -	// Sets the filter type. Some filter types require the user to supply an -	// array containing the filter. -	// The number of elements in a filter in the fourier domain should be equal -	// to the value returned by calcFourierFilterSize(). -	// The following types require a filter: -	// -	// - FILTER_PROJECTION: -	// The filter size should be equal to the output of -	// calcFourierFilterSize(). The filtered sinogram is -	// multiplied with the supplied filter. -	// -	// - FILTER_SINOGRAM: -	// Same as FILTER_PROJECTION, but now the filter should contain a row for -	// every projection direction. -	// -	// - FILTER_RPROJECTION: -	// The filter should now contain one kernel (= ifft of filter), with the -	// peak in the center. The filter width -	// can be any value. If odd, the peak is assumed to be in the center, if -	// even, it is assumed to be at floor(filter-width/2). -	// -	// - FILTER_RSINOGRAM -	// Same as FILTER_RPROJECTION, but now the supplied filter should contain a -	// row for every projection direction. -	// -	// A large number of other filters (FILTER_RAMLAK, FILTER_SHEPPLOGAN, -	// FILTER_COSINE, FILTER_HAMMING, and FILTER_HANN) -	// have a D variable, which gives the cutoff point in the frequency domain. -	// Setting this value to 1.0 will include the whole filter -	bool setFilter(E_FBPFILTER _eFilter, -                   const float * _pfHostFilter = NULL, -                   int _iFilterWidth = 0, float _fD = 1.0f, float _fFilterParameter = -1.0f); - -	// Initialize CUDA, allocate GPU buffers and -	// precompute geometry-specific data. -	// -	// CUDA is set up to use GPU number iGPUIndex. -	// -	// This must be called after calling setReconstructionGeometry() and -	// setProjectionGeometry(). -	bool init(int iGPUIndex = 0); - -	// Setup input sinogram for a slice. -	// pfSinogram must be a float array of size iProjAngles*iSinogramPitch. -	// NB: iSinogramPitch is measured in floats, not in bytes. -	// -	// This must be called after init(), and before iterate(). It may be -	// called again after iterate()/getReconstruction() to start a new slice. -	// -	// pfSinogram will only be read from during this call. -	bool setSinogram(const float* pfSinogram, unsigned int iSinogramPitch); - -	// Runs an FBP reconstruction. -	// This must be called after setSinogram(). -	// -	// run can be called before setFilter, but will then use the default Ram-Lak filter -	bool run(); - -	// Get the reconstructed slice. -	// pfReconstruction must be a float array of size -	// iVolHeight*iReconstructionPitch. -	// NB: iReconstructionPitch is measured in floats, not in bytes. -	// -	// This may be called after run(). -	bool getReconstruction(float* pfReconstruction, -	                       unsigned int iReconstructionPitch) const; - -private: -	AstraFBP_internal* pData; -};  class _AstraExport BPalgo : public astraCUDA::ReconAlgo {  public: @@ -197,8 +69,8 @@ public:  _AstraExport bool astraCudaFP(const float* pfVolume, float* pfSinogram,                   unsigned int iVolWidth, unsigned int iVolHeight,                   unsigned int iProjAngles, unsigned int iProjDets, -                 const float *pfAngles, const float *pfOffsets, -                 float fDetSize = 1.0f, unsigned int iDetSuperSampling = 1, +                 const SParProjection *pAngles, +                 unsigned int iDetSuperSampling = 1,                   float fOutputScale = 1.0f, int iGPUIndex = 0);  _AstraExport bool astraCudaFanFP(const float* pfVolume, float* pfSinogram, @@ -211,8 +83,14 @@ _AstraExport bool astraCudaFanFP(const float* pfVolume, float* pfSinogram,  _AstraExport bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,                      const CParallelProjectionGeometry2D* pProjGeom, -                    float*& pfDetectorOffsets, float*& pfProjectionAngles, -                    float& fDetSize, float& fOutputScale); +                    astraCUDA::SParProjection*& pProjs, +                    float& fOutputScale); + +_AstraExport bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom, +                    const CParallelVecProjectionGeometry2D* pProjGeom, +                    astraCUDA::SParProjection*& pProjs, +                    float& fOutputScale); +  _AstraExport bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,                      const CFanFlatProjectionGeometry2D* pProjGeom, @@ -224,6 +102,15 @@ _AstraExport bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,                      astraCUDA::SFanProjection*& pProjs,                      float& outputScale); +_AstraExport bool convertAstraGeometry_dims(const CVolumeGeometry2D* pVolGeom, +                          const CProjectionGeometry2D* pProjGeom, +                          astraCUDA::SDimensions& dims); + +_AstraExport bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom, +                          const CProjectionGeometry2D* pProjGeom, +                          astraCUDA::SParProjection*& pParProjs, +                          astraCUDA::SFanProjection*& pFanProjs, +                          float& outputScale);  }  namespace astraCUDA { diff --git a/cuda/2d/cgls.cu b/cuda/2d/cgls.cu index 6962d81..1a31438 100644 --- a/cuda/2d/cgls.cu +++ b/cuda/2d/cgls.cu @@ -206,42 +206,6 @@ float CGLS::computeDiffNorm()  	return sqrt(s);  } -bool doCGLS(float* D_volumeData, unsigned int volumePitch, -            float* D_sinoData, unsigned int sinoPitch, -            const SDimensions& dims, /*const SAugmentedData& augs,*/ -            const float* angles, const float* TOffsets, unsigned int iterations) -{ -	CGLS cgls; -	bool ok = true; - -	ok &= cgls.setGeometry(dims, angles); -#if 0 -	if (D_maskData) -		ok &= cgls.enableVolumeMask(); -#endif -	if (TOffsets) -		ok &= cgls.setTOffsets(TOffsets); - -	if (!ok) -		return false; - -	ok = cgls.init(); -	if (!ok) -		return false; - -#if 0 -	if (D_maskData) -		ok &= cgls.setVolumeMask(D_maskData, maskPitch); -#endif - -	ok &= cgls.setBuffers(D_volumeData, volumePitch, D_sinoData, sinoPitch); -	if (!ok) -		return false; - -	ok = cgls.iterate(iterations); - -	return ok; -}  } diff --git a/cuda/2d/dims.h b/cuda/2d/dims.h index afb997a..10eaabf 100644 --- a/cuda/2d/dims.h +++ b/cuda/2d/dims.h @@ -33,9 +33,11 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.  namespace astraCUDA { +using astra::SParProjection;  using astra::SFanProjection; +  struct SDimensions {  	// Width, height of reconstruction volume  	unsigned int iVolWidth; @@ -47,9 +49,6 @@ struct SDimensions {  	// Number of detector pixels  	unsigned int iProjDets; -	// size of detector compared to volume pixels -	float fDetScale; -  	// in FP, number of rays to trace per detector pixel.  	// This should usually be set to 1.  	// If fDetScale > 1, this should be set to an integer of roughly diff --git a/cuda/2d/em.cu b/cuda/2d/em.cu index 11ed45b..03cc652 100644 --- a/cuda/2d/em.cu +++ b/cuda/2d/em.cu @@ -169,34 +169,6 @@ float EM::computeDiffNorm()  } -bool doEM(float* D_volumeData, unsigned int volumePitch, -          float* D_sinoData, unsigned int sinoPitch, -          const SDimensions& dims, const float* angles, -          const float* TOffsets, unsigned int iterations) -{ -	EM em; -	bool ok = true; - -	ok &= em.setGeometry(dims, angles); -	if (TOffsets) -		ok &= em.setTOffsets(TOffsets); - -	if (!ok) -		return false; - -	ok = em.init(); -	if (!ok) -		return false; - -	ok &= em.setBuffers(D_volumeData, volumePitch, D_sinoData, sinoPitch); -	if (!ok) -		return false; - -	ok = em.iterate(iterations); - -	return ok; -} -  }  #ifdef STANDALONE diff --git a/cuda/2d/fbp.cu b/cuda/2d/fbp.cu new file mode 100644 index 0000000..365fc88 --- /dev/null +++ b/cuda/2d/fbp.cu @@ -0,0 +1,347 @@ +/* +----------------------------------------------------------------------- +Copyright: 2010-2015, iMinds-Vision Lab, University of Antwerp +           2014-2015, CWI, Amsterdam + +Contact: astra@uantwerpen.be +Website: http://sf.net/projects/astra-toolbox + +This file is part of the ASTRA Toolbox. + + +The ASTRA Toolbox is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +The ASTRA Toolbox is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>. + +----------------------------------------------------------------------- +$Id$ +*/ + +#include "fbp.h" +#include "fft.h" +#include "par_bp.h" +#include "fan_bp.h" +#include "util.h" + +// For fan-beam preweighting +#include "../3d/fdk.h" + +#include "astra/Logging.h" + +#include <cuda.h> + +namespace astraCUDA { + + + +static int calcNextPowerOfTwo(int n) +{ +	int x = 1; +	while (x < n) +		x *= 2; + +	return x; +} + +// static +int FBP::calcFourierFilterSize(int _iDetectorCount) +{ +	int iFFTRealDetCount = calcNextPowerOfTwo(2 * _iDetectorCount); +	int iFreqBinCount = calcFFTFourierSize(iFFTRealDetCount); + +	// CHECKME: Matlab makes this at least 64. Do we also need to? +	return iFreqBinCount; +} + + + + +FBP::FBP() : ReconAlgo() +{ +	D_filter = 0; + +} + +FBP::~FBP() +{ +	reset(); +} + +void FBP::reset() +{ +	if (D_filter) { +		freeComplexOnDevice((cufftComplex *)D_filter); +		D_filter = 0; +	} +} + +bool FBP::init() +{ +	return true; +} + +bool FBP::setFilter(astra::E_FBPFILTER _eFilter, const float * _pfHostFilter /* = NULL */, int _iFilterWidth /* = 0 */, float _fD /* = 1.0f */, float _fFilterParameter /* = -1.0f */) +{ +	if (D_filter) +	{ +		freeComplexOnDevice((cufftComplex*)D_filter); +		D_filter = 0; +	} + +	if (_eFilter == astra::FILTER_NONE) +		return true; // leave D_filter set to 0 + + +	int iFFTRealDetCount = calcNextPowerOfTwo(2 * dims.iProjDets); +	int iFreqBinCount = calcFFTFourierSize(iFFTRealDetCount); + +	cufftComplex * pHostFilter = new cufftComplex[dims.iProjAngles * iFreqBinCount]; +	memset(pHostFilter, 0, sizeof(cufftComplex) * dims.iProjAngles * iFreqBinCount); + +	allocateComplexOnDevice(dims.iProjAngles, iFreqBinCount, (cufftComplex**)&D_filter); + +	switch(_eFilter) +	{ +		case astra::FILTER_NONE: +			// handled above +			break; +		case astra::FILTER_RAMLAK: +		case astra::FILTER_SHEPPLOGAN: +		case astra::FILTER_COSINE: +		case astra::FILTER_HAMMING: +		case astra::FILTER_HANN: +		case astra::FILTER_TUKEY: +		case astra::FILTER_LANCZOS: +		case astra::FILTER_TRIANGULAR: +		case astra::FILTER_GAUSSIAN: +		case astra::FILTER_BARTLETTHANN: +		case astra::FILTER_BLACKMAN: +		case astra::FILTER_NUTTALL: +		case astra::FILTER_BLACKMANHARRIS: +		case astra::FILTER_BLACKMANNUTTALL: +		case astra::FILTER_FLATTOP: +		case astra::FILTER_PARZEN: +		{ +			genFilter(_eFilter, _fD, dims.iProjAngles, pHostFilter, iFFTRealDetCount, iFreqBinCount, _fFilterParameter); +			uploadComplexArrayToDevice(dims.iProjAngles, iFreqBinCount, pHostFilter, (cufftComplex*)D_filter); + +			break; +		} +		case astra::FILTER_PROJECTION: +		{ +			// make sure the offered filter has the correct size +			assert(_iFilterWidth == iFreqBinCount); + +			for(int iFreqBinIndex = 0; iFreqBinIndex < iFreqBinCount; iFreqBinIndex++) +			{ +				float fValue = _pfHostFilter[iFreqBinIndex]; + +				for(int iProjectionIndex = 0; iProjectionIndex < (int)dims.iProjAngles; iProjectionIndex++) +				{ +					pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].x = fValue; +					pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].y = 0.0f; +				} +			} +			uploadComplexArrayToDevice(dims.iProjAngles, iFreqBinCount, pHostFilter, (cufftComplex*)D_filter); +			break; +		} +		case astra::FILTER_SINOGRAM: +		{ +			// make sure the offered filter has the correct size +			assert(_iFilterWidth == iFreqBinCount); + +			for(int iFreqBinIndex = 0; iFreqBinIndex < iFreqBinCount; iFreqBinIndex++) +			{ +				for(int iProjectionIndex = 0; iProjectionIndex < (int)dims.iProjAngles; iProjectionIndex++) +				{ +					float fValue = _pfHostFilter[iFreqBinIndex + iProjectionIndex * _iFilterWidth]; + +					pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].x = fValue; +					pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].y = 0.0f; +				} +			} +			uploadComplexArrayToDevice(dims.iProjAngles, iFreqBinCount, pHostFilter, (cufftComplex*)D_filter); +			break; +		} +		case astra::FILTER_RPROJECTION: +		{ +			int iProjectionCount = dims.iProjAngles; +			int iRealFilterElementCount = iProjectionCount * iFFTRealDetCount; +			float * pfHostRealFilter = new float[iRealFilterElementCount]; +			memset(pfHostRealFilter, 0, sizeof(float) * iRealFilterElementCount); + +			int iUsedFilterWidth = min(_iFilterWidth, iFFTRealDetCount); +			int iStartFilterIndex = (_iFilterWidth - iUsedFilterWidth) / 2; +			int iMaxFilterIndex = iStartFilterIndex + iUsedFilterWidth; + +			int iFilterShiftSize = _iFilterWidth / 2; + +			for(int iDetectorIndex = iStartFilterIndex; iDetectorIndex < iMaxFilterIndex; iDetectorIndex++) +			{ +				int iFFTInFilterIndex = (iDetectorIndex + iFFTRealDetCount - iFilterShiftSize) % iFFTRealDetCount; +				float fValue = _pfHostFilter[iDetectorIndex]; + +				for(int iProjectionIndex = 0; iProjectionIndex < (int)dims.iProjAngles; iProjectionIndex++) +				{ +					pfHostRealFilter[iFFTInFilterIndex + iProjectionIndex * iFFTRealDetCount] = fValue; +				} +			} + +			float* pfDevRealFilter = NULL; +			cudaMalloc((void **)&pfDevRealFilter, sizeof(float) * iRealFilterElementCount); // TODO: check for errors +			cudaMemcpy(pfDevRealFilter, pfHostRealFilter, sizeof(float) * iRealFilterElementCount, cudaMemcpyHostToDevice); +			delete[] pfHostRealFilter; + +			runCudaFFT(iProjectionCount, pfDevRealFilter, iFFTRealDetCount, iFFTRealDetCount, iFFTRealDetCount, iFreqBinCount, (cufftComplex*)D_filter); + +			cudaFree(pfDevRealFilter); + +			break; +		} +		case astra::FILTER_RSINOGRAM: +		{ +			int iProjectionCount = dims.iProjAngles; +			int iRealFilterElementCount = iProjectionCount * iFFTRealDetCount; +			float* pfHostRealFilter = new float[iRealFilterElementCount]; +			memset(pfHostRealFilter, 0, sizeof(float) * iRealFilterElementCount); + +			int iUsedFilterWidth = min(_iFilterWidth, iFFTRealDetCount); +			int iStartFilterIndex = (_iFilterWidth - iUsedFilterWidth) / 2; +			int iMaxFilterIndex = iStartFilterIndex + iUsedFilterWidth; + +			int iFilterShiftSize = _iFilterWidth / 2; + +			for(int iDetectorIndex = iStartFilterIndex; iDetectorIndex < iMaxFilterIndex; iDetectorIndex++) +			{ +				int iFFTInFilterIndex = (iDetectorIndex + iFFTRealDetCount - iFilterShiftSize) % iFFTRealDetCount; + +				for(int iProjectionIndex = 0; iProjectionIndex < (int)dims.iProjAngles; iProjectionIndex++) +				{ +					float fValue = _pfHostFilter[iDetectorIndex + iProjectionIndex * _iFilterWidth]; +					pfHostRealFilter[iFFTInFilterIndex + iProjectionIndex * iFFTRealDetCount] = fValue; +				} +			} + +			float* pfDevRealFilter = NULL; +			cudaMalloc((void **)&pfDevRealFilter, sizeof(float) * iRealFilterElementCount); // TODO: check for errors +			cudaMemcpy(pfDevRealFilter, pfHostRealFilter, sizeof(float) * iRealFilterElementCount, cudaMemcpyHostToDevice); +			delete[] pfHostRealFilter; + +			runCudaFFT(iProjectionCount, pfDevRealFilter, iFFTRealDetCount, iFFTRealDetCount, iFFTRealDetCount, iFreqBinCount, (cufftComplex*)D_filter); + +			cudaFree(pfDevRealFilter); + +			break; +		} +		default: +		{ +			ASTRA_ERROR("FBP::setFilter: Unknown filter type requested"); +			delete [] pHostFilter; +			return false; +		} +	} + +	delete [] pHostFilter; + +	return true; +} + +bool FBP::iterate(unsigned int iterations) +{ +	zeroVolumeData(D_volumeData, volumePitch, dims); + +	bool ok = false; + +	if (fanProjs) { +		// Call FDK_PreWeight to handle fan beam geometry. We treat +		// this as a cone beam setup of a single slice: + +		// TODO: TOffsets affects this preweighting... + +		// TODO: We take the fan parameters from the last projection here +		// without checking if they're the same in all projections + +		float *pfAngles = new float[dims.iProjAngles]; + +		float fOriginSource, fOriginDetector, fDetSize, fOffset; +		for (unsigned int i = 0; i < dims.iProjAngles; ++i) { +			bool ok = astra::getFanParameters(fanProjs[i], dims.iProjDets, +			                                  pfAngles[i], +			                                  fOriginSource, fOriginDetector, +			                                  fDetSize, fOffset); +			if (!ok) { +				ASTRA_ERROR("FBP_CUDA: Failed to extract circular fan beam parameters from fan beam geometry"); +				return false; +			} +		} + +		// We create a fake cudaPitchedPtr +		cudaPitchedPtr tmp; +		tmp.ptr = D_sinoData; +		tmp.pitch = sinoPitch * sizeof(float); +		tmp.xsize = dims.iProjDets; +		tmp.ysize = dims.iProjAngles; +		// and a fake Dimensions3D +		astraCUDA3d::SDimensions3D dims3d; +		dims3d.iVolX = dims.iVolWidth; +		dims3d.iVolY = dims.iVolHeight; +		dims3d.iVolZ = 1; +		dims3d.iProjAngles = dims.iProjAngles; +		dims3d.iProjU = dims.iProjDets; +		dims3d.iProjV = 1; + +		astraCUDA3d::FDK_PreWeight(tmp, fOriginSource, +		              fOriginDetector, 0.0f, +		              fDetSize, 1.0f, +		              m_bShortScan, dims3d, pfAngles); +	} else { +		// TODO: How should different detector pixel size in different +		// projections be handled? +	} + +	if (D_filter) { + +		int iFFTRealDetCount = calcNextPowerOfTwo(2 * dims.iProjDets); +		int iFFTFourDetCount = calcFFTFourierSize(iFFTRealDetCount); + +		cufftComplex * pDevComplexSinogram = NULL; + +		allocateComplexOnDevice(dims.iProjAngles, iFFTFourDetCount, &pDevComplexSinogram); + +		runCudaFFT(dims.iProjAngles, D_sinoData, sinoPitch, dims.iProjDets, iFFTRealDetCount, iFFTFourDetCount, pDevComplexSinogram); + +		applyFilter(dims.iProjAngles, iFFTFourDetCount, pDevComplexSinogram, (cufftComplex*)D_filter); + +		runCudaIFFT(dims.iProjAngles, pDevComplexSinogram, D_sinoData, sinoPitch, dims.iProjDets, iFFTRealDetCount, iFFTFourDetCount); + +		freeComplexOnDevice(pDevComplexSinogram); + +	} + +	float fOutputScale = (M_PI / 2.0f) / (float)dims.iProjAngles; + +	if (fanProjs) { +		ok = FanBP_FBPWeighted(D_volumeData, volumePitch, D_sinoData, sinoPitch, dims, fanProjs, fOutputScale); + +	} else { +		ok = BP(D_volumeData, volumePitch, D_sinoData, sinoPitch, dims, parProjs, fOutputScale); +	} +	if(!ok) +	{ +		return false; +	} + +	return true; +} + + +} diff --git a/cuda/2d/fbp.h b/cuda/2d/fbp.h new file mode 100644 index 0000000..8b4d64d --- /dev/null +++ b/cuda/2d/fbp.h @@ -0,0 +1,98 @@ +/* +----------------------------------------------------------------------- +Copyright: 2010-2015, iMinds-Vision Lab, University of Antwerp +           2014-2015, CWI, Amsterdam + +Contact: astra@uantwerpen.be +Website: http://sf.net/projects/astra-toolbox + +This file is part of the ASTRA Toolbox. + + +The ASTRA Toolbox is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +The ASTRA Toolbox is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>. + +----------------------------------------------------------------------- +$Id$ +*/ + +#include "algo.h" +#include "fbp_filters.h" + +namespace astraCUDA { + +class _AstraExport FBP : public ReconAlgo { +public: +	FBP(); +	~FBP(); + +	virtual bool useSinogramMask() { return false; } +	virtual bool useVolumeMask() { return false; } + +	// Returns the required size of a filter in the fourier domain +	// when multiplying it with the fft of the projection data. +	// Its value is equal to the smallest power of two larger than +	// or equal to twice the number of detectors in the spatial domain. +	// +	// _iDetectorCount is the number of detectors in the spatial domain. +	static int calcFourierFilterSize(int _iDetectorCount); + +	// Sets the filter type. Some filter types require the user to supply an +	// array containing the filter. +	// The number of elements in a filter in the fourier domain should be equal +	// to the value returned by calcFourierFilterSize(). +	// The following types require a filter: +	// +	// - FILTER_PROJECTION: +	// The filter size should be equal to the output of +	// calcFourierFilterSize(). The filtered sinogram is +	// multiplied with the supplied filter. +	// +	// - FILTER_SINOGRAM: +	// Same as FILTER_PROJECTION, but now the filter should contain a row for +	// every projection direction. +	// +	// - FILTER_RPROJECTION: +	// The filter should now contain one kernel (= ifft of filter), with the +	// peak in the center. The filter width +	// can be any value. If odd, the peak is assumed to be in the center, if +	// even, it is assumed to be at floor(filter-width/2). +	// +	// - FILTER_RSINOGRAM +	// Same as FILTER_RPROJECTION, but now the supplied filter should contain a +	// row for every projection direction. +	// +	// A large number of other filters (FILTER_RAMLAK, FILTER_SHEPPLOGAN, +	// FILTER_COSINE, FILTER_HAMMING, and FILTER_HANN) +	// have a D variable, which gives the cutoff point in the frequency domain. +	// Setting this value to 1.0 will include the whole filter +	bool setFilter(astra::E_FBPFILTER _eFilter, +                   const float * _pfHostFilter = NULL, +                   int _iFilterWidth = 0, float _fD = 1.0f, float _fFilterParameter = -1.0f); + +	bool setShortScan(bool ss) { m_bShortScan = ss; return true; } + +	virtual bool init(); + +	virtual bool iterate(unsigned int iterations); + +	virtual float computeDiffNorm() { return 0.0f; } // TODO + +protected: +	void reset(); + +	void* D_filter; // cufftComplex* +	bool m_bShortScan; +}; + +} diff --git a/cuda/2d/fbp_filters.h b/cuda/2d/fbp_filters.h index f1223c5..bf1342d 100644 --- a/cuda/2d/fbp_filters.h +++ b/cuda/2d/fbp_filters.h @@ -28,6 +28,8 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.  #ifndef FBP_FILTERS_H  #define FBP_FILTERS_H +namespace astra { +  enum E_FBPFILTER  {  	FILTER_NONE,			//< no filter (regular BP) @@ -54,4 +56,6 @@ enum E_FBPFILTER  	FILTER_RSINOGRAM,		//< sinogram filter in real space  }; +} +  #endif /* FBP_FILTERS_H */ diff --git a/cuda/2d/fft.cu b/cuda/2d/fft.cu index 654bbae..5500e14 100644 --- a/cuda/2d/fft.cu +++ b/cuda/2d/fft.cu @@ -63,6 +63,8 @@ using namespace astra;    } } while (0) +namespace astraCUDA { +  __global__ static void applyFilter_kernel(int _iProjectionCount,                                            int _iFreqBinCount,                                            cufftComplex * _pSinogram, @@ -275,11 +277,11 @@ bool runCudaIFFT(int _iProjectionCount, const cufftComplex* _pDevSourceComplex,  // Because the input is real, the Fourier transform is symmetric.  // CUFFT only outputs the first half (ignoring the redundant second half),  // and expects the same as input for the IFFT. -int calcFFTFourSize(int _iFFTRealSize) +int calcFFTFourierSize(int _iFFTRealSize)  { -	int iFFTFourSize = _iFFTRealSize / 2 + 1; +	int iFFTFourierSize = _iFFTRealSize / 2 + 1; -	return iFFTFourSize; +	return iFFTFourierSize;  }  void genIdenFilter(int _iProjectionCount, cufftComplex * _pFilter, @@ -694,6 +696,10 @@ void genFilter(E_FBPFILTER _eFilter, float _fD, int _iProjectionCount,  	delete[] pfW;  } + +} + +  #ifdef STANDALONE  __global__ static void doubleFourierOutput_kernel(int _iProjectionCount, diff --git a/cuda/2d/fft.h b/cuda/2d/fft.h index 91f05f2..3e91043 100644 --- a/cuda/2d/fft.h +++ b/cuda/2d/fft.h @@ -33,6 +33,8 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.  #include "fbp_filters.h" +namespace astraCUDA { +  bool allocateComplexOnDevice(int _iProjectionCount,                               int _iDetectorCount,                               cufftComplex ** _ppDevComplex); @@ -56,13 +58,15 @@ bool runCudaIFFT(int _iProjectionCount, const cufftComplex* _pDevSourceComplex,  void applyFilter(int _iProjectionCount, int _iFreqBinCount,                   cufftComplex * _pSinogram, cufftComplex * _pFilter); -int calcFFTFourSize(int _iFFTRealSize); +int calcFFTFourierSize(int _iFFTRealSize); -void genFilter(E_FBPFILTER _eFilter, float _fD, int _iProjectionCount, +void genFilter(astra::E_FBPFILTER _eFilter, float _fD, int _iProjectionCount,                 cufftComplex * _pFilter, int _iFFTRealDetectorCount,                 int _iFFTFourierDetectorCount, float _fParameter = -1.0f);  void genIdenFilter(int _iProjectionCount, cufftComplex * _pFilter,                     int _iFFTRealDetectorCount, int _iFFTFourierDetectorCount); +} +  #endif /* FFT_H */ diff --git a/cuda/2d/par_bp.cu b/cuda/2d/par_bp.cu index dfdd193..307c561 100644 --- a/cuda/2d/par_bp.cu +++ b/cuda/2d/par_bp.cu @@ -52,8 +52,8 @@ const unsigned int g_blockSlices = 16;  const unsigned int g_MaxAngles = 2560; -__constant__ float gC_angle_sin[g_MaxAngles]; -__constant__ float gC_angle_cos[g_MaxAngles]; +__constant__ float gC_angle_scaled_sin[g_MaxAngles]; +__constant__ float gC_angle_scaled_cos[g_MaxAngles];  __constant__ float gC_angle_offset[g_MaxAngles];  static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int width, unsigned int height, cudaTextureAddressMode mode = cudaAddressModeBorder) @@ -72,7 +72,7 @@ static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int wi  	return true;  } -__global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, bool offsets, const SDimensions dims, float fOutputScale) +__global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale)  {  	const int relX = threadIdx.x;  	const int relY = threadIdx.y; @@ -86,47 +86,30 @@ __global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int star  	if (X >= dims.iVolWidth || Y >= dims.iVolHeight)  		return; -	const float fX = ( X - 0.5f*dims.iVolWidth + 0.5f ) / dims.fDetScale; -	const float fY = ( Y - 0.5f*dims.iVolHeight + 0.5f ) / dims.fDetScale; +	const float fX = ( X - 0.5f*dims.iVolWidth + 0.5f ); +	const float fY = ( Y - 0.5f*dims.iVolHeight + 0.5f );  	float* volData = (float*)D_volData;  	float fVal = 0.0f;  	float fA = startAngle + 0.5f; -	const float fT_base = 0.5f*dims.iProjDets - 0.5f + 0.5f; -	if (offsets) { - -		for (int angle = startAngle; angle < endAngle; ++angle) -		{ -			const float cos_theta = gC_angle_cos[angle]; -			const float sin_theta = gC_angle_sin[angle]; -			const float TOffset = gC_angle_offset[angle]; - -			const float fT = fT_base + fX * cos_theta - fY * sin_theta + TOffset; -			fVal += tex2D(gT_projTexture, fT, fA); -			fA += 1.0f; -		} - -	} else { - -		for (int angle = startAngle; angle < endAngle; ++angle) -		{ -			const float cos_theta = gC_angle_cos[angle]; -			const float sin_theta = gC_angle_sin[angle]; - -			const float fT = fT_base + fX * cos_theta - fY * sin_theta; -			fVal += tex2D(gT_projTexture, fT, fA); -			fA += 1.0f; -		} +	for (int angle = startAngle; angle < endAngle; ++angle) +	{ +		const float scaled_cos_theta = gC_angle_scaled_cos[angle]; +		const float scaled_sin_theta = gC_angle_scaled_sin[angle]; +		const float TOffset = gC_angle_offset[angle]; +		const float fT = fX * scaled_cos_theta - fY * scaled_sin_theta + TOffset; +		fVal += tex2D(gT_projTexture, fT, fA); +		fA += 1.0f;  	}  	volData[Y*volPitch+X] += fVal * fOutputScale;  }  // supersampling version -__global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, bool offsets, const SDimensions dims, float fOutputScale) +__global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale)  {  	const int relX = threadIdx.x;  	const int relY = threadIdx.y; @@ -140,61 +123,35 @@ __global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int s  	if (X >= dims.iVolWidth || Y >= dims.iVolHeight)  		return; -	const float fX = ( X - 0.5f*dims.iVolWidth + 0.5f - 0.5f + 0.5f/dims.iRaysPerPixelDim) / dims.fDetScale; -	const float fY = ( Y - 0.5f*dims.iVolHeight + 0.5f - 0.5f + 0.5f/dims.iRaysPerPixelDim) / dims.fDetScale; +	const float fX = ( X - 0.5f*dims.iVolWidth + 0.5f - 0.5f + 0.5f/dims.iRaysPerPixelDim); +	const float fY = ( Y - 0.5f*dims.iVolHeight + 0.5f - 0.5f + 0.5f/dims.iRaysPerPixelDim); -	const float fSubStep = 1.0f/(dims.iRaysPerPixelDim * dims.fDetScale); +	const float fSubStep = 1.0f/(dims.iRaysPerPixelDim); // * dims.fDetScale);  	float* volData = (float*)D_volData;  	float fVal = 0.0f;  	float fA = startAngle + 0.5f; -	const float fT_base = 0.5f*dims.iProjDets - 0.5f + 0.5f;  	fOutputScale /= (dims.iRaysPerPixelDim * dims.iRaysPerPixelDim); -	if (offsets) { - -		for (int angle = startAngle; angle < endAngle; ++angle) -		{ -			const float cos_theta = gC_angle_cos[angle]; -			const float sin_theta = gC_angle_sin[angle]; -			const float TOffset = gC_angle_offset[angle]; - -			float fT = fT_base + fX * cos_theta - fY * sin_theta + TOffset; - -			for (int iSubX = 0; iSubX < dims.iRaysPerPixelDim; ++iSubX) { -				float fTy = fT; -				fT += fSubStep * cos_theta; -				for (int iSubY = 0; iSubY < dims.iRaysPerPixelDim; ++iSubY) { -					fVal += tex2D(gT_projTexture, fTy, fA); -					fTy -= fSubStep * sin_theta; -				} -			} -			fA += 1.0f; -		} - -	} else { +	for (int angle = startAngle; angle < endAngle; ++angle) +	{ +		const float cos_theta = gC_angle_scaled_cos[angle]; +		const float sin_theta = gC_angle_scaled_sin[angle]; +		const float TOffset = gC_angle_offset[angle]; -		for (int angle = startAngle; angle < endAngle; ++angle) -		{ -			const float cos_theta = gC_angle_cos[angle]; -			const float sin_theta = gC_angle_sin[angle]; +		float fT = fX * cos_theta - fY * sin_theta + TOffset; -			float fT = fT_base + fX * cos_theta - fY * sin_theta; - -			for (int iSubX = 0; iSubX < dims.iRaysPerPixelDim; ++iSubX) { -				float fTy = fT; -				fT += fSubStep * cos_theta; -				for (int iSubY = 0; iSubY < dims.iRaysPerPixelDim; ++iSubY) { -					fVal += tex2D(gT_projTexture, fTy, fA); -					fTy -= fSubStep * sin_theta; -				} +		for (int iSubX = 0; iSubX < dims.iRaysPerPixelDim; ++iSubX) { +			float fTy = fT; +			fT += fSubStep * cos_theta; +			for (int iSubY = 0; iSubY < dims.iRaysPerPixelDim; ++iSubY) { +				fVal += tex2D(gT_projTexture, fTy, fA); +				fTy -= fSubStep * sin_theta;  			} -			fA += 1.0f; -  		} - +		fA += 1.0f;  	}  	volData[Y*volPitch+X] += fVal * fOutputScale; @@ -211,12 +168,10 @@ __global__ void devBP_SART(float* D_volData, unsigned int volPitch, float offset  	if (X >= dims.iVolWidth || Y >= dims.iVolHeight)  		return; -	const float fX = ( X - 0.5f*dims.iVolWidth + 0.5f ) / dims.fDetScale; -	const float fY = ( Y - 0.5f*dims.iVolHeight + 0.5f ) / dims.fDetScale; - -	const float fT_base = 0.5f*dims.iProjDets - 0.5f + 0.5f; +	const float fX = ( X - 0.5f*dims.iVolWidth + 0.5f ); +	const float fY = ( Y - 0.5f*dims.iVolHeight + 0.5f ); -	const float fT = fT_base + fX * angle_cos - fY * angle_sin + offset; +	const float fT = fX * angle_cos - fY * angle_sin + offset;  	const float fVal = tex2D(gT_projTexture, fT, 0.5f);  	D_volData[Y*volPitch+X] += fVal * fOutputScale; @@ -225,32 +180,35 @@ __global__ void devBP_SART(float* D_volData, unsigned int volPitch, float offset  bool BP_internal(float* D_volumeData, unsigned int volumePitch,          float* D_projData, unsigned int projPitch, -        const SDimensions& dims, const float* angles, const float* TOffsets, float fOutputScale) +        const SDimensions& dims, const SParProjection* angles, +        float fOutputScale)  { -	// TODO: process angles block by block  	assert(dims.iProjAngles <= g_MaxAngles); -	float* angle_sin = new float[dims.iProjAngles]; -	float* angle_cos = new float[dims.iProjAngles]; +	float* angle_scaled_sin = new float[dims.iProjAngles]; +	float* angle_scaled_cos = new float[dims.iProjAngles]; +	float* angle_offset = new float[dims.iProjAngles];  	bindProjDataTexture(D_projData, projPitch, dims.iProjDets, dims.iProjAngles);  	for (unsigned int i = 0; i < dims.iProjAngles; ++i) { -		angle_sin[i] = sinf(angles[i]); -		angle_cos[i] = cosf(angles[i]); +		double d = angles[i].fDetUX * angles[i].fRayY - angles[i].fDetUY * angles[i].fRayX; +		angle_scaled_cos[i] = angles[i].fRayY / d; +		angle_scaled_sin[i] = -angles[i].fRayX / d; // TODO: Check signs +		angle_offset[i] = (angles[i].fDetSY * angles[i].fRayX - angles[i].fDetSX * angles[i].fRayY) / d;  	} -	cudaError_t e1 = cudaMemcpyToSymbol(gC_angle_sin, angle_sin, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); -	cudaError_t e2 = cudaMemcpyToSymbol(gC_angle_cos, angle_cos, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); + +	cudaError_t e1 = cudaMemcpyToSymbol(gC_angle_scaled_sin, angle_scaled_sin, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); +	cudaError_t e2 = cudaMemcpyToSymbol(gC_angle_scaled_cos, angle_scaled_cos, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); +	cudaError_t e3 = cudaMemcpyToSymbol(gC_angle_offset, angle_offset, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);  	assert(e1 == cudaSuccess);  	assert(e2 == cudaSuccess); +	assert(e3 == cudaSuccess); -	if (TOffsets) { -		cudaError_t e3 = cudaMemcpyToSymbol(gC_angle_offset, TOffsets, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); -		assert(e3 == cudaSuccess); -	} -	delete[] angle_sin; -	delete[] angle_cos; +	delete[] angle_scaled_sin; +	delete[] angle_scaled_cos; +	delete[] angle_offset;  	dim3 dimBlock(g_blockSlices, g_blockSliceSize);  	dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices, @@ -262,9 +220,9 @@ bool BP_internal(float* D_volumeData, unsigned int volumePitch,  	for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) {  		if (dims.iRaysPerPixelDim > 1) -			devBP_SS<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, (TOffsets != 0), dims, fOutputScale); +			devBP_SS<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale);  		else -			devBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, (TOffsets != 0), dims, fOutputScale); +			devBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale);  	}  	cudaThreadSynchronize(); @@ -277,7 +235,7 @@ bool BP_internal(float* D_volumeData, unsigned int volumePitch,  bool BP(float* D_volumeData, unsigned int volumePitch,          float* D_projData, unsigned int projPitch, -        const SDimensions& dims, const float* angles, const float* TOffsets, float fOutputScale) +        const SDimensions& dims, const SParProjection* angles, float fOutputScale)  {  	for (unsigned int iAngle = 0; iAngle < dims.iProjAngles; iAngle += g_MaxAngles) {  		SDimensions subdims = dims; @@ -289,9 +247,7 @@ bool BP(float* D_volumeData, unsigned int volumePitch,  		bool ret;  		ret = BP_internal(D_volumeData, volumePitch,  		                  D_projData + iAngle * projPitch, projPitch, -		                  subdims, angles + iAngle, -		                  TOffsets ? TOffsets + iAngle : 0, -		                  fOutputScale); +		                  subdims, angles + iAngle, fOutputScale);  		if (!ret)  			return false;  	} @@ -302,25 +258,23 @@ bool BP(float* D_volumeData, unsigned int volumePitch,  bool BP_SART(float* D_volumeData, unsigned int volumePitch,               float* D_projData, unsigned int projPitch,               unsigned int angle, const SDimensions& dims, -             const float* angles, const float* TOffsets, float fOutputScale) +             const SParProjection* angles, float fOutputScale)  {  	// Only one angle.  	// We need to Clamp to the border pixels instead of to zero, because  	// SART weights with ray length.  	bindProjDataTexture(D_projData, projPitch, dims.iProjDets, 1, cudaAddressModeClamp); -	float angle_sin = sinf(angles[angle]); -	float angle_cos = cosf(angles[angle]); - -	float offset = 0.0f; -	if (TOffsets) -		offset = TOffsets[angle]; +	double d = angles[angle].fDetUX * angles[angle].fRayY - angles[angle].fDetUY * angles[angle].fRayX; +	float angle_scaled_cos = angles[angle].fRayY / d; +	float angle_scaled_sin = -angles[angle].fRayX / d; // TODO: Check signs +	float angle_offset = (angles[angle].fDetSY * angles[angle].fRayX - angles[angle].fDetSX * angles[angle].fRayY) / d;  	dim3 dimBlock(g_blockSlices, g_blockSliceSize);  	dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices,  	             (dims.iVolHeight+g_blockSliceSize-1)/g_blockSliceSize); -	devBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, offset, angle_sin, angle_cos, dims, fOutputScale); +	devBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, angle_offset, angle_scaled_sin, angle_scaled_cos, dims, fOutputScale);  	cudaThreadSynchronize();  	cudaTextForceKernelsCompletion(); diff --git a/cuda/2d/par_bp.h b/cuda/2d/par_bp.h index 0720030..3fa8a28 100644 --- a/cuda/2d/par_bp.h +++ b/cuda/2d/par_bp.h @@ -34,13 +34,13 @@ namespace astraCUDA {  _AstraExport bool BP(float* D_volumeData, unsigned int volumePitch,          float* D_projData, unsigned int projPitch, -        const SDimensions& dims, const float* angles, -        const float* TOffsets, float fOutputScale); +        const SDimensions& dims, const SParProjection* angles, +        float fOutputScale);  _AstraExport bool BP_SART(float* D_volumeData, unsigned int volumePitch,               float* D_projData, unsigned int projPitch,               unsigned int angle, const SDimensions& dims, -             const float* angles, const float* TOffsets, float fOutputScale); +             const SParProjection* angles, float fOutputScale);  } diff --git a/cuda/2d/par_fp.cu b/cuda/2d/par_fp.cu index f58a643..a0b04ee 100644 --- a/cuda/2d/par_fp.cu +++ b/cuda/2d/par_fp.cu @@ -29,6 +29,7 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.  #include <cassert>  #include <iostream>  #include <list> +#include <cmath>  #include "util.h"  #include "arith.h" @@ -50,6 +51,7 @@ namespace astraCUDA {  static const unsigned g_MaxAngles = 2560;  __constant__ float gC_angle[g_MaxAngles];  __constant__ float gC_angle_offset[g_MaxAngles]; +__constant__ float gC_angle_detsize[g_MaxAngles];  // optimization parameters @@ -62,10 +64,6 @@ static const unsigned int g_blockSlices = 64;  #define iPREC_FACTOR 16 -// if necessary, a buffer of zeroes of size g_MaxAngles -static float* g_pfZeroes = 0; - -  static bool bindVolumeDataTexture(float* data, cudaArray*& dataArray, unsigned int pitch, unsigned int width, unsigned int height)  {  	cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>(); @@ -88,9 +86,10 @@ static bool bindVolumeDataTexture(float* data, cudaArray*& dataArray, unsigned i  	return true;  } +  // projection for angles that are roughly horizontal -// theta between 45 and 135 degrees -__global__ void FPhorizontal(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, int regionOffset, const SDimensions dims, float outputScale) +// (detector roughly vertical) +__global__ void FPhorizontal_simple(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, const SDimensions dims, float outputScale)  {  	const int relDet = threadIdx.x;  	const int relAngle = threadIdx.y; @@ -105,33 +104,7 @@ __global__ void FPhorizontal(float* D_projData, unsigned int projPitch, unsigned  	const float sin_theta = __sinf(theta);  	// compute start detector for this block/angle: -	// (The same for all threadIdx.x) -	// ------------------------------------- - -	const int midSlice = startSlice + g_blockSlices / 2; - -	// ASSUMPTION: fDetScale >= 1.0f -	// problem: detector regions get skipped because slice blocks aren't large -	// enough -	const unsigned int g_blockSliceSize = g_detBlockSize; - -	// project (midSlice,midRegion) on this thread's detector - -	const float fBase = 0.5f*dims.iProjDets - 0.5f + -		( -		    (midSlice - 0.5f*dims.iVolWidth + 0.5f) * cos_theta -		  - (g_blockSliceSize/2 - 0.5f*dims.iVolHeight + 0.5f) * sin_theta -		  + gC_angle_offset[angle] -		) / dims.fDetScale; -	int iBase = (int)floorf(fBase * fPREC_FACTOR); -	int iInc = (int)floorf(g_blockSliceSize * sin_theta / dims.fDetScale * -fPREC_FACTOR); - -	// ASSUMPTION: 16 > regionOffset / fDetScale -	const int detRegion = (iBase + (blockIdx.y - regionOffset)*iInc + 16*iPREC_FACTOR*g_detBlockSize) / (iPREC_FACTOR * g_detBlockSize) - 16; -	const int detPrevRegion = (iBase + (blockIdx.y - regionOffset - 1)*iInc + 16*iPREC_FACTOR*g_detBlockSize) / (iPREC_FACTOR * g_detBlockSize) - 16; - -	if (blockIdx.y > 0 && detRegion == detPrevRegion) -		return; +	const int detRegion = blockIdx.y;  	const int detector = detRegion * g_detBlockSize + relDet; @@ -141,7 +114,7 @@ __global__ void FPhorizontal(float* D_projData, unsigned int projPitch, unsigned  	if (detector < 0 || detector >= dims.iProjDets)  		return; -	const float fDetStep = -dims.fDetScale / sin_theta; +	const float fDetStep = -gC_angle_detsize[angle] / sin_theta;  	float fSliceStep = cos_theta / sin_theta;  	float fDistCorr;  	if (sin_theta > 0.0f) @@ -191,9 +164,10 @@ __global__ void FPhorizontal(float* D_projData, unsigned int projPitch, unsigned  	D_projData[angle*projPitch+detector] += fVal * fDistCorr;  } +  // projection for angles that are roughly vertical -// theta between 0 and 45, or 135 and 180 degrees -__global__ void FPvertical(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, int regionOffset, const SDimensions dims, float outputScale) +// (detector roughly horizontal) +__global__ void FPvertical_simple(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, const SDimensions dims, float outputScale)  {  	const int relDet = threadIdx.x;  	const int relAngle = threadIdx.y; @@ -208,33 +182,7 @@ __global__ void FPvertical(float* D_projData, unsigned int projPitch, unsigned i  	const float sin_theta = __sinf(theta);  	// compute start detector for this block/angle: -	// (The same for all threadIdx.x) -	// ------------------------------------- - -	const int midSlice = startSlice + g_blockSlices / 2; - -	// project (midSlice,midRegion) on this thread's detector - -	// ASSUMPTION: fDetScale >= 1.0f -	// problem: detector regions get skipped because slice blocks aren't large -	// enough -	const unsigned int g_blockSliceSize = g_detBlockSize; - -	const float fBase = 0.5f*dims.iProjDets - 0.5f + -		( -		    (g_blockSliceSize/2 - 0.5f*dims.iVolWidth + 0.5f) * cos_theta -		  - (midSlice - 0.5f*dims.iVolHeight + 0.5f) * sin_theta -		  + gC_angle_offset[angle] -		) / dims.fDetScale; -	int iBase = (int)floorf(fBase * fPREC_FACTOR); -	int iInc = (int)floorf(g_blockSliceSize * cos_theta / dims.fDetScale * fPREC_FACTOR); - -	// ASSUMPTION: 16 > regionOffset / fDetScale -	const int detRegion = (iBase + (blockIdx.y - regionOffset)*iInc + 16*iPREC_FACTOR*g_detBlockSize) / (iPREC_FACTOR * g_detBlockSize) - 16; -	const int detPrevRegion = (iBase + (blockIdx.y - regionOffset-1)*iInc + 16*iPREC_FACTOR*g_detBlockSize) / (iPREC_FACTOR * g_detBlockSize) - 16; - -	if (blockIdx.y > 0 && detRegion == detPrevRegion) -		return; +	const int detRegion = blockIdx.y;  	const int detector = detRegion * g_detBlockSize + relDet; @@ -244,7 +192,7 @@ __global__ void FPvertical(float* D_projData, unsigned int projPitch, unsigned i  	if (detector < 0 || detector >= dims.iProjDets)  		return; -	const float fDetStep = dims.fDetScale / cos_theta; +	const float fDetStep = gC_angle_detsize[angle] / cos_theta;  	float fSliceStep = sin_theta / cos_theta;  	float fDistCorr;  	if (cos_theta < 0.0f) @@ -292,183 +240,45 @@ __global__ void FPvertical(float* D_projData, unsigned int projPitch, unsigned i  	D_projData[angle*projPitch+detector] += fVal * fDistCorr;  } -// projection for angles that are roughly horizontal -// (detector roughly vertical) -__global__ void FPhorizontal_simple(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, const SDimensions dims, float outputScale) -{ -	const int relDet = threadIdx.x; -	const int relAngle = threadIdx.y; - -	int angle = startAngle + blockIdx.x * g_anglesPerBlock + relAngle; - -	if (angle >= endAngle) -		return; -	const float theta = gC_angle[angle]; -	const float cos_theta = __cosf(theta); -	const float sin_theta = __sinf(theta); -	// compute start detector for this block/angle: -	const int detRegion = blockIdx.y; - -	const int detector = detRegion * g_detBlockSize + relDet; - -	// Now project the part of the ray to angle,detector through -	// slices startSlice to startSlice+g_blockSlices-1 -	if (detector < 0 || detector >= dims.iProjDets) -		return; +// Coordinates of center of detector pixel number t: +// x = (t - 0.5*nDets + 0.5 - fOffset) * fSize * cos(fAngle) +// y = - (t - 0.5*nDets + 0.5 - fOffset) * fSize * sin(fAngle) -	const float fDetStep = -dims.fDetScale / sin_theta; -	float fSliceStep = cos_theta / sin_theta; -	float fDistCorr; -	if (sin_theta > 0.0f) -		fDistCorr = -fDetStep; -	else -		fDistCorr = fDetStep; -	fDistCorr *= outputScale; -	float fVal = 0.0f; -	// project detector on slice -	float fP = (detector - 0.5f*dims.iProjDets + 0.5f - gC_angle_offset[angle]) * fDetStep + (startSlice - 0.5f*dims.iVolWidth + 0.5f) * fSliceStep + 0.5f*dims.iVolHeight - 0.5f + 0.5f; -	float fS = startSlice + 0.5f; -	int endSlice = startSlice + g_blockSlices; -	if (endSlice > dims.iVolWidth) -		endSlice = dims.iVolWidth; - -	if (dims.iRaysPerDet > 1) { - -		fP += (-0.5f*dims.iRaysPerDet + 0.5f)/dims.iRaysPerDet * fDetStep; -		const float fSubDetStep = fDetStep / dims.iRaysPerDet; -		fDistCorr /= dims.iRaysPerDet; - -		fSliceStep -= dims.iRaysPerDet * fSubDetStep; - -		for (int slice = startSlice; slice < endSlice; ++slice) -		{ -			for (int iSubT = 0; iSubT < dims.iRaysPerDet; ++iSubT) { -				fVal += tex2D(gT_volumeTexture, fS, fP); -				fP += fSubDetStep; -			} -			fP += fSliceStep; -			fS += 1.0f; -		} - -	} else { - -		for (int slice = startSlice; slice < endSlice; ++slice) -		{ -			fVal += tex2D(gT_volumeTexture, fS, fP); -			fP += fSliceStep; -			fS += 1.0f; -		} - - -	} - -	D_projData[angle*projPitch+detector] += fVal * fDistCorr; -} - - -// projection for angles that are roughly vertical -// (detector roughly horizontal) -__global__ void FPvertical_simple(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, const SDimensions dims, float outputScale) +static void convertAndUploadAngles(const SParProjection *projs, unsigned int nth, unsigned int ndets)  { -	const int relDet = threadIdx.x; -	const int relAngle = threadIdx.y; - -	int angle = startAngle + blockIdx.x * g_anglesPerBlock + relAngle; - -	if (angle >= endAngle) -		return; - -	const float theta = gC_angle[angle]; -	const float cos_theta = __cosf(theta); -	const float sin_theta = __sinf(theta); - -	// compute start detector for this block/angle: -	const int detRegion = blockIdx.y; - -	const int detector = detRegion * g_detBlockSize + relDet; - -	// Now project the part of the ray to angle,detector through -	// slices startSlice to startSlice+g_blockSlices-1 - -	if (detector < 0 || detector >= dims.iProjDets) -		return; - -	const float fDetStep = dims.fDetScale / cos_theta; -	float fSliceStep = sin_theta / cos_theta; -	float fDistCorr; -	if (cos_theta < 0.0f) -		fDistCorr = -fDetStep; -	else -		fDistCorr = fDetStep; -	fDistCorr *= outputScale; - -	float fVal = 0.0f; -	float fP = (detector - 0.5f*dims.iProjDets + 0.5f - gC_angle_offset[angle]) * fDetStep + (startSlice - 0.5f*dims.iVolHeight + 0.5f) * fSliceStep + 0.5f*dims.iVolWidth - 0.5f + 0.5f; -	float fS = startSlice+0.5f; -	int endSlice = startSlice + g_blockSlices; -	if (endSlice > dims.iVolHeight) -		endSlice = dims.iVolHeight; - -	if (dims.iRaysPerDet > 1) { - -		fP += (-0.5f*dims.iRaysPerDet + 0.5f)/dims.iRaysPerDet * fDetStep; -		const float fSubDetStep = fDetStep / dims.iRaysPerDet; -		fDistCorr /= dims.iRaysPerDet; - -		fSliceStep -= dims.iRaysPerDet * fSubDetStep; - -		for (int slice = startSlice; slice < endSlice; ++slice) -		{ -			for (int iSubT = 0; iSubT < dims.iRaysPerDet; ++iSubT) { -				fVal += tex2D(gT_volumeTexture, fP, fS); -				fP += fSubDetStep; -			} -			fP += fSliceStep; -			fS += 1.0f; -		} - -	} else { +	float *angles = new float[nth]; +	float *offsets = new float[nth]; +	float *detsizes = new float[nth]; -		for (int slice = startSlice; slice < endSlice; ++slice) -		{ -			fVal += tex2D(gT_volumeTexture, fP, fS); -			fP += fSliceStep; -			fS += 1.0f; -		} - -	} +	for (int i = 0; i < nth; ++i) +		getParParameters(projs[i], ndets, angles[i], detsizes[i], offsets[i]); -	D_projData[angle*projPitch+detector] += fVal * fDistCorr; +	cudaMemcpyToSymbol(gC_angle, angles, nth*sizeof(float), 0, cudaMemcpyHostToDevice);  +	cudaMemcpyToSymbol(gC_angle_offset, offsets, nth*sizeof(float), 0, cudaMemcpyHostToDevice); +	cudaMemcpyToSymbol(gC_angle_detsize, detsizes, nth*sizeof(float), 0, cudaMemcpyHostToDevice);   }  bool FP_simple_internal(float* D_volumeData, unsigned int volumePitch,                 float* D_projData, unsigned int projPitch, -               const SDimensions& dims, const float* angles, -               const float* TOffsets, float outputScale) +               const SDimensions& dims, const SParProjection* angles, +               float outputScale)  { -	// TODO: load angles into constant memory in smaller blocks  	assert(dims.iProjAngles <= g_MaxAngles); +	assert(angles); +  	cudaArray* D_dataArray;  	bindVolumeDataTexture(D_volumeData, D_dataArray, volumePitch, dims.iVolWidth, dims.iVolHeight); -	cudaMemcpyToSymbol(gC_angle, angles, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); -	if (TOffsets) { -		cudaMemcpyToSymbol(gC_angle_offset, TOffsets, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); -	} else { -		if (!g_pfZeroes) { -			g_pfZeroes = new float[g_MaxAngles]; -			memset(g_pfZeroes, 0, g_MaxAngles * sizeof(float)); -		} -		cudaMemcpyToSymbol(gC_angle_offset, g_pfZeroes, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); -	} +	convertAndUploadAngles(angles, dims.iProjAngles, dims.iProjDets); +  	dim3 dimBlock(g_detBlockSize, g_anglesPerBlock); // detector block size, angles @@ -491,7 +301,7 @@ bool FP_simple_internal(float* D_volumeData, unsigned int volumePitch,  		// Maybe we should detect corner cases and put them in the optimal  		// group of angles.  		if (a != dims.iProjAngles) -			vertical = (fabsf(sinf(angles[a])) <= fabsf(cosf(angles[a]))); +			vertical = (fabsf(angles[a].fRayX) <= fabsf(angles[a].fRayY));  		if (a == dims.iProjAngles || vertical != blockVertical) {  			// block done @@ -535,8 +345,8 @@ bool FP_simple_internal(float* D_volumeData, unsigned int volumePitch,  bool FP_simple(float* D_volumeData, unsigned int volumePitch,                 float* D_projData, unsigned int projPitch, -               const SDimensions& dims, const float* angles, -               const float* TOffsets, float outputScale) +               const SDimensions& dims, const SParProjection* angles, +               float outputScale)  {  	for (unsigned int iAngle = 0; iAngle < dims.iProjAngles; iAngle += g_MaxAngles) {  		SDimensions subdims = dims; @@ -549,7 +359,7 @@ bool FP_simple(float* D_volumeData, unsigned int volumePitch,  		ret = FP_simple_internal(D_volumeData, volumePitch,  		                         D_projData + iAngle * projPitch, projPitch,  		                         subdims, angles + iAngle, -		                         TOffsets ? TOffsets + iAngle : 0, outputScale); +		                         outputScale);  		if (!ret)  			return false;  	} @@ -558,106 +368,12 @@ bool FP_simple(float* D_volumeData, unsigned int volumePitch,  bool FP(float* D_volumeData, unsigned int volumePitch,          float* D_projData, unsigned int projPitch, -        const SDimensions& dims, const float* angles, -        const float* TOffsets, float outputScale) +        const SDimensions& dims, const SParProjection* angles, +        float outputScale)  {  	return FP_simple(D_volumeData, volumePitch, D_projData, projPitch, -	                 dims, angles, TOffsets, outputScale); +	                 dims, angles, outputScale); -	// TODO: Fix bug in this non-simple FP with large detscale and TOffsets -#if 0 - -	// TODO: load angles into constant memory in smaller blocks -	assert(dims.iProjAngles <= g_MaxAngles); - -	// TODO: compute region size dynamically to resolve these two assumptions -	// ASSUMPTION: 16 > regionOffset / fDetScale -	const unsigned int g_blockSliceSize = g_detBlockSize; -	assert(16 > (g_blockSlices / g_blockSliceSize) / dims.fDetScale); -	// ASSUMPTION: fDetScale >= 1.0f -	assert(dims.fDetScale > 0.9999f); - -	cudaArray* D_dataArray; -	bindVolumeDataTexture(D_volumeData, D_dataArray, volumePitch, dims.iVolWidth, dims.iVolHeight); - -	cudaMemcpyToSymbol(gC_angle, angles, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); - -	if (TOffsets) { -		cudaMemcpyToSymbol(gC_angle_offset, TOffsets, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); -	} else { -		if (!g_pfZeroes) { -			g_pfZeroes = new float[g_MaxAngles]; -			memset(g_pfZeroes, 0, g_MaxAngles * sizeof(float)); -		} -		cudaMemcpyToSymbol(gC_angle_offset, g_pfZeroes, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); -	} - -	int regionOffset = g_blockSlices / g_blockSliceSize; - -	dim3 dimBlock(g_detBlockSize, g_anglesPerBlock); // region size, angles - -	std::list<cudaStream_t> streams; - - -	// Run over all angles, grouping them into groups of the same -	// orientation (roughly horizontal vs. roughly vertical). -	// Start a stream of grids for each such group. - -	// TODO: Check if it's worth it to store this info instead -	// of recomputing it every FP. - -	unsigned int blockStart = 0; -	unsigned int blockEnd = 0; -	bool blockVertical = false; -	for (unsigned int a = 0; a <= dims.iProjAngles; ++a) { -		bool vertical; -		// TODO: Having <= instead of < below causes a 5% speedup. -		// Maybe we should detect corner cases and put them in the optimal -		// group of angles. -		if (a != dims.iProjAngles) -			vertical = (fabsf(sinf(angles[a])) <= fabsf(cosf(angles[a]))); -		if (a == dims.iProjAngles || vertical != blockVertical) { -			// block done - -			blockEnd = a; -			if (blockStart != blockEnd) { -				unsigned int length = dims.iVolHeight; -				if (blockVertical) -					length = dims.iVolWidth; -				dim3 dimGrid((blockEnd-blockStart+g_anglesPerBlock-1)/g_anglesPerBlock, -				             (length+g_blockSliceSize-1)/g_blockSliceSize+2*regionOffset); // angle blocks, regions -				// TODO: check if we can't immediately -				//       destroy the stream after use -				cudaStream_t stream; -				cudaStreamCreate(&stream); -				streams.push_back(stream); -				//printf("angle block: %d to %d, %d\n", blockStart, blockEnd, blockVertical); -				if (!blockVertical) -					for (unsigned int i = 0; i < dims.iVolWidth; i += g_blockSlices) -						FPhorizontal<<<dimGrid, dimBlock, 0, stream>>>(D_projData, projPitch, i, blockStart, blockEnd, regionOffset, dims, outputScale); -				else -					for (unsigned int i = 0; i < dims.iVolHeight; i += g_blockSlices) -						FPvertical<<<dimGrid, dimBlock, 0, stream>>>(D_projData, projPitch, i, blockStart, blockEnd, regionOffset, dims, outputScale); -			} -			blockVertical = vertical; -			blockStart = a; -		} -	} - -	for (std::list<cudaStream_t>::iterator iter = streams.begin(); iter != streams.end(); ++iter) -		cudaStreamDestroy(*iter); - -	streams.clear(); - -	cudaThreadSynchronize(); - -	cudaTextForceKernelsCompletion(); - -	cudaFreeArray(D_dataArray); -		 - -	return true; -#endif  } diff --git a/cuda/2d/par_fp.h b/cuda/2d/par_fp.h index 4830c1e..1f0ff69 100644 --- a/cuda/2d/par_fp.h +++ b/cuda/2d/par_fp.h @@ -32,8 +32,8 @@ namespace astraCUDA {  _AstraExport bool FP(float* D_volumeData, unsigned int volumePitch,          float* D_projData, unsigned int projPitch, -        const SDimensions& dims, const float* angles, -        const float* TOffsets, float fOutputScale); +        const SDimensions& dims, const SParProjection* angles, +        float fOutputScale);  } diff --git a/cuda/2d/sart.cu b/cuda/2d/sart.cu index f444b63..3a3c7e6 100644 --- a/cuda/2d/sart.cu +++ b/cuda/2d/sart.cu @@ -253,10 +253,10 @@ bool SART::callFP_SART(float* D_volumeData, unsigned int volumePitch,  {  	SDimensions d = dims;  	d.iProjAngles = 1; -	if (angles) { +	if (parProjs) {  		assert(!fanProjs);  		return FP(D_volumeData, volumePitch, D_projData, projPitch, -		          d, &angles[angle], TOffsets, outputScale); +		          d, &parProjs[angle], outputScale);  	} else {  		assert(fanProjs);  		return FanFP(D_volumeData, volumePitch, D_projData, projPitch, @@ -268,10 +268,10 @@ bool SART::callBP_SART(float* D_volumeData, unsigned int volumePitch,                         float* D_projData, unsigned int projPitch,                         unsigned int angle, float outputScale)  { -	if (angles) { +	if (parProjs) {  		assert(!fanProjs);  		return BP_SART(D_volumeData, volumePitch, D_projData, projPitch, -		               angle, dims, angles, TOffsets, outputScale); +		               angle, dims, parProjs, outputScale);  	} else {  		assert(fanProjs);  		return FanBP_SART(D_volumeData, volumePitch, D_projData, projPitch, diff --git a/cuda/2d/sirt.cu b/cuda/2d/sirt.cu index 0079717..b393d7f 100644 --- a/cuda/2d/sirt.cu +++ b/cuda/2d/sirt.cu @@ -151,7 +151,7 @@ bool SIRT::precomputeWeights()  bool SIRT::doSlabCorrections()  {  	// This function compensates for effectively infinitely large slab-like -	// objects of finite thickness 1. +	// objects of finite thickness 1 in a parallel beam geometry.  	// Each ray through the object has an intersection of length d/cos(alpha).  	// The length of the ray actually intersecting the reconstruction volume is @@ -169,6 +169,10 @@ bool SIRT::doSlabCorrections()  	if (useVolumeMask || useSinogramMask)  		return false; +	// Parallel-beam only +	if (!parProjs) +		return false; +  	// multiply by line weights  	processSino<opDiv>(D_sinoData, D_lineWeight, projPitch, dims); @@ -180,7 +184,9 @@ bool SIRT::doSlabCorrections()  	float bound = cosf(1.3963f);  	float* t = (float*)D_sinoData;  	for (int i = 0; i < dims.iProjAngles; ++i) { -		float f = fabs(cosf(angles[i])); +		float angle, detsize, offset; +		getParParameters(parProjs[i], dims.iProjDets, angle, detsize, offset); +		float f = fabs(cosf(angle));  		if (f < bound)  			f = bound; @@ -188,7 +194,6 @@ bool SIRT::doSlabCorrections()  		processSino<opMul>(t, f, sinoPitch, subdims);  		t += sinoPitch;  	} -  	return true;  } @@ -298,40 +303,6 @@ float SIRT::computeDiffNorm()  } -bool doSIRT(float* D_volumeData, unsigned int volumePitch, -            float* D_sinoData, unsigned int sinoPitch, -            float* D_maskData, unsigned int maskPitch, -            const SDimensions& dims, const float* angles, -            const float* TOffsets, unsigned int iterations) -{ -	SIRT sirt; -	bool ok = true; - -	ok &= sirt.setGeometry(dims, angles); -	if (D_maskData) -		ok &= sirt.enableVolumeMask(); -	if (TOffsets) -		ok &= sirt.setTOffsets(TOffsets); - -	if (!ok) -		return false; - -	ok = sirt.init(); -	if (!ok) -		return false; - -	if (D_maskData) -		ok &= sirt.setVolumeMask(D_maskData, maskPitch); - -	ok &= sirt.setBuffers(D_volumeData, volumePitch, D_sinoData, sinoPitch); -	if (!ok) -		return false; - -	ok = sirt.iterate(iterations); - -	return ok; -} -  }  #ifdef STANDALONE  | 
