From 248639b4fee8659a4106dcc44d721149a1885018 Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Thu, 5 Mar 2015 17:01:47 +0100
Subject: Add 3d geometry normalization functions

---
 cuda/3d/astra3d.cu | 150 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 cuda/3d/astra3d.h  |  16 ++++++
 2 files changed, 166 insertions(+)

(limited to 'cuda')

diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu
index 0b9c70b..f672d6c 100644
--- a/cuda/3d/astra3d.cu
+++ b/cuda/3d/astra3d.cu
@@ -40,6 +40,12 @@ $Id$
 #include "arith3d.h"
 #include "astra3d.h"
 
+#include "astra/ParallelProjectionGeometry3D.h"
+#include "astra/ParallelVecProjectionGeometry3D.h"
+#include "astra/ConeProjectionGeometry3D.h"
+#include "astra/ConeVecProjectionGeometry3D.h"
+#include "astra/VolumeGeometry3D.h"
+
 #include <iostream>
 
 using namespace astraCUDA3d;
@@ -137,6 +143,150 @@ static SPar3DProjection* genPar3DProjections(unsigned int iProjAngles,
 
 
 
+
+// adjust pProjs to normalize volume geometry
+template<typename ProjectionT>
+static bool convertAstraGeometry_internal(const CVolumeGeometry3D* pVolGeom,
+                          unsigned int iProjectionAngleCount,
+                          ProjectionT*& pProjs,
+                          float& fOutputScale)
+{
+	assert(pVolGeom);
+	assert(pProjs);
+
+	// TODO: Relative instead of absolute
+	const float EPS = 0.00001f;
+	if (abs(pVolGeom->getPixelLengthX() - pVolGeom->getPixelLengthY()) > EPS)
+		return false;
+	if (abs(pVolGeom->getPixelLengthX() - pVolGeom->getPixelLengthZ()) > EPS)
+		return false;
+
+
+	// Translate
+	float dx = -(pVolGeom->getWindowMinX() + pVolGeom->getWindowMaxX()) / 2;
+	float dy = -(pVolGeom->getWindowMinY() + pVolGeom->getWindowMaxY()) / 2;
+	float dz = -(pVolGeom->getWindowMinZ() + pVolGeom->getWindowMaxZ()) / 2;
+
+	float factor = 1.0f / pVolGeom->getPixelLengthX();
+
+	for (int i = 0; i < iProjectionAngleCount; ++i) {
+		// CHECKME: Order of scaling and translation
+		pProjs[i].translate(dx, dy, dz);
+		pProjs[i].scale(factor);
+	}
+
+	// CHECKME: Check factor
+	fOutputScale *= pVolGeom->getPixelLengthX();
+
+	return true;
+}
+
+
+
+bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
+                          const CParallelProjectionGeometry3D* pProjGeom,
+                          SPar3DProjection*& pProjs, float& fOutputScale)
+{
+	assert(pVolGeom);
+	assert(pProjGeom);
+	assert(pProjGeom->getProjectionAngles());
+
+	int nth = pProjGeom->getProjectionCount();
+
+	pProjs = genPar3DProjections(nth,
+	                             pProjGeom->getDetectorColCount(),
+	                             pProjGeom->getDetectorRowCount(),
+	                             pProjGeom->getDetectorSpacingX(),
+	                             pProjGeom->getDetectorSpacingY(),
+	                             pProjGeom->getProjectionAngles());
+
+	bool ok;
+
+	fOutputScale = 1.0f;
+
+	ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale);
+
+	return ok;
+}
+
+bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
+                          const CParallelVecProjectionGeometry3D* pProjGeom,
+                          SPar3DProjection*& pProjs, float& fOutputScale)
+{
+	assert(pVolGeom);
+	assert(pProjGeom);
+	assert(pProjGeom->getProjectionVectors());
+
+	int nth = pProjGeom->getProjectionCount();
+
+	pProjs = new SPar3DProjection[nth];
+	for (int i = 0; i < nth; ++i)
+		pProjs[i] = pProjGeom->getProjectionVectors()[i];
+
+	bool ok;
+
+	fOutputScale = 1.0f;
+
+	ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale);
+
+	return ok;
+}
+
+bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
+                          const CConeProjectionGeometry3D* pProjGeom,
+                          SConeProjection*& pProjs, float& fOutputScale)
+{
+	assert(pVolGeom);
+	assert(pProjGeom);
+	assert(pProjGeom->getProjectionAngles());
+
+	int nth = pProjGeom->getProjectionCount();
+
+	pProjs = genConeProjections(nth,
+	                            pProjGeom->getDetectorColCount(),
+	                            pProjGeom->getDetectorRowCount(),
+	                            pProjGeom->getOriginSourceDistance(),
+	                            pProjGeom->getOriginDetectorDistance(),
+	                            pProjGeom->getDetectorSpacingX(),
+	                            pProjGeom->getDetectorSpacingY(),
+	                            pProjGeom->getProjectionAngles());
+
+	bool ok;
+
+	fOutputScale = 1.0f;
+
+	ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale);
+
+	return ok;
+}
+
+bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
+                          const CConeVecProjectionGeometry3D* pProjGeom,
+                          SConeProjection*& pProjs, float& fOutputScale)
+{
+	assert(pVolGeom);
+	assert(pProjGeom);
+	assert(pProjGeom->getProjectionAngles());
+
+	int nth = pProjGeom->getProjectionCount();
+
+	pProjs = new SConeProjection[nth];
+	for (int i = 0; i < nth; ++i)
+		pProjs[i] = pProjGeom->getProjectionVectors()[i];
+
+	bool ok;
+
+	fOutputScale = 1.0f;
+
+	ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale);
+
+	return ok;
+}
+
+
+
+
+
 class AstraSIRT3d_internal {
 public:
 	SDimensions3D dims;
diff --git a/cuda/3d/astra3d.h b/cuda/3d/astra3d.h
index f91fe26..47e252e 100644
--- a/cuda/3d/astra3d.h
+++ b/cuda/3d/astra3d.h
@@ -466,6 +466,22 @@ _AstraExport bool astraCudaFDK(float* pfVolume, const float* pfProjections,
                   bool bShortScan,
                   int iGPUIndex, int iVoxelSuperSampling);
 
+_AstraExport bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
+                          const CParallelProjectionGeometry3D* pProjGeom,
+                          SPar3DProjection*& pProjs, float& fOutputScale);
+
+_AstraExport bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
+                          const CParallelVecProjectionGeometry3D* pProjGeom,
+                          SPar3DProjection*& pProjs, float& fOutputScale);
+
+_AstraExport bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
+                          const CConeProjectionGeometry3D* pProjGeom,
+                          SConeProjection*& pProjs, float& fOutputScale);
+
+_AstraExport bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
+                          const CConeVecProjectionGeometry3D* pProjGeom,
+                          SConeProjection*& pProjs, float& fOutputScale);
+
 }
 
 
-- 
cgit v1.2.3


From 5304d08cd1ab7b8d778c367912934376eb92370f Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Mon, 9 Mar 2015 15:43:56 +0100
Subject: Allow non-centered volume geometry in SIRT3D and CGLS3D

---
 cuda/3d/astra3d.cu | 284 ++++++++++++++---------------------------------------
 cuda/3d/astra3d.h  |  90 +++--------------
 2 files changed, 88 insertions(+), 286 deletions(-)

(limited to 'cuda')

diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu
index f672d6c..426f3a0 100644
--- a/cuda/3d/astra3d.cu
+++ b/cuda/3d/astra3d.cu
@@ -182,6 +182,20 @@ static bool convertAstraGeometry_internal(const CVolumeGeometry3D* pVolGeom,
 }
 
 
+void convertAstraGeometry_dims(const CVolumeGeometry3D* pVolGeom,
+                               const CProjectionGeometry3D* pProjGeom,
+                               SDimensions3D& dims)
+{
+	dims.iVolX = pVolGeom->getGridColCount();
+	dims.iVolY = pVolGeom->getGridRowCount();
+	dims.iVolZ = pVolGeom->getGridSliceCount();
+	dims.iProjAngles = pProjGeom->getProjectionCount();
+	dims.iProjU = pProjGeom->getDetectorColCount(),
+	dims.iProjV = pProjGeom->getDetectorRowCount(),
+	dims.iRaysPerDetDim = 1;
+	dims.iRaysPerVoxelDim = 1;
+}
+
 
 bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
                           const CParallelProjectionGeometry3D* pProjGeom,
@@ -370,127 +384,55 @@ AstraSIRT3d::~AstraSIRT3d()
 	pData = 0;
 }
 
-bool AstraSIRT3d::setReconstructionGeometry(unsigned int iVolX,
-                                            unsigned int iVolY,
-                                            unsigned int iVolZ/*,
-                                            float fPixelSize = 1.0f*/)
+bool AstraSIRT3d::setGeometry(const CVolumeGeometry3D* pVolGeom,
+	                      const CProjectionGeometry3D* pProjGeom)
 {
 	if (pData->initialized)
 		return false;
 
-	pData->dims.iVolX = iVolX;
-	pData->dims.iVolY = iVolY;
-	pData->dims.iVolZ = iVolZ;
+	convertAstraGeometry_dims(pVolGeom, pProjGeom, pData->dims);
 
-	return (iVolX > 0 && iVolY > 0 && iVolZ > 0);
-}
-
-
-bool AstraSIRT3d::setPar3DGeometry(unsigned int iProjAngles,
-                                   unsigned int iProjU,
-                                   unsigned int iProjV,
-                                   const SPar3DProjection* projs)
-{
-	if (pData->initialized)
+	if (pData->dims.iVolX <= 0 || pData->dims.iVolX <= 0 || pData->dims.iVolX <= 0)
 		return false;
-
-	pData->dims.iProjAngles = iProjAngles;
-	pData->dims.iProjU = iProjU;
-	pData->dims.iProjV = iProjV;
-
-	if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || projs == 0)
+	if (pData->dims.iProjAngles <= 0 || pData->dims.iProjU <= 0 || pData->dims.iProjV <= 0)
 		return false;
 
-	pData->parprojs = new SPar3DProjection[iProjAngles];
-	memcpy(pData->parprojs, projs, iProjAngles * sizeof(projs[0]));
-
-	pData->projType = PROJ_PARALLEL;
-
-	return true;
-}
-
-bool AstraSIRT3d::setPar3DGeometry(unsigned int iProjAngles,
-                                   unsigned int iProjU,
-                                   unsigned int iProjV,
-                                   float fDetUSize,
-                                   float fDetVSize,
-                                   const float *pfAngles)
-{
-	if (pData->initialized)
-		return false;
-
-	if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0)
-		return false;
-
-	SPar3DProjection* p = genPar3DProjections(iProjAngles,
-                                              iProjU, iProjV,
-                                              fDetUSize, fDetVSize,
-                                              pfAngles);
-	pData->dims.iProjAngles = iProjAngles;
-	pData->dims.iProjU = iProjU;
-	pData->dims.iProjV = iProjV;
-
-	pData->parprojs = p;
-	pData->projType = PROJ_PARALLEL;
-
-	return true;
-}
-
-
+	const CConeProjectionGeometry3D* conegeom = dynamic_cast<const CConeProjectionGeometry3D*>(pProjGeom);
+	const CParallelProjectionGeometry3D* par3dgeom = dynamic_cast<const CParallelProjectionGeometry3D*>(pProjGeom);
+	const CParallelVecProjectionGeometry3D* parvec3dgeom = dynamic_cast<const CParallelVecProjectionGeometry3D*>(pProjGeom);
+	const CConeVecProjectionGeometry3D* conevec3dgeom = dynamic_cast<const CConeVecProjectionGeometry3D*>(pProjGeom);
 
-bool AstraSIRT3d::setConeGeometry(unsigned int iProjAngles,
-                                  unsigned int iProjU,
-                                  unsigned int iProjV,
-                                  const SConeProjection* projs)
-{
-	if (pData->initialized)
-		return false;
+	float outputScale;
+	bool ok;
 
-	pData->dims.iProjAngles = iProjAngles;
-	pData->dims.iProjU = iProjU;
-	pData->dims.iProjV = iProjV;
+	pData->projs = 0;
+	pData->parprojs = 0;
+
+	if (conegeom) {
+		ok = convertAstraGeometry(pVolGeom, conegeom, pData->projs, outputScale);
+		pData->projType = PROJ_PARALLEL;
+	} else if (conevec3dgeom) {
+		ok = convertAstraGeometry(pVolGeom, conevec3dgeom, pData->projs, outputScale);
+		pData->projType = PROJ_PARALLEL;
+	} else if (par3dgeom) {
+		ok = convertAstraGeometry(pVolGeom, par3dgeom, pData->parprojs, outputScale);
+		pData->projType = PROJ_CONE;
+	} else if (parvec3dgeom) {
+		ok = convertAstraGeometry(pVolGeom, parvec3dgeom, pData->parprojs, outputScale);
+		pData->projType = PROJ_CONE;
+	} else {
+		ok = false;
+	}
 
-	if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || projs == 0)
+	if (!ok)
 		return false;
 
-	pData->projs = new SConeProjection[iProjAngles];
-	memcpy(pData->projs, projs, iProjAngles * sizeof(projs[0]));
 
-	pData->projType = PROJ_CONE;
+	// TODO: Handle outputScale
 
 	return true;
 }
 
-bool AstraSIRT3d::setConeGeometry(unsigned int iProjAngles,
-                                  unsigned int iProjU,
-                                  unsigned int iProjV,
-                                  float fOriginSourceDistance,
-                                  float fOriginDetectorDistance,
-                                  float fDetUSize,
-                                  float fDetVSize,
-                                  const float *pfAngles)
-{
-	if (pData->initialized)
-		return false;
-
-	if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0)
-		return false;
-
-	SConeProjection* p = genConeProjections(iProjAngles,
-                                            iProjU, iProjV,
-                                            fOriginSourceDistance,
-                                            fOriginDetectorDistance,
-                                            fDetUSize, fDetVSize,
-                                            pfAngles);
-	pData->dims.iProjAngles = iProjAngles;
-	pData->dims.iProjU = iProjU;
-	pData->dims.iProjV = iProjV;
-
-	pData->projs = p;
-	pData->projType = PROJ_CONE;
-
-	return true;
-}
 
 bool AstraSIRT3d::enableSuperSampling(unsigned int iVoxelSuperSampling,
                                       unsigned int iDetectorSuperSampling)
@@ -837,125 +779,51 @@ AstraCGLS3d::~AstraCGLS3d()
 	pData = 0;
 }
 
-bool AstraCGLS3d::setReconstructionGeometry(unsigned int iVolX,
-                                            unsigned int iVolY,
-                                            unsigned int iVolZ/*,
-                                            float fPixelSize = 1.0f*/)
+bool AstraCGLS3d::setGeometry(const CVolumeGeometry3D* pVolGeom,
+	                      const CProjectionGeometry3D* pProjGeom)
 {
 	if (pData->initialized)
 		return false;
 
-	pData->dims.iVolX = iVolX;
-	pData->dims.iVolY = iVolY;
-	pData->dims.iVolZ = iVolZ;
+	convertAstraGeometry_dims(pVolGeom, pProjGeom, pData->dims);
 
-	return (iVolX > 0 && iVolY > 0 && iVolZ > 0);
-}
-
-
-bool AstraCGLS3d::setPar3DGeometry(unsigned int iProjAngles,
-                                   unsigned int iProjU,
-                                   unsigned int iProjV,
-                                   const SPar3DProjection* projs)
-{
-	if (pData->initialized)
+	if (pData->dims.iVolX <= 0 || pData->dims.iVolX <= 0 || pData->dims.iVolX <= 0)
 		return false;
-
-	pData->dims.iProjAngles = iProjAngles;
-	pData->dims.iProjU = iProjU;
-	pData->dims.iProjV = iProjV;
-
-	if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || projs == 0)
+	if (pData->dims.iProjAngles <= 0 || pData->dims.iProjU <= 0 || pData->dims.iProjV <= 0)
 		return false;
 
-	pData->parprojs = new SPar3DProjection[iProjAngles];
-	memcpy(pData->parprojs, projs, iProjAngles * sizeof(projs[0]));
+	const CConeProjectionGeometry3D* conegeom = dynamic_cast<const CConeProjectionGeometry3D*>(pProjGeom);
+	const CParallelProjectionGeometry3D* par3dgeom = dynamic_cast<const CParallelProjectionGeometry3D*>(pProjGeom);
+	const CParallelVecProjectionGeometry3D* parvec3dgeom = dynamic_cast<const CParallelVecProjectionGeometry3D*>(pProjGeom);
+	const CConeVecProjectionGeometry3D* conevec3dgeom = dynamic_cast<const CConeVecProjectionGeometry3D*>(pProjGeom);
 
-	pData->projType = PROJ_PARALLEL;
-
-	return true;
-}
-
-bool AstraCGLS3d::setPar3DGeometry(unsigned int iProjAngles,
-                                   unsigned int iProjU,
-                                   unsigned int iProjV,
-                                   float fDetUSize,
-                                   float fDetVSize,
-                                   const float *pfAngles)
-{
-	if (pData->initialized)
-		return false;
-
-	if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0)
-		return false;
-
-	SPar3DProjection* p = genPar3DProjections(iProjAngles,
-                                              iProjU, iProjV,
-                                              fDetUSize, fDetVSize,
-                                              pfAngles);
-	pData->dims.iProjAngles = iProjAngles;
-	pData->dims.iProjU = iProjU;
-	pData->dims.iProjV = iProjV;
-
-	pData->parprojs = p;
-	pData->projType = PROJ_PARALLEL;
-
-	return true;
-}
-
-
-
-bool AstraCGLS3d::setConeGeometry(unsigned int iProjAngles,
-                                  unsigned int iProjU,
-                                  unsigned int iProjV,
-                                  const SConeProjection* projs)
-{
-	if (pData->initialized)
-		return false;
-
-	pData->dims.iProjAngles = iProjAngles;
-	pData->dims.iProjU = iProjU;
-	pData->dims.iProjV = iProjV;
-
-	if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || projs == 0)
-		return false;
-
-	pData->projs = new SConeProjection[iProjAngles];
-	memcpy(pData->projs, projs, iProjAngles * sizeof(projs[0]));
-
-	pData->projType = PROJ_CONE;
-
-	return true;
-}
+	float outputScale;
+	bool ok;
 
-bool AstraCGLS3d::setConeGeometry(unsigned int iProjAngles,
-                                  unsigned int iProjU,
-                                  unsigned int iProjV,
-                                  float fOriginSourceDistance,
-                                  float fOriginDetectorDistance,
-                                  float fDetUSize,
-                                  float fDetVSize,
-                                  const float *pfAngles)
-{
-	if (pData->initialized)
-		return false;
+	pData->projs = 0;
+	pData->parprojs = 0;
+
+	if (conegeom) {
+		ok = convertAstraGeometry(pVolGeom, conegeom, pData->projs, outputScale);
+		pData->projType = PROJ_PARALLEL;
+	} else if (conevec3dgeom) {
+		ok = convertAstraGeometry(pVolGeom, conevec3dgeom, pData->projs, outputScale);
+		pData->projType = PROJ_PARALLEL;
+	} else if (par3dgeom) {
+		ok = convertAstraGeometry(pVolGeom, par3dgeom, pData->parprojs, outputScale);
+		pData->projType = PROJ_CONE;
+	} else if (parvec3dgeom) {
+		ok = convertAstraGeometry(pVolGeom, parvec3dgeom, pData->parprojs, outputScale);
+		pData->projType = PROJ_CONE;
+	} else {
+		ok = false;
+	}
 
-	if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0)
+	if (!ok)
 		return false;
 
-	SConeProjection* p = genConeProjections(iProjAngles,
-                                            iProjU, iProjV,
-                                            fOriginSourceDistance,
-                                            fOriginDetectorDistance,
-                                            fDetUSize, fDetVSize,
-                                            pfAngles);
-
-	pData->dims.iProjAngles = iProjAngles;
-	pData->dims.iProjU = iProjU;
-	pData->dims.iProjV = iProjV;
 
-	pData->projs = p;
-	pData->projType = PROJ_CONE;
+	// TODO: Handle outputScale
 
 	return true;
 }
diff --git a/cuda/3d/astra3d.h b/cuda/3d/astra3d.h
index 47e252e..cab5479 100644
--- a/cuda/3d/astra3d.h
+++ b/cuda/3d/astra3d.h
@@ -42,7 +42,12 @@ enum Cuda3DProjectionKernel {
 	ker3d_sum_square_weights
 };
 
-
+class CProjectionGeometry3D;
+class CParallelProjectionGeometry3D;
+class CParallelVecProjectionGeometry3D;
+class CConeProjectionGeometry3D;
+class CConeVecProjectionGeometry3D;
+class CVolumeGeometry3D;
 class AstraSIRT3d_internal;
 
 
@@ -52,37 +57,9 @@ public:
 	AstraSIRT3d();
 	~AstraSIRT3d();
 
-	// Set the number of pixels in the reconstruction rectangle,
-	// and the length of the edge of a pixel.
-	// Volume pixels are assumed to be square.
-	// This must be called before setting the projection geometry.
-	bool setReconstructionGeometry(unsigned int iVolX,
-	                               unsigned int iVolY,
-	                               unsigned int iVolZ/*,
-	                               float fPixelSize = 1.0f*/);
-
-	bool setConeGeometry(unsigned int iProjAngles,
-	                     unsigned int iProjU,
-	                     unsigned int iProjV,
-	                     const SConeProjection* projs);
-	bool setConeGeometry(unsigned int iProjAngles,
-	                     unsigned int iProjU,
-	                     unsigned int iProjV,
-	                     float fOriginSourceDistance,
-	                     float fOriginDetectorDistance,
-	                     float fSourceZ,
-	                     float fDetSize,
-	                     const float *pfAngles);
-	bool setPar3DGeometry(unsigned int iProjAngles,
-	                      unsigned int iProjU,
-	                      unsigned int iProjV,
-	                      const SPar3DProjection* projs);
-	bool setPar3DGeometry(unsigned int iProjAngles,
-	                      unsigned int iProjU,
-	                      unsigned int iProjV,
-	                      float fSourceZ,
-	                      float fDetSize,
-	                      const float *pfAngles);
+	// Set the volume and projection geometry
+	bool setGeometry(const CVolumeGeometry3D* pVolGeom,
+	                 const CProjectionGeometry3D* pProjGeom);
 
 	// Enable supersampling.
 	//
@@ -197,37 +174,9 @@ public:
 	AstraCGLS3d();
 	~AstraCGLS3d();
 
-	// Set the number of pixels in the reconstruction rectangle,
-	// and the length of the edge of a pixel.
-	// Volume pixels are assumed to be square.
-	// This must be called before setting the projection geometry.
-	bool setReconstructionGeometry(unsigned int iVolX,
-	                               unsigned int iVolY,
-	                               unsigned int iVolZ/*,
-	                               float fPixelSize = 1.0f*/);
-
-	bool setConeGeometry(unsigned int iProjAngles,
-	                     unsigned int iProjU,
-	                     unsigned int iProjV,
-	                     const SConeProjection* projs);
-	bool setConeGeometry(unsigned int iProjAngles,
-	                     unsigned int iProjU,
-	                     unsigned int iProjV,
-	                     float fOriginSourceDistance,
-	                     float fOriginDetectorDistance,
-	                     float fSourceZ,
-	                     float fDetSize,
-	                     const float *pfAngles);
-	bool setPar3DGeometry(unsigned int iProjAngles,
-	                      unsigned int iProjU,
-	                      unsigned int iProjV,
-	                      const SPar3DProjection* projs);
-	bool setPar3DGeometry(unsigned int iProjAngles,
-	                      unsigned int iProjU,
-	                      unsigned int iProjV,
-	                      float fSourceZ,
-	                      float fDetSize,
-	                      const float *pfAngles);
+	// Set the volume and projection geometry
+	bool setGeometry(const CVolumeGeometry3D* pVolGeom,
+	                 const CProjectionGeometry3D* pProjGeom);
 
 	// Enable supersampling.
 	//
@@ -466,21 +415,6 @@ _AstraExport bool astraCudaFDK(float* pfVolume, const float* pfProjections,
                   bool bShortScan,
                   int iGPUIndex, int iVoxelSuperSampling);
 
-_AstraExport bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
-                          const CParallelProjectionGeometry3D* pProjGeom,
-                          SPar3DProjection*& pProjs, float& fOutputScale);
-
-_AstraExport bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
-                          const CParallelVecProjectionGeometry3D* pProjGeom,
-                          SPar3DProjection*& pProjs, float& fOutputScale);
-
-_AstraExport bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
-                          const CConeProjectionGeometry3D* pProjGeom,
-                          SConeProjection*& pProjs, float& fOutputScale);
-
-_AstraExport bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
-                          const CConeVecProjectionGeometry3D* pProjGeom,
-                          SConeProjection*& pProjs, float& fOutputScale);
 
 }
 
-- 
cgit v1.2.3


From 9eb68c39c62a8e674e3dbe50252528226c6593ff Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Wed, 11 Mar 2015 12:06:11 +0100
Subject: Adjust interface slightly

---
 cuda/3d/astra3d.cu | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

(limited to 'cuda')

diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu
index 426f3a0..5b1f363 100644
--- a/cuda/3d/astra3d.cu
+++ b/cuda/3d/astra3d.cu
@@ -182,7 +182,7 @@ static bool convertAstraGeometry_internal(const CVolumeGeometry3D* pVolGeom,
 }
 
 
-void convertAstraGeometry_dims(const CVolumeGeometry3D* pVolGeom,
+bool convertAstraGeometry_dims(const CVolumeGeometry3D* pVolGeom,
                                const CProjectionGeometry3D* pProjGeom,
                                SDimensions3D& dims)
 {
@@ -194,6 +194,13 @@ void convertAstraGeometry_dims(const CVolumeGeometry3D* pVolGeom,
 	dims.iProjV = pProjGeom->getDetectorRowCount(),
 	dims.iRaysPerDetDim = 1;
 	dims.iRaysPerVoxelDim = 1;
+
+	if (dims.iVolX <= 0 || dims.iVolX <= 0 || dims.iVolX <= 0)
+		return false;
+	if (dims.iProjAngles <= 0 || dims.iProjU <= 0 || dims.iProjV <= 0)
+		return false;
+
+	return true;
 }
 
 
@@ -390,11 +397,9 @@ bool AstraSIRT3d::setGeometry(const CVolumeGeometry3D* pVolGeom,
 	if (pData->initialized)
 		return false;
 
-	convertAstraGeometry_dims(pVolGeom, pProjGeom, pData->dims);
+	bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, pData->dims);
 
-	if (pData->dims.iVolX <= 0 || pData->dims.iVolX <= 0 || pData->dims.iVolX <= 0)
-		return false;
-	if (pData->dims.iProjAngles <= 0 || pData->dims.iProjU <= 0 || pData->dims.iProjV <= 0)
+	if (!ok)
 		return false;
 
 	const CConeProjectionGeometry3D* conegeom = dynamic_cast<const CConeProjectionGeometry3D*>(pProjGeom);
@@ -403,7 +408,6 @@ bool AstraSIRT3d::setGeometry(const CVolumeGeometry3D* pVolGeom,
 	const CConeVecProjectionGeometry3D* conevec3dgeom = dynamic_cast<const CConeVecProjectionGeometry3D*>(pProjGeom);
 
 	float outputScale;
-	bool ok;
 
 	pData->projs = 0;
 	pData->parprojs = 0;
@@ -785,11 +789,9 @@ bool AstraCGLS3d::setGeometry(const CVolumeGeometry3D* pVolGeom,
 	if (pData->initialized)
 		return false;
 
-	convertAstraGeometry_dims(pVolGeom, pProjGeom, pData->dims);
+	bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, pData->dims);
 
-	if (pData->dims.iVolX <= 0 || pData->dims.iVolX <= 0 || pData->dims.iVolX <= 0)
-		return false;
-	if (pData->dims.iProjAngles <= 0 || pData->dims.iProjU <= 0 || pData->dims.iProjV <= 0)
+	if (!ok)
 		return false;
 
 	const CConeProjectionGeometry3D* conegeom = dynamic_cast<const CConeProjectionGeometry3D*>(pProjGeom);
@@ -798,7 +800,6 @@ bool AstraCGLS3d::setGeometry(const CVolumeGeometry3D* pVolGeom,
 	const CConeVecProjectionGeometry3D* conevec3dgeom = dynamic_cast<const CConeVecProjectionGeometry3D*>(pProjGeom);
 
 	float outputScale;
-	bool ok;
 
 	pData->projs = 0;
 	pData->parprojs = 0;
-- 
cgit v1.2.3


From 140f64028a6c06895ba7dad8997e14b7a05aadab Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Wed, 11 Mar 2015 12:07:48 +0100
Subject: Let astraCudaFDK use utility functions

---
 cuda/3d/astra3d.cu | 36 ++++++++++++++----------------------
 cuda/3d/astra3d.h  | 13 ++-----------
 2 files changed, 16 insertions(+), 33 deletions(-)

(limited to 'cuda')

diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu
index 5b1f363..0e94fb8 100644
--- a/cuda/3d/astra3d.cu
+++ b/cuda/3d/astra3d.cu
@@ -1679,33 +1679,19 @@ bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume,
 
 
 bool astraCudaFDK(float* pfVolume, const float* pfProjections,
-                  unsigned int iVolX,
-                  unsigned int iVolY,
-                  unsigned int iVolZ,
-                  unsigned int iProjAngles,
-                  unsigned int iProjU,
-                  unsigned int iProjV,
-                  float fOriginSourceDistance,
-                  float fOriginDetectorDistance,
-                  float fDetUSize,
-                  float fDetVSize,
-                  const float *pfAngles,
+                  const CVolumeGeometry3D* pVolGeom,
+                  const CConeProjectionGeometry3D* pProjGeom,
                   bool bShortScan,
                   int iGPUIndex, int iVoxelSuperSampling)
 {
 	SDimensions3D dims;
 
-	dims.iVolX = iVolX;
-	dims.iVolY = iVolY;
-	dims.iVolZ = iVolZ;
-	if (iVolX == 0 || iVolY == 0 || iVolZ == 0)
-		return false;
+	bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims);
 
-	dims.iProjAngles = iProjAngles;
-	dims.iProjU = iProjU;
-	dims.iProjV = iProjV;
+	// TODO: Check that pVolGeom is normalized, since we don't support
+	// other volume geometries yet
 
-	if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0)
+	if (!ok)
 		return false;
 
 	dims.iRaysPerVoxelDim = iVoxelSuperSampling;
@@ -1722,9 +1708,8 @@ bool astraCudaFDK(float* pfVolume, const float* pfProjections,
 			return false;
 	}
 
-
 	cudaPitchedPtr D_volumeData = allocateVolumeData(dims);
-	bool ok = D_volumeData.ptr;
+	ok = D_volumeData.ptr;
 	if (!ok)
 		return false;
 
@@ -1745,6 +1730,13 @@ bool astraCudaFDK(float* pfVolume, const float* pfProjections,
 		return false;
 	}
 
+	float fOriginSourceDistance = pProjGeom->getOriginSourceDistance();
+	float fOriginDetectorDistance = pProjGeom->getOriginDetectorDistance();
+	float fDetUSize = pProjGeom->getDetectorSpacingX();
+	float fDetVSize = pProjGeom->getDetectorSpacingY();
+	const float *pfAngles = pProjGeom->getProjectionAngles();
+
+
 	// TODO: Offer interface for SrcZ, DetZ
 	ok &= FDK(D_volumeData, D_projData, fOriginSourceDistance,
 	          fOriginDetectorDistance, 0, 0, fDetUSize, fDetVSize,
diff --git a/cuda/3d/astra3d.h b/cuda/3d/astra3d.h
index cab5479..6bac8b2 100644
--- a/cuda/3d/astra3d.h
+++ b/cuda/3d/astra3d.h
@@ -401,17 +401,8 @@ _AstraExport bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume, const float* pf
                       int iGPUIndex, int iVoxelSuperSampling);
 
 _AstraExport bool astraCudaFDK(float* pfVolume, const float* pfProjections,
-                  unsigned int iVolX,
-                  unsigned int iVolY,
-                  unsigned int iVolZ,
-                  unsigned int iProjAngles,
-                  unsigned int iProjU,
-                  unsigned int iProjV,
-                  float fOriginSourceDistance,
-                  float fOriginDetectorDistance,
-                  float fDetUSize,
-                  float fDetVSize,
-                  const float *pfAngles,
+                  const CVolumeGeometry3D* pVolGeom,
+                  const CConeProjectionGeometry3D* pProjGeom,
                   bool bShortScan,
                   int iGPUIndex, int iVoxelSuperSampling);
 
-- 
cgit v1.2.3


From e188bcdaaffee075adf5fa4371453d91bcb71225 Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Wed, 11 Mar 2015 14:43:49 +0100
Subject: Add another utility function

---
 cuda/3d/astra3d.cu | 99 +++++++++++++++++++++++++++++-------------------------
 1 file changed, 53 insertions(+), 46 deletions(-)

(limited to 'cuda')

diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu
index 0e94fb8..eff928d 100644
--- a/cuda/3d/astra3d.cu
+++ b/cuda/3d/astra3d.cu
@@ -305,6 +305,37 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
 }
 
 
+bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
+                          const CConeVecProjectionGeometry3D* pProjGeom,
+                          SPar3DProjection*& pParProjs,
+                          SConeProjection*& pConeProjs,
+                          float& fOutputScale)
+{
+	const CConeProjectionGeometry3D* conegeom = dynamic_cast<const CConeProjectionGeometry3D*>(pProjGeom);
+	const CParallelProjectionGeometry3D* par3dgeom = dynamic_cast<const CParallelProjectionGeometry3D*>(pProjGeom);
+	const CParallelVecProjectionGeometry3D* parvec3dgeom = dynamic_cast<const CParallelVecProjectionGeometry3D*>(pProjGeom);
+	const CConeVecProjectionGeometry3D* conevec3dgeom = dynamic_cast<const CConeVecProjectionGeometry3D*>(pProjGeom);
+
+	pConeProjs = 0;
+	pParProjs = 0;
+
+	bool ok;
+
+	if (conegeom) {
+		ok = convertAstraGeometry(pVolGeom, conegeom, pConeProjs, outputScale);
+	} else if (conevec3dgeom) {
+		ok = convertAstraGeometry(pVolGeom, conevec3dgeom, pConeProjs, outputScale);
+	} else if (par3dgeom) {
+		ok = convertAstraGeometry(pVolGeom, par3dgeom, pParProjs, outputScale);
+	} else if (parvec3dgeom) {
+		ok = convertAstraGeometry(pVolGeom, parvec3dgeom, pParProjs, outputScale);
+	} else {
+		ok = false;
+	}
+
+	return ok;
+}
+
 
 
 
@@ -402,35 +433,23 @@ bool AstraSIRT3d::setGeometry(const CVolumeGeometry3D* pVolGeom,
 	if (!ok)
 		return false;
 
-	const CConeProjectionGeometry3D* conegeom = dynamic_cast<const CConeProjectionGeometry3D*>(pProjGeom);
-	const CParallelProjectionGeometry3D* par3dgeom = dynamic_cast<const CParallelProjectionGeometry3D*>(pProjGeom);
-	const CParallelVecProjectionGeometry3D* parvec3dgeom = dynamic_cast<const CParallelVecProjectionGeometry3D*>(pProjGeom);
-	const CConeVecProjectionGeometry3D* conevec3dgeom = dynamic_cast<const CConeVecProjectionGeometry3D*>(pProjGeom);
-
-	float outputScale;
-
 	pData->projs = 0;
 	pData->parprojs = 0;
+	float outputScale;
 
-	if (conegeom) {
-		ok = convertAstraGeometry(pVolGeom, conegeom, pData->projs, outputScale);
-		pData->projType = PROJ_PARALLEL;
-	} else if (conevec3dgeom) {
-		ok = convertAstraGeometry(pVolGeom, conevec3dgeom, pData->projs, outputScale);
-		pData->projType = PROJ_PARALLEL;
-	} else if (par3dgeom) {
-		ok = convertAstraGeometry(pVolGeom, par3dgeom, pData->parprojs, outputScale);
-		pData->projType = PROJ_CONE;
-	} else if (parvec3dgeom) {
-		ok = convertAstraGeometry(pVolGeom, parvec3dgeom, pData->parprojs, outputScale);
-		pData->projType = PROJ_CONE;
-	} else {
-		ok = false;
-	}
-
+	ok = convertAstraGeometry(pVolGeom, pProjGeom,
+	                          pData->parprojs, pData->projs,
+	                          outputScale);
 	if (!ok)
 		return false;
 
+	if (pData->projs) {
+		assert(pData->parprojs == 0);
+		pData->projType = PROJ_CONE;
+	} else {
+		assert(pData->parprojs != 0);
+		pData->projType = PROJ_PARALLEL;
+	}
 
 	// TODO: Handle outputScale
 
@@ -794,35 +813,23 @@ bool AstraCGLS3d::setGeometry(const CVolumeGeometry3D* pVolGeom,
 	if (!ok)
 		return false;
 
-	const CConeProjectionGeometry3D* conegeom = dynamic_cast<const CConeProjectionGeometry3D*>(pProjGeom);
-	const CParallelProjectionGeometry3D* par3dgeom = dynamic_cast<const CParallelProjectionGeometry3D*>(pProjGeom);
-	const CParallelVecProjectionGeometry3D* parvec3dgeom = dynamic_cast<const CParallelVecProjectionGeometry3D*>(pProjGeom);
-	const CConeVecProjectionGeometry3D* conevec3dgeom = dynamic_cast<const CConeVecProjectionGeometry3D*>(pProjGeom);
-
-	float outputScale;
-
 	pData->projs = 0;
 	pData->parprojs = 0;
+	float outputScale;
 
-	if (conegeom) {
-		ok = convertAstraGeometry(pVolGeom, conegeom, pData->projs, outputScale);
-		pData->projType = PROJ_PARALLEL;
-	} else if (conevec3dgeom) {
-		ok = convertAstraGeometry(pVolGeom, conevec3dgeom, pData->projs, outputScale);
-		pData->projType = PROJ_PARALLEL;
-	} else if (par3dgeom) {
-		ok = convertAstraGeometry(pVolGeom, par3dgeom, pData->parprojs, outputScale);
-		pData->projType = PROJ_CONE;
-	} else if (parvec3dgeom) {
-		ok = convertAstraGeometry(pVolGeom, parvec3dgeom, pData->parprojs, outputScale);
-		pData->projType = PROJ_CONE;
-	} else {
-		ok = false;
-	}
-
+	ok = convertAstraGeometry(pVolGeom, pProjGeom,
+	                          pData->parprojs, pData->projs,
+	                          outputScale);
 	if (!ok)
 		return false;
 
+	if (pData->projs) {
+		assert(pData->parprojs == 0);
+		pData->projType = PROJ_CONE;
+	} else {
+		assert(pData->parprojs != 0);
+		pData->projType = PROJ_PARALLEL;
+	}
 
 	// TODO: Handle outputScale
 
-- 
cgit v1.2.3


From 18d12242207d1113c3015b451f522531168e626a Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Wed, 11 Mar 2015 17:27:44 +0100
Subject: Add flexible volgeom3d support to astraCudaBP_SIRTWeighted

---
 cuda/3d/astra3d.cu | 95 ++++++++++++++++++++----------------------------------
 cuda/3d/astra3d.h  | 23 ++-----------
 2 files changed, 38 insertions(+), 80 deletions(-)

(limited to 'cuda')

diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu
index eff928d..2f7ea99 100644
--- a/cuda/3d/astra3d.cu
+++ b/cuda/3d/astra3d.cu
@@ -306,7 +306,7 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
 
 
 bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
-                          const CConeVecProjectionGeometry3D* pProjGeom,
+                          const CProjectionGeometry3D* pProjGeom,
                           SPar3DProjection*& pParProjs,
                           SConeProjection*& pConeProjs,
                           float& fOutputScale)
@@ -322,13 +322,13 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
 	bool ok;
 
 	if (conegeom) {
-		ok = convertAstraGeometry(pVolGeom, conegeom, pConeProjs, outputScale);
+		ok = convertAstraGeometry(pVolGeom, conegeom, pConeProjs, fOutputScale);
 	} else if (conevec3dgeom) {
-		ok = convertAstraGeometry(pVolGeom, conevec3dgeom, pConeProjs, outputScale);
+		ok = convertAstraGeometry(pVolGeom, conevec3dgeom, pConeProjs, fOutputScale);
 	} else if (par3dgeom) {
-		ok = convertAstraGeometry(pVolGeom, par3dgeom, pParProjs, outputScale);
+		ok = convertAstraGeometry(pVolGeom, par3dgeom, pParProjs, fOutputScale);
 	} else if (parvec3dgeom) {
-		ok = convertAstraGeometry(pVolGeom, parvec3dgeom, pParProjs, outputScale);
+		ok = convertAstraGeometry(pVolGeom, parvec3dgeom, pParProjs, fOutputScale);
 	} else {
 		ok = false;
 	}
@@ -1471,40 +1471,6 @@ bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections,
 	return ok;
 }
 
-// This computes the column weights, divides by them, and adds the
-// result to the current volume. This is both more expensive and more
-// GPU memory intensive than the regular BP, but allows saving system RAM.
-bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume, const float* pfProjections,
-                      unsigned int iVolX,
-                      unsigned int iVolY,
-                      unsigned int iVolZ,
-                      unsigned int iProjAngles,
-                      unsigned int iProjU,
-                      unsigned int iProjV,
-                      float fDetUSize,
-                      float fDetVSize,
-                      const float *pfAngles,
-                      int iGPUIndex, int iVoxelSuperSampling)
-{
-	if (iVolX == 0 || iVolY == 0 || iVolZ == 0)
-		return false;
-	if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0)
-		return false;
-
-	SPar3DProjection* p = genPar3DProjections(iProjAngles,
-                                             iProjU, iProjV,
-                                             fDetUSize, fDetVSize,
-                                             pfAngles);
-
-	bool ok;
-	ok = astraCudaPar3DBP_SIRTWeighted(pfVolume, pfProjections, iVolX, iVolY, iVolZ,
-	                      iProjAngles, iProjU, iProjV, p, iGPUIndex, iVoxelSuperSampling);
-
-	delete[] p;
-
-	return ok;
-}
-
 
 bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections,
                       unsigned int iVolX,
@@ -1582,33 +1548,30 @@ bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections,
 // This computes the column weights, divides by them, and adds the
 // result to the current volume. This is both more expensive and more
 // GPU memory intensive than the regular BP, but allows saving system RAM.
-bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume,
+bool astraCudaBP_SIRTWeighted(float* pfVolume,
                       const float* pfProjections,
-                      unsigned int iVolX,
-                      unsigned int iVolY,
-                      unsigned int iVolZ,
-                      unsigned int iProjAngles,
-                      unsigned int iProjU,
-                      unsigned int iProjV,
-                      const SPar3DProjection *pfAngles,
+                      const CVolumeGeometry3D* pVolGeom,
+                      const CProjectionGeometry3D* pProjGeom,
                       int iGPUIndex, int iVoxelSuperSampling)
 {
 	SDimensions3D dims;
 
-	dims.iVolX = iVolX;
-	dims.iVolY = iVolY;
-	dims.iVolZ = iVolZ;
-	if (iVolX == 0 || iVolY == 0 || iVolZ == 0)
+	bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims);
+	if (!ok)
 		return false;
 
-	dims.iProjAngles = iProjAngles;
-	dims.iProjU = iProjU;
-	dims.iProjV = iProjV;
+	dims.iRaysPerVoxelDim = iVoxelSuperSampling;
 
-	if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0)
-		return false;
+	SPar3DProjection* pParProjs;
+	SConeProjection* pConeProjs;
 
-	dims.iRaysPerVoxelDim = iVoxelSuperSampling;
+	float outputScale;
+
+	ok = convertAstraGeometry(pVolGeom, pProjGeom,
+	                          pParProjs, pConeProjs,
+	                          outputScale);
+
+	// TODO: OutputScale
 
 	if (iGPUIndex != -1) {
 		cudaSetDevice(iGPUIndex);
@@ -1621,7 +1584,7 @@ bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume,
 
 
 	cudaPitchedPtr D_pixelWeight = allocateVolumeData(dims);
-	bool ok = D_pixelWeight.ptr;
+	ok = D_pixelWeight.ptr;
 	if (!ok)
 		return false;
 
@@ -1643,7 +1606,12 @@ bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume,
 	// Compute weights
 	ok &= zeroVolumeData(D_pixelWeight, dims);
 	processSino3D<opSet>(D_projData, 1.0f, dims);
-	ok &= Par3DBP(D_pixelWeight, D_projData, dims, pfAngles);
+
+	if (pParProjs)
+		ok &= Par3DBP(D_pixelWeight, D_projData, dims, pParProjs);
+	else
+		ok &= ConeBP(D_pixelWeight, D_projData, dims, pConeProjs);
+
 	processVol3D<opInvert>(D_pixelWeight, dims);
 	if (!ok) {
 		cudaFree(D_pixelWeight.ptr);
@@ -1656,7 +1624,11 @@ bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume,
 	                              dims, dims.iProjU);
 	ok &= zeroVolumeData(D_volumeData, dims);
 	// Do BP into D_volumeData
-	ok &= Par3DBP(D_volumeData, D_projData, dims, pfAngles);
+	if (pParProjs)
+		ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs);
+	else
+		ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs);
+
 	// Multiply with weights
 	processVol3D<opMul>(D_volumeData, D_pixelWeight, dims);
 
@@ -1679,6 +1651,9 @@ bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume,
 	cudaFree(D_volumeData.ptr);
 	cudaFree(D_projData.ptr);
 
+	delete[] pParProjs;
+	delete[] pConeProjs;
+
 	return ok;
 
 }
diff --git a/cuda/3d/astra3d.h b/cuda/3d/astra3d.h
index 6bac8b2..b2e4e08 100644
--- a/cuda/3d/astra3d.h
+++ b/cuda/3d/astra3d.h
@@ -378,26 +378,9 @@ _AstraExport bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections,
                       const SPar3DProjection *pfAngles,
                       int iGPUIndex, int iVoxelSuperSampling);
 
-_AstraExport bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume, const float* pfProjections,
-                      unsigned int iVolX,
-                      unsigned int iVolY,
-                      unsigned int iVolZ,
-                      unsigned int iProjAngles,
-                      unsigned int iProjU,
-                      unsigned int iProjV,
-                      float fDetUSize,
-                      float fDetVSize,
-                      const float *pfAngles,
-                      int iGPUIndex, int iVoxelSuperSampling);
-
-_AstraExport bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume, const float* pfProjections,
-                      unsigned int iVolX,
-                      unsigned int iVolY,
-                      unsigned int iVolZ,
-                      unsigned int iProjAngles,
-                      unsigned int iProjU,
-                      unsigned int iProjV,
-                      const SPar3DProjection *pfAngles,
+_AstraExport bool astraCudaBP_SIRTWeighted(float* pfVolume, const float* pfProjections,
+                      const CVolumeGeometry3D* pVolGeom,
+                      const CProjectionGeometry3D* pProjGeom,
                       int iGPUIndex, int iVoxelSuperSampling);
 
 _AstraExport bool astraCudaFDK(float* pfVolume, const float* pfProjections,
-- 
cgit v1.2.3


From 6909836555afe155ffc3897ef2189ed0562bb045 Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Wed, 11 Mar 2015 18:44:53 +0100
Subject: Add flexible volgeom3d support to astraCudaBP

---
 cuda/3d/astra3d.cu | 176 ++++++-----------------------------------------------
 cuda/3d/astra3d.h  |  47 +-------------
 2 files changed, 21 insertions(+), 202 deletions(-)

(limited to 'cuda')

diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu
index 2f7ea99..97bebf4 100644
--- a/cuda/3d/astra3d.cu
+++ b/cuda/3d/astra3d.cu
@@ -1331,173 +1331,30 @@ bool astraCudaPar3DFP(const float* pfVolume, float* pfProjections,
 
 }
 
-bool astraCudaConeBP(float* pfVolume, const float* pfProjections,
-                     unsigned int iVolX,
-                     unsigned int iVolY,
-                     unsigned int iVolZ,
-                     unsigned int iProjAngles,
-                     unsigned int iProjU,
-                     unsigned int iProjV,
-                     float fOriginSourceDistance,
-                     float fOriginDetectorDistance,
-                     float fDetUSize,
-                     float fDetVSize,
-                     const float *pfAngles,
-                     int iGPUIndex, int iVoxelSuperSampling)
-{
-	if (iVolX == 0 || iVolY == 0 || iVolZ == 0)
-		return false;
-	if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0)
-		return false;
-
-	SConeProjection* p = genConeProjections(iProjAngles,
-                                            iProjU, iProjV,
-                                            fOriginSourceDistance,
-                                            fOriginDetectorDistance,
-                                            fDetUSize, fDetVSize,
-                                            pfAngles);
-
-	bool ok;
-	ok = astraCudaConeBP(pfVolume, pfProjections, iVolX, iVolY, iVolZ,
-	                     iProjAngles, iProjU, iProjV, p, iGPUIndex, iVoxelSuperSampling);
-
-	delete[] p;
-
-	return ok;
-}
 
-bool astraCudaConeBP(float* pfVolume, const float* pfProjections,
-                     unsigned int iVolX,
-                     unsigned int iVolY,
-                     unsigned int iVolZ,
-                     unsigned int iProjAngles,
-                     unsigned int iProjU,
-                     unsigned int iProjV,
-                     const SConeProjection *pfAngles,
-                     int iGPUIndex, int iVoxelSuperSampling)
+bool astraCudaBP(float* pfVolume, const float* pfProjections,
+                 const CVolumeGeometry3D* pVolGeom,
+                 const CProjectionGeometry3D* pProjGeom,
+                 int iGPUIndex, int iVoxelSuperSampling)
 {
 	SDimensions3D dims;
 
-	dims.iVolX = iVolX;
-	dims.iVolY = iVolY;
-	dims.iVolZ = iVolZ;
-	if (iVolX == 0 || iVolY == 0 || iVolZ == 0)
-		return false;
-
-	dims.iProjAngles = iProjAngles;
-	dims.iProjU = iProjU;
-	dims.iProjV = iProjV;
-
-	if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0)
-		return false;
-
-	dims.iRaysPerVoxelDim = iVoxelSuperSampling;
-
-	if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0)
-		return false;
-
-	if (iGPUIndex != -1) {
-		cudaSetDevice(iGPUIndex);
-		cudaError_t err = cudaGetLastError();
-
-		// Ignore errors caused by calling cudaSetDevice multiple times
-		if (err != cudaSuccess && err != cudaErrorSetOnActiveProcess)
-			return false;
-	}
-
-	cudaPitchedPtr D_volumeData = allocateVolumeData(dims);
-	bool ok = D_volumeData.ptr;
+	bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims);
 	if (!ok)
 		return false;
 
-	cudaPitchedPtr D_projData = allocateProjectionData(dims);
-	ok = D_projData.ptr;
-	if (!ok) {
-		cudaFree(D_volumeData.ptr);
-		return false;
-	}
-
-	ok &= copyProjectionsToDevice(pfProjections, D_projData,
-	                              dims, dims.iProjU);
-
-	ok &= zeroVolumeData(D_volumeData, dims);
-
-	if (!ok) {
-		cudaFree(D_volumeData.ptr);
-		cudaFree(D_projData.ptr);
-		return false;
-	}
-
-	ok &= ConeBP(D_volumeData, D_projData, dims, pfAngles);
-
-	ok &= copyVolumeFromDevice(pfVolume, D_volumeData, dims, dims.iVolX);
-
-
-	cudaFree(D_volumeData.ptr);
-	cudaFree(D_projData.ptr);
-
-	return ok;
-
-}
-
-bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections,
-                      unsigned int iVolX,
-                      unsigned int iVolY,
-                      unsigned int iVolZ,
-                      unsigned int iProjAngles,
-                      unsigned int iProjU,
-                      unsigned int iProjV,
-                      float fDetUSize,
-                      float fDetVSize,
-                      const float *pfAngles,
-                      int iGPUIndex, int iVoxelSuperSampling)
-{
-	if (iVolX == 0 || iVolY == 0 || iVolZ == 0)
-		return false;
-	if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0)
-		return false;
-
-	SPar3DProjection* p = genPar3DProjections(iProjAngles,
-                                             iProjU, iProjV,
-                                             fDetUSize, fDetVSize,
-                                             pfAngles);
-
-	bool ok;
-	ok = astraCudaPar3DBP(pfVolume, pfProjections, iVolX, iVolY, iVolZ,
-	                      iProjAngles, iProjU, iProjV, p, iGPUIndex, iVoxelSuperSampling);
-
-	delete[] p;
-
-	return ok;
-}
-
-
-bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections,
-                      unsigned int iVolX,
-                      unsigned int iVolY,
-                      unsigned int iVolZ,
-                      unsigned int iProjAngles,
-                      unsigned int iProjU,
-                      unsigned int iProjV,
-                      const SPar3DProjection *pfAngles,
-                      int iGPUIndex, int iVoxelSuperSampling)
-{
-	SDimensions3D dims;
+	dims.iRaysPerVoxelDim = iVoxelSuperSampling;
 
-	dims.iVolX = iVolX;
-	dims.iVolY = iVolY;
-	dims.iVolZ = iVolZ;
-	if (iVolX == 0 || iVolY == 0 || iVolZ == 0)
-		return false;
+	SPar3DProjection* pParProjs;
+	SConeProjection* pConeProjs;
 
-	dims.iProjAngles = iProjAngles;
-	dims.iProjU = iProjU;
-	dims.iProjV = iProjV;
+	float outputScale;
 
-	if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0)
-		return false;
+	ok = convertAstraGeometry(pVolGeom, pProjGeom,
+	                          pParProjs, pConeProjs,
+	                          outputScale);
 
-	dims.iRaysPerVoxelDim = iVoxelSuperSampling;
+	// TODO: OutputScale
 
 	if (iGPUIndex != -1) {
 		cudaSetDevice(iGPUIndex);
@@ -1510,7 +1367,7 @@ bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections,
 
 
 	cudaPitchedPtr D_volumeData = allocateVolumeData(dims);
-	bool ok = D_volumeData.ptr;
+	ok = D_volumeData.ptr;
 	if (!ok)
 		return false;
 
@@ -1532,7 +1389,10 @@ bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections,
 		return false;
 	}
 
-	ok &= Par3DBP(D_volumeData, D_projData, dims, pfAngles);
+	if (pParProjs)
+		ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs);
+	else
+		ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs);
 
 	ok &= copyVolumeFromDevice(pfVolume, D_volumeData, dims, dims.iVolX);
 
diff --git a/cuda/3d/astra3d.h b/cuda/3d/astra3d.h
index b2e4e08..5464d2f 100644
--- a/cuda/3d/astra3d.h
+++ b/cuda/3d/astra3d.h
@@ -332,50 +332,9 @@ _AstraExport bool astraCudaPar3DFP(const float* pfVolume, float* pfProjections,
                       Cuda3DProjectionKernel projKernel);
 
 
-_AstraExport bool astraCudaConeBP(float* pfVolume, const float* pfProjections,
-                     unsigned int iVolX,
-                     unsigned int iVolY,
-                     unsigned int iVolZ,
-                     unsigned int iProjAngles,
-                     unsigned int iProjU,
-                     unsigned int iProjV,
-                     float fOriginSourceDistance,
-                     float fOriginDetectorDistance,
-                     float fDetUSize,
-                     float fDetVSize,
-                     const float *pfAngles,
-                     int iGPUIndex, int iVoxelSuperSampling);
-
-_AstraExport bool astraCudaConeBP(float* pfVolume, const float* pfProjections,
-                     unsigned int iVolX,
-                     unsigned int iVolY,
-                     unsigned int iVolZ,
-                     unsigned int iProjAngles,
-                     unsigned int iProjU,
-                     unsigned int iProjV,
-                     const SConeProjection *pfAngles,
-                     int iGPUIndex, int iVoxelSuperSampling);
-
-_AstraExport bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections,
-                      unsigned int iVolX,
-                      unsigned int iVolY,
-                      unsigned int iVolZ,
-                      unsigned int iProjAngles,
-                      unsigned int iProjU,
-                      unsigned int iProjV,
-                      float fDetUSize,
-                      float fDetVSize,
-                      const float *pfAngles,
-                      int iGPUIndex, int iVoxelSuperSampling);
-
-_AstraExport bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections,
-                      unsigned int iVolX,
-                      unsigned int iVolY,
-                      unsigned int iVolZ,
-                      unsigned int iProjAngles,
-                      unsigned int iProjU,
-                      unsigned int iProjV,
-                      const SPar3DProjection *pfAngles,
+_AstraExport bool astraCudaBP(float* pfVolume, const float* pfProjections,
+                      const CVolumeGeometry3D* pVolGeom,
+                      const CProjectionGeometry3D* pProjGeom,
                       int iGPUIndex, int iVoxelSuperSampling);
 
 _AstraExport bool astraCudaBP_SIRTWeighted(float* pfVolume, const float* pfProjections,
-- 
cgit v1.2.3


From 57ee6b85884b8226b26b7415ef151b4a6e63337c Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Thu, 12 Mar 2015 11:53:40 +0100
Subject: Add flexible volgeom3d support to astraCudaFP

---
 cuda/3d/astra3d.cu | 204 +++++++++--------------------------------------------
 cuda/3d/astra3d.h  |  49 +------------
 2 files changed, 36 insertions(+), 217 deletions(-)

(limited to 'cuda')

diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu
index 97bebf4..b2375f3 100644
--- a/cuda/3d/astra3d.cu
+++ b/cuda/3d/astra3d.cu
@@ -1103,179 +1103,31 @@ float AstraCGLS3d::computeDiffNorm()
 
 
 
-bool astraCudaConeFP(const float* pfVolume, float* pfProjections,
-                     unsigned int iVolX,
-                     unsigned int iVolY,
-                     unsigned int iVolZ,
-                     unsigned int iProjAngles,
-                     unsigned int iProjU,
-                     unsigned int iProjV,
-                     float fOriginSourceDistance,
-                     float fOriginDetectorDistance,
-                     float fDetUSize,
-                     float fDetVSize,
-                     const float *pfAngles,
-                     int iGPUIndex, int iDetectorSuperSampling)
-{
-	if (iVolX == 0 || iVolY == 0 || iVolZ == 0)
-		return false;
-	if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0)
-		return false;
-
-	SConeProjection* p = genConeProjections(iProjAngles,
-                                            iProjU, iProjV,
-                                            fOriginSourceDistance,
-                                            fOriginDetectorDistance,
-                                            fDetUSize, fDetVSize,
-                                            pfAngles);
-
-	bool ok;
-	ok = astraCudaConeFP(pfVolume, pfProjections, iVolX, iVolY, iVolZ,
-	                     iProjAngles, iProjU, iProjV, p, iGPUIndex, iDetectorSuperSampling);
-
-	delete[] p;
-
-	return ok;
-}
-
-bool astraCudaConeFP(const float* pfVolume, float* pfProjections,
-                     unsigned int iVolX,
-                     unsigned int iVolY,
-                     unsigned int iVolZ,
-                     unsigned int iProjAngles,
-                     unsigned int iProjU,
-                     unsigned int iProjV,
-                     const SConeProjection *pfAngles,
-                     int iGPUIndex, int iDetectorSuperSampling)
+bool astraCudaFP(const float* pfVolume, float* pfProjections,
+                 const CVolumeGeometry3D* pVolGeom,
+                 const CProjectionGeometry3D* pProjGeom,
+                 int iGPUIndex, int iDetectorSuperSampling,
+                 Cuda3DProjectionKernel projKernel)
 {
 	SDimensions3D dims;
 
-	dims.iVolX = iVolX;
-	dims.iVolY = iVolY;
-	dims.iVolZ = iVolZ;
-	if (iVolX == 0 || iVolY == 0 || iVolZ == 0)
-		return false;
-
-	dims.iProjAngles = iProjAngles;
-	dims.iProjU = iProjU;
-	dims.iProjV = iProjV;
-
-	if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0)
+	bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims);
+	if (!ok)
 		return false;
 
 	dims.iRaysPerDetDim = iDetectorSuperSampling;
-
 	if (iDetectorSuperSampling == 0)
 		return false;
 
-	if (iGPUIndex != -1) {
-		cudaSetDevice(iGPUIndex);
-		cudaError_t err = cudaGetLastError();
-
-		// Ignore errors caused by calling cudaSetDevice multiple times
-		if (err != cudaSuccess && err != cudaErrorSetOnActiveProcess)
-			return false;
-	}
-
-	cudaPitchedPtr D_volumeData = allocateVolumeData(dims);
-	bool ok = D_volumeData.ptr;
-	if (!ok)
-		return false;
-
-	cudaPitchedPtr D_projData = allocateProjectionData(dims);
-	ok = D_projData.ptr;
-	if (!ok) {
-		cudaFree(D_volumeData.ptr);
-		return false;
-	}
-
-	ok &= copyVolumeToDevice(pfVolume, D_volumeData, dims, dims.iVolX);
-
-	ok &= zeroProjectionData(D_projData, dims);
-
-	if (!ok) {
-		cudaFree(D_volumeData.ptr);
-		cudaFree(D_projData.ptr);
-		return false;
-	}
-
-	ok &= ConeFP(D_volumeData, D_projData, dims, pfAngles, 1.0f);
-
-	ok &= copyProjectionsFromDevice(pfProjections, D_projData,
-	                                dims, dims.iProjU);
-
-
-	cudaFree(D_volumeData.ptr);
-	cudaFree(D_projData.ptr);
-
-	return ok;
-
-}
-
-bool astraCudaPar3DFP(const float* pfVolume, float* pfProjections,
-                      unsigned int iVolX,
-                      unsigned int iVolY,
-                      unsigned int iVolZ,
-                      unsigned int iProjAngles,
-                      unsigned int iProjU,
-                      unsigned int iProjV,
-                      float fDetUSize,
-                      float fDetVSize,
-                      const float *pfAngles,
-                      int iGPUIndex, int iDetectorSuperSampling,
-                      Cuda3DProjectionKernel projKernel)
-{
-	if (iVolX == 0 || iVolY == 0 || iVolZ == 0)
-		return false;
-	if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0)
-		return false;
-
-	SPar3DProjection* p = genPar3DProjections(iProjAngles,
-                                             iProjU, iProjV,
-                                             fDetUSize, fDetVSize,
-                                             pfAngles);
-
-	bool ok;
-	ok = astraCudaPar3DFP(pfVolume, pfProjections, iVolX, iVolY, iVolZ,
-	                      iProjAngles, iProjU, iProjV, p, iGPUIndex, iDetectorSuperSampling,
-	                      projKernel);
-
-	delete[] p;
-
-	return ok;
-}
-
-
-bool astraCudaPar3DFP(const float* pfVolume, float* pfProjections,
-                      unsigned int iVolX,
-                      unsigned int iVolY,
-                      unsigned int iVolZ,
-                      unsigned int iProjAngles,
-                      unsigned int iProjU,
-                      unsigned int iProjV,
-                      const SPar3DProjection *pfAngles,
-                      int iGPUIndex, int iDetectorSuperSampling,
-                      Cuda3DProjectionKernel projKernel)
-{
-	SDimensions3D dims;
-
-	dims.iVolX = iVolX;
-	dims.iVolY = iVolY;
-	dims.iVolZ = iVolZ;
-	if (iVolX == 0 || iVolY == 0 || iVolZ == 0)
-		return false;
-
-	dims.iProjAngles = iProjAngles;
-	dims.iProjU = iProjU;
-	dims.iProjV = iProjV;
+	SPar3DProjection* pParProjs;
+	SConeProjection* pConeProjs;
 
-	if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0)
-		return false;
+	float outputScale;
 
-	dims.iRaysPerDetDim = iDetectorSuperSampling;
+	ok = convertAstraGeometry(pVolGeom, pProjGeom,
+	                          pParProjs, pConeProjs,
+	                          outputScale);
 
-	if (iDetectorSuperSampling == 0)
-		return false;
 
 	if (iGPUIndex != -1) {
 		cudaSetDevice(iGPUIndex);
@@ -1288,7 +1140,7 @@ bool astraCudaPar3DFP(const float* pfVolume, float* pfProjections,
 
 
 	cudaPitchedPtr D_volumeData = allocateVolumeData(dims);
-	bool ok = D_volumeData.ptr;
+	ok = D_volumeData.ptr;
 	if (!ok)
 		return false;
 
@@ -1309,15 +1161,25 @@ bool astraCudaPar3DFP(const float* pfVolume, float* pfProjections,
 		return false;
 	}
 
-	switch (projKernel) {
-	case ker3d_default:
-		ok &= Par3DFP(D_volumeData, D_projData, dims, pfAngles, 1.0f);
-		break;
-	case ker3d_sum_square_weights:
-		ok &= Par3DFP_SumSqW(D_volumeData, D_projData, dims, pfAngles, 1.0f);
-		break;
-	default:
-		assert(false);
+	if (pParProjs) {
+		switch (projKernel) {
+		case ker3d_default:
+			ok &= Par3DFP(D_volumeData, D_projData, dims, pParProjs, 1.0f);
+			break;
+		case ker3d_sum_square_weights:
+			ok &= Par3DFP_SumSqW(D_volumeData, D_projData, dims, pParProjs, 1.0f);
+			break;
+		default:
+			assert(false);
+		}
+	} else {
+		switch (projKernel) {
+		case ker3d_default:
+			ok &= ConeFP(D_volumeData, D_projData, dims, pConeProjs, 1.0f);
+			break;
+		default:
+			assert(false);
+		}
 	}
 
 	ok &= copyProjectionsFromDevice(pfProjections, D_projData,
diff --git a/cuda/3d/astra3d.h b/cuda/3d/astra3d.h
index 5464d2f..6c3fcfb 100644
--- a/cuda/3d/astra3d.h
+++ b/cuda/3d/astra3d.h
@@ -282,52 +282,9 @@ protected:
 };
 
 
-
-_AstraExport bool astraCudaConeFP(const float* pfVolume, float* pfProjections,
-                     unsigned int iVolX,
-                     unsigned int iVolY,
-                     unsigned int iVolZ,
-                     unsigned int iProjAngles,
-                     unsigned int iProjU,
-                     unsigned int iProjV,
-                     float fOriginSourceDistance,
-                     float fOriginDetectorDistance,
-                     float fDetUSize,
-                     float fDetVSize,
-                     const float *pfAngles,
-                     int iGPUIndex, int iDetectorSuperSampling);
-
-_AstraExport bool astraCudaConeFP(const float* pfVolume, float* pfProjections,
-                     unsigned int iVolX,
-                     unsigned int iVolY,
-                     unsigned int iVolZ,
-                     unsigned int iProjAngles,
-                     unsigned int iProjU,
-                     unsigned int iProjV,
-                     const SConeProjection *pfAngles,
-                     int iGPUIndex, int iDetectorSuperSampling);
-
-_AstraExport bool astraCudaPar3DFP(const float* pfVolume, float* pfProjections,
-                      unsigned int iVolX,
-                      unsigned int iVolY,
-                      unsigned int iVolZ,
-                      unsigned int iProjAngles,
-                      unsigned int iProjU,
-                      unsigned int iProjV,
-                      float fDetUSize,
-                      float fDetVSize,
-                      const float *pfAngles,
-                      int iGPUIndex, int iDetectorSuperSampling,
-                      Cuda3DProjectionKernel projKernel);
-
-_AstraExport bool astraCudaPar3DFP(const float* pfVolume, float* pfProjections,
-                      unsigned int iVolX,
-                      unsigned int iVolY,
-                      unsigned int iVolZ,
-                      unsigned int iProjAngles,
-                      unsigned int iProjU,
-                      unsigned int iProjV,
-                      const SPar3DProjection *pfAngles,
+_AstraExport bool astraCudaFP(const float* pfVolume, float* pfProjections,
+                      const CVolumeGeometry3D* pVolGeom,
+                      const CProjectionGeometry3D* pProjGeom,
                       int iGPUIndex, int iDetectorSuperSampling,
                       Cuda3DProjectionKernel projKernel);
 
-- 
cgit v1.2.3


From a70ad8df8fc2a3da63fc91dd18bbfd55be7a89dd Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Thu, 12 Mar 2015 12:30:47 +0100
Subject: Add outputScale argument to 3D CUDA BP

---
 cuda/3d/algo3d.cu   |  7 ++++---
 cuda/3d/algo3d.h    |  3 ++-
 cuda/3d/astra3d.cu  | 12 ++++++------
 cuda/3d/cgls3d.cu   |  4 ++--
 cuda/3d/cone_bp.cu  | 24 +++++++++++++++---------
 cuda/3d/cone_bp.h   |  7 ++++---
 cuda/3d/par3d_bp.cu | 23 ++++++++++++++---------
 cuda/3d/par3d_bp.h  |  6 ++++--
 cuda/3d/sirt3d.cu   |  6 +++---
 9 files changed, 54 insertions(+), 38 deletions(-)

(limited to 'cuda')

diff --git a/cuda/3d/algo3d.cu b/cuda/3d/algo3d.cu
index 7f61280..b775438 100644
--- a/cuda/3d/algo3d.cu
+++ b/cuda/3d/algo3d.cu
@@ -94,12 +94,13 @@ bool ReconAlgo3D::callFP(cudaPitchedPtr& D_volumeData,
 }
 
 bool ReconAlgo3D::callBP(cudaPitchedPtr& D_volumeData,
-                       cudaPitchedPtr& D_projData)
+                       cudaPitchedPtr& D_projData,
+                       float outputScale)
 {
 	if (coneProjs) {
-		return ConeBP(D_volumeData, D_projData, dims, coneProjs);
+		return ConeBP(D_volumeData, D_projData, dims, coneProjs, outputScale);
 	} else {
-		return Par3DBP(D_volumeData, D_projData, dims, par3DProjs);
+		return Par3DBP(D_volumeData, D_projData, dims, par3DProjs, outputScale);
 	}
 }
 
diff --git a/cuda/3d/algo3d.h b/cuda/3d/algo3d.h
index f4c6a87..35ffc49 100644
--- a/cuda/3d/algo3d.h
+++ b/cuda/3d/algo3d.h
@@ -51,7 +51,8 @@ protected:
 	            cudaPitchedPtr& D_projData, 
 	            float outputScale);
 	bool callBP(cudaPitchedPtr& D_volumeData, 
-	            cudaPitchedPtr& D_projData);
+	            cudaPitchedPtr& D_projData,
+	            float outputScale);
 
 	SDimensions3D dims;
 	SConeProjection* coneProjs;
diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu
index b2375f3..7589416 100644
--- a/cuda/3d/astra3d.cu
+++ b/cuda/3d/astra3d.cu
@@ -1252,9 +1252,9 @@ bool astraCudaBP(float* pfVolume, const float* pfProjections,
 	}
 
 	if (pParProjs)
-		ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs);
+		ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs, 1.0f);
 	else
-		ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs);
+		ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs, 1.0f);
 
 	ok &= copyVolumeFromDevice(pfVolume, D_volumeData, dims, dims.iVolX);
 
@@ -1330,9 +1330,9 @@ bool astraCudaBP_SIRTWeighted(float* pfVolume,
 	processSino3D<opSet>(D_projData, 1.0f, dims);
 
 	if (pParProjs)
-		ok &= Par3DBP(D_pixelWeight, D_projData, dims, pParProjs);
+		ok &= Par3DBP(D_pixelWeight, D_projData, dims, pParProjs, 1.0f);
 	else
-		ok &= ConeBP(D_pixelWeight, D_projData, dims, pConeProjs);
+		ok &= ConeBP(D_pixelWeight, D_projData, dims, pConeProjs, 1.0f);
 
 	processVol3D<opInvert>(D_pixelWeight, dims);
 	if (!ok) {
@@ -1347,9 +1347,9 @@ bool astraCudaBP_SIRTWeighted(float* pfVolume,
 	ok &= zeroVolumeData(D_volumeData, dims);
 	// Do BP into D_volumeData
 	if (pParProjs)
-		ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs);
+		ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs, 1.0f);
 	else
-		ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs);
+		ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs, 1.0f);
 
 	// Multiply with weights
 	processVol3D<opMul>(D_volumeData, D_pixelWeight, dims);
diff --git a/cuda/3d/cgls3d.cu b/cuda/3d/cgls3d.cu
index 5071a9b..4f632f3 100644
--- a/cuda/3d/cgls3d.cu
+++ b/cuda/3d/cgls3d.cu
@@ -165,7 +165,7 @@ bool CGLS::iterate(unsigned int iterations)
 
 		// p = A'*r
 		zeroVolumeData(D_p, dims);
-		callBP(D_p, D_r);
+		callBP(D_p, D_r, 1.0f);
 		if (useVolumeMask)
 			processVol3D<opMul>(D_p, D_maskData, dims);
 
@@ -195,7 +195,7 @@ bool CGLS::iterate(unsigned int iterations)
 
 		// z = A'*r
 		zeroVolumeData(D_z, dims);
-		callBP(D_z, D_r);
+		callBP(D_z, D_r, 1.0f);
 		if (useVolumeMask)
 			processVol3D<opMul>(D_z, D_maskData, dims);
 
diff --git a/cuda/3d/cone_bp.cu b/cuda/3d/cone_bp.cu
index 5648d6f..5e67980 100644
--- a/cuda/3d/cone_bp.cu
+++ b/cuda/3d/cone_bp.cu
@@ -78,7 +78,8 @@ bool bindProjDataTexture(const cudaArray* array)
 
 //__launch_bounds__(32*16, 4)
 __global__ void dev_cone_BP(void* D_volData, unsigned int volPitch, int startAngle,
-                            int angleOffset, const astraCUDA3d::SDimensions3D dims)
+                            int angleOffset, const astraCUDA3d::SDimensions3D dims,
+                            float fOutputScale)
 {
 	float* volData = (float*)D_volData;
 
@@ -147,13 +148,13 @@ __global__ void dev_cone_BP(void* D_volData, unsigned int volPitch, int startAng
 		endZ = dims.iVolZ - startZ;
 
 	for(int i=0; i < endZ; i++)
-		volData[((startZ+i)*dims.iVolY+Y)*volPitch+X] += Z[i];
+		volData[((startZ+i)*dims.iVolY+Y)*volPitch+X] += Z[i] * fOutputScale;
 } //End kernel
 
 
 
 // supersampling version
-__global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims)
+__global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, float fOutputScale)
 {
 	float* volData = (float*)D_volData;
 
@@ -189,6 +190,9 @@ __global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int start
 	float fZ = startZ - 0.5f*dims.iVolZ + 0.5f - 0.5f + 0.5f/dims.iRaysPerVoxelDim;
 	const float fSubStep = 1.0f/dims.iRaysPerVoxelDim;
 
+	fOutputScale /= (dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim);
+
+
 	for (int Z = startZ; Z < endZ; ++Z, fZ += 1.0f)
 	{
 
@@ -236,14 +240,15 @@ __global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int start
 
 		}
 
-		volData[(Z*dims.iVolY+Y)*volPitch+X] += fVal / (dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim);
+		volData[(Z*dims.iVolY+Y)*volPitch+X] += fVal * fOutputScale;
 	}
 }
 
 
 bool ConeBP_Array(cudaPitchedPtr D_volumeData,
                   cudaArray *D_projArray,
-                  const SDimensions3D& dims, const SConeProjection* angles)
+                  const SDimensions3D& dims, const SConeProjection* angles,
+                  float fOutputScale)
 {
 	bindProjDataTexture(D_projArray);
 
@@ -291,9 +296,9 @@ bool ConeBP_Array(cudaPitchedPtr D_volumeData,
 		for (unsigned int i = 0; i < angleCount; i += g_anglesPerBlock) {
 		// printf("Calling BP: %d, %dx%d, %dx%d to %p\n", i, dimBlock.x, dimBlock.y, dimGrid.x, dimGrid.y, (void*)D_volumeData.ptr); 
 			if (dims.iRaysPerVoxelDim == 1)
-				dev_cone_BP<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims);
+				dev_cone_BP<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale);
 			else
-				dev_cone_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims);
+				dev_cone_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale);
 		}
 
 		cudaTextForceKernelsCompletion();
@@ -309,14 +314,15 @@ bool ConeBP_Array(cudaPitchedPtr D_volumeData,
 
 bool ConeBP(cudaPitchedPtr D_volumeData,
             cudaPitchedPtr D_projData,
-            const SDimensions3D& dims, const SConeProjection* angles)
+            const SDimensions3D& dims, const SConeProjection* angles,
+            float fOutputScale)
 {
 	// transfer projections to array
 
 	cudaArray* cuArray = allocateProjectionArray(dims);
 	transferProjectionsToArray(D_projData, cuArray, dims);
 
-	bool ret = ConeBP_Array(D_volumeData, cuArray, dims, angles);
+	bool ret = ConeBP_Array(D_volumeData, cuArray, dims, angles, fOutputScale);
 
 	cudaFreeArray(cuArray);
 
diff --git a/cuda/3d/cone_bp.h b/cuda/3d/cone_bp.h
index cba6d9f..4d3d2dd 100644
--- a/cuda/3d/cone_bp.h
+++ b/cuda/3d/cone_bp.h
@@ -33,13 +33,14 @@ namespace astraCUDA3d {
 
 _AstraExport bool ConeBP_Array(cudaPitchedPtr D_volumeData,
                   cudaArray *D_projArray,
-                  const SDimensions3D& dims, const SConeProjection* angles);
+                  const SDimensions3D& dims, const SConeProjection* angles,
+                  float fOutputScale);
 
 _AstraExport bool ConeBP(cudaPitchedPtr D_volumeData,
             cudaPitchedPtr D_projData,
-            const SDimensions3D& dims, const SConeProjection* angles);
+            const SDimensions3D& dims, const SConeProjection* angles,
+            float fOutputScale);
          
-
 }
 
 #endif
diff --git a/cuda/3d/par3d_bp.cu b/cuda/3d/par3d_bp.cu
index 0c33280..1217949 100644
--- a/cuda/3d/par3d_bp.cu
+++ b/cuda/3d/par3d_bp.cu
@@ -77,7 +77,7 @@ static bool bindProjDataTexture(const cudaArray* array)
 }
 
 
-__global__ void dev_par3D_BP(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims)
+__global__ void dev_par3D_BP(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, float fOutputScale)
 {
 	float* volData = (float*)D_volData;
 
@@ -139,11 +139,11 @@ __global__ void dev_par3D_BP(void* D_volData, unsigned int volPitch, int startAn
 		endZ = dims.iVolZ - startZ;
 
 	for(int i=0; i < endZ; i++)
-		volData[((startZ+i)*dims.iVolY+Y)*volPitch+X] += Z[i];
+		volData[((startZ+i)*dims.iVolY+Y)*volPitch+X] += Z[i] * fOutputScale;
 }
 
 // supersampling version
-__global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims)
+__global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, float fOutputScale)
 {
 	float* volData = (float*)D_volData;
 
@@ -180,6 +180,9 @@ __global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int star
 
 	const float fSubStep = 1.0f/dims.iRaysPerVoxelDim;
 
+	fOutputScale /= (dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim);
+
+
 	for (int Z = startZ; Z < endZ; ++Z, fZ += 1.0f)
 	{
 
@@ -217,14 +220,15 @@ __global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int star
 
 		}
 
-		volData[(Z*dims.iVolY+Y)*volPitch+X] += fVal / (dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim);
+		volData[(Z*dims.iVolY+Y)*volPitch+X] += fVal * fOutputScale;
 	}
 
 }
 
 bool Par3DBP_Array(cudaPitchedPtr D_volumeData,
                    cudaArray *D_projArray,
-                   const SDimensions3D& dims, const SPar3DProjection* angles)
+                   const SDimensions3D& dims, const SPar3DProjection* angles,
+                   float fOutputScale)
 {
 	bindProjDataTexture(D_projArray);
 
@@ -271,9 +275,9 @@ bool Par3DBP_Array(cudaPitchedPtr D_volumeData,
 		for (unsigned int i = 0; i < angleCount; i += g_anglesPerBlock) {
 			// printf("Calling BP: %d, %dx%d, %dx%d to %p\n", i, dimBlock.x, dimBlock.y, dimGrid.x, dimGrid.y, (void*)D_volumeData.ptr); 
 			if (dims.iRaysPerVoxelDim == 1)
-				dev_par3D_BP<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims);
+				dev_par3D_BP<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale);
 			else
-				dev_par3D_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims);
+				dev_par3D_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale);
 		}
 
 		cudaTextForceKernelsCompletion();
@@ -288,14 +292,15 @@ bool Par3DBP_Array(cudaPitchedPtr D_volumeData,
 
 bool Par3DBP(cudaPitchedPtr D_volumeData,
             cudaPitchedPtr D_projData,
-            const SDimensions3D& dims, const SPar3DProjection* angles)
+            const SDimensions3D& dims, const SPar3DProjection* angles,
+            float fOutputScale)
 {
 	// transfer projections to array
 
 	cudaArray* cuArray = allocateProjectionArray(dims);
 	transferProjectionsToArray(D_projData, cuArray, dims);
 
-	bool ret = Par3DBP_Array(D_volumeData, cuArray, dims, angles);
+	bool ret = Par3DBP_Array(D_volumeData, cuArray, dims, angles, fOutputScale);
 
 	cudaFreeArray(cuArray);
 
diff --git a/cuda/3d/par3d_bp.h b/cuda/3d/par3d_bp.h
index ece37d1..f1fc62d 100644
--- a/cuda/3d/par3d_bp.h
+++ b/cuda/3d/par3d_bp.h
@@ -33,11 +33,13 @@ namespace astraCUDA3d {
 
 _AstraExport bool Par3DBP_Array(cudaPitchedPtr D_volumeData,
                    cudaArray *D_projArray,
-                   const SDimensions3D& dims, const SPar3DProjection* angles);
+                   const SDimensions3D& dims, const SPar3DProjection* angles,
+                   float fOutputScale);
 
 _AstraExport bool Par3DBP(cudaPitchedPtr D_volumeData,
              cudaPitchedPtr D_projData,
-             const SDimensions3D& dims, const SPar3DProjection* angles);
+             const SDimensions3D& dims, const SPar3DProjection* angles,
+             float fOutputScale);
          
 
 }
diff --git a/cuda/3d/sirt3d.cu b/cuda/3d/sirt3d.cu
index 389ee6b..0e6630a 100644
--- a/cuda/3d/sirt3d.cu
+++ b/cuda/3d/sirt3d.cu
@@ -160,10 +160,10 @@ bool SIRT::precomputeWeights()
 	zeroVolumeData(D_pixelWeight, dims);
 
 	if (useSinogramMask) {
-		callBP(D_pixelWeight, D_smaskData);
+		callBP(D_pixelWeight, D_smaskData, 1.0f);
 	} else {
 		processSino3D<opSet>(D_projData, 1.0f, dims);
-		callBP(D_pixelWeight, D_projData);
+		callBP(D_pixelWeight, D_projData, 1.0f);
 	}
 #if 0
 	float* bufp = new float[512*512];
@@ -293,7 +293,7 @@ bool SIRT::iterate(unsigned int iterations)
 #endif
 
 
-		callBP(D_tmpData, D_projData);
+		callBP(D_tmpData, D_projData, 1.0f);
 #if 0
 	printf("Dumping tmpData: %p\n", (void*)D_tmpData.ptr);
 	float* buf = new float[256*256];
-- 
cgit v1.2.3


From 42db106d9b66639312d874e4f35e4e9ff7a407d0 Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Thu, 12 Mar 2015 14:03:17 +0100
Subject: Scale CUDA 3D FP/BP output with volume pixel size

---
 cuda/3d/algo3d.cu  | 15 +++++++++------
 cuda/3d/algo3d.h   |  6 ++++--
 cuda/3d/astra3d.cu | 48 +++++++++++++++++++++---------------------------
 cuda/3d/cgls3d.cu  |  2 +-
 cuda/3d/sirt3d.cu  |  2 +-
 5 files changed, 36 insertions(+), 37 deletions(-)

(limited to 'cuda')

diff --git a/cuda/3d/algo3d.cu b/cuda/3d/algo3d.cu
index b775438..cc86b70 100644
--- a/cuda/3d/algo3d.cu
+++ b/cuda/3d/algo3d.cu
@@ -41,6 +41,7 @@ ReconAlgo3D::ReconAlgo3D()
 	coneProjs = 0;
 	par3DProjs = 0;
 	shouldAbort = false;
+	fOutputScale = 1.0f;
 }
 
 ReconAlgo3D::~ReconAlgo3D()
@@ -57,9 +58,10 @@ void ReconAlgo3D::reset()
 	shouldAbort = false;
 }
 
-bool ReconAlgo3D::setConeGeometry(const SDimensions3D& _dims, const SConeProjection* _angles)
+bool ReconAlgo3D::setConeGeometry(const SDimensions3D& _dims, const SConeProjection* _angles, float _outputScale)
 {
 	dims = _dims;
+	fOutputScale = _outputScale;
 
 	coneProjs = new SConeProjection[dims.iProjAngles];
 	par3DProjs = 0;
@@ -69,9 +71,10 @@ bool ReconAlgo3D::setConeGeometry(const SDimensions3D& _dims, const SConeProject
 	return true;
 }
 
-bool ReconAlgo3D::setPar3DGeometry(const SDimensions3D& _dims, const SPar3DProjection* _angles)
+bool ReconAlgo3D::setPar3DGeometry(const SDimensions3D& _dims, const SPar3DProjection* _angles, float _outputScale)
 {
 	dims = _dims;
+	fOutputScale = _outputScale;
 
 	par3DProjs = new SPar3DProjection[dims.iProjAngles];
 	coneProjs = 0;
@@ -87,9 +90,9 @@ bool ReconAlgo3D::callFP(cudaPitchedPtr& D_volumeData,
                        float outputScale)
 {
 	if (coneProjs) {
-		return ConeFP(D_volumeData, D_projData, dims, coneProjs, outputScale);
+		return ConeFP(D_volumeData, D_projData, dims, coneProjs, outputScale * this->fOutputScale);
 	} else {
-		return Par3DFP(D_volumeData, D_projData, dims, par3DProjs, outputScale);
+		return Par3DFP(D_volumeData, D_projData, dims, par3DProjs, outputScale * this->fOutputScale);
 	}
 }
 
@@ -98,9 +101,9 @@ bool ReconAlgo3D::callBP(cudaPitchedPtr& D_volumeData,
                        float outputScale)
 {
 	if (coneProjs) {
-		return ConeBP(D_volumeData, D_projData, dims, coneProjs, outputScale);
+		return ConeBP(D_volumeData, D_projData, dims, coneProjs, outputScale * this->fOutputScale);
 	} else {
-		return Par3DBP(D_volumeData, D_projData, dims, par3DProjs, outputScale);
+		return Par3DBP(D_volumeData, D_projData, dims, par3DProjs, outputScale * this->fOutputScale);
 	}
 }
 
diff --git a/cuda/3d/algo3d.h b/cuda/3d/algo3d.h
index 35ffc49..886b092 100644
--- a/cuda/3d/algo3d.h
+++ b/cuda/3d/algo3d.h
@@ -39,8 +39,8 @@ public:
 	ReconAlgo3D();
 	~ReconAlgo3D();
 
-	bool setConeGeometry(const SDimensions3D& dims, const SConeProjection* projs);
-	bool setPar3DGeometry(const SDimensions3D& dims, const SPar3DProjection* projs);
+	bool setConeGeometry(const SDimensions3D& dims, const SConeProjection* projs, float fOutputScale);
+	bool setPar3DGeometry(const SDimensions3D& dims, const SPar3DProjection* projs, float fOutputScale);
 
 	void signalAbort() { shouldAbort = true; }
 
@@ -58,6 +58,8 @@ protected:
 	SConeProjection* coneProjs;
 	SPar3DProjection* par3DProjs;
 
+	float fOutputScale;
+
 	volatile bool shouldAbort;
 
 };
diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu
index 7589416..ae79efb 100644
--- a/cuda/3d/astra3d.cu
+++ b/cuda/3d/astra3d.cu
@@ -353,7 +353,7 @@ public:
 	SConeProjection* projs;
 	SPar3DProjection* parprojs;
 
-	float fPixelSize;
+	float fOutputScale;
 
 	bool initialized;
 	bool setStartReconstruction;
@@ -390,6 +390,8 @@ AstraSIRT3d::AstraSIRT3d()
 	pData->dims.iRaysPerVoxelDim = 1;
 
 	pData->projs = 0;
+	pData->parprojs = 0;
+	pData->fOutputScale = 1.0f;
 
 	pData->initialized = false;
 	pData->setStartReconstruction = false;
@@ -435,11 +437,10 @@ bool AstraSIRT3d::setGeometry(const CVolumeGeometry3D* pVolGeom,
 
 	pData->projs = 0;
 	pData->parprojs = 0;
-	float outputScale;
 
 	ok = convertAstraGeometry(pVolGeom, pProjGeom,
 	                          pData->parprojs, pData->projs,
-	                          outputScale);
+	                          pData->fOutputScale);
 	if (!ok)
 		return false;
 
@@ -451,8 +452,6 @@ bool AstraSIRT3d::setGeometry(const CVolumeGeometry3D* pVolGeom,
 		pData->projType = PROJ_PARALLEL;
 	}
 
-	// TODO: Handle outputScale
-
 	return true;
 }
 
@@ -519,9 +518,9 @@ bool AstraSIRT3d::init()
 	bool ok;
 
 	if (pData->projType == PROJ_PARALLEL) {
-		ok = pData->sirt.setPar3DGeometry(pData->dims, pData->parprojs);
+		ok = pData->sirt.setPar3DGeometry(pData->dims, pData->parprojs, pData->fOutputScale);
 	} else {
-		ok = pData->sirt.setConeGeometry(pData->dims, pData->projs);
+		ok = pData->sirt.setConeGeometry(pData->dims, pData->projs, pData->fOutputScale);
 	}
 
 	if (!ok)
@@ -733,7 +732,7 @@ public:
 	SConeProjection* projs;
 	SPar3DProjection* parprojs;
 
-	float fPixelSize;
+	float fOutputScale;
 
 	bool initialized;
 	bool setStartReconstruction;
@@ -770,6 +769,8 @@ AstraCGLS3d::AstraCGLS3d()
 	pData->dims.iRaysPerVoxelDim = 1;
 
 	pData->projs = 0;
+	pData->parprojs = 0;
+	pData->fOutputScale = 1.0f;
 
 	pData->initialized = false;
 	pData->setStartReconstruction = false;
@@ -815,11 +816,10 @@ bool AstraCGLS3d::setGeometry(const CVolumeGeometry3D* pVolGeom,
 
 	pData->projs = 0;
 	pData->parprojs = 0;
-	float outputScale;
 
 	ok = convertAstraGeometry(pVolGeom, pProjGeom,
 	                          pData->parprojs, pData->projs,
-	                          outputScale);
+	                          pData->fOutputScale);
 	if (!ok)
 		return false;
 
@@ -831,8 +831,6 @@ bool AstraCGLS3d::setGeometry(const CVolumeGeometry3D* pVolGeom,
 		pData->projType = PROJ_PARALLEL;
 	}
 
-	// TODO: Handle outputScale
-
 	return true;
 }
 
@@ -900,9 +898,9 @@ bool AstraCGLS3d::init()
 	bool ok;
 
 	if (pData->projType == PROJ_PARALLEL) {
-		ok = pData->cgls.setPar3DGeometry(pData->dims, pData->parprojs);
+		ok = pData->cgls.setPar3DGeometry(pData->dims, pData->parprojs, pData->fOutputScale);
 	} else {
-		ok = pData->cgls.setConeGeometry(pData->dims, pData->projs);
+		ok = pData->cgls.setConeGeometry(pData->dims, pData->projs, pData->fOutputScale);
 	}
 
 	if (!ok)
@@ -1164,10 +1162,10 @@ bool astraCudaFP(const float* pfVolume, float* pfProjections,
 	if (pParProjs) {
 		switch (projKernel) {
 		case ker3d_default:
-			ok &= Par3DFP(D_volumeData, D_projData, dims, pParProjs, 1.0f);
+			ok &= Par3DFP(D_volumeData, D_projData, dims, pParProjs, outputScale);
 			break;
 		case ker3d_sum_square_weights:
-			ok &= Par3DFP_SumSqW(D_volumeData, D_projData, dims, pParProjs, 1.0f);
+			ok &= Par3DFP_SumSqW(D_volumeData, D_projData, dims, pParProjs, outputScale*outputScale);
 			break;
 		default:
 			assert(false);
@@ -1175,7 +1173,7 @@ bool astraCudaFP(const float* pfVolume, float* pfProjections,
 	} else {
 		switch (projKernel) {
 		case ker3d_default:
-			ok &= ConeFP(D_volumeData, D_projData, dims, pConeProjs, 1.0f);
+			ok &= ConeFP(D_volumeData, D_projData, dims, pConeProjs, outputScale);
 			break;
 		default:
 			assert(false);
@@ -1216,8 +1214,6 @@ bool astraCudaBP(float* pfVolume, const float* pfProjections,
 	                          pParProjs, pConeProjs,
 	                          outputScale);
 
-	// TODO: OutputScale
-
 	if (iGPUIndex != -1) {
 		cudaSetDevice(iGPUIndex);
 		cudaError_t err = cudaGetLastError();
@@ -1252,9 +1248,9 @@ bool astraCudaBP(float* pfVolume, const float* pfProjections,
 	}
 
 	if (pParProjs)
-		ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs, 1.0f);
+		ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs, outputScale);
 	else
-		ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs, 1.0f);
+		ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs, outputScale);
 
 	ok &= copyVolumeFromDevice(pfVolume, D_volumeData, dims, dims.iVolX);
 
@@ -1293,8 +1289,6 @@ bool astraCudaBP_SIRTWeighted(float* pfVolume,
 	                          pParProjs, pConeProjs,
 	                          outputScale);
 
-	// TODO: OutputScale
-
 	if (iGPUIndex != -1) {
 		cudaSetDevice(iGPUIndex);
 		cudaError_t err = cudaGetLastError();
@@ -1330,9 +1324,9 @@ bool astraCudaBP_SIRTWeighted(float* pfVolume,
 	processSino3D<opSet>(D_projData, 1.0f, dims);
 
 	if (pParProjs)
-		ok &= Par3DBP(D_pixelWeight, D_projData, dims, pParProjs, 1.0f);
+		ok &= Par3DBP(D_pixelWeight, D_projData, dims, pParProjs, outputScale);
 	else
-		ok &= ConeBP(D_pixelWeight, D_projData, dims, pConeProjs, 1.0f);
+		ok &= ConeBP(D_pixelWeight, D_projData, dims, pConeProjs, outputScale);
 
 	processVol3D<opInvert>(D_pixelWeight, dims);
 	if (!ok) {
@@ -1347,9 +1341,9 @@ bool astraCudaBP_SIRTWeighted(float* pfVolume,
 	ok &= zeroVolumeData(D_volumeData, dims);
 	// Do BP into D_volumeData
 	if (pParProjs)
-		ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs, 1.0f);
+		ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs, outputScale);
 	else
-		ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs, 1.0f);
+		ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs, outputScale);
 
 	// Multiply with weights
 	processVol3D<opMul>(D_volumeData, D_pixelWeight, dims);
diff --git a/cuda/3d/cgls3d.cu b/cuda/3d/cgls3d.cu
index 4f632f3..dd0e8a0 100644
--- a/cuda/3d/cgls3d.cu
+++ b/cuda/3d/cgls3d.cu
@@ -242,7 +242,7 @@ bool doCGLS(cudaPitchedPtr& D_volumeData,
 	CGLS cgls;
 	bool ok = true;
 
-	ok &= cgls.setConeGeometry(dims, angles);
+	ok &= cgls.setConeGeometry(dims, angles, 1.0f);
 	if (D_maskData.ptr)
 		ok &= cgls.enableVolumeMask();
 
diff --git a/cuda/3d/sirt3d.cu b/cuda/3d/sirt3d.cu
index 0e6630a..484521e 100644
--- a/cuda/3d/sirt3d.cu
+++ b/cuda/3d/sirt3d.cu
@@ -347,7 +347,7 @@ bool doSIRT(cudaPitchedPtr& D_volumeData,
 	SIRT sirt;
 	bool ok = true;
 
-	ok &= sirt.setConeGeometry(dims, angles);
+	ok &= sirt.setConeGeometry(dims, angles, 1.0f);
 	if (D_maskData.ptr)
 		ok &= sirt.enableVolumeMask();
 
-- 
cgit v1.2.3


From 1a7dfb0964fa7686aa01f0d836f95910dc4dc07f Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Thu, 12 Mar 2015 14:18:54 +0100
Subject: Adapt standalone test programs to outputscale

---
 cuda/3d/cone_bp.cu  | 2 +-
 cuda/3d/par3d_bp.cu | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'cuda')

diff --git a/cuda/3d/cone_bp.cu b/cuda/3d/cone_bp.cu
index 5e67980..4a41f6a 100644
--- a/cuda/3d/cone_bp.cu
+++ b/cuda/3d/cone_bp.cu
@@ -479,7 +479,7 @@ int main()
 	}
 #endif
 
-	astraCUDA3d::ConeBP(volData, projData, dims, angle);
+	astraCUDA3d::ConeBP(volData, projData, dims, angle, 1.0f);
 #if 0
 	float* buf = new float[256*256];
 
diff --git a/cuda/3d/par3d_bp.cu b/cuda/3d/par3d_bp.cu
index 1217949..cafab46 100644
--- a/cuda/3d/par3d_bp.cu
+++ b/cuda/3d/par3d_bp.cu
@@ -450,7 +450,7 @@ int main()
 		cudaMemcpy3D(&p);
 	}
 
-	astraCUDA3d::Par3DBP(volData, projData, dims, angle);
+	astraCUDA3d::Par3DBP(volData, projData, dims, angle, 1.0f);
 #if 1
 	float* buf = new float[256*256];
 
-- 
cgit v1.2.3


From 9458268a8b9192af98fc1b88bf0a5fbbc7696a77 Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Thu, 12 Mar 2015 14:28:03 +0100
Subject: Add outputScale argument to 2D CUDA BP

---
 cuda/2d/algo.cu   |  7 ++++---
 cuda/2d/algo.h    |  3 ++-
 cuda/2d/astra.cu  | 12 +++++-------
 cuda/2d/cgls.cu   |  4 ++--
 cuda/2d/em.cu     |  4 ++--
 cuda/2d/fan_bp.cu | 47 +++++++++++++++++++++++++++--------------------
 cuda/2d/fan_bp.h  |  9 ++++++---
 cuda/2d/par_bp.cu | 31 +++++++++++++++++--------------
 cuda/2d/par_bp.h  |  4 ++--
 cuda/2d/sart.cu   | 10 +++++-----
 cuda/2d/sart.h    |  2 +-
 cuda/2d/sirt.cu   |  6 +++---
 12 files changed, 76 insertions(+), 63 deletions(-)

(limited to 'cuda')

diff --git a/cuda/2d/algo.cu b/cuda/2d/algo.cu
index 144fabd..dc74e51 100644
--- a/cuda/2d/algo.cu
+++ b/cuda/2d/algo.cu
@@ -336,16 +336,17 @@ bool ReconAlgo::callFP(float* D_volumeData, unsigned int volumePitch,
 }
 
 bool ReconAlgo::callBP(float* D_volumeData, unsigned int volumePitch,
-                       float* D_projData, unsigned int projPitch)
+                       float* D_projData, unsigned int projPitch,
+                       float outputScale)
 {
 	if (angles) {
 		assert(!fanProjs);
 		return BP(D_volumeData, volumePitch, D_projData, projPitch,
-		          dims, angles, TOffsets);
+		          dims, angles, TOffsets, outputScale);
 	} else {
 		assert(fanProjs);
 		return FanBP(D_volumeData, volumePitch, D_projData, projPitch,
-		             dims, fanProjs);
+		             dims, fanProjs, outputScale);
 	}
 
 }
diff --git a/cuda/2d/algo.h b/cuda/2d/algo.h
index a75905e..99959c8 100644
--- a/cuda/2d/algo.h
+++ b/cuda/2d/algo.h
@@ -118,7 +118,8 @@ protected:
 	            float* D_projData, unsigned int projPitch,
 	            float outputScale);
 	bool callBP(float* D_volumeData, unsigned int volumePitch,
-	            float* D_projData, unsigned int projPitch);
+	            float* D_projData, unsigned int projPitch,
+	            float outputScale);
 
 
 	SDimensions dims;
diff --git a/cuda/2d/astra.cu b/cuda/2d/astra.cu
index 0b5be06..5e2a07a 100644
--- a/cuda/2d/astra.cu
+++ b/cuda/2d/astra.cu
@@ -367,21 +367,19 @@ bool AstraFBP::run()
 
 	}
 
+	float fOutputScale = (M_PI / 2.0f) / (float)pData->dims.iProjAngles;
+
 	if (pData->bFanBeam) {
-		ok = FanBP_FBPWeighted(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->fanProjections);
+		ok = FanBP_FBPWeighted(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->fanProjections, fOutputScale);
 
 	} else {
-		ok = BP(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->angles, pData->TOffsets);
+		ok = BP(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->angles, pData->TOffsets, fOutputScale);
 	}
 	if(!ok)
 	{
 		return false;
 	}
 
-	processVol<opMul>(pData->D_volumeData,
-	                      (M_PI / 2.0f) / (float)pData->dims.iProjAngles,
-	                      pData->volumePitch, pData->dims);
-
 	return true;
 }
 
@@ -593,7 +591,7 @@ bool BPalgo::iterate(unsigned int)
 {
 	// TODO: This zeroVolume makes an earlier memcpy of D_volumeData redundant
 	zeroVolumeData(D_volumeData, volumePitch, dims);
-	callBP(D_volumeData, volumePitch, D_sinoData, sinoPitch);
+	callBP(D_volumeData, volumePitch, D_sinoData, sinoPitch, 1.0f);
 	return true;
 }
 
diff --git a/cuda/2d/cgls.cu b/cuda/2d/cgls.cu
index 9ead563..f402914 100644
--- a/cuda/2d/cgls.cu
+++ b/cuda/2d/cgls.cu
@@ -135,7 +135,7 @@ bool CGLS::iterate(unsigned int iterations)
 
 		// p = A'*r
 		zeroVolumeData(D_p, pPitch, dims);
-		callBP(D_p, pPitch, D_r, rPitch);
+		callBP(D_p, pPitch, D_r, rPitch, 1.0f);
 		if (useVolumeMask)
 			processVol<opMul>(D_p, D_maskData, pPitch, dims);
 
@@ -166,7 +166,7 @@ bool CGLS::iterate(unsigned int iterations)
 
 		// z = A'*r
 		zeroVolumeData(D_z, zPitch, dims);
-		callBP(D_z, zPitch, D_r, rPitch);
+		callBP(D_z, zPitch, D_r, rPitch, 1.0f);
 		if (useVolumeMask)
 			processVol<opMul>(D_z, D_maskData, zPitch, dims);
 
diff --git a/cuda/2d/em.cu b/cuda/2d/em.cu
index 00127c0..8593b08 100644
--- a/cuda/2d/em.cu
+++ b/cuda/2d/em.cu
@@ -102,7 +102,7 @@ bool EM::precomputeWeights()
 #endif
 	{
 		processSino<opSet>(D_projData, 1.0f, projPitch, dims);
-		callBP(D_pixelWeight, pixelPitch, D_projData, projPitch);
+		callBP(D_pixelWeight, pixelPitch, D_projData, projPitch, 1.0f);
 	}
 	processVol<opInvert>(D_pixelWeight, pixelPitch, dims);
 
@@ -137,7 +137,7 @@ bool EM::iterate(unsigned int iterations)
 
 		// Do BP of projData into tmpData
 		zeroVolumeData(D_tmpData, tmpPitch, dims);
-		callBP(D_tmpData, tmpPitch, D_projData, projPitch);
+		callBP(D_tmpData, tmpPitch, D_projData, projPitch, 1.0f);
 
 		// Multiply volumeData with tmpData divided by pixel weights
 		processVol<opMul2>(D_volumeData, D_tmpData, D_pixelWeight, pixelPitch, dims);
diff --git a/cuda/2d/fan_bp.cu b/cuda/2d/fan_bp.cu
index 74e8b12..b4321ba 100644
--- a/cuda/2d/fan_bp.cu
+++ b/cuda/2d/fan_bp.cu
@@ -77,7 +77,7 @@ static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int wi
 	return true;
 }
 
-__global__ void devFanBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims)
+__global__ void devFanBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale)
 {
 	const int relX = threadIdx.x;
 	const int relY = threadIdx.y;
@@ -121,11 +121,11 @@ __global__ void devFanBP(float* D_volData, unsigned int volPitch, unsigned int s
 		fA += 1.0f;
 	}
 
-	volData[Y*volPitch+X] += fVal;
+	volData[Y*volPitch+X] += fVal * fOutputScale;
 }
 
 // supersampling version
-__global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims)
+__global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale)
 {
 	const int relX = threadIdx.x;
 	const int relY = threadIdx.y;
@@ -146,6 +146,8 @@ __global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned in
 
 	float* volData = (float*)D_volData;
 
+	fOutputScale /= (dims.iRaysPerPixelDim * dims.iRaysPerPixelDim);
+
 	float fVal = 0.0f;
 	float fA = startAngle + 0.5f;
 
@@ -180,14 +182,14 @@ __global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned in
 		fA += 1.0f;
 	}
 
-	volData[Y*volPitch+X] += fVal / (dims.iRaysPerPixelDim * dims.iRaysPerPixelDim);
+	volData[Y*volPitch+X] += fVal * fOutputScale;
 }
 
 
 // BP specifically for SART.
 // It includes (free) weighting with voxel weight.
 // It assumes the proj texture is set up _without_ padding, unlike regular BP.
-__global__ void devFanBP_SART(float* D_volData, unsigned int volPitch, const SDimensions dims)
+__global__ void devFanBP_SART(float* D_volData, unsigned int volPitch, const SDimensions dims, float fOutputScale)
 {
 	const int relX = threadIdx.x;
 	const int relY = threadIdx.y;
@@ -222,12 +224,12 @@ __global__ void devFanBP_SART(float* D_volData, unsigned int volPitch, const SDi
 	const float fT = fNum / fDen;
 	const float fVal = tex2D(gT_FanProjTexture, fT, 0.5f);
 
-	volData[Y*volPitch+X] += fVal;
+	volData[Y*volPitch+X] += fVal * fOutputScale;
 }
 
 // Weighted BP for use in fan beam FBP
 // Each pixel/ray is weighted by 1/L^2 where L is the distance to the source.
-__global__ void devFanBP_FBPWeighted(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims)
+__global__ void devFanBP_FBPWeighted(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale)
 {
 	const int relX = threadIdx.x;
 	const int relY = threadIdx.y;
@@ -273,13 +275,14 @@ __global__ void devFanBP_FBPWeighted(float* D_volData, unsigned int volPitch, un
 		fA += 1.0f;
 	}
 
-	volData[Y*volPitch+X] += fVal;
+	volData[Y*volPitch+X] += fVal * fOutputScale;
 }
 
 
 bool FanBP_internal(float* D_volumeData, unsigned int volumePitch,
            float* D_projData, unsigned int projPitch,
-           const SDimensions& dims, const SFanProjection* angles)
+           const SDimensions& dims, const SFanProjection* angles,
+           float fOutputScale)
 {
 	assert(dims.iProjAngles <= g_MaxAngles);
 
@@ -310,9 +313,9 @@ bool FanBP_internal(float* D_volumeData, unsigned int volumePitch,
 
 	for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) {
 		if (dims.iRaysPerPixelDim > 1)
-			devFanBP_SS<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims);
+			devFanBP_SS<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale);
 		else
-			devFanBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims);
+			devFanBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale);
 	}
 	cudaThreadSynchronize();
 
@@ -325,7 +328,8 @@ bool FanBP_internal(float* D_volumeData, unsigned int volumePitch,
 
 bool FanBP_FBPWeighted_internal(float* D_volumeData, unsigned int volumePitch,
            float* D_projData, unsigned int projPitch,
-           const SDimensions& dims, const SFanProjection* angles)
+           const SDimensions& dims, const SFanProjection* angles,
+           float fOutputScale)
 {
 	assert(dims.iProjAngles <= g_MaxAngles);
 
@@ -355,7 +359,7 @@ bool FanBP_FBPWeighted_internal(float* D_volumeData, unsigned int volumePitch,
 	cudaStreamCreate(&stream);
 
 	for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) {
-		devFanBP_FBPWeighted<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims);
+		devFanBP_FBPWeighted<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale);
 	}
 	cudaThreadSynchronize();
 
@@ -370,7 +374,8 @@ bool FanBP_FBPWeighted_internal(float* D_volumeData, unsigned int volumePitch,
 bool FanBP_SART(float* D_volumeData, unsigned int volumePitch,
                 float* D_projData, unsigned int projPitch,
                 unsigned int angle,
-                const SDimensions& dims, const SFanProjection* angles)
+                const SDimensions& dims, const SFanProjection* angles,
+                float fOutputScale)
 {
 	// only one angle
 	bindProjDataTexture(D_projData, projPitch, dims.iProjDets, 1, cudaAddressModeClamp);
@@ -391,7 +396,7 @@ bool FanBP_SART(float* D_volumeData, unsigned int volumePitch,
 	dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices,
 	             (dims.iVolHeight+g_blockSliceSize-1)/g_blockSliceSize);
 
-	devFanBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, dims);
+	devFanBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, dims, fOutputScale);
 	cudaThreadSynchronize();
 
 	cudaTextForceKernelsCompletion();
@@ -401,7 +406,8 @@ bool FanBP_SART(float* D_volumeData, unsigned int volumePitch,
 
 bool FanBP(float* D_volumeData, unsigned int volumePitch,
            float* D_projData, unsigned int projPitch,
-           const SDimensions& dims, const SFanProjection* angles)
+           const SDimensions& dims, const SFanProjection* angles,
+           float fOutputScale)
 {
 	for (unsigned int iAngle = 0; iAngle < dims.iProjAngles; iAngle += g_MaxAngles) {
 		SDimensions subdims = dims;
@@ -413,7 +419,7 @@ bool FanBP(float* D_volumeData, unsigned int volumePitch,
 		bool ret;
 		ret = FanBP_internal(D_volumeData, volumePitch,
 		                  D_projData + iAngle * projPitch, projPitch,
-		                  subdims, angles + iAngle);
+		                  subdims, angles + iAngle, fOutputScale);
 		if (!ret)
 			return false;
 	}
@@ -422,7 +428,8 @@ bool FanBP(float* D_volumeData, unsigned int volumePitch,
 
 bool FanBP_FBPWeighted(float* D_volumeData, unsigned int volumePitch,
            float* D_projData, unsigned int projPitch,
-           const SDimensions& dims, const SFanProjection* angles)
+           const SDimensions& dims, const SFanProjection* angles,
+           float fOutputScale)
 {
 	for (unsigned int iAngle = 0; iAngle < dims.iProjAngles; iAngle += g_MaxAngles) {
 		SDimensions subdims = dims;
@@ -434,7 +441,7 @@ bool FanBP_FBPWeighted(float* D_volumeData, unsigned int volumePitch,
 		bool ret;
 		ret = FanBP_FBPWeighted_internal(D_volumeData, volumePitch,
 		                  D_projData + iAngle * projPitch, projPitch,
-		                  subdims, angles + iAngle);
+		                  subdims, angles + iAngle, fOutputScale);
 
 		if (!ret)
 			return false;
@@ -498,7 +505,7 @@ int main()
 	copyVolumeToDevice(img, dims.iVolWidth, dims.iVolWidth, dims.iVolHeight, D_volumeData, volumePitch);
 	copySinogramToDevice(sino, dims.iProjDets, dims.iProjDets, dims.iProjAngles, D_projData, projPitch);
 
-	FanBP(D_volumeData, volumePitch, D_projData, projPitch, dims, projs);
+	FanBP(D_volumeData, volumePitch, D_projData, projPitch, dims, projs, 1.0f);
 
 	copyVolumeFromDevice(img, dims.iVolWidth, dims.iVolWidth, dims.iVolHeight, D_volumeData, volumePitch);
 
diff --git a/cuda/2d/fan_bp.h b/cuda/2d/fan_bp.h
index e4e69b0..3ebe1e8 100644
--- a/cuda/2d/fan_bp.h
+++ b/cuda/2d/fan_bp.h
@@ -33,16 +33,19 @@ namespace astraCUDA {
 
 _AstraExport bool FanBP(float* D_volumeData, unsigned int volumePitch,
            float* D_projData, unsigned int projPitch,
-           const SDimensions& dims, const SFanProjection* angles);
+           const SDimensions& dims, const SFanProjection* angles,
+           float fOutputScale);
 
 _AstraExport bool FanBP_SART(float* D_volumeData, unsigned int volumePitch,
                 float* D_projData, unsigned int projPitch,
                 unsigned int angle,
-                const SDimensions& dims, const SFanProjection* angles);
+                const SDimensions& dims, const SFanProjection* angles,
+                float fOutputScale);
 
 _AstraExport bool FanBP_FBPWeighted(float* D_volumeData, unsigned int volumePitch,
            float* D_projData, unsigned int projPitch,
-           const SDimensions& dims, const SFanProjection* angles);
+           const SDimensions& dims, const SFanProjection* angles,
+           float fOutputScale);
 
 
 }
diff --git a/cuda/2d/par_bp.cu b/cuda/2d/par_bp.cu
index 635200f..d9f7325 100644
--- a/cuda/2d/par_bp.cu
+++ b/cuda/2d/par_bp.cu
@@ -73,7 +73,7 @@ static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int wi
 	return true;
 }
 
-__global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, bool offsets, const SDimensions dims)
+__global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, bool offsets, const SDimensions dims, float fOutputScale)
 {
 	const int relX = threadIdx.x;
 	const int relY = threadIdx.y;
@@ -123,11 +123,11 @@ __global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int star
 
 	}
 
-	volData[Y*volPitch+X] += fVal;
+	volData[Y*volPitch+X] += fVal * fOutputScale;
 }
 
 // supersampling version
-__global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, bool offsets, const SDimensions dims)
+__global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, bool offsets, const SDimensions dims, float fOutputScale)
 {
 	const int relX = threadIdx.x;
 	const int relY = threadIdx.y;
@@ -152,6 +152,8 @@ __global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int s
 	float fA = startAngle + 0.5f;
 	const float fT_base = 0.5f*dims.iProjDets - 0.5f + 0.5f;
 
+	fOutputScale /= (dims.iRaysPerPixelDim * dims.iRaysPerPixelDim);
+
 	if (offsets) {
 
 		for (int angle = startAngle; angle < endAngle; ++angle)
@@ -196,10 +198,10 @@ __global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int s
 
 	}
 
-	volData[Y*volPitch+X] += fVal / (dims.iRaysPerPixelDim * dims.iRaysPerPixelDim);
+	volData[Y*volPitch+X] += fVal * fOutputScale;
 }
 
-__global__ void devBP_SART(float* D_volData, unsigned int volPitch, float offset, float angle_sin, float angle_cos, const SDimensions dims)
+__global__ void devBP_SART(float* D_volData, unsigned int volPitch, float offset, float angle_sin, float angle_cos, const SDimensions dims, float fOutputScale)
 {
 	const int relX = threadIdx.x;
 	const int relY = threadIdx.y;
@@ -218,13 +220,13 @@ __global__ void devBP_SART(float* D_volData, unsigned int volPitch, float offset
 	const float fT = fT_base + fX * angle_cos - fY * angle_sin + offset;
 	const float fVal = tex2D(gT_projTexture, fT, 0.5f);
 
-	D_volData[Y*volPitch+X] += fVal;
+	D_volData[Y*volPitch+X] += fVal * fOutputScale;
 }
 
 
 bool BP_internal(float* D_volumeData, unsigned int volumePitch,
         float* D_projData, unsigned int projPitch,
-        const SDimensions& dims, const float* angles, const float* TOffsets)
+        const SDimensions& dims, const float* angles, const float* TOffsets, float fOutputScale)
 {
 	// TODO: process angles block by block
 	assert(dims.iProjAngles <= g_MaxAngles);
@@ -261,9 +263,9 @@ bool BP_internal(float* D_volumeData, unsigned int volumePitch,
 	for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) {
 
 		if (dims.iRaysPerPixelDim > 1)
-			devBP_SS<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, (TOffsets != 0), dims);
+			devBP_SS<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, (TOffsets != 0), dims, fOutputScale);
 		else
-			devBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, (TOffsets != 0), dims);
+			devBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, (TOffsets != 0), dims, fOutputScale);
 	}
 	cudaThreadSynchronize();
 
@@ -276,7 +278,7 @@ bool BP_internal(float* D_volumeData, unsigned int volumePitch,
 
 bool BP(float* D_volumeData, unsigned int volumePitch,
         float* D_projData, unsigned int projPitch,
-        const SDimensions& dims, const float* angles, const float* TOffsets)
+        const SDimensions& dims, const float* angles, const float* TOffsets, float fOutputScale)
 {
 	for (unsigned int iAngle = 0; iAngle < dims.iProjAngles; iAngle += g_MaxAngles) {
 		SDimensions subdims = dims;
@@ -289,7 +291,8 @@ bool BP(float* D_volumeData, unsigned int volumePitch,
 		ret = BP_internal(D_volumeData, volumePitch,
 		                  D_projData + iAngle * projPitch, projPitch,
 		                  subdims, angles + iAngle,
-		                  TOffsets ? TOffsets + iAngle : 0);
+		                  TOffsets ? TOffsets + iAngle : 0,
+		                  fOutputScale);
 		if (!ret)
 			return false;
 	}
@@ -300,7 +303,7 @@ bool BP(float* D_volumeData, unsigned int volumePitch,
 bool BP_SART(float* D_volumeData, unsigned int volumePitch,
              float* D_projData, unsigned int projPitch,
              unsigned int angle, const SDimensions& dims,
-             const float* angles, const float* TOffsets)
+             const float* angles, const float* TOffsets, float fOutputScale)
 {
 	// Only one angle.
 	// We need to Clamp to the border pixels instead of to zero, because
@@ -318,7 +321,7 @@ bool BP_SART(float* D_volumeData, unsigned int volumePitch,
 	dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices,
 	             (dims.iVolHeight+g_blockSliceSize-1)/g_blockSliceSize);
 
-	devBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, offset, angle_sin, angle_cos, dims);
+	devBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, offset, angle_sin, angle_cos, dims, fOutputScale);
 	cudaThreadSynchronize();
 
 	cudaTextForceKernelsCompletion();
@@ -369,7 +372,7 @@ int main()
 	for (unsigned int i = 0; i < dims.iProjAngles; ++i)
 		angle[i] = i*(M_PI/dims.iProjAngles);
 
-	BP(D_volumeData, volumePitch, D_projData, projPitch, dims, angle, 0);
+	BP(D_volumeData, volumePitch, D_projData, projPitch, dims, angle, 0, 1.0f);
 
 	delete[] angle;
 
diff --git a/cuda/2d/par_bp.h b/cuda/2d/par_bp.h
index eaeafd8..64bcd34 100644
--- a/cuda/2d/par_bp.h
+++ b/cuda/2d/par_bp.h
@@ -36,12 +36,12 @@ namespace astraCUDA {
 _AstraExport bool BP(float* D_volumeData, unsigned int volumePitch,
         float* D_projData, unsigned int projPitch,
         const SDimensions& dims, const float* angles,
-        const float* TOffsets);
+        const float* TOffsets, float fOutputScale);
 
 _AstraExport bool BP_SART(float* D_volumeData, unsigned int volumePitch,
              float* D_projData, unsigned int projPitch,
              unsigned int angle, const SDimensions& dims,
-             const float* angles, const float* TOffsets);
+             const float* angles, const float* TOffsets, float fOutputScale);
 
 }
 
diff --git a/cuda/2d/sart.cu b/cuda/2d/sart.cu
index 29670c3..e5cb5bb 100644
--- a/cuda/2d/sart.cu
+++ b/cuda/2d/sart.cu
@@ -200,10 +200,10 @@ bool SART::iterate(unsigned int iterations)
 			// BP, mask, and add back
 			// TODO: Try putting the masking directly in the BP
 			zeroVolumeData(D_tmpData, tmpPitch, dims);
-			callBP_SART(D_tmpData, tmpPitch, D_projData, projPitch, angle);
+			callBP_SART(D_tmpData, tmpPitch, D_projData, projPitch, angle, 1.0f);
 			processVol<opAddMul>(D_volumeData, D_maskData, D_tmpData, volumePitch, dims);
 		} else {
-			callBP_SART(D_volumeData, volumePitch, D_projData, projPitch, angle);
+			callBP_SART(D_volumeData, volumePitch, D_projData, projPitch, angle, 1.0f);
 		}
 
 		if (useMinConstraint)
@@ -264,16 +264,16 @@ bool SART::callFP_SART(float* D_volumeData, unsigned int volumePitch,
 
 bool SART::callBP_SART(float* D_volumeData, unsigned int volumePitch,
                        float* D_projData, unsigned int projPitch,
-                       unsigned int angle)
+                       unsigned int angle, float outputScale)
 {
 	if (angles) {
 		assert(!fanProjs);
 		return BP_SART(D_volumeData, volumePitch, D_projData, projPitch,
-		               angle, dims, angles, TOffsets);
+		               angle, dims, angles, TOffsets, outputScale);
 	} else {
 		assert(fanProjs);
 		return FanBP_SART(D_volumeData, volumePitch, D_projData, projPitch,
-		                  angle, dims, fanProjs);
+		                  angle, dims, fanProjs, outputScale);
 	}
 
 }
diff --git a/cuda/2d/sart.h b/cuda/2d/sart.h
index 6574a6f..7dcd641 100644
--- a/cuda/2d/sart.h
+++ b/cuda/2d/sart.h
@@ -59,7 +59,7 @@ protected:
 	                 unsigned int angle, float outputScale);
 	bool callBP_SART(float* D_volumeData, unsigned int volumePitch,
 	                 float* D_projData, unsigned int projPitch,
-	                 unsigned int angle);
+	                 unsigned int angle, float outputScale);
 
 
 	// projection angle variables
diff --git a/cuda/2d/sirt.cu b/cuda/2d/sirt.cu
index a6194a5..162ee77 100644
--- a/cuda/2d/sirt.cu
+++ b/cuda/2d/sirt.cu
@@ -127,10 +127,10 @@ bool SIRT::precomputeWeights()
 
 	zeroVolumeData(D_pixelWeight, pixelPitch, dims);
 	if (useSinogramMask) {
-		callBP(D_pixelWeight, pixelPitch, D_smaskData, smaskPitch);
+		callBP(D_pixelWeight, pixelPitch, D_smaskData, smaskPitch, 1.0f);
 	} else {
 		processSino<opSet>(D_projData, 1.0f, projPitch, dims);
-		callBP(D_pixelWeight, pixelPitch, D_projData, projPitch);
+		callBP(D_pixelWeight, pixelPitch, D_projData, projPitch, 1.0f);
 	}
 	processVol<opInvert>(D_pixelWeight, pixelPitch, dims);
 
@@ -251,7 +251,7 @@ bool SIRT::iterate(unsigned int iterations)
 
 		zeroVolumeData(D_tmpData, tmpPitch, dims);
 
-		callBP(D_tmpData, tmpPitch, D_projData, projPitch);
+		callBP(D_tmpData, tmpPitch, D_projData, projPitch, 1.0f);
 
 		processVol<opAddMul>(D_volumeData, D_pixelWeight, D_tmpData, volumePitch, dims);
 
-- 
cgit v1.2.3


From 7584ffbd6748bcca8c3f7ed2dc961be01f2fcfdc Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Thu, 16 Jul 2015 18:09:51 +0200
Subject: Fix assert

---
 cuda/3d/astra3d.cu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'cuda')

diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu
index ae79efb..3815a1a 100644
--- a/cuda/3d/astra3d.cu
+++ b/cuda/3d/astra3d.cu
@@ -287,7 +287,7 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
 {
 	assert(pVolGeom);
 	assert(pProjGeom);
-	assert(pProjGeom->getProjectionAngles());
+	assert(pProjGeom->getProjectionVectors());
 
 	int nth = pProjGeom->getProjectionCount();
 
-- 
cgit v1.2.3


From b14fb531ad9ae3d565f2cf28f5506408ab10dbed Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Wed, 18 Nov 2015 11:26:15 +0100
Subject: Add CompositeGeometryManager

This handles FP and BP operations on multiple data objects at once,
splitting them to fit in GPU memory where necessary.
---
 cuda/3d/astra3d.cu |  82 ----------------
 cuda/3d/astra3d.h  |   9 ++
 cuda/3d/mem3d.cu   | 270 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 cuda/3d/mem3d.h    |  99 ++++++++++++++++++++
 4 files changed, 378 insertions(+), 82 deletions(-)
 create mode 100644 cuda/3d/mem3d.cu
 create mode 100644 cuda/3d/mem3d.h

(limited to 'cuda')

diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu
index 3815a1a..8328229 100644
--- a/cuda/3d/astra3d.cu
+++ b/cuda/3d/astra3d.cu
@@ -58,88 +58,6 @@ enum CUDAProjectionType3d {
 };
 
 
-static SConeProjection* genConeProjections(unsigned int iProjAngles,
-                                           unsigned int iProjU,
-                                           unsigned int iProjV,
-                                           double fOriginSourceDistance,
-                                           double fOriginDetectorDistance,
-                                           double fDetUSize,
-                                           double fDetVSize,
-                                           const float *pfAngles)
-{
-	SConeProjection base;
-	base.fSrcX = 0.0f;
-	base.fSrcY = -fOriginSourceDistance;
-	base.fSrcZ = 0.0f;
-
-	base.fDetSX = iProjU * fDetUSize * -0.5f;
-	base.fDetSY = fOriginDetectorDistance;
-	base.fDetSZ = iProjV * fDetVSize * -0.5f;
-
-	base.fDetUX = fDetUSize;
-	base.fDetUY = 0.0f;
-	base.fDetUZ = 0.0f;
-
-	base.fDetVX = 0.0f;
-	base.fDetVY = 0.0f;
-	base.fDetVZ = fDetVSize;
-
-	SConeProjection* p = new SConeProjection[iProjAngles];
-
-#define ROTATE0(name,i,alpha) do { p[i].f##name##X = base.f##name##X * cos(alpha) - base.f##name##Y * sin(alpha); p[i].f##name##Y = base.f##name##X * sin(alpha) + base.f##name##Y * cos(alpha); p[i].f##name##Z = base.f##name##Z; } while(0)
-
-	for (unsigned int i = 0; i < iProjAngles; ++i) {
-		ROTATE0(Src, i, pfAngles[i]);
-		ROTATE0(DetS, i, pfAngles[i]);
-		ROTATE0(DetU, i, pfAngles[i]);
-		ROTATE0(DetV, i, pfAngles[i]);
-	}
-
-#undef ROTATE0
-
-	return p;
-}
-
-static SPar3DProjection* genPar3DProjections(unsigned int iProjAngles,
-                                             unsigned int iProjU,
-                                             unsigned int iProjV,
-                                             double fDetUSize,
-                                             double fDetVSize,
-                                             const float *pfAngles)
-{
-	SPar3DProjection base;
-	base.fRayX = 0.0f;
-	base.fRayY = 1.0f;
-	base.fRayZ = 0.0f;
-
-	base.fDetSX = iProjU * fDetUSize * -0.5f;
-	base.fDetSY = 0.0f;
-	base.fDetSZ = iProjV * fDetVSize * -0.5f;
-
-	base.fDetUX = fDetUSize;
-	base.fDetUY = 0.0f;
-	base.fDetUZ = 0.0f;
-
-	base.fDetVX = 0.0f;
-	base.fDetVY = 0.0f;
-	base.fDetVZ = fDetVSize;
-
-	SPar3DProjection* p = new SPar3DProjection[iProjAngles];
-
-#define ROTATE0(name,i,alpha) do { p[i].f##name##X = base.f##name##X * cos(alpha) - base.f##name##Y * sin(alpha); p[i].f##name##Y = base.f##name##X * sin(alpha) + base.f##name##Y * cos(alpha); p[i].f##name##Z = base.f##name##Z; } while(0)
-
-	for (unsigned int i = 0; i < iProjAngles; ++i) {
-		ROTATE0(Ray, i, pfAngles[i]);
-		ROTATE0(DetS, i, pfAngles[i]);
-		ROTATE0(DetU, i, pfAngles[i]);
-		ROTATE0(DetV, i, pfAngles[i]);
-	}
-
-#undef ROTATE0
-
-	return p;
-}
-
 
 
 
diff --git a/cuda/3d/astra3d.h b/cuda/3d/astra3d.h
index 6c3fcfb..2782994 100644
--- a/cuda/3d/astra3d.h
+++ b/cuda/3d/astra3d.h
@@ -281,6 +281,15 @@ protected:
 	AstraCGLS3d_internal *pData;
 };
 
+bool convertAstraGeometry_dims(const CVolumeGeometry3D* pVolGeom,
+                               const CProjectionGeometry3D* pProjGeom,
+                               astraCUDA3d::SDimensions3D& dims);
+
+bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
+                          const CProjectionGeometry3D* pProjGeom,
+                          SPar3DProjection*& pParProjs,
+                          SConeProjection*& pConeProjs,
+                          float& fOutputScale);
 
 _AstraExport bool astraCudaFP(const float* pfVolume, float* pfProjections,
                       const CVolumeGeometry3D* pVolGeom,
diff --git a/cuda/3d/mem3d.cu b/cuda/3d/mem3d.cu
new file mode 100644
index 0000000..6d81dc0
--- /dev/null
+++ b/cuda/3d/mem3d.cu
@@ -0,0 +1,270 @@
+/*
+-----------------------------------------------------------------------
+Copyright: 2010-2015, iMinds-Vision Lab, University of Antwerp
+           2014-2015, CWI, Amsterdam
+
+Contact: astra@uantwerpen.be
+Website: http://sf.net/projects/astra-toolbox
+
+This file is part of the ASTRA Toolbox.
+
+
+The ASTRA Toolbox is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+The ASTRA Toolbox is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
+
+-----------------------------------------------------------------------
+$Id$
+*/
+
+#include <cstdio>
+#include <cassert>
+
+#include "util3d.h"
+
+#include "mem3d.h"
+
+#include "astra3d.h"
+#include "cone_fp.h"
+#include "cone_bp.h"
+#include "par3d_fp.h"
+#include "par3d_bp.h"
+
+#include "astra/Logging.h"
+
+
+namespace astraCUDA3d {
+
+
+struct SMemHandle3D_internal
+{
+	cudaPitchedPtr ptr;
+	unsigned int nx;
+	unsigned int ny;
+	unsigned int nz;
+};
+
+size_t availableGPUMemory()
+{
+	size_t free, total;
+	cudaError_t err = cudaMemGetInfo(&free, &total);
+	if (err != cudaSuccess)
+		return 0;
+	return free;
+}
+
+MemHandle3D allocateGPUMemory(unsigned int x, unsigned int y, unsigned int z, Mem3DZeroMode zero)
+{
+	SMemHandle3D_internal hnd;
+	hnd.nx = x;
+	hnd.ny = y;
+	hnd.nz = z;
+
+	size_t free = availableGPUMemory();
+
+	cudaError_t err;
+	err = cudaMalloc3D(&hnd.ptr, make_cudaExtent(sizeof(float)*x, y, z));
+
+	if (err != cudaSuccess) {
+		return MemHandle3D();
+	}
+
+	size_t free2 = availableGPUMemory();
+
+	ASTRA_DEBUG("Allocated %d x %d x %d on GPU. (Pre: %lu, post: %lu)", x, y, z, free, free2);
+
+
+
+	if (zero == INIT_ZERO) {
+		err = cudaMemset3D(hnd.ptr, 0, make_cudaExtent(sizeof(float)*x, y, z));
+		if (err != cudaSuccess) {
+			cudaFree(hnd.ptr.ptr);
+			return MemHandle3D();
+		}
+	}
+
+	MemHandle3D ret;
+	ret.d = boost::shared_ptr<SMemHandle3D_internal>(new SMemHandle3D_internal);
+	*ret.d = hnd;
+
+	return ret;
+}
+
+bool freeGPUMemory(MemHandle3D handle)
+{
+	size_t free = availableGPUMemory();
+	cudaError_t err = cudaFree(handle.d->ptr.ptr);
+	size_t free2 = availableGPUMemory();
+
+	ASTRA_DEBUG("Freeing memory. (Pre: %lu, post: %lu)", free, free2);
+
+	return err == cudaSuccess;
+}
+
+bool copyToGPUMemory(const float *src, MemHandle3D dst, const SSubDimensions3D &pos)
+{
+	ASTRA_DEBUG("Copying %d x %d x %d to GPU", pos.subnx, pos.subny, pos.subnz);
+	ASTRA_DEBUG("Offset %d,%d,%d", pos.subx, pos.suby, pos.subz);
+	cudaPitchedPtr s;
+	s.ptr = (void*)src; // const cast away
+	s.pitch = pos.pitch * sizeof(float);
+	s.xsize = pos.nx * sizeof(float);
+	s.ysize = pos.ny;
+	ASTRA_DEBUG("Pitch %d, xsize %d, ysize %d", s.pitch, s.xsize, s.ysize);
+
+	cudaMemcpy3DParms p;
+	p.srcArray = 0;
+	p.srcPos = make_cudaPos(pos.subx * sizeof(float), pos.suby, pos.subz);
+	p.srcPtr = s;
+
+	p.dstArray = 0;
+	p.dstPos = make_cudaPos(0, 0, 0);
+	p.dstPtr = dst.d->ptr;
+
+	p.extent = make_cudaExtent(pos.subnx * sizeof(float), pos.subny, pos.subnz);
+
+	p.kind = cudaMemcpyHostToDevice;
+
+	cudaError_t err = cudaMemcpy3D(&p);
+
+	return err == cudaSuccess;
+}
+
+
+bool copyFromGPUMemory(float *dst, MemHandle3D src, const SSubDimensions3D &pos)
+{
+	ASTRA_DEBUG("Copying %d x %d x %d from GPU", pos.subnx, pos.subny, pos.subnz);
+	ASTRA_DEBUG("Offset %d,%d,%d", pos.subx, pos.suby, pos.subz);
+	cudaPitchedPtr d;
+	d.ptr = (void*)dst;
+	d.pitch = pos.pitch * sizeof(float);
+	d.xsize = pos.nx * sizeof(float);
+	d.ysize = pos.ny;
+	ASTRA_DEBUG("Pitch %d, xsize %d, ysize %d", d.pitch, d.xsize, d.ysize);
+
+	cudaMemcpy3DParms p;
+	p.srcArray = 0;
+	p.srcPos = make_cudaPos(0, 0, 0);
+	p.srcPtr = src.d->ptr;
+
+	p.dstArray = 0;
+	p.dstPos = make_cudaPos(pos.subx * sizeof(float), pos.suby, pos.subz);
+	p.dstPtr = d;
+
+	p.extent = make_cudaExtent(pos.subnx * sizeof(float), pos.subny, pos.subnz);
+
+	p.kind = cudaMemcpyDeviceToHost;
+
+	cudaError_t err = cudaMemcpy3D(&p);
+
+	return err == cudaSuccess;
+
+}
+
+
+bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iDetectorSuperSampling, astra::Cuda3DProjectionKernel projKernel)
+{
+	SDimensions3D dims;
+
+	bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims);
+	if (!ok)
+		return false;
+
+#if 1
+	dims.iRaysPerDetDim = iDetectorSuperSampling;
+	if (iDetectorSuperSampling == 0)
+		return false;
+#else
+	dims.iRaysPerDetDim = 1;
+	astra::Cuda3DProjectionKernel projKernel = astra::ker3d_default;
+#endif
+
+
+	SPar3DProjection* pParProjs;
+	SConeProjection* pConeProjs;
+
+	float outputScale = 1.0f;
+
+	ok = convertAstraGeometry(pVolGeom, pProjGeom,
+	                          pParProjs, pConeProjs,
+	                          outputScale);
+
+	if (pParProjs) {
+#if 0
+		for (int i = 0; i < dims.iProjAngles; ++i) {
+			ASTRA_DEBUG("Vec: %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f\n",
+			    pParProjs[i].fRayX, pParProjs[i].fRayY, pParProjs[i].fRayZ,
+			    pParProjs[i].fDetSX, pParProjs[i].fDetSY, pParProjs[i].fDetSZ,
+			    pParProjs[i].fDetUX, pParProjs[i].fDetUY, pParProjs[i].fDetUZ,
+			    pParProjs[i].fDetVX, pParProjs[i].fDetVY, pParProjs[i].fDetVZ);
+		}
+#endif
+
+		switch (projKernel) {
+		case astra::ker3d_default:
+			ok &= Par3DFP(volData.d->ptr, projData.d->ptr, dims, pParProjs, outputScale);
+			break;
+		case astra::ker3d_sum_square_weights:
+			ok &= Par3DFP_SumSqW(volData.d->ptr, projData.d->ptr, dims, pParProjs, outputScale*outputScale);
+			break;
+		default:
+			ok = false;
+		}
+	} else {
+		switch (projKernel) {
+		case astra::ker3d_default:
+			ok &= ConeFP(volData.d->ptr, projData.d->ptr, dims, pConeProjs, outputScale);
+			break;
+		default:
+			ok = false;
+		}
+	}
+
+	return ok;
+}
+
+bool BP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iVoxelSuperSampling)
+{
+	SDimensions3D dims;
+
+	bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims);
+	if (!ok)
+		return false;
+
+#if 1
+	dims.iRaysPerVoxelDim = iVoxelSuperSampling;
+#else
+	dims.iRaysPerVoxelDim = 1;
+#endif
+
+	SPar3DProjection* pParProjs;
+	SConeProjection* pConeProjs;
+
+	float outputScale = 1.0f;
+
+	ok = convertAstraGeometry(pVolGeom, pProjGeom,
+	                          pParProjs, pConeProjs,
+	                          outputScale);
+
+	if (pParProjs)
+		ok &= Par3DBP(volData.d->ptr, projData.d->ptr, dims, pParProjs, outputScale);
+	else
+		ok &= ConeBP(volData.d->ptr, projData.d->ptr, dims, pConeProjs, outputScale);
+
+	return ok;
+
+}
+
+
+
+
+}
diff --git a/cuda/3d/mem3d.h b/cuda/3d/mem3d.h
new file mode 100644
index 0000000..82bad19
--- /dev/null
+++ b/cuda/3d/mem3d.h
@@ -0,0 +1,99 @@
+/*
+-----------------------------------------------------------------------
+Copyright: 2010-2015, iMinds-Vision Lab, University of Antwerp
+           2014-2015, CWI, Amsterdam
+
+Contact: astra@uantwerpen.be
+Website: http://sf.net/projects/astra-toolbox
+
+This file is part of the ASTRA Toolbox.
+
+
+The ASTRA Toolbox is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+The ASTRA Toolbox is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
+
+-----------------------------------------------------------------------
+*/
+
+#ifndef _CUDA_MEM3D_H
+#define _CUDA_MEM3D_H
+
+#include <boost/shared_ptr.hpp>
+
+#include "astra3d.h"
+
+namespace astra {
+class CVolumeGeometry3D;
+class CProjectionGeometry3D;	
+}
+
+namespace astraCUDA3d {
+
+// TODO: Make it possible to delete these handles when they're no longer
+// necessary inside the FP/BP
+//
+// TODO: Add functions for querying capacity
+
+struct SMemHandle3D_internal;
+
+struct MemHandle3D {
+	boost::shared_ptr<SMemHandle3D_internal> d;
+	operator bool() const { return (bool)d; }
+};
+
+struct SSubDimensions3D {
+	unsigned int nx;
+	unsigned int ny;
+	unsigned int nz;
+	unsigned int pitch;
+	unsigned int subnx;
+	unsigned int subny;
+	unsigned int subnz;
+	unsigned int subx;
+	unsigned int suby;
+	unsigned int subz;
+};
+
+/*
+// Useful or not?
+enum Mem3DCopyMode {
+	MODE_SET,
+	MODE_ADD
+};
+*/
+
+enum Mem3DZeroMode {
+	INIT_NO,
+	INIT_ZERO
+};
+
+size_t availableGPUMemory();
+
+MemHandle3D allocateGPUMemory(unsigned int x, unsigned int y, unsigned int z, Mem3DZeroMode zero);
+
+bool copyToGPUMemory(const float *src, MemHandle3D dst, const SSubDimensions3D &pos);
+
+bool copyFromGPUMemory(float *dst, MemHandle3D src, const SSubDimensions3D &pos);
+
+bool freeGPUMemory(MemHandle3D handle);
+
+
+bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iDetectorSuperSampling, astra::Cuda3DProjectionKernel projKernel);
+
+bool BP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iVoxelSuperSampling);
+
+
+
+}
+
+#endif
-- 
cgit v1.2.3


From c66e4b030467ddadac71e5bd4803737cf94c0a07 Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Tue, 22 Dec 2015 14:00:50 +0100
Subject: Reduce FP3D CUDA kernel runtime

This reduces the chance of the Windows display driver watchdog triggering,
and doesn't seem to hurt performance.
---
 cuda/3d/cone_fp.cu  | 2 +-
 cuda/3d/par3d_fp.cu | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'cuda')

diff --git a/cuda/3d/cone_fp.cu b/cuda/3d/cone_fp.cu
index b36d2bc..13b184f 100644
--- a/cuda/3d/cone_fp.cu
+++ b/cuda/3d/cone_fp.cu
@@ -49,7 +49,7 @@ namespace astraCUDA3d {
 static const unsigned int g_anglesPerBlock = 4;
 
 // thickness of the slices we're splitting the volume up into
-static const unsigned int g_blockSlices = 64;
+static const unsigned int g_blockSlices = 32;
 static const unsigned int g_detBlockU = 32;
 static const unsigned int g_detBlockV = 32;
 
diff --git a/cuda/3d/par3d_fp.cu b/cuda/3d/par3d_fp.cu
index b14c494..3ce3d42 100644
--- a/cuda/3d/par3d_fp.cu
+++ b/cuda/3d/par3d_fp.cu
@@ -49,7 +49,7 @@ namespace astraCUDA3d {
 static const unsigned int g_anglesPerBlock = 4;
 
 // thickness of the slices we're splitting the volume up into
-static const unsigned int g_blockSlices = 64;
+static const unsigned int g_blockSlices = 32;
 static const unsigned int g_detBlockU = 32;
 static const unsigned int g_detBlockV = 32;
 
-- 
cgit v1.2.3


From 687c5e244e46e51786afad77f5015cae9abad129 Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Wed, 6 Jan 2016 15:10:34 +0100
Subject: Add multi-GPU support to CompositeGeometryManager

---
 cuda/3d/mem3d.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'cuda')

diff --git a/cuda/3d/mem3d.h b/cuda/3d/mem3d.h
index 82bad19..acb72cb 100644
--- a/cuda/3d/mem3d.h
+++ b/cuda/3d/mem3d.h
@@ -87,6 +87,8 @@ bool copyFromGPUMemory(float *dst, MemHandle3D src, const SSubDimensions3D &pos)
 
 bool freeGPUMemory(MemHandle3D handle);
 
+bool setGPUIndex(int index);
+
 
 bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iDetectorSuperSampling, astra::Cuda3DProjectionKernel projKernel);
 
-- 
cgit v1.2.3


From 3743fdc534b39958c105f4124ad1130d3e8b042a Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Tue, 16 Feb 2016 17:53:24 +0100
Subject: Query max texture size instead of hardcoding it

---
 cuda/3d/mem3d.cu | 19 +++++++++++++++++++
 cuda/3d/mem3d.h  |  1 +
 2 files changed, 20 insertions(+)

(limited to 'cuda')

diff --git a/cuda/3d/mem3d.cu b/cuda/3d/mem3d.cu
index 6d81dc0..0320117 100644
--- a/cuda/3d/mem3d.cu
+++ b/cuda/3d/mem3d.cu
@@ -62,6 +62,25 @@ size_t availableGPUMemory()
 	return free;
 }
 
+int maxBlockDimension()
+{
+	int dev;
+	cudaError_t err = cudaGetDevice(&dev);
+	if (err != cudaSuccess) {
+		ASTRA_WARN("Error querying device");
+		return 0;
+	}
+
+	cudaDeviceProp props;
+	err = cudaGetDeviceProperties(&props, dev);
+	if (err != cudaSuccess) {
+		ASTRA_WARN("Error querying device %d properties", dev);
+		return 0;
+	}
+
+	return std::min(props.maxTexture3D[0], std::min(props.maxTexture3D[1], props.maxTexture3D[2]));
+}
+
 MemHandle3D allocateGPUMemory(unsigned int x, unsigned int y, unsigned int z, Mem3DZeroMode zero)
 {
 	SMemHandle3D_internal hnd;
diff --git a/cuda/3d/mem3d.h b/cuda/3d/mem3d.h
index acb72cb..6fff80b 100644
--- a/cuda/3d/mem3d.h
+++ b/cuda/3d/mem3d.h
@@ -78,6 +78,7 @@ enum Mem3DZeroMode {
 };
 
 size_t availableGPUMemory();
+int maxBlockDimension();
 
 MemHandle3D allocateGPUMemory(unsigned int x, unsigned int y, unsigned int z, Mem3DZeroMode zero);
 
-- 
cgit v1.2.3


From 5edb35edc2c721b458334a65512b534912c2c542 Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Wed, 23 Mar 2016 15:30:56 +0100
Subject: Add relaxation parameters to SIRT, SART

---
 cuda/2d/sart.cu | 7 +++++--
 cuda/2d/sart.h  | 4 ++++
 cuda/2d/sirt.cu | 8 ++++++++
 cuda/2d/sirt.h  | 4 ++++
 4 files changed, 21 insertions(+), 2 deletions(-)

(limited to 'cuda')

diff --git a/cuda/2d/sart.cu b/cuda/2d/sart.cu
index e5cb5bb..c8608a3 100644
--- a/cuda/2d/sart.cu
+++ b/cuda/2d/sart.cu
@@ -71,6 +71,8 @@ SART::SART() : ReconAlgo()
 	projectionCount = 0;
 	iteration = 0;
 	customOrder = false;
+
+	fRelaxation = 1.0f;
 }
 
 
@@ -98,6 +100,7 @@ void SART::reset()
 	projectionCount = 0;
 	iteration = 0;
 	customOrder = false;
+	fRelaxation = 1.0f;
 
 	ReconAlgo::reset();
 }
@@ -200,10 +203,10 @@ bool SART::iterate(unsigned int iterations)
 			// BP, mask, and add back
 			// TODO: Try putting the masking directly in the BP
 			zeroVolumeData(D_tmpData, tmpPitch, dims);
-			callBP_SART(D_tmpData, tmpPitch, D_projData, projPitch, angle, 1.0f);
+			callBP_SART(D_tmpData, tmpPitch, D_projData, projPitch, angle, fRelaxation);
 			processVol<opAddMul>(D_volumeData, D_maskData, D_tmpData, volumePitch, dims);
 		} else {
-			callBP_SART(D_volumeData, volumePitch, D_projData, projPitch, angle, 1.0f);
+			callBP_SART(D_volumeData, volumePitch, D_projData, projPitch, angle, fRelaxation);
 		}
 
 		if (useMinConstraint)
diff --git a/cuda/2d/sart.h b/cuda/2d/sart.h
index 7dcd641..eff9ecf 100644
--- a/cuda/2d/sart.h
+++ b/cuda/2d/sart.h
@@ -50,6 +50,8 @@ public:
 
 	virtual float computeDiffNorm();
 
+	void setRelaxation(float r) { fRelaxation = r; }
+
 protected:
 	void reset();
 	bool precomputeWeights();
@@ -78,6 +80,8 @@ protected:
 	// Geometry-specific precomputed data
 	float* D_lineWeight;
 	unsigned int linePitch;
+
+	float fRelaxation;
 };
 
 }
diff --git a/cuda/2d/sirt.cu b/cuda/2d/sirt.cu
index 162ee77..4baaccb 100644
--- a/cuda/2d/sirt.cu
+++ b/cuda/2d/sirt.cu
@@ -50,6 +50,8 @@ SIRT::SIRT() : ReconAlgo()
 	D_minMaskData = 0;
 	D_maxMaskData = 0;
 
+	fRelaxation = 1.0f;
+
 	freeMinMaxMasks = false;
 }
 
@@ -83,6 +85,8 @@ void SIRT::reset()
 	useVolumeMask = false;
 	useSinogramMask = false;
 
+	fRelaxation = 1.0f;
+
 	ReconAlgo::reset();
 }
 
@@ -139,6 +143,9 @@ bool SIRT::precomputeWeights()
 		processVol<opMul>(D_pixelWeight, D_maskData, pixelPitch, dims);
 	}
 
+	// Also fold the relaxation factor into pixel weights
+	processVol<opMul>(D_pixelWeight, fRelaxation, pixelPitch, dims);
+
 	return true;
 }
 
@@ -253,6 +260,7 @@ bool SIRT::iterate(unsigned int iterations)
 
 		callBP(D_tmpData, tmpPitch, D_projData, projPitch, 1.0f);
 
+		// pixel weights also contain the volume mask and relaxation factor
 		processVol<opAddMul>(D_volumeData, D_pixelWeight, D_tmpData, volumePitch, dims);
 
 		if (useMinConstraint)
diff --git a/cuda/2d/sirt.h b/cuda/2d/sirt.h
index 21094a1..bc913f4 100644
--- a/cuda/2d/sirt.h
+++ b/cuda/2d/sirt.h
@@ -53,6 +53,8 @@ public:
 	bool uploadMinMaxMasks(const float* minMaskData, const float* maxMaskData,
 	                       unsigned int pitch);
 
+	void setRelaxation(float r) { fRelaxation = r; }
+
 	virtual bool iterate(unsigned int iterations);
 
 	virtual float computeDiffNorm();
@@ -81,6 +83,8 @@ protected:
 	unsigned int minMaskPitch;
 	float* D_maxMaskData;
 	unsigned int maxMaskPitch;
+
+	float fRelaxation;
 };
 
 bool doSIRT(float* D_volumeData, unsigned int volumePitch,
-- 
cgit v1.2.3


From 16430239d04ff738a21146c410918c285552543f Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Wed, 23 Mar 2016 15:50:24 +0100
Subject: Add relaxation parameters to SIRT3D

---
 cuda/3d/astra3d.cu | 13 +++++++++++++
 cuda/3d/astra3d.h  |  2 ++
 cuda/3d/sirt3d.cu  |  8 +++++++-
 cuda/3d/sirt3d.h   |  5 +++++
 4 files changed, 27 insertions(+), 1 deletion(-)

(limited to 'cuda')

diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu
index 8328229..5670873 100644
--- a/cuda/3d/astra3d.cu
+++ b/cuda/3d/astra3d.cu
@@ -267,6 +267,7 @@ public:
 	float fOriginDetectorDistance;
 	float fSourceZ;
 	float fDetSize;
+	float fRelaxation;
 
 	SConeProjection* projs;
 	SPar3DProjection* parprojs;
@@ -311,6 +312,8 @@ AstraSIRT3d::AstraSIRT3d()
 	pData->parprojs = 0;
 	pData->fOutputScale = 1.0f;
 
+	pData->fRelaxation = 1.0f;
+
 	pData->initialized = false;
 	pData->setStartReconstruction = false;
 
@@ -389,6 +392,14 @@ bool AstraSIRT3d::enableSuperSampling(unsigned int iVoxelSuperSampling,
 	return true;
 }
 
+void AstraSIRT3d::setRelaxation(float r)
+{
+	if (pData->initialized)
+		return;
+
+	pData->fRelaxation = r;
+}
+
 bool AstraSIRT3d::enableVolumeMask()
 {
 	if (pData->initialized)
@@ -448,6 +459,8 @@ bool AstraSIRT3d::init()
 	if (!ok)
 		return false;
 
+	pData->sirt.setRelaxation(pData->fRelaxation);
+
 	pData->D_volumeData = allocateVolumeData(pData->dims);
 	ok = pData->D_volumeData.ptr;
 	if (!ok)
diff --git a/cuda/3d/astra3d.h b/cuda/3d/astra3d.h
index 2782994..2137587 100644
--- a/cuda/3d/astra3d.h
+++ b/cuda/3d/astra3d.h
@@ -68,6 +68,8 @@ public:
 	bool enableSuperSampling(unsigned int iVoxelSuperSampling,
 	                         unsigned int iDetectorSuperSampling);
 
+	void setRelaxation(float r);
+
 	// Enable volume/sinogram masks
 	//
 	// This may optionally be called before init().
diff --git a/cuda/3d/sirt3d.cu b/cuda/3d/sirt3d.cu
index 484521e..713944b 100644
--- a/cuda/3d/sirt3d.cu
+++ b/cuda/3d/sirt3d.cu
@@ -59,6 +59,8 @@ SIRT::SIRT() : ReconAlgo3D()
 
 	useMinConstraint = false;
 	useMaxConstraint = false;
+
+	fRelaxation = 1.0f;
 }
 
 
@@ -89,6 +91,8 @@ void SIRT::reset()
 	useVolumeMask = false;
 	useSinogramMask = false;
 
+	fRelaxation = 1.0f;
+
 	ReconAlgo3D::reset();
 }
 
@@ -196,6 +200,8 @@ bool SIRT::precomputeWeights()
 		// scale pixel weights with mask to zero out masked pixels
 		processVol3D<opMul>(D_pixelWeight, D_maskData, dims);
 	}
+	processVol3D<opMul>(D_pixelWeight, fRelaxation, dims);
+
 
 	return true;
 }
@@ -307,7 +313,7 @@ bool SIRT::iterate(unsigned int iterations)
 	}
 #endif
 
-
+		// pixel weights also contain the volume mask and relaxation factor
 		processVol3D<opAddMul>(D_volumeData, D_tmpData, D_pixelWeight, dims);
 
 		if (useMinConstraint)
diff --git a/cuda/3d/sirt3d.h b/cuda/3d/sirt3d.h
index bb3864a..5e93deb 100644
--- a/cuda/3d/sirt3d.h
+++ b/cuda/3d/sirt3d.h
@@ -48,6 +48,9 @@ public:
 	// init should be called after setting all geometry
 	bool init();
 
+	// Set relaxation factor. This may be called after init and before iterate.
+	void setRelaxation(float r) { fRelaxation = r; }
+
 	// setVolumeMask should be called after init and before iterate,
 	// but only if enableVolumeMask was called before init.
 	// It may be called again after iterate.
@@ -91,6 +94,8 @@ protected:
 	float fMinConstraint;
 	float fMaxConstraint;
 
+	float fRelaxation;
+
 	cudaPitchedPtr D_maskData;
 	cudaPitchedPtr D_smaskData;
 
-- 
cgit v1.2.3


From 558820f1421e79b32e542c133c83683e58df6d5e Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Fri, 24 Jun 2016 15:28:47 +0200
Subject: Compute FBP filter in spatial domain

---
 cuda/2d/fft.cu | 46 +++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 39 insertions(+), 7 deletions(-)

(limited to 'cuda')

diff --git a/cuda/2d/fft.cu b/cuda/2d/fft.cu
index 2bfd493..2d259a9 100644
--- a/cuda/2d/fft.cu
+++ b/cuda/2d/fft.cu
@@ -35,6 +35,7 @@ $Id$
 #include <fstream>
 
 #include "../../include/astra/Logging.h"
+#include "astra/Fourier.h"
 
 using namespace astra;
 
@@ -303,16 +304,48 @@ void genFilter(E_FBPFILTER _eFilter, float _fD, int _iProjectionCount,
 	float * pfFilt = new float[_iFFTFourierDetectorCount];
 	float * pfW = new float[_iFFTFourierDetectorCount];
 
+	// We cache one Fourier transform for repeated FBP's of the same size
+	static float *pfData = 0;
+	static int iFilterCacheSize = 0;
+
+	if (!pfData || iFilterCacheSize != _iFFTRealDetectorCount) {
+		// Compute filter in spatial domain
+
+		delete[] pfData;
+		pfData = new float[2*_iFFTRealDetectorCount];
+		int *ip = new int[int(2+sqrt(_iFFTRealDetectorCount)+1)];
+		ip[0] = 0;
+		float32 *w = new float32[_iFFTRealDetectorCount/2];
+
+		for (int i = 0; i < _iFFTRealDetectorCount; ++i) {
+			pfData[2*i+1] = 0.0f;
+
+			if (i & 1) {
+				int j = i;
+				if (2*j > _iFFTRealDetectorCount)
+					j = _iFFTRealDetectorCount - j;
+				float f = M_PI * j;
+				pfData[2*i] = -1 / (f*f);
+			} else {
+				pfData[2*i] = 0.0f;
+			}
+		}
+
+		pfData[0] = 0.25f;
+
+		cdft(2*_iFFTRealDetectorCount, -1, pfData, ip, w);
+		delete[] ip;
+		delete[] w;
+
+		iFilterCacheSize = _iFFTRealDetectorCount;
+	}
+
 	for(int iDetectorIndex = 0; iDetectorIndex < _iFFTFourierDetectorCount; iDetectorIndex++)
 	{
 		float fRelIndex = (float)iDetectorIndex / (float)_iFFTRealDetectorCount;
 
-		// filt = 2*( 0:(order/2) )./order;
-		pfFilt[iDetectorIndex] = 2.0f * fRelIndex;
-		//pfFilt[iDetectorIndex] = 1.0f;
-
-		// w = 2*pi*(0:size(filt,2)-1)/order
-		pfW[iDetectorIndex] = 3.1415f * 2.0f * fRelIndex;
+		pfFilt[iDetectorIndex] = 2.0f * pfData[2*iDetectorIndex];
+		pfW[iDetectorIndex] = M_PI * 2.0f * fRelIndex;
 	}
 
 	switch(_eFilter)
@@ -866,5 +899,4 @@ void downloadDebugFilterReal(float * _pfHostSinogram, int _iProjectionCount,
 	free(pfHostFilter);
 }
 
-
 #endif
-- 
cgit v1.2.3