From 5304d08cd1ab7b8d778c367912934376eb92370f Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Mon, 9 Mar 2015 15:43:56 +0100
Subject: Allow non-centered volume geometry in SIRT3D and CGLS3D

---
 src/CudaCglsAlgorithm3D.cpp | 39 +--------------------------------------
 src/CudaSirtAlgorithm3D.cpp | 38 +-------------------------------------
 2 files changed, 2 insertions(+), 75 deletions(-)

(limited to 'src')

diff --git a/src/CudaCglsAlgorithm3D.cpp b/src/CudaCglsAlgorithm3D.cpp
index a5500d6..3677458 100644
--- a/src/CudaCglsAlgorithm3D.cpp
+++ b/src/CudaCglsAlgorithm3D.cpp
@@ -171,9 +171,6 @@ void CCudaCglsAlgorithm3D::run(int _iNrIterations)
 	ASTRA_ASSERT(m_bIsInitialized);
 
 	const CProjectionGeometry3D* projgeom = m_pSinogram->getGeometry();
-	const CConeProjectionGeometry3D* conegeom = dynamic_cast<const CConeProjectionGeometry3D*>(projgeom);
-	const CParallelVecProjectionGeometry3D* parvec3dgeom = dynamic_cast<const CParallelVecProjectionGeometry3D*>(projgeom);
-	const CConeVecProjectionGeometry3D* conevec3dgeom = dynamic_cast<const CConeVecProjectionGeometry3D*>(projgeom);
 	const CVolumeGeometry3D& volgeom = *m_pReconstruction->getGeometry();
 
 	bool ok = true;
@@ -182,41 +179,7 @@ void CCudaCglsAlgorithm3D::run(int _iNrIterations)
 
 		ok &= m_pCgls->setGPUIndex(m_iGPUIndex);
 
-		ok &= m_pCgls->setReconstructionGeometry(volgeom.getGridColCount(),
-		                                         volgeom.getGridRowCount(),
-		                                         volgeom.getGridSliceCount());
-/*
-                                  unsigned int iProjAngles,
-                                  unsigned int iProjU,
-                                  unsigned int iProjV,
-                                  float fOriginSourceDistance,
-                                  float fOriginDetectorDistance,
-                                  float fDetUSize,
-                                  float fDetVSize,
-                                  const float *pfAngles)
-*/
-		if (conegeom) {
-			ok &= m_pCgls->setConeGeometry(conegeom->getProjectionCount(),
-			                               conegeom->getDetectorColCount(),
-			                               conegeom->getDetectorRowCount(),
-			                               conegeom->getOriginSourceDistance(),
-			                               conegeom->getOriginDetectorDistance(),
-			                               conegeom->getDetectorSpacingX(),
-			                               conegeom->getDetectorSpacingY(),
-			                               conegeom->getProjectionAngles());
-		} else if (parvec3dgeom) {
-			ok &= m_pCgls->setPar3DGeometry(parvec3dgeom->getProjectionCount(),
-			                                parvec3dgeom->getDetectorColCount(),
-			                                parvec3dgeom->getDetectorRowCount(),
-			                                parvec3dgeom->getProjectionVectors());
-		} else if (conevec3dgeom) {
-			ok &= m_pCgls->setConeGeometry(conevec3dgeom->getProjectionCount(),
-			                               conevec3dgeom->getDetectorColCount(),
-			                               conevec3dgeom->getDetectorRowCount(),
-			                               conevec3dgeom->getProjectionVectors());
-		} else {
-			ASTRA_ASSERT(false);
-		}
+		ok &= m_pCgls->setGeometry(&volgeom, projgeom);
 
 		ok &= m_pCgls->enableSuperSampling(m_iVoxelSuperSampling, m_iDetectorSuperSampling);
 
diff --git a/src/CudaSirtAlgorithm3D.cpp b/src/CudaSirtAlgorithm3D.cpp
index da83c7e..d67778f 100644
--- a/src/CudaSirtAlgorithm3D.cpp
+++ b/src/CudaSirtAlgorithm3D.cpp
@@ -172,10 +172,6 @@ void CCudaSirtAlgorithm3D::run(int _iNrIterations)
 	ASTRA_ASSERT(m_bIsInitialized);
 
 	const CProjectionGeometry3D* projgeom = m_pSinogram->getGeometry();
-	const CConeProjectionGeometry3D* conegeom = dynamic_cast<const CConeProjectionGeometry3D*>(projgeom);
-	const CParallelProjectionGeometry3D* par3dgeom = dynamic_cast<const CParallelProjectionGeometry3D*>(projgeom);
-	const CParallelVecProjectionGeometry3D* parvec3dgeom = dynamic_cast<const CParallelVecProjectionGeometry3D*>(projgeom);
-	const CConeVecProjectionGeometry3D* conevec3dgeom = dynamic_cast<const CConeVecProjectionGeometry3D*>(projgeom);
 	const CVolumeGeometry3D& volgeom = *m_pReconstruction->getGeometry();
 
 	bool ok = true;
@@ -184,39 +180,7 @@ void CCudaSirtAlgorithm3D::run(int _iNrIterations)
 
 		ok &= m_pSirt->setGPUIndex(m_iGPUIndex);
 
-		ok &= m_pSirt->setReconstructionGeometry(volgeom.getGridColCount(),
-		                                         volgeom.getGridRowCount(),
-		                                         volgeom.getGridSliceCount());
-
-		if (conegeom) {
-			ok &= m_pSirt->setConeGeometry(conegeom->getProjectionCount(),
-			                               conegeom->getDetectorColCount(),
-			                               conegeom->getDetectorRowCount(),
-			                               conegeom->getOriginSourceDistance(),
-			                               conegeom->getOriginDetectorDistance(),
-			                               conegeom->getDetectorSpacingX(),
-			                               conegeom->getDetectorSpacingY(),
-			                               conegeom->getProjectionAngles());
-		} else if (par3dgeom) {
-			ok &= m_pSirt->setPar3DGeometry(par3dgeom->getProjectionCount(),
-			                                par3dgeom->getDetectorColCount(),
-			                                par3dgeom->getDetectorRowCount(),
-			                                par3dgeom->getDetectorSpacingX(),
-			                                par3dgeom->getDetectorSpacingY(),
-			                                par3dgeom->getProjectionAngles());
-		} else if (parvec3dgeom) {
-			ok &= m_pSirt->setPar3DGeometry(parvec3dgeom->getProjectionCount(),
-			                                parvec3dgeom->getDetectorColCount(),
-			                                parvec3dgeom->getDetectorRowCount(),
-			                                parvec3dgeom->getProjectionVectors());
-		} else if (conevec3dgeom) {
-			ok &= m_pSirt->setConeGeometry(conevec3dgeom->getProjectionCount(),
-			                               conevec3dgeom->getDetectorColCount(),
-			                               conevec3dgeom->getDetectorRowCount(),
-			                               conevec3dgeom->getProjectionVectors());
-		} else {
-			ASTRA_ASSERT(false);
-		}
+		ok &= m_pSirt->setGeometry(&volgeom, projgeom);
 
 		ok &= m_pSirt->enableSuperSampling(m_iVoxelSuperSampling, m_iDetectorSuperSampling);
 
-- 
cgit v1.2.3


From 140f64028a6c06895ba7dad8997e14b7a05aadab Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Wed, 11 Mar 2015 12:07:48 +0100
Subject: Let astraCudaFDK use utility functions

---
 src/CudaFDKAlgorithm3D.cpp | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

(limited to 'src')

diff --git a/src/CudaFDKAlgorithm3D.cpp b/src/CudaFDKAlgorithm3D.cpp
index 7638696..0a46ff6 100644
--- a/src/CudaFDKAlgorithm3D.cpp
+++ b/src/CudaFDKAlgorithm3D.cpp
@@ -171,17 +171,7 @@ void CCudaFDKAlgorithm3D::run(int _iNrIterations)
 	bool ok = true;
 
 	ok = astraCudaFDK(pReconMem->getData(), pSinoMem->getDataConst(),
-	                  volgeom.getGridColCount(),
-	                  volgeom.getGridRowCount(),
-	                  volgeom.getGridSliceCount(),
-	                  conegeom->getProjectionCount(),
-	                  conegeom->getDetectorColCount(),
-	                  conegeom->getDetectorRowCount(),
-	                  conegeom->getOriginSourceDistance(),
-	                  conegeom->getOriginDetectorDistance(),
-	                  conegeom->getDetectorSpacingX(),
-	                  conegeom->getDetectorSpacingY(),
-	                  conegeom->getProjectionAngles(),
+	                  &volgeom, conegeom,
 	                  m_bShortScan, m_iGPUIndex, m_iVoxelSuperSampling);
 
 	ASTRA_ASSERT(ok);
-- 
cgit v1.2.3


From 18d12242207d1113c3015b451f522531168e626a Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Wed, 11 Mar 2015 17:27:44 +0100
Subject: Add flexible volgeom3d support to astraCudaBP_SIRTWeighted

---
 src/CudaBackProjectionAlgorithm3D.cpp | 87 +++++++++++------------------------
 1 file changed, 28 insertions(+), 59 deletions(-)

(limited to 'src')

diff --git a/src/CudaBackProjectionAlgorithm3D.cpp b/src/CudaBackProjectionAlgorithm3D.cpp
index abcf096..7117cfc 100644
--- a/src/CudaBackProjectionAlgorithm3D.cpp
+++ b/src/CudaBackProjectionAlgorithm3D.cpp
@@ -107,16 +107,8 @@ bool CCudaBackProjectionAlgorithm3D::initialize(const Config& _cfg)
 	m_iVoxelSuperSampling = (int)_cfg.self->getOptionNumerical("VoxelSuperSampling", 1);
 	CC.markOptionParsed("VoxelSuperSampling");
 
-	CFloat32ProjectionData3DMemory* pSinoMem = dynamic_cast<CFloat32ProjectionData3DMemory*>(m_pSinogram);
-	ASTRA_ASSERT(pSinoMem);
-	const CProjectionGeometry3D* projgeom = pSinoMem->getGeometry();
-const CParallelProjectionGeometry3D* par3dgeom = dynamic_cast<const CParallelProjectionGeometry3D*>(projgeom);
-	const CParallelVecProjectionGeometry3D* parvec3dgeom = dynamic_cast<const CParallelVecProjectionGeometry3D*>(projgeom);
-	if (parvec3dgeom || par3dgeom) {
-		// This option is only supported for Par3D currently
-		m_bSIRTWeighting = _cfg.self->getOptionBool("SIRTWeighting", false);
-		CC.markOptionParsed("SIRTWeighting");
-	}
+	m_bSIRTWeighting = _cfg.self->getOptionBool("SIRTWeighting", false);
+	CC.markOptionParsed("SIRTWeighting");
 
 	// success
 	m_bIsInitialized = _check();
@@ -178,7 +170,12 @@ void CCudaBackProjectionAlgorithm3D::run(int _iNrIterations)
 	const CParallelVecProjectionGeometry3D* parvec3dgeom = dynamic_cast<const CParallelVecProjectionGeometry3D*>(projgeom);
 	const CVolumeGeometry3D& volgeom = *pReconMem->getGeometry();
 
-	if (conegeom) {
+	if (m_bSIRTWeighting) {
+		astraCudaBP_SIRTWeighted(pReconMem->getData(),
+		                         pSinoMem->getDataConst(),
+		                         &volgeom, projgeom,
+		                         m_iGPUIndex, m_iVoxelSuperSampling);
+	} else if (conegeom) {
 		astraCudaConeBP(pReconMem->getData(), pSinoMem->getDataConst(),
 		                volgeom.getGridColCount(),
 		                volgeom.getGridRowCount(),
@@ -193,55 +190,27 @@ void CCudaBackProjectionAlgorithm3D::run(int _iNrIterations)
 		                conegeom->getProjectionAngles(),
 		                m_iGPUIndex, m_iVoxelSuperSampling);
 	} else if (par3dgeom) {
-		if (!m_bSIRTWeighting) {
-			astraCudaPar3DBP(pReconMem->getData(), pSinoMem->getDataConst(),
-			                 volgeom.getGridColCount(),
-			                 volgeom.getGridRowCount(),
-			                 volgeom.getGridSliceCount(),
-			                 par3dgeom->getProjectionCount(),
-			                 par3dgeom->getDetectorColCount(),
-			                 par3dgeom->getDetectorRowCount(),
-			                 par3dgeom->getDetectorSpacingX(),
-			                 par3dgeom->getDetectorSpacingY(),
-			                 par3dgeom->getProjectionAngles(),
-			                 m_iGPUIndex, m_iVoxelSuperSampling);
-		} else {
-			astraCudaPar3DBP_SIRTWeighted(pReconMem->getData(),
-			                 pSinoMem->getDataConst(),
-			                 volgeom.getGridColCount(),
-			                 volgeom.getGridRowCount(),
-			                 volgeom.getGridSliceCount(),
-			                 par3dgeom->getProjectionCount(),
-			                 par3dgeom->getDetectorColCount(),
-			                 par3dgeom->getDetectorRowCount(),
-			                 par3dgeom->getDetectorSpacingX(),
-			                 par3dgeom->getDetectorSpacingY(),
-			                 par3dgeom->getProjectionAngles(),
-			                 m_iGPUIndex, m_iVoxelSuperSampling);
-		}
+		astraCudaPar3DBP(pReconMem->getData(), pSinoMem->getDataConst(),
+		                 volgeom.getGridColCount(),
+		                 volgeom.getGridRowCount(),
+		                 volgeom.getGridSliceCount(),
+		                 par3dgeom->getProjectionCount(),
+		                 par3dgeom->getDetectorColCount(),
+		                 par3dgeom->getDetectorRowCount(),
+		                 par3dgeom->getDetectorSpacingX(),
+		                 par3dgeom->getDetectorSpacingY(),
+		                 par3dgeom->getProjectionAngles(),
+		                 m_iGPUIndex, m_iVoxelSuperSampling);
 	} else if (parvec3dgeom) {
-		if (!m_bSIRTWeighting) {
-			astraCudaPar3DBP(pReconMem->getData(), pSinoMem->getDataConst(),
-			                 volgeom.getGridColCount(),
-			                 volgeom.getGridRowCount(),
-			                 volgeom.getGridSliceCount(),
-			                 parvec3dgeom->getProjectionCount(),
-			                 parvec3dgeom->getDetectorColCount(),
-			                 parvec3dgeom->getDetectorRowCount(),
-			                 parvec3dgeom->getProjectionVectors(),
-			                 m_iGPUIndex, m_iVoxelSuperSampling);
-		} else {
-			astraCudaPar3DBP_SIRTWeighted(pReconMem->getData(),
-			                 pSinoMem->getDataConst(),
-			                 volgeom.getGridColCount(),
-			                 volgeom.getGridRowCount(),
-			                 volgeom.getGridSliceCount(),
-			                 parvec3dgeom->getProjectionCount(),
-			                 parvec3dgeom->getDetectorColCount(),
-			                 parvec3dgeom->getDetectorRowCount(),
-			                 parvec3dgeom->getProjectionVectors(),
-			                 m_iGPUIndex, m_iVoxelSuperSampling);
-		}
+		astraCudaPar3DBP(pReconMem->getData(), pSinoMem->getDataConst(),
+		                 volgeom.getGridColCount(),
+		                 volgeom.getGridRowCount(),
+		                 volgeom.getGridSliceCount(),
+		                 parvec3dgeom->getProjectionCount(),
+		                 parvec3dgeom->getDetectorColCount(),
+		                 parvec3dgeom->getDetectorRowCount(),
+		                 parvec3dgeom->getProjectionVectors(),
+		                 m_iGPUIndex, m_iVoxelSuperSampling);
 	} else if (conevecgeom) {
 		astraCudaConeBP(pReconMem->getData(), pSinoMem->getDataConst(),
 		                volgeom.getGridColCount(),
-- 
cgit v1.2.3


From 6909836555afe155ffc3897ef2189ed0562bb045 Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Wed, 11 Mar 2015 18:44:53 +0100
Subject: Add flexible volgeom3d support to astraCudaBP

---
 src/CudaBackProjectionAlgorithm3D.cpp | 54 ++---------------------------------
 1 file changed, 3 insertions(+), 51 deletions(-)

(limited to 'src')

diff --git a/src/CudaBackProjectionAlgorithm3D.cpp b/src/CudaBackProjectionAlgorithm3D.cpp
index 7117cfc..a8a1b0a 100644
--- a/src/CudaBackProjectionAlgorithm3D.cpp
+++ b/src/CudaBackProjectionAlgorithm3D.cpp
@@ -164,10 +164,6 @@ void CCudaBackProjectionAlgorithm3D::run(int _iNrIterations)
 	ASTRA_ASSERT(pReconMem);
 
 	const CProjectionGeometry3D* projgeom = pSinoMem->getGeometry();
-	const CConeProjectionGeometry3D* conegeom = dynamic_cast<const CConeProjectionGeometry3D*>(projgeom);
-	const CParallelProjectionGeometry3D* par3dgeom = dynamic_cast<const CParallelProjectionGeometry3D*>(projgeom);
-	const CConeVecProjectionGeometry3D* conevecgeom = dynamic_cast<const CConeVecProjectionGeometry3D*>(projgeom);
-	const CParallelVecProjectionGeometry3D* parvec3dgeom = dynamic_cast<const CParallelVecProjectionGeometry3D*>(projgeom);
 	const CVolumeGeometry3D& volgeom = *pReconMem->getGeometry();
 
 	if (m_bSIRTWeighting) {
@@ -175,54 +171,10 @@ void CCudaBackProjectionAlgorithm3D::run(int _iNrIterations)
 		                         pSinoMem->getDataConst(),
 		                         &volgeom, projgeom,
 		                         m_iGPUIndex, m_iVoxelSuperSampling);
-	} else if (conegeom) {
-		astraCudaConeBP(pReconMem->getData(), pSinoMem->getDataConst(),
-		                volgeom.getGridColCount(),
-		                volgeom.getGridRowCount(),
-		                volgeom.getGridSliceCount(),
-		                conegeom->getProjectionCount(),
-		                conegeom->getDetectorColCount(),
-		                conegeom->getDetectorRowCount(),
-		                conegeom->getOriginSourceDistance(),
-		                conegeom->getOriginDetectorDistance(),
-		                conegeom->getDetectorSpacingX(),
-		                conegeom->getDetectorSpacingY(),
-		                conegeom->getProjectionAngles(),
-		                m_iGPUIndex, m_iVoxelSuperSampling);
-	} else if (par3dgeom) {
-		astraCudaPar3DBP(pReconMem->getData(), pSinoMem->getDataConst(),
-		                 volgeom.getGridColCount(),
-		                 volgeom.getGridRowCount(),
-		                 volgeom.getGridSliceCount(),
-		                 par3dgeom->getProjectionCount(),
-		                 par3dgeom->getDetectorColCount(),
-		                 par3dgeom->getDetectorRowCount(),
-		                 par3dgeom->getDetectorSpacingX(),
-		                 par3dgeom->getDetectorSpacingY(),
-		                 par3dgeom->getProjectionAngles(),
-		                 m_iGPUIndex, m_iVoxelSuperSampling);
-	} else if (parvec3dgeom) {
-		astraCudaPar3DBP(pReconMem->getData(), pSinoMem->getDataConst(),
-		                 volgeom.getGridColCount(),
-		                 volgeom.getGridRowCount(),
-		                 volgeom.getGridSliceCount(),
-		                 parvec3dgeom->getProjectionCount(),
-		                 parvec3dgeom->getDetectorColCount(),
-		                 parvec3dgeom->getDetectorRowCount(),
-		                 parvec3dgeom->getProjectionVectors(),
-		                 m_iGPUIndex, m_iVoxelSuperSampling);
-	} else if (conevecgeom) {
-		astraCudaConeBP(pReconMem->getData(), pSinoMem->getDataConst(),
-		                volgeom.getGridColCount(),
-		                volgeom.getGridRowCount(),
-		                volgeom.getGridSliceCount(),
-		                conevecgeom->getProjectionCount(),
-		                conevecgeom->getDetectorColCount(),
-		                conevecgeom->getDetectorRowCount(),
-		                conevecgeom->getProjectionVectors(),
-		                m_iGPUIndex, m_iVoxelSuperSampling);
 	} else {
-		ASTRA_ASSERT(false);
+		astraCudaBP(pReconMem->getData(), pSinoMem->getDataConst(),
+		            &volgeom, projgeom,
+		            m_iGPUIndex, m_iVoxelSuperSampling);
 	}
 
 }
-- 
cgit v1.2.3


From 57ee6b85884b8226b26b7415ef151b4a6e63337c Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Thu, 12 Mar 2015 11:53:40 +0100
Subject: Add flexible volgeom3d support to astraCudaFP

---
 src/CudaForwardProjectionAlgorithm3D.cpp | 59 ++------------------------------
 1 file changed, 3 insertions(+), 56 deletions(-)

(limited to 'src')

diff --git a/src/CudaForwardProjectionAlgorithm3D.cpp b/src/CudaForwardProjectionAlgorithm3D.cpp
index bb122e0..914ee2f 100644
--- a/src/CudaForwardProjectionAlgorithm3D.cpp
+++ b/src/CudaForwardProjectionAlgorithm3D.cpp
@@ -239,10 +239,6 @@ void CCudaForwardProjectionAlgorithm3D::run(int)
 	assert(m_bIsInitialized);
 
 	const CProjectionGeometry3D* projgeom = m_pProjections->getGeometry();
-	const CConeProjectionGeometry3D* conegeom = dynamic_cast<const CConeProjectionGeometry3D*>(projgeom);
-	const CParallelProjectionGeometry3D* par3dgeom = dynamic_cast<const CParallelProjectionGeometry3D*>(projgeom);
-	const CConeVecProjectionGeometry3D* conevecgeom = dynamic_cast<const CConeVecProjectionGeometry3D*>(projgeom);
-	const CParallelVecProjectionGeometry3D* parvec3dgeom = dynamic_cast<const CParallelVecProjectionGeometry3D*>(projgeom);
 	const CVolumeGeometry3D& volgeom = *m_pVolume->getGeometry();
 
 	Cuda3DProjectionKernel projKernel = ker3d_default;
@@ -270,58 +266,9 @@ void CCudaForwardProjectionAlgorithm3D::run(int)
 	}
 #endif
 
-	if (conegeom) {
-		astraCudaConeFP(m_pVolume->getDataConst(), m_pProjections->getData(),
-		                volgeom.getGridColCount(),
-		                volgeom.getGridRowCount(),
-		                volgeom.getGridSliceCount(),
-		                conegeom->getProjectionCount(),
-		                conegeom->getDetectorColCount(),
-		                conegeom->getDetectorRowCount(),
-		                conegeom->getOriginSourceDistance(),
-		                conegeom->getOriginDetectorDistance(),
-		                conegeom->getDetectorSpacingX(),
-		                conegeom->getDetectorSpacingY(),
-		                conegeom->getProjectionAngles(),
-		                m_iGPUIndex, m_iDetectorSuperSampling);
-	} else if (par3dgeom) {
-		astraCudaPar3DFP(m_pVolume->getDataConst(), m_pProjections->getData(),
-		                 volgeom.getGridColCount(),
-		                 volgeom.getGridRowCount(),
-		                 volgeom.getGridSliceCount(),
-		                 par3dgeom->getProjectionCount(),
-		                 par3dgeom->getDetectorColCount(),
-		                 par3dgeom->getDetectorRowCount(),
-		                 par3dgeom->getDetectorSpacingX(),
-		                 par3dgeom->getDetectorSpacingY(),
-		                 par3dgeom->getProjectionAngles(),
-		                 m_iGPUIndex, m_iDetectorSuperSampling,
-		                 projKernel);
-	} else if (parvec3dgeom) {
-		astraCudaPar3DFP(m_pVolume->getDataConst(), m_pProjections->getData(),
-		                 volgeom.getGridColCount(),
-		                 volgeom.getGridRowCount(),
-		                 volgeom.getGridSliceCount(),
-		                 parvec3dgeom->getProjectionCount(),
-		                 parvec3dgeom->getDetectorColCount(),
-		                 parvec3dgeom->getDetectorRowCount(),
-		                 parvec3dgeom->getProjectionVectors(),
-		                 m_iGPUIndex, m_iDetectorSuperSampling,
-		                 projKernel);
-	} else if (conevecgeom) {
-		astraCudaConeFP(m_pVolume->getDataConst(), m_pProjections->getData(),
-		                volgeom.getGridColCount(),
-		                volgeom.getGridRowCount(),
-		                volgeom.getGridSliceCount(),
-		                conevecgeom->getProjectionCount(),
-		                conevecgeom->getDetectorColCount(),
-		                conevecgeom->getDetectorRowCount(),
-		                conevecgeom->getProjectionVectors(),
-		                m_iGPUIndex, m_iDetectorSuperSampling);
-	} else {
-		ASTRA_ASSERT(false);
-	}
-
+	astraCudaFP(m_pVolume->getDataConst(), m_pProjections->getData(),
+	            &volgeom, projgeom,
+	            m_iGPUIndex, m_iDetectorSuperSampling, projKernel);
 }
 
 
-- 
cgit v1.2.3


From 167ec3f4e1cbe4eb856474cb515291261955b053 Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Fri, 22 May 2015 14:56:28 +0200
Subject: Add supersampling options to Cuda Projectors

---
 src/CudaProjector2D.cpp | 8 ++++++++
 src/CudaProjector3D.cpp | 8 ++++++++
 2 files changed, 16 insertions(+)

(limited to 'src')

diff --git a/src/CudaProjector2D.cpp b/src/CudaProjector2D.cpp
index fa024c8..a26e32d 100644
--- a/src/CudaProjector2D.cpp
+++ b/src/CudaProjector2D.cpp
@@ -59,6 +59,8 @@ void CCudaProjector2D::_clear()
 	m_bIsInitialized = false;
 
 	m_projectionKernel = ker2d_default;
+	m_iVoxelSuperSampling = 1;
+	m_iDetectorSuperSampling = 1;
 }
 
 //----------------------------------------------------------------------------------------
@@ -117,6 +119,12 @@ bool CCudaProjector2D::initialize(const Config& _cfg)
 	}
 	CC.markNodeParsed("ProjectionKernel");
 
+	m_iVoxelSuperSampling = (int)_cfg.self.getOptionNumerical("VoxelSuperSampling", 1);
+	CC.markOptionParsed("VoxelSuperSampling");
+ 
+	m_iDetectorSuperSampling = (int)_cfg.self.getOptionNumerical("DetectorSuperSampling", 1);
+	CC.markOptionParsed("DetectorSuperSampling");
+
 	m_bIsInitialized = _check();
 	return m_bIsInitialized;
 }
diff --git a/src/CudaProjector3D.cpp b/src/CudaProjector3D.cpp
index 41529a5..d2fd74c 100644
--- a/src/CudaProjector3D.cpp
+++ b/src/CudaProjector3D.cpp
@@ -62,6 +62,8 @@ void CCudaProjector3D::_clear()
 	m_bIsInitialized = false;
 
 	m_projectionKernel = ker3d_default;
+	m_iVoxelSuperSampling = 1;
+	m_iDetectorSuperSampling = 1;
 }
 
 //----------------------------------------------------------------------------------------
@@ -120,6 +122,12 @@ bool CCudaProjector3D::initialize(const Config& _cfg)
 	}
 	CC.markNodeParsed("ProjectionKernel");
 
+	m_iVoxelSuperSampling = (int)_cfg.self.getOptionNumerical("VoxelSuperSampling", 1);
+	CC.markOptionParsed("VoxelSuperSampling");
+ 
+	m_iDetectorSuperSampling = (int)_cfg.self.getOptionNumerical("DetectorSuperSampling", 1);
+	CC.markOptionParsed("DetectorSuperSampling");
+
 	m_bIsInitialized = _check();
 	return m_bIsInitialized;
 }
-- 
cgit v1.2.3


From 0985154228a63db25e9a0a0165994221d9b97a91 Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Tue, 26 May 2015 15:36:42 +0200
Subject: Use supersampling options from CudaProjector3D

---
 src/CudaBackProjectionAlgorithm3D.cpp    | 14 +++++++++++++-
 src/CudaCglsAlgorithm3D.cpp              | 22 +++++++++++++++++++---
 src/CudaFDKAlgorithm3D.cpp               | 13 ++++++++++++-
 src/CudaForwardProjectionAlgorithm3D.cpp | 16 +++++++++++++---
 src/CudaSirtAlgorithm3D.cpp              | 23 ++++++++++++++++++++---
 src/ReconstructionAlgorithm3D.cpp        | 18 +++++++++++-------
 6 files changed, 88 insertions(+), 18 deletions(-)

(limited to 'src')

diff --git a/src/CudaBackProjectionAlgorithm3D.cpp b/src/CudaBackProjectionAlgorithm3D.cpp
index fbb8f28..e8e0433 100644
--- a/src/CudaBackProjectionAlgorithm3D.cpp
+++ b/src/CudaBackProjectionAlgorithm3D.cpp
@@ -32,6 +32,7 @@ $Id$
 
 #include "astra/AstraObjectManager.h"
 
+#include "astra/CudaProjector3D.h"
 #include "astra/ConeProjectionGeometry3D.h"
 #include "astra/ParallelProjectionGeometry3D.h"
 #include "astra/ParallelVecProjectionGeometry3D.h"
@@ -102,9 +103,20 @@ bool CCudaBackProjectionAlgorithm3D::initialize(const Config& _cfg)
 		return false;
 	}
 
+	CCudaProjector3D* pCudaProjector = 0;
+	pCudaProjector = dynamic_cast<CCudaProjector3D*>(m_pProjector);
+	if (!pCudaProjector) {
+		// TODO: Report
+	}
+
 	m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUindex", -1);
 	CC.markOptionParsed("GPUindex");
-	m_iVoxelSuperSampling = (int)_cfg.self.getOptionNumerical("VoxelSuperSampling", 1);
+
+
+	m_iVoxelSuperSampling = 1;
+	if (pCudaProjector)
+		m_iVoxelSuperSampling = pCudaProjector->getVoxelSuperSampling();
+	m_iVoxelSuperSampling = (int)_cfg.self.getOptionNumerical("VoxelSuperSampling", m_iVoxelSuperSampling);
 	CC.markOptionParsed("VoxelSuperSampling");
 
 	CFloat32ProjectionData3DMemory* pSinoMem = dynamic_cast<CFloat32ProjectionData3DMemory*>(m_pSinogram);
diff --git a/src/CudaCglsAlgorithm3D.cpp b/src/CudaCglsAlgorithm3D.cpp
index 3457b81..f527dc5 100644
--- a/src/CudaCglsAlgorithm3D.cpp
+++ b/src/CudaCglsAlgorithm3D.cpp
@@ -32,6 +32,7 @@ $Id$
 
 #include "astra/AstraObjectManager.h"
 
+#include "astra/CudaProjector3D.h"
 #include "astra/ConeProjectionGeometry3D.h"
 #include "astra/ParallelVecProjectionGeometry3D.h"
 #include "astra/ConeVecProjectionGeometry3D.h"
@@ -106,12 +107,27 @@ bool CCudaCglsAlgorithm3D::initialize(const Config& _cfg)
 		return false;
 	}
 
+	CCudaProjector3D* pCudaProjector = 0;
+	pCudaProjector = dynamic_cast<CCudaProjector3D*>(m_pProjector);
+	if (!pCudaProjector) {
+		// TODO: Report
+	}
+
 	m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUindex", -1);
 	CC.markOptionParsed("GPUindex");
-	m_iDetectorSuperSampling = (int)_cfg.self.getOptionNumerical("DetectorSuperSampling", 1);
-	CC.markOptionParsed("DetectorSuperSampling");
-	m_iVoxelSuperSampling = (int)_cfg.self.getOptionNumerical("VoxelSuperSampling", 1);
+
+	m_iVoxelSuperSampling = 1;
+	m_iDetectorSuperSampling = 1;
+	if (pCudaProjector) {
+		// New interface
+		m_iVoxelSuperSampling = pCudaProjector->getVoxelSuperSampling();
+		m_iDetectorSuperSampling = pCudaProjector->getDetectorSuperSampling();
+	}
+	// Deprecated options
+	m_iVoxelSuperSampling = (int)_cfg.self.getOptionNumerical("VoxelSuperSampling", m_iVoxelSuperSampling);
+	m_iDetectorSuperSampling = (int)_cfg.self.getOptionNumerical("DetectorSuperSampling", m_iDetectorSuperSampling);
 	CC.markOptionParsed("VoxelSuperSampling");
+	CC.markOptionParsed("DetectorSuperSampling");
 
 	m_pCgls = new AstraCGLS3d();
 
diff --git a/src/CudaFDKAlgorithm3D.cpp b/src/CudaFDKAlgorithm3D.cpp
index 467e641..667d926 100644
--- a/src/CudaFDKAlgorithm3D.cpp
+++ b/src/CudaFDKAlgorithm3D.cpp
@@ -32,6 +32,7 @@ $Id$
 
 #include "astra/AstraObjectManager.h"
 
+#include "astra/CudaProjector3D.h"
 #include "astra/ConeProjectionGeometry3D.h"
 
 #include "../cuda/3d/astra3d.h"
@@ -100,9 +101,19 @@ bool CCudaFDKAlgorithm3D::initialize(const Config& _cfg)
 		return false;
 	}
 
+	CCudaProjector3D* pCudaProjector = 0;
+	pCudaProjector = dynamic_cast<CCudaProjector3D*>(m_pProjector);
+	if (!pCudaProjector) {
+		// TODO: Report
+	}
+
 	m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUindex", -1);
 	CC.markOptionParsed("GPUindex");
-	m_iVoxelSuperSampling = (int)_cfg.self.getOptionNumerical("VoxelSuperSampling", 1);
+
+	m_iVoxelSuperSampling = 1;
+	if (pCudaProjector)
+		m_iVoxelSuperSampling = pCudaProjector->getVoxelSuperSampling();
+	m_iVoxelSuperSampling = (int)_cfg.self.getOptionNumerical("VoxelSuperSampling", m_iVoxelSuperSampling);
 	CC.markOptionParsed("VoxelSuperSampling");
 
 	m_bShortScan = _cfg.self.getOptionBool("ShortScan", false);
diff --git a/src/CudaForwardProjectionAlgorithm3D.cpp b/src/CudaForwardProjectionAlgorithm3D.cpp
index e29b5a9..46dab12 100644
--- a/src/CudaForwardProjectionAlgorithm3D.cpp
+++ b/src/CudaForwardProjectionAlgorithm3D.cpp
@@ -97,18 +97,28 @@ bool CCudaForwardProjectionAlgorithm3D::initialize(const Config& _cfg)
 
 	// optional: projector
 	node = _cfg.self.getSingleNode("ProjectorId");
+	CCudaProjector3D* pCudaProjector = 0;
+	m_pProjector = 0;
 	if (node) {
 		id = boost::lexical_cast<int>(node.getContent());
 		m_pProjector = CProjector3DManager::getSingleton().get(id);
-	} else {
-		m_pProjector = 0; // TODO: or manually construct default projector?
+		pCudaProjector = dynamic_cast<CCudaProjector3D*>(CProjector3DManager::getSingleton().get(id));
+		m_pProjector = pCudaProjector;
+		if (!pCudaProjector) {
+			// TODO: Report
+		}
 	}
 	CC.markNodeParsed("ProjectorId");
 
 	// GPU number
 	m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUindex", -1);
 	CC.markOptionParsed("GPUindex");
-	m_iDetectorSuperSampling = (int)_cfg.self.getOptionNumerical("DetectorSuperSampling", 1);
+
+
+	m_iDetectorSuperSampling = 1;
+	if (pCudaProjector)
+		m_iDetectorSuperSampling = pCudaProjector->getDetectorSuperSampling();
+	m_iDetectorSuperSampling = (int)_cfg.self.getOptionNumerical("DetectorSuperSampling", m_iDetectorSuperSampling);
 	CC.markOptionParsed("DetectorSuperSampling");
 
 	// success
diff --git a/src/CudaSirtAlgorithm3D.cpp b/src/CudaSirtAlgorithm3D.cpp
index 5ad131b..abbb9fd 100644
--- a/src/CudaSirtAlgorithm3D.cpp
+++ b/src/CudaSirtAlgorithm3D.cpp
@@ -36,6 +36,7 @@ $Id$
 #include "astra/ParallelProjectionGeometry3D.h"
 #include "astra/ParallelVecProjectionGeometry3D.h"
 #include "astra/ConeVecProjectionGeometry3D.h"
+#include "astra/CudaProjector3D.h"
 
 #include "../cuda/3d/astra3d.h"
 
@@ -107,12 +108,28 @@ bool CCudaSirtAlgorithm3D::initialize(const Config& _cfg)
 		return false;
 	}
 
+	CCudaProjector3D* pCudaProjector = 0;
+	pCudaProjector = dynamic_cast<CCudaProjector3D*>(m_pProjector);
+	if (!pCudaProjector) {
+		// TODO: Report
+	}
+
 	m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUindex", -1);
 	CC.markOptionParsed("GPUindex");
-	m_iDetectorSuperSampling = (int)_cfg.self.getOptionNumerical("DetectorSuperSampling", 1);
-	CC.markOptionParsed("DetectorSuperSampling");
-	m_iVoxelSuperSampling = (int)_cfg.self.getOptionNumerical("VoxelSuperSampling", 1);
+
+
+	m_iVoxelSuperSampling = 1;
+	m_iDetectorSuperSampling = 1;
+	if (pCudaProjector) {
+		// New interface
+		m_iVoxelSuperSampling = pCudaProjector->getVoxelSuperSampling();
+		m_iDetectorSuperSampling = pCudaProjector->getDetectorSuperSampling();
+	}
+	// Deprecated options
+	m_iVoxelSuperSampling = (int)_cfg.self.getOptionNumerical("VoxelSuperSampling", m_iVoxelSuperSampling);
+	m_iDetectorSuperSampling = (int)_cfg.self.getOptionNumerical("DetectorSuperSampling", m_iDetectorSuperSampling);
 	CC.markOptionParsed("VoxelSuperSampling");
+	CC.markOptionParsed("DetectorSuperSampling");
 
 	m_pSirt = new AstraSIRT3d();
 
diff --git a/src/ReconstructionAlgorithm3D.cpp b/src/ReconstructionAlgorithm3D.cpp
index 86b8ab2..f975ace 100644
--- a/src/ReconstructionAlgorithm3D.cpp
+++ b/src/ReconstructionAlgorithm3D.cpp
@@ -106,14 +106,18 @@ bool CReconstructionAlgorithm3D::initialize(const Config& _cfg)
 
 	XMLNode node;
 	int id;
-#if 0
+
 	// projector
-	node = _cfg.self->getSingleNode("ProjectorId");
-	ASTRA_CONFIG_CHECK(node, "Reconstruction3D", "No ProjectorId tag specified.");
-	id = boost::lexical_cast<int>(node->getContent());
-	m_pProjector = CProjector3DManager::getSingleton().get(id);
-	ASTRA_DELETE(node);
-#endif
+	node = _cfg.self.getSingleNode("ProjectorId");
+	m_pProjector = 0;
+	if (node) {
+		id = boost::lexical_cast<int>(node.getContent());
+		m_pProjector = CProjector3DManager::getSingleton().get(id);
+		if (!m_pProjector) {
+			// TODO: Report
+		}
+	}
+	CC.markNodeParsed("ProjectorId");
 
 	// sinogram data
 	node = _cfg.self.getSingleNode("ProjectionDataId");
-- 
cgit v1.2.3


From e622f453c6ab9de6277611e01cac415e297553f7 Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Thu, 4 Jun 2015 17:23:54 +0200
Subject: Use supersampling options from CudaProjector2D

---
 src/CudaFilteredBackProjectionAlgorithm.cpp | 27 ++++++++++++++++--
 src/CudaForwardProjectionAlgorithm.cpp      | 38 +++++++++++++++----------
 src/CudaReconstructionAlgorithm2D.cpp       | 44 +++++++++++++++++------------
 3 files changed, 73 insertions(+), 36 deletions(-)

(limited to 'src')

diff --git a/src/CudaFilteredBackProjectionAlgorithm.cpp b/src/CudaFilteredBackProjectionAlgorithm.cpp
index 5d6c166..aac96d6 100644
--- a/src/CudaFilteredBackProjectionAlgorithm.cpp
+++ b/src/CudaFilteredBackProjectionAlgorithm.cpp
@@ -32,6 +32,7 @@ $Id$
 #include <cstring>
 
 #include "astra/AstraObjectManager.h"
+#include "astra/CudaProjector2D.h"
 #include "../cuda/2d/astra.h"
 
 #include "astra/Logging.h"
@@ -77,8 +78,22 @@ bool CCudaFilteredBackProjectionAlgorithm::initialize(const Config& _cfg)
 		clear();
 	}
 
+	// Projector
+	XMLNode node = _cfg.self.getSingleNode("ProjectorId");
+	CCudaProjector2D* pCudaProjector = 0;
+	if (node) {
+		int id = boost::lexical_cast<int>(node.getContent());
+		CProjector2D *projector = CProjector2DManager::getSingleton().get(id);
+		pCudaProjector = dynamic_cast<CCudaProjector2D*>(projector);
+		if (!pCudaProjector) {
+			ASTRA_WARN("non-CUDA Projector2D passed");
+		}
+	}
+	CC.markNodeParsed("ProjectorId");
+
+
 	// sinogram data
-	XMLNode node = _cfg.self.getSingleNode("ProjectionDataId");
+	node = _cfg.self.getSingleNode("ProjectionDataId");
 	ASTRA_CONFIG_CHECK(node, "CudaFBP", "No ProjectionDataId tag specified.");
 	int id = boost::lexical_cast<int>(node.getContent());
 	m_pSinogram = dynamic_cast<CFloat32ProjectionData2D*>(CData2DManager::getSingleton().get(id));
@@ -152,10 +167,16 @@ bool CCudaFilteredBackProjectionAlgorithm::initialize(const Config& _cfg)
 	m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUindex", -1);
 	CC.markOptionParsed("GPUindex");
 
-	// Pixel supersampling factor
-	m_iPixelSuperSampling = (int)_cfg.self.getOptionNumerical("PixelSuperSampling", 1);
+	m_iPixelSuperSampling = 1;
+	if (pCudaProjector) {
+		// New interface
+		m_iPixelSuperSampling = pCudaProjector->getVoxelSuperSampling();
+	}
+	// Deprecated options
+	m_iPixelSuperSampling = (int)_cfg.self.getOptionNumerical("PixelSuperSampling", m_iPixelSuperSampling);
 	CC.markOptionParsed("PixelSuperSampling");
 
+
 	// Fan beam short scan mode
 	if (m_pSinogram && dynamic_cast<CFanFlatProjectionGeometry2D*>(m_pSinogram->getGeometry())) {
 		m_bShortScan = (int)_cfg.self.getOptionBool("ShortScan", false);
diff --git a/src/CudaForwardProjectionAlgorithm.cpp b/src/CudaForwardProjectionAlgorithm.cpp
index 0f97d59..b382f2e 100644
--- a/src/CudaForwardProjectionAlgorithm.cpp
+++ b/src/CudaForwardProjectionAlgorithm.cpp
@@ -71,9 +71,24 @@ bool CCudaForwardProjectionAlgorithm::initialize(const Config& _cfg)
 {
 	ASTRA_ASSERT(_cfg.self);
 	ConfigStackCheck<CAlgorithm> CC("CudaForwardProjectionAlgorithm", this, _cfg);
+
+	// Projector
+	XMLNode node = _cfg.self.getSingleNode("ProjectorId");
+	CCudaProjector2D* pCudaProjector = 0;
+	if (node) {
+		int id = boost::lexical_cast<int>(node.getContent());
+		CProjector2D *projector = CProjector2DManager::getSingleton().get(id);
+		pCudaProjector = dynamic_cast<CCudaProjector2D*>(projector);
+		if (!pCudaProjector) {
+			ASTRA_WARN("non-CUDA Projector2D passed to FP_CUDA");
+		}
+	}
+	CC.markNodeParsed("ProjectorId");
+
+
 	
 	// sinogram data
-	XMLNode node = _cfg.self.getSingleNode("ProjectionDataId");
+	node = _cfg.self.getSingleNode("ProjectionDataId");
 	ASTRA_CONFIG_CHECK(node, "FP_CUDA", "No ProjectionDataId tag specified.");
 	int id = boost::lexical_cast<int>(node.getContent());
 	m_pSinogram = dynamic_cast<CFloat32ProjectionData2D*>(CData2DManager::getSingleton().get(id));
@@ -94,21 +109,14 @@ bool CCudaForwardProjectionAlgorithm::initialize(const Config& _cfg)
 		CC.markOptionParsed("GPUIndex");
 
 	// Detector supersampling factor
-	m_iDetectorSuperSampling = (int)_cfg.self.getOptionNumerical("DetectorSuperSampling", 1);
-	CC.markOptionParsed("DetectorSuperSampling");
-
-
-	// This isn't used yet, but passing it is not something to warn about
-	node = _cfg.self.getSingleNode("ProjectorId");
-	if (node) {
-		id = boost::lexical_cast<int>(node.getContent());
-		CProjector2D *projector = CProjector2DManager::getSingleton().get(id);
-		if (!dynamic_cast<CCudaProjector2D*>(projector)) {
-			ASTRA_WARN("non-CUDA Projector2D passed to FP_CUDA");
-		}
+	m_iDetectorSuperSampling = 1;
+	if (pCudaProjector) {
+		// New interface
+		m_iDetectorSuperSampling = pCudaProjector->getDetectorSuperSampling();
 	}
-	CC.markNodeParsed("ProjectorId");
-	
+	// Deprecated option
+	m_iDetectorSuperSampling = (int)_cfg.self.getOptionNumerical("DetectorSuperSampling", m_iDetectorSuperSampling);
+	CC.markOptionParsed("DetectorSuperSampling");
 
 
 	// return success
diff --git a/src/CudaReconstructionAlgorithm2D.cpp b/src/CudaReconstructionAlgorithm2D.cpp
index db99d42..71b6637 100644
--- a/src/CudaReconstructionAlgorithm2D.cpp
+++ b/src/CudaReconstructionAlgorithm2D.cpp
@@ -95,8 +95,22 @@ bool CCudaReconstructionAlgorithm2D::initialize(const Config& _cfg)
 		clear();
 	}
 
+	// Projector
+	XMLNode node = _cfg.self.getSingleNode("ProjectorId");
+	CCudaProjector2D* pCudaProjector = 0;
+	if (node) {
+		int id = boost::lexical_cast<int>(node.getContent());
+		CProjector2D *projector = CProjector2DManager::getSingleton().get(id);
+		pCudaProjector = dynamic_cast<CCudaProjector2D*>(projector);
+		if (!pCudaProjector) {
+			ASTRA_WARN("non-CUDA Projector2D passed");
+		}
+	}
+	CC.markNodeParsed("ProjectorId");
+
+
 	// sinogram data
-	XMLNode node = _cfg.self.getSingleNode("ProjectionDataId");
+	node = _cfg.self.getSingleNode("ProjectionDataId");
 	ASTRA_CONFIG_CHECK(node, "CudaSirt2", "No ProjectionDataId tag specified.");
 	int id = boost::lexical_cast<int>(node.getContent());
 	m_pSinogram = dynamic_cast<CFloat32ProjectionData2D*>(CData2DManager::getSingleton().get(id));
@@ -161,27 +175,21 @@ bool CCudaReconstructionAlgorithm2D::initialize(const Config& _cfg)
 	if (!_cfg.self.hasOption("GPUindex"))
 		CC.markOptionParsed("GPUIndex");
 
-	// Detector supersampling factor
-	m_iDetectorSuperSampling = (int)_cfg.self.getOptionNumerical("DetectorSuperSampling", 1);
+	// Supersampling factors
+	m_iDetectorSuperSampling = 1;
+	m_iPixelSuperSampling = 1;
+	if (pCudaProjector) {
+		// New interface
+		m_iDetectorSuperSampling = pCudaProjector->getDetectorSuperSampling();
+		m_iPixelSuperSampling = pCudaProjector->getVoxelSuperSampling();
+	}
+	// Deprecated options
+	m_iDetectorSuperSampling = (int)_cfg.self.getOptionNumerical("DetectorSuperSampling", m_iDetectorSuperSampling);
+	m_iPixelSuperSampling = (int)_cfg.self.getOptionNumerical("PixelSuperSampling", m_iPixelSuperSampling);
 	CC.markOptionParsed("DetectorSuperSampling");
-
-	// Pixel supersampling factor
-	m_iPixelSuperSampling = (int)_cfg.self.getOptionNumerical("PixelSuperSampling", 1);
 	CC.markOptionParsed("PixelSuperSampling");
 
 
-	// This isn't used yet, but passing it is not something to warn about
-	node = _cfg.self.getSingleNode("ProjectorId");
-	if (node) {
-		id = boost::lexical_cast<int>(node.getContent());
-		CProjector2D *projector = CProjector2DManager::getSingleton().get(id);
-		if (!dynamic_cast<CCudaProjector2D*>(projector)) {
-			ASTRA_WARN("non-CUDA Projector2D passed");
-		}
-	}
-	CC.markNodeParsed("ProjectorId");
-
-
 	return _check();
 }
 
-- 
cgit v1.2.3


From 26713deae284d6bb793b728c7af2db28a7484054 Mon Sep 17 00:00:00 2001
From: "Daniel M. Pelt" <D.M.Pelt@cwi.nl>
Date: Wed, 24 Jun 2015 20:30:52 +0200
Subject: Include ExtraDetectorOffset in returned configuration

---
 src/ParallelProjectionGeometry2D.cpp | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'src')

diff --git a/src/ParallelProjectionGeometry2D.cpp b/src/ParallelProjectionGeometry2D.cpp
index 699e141..5f51d08 100644
--- a/src/ParallelProjectionGeometry2D.cpp
+++ b/src/ParallelProjectionGeometry2D.cpp
@@ -180,6 +180,9 @@ Config* CParallelProjectionGeometry2D::getConfiguration() const
 	cfg->self.addChildNode("DetectorCount", getDetectorCount());
 	cfg->self.addChildNode("DetectorWidth", getDetectorWidth());
 	cfg->self.addChildNode("ProjectionAngles", m_pfProjectionAngles, m_iProjectionAngleCount);
+	XMLNode opt = cfg->self.addChildNode("Option");
+	opt.addAttribute("key","ExtraDetectorOffset");
+	opt.setContent(m_pfExtraDetectorOffset, m_iProjectionAngleCount);
 	return cfg;
 }
 //----------------------------------------------------------------------------------------
-- 
cgit v1.2.3


From f1a8bd8d2b62b089a90fef55268e3300581717ed Mon Sep 17 00:00:00 2001
From: "Daniel M. Pelt" <D.M.Pelt@cwi.nl>
Date: Thu, 25 Jun 2015 21:38:46 +0200
Subject: Add extra null check for ExtraDetectorOffset

---
 src/ParallelProjectionGeometry2D.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/ParallelProjectionGeometry2D.cpp b/src/ParallelProjectionGeometry2D.cpp
index 5f51d08..7260b83 100644
--- a/src/ParallelProjectionGeometry2D.cpp
+++ b/src/ParallelProjectionGeometry2D.cpp
@@ -180,9 +180,11 @@ Config* CParallelProjectionGeometry2D::getConfiguration() const
 	cfg->self.addChildNode("DetectorCount", getDetectorCount());
 	cfg->self.addChildNode("DetectorWidth", getDetectorWidth());
 	cfg->self.addChildNode("ProjectionAngles", m_pfProjectionAngles, m_iProjectionAngleCount);
-	XMLNode opt = cfg->self.addChildNode("Option");
-	opt.addAttribute("key","ExtraDetectorOffset");
-	opt.setContent(m_pfExtraDetectorOffset, m_iProjectionAngleCount);
+	if(m_pfExtraDetectorOffset!=NULL){
+		XMLNode opt = cfg->self.addChildNode("Option");
+		opt.addAttribute("key","ExtraDetectorOffset");
+		opt.setContent(m_pfExtraDetectorOffset, m_iProjectionAngleCount);
+	}
 	return cfg;
 }
 //----------------------------------------------------------------------------------------
-- 
cgit v1.2.3


From 18b6d25f7e4f0943b3592f3bb4f6ca5ed9c285d3 Mon Sep 17 00:00:00 2001
From: "Daniel M. Pelt" <D.M.Pelt@cwi.nl>
Date: Fri, 19 Jun 2015 22:28:06 +0200
Subject: Add support for Python algorithm plugins

---
 src/PluginAlgorithm.cpp | 294 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 294 insertions(+)
 create mode 100644 src/PluginAlgorithm.cpp

(limited to 'src')

diff --git a/src/PluginAlgorithm.cpp b/src/PluginAlgorithm.cpp
new file mode 100644
index 0000000..df13f31
--- /dev/null
+++ b/src/PluginAlgorithm.cpp
@@ -0,0 +1,294 @@
+/*
+-----------------------------------------------------------------------
+Copyright: 2010-2015, iMinds-Vision Lab, University of Antwerp
+           2014-2015, CWI, Amsterdam
+
+Contact: astra@uantwerpen.be
+Website: http://sf.net/projects/astra-toolbox
+
+This file is part of the ASTRA Toolbox.
+
+
+The ASTRA Toolbox is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+The ASTRA Toolbox is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
+
+-----------------------------------------------------------------------
+$Id$
+*/
+
+#ifdef ASTRA_PYTHON
+
+#include "astra/PluginAlgorithm.h"
+#include <boost/algorithm/string.hpp>
+#include <boost/algorithm/string/split.hpp>
+#include <boost/lexical_cast.hpp>
+#include <iostream>
+#include <fstream>
+#include <string>
+
+namespace astra {
+
+CPluginAlgorithm::CPluginAlgorithm(PyObject* pyclass){
+    instance = PyObject_CallObject(pyclass, NULL);
+}
+
+CPluginAlgorithm::~CPluginAlgorithm(){
+    if(instance!=NULL){
+        Py_DECREF(instance);
+        instance = NULL;
+    }
+}
+
+bool CPluginAlgorithm::initialize(const Config& _cfg){
+    if(instance==NULL) return false;
+    PyObject *cfgDict = XMLNode2dict(_cfg.self);
+    PyObject *retVal = PyObject_CallMethod(instance, "astra_init", "O",cfgDict);
+    Py_DECREF(cfgDict);
+    if(retVal==NULL) return false;
+    m_bIsInitialized = true;
+    Py_DECREF(retVal);
+    return m_bIsInitialized;
+}
+
+void CPluginAlgorithm::run(int _iNrIterations){
+    if(instance==NULL) return;
+    PyObject *retVal = PyObject_CallMethod(instance, "run", "i",_iNrIterations);
+    if(retVal==NULL) return;
+    Py_DECREF(retVal);
+}
+
+const char ps =
+#ifdef _WIN32
+                            '\\';
+#else
+                            '/';
+#endif
+
+std::vector<std::string> CPluginAlgorithmFactory::getPluginPathList(){
+    std::vector<std::string> list;
+    list.push_back("/etc/astra-toolbox");
+    PyObject *ret, *retb;
+    ret = PyObject_CallMethod(inspect,"getfile","O",astra);
+    if(ret!=NULL){
+        retb = PyObject_CallMethod(six,"b","O",ret);
+        Py_DECREF(ret);
+        if(retb!=NULL){
+            std::string astra_inst (PyBytes_AsString(retb));
+            Py_DECREF(retb);
+            ret = PyObject_CallMethod(ospath,"dirname","s",astra_inst.c_str());
+            if(ret!=NULL){
+                retb = PyObject_CallMethod(six,"b","O",ret);
+                Py_DECREF(ret);
+                if(retb!=NULL){
+                    list.push_back(std::string(PyBytes_AsString(retb)));
+                    Py_DECREF(retb);
+                }
+            }
+        }
+    }
+    ret = PyObject_CallMethod(ospath,"expanduser","s","~");
+    if(ret!=NULL){
+        retb = PyObject_CallMethod(six,"b","O",ret);
+        Py_DECREF(ret);
+        if(retb!=NULL){
+            list.push_back(std::string(PyBytes_AsString(retb)) + ps + ".astra-toolbox");
+            Py_DECREF(retb);
+        }
+    }
+    const char *envval = getenv("ASTRA_PLUGIN_PATH");
+    if(envval!=NULL){
+        list.push_back(std::string(envval));
+    }
+    return list;
+}
+
+CPluginAlgorithmFactory::CPluginAlgorithmFactory(){
+    Py_Initialize();
+    pluginDict = PyDict_New();
+    ospath = PyImport_ImportModule("os.path");
+    inspect = PyImport_ImportModule("inspect");
+    six = PyImport_ImportModule("six");
+    astra = PyImport_ImportModule("astra");
+    std::vector<std::string> fls = getPluginPathList();
+    std::vector<std::string> items;
+    for(unsigned int i=0;i<fls.size();i++){
+        std::ifstream fs ((fls[i]+ps+"plugins.txt").c_str());
+        if(!fs.is_open()) continue;
+        std::string line;
+        while (std::getline(fs,line)){
+            boost::split(items, line, boost::is_any_of(" "));
+            if(items.size()<2) continue;
+            PyObject *str = PyBytes_FromString(items[1].c_str());
+            PyDict_SetItemString(pluginDict,items[0].c_str(),str);
+            Py_DECREF(str);
+        }
+        fs.close();
+    }
+}
+
+CPluginAlgorithmFactory::~CPluginAlgorithmFactory(){
+    if(pluginDict!=NULL){
+        Py_DECREF(pluginDict);
+    }
+}
+
+bool CPluginAlgorithmFactory::registerPlugin(std::string name, std::string className){
+    PyObject *str = PyBytes_FromString(className.c_str());
+    PyDict_SetItemString(pluginDict, name.c_str(), str);
+    Py_DECREF(str);
+    return true;
+}
+
+bool CPluginAlgorithmFactory::registerPluginClass(std::string name, PyObject * className){
+    PyDict_SetItemString(pluginDict, name.c_str(), className);
+    return true;
+}
+
+PyObject * getClassFromString(std::string str){
+    std::vector<std::string> items;
+    boost::split(items, str, boost::is_any_of("."));
+    PyObject *pyclass = PyImport_ImportModule(items[0].c_str());
+    if(pyclass==NULL) return NULL;
+    PyObject *submod = pyclass;
+    for(unsigned int i=1;i<items.size();i++){
+        submod = PyObject_GetAttrString(submod,items[i].c_str());
+        Py_DECREF(pyclass);
+        pyclass = submod;
+        if(pyclass==NULL) return NULL;
+    }
+    return pyclass;
+}
+
+CPluginAlgorithm * CPluginAlgorithmFactory::getPlugin(std::string name){
+    PyObject *className = PyDict_GetItemString(pluginDict, name.c_str());
+    if(className==NULL) return NULL;
+    CPluginAlgorithm *alg = NULL;
+    if(PyBytes_Check(className)){
+        std::string str = std::string(PyBytes_AsString(className));
+    	PyObject *pyclass = getClassFromString(str);
+        if(pyclass!=NULL){
+            alg = new CPluginAlgorithm(pyclass);
+            Py_DECREF(pyclass);
+        }
+    }else{
+        alg = new CPluginAlgorithm(className);
+    }
+    return alg;
+}
+
+PyObject * CPluginAlgorithmFactory::getRegistered(){
+    Py_INCREF(pluginDict);
+    return pluginDict;
+}
+
+std::string CPluginAlgorithmFactory::getHelp(std::string name){
+    PyObject *className = PyDict_GetItemString(pluginDict, name.c_str());
+    if(className==NULL) return "";
+    std::string str = std::string(PyBytes_AsString(className));
+    std::string ret = "";
+    PyObject *pyclass = getClassFromString(str);
+    if(pyclass==NULL) return "";
+    PyObject *module = PyImport_ImportModule("inspect");
+    if(module!=NULL){
+        PyObject *retVal = PyObject_CallMethod(module,"getdoc","O",pyclass);
+        if(retVal!=NULL){
+            PyObject *retb = PyObject_CallMethod(six,"b","O",retVal);
+            Py_DECREF(retVal);
+            if(retVal!=NULL){
+                ret = std::string(PyBytes_AsString(retb));
+                Py_DECREF(retb);
+            }
+        }
+        Py_DECREF(module);
+    }
+    Py_DECREF(pyclass);
+    return ret;
+}
+
+DEFINE_SINGLETON(CPluginAlgorithmFactory);
+
+#if PY_MAJOR_VERSION >= 3
+PyObject * pyStringFromString(std::string str){
+    return PyUnicode_FromString(str.c_str());
+}
+#else
+PyObject * pyStringFromString(std::string str){
+    return PyBytes_FromString(str.c_str());
+}
+#endif
+
+PyObject* stringToPythonValue(std::string str){
+    if(str.find(";")!=std::string::npos){
+        std::vector<std::string> rows, row;
+        boost::split(rows, str, boost::is_any_of(";"));
+        PyObject *mat = PyList_New(rows.size());
+        for(unsigned int i=0; i<rows.size(); i++){
+            boost::split(row, rows[i], boost::is_any_of(","));
+            PyObject *rowlist = PyList_New(row.size());
+            for(unsigned int j=0;j<row.size();j++){
+                PyList_SetItem(rowlist, j, PyFloat_FromDouble(boost::lexical_cast<double>(row[j])));
+            }
+            PyList_SetItem(mat, i, rowlist);
+        }
+        return mat;
+    }
+    if(str.find(",")!=std::string::npos){
+        std::vector<std::string> vec;
+        boost::split(vec, str, boost::is_any_of(","));
+        PyObject *veclist = PyList_New(vec.size());
+        for(unsigned int i=0;i<vec.size();i++){
+            PyList_SetItem(veclist, i, PyFloat_FromDouble(boost::lexical_cast<double>(vec[i])));
+        }
+        return veclist;
+    }
+    try{
+        return PyLong_FromLong(boost::lexical_cast<long>(str));
+    }catch(const boost::bad_lexical_cast &){
+        try{
+            return PyFloat_FromDouble(boost::lexical_cast<double>(str));
+        }catch(const boost::bad_lexical_cast &){
+            return pyStringFromString(str);
+        }
+    }
+}
+
+PyObject* XMLNode2dict(XMLNode node){
+    PyObject *dct = PyDict_New();
+    PyObject *opts = PyDict_New();
+    if(node.hasAttribute("type")){
+        PyObject *obj = pyStringFromString(node.getAttribute("type").c_str());
+        PyDict_SetItemString(dct, "type", obj);
+        Py_DECREF(obj);
+    }
+    std::list<XMLNode> nodes = node.getNodes();
+    std::list<XMLNode>::iterator it = nodes.begin();
+    while(it!=nodes.end()){
+        XMLNode subnode = *it;
+        if(subnode.getName()=="Option"){
+            PyObject *obj = stringToPythonValue(subnode.getAttribute("value"));
+            PyDict_SetItemString(opts, subnode.getAttribute("key").c_str(), obj);
+            Py_DECREF(obj);
+        }else{
+            PyObject *obj = stringToPythonValue(subnode.getContent());
+            PyDict_SetItemString(dct, subnode.getName().c_str(), obj);
+            Py_DECREF(obj);
+        }
+        ++it;
+    }
+    PyDict_SetItemString(dct, "options", opts);
+    Py_DECREF(opts);
+    return dct;
+}
+
+}
+#endif
\ No newline at end of file
-- 
cgit v1.2.3


From 4c9e432ae4581fdc110e9a9c45267227be1c7c31 Mon Sep 17 00:00:00 2001
From: "Daniel M. Pelt" <D.M.Pelt@cwi.nl>
Date: Wed, 24 Jun 2015 20:43:05 +0200
Subject: Fix config to dict translation for array options

---
 src/PluginAlgorithm.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/PluginAlgorithm.cpp b/src/PluginAlgorithm.cpp
index df13f31..a27ce2c 100644
--- a/src/PluginAlgorithm.cpp
+++ b/src/PluginAlgorithm.cpp
@@ -275,7 +275,12 @@ PyObject* XMLNode2dict(XMLNode node){
     while(it!=nodes.end()){
         XMLNode subnode = *it;
         if(subnode.getName()=="Option"){
-            PyObject *obj = stringToPythonValue(subnode.getAttribute("value"));
+            PyObject *obj;
+            if(subnode.hasAttribute("value")){
+                obj = stringToPythonValue(subnode.getAttribute("value"));
+            }else{
+                obj = stringToPythonValue(subnode.getContent());
+            }
             PyDict_SetItemString(opts, subnode.getAttribute("key").c_str(), obj);
             Py_DECREF(obj);
         }else{
-- 
cgit v1.2.3


From edae78481cf0e9cbffe335de1e541821758c5da1 Mon Sep 17 00:00:00 2001
From: "Daniel M. Pelt" <D.M.Pelt@cwi.nl>
Date: Wed, 24 Jun 2015 21:36:04 +0200
Subject: Log error when running Python plugin algorithm

---
 src/PluginAlgorithm.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/PluginAlgorithm.cpp b/src/PluginAlgorithm.cpp
index a27ce2c..7dcaf68 100644
--- a/src/PluginAlgorithm.cpp
+++ b/src/PluginAlgorithm.cpp
@@ -62,7 +62,7 @@ bool CPluginAlgorithm::initialize(const Config& _cfg){
 
 void CPluginAlgorithm::run(int _iNrIterations){
     if(instance==NULL) return;
-    PyObject *retVal = PyObject_CallMethod(instance, "run", "i",_iNrIterations);
+    PyObject *retVal = PyObject_CallMethod(instance, "astra_run", "i",_iNrIterations);
     if(retVal==NULL) return;
     Py_DECREF(retVal);
 }
-- 
cgit v1.2.3


From 2f871bc7068d6c87a7d950ae044ba66b0b8dcd3f Mon Sep 17 00:00:00 2001
From: "Daniel M. Pelt" <D.M.Pelt@cwi.nl>
Date: Fri, 17 Jul 2015 12:05:46 +0200
Subject: Remove config text file loading for plugins

---
 src/PluginAlgorithm.cpp | 72 ++++---------------------------------------------
 1 file changed, 5 insertions(+), 67 deletions(-)

(limited to 'src')

diff --git a/src/PluginAlgorithm.cpp b/src/PluginAlgorithm.cpp
index 7dcaf68..8ba6631 100644
--- a/src/PluginAlgorithm.cpp
+++ b/src/PluginAlgorithm.cpp
@@ -67,79 +67,19 @@ void CPluginAlgorithm::run(int _iNrIterations){
     Py_DECREF(retVal);
 }
 
-const char ps =
-#ifdef _WIN32
-                            '\\';
-#else
-                            '/';
-#endif
-
-std::vector<std::string> CPluginAlgorithmFactory::getPluginPathList(){
-    std::vector<std::string> list;
-    list.push_back("/etc/astra-toolbox");
-    PyObject *ret, *retb;
-    ret = PyObject_CallMethod(inspect,"getfile","O",astra);
-    if(ret!=NULL){
-        retb = PyObject_CallMethod(six,"b","O",ret);
-        Py_DECREF(ret);
-        if(retb!=NULL){
-            std::string astra_inst (PyBytes_AsString(retb));
-            Py_DECREF(retb);
-            ret = PyObject_CallMethod(ospath,"dirname","s",astra_inst.c_str());
-            if(ret!=NULL){
-                retb = PyObject_CallMethod(six,"b","O",ret);
-                Py_DECREF(ret);
-                if(retb!=NULL){
-                    list.push_back(std::string(PyBytes_AsString(retb)));
-                    Py_DECREF(retb);
-                }
-            }
-        }
-    }
-    ret = PyObject_CallMethod(ospath,"expanduser","s","~");
-    if(ret!=NULL){
-        retb = PyObject_CallMethod(six,"b","O",ret);
-        Py_DECREF(ret);
-        if(retb!=NULL){
-            list.push_back(std::string(PyBytes_AsString(retb)) + ps + ".astra-toolbox");
-            Py_DECREF(retb);
-        }
-    }
-    const char *envval = getenv("ASTRA_PLUGIN_PATH");
-    if(envval!=NULL){
-        list.push_back(std::string(envval));
-    }
-    return list;
-}
-
 CPluginAlgorithmFactory::CPluginAlgorithmFactory(){
     Py_Initialize();
     pluginDict = PyDict_New();
-    ospath = PyImport_ImportModule("os.path");
     inspect = PyImport_ImportModule("inspect");
     six = PyImport_ImportModule("six");
-    astra = PyImport_ImportModule("astra");
-    std::vector<std::string> fls = getPluginPathList();
-    std::vector<std::string> items;
-    for(unsigned int i=0;i<fls.size();i++){
-        std::ifstream fs ((fls[i]+ps+"plugins.txt").c_str());
-        if(!fs.is_open()) continue;
-        std::string line;
-        while (std::getline(fs,line)){
-            boost::split(items, line, boost::is_any_of(" "));
-            if(items.size()<2) continue;
-            PyObject *str = PyBytes_FromString(items[1].c_str());
-            PyDict_SetItemString(pluginDict,items[0].c_str(),str);
-            Py_DECREF(str);
-        }
-        fs.close();
-    }
 }
 
 CPluginAlgorithmFactory::~CPluginAlgorithmFactory(){
     if(pluginDict!=NULL){
         Py_DECREF(pluginDict);
     }
+    if(inspect!=NULL) Py_DECREF(inspect);
+    if(six!=NULL) Py_DECREF(six);
 }
 
 bool CPluginAlgorithmFactory::registerPlugin(std::string name, std::string className){
@@ -198,18 +138,16 @@ std::string CPluginAlgorithmFactory::getHelp(std::string name){
     std::string ret = "";
     PyObject *pyclass = getClassFromString(str);
     if(pyclass==NULL) return "";
-    PyObject *module = PyImport_ImportModule("inspect");
-    if(module!=NULL){
-        PyObject *retVal = PyObject_CallMethod(module,"getdoc","O",pyclass);
+    if(inspect!=NULL && six!=NULL){
+        PyObject *retVal = PyObject_CallMethod(inspect,"getdoc","O",pyclass);
         if(retVal!=NULL){
             PyObject *retb = PyObject_CallMethod(six,"b","O",retVal);
             Py_DECREF(retVal);
-            if(retVal!=NULL){
+            if(retb!=NULL){
                 ret = std::string(PyBytes_AsString(retb));
                 Py_DECREF(retb);
             }
         }
-        Py_DECREF(module);
     }
     Py_DECREF(pyclass);
     return ret;
-- 
cgit v1.2.3


From 3d136b7c819b0b142ad056bf01c8c1191eea9ba0 Mon Sep 17 00:00:00 2001
From: "Daniel M. Pelt" <D.M.Pelt@cwi.nl>
Date: Fri, 17 Jul 2015 16:22:05 +0200
Subject: Fix numpy lapack loading when running in Matlab

---
 src/Globals.cpp         |  3 +++
 src/PluginAlgorithm.cpp | 29 +++++++++++++++++++++++++++++
 2 files changed, 32 insertions(+)

(limited to 'src')

diff --git a/src/Globals.cpp b/src/Globals.cpp
index 813f9c9..904a459 100644
--- a/src/Globals.cpp
+++ b/src/Globals.cpp
@@ -28,5 +28,8 @@ $Id$
 
 #include "astra/Globals.h"
 
+namespace astra{
+    bool running_in_matlab=false;
+}
 // nothing to see here :)
 
diff --git a/src/PluginAlgorithm.cpp b/src/PluginAlgorithm.cpp
index 8ba6631..c26ee3f 100644
--- a/src/PluginAlgorithm.cpp
+++ b/src/PluginAlgorithm.cpp
@@ -67,8 +67,37 @@ void CPluginAlgorithm::run(int _iNrIterations){
     Py_DECREF(retVal);
 }
 
+void fixLapackLoading(){
+    // When running in Matlab, we need to force numpy
+    // to use its internal lapack library instead of
+    // Matlab's MKL library to avoid errors. To do this,
+    // we set Python's dlopen flags to RTLD_NOW|RTLD_DEEPBIND
+    // and import 'numpy.linalg.lapack_lite' here. We reset
+    // Python's dlopen flags afterwards.
+    PyObject *sys = PyImport_ImportModule("sys");
+    if(sys!=NULL){
+        PyObject *curFlags = PyObject_CallMethod(sys,"getdlopenflags",NULL);
+        if(curFlags!=NULL){
+            PyObject *retVal = PyObject_CallMethod(sys, "setdlopenflags", "i",10);
+            if(retVal!=NULL){
+                PyObject *lapack = PyImport_ImportModule("numpy.linalg.lapack_lite");
+                if(lapack!=NULL){
+                    Py_DECREF(lapack);
+                }
+                PyObject_CallMethod(sys, "setdlopenflags", "O",curFlags);
+                Py_DECREF(retVal);
+            }
+            Py_DECREF(curFlags);
+        }
+        Py_DECREF(sys);
+    }
+}
+
 CPluginAlgorithmFactory::CPluginAlgorithmFactory(){
     Py_Initialize();
+#ifndef _MSC_VER
+    if(astra::running_in_matlab) fixLapackLoading();
+#endif
     pluginDict = PyDict_New();
     inspect = PyImport_ImportModule("inspect");
     six = PyImport_ImportModule("six");
-- 
cgit v1.2.3


From ef9eb1dc7eb494e87f728af7caff8e5291cf320c Mon Sep 17 00:00:00 2001
From: "Daniel M. Pelt" <D.M.Pelt@cwi.nl>
Date: Mon, 20 Jul 2015 10:34:55 +0200
Subject: Also log Python errors when importing and creating Python plugins

---
 src/PluginAlgorithm.cpp | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

(limited to 'src')

diff --git a/src/PluginAlgorithm.cpp b/src/PluginAlgorithm.cpp
index c26ee3f..a118f54 100644
--- a/src/PluginAlgorithm.cpp
+++ b/src/PluginAlgorithm.cpp
@@ -29,6 +29,7 @@ $Id$
 #ifdef ASTRA_PYTHON
 
 #include "astra/PluginAlgorithm.h"
+#include "astra/Logging.h"
 #include <boost/algorithm/string.hpp>
 #include <boost/algorithm/string/split.hpp>
 #include <boost/lexical_cast.hpp>
@@ -38,8 +39,53 @@ $Id$
 
 namespace astra {
 
+
+void logPythonError(){
+    if(PyErr_Occurred()){
+        PyObject *ptype, *pvalue, *ptraceback;
+        PyErr_Fetch(&ptype, &pvalue, &ptraceback);
+        PyObject *traceback = PyImport_ImportModule("traceback");
+        if(traceback!=NULL){
+            PyObject *exc;
+            if(ptraceback==NULL){
+                exc = PyObject_CallMethod(traceback,"format_exception_only","OO",ptype, pvalue);
+            }else{
+                exc = PyObject_CallMethod(traceback,"format_exception","OOO",ptype, pvalue, ptraceback);
+            }
+            if(exc!=NULL){
+                PyObject *six = PyImport_ImportModule("six");
+                if(six!=NULL){
+                    PyObject *iter = PyObject_GetIter(exc);
+                    if(iter!=NULL){
+                        PyObject *line;
+                        std::string errStr = "";
+                        while(line = PyIter_Next(iter)){
+                            PyObject *retb = PyObject_CallMethod(six,"b","O",line);
+                            if(retb!=NULL){
+                                errStr += std::string(PyBytes_AsString(retb));
+                                Py_DECREF(retb);
+                            }
+                            Py_DECREF(line);
+                        }
+                        ASTRA_ERROR("%s",errStr.c_str());
+                        Py_DECREF(iter);
+                    }
+                    Py_DECREF(six);
+                }
+                Py_DECREF(exc);
+            }
+            Py_DECREF(traceback);
+        }
+        if(ptype!=NULL) Py_DECREF(ptype);
+        if(pvalue!=NULL) Py_DECREF(pvalue);
+        if(ptraceback!=NULL) Py_DECREF(ptraceback);
+    }
+}
+
+
 CPluginAlgorithm::CPluginAlgorithm(PyObject* pyclass){
     instance = PyObject_CallObject(pyclass, NULL);
+    if(instance==NULL) logPythonError();
 }
 
 CPluginAlgorithm::~CPluginAlgorithm(){
@@ -148,6 +194,8 @@ CPluginAlgorithm * CPluginAlgorithmFactory::getPlugin(std::string name){
         if(pyclass!=NULL){
             alg = new CPluginAlgorithm(pyclass);
             Py_DECREF(pyclass);
+        }else{
+            logPythonError();
         }
     }else{
         alg = new CPluginAlgorithm(className);
-- 
cgit v1.2.3


From 37abc22cf8d26fa3f7e282a1ee50a2a129d5a295 Mon Sep 17 00:00:00 2001
From: "Daniel M. Pelt" <D.M.Pelt@cwi.nl>
Date: Mon, 20 Jul 2015 11:26:39 +0200
Subject: Always log Python errors when importing/creating plugins

---
 src/PluginAlgorithm.cpp | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/PluginAlgorithm.cpp b/src/PluginAlgorithm.cpp
index a118f54..d6cf731 100644
--- a/src/PluginAlgorithm.cpp
+++ b/src/PluginAlgorithm.cpp
@@ -173,13 +173,19 @@ PyObject * getClassFromString(std::string str){
     std::vector<std::string> items;
     boost::split(items, str, boost::is_any_of("."));
     PyObject *pyclass = PyImport_ImportModule(items[0].c_str());
-    if(pyclass==NULL) return NULL;
+    if(pyclass==NULL){
+        logPythonError();
+        return NULL;
+    }
     PyObject *submod = pyclass;
     for(unsigned int i=1;i<items.size();i++){
         submod = PyObject_GetAttrString(submod,items[i].c_str());
         Py_DECREF(pyclass);
         pyclass = submod;
-        if(pyclass==NULL) return NULL;
+        if(pyclass==NULL){
+            logPythonError();
+            return NULL;
+        }
     }
     return pyclass;
 }
@@ -194,8 +200,6 @@ CPluginAlgorithm * CPluginAlgorithmFactory::getPlugin(std::string name){
         if(pyclass!=NULL){
             alg = new CPluginAlgorithm(pyclass);
             Py_DECREF(pyclass);
-        }else{
-            logPythonError();
         }
     }else{
         alg = new CPluginAlgorithm(className);
-- 
cgit v1.2.3


From d91b51f6d58003de84a9d6dd8189fceba0e81a5a Mon Sep 17 00:00:00 2001
From: "Daniel M. Pelt" <D.M.Pelt@cwi.nl>
Date: Mon, 20 Jul 2015 14:07:21 +0200
Subject: Allow registering plugins without explicit name, and fix exception
 handling when running in Matlab

---
 src/PluginAlgorithm.cpp | 95 +++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 76 insertions(+), 19 deletions(-)

(limited to 'src')

diff --git a/src/PluginAlgorithm.cpp b/src/PluginAlgorithm.cpp
index d6cf731..7f7ff61 100644
--- a/src/PluginAlgorithm.cpp
+++ b/src/PluginAlgorithm.cpp
@@ -100,7 +100,10 @@ bool CPluginAlgorithm::initialize(const Config& _cfg){
     PyObject *cfgDict = XMLNode2dict(_cfg.self);
     PyObject *retVal = PyObject_CallMethod(instance, "astra_init", "O",cfgDict);
     Py_DECREF(cfgDict);
-    if(retVal==NULL) return false;
+    if(retVal==NULL){
+        logPythonError();
+        return false;
+    }
     m_bIsInitialized = true;
     Py_DECREF(retVal);
     return m_bIsInitialized;
@@ -108,8 +111,11 @@ bool CPluginAlgorithm::initialize(const Config& _cfg){
 
 void CPluginAlgorithm::run(int _iNrIterations){
     if(instance==NULL) return;
-    PyObject *retVal = PyObject_CallMethod(instance, "astra_run", "i",_iNrIterations);
-    if(retVal==NULL) return;
+    PyObject *retVal = PyObject_CallMethod(instance, "run", "i",_iNrIterations);
+    if(retVal==NULL){
+        logPythonError();
+        return;
+    }
     Py_DECREF(retVal);
 }
 
@@ -157,18 +163,6 @@ CPluginAlgorithmFactory::~CPluginAlgorithmFactory(){
     if(six!=NULL) Py_DECREF(six);
 }
 
-bool CPluginAlgorithmFactory::registerPlugin(std::string name, std::string className){
-    PyObject *str = PyBytes_FromString(className.c_str());
-    PyDict_SetItemString(pluginDict, name.c_str(), str);
-    Py_DECREF(str);
-    return true;
-}
-
-bool CPluginAlgorithmFactory::registerPluginClass(std::string name, PyObject * className){
-    PyDict_SetItemString(pluginDict, name.c_str(), className);
-    return true;
-}
-
 PyObject * getClassFromString(std::string str){
     std::vector<std::string> items;
     boost::split(items, str, boost::is_any_of("."));
@@ -190,6 +184,43 @@ PyObject * getClassFromString(std::string str){
     return pyclass;
 }
 
+bool CPluginAlgorithmFactory::registerPlugin(std::string name, std::string className){
+    PyObject *str = PyBytes_FromString(className.c_str());
+    PyDict_SetItemString(pluginDict, name.c_str(), str);
+    Py_DECREF(str);
+    return true;
+}
+
+bool CPluginAlgorithmFactory::registerPlugin(std::string className){
+    PyObject *pyclass = getClassFromString(className);
+    if(pyclass==NULL) return false;
+    bool ret = registerPluginClass(pyclass);
+    Py_DECREF(pyclass);
+    return ret;
+}
+
+bool CPluginAlgorithmFactory::registerPluginClass(std::string name, PyObject * className){
+    PyDict_SetItemString(pluginDict, name.c_str(), className);
+    return true;
+}
+
+bool CPluginAlgorithmFactory::registerPluginClass(PyObject * className){
+    PyObject *astra_name = PyObject_GetAttrString(className,"astra_name");
+    if(astra_name==NULL){
+        logPythonError();
+        return false;
+    }
+    PyObject *retb = PyObject_CallMethod(six,"b","O",astra_name);
+    if(retb!=NULL){
+        PyDict_SetItemString(pluginDict,PyBytes_AsString(retb),className);
+        Py_DECREF(retb);
+    }else{
+        logPythonError();
+    }
+    Py_DECREF(astra_name);
+    return true;
+}
+
 CPluginAlgorithm * CPluginAlgorithmFactory::getPlugin(std::string name){
     PyObject *className = PyDict_GetItemString(pluginDict, name.c_str());
     if(className==NULL) return NULL;
@@ -212,12 +243,34 @@ PyObject * CPluginAlgorithmFactory::getRegistered(){
     return pluginDict;
 }
 
+std::map<std::string, std::string> CPluginAlgorithmFactory::getRegisteredMap(){
+    std::map<std::string, std::string> ret;
+    PyObject *key, *value;
+    Py_ssize_t pos = 0;
+    while (PyDict_Next(pluginDict, &pos, &key, &value)) {
+        PyObject * keyb = PyObject_Bytes(key);
+        PyObject * valb = PyObject_Bytes(value);
+        ret[PyBytes_AsString(keyb)] = PyBytes_AsString(valb);
+        Py_DECREF(keyb);
+        Py_DECREF(valb);
+    }
+    return ret;
+}
+
 std::string CPluginAlgorithmFactory::getHelp(std::string name){
     PyObject *className = PyDict_GetItemString(pluginDict, name.c_str());
-    if(className==NULL) return "";
-    std::string str = std::string(PyBytes_AsString(className));
+    if(className==NULL){
+        ASTRA_ERROR("Plugin %s not found!",name.c_str());
+        return "";
+    }
     std::string ret = "";
-    PyObject *pyclass = getClassFromString(str);
+    PyObject *pyclass;
+    if(PyBytes_Check(className)){
+        std::string str = std::string(PyBytes_AsString(className));
+        pyclass = getClassFromString(str);
+    }else{
+        pyclass = className;
+    }
     if(pyclass==NULL) return "";
     if(inspect!=NULL && six!=NULL){
         PyObject *retVal = PyObject_CallMethod(inspect,"getdoc","O",pyclass);
@@ -228,9 +281,13 @@ std::string CPluginAlgorithmFactory::getHelp(std::string name){
                 ret = std::string(PyBytes_AsString(retb));
                 Py_DECREF(retb);
             }
+        }else{
+            logPythonError();
         }
     }
-    Py_DECREF(pyclass);
+    if(PyBytes_Check(className)){
+        Py_DECREF(pyclass);
+    }
     return ret;
 }
 
-- 
cgit v1.2.3


From 3808967cfaa6beb9d93d2035ebc72fa010fdab11 Mon Sep 17 00:00:00 2001
From: "Daniel M. Pelt" <D.M.Pelt@cwi.nl>
Date: Mon, 20 Jul 2015 16:41:55 +0200
Subject: Normalize Python exceptions (needed for some)

---
 src/PluginAlgorithm.cpp | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src')

diff --git a/src/PluginAlgorithm.cpp b/src/PluginAlgorithm.cpp
index 7f7ff61..56c4e4d 100644
--- a/src/PluginAlgorithm.cpp
+++ b/src/PluginAlgorithm.cpp
@@ -44,6 +44,7 @@ void logPythonError(){
     if(PyErr_Occurred()){
         PyObject *ptype, *pvalue, *ptraceback;
         PyErr_Fetch(&ptype, &pvalue, &ptraceback);
+        PyErr_NormalizeException(&ptype, &pvalue, &ptraceback);
         PyObject *traceback = PyImport_ImportModule("traceback");
         if(traceback!=NULL){
             PyObject *exc;
-- 
cgit v1.2.3


From dc3bed557603d4735ddc20961c28e5e868fc315c Mon Sep 17 00:00:00 2001
From: "Daniel M. Pelt" <D.M.Pelt@cwi.nl>
Date: Tue, 21 Jul 2015 11:44:29 +0200
Subject: Clear Python error when plugin is not found in getHelp

---
 src/PluginAlgorithm.cpp | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src')

diff --git a/src/PluginAlgorithm.cpp b/src/PluginAlgorithm.cpp
index 56c4e4d..5c779fd 100644
--- a/src/PluginAlgorithm.cpp
+++ b/src/PluginAlgorithm.cpp
@@ -262,6 +262,7 @@ std::string CPluginAlgorithmFactory::getHelp(std::string name){
     PyObject *className = PyDict_GetItemString(pluginDict, name.c_str());
     if(className==NULL){
         ASTRA_ERROR("Plugin %s not found!",name.c_str());
+        PyErr_Clear();
         return "";
     }
     std::string ret = "";
-- 
cgit v1.2.3


From 645122f4b365ce44849afda2ed8a711ae649ed76 Mon Sep 17 00:00:00 2001
From: "Daniel M. Pelt" <D.M.Pelt@cwi.nl>
Date: Tue, 21 Jul 2015 13:56:18 +0200
Subject: Fix 'get_registered' in Matlab with Python 3

---
 src/PluginAlgorithm.cpp | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/PluginAlgorithm.cpp b/src/PluginAlgorithm.cpp
index 5c779fd..5d6d733 100644
--- a/src/PluginAlgorithm.cpp
+++ b/src/PluginAlgorithm.cpp
@@ -249,11 +249,24 @@ std::map<std::string, std::string> CPluginAlgorithmFactory::getRegisteredMap(){
     PyObject *key, *value;
     Py_ssize_t pos = 0;
     while (PyDict_Next(pluginDict, &pos, &key, &value)) {
-        PyObject * keyb = PyObject_Bytes(key);
-        PyObject * valb = PyObject_Bytes(value);
-        ret[PyBytes_AsString(keyb)] = PyBytes_AsString(valb);
-        Py_DECREF(keyb);
-        Py_DECREF(valb);
+        PyObject *keystr = PyObject_Str(key);
+        if(keystr!=NULL){
+            PyObject *valstr = PyObject_Str(value);
+            if(valstr!=NULL){
+                PyObject * keyb = PyObject_CallMethod(six,"b","O",keystr);
+                if(keyb!=NULL){
+                    PyObject * valb = PyObject_CallMethod(six,"b","O",valstr);
+                    if(valb!=NULL){
+                        ret[PyBytes_AsString(keyb)] = PyBytes_AsString(valb);
+                        Py_DECREF(valb);
+                    }
+                    Py_DECREF(keyb);
+                }
+                Py_DECREF(valstr);
+            }
+            Py_DECREF(keystr);
+        }
+        logPythonError();
     }
     return ret;
 }
-- 
cgit v1.2.3


From ab980d9f088c0f4e28d61b94c32788c30a9c4cb9 Mon Sep 17 00:00:00 2001
From: "Daniel M. Pelt" <D.M.Pelt@cwi.nl>
Date: Wed, 5 Aug 2015 16:26:01 +0200
Subject: Fix get_help for classes without docstring

---
 src/PluginAlgorithm.cpp | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/PluginAlgorithm.cpp b/src/PluginAlgorithm.cpp
index 5d6d733..4066e30 100644
--- a/src/PluginAlgorithm.cpp
+++ b/src/PluginAlgorithm.cpp
@@ -290,12 +290,14 @@ std::string CPluginAlgorithmFactory::getHelp(std::string name){
     if(inspect!=NULL && six!=NULL){
         PyObject *retVal = PyObject_CallMethod(inspect,"getdoc","O",pyclass);
         if(retVal!=NULL){
-            PyObject *retb = PyObject_CallMethod(six,"b","O",retVal);
-            Py_DECREF(retVal);
-            if(retb!=NULL){
-                ret = std::string(PyBytes_AsString(retb));
-                Py_DECREF(retb);
+            if(retVal!=Py_None){
+                PyObject *retb = PyObject_CallMethod(six,"b","O",retVal);
+                if(retb!=NULL){
+                    ret = std::string(PyBytes_AsString(retb));
+                    Py_DECREF(retb);
+                }
             }
+            Py_DECREF(retVal);
         }else{
             logPythonError();
         }
-- 
cgit v1.2.3


From 0d5947a0e8e7d6f86c7591a96d877dfe14b187e4 Mon Sep 17 00:00:00 2001
From: "Daniel M. Pelt" <D.M.Pelt@cwi.nl>
Date: Mon, 10 Aug 2015 17:08:34 +0200
Subject: Ensure we have acquired the GIL before calling Python plugin 'run'
 method

---
 src/PluginAlgorithm.cpp | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/PluginAlgorithm.cpp b/src/PluginAlgorithm.cpp
index 4066e30..e79c77b 100644
--- a/src/PluginAlgorithm.cpp
+++ b/src/PluginAlgorithm.cpp
@@ -112,12 +112,14 @@ bool CPluginAlgorithm::initialize(const Config& _cfg){
 
 void CPluginAlgorithm::run(int _iNrIterations){
     if(instance==NULL) return;
+    PyGILState_STATE state = PyGILState_Ensure();
     PyObject *retVal = PyObject_CallMethod(instance, "run", "i",_iNrIterations);
     if(retVal==NULL){
         logPythonError();
-        return;
+    }else{
+        Py_DECREF(retVal);
     }
-    Py_DECREF(retVal);
+    PyGILState_Release(state);
 }
 
 void fixLapackLoading(){
@@ -147,7 +149,10 @@ void fixLapackLoading(){
 }
 
 CPluginAlgorithmFactory::CPluginAlgorithmFactory(){
-    Py_Initialize();
+    if(!Py_IsInitialized()){
+        Py_Initialize();
+        PyEval_InitThreads();
+    }
 #ifndef _MSC_VER
     if(astra::running_in_matlab) fixLapackLoading();
 #endif
-- 
cgit v1.2.3


From c1713c00c4aeae594913667d868106e8591dd1d1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20H=C3=A4ggstr=C3=B6m?=
 <christian.haggstrom@orexplore.com>
Date: Fri, 31 Oct 2014 14:00:38 +0100
Subject: Silence bogus warning:

Warning: CudaSirtAlgorithm3D: unused configuration options: SinogramMaskId
---
 src/ReconstructionAlgorithm3D.cpp | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src')

diff --git a/src/ReconstructionAlgorithm3D.cpp b/src/ReconstructionAlgorithm3D.cpp
index f975ace..13d4b07 100644
--- a/src/ReconstructionAlgorithm3D.cpp
+++ b/src/ReconstructionAlgorithm3D.cpp
@@ -147,6 +147,7 @@ bool CReconstructionAlgorithm3D::initialize(const Config& _cfg)
 		id = boost::lexical_cast<int>(_cfg.self.getOption("SinogramMaskId"));
 		m_pSinogramMask = dynamic_cast<CFloat32ProjectionData3D*>(CData3DManager::getSingleton().get(id));
 	}
+	CC.markOptionParsed("SinogramMaskId");
 
 	// Constraints - NEW
 	if (_cfg.self.hasOption("MinConstraint")) {
-- 
cgit v1.2.3


From 43a38c117405f99e3a1b498f899de4ba6d01a044 Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Wed, 7 Oct 2015 18:14:39 +0200
Subject: Improve option passing through CudaProjector3D

Not all constructors were reading options from the projector.
Also allow passing GPUIndex via CudaProjector3D.

Thanks to Nicola Vigano for part of the patch.
---
 src/CudaBackProjectionAlgorithm3D.cpp    | 43 ++++++++++++++++++++--------
 src/CudaCglsAlgorithm3D.cpp              | 47 +++++++++++++++++++++----------
 src/CudaFDKAlgorithm3D.cpp               | 40 ++++++++++++++++++--------
 src/CudaForwardProjectionAlgorithm3D.cpp | 46 ++++++++++++++++++++----------
 src/CudaProjector3D.cpp                  |  7 +++++
 src/CudaSirtAlgorithm3D.cpp              | 48 +++++++++++++++++++++-----------
 6 files changed, 161 insertions(+), 70 deletions(-)

(limited to 'src')

diff --git a/src/CudaBackProjectionAlgorithm3D.cpp b/src/CudaBackProjectionAlgorithm3D.cpp
index e8e0433..c9d9447 100644
--- a/src/CudaBackProjectionAlgorithm3D.cpp
+++ b/src/CudaBackProjectionAlgorithm3D.cpp
@@ -38,6 +38,8 @@ $Id$
 #include "astra/ParallelVecProjectionGeometry3D.h"
 #include "astra/ConeVecProjectionGeometry3D.h"
 
+#include "astra/Logging.h"
+
 #include "../cuda/3d/astra3d.h"
 
 using namespace std;
@@ -86,6 +88,24 @@ bool CCudaBackProjectionAlgorithm3D::_check()
 	return true;
 }
 
+//---------------------------------------------------------------------------------------
+void CCudaBackProjectionAlgorithm3D::initializeFromProjector()
+{
+	m_iVoxelSuperSampling = 1;
+	m_iGPUIndex = -1;
+
+	CCudaProjector3D* pCudaProjector = dynamic_cast<CCudaProjector3D*>(m_pProjector);
+	if (!pCudaProjector) {
+		if (m_pProjector) {
+			ASTRA_WARN("non-CUDA Projector3D passed to BP3D_CUDA");
+		}
+	} else {
+		m_iVoxelSuperSampling = pCudaProjector->getVoxelSuperSampling();
+		m_iGPUIndex = pCudaProjector->getGPUIndex();
+	}
+
+}
+
 //---------------------------------------------------------------------------------------
 // Initialize - Config
 bool CCudaBackProjectionAlgorithm3D::initialize(const Config& _cfg)
@@ -103,21 +123,18 @@ bool CCudaBackProjectionAlgorithm3D::initialize(const Config& _cfg)
 		return false;
 	}
 
-	CCudaProjector3D* pCudaProjector = 0;
-	pCudaProjector = dynamic_cast<CCudaProjector3D*>(m_pProjector);
-	if (!pCudaProjector) {
-		// TODO: Report
-	}
-
-	m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUindex", -1);
-	CC.markOptionParsed("GPUindex");
-
+	initializeFromProjector();
 
-	m_iVoxelSuperSampling = 1;
-	if (pCudaProjector)
-		m_iVoxelSuperSampling = pCudaProjector->getVoxelSuperSampling();
+	// Deprecated options
 	m_iVoxelSuperSampling = (int)_cfg.self.getOptionNumerical("VoxelSuperSampling", m_iVoxelSuperSampling);
+	m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUindex", m_iGPUIndex);
+	m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUIndex", m_iGPUIndex);
 	CC.markOptionParsed("VoxelSuperSampling");
+	CC.markOptionParsed("GPUIndex");
+	if (!_cfg.self.hasOption("GPUIndex"))
+		CC.markOptionParsed("GPUindex");
+
+
 
 	CFloat32ProjectionData3DMemory* pSinoMem = dynamic_cast<CFloat32ProjectionData3DMemory*>(m_pSinogram);
 	ASTRA_ASSERT(pSinoMem);
@@ -151,6 +168,8 @@ bool CCudaBackProjectionAlgorithm3D::initialize(CProjector3D* _pProjector,
 	m_pSinogram = _pSinogram;
 	m_pReconstruction = _pReconstruction;
 
+	initializeFromProjector();
+
 	// success
 	m_bIsInitialized = _check();
 	return m_bIsInitialized;
diff --git a/src/CudaCglsAlgorithm3D.cpp b/src/CudaCglsAlgorithm3D.cpp
index f527dc5..1cccb6a 100644
--- a/src/CudaCglsAlgorithm3D.cpp
+++ b/src/CudaCglsAlgorithm3D.cpp
@@ -37,6 +37,8 @@ $Id$
 #include "astra/ParallelVecProjectionGeometry3D.h"
 #include "astra/ConeVecProjectionGeometry3D.h"
 
+#include "astra/Logging.h"
+
 #include "../cuda/3d/astra3d.h"
 
 using namespace std;
@@ -89,6 +91,26 @@ bool CCudaCglsAlgorithm3D::_check()
 	return true;
 }
 
+//---------------------------------------------------------------------------------------
+void CCudaCglsAlgorithm3D::initializeFromProjector()
+{
+	m_iVoxelSuperSampling = 1;
+	m_iDetectorSuperSampling = 1;
+	m_iGPUIndex = -1;
+
+	CCudaProjector3D* pCudaProjector = dynamic_cast<CCudaProjector3D*>(m_pProjector);
+	if (!pCudaProjector) {
+		if (m_pProjector) {
+			ASTRA_WARN("non-CUDA Projector3D passed to CGLS3D_CUDA");
+		}
+	} else {
+		m_iVoxelSuperSampling = pCudaProjector->getVoxelSuperSampling();
+		m_iDetectorSuperSampling = pCudaProjector->getDetectorSuperSampling();
+		m_iGPUIndex = pCudaProjector->getGPUIndex();
+	}
+
+}
+
 //---------------------------------------------------------------------------------------
 // Initialize - Config
 bool CCudaCglsAlgorithm3D::initialize(const Config& _cfg)
@@ -107,27 +129,20 @@ bool CCudaCglsAlgorithm3D::initialize(const Config& _cfg)
 		return false;
 	}
 
-	CCudaProjector3D* pCudaProjector = 0;
-	pCudaProjector = dynamic_cast<CCudaProjector3D*>(m_pProjector);
-	if (!pCudaProjector) {
-		// TODO: Report
-	}
+	initializeFromProjector();
 
-	m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUindex", -1);
-	CC.markOptionParsed("GPUindex");
-
-	m_iVoxelSuperSampling = 1;
-	m_iDetectorSuperSampling = 1;
-	if (pCudaProjector) {
-		// New interface
-		m_iVoxelSuperSampling = pCudaProjector->getVoxelSuperSampling();
-		m_iDetectorSuperSampling = pCudaProjector->getDetectorSuperSampling();
-	}
 	// Deprecated options
 	m_iVoxelSuperSampling = (int)_cfg.self.getOptionNumerical("VoxelSuperSampling", m_iVoxelSuperSampling);
 	m_iDetectorSuperSampling = (int)_cfg.self.getOptionNumerical("DetectorSuperSampling", m_iDetectorSuperSampling);
+	m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUindex", m_iGPUIndex);
+	m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUIndex", m_iGPUIndex);
 	CC.markOptionParsed("VoxelSuperSampling");
 	CC.markOptionParsed("DetectorSuperSampling");
+	CC.markOptionParsed("GPUIndex");
+	if (!_cfg.self.hasOption("GPUIndex"))
+		CC.markOptionParsed("GPUindex");
+
+
 
 	m_pCgls = new AstraCGLS3d();
 
@@ -155,6 +170,8 @@ bool CCudaCglsAlgorithm3D::initialize(CProjector3D* _pProjector,
 	m_pSinogram = _pSinogram;
 	m_pReconstruction = _pReconstruction;
 
+	initializeFromProjector();
+
 	m_pCgls = new AstraCGLS3d;
 
 	m_bAstraCGLSInit = false;
diff --git a/src/CudaFDKAlgorithm3D.cpp b/src/CudaFDKAlgorithm3D.cpp
index 667d926..625d02a 100644
--- a/src/CudaFDKAlgorithm3D.cpp
+++ b/src/CudaFDKAlgorithm3D.cpp
@@ -35,6 +35,8 @@ $Id$
 #include "astra/CudaProjector3D.h"
 #include "astra/ConeProjectionGeometry3D.h"
 
+#include "astra/Logging.h"
+
 #include "../cuda/3d/astra3d.h"
 
 using namespace std;
@@ -84,6 +86,24 @@ bool CCudaFDKAlgorithm3D::_check()
 	return true;
 }
 
+//---------------------------------------------------------------------------------------
+void CCudaFDKAlgorithm3D::initializeFromProjector()
+{
+	m_iVoxelSuperSampling = 1;
+	m_iGPUIndex = -1;
+
+	CCudaProjector3D* pCudaProjector = dynamic_cast<CCudaProjector3D*>(m_pProjector);
+	if (!pCudaProjector) {
+		if (m_pProjector) {
+			ASTRA_WARN("non-CUDA Projector3D passed to FDK_CUDA");
+		}
+	} else {
+		m_iVoxelSuperSampling = pCudaProjector->getVoxelSuperSampling();
+		m_iGPUIndex = pCudaProjector->getGPUIndex();
+	}
+
+}
+
 //---------------------------------------------------------------------------------------
 // Initialize - Config
 bool CCudaFDKAlgorithm3D::initialize(const Config& _cfg)
@@ -101,20 +121,18 @@ bool CCudaFDKAlgorithm3D::initialize(const Config& _cfg)
 		return false;
 	}
 
-	CCudaProjector3D* pCudaProjector = 0;
-	pCudaProjector = dynamic_cast<CCudaProjector3D*>(m_pProjector);
-	if (!pCudaProjector) {
-		// TODO: Report
-	}
-
-	m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUindex", -1);
-	CC.markOptionParsed("GPUindex");
+	initializeFromProjector();
 
-	m_iVoxelSuperSampling = 1;
-	if (pCudaProjector)
-		m_iVoxelSuperSampling = pCudaProjector->getVoxelSuperSampling();
+	// Deprecated options
 	m_iVoxelSuperSampling = (int)_cfg.self.getOptionNumerical("VoxelSuperSampling", m_iVoxelSuperSampling);
+	m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUindex", m_iGPUIndex);
+	m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUIndex", m_iGPUIndex);
 	CC.markOptionParsed("VoxelSuperSampling");
+	CC.markOptionParsed("GPUIndex");
+	if (!_cfg.self.hasOption("GPUIndex"))
+		CC.markOptionParsed("GPUindex");
+
+
 
 	m_bShortScan = _cfg.self.getOptionBool("ShortScan", false);
 	CC.markOptionParsed("ShortScan");
diff --git a/src/CudaForwardProjectionAlgorithm3D.cpp b/src/CudaForwardProjectionAlgorithm3D.cpp
index 46dab12..6498885 100644
--- a/src/CudaForwardProjectionAlgorithm3D.cpp
+++ b/src/CudaForwardProjectionAlgorithm3D.cpp
@@ -71,6 +71,23 @@ CCudaForwardProjectionAlgorithm3D::~CCudaForwardProjectionAlgorithm3D()
 
 }
 
+//---------------------------------------------------------------------------------------
+void CCudaForwardProjectionAlgorithm3D::initializeFromProjector()
+{
+	m_iDetectorSuperSampling = 1;
+	m_iGPUIndex = -1;
+
+	CCudaProjector3D* pCudaProjector = dynamic_cast<CCudaProjector3D*>(m_pProjector);
+	if (!pCudaProjector) {
+		if (m_pProjector) {
+			ASTRA_WARN("non-CUDA Projector3D passed to FP3D_CUDA");
+		}
+	} else {
+		m_iDetectorSuperSampling = pCudaProjector->getDetectorSuperSampling();
+		m_iGPUIndex = pCudaProjector->getGPUIndex();
+	}
+}
+
 //---------------------------------------------------------------------------------------
 // Initialize - Config
 bool CCudaForwardProjectionAlgorithm3D::initialize(const Config& _cfg)
@@ -97,29 +114,21 @@ bool CCudaForwardProjectionAlgorithm3D::initialize(const Config& _cfg)
 
 	// optional: projector
 	node = _cfg.self.getSingleNode("ProjectorId");
-	CCudaProjector3D* pCudaProjector = 0;
 	m_pProjector = 0;
 	if (node) {
 		id = boost::lexical_cast<int>(node.getContent());
 		m_pProjector = CProjector3DManager::getSingleton().get(id);
-		pCudaProjector = dynamic_cast<CCudaProjector3D*>(CProjector3DManager::getSingleton().get(id));
-		m_pProjector = pCudaProjector;
-		if (!pCudaProjector) {
-			// TODO: Report
-		}
 	}
 	CC.markNodeParsed("ProjectorId");
 
-	// GPU number
-	m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUindex", -1);
-	CC.markOptionParsed("GPUindex");
-
+	initializeFromProjector();
 
-	m_iDetectorSuperSampling = 1;
-	if (pCudaProjector)
-		m_iDetectorSuperSampling = pCudaProjector->getDetectorSuperSampling();
+	// Deprecated options
 	m_iDetectorSuperSampling = (int)_cfg.self.getOptionNumerical("DetectorSuperSampling", m_iDetectorSuperSampling);
+	m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUindex", m_iGPUIndex);
 	CC.markOptionParsed("DetectorSuperSampling");
+	CC.markOptionParsed("GPUindex");
+
 
 	// success
 	m_bIsInitialized = check();
@@ -142,8 +151,15 @@ bool CCudaForwardProjectionAlgorithm3D::initialize(CProjector3D* _pProjector,
 	m_pProjections = _pProjections;
 	m_pVolume = _pVolume;
 
-	m_iDetectorSuperSampling = _iDetectorSuperSampling;
-	m_iGPUIndex = _iGPUindex;
+	CCudaProjector3D* pCudaProjector = dynamic_cast<CCudaProjector3D*>(m_pProjector);
+	if (!pCudaProjector) {
+		// TODO: Report
+		m_iDetectorSuperSampling = _iDetectorSuperSampling;
+		m_iGPUIndex = _iGPUindex;
+	} else {
+		m_iDetectorSuperSampling = pCudaProjector->getDetectorSuperSampling();
+		m_iGPUIndex = pCudaProjector->getGPUIndex();
+	}
 
 	// success
 	m_bIsInitialized = check();
diff --git a/src/CudaProjector3D.cpp b/src/CudaProjector3D.cpp
index d2fd74c..bbfbd34 100644
--- a/src/CudaProjector3D.cpp
+++ b/src/CudaProjector3D.cpp
@@ -64,6 +64,7 @@ void CCudaProjector3D::_clear()
 	m_projectionKernel = ker3d_default;
 	m_iVoxelSuperSampling = 1;
 	m_iDetectorSuperSampling = 1;
+	m_iGPUIndex = -1;
 }
 
 //----------------------------------------------------------------------------------------
@@ -128,6 +129,12 @@ bool CCudaProjector3D::initialize(const Config& _cfg)
 	m_iDetectorSuperSampling = (int)_cfg.self.getOptionNumerical("DetectorSuperSampling", 1);
 	CC.markOptionParsed("DetectorSuperSampling");
 
+	m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUindex", -1);
+	m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUIndex", m_iGPUIndex);
+	CC.markOptionParsed("GPUIndex");
+	if (!_cfg.self.hasOption("GPUIndex"))
+		CC.markOptionParsed("GPUindex");
+
 	m_bIsInitialized = _check();
 	return m_bIsInitialized;
 }
diff --git a/src/CudaSirtAlgorithm3D.cpp b/src/CudaSirtAlgorithm3D.cpp
index abbb9fd..67594f4 100644
--- a/src/CudaSirtAlgorithm3D.cpp
+++ b/src/CudaSirtAlgorithm3D.cpp
@@ -38,6 +38,8 @@ $Id$
 #include "astra/ConeVecProjectionGeometry3D.h"
 #include "astra/CudaProjector3D.h"
 
+#include "astra/Logging.h"
+
 #include "../cuda/3d/astra3d.h"
 
 using namespace std;
@@ -90,7 +92,27 @@ bool CCudaSirtAlgorithm3D::_check()
 	return true;
 }
 
-//---------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+void CCudaSirtAlgorithm3D::initializeFromProjector()
+{
+	m_iVoxelSuperSampling = 1;
+	m_iDetectorSuperSampling = 1;
+	m_iGPUIndex = -1;
+
+	CCudaProjector3D* pCudaProjector = dynamic_cast<CCudaProjector3D*>(m_pProjector);
+	if (!pCudaProjector) {
+		if (m_pProjector) {
+			ASTRA_WARN("non-CUDA Projector3D passed to SIRT3D_CUDA");
+		}
+	} else {
+		m_iVoxelSuperSampling = pCudaProjector->getVoxelSuperSampling();
+		m_iDetectorSuperSampling = pCudaProjector->getDetectorSuperSampling();
+		m_iGPUIndex = pCudaProjector->getGPUIndex();
+	}
+
+}
+
+//--------------------------------------------------------------------------------------
 // Initialize - Config
 bool CCudaSirtAlgorithm3D::initialize(const Config& _cfg)
 {
@@ -108,28 +130,20 @@ bool CCudaSirtAlgorithm3D::initialize(const Config& _cfg)
 		return false;
 	}
 
-	CCudaProjector3D* pCudaProjector = 0;
-	pCudaProjector = dynamic_cast<CCudaProjector3D*>(m_pProjector);
-	if (!pCudaProjector) {
-		// TODO: Report
-	}
+	initializeFromProjector();
 
-	m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUindex", -1);
-	CC.markOptionParsed("GPUindex");
-
-
-	m_iVoxelSuperSampling = 1;
-	m_iDetectorSuperSampling = 1;
-	if (pCudaProjector) {
-		// New interface
-		m_iVoxelSuperSampling = pCudaProjector->getVoxelSuperSampling();
-		m_iDetectorSuperSampling = pCudaProjector->getDetectorSuperSampling();
-	}
 	// Deprecated options
 	m_iVoxelSuperSampling = (int)_cfg.self.getOptionNumerical("VoxelSuperSampling", m_iVoxelSuperSampling);
 	m_iDetectorSuperSampling = (int)_cfg.self.getOptionNumerical("DetectorSuperSampling", m_iDetectorSuperSampling);
+	m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUindex", m_iGPUIndex);
+	m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUIndex", m_iGPUIndex);
 	CC.markOptionParsed("VoxelSuperSampling");
 	CC.markOptionParsed("DetectorSuperSampling");
+	CC.markOptionParsed("GPUIndex");
+	if (!_cfg.self.hasOption("GPUIndex"))
+		CC.markOptionParsed("GPUindex");
+
+
 
 	m_pSirt = new AstraSIRT3d();
 
-- 
cgit v1.2.3


From 003663649a191fc5bc011d6e5424496576b5e793 Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Thu, 8 Oct 2015 11:24:49 +0200
Subject: Improve option passing through CudaProjector2D

Not all constructors were reading options from the projector.
Also allow passing GPUIndex via CudaProjector2D.

Also refactor CudaReconstructionAlgorithm::initialize/check
to avoid code duplication with ReconstructionAlgorithm.
---
 src/CudaBackProjectionAlgorithm.cpp         |   5 +-
 src/CudaCglsAlgorithm.cpp                   |   6 +-
 src/CudaEMAlgorithm.cpp                     |   6 +-
 src/CudaFilteredBackProjectionAlgorithm.cpp |  43 ++++---
 src/CudaForwardProjectionAlgorithm.cpp      |  60 +++++-----
 src/CudaProjector2D.cpp                     |  17 +--
 src/CudaReconstructionAlgorithm2D.cpp       | 169 ++++++----------------------
 src/CudaSartAlgorithm.cpp                   |   5 +-
 src/CudaSirtAlgorithm.cpp                   |   6 +-
 src/ReconstructionAlgorithm2D.cpp           |  25 ++--
 10 files changed, 134 insertions(+), 208 deletions(-)

(limited to 'src')

diff --git a/src/CudaBackProjectionAlgorithm.cpp b/src/CudaBackProjectionAlgorithm.cpp
index 365e058..a73f895 100644
--- a/src/CudaBackProjectionAlgorithm.cpp
+++ b/src/CudaBackProjectionAlgorithm.cpp
@@ -76,10 +76,9 @@ bool CCudaBackProjectionAlgorithm::initialize(const Config& _cfg)
 // Initialize - C++
 bool CCudaBackProjectionAlgorithm::initialize(CProjector2D* _pProjector,
                                      CFloat32ProjectionData2D* _pSinogram, 
-                                     CFloat32VolumeData2D* _pReconstruction,
-                                     int _iGPUindex, int _iPixelSuperSampling)
+                                     CFloat32VolumeData2D* _pReconstruction)
 {
-	m_bIsInitialized = CCudaReconstructionAlgorithm2D::initialize(_pProjector, _pSinogram, _pReconstruction, _iGPUindex, 1, _iPixelSuperSampling);
+	m_bIsInitialized = CCudaReconstructionAlgorithm2D::initialize(_pProjector, _pSinogram, _pReconstruction);
 
 	if (!m_bIsInitialized)
 		return false;
diff --git a/src/CudaCglsAlgorithm.cpp b/src/CudaCglsAlgorithm.cpp
index 0cedff6..9dd4f78 100644
--- a/src/CudaCglsAlgorithm.cpp
+++ b/src/CudaCglsAlgorithm.cpp
@@ -77,11 +77,9 @@ bool CCudaCglsAlgorithm::initialize(const Config& _cfg)
 // Initialize - C++
 bool CCudaCglsAlgorithm::initialize(CProjector2D* _pProjector,
                                     CFloat32ProjectionData2D* _pSinogram, 
-                                    CFloat32VolumeData2D* _pReconstruction,
-                                    int _iGPUindex, int _iDetectorSuperSampling,
-                                    int _iPixelSuperSampling)
+                                    CFloat32VolumeData2D* _pReconstruction)
 {
-	m_bIsInitialized = CCudaReconstructionAlgorithm2D::initialize(_pProjector, _pSinogram, _pReconstruction, _iGPUindex, _iDetectorSuperSampling, _iPixelSuperSampling);
+	m_bIsInitialized = CCudaReconstructionAlgorithm2D::initialize(_pProjector, _pSinogram, _pReconstruction);
 
 	if (!m_bIsInitialized)
 		return false;
diff --git a/src/CudaEMAlgorithm.cpp b/src/CudaEMAlgorithm.cpp
index 5c71f3d..d0afd80 100644
--- a/src/CudaEMAlgorithm.cpp
+++ b/src/CudaEMAlgorithm.cpp
@@ -76,11 +76,9 @@ bool CCudaEMAlgorithm::initialize(const Config& _cfg)
 // Initialize - C++
 bool CCudaEMAlgorithm::initialize(CProjector2D* _pProjector,
                                      CFloat32ProjectionData2D* _pSinogram, 
-                                     CFloat32VolumeData2D* _pReconstruction,
-                                     int _iGPUindex, int _iDetectorSuperSampling,
-                                     int _iPixelSuperSampling)
+                                     CFloat32VolumeData2D* _pReconstruction)
 {
-	m_bIsInitialized = CCudaReconstructionAlgorithm2D::initialize(_pProjector, _pSinogram, _pReconstruction, _iGPUindex, _iDetectorSuperSampling, _iPixelSuperSampling);
+	m_bIsInitialized = CCudaReconstructionAlgorithm2D::initialize(_pProjector, _pSinogram, _pReconstruction);
 
 	if (!m_bIsInitialized)
 		return false;
diff --git a/src/CudaFilteredBackProjectionAlgorithm.cpp b/src/CudaFilteredBackProjectionAlgorithm.cpp
index aac96d6..8c0659d 100644
--- a/src/CudaFilteredBackProjectionAlgorithm.cpp
+++ b/src/CudaFilteredBackProjectionAlgorithm.cpp
@@ -67,6 +67,24 @@ CCudaFilteredBackProjectionAlgorithm::~CCudaFilteredBackProjectionAlgorithm()
 	}
 }
 
+void CCudaFilteredBackProjectionAlgorithm::initializeFromProjector()
+{
+	m_iPixelSuperSampling = 1;
+	m_iGPUIndex = -1;
+
+	// Projector
+	CCudaProjector2D* pCudaProjector = dynamic_cast<CCudaProjector2D*>(m_pProjector);
+	if (!pCudaProjector) {
+		if (m_pProjector) {
+			ASTRA_WARN("non-CUDA Projector2D passed to FBP_CUDA");
+		}
+	} else {
+		m_iPixelSuperSampling = pCudaProjector->getVoxelSuperSampling();
+		m_iGPUIndex = pCudaProjector->getGPUIndex();
+	}
+
+}
+
 bool CCudaFilteredBackProjectionAlgorithm::initialize(const Config& _cfg)
 {
 	ASTRA_ASSERT(_cfg.self);
@@ -163,27 +181,24 @@ bool CCudaFilteredBackProjectionAlgorithm::initialize(const Config& _cfg)
 	}
 	CC.markNodeParsed("FilterD"); // TODO: Only for some types!
 
-	// GPU number
-	m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUindex", -1);
-	CC.markOptionParsed("GPUindex");
-
-	m_iPixelSuperSampling = 1;
-	if (pCudaProjector) {
-		// New interface
-		m_iPixelSuperSampling = pCudaProjector->getVoxelSuperSampling();
-	}
-	// Deprecated options
-	m_iPixelSuperSampling = (int)_cfg.self.getOptionNumerical("PixelSuperSampling", m_iPixelSuperSampling);
-	CC.markOptionParsed("PixelSuperSampling");
-
-
 	// Fan beam short scan mode
 	if (m_pSinogram && dynamic_cast<CFanFlatProjectionGeometry2D*>(m_pSinogram->getGeometry())) {
 		m_bShortScan = (int)_cfg.self.getOptionBool("ShortScan", false);
 		CC.markOptionParsed("ShortScan");
 	}
 
+	initializeFromProjector();
 
+	// Deprecated options
+	m_iPixelSuperSampling = (int)_cfg.self.getOptionNumerical("PixelSuperSampling", m_iPixelSuperSampling);
+	CC.markOptionParsed("PixelSuperSampling");
+
+	// GPU number
+	m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUindex", -1);
+	m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUIndex", m_iGPUIndex);
+	CC.markOptionParsed("GPUIndex");
+	if (!_cfg.self.hasOption("GPUIndex"))
+		CC.markOptionParsed("GPUindex");
 
 
 	m_pFBP = new AstraFBP;
diff --git a/src/CudaForwardProjectionAlgorithm.cpp b/src/CudaForwardProjectionAlgorithm.cpp
index b382f2e..9ca13ae 100644
--- a/src/CudaForwardProjectionAlgorithm.cpp
+++ b/src/CudaForwardProjectionAlgorithm.cpp
@@ -38,8 +38,11 @@ $Id$
 #include <boost/lexical_cast.hpp>
 
 #include "astra/AstraObjectManager.h"
+#include "astra/ParallelProjectionGeometry2D.h"
 #include "astra/FanFlatProjectionGeometry2D.h"
 #include "astra/FanFlatVecProjectionGeometry2D.h"
+#include "astra/Float32ProjectionData2D.h"
+#include "astra/Float32VolumeData2D.h"
 #include "astra/CudaProjector2D.h"
 
 #include "astra/Logging.h"
@@ -65,6 +68,24 @@ CCudaForwardProjectionAlgorithm::~CCudaForwardProjectionAlgorithm()
 
 }
 
+//---------------------------------------------------------------------------------------
+void CCudaForwardProjectionAlgorithm::initializeFromProjector()
+{
+	m_iDetectorSuperSampling = 1;
+	m_iGPUIndex = -1;
+
+	// Projector
+	CCudaProjector2D* pCudaProjector = dynamic_cast<CCudaProjector2D*>(m_pProjector);
+	if (!pCudaProjector) {
+		if (m_pProjector) {
+			ASTRA_WARN("non-CUDA Projector2D passed to FP_CUDA");
+		}
+	} else {
+		m_iDetectorSuperSampling = pCudaProjector->getDetectorSuperSampling();
+		m_iGPUIndex = pCudaProjector->getGPUIndex();
+	}
+}
+
 //---------------------------------------------------------------------------------------
 // Initialize - Config
 bool CCudaForwardProjectionAlgorithm::initialize(const Config& _cfg)
@@ -74,14 +95,9 @@ bool CCudaForwardProjectionAlgorithm::initialize(const Config& _cfg)
 
 	// Projector
 	XMLNode node = _cfg.self.getSingleNode("ProjectorId");
-	CCudaProjector2D* pCudaProjector = 0;
 	if (node) {
 		int id = boost::lexical_cast<int>(node.getContent());
-		CProjector2D *projector = CProjector2DManager::getSingleton().get(id);
-		pCudaProjector = dynamic_cast<CCudaProjector2D*>(projector);
-		if (!pCudaProjector) {
-			ASTRA_WARN("non-CUDA Projector2D passed to FP_CUDA");
-		}
+		m_pProjector = CProjector2DManager::getSingleton().get(id);
 	}
 	CC.markNodeParsed("ProjectorId");
 
@@ -101,22 +117,18 @@ bool CCudaForwardProjectionAlgorithm::initialize(const Config& _cfg)
 	m_pVolume = dynamic_cast<CFloat32VolumeData2D*>(CData2DManager::getSingleton().get(id));
 	CC.markNodeParsed("VolumeDataId");
 
+	initializeFromProjector();
+
+	// Deprecated options
+	m_iDetectorSuperSampling = (int)_cfg.self.getOptionNumerical("DetectorSuperSampling", m_iDetectorSuperSampling);
+	CC.markOptionParsed("DetectorSuperSampling");
 	// GPU number
 	m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUindex", -1);
 	m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUIndex", m_iGPUIndex);
-	CC.markOptionParsed("GPUindex");
-	if (!_cfg.self.hasOption("GPUindex"))
-		CC.markOptionParsed("GPUIndex");
+	CC.markOptionParsed("GPUIndex");
+	if (!_cfg.self.hasOption("GPUIndex"))
+		CC.markOptionParsed("GPUindex");
 
-	// Detector supersampling factor
-	m_iDetectorSuperSampling = 1;
-	if (pCudaProjector) {
-		// New interface
-		m_iDetectorSuperSampling = pCudaProjector->getDetectorSuperSampling();
-	}
-	// Deprecated option
-	m_iDetectorSuperSampling = (int)_cfg.self.getOptionNumerical("DetectorSuperSampling", m_iDetectorSuperSampling);
-	CC.markOptionParsed("DetectorSuperSampling");
 
 
 	// return success
@@ -125,20 +137,16 @@ bool CCudaForwardProjectionAlgorithm::initialize(const Config& _cfg)
 
 //----------------------------------------------------------------------------------------
 // Initialize - C++
-bool CCudaForwardProjectionAlgorithm::initialize(CProjectionGeometry2D* _pProjectionGeometry,
-												 CVolumeGeometry2D* _pReconstructionGeometry,
+bool CCudaForwardProjectionAlgorithm::initialize(CProjector2D* _pProjector,
 												 CFloat32VolumeData2D* _pVolume,
-												 CFloat32ProjectionData2D* _pSinogram,
-												 int _iGPUindex, int _iDetectorSuperSampling)
+												 CFloat32ProjectionData2D* _pSinogram)
 {
 	// store classes
-	//m_pProjectionGeometry = _pProjectionGeometry;
-	//m_pReconstructionGeometry = _pReconstructionGeometry;
+	m_pProjector = _pProjector;
 	m_pVolume = _pVolume;
 	m_pSinogram = _pSinogram;
 
-	m_iDetectorSuperSampling = _iDetectorSuperSampling;
-	m_iGPUIndex = _iGPUindex;
+	initializeFromProjector();
 
 	// return success
 	return check();
diff --git a/src/CudaProjector2D.cpp b/src/CudaProjector2D.cpp
index a26e32d..acf6000 100644
--- a/src/CudaProjector2D.cpp
+++ b/src/CudaProjector2D.cpp
@@ -61,6 +61,7 @@ void CCudaProjector2D::_clear()
 	m_projectionKernel = ker2d_default;
 	m_iVoxelSuperSampling = 1;
 	m_iDetectorSuperSampling = 1;
+	m_iGPUIndex = -1;
 }
 
 //----------------------------------------------------------------------------------------
@@ -125,18 +126,18 @@ bool CCudaProjector2D::initialize(const Config& _cfg)
 	m_iDetectorSuperSampling = (int)_cfg.self.getOptionNumerical("DetectorSuperSampling", 1);
 	CC.markOptionParsed("DetectorSuperSampling");
 
+	// GPU number
+	m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUindex", -1);
+	m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUIndex", m_iGPUIndex);
+	CC.markOptionParsed("GPUIndex");
+	if (!_cfg.self.hasOption("GPUIndex"))
+		CC.markOptionParsed("GPUindex");
+
+
 	m_bIsInitialized = _check();
 	return m_bIsInitialized;
 }
 
-/*
-bool CProjector2D::initialize(astra::CProjectionGeometry2D *, astra::CVolumeGeometry2D *)
-{
-	ASTRA_ASSERT(false);
-
-	return false;
-}
-*/
 
 std::string CCudaProjector2D::description() const
 {
diff --git a/src/CudaReconstructionAlgorithm2D.cpp b/src/CudaReconstructionAlgorithm2D.cpp
index 71b6637..bccdb43 100644
--- a/src/CudaReconstructionAlgorithm2D.cpp
+++ b/src/CudaReconstructionAlgorithm2D.cpp
@@ -84,111 +84,51 @@ void CCudaReconstructionAlgorithm2D::_clear()
 }
 
 //---------------------------------------------------------------------------------------
-// Initialize - Config
-bool CCudaReconstructionAlgorithm2D::initialize(const Config& _cfg)
+void CCudaReconstructionAlgorithm2D::initializeFromProjector()
 {
-	ASTRA_ASSERT(_cfg.self);
-	ConfigStackCheck<CAlgorithm> CC("CudaReconstructionAlgorithm2D", this, _cfg);
-
-	// if already initialized, clear first
-	if (m_bIsInitialized) {
-		clear();
-	}
+	m_iPixelSuperSampling = 1;
+	m_iDetectorSuperSampling = 1;
+	m_iGPUIndex = -1;
 
 	// Projector
-	XMLNode node = _cfg.self.getSingleNode("ProjectorId");
-	CCudaProjector2D* pCudaProjector = 0;
-	if (node) {
-		int id = boost::lexical_cast<int>(node.getContent());
-		CProjector2D *projector = CProjector2DManager::getSingleton().get(id);
-		pCudaProjector = dynamic_cast<CCudaProjector2D*>(projector);
-		if (!pCudaProjector) {
+	CCudaProjector2D* pCudaProjector = dynamic_cast<CCudaProjector2D*>(m_pProjector);
+	if (!pCudaProjector) {
+		if (m_pProjector) {
 			ASTRA_WARN("non-CUDA Projector2D passed");
 		}
-	}
-	CC.markNodeParsed("ProjectorId");
-
-
-	// sinogram data
-	node = _cfg.self.getSingleNode("ProjectionDataId");
-	ASTRA_CONFIG_CHECK(node, "CudaSirt2", "No ProjectionDataId tag specified.");
-	int id = boost::lexical_cast<int>(node.getContent());
-	m_pSinogram = dynamic_cast<CFloat32ProjectionData2D*>(CData2DManager::getSingleton().get(id));
-	CC.markNodeParsed("ProjectionDataId");
-
-	// reconstruction data
-	node = _cfg.self.getSingleNode("ReconstructionDataId");
-	ASTRA_CONFIG_CHECK(node, "CudaSirt2", "No ReconstructionDataId tag specified.");
-	id = boost::lexical_cast<int>(node.getContent());
-	m_pReconstruction = dynamic_cast<CFloat32VolumeData2D*>(CData2DManager::getSingleton().get(id));
-	CC.markNodeParsed("ReconstructionDataId");
-
-	// fixed mask
-	if (_cfg.self.hasOption("ReconstructionMaskId")) {
-		m_bUseReconstructionMask = true;
-		id = boost::lexical_cast<int>(_cfg.self.getOption("ReconstructionMaskId"));
-		m_pReconstructionMask = dynamic_cast<CFloat32VolumeData2D*>(CData2DManager::getSingleton().get(id));
-		ASTRA_CONFIG_CHECK(m_pReconstructionMask, "CudaReconstruction2D", "Invalid ReconstructionMaskId.");
-	}
-	CC.markOptionParsed("ReconstructionMaskId");
-	// fixed mask
-	if (_cfg.self.hasOption("SinogramMaskId")) {
-		m_bUseSinogramMask = true;
-		id = boost::lexical_cast<int>(_cfg.self.getOption("SinogramMaskId"));
-		m_pSinogramMask = dynamic_cast<CFloat32ProjectionData2D*>(CData2DManager::getSingleton().get(id));
-		ASTRA_CONFIG_CHECK(m_pSinogramMask, "CudaReconstruction2D", "Invalid SinogramMaskId.");
-	}
-	CC.markOptionParsed("SinogramMaskId");
-
-	// Constraints - NEW
-	if (_cfg.self.hasOption("MinConstraint")) {
-		m_bUseMinConstraint = true;
-		m_fMinValue = _cfg.self.getOptionNumerical("MinConstraint", 0.0f);
-		CC.markOptionParsed("MinConstraint");
-	} else {
-		// Constraint - OLD
-		m_bUseMinConstraint = _cfg.self.getOptionBool("UseMinConstraint", false);
-		CC.markOptionParsed("UseMinConstraint");
-		if (m_bUseMinConstraint) {
-			m_fMinValue = _cfg.self.getOptionNumerical("MinConstraintValue", 0.0f);
-			CC.markOptionParsed("MinConstraintValue");
-		}
-	}
-	if (_cfg.self.hasOption("MaxConstraint")) {
-		m_bUseMaxConstraint = true;
-		m_fMaxValue = _cfg.self.getOptionNumerical("MaxConstraint", 255.0f);
-		CC.markOptionParsed("MaxConstraint");
 	} else {
-		// Constraint - OLD
-		m_bUseMaxConstraint = _cfg.self.getOptionBool("UseMaxConstraint", false);
-		CC.markOptionParsed("UseMaxConstraint");
-		if (m_bUseMaxConstraint) {
-			m_fMaxValue = _cfg.self.getOptionNumerical("MaxConstraintValue", 0.0f);
-			CC.markOptionParsed("MaxConstraintValue");
-		}
-	}
-
-	// GPU number
-	m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUindex", -1);
-	m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUIndex", m_iGPUIndex);
-	CC.markOptionParsed("GPUindex");
-	if (!_cfg.self.hasOption("GPUindex"))
-		CC.markOptionParsed("GPUIndex");
-
-	// Supersampling factors
-	m_iDetectorSuperSampling = 1;
-	m_iPixelSuperSampling = 1;
-	if (pCudaProjector) {
-		// New interface
 		m_iDetectorSuperSampling = pCudaProjector->getDetectorSuperSampling();
 		m_iPixelSuperSampling = pCudaProjector->getVoxelSuperSampling();
+		m_iGPUIndex = pCudaProjector->getGPUIndex();
 	}
+}
+
+//---------------------------------------------------------------------------------------
+// Initialize - Config
+bool CCudaReconstructionAlgorithm2D::initialize(const Config& _cfg)
+{
+	ASTRA_ASSERT(_cfg.self);
+	ConfigStackCheck<CAlgorithm> CC("CudaReconstructionAlgorithm2D", this, _cfg);
+
+	m_bIsInitialized = CReconstructionAlgorithm2D::initialize(_cfg);
+
+	if (!m_bIsInitialized)
+		return false;
+
+	initializeFromProjector();
+
 	// Deprecated options
 	m_iDetectorSuperSampling = (int)_cfg.self.getOptionNumerical("DetectorSuperSampling", m_iDetectorSuperSampling);
 	m_iPixelSuperSampling = (int)_cfg.self.getOptionNumerical("PixelSuperSampling", m_iPixelSuperSampling);
 	CC.markOptionParsed("DetectorSuperSampling");
 	CC.markOptionParsed("PixelSuperSampling");
 
+	// GPU number
+	m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUindex", -1);
+	m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUIndex", m_iGPUIndex);
+	CC.markOptionParsed("GPUIndex");
+	if (!_cfg.self.hasOption("GPUIndex"))
+		CC.markOptionParsed("GPUindex");
 
 	return _check();
 }
@@ -198,33 +138,19 @@ bool CCudaReconstructionAlgorithm2D::initialize(const Config& _cfg)
 bool CCudaReconstructionAlgorithm2D::initialize(CProjector2D* _pProjector,
                                      CFloat32ProjectionData2D* _pSinogram, 
                                      CFloat32VolumeData2D* _pReconstruction)
-{
-	return initialize(_pProjector, _pSinogram, _pReconstruction, 0, 1);
-}
-
-//---------------------------------------------------------------------------------------
-// Initialize - C++
-bool CCudaReconstructionAlgorithm2D::initialize(CProjector2D* _pProjector,
-                                     CFloat32ProjectionData2D* _pSinogram, 
-                                     CFloat32VolumeData2D* _pReconstruction,
-                                     int _iGPUindex,
-                                     int _iDetectorSuperSampling,
-                                     int _iPixelSuperSampling)
 {
 	// if already initialized, clear first
 	if (m_bIsInitialized) {
 		clear();
 	}
 	
-	m_pProjector = 0;
+	m_pProjector = _pProjector;
 	
 	// required classes
 	m_pSinogram = _pSinogram;
 	m_pReconstruction = _pReconstruction;
 
-	m_iDetectorSuperSampling = _iDetectorSuperSampling;
-	m_iPixelSuperSampling = _iPixelSuperSampling;
-	m_iGPUIndex = _iGPUindex;
+	initializeFromProjector();
 
 	return _check();
 }
@@ -234,40 +160,13 @@ bool CCudaReconstructionAlgorithm2D::initialize(CProjector2D* _pProjector,
 // Check
 bool CCudaReconstructionAlgorithm2D::_check() 
 {
-	// TODO: CLEAN UP
-
-
-	// check pointers
-	//ASTRA_CONFIG_CHECK(m_pProjector, "Reconstruction2D", "Invalid Projector Object.");
-	ASTRA_CONFIG_CHECK(m_pSinogram, "SIRT_CUDA", "Invalid Projection Data Object.");
-	ASTRA_CONFIG_CHECK(m_pReconstruction, "SIRT_CUDA", "Invalid Reconstruction Data Object.");
-
-	// check initializations
-	//ASTRA_CONFIG_CHECK(m_pProjector->isInitialized(), "Reconstruction2D", "Projector Object Not Initialized.");
-	ASTRA_CONFIG_CHECK(m_pSinogram->isInitialized(), "SIRT_CUDA", "Projection Data Object Not Initialized.");
-	ASTRA_CONFIG_CHECK(m_pReconstruction->isInitialized(), "SIRT_CUDA", "Reconstruction Data Object Not Initialized.");
+	if (!CReconstructionAlgorithm2D::_check())
+		return false;
 
 	ASTRA_CONFIG_CHECK(m_iDetectorSuperSampling >= 1, "SIRT_CUDA", "DetectorSuperSampling must be a positive integer.");
 	ASTRA_CONFIG_CHECK(m_iPixelSuperSampling >= 1, "SIRT_CUDA", "PixelSuperSampling must be a positive integer.");
 	ASTRA_CONFIG_CHECK(m_iGPUIndex >= -1, "SIRT_CUDA", "GPUIndex must be a non-negative integer.");
 
-	// check compatibility between projector and data classes
-//	ASTRA_CONFIG_CHECK(m_pSinogram->getGeometry()->isEqual(m_pProjector->getProjectionGeometry()), "SIRT_CUDA", "Projection Data not compatible with the specified Projector.");
-//	ASTRA_CONFIG_CHECK(m_pReconstruction->getGeometry()->isEqual(m_pProjector->getVolumeGeometry()), "SIRT_CUDA", "Reconstruction Data not compatible with the specified Projector.");
-
-	// todo: turn some of these back on
-
-// 	ASTRA_CONFIG_CHECK(m_pProjectionGeometry, "SIRT_CUDA", "ProjectionGeometry not specified.");
-// 	ASTRA_CONFIG_CHECK(m_pProjectionGeometry->isInitialized(), "SIRT_CUDA", "ProjectionGeometry not initialized.");
-// 	ASTRA_CONFIG_CHECK(m_pReconstructionGeometry, "SIRT_CUDA", "ReconstructionGeometry not specified.");
-// 	ASTRA_CONFIG_CHECK(m_pReconstructionGeometry->isInitialized(), "SIRT_CUDA", "ReconstructionGeometry not initialized.");
-
-	// check dimensions
-	//ASTRA_CONFIG_CHECK(m_pSinogram->getAngleCount() == m_pProjectionGeometry->getProjectionAngleCount(), "SIRT_CUDA", "Sinogram data object size mismatch.");
-	//ASTRA_CONFIG_CHECK(m_pSinogram->getDetectorCount() == m_pProjectionGeometry->getDetectorCount(), "SIRT_CUDA", "Sinogram data object size mismatch.");
-	//ASTRA_CONFIG_CHECK(m_pReconstruction->getWidth() == m_pReconstructionGeometry->getGridColCount(), "SIRT_CUDA", "Reconstruction data object size mismatch.");
-	//ASTRA_CONFIG_CHECK(m_pReconstruction->getHeight() == m_pReconstructionGeometry->getGridRowCount(), "SIRT_CUDA", "Reconstruction data object size mismatch.");
-	
 	// check restrictions
 	// TODO: check restrictions built into cuda code
 
diff --git a/src/CudaSartAlgorithm.cpp b/src/CudaSartAlgorithm.cpp
index 8c0c6d7..d202847 100644
--- a/src/CudaSartAlgorithm.cpp
+++ b/src/CudaSartAlgorithm.cpp
@@ -116,10 +116,9 @@ bool CCudaSartAlgorithm::initialize(const Config& _cfg)
 // Initialize - C++
 bool CCudaSartAlgorithm::initialize(CProjector2D* _pProjector,
                                      CFloat32ProjectionData2D* _pSinogram, 
-                                     CFloat32VolumeData2D* _pReconstruction,
-                                     int _iGPUindex, int _iDetectorSuperSampling)
+                                     CFloat32VolumeData2D* _pReconstruction)
 {
-	m_bIsInitialized = CCudaReconstructionAlgorithm2D::initialize(_pProjector, _pSinogram, _pReconstruction, _iGPUindex, _iDetectorSuperSampling, 1);
+	m_bIsInitialized = CCudaReconstructionAlgorithm2D::initialize(_pProjector, _pSinogram, _pReconstruction);
 
 	if (!m_bIsInitialized)
 		return false;
diff --git a/src/CudaSirtAlgorithm.cpp b/src/CudaSirtAlgorithm.cpp
index d424915..ab0a418 100644
--- a/src/CudaSirtAlgorithm.cpp
+++ b/src/CudaSirtAlgorithm.cpp
@@ -98,11 +98,9 @@ bool CCudaSirtAlgorithm::initialize(const Config& _cfg)
 // Initialize - C++
 bool CCudaSirtAlgorithm::initialize(CProjector2D* _pProjector,
                                      CFloat32ProjectionData2D* _pSinogram, 
-                                     CFloat32VolumeData2D* _pReconstruction,
-                                     int _iGPUindex, int _iDetectorSuperSampling,
-                                     int _iPixelSuperSampling)
+                                     CFloat32VolumeData2D* _pReconstruction)
 {
-	m_bIsInitialized = CCudaReconstructionAlgorithm2D::initialize(_pProjector, _pSinogram, _pReconstruction, _iGPUindex, _iDetectorSuperSampling, _iPixelSuperSampling);
+	m_bIsInitialized = CCudaReconstructionAlgorithm2D::initialize(_pProjector, _pSinogram, _pReconstruction);
 
 	if (!m_bIsInitialized)
 		return false;
diff --git a/src/ReconstructionAlgorithm2D.cpp b/src/ReconstructionAlgorithm2D.cpp
index 767efe6..4575ff7 100644
--- a/src/ReconstructionAlgorithm2D.cpp
+++ b/src/ReconstructionAlgorithm2D.cpp
@@ -85,9 +85,16 @@ bool CReconstructionAlgorithm2D::initialize(const Config& _cfg)
 	
 	// projector
 	XMLNode node = _cfg.self.getSingleNode("ProjectorId");
-	ASTRA_CONFIG_CHECK(node, "Reconstruction2D", "No ProjectorId tag specified.");
-	int id = boost::lexical_cast<int>(node.getContent());
-	m_pProjector = CProjector2DManager::getSingleton().get(id);
+	if (requiresProjector()) {
+		ASTRA_CONFIG_CHECK(node, "Reconstruction2D", "No ProjectorId tag specified.");
+	}
+	int id;
+	if (node) {
+		id = boost::lexical_cast<int>(node.getContent());
+		m_pProjector = CProjector2DManager::getSingleton().get(id);
+	} else {
+		m_pProjector = 0;
+	}
 	CC.markNodeParsed("ProjectorId");
 
 	// sinogram data
@@ -205,18 +212,22 @@ void CReconstructionAlgorithm2D::setSinogramMask(CFloat32ProjectionData2D* _pMas
 bool CReconstructionAlgorithm2D::_check() 
 {
 	// check pointers
-	ASTRA_CONFIG_CHECK(m_pProjector, "Reconstruction2D", "Invalid Projector Object.");
+	if (requiresProjector())
+		ASTRA_CONFIG_CHECK(m_pProjector, "Reconstruction2D", "Invalid Projector Object.");
 	ASTRA_CONFIG_CHECK(m_pSinogram, "Reconstruction2D", "Invalid Projection Data Object.");
 	ASTRA_CONFIG_CHECK(m_pReconstruction, "Reconstruction2D", "Invalid Reconstruction Data Object.");
 
 	// check initializations
-	ASTRA_CONFIG_CHECK(m_pProjector->isInitialized(), "Reconstruction2D", "Projector Object Not Initialized.");
+	if (requiresProjector())
+		ASTRA_CONFIG_CHECK(m_pProjector->isInitialized(), "Reconstruction2D", "Projector Object Not Initialized.");
 	ASTRA_CONFIG_CHECK(m_pSinogram->isInitialized(), "Reconstruction2D", "Projection Data Object Not Initialized.");
 	ASTRA_CONFIG_CHECK(m_pReconstruction->isInitialized(), "Reconstruction2D", "Reconstruction Data Object Not Initialized.");
 
 	// check compatibility between projector and data classes
-	ASTRA_CONFIG_CHECK(m_pSinogram->getGeometry()->isEqual(m_pProjector->getProjectionGeometry()), "Reconstruction2D", "Projection Data not compatible with the specified Projector.");
-	ASTRA_CONFIG_CHECK(m_pReconstruction->getGeometry()->isEqual(m_pProjector->getVolumeGeometry()), "Reconstruction2D", "Reconstruction Data not compatible with the specified Projector.");
+	if (requiresProjector()) {
+		ASTRA_CONFIG_CHECK(m_pSinogram->getGeometry()->isEqual(m_pProjector->getProjectionGeometry()), "Reconstruction2D", "Projection Data not compatible with the specified Projector.");
+		ASTRA_CONFIG_CHECK(m_pReconstruction->getGeometry()->isEqual(m_pProjector->getVolumeGeometry()), "Reconstruction2D", "Reconstruction Data not compatible with the specified Projector.");
+	}
 
 	// success
 	return true;
-- 
cgit v1.2.3


From f7e01f5a3ca7780a29d1fbc3790e527c310cc7f8 Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Fri, 9 Oct 2015 15:55:11 +0200
Subject: Fix loop bounds in (unused) Float32ProjectionData3D arithmetic
 functions

---
 src/Float32ProjectionData3D.cpp | 36 ++++++++++++++++++++----------------
 1 file changed, 20 insertions(+), 16 deletions(-)

(limited to 'src')

diff --git a/src/Float32ProjectionData3D.cpp b/src/Float32ProjectionData3D.cpp
index 2bd0447..680ad55 100644
--- a/src/Float32ProjectionData3D.cpp
+++ b/src/Float32ProjectionData3D.cpp
@@ -53,13 +53,13 @@ CFloat32ProjectionData3D& CFloat32ProjectionData3D::operator+=(const CFloat32Pro
 	CProjectionGeometry3D * pThisGeometry = getGeometry();
 
 	int iProjectionCount = pThisGeometry->getProjectionCount();
+	int iDetectorCount = pThisGeometry->getDetectorTotCount();
 #ifdef _DEBUG
 	CProjectionGeometry3D * pDataGeometry = _data.getGeometry();
-	int iThisProjectionDetectorCount = pThisGeometry->getDetectorRowCount() * pThisGeometry->getDetectorColCount();
-	int iDataProjectionDetectorCount = pDataGeometry->getDetectorRowCount() * pDataGeometry->getDetectorColCount();
+	int iDataProjectionDetectorCount = pDataGeometry->getDetectorTotCount();
 
 	ASTRA_ASSERT(iProjectionCount == pDataGeometry->getProjectionCount());
-	ASTRA_ASSERT(iThisProjectionDetectorCount == iDataProjectionDetectorCount);
+	ASTRA_ASSERT(iDetectorCount == iDataProjectionDetectorCount);
 #endif
 
 	for(int iProjectionIndex = 0; iProjectionIndex < iProjectionCount; iProjectionIndex++)
@@ -67,7 +67,7 @@ CFloat32ProjectionData3D& CFloat32ProjectionData3D::operator+=(const CFloat32Pro
 		CFloat32VolumeData2D * pThisProjection = fetchProjection(iProjectionIndex);
 		CFloat32VolumeData2D * pDataProjection = _data.fetchProjection(iProjectionIndex);
 
-		for(int iDetectorIndex = 0; iDetectorIndex < iDetectorIndex; iDetectorIndex++)
+		for(int iDetectorIndex = 0; iDetectorIndex < iDetectorCount; iDetectorIndex++)
 		{
 			float32 fThisValue = pThisProjection->getData()[iDetectorIndex];
 			float32 fDataValue = pDataProjection->getDataConst()[iDetectorIndex];
@@ -91,13 +91,13 @@ CFloat32ProjectionData3D& CFloat32ProjectionData3D::operator-=(const CFloat32Pro
 	CProjectionGeometry3D * pThisGeometry = getGeometry();
 
 	int iProjectionCount = pThisGeometry->getProjectionCount();
+	int iDetectorCount = pThisGeometry->getDetectorTotCount();
 #ifdef _DEBUG
 	CProjectionGeometry3D * pDataGeometry = _data.getGeometry();
-	int iThisProjectionDetectorCount = pThisGeometry->getDetectorRowCount() * pThisGeometry->getDetectorColCount();
-	int iDataProjectionDetectorCount = pDataGeometry->getDetectorRowCount() * pDataGeometry->getDetectorColCount();
+	int iDataProjectionDetectorCount = pDataGeometry->getDetectorTotCount();
 
 	ASTRA_ASSERT(iProjectionCount == pDataGeometry->getProjectionCount());
-	ASTRA_ASSERT(iThisProjectionDetectorCount == iDataProjectionDetectorCount);
+	ASTRA_ASSERT(iDetectorCount == iDataProjectionDetectorCount);
 #endif
 
 	for(int iProjectionIndex = 0; iProjectionIndex < iProjectionCount; iProjectionIndex++)
@@ -105,7 +105,7 @@ CFloat32ProjectionData3D& CFloat32ProjectionData3D::operator-=(const CFloat32Pro
 		CFloat32VolumeData2D * pThisProjection = fetchProjection(iProjectionIndex);
 		CFloat32VolumeData2D * pDataProjection = _data.fetchProjection(iProjectionIndex);
 
-		for(int iDetectorIndex = 0; iDetectorIndex < iDetectorIndex; iDetectorIndex++)
+		for(int iDetectorIndex = 0; iDetectorIndex < iDetectorCount; iDetectorIndex++)
 		{
 			float32 fThisValue = pThisProjection->getData()[iDetectorIndex];
 			float32 fDataValue = pDataProjection->getDataConst()[iDetectorIndex];
@@ -129,13 +129,13 @@ CFloat32ProjectionData3D& CFloat32ProjectionData3D::operator*=(const CFloat32Pro
 	CProjectionGeometry3D * pThisGeometry = getGeometry();
 
 	int iProjectionCount = pThisGeometry->getProjectionCount();
+	int iDetectorCount = pThisGeometry->getDetectorTotCount();
 #ifdef _DEBUG
 	CProjectionGeometry3D * pDataGeometry = _data.getGeometry();
-	int iThisProjectionDetectorCount = pThisGeometry->getDetectorRowCount() * pThisGeometry->getDetectorColCount();
-	int iDataProjectionDetectorCount = pDataGeometry->getDetectorRowCount() * pDataGeometry->getDetectorColCount();
+	int iDataProjectionDetectorCount = pDataGeometry->getDetectorTotCount();
 
 	ASTRA_ASSERT(iProjectionCount == pDataGeometry->getProjectionCount());
-	ASTRA_ASSERT(iThisProjectionDetectorCount == iDataProjectionDetectorCount);
+	ASTRA_ASSERT(iDetectorCount == iDataProjectionDetectorCount);
 #endif
 
 	for(int iProjectionIndex = 0; iProjectionIndex < iProjectionCount; iProjectionIndex++)
@@ -143,7 +143,7 @@ CFloat32ProjectionData3D& CFloat32ProjectionData3D::operator*=(const CFloat32Pro
 		CFloat32VolumeData2D * pThisProjection = fetchProjection(iProjectionIndex);
 		CFloat32VolumeData2D * pDataProjection = _data.fetchProjection(iProjectionIndex);
 
-		for(int iDetectorIndex = 0; iDetectorIndex < iDetectorIndex; iDetectorIndex++)
+		for(int iDetectorIndex = 0; iDetectorIndex < iDetectorCount; iDetectorIndex++)
 		{
 			float32 fThisValue = pThisProjection->getData()[iDetectorIndex];
 			float32 fDataValue = pDataProjection->getDataConst()[iDetectorIndex];
@@ -167,12 +167,13 @@ CFloat32ProjectionData3D& CFloat32ProjectionData3D::operator*=(const float32& _f
 	CProjectionGeometry3D * pThisGeometry = getGeometry();
 
 	int iProjectionCount = pThisGeometry->getProjectionCount();
+	int iDetectorCount = pThisGeometry->getDetectorTotCount();
 
 	for(int iProjectionIndex = 0; iProjectionIndex < iProjectionCount; iProjectionIndex++)
 	{
 		CFloat32VolumeData2D * pThisProjection = fetchProjection(iProjectionIndex);
 
-		for(int iDetectorIndex = 0; iDetectorIndex < iDetectorIndex; iDetectorIndex++)
+		for(int iDetectorIndex = 0; iDetectorIndex < iDetectorCount; iDetectorIndex++)
 		{
 			float32 fThisValue = pThisProjection->getData()[iDetectorIndex];
 
@@ -194,12 +195,13 @@ CFloat32ProjectionData3D& CFloat32ProjectionData3D::operator/=(const float32& _f
 	CProjectionGeometry3D * pThisGeometry = getGeometry();
 
 	int iProjectionCount = pThisGeometry->getProjectionCount();
+	int iDetectorCount = pThisGeometry->getDetectorTotCount();
 
 	for(int iProjectionIndex = 0; iProjectionIndex < iProjectionCount; iProjectionIndex++)
 	{
 		CFloat32VolumeData2D * pThisProjection = fetchProjection(iProjectionIndex);
 
-		for(int iDetectorIndex = 0; iDetectorIndex < iDetectorIndex; iDetectorIndex++)
+		for(int iDetectorIndex = 0; iDetectorIndex < iDetectorCount; iDetectorIndex++)
 		{
 			float32 fThisValue = pThisProjection->getData()[iDetectorIndex];
 
@@ -221,12 +223,13 @@ CFloat32ProjectionData3D& CFloat32ProjectionData3D::operator+=(const float32& _f
 	CProjectionGeometry3D * pThisGeometry = getGeometry();
 
 	int iProjectionCount = pThisGeometry->getProjectionCount();
+	int iDetectorCount = pThisGeometry->getDetectorTotCount();
 
 	for(int iProjectionIndex = 0; iProjectionIndex < iProjectionCount; iProjectionIndex++)
 	{
 		CFloat32VolumeData2D * pThisProjection = fetchProjection(iProjectionIndex);
 
-		for(int iDetectorIndex = 0; iDetectorIndex < iDetectorIndex; iDetectorIndex++)
+		for(int iDetectorIndex = 0; iDetectorIndex < iDetectorCount; iDetectorIndex++)
 		{
 			float32 fThisValue = pThisProjection->getData()[iDetectorIndex];
 
@@ -248,12 +251,13 @@ CFloat32ProjectionData3D& CFloat32ProjectionData3D::operator-=(const float32& _f
 	CProjectionGeometry3D * pThisGeometry = getGeometry();
 
 	int iProjectionCount = pThisGeometry->getProjectionCount();
+	int iDetectorCount = pThisGeometry->getDetectorTotCount();
 
 	for(int iProjectionIndex = 0; iProjectionIndex < iProjectionCount; iProjectionIndex++)
 	{
 		CFloat32VolumeData2D * pThisProjection = fetchProjection(iProjectionIndex);
 
-		for(int iDetectorIndex = 0; iDetectorIndex < iDetectorIndex; iDetectorIndex++)
+		for(int iDetectorIndex = 0; iDetectorIndex < iDetectorCount; iDetectorIndex++)
 		{
 			float32 fThisValue = pThisProjection->getData()[iDetectorIndex];
 
-- 
cgit v1.2.3


From c7128284fdbbfa0d4a5cbc951b9cdeaf8f9b41e0 Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Fri, 9 Oct 2015 16:10:45 +0200
Subject: Call check() function after initializing CUDA_FBP

Without this check, specifying invalid data would cause crashes.
---
 src/CudaFilteredBackProjectionAlgorithm.cpp | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/CudaFilteredBackProjectionAlgorithm.cpp b/src/CudaFilteredBackProjectionAlgorithm.cpp
index aac96d6..6353c46 100644
--- a/src/CudaFilteredBackProjectionAlgorithm.cpp
+++ b/src/CudaFilteredBackProjectionAlgorithm.cpp
@@ -189,9 +189,7 @@ bool CCudaFilteredBackProjectionAlgorithm::initialize(const Config& _cfg)
 	m_pFBP = new AstraFBP;
 	m_bAstraFBPInit = false;
 
-	// success
-	m_bIsInitialized = true;
-	return m_bIsInitialized;
+	return check();
 }
 
 bool CCudaFilteredBackProjectionAlgorithm::initialize(CFloat32ProjectionData2D * _pSinogram, CFloat32VolumeData2D * _pReconstruction, E_FBPFILTER _eFilter, const float * _pfFilter /* = NULL */, int _iFilterWidth /* = 0 */, int _iGPUIndex /* = 0 */, float _fFilterParameter /* = -1.0f */)
@@ -241,7 +239,7 @@ bool CCudaFilteredBackProjectionAlgorithm::initialize(CFloat32ProjectionData2D *
 
 	m_fFilterParameter = _fFilterParameter;
 
-	return m_bIsInitialized;
+	return check();
 }
 
 void CCudaFilteredBackProjectionAlgorithm::run(int _iNrIterations /* = 0 */)
@@ -361,7 +359,7 @@ bool CCudaFilteredBackProjectionAlgorithm::check()
 	ASTRA_CONFIG_CHECK(m_pReconstruction->isInitialized(), "FBP_CUDA", "Reconstruction Data Object Not Initialized.");
 
 	// check gpu index
-	ASTRA_CONFIG_CHECK(m_iGPUIndex >= -1, "FBP_CUDA", "GPUIndex must be a non-negative integer.");
+	ASTRA_CONFIG_CHECK(m_iGPUIndex >= -1, "FBP_CUDA", "GPUIndex must be a non-negative integer or -1.");
 	// check pixel supersampling
 	ASTRA_CONFIG_CHECK(m_iPixelSuperSampling >= 0, "FBP_CUDA", "PixelSuperSampling must be a non-negative integer.");
 
-- 
cgit v1.2.3


From fb44faa449990400861f1869b52f5afc8fefe01b Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Fri, 9 Oct 2015 16:19:54 +0200
Subject: Fix warning text

---
 src/CudaReconstructionAlgorithm2D.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/CudaReconstructionAlgorithm2D.cpp b/src/CudaReconstructionAlgorithm2D.cpp
index bccdb43..2d023b7 100644
--- a/src/CudaReconstructionAlgorithm2D.cpp
+++ b/src/CudaReconstructionAlgorithm2D.cpp
@@ -163,9 +163,9 @@ bool CCudaReconstructionAlgorithm2D::_check()
 	if (!CReconstructionAlgorithm2D::_check())
 		return false;
 
-	ASTRA_CONFIG_CHECK(m_iDetectorSuperSampling >= 1, "SIRT_CUDA", "DetectorSuperSampling must be a positive integer.");
-	ASTRA_CONFIG_CHECK(m_iPixelSuperSampling >= 1, "SIRT_CUDA", "PixelSuperSampling must be a positive integer.");
-	ASTRA_CONFIG_CHECK(m_iGPUIndex >= -1, "SIRT_CUDA", "GPUIndex must be a non-negative integer.");
+	ASTRA_CONFIG_CHECK(m_iDetectorSuperSampling >= 1, "CudaReconstructionAlgorithm2D", "DetectorSuperSampling must be a positive integer.");
+	ASTRA_CONFIG_CHECK(m_iPixelSuperSampling >= 1, "CudaReconstructionAlgorithm2D", "PixelSuperSampling must be a positive integer.");
+	ASTRA_CONFIG_CHECK(m_iGPUIndex >= -1, "CudaReconstructionAlgorithm2D", "GPUIndex must be a non-negative integer or -1.");
 
 	// check restrictions
 	// TODO: check restrictions built into cuda code
-- 
cgit v1.2.3


From 4298c2f212aac1e76f1f123ab199749a9a668415 Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Fri, 9 Oct 2015 16:40:39 +0200
Subject: Give a warning on ignored Min/MaxConstraint in some CUDA algorithms.

Previously it would fail an assertion.
---
 src/CudaReconstructionAlgorithm2D.cpp | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/CudaReconstructionAlgorithm2D.cpp b/src/CudaReconstructionAlgorithm2D.cpp
index 71b6637..18627fc 100644
--- a/src/CudaReconstructionAlgorithm2D.cpp
+++ b/src/CudaReconstructionAlgorithm2D.cpp
@@ -462,10 +462,18 @@ void CCudaReconstructionAlgorithm2D::run(int _iNrIterations)
 
 	ASTRA_ASSERT(ok);
 
-	if (m_bUseMinConstraint)
-		ok &= m_pAlgo->setMinConstraint(m_fMinValue);
-	if (m_bUseMaxConstraint)
-		ok &= m_pAlgo->setMaxConstraint(m_fMaxValue);
+	if (m_bUseMinConstraint) {
+		bool ret = m_pAlgo->setMinConstraint(m_fMinValue);
+		if (!ret) {
+			ASTRA_WARN("This algorithm ignores MinConstraint");
+		}
+	}
+	if (m_bUseMaxConstraint) {
+		bool ret= m_pAlgo->setMaxConstraint(m_fMaxValue);
+		if (!ret) {
+			ASTRA_WARN("This algorithm ignores MaxConstraint");
+		}
+	}
 
 	ok &= m_pAlgo->iterate(_iNrIterations);
 	ASTRA_ASSERT(ok);
-- 
cgit v1.2.3


From 21d08656ead6f974f83b0a02b03b105a7cd617a8 Mon Sep 17 00:00:00 2001
From: "Daniel M. Pelt" <D.M.Pelt@cwi.nl>
Date: Tue, 13 Oct 2015 17:02:09 +0200
Subject: Do not reuse va_list when logging both to screen and file

---
 src/Logging.cpp | 60 ++++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 44 insertions(+), 16 deletions(-)

(limited to 'src')

diff --git a/src/Logging.cpp b/src/Logging.cpp
index 8290ca0..cd7e3f0 100644
--- a/src/Logging.cpp
+++ b/src/Logging.cpp
@@ -70,37 +70,65 @@ void CLogger::disable()
 void CLogger::debug(const char *sfile, int sline, const char *fmt, ...)
 {
 	_assureIsInitialized();
-	va_list ap;
-	va_start(ap, fmt);
-	if(m_bEnabledScreen) clog_debug(sfile,sline,0,fmt,ap);
-	if(m_bEnabledFile && m_bFileProvided) clog_debug(sfile,sline,1,fmt,ap);
+	va_list ap, apf;
+	if(m_bEnabledScreen){
+        va_start(ap, fmt);
+        clog_debug(sfile,sline,0,fmt,ap);
+        va_end(ap);
+    }
+	if(m_bEnabledFile && m_bFileProvided){
+        va_start(apf, fmt);
+        clog_debug(sfile,sline,1,fmt,apf);
+        va_end(apf);
+    }
 }
 
 void CLogger::info(const char *sfile, int sline, const char *fmt, ...)
 {
 	_assureIsInitialized();
-	va_list ap;
-	va_start(ap, fmt);
-	if(m_bEnabledScreen) clog_info(sfile,sline,0,fmt,ap);
-	if(m_bEnabledFile && m_bFileProvided) clog_info(sfile,sline,1,fmt,ap);
+	va_list ap, apf;
+	if(m_bEnabledScreen){
+        va_start(ap, fmt);
+        clog_info(sfile,sline,0,fmt,ap);
+        va_end(ap);
+    }
+	if(m_bEnabledFile && m_bFileProvided){
+        va_start(apf, fmt);
+        clog_info(sfile,sline,1,fmt,apf);
+        va_end(apf);
+    }
 }
 
 void CLogger::warn(const char *sfile, int sline, const char *fmt, ...)
 {
 	_assureIsInitialized();
-	va_list ap;
-	va_start(ap, fmt);
-	if(m_bEnabledScreen) clog_warn(sfile,sline,0,fmt,ap);
-	if(m_bEnabledFile && m_bFileProvided) clog_warn(sfile,sline,1,fmt,ap);
+	va_list ap, apf;
+	if(m_bEnabledScreen){
+        va_start(ap, fmt);
+        clog_warn(sfile,sline,0,fmt,ap);
+        va_end(ap);
+    }
+	if(m_bEnabledFile && m_bFileProvided){
+        va_start(apf, fmt);
+        clog_warn(sfile,sline,1,fmt,apf);
+        va_end(apf);
+    }
 }
 
 void CLogger::error(const char *sfile, int sline, const char *fmt, ...)
 {
 	_assureIsInitialized();
-	va_list ap;
-	va_start(ap, fmt);
-	if(m_bEnabledScreen) clog_error(sfile,sline,0,fmt,ap);
-	if(m_bEnabledFile && m_bFileProvided) clog_error(sfile,sline,1,fmt,ap);
+	va_list ap, apf;
+	if(m_bEnabledScreen){
+        va_start(ap, fmt);
+        clog_error(sfile,sline,0,fmt,ap);
+        va_end(ap);
+    }
+	if(m_bEnabledFile && m_bFileProvided){
+        va_start(apf, fmt);
+        clog_error(sfile,sline,1,fmt,apf);
+        va_end(apf);
+    }
 }
 
 void CLogger::_setLevel(int id, log_level m_eLevel)
-- 
cgit v1.2.3


From 07c31b932078544205d61551edd4a66f69be30ae Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Wed, 2 Dec 2015 11:25:59 +0100
Subject: Avoid unnecessary include in header

---
 src/PluginAlgorithm.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/PluginAlgorithm.cpp b/src/PluginAlgorithm.cpp
index e79c77b..8f7dfc5 100644
--- a/src/PluginAlgorithm.cpp
+++ b/src/PluginAlgorithm.cpp
@@ -37,9 +37,13 @@ $Id$
 #include <fstream>
 #include <string>
 
+#include <Python.h>
+#include "bytesobject.h"
+
 namespace astra {
 
 
+
 void logPythonError(){
     if(PyErr_Occurred()){
         PyObject *ptype, *pvalue, *ptraceback;
@@ -394,4 +398,4 @@ PyObject* XMLNode2dict(XMLNode node){
 }
 
 }
-#endif
\ No newline at end of file
+#endif
-- 
cgit v1.2.3


From 3ea35516aceec4f5817871a00008b109777ebb13 Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Mon, 30 Nov 2015 16:07:52 +0100
Subject: Disable error-prone checks

---
 src/VolumeGeometry3D.cpp | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src')

diff --git a/src/VolumeGeometry3D.cpp b/src/VolumeGeometry3D.cpp
index a1cf424..3de146f 100644
--- a/src/VolumeGeometry3D.cpp
+++ b/src/VolumeGeometry3D.cpp
@@ -45,6 +45,7 @@ bool CVolumeGeometry3D::_check()
 	ASTRA_CONFIG_CHECK(m_fWindowMinZ < m_fWindowMaxZ, "VolumeGeometry3D", "WindowMinZ should be lower than WindowMaxZ.");
 
 	ASTRA_CONFIG_CHECK(m_iGridTotCount == (m_iGridColCount * m_iGridRowCount * m_iGridSliceCount), "VolumeGeometry3D", "Internal configuration error.");
+#if 0
 	ASTRA_CONFIG_CHECK(m_fWindowLengthX == (m_fWindowMaxX - m_fWindowMinX), "VolumeGeometry3D", "Internal configuration error.");
 	ASTRA_CONFIG_CHECK(m_fWindowLengthY == (m_fWindowMaxY - m_fWindowMinY), "VolumeGeometry3D", "Internal configuration error.");
 	ASTRA_CONFIG_CHECK(m_fWindowLengthZ == (m_fWindowMaxZ - m_fWindowMinZ), "VolumeGeometry3D", "Internal configuration error.");
@@ -57,6 +58,7 @@ bool CVolumeGeometry3D::_check()
 	ASTRA_CONFIG_CHECK(m_fDivPixelLengthX == (1.0f / m_fPixelLengthX), "VolumeGeometry3D", "Internal configuration error.");
 	ASTRA_CONFIG_CHECK(m_fDivPixelLengthY == (1.0f / m_fPixelLengthY), "VolumeGeometry3D", "Internal configuration error.");
 	ASTRA_CONFIG_CHECK(m_fDivPixelLengthZ == (1.0f / m_fPixelLengthZ), "VolumeGeometry3D", "Internal configuration error.");
+#endif
 
 	return true;
 }
-- 
cgit v1.2.3


From b14fb531ad9ae3d565f2cf28f5506408ab10dbed Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Wed, 18 Nov 2015 11:26:15 +0100
Subject: Add CompositeGeometryManager

This handles FP and BP operations on multiple data objects at once,
splitting them to fit in GPU memory where necessary.
---
 src/CompositeGeometryManager.cpp         | 884 +++++++++++++++++++++++++++++++
 src/ConeProjectionGeometry3D.cpp         |  92 +++-
 src/ConeVecProjectionGeometry3D.cpp      |  58 +-
 src/CudaBackProjectionAlgorithm3D.cpp    |   8 +
 src/CudaForwardProjectionAlgorithm3D.cpp |   9 +
 src/GeometryUtil3D.cpp                   | 172 ++++++
 src/ParallelProjectionGeometry3D.cpp     |  81 ++-
 src/ParallelVecProjectionGeometry3D.cpp  |  61 ++-
 8 files changed, 1351 insertions(+), 14 deletions(-)
 create mode 100644 src/CompositeGeometryManager.cpp

(limited to 'src')

diff --git a/src/CompositeGeometryManager.cpp b/src/CompositeGeometryManager.cpp
new file mode 100644
index 0000000..fc8bc2e
--- /dev/null
+++ b/src/CompositeGeometryManager.cpp
@@ -0,0 +1,884 @@
+/*
+-----------------------------------------------------------------------
+Copyright: 2010-2015, iMinds-Vision Lab, University of Antwerp
+           2014-2015, CWI, Amsterdam
+
+Contact: astra@uantwerpen.be
+Website: http://sf.net/projects/astra-toolbox
+
+This file is part of the ASTRA Toolbox.
+
+
+The ASTRA Toolbox is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+The ASTRA Toolbox is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
+
+-----------------------------------------------------------------------
+*/
+
+#include "astra/CompositeGeometryManager.h"
+
+#ifdef ASTRA_CUDA
+
+#include "astra/GeometryUtil3D.h"
+#include "astra/VolumeGeometry3D.h"
+#include "astra/ConeProjectionGeometry3D.h"
+#include "astra/ConeVecProjectionGeometry3D.h"
+#include "astra/ParallelProjectionGeometry3D.h"
+#include "astra/ParallelVecProjectionGeometry3D.h"
+#include "astra/Projector3D.h"
+#include "astra/CudaProjector3D.h"
+#include "astra/Float32ProjectionData3DMemory.h"
+#include "astra/Float32VolumeData3DMemory.h"
+#include "astra/Logging.h"
+
+#include "../cuda/3d/mem3d.h"
+
+#include <cstring>
+
+namespace astra {
+
+// JOB:
+//  
+// VolumePart
+// ProjectionPart
+// FP-or-BP
+// SET-or-ADD
+
+
+// Running a set of jobs:
+//
+// [ Assume OUTPUT Parts in a single JobSet don't alias?? ]
+// Group jobs by output Part
+// One thread per group?
+
+// Automatically split parts if too large
+// Performance model for odd-sized tasks?
+// Automatically split parts if not enough tasks to fill available GPUs
+
+
+// Splitting:
+// Constraints:
+//   number of sub-parts divisible by N
+//   max size of sub-parts
+
+// For splitting on both input and output side:
+//   How to divide up memory? (Optimization problem; compute/benchmark)
+//   (First approach: 0.5/0.5)
+
+
+
+bool CCompositeGeometryManager::splitJobs(TJobSet &jobs, size_t maxSize, int div, TJobSet &split)
+{
+	split.clear();
+
+	for (TJobSet::const_iterator i = jobs.begin(); i != jobs.end(); ++i)
+	{
+		CPart* pOutput = i->first;
+		const TJobList &L = i->second;
+
+		// 1. Split output part
+		// 2. Per sub-part:
+		//    a. reduce input part
+		//    b. split input part
+		//    c. create jobs for new (input,output) subparts
+
+		TPartList splitOutput = pOutput->split(maxSize/3, div);
+
+		for (TJobList::const_iterator j = L.begin(); j != L.end(); ++j)
+		{
+			const SJob &job = *j;
+
+			for (TPartList::iterator i_out = splitOutput.begin();
+			     i_out != splitOutput.end(); ++i_out)
+			{
+				boost::shared_ptr<CPart> outputPart = *i_out;
+				split[outputPart.get()] = TJobList();
+
+				SJob newjob;
+				newjob.pOutput = outputPart;
+				newjob.eType = j->eType;
+				newjob.eMode = j->eMode;
+				newjob.pProjector = j->pProjector;
+
+				CPart* input = job.pInput->reduce(outputPart.get());
+
+				if (input->getSize() == 0) {
+					ASTRA_DEBUG("Empty input");
+					newjob.eType = SJob::JOB_NOP;
+					split[outputPart.get()].push_back(newjob);
+					continue;
+				}
+
+				size_t remainingSize = ( maxSize - outputPart->getSize() ) / 2;
+
+				TPartList splitInput = input->split(remainingSize, 1);
+				delete input;
+				ASTRA_DEBUG("Input split into %d parts", splitInput.size());
+
+				for (TPartList::iterator i_in = splitInput.begin();
+				     i_in != splitInput.end(); ++i_in)
+				{
+					newjob.pInput = *i_in;
+
+					split[outputPart.get()].push_back(newjob);
+
+					// Second and later (input) parts should always be added to
+					// output of first (input) part.
+					newjob.eMode = SJob::MODE_ADD;
+				}
+
+			
+			}
+
+		}
+	}
+
+	return true;
+}
+
+CCompositeGeometryManager::CPart::CPart(const CPart& other)
+{
+	eType = other.eType;
+	pData = other.pData;
+	subX = other.subX;
+	subY = other.subY;
+	subZ = other.subZ;
+}
+
+CCompositeGeometryManager::CVolumePart::CVolumePart(const CVolumePart& other)
+ : CPart(other)
+{
+	pGeom = other.pGeom->clone();
+}
+
+CCompositeGeometryManager::CVolumePart::~CVolumePart()
+{
+	delete pGeom;
+}
+
+void CCompositeGeometryManager::CVolumePart::getDims(size_t &x, size_t &y, size_t &z)
+{
+	if (!pGeom) {
+		x = y = z = 0;
+		return;
+	}
+
+	x = pGeom->getGridColCount();
+	y = pGeom->getGridRowCount();
+	z = pGeom->getGridSliceCount();
+}
+
+size_t CCompositeGeometryManager::CPart::getSize()
+{
+	size_t x, y, z;
+	getDims(x, y, z);
+	return x * y * z;
+}
+
+
+
+CCompositeGeometryManager::CPart* CCompositeGeometryManager::CVolumePart::reduce(const CPart *_other)
+{
+	const CProjectionPart *other = dynamic_cast<const CProjectionPart *>(_other);
+	assert(other);
+
+	// TODO: Is 0.5 sufficient?
+	double umin = -0.5;
+	double umax = other->pGeom->getDetectorColCount() + 0.5;
+	double vmin = -0.5;
+	double vmax = other->pGeom->getDetectorRowCount() + 0.5;
+
+	double uu[4];
+	double vv[4];
+	uu[0] = umin; vv[0] = vmin;
+	uu[1] = umin; vv[1] = vmax;
+	uu[2] = umax; vv[2] = vmin;
+	uu[3] = umax; vv[3] = vmax;
+
+	double pixx = pGeom->getPixelLengthX();
+	double pixy = pGeom->getPixelLengthY();
+	double pixz = pGeom->getPixelLengthZ();
+
+	double xmin = pGeom->getWindowMinX() - 0.5 * pixx;
+	double xmax = pGeom->getWindowMaxX() + 0.5 * pixx;
+	double ymin = pGeom->getWindowMinY() - 0.5 * pixy;
+	double ymax = pGeom->getWindowMaxY() + 0.5 * pixy;
+
+	// NB: Flipped
+	double zmax = pGeom->getWindowMinZ() - 2.5 * pixz;
+	double zmin = pGeom->getWindowMaxZ() + 2.5 * pixz;
+
+	// TODO: This isn't as tight as it could be.
+	// In particular it won't detect the detector being
+	// missed entirely on the u side.
+
+	for (int i = 0; i < other->pGeom->getProjectionCount(); ++i) {
+		for (int j = 0; j < 4; ++j) {
+			double px, py, pz;
+
+			other->pGeom->backprojectPointX(i, uu[j], vv[j], xmin, py, pz);
+			//ASTRA_DEBUG("%f %f (%f - %f)", py, pz, ymin, ymax);
+			if (pz < zmin) zmin = pz;
+			if (pz > zmax) zmax = pz;
+			other->pGeom->backprojectPointX(i, uu[j], vv[j], xmax, py, pz);
+			//ASTRA_DEBUG("%f %f (%f - %f)", py, pz, ymin, ymax);
+			if (pz < zmin) zmin = pz;
+			if (pz > zmax) zmax = pz;
+
+			other->pGeom->backprojectPointY(i, uu[j], vv[j], ymin, px, pz);
+			//ASTRA_DEBUG("%f %f (%f - %f)", px, pz, xmin, xmax);
+			if (pz < zmin) zmin = pz;
+			if (pz > zmax) zmax = pz;
+			other->pGeom->backprojectPointY(i, uu[j], vv[j], ymax, px, pz);
+			//ASTRA_DEBUG("%f %f (%f - %f)", px, pz, xmin, xmax);
+			if (pz < zmin) zmin = pz;
+			if (pz > zmax) zmax = pz;
+		}
+	}
+
+	//ASTRA_DEBUG("coord extent: %f - %f", zmin, zmax);
+
+	zmin = (zmin - pixz - pGeom->getWindowMinZ()) / pixz;
+	zmax = (zmax + pixz - pGeom->getWindowMinZ()) / pixz;
+
+	int _zmin = (int)floor(zmin);
+	int _zmax = (int)ceil(zmax);
+
+	//ASTRA_DEBUG("index extent: %d - %d", _zmin, _zmax);
+
+	if (_zmin < 0)
+		_zmin = 0;
+	if (_zmax > pGeom->getGridSliceCount())
+		_zmax = pGeom->getGridSliceCount();
+
+	if (_zmax <= _zmin) {
+		_zmin = _zmax = 0;
+	}
+	//ASTRA_DEBUG("adjusted extent: %d - %d", _zmin, _zmax);
+
+	CVolumePart *sub = new CVolumePart();
+	sub->subX = this->subX;
+	sub->subY = this->subY;
+	sub->subZ = this->subZ + _zmin;
+	sub->pData = pData;
+
+	if (_zmin == _zmax) {
+		sub->pGeom = 0;
+	} else {
+		sub->pGeom = new CVolumeGeometry3D(pGeom->getGridColCount(),
+		                                   pGeom->getGridRowCount(),
+		                                   _zmax - _zmin,
+		                                   pGeom->getWindowMinX(),
+		                                   pGeom->getWindowMinY(),
+		                                   pGeom->getWindowMinZ() + _zmin * pixz,
+		                                   pGeom->getWindowMaxX(),
+		                                   pGeom->getWindowMaxY(),
+		                                   pGeom->getWindowMinZ() + _zmax * pixz);
+	}
+
+	ASTRA_DEBUG("Reduce volume from %d - %d to %d - %d", this->subZ, this->subZ +  pGeom->getGridSliceCount(), this->subZ + _zmin, this->subZ + _zmax);
+
+	return sub;
+}
+
+
+
+static size_t ceildiv(size_t a, size_t b) {
+    return (a + b - 1) / b;
+}
+
+static size_t computeVerticalSplit(size_t maxBlock, int div, size_t sliceCount)
+{
+    size_t blockSize = maxBlock;
+    size_t blockCount = ceildiv(sliceCount, blockSize);
+
+    // Increase number of blocks to be divisible by div
+    size_t divCount = div * ceildiv(blockCount, div);
+
+    // If divCount is above sqrt(number of slices), then
+    // we can't guarantee divisibility by div, but let's try anyway
+    if (ceildiv(sliceCount, ceildiv(sliceCount, divCount)) % div == 0) {
+        blockCount = divCount;
+    } else {
+        // If divisibility isn't achievable, we may want to optimize
+        // differently.
+        // TODO: Figure out how to model and optimize this.
+    }
+
+    // Final adjustment to make blocks more evenly sized
+    // (This can't make the blocks larger)
+    blockSize = ceildiv(sliceCount, blockCount); 
+
+    ASTRA_DEBUG("%ld %ld -> %ld * %ld\n", sliceCount, maxBlock, blockCount, blockSize);
+
+    assert(blockSize <= maxBlock);
+    assert((divCount * divCount > sliceCount) || (blockCount % div) == 0);
+
+    return blockSize;
+}
+
+template<class V, class P>
+static V* getProjectionVectors(const P* geom);
+
+template<>
+SConeProjection* getProjectionVectors(const CConeProjectionGeometry3D* pProjGeom)
+{
+	return genConeProjections(pProjGeom->getProjectionCount(),
+	                          pProjGeom->getDetectorColCount(),
+	                          pProjGeom->getDetectorRowCount(),
+	                          pProjGeom->getOriginSourceDistance(),
+	                          pProjGeom->getOriginDetectorDistance(),
+	                          pProjGeom->getDetectorSpacingX(),
+	                          pProjGeom->getDetectorSpacingY(),
+	                          pProjGeom->getProjectionAngles());
+}
+
+template<>
+SConeProjection* getProjectionVectors(const CConeVecProjectionGeometry3D* pProjGeom)
+{
+	int nth = pProjGeom->getProjectionCount();
+
+	SConeProjection* pProjs = new SConeProjection[nth];
+	for (int i = 0; i < nth; ++i)
+		pProjs[i] = pProjGeom->getProjectionVectors()[i];
+
+	return pProjs;
+}
+
+template<>
+SPar3DProjection* getProjectionVectors(const CParallelProjectionGeometry3D* pProjGeom)
+{
+	return genPar3DProjections(pProjGeom->getProjectionCount(),
+	                           pProjGeom->getDetectorColCount(),
+	                           pProjGeom->getDetectorRowCount(),
+	                           pProjGeom->getDetectorSpacingX(),
+	                           pProjGeom->getDetectorSpacingY(),
+	                           pProjGeom->getProjectionAngles());
+}
+
+template<>
+SPar3DProjection* getProjectionVectors(const CParallelVecProjectionGeometry3D* pProjGeom)
+{
+	int nth = pProjGeom->getProjectionCount();
+
+	SPar3DProjection* pProjs = new SPar3DProjection[nth];
+	for (int i = 0; i < nth; ++i)
+		pProjs[i] = pProjGeom->getProjectionVectors()[i];
+
+	return pProjs;
+}
+
+
+template<class V>
+static void translateProjectionVectors(V* pProjs, int count, double dv)
+{
+	for (int i = 0; i < count; ++i) {
+		pProjs[i].fDetSX += dv * pProjs[i].fDetVX;
+		pProjs[i].fDetSY += dv * pProjs[i].fDetVY;
+		pProjs[i].fDetSZ += dv * pProjs[i].fDetVZ;
+	}
+}
+
+
+
+static CProjectionGeometry3D* getSubProjectionGeometry(const CProjectionGeometry3D* pProjGeom, int v, int size)
+{
+	// First convert to vectors, then translate, then convert into new object
+
+	const CConeProjectionGeometry3D* conegeom = dynamic_cast<const CConeProjectionGeometry3D*>(pProjGeom);
+	const CParallelProjectionGeometry3D* par3dgeom = dynamic_cast<const CParallelProjectionGeometry3D*>(pProjGeom);
+	const CParallelVecProjectionGeometry3D* parvec3dgeom = dynamic_cast<const CParallelVecProjectionGeometry3D*>(pProjGeom);
+	const CConeVecProjectionGeometry3D* conevec3dgeom = dynamic_cast<const CConeVecProjectionGeometry3D*>(pProjGeom);
+
+	if (conegeom || conevec3dgeom) {
+		SConeProjection* pConeProjs;
+		if (conegeom) {
+			pConeProjs = getProjectionVectors<SConeProjection>(conegeom);
+		} else {
+			pConeProjs = getProjectionVectors<SConeProjection>(conevec3dgeom);
+		}
+
+		translateProjectionVectors(pConeProjs, pProjGeom->getProjectionCount(), v);
+
+		CProjectionGeometry3D* ret = new CConeVecProjectionGeometry3D(pProjGeom->getProjectionCount(),
+		                                                              size,
+		                                                              pProjGeom->getDetectorColCount(),
+		                                                              pConeProjs);
+
+
+		delete[] pConeProjs;
+		return ret;
+	} else {
+		assert(par3dgeom || parvec3dgeom);
+		SPar3DProjection* pParProjs;
+		if (par3dgeom) {
+			pParProjs = getProjectionVectors<SPar3DProjection>(par3dgeom);
+		} else {
+			pParProjs = getProjectionVectors<SPar3DProjection>(parvec3dgeom);
+		}
+
+		translateProjectionVectors(pParProjs, pProjGeom->getProjectionCount(), v);
+
+		CProjectionGeometry3D* ret = new CParallelVecProjectionGeometry3D(pProjGeom->getProjectionCount(),
+		                                                                  size,
+		                                                                  pProjGeom->getDetectorColCount(),
+		                                                                  pParProjs);
+
+		delete[] pParProjs;
+		return ret;
+	}
+
+}
+
+
+
+// split self into sub-parts:
+// - each no bigger than maxSize
+// - number of sub-parts is divisible by div
+// - maybe all approximately the same size?
+CCompositeGeometryManager::TPartList CCompositeGeometryManager::CVolumePart::split(size_t maxSize, int div)
+{
+	TPartList ret;
+
+	if (true) {
+		// Split in vertical direction only at first, until we figure out
+		// a model for splitting in other directions
+
+		size_t sliceSize = ((size_t) pGeom->getGridColCount()) * pGeom->getGridRowCount();
+		int sliceCount = pGeom->getGridSliceCount();
+		size_t blockSize = computeVerticalSplit(maxSize / sliceSize, div, sliceCount);
+
+		int rem = sliceCount % blockSize;
+
+		ASTRA_DEBUG("From %d to %d step %d", -(rem / 2), sliceCount, blockSize);
+
+		for (int z = -(rem / 2); z < sliceCount; z += blockSize) {
+			int newsubZ = z;
+			if (newsubZ < 0) newsubZ = 0;
+			int endZ = z + blockSize;
+			if (endZ > sliceCount) endZ = sliceCount;
+			int size = endZ - newsubZ;
+
+			CVolumePart *sub = new CVolumePart();
+			sub->subX = this->subX;
+			sub->subY = this->subY;
+			sub->subZ = this->subZ + newsubZ;
+
+			ASTRA_DEBUG("VolumePart split %d %d %d -> %p", sub->subX, sub->subY, sub->subZ, (void*)sub);
+
+			double shift = pGeom->getPixelLengthZ() * newsubZ;
+
+			sub->pData = pData;
+			sub->pGeom = new CVolumeGeometry3D(pGeom->getGridColCount(),
+			                                   pGeom->getGridRowCount(),
+			                                   size,
+			                                   pGeom->getWindowMinX(),
+			                                   pGeom->getWindowMinY(),
+			                                   pGeom->getWindowMinZ() + shift,
+			                                   pGeom->getWindowMaxX(),
+			                                   pGeom->getWindowMaxY(),
+			                                   pGeom->getWindowMinZ() + shift + size * pGeom->getPixelLengthZ());
+
+			ret.push_back(boost::shared_ptr<CPart>(sub));
+		}
+	}
+
+	return ret;
+}
+
+CCompositeGeometryManager::CVolumePart* CCompositeGeometryManager::CVolumePart::clone() const
+{
+	return new CVolumePart(*this);
+}
+
+CCompositeGeometryManager::CProjectionPart::CProjectionPart(const CProjectionPart& other)
+ : CPart(other)
+{
+	pGeom = other.pGeom->clone();
+}
+
+CCompositeGeometryManager::CProjectionPart::~CProjectionPart()
+{
+	delete pGeom;
+}
+
+void CCompositeGeometryManager::CProjectionPart::getDims(size_t &x, size_t &y, size_t &z)
+{
+	if (!pGeom) {
+		x = y = z = 0;
+		return;
+	}
+
+	x = pGeom->getDetectorColCount();
+	y = pGeom->getProjectionCount();
+	z = pGeom->getDetectorRowCount();
+}
+
+
+CCompositeGeometryManager::CPart* CCompositeGeometryManager::CProjectionPart::reduce(const CPart *_other)
+{
+	const CVolumePart *other = dynamic_cast<const CVolumePart *>(_other);
+	assert(other);
+
+	double vmin_g, vmax_g;
+
+	// reduce self to only cover intersection with projection of VolumePart
+	// (Project corners of volume, take bounding box)
+
+	for (int i = 0; i < pGeom->getProjectionCount(); ++i) {
+
+		double vol_u[8];
+		double vol_v[8];
+
+		double pixx = other->pGeom->getPixelLengthX();
+		double pixy = other->pGeom->getPixelLengthY();
+		double pixz = other->pGeom->getPixelLengthZ();
+
+		// TODO: Is 0.5 sufficient?
+		double xmin = other->pGeom->getWindowMinX() - 0.5 * pixx;
+		double xmax = other->pGeom->getWindowMaxX() + 0.5 * pixx;
+		double ymin = other->pGeom->getWindowMinY() - 0.5 * pixy;
+		double ymax = other->pGeom->getWindowMaxY() + 0.5 * pixy;
+		double zmin = other->pGeom->getWindowMinZ() - 0.5 * pixz;
+		double zmax = other->pGeom->getWindowMaxZ() + 0.5 * pixz;
+
+		pGeom->projectPoint(xmin, ymin, zmin, i, vol_u[0], vol_v[0]);
+		pGeom->projectPoint(xmin, ymin, zmax, i, vol_u[1], vol_v[1]);
+		pGeom->projectPoint(xmin, ymax, zmin, i, vol_u[2], vol_v[2]);
+		pGeom->projectPoint(xmin, ymax, zmax, i, vol_u[3], vol_v[3]);
+		pGeom->projectPoint(xmax, ymin, zmin, i, vol_u[4], vol_v[4]);
+		pGeom->projectPoint(xmax, ymin, zmax, i, vol_u[5], vol_v[5]);
+		pGeom->projectPoint(xmax, ymax, zmin, i, vol_u[6], vol_v[6]);
+		pGeom->projectPoint(xmax, ymax, zmax, i, vol_u[7], vol_v[7]);
+
+		double vmin = vol_v[0];
+		double vmax = vol_v[0];
+
+		for (int j = 1; j < 8; ++j) {
+			if (vol_v[j] < vmin)
+				vmin = vol_v[j];
+			if (vol_v[j] > vmax)
+				vmax = vol_v[j];
+		}
+
+		if (i == 0 || vmin < vmin_g)
+			vmin_g = vmin;
+		if (i == 0 || vmax > vmax_g)
+			vmax_g = vmax;
+	}
+
+	// fprintf(stderr, "v extent: %f %f\n", vmin_g, vmax_g);
+
+	int _vmin = (int)floor(vmin_g - 1.0f);
+	int _vmax = (int)ceil(vmax_g + 1.0f);
+	if (_vmin < 0)
+		_vmin = 0;
+	if (_vmax > pGeom->getDetectorRowCount())
+		_vmax = pGeom->getDetectorRowCount();
+
+	if (_vmin >= _vmax) {
+		_vmin = _vmax = 0;
+	}
+
+	CProjectionPart *sub = new CProjectionPart();
+	sub->subX = this->subX;
+	sub->subY = this->subY;
+	sub->subZ = this->subZ + _vmin;
+
+	sub->pData = pData;
+
+	if (_vmin == _vmax) {
+		sub->pGeom = 0;
+	} else {
+		sub->pGeom = getSubProjectionGeometry(pGeom, _vmin, _vmax - _vmin);
+	}
+
+	ASTRA_DEBUG("Reduce projection from %d - %d to %d - %d", this->subZ, this->subZ + pGeom->getDetectorRowCount(), this->subZ + _vmin, this->subZ + _vmax);
+
+	return sub;
+}
+
+
+CCompositeGeometryManager::TPartList CCompositeGeometryManager::CProjectionPart::split(size_t maxSize, int div)
+{
+	TPartList ret;
+
+	if (true) {
+		// Split in vertical direction only at first, until we figure out
+		// a model for splitting in other directions
+
+		size_t sliceSize = ((size_t) pGeom->getDetectorColCount()) * pGeom->getProjectionCount();
+		int sliceCount = pGeom->getDetectorRowCount();
+		size_t blockSize = computeVerticalSplit(maxSize / sliceSize, div, sliceCount);
+
+		int rem = sliceCount % blockSize;
+
+		for (int z = -(rem / 2); z < sliceCount; z += blockSize) {
+			int newsubZ = z;
+			if (newsubZ < 0) newsubZ = 0;
+			int endZ = z + blockSize;
+			if (endZ > sliceCount) endZ = sliceCount;
+			int size = endZ - newsubZ;
+
+			CProjectionPart *sub = new CProjectionPart();
+			sub->subX = this->subX;
+			sub->subY = this->subY;
+			sub->subZ = this->subZ + newsubZ;
+
+			ASTRA_DEBUG("ProjectionPart split %d %d %d -> %p", sub->subX, sub->subY, sub->subZ, (void*)sub);
+
+			sub->pData = pData;
+
+			sub->pGeom = getSubProjectionGeometry(pGeom, newsubZ, size);
+
+			ret.push_back(boost::shared_ptr<CPart>(sub));
+		}
+	}
+
+	return ret;
+
+}
+
+CCompositeGeometryManager::CProjectionPart* CCompositeGeometryManager::CProjectionPart::clone() const
+{
+	return new CProjectionPart(*this);
+}
+
+
+bool CCompositeGeometryManager::doFP(CProjector3D *pProjector, CFloat32VolumeData3DMemory *pVolData,
+                                     CFloat32ProjectionData3DMemory *pProjData)
+{
+	ASTRA_DEBUG("CCompositeGeometryManager::doFP");
+	// Create single job for FP
+	// Run result
+
+	CVolumePart *input = new CVolumePart();
+	input->pData = pVolData;
+	input->subX = 0;
+	input->subY = 0;
+	input->subZ = 0;
+	input->pGeom = pVolData->getGeometry()->clone();
+	ASTRA_DEBUG("Main FP VolumePart -> %p", (void*)input);
+
+	CProjectionPart *output = new CProjectionPart();
+	output->pData = pProjData;
+	output->subX = 0;
+	output->subY = 0;
+	output->subZ = 0;
+	output->pGeom = pProjData->getGeometry()->clone();
+	ASTRA_DEBUG("Main FP ProjectionPart -> %p", (void*)output);
+
+	SJob FP;
+	FP.pInput = boost::shared_ptr<CPart>(input);
+	FP.pOutput = boost::shared_ptr<CPart>(output);
+	FP.pProjector = pProjector;
+	FP.eType = SJob::JOB_FP;
+	FP.eMode = SJob::MODE_SET;
+
+	TJobList L;
+	L.push_back(FP);
+
+	return doJobs(L);
+}
+
+bool CCompositeGeometryManager::doBP(CProjector3D *pProjector, CFloat32VolumeData3DMemory *pVolData,
+                                     CFloat32ProjectionData3DMemory *pProjData)
+{
+	ASTRA_DEBUG("CCompositeGeometryManager::doBP");
+	// Back-project pProjData into pVolData: wrap both in parts at offset (0,0,0),
+	// build a single BP job and run it through doJobs().
+
+	CProjectionPart *input = new CProjectionPart(); // input part: the projection data with a clone of its geometry
+	input->pData = pProjData;
+	input->subX = 0;
+	input->subY = 0;
+	input->subZ = 0;
+	input->pGeom = pProjData->getGeometry()->clone();
+	// Output part: the volume data, with its own clone of the volume geometry
+	CVolumePart *output = new CVolumePart();
+	output->pData = pVolData;
+	output->subX = 0;
+	output->subY = 0;
+	output->subZ = 0;
+	output->pGeom = pVolData->getGeometry()->clone();
+	// The shared_ptrs below take ownership of both parts; MODE_SET makes doJobs() start from a zeroed output buffer
+	SJob BP;
+	BP.pInput = boost::shared_ptr<CPart>(input);
+	BP.pOutput = boost::shared_ptr<CPart>(output);
+	BP.pProjector = pProjector;
+	BP.eType = SJob::JOB_BP;
+	BP.eMode = SJob::MODE_SET;
+
+	TJobList L;
+	L.push_back(BP);
+
+	return doJobs(L);
+}
+
+// Run a list of jobs: group them by output part, split them so they fit in GPU memory,
+// and execute each group on the GPU. Returns false if any GPU allocation, copy, FP or BP failed.
+bool CCompositeGeometryManager::doJobs(TJobList &jobs)
+{
+	ASTRA_DEBUG("CCompositeGeometryManager::doJobs");
+
+	// Sort job list into job set by output part
+	TJobSet jobset;
+
+	for (TJobList::iterator i = jobs.begin(); i != jobs.end(); ++i) {
+		jobset[i->pOutput.get()].push_back(*i);
+	}
+
+	size_t maxSize = astraCUDA3d::availableGPUMemory();
+	if (maxSize == 0) {
+		ASTRA_WARN("Unable to get available GPU memory. Defaulting to 1GB.");
+		maxSize = 1024 * 1024 * 1024;
+	} else {
+		ASTRA_DEBUG("Detected %lu bytes of GPU memory", maxSize);
+	}
+	maxSize = (maxSize * 9) / 10; // only budget 90% of the reported amount
+
+	maxSize /= sizeof(float); // from bytes to number of floats
+	int div = 1;
+
+	// TODO: Multi-GPU support
+
+	// Split jobs to fit
+	TJobSet split;
+	splitJobs(jobset, maxSize, div, split);
+	jobset.clear();
+
+	// Run jobs
+	bool allOk = true; // cleared as soon as any GPU operation fails; this is the return value
+	for (TJobSet::iterator iter = split.begin(); iter != split.end(); ++iter) {
+
+		CPart* output = iter->first;
+		TJobList& L = iter->second;
+
+		assert(!L.empty());
+
+		bool zero = L.begin()->eMode == SJob::MODE_SET; // the first job decides whether the output starts from zero
+
+		size_t outx, outy, outz;
+		output->getDims(outx, outy, outz);
+
+		if (L.begin()->eType == SJob::JOB_NOP) {
+			// Nothing to compute: at most clear this part's sub-block of the host data, one row at a time (slice offset subZ, row offset subY, column offset subX)
+			if (zero) {
+				for (size_t z = 0; z < outz; ++z) {
+					for (size_t y = 0; y < outy; ++y) {
+						float* ptr = output->pData->getData();
+						ptr += (z + output->subZ) * (size_t)output->pData->getHeight() * (size_t)output->pData->getWidth();
+						ptr += (y + output->subY) * (size_t)output->pData->getWidth();
+						ptr += output->subX;
+						memset(ptr, 0, sizeof(float) * outx);
+					}
+				}
+			}
+			continue;
+		}
+
+		// Describe where this part lives inside the full host array (used for the copy back from the GPU)
+		astraCUDA3d::SSubDimensions3D dstdims;
+		dstdims.nx = output->pData->getWidth();
+		dstdims.pitch = dstdims.nx;
+		dstdims.ny = output->pData->getHeight();
+		dstdims.nz = output->pData->getDepth();
+		dstdims.subnx = outx;
+		dstdims.subny = outy;
+		dstdims.subnz = outz;
+		ASTRA_DEBUG("dstdims: %d,%d,%d in %d,%d,%d", dstdims.subnx, dstdims.subny, dstdims.subnz, dstdims.nx, dstdims.ny, dstdims.nz);
+		dstdims.subx = output->subX;
+		dstdims.suby = output->subY;
+		dstdims.subz = output->subZ;
+		float *dst = output->pData->getData();
+
+		astraCUDA3d::MemHandle3D outputMem = astraCUDA3d::allocateGPUMemory(outx, outy, outz, zero ? astraCUDA3d::INIT_ZERO : astraCUDA3d::INIT_NO);
+		bool ok = outputMem;
+		if (!ok) { ASTRA_ERROR("Error allocating GPU memory for output"); allOk = false; continue; }
+		for (TJobList::iterator i = L.begin(); i != L.end(); ++i) {
+			SJob &j = *i;
+
+			assert(j.pInput);
+
+			CCudaProjector3D *projector = dynamic_cast<CCudaProjector3D*>(j.pProjector);
+			Cuda3DProjectionKernel projKernel = ker3d_default;
+			int detectorSuperSampling = 1;
+			int voxelSuperSampling = 1;
+			if (projector) {
+				projKernel = projector->getProjectionKernel();
+				detectorSuperSampling = projector->getDetectorSuperSampling();
+				voxelSuperSampling = projector->getVoxelSuperSampling();
+			}
+
+			size_t inx, iny, inz;
+			j.pInput->getDims(inx, iny, inz);
+			astraCUDA3d::MemHandle3D inputMem = astraCUDA3d::allocateGPUMemory(inx, iny, inz, astraCUDA3d::INIT_NO);
+			if (!inputMem) { ASTRA_ERROR("Error allocating GPU memory for input"); allOk = false; continue; }
+			astraCUDA3d::SSubDimensions3D srcdims;
+			srcdims.nx = j.pInput->pData->getWidth();
+			srcdims.pitch = srcdims.nx;
+			srcdims.ny = j.pInput->pData->getHeight();
+			srcdims.nz = j.pInput->pData->getDepth();
+			srcdims.subnx = inx;
+			srcdims.subny = iny;
+			srcdims.subnz = inz;
+			srcdims.subx = j.pInput->subX;
+			srcdims.suby = j.pInput->subY;
+			srcdims.subz = j.pInput->subZ;
+			const float *src = j.pInput->pData->getDataConst();
+
+			ok = astraCUDA3d::copyToGPUMemory(src, inputMem, srcdims);
+			if (!ok) { ASTRA_ERROR("Error copying input data to GPU"); allOk = false; }
+
+			if (j.eType == SJob::JOB_FP) {
+				assert(dynamic_cast<CVolumePart*>(j.pInput.get()));
+				assert(dynamic_cast<CProjectionPart*>(j.pOutput.get()));
+
+				ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing FP");
+
+				ok = astraCUDA3d::FP(((CProjectionPart*)j.pOutput.get())->pGeom, outputMem, ((CVolumePart*)j.pInput.get())->pGeom, inputMem, detectorSuperSampling, projKernel);
+				if (!ok) { ASTRA_ERROR("Error performing sub-FP"); allOk = false; }
+				ASTRA_DEBUG("CCompositeGeometryManager::doJobs: FP done");
+			} else if (j.eType == SJob::JOB_BP) {
+				assert(dynamic_cast<CVolumePart*>(j.pOutput.get()));
+				assert(dynamic_cast<CProjectionPart*>(j.pInput.get()));
+
+				ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing BP");
+
+				ok = astraCUDA3d::BP(((CProjectionPart*)j.pInput.get())->pGeom, inputMem, ((CVolumePart*)j.pOutput.get())->pGeom, outputMem, voxelSuperSampling);
+				if (!ok) { ASTRA_ERROR("Error performing sub-BP"); allOk = false; }
+				ASTRA_DEBUG("CCompositeGeometryManager::doJobs: BP done");
+			} else {
+				assert(false);
+			}
+
+			ok = astraCUDA3d::freeGPUMemory(inputMem);
+			if (!ok) { ASTRA_ERROR("Error freeing GPU memory"); allOk = false; }
+
+		}
+
+		ok = astraCUDA3d::copyFromGPUMemory(dst, outputMem, dstdims);
+		if (!ok) { ASTRA_ERROR("Error copying output data from GPU"); allOk = false; }
+
+		ok = astraCUDA3d::freeGPUMemory(outputMem);
+		if (!ok) { ASTRA_ERROR("Error freeing GPU memory"); allOk = false; }
+	}
+
+	return allOk;
+}
+
+
+
+}
+
+#endif
diff --git a/src/ConeProjectionGeometry3D.cpp b/src/ConeProjectionGeometry3D.cpp
index dd22eba..18f0f8a 100644
--- a/src/ConeProjectionGeometry3D.cpp
+++ b/src/ConeProjectionGeometry3D.cpp
@@ -29,6 +29,7 @@ $Id$
 #include "astra/ConeProjectionGeometry3D.h"
 
 #include "astra/Logging.h"
+#include "astra/GeometryUtil3D.h"
 
 #include <boost/lexical_cast.hpp>
 #include <cstring>
@@ -230,14 +231,14 @@ CVector3D CConeProjectionGeometry3D::getProjectionDirection(int _iProjectionInde
 	return ret;
 }
 
-void CConeProjectionGeometry3D::projectPoint(float32 fX, float32 fY, float32 fZ,
-                                                 int iAngleIndex,
-                                                 float32 &fU, float32 &fV) const
+void CConeProjectionGeometry3D::projectPoint(double fX, double fY, double fZ,
+                                             int iAngleIndex,
+                                             double &fU, double &fV) const
 {
 	ASTRA_ASSERT(iAngleIndex >= 0);
 	ASTRA_ASSERT(iAngleIndex < m_iProjectionAngleCount);
 
-	float alpha = m_pfProjectionAngles[iAngleIndex];
+	double alpha = m_pfProjectionAngles[iAngleIndex];
 
 	// Project point onto optical axis
 
@@ -245,14 +246,14 @@ void CConeProjectionGeometry3D::projectPoint(float32 fX, float32 fY, float32 fZ,
 	// Vector source->origin is (-sin(alpha), cos(alpha))
 
 	// Distance from source, projected on optical axis
-	float fD = -sin(alpha) * fX + cos(alpha) * fY + m_fOriginSourceDistance;
+	double fD = -sin(alpha) * fX + cos(alpha) * fY + m_fOriginSourceDistance;
 
 	// Scale fZ to detector plane
 	fV = detectorOffsetYToRowIndexFloat( (fZ * (m_fOriginSourceDistance + m_fOriginDetectorDistance)) / fD );
 
 
 	// Orthogonal distance in XY-plane to optical axis
-	float fS = cos(alpha) * fX + sin(alpha) * fY;
+	double fS = cos(alpha) * fX + sin(alpha) * fY;
 
 	// Scale fS to detector plane
 	fU = detectorOffsetXToColIndexFloat( (fS * (m_fOriginSourceDistance + m_fOriginDetectorDistance)) / fD );
@@ -261,5 +262,84 @@ void CConeProjectionGeometry3D::projectPoint(float32 fX, float32 fY, float32 fZ,
 
 }
 
+void CConeProjectionGeometry3D::backprojectPointX(int iAngleIndex, double fU, double fV,
+	                               double fX, double &fY, double &fZ) const
+{
+	ASTRA_ASSERT(iAngleIndex >= 0);
+	ASTRA_ASSERT(iAngleIndex < m_iProjectionAngleCount);
+	// Intersect the ray from the source through detector point (fU,fV) of projection iAngleIndex with the plane x = fX; the result is written to fY, fZ.
+	SConeProjection *projs = genConeProjections(1, m_iDetectorColCount, m_iDetectorRowCount,
+	                                           m_fOriginSourceDistance,
+	                                           m_fOriginDetectorDistance,
+	                                           m_fDetectorSpacingX, m_fDetectorSpacingY,
+	                                           &m_pfProjectionAngles[iAngleIndex]);
+	// projs is a one-element array describing only this angle in vector form; it is released at the end of the function
+	SConeProjection &proj = projs[0];
+	// 3D position of the detector point
+	double px = proj.fDetSX + fU * proj.fDetUX + fV * proj.fDetVX;
+	double py = proj.fDetSY + fU * proj.fDetUY + fV * proj.fDetVY;
+	double pz = proj.fDetSZ + fU * proj.fDetUZ + fV * proj.fDetVZ;
+	// a: position of the plane along the ray (0 at the source, 1 at the detector point); px == proj.fSrcX is not handled and divides by zero
+	double a = (fX - proj.fSrcX) / (px - proj.fSrcX);
+
+	fY = proj.fSrcY + a * (py - proj.fSrcY);
+	fZ = proj.fSrcZ + a * (pz - proj.fSrcZ);
+
+	delete[] projs;
+}
+
+void CConeProjectionGeometry3D::backprojectPointY(int iAngleIndex, double fU, double fV,
+	                               double fY, double &fX, double &fZ) const
+{
+	ASTRA_ASSERT(iAngleIndex >= 0);
+	ASTRA_ASSERT(iAngleIndex < m_iProjectionAngleCount);
+	// Intersect the ray from the source through detector point (fU,fV) of projection iAngleIndex with the plane y = fY; the result is written to fX, fZ.
+	SConeProjection *projs = genConeProjections(1, m_iDetectorColCount, m_iDetectorRowCount,
+	                                           m_fOriginSourceDistance,
+	                                           m_fOriginDetectorDistance,
+	                                           m_fDetectorSpacingX, m_fDetectorSpacingY,
+	                                           &m_pfProjectionAngles[iAngleIndex]);
+	// projs is a one-element array describing only this angle in vector form; it is released at the end of the function
+	SConeProjection &proj = projs[0];
+	// 3D position of the detector point
+	double px = proj.fDetSX + fU * proj.fDetUX + fV * proj.fDetVX;
+	double py = proj.fDetSY + fU * proj.fDetUY + fV * proj.fDetVY;
+	double pz = proj.fDetSZ + fU * proj.fDetUZ + fV * proj.fDetVZ;
+	// a: position of the plane along the ray (0 at the source, 1 at the detector point); py == proj.fSrcY is not handled and divides by zero
+	double a = (fY - proj.fSrcY) / (py - proj.fSrcY);
+
+	fX = proj.fSrcX + a * (px - proj.fSrcX);
+	fZ = proj.fSrcZ + a * (pz - proj.fSrcZ);
+
+	delete[] projs;
+}
+
+void CConeProjectionGeometry3D::backprojectPointZ(int iAngleIndex, double fU, double fV,
+	                               double fZ, double &fX, double &fY) const
+{
+	ASTRA_ASSERT(iAngleIndex >= 0);
+	ASTRA_ASSERT(iAngleIndex < m_iProjectionAngleCount);
+	// Intersect the ray from the source through detector point (fU,fV) of projection iAngleIndex with the plane z = fZ; the result is written to fX, fY.
+	SConeProjection *projs = genConeProjections(1, m_iDetectorColCount, m_iDetectorRowCount,
+	                                           m_fOriginSourceDistance,
+	                                           m_fOriginDetectorDistance,
+	                                           m_fDetectorSpacingX, m_fDetectorSpacingY,
+	                                           &m_pfProjectionAngles[iAngleIndex]);
+	// projs is a one-element array describing only this angle in vector form; it is released at the end of the function
+	SConeProjection &proj = projs[0];
+	// 3D position of the detector point
+	double px = proj.fDetSX + fU * proj.fDetUX + fV * proj.fDetVX;
+	double py = proj.fDetSY + fU * proj.fDetUY + fV * proj.fDetVY;
+	double pz = proj.fDetSZ + fU * proj.fDetUZ + fV * proj.fDetVZ;
+	// a: position of the plane along the ray (0 at the source, 1 at the detector point); pz == proj.fSrcZ is not handled and divides by zero
+	double a = (fZ - proj.fSrcZ) / (pz - proj.fSrcZ);
+
+	fX = proj.fSrcX + a * (px - proj.fSrcX);
+	fY = proj.fSrcY + a * (py - proj.fSrcY);
+
+	delete[] projs;
+}
+
+
 
 } // end namespace astra
diff --git a/src/ConeVecProjectionGeometry3D.cpp b/src/ConeVecProjectionGeometry3D.cpp
index 47ed630..86e3bd6 100644
--- a/src/ConeVecProjectionGeometry3D.cpp
+++ b/src/ConeVecProjectionGeometry3D.cpp
@@ -241,9 +241,9 @@ CVector3D CConeVecProjectionGeometry3D::getProjectionDirection(int _iProjectionI
 	return CVector3D(p.fDetSX + (u+0.5)*p.fDetUX + (v+0.5)*p.fDetVX - p.fSrcX, p.fDetSY + (u+0.5)*p.fDetUY + (v+0.5)*p.fDetVY - p.fSrcY, p.fDetSZ + (u+0.5)*p.fDetUZ + (v+0.5)*p.fDetVZ - p.fSrcZ);
 }
 
-void CConeVecProjectionGeometry3D::projectPoint(float32 fX, float32 fY, float32 fZ,
+void CConeVecProjectionGeometry3D::projectPoint(double fX, double fY, double fZ,
                                                  int iAngleIndex,
-                                                 float32 &fU, float32 &fV) const
+                                                 double &fU, double &fV) const
 {
 	ASTRA_ASSERT(iAngleIndex >= 0);
 	ASTRA_ASSERT(iAngleIndex < m_iProjectionAngleCount);
@@ -262,6 +262,60 @@ void CConeVecProjectionGeometry3D::projectPoint(float32 fX, float32 fY, float32
 }
 
 
+void CConeVecProjectionGeometry3D::backprojectPointX(int iAngleIndex, double fU, double fV,
+	                               double fX, double &fY, double &fZ) const
+{
+	ASTRA_ASSERT(iAngleIndex >= 0);
+	ASTRA_ASSERT(iAngleIndex < m_iProjectionAngleCount);
+	// Intersect the ray from the source through detector point (fU,fV) of projection iAngleIndex with the plane x = fX; the result is written to fY, fZ.
+	SConeProjection &proj = m_pProjectionAngles[iAngleIndex];
+	// 3D position of the detector point
+	double px = proj.fDetSX + fU * proj.fDetUX + fV * proj.fDetVX;
+	double py = proj.fDetSY + fU * proj.fDetUY + fV * proj.fDetVY;
+	double pz = proj.fDetSZ + fU * proj.fDetUZ + fV * proj.fDetVZ;
+	// a: position of the plane along the ray (0 at the source, 1 at the detector point); px == proj.fSrcX is not handled and divides by zero
+	double a = (fX - proj.fSrcX) / (px - proj.fSrcX);
+
+	fY = proj.fSrcY + a * (py - proj.fSrcY);
+	fZ = proj.fSrcZ + a * (pz - proj.fSrcZ);
+}
+
+void CConeVecProjectionGeometry3D::backprojectPointY(int iAngleIndex, double fU, double fV,
+	                               double fY, double &fX, double &fZ) const
+{
+	ASTRA_ASSERT(iAngleIndex >= 0);
+	ASTRA_ASSERT(iAngleIndex < m_iProjectionAngleCount);
+	// Intersect the ray from the source through detector point (fU,fV) of projection iAngleIndex with the plane y = fY; the result is written to fX, fZ.
+	SConeProjection &proj = m_pProjectionAngles[iAngleIndex];
+	// 3D position of the detector point
+	double px = proj.fDetSX + fU * proj.fDetUX + fV * proj.fDetVX;
+	double py = proj.fDetSY + fU * proj.fDetUY + fV * proj.fDetVY;
+	double pz = proj.fDetSZ + fU * proj.fDetUZ + fV * proj.fDetVZ;
+	// a: position of the plane along the ray (0 at the source, 1 at the detector point); py == proj.fSrcY is not handled and divides by zero
+	double a = (fY - proj.fSrcY) / (py - proj.fSrcY);
+
+	fX = proj.fSrcX + a * (px - proj.fSrcX);
+	fZ = proj.fSrcZ + a * (pz - proj.fSrcZ);
+}
+
+void CConeVecProjectionGeometry3D::backprojectPointZ(int iAngleIndex, double fU, double fV,
+	                               double fZ, double &fX, double &fY) const
+{
+	ASTRA_ASSERT(iAngleIndex >= 0);
+	ASTRA_ASSERT(iAngleIndex < m_iProjectionAngleCount);
+	// Intersect the ray from the source through detector point (fU,fV) of projection iAngleIndex with the plane z = fZ; the result is written to fX, fY.
+	SConeProjection &proj = m_pProjectionAngles[iAngleIndex];
+	// 3D position of the detector point
+	double px = proj.fDetSX + fU * proj.fDetUX + fV * proj.fDetVX;
+	double py = proj.fDetSY + fU * proj.fDetUY + fV * proj.fDetVY;
+	double pz = proj.fDetSZ + fU * proj.fDetUZ + fV * proj.fDetVZ;
+	// a: position of the plane along the ray (0 at the source, 1 at the detector point); pz == proj.fSrcZ is not handled and divides by zero
+	double a = (fZ - proj.fSrcZ) / (pz - proj.fSrcZ);
+
+	fX = proj.fSrcX + a * (px - proj.fSrcX);
+	fY = proj.fSrcY + a * (py - proj.fSrcY);
+}
+
 //----------------------------------------------------------------------------------------
 
 bool CConeVecProjectionGeometry3D::_check()
diff --git a/src/CudaBackProjectionAlgorithm3D.cpp b/src/CudaBackProjectionAlgorithm3D.cpp
index 8cf4c3b..ce8e111 100644
--- a/src/CudaBackProjectionAlgorithm3D.cpp
+++ b/src/CudaBackProjectionAlgorithm3D.cpp
@@ -37,6 +37,7 @@ $Id$
 #include "astra/ParallelProjectionGeometry3D.h"
 #include "astra/ParallelVecProjectionGeometry3D.h"
 #include "astra/ConeVecProjectionGeometry3D.h"
+#include "astra/CompositeGeometryManager.h"
 
 #include "astra/Logging.h"
 
@@ -203,9 +204,16 @@ void CCudaBackProjectionAlgorithm3D::run(int _iNrIterations)
 		                         &volgeom, projgeom,
 		                         m_iGPUIndex, m_iVoxelSuperSampling);
 	} else {
+
+#if 1
+		CCompositeGeometryManager cgm;
+
+		cgm.doBP(m_pProjector, pReconMem, pSinoMem);
+#else
 		astraCudaBP(pReconMem->getData(), pSinoMem->getDataConst(),
 		            &volgeom, projgeom,
 		            m_iGPUIndex, m_iVoxelSuperSampling);
+#endif
 	}
 
 }
diff --git a/src/CudaForwardProjectionAlgorithm3D.cpp b/src/CudaForwardProjectionAlgorithm3D.cpp
index e57e077..209f5a5 100644
--- a/src/CudaForwardProjectionAlgorithm3D.cpp
+++ b/src/CudaForwardProjectionAlgorithm3D.cpp
@@ -40,6 +40,8 @@ $Id$
 #include "astra/ParallelVecProjectionGeometry3D.h"
 #include "astra/ConeVecProjectionGeometry3D.h"
 
+#include "astra/CompositeGeometryManager.h"
+
 #include "astra/Logging.h"
 
 #include "../cuda/3d/astra3d.h"
@@ -263,6 +265,12 @@ void CCudaForwardProjectionAlgorithm3D::run(int)
 	// check initialized
 	assert(m_bIsInitialized);
 
+#if 1
+	CCompositeGeometryManager cgm;
+
+	cgm.doFP(m_pProjector, m_pVolume, m_pProjections);
+
+#else
 	const CProjectionGeometry3D* projgeom = m_pProjections->getGeometry();
 	const CVolumeGeometry3D& volgeom = *m_pVolume->getGeometry();
 
@@ -294,6 +302,7 @@ void CCudaForwardProjectionAlgorithm3D::run(int)
 	astraCudaFP(m_pVolume->getDataConst(), m_pProjections->getData(),
 	            &volgeom, projgeom,
 	            m_iGPUIndex, m_iDetectorSuperSampling, projKernel);
+#endif
 }
 
 
diff --git a/src/GeometryUtil3D.cpp b/src/GeometryUtil3D.cpp
index 52dd5a9..c6bfd8b 100644
--- a/src/GeometryUtil3D.cpp
+++ b/src/GeometryUtil3D.cpp
@@ -28,8 +28,96 @@ $Id$
 
 #include "astra/GeometryUtil3D.h"
 
+#include <cmath>
+
 namespace astra {
 
+// Build vector-form cone projections for iProjAngles angles by rotating a base configuration about the z axis; the result is a new[] array (callers in this patch release it with delete[]).
+SConeProjection* genConeProjections(unsigned int iProjAngles,
+                                    unsigned int iProjU,
+                                    unsigned int iProjV,
+                                    double fOriginSourceDistance,
+                                    double fOriginDetectorDistance,
+                                    double fDetUSize,
+                                    double fDetVSize,
+                                    const float *pfAngles)
+{
+	SConeProjection base; // configuration before rotation: source on the negative y axis
+	base.fSrcX = 0.0f;
+	base.fSrcY = -fOriginSourceDistance;
+	base.fSrcZ = 0.0f;
+	// DetS: detector reference point, shifted by minus half the detector extent in u (x) and v (z), at y = fOriginDetectorDistance
+	base.fDetSX = iProjU * fDetUSize * -0.5f;
+	base.fDetSY = fOriginDetectorDistance;
+	base.fDetSZ = iProjV * fDetVSize * -0.5f;
+	// DetU: step of length fDetUSize along x
+	base.fDetUX = fDetUSize;
+	base.fDetUY = 0.0f;
+	base.fDetUZ = 0.0f;
+	// DetV: step of length fDetVSize along z
+	base.fDetVX = 0.0f;
+	base.fDetVY = 0.0f;
+	base.fDetVZ = fDetVSize;
+
+	SConeProjection* p = new SConeProjection[iProjAngles];
+	// ROTATE0 writes base vector 'name', rotated by alpha in the xy plane (z unchanged), into p[i]
+#define ROTATE0(name,i,alpha) do { p[i].f##name##X = base.f##name##X * cos(alpha) - base.f##name##Y * sin(alpha); p[i].f##name##Y = base.f##name##X * sin(alpha) + base.f##name##Y * cos(alpha); p[i].f##name##Z = base.f##name##Z; } while(0)
+
+	for (unsigned int i = 0; i < iProjAngles; ++i) {
+		ROTATE0(Src, i, pfAngles[i]);
+		ROTATE0(DetS, i, pfAngles[i]);
+		ROTATE0(DetU, i, pfAngles[i]);
+		ROTATE0(DetV, i, pfAngles[i]);
+	}
+
+#undef ROTATE0
+
+	return p;
+}
+// Build vector-form parallel-beam projections for iProjAngles angles by rotating a base configuration about the z axis; the result is a new[] array (callers in this patch release it with delete[]).
+SPar3DProjection* genPar3DProjections(unsigned int iProjAngles,
+                                      unsigned int iProjU,
+                                      unsigned int iProjV,
+                                      double fDetUSize,
+                                      double fDetVSize,
+                                      const float *pfAngles)
+{
+	SPar3DProjection base; // configuration before rotation: rays travel along +y
+	base.fRayX = 0.0f;
+	base.fRayY = 1.0f;
+	base.fRayZ = 0.0f;
+	// DetS: detector reference point, shifted by minus half the detector extent in u (x) and v (z), at y = 0
+	base.fDetSX = iProjU * fDetUSize * -0.5f;
+	base.fDetSY = 0.0f;
+	base.fDetSZ = iProjV * fDetVSize * -0.5f;
+	// DetU: step of length fDetUSize along x
+	base.fDetUX = fDetUSize;
+	base.fDetUY = 0.0f;
+	base.fDetUZ = 0.0f;
+	// DetV: step of length fDetVSize along z
+	base.fDetVX = 0.0f;
+	base.fDetVY = 0.0f;
+	base.fDetVZ = fDetVSize;
+
+	SPar3DProjection* p = new SPar3DProjection[iProjAngles];
+	// ROTATE0 writes base vector 'name', rotated by alpha in the xy plane (z unchanged), into p[i]
+#define ROTATE0(name,i,alpha) do { p[i].f##name##X = base.f##name##X * cos(alpha) - base.f##name##Y * sin(alpha); p[i].f##name##Y = base.f##name##X * sin(alpha) + base.f##name##Y * cos(alpha); p[i].f##name##Z = base.f##name##Z; } while(0)
+
+	for (unsigned int i = 0; i < iProjAngles; ++i) {
+		ROTATE0(Ray, i, pfAngles[i]);
+		ROTATE0(DetS, i, pfAngles[i]);
+		ROTATE0(DetU, i, pfAngles[i]);
+		ROTATE0(DetV, i, pfAngles[i]);
+	}
+
+#undef ROTATE0
+
+	return p;
+}
+
+
+
+
 // (See declaration in header for (mathematical) description of these functions)
 
 
@@ -72,4 +160,88 @@ void computeBP_UV_Coeffs(const SConeProjection& proj, double &fUX, double &fUY,
 }
 
 
+// TODO: Handle cases of rays parallel to coordinate planes
+
+void backprojectPointX(const SPar3DProjection& proj, double fU, double fV,
+                       double fX, double &fY, double &fZ)
+{
+	double px = proj.fDetSX + fU * proj.fDetUX + fV * proj.fDetVX;
+	double py = proj.fDetSY + fU * proj.fDetUY + fV * proj.fDetVY;
+	double pz = proj.fDetSZ + fU * proj.fDetUZ + fV * proj.fDetVZ;
+	// (px,py,pz) is detector point (fU,fV); a is the multiple of the ray direction that moves it onto the plane x = fX (fRayX == 0 divides by zero)
+	double a = (fX - px) / proj.fRayX;
+	// Outputs: the y and z coordinates of that intersection
+	fY = py + a * proj.fRayY;
+	fZ = pz + a * proj.fRayZ;
+}
+
+void backprojectPointY(const SPar3DProjection& proj, double fU, double fV,
+                       double fY, double &fX, double &fZ)
+{
+	double px = proj.fDetSX + fU * proj.fDetUX + fV * proj.fDetVX;
+	double py = proj.fDetSY + fU * proj.fDetUY + fV * proj.fDetVY;
+	double pz = proj.fDetSZ + fU * proj.fDetUZ + fV * proj.fDetVZ;
+	// (px,py,pz) is detector point (fU,fV); a is the multiple of the ray direction that moves it onto the plane y = fY (fRayY == 0 divides by zero)
+	double a = (fY - py) / proj.fRayY;
+	// Outputs: the x and z coordinates of that intersection
+	fX = px + a * proj.fRayX;
+	fZ = pz + a * proj.fRayZ;
+
+}
+
+void backprojectPointZ(const SPar3DProjection& proj, double fU, double fV,
+                       double fZ, double &fX, double &fY)
+{
+	double px = proj.fDetSX + fU * proj.fDetUX + fV * proj.fDetVX;
+	double py = proj.fDetSY + fU * proj.fDetUY + fV * proj.fDetVY;
+	double pz = proj.fDetSZ + fU * proj.fDetUZ + fV * proj.fDetVZ;
+	// (px,py,pz) is detector point (fU,fV); a is the multiple of the ray direction that moves it onto the plane z = fZ (fRayZ == 0 divides by zero)
+	double a = (fZ - pz) / proj.fRayZ;
+	// Outputs: the x and y coordinates of that intersection
+	fX = px + a * proj.fRayX;
+	fY = py + a * proj.fRayY;
+}
+
+
+
+void backprojectPointX(const SConeProjection& proj, double fU, double fV,
+                       double fX, double &fY, double &fZ)
+{
+	double px = proj.fDetSX + fU * proj.fDetUX + fV * proj.fDetVX;
+	double py = proj.fDetSY + fU * proj.fDetUY + fV * proj.fDetVY;
+	double pz = proj.fDetSZ + fU * proj.fDetUZ + fV * proj.fDetVZ;
+	// (px,py,pz) is detector point (fU,fV); a locates the plane x = fX on the source->detector ray (0 at the source, 1 at the detector point; px == fSrcX divides by zero)
+	double a = (fX - proj.fSrcX) / (px - proj.fSrcX);
+	// Outputs: the y and z coordinates of that intersection
+	fY = proj.fSrcY + a * (py - proj.fSrcY);
+	fZ = proj.fSrcZ + a * (pz - proj.fSrcZ);
+}
+
+void backprojectPointY(const SConeProjection& proj, double fU, double fV,
+                       double fY, double &fX, double &fZ)
+{
+	double px = proj.fDetSX + fU * proj.fDetUX + fV * proj.fDetVX;
+	double py = proj.fDetSY + fU * proj.fDetUY + fV * proj.fDetVY;
+	double pz = proj.fDetSZ + fU * proj.fDetUZ + fV * proj.fDetVZ;
+	// (px,py,pz) is detector point (fU,fV); a locates the plane y = fY on the source->detector ray (0 at the source, 1 at the detector point; py == fSrcY divides by zero)
+	double a = (fY - proj.fSrcY) / (py - proj.fSrcY);
+	// Outputs: the x and z coordinates of that intersection
+	fX = proj.fSrcX + a * (px - proj.fSrcX);
+	fZ = proj.fSrcZ + a * (pz - proj.fSrcZ);
+}
+
+void backprojectPointZ(const SConeProjection& proj, double fU, double fV,
+                       double fZ, double &fX, double &fY)
+{
+	double px = proj.fDetSX + fU * proj.fDetUX + fV * proj.fDetVX;
+	double py = proj.fDetSY + fU * proj.fDetUY + fV * proj.fDetVY;
+	double pz = proj.fDetSZ + fU * proj.fDetUZ + fV * proj.fDetVZ;
+	// (px,py,pz) is detector point (fU,fV); a locates the plane z = fZ on the source->detector ray (0 at the source, 1 at the detector point; pz == fSrcZ divides by zero)
+	double a = (fZ - proj.fSrcZ) / (pz - proj.fSrcZ);
+	// Outputs: the x and y coordinates of that intersection
+	fX = proj.fSrcX + a * (px - proj.fSrcX);
+	fY = proj.fSrcY + a * (py - proj.fSrcY);
+}
+
+
 }
diff --git a/src/ParallelProjectionGeometry3D.cpp b/src/ParallelProjectionGeometry3D.cpp
index 1c87157..7b64fd9 100644
--- a/src/ParallelProjectionGeometry3D.cpp
+++ b/src/ParallelProjectionGeometry3D.cpp
@@ -27,8 +27,10 @@ $Id$
 */
 
 #include "astra/ParallelProjectionGeometry3D.h"
-#include <boost/lexical_cast.hpp>
 
+#include "astra/GeometryUtil3D.h"
+
+#include <boost/lexical_cast.hpp>
 #include <cstring>
 
 using namespace std;
@@ -185,9 +187,9 @@ CVector3D CParallelProjectionGeometry3D::getProjectionDirection(int _iProjection
 	return CVector3D(fDirX, fDirY, fDirZ);
 }
 
-void CParallelProjectionGeometry3D::projectPoint(float32 fX, float32 fY, float32 fZ,
+void CParallelProjectionGeometry3D::projectPoint(double fX, double fY, double fZ,
                                                  int iAngleIndex,
-                                                 float32 &fU, float32 &fV) const
+                                                 double &fU, double &fV) const
 {
 	ASTRA_ASSERT(iAngleIndex >= 0);
 	ASTRA_ASSERT(iAngleIndex < m_iProjectionAngleCount);
@@ -214,6 +216,79 @@ CParallelProjectionGeometry2D * CParallelProjectionGeometry3D::createProjectionG
 	return pOutput;
 }
 
+void CParallelProjectionGeometry3D::backprojectPointX(int iAngleIndex, double fU, double fV,
+	                               double fX, double &fY, double &fZ) const
+{
+	ASTRA_ASSERT(iAngleIndex >= 0);
+	ASTRA_ASSERT(iAngleIndex < m_iProjectionAngleCount);
+	// Intersect the parallel-beam ray through detector point (fU,fV) of projection iAngleIndex with the plane x = fX; the result is written to fY, fZ.
+	SPar3DProjection *projs = genPar3DProjections(1, m_iDetectorColCount, m_iDetectorRowCount,
+	                                           m_fDetectorSpacingX, m_fDetectorSpacingY,
+	                                           &m_pfProjectionAngles[iAngleIndex]);
+	// projs is a one-element array describing only this angle in vector form; it is released at the end of the function
+	SPar3DProjection &proj = projs[0];
+	// 3D position of the detector point
+	double px = proj.fDetSX + fU * proj.fDetUX + fV * proj.fDetVX;
+	double py = proj.fDetSY + fU * proj.fDetUY + fV * proj.fDetVY;
+	double pz = proj.fDetSZ + fU * proj.fDetUZ + fV * proj.fDetVZ;
+	// a: multiple of the ray direction that moves the detector point onto the plane; proj.fRayX == 0 is not handled and divides by zero
+	double a = (fX - px) / proj.fRayX;
+
+	fY = py + a * proj.fRayY;
+	fZ = pz + a * proj.fRayZ;
+
+	delete[] projs;
+}
+
+void CParallelProjectionGeometry3D::backprojectPointY(int iAngleIndex, double fU, double fV,
+	                               double fY, double &fX, double &fZ) const
+{
+	ASTRA_ASSERT(iAngleIndex >= 0);
+	ASTRA_ASSERT(iAngleIndex < m_iProjectionAngleCount);
+	// Intersect the parallel-beam ray through detector point (fU,fV) of projection iAngleIndex with the plane y = fY; the result is written to fX, fZ.
+	SPar3DProjection *projs = genPar3DProjections(1, m_iDetectorColCount, m_iDetectorRowCount,
+	                                           m_fDetectorSpacingX, m_fDetectorSpacingY,
+	                                           &m_pfProjectionAngles[iAngleIndex]);
+	// projs is a one-element array describing only this angle in vector form; it is released at the end of the function
+	SPar3DProjection &proj = projs[0];
+	// 3D position of the detector point
+	double px = proj.fDetSX + fU * proj.fDetUX + fV * proj.fDetVX;
+	double py = proj.fDetSY + fU * proj.fDetUY + fV * proj.fDetVY;
+	double pz = proj.fDetSZ + fU * proj.fDetUZ + fV * proj.fDetVZ;
+	// a: multiple of the ray direction that moves the detector point onto the plane; proj.fRayY == 0 is not handled and divides by zero
+	double a = (fY - py) / proj.fRayY;
+
+	fX = px + a * proj.fRayX;
+	fZ = pz + a * proj.fRayZ;
+
+	delete[] projs;
+}
+
+void CParallelProjectionGeometry3D::backprojectPointZ(int iAngleIndex, double fU, double fV,
+	                               double fZ, double &fX, double &fY) const
+{
+	ASTRA_ASSERT(iAngleIndex >= 0);
+	ASTRA_ASSERT(iAngleIndex < m_iProjectionAngleCount);
+	// Intersect the parallel-beam ray through detector point (fU,fV) of projection iAngleIndex with the plane z = fZ; the result is written to fX, fY.
+	SPar3DProjection *projs = genPar3DProjections(1, m_iDetectorColCount, m_iDetectorRowCount,
+	                                           m_fDetectorSpacingX, m_fDetectorSpacingY,
+	                                           &m_pfProjectionAngles[iAngleIndex]);
+	// projs is a one-element array describing only this angle in vector form; it is released at the end of the function
+	SPar3DProjection &proj = projs[0];
+	// 3D position of the detector point
+	double px = proj.fDetSX + fU * proj.fDetUX + fV * proj.fDetVX;
+	double py = proj.fDetSY + fU * proj.fDetUY + fV * proj.fDetVY;
+	double pz = proj.fDetSZ + fU * proj.fDetUZ + fV * proj.fDetVZ;
+	// a: multiple of the ray direction that moves the detector point onto the plane; proj.fRayZ == 0 (the case for the base ray built by genPar3DProjections) is not handled and divides by zero
+	double a = (fZ - pz) / proj.fRayZ;
+
+	fX = px + a * proj.fRayX;
+	fY = py + a * proj.fRayY;
+
+	delete[] projs;
+}
+
+
 //----------------------------------------------------------------------------------------
 
 } // end namespace astra
diff --git a/src/ParallelVecProjectionGeometry3D.cpp b/src/ParallelVecProjectionGeometry3D.cpp
index ffad6d0..d04400b 100644
--- a/src/ParallelVecProjectionGeometry3D.cpp
+++ b/src/ParallelVecProjectionGeometry3D.cpp
@@ -239,9 +239,9 @@ CVector3D CParallelVecProjectionGeometry3D::getProjectionDirection(int _iProject
 	return CVector3D(p.fRayX, p.fRayY, p.fRayZ);
 }
 
-void CParallelVecProjectionGeometry3D::projectPoint(float32 fX, float32 fY, float32 fZ,
-                                                 int iAngleIndex,
-                                                 float32 &fU, float32 &fV) const
+void CParallelVecProjectionGeometry3D::projectPoint(double fX, double fY, double fZ,
+                                                    int iAngleIndex,
+                                                    double &fU, double &fV) const
 {
 	ASTRA_ASSERT(iAngleIndex >= 0);
 	ASTRA_ASSERT(iAngleIndex < m_iProjectionAngleCount);
@@ -258,6 +258,61 @@ void CParallelVecProjectionGeometry3D::projectPoint(float32 fX, float32 fY, floa
 
 }
 
+void CParallelVecProjectionGeometry3D::backprojectPointX(int iAngleIndex, double fU, double fV,
+	                               double fX, double &fY, double &fZ) const
+{
+	ASTRA_ASSERT(iAngleIndex >= 0);
+	ASTRA_ASSERT(iAngleIndex < m_iProjectionAngleCount);
+	// Intersect the parallel-beam ray through detector point (fU,fV) of projection iAngleIndex with the plane x = fX; the result is written to fY, fZ.
+	SPar3DProjection &proj = m_pProjectionAngles[iAngleIndex];
+	// 3D position of the detector point
+	double px = proj.fDetSX + fU * proj.fDetUX + fV * proj.fDetVX;
+	double py = proj.fDetSY + fU * proj.fDetUY + fV * proj.fDetVY;
+	double pz = proj.fDetSZ + fU * proj.fDetUZ + fV * proj.fDetVZ;
+	// a: multiple of the ray direction that moves the detector point onto the plane; proj.fRayX == 0 is not handled and divides by zero
+	double a = (fX - px) / proj.fRayX;
+
+	fY = py + a * proj.fRayY;
+	fZ = pz + a * proj.fRayZ;
+}
+
+void CParallelVecProjectionGeometry3D::backprojectPointY(int iAngleIndex, double fU, double fV,
+	                               double fY, double &fX, double &fZ) const
+{
+	ASTRA_ASSERT(iAngleIndex >= 0);
+	ASTRA_ASSERT(iAngleIndex < m_iProjectionAngleCount);
+	// Intersect the parallel-beam ray through detector point (fU,fV) of projection iAngleIndex with the plane y = fY; the result is written to fX, fZ.
+	SPar3DProjection &proj = m_pProjectionAngles[iAngleIndex];
+	// 3D position of the detector point
+	double px = proj.fDetSX + fU * proj.fDetUX + fV * proj.fDetVX;
+	double py = proj.fDetSY + fU * proj.fDetUY + fV * proj.fDetVY;
+	double pz = proj.fDetSZ + fU * proj.fDetUZ + fV * proj.fDetVZ;
+	// a: multiple of the ray direction that moves the detector point onto the plane; proj.fRayY == 0 is not handled and divides by zero
+	double a = (fY - py) / proj.fRayY;
+
+	fX = px + a * proj.fRayX;
+	fZ = pz + a * proj.fRayZ;
+}
+
+void CParallelVecProjectionGeometry3D::backprojectPointZ(int iAngleIndex, double fU, double fV,
+	                               double fZ, double &fX, double &fY) const
+{
+	ASTRA_ASSERT(iAngleIndex >= 0);
+	ASTRA_ASSERT(iAngleIndex < m_iProjectionAngleCount);
+	// Intersect the parallel-beam ray through detector point (fU,fV) of projection iAngleIndex with the plane z = fZ; the result is written to fX, fY.
+	SPar3DProjection &proj = m_pProjectionAngles[iAngleIndex];
+	// 3D position of the detector point
+	double px = proj.fDetSX + fU * proj.fDetUX + fV * proj.fDetVX;
+	double py = proj.fDetSY + fU * proj.fDetUY + fV * proj.fDetVY;
+	double pz = proj.fDetSZ + fU * proj.fDetUZ + fV * proj.fDetVZ;
+	// a: multiple of the ray direction that moves the detector point onto the plane; proj.fRayZ == 0 is not handled and divides by zero
+	double a = (fZ - pz) / proj.fRayZ;
+
+	fX = px + a * proj.fRayX;
+	fY = py + a * proj.fRayY;
+}
+
+
 //----------------------------------------------------------------------------------------
 
 bool CParallelVecProjectionGeometry3D::_check()
-- 
cgit v1.2.3


From 81e7385c110a6210d0f9bc402df522301ec162f6 Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Fri, 4 Dec 2015 15:14:19 +0100
Subject: Add utility functions for creating FP/BP JobLists

---
 src/CompositeGeometryManager.cpp | 113 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 111 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/CompositeGeometryManager.cpp b/src/CompositeGeometryManager.cpp
index fc8bc2e..9be4797 100644
--- a/src/CompositeGeometryManager.cpp
+++ b/src/CompositeGeometryManager.cpp
@@ -102,7 +102,6 @@ bool CCompositeGeometryManager::splitJobs(TJobSet &jobs, size_t maxSize, int div
 			     i_out != splitOutput.end(); ++i_out)
 			{
 				boost::shared_ptr<CPart> outputPart = *i_out;
-				split[outputPart.get()] = TJobList();
 
 				SJob newjob;
 				newjob.pOutput = outputPart;
@@ -319,7 +318,7 @@ static size_t computeVerticalSplit(size_t maxBlock, int div, size_t sliceCount)
     // (This can't make the blocks larger)
     blockSize = ceildiv(sliceCount, blockCount); 
 
-    ASTRA_DEBUG("%ld %ld -> %ld * %ld\n", sliceCount, maxBlock, blockCount, blockSize);
+    ASTRA_DEBUG("%ld %ld -> %ld * %ld", sliceCount, maxBlock, blockCount, blockSize);
 
     assert(blockSize <= maxBlock);
     assert((divCount * divCount > sliceCount) || (blockCount % div) == 0);
@@ -725,6 +724,116 @@ bool CCompositeGeometryManager::doBP(CProjector3D *pProjector, CFloat32VolumeDat
 	return doJobs(L);
 }
 
+bool CCompositeGeometryManager::doFP(CProjector3D *pProjector, const std::vector<CFloat32VolumeData3DMemory *>& volData, const std::vector<CFloat32ProjectionData3DMemory *>& projData)
+{
+	ASTRA_DEBUG("CCompositeGeometryManager::doFP, multi-volume");
+
+	std::vector<CFloat32VolumeData3DMemory *>::const_iterator i;
+	std::vector<boost::shared_ptr<CPart> > inputs;
+
+	for (i = volData.begin(); i != volData.end(); ++i) {
+		CVolumePart *input = new CVolumePart();
+		input->pData = *i;
+		input->subX = 0;
+		input->subY = 0;
+		input->subZ = 0;
+		input->pGeom = (*i)->getGeometry()->clone();
+
+		inputs.push_back(boost::shared_ptr<CPart>(input));
+	}
+
+	std::vector<CFloat32ProjectionData3DMemory *>::const_iterator j;
+	std::vector<boost::shared_ptr<CPart> > outputs;
+
+	for (j = projData.begin(); j != projData.end(); ++j) {
+		CProjectionPart *output = new CProjectionPart();
+		output->pData = *j;
+		output->subX = 0;
+		output->subY = 0;
+		output->subZ = 0;
+		output->pGeom = (*j)->getGeometry()->clone();
+
+		outputs.push_back(boost::shared_ptr<CPart>(output));
+	}
+
+	std::vector<boost::shared_ptr<CPart> >::iterator i2;
+	std::vector<boost::shared_ptr<CPart> >::iterator j2;
+	TJobList L;
+
+	for (i2 = outputs.begin(); i2 != outputs.end(); ++i2) {
+		SJob FP;
+		FP.eMode = SJob::MODE_SET;
+		for (j2 = inputs.begin(); j2 != inputs.end(); ++j2) {
+			FP.pInput = *j2;
+			FP.pOutput = *i2;
+			FP.pProjector = pProjector;
+			FP.eType = SJob::JOB_FP;
+			L.push_back(FP);
+
+			// Set first, add rest
+			FP.eMode = SJob::MODE_ADD;
+		}
+	}
+
+	return doJobs(L);
+}
+
+bool CCompositeGeometryManager::doBP(CProjector3D *pProjector, const std::vector<CFloat32VolumeData3DMemory *>& volData, const std::vector<CFloat32ProjectionData3DMemory *>& projData)
+{
+	ASTRA_DEBUG("CCompositeGeometryManager::doBP, multi-volume");
+
+
+	std::vector<CFloat32VolumeData3DMemory *>::const_iterator i;
+	std::vector<boost::shared_ptr<CPart> > outputs;
+
+	for (i = volData.begin(); i != volData.end(); ++i) {
+		CVolumePart *output = new CVolumePart();
+		output->pData = *i;
+		output->subX = 0;
+		output->subY = 0;
+		output->subZ = 0;
+		output->pGeom = (*i)->getGeometry()->clone();
+
+		outputs.push_back(boost::shared_ptr<CPart>(output));
+	}
+
+	std::vector<CFloat32ProjectionData3DMemory *>::const_iterator j;
+	std::vector<boost::shared_ptr<CPart> > inputs;
+
+	for (j = projData.begin(); j != projData.end(); ++j) {
+		CProjectionPart *input = new CProjectionPart();
+		input->pData = *j;
+		input->subX = 0;
+		input->subY = 0;
+		input->subZ = 0;
+		input->pGeom = (*j)->getGeometry()->clone();
+
+		inputs.push_back(boost::shared_ptr<CPart>(input));
+	}
+
+	std::vector<boost::shared_ptr<CPart> >::iterator i2;
+	std::vector<boost::shared_ptr<CPart> >::iterator j2;
+	TJobList L;
+
+	for (i2 = outputs.begin(); i2 != outputs.end(); ++i2) {
+		SJob BP;
+		BP.eMode = SJob::MODE_SET;
+		for (j2 = inputs.begin(); j2 != inputs.end(); ++j2) {
+			BP.pInput = *j2;
+			BP.pOutput = *i2;
+			BP.pProjector = pProjector;
+			BP.eType = SJob::JOB_BP;
+			L.push_back(BP);
+
+			// Set first, add rest
+			BP.eMode = SJob::MODE_ADD;
+		}
+	}
+
+	return doJobs(L);
+}
+
+
 
 
 bool CCompositeGeometryManager::doJobs(TJobList &jobs)
-- 
cgit v1.2.3


From ea0ca6ec9a29bd95aaa74d701e51e65c2f64f894 Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Wed, 9 Dec 2015 19:04:12 +0100
Subject: Fix uninitialized variable

---
 src/CudaForwardProjectionAlgorithm.cpp | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src')

diff --git a/src/CudaForwardProjectionAlgorithm.cpp b/src/CudaForwardProjectionAlgorithm.cpp
index 9ca13ae..d38469c 100644
--- a/src/CudaForwardProjectionAlgorithm.cpp
+++ b/src/CudaForwardProjectionAlgorithm.cpp
@@ -94,6 +94,7 @@ bool CCudaForwardProjectionAlgorithm::initialize(const Config& _cfg)
 	ConfigStackCheck<CAlgorithm> CC("CudaForwardProjectionAlgorithm", this, _cfg);
 
 	// Projector
+	m_pProjector = 0;
 	XMLNode node = _cfg.self.getSingleNode("ProjectorId");
 	if (node) {
 		int id = boost::lexical_cast<int>(node.getContent());
-- 
cgit v1.2.3


From 73fa0a8df8203288aca032c71caa5ff47d35a3e2 Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Thu, 17 Dec 2015 10:47:02 +0100
Subject: Remove unused functions

---
 src/Utilities.cpp | 97 -------------------------------------------------------
 1 file changed, 97 deletions(-)

(limited to 'src')

diff --git a/src/Utilities.cpp b/src/Utilities.cpp
index 3f65e9a..cb54e93 100644
--- a/src/Utilities.cpp
+++ b/src/Utilities.cpp
@@ -28,101 +28,4 @@ $Id$
 
 #include "astra/Utilities.h"
 
-using namespace std;
-using namespace astra;
 
-//-----------------------------------------------------------------------------
-// Trim Whitespace Characters
-void StringUtil::trim(std::string& _sString, bool _bLeft, bool _bRight)
-{
-	// trim right
-	if (_bRight)
-		_sString.erase(_sString.find_last_not_of(" \t\r") + 1); 
-
-	// trim left
-	if (_bLeft)
-		_sString.erase(0, _sString.find_first_not_of(" \t\r")); 
-}
-//-----------------------------------------------------------------------------
-// Split String
-vector<string> StringUtil::split(const string& _sString, const string& _sDelims)
-{
-	std::vector<string> ret;
-
-	size_t start, pos;
-	start = 0;
-	do {
-		pos = _sString.find_first_of(_sDelims, start);
-		if (pos == start) {
-			// Do nothing
-			start = pos + 1;
-		} else if (pos == string::npos) {
-			// Copy the rest of the string
-			ret.push_back(_sString.substr(start));
-			break;
-		} else {
-			// Copy up to newt delimiter
-			ret.push_back(_sString.substr(start, pos - start));
-			start = pos + 1;
-		}
-
-		// Parse up to next real data (in case there are two delims after each other)
-		start = _sString.find_first_not_of(_sDelims, start);
-	} while (pos != string::npos);
-
-	return ret;
-}
-//-----------------------------------------------------------------------------
-// Cast string to int
-bool StringUtil::toInt(const string& _sString, int& _iValue)
-{
-	std::istringstream ss(_sString);
-	ss >> _iValue;
-	return !ss.fail();
-}
-//-----------------------------------------------------------------------------
-// Cast string to float
-bool StringUtil::toFloat32(const string& _sString, float32& _fValue)
-{
-	std::istringstream ss(_sString);
-	ss >> _fValue;
-	return !ss.fail();
-}
-//-----------------------------------------------------------------------------
-// Convert string to Lower Case
-void StringUtil::toLowerCase(std::string& _sString)
-{
-	std::transform(_sString.begin(),
-				   _sString.end(),		
-				   _sString.begin(),
-				   ::tolower);
-}
-//-----------------------------------------------------------------------------    
-// Convert string to Upper Case
-void StringUtil::toUpperCase(std::string& _sString) 
-{
-	std::transform(_sString.begin(),
-				   _sString.end(),
-				   _sString.begin(),
-				   ::toupper);
-}
-//-----------------------------------------------------------------------------
-
-
-
-
-//-----------------------------------------------------------------------------    
-// Get Extension
-string FileSystemUtil::getExtension(string& _sFilename)
-{
-	string sExtension = "";
-	for (int i = _sFilename.length() - 1; 0 < i; i--) {
-		if (_sFilename[i] == '.') {
-			std::transform(sExtension.begin(),sExtension.end(),sExtension.begin(),::tolower);
-			return sExtension;
-		}
-		sExtension = _sFilename[i] + sExtension;
-	}
-	return "";
-}
-//-----------------------------------------------------------------------------
-- 
cgit v1.2.3


From fc86917da1a175c04e9bd2e5f0bedb0a48a81c26 Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Thu, 17 Dec 2015 14:37:41 +0100
Subject: Replace boost::lexical_cast by stringstreams

This is to avoid the dependence of lexical_cast on the current locale.
The stringstreams used for the new string parsing/output functions
are explicitly imbued with the C/classic locale.
---
 src/ArtAlgorithm.cpp                        |  2 -
 src/BackProjectionAlgorithm.cpp             |  2 -
 src/CglsAlgorithm.cpp                       |  2 -
 src/ConeProjectionGeometry3D.cpp            |  5 +-
 src/ConeVecProjectionGeometry3D.cpp         | 30 ++++-----
 src/CudaBackProjectionAlgorithm3D.cpp       |  2 -
 src/CudaCglsAlgorithm3D.cpp                 |  2 -
 src/CudaDartMaskAlgorithm.cpp               |  5 +-
 src/CudaDartMaskAlgorithm3D.cpp             |  5 +-
 src/CudaDartSmoothingAlgorithm.cpp          |  5 +-
 src/CudaDartSmoothingAlgorithm3D.cpp        |  5 +-
 src/CudaDataOperationAlgorithm.cpp          |  5 +-
 src/CudaFDKAlgorithm3D.cpp                  |  2 -
 src/CudaFilteredBackProjectionAlgorithm.cpp | 13 ++--
 src/CudaForwardProjectionAlgorithm.cpp      |  8 +--
 src/CudaForwardProjectionAlgorithm3D.cpp    |  8 +--
 src/CudaReconstructionAlgorithm2D.cpp       |  2 -
 src/CudaRoiSelectAlgorithm.cpp              |  3 +-
 src/CudaSirtAlgorithm.cpp                   |  5 +-
 src/CudaSirtAlgorithm3D.cpp                 |  2 -
 src/FanFlatBeamLineKernelProjector2D.cpp    |  1 -
 src/FanFlatBeamStripKernelProjector2D.cpp   |  1 -
 src/FanFlatProjectionGeometry2D.cpp         |  5 +-
 src/FanFlatVecProjectionGeometry2D.cpp      | 15 +++--
 src/FilteredBackProjectionAlgorithm.cpp     |  8 +--
 src/ForwardProjectionAlgorithm.cpp          | 12 ++--
 src/ParallelBeamBlobKernelProjector2D.cpp   |  7 +--
 src/ParallelBeamLineKernelProjector2D.cpp   |  1 -
 src/ParallelBeamLinearKernelProjector2D.cpp |  1 -
 src/ParallelBeamStripKernelProjector2D.cpp  |  1 -
 src/ParallelProjectionGeometry2D.cpp        |  1 -
 src/ParallelProjectionGeometry3D.cpp        |  1 -
 src/ParallelVecProjectionGeometry3D.cpp     | 30 ++++-----
 src/PluginAlgorithm.cpp                     | 14 ++---
 src/ProjectionGeometry2D.cpp                |  6 +-
 src/ProjectionGeometry3D.cpp                | 10 ++-
 src/ReconstructionAlgorithm2D.cpp           | 12 ++--
 src/ReconstructionAlgorithm3D.cpp           | 12 ++--
 src/ReconstructionAlgorithmMultiSlice2D.cpp |  8 +--
 src/SartAlgorithm.cpp                       |  2 -
 src/SirtAlgorithm.cpp                       |  2 -
 src/SparseMatrixProjectionGeometry2D.cpp    |  3 +-
 src/SparseMatrixProjector2D.cpp             |  1 -
 src/Utilities.cpp                           | 95 +++++++++++++++++++++++++++++
 src/VolumeGeometry2D.cpp                    |  5 +-
 src/VolumeGeometry3D.cpp                    |  8 +--
 src/XMLNode.cpp                             | 77 +++++++++--------------
 47 files changed, 234 insertions(+), 218 deletions(-)

(limited to 'src')

diff --git a/src/ArtAlgorithm.cpp b/src/ArtAlgorithm.cpp
index 6a699ec..b59bd93 100644
--- a/src/ArtAlgorithm.cpp
+++ b/src/ArtAlgorithm.cpp
@@ -28,8 +28,6 @@ $Id$
 
 #include "astra/ArtAlgorithm.h"
 
-#include <boost/lexical_cast.hpp>
-
 #include "astra/AstraObjectManager.h"
 
 using namespace std;
diff --git a/src/BackProjectionAlgorithm.cpp b/src/BackProjectionAlgorithm.cpp
index f561a90..c9beee1 100644
--- a/src/BackProjectionAlgorithm.cpp
+++ b/src/BackProjectionAlgorithm.cpp
@@ -28,8 +28,6 @@ $Id$
 
 #include "astra/BackProjectionAlgorithm.h"
 
-#include <boost/lexical_cast.hpp>
-
 #include "astra/AstraObjectManager.h"
 #include "astra/DataProjectorPolicies.h"
 
diff --git a/src/CglsAlgorithm.cpp b/src/CglsAlgorithm.cpp
index b9031e3..1ca2549 100644
--- a/src/CglsAlgorithm.cpp
+++ b/src/CglsAlgorithm.cpp
@@ -28,8 +28,6 @@ $Id$
 
 #include "astra/CglsAlgorithm.h"
 
-#include <boost/lexical_cast.hpp>
-
 #include "astra/AstraObjectManager.h"
 
 using namespace std;
diff --git a/src/ConeProjectionGeometry3D.cpp b/src/ConeProjectionGeometry3D.cpp
index 18f0f8a..99b4bf4 100644
--- a/src/ConeProjectionGeometry3D.cpp
+++ b/src/ConeProjectionGeometry3D.cpp
@@ -31,7 +31,6 @@ $Id$
 #include "astra/Logging.h"
 #include "astra/GeometryUtil3D.h"
 
-#include <boost/lexical_cast.hpp>
 #include <cstring>
 
 using namespace std;
@@ -90,13 +89,13 @@ bool CConeProjectionGeometry3D::initialize(const Config& _cfg)
 	// Required: DistanceOriginDetector
 	XMLNode node = _cfg.self.getSingleNode("DistanceOriginDetector");
 	ASTRA_CONFIG_CHECK(node, "ConeProjectionGeometry3D", "No DistanceOriginDetector tag specified.");
-	m_fOriginDetectorDistance = boost::lexical_cast<float32>(node.getContent());
+	m_fOriginDetectorDistance = node.getContentNumerical();
 	CC.markNodeParsed("DistanceOriginDetector");
 
 	// Required: DetectorOriginSource
 	node = _cfg.self.getSingleNode("DistanceOriginSource");
 	ASTRA_CONFIG_CHECK(node, "ConeProjectionGeometry3D", "No DistanceOriginSource tag specified.");
-	m_fOriginSourceDistance = boost::lexical_cast<float32>(node.getContent());
+	m_fOriginSourceDistance = node.getContentNumerical();
 	CC.markNodeParsed("DistanceOriginSource");
 
 	// success
diff --git a/src/ConeVecProjectionGeometry3D.cpp b/src/ConeVecProjectionGeometry3D.cpp
index 86e3bd6..f4f900d 100644
--- a/src/ConeVecProjectionGeometry3D.cpp
+++ b/src/ConeVecProjectionGeometry3D.cpp
@@ -27,9 +27,9 @@ $Id$
 */
 
 #include "astra/ConeVecProjectionGeometry3D.h"
+#include "astra/Utilities.h"
 
 #include <cstring>
-#include <boost/lexical_cast.hpp>
 
 using namespace std;
 
@@ -82,13 +82,13 @@ bool CConeVecProjectionGeometry3D::initialize(const Config& _cfg)
 	// Required: DetectorRowCount
 	node = _cfg.self.getSingleNode("DetectorRowCount");
 	ASTRA_CONFIG_CHECK(node, "ConeVecProjectionGeometry3D", "No DetectorRowCount tag specified.");
-	m_iDetectorRowCount = boost::lexical_cast<int>(node.getContent());
+	m_iDetectorRowCount = node.getContentInt();
 	CC.markNodeParsed("DetectorRowCount");
 
 	// Required: DetectorColCount
 	node = _cfg.self.getSingleNode("DetectorColCount");
 	ASTRA_CONFIG_CHECK(node, "ConeVecProjectionGeometry3D", "No DetectorColCount tag specified.");
-	m_iDetectorColCount = boost::lexical_cast<int>(node.getContent());
+	m_iDetectorColCount = node.getContentInt();
 	m_iDetectorTotCount = m_iDetectorRowCount * m_iDetectorColCount;
 	CC.markNodeParsed("DetectorColCount");
 
@@ -212,18 +212,18 @@ Config* CConeVecProjectionGeometry3D::getConfiguration() const
 	std::string vectors = "";
 	for (int i = 0; i < m_iProjectionAngleCount; ++i) {
 		SConeProjection& p = m_pProjectionAngles[i];
-		vectors += boost::lexical_cast<string>(p.fSrcX) + ",";
-		vectors += boost::lexical_cast<string>(p.fSrcY) + ",";
-		vectors += boost::lexical_cast<string>(p.fSrcZ) + ",";
-		vectors += boost::lexical_cast<string>(p.fDetSX + 0.5f*m_iDetectorRowCount*p.fDetVX + 0.5f*m_iDetectorColCount*p.fDetUX) + ",";
-		vectors += boost::lexical_cast<string>(p.fDetSY + 0.5f*m_iDetectorRowCount*p.fDetVY + 0.5f*m_iDetectorColCount*p.fDetUY) + ",";
-		vectors += boost::lexical_cast<string>(p.fDetSZ + 0.5f*m_iDetectorRowCount*p.fDetVZ + 0.5f*m_iDetectorColCount*p.fDetUZ) + ",";
-		vectors += boost::lexical_cast<string>(p.fDetUX) + ",";
-		vectors += boost::lexical_cast<string>(p.fDetUY) + ",";
-		vectors += boost::lexical_cast<string>(p.fDetUZ) + ",";
-		vectors += boost::lexical_cast<string>(p.fDetVX) + ",";
-		vectors += boost::lexical_cast<string>(p.fDetVY) + ",";
-		vectors += boost::lexical_cast<string>(p.fDetVZ);
+		vectors += StringUtil::toString(p.fSrcX) + ",";
+		vectors += StringUtil::toString(p.fSrcY) + ",";
+		vectors += StringUtil::toString(p.fSrcZ) + ",";
+		vectors += StringUtil::toString(p.fDetSX + 0.5f*m_iDetectorRowCount*p.fDetVX + 0.5f*m_iDetectorColCount*p.fDetUX) + ",";
+		vectors += StringUtil::toString(p.fDetSY + 0.5f*m_iDetectorRowCount*p.fDetVY + 0.5f*m_iDetectorColCount*p.fDetUY) + ",";
+		vectors += StringUtil::toString(p.fDetSZ + 0.5f*m_iDetectorRowCount*p.fDetVZ + 0.5f*m_iDetectorColCount*p.fDetUZ) + ",";
+		vectors += StringUtil::toString(p.fDetUX) + ",";
+		vectors += StringUtil::toString(p.fDetUY) + ",";
+		vectors += StringUtil::toString(p.fDetUZ) + ",";
+		vectors += StringUtil::toString(p.fDetVX) + ",";
+		vectors += StringUtil::toString(p.fDetVY) + ",";
+		vectors += StringUtil::toString(p.fDetVZ);
 		if (i < m_iProjectionAngleCount-1) vectors += ';';
 	}
 	cfg->self.addChildNode("Vectors", vectors);
diff --git a/src/CudaBackProjectionAlgorithm3D.cpp b/src/CudaBackProjectionAlgorithm3D.cpp
index ce8e111..76d7b35 100644
--- a/src/CudaBackProjectionAlgorithm3D.cpp
+++ b/src/CudaBackProjectionAlgorithm3D.cpp
@@ -28,8 +28,6 @@ $Id$
 
 #include "astra/CudaBackProjectionAlgorithm3D.h"
 
-#include <boost/lexical_cast.hpp>
-
 #include "astra/AstraObjectManager.h"
 
 #include "astra/CudaProjector3D.h"
diff --git a/src/CudaCglsAlgorithm3D.cpp b/src/CudaCglsAlgorithm3D.cpp
index abc18d1..930a71e 100644
--- a/src/CudaCglsAlgorithm3D.cpp
+++ b/src/CudaCglsAlgorithm3D.cpp
@@ -28,8 +28,6 @@ $Id$
 
 #include "astra/CudaCglsAlgorithm3D.h"
 
-#include <boost/lexical_cast.hpp>
-
 #include "astra/AstraObjectManager.h"
 
 #include "astra/CudaProjector3D.h"
diff --git a/src/CudaDartMaskAlgorithm.cpp b/src/CudaDartMaskAlgorithm.cpp
index 950b428..c2a4cca 100644
--- a/src/CudaDartMaskAlgorithm.cpp
+++ b/src/CudaDartMaskAlgorithm.cpp
@@ -34,7 +34,6 @@ $Id$
 #include "../cuda/2d/algo.h"
 
 #include "astra/AstraObjectManager.h"
-#include <boost/lexical_cast.hpp>
 
 using namespace std;
 
@@ -67,14 +66,14 @@ bool CCudaDartMaskAlgorithm::initialize(const Config& _cfg)
 	// reconstruction data
 	XMLNode node = _cfg.self.getSingleNode("SegmentationDataId");
 	ASTRA_CONFIG_CHECK(node, "CudaDartMask", "No SegmentationDataId tag specified.");
-	int id = boost::lexical_cast<int>(node.getContent());
+	int id = node.getContentInt();
 	m_pSegmentation = dynamic_cast<CFloat32VolumeData2D*>(CData2DManager::getSingleton().get(id));
 	CC.markNodeParsed("SegmentationDataId");
 
 	// reconstruction data
 	node = _cfg.self.getSingleNode("MaskDataId");
 	ASTRA_CONFIG_CHECK(node, "CudaDartMask", "No MaskDataId tag specified.");
-	id = boost::lexical_cast<int>(node.getContent());
+	id = node.getContentInt();
 	m_pMask = dynamic_cast<CFloat32VolumeData2D*>(CData2DManager::getSingleton().get(id));
 	CC.markNodeParsed("MaskDataId");
 
diff --git a/src/CudaDartMaskAlgorithm3D.cpp b/src/CudaDartMaskAlgorithm3D.cpp
index b0dfc5b..dd12c58 100644
--- a/src/CudaDartMaskAlgorithm3D.cpp
+++ b/src/CudaDartMaskAlgorithm3D.cpp
@@ -34,7 +34,6 @@ $Id$
 #include "../cuda/3d/dims3d.h"
 
 #include "astra/AstraObjectManager.h"
-#include <boost/lexical_cast.hpp>
 
 using namespace std;
 
@@ -67,14 +66,14 @@ bool CCudaDartMaskAlgorithm3D::initialize(const Config& _cfg)
 	// reconstruction data
 	XMLNode node = _cfg.self.getSingleNode("SegmentationDataId");
 	ASTRA_CONFIG_CHECK(node, "CudaDartMask", "No SegmentationDataId tag specified.");
-	int id = boost::lexical_cast<int>(node.getContent());
+	int id = node.getContentInt();
 	m_pSegmentation = dynamic_cast<CFloat32VolumeData3DMemory*>(CData3DManager::getSingleton().get(id));
 	CC.markNodeParsed("SegmentationDataId");
 
 	// reconstruction data
 	node = _cfg.self.getSingleNode("MaskDataId");
 	ASTRA_CONFIG_CHECK(node, "CudaDartMask", "No MaskDataId tag specified.");
-	id = boost::lexical_cast<int>(node.getContent());
+	id = node.getContentInt();
 	m_pMask = dynamic_cast<CFloat32VolumeData3DMemory*>(CData3DManager::getSingleton().get(id));
 	CC.markNodeParsed("MaskDataId");
 
diff --git a/src/CudaDartSmoothingAlgorithm.cpp b/src/CudaDartSmoothingAlgorithm.cpp
index 7e22809..425f0a3 100644
--- a/src/CudaDartSmoothingAlgorithm.cpp
+++ b/src/CudaDartSmoothingAlgorithm.cpp
@@ -34,7 +34,6 @@ $Id$
 #include "../cuda/2d/algo.h"
 
 #include "astra/AstraObjectManager.h"
-#include <boost/lexical_cast.hpp>
 
 using namespace std;
 
@@ -67,14 +66,14 @@ bool CCudaDartSmoothingAlgorithm::initialize(const Config& _cfg)
 	// reconstruction data
 	XMLNode node = _cfg.self.getSingleNode("InDataId");
 	ASTRA_CONFIG_CHECK(node, "CudaDartMask", "No InDataId tag specified.");
-	int id = boost::lexical_cast<int>(node.getContent());
+	int id = node.getContentInt();
 	m_pIn = dynamic_cast<CFloat32VolumeData2D*>(CData2DManager::getSingleton().get(id));
 	CC.markNodeParsed("InDataId");
 
 	// reconstruction data
 	node = _cfg.self.getSingleNode("OutDataId");
 	ASTRA_CONFIG_CHECK(node, "CudaDartMask", "No OutDataId tag specified.");
-	id = boost::lexical_cast<int>(node.getContent());
+	id = node.getContentInt();
 	m_pOut = dynamic_cast<CFloat32VolumeData2D*>(CData2DManager::getSingleton().get(id));
 	CC.markNodeParsed("OutDataId");
 
diff --git a/src/CudaDartSmoothingAlgorithm3D.cpp b/src/CudaDartSmoothingAlgorithm3D.cpp
index 9c4437a..df7e0df 100644
--- a/src/CudaDartSmoothingAlgorithm3D.cpp
+++ b/src/CudaDartSmoothingAlgorithm3D.cpp
@@ -34,7 +34,6 @@ $Id$
 #include "../cuda/3d/dims3d.h"
 
 #include "astra/AstraObjectManager.h"
-#include <boost/lexical_cast.hpp>
 
 using namespace std;
 
@@ -67,14 +66,14 @@ bool CCudaDartSmoothingAlgorithm3D::initialize(const Config& _cfg)
 	// reconstruction data
 	XMLNode node = _cfg.self.getSingleNode("InDataId");
 	ASTRA_CONFIG_CHECK(node, "CudaDartMask", "No InDataId tag specified.");
-	int id = boost::lexical_cast<int>(node.getContent());
+	int id = node.getContentInt();
 	m_pIn = dynamic_cast<CFloat32VolumeData3DMemory*>(CData3DManager::getSingleton().get(id));
 	CC.markNodeParsed("InDataId");
 
 	// reconstruction data
 	node = _cfg.self.getSingleNode("OutDataId");
 	ASTRA_CONFIG_CHECK(node, "CudaDartMask", "No OutDataId tag specified.");
-	id = boost::lexical_cast<int>(node.getContent());
+	id = node.getContentInt();
 	m_pOut = dynamic_cast<CFloat32VolumeData3DMemory*>(CData3DManager::getSingleton().get(id));
 	CC.markNodeParsed("OutDataId");
 
diff --git a/src/CudaDataOperationAlgorithm.cpp b/src/CudaDataOperationAlgorithm.cpp
index ae133c2..15886a4 100644
--- a/src/CudaDataOperationAlgorithm.cpp
+++ b/src/CudaDataOperationAlgorithm.cpp
@@ -35,7 +35,6 @@ $Id$
 #include "../cuda/2d/arith.h"
 
 #include "astra/AstraObjectManager.h"
-#include <boost/lexical_cast.hpp>
 
 using namespace std;
 
@@ -78,7 +77,7 @@ bool CCudaDataOperationAlgorithm::initialize(const Config& _cfg)
 	ASTRA_CONFIG_CHECK(node, "CCudaDataOperationAlgorithm", "No DataId tag specified.");
 	vector<string> data = node.getContentArray();
 	for (vector<string>::iterator it = data.begin(); it != data.end(); it++){
-		int id = boost::lexical_cast<int>(*it);
+		int id = StringUtil::stringToInt(*it);
 		m_pData.push_back(dynamic_cast<CFloat32Data2D*>(CData2DManager::getSingleton().get(id)));
 	}
 	CC.markNodeParsed("DataId");
@@ -97,7 +96,7 @@ bool CCudaDataOperationAlgorithm::initialize(const Config& _cfg)
 		CC.markOptionParsed("GPUIndex");
 
 	if (_cfg.self.hasOption("MaskId")) {
-		int id = boost::lexical_cast<int>(_cfg.self.getOption("MaskId"));
+		int id = _cfg.self.getOptionInt("MaskId");
 		m_pMask = dynamic_cast<CFloat32Data2D*>(CData2DManager::getSingleton().get(id));
 	}
 	CC.markOptionParsed("MaskId");
diff --git a/src/CudaFDKAlgorithm3D.cpp b/src/CudaFDKAlgorithm3D.cpp
index 1316daa..b5ce545 100644
--- a/src/CudaFDKAlgorithm3D.cpp
+++ b/src/CudaFDKAlgorithm3D.cpp
@@ -28,8 +28,6 @@ $Id$
 
 #include "astra/CudaFDKAlgorithm3D.h"
 
-#include <boost/lexical_cast.hpp>
-
 #include "astra/AstraObjectManager.h"
 
 #include "astra/CudaProjector3D.h"
diff --git a/src/CudaFilteredBackProjectionAlgorithm.cpp b/src/CudaFilteredBackProjectionAlgorithm.cpp
index bcd70c4..aa97eec 100644
--- a/src/CudaFilteredBackProjectionAlgorithm.cpp
+++ b/src/CudaFilteredBackProjectionAlgorithm.cpp
@@ -28,7 +28,6 @@ $Id$
 
 #include <astra/CudaFilteredBackProjectionAlgorithm.h>
 #include <astra/FanFlatProjectionGeometry2D.h>
-#include <boost/lexical_cast.hpp>
 #include <cstring>
 
 #include "astra/AstraObjectManager.h"
@@ -100,7 +99,7 @@ bool CCudaFilteredBackProjectionAlgorithm::initialize(const Config& _cfg)
 	XMLNode node = _cfg.self.getSingleNode("ProjectorId");
 	CCudaProjector2D* pCudaProjector = 0;
 	if (node) {
-		int id = boost::lexical_cast<int>(node.getContent());
+		int id = node.getContentInt();
 		CProjector2D *projector = CProjector2DManager::getSingleton().get(id);
 		pCudaProjector = dynamic_cast<CCudaProjector2D*>(projector);
 		if (!pCudaProjector) {
@@ -113,14 +112,14 @@ bool CCudaFilteredBackProjectionAlgorithm::initialize(const Config& _cfg)
 	// sinogram data
 	node = _cfg.self.getSingleNode("ProjectionDataId");
 	ASTRA_CONFIG_CHECK(node, "CudaFBP", "No ProjectionDataId tag specified.");
-	int id = boost::lexical_cast<int>(node.getContent());
+	int id = node.getContentInt();
 	m_pSinogram = dynamic_cast<CFloat32ProjectionData2D*>(CData2DManager::getSingleton().get(id));
 	CC.markNodeParsed("ProjectionDataId");
 
 	// reconstruction data
 	node = _cfg.self.getSingleNode("ReconstructionDataId");
 	ASTRA_CONFIG_CHECK(node, "CudaFBP", "No ReconstructionDataId tag specified.");
-	id = boost::lexical_cast<int>(node.getContent());
+	id = node.getContentInt();
 	m_pReconstruction = dynamic_cast<CFloat32VolumeData2D*>(CData2DManager::getSingleton().get(id));
 	CC.markNodeParsed("ReconstructionDataId");
 
@@ -140,7 +139,7 @@ bool CCudaFilteredBackProjectionAlgorithm::initialize(const Config& _cfg)
 	node = _cfg.self.getSingleNode("FilterSinogramId");
 	if (node)
 	{
-		id = boost::lexical_cast<int>(node.getContent());
+		id = node.getContentInt();
 		const CFloat32ProjectionData2D * pFilterData = dynamic_cast<CFloat32ProjectionData2D*>(CData2DManager::getSingleton().get(id));
 		m_iFilterWidth = pFilterData->getGeometry()->getDetectorCount();
 		int iFilterProjectionCount = pFilterData->getGeometry()->getProjectionAngleCount();
@@ -159,7 +158,7 @@ bool CCudaFilteredBackProjectionAlgorithm::initialize(const Config& _cfg)
 	node = _cfg.self.getSingleNode("FilterParameter");
 	if (node)
 	{
-		float fParameter = boost::lexical_cast<float>(node.getContent());
+		float fParameter = node.getContentNumerical();
 		m_fFilterParameter = fParameter;
 	}
 	else
@@ -172,7 +171,7 @@ bool CCudaFilteredBackProjectionAlgorithm::initialize(const Config& _cfg)
 	node = _cfg.self.getSingleNode("FilterD");
 	if (node)
 	{
-		float fD = boost::lexical_cast<float>(node.getContent());
+		float fD = node.getContentNumerical();
 		m_fFilterD = fD;
 	}
 	else
diff --git a/src/CudaForwardProjectionAlgorithm.cpp b/src/CudaForwardProjectionAlgorithm.cpp
index d38469c..80f2e02 100644
--- a/src/CudaForwardProjectionAlgorithm.cpp
+++ b/src/CudaForwardProjectionAlgorithm.cpp
@@ -35,8 +35,6 @@ $Id$
 #include <driver_types.h>
 #include <cuda_runtime_api.h>
 
-#include <boost/lexical_cast.hpp>
-
 #include "astra/AstraObjectManager.h"
 #include "astra/ParallelProjectionGeometry2D.h"
 #include "astra/FanFlatProjectionGeometry2D.h"
@@ -97,7 +95,7 @@ bool CCudaForwardProjectionAlgorithm::initialize(const Config& _cfg)
 	m_pProjector = 0;
 	XMLNode node = _cfg.self.getSingleNode("ProjectorId");
 	if (node) {
-		int id = boost::lexical_cast<int>(node.getContent());
+		int id = node.getContentInt();
 		m_pProjector = CProjector2DManager::getSingleton().get(id);
 	}
 	CC.markNodeParsed("ProjectorId");
@@ -107,14 +105,14 @@ bool CCudaForwardProjectionAlgorithm::initialize(const Config& _cfg)
 	// sinogram data
 	node = _cfg.self.getSingleNode("ProjectionDataId");
 	ASTRA_CONFIG_CHECK(node, "FP_CUDA", "No ProjectionDataId tag specified.");
-	int id = boost::lexical_cast<int>(node.getContent());
+	int id = node.getContentInt();
 	m_pSinogram = dynamic_cast<CFloat32ProjectionData2D*>(CData2DManager::getSingleton().get(id));
 	CC.markNodeParsed("ProjectionDataId");
 
 	// volume data
 	node = _cfg.self.getSingleNode("VolumeDataId");
 	ASTRA_CONFIG_CHECK(node, "FP_CUDA", "No VolumeDataId tag specified.");
-	id = boost::lexical_cast<int>(node.getContent());
+	id = node.getContentInt();
 	m_pVolume = dynamic_cast<CFloat32VolumeData2D*>(CData2DManager::getSingleton().get(id));
 	CC.markNodeParsed("VolumeDataId");
 
diff --git a/src/CudaForwardProjectionAlgorithm3D.cpp b/src/CudaForwardProjectionAlgorithm3D.cpp
index 209f5a5..f709e34 100644
--- a/src/CudaForwardProjectionAlgorithm3D.cpp
+++ b/src/CudaForwardProjectionAlgorithm3D.cpp
@@ -30,8 +30,6 @@ $Id$
 
 #ifdef ASTRA_CUDA
 
-#include <boost/lexical_cast.hpp>
-
 #include "astra/AstraObjectManager.h"
 
 #include "astra/CudaProjector3D.h"
@@ -103,14 +101,14 @@ bool CCudaForwardProjectionAlgorithm3D::initialize(const Config& _cfg)
 	// sinogram data
 	node = _cfg.self.getSingleNode("ProjectionDataId");
 	ASTRA_CONFIG_CHECK(node, "CudaForwardProjection3D", "No ProjectionDataId tag specified.");
-	id = boost::lexical_cast<int>(node.getContent());
+	id = node.getContentInt();
 	m_pProjections = dynamic_cast<CFloat32ProjectionData3DMemory*>(CData3DManager::getSingleton().get(id));
 	CC.markNodeParsed("ProjectionDataId");
 
 	// reconstruction data
 	node = _cfg.self.getSingleNode("VolumeDataId");
 	ASTRA_CONFIG_CHECK(node, "CudaForwardProjection3D", "No VolumeDataId tag specified.");
-	id = boost::lexical_cast<int>(node.getContent());
+	id = node.getContentInt();
 	m_pVolume = dynamic_cast<CFloat32VolumeData3DMemory*>(CData3DManager::getSingleton().get(id));
 	CC.markNodeParsed("VolumeDataId");
 
@@ -118,7 +116,7 @@ bool CCudaForwardProjectionAlgorithm3D::initialize(const Config& _cfg)
 	node = _cfg.self.getSingleNode("ProjectorId");
 	m_pProjector = 0;
 	if (node) {
-		id = boost::lexical_cast<int>(node.getContent());
+		id = node.getContentInt();
 		m_pProjector = CProjector3DManager::getSingleton().get(id);
 	}
 	CC.markNodeParsed("ProjectorId");
diff --git a/src/CudaReconstructionAlgorithm2D.cpp b/src/CudaReconstructionAlgorithm2D.cpp
index 71dddf7..5a1910c 100644
--- a/src/CudaReconstructionAlgorithm2D.cpp
+++ b/src/CudaReconstructionAlgorithm2D.cpp
@@ -30,8 +30,6 @@ $Id$
 
 #include "astra/CudaReconstructionAlgorithm2D.h"
 
-#include <boost/lexical_cast.hpp>
-
 #include "astra/AstraObjectManager.h"
 #include "astra/FanFlatProjectionGeometry2D.h"
 #include "astra/FanFlatVecProjectionGeometry2D.h"
diff --git a/src/CudaRoiSelectAlgorithm.cpp b/src/CudaRoiSelectAlgorithm.cpp
index 7635c69..dfb8056 100644
--- a/src/CudaRoiSelectAlgorithm.cpp
+++ b/src/CudaRoiSelectAlgorithm.cpp
@@ -34,7 +34,6 @@ $Id$
 #include "../cuda/2d/algo.h"
 
 #include "astra/AstraObjectManager.h"
-#include <boost/lexical_cast.hpp>
 
 using namespace std;
 
@@ -68,7 +67,7 @@ bool CCudaRoiSelectAlgorithm::initialize(const Config& _cfg)
 	// reconstruction data
 	XMLNode node = _cfg.self.getSingleNode("DataId");
 	ASTRA_CONFIG_CHECK(node, "CudaRoiSelect", "No DataId tag specified.");
-	int id = boost::lexical_cast<int>(node.getContent());
+	int id = node.getContentInt();
 	m_pData = dynamic_cast<CFloat32VolumeData2D*>(CData2DManager::getSingleton().get(id));
 	CC.markNodeParsed("DataId");
 
diff --git a/src/CudaSirtAlgorithm.cpp b/src/CudaSirtAlgorithm.cpp
index ab0a418..33e381a 100644
--- a/src/CudaSirtAlgorithm.cpp
+++ b/src/CudaSirtAlgorithm.cpp
@@ -30,7 +30,6 @@ $Id$
 
 #include "astra/CudaSirtAlgorithm.h"
 
-#include <boost/lexical_cast.hpp>
 #include "astra/AstraObjectManager.h"
 
 #include "../cuda/2d/sirt.h"
@@ -77,12 +76,12 @@ bool CCudaSirtAlgorithm::initialize(const Config& _cfg)
 
 	// min/max masks
 	if (_cfg.self.hasOption("MinMaskId")) {
-		int id = boost::lexical_cast<int>(_cfg.self.getOption("MinMaskId"));
+		int id = _cfg.self.getOptionInt("MinMaskId");
 		m_pMinMask = dynamic_cast<CFloat32VolumeData2D*>(CData2DManager::getSingleton().get(id));
 	}
 	CC.markOptionParsed("MinMaskId");
 	if (_cfg.self.hasOption("MaxMaskId")) {
-		int id = boost::lexical_cast<int>(_cfg.self.getOption("MaxMaskId"));
+		int id = _cfg.self.getOptionInt("MaxMaskId");
 		m_pMaxMask = dynamic_cast<CFloat32VolumeData2D*>(CData2DManager::getSingleton().get(id));
 	}
 	CC.markOptionParsed("MaxMaskId");
diff --git a/src/CudaSirtAlgorithm3D.cpp b/src/CudaSirtAlgorithm3D.cpp
index 1fa0da2..605c470 100644
--- a/src/CudaSirtAlgorithm3D.cpp
+++ b/src/CudaSirtAlgorithm3D.cpp
@@ -28,8 +28,6 @@ $Id$
 
 #include "astra/CudaSirtAlgorithm3D.h"
 
-#include <boost/lexical_cast.hpp>
-
 #include "astra/AstraObjectManager.h"
 
 #include "astra/ConeProjectionGeometry3D.h"
diff --git a/src/FanFlatBeamLineKernelProjector2D.cpp b/src/FanFlatBeamLineKernelProjector2D.cpp
index 0681715..fd4195b 100644
--- a/src/FanFlatBeamLineKernelProjector2D.cpp
+++ b/src/FanFlatBeamLineKernelProjector2D.cpp
@@ -30,7 +30,6 @@ $Id$
 
 #include <cmath>
 #include <cstring>
-#include <boost/lexical_cast.hpp>
 
 #include "astra/DataProjectorPolicies.h"
 
diff --git a/src/FanFlatBeamStripKernelProjector2D.cpp b/src/FanFlatBeamStripKernelProjector2D.cpp
index e94d3da..b48beab 100644
--- a/src/FanFlatBeamStripKernelProjector2D.cpp
+++ b/src/FanFlatBeamStripKernelProjector2D.cpp
@@ -29,7 +29,6 @@ $Id$
 #include "astra/FanFlatBeamStripKernelProjector2D.h"
 
 #include <cmath>
-#include <boost/lexical_cast.hpp>
 
 #include "astra/DataProjectorPolicies.h"
 
diff --git a/src/FanFlatProjectionGeometry2D.cpp b/src/FanFlatProjectionGeometry2D.cpp
index 32a19bc..8bee0d6 100644
--- a/src/FanFlatProjectionGeometry2D.cpp
+++ b/src/FanFlatProjectionGeometry2D.cpp
@@ -30,7 +30,6 @@ $Id$
 
 #include <cstring>
 #include <sstream>
-#include <boost/lexical_cast.hpp>
 
 using namespace std;
 
@@ -136,13 +135,13 @@ bool CFanFlatProjectionGeometry2D::initialize(const Config& _cfg)
 	// Required: DistanceOriginDetector
 	XMLNode node = _cfg.self.getSingleNode("DistanceOriginDetector");
 	ASTRA_CONFIG_CHECK(node, "FanFlatProjectionGeometry2D", "No DistanceOriginDetector tag specified.");
-	m_fOriginDetectorDistance = boost::lexical_cast<float32>(node.getContent());
+	m_fOriginDetectorDistance = node.getContentNumerical();
 	CC.markNodeParsed("DistanceOriginDetector");
 
 	// Required: DetectorOriginSource
 	node = _cfg.self.getSingleNode("DistanceOriginSource");
 	ASTRA_CONFIG_CHECK(node, "FanFlatProjectionGeometry2D", "No DistanceOriginSource tag specified.");
-	m_fOriginSourceDistance = boost::lexical_cast<float32>(node.getContent());
+	m_fOriginSourceDistance = node.getContentNumerical();
 	CC.markNodeParsed("DistanceOriginSource");
 
 	// success
diff --git a/src/FanFlatVecProjectionGeometry2D.cpp b/src/FanFlatVecProjectionGeometry2D.cpp
index 4104379..0b76fc5 100644
--- a/src/FanFlatVecProjectionGeometry2D.cpp
+++ b/src/FanFlatVecProjectionGeometry2D.cpp
@@ -30,7 +30,6 @@ $Id$
 
 #include <cstring>
 #include <sstream>
-#include <boost/lexical_cast.hpp>
 
 using namespace std;
 
@@ -125,7 +124,7 @@ bool CFanFlatVecProjectionGeometry2D::initialize(const Config& _cfg)
 	// Required: DetectorCount
 	node = _cfg.self.getSingleNode("DetectorCount");
 	ASTRA_CONFIG_CHECK(node, "FanFlatVecProjectionGeometry3D", "No DetectorRowCount tag specified.");
-	m_iDetectorCount = boost::lexical_cast<int>(node.getContent());
+	m_iDetectorCount = node.getContentInt();
 	CC.markNodeParsed("DetectorCount");
 
 	// Required: Vectors
@@ -235,12 +234,12 @@ Config* CFanFlatVecProjectionGeometry2D::getConfiguration() const
 	std::string vectors = "";
 	for (int i = 0; i < m_iProjectionAngleCount; ++i) {
 		SFanProjection& p = m_pProjectionAngles[i];
-		vectors += boost::lexical_cast<string>(p.fSrcX) + ",";
-		vectors += boost::lexical_cast<string>(p.fSrcY) + ",";
-		vectors += boost::lexical_cast<string>(p.fDetSX + 0.5f * m_iDetectorCount * p.fDetUX) + ",";
-		vectors += boost::lexical_cast<string>(p.fDetSY + 0.5f * m_iDetectorCount * p.fDetUY) + ",";
-		vectors += boost::lexical_cast<string>(p.fDetUX) + ",";
-		vectors += boost::lexical_cast<string>(p.fDetUY);
+		vectors += StringUtil::toString(p.fSrcX) + ",";
+		vectors += StringUtil::toString(p.fSrcY) + ",";
+		vectors += StringUtil::toString(p.fDetSX + 0.5f * m_iDetectorCount * p.fDetUX) + ",";
+		vectors += StringUtil::toString(p.fDetSY + 0.5f * m_iDetectorCount * p.fDetUY) + ",";
+		vectors += StringUtil::toString(p.fDetUX) + ",";
+		vectors += StringUtil::toString(p.fDetUY);
 		if (i < m_iProjectionAngleCount-1) vectors += ';';
 	}
 	cfg->self.addChildNode("Vectors", vectors);
diff --git a/src/FilteredBackProjectionAlgorithm.cpp b/src/FilteredBackProjectionAlgorithm.cpp
index f494d22..c195578 100644
--- a/src/FilteredBackProjectionAlgorithm.cpp
+++ b/src/FilteredBackProjectionAlgorithm.cpp
@@ -28,8 +28,6 @@ $Id$
 
 #include "astra/FilteredBackProjectionAlgorithm.h"
 
-#include <boost/lexical_cast.hpp>
-
 #include <iostream>
 #include <iomanip>
 #include <math.h>
@@ -96,19 +94,19 @@ bool CFilteredBackProjectionAlgorithm::initialize(const Config& _cfg)
 	// projector
 	XMLNode node = _cfg.self.getSingleNode("ProjectorId");
 	ASTRA_CONFIG_CHECK(node, "FilteredBackProjection", "No ProjectorId tag specified.");
-	int id = boost::lexical_cast<int>(node.getContent());
+	int id = node.getContentInt();
 	m_pProjector = CProjector2DManager::getSingleton().get(id);
 
 	// sinogram data
 	node = _cfg.self.getSingleNode("ProjectionDataId");
 	ASTRA_CONFIG_CHECK(node, "FilteredBackProjection", "No ProjectionDataId tag specified.");
-	id = boost::lexical_cast<int>(node.getContent());
+	id = node.getContentInt();
 	m_pSinogram = dynamic_cast<CFloat32ProjectionData2D*>(CData2DManager::getSingleton().get(id));
 
 	// volume data
 	node = _cfg.self.getSingleNode("ReconstructionDataId");
 	ASTRA_CONFIG_CHECK(node, "FilteredBackProjection", "No ReconstructionDataId tag specified.");
-	id = boost::lexical_cast<int>(node.getContent());
+	id = node.getContentInt();
 	m_pReconstruction = dynamic_cast<CFloat32VolumeData2D*>(CData2DManager::getSingleton().get(id));
 
 	node = _cfg.self.getSingleNode("ProjectionIndex");
diff --git a/src/ForwardProjectionAlgorithm.cpp b/src/ForwardProjectionAlgorithm.cpp
index f356824..dcf5790 100644
--- a/src/ForwardProjectionAlgorithm.cpp
+++ b/src/ForwardProjectionAlgorithm.cpp
@@ -28,8 +28,6 @@ $Id$
 
 #include "astra/ForwardProjectionAlgorithm.h"
 
-#include <boost/lexical_cast.hpp>
-
 #include "astra/AstraObjectManager.h"
 #include "astra/DataProjectorPolicies.h"
 
@@ -128,32 +126,32 @@ bool CForwardProjectionAlgorithm::initialize(const Config& _cfg)
 	// projector
 	XMLNode node = _cfg.self.getSingleNode("ProjectorId");
 	ASTRA_CONFIG_CHECK(node, "ForwardProjection", "No ProjectorId tag specified.");
-	int id = boost::lexical_cast<int>(node.getContent());
+	int id = node.getContentInt();
 	m_pProjector = CProjector2DManager::getSingleton().get(id);
 
 	// sinogram data
 	node = _cfg.self.getSingleNode("ProjectionDataId");
 	ASTRA_CONFIG_CHECK(node, "ForwardProjection", "No ProjectionDataId tag specified.");
-	id = boost::lexical_cast<int>(node.getContent());
+	id = node.getContentInt();
 	m_pSinogram = dynamic_cast<CFloat32ProjectionData2D*>(CData2DManager::getSingleton().get(id));
 
 	// volume data
 	node = _cfg.self.getSingleNode("VolumeDataId");
 	ASTRA_CONFIG_CHECK(node, "ForwardProjection", "No VolumeDataId tag specified.");
-	id = boost::lexical_cast<int>(node.getContent());
+	id = node.getContentInt();
 	m_pVolume = dynamic_cast<CFloat32VolumeData2D*>(CData2DManager::getSingleton().get(id));
 	
 	// volume mask
 	if (_cfg.self.hasOption("VolumeMaskId")) {
 		m_bUseVolumeMask = true;
-		id = boost::lexical_cast<int>(_cfg.self.getOption("VolumeMaskId"));
+		id = _cfg.self.getOptionInt("VolumeMaskId");
 		m_pVolumeMask = dynamic_cast<CFloat32VolumeData2D*>(CData2DManager::getSingleton().get(id));
 	}
 
 	// sino mask
 	if (_cfg.self.hasOption("SinogramMaskId")) {
 		m_bUseSinogramMask = true;
-		id = boost::lexical_cast<int>(_cfg.self.getOption("SinogramMaskId"));
+		id = _cfg.self.getOptionInt("SinogramMaskId");
 		m_pSinogramMask = dynamic_cast<CFloat32ProjectionData2D*>(CData2DManager::getSingleton().get(id));
 	}
 
diff --git a/src/ParallelBeamBlobKernelProjector2D.cpp b/src/ParallelBeamBlobKernelProjector2D.cpp
index 4559a48..679d5c6 100644
--- a/src/ParallelBeamBlobKernelProjector2D.cpp
+++ b/src/ParallelBeamBlobKernelProjector2D.cpp
@@ -29,7 +29,6 @@ $Id$
 #include "astra/ParallelBeamBlobKernelProjector2D.h"
 
 #include <cmath>
-#include <boost/lexical_cast.hpp>
 
 #include "astra/DataProjectorPolicies.h"
 
@@ -134,17 +133,17 @@ bool CParallelBeamBlobKernelProjector2D::initialize(const Config& _cfg)
 		// Required: KernelSize
 		XMLNode node2 = node.getSingleNode("KernelSize");
 		ASTRA_CONFIG_CHECK(node2, "BlobProjector", "No Kernel/KernelSize tag specified.");
-		m_fBlobSize = boost::lexical_cast<float32>(node2.getContent());
+		m_fBlobSize = node2.getContentNumerical();
 
 		// Required: SampleRate
 		node2 = node.getSingleNode("SampleRate");
 		ASTRA_CONFIG_CHECK(node2, "BlobProjector", "No Kernel/SampleRate tag specified.");
-		m_fBlobSampleRate = boost::lexical_cast<float32>(node2.getContent());
+		m_fBlobSampleRate = node2.getContentNumerical();
 	
 		// Required: SampleCount
 		node2 = node.getSingleNode("SampleCount");
 		ASTRA_CONFIG_CHECK(node2, "BlobProjector", "No Kernel/SampleCount tag specified.");
-		m_iBlobSampleCount = boost::lexical_cast<int>(node2.getContent());
+		m_iBlobSampleCount = node2.getContentInt();
 	
 		// Required: KernelValues
 		node2 = node.getSingleNode("KernelValues");
diff --git a/src/ParallelBeamLineKernelProjector2D.cpp b/src/ParallelBeamLineKernelProjector2D.cpp
index 5a23413..e4a1bff 100644
--- a/src/ParallelBeamLineKernelProjector2D.cpp
+++ b/src/ParallelBeamLineKernelProjector2D.cpp
@@ -29,7 +29,6 @@ $Id$
 #include "astra/ParallelBeamLineKernelProjector2D.h"
 
 #include <cmath>
-#include <boost/lexical_cast.hpp>
 
 #include "astra/DataProjectorPolicies.h"
 
diff --git a/src/ParallelBeamLinearKernelProjector2D.cpp b/src/ParallelBeamLinearKernelProjector2D.cpp
index a710664..27aa168 100644
--- a/src/ParallelBeamLinearKernelProjector2D.cpp
+++ b/src/ParallelBeamLinearKernelProjector2D.cpp
@@ -29,7 +29,6 @@ $Id$
 #include "astra/ParallelBeamLinearKernelProjector2D.h"
 
 #include <cmath>
-#include <boost/lexical_cast.hpp>
 
 #include "astra/DataProjectorPolicies.h"
 
diff --git a/src/ParallelBeamStripKernelProjector2D.cpp b/src/ParallelBeamStripKernelProjector2D.cpp
index 44c6fec..3f4e7f3 100644
--- a/src/ParallelBeamStripKernelProjector2D.cpp
+++ b/src/ParallelBeamStripKernelProjector2D.cpp
@@ -29,7 +29,6 @@ $Id$
 #include "astra/ParallelBeamStripKernelProjector2D.h"
 
 #include <cmath>
-#include <boost/lexical_cast.hpp>
 
 #include "astra/DataProjectorPolicies.h"
 
diff --git a/src/ParallelProjectionGeometry2D.cpp b/src/ParallelProjectionGeometry2D.cpp
index 7260b83..cc2a129 100644
--- a/src/ParallelProjectionGeometry2D.cpp
+++ b/src/ParallelProjectionGeometry2D.cpp
@@ -27,7 +27,6 @@ $Id$
 */
 
 #include "astra/ParallelProjectionGeometry2D.h"
-#include <boost/lexical_cast.hpp>
 
 #include <cstring>
 
diff --git a/src/ParallelProjectionGeometry3D.cpp b/src/ParallelProjectionGeometry3D.cpp
index 7b64fd9..2f80883 100644
--- a/src/ParallelProjectionGeometry3D.cpp
+++ b/src/ParallelProjectionGeometry3D.cpp
@@ -30,7 +30,6 @@ $Id$
 
 #include "astra/GeometryUtil3D.h"
 
-#include <boost/lexical_cast.hpp>
 #include <cstring>
 
 using namespace std;
diff --git a/src/ParallelVecProjectionGeometry3D.cpp b/src/ParallelVecProjectionGeometry3D.cpp
index d04400b..3172818 100644
--- a/src/ParallelVecProjectionGeometry3D.cpp
+++ b/src/ParallelVecProjectionGeometry3D.cpp
@@ -27,9 +27,9 @@ $Id$
 */
 
 #include "astra/ParallelVecProjectionGeometry3D.h"
+#include "astra/Utilities.h"
 
 #include <cstring>
-#include <boost/lexical_cast.hpp>
 
 using namespace std;
 
@@ -82,13 +82,13 @@ bool CParallelVecProjectionGeometry3D::initialize(const Config& _cfg)
 	// Required: DetectorRowCount
 	node = _cfg.self.getSingleNode("DetectorRowCount");
 	ASTRA_CONFIG_CHECK(node, "ParallelVecProjectionGeometry3D", "No DetectorRowCount tag specified.");
-	m_iDetectorRowCount = boost::lexical_cast<int>(node.getContent());
+	m_iDetectorRowCount = node.getContentInt();
 	CC.markNodeParsed("DetectorRowCount");
 
 	// Required: DetectorCount
 	node = _cfg.self.getSingleNode("DetectorColCount");
 	ASTRA_CONFIG_CHECK(node, "", "No DetectorColCount tag specified.");
-	m_iDetectorColCount = boost::lexical_cast<int>(node.getContent());
+	m_iDetectorColCount = node.getContentInt();
 	m_iDetectorTotCount = m_iDetectorRowCount * m_iDetectorColCount;
 	CC.markNodeParsed("DetectorColCount");
 
@@ -212,18 +212,18 @@ Config* CParallelVecProjectionGeometry3D::getConfiguration() const
 	std::string vectors = "";
 	for (int i = 0; i < m_iProjectionAngleCount; ++i) {
 		SPar3DProjection& p = m_pProjectionAngles[i];
-		vectors += boost::lexical_cast<string>(p.fRayX) + ",";
-		vectors += boost::lexical_cast<string>(p.fRayY) + ",";
-		vectors += boost::lexical_cast<string>(p.fRayZ) + ",";
-		vectors += boost::lexical_cast<string>(p.fDetSX + 0.5f*m_iDetectorRowCount*p.fDetVX + 0.5f*m_iDetectorColCount*p.fDetUX) + ",";
-		vectors += boost::lexical_cast<string>(p.fDetSY + 0.5f*m_iDetectorRowCount*p.fDetVY + 0.5f*m_iDetectorColCount*p.fDetUY) + ",";
-		vectors += boost::lexical_cast<string>(p.fDetSZ + 0.5f*m_iDetectorRowCount*p.fDetVZ + 0.5f*m_iDetectorColCount*p.fDetUZ) + ",";
-		vectors += boost::lexical_cast<string>(p.fDetUX) + ",";
-		vectors += boost::lexical_cast<string>(p.fDetUY) + ",";
-		vectors += boost::lexical_cast<string>(p.fDetUZ) + ",";
-		vectors += boost::lexical_cast<string>(p.fDetVX) + ",";
-		vectors += boost::lexical_cast<string>(p.fDetVY) + ",";
-		vectors += boost::lexical_cast<string>(p.fDetVZ);
+		vectors += StringUtil::toString(p.fRayX) + ",";
+		vectors += StringUtil::toString(p.fRayY) + ",";
+		vectors += StringUtil::toString(p.fRayZ) + ",";
+		vectors += StringUtil::toString(p.fDetSX + 0.5f*m_iDetectorRowCount*p.fDetVX + 0.5f*m_iDetectorColCount*p.fDetUX) + ",";
+		vectors += StringUtil::toString(p.fDetSY + 0.5f*m_iDetectorRowCount*p.fDetVY + 0.5f*m_iDetectorColCount*p.fDetUY) + ",";
+		vectors += StringUtil::toString(p.fDetSZ + 0.5f*m_iDetectorRowCount*p.fDetVZ + 0.5f*m_iDetectorColCount*p.fDetUZ) + ",";
+		vectors += StringUtil::toString(p.fDetUX) + ",";
+		vectors += StringUtil::toString(p.fDetUY) + ",";
+		vectors += StringUtil::toString(p.fDetUZ) + ",";
+		vectors += StringUtil::toString(p.fDetVX) + ",";
+		vectors += StringUtil::toString(p.fDetVY) + ",";
+		vectors += StringUtil::toString(p.fDetVZ);
 		if (i < m_iProjectionAngleCount-1) vectors += ';';
 	}
 	cfg->self.addChildNode("Vectors", vectors);
diff --git a/src/PluginAlgorithm.cpp b/src/PluginAlgorithm.cpp
index 8f7dfc5..9fc511a 100644
--- a/src/PluginAlgorithm.cpp
+++ b/src/PluginAlgorithm.cpp
@@ -30,9 +30,9 @@ $Id$
 
 #include "astra/PluginAlgorithm.h"
 #include "astra/Logging.h"
+#include "astra/Utilities.h"
 #include <boost/algorithm/string.hpp>
 #include <boost/algorithm/string/split.hpp>
-#include <boost/lexical_cast.hpp>
 #include <iostream>
 #include <fstream>
 #include <string>
@@ -338,7 +338,7 @@ PyObject* stringToPythonValue(std::string str){
             boost::split(row, rows[i], boost::is_any_of(","));
             PyObject *rowlist = PyList_New(row.size());
             for(unsigned int j=0;j<row.size();j++){
-                PyList_SetItem(rowlist, j, PyFloat_FromDouble(boost::lexical_cast<double>(row[j])));
+                PyList_SetItem(rowlist, j, PyFloat_FromDouble(StringUtil::stringToDouble(row[j])));
             }
             PyList_SetItem(mat, i, rowlist);
         }
@@ -349,16 +349,16 @@ PyObject* stringToPythonValue(std::string str){
         boost::split(vec, str, boost::is_any_of(","));
         PyObject *veclist = PyList_New(vec.size());
         for(unsigned int i=0;i<vec.size();i++){
-            PyList_SetItem(veclist, i, PyFloat_FromDouble(boost::lexical_cast<double>(vec[i])));
+            PyList_SetItem(veclist, i, PyFloat_FromDouble(StringUtil::stringToDouble(vec[i])));
         }
         return veclist;
     }
     try{
-        return PyLong_FromLong(boost::lexical_cast<long>(str));
-    }catch(const boost::bad_lexical_cast &){
+        return PyLong_FromLong(StringUtil::stringToInt(str));
+    }catch(const StringUtil::bad_cast &){
         try{
-            return PyFloat_FromDouble(boost::lexical_cast<double>(str));
-        }catch(const boost::bad_lexical_cast &){
+            return PyFloat_FromDouble(StringUtil::stringToDouble(str));
+        }catch(const StringUtil::bad_cast &){
             return pyStringFromString(str);
         }
     }
diff --git a/src/ProjectionGeometry2D.cpp b/src/ProjectionGeometry2D.cpp
index b89605b..8ce06dc 100644
--- a/src/ProjectionGeometry2D.cpp
+++ b/src/ProjectionGeometry2D.cpp
@@ -28,8 +28,6 @@ $Id$
 
 #include "astra/ProjectionGeometry2D.h"
 
-#include <boost/lexical_cast.hpp>
-
 using namespace std;
 
 namespace astra
@@ -126,13 +124,13 @@ bool CProjectionGeometry2D::initialize(const Config& _cfg)
 	// Required: DetectorWidth
 	XMLNode node = _cfg.self.getSingleNode("DetectorWidth");
 	ASTRA_CONFIG_CHECK(node, "ProjectionGeometry2D", "No DetectorWidth tag specified.");
-	m_fDetectorWidth = boost::lexical_cast<float32>(node.getContent());
+	m_fDetectorWidth = node.getContentNumerical();
 	CC.markNodeParsed("DetectorWidth");
 
 	// Required: DetectorCount
 	node = _cfg.self.getSingleNode("DetectorCount");
 	ASTRA_CONFIG_CHECK(node, "ProjectionGeometry2D", "No DetectorCount tag specified.");
-	m_iDetectorCount = boost::lexical_cast<int>(node.getContent());
+	m_iDetectorCount = node.getContentInt();
 	CC.markNodeParsed("DetectorCount");
 
 	// Required: ProjectionAngles
diff --git a/src/ProjectionGeometry3D.cpp b/src/ProjectionGeometry3D.cpp
index ef0246c..281db7c 100644
--- a/src/ProjectionGeometry3D.cpp
+++ b/src/ProjectionGeometry3D.cpp
@@ -28,8 +28,6 @@ $Id$
 
 #include "astra/ProjectionGeometry3D.h"
 
-#include <boost/lexical_cast.hpp>
-
 using namespace std;
 
 namespace astra
@@ -151,25 +149,25 @@ bool CProjectionGeometry3D::initialize(const Config& _cfg)
 	// Required: DetectorWidth
 	XMLNode node = _cfg.self.getSingleNode("DetectorSpacingX");
 	ASTRA_CONFIG_CHECK(node, "ProjectionGeometry3D", "No DetectorSpacingX tag specified.");
-	m_fDetectorSpacingX = boost::lexical_cast<float32>(node.getContent());
+	m_fDetectorSpacingX = node.getContentNumerical();
 	CC.markNodeParsed("DetectorSpacingX");
 
 	// Required: DetectorHeight
 	node = _cfg.self.getSingleNode("DetectorSpacingY");
 	ASTRA_CONFIG_CHECK(node, "ProjectionGeometry3D", "No DetectorSpacingY tag specified.");
-	m_fDetectorSpacingY = boost::lexical_cast<float32>(node.getContent());
+	m_fDetectorSpacingY = node.getContentNumerical();
 	CC.markNodeParsed("DetectorSpacingY");
 
 	// Required: DetectorRowCount
 	node = _cfg.self.getSingleNode("DetectorRowCount");
 	ASTRA_CONFIG_CHECK(node, "ProjectionGeometry3D", "No DetectorRowCount tag specified.");
-	m_iDetectorRowCount = boost::lexical_cast<int>(node.getContent());
+	m_iDetectorRowCount = node.getContentInt();
 	CC.markNodeParsed("DetectorRowCount");
 
 	// Required: DetectorCount
 	node = _cfg.self.getSingleNode("DetectorColCount");
 	ASTRA_CONFIG_CHECK(node, "ProjectionGeometry3D", "No DetectorColCount tag specified.");
-	m_iDetectorColCount = boost::lexical_cast<int>(node.getContent());
+	m_iDetectorColCount = node.getContentInt();
 	m_iDetectorTotCount = m_iDetectorRowCount * m_iDetectorColCount;
 	CC.markNodeParsed("DetectorColCount");
 
diff --git a/src/ReconstructionAlgorithm2D.cpp b/src/ReconstructionAlgorithm2D.cpp
index 4575ff7..1c6d855 100644
--- a/src/ReconstructionAlgorithm2D.cpp
+++ b/src/ReconstructionAlgorithm2D.cpp
@@ -28,8 +28,6 @@ $Id$
 
 #include "astra/ReconstructionAlgorithm2D.h"
 
-#include <boost/lexical_cast.hpp>
-
 #include "astra/AstraObjectManager.h"
 
 using namespace std;
@@ -90,7 +88,7 @@ bool CReconstructionAlgorithm2D::initialize(const Config& _cfg)
 	}
 	int id;
 	if (node) {
-		id = boost::lexical_cast<int>(node.getContent());
+		id = node.getContentInt();
 		m_pProjector = CProjector2DManager::getSingleton().get(id);
 	} else {
 		m_pProjector = 0;
@@ -100,21 +98,21 @@ bool CReconstructionAlgorithm2D::initialize(const Config& _cfg)
 	// sinogram data
 	node = _cfg.self.getSingleNode("ProjectionDataId");
 	ASTRA_CONFIG_CHECK(node, "Reconstruction2D", "No ProjectionDataId tag specified.");
-	id = boost::lexical_cast<int>(node.getContent());
+	id = node.getContentInt();
 	m_pSinogram = dynamic_cast<CFloat32ProjectionData2D*>(CData2DManager::getSingleton().get(id));
 	CC.markNodeParsed("ProjectionDataId");
 
 	// reconstruction data
 	node = _cfg.self.getSingleNode("ReconstructionDataId");
 	ASTRA_CONFIG_CHECK(node, "Reconstruction2D", "No ReconstructionDataId tag specified.");
-	id = boost::lexical_cast<int>(node.getContent());
+	id = node.getContentInt();
 	m_pReconstruction = dynamic_cast<CFloat32VolumeData2D*>(CData2DManager::getSingleton().get(id));
 	CC.markNodeParsed("ReconstructionDataId");
 
 	// fixed mask
 	if (_cfg.self.hasOption("ReconstructionMaskId")) {
 		m_bUseReconstructionMask = true;
-		id = boost::lexical_cast<int>(_cfg.self.getOption("ReconstructionMaskId"));
+		id = _cfg.self.getOptionInt("ReconstructionMaskId");
 		m_pReconstructionMask = dynamic_cast<CFloat32VolumeData2D*>(CData2DManager::getSingleton().get(id));
 		ASTRA_CONFIG_CHECK(m_pReconstructionMask, "Reconstruction2D", "Invalid ReconstructionMaskId.");
 	}
@@ -123,7 +121,7 @@ bool CReconstructionAlgorithm2D::initialize(const Config& _cfg)
 	// fixed mask
 	if (_cfg.self.hasOption("SinogramMaskId")) {
 		m_bUseSinogramMask = true;
-		id = boost::lexical_cast<int>(_cfg.self.getOption("SinogramMaskId"));
+		id = _cfg.self.getOptionInt("SinogramMaskId");
 		m_pSinogramMask = dynamic_cast<CFloat32ProjectionData2D*>(CData2DManager::getSingleton().get(id));
 		ASTRA_CONFIG_CHECK(m_pSinogramMask, "Reconstruction2D", "Invalid SinogramMaskId.");
 	}
diff --git a/src/ReconstructionAlgorithm3D.cpp b/src/ReconstructionAlgorithm3D.cpp
index 13d4b07..55f1031 100644
--- a/src/ReconstructionAlgorithm3D.cpp
+++ b/src/ReconstructionAlgorithm3D.cpp
@@ -28,8 +28,6 @@ $Id$
 
 #include "astra/ReconstructionAlgorithm3D.h"
 
-#include <boost/lexical_cast.hpp>
-
 #include "astra/AstraObjectManager.h"
 
 using namespace std;
@@ -111,7 +109,7 @@ bool CReconstructionAlgorithm3D::initialize(const Config& _cfg)
 	node = _cfg.self.getSingleNode("ProjectorId");
 	m_pProjector = 0;
 	if (node) {
-		id = boost::lexical_cast<int>(node.getContent());
+		id = node.getContentInt();
 		m_pProjector = CProjector3DManager::getSingleton().get(id);
 		if (!m_pProjector) {
 			// TODO: Report
@@ -122,21 +120,21 @@ bool CReconstructionAlgorithm3D::initialize(const Config& _cfg)
 	// sinogram data
 	node = _cfg.self.getSingleNode("ProjectionDataId");
 	ASTRA_CONFIG_CHECK(node, "Reconstruction3D", "No ProjectionDataId tag specified.");
-	id = boost::lexical_cast<int>(node.getContent());
+	id = node.getContentInt();
 	m_pSinogram = dynamic_cast<CFloat32ProjectionData3D*>(CData3DManager::getSingleton().get(id));
 	CC.markNodeParsed("ProjectionDataId");
 
 	// reconstruction data
 	node = _cfg.self.getSingleNode("ReconstructionDataId");
 	ASTRA_CONFIG_CHECK(node, "Reconstruction3D", "No ReconstructionDataId tag specified.");
-	id = boost::lexical_cast<int>(node.getContent());
+	id = node.getContentInt();
 	m_pReconstruction = dynamic_cast<CFloat32VolumeData3D*>(CData3DManager::getSingleton().get(id));
 	CC.markNodeParsed("ReconstructionDataId");
 
 	// fixed mask
 	if (_cfg.self.hasOption("ReconstructionMaskId")) {
 		m_bUseReconstructionMask = true;
-		id = boost::lexical_cast<int>(_cfg.self.getOption("ReconstructionMaskId"));
+		id = _cfg.self.getOptionInt("ReconstructionMaskId");
 		m_pReconstructionMask = dynamic_cast<CFloat32VolumeData3D*>(CData3DManager::getSingleton().get(id));
 	}
 	CC.markOptionParsed("ReconstructionMaskId");
@@ -144,7 +142,7 @@ bool CReconstructionAlgorithm3D::initialize(const Config& _cfg)
 	// fixed mask
 	if (_cfg.self.hasOption("SinogramMaskId")) {
 		m_bUseSinogramMask = true;
-		id = boost::lexical_cast<int>(_cfg.self.getOption("SinogramMaskId"));
+		id = _cfg.self.getOptionInt("SinogramMaskId");
 		m_pSinogramMask = dynamic_cast<CFloat32ProjectionData3D*>(CData3DManager::getSingleton().get(id));
 	}
 	CC.markOptionParsed("SinogramMaskId");
diff --git a/src/ReconstructionAlgorithmMultiSlice2D.cpp b/src/ReconstructionAlgorithmMultiSlice2D.cpp
index fe64c86..39c337f 100644
--- a/src/ReconstructionAlgorithmMultiSlice2D.cpp
+++ b/src/ReconstructionAlgorithmMultiSlice2D.cpp
@@ -28,8 +28,6 @@ $Id$
 
 #include "astra/ReconstructionAlgorithmMultiSlice2D.h"
 
-#include <boost/lexical_cast.hpp>
-
 #include "astra/AstraObjectManager.h"
 
 using namespace std;
@@ -96,7 +94,7 @@ bool CReconstructionAlgorithmMultiSlice2D::initialize(const Config& _cfg)
 	// projector
 	XMLNode* node = _cfg.self->getSingleNode("ProjectorId");
 	ASTRA_CONFIG_CHECK(node, "Reconstruction2D", "No ProjectorId tag specified.");
-	int id = boost::lexical_cast<int>(node->getContent());
+	int id = node->getContentInt();
 	m_pProjector = CProjector2DManager::getSingleton().get(id);
 	ASTRA_DELETE(node);
 	CC.markNodeParsed("ProjectorId");
@@ -125,7 +123,7 @@ bool CReconstructionAlgorithmMultiSlice2D::initialize(const Config& _cfg)
 	// reconstruction masks
 	if (_cfg.self->hasOption("ReconstructionMaskId")) {
 		m_bUseReconstructionMask = true;
-		id = boost::lexical_cast<int>(_cfg.self->getOption("ReconstructionMaskId"));
+		id = _cfg.self->getOptionInt("ReconstructionMaskId");
 		m_pReconstructionMask = dynamic_cast<CFloat32VolumeData2D*>(CData2DManager::getSingleton().get(id));
 	}
 	CC.markOptionParsed("ReconstructionMaskId");
@@ -133,7 +131,7 @@ bool CReconstructionAlgorithmMultiSlice2D::initialize(const Config& _cfg)
 	// sinogram masks
 	if (_cfg.self->hasOption("SinogramMaskId")) {
 		m_bUseSinogramMask = true;
-		id = boost::lexical_cast<int>(_cfg.self->getOption("SinogramMaskId"));
+		id = _cfg.self->getOptionInt("SinogramMaskId");
 		m_pSinogramMask = dynamic_cast<CFloat32ProjectionData2D*>(CData2DManager::getSingleton().get(id));
 	}
 	CC.markOptionParsed("SinogramMaskId");
diff --git a/src/SartAlgorithm.cpp b/src/SartAlgorithm.cpp
index e4dc5c7..9346160 100644
--- a/src/SartAlgorithm.cpp
+++ b/src/SartAlgorithm.cpp
@@ -28,8 +28,6 @@ $Id$
 
 #include "astra/SartAlgorithm.h"
 
-#include <boost/lexical_cast.hpp>
-
 #include "astra/AstraObjectManager.h"
 #include "astra/DataProjectorPolicies.h"
 
diff --git a/src/SirtAlgorithm.cpp b/src/SirtAlgorithm.cpp
index ae3b3bc..d9f3a65 100644
--- a/src/SirtAlgorithm.cpp
+++ b/src/SirtAlgorithm.cpp
@@ -28,8 +28,6 @@ $Id$
 
 #include "astra/SirtAlgorithm.h"
 
-#include <boost/lexical_cast.hpp>
-
 #include "astra/AstraObjectManager.h"
 #include "astra/DataProjectorPolicies.h"
 
diff --git a/src/SparseMatrixProjectionGeometry2D.cpp b/src/SparseMatrixProjectionGeometry2D.cpp
index 073720f..358c992 100644
--- a/src/SparseMatrixProjectionGeometry2D.cpp
+++ b/src/SparseMatrixProjectionGeometry2D.cpp
@@ -28,7 +28,6 @@ $Id$
 
 #include "astra/SparseMatrixProjectionGeometry2D.h"
 
-#include <boost/lexical_cast.hpp>
 #include "astra/AstraObjectManager.h"
 
 
@@ -100,7 +99,7 @@ bool CSparseMatrixProjectionGeometry2D::initialize(const Config& _cfg)
 	// get matrix
 	XMLNode node = _cfg.self.getSingleNode("MatrixID");
 	ASTRA_CONFIG_CHECK(node, "SparseMatrixProjectionGeometry2D", "No MatrixID tag specified.");
-	int id = boost::lexical_cast<int>(node.getContent());
+	int id = node.getContentInt();
 	m_pMatrix = CMatrixManager::getSingleton().get(id);
 	CC.markNodeParsed("MatrixID");
 
diff --git a/src/SparseMatrixProjector2D.cpp b/src/SparseMatrixProjector2D.cpp
index bc2e974..be7e069 100644
--- a/src/SparseMatrixProjector2D.cpp
+++ b/src/SparseMatrixProjector2D.cpp
@@ -29,7 +29,6 @@ $Id$
 #include "astra/SparseMatrixProjector2D.h"
 
 #include <cmath>
-#include <boost/lexical_cast.hpp>
 
 #include "astra/DataProjectorPolicies.h"
 
diff --git a/src/Utilities.cpp b/src/Utilities.cpp
index cb54e93..4b80503 100644
--- a/src/Utilities.cpp
+++ b/src/Utilities.cpp
@@ -28,4 +28,99 @@ $Id$
 
 #include "astra/Utilities.h"
 
+#include <boost/algorithm/string.hpp>
+#include <boost/algorithm/string/split.hpp>
+#include <boost/algorithm/string/classification.hpp>
 
+#include <sstream>
+#include <locale>
+#include <iomanip>
+
+namespace astra {
+
+namespace StringUtil {
+
+// Parse s as an int in the classic locale; throws bad_cast unless all of s is one integer.
+int stringToInt(const std::string& s)
+{
+	int i; // must be int: a double here would accept "1.5"/"1e3" and truncate silently
+	std::istringstream iss(s);
+	iss.imbue(std::locale::classic());
+	iss >> i;
+	if (iss.fail() || !iss.eof())
+		throw bad_cast();
+	return i;
+}
+
+float stringToFloat(const std::string& s)
+{
+	return (float)stringToDouble(s);
+}
+
+double stringToDouble(const std::string& s)
+{
+	double f;
+	std::istringstream iss(s);
+	iss.imbue(std::locale::classic());
+	iss >> f;
+	if (iss.fail() || !iss.eof())
+		throw bad_cast();
+	return f;
+}
+
+template<> float stringTo(const std::string& s) { return stringToFloat(s); }
+template<> double stringTo(const std::string& s) { return stringToDouble(s); }
+
+std::vector<float> stringToFloatVector(const std::string &s)
+{
+	return stringToVector<float>(s);
+}
+
+std::vector<double> stringToDoubleVector(const std::string &s)
+{
+	return stringToVector<double>(s);
+}
+
+template<typename T>
+std::vector<T> stringToVector(const std::string& s)
+{
+	// split
+	std::vector<std::string> items;
+	boost::split(items, s, boost::is_any_of(",;"));
+
+	// init list
+	std::vector<T> out;
+	out.resize(items.size());
+
+	// loop elements
+	for (unsigned int i = 0; i < items.size(); i++) {
+		out[i] = stringTo<T>(items[i]);
+	}
+	return out;
+}
+
+
+std::string floatToString(float f)
+{
+	std::ostringstream s;
+	s.imbue(std::locale::classic());
+	s << std::setprecision(9) << f;
+	return s.str();
+}
+
+std::string doubleToString(double f)
+{
+	std::ostringstream s;
+	s.imbue(std::locale::classic());
+	s << std::setprecision(17) << f;
+	return s.str();
+}
+
+
+template<> std::string toString(float f) { return floatToString(f); }
+template<> std::string toString(double f) { return doubleToString(f); }
+
+
+}
+
+}
diff --git a/src/VolumeGeometry2D.cpp b/src/VolumeGeometry2D.cpp
index 6eea1b2..9d74e47 100644
--- a/src/VolumeGeometry2D.cpp
+++ b/src/VolumeGeometry2D.cpp
@@ -28,7 +28,6 @@ $Id$
 
 #include "astra/VolumeGeometry2D.h"
 
-#include <boost/lexical_cast.hpp>
 #include <cmath>
 
 namespace astra
@@ -166,13 +165,13 @@ bool CVolumeGeometry2D::initialize(const Config& _cfg)
 	// Required: GridColCount
 	XMLNode node = _cfg.self.getSingleNode("GridColCount");
 	ASTRA_CONFIG_CHECK(node, "ReconstructionGeometry2D", "No GridColCount tag specified.");
-	m_iGridColCount = boost::lexical_cast<int>(node.getContent());
+	m_iGridColCount = node.getContentInt();
 	CC.markNodeParsed("GridColCount");
 
 	// Required: GridRowCount
 	node = _cfg.self.getSingleNode("GridRowCount");
 	ASTRA_CONFIG_CHECK(node, "ReconstructionGeometry2D", "No GridRowCount tag specified.");
-	m_iGridRowCount = boost::lexical_cast<int>(node.getContent());
+	m_iGridRowCount = node.getContentInt();
 	CC.markNodeParsed("GridRowCount");
 
 	// Optional: Window minima and maxima
diff --git a/src/VolumeGeometry3D.cpp b/src/VolumeGeometry3D.cpp
index 3de146f..5d72c24 100644
--- a/src/VolumeGeometry3D.cpp
+++ b/src/VolumeGeometry3D.cpp
@@ -28,8 +28,6 @@ $Id$
 
 #include "astra/VolumeGeometry3D.h"
 
-#include <boost/lexical_cast.hpp>
-
 namespace astra
 {
 
@@ -196,19 +194,19 @@ bool CVolumeGeometry3D::initialize(const Config& _cfg)
 	// Required: GridColCount
 	XMLNode node = _cfg.self.getSingleNode("GridColCount");
 	ASTRA_CONFIG_CHECK(node, "ReconstructionGeometry2D", "No GridColCount tag specified.");
-	m_iGridColCount = boost::lexical_cast<int>(node.getContent());
+	m_iGridColCount = node.getContentInt();
 	CC.markNodeParsed("GridColCount");
 
 	// Required: GridRowCount
 	node = _cfg.self.getSingleNode("GridRowCount");
 	ASTRA_CONFIG_CHECK(node, "ReconstructionGeometry2D", "No GridRowCount tag specified.");
-	m_iGridRowCount = boost::lexical_cast<int>(node.getContent());
+	m_iGridRowCount = node.getContentInt();
 	CC.markNodeParsed("GridRowCount");
 
 	// Required: GridRowCount
 	node = _cfg.self.getSingleNode("GridSliceCount");
 	ASTRA_CONFIG_CHECK(node, "ReconstructionGeometry2D", "No GridSliceCount tag specified.");
-	m_iGridSliceCount = boost::lexical_cast<int>(node.getContent());
+	m_iGridSliceCount = node.getContentInt();
 	CC.markNodeParsed("GridSliceCount");
 
 	// Optional: Window minima and maxima
diff --git a/src/XMLNode.cpp b/src/XMLNode.cpp
index 0ec701f..40a9b22 100644
--- a/src/XMLNode.cpp
+++ b/src/XMLNode.cpp
@@ -31,12 +31,6 @@ $Id$
 #include "rapidxml/rapidxml.hpp"
 #include "rapidxml/rapidxml_print.hpp"
 
-#include <boost/lexical_cast.hpp>
-#include <boost/algorithm/string.hpp>
-#include <boost/algorithm/string/split.hpp>
-#include <boost/algorithm/string/classification.hpp>
-
-
 
 using namespace rapidxml;
 using namespace astra;
@@ -138,8 +132,13 @@ string XMLNode::getContent() const
 // Get node content - NUMERICAL
 float32 XMLNode::getContentNumerical() const
 {
-	return boost::lexical_cast<float32>(getContent());
+	return StringUtil::stringToFloat(getContent());
 }
+int XMLNode::getContentInt() const // Get node content - INTEGER
+{
+	return StringUtil::stringToInt(getContent()); // convert the node's text content with StringUtil::stringToInt
+}
+
 
 //-----------------------------------------------------------------------------	
 // Get node content - BOOLEAN
@@ -154,7 +153,7 @@ bool XMLNode::getContentBool() const
 vector<string> XMLNode::getContentArray() const
 {
 	// get listsize
-	int iSize = boost::lexical_cast<int>(getAttribute("listsize"));
+	int iSize = StringUtil::stringToInt(getAttribute("listsize"));
 	// create result array
 	vector<string> res(iSize);
 	// loop all list item nodes
@@ -175,40 +174,12 @@ vector<string> XMLNode::getContentArray() const
 // NB: A 2D matrix is returned as a linear list
 vector<float32> XMLNode::getContentNumericalArray() const
 {
-	string input = getContent();
-
-	// split
-	std::vector<std::string> items;
-	boost::split(items, input, boost::is_any_of(",;"));
-
-	// init list
-	vector<float32> out;
-	out.resize(items.size());
-
-	// loop elements
-	for (unsigned int i = 0; i < items.size(); i++) {
-		out[i] = boost::lexical_cast<float32>(items[i]);
-	}
-	return out;
+	return StringUtil::stringToFloatVector(getContent());
 }
 
 vector<double> XMLNode::getContentNumericalArrayDouble() const
 {
-	string input = getContent();
-
-	// split
-	std::vector<std::string> items;
-	boost::split(items, input, boost::is_any_of(",;"));
-
-	// init list
-	vector<double> out;
-	out.resize(items.size());
-
-	// loop elements
-	for (unsigned int i = 0; i < items.size(); i++) {
-		out[i] = boost::lexical_cast<double>(items[i]);
-	}
-	return out;
+	return StringUtil::stringToDoubleVector(getContent());
 }
 
 //-----------------------------------------------------------------------------	
@@ -235,14 +206,20 @@ string XMLNode::getAttribute(string _sName, string _sDefaultValue) const
 float32 XMLNode::getAttributeNumerical(string _sName, float32 _fDefaultValue) const
 {
 	if (!hasAttribute(_sName)) return _fDefaultValue;
-	return boost::lexical_cast<float32>(getAttribute(_sName));
+	return StringUtil::stringToFloat(getAttribute(_sName));
 }
 double XMLNode::getAttributeNumericalDouble(string _sName, double _fDefaultValue) const
 {
 	if (!hasAttribute(_sName)) return _fDefaultValue;
-	return boost::lexical_cast<double>(getAttribute(_sName));
+	return StringUtil::stringToDouble(getAttribute(_sName));
+}
+int XMLNode::getAttributeInt(string _sName, int _iDefaultValue) const // Get attribute - INTEGER
+{
+	if (!hasAttribute(_sName)) return _iDefaultValue; // attribute absent: fall back to the caller's default
+	return StringUtil::stringToInt(getAttribute(_sName)); // attribute present: convert its text with StringUtil::stringToInt
 }
 
+
 //-----------------------------------------------------------------------------	
 // Get attribute - BOOLEAN
 bool XMLNode::getAttributeBool(string _sName, bool _bDefaultValue) const
@@ -287,9 +264,15 @@ string XMLNode::getOption(string _sKey, string _sDefaultValue) const
 float32 XMLNode::getOptionNumerical(string _sKey, float32 _fDefaultValue) const
 {
 	if (!hasOption(_sKey)) return _fDefaultValue;
-	return boost::lexical_cast<float32>(getOption(_sKey));
+	return StringUtil::stringToFloat(getOption(_sKey));
+}
+int XMLNode::getOptionInt(string _sKey, int _iDefaultValue) const // Get option - INTEGER
+{
+	if (!hasOption(_sKey)) return _iDefaultValue; // option absent: fall back to the caller's default
+	return StringUtil::stringToInt(getOption(_sKey)); // option present: convert its text with StringUtil::stringToInt
 }
 
+
 //-----------------------------------------------------------------------------	
 // Get option - BOOL
 bool XMLNode::getOptionBool(string _sKey, bool _bDefaultValue) const
@@ -386,7 +369,7 @@ void XMLNode::setContent(string _sText)
 // Set content - FLOAT
 void XMLNode::setContent(float32 _fValue) 
 {
-	setContent(boost::lexical_cast<string>(_fValue));
+	setContent(StringUtil::floatToString(_fValue));
 }
 
 //-----------------------------------------------------------------------------	
@@ -394,9 +377,9 @@ void XMLNode::setContent(float32 _fValue)
 
 template<typename T>
 static std::string setContentList_internal(T* pfList, int _iSize) {
-	std::string str = (_iSize > 0) ? boost::lexical_cast<std::string>(pfList[0]) : "";
+	std::string str = (_iSize > 0) ? StringUtil::toString(pfList[0]) : "";
 	for (int i = 1; i < _iSize; i++) {
-		str += "," + boost::lexical_cast<std::string>(pfList[i]);
+		str += "," + StringUtil::toString(pfList[i]);
 	}
 	return str;
 }
@@ -431,9 +414,9 @@ static std::string setContentMatrix_internal(T* _pfMatrix, int _iWidth, int _iHe
 
 	for (int y = 0; y < _iHeight; ++y) {
 		if (_iWidth > 0)
-			str += boost::lexical_cast<std::string>(_pfMatrix[0*s1 + y*s2]);
+			str += StringUtil::toString(_pfMatrix[0*s1 + y*s2]);
 			for (int x = 1; x < _iWidth; x++)
-				str += "," + boost::lexical_cast<std::string>(_pfMatrix[x*s1 + y*s2]);
+				str += "," + StringUtil::toString(_pfMatrix[x*s1 + y*s2]);
 
 		if (y != _iHeight-1)
 			str += ";";
@@ -468,7 +451,7 @@ void XMLNode::addAttribute(string _sName, string _sText)
 // Add attribute - FLOAT
 void XMLNode::addAttribute(string _sName, float32 _fValue) 
 {
-	addAttribute(_sName, boost::lexical_cast<string>(_fValue)); 
+	addAttribute(_sName, StringUtil::floatToString(_fValue));
 }
 
 //-----------------------------------------------------------------------------	
-- 
cgit v1.2.3


From 86ed34e9a5fa408c9338cd5c2871f7f7953806b7 Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Thu, 14 Jan 2016 10:39:43 +0100
Subject: Fix projections parallel to XZ or YZ planes

The early-cutoff optimization in CompositeGeometryManager was
failing to properly handle +/-Inf.
---
 src/CompositeGeometryManager.cpp | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'src')

diff --git a/src/CompositeGeometryManager.cpp b/src/CompositeGeometryManager.cpp
index 9be4797..41f6319 100644
--- a/src/CompositeGeometryManager.cpp
+++ b/src/CompositeGeometryManager.cpp
@@ -247,6 +247,18 @@ CCompositeGeometryManager::CPart* CCompositeGeometryManager::CVolumePart::reduce
 
 	//ASTRA_DEBUG("coord extent: %f - %f", zmin, zmax);
 
+	// Clip both zmin and zmax to get rid of extreme (or infinite) values
+	// NB: When individual pz values are +/-Inf, the sign is determined
+	// by ray direction and on which side of the face the ray passes.
+	if (zmin < pGeom->getWindowMinZ() - 2*pixz)
+		zmin = pGeom->getWindowMinZ() - 2*pixz;
+	if (zmin > pGeom->getWindowMaxZ() + 2*pixz)
+		zmin = pGeom->getWindowMaxZ() + 2*pixz;
+	if (zmax < pGeom->getWindowMinZ() - 2*pixz)
+		zmax = pGeom->getWindowMinZ() - 2*pixz;
+	if (zmax > pGeom->getWindowMaxZ() + 2*pixz)
+		zmax = pGeom->getWindowMaxZ() + 2*pixz;
+
 	zmin = (zmin - pixz - pGeom->getWindowMinZ()) / pixz;
 	zmax = (zmax + pixz - pGeom->getWindowMinZ()) / pixz;
 
-- 
cgit v1.2.3


From 673bf2fd8fd40053ac6baba229953f2e58b7cb42 Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Wed, 6 Jan 2016 14:27:13 +0100
Subject: Fix whitespace

---
 src/CompositeGeometryManager.cpp | 42 ++++++++++++++++++++--------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

(limited to 'src')

diff --git a/src/CompositeGeometryManager.cpp b/src/CompositeGeometryManager.cpp
index 41f6319..eed06c4 100644
--- a/src/CompositeGeometryManager.cpp
+++ b/src/CompositeGeometryManager.cpp
@@ -305,37 +305,37 @@ CCompositeGeometryManager::CPart* CCompositeGeometryManager::CVolumePart::reduce
 
 
 static size_t ceildiv(size_t a, size_t b) {
-    return (a + b - 1) / b;
+	return (a + b - 1) / b;
 }
 
 static size_t computeVerticalSplit(size_t maxBlock, int div, size_t sliceCount)
 {
-    size_t blockSize = maxBlock;
-    size_t blockCount = ceildiv(sliceCount, blockSize);
+	size_t blockSize = maxBlock;
+	size_t blockCount = ceildiv(sliceCount, blockSize);
 
-    // Increase number of blocks to be divisible by div
-    size_t divCount = div * ceildiv(blockCount, div);
+	// Increase number of blocks to be divisible by div
+	size_t divCount = div * ceildiv(blockCount, div);
 
-    // If divCount is above sqrt(number of slices), then
-    // we can't guarantee divisibility by div, but let's try anyway
-    if (ceildiv(sliceCount, ceildiv(sliceCount, divCount)) % div == 0) {
-        blockCount = divCount;
-    } else {
-        // If divisibility isn't achievable, we may want to optimize
-        // differently.
-        // TODO: Figure out how to model and optimize this.
-    }
+	// If divCount is above sqrt(number of slices), then
+	// we can't guarantee divisibility by div, but let's try anyway
+	if (ceildiv(sliceCount, ceildiv(sliceCount, divCount)) % div == 0) {
+		blockCount = divCount;
+	} else {
+		// If divisibility isn't achievable, we may want to optimize
+		// differently.
+		// TODO: Figure out how to model and optimize this.
+	}
 
-    // Final adjustment to make blocks more evenly sized
-    // (This can't make the blocks larger)
-    blockSize = ceildiv(sliceCount, blockCount); 
+	// Final adjustment to make blocks more evenly sized
+	// (This can't make the blocks larger)
+	blockSize = ceildiv(sliceCount, blockCount);
 
-    ASTRA_DEBUG("%ld %ld -> %ld * %ld", sliceCount, maxBlock, blockCount, blockSize);
+	ASTRA_DEBUG("%ld %ld -> %ld * %ld", sliceCount, maxBlock, blockCount, blockSize);
 
-    assert(blockSize <= maxBlock);
-    assert((divCount * divCount > sliceCount) || (blockCount % div) == 0);
+	assert(blockSize <= maxBlock);
+	assert((divCount * divCount > sliceCount) || (blockCount % div) == 0);
 
-    return blockSize;
+	return blockSize;
 }
 
 template<class V, class P>
-- 
cgit v1.2.3


From 8e68248bd587456325101911a927b206b5450b31 Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Wed, 6 Jan 2016 13:29:01 +0100
Subject: Remove noisy debugging output

---
 src/ConeProjectionGeometry3D.cpp | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'src')

diff --git a/src/ConeProjectionGeometry3D.cpp b/src/ConeProjectionGeometry3D.cpp
index 99b4bf4..96b04fb 100644
--- a/src/ConeProjectionGeometry3D.cpp
+++ b/src/ConeProjectionGeometry3D.cpp
@@ -256,9 +256,6 @@ void CConeProjectionGeometry3D::projectPoint(double fX, double fY, double fZ,
 
 	// Scale fS to detector plane
 	fU = detectorOffsetXToColIndexFloat( (fS * (m_fOriginSourceDistance + m_fOriginDetectorDistance)) / fD );
-
-	ASTRA_DEBUG("alpha: %f, D: %f, V: %f, S: %f, U: %f", alpha, fD, fV, fS, fU);
-
 }
 
 void CConeProjectionGeometry3D::backprojectPointX(int iAngleIndex, double fU, double fV,
-- 
cgit v1.2.3


From 687c5e244e46e51786afad77f5015cae9abad129 Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Wed, 6 Jan 2016 15:10:34 +0100
Subject: Add multi-GPU support to CompositeGeometryManager

---
 src/CompositeGeometryManager.cpp | 434 +++++++++++++++++++++++++++++----------
 1 file changed, 320 insertions(+), 114 deletions(-)

(limited to 'src')

diff --git a/src/CompositeGeometryManager.cpp b/src/CompositeGeometryManager.cpp
index eed06c4..d1b713e 100644
--- a/src/CompositeGeometryManager.cpp
+++ b/src/CompositeGeometryManager.cpp
@@ -44,11 +44,31 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
 #include "../cuda/3d/mem3d.h"
 
 #include <cstring>
+#include <sstream>
+
+#ifndef USE_PTHREADS
+#include <boost/thread/mutex.hpp>
+#include <boost/thread.hpp>
+#endif
 
 namespace astra {
 
+
+SGPUParams* CCompositeGeometryManager::s_params = 0;
+
+CCompositeGeometryManager::CCompositeGeometryManager()
+{
+	m_iMaxSize = 0; // 0 makes doJobs() query the GPU for its available memory
+	// Copy the global defaults, if setGlobalGPUParams() has stored any
+	if (s_params) {
+		m_iMaxSize = s_params->memory;
+		m_GPUIndices = s_params->GPUIndices;
+	}
+}
+
+
 // JOB:
-//  
+//
 // VolumePart
 // ProjectionPart
 // FP-or-BP
@@ -76,7 +96,6 @@ namespace astra {
 //   (First approach: 0.5/0.5)
 
 
-
 bool CCompositeGeometryManager::splitJobs(TJobSet &jobs, size_t maxSize, int div, TJobSet &split)
 {
 	split.clear();
@@ -848,6 +867,260 @@ bool CCompositeGeometryManager::doBP(CProjector3D *pProjector, const std::vector
 
 
 
+// Runs all jobs that write to one output part: each input part is copied to the
+// GPU and forward/back-projected into a single GPU buffer, which is then copied
+// back into the output's sub-block. NOTE(review): errors are only logged; always returns true.
+static bool doJob(const CCompositeGeometryManager::TJobSet::const_iterator& iter)
+{
+	CCompositeGeometryManager::CPart* output = iter->first;
+	const CCompositeGeometryManager::TJobList& L = iter->second;
+	assert(!L.empty());
+	bool zero = L.begin()->eMode == CCompositeGeometryManager::SJob::MODE_SET; // first job's mode decides whether the output starts zeroed
+	size_t outx, outy, outz;
+	output->getDims(outx, outy, outz);
+
+	if (L.begin()->eType == CCompositeGeometryManager::SJob::JOB_NOP) {
+		// just zero output?
+		if (zero) {
+			for (size_t z = 0; z < outz; ++z) {
+				for (size_t y = 0; y < outy; ++y) {
+					float* ptr = output->pData->getData();
+					ptr += (z + output->subZ) * (size_t)output->pData->getHeight() * (size_t)output->pData->getWidth(); // slice offset: must use the part's Z origin
+					ptr += (y + output->subY) * (size_t)output->pData->getWidth();
+					ptr += output->subX;
+					memset(ptr, 0, sizeof(float) * outx); // clear one row of the sub-block
+				}
+			}
+		}
+		return true;
+	}
+
+	// Describe where the output sub-block sits inside the full host array
+	astraCUDA3d::SSubDimensions3D dstdims;
+	dstdims.nx = output->pData->getWidth();
+	dstdims.pitch = dstdims.nx;
+	dstdims.ny = output->pData->getHeight();
+	dstdims.nz = output->pData->getDepth();
+	dstdims.subnx = outx;
+	dstdims.subny = outy;
+	dstdims.subnz = outz;
+	ASTRA_DEBUG("dstdims: %d,%d,%d in %d,%d,%d", dstdims.subnx, dstdims.subny, dstdims.subnz, dstdims.nx, dstdims.ny, dstdims.nz);
+	dstdims.subx = output->subX;
+	dstdims.suby = output->subY;
+	dstdims.subz = output->subZ;
+	float *dst = output->pData->getData();
+
+	astraCUDA3d::MemHandle3D outputMem = astraCUDA3d::allocateGPUMemory(outx, outy, outz, zero ? astraCUDA3d::INIT_ZERO : astraCUDA3d::INIT_NO);
+	bool ok = outputMem; // NOTE(review): overwritten below before it is ever checked
+
+	for (CCompositeGeometryManager::TJobList::const_iterator i = L.begin(); i != L.end(); ++i) {
+		const CCompositeGeometryManager::SJob &j = *i;
+
+		assert(j.pInput);
+		// Kernel and supersampling settings come from the projector when it is a CUDA projector
+		CCudaProjector3D *projector = dynamic_cast<CCudaProjector3D*>(j.pProjector);
+		Cuda3DProjectionKernel projKernel = ker3d_default;
+		int detectorSuperSampling = 1;
+		int voxelSuperSampling = 1;
+		if (projector) {
+			projKernel = projector->getProjectionKernel();
+			detectorSuperSampling = projector->getDetectorSuperSampling();
+			voxelSuperSampling = projector->getVoxelSuperSampling();
+		}
+
+		size_t inx, iny, inz;
+		j.pInput->getDims(inx, iny, inz);
+		astraCUDA3d::MemHandle3D inputMem = astraCUDA3d::allocateGPUMemory(inx, iny, inz, astraCUDA3d::INIT_NO); // NOTE(review): result is not checked
+
+		astraCUDA3d::SSubDimensions3D srcdims;
+		srcdims.nx = j.pInput->pData->getWidth();
+		srcdims.pitch = srcdims.nx;
+		srcdims.ny = j.pInput->pData->getHeight();
+		srcdims.nz = j.pInput->pData->getDepth();
+		srcdims.subnx = inx;
+		srcdims.subny = iny;
+		srcdims.subnz = inz;
+		srcdims.subx = j.pInput->subX;
+		srcdims.suby = j.pInput->subY;
+		srcdims.subz = j.pInput->subZ;
+		const float *src = j.pInput->pData->getDataConst();
+
+		ok = astraCUDA3d::copyToGPUMemory(src, inputMem, srcdims);
+		if (!ok) ASTRA_ERROR("Error copying input data to GPU");
+
+		if (j.eType == CCompositeGeometryManager::SJob::JOB_FP) {
+			assert(dynamic_cast<CCompositeGeometryManager::CVolumePart*>(j.pInput.get()));
+			assert(dynamic_cast<CCompositeGeometryManager::CProjectionPart*>(j.pOutput.get()));
+
+			ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing FP");
+
+			ok = astraCUDA3d::FP(((CCompositeGeometryManager::CProjectionPart*)j.pOutput.get())->pGeom, outputMem, ((CCompositeGeometryManager::CVolumePart*)j.pInput.get())->pGeom, inputMem, detectorSuperSampling, projKernel);
+			if (!ok) ASTRA_ERROR("Error performing sub-FP");
+			ASTRA_DEBUG("CCompositeGeometryManager::doJobs: FP done");
+		} else if (j.eType == CCompositeGeometryManager::SJob::JOB_BP) {
+			assert(dynamic_cast<CCompositeGeometryManager::CVolumePart*>(j.pOutput.get()));
+			assert(dynamic_cast<CCompositeGeometryManager::CProjectionPart*>(j.pInput.get()));
+
+			ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing BP");
+
+			ok = astraCUDA3d::BP(((CCompositeGeometryManager::CProjectionPart*)j.pInput.get())->pGeom, inputMem, ((CCompositeGeometryManager::CVolumePart*)j.pOutput.get())->pGeom, outputMem, voxelSuperSampling);
+			if (!ok) ASTRA_ERROR("Error performing sub-BP");
+			ASTRA_DEBUG("CCompositeGeometryManager::doJobs: BP done");
+		} else {
+			assert(false);
+		}
+
+		ok = astraCUDA3d::freeGPUMemory(inputMem);
+		if (!ok) ASTRA_ERROR("Error freeing GPU memory");
+
+	}
+
+	ok = astraCUDA3d::copyFromGPUMemory(dst, outputMem, dstdims);
+	if (!ok) ASTRA_ERROR("Error copying output data from GPU");
+
+	ok = astraCUDA3d::freeGPUMemory(outputMem);
+	if (!ok) ASTRA_ERROR("Error freeing GPU memory");
+
+	return true;
+}
+
+
+// Mutex-protected cursor over a job set; worker threads call receive() to
+// claim the next entry. Holds a reference to the job set, which must outlive it.
+class WorkQueue {
+public:
+	WorkQueue(CCompositeGeometryManager::TJobSet &_jobs) : m_jobs(_jobs) {
+#ifdef USE_PTHREADS
+		pthread_mutex_init(&m_mutex, 0);
+#endif
+		m_iter = m_jobs.begin();
+	}
+	// Destroy the pthread mutex created in the constructor (boost::mutex needs no explicit cleanup)
+	~WorkQueue() {
+#ifdef USE_PTHREADS
+		pthread_mutex_destroy(&m_mutex);
+#endif
+	}
+	// Store the next unclaimed job in i; returns false once the set is exhausted
+	bool receive(CCompositeGeometryManager::TJobSet::const_iterator &i) {
+		lock();
+		if (m_iter == m_jobs.end()) {
+			unlock();
+			return false;
+		}
+		i = m_iter++;
+		unlock();
+		return true;
+	}
+#ifdef USE_PTHREADS
+	void lock() {
+		// TODO: check mutex op return values
+		pthread_mutex_lock(&m_mutex);
+	}
+	void unlock() {
+		// TODO: check mutex op return values
+		pthread_mutex_unlock(&m_mutex);
+	}
+#else
+	void lock() { m_mutex.lock(); }
+	void unlock() { m_mutex.unlock(); }
+#endif
+private:
+	CCompositeGeometryManager::TJobSet &m_jobs; // the job set being distributed (not owned)
+	CCompositeGeometryManager::TJobSet::const_iterator m_iter; // next entry to hand out; accessed under m_mutex
+#ifdef USE_PTHREADS
+	pthread_mutex_t m_mutex;
+#else
+	boost::mutex m_mutex;
+#endif
+};
+
+struct WorkThreadInfo { // per-thread arguments handed to the worker entry points
+	WorkQueue* m_queue; // shared queue the worker pulls jobs from
+	unsigned int m_iGPU; // GPU index the worker passes to astraCUDA3d::setGPUIndex
+};
+
+#ifndef USE_PTHREADS
+
+void runEntries_boost(WorkThreadInfo* info) // boost::thread entry point: runs jobs from info->m_queue until it is empty
+{
+	ASTRA_DEBUG("Launching thread on GPU %d\n", info->m_iGPU);
+	CCompositeGeometryManager::TJobSet::const_iterator i; // set by receive() to the claimed job
+	while (info->m_queue->receive(i)) {
+		ASTRA_DEBUG("Running block on GPU %d\n", info->m_iGPU);
+		astraCUDA3d::setGPUIndex(info->m_iGPU); // set this worker's GPU index before running the job
+		boost::this_thread::interruption_point(); // lets the thread be interrupted between jobs
+		doJob(i); // NOTE(review): return value is ignored
+		boost::this_thread::interruption_point();
+	}
+	ASTRA_DEBUG("Finishing thread on GPU %d\n", info->m_iGPU);
+}
+
+
+#else
+
+void* runEntries_pthreads(void* data) { // pthread entry point; data must point to a WorkThreadInfo
+	WorkThreadInfo* info = (WorkThreadInfo*)data;
+
+	ASTRA_DEBUG("Launching thread on GPU %d\n", info->m_iGPU);
+
+	CCompositeGeometryManager::TJobSet::const_iterator i; // set by receive() to the claimed job
+	// Run jobs from the shared queue until it is empty
+	while (info->m_queue->receive(i)) {
+		ASTRA_DEBUG("Running block on GPU %d\n", info->m_iGPU);
+		astraCUDA3d::setGPUIndex(info->m_iGPU); // set this worker's GPU index before running the job
+		pthread_testcancel(); // cancellation point between jobs
+		doJob(i); // NOTE(review): return value is ignored
+		pthread_testcancel();
+	}
+	ASTRA_DEBUG("Finishing thread on GPU %d\n", info->m_iGPU);
+
+	return 0; // no result is passed back to the joining thread
+}
+
+#endif
+
+
+void runWorkQueue(WorkQueue &queue, const std::vector<int> & iGPUIndices) { // runs the queue with one worker thread per GPU index and waits for all of them
+	int iThreadCount = iGPUIndices.size();
+	// infos is sized once below; each thread receives a pointer to its own element
+	std::vector<WorkThreadInfo> infos;
+#ifdef USE_PTHREADS
+	std::vector<pthread_t> threads;
+#else
+	std::vector<boost::thread*> threads;
+#endif
+	infos.resize(iThreadCount);
+	threads.resize(iThreadCount);
+
+	for (int i = 0; i < iThreadCount; ++i) {
+		infos[i].m_queue = &queue;
+		infos[i].m_iGPU = iGPUIndices[i];
+#ifdef USE_PTHREADS
+		pthread_create(&threads[i], 0, runEntries_pthreads, (void*)&infos[i]); // NOTE(review): return value unchecked; threads[i] is joined below regardless
+#else
+		threads[i] = new boost::thread(runEntries_boost, &infos[i]); // owned here; deleted after join below
+#endif
+	}
+
+	// Wait for them to finish
+	for (int i = 0; i < iThreadCount; ++i) {
+#ifdef USE_PTHREADS
+		pthread_join(threads[i], 0);
+#else
+		threads[i]->join();
+		delete threads[i];
+		threads[i] = 0;
+#endif
+	}
+}
+
+
+void CCompositeGeometryManager::setGPUIndices(const std::vector<int>& GPUIndices)
+{ // Sets the GPUs used by doJobs(); more than one index selects its multi-threaded path
+	m_GPUIndices = GPUIndices;
+}
+
 bool CCompositeGeometryManager::doJobs(TJobList &jobs)
 {
 	ASTRA_DEBUG("CCompositeGeometryManager::doJobs");
@@ -859,140 +1132,53 @@ bool CCompositeGeometryManager::doJobs(TJobList &jobs)
 		jobset[i->pOutput.get()].push_back(*i);
 	}
 
-	size_t maxSize = astraCUDA3d::availableGPUMemory();
+	size_t maxSize = m_iMaxSize;
 	if (maxSize == 0) {
-		ASTRA_WARN("Unable to get available GPU memory. Defaulting to 1GB.");
-		maxSize = 1024 * 1024 * 1024;
+		// Get memory from first GPU. Not optimal...
+		if (!m_GPUIndices.empty())
+			astraCUDA3d::setGPUIndex(m_GPUIndices[0]);
+		maxSize = astraCUDA3d::availableGPUMemory();
+		if (maxSize == 0) {
+			ASTRA_WARN("Unable to get available GPU memory. Defaulting to 1GB.");
+			maxSize = 1024 * 1024 * 1024;
+		} else {
+			ASTRA_DEBUG("Detected %lu bytes of GPU memory", maxSize);
+		}
 	} else {
-		ASTRA_DEBUG("Detected %lu bytes of GPU memory", maxSize);
+		ASTRA_DEBUG("Set to %lu bytes of GPU memory", maxSize);
 	}
 	maxSize = (maxSize * 9) / 10;
 
 	maxSize /= sizeof(float);
 	int div = 1;
-
-	// TODO: Multi-GPU support
+	if (!m_GPUIndices.empty())
+		div = m_GPUIndices.size();
 
 	// Split jobs to fit
 	TJobSet split;
 	splitJobs(jobset, maxSize, div, split);
 	jobset.clear();
 
-	// Run jobs
-	
-	for (TJobSet::iterator iter = split.begin(); iter != split.end(); ++iter) {
-
-		CPart* output = iter->first;
-		TJobList& L = iter->second;
-
-		assert(!L.empty());
+	if (m_GPUIndices.size() <= 1) {
 
-		bool zero = L.begin()->eMode == SJob::MODE_SET;
+		// Run jobs
+		ASTRA_DEBUG("Running single-threaded");
 
-		size_t outx, outy, outz;
-		output->getDims(outx, outy, outz);
+		if (!m_GPUIndices.empty())
+			astraCUDA3d::setGPUIndex(m_GPUIndices[0]);
 
-		if (L.begin()->eType == SJob::JOB_NOP) {
-			// just zero output?
-			if (zero) {
-				for (size_t z = 0; z < outz; ++z) {
-					for (size_t y = 0; y < outy; ++y) {
-						float* ptr = output->pData->getData();
-						ptr += (z + output->subX) * (size_t)output->pData->getHeight() * (size_t)output->pData->getWidth();
-						ptr += (y + output->subY) * (size_t)output->pData->getWidth();
-						ptr += output->subX;
-						memset(ptr, 0, sizeof(float) * outx);
-					}
-				}
-			}
-			continue;
+		for (TJobSet::const_iterator iter = split.begin(); iter != split.end(); ++iter) {
+			doJob(iter);
 		}
 
+	} else {
 
-		astraCUDA3d::SSubDimensions3D dstdims;
-		dstdims.nx = output->pData->getWidth();
-		dstdims.pitch = dstdims.nx;
-		dstdims.ny = output->pData->getHeight();
-		dstdims.nz = output->pData->getDepth();
-		dstdims.subnx = outx;
-		dstdims.subny = outy;
-		dstdims.subnz = outz;
-		ASTRA_DEBUG("dstdims: %d,%d,%d in %d,%d,%d", dstdims.subnx, dstdims.subny, dstdims.subnz, dstdims.nx, dstdims.ny, dstdims.nz);
-		dstdims.subx = output->subX;
-		dstdims.suby = output->subY;
-		dstdims.subz = output->subZ;
-		float *dst = output->pData->getData();
-
-		astraCUDA3d::MemHandle3D outputMem = astraCUDA3d::allocateGPUMemory(outx, outy, outz, zero ? astraCUDA3d::INIT_ZERO : astraCUDA3d::INIT_NO);
-		bool ok = outputMem;
-
-		for (TJobList::iterator i = L.begin(); i != L.end(); ++i) {
-			SJob &j = *i;
-
-			assert(j.pInput);
-
-			CCudaProjector3D *projector = dynamic_cast<CCudaProjector3D*>(j.pProjector);
-			Cuda3DProjectionKernel projKernel = ker3d_default;
-			int detectorSuperSampling = 1;
-			int voxelSuperSampling = 1;
-			if (projector) {
-				projKernel = projector->getProjectionKernel();
-				detectorSuperSampling = projector->getDetectorSuperSampling();
-				voxelSuperSampling = projector->getVoxelSuperSampling();
-			}
-
-			size_t inx, iny, inz;
-			j.pInput->getDims(inx, iny, inz);
-			astraCUDA3d::MemHandle3D inputMem = astraCUDA3d::allocateGPUMemory(inx, iny, inz, astraCUDA3d::INIT_NO);
-
-			astraCUDA3d::SSubDimensions3D srcdims;
-			srcdims.nx = j.pInput->pData->getWidth();
-			srcdims.pitch = srcdims.nx;
-			srcdims.ny = j.pInput->pData->getHeight();
-			srcdims.nz = j.pInput->pData->getDepth();
-			srcdims.subnx = inx;
-			srcdims.subny = iny;
-			srcdims.subnz = inz;
-			srcdims.subx = j.pInput->subX;
-			srcdims.suby = j.pInput->subY;
-			srcdims.subz = j.pInput->subZ;
-			const float *src = j.pInput->pData->getDataConst();
-
-			ok = astraCUDA3d::copyToGPUMemory(src, inputMem, srcdims);
-			if (!ok) ASTRA_ERROR("Error copying input data to GPU");
-
-			if (j.eType == SJob::JOB_FP) {
-				assert(dynamic_cast<CVolumePart*>(j.pInput.get()));
-				assert(dynamic_cast<CProjectionPart*>(j.pOutput.get()));
-
-				ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing FP");
-
-				ok = astraCUDA3d::FP(((CProjectionPart*)j.pOutput.get())->pGeom, outputMem, ((CVolumePart*)j.pInput.get())->pGeom, inputMem, detectorSuperSampling, projKernel);
-				if (!ok) ASTRA_ERROR("Error performing sub-FP");
-				ASTRA_DEBUG("CCompositeGeometryManager::doJobs: FP done");
-			} else if (j.eType == SJob::JOB_BP) {
-				assert(dynamic_cast<CVolumePart*>(j.pOutput.get()));
-				assert(dynamic_cast<CProjectionPart*>(j.pInput.get()));
-
-				ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing BP");
-
-				ok = astraCUDA3d::BP(((CProjectionPart*)j.pInput.get())->pGeom, inputMem, ((CVolumePart*)j.pOutput.get())->pGeom, outputMem, voxelSuperSampling);
-				if (!ok) ASTRA_ERROR("Error performing sub-BP");
-				ASTRA_DEBUG("CCompositeGeometryManager::doJobs: BP done");
-			} else {
-				assert(false);
-			}
+		ASTRA_DEBUG("Running multi-threaded");
 
-			ok = astraCUDA3d::freeGPUMemory(inputMem);
-			if (!ok) ASTRA_ERROR("Error freeing GPU memory");
+		WorkQueue wq(split);
 
-		}
+		runWorkQueue(wq, m_GPUIndices);
 
-		ok = astraCUDA3d::copyFromGPUMemory(dst, outputMem, dstdims);
-		if (!ok) ASTRA_ERROR("Error copying output data from GPU");
-		
-		ok = astraCUDA3d::freeGPUMemory(outputMem);
-		if (!ok) ASTRA_ERROR("Error freeing GPU memory");
 	}
 
 	return true;
@@ -1000,6 +1186,26 @@ bool CCompositeGeometryManager::doJobs(TJobList &jobs)
 
 
 
+
+//static
+// Replace the global GPU defaults (GPU index list and memory size) that the
+// CCompositeGeometryManager constructor copies into each new instance.
+void CCompositeGeometryManager::setGlobalGPUParams(const SGPUParams& params)
+{
+	delete s_params;
+	s_params = new SGPUParams;
+	*s_params = params;
+	ASTRA_DEBUG("CompositeGeometryManager: Setting global GPU params:");
+	std::ostringstream s;
+	s << "GPU indices:";
+	for (unsigned int i = 0; i < params.GPUIndices.size(); ++i)
+		s << " " << params.GPUIndices[i];
+	std::string ss = s.str();
+	ASTRA_DEBUG("%s", ss.c_str()); // text is passed as an argument, never as the format string
+	ASTRA_DEBUG("Memory: %llu", params.memory); // NOTE(review): assumes memory matches %llu (unsigned long long) - confirm
+}
+
+
 }
 
 #endif
-- 
cgit v1.2.3


From 56809b0359af7e9108adeb1fd21823a225edf6fa Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Wed, 20 Jan 2016 18:08:59 +0100
Subject: Remove dependency of libastra on libpython by refactoring
 PluginAlgorithm

---
 src/PluginAlgorithm.cpp | 367 +-----------------------------------------------
 1 file changed, 1 insertion(+), 366 deletions(-)

(limited to 'src')

diff --git a/src/PluginAlgorithm.cpp b/src/PluginAlgorithm.cpp
index 9fc511a..1bcfbdb 100644
--- a/src/PluginAlgorithm.cpp
+++ b/src/PluginAlgorithm.cpp
@@ -26,376 +26,11 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
 $Id$
 */
 
-#ifdef ASTRA_PYTHON
-
 #include "astra/PluginAlgorithm.h"
-#include "astra/Logging.h"
-#include "astra/Utilities.h"
-#include <boost/algorithm/string.hpp>
-#include <boost/algorithm/string/split.hpp>
-#include <iostream>
-#include <fstream>
-#include <string>
-
-#include <Python.h>
-#include "bytesobject.h"
 
 namespace astra {
 
+CPluginAlgorithmFactory *CPluginAlgorithmFactory::m_factory = 0;
 
-
-void logPythonError(){
-    if(PyErr_Occurred()){
-        PyObject *ptype, *pvalue, *ptraceback;
-        PyErr_Fetch(&ptype, &pvalue, &ptraceback);
-        PyErr_NormalizeException(&ptype, &pvalue, &ptraceback);
-        PyObject *traceback = PyImport_ImportModule("traceback");
-        if(traceback!=NULL){
-            PyObject *exc;
-            if(ptraceback==NULL){
-                exc = PyObject_CallMethod(traceback,"format_exception_only","OO",ptype, pvalue);
-            }else{
-                exc = PyObject_CallMethod(traceback,"format_exception","OOO",ptype, pvalue, ptraceback);
-            }
-            if(exc!=NULL){
-                PyObject *six = PyImport_ImportModule("six");
-                if(six!=NULL){
-                    PyObject *iter = PyObject_GetIter(exc);
-                    if(iter!=NULL){
-                        PyObject *line;
-                        std::string errStr = "";
-                        while(line = PyIter_Next(iter)){
-                            PyObject *retb = PyObject_CallMethod(six,"b","O",line);
-                            if(retb!=NULL){
-                                errStr += std::string(PyBytes_AsString(retb));
-                                Py_DECREF(retb);
-                            }
-                            Py_DECREF(line);
-                        }
-                        ASTRA_ERROR("%s",errStr.c_str());
-                        Py_DECREF(iter);
-                    }
-                    Py_DECREF(six);
-                }
-                Py_DECREF(exc);
-            }
-            Py_DECREF(traceback);
-        }
-        if(ptype!=NULL) Py_DECREF(ptype);
-        if(pvalue!=NULL) Py_DECREF(pvalue);
-        if(ptraceback!=NULL) Py_DECREF(ptraceback);
-    }
-}
-
-
-CPluginAlgorithm::CPluginAlgorithm(PyObject* pyclass){
-    instance = PyObject_CallObject(pyclass, NULL);
-    if(instance==NULL) logPythonError();
-}
-
-CPluginAlgorithm::~CPluginAlgorithm(){
-    if(instance!=NULL){
-        Py_DECREF(instance);
-        instance = NULL;
-    }
-}
-
-bool CPluginAlgorithm::initialize(const Config& _cfg){
-    if(instance==NULL) return false;
-    PyObject *cfgDict = XMLNode2dict(_cfg.self);
-    PyObject *retVal = PyObject_CallMethod(instance, "astra_init", "O",cfgDict);
-    Py_DECREF(cfgDict);
-    if(retVal==NULL){
-        logPythonError();
-        return false;
-    }
-    m_bIsInitialized = true;
-    Py_DECREF(retVal);
-    return m_bIsInitialized;
-}
-
-void CPluginAlgorithm::run(int _iNrIterations){
-    if(instance==NULL) return;
-    PyGILState_STATE state = PyGILState_Ensure();
-    PyObject *retVal = PyObject_CallMethod(instance, "run", "i",_iNrIterations);
-    if(retVal==NULL){
-        logPythonError();
-    }else{
-        Py_DECREF(retVal);
-    }
-    PyGILState_Release(state);
 }
 
-void fixLapackLoading(){
-    // When running in Matlab, we need to force numpy
-    // to use its internal lapack library instead of
-    // Matlab's MKL library to avoid errors. To do this,
-    // we set Python's dlopen flags to RTLD_NOW|RTLD_DEEPBIND
-    // and import 'numpy.linalg.lapack_lite' here. We reset
-    // Python's dlopen flags afterwards.
-    PyObject *sys = PyImport_ImportModule("sys");
-    if(sys!=NULL){
-        PyObject *curFlags = PyObject_CallMethod(sys,"getdlopenflags",NULL);
-        if(curFlags!=NULL){
-            PyObject *retVal = PyObject_CallMethod(sys, "setdlopenflags", "i",10);
-            if(retVal!=NULL){
-                PyObject *lapack = PyImport_ImportModule("numpy.linalg.lapack_lite");
-                if(lapack!=NULL){
-                    Py_DECREF(lapack);
-                }
-                PyObject_CallMethod(sys, "setdlopenflags", "O",curFlags);
-                Py_DECREF(retVal);
-            }
-            Py_DECREF(curFlags);
-        }
-        Py_DECREF(sys);
-    }
-}
-
-CPluginAlgorithmFactory::CPluginAlgorithmFactory(){
-    if(!Py_IsInitialized()){
-        Py_Initialize();
-        PyEval_InitThreads();
-    }
-#ifndef _MSC_VER
-    if(astra::running_in_matlab) fixLapackLoading();
-#endif
-    pluginDict = PyDict_New();
-    inspect = PyImport_ImportModule("inspect");
-    six = PyImport_ImportModule("six");
-}
-
-CPluginAlgorithmFactory::~CPluginAlgorithmFactory(){
-    if(pluginDict!=NULL){
-        Py_DECREF(pluginDict);
-    }
-    if(inspect!=NULL) Py_DECREF(inspect);
-    if(six!=NULL) Py_DECREF(six);
-}
-
-PyObject * getClassFromString(std::string str){
-    std::vector<std::string> items;
-    boost::split(items, str, boost::is_any_of("."));
-    PyObject *pyclass = PyImport_ImportModule(items[0].c_str());
-    if(pyclass==NULL){
-        logPythonError();
-        return NULL;
-    }
-    PyObject *submod = pyclass;
-    for(unsigned int i=1;i<items.size();i++){
-        submod = PyObject_GetAttrString(submod,items[i].c_str());
-        Py_DECREF(pyclass);
-        pyclass = submod;
-        if(pyclass==NULL){
-            logPythonError();
-            return NULL;
-        }
-    }
-    return pyclass;
-}
-
-bool CPluginAlgorithmFactory::registerPlugin(std::string name, std::string className){
-    PyObject *str = PyBytes_FromString(className.c_str());
-    PyDict_SetItemString(pluginDict, name.c_str(), str);
-    Py_DECREF(str);
-    return true;
-}
-
-bool CPluginAlgorithmFactory::registerPlugin(std::string className){
-    PyObject *pyclass = getClassFromString(className);
-    if(pyclass==NULL) return false;
-    bool ret = registerPluginClass(pyclass);
-    Py_DECREF(pyclass);
-    return ret;
-}
-
-bool CPluginAlgorithmFactory::registerPluginClass(std::string name, PyObject * className){
-    PyDict_SetItemString(pluginDict, name.c_str(), className);
-    return true;
-}
-
-bool CPluginAlgorithmFactory::registerPluginClass(PyObject * className){
-    PyObject *astra_name = PyObject_GetAttrString(className,"astra_name");
-    if(astra_name==NULL){
-        logPythonError();
-        return false;
-    }
-    PyObject *retb = PyObject_CallMethod(six,"b","O",astra_name);
-    if(retb!=NULL){
-        PyDict_SetItemString(pluginDict,PyBytes_AsString(retb),className);
-        Py_DECREF(retb);
-    }else{
-        logPythonError();
-    }
-    Py_DECREF(astra_name);
-    return true;
-}
-
-CPluginAlgorithm * CPluginAlgorithmFactory::getPlugin(std::string name){
-    PyObject *className = PyDict_GetItemString(pluginDict, name.c_str());
-    if(className==NULL) return NULL;
-    CPluginAlgorithm *alg = NULL;
-    if(PyBytes_Check(className)){
-        std::string str = std::string(PyBytes_AsString(className));
-    	PyObject *pyclass = getClassFromString(str);
-        if(pyclass!=NULL){
-            alg = new CPluginAlgorithm(pyclass);
-            Py_DECREF(pyclass);
-        }
-    }else{
-        alg = new CPluginAlgorithm(className);
-    }
-    return alg;
-}
-
-PyObject * CPluginAlgorithmFactory::getRegistered(){
-    Py_INCREF(pluginDict);
-    return pluginDict;
-}
-
-std::map<std::string, std::string> CPluginAlgorithmFactory::getRegisteredMap(){
-    std::map<std::string, std::string> ret;
-    PyObject *key, *value;
-    Py_ssize_t pos = 0;
-    while (PyDict_Next(pluginDict, &pos, &key, &value)) {
-        PyObject *keystr = PyObject_Str(key);
-        if(keystr!=NULL){
-            PyObject *valstr = PyObject_Str(value);
-            if(valstr!=NULL){
-                PyObject * keyb = PyObject_CallMethod(six,"b","O",keystr);
-                if(keyb!=NULL){
-                    PyObject * valb = PyObject_CallMethod(six,"b","O",valstr);
-                    if(valb!=NULL){
-                        ret[PyBytes_AsString(keyb)] = PyBytes_AsString(valb);
-                        Py_DECREF(valb);
-                    }
-                    Py_DECREF(keyb);
-                }
-                Py_DECREF(valstr);
-            }
-            Py_DECREF(keystr);
-        }
-        logPythonError();
-    }
-    return ret;
-}
-
-std::string CPluginAlgorithmFactory::getHelp(std::string name){
-    PyObject *className = PyDict_GetItemString(pluginDict, name.c_str());
-    if(className==NULL){
-        ASTRA_ERROR("Plugin %s not found!",name.c_str());
-        PyErr_Clear();
-        return "";
-    }
-    std::string ret = "";
-    PyObject *pyclass;
-    if(PyBytes_Check(className)){
-        std::string str = std::string(PyBytes_AsString(className));
-        pyclass = getClassFromString(str);
-    }else{
-        pyclass = className;
-    }
-    if(pyclass==NULL) return "";
-    if(inspect!=NULL && six!=NULL){
-        PyObject *retVal = PyObject_CallMethod(inspect,"getdoc","O",pyclass);
-        if(retVal!=NULL){
-            if(retVal!=Py_None){
-                PyObject *retb = PyObject_CallMethod(six,"b","O",retVal);
-                if(retb!=NULL){
-                    ret = std::string(PyBytes_AsString(retb));
-                    Py_DECREF(retb);
-                }
-            }
-            Py_DECREF(retVal);
-        }else{
-            logPythonError();
-        }
-    }
-    if(PyBytes_Check(className)){
-        Py_DECREF(pyclass);
-    }
-    return ret;
-}
-
-DEFINE_SINGLETON(CPluginAlgorithmFactory);
-
-#if PY_MAJOR_VERSION >= 3
-PyObject * pyStringFromString(std::string str){
-    return PyUnicode_FromString(str.c_str());
-}
-#else
-PyObject * pyStringFromString(std::string str){
-    return PyBytes_FromString(str.c_str());
-}
-#endif
-
-PyObject* stringToPythonValue(std::string str){
-    if(str.find(";")!=std::string::npos){
-        std::vector<std::string> rows, row;
-        boost::split(rows, str, boost::is_any_of(";"));
-        PyObject *mat = PyList_New(rows.size());
-        for(unsigned int i=0; i<rows.size(); i++){
-            boost::split(row, rows[i], boost::is_any_of(","));
-            PyObject *rowlist = PyList_New(row.size());
-            for(unsigned int j=0;j<row.size();j++){
-                PyList_SetItem(rowlist, j, PyFloat_FromDouble(StringUtil::stringToDouble(row[j])));
-            }
-            PyList_SetItem(mat, i, rowlist);
-        }
-        return mat;
-    }
-    if(str.find(",")!=std::string::npos){
-        std::vector<std::string> vec;
-        boost::split(vec, str, boost::is_any_of(","));
-        PyObject *veclist = PyList_New(vec.size());
-        for(unsigned int i=0;i<vec.size();i++){
-            PyList_SetItem(veclist, i, PyFloat_FromDouble(StringUtil::stringToDouble(vec[i])));
-        }
-        return veclist;
-    }
-    try{
-        return PyLong_FromLong(StringUtil::stringToInt(str));
-    }catch(const StringUtil::bad_cast &){
-        try{
-            return PyFloat_FromDouble(StringUtil::stringToDouble(str));
-        }catch(const StringUtil::bad_cast &){
-            return pyStringFromString(str);
-        }
-    }
-}
-
-PyObject* XMLNode2dict(XMLNode node){
-    PyObject *dct = PyDict_New();
-    PyObject *opts = PyDict_New();
-    if(node.hasAttribute("type")){
-        PyObject *obj = pyStringFromString(node.getAttribute("type").c_str());
-        PyDict_SetItemString(dct, "type", obj);
-        Py_DECREF(obj);
-    }
-    std::list<XMLNode> nodes = node.getNodes();
-    std::list<XMLNode>::iterator it = nodes.begin();
-    while(it!=nodes.end()){
-        XMLNode subnode = *it;
-        if(subnode.getName()=="Option"){
-            PyObject *obj;
-            if(subnode.hasAttribute("value")){
-                obj = stringToPythonValue(subnode.getAttribute("value"));
-            }else{
-                obj = stringToPythonValue(subnode.getContent());
-            }
-            PyDict_SetItemString(opts, subnode.getAttribute("key").c_str(), obj);
-            Py_DECREF(obj);
-        }else{
-            PyObject *obj = stringToPythonValue(subnode.getContent());
-            PyDict_SetItemString(dct, subnode.getName().c_str(), obj);
-            Py_DECREF(obj);
-        }
-        ++it;
-    }
-    PyDict_SetItemString(dct, "options", opts);
-    Py_DECREF(opts);
-    return dct;
-}
-
-}
-#endif
-- 
cgit v1.2.3


From 081355b609b11faf7f2d73414de9629e78cca2c5 Mon Sep 17 00:00:00 2001
From: Nicola Vigano <nicola.vigano@esrf.fr>
Date: Thu, 21 Jan 2016 17:10:54 +0100
Subject: Refactor FP and BP jobs creation in the composite geometry manager

---
 src/CompositeGeometryManager.cpp | 39 +++++++++++++++++++++++++--------------
 1 file changed, 25 insertions(+), 14 deletions(-)

(limited to 'src')

diff --git a/src/CompositeGeometryManager.cpp b/src/CompositeGeometryManager.cpp
index d1b713e..96b28e9 100644
--- a/src/CompositeGeometryManager.cpp
+++ b/src/CompositeGeometryManager.cpp
@@ -684,13 +684,12 @@ CCompositeGeometryManager::CProjectionPart* CCompositeGeometryManager::CProjecti
 	return new CProjectionPart(*this);
 }
 
-
-bool CCompositeGeometryManager::doFP(CProjector3D *pProjector, CFloat32VolumeData3DMemory *pVolData,
-                                     CFloat32ProjectionData3DMemory *pProjData)
+CCompositeGeometryManager::SJob CCompositeGeometryManager::createJobFP(CProjector3D *pProjector,
+                                            CFloat32VolumeData3DMemory *pVolData,
+                                            CFloat32ProjectionData3DMemory *pProjData)
 {
-	ASTRA_DEBUG("CCompositeGeometryManager::doFP");
+	ASTRA_DEBUG("CCompositeGeometryManager::createJobFP");
 	// Create single job for FP
-	// Run result
 
 	CVolumePart *input = new CVolumePart();
 	input->pData = pVolData;
@@ -715,18 +714,15 @@ bool CCompositeGeometryManager::doFP(CProjector3D *pProjector, CFloat32VolumeDat
 	FP.eType = SJob::JOB_FP;
 	FP.eMode = SJob::MODE_SET;
 
-	TJobList L;
-	L.push_back(FP);
-
-	return doJobs(L);
+	return FP;
 }
 
-bool CCompositeGeometryManager::doBP(CProjector3D *pProjector, CFloat32VolumeData3DMemory *pVolData,
-                                     CFloat32ProjectionData3DMemory *pProjData)
+CCompositeGeometryManager::SJob CCompositeGeometryManager::createJobBP(CProjector3D *pProjector,
+                                            CFloat32VolumeData3DMemory *pVolData,
+                                            CFloat32ProjectionData3DMemory *pProjData)
 {
-	ASTRA_DEBUG("CCompositeGeometryManager::doBP");
+	ASTRA_DEBUG("CCompositeGeometryManager::createJobBP");
 	// Create single job for BP
-	// Run result
 
 	CProjectionPart *input = new CProjectionPart();
 	input->pData = pProjData;
@@ -749,8 +745,23 @@ bool CCompositeGeometryManager::doBP(CProjector3D *pProjector, CFloat32VolumeDat
 	BP.eType = SJob::JOB_BP;
 	BP.eMode = SJob::MODE_SET;
 
+	return BP;
+}
+
+bool CCompositeGeometryManager::doFP(CProjector3D *pProjector, CFloat32VolumeData3DMemory *pVolData,
+                                     CFloat32ProjectionData3DMemory *pProjData)
+{
+	TJobList L;
+	L.push_back(createJobFP(pProjector, pVolData, pProjData));
+
+	return doJobs(L);
+}
+
+bool CCompositeGeometryManager::doBP(CProjector3D *pProjector, CFloat32VolumeData3DMemory *pVolData,
+                                     CFloat32ProjectionData3DMemory *pProjData)
+{
 	TJobList L;
-	L.push_back(BP);
+	L.push_back(createJobBP(pProjector, pVolData, pProjData));
 
 	return doJobs(L);
 }
-- 
cgit v1.2.3


From 838cfae58d825fb8915dc7d3c974d96e6a4f981c Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Fri, 12 Feb 2016 16:27:08 +0100
Subject: Also split volumes in X/Y directions to respect CUDA limits

---
 src/CompositeGeometryManager.cpp | 261 +++++++++++++++++++++++++++++++++++----
 1 file changed, 240 insertions(+), 21 deletions(-)

(limited to 'src')

diff --git a/src/CompositeGeometryManager.cpp b/src/CompositeGeometryManager.cpp
index 96b28e9..1dd12ea 100644
--- a/src/CompositeGeometryManager.cpp
+++ b/src/CompositeGeometryManager.cpp
@@ -51,8 +51,11 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
 #include <boost/thread.hpp>
 #endif
 
+
 namespace astra {
 
+static const size_t MAX_BLOCK_DIM = 4096;
+
 
 SGPUParams* CCompositeGeometryManager::s_params = 0;
 
@@ -111,7 +114,20 @@ bool CCompositeGeometryManager::splitJobs(TJobSet &jobs, size_t maxSize, int div
 		//    b. split input part
 		//    c. create jobs for new (input,output) subparts
 
-		TPartList splitOutput = pOutput->split(maxSize/3, div);
+		TPartList splitOutput;
+		pOutput->splitZ(splitOutput, maxSize/3, MAX_BLOCK_DIM, div);
+		TPartList splitOutput2;
+		for (TPartList::iterator i_out = splitOutput.begin(); i_out != splitOutput.end(); ++i_out) {
+			boost::shared_ptr<CPart> outputPart = *i_out;
+			outputPart.get()->splitX(splitOutput2, maxSize/3, MAX_BLOCK_DIM, 1);
+		}
+		splitOutput.clear();
+		for (TPartList::iterator i_out = splitOutput2.begin(); i_out != splitOutput2.end(); ++i_out) {
+			boost::shared_ptr<CPart> outputPart = *i_out;
+			outputPart.get()->splitY(splitOutput, maxSize/3, MAX_BLOCK_DIM, 1);
+		}
+		splitOutput2.clear();
+
 
 		for (TJobList::const_iterator j = L.begin(); j != L.end(); ++j)
 		{
@@ -139,8 +155,21 @@ bool CCompositeGeometryManager::splitJobs(TJobSet &jobs, size_t maxSize, int div
 
 				size_t remainingSize = ( maxSize - outputPart->getSize() ) / 2;
 
-				TPartList splitInput = input->split(remainingSize, 1);
+				TPartList splitInput;
+				input->splitZ(splitInput, remainingSize, MAX_BLOCK_DIM, 1);
 				delete input;
+				TPartList splitInput2;
+				for (TPartList::iterator i_in = splitInput.begin(); i_in != splitInput.end(); ++i_in) {
+					boost::shared_ptr<CPart> inputPart = *i_in;
+					inputPart.get()->splitX(splitInput2, maxSize/3, MAX_BLOCK_DIM, 1);
+				}
+				splitInput.clear();
+				for (TPartList::iterator i_in = splitInput2.begin(); i_in != splitInput2.end(); ++i_in) {
+					boost::shared_ptr<CPart> inputPart = *i_in;
+					inputPart.get()->splitY(splitInput, maxSize/3, MAX_BLOCK_DIM, 1);
+				}
+				splitInput2.clear();
+
 				ASTRA_DEBUG("Input split into %d parts", splitInput.size());
 
 				for (TPartList::iterator i_in = splitInput.begin();
@@ -327,7 +356,7 @@ static size_t ceildiv(size_t a, size_t b) {
 	return (a + b - 1) / b;
 }
 
-static size_t computeVerticalSplit(size_t maxBlock, int div, size_t sliceCount)
+static size_t computeLinearSplit(size_t maxBlock, int div, size_t sliceCount)
 {
 	size_t blockSize = maxBlock;
 	size_t blockCount = ceildiv(sliceCount, blockSize);
@@ -410,7 +439,17 @@ SPar3DProjection* getProjectionVectors(const CParallelVecProjectionGeometry3D* p
 
 
 template<class V>
-static void translateProjectionVectors(V* pProjs, int count, double dv)
+static void translateProjectionVectorsU(V* pProjs, int count, double du)
+{
+	for (int i = 0; i < count; ++i) {
+		pProjs[i].fDetSX += du * pProjs[i].fDetUX;
+		pProjs[i].fDetSY += du * pProjs[i].fDetUY;
+		pProjs[i].fDetSZ += du * pProjs[i].fDetUZ;
+	}
+}
+
+template<class V>
+static void translateProjectionVectorsV(V* pProjs, int count, double dv)
 {
 	for (int i = 0; i < count; ++i) {
 		pProjs[i].fDetSX += dv * pProjs[i].fDetVX;
@@ -420,8 +459,58 @@ static void translateProjectionVectors(V* pProjs, int count, double dv)
 }
 
 
+static CProjectionGeometry3D* getSubProjectionGeometryU(const CProjectionGeometry3D* pProjGeom, int u, int size)
+{
+	// First convert to vectors, then translate, then convert into new object
+
+	const CConeProjectionGeometry3D* conegeom = dynamic_cast<const CConeProjectionGeometry3D*>(pProjGeom);
+	const CParallelProjectionGeometry3D* par3dgeom = dynamic_cast<const CParallelProjectionGeometry3D*>(pProjGeom);
+	const CParallelVecProjectionGeometry3D* parvec3dgeom = dynamic_cast<const CParallelVecProjectionGeometry3D*>(pProjGeom);
+	const CConeVecProjectionGeometry3D* conevec3dgeom = dynamic_cast<const CConeVecProjectionGeometry3D*>(pProjGeom);
+
+	if (conegeom || conevec3dgeom) {
+		SConeProjection* pConeProjs;
+		if (conegeom) {
+			pConeProjs = getProjectionVectors<SConeProjection>(conegeom);
+		} else {
+			pConeProjs = getProjectionVectors<SConeProjection>(conevec3dgeom);
+		}
+
+		translateProjectionVectorsU(pConeProjs, pProjGeom->getProjectionCount(), u);
+
+		CProjectionGeometry3D* ret = new CConeVecProjectionGeometry3D(pProjGeom->getProjectionCount(),
+		                                                              pProjGeom->getDetectorRowCount(),
+		                                                              size,
+		                                                              pConeProjs);
+
+
+		delete[] pConeProjs;
+		return ret;
+	} else {
+		assert(par3dgeom || parvec3dgeom);
+		SPar3DProjection* pParProjs;
+		if (par3dgeom) {
+			pParProjs = getProjectionVectors<SPar3DProjection>(par3dgeom);
+		} else {
+			pParProjs = getProjectionVectors<SPar3DProjection>(parvec3dgeom);
+		}
+
+		translateProjectionVectorsU(pParProjs, pProjGeom->getProjectionCount(), u);
+
+		CProjectionGeometry3D* ret = new CParallelVecProjectionGeometry3D(pProjGeom->getProjectionCount(),
+		                                                                  pProjGeom->getDetectorRowCount(),
+		                                                                  size,
+		                                                                  pParProjs);
+
+		delete[] pParProjs;
+		return ret;
+	}
+
+}
+
+
 
-static CProjectionGeometry3D* getSubProjectionGeometry(const CProjectionGeometry3D* pProjGeom, int v, int size)
+static CProjectionGeometry3D* getSubProjectionGeometryV(const CProjectionGeometry3D* pProjGeom, int v, int size)
 {
 	// First convert to vectors, then translate, then convert into new object
 
@@ -438,7 +527,7 @@ static CProjectionGeometry3D* getSubProjectionGeometry(const CProjectionGeometry
 			pConeProjs = getProjectionVectors<SConeProjection>(conevec3dgeom);
 		}
 
-		translateProjectionVectors(pConeProjs, pProjGeom->getProjectionCount(), v);
+		translateProjectionVectorsV(pConeProjs, pProjGeom->getProjectionCount(), v);
 
 		CProjectionGeometry3D* ret = new CConeVecProjectionGeometry3D(pProjGeom->getProjectionCount(),
 		                                                              size,
@@ -457,7 +546,7 @@ static CProjectionGeometry3D* getSubProjectionGeometry(const CProjectionGeometry
 			pParProjs = getProjectionVectors<SPar3DProjection>(parvec3dgeom);
 		}
 
-		translateProjectionVectors(pParProjs, pProjGeom->getProjectionCount(), v);
+		translateProjectionVectorsV(pParProjs, pProjGeom->getProjectionCount(), v);
 
 		CProjectionGeometry3D* ret = new CParallelVecProjectionGeometry3D(pProjGeom->getProjectionCount(),
 		                                                                  size,
@@ -476,17 +565,110 @@ static CProjectionGeometry3D* getSubProjectionGeometry(const CProjectionGeometry
 // - each no bigger than maxSize
 // - number of sub-parts is divisible by div
 // - maybe all approximately the same size?
-CCompositeGeometryManager::TPartList CCompositeGeometryManager::CVolumePart::split(size_t maxSize, int div)
+void CCompositeGeometryManager::CVolumePart::splitX(CCompositeGeometryManager::TPartList& out, size_t maxSize, size_t maxDim, int div)
+{
+	if (true) {
+		// Split along the X axis (grid columns) only; the Y and Z extents
+		// of each sub-volume are copied unchanged from this part.
+
+		size_t sliceSize = ((size_t) pGeom->getGridSliceCount()) * pGeom->getGridRowCount();
+		int sliceCount = pGeom->getGridColCount();
+		size_t m = std::min(maxSize / sliceSize, maxDim);
+		size_t blockSize = computeLinearSplit(m, div, sliceCount);
+
+		int rem = sliceCount % blockSize;
+
+		ASTRA_DEBUG("From %d to %d step %d", -(rem / 2), sliceCount, blockSize);
+
+		for (int x = -(rem / 2); x < sliceCount; x += blockSize) {
+			int newsubX = x;
+			if (newsubX < 0) newsubX = 0;
+			int endX = x + blockSize;
+			if (endX > sliceCount) endX = sliceCount;
+			int size = endX - newsubX;
+
+			CVolumePart *sub = new CVolumePart();
+			sub->subX = this->subX + newsubX;
+			sub->subY = this->subY;
+			sub->subZ = this->subZ;
+
+			ASTRA_DEBUG("VolumePart split %d %d %d -> %p", sub->subX, sub->subY, sub->subZ, (void*)sub);
+
+			double shift = pGeom->getPixelLengthX() * newsubX;
+
+			sub->pData = pData;
+			sub->pGeom = new CVolumeGeometry3D(size,
+			                                   pGeom->getGridRowCount(),
+			                                   pGeom->getGridSliceCount(),
+			                                   pGeom->getWindowMinX() + shift,
+			                                   pGeom->getWindowMinY(),
+			                                   pGeom->getWindowMinZ(),
+			                                   pGeom->getWindowMinX() + shift + size * pGeom->getPixelLengthX(),
+			                                   pGeom->getWindowMaxY(),
+			                                   pGeom->getWindowMaxZ());
+
+			out.push_back(boost::shared_ptr<CPart>(sub));
+		}
+	}
+}
+
+void CCompositeGeometryManager::CVolumePart::splitY(CCompositeGeometryManager::TPartList& out, size_t maxSize, size_t maxDim, int div)
 {
-	TPartList ret;
+	if (true) {
+		// Split along the Y axis (grid rows) only; the X and Z extents
+		// of each sub-volume are copied unchanged from this part.
+
+		size_t sliceSize = ((size_t) pGeom->getGridColCount()) * pGeom->getGridSliceCount();
+		int sliceCount = pGeom->getGridRowCount();
+		size_t m = std::min(maxSize / sliceSize, maxDim);
+		size_t blockSize = computeLinearSplit(m, div, sliceCount);
+
+		int rem = sliceCount % blockSize;
+
+		ASTRA_DEBUG("From %d to %d step %d", -(rem / 2), sliceCount, blockSize);
+
+		for (int y = -(rem / 2); y < sliceCount; y += blockSize) {
+			int newsubY = y;
+			if (newsubY < 0) newsubY = 0;
+			int endY = y + blockSize;
+			if (endY > sliceCount) endY = sliceCount;
+			int size = endY - newsubY;
+
+			CVolumePart *sub = new CVolumePart();
+			sub->subX = this->subX;
+			sub->subY = this->subY + newsubY;
+			sub->subZ = this->subZ;
+
+			ASTRA_DEBUG("VolumePart split %d %d %d -> %p", sub->subX, sub->subY, sub->subZ, (void*)sub);
+
+			double shift = pGeom->getPixelLengthY() * newsubY;
+
+			sub->pData = pData;
+			sub->pGeom = new CVolumeGeometry3D(pGeom->getGridColCount(),
+			                                   size,
+			                                   pGeom->getGridSliceCount(),
+			                                   pGeom->getWindowMinX(),
+			                                   pGeom->getWindowMinY() + shift,
+			                                   pGeom->getWindowMinZ(),
+			                                   pGeom->getWindowMaxX(),
+			                                   pGeom->getWindowMinY() + shift + size * pGeom->getPixelLengthY(),
+			                                   pGeom->getWindowMaxZ());
 
+			out.push_back(boost::shared_ptr<CPart>(sub));
+		}
+	}
+}
+
+void CCompositeGeometryManager::CVolumePart::splitZ(CCompositeGeometryManager::TPartList& out, size_t maxSize, size_t maxDim, int div)
+{
 	if (true) {
 		// Split in vertical direction only at first, until we figure out
 		// a model for splitting in other directions
 
 		size_t sliceSize = ((size_t) pGeom->getGridColCount()) * pGeom->getGridRowCount();
 		int sliceCount = pGeom->getGridSliceCount();
-		size_t blockSize = computeVerticalSplit(maxSize / sliceSize, div, sliceCount);
+		size_t m = std::min(maxSize / sliceSize, maxDim);
+		size_t blockSize = computeLinearSplit(m, div, sliceCount);
 
 		int rem = sliceCount % blockSize;
 
@@ -519,11 +701,9 @@ CCompositeGeometryManager::TPartList CCompositeGeometryManager::CVolumePart::spl
 			                                   pGeom->getWindowMaxY(),
 			                                   pGeom->getWindowMinZ() + shift + size * pGeom->getPixelLengthZ());
 
-			ret.push_back(boost::shared_ptr<CPart>(sub));
+			out.push_back(boost::shared_ptr<CPart>(sub));
 		}
 	}
-
-	return ret;
 }
 
 CCompositeGeometryManager::CVolumePart* CCompositeGeometryManager::CVolumePart::clone() const
@@ -630,7 +810,7 @@ CCompositeGeometryManager::CPart* CCompositeGeometryManager::CProjectionPart::re
 	if (_vmin == _vmax) {
 		sub->pGeom = 0;
 	} else {
-		sub->pGeom = getSubProjectionGeometry(pGeom, _vmin, _vmax - _vmin);
+		sub->pGeom = getSubProjectionGeometryV(pGeom, _vmin, _vmax - _vmin);
 	}
 
 	ASTRA_DEBUG("Reduce projection from %d - %d to %d - %d", this->subZ, this->subZ + pGeom->getDetectorRowCount(), this->subZ + _vmin, this->subZ + _vmax);
@@ -639,17 +819,58 @@ CCompositeGeometryManager::CPart* CCompositeGeometryManager::CProjectionPart::re
 }
 
 
-CCompositeGeometryManager::TPartList CCompositeGeometryManager::CProjectionPart::split(size_t maxSize, int div)
+void CCompositeGeometryManager::CProjectionPart::splitX(CCompositeGeometryManager::TPartList &out, size_t maxSize, size_t maxDim, int div)
+{
+	if (true) {
+		// Split along the detector column (U) direction only; the detector
+		// row count and projection count of each sub-part stay unchanged.
+
+		size_t sliceSize = ((size_t) pGeom->getDetectorRowCount()) * pGeom->getProjectionCount();
+		int sliceCount = pGeom->getDetectorColCount();
+		size_t m = std::min(maxSize / sliceSize, maxDim);
+		size_t blockSize = computeLinearSplit(m, div, sliceCount);
+
+		int rem = sliceCount % blockSize;
+
+		for (int x = -(rem / 2); x < sliceCount; x += blockSize) {
+			int newsubX = x;
+			if (newsubX < 0) newsubX = 0;
+			int endX = x + blockSize;
+			if (endX > sliceCount) endX = sliceCount;
+			int size = endX - newsubX;
+
+			CProjectionPart *sub = new CProjectionPart();
+			sub->subX = this->subX + newsubX;
+			sub->subY = this->subY;
+			sub->subZ = this->subZ;
+
+			ASTRA_DEBUG("ProjectionPart split %d %d %d -> %p", sub->subX, sub->subY, sub->subZ, (void*)sub);
+
+			sub->pData = pData;
+
+			sub->pGeom = getSubProjectionGeometryU(pGeom, newsubX, size);
+
+			out.push_back(boost::shared_ptr<CPart>(sub));
+		}
+	}
+}
+
+void CCompositeGeometryManager::CProjectionPart::splitY(CCompositeGeometryManager::TPartList &out, size_t maxSize, size_t maxDim, int div)
 {
-	TPartList ret;
+	// TODO: splitting in Y is not implemented yet; the part is passed through unsplit (as a clone).
+	out.push_back(boost::shared_ptr<CPart>(clone()));
+}
 
+void CCompositeGeometryManager::CProjectionPart::splitZ(CCompositeGeometryManager::TPartList &out, size_t maxSize, size_t maxDim, int div)
+{
 	if (true) {
 		// Split in vertical direction only at first, until we figure out
 		// a model for splitting in other directions
 
 		size_t sliceSize = ((size_t) pGeom->getDetectorColCount()) * pGeom->getProjectionCount();
 		int sliceCount = pGeom->getDetectorRowCount();
-		size_t blockSize = computeVerticalSplit(maxSize / sliceSize, div, sliceCount);
+		size_t m = std::min(maxSize / sliceSize, maxDim);
+		size_t blockSize = computeLinearSplit(m, div, sliceCount);
 
 		int rem = sliceCount % blockSize;
 
@@ -669,14 +890,12 @@ CCompositeGeometryManager::TPartList CCompositeGeometryManager::CProjectionPart:
 
 			sub->pData = pData;
 
-			sub->pGeom = getSubProjectionGeometry(pGeom, newsubZ, size);
+			sub->pGeom = getSubProjectionGeometryV(pGeom, newsubZ, size);
 
-			ret.push_back(boost::shared_ptr<CPart>(sub));
+			out.push_back(boost::shared_ptr<CPart>(sub));
 		}
 	}
 
-	return ret;
-
 }
 
 CCompositeGeometryManager::CProjectionPart* CCompositeGeometryManager::CProjectionPart::clone() const
-- 
cgit v1.2.3


From e9fad320817cd8ab84f7ef81940fda63f975551e Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Mon, 15 Feb 2016 13:37:22 +0100
Subject: Skip some unnecessary splitting

---
 src/CompositeGeometryManager.cpp | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/CompositeGeometryManager.cpp b/src/CompositeGeometryManager.cpp
index 1dd12ea..1991731 100644
--- a/src/CompositeGeometryManager.cpp
+++ b/src/CompositeGeometryManager.cpp
@@ -45,6 +45,7 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
 
 #include <cstring>
 #include <sstream>
+#include <stdint.h>
 
 #ifndef USE_PTHREADS
 #include <boost/thread/mutex.hpp>
@@ -115,18 +116,20 @@ bool CCompositeGeometryManager::splitJobs(TJobSet &jobs, size_t maxSize, int div
 		//    c. create jobs for new (input,output) subparts
 
 		TPartList splitOutput;
-		pOutput->splitZ(splitOutput, maxSize/3, MAX_BLOCK_DIM, div);
+		pOutput->splitZ(splitOutput, maxSize/3, SIZE_MAX, div);
+#if 0
 		TPartList splitOutput2;
 		for (TPartList::iterator i_out = splitOutput.begin(); i_out != splitOutput.end(); ++i_out) {
 			boost::shared_ptr<CPart> outputPart = *i_out;
-			outputPart.get()->splitX(splitOutput2, maxSize/3, MAX_BLOCK_DIM, 1);
+			outputPart.get()->splitX(splitOutput2, SIZE_MAX, SIZE_MAX, 1);
 		}
 		splitOutput.clear();
 		for (TPartList::iterator i_out = splitOutput2.begin(); i_out != splitOutput2.end(); ++i_out) {
 			boost::shared_ptr<CPart> outputPart = *i_out;
-			outputPart.get()->splitY(splitOutput, maxSize/3, MAX_BLOCK_DIM, 1);
+					outputPart.get()->splitY(splitOutput, SIZE_MAX, SIZE_MAX, 1);
 		}
 		splitOutput2.clear();
+#endif
 
 
 		for (TJobList::const_iterator j = L.begin(); j != L.end(); ++j)
@@ -161,12 +164,12 @@ bool CCompositeGeometryManager::splitJobs(TJobSet &jobs, size_t maxSize, int div
 				TPartList splitInput2;
 				for (TPartList::iterator i_in = splitInput.begin(); i_in != splitInput.end(); ++i_in) {
 					boost::shared_ptr<CPart> inputPart = *i_in;
-					inputPart.get()->splitX(splitInput2, maxSize/3, MAX_BLOCK_DIM, 1);
+					inputPart.get()->splitX(splitInput2, SIZE_MAX, MAX_BLOCK_DIM, 1);
 				}
 				splitInput.clear();
 				for (TPartList::iterator i_in = splitInput2.begin(); i_in != splitInput2.end(); ++i_in) {
 					boost::shared_ptr<CPart> inputPart = *i_in;
-					inputPart.get()->splitY(splitInput, maxSize/3, MAX_BLOCK_DIM, 1);
+					inputPart.get()->splitY(splitInput, SIZE_MAX, MAX_BLOCK_DIM, 1);
 				}
 				splitInput2.clear();
 
-- 
cgit v1.2.3


From 447e7acfb0c220f66d5fe25f31b25c989d4ec1d7 Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Mon, 15 Feb 2016 15:44:49 +0100
Subject: Avoid (unlikely) integer overflow

---
 src/CompositeGeometryManager.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/CompositeGeometryManager.cpp b/src/CompositeGeometryManager.cpp
index 1991731..cafc452 100644
--- a/src/CompositeGeometryManager.cpp
+++ b/src/CompositeGeometryManager.cpp
@@ -362,7 +362,11 @@ static size_t ceildiv(size_t a, size_t b) {
 static size_t computeLinearSplit(size_t maxBlock, int div, size_t sliceCount)
 {
 	size_t blockSize = maxBlock;
-	size_t blockCount = ceildiv(sliceCount, blockSize);
+	size_t blockCount;
+	if (sliceCount <= blockSize)
+		blockCount = 1;
+	else
+		blockCount = ceildiv(sliceCount, blockSize);
 
 	// Increase number of blocks to be divisible by div
 	size_t divCount = div * ceildiv(blockCount, div);
-- 
cgit v1.2.3


From bc2e4018054f494fcba01e6a27a63e151bf1e9a4 Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Tue, 16 Feb 2016 15:15:13 +0100
Subject: Refactor AstraObjectManager to add an AstraIndexManager

The new AstraIndexManager can be used to obtain information about objects
without knowing their type.
---
 src/AstraObjectManager.cpp | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'src')

diff --git a/src/AstraObjectManager.cpp b/src/AstraObjectManager.cpp
index c49f273..46eae4b 100644
--- a/src/AstraObjectManager.cpp
+++ b/src/AstraObjectManager.cpp
@@ -31,13 +31,13 @@ $Id$
 
 namespace astra {
 
-int CAstraIndexManager::m_iPreviousIndex = 0;
-
-DEFINE_SINGLETON(CAstraObjectManager<CProjector2D>);
-DEFINE_SINGLETON(CAstraObjectManager<CProjector3D>);
-DEFINE_SINGLETON(CAstraObjectManager<CFloat32Data2D>);
-DEFINE_SINGLETON(CAstraObjectManager<CFloat32Data3D>);
-DEFINE_SINGLETON(CAstraObjectManager<CAlgorithm>);
-DEFINE_SINGLETON(CAstraObjectManager<CSparseMatrix>);
+DEFINE_SINGLETON(CProjector2DManager);
+DEFINE_SINGLETON(CProjector3DManager);
+DEFINE_SINGLETON(CData2DManager);
+DEFINE_SINGLETON(CData3DManager);
+DEFINE_SINGLETON(CAlgorithmManager);
+DEFINE_SINGLETON(CMatrixManager);
+
+DEFINE_SINGLETON(CAstraIndexManager);
 
 } // end namespace
-- 
cgit v1.2.3


From 3743fdc534b39958c105f4124ad1130d3e8b042a Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Tue, 16 Feb 2016 17:53:24 +0100
Subject: Query max texture size instead of hardcoding it

---
 src/CompositeGeometryManager.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'src')

diff --git a/src/CompositeGeometryManager.cpp b/src/CompositeGeometryManager.cpp
index cafc452..c9cbaaa 100644
--- a/src/CompositeGeometryManager.cpp
+++ b/src/CompositeGeometryManager.cpp
@@ -55,9 +55,6 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
 
 namespace astra {
 
-static const size_t MAX_BLOCK_DIM = 4096;
-
-
 SGPUParams* CCompositeGeometryManager::s_params = 0;
 
 CCompositeGeometryManager::CCompositeGeometryManager()
@@ -102,6 +99,9 @@ CCompositeGeometryManager::CCompositeGeometryManager()
 
 bool CCompositeGeometryManager::splitJobs(TJobSet &jobs, size_t maxSize, int div, TJobSet &split)
 {
+	int maxBlockDim = astraCUDA3d::maxBlockDimension();
+	ASTRA_DEBUG("Found max block dim %d", maxBlockDim);
+
 	split.clear();
 
 	for (TJobSet::const_iterator i = jobs.begin(); i != jobs.end(); ++i)
@@ -159,17 +159,17 @@ bool CCompositeGeometryManager::splitJobs(TJobSet &jobs, size_t maxSize, int div
 				size_t remainingSize = ( maxSize - outputPart->getSize() ) / 2;
 
 				TPartList splitInput;
-				input->splitZ(splitInput, remainingSize, MAX_BLOCK_DIM, 1);
+				input->splitZ(splitInput, remainingSize, maxBlockDim, 1);
 				delete input;
 				TPartList splitInput2;
 				for (TPartList::iterator i_in = splitInput.begin(); i_in != splitInput.end(); ++i_in) {
 					boost::shared_ptr<CPart> inputPart = *i_in;
-					inputPart.get()->splitX(splitInput2, SIZE_MAX, MAX_BLOCK_DIM, 1);
+					inputPart.get()->splitX(splitInput2, SIZE_MAX, maxBlockDim, 1);
 				}
 				splitInput.clear();
 				for (TPartList::iterator i_in = splitInput2.begin(); i_in != splitInput2.end(); ++i_in) {
 					boost::shared_ptr<CPart> inputPart = *i_in;
-					inputPart.get()->splitY(splitInput, SIZE_MAX, MAX_BLOCK_DIM, 1);
+					inputPart.get()->splitY(splitInput, SIZE_MAX, maxBlockDim, 1);
 				}
 				splitInput2.clear();
 
-- 
cgit v1.2.3


From 7b8a508f0bc7a8a02766b15fa094dfd18c1b0525 Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Tue, 1 Mar 2016 15:05:53 +0100
Subject: Fix build

---
 src/CompositeGeometryManager.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'src')

diff --git a/src/CompositeGeometryManager.cpp b/src/CompositeGeometryManager.cpp
index c9cbaaa..084ba8c 100644
--- a/src/CompositeGeometryManager.cpp
+++ b/src/CompositeGeometryManager.cpp
@@ -45,7 +45,7 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
 
 #include <cstring>
 #include <sstream>
-#include <stdint.h>
+#include <climits>
 
 #ifndef USE_PTHREADS
 #include <boost/thread/mutex.hpp>
@@ -116,17 +116,17 @@ bool CCompositeGeometryManager::splitJobs(TJobSet &jobs, size_t maxSize, int div
 		//    c. create jobs for new (input,output) subparts
 
 		TPartList splitOutput;
-		pOutput->splitZ(splitOutput, maxSize/3, SIZE_MAX, div);
+		pOutput->splitZ(splitOutput, maxSize/3, UINT_MAX, div);
 #if 0
 		TPartList splitOutput2;
 		for (TPartList::iterator i_out = splitOutput.begin(); i_out != splitOutput.end(); ++i_out) {
 			boost::shared_ptr<CPart> outputPart = *i_out;
-			outputPart.get()->splitX(splitOutput2, SIZE_MAX, SIZE_MAX, 1);
+			outputPart.get()->splitX(splitOutput2, UINT_MAX, UINT_MAX, 1);
 		}
 		splitOutput.clear();
 		for (TPartList::iterator i_out = splitOutput2.begin(); i_out != splitOutput2.end(); ++i_out) {
 			boost::shared_ptr<CPart> outputPart = *i_out;
-					outputPart.get()->splitY(splitOutput, SIZE_MAX, SIZE_MAX, 1);
+					outputPart.get()->splitY(splitOutput, UINT_MAX, UINT_MAX, 1);
 		}
 		splitOutput2.clear();
 #endif
@@ -164,12 +164,12 @@ bool CCompositeGeometryManager::splitJobs(TJobSet &jobs, size_t maxSize, int div
 				TPartList splitInput2;
 				for (TPartList::iterator i_in = splitInput.begin(); i_in != splitInput.end(); ++i_in) {
 					boost::shared_ptr<CPart> inputPart = *i_in;
-					inputPart.get()->splitX(splitInput2, SIZE_MAX, maxBlockDim, 1);
+					inputPart.get()->splitX(splitInput2, UINT_MAX, maxBlockDim, 1);
 				}
 				splitInput.clear();
 				for (TPartList::iterator i_in = splitInput2.begin(); i_in != splitInput2.end(); ++i_in) {
 					boost::shared_ptr<CPart> inputPart = *i_in;
-					inputPart.get()->splitY(splitInput, SIZE_MAX, maxBlockDim, 1);
+					inputPart.get()->splitY(splitInput, UINT_MAX, maxBlockDim, 1);
 				}
 				splitInput2.clear();
 
-- 
cgit v1.2.3


From 14bef5ea534e4aa4e6d0819e728d0a8d2b0b7925 Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Tue, 1 Mar 2016 15:05:53 +0100
Subject: Fix build

---
 src/CompositeGeometryManager.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'src')

diff --git a/src/CompositeGeometryManager.cpp b/src/CompositeGeometryManager.cpp
index c9cbaaa..084ba8c 100644
--- a/src/CompositeGeometryManager.cpp
+++ b/src/CompositeGeometryManager.cpp
@@ -45,7 +45,7 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
 
 #include <cstring>
 #include <sstream>
-#include <stdint.h>
+#include <climits>
 
 #ifndef USE_PTHREADS
 #include <boost/thread/mutex.hpp>
@@ -116,17 +116,17 @@ bool CCompositeGeometryManager::splitJobs(TJobSet &jobs, size_t maxSize, int div
 		//    c. create jobs for new (input,output) subparts
 
 		TPartList splitOutput;
-		pOutput->splitZ(splitOutput, maxSize/3, SIZE_MAX, div);
+		pOutput->splitZ(splitOutput, maxSize/3, UINT_MAX, div);
 #if 0
 		TPartList splitOutput2;
 		for (TPartList::iterator i_out = splitOutput.begin(); i_out != splitOutput.end(); ++i_out) {
 			boost::shared_ptr<CPart> outputPart = *i_out;
-			outputPart.get()->splitX(splitOutput2, SIZE_MAX, SIZE_MAX, 1);
+			outputPart.get()->splitX(splitOutput2, UINT_MAX, UINT_MAX, 1);
 		}
 		splitOutput.clear();
 		for (TPartList::iterator i_out = splitOutput2.begin(); i_out != splitOutput2.end(); ++i_out) {
 			boost::shared_ptr<CPart> outputPart = *i_out;
-					outputPart.get()->splitY(splitOutput, SIZE_MAX, SIZE_MAX, 1);
+					outputPart.get()->splitY(splitOutput, UINT_MAX, UINT_MAX, 1);
 		}
 		splitOutput2.clear();
 #endif
@@ -164,12 +164,12 @@ bool CCompositeGeometryManager::splitJobs(TJobSet &jobs, size_t maxSize, int div
 				TPartList splitInput2;
 				for (TPartList::iterator i_in = splitInput.begin(); i_in != splitInput.end(); ++i_in) {
 					boost::shared_ptr<CPart> inputPart = *i_in;
-					inputPart.get()->splitX(splitInput2, SIZE_MAX, maxBlockDim, 1);
+					inputPart.get()->splitX(splitInput2, UINT_MAX, maxBlockDim, 1);
 				}
 				splitInput.clear();
 				for (TPartList::iterator i_in = splitInput2.begin(); i_in != splitInput2.end(); ++i_in) {
 					boost::shared_ptr<CPart> inputPart = *i_in;
-					inputPart.get()->splitY(splitInput, SIZE_MAX, maxBlockDim, 1);
+					inputPart.get()->splitY(splitInput, UINT_MAX, maxBlockDim, 1);
 				}
 				splitInput2.clear();
 
-- 
cgit v1.2.3


From 495903529d473a9968c1333d5a515e3b94732f0b Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Wed, 23 Mar 2016 15:29:43 +0100
Subject: Move CUDA algorithm initialization to its own function

---
 src/CudaReconstructionAlgorithm2D.cpp | 22 +++++++++++++++-------
 src/CudaSirtAlgorithm.cpp             | 35 +++++++++++------------------------
 2 files changed, 26 insertions(+), 31 deletions(-)

(limited to 'src')

diff --git a/src/CudaReconstructionAlgorithm2D.cpp b/src/CudaReconstructionAlgorithm2D.cpp
index 5a1910c..2798434 100644
--- a/src/CudaReconstructionAlgorithm2D.cpp
+++ b/src/CudaReconstructionAlgorithm2D.cpp
@@ -328,6 +328,20 @@ bool CCudaReconstructionAlgorithm2D::setupGeometry()
 	return true;
 }
 
+//----------------------------------------------------------------------------------------
+
+void CCudaReconstructionAlgorithm2D::initCUDAAlgorithm()
+{
+	bool ok;
+
+	ok = setupGeometry();
+	ASTRA_ASSERT(ok);
+
+	ok = m_pAlgo->allocateBuffers();
+	ASTRA_ASSERT(ok);
+}
+
+
 //----------------------------------------------------------------------------------------
 // Iterate
 void CCudaReconstructionAlgorithm2D::run(int _iNrIterations)
@@ -339,13 +353,7 @@ void CCudaReconstructionAlgorithm2D::run(int _iNrIterations)
 	const CVolumeGeometry2D& volgeom = *m_pReconstruction->getGeometry();
 
 	if (!m_bAlgoInit) {
-
-		ok = setupGeometry();
-		ASTRA_ASSERT(ok);
-
-		ok = m_pAlgo->allocateBuffers();
-		ASTRA_ASSERT(ok);
-
+		initCUDAAlgorithm();
 		m_bAlgoInit = true;
 	}
 
diff --git a/src/CudaSirtAlgorithm.cpp b/src/CudaSirtAlgorithm.cpp
index 33e381a..7beb30e 100644
--- a/src/CudaSirtAlgorithm.cpp
+++ b/src/CudaSirtAlgorithm.cpp
@@ -113,36 +113,23 @@ bool CCudaSirtAlgorithm::initialize(CProjector2D* _pProjector,
 }
 
 //----------------------------------------------------------------------------------------
-// Iterate
-void CCudaSirtAlgorithm::run(int _iNrIterations)
-{
-	// check initialized
-	ASTRA_ASSERT(m_bIsInitialized);
 
-	if (!m_bAlgoInit) {
-		// We only override the initialisation step to copy the min/max masks
+void CCudaSirtAlgorithm::initCUDAAlgorithm()
+{
+	CCudaReconstructionAlgorithm2D::initCUDAAlgorithm();
 
-		bool ok = setupGeometry();
-		ASTRA_ASSERT(ok);
+	astraCUDA::SIRT* pSirt = dynamic_cast<astraCUDA::SIRT*>(m_pAlgo);
 
-		ok = m_pAlgo->allocateBuffers();
+	if (m_pMinMask || m_pMaxMask) {
+		const CVolumeGeometry2D& volgeom = *m_pReconstruction->getGeometry();
+		const float *pfMinMaskData = 0;
+		const float *pfMaxMaskData = 0;
+		if (m_pMinMask) pfMinMaskData = m_pMinMask->getDataConst();
+		if (m_pMaxMask) pfMaxMaskData = m_pMaxMask->getDataConst();
+		bool ok = pSirt->uploadMinMaxMasks(pfMinMaskData, pfMaxMaskData, volgeom.getGridColCount());
 		ASTRA_ASSERT(ok);
-
-		if (m_pMinMask || m_pMaxMask) {
-			const CVolumeGeometry2D& volgeom = *m_pReconstruction->getGeometry();
-			astraCUDA::SIRT* pSirt = dynamic_cast<astraCUDA::SIRT*>(m_pAlgo);
-			const float *pfMinMaskData = 0;
-			const float *pfMaxMaskData = 0;
-			if (m_pMinMask) pfMinMaskData = m_pMinMask->getDataConst();
-			if (m_pMaxMask) pfMaxMaskData = m_pMaxMask->getDataConst();
-			ok = pSirt->uploadMinMaxMasks(pfMinMaskData, pfMaxMaskData, volgeom.getGridColCount());
-			ASTRA_ASSERT(ok);
-		}
-
-		m_bAlgoInit = true;
 	}
 
-	CCudaReconstructionAlgorithm2D::run(_iNrIterations);
 }
 
 
-- 
cgit v1.2.3


From f03ceb16d2dbde0c43e8c90683c5feafe01e5356 Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Wed, 23 Mar 2016 15:30:47 +0100
Subject: Rename ART lambda option to Relaxation

---
 src/ArtAlgorithm.cpp | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/ArtAlgorithm.cpp b/src/ArtAlgorithm.cpp
index b59bd93..526c263 100644
--- a/src/ArtAlgorithm.cpp
+++ b/src/ArtAlgorithm.cpp
@@ -156,8 +156,12 @@ bool CArtAlgorithm::initialize(const Config& _cfg)
 		return false;
 	}
 
+	// "Lambda" is replaced by the more descriptive "Relaxation"
 	m_fLambda = _cfg.self.getOptionNumerical("Lambda", 1.0f);
-	CC.markOptionParsed("Lambda");
+	m_fLambda = _cfg.self.getOptionNumerical("Relaxation", m_fLambda);
+	if (!_cfg.self.hasOption("Relaxation"))
+		CC.markOptionParsed("Lambda");
+	CC.markOptionParsed("Relaxation");
 
 	// success
 	m_bIsInitialized = _check();
@@ -232,7 +236,7 @@ map<string,boost::any> CArtAlgorithm::getInformation()
 {
 	map<string, boost::any> res;
 	res["RayOrder"] = getInformation("RayOrder");
-	res["Lambda"] = getInformation("Lambda");
+	res["Relaxation"] = getInformation("Relaxation");
 	return mergeMap<string,boost::any>(CReconstructionAlgorithm2D::getInformation(), res);
 };
 
@@ -240,7 +244,7 @@ map<string,boost::any> CArtAlgorithm::getInformation()
 // Information - Specific
 boost::any CArtAlgorithm::getInformation(std::string _sIdentifier) 
 {
-	if (_sIdentifier == "Lambda")	{ return m_fLambda; }
+	if (_sIdentifier == "Relaxation")	{ return m_fLambda; }
 	if (_sIdentifier == "RayOrder") {
 		vector<float32> res;
 		for (int i = 0; i < m_iRayCount; i++) {
-- 
cgit v1.2.3


From 5edb35edc2c721b458334a65512b534912c2c542 Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Wed, 23 Mar 2016 15:30:56 +0100
Subject: Add relaxation parameters to SIRT, SART

---
 src/CudaSartAlgorithm.cpp | 17 ++++++++++++++++-
 src/CudaSirtAlgorithm.cpp |  6 ++++++
 src/SartAlgorithm.cpp     |  8 +++++++-
 src/SirtAlgorithm.cpp     | 11 +++++++++--
 4 files changed, 38 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/CudaSartAlgorithm.cpp b/src/CudaSartAlgorithm.cpp
index d202847..bf97224 100644
--- a/src/CudaSartAlgorithm.cpp
+++ b/src/CudaSartAlgorithm.cpp
@@ -107,7 +107,8 @@ bool CCudaSartAlgorithm::initialize(const Config& _cfg)
 		CC.markOptionParsed("ProjectionOrderList");
 	}
 
-
+	m_fLambda = _cfg.self.getOptionNumerical("Relaxation", 1.0f);
+	CC.markOptionParsed("Relaxation");
 
 	return true;
 }
@@ -123,12 +124,26 @@ bool CCudaSartAlgorithm::initialize(CProjector2D* _pProjector,
 	if (!m_bIsInitialized)
 		return false;
 
+	m_fLambda = 1.0f;
+
 	m_pAlgo = new astraCUDA::SART();
 	m_bAlgoInit = false;
 
 	return true;
 }
 
+//----------------------------------------------------------------------------------------
+
+void CCudaSartAlgorithm::initCUDAAlgorithm()
+{
+	CCudaReconstructionAlgorithm2D::initCUDAAlgorithm();
+
+	astraCUDA::SART* pSart = dynamic_cast<astraCUDA::SART*>(m_pAlgo);
+
+	pSart->setRelaxation(m_fLambda);
+}
+
+
 
 } // namespace astra
 
diff --git a/src/CudaSirtAlgorithm.cpp b/src/CudaSirtAlgorithm.cpp
index 7beb30e..c8dc677 100644
--- a/src/CudaSirtAlgorithm.cpp
+++ b/src/CudaSirtAlgorithm.cpp
@@ -50,6 +50,8 @@ CCudaSirtAlgorithm::CCudaSirtAlgorithm()
 
 	m_pMinMask = 0;
 	m_pMaxMask = 0;
+
+	m_fLambda = 1.0f;
 }
 
 //----------------------------------------------------------------------------------------
@@ -86,6 +88,8 @@ bool CCudaSirtAlgorithm::initialize(const Config& _cfg)
 	}
 	CC.markOptionParsed("MaxMaskId");
 
+	m_fLambda = _cfg.self.getOptionNumerical("Relaxation", 1.0f);
+	CC.markOptionParsed("Relaxation");
 
 	m_pAlgo = new astraCUDA::SIRT();
 	m_bAlgoInit = false;
@@ -108,6 +112,7 @@ bool CCudaSirtAlgorithm::initialize(CProjector2D* _pProjector,
 
 	m_pAlgo = new astraCUDA::SIRT();
 	m_bAlgoInit = false;
+	m_fLambda = 1.0f;
 
 	return true;
 }
@@ -130,6 +135,7 @@ void CCudaSirtAlgorithm::initCUDAAlgorithm()
 		ASTRA_ASSERT(ok);
 	}
 
+	pSirt->setRelaxation(m_fLambda);
 }
 
 
diff --git a/src/SartAlgorithm.cpp b/src/SartAlgorithm.cpp
index 9346160..403f851 100644
--- a/src/SartAlgorithm.cpp
+++ b/src/SartAlgorithm.cpp
@@ -151,6 +151,9 @@ bool CSartAlgorithm::initialize(const Config& _cfg)
 		CC.markOptionParsed("ProjectionOrderList");
 	}
 
+	m_fLambda = _cfg.self.getOptionNumerical("Relaxation", 1.0f);
+	CC.markOptionParsed("Relaxation");
+
 	// create data objects
 	m_pTotalRayLength = new CFloat32ProjectionData2D(m_pProjector->getProjectionGeometry());
 	m_pTotalPixelWeight = new CFloat32VolumeData2D(m_pProjector->getVolumeGeometry());
@@ -246,6 +249,7 @@ map<string,boost::any> CSartAlgorithm::getInformation()
 {
 	map<string, boost::any> res;
 	res["ProjectionOrder"] = getInformation("ProjectionOrder");
+	res["Relaxation"] = getInformation("Relaxation");
 	return mergeMap<string,boost::any>(CReconstructionAlgorithm2D::getInformation(), res);
 };
 
@@ -253,6 +257,8 @@ map<string,boost::any> CSartAlgorithm::getInformation()
 // Information - Specific
 boost::any CSartAlgorithm::getInformation(std::string _sIdentifier) 
 {
+	if (_sIdentifier == "Relaxation")
+		return m_fLambda;
 	if (_sIdentifier == "ProjectionOrder") {
 		vector<float32> res;
 		for (int i = 0; i < m_iProjectionCount; i++) {
@@ -286,7 +292,7 @@ void CSartAlgorithm::run(int _iNrIterations)
 			m_pProjector, 
 			SinogramMaskPolicy(m_pSinogramMask),														// sinogram mask
 			ReconstructionMaskPolicy(m_pReconstructionMask),											// reconstruction mask
-			SIRTBPPolicy(m_pReconstruction, m_pDiffSinogram, m_pTotalPixelWeight, m_pTotalRayLength),	// SIRT backprojection
+			SIRTBPPolicy(m_pReconstruction, m_pDiffSinogram, m_pTotalPixelWeight, m_pTotalRayLength, m_fLambda),	// SIRT backprojection
 			m_bUseSinogramMask, m_bUseReconstructionMask, true // options on/off
 		); 
 
diff --git a/src/SirtAlgorithm.cpp b/src/SirtAlgorithm.cpp
index d9f3a65..ff25648 100644
--- a/src/SirtAlgorithm.cpp
+++ b/src/SirtAlgorithm.cpp
@@ -76,6 +76,7 @@ void CSirtAlgorithm::_clear()
 	m_pDiffSinogram = NULL;
 	m_pTmpVolume = NULL;
 
+	m_fLambda = 1.0f;
 	m_iIterationCount = 0;
 }
 
@@ -91,6 +92,7 @@ void CSirtAlgorithm::clear()
 	ASTRA_DELETE(m_pDiffSinogram);
 	ASTRA_DELETE(m_pTmpVolume);
 
+	m_fLambda = 1.0f;
 	m_iIterationCount = 0;
 }
 
@@ -128,6 +130,9 @@ bool CSirtAlgorithm::initialize(const Config& _cfg)
 		return false;
 	}
 
+	m_fLambda = _cfg.self.getOptionNumerical("Relaxation", 1.0f);
+	CC.markOptionParsed("Relaxation");
+
 	// init data objects and data projectors
 	_init();
 
@@ -152,6 +157,8 @@ bool CSirtAlgorithm::initialize(CProjector2D* _pProjector,
 	m_pSinogram = _pSinogram;
 	m_pReconstruction = _pReconstruction;
 
+	m_fLambda = 1.0f;
+
 	// init data objects and data projectors
 	_init();
 
@@ -248,7 +255,7 @@ void CSirtAlgorithm::run(int _iNrIterations)
 			x = 1.0f / x;
 		else
 			x = 0.0f;
-		pfT[i] = x;
+		pfT[i] = m_fLambda * x;
 	}
 	pfT = m_pTotalRayLength->getData();
 	for (int i = 0; i < m_pTotalRayLength->getSize(); ++i) {
@@ -296,7 +303,7 @@ void CSirtAlgorithm::run(int _iNrIterations)
 		m_pTmpVolume->setData(0.0f);
 		pBackProjector->project();
 
-		// divide by pixel weights
+		// multiply with relaxation factor divided by pixel weights
 		(*m_pTmpVolume) *= (*m_pTotalPixelWeight);
 		(*m_pReconstruction) += (*m_pTmpVolume);
 
-- 
cgit v1.2.3


From 16430239d04ff738a21146c410918c285552543f Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Wed, 23 Mar 2016 15:50:24 +0100
Subject: Add relaxation parameters to SIRT3D

---
 src/CudaSirtAlgorithm3D.cpp | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'src')

diff --git a/src/CudaSirtAlgorithm3D.cpp b/src/CudaSirtAlgorithm3D.cpp
index 605c470..c819f8e 100644
--- a/src/CudaSirtAlgorithm3D.cpp
+++ b/src/CudaSirtAlgorithm3D.cpp
@@ -56,6 +56,7 @@ CCudaSirtAlgorithm3D::CCudaSirtAlgorithm3D()
 	m_iGPUIndex = -1;
 	m_iVoxelSuperSampling = 1;
 	m_iDetectorSuperSampling = 1;
+	m_fLambda = 1.0f;
 }
 
 //----------------------------------------------------------------------------------------
@@ -128,6 +129,8 @@ bool CCudaSirtAlgorithm3D::initialize(const Config& _cfg)
 		return false;
 	}
 
+	m_fLambda = _cfg.self.getOptionNumerical("Relaxation", 1.0f); // same default as the constructor
+
 	initializeFromProjector();
 
 	// Deprecated options
@@ -135,6 +138,7 @@ bool CCudaSirtAlgorithm3D::initialize(const Config& _cfg)
 	m_iDetectorSuperSampling = (int)_cfg.self.getOptionNumerical("DetectorSuperSampling", m_iDetectorSuperSampling);
 	m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUindex", m_iGPUIndex);
 	m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUIndex", m_iGPUIndex);
+	CC.markOptionParsed("Relaxation");
 	CC.markOptionParsed("VoxelSuperSampling");
 	CC.markOptionParsed("DetectorSuperSampling");
 	CC.markOptionParsed("GPUIndex");
@@ -164,6 +168,8 @@ bool CCudaSirtAlgorithm3D::initialize(CProjector3D* _pProjector,
 		clear();
 	}
 
+	m_fLambda = 1.0f;
+
 	// required classes
 	m_pProjector = _pProjector;
 	m_pSinogram = _pSinogram;
@@ -224,6 +230,8 @@ void CCudaSirtAlgorithm3D::run(int _iNrIterations)
 
 		ASTRA_ASSERT(ok);
 
+		m_pSirt->setRelaxation(m_fLambda);
+
 		m_bAstraSIRTInit = true;
 
 	}
-- 
cgit v1.2.3


From f9cc36d3507f7cde4d20165836d65a584ced720f Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Wed, 23 Mar 2016 18:21:42 +0100
Subject: Fix accumulating multiple raylengths in SART

Thanks to @mohamedadaly for noticing.
---
 src/SartAlgorithm.cpp | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/SartAlgorithm.cpp b/src/SartAlgorithm.cpp
index 9346160..f80df61 100644
--- a/src/SartAlgorithm.cpp
+++ b/src/SartAlgorithm.cpp
@@ -272,9 +272,8 @@ void CSartAlgorithm::run(int _iNrIterations)
 
 	m_bShouldAbort = false;
 
-	int iIteration = 0;
-
 	// data projectors
+	CDataProjectorInterface* pFirstForwardProjector;
 	CDataProjectorInterface* pForwardProjector;
 	CDataProjectorInterface* pBackProjector;
 
@@ -292,7 +291,7 @@ void CSartAlgorithm::run(int _iNrIterations)
 
 	// first time forward projection data projector,
 	// also computes total pixel weight and total ray length
-	pForwardProjector = dispatchDataProjector(
+	pFirstForwardProjector = dispatchDataProjector(
 			m_pProjector, 
 			SinogramMaskPolicy(m_pSinogramMask),														// sinogram mask
 			ReconstructionMaskPolicy(m_pReconstructionMask),											// reconstruction mask
@@ -303,16 +302,30 @@ void CSartAlgorithm::run(int _iNrIterations)
 			m_bUseSinogramMask, m_bUseReconstructionMask, true											 // options on/off
 		);
 
+	// forward projection data projector
+	pForwardProjector = dispatchDataProjector(
+			m_pProjector,
+			SinogramMaskPolicy(m_pSinogramMask),														// sinogram mask
+			ReconstructionMaskPolicy(m_pReconstructionMask),											// reconstruction mask
+			CombinePolicy<DiffFPPolicy, TotalPixelWeightPolicy>(					// 2 basic operations
+				DiffFPPolicy(m_pReconstruction, m_pDiffSinogram, m_pSinogram),								// forward projection with difference calculation
+				TotalPixelWeightPolicy(m_pTotalPixelWeight)),												// calculate the total pixel weights
+			m_bUseSinogramMask, m_bUseReconstructionMask, true											 // options on/off
+		);
+
 
 
 	// iteration loop
-	for (; iIteration < _iNrIterations && !m_bShouldAbort; ++iIteration) {
+	for (int iIteration = 0; iIteration < _iNrIterations && !m_bShouldAbort; ++iIteration) {
 
 		int iProjection = m_piProjectionOrder[m_iIterationCount % m_iProjectionCount];
 	
 		// forward projection and difference calculation
 		m_pTotalPixelWeight->setData(0.0f);
-		pForwardProjector->projectSingleProjection(iProjection);
+		if (iIteration < m_iProjectionCount)
+			pFirstForwardProjector->projectSingleProjection(iProjection);
+		else
+			pForwardProjector->projectSingleProjection(iProjection);
 		// backprojection
 		pBackProjector->projectSingleProjection(iProjection);
 		// update iteration count
@@ -325,6 +338,7 @@ void CSartAlgorithm::run(int _iNrIterations)
 	}
 
 
+	ASTRA_DELETE(pFirstForwardProjector);
 	ASTRA_DELETE(pForwardProjector);
 	ASTRA_DELETE(pBackProjector);
 
-- 
cgit v1.2.3


From 5c6a9523523c1680e898b4cf897531cd75435f22 Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Thu, 24 Mar 2016 17:34:34 +0100
Subject: Fix cppcheck warnings

---
 src/CudaDataOperationAlgorithm.cpp | 2 +-
 src/Float32VolumeData3DMemory.cpp  | 1 -
 src/XMLNode.cpp                    | 4 ++--
 3 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/CudaDataOperationAlgorithm.cpp b/src/CudaDataOperationAlgorithm.cpp
index 15886a4..82b676b 100644
--- a/src/CudaDataOperationAlgorithm.cpp
+++ b/src/CudaDataOperationAlgorithm.cpp
@@ -76,7 +76,7 @@ bool CCudaDataOperationAlgorithm::initialize(const Config& _cfg)
 	node = _cfg.self.getSingleNode("DataId");
 	ASTRA_CONFIG_CHECK(node, "CCudaDataOperationAlgorithm", "No DataId tag specified.");
 	vector<string> data = node.getContentArray();
-	for (vector<string>::iterator it = data.begin(); it != data.end(); it++){
+	for (vector<string>::iterator it = data.begin(); it != data.end(); ++it){
 		int id = StringUtil::stringToInt(*it);
 		m_pData.push_back(dynamic_cast<CFloat32Data2D*>(CData2DManager::getSingleton().get(id)));
 	}
diff --git a/src/Float32VolumeData3DMemory.cpp b/src/Float32VolumeData3DMemory.cpp
index af45cb9..14adb1a 100644
--- a/src/Float32VolumeData3DMemory.cpp
+++ b/src/Float32VolumeData3DMemory.cpp
@@ -136,7 +136,6 @@ CFloat32VolumeData2D * CFloat32VolumeData3DMemory::fetchSliceZ(int _iSliceIndex)
 	CFloat32VolumeData2D* res = new CFloat32VolumeData2D(&volGeom);
 
 	// copy data
-	int iSliceCount = m_pGeometry->getGridSliceCount();
 	float * pfTargetData = res->getData();
 	for(int iRowIndex = 0; iRowIndex < iRowCount; iRowIndex++)
 	{
diff --git a/src/XMLNode.cpp b/src/XMLNode.cpp
index 40a9b22..cf268c2 100644
--- a/src/XMLNode.cpp
+++ b/src/XMLNode.cpp
@@ -158,7 +158,7 @@ vector<string> XMLNode::getContentArray() const
 	vector<string> res(iSize);
 	// loop all list item nodes
 	list<XMLNode> nodes = getNodes("ListItem");
-	for (list<XMLNode>::iterator it = nodes.begin(); it != nodes.end(); it++) {
+	for (list<XMLNode>::iterator it = nodes.begin(); it != nodes.end(); ++it) {
 		int iIndex = it->getAttributeNumerical("index");
 		string sValue = it->getAttribute("value");
 		ASTRA_ASSERT(iIndex < iSize);
@@ -290,7 +290,7 @@ vector<float32> XMLNode::getOptionNumericalArray(string _sKey) const
 	if (!hasOption(_sKey)) return vector<float32>();
 
 	list<XMLNode> nodes = getNodes("Option");
-	for (list<XMLNode>::iterator it = nodes.begin(); it != nodes.end(); it++) {
+	for (list<XMLNode>::iterator it = nodes.begin(); it != nodes.end(); ++it) {
 		if (it->getAttribute("key") == _sKey) {
 			vector<float32> vals = it->getContentNumericalArray();
 			return vals;
-- 
cgit v1.2.3


From ab583834eec75d23e16b8f205ab65788d8df6ffe Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Thu, 24 Mar 2016 17:34:50 +0100
Subject: Fix memory leak

---
 src/FilteredBackProjectionAlgorithm.cpp | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

(limited to 'src')

diff --git a/src/FilteredBackProjectionAlgorithm.cpp b/src/FilteredBackProjectionAlgorithm.cpp
index c195578..ccbfec6 100644
--- a/src/FilteredBackProjectionAlgorithm.cpp
+++ b/src/FilteredBackProjectionAlgorithm.cpp
@@ -117,12 +117,10 @@ bool CFilteredBackProjectionAlgorithm::initialize(const Config& _cfg)
 		int angleCount = projectionIndex.size();
 		int detectorCount = m_pProjector->getProjectionGeometry()->getDetectorCount();
 
+		// TODO: There is no need to allocate this. Better just
+		// create the CFloat32ProjectionData2D object directly, and use its
+		// memory.
 		float32 * sinogramData2D = new float32[angleCount* detectorCount];
-		float32 ** sinogramData = new float32*[angleCount];
-		for (int i = 0; i < angleCount; i++)
-		{
-			sinogramData[i] = &(sinogramData2D[i * detectorCount]);
-		}
 
 		float32 * projectionAngles = new float32[angleCount];
 		float32 detectorWidth = m_pProjector->getProjectionGeometry()->getDetectorWidth();
@@ -130,6 +128,8 @@ bool CFilteredBackProjectionAlgorithm::initialize(const Config& _cfg)
 		for (int i = 0; i < angleCount; i ++) {
 			if (projectionIndex[i] > m_pProjector->getProjectionGeometry()->getProjectionAngleCount() -1 )
 			{
+				delete[] sinogramData2D;
+				delete[] projectionAngles;
 				ASTRA_ERROR("Invalid Projection Index");
 				return false;
 			} else {
@@ -139,7 +139,6 @@ bool CFilteredBackProjectionAlgorithm::initialize(const Config& _cfg)
 				{
 					sinogramData2D[i*detectorCount+ iDetector] = m_pSinogram->getData2D()[orgIndex][iDetector];
 				}
-//				sinogramData[i] = m_pSinogram->getSingleProjectionData(projectionIndex[i]);
 				projectionAngles[i] = m_pProjector->getProjectionGeometry()->getProjectionAngle((int)projectionIndex[i] );
 
 			}
@@ -148,6 +147,9 @@ bool CFilteredBackProjectionAlgorithm::initialize(const Config& _cfg)
 		CParallelProjectionGeometry2D * pg = new CParallelProjectionGeometry2D(angleCount, detectorCount,detectorWidth,projectionAngles);
 		m_pProjector = new CParallelBeamLineKernelProjector2D(pg,m_pReconstruction->getGeometry());
 		m_pSinogram = new CFloat32ProjectionData2D(pg, sinogramData2D);
+
+		delete[] sinogramData2D;
+		delete[] projectionAngles;
 	}
 
 	// TODO: check that the angles are linearly spaced between 0 and pi
-- 
cgit v1.2.3


From 547def0ea6e3eab07b7e4c48cee6d6a81f6155e1 Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Mon, 18 Apr 2016 11:43:48 +0200
Subject: Fix stringToInt parsing doubles

---
 src/Utilities.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Utilities.cpp b/src/Utilities.cpp
index 4b80503..c9740bf 100644
--- a/src/Utilities.cpp
+++ b/src/Utilities.cpp
@@ -42,7 +42,7 @@ namespace StringUtil {
 
 int stringToInt(const std::string& s)
 {
-	double i;
+	int i;
 	std::istringstream iss(s);
 	iss.imbue(std::locale::classic());
 	iss >> i;
-- 
cgit v1.2.3


From 6ccde536191676f9b504055b16c68786858b693d Mon Sep 17 00:00:00 2001
From: "Daniel M. Pelt" <D.M.Pelt@cwi.nl>
Date: Fri, 22 Apr 2016 14:22:53 +0200
Subject: Change CPU FFT implementation

---
 src/FilteredBackProjectionAlgorithm.cpp |   45 +-
 src/Fourier.cpp                         | 3514 +++++++++++++++++++++++++++++--
 2 files changed, 3338 insertions(+), 221 deletions(-)

(limited to 'src')

diff --git a/src/FilteredBackProjectionAlgorithm.cpp b/src/FilteredBackProjectionAlgorithm.cpp
index ccbfec6..90efd52 100644
--- a/src/FilteredBackProjectionAlgorithm.cpp
+++ b/src/FilteredBackProjectionAlgorithm.cpp
@@ -274,60 +274,57 @@ void CFilteredBackProjectionAlgorithm::performFiltering(CFloat32ProjectionData2D
 		filter[iDetector] = (2.0f * (zpDetector - iDetector)) / zpDetector;
 
 
-	float32* pfRe = new float32[iAngleCount * zpDetector];
-	float32* pfIm = new float32[iAngleCount * zpDetector];
+	float32* pf = new float32[2 * iAngleCount * zpDetector];
+	int *ip = new int[int(2+sqrt(zpDetector)+1)];
+	ip[0]=0;
+	float32 *w = new float32[zpDetector/2];
 
 	// Copy and zero-pad data
 	for (int iAngle = 0; iAngle < iAngleCount; ++iAngle) {
-		float32* pfReRow = pfRe + iAngle * zpDetector;
-		float32* pfImRow = pfIm + iAngle * zpDetector;
+		float32* pfRow = pf + iAngle * 2 * zpDetector;
 		float32* pfDataRow = _pFilteredSinogram->getData() + iAngle * iDetectorCount;
 		for (int iDetector = 0; iDetector < iDetectorCount; ++iDetector) {
-			pfReRow[iDetector] = pfDataRow[iDetector];
-			pfImRow[iDetector] = 0.0f;
+			pfRow[2*iDetector] = pfDataRow[iDetector];
+			pfRow[2*iDetector+1] = 0.0f;
 		}
 		for (int iDetector = iDetectorCount; iDetector < zpDetector; ++iDetector) {
-			pfReRow[iDetector] = 0.0f;
-			pfImRow[iDetector] = 0.0f;
+			pfRow[2*iDetector] = 0.0f;
+			pfRow[2*iDetector+1] = 0.0f;
 		}
 	}
 
 	// in-place FFT
 	for (int iAngle = 0; iAngle < iAngleCount; ++iAngle) {
-		float32* pfReRow = pfRe + iAngle * zpDetector;
-		float32* pfImRow = pfIm + iAngle * zpDetector;
-
-		fastTwoPowerFourierTransform1D(zpDetector, pfReRow, pfImRow, pfReRow, pfImRow, 1, 1, false);
+		float32* pfRow = pf + iAngle * 2 * zpDetector;
+		cdft(2*zpDetector, -1, pfRow, ip, w);
 	}
 
 	// Filter
 	for (int iAngle = 0; iAngle < iAngleCount; ++iAngle) {
-		float32* pfReRow = pfRe + iAngle * zpDetector;
-		float32* pfImRow = pfIm + iAngle * zpDetector;
+		float32* pfRow = pf + iAngle * 2 * zpDetector;
 		for (int iDetector = 0; iDetector < zpDetector; ++iDetector) {
-			pfReRow[iDetector] *= filter[iDetector];
-			pfImRow[iDetector] *= filter[iDetector];
+			pfRow[2*iDetector] *= filter[iDetector];
+			pfRow[2*iDetector+1] *= filter[iDetector];
 		}
 	}
 
 	// in-place inverse FFT
 	for (int iAngle = 0; iAngle < iAngleCount; ++iAngle) {
-		float32* pfReRow = pfRe + iAngle * zpDetector;
-		float32* pfImRow = pfIm + iAngle * zpDetector;
-
-		fastTwoPowerFourierTransform1D(zpDetector, pfReRow, pfImRow, pfReRow, pfImRow, 1, 1, true);
+		float32* pfRow = pf + iAngle * 2 * zpDetector;
+		cdft(2*zpDetector, 1, pfRow, ip, w);
 	}
 
 	// Copy data back
 	for (int iAngle = 0; iAngle < iAngleCount; ++iAngle) {
-		float32* pfReRow = pfRe + iAngle * zpDetector;
+		float32* pfRow = pf + iAngle * 2 * zpDetector;
 		float32* pfDataRow = _pFilteredSinogram->getData() + iAngle * iDetectorCount;
 		for (int iDetector = 0; iDetector < iDetectorCount; ++iDetector)
-			pfDataRow[iDetector] = pfReRow[iDetector];
+			pfDataRow[iDetector] = pfRow[2*iDetector] / zpDetector;
 	}
 
-	delete[] pfRe;
-	delete[] pfIm;
+	delete[] pf;
+	delete[] w;
+	delete[] ip;
 	delete[] filter;
 }
 
diff --git a/src/Fourier.cpp b/src/Fourier.cpp
index 584b633..5ca22e6 100644
--- a/src/Fourier.cpp
+++ b/src/Fourier.cpp
@@ -30,204 +30,3324 @@ $Id$
 
 namespace astra {
 
+    /*
+Copyright Takuya OOURA, 1996-2001
 
-void discreteFourierTransform1D(unsigned int iLength,
-                                const float32* pfRealIn,
-                                const float32* pfImaginaryIn,
-                                float32* pfRealOut,
-                                float32* pfImaginaryOut,
-                                unsigned int iStrideIn,
-                                unsigned int iStrideOut,
-                                bool inverse)
-{
-	for (unsigned int w = 0; w < iLength; w++)
-	{
-		pfRealOut[iStrideOut*w] = pfImaginaryOut[iStrideOut*w] = 0;
-		for (unsigned int y = 0; y < iLength; y++)
-		{
-			float32 a = 2 * PI * w * y / float32(iLength);
-			if (!inverse)
-				a = -a;
-			float32 ca = cos(a);
-			float32 sa = sin(a);
-			pfRealOut[iStrideOut*w] += pfRealIn[iStrideIn*y] * ca - pfImaginaryIn[iStrideIn*y] * sa;
-			pfImaginaryOut[iStrideOut*w] += pfRealIn[iStrideIn*y] * sa + pfImaginaryIn[iStrideIn*y] * ca;   
-		}
-	}
-
-	if (inverse) {
-		for (unsigned int x = 0; x < iLength; ++x) {
-			pfRealOut[iStrideOut*x] /= iLength;
-			pfImaginaryOut[iStrideOut*x] /= iLength;
-		}
-	}
-}
-
-void discreteFourierTransform2D(unsigned int iHeight, unsigned int iWidth,
-                                const float32* pfRealIn,
-                                const float32* pfImaginaryIn,
-                                float32* pfRealOut,
-                                float32* pfImaginaryOut,
-                                bool inverse)
-{
-	float32* reTemp = new float32[iWidth * iHeight];
-	float32* imTemp = new float32[iWidth * iHeight];
-
-	//calculate the fourier transform of the columns
-	for (unsigned int x = 0; x < iWidth; x++)
-	{
-		discreteFourierTransform1D(iHeight, pfRealIn+x, pfImaginaryIn+x,
-		                           reTemp+x, imTemp+x,
-		                           iWidth, iWidth, inverse);
-	}
-
-	//calculate the fourier transform of the rows
-	for(unsigned int y = 0; y < iHeight; y++)
-	{
-		discreteFourierTransform1D(iWidth,
-		                           reTemp+y*iWidth,
-		                           imTemp+y*iWidth,
-		                           pfRealOut+y*iWidth,
-		                           pfImaginaryOut+y*iWidth,
-		                           1, 1, inverse);
-	}
-
-	delete[] reTemp;
-	delete[] imTemp;
-}
-
-/** permute the entries from pfDataIn into pfDataOut to prepare for an
- *  in-place FFT. pfDataIn may be equal to pfDataOut.
- */
-static void bitReverse(unsigned int iLength,
-                       const float32* pfDataIn, float32* pfDataOut,
-                       unsigned int iStrideShiftIn,
-                       unsigned int iStrideShiftOut)
-{
-	if (pfDataIn == pfDataOut) {
-		assert(iStrideShiftIn == iStrideShiftOut);
-		float32 t;
-		unsigned int j = 0;
-		for(unsigned int i = 0; i < iLength - 1; i++) {
-			if (i < j) {
-				t = pfDataOut[i<<iStrideShiftOut];
-				pfDataOut[i<<iStrideShiftOut] = pfDataOut[j<<iStrideShiftOut];
-				pfDataOut[j<<iStrideShiftOut] = t;
-			}
-			unsigned int k = iLength / 2;
-			while (k <= j) {
-				j -= k;
-				k /= 2;
-			}
-			j += k;
-		}
-	} else {
-		unsigned int j = 0;
-		for(unsigned int i = 0; i < iLength - 1; i++) {
-			pfDataOut[i<<iStrideShiftOut] = pfDataIn[j<<iStrideShiftIn];
-			unsigned int k = iLength / 2;
-			while (k <= j) {
-				j -= k;
-				k /= 2;
-			}
-			j += k;
-		}
-		pfDataOut[(iLength-1)<<iStrideShiftOut] = pfDataIn[(iLength-1)<<iStrideShiftOut];
-	}
-}
-
-static unsigned int log2(unsigned int n)
-{
-	unsigned int l = 0;
-	while (n > 1) {
-		n /= 2;
-		++l;
-	}
-	return l;
-}
-
-/** perform 1D FFT. iLength, iStrideIn, iStrideOut must be powers of two. */
-void fastTwoPowerFourierTransform1D(unsigned int iLength,
-                                    const float32* pfRealIn,
-                                    const float32* pfImaginaryIn,
-                                    float32* pfRealOut,
-                                    float32* pfImaginaryOut,
-                                    unsigned int iStrideIn,
-                                    unsigned int iStrideOut,
-                                    bool inverse)
-{
-	unsigned int iStrideShiftIn = log2(iStrideIn);
-	unsigned int iStrideShiftOut = log2(iStrideOut);
-	unsigned int iLogLength = log2(iLength);
-
-	bitReverse(iLength, pfRealIn, pfRealOut, iStrideShiftIn, iStrideShiftOut);
-	bitReverse(iLength, pfImaginaryIn, pfImaginaryOut, iStrideShiftIn, iStrideShiftOut);
-
-	float32 ca = -1.0;
-	float32 sa = 0.0;
-	unsigned int l1 = 1, l2 = 1;
-	for(unsigned int l=0; l < iLogLength; ++l)
-	{
-		l1 = l2;
-		l2 *= 2;
-		float32 u1 = 1.0;
-		float32 u2 = 0.0;
-		for(unsigned int j = 0; j < l1; j++)
-		{
-			for(unsigned int i = j; i < iLength; i += l2)
-			{
-				unsigned int i1 = i + l1;
-				float32 t1 = u1 * pfRealOut[i1<<iStrideShiftOut] - u2 * pfImaginaryOut[i1<<iStrideShiftOut];
-				float32 t2 = u1 * pfImaginaryOut[i1<<iStrideShiftOut] + u2 * pfRealOut[i1<<iStrideShiftOut];
-				pfRealOut[i1<<iStrideShiftOut] = pfRealOut[i<<iStrideShiftOut] - t1;
-				pfImaginaryOut[i1<<iStrideShiftOut] = pfImaginaryOut[i<<iStrideShiftOut] - t2;
-				pfRealOut[i<<iStrideShiftOut] += t1;
-				pfImaginaryOut[i<<iStrideShiftOut] += t2;
-			}
-			float32 z =  u1 * ca - u2 * sa;
-			u2 = u1 * sa + u2 * ca;
-			u1 = z;
-		}
-		sa = sqrt((1.0 - ca) / 2.0);
-		if (!inverse) 
-			sa = -sa;
-		ca = sqrt((1.0 + ca) / 2.0);
-	}
-
-	if (inverse) {
-		for (unsigned int i = 0; i < iLength; ++i) {
-			pfRealOut[i<<iStrideShiftOut] /= iLength;
-			pfImaginaryOut[i<<iStrideShiftOut] /= iLength;
-		}
-	}
-}
-
-void fastTwoPowerFourierTransform2D(unsigned int iHeight,
-                                    unsigned int iWidth,
-                                    const float32* pfRealIn,
-                                    const float32* pfImaginaryIn,
-                                    float32* pfRealOut,
-                                    float32* pfImaginaryOut,
-                                    bool inverse)
-{
-	//calculate the fourier transform of the columns
-	for (unsigned int x = 0; x < iWidth; x++)
-	{
-		fastTwoPowerFourierTransform1D(iHeight, pfRealIn+x, pfImaginaryIn+x,
-		                               pfRealOut+x, pfImaginaryOut+x,
-		                               iWidth, iWidth, inverse);
-	}
-
-	//calculate the fourier transform of the rows
-	for (unsigned int y = 0; y < iHeight; y++)
-	{
-		fastTwoPowerFourierTransform1D(iWidth,
-		                               pfRealOut+y*iWidth,
-		                               pfImaginaryOut+y*iWidth,
-		                               pfRealOut+y*iWidth,
-		                               pfImaginaryOut+y*iWidth,
-		                               1, 1, inverse);
-	}
+You may use, copy, modify and distribute this code for any purpose (include commercial use) and without fee.
+
+Source: http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
+
+Fast Fourier/Cosine/Sine Transform
+    dimension   :one
+    data length :power of 2
+    decimation  :frequency
+    radix       :split-radix
+    data        :inplace
+    table       :use
+functions
+    cdft: Complex Discrete Fourier Transform
+    rdft: Real Discrete Fourier Transform
+    ddct: Discrete Cosine Transform
+    ddst: Discrete Sine Transform
+    dfct: Cosine Transform of RDFT (Real Symmetric DFT)
+    dfst: Sine Transform of RDFT (Real Anti-symmetric DFT)
+function prototypes
+    void cdft(int, int, float32 *, int *, float32 *);
+    void rdft(int, int, float32 *, int *, float32 *);
+    void ddct(int, int, float32 *, int *, float32 *);
+    void ddst(int, int, float32 *, int *, float32 *);
+    void dfct(int, float32 *, float32 *, int *, float32 *);
+    void dfst(int, float32 *, float32 *, int *, float32 *);
+macro definitions
+    USE_CDFT_PTHREADS : default=not defined
+        CDFT_THREADS_BEGIN_N  : must be >= 512, default=8192
+        CDFT_4THREADS_BEGIN_N : must be >= 512, default=65536
+    USE_CDFT_WINTHREADS : default=not defined
+        CDFT_THREADS_BEGIN_N  : must be >= 512, default=32768
+        CDFT_4THREADS_BEGIN_N : must be >= 512, default=524288
+
+
+-------- Complex DFT (Discrete Fourier Transform) --------
+    [definition]
+        <case1>
+            X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k<n
+        <case2>
+            X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k<n
+        (notes: sum_j=0^n-1 is a summation from j=0 to n-1)
+    [usage]
+        <case1>
+            ip[0] = 0; // first time only
+            cdft(2*n, 1, a, ip, w);
+        <case2>
+            ip[0] = 0; // first time only
+            cdft(2*n, -1, a, ip, w);
+    [parameters]
+        2*n            :data length (int)
+                        n >= 1, n = power of 2
+        a[0...2*n-1]   :input/output data (float32 *)
+                        input data
+                            a[2*j] = Re(x[j]), 
+                            a[2*j+1] = Im(x[j]), 0<=j<n
+                        output data
+                            a[2*k] = Re(X[k]), 
+                            a[2*k+1] = Im(X[k]), 0<=k<n
+        ip[0...*]      :work area for bit reversal (int *)
+                        length of ip >= 2+sqrt(n)
+                        strictly, 
+                        length of ip >= 
+                            2+(1<<(int)(log(n+0.5)/log(2))/2).
+                        ip[0],ip[1] are pointers of the cos/sin table.
+        w[0...n/2-1]   :cos/sin table (float32 *)
+                        w[],ip[] are initialized if ip[0] == 0.
+    [remark]
+        Inverse of 
+            cdft(2*n, -1, a, ip, w);
+        is 
+            cdft(2*n, 1, a, ip, w);
+            for (j = 0; j <= 2 * n - 1; j++) {
+                a[j] *= 1.0 / n;
+            }
+        .
+
+
+-------- Real DFT / Inverse of Real DFT --------
+    [definition]
+        <case1> RDFT
+            R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2
+            I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2
+        <case2> IRDFT (excluding scale)
+            a[k] = (R[0] + R[n/2]*cos(pi*k))/2 + 
+                   sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) + 
+                   sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n
+    [usage]
+        <case1>
+            ip[0] = 0; // first time only
+            rdft(n, 1, a, ip, w);
+        <case2>
+            ip[0] = 0; // first time only
+            rdft(n, -1, a, ip, w);
+    [parameters]
+        n              :data length (int)
+                        n >= 2, n = power of 2
+        a[0...n-1]     :input/output data (float32 *)
+                        <case1>
+                            output data
+                                a[2*k] = R[k], 0<=k<n/2
+                                a[2*k+1] = I[k], 0<k<n/2
+                                a[1] = R[n/2]
+                        <case2>
+                            input data
+                                a[2*j] = R[j], 0<=j<n/2
+                                a[2*j+1] = I[j], 0<j<n/2
+                                a[1] = R[n/2]
+        ip[0...*]      :work area for bit reversal (int *)
+                        length of ip >= 2+sqrt(n/2)
+                        strictly, 
+                        length of ip >= 
+                            2+(1<<(int)(log(n/2+0.5)/log(2))/2).
+                        ip[0],ip[1] are pointers of the cos/sin table.
+        w[0...n/2-1]   :cos/sin table (float32 *)
+                        w[],ip[] are initialized if ip[0] == 0.
+    [remark]
+        Inverse of 
+            rdft(n, 1, a, ip, w);
+        is 
+            rdft(n, -1, a, ip, w);
+            for (j = 0; j <= n - 1; j++) {
+                a[j] *= 2.0 / n;
+            }
+        .
+
+
+-------- DCT (Discrete Cosine Transform) / Inverse of DCT --------
+    [definition]
+        <case1> IDCT (excluding scale)
+            C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k<n
+        <case2> DCT
+            C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k<n
+    [usage]
+        <case1>
+            ip[0] = 0; // first time only
+            ddct(n, 1, a, ip, w);
+        <case2>
+            ip[0] = 0; // first time only
+            ddct(n, -1, a, ip, w);
+    [parameters]
+        n              :data length (int)
+                        n >= 2, n = power of 2
+        a[0...n-1]     :input/output data (float32 *)
+                        output data
+                            a[k] = C[k], 0<=k<n
+        ip[0...*]      :work area for bit reversal (int *)
+                        length of ip >= 2+sqrt(n/2)
+                        strictly, 
+                        length of ip >= 
+                            2+(1<<(int)(log(n/2+0.5)/log(2))/2).
+                        ip[0],ip[1] are pointers of the cos/sin table.
+        w[0...n*5/4-1] :cos/sin table (float32 *)
+                        w[],ip[] are initialized if ip[0] == 0.
+    [remark]
+        Inverse of 
+            ddct(n, -1, a, ip, w);
+        is 
+            a[0] *= 0.5;
+            ddct(n, 1, a, ip, w);
+            for (j = 0; j <= n - 1; j++) {
+                a[j] *= 2.0 / n;
+            }
+        .
+
+
+-------- DST (Discrete Sine Transform) / Inverse of DST --------
+    [definition]
+        <case1> IDST (excluding scale)
+            S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k<n
+        <case2> DST
+            S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0<k<=n
+    [usage]
+        <case1>
+            ip[0] = 0; // first time only
+            ddst(n, 1, a, ip, w);
+        <case2>
+            ip[0] = 0; // first time only
+            ddst(n, -1, a, ip, w);
+    [parameters]
+        n              :data length (int)
+                        n >= 2, n = power of 2
+        a[0...n-1]     :input/output data (float32 *)
+                        <case1>
+                            input data
+                                a[j] = A[j], 0<j<n
+                                a[0] = A[n]
+                            output data
+                                a[k] = S[k], 0<=k<n
+                        <case2>
+                            output data
+                                a[k] = S[k], 0<k<n
+                                a[0] = S[n]
+        ip[0...*]      :work area for bit reversal (int *)
+                        length of ip >= 2+sqrt(n/2)
+                        strictly, 
+                        length of ip >= 
+                            2+(1<<(int)(log(n/2+0.5)/log(2))/2).
+                        ip[0],ip[1] are pointers of the cos/sin table.
+        w[0...n*5/4-1] :cos/sin table (float32 *)
+                        w[],ip[] are initialized if ip[0] == 0.
+    [remark]
+        Inverse of 
+            ddst(n, -1, a, ip, w);
+        is 
+            a[0] *= 0.5;
+            ddst(n, 1, a, ip, w);
+            for (j = 0; j <= n - 1; j++) {
+                a[j] *= 2.0 / n;
+            }
+        .
+
+
+-------- Cosine Transform of RDFT (Real Symmetric DFT) --------
+    [definition]
+        C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n
+    [usage]
+        ip[0] = 0; // first time only
+        dfct(n, a, t, ip, w);
+    [parameters]
+        n              :data length - 1 (int)
+                        n >= 2, n = power of 2
+        a[0...n]       :input/output data (float32 *)
+                        output data
+                            a[k] = C[k], 0<=k<=n
+        t[0...n/2]     :work area (float32 *)
+        ip[0...*]      :work area for bit reversal (int *)
+                        length of ip >= 2+sqrt(n/4)
+                        strictly, 
+                        length of ip >= 
+                            2+(1<<(int)(log(n/4+0.5)/log(2))/2).
+                        ip[0],ip[1] are pointers of the cos/sin table.
+        w[0...n*5/8-1] :cos/sin table (float32 *)
+                        w[],ip[] are initialized if ip[0] == 0.
+    [remark]
+        Inverse of 
+            a[0] *= 0.5;
+            a[n] *= 0.5;
+            dfct(n, a, t, ip, w);
+        is 
+            a[0] *= 0.5;
+            a[n] *= 0.5;
+            dfct(n, a, t, ip, w);
+            for (j = 0; j <= n; j++) {
+                a[j] *= 2.0 / n;
+            }
+        .
+
+
+-------- Sine Transform of RDFT (Real Anti-symmetric DFT) --------
+    [definition]
+        S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0<k<n
+    [usage]
+        ip[0] = 0; // first time only
+        dfst(n, a, t, ip, w);
+    [parameters]
+        n              :data length + 1 (int)
+                        n >= 2, n = power of 2
+        a[0...n-1]     :input/output data (float32 *)
+                        output data
+                            a[k] = S[k], 0<k<n
+                        (a[0] is used for work area)
+        t[0...n/2-1]   :work area (float32 *)
+        ip[0...*]      :work area for bit reversal (int *)
+                        length of ip >= 2+sqrt(n/4)
+                        strictly, 
+                        length of ip >= 
+                            2+(1<<(int)(log(n/4+0.5)/log(2))/2).
+                        ip[0],ip[1] are pointers of the cos/sin table.
+        w[0...n*5/8-1] :cos/sin table (float32 *)
+                        w[],ip[] are initialized if ip[0] == 0.
+    [remark]
+        Inverse of 
+            dfst(n, a, t, ip, w);
+        is 
+            dfst(n, a, t, ip, w);
+            for (j = 1; j <= n - 1; j++) {
+                a[j] *= 2.0 / n;
+            }
+        .
+
+
+Appendix :
+    The cos/sin table is recalculated when a larger table is required.
+    w[] and ip[] are compatible with all routines.
+*/
+
+
+void cdft(int n, int isgn, float32 *a, int *ip, float32 *w) /* in-place complex DFT; a[0..n-1] holds n/2 interleaved (re, im) pairs */
+{
+    void makewt(int nw, int *ip, float32 *w); /* block-scope prototypes for the helpers called below */
+    void cftfsub(int n, float32 *a, int *ip, int nw, float32 *w);
+    void cftbsub(int n, float32 *a, int *ip, int nw, float32 *w);
+    int nw;
+    /* ip[0] caches the size of the cos/sin table currently held in w; callers set ip[0] = 0 before the first call to force a build. */
+    nw = ip[0];
+    if (n > (nw << 2)) { /* cached table is too small for this n: rebuild it with n/4 entries */
+        nw = n >> 2;
+        makewt(nw, ip, w); /* makewt also stores nw back into ip[0] */
+    }
+    if (isgn >= 0) { /* isgn = 1 is documented in the file header as the exp(+2*pi*i*j*k/n) transform */
+        cftfsub(n, a, ip, nw, w);
+    } else { /* isgn = -1 is documented as the exp(-2*pi*i*j*k/n) transform */
+        cftbsub(n, a, ip, nw, w);
+    }
+} /* no scaling is done here: per the header remark, a round trip must be divided by the number of complex points */
+
+
+void rdft(int n, int isgn, float32 *a, int *ip, float32 *w) /* in-place real DFT (isgn >= 0) or its unscaled inverse (isgn < 0) on a[0..n-1] */
+{
+    void makewt(int nw, int *ip, float32 *w); /* block-scope prototypes for the helpers called below */
+    void makect(int nc, int *ip, float32 *c);
+    void cftfsub(int n, float32 *a, int *ip, int nw, float32 *w);
+    void cftbsub(int n, float32 *a, int *ip, int nw, float32 *w);
+    void rftfsub(int n, float32 *a, int nc, float32 *c);
+    void rftbsub(int n, float32 *a, int nc, float32 *c);
+    int nw, nc;
+    float32 xi;
+    /* w holds two tables back to back: nw entries filled by makewt, followed by nc entries filled by makect at w + nw. */
+    nw = ip[0];
+    if (n > (nw << 2)) { /* first table too small for this n: rebuild with n/4 entries */
+        nw = n >> 2;
+        makewt(nw, ip, w);
+    }
+    nc = ip[1]; /* size of the second table as cached in ip[1] */
+    if (n > (nc << 2)) {
+        nc = n >> 2;
+        makect(nc, ip, w + nw); /* NOTE(review): presumably records nc in ip[1]; makect is not visible here - confirm */
+    }
+    if (isgn >= 0) {
+        if (n > 4) {
+            cftfsub(n, a, ip, nw, w);
+            rftfsub(n, a, nc, w + nw);
+        } else if (n == 4) {
+            cftfsub(n, a, ip, nw, w);
+        }
+        xi = a[0] - a[1]; /* pack the first pair: a[0] <- a[0] + a[1], a[1] <- a[0] - a[1] */
+        a[0] += a[1];
+        a[1] = xi; /* the header layout documents a[1] as R[n/2] */
+    } else {
+        a[1] = 0.5 * (a[0] - a[1]); /* exact inverse of the packing done in the forward branch */
+        a[0] -= a[1];
+        if (n > 4) {
+            rftbsub(n, a, nc, w + nw); /* backward passes run in the opposite order to the forward branch */
+            cftbsub(n, a, ip, nw, w);
+        } else if (n == 4) {
+            cftbsub(n, a, ip, nw, w);
+        }
+    }
+}
+
+
+void ddct(int n, int isgn, float32 *a, int *ip, float32 *w) /* in-place DCT (isgn < 0) or unscaled inverse DCT (isgn >= 0) on a[0..n-1] */
+{
+    void makewt(int nw, int *ip, float32 *w); /* block-scope prototypes for the helpers called below */
+    void makect(int nc, int *ip, float32 *c);
+    void cftfsub(int n, float32 *a, int *ip, int nw, float32 *w);
+    void cftbsub(int n, float32 *a, int *ip, int nw, float32 *w);
+    void rftfsub(int n, float32 *a, int nc, float32 *c);
+    void rftbsub(int n, float32 *a, int nc, float32 *c);
+    void dctsub(int n, float32 *a, int nc, float32 *c);
+    int j, nw, nc;
+    float32 xr;
+    /* Table sizes are cached in ip[0] / ip[1]: n/4 entries at w plus n entries at w + nw, i.e. the w[0...n*5/4-1] of the header. */
+    nw = ip[0];
+    if (n > (nw << 2)) {
+        nw = n >> 2;
+        makewt(nw, ip, w);
+    }
+    nc = ip[1];
+    if (n > nc) { /* unlike rdft, the second table here needs a full n entries */
+        nc = n;
+        makect(nc, ip, w + nw);
+    }
+    if (isgn < 0) { /* pre-processing for the isgn < 0 direction; runs before dctsub */
+        xr = a[n - 1]; /* saved because the first loop iteration overwrites a[n - 1] */
+        for (j = n - 2; j >= 2; j -= 2) { /* walks downward so each a[j - 1] is read before a later iteration overwrites it */
+            a[j + 1] = a[j] - a[j - 1];
+            a[j] += a[j - 1];
+        }
+        a[1] = a[0] - xr;
+        a[0] += xr;
+        if (n > 4) {
+            rftbsub(n, a, nc, w + nw);
+            cftbsub(n, a, ip, nw, w);
+        } else if (n == 4) {
+            cftbsub(n, a, ip, nw, w);
+        }
+    }
+    dctsub(n, a, nc, w + nw); /* executed for both directions */
+    if (isgn >= 0) { /* post-processing for the isgn >= 0 direction; runs after dctsub */
+        if (n > 4) {
+            cftfsub(n, a, ip, nw, w);
+            rftfsub(n, a, nc, w + nw);
+        } else if (n == 4) {
+            cftfsub(n, a, ip, nw, w);
+        }
+        xr = a[0] - a[1]; /* saved because the loop below overwrites a[1] on its first iteration */
+        a[0] += a[1];
+        for (j = 2; j < n; j += 2) { /* walks upward, writing a[j - 1] from the still-unmodified pair a[j], a[j + 1] */
+            a[j - 1] = a[j] - a[j + 1];
+            a[j] += a[j + 1];
+        }
+        a[n - 1] = xr;
+    }
+}
+
+
+void ddst(int n, int isgn, float32 *a, int *ip, float32 *w) /* in-place DST (isgn < 0) or unscaled inverse DST (isgn >= 0) on a[0..n-1] */
+{
+    void makewt(int nw, int *ip, float32 *w); /* block-scope prototypes for the helpers called below */
+    void makect(int nc, int *ip, float32 *c);
+    void cftfsub(int n, float32 *a, int *ip, int nw, float32 *w);
+    void cftbsub(int n, float32 *a, int *ip, int nw, float32 *w);
+    void rftfsub(int n, float32 *a, int nc, float32 *c);
+    void rftbsub(int n, float32 *a, int nc, float32 *c);
+    void dstsub(int n, float32 *a, int nc, float32 *c);
+    int j, nw, nc;
+    float32 xr;
+    /* Same table handling as ddct: n/4 entries at w plus n entries at w + nw, sizes cached in ip[0] / ip[1]. */
+    nw = ip[0];
+    if (n > (nw << 2)) {
+        nw = n >> 2;
+        makewt(nw, ip, w);
+    }
+    nc = ip[1];
+    if (n > nc) {
+        nc = n;
+        makect(nc, ip, w + nw);
+    }
+    if (isgn < 0) { /* pre-processing for the isgn < 0 direction; runs before dstsub */
+        xr = a[n - 1]; /* saved because the first loop iteration overwrites a[n - 1] */
+        for (j = n - 2; j >= 2; j -= 2) { /* walks downward so each a[j - 1] is read before a later iteration overwrites it */
+            a[j + 1] = -a[j] - a[j - 1];
+            a[j] -= a[j - 1];
+        }
+        a[1] = a[0] + xr;
+        a[0] -= xr;
+        if (n > 4) {
+            rftbsub(n, a, nc, w + nw);
+            cftbsub(n, a, ip, nw, w);
+        } else if (n == 4) {
+            cftbsub(n, a, ip, nw, w);
+        }
+    }
+    dstsub(n, a, nc, w + nw); /* executed for both directions */
+    if (isgn >= 0) { /* post-processing for the isgn >= 0 direction; runs after dstsub */
+        if (n > 4) {
+            cftfsub(n, a, ip, nw, w);
+            rftfsub(n, a, nc, w + nw);
+        } else if (n == 4) {
+            cftfsub(n, a, ip, nw, w);
+        }
+        xr = a[0] - a[1]; /* saved because the loop below overwrites a[1] on its first iteration */
+        a[0] += a[1];
+        for (j = 2; j < n; j += 2) { /* walks upward, writing a[j - 1] from the still-unmodified pair a[j], a[j + 1] */
+            a[j - 1] = -a[j] - a[j + 1];
+            a[j] -= a[j + 1];
+        }
+        a[n - 1] = -xr;
+    }
+}
+
+
+void dfct(int n, float32 *a, float32 *t, int *ip, float32 *w) /* in-place cosine transform of the n + 1 values a[0..n]; t[0..n/2] is scratch space */
+{
+    void makewt(int nw, int *ip, float32 *w); /* block-scope prototypes for the helpers called below */
+    void makect(int nc, int *ip, float32 *c);
+    void cftfsub(int n, float32 *a, int *ip, int nw, float32 *w);
+    void rftfsub(int n, float32 *a, int nc, float32 *c);
+    void dctsub(int n, float32 *a, int nc, float32 *c);
+    int j, k, l, m, mh, nw, nc;
+    float32 xr, xi, yr, yi;
+    /* Table sizes are cached in ip[0] / ip[1]: n/8 entries at w plus n/2 entries at w + nw, i.e. the w[0...n*5/8-1] of the header. */
+    nw = ip[0];
+    if (n > (nw << 3)) {
+        nw = n >> 3;
+        makewt(nw, ip, w);
+    }
+    nc = ip[1];
+    if (n > (nc << 1)) {
+        nc = n >> 1;
+        makect(nc, ip, w + nw);
+    }
+    m = n >> 1;
+    yi = a[m];
+    xi = a[0] + a[n];
+    a[0] -= a[n];
+    t[0] = xi - yi;
+    t[m] = xi + yi;
+    if (n > 2) {
+        mh = m >> 1;
+        for (j = 1; j < mh; j++) { /* a keeps the differences a[j] - a[n - j]; t receives combinations of the sums a[j] + a[n - j] */
+            k = m - j;
+            xr = a[j] - a[n - j];
+            xi = a[j] + a[n - j];
+            yr = a[k] - a[n - k];
+            yi = a[k] + a[n - k];
+            a[j] = xr;
+            a[k] = yr;
+            t[j] = xi - yi;
+            t[k] = xi + yi;
+        }
+        t[mh] = a[mh] + a[n - mh]; /* the middle index pairs with itself (j == k) and is handled outside the loop */
+        a[mh] -= a[n - mh];
+        dctsub(m, a, nc, w + nw); /* half-length transform of the difference part, done in a[0..m-1] */
+        if (m > 4) {
+            cftfsub(m, a, ip, nw, w);
+            rftfsub(m, a, nc, w + nw);
+        } else if (m == 4) {
+            cftfsub(m, a, ip, nw, w);
+        }
+        a[n - 1] = a[0] - a[1];
+        a[1] = a[0] + a[1];
+        for (j = m - 2; j >= 2; j -= 2) { /* spreads the results to the odd indices of a, walking downward so sources are read before being overwritten */
+            a[2 * j + 1] = a[j] + a[j + 1];
+            a[2 * j - 1] = a[j] - a[j + 1];
+        }
+        l = 2;
+        m = mh;
+        while (m >= 2) { /* each pass transforms t[0..m-1], writes the results to odd multiples of l in a, then halves m and doubles l */
+            dctsub(m, t, nc, w + nw);
+            if (m > 4) {
+                cftfsub(m, t, ip, nw, w);
+                rftfsub(m, t, nc, w + nw);
+            } else if (m == 4) {
+                cftfsub(m, t, ip, nw, w);
+            }
+            a[n - l] = t[0] - t[1];
+            a[l] = t[0] + t[1];
+            k = 0;
+            for (j = 2; j < m; j += 2) {
+                k += l << 2;
+                a[k - l] = t[j] - t[j + 1];
+                a[k + l] = t[j] + t[j + 1];
+            }
+            l <<= 1;
+            mh = m >> 1;
+            for (j = 0; j < mh; j++) { /* rebuild t[0..m] from the values stored in t[m..2m] for the next, half-size pass */
+                k = m - j;
+                t[j] = t[m + k] - t[m + j];
+                t[k] = t[m + k] + t[m + j];
+            }
+            t[mh] = t[m + mh];
+            m = mh;
+        }
+        a[l] = t[0]; /* final values left in t after the last pass */
+        a[n] = t[2] - t[1];
+        a[0] = t[2] + t[1];
+    } else { /* n <= 2: m is 1 (for n == 2), so t[0] and t[1] set above already hold the needed combinations */
+        a[1] = a[0];
+        a[2] = t[0];
+        a[0] = t[1];
+    }
+}
+
+
+void dfst(int n, float32 *a, float32 *t, int *ip, float32 *w) /* in-place sine transform of a[1..n-1]; a[0] and t[0..n/2-1] are work space */
+{
+    void makewt(int nw, int *ip, float32 *w); /* block-scope prototypes for the helpers called below */
+    void makect(int nc, int *ip, float32 *c);
+    void cftfsub(int n, float32 *a, int *ip, int nw, float32 *w);
+    void rftfsub(int n, float32 *a, int nc, float32 *c);
+    void dstsub(int n, float32 *a, int nc, float32 *c);
+    int j, k, l, m, mh, nw, nc;
+    float32 xr, xi, yr, yi;
+    /* Table sizes are cached in ip[0] / ip[1]: n/8 entries at w plus n/2 entries at w + nw, i.e. the w[0...n*5/8-1] of the header. */
+    nw = ip[0];
+    if (n > (nw << 3)) {
+        nw = n >> 3;
+        makewt(nw, ip, w);
+    }
+    nc = ip[1];
+    if (n > (nc << 1)) {
+        nc = n >> 1;
+        makect(nc, ip, w + nw);
+    }
+    if (n > 2) { /* for n <= 2 only the final a[0] = 0 below is executed */
+        m = n >> 1;
+        mh = m >> 1;
+        for (j = 1; j < mh; j++) { /* a keeps the sums a[j] + a[n - j]; t receives combinations of the differences a[j] - a[n - j] */
+            k = m - j;
+            xr = a[j] + a[n - j];
+            xi = a[j] - a[n - j];
+            yr = a[k] + a[n - k];
+            yi = a[k] - a[n - k];
+            a[j] = xr;
+            a[k] = yr;
+            t[j] = xi + yi;
+            t[k] = xi - yi;
+        }
+        t[0] = a[mh] - a[n - mh]; /* the middle index pairs with itself (j == k) and is handled outside the loop */
+        a[mh] += a[n - mh];
+        a[0] = a[m]; /* a[0] is free work space, so the centre sample is parked there for dstsub */
+        dstsub(m, a, nc, w + nw); /* half-length transform of the sum part, done in a[0..m-1] */
+        if (m > 4) {
+            cftfsub(m, a, ip, nw, w);
+            rftfsub(m, a, nc, w + nw);
+        } else if (m == 4) {
+            cftfsub(m, a, ip, nw, w);
+        }
+        a[n - 1] = a[1] - a[0];
+        a[1] = a[0] + a[1];
+        for (j = m - 2; j >= 2; j -= 2) { /* spreads the results to the odd indices of a, walking downward so sources are read before being overwritten */
+            a[2 * j + 1] = a[j] - a[j + 1];
+            a[2 * j - 1] = -a[j] - a[j + 1];
+        }
+        l = 2;
+        m = mh;
+        while (m >= 2) { /* each pass transforms t[0..m-1], writes the results to odd multiples of l in a, then halves m and doubles l */
+            dstsub(m, t, nc, w + nw);
+            if (m > 4) {
+                cftfsub(m, t, ip, nw, w);
+                rftfsub(m, t, nc, w + nw);
+            } else if (m == 4) {
+                cftfsub(m, t, ip, nw, w);
+            }
+            a[n - l] = t[1] - t[0];
+            a[l] = t[0] + t[1];
+            k = 0;
+            for (j = 2; j < m; j += 2) {
+                k += l << 2;
+                a[k - l] = -t[j] - t[j + 1];
+                a[k + l] = t[j] - t[j + 1];
+            }
+            l <<= 1;
+            mh = m >> 1;
+            for (j = 1; j < mh; j++) { /* rebuild t[1..m-1] from the values stored in t[m..2m] for the next, half-size pass */
+                k = m - j;
+                t[j] = t[m + k] + t[m + j];
+                t[k] = t[m + k] - t[m + j];
+            }
+            t[0] = t[m + mh];
+            m = mh;
+        }
+        a[l] = t[0]; /* last remaining value after the final pass */
+    }
+    a[0] = 0; /* a[0] is not an output (work area per the header); it is cleared on every call */
+}
+
+
+/* -------- initializing routines -------- */
+
+
+#include <math.h>
+/*
+ * makewt -- build the cos/sin table w[] (and, for larger sizes, the index
+ * table in ip[]) that is later handed to cftfsub()/cftbsub().
+ *
+ *   nw : requested table size; recorded in ip[0].
+ *   ip : integer work array.  ip[0] is set to nw and ip[1] to 1.  When
+ *        nw/2 > 4, makeipt() additionally fills the table from ip[2] on.
+ *   w  : output table.
+ *
+ * Nothing but ip[0] and ip[1] is written when nw <= 2.  Otherwise, with
+ * nwh = nw/2 and delta = (pi/4)/nwh, the first sub-table starts at w[0]:
+ *   w[0] = 1, w[1] = cos(pi/4),
+ *   nwh == 4 : w[2] = cos(2*delta), w[3] = sin(2*delta)
+ *   nwh >  4 : w[2] = 0.5/cos(2*delta), w[3] = 0.5/cos(6*delta), and for
+ *              j = 4, 8, ... < nwh the group
+ *              { cos(j*delta), sin(j*delta), cos(3*j*delta), -sin(3*j*delta) }.
+ * The while loop then appends further sub-tables back to back, each half
+ * as long as the one before it and copied from every other 4-entry group
+ * of its predecessor, so no additional cos()/sin() calls are needed.
+ *
+ * No bounds are checked; the caller has to size ip[] and w[] accordingly.
+ */
+void makewt(int nw, int *ip, float32 *w)
+{
+    void makeipt(int nw, int *ip);
+    int j, nwh, nw0, nw1;
+    float32 delta, wn4r, wk1r, wk1i, wk3r, wk3i;
+    
+    ip[0] = nw;
+    ip[1] = 1;
+    if (nw > 2) {
+        nwh = nw >> 1;
+        delta = atan(1.0) / nwh; /* atan(1) == pi/4 */
+        wn4r = cos(delta * nwh); /* == cos(pi/4) */
+        w[0] = 1;
+        w[1] = wn4r;
+        if (nwh == 4) {
+            w[2] = cos(delta * 2);
+            w[3] = sin(delta * 2);
+        } else if (nwh > 4) {
+            makeipt(nw, ip); /* index table is only built for the larger sizes */
+            w[2] = 0.5 / cos(delta * 2);
+            w[3] = 0.5 / cos(delta * 6);
+            for (j = 4; j < nwh; j += 4) {
+                w[j] = cos(delta * j);
+                w[j + 1] = sin(delta * j);
+                w[j + 2] = cos(3 * delta * j);
+                w[j + 3] = -sin(3 * delta * j);
+            }
+        }
+        nw0 = 0; /* start of the sub-table built last */
+        while (nwh > 2) {
+            nw1 = nw0 + nwh; /* next sub-table begins right after the previous one */
+            nwh >>= 1;
+            w[nw1] = 1;
+            w[nw1 + 1] = wn4r;
+            if (nwh == 4) {
+                wk1r = w[nw0 + 4];
+                wk1i = w[nw0 + 5];
+                w[nw1 + 2] = wk1r;
+                w[nw1 + 3] = wk1i;
+            } else if (nwh > 4) {
+                wk1r = w[nw0 + 4];
+                wk3r = w[nw0 + 6];
+                w[nw1 + 2] = 0.5 / wk1r;
+                w[nw1 + 3] = 0.5 / wk3r;
+                for (j = 4; j < nwh; j += 4) {
+                    wk1r = w[nw0 + 2 * j]; /* take every other group of the previous table */
+                    wk1i = w[nw0 + 2 * j + 1];
+                    wk3r = w[nw0 + 2 * j + 2];
+                    wk3i = w[nw0 + 2 * j + 3];
+                    w[nw1 + j] = wk1r;
+                    w[nw1 + j + 1] = wk1i;
+                    w[nw1 + j + 2] = wk3r;
+                    w[nw1 + j + 3] = wk3i;
+                }
+            }
+            nw0 = nw1;
+        }
+    }
+}
+
+/*
+ * makeipt -- build the integer index table kept from ip[2] onward.
+ *
+ *   nw : table size passed down from makewt(); it only controls how many
+ *        doubling rounds are performed (l = nw, nw/4, nw/16, ... while
+ *        l > 32).
+ *   ip : work array; ip[0] and ip[1] are left untouched.
+ *
+ * The table is seeded with ip[2] = 0, ip[3] = 16.  Each round reads the
+ * entries ip[m] .. ip[2m-1] and writes, for every such j,
+ *   ip[m + j]  = 4 * ip[j]
+ *   ip[2m + j] = 4 * ip[j] + 16 * m
+ * after which m is doubled.  bitrv2() and bitrv2conj() read the result
+ * back as ip[m + k].  No bounds are checked.
+ */
+void makeipt(int nw, int *ip)
+{
+    int j, l, m, m2, p, q;
+    
+    ip[2] = 0;
+    ip[3] = 16;
+    m = 2;
+    for (l = nw; l > 32; l >>= 2) {
+        m2 = m << 1;
+        q = m2 << 3; /* == 16 * m */
+        for (j = m; j < m2; j++) {
+            p = ip[j] << 2; /* == 4 * ip[j] */
+            ip[m + j] = p;
+            ip[m2 + j] = p + q;
+        }
+        m = m2;
+    }
+}
+
+/*
+ * makect -- fill the cos/sin table c[] and record its size in ip[1].
+ * Callers in this file pass the same table (together with nc) on to
+ * rftfsub() and dstsub().
+ *
+ *   nc : table size; stored in ip[1].
+ *   ip : work array; only ip[1] is written.
+ *   c  : output table, indices below nc.
+ *
+ * For nc <= 1 only ip[1] is written.  Otherwise, with nch = nc/2 and
+ * delta = (pi/4)/nch:
+ *   c[0]      = cos(pi/4)
+ *   c[nch]    = 0.5 * cos(pi/4)
+ *   c[j]      = 0.5 * cos(j*delta)   for 0 < j < nch
+ *   c[nc - j] = 0.5 * sin(j*delta)   for 0 < j < nch
+ */
+void makect(int nc, int *ip, float32 *c)
+{
+    int j, nch;
+    float32 delta;
+    
+    ip[1] = nc;
+    if (nc > 1) {
+        nch = nc >> 1;
+        delta = atan(1.0) / nch; /* atan(1) == pi/4 */
+        c[0] = cos(delta * nch); /* == cos(pi/4) */
+        c[nch] = 0.5 * c[0];
+        for (j = 1; j < nch; j++) {
+            c[j] = 0.5 * cos(delta * j);
+            c[nc - j] = 0.5 * sin(delta * j);
+        }
+    }
+}
+
+
+/* -------- child routines -------- */
+
+/*
+ * Optional POSIX-threads back end, compiled only when USE_CDFT_PTHREADS
+ * is defined.  It switches on USE_CDFT_THREADS, which makes cftfsub() and
+ * cftbsub() call cftrec4_th() once n > CDFT_THREADS_BEGIN_N, and supplies
+ * the cdft_thread_* wrappers.  Both wrappers print "cdft thread error" to
+ * stderr and call exit(1) when the underlying pthread call returns
+ * non-zero.  The two thresholds can be overridden by defining them before
+ * this point.
+ * NOTE(review): the wrappers expand to a bare { ... } block rather than
+ * do { ... } while (0), so "cdft_thread_wait(th); else ..." will not parse.
+ */
+#ifdef USE_CDFT_PTHREADS
+#define USE_CDFT_THREADS
+#ifndef CDFT_THREADS_BEGIN_N
+#define CDFT_THREADS_BEGIN_N 8192 /* cftfsub()/cftbsub() use cftrec4_th() above this n */
+#endif
+#ifndef CDFT_4THREADS_BEGIN_N
+#define CDFT_4THREADS_BEGIN_N 65536 /* not referenced nearby; presumably the 4-thread threshold -- confirm in cftrec4_th() */
+#endif
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#define cdft_thread_t pthread_t
+#define cdft_thread_create(thp,func,argp) { \
+    if (pthread_create(thp, NULL, func, (void *) argp) != 0) { \
+        fprintf(stderr, "cdft thread error\n"); \
+        exit(1); \
+    } \
+}
+#define cdft_thread_wait(th) { \
+    if (pthread_join(th, NULL) != 0) { \
+        fprintf(stderr, "cdft thread error\n"); \
+        exit(1); \
+    } \
+}
+#endif /* USE_CDFT_PTHREADS */
+
+/*
+ * Optional Win32-threads back end, compiled only when USE_CDFT_WINTHREADS
+ * is defined.  It switches on USE_CDFT_THREADS, which makes cftfsub() and
+ * cftbsub() call cftrec4_th() once n > CDFT_THREADS_BEGIN_N, and supplies
+ * the cdft_thread_* wrappers.  cdft_thread_create() prints
+ * "cdft thread error" to stderr and calls exit(1) when CreateThread()
+ * yields a zero handle; cdft_thread_wait() waits without timeout and then
+ * closes the handle, checking neither return value.  The two thresholds
+ * can be overridden by defining them before this point.
+ * NOTE(review): the wrappers expand to a bare { ... } block rather than
+ * do { ... } while (0), so "cdft_thread_wait(th); else ..." will not parse.
+ */
+#ifdef USE_CDFT_WINTHREADS
+#define USE_CDFT_THREADS
+#ifndef CDFT_THREADS_BEGIN_N
+#define CDFT_THREADS_BEGIN_N 32768 /* cftfsub()/cftbsub() use cftrec4_th() above this n */
+#endif
+#ifndef CDFT_4THREADS_BEGIN_N
+#define CDFT_4THREADS_BEGIN_N 524288 /* not referenced nearby; presumably the 4-thread threshold -- confirm in cftrec4_th() */
+#endif
+#include <windows.h>
+#include <stdio.h>
+#include <stdlib.h>
+#define cdft_thread_t HANDLE
+#define cdft_thread_create(thp,func,argp) { \
+    DWORD thid; \
+    *(thp) = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) func, (LPVOID) argp, 0, &thid); \
+    if (*(thp) == 0) { \
+        fprintf(stderr, "cdft thread error\n"); \
+        exit(1); \
+    } \
+}
+#define cdft_thread_wait(th) { \
+    WaitForSingleObject(th, INFINITE); \
+    CloseHandle(th); \
+}
+#endif /* USE_CDFT_WINTHREADS */
+
+/*
+ * cftfsub -- size dispatcher for the forward transform kernels; works in
+ * place on a[].
+ *
+ *   n  : problem size.  NOTE(review): appears to count float32 entries of
+ *        a[] (the n == 32 branch uses bitrv216(), which touches a[2..29])
+ *        -- confirm against the callers.
+ *   a  : data, modified in place.
+ *   ip : index table from makeipt(); only used by bitrv2() (n > 32).
+ *   nw : size of the table w[], as recorded by makewt().
+ *   w  : cos/sin table from makewt().
+ *
+ * Dispatch:
+ *   n > 32     : cftf1st(), then one of cftrec4_th() (threaded builds,
+ *                n > CDFT_THREADS_BEGIN_N), cftrec4() (n > 512),
+ *                cftleaf() (n > 128) or cftfx41(); finally bitrv2().
+ *   n == 32    : cftf161() + bitrv216().
+ *   8 < n < 32 : cftf081() + bitrv208().
+ *   n == 8     : cftf040().
+ *   n == 4     : cftx020().
+ *   otherwise  : a[] is left unchanged.
+ */
+void cftfsub(int n, float32 *a, int *ip, int nw, float32 *w)
+{
+    void bitrv2(int n, int *ip, float32 *a);
+    void bitrv216(float32 *a);
+    void bitrv208(float32 *a);
+    void cftf1st(int n, float32 *a, float32 *w);
+    void cftrec4(int n, float32 *a, int nw, float32 *w);
+    void cftleaf(int n, int isplt, float32 *a, int nw, float32 *w);
+    void cftfx41(int n, float32 *a, int nw, float32 *w);
+    void cftf161(float32 *a, float32 *w);
+    void cftf081(float32 *a, float32 *w);
+    void cftf040(float32 *a);
+    void cftx020(float32 *a);
+#ifdef USE_CDFT_THREADS
+    void cftrec4_th(int n, float32 *a, int nw, float32 *w);
+#endif /* USE_CDFT_THREADS */
+    
+    if (n > 8) {
+        if (n > 32) {
+            cftf1st(n, a, &w[nw - (n >> 2)]); /* table slice starting n/4 entries before w[nw] */
+#ifdef USE_CDFT_THREADS
+            if (n > CDFT_THREADS_BEGIN_N) {
+                cftrec4_th(n, a, nw, w);
+            } else 
+#endif /* USE_CDFT_THREADS */
+            if (n > 512) { /* in threaded builds this `if` is the body of the `else` above */
+                cftrec4(n, a, nw, w);
+            } else if (n > 128) {
+                cftleaf(n, 1, a, nw, w);
+            } else {
+                cftfx41(n, a, nw, w);
+            }
+            bitrv2(n, ip, a);
+        } else if (n == 32) {
+            cftf161(a, &w[nw - 8]);
+            bitrv216(a);
+        } else {
+            cftf081(a, w);
+            bitrv208(a);
+        }
+    } else if (n == 8) {
+        cftf040(a);
+    } else if (n == 4) {
+        cftx020(a);
+    }
+}
+
+/*
+ * cftbsub -- size dispatcher for the backward transform kernels; works in
+ * place on a[].  Parameters are the same as for cftfsub().
+ *
+ * The structure mirrors cftfsub(); the differences are the first stage
+ * for n > 32 (cftb1st), the n == 8 kernel (cftb040) and the reordering
+ * routines (bitrv2conj, bitrv216neg, bitrv208neg).  The middle stages
+ * (cftrec4_th/cftrec4/cftleaf/cftfx41, cftf161, cftf081) and the n == 4
+ * kernel (cftx020) are shared with the forward path.
+ *
+ * Dispatch:
+ *   n > 32     : cftb1st(), then one of cftrec4_th() (threaded builds,
+ *                n > CDFT_THREADS_BEGIN_N), cftrec4() (n > 512),
+ *                cftleaf() (n > 128) or cftfx41(); finally bitrv2conj().
+ *   n == 32    : cftf161() + bitrv216neg().
+ *   8 < n < 32 : cftf081() + bitrv208neg().
+ *   n == 8     : cftb040().
+ *   n == 4     : cftx020().
+ *   otherwise  : a[] is left unchanged.
+ */
+void cftbsub(int n, float32 *a, int *ip, int nw, float32 *w)
+{
+    void bitrv2conj(int n, int *ip, float32 *a);
+    void bitrv216neg(float32 *a);
+    void bitrv208neg(float32 *a);
+    void cftb1st(int n, float32 *a, float32 *w);
+    void cftrec4(int n, float32 *a, int nw, float32 *w);
+    void cftleaf(int n, int isplt, float32 *a, int nw, float32 *w);
+    void cftfx41(int n, float32 *a, int nw, float32 *w);
+    void cftf161(float32 *a, float32 *w);
+    void cftf081(float32 *a, float32 *w);
+    void cftb040(float32 *a);
+    void cftx020(float32 *a);
+#ifdef USE_CDFT_THREADS
+    void cftrec4_th(int n, float32 *a, int nw, float32 *w);
+#endif /* USE_CDFT_THREADS */
+    
+    if (n > 8) {
+        if (n > 32) {
+            cftb1st(n, a, &w[nw - (n >> 2)]); /* table slice starting n/4 entries before w[nw] */
+#ifdef USE_CDFT_THREADS
+            if (n > CDFT_THREADS_BEGIN_N) {
+                cftrec4_th(n, a, nw, w);
+            } else 
+#endif /* USE_CDFT_THREADS */
+            if (n > 512) { /* in threaded builds this `if` is the body of the `else` above */
+                cftrec4(n, a, nw, w);
+            } else if (n > 128) {
+                cftleaf(n, 1, a, nw, w);
+            } else {
+                cftfx41(n, a, nw, w);
+            }
+            bitrv2conj(n, ip, a);
+        } else if (n == 32) {
+            cftf161(a, &w[nw - 8]);
+            bitrv216neg(a);
+        } else {
+            cftf081(a, w);
+            bitrv208neg(a);
+        }
+    } else if (n == 8) {
+        cftb040(a);
+    } else if (n == 4) {
+        cftx020(a);
+    }
+}
+
+/*
+ * bitrv2 -- in-place reordering of a[], treated as pairs
+ * (a[2i], a[2i+1]), driven by the index table that makeipt() stores in
+ * ip[].  Presumably the bit-reversal permutation, going by the name;
+ * cftfsub() calls it as the last step for n > 32.
+ *
+ *   n  : problem size (same n as passed to cftfsub()).
+ *   ip : index table; read as ip[m + k] and ip[m + j].
+ *   a  : data, reordered in place.
+ *
+ * m starts at 1 and is doubled once for every division of l = n/4 by 4
+ * that is needed to bring l down to 8 or less; nh = n/2 and nm = 4*m are
+ * the strides used to step between the pairs handled together.
+ *
+ * Every 8-statement group (xr, xi, yr, yi loads followed by four stores)
+ * exchanges the pair at j1 with the pair at k1.  The code is fully
+ * unrolled:
+ *   l == 8 branch : 16 exchanges for each (j, k) with j < k, plus 6
+ *                   exchanges per k; table entries are used doubled.
+ *   else branch   : 8 exchanges for each (j, k) with j < k, plus 2
+ *                   exchanges per k.  (For power-of-two n this is the
+ *                   l == 4 case.)
+ * The running updates of j1/k1 depend on each other, so the statement
+ * order must not be changed.
+ */
+void bitrv2(int n, int *ip, float32 *a)
+{
+    int j, j1, k, k1, l, m, nh, nm;
+    float32 xr, xi, yr, yi;
+    
+    m = 1;
+    for (l = n >> 2; l > 8; l >>= 2) {
+        m <<= 1;
+    }
+    nh = n >> 1;
+    nm = 4 * m;
+    if (l == 8) {
+        for (k = 0; k < m; k++) {
+            for (j = 0; j < k; j++) { /* 16 pair exchanges per (j, k) */
+                j1 = 4 * j + 2 * ip[m + k];
+                k1 = 4 * k + 2 * ip[m + j];
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += nm;
+                k1 += 2 * nm;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += nm;
+                k1 -= nm;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += nm;
+                k1 += 2 * nm;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += nh;
+                k1 += 2;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 -= nm;
+                k1 -= 2 * nm;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 -= nm;
+                k1 += nm;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 -= nm;
+                k1 -= 2 * nm;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += 2;
+                k1 += nh;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += nm;
+                k1 += 2 * nm;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += nm;
+                k1 -= nm;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += nm;
+                k1 += 2 * nm;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 -= nh;
+                k1 -= 2;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 -= nm;
+                k1 -= 2 * nm;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 -= nm;
+                k1 += nm;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 -= nm;
+                k1 -= 2 * nm;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+            }
+            k1 = 4 * k + 2 * ip[m + k]; /* 6 further exchanges that depend on k only */
+            j1 = k1 + 2;
+            k1 += nh;
+            xr = a[j1];
+            xi = a[j1 + 1];
+            yr = a[k1];
+            yi = a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+            j1 += nm;
+            k1 += 2 * nm;
+            xr = a[j1];
+            xi = a[j1 + 1];
+            yr = a[k1];
+            yi = a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+            j1 += nm;
+            k1 -= nm;
+            xr = a[j1];
+            xi = a[j1 + 1];
+            yr = a[k1];
+            yi = a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+            j1 -= 2;
+            k1 -= nh;
+            xr = a[j1];
+            xi = a[j1 + 1];
+            yr = a[k1];
+            yi = a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+            j1 += nh + 2;
+            k1 += nh + 2;
+            xr = a[j1];
+            xi = a[j1 + 1];
+            yr = a[k1];
+            yi = a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+            j1 -= nh - nm;
+            k1 += 2 * nm - 2;
+            xr = a[j1];
+            xi = a[j1 + 1];
+            yr = a[k1];
+            yi = a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+        }
+    } else {
+        for (k = 0; k < m; k++) {
+            for (j = 0; j < k; j++) { /* 8 pair exchanges per (j, k) */
+                j1 = 4 * j + ip[m + k];
+                k1 = 4 * k + ip[m + j];
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += nm;
+                k1 += nm;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += nh;
+                k1 += 2;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 -= nm;
+                k1 -= nm;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += 2;
+                k1 += nh;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += nm;
+                k1 += nm;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 -= nh;
+                k1 -= 2;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 -= nm;
+                k1 -= nm;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+            }
+            k1 = 4 * k + ip[m + k]; /* 2 further exchanges that depend on k only */
+            j1 = k1 + 2;
+            k1 += nh;
+            xr = a[j1];
+            xi = a[j1 + 1];
+            yr = a[k1];
+            yi = a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+            j1 += nm;
+            k1 += nm;
+            xr = a[j1];
+            xi = a[j1 + 1];
+            yr = a[k1];
+            yi = a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+        }
+    }
+}
+
+/*
+ * bitrv2conj -- same pair reordering as bitrv2(), but every pair that is
+ * moved has its odd-index element negated on the way (complex conjugation
+ * if the pairs hold (re, im) -- presumably, going by the name).
+ * cftbsub() calls it as the last step for n > 32.
+ *
+ *   n  : problem size (same n as passed to cftbsub()).
+ *   ip : index table from makeipt(); read as ip[m + k] and ip[m + j].
+ *   a  : data, reordered and sign-flipped in place.
+ *
+ * m, nh = n/2 and nm = 4*m are derived exactly as in bitrv2(), and the
+ * sequence of j1/k1 updates is identical.  Every 8-statement group loads
+ * xi/yi as the negated odd-index elements and stores the two pairs
+ * exchanged.  In the per-k section the additional statements
+ *   a[j1 - 1] = -a[j1 - 1];   and   a[k1 + 3] = -a[k1 + 3];
+ * negate single odd-index elements in place without moving them.
+ * The running updates of j1/k1 depend on each other, so the statement
+ * order must not be changed.
+ */
+void bitrv2conj(int n, int *ip, float32 *a)
+{
+    int j, j1, k, k1, l, m, nh, nm;
+    float32 xr, xi, yr, yi;
+    
+    m = 1;
+    for (l = n >> 2; l > 8; l >>= 2) {
+        m <<= 1;
+    }
+    nh = n >> 1;
+    nm = 4 * m;
+    if (l == 8) {
+        for (k = 0; k < m; k++) {
+            for (j = 0; j < k; j++) { /* 16 negating pair exchanges per (j, k) */
+                j1 = 4 * j + 2 * ip[m + k];
+                k1 = 4 * k + 2 * ip[m + j];
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += nm;
+                k1 += 2 * nm;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += nm;
+                k1 -= nm;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += nm;
+                k1 += 2 * nm;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += nh;
+                k1 += 2;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 -= nm;
+                k1 -= 2 * nm;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 -= nm;
+                k1 += nm;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 -= nm;
+                k1 -= 2 * nm;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += 2;
+                k1 += nh;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += nm;
+                k1 += 2 * nm;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += nm;
+                k1 -= nm;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += nm;
+                k1 += 2 * nm;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 -= nh;
+                k1 -= 2;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 -= nm;
+                k1 -= 2 * nm;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 -= nm;
+                k1 += nm;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 -= nm;
+                k1 -= 2 * nm;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+            }
+            k1 = 4 * k + 2 * ip[m + k]; /* 6 further exchanges that depend on k only */
+            j1 = k1 + 2;
+            k1 += nh;
+            a[j1 - 1] = -a[j1 - 1]; /* in-place sign flip, element is not moved */
+            xr = a[j1];
+            xi = -a[j1 + 1];
+            yr = a[k1];
+            yi = -a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+            a[k1 + 3] = -a[k1 + 3]; /* in-place sign flip, element is not moved */
+            j1 += nm;
+            k1 += 2 * nm;
+            xr = a[j1];
+            xi = -a[j1 + 1];
+            yr = a[k1];
+            yi = -a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+            j1 += nm;
+            k1 -= nm;
+            xr = a[j1];
+            xi = -a[j1 + 1];
+            yr = a[k1];
+            yi = -a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+            j1 -= 2;
+            k1 -= nh;
+            xr = a[j1];
+            xi = -a[j1 + 1];
+            yr = a[k1];
+            yi = -a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+            j1 += nh + 2;
+            k1 += nh + 2;
+            xr = a[j1];
+            xi = -a[j1 + 1];
+            yr = a[k1];
+            yi = -a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+            j1 -= nh - nm;
+            k1 += 2 * nm - 2;
+            a[j1 - 1] = -a[j1 - 1];
+            xr = a[j1];
+            xi = -a[j1 + 1];
+            yr = a[k1];
+            yi = -a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+            a[k1 + 3] = -a[k1 + 3];
+        }
+    } else {
+        for (k = 0; k < m; k++) {
+            for (j = 0; j < k; j++) { /* 8 negating pair exchanges per (j, k) */
+                j1 = 4 * j + ip[m + k];
+                k1 = 4 * k + ip[m + j];
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += nm;
+                k1 += nm;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += nh;
+                k1 += 2;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 -= nm;
+                k1 -= nm;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += 2;
+                k1 += nh;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += nm;
+                k1 += nm;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 -= nh;
+                k1 -= 2;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 -= nm;
+                k1 -= nm;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+            }
+            k1 = 4 * k + ip[m + k]; /* 2 further exchanges that depend on k only */
+            j1 = k1 + 2;
+            k1 += nh;
+            a[j1 - 1] = -a[j1 - 1]; /* in-place sign flip, element is not moved */
+            xr = a[j1];
+            xi = -a[j1 + 1];
+            yr = a[k1];
+            yi = -a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+            a[k1 + 3] = -a[k1 + 3]; /* in-place sign flip, element is not moved */
+            j1 += nm;
+            k1 += nm;
+            a[j1 - 1] = -a[j1 - 1];
+            xr = a[j1];
+            xi = -a[j1 + 1];
+            yr = a[k1];
+            yi = -a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+            a[k1 + 3] = -a[k1 + 3];
+        }
+    }
+}
+
+/*
+ * bitrv216 -- fixed in-place permutation of the 16 pairs
+ * (a[2i], a[2i+1]), i = 0..15, used by cftfsub() for n == 32.
+ *
+ * Pair i is exchanged with the pair whose index is i with its 4 bits
+ * reversed: 1<->8, 2<->4, 3<->12, 5<->10, 7<->14, 11<->13.  Pairs 0, 6, 9
+ * and 15 map to themselves and are neither read nor written.
+ * All moved values are loaded into locals first and stored afterwards,
+ * so no store can clobber a value that is still needed.
+ */
+void bitrv216(float32 *a)
+{
+    float32 x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, 
+        x5r, x5i, x7r, x7i, x8r, x8i, x10r, x10i, 
+        x11r, x11i, x12r, x12i, x13r, x13i, x14r, x14i;
+    
+    x1r = a[2];
+    x1i = a[3];
+    x2r = a[4];
+    x2i = a[5];
+    x3r = a[6];
+    x3i = a[7];
+    x4r = a[8];
+    x4i = a[9];
+    x5r = a[10];
+    x5i = a[11];
+    x7r = a[14];
+    x7i = a[15];
+    x8r = a[16];
+    x8i = a[17];
+    x10r = a[20];
+    x10i = a[21];
+    x11r = a[22];
+    x11i = a[23];
+    x12r = a[24];
+    x12i = a[25];
+    x13r = a[26];
+    x13i = a[27];
+    x14r = a[28];
+    x14i = a[29];
+    a[2] = x8r;
+    a[3] = x8i;
+    a[4] = x4r;
+    a[5] = x4i;
+    a[6] = x12r;
+    a[7] = x12i;
+    a[8] = x2r;
+    a[9] = x2i;
+    a[10] = x10r;
+    a[11] = x10i;
+    a[14] = x14r;
+    a[15] = x14i;
+    a[16] = x1r;
+    a[17] = x1i;
+    a[20] = x5r;
+    a[21] = x5i;
+    a[22] = x13r;
+    a[23] = x13i;
+    a[24] = x3r;
+    a[25] = x3i;
+    a[26] = x11r;
+    a[27] = x11i;
+    a[28] = x7r;
+    a[29] = x7i;
+}
+
+/*
+ * bitrv216neg -- fixed in-place permutation of the 16 pairs
+ * (a[2i], a[2i+1]), i = 0..15, used by cftbsub() for n == 32.
+ *
+ * Pair 0 is left alone.  For i = 1..15, pair i receives the old pair
+ * whose index is (16 - i) with its 4 bits reversed, e.g. pair 1 <- 15,
+ * pair 2 <- 7, pair 8 <- 1.  Despite the name, no value is negated here;
+ * the data is only moved.
+ * All moved values are loaded into locals first and stored afterwards,
+ * so no store can clobber a value that is still needed.
+ */
+void bitrv216neg(float32 *a)
+{
+    float32 x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, 
+        x5r, x5i, x6r, x6i, x7r, x7i, x8r, x8i, 
+        x9r, x9i, x10r, x10i, x11r, x11i, x12r, x12i, 
+        x13r, x13i, x14r, x14i, x15r, x15i;
+    
+    x1r = a[2];
+    x1i = a[3];
+    x2r = a[4];
+    x2i = a[5];
+    x3r = a[6];
+    x3i = a[7];
+    x4r = a[8];
+    x4i = a[9];
+    x5r = a[10];
+    x5i = a[11];
+    x6r = a[12];
+    x6i = a[13];
+    x7r = a[14];
+    x7i = a[15];
+    x8r = a[16];
+    x8i = a[17];
+    x9r = a[18];
+    x9i = a[19];
+    x10r = a[20];
+    x10i = a[21];
+    x11r = a[22];
+    x11i = a[23];
+    x12r = a[24];
+    x12i = a[25];
+    x13r = a[26];
+    x13i = a[27];
+    x14r = a[28];
+    x14i = a[29];
+    x15r = a[30];
+    x15i = a[31];
+    a[2] = x15r;
+    a[3] = x15i;
+    a[4] = x7r;
+    a[5] = x7i;
+    a[6] = x11r;
+    a[7] = x11i;
+    a[8] = x3r;
+    a[9] = x3i;
+    a[10] = x13r;
+    a[11] = x13i;
+    a[12] = x5r;
+    a[13] = x5i;
+    a[14] = x9r;
+    a[15] = x9i;
+    a[16] = x1r;
+    a[17] = x1i;
+    a[18] = x14r;
+    a[19] = x14i;
+    a[20] = x6r;
+    a[21] = x6i;
+    a[22] = x10r;
+    a[23] = x10i;
+    a[24] = x2r;
+    a[25] = x2i;
+    a[26] = x12r;
+    a[27] = x12i;
+    a[28] = x4r;
+    a[29] = x4i;
+    a[30] = x8r;
+    a[31] = x8i;
+}
+
+/*
+ * bitrv208 -- fixed in-place permutation of the 8 pairs
+ * (a[2i], a[2i+1]), i = 0..7, used by cftfsub() for 8 < n < 32.
+ *
+ * Pair i is exchanged with the pair whose index is i with its 3 bits
+ * reversed: 1<->4 and 3<->6.  Pairs 0, 2, 5 and 7 map to themselves and
+ * are neither read nor written.
+ */
+void bitrv208(float32 *a)
+{
+    float32 x1r, x1i, x3r, x3i, x4r, x4i, x6r, x6i;
+    
+    x1r = a[2];
+    x1i = a[3];
+    x3r = a[6];
+    x3i = a[7];
+    x4r = a[8];
+    x4i = a[9];
+    x6r = a[12];
+    x6i = a[13];
+    a[2] = x4r;
+    a[3] = x4i;
+    a[6] = x6r;
+    a[7] = x6i;
+    a[8] = x1r;
+    a[9] = x1i;
+    a[12] = x3r;
+    a[13] = x3i;
+}
+
+/*
+ * bitrv208neg -- fixed in-place permutation of the 8 pairs
+ * (a[2i], a[2i+1]), i = 0..7, used by cftbsub() for 8 < n < 32.
+ *
+ * Pair 0 is left alone.  For i = 1..7, pair i receives the old pair whose
+ * index is (8 - i) with its 3 bits reversed: 1 <- 7, 2 <- 3, 3 <- 5,
+ * 4 <- 1, 5 <- 6, 6 <- 2, 7 <- 4.  Despite the name, no value is negated
+ * here; the data is only moved.
+ */
+void bitrv208neg(float32 *a)
+{
+    float32 x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, 
+        x5r, x5i, x6r, x6i, x7r, x7i;
+    
+    x1r = a[2];
+    x1i = a[3];
+    x2r = a[4];
+    x2i = a[5];
+    x3r = a[6];
+    x3i = a[7];
+    x4r = a[8];
+    x4i = a[9];
+    x5r = a[10];
+    x5i = a[11];
+    x6r = a[12];
+    x6i = a[13];
+    x7r = a[14];
+    x7i = a[15];
+    a[2] = x7r;
+    a[3] = x7i;
+    a[4] = x3r;
+    a[5] = x3i;
+    a[6] = x5r;
+    a[7] = x5i;
+    a[8] = x1r;
+    a[9] = x1i;
+    a[10] = x6r;
+    a[11] = x6i;
+    a[12] = x2r;
+    a[13] = x2i;
+    a[14] = x4r;
+    a[15] = x4i;
+}
+
+
+/*
+ * cftf1st: in-place four-way butterfly pass over a[].
+ *
+ * With mh = n >> 3 and m = 2 * mh, the routine combines entries taken at
+ * the same position of the four sections starting at offsets 0, m, 2*m
+ * and 3*m. Entries are handled as adjacent (a[i], a[i + 1]) pairs, which
+ * the local names treat as real/imaginary parts.
+ *
+ * The first two sections receive plain sums and differences; the last
+ * two are additionally multiplied by coefficients derived from w[]:
+ * w[1] (wn4r), w[2] (csc1), w[3] (csc3), and groups of four values read
+ * from w[4] onwards inside the loop.
+ *
+ * NOTE(review): the name suggests the first stage of a forward complex
+ * FFT; the required layout of w[] is established elsewhere -- confirm
+ * against the routine that builds the table.
+ */
+void cftf1st(int n, float32 *a, float32 *w)
+{
+    int j, j0, j1, j2, j3, k, m, mh;
+    float32 wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i, 
+        wd1r, wd1i, wd3r, wd3i;
+    float32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, 
+        y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i;
+    
+    mh = n >> 3;
+    m = 2 * mh;
+    /* pair at position 0 of each section: no multiplication needed */
+    j1 = m;
+    j2 = j1 + m;
+    j3 = j2 + m;
+    x0r = a[0] + a[j2];
+    x0i = a[1] + a[j2 + 1];
+    x1r = a[0] - a[j2];
+    x1i = a[1] - a[j2 + 1];
+    x2r = a[j1] + a[j3];
+    x2i = a[j1 + 1] + a[j3 + 1];
+    x3r = a[j1] - a[j3];
+    x3i = a[j1 + 1] - a[j3 + 1];
+    a[0] = x0r + x2r;
+    a[1] = x0i + x2i;
+    a[j1] = x0r - x2r;
+    a[j1 + 1] = x0i - x2i;
+    a[j2] = x1r - x3i;
+    a[j2 + 1] = x1i + x3r;
+    a[j3] = x1r + x3i;
+    a[j3 + 1] = x1i - x3r;
+    wn4r = w[1];
+    csc1 = w[2];
+    csc3 = w[3];
+    wd1r = 1;
+    wd1i = 0;
+    wd3r = 1;
+    wd3i = 0;
+    k = 0;
+    /*
+     * Each iteration processes two pairs going up from j (x*: pair j,
+     * y*: pair j + 2) and then the two mirrored pairs going down from
+     * m - j (x*: pair j0, y*: pair j0 - 2).
+     */
+    for (j = 2; j < mh - 2; j += 4) {
+        k += 4;
+        /*
+         * wk1/wk3: scaled sum of the previous table entry (still held in
+         * wd1/wd3) and the entry just reached; wd1/wd3 then take the new
+         * table entry itself.
+         */
+        wk1r = csc1 * (wd1r + w[k]);
+        wk1i = csc1 * (wd1i + w[k + 1]);
+        wk3r = csc3 * (wd3r + w[k + 2]);
+        wk3i = csc3 * (wd3i + w[k + 3]);
+        wd1r = w[k];
+        wd1i = w[k + 1];
+        wd3r = w[k + 2];
+        wd3i = w[k + 3];
+        j1 = j + m;
+        j2 = j1 + m;
+        j3 = j2 + m;
+        x0r = a[j] + a[j2];
+        x0i = a[j + 1] + a[j2 + 1];
+        x1r = a[j] - a[j2];
+        x1i = a[j + 1] - a[j2 + 1];
+        y0r = a[j + 2] + a[j2 + 2];
+        y0i = a[j + 3] + a[j2 + 3];
+        y1r = a[j + 2] - a[j2 + 2];
+        y1i = a[j + 3] - a[j2 + 3];
+        x2r = a[j1] + a[j3];
+        x2i = a[j1 + 1] + a[j3 + 1];
+        x3r = a[j1] - a[j3];
+        x3i = a[j1 + 1] - a[j3 + 1];
+        y2r = a[j1 + 2] + a[j3 + 2];
+        y2i = a[j1 + 3] + a[j3 + 3];
+        y3r = a[j1 + 2] - a[j3 + 2];
+        y3i = a[j1 + 3] - a[j3 + 3];
+        a[j] = x0r + x2r;
+        a[j + 1] = x0i + x2i;
+        a[j + 2] = y0r + y2r;
+        a[j + 3] = y0i + y2i;
+        a[j1] = x0r - x2r;
+        a[j1 + 1] = x0i - x2i;
+        a[j1 + 2] = y0r - y2r;
+        a[j1 + 3] = y0i - y2i;
+        /* x0r/x0i are reused as scratch for the values to be multiplied */
+        x0r = x1r - x3i;
+        x0i = x1i + x3r;
+        a[j2] = wk1r * x0r - wk1i * x0i;
+        a[j2 + 1] = wk1r * x0i + wk1i * x0r;
+        x0r = y1r - y3i;
+        x0i = y1i + y3r;
+        a[j2 + 2] = wd1r * x0r - wd1i * x0i;
+        a[j2 + 3] = wd1r * x0i + wd1i * x0r;
+        x0r = x1r + x3i;
+        x0i = x1i - x3r;
+        a[j3] = wk3r * x0r + wk3i * x0i;
+        a[j3 + 1] = wk3r * x0i - wk3i * x0r;
+        x0r = y1r + y3i;
+        x0i = y1i - y3r;
+        a[j3 + 2] = wd3r * x0r + wd3i * x0i;
+        a[j3 + 3] = wd3r * x0i - wd3i * x0r;
+        /* mirrored pairs: same coefficients with real/imag roles swapped */
+        j0 = m - j;
+        j1 = j0 + m;
+        j2 = j1 + m;
+        j3 = j2 + m;
+        x0r = a[j0] + a[j2];
+        x0i = a[j0 + 1] + a[j2 + 1];
+        x1r = a[j0] - a[j2];
+        x1i = a[j0 + 1] - a[j2 + 1];
+        y0r = a[j0 - 2] + a[j2 - 2];
+        y0i = a[j0 - 1] + a[j2 - 1];
+        y1r = a[j0 - 2] - a[j2 - 2];
+        y1i = a[j0 - 1] - a[j2 - 1];
+        x2r = a[j1] + a[j3];
+        x2i = a[j1 + 1] + a[j3 + 1];
+        x3r = a[j1] - a[j3];
+        x3i = a[j1 + 1] - a[j3 + 1];
+        y2r = a[j1 - 2] + a[j3 - 2];
+        y2i = a[j1 - 1] + a[j3 - 1];
+        y3r = a[j1 - 2] - a[j3 - 2];
+        y3i = a[j1 - 1] - a[j3 - 1];
+        a[j0] = x0r + x2r;
+        a[j0 + 1] = x0i + x2i;
+        a[j0 - 2] = y0r + y2r;
+        a[j0 - 1] = y0i + y2i;
+        a[j1] = x0r - x2r;
+        a[j1 + 1] = x0i - x2i;
+        a[j1 - 2] = y0r - y2r;
+        a[j1 - 1] = y0i - y2i;
+        x0r = x1r - x3i;
+        x0i = x1i + x3r;
+        a[j2] = wk1i * x0r - wk1r * x0i;
+        a[j2 + 1] = wk1i * x0i + wk1r * x0r;
+        x0r = y1r - y3i;
+        x0i = y1i + y3r;
+        a[j2 - 2] = wd1i * x0r - wd1r * x0i;
+        a[j2 - 1] = wd1i * x0i + wd1r * x0r;
+        x0r = x1r + x3i;
+        x0i = x1i - x3r;
+        a[j3] = wk3i * x0r + wk3r * x0i;
+        a[j3 + 1] = wk3i * x0i - wk3r * x0r;
+        x0r = y1r + y3i;
+        x0i = y1i - y3r;
+        a[j3 - 2] = wd3i * x0r + wd3r * x0i;
+        a[j3 - 1] = wd3i * x0i - wd3r * x0r;
+    }
+    /*
+     * Remaining three pairs around position mh (mh - 2, mh, mh + 2).
+     * The last coefficient is formed from the final wd1/wd3 and wn4r.
+     */
+    wk1r = csc1 * (wd1r + wn4r);
+    wk1i = csc1 * (wd1i + wn4r);
+    wk3r = csc3 * (wd3r - wn4r);
+    wk3i = csc3 * (wd3i - wn4r);
+    j0 = mh;
+    j1 = j0 + m;
+    j2 = j1 + m;
+    j3 = j2 + m;
+    /* pair mh - 2 */
+    x0r = a[j0 - 2] + a[j2 - 2];
+    x0i = a[j0 - 1] + a[j2 - 1];
+    x1r = a[j0 - 2] - a[j2 - 2];
+    x1i = a[j0 - 1] - a[j2 - 1];
+    x2r = a[j1 - 2] + a[j3 - 2];
+    x2i = a[j1 - 1] + a[j3 - 1];
+    x3r = a[j1 - 2] - a[j3 - 2];
+    x3i = a[j1 - 1] - a[j3 - 1];
+    a[j0 - 2] = x0r + x2r;
+    a[j0 - 1] = x0i + x2i;
+    a[j1 - 2] = x0r - x2r;
+    a[j1 - 1] = x0i - x2i;
+    x0r = x1r - x3i;
+    x0i = x1i + x3r;
+    a[j2 - 2] = wk1r * x0r - wk1i * x0i;
+    a[j2 - 1] = wk1r * x0i + wk1i * x0r;
+    x0r = x1r + x3i;
+    x0i = x1i - x3r;
+    a[j3 - 2] = wk3r * x0r + wk3i * x0i;
+    a[j3 - 1] = wk3r * x0i - wk3i * x0r;
+    /* pair mh: only wn4r is involved */
+    x0r = a[j0] + a[j2];
+    x0i = a[j0 + 1] + a[j2 + 1];
+    x1r = a[j0] - a[j2];
+    x1i = a[j0 + 1] - a[j2 + 1];
+    x2r = a[j1] + a[j3];
+    x2i = a[j1 + 1] + a[j3 + 1];
+    x3r = a[j1] - a[j3];
+    x3i = a[j1 + 1] - a[j3 + 1];
+    a[j0] = x0r + x2r;
+    a[j0 + 1] = x0i + x2i;
+    a[j1] = x0r - x2r;
+    a[j1 + 1] = x0i - x2i;
+    x0r = x1r - x3i;
+    x0i = x1i + x3r;
+    a[j2] = wn4r * (x0r - x0i);
+    a[j2 + 1] = wn4r * (x0i + x0r);
+    x0r = x1r + x3i;
+    x0i = x1i - x3r;
+    a[j3] = -wn4r * (x0r + x0i);
+    a[j3 + 1] = -wn4r * (x0i - x0r);
+    /* pair mh + 2: wk1/wk3 with real/imag roles swapped */
+    x0r = a[j0 + 2] + a[j2 + 2];
+    x0i = a[j0 + 3] + a[j2 + 3];
+    x1r = a[j0 + 2] - a[j2 + 2];
+    x1i = a[j0 + 3] - a[j2 + 3];
+    x2r = a[j1 + 2] + a[j3 + 2];
+    x2i = a[j1 + 3] + a[j3 + 3];
+    x3r = a[j1 + 2] - a[j3 + 2];
+    x3i = a[j1 + 3] - a[j3 + 3];
+    a[j0 + 2] = x0r + x2r;
+    a[j0 + 3] = x0i + x2i;
+    a[j1 + 2] = x0r - x2r;
+    a[j1 + 3] = x0i - x2i;
+    x0r = x1r - x3i;
+    x0i = x1i + x3r;
+    a[j2 + 2] = wk1i * x0r - wk1r * x0i;
+    a[j2 + 3] = wk1i * x0i + wk1r * x0r;
+    x0r = x1r + x3i;
+    x0i = x1i - x3r;
+    a[j3 + 2] = wk3i * x0r + wk3r * x0i;
+    a[j3 + 3] = wk3i * x0i - wk3r * x0r;
+}
+
+
+/*
+ * cftb1st: in-place four-way butterfly pass over a[], with the same
+ * section layout and coefficient handling as cftf1st (mh = n >> 3,
+ * m = 2 * mh, sections at offsets 0, m, 2*m, 3*m; coefficients from
+ * w[1], w[2], w[3] and groups of four from w[4] onwards).
+ *
+ * The visible difference from cftf1st is in the signs: the second value
+ * of each pair from sections 0 and 2 is negated when it is loaded, and
+ * the signs applied to the section 1/3 terms are flipped to match.
+ *
+ * NOTE(review): the name suggests the backward (inverse-direction)
+ * counterpart of cftf1st -- confirm against the callers.
+ */
+void cftb1st(int n, float32 *a, float32 *w)
+{
+    int j, j0, j1, j2, j3, k, m, mh;
+    float32 wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i, 
+        wd1r, wd1i, wd3r, wd3i;
+    float32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, 
+        y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i;
+    
+    mh = n >> 3;
+    m = 2 * mh;
+    /* pair at position 0 of each section: no multiplication needed */
+    j1 = m;
+    j2 = j1 + m;
+    j3 = j2 + m;
+    x0r = a[0] + a[j2];
+    x0i = -a[1] - a[j2 + 1];
+    x1r = a[0] - a[j2];
+    x1i = -a[1] + a[j2 + 1];
+    x2r = a[j1] + a[j3];
+    x2i = a[j1 + 1] + a[j3 + 1];
+    x3r = a[j1] - a[j3];
+    x3i = a[j1 + 1] - a[j3 + 1];
+    a[0] = x0r + x2r;
+    a[1] = x0i - x2i;
+    a[j1] = x0r - x2r;
+    a[j1 + 1] = x0i + x2i;
+    a[j2] = x1r + x3i;
+    a[j2 + 1] = x1i + x3r;
+    a[j3] = x1r - x3i;
+    a[j3 + 1] = x1i - x3r;
+    wn4r = w[1];
+    csc1 = w[2];
+    csc3 = w[3];
+    wd1r = 1;
+    wd1i = 0;
+    wd3r = 1;
+    wd3i = 0;
+    k = 0;
+    /*
+     * Each iteration processes two pairs going up from j (x*: pair j,
+     * y*: pair j + 2) and then the two mirrored pairs going down from
+     * m - j (x*: pair j0, y*: pair j0 - 2).
+     */
+    for (j = 2; j < mh - 2; j += 4) {
+        k += 4;
+        /*
+         * wk1/wk3: scaled sum of the previous table entry (still held in
+         * wd1/wd3) and the entry just reached; wd1/wd3 then take the new
+         * table entry itself.
+         */
+        wk1r = csc1 * (wd1r + w[k]);
+        wk1i = csc1 * (wd1i + w[k + 1]);
+        wk3r = csc3 * (wd3r + w[k + 2]);
+        wk3i = csc3 * (wd3i + w[k + 3]);
+        wd1r = w[k];
+        wd1i = w[k + 1];
+        wd3r = w[k + 2];
+        wd3i = w[k + 3];
+        j1 = j + m;
+        j2 = j1 + m;
+        j3 = j2 + m;
+        x0r = a[j] + a[j2];
+        x0i = -a[j + 1] - a[j2 + 1];
+        x1r = a[j] - a[j2];
+        x1i = -a[j + 1] + a[j2 + 1];
+        y0r = a[j + 2] + a[j2 + 2];
+        y0i = -a[j + 3] - a[j2 + 3];
+        y1r = a[j + 2] - a[j2 + 2];
+        y1i = -a[j + 3] + a[j2 + 3];
+        x2r = a[j1] + a[j3];
+        x2i = a[j1 + 1] + a[j3 + 1];
+        x3r = a[j1] - a[j3];
+        x3i = a[j1 + 1] - a[j3 + 1];
+        y2r = a[j1 + 2] + a[j3 + 2];
+        y2i = a[j1 + 3] + a[j3 + 3];
+        y3r = a[j1 + 2] - a[j3 + 2];
+        y3i = a[j1 + 3] - a[j3 + 3];
+        a[j] = x0r + x2r;
+        a[j + 1] = x0i - x2i;
+        a[j + 2] = y0r + y2r;
+        a[j + 3] = y0i - y2i;
+        a[j1] = x0r - x2r;
+        a[j1 + 1] = x0i + x2i;
+        a[j1 + 2] = y0r - y2r;
+        a[j1 + 3] = y0i + y2i;
+        /* x0r/x0i are reused as scratch for the values to be multiplied */
+        x0r = x1r + x3i;
+        x0i = x1i + x3r;
+        a[j2] = wk1r * x0r - wk1i * x0i;
+        a[j2 + 1] = wk1r * x0i + wk1i * x0r;
+        x0r = y1r + y3i;
+        x0i = y1i + y3r;
+        a[j2 + 2] = wd1r * x0r - wd1i * x0i;
+        a[j2 + 3] = wd1r * x0i + wd1i * x0r;
+        x0r = x1r - x3i;
+        x0i = x1i - x3r;
+        a[j3] = wk3r * x0r + wk3i * x0i;
+        a[j3 + 1] = wk3r * x0i - wk3i * x0r;
+        x0r = y1r - y3i;
+        x0i = y1i - y3r;
+        a[j3 + 2] = wd3r * x0r + wd3i * x0i;
+        a[j3 + 3] = wd3r * x0i - wd3i * x0r;
+        /* mirrored pairs: same coefficients with real/imag roles swapped */
+        j0 = m - j;
+        j1 = j0 + m;
+        j2 = j1 + m;
+        j3 = j2 + m;
+        x0r = a[j0] + a[j2];
+        x0i = -a[j0 + 1] - a[j2 + 1];
+        x1r = a[j0] - a[j2];
+        x1i = -a[j0 + 1] + a[j2 + 1];
+        y0r = a[j0 - 2] + a[j2 - 2];
+        y0i = -a[j0 - 1] - a[j2 - 1];
+        y1r = a[j0 - 2] - a[j2 - 2];
+        y1i = -a[j0 - 1] + a[j2 - 1];
+        x2r = a[j1] + a[j3];
+        x2i = a[j1 + 1] + a[j3 + 1];
+        x3r = a[j1] - a[j3];
+        x3i = a[j1 + 1] - a[j3 + 1];
+        y2r = a[j1 - 2] + a[j3 - 2];
+        y2i = a[j1 - 1] + a[j3 - 1];
+        y3r = a[j1 - 2] - a[j3 - 2];
+        y3i = a[j1 - 1] - a[j3 - 1];
+        a[j0] = x0r + x2r;
+        a[j0 + 1] = x0i - x2i;
+        a[j0 - 2] = y0r + y2r;
+        a[j0 - 1] = y0i - y2i;
+        a[j1] = x0r - x2r;
+        a[j1 + 1] = x0i + x2i;
+        a[j1 - 2] = y0r - y2r;
+        a[j1 - 1] = y0i + y2i;
+        x0r = x1r + x3i;
+        x0i = x1i + x3r;
+        a[j2] = wk1i * x0r - wk1r * x0i;
+        a[j2 + 1] = wk1i * x0i + wk1r * x0r;
+        x0r = y1r + y3i;
+        x0i = y1i + y3r;
+        a[j2 - 2] = wd1i * x0r - wd1r * x0i;
+        a[j2 - 1] = wd1i * x0i + wd1r * x0r;
+        x0r = x1r - x3i;
+        x0i = x1i - x3r;
+        a[j3] = wk3i * x0r + wk3r * x0i;
+        a[j3 + 1] = wk3i * x0i - wk3r * x0r;
+        x0r = y1r - y3i;
+        x0i = y1i - y3r;
+        a[j3 - 2] = wd3i * x0r + wd3r * x0i;
+        a[j3 - 1] = wd3i * x0i - wd3r * x0r;
+    }
+    /*
+     * Remaining three pairs around position mh (mh - 2, mh, mh + 2).
+     * The last coefficient is formed from the final wd1/wd3 and wn4r.
+     */
+    wk1r = csc1 * (wd1r + wn4r);
+    wk1i = csc1 * (wd1i + wn4r);
+    wk3r = csc3 * (wd3r - wn4r);
+    wk3i = csc3 * (wd3i - wn4r);
+    j0 = mh;
+    j1 = j0 + m;
+    j2 = j1 + m;
+    j3 = j2 + m;
+    /* pair mh - 2 */
+    x0r = a[j0 - 2] + a[j2 - 2];
+    x0i = -a[j0 - 1] - a[j2 - 1];
+    x1r = a[j0 - 2] - a[j2 - 2];
+    x1i = -a[j0 - 1] + a[j2 - 1];
+    x2r = a[j1 - 2] + a[j3 - 2];
+    x2i = a[j1 - 1] + a[j3 - 1];
+    x3r = a[j1 - 2] - a[j3 - 2];
+    x3i = a[j1 - 1] - a[j3 - 1];
+    a[j0 - 2] = x0r + x2r;
+    a[j0 - 1] = x0i - x2i;
+    a[j1 - 2] = x0r - x2r;
+    a[j1 - 1] = x0i + x2i;
+    x0r = x1r + x3i;
+    x0i = x1i + x3r;
+    a[j2 - 2] = wk1r * x0r - wk1i * x0i;
+    a[j2 - 1] = wk1r * x0i + wk1i * x0r;
+    x0r = x1r - x3i;
+    x0i = x1i - x3r;
+    a[j3 - 2] = wk3r * x0r + wk3i * x0i;
+    a[j3 - 1] = wk3r * x0i - wk3i * x0r;
+    /* pair mh: only wn4r is involved */
+    x0r = a[j0] + a[j2];
+    x0i = -a[j0 + 1] - a[j2 + 1];
+    x1r = a[j0] - a[j2];
+    x1i = -a[j0 + 1] + a[j2 + 1];
+    x2r = a[j1] + a[j3];
+    x2i = a[j1 + 1] + a[j3 + 1];
+    x3r = a[j1] - a[j3];
+    x3i = a[j1 + 1] - a[j3 + 1];
+    a[j0] = x0r + x2r;
+    a[j0 + 1] = x0i - x2i;
+    a[j1] = x0r - x2r;
+    a[j1 + 1] = x0i + x2i;
+    x0r = x1r + x3i;
+    x0i = x1i + x3r;
+    a[j2] = wn4r * (x0r - x0i);
+    a[j2 + 1] = wn4r * (x0i + x0r);
+    x0r = x1r - x3i;
+    x0i = x1i - x3r;
+    a[j3] = -wn4r * (x0r + x0i);
+    a[j3 + 1] = -wn4r * (x0i - x0r);
+    /* pair mh + 2: wk1/wk3 with real/imag roles swapped */
+    x0r = a[j0 + 2] + a[j2 + 2];
+    x0i = -a[j0 + 3] - a[j2 + 3];
+    x1r = a[j0 + 2] - a[j2 + 2];
+    x1i = -a[j0 + 3] + a[j2 + 3];
+    x2r = a[j1 + 2] + a[j3 + 2];
+    x2i = a[j1 + 3] + a[j3 + 3];
+    x3r = a[j1 + 2] - a[j3 + 2];
+    x3i = a[j1 + 3] - a[j3 + 3];
+    a[j0 + 2] = x0r + x2r;
+    a[j0 + 3] = x0i - x2i;
+    a[j1 + 2] = x0r - x2r;
+    a[j1 + 3] = x0i + x2i;
+    x0r = x1r + x3i;
+    x0i = x1i + x3r;
+    a[j2 + 2] = wk1i * x0r - wk1r * x0i;
+    a[j2 + 3] = wk1i * x0i + wk1r * x0r;
+    x0r = x1r - x3i;
+    x0i = x1i - x3r;
+    a[j3 + 2] = wk3i * x0r + wk3r * x0i;
+    a[j3 + 3] = wk3i * x0i - wk3r * x0r;
+}
+
+
+#ifdef USE_CDFT_THREADS
+/*
+ * Argument bundle passed (as void *) to the cftrec1_th / cftrec2_th
+ * thread entry points; filled in by cftrec4_th.
+ */
+struct cdft_arg_st {
+    int n0;         /* n as received by cftrec4_th */
+    int n;          /* length of the sub-range handled by this thread */
+    float32 *a;     /* start of this thread's sub-range within a[] */
+    int nw;         /* nw as received by cftrec4_th */
+    float32 *w;     /* w as received by cftrec4_th */
+};
+typedef struct cdft_arg_st cdft_arg_t;
+
+
+/*
+ * cftrec4_th: threaded driver that splits a[] into equal sub-ranges and
+ * runs one worker per sub-range, then waits for all of them.
+ *
+ * Two workers (sub-range length n / 2) are used by default; four workers
+ * (sub-range length n / 4) are used when n > CDFT_4THREADS_BEGIN_N.
+ * Worker number idiv4 (0 in the two-thread case, 1 in the four-thread
+ * case) runs cftrec2_th; every other worker runs cftrec1_th.
+ *
+ * cdft_thread_create / cdft_thread_wait / cdft_thread_t are defined
+ * outside this function.
+ */
+void cftrec4_th(int n, float32 *a, int nw, float32 *w)
+{
+    void *cftrec1_th(void *p);
+    void *cftrec2_th(void *p);
+    int i, idiv4, m, nthread;
+    cdft_thread_t th[4];
+    cdft_arg_t ag[4];
+    
+    nthread = 2;
+    idiv4 = 0;
+    m = n >> 1;
+    if (n > CDFT_4THREADS_BEGIN_N) {
+        nthread = 4;
+        idiv4 = 1;
+        m >>= 1;
+    }
+    for (i = 0; i < nthread; i++) {
+        /* ag[i] must stay alive until the matching wait below */
+        ag[i].n0 = n;
+        ag[i].n = m;
+        ag[i].a = &a[i * m];
+        ag[i].nw = nw;
+        ag[i].w = w;
+        if (i != idiv4) {
+            cdft_thread_create(&th[i], cftrec1_th, &ag[i]);
+        } else {
+            cdft_thread_create(&th[i], cftrec2_th, &ag[i]);
+        }
+    }
+    for (i = 0; i < nthread; i++) {
+        cdft_thread_wait(th[i]);
+    }
+}
+
+
+/*
+ * cftrec1_th: thread entry point. p points to a cdft_arg_t describing the
+ * sub-range to process. Starting from the full length n0, the block
+ * length is quartered (applying cftmdl1 to the tail of the sub-range each
+ * time) until it is at most 512; the tail block is then finished with
+ * cftleaf and the remaining blocks are processed back to front with
+ * cfttree + cftleaf. Always returns a null pointer.
+ */
+void *cftrec1_th(void *p)
+{
+    int cfttree(int n, int j, int k, float32 *a, int nw, float32 *w);
+    void cftleaf(int n, int isplt, float32 *a, int nw, float32 *w);
+    void cftmdl1(int n, float32 *a, float32 *w);
+    cdft_arg_t *arg = (cdft_arg_t *) p;
+    int full_len = arg->n0;
+    int range_len = arg->n;
+    float32 *data = arg->a;
+    int table_len = arg->nw;
+    float32 *table = arg->w;
+    int blk, pos, count, split;
+
+    blk = full_len;
+    while (blk > 512) {
+        blk >>= 2;
+        cftmdl1(blk, data + (range_len - blk), table + (table_len - (blk >> 1)));
+    }
+    cftleaf(blk, 1, data + (range_len - blk), table_len, table);
+
+    count = 0;
+    pos = range_len - blk;
+    while (pos > 0) {
+        ++count;
+        split = cfttree(blk, pos, count, data, table_len, table);
+        cftleaf(blk, split, data + (pos - blk), table_len, table);
+        pos -= blk;
+    }
+    return (void *) 0;
+}
+
+
+/*
+ * cftrec2_th: thread entry point. p points to a cdft_arg_t describing the
+ * sub-range to process. Same overall shape as cftrec1_th, but the
+ * quartering steps use cftmdl2 (with table offset nw - block length), the
+ * tail block is finished with cftleaf(..., 0, ...), and the block counter
+ * handed to cfttree starts from half of 4^(number of quartering steps)
+ * instead of zero. Always returns a null pointer.
+ */
+void *cftrec2_th(void *p)
+{
+    int cfttree(int n, int j, int k, float32 *a, int nw, float32 *w);
+    void cftleaf(int n, int isplt, float32 *a, int nw, float32 *w);
+    void cftmdl2(int n, float32 *a, float32 *w);
+    cdft_arg_t *arg = (cdft_arg_t *) p;
+    int full_len = arg->n0;
+    int range_len = arg->n;
+    float32 *data = arg->a;
+    int table_len = arg->nw;
+    float32 *table = arg->w;
+    int blk, pos, count, split;
+
+    count = 1;
+    blk = full_len;
+    while (blk > 512) {
+        blk >>= 2;
+        count <<= 2;
+        cftmdl2(blk, data + (range_len - blk), table + (table_len - blk));
+    }
+    cftleaf(blk, 0, data + (range_len - blk), table_len, table);
+
+    count >>= 1;
+    pos = range_len - blk;
+    while (pos > 0) {
+        ++count;
+        split = cfttree(blk, pos, count, data, table_len, table);
+        cftleaf(blk, split, data + (pos - blk), table_len, table);
+        pos -= blk;
+    }
+    return (void *) 0;
+}
+#endif /* USE_CDFT_THREADS */
+
+
+/*
+ * cftrec4: single-threaded driver over a[0..n-1].
+ * The block length starts at n and is quartered (applying cftmdl1 to the
+ * tail of a[] each time) until it is at most 512. The tail block is then
+ * finished with cftleaf, and the remaining blocks are processed from the
+ * back of the array towards the front with cfttree followed by cftleaf.
+ */
+void cftrec4(int n, float32 *a, int nw, float32 *w)
+{
+    int cfttree(int n, int j, int k, float32 *a, int nw, float32 *w);
+    void cftleaf(int n, int isplt, float32 *a, int nw, float32 *w);
+    void cftmdl1(int n, float32 *a, float32 *w);
+    int blk, pos, count, split;
+
+    blk = n;
+    while (blk > 512) {
+        blk >>= 2;
+        cftmdl1(blk, a + (n - blk), w + (nw - (blk >> 1)));
+    }
+    cftleaf(blk, 1, a + (n - blk), nw, w);
+
+    count = 0;
+    pos = n - blk;
+    while (pos > 0) {
+        ++count;
+        split = cfttree(blk, pos, count, a, nw, w);
+        cftleaf(blk, split, a + (pos - blk), nw, w);
+        pos -= blk;
+    }
+}
+
+
+/*
+ * cfttree: apply the cftmdl1 / cftmdl2 passes that end at a[j], selected
+ * by the block counter k, and return the flag (0 or 1) that says which of
+ * the two was used.
+ *
+ * If k is not a multiple of 4, one pass of length n is applied; the flag
+ * is the low bit of k. Otherwise the length is multiplied by 4 for every
+ * trailing pair of zero bits in k, the flag is the low bit of what
+ * remains, and passes are applied with the length quartered each time
+ * while it is above 128.
+ *
+ * k must be non-zero: the zero-bit scan does not terminate for k == 0.
+ */
+int cfttree(int n, int j, int k, float32 *a, int nw, float32 *w)
+{
+    void cftmdl1(int n, float32 *a, float32 *w);
+    void cftmdl2(int n, float32 *a, float32 *w);
+    int rest, flag, len;
+
+    if ((k & 3) != 0) {
+        flag = k & 1;
+        if (flag == 0) {
+            cftmdl2(n, a + (j - n), w + (nw - n));
+        } else {
+            cftmdl1(n, a + (j - n), w + (nw - (n >> 1)));
+        }
+        return flag;
+    }
+
+    len = n;
+    rest = k;
+    while ((rest & 3) == 0) {
+        len <<= 2;
+        rest >>= 2;
+    }
+    flag = rest & 1;
+    for (; len > 128; len >>= 2) {
+        if (flag == 0) {
+            cftmdl2(len, a + (j - len), w + (nw - len));
+        } else {
+            cftmdl1(len, a + (j - len), w + (nw - (len >> 1)));
+        }
+    }
+    return flag;
+}
+
+
+/*
+ * cftleaf: fixed sequence of passes over one leaf block of a[].
+ *
+ * For n == 512 the block a[0..511] is processed as four 128-entry groups
+ * (cftmdl1 / cftmdl2 followed by cftf161 / cftf162 on 32-entry pieces).
+ * For any other n the same pattern is applied at half the size to
+ * a[0..255] (64-entry groups, cftf081 / cftf082 on 16-entry pieces).
+ * isplt only affects the last group: non-zero selects the cftmdl1
+ * variant, zero the cftmdl2 variant.
+ *
+ * The calls work in place on overlapping ranges, so their order matters.
+ */
+void cftleaf(int n, int isplt, float32 *a, int nw, float32 *w)
+{
+    void cftmdl1(int n, float32 *a, float32 *w);
+    void cftmdl2(int n, float32 *a, float32 *w);
+    void cftf161(float32 *a, float32 *w);
+    void cftf162(float32 *a, float32 *w);
+    void cftf081(float32 *a, float32 *w);
+    void cftf082(float32 *a, float32 *w);
+    float32 *w8, *w32, *w64, *w128;
+
+    if (n != 512) {
+        w8 = w + (nw - 8);
+        w32 = w + (nw - 32);
+        w64 = w + (nw - 64);
+
+        /* group 0 */
+        cftmdl1(64, a, w32);
+        cftf081(a, w8);
+        cftf082(a + 16, w8);
+        cftf081(a + 32, w8);
+        cftf081(a + 48, w8);
+        /* group 1 */
+        cftmdl2(64, a + 64, w64);
+        cftf081(a + 64, w8);
+        cftf082(a + 80, w8);
+        cftf081(a + 96, w8);
+        cftf082(a + 112, w8);
+        /* group 2 */
+        cftmdl1(64, a + 128, w32);
+        cftf081(a + 128, w8);
+        cftf082(a + 144, w8);
+        cftf081(a + 160, w8);
+        cftf081(a + 176, w8);
+        /* group 3: variant chosen by isplt */
+        if (isplt == 0) {
+            cftmdl2(64, a + 192, w64);
+            cftf082(a + 240, w8);
+        } else {
+            cftmdl1(64, a + 192, w32);
+            cftf081(a + 240, w8);
+        }
+        cftf081(a + 192, w8);
+        cftf082(a + 208, w8);
+        cftf081(a + 224, w8);
+        return;
+    }
+
+    w8 = w + (nw - 8);
+    w32 = w + (nw - 32);
+    w64 = w + (nw - 64);
+    w128 = w + (nw - 128);
+
+    /* group 0 */
+    cftmdl1(128, a, w64);
+    cftf161(a, w8);
+    cftf162(a + 32, w32);
+    cftf161(a + 64, w8);
+    cftf161(a + 96, w8);
+    /* group 1 */
+    cftmdl2(128, a + 128, w128);
+    cftf161(a + 128, w8);
+    cftf162(a + 160, w32);
+    cftf161(a + 192, w8);
+    cftf162(a + 224, w32);
+    /* group 2 */
+    cftmdl1(128, a + 256, w64);
+    cftf161(a + 256, w8);
+    cftf162(a + 288, w32);
+    cftf161(a + 320, w8);
+    cftf161(a + 352, w8);
+    /* group 3: variant chosen by isplt */
+    if (isplt == 0) {
+        cftmdl2(128, a + 384, w128);
+        cftf162(a + 480, w32);
+    } else {
+        cftmdl1(128, a + 384, w64);
+        cftf161(a + 480, w8);
+    }
+    cftf161(a + 384, w8);
+    cftf162(a + 416, w32);
+    cftf161(a + 448, w8);
+}
+
+
+/*
+ * cftmdl1: in-place four-way butterfly pass over a[].
+ *
+ * With mh = n >> 3 and m = 2 * mh, entries at the same position of the
+ * four sections starting at offsets 0, m, 2*m and 3*m are combined.
+ * Entries are handled as adjacent (a[i], a[i + 1]) pairs, which the local
+ * names treat as real/imaginary parts. The first two sections receive
+ * plain sums/differences; the last two are additionally multiplied by
+ * coefficients read from w[]: w[1] (wn4r) and, inside the loop, four
+ * values per step starting at w[4].
+ *
+ * NOTE(review): the layout of w[] is established elsewhere -- confirm
+ * against the routine that builds the table.
+ */
+void cftmdl1(int n, float32 *a, float32 *w)
+{
+    int j, j0, j1, j2, j3, k, m, mh;
+    float32 wn4r, wk1r, wk1i, wk3r, wk3i;
+    float32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+    
+    mh = n >> 3;
+    m = 2 * mh;
+    /* pair at position 0 of each section: no multiplication needed */
+    j1 = m;
+    j2 = j1 + m;
+    j3 = j2 + m;
+    x0r = a[0] + a[j2];
+    x0i = a[1] + a[j2 + 1];
+    x1r = a[0] - a[j2];
+    x1i = a[1] - a[j2 + 1];
+    x2r = a[j1] + a[j3];
+    x2i = a[j1 + 1] + a[j3 + 1];
+    x3r = a[j1] - a[j3];
+    x3i = a[j1 + 1] - a[j3 + 1];
+    a[0] = x0r + x2r;
+    a[1] = x0i + x2i;
+    a[j1] = x0r - x2r;
+    a[j1 + 1] = x0i - x2i;
+    a[j2] = x1r - x3i;
+    a[j2 + 1] = x1i + x3r;
+    a[j3] = x1r + x3i;
+    a[j3 + 1] = x1i - x3r;
+    wn4r = w[1];
+    k = 0;
+    /*
+     * Each iteration handles the pair at position j and the mirrored
+     * pair at position m - j with the same four coefficients.
+     */
+    for (j = 2; j < mh; j += 2) {
+        k += 4;
+        wk1r = w[k];
+        wk1i = w[k + 1];
+        wk3r = w[k + 2];
+        wk3i = w[k + 3];
+        j1 = j + m;
+        j2 = j1 + m;
+        j3 = j2 + m;
+        x0r = a[j] + a[j2];
+        x0i = a[j + 1] + a[j2 + 1];
+        x1r = a[j] - a[j2];
+        x1i = a[j + 1] - a[j2 + 1];
+        x2r = a[j1] + a[j3];
+        x2i = a[j1 + 1] + a[j3 + 1];
+        x3r = a[j1] - a[j3];
+        x3i = a[j1 + 1] - a[j3 + 1];
+        a[j] = x0r + x2r;
+        a[j + 1] = x0i + x2i;
+        a[j1] = x0r - x2r;
+        a[j1 + 1] = x0i - x2i;
+        /* x0r/x0i are reused as scratch for the values to be multiplied */
+        x0r = x1r - x3i;
+        x0i = x1i + x3r;
+        a[j2] = wk1r * x0r - wk1i * x0i;
+        a[j2 + 1] = wk1r * x0i + wk1i * x0r;
+        x0r = x1r + x3i;
+        x0i = x1i - x3r;
+        a[j3] = wk3r * x0r + wk3i * x0i;
+        a[j3 + 1] = wk3r * x0i - wk3i * x0r;
+        /* mirrored pair: same coefficients with real/imag roles swapped */
+        j0 = m - j;
+        j1 = j0 + m;
+        j2 = j1 + m;
+        j3 = j2 + m;
+        x0r = a[j0] + a[j2];
+        x0i = a[j0 + 1] + a[j2 + 1];
+        x1r = a[j0] - a[j2];
+        x1i = a[j0 + 1] - a[j2 + 1];
+        x2r = a[j1] + a[j3];
+        x2i = a[j1 + 1] + a[j3 + 1];
+        x3r = a[j1] - a[j3];
+        x3i = a[j1 + 1] - a[j3 + 1];
+        a[j0] = x0r + x2r;
+        a[j0 + 1] = x0i + x2i;
+        a[j1] = x0r - x2r;
+        a[j1 + 1] = x0i - x2i;
+        x0r = x1r - x3i;
+        x0i = x1i + x3r;
+        a[j2] = wk1i * x0r - wk1r * x0i;
+        a[j2 + 1] = wk1i * x0i + wk1r * x0r;
+        x0r = x1r + x3i;
+        x0i = x1i - x3r;
+        a[j3] = wk3i * x0r + wk3r * x0i;
+        a[j3 + 1] = wk3i * x0i - wk3r * x0r;
+    }
+    /* middle pair at position mh: only wn4r is involved */
+    j0 = mh;
+    j1 = j0 + m;
+    j2 = j1 + m;
+    j3 = j2 + m;
+    x0r = a[j0] + a[j2];
+    x0i = a[j0 + 1] + a[j2 + 1];
+    x1r = a[j0] - a[j2];
+    x1i = a[j0 + 1] - a[j2 + 1];
+    x2r = a[j1] + a[j3];
+    x2i = a[j1 + 1] + a[j3 + 1];
+    x3r = a[j1] - a[j3];
+    x3i = a[j1 + 1] - a[j3 + 1];
+    a[j0] = x0r + x2r;
+    a[j0 + 1] = x0i + x2i;
+    a[j1] = x0r - x2r;
+    a[j1 + 1] = x0i - x2i;
+    x0r = x1r - x3i;
+    x0i = x1i + x3r;
+    a[j2] = wn4r * (x0r - x0i);
+    a[j2 + 1] = wn4r * (x0i + x0r);
+    x0r = x1r + x3i;
+    x0i = x1i - x3r;
+    a[j3] = -wn4r * (x0r + x0i);
+    a[j3 + 1] = -wn4r * (x0i - x0r);
+}
+
+
+/*
+ * cftmdl2: in-place four-way butterfly pass over a[], using the same
+ * section layout as cftmdl1 (mh = n >> 3, m = 2 * mh, sections at offsets
+ * 0, m, 2*m, 3*m; entries handled as adjacent pairs).
+ *
+ * Unlike cftmdl1, the inputs from sections 0/2 and 1/3 are first combined
+ * with the two values of the partner pair crossed (first value with
+ * second value and vice versa), and all four outputs are built from
+ * coefficient products. Coefficients come from w[]: w[1] (wn4r), four
+ * values per step read upwards from w[4] (wk1/wk3), four values per step
+ * read downwards from w[2*m - 4] with the two values of each pair
+ * swapped (wd1/wd3), and w[m], w[m + 1] for the middle pair.
+ *
+ * NOTE(review): the layout of w[] is established elsewhere -- confirm
+ * against the routine that builds the table.
+ */
+void cftmdl2(int n, float32 *a, float32 *w)
+{
+    int j, j0, j1, j2, j3, k, kr, m, mh;
+    float32 wn4r, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i;
+    float32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y2r, y2i;
+    
+    mh = n >> 3;
+    m = 2 * mh;
+    wn4r = w[1];
+    /* pair at position 0 of each section: only wn4r is involved */
+    j1 = m;
+    j2 = j1 + m;
+    j3 = j2 + m;
+    x0r = a[0] - a[j2 + 1];
+    x0i = a[1] + a[j2];
+    x1r = a[0] + a[j2 + 1];
+    x1i = a[1] - a[j2];
+    x2r = a[j1] - a[j3 + 1];
+    x2i = a[j1 + 1] + a[j3];
+    x3r = a[j1] + a[j3 + 1];
+    x3i = a[j1 + 1] - a[j3];
+    y0r = wn4r * (x2r - x2i);
+    y0i = wn4r * (x2i + x2r);
+    a[0] = x0r + y0r;
+    a[1] = x0i + y0i;
+    a[j1] = x0r - y0r;
+    a[j1 + 1] = x0i - y0i;
+    y0r = wn4r * (x3r - x3i);
+    y0i = wn4r * (x3i + x3r);
+    a[j2] = x1r - y0i;
+    a[j2 + 1] = x1i + y0r;
+    a[j3] = x1r + y0i;
+    a[j3 + 1] = x1i - y0r;
+    k = 0;
+    kr = 2 * m;
+    /*
+     * Each iteration handles the pair at position j and the mirrored
+     * pair at position m - j; k walks the table upwards, kr downwards.
+     */
+    for (j = 2; j < mh; j += 2) {
+        k += 4;
+        wk1r = w[k];
+        wk1i = w[k + 1];
+        wk3r = w[k + 2];
+        wk3i = w[k + 3];
+        kr -= 4;
+        /* note the swapped order: the first value goes to the "i" name */
+        wd1i = w[kr];
+        wd1r = w[kr + 1];
+        wd3i = w[kr + 2];
+        wd3r = w[kr + 3];
+        j1 = j + m;
+        j2 = j1 + m;
+        j3 = j2 + m;
+        x0r = a[j] - a[j2 + 1];
+        x0i = a[j + 1] + a[j2];
+        x1r = a[j] + a[j2 + 1];
+        x1i = a[j + 1] - a[j2];
+        x2r = a[j1] - a[j3 + 1];
+        x2i = a[j1 + 1] + a[j3];
+        x3r = a[j1] + a[j3 + 1];
+        x3i = a[j1 + 1] - a[j3];
+        y0r = wk1r * x0r - wk1i * x0i;
+        y0i = wk1r * x0i + wk1i * x0r;
+        y2r = wd1r * x2r - wd1i * x2i;
+        y2i = wd1r * x2i + wd1i * x2r;
+        a[j] = y0r + y2r;
+        a[j + 1] = y0i + y2i;
+        a[j1] = y0r - y2r;
+        a[j1 + 1] = y0i - y2i;
+        y0r = wk3r * x1r + wk3i * x1i;
+        y0i = wk3r * x1i - wk3i * x1r;
+        y2r = wd3r * x3r + wd3i * x3i;
+        y2i = wd3r * x3i - wd3i * x3r;
+        a[j2] = y0r + y2r;
+        a[j2 + 1] = y0i + y2i;
+        a[j3] = y0r - y2r;
+        a[j3 + 1] = y0i - y2i;
+        /* mirrored pair: wk and wd exchange roles */
+        j0 = m - j;
+        j1 = j0 + m;
+        j2 = j1 + m;
+        j3 = j2 + m;
+        x0r = a[j0] - a[j2 + 1];
+        x0i = a[j0 + 1] + a[j2];
+        x1r = a[j0] + a[j2 + 1];
+        x1i = a[j0 + 1] - a[j2];
+        x2r = a[j1] - a[j3 + 1];
+        x2i = a[j1 + 1] + a[j3];
+        x3r = a[j1] + a[j3 + 1];
+        x3i = a[j1 + 1] - a[j3];
+        y0r = wd1i * x0r - wd1r * x0i;
+        y0i = wd1i * x0i + wd1r * x0r;
+        y2r = wk1i * x2r - wk1r * x2i;
+        y2i = wk1i * x2i + wk1r * x2r;
+        a[j0] = y0r + y2r;
+        a[j0 + 1] = y0i + y2i;
+        a[j1] = y0r - y2r;
+        a[j1 + 1] = y0i - y2i;
+        y0r = wd3i * x1r + wd3r * x1i;
+        y0i = wd3i * x1i - wd3r * x1r;
+        y2r = wk3i * x3r + wk3r * x3i;
+        y2i = wk3i * x3i - wk3r * x3r;
+        a[j2] = y0r + y2r;
+        a[j2 + 1] = y0i + y2i;
+        a[j3] = y0r - y2r;
+        a[j3 + 1] = y0i - y2i;
+    }
+    /* middle pair at position mh: single coefficient pair w[m], w[m + 1] */
+    wk1r = w[m];
+    wk1i = w[m + 1];
+    j0 = mh;
+    j1 = j0 + m;
+    j2 = j1 + m;
+    j3 = j2 + m;
+    x0r = a[j0] - a[j2 + 1];
+    x0i = a[j0 + 1] + a[j2];
+    x1r = a[j0] + a[j2 + 1];
+    x1i = a[j0 + 1] - a[j2];
+    x2r = a[j1] - a[j3 + 1];
+    x2i = a[j1 + 1] + a[j3];
+    x3r = a[j1] + a[j3 + 1];
+    x3i = a[j1 + 1] - a[j3];
+    y0r = wk1r * x0r - wk1i * x0i;
+    y0i = wk1r * x0i + wk1i * x0r;
+    y2r = wk1i * x2r - wk1r * x2i;
+    y2i = wk1i * x2i + wk1r * x2r;
+    a[j0] = y0r + y2r;
+    a[j0 + 1] = y0i + y2i;
+    a[j1] = y0r - y2r;
+    a[j1 + 1] = y0i - y2i;
+    y0r = wk1i * x1r - wk1r * x1i;
+    y0i = wk1i * x1i + wk1r * x1r;
+    y2r = wk1r * x3r - wk1i * x3i;
+    y2i = wk1r * x3i + wk1i * x3r;
+    a[j2] = y0r - y2r;
+    a[j2 + 1] = y0i - y2i;
+    a[j3] = y0r + y2r;
+    a[j3 + 1] = y0i + y2i;
+}
+
+
+/*
+ * cftfx41: run the fixed-size kernels over four consecutive pieces of a[].
+ * For n == 128 the pieces are 32 entries long and use cftf161 / cftf162;
+ * for any other n they are 16 entries long and use cftf081 / cftf082.
+ * In both cases only the second piece uses the "2" variant.
+ */
+void cftfx41(int n, float32 *a, int nw, float32 *w)
+{
+    void cftf161(float32 *a, float32 *w);
+    void cftf162(float32 *a, float32 *w);
+    void cftf081(float32 *a, float32 *w);
+    void cftf082(float32 *a, float32 *w);
+    float32 *w8 = w + (nw - 8);
+
+    if (n != 128) {
+        cftf081(a, w8);
+        cftf082(a + 16, w8);
+        cftf081(a + 32, w8);
+        cftf081(a + 48, w8);
+        return;
+    }
+    cftf161(a, w8);
+    cftf162(a + 32, w + (nw - 32));
+    cftf161(a + 64, w8);
+    cftf161(a + 96, w8);
+}
+
+
+/*
+ * cftf161: fully unrolled in-place butterfly over a[0..31], handled as
+ * 16 adjacent (a[2*i], a[2*i + 1]) pairs.
+ *
+ * First half: four four-way butterflies over pairs {i, i+4, i+8, i+12}
+ * for i = 0..3 produce the intermediates y0..y15, applying coefficients
+ * w[1] (wn4r) and w[2], w[3] (wk1r, wk1i) where needed.
+ * Second half: four more four-way butterflies over {y12..y15},
+ * {y8..y11}, {y4..y7} and {y0..y3} write the results back to a[24..31],
+ * a[16..23], a[8..15] and a[0..7] respectively.
+ *
+ * x0..x3 are scratch values that are reused throughout, so statement
+ * order matters.
+ */
+void cftf161(float32 *a, float32 *w)
+{
+    float32 wn4r, wk1r, wk1i, 
+        x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, 
+        y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, 
+        y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i, 
+        y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i, 
+        y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i;
+    
+    wn4r = w[1];
+    wk1r = w[2];
+    wk1i = w[3];
+    /* pairs 0, 4, 8, 12 -> y0, y4, y8, y12 */
+    x0r = a[0] + a[16];
+    x0i = a[1] + a[17];
+    x1r = a[0] - a[16];
+    x1i = a[1] - a[17];
+    x2r = a[8] + a[24];
+    x2i = a[9] + a[25];
+    x3r = a[8] - a[24];
+    x3i = a[9] - a[25];
+    y0r = x0r + x2r;
+    y0i = x0i + x2i;
+    y4r = x0r - x2r;
+    y4i = x0i - x2i;
+    y8r = x1r - x3i;
+    y8i = x1i + x3r;
+    y12r = x1r + x3i;
+    y12i = x1i - x3r;
+    /* pairs 1, 5, 9, 13 -> y1, y5, y9, y13 */
+    x0r = a[2] + a[18];
+    x0i = a[3] + a[19];
+    x1r = a[2] - a[18];
+    x1i = a[3] - a[19];
+    x2r = a[10] + a[26];
+    x2i = a[11] + a[27];
+    x3r = a[10] - a[26];
+    x3i = a[11] - a[27];
+    y1r = x0r + x2r;
+    y1i = x0i + x2i;
+    y5r = x0r - x2r;
+    y5i = x0i - x2i;
+    x0r = x1r - x3i;
+    x0i = x1i + x3r;
+    y9r = wk1r * x0r - wk1i * x0i;
+    y9i = wk1r * x0i + wk1i * x0r;
+    x0r = x1r + x3i;
+    x0i = x1i - x3r;
+    y13r = wk1i * x0r - wk1r * x0i;
+    y13i = wk1i * x0i + wk1r * x0r;
+    /* pairs 2, 6, 10, 14 -> y2, y6, y10, y14 */
+    x0r = a[4] + a[20];
+    x0i = a[5] + a[21];
+    x1r = a[4] - a[20];
+    x1i = a[5] - a[21];
+    x2r = a[12] + a[28];
+    x2i = a[13] + a[29];
+    x3r = a[12] - a[28];
+    x3i = a[13] - a[29];
+    y2r = x0r + x2r;
+    y2i = x0i + x2i;
+    y6r = x0r - x2r;
+    y6i = x0i - x2i;
+    x0r = x1r - x3i;
+    x0i = x1i + x3r;
+    y10r = wn4r * (x0r - x0i);
+    y10i = wn4r * (x0i + x0r);
+    x0r = x1r + x3i;
+    x0i = x1i - x3r;
+    y14r = wn4r * (x0r + x0i);
+    y14i = wn4r * (x0i - x0r);
+    /* pairs 3, 7, 11, 15 -> y3, y7, y11, y15 */
+    x0r = a[6] + a[22];
+    x0i = a[7] + a[23];
+    x1r = a[6] - a[22];
+    x1i = a[7] - a[23];
+    x2r = a[14] + a[30];
+    x2i = a[15] + a[31];
+    x3r = a[14] - a[30];
+    x3i = a[15] - a[31];
+    y3r = x0r + x2r;
+    y3i = x0i + x2i;
+    y7r = x0r - x2r;
+    y7i = x0i - x2i;
+    x0r = x1r - x3i;
+    x0i = x1i + x3r;
+    y11r = wk1i * x0r - wk1r * x0i;
+    y11i = wk1i * x0i + wk1r * x0r;
+    x0r = x1r + x3i;
+    x0i = x1i - x3r;
+    y15r = wk1r * x0r - wk1i * x0i;
+    y15i = wk1r * x0i + wk1i * x0r;
+    /* y12..y15 -> a[24..31] */
+    x0r = y12r - y14r;
+    x0i = y12i - y14i;
+    x1r = y12r + y14r;
+    x1i = y12i + y14i;
+    x2r = y13r - y15r;
+    x2i = y13i - y15i;
+    x3r = y13r + y15r;
+    x3i = y13i + y15i;
+    a[24] = x0r + x2r;
+    a[25] = x0i + x2i;
+    a[26] = x0r - x2r;
+    a[27] = x0i - x2i;
+    a[28] = x1r - x3i;
+    a[29] = x1i + x3r;
+    a[30] = x1r + x3i;
+    a[31] = x1i - x3r;
+    /* y8..y11 -> a[16..23] */
+    x0r = y8r + y10r;
+    x0i = y8i + y10i;
+    x1r = y8r - y10r;
+    x1i = y8i - y10i;
+    x2r = y9r + y11r;
+    x2i = y9i + y11i;
+    x3r = y9r - y11r;
+    x3i = y9i - y11i;
+    a[16] = x0r + x2r;
+    a[17] = x0i + x2i;
+    a[18] = x0r - x2r;
+    a[19] = x0i - x2i;
+    a[20] = x1r - x3i;
+    a[21] = x1i + x3r;
+    a[22] = x1r + x3i;
+    a[23] = x1i - x3r;
+    /* y4..y7 -> a[8..15] */
+    x0r = y5r - y7i;
+    x0i = y5i + y7r;
+    x2r = wn4r * (x0r - x0i);
+    x2i = wn4r * (x0i + x0r);
+    x0r = y5r + y7i;
+    x0i = y5i - y7r;
+    x3r = wn4r * (x0r - x0i);
+    x3i = wn4r * (x0i + x0r);
+    x0r = y4r - y6i;
+    x0i = y4i + y6r;
+    x1r = y4r + y6i;
+    x1i = y4i - y6r;
+    a[8] = x0r + x2r;
+    a[9] = x0i + x2i;
+    a[10] = x0r - x2r;
+    a[11] = x0i - x2i;
+    a[12] = x1r - x3i;
+    a[13] = x1i + x3r;
+    a[14] = x1r + x3i;
+    a[15] = x1i - x3r;
+    /* y0..y3 -> a[0..7] */
+    x0r = y0r + y2r;
+    x0i = y0i + y2i;
+    x1r = y0r - y2r;
+    x1i = y0i - y2i;
+    x2r = y1r + y3r;
+    x2i = y1i + y3i;
+    x3r = y1r - y3r;
+    x3i = y1i - y3i;
+    a[0] = x0r + x2r;
+    a[1] = x0i + x2i;
+    a[2] = x0r - x2r;
+    a[3] = x0i - x2i;
+    a[4] = x1r - x3i;
+    a[5] = x1i + x3r;
+    a[6] = x1r + x3i;
+    a[7] = x1i - x3r;
+}
+
+
+void cftf162(float32 *a, float32 *w)
+{
+    float32 wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i, 
+        x0r, x0i, x1r, x1i, x2r, x2i, 
+        y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, 
+        y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i, 
+        y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i, 
+        y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i;
+    
+    wn4r = w[1];
+    wk1r = w[4];
+    wk1i = w[5];
+    wk3r = w[6];
+    wk3i = -w[7];
+    wk2r = w[8];
+    wk2i = w[9];
+    x1r = a[0] - a[17];
+    x1i = a[1] + a[16];
+    x0r = a[8] - a[25];
+    x0i = a[9] + a[24];
+    x2r = wn4r * (x0r - x0i);
+    x2i = wn4r * (x0i + x0r);
+    y0r = x1r + x2r;
+    y0i = x1i + x2i;
+    y4r = x1r - x2r;
+    y4i = x1i - x2i;
+    x1r = a[0] + a[17];
+    x1i = a[1] - a[16];
+    x0r = a[8] + a[25];
+    x0i = a[9] - a[24];
+    x2r = wn4r * (x0r - x0i);
+    x2i = wn4r * (x0i + x0r);
+    y8r = x1r - x2i;
+    y8i = x1i + x2r;
+    y12r = x1r + x2i;
+    y12i = x1i - x2r;
+    x0r = a[2] - a[19];
+    x0i = a[3] + a[18];
+    x1r = wk1r * x0r - wk1i * x0i;
+    x1i = wk1r * x0i + wk1i * x0r;
+    x0r = a[10] - a[27];
+    x0i = a[11] + a[26];
+    x2r = wk3i * x0r - wk3r * x0i;
+    x2i = wk3i * x0i + wk3r * x0r;
+    y1r = x1r + x2r;
+    y1i = x1i + x2i;
+    y5r = x1r - x2r;
+    y5i = x1i - x2i;
+    x0r = a[2] + a[19];
+    x0i = a[3] - a[18];
+    x1r = wk3r * x0r - wk3i * x0i;
+    x1i = wk3r * x0i + wk3i * x0r;
+    x0r = a[10] + a[27];
+    x0i = a[11] - a[26];
+    x2r = wk1r * x0r + wk1i * x0i;
+    x2i = wk1r * x0i - wk1i * x0r;
+    y9r = x1r - x2r;
+    y9i = x1i - x2i;
+    y13r = x1r + x2r;
+    y13i = x1i + x2i;
+    x0r = a[4] - a[21];
+    x0i = a[5] + a[20];
+    x1r = wk2r * x0r - wk2i * x0i;
+    x1i = wk2r * x0i + wk2i * x0r;
+    x0r = a[12] - a[29];
+    x0i = a[13] + a[28];
+    x2r = wk2i * x0r - wk2r * x0i;
+    x2i = wk2i * x0i + wk2r * x0r;
+    y2r = x1r + x2r;
+    y2i = x1i + x2i;
+    y6r = x1r - x2r;
+    y6i = x1i - x2i;
+    x0r = a[4] + a[21];
+    x0i = a[5] - a[20];
+    x1r = wk2i * x0r - wk2r * x0i;
+    x1i = wk2i * x0i + wk2r * x0r;
+    x0r = a[12] + a[29];
+    x0i = a[13] - a[28];
+    x2r = wk2r * x0r - wk2i * x0i;
+    x2i = wk2r * x0i + wk2i * x0r;
+    y10r = x1r - x2r;
+    y10i = x1i - x2i;
+    y14r = x1r + x2r;
+    y14i = x1i + x2i;
+    x0r = a[6] - a[23];
+    x0i = a[7] + a[22];
+    x1r = wk3r * x0r - wk3i * x0i;
+    x1i = wk3r * x0i + wk3i * x0r;
+    x0r = a[14] - a[31];
+    x0i = a[15] + a[30];
+    x2r = wk1i * x0r - wk1r * x0i;
+    x2i = wk1i * x0i + wk1r * x0r;
+    y3r = x1r + x2r;
+    y3i = x1i + x2i;
+    y7r = x1r - x2r;
+    y7i = x1i - x2i;
+    x0r = a[6] + a[23];
+    x0i = a[7] - a[22];
+    x1r = wk1i * x0r + wk1r * x0i;
+    x1i = wk1i * x0i - wk1r * x0r;
+    x0r = a[14] + a[31];
+    x0i = a[15] - a[30];
+    x2r = wk3i * x0r - wk3r * x0i;
+    x2i = wk3i * x0i + wk3r * x0r;
+    y11r = x1r + x2r;
+    y11i = x1i + x2i;
+    y15r = x1r - x2r;
+    y15i = x1i - x2i;
+    x1r = y0r + y2r;
+    x1i = y0i + y2i;
+    x2r = y1r + y3r;
+    x2i = y1i + y3i;
+    a[0] = x1r + x2r;
+    a[1] = x1i + x2i;
+    a[2] = x1r - x2r;
+    a[3] = x1i - x2i;
+    x1r = y0r - y2r;
+    x1i = y0i - y2i;
+    x2r = y1r - y3r;
+    x2i = y1i - y3i;
+    a[4] = x1r - x2i;
+    a[5] = x1i + x2r;
+    a[6] = x1r + x2i;
+    a[7] = x1i - x2r;
+    x1r = y4r - y6i;
+    x1i = y4i + y6r;
+    x0r = y5r - y7i;
+    x0i = y5i + y7r;
+    x2r = wn4r * (x0r - x0i);
+    x2i = wn4r * (x0i + x0r);
+    a[8] = x1r + x2r;
+    a[9] = x1i + x2i;
+    a[10] = x1r - x2r;
+    a[11] = x1i - x2i;
+    x1r = y4r + y6i;
+    x1i = y4i - y6r;
+    x0r = y5r + y7i;
+    x0i = y5i - y7r;
+    x2r = wn4r * (x0r - x0i);
+    x2i = wn4r * (x0i + x0r);
+    a[12] = x1r - x2i;
+    a[13] = x1i + x2r;
+    a[14] = x1r + x2i;
+    a[15] = x1i - x2r;
+    x1r = y8r + y10r;
+    x1i = y8i + y10i;
+    x2r = y9r - y11r;
+    x2i = y9i - y11i;
+    a[16] = x1r + x2r;
+    a[17] = x1i + x2i;
+    a[18] = x1r - x2r;
+    a[19] = x1i - x2i;
+    x1r = y8r - y10r;
+    x1i = y8i - y10i;
+    x2r = y9r + y11r;
+    x2i = y9i + y11i;
+    a[20] = x1r - x2i;
+    a[21] = x1i + x2r;
+    a[22] = x1r + x2i;
+    a[23] = x1i - x2r;
+    x1r = y12r - y14i;
+    x1i = y12i + y14r;
+    x0r = y13r + y15i;
+    x0i = y13i - y15r;
+    x2r = wn4r * (x0r - x0i);
+    x2i = wn4r * (x0i + x0r);
+    a[24] = x1r + x2r;
+    a[25] = x1i + x2i;
+    a[26] = x1r - x2r;
+    a[27] = x1i - x2i;
+    x1r = y12r + y14i;
+    x1i = y12i - y14r;
+    x0r = y13r - y15i;
+    x0i = y13i + y15r;
+    x2r = wn4r * (x0r - x0i);
+    x2i = wn4r * (x0i + x0r);
+    a[28] = x1r - x2i;
+    a[29] = x1i + x2r;
+    a[30] = x1r + x2i;
+    a[31] = x1i - x2r;
+}
+
+
+void cftf081(float32 *a, float32 *w)
+{
+    float32 wn4r, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, 
+        y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, 
+        y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i;
+    /* In-place pass over a[0..15]; only w[1] is read from w. The r/i suffixes suggest interleaved re/im pairs -- TODO confirm. */
+    wn4r = w[1];
+    x0r = a[0] + a[8]; /* x0..x3: sums/differences of a[0,1] with a[8,9] and of a[4,5] with a[12,13] */
+    x0i = a[1] + a[9];
+    x1r = a[0] - a[8];
+    x1i = a[1] - a[9];
+    x2r = a[4] + a[12];
+    x2i = a[5] + a[13];
+    x3r = a[4] - a[12];
+    x3i = a[5] - a[13];
+    y0r = x0r + x2r; /* y0, y2: sum and difference of x0 and x2 */
+    y0i = x0i + x2i;
+    y2r = x0r - x2r;
+    y2i = x0i - x2i;
+    y1r = x1r - x3i; /* y1, y3: x1 combined with x3, with x3's r and i parts swapped */
+    y1i = x1i + x3r;
+    y3r = x1r + x3i;
+    y3i = x1i - x3r;
+    x0r = a[2] + a[10]; /* x0..x3 are reused: same first stage for a[2,3] with a[10,11] and a[6,7] with a[14,15] */
+    x0i = a[3] + a[11];
+    x1r = a[2] - a[10];
+    x1i = a[3] - a[11];
+    x2r = a[6] + a[14];
+    x2i = a[7] + a[15];
+    x3r = a[6] - a[14];
+    x3i = a[7] - a[15];
+    y4r = x0r + x2r; /* y4, y6: sum and difference of the new x0 and x2 */
+    y4i = x0i + x2i;
+    y6r = x0r - x2r;
+    y6i = x0i - x2i;
+    x0r = x1r - x3i; /* x0, x2 overwritten with the x1/x3 combinations before scaling */
+    x0i = x1i + x3r;
+    x2r = x1r + x3i;
+    x2i = x1i - x3r;
+    y5r = wn4r * (x0r - x0i); /* y5, y7: difference and sum of the r/i parts, scaled by wn4r = w[1] */
+    y5i = wn4r * (x0r + x0i);
+    y7r = wn4r * (x2r - x2i);
+    y7i = wn4r * (x2r + x2i);
+    a[8] = y1r + y5r; /* write-back: a[8..15] are built from y1, y3, y5, y7 */
+    a[9] = y1i + y5i;
+    a[10] = y1r - y5r;
+    a[11] = y1i - y5i;
+    a[12] = y3r - y7i;
+    a[13] = y3i + y7r;
+    a[14] = y3r + y7i;
+    a[15] = y3i - y7r;
+    a[0] = y0r + y4r; /* a[0..7] are built from y0, y2, y4, y6 */
+    a[1] = y0i + y4i;
+    a[2] = y0r - y4r;
+    a[3] = y0i - y4i;
+    a[4] = y2r - y6i;
+    a[5] = y2i + y6r;
+    a[6] = y2r + y6i;
+    a[7] = y2i - y6r;
+}
+
+
+void cftf082(float32 *a, float32 *w)
+{
+    float32 wn4r, wk1r, wk1i, x0r, x0i, x1r, x1i, 
+        y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, 
+        y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i;
+    /* In-place pass over a[0..15]; reads w[1], w[2] and w[3] only. The r/i suffixes suggest interleaved re/im pairs -- TODO confirm. */
+    wn4r = w[1];
+    wk1r = w[2];
+    wk1i = w[3];
+    y0r = a[0] - a[9]; /* first stage: each even-index entry is paired with the odd-index entry of the pair 8 slots on, and vice versa */
+    y0i = a[1] + a[8];
+    y1r = a[0] + a[9];
+    y1i = a[1] - a[8];
+    x0r = a[4] - a[13];
+    x0i = a[5] + a[12];
+    y2r = wn4r * (x0r - x0i); /* y2, y3: difference and sum of the parts, scaled by wn4r */
+    y2i = wn4r * (x0i + x0r);
+    x0r = a[4] + a[13];
+    x0i = a[5] - a[12];
+    y3r = wn4r * (x0r - x0i);
+    y3i = wn4r * (x0i + x0r);
+    x0r = a[2] - a[11];
+    x0i = a[3] + a[10];
+    y4r = wk1r * x0r - wk1i * x0i; /* y4: complex-style product of (x0r, x0i) with (wk1r, wk1i) */
+    y4i = wk1r * x0i + wk1i * x0r;
+    x0r = a[2] + a[11];
+    x0i = a[3] - a[10];
+    y5r = wk1i * x0r - wk1r * x0i; /* y5: same product form with the swapped pair (wk1i, wk1r) */
+    y5i = wk1i * x0i + wk1r * x0r;
+    x0r = a[6] - a[15];
+    x0i = a[7] + a[14];
+    y6r = wk1i * x0r - wk1r * x0i; /* y6: swapped pair (wk1i, wk1r), as for y5 */
+    y6i = wk1i * x0i + wk1r * x0r;
+    x0r = a[6] + a[15];
+    x0i = a[7] - a[14];
+    y7r = wk1r * x0r - wk1i * x0i; /* y7: pair (wk1r, wk1i), as for y4 */
+    y7i = wk1r * x0i + wk1i * x0r;
+    x0r = y0r + y2r; /* output stage: a[0..3] from y0 + y2 and y4 + y6 */
+    x0i = y0i + y2i;
+    x1r = y4r + y6r;
+    x1i = y4i + y6i;
+    a[0] = x0r + x1r;
+    a[1] = x0i + x1i;
+    a[2] = x0r - x1r;
+    a[3] = x0i - x1i;
+    x0r = y0r - y2r; /* a[4..7] from y0 - y2 and y4 - y6 (x1 applied with parts swapped) */
+    x0i = y0i - y2i;
+    x1r = y4r - y6r;
+    x1i = y4i - y6i;
+    a[4] = x0r - x1i;
+    a[5] = x0i + x1r;
+    a[6] = x0r + x1i;
+    a[7] = x0i - x1r;
+    x0r = y1r - y3i; /* a[8..11] from y1, y3 (parts swapped) and y5 - y7 */
+    x0i = y1i + y3r;
+    x1r = y5r - y7r;
+    x1i = y5i - y7i;
+    a[8] = x0r + x1r;
+    a[9] = x0i + x1i;
+    a[10] = x0r - x1r;
+    a[11] = x0i - x1i;
+    x0r = y1r + y3i; /* a[12..15] from y1, y3 (parts swapped, opposite signs) and y5 + y7 */
+    x0i = y1i - y3r;
+    x1r = y5r + y7r;
+    x1i = y5i + y7i;
+    a[12] = x0r - x1i;
+    a[13] = x0i + x1r;
+    a[14] = x0r + x1i;
+    a[15] = x0i - x1r;
+}
+
+
+void cftf040(float32 *a)
+{
+    float32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+    
+    x0r = a[0] + a[4]; /* In-place pass over a[0..7]. x0/x1: sum and difference of a[0,1] with a[4,5] */
+    x0i = a[1] + a[5];
+    x1r = a[0] - a[4];
+    x1i = a[1] - a[5];
+    x2r = a[2] + a[6]; /* x2/x3: sum and difference of a[2,3] with a[6,7] */
+    x2i = a[3] + a[7];
+    x3r = a[2] - a[6];
+    x3i = a[3] - a[7];
+    a[0] = x0r + x2r; /* all inputs are held in locals by now, so a[] can be overwritten */
+    a[1] = x0i + x2i;
+    a[2] = x1r - x3i; /* x3 enters with its r and i parts swapped; cftb040 uses the opposite signs on these terms */
+    a[3] = x1i + x3r;
+    a[4] = x0r - x2r;
+    a[5] = x0i - x2i;
+    a[6] = x1r + x3i;
+    a[7] = x1i - x3r;
+}
+
+
+void cftb040(float32 *a)
+{
+    float32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+    
+    x0r = a[0] + a[4]; /* In-place pass over a[0..7]. x0/x1: sum and difference of a[0,1] with a[4,5] */
+    x0i = a[1] + a[5];
+    x1r = a[0] - a[4];
+    x1i = a[1] - a[5];
+    x2r = a[2] + a[6]; /* x2/x3: sum and difference of a[2,3] with a[6,7] */
+    x2i = a[3] + a[7];
+    x3r = a[2] - a[6];
+    x3i = a[3] - a[7];
+    a[0] = x0r + x2r; /* all inputs are held in locals by now, so a[] can be overwritten */
+    a[1] = x0i + x2i;
+    a[2] = x1r + x3i; /* x3 enters with its r and i parts swapped; cftf040 uses the opposite signs on these terms */
+    a[3] = x1i - x3r;
+    a[4] = x0r - x2r;
+    a[5] = x0i - x2i;
+    a[6] = x1r - x3i;
+    a[7] = x1i + x3r;
+}
+
+
+void cftx020(float32 *a)
+{
+    float32 x0r, x0i;
+    
+    x0r = a[0] - a[2]; /* In-place pass over a[0..3]: save the differences before a[0], a[1] are overwritten */
+    x0i = a[1] - a[3];
+    a[0] += a[2]; /* a[0], a[1] become the sums a[0]+a[2], a[1]+a[3] */
+    a[1] += a[3];
+    a[2] = x0r; /* a[2], a[3] become the saved differences */
+    a[3] = x0i;
+}
+
+
+void rftfsub(int n, float32 *a, int nc, float32 *c)
+{
+    int j, k, kk, ks, m;
+    float32 wkr, wki, xr, xi, yr, yi;
+    /* In-place pass over a[]: for even j in [2, n/2) updates the pairs (a[j], a[j+1]) and (a[n-j], a[n-j+1]) using weights read from c. */
+    m = n >> 1; /* m = n / 2 bounds j; k = n - j is the mirrored index */
+    ks = 2 * nc / m; /* integer step into c per iteration; NOTE(review): m must be non-zero, i.e. n >= 2 */
+    kk = 0;
+    for (j = 2; j < m; j += 2) {
+        k = n - j;
+        kk += ks;
+        wkr = 0.5 - c[nc - kk]; /* weights come from c[kk] and its mirror c[nc - kk] */
+        wki = c[kk];
+        xr = a[j] - a[k]; /* difference of the first entries, sum of the second entries */
+        xi = a[j + 1] + a[k + 1];
+        yr = wkr * xr - wki * xi; /* (yr, yi): complex-style product of (wkr, wki) and (xr, xi) */
+        yi = wkr * xi + wki * xr;
+        a[j] -= yr; /* at j subtract (yr, yi); at k add yr and subtract yi */
+        a[j + 1] -= yi;
+        a[k] += yr;
+        a[k + 1] -= yi;
+    }
+}
+
+
+void rftbsub(int n, float32 *a, int nc, float32 *c)
+{
+    int j, k, kk, ks, m;
+    float32 wkr, wki, xr, xi, yr, yi;
+    /* In-place pass over a[]: same loop and index pattern as rftfsub, but the wki terms in yr/yi carry the opposite sign. */
+    m = n >> 1; /* m = n / 2 bounds j; k = n - j is the mirrored index */
+    ks = 2 * nc / m; /* integer step into c per iteration; NOTE(review): m must be non-zero, i.e. n >= 2 */
+    kk = 0;
+    for (j = 2; j < m; j += 2) {
+        k = n - j;
+        kk += ks;
+        wkr = 0.5 - c[nc - kk]; /* weights come from c[kk] and its mirror c[nc - kk] */
+        wki = c[kk];
+        xr = a[j] - a[k]; /* difference of the first entries, sum of the second entries */
+        xi = a[j + 1] + a[k + 1];
+        yr = wkr * xr + wki * xi; /* (yr, yi): complex-style product of (wkr, -wki) and (xr, xi) */
+        yi = wkr * xi - wki * xr;
+        a[j] -= yr; /* at j subtract (yr, yi); at k add yr and subtract yi */
+        a[j + 1] -= yi;
+        a[k] += yr;
+        a[k + 1] -= yi;
+    }
+}
+
+
+void dctsub(int n, float32 *a, int nc, float32 *c)
+{
+    int j, k, kk, ks, m;
+    float32 wkr, wki, xr;
+    /* In-place pass over a[]: each pair (a[j], a[n-j]), 1 <= j < n/2, is replaced by two weighted combinations; a[n/2] is scaled by c[0]. */
+    m = n >> 1; /* m = n / 2: loop bound and index of the unpaired middle entry */
+    ks = nc / n; /* integer step into c per iteration; NOTE(review): n must be non-zero */
+    kk = 0;
+    for (j = 1; j < m; j++) {
+        k = n - j;
+        kk += ks;
+        wkr = c[kk] - c[nc - kk]; /* weights: difference and sum of c[kk] and its mirror c[nc - kk] */
+        wki = c[kk] + c[nc - kk];
+        xr = wki * a[j] - wkr * a[k]; /* held in a temporary because a[j] is overwritten on the next line */
+        a[j] = wkr * a[j] + wki * a[k];
+        a[k] = xr;
+    }
+    a[m] *= c[0];
+}
+
+
+void dstsub(int n, float32 *a, int nc, float32 *c)
+{
+    int j, k, kk, ks, m;
+    float32 wkr, wki, xr;
+    /* In-place pass over a[]: same loop and weights as dctsub, with the roles of a[j] and a[n-j] exchanged; a[n/2] is scaled by c[0]. */
+    m = n >> 1; /* m = n / 2: loop bound and index of the unpaired middle entry */
+    ks = nc / n; /* integer step into c per iteration; NOTE(review): n must be non-zero */
+    kk = 0;
+    for (j = 1; j < m; j++) {
+        k = n - j;
+        kk += ks;
+        wkr = c[kk] - c[nc - kk]; /* weights: difference and sum of c[kk] and its mirror c[nc - kk] */
+        wki = c[kk] + c[nc - kk];
+        xr = wki * a[k] - wkr * a[j]; /* held in a temporary because a[k] is overwritten on the next line */
+        a[k] = wkr * a[k] + wki * a[j];
+        a[j] = xr;
+    }
+    a[m] *= c[0];
 }
 
 }
-- 
cgit v1.2.3


From 7f8b7b37121e06cec76b2b1cab1b56920c2c4ef4 Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Fri, 22 Apr 2016 17:15:03 +0200
Subject: Fix build

---
 src/FilteredBackProjectionAlgorithm.cpp |   2 +-
 src/Fourier.cpp                         | 206 ++++++++++++--------------------
 2 files changed, 77 insertions(+), 131 deletions(-)

(limited to 'src')

diff --git a/src/FilteredBackProjectionAlgorithm.cpp b/src/FilteredBackProjectionAlgorithm.cpp
index 90efd52..70462f7 100644
--- a/src/FilteredBackProjectionAlgorithm.cpp
+++ b/src/FilteredBackProjectionAlgorithm.cpp
@@ -275,7 +275,7 @@ void CFilteredBackProjectionAlgorithm::performFiltering(CFloat32ProjectionData2D
 
 
 	float32* pf = new float32[2 * iAngleCount * zpDetector];
-	int *ip = new int[int(2+sqrt(zpDetector)+1)];
+	int *ip = new int[int(2+sqrt((float)zpDetector)+1)];
 	ip[0]=0;
 	float32 *w = new float32[zpDetector/2];
 
diff --git a/src/Fourier.cpp b/src/Fourier.cpp
index 5ca22e6..c33f7bd 100644
--- a/src/Fourier.cpp
+++ b/src/Fourier.cpp
@@ -27,6 +27,7 @@ $Id$
 */
 
 #include "astra/Fourier.h"
+#include <cmath>
 
 namespace astra {
 
@@ -320,11 +321,45 @@ Appendix :
 */
 
 
-void cdft(int n, int isgn, float32 *a, int *ip, float32 *w)
+static int cfttree(int n, int j, int k, float32 *a, int nw, float32 *w);
+static void bitrv208(float32 *a);
+static void bitrv208neg(float32 *a);
+static void bitrv216(float32 *a);
+static void bitrv216neg(float32 *a);
+static void bitrv2conj(int n, int *ip, float32 *a);
+static void bitrv2(int n, int *ip, float32 *a);
+static void cftb040(float32 *a);
+static void cftb1st(int n, float32 *a, float32 *w);
+static void cftbsub(int n, float32 *a, int *ip, int nw, float32 *w);
+static void cftf040(float32 *a);
+static void cftf081(float32 *a, float32 *w);
+static void cftf082(float32 *a, float32 *w);
+static void cftf161(float32 *a, float32 *w);
+static void cftf162(float32 *a, float32 *w);
+static void cftf1st(int n, float32 *a, float32 *w);
+static void cftfsub(int n, float32 *a, int *ip, int nw, float32 *w);
+static void cftfx41(int n, float32 *a, int nw, float32 *w);
+static void cftleaf(int n, int isplt, float32 *a, int nw, float32 *w);
+static void cftmdl1(int n, float32 *a, float32 *w);
+static void cftmdl2(int n, float32 *a, float32 *w);
+static void *cftrec1_th(void *p);
+static void *cftrec2_th(void *p);
+static void cftrec4(int n, float32 *a, int nw, float32 *w);
+static void cftx020(float32 *a);
+static void dctsub(int n, float32 *a, int nc, float32 *c);
+static void dstsub(int n, float32 *a, int nc, float32 *c);
+static void makect(int nc, int *ip, float32 *c);
+static void makeipt(int nw, int *ip);
+static void makewt(int nw, int *ip, float32 *w);
+static void rftbsub(int n, float32 *a, int nc, float32 *c);
+static void rftfsub(int n, float32 *a, int nc, float32 *c);
+#ifdef USE_CDFT_THREADS
+static void cftrec4_th(int n, float32 *a, int nw, float32 *w);
+#endif /* USE_CDFT_THREADS */
+    
+  
+_AstraExport void cdft(int n, int isgn, float32 *a, int *ip, float32 *w)
 {
-    void makewt(int nw, int *ip, float32 *w);
-    void cftfsub(int n, float32 *a, int *ip, int nw, float32 *w);
-    void cftbsub(int n, float32 *a, int *ip, int nw, float32 *w);
     int nw;
     
     nw = ip[0];
@@ -340,14 +375,8 @@ void cdft(int n, int isgn, float32 *a, int *ip, float32 *w)
 }
 
 
-void rdft(int n, int isgn, float32 *a, int *ip, float32 *w)
+_AstraExport void rdft(int n, int isgn, float32 *a, int *ip, float32 *w)
 {
-    void makewt(int nw, int *ip, float32 *w);
-    void makect(int nc, int *ip, float32 *c);
-    void cftfsub(int n, float32 *a, int *ip, int nw, float32 *w);
-    void cftbsub(int n, float32 *a, int *ip, int nw, float32 *w);
-    void rftfsub(int n, float32 *a, int nc, float32 *c);
-    void rftbsub(int n, float32 *a, int nc, float32 *c);
     int nw, nc;
     float32 xi;
     
@@ -384,15 +413,8 @@ void rdft(int n, int isgn, float32 *a, int *ip, float32 *w)
 }
 
 
-void ddct(int n, int isgn, float32 *a, int *ip, float32 *w)
+_AstraExport void ddct(int n, int isgn, float32 *a, int *ip, float32 *w)
 {
-    void makewt(int nw, int *ip, float32 *w);
-    void makect(int nc, int *ip, float32 *c);
-    void cftfsub(int n, float32 *a, int *ip, int nw, float32 *w);
-    void cftbsub(int n, float32 *a, int *ip, int nw, float32 *w);
-    void rftfsub(int n, float32 *a, int nc, float32 *c);
-    void rftbsub(int n, float32 *a, int nc, float32 *c);
-    void dctsub(int n, float32 *a, int nc, float32 *c);
     int j, nw, nc;
     float32 xr;
     
@@ -440,15 +462,8 @@ void ddct(int n, int isgn, float32 *a, int *ip, float32 *w)
 }
 
 
-void ddst(int n, int isgn, float32 *a, int *ip, float32 *w)
+_AstraExport void ddst(int n, int isgn, float32 *a, int *ip, float32 *w)
 {
-    void makewt(int nw, int *ip, float32 *w);
-    void makect(int nc, int *ip, float32 *c);
-    void cftfsub(int n, float32 *a, int *ip, int nw, float32 *w);
-    void cftbsub(int n, float32 *a, int *ip, int nw, float32 *w);
-    void rftfsub(int n, float32 *a, int nc, float32 *c);
-    void rftbsub(int n, float32 *a, int nc, float32 *c);
-    void dstsub(int n, float32 *a, int nc, float32 *c);
     int j, nw, nc;
     float32 xr;
     
@@ -496,13 +511,8 @@ void ddst(int n, int isgn, float32 *a, int *ip, float32 *w)
 }
 
 
-void dfct(int n, float32 *a, float32 *t, int *ip, float32 *w)
+_AstraExport void dfct(int n, float32 *a, float32 *t, int *ip, float32 *w)
 {
-    void makewt(int nw, int *ip, float32 *w);
-    void makect(int nc, int *ip, float32 *c);
-    void cftfsub(int n, float32 *a, int *ip, int nw, float32 *w);
-    void rftfsub(int n, float32 *a, int nc, float32 *c);
-    void dctsub(int n, float32 *a, int nc, float32 *c);
     int j, k, l, m, mh, nw, nc;
     float32 xr, xi, yr, yi;
     
@@ -589,13 +599,8 @@ void dfct(int n, float32 *a, float32 *t, int *ip, float32 *w)
 }
 
 
-void dfst(int n, float32 *a, float32 *t, int *ip, float32 *w)
+_AstraExport void dfst(int n, float32 *a, float32 *t, int *ip, float32 *w)
 {
-    void makewt(int nw, int *ip, float32 *w);
-    void makect(int nc, int *ip, float32 *c);
-    void cftfsub(int n, float32 *a, int *ip, int nw, float32 *w);
-    void rftfsub(int n, float32 *a, int nc, float32 *c);
-    void dstsub(int n, float32 *a, int nc, float32 *c);
     int j, k, l, m, mh, nw, nc;
     float32 xr, xi, yr, yi;
     
@@ -675,12 +680,8 @@ void dfst(int n, float32 *a, float32 *t, int *ip, float32 *w)
 
 /* -------- initializing routines -------- */
 
-
-#include <math.h>
-
-void makewt(int nw, int *ip, float32 *w)
+static void makewt(int nw, int *ip, float32 *w)
 {
-    void makeipt(int nw, int *ip);
     int j, nwh, nw0, nw1;
     float32 delta, wn4r, wk1r, wk1i, wk3r, wk3i;
     
@@ -739,7 +740,7 @@ void makewt(int nw, int *ip, float32 *w)
 }
 
 
-void makeipt(int nw, int *ip)
+static void makeipt(int nw, int *ip)
 {
     int j, l, m, m2, p, q;
     
@@ -759,7 +760,7 @@ void makeipt(int nw, int *ip)
 }
 
 
-void makect(int nc, int *ip, float32 *c)
+static void makect(int nc, int *ip, float32 *c)
 {
     int j, nch;
     float32 delta;
@@ -835,23 +836,8 @@ void makect(int nc, int *ip, float32 *c)
 #endif /* USE_CDFT_WINTHREADS */
 
 
-void cftfsub(int n, float32 *a, int *ip, int nw, float32 *w)
+static void cftfsub(int n, float32 *a, int *ip, int nw, float32 *w)
 {
-    void bitrv2(int n, int *ip, float32 *a);
-    void bitrv216(float32 *a);
-    void bitrv208(float32 *a);
-    void cftf1st(int n, float32 *a, float32 *w);
-    void cftrec4(int n, float32 *a, int nw, float32 *w);
-    void cftleaf(int n, int isplt, float32 *a, int nw, float32 *w);
-    void cftfx41(int n, float32 *a, int nw, float32 *w);
-    void cftf161(float32 *a, float32 *w);
-    void cftf081(float32 *a, float32 *w);
-    void cftf040(float32 *a);
-    void cftx020(float32 *a);
-#ifdef USE_CDFT_THREADS
-    void cftrec4_th(int n, float32 *a, int nw, float32 *w);
-#endif /* USE_CDFT_THREADS */
-    
     if (n > 8) {
         if (n > 32) {
             cftf1st(n, a, &w[nw - (n >> 2)]);
@@ -883,23 +869,8 @@ void cftfsub(int n, float32 *a, int *ip, int nw, float32 *w)
 }
 
 
-void cftbsub(int n, float32 *a, int *ip, int nw, float32 *w)
+static void cftbsub(int n, float32 *a, int *ip, int nw, float32 *w)
 {
-    void bitrv2conj(int n, int *ip, float32 *a);
-    void bitrv216neg(float32 *a);
-    void bitrv208neg(float32 *a);
-    void cftb1st(int n, float32 *a, float32 *w);
-    void cftrec4(int n, float32 *a, int nw, float32 *w);
-    void cftleaf(int n, int isplt, float32 *a, int nw, float32 *w);
-    void cftfx41(int n, float32 *a, int nw, float32 *w);
-    void cftf161(float32 *a, float32 *w);
-    void cftf081(float32 *a, float32 *w);
-    void cftb040(float32 *a);
-    void cftx020(float32 *a);
-#ifdef USE_CDFT_THREADS
-    void cftrec4_th(int n, float32 *a, int nw, float32 *w);
-#endif /* USE_CDFT_THREADS */
-    
     if (n > 8) {
         if (n > 32) {
             cftb1st(n, a, &w[nw - (n >> 2)]);
@@ -931,7 +902,7 @@ void cftbsub(int n, float32 *a, int *ip, int nw, float32 *w)
 }
 
 
-void bitrv2(int n, int *ip, float32 *a)
+static void bitrv2(int n, int *ip, float32 *a)
 {
     int j, j1, k, k1, l, m, nh, nm;
     float32 xr, xi, yr, yi;
@@ -1278,7 +1249,7 @@ void bitrv2(int n, int *ip, float32 *a)
 }
 
 
-void bitrv2conj(int n, int *ip, float32 *a)
+static void bitrv2conj(int n, int *ip, float32 *a)
 {
     int j, j1, k, k1, l, m, nh, nm;
     float32 xr, xi, yr, yi;
@@ -1633,7 +1604,7 @@ void bitrv2conj(int n, int *ip, float32 *a)
 }
 
 
-void bitrv216(float32 *a)
+static void bitrv216(float32 *a)
 {
     float32 x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, 
         x5r, x5i, x7r, x7i, x8r, x8i, x10r, x10i, 
@@ -1690,7 +1661,7 @@ void bitrv216(float32 *a)
 }
 
 
-void bitrv216neg(float32 *a)
+static void bitrv216neg(float32 *a)
 {
     float32 x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, 
         x5r, x5i, x6r, x6i, x7r, x7i, x8r, x8i, 
@@ -1760,7 +1731,7 @@ void bitrv216neg(float32 *a)
 }
 
 
-void bitrv208(float32 *a)
+static void bitrv208(float32 *a)
 {
     float32 x1r, x1i, x3r, x3i, x4r, x4i, x6r, x6i;
     
@@ -1783,7 +1754,7 @@ void bitrv208(float32 *a)
 }
 
 
-void bitrv208neg(float32 *a)
+static void bitrv208neg(float32 *a)
 {
     float32 x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, 
         x5r, x5i, x6r, x6i, x7r, x7i;
@@ -1819,7 +1790,7 @@ void bitrv208neg(float32 *a)
 }
 
 
-void cftf1st(int n, float32 *a, float32 *w)
+static void cftf1st(int n, float32 *a, float32 *w)
 {
     int j, j0, j1, j2, j3, k, m, mh;
     float32 wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i, 
@@ -2025,7 +1996,7 @@ void cftf1st(int n, float32 *a, float32 *w)
 }
 
 
-void cftb1st(int n, float32 *a, float32 *w)
+static void cftb1st(int n, float32 *a, float32 *w)
 {
     int j, j0, j1, j2, j3, k, m, mh;
     float32 wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i, 
@@ -2242,10 +2213,8 @@ struct cdft_arg_st {
 typedef struct cdft_arg_st cdft_arg_t;
 
 
-void cftrec4_th(int n, float32 *a, int nw, float32 *w)
+static void cftrec4_th(int n, float32 *a, int nw, float32 *w)
 {
-    void *cftrec1_th(void *p);
-    void *cftrec2_th(void *p);
     int i, idiv4, m, nthread;
     cdft_thread_t th[4];
     cdft_arg_t ag[4];
@@ -2276,11 +2245,8 @@ void cftrec4_th(int n, float32 *a, int nw, float32 *w)
 }
 
 
-void *cftrec1_th(void *p)
+static void *cftrec1_th(void *p)
 {
-    int cfttree(int n, int j, int k, float32 *a, int nw, float32 *w);
-    void cftleaf(int n, int isplt, float32 *a, int nw, float32 *w);
-    void cftmdl1(int n, float32 *a, float32 *w);
     int isplt, j, k, m, n, n0, nw;
     float32 *a, *w;
     
@@ -2305,11 +2271,8 @@ void *cftrec1_th(void *p)
 }
 
 
-void *cftrec2_th(void *p)
+static void *cftrec2_th(void *p)
 {
-    int cfttree(int n, int j, int k, float32 *a, int nw, float32 *w);
-    void cftleaf(int n, int isplt, float32 *a, int nw, float32 *w);
-    void cftmdl2(int n, float32 *a, float32 *w);
     int isplt, j, k, m, n, n0, nw;
     float32 *a, *w;
     
@@ -2337,11 +2300,8 @@ void *cftrec2_th(void *p)
 #endif /* USE_CDFT_THREADS */
 
 
-void cftrec4(int n, float32 *a, int nw, float32 *w)
+static void cftrec4(int n, float32 *a, int nw, float32 *w)
 {
-    int cfttree(int n, int j, int k, float32 *a, int nw, float32 *w);
-    void cftleaf(int n, int isplt, float32 *a, int nw, float32 *w);
-    void cftmdl1(int n, float32 *a, float32 *w);
     int isplt, j, k, m;
     
     m = n;
@@ -2361,8 +2321,6 @@ void cftrec4(int n, float32 *a, int nw, float32 *w)
 
 int cfttree(int n, int j, int k, float32 *a, int nw, float32 *w)
 {
-    void cftmdl1(int n, float32 *a, float32 *w);
-    void cftmdl2(int n, float32 *a, float32 *w);
     int i, isplt, m;
     
     if ((k & 3) != 0) {
@@ -2394,15 +2352,8 @@ int cfttree(int n, int j, int k, float32 *a, int nw, float32 *w)
 }
 
 
-void cftleaf(int n, int isplt, float32 *a, int nw, float32 *w)
+static void cftleaf(int n, int isplt, float32 *a, int nw, float32 *w)
 {
-    void cftmdl1(int n, float32 *a, float32 *w);
-    void cftmdl2(int n, float32 *a, float32 *w);
-    void cftf161(float32 *a, float32 *w);
-    void cftf162(float32 *a, float32 *w);
-    void cftf081(float32 *a, float32 *w);
-    void cftf082(float32 *a, float32 *w);
-    
     if (n == 512) {
         cftmdl1(128, a, &w[nw - 64]);
         cftf161(a, &w[nw - 8]);
@@ -2459,7 +2410,7 @@ void cftleaf(int n, int isplt, float32 *a, int nw, float32 *w)
 }
 
 
-void cftmdl1(int n, float32 *a, float32 *w)
+static void cftmdl1(int n, float32 *a, float32 *w)
 {
     int j, j0, j1, j2, j3, k, m, mh;
     float32 wn4r, wk1r, wk1i, wk3r, wk3i;
@@ -2569,7 +2520,7 @@ void cftmdl1(int n, float32 *a, float32 *w)
 }
 
 
-void cftmdl2(int n, float32 *a, float32 *w)
+static void cftmdl2(int n, float32 *a, float32 *w)
 {
     int j, j0, j1, j2, j3, k, kr, m, mh;
     float32 wn4r, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i;
@@ -2703,13 +2654,8 @@ void cftmdl2(int n, float32 *a, float32 *w)
 }
 
 
-void cftfx41(int n, float32 *a, int nw, float32 *w)
+static void cftfx41(int n, float32 *a, int nw, float32 *w)
 {
-    void cftf161(float32 *a, float32 *w);
-    void cftf162(float32 *a, float32 *w);
-    void cftf081(float32 *a, float32 *w);
-    void cftf082(float32 *a, float32 *w);
-    
     if (n == 128) {
         cftf161(a, &w[nw - 8]);
         cftf162(&a[32], &w[nw - 32]);
@@ -2724,7 +2670,7 @@ void cftfx41(int n, float32 *a, int nw, float32 *w)
 }
 
 
-void cftf161(float32 *a, float32 *w)
+static void cftf161(float32 *a, float32 *w)
 {
     float32 wn4r, wk1r, wk1i, 
         x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, 
@@ -2883,7 +2829,7 @@ void cftf161(float32 *a, float32 *w)
 }
 
 
-void cftf162(float32 *a, float32 *w)
+static void cftf162(float32 *a, float32 *w)
 {
     float32 wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i, 
         x0r, x0i, x1r, x1i, x2r, x2i, 
@@ -3066,7 +3012,7 @@ void cftf162(float32 *a, float32 *w)
 }
 
 
-void cftf081(float32 *a, float32 *w)
+static void cftf081(float32 *a, float32 *w)
 {
     float32 wn4r, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, 
         y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, 
@@ -3128,7 +3074,7 @@ void cftf081(float32 *a, float32 *w)
 }
 
 
-void cftf082(float32 *a, float32 *w)
+static void cftf082(float32 *a, float32 *w)
 {
     float32 wn4r, wk1r, wk1i, x0r, x0i, x1r, x1i, 
         y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, 
@@ -3200,7 +3146,7 @@ void cftf082(float32 *a, float32 *w)
 }
 
 
-void cftf040(float32 *a)
+static void cftf040(float32 *a)
 {
     float32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
     
@@ -3223,7 +3169,7 @@ void cftf040(float32 *a)
 }
 
 
-void cftb040(float32 *a)
+static void cftb040(float32 *a)
 {
     float32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
     
@@ -3246,7 +3192,7 @@ void cftb040(float32 *a)
 }
 
 
-void cftx020(float32 *a)
+static void cftx020(float32 *a)
 {
     float32 x0r, x0i;
     
@@ -3259,7 +3205,7 @@ void cftx020(float32 *a)
 }
 
 
-void rftfsub(int n, float32 *a, int nc, float32 *c)
+static void rftfsub(int n, float32 *a, int nc, float32 *c)
 {
     int j, k, kk, ks, m;
     float32 wkr, wki, xr, xi, yr, yi;
@@ -3284,7 +3230,7 @@ void rftfsub(int n, float32 *a, int nc, float32 *c)
 }
 
 
-void rftbsub(int n, float32 *a, int nc, float32 *c)
+static void rftbsub(int n, float32 *a, int nc, float32 *c)
 {
     int j, k, kk, ks, m;
     float32 wkr, wki, xr, xi, yr, yi;
@@ -3309,7 +3255,7 @@ void rftbsub(int n, float32 *a, int nc, float32 *c)
 }
 
 
-void dctsub(int n, float32 *a, int nc, float32 *c)
+static void dctsub(int n, float32 *a, int nc, float32 *c)
 {
     int j, k, kk, ks, m;
     float32 wkr, wki, xr;
@@ -3330,7 +3276,7 @@ void dctsub(int n, float32 *a, int nc, float32 *c)
 }
 
 
-void dstsub(int n, float32 *a, int nc, float32 *c)
+static void dstsub(int n, float32 *a, int nc, float32 *c)
 {
     int j, k, kk, ks, m;
     float32 wkr, wki, xr;
-- 
cgit v1.2.3