summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--cuda/2d/astra.cu2
-rw-r--r--cuda/2d/cgls.cu8
-rw-r--r--cuda/2d/em.cu4
-rw-r--r--cuda/2d/sart.cu8
-rw-r--r--cuda/2d/sirt.cu8
-rw-r--r--cuda/2d/util.cu10
-rw-r--r--cuda/2d/util.h4
7 files changed, 28 insertions, 16 deletions
diff --git a/cuda/2d/astra.cu b/cuda/2d/astra.cu
index 15e487c..f4d4717 100644
--- a/cuda/2d/astra.cu
+++ b/cuda/2d/astra.cu
@@ -612,7 +612,7 @@ float BPalgo::computeDiffNorm()
allocateProjectionData(D_projData, projPitch, dims);
- cudaMemcpy2D(D_projData, sizeof(float)*projPitch, D_sinoData, sizeof(float)*sinoPitch, sizeof(float)*dims.iProjDets, dims.iProjAngles, cudaMemcpyDeviceToDevice);
+ duplicateProjectionData(D_projData, D_sinoData, sinoPitch, dims);
callFP(D_volumeData, volumePitch, D_projData, projPitch, -1.0f);
float s = dotProduct2D(D_projData, projPitch, dims.iProjDets, dims.iProjAngles);
diff --git a/cuda/2d/cgls.cu b/cuda/2d/cgls.cu
index fce8beb..066ac5d 100644
--- a/cuda/2d/cgls.cu
+++ b/cuda/2d/cgls.cu
@@ -120,12 +120,12 @@ bool CGLS::iterate(unsigned int iterations)
if (!sliceInitialized) {
// copy sinogram
- cudaMemcpy2D(D_r, sizeof(float)*rPitch, D_sinoData, sizeof(float)*sinoPitch, sizeof(float)*(dims.iProjDets), dims.iProjAngles, cudaMemcpyDeviceToDevice);
+ duplicateProjectionData(D_r, D_sinoData, sinoPitch, dims);
// r = sino - A*x
if (useVolumeMask) {
// Use z as temporary storage here since it is unused
- cudaMemcpy2D(D_z, sizeof(float)*zPitch, D_volumeData, sizeof(float)*volumePitch, sizeof(float)*(dims.iVolWidth), dims.iVolHeight, cudaMemcpyDeviceToDevice);
+ duplicateVolumeData(D_z, D_volumeData, volumePitch, dims);
processVol<opMul>(D_z, D_maskData, zPitch, dims);
callFP(D_z, zPitch, D_r, rPitch, -1.0f);
} else {
@@ -189,11 +189,11 @@ float CGLS::computeDiffNorm()
// used outside of iterations.
// copy sinogram to w
- cudaMemcpy2D(D_w, sizeof(float)*wPitch, D_sinoData, sizeof(float)*sinoPitch, sizeof(float)*(dims.iProjDets), dims.iProjAngles, cudaMemcpyDeviceToDevice);
+ duplicateProjectionData(D_w, D_sinoData, sinoPitch, dims);
// do FP, subtracting projection from sinogram
if (useVolumeMask) {
- cudaMemcpy2D(D_z, sizeof(float)*zPitch, D_volumeData, sizeof(float)*volumePitch, sizeof(float)*(dims.iVolWidth), dims.iVolHeight, cudaMemcpyDeviceToDevice);
+ duplicateVolumeData(D_z, D_volumeData, volumePitch, dims);
processVol<opMul>(D_z, D_maskData, zPitch, dims);
callFP(D_z, zPitch, D_w, wPitch, -1.0f);
} else {
diff --git a/cuda/2d/em.cu b/cuda/2d/em.cu
index c75f250..ebb76b5 100644
--- a/cuda/2d/em.cu
+++ b/cuda/2d/em.cu
@@ -150,11 +150,11 @@ bool EM::iterate(unsigned int iterations)
float EM::computeDiffNorm()
{
// copy sinogram to projection data
- cudaMemcpy2D(D_projData, sizeof(float)*projPitch, D_sinoData, sizeof(float)*sinoPitch, sizeof(float)*(dims.iProjDets), dims.iProjAngles, cudaMemcpyDeviceToDevice);
+ duplicateProjectionData(D_projData, D_sinoData, sinoPitch, dims);
// do FP, subtracting projection from sinogram
if (useVolumeMask) {
- cudaMemcpy2D(D_tmpData, sizeof(float)*tmpPitch, D_volumeData, sizeof(float)*volumePitch, sizeof(float)*(dims.iVolWidth), dims.iVolHeight, cudaMemcpyDeviceToDevice);
+ duplicateVolumeData(D_tmpData, D_volumeData, volumePitch, dims);
processVol<opMul>(D_tmpData, D_maskData, tmpPitch, dims);
callFP(D_tmpData, tmpPitch, D_projData, projPitch, -1.0f);
} else {
diff --git a/cuda/2d/sart.cu b/cuda/2d/sart.cu
index 048661f..64d6f28 100644
--- a/cuda/2d/sart.cu
+++ b/cuda/2d/sart.cu
@@ -180,11 +180,11 @@ bool SART::iterate(unsigned int iterations)
}
// copy one line of sinogram to projection data
- cudaMemcpy2D(D_projData, sizeof(float)*projPitch, D_sinoData + angle*sinoPitch, sizeof(float)*sinoPitch, sizeof(float)*(dims.iProjDets), 1, cudaMemcpyDeviceToDevice);
+ duplicateProjectionData(D_projData, D_sinoData, sinoPitch, dims);
// do FP, subtracting projection from sinogram
if (useVolumeMask) {
- cudaMemcpy2D(D_tmpData, sizeof(float)*tmpPitch, D_volumeData, sizeof(float)*volumePitch, sizeof(float)*(dims.iVolWidth), dims.iVolHeight, cudaMemcpyDeviceToDevice);
+ duplicateVolumeData(D_tmpData, D_volumeData, volumePitch, dims);
processVol<opMul>(D_tmpData, D_maskData, tmpPitch, dims);
callFP_SART(D_tmpData, tmpPitch, D_projData, projPitch, angle, -1.0f);
} else {
@@ -223,11 +223,11 @@ float SART::computeDiffNorm()
zeroProjectionData(D_p, pPitch, dims);
// copy sinogram to D_p
- cudaMemcpy2D(D_p, sizeof(float)*pPitch, D_sinoData, sizeof(float)*sinoPitch, sizeof(float)*(dims.iProjDets), dims.iProjAngles, cudaMemcpyDeviceToDevice);
+ duplicateProjectionData(D_p, D_sinoData, sinoPitch, dims);
// do FP, subtracting projection from sinogram
if (useVolumeMask) {
- cudaMemcpy2D(D_tmpData, sizeof(float)*tmpPitch, D_volumeData, sizeof(float)*volumePitch, sizeof(float)*(dims.iVolWidth), dims.iVolHeight, cudaMemcpyDeviceToDevice);
+ duplicateVolumeData(D_tmpData, D_volumeData, volumePitch, dims);
processVol<opMul>(D_tmpData, D_maskData, tmpPitch, dims);
callFP(D_tmpData, tmpPitch, D_projData, projPitch, -1.0f);
} else {
diff --git a/cuda/2d/sirt.cu b/cuda/2d/sirt.cu
index c402864..d34a180 100644
--- a/cuda/2d/sirt.cu
+++ b/cuda/2d/sirt.cu
@@ -191,11 +191,11 @@ bool SIRT::iterate(unsigned int iterations)
for (unsigned int iter = 0; iter < iterations && !shouldAbort; ++iter) {
// copy sinogram to projection data
- cudaMemcpy2D(D_projData, sizeof(float)*projPitch, D_sinoData, sizeof(float)*sinoPitch, sizeof(float)*(dims.iProjDets), dims.iProjAngles, cudaMemcpyDeviceToDevice);
+ duplicateProjectionData(D_projData, D_sinoData, projPitch, dims);
// do FP, subtracting projection from sinogram
if (useVolumeMask) {
- cudaMemcpy2D(D_tmpData, sizeof(float)*tmpPitch, D_volumeData, sizeof(float)*volumePitch, sizeof(float)*(dims.iVolWidth), dims.iVolHeight, cudaMemcpyDeviceToDevice);
+ duplicateVolumeData(D_tmpData, D_volumeData, volumePitch, dims);
processVol<opMul>(D_tmpData, D_maskData, tmpPitch, dims);
callFP(D_tmpData, tmpPitch, D_projData, projPitch, -1.0f);
} else {
@@ -226,11 +226,11 @@ bool SIRT::iterate(unsigned int iterations)
float SIRT::computeDiffNorm()
{
// copy sinogram to projection data
- cudaMemcpy2D(D_projData, sizeof(float)*projPitch, D_sinoData, sizeof(float)*sinoPitch, sizeof(float)*(dims.iProjDets), dims.iProjAngles, cudaMemcpyDeviceToDevice);
+ duplicateProjectionData(D_projData, D_sinoData, projPitch, dims);
// do FP, subtracting projection from sinogram
if (useVolumeMask) {
- cudaMemcpy2D(D_tmpData, sizeof(float)*tmpPitch, D_volumeData, sizeof(float)*volumePitch, sizeof(float)*(dims.iVolWidth), dims.iVolHeight, cudaMemcpyDeviceToDevice);
+ duplicateVolumeData(D_tmpData, D_volumeData, volumePitch, dims);
processVol<opMul>(D_tmpData, D_maskData, tmpPitch, dims);
callFP(D_tmpData, tmpPitch, D_projData, projPitch, -1.0f);
} else {
diff --git a/cuda/2d/util.cu b/cuda/2d/util.cu
index 8d3b625..dba70d9 100644
--- a/cuda/2d/util.cu
+++ b/cuda/2d/util.cu
@@ -129,6 +129,15 @@ void zeroProjectionData(float* D_ptr, unsigned int pitch, const SDimensions& dim
zeroVolume(D_ptr, pitch, dims.iProjDets, dims.iProjAngles);
}
+void duplicateVolumeData(float* D_dst, float* D_src, unsigned int pitch, const SDimensions& dims)
+{
+ cudaMemcpy2D(D_dst, sizeof(float)*pitch, D_src, sizeof(float)*pitch, sizeof(float)*dims.iVolWidth, dims.iVolHeight, cudaMemcpyDeviceToDevice);
+}
+
+void duplicateProjectionData(float* D_dst, float* D_src, unsigned int pitch, const SDimensions& dims)
+{
+ cudaMemcpy2D(D_dst, sizeof(float)*pitch, D_src, sizeof(float)*pitch, sizeof(float)*dims.iProjDets, dims.iProjAngles, cudaMemcpyDeviceToDevice);
+}
template <unsigned int blockSize>
__global__ void reduce1D(float *g_idata, float *g_odata, unsigned int n)
@@ -206,7 +215,6 @@ __global__ void reduce2D(float *g_idata, float *g_odata,
float dotProduct2D(float* D_data, unsigned int pitch,
unsigned int width, unsigned int height)
{
-#warning FIX MEMORY ORDER
unsigned int bx = (width + 15) / 16;
unsigned int by = (height + 127) / 128;
unsigned int shared_mem2 = sizeof(float) * 16 * 16;
diff --git a/cuda/2d/util.h b/cuda/2d/util.h
index 83cb794..c0ec49e 100644
--- a/cuda/2d/util.h
+++ b/cuda/2d/util.h
@@ -80,6 +80,10 @@ bool allocateProjectionData(float*& D_ptr, unsigned int& pitch, const SDimension
void zeroVolumeData(float* D_ptr, unsigned int pitch, const SDimensions& dims);
void zeroProjectionData(float* D_ptr, unsigned int pitch, const SDimensions& dims);
+void duplicateVolumeData(float* D_dst, float* D_src, unsigned int pitch, const SDimensions& dims);
+void duplicateProjectionData(float* D_dst, float* D_src, unsigned int pitch, const SDimensions& dims);
+
+
bool cudaTextForceKernelsCompletion();
void reportCudaError(cudaError_t err);