diff options
-rw-r--r-- | Core/regularizers_CPU/FGP_TV_core.c | 2 | ||||
-rw-r--r-- | Core/regularizers_CPU/FGP_TV_core.h | 2 | ||||
-rw-r--r-- | Core/regularizers_CPU/ROF_TV_core.c | 2 | ||||
-rw-r--r-- | Core/regularizers_CPU/ROF_TV_core.h | 2 | ||||
-rwxr-xr-x | Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.cu (renamed from Core/regularizers_GPU/TV_FGP/FGP_TV_GPU_core.cu) | 4 | ||||
-rwxr-xr-x | Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.h (renamed from Core/regularizers_GPU/TV_FGP/FGP_TV_GPU_core.h) | 6 | ||||
-rwxr-xr-x | Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.h | 8 | ||||
-rwxr-xr-x | Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.cu (renamed from Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu) | 2 | ||||
-rwxr-xr-x | Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.h | 8 | ||||
-rw-r--r-- | Wrappers/Python/src/cpu_regularizers.cpp | 546 | ||||
-rw-r--r-- | Wrappers/Python/src/gpu_regularizers.pyx | 69 | ||||
-rw-r--r-- | Wrappers/Python/test/test_cpu_vs_gpu.py | 10 | ||||
-rw-r--r-- | Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py | 8 |
13 files changed, 371 insertions, 298 deletions
diff --git a/Core/regularizers_CPU/FGP_TV_core.c b/Core/regularizers_CPU/FGP_TV_core.c index 304848d..2f1439d 100644 --- a/Core/regularizers_CPU/FGP_TV_core.c +++ b/Core/regularizers_CPU/FGP_TV_core.c @@ -37,7 +37,7 @@ limitations under the License. * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems" */ -float FGP_TV_CPU(float *Input, float *Output, float lambda, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ) +float TV_FGP_CPU(float *Input, float *Output, float lambda, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ) { int ll, j, DimTotal; float re, re1; diff --git a/Core/regularizers_CPU/FGP_TV_core.h b/Core/regularizers_CPU/FGP_TV_core.h index b591819..98ceaec 100644 --- a/Core/regularizers_CPU/FGP_TV_core.h +++ b/Core/regularizers_CPU/FGP_TV_core.h @@ -47,7 +47,7 @@ limitations under the License. #ifdef __cplusplus extern "C" { #endif -CCPI_EXPORT float FGP_TV_CPU(float *Input, float *Output, float lambda, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ); +CCPI_EXPORT float TV_FGP_CPU(float *Input, float *Output, float lambda, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ); CCPI_EXPORT float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, int dimX, int dimY); CCPI_EXPORT float Grad_func2D(float *P1, float *P2, float *D, float *R1, float *R2, float lambda, int dimX, int dimY); diff --git a/Core/regularizers_CPU/ROF_TV_core.c b/Core/regularizers_CPU/ROF_TV_core.c index fd47c3f..b2c6f00 100644 --- a/Core/regularizers_CPU/ROF_TV_core.c +++ b/Core/regularizers_CPU/ROF_TV_core.c @@ -46,7 +46,7 @@ int sign(float x) { */ /* Running iterations of TV-ROF function */ -float TV_ROF(float *Input, float *Output, int dimX, int dimY, int dimZ, int iterationsNumb, float tau, float lambda) +float TV_ROF_CPU(float *Input, float *Output, int dimX, int dimY, int dimZ, int iterationsNumb, float tau, float lambda) { float *D1, *D2, *D3; int i, DimTotal; diff --git a/Core/regularizers_CPU/ROF_TV_core.h b/Core/regularizers_CPU/ROF_TV_core.h index 5d69d27..b32d0d5 100644 --- a/Core/regularizers_CPU/ROF_TV_core.h +++ b/Core/regularizers_CPU/ROF_TV_core.h @@ -47,7 +47,7 @@ limitations under the License. extern "C" { #endif CCPI_EXPORT float TV_kernel(float *D1, float *D2, float *D3, float *B, float *A, float lambda, float tau, int dimY, int dimX, int dimZ); -CCPI_EXPORT float TV_ROF(float *Input, float *Output, int dimX, int dimY, int dimZ, int iterationsNumb, float tau, float lambda); +CCPI_EXPORT float TV_ROF_CPU(float *Input, float *Output, int dimX, int dimY, int dimZ, int iterationsNumb, float tau, float lambda); CCPI_EXPORT float D1_func(float *A, float *D1, int dimY, int dimX, int dimZ); CCPI_EXPORT float D2_func(float *A, float *D2, int dimY, int dimX, int dimZ); CCPI_EXPORT float D3_func(float *A, float *D3, int dimY, int dimX, int dimZ); diff --git a/Core/regularizers_GPU/TV_FGP/FGP_TV_GPU_core.cu b/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.cu index 21a95c9..0533a85 100755 --- a/Core/regularizers_GPU/TV_FGP/FGP_TV_GPU_core.cu +++ b/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.cu @@ -17,7 +17,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -#include "FGP_TV_GPU_core.h" +#include "TV_FGP_GPU_core.h" #include <thrust/device_vector.h> #include <thrust/transform_reduce.h> @@ -341,7 +341,7 @@ __global__ void copy_kernel3D(float *Input, float* Output, int N, int M, int Z, /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ ////////////MAIN HOST FUNCTION /////////////// -extern "C" void FGP_TV_GPU(float *Input, float *Output, float lambda, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ) +extern "C" void TV_FGP_GPU(float *Input, float *Output, float lambda, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ) { int deviceCount = -1; // number of devices cudaGetDeviceCount(&deviceCount); diff --git a/Core/regularizers_GPU/TV_FGP/FGP_TV_GPU_core.h b/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.h index a5d3f73..15c7120 100755 --- a/Core/regularizers_GPU/TV_FGP/FGP_TV_GPU_core.h +++ b/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.h @@ -2,9 +2,9 @@ #include <stdlib.h> #include <memory.h> -#ifndef _FGP_TV_GPU_ -#define _FGP_TV_GPU_ +#ifndef _TV_FGP_GPU_ +#define _TV_FGP_GPU_ -extern "C" void FGP_TV_GPU(float *Input, float *Output, float lambda, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ); +extern "C" void TV_FGP_GPU(float *Input, float *Output, float lambda, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ); #endif diff --git a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.h b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.h deleted file mode 100755 index 2938d2f..0000000 --- a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef __TVGPU_H__ -#define __TVGPU_H__ -#include "CCPiDefines.h" -#include <stdio.h> - -extern "C" CCPI_EXPORT void TV_ROF_GPU_kernel(float* Input, float* Output, int N, int M, int Z, int iter, float tau, float lambda); - -#endif diff --git a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.cu index b67b53b..480855f 100755 --- a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu +++ b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.cu @@ -17,7 +17,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -#include "TV_ROF_GPU.h" +#include "TV_ROF_GPU_core.h" /* C-OMP implementation of ROF-TV denoising/regularization model [1] (2D/3D case) * diff --git a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.h b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.h new file mode 100755 index 0000000..8b64d99 --- /dev/null +++ b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.h @@ -0,0 +1,8 @@ +#ifndef __TVGPU_H__ +#define __TVGPU_H__ +#include "CCPiDefines.h" +#include <stdio.h> + +extern "C" CCPI_EXPORT void TV_ROF_GPU(float* Input, float* Output, int N, int M, int Z, int iter, float tau, float lambda); + +#endif diff --git a/Wrappers/Python/src/cpu_regularizers.cpp b/Wrappers/Python/src/cpu_regularizers.cpp index e311570..43d5d11 100644 --- a/Wrappers/Python/src/cpu_regularizers.cpp +++ b/Wrappers/Python/src/cpu_regularizers.cpp @@ -27,7 +27,7 @@ limitations under the License. #include "boost/tuple/tuple.hpp" #include "SplitBregman_TV_core.h" -#include "FGP_TV_core.h" +//#include "FGP_TV_core.h" #include "LLT_model_core.h" #include "PatchBased_Regul_core.h" #include "TGV_PD_core.h" @@ -305,289 +305,289 @@ bp::list SplitBregman_TV(np::ndarray input, double d_mu, int iter, double d_epsi -bp::list FGP_TV(np::ndarray input, double d_mu, int iter, double d_epsil, int methTV) { +//bp::list FGP_TV(np::ndarray input, double d_mu, int iter, double d_epsil, int methTV) { - // the result is in the following list - bp::list result; + //// the result is in the following list + //bp::list result; - int number_of_dims, dimX, dimY, dimZ, ll, j, count; - float *A, *D = NULL, *D_old = NULL, *P1 = NULL, *P2 = NULL, *P3 = NULL, *P1_old = NULL, *P2_old = NULL, *P3_old = NULL, *R1 = NULL, *R2 = NULL, *R3 = NULL; - float lambda, tk, tkp1, re, re1, re_old, epsil, funcval; + //int number_of_dims, dimX, dimY, dimZ, ll, j, count; + //float *A, *D = NULL, *D_old = NULL, *P1 = NULL, *P2 = NULL, *P3 = NULL, *P1_old = NULL, *P2_old = NULL, *P3_old = NULL, *R1 = NULL, *R2 = NULL, *R3 = NULL; + //float lambda, tk, tkp1, re, re1, re_old, epsil, funcval; - //number_of_dims = mxGetNumberOfDimensions(prhs[0]); - //dim_array = mxGetDimensions(prhs[0]); - - number_of_dims = input.get_nd(); - int dim_array[3]; + ////number_of_dims = mxGetNumberOfDimensions(prhs[0]); + ////dim_array = mxGetDimensions(prhs[0]); - dim_array[0] = input.shape(0); - dim_array[1] = input.shape(1); - if (number_of_dims == 2) { - dim_array[2] = -1; - } - else { - dim_array[2] = input.shape(2); - } - // Parameter handling is be done in Python - ///*Handling Matlab input data*/ - //if ((nrhs < 2) || (nrhs > 5)) mexErrMsgTxt("At least 2 parameters is required: Image(2D/3D), Regularization parameter. The full list of parameters: Image(2D/3D), Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1')"); - - ///*Handling Matlab input data*/ - //A = (float *)mxGetData(prhs[0]); /*noisy image (2D/3D) */ - A = reinterpret_cast<float *>(input.get_data()); - - //mu = (float)mxGetScalar(prhs[1]); /* regularization parameter */ - lambda = (float)d_mu; - - //iter = 35; /* default iterations number */ + //number_of_dims = input.get_nd(); + //int dim_array[3]; - //epsil = 0.0001; /* default tolerance constant */ - epsil = (float)d_epsil; - //methTV = 0; /* default isotropic TV penalty */ - //if ((nrhs == 3) || (nrhs == 4) || (nrhs == 5)) iter = (int)mxGetScalar(prhs[2]); /* iterations number */ - //if ((nrhs == 4) || (nrhs == 5)) epsil = (float)mxGetScalar(prhs[3]); /* tolerance constant */ - //if (nrhs == 5) { - // char *penalty_type; - // penalty_type = mxArrayToString(prhs[4]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */ - // if ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0)) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',"); - // if (strcmp(penalty_type, "l1") == 0) methTV = 1; /* enable 'l1' penalty */ - // mxFree(penalty_type); + //dim_array[0] = input.shape(0); + //dim_array[1] = input.shape(1); + //if (number_of_dims == 2) { + //dim_array[2] = -1; //} - //if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) { mexErrMsgTxt("The input image must be in a single precision"); } - - //plhs[1] = mxCreateNumericMatrix(1, 1, mxSINGLE_CLASS, mxREAL); - bp::tuple shape1 = bp::make_tuple(dim_array[0], dim_array[1]); - np::dtype dtype = np::dtype::get_builtin<float>(); - np::ndarray out1 = np::zeros(shape1, dtype); + //else { + //dim_array[2] = input.shape(2); + //} + //// Parameter handling is be done in Python + /////*Handling Matlab input data*/ + ////if ((nrhs < 2) || (nrhs > 5)) mexErrMsgTxt("At least 2 parameters is required: Image(2D/3D), Regularization parameter. The full list of parameters: Image(2D/3D), Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1')"); + + /////*Handling Matlab input data*/ + ////A = (float *)mxGetData(prhs[0]); /*noisy image (2D/3D) */ + //A = reinterpret_cast<float *>(input.get_data()); + + ////mu = (float)mxGetScalar(prhs[1]); /* regularization parameter */ + //lambda = (float)d_mu; + + ////iter = 35; /* default iterations number */ + + ////epsil = 0.0001; /* default tolerance constant */ + //epsil = (float)d_epsil; + ////methTV = 0; /* default isotropic TV penalty */ + ////if ((nrhs == 3) || (nrhs == 4) || (nrhs == 5)) iter = (int)mxGetScalar(prhs[2]); /* iterations number */ + ////if ((nrhs == 4) || (nrhs == 5)) epsil = (float)mxGetScalar(prhs[3]); /* tolerance constant */ + ////if (nrhs == 5) { + //// char *penalty_type; + //// penalty_type = mxArrayToString(prhs[4]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */ + //// if ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0)) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',"); + //// if (strcmp(penalty_type, "l1") == 0) methTV = 1; /* enable 'l1' penalty */ + //// mxFree(penalty_type); + ////} + ////if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) { mexErrMsgTxt("The input image must be in a single precision"); } + + ////plhs[1] = mxCreateNumericMatrix(1, 1, mxSINGLE_CLASS, mxREAL); + //bp::tuple shape1 = bp::make_tuple(dim_array[0], dim_array[1]); + //np::dtype dtype = np::dtype::get_builtin<float>(); + //np::ndarray out1 = np::zeros(shape1, dtype); - //float *funcvalA = (float *)mxGetData(plhs[1]); - float * funcvalA = reinterpret_cast<float *>(out1.get_data()); - //if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) { mexErrMsgTxt("The input image must be in a single precision"); } - - /*Handling Matlab output data*/ - dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2]; - - tk = 1.0f; - tkp1 = 1.0f; - count = 1; - re_old = 0.0f; - - if (number_of_dims == 2) { - dimZ = 1; /*2D case*/ - /*D = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - D_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - P1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - P2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - P1_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - P2_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - R1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - R2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));*/ - - bp::tuple shape = bp::make_tuple(dim_array[0], dim_array[1]); - np::dtype dtype = np::dtype::get_builtin<float>(); - - - np::ndarray npD = np::zeros(shape, dtype); - np::ndarray npD_old = np::zeros(shape, dtype); - np::ndarray npP1 = np::zeros(shape, dtype); - np::ndarray npP2 = np::zeros(shape, dtype); - np::ndarray npP1_old = np::zeros(shape, dtype); - np::ndarray npP2_old = np::zeros(shape, dtype); - np::ndarray npR1 = np::zeros(shape, dtype); - np::ndarray npR2 = np::zeros(shape, dtype); - - D = reinterpret_cast<float *>(npD.get_data()); - D_old = reinterpret_cast<float *>(npD_old.get_data()); - P1 = reinterpret_cast<float *>(npP1.get_data()); - P2 = reinterpret_cast<float *>(npP2.get_data()); - P1_old = reinterpret_cast<float *>(npP1_old.get_data()); - P2_old = reinterpret_cast<float *>(npP2_old.get_data()); - R1 = reinterpret_cast<float *>(npR1.get_data()); - R2 = reinterpret_cast<float *>(npR2.get_data()); - - /* begin iterations */ - for (ll = 0; ll<iter; ll++) { - /* computing the gradient of the objective function */ - Obj_func2D(A, D, R1, R2, lambda, dimX, dimY); - - /*Taking a step towards minus of the gradient*/ - Grad_func2D(P1, P2, D, R1, R2, lambda, dimX, dimY); + ////float *funcvalA = (float *)mxGetData(plhs[1]); + //float * funcvalA = reinterpret_cast<float *>(out1.get_data()); + ////if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) { mexErrMsgTxt("The input image must be in a single precision"); } + + ///*Handling Matlab output data*/ + //dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2]; + + //tk = 1.0f; + //tkp1 = 1.0f; + //count = 1; + //re_old = 0.0f; + + //if (number_of_dims == 2) { + //dimZ = 1; /*2D case*/ + ///*D = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); + //D_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); + //P1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); + //P2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); + //P1_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); + //P2_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); + //R1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); + //R2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));*/ + + //bp::tuple shape = bp::make_tuple(dim_array[0], dim_array[1]); + //np::dtype dtype = np::dtype::get_builtin<float>(); + + + //np::ndarray npD = np::zeros(shape, dtype); + //np::ndarray npD_old = np::zeros(shape, dtype); + //np::ndarray npP1 = np::zeros(shape, dtype); + //np::ndarray npP2 = np::zeros(shape, dtype); + //np::ndarray npP1_old = np::zeros(shape, dtype); + //np::ndarray npP2_old = np::zeros(shape, dtype); + //np::ndarray npR1 = np::zeros(shape, dtype); + //np::ndarray npR2 = np::zeros(shape, dtype); + + //D = reinterpret_cast<float *>(npD.get_data()); + //D_old = reinterpret_cast<float *>(npD_old.get_data()); + //P1 = reinterpret_cast<float *>(npP1.get_data()); + //P2 = reinterpret_cast<float *>(npP2.get_data()); + //P1_old = reinterpret_cast<float *>(npP1_old.get_data()); + //P2_old = reinterpret_cast<float *>(npP2_old.get_data()); + //R1 = reinterpret_cast<float *>(npR1.get_data()); + //R2 = reinterpret_cast<float *>(npR2.get_data()); + + ///* begin iterations */ + //for (ll = 0; ll<iter; ll++) { + ///* computing the gradient of the objective function */ + //Obj_func2D(A, D, R1, R2, lambda, dimX, dimY); + + ///*Taking a step towards minus of the gradient*/ + //Grad_func2D(P1, P2, D, R1, R2, lambda, dimX, dimY); - /* projection step */ - Proj_func2D(P1, P2, methTV, dimX, dimY); + ///* projection step */ + //Proj_func2D(P1, P2, methTV, dimX, dimY); - /*updating R and t*/ - tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f; - Rupd_func2D(P1, P1_old, P2, P2_old, R1, R2, tkp1, tk, dimX, dimY); - - /* calculate norm */ - re = 0.0f; re1 = 0.0f; - for (j = 0; j<dimX*dimY*dimZ; j++) - { - re += pow(D[j] - D_old[j], 2); - re1 += pow(D[j], 2); - } - re = sqrt(re) / sqrt(re1); - if (re < epsil) count++; - if (count > 4) { - Obj_func2D(A, D, P1, P2, lambda, dimX, dimY); - funcval = 0.0f; - for (j = 0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j], 2); - //funcvalA[0] = sqrt(funcval); - float fv = sqrt(funcval); - std::memcpy(funcvalA, &fv, sizeof(float)); - break; - } - - /* check that the residual norm is decreasing */ - if (ll > 2) { - if (re > re_old) { - Obj_func2D(A, D, P1, P2, lambda, dimX, dimY); - funcval = 0.0f; - for (j = 0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j], 2); - //funcvalA[0] = sqrt(funcval); - float fv = sqrt(funcval); - std::memcpy(funcvalA, &fv, sizeof(float)); - break; - } - } - re_old = re; - /*printf("%f %i %i \n", re, ll, count); */ - - /*storing old values*/ - copyIm(D, D_old, dimX, dimY, dimZ); - copyIm(P1, P1_old, dimX, dimY, dimZ); - copyIm(P2, P2_old, dimX, dimY, dimZ); - tk = tkp1; - - /* calculating the objective function value */ - if (ll == (iter - 1)) { - Obj_func2D(A, D, P1, P2, lambda, dimX, dimY); - funcval = 0.0f; - for (j = 0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j], 2); - //funcvalA[0] = sqrt(funcval); - float fv = sqrt(funcval); - std::memcpy(funcvalA, &fv, sizeof(float)); - } - } - //printf("FGP-TV iterations stopped at iteration %i with the function value %f \n", ll, funcvalA[0]); - result.append<np::ndarray>(npD); - result.append<np::ndarray>(out1); - result.append<int>(ll); - } - if (number_of_dims == 3) { - /*D = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - D_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - P1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - P2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - P3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - P1_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - P2_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - P3_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - R1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - R2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - R3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));*/ - bp::tuple shape = bp::make_tuple(dim_array[0], dim_array[1], dim_array[2]); - np::dtype dtype = np::dtype::get_builtin<float>(); + ///*updating R and t*/ + //tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f; + //Rupd_func2D(P1, P1_old, P2, P2_old, R1, R2, tkp1, tk, dimX, dimY); + + ///* calculate norm */ + //re = 0.0f; re1 = 0.0f; + //for (j = 0; j<dimX*dimY*dimZ; j++) + //{ + //re += pow(D[j] - D_old[j], 2); + //re1 += pow(D[j], 2); + //} + //re = sqrt(re) / sqrt(re1); + //if (re < epsil) count++; + //if (count > 4) { + //Obj_func2D(A, D, P1, P2, lambda, dimX, dimY); + //funcval = 0.0f; + //for (j = 0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j], 2); + ////funcvalA[0] = sqrt(funcval); + //float fv = sqrt(funcval); + //std::memcpy(funcvalA, &fv, sizeof(float)); + //break; + //} + + ///* check that the residual norm is decreasing */ + //if (ll > 2) { + //if (re > re_old) { + //Obj_func2D(A, D, P1, P2, lambda, dimX, dimY); + //funcval = 0.0f; + //for (j = 0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j], 2); + ////funcvalA[0] = sqrt(funcval); + //float fv = sqrt(funcval); + //std::memcpy(funcvalA, &fv, sizeof(float)); + //break; + //} + //} + //re_old = re; + ///*printf("%f %i %i \n", re, ll, count); */ + + ///*storing old values*/ + //copyIm(D, D_old, dimX, dimY, dimZ); + //copyIm(P1, P1_old, dimX, dimY, dimZ); + //copyIm(P2, P2_old, dimX, dimY, dimZ); + //tk = tkp1; + + ///* calculating the objective function value */ + //if (ll == (iter - 1)) { + //Obj_func2D(A, D, P1, P2, lambda, dimX, dimY); + //funcval = 0.0f; + //for (j = 0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j], 2); + ////funcvalA[0] = sqrt(funcval); + //float fv = sqrt(funcval); + //std::memcpy(funcvalA, &fv, sizeof(float)); + //} + //} + ////printf("FGP-TV iterations stopped at iteration %i with the function value %f \n", ll, funcvalA[0]); + //result.append<np::ndarray>(npD); + //result.append<np::ndarray>(out1); + //result.append<int>(ll); + //} + //if (number_of_dims == 3) { + ///*D = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); + //D_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); + //P1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); + //P2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); + //P3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); + //P1_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); + //P2_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); + //P3_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); + //R1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); + //R2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); + //R3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));*/ + //bp::tuple shape = bp::make_tuple(dim_array[0], dim_array[1], dim_array[2]); + //np::dtype dtype = np::dtype::get_builtin<float>(); - np::ndarray npD = np::zeros(shape, dtype); - np::ndarray npD_old = np::zeros(shape, dtype); - np::ndarray npP1 = np::zeros(shape, dtype); - np::ndarray npP2 = np::zeros(shape, dtype); - np::ndarray npP3 = np::zeros(shape, dtype); - np::ndarray npP1_old = np::zeros(shape, dtype); - np::ndarray npP2_old = np::zeros(shape, dtype); - np::ndarray npP3_old = np::zeros(shape, dtype); - np::ndarray npR1 = np::zeros(shape, dtype); - np::ndarray npR2 = np::zeros(shape, dtype); - np::ndarray npR3 = np::zeros(shape, dtype); - - D = reinterpret_cast<float *>(npD.get_data()); - D_old = reinterpret_cast<float *>(npD_old.get_data()); - P1 = reinterpret_cast<float *>(npP1.get_data()); - P2 = reinterpret_cast<float *>(npP2.get_data()); - P3 = reinterpret_cast<float *>(npP3.get_data()); - P1_old = reinterpret_cast<float *>(npP1_old.get_data()); - P2_old = reinterpret_cast<float *>(npP2_old.get_data()); - P3_old = reinterpret_cast<float *>(npP3_old.get_data()); - R1 = reinterpret_cast<float *>(npR1.get_data()); - R2 = reinterpret_cast<float *>(npR2.get_data()); - R3 = reinterpret_cast<float *>(npR3.get_data()); - /* begin iterations */ - for (ll = 0; ll<iter; ll++) { - /* computing the gradient of the objective function */ - Obj_func3D(A, D, R1, R2, R3, lambda, dimX, dimY, dimZ); - /*Taking a step towards minus of the gradient*/ - Grad_func3D(P1, P2, P3, D, R1, R2, R3, lambda, dimX, dimY, dimZ); - - /* projection step */ - Proj_func3D(P1, P2, P3, dimX, dimY, dimZ); - - /*updating R and t*/ - tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f; - Rupd_func3D(P1, P1_old, P2, P2_old, P3, P3_old, R1, R2, R3, tkp1, tk, dimX, dimY, dimZ); - - /* calculate norm - stopping rules*/ - re = 0.0f; re1 = 0.0f; - for (j = 0; j<dimX*dimY*dimZ; j++) - { - re += pow(D[j] - D_old[j], 2); - re1 += pow(D[j], 2); - } - re = sqrt(re) / sqrt(re1); - /* stop if the norm residual is less than the tolerance EPS */ - if (re < epsil) count++; - if (count > 3) { - Obj_func3D(A, D, P1, P2, P3, lambda, dimX, dimY, dimZ); - funcval = 0.0f; - for (j = 0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j], 2); - //funcvalA[0] = sqrt(funcval); - float fv = sqrt(funcval); - std::memcpy(funcvalA, &fv, sizeof(float)); - break; - } - - /* check that the residual norm is decreasing */ - if (ll > 2) { - if (re > re_old) { - Obj_func3D(A, D, P1, P2, P3, lambda, dimX, dimY, dimZ); - funcval = 0.0f; - for (j = 0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j], 2); - //funcvalA[0] = sqrt(funcval); - float fv = sqrt(funcval); - std::memcpy(funcvalA, &fv, sizeof(float)); - break; - } - } - - re_old = re; - /*printf("%f %i %i \n", re, ll, count); */ - - /*storing old values*/ - copyIm(D, D_old, dimX, dimY, dimZ); - copyIm(P1, P1_old, dimX, dimY, dimZ); - copyIm(P2, P2_old, dimX, dimY, dimZ); - copyIm(P3, P3_old, dimX, dimY, dimZ); - tk = tkp1; - - if (ll == (iter - 1)) { - Obj_func3D(A, D, P1, P2, P3, lambda, dimX, dimY, dimZ); - funcval = 0.0f; - for (j = 0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j], 2); - //funcvalA[0] = sqrt(funcval); - float fv = sqrt(funcval); - std::memcpy(funcvalA, &fv, sizeof(float)); - } - - } - //printf("FGP-TV iterations stopped at iteration %i with the function value %f \n", ll, funcvalA[0]); - result.append<np::ndarray>(npD); - result.append<np::ndarray>(out1); - result.append<int>(ll); - } + //np::ndarray npD = np::zeros(shape, dtype); + //np::ndarray npD_old = np::zeros(shape, dtype); + //np::ndarray npP1 = np::zeros(shape, dtype); + //np::ndarray npP2 = np::zeros(shape, dtype); + //np::ndarray npP3 = np::zeros(shape, dtype); + //np::ndarray npP1_old = np::zeros(shape, dtype); + //np::ndarray npP2_old = np::zeros(shape, dtype); + //np::ndarray npP3_old = np::zeros(shape, dtype); + //np::ndarray npR1 = np::zeros(shape, dtype); + //np::ndarray npR2 = np::zeros(shape, dtype); + //np::ndarray npR3 = np::zeros(shape, dtype); + + //D = reinterpret_cast<float *>(npD.get_data()); + //D_old = reinterpret_cast<float *>(npD_old.get_data()); + //P1 = reinterpret_cast<float *>(npP1.get_data()); + //P2 = reinterpret_cast<float *>(npP2.get_data()); + //P3 = reinterpret_cast<float *>(npP3.get_data()); + //P1_old = reinterpret_cast<float *>(npP1_old.get_data()); + //P2_old = reinterpret_cast<float *>(npP2_old.get_data()); + //P3_old = reinterpret_cast<float *>(npP3_old.get_data()); + //R1 = reinterpret_cast<float *>(npR1.get_data()); + //R2 = reinterpret_cast<float *>(npR2.get_data()); + //R3 = reinterpret_cast<float *>(npR3.get_data()); + ///* begin iterations */ + //for (ll = 0; ll<iter; ll++) { + ///* computing the gradient of the objective function */ + //Obj_func3D(A, D, R1, R2, R3, lambda, dimX, dimY, dimZ); + ///*Taking a step towards minus of the gradient*/ + //Grad_func3D(P1, P2, P3, D, R1, R2, R3, lambda, dimX, dimY, dimZ); + + ///* projection step */ + //Proj_func3D(P1, P2, P3, dimX, dimY, dimZ); + + ///*updating R and t*/ + //tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f; + //Rupd_func3D(P1, P1_old, P2, P2_old, P3, P3_old, R1, R2, R3, tkp1, tk, dimX, dimY, dimZ); + + ///* calculate norm - stopping rules*/ + //re = 0.0f; re1 = 0.0f; + //for (j = 0; j<dimX*dimY*dimZ; j++) + //{ + //re += pow(D[j] - D_old[j], 2); + //re1 += pow(D[j], 2); + //} + //re = sqrt(re) / sqrt(re1); + ///* stop if the norm residual is less than the tolerance EPS */ + //if (re < epsil) count++; + //if (count > 3) { + //Obj_func3D(A, D, P1, P2, P3, lambda, dimX, dimY, dimZ); + //funcval = 0.0f; + //for (j = 0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j], 2); + ////funcvalA[0] = sqrt(funcval); + //float fv = sqrt(funcval); + //std::memcpy(funcvalA, &fv, sizeof(float)); + //break; + //} + + ///* check that the residual norm is decreasing */ + //if (ll > 2) { + //if (re > re_old) { + //Obj_func3D(A, D, P1, P2, P3, lambda, dimX, dimY, dimZ); + //funcval = 0.0f; + //for (j = 0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j], 2); + ////funcvalA[0] = sqrt(funcval); + //float fv = sqrt(funcval); + //std::memcpy(funcvalA, &fv, sizeof(float)); + //break; + //} + //} + + //re_old = re; + ///*printf("%f %i %i \n", re, ll, count); */ + + ///*storing old values*/ + //copyIm(D, D_old, dimX, dimY, dimZ); + //copyIm(P1, P1_old, dimX, dimY, dimZ); + //copyIm(P2, P2_old, dimX, dimY, dimZ); + //copyIm(P3, P3_old, dimX, dimY, dimZ); + //tk = tkp1; + + //if (ll == (iter - 1)) { + //Obj_func3D(A, D, P1, P2, P3, lambda, dimX, dimY, dimZ); + //funcval = 0.0f; + //for (j = 0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j], 2); + ////funcvalA[0] = sqrt(funcval); + //float fv = sqrt(funcval); + //std::memcpy(funcvalA, &fv, sizeof(float)); + //} + + //} + ////printf("FGP-TV iterations stopped at iteration %i with the function value %f \n", ll, funcvalA[0]); + //result.append<np::ndarray>(npD); + //result.append<np::ndarray>(out1); + //result.append<int>(ll); + //} - return result; -} + //return result; +//} bp::list LLT_model(np::ndarray input, double d_lambda, double d_tau, int iter, double d_epsil, int switcher) { // the result is in the following list diff --git a/Wrappers/Python/src/gpu_regularizers.pyx b/Wrappers/Python/src/gpu_regularizers.pyx index c724471..263fa4a 100644 --- a/Wrappers/Python/src/gpu_regularizers.pyx +++ b/Wrappers/Python/src/gpu_regularizers.pyx @@ -25,7 +25,9 @@ cdef extern void NLM_GPU_kernel(float *A, float* B, float *Eucl_Vec, int N, int M, int Z, int dimension, int SearchW, int SimilW, int SearchW_real, float denh2, float lambdaf); -cdef extern void TV_ROF_GPU_kernel(float* A, float* B, int N, int M, int Z, int iter, float tau, float lambdaf); +cdef extern void TV_ROF_GPU(float* Input, float* Output, int N, int M, int Z, int iter, float tau, float lambdaf); +cdef extern void TV_FGP_GPU(float *Input, float *Output, float lambda, int iter, float epsil, int methodTV, int nonneg, int printM, int N, int M, int Z); + cdef extern float pad_crop(float *A, float *Ap, int OldSizeX, int OldSizeY, int OldSizeZ, int NewSizeX, int NewSizeY, int NewSizeZ, @@ -343,7 +345,7 @@ def ROFTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, np.zeros([dims[0],dims[1]], dtype='float32') # Running CUDA code here - TV_ROF_GPU_kernel( + TV_ROF_GPU( &inputData[0,0], &B[0,0], dims[0], dims[1], 1, iterations , @@ -366,7 +368,7 @@ def ROFTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, np.zeros([dims[0],dims[1],dims[2]], dtype='float32') # Running CUDA code here - TV_ROF_GPU_kernel( + TV_ROF_GPU( &inputData[0,0,0], &B[0,0,0], dims[0], dims[1], dims[2], iterations , @@ -374,3 +376,64 @@ def ROFTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, regularization_parameter); return B + + +def TVFGP2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, + float regularization_parameter, + int iterations, + float tolerance_param, + int methodTV, + int nonneg, + int printM): + + cdef long dims[2] + dims[0] = inputData.shape[0] + dims[1] = inputData.shape[1] + + cdef np.ndarray[np.float32_t, ndim=2, mode="c"] B = \ + np.zeros([dims[0],dims[1]], dtype='float32') + + # Running CUDA code here + TV_FGP_GPU( + &inputData[0,0], &B[0,0], + regularization_parameter , + iterations, + tolerance_param, + methodTV, + nonneg, + printM, + dims[0], dims[1], 1); + + return B + +def TVFGP3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, + float regularization_parameter, + int iterations, + float tolerance_param, + int methodTV, + int nonneg, + int printM): + + cdef long dims[3] + dims[0] = inputData.shape[0] + dims[1] = inputData.shape[1] + dims[2] = inputData.shape[2] + + cdef np.ndarray[np.float32_t, ndim=3, mode="c"] B = \ + np.zeros([dims[0],dims[1],dims[2]], dtype='float32') + + # Running CUDA code here + TV_FGP_GPU( + &inputData[0,0,0], &B[0,0,0], + regularization_parameter , + iterations, + tolerance_param, + methodTV, + nonneg, + printM, + dims[0], dims[1], dims[2]); + + return B + + + diff --git a/Wrappers/Python/test/test_cpu_vs_gpu.py b/Wrappers/Python/test/test_cpu_vs_gpu.py new file mode 100644 index 0000000..74d65dd --- /dev/null +++ b/Wrappers/Python/test/test_cpu_vs_gpu.py @@ -0,0 +1,10 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Wed Feb 21 12:12:22 2018 + +# CPU vs GPU comparison tests + +@author: algol +""" + diff --git a/Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py b/Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py index d742a7f..6344021 100644 --- a/Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py +++ b/Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py @@ -12,8 +12,8 @@ import matplotlib.pyplot as plt import numpy as np import os import timeit -from ccpi.filters.gpu_regularizers import Diff4thHajiaboli, NML, GPU_ROF_TV -from ccpi.filters.cpu_regularizers_cython import ROF_TV +from ccpi.filters.gpu_regularizers import Diff4thHajiaboli, NML, TV_ROF_GPU +from ccpi.filters.cpu_regularizers_cython import TV_ROF_CPU ############################################################################### def printParametersToString(pars): txt = r'' @@ -64,7 +64,7 @@ pars = {'algorithm': ROF_TV , \ } print ("#################ROF TV CPU#####################") start_time = timeit.default_timer() -rof_cpu = ROF_TV(pars['input'], +rof_cpu = TV_ROF_CPU(pars['input'], pars['number_of_iterations'], pars['regularization_parameter'], pars['time_marching_parameter'] @@ -89,7 +89,7 @@ plt.title('{}'.format('CPU results')) print ("#################ROF TV GPU#####################") start_time = timeit.default_timer() -rof_gpu = GPU_ROF_TV(pars['input'], +rof_gpu = TV_ROF_GPU(pars['input'], pars['number_of_iterations'], pars['time_marching_parameter'], pars['regularization_parameter']) |