13 files changed, 371 insertions, 298 deletions
diff --git a/Core/regularizers_CPU/FGP_TV_core.c b/Core/regularizers_CPU/FGP_TV_core.c
index 304848d..2f1439d 100644
--- a/Core/regularizers_CPU/FGP_TV_core.c
+++ b/Core/regularizers_CPU/FGP_TV_core.c
@@ -37,7 +37,7 @@ limitations under the License.
  * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
  */
  
-float FGP_TV_CPU(float *Input, float *Output, float lambda, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ)
+float TV_FGP_CPU(float *Input, float *Output, float lambda, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ)
 {
 	int ll, j, DimTotal;
 	float re, re1;
diff --git a/Core/regularizers_CPU/FGP_TV_core.h b/Core/regularizers_CPU/FGP_TV_core.h
index b591819..98ceaec 100644
--- a/Core/regularizers_CPU/FGP_TV_core.h
+++ b/Core/regularizers_CPU/FGP_TV_core.h
@@ -47,7 +47,7 @@ limitations under the License.
 #ifdef __cplusplus
 extern "C" {
 #endif
-CCPI_EXPORT float FGP_TV_CPU(float *Input, float *Output, float lambda, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
+CCPI_EXPORT float TV_FGP_CPU(float *Input, float *Output, float lambda, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
 
 CCPI_EXPORT float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, int dimX, int dimY);
 CCPI_EXPORT float Grad_func2D(float *P1, float *P2, float *D, float *R1, float *R2, float lambda, int dimX, int dimY);
diff --git a/Core/regularizers_CPU/ROF_TV_core.c b/Core/regularizers_CPU/ROF_TV_core.c
index fd47c3f..b2c6f00 100644
--- a/Core/regularizers_CPU/ROF_TV_core.c
+++ b/Core/regularizers_CPU/ROF_TV_core.c
@@ -46,7 +46,7 @@ int sign(float x) {
  */
 
 /* Running iterations of TV-ROF function */
-float TV_ROF(float *Input, float *Output, int dimX, int dimY, int dimZ, int iterationsNumb, float tau, float lambda)
+float TV_ROF_CPU(float *Input, float *Output, int dimX, int dimY, int dimZ, int iterationsNumb, float tau, float lambda)
 {
     float *D1, *D2, *D3;
     int i, DimTotal;
diff --git a/Core/regularizers_CPU/ROF_TV_core.h b/Core/regularizers_CPU/ROF_TV_core.h
index 5d69d27..b32d0d5 100644
--- a/Core/regularizers_CPU/ROF_TV_core.h
+++ b/Core/regularizers_CPU/ROF_TV_core.h
@@ -47,7 +47,7 @@ limitations under the License.
 extern "C" {
 #endif
 CCPI_EXPORT float TV_kernel(float *D1, float *D2, float *D3, float *B, float *A, float lambda, float tau, int dimY, int dimX, int dimZ);
-CCPI_EXPORT float TV_ROF(float *Input, float *Output, int dimX, int dimY, int dimZ, int iterationsNumb, float tau, float lambda);
+CCPI_EXPORT float TV_ROF_CPU(float *Input, float *Output, int dimX, int dimY, int dimZ, int iterationsNumb, float tau, float lambda);
 CCPI_EXPORT float D1_func(float *A, float *D1, int dimY, int dimX, int dimZ);
 CCPI_EXPORT float D2_func(float *A, float *D2, int dimY, int dimX, int dimZ);
 CCPI_EXPORT float D3_func(float *A, float *D3, int dimY, int dimX, int dimZ);
diff --git a/Core/regularizers_GPU/TV_FGP/FGP_TV_GPU_core.cu b/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.cu
index 21a95c9..0533a85 100755
--- a/Core/regularizers_GPU/TV_FGP/FGP_TV_GPU_core.cu
+++ b/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.cu
@@ -17,7 +17,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */ 
 
-#include "FGP_TV_GPU_core.h"
+#include "TV_FGP_GPU_core.h"
 #include <thrust/device_vector.h>
 #include <thrust/transform_reduce.h>
 
@@ -341,7 +341,7 @@ __global__ void copy_kernel3D(float *Input, float* Output, int N, int M, int Z,
 /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
 
 ////////////MAIN HOST FUNCTION ///////////////
-extern "C" void FGP_TV_GPU(float *Input, float *Output, float lambda, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ)
+extern "C" void TV_FGP_GPU(float *Input, float *Output, float lambda, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ)
 {
     int deviceCount = -1; // number of devices
     cudaGetDeviceCount(&deviceCount);
diff --git a/Core/regularizers_GPU/TV_FGP/FGP_TV_GPU_core.h b/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.h
index a5d3f73..15c7120 100755
--- a/Core/regularizers_GPU/TV_FGP/FGP_TV_GPU_core.h
+++ b/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.h
@@ -2,9 +2,9 @@
 #include <stdlib.h>
 #include <memory.h>
 
-#ifndef _FGP_TV_GPU_
-#define _FGP_TV_GPU_
+#ifndef _TV_FGP_GPU_
+#define _TV_FGP_GPU_
 
-extern "C" void FGP_TV_GPU(float *Input, float *Output, float lambda, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);   
+extern "C" void TV_FGP_GPU(float *Input, float *Output, float lambda, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);   
 
 #endif 
diff --git a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.h b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.h
deleted file mode 100755
index 2938d2f..0000000
--- a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef __TVGPU_H__
-#define __TVGPU_H__
-#include "CCPiDefines.h"
-#include <stdio.h>
-
-extern "C" CCPI_EXPORT void TV_ROF_GPU_kernel(float* Input, float* Output, int N, int M, int Z, int iter, float tau, float lambda);
-
-#endif 
diff --git a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.cu
index b67b53b..480855f 100755
--- a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu
+++ b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.cu
@@ -17,7 +17,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */ 
 
-#include "TV_ROF_GPU.h"
+#include "TV_ROF_GPU_core.h"
 
 /* C-OMP implementation of ROF-TV denoising/regularization model [1] (2D/3D case)
 *
diff --git a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.h b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.h
new file mode 100755
index 0000000..8b64d99
--- /dev/null
+++ b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.h
@@ -0,0 +1,8 @@
+#ifndef TV_ROF_GPU_CORE_H
+#define TV_ROF_GPU_CORE_H
+#include "CCPiDefines.h"
+#include <stdio.h>
+/* C-linkage entry point for ROF-TV on the GPU; called from the Python wrapper (gpu_regularizers.pyx). */
+extern "C" CCPI_EXPORT void TV_ROF_GPU(float* Input, float* Output, int N, int M, int Z, int iter, float tau, float lambda);
+
+#endif /* TV_ROF_GPU_CORE_H */
diff --git a/Wrappers/Python/src/cpu_regularizers.cpp b/Wrappers/Python/src/cpu_regularizers.cpp
index e311570..43d5d11 100644
--- a/Wrappers/Python/src/cpu_regularizers.cpp
+++ b/Wrappers/Python/src/cpu_regularizers.cpp
@@ -27,7 +27,7 @@ limitations under the License.
 #include "boost/tuple/tuple.hpp"
 
 #include "SplitBregman_TV_core.h"
-#include "FGP_TV_core.h"
+//#include "FGP_TV_core.h"
 #include "LLT_model_core.h"
 #include "PatchBased_Regul_core.h"
 #include "TGV_PD_core.h"
@@ -305,289 +305,289 @@ bp::list SplitBregman_TV(np::ndarray input, double d_mu, int iter, double d_epsi
 
 
 
-bp::list FGP_TV(np::ndarray input, double d_mu, int iter, double d_epsil, int methTV) {
+//bp::list FGP_TV(np::ndarray input, double d_mu, int iter, double d_epsil, int methTV) {
 
-	// the result is in the following list
-	bp::list result;
+	//// the result is in the following list
+	//bp::list result;
 
-	int number_of_dims, dimX, dimY, dimZ, ll, j, count;
-	float *A, *D = NULL, *D_old = NULL, *P1 = NULL, *P2 = NULL, *P3 = NULL, *P1_old = NULL, *P2_old = NULL, *P3_old = NULL, *R1 = NULL, *R2 = NULL, *R3 = NULL;
-	float lambda, tk, tkp1, re, re1, re_old, epsil, funcval;
+	//int number_of_dims, dimX, dimY, dimZ, ll, j, count;
+	//float *A, *D = NULL, *D_old = NULL, *P1 = NULL, *P2 = NULL, *P3 = NULL, *P1_old = NULL, *P2_old = NULL, *P3_old = NULL, *R1 = NULL, *R2 = NULL, *R3 = NULL;
+	//float lambda, tk, tkp1, re, re1, re_old, epsil, funcval;
 
-	//number_of_dims = mxGetNumberOfDimensions(prhs[0]);
-	//dim_array = mxGetDimensions(prhs[0]);
-
-	number_of_dims = input.get_nd();
-	int dim_array[3];
+	////number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+	////dim_array = mxGetDimensions(prhs[0]);
 
-	dim_array[0] = input.shape(0);
-	dim_array[1] = input.shape(1);
-	if (number_of_dims == 2) {
-		dim_array[2] = -1;
-	}
-	else {
-		dim_array[2] = input.shape(2);
-	}
-	// Parameter handling is be done in Python
-	///*Handling Matlab input data*/
-	//if ((nrhs < 2) || (nrhs > 5)) mexErrMsgTxt("At least 2 parameters is required: Image(2D/3D), Regularization parameter. The full list of parameters: Image(2D/3D), Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1')");
-
-	///*Handling Matlab input data*/
-	//A = (float *)mxGetData(prhs[0]); /*noisy image (2D/3D) */
-	A = reinterpret_cast<float *>(input.get_data());
-
-	//mu = (float)mxGetScalar(prhs[1]); /* regularization parameter */
-	lambda = (float)d_mu;
-
-	//iter = 35; /* default iterations number */
+	//number_of_dims = input.get_nd();
+	//int dim_array[3];
 
-	//epsil = 0.0001; /* default tolerance constant */
-	epsil = (float)d_epsil;
-	//methTV = 0;  /* default isotropic TV penalty */
-	//if ((nrhs == 3) || (nrhs == 4) || (nrhs == 5))  iter = (int)mxGetScalar(prhs[2]); /* iterations number */
-	//if ((nrhs == 4) || (nrhs == 5))  epsil = (float)mxGetScalar(prhs[3]); /* tolerance constant */
-	//if (nrhs == 5) {
-	//	char *penalty_type;
-	//	penalty_type = mxArrayToString(prhs[4]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */
-	//	if ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0)) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',");
-	//	if (strcmp(penalty_type, "l1") == 0)  methTV = 1;  /* enable 'l1' penalty */
-	//	mxFree(penalty_type);
+	//dim_array[0] = input.shape(0);
+	//dim_array[1] = input.shape(1);
+	//if (number_of_dims == 2) {
+		//dim_array[2] = -1;
 	//}
-	//if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) { mexErrMsgTxt("The input image must be in a single precision"); }
-
-	//plhs[1] = mxCreateNumericMatrix(1, 1, mxSINGLE_CLASS, mxREAL);
-	bp::tuple shape1 = bp::make_tuple(dim_array[0], dim_array[1]);
-	np::dtype dtype = np::dtype::get_builtin<float>();
-	np::ndarray out1 = np::zeros(shape1, dtype);
+	//else {
+		//dim_array[2] = input.shape(2);
+	//}
+	//// Parameter handling is done in Python
+	/////*Handling Matlab input data*/
+	////if ((nrhs < 2) || (nrhs > 5)) mexErrMsgTxt("At least 2 parameters is required: Image(2D/3D), Regularization parameter. The full list of parameters: Image(2D/3D), Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1')");
+
+	/////*Handling Matlab input data*/
+	////A = (float *)mxGetData(prhs[0]); /*noisy image (2D/3D) */
+	//A = reinterpret_cast<float *>(input.get_data());
+
+	////mu = (float)mxGetScalar(prhs[1]); /* regularization parameter */
+	//lambda = (float)d_mu;
+
+	////iter = 35; /* default iterations number */
+
+	////epsil = 0.0001; /* default tolerance constant */
+	//epsil = (float)d_epsil;
+	////methTV = 0;  /* default isotropic TV penalty */
+	////if ((nrhs == 3) || (nrhs == 4) || (nrhs == 5))  iter = (int)mxGetScalar(prhs[2]); /* iterations number */
+	////if ((nrhs == 4) || (nrhs == 5))  epsil = (float)mxGetScalar(prhs[3]); /* tolerance constant */
+	////if (nrhs == 5) {
+	////	char *penalty_type;
+	////	penalty_type = mxArrayToString(prhs[4]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */
+	////	if ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0)) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',");
+	////	if (strcmp(penalty_type, "l1") == 0)  methTV = 1;  /* enable 'l1' penalty */
+	////	mxFree(penalty_type);
+	////}
+	////if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) { mexErrMsgTxt("The input image must be in a single precision"); }
+
+	////plhs[1] = mxCreateNumericMatrix(1, 1, mxSINGLE_CLASS, mxREAL);
+	//bp::tuple shape1 = bp::make_tuple(dim_array[0], dim_array[1]);
+	//np::dtype dtype = np::dtype::get_builtin<float>();
+	//np::ndarray out1 = np::zeros(shape1, dtype);
 	
-	//float *funcvalA = (float *)mxGetData(plhs[1]);
-	float * funcvalA = reinterpret_cast<float *>(out1.get_data());
-	//if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) { mexErrMsgTxt("The input image must be in a single precision"); }
-
-	/*Handling Matlab output data*/
-	dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2];
-
-	tk = 1.0f;
-	tkp1 = 1.0f;
-	count = 1;
-	re_old = 0.0f;
-
-	if (number_of_dims == 2) {
-		dimZ = 1; /*2D case*/
-		/*D = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-		D_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-		P1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-		P2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-		P1_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-		P2_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-		R1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-		R2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));*/
-
-		bp::tuple shape = bp::make_tuple(dim_array[0], dim_array[1]);
-		np::dtype dtype = np::dtype::get_builtin<float>();
-
-
-		np::ndarray npD      = np::zeros(shape, dtype);
-		np::ndarray npD_old  = np::zeros(shape, dtype);
-		np::ndarray npP1     = np::zeros(shape, dtype);
-		np::ndarray npP2     = np::zeros(shape, dtype);
-		np::ndarray npP1_old = np::zeros(shape, dtype);
-		np::ndarray npP2_old = np::zeros(shape, dtype);
-		np::ndarray npR1     = np::zeros(shape, dtype);
-		np::ndarray npR2     = np::zeros(shape, dtype);
-
-		D      = reinterpret_cast<float *>(npD.get_data());
-		D_old  = reinterpret_cast<float *>(npD_old.get_data());
-		P1     = reinterpret_cast<float *>(npP1.get_data());
-		P2     = reinterpret_cast<float *>(npP2.get_data());
-		P1_old = reinterpret_cast<float *>(npP1_old.get_data());
-		P2_old = reinterpret_cast<float *>(npP2_old.get_data());
-		R1     = reinterpret_cast<float *>(npR1.get_data());
-		R2     = reinterpret_cast<float *>(npR2.get_data());
-
-		/* begin iterations */
-		for (ll = 0; ll<iter; ll++) {
-			/* computing the gradient of the objective function */
-			Obj_func2D(A, D, R1, R2, lambda, dimX, dimY);
-
-			/*Taking a step towards minus of the gradient*/
-			Grad_func2D(P1, P2, D, R1, R2, lambda, dimX, dimY);
+	////float *funcvalA = (float *)mxGetData(plhs[1]);
+	//float * funcvalA = reinterpret_cast<float *>(out1.get_data());
+	////if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) { mexErrMsgTxt("The input image must be in a single precision"); }
+
+	///*Handling Matlab output data*/
+	//dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2];
+
+	//tk = 1.0f;
+	//tkp1 = 1.0f;
+	//count = 1;
+	//re_old = 0.0f;
+
+	//if (number_of_dims == 2) {
+		//dimZ = 1; /*2D case*/
+		///*D = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+		//D_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+		//P1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+		//P2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+		//P1_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+		//P2_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+		//R1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+		//R2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));*/
+
+		//bp::tuple shape = bp::make_tuple(dim_array[0], dim_array[1]);
+		//np::dtype dtype = np::dtype::get_builtin<float>();
+
+
+		//np::ndarray npD      = np::zeros(shape, dtype);
+		//np::ndarray npD_old  = np::zeros(shape, dtype);
+		//np::ndarray npP1     = np::zeros(shape, dtype);
+		//np::ndarray npP2     = np::zeros(shape, dtype);
+		//np::ndarray npP1_old = np::zeros(shape, dtype);
+		//np::ndarray npP2_old = np::zeros(shape, dtype);
+		//np::ndarray npR1     = np::zeros(shape, dtype);
+		//np::ndarray npR2     = np::zeros(shape, dtype);
+
+		//D      = reinterpret_cast<float *>(npD.get_data());
+		//D_old  = reinterpret_cast<float *>(npD_old.get_data());
+		//P1     = reinterpret_cast<float *>(npP1.get_data());
+		//P2     = reinterpret_cast<float *>(npP2.get_data());
+		//P1_old = reinterpret_cast<float *>(npP1_old.get_data());
+		//P2_old = reinterpret_cast<float *>(npP2_old.get_data());
+		//R1     = reinterpret_cast<float *>(npR1.get_data());
+		//R2     = reinterpret_cast<float *>(npR2.get_data());
+
+		///* begin iterations */
+		//for (ll = 0; ll<iter; ll++) {
+			///* computing the gradient of the objective function */
+			//Obj_func2D(A, D, R1, R2, lambda, dimX, dimY);
+
+			///*Taking a step towards minus of the gradient*/
+			//Grad_func2D(P1, P2, D, R1, R2, lambda, dimX, dimY);
   
-            /* projection step */
-            Proj_func2D(P1, P2, methTV, dimX, dimY);
+            ///* projection step */
+            //Proj_func2D(P1, P2, methTV, dimX, dimY);
             
-            /*updating R and t*/
-            tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
-            Rupd_func2D(P1, P1_old, P2, P2_old, R1, R2, tkp1, tk, dimX, dimY);
-
-			/* calculate norm */
-			re = 0.0f; re1 = 0.0f;
-			for (j = 0; j<dimX*dimY*dimZ; j++)
-			{
-				re += pow(D[j] - D_old[j], 2);
-				re1 += pow(D[j], 2);
-			}
-			re = sqrt(re) / sqrt(re1);
-			if (re < epsil)  count++;
-			if (count > 4) {
-				Obj_func2D(A, D, P1, P2, lambda, dimX, dimY);
-				funcval = 0.0f;
-				for (j = 0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j], 2);
-				//funcvalA[0] = sqrt(funcval);
-				float fv = sqrt(funcval);
-				std::memcpy(funcvalA, &fv, sizeof(float));
-				break;
-			}
-
-			/* check that the residual norm is decreasing */
-			if (ll > 2) {
-				if (re > re_old) {
-					Obj_func2D(A, D, P1, P2, lambda, dimX, dimY);
-					funcval = 0.0f;
-					for (j = 0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j], 2);
-					//funcvalA[0] = sqrt(funcval);
-					float fv = sqrt(funcval);
-					std::memcpy(funcvalA, &fv, sizeof(float));
-					break;
-				}
-			}
-			re_old = re;
-			/*printf("%f %i %i \n", re, ll, count); */
-
-			/*storing old values*/
-			copyIm(D, D_old, dimX, dimY, dimZ);
-			copyIm(P1, P1_old, dimX, dimY, dimZ);
-			copyIm(P2, P2_old, dimX, dimY, dimZ);
-			tk = tkp1;
-
-			/* calculating the objective function value */
-			if (ll == (iter - 1)) {
-				Obj_func2D(A, D, P1, P2, lambda, dimX, dimY);
-				funcval = 0.0f;
-				for (j = 0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j], 2);
-				//funcvalA[0] = sqrt(funcval);
-				float fv = sqrt(funcval);
-				std::memcpy(funcvalA, &fv, sizeof(float));
-			}
-		}
-		//printf("FGP-TV iterations stopped at iteration %i with the function value %f \n", ll, funcvalA[0]);
-		result.append<np::ndarray>(npD);
-		result.append<np::ndarray>(out1);
-		result.append<int>(ll);
-	}
-	if (number_of_dims == 3) {
-		/*D = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-		D_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-		P1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-		P2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-		P3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-		P1_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-		P2_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-		P3_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-		R1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-		R2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-		R3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));*/
-		bp::tuple shape = bp::make_tuple(dim_array[0], dim_array[1], dim_array[2]);
-		np::dtype dtype = np::dtype::get_builtin<float>();
+            ///*updating R and t*/
+            //tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
+            //Rupd_func2D(P1, P1_old, P2, P2_old, R1, R2, tkp1, tk, dimX, dimY);
+
+			///* calculate norm */
+			//re = 0.0f; re1 = 0.0f;
+			//for (j = 0; j<dimX*dimY*dimZ; j++)
+			//{
+				//re += pow(D[j] - D_old[j], 2);
+				//re1 += pow(D[j], 2);
+			//}
+			//re = sqrt(re) / sqrt(re1);
+			//if (re < epsil)  count++;
+			//if (count > 4) {
+				//Obj_func2D(A, D, P1, P2, lambda, dimX, dimY);
+				//funcval = 0.0f;
+				//for (j = 0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j], 2);
+				////funcvalA[0] = sqrt(funcval);
+				//float fv = sqrt(funcval);
+				//std::memcpy(funcvalA, &fv, sizeof(float));
+				//break;
+			//}
+
+			///* check that the residual norm is decreasing */
+			//if (ll > 2) {
+				//if (re > re_old) {
+					//Obj_func2D(A, D, P1, P2, lambda, dimX, dimY);
+					//funcval = 0.0f;
+					//for (j = 0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j], 2);
+					////funcvalA[0] = sqrt(funcval);
+					//float fv = sqrt(funcval);
+					//std::memcpy(funcvalA, &fv, sizeof(float));
+					//break;
+				//}
+			//}
+			//re_old = re;
+			///*printf("%f %i %i \n", re, ll, count); */
+
+			///*storing old values*/
+			//copyIm(D, D_old, dimX, dimY, dimZ);
+			//copyIm(P1, P1_old, dimX, dimY, dimZ);
+			//copyIm(P2, P2_old, dimX, dimY, dimZ);
+			//tk = tkp1;
+
+			///* calculating the objective function value */
+			//if (ll == (iter - 1)) {
+				//Obj_func2D(A, D, P1, P2, lambda, dimX, dimY);
+				//funcval = 0.0f;
+				//for (j = 0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j], 2);
+				////funcvalA[0] = sqrt(funcval);
+				//float fv = sqrt(funcval);
+				//std::memcpy(funcvalA, &fv, sizeof(float));
+			//}
+		//}
+		////printf("FGP-TV iterations stopped at iteration %i with the function value %f \n", ll, funcvalA[0]);
+		//result.append<np::ndarray>(npD);
+		//result.append<np::ndarray>(out1);
+		//result.append<int>(ll);
+	//}
+	//if (number_of_dims == 3) {
+		///*D = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+		//D_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+		//P1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+		//P2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+		//P3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+		//P1_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+		//P2_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+		//P3_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+		//R1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+		//R2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+		//R3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));*/
+		//bp::tuple shape = bp::make_tuple(dim_array[0], dim_array[1], dim_array[2]);
+		//np::dtype dtype = np::dtype::get_builtin<float>();
 		
-		np::ndarray npD      = np::zeros(shape, dtype);
-		np::ndarray npD_old  = np::zeros(shape, dtype);
-		np::ndarray npP1     = np::zeros(shape, dtype);
-		np::ndarray npP2     = np::zeros(shape, dtype);
-		np::ndarray npP3     = np::zeros(shape, dtype);
-		np::ndarray npP1_old = np::zeros(shape, dtype);
-		np::ndarray npP2_old = np::zeros(shape, dtype);
-		np::ndarray npP3_old = np::zeros(shape, dtype);
-		np::ndarray npR1     = np::zeros(shape, dtype);
-		np::ndarray npR2     = np::zeros(shape, dtype);
-		np::ndarray npR3     = np::zeros(shape, dtype);
-
-		D      = reinterpret_cast<float *>(npD.get_data());
-		D_old  = reinterpret_cast<float *>(npD_old.get_data());
-		P1     = reinterpret_cast<float *>(npP1.get_data());
-		P2     = reinterpret_cast<float *>(npP2.get_data());
-		P3     = reinterpret_cast<float *>(npP3.get_data());
-		P1_old = reinterpret_cast<float *>(npP1_old.get_data());
-		P2_old = reinterpret_cast<float *>(npP2_old.get_data());
-		P3_old = reinterpret_cast<float *>(npP3_old.get_data());
-		R1     = reinterpret_cast<float *>(npR1.get_data());
-		R2     = reinterpret_cast<float *>(npR2.get_data());
-		R3     = reinterpret_cast<float *>(npR3.get_data());
-		/* begin iterations */
-		for (ll = 0; ll<iter; ll++) {
-			/* computing the gradient of the objective function */
-			Obj_func3D(A, D, R1, R2, R3, lambda, dimX, dimY, dimZ);
-			/*Taking a step towards minus of the gradient*/
-			Grad_func3D(P1, P2, P3, D, R1, R2, R3, lambda, dimX, dimY, dimZ);
-
-			/* projection step */
-			Proj_func3D(P1, P2, P3, dimX, dimY, dimZ);
-
-			/*updating R and t*/
-			tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
-			Rupd_func3D(P1, P1_old, P2, P2_old, P3, P3_old, R1, R2, R3, tkp1, tk, dimX, dimY, dimZ);
-
-			/* calculate norm - stopping rules*/
-			re = 0.0f; re1 = 0.0f;
-			for (j = 0; j<dimX*dimY*dimZ; j++)
-			{
-				re += pow(D[j] - D_old[j], 2);
-				re1 += pow(D[j], 2);
-			}
-			re = sqrt(re) / sqrt(re1);
-			/* stop if the norm residual is less than the tolerance EPS */
-			if (re < epsil)  count++;
-			if (count > 3) {
-				Obj_func3D(A, D, P1, P2, P3, lambda, dimX, dimY, dimZ);
-				funcval = 0.0f;
-				for (j = 0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j], 2);
-				//funcvalA[0] = sqrt(funcval);
-				float fv = sqrt(funcval);
-				std::memcpy(funcvalA, &fv, sizeof(float));
-				break;
-			}
-
-			/* check that the residual norm is decreasing */
-			if (ll > 2) {
-				if (re > re_old) {
-					Obj_func3D(A, D, P1, P2, P3, lambda, dimX, dimY, dimZ);
-					funcval = 0.0f;
-					for (j = 0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j], 2);
-					//funcvalA[0] = sqrt(funcval);
-					float fv = sqrt(funcval);
-					std::memcpy(funcvalA, &fv, sizeof(float));
-					break;
-				}
-			}
-
-			re_old = re;
-			/*printf("%f %i %i \n", re, ll, count); */
-
-			/*storing old values*/
-			copyIm(D, D_old, dimX, dimY, dimZ);
-			copyIm(P1, P1_old, dimX, dimY, dimZ);
-			copyIm(P2, P2_old, dimX, dimY, dimZ);
-			copyIm(P3, P3_old, dimX, dimY, dimZ);
-			tk = tkp1;
-
-			if (ll == (iter - 1)) {
-				Obj_func3D(A, D, P1, P2, P3, lambda, dimX, dimY, dimZ);
-				funcval = 0.0f;
-				for (j = 0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j], 2);
-				//funcvalA[0] = sqrt(funcval);
-				float fv = sqrt(funcval);
-				std::memcpy(funcvalA, &fv, sizeof(float));
-			}
-
-		}
-		//printf("FGP-TV iterations stopped at iteration %i with the function value %f \n", ll, funcvalA[0]);
-		result.append<np::ndarray>(npD);
-		result.append<np::ndarray>(out1);
-		result.append<int>(ll);
-	}
+		//np::ndarray npD      = np::zeros(shape, dtype);
+		//np::ndarray npD_old  = np::zeros(shape, dtype);
+		//np::ndarray npP1     = np::zeros(shape, dtype);
+		//np::ndarray npP2     = np::zeros(shape, dtype);
+		//np::ndarray npP3     = np::zeros(shape, dtype);
+		//np::ndarray npP1_old = np::zeros(shape, dtype);
+		//np::ndarray npP2_old = np::zeros(shape, dtype);
+		//np::ndarray npP3_old = np::zeros(shape, dtype);
+		//np::ndarray npR1     = np::zeros(shape, dtype);
+		//np::ndarray npR2     = np::zeros(shape, dtype);
+		//np::ndarray npR3     = np::zeros(shape, dtype);
+
+		//D      = reinterpret_cast<float *>(npD.get_data());
+		//D_old  = reinterpret_cast<float *>(npD_old.get_data());
+		//P1     = reinterpret_cast<float *>(npP1.get_data());
+		//P2     = reinterpret_cast<float *>(npP2.get_data());
+		//P3     = reinterpret_cast<float *>(npP3.get_data());
+		//P1_old = reinterpret_cast<float *>(npP1_old.get_data());
+		//P2_old = reinterpret_cast<float *>(npP2_old.get_data());
+		//P3_old = reinterpret_cast<float *>(npP3_old.get_data());
+		//R1     = reinterpret_cast<float *>(npR1.get_data());
+		//R2     = reinterpret_cast<float *>(npR2.get_data());
+		//R3     = reinterpret_cast<float *>(npR3.get_data());
+		///* begin iterations */
+		//for (ll = 0; ll<iter; ll++) {
+			///* computing the gradient of the objective function */
+			//Obj_func3D(A, D, R1, R2, R3, lambda, dimX, dimY, dimZ);
+			///*Taking a step towards minus of the gradient*/
+			//Grad_func3D(P1, P2, P3, D, R1, R2, R3, lambda, dimX, dimY, dimZ);
+
+			///* projection step */
+			//Proj_func3D(P1, P2, P3, dimX, dimY, dimZ);
+
+			///*updating R and t*/
+			//tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
+			//Rupd_func3D(P1, P1_old, P2, P2_old, P3, P3_old, R1, R2, R3, tkp1, tk, dimX, dimY, dimZ);
+
+			///* calculate norm - stopping rules*/
+			//re = 0.0f; re1 = 0.0f;
+			//for (j = 0; j<dimX*dimY*dimZ; j++)
+			//{
+				//re += pow(D[j] - D_old[j], 2);
+				//re1 += pow(D[j], 2);
+			//}
+			//re = sqrt(re) / sqrt(re1);
+			///* stop if the norm residual is less than the tolerance EPS */
+			//if (re < epsil)  count++;
+			//if (count > 3) {
+				//Obj_func3D(A, D, P1, P2, P3, lambda, dimX, dimY, dimZ);
+				//funcval = 0.0f;
+				//for (j = 0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j], 2);
+				////funcvalA[0] = sqrt(funcval);
+				//float fv = sqrt(funcval);
+				//std::memcpy(funcvalA, &fv, sizeof(float));
+				//break;
+			//}
+
+			///* check that the residual norm is decreasing */
+			//if (ll > 2) {
+				//if (re > re_old) {
+					//Obj_func3D(A, D, P1, P2, P3, lambda, dimX, dimY, dimZ);
+					//funcval = 0.0f;
+					//for (j = 0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j], 2);
+					////funcvalA[0] = sqrt(funcval);
+					//float fv = sqrt(funcval);
+					//std::memcpy(funcvalA, &fv, sizeof(float));
+					//break;
+				//}
+			//}
+
+			//re_old = re;
+			///*printf("%f %i %i \n", re, ll, count); */
+
+			///*storing old values*/
+			//copyIm(D, D_old, dimX, dimY, dimZ);
+			//copyIm(P1, P1_old, dimX, dimY, dimZ);
+			//copyIm(P2, P2_old, dimX, dimY, dimZ);
+			//copyIm(P3, P3_old, dimX, dimY, dimZ);
+			//tk = tkp1;
+
+			//if (ll == (iter - 1)) {
+				//Obj_func3D(A, D, P1, P2, P3, lambda, dimX, dimY, dimZ);
+				//funcval = 0.0f;
+				//for (j = 0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j], 2);
+				////funcvalA[0] = sqrt(funcval);
+				//float fv = sqrt(funcval);
+				//std::memcpy(funcvalA, &fv, sizeof(float));
+			//}
+
+		//}
+		////printf("FGP-TV iterations stopped at iteration %i with the function value %f \n", ll, funcvalA[0]);
+		//result.append<np::ndarray>(npD);
+		//result.append<np::ndarray>(out1);
+		//result.append<int>(ll);
+	//}
 
-	return result;
-}
+	//return result;
+//}
 
 bp::list LLT_model(np::ndarray input, double d_lambda, double d_tau, int iter, double d_epsil, int switcher) {
 	// the result is in the following list
diff --git a/Wrappers/Python/src/gpu_regularizers.pyx b/Wrappers/Python/src/gpu_regularizers.pyx
index c724471..263fa4a 100644
--- a/Wrappers/Python/src/gpu_regularizers.pyx
+++ b/Wrappers/Python/src/gpu_regularizers.pyx
@@ -25,7 +25,9 @@ cdef extern void NLM_GPU_kernel(float *A, float* B, float *Eucl_Vec,
                                 int N, int M,  int Z, int dimension, 
                                 int SearchW, int SimilW, 
                                 int SearchW_real, float denh2, float lambdaf);
-cdef extern void TV_ROF_GPU_kernel(float* A, float* B, int N, int M, int Z, int iter, float tau, float lambdaf);
+cdef extern void TV_ROF_GPU(float* Input, float* Output, int N, int M, int Z, int iter, float tau, float lambdaf);
+# 'lambda' is a Python keyword and cannot name a parameter in Cython, so the C argument is declared as 'lambdaf'.
+cdef extern void TV_FGP_GPU(float *Input, float *Output, float lambdaf, int iter, float epsil, int methodTV, int nonneg, int printM, int N, int M, int Z);
 cdef extern float pad_crop(float *A, float *Ap, 
                            int OldSizeX, int OldSizeY, int OldSizeZ, 
                            int NewSizeX, int NewSizeY, int NewSizeZ, 
@@ -343,7 +345,7 @@ def ROFTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
 		    np.zeros([dims[0],dims[1]], dtype='float32')
           
     # Running CUDA code here    
-    TV_ROF_GPU_kernel(            
+    TV_ROF_GPU(            
             &inputData[0,0], &B[0,0], 
                        dims[0], dims[1], 1, 
                        iterations , 
@@ -366,7 +368,7 @@ def ROFTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
 		    np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
           
     # Running CUDA code here    
-    TV_ROF_GPU_kernel(            
+    TV_ROF_GPU(            
             &inputData[0,0,0], &B[0,0,0], 
                        dims[0], dims[1], dims[2], 
                        iterations , 
@@ -374,3 +376,64 @@ def ROFTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
                        regularization_parameter);   
      
     return B
+
+
+def TVFGP2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,  # 2D wrapper: hands a C-contiguous float32 array to TV_FGP_GPU and returns the filled output array
+                     float regularization_parameter,  # passed to TV_FGP_GPU right after the Input/Output pointers
+                     int iterations,  # passed as TV_FGP_GPU's iter argument
+                     float tolerance_param,  # passed as TV_FGP_GPU's epsil argument
+                     int methodTV,  # forwarded unchanged to TV_FGP_GPU
+                     int nonneg,  # forwarded unchanged to TV_FGP_GPU
+                     int printM):  # forwarded unchanged to TV_FGP_GPU
+    
+    cdef long dims[2]  # local copy of the input shape
+    dims[0] = inputData.shape[0]
+    dims[1] = inputData.shape[1]
+
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] B = \
+		    np.zeros([dims[0],dims[1]], dtype='float32')  # zero-initialised output buffer with the same shape as inputData
+          
+    # Running CUDA code here    
+    TV_FGP_GPU(            
+            &inputData[0,0], &B[0,0],  # addresses of the first element of the input and output buffers
+                       regularization_parameter , 
+                       iterations, 
+                       tolerance_param,
+                       methodTV,
+                       nonneg,
+                       printM,
+                       dims[0], dims[1], 1);  # N, M, Z with the third size fixed to 1 for 2D data; NOTE(review): shape[0] is passed as N - confirm this matches the kernel's indexing for non-square input
+     
+    return B  # the array whose buffer was handed to TV_FGP_GPU as Output
+    
+def TVFGP3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,  # 3D wrapper: hands a C-contiguous float32 volume to TV_FGP_GPU and returns the filled output array
+                     float regularization_parameter,  # passed to TV_FGP_GPU right after the Input/Output pointers
+                     int iterations,  # passed as TV_FGP_GPU's iter argument
+                     float tolerance_param,  # passed as TV_FGP_GPU's epsil argument
+                     int methodTV,  # forwarded unchanged to TV_FGP_GPU
+                     int nonneg,  # forwarded unchanged to TV_FGP_GPU
+                     int printM):  # forwarded unchanged to TV_FGP_GPU
+    
+    cdef long dims[3]  # local copy of the input shape
+    dims[0] = inputData.shape[0]
+    dims[1] = inputData.shape[1]
+    dims[2] = inputData.shape[2]
+
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] B = \
+		    np.zeros([dims[0],dims[1],dims[2]], dtype='float32')  # zero-initialised output buffer with the same shape as inputData
+          
+    # Running CUDA code here    
+    TV_FGP_GPU(            
+            &inputData[0,0,0], &B[0,0,0],  # addresses of the first element of the input and output buffers
+                       regularization_parameter , 
+                       iterations, 
+                       tolerance_param,
+                       methodTV,
+                       nonneg,
+                       printM,
+                       dims[0], dims[1], dims[2]);  # passed as N, M, Z; NOTE(review): shape[0] is passed as N - confirm this matches the kernel's indexing for non-cubic input
+     
+    return B  # the array whose buffer was handed to TV_FGP_GPU as Output
+    
+    
+    
diff --git a/Wrappers/Python/test/test_cpu_vs_gpu.py b/Wrappers/Python/test/test_cpu_vs_gpu.py
new file mode 100644
index 0000000..74d65dd
--- /dev/null
+++ b/Wrappers/Python/test/test_cpu_vs_gpu.py
@@ -0,0 +1,10 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Feb 21 12:12:22 2018
+
+# CPU vs GPU comparison tests
+
+@author: algol
+"""
+
diff --git a/Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py b/Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py
index d742a7f..6344021 100644
--- a/Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py
+++ b/Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py
@@ -12,8 +12,8 @@ import matplotlib.pyplot as plt
 import numpy as np
 import os    
 import timeit
-from ccpi.filters.gpu_regularizers import Diff4thHajiaboli, NML, GPU_ROF_TV
-from ccpi.filters.cpu_regularizers_cython import ROF_TV
+from ccpi.filters.gpu_regularizers import Diff4thHajiaboli, NML, TV_ROF_GPU  # NOTE(review): TV_ROF_GPU is declared as a cdef extern C function in gpu_regularizers.pyx - confirm a Python-level callable with this name is actually exported
+from ccpi.filters.cpu_regularizers_cython import TV_ROF_CPU  # NOTE(review): the pars dict later in this script still reads 'algorithm': ROF_TV - confirm it is renamed as well, otherwise ROF_TV is undefined
 ###############################################################################
 def printParametersToString(pars):
         txt = r''
@@ -64,7 +64,7 @@ pars = {'algorithm': ROF_TV , \
         }
 print ("#################ROF TV CPU#####################")
 start_time = timeit.default_timer()
-rof_cpu = ROF_TV(pars['input'],
+rof_cpu = TV_ROF_CPU(pars['input'],
              pars['number_of_iterations'],
              pars['regularization_parameter'],
              pars['time_marching_parameter'] 
@@ -89,7 +89,7 @@ plt.title('{}'.format('CPU results'))
 
 print ("#################ROF TV GPU#####################")
 start_time = timeit.default_timer()
-rof_gpu = GPU_ROF_TV(pars['input'], 
+rof_gpu = TV_ROF_GPU(pars['input'], 
                      pars['number_of_iterations'], 
                      pars['time_marching_parameter'], 
                      pars['regularization_parameter'])