From 22f6e22cbe6db04c6bbe8d259ce761e3748d7102 Mon Sep 17 00:00:00 2001
From: algol <dkazanc@hotmail.com>
Date: Thu, 12 Apr 2018 11:56:54 +0100
Subject: dTV some bugs in cython

---
 Core/regularisers_CPU/FGP_dTV_core.c               |  32 ++---
 Core/regularisers_CPU/FGP_dTV_core.h               |  16 +--
 Core/regularisers_GPU/dTV_FGP_GPU_core.cu          |  90 ++++++-------
 Wrappers/Python/ccpi/filters/regularisers.py       |   4 +-
 Wrappers/Python/conda-recipe/run_test.py           | 149 ---------------------
 Wrappers/Python/conda-recipe/run_test.py.in        | 148 ++++++++++++++++++++
 Wrappers/Python/conda-recipe/testLena.npy          | Bin 0 -> 1048656 bytes
 Wrappers/Python/demos/demo_cpu_regularisers.py     |   9 +-
 .../Python/demos/demo_cpu_vs_gpu_regularisers.py   |   2 +
 Wrappers/Python/demos/demo_gpu_regularisers.py     |   8 +-
 Wrappers/Python/src/cpu_regularisers.pyx           |   2 +-
 Wrappers/Python/src/gpu_regularisers.pyx           |   7 +-
 12 files changed, 236 insertions(+), 231 deletions(-)
 delete mode 100644 Wrappers/Python/conda-recipe/run_test.py
 create mode 100644 Wrappers/Python/conda-recipe/run_test.py.in
 create mode 100644 Wrappers/Python/conda-recipe/testLena.npy

diff --git a/Core/regularisers_CPU/FGP_dTV_core.c b/Core/regularisers_CPU/FGP_dTV_core.c
index b182d46..f6b4f79 100644
--- a/Core/regularisers_CPU/FGP_dTV_core.c
+++ b/Core/regularisers_CPU/FGP_dTV_core.c
@@ -75,20 +75,20 @@ float dTV_FGP_CPU_main(float *Input, float *InputRef, float *Output, float lambd
             ProjectVect_func2D(R1, R2, InputRef_x, InputRef_y, dimX, dimY);
             
             /* computing the gradient of the objective function */
-            Obj_func2D(Input, Output, R1, R2, lambdaPar, dimX, dimY);
+            Obj_dfunc2D(Input, Output, R1, R2, lambdaPar, dimX, dimY);
             
             /* apply nonnegativity */
             if (nonneg == 1) for(j=0; j<DimTotal; j++) {if (Output[j] < 0.0f) Output[j] = 0.0f;}
             
             /*Taking a step towards minus of the gradient*/
-            Grad_func2D(P1, P2, Output, R1, R2, InputRef_x, InputRef_y, lambdaPar, dimX, dimY);
+            Grad_dfunc2D(P1, P2, Output, R1, R2, InputRef_x, InputRef_y, lambdaPar, dimX, dimY);
             
             /* projection step */
-            Proj_func2D(P1, P2, methodTV, DimTotal);
+            Proj_dfunc2D(P1, P2, methodTV, DimTotal);
             
             /*updating R and t*/
             tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
-            Rupd_func2D(P1, P1_prev, P2, P2_prev, R1, R2, tkp1, tk, DimTotal);
+            Rupd_dfunc2D(P1, P1_prev, P2, P2_prev, R1, R2, tkp1, tk, DimTotal);
             
             /* check early stopping criteria */
             re = 0.0f; re1 = 0.0f;
@@ -139,20 +139,20 @@ float dTV_FGP_CPU_main(float *Input, float *InputRef, float *Output, float lambd
             ProjectVect_func3D(R1, R2, R3, InputRef_x, InputRef_y, InputRef_z, dimX, dimY, dimZ);
             
             /* computing the gradient of the objective function */
-            Obj_func3D(Input, Output, R1, R2, R3, lambdaPar, dimX, dimY, dimZ);
+            Obj_dfunc3D(Input, Output, R1, R2, R3, lambdaPar, dimX, dimY, dimZ);
             
             /* apply nonnegativity */
             if (nonneg == 1) for(j=0; j<DimTotal; j++) {if (Output[j] < 0.0f) Output[j] = 0.0f;}  
             
             /*Taking a step towards minus of the gradient*/
-            Grad_func3D(P1, P2, P3, Output, R1, R2, R3, InputRef_x, InputRef_y, InputRef_z, lambdaPar, dimX, dimY, dimZ);
+            Grad_dfunc3D(P1, P2, P3, Output, R1, R2, R3, InputRef_x, InputRef_y, InputRef_z, lambdaPar, dimX, dimY, dimZ);
             
             /* projection step */
-            Proj_func3D(P1, P2, P3, methodTV, DimTotal);
+            Proj_dfunc3D(P1, P2, P3, methodTV, DimTotal);
             
             /*updating R and t*/
             tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
-            Rupd_func3D(P1, P1_prev, P2, P2_prev, P3, P3_prev, R1, R2, R3, tkp1, tk, DimTotal);
+            Rupd_dfunc3D(P1, P1_prev, P2, P2_prev, P3, P3_prev, R1, R2, R3, tkp1, tk, DimTotal);
             
             /* calculate norm - stopping rules*/
             re = 0.0f; re1 = 0.0f;
@@ -220,7 +220,7 @@ float ProjectVect_func2D(float *R1, float *R2, float *B_x, float *B_y, int dimX,
     return 1;
 }
 
-float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, int dimX, int dimY)
+float Obj_dfunc2D(float *A, float *D, float *R1, float *R2, float lambda, int dimX, int dimY)
 {
     float val1, val2;
     int i,j,index;
@@ -235,7 +235,7 @@ float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, int dim
         }}
     return *D;
 }
-float Grad_func2D(float *P1, float *P2, float *D, float *R1, float *R2, float *B_x, float *B_y, float lambda, int dimX, int dimY)
+float Grad_dfunc2D(float *P1, float *P2, float *D, float *R1, float *R2, float *B_x, float *B_y, float lambda, int dimX, int dimY)
 {
     float val1, val2, multip, in_prod;
     int i,j,index;
@@ -258,7 +258,7 @@ float Grad_func2D(float *P1, float *P2, float *D, float *R1, float *R2, float *B
         }}
     return 1;
 }
-float Proj_func2D(float *P1, float *P2, int methTV, int DimTotal)
+float Proj_dfunc2D(float *P1, float *P2, int methTV, int DimTotal)
 {
     float val1, val2, denom, sq_denom;
     int i;
@@ -288,7 +288,7 @@ float Proj_func2D(float *P1, float *P2, int methTV, int DimTotal)
     }
     return 1;
 }
-float Rupd_func2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, int DimTotal)
+float Rupd_dfunc2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, int DimTotal)
 {
     int i;
     float multip;
@@ -348,7 +348,7 @@ float ProjectVect_func3D(float *R1, float *R2, float *R3, float *B_x, float *B_y
     return 1;
 }
 
-float Obj_func3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ)
+float Obj_dfunc3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ)
 {
     float val1, val2, val3;
     int i,j,k,index;
@@ -365,7 +365,7 @@ float Obj_func3D(float *A, float *D, float *R1, float *R2, float *R3, float lamb
             }}}
     return *D;
 }
-float Grad_func3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float *B_x, float *B_y, float *B_z, float lambda, int dimX, int dimY, int dimZ)
+float Grad_dfunc3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float *B_x, float *B_y, float *B_z, float lambda, int dimX, int dimY, int dimZ)
 {
     float val1, val2, val3, multip, in_prod;
     int i,j,k, index;
@@ -391,7 +391,7 @@ float Grad_func3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R
             }}}
     return 1;
 }
-float Proj_func3D(float *P1, float *P2, float *P3, int methTV, int DimTotal)
+float Proj_dfunc3D(float *P1, float *P2, float *P3, int methTV, int DimTotal)
 {		
     float val1, val2, val3, denom, sq_denom;
     int i;
@@ -425,7 +425,7 @@ float Proj_func3D(float *P1, float *P2, float *P3, int methTV, int DimTotal)
 		}
     return 1;
 }
-float Rupd_func3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, int DimTotal)
+float Rupd_dfunc3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, int DimTotal)
 {
     int i;
     float multip;
diff --git a/Core/regularisers_CPU/FGP_dTV_core.h b/Core/regularisers_CPU/FGP_dTV_core.h
index ff9d8be..95dc249 100644
--- a/Core/regularisers_CPU/FGP_dTV_core.h
+++ b/Core/regularisers_CPU/FGP_dTV_core.h
@@ -56,17 +56,17 @@ float dTV_FGP_CPU_main(float *Input, float *InputRef, float *Output, float lambd
 
 CCPI_EXPORT float GradNorm_func2D(float *B, float *B_x, float *B_y, float eta, int dimX, int dimY);
 CCPI_EXPORT float ProjectVect_func2D(float *R1, float *R2, float *B_x, float *B_y, int dimX, int dimY);
-CCPI_EXPORT float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, int dimX, int dimY);
-CCPI_EXPORT float Grad_func2D(float *P1, float *P2, float *D, float *R1, float *R2, float *B_x, float *B_y, float lambda, int dimX, int dimY);
-CCPI_EXPORT float Proj_func2D(float *P1, float *P2, int methTV, int DimTotal);
-CCPI_EXPORT float Rupd_func2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, int DimTotal);
+CCPI_EXPORT float Obj_dfunc2D(float *A, float *D, float *R1, float *R2, float lambda, int dimX, int dimY);
+CCPI_EXPORT float Grad_dfunc2D(float *P1, float *P2, float *D, float *R1, float *R2, float *B_x, float *B_y, float lambda, int dimX, int dimY);
+CCPI_EXPORT float Proj_dfunc2D(float *P1, float *P2, int methTV, int DimTotal);
+CCPI_EXPORT float Rupd_dfunc2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, int DimTotal);
 
 CCPI_EXPORT float GradNorm_func3D(float *B, float *B_x, float *B_y, float *B_z, float eta, int dimX, int dimY, int dimZ);
 CCPI_EXPORT float ProjectVect_func3D(float *R1, float *R2, float *R3, float *B_x, float *B_y, float *B_z, int dimX, int dimY, int dimZ);
-CCPI_EXPORT float Obj_func3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ);
-CCPI_EXPORT float Grad_func3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float *B_x, float *B_y, float *B_z, float lambda, int dimX, int dimY, int dimZ);
-CCPI_EXPORT float Proj_func3D(float *P1, float *P2, float *P3, int methTV, int DimTotal);
-CCPI_EXPORT float Rupd_func3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, int DimTotal);
+CCPI_EXPORT float Obj_dfunc3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ);
+CCPI_EXPORT float Grad_dfunc3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float *B_x, float *B_y, float *B_z, float lambda, int dimX, int dimY, int dimZ);
+CCPI_EXPORT float Proj_dfunc3D(float *P1, float *P2, float *P3, int methTV, int DimTotal);
+CCPI_EXPORT float Rupd_dfunc3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, int DimTotal);
 #ifdef __cplusplus
 }
 #endif
diff --git a/Core/regularisers_GPU/dTV_FGP_GPU_core.cu b/Core/regularisers_GPU/dTV_FGP_GPU_core.cu
index 2b450e6..04047a5 100644
--- a/Core/regularisers_GPU/dTV_FGP_GPU_core.cu
+++ b/Core/regularisers_GPU/dTV_FGP_GPU_core.cu
@@ -72,7 +72,7 @@ struct square { __host__ __device__ float operator()(float x) { return x * x; }
 /*****************2D modules*********************/
 /************************************************/
 
-__global__ void GradNorm_func2D(float *Refd, float *Refd_x, float *Refd_y, float eta, int N, int M, int ImSize)
+__global__ void GradNorm_func2D_kernel(float *Refd, float *Refd_x, float *Refd_y, float eta, int N, int M, int ImSize)
 {
     
     float val1, val2, gradX, gradY, magn;
@@ -97,7 +97,7 @@ __global__ void GradNorm_func2D(float *Refd, float *Refd_x, float *Refd_y, float
     return;
 }
 
-__global__ void ProjectVect_func2D(float *R1, float *R2, float *Refd_x, float *Refd_y, int N, int M, int ImSize)
+__global__ void ProjectVect_func2D_kernel(float *R1, float *R2, float *Refd_x, float *Refd_y, int N, int M, int ImSize)
 {
     
     float in_prod;
@@ -116,7 +116,7 @@ __global__ void ProjectVect_func2D(float *R1, float *R2, float *Refd_x, float *R
 }
 
 
-__global__ void Obj_func2D_kernel(float *Ad, float *D, float *R1, float *R2, int N, int M, int ImSize, float lambda)
+__global__ void Obj_dfunc2D_kernel(float *Ad, float *D, float *R1, float *R2, int N, int M, int ImSize, float lambda)
 {
     
     float val1,val2;
@@ -137,7 +137,7 @@ __global__ void Obj_func2D_kernel(float *Ad, float *D, float *R1, float *R2, int
     return;
 }
 
-__global__ void Grad_func2D_kernel(float *P1, float *P2, float *D, float *R1, float *R2,  float *Refd_x, float *Refd_y, int N, int M, int ImSize, float multip)
+__global__ void Grad_dfunc2D_kernel(float *P1, float *P2, float *D, float *R1, float *R2,  float *Refd_x, float *Refd_y, int N, int M, int ImSize, float multip)
 {
     
     float val1,val2,in_prod;
@@ -165,7 +165,7 @@ __global__ void Grad_func2D_kernel(float *P1, float *P2, float *D, float *R1, fl
     return;
 }
 
-__global__ void Proj_func2D_iso_kernel(float *P1, float *P2, int N, int M, int ImSize)
+__global__ void Proj_dfunc2D_iso_kernel(float *P1, float *P2, int N, int M, int ImSize)
 {
     
     float denom;    
@@ -184,7 +184,7 @@ __global__ void Proj_func2D_iso_kernel(float *P1, float *P2, int N, int M, int I
     }
     return;
 }
-__global__ void Proj_func2D_aniso_kernel(float *P1, float *P2, int N, int M, int ImSize)
+__global__ void Proj_dfunc2D_aniso_kernel(float *P1, float *P2, int N, int M, int ImSize)
 {
     
     float val1, val2;    
@@ -204,7 +204,7 @@ __global__ void Proj_func2D_aniso_kernel(float *P1, float *P2, int N, int M, int
     }
     return;
 }
-__global__ void Rupd_func2D_kernel(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, float multip2, int N, int M, int ImSize)
+__global__ void Rupd_dfunc2D_kernel(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, float multip2, int N, int M, int ImSize)
 {
     //calculate each thread global index
     const int xIndex=blockIdx.x*blockDim.x+threadIdx.x;
@@ -218,7 +218,7 @@ __global__ void Rupd_func2D_kernel(float *P1, float *P1_old, float *P2, float *P
     }
     return;
 }
-__global__ void nonneg2D_kernel(float* Output, int N, int M, int num_total)
+__global__ void dTVnonneg2D_kernel(float* Output, int N, int M, int num_total)
 {
     int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
     int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
@@ -229,7 +229,7 @@ __global__ void nonneg2D_kernel(float* Output, int N, int M, int num_total)
         if (Output[index] < 0.0f) Output[index] = 0.0f;
     }
 }
-__global__ void copy_kernel2D(float *Input, float* Output, int N, int M, int num_total)
+__global__ void dTVcopy_kernel2D(float *Input, float* Output, int N, int M, int num_total)
 {
     int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
     int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
@@ -240,7 +240,7 @@ __global__ void copy_kernel2D(float *Input, float* Output, int N, int M, int num
         Output[index] = Input[index];
     }
 }
-__global__ void ResidCalc2D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int num_total)
+__global__ void dTVResidCalc2D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int num_total)
 {
     int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
     int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
@@ -254,7 +254,7 @@ __global__ void ResidCalc2D_kernel(float *Input1, float *Input2, float* Output,
 /************************************************/
 /*****************3D modules*********************/
 /************************************************/
-__global__ void GradNorm_func3D(float *Refd, float *Refd_x, float *Refd_y, float *Refd_z, float eta, int N, int M, int Z, int ImSize)
+__global__ void GradNorm_func3D_kernel(float *Refd, float *Refd_x, float *Refd_y, float *Refd_z, float eta, int N, int M, int Z, int ImSize)
 {
     
     float val1, val2, val3, gradX, gradY, gradZ, magn;
@@ -283,7 +283,7 @@ __global__ void GradNorm_func3D(float *Refd, float *Refd_x, float *Refd_y, float
     return;
 }
 
-__global__ void ProjectVect_func3D(float *R1, float *R2, float *R3, float *Refd_x, float *Refd_y, float *Refd_z, int N, int M, int Z, int ImSize)
+__global__ void ProjectVect_func3D_kernel(float *R1, float *R2, float *R3, float *Refd_x, float *Refd_y, float *Refd_z, int N, int M, int Z, int ImSize)
 {
     
     float in_prod;
@@ -305,7 +305,7 @@ __global__ void ProjectVect_func3D(float *R1, float *R2, float *R3, float *Refd_
 }
 
 
-__global__ void Obj_func3D_kernel(float *Ad, float *D, float *R1, float *R2, float *R3, int N, int M, int Z, int ImSize, float lambda)
+__global__ void Obj_dfunc3D_kernel(float *Ad, float *D, float *R1, float *R2, float *R3, int N, int M, int Z, int ImSize, float lambda)
 {
     
     float val1,val2,val3;
@@ -327,7 +327,7 @@ __global__ void Obj_func3D_kernel(float *Ad, float *D, float *R1, float *R2, flo
     return;
 }
 
-__global__ void Grad_func3D_kernel(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float *Refd_x, float *Refd_y, float *Refd_z, int N, int M, int Z, int ImSize, float multip)
+__global__ void Grad_dfunc3D_kernel(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float *Refd_x, float *Refd_y, float *Refd_z, int N, int M, int Z, int ImSize, float multip)
 {
     
     float val1,val2,val3,in_prod;
@@ -358,7 +358,7 @@ __global__ void Grad_func3D_kernel(float *P1, float *P2, float *P3, float *D, fl
     return;
 }
 
-__global__ void Proj_func3D_iso_kernel(float *P1, float *P2, float *P3, int N, int M, int Z, int ImSize)
+__global__ void Proj_dfunc3D_iso_kernel(float *P1, float *P2, float *P3, int N, int M, int Z, int ImSize)
 {
     
     float denom,sq_denom;    
@@ -382,7 +382,7 @@ __global__ void Proj_func3D_iso_kernel(float *P1, float *P2, float *P3, int N, i
     return;
 }
 
-__global__ void Proj_func3D_aniso_kernel(float *P1, float *P2, float *P3, int N, int M, int Z, int ImSize)
+__global__ void Proj_dfunc3D_aniso_kernel(float *P1, float *P2, float *P3, int N, int M, int Z, int ImSize)
 {
     
     float val1, val2, val3;    
@@ -408,7 +408,7 @@ __global__ void Proj_func3D_aniso_kernel(float *P1, float *P2, float *P3, int N,
 }
 
 
-__global__ void Rupd_func3D_kernel(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, float multip2, int N, int M, int Z, int ImSize)
+__global__ void Rupd_dfunc3D_kernel(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, float multip2, int N, int M, int Z, int ImSize)
 {
     //calculate each thread global index
 	int i = blockDim.x * blockIdx.x + threadIdx.x;
@@ -425,7 +425,7 @@ __global__ void Rupd_func3D_kernel(float *P1, float *P1_old, float *P2, float *P
     return;
 }
 
-__global__ void nonneg3D_kernel(float* Output, int N, int M, int Z, int num_total)
+__global__ void dTVnonneg3D_kernel(float* Output, int N, int M, int Z, int num_total)
 {
     int i = blockDim.x * blockIdx.x + threadIdx.x;
     int j = blockDim.y * blockIdx.y + threadIdx.y;
@@ -438,7 +438,7 @@ __global__ void nonneg3D_kernel(float* Output, int N, int M, int Z, int num_tota
     }
 }
 
-__global__ void copy_kernel3D(float *Input, float* Output, int N, int M, int Z, int num_total)
+__global__ void dTVcopy_kernel3D(float *Input, float* Output, int N, int M, int Z, int num_total)
 {
 	int i = blockDim.x * blockIdx.x + threadIdx.x;
     int j = blockDim.y * blockIdx.y + threadIdx.y;
@@ -451,7 +451,7 @@ __global__ void copy_kernel3D(float *Input, float* Output, int N, int M, int Z,
     }
 }
 
-__global__ void ResidCalc3D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int Z, int num_total)
+__global__ void dTVResidCalc3D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int Z, int num_total)
 {
 	int i = blockDim.x * blockIdx.x + threadIdx.x;
     int j = blockDim.y * blockIdx.y + threadIdx.y;
@@ -517,7 +517,7 @@ extern "C" void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, f
         /******************** Run CUDA 2D kernel here ********************/
         multip = (1.0f/(8.0f*lambdaPar));
         /* calculate gradient vectors for the reference */
-        GradNorm_func2D<<<dimGrid,dimBlock>>>(d_InputRef, InputRef_x, InputRef_y, eta, dimX, dimY, ImSize);
+        GradNorm_func2D_kernel<<<dimGrid,dimBlock>>>(d_InputRef, InputRef_x, InputRef_y, eta, dimX, dimY, ImSize);
         checkCudaErrors( cudaDeviceSynchronize() );
         checkCudaErrors(cudaPeekAtLastError() );
     
@@ -525,41 +525,41 @@ extern "C" void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, f
         for (i = 0; i < iter; i++) {
         
             /*projects a 2D vector field R-1,2 onto the orthogonal complement of another 2D vector field InputRef_xy*/         
-            ProjectVect_func2D<<<dimGrid,dimBlock>>>(R1, R2, InputRef_x, InputRef_y, dimX, dimY, ImSize);
+            ProjectVect_func2D_kernel<<<dimGrid,dimBlock>>>(R1, R2, InputRef_x, InputRef_y, dimX, dimY, ImSize);
             checkCudaErrors( cudaDeviceSynchronize() );
             checkCudaErrors(cudaPeekAtLastError() );
             
             /* computing the gradient of the objective function */
-            Obj_func2D_kernel<<<dimGrid,dimBlock>>>(d_input, d_update, R1, R2, dimX, dimY, ImSize, lambdaPar);
+            Obj_dfunc2D_kernel<<<dimGrid,dimBlock>>>(d_input, d_update, R1, R2, dimX, dimY, ImSize, lambdaPar);
             checkCudaErrors( cudaDeviceSynchronize() );
             checkCudaErrors(cudaPeekAtLastError() );
             
             if (nonneg != 0) {
-            nonneg2D_kernel<<<dimGrid,dimBlock>>>(d_update, dimX, dimY, ImSize);
+            dTVnonneg2D_kernel<<<dimGrid,dimBlock>>>(d_update, dimX, dimY, ImSize);
             checkCudaErrors( cudaDeviceSynchronize() );
             checkCudaErrors(cudaPeekAtLastError() ); }
                     
             /*Taking a step towards minus of the gradient*/
-            Grad_func2D_kernel<<<dimGrid,dimBlock>>>(P1, P2, d_update, R1, R2, InputRef_x, InputRef_y, dimX, dimY, ImSize, multip);
+            Grad_dfunc2D_kernel<<<dimGrid,dimBlock>>>(P1, P2, d_update, R1, R2, InputRef_x, InputRef_y, dimX, dimY, ImSize, multip);
             checkCudaErrors( cudaDeviceSynchronize() );
             checkCudaErrors(cudaPeekAtLastError() );
         
             /* projection step */
-            if (methodTV == 0) Proj_func2D_iso_kernel<<<dimGrid,dimBlock>>>(P1, P2, dimX, dimY, ImSize); /*isotropic TV*/
-            else Proj_func2D_aniso_kernel<<<dimGrid,dimBlock>>>(P1, P2, dimX, dimY, ImSize); /*anisotropic TV*/            
+            if (methodTV == 0) Proj_dfunc2D_iso_kernel<<<dimGrid,dimBlock>>>(P1, P2, dimX, dimY, ImSize); /*isotropic TV*/
+            else Proj_dfunc2D_aniso_kernel<<<dimGrid,dimBlock>>>(P1, P2, dimX, dimY, ImSize); /*anisotropic TV*/            
             checkCudaErrors( cudaDeviceSynchronize() );
             checkCudaErrors(cudaPeekAtLastError() );
         
             tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
             multip2 = ((tk-1.0f)/tkp1);
         
-            Rupd_func2D_kernel<<<dimGrid,dimBlock>>>(P1, P1_prev, P2, P2_prev, R1, R2, tkp1, tk, multip2, dimX, dimY, ImSize);
+            Rupd_dfunc2D_kernel<<<dimGrid,dimBlock>>>(P1, P1_prev, P2, P2_prev, R1, R2, tkp1, tk, multip2, dimX, dimY, ImSize);
             checkCudaErrors( cudaDeviceSynchronize() );
             checkCudaErrors(cudaPeekAtLastError() );
         
             if (epsil != 0.0f) {
                 /* calculate norm - stopping rules using the Thrust library */
-                ResidCalc2D_kernel<<<dimGrid,dimBlock>>>(d_update, d_update_prev, P1_prev, dimX, dimY, ImSize);
+                dTVResidCalc2D_kernel<<<dimGrid,dimBlock>>>(d_update, d_update_prev, P1_prev, dimX, dimY, ImSize);
                 checkCudaErrors( cudaDeviceSynchronize() );
                 checkCudaErrors(cudaPeekAtLastError() );               
                 
@@ -572,16 +572,16 @@ extern "C" void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, f
                 if (re < epsil)  count++;
                     if (count > 4) break;       
              
-                copy_kernel2D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, ImSize);
+                dTVcopy_kernel2D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, ImSize);
                 checkCudaErrors( cudaDeviceSynchronize() );
                 checkCudaErrors(cudaPeekAtLastError() );                                              
             }
         
-            copy_kernel2D<<<dimGrid,dimBlock>>>(P1, P1_prev, dimX, dimY, ImSize);
+            dTVcopy_kernel2D<<<dimGrid,dimBlock>>>(P1, P1_prev, dimX, dimY, ImSize);
             checkCudaErrors( cudaDeviceSynchronize() );
             checkCudaErrors(cudaPeekAtLastError() );
         
-            copy_kernel2D<<<dimGrid,dimBlock>>>(P2, P2_prev, dimX, dimY, ImSize);
+            dTVcopy_kernel2D<<<dimGrid,dimBlock>>>(P2, P2_prev, dimX, dimY, ImSize);
             checkCudaErrors( cudaDeviceSynchronize() );
             checkCudaErrors(cudaPeekAtLastError() );       
  
@@ -651,7 +651,7 @@ extern "C" void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, f
             /********************** Run CUDA 3D kernel here ********************/    
             multip = (1.0f/(26.0f*lambdaPar));
             /* calculate gradient vectors for the reference */
-            GradNorm_func3D<<<dimGrid,dimBlock>>>(d_InputRef, InputRef_x, InputRef_y, InputRef_z, eta, dimX, dimY, dimZ, ImSize);
+            GradNorm_func3D_kernel<<<dimGrid,dimBlock>>>(d_InputRef, InputRef_x, InputRef_y, InputRef_z, eta, dimX, dimY, dimZ, ImSize);
             checkCudaErrors( cudaDeviceSynchronize() );
             checkCudaErrors(cudaPeekAtLastError() );
     
@@ -659,41 +659,41 @@ extern "C" void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, f
         for (i = 0; i < iter; i++) {
 
 			/*projects a 3D vector field R-1,2,3 onto the orthogonal complement of another 3D vector field InputRef_xyz*/
-            ProjectVect_func3D<<<dimGrid,dimBlock>>>(R1, R2, R3, InputRef_x, InputRef_y, InputRef_z, dimX, dimY, dimZ, ImSize);
+            ProjectVect_func3D_kernel<<<dimGrid,dimBlock>>>(R1, R2, R3, InputRef_x, InputRef_y, InputRef_z, dimX, dimY, dimZ, ImSize);
             checkCudaErrors( cudaDeviceSynchronize() );
             checkCudaErrors(cudaPeekAtLastError() );
         
             /* computing the gradient of the objective function */
-            Obj_func3D_kernel<<<dimGrid,dimBlock>>>(d_input, d_update, R1, R2, R3, dimX, dimY, dimZ, ImSize, lambdaPar);
+            Obj_dfunc3D_kernel<<<dimGrid,dimBlock>>>(d_input, d_update, R1, R2, R3, dimX, dimY, dimZ, ImSize, lambdaPar);
             checkCudaErrors( cudaDeviceSynchronize() );
             checkCudaErrors(cudaPeekAtLastError() );
         
             if (nonneg != 0) {
-            nonneg3D_kernel<<<dimGrid,dimBlock>>>(d_update, dimX, dimY, dimZ, ImSize);
+            dTVnonneg3D_kernel<<<dimGrid,dimBlock>>>(d_update, dimX, dimY, dimZ, ImSize);
             checkCudaErrors( cudaDeviceSynchronize() );
             checkCudaErrors(cudaPeekAtLastError() ); }
             
             /*Taking a step towards minus of the gradient*/
-            Grad_func3D_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, d_update, R1, R2, R3, InputRef_x, InputRef_y, InputRef_z, dimX, dimY, dimZ, ImSize, multip);
+            Grad_dfunc3D_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, d_update, R1, R2, R3, InputRef_x, InputRef_y, InputRef_z, dimX, dimY, dimZ, ImSize, multip);
             checkCudaErrors( cudaDeviceSynchronize() );
             checkCudaErrors(cudaPeekAtLastError() );
         
             /* projection step */
-            if (methodTV == 0) Proj_func3D_iso_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, dimX, dimY, dimZ, ImSize); /* isotropic kernel */
-            else Proj_func3D_aniso_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, dimX, dimY, dimZ, ImSize); /* anisotropic kernel */
+            if (methodTV == 0) Proj_dfunc3D_iso_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, dimX, dimY, dimZ, ImSize); /* isotropic kernel */
+            else Proj_dfunc3D_aniso_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, dimX, dimY, dimZ, ImSize); /* anisotropic kernel */
             checkCudaErrors( cudaDeviceSynchronize() );
             checkCudaErrors(cudaPeekAtLastError() );
         
             tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
             multip2 = ((tk-1.0f)/tkp1);
         
-            Rupd_func3D_kernel<<<dimGrid,dimBlock>>>(P1, P1_prev, P2, P2_prev, P3, P3_prev, R1, R2, R3, tkp1, tk, multip2, dimX, dimY, dimZ, ImSize);
+            Rupd_dfunc3D_kernel<<<dimGrid,dimBlock>>>(P1, P1_prev, P2, P2_prev, P3, P3_prev, R1, R2, R3, tkp1, tk, multip2, dimX, dimY, dimZ, ImSize);
             checkCudaErrors( cudaDeviceSynchronize() );
             checkCudaErrors(cudaPeekAtLastError() );
             
             if (epsil != 0.0f) {
                 /* calculate norm - stopping rules using the Thrust library */
-                ResidCalc3D_kernel<<<dimGrid,dimBlock>>>(d_update, d_update_prev, P1_prev, dimX, dimY, dimZ, ImSize);
+                dTVResidCalc3D_kernel<<<dimGrid,dimBlock>>>(d_update, d_update_prev, P1_prev, dimX, dimY, dimZ, ImSize);
                 checkCudaErrors( cudaDeviceSynchronize() );
                 checkCudaErrors(cudaPeekAtLastError() );               
                 
@@ -706,20 +706,20 @@ extern "C" void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, f
                 if (re < epsil)  count++;
                     if (count > 4) break;       
              
-                copy_kernel3D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, dimZ, ImSize);
+                dTVcopy_kernel3D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, dimZ, ImSize);
                 checkCudaErrors( cudaDeviceSynchronize() );
                 checkCudaErrors(cudaPeekAtLastError() );
             }
         
-            copy_kernel3D<<<dimGrid,dimBlock>>>(P1, P1_prev, dimX, dimY, dimZ, ImSize);
+            dTVcopy_kernel3D<<<dimGrid,dimBlock>>>(P1, P1_prev, dimX, dimY, dimZ, ImSize);
             checkCudaErrors( cudaDeviceSynchronize() );
             checkCudaErrors(cudaPeekAtLastError() );
         
-            copy_kernel3D<<<dimGrid,dimBlock>>>(P2, P2_prev, dimX, dimY, dimZ, ImSize);
+            dTVcopy_kernel3D<<<dimGrid,dimBlock>>>(P2, P2_prev, dimX, dimY, dimZ, ImSize);
             checkCudaErrors( cudaDeviceSynchronize() );
             checkCudaErrors(cudaPeekAtLastError() );   
             
-            copy_kernel3D<<<dimGrid,dimBlock>>>(P3, P3_prev, dimX, dimY, dimZ, ImSize);
+            dTVcopy_kernel3D<<<dimGrid,dimBlock>>>(P3, P3_prev, dimX, dimY, dimZ, ImSize);
             checkCudaErrors( cudaDeviceSynchronize() );
             checkCudaErrors(cudaPeekAtLastError() );      
  
diff --git a/Wrappers/Python/ccpi/filters/regularisers.py b/Wrappers/Python/ccpi/filters/regularisers.py
index c6723fa..376cc9c 100644
--- a/Wrappers/Python/ccpi/filters/regularisers.py
+++ b/Wrappers/Python/ccpi/filters/regularisers.py
@@ -2,8 +2,8 @@
 script which assigns a proper device core function based on a flag ('cpu' or 'gpu')
 """
 
-from ccpi.filters.cpu_regularisers_cython import TV_ROF_CPU, TV_FGP_CPU dTV_FGP_CPU
-from ccpi.filters.gpu_regularisers import TV_ROF_GPU, TV_FGP_GPU dTV_FGP_GPU
+from ccpi.filters.cpu_regularisers_cython import TV_ROF_CPU, TV_FGP_CPU, dTV_FGP_CPU
+from ccpi.filters.gpu_regularisers import TV_ROF_GPU, TV_FGP_GPU, dTV_FGP_GPU
 
 def ROF_TV(inputData, regularisation_parameter, iterations,
                      time_marching_parameter,device='cpu'):
diff --git a/Wrappers/Python/conda-recipe/run_test.py b/Wrappers/Python/conda-recipe/run_test.py
deleted file mode 100644
index 04bbd40..0000000
--- a/Wrappers/Python/conda-recipe/run_test.py
+++ /dev/null
@@ -1,149 +0,0 @@
-import unittest
-import numpy as np
-import os
-from ccpi.filters.regularisers import ROF_TV, FGP_TV
-import matplotlib.pyplot as plt
-
-def rmse(im1, im2):
-    rmse = np.sqrt(np.sum((im1 - im2) ** 2) / float(im1.size))
-    return rmse
-
-class TestRegularisers(unittest.TestCase):
-    
-    def setUp(self):
-        pass
-
-    def test_cpu_regularisers(self):
-        filename = os.path.join(".." , ".." , ".." , "data" ,"lena_gray_512.tif")
-        
-        # read noiseless image
-        Im = plt.imread(filename)
-        Im = np.asarray(Im, dtype='float32')
-
-        Im = Im/255
-        tolerance = 1e-05
-        rms_rof_exp = 0.006812507 #expected value for ROF model
-        rms_fgp_exp = 0.019152347 #expected value for FGP model
-        
-        # set parameters for ROF-TV
-        pars_rof_tv = {'algorithm': ROF_TV, \
-                            'input' : Im,\
-                            'regularisation_parameter':0.04,\
-                            'number_of_iterations': 50,\
-                            'time_marching_parameter': 0.0025
-                            }
-        # set parameters for FGP-TV
-        pars_fgp_tv = {'algorithm' : FGP_TV, \
-                            'input' : Im,\
-                            'regularisation_parameter':0.04, \
-                            'number_of_iterations' :50 ,\
-                            'tolerance_constant':1e-08,\
-                            'methodTV': 0 ,\
-                            'nonneg': 0 ,\
-                            'printingOut': 0 
-                            }
-        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-        print ("_________testing ROF-TV (2D, CPU)__________")
-        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-        res = True
-        rof_cpu = ROF_TV(pars_rof_tv['input'],
-             pars_rof_tv['regularisation_parameter'],
-             pars_rof_tv['number_of_iterations'],
-             pars_rof_tv['time_marching_parameter'],'cpu')
-        rms_rof = rmse(Im, rof_cpu)
-        # now compare obtained rms with the expected value
-        self.assertLess(abs(rms_rof-rms_rof_exp) , tolerance)
-        """
-        if abs(rms_rof-self.rms_rof_exp) > self.tolerance:
-            raise TypeError('ROF-TV (2D, CPU) test FAILED')
-        else:
-            print ("test PASSED")
-        """
-        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-        print ("_________testing FGP-TV (2D, CPU)__________")
-        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-        fgp_cpu = FGP_TV(pars_fgp_tv['input'], 
-              pars_fgp_tv['regularisation_parameter'],
-              pars_fgp_tv['number_of_iterations'],
-              pars_fgp_tv['tolerance_constant'], 
-              pars_fgp_tv['methodTV'],
-              pars_fgp_tv['nonneg'],
-              pars_fgp_tv['printingOut'],'cpu')  
-        rms_fgp = rmse(Im, fgp_cpu)
-        # now compare obtained rms with the expected value
-        self.assertLess(abs(rms_fgp-rms_fgp_exp) , tolerance)
-        """
-        if abs(rms_fgp-self.rms_fgp_exp) > self.tolerance:
-            raise TypeError('FGP-TV (2D, CPU) test FAILED')
-        else:
-            print ("test PASSED")
-        """
-        self.assertTrue(res)
-    def test_gpu_regularisers(self):
-        filename = os.path.join(".." , ".." , ".." , "data" ,"lena_gray_512.tif")
-        
-        # read noiseless image
-        Im = plt.imread(filename)
-        Im = np.asarray(Im, dtype='float32')
-
-        Im = Im/255
-        tolerance = 1e-05
-        rms_rof_exp = 0.006812507 #expected value for ROF model
-        rms_fgp_exp = 0.019152347 #expected value for FGP model
-        
-        # set parameters for ROF-TV
-        pars_rof_tv = {'algorithm': ROF_TV, \
-                            'input' : Im,\
-                            'regularisation_parameter':0.04,\
-                            'number_of_iterations': 50,\
-                            'time_marching_parameter': 0.0025
-                            }
-        # set parameters for FGP-TV
-        pars_fgp_tv = {'algorithm' : FGP_TV, \
-                            'input' : Im,\
-                            'regularisation_parameter':0.04, \
-                            'number_of_iterations' :50 ,\
-                            'tolerance_constant':1e-08,\
-                            'methodTV': 0 ,\
-                            'nonneg': 0 ,\
-                            'printingOut': 0 
-                            }
-        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-        print ("_________testing ROF-TV (2D, GPU)__________")
-        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-        res = True
-        rof_gpu = ROF_TV(pars_rof_tv['input'],
-             pars_rof_tv['regularisation_parameter'],
-             pars_rof_tv['number_of_iterations'],
-             pars_rof_tv['time_marching_parameter'],'gpu')
-        rms_rof = rmse(Im, rof_gpu)
-        # now compare obtained rms with the expected value
-        self.assertLess(abs(rms_rof-rms_rof_exp) , tolerance)
-        """
-        if abs(rms_rof-self.rms_rof_exp) > self.tolerance:
-            raise TypeError('ROF-TV (2D, GPU) test FAILED')
-        else:
-            print ("test PASSED")
-        """
-        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-        print ("_________testing FGP-TV (2D, GPU)__________")
-        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-        fgp_gpu = FGP_TV(pars_fgp_tv['input'], 
-              pars_fgp_tv['regularisation_parameter'],
-              pars_fgp_tv['number_of_iterations'],
-              pars_fgp_tv['tolerance_constant'], 
-              pars_fgp_tv['methodTV'],
-              pars_fgp_tv['nonneg'],
-              pars_fgp_tv['printingOut'],'gpu')  
-        rms_fgp = rmse(Im, fgp_gpu)
-        # now compare obtained rms with the expected value
-        self.assertLess(abs(rms_fgp-rms_fgp_exp) , tolerance)
-        """
-        if abs(rms_fgp-self.rms_fgp_exp) > self.tolerance:
-            raise TypeError('FGP-TV (2D, GPU) test FAILED')
-        else:
-            print ("test PASSED")
-        """
-        self.assertTrue(res)
-if __name__ == '__main__':
-    unittest.main()
\ No newline at end of file
diff --git a/Wrappers/Python/conda-recipe/run_test.py.in b/Wrappers/Python/conda-recipe/run_test.py.in
new file mode 100644
index 0000000..9a6f4de
--- /dev/null
+++ b/Wrappers/Python/conda-recipe/run_test.py.in
@@ -0,0 +1,148 @@
+import unittest
+import numpy as np
+from ccpi.filters.regularisers import ROF_TV, FGP_TV
+
+def rmse(im1, im2):
+    rmse = np.sqrt(np.sum((im1 - im2) ** 2) / float(im1.size))
+    return rmse
+
+class TestRegularisers(unittest.TestCase):
+    
+    def setUp(self):
+        pass
+
+    def test_cpu_regularisers(self):
+        #filename = os.path.join(".." , ".." , ".." , "data" ,"testLena.npy")
+        
+        Im = np.load('testLena.npy');
+        """
+        # read noiseless image
+        Im = plt.imread(filename)
+        Im = np.asarray(Im, dtype='float32')
+
+        Im = Im/255
+        """
+        tolerance = 1e-05
+        rms_rof_exp = 0.006812507 #expected value for ROF model
+        rms_fgp_exp = 0.019152347 #expected value for FGP model
+        
+        # set parameters for ROF-TV
+        pars_rof_tv = {'algorithm': ROF_TV, \
+                            'input' : Im,\
+                            'regularisation_parameter':0.04,\
+                            'number_of_iterations': 50,\
+                            'time_marching_parameter': 0.0025
+                            }
+        # set parameters for FGP-TV
+        pars_fgp_tv = {'algorithm' : FGP_TV, \
+                            'input' : Im,\
+                            'regularisation_parameter':0.04, \
+                            'number_of_iterations' :50 ,\
+                            'tolerance_constant':1e-08,\
+                            'methodTV': 0 ,\
+                            'nonneg': 0 ,\
+                            'printingOut': 0 
+                            }
+        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+        print ("_________testing ROF-TV (2D, CPU)__________")
+        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+        res = True
+        rof_cpu = ROF_TV(pars_rof_tv['input'],
+             pars_rof_tv['regularisation_parameter'],
+             pars_rof_tv['number_of_iterations'],
+             pars_rof_tv['time_marching_parameter'],'cpu')
+        rms_rof = rmse(Im, rof_cpu)
+        # now compare obtained rms with the expected value
+        self.assertLess(abs(rms_rof-rms_rof_exp) , tolerance)
+        """
+        if abs(rms_rof-self.rms_rof_exp) > self.tolerance:
+            raise TypeError('ROF-TV (2D, CPU) test FAILED')
+        else:
+            print ("test PASSED")
+        """
+        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+        print ("_________testing FGP-TV (2D, CPU)__________")
+        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+        fgp_cpu = FGP_TV(pars_fgp_tv['input'], 
+              pars_fgp_tv['regularisation_parameter'],
+              pars_fgp_tv['number_of_iterations'],
+              pars_fgp_tv['tolerance_constant'], 
+              pars_fgp_tv['methodTV'],
+              pars_fgp_tv['nonneg'],
+              pars_fgp_tv['printingOut'],'cpu')  
+        rms_fgp = rmse(Im, fgp_cpu)
+        # now compare obtained rms with the expected value
+        self.assertLess(abs(rms_fgp-rms_fgp_exp) , tolerance)
+        """
+        if abs(rms_fgp-self.rms_fgp_exp) > self.tolerance:
+            raise TypeError('FGP-TV (2D, CPU) test FAILED')
+        else:
+            print ("test PASSED")
+        """
+        self.assertTrue(res)
+    def test_gpu_regularisers(self):
+        #filename = os.path.join(".." , ".." , ".." , "data" ,"testLena.npy")
+        
+        Im = np.load('testLena.npy');
+
+        #Im = Im/255
+        tolerance = 1e-05
+        rms_rof_exp = 0.006812507 #expected value for ROF model
+        rms_fgp_exp = 0.019152347 #expected value for FGP model
+        
+        # set parameters for ROF-TV
+        pars_rof_tv = {'algorithm': ROF_TV, \
+                            'input' : Im,\
+                            'regularisation_parameter':0.04,\
+                            'number_of_iterations': 50,\
+                            'time_marching_parameter': 0.0025
+                            }
+        # set parameters for FGP-TV
+        pars_fgp_tv = {'algorithm' : FGP_TV, \
+                            'input' : Im,\
+                            'regularisation_parameter':0.04, \
+                            'number_of_iterations' :50 ,\
+                            'tolerance_constant':1e-08,\
+                            'methodTV': 0 ,\
+                            'nonneg': 0 ,\
+                            'printingOut': 0 
+                            }
+        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+        print ("_________testing ROF-TV (2D, GPU)__________")
+        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+        res = True
+        rof_gpu = ROF_TV(pars_rof_tv['input'],
+             pars_rof_tv['regularisation_parameter'],
+             pars_rof_tv['number_of_iterations'],
+             pars_rof_tv['time_marching_parameter'],'gpu')
+        rms_rof = rmse(Im, rof_gpu)
+        # now compare obtained rms with the expected value
+        self.assertLess(abs(rms_rof-rms_rof_exp) , tolerance)
+        """
+        if abs(rms_rof-self.rms_rof_exp) > self.tolerance:
+            raise TypeError('ROF-TV (2D, GPU) test FAILED')
+        else:
+            print ("test PASSED")
+        """
+        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+        print ("_________testing FGP-TV (2D, GPU)__________")
+        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+        fgp_gpu = FGP_TV(pars_fgp_tv['input'], 
+              pars_fgp_tv['regularisation_parameter'],
+              pars_fgp_tv['number_of_iterations'],
+              pars_fgp_tv['tolerance_constant'], 
+              pars_fgp_tv['methodTV'],
+              pars_fgp_tv['nonneg'],
+              pars_fgp_tv['printingOut'],'gpu')  
+        rms_fgp = rmse(Im, fgp_gpu)
+        # now compare obtained rms with the expected value
+        self.assertLess(abs(rms_fgp-rms_fgp_exp) , tolerance)
+        """
+        if abs(rms_fgp-self.rms_fgp_exp) > self.tolerance:
+            raise TypeError('FGP-TV (2D, GPU) test FAILED')
+        else:
+            print ("test PASSED")
+        """
+        self.assertTrue(res)
+if __name__ == '__main__':
+    unittest.main()
diff --git a/Wrappers/Python/conda-recipe/testLena.npy b/Wrappers/Python/conda-recipe/testLena.npy
new file mode 100644
index 0000000..14bc0e3
Binary files /dev/null and b/Wrappers/Python/conda-recipe/testLena.npy differ
diff --git a/Wrappers/Python/demos/demo_cpu_regularisers.py b/Wrappers/Python/demos/demo_cpu_regularisers.py
index fd3050c..00beb0b 100644
--- a/Wrappers/Python/demos/demo_cpu_regularisers.py
+++ b/Wrappers/Python/demos/demo_cpu_regularisers.py
@@ -22,6 +22,8 @@ def printParametersToString(pars):
                 txt += "{0} = {1}".format(key, value.__name__)
             elif key == 'input':
                 txt += "{0} = {1}".format(key, np.shape(value))
+            elif key == 'refdata':
+                txt += "{0} = {1}".format(key, np.shape(value))
             else:
                 txt += "{0} = {1}".format(key, value)
             txt += '\n'
@@ -196,7 +198,7 @@ plt.title('{}'.format('CPU results'))
 
 # Uncomment to test 3D regularisation performance 
 #%%
-
+"""
 N = 512
 slices = 20
 
@@ -318,8 +320,8 @@ a.set_title('Noisy Image')
 imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
 
 # set parameters
-pars = {'algorithm' : FGP_dTV, \
-        'input' : noisyVol,\ 
+pars = {'algorithm' : FGP_dTV,\
+        'input' : noisyVol,\
         'refdata' : noisyRef,\
         'regularisation_parameter':0.04, \
         'number_of_iterations' :300 ,\
@@ -358,4 +360,5 @@ a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
          verticalalignment='top', bbox=props)
 imgplot = plt.imshow(fgp_dTV_cpu3D[10,:,:], cmap="gray")
 plt.title('{}'.format('Recovered volume on the CPU using FGP-dTV'))
+"""
 #%%
diff --git a/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py b/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py
index aa1f865..310cf75 100644
--- a/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py
+++ b/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py
@@ -22,6 +22,8 @@ def printParametersToString(pars):
                 txt += "{0} = {1}".format(key, value.__name__)
             elif key == 'input':
                 txt += "{0} = {1}".format(key, np.shape(value))
+            elif key == 'refdata':
+                txt += "{0} = {1}".format(key, np.shape(value))
             else:
                 txt += "{0} = {1}".format(key, value)
             txt += '\n'
diff --git a/Wrappers/Python/demos/demo_gpu_regularisers.py b/Wrappers/Python/demos/demo_gpu_regularisers.py
index 4759cc3..24a3c88 100644
--- a/Wrappers/Python/demos/demo_gpu_regularisers.py
+++ b/Wrappers/Python/demos/demo_gpu_regularisers.py
@@ -22,6 +22,8 @@ def printParametersToString(pars):
                 txt += "{0} = {1}".format(key, value.__name__)
             elif key == 'input':
                 txt += "{0} = {1}".format(key, np.shape(value))
+            elif key == 'refdata':
+                txt += "{0} = {1}".format(key, np.shape(value))
             else:
                 txt += "{0} = {1}".format(key, value)
             txt += '\n'
@@ -192,7 +194,7 @@ plt.title('{}'.format('GPU results'))
 
 # Uncomment to test 3D regularisation performance 
 #%%
-
+"""
 N = 512
 slices = 20
 
@@ -314,7 +316,7 @@ imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
 
 # set parameters
 pars = {'algorithm' : FGP_dTV, \
-        'input' : noisyVol,\ 
+        'input' : noisyVol,\
         'refdata' : noisyRef,\
         'regularisation_parameter':0.04, \
         'number_of_iterations' :300 ,\
@@ -352,5 +354,5 @@ a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
          verticalalignment='top', bbox=props)
 imgplot = plt.imshow(fgp_dTV_gpu3D[10,:,:], cmap="gray")
 plt.title('{}'.format('Recovered volume on the GPU using FGP-dTV'))
-
+"""
 #%%
diff --git a/Wrappers/Python/src/cpu_regularisers.pyx b/Wrappers/Python/src/cpu_regularisers.pyx
index 8f9185a..1661375 100644
--- a/Wrappers/Python/src/cpu_regularisers.pyx
+++ b/Wrappers/Python/src/cpu_regularisers.pyx
@@ -156,8 +156,8 @@ def dTV_FGP_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
     dTV_FGP_CPU_main(&inputData[0,0], &refdata[0,0], &outputData[0,0], regularisation_parameter, 
                        iterationsNumb, 
                        tolerance_param,
-                       methodTV,
                        eta_const,
+                       methodTV,                       
                        nonneg,
                        printM,
                        dims[0], dims[1], 1)
diff --git a/Wrappers/Python/src/gpu_regularisers.pyx b/Wrappers/Python/src/gpu_regularisers.pyx
index 4a14f69..18efdcd 100644
--- a/Wrappers/Python/src/gpu_regularisers.pyx
+++ b/Wrappers/Python/src/gpu_regularisers.pyx
@@ -20,7 +20,7 @@ cimport numpy as np
 
 cdef extern void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z);
 cdef extern void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int N, int M, int Z);
-cdef extern void dTV_FGP_CPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int printM, int N, int M, int Z);
+cdef extern void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int printM, int N, int M, int Z);
 
 # Total-variation Rudin-Osher-Fatemi (ROF)
 def TV_ROF_GPU(inputData,
@@ -187,8 +187,7 @@ def FGPTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
 		    np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
           
     # Running CUDA code here    
-    TV_FGP_GPU_main(            
-            &inputData[0,0,0], &outputData[0,0,0], 
+    TV_FGP_GPU_main(&inputData[0,0,0], &outputData[0,0,0], 
                        regularisation_parameter , 
                        iterations, 
                        tolerance_param,
@@ -204,7 +203,7 @@ def FGPTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
 #****************************************************************#
 #******** Directional TV Fast-Gradient-Projection (FGP)*********#
 def FGPdTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, 
-             np.ndarray[np.float32_t, ndim=3, mode="c"] refdata,
+             np.ndarray[np.float32_t, ndim=2, mode="c"] refdata,
                      float regularisation_parameter,
                      int iterations, 
                      float tolerance_param,
-- 
cgit v1.2.3