From e53d631a2d0c34915459028e3db64153c3a936c3 Mon Sep 17 00:00:00 2001
From: Daniil Kazantsev <dkazanc@hotmail.com>
Date: Wed, 23 May 2018 15:41:35 +0100
Subject: TGV for CPU and GPU added with demos

---
 Core/CMakeLists.txt                                |   3 +-
 Core/regularisers_CPU/SB_TV_core.h                 |  56 +++-
 Core/regularisers_CPU/TGV_core.c                   | 251 ++++++++++++++++
 Core/regularisers_CPU/TGV_core.h                   |  61 ++++
 Core/regularisers_GPU/TGV_GPU_core.cu              | 323 +++++++++++++++++++++
 Core/regularisers_GPU/TGV_GPU_core.h               |   8 +
 Readme.md                                          |  23 +-
 Wrappers/Matlab/demos/demoMatlab_denoise.m         |  42 ++-
 Wrappers/Matlab/mex_compile/compileCPU_mex_Linux.m |   5 +-
 .../Matlab/mex_compile/compileCPU_mex_WINDOWS.m    |   9 +-
 Wrappers/Matlab/mex_compile/compileGPU_mex.m       |   6 +-
 Wrappers/Matlab/mex_compile/regularisers_CPU/TGV.c |  78 +++++
 .../mex_compile/regularisers_GPU/TGV_GPU.cpp       |  78 +++++
 Wrappers/Python/ccpi/filters/regularisers.py       |  24 +-
 Wrappers/Python/demos/demo_cpu_regularisers.py     |  67 ++++-
 .../Python/demos/demo_cpu_vs_gpu_regularisers.py   | 106 ++++++-
 Wrappers/Python/demos/demo_gpu_regularisers.py     |  70 ++++-
 Wrappers/Python/setup-regularisers.py.in           |   1 +
 Wrappers/Python/src/cpu_regularisers.pyx           |  34 +++
 Wrappers/Python/src/gpu_regularisers.pyx           |  34 +++
 20 files changed, 1214 insertions(+), 65 deletions(-)
 create mode 100644 Core/regularisers_CPU/TGV_core.c
 create mode 100644 Core/regularisers_CPU/TGV_core.h
 create mode 100644 Core/regularisers_GPU/TGV_GPU_core.cu
 create mode 100644 Core/regularisers_GPU/TGV_GPU_core.h
 create mode 100644 Wrappers/Matlab/mex_compile/regularisers_CPU/TGV.c
 create mode 100644 Wrappers/Matlab/mex_compile/regularisers_GPU/TGV_GPU.cpp

diff --git a/Core/CMakeLists.txt b/Core/CMakeLists.txt
index 7529d72..f6cb3af 100644
--- a/Core/CMakeLists.txt
+++ b/Core/CMakeLists.txt
@@ -76,11 +76,11 @@ message("Adding regularisers as a shared library")
 add_library(cilreg SHARED
 	    ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/FGP_TV_core.c
 	    ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/SB_TV_core.c
+	    ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/TGV_core.c
 	    ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/Diffusion_core.c
 	    ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/Diffus4th_order_core.c
 	    #${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/LLT_model_core.c
 	    #${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/PatchBased_Regul_core.c
-	    #${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/TGV_PD_core.c
         ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/ROF_TV_core.c
         ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/FGP_dTV_core.c
         ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/TNV_core.c
@@ -128,6 +128,7 @@ if (CUDA_FOUND)
 	${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/TV_ROF_GPU_core.cu
 	${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/TV_FGP_GPU_core.cu
 	${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/TV_SB_GPU_core.cu
+	${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/TGV_GPU_core.cu
 	${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/dTV_FGP_GPU_core.cu
 	${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/NonlDiff_GPU_core.cu
 	${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/Diffus_4thO_GPU_core.cu
diff --git a/Core/regularisers_CPU/SB_TV_core.h b/Core/regularisers_CPU/SB_TV_core.h
index 791d951..ad4f529 100755
--- a/Core/regularisers_CPU/SB_TV_core.h
+++ b/Core/regularisers_CPU/SB_TV_core.h
@@ -1,10 +1,3 @@
-#include <math.h>
-#include <stdlib.h>
-#include <memory.h>
-#include <stdio.h>
-#include "omp.h"
-#include "utils.h"
-
 /*
 This work is part of the Core Imaging Library developed by
 Visual Analytics and Imaging System Group of the Science Technology
@@ -24,14 +17,45 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */
 
-float SB_TV_CPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ);
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+#include "utils.h"
+#include "CCPiDefines.h"
+
+
+/* C-OMP implementation of Split Bregman - TV denoising-regularisation model (2D/3D) [1]
+*
+* Input Parameters:
+* 1. Noisy image/volume
+* 2. lambda - regularisation parameter
+* 3. Number of iterations [OPTIONAL parameter]
+* 4. epsilon - tolerance constant [OPTIONAL parameter]
+* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
+* 6. print information: 0 (off) or 1 (on)  [OPTIONAL parameter]
+*
+* Output:
+* 1. Filtered/regularized image
+*
+* [1]. Goldstein, T. and Osher, S., 2009. The split Bregman method for L1-regularized problems. SIAM journal on imaging sciences, 2(2), pp.323-343.
+*/
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+CCPI_EXPORT float SB_TV_CPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ);
 
-float gauss_seidel2D(float *U, float *A, float *U_prev, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda, float mu);
-float updDxDy_shrinkAniso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda);
-float updDxDy_shrinkIso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda);
-float updBxBy2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY);
+CCPI_EXPORT float gauss_seidel2D(float *U, float *A, float *U_prev, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda, float mu);
+CCPI_EXPORT float updDxDy_shrinkAniso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda);
+CCPI_EXPORT float updDxDy_shrinkIso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda);
+CCPI_EXPORT float updBxBy2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY);
 
-float gauss_seidel3D(float *U, float *A, float *U_prev, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda, float mu);
-float updDxDyDz_shrinkAniso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda);
-float updDxDyDz_shrinkIso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda);
-float updBxByBz3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ);
+CCPI_EXPORT float gauss_seidel3D(float *U, float *A, float *U_prev, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda, float mu);
+CCPI_EXPORT float updDxDyDz_shrinkAniso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda);
+CCPI_EXPORT float updDxDyDz_shrinkIso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda);
+CCPI_EXPORT float updBxByBz3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ);
+#ifdef __cplusplus
+}
+#endif
diff --git a/Core/regularisers_CPU/TGV_core.c b/Core/regularisers_CPU/TGV_core.c
new file mode 100644
index 0000000..db88614
--- /dev/null
+++ b/Core/regularisers_CPU/TGV_core.c
@@ -0,0 +1,251 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "TGV_core.h"
+
+/* C-OMP implementation of Primal-Dual denoising method for
+ * Total Generalized Variation (TGV)-L2 model [1] (2D case only)
+ *
+ * Input Parameters:
+ * 1. Noisy image (2D)
+ * 2. lambda - regularisation parameter
+ * 3. parameter to control the first-order term (alpha1)
+ * 4. parameter to control the second-order term (alpha0)
+ * 5. Number of Chambolle-Pock (Primal-Dual) iterations
+ * 6. Lipschitz constant (default is 12)
+ *
+ * Output:
+ * Filtered/regularised image
+ *
+ * References:
+ * [1] K. Bredies "Total Generalized Variation"
+ */
+ 
+float TGV_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iter, float L2, int dimX, int dimY)
+{
+    /* Primal-dual (Chambolle-Pock) TGV-L2 denoising of the dimX x dimY image U0.
+     * The filtered image is written to U and U[0] is returned. alpha1 and alpha0 are
+     * handed to the projections of the dual variables P and Q, L2 sets both step
+     * sizes (tau = sigma = L2^(-1/2)) and iter is the number of iterations. */
+    int DimTotal, ll;
+    float *U_old, *P1, *P2, *Q1, *Q2, *Q3, *V1, *V1_old, *V2, *V2_old, tau, sigma;
+
+    DimTotal = dimX*dimY;
+    if (DimTotal <= 0) return 0.0f; /* empty image: nothing to read or write */
+
+    /* dual variables (calloc zero-initialises every work buffer) */
+    P1 = calloc(DimTotal, sizeof(float));
+    P2 = calloc(DimTotal, sizeof(float));
+    Q1 = calloc(DimTotal, sizeof(float));
+    Q2 = calloc(DimTotal, sizeof(float));
+    Q3 = calloc(DimTotal, sizeof(float));
+    /* previous iterate of U, the variables V1, V2 and their previous iterates */
+    U_old = calloc(DimTotal, sizeof(float));
+    V1 = calloc(DimTotal, sizeof(float));
+    V1_old = calloc(DimTotal, sizeof(float));
+    V2 = calloc(DimTotal, sizeof(float));
+    V2_old = calloc(DimTotal, sizeof(float));
+    copyIm(U0, U, dimX, dimY, 1); /* initialize  */
+    if (!P1 || !P2 || !Q1 || !Q2 || !Q3 || !U_old || !V1 || !V1_old || !V2 || !V2_old) {
+        /* out of memory: report it and run no iterations, so U stays a copy of U0 */
+        fprintf(stderr, "TGV_main: memory allocation failed\n");
+        iter = 0;
+    }
+
+    tau = pow(L2,-0.5);
+    sigma = pow(L2,-0.5);
+
+    /* Primal-dual iterations begin here */
+    for(ll = 0; ll < iter; ll++) {
+        /* Calculate Dual Variable P */
+        DualP_2D(U, V1, V2, P1, P2, dimX, dimY, sigma);
+        /*Projection onto convex set for P*/
+        ProjP_2D(P1, P2, dimX, dimY, alpha1);
+        /* Calculate Dual Variable Q */
+        DualQ_2D(V1, V2, Q1, Q2, Q3, dimX, dimY, sigma);
+        /*Projection onto convex set for Q*/
+        ProjQ_2D(Q1, Q2, Q3, dimX, dimY, alpha0);
+        /*saving U into U_old*/
+        copyIm(U, U_old, dimX, dimY, 1);
+        /*adjoint operation  -> divergence and projection of P*/
+        DivProjP_2D(U, U0, P1, P2, dimX, dimY, lambda, tau);
+        /*get updated solution U*/
+        newU(U, U_old, dimX, dimY);
+        /*saving V into V_old*/
+        copyIm(V1, V1_old, dimX, dimY, 1);
+        copyIm(V2, V2_old, dimX, dimY, 1);
+        /* upd V*/
+        UpdV_2D(V1, V2, P1, P2, Q1, Q2, Q3, dimX, dimY, tau);
+        /*get new V*/
+        newU(V1, V1_old, dimX, dimY);
+        newU(V2, V2_old, dimX, dimY);
+    } /*end of iterations*/
+    /* release the work buffers; they used to be leaked on every call (free(NULL) is a no-op) */
+    free(P1); free(P2); free(Q1); free(Q2); free(Q3);
+    free(U_old); free(V1); free(V1_old); free(V2); free(V2_old);
+    return *U;
+}
+
+/********************************************************************/
+/***************************2D Functions*****************************/
+/********************************************************************/
+
+/* Dual ascent step for P with forward differences: P += sigma*(grad(U) - V). Always returns 1. */
+float DualP_2D(float *U, float *V1, float *V2, float *P1, float *P2, int dimX, int dimY, float sigma)
+{
+    int i, j, index, i_nb, j_nb;
+#pragma omp parallel for shared(U,V1,V2,P1,P2) private(i,j,index,i_nb,j_nb)
+    for(j=0; j<dimY; j++) { /* rows outermost: index = j*dimX+i is contiguous in i */
+        for(i=0; i<dimX; i++) {
+            index = j*dimX+i;
+            /* Neumann boundary: last column/row looks backwards; on a 1-pixel-wide axis the pixel is its own neighbour (no U[-1] read) */
+            i_nb = (i == dimX-1) ? ((dimX > 1) ? i-1 : i) : i+1;
+            j_nb = (j == dimY-1) ? ((dimY > 1) ? j-1 : j) : j+1;
+            P1[index] += sigma*((U[j*dimX+i_nb] - U[index]) - V1[index]);
+            P2[index] += sigma*((U[j_nb*dimX+i] - U[index]) - V2[index]);
+        }}
+    return 1;
+}
+/* Projection of P: where sqrt(P1^2+P2^2)/alpha1 > 1 both components are divided by that ratio. Always returns 1. */
+float ProjP_2D(float *P1, float *P2, int dimX, int dimY, float alpha1)
+{
+    float grad_magn;
+    int i,j,index;
+#pragma omp parallel for shared(P1,P2) private(i,j,index,grad_magn)
+    for(j=0; j<dimY; j++) { /* rows outermost: index = j*dimX+i is contiguous in i */
+        for(i=0; i<dimX; i++) {
+            index = j*dimX+i;
+            grad_magn = sqrt(pow(P1[index],2) + pow(P2[index],2));
+            grad_magn = grad_magn/alpha1;
+            if (grad_magn > 1.0) {
+                P1[index] /= grad_magn;
+                P2[index] /= grad_magn;
+            }
+        }}
+    return 1;
+}
+/* Dual ascent step for Q from forward differences of V = (V1,V2):
+ * Q1 += sigma*dV1/di, Q2 += sigma*dV2/dj, Q3 += sigma*0.5*(dV2/di + dV1/dj). Always returns 1. */
+float DualQ_2D(float *V1, float *V2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, float sigma)
+{
+    int i, j, index, nb_i, nb_j;
+    float q1, q2, q11, q22;
+    /* rows outermost: index = j*dimX+i is contiguous in i */
+#pragma omp parallel for shared(Q1,Q2,Q3,V1,V2) private(i,j,index,nb_i,nb_j,q1,q2,q11,q22)
+    for(j=0; j<dimY; j++) {
+        for(i=0; i<dimX; i++) {
+            index = j*dimX+i;
+            /* symmetric boundary conditions (Neumann): the last column/row uses the previous
+             * neighbour; on a 1-pixel-wide axis the pixel is its own neighbour (zero difference),
+             * so nothing is read from before the start of V1/V2 */
+            if (i < dimX-1) nb_i = index + 1;
+            else nb_i = (dimX > 1) ? index - 1 : index;
+            if (j < dimY-1) nb_j = index + dimX;
+            else nb_j = (dimY > 1) ? index - dimX : index;
+
+            /* differences along i (the dimX direction) */
+            q1 = V1[nb_i] - V1[index];
+            q11 = V2[nb_i] - V2[index];
+            /* differences along j (the dimY direction) */
+            q2 = V2[nb_j] - V2[index];
+            q22 = V1[nb_j] - V1[index];
+            /* Q3 accumulates the average of the two cross differences */
+            Q1[index] += sigma*(q1);
+            Q2[index] += sigma*(q2);
+            Q3[index] += sigma*(0.5f*(q11 + q22));
+        }}
+    return 1;
+}
+float ProjQ_2D(float *Q1, float *Q2, float *Q3, int dimX, int dimY, float alpha0)
+{
+    /* Projection of Q: where sqrt(Q1^2+Q2^2+2*Q3^2)/alpha0 > 1 all three components are divided by that ratio. Always returns 1. */
+    float grad_magn;
+    int i,j,index;
+#pragma omp parallel for shared(Q1,Q2,Q3) private(i,j,index,grad_magn)
+    for(j=0; j<dimY; j++) { for(i=0; i<dimX; i++) { /* rows outermost: contiguous in i */
+            index = j*dimX+i;
+            grad_magn = sqrt(pow(Q1[index],2) + pow(Q2[index],2) + 2*pow(Q3[index],2));
+            grad_magn = grad_magn/alpha0;
+            if (grad_magn > 1.0) {
+                Q1[index] /= grad_magn;
+                Q2[index] /= grad_magn;
+                Q3[index] /= grad_magn;
+            }
+        }}
+    return 1;
+}
+/* Primal update of U: U <- (lambda*(U + tau*div(P)) + tau*U0)/(lambda + tau),
+ * with div(P) built from backward differences of P1 (along i) and P2 (along j). Returns U[0]. */
+float DivProjP_2D(float *U, float *U0, float *P1, float *P2, int dimX, int dimY, float lambda, float tau)
+{
+    int i,j,index;
+    float P_v1, P_v2, div;
+#pragma omp parallel for shared(U,U0,P1,P2) private(i,j,index,P_v1,P_v2,div)
+    for(j=0; j<dimY; j++) { /* rows outermost: index = j*dimX+i is contiguous in i */
+        for(i=0; i<dimX; i++) {
+            index = j*dimX+i;
+            /* at the first column/row the backward difference reduces to P itself */
+            P_v1 = (i == 0) ? P1[index] : P1[index] - P1[index-1];
+            P_v2 = (j == 0) ? P2[index] : P2[index] - P2[index-dimX];
+            div = P_v1 + P_v2;
+            U[index] = (lambda*(U[index] + tau*div) + tau*U0[index])/(lambda + tau);
+        }}
+    return *U;
+}
+/* Extrapolation step: overwrites every U[k] with 2*U[k] - U_old[k] and returns U[0]. */
+float newU(float *U, float *U_old, int dimX, int dimY)
+{
+    int k, total = dimX*dimY;
+#pragma omp parallel for shared(U,U_old) private(k)
+    for(k=0; k<total; k++) U[k] = 2*U[k] - U_old[k];
+    return *U;
+}
+/* Primal update of V = (V1,V2): V1 += tau*(P1 + div1), V2 += tau*(P2 + div2), where div1 and
+ * div2 are sums of backward differences of Q1, Q2 and Q3. Always returns 1. */
+float UpdV_2D(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, float tau)
+{
+    int i, j, index;
+    float q1, q11, q2, q22, div1, div2;
+#pragma omp parallel for shared(V1,V2,P1,P2,Q1,Q2,Q3) private(i, j, index, q1, q11, q2, q22, div1, div2)
+    for(j=0; j<dimY; j++) { /* rows outermost: index = j*dimX+i is contiguous in i */
+        for(i=0; i<dimX; i++) {
+            index = j*dimX+i;
+            /* backward differences along i; at the first column the term is Q itself */
+            if (i == 0) {
+                q1 = Q1[index];
+                q11 = Q3[index];
+            } else {
+                q1 = Q1[index] - Q1[j*dimX+(i-1)];
+                q11 = Q3[index] - Q3[j*dimX+(i-1)];
+            }
+            /* backward differences along j; at the first row the term is Q itself */
+            if (j == 0) {
+                q2 = Q2[index];
+                q22 = Q3[index];
+            } else {
+                q2 = Q2[index] - Q2[(j-1)*dimX+i];
+                q22 = Q3[index] - Q3[(j-1)*dimX+i];
+            }
+            div1 = q1 + q22;
+            div2 = q2 + q11;
+            V1[index] += tau*(P1[index] + div1);
+            V2[index] += tau*(P2[index] + div2);
+        }}
+    return 1;
+}
diff --git a/Core/regularisers_CPU/TGV_core.h b/Core/regularisers_CPU/TGV_core.h
new file mode 100644
index 0000000..d5632b9
--- /dev/null
+++ b/Core/regularisers_CPU/TGV_core.h
@@ -0,0 +1,61 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#ifndef TGV_CORE_H
+#define TGV_CORE_H
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+#include "utils.h"
+#include "CCPiDefines.h"
+/* C-OMP implementation of Primal-Dual denoising method for
+ * Total Generalized Variation (TGV)-L2 model [1] (2D case only)
+ *
+ * Input Parameters:
+ * 1. Noisy image (2D)
+ * 2. lambda - regularisation parameter
+ * 3. parameter to control the first-order term (alpha1)
+ * 4. parameter to control the second-order term (alpha0)
+ * 5. Number of Chambolle-Pock (Primal-Dual) iterations
+ * 6. Lipschitz constant (default is 12)
+ *
+ * Output:
+ * Filtered/regularised image
+ *
+ * References:
+ * [1] K. Bredies "Total Generalized Variation"
+ */
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* 2D functions */
+CCPI_EXPORT float TGV_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iter, float L2, int dimX, int dimY);
+CCPI_EXPORT float DualP_2D(float *U, float *V1, float *V2, float *P1, float *P2, int dimX, int dimY, float sigma);
+CCPI_EXPORT float ProjP_2D(float *P1, float *P2, int dimX, int dimY, float alpha1);
+CCPI_EXPORT float DualQ_2D(float *V1, float *V2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, float sigma);
+CCPI_EXPORT float ProjQ_2D(float *Q1, float *Q2, float *Q3, int dimX, int dimY, float alpha0);
+CCPI_EXPORT float DivProjP_2D(float *U, float *U0, float *P1, float *P2, int dimX, int dimY, float lambda, float tau);
+CCPI_EXPORT float UpdV_2D(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, float tau);
+CCPI_EXPORT float newU(float *U, float *U_old, int dimX, int dimY);
+#ifdef __cplusplus
+}
+#endif
+#endif /* TGV_CORE_H */
diff --git a/Core/regularisers_GPU/TGV_GPU_core.cu b/Core/regularisers_GPU/TGV_GPU_core.cu
new file mode 100644
index 0000000..3081011
--- /dev/null
+++ b/Core/regularisers_GPU/TGV_GPU_core.cu
@@ -0,0 +1,323 @@
+ /*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/ 
+
+#include "TGV_GPU_core.h"
+
+/* CUDA implementation of Primal-Dual denoising method for
+ * Total Generalized Variation (TGV)-L2 model [1] (2D case only)
+ *
+ * Input Parameters:
+ * 1. Noisy image (2D)
+ * 2. lambda - regularisation parameter
+ * 3. parameter to control the first-order term (alpha1)
+ * 4. parameter to control the second-order term (alpha0)
+ * 5. Number of Chambolle-Pock (Primal-Dual) iterations
+ * 6. Lipschitz constant (default is 12)
+ *
+ * Output:
+ * Filtered/regularised image
+ *
+ * References:
+ * [1] K. Bredies "Total Generalized Variation"
+ */
+
+/* Evaluates a CUDA runtime call once; on failure prints file, line and the error string, then exits. */
+#define CHECK(call)                                                            \
+do {                                                                           \
+    const cudaError_t error = (call);                                          \
+    if (error != cudaSuccess) {                                                \
+        fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__);                 \
+        fprintf(stderr, "code: %d, reason: %s\n", error,                       \
+                cudaGetErrorString(error));                                    \
+        exit(1);                                                               \
+    }                                                                          \
+} while (0)
+    
+    
+#define BLKXSIZE2D 16 /* thread-block width used for the 2D kernel launches in this file */
+#define BLKYSIZE2D 16 /* thread-block height used for the 2D kernel launches in this file */
+#define EPS 1.0e-7 /* NOTE(review): not referenced anywhere in this file */
+#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) ) /* integer division rounded up; sizes the launch grid */
+
+
+/********************************************************************/
+/***************************2D Functions*****************************/
+/********************************************************************/
+__global__ void DualP_2D_kernel(float *U, float *V1, float *V2, float *P1, float *P2, int dimX, int dimY, float sigma)
+{
+    /* One thread per pixel (i, j): dual ascent step P += sigma*(forward difference of U - V). */
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+    int index = i + dimX*j;
+
+    if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY)) {
+        /* symmetric boundary conditions (Neumann): the last column/row uses the previous
+         * neighbour; on a 1-pixel-wide axis the pixel is its own neighbour (zero difference),
+         * so no thread reads before the start of U */
+        int i_nb = (i == dimX-1) ? ((dimX > 1) ? i-1 : i) : i+1;
+        int j_nb = (j == dimY-1) ? ((dimY > 1) ? j-1 : j) : j+1;
+        P1[index] += sigma*((U[j*dimX+i_nb] - U[index]) - V1[index]);
+        P2[index] += sigma*((U[j_nb*dimX+i] - U[index]) - V2[index]);
+    }
+}
+
+__global__ void ProjP_2D_kernel(float *P1, float *P2, int dimX, int dimY, float alpha1)
+{
+    /* Per-pixel projection of the dual variable P: where sqrt(P1^2 + P2^2)/alpha1
+     * exceeds 1, both components are divided by that ratio. */
+    const int col = blockDim.x * blockIdx.x + threadIdx.x;
+    const int row = blockDim.y * blockIdx.y + threadIdx.y;
+    const int pix = col + dimX*row;
+    float ratio;
+
+    /* threads that fall outside the image have nothing to do */
+    if (!((col >= 0) && (col < dimX) && (row >= 0) && (row < dimY))) return;
+
+    ratio = sqrt(pow(P1[pix],2) + pow(P2[pix],2));
+    ratio = ratio/alpha1;
+    if (ratio > 1.0) {
+        P1[pix] /= ratio;
+        P2[pix] /= ratio;
+    }
+    return;
+}
+
+__global__ void DualQ_2D_kernel(float *V1, float *V2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, float sigma)
+{
+    /* One thread per pixel (i, j): dual ascent step for Q from forward differences of V:
+     * Q1 += sigma*dV1/di, Q2 += sigma*dV2/dj, Q3 += sigma*0.5*(dV2/di + dV1/dj). */
+    float q1, q2, q11, q22;
+    int nb_i, nb_j;
+
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+    int index = i + dimX*j;
+
+    if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY)) {
+        /* symmetric boundary conditions (Neumann): the last column/row uses the previous
+         * neighbour; on a 1-pixel-wide axis the pixel is its own neighbour (zero difference),
+         * so no thread reads before the start of V1/V2 */
+        if (i < dimX-1) nb_i = index + 1;
+        else nb_i = (dimX > 1) ? index - 1 : index;
+        if (j < dimY-1) nb_j = index + dimX;
+        else nb_j = (dimY > 1) ? index - dimX : index;
+
+        /* differences along i (the dimX direction) */
+        q1 = V1[nb_i] - V1[index];
+        q11 = V2[nb_i] - V2[index];
+        /* differences along j (the dimY direction) */
+        q2 = V2[nb_j] - V2[index];
+        q22 = V1[nb_j] - V1[index];
+
+        /* Q3 accumulates the average of the two cross differences */
+        Q1[index] += sigma*(q1);
+        Q2[index] += sigma*(q2);
+        Q3[index] += sigma*(0.5f*(q11 + q22));
+    }
+    return;
+}
+
+__global__ void ProjQ_2D_kernel(float *Q1, float *Q2, float *Q3, int dimX, int dimY, float alpha0)
+{
+    /* Per-pixel projection of the dual variable Q: where sqrt(Q1^2 + Q2^2 + 2*Q3^2)/alpha0
+     * exceeds 1, all three components are divided by that ratio. */
+    const int col = blockDim.x * blockIdx.x + threadIdx.x;
+    const int row = blockDim.y * blockIdx.y + threadIdx.y;
+    const int pix = col + dimX*row;
+    float ratio;
+
+    /* threads that fall outside the image have nothing to do */
+    if (!((col >= 0) && (col < dimX) && (row >= 0) && (row < dimY))) return;
+
+    ratio = sqrt(pow(Q1[pix],2) + pow(Q2[pix],2) + 2*pow(Q3[pix],2));
+    ratio = ratio/alpha0;
+    if (ratio > 1.0) {
+        Q1[pix] /= ratio;
+        Q2[pix] /= ratio;
+        Q3[pix] /= ratio;
+    }
+    return;
+}
+
+__global__ void DivProjP_2D_kernel(float *U, float *U0, float *P1, float *P2, int dimX, int dimY, float lambda, float tau)
+{
+    /* Per-pixel primal update U <- (lambda*(U + tau*div(P)) + tau*U0)/(lambda + tau),
+     * with div(P) built from backward differences of P1 and P2. */
+    float dP1, dP2, div_P;
+    const int col = blockDim.x * blockIdx.x + threadIdx.x;
+    const int row = blockDim.y * blockIdx.y + threadIdx.y;
+    const int pix = col + dimX*row;
+
+    if (!((col >= 0) && (col < dimX) && (row >= 0) && (row < dimY))) return;
+
+    /* at the first column/row the backward difference reduces to P itself */
+    if (col > 0) dP1 = P1[pix] - P1[row*dimX+(col-1)];
+    else dP1 = P1[pix];
+    if (row > 0) dP2 = P2[pix] - P2[(row-1)*dimX+col];
+    else dP2 = P2[pix];
+    div_P = dP1 + dP2;
+    U[pix] = (lambda*(U[pix] + tau*div_P) + tau*U0[pix])/(lambda + tau);
+    return;
+}
+
+__global__ void UpdV_2D_kernel(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, float tau)
+{
+    /* Per-pixel update of V = (V1,V2): V1 += tau*(P1 + div1), V2 += tau*(P2 + div2),
+     * where div1 and div2 are sums of backward differences of Q1, Q2 and Q3
+     * (at the first column/row the difference reduces to Q itself). */
+    float dQ1_i, dQ3_i, dQ2_j, dQ3_j, div1, div2;
+    const int col = blockDim.x * blockIdx.x + threadIdx.x;
+    const int row = blockDim.y * blockIdx.y + threadIdx.y;
+    const int pix = col + dimX*row;
+
+    /* threads that fall outside the image have nothing to do */
+    if (!((col >= 0) && (col < dimX) && (row >= 0) && (row < dimY))) return;
+
+    /* backward differences along the column index */
+    if (col > 0) {
+        dQ1_i = Q1[pix] - Q1[row*dimX+(col-1)];
+        dQ3_i = Q3[pix] - Q3[row*dimX+(col-1)];
+    } else {
+        dQ1_i = Q1[pix];
+        dQ3_i = Q3[pix];
+    }
+    /* backward differences along the row index */
+    if (row > 0) {
+        dQ2_j = Q2[pix] - Q2[(row-1)*dimX+col];
+        dQ3_j = Q3[pix] - Q3[(row-1)*dimX+col];
+    } else {
+        dQ2_j = Q2[pix];
+        dQ3_j = Q3[pix];
+    }
+    div1 = dQ1_i + dQ3_j;
+    div2 = dQ2_j + dQ3_i;
+    V1[pix] += tau*(P1[pix] + div1);
+    V2[pix] += tau*(P2[pix] + div2);
+    return;
+}
+
+__global__ void copyIm_TGV_kernel(float *Input, float* Output, int N, int M, int num_total)
+{
+    /* Copies Input to Output for an N x M image (num_total elements), one thread per pixel. */
+    int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
+    int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
+    int index = xIndex + N*yIndex;
+    /* guard both axes: a thread with xIndex >= N would otherwise alias a pixel of the next row */
+    if ((xIndex < N) && (yIndex < M) && (index < num_total)) {
+        Output[index] = Input[index];
+    }
+}
+
+__global__ void newU_kernel(float *U, float *U_old, int N, int M, int num_total)
+{
+    /* Extrapolation step U <- 2*U - U_old for an N x M image (num_total elements), one thread per pixel. */
+    int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
+    int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
+    int index = xIndex + N*yIndex;
+    /* guard both axes: a thread with xIndex >= N would alias a pixel of the next row and the in-place update could be applied twice */
+    if ((xIndex < N) && (yIndex < M) && (index < num_total)) {
+        U[index] = 2.0f*U[index] - U_old[index];
+    }
+}
+
+/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
+/********************* MAIN HOST FUNCTION ******************/
+/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
+extern "C" void TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY)
+{
+    /* Host driver of the CUDA TGV denoiser: uploads the dimX x dimY image U0 to device 0,
+     * runs iterationsNumb primal-dual iterations and downloads the result into U.
+     * Every CUDA runtime call goes through CHECK, which exits the process on failure. */
+    int dimTotal, dev = 0;
+    CHECK(cudaSetDevice(dev));
+    dimTotal = dimX*dimY;
+    float *U_old, *d_U0, *d_U, *P1, *P2, *Q1, *Q2, *Q3, *V1, *V1_old, *V2, *V2_old, tau, sigma;
+    tau = pow(L2,-0.5);
+    sigma = pow(L2,-0.5);
+
+    CHECK(cudaMalloc((void**)&d_U0,dimTotal*sizeof(float)));
+    CHECK(cudaMalloc((void**)&d_U,dimTotal*sizeof(float)));
+    CHECK(cudaMalloc((void**)&U_old,dimTotal*sizeof(float)));
+    CHECK(cudaMalloc((void**)&P1,dimTotal*sizeof(float)));
+    CHECK(cudaMalloc((void**)&P2,dimTotal*sizeof(float)));
+    CHECK(cudaMalloc((void**)&Q1,dimTotal*sizeof(float)));
+    CHECK(cudaMalloc((void**)&Q2,dimTotal*sizeof(float)));
+    CHECK(cudaMalloc((void**)&Q3,dimTotal*sizeof(float)));
+    CHECK(cudaMalloc((void**)&V1,dimTotal*sizeof(float)));
+    CHECK(cudaMalloc((void**)&V2,dimTotal*sizeof(float)));
+    CHECK(cudaMalloc((void**)&V1_old,dimTotal*sizeof(float)));
+    CHECK(cudaMalloc((void**)&V2_old,dimTotal*sizeof(float)));
+
+    CHECK(cudaMemcpy(d_U0,U0,dimTotal*sizeof(float),cudaMemcpyHostToDevice));
+    CHECK(cudaMemcpy(d_U,U0,dimTotal*sizeof(float),cudaMemcpyHostToDevice));
+    /* cudaMalloc returns uninitialised memory, but P, Q and V are accumulated with "+=":
+     * start them from zero, as the calloc-based CPU implementation does */
+    CHECK(cudaMemset(P1, 0, dimTotal*sizeof(float)));
+    CHECK(cudaMemset(P2, 0, dimTotal*sizeof(float)));
+    CHECK(cudaMemset(Q1, 0, dimTotal*sizeof(float)));
+    CHECK(cudaMemset(Q2, 0, dimTotal*sizeof(float)));
+    CHECK(cudaMemset(Q3, 0, dimTotal*sizeof(float)));
+    CHECK(cudaMemset(V1, 0, dimTotal*sizeof(float)));
+    CHECK(cudaMemset(V2, 0, dimTotal*sizeof(float)));
+
+    /*2D case */
+    dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D);
+    dim3 dimGrid(idivup(dimX,BLKXSIZE2D), idivup(dimY,BLKYSIZE2D));
+
+    for(int n=0; n < iterationsNumb; n++) {
+        /* Calculate Dual Variable P */
+        DualP_2D_kernel<<<dimGrid,dimBlock>>>(d_U, V1, V2, P1, P2, dimX, dimY, sigma);
+        CHECK(cudaDeviceSynchronize());
+        /*Projection onto convex set for P*/
+        ProjP_2D_kernel<<<dimGrid,dimBlock>>>(P1, P2, dimX, dimY, alpha1);
+        CHECK(cudaDeviceSynchronize());
+        /* Calculate Dual Variable Q */
+        DualQ_2D_kernel<<<dimGrid,dimBlock>>>(V1, V2, Q1, Q2, Q3, dimX, dimY, sigma);
+        CHECK(cudaDeviceSynchronize());
+        /*Projection onto convex set for Q*/
+        ProjQ_2D_kernel<<<dimGrid,dimBlock>>>(Q1, Q2, Q3, dimX, dimY, alpha0);
+        CHECK(cudaDeviceSynchronize());
+        /*saving U into U_old*/
+        copyIm_TGV_kernel<<<dimGrid,dimBlock>>>(d_U, U_old, dimX, dimY, dimTotal);
+        CHECK(cudaDeviceSynchronize());
+        /*adjoint operation  -> divergence and projection of P*/
+        DivProjP_2D_kernel<<<dimGrid,dimBlock>>>(d_U, d_U0, P1, P2, dimX, dimY, lambda, tau);
+        CHECK(cudaDeviceSynchronize());
+        /*get updated solution U*/
+        newU_kernel<<<dimGrid,dimBlock>>>(d_U, U_old, dimX, dimY, dimTotal);
+        CHECK(cudaDeviceSynchronize());
+        /*saving V into V_old*/
+        copyIm_TGV_kernel<<<dimGrid,dimBlock>>>(V1, V1_old, dimX, dimY, dimTotal);
+        copyIm_TGV_kernel<<<dimGrid,dimBlock>>>(V2, V2_old, dimX, dimY, dimTotal);
+        CHECK(cudaDeviceSynchronize());
+        /* upd V*/
+        UpdV_2D_kernel<<<dimGrid,dimBlock>>>(V1, V2, P1, P2, Q1, Q2, Q3, dimX, dimY, tau);
+        CHECK(cudaDeviceSynchronize());
+        /*get new V*/
+        newU_kernel<<<dimGrid,dimBlock>>>(V1, V1_old, dimX, dimY, dimTotal);
+        newU_kernel<<<dimGrid,dimBlock>>>(V2, V2_old, dimX, dimY, dimTotal);
+        CHECK(cudaDeviceSynchronize());
+    }
+
+    CHECK(cudaMemcpy(U,d_U,dimTotal*sizeof(float),cudaMemcpyDeviceToHost));
+    CHECK(cudaFree(d_U0)); CHECK(cudaFree(d_U)); CHECK(cudaFree(U_old));
+    CHECK(cudaFree(P1)); CHECK(cudaFree(P2)); CHECK(cudaFree(Q1));
+    CHECK(cudaFree(Q2)); CHECK(cudaFree(Q3)); CHECK(cudaFree(V1));
+    CHECK(cudaFree(V2)); CHECK(cudaFree(V1_old)); CHECK(cudaFree(V2_old));
+}
diff --git a/Core/regularisers_GPU/TGV_GPU_core.h b/Core/regularisers_GPU/TGV_GPU_core.h
new file mode 100644
index 0000000..663378f
--- /dev/null
+++ b/Core/regularisers_GPU/TGV_GPU_core.h
@@ -0,0 +1,8 @@
+#ifndef __TGV_GPU_H__
+#define __TGV_GPU_H__
+#include "CCPiDefines.h"
+#include <stdio.h>
+/* Entry point of the GPU TGV regulariser; the Matlab MEX wrapper calls it as (input image, output image, lambda, alpha1, alpha0, iterations, Lipschitz constant, dimX, dimY) */
+extern "C" CCPI_EXPORT void TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY);
+
+#endif /* __TGV_GPU_H__ */
diff --git a/Readme.md b/Readme.md
index 356cacf..f3076e1 100644
--- a/Readme.md
+++ b/Readme.md
@@ -19,15 +19,16 @@
 1. Rudin-Osher-Fatemi (ROF) Total Variation (explicit PDE minimisation scheme) **2D/3D CPU/GPU** (Ref. *1*)
 2. Fast-Gradient-Projection (FGP) Total Variation **2D/3D CPU/GPU** (Ref. *2*)
 3. Split-Bregman (SB) Total Variation **2D/3D CPU/GPU** (Ref. *5*)
-4. Linear and nonlinear diffusion (explicit PDE minimisation scheme) **2D/3D CPU/GPU** (Ref. *7*)
-5. Anisotropic Fourth-Order Diffusion (explicit PDE minimisation) **2D/3D CPU/GPU** (Ref. *8*)
+4. Total Generalised Variation (TGV) model **2D CPU/GPU** (Ref. *6*)
+5. Linear and nonlinear diffusion (explicit PDE minimisation scheme) **2D/3D CPU/GPU** (Ref. *8*)
+6. Anisotropic Fourth-Order Diffusion (explicit PDE minimisation) **2D/3D CPU/GPU** (Ref. *9*)
 
 ### Multi-channel (denoising):
 1. Fast-Gradient-Projection (FGP) Directional Total Variation **2D/3D CPU/GPU** (Ref. *3,4,2*)
-2. Total Nuclear Variation (TNV) penalty **2D+channels CPU** (Ref. *6*)
+2. Total Nuclear Variation (TNV) penalty **2D+channels CPU** (Ref. *7*)
 
 ### Inpainting:
-1. Linear and nonlinear diffusion (explicit PDE minimisation scheme) **2D/3D CPU** (Ref. *7*)
+1. Linear and nonlinear diffusion (explicit PDE minimisation scheme) **2D/3D CPU** (Ref. *8*)
 2. Iterative nonlocal vertical marching method  **2D CPU**
 
 
@@ -35,12 +36,12 @@
 
 ### Python (conda-build)
 ```
-	export CIL_VERSION=0.9.2
+	export CIL_VERSION=0.9.4
 	conda build recipes/regularisers --numpy 1.12 --python 3.5 
-	conda install cil_regulariser=0.9.2 --use-local --force
+	conda install cil_regulariser=0.9.4 --use-local --force
 	cd Wrappers/Python
 	conda build conda-recipe --numpy 1.12 --python 3.5 
-	conda install ccpi-regulariser=0.9.2 --use-local --force
+	conda install ccpi-regulariser=0.9.4 --use-local --force
 	cd demos/
 	python demo_cpu_regularisers.py # to run CPU demo
 	python demo_gpu_regularisers.py # to run GPU demo
@@ -63,11 +64,13 @@
 
 *5. [Goldstein, T. and Osher, S., 2009. The split Bregman method for L1-regularized problems. SIAM journal on imaging sciences, 2(2), pp.323-343.](https://doi.org/10.1137/080725891)*
 
-*6. [Duran, J., Moeller, M., Sbert, C. and Cremers, D., 2016. Collaborative total variation: a general framework for vectorial TV models. SIAM Journal on Imaging Sciences, 9(1), pp.116-151.](https://doi.org/10.1137/15M102873X)*
+*6. [Bredies, K., Kunisch, K. and Pock, T., 2010. Total generalized variation. SIAM Journal on Imaging Sciences, 3(3), pp.492-526.](https://doi.org/10.1137/090769521)*
 
-*7. [Black, M.J., Sapiro, G., Marimont, D.H. and Heeger, D., 1998. Robust anisotropic diffusion. IEEE Transactions on image processing, 7(3), pp.421-432.](https://doi.org/10.1109/83.661192)*
+*7. [Duran, J., Moeller, M., Sbert, C. and Cremers, D., 2016. Collaborative total variation: a general framework for vectorial TV models. SIAM Journal on Imaging Sciences, 9(1), pp.116-151.](https://doi.org/10.1137/15M102873X)*
 
-*8. [Hajiaboli, M.R., 2011. An anisotropic fourth-order diffusion filter for image noise removal. International Journal of Computer Vision, 92(2), pp.177-191.](https://doi.org/10.1007/s11263-010-0330-1)*
+*8. [Black, M.J., Sapiro, G., Marimont, D.H. and Heeger, D., 1998. Robust anisotropic diffusion. IEEE Transactions on image processing, 7(3), pp.421-432.](https://doi.org/10.1109/83.661192)*
+
+*9. [Hajiaboli, M.R., 2011. An anisotropic fourth-order diffusion filter for image noise removal. International Journal of Computer Vision, 92(2), pp.177-191.](https://doi.org/10.1007/s11263-010-0330-1)*
 
 ### License:
 [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0)
diff --git a/Wrappers/Matlab/demos/demoMatlab_denoise.m b/Wrappers/Matlab/demos/demoMatlab_denoise.m
index 8289f41..3f0ca54 100644
--- a/Wrappers/Matlab/demos/demoMatlab_denoise.m
+++ b/Wrappers/Matlab/demos/demoMatlab_denoise.m
@@ -1,9 +1,11 @@
 % Image (2D) denoising demo using CCPi-RGL
 clear; close all
-Path1 = sprintf(['..' filesep 'mex_compile' filesep 'installed'], 1i);
-Path2 = sprintf(['..' filesep '..' filesep '..' filesep 'data' filesep], 1i);
-addpath(Path1);
-addpath(Path2);
+fsep = '/';
+
+Path1 = sprintf(['..' fsep 'mex_compile' fsep 'installed'], 1i);
+Path2 = sprintf(['..' fsep '..' fsep '..' fsep 'data' fsep], 1i);
+Path3 = sprintf(['..' fsep 'supp'], 1i);
+addpath(Path1); addpath(Path2); addpath(Path3);
 
 Im = double(imread('lena_gray_512.tif'))/255;  % loading image
 u0 = Im + .05*randn(size(Im)); u0(u0 < 0) = 0;
@@ -16,6 +18,8 @@ tau_rof = 0.0025; % time-marching constant
 iter_rof = 750; % number of ROF iterations
 tic; u_rof = ROF_TV(single(u0), lambda_reg, iter_rof, tau_rof); toc; 
 energyfunc_val_rof = TV_energy(single(u_rof),single(u0),lambda_reg, 1);  % get energy function value
+rmseROF = (RMSE(u_rof(:),Im(:)));
+fprintf('%s %f \n', 'RMSE error for ROF-TV is:', rmseROF);
 figure; imshow(u_rof, [0 1]); title('ROF-TV denoised image (CPU)');
 %%
 % fprintf('Denoise using the ROF-TV model (GPU) \n');
@@ -29,6 +33,8 @@ iter_fgp = 1000; % number of FGP iterations
 epsil_tol =  1.0e-06; % tolerance
 tic; u_fgp = FGP_TV(single(u0), lambda_reg, iter_fgp, epsil_tol); toc; 
 energyfunc_val_fgp = TV_energy(single(u_fgp),single(u0),lambda_reg, 1); % get energy function value
+rmseFGP = (RMSE(u_fgp(:),Im(:)));
+fprintf('%s %f \n', 'RMSE error for FGP-TV is:', rmseFGP);
 figure; imshow(u_fgp, [0 1]); title('FGP-TV denoised image (CPU)');
 
 %%
@@ -43,6 +49,8 @@ iter_sb = 150; % number of SB iterations
 epsil_tol =  1.0e-06; % tolerance
 tic; u_sb = SB_TV(single(u0), lambda_reg, iter_sb, epsil_tol); toc; 
 energyfunc_val_sb = TV_energy(single(u_sb),single(u0),lambda_reg, 1);  % get energy function value
+rmseSB = (RMSE(u_sb(:),Im(:)));
+fprintf('%s %f \n', 'RMSE error for SB-TV is:', rmseSB);
 figure; imshow(u_sb, [0 1]); title('SB-TV denoised image (CPU)');
 %%
 % fprintf('Denoise using the SB-TV model (GPU) \n');
@@ -51,12 +59,34 @@ figure; imshow(u_sb, [0 1]); title('SB-TV denoised image (CPU)');
 % tic; u_sbG = SB_TV_GPU(single(u0), lambda_reg, iter_sb, epsil_tol); toc; 
 % figure; imshow(u_sbG, [0 1]); title('SB-TV denoised image (GPU)');
 %%
+fprintf('Denoise using the TGV model (CPU) \n');
+lambda_TGV = 0.04; % regularisation parameter
+alpha1 = 1; % parameter to control the first-order term
+alpha0 = 0.7; % parameter to control the second-order term
+iter_TGV = 500; % number of Primal-Dual iterations for TGV
+tic; u_tgv = TGV(single(u0), lambda_TGV, alpha1, alpha0, iter_TGV); toc; 
+rmseTGV = (RMSE(u_tgv(:),Im(:)));
+fprintf('%s %f \n', 'RMSE error for TGV is:', rmseTGV);
+figure; imshow(u_tgv, [0 1]); title('TGV denoised image (CPU)');
+%%
+% fprintf('Denoise using the TGV model (GPU) \n');
+% lambda_TGV = 0.04; % regularisation parameter
+% alpha1 = 1; % parameter to control the first-order term
+% alpha0 = 0.7; % parameter to control the second-order term
+% iter_TGV = 500; % number of Primal-Dual iterations for TGV
+% tic; u_tgv_gpu = TGV_GPU(single(u0), lambda_TGV, alpha1, alpha0, iter_TGV); toc; 
+% rmseTGV_gpu = (RMSE(u_tgv_gpu(:),Im(:)));
+% fprintf('%s %f \n', 'RMSE error for TGV is:', rmseTGV_gpu);
+% figure; imshow(u_tgv_gpu, [0 1]); title('TGV denoised image (GPU)');
+%%
 fprintf('Denoise using Nonlinear-Diffusion model (CPU) \n');
 iter_diff = 800; % number of diffusion iterations
 lambda_regDiff = 0.025; % regularisation for the diffusivity 
 sigmaPar = 0.015; % edge-preserving parameter
 tau_param = 0.025; % time-marching constant 
 tic; u_diff = NonlDiff(single(u0), lambda_regDiff, sigmaPar, iter_diff, tau_param, 'Huber'); toc; 
+rmseDiffus = (RMSE(u_diff(:),Im(:)));
+fprintf('%s %f \n', 'RMSE error for Nonlinear Diffusion is:', rmseDiffus);
 figure; imshow(u_diff, [0 1]); title('Diffusion denoised image (CPU)');
 %%
 % fprintf('Denoise using Nonlinear-Diffusion model (GPU) \n');
@@ -73,6 +103,8 @@ lambda_regDiff = 3.5; % regularisation for the diffusivity
 sigmaPar = 0.02; % edge-preserving parameter
 tau_param = 0.0015; % time-marching constant 
 tic; u_diff4 = Diffusion_4thO(single(u0), lambda_regDiff, sigmaPar, iter_diff, tau_param); toc; 
+rmseDiffHO = (RMSE(u_diff4(:),Im(:)));
+fprintf('%s %f \n', 'RMSE error for Fourth-order anisotropic diffusion is:', rmseDiffHO);
 figure; imshow(u_diff4, [0 1]); title('Diffusion 4thO denoised image (CPU)');
 %%
 % fprintf('Denoise using Fourth-order anisotropic diffusion model (GPU) \n');
@@ -95,6 +127,8 @@ iter_fgp = 1000; % number of FGP iterations
 epsil_tol =  1.0e-06; % tolerance
 eta =  0.2; % Reference image gradient smoothing constant
 tic; u_fgp_dtv = FGP_dTV(single(u0), single(u_ref), lambda_reg, iter_fgp, epsil_tol, eta); toc; 
+rmse_dTV= (RMSE(u_fgp_dtv(:),Im(:)));
+fprintf('%s %f \n', 'RMSE error for Directional Total Variation (dTV) is:', rmse_dTV);
 figure; imshow(u_fgp_dtv, [0 1]); title('FGP-dTV denoised image (CPU)');
 %%
 % fprintf('Denoise using the FGP-dTV model (GPU) \n');
diff --git a/Wrappers/Matlab/mex_compile/compileCPU_mex_Linux.m b/Wrappers/Matlab/mex_compile/compileCPU_mex_Linux.m
index 82b681a..8acc1b7 100644
--- a/Wrappers/Matlab/mex_compile/compileCPU_mex_Linux.m
+++ b/Wrappers/Matlab/mex_compile/compileCPU_mex_Linux.m
@@ -36,6 +36,9 @@ movefile('NonlDiff.mex*',Pathmove);
 mex Diffusion_4thO.c Diffus4th_order_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
 movefile('Diffusion_4thO.mex*',Pathmove);
 
+mex TGV.c TGV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+movefile('TGV.mex*',Pathmove);
+
 mex TV_energy.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
 movefile('TV_energy.mex*',Pathmove);
 
@@ -46,7 +49,7 @@ movefile('NonlDiff_Inp.mex*',Pathmove);
 mex NonlocalMarching_Inpaint.c NonlocalMarching_Inpaint_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
 movefile('NonlocalMarching_Inpaint.mex*',Pathmove);
 
-delete SB_TV_core* ROF_TV_core* FGP_TV_core* FGP_dTV_core* TNV_core* utils* Diffusion_core* Diffus4th_order_core* CCPiDefines.h
+delete SB_TV_core* ROF_TV_core* FGP_TV_core* FGP_dTV_core* TNV_core* utils* Diffusion_core* Diffus4th_order_core* TGV_core* CCPiDefines.h
 delete Diffusion_Inpaint_core* NonlocalMarching_Inpaint_core*
 fprintf('%s \n', 'Regularisers successfully compiled!');
 
diff --git a/Wrappers/Matlab/mex_compile/compileCPU_mex_WINDOWS.m b/Wrappers/Matlab/mex_compile/compileCPU_mex_WINDOWS.m
index 629a346..ea1ad7d 100644
--- a/Wrappers/Matlab/mex_compile/compileCPU_mex_WINDOWS.m
+++ b/Wrappers/Matlab/mex_compile/compileCPU_mex_WINDOWS.m
@@ -44,6 +44,9 @@ movefile('NonlDiff.mex*',Pathmove);
 mex Diffusion_4thO.c Diffus4th_order_core.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
 movefile('Diffusion_4thO.mex*',Pathmove);
 
+mex TGV.c TGV_core.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
+movefile('TGV.mex*',Pathmove);
+
 mex TV_energy.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
 movefile('TV_energy.mex*',Pathmove);
 
@@ -54,7 +57,7 @@ movefile('NonlDiff_Inp.mex*',Pathmove);
 mex NonlocalMarching_Inpaint.c NonlocalMarching_Inpaint_core.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
 movefile('NonlocalMarching_Inpaint.mex*',Pathmove);
 
-delete SB_TV_core* ROF_TV_core* FGP_TV_core* FGP_dTV_core* TNV_core* utils* Diffusion_core* Diffus4th_order_core* CCPiDefines.h
+delete SB_TV_core* ROF_TV_core* FGP_TV_core* FGP_dTV_core* TNV_core* utils* Diffusion_core* Diffus4th_order_core* TGV_core* CCPiDefines.h
 delete Diffusion_Inpaint_core* NonlocalMarching_Inpaint_core*
 fprintf('%s \n', 'Regularisers successfully compiled!');
 %%
@@ -82,13 +85,15 @@ fprintf('%s \n', 'Regularisers successfully compiled!');
 % movefile('NonlDiff.mex*',Pathmove);
 % mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" Diffusion_4thO.c Diffus4th_order_core.c utils.c
 % movefile('Diffusion_4thO.mex*',Pathmove);
+% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" TGV.c TGV_core.c utils.c
+% movefile('TGV.mex*',Pathmove);
 % mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" TV_energy.c utils.c
 % movefile('TV_energy.mex*',Pathmove);
 % mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" NonlDiff_Inp.c Diffusion_Inpaint_core.c utils.c
 % movefile('NonlDiff_Inp.mex*',Pathmove);
 % mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" NonlocalMarching_Inpaint.c NonlocalMarching_Inpaint_core.c utils.c
 % movefile('NonlocalMarching_Inpaint.mex*',Pathmove);
-% delete SB_TV_core* ROF_TV_core* FGP_TV_core* FGP_dTV_core* TNV_core* utils* Diffusion_core* Diffus4th_order_core* CCPiDefines.h
+% delete SB_TV_core* ROF_TV_core* FGP_TV_core* FGP_dTV_core* TNV_core* utils* Diffusion_core* Diffus4th_order_core* TGV_core* CCPiDefines.h
 % delete Diffusion_Inpaint_core* NonlocalMarching_Inpaint_core*
 % fprintf('%s \n', 'Regularisers successfully compiled!');
 %%
diff --git a/Wrappers/Matlab/mex_compile/compileGPU_mex.m b/Wrappers/Matlab/mex_compile/compileGPU_mex.m
index a4a636e..003c6ec 100644
--- a/Wrappers/Matlab/mex_compile/compileGPU_mex.m
+++ b/Wrappers/Matlab/mex_compile/compileGPU_mex.m
@@ -37,6 +37,10 @@ movefile('FGP_TV_GPU.mex*',Pathmove);
 mex -g -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart -lcufft -lmwgpu SB_TV_GPU.cpp TV_SB_GPU_core.o
 movefile('SB_TV_GPU.mex*',Pathmove);
 
+!/usr/local/cuda/bin/nvcc -O0 -c TGV_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/
+mex -g -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart -lcufft -lmwgpu TGV_GPU.cpp TGV_GPU_core.o
+movefile('TGV_GPU.mex*',Pathmove);
+
 !/usr/local/cuda/bin/nvcc -O0 -c dTV_FGP_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/
 mex -g -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart -lcufft -lmwgpu FGP_dTV_GPU.cpp dTV_FGP_GPU_core.o
 movefile('FGP_dTV_GPU.mex*',Pathmove);
@@ -49,7 +53,7 @@ movefile('NonlDiff_GPU.mex*',Pathmove);
 mex -g -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart -lcufft -lmwgpu Diffusion_4thO_GPU.cpp Diffus_4thO_GPU_core.o
 movefile('Diffusion_4thO_GPU.mex*',Pathmove);
 
-delete TV_ROF_GPU_core* TV_FGP_GPU_core* TV_SB_GPU_core* dTV_FGP_GPU_core* NonlDiff_GPU_core* Diffus_4thO_GPU_core* CCPiDefines.h
+delete TV_ROF_GPU_core* TV_FGP_GPU_core* TV_SB_GPU_core* dTV_FGP_GPU_core* NonlDiff_GPU_core* Diffus_4thO_GPU_core* TGV_GPU_core* CCPiDefines.h
 fprintf('%s \n', 'All successfully compiled!');
 
 pathA2 = sprintf(['..' fsep '..' fsep], 1i);
diff --git a/Wrappers/Matlab/mex_compile/regularisers_CPU/TGV.c b/Wrappers/Matlab/mex_compile/regularisers_CPU/TGV.c
new file mode 100644
index 0000000..9516869
--- /dev/null
+++ b/Wrappers/Matlab/mex_compile/regularisers_CPU/TGV.c
@@ -0,0 +1,78 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "mex.h"
+#include "TGV_core.h"
+
+/* C-OMP implementation of Primal-Dual denoising method for 
+ * Total Generalized Variation (TGV)-L2 model [1] (2D case only)
+ *
+ * Input Parameters:
+ * 1. Noisy image (2D) (required)
+ * 2. lambda - regularisation parameter (required)
+ * 3. parameter to control the first-order term (alpha1) (default - 1)
+ * 4. parameter to control the second-order term (alpha0) (default - 0.5)
+ * 5. Number of Chambolle-Pock (Primal-Dual) iterations (default is 300)
+ * 6. Lipschitz constant (default is 12)
+ *
+ * Output:
+ * Filtered/regularised image 
+ *
+ * References:
+ * [1] K. Bredies "Total Generalized Variation"
+ */
+
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+
+{
+    /* MEX gateway: validates the Matlab arguments, then runs TGV_main on a single-precision 2D image */
+    int iter, dimX, dimY;
+    const mwSize *dim_array; /* mxGetDimensions returns mwSize, which is not guaranteed to be int-sized */
+    float *Input, *Output=NULL, lambda, alpha0, alpha1, L2;
+
+    /*Handling Matlab input data*/
+    /* the argument count is validated before prhs[0] and prhs[1] are read */
+    if ((nrhs < 2) || (nrhs > 6)) mexErrMsgTxt("At least 2 parameters is required, all parameters are: Image(2D), Regularisation parameter, alpha1, alpha0, iterations number, Lipshitz Constant");
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+    /* Matlab arrays have at least two dimensions, so anything that is not 2D is rejected here */
+    if (mxGetNumberOfDimensions(prhs[0]) != 2) {mexErrMsgTxt("Only 2D images accepted");}
+    dim_array = mxGetDimensions(prhs[0]);
+
+    Input  = (float *) mxGetData(prhs[0]); /*noisy image (2D) */
+    lambda =  (float) mxGetScalar(prhs[1]); /* regularisation parameter */
+    alpha1 =  1.0f; /* parameter to control the first-order term */
+    alpha0 =  0.5f; /* parameter to control the second-order term */
+    iter =  300; /* Iterations number */
+    L2 =  12.0f; /* Lipschitz constant */
+
+    /* optional positional arguments override the defaults set above */
+    if (nrhs >= 3)  alpha1 =  (float) mxGetScalar(prhs[2]); /* parameter to control the first-order term */
+    if (nrhs >= 4)  alpha0 =  (float) mxGetScalar(prhs[3]); /* parameter to control the second-order term */
+    if (nrhs >= 5)  iter =  (int) mxGetScalar(prhs[4]); /* Iterations number */
+    if (nrhs == 6)  L2 =  (float) mxGetScalar(prhs[5]); /* Lipschitz constant */
+
+    /*Handling Matlab output data*/
+    dimX = (int) dim_array[0]; dimY = (int) dim_array[1];
+    /* mxGetData rather than mxGetPr: the output array holds single-precision data */
+    Output = (float *) mxGetData(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+    /* running the function */
+    TGV_main(Input, Output, lambda, alpha1, alpha0, iter, L2, dimX, dimY);
+}
diff --git a/Wrappers/Matlab/mex_compile/regularisers_GPU/TGV_GPU.cpp b/Wrappers/Matlab/mex_compile/regularisers_GPU/TGV_GPU.cpp
new file mode 100644
index 0000000..5a0df5b
--- /dev/null
+++ b/Wrappers/Matlab/mex_compile/regularisers_GPU/TGV_GPU.cpp
@@ -0,0 +1,78 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "mex.h"
+#include "TGV_GPU_core.h"
+
+/* CUDA implementation of Primal-Dual denoising method for 
+ * Total Generalized Variation (TGV)-L2 model [1] (2D case only)
+ *
+ * Input Parameters:
+ * 1. Noisy image (2D) (required)
+ * 2. lambda - regularisation parameter (required)
+ * 3. parameter to control the first-order term (alpha1) (default - 1)
+ * 4. parameter to control the second-order term (alpha0) (default - 0.5)
+ * 5. Number of Chambolle-Pock (Primal-Dual) iterations (default is 300)
+ * 6. Lipschitz constant (default is 12)
+ *
+ * Output:
+ * Filtered/regularised image 
+ *
+ * References:
+ * [1] K. Bredies "Total Generalized Variation"
+ */
+
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+
+{
+    /* MEX gateway: validates the Matlab arguments, then runs TGV_GPU_main on a single-precision 2D image */
+    int iter, dimX, dimY;
+    const mwSize *dim_array; /* mxGetDimensions returns mwSize, which is not guaranteed to be int-sized */
+    float *Input, *Output=NULL, lambda, alpha0, alpha1, L2;
+
+    /*Handling Matlab input data*/
+    /* the argument count is validated before prhs[0] and prhs[1] are read */
+    if ((nrhs < 2) || (nrhs > 6)) mexErrMsgTxt("At least 2 parameters is required, all parameters are: Image(2D), Regularisation parameter, alpha1, alpha0, iterations number, Lipshitz Constant");
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+    /* Matlab arrays have at least two dimensions, so anything that is not 2D is rejected here */
+    if (mxGetNumberOfDimensions(prhs[0]) != 2) {mexErrMsgTxt("Only 2D images accepted");}
+    dim_array = mxGetDimensions(prhs[0]);
+
+    Input  = (float *) mxGetData(prhs[0]); /*noisy image (2D) */
+    lambda =  (float) mxGetScalar(prhs[1]); /* regularisation parameter */
+    alpha1 =  1.0f; /* parameter to control the first-order term */
+    alpha0 =  0.5f; /* parameter to control the second-order term */
+    iter =  300; /* Iterations number */
+    L2 =  12.0f; /* Lipschitz constant */
+
+    /* optional positional arguments override the defaults set above */
+    if (nrhs >= 3)  alpha1 =  (float) mxGetScalar(prhs[2]); /* parameter to control the first-order term */
+    if (nrhs >= 4)  alpha0 =  (float) mxGetScalar(prhs[3]); /* parameter to control the second-order term */
+    if (nrhs >= 5)  iter =  (int) mxGetScalar(prhs[4]); /* Iterations number */
+    if (nrhs == 6)  L2 =  (float) mxGetScalar(prhs[5]); /* Lipschitz constant */
+
+    /*Handling Matlab output data*/
+    dimX = (int) dim_array[0]; dimY = (int) dim_array[1];
+    /* mxGetData rather than mxGetPr: the output array holds single-precision data */
+    Output = (float *) mxGetData(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+    /* running the function */
+    TGV_GPU_main(Input, Output, lambda, alpha1, alpha0, iter, L2, dimX, dimY);
+}
diff --git a/Wrappers/Python/ccpi/filters/regularisers.py b/Wrappers/Python/ccpi/filters/regularisers.py
index 0b79dac..0e435a6 100644
--- a/Wrappers/Python/ccpi/filters/regularisers.py
+++ b/Wrappers/Python/ccpi/filters/regularisers.py
@@ -2,8 +2,8 @@
 script which assigns a proper device core function based on a flag ('cpu' or 'gpu')
 """
 
-from ccpi.filters.cpu_regularisers import TV_ROF_CPU, TV_FGP_CPU, TV_SB_CPU, dTV_FGP_CPU, TNV_CPU, NDF_CPU, Diff4th_CPU
-from ccpi.filters.gpu_regularisers import TV_ROF_GPU, TV_FGP_GPU, TV_SB_GPU, dTV_FGP_GPU, NDF_GPU, Diff4th_GPU
+from ccpi.filters.cpu_regularisers import TV_ROF_CPU, TV_FGP_CPU, TV_SB_CPU, dTV_FGP_CPU, TNV_CPU, NDF_CPU, Diff4th_CPU, TGV_CPU
+from ccpi.filters.gpu_regularisers import TV_ROF_GPU, TV_FGP_GPU, TV_SB_GPU, dTV_FGP_GPU, NDF_GPU, Diff4th_GPU, TGV_GPU
 from ccpi.filters.cpu_regularisers import NDF_INPAINT_CPU, NVM_INPAINT_CPU
 
 def ROF_TV(inputData, regularisation_parameter, iterations,
@@ -128,6 +128,26 @@ def DIFF4th(inputData, regularisation_parameter, edge_parameter, iterations,
     else:
         raise ValueError('Unknown device {0}. Expecting gpu or cpu'\
                          .format(device))
+def TGV(inputData, regularisation_parameter, alpha1, alpha0, iterations,
+                     LipshitzConst, device='cpu'):
+    if device == 'cpu':
+        return TGV_CPU(inputData, 
+					regularisation_parameter, 
+					alpha1, 
+					alpha0, 
+					iterations,
+                    LipshitzConst)
+    elif device == 'gpu':
+        return TGV_GPU(inputData, 
+					regularisation_parameter, 
+					alpha1, 
+					alpha0, 
+					iterations,
+                    LipshitzConst)
+    else:
+        raise ValueError('Unknown device {0}. Expecting gpu or cpu'\
+                         .format(device))
+                         
 def NDF_INP(inputData, maskData, regularisation_parameter, edge_parameter, iterations,
                      time_marching_parameter, penalty_type):
         return NDF_INPAINT_CPU(inputData, maskData, regularisation_parameter, 
diff --git a/Wrappers/Python/demos/demo_cpu_regularisers.py b/Wrappers/Python/demos/demo_cpu_regularisers.py
index ff500ae..5c20244 100644
--- a/Wrappers/Python/demos/demo_cpu_regularisers.py
+++ b/Wrappers/Python/demos/demo_cpu_regularisers.py
@@ -12,7 +12,7 @@ import matplotlib.pyplot as plt
 import numpy as np
 import os
 import timeit
-from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, FGP_dTV, TNV, NDF, DIFF4th
+from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, FGP_dTV, TNV, NDF, DIFF4th
 from qualitymetrics import rmse
 ###############################################################################
 def printParametersToString(pars):
@@ -74,7 +74,7 @@ print ("_______________ROF-TV (2D)_________________")
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 
 ## plot 
-fig = plt.figure(1)
+fig = plt.figure()
 plt.suptitle('Performance of ROF-TV regulariser using the CPU')
 a=fig.add_subplot(1,2,1)
 a.set_title('Noisy Image')
@@ -109,13 +109,13 @@ a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
 imgplot = plt.imshow(rof_cpu, cmap="gray")
 plt.title('{}'.format('CPU results'))
 
-
+#%%
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 print ("_______________FGP-TV (2D)__________________")
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 
 ## plot 
-fig = plt.figure(2)
+fig = plt.figure()
 plt.suptitle('Performance of FGP-TV regulariser using the CPU')
 a=fig.add_subplot(1,2,1)
 a.set_title('Noisy Image')
@@ -159,12 +159,13 @@ a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
 imgplot = plt.imshow(fgp_cpu, cmap="gray")
 plt.title('{}'.format('CPU results'))
 
+#%%
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 print ("_______________SB-TV (2D)__________________")
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 
 ## plot 
-fig = plt.figure(3)
+fig = plt.figure()
 plt.suptitle('Performance of SB-TV regulariser using the CPU')
 a=fig.add_subplot(1,2,1)
 a.set_title('Noisy Image')
@@ -205,14 +206,62 @@ a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
          verticalalignment='top', bbox=props)
 imgplot = plt.imshow(sb_cpu, cmap="gray")
 plt.title('{}'.format('CPU results'))
+#%%
+
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_____Total Generalised Variation (2D)______")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of TGV regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : TGV, \
+        'input' : u0,\
+        'regularisation_parameter':0.04, \
+        'alpha1':1.0,\
+        'alpha0':0.7,\
+        'number_of_iterations' :250 ,\
+        'LipshitzConstant' :12 ,\
+        }
+        
+print ("#############TGV CPU####################")
+start_time = timeit.default_timer()
+tgv_cpu = TGV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['alpha1'],
+              pars['alpha0'],
+              pars['number_of_iterations'],
+              pars['LipshitzConstant'],'cpu')
+             
+             
+rms = rmse(Im, tgv_cpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
 
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(tgv_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
 
+#%%
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 print ("________________NDF (2D)___________________")
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 
 ## plot 
-fig = plt.figure(4)
+fig = plt.figure()
 plt.suptitle('Performance of NDF regulariser using the CPU')
 a=fig.add_subplot(1,2,1)
 a.set_title('Noisy Image')
@@ -259,7 +308,7 @@ print ("___Anisotropic Diffusion 4th Order (2D)____")
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 
 ## plot 
-fig = plt.figure(5)
+fig = plt.figure()
 plt.suptitle('Performance of DIFF4th regulariser using the CPU')
 a=fig.add_subplot(1,2,1)
 a.set_title('Noisy Image')
@@ -304,7 +353,7 @@ print ("_____________FGP-dTV (2D)__________________")
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 
 ## plot 
-fig = plt.figure(6)
+fig = plt.figure()
 plt.suptitle('Performance of FGP-dTV regulariser using the CPU')
 a=fig.add_subplot(1,2,1)
 a.set_title('Noisy Image')
@@ -356,7 +405,7 @@ print ("__________Total nuclear Variation__________")
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 
 ## plot 
-fig = plt.figure(7)
+fig = plt.figure()
 plt.suptitle('Performance of TNV regulariser using the CPU')
 a=fig.add_subplot(1,2,1)
 a.set_title('Noisy Image')
diff --git a/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py b/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py
index 4611522..46b8ffc 100644
--- a/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py
+++ b/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py
@@ -12,7 +12,7 @@ import matplotlib.pyplot as plt
 import numpy as np
 import os
 import timeit
-from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, FGP_dTV, NDF, DIFF4th
+from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, FGP_dTV, NDF, DIFF4th
 from qualitymetrics import rmse
 ###############################################################################
 def printParametersToString(pars):
@@ -50,12 +50,13 @@ u_ref = Im + np.random.normal(loc = 0 ,
 u0 = u0.astype('float32')
 u_ref = u_ref.astype('float32')
 
+#%%
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 print ("____________ROF-TV bench___________________")
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 
 ## plot 
-fig = plt.figure(1)
+fig = plt.figure()
 plt.suptitle('Comparison of ROF-TV regulariser using CPU and GPU implementations')
 a=fig.add_subplot(1,4,1)
 a.set_title('Noisy Image')
@@ -90,7 +91,6 @@ a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
 imgplot = plt.imshow(rof_cpu, cmap="gray")
 plt.title('{}'.format('CPU results'))
 
-
 print ("##############ROF TV GPU##################")
 start_time = timeit.default_timer()
 rof_gpu = ROF_TV(pars['input'], 
@@ -128,12 +128,13 @@ if (diff_im.sum() > 1):
 else:
     print ("Arrays match")
 
+#%%
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 print ("____________FGP-TV bench___________________")
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 
 ## plot 
-fig = plt.figure(2)
+fig = plt.figure()
 plt.suptitle('Comparison of FGP-TV regulariser using CPU and GPU implementations')
 a=fig.add_subplot(1,4,1)
 a.set_title('Noisy Image')
@@ -218,12 +219,13 @@ if (diff_im.sum() > 1):
 else:
     print ("Arrays match")
 
+#%%
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 print ("____________SB-TV bench___________________")
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 
 ## plot 
-fig = plt.figure(3)
+fig = plt.figure()
 plt.suptitle('Comparison of SB-TV regulariser using CPU and GPU implementations')
 a=fig.add_subplot(1,4,1)
 a.set_title('Noisy Image')
@@ -303,14 +305,98 @@ if (diff_im.sum() > 1):
     print ("Arrays do not match!")
 else:
     print ("Arrays match")
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("____________TGV bench___________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Comparison of TGV regulariser using CPU and GPU implementations')
+a=fig.add_subplot(1,4,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : TGV, \
+        'input' : u0,\
+        'regularisation_parameter':0.04, \
+        'alpha1':1.0,\
+        'alpha0':0.7,\
+        'number_of_iterations' :250 ,\
+        'LipshitzConstant' :12 ,\
+        }
+        
+print ("#############TGV CPU####################")
+start_time = timeit.default_timer()
+tgv_cpu = TGV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['alpha1'],
+              pars['alpha0'],
+              pars['number_of_iterations'],
+              pars['LipshitzConstant'],'cpu')
+             
+rms = rmse(Im, tgv_cpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,4,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(tgv_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+
 
+print ("##############TGV GPU##################")
+start_time = timeit.default_timer()
+tgv_gpu = TGV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['alpha1'],
+              pars['alpha0'],
+              pars['number_of_iterations'],
+              pars['LipshitzConstant'],'gpu')
+                                   
+rms = rmse(Im, tgv_gpu)
+pars['rmse'] = rms
+pars['algorithm'] = TGV
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,4,3)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(tgv_gpu, cmap="gray")
+plt.title('{}'.format('GPU results'))
 
+print ("--------Compare the results--------")
+tolerance = 1e-05
+diff_im = np.zeros(np.shape(tgv_gpu))
+diff_im = abs(tgv_cpu - tgv_gpu)
+diff_im[diff_im > tolerance] = 1
+a=fig.add_subplot(1,4,4)
+imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray")
+plt.title('{}'.format('Pixels larger threshold difference'))
+if (diff_im.sum() > 1):
+    print ("Arrays do not match!")
+else:
+    print ("Arrays match")
+#%%
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 print ("_______________NDF bench___________________")
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 
 ## plot 
-fig = plt.figure(4)
+fig = plt.figure()
 plt.suptitle('Comparison of NDF regulariser using CPU and GPU implementations')
 a=fig.add_subplot(1,4,1)
 a.set_title('Noisy Image')
@@ -390,13 +476,13 @@ if (diff_im.sum() > 1):
 else:
     print ("Arrays match")
 
-
+#%%
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 print ("___Anisotropic Diffusion 4th Order (2D)____")
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 
 ## plot 
-fig = plt.figure(5)
+fig = plt.figure()
 plt.suptitle('Comparison of Diff4th regulariser using CPU and GPU implementations')
 a=fig.add_subplot(1,4,1)
 a.set_title('Noisy Image')
@@ -472,12 +558,13 @@ if (diff_im.sum() > 1):
 else:
     print ("Arrays match")
 
+#%%
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 print ("____________FGP-dTV bench___________________")
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 
 ## plot 
-fig = plt.figure(6)
+fig = plt.figure()
 plt.suptitle('Comparison of FGP-dTV regulariser using CPU and GPU implementations')
 a=fig.add_subplot(1,4,1)
 a.set_title('Noisy Image')
@@ -565,3 +652,4 @@ if (diff_im.sum() > 1):
     print ("Arrays do not match!")
 else:
     print ("Arrays match")
+#%%
\ No newline at end of file
diff --git a/Wrappers/Python/demos/demo_gpu_regularisers.py b/Wrappers/Python/demos/demo_gpu_regularisers.py
index 3179428..792a019 100644
--- a/Wrappers/Python/demos/demo_gpu_regularisers.py
+++ b/Wrappers/Python/demos/demo_gpu_regularisers.py
@@ -12,7 +12,7 @@ import matplotlib.pyplot as plt
 import numpy as np
 import os
 import timeit
-from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, FGP_dTV, NDF, DIFF4th
+from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, FGP_dTV, NDF, DIFF4th
 from qualitymetrics import rmse
 ###############################################################################
 def printParametersToString(pars):
@@ -66,13 +66,14 @@ Im2[:,0:M] = Im[:,0:M]
 Im = Im2
 del Im2
 """
+#%%
 
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 print ("____________ROF-TV regulariser_____________")
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 
 ## plot 
-fig = plt.figure(1)
+fig = plt.figure()
 plt.suptitle('Performance of the ROF-TV regulariser using the GPU')
 a=fig.add_subplot(1,2,1)
 a.set_title('Noisy Image')
@@ -108,13 +109,13 @@ a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
 imgplot = plt.imshow(rof_gpu, cmap="gray")
 plt.title('{}'.format('GPU results'))
 
-
+#%%
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 print ("____________FGP-TV regulariser_____________")
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 
 ## plot 
-fig = plt.figure(2)
+fig = plt.figure()
 plt.suptitle('Performance of the FGP-TV regulariser using the GPU')
 a=fig.add_subplot(1,2,1)
 a.set_title('Noisy Image')
@@ -157,13 +158,13 @@ a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
 imgplot = plt.imshow(fgp_gpu, cmap="gray")
 plt.title('{}'.format('GPU results'))
 
-
+#%%
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 print ("____________SB-TV regulariser______________")
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 
 ## plot 
-fig = plt.figure(3)
+fig = plt.figure()
 plt.suptitle('Performance of the SB-TV regulariser using the GPU')
 a=fig.add_subplot(1,2,1)
 a.set_title('Noisy Image')
@@ -203,14 +204,62 @@ a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
          verticalalignment='top', bbox=props)
 imgplot = plt.imshow(sb_gpu, cmap="gray")
 plt.title('{}'.format('GPU results'))
+#%%
 
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_____Total Generalised Variation (2D)______")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of TGV regulariser using the GPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : TGV, \
+        'input' : u0,\
+        'regularisation_parameter':0.04, \
+        'alpha1':1.0,\
+        'alpha0':0.7,\
+        'number_of_iterations' :250 ,\
+        'LipshitzConstant' :12 ,\
+        }
+        
+print ("#############TGV GPU####################")
+start_time = timeit.default_timer()
+tgv_gpu = TGV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['alpha1'],
+              pars['alpha0'],
+              pars['number_of_iterations'],
+              pars['LipshitzConstant'],'gpu')  
+             
+             
+rms = rmse(Im, tgv_gpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(tgv_gpu, cmap="gray")
+plt.title('{}'.format('GPU results'))
+
+#%%
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 print ("_______________NDF regulariser_____________")
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 
 ## plot 
-fig = plt.figure(4)
+fig = plt.figure()
 plt.suptitle('Performance of the NDF regulariser using the GPU')
 a=fig.add_subplot(1,2,1)
 a.set_title('Noisy Image')
@@ -251,13 +300,13 @@ a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
 imgplot = plt.imshow(ndf_gpu, cmap="gray")
 plt.title('{}'.format('GPU results'))
 
-
+#%%
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 print ("___Anisotropic Diffusion 4th Order (2D)____")
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 
 ## plot 
-fig = plt.figure(5)
+fig = plt.figure()
 plt.suptitle('Performance of DIFF4th regulariser using the GPU')
 a=fig.add_subplot(1,2,1)
 a.set_title('Noisy Image')
@@ -296,12 +345,13 @@ a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
 imgplot = plt.imshow(diff4_gpu, cmap="gray")
 plt.title('{}'.format('GPU results'))
 
+#%%
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 print ("____________FGP-dTV bench___________________")
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 
 ## plot 
-fig = plt.figure(6)
+fig = plt.figure()
 plt.suptitle('Performance of the FGP-dTV regulariser using the GPU')
 a=fig.add_subplot(1,2,1)
 a.set_title('Noisy Image')
diff --git a/Wrappers/Python/setup-regularisers.py.in b/Wrappers/Python/setup-regularisers.py.in
index 76dfecf..89ebaf9 100644
--- a/Wrappers/Python/setup-regularisers.py.in
+++ b/Wrappers/Python/setup-regularisers.py.in
@@ -38,6 +38,7 @@ extra_include_dirs += [os.path.join(".." , ".." , "Core"),
                        os.path.join(".." , ".." , "Core",  "regularisers_GPU" , "TV_FGP" ) , 
                        os.path.join(".." , ".." , "Core",  "regularisers_GPU" , "TV_ROF" ) , 
                        os.path.join(".." , ".." , "Core",  "regularisers_GPU" , "TV_SB" ) ,
+                       os.path.join(".." , ".." , "Core",  "regularisers_GPU" , "TGV" ) ,
                        os.path.join(".." , ".." , "Core",  "regularisers_GPU" , "NDF" ) ,
                        os.path.join(".." , ".." , "Core",  "regularisers_GPU" , "dTV_FGP" ) , 
                        os.path.join(".." , ".." , "Core",  "regularisers_GPU" , "DIFF4th" ) , 
diff --git a/Wrappers/Python/src/cpu_regularisers.pyx b/Wrappers/Python/src/cpu_regularisers.pyx
index bdb1eff..cf81bec 100644
--- a/Wrappers/Python/src/cpu_regularisers.pyx
+++ b/Wrappers/Python/src/cpu_regularisers.pyx
@@ -21,6 +21,7 @@ cimport numpy as np
 cdef extern float TV_ROF_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float tau, int dimX, int dimY, int dimZ);
 cdef extern float TV_FGP_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
 cdef extern float SB_TV_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ);
+cdef extern float TGV_main(float *Input, float *Output, float lambdaPar, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY);
 cdef extern float Diffusion_CPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int dimX, int dimY, int dimZ);
 cdef extern float Diffus4th_CPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int dimX, int dimY, int dimZ);
 cdef extern float TNV_CPU_main(float *Input, float *u, float lambdaPar, int maxIter, float tol, int dimX, int dimY, int dimZ);
@@ -189,6 +190,39 @@ def TV_SB_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
                        printM,
                        dims[2], dims[1], dims[0])
     return outputData 
+
+#***************************************************************#
+#***************** Total Generalised Variation *****************#
+#***************************************************************#
+def TGV_CPU(inputData, regularisation_parameter, alpha1, alpha0, iterations, LipshitzConst):
+    if inputData.ndim == 2:
+        return TGV_2D(inputData, regularisation_parameter, alpha1, alpha0, iterations, LipshitzConst)
+    elif inputData.ndim == 3:
+        return 0
+
+def TGV_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, 
+                     float regularisation_parameter,
+                     float alpha1,
+                     float alpha0,
+                     int iterationsNumb, 
+                     float LipshitzConst):
+                         
+    cdef long dims[2]
+    dims[0] = inputData.shape[0]
+    dims[1] = inputData.shape[1]
+    
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
+            np.zeros([dims[0],dims[1]], dtype='float32')
+                   
+    #/* Run TGV iterations for 2D data */
+    TGV_main(&inputData[0,0], &outputData[0,0], regularisation_parameter, 
+                       alpha1,
+                       alpha0,
+                       iterationsNumb, 
+                       LipshitzConst,
+                       dims[1],dims[0])                           
+    return outputData
+    
 #****************************************************************#
 #**************Directional Total-variation FGP ******************#
 #****************************************************************#
diff --git a/Wrappers/Python/src/gpu_regularisers.pyx b/Wrappers/Python/src/gpu_regularisers.pyx
index b67e62b..4a202d7 100644
--- a/Wrappers/Python/src/gpu_regularisers.pyx
+++ b/Wrappers/Python/src/gpu_regularisers.pyx
@@ -21,6 +21,7 @@ cimport numpy as np
 cdef extern void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z);
 cdef extern void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int N, int M, int Z);
 cdef extern void TV_SB_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int printM, int N, int M, int Z);
+cdef extern void TGV_GPU_main(float *Input, float *Output, float lambdaPar, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY);
 cdef extern void NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z);
 cdef extern void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int printM, int N, int M, int Z);
 cdef extern void Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int N, int M, int Z);
@@ -86,6 +87,12 @@ def TV_SB_GPU(inputData,
                      tolerance_param,
                      methodTV,
                      printM)
+# Total Generalised Variation (TGV)
+def TGV_GPU(inputData, regularisation_parameter, alpha1, alpha0, iterations, LipshitzConst):
+    if inputData.ndim == 2:
+        return TGV2D(inputData, regularisation_parameter, alpha1, alpha0, iterations, LipshitzConst)
+    elif inputData.ndim == 3:
+        return 0
 # Directional Total-variation Fast-Gradient-Projection (FGP)
 def dTV_FGP_GPU(inputData,
                      refdata,
@@ -315,6 +322,33 @@ def SBTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
                        dims[2], dims[1], dims[0]);
      
     return outputData 
+
+#***************************************************************#
+#***************** Total Generalised Variation *****************#
+#***************************************************************#
+def TGV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, 
+                     float regularisation_parameter,
+                     float alpha1,
+                     float alpha0,
+                     int iterationsNumb, 
+                     float LipshitzConst):
+                         
+    cdef long dims[2]
+    dims[0] = inputData.shape[0]
+    dims[1] = inputData.shape[1]
+    
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
+            np.zeros([dims[0],dims[1]], dtype='float32')
+                   
+    #/* Run TGV iterations for 2D data */
+    TGV_GPU_main(&inputData[0,0], &outputData[0,0], regularisation_parameter, 
+                       alpha1,
+                       alpha0,
+                       iterationsNumb, 
+                       LipshitzConst,
+                       dims[1],dims[0])
+    return outputData
+
 #****************************************************************#
 #**************Directional Total-variation FGP ******************#
 #****************************************************************#
-- 
cgit v1.2.3