From 723a2d3fbe9a7a8c145b5f5ef481dcd4a3799383 Mon Sep 17 00:00:00 2001
From: Daniil Kazantsev <dkazanc@hotmail.com>
Date: Wed, 24 Jan 2018 17:39:38 +0000
Subject: all Matlab related stuff have been moved to wrappers

---
 Core/regularizers_CPU/FGP_TV.c                     | 216 -------
 Core/regularizers_CPU/LLT_model.c                  | 169 -----
 Core/regularizers_CPU/PatchBased_Regul.c           | 140 ----
 Core/regularizers_CPU/SplitBregman_TV.c            | 179 ------
 Core/regularizers_CPU/TGV_PD.c                     | 144 -----
 .../Diffus_HO/Diff4thHajiaboli_GPU.cpp             | 114 ----
 Core/regularizers_GPU/NL_Regul/NLM_GPU.cpp         | 171 -----
 Wrappers/Matlab/compile_mex.m                      |  11 -
 Wrappers/Matlab/demos/Demo_Phantom3D_Cone.m        |  67 ++
 Wrappers/Matlab/demos/Demo_Phantom3D_Parallel.m    | 121 ++++
 Wrappers/Matlab/demos/Demo_RealData3D_Parallel.m   | 186 ++++++
 Wrappers/Matlab/demos/exportDemoRD2Data.m          |  35 +
 Wrappers/Matlab/mex_compile/compile_mex.m          |  11 +
 .../Matlab/mex_compile/regularizers_CPU/FGP_TV.c   | 216 +++++++
 .../mex_compile/regularizers_CPU/FGP_TV_core.c     | 266 ++++++++
 .../mex_compile/regularizers_CPU/FGP_TV_core.h     |  71 +++
 .../mex_compile/regularizers_CPU/LLT_model.c       | 169 +++++
 .../mex_compile/regularizers_CPU/LLT_model_core.c  | 318 ++++++++++
 .../mex_compile/regularizers_CPU/LLT_model_core.h  |  46 ++
 .../regularizers_CPU/PatchBased_Regul.c            | 140 ++++
 .../regularizers_CPU/PatchBased_Regul_core.c       | 213 +++++++
 .../regularizers_CPU/PatchBased_Regul_core.h       |  69 ++
 .../mex_compile/regularizers_CPU/SplitBregman_TV.c | 179 ++++++
 .../regularizers_CPU/SplitBregman_TV_core.c        | 259 ++++++++
 .../regularizers_CPU/SplitBregman_TV_core.h        |  69 ++
 .../Matlab/mex_compile/regularizers_CPU/TGV_PD.c   | 144 +++++
 .../mex_compile/regularizers_CPU/TGV_PD_core.c     | 208 ++++++
 .../mex_compile/regularizers_CPU/TGV_PD_core.h     |  67 ++
 .../Matlab/mex_compile/regularizers_CPU/utils.c    |  29 +
 .../Matlab/mex_compile/regularizers_CPU/utils.h    |  32 +
 .../Diffus_HO/Diff4thHajiaboli_GPU.cpp             | 114 ++++
 .../Diffus_HO/Diff4th_GPU_kernel.cu                | 270 ++++++++
 .../Diffus_HO/Diff4th_GPU_kernel.h                 |   6 +
 .../regularizers_GPU/NL_Regul/NLM_GPU.cpp          | 171 +++++
 .../regularizers_GPU/NL_Regul/NLM_GPU_kernel.cu    | 239 +++++++
 .../regularizers_GPU/NL_Regul/NLM_GPU_kernel.h     |   6 +
 Wrappers/Matlab/studentst.m                        |  47 --
 Wrappers/Matlab/supp/RMSE.m                        |   7 +
 Wrappers/Matlab/supp/my_red_yellowMAP.mat          | Bin 0 -> 1761 bytes
 Wrappers/Matlab/supp/sino_add_artifacts.m          |  33 +
 Wrappers/Matlab/supp/studentst.m                   |  47 ++
 Wrappers/Matlab/supp/zing_rings_add.m              |  91 +++
 demos/Demo_Phantom3D_Cone.m                        |  67 --
 demos/Demo_Phantom3D_Parallel.m                    | 121 ----
 demos/Demo_RealData3D_Parallel.m                   | 186 ------
 demos/DendrData.h5                                 | Bin 72598872 -> 0 bytes
 demos/exportDemoRD2Data.m                          |  35 -
 main_func/FISTA_REC.m                              | 704 ---------------------
 main_func/compile_mex.m                            |  11 -
 main_func/regularizers_CPU/FGP_TV.c                | 216 -------
 main_func/regularizers_CPU/FGP_TV_core.c           | 266 --------
 main_func/regularizers_CPU/FGP_TV_core.h           |  71 ---
 main_func/regularizers_CPU/LLT_model.c             | 169 -----
 main_func/regularizers_CPU/LLT_model_core.c        | 318 ----------
 main_func/regularizers_CPU/LLT_model_core.h        |  46 --
 main_func/regularizers_CPU/PatchBased_Regul.c      | 140 ----
 main_func/regularizers_CPU/PatchBased_Regul_core.c | 213 -------
 main_func/regularizers_CPU/PatchBased_Regul_core.h |  69 --
 main_func/regularizers_CPU/SplitBregman_TV.c       | 179 ------
 main_func/regularizers_CPU/SplitBregman_TV_core.c  | 259 --------
 main_func/regularizers_CPU/SplitBregman_TV_core.h  |  69 --
 main_func/regularizers_CPU/TGV_PD.c                | 144 -----
 main_func/regularizers_CPU/TGV_PD_core.c           | 208 ------
 main_func/regularizers_CPU/TGV_PD_core.h           |  67 --
 main_func/regularizers_CPU/utils.c                 |  29 -
 main_func/regularizers_CPU/utils.h                 |  32 -
 .../Diffus_HO/Diff4thHajiaboli_GPU.cpp             | 114 ----
 .../Diffus_HO/Diff4th_GPU_kernel.cu                | 270 --------
 .../Diffus_HO/Diff4th_GPU_kernel.h                 |   6 -
 main_func/regularizers_GPU/NL_Regul/NLM_GPU.cpp    | 171 -----
 .../regularizers_GPU/NL_Regul/NLM_GPU_kernel.cu    | 239 -------
 .../regularizers_GPU/NL_Regul/NLM_GPU_kernel.h     |   6 -
 main_func/studentst.m                              |  47 --
 src/Python/conda-recipe/meta.yaml                  |   4 +-
 supp/RMSE.m                                        |   7 -
 supp/my_red_yellowMAP.mat                          | Bin 1761 -> 0 bytes
 supp/sino_add_artifacts.m                          |  33 -
 supp/zing_rings_add.m                              |  91 ---
 78 files changed, 3901 insertions(+), 5796 deletions(-)
 delete mode 100644 Core/regularizers_CPU/FGP_TV.c
 delete mode 100644 Core/regularizers_CPU/LLT_model.c
 delete mode 100644 Core/regularizers_CPU/PatchBased_Regul.c
 delete mode 100644 Core/regularizers_CPU/SplitBregman_TV.c
 delete mode 100644 Core/regularizers_CPU/TGV_PD.c
 delete mode 100644 Core/regularizers_GPU/Diffus_HO/Diff4thHajiaboli_GPU.cpp
 delete mode 100644 Core/regularizers_GPU/NL_Regul/NLM_GPU.cpp
 delete mode 100644 Wrappers/Matlab/compile_mex.m
 create mode 100644 Wrappers/Matlab/demos/Demo_Phantom3D_Cone.m
 create mode 100644 Wrappers/Matlab/demos/Demo_Phantom3D_Parallel.m
 create mode 100644 Wrappers/Matlab/demos/Demo_RealData3D_Parallel.m
 create mode 100644 Wrappers/Matlab/demos/exportDemoRD2Data.m
 create mode 100644 Wrappers/Matlab/mex_compile/compile_mex.m
 create mode 100644 Wrappers/Matlab/mex_compile/regularizers_CPU/FGP_TV.c
 create mode 100644 Wrappers/Matlab/mex_compile/regularizers_CPU/FGP_TV_core.c
 create mode 100644 Wrappers/Matlab/mex_compile/regularizers_CPU/FGP_TV_core.h
 create mode 100644 Wrappers/Matlab/mex_compile/regularizers_CPU/LLT_model.c
 create mode 100644 Wrappers/Matlab/mex_compile/regularizers_CPU/LLT_model_core.c
 create mode 100644 Wrappers/Matlab/mex_compile/regularizers_CPU/LLT_model_core.h
 create mode 100644 Wrappers/Matlab/mex_compile/regularizers_CPU/PatchBased_Regul.c
 create mode 100644 Wrappers/Matlab/mex_compile/regularizers_CPU/PatchBased_Regul_core.c
 create mode 100644 Wrappers/Matlab/mex_compile/regularizers_CPU/PatchBased_Regul_core.h
 create mode 100644 Wrappers/Matlab/mex_compile/regularizers_CPU/SplitBregman_TV.c
 create mode 100644 Wrappers/Matlab/mex_compile/regularizers_CPU/SplitBregman_TV_core.c
 create mode 100644 Wrappers/Matlab/mex_compile/regularizers_CPU/SplitBregman_TV_core.h
 create mode 100644 Wrappers/Matlab/mex_compile/regularizers_CPU/TGV_PD.c
 create mode 100644 Wrappers/Matlab/mex_compile/regularizers_CPU/TGV_PD_core.c
 create mode 100644 Wrappers/Matlab/mex_compile/regularizers_CPU/TGV_PD_core.h
 create mode 100644 Wrappers/Matlab/mex_compile/regularizers_CPU/utils.c
 create mode 100644 Wrappers/Matlab/mex_compile/regularizers_CPU/utils.h
 create mode 100644 Wrappers/Matlab/mex_compile/regularizers_GPU/Diffus_HO/Diff4thHajiaboli_GPU.cpp
 create mode 100644 Wrappers/Matlab/mex_compile/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.cu
 create mode 100644 Wrappers/Matlab/mex_compile/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.h
 create mode 100644 Wrappers/Matlab/mex_compile/regularizers_GPU/NL_Regul/NLM_GPU.cpp
 create mode 100644 Wrappers/Matlab/mex_compile/regularizers_GPU/NL_Regul/NLM_GPU_kernel.cu
 create mode 100644 Wrappers/Matlab/mex_compile/regularizers_GPU/NL_Regul/NLM_GPU_kernel.h
 delete mode 100644 Wrappers/Matlab/studentst.m
 create mode 100644 Wrappers/Matlab/supp/RMSE.m
 create mode 100644 Wrappers/Matlab/supp/my_red_yellowMAP.mat
 create mode 100644 Wrappers/Matlab/supp/sino_add_artifacts.m
 create mode 100644 Wrappers/Matlab/supp/studentst.m
 create mode 100644 Wrappers/Matlab/supp/zing_rings_add.m
 delete mode 100644 demos/Demo_Phantom3D_Cone.m
 delete mode 100644 demos/Demo_Phantom3D_Parallel.m
 delete mode 100644 demos/Demo_RealData3D_Parallel.m
 delete mode 100644 demos/DendrData.h5
 delete mode 100644 demos/exportDemoRD2Data.m
 delete mode 100644 main_func/FISTA_REC.m
 delete mode 100644 main_func/compile_mex.m
 delete mode 100644 main_func/regularizers_CPU/FGP_TV.c
 delete mode 100644 main_func/regularizers_CPU/FGP_TV_core.c
 delete mode 100644 main_func/regularizers_CPU/FGP_TV_core.h
 delete mode 100644 main_func/regularizers_CPU/LLT_model.c
 delete mode 100644 main_func/regularizers_CPU/LLT_model_core.c
 delete mode 100644 main_func/regularizers_CPU/LLT_model_core.h
 delete mode 100644 main_func/regularizers_CPU/PatchBased_Regul.c
 delete mode 100644 main_func/regularizers_CPU/PatchBased_Regul_core.c
 delete mode 100644 main_func/regularizers_CPU/PatchBased_Regul_core.h
 delete mode 100644 main_func/regularizers_CPU/SplitBregman_TV.c
 delete mode 100644 main_func/regularizers_CPU/SplitBregman_TV_core.c
 delete mode 100644 main_func/regularizers_CPU/SplitBregman_TV_core.h
 delete mode 100644 main_func/regularizers_CPU/TGV_PD.c
 delete mode 100644 main_func/regularizers_CPU/TGV_PD_core.c
 delete mode 100644 main_func/regularizers_CPU/TGV_PD_core.h
 delete mode 100644 main_func/regularizers_CPU/utils.c
 delete mode 100644 main_func/regularizers_CPU/utils.h
 delete mode 100644 main_func/regularizers_GPU/Diffus_HO/Diff4thHajiaboli_GPU.cpp
 delete mode 100644 main_func/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.cu
 delete mode 100644 main_func/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.h
 delete mode 100644 main_func/regularizers_GPU/NL_Regul/NLM_GPU.cpp
 delete mode 100644 main_func/regularizers_GPU/NL_Regul/NLM_GPU_kernel.cu
 delete mode 100644 main_func/regularizers_GPU/NL_Regul/NLM_GPU_kernel.h
 delete mode 100644 main_func/studentst.m
 delete mode 100644 supp/RMSE.m
 delete mode 100644 supp/my_red_yellowMAP.mat
 delete mode 100644 supp/sino_add_artifacts.m
 delete mode 100644 supp/zing_rings_add.m

diff --git a/Core/regularizers_CPU/FGP_TV.c b/Core/regularizers_CPU/FGP_TV.c
deleted file mode 100644
index 30cea1a..0000000
--- a/Core/regularizers_CPU/FGP_TV.c
+++ /dev/null
@@ -1,216 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-#include "matrix.h"
-#include "mex.h"
-#include "FGP_TV_core.h"
-
-/* C-OMP implementation of FGP-TV [1] denoising/regularization model (2D/3D case)
- *
- * Input Parameters:
- * 1. Noisy image/volume [REQUIRED]
- * 2. lambda - regularization parameter [REQUIRED]
- * 3. Number of iterations [OPTIONAL parameter]
- * 4. eplsilon: tolerance constant [OPTIONAL parameter]
- * 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
- *
- * Output:
- * [1] Filtered/regularized image
- * [2] last function value 
- *
- * Example of image denoising:
- * figure;
- * Im = double(imread('lena_gray_256.tif'))/255;  % loading image
- * u0 = Im + .05*randn(size(Im)); % adding noise
- * u = FGP_TV(single(u0), 0.05, 100, 1e-04);
- *
- * to compile with OMP support: mex FGP_TV.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
- * This function is based on the Matlab's code and paper by
- * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
- *
- * D. Kazantsev, 2016-17
- *
- */
-
-
-void mexFunction(
-        int nlhs, mxArray *plhs[],
-        int nrhs, const mxArray *prhs[])
-        
-{
-    int number_of_dims, iter, dimX, dimY, dimZ, ll, j, count, methTV;
-    const int  *dim_array;
-    float *A, *D=NULL, *D_old=NULL, *P1=NULL, *P2=NULL, *P3=NULL, *P1_old=NULL, *P2_old=NULL, *P3_old=NULL, *R1=NULL, *R2=NULL, *R3=NULL, lambda, tk, tkp1, re, re1, re_old, epsil;
-    
-    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
-    dim_array = mxGetDimensions(prhs[0]);
-    
-    /*Handling Matlab input data*/
-    if ((nrhs < 2) || (nrhs > 5)) mexErrMsgTxt("At least 2 parameters is required: Image(2D/3D), Regularization parameter. The full list of parameters: Image(2D/3D), Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1')");
-    
-    A  = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */
-    lambda =  (float) mxGetScalar(prhs[1]); /* regularization parameter */
-    iter = 50; /* default iterations number */
-    epsil = 0.0001; /* default tolerance constant */
-    methTV = 0;  /* default isotropic TV penalty */
-    
-    if ((nrhs == 3) || (nrhs == 4) || (nrhs == 5))  iter = (int) mxGetScalar(prhs[2]); /* iterations number */
-    if ((nrhs == 4) || (nrhs == 5))  epsil =  (float) mxGetScalar(prhs[3]); /* tolerance constant */
-    if (nrhs == 5)  {
-        char *penalty_type;
-        penalty_type = mxArrayToString(prhs[4]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */
-        if ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0)) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',");
-        if (strcmp(penalty_type, "l1") == 0)  methTV = 1;  /* enable 'l1' penalty */
-        mxFree(penalty_type);
-    }
-    /*output function value (last iteration) */
-    plhs[1] = mxCreateNumericMatrix(1, 1, mxSINGLE_CLASS, mxREAL);  
-    float *funcvalA = (float *) mxGetData(plhs[1]);
-        
-    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
-    
-    /*Handling Matlab output data*/
-    dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2];
-    
-    tk = 1.0f;
-    tkp1=1.0f;
-    count = 0;
-    re_old = 0.0f;
-    
-    if (number_of_dims == 2) {
-        dimZ = 1; /*2D case*/
-        D = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        D_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        P1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        P2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        P1_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        P2_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        R1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        R2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        
-        /* begin iterations */
-        for(ll=0; ll<iter; ll++) {
-            
-            /* computing the gradient of the objective function */
-            Obj_func2D(A, D, R1, R2, lambda, dimX, dimY);
-            
-            /*Taking a step towards minus of the gradient*/
-            Grad_func2D(P1, P2, D, R1, R2, lambda, dimX, dimY);
-            
-            /* projection step */
-            Proj_func2D(P1, P2, methTV, dimX, dimY);
-            
-            /*updating R and t*/
-            tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
-            Rupd_func2D(P1, P1_old, P2, P2_old, R1, R2, tkp1, tk, dimX, dimY);                
-            
-            /* calculate norm */
-            re = 0.0f; re1 = 0.0f;
-            for(j=0; j<dimX*dimY*dimZ; j++)
-            {
-                re += pow(D[j] - D_old[j],2);
-                re1 += pow(D[j],2);
-            }
-            re = sqrt(re)/sqrt(re1);
-            if (re < epsil)  count++;
-            if (count > 4) {
-                Obj_func_CALC2D(A, D, funcvalA, lambda, dimX, dimY); 
-                break; }
-            
-            /* check that the residual norm is decreasing */
-            if (ll > 2) {
-                if (re > re_old) {
-                    Obj_func_CALC2D(A, D, funcvalA, lambda, dimX, dimY);                                   
-                    break; }}            
-            re_old = re;
-            /*printf("%f %i %i \n", re, ll, count); */                      
-          
-            /*storing old values*/
-            copyIm(D, D_old, dimX, dimY, dimZ);
-            copyIm(P1, P1_old, dimX, dimY, dimZ);
-            copyIm(P2, P2_old, dimX, dimY, dimZ);
-            tk = tkp1;
-            
-            /* calculating the objective function value */
-            if (ll == (iter-1)) Obj_func_CALC2D(A, D, funcvalA, lambda, dimX, dimY);            
-        }
-        printf("FGP-TV iterations stopped at iteration %i with the function value %f \n", ll, funcvalA[0]);
-    }
-    if (number_of_dims == 3) {
-        D = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        D_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        P1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        P2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        P3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        P1_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        P2_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        P3_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        R1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        R2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        R3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        
-        /* begin iterations */
-        for(ll=0; ll<iter; ll++) {
-            
-            /* computing the gradient of the objective function */
-            Obj_func3D(A, D, R1, R2, R3,lambda, dimX, dimY, dimZ);
-            
-            /*Taking a step towards minus of the gradient*/
-            Grad_func3D(P1, P2, P3, D, R1, R2, R3, lambda, dimX, dimY, dimZ);
-            
-            /* projection step */
-            Proj_func3D(P1, P2, P3, dimX, dimY, dimZ);
-            
-            /*updating R and t*/
-            tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
-            Rupd_func3D(P1, P1_old, P2, P2_old, P3, P3_old, R1, R2, R3, tkp1, tk, dimX, dimY, dimZ);
-            
-            /* calculate norm - stopping rules*/
-            re = 0.0f; re1 = 0.0f;
-            for(j=0; j<dimX*dimY*dimZ; j++)
-            {
-                re += pow(D[j] - D_old[j],2);
-                re1 += pow(D[j],2);
-            }
-            re = sqrt(re)/sqrt(re1);
-            /* stop if the norm residual is less than the tolerance EPS */
-            if (re < epsil)  count++;
-            if (count > 3) {
-                Obj_func_CALC3D(A, D, funcvalA, lambda, dimX, dimY, dimZ);                            
-                break;}
-            
-            /* check that the residual norm is decreasing */
-            if (ll > 2) {
-                if (re > re_old) {
-                Obj_func_CALC3D(A, D, funcvalA, lambda, dimX, dimY, dimZ);
-                }}            
-            re_old = re;
-            /*printf("%f %i %i \n", re, ll, count); */
-            
-            /*storing old values*/
-            copyIm(D, D_old, dimX, dimY, dimZ);
-            copyIm(P1, P1_old, dimX, dimY, dimZ);
-            copyIm(P2, P2_old, dimX, dimY, dimZ);
-            copyIm(P3, P3_old, dimX, dimY, dimZ);
-            tk = tkp1;
-            
-            if (ll == (iter-1)) Obj_func_CALC3D(A, D, funcvalA, lambda, dimX, dimY, dimZ);            
-        }
-        printf("FGP-TV iterations stopped at iteration %i with the function value %f \n", ll, funcvalA[0]);
-    }
-}
diff --git a/Core/regularizers_CPU/LLT_model.c b/Core/regularizers_CPU/LLT_model.c
deleted file mode 100644
index 0b07b47..0000000
--- a/Core/regularizers_CPU/LLT_model.c
+++ /dev/null
@@ -1,169 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#include "mex.h"
-#include "matrix.h"
-#include "LLT_model_core.h"
-
-/* C-OMP implementation of Lysaker, Lundervold and Tai (LLT) model of higher order regularization penalty
-*
-* Input Parameters:
-* 1. U0 - original noise image/volume
-* 2. lambda - regularization parameter
-* 3. tau - time-step  for explicit scheme
-* 4. iter - iterations number
-* 5. epsil  - tolerance constant (to terminate earlier)
-* 6. switcher - default is 0, switch to (1) to restrictive smoothing in Z dimension (in test)
-*
-* Output:
-* Filtered/regularized image
-*
-* Example:
-* figure;
-* Im = double(imread('lena_gray_256.tif'))/255;  % loading image
-* u0 = Im + .03*randn(size(Im)); % adding noise
-* [Den] = LLT_model(single(u0), 10, 0.1, 1);
-*
-*
-* to compile with OMP support: mex LLT_model.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
-* References: Lysaker, Lundervold and Tai (LLT) 2003, IEEE
-*
-* 28.11.16/Harwell
-*/
-
-void mexFunction(
-        int nlhs, mxArray *plhs[],
-        int nrhs, const mxArray *prhs[])
-        
-{
-    int number_of_dims, iter, dimX, dimY, dimZ, ll, j, count, switcher;
-    const int  *dim_array;
-    float *U0, *U=NULL, *U_old=NULL, *D1=NULL, *D2=NULL, *D3=NULL, lambda, tau, re, re1, epsil, re_old;
-    unsigned short *Map=NULL;
-    
-    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
-    dim_array = mxGetDimensions(prhs[0]);
-    
-    /*Handling Matlab input data*/
-    U0  = (float *) mxGetData(prhs[0]); /*origanal noise image/volume*/
-    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input in single precision is required"); }
-    lambda =  (float) mxGetScalar(prhs[1]); /*regularization parameter*/
-    tau =  (float) mxGetScalar(prhs[2]); /* time-step */
-    iter =  (int) mxGetScalar(prhs[3]); /*iterations number*/
-    epsil =  (float) mxGetScalar(prhs[4]); /* tolerance constant */
-    switcher =  (int) mxGetScalar(prhs[5]); /*switch on (1) restrictive smoothing in Z dimension*/
-     
-    /*Handling Matlab output data*/
-    dimX = dim_array[0]; dimY = dim_array[1];  dimZ = 1;
-    
-    if (number_of_dims == 2) {
-        /*2D case*/       
-        U = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        U_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        D1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        D2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-    }
-    else if (number_of_dims == 3) {
-        /*3D case*/
-        dimZ = dim_array[2];
-        U = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        U_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        D1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        D2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        D3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        if (switcher != 0) {
-        Map = (unsigned short*)mxGetPr(plhs[1] = mxCreateNumericArray(3, dim_array, mxUINT16_CLASS, mxREAL));       
-        }
-    }
-    else {mexErrMsgTxt("The input data should be 2D or 3D");}
-    
-    /*Copy U0 to U*/
-    copyIm(U0, U, dimX, dimY, dimZ);
-    
-    count = 1;
-    re_old = 0.0f; 
-    if (number_of_dims == 2) {
-        for(ll = 0; ll < iter; ll++) {
-            
-            copyIm(U, U_old, dimX, dimY, dimZ);
-            
-            /*estimate inner derrivatives */
-            der2D(U, D1, D2, dimX, dimY, dimZ);
-            /* calculate div^2 and update */
-            div_upd2D(U0, U, D1, D2, dimX, dimY, dimZ, lambda, tau);
-            
-            /* calculate norm to terminate earlier */
-            re = 0.0f; re1 = 0.0f;
-            for(j=0; j<dimX*dimY*dimZ; j++)
-            {
-                re += pow(U_old[j] - U[j],2);
-                re1 += pow(U_old[j],2);
-            }
-            re = sqrt(re)/sqrt(re1);           
-            if (re < epsil)  count++;
-            if (count > 4) break;            
-            
-            /* check that the residual norm is decreasing */
-            if (ll > 2) {
-                if (re > re_old) break; 
-            }
-            re_old = re;         
-        
-        } /*end of iterations*/
-        printf("HO iterations stopped at iteration: %i\n", ll);          
-    }
-    /*3D version*/
-    if (number_of_dims == 3) {
-        
-        if (switcher == 1) {
-            /* apply restrictive smoothing */            
-            calcMap(U, Map, dimX, dimY, dimZ);
-            /*clear outliers */
-            cleanMap(Map, dimX, dimY, dimZ);           
-        }
-        for(ll = 0; ll < iter; ll++) {
-            
-            copyIm(U, U_old, dimX, dimY, dimZ);
-            
-            /*estimate inner derrivatives */
-            der3D(U, D1, D2, D3, dimX, dimY, dimZ);          
-            /* calculate div^2 and update */
-            div_upd3D(U0, U, D1, D2, D3, Map, switcher, dimX, dimY, dimZ, lambda, tau);                 
-            
-            /* calculate norm to terminate earlier */
-            re = 0.0f; re1 = 0.0f;
-            for(j=0; j<dimX*dimY*dimZ; j++)
-            {
-                re += pow(U_old[j] - U[j],2);
-                re1 += pow(U_old[j],2);
-            }
-            re = sqrt(re)/sqrt(re1);           
-            if (re < epsil)  count++;
-            if (count > 4) break;            
-            
-            /* check that the residual norm is decreasing */
-            if (ll > 2) {
-                if (re > re_old) break; 
-            }
-            re_old = re;     
-            
-        } /*end of iterations*/
-        printf("HO iterations stopped at iteration: %i\n", ll);       
-    }
-}
diff --git a/Core/regularizers_CPU/PatchBased_Regul.c b/Core/regularizers_CPU/PatchBased_Regul.c
deleted file mode 100644
index 5d89b0c..0000000
--- a/Core/regularizers_CPU/PatchBased_Regul.c
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#include "mex.h"
-#include "matrix.h"
-#include "PatchBased_Regul_core.h"
-
-
-/* C-OMP implementation of  patch-based (PB) regularization (2D and 3D cases). 
- * This method finds self-similar patches in data and performs one fixed point iteration to mimimize the PB penalty function
- * 
- * References: 1. Yang Z. & Jacob M. "Nonlocal Regularization of Inverse Problems"
- *             2. Kazantsev D. et al. "4D-CT reconstruction with unified spatial-temporal patch-based regularization"
- *
- * Input Parameters:
- * 1. Image (2D or 3D) [required]
- * 2. ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window) [optional]
- * 3. ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window) [optional]
- * 4. h - parameter for the PB penalty function [optional]
- * 5. lambda - regularization parameter  [optional]
-
- * Output:
- * 1. regularized (denoised) Image (N x N)/volume (N x N x N)
- *
- * 2D denoising example in Matlab:   
-   Im = double(imread('lena_gray_256.tif'))/255;  % loading image
-   u0 = Im + .03*randn(size(Im)); u0(u0<0) = 0; % adding noise
-   ImDen = PatchBased_Regul(single(u0), 3, 1, 0.08, 0.05); 
- *
- * Matlab + C/mex compilers needed
- * to compile with OMP support: mex PatchBased_Regul.c CFLAGS="\$CFLAGS -fopenmp -Wall" LDFLAGS="\$LDFLAGS -fopenmp"
- *
- * D. Kazantsev *
- * 02/07/2014
- * Harwell, UK
- */
-
-
-void mexFunction(
-        int nlhs, mxArray *plhs[],
-        int nrhs, const mxArray *prhs[]) 
-{    
-    int N, M, Z, numdims, SearchW, SimilW, SearchW_real, padXY, newsizeX, newsizeY, newsizeZ, switchpad_crop;
-    const int  *dims;
-    float *A, *B=NULL, *Ap=NULL, *Bp=NULL, h, lambda;
-    
-    numdims = mxGetNumberOfDimensions(prhs[0]);
-    dims = mxGetDimensions(prhs[0]);
-    
-    N = dims[0];
-    M = dims[1];
-    Z = dims[2];
-    
-    if ((numdims < 2) || (numdims > 3)) {mexErrMsgTxt("The input is 2D image or 3D volume");}
-    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input in single precision is required"); }
-    
-    if(nrhs != 5) mexErrMsgTxt("Five inputs reqired: Image(2D,3D), SearchW, SimilW, Threshold, Regularization parameter");
-    
-    /*Handling inputs*/
-    A  = (float *) mxGetData(prhs[0]);    /* the image/volume to regularize/filter */
-    SearchW_real = 3; /*default value*/
-    SimilW = 1; /*default value*/
-    h = 0.1; 
-    lambda = 0.1;
-    
-    if ((nrhs == 2) || (nrhs == 3) || (nrhs == 4) || (nrhs == 5))   SearchW_real  = (int) mxGetScalar(prhs[1]); /* the searching window ratio */
-    if ((nrhs == 3) || (nrhs == 4) || (nrhs == 5))   SimilW =  (int) mxGetScalar(prhs[2]);  /* the similarity window ratio */
-    if ((nrhs == 4) || (nrhs == 5))  h =  (float) mxGetScalar(prhs[3]);  /* parameter for the PB filtering function */
-    if ((nrhs == 5))  lambda = (float) mxGetScalar(prhs[4]); /* regularization parameter */   
-
-
-    if (h <= 0) mexErrMsgTxt("Parmeter for the PB penalty function should be > 0");
-    if (lambda <= 0) mexErrMsgTxt(" Regularization parmeter should be > 0");
-       
-    SearchW = SearchW_real + 2*SimilW;
-    
-    /* SearchW_full = 2*SearchW + 1; */ /* the full searching window  size */
-    /* SimilW_full = 2*SimilW + 1;  */  /* the full similarity window  size */
-    
-    padXY = SearchW + 2*SimilW; /* padding sizes */
-    newsizeX = N + 2*(padXY); /* the X size of the padded array */
-    newsizeY = M + 2*(padXY); /* the Y size of the padded array */
-    newsizeZ = Z + 2*(padXY); /* the Z size of the padded array */
-    int N_dims[] = {newsizeX, newsizeY, newsizeZ};
-    
-    /******************************2D case ****************************/
-    if (numdims == 2) {
-        /*Handling output*/
-        B = (float*)mxGetData(plhs[0] = mxCreateNumericMatrix(N, M, mxSINGLE_CLASS, mxREAL));
-        /*allocating memory for the padded arrays */
-        Ap = (float*)mxGetData(mxCreateNumericMatrix(newsizeX, newsizeY, mxSINGLE_CLASS, mxREAL));
-        Bp = (float*)mxGetData(mxCreateNumericMatrix(newsizeX, newsizeY, mxSINGLE_CLASS, mxREAL));
-        /**************************************************************************/
-        /*Perform padding of image A to the size of [newsizeX * newsizeY] */
-        switchpad_crop = 0; /*padding*/
-        pad_crop(A, Ap, M, N, 0, newsizeY, newsizeX, 0, padXY, switchpad_crop);
-        
-        /* Do PB regularization with the padded array  */
-        PB_FUNC2D(Ap, Bp, newsizeY, newsizeX, padXY, SearchW, SimilW, (float)h, (float)lambda);
-        
-        switchpad_crop = 1; /*cropping*/
-        pad_crop(Bp, B, M, N, 0, newsizeY, newsizeX, 0, padXY, switchpad_crop);
-    }
-    else
-    {
-        /******************************3D case ****************************/
-        /*Handling output*/
-        B = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dims, mxSINGLE_CLASS, mxREAL));
-        /*allocating memory for the padded arrays */
-        Ap = (float*)mxGetPr(mxCreateNumericArray(3, N_dims, mxSINGLE_CLASS, mxREAL));
-        Bp = (float*)mxGetPr(mxCreateNumericArray(3, N_dims, mxSINGLE_CLASS, mxREAL));
-        /**************************************************************************/
-        
-        /*Perform padding of image A to the size of [newsizeX * newsizeY * newsizeZ] */
-        switchpad_crop = 0; /*padding*/
-        pad_crop(A, Ap, M, N, Z, newsizeY, newsizeX, newsizeZ, padXY, switchpad_crop);
-        
-        /* Do PB regularization with the padded array  */
-        PB_FUNC3D(Ap, Bp, newsizeY, newsizeX, newsizeZ, padXY, SearchW, SimilW, (float)h, (float)lambda);
-        
-        switchpad_crop = 1; /*cropping*/
-        pad_crop(Bp, B, M, N, Z, newsizeY, newsizeX, newsizeZ, padXY, switchpad_crop);
-    } /*end else ndims*/ 
-}    
diff --git a/Core/regularizers_CPU/SplitBregman_TV.c b/Core/regularizers_CPU/SplitBregman_TV.c
deleted file mode 100644
index 38f6a9d..0000000
--- a/Core/regularizers_CPU/SplitBregman_TV.c
+++ /dev/null
@@ -1,179 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#include "mex.h"
-#include <matrix.h>
-#include "SplitBregman_TV_core.h"
-
-/* C-OMP implementation of Split Bregman - TV denoising-regularization model (2D/3D)
- *
- * Input Parameters:
- * 1. Noisy image/volume
- * 2. lambda - regularization parameter
- * 3. Number of iterations [OPTIONAL parameter]
- * 4. eplsilon - tolerance constant [OPTIONAL parameter]
- * 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
- *
- * Output:
- * Filtered/regularized image
- *
- * Example:
- * figure;
- * Im = double(imread('lena_gray_256.tif'))/255;  % loading image
- * u0 = Im + .05*randn(size(Im)); u0(u0 < 0) = 0;
- * u = SplitBregman_TV(single(u0), 10, 30, 1e-04);
- *
- * to compile with OMP support: mex SplitBregman_TV.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
- * References:
- * The Split Bregman Method for L1 Regularized Problems, by Tom Goldstein and Stanley Osher.
- * D. Kazantsev, 2016*
- */
-
-
-void mexFunction(
-        int nlhs, mxArray *plhs[],
-        int nrhs, const mxArray *prhs[])
-        
-{
-    int number_of_dims, iter, dimX, dimY, dimZ, ll, j, count, methTV;
-    const int  *dim_array;
-    float *A, *U=NULL, *U_old=NULL, *Dx=NULL, *Dy=NULL, *Dz=NULL, *Bx=NULL, *By=NULL, *Bz=NULL, lambda, mu, epsil, re, re1, re_old;
-    
-    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
-    dim_array = mxGetDimensions(prhs[0]);
-    
-    /*Handling Matlab input data*/
-    if ((nrhs < 2) || (nrhs > 5)) mexErrMsgTxt("At least 2 parameters is required: Image(2D/3D), Regularization parameter. The full list of parameters: Image(2D/3D), Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1')");
-    
-    /*Handling Matlab input data*/
-    A  = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */
-    mu =  (float) mxGetScalar(prhs[1]); /* regularization parameter */
-    iter = 35; /* default iterations number */
-    epsil = 0.0001; /* default tolerance constant */
-    methTV = 0;  /* default isotropic TV penalty */
-    if ((nrhs == 3) || (nrhs == 4) || (nrhs == 5))  iter = (int) mxGetScalar(prhs[2]); /* iterations number */
-    if ((nrhs == 4) || (nrhs == 5))  epsil =  (float) mxGetScalar(prhs[3]); /* tolerance constant */
-    if (nrhs == 5)  {
-        char *penalty_type;
-        penalty_type = mxArrayToString(prhs[4]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */
-        if ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0)) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',");
-        if (strcmp(penalty_type, "l1") == 0)  methTV = 1;  /* enable 'l1' penalty */
-        mxFree(penalty_type);
-    }
-    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
-    
-    lambda = 2.0f*mu;
-    count = 1;
-    re_old = 0.0f;
-    /*Handling Matlab output data*/
-    dimY = dim_array[0]; dimX = dim_array[1]; dimZ = dim_array[2];
-    
-    if (number_of_dims == 2) {
-        dimZ = 1; /*2D case*/
-        U = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        U_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        Dx = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        Dy = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        Bx = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        By = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        
-        copyIm(A, U, dimX, dimY, dimZ); /*initialize */
-        
-        /* begin outer SB iterations */
-        for(ll=0; ll<iter; ll++) {
-            
-            /*storing old values*/
-            copyIm(U, U_old, dimX, dimY, dimZ);
-            
-            /*GS iteration */
-            gauss_seidel2D(U, A, Dx, Dy, Bx, By, dimX, dimY, lambda, mu);
-            
-            if (methTV == 1)  updDxDy_shrinkAniso2D(U, Dx, Dy, Bx, By, dimX, dimY, lambda);
-            else updDxDy_shrinkIso2D(U, Dx, Dy, Bx, By, dimX, dimY, lambda);
-            
-            updBxBy2D(U, Dx, Dy, Bx, By, dimX, dimY);
-            
-            /* calculate norm to terminate earlier */
-            re = 0.0f; re1 = 0.0f;
-            for(j=0; j<dimX*dimY*dimZ; j++)
-            {
-                re += pow(U_old[j] - U[j],2);
-                re1 += pow(U_old[j],2);
-            }
-            re = sqrt(re)/sqrt(re1);
-            if (re < epsil)  count++;
-            if (count > 4) break;
-            
-            /* check that the residual norm is decreasing */
-            if (ll > 2) {
-                if (re > re_old) break;
-            }
-            re_old = re;
-            /*printf("%f %i %i \n", re, ll, count); */
-            
-            /*copyIm(U_old, U, dimX, dimY, dimZ); */
-        }
-        printf("SB iterations stopped at iteration: %i\n", ll);
-    }
-    if (number_of_dims == 3) {
-        U = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        U_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        Dx = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        Dy = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        Dz = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        Bx = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        By = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        Bz = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        
-        copyIm(A, U, dimX, dimY, dimZ); /*initialize */
-        
-        /* begin outer SB iterations */
-        for(ll=0; ll<iter; ll++) {
-            
-            /*storing old values*/
-            copyIm(U, U_old, dimX, dimY, dimZ);
-            
-            /*GS iteration */
-            gauss_seidel3D(U, A, Dx, Dy, Dz, Bx, By, Bz, dimX, dimY, dimZ, lambda, mu);
-            
-            if (methTV == 1) updDxDyDz_shrinkAniso3D(U, Dx, Dy, Dz, Bx, By, Bz, dimX, dimY, dimZ, lambda);
-            else updDxDyDz_shrinkIso3D(U, Dx, Dy, Dz, Bx, By, Bz, dimX, dimY, dimZ, lambda);
-            
-            updBxByBz3D(U, Dx, Dy, Dz, Bx, By, Bz, dimX, dimY, dimZ);
-            
-            /* calculate norm to terminate earlier */
-            re = 0.0f; re1 = 0.0f;
-            for(j=0; j<dimX*dimY*dimZ; j++)
-            {
-                re += pow(U[j] - U_old[j],2);
-                re1 += pow(U[j],2);
-            }
-            re = sqrt(re)/sqrt(re1);
-            if (re < epsil)  count++;
-            if (count > 4) break;
-            
-            /* check that the residual norm is decreasing */
-            if (ll > 2) {
-                if (re > re_old) break; }
-            /*printf("%f %i %i \n", re, ll, count); */
-            re_old = re;
-        }
-        printf("SB iterations stopped at iteration: %i\n", ll);
-    }
-}
\ No newline at end of file
diff --git a/Core/regularizers_CPU/TGV_PD.c b/Core/regularizers_CPU/TGV_PD.c
deleted file mode 100644
index c9cb440..0000000
--- a/Core/regularizers_CPU/TGV_PD.c
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#include "TGV_PD_core.h"
-#include "mex.h"
-
-/* C-OMP implementation of Primal-Dual denoising method for 
- * Total Generilized Variation (TGV)-L2 model (2D case only)
- *
- * Input Parameters:
- * 1. Noisy image/volume (2D)
- * 2. lambda - regularization parameter
- * 3. parameter to control first-order term (alpha1)
- * 4. parameter to control the second-order term (alpha0)
- * 5. Number of CP iterations
- *
- * Output:
- * Filtered/regularized image 
- *
- * Example:
- * figure;
- * Im = double(imread('lena_gray_256.tif'))/255;  % loading image
- * u0 = Im + .03*randn(size(Im)); % adding noise
- * tic; u = TGV_PD(single(u0), 0.02, 1.3, 1, 550); toc;
- *
- * to compile with OMP support: mex TGV_PD.c TGV_PD_core.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
- * References:
- * K. Bredies "Total Generalized Variation"
- *
- * 28.11.16/Harwell
- */
- 
-void mexFunction(
-        int nlhs, mxArray *plhs[],
-        int nrhs, const mxArray *prhs[])
-        
-{
-    int number_of_dims, iter, dimX, dimY, dimZ, ll;
-    const int  *dim_array;
-    float *A, *U, *U_old, *P1, *P2, *Q1, *Q2, *Q3, *V1, *V1_old, *V2, *V2_old, lambda, L2, tau, sigma,  alpha1, alpha0;
-    
-    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
-    dim_array = mxGetDimensions(prhs[0]);
-    
-    /*Handling Matlab input data*/
-    A  = (float *) mxGetData(prhs[0]); /*origanal noise image/volume*/
-    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input in single precision is required"); }
-    lambda =  (float) mxGetScalar(prhs[1]); /*regularization parameter*/
-    alpha1 =  (float) mxGetScalar(prhs[2]); /*first-order term*/
-    alpha0 =  (float) mxGetScalar(prhs[3]); /*second-order term*/
-    iter =  (int) mxGetScalar(prhs[4]); /*iterations number*/
-    if(nrhs != 5) mexErrMsgTxt("Five input parameters is reqired: Image(2D/3D), Regularization parameter, alpha1, alpha0, Iterations");
-    
-    /*Handling Matlab output data*/
-    dimX = dim_array[0]; dimY = dim_array[1];
-    
-    if (number_of_dims == 2) {
-        /*2D case*/
-        dimZ = 1;
-        U = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        
-        /*dual variables*/
-        P1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        P2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        
-        Q1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        Q2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        Q3 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        
-        U_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        
-        V1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        V1_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        V2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        V2_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));   
-        
-        
-         /*printf("%i \n", i);*/
-    L2 = 12.0f; /*Lipshitz constant*/
-    tau = 1.0/pow(L2,0.5);
-    sigma = 1.0/pow(L2,0.5);
-    
-    /*Copy A to U*/
-    copyIm(A, U, dimX, dimY, dimZ);
-    
-        /* Here primal-dual iterations begin for 2D */
-        for(ll = 0; ll < iter; ll++) {
-            
-            /* Calculate Dual Variable P */
-            DualP_2D(U, V1, V2, P1, P2, dimX, dimY, dimZ, sigma);
-            
-            /*Projection onto convex set for P*/
-            ProjP_2D(P1, P2, dimX, dimY, dimZ, alpha1);
-            
-            /* Calculate Dual Variable Q */
-            DualQ_2D(V1, V2, Q1, Q2, Q3, dimX, dimY, dimZ, sigma);
-            
-            /*Projection onto convex set for Q*/
-            ProjQ_2D(Q1, Q2, Q3, dimX, dimY, dimZ, alpha0);
-            
-            /*saving U into U_old*/
-            copyIm(U, U_old, dimX, dimY, dimZ);
-            
-            /*adjoint operation  -> divergence and projection of P*/
-            DivProjP_2D(U, A, P1, P2, dimX, dimY, dimZ, lambda, tau);
-            
-            /*get updated solution U*/
-            newU(U, U_old, dimX, dimY, dimZ);
-            
-            /*saving V into V_old*/
-            copyIm(V1, V1_old, dimX, dimY, dimZ);
-            copyIm(V2, V2_old, dimX, dimY, dimZ);
-            
-            /* upd V*/
-            UpdV_2D(V1, V2, P1, P2, Q1, Q2, Q3, dimX, dimY, dimZ, tau);
-            
-            /*get new V*/
-            newU(V1, V1_old, dimX, dimY, dimZ);
-            newU(V2, V2_old, dimX, dimY, dimZ);
-        } /*end of iterations*/        
-    }
-    else if (number_of_dims == 3) {
-        mexErrMsgTxt("The input data should be a 2D array");
-        /*3D case*/
-    }
-    else {mexErrMsgTxt("The input data should be a 2D array");}    
-   
-}
diff --git a/Core/regularizers_GPU/Diffus_HO/Diff4thHajiaboli_GPU.cpp b/Core/regularizers_GPU/Diffus_HO/Diff4thHajiaboli_GPU.cpp
deleted file mode 100644
index e5b1ee0..0000000
--- a/Core/regularizers_GPU/Diffus_HO/Diff4thHajiaboli_GPU.cpp
+++ /dev/null
@@ -1,114 +0,0 @@
-#include "mex.h"
-#include <matrix.h>
-#include <math.h>
-#include <stdlib.h>
-#include <memory.h>
-#include <stdio.h>
-#include <iostream>
-#include "Diff4th_GPU_kernel.h"
-
-/*
- * 2D and 3D CUDA implementation of the 4th order PDE denoising model by Hajiaboli
- *
- * Reference :
- * "An anisotropic fourth-order diffusion filter for image noise removal" by M. Hajiaboli
- * 
- * Example
- * figure;
- * Im = double(imread('lena_gray_256.tif'))/255;  % loading image
- * u0 = Im + .05*randn(size(Im)); % adding noise
- * u = Diff4thHajiaboli_GPU(single(u0), 0.02, 150);
- * subplot (1,2,1); imshow(u0,[ ]); title('Noisy Image')
- * subplot (1,2,2); imshow(u,[ ]); title('Denoised Image')
- *
- *
- * Linux/Matlab compilation:
- * compile in terminal: nvcc -Xcompiler -fPIC -shared -o Diff4th_GPU_kernel.o Diff4th_GPU_kernel.cu
- * then compile in Matlab: mex -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart Diff4thHajiaboli_GPU.cpp Diff4th_GPU_kernel.o
- */
-
-void mexFunction(
-        int nlhs, mxArray *plhs[],
-        int nrhs, const mxArray *prhs[])
-{
-    int numdims, dimZ, size;
-    float *A, *B, *A_L, *B_L;
-    const int *dims;    
-    
-    numdims = mxGetNumberOfDimensions(prhs[0]);
-    dims = mxGetDimensions(prhs[0]);       
-    
-    float sigma = (float)mxGetScalar(prhs[1]); /* edge-preserving parameter */
-    float lambda = (float)mxGetScalar(prhs[2]); /* regularization parameter */
-    int iter = (int)mxGetScalar(prhs[3]); /* iterations number */
-    
-    if (numdims == 2)  {
-        
-        int N, M, Z, i, j;
-        Z = 0; // for the 2D case
-        float tau = 0.01; // time step is sufficiently small for an explicit methods
-        
-        /*Input data*/
-        A = (float*)mxGetData(prhs[0]);        
-        N = dims[0] + 2;
-        M = dims[1] + 2;
-        A_L = (float*)mxGetData(mxCreateNumericMatrix(N, M, mxSINGLE_CLASS, mxREAL));
-        B_L = (float*)mxGetData(mxCreateNumericMatrix(N, M, mxSINGLE_CLASS, mxREAL));
-        
-        /*Output data*/
-        B = (float*)mxGetData(plhs[0] = mxCreateNumericMatrix(dims[0], dims[1], mxSINGLE_CLASS, mxREAL));        
-        
-        // copy A to the bigger A_L with boundaries
-        #pragma omp parallel for shared(A_L, A) private(i,j)
-        for (i=0; i < N; i++) {
-            for (j=0; j < M; j++) {
-                if (((i > 0) && (i < N-1)) &&  ((j > 0) && (j < M-1)))  A_L[i*M+j] = A[(i-1)*(dims[1])+(j-1)];
-            }}
-        
-        // Running CUDA code here
-        Diff4th_GPU_kernel(A_L, B_L, N, M, Z, (float)sigma, iter, (float)tau, lambda);
-        
-        // copy the processed B_L to a smaller B
-        #pragma omp parallel for shared(B_L, B) private(i,j)
-        for (i=0; i < N; i++) {
-            for (j=0; j < M; j++) {
-                if (((i > 0) && (i < N-1)) &&  ((j > 0) && (j < M-1)))   B[(i-1)*(dims[1])+(j-1)] = B_L[i*M+j];
-            }}
-    }
-    if (numdims == 3)  {
-        //  3D image denoising / regularization
-        int N, M, Z, i, j, k;
-        float tau = 0.0007; // Time Step is small for an explicit methods
-        A = (float*)mxGetData(prhs[0]);
-        N = dims[0] + 2;
-        M = dims[1] + 2;
-        Z = dims[2] + 2;
-        int N_dims[] = {N, M, Z};
-        A_L = (float*)mxGetPr(mxCreateNumericArray(3, N_dims, mxSINGLE_CLASS, mxREAL));
-        B_L = (float*)mxGetPr(mxCreateNumericArray(3, N_dims, mxSINGLE_CLASS, mxREAL));
-        
-        /* output data */
-        B = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dims, mxSINGLE_CLASS, mxREAL));
-        
-        // copy A to the bigger A_L with boundaries
-        #pragma omp parallel for shared(A_L, A) private(i,j,k)
-        for (i=0; i < N; i++) {
-            for (j=0; j < M; j++) {
-                for (k=0; k < Z; k++) {
-                    if (((i > 0) && (i < N-1)) &&  ((j > 0) && (j < M-1)) &&  ((k > 0) && (k < Z-1))) {
-                        A_L[(N*M)*(k)+(i)*M+(j)] = A[(dims[0]*dims[1])*(k-1)+(i-1)*dims[1]+(j-1)];
-                    }}}}
-        
-        // Running CUDA kernel here for diffusivity
-        Diff4th_GPU_kernel(A_L, B_L, N, M, Z, (float)sigma, iter, (float)tau, lambda);
-        
-        // copy the processed B_L to a smaller B
-        #pragma omp parallel for shared(B_L, B) private(i,j,k)
-        for (i=0; i < N; i++) {
-            for (j=0; j < M; j++) {
-                for (k=0; k < Z; k++) {
-                    if (((i > 0) && (i < N-1)) &&  ((j > 0) && (j < M-1)) &&  ((k > 0) && (k < Z-1))) {
-                        B[(dims[0]*dims[1])*(k-1)+(i-1)*dims[1]+(j-1)] = B_L[(N*M)*(k)+(i)*M+(j)];
-                    }}}}
-    }
-}
\ No newline at end of file
diff --git a/Core/regularizers_GPU/NL_Regul/NLM_GPU.cpp b/Core/regularizers_GPU/NL_Regul/NLM_GPU.cpp
deleted file mode 100644
index 858b865..0000000
--- a/Core/regularizers_GPU/NL_Regul/NLM_GPU.cpp
+++ /dev/null
@@ -1,171 +0,0 @@
-#include "mex.h"
-#include <matrix.h>
-#include <math.h>
-#include <stdlib.h>
-#include <memory.h>
-#include <stdio.h>
-#include <iostream>
-#include "NLM_GPU_kernel.h"
-
-/* CUDA implementation of the patch-based (PB) regularization  for 2D and 3D images/volumes  
- * This method finds self-similar patches in data and performs one fixed point iteration to mimimize the PB penalty function
- * 
- * References: 1. Yang Z. & Jacob M. "Nonlocal Regularization of Inverse Problems"
- *             2. Kazantsev D. at. all "4D-CT reconstruction with unified spatial-temporal patch-based regularization"
- *
- * Input Parameters (mandatory):
- * 1. Image/volume (2D/3D)
- * 2. ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window)
- * 3. ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window)
- * 4. h - parameter for the PB penalty function
- * 5. lambda - regularization parameter 
-
- * Output:
- * 1. regularized (denoised) Image/volume (N x N x N)
- *
- * In matlab check what kind of GPU you have with "gpuDevice" command,
- * then set your ComputeCapability, here I use -arch compute_35
- *
- * Quick 2D denoising example in Matlab:   
-   Im = double(imread('lena_gray_256.tif'))/255;  % loading image
-   u0 = Im + .03*randn(size(Im)); u0(u0<0) = 0; % adding noise
-   ImDen = NLM_GPU(single(u0), 3, 2, 0.15, 1);
- 
- * Linux/Matlab compilation:
- * compile in terminal: nvcc -Xcompiler -fPIC -shared -o NLM_GPU_kernel.o NLM_GPU_kernel.cu
- * then compile in Matlab: mex -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart NLM_GPU.cpp NLM_GPU_kernel.o
- *
- * D. Kazantsev 
- * 2014-17
- * Harwell/Manchester UK
- */
-
-float pad_crop(float *A, float *Ap, int OldSizeX, int OldSizeY,  int OldSizeZ, int NewSizeX, int NewSizeY, int NewSizeZ, int padXY, int switchpad_crop);
-
-void mexFunction(
-        int nlhs, mxArray *plhs[],
-        int nrhs, const mxArray *prhs[])
-{
-    int N, M, Z, i_n, j_n, k_n, numdims, SearchW, SimilW, SearchW_real, padXY, newsizeX, newsizeY, newsizeZ, switchpad_crop, count, SearchW_full, SimilW_full;
-    const int  *dims;
-    float *A, *B=NULL, *Ap=NULL, *Bp=NULL, *Eucl_Vec, h, h2, lambda, val, denh2;
-    
-    numdims = mxGetNumberOfDimensions(prhs[0]);
-    dims = mxGetDimensions(prhs[0]);
-    
-    N = dims[0];
-    M = dims[1];
-    Z = dims[2];
-    
-    if ((numdims < 2) || (numdims > 3)) {mexErrMsgTxt("The input should be 2D image or 3D volume");}
-    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input in single precision is required"); }
-    
-    if(nrhs != 5) mexErrMsgTxt("Five inputs reqired: Image(2D,3D), SearchW, SimilW, Threshold, Regularization parameter");
-    
-    /*Handling inputs*/
-    A  = (float *) mxGetData(prhs[0]);    /* the image to regularize/filter */
-    SearchW_real  = (int) mxGetScalar(prhs[1]); /* the searching window ratio */
-    SimilW =  (int) mxGetScalar(prhs[2]);  /* the similarity window ratio */
-    h =  (float) mxGetScalar(prhs[3]);  /* parameter for the PB filtering function */
-    lambda = (float) mxGetScalar(prhs[4]);
-    
-    if (h <= 0) mexErrMsgTxt("Parmeter for the PB penalty function should be > 0");
-      
-    SearchW = SearchW_real + 2*SimilW;
-    
-    SearchW_full = 2*SearchW + 1; /* the full searching window  size */
-    SimilW_full = 2*SimilW + 1;   /* the full similarity window  size */
-    h2 = h*h;
-    
-    padXY = SearchW + 2*SimilW; /* padding sizes */
-    newsizeX = N + 2*(padXY); /* the X size of the padded array */
-    newsizeY = M + 2*(padXY); /* the Y size of the padded array */
-    newsizeZ = Z + 2*(padXY); /* the Z size of the padded array */
-    int N_dims[] = {newsizeX, newsizeY, newsizeZ};
-    
-    /******************************2D case ****************************/
-    if (numdims == 2) {
-        /*Handling output*/
-        B = (float*)mxGetData(plhs[0] = mxCreateNumericMatrix(N, M, mxSINGLE_CLASS, mxREAL));
-        /*allocating memory for the padded arrays */
-        Ap = (float*)mxGetData(mxCreateNumericMatrix(newsizeX, newsizeY, mxSINGLE_CLASS, mxREAL));
-        Bp = (float*)mxGetData(mxCreateNumericMatrix(newsizeX, newsizeY, mxSINGLE_CLASS, mxREAL));
-        Eucl_Vec = (float*)mxGetData(mxCreateNumericMatrix(SimilW_full*SimilW_full, 1, mxSINGLE_CLASS, mxREAL));
-        
-        /*Gaussian kernel */
-        count = 0;
-        for(i_n=-SimilW; i_n<=SimilW; i_n++) {
-            for(j_n=-SimilW; j_n<=SimilW; j_n++) {
-                val = (float)(i_n*i_n + j_n*j_n)/(2*SimilW*SimilW);
-                Eucl_Vec[count] = exp(-val);
-                count = count + 1;
-            }} /*main neighb loop */
-        
-        /**************************************************************************/
-        /*Perform padding of image A to the size of [newsizeX * newsizeY] */
-        switchpad_crop = 0; /*padding*/
-        pad_crop(A, Ap, M, N, 0, newsizeY, newsizeX, 0, padXY, switchpad_crop);
-        
-        /* Do PB regularization with the padded array  */
-        NLM_GPU_kernel(Ap, Bp, Eucl_Vec, newsizeY, newsizeX, 0, numdims, SearchW, SimilW, SearchW_real, (float)h2, (float)lambda);
-        
-        switchpad_crop = 1; /*cropping*/
-        pad_crop(Bp, B, M, N, 0, newsizeY, newsizeX, 0, padXY, switchpad_crop);
-    }
-    else
-    {
-        /******************************3D case ****************************/
-        /*Handling output*/
-        B = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dims, mxSINGLE_CLASS, mxREAL));
-        /*allocating memory for the padded arrays */
-        Ap = (float*)mxGetPr(mxCreateNumericArray(3, N_dims, mxSINGLE_CLASS, mxREAL));
-        Bp = (float*)mxGetPr(mxCreateNumericArray(3, N_dims, mxSINGLE_CLASS, mxREAL));
-        Eucl_Vec = (float*)mxGetData(mxCreateNumericMatrix(SimilW_full*SimilW_full*SimilW_full, 1, mxSINGLE_CLASS, mxREAL));
-        
-        /*Gaussian kernel */
-        count = 0;
-        for(i_n=-SimilW; i_n<=SimilW; i_n++) {
-            for(j_n=-SimilW; j_n<=SimilW; j_n++) {
-                for(k_n=-SimilW; k_n<=SimilW; k_n++) {
-                    val = (float)(i_n*i_n + j_n*j_n + k_n*k_n)/(2*SimilW*SimilW*SimilW);
-                    Eucl_Vec[count] = exp(-val);
-                    count = count + 1;
-                }}} /*main neighb loop */
-        /**************************************************************************/
-        /*Perform padding of image A to the size of [newsizeX * newsizeY * newsizeZ] */
-        switchpad_crop = 0; /*padding*/
-        pad_crop(A, Ap, M, N, Z, newsizeY, newsizeX, newsizeZ, padXY, switchpad_crop);
-        
-        /* Do PB regularization with the padded array  */
-        NLM_GPU_kernel(Ap, Bp, Eucl_Vec, newsizeY, newsizeX, newsizeZ, numdims, SearchW, SimilW, SearchW_real, (float)h2, (float)lambda);
-        
-        switchpad_crop = 1; /*cropping*/
-        pad_crop(Bp, B, M, N, Z, newsizeY, newsizeX, newsizeZ, padXY, switchpad_crop);
-    } /*end else ndims*/
-}
-
-float pad_crop(float *A, float *Ap, int OldSizeX, int OldSizeY, int OldSizeZ, int NewSizeX, int NewSizeY, int NewSizeZ, int padXY, int switchpad_crop)
-{
-    /* padding-cropping function */
-    int i,j,k;    
-    if (NewSizeZ > 1) {    
-           for (i=0; i < NewSizeX; i++) {
-            for (j=0; j < NewSizeY; j++) {
-              for (k=0; k < NewSizeZ; k++) {
-                if (((i >= padXY) && (i < NewSizeX-padXY)) &&  ((j >= padXY) && (j < NewSizeY-padXY)) &&  ((k >= padXY) && (k < NewSizeZ-padXY))) {
-                    if (switchpad_crop == 0)  Ap[NewSizeX*NewSizeY*k + i*NewSizeY+j] = A[OldSizeX*OldSizeY*(k - padXY) + (i-padXY)*(OldSizeY)+(j-padXY)];
-                    else  Ap[OldSizeX*OldSizeY*(k - padXY) + (i-padXY)*(OldSizeY)+(j-padXY)] = A[NewSizeX*NewSizeY*k + i*NewSizeY+j];
-                }
-            }}}   
-    }
-    else {
-        for (i=0; i < NewSizeX; i++) {
-            for (j=0; j < NewSizeY; j++) {
-                if (((i >= padXY) && (i < NewSizeX-padXY)) &&  ((j >= padXY) && (j < NewSizeY-padXY))) {
-                    if (switchpad_crop == 0)  Ap[i*NewSizeY+j] = A[(i-padXY)*(OldSizeY)+(j-padXY)];
-                    else  Ap[(i-padXY)*(OldSizeY)+(j-padXY)] = A[i*NewSizeY+j];
-                }
-            }}
-    }
-    return *Ap;
-}
\ No newline at end of file
diff --git a/Wrappers/Matlab/compile_mex.m b/Wrappers/Matlab/compile_mex.m
deleted file mode 100644
index 66c05da..0000000
--- a/Wrappers/Matlab/compile_mex.m
+++ /dev/null
@@ -1,11 +0,0 @@
-% compile mex's in Matlab once
-cd regularizers_CPU/
-
-mex LLT_model.c LLT_model_core.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
-mex FGP_TV.c FGP_TV_core.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
-mex SplitBregman_TV.c SplitBregman_TV_core.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
-mex TGV_PD.c TGV_PD_core.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
-mex PatchBased_Regul.c PatchBased_Regul_core.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
-
-cd ../../
-cd demos
diff --git a/Wrappers/Matlab/demos/Demo_Phantom3D_Cone.m b/Wrappers/Matlab/demos/Demo_Phantom3D_Cone.m
new file mode 100644
index 0000000..a8f2c92
--- /dev/null
+++ b/Wrappers/Matlab/demos/Demo_Phantom3D_Cone.m
@@ -0,0 +1,67 @@
+% A demo script to reconstruct 3D synthetic data using FISTA method for
+% CONE BEAM geometry
+% requirements: ASTRA-toolbox and TomoPhantom toolbox
+
+close all;clc;clear all;
+% adding paths
+addpath('../data/');
+addpath('../main_func/'); addpath('../main_func/regularizers_CPU/'); addpath('../main_func/regularizers_GPU/NL_Regul/'); addpath('../main_func/regularizers_GPU/Diffus_HO/');
+addpath('../supp/');
+
+%%
+% build 3D phantom using TomoPhantom 
+modelNo = 3; % see Phantom3DLibrary.dat file in TomoPhantom
+N = 256; % x-y-z size (cubic image)
+angles = 0:1.5:360; % angles vector in degrees
+angles_rad = angles*(pi/180); % conversion to radians
+det_size = round(sqrt(2)*N); % detector size
+
+%---------TomoPhantom routines---------%
+pathTP = '/home/algol/Documents/MATLAB/TomoPhantom/functions/models/Phantom3DLibrary.dat'; % path to TomoPhantom parameters file
+TomoPhantom = buildPhantom3D(modelNo,N,pathTP); % generate 3D phantom
+%--------------------------------------%
+%%
+% using ASTRA-toolbox to set the projection geometry (cone beam)
+% eg: astra.create_proj_geom('cone', 1.0 (resol), 1.0 (resol), detectorRowCount, detectorColCount, angles, originToSource, originToDetector)
+vol_geom = astra_create_vol_geom(N,N,N);
+proj_geom = astra_create_proj_geom('cone', 1.0, 1.0, N, det_size, angles_rad, 2000, 2160);
+%% 
+% do forward projection using ASTRA
+% inverse crime data generation
+[sino_id, SinoCone3D] = astra_create_sino3d_cuda(TomoPhantom, proj_geom, vol_geom);
+astra_mex_data3d('delete', sino_id);
+%%
+fprintf('%s\n', 'Reconstructing with CGLS using ASTRA-toolbox ...');
+vol_id = astra_mex_data3d('create', '-vol', vol_geom, 0);
+proj_id = astra_mex_data3d('create', '-proj3d', proj_geom, SinoCone3D); 
+cfg = astra_struct('CGLS3D_CUDA');
+cfg.ProjectionDataId = proj_id;
+cfg.ReconstructionDataId = vol_id;
+cfg.option.MinConstraint = 0;
+alg_id = astra_mex_algorithm('create', cfg);
+astra_mex_algorithm('iterate', alg_id, 15);
+reconASTRA_3D = astra_mex_data3d('get', vol_id);
+%%
+fprintf('%s\n', 'Reconstruction using FISTA-LS without regularization...');
+clear params
+% define parameters
+params.proj_geom = proj_geom; % pass geometry to the function
+params.vol_geom = vol_geom;
+params.sino = single(SinoCone3D); % sinogram
+params.iterFISTA = 30; %max number of outer iterations
+params.X_ideal = TomoPhantom; % ideal phantom
+params.show = 1; % visualize reconstruction on each iteration
+params.slice = round(N/2); params.maxvalplot = 1; 
+tic; [X_FISTA, output] = FISTA_REC(params); toc; 
+
+error_FISTA = output.Resid_error; obj_FISTA = output.objective;
+fprintf('%s %.4f\n', 'Min RMSE for FISTA-LS reconstruction is:', min(error_FISTA(:)));
+
+Resid3D = (TomoPhantom - X_FISTA).^2;
+figure(2);
+subplot(1,2,1); imshow(X_FISTA(:,:,params.slice),[0 params.maxvalplot]); title('FISTA-LS reconstruction'); colorbar;
+subplot(1,2,2); imshow(Resid3D(:,:,params.slice),[0 0.1]);  title('residual'); colorbar;
+figure(3);
+subplot(1,2,1); plot(error_FISTA);  title('RMSE plot'); colorbar;
+subplot(1,2,2); plot(obj_FISTA);  title('Objective plot'); colorbar;
+%%
\ No newline at end of file
diff --git a/Wrappers/Matlab/demos/Demo_Phantom3D_Parallel.m b/Wrappers/Matlab/demos/Demo_Phantom3D_Parallel.m
new file mode 100644
index 0000000..4219bd1
--- /dev/null
+++ b/Wrappers/Matlab/demos/Demo_Phantom3D_Parallel.m
@@ -0,0 +1,121 @@
+% A demo script to reconstruct 3D synthetic data using FISTA method for
+% PARALLEL BEAM geometry
+% requirements: ASTRA-toolbox and TomoPhantom toolbox
+
+close all;clc;clear;
+% adding paths
+addpath('../data/');
+addpath('../main_func/'); addpath('../main_func/regularizers_CPU/'); addpath('../main_func/regularizers_GPU/NL_Regul/'); addpath('../main_func/regularizers_GPU/Diffus_HO/');
+addpath('../supp/');
+
+%%
+% Main reconstruction/data generation parameters 
+modelNo = 2; % see Phantom3DLibrary.dat file in TomoPhantom
+N = 256; % x-y-z size (cubic image)
+angles = 1:0.5:180; % angles vector in degrees
+angles_rad = angles*(pi/180); % conversion to radians
+det_size = round(sqrt(2)*N); % detector size
+
+%---------TomoPhantom routines---------%
+pathTP = '/home/algol/Documents/MATLAB/TomoPhantom/functions/models/Phantom3DLibrary.dat'; % path to TomoPhantom parameters file
+TomoPhantom = buildPhantom3D(modelNo,N,pathTP); % generate 3D phantom
+sino_tomophan3D = buildSino3D(modelNo, N, det_size, single(angles),pathTP); % generate ideal data
+%--------------------------------------%
+% Adding noise and distortions if required
+sino_tomophan3D = sino_add_artifacts(sino_tomophan3D,'rings');
+% adding Poisson noise
+dose =  3e9; % photon flux (controls noise level)
+multifactor = max(sino_tomophan3D(:));
+dataExp = dose.*exp(-sino_tomophan3D/multifactor); % noiseless raw data
+dataRaw = astra_add_noise_to_sino(dataExp, dose); % pre-log noisy raw data (weights)
+sino3D_log = log(dose./max(dataRaw,1))*multifactor; %log corrected data -> sinogram
+clear dataExp sino_tomophan3D
+%
+%%
+%-------------Astra toolbox------------%
+% one can generate data using ASTRA toolbox
+proj_geom = astra_create_proj_geom('parallel', 1, det_size, angles_rad);
+vol_geom = astra_create_vol_geom(N,N);
+sino_ASTRA3D = zeros(det_size, length(angles), N, 'single');
+for i = 1:N
+[sino_id, sinoT] = astra_create_sino_cuda(TomoPhantom(:,:,i), proj_geom, vol_geom);
+sino_ASTRA3D(:,:,i) = sinoT';
+astra_mex_data2d('delete', sino_id);
+end
+%--------------------------------------%
+%%
+% using ASTRA-toolbox to set the projection geometry (parallel beam)
+proj_geom = astra_create_proj_geom('parallel', 1, det_size, angles_rad);
+vol_geom = astra_create_vol_geom(N,N);
+%%
+fprintf('%s\n', 'Reconstructing with FBP using ASTRA-toolbox ...');
+reconASTRA_3D = zeros(size(TomoPhantom),'single');
+for k = 1:N
+vol_id = astra_mex_data2d('create', '-vol', vol_geom, 0);
+proj_id = astra_mex_data2d('create', '-sino', proj_geom, sino3D_log(:,:,k)'); 
+cfg = astra_struct('FBP_CUDA');
+cfg.ProjectionDataId = proj_id;
+cfg.ReconstructionDataId = vol_id;
+cfg.option.MinConstraint = 0;
+alg_id = astra_mex_algorithm('create', cfg);
+astra_mex_algorithm('iterate', alg_id, 1);
+rec = astra_mex_data2d('get', vol_id);
+reconASTRA_3D(:,:,k) = single(rec);
+end
+figure; imshow(reconASTRA_3D(:,:,128), [0 1.3]);
+%% 
+%% 
+fprintf('%s\n', 'Reconstruction using OS-FISTA-PWLS without regularization...');
+clear params
+% define parameters
+params.proj_geom = proj_geom; % pass geometry to the function
+params.vol_geom = vol_geom;
+params.sino = single(sino3D_log); % sinogram
+params.iterFISTA = 15; %max number of outer iterations
+params.X_ideal = TomoPhantom; % ideal phantom
+params.weights = dataRaw./max(dataRaw(:)); % statistical weight for PWLS
+params.subsets = 12; % the number of subsets
+params.show = 1; % visualize reconstruction on each iteration
+params.slice = 128; params.maxvalplot = 1.3; 
+tic; [X_FISTA, output] = FISTA_REC(params); toc; 
+
+error_FISTA = output.Resid_error; obj_FISTA = output.objective;
+fprintf('%s %.4f\n', 'Min RMSE for FISTA-PWLS reconstruction is:', min(error_FISTA(:)));
+
+Resid3D = (TomoPhantom - X_FISTA).^2;
+figure(2);
+subplot(1,2,1); imshow(X_FISTA(:,:,params.slice),[0 params.maxvalplot]); title('FISTA-LS reconstruction'); colorbar;
+subplot(1,2,2); imshow(Resid3D(:,:,params.slice),[0 0.1]);  title('residual'); colorbar;
+figure(3);
+subplot(1,2,1); plot(error_FISTA);  title('RMSE plot'); 
+subplot(1,2,2); plot(obj_FISTA);  title('Objective plot'); 
+%%
+%% 
+fprintf('%s\n', 'Reconstruction using OS-FISTA-GH with FGP-TV regularization...');
+clear params
+% define parameters
+params.proj_geom = proj_geom; % pass geometry to the function
+params.vol_geom = vol_geom;
+params.sino = single(sino3D_log); % sinogram
+params.iterFISTA = 15; %max number of outer iterations
+params.X_ideal = TomoPhantom; % ideal phantom
+params.weights = dataRaw./max(dataRaw(:)); % statistical weights for PWLS
+params.subsets = 12; % the number of subsets
+params.Regul_Lambda_FGPTV = 100; % TV regularization parameter for FGP-TV
+params.Ring_LambdaR_L1 = 0.02; % Soft-Thresh L1 ring variable parameter
+params.Ring_Alpha = 21; % to boost ring removal procedure
+params.show = 1; % visualize reconstruction on each iteration
+params.slice = 128; params.maxvalplot = 1.3; 
+tic; [X_FISTA_GH_TV, output] = FISTA_REC(params); toc; 
+
+error_FISTA_GH_TV = output.Resid_error; obj_FISTA_GH_TV = output.objective;
+fprintf('%s %.4f\n', 'Min RMSE for FISTA-PWLS reconstruction is:', min(error_FISTA_GH_TV(:)));
+
+Resid3D = (TomoPhantom - X_FISTA_GH_TV).^2;
+figure(2);
+subplot(1,2,1); imshow(X_FISTA_GH_TV(:,:,params.slice),[0 params.maxvalplot]); title('FISTA-LS reconstruction'); colorbar;
+subplot(1,2,2); imshow(Resid3D(:,:,params.slice),[0 0.1]);  title('residual'); colorbar;
+figure(3);
+subplot(1,2,1); plot(error_FISTA_GH_TV);  title('RMSE plot'); 
+subplot(1,2,2); plot(obj_FISTA_GH_TV);  title('Objective plot'); 
+%%
\ No newline at end of file
diff --git a/Wrappers/Matlab/demos/Demo_RealData3D_Parallel.m b/Wrappers/Matlab/demos/Demo_RealData3D_Parallel.m
new file mode 100644
index 0000000..f82e0b0
--- /dev/null
+++ b/Wrappers/Matlab/demos/Demo_RealData3D_Parallel.m
@@ -0,0 +1,186 @@
+% Demonstration of tomographic 3D reconstruction from X-ray synchrotron
+% dataset (dendrites) using various data fidelities
+% ! It is advisable not to run the whole script, it will take lots of time to reconstruct the whole 3D data using many algorithms !
+clear
+close all
+%%
+% % adding paths
+addpath('../data/');
+addpath('../main_func/'); addpath('../main_func/regularizers_CPU/'); addpath('../main_func/regularizers_GPU/NL_Regul/'); addpath('../main_func/regularizers_GPU/Diffus_HO/');
+addpath('../supp/');
+
+load('DendrRawData.mat') % load raw data of 3D dendritic set
+angles_rad = angles*(pi/180); % conversion to radians
+det_size = size(data_raw3D,1); % detectors dim
+angSize = size(data_raw3D, 2); % angles dim
+slices_tot = size(data_raw3D, 3); % no of slices
+recon_size = 950; % reconstruction size
+
+Sino3D = zeros(det_size, angSize, slices_tot, 'single'); % log-corrected sino
+% normalizing the data
+for  jj = 1:slices_tot
+    sino = data_raw3D(:,:,jj);
+    for ii = 1:angSize
+        Sino3D(:,ii,jj) = log((flats_ar(:,jj)-darks_ar(:,jj))./(single(sino(:,ii)) - darks_ar(:,jj)));
+    end
+end
+
+Sino3D = Sino3D.*1000;
+Weights3D = single(data_raw3D); % weights for PW model
+clear data_raw3D
+%%
+% set projection/reconstruction geometry here
+proj_geom = astra_create_proj_geom('parallel', 1, det_size, angles_rad);
+vol_geom = astra_create_vol_geom(recon_size,recon_size);
+%%
+fprintf('%s\n', 'Reconstruction using FBP...');
+FBP = iradon(Sino3D(:,:,10), angles,recon_size);
+figure; imshow(FBP , [0, 3]); title ('FBP reconstruction');
+
+%--------FISTA_REC modular reconstruction alogrithms---------
+%%
+fprintf('%s\n', 'Reconstruction using FISTA-OS-PWLS without regularization...');
+clear params
+params.proj_geom = proj_geom; % pass geometry to the function
+params.vol_geom = vol_geom;
+params.sino = Sino3D;
+params.iterFISTA  = 18;
+params.weights = Weights3D;
+params.subsets = 8; % the number of ordered subsets 
+params.show = 1;
+params.maxvalplot = 2.5; params.slice = 1;
+
+tic; [X_fista, outputFISTA] = FISTA_REC(params); toc;
+figure; imshow(X_fista(:,:,params.slice) , [0, 2.5]); title ('FISTA-OS-PWLS reconstruction');
+%%
+fprintf('%s\n', 'Reconstruction using FISTA-OS-PWLS-TV...');
+clear params
+params.proj_geom = proj_geom; % pass geometry to the function
+params.vol_geom = vol_geom;
+params.sino = Sino3D;
+params.iterFISTA  = 18;
+params.Regul_Lambda_FGPTV = 5.0000e+6; % TV regularization parameter for FGP-TV
+params.weights = Weights3D;
+params.subsets = 8; % the number of ordered subsets 
+params.show = 1;
+params.maxvalplot = 2.5; params.slice = 10;
+
+tic; [X_fista_TV, outputTV] = FISTA_REC(params); toc;
+figure; imshow(X_fista_TV(:,:,params.slice) , [0, 2.5]); title ('FISTA-OS-PWLS-TV reconstruction');
+%%
+fprintf('%s\n', 'Reconstruction using FISTA-OS-GH-TV...');
+clear params
+params.proj_geom = proj_geom; % pass geometry to the function
+params.vol_geom = vol_geom;
+params.sino = Sino3D(:,:,10);
+params.iterFISTA  = 18;
+params.Regul_Lambda_FGPTV = 5.0000e+6; % TV regularization parameter for FGP-TV
+params.Ring_LambdaR_L1 = 0.002; % Soft-Thresh L1 ring variable parameter
+params.Ring_Alpha = 21; % to boost ring removal procedure
+params.weights = Weights3D(:,:,10);
+params.subsets = 8; % the number of ordered subsets 
+params.show = 1;
+params.maxvalplot = 2.5; params.slice = 1;
+
+tic; [X_fista_GH_TV, outputGHTV] = FISTA_REC(params); toc;
+figure; imshow(X_fista_GH_TV(:,:,params.slice) , [0, 2.5]); title ('FISTA-OS-GH-TV reconstruction');
+%%
+fprintf('%s\n', 'Reconstruction using FISTA-OS-GH-TV-LLT...');
+clear params
+params.proj_geom = proj_geom; % pass geometry to the function
+params.vol_geom = vol_geom;
+params.sino = Sino3D;
+params.iterFISTA  = 12;
+params.Regul_Lambda_FGPTV = 5.0000e+6; % TV regularization parameter for FGP-TV
+params.Regul_LambdaLLT = 100;  % regularization parameter for LLT problem
+params.Regul_tauLLT = 0.0005; % time-step parameter for the explicit scheme
+params.Ring_LambdaR_L1 = 0.002; % Soft-Thresh L1 ring variable parameter
+params.Ring_Alpha = 21; % to boost ring removal procedure
+params.weights = Weights3D;
+params.subsets = 16; % the number of ordered subsets 
+params.show = 1;
+params.maxvalplot = 2.5; params.slice = 2;
+
+tic; [X_fista_GH_TVLLT, outputGH_TVLLT] = FISTA_REC(params); toc;
+figure; imshow(X_fista_GH_TVLLT(:,:,params.slice) , [0, 2.5]); title ('FISTA-OS-GH-TV-LLT reconstruction');
+
+%%
+fprintf('%s\n', 'Reconstruction using FISTA-OS-GH-HigherOrderDiffusion...');
+% !GPU version!
+clear params
+params.proj_geom = proj_geom; % pass geometry to the function
+params.vol_geom = vol_geom;
+params.sino = Sino3D(:,:,1:5);
+params.iterFISTA  = 25;
+params.Regul_LambdaDiffHO = 2; % DiffHO regularization parameter 
+params.Regul_DiffHO_EdgePar = 0.05; % threshold parameter
+params.Regul_Iterations = 150;
+params.Ring_LambdaR_L1 = 0.002; % Soft-Thresh L1 ring variable parameter
+params.Ring_Alpha = 21; % to boost ring removal procedure
+params.weights = Weights3D(:,:,1:5);
+params.subsets = 16; % the number of ordered subsets 
+params.show = 1;
+params.maxvalplot = 2.5; params.slice = 1;
+ 
+tic; [X_fista_GH_HO, outputHO] = FISTA_REC(params); toc;
+figure; imshow(X_fista_GH_HO(:,:,params.slice) , [0, 2.5]); title ('FISTA-OS-HigherOrderDiffusion reconstruction');
+
+%%
+fprintf('%s\n', 'Reconstruction using FISTA-PB...');
+% !GPU version!
+clear params
+params.proj_geom = proj_geom; % pass geometry to the function
+params.vol_geom = vol_geom;
+params.sino = Sino3D(:,:,1);
+params.iterFISTA  = 25;
+params.Regul_LambdaPatchBased_GPU = 3; % PB regularization parameter 
+params.Regul_PB_h = 0.04; % threhsold parameter
+params.Regul_PB_SearchW = 3;
+params.Regul_PB_SimilW  = 1;
+params.Ring_LambdaR_L1 = 0.002; % Soft-Thresh L1 ring variable parameter
+params.Ring_Alpha = 21; % to boost ring removal procedure
+params.weights = Weights3D(:,:,1);
+params.show = 1;
+params.maxvalplot = 2.5; params.slice = 1;
+ 
+tic; [X_fista_GH_PB, outputPB] = FISTA_REC(params); toc;
+figure; imshow(X_fista_GH_PB(:,:,params.slice) , [0, 2.5]); title ('FISTA-OS-PB reconstruction');
+%%
+fprintf('%s\n', 'Reconstruction using FISTA-OS-GH-TGV...');
+% still testing...
+clear params
+params.proj_geom = proj_geom; % pass geometry to the function
+params.vol_geom = vol_geom;
+params.sino = Sino3D;
+params.iterFISTA  = 12;
+params.Regul_LambdaTGV = 0.5; % TGV regularization parameter 
+params.Regul_Iterations = 5;
+params.Ring_LambdaR_L1 = 0.002; % Soft-Thresh L1 ring variable parameter
+params.Ring_Alpha = 21; % to boost ring removal procedure
+params.weights = Weights3D;
+params.subsets = 16; % the number of ordered subsets 
+params.show = 1;
+params.maxvalplot = 2.5; params.slice = 1;
+
+tic; [X_fista_GH_TGV, outputTGV] = FISTA_REC(params); toc;
+figure; imshow(X_fista_GH_TGV(:,:,params.slice) , [0, 2.5]); title ('FISTA-OS-GH-TGV reconstruction');
+
+
+%%
+% fprintf('%s\n', 'Reconstruction using FISTA-Student-TV...');
+% clear params
+% params.proj_geom = proj_geom; % pass geometry to the function
+% params.vol_geom = vol_geom;
+% params.sino = Sino3D(:,:,10);
+% params.iterFISTA  = 50;
+% params.L_const = 0.01; % Lipshitz constant
+% params.Regul_LambdaTV = 0.008; % TV regularization parameter for FISTA-TV
+% params.fidelity = 'student'; % choosing Student t penalty
+% params.weights = Weights3D(:,:,10);
+% params.show = 0;
+% params.initialize = 1;
+% params.maxvalplot = 2.5; params.slice = 1;
+% 
+% tic; [X_fistaStudentTV] = FISTA_REC(params); toc;
+% figure; imshow(X_fistaStudentTV(:,:,1), [0, 2.5]); title ('FISTA-Student-TV reconstruction');
+%%
diff --git a/Wrappers/Matlab/demos/exportDemoRD2Data.m b/Wrappers/Matlab/demos/exportDemoRD2Data.m
new file mode 100644
index 0000000..028353b
--- /dev/null
+++ b/Wrappers/Matlab/demos/exportDemoRD2Data.m
@@ -0,0 +1,35 @@
+clear all
+close all
+%%
+% % adding paths
+addpath('../data/');
+addpath('../main_func/'); addpath('../main_func/regularizers_CPU/');
+addpath('../supp/');
+
+load('DendrRawData.mat') % load raw data of 3D dendritic set
+angles_rad = angles*(pi/180); % conversion to radians
+size_det = size(data_raw3D,1); % detectors dim
+angSize = size(data_raw3D, 2); % angles dim
+slices_tot = size(data_raw3D, 3); % no of slices
+recon_size = 950; % reconstruction size
+
+Sino3D = zeros(size_det, angSize, slices_tot, 'single'); % log-corrected sino
+% normalizing the data
+for  jj = 1:slices_tot
+    sino = data_raw3D(:,:,jj);
+    for ii = 1:angSize
+        Sino3D(:,ii,jj) = log((flats_ar(:,jj)-darks_ar(:,jj))./(single(sino(:,ii)) - darks_ar(:,jj)));
+    end
+end
+
+Sino3D = Sino3D.*1000;
+Weights3D = single(data_raw3D); % weights for PW model
+clear data_raw3D
+
+hdf5write('DendrData.h5', '/Weights3D', Weights3D)
+hdf5write('DendrData.h5', '/Sino3D', Sino3D, 'WriteMode', 'append')
+hdf5write('DendrData.h5', '/angles_rad', angles_rad,  'WriteMode', 'append')
+hdf5write('DendrData.h5', '/size_det', size_det,  'WriteMode', 'append')
+hdf5write('DendrData.h5', '/angSize', angSize,  'WriteMode', 'append')
+hdf5write('DendrData.h5', '/slices_tot', slices_tot,  'WriteMode', 'append')
+hdf5write('DendrData.h5', '/recon_size', recon_size,  'WriteMode', 'append')
\ No newline at end of file
diff --git a/Wrappers/Matlab/mex_compile/compile_mex.m b/Wrappers/Matlab/mex_compile/compile_mex.m
new file mode 100644
index 0000000..1353859
--- /dev/null
+++ b/Wrappers/Matlab/mex_compile/compile_mex.m
@@ -0,0 +1,11 @@
+% compile mex's in Matlab once
+cd regularizers_CPU/
+
+mex LLT_model.c LLT_model_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+mex FGP_TV.c FGP_TV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+mex SplitBregman_TV.c SplitBregman_TV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+mex TGV_PD.c TGV_PD_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+mex PatchBased_Regul.c PatchBased_Regul_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+
+cd ../../
+cd demos
diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/FGP_TV.c b/Wrappers/Matlab/mex_compile/regularizers_CPU/FGP_TV.c
new file mode 100644
index 0000000..30cea1a
--- /dev/null
+++ b/Wrappers/Matlab/mex_compile/regularizers_CPU/FGP_TV.c
@@ -0,0 +1,216 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include "matrix.h"
+#include "mex.h"
+#include "FGP_TV_core.h"
+
+/* C-OMP implementation of FGP-TV [1] denoising/regularization model (2D/3D case)
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume [REQUIRED]
+ * 2. lambda - regularization parameter [REQUIRED]
+ * 3. Number of iterations [OPTIONAL parameter]
+ * 4. eplsilon: tolerance constant [OPTIONAL parameter]
+ * 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
+ *
+ * Output:
+ * [1] Filtered/regularized image
+ * [2] last function value 
+ *
+ * Example of image denoising:
+ * figure;
+ * Im = double(imread('lena_gray_256.tif'))/255;  % loading image
+ * u0 = Im + .05*randn(size(Im)); % adding noise
+ * u = FGP_TV(single(u0), 0.05, 100, 1e-04);
+ *
+ * to compile with OMP support: mex FGP_TV.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+ * This function is based on the Matlab's code and paper by
+ * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
+ *
+ * D. Kazantsev, 2016-17
+ *
+ */
+
+
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+        
+{
+    int number_of_dims, iter, dimX, dimY, dimZ, ll, j, count, methTV;
+    const int  *dim_array;
+    float *A, *D=NULL, *D_old=NULL, *P1=NULL, *P2=NULL, *P3=NULL, *P1_old=NULL, *P2_old=NULL, *P3_old=NULL, *R1=NULL, *R2=NULL, *R3=NULL, lambda, tk, tkp1, re, re1, re_old, epsil;
+    
+    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+    dim_array = mxGetDimensions(prhs[0]);
+    
+    /*Handling Matlab input data*/
+    if ((nrhs < 2) || (nrhs > 5)) mexErrMsgTxt("At least 2 parameters is required: Image(2D/3D), Regularization parameter. The full list of parameters: Image(2D/3D), Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1')");
+    
+    A  = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */
+    lambda =  (float) mxGetScalar(prhs[1]); /* regularization parameter */
+    iter = 50; /* default iterations number */
+    epsil = 0.0001; /* default tolerance constant */
+    methTV = 0;  /* default isotropic TV penalty */
+    
+    if ((nrhs == 3) || (nrhs == 4) || (nrhs == 5))  iter = (int) mxGetScalar(prhs[2]); /* iterations number */
+    if ((nrhs == 4) || (nrhs == 5))  epsil =  (float) mxGetScalar(prhs[3]); /* tolerance constant */
+    if (nrhs == 5)  {
+        char *penalty_type;
+        penalty_type = mxArrayToString(prhs[4]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */
+        if ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0)) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',");
+        if (strcmp(penalty_type, "l1") == 0)  methTV = 1;  /* enable 'l1' penalty */
+        mxFree(penalty_type);
+    }
+    /*output function value (last iteration) */
+    plhs[1] = mxCreateNumericMatrix(1, 1, mxSINGLE_CLASS, mxREAL);  
+    float *funcvalA = (float *) mxGetData(plhs[1]);
+        
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+    
+    /*Handling Matlab output data*/
+    dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2];
+    
+    tk = 1.0f;
+    tkp1=1.0f;
+    count = 0;
+    re_old = 0.0f;
+    
+    if (number_of_dims == 2) {
+        dimZ = 1; /*2D case*/
+        D = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        D_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        P1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        P2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        P1_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        P2_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        R1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        R2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        
+        /* begin iterations */
+        for(ll=0; ll<iter; ll++) {
+            
+            /* computing the gradient of the objective function */
+            Obj_func2D(A, D, R1, R2, lambda, dimX, dimY);
+            
+            /*Taking a step towards minus of the gradient*/
+            Grad_func2D(P1, P2, D, R1, R2, lambda, dimX, dimY);
+            
+            /* projection step */
+            Proj_func2D(P1, P2, methTV, dimX, dimY);
+            
+            /*updating R and t*/
+            tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
+            Rupd_func2D(P1, P1_old, P2, P2_old, R1, R2, tkp1, tk, dimX, dimY);                
+            
+            /* calculate norm */
+            re = 0.0f; re1 = 0.0f;
+            for(j=0; j<dimX*dimY*dimZ; j++)
+            {
+                re += pow(D[j] - D_old[j],2);
+                re1 += pow(D[j],2);
+            }
+            re = sqrt(re)/sqrt(re1);
+            if (re < epsil)  count++;
+            if (count > 4) {
+                Obj_func_CALC2D(A, D, funcvalA, lambda, dimX, dimY); 
+                break; }
+            
+            /* check that the residual norm is decreasing */
+            if (ll > 2) {
+                if (re > re_old) {
+                    Obj_func_CALC2D(A, D, funcvalA, lambda, dimX, dimY);                                   
+                    break; }}            
+            re_old = re;
+            /*printf("%f %i %i \n", re, ll, count); */                      
+          
+            /*storing old values*/
+            copyIm(D, D_old, dimX, dimY, dimZ);
+            copyIm(P1, P1_old, dimX, dimY, dimZ);
+            copyIm(P2, P2_old, dimX, dimY, dimZ);
+            tk = tkp1;
+            
+            /* calculating the objective function value */
+            if (ll == (iter-1)) Obj_func_CALC2D(A, D, funcvalA, lambda, dimX, dimY);            
+        }
+        printf("FGP-TV iterations stopped at iteration %i with the function value %f \n", ll, funcvalA[0]);
+    }
+    if (number_of_dims == 3) {
+        D = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        D_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        P1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        P2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        P3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        P1_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        P2_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        P3_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        R1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        R2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        R3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        
+        /* begin iterations */
+        for(ll=0; ll<iter; ll++) {
+            
+            /* computing the gradient of the objective function */
+            Obj_func3D(A, D, R1, R2, R3,lambda, dimX, dimY, dimZ);
+            
+            /*Taking a step towards minus of the gradient*/
+            Grad_func3D(P1, P2, P3, D, R1, R2, R3, lambda, dimX, dimY, dimZ);
+            
+            /* projection step */
+            Proj_func3D(P1, P2, P3, dimX, dimY, dimZ);
+            
+            /*updating R and t*/
+            tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
+            Rupd_func3D(P1, P1_old, P2, P2_old, P3, P3_old, R1, R2, R3, tkp1, tk, dimX, dimY, dimZ);
+            
+            /* calculate norm - stopping rules*/
+            re = 0.0f; re1 = 0.0f;
+            for(j=0; j<dimX*dimY*dimZ; j++)
+            {
+                re += pow(D[j] - D_old[j],2);
+                re1 += pow(D[j],2);
+            }
+            re = sqrt(re)/sqrt(re1);
+            /* stop if the norm residual is less than the tolerance EPS */
+            if (re < epsil)  count++;
+            if (count > 3) {
+                Obj_func_CALC3D(A, D, funcvalA, lambda, dimX, dimY, dimZ);                            
+                break;}
+            
+            /* check that the residual norm is decreasing */
+            if (ll > 2) {
+                if (re > re_old) {
+                Obj_func_CALC3D(A, D, funcvalA, lambda, dimX, dimY, dimZ);
+                }}            
+            re_old = re;
+            /*printf("%f %i %i \n", re, ll, count); */
+            
+            /*storing old values*/
+            copyIm(D, D_old, dimX, dimY, dimZ);
+            copyIm(P1, P1_old, dimX, dimY, dimZ);
+            copyIm(P2, P2_old, dimX, dimY, dimZ);
+            copyIm(P3, P3_old, dimX, dimY, dimZ);
+            tk = tkp1;
+            
+            if (ll == (iter-1)) Obj_func_CALC3D(A, D, funcvalA, lambda, dimX, dimY, dimZ);            
+        }
+        printf("FGP-TV iterations stopped at iteration %i with the function value %f \n", ll, funcvalA[0]);
+    }
+}
diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/FGP_TV_core.c b/Wrappers/Matlab/mex_compile/regularizers_CPU/FGP_TV_core.c
new file mode 100644
index 0000000..03cd445
--- /dev/null
+++ b/Wrappers/Matlab/mex_compile/regularizers_CPU/FGP_TV_core.c
@@ -0,0 +1,266 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "FGP_TV_core.h"
+
+/* C-OMP implementation of FGP-TV [1] denoising/regularization model (2D/3D case)
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume [REQUIRED]
+ * 2. lambda - regularization parameter [REQUIRED]
+ * 3. Number of iterations [OPTIONAL parameter]
+ * 4. eplsilon: tolerance constant [OPTIONAL parameter]
+ * 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
+ *
+ * Output:
+ * [1] Filtered/regularized image
+ * [2] last function value 
+ *
+ * Example of image denoising:
+ * figure;
+ * Im = double(imread('lena_gray_256.tif'))/255;  % loading image
+ * u0 = Im + .05*randn(size(Im)); % adding noise
+ * u = FGP_TV(single(u0), 0.05, 100, 1e-04);
+ *
+ * This function is based on the Matlab's code and paper by
+ * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
+ *
+ * D. Kazantsev, 2016-17
+ *
+ */
+
+/* 2D-case related Functions */
+/*****************************************************************/
+float Obj_func_CALC2D(float *A, float *D, float *funcvalA, float lambda, int dimX, int dimY)
+{   
+    int i,j;
+    float f1, f2, val1, val2;
+    
+    /*data-related term */
+    f1 = 0.0f;
+    for(i=0; i<dimX*dimY; i++) f1 += pow(D[i] - A[i],2);    
+    
+    /*TV-related term */
+    f2 = 0.0f;
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            /* boundary conditions  */
+            if (i == dimX-1) {val1 = 0.0f;} else {val1 = A[(i+1)*dimY + (j)] - A[(i)*dimY + (j)];}
+            if (j == dimY-1) {val2 = 0.0f;} else {val2 = A[(i)*dimY + (j+1)] - A[(i)*dimY + (j)];}    
+            f2 += sqrt(pow(val1,2) + pow(val2,2));
+        }}  
+    
+    /* sum of two terms */
+    funcvalA[0] = 0.5f*f1 + lambda*f2;     
+    return *funcvalA;
+}
+
+float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, int dimX, int dimY)
+{
+	float val1, val2;
+	int i, j;
+#pragma omp parallel for shared(A,D,R1,R2) private(i,j,val1,val2)
+	for (i = 0; i<dimX; i++) {
+		for (j = 0; j<dimY; j++) {
+			/* boundary conditions  */
+			if (i == 0) { val1 = 0.0f; }
+			else { val1 = R1[(i - 1)*dimY + (j)]; }
+			if (j == 0) { val2 = 0.0f; }
+			else { val2 = R2[(i)*dimY + (j - 1)]; }
+			D[(i)*dimY + (j)] = A[(i)*dimY + (j)] - lambda*(R1[(i)*dimY + (j)] + R2[(i)*dimY + (j)] - val1 - val2);
+		}
+	}
+	return *D;
+}
+float Grad_func2D(float *P1, float *P2, float *D, float *R1, float *R2, float lambda, int dimX, int dimY)
+{
+	float val1, val2, multip;
+	int i, j;
+	multip = (1.0f / (8.0f*lambda));
+#pragma omp parallel for shared(P1,P2,D,R1,R2,multip) private(i,j,val1,val2)
+	for (i = 0; i<dimX; i++) {
+		for (j = 0; j<dimY; j++) {
+			/* boundary conditions */
+			if (i == dimX - 1) val1 = 0.0f; else val1 = D[(i)*dimY + (j)] - D[(i + 1)*dimY + (j)];
+			if (j == dimY - 1) val2 = 0.0f; else val2 = D[(i)*dimY + (j)] - D[(i)*dimY + (j + 1)];
+			P1[(i)*dimY + (j)] = R1[(i)*dimY + (j)] + multip*val1;
+			P2[(i)*dimY + (j)] = R2[(i)*dimY + (j)] + multip*val2;
+		}
+	}
+	return 1;
+}
+float Proj_func2D(float *P1, float *P2, int methTV, int dimX, int dimY)
+{
+	float val1, val2, denom;
+	int i, j;
+	if (methTV == 0) {
+		/* isotropic TV*/
+#pragma omp parallel for shared(P1,P2) private(i,j,denom)
+		for (i = 0; i<dimX; i++) {
+			for (j = 0; j<dimY; j++) {
+				denom = pow(P1[(i)*dimY + (j)], 2) + pow(P2[(i)*dimY + (j)], 2);
+				if (denom > 1) {
+					P1[(i)*dimY + (j)] = P1[(i)*dimY + (j)] / sqrt(denom);
+					P2[(i)*dimY + (j)] = P2[(i)*dimY + (j)] / sqrt(denom);
+				}
+			}
+		}
+	}
+	else {
+		/* anisotropic TV*/
+#pragma omp parallel for shared(P1,P2) private(i,j,val1,val2)
+		for (i = 0; i<dimX; i++) {
+			for (j = 0; j<dimY; j++) {
+				val1 = fabs(P1[(i)*dimY + (j)]);
+				val2 = fabs(P2[(i)*dimY + (j)]);
+				if (val1 < 1.0f) { val1 = 1.0f; }
+				if (val2 < 1.0f) { val2 = 1.0f; }
+				P1[(i)*dimY + (j)] = P1[(i)*dimY + (j)] / val1;
+				P2[(i)*dimY + (j)] = P2[(i)*dimY + (j)] / val2;
+			}
+		}
+	}
+	return 1;
+}
+float Rupd_func2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, int dimX, int dimY)
+{
+	int i, j;
+	float multip;
+	multip = ((tk - 1.0f) / tkp1);
+#pragma omp parallel for shared(P1,P2,P1_old,P2_old,R1,R2,multip) private(i,j)
+	for (i = 0; i<dimX; i++) {
+		for (j = 0; j<dimY; j++) {
+			R1[(i)*dimY + (j)] = P1[(i)*dimY + (j)] + multip*(P1[(i)*dimY + (j)] - P1_old[(i)*dimY + (j)]);
+			R2[(i)*dimY + (j)] = P2[(i)*dimY + (j)] + multip*(P2[(i)*dimY + (j)] - P2_old[(i)*dimY + (j)]);
+		}
+	}
+	return 1;
+}
+
+/* 3D-case related Functions */
+/*****************************************************************/
+float Obj_func_CALC3D(float *A, float *D, float *funcvalA, float lambda, int dimX, int dimY, int dimZ)
+{   
+    int i,j,k;
+    float f1, f2, val1, val2, val3;
+    
+    /*data-related term */
+    f1 = 0.0f;
+    for(i=0; i<dimX*dimY*dimZ; i++) f1 += pow(D[i] - A[i],2);    
+    
+    /*TV-related term */
+    f2 = 0.0f;
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            for(k=0; k<dimZ; k++) {
+            /* boundary conditions  */
+            if (i == dimX-1) {val1 = 0.0f;} else {val1 = A[(dimX*dimY)*k + (i+1)*dimY + (j)] - A[(dimX*dimY)*k + (i)*dimY + (j)];}
+            if (j == dimY-1) {val2 = 0.0f;} else {val2 = A[(dimX*dimY)*k + (i)*dimY + (j+1)] - A[(dimX*dimY)*k + (i)*dimY + (j)];}    
+            if (k == dimZ-1) {val3 = 0.0f;} else {val3 = A[(dimX*dimY)*(k+1) + (i)*dimY + (j)] - A[(dimX*dimY)*k + (i)*dimY + (j)];}    
+            f2 += sqrt(pow(val1,2) + pow(val2,2)  + pow(val3,2));
+        }}}     
+    /* sum of two terms */
+    funcvalA[0] = 0.5f*f1 + lambda*f2;     
+    return *funcvalA;
+}
+
+float Obj_func3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ)
+{
+	float val1, val2, val3;
+	int i, j, k;
+#pragma omp parallel for shared(A,D,R1,R2,R3) private(i,j,k,val1,val2,val3)
+	for (i = 0; i<dimX; i++) {
+		for (j = 0; j<dimY; j++) {
+			for (k = 0; k<dimZ; k++) {
+				/* boundary conditions */
+				if (i == 0) { val1 = 0.0f; }
+				else { val1 = R1[(dimX*dimY)*k + (i - 1)*dimY + (j)]; }
+				if (j == 0) { val2 = 0.0f; }
+				else { val2 = R2[(dimX*dimY)*k + (i)*dimY + (j - 1)]; }
+				if (k == 0) { val3 = 0.0f; }
+				else { val3 = R3[(dimX*dimY)*(k - 1) + (i)*dimY + (j)]; }
+				D[(dimX*dimY)*k + (i)*dimY + (j)] = A[(dimX*dimY)*k + (i)*dimY + (j)] - lambda*(R1[(dimX*dimY)*k + (i)*dimY + (j)] + R2[(dimX*dimY)*k + (i)*dimY + (j)] + R3[(dimX*dimY)*k + (i)*dimY + (j)] - val1 - val2 - val3);
+			}
+		}
+	}
+	return *D;
+}
+float Grad_func3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ)
+{
+	float val1, val2, val3, multip;
+	int i, j, k;
+	multip = (1.0f / (8.0f*lambda));
+#pragma omp parallel for shared(P1,P2,P3,D,R1,R2,R3,multip) private(i,j,k,val1,val2,val3)
+	for (i = 0; i<dimX; i++) {
+		for (j = 0; j<dimY; j++) {
+			for (k = 0; k<dimZ; k++) {
+				/* boundary conditions */
+				if (i == dimX - 1) val1 = 0.0f; else val1 = D[(dimX*dimY)*k + (i)*dimY + (j)] - D[(dimX*dimY)*k + (i + 1)*dimY + (j)];
+				if (j == dimY - 1) val2 = 0.0f; else val2 = D[(dimX*dimY)*k + (i)*dimY + (j)] - D[(dimX*dimY)*k + (i)*dimY + (j + 1)];
+				if (k == dimZ - 1) val3 = 0.0f; else val3 = D[(dimX*dimY)*k + (i)*dimY + (j)] - D[(dimX*dimY)*(k + 1) + (i)*dimY + (j)];
+				P1[(dimX*dimY)*k + (i)*dimY + (j)] = R1[(dimX*dimY)*k + (i)*dimY + (j)] + multip*val1;
+				P2[(dimX*dimY)*k + (i)*dimY + (j)] = R2[(dimX*dimY)*k + (i)*dimY + (j)] + multip*val2;
+				P3[(dimX*dimY)*k + (i)*dimY + (j)] = R3[(dimX*dimY)*k + (i)*dimY + (j)] + multip*val3;
+			}
+		}
+	}
+	return 1;
+}
+float Proj_func3D(float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ)
+{
+	float val1, val2, val3;
+	int i, j, k;
+#pragma omp parallel for shared(P1,P2,P3) private(i,j,k,val1,val2,val3)
+	for (i = 0; i<dimX; i++) {
+		for (j = 0; j<dimY; j++) {
+			for (k = 0; k<dimZ; k++) {
+				val1 = fabs(P1[(dimX*dimY)*k + (i)*dimY + (j)]);
+				val2 = fabs(P2[(dimX*dimY)*k + (i)*dimY + (j)]);
+				val3 = fabs(P3[(dimX*dimY)*k + (i)*dimY + (j)]);
+				if (val1 < 1.0f) { val1 = 1.0f; }
+				if (val2 < 1.0f) { val2 = 1.0f; }
+				if (val3 < 1.0f) { val3 = 1.0f; }
+
+				P1[(dimX*dimY)*k + (i)*dimY + (j)] = P1[(dimX*dimY)*k + (i)*dimY + (j)] / val1;
+				P2[(dimX*dimY)*k + (i)*dimY + (j)] = P2[(dimX*dimY)*k + (i)*dimY + (j)] / val2;
+				P3[(dimX*dimY)*k + (i)*dimY + (j)] = P3[(dimX*dimY)*k + (i)*dimY + (j)] / val3;
+			}
+		}
+	}
+	return 1;
+}
+float Rupd_func3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, int dimX, int dimY, int dimZ)
+{
+	int i, j, k;
+	float multip;
+	multip = ((tk - 1.0f) / tkp1);
+#pragma omp parallel for shared(P1,P2,P3,P1_old,P2_old,P3_old,R1,R2,R3,multip) private(i,j,k)
+	for (i = 0; i<dimX; i++) {
+		for (j = 0; j<dimY; j++) {
+			for (k = 0; k<dimZ; k++) {
+				R1[(dimX*dimY)*k + (i)*dimY + (j)] = P1[(dimX*dimY)*k + (i)*dimY + (j)] + multip*(P1[(dimX*dimY)*k + (i)*dimY + (j)] - P1_old[(dimX*dimY)*k + (i)*dimY + (j)]);
+				R2[(dimX*dimY)*k + (i)*dimY + (j)] = P2[(dimX*dimY)*k + (i)*dimY + (j)] + multip*(P2[(dimX*dimY)*k + (i)*dimY + (j)] - P2_old[(dimX*dimY)*k + (i)*dimY + (j)]);
+				R3[(dimX*dimY)*k + (i)*dimY + (j)] = P3[(dimX*dimY)*k + (i)*dimY + (j)] + multip*(P3[(dimX*dimY)*k + (i)*dimY + (j)] - P3_old[(dimX*dimY)*k + (i)*dimY + (j)]);
+			}
+		}
+	}
+	return 1;
+}
+
+
diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/FGP_TV_core.h b/Wrappers/Matlab/mex_compile/regularizers_CPU/FGP_TV_core.h
new file mode 100644
index 0000000..6430bf2
--- /dev/null
+++ b/Wrappers/Matlab/mex_compile/regularizers_CPU/FGP_TV_core.h
@@ -0,0 +1,71 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+//#include <matrix.h>
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+#include "utils.h"
+
+/* C-OMP implementation of FGP-TV [1] denoising/regularization model (2D/3D case)
+*
+* Input Parameters:
+* 1. Noisy image/volume [REQUIRED]
+* 2. lambda - regularization parameter [REQUIRED]
+* 3. Number of iterations [OPTIONAL parameter]
+* 4. eplsilon: tolerance constant [OPTIONAL parameter]
+* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
+*
+* Output:
+* [1] Filtered/regularized image
+* [2] last function value
+*
+* Example of image denoising:
+* figure;
+* Im = double(imread('lena_gray_256.tif'))/255;  % loading image
+* u0 = Im + .05*randn(size(Im)); % adding noise
+* u = FGP_TV(single(u0), 0.05, 100, 1e-04);
+*
+* to compile with OMP support: mex FGP_TV.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+* This function is based on the Matlab's code and paper by
+* [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
+*
+* D. Kazantsev, 2016-17
+*
+*/
+#ifdef __cplusplus
+extern "C" {
+#endif
+//float copyIm(float *A, float *B, int dimX, int dimY, int dimZ);
+float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, int dimX, int dimY);
+float Grad_func2D(float *P1, float *P2, float *D, float *R1, float *R2, float lambda, int dimX, int dimY);
+float Proj_func2D(float *P1, float *P2, int methTV, int dimX, int dimY);
+float Rupd_func2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, int dimX, int dimY);
+float Obj_func_CALC2D(float *A, float *D, float *funcvalA, float lambda, int dimX, int dimY);
+
+float Obj_func3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ);
+float Grad_func3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ);
+float Proj_func3D(float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ);
+float Rupd_func3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, int dimX, int dimY, int dimZ);
+float Obj_func_CALC3D(float *A, float *D, float *funcvalA, float lambda, int dimX, int dimY, int dimZ);
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file
diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/LLT_model.c b/Wrappers/Matlab/mex_compile/regularizers_CPU/LLT_model.c
new file mode 100644
index 0000000..0b07b47
--- /dev/null
+++ b/Wrappers/Matlab/mex_compile/regularizers_CPU/LLT_model.c
@@ -0,0 +1,169 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "mex.h"
+#include "matrix.h"
+#include "LLT_model_core.h"
+
+/* C-OMP implementation of Lysaker, Lundervold and Tai (LLT) model of higher order regularization penalty
+*
+* Input Parameters:
+* 1. U0 - original noise image/volume
+* 2. lambda - regularization parameter
+* 3. tau - time-step  for explicit scheme
+* 4. iter - iterations number
+* 5. epsil  - tolerance constant (to terminate earlier)
+* 6. switcher - default is 0, switch to (1) to restrictive smoothing in Z dimension (in test)
+*
+* Output:
+* Filtered/regularized image
+*
+* Example:
+* figure;
+* Im = double(imread('lena_gray_256.tif'))/255;  % loading image
+* u0 = Im + .03*randn(size(Im)); % adding noise
+* [Den] = LLT_model(single(u0), 10, 0.1, 1);
+*
+*
+* to compile with OMP support: mex LLT_model.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+* References: Lysaker, Lundervold and Tai (LLT) 2003, IEEE
+*
+* 28.11.16/Harwell
+*/
+
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+        
+{
+    int number_of_dims, iter, dimX, dimY, dimZ, ll, j, count, switcher;
+    const int  *dim_array;
+    float *U0, *U=NULL, *U_old=NULL, *D1=NULL, *D2=NULL, *D3=NULL, lambda, tau, re, re1, epsil, re_old;
+    unsigned short *Map=NULL;
+    
+    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+    dim_array = mxGetDimensions(prhs[0]);
+    
+    /*Handling Matlab input data*/
+    U0  = (float *) mxGetData(prhs[0]); /*origanal noise image/volume*/
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input in single precision is required"); }
+    lambda =  (float) mxGetScalar(prhs[1]); /*regularization parameter*/
+    tau =  (float) mxGetScalar(prhs[2]); /* time-step */
+    iter =  (int) mxGetScalar(prhs[3]); /*iterations number*/
+    epsil =  (float) mxGetScalar(prhs[4]); /* tolerance constant */
+    switcher =  (int) mxGetScalar(prhs[5]); /*switch on (1) restrictive smoothing in Z dimension*/
+     
+    /*Handling Matlab output data*/
+    dimX = dim_array[0]; dimY = dim_array[1];  dimZ = 1;
+    
+    if (number_of_dims == 2) {
+        /*2D case*/       
+        U = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        U_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        D1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        D2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+    }
+    else if (number_of_dims == 3) {
+        /*3D case*/
+        dimZ = dim_array[2];
+        U = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        U_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        D1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        D2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        D3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        if (switcher != 0) {
+        Map = (unsigned short*)mxGetPr(plhs[1] = mxCreateNumericArray(3, dim_array, mxUINT16_CLASS, mxREAL));       
+        }
+    }
+    else {mexErrMsgTxt("The input data should be 2D or 3D");}
+    
+    /*Copy U0 to U*/
+    copyIm(U0, U, dimX, dimY, dimZ);
+    
+    count = 1;
+    re_old = 0.0f; 
+    if (number_of_dims == 2) {
+        for(ll = 0; ll < iter; ll++) {
+            
+            copyIm(U, U_old, dimX, dimY, dimZ);
+            
+            /*estimate inner derrivatives */
+            der2D(U, D1, D2, dimX, dimY, dimZ);
+            /* calculate div^2 and update */
+            div_upd2D(U0, U, D1, D2, dimX, dimY, dimZ, lambda, tau);
+            
+            /* calculate norm to terminate earlier */
+            re = 0.0f; re1 = 0.0f;
+            for(j=0; j<dimX*dimY*dimZ; j++)
+            {
+                re += pow(U_old[j] - U[j],2);
+                re1 += pow(U_old[j],2);
+            }
+            re = sqrt(re)/sqrt(re1);           
+            if (re < epsil)  count++;
+            if (count > 4) break;            
+            
+            /* check that the residual norm is decreasing */
+            if (ll > 2) {
+                if (re > re_old) break; 
+            }
+            re_old = re;         
+        
+        } /*end of iterations*/
+        printf("HO iterations stopped at iteration: %i\n", ll);          
+    }
+    /*3D version*/
+    if (number_of_dims == 3) {
+        
+        if (switcher == 1) {
+            /* apply restrictive smoothing */            
+            calcMap(U, Map, dimX, dimY, dimZ);
+            /*clear outliers */
+            cleanMap(Map, dimX, dimY, dimZ);           
+        }
+        for(ll = 0; ll < iter; ll++) {
+            
+            copyIm(U, U_old, dimX, dimY, dimZ);
+            
+            /*estimate inner derrivatives */
+            der3D(U, D1, D2, D3, dimX, dimY, dimZ);          
+            /* calculate div^2 and update */
+            div_upd3D(U0, U, D1, D2, D3, Map, switcher, dimX, dimY, dimZ, lambda, tau);                 
+            
+            /* calculate norm to terminate earlier */
+            re = 0.0f; re1 = 0.0f;
+            for(j=0; j<dimX*dimY*dimZ; j++)
+            {
+                re += pow(U_old[j] - U[j],2);
+                re1 += pow(U_old[j],2);
+            }
+            re = sqrt(re)/sqrt(re1);           
+            if (re < epsil)  count++;
+            if (count > 4) break;            
+            
+            /* check that the residual norm is decreasing */
+            if (ll > 2) {
+                if (re > re_old) break; 
+            }
+            re_old = re;     
+            
+        } /*end of iterations*/
+        printf("HO iterations stopped at iteration: %i\n", ll);       
+    }
+}
diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/LLT_model_core.c b/Wrappers/Matlab/mex_compile/regularizers_CPU/LLT_model_core.c
new file mode 100644
index 0000000..3a853d2
--- /dev/null
+++ b/Wrappers/Matlab/mex_compile/regularizers_CPU/LLT_model_core.c
@@ -0,0 +1,318 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "LLT_model_core.h"
+
+/* C-OMP implementation of Lysaker, Lundervold and Tai (LLT) model of higher order regularization penalty
+*
+* Input Parameters:
+* 1. U0 - origanal noise image/volume
+* 2. lambda - regularization parameter
+* 3. tau - time-step  for explicit scheme
+* 4. iter - iterations number
+* 5. epsil  - tolerance constant (to terminate earlier)
+* 6. switcher - default is 0, switch to (1) to restrictive smoothing in Z dimension (in test)
+*
+* Output:
+* Filtered/regularized image
+*
+* Example:
+* figure;
+* Im = double(imread('lena_gray_256.tif'))/255;  % loading image
+* u0 = Im + .03*randn(size(Im)); % adding noise
+* [Den] = LLT_model(single(u0), 10, 0.1, 1);
+*
+* References: Lysaker, Lundervold and Tai (LLT) 2003, IEEE
+*
+* 28.11.16/Harwell
+*/
+
+
+float der2D(float *U, float *D1, float *D2, int dimX, int dimY, int dimZ)
+{
+	int i, j, i_p, i_m, j_m, j_p;
+	float dxx, dyy, denom_xx, denom_yy;
+#pragma omp parallel for shared(U,D1,D2) private(i, j, i_p, i_m, j_m, j_p, denom_xx, denom_yy, dxx, dyy)
+	for (i = 0; i<dimX; i++) {
+		for (j = 0; j<dimY; j++) {
+			/* symmetric boundary conditions (Neuman) */
+			i_p = i + 1; if (i_p == dimX) i_p = i - 1;
+			i_m = i - 1; if (i_m < 0) i_m = i + 1;
+			j_p = j + 1; if (j_p == dimY) j_p = j - 1;
+			j_m = j - 1; if (j_m < 0) j_m = j + 1;
+
+			dxx = U[i_p*dimY + j] - 2.0f*U[i*dimY + j] + U[i_m*dimY + j];
+			dyy = U[i*dimY + j_p] - 2.0f*U[i*dimY + j] + U[i*dimY + j_m];
+
+			denom_xx = fabs(dxx) + EPS;
+			denom_yy = fabs(dyy) + EPS;
+
+			D1[i*dimY + j] = dxx / denom_xx;
+			D2[i*dimY + j] = dyy / denom_yy;
+		}
+	}
+	return 1;
+}
+float div_upd2D(float *U0, float *U, float *D1, float *D2, int dimX, int dimY, int dimZ, float lambda, float tau)
+{
+	int i, j, i_p, i_m, j_m, j_p;
+	float div, dxx, dyy;
+#pragma omp parallel for shared(U,U0,D1,D2) private(i, j, i_p, i_m, j_m, j_p, div, dxx, dyy)
+	for (i = 0; i<dimX; i++) {
+		for (j = 0; j<dimY; j++) {
+			/* symmetric boundary conditions (Neuman) */
+			i_p = i + 1; if (i_p == dimX) i_p = i - 1;
+			i_m = i - 1; if (i_m < 0) i_m = i + 1;
+			j_p = j + 1; if (j_p == dimY) j_p = j - 1;
+			j_m = j - 1; if (j_m < 0) j_m = j + 1;
+
+			dxx = D1[i_p*dimY + j] - 2.0f*D1[i*dimY + j] + D1[i_m*dimY + j];
+			dyy = D2[i*dimY + j_p] - 2.0f*D2[i*dimY + j] + D2[i*dimY + j_m];
+
+			div = dxx + dyy;
+
+			U[i*dimY + j] = U[i*dimY + j] - tau*div - tau*lambda*(U[i*dimY + j] - U0[i*dimY + j]);
+		}
+	}
+	return *U0;
+}
+
+float der3D(float *U, float *D1, float *D2, float *D3, int dimX, int dimY, int dimZ)
+{
+	int i, j, k, i_p, i_m, j_m, j_p, k_p, k_m;
+	float dxx, dyy, dzz, denom_xx, denom_yy, denom_zz;
+#pragma omp parallel for shared(U,D1,D2,D3) private(i, j, k, i_p, i_m, j_m, j_p, k_p, k_m, denom_xx, denom_yy, denom_zz, dxx, dyy, dzz)
+	for (i = 0; i<dimX; i++) {
+		/* symmetric boundary conditions (Neuman) */
+		i_p = i + 1; if (i_p == dimX) i_p = i - 1;
+		i_m = i - 1; if (i_m < 0) i_m = i + 1;
+		for (j = 0; j<dimY; j++) {
+			j_p = j + 1; if (j_p == dimY) j_p = j - 1;
+			j_m = j - 1; if (j_m < 0) j_m = j + 1;
+			for (k = 0; k<dimZ; k++) {
+				k_p = k + 1; if (k_p == dimZ) k_p = k - 1;
+				k_m = k - 1; if (k_m < 0) k_m = k + 1;
+
+				dxx = U[dimX*dimY*k + i_p*dimY + j] - 2.0f*U[dimX*dimY*k + i*dimY + j] + U[dimX*dimY*k + i_m*dimY + j];
+				dyy = U[dimX*dimY*k + i*dimY + j_p] - 2.0f*U[dimX*dimY*k + i*dimY + j] + U[dimX*dimY*k + i*dimY + j_m];
+				dzz = U[dimX*dimY*k_p + i*dimY + j] - 2.0f*U[dimX*dimY*k + i*dimY + j] + U[dimX*dimY*k_m + i*dimY + j];
+
+				denom_xx = fabs(dxx) + EPS;
+				denom_yy = fabs(dyy) + EPS;
+				denom_zz = fabs(dzz) + EPS;
+
+				D1[dimX*dimY*k + i*dimY + j] = dxx / denom_xx;
+				D2[dimX*dimY*k + i*dimY + j] = dyy / denom_yy;
+				D3[dimX*dimY*k + i*dimY + j] = dzz / denom_zz;
+
+			}
+		}
+	}
+	return 1;
+}
+
+float div_upd3D(float *U0, float *U, float *D1, float *D2, float *D3, unsigned short *Map, int switcher, int dimX, int dimY, int dimZ, float lambda, float tau)
+{
+	int i, j, k, i_p, i_m, j_m, j_p, k_p, k_m;
+	float div, dxx, dyy, dzz;
+#pragma omp parallel for shared(U,U0,D1,D2,D3) private(i, j, k, i_p, i_m, j_m, j_p, k_p, k_m, div, dxx, dyy, dzz)
+	for (i = 0; i<dimX; i++) {
+		/* symmetric boundary conditions (Neuman) */
+		i_p = i + 1; if (i_p == dimX) i_p = i - 1;
+		i_m = i - 1; if (i_m < 0) i_m = i + 1;
+		for (j = 0; j<dimY; j++) {
+			j_p = j + 1; if (j_p == dimY) j_p = j - 1;
+			j_m = j - 1; if (j_m < 0) j_m = j + 1;
+			for (k = 0; k<dimZ; k++) {
+				k_p = k + 1; if (k_p == dimZ) k_p = k - 1;
+				k_m = k - 1; if (k_m < 0) k_m = k + 1;
+				//                 k_p1 = k + 2; if (k_p1 >= dimZ) k_p1 = k - 2;
+				//                 k_m1 = k - 2; if (k_m1 < 0) k_m1 = k + 2;   
+
+				dxx = D1[dimX*dimY*k + i_p*dimY + j] - 2.0f*D1[dimX*dimY*k + i*dimY + j] + D1[dimX*dimY*k + i_m*dimY + j];
+				dyy = D2[dimX*dimY*k + i*dimY + j_p] - 2.0f*D2[dimX*dimY*k + i*dimY + j] + D2[dimX*dimY*k + i*dimY + j_m];
+				dzz = D3[dimX*dimY*k_p + i*dimY + j] - 2.0f*D3[dimX*dimY*k + i*dimY + j] + D3[dimX*dimY*k_m + i*dimY + j];
+
+				if ((switcher == 1) && (Map[dimX*dimY*k + i*dimY + j] == 0)) dzz = 0;
+				div = dxx + dyy + dzz;
+
+				//                 if (switcher == 1) {                    
+				// if (Map2[dimX*dimY*k + i*dimY + j] == 0) dzz2 = 0;
+				//else dzz2 = D4[dimX*dimY*k_p1 + i*dimY + j] - 2.0f*D4[dimX*dimY*k + i*dimY + j] + D4[dimX*dimY*k_m1 + i*dimY + j];
+				//                     div = dzz + dzz2;
+				//                 }
+
+				//                 dzz = D3[dimX*dimY*k_p + i*dimY + j] - 2.0f*D3[dimX*dimY*k + i*dimY + j] + D3[dimX*dimY*k_m + i*dimY + j];
+				//                 dzz2 = D4[dimX*dimY*k_p1 + i*dimY + j] - 2.0f*D4[dimX*dimY*k + i*dimY + j] + D4[dimX*dimY*k_m1 + i*dimY + j];  
+				//                 div = dzz + dzz2;
+
+				U[dimX*dimY*k + i*dimY + j] = U[dimX*dimY*k + i*dimY + j] - tau*div - tau*lambda*(U[dimX*dimY*k + i*dimY + j] - U0[dimX*dimY*k + i*dimY + j]);
+			}
+		}
+	}
+	return *U0;
+}
+
+// float der3D_2(float *U, float *D1, float *D2, float *D3, float *D4, int dimX, int dimY, int dimZ)
+// {
+//     int i, j, k, i_p, i_m, j_m, j_p, k_p, k_m, k_p1, k_m1;
+//     float dxx, dyy, dzz, dzz2, denom_xx, denom_yy, denom_zz, denom_zz2;
+// #pragma omp parallel for shared(U,D1,D2,D3,D4) private(i, j, k, i_p, i_m, j_m, j_p, k_p, k_m, denom_xx, denom_yy, denom_zz, denom_zz2, dxx, dyy, dzz, dzz2, k_p1, k_m1)
+//     for(i=0; i<dimX; i++) {
+//         /* symmetric boundary conditions (Neuman) */
+//         i_p = i + 1; if (i_p == dimX) i_p = i - 1;
+//         i_m = i - 1; if (i_m < 0) i_m = i + 1;
+//         for(j=0; j<dimY; j++) {
+//             j_p = j + 1; if (j_p == dimY) j_p = j - 1;
+//             j_m = j - 1; if (j_m < 0) j_m = j + 1;
+//             for(k=0; k<dimZ; k++) {
+//                 k_p = k + 1; if (k_p == dimZ) k_p = k - 1;
+//                 k_m = k - 1; if (k_m < 0) k_m = k + 1;
+//                 k_p1 = k + 2; if (k_p1 >= dimZ) k_p1 = k - 2;
+//                 k_m1 = k - 2; if (k_m1 < 0) k_m1 = k + 2;                
+//                 
+//                 dxx = U[dimX*dimY*k + i_p*dimY + j] - 2.0f*U[dimX*dimY*k + i*dimY + j] + U[dimX*dimY*k + i_m*dimY + j];
+//                 dyy = U[dimX*dimY*k + i*dimY + j_p] - 2.0f*U[dimX*dimY*k + i*dimY + j] + U[dimX*dimY*k + i*dimY + j_m];
+//                 dzz = U[dimX*dimY*k_p + i*dimY + j] - 2.0f*U[dimX*dimY*k + i*dimY + j] + U[dimX*dimY*k_m + i*dimY + j];                
+//                 dzz2 = U[dimX*dimY*k_p1 + i*dimY + j] - 2.0f*U[dimX*dimY*k + i*dimY + j] + U[dimX*dimY*k_m1 + i*dimY + j];                
+//                 
+//                 denom_xx = fabs(dxx) + EPS;
+//                 denom_yy = fabs(dyy) + EPS;
+//                 denom_zz = fabs(dzz) + EPS;
+//                 denom_zz2 = fabs(dzz2) + EPS;
+//                 
+//                 D1[dimX*dimY*k + i*dimY + j] = dxx/denom_xx;
+//                 D2[dimX*dimY*k + i*dimY + j] = dyy/denom_yy;
+//                 D3[dimX*dimY*k + i*dimY + j] = dzz/denom_zz;               
+//                 D4[dimX*dimY*k + i*dimY + j] = dzz2/denom_zz2;                               
+//             }}}
+//     return 1;
+// }
+
+float calcMap(float *U, unsigned short *Map, int dimX, int dimY, int dimZ)
+{
+	int i, j, k, i1, j1, i2, j2, windowSize;
+	float val1, val2, thresh_val, maxval;
+	windowSize = 1;
+	thresh_val = 0.0001; /*thresh_val = 0.0035;*/
+
+						 /* normalize volume first */
+	maxval = 0.0f;
+	for (i = 0; i<dimX; i++) {
+		for (j = 0; j<dimY; j++) {
+			for (k = 0; k<dimZ; k++) {
+				if (U[dimX*dimY*k + i*dimY + j] > maxval) maxval = U[dimX*dimY*k + i*dimY + j];
+			}
+		}
+	}
+
+	if (maxval != 0.0f) {
+		for (i = 0; i<dimX; i++) {
+			for (j = 0; j<dimY; j++) {
+				for (k = 0; k<dimZ; k++) {
+					U[dimX*dimY*k + i*dimY + j] = U[dimX*dimY*k + i*dimY + j] / maxval;
+				}
+			}
+		}
+	}
+	else {
+		printf("%s \n", "Maximum value is zero!");
+		return 0;
+	}
+
+#pragma omp parallel for shared(U,Map) private(i, j, k, i1, j1, i2, j2, val1, val2)
+	for (i = 0; i<dimX; i++) {
+		for (j = 0; j<dimY; j++) {
+			for (k = 0; k<dimZ; k++) {
+
+				Map[dimX*dimY*k + i*dimY + j] = 0;
+				//                 Map2[dimX*dimY*k + i*dimY + j] = 0; 
+
+				val1 = 0.0f; val2 = 0.0f;
+				for (i1 = -windowSize; i1 <= windowSize; i1++) {
+					for (j1 = -windowSize; j1 <= windowSize; j1++) {
+						i2 = i + i1;
+						j2 = j + j1;
+
+						if ((i2 >= 0) && (i2 < dimX) && (j2 >= 0) && (j2 < dimY)) {
+							if (k == 0) {
+								val1 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k + 1) + i2*dimY + j2], 2);
+								//                           val3 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k+2) + i2*dimY + j2],2);                                                  
+							}
+							else if (k == dimZ - 1) {
+								val1 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k - 1) + i2*dimY + j2], 2);
+								//                           val3 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k-2) + i2*dimY + j2],2);                           
+							}
+							//                       else if (k == 1) {
+							//                           val1 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k-1) + i2*dimY + j2],2); 
+							//                           val2 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k+1) + i2*dimY + j2],2);  
+							//                           val3 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k+2) + i2*dimY + j2],2);                           
+							//                       }
+							//                       else if (k == dimZ-2) {
+							//                           val1 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k-1) + i2*dimY + j2],2); 
+							//                           val2 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k+1) + i2*dimY + j2],2);      
+							//                           val3 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k-2) + i2*dimY + j2],2);                           
+							//                       }                      
+							else {
+								val1 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k - 1) + i2*dimY + j2], 2);
+								val2 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k + 1) + i2*dimY + j2], 2);
+								//                           val3 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k-2) + i2*dimY + j2],2); 
+								//                           val4 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k+2) + i2*dimY + j2],2);  
+							}
+						}
+					}
+				}
+
+				val1 = 0.111f*val1; val2 = 0.111f*val2;
+				//                  val3 = 0.111f*val3; val4 = 0.111f*val4;
+				if ((val1 <= thresh_val) && (val2 <= thresh_val)) Map[dimX*dimY*k + i*dimY + j] = 1;
+				//                  if ((val3 <= thresh_val) && (val4 <= thresh_val)) Map2[dimX*dimY*k + i*dimY + j] = 1;                        
+			}
+		}
+	}
+	return 1;
+}
+
+float cleanMap(unsigned short *Map, int dimX, int dimY, int dimZ)
+{
+	int i, j, k, i1, j1, i2, j2, counter;
+#pragma omp parallel for shared(Map) private(i, j, k, i1, j1, i2, j2, counter)
+	for (i = 0; i<dimX; i++) {
+		for (j = 0; j<dimY; j++) {
+			for (k = 0; k<dimZ; k++) {
+
+				counter = 0;
+				for (i1 = -3; i1 <= 3; i1++) {
+					for (j1 = -3; j1 <= 3; j1++) {
+						i2 = i + i1;
+						j2 = j + j1;
+						if ((i2 >= 0) && (i2 < dimX) && (j2 >= 0) && (j2 < dimY)) {
+							if (Map[dimX*dimY*k + i2*dimY + j2] == 0) counter++;
+						}
+					}
+				}
+				if (counter < 24) Map[dimX*dimY*k + i*dimY + j] = 1;
+			}
+		}
+	}
+	return *Map;
+}
+
+
+/*********************3D *********************/
\ No newline at end of file
diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/LLT_model_core.h b/Wrappers/Matlab/mex_compile/regularizers_CPU/LLT_model_core.h
new file mode 100644
index 0000000..13fce5a
--- /dev/null
+++ b/Wrappers/Matlab/mex_compile/regularizers_CPU/LLT_model_core.h
@@ -0,0 +1,46 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+//#include <matrix.h>
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+#include "utils.h"
+
+#define EPS 0.01
+
+/* 2D functions */
+#ifdef __cplusplus
+extern "C" {
+#endif
+float der2D(float *U, float *D1, float *D2, int dimX, int dimY, int dimZ);
+float div_upd2D(float *U0, float *U, float *D1, float *D2, int dimX, int dimY, int dimZ, float lambda, float tau);
+
+float der3D(float *U, float *D1, float *D2, float *D3, int dimX, int dimY, int dimZ);
+float div_upd3D(float *U0, float *U, float *D1, float *D2, float *D3, unsigned short *Map, int switcher, int dimX, int dimY, int dimZ, float lambda, float tau);
+
+float calcMap(float *U, unsigned short *Map, int dimX, int dimY, int dimZ);
+float cleanMap(unsigned short *Map, int dimX, int dimY, int dimZ);
+
+//float copyIm(float *A, float *U, int dimX, int dimY, int dimZ);
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file
diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/PatchBased_Regul.c b/Wrappers/Matlab/mex_compile/regularizers_CPU/PatchBased_Regul.c
new file mode 100644
index 0000000..9c925df
--- /dev/null
+++ b/Wrappers/Matlab/mex_compile/regularizers_CPU/PatchBased_Regul.c
@@ -0,0 +1,140 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "mex.h"
+#include "matrix.h"
+#include "PatchBased_Regul_core.h"
+
+
+/* C-OMP implementation of  patch-based (PB) regularization (2D and 3D cases). 
+ * This method finds self-similar patches in data and performs one fixed point iteration to mimimize the PB penalty function
+ * 
+ * References: 1. Yang Z. & Jacob M. "Nonlocal Regularization of Inverse Problems"
+ *             2. Kazantsev D. et al. "4D-CT reconstruction with unified spatial-temporal patch-based regularization"
+ *
+ * Input Parameters:
+ * 1. Image (2D or 3D) [required]
+ * 2. ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window) [optional]
+ * 3. ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window) [optional]
+ * 4. h - parameter for the PB penalty function [optional]
+ * 5. lambda - regularization parameter  [optional]
+
+ * Output:
+ * 1. regularized (denoised) Image (N x N)/volume (N x N x N)
+ *
+ * 2D denoising example in Matlab:   
+   Im = double(imread('lena_gray_256.tif'))/255;  % loading image
+   u0 = Im + .03*randn(size(Im)); u0(u0<0) = 0; % adding noise
+   ImDen = PatchBased_Regul(single(u0), 3, 1, 0.08, 0.05); 
+ *
+ * Matlab + C/mex compilers needed
+ * to compile with OMP support: mex PatchBased_Regul.c CFLAGS="\$CFLAGS -fopenmp -Wall" LDFLAGS="\$LDFLAGS -fopenmp"
+ *
+ * D. Kazantsev *
+ * 02/07/2014
+ * Harwell, UK
+ */
+
+
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[]) 
+{    
+    int N, M, Z, numdims, SearchW, SimilW, SearchW_real, padXY, newsizeX, newsizeY, newsizeZ, switchpad_crop;
+    const int  *dims;
+    float *A, *B=NULL, *Ap=NULL, *Bp=NULL, h, lambda;
+    
+    numdims = mxGetNumberOfDimensions(prhs[0]);
+    dims = mxGetDimensions(prhs[0]);
+    
+    N = dims[0];
+    M = dims[1];
+    Z = dims[2];
+    
+    if ((numdims < 2) || (numdims > 3)) {mexErrMsgTxt("The input is 2D image or 3D volume");}
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input in single precision is required"); }
+    
+    if(nrhs != 5) mexErrMsgTxt("Five inputs reqired: Image(2D,3D), SearchW, SimilW, Threshold, Regularization parameter");
+    
+    /*Handling inputs*/
+    A  = (float *) mxGetData(prhs[0]);    /* the image/volume to regularize/filter */
+    SearchW_real = 3; /*default value*/
+    SimilW = 1; /*default value*/
+    h = 0.1; 
+    lambda = 0.1;
+    
+    if ((nrhs == 2) || (nrhs == 3) || (nrhs == 4) || (nrhs == 5))   SearchW_real  = (int) mxGetScalar(prhs[1]); /* the searching window ratio */
+    if ((nrhs == 3) || (nrhs == 4) || (nrhs == 5))   SimilW =  (int) mxGetScalar(prhs[2]);  /* the similarity window ratio */
+    if ((nrhs == 4) || (nrhs == 5))  h =  (float) mxGetScalar(prhs[3]);  /* parameter for the PB filtering function */
+    if ((nrhs == 5))  lambda = (float) mxGetScalar(prhs[4]); /* regularization parameter */   
+
+
+    if (h <= 0) mexErrMsgTxt("Parmeter for the PB penalty function should be > 0");
+    if (lambda <= 0) mexErrMsgTxt(" Regularization parmeter should be > 0");
+       
+    SearchW = SearchW_real + 2*SimilW;
+    
+    /* SearchW_full = 2*SearchW + 1; */ /* the full searching window  size */
+    /* SimilW_full = 2*SimilW + 1;  */  /* the full similarity window  size */
+    
+    padXY = SearchW + 2*SimilW; /* padding sizes */
+    newsizeX = N + 2*(padXY); /* the X size of the padded array */
+    newsizeY = M + 2*(padXY); /* the Y size of the padded array */
+    newsizeZ = Z + 2*(padXY); /* the Z size of the padded array */
+    int N_dims[] = {newsizeX, newsizeY, newsizeZ};
+    
+    /******************************2D case ****************************/
+    if (numdims == 2) {
+        /*Handling output*/
+        B = (float*)mxGetData(plhs[0] = mxCreateNumericMatrix(N, M, mxSINGLE_CLASS, mxREAL));
+        /*allocating memory for the padded arrays */
+        Ap = (float*)mxGetData(mxCreateNumericMatrix(newsizeX, newsizeY, mxSINGLE_CLASS, mxREAL));
+        Bp = (float*)mxGetData(mxCreateNumericMatrix(newsizeX, newsizeY, mxSINGLE_CLASS, mxREAL));
+        /**************************************************************************/
+        /*Perform padding of image A to the size of [newsizeX * newsizeY] */
+        switchpad_crop = 0; /*padding*/
+        pad_crop(A, Ap, M, N, 0, newsizeY, newsizeX, 0, padXY, switchpad_crop);
+        
+        /* Do PB regularization with the padded array  */
+        PB_FUNC2D(Ap, Bp, newsizeY, newsizeX, padXY, SearchW, SimilW, (float)h, (float)lambda);
+        
+        switchpad_crop = 1; /*cropping*/
+        pad_crop(Bp, B, M, N, 0, newsizeY, newsizeX, 0, padXY, switchpad_crop);
+    }
+    else
+    {
+        /******************************3D case ****************************/
+        /*Handling output*/
+        B = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dims, mxSINGLE_CLASS, mxREAL));
+        /*allocating memory for the padded arrays */
+        Ap = (float*)mxGetPr(mxCreateNumericArray(3, N_dims, mxSINGLE_CLASS, mxREAL));
+        Bp = (float*)mxGetPr(mxCreateNumericArray(3, N_dims, mxSINGLE_CLASS, mxREAL));
+        /**************************************************************************/
+        
+        /*Perform padding of image A to the size of [newsizeX * newsizeY * newsizeZ] */
+        switchpad_crop = 0; /*padding*/
+        pad_crop(A, Ap, M, N, Z, newsizeY, newsizeX, newsizeZ, padXY, switchpad_crop);
+        
+        /* Do PB regularization with the padded array  */
+        PB_FUNC3D(Ap, Bp, newsizeY, newsizeX, newsizeZ, padXY, SearchW, SimilW, (float)h, (float)lambda);
+        
+        switchpad_crop = 1; /*cropping*/
+        pad_crop(Bp, B, M, N, Z, newsizeY, newsizeX, newsizeZ, padXY, switchpad_crop);
+    } /*end else ndims*/ 
+}    
diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/PatchBased_Regul_core.c b/Wrappers/Matlab/mex_compile/regularizers_CPU/PatchBased_Regul_core.c
new file mode 100644
index 0000000..acfb464
--- /dev/null
+++ b/Wrappers/Matlab/mex_compile/regularizers_CPU/PatchBased_Regul_core.c
@@ -0,0 +1,213 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazanteev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "PatchBased_Regul_core.h"
+
+/* C-OMP implementation of  patch-based (PB) regularization (2D and 3D cases). 
+ * This method finds self-similar patches in data and performs one fixed point iteration to mimimize the PB penalty function
+ * 
+ * References: 1. Yang Z. & Jacob M. "Nonlocal Regularization of Inverse Problems"
+ *             2. Kazantsev D. et al. "4D-CT reconstruction with unified spatial-temporal patch-based regularization"
+ *
+ * Input Parameters:
+ * 1. Image (2D or 3D) [required]
+ * 2. ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window) [optional]
+ * 3. ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window) [optional]
+ * 4. h - parameter for the PB penalty function [optional]
+ * 5. lambda - regularization parameter  [optional]
+
+ * Output:
+ * 1. regularized (denoised) Image (N x N)/volume (N x N x N)
+ *
+ * 2D denoising example in Matlab:   
+   Im = double(imread('lena_gray_256.tif'))/255;  % loading image
+   u0 = Im + .03*randn(size(Im)); u0(u0<0) = 0; % adding noise
+   ImDen = PatchBased_Regul(single(u0), 3, 1, 0.08, 0.05); 
+ 
+ * D. Kazantsev *
+ * 02/07/2014
+ * Harwell, UK
+ */
+
+/*2D version function */
+float PB_FUNC2D(float *A, float *B, int dimX, int dimY, int padXY, int SearchW, int SimilW, float h, float lambda)
+{
+    int i, j, i_n, j_n, i_m, j_m, i_p, j_p, i_l, j_l, i1, j1, i2, j2, i3, j3, i5,j5, count, SimilW_full;
+    float *Eucl_Vec, h2, denh2, normsum, Weight, Weight_norm, value, denom, WeightGlob, t1;
+                 
+    /*SearchW_full = 2*SearchW + 1; */ /* the full searching window  size */
+    SimilW_full = 2*SimilW + 1;   /* the full similarity window  size */
+    h2 = h*h;
+    denh2 = 1/(2*h2);   
+    
+     /*Gaussian kernel */
+    Eucl_Vec = (float*) calloc (SimilW_full*SimilW_full,sizeof(float));
+    count = 0;
+    for(i_n=-SimilW; i_n<=SimilW; i_n++) {
+        for(j_n=-SimilW; j_n<=SimilW; j_n++) {
+            t1 = pow(((float)i_n), 2) + pow(((float)j_n), 2);
+            Eucl_Vec[count] = exp(-(t1)/(2*SimilW*SimilW));
+            count = count + 1;                       
+        }} /*main neighb loop */   
+    
+    /*The NLM code starts here*/         
+    /* setting OMP here */
+    #pragma omp parallel for shared (A, B, dimX, dimY, Eucl_Vec, lambda, denh2) private(denom, i, j, WeightGlob, count,  i1, j1, i2, j2, i3, j3, i5, j5, Weight_norm, normsum, i_m, j_m, i_n, j_n, i_l, j_l, i_p, j_p, Weight,  value)
+    
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+             if (((i >= padXY) && (i < dimX-padXY)) &&  ((j >= padXY) && (j < dimY-padXY))) {
+          
+                /* Massive Search window loop */
+                Weight_norm = 0; value = 0.0;
+                for(i_m=-SearchW; i_m<=SearchW; i_m++) {
+                    for(j_m=-SearchW; j_m<=SearchW; j_m++) {
+                       /*checking boundaries*/
+                        i1 = i+i_m; j1 = j+j_m;
+                        
+                        WeightGlob = 0.0;
+                        /* if inside the searching window */                        
+                         for(i_l=-SimilW; i_l<=SimilW; i_l++) {
+                             for(j_l=-SimilW; j_l<=SimilW; j_l++) {
+                                 i2 = i1+i_l; j2 = j1+j_l;
+                                 
+                                 i3 = i+i_l; j3 = j+j_l;   /*coordinates of the inner patch loop */
+                                
+                                 count = 0; normsum = 0.0;
+                                 for(i_p=-SimilW; i_p<=SimilW; i_p++) {
+                                     for(j_p=-SimilW; j_p<=SimilW; j_p++) {
+                                         i5 = i2 + i_p; j5 = j2 + j_p;
+                                         normsum = normsum + Eucl_Vec[count]*pow(A[(i3+i_p)*dimY+(j3+j_p)]-A[i5*dimY+j5], 2);        
+                                         count = count + 1;
+                                     }}
+                                  if (normsum != 0) Weight = (exp(-normsum*denh2)); 
+                                  else Weight = 0.0;
+                                 WeightGlob += Weight;
+                             }}                      
+      
+                         value += A[i1*dimY+j1]*WeightGlob;
+                         Weight_norm += WeightGlob;           
+                    }}      /*search window loop end*/
+                
+                /* the final loop to average all values in searching window with weights */
+                denom = 1 + lambda*Weight_norm;
+                B[i*dimY+j] = (A[i*dimY+j] + lambda*value)/denom;         
+             }
+        }}   /*main loop*/     
+    return (*B);
+    free(Eucl_Vec);    
+}
+ 
+/*3D version*/ 
+ float PB_FUNC3D(float *A, float *B, int dimX, int dimY, int dimZ, int padXY, int SearchW, int SimilW, float h, float lambda)       
+ {
+    int SimilW_full, count, i, j, k,  i_n, j_n, k_n, i_m, j_m, k_m, i_p, j_p, k_p, i_l, j_l, k_l, i1, j1, k1, i2, j2, k2, i3, j3, k3, i5, j5, k5;
+    float *Eucl_Vec, h2, denh2, normsum, Weight, Weight_norm, value, denom, WeightGlob;
+        
+    /*SearchW_full = 2*SearchW + 1; */ /* the full searching window  size */
+    SimilW_full = 2*SimilW + 1;   /* the full similarity window  size */
+    h2 = h*h;
+    denh2 = 1/(2*h2);
+    
+    /*Gaussian kernel */
+    Eucl_Vec = (float*) calloc (SimilW_full*SimilW_full*SimilW_full,sizeof(float));
+    count = 0;
+    for(i_n=-SimilW; i_n<=SimilW; i_n++) {
+        for(j_n=-SimilW; j_n<=SimilW; j_n++) {
+            for(k_n=-SimilW; k_n<=SimilW; k_n++) {
+                Eucl_Vec[count] = exp(-(pow((float)i_n, 2) + pow((float)j_n, 2) + pow((float)k_n, 2))/(2*SimilW*SimilW*SimilW));
+                count = count + 1;
+            }}} /*main neighb loop */
+    
+    /*The NLM code starts here*/         
+    /* setting OMP here */
+    #pragma omp parallel for shared (A, B, dimX, dimY, dimZ, Eucl_Vec, lambda, denh2) private(denom, i, j, k, WeightGlob,count,  i1, j1, k1, i2, j2, k2, i3, j3, k3, i5, j5, k5, Weight_norm, normsum, i_m, j_m, k_m, i_n, j_n, k_n, i_l, j_l, k_l, i_p, j_p, k_p, Weight, value)    
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            for(k=0; k<dimZ; k++) {
+            if (((i >= padXY) && (i < dimX-padXY)) &&  ((j >= padXY) && (j < dimY-padXY)) &&  ((k >= padXY) && (k < dimZ-padXY))) {
+            /* take all elements around the pixel of interest */                             
+               /* Massive Search window loop */
+                Weight_norm = 0;  value = 0.0;
+                for(i_m=-SearchW; i_m<=SearchW; i_m++) {
+                    for(j_m=-SearchW; j_m<=SearchW; j_m++) {
+                        for(k_m=-SearchW; k_m<=SearchW; k_m++) {
+                         /*checking boundaries*/
+                        i1 = i+i_m; j1 = j+j_m; k1 = k+k_m;
+                        
+                        WeightGlob = 0.0;
+                        /* if inside the searching window */                        
+                         for(i_l=-SimilW; i_l<=SimilW; i_l++) {
+                             for(j_l=-SimilW; j_l<=SimilW; j_l++) {
+                                 for(k_l=-SimilW; k_l<=SimilW; k_l++) {                                 
+                                 i2 = i1+i_l; j2 = j1+j_l; k2 = k1+k_l;
+                                 
+                                 i3 = i+i_l; j3 = j+j_l; k3 = k+k_l;   /*coordinates of the inner patch loop */
+                                
+                                 count = 0; normsum = 0.0;
+                                 for(i_p=-SimilW; i_p<=SimilW; i_p++) {
+                                     for(j_p=-SimilW; j_p<=SimilW; j_p++) {
+                                         for(k_p=-SimilW; k_p<=SimilW; k_p++) {
+                                         i5 = i2 + i_p; j5 = j2 + j_p; k5 = k2 + k_p;
+                                         normsum = normsum + Eucl_Vec[count]*pow(A[(dimX*dimY)*(k3+k_p)+(i3+i_p)*dimY+(j3+j_p)]-A[(dimX*dimY)*k5 + i5*dimY+j5], 2);        
+                                         count = count + 1;
+                                     }}}
+                                  if (normsum != 0) Weight = (exp(-normsum*denh2)); 
+                                  else Weight = 0.0;
+                                 WeightGlob += Weight;
+                             }}}                                                 
+                         value += A[(dimX*dimY)*k1 + i1*dimY+j1]*WeightGlob;
+                         Weight_norm += WeightGlob;
+             
+                    }}}      /*search window loop end*/
+                
+                /* the final loop to average all values in searching window with weights */
+                denom = 1 + lambda*Weight_norm;               
+                B[(dimX*dimY)*k + i*dimY+j] = (A[(dimX*dimY)*k + i*dimY+j] + lambda*value)/denom;      
+            }            
+        }}}   /*main loop*/              
+       free(Eucl_Vec);        
+       return *B;
+}
+
+float pad_crop(float *A, float *Ap, int OldSizeX, int OldSizeY, int OldSizeZ, int NewSizeX, int NewSizeY, int NewSizeZ, int padXY, int switchpad_crop)
+{
+    /* padding-cropping function */
+    int i,j,k;    
+    if (NewSizeZ > 1) {    
+           for (i=0; i < NewSizeX; i++) {
+            for (j=0; j < NewSizeY; j++) {
+              for (k=0; k < NewSizeZ; k++) {
+                if (((i >= padXY) && (i < NewSizeX-padXY)) &&  ((j >= padXY) && (j < NewSizeY-padXY)) &&  ((k >= padXY) && (k < NewSizeZ-padXY))) {
+                    if (switchpad_crop == 0)  Ap[NewSizeX*NewSizeY*k + i*NewSizeY+j] = A[OldSizeX*OldSizeY*(k - padXY) + (i-padXY)*(OldSizeY)+(j-padXY)];
+                    else  Ap[OldSizeX*OldSizeY*(k - padXY) + (i-padXY)*(OldSizeY)+(j-padXY)] = A[NewSizeX*NewSizeY*k + i*NewSizeY+j];
+                }
+            }}}   
+    }
+    else {
+        for (i=0; i < NewSizeX; i++) {
+            for (j=0; j < NewSizeY; j++) {
+                if (((i >= padXY) && (i < NewSizeX-padXY)) &&  ((j >= padXY) && (j < NewSizeY-padXY))) {
+                    if (switchpad_crop == 0)  Ap[i*NewSizeY+j] = A[(i-padXY)*(OldSizeY)+(j-padXY)];
+                    else  Ap[(i-padXY)*(OldSizeY)+(j-padXY)] = A[i*NewSizeY+j];
+                }
+            }}
+    }
+    return *Ap;
+}
\ No newline at end of file
diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/PatchBased_Regul_core.h b/Wrappers/Matlab/mex_compile/regularizers_CPU/PatchBased_Regul_core.h
new file mode 100644
index 0000000..d4a8a46
--- /dev/null
+++ b/Wrappers/Matlab/mex_compile/regularizers_CPU/PatchBased_Regul_core.h
@@ -0,0 +1,69 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazanteev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#define _USE_MATH_DEFINES
+
+//#include <matrix.h>
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+
+/* C-OMP implementation of  patch-based (PB) regularization (2D and 3D cases).
+* This method finds self-similar patches in data and performs one fixed point iteration to mimimize the PB penalty function
+*
+* References: 1. Yang Z. & Jacob M. "Nonlocal Regularization of Inverse Problems"
+*             2. Kazantsev D. et al. "4D-CT reconstruction with unified spatial-temporal patch-based regularization"
+*
+* Input Parameters (mandatory):
+* 1. Image (2D or 3D)
+* 2. ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window)
+* 3. ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window)
+* 4. h - parameter for the PB penalty function
+* 5. lambda - regularization parameter
+
+* Output:
+* 1. regularized (denoised) Image (N x N)/volume (N x N x N)
+*
+* Quick 2D denoising example in Matlab:
+Im = double(imread('lena_gray_256.tif'))/255;  % loading image
+u0 = Im + .03*randn(size(Im)); u0(u0<0) = 0; % adding noise
+ImDen = PB_Regul_CPU(single(u0), 3, 1, 0.08, 0.05);
+*
+* Please see more tests in a file:
+TestTemporalSmoothing.m
+
+*
+* Matlab + C/mex compilers needed
+* to compile with OMP support: mex PB_Regul_CPU.c CFLAGS="\$CFLAGS -fopenmp -Wall" LDFLAGS="\$LDFLAGS -fopenmp"
+*
+* D. Kazantsev *
+* 02/07/2014
+* Harwell, UK
+*/
+#ifdef __cplusplus
+extern "C" {
+#endif
+float pad_crop(float *A, float *Ap, int OldSizeX, int OldSizeY, int OldSizeZ, int NewSizeX, int NewSizeY, int NewSizeZ, int padXY, int switchpad_crop);
+float PB_FUNC2D(float *A, float *B, int dimX, int dimY, int padXY, int SearchW, int SimilW, float h, float lambda);
+float PB_FUNC3D(float *A, float *B, int dimX, int dimY, int dimZ, int padXY, int SearchW, int SimilW, float h, float lambda);
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file
diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/SplitBregman_TV.c b/Wrappers/Matlab/mex_compile/regularizers_CPU/SplitBregman_TV.c
new file mode 100644
index 0000000..38f6a9d
--- /dev/null
+++ b/Wrappers/Matlab/mex_compile/regularizers_CPU/SplitBregman_TV.c
@@ -0,0 +1,179 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "mex.h"
+#include <matrix.h>
+#include "SplitBregman_TV_core.h"
+
+/* C-OMP implementation of Split Bregman - TV denoising-regularization model (2D/3D)
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume
+ * 2. lambda - regularization parameter
+ * 3. Number of iterations [OPTIONAL parameter]
+ * 4. eplsilon - tolerance constant [OPTIONAL parameter]
+ * 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
+ *
+ * Output:
+ * Filtered/regularized image
+ *
+ * Example:
+ * figure;
+ * Im = double(imread('lena_gray_256.tif'))/255;  % loading image
+ * u0 = Im + .05*randn(size(Im)); u0(u0 < 0) = 0;
+ * u = SplitBregman_TV(single(u0), 10, 30, 1e-04);
+ *
+ * to compile with OMP support: mex SplitBregman_TV.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+ * References:
+ * The Split Bregman Method for L1 Regularized Problems, by Tom Goldstein and Stanley Osher.
+ * D. Kazantsev, 2016*
+ */
+
+
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+        
+{
+    int number_of_dims, iter, dimX, dimY, dimZ, ll, j, count, methTV;
+    const int  *dim_array;
+    float *A, *U=NULL, *U_old=NULL, *Dx=NULL, *Dy=NULL, *Dz=NULL, *Bx=NULL, *By=NULL, *Bz=NULL, lambda, mu, epsil, re, re1, re_old;
+    
+    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+    dim_array = mxGetDimensions(prhs[0]);
+    
+    /*Handling Matlab input data*/
+    if ((nrhs < 2) || (nrhs > 5)) mexErrMsgTxt("At least 2 parameters is required: Image(2D/3D), Regularization parameter. The full list of parameters: Image(2D/3D), Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1')");
+    
+    /*Handling Matlab input data*/
+    A  = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */
+    mu =  (float) mxGetScalar(prhs[1]); /* regularization parameter */
+    iter = 35; /* default iterations number */
+    epsil = 0.0001; /* default tolerance constant */
+    methTV = 0;  /* default isotropic TV penalty */
+    if ((nrhs == 3) || (nrhs == 4) || (nrhs == 5))  iter = (int) mxGetScalar(prhs[2]); /* iterations number */
+    if ((nrhs == 4) || (nrhs == 5))  epsil =  (float) mxGetScalar(prhs[3]); /* tolerance constant */
+    if (nrhs == 5)  {
+        char *penalty_type;
+        penalty_type = mxArrayToString(prhs[4]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */
+        if ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0)) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',");
+        if (strcmp(penalty_type, "l1") == 0)  methTV = 1;  /* enable 'l1' penalty */
+        mxFree(penalty_type);
+    }
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+    
+    lambda = 2.0f*mu;
+    count = 1;
+    re_old = 0.0f;
+    /*Handling Matlab output data*/
+    dimY = dim_array[0]; dimX = dim_array[1]; dimZ = dim_array[2];
+    
+    if (number_of_dims == 2) {
+        dimZ = 1; /*2D case*/
+        U = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        U_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        Dx = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        Dy = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        Bx = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        By = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        
+        copyIm(A, U, dimX, dimY, dimZ); /*initialize */
+        
+        /* begin outer SB iterations */
+        for(ll=0; ll<iter; ll++) {
+            
+            /*storing old values*/
+            copyIm(U, U_old, dimX, dimY, dimZ);
+            
+            /*GS iteration */
+            gauss_seidel2D(U, A, Dx, Dy, Bx, By, dimX, dimY, lambda, mu);
+            
+            if (methTV == 1)  updDxDy_shrinkAniso2D(U, Dx, Dy, Bx, By, dimX, dimY, lambda);
+            else updDxDy_shrinkIso2D(U, Dx, Dy, Bx, By, dimX, dimY, lambda);
+            
+            updBxBy2D(U, Dx, Dy, Bx, By, dimX, dimY);
+            
+            /* calculate norm to terminate earlier */
+            re = 0.0f; re1 = 0.0f;
+            for(j=0; j<dimX*dimY*dimZ; j++)
+            {
+                re += pow(U_old[j] - U[j],2);
+                re1 += pow(U_old[j],2);
+            }
+            re = sqrt(re)/sqrt(re1);
+            if (re < epsil)  count++;
+            if (count > 4) break;
+            
+            /* check that the residual norm is decreasing */
+            if (ll > 2) {
+                if (re > re_old) break;
+            }
+            re_old = re;
+            /*printf("%f %i %i \n", re, ll, count); */
+            
+            /*copyIm(U_old, U, dimX, dimY, dimZ); */
+        }
+        printf("SB iterations stopped at iteration: %i\n", ll);
+    }
+    if (number_of_dims == 3) {
+        U = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        U_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        Dx = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        Dy = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        Dz = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        Bx = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        By = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        Bz = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+        
+        copyIm(A, U, dimX, dimY, dimZ); /*initialize */
+        
+        /* begin outer SB iterations */
+        for(ll=0; ll<iter; ll++) {
+            
+            /*storing old values*/
+            copyIm(U, U_old, dimX, dimY, dimZ);
+            
+            /*GS iteration */
+            gauss_seidel3D(U, A, Dx, Dy, Dz, Bx, By, Bz, dimX, dimY, dimZ, lambda, mu);
+            
+            if (methTV == 1) updDxDyDz_shrinkAniso3D(U, Dx, Dy, Dz, Bx, By, Bz, dimX, dimY, dimZ, lambda);
+            else updDxDyDz_shrinkIso3D(U, Dx, Dy, Dz, Bx, By, Bz, dimX, dimY, dimZ, lambda);
+            
+            updBxByBz3D(U, Dx, Dy, Dz, Bx, By, Bz, dimX, dimY, dimZ);
+            
+            /* calculate norm to terminate earlier */
+            re = 0.0f; re1 = 0.0f;
+            for(j=0; j<dimX*dimY*dimZ; j++)
+            {
+                re += pow(U[j] - U_old[j],2);
+                re1 += pow(U[j],2);
+            }
+            re = sqrt(re)/sqrt(re1);
+            if (re < epsil)  count++;
+            if (count > 4) break;
+            
+            /* check that the residual norm is decreasing */
+            if (ll > 2) {
+                if (re > re_old) break; }
+            /*printf("%f %i %i \n", re, ll, count); */
+            re_old = re;
+        }
+        printf("SB iterations stopped at iteration: %i\n", ll);
+    }
+}
\ No newline at end of file
diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/SplitBregman_TV_core.c b/Wrappers/Matlab/mex_compile/regularizers_CPU/SplitBregman_TV_core.c
new file mode 100644
index 0000000..4109a4b
--- /dev/null
+++ b/Wrappers/Matlab/mex_compile/regularizers_CPU/SplitBregman_TV_core.c
@@ -0,0 +1,259 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "SplitBregman_TV_core.h"
+
+/* C-OMP implementation of Split Bregman - TV denoising-regularization model (2D/3D)
+*
+* Input Parameters:
+* 1. Noisy image/volume
+* 2. lambda - regularization parameter
+* 3. Number of iterations [OPTIONAL parameter]
+* 4. eplsilon - tolerance constant [OPTIONAL parameter]
+* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
+*
+* Output:
+* Filtered/regularized image
+*
+* Example:
+* figure;
+* Im = double(imread('lena_gray_256.tif'))/255;  % loading image
+* u0 = Im + .05*randn(size(Im)); u0(u0 < 0) = 0;
+* u = SplitBregman_TV(single(u0), 10, 30, 1e-04);
+*
+* References:
+* The Split Bregman Method for L1 Regularized Problems, by Tom Goldstein and Stanley Osher.
+* D. Kazantsev, 2016*
+*/
+
+
+/* 2D-case related Functions */
+/*****************************************************************/
+float gauss_seidel2D(float *U, float *A, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda, float mu)
+{
+    float sum, normConst;
+    int i,j,i1,i2,j1,j2;
+    normConst = 1.0f/(mu + 4.0f*lambda);
+    
+#pragma omp parallel for shared(U) private(i,j,i1,i2,j1,j2,sum)
+    for(i=0; i<dimX; i++) {
+        /* symmetric boundary conditions (Neuman) */
+        i1 = i+1; if (i1 == dimX) i1 = i-1;
+        i2 = i-1; if (i2 < 0) i2 = i+1;
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions (Neuman) */
+            j1 = j+1; if (j1 == dimY) j1 = j-1;
+            j2 = j-1; if (j2 < 0) j2 = j+1;
+            
+            sum = Dx[(i2)*dimY + (j)] - Dx[(i)*dimY + (j)] + Dy[(i)*dimY + (j2)] - Dy[(i)*dimY + (j)] - Bx[(i2)*dimY + (j)] + Bx[(i)*dimY + (j)] - By[(i)*dimY + (j2)] + By[(i)*dimY + (j)];
+            sum += (U[(i1)*dimY + (j)] + U[(i2)*dimY + (j)] + U[(i)*dimY + (j1)] + U[(i)*dimY + (j2)]);
+            sum *= lambda;
+            sum += mu*A[(i)*dimY + (j)];
+            U[(i)*dimY + (j)] = normConst*sum;
+        }}
+    return *U;
+}
+
+float updDxDy_shrinkAniso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda)
+{
+    int i,j,i1,j1;
+    float val1, val11, val2, val22, denom_lam;
+    denom_lam = 1.0f/lambda;
+#pragma omp parallel for shared(U,denom_lam) private(i,j,i1,j1,val1,val11,val2,val22)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions (Neuman) */
+            i1 = i+1; if (i1 == dimX) i1 = i-1;
+            j1 = j+1; if (j1 == dimY) j1 = j-1;
+            
+            val1 = (U[(i1)*dimY + (j)] - U[(i)*dimY + (j)]) + Bx[(i)*dimY + (j)];
+            val2 = (U[(i)*dimY + (j1)] - U[(i)*dimY + (j)]) + By[(i)*dimY + (j)];
+            
+            val11 = fabs(val1) - denom_lam; if (val11 < 0) val11 = 0;
+            val22 = fabs(val2) - denom_lam; if (val22 < 0) val22 = 0;
+            
+            if (val1 !=0) Dx[(i)*dimY + (j)] = (val1/fabs(val1))*val11; else Dx[(i)*dimY + (j)] = 0;
+            if (val2 !=0) Dy[(i)*dimY + (j)] = (val2/fabs(val2))*val22; else Dy[(i)*dimY + (j)] = 0;
+            
+        }}
+    return 1;
+}
+float updDxDy_shrinkIso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda)
+{
+    int i,j,i1,j1;
+    float val1, val11, val2, denom, denom_lam;
+    denom_lam = 1.0f/lambda;
+    
+#pragma omp parallel for shared(U,denom_lam) private(i,j,i1,j1,val1,val11,val2,denom)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions (Neuman) */
+            i1 = i+1; if (i1 == dimX) i1 = i-1;
+            j1 = j+1; if (j1 == dimY) j1 = j-1;
+            
+            val1 = (U[(i1)*dimY + (j)] - U[(i)*dimY + (j)]) + Bx[(i)*dimY + (j)];
+            val2 = (U[(i)*dimY + (j1)] - U[(i)*dimY + (j)]) + By[(i)*dimY + (j)];
+            
+            denom = sqrt(val1*val1 + val2*val2);
+            
+            val11 = (denom - denom_lam); if (val11 < 0) val11 = 0.0f;
+            
+            if (denom != 0.0f) {
+                Dx[(i)*dimY + (j)] = val11*(val1/denom);
+                Dy[(i)*dimY + (j)] = val11*(val2/denom);
+            }
+            else {
+                Dx[(i)*dimY + (j)] = 0;
+                Dy[(i)*dimY + (j)] = 0;
+            }
+        }}
+    return 1;
+}
+float updBxBy2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY)
+{
+    int i,j,i1,j1;
+#pragma omp parallel for shared(U) private(i,j,i1,j1)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions (Neuman) */
+            i1 = i+1; if (i1 == dimX) i1 = i-1;
+            j1 = j+1; if (j1 == dimY) j1 = j-1;
+            
+            Bx[(i)*dimY + (j)] = Bx[(i)*dimY + (j)] + ((U[(i1)*dimY + (j)] - U[(i)*dimY + (j)]) - Dx[(i)*dimY + (j)]);
+            By[(i)*dimY + (j)] = By[(i)*dimY + (j)] + ((U[(i)*dimY + (j1)] - U[(i)*dimY + (j)]) - Dy[(i)*dimY + (j)]);
+        }}
+    return 1;
+}
+
+
+/* 3D-case related Functions */
+/*****************************************************************/
+float gauss_seidel3D(float *U, float *A, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda, float mu)
+{
+    float normConst, d_val, b_val, sum;
+    int i,j,i1,i2,j1,j2,k,k1,k2;
+    normConst = 1.0f/(mu + 6.0f*lambda);
+#pragma omp parallel for shared(U) private(i,j,i1,i2,j1,j2,k,k1,k2,d_val,b_val,sum)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            for(k=0; k<dimZ; k++) {
+                /* symmetric boundary conditions (Neuman) */
+                i1 = i+1; if (i1 == dimX) i1 = i-1;
+                i2 = i-1; if (i2 < 0) i2 = i+1;
+                j1 = j+1; if (j1 == dimY) j1 = j-1;
+                j2 = j-1; if (j2 < 0) j2 = j+1;
+                k1 = k+1; if (k1 == dimZ) k1 = k-1;
+                k2 = k-1; if (k2 < 0) k2 = k+1;
+                
+                d_val = Dx[(dimX*dimY)*k + (i2)*dimY + (j)] - Dx[(dimX*dimY)*k + (i)*dimY + (j)] + Dy[(dimX*dimY)*k + (i)*dimY + (j2)] - Dy[(dimX*dimY)*k + (i)*dimY + (j)] + Dz[(dimX*dimY)*k2 + (i)*dimY + (j)] - Dz[(dimX*dimY)*k + (i)*dimY + (j)];
+                b_val = -Bx[(dimX*dimY)*k + (i2)*dimY + (j)] + Bx[(dimX*dimY)*k + (i)*dimY + (j)] - By[(dimX*dimY)*k + (i)*dimY + (j2)] + By[(dimX*dimY)*k + (i)*dimY + (j)] - Bz[(dimX*dimY)*k2 + (i)*dimY + (j)] + Bz[(dimX*dimY)*k + (i)*dimY + (j)];
+                sum =  d_val + b_val;
+                sum += U[(dimX*dimY)*k + (i1)*dimY + (j)] + U[(dimX*dimY)*k + (i2)*dimY + (j)] + U[(dimX*dimY)*k + (i)*dimY + (j1)] + U[(dimX*dimY)*k + (i)*dimY + (j2)] + U[(dimX*dimY)*k1 + (i)*dimY + (j)] + U[(dimX*dimY)*k2 + (i)*dimY + (j)];
+                sum *= lambda;
+                sum += mu*A[(dimX*dimY)*k + (i)*dimY + (j)];
+                U[(dimX*dimY)*k + (i)*dimY + (j)] = normConst*sum;
+            }}}
+    return *U;
+}
+
+float updDxDyDz_shrinkAniso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda)
+{
+    int i,j,i1,j1,k,k1,index;
+    float val1, val11, val2, val22, val3, val33, denom_lam;
+    denom_lam = 1.0f/lambda;
+#pragma omp parallel for shared(U,denom_lam) private(index,i,j,i1,j1,k,k1,val1,val11,val2,val22,val3,val33)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            for(k=0; k<dimZ; k++) {
+                index = (dimX*dimY)*k + (i)*dimY + (j);
+                /* symmetric boundary conditions (Neuman) */
+                i1 = i+1; if (i1 == dimX) i1 = i-1;
+                j1 = j+1; if (j1 == dimY) j1 = j-1;
+                k1 = k+1; if (k1 == dimZ) k1 = k-1;
+                
+                val1 = (U[(dimX*dimY)*k + (i1)*dimY + (j)] - U[index]) + Bx[index];
+                val2 = (U[(dimX*dimY)*k + (i)*dimY + (j1)] - U[index]) + By[index];
+                val3 = (U[(dimX*dimY)*k1 + (i)*dimY + (j)] - U[index]) + Bz[index];
+                
+                val11 = fabs(val1) - denom_lam; if (val11 < 0) val11 = 0;
+                val22 = fabs(val2) - denom_lam; if (val22 < 0) val22 = 0;
+                val33 = fabs(val3) - denom_lam; if (val33 < 0) val33 = 0;
+                
+                if (val1 !=0) Dx[index] = (val1/fabs(val1))*val11; else Dx[index] = 0;
+                if (val2 !=0) Dy[index] = (val2/fabs(val2))*val22; else Dy[index] = 0;
+                if (val3 !=0) Dz[index] = (val3/fabs(val3))*val33; else Dz[index] = 0;
+                
+            }}}
+    return 1;
+}
+float updDxDyDz_shrinkIso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda)
+{
+    int i,j,i1,j1,k,k1,index;
+    float val1, val11, val2, val3, denom, denom_lam;
+    denom_lam = 1.0f/lambda;
+#pragma omp parallel for shared(U,denom_lam) private(index,denom,i,j,i1,j1,k,k1,val1,val11,val2,val3)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            for(k=0; k<dimZ; k++) {
+                index = (dimX*dimY)*k + (i)*dimY + (j);
+                /* symmetric boundary conditions (Neuman) */
+                i1 = i+1; if (i1 == dimX) i1 = i-1;
+                j1 = j+1; if (j1 == dimY) j1 = j-1;
+                k1 = k+1; if (k1 == dimZ) k1 = k-1;
+                
+                val1 = (U[(dimX*dimY)*k + (i1)*dimY + (j)] - U[index]) + Bx[index];
+                val2 = (U[(dimX*dimY)*k + (i)*dimY + (j1)] - U[index]) + By[index];
+                val3 = (U[(dimX*dimY)*k1 + (i)*dimY + (j)] - U[index]) + Bz[index];
+                
+                denom = sqrt(val1*val1 + val2*val2 + val3*val3);
+                
+                val11 = (denom - denom_lam); if (val11 < 0) val11 = 0.0f;
+                
+                if (denom != 0.0f) {
+                    Dx[index] = val11*(val1/denom);
+                    Dy[index] = val11*(val2/denom);
+                    Dz[index] = val11*(val3/denom);
+                }
+                else {
+                    Dx[index] = 0;
+                    Dy[index] = 0;
+                    Dz[index] = 0;
+                }               
+            }}}
+    return 1;
+}
+float updBxByBz3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ)
+{
+    int i,j,k,i1,j1,k1;
+#pragma omp parallel for shared(U) private(i,j,k,i1,j1,k1)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            for(k=0; k<dimZ; k++) {
+                /* symmetric boundary conditions (Neuman) */
+                i1 = i+1; if (i1 == dimX) i1 = i-1;
+                j1 = j+1; if (j1 == dimY) j1 = j-1;
+                k1 = k+1; if (k1 == dimZ) k1 = k-1;
+                
+                Bx[(dimX*dimY)*k + (i)*dimY + (j)] = Bx[(dimX*dimY)*k + (i)*dimY + (j)] + ((U[(dimX*dimY)*k + (i1)*dimY + (j)] - U[(dimX*dimY)*k + (i)*dimY + (j)]) - Dx[(dimX*dimY)*k + (i)*dimY + (j)]);
+                By[(dimX*dimY)*k + (i)*dimY + (j)] = By[(dimX*dimY)*k + (i)*dimY + (j)] + ((U[(dimX*dimY)*k + (i)*dimY + (j1)] - U[(dimX*dimY)*k + (i)*dimY + (j)]) - Dy[(dimX*dimY)*k + (i)*dimY + (j)]);
+                Bz[(dimX*dimY)*k + (i)*dimY + (j)] = Bz[(dimX*dimY)*k + (i)*dimY + (j)] + ((U[(dimX*dimY)*k1 + (i)*dimY + (j)] - U[(dimX*dimY)*k + (i)*dimY + (j)]) - Dz[(dimX*dimY)*k + (i)*dimY + (j)]);
+                
+            }}}
+    return 1;
+}
diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/SplitBregman_TV_core.h b/Wrappers/Matlab/mex_compile/regularizers_CPU/SplitBregman_TV_core.h
new file mode 100644
index 0000000..6ed3ff9
--- /dev/null
+++ b/Wrappers/Matlab/mex_compile/regularizers_CPU/SplitBregman_TV_core.h
@@ -0,0 +1,69 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+//#include <matrix.h>
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+
+#include "utils.h"
+
+/* C-OMP implementation of Split Bregman - TV denoising-regularization model (2D/3D)
+*
+* Input Parameters:
+* 1. Noisy image/volume
+* 2. lambda - regularization parameter
+* 3. Number of iterations [OPTIONAL parameter]
+* 4. eplsilon - tolerance constant [OPTIONAL parameter]
+* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
+*
+* Output:
+* Filtered/regularized image
+*
+* Example:
+* figure;
+* Im = double(imread('lena_gray_256.tif'))/255;  % loading image
+* u0 = Im + .05*randn(size(Im)); u0(u0 < 0) = 0;
+* u = SplitBregman_TV(single(u0), 10, 30, 1e-04);
+*
+* to compile with OMP support: mex SplitBregman_TV.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+* References:
+* The Split Bregman Method for L1 Regularized Problems, by Tom Goldstein and Stanley Osher.
+* D. Kazantsev, 2016*
+*/
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//float copyIm(float *A, float *B, int dimX, int dimY, int dimZ);
+float gauss_seidel2D(float *U, float *A, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda, float mu);
+float updDxDy_shrinkAniso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda);
+float updDxDy_shrinkIso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda);
+float updBxBy2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY);
+
+float gauss_seidel3D(float *U, float *A, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda, float mu);
+float updDxDyDz_shrinkAniso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda);
+float updDxDyDz_shrinkIso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda);
+float updBxByBz3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ);
+
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file
diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/TGV_PD.c b/Wrappers/Matlab/mex_compile/regularizers_CPU/TGV_PD.c
new file mode 100644
index 0000000..c9cb440
--- /dev/null
+++ b/Wrappers/Matlab/mex_compile/regularizers_CPU/TGV_PD.c
@@ -0,0 +1,144 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "TGV_PD_core.h"
+#include "mex.h"
+
+/* C-OMP implementation of Primal-Dual denoising method for 
+ * Total Generilized Variation (TGV)-L2 model (2D case only)
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume (2D)
+ * 2. lambda - regularization parameter
+ * 3. parameter to control first-order term (alpha1)
+ * 4. parameter to control the second-order term (alpha0)
+ * 5. Number of CP iterations
+ *
+ * Output:
+ * Filtered/regularized image 
+ *
+ * Example:
+ * figure;
+ * Im = double(imread('lena_gray_256.tif'))/255;  % loading image
+ * u0 = Im + .03*randn(size(Im)); % adding noise
+ * tic; u = TGV_PD(single(u0), 0.02, 1.3, 1, 550); toc;
+ *
+ * to compile with OMP support: mex TGV_PD.c TGV_PD_core.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+ * References:
+ * K. Bredies "Total Generalized Variation"
+ *
+ * 28.11.16/Harwell
+ */
+ 
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+        
+{
+    int number_of_dims, iter, dimX, dimY, dimZ, ll;
+    const int  *dim_array;
+    float *A, *U, *U_old, *P1, *P2, *Q1, *Q2, *Q3, *V1, *V1_old, *V2, *V2_old, lambda, L2, tau, sigma,  alpha1, alpha0;
+    
+    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+    dim_array = mxGetDimensions(prhs[0]);
+    
+    /*Handling Matlab input data*/
+    A  = (float *) mxGetData(prhs[0]); /*origanal noise image/volume*/
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input in single precision is required"); }
+    lambda =  (float) mxGetScalar(prhs[1]); /*regularization parameter*/
+    alpha1 =  (float) mxGetScalar(prhs[2]); /*first-order term*/
+    alpha0 =  (float) mxGetScalar(prhs[3]); /*second-order term*/
+    iter =  (int) mxGetScalar(prhs[4]); /*iterations number*/
+    if(nrhs != 5) mexErrMsgTxt("Five input parameters is reqired: Image(2D/3D), Regularization parameter, alpha1, alpha0, Iterations");
+    
+    /*Handling Matlab output data*/
+    dimX = dim_array[0]; dimY = dim_array[1];
+    
+    if (number_of_dims == 2) {
+        /*2D case*/
+        dimZ = 1;
+        U = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        
+        /*dual variables*/
+        P1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        P2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        
+        Q1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        Q2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        Q3 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        
+        U_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        
+        V1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        V1_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        V2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+        V2_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));   
+        
+        
+         /*printf("%i \n", i);*/
+    L2 = 12.0f; /*Lipshitz constant*/
+    tau = 1.0/pow(L2,0.5);
+    sigma = 1.0/pow(L2,0.5);
+    
+    /*Copy A to U*/
+    copyIm(A, U, dimX, dimY, dimZ);
+    
+        /* Here primal-dual iterations begin for 2D */
+        for(ll = 0; ll < iter; ll++) {
+            
+            /* Calculate Dual Variable P */
+            DualP_2D(U, V1, V2, P1, P2, dimX, dimY, dimZ, sigma);
+            
+            /*Projection onto convex set for P*/
+            ProjP_2D(P1, P2, dimX, dimY, dimZ, alpha1);
+            
+            /* Calculate Dual Variable Q */
+            DualQ_2D(V1, V2, Q1, Q2, Q3, dimX, dimY, dimZ, sigma);
+            
+            /*Projection onto convex set for Q*/
+            ProjQ_2D(Q1, Q2, Q3, dimX, dimY, dimZ, alpha0);
+            
+            /*saving U into U_old*/
+            copyIm(U, U_old, dimX, dimY, dimZ);
+            
+            /*adjoint operation  -> divergence and projection of P*/
+            DivProjP_2D(U, A, P1, P2, dimX, dimY, dimZ, lambda, tau);
+            
+            /*get updated solution U*/
+            newU(U, U_old, dimX, dimY, dimZ);
+            
+            /*saving V into V_old*/
+            copyIm(V1, V1_old, dimX, dimY, dimZ);
+            copyIm(V2, V2_old, dimX, dimY, dimZ);
+            
+            /* upd V*/
+            UpdV_2D(V1, V2, P1, P2, Q1, Q2, Q3, dimX, dimY, dimZ, tau);
+            
+            /*get new V*/
+            newU(V1, V1_old, dimX, dimY, dimZ);
+            newU(V2, V2_old, dimX, dimY, dimZ);
+        } /*end of iterations*/        
+    }
+    else if (number_of_dims == 3) {
+        mexErrMsgTxt("The input data should be a 2D array");
+        /*3D case*/
+    }
+    else {mexErrMsgTxt("The input data should be a 2D array");}    
+   
+}
diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/TGV_PD_core.c b/Wrappers/Matlab/mex_compile/regularizers_CPU/TGV_PD_core.c
new file mode 100644
index 0000000..4139d10
--- /dev/null
+++ b/Wrappers/Matlab/mex_compile/regularizers_CPU/TGV_PD_core.c
@@ -0,0 +1,208 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazanteev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "TGV_PD_core.h"
+
+/* C-OMP implementation of Primal-Dual denoising method for 
+ * Total Generilized Variation (TGV)-L2 model (2D case only)
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume (2D)
+ * 2. lambda - regularization parameter
+ * 3. parameter to control first-order term (alpha1)
+ * 4. parameter to control the second-order term (alpha0)
+ * 5. Number of CP iterations
+ *
+ * Output:
+ * Filtered/regularized image 
+ *
+ * Example:
+ * figure;
+ * Im = double(imread('lena_gray_256.tif'))/255;  % loading image
+ * u0 = Im + .03*randn(size(Im)); % adding noise
+ * tic; u = PrimalDual_TGV(single(u0), 0.02, 1.3, 1, 550); toc;
+ *
+ * References:
+ * K. Bredies "Total Generalized Variation"
+ *
+ * 28.11.16/Harwell
+ */
+ 
+
+
+
+/*Calculating dual variable P (using forward differences)*/
+float DualP_2D(float *U, float *V1, float *V2, float *P1, float *P2, int dimX, int dimY, int dimZ, float sigma)
+{
+    int i,j;
+#pragma omp parallel for shared(U,V1,V2,P1,P2) private(i,j)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions (Neuman) */
+            if (i == dimX-1) P1[i*dimY + (j)] = P1[i*dimY + (j)] + sigma*((U[(i-1)*dimY + (j)] - U[i*dimY + (j)])  - V1[i*dimY + (j)]);
+            else P1[i*dimY + (j)] = P1[i*dimY + (j)] + sigma*((U[(i + 1)*dimY + (j)] - U[i*dimY + (j)])  - V1[i*dimY + (j)]);
+            if (j == dimY-1) P2[i*dimY + (j)] = P2[i*dimY + (j)] + sigma*((U[(i)*dimY + (j-1)] - U[i*dimY + (j)])  - V2[i*dimY + (j)]);
+            else  P2[i*dimY + (j)] = P2[i*dimY + (j)] + sigma*((U[(i)*dimY + (j+1)] - U[i*dimY + (j)])  - V2[i*dimY + (j)]);
+        }}
+    return 1;
+}
+/*Projection onto convex set for P*/
+float ProjP_2D(float *P1, float *P2, int dimX, int dimY, int dimZ, float alpha1)
+{
+    float grad_magn;
+    int i,j;
+#pragma omp parallel for shared(P1,P2) private(i,j,grad_magn)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            grad_magn = sqrt(pow(P1[i*dimY + (j)],2) + pow(P2[i*dimY + (j)],2));
+            grad_magn = grad_magn/alpha1;
+            if (grad_magn > 1.0) {
+                P1[i*dimY + (j)] = P1[i*dimY + (j)]/grad_magn;
+                P2[i*dimY + (j)] = P2[i*dimY + (j)]/grad_magn;
+            }
+        }}
+    return 1;
+}
+/*Calculating dual variable Q (using forward differences)*/
+float DualQ_2D(float *V1, float *V2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, float sigma)
+{
+    int i,j;
+    float q1, q2, q11, q22;
+#pragma omp parallel for shared(Q1,Q2,Q3,V1,V2) private(i,j,q1,q2,q11,q22)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions (Neuman) */
+            if (i == dimX-1)
+            { q1 = (V1[(i-1)*dimY + (j)] - V1[i*dimY + (j)]);
+              q11 = (V2[(i-1)*dimY + (j)] - V2[i*dimY + (j)]);
+            }
+            else {
+                q1 = (V1[(i+1)*dimY + (j)] - V1[i*dimY + (j)]);
+                q11 = (V2[(i+1)*dimY + (j)] - V2[i*dimY + (j)]);
+            }
+            if (j == dimY-1) {
+                q2 = (V2[(i)*dimY + (j-1)] - V2[i*dimY + (j)]);
+                q22 = (V1[(i)*dimY + (j-1)] - V1[i*dimY + (j)]);
+            }
+            else {
+                q2 = (V2[(i)*dimY + (j+1)] - V2[i*dimY + (j)]);
+                q22 = (V1[(i)*dimY + (j+1)] - V1[i*dimY + (j)]);
+            }
+            Q1[i*dimY + (j)] = Q1[i*dimY + (j)] + sigma*(q1);
+            Q2[i*dimY + (j)] = Q2[i*dimY + (j)] + sigma*(q2);
+            Q3[i*dimY + (j)] = Q3[i*dimY + (j)]  + sigma*(0.5f*(q11 + q22));
+        }}
+    return 1;
+}
+
+float ProjQ_2D(float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, float alpha0)
+{
+    float grad_magn;
+    int i,j;
+#pragma omp parallel for shared(Q1,Q2,Q3) private(i,j,grad_magn)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            grad_magn = sqrt(pow(Q1[i*dimY + (j)],2) + pow(Q2[i*dimY + (j)],2) + 2*pow(Q3[i*dimY + (j)],2));
+            grad_magn = grad_magn/alpha0;
+            if (grad_magn > 1.0) {
+                Q1[i*dimY + (j)] = Q1[i*dimY + (j)]/grad_magn;
+                Q2[i*dimY + (j)] = Q2[i*dimY + (j)]/grad_magn;
+                Q3[i*dimY + (j)] = Q3[i*dimY + (j)]/grad_magn;
+            }
+        }}
+    return 1;
+}
+/* Divergence and projection for P*/
+float DivProjP_2D(float *U, float *A, float *P1, float *P2, int dimX, int dimY, int dimZ, float lambda, float tau)
+{
+    int i,j;
+    float P_v1, P_v2, div;
+#pragma omp parallel for shared(U,A,P1,P2) private(i,j,P_v1,P_v2,div)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            if (i == 0) P_v1 = (P1[i*dimY + (j)]);
+            else P_v1 = (P1[i*dimY + (j)] - P1[(i-1)*dimY + (j)]);
+            if (j == 0) P_v2 = (P2[i*dimY + (j)]);
+            else  P_v2 = (P2[i*dimY + (j)] - P2[(i)*dimY + (j-1)]);
+            div = P_v1 + P_v2;
+            U[i*dimY + (j)] = (lambda*(U[i*dimY + (j)] + tau*div) + tau*A[i*dimY + (j)])/(lambda + tau);
+        }}
+    return *U;
+}
+/*get updated solution U*/
+float newU(float *U, float *U_old, int dimX, int dimY, int dimZ)
+{
+    int i;
+#pragma omp parallel for shared(U,U_old) private(i)
+    for(i=0; i<dimX*dimY*dimZ; i++) U[i] = 2*U[i] - U_old[i];
+    return *U;
+}
+
+/*get update for V*/
+float UpdV_2D(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, float tau)
+{
+    int i,j;
+    float q1, q11, q2, q22, div1, div2;
+#pragma omp parallel for shared(V1,V2,P1,P2,Q1,Q2,Q3) private(i,j, q1, q11, q2, q22, div1, div2)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions (Neuman) */
+            if (i == 0) {
+                q1 = (Q1[i*dimY + (j)]);
+                q11 = (Q3[i*dimY + (j)]);
+            }
+            else {
+                q1 = (Q1[i*dimY + (j)] - Q1[(i-1)*dimY + (j)]);
+                q11 = (Q3[i*dimY + (j)] - Q3[(i-1)*dimY + (j)]);
+            }
+            if (j == 0) {
+                q2 = (Q2[i*dimY + (j)]);
+                q22 = (Q3[i*dimY + (j)]);
+            }
+            else  {
+                q2 = (Q2[i*dimY + (j)] - Q2[(i)*dimY + (j-1)]);
+                q22 = (Q3[i*dimY + (j)] - Q3[(i)*dimY + (j-1)]);
+            }
+            div1 = q1 + q22;
+            div2 = q2 + q11;
+            V1[i*dimY + (j)] = V1[i*dimY + (j)] + tau*(P1[i*dimY + (j)] + div1);
+            V2[i*dimY + (j)] = V2[i*dimY + (j)] + tau*(P2[i*dimY + (j)] + div2);
+        }}
+    return 1;
+}
+/*********************3D *********************/
+
+/*Calculating dual variable P (using forward differences)*/
+float DualP_3D(float *U, float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ, float sigma)
+{
+    int i,j,k;
+#pragma omp parallel for shared(U,V1,V2,V3,P1,P2,P3) private(i,j,k)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            for(k=0; k<dimZ; k++) {
+                /* symmetric boundary conditions (Neuman) */
+                if (i == dimX-1) P1[dimX*dimY*k + i*dimY + (j)] = P1[dimX*dimY*k + i*dimY + (j)] + sigma*((U[dimX*dimY*k + (i-1)*dimY + (j)] - U[dimX*dimY*k + i*dimY + (j)])  - V1[dimX*dimY*k + i*dimY + (j)]);
+                else P1[dimX*dimY*k + i*dimY + (j)] = P1[dimX*dimY*k + i*dimY + (j)] + sigma*((U[dimX*dimY*k + (i + 1)*dimY + (j)] - U[dimX*dimY*k + i*dimY + (j)])  - V1[dimX*dimY*k + i*dimY + (j)]);
+                if (j == dimY-1) P2[dimX*dimY*k + i*dimY + (j)] = P2[dimX*dimY*k + i*dimY + (j)] + sigma*((U[dimX*dimY*k + (i)*dimY + (j-1)] - U[dimX*dimY*k + i*dimY + (j)])  - V2[dimX*dimY*k + i*dimY + (j)]);
+                else  P2[dimX*dimY*k + i*dimY + (j)] = P2[dimX*dimY*k + i*dimY + (j)] + sigma*((U[dimX*dimY*k + (i)*dimY + (j+1)] - U[dimX*dimY*k + i*dimY + (j)])  - V2[dimX*dimY*k + i*dimY + (j)]);
+                if (k == dimZ-1) P3[dimX*dimY*k + i*dimY + (j)] = P3[dimX*dimY*k + i*dimY + (j)] + sigma*((U[dimX*dimY*(k-1) + (i)*dimY + (j)] - U[dimX*dimY*k + i*dimY + (j)])  - V3[dimX*dimY*k + i*dimY + (j)]);
+                else  P3[dimX*dimY*k + i*dimY + (j)] = P3[dimX*dimY*k + i*dimY + (j)] + sigma*((U[dimX*dimY*(k+1) + (i)*dimY + (j)] - U[dimX*dimY*k + i*dimY + (j)])  - V3[dimX*dimY*k + i*dimY + (j)]);
+            }}}
+    return 1;
+}
\ No newline at end of file
diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/TGV_PD_core.h b/Wrappers/Matlab/mex_compile/regularizers_CPU/TGV_PD_core.h
new file mode 100644
index 0000000..d5378df
--- /dev/null
+++ b/Wrappers/Matlab/mex_compile/regularizers_CPU/TGV_PD_core.h
@@ -0,0 +1,67 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+//#include <matrix.h>
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+#include "utils.h"
+
+/* C-OMP implementation of Primal-Dual denoising method for
+* Total Generilized Variation (TGV)-L2 model (2D case only)
+*
+* Input Parameters:
+* 1. Noisy image/volume (2D)
+* 2. lambda - regularization parameter
+* 3. parameter to control first-order term (alpha1)
+* 4. parameter to control the second-order term (alpha0)
+* 5. Number of CP iterations
+*
+* Output:
+* Filtered/regularized image
+*
+* Example:
+* figure;
+* Im = double(imread('lena_gray_256.tif'))/255;  % loading image
+* u0 = Im + .03*randn(size(Im)); % adding noise
+* tic; u = PrimalDual_TGV(single(u0), 0.02, 1.3, 1, 550); toc;
+*
+* to compile with OMP support: mex TGV_PD.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+* References:
+* K. Bredies "Total Generalized Variation"
+*
+* 28.11.16/Harwell
+*/
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* 2D functions */
+float DualP_2D(float *U, float *V1, float *V2, float *P1, float *P2, int dimX, int dimY, int dimZ, float sigma);
+float ProjP_2D(float *P1, float *P2, int dimX, int dimY, int dimZ, float alpha1);
+float DualQ_2D(float *V1, float *V2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, float sigma);
+float ProjQ_2D(float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, float alpha0);
+float DivProjP_2D(float *U, float *A, float *P1, float *P2, int dimX, int dimY, int dimZ, float lambda, float tau);
+float UpdV_2D(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, float tau);
+float newU(float *U, float *U_old, int dimX, int dimY, int dimZ);
+//float copyIm(float *A, float *U, int dimX, int dimY, int dimZ);
+#ifdef __cplusplus
+}
+#endif
diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/utils.c b/Wrappers/Matlab/mex_compile/regularizers_CPU/utils.c
new file mode 100644
index 0000000..0e83d2c
--- /dev/null
+++ b/Wrappers/Matlab/mex_compile/regularizers_CPU/utils.c
@@ -0,0 +1,29 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazanteev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "utils.h"
+
+/* Copy Image */
+float copyIm(float *A, float *U, int dimX, int dimY, int dimZ)
+{
+	int j;
+#pragma omp parallel for shared(A, U) private(j)
+	for (j = 0; j<dimX*dimY*dimZ; j++)  U[j] = A[j];
+	return *U;
+}
\ No newline at end of file
diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/utils.h b/Wrappers/Matlab/mex_compile/regularizers_CPU/utils.h
new file mode 100644
index 0000000..53463a3
--- /dev/null
+++ b/Wrappers/Matlab/mex_compile/regularizers_CPU/utils.h
@@ -0,0 +1,32 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+//#include <matrix.h>
+//#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+//#include <stdio.h>
+#include "omp.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+float copyIm(float *A, float *U, int dimX, int dimY, int dimZ);
+#ifdef __cplusplus
+}
+#endif
diff --git a/Wrappers/Matlab/mex_compile/regularizers_GPU/Diffus_HO/Diff4thHajiaboli_GPU.cpp b/Wrappers/Matlab/mex_compile/regularizers_GPU/Diffus_HO/Diff4thHajiaboli_GPU.cpp
new file mode 100644
index 0000000..5a8c7c0
--- /dev/null
+++ b/Wrappers/Matlab/mex_compile/regularizers_GPU/Diffus_HO/Diff4thHajiaboli_GPU.cpp
@@ -0,0 +1,114 @@
+#include "mex.h"
+#include <matrix.h>
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include <iostream>
+#include "Diff4th_GPU_kernel.h"
+
+/*
+ * 2D and 3D CUDA implementation of the 4th order PDE denoising model by Hajiaboli
+ *
+ * Reference :
+ * "An anisotropic fourth-order diffusion filter for image noise removal" by M. Hajiaboli
+ * 
+ * Example
+ * figure;
+ * Im = double(imread('lena_gray_256.tif'))/255;  % loading image
+ * u0 = Im + .05*randn(size(Im)); % adding noise
+ * u = Diff4thHajiaboli_GPU(single(u0), 0.02, 150);
+ * subplot (1,2,1); imshow(u0,[ ]); title('Noisy Image')
+ * subplot (1,2,2); imshow(u,[ ]); title('Denoised Image')
+ *
+ *
+ * Linux/Matlab compilation:
+ * compile in terminal: nvcc -Xcompiler -fPIC -shared -o Diff4th_GPU_kernel.o Diff4th_GPU_kernel.cu
+ * then compile in Matlab: mex -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart Diff4thHajiaboli_GPU.cpp Diff4th_GPU_kernel.o
+ */
+
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+{
+    int numdims, dimZ, size;
+    float *A, *B, *A_L, *B_L;
+    const int *dims;    
+    
+    numdims = mxGetNumberOfDimensions(prhs[0]);
+    dims = mxGetDimensions(prhs[0]);       
+    
+    float sigma = (float)mxGetScalar(prhs[1]); /* edge-preserving parameter */
+    float lambda = (float)mxGetScalar(prhs[2]); /* regularization parameter */
+    int iter = (int)mxGetScalar(prhs[3]); /* iterations number */
+    
+    if (numdims == 2)  {
+        
+        int N, M, Z, i, j;
+        Z = 0; // for the 2D case
+        float tau = 0.01; // time step is sufficiently small for an explicit methods
+        
+        /*Input data*/
+        A = (float*)mxGetData(prhs[0]);        
+        N = dims[0] + 2;
+        M = dims[1] + 2;
+        A_L = (float*)mxGetData(mxCreateNumericMatrix(N, M, mxSINGLE_CLASS, mxREAL));
+        B_L = (float*)mxGetData(mxCreateNumericMatrix(N, M, mxSINGLE_CLASS, mxREAL));
+        
+        /*Output data*/
+        B = (float*)mxGetData(plhs[0] = mxCreateNumericMatrix(dims[0], dims[1], mxSINGLE_CLASS, mxREAL));        
+        
+        // copy A to the bigger A_L with boundaries
+        #pragma omp parallel for shared(A_L, A) private(i,j)
+        for (i=0; i < N; i++) {
+            for (j=0; j < M; j++) {
+                if (((i > 0) && (i < N-1)) &&  ((j > 0) && (j < M-1)))  A_L[i*M+j] = A[(i-1)*(dims[1])+(j-1)];
+            }}
+        
+        // Running CUDA code here
+        Diff4th_GPU_kernel(A_L, B_L, N, M, Z, (float)sigma, iter, (float)tau, lambda);
+        
+        // copy the processed B_L to a smaller B
+        #pragma omp parallel for shared(B_L, B) private(i,j)
+        for (i=0; i < N; i++) {
+            for (j=0; j < M; j++) {
+                if (((i > 0) && (i < N-1)) &&  ((j > 0) && (j < M-1)))   B[(i-1)*(dims[1])+(j-1)] = B_L[i*M+j];
+            }}
+    }
+    if (numdims == 3)  {
+        //  3D image denoising / regularization
+        int N, M, Z, i, j, k;
+        float tau = 0.0007; // Time Step is small for an explicit methods
+        A = (float*)mxGetData(prhs[0]);
+        N = dims[0] + 2;
+        M = dims[1] + 2;
+        Z = dims[2] + 2;
+        int N_dims[] = {N, M, Z};
+        A_L = (float*)mxGetPr(mxCreateNumericArray(3, N_dims, mxSINGLE_CLASS, mxREAL));
+        B_L = (float*)mxGetPr(mxCreateNumericArray(3, N_dims, mxSINGLE_CLASS, mxREAL));
+        
+        /* output data */
+        B = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dims, mxSINGLE_CLASS, mxREAL));
+        
+        // copy A to the bigger A_L with boundaries
+        #pragma omp parallel for shared(A_L, A) private(i,j,k)
+        for (i=0; i < N; i++) {
+            for (j=0; j < M; j++) {
+                for (k=0; k < Z; k++) {
+                    if (((i > 0) && (i < N-1)) &&  ((j > 0) && (j < M-1)) &&  ((k > 0) && (k < Z-1))) {
+                        A_L[(N*M)*(k)+(i)*M+(j)] = A[(dims[0]*dims[1])*(k-1)+(i-1)*dims[1]+(j-1)];
+                    }}}}
+        
+        // Running CUDA kernel here for diffusivity
+        Diff4th_GPU_kernel(A_L, B_L, N, M, Z, (float)sigma, iter, (float)tau, lambda);
+        
+        // copy the processed B_L to a smaller B
+        #pragma omp parallel for shared(B_L, B) private(i,j,k)
+        for (i=0; i < N; i++) {
+            for (j=0; j < M; j++) {
+                for (k=0; k < Z; k++) {
+                    if (((i > 0) && (i < N-1)) &&  ((j > 0) && (j < M-1)) &&  ((k > 0) && (k < Z-1))) {
+                        B[(dims[0]*dims[1])*(k-1)+(i-1)*dims[1]+(j-1)] = B_L[(N*M)*(k)+(i)*M+(j)];
+                    }}}}
+    }
+}
\ No newline at end of file
diff --git a/Wrappers/Matlab/mex_compile/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.cu b/Wrappers/Matlab/mex_compile/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.cu
new file mode 100644
index 0000000..178af00
--- /dev/null
+++ b/Wrappers/Matlab/mex_compile/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.cu
@@ -0,0 +1,270 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include "Diff4th_GPU_kernel.h"
+
+#define checkCudaErrors(err)           __checkCudaErrors (err, __FILE__, __LINE__)
+
+inline void __checkCudaErrors(cudaError err, const char *file, const int line)
+{
+    if (cudaSuccess != err)
+    {
+        fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",
+                file, line, (int)err, cudaGetErrorString(err));
+        exit(EXIT_FAILURE);
+    }
+}
+
+#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
+#define sizeT (sizeX*sizeY*sizeZ)
+#define epsilon 0.00000001
+
+/////////////////////////////////////////////////
+// 2D Image denosing - Second Step (The second derrivative)
+__global__ void Diff4th2D_derriv(float* B, float* A, float *A0, int N, int M, float sigma, int iter, float tau, float lambda)
+{
+    float gradXXc = 0, gradYYc = 0;
+    int i = blockIdx.x*blockDim.x + threadIdx.x;
+    int j = blockIdx.y*blockDim.y + threadIdx.y;
+    
+    int index = j + i*N;
+    
+    if (((i < 1) || (i > N-2)) || ((j < 1) || (j > M-2))) {
+        return;    }
+    
+    int indexN = (j)+(i-1)*(N); if (A[indexN] == 0) indexN = index;
+    int indexS = (j)+(i+1)*(N); if (A[indexS] == 0) indexS = index;
+    int indexW = (j-1)+(i)*(N); if (A[indexW] == 0) indexW = index;
+    int indexE = (j+1)+(i)*(N); if (A[indexE] == 0) indexE = index;
+    
+    gradXXc = B[indexN] + B[indexS] - 2*B[index] ;
+    gradYYc = B[indexW] + B[indexE] - 2*B[index] ;
+    A[index]  = A[index] - tau*((A[index] - A0[index]) + lambda*(gradXXc + gradYYc));
+}
+
+// 2D Image denosing - The First Step
+__global__ void Diff4th2D(float* A, float* B, int N, int M, float sigma, int iter, float tau)
+{
+    float gradX, gradX_sq, gradY, gradY_sq, gradXX, gradYY, gradXY, sq_sum, xy_2,  V_norm, V_orth, c, c_sq;
+    
+    int i = blockIdx.x*blockDim.x + threadIdx.x;
+    int j = blockIdx.y*blockDim.y + threadIdx.y;
+    
+    int index = j + i*N;
+    
+    V_norm = 0.0f; V_orth = 0.0f;
+    
+    if (((i < 1) || (i > N-2)) || ((j < 1) || (j > M-2))) {
+        return;    }
+    
+    int indexN = (j)+(i-1)*(N); if (A[indexN] == 0) indexN = index;
+    int indexS = (j)+(i+1)*(N); if (A[indexS] == 0) indexS = index;
+    int indexW = (j-1)+(i)*(N); if (A[indexW] == 0) indexW = index;
+    int indexE = (j+1)+(i)*(N); if (A[indexE] == 0) indexE = index;
+    int indexNW = (j-1)+(i-1)*(N); if (A[indexNW] == 0) indexNW = index;
+    int indexNE = (j+1)+(i-1)*(N); if (A[indexNE] == 0) indexNE = index;
+    int indexWS = (j-1)+(i+1)*(N); if (A[indexWS] == 0) indexWS = index;
+    int indexES = (j+1)+(i+1)*(N); if (A[indexES] == 0) indexES = index;
+    
+    gradX = 0.5f*(A[indexN]-A[indexS]);
+    gradX_sq = gradX*gradX;
+    gradXX = A[indexN] + A[indexS] - 2*A[index];
+    
+    gradY = 0.5f*(A[indexW]-A[indexE]);
+    gradY_sq = gradY*gradY;
+    gradYY = A[indexW] + A[indexE] - 2*A[index];
+    
+    gradXY = 0.25f*(A[indexNW] - A[indexNE] - A[indexWS] + A[indexES]);
+    xy_2 = 2.0f*gradX*gradY*gradXY;
+    sq_sum =  gradX_sq + gradY_sq;
+    
+    if (sq_sum <= epsilon) {
+        V_norm = (gradXX*gradX_sq + xy_2 + gradYY*gradY_sq)/epsilon;
+        V_orth = (gradXX*gradY_sq - xy_2 + gradYY*gradX_sq)/epsilon; }
+    else  {
+        V_norm = (gradXX*gradX_sq + xy_2 + gradYY*gradY_sq)/sq_sum;
+        V_orth = (gradXX*gradY_sq - xy_2 + gradYY*gradX_sq)/sq_sum;  }
+    
+    c = 1.0f/(1.0f + sq_sum/sigma);
+    c_sq = c*c;
+    B[index] =  c_sq*V_norm + c*V_orth;
+}
+
+/////////////////////////////////////////////////
+// 3D data parocerssing
+__global__ void Diff4th3D_derriv(float *B, float *A, float *A0, int N, int M, int Z, float sigma, int iter, float tau, float lambda)
+{
+    float gradXXc = 0, gradYYc = 0, gradZZc = 0;
+    int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
+    int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
+    int zIndex = blockDim.z * blockIdx.z + threadIdx.z;
+    
+    int index = xIndex + M*yIndex + N*M*zIndex;
+    
+    if (((xIndex < 1) || (xIndex > N-2)) || ((yIndex < 1) || (yIndex > M-2)) || ((zIndex < 1) || (zIndex > Z-2))) {
+        return;    }
+    
+    int indexN = (xIndex-1) + M*yIndex + N*M*zIndex; if (A[indexN] == 0) indexN = index;
+    int indexS = (xIndex+1) + M*yIndex + N*M*zIndex; if (A[indexS] == 0) indexS = index;
+    int indexW = xIndex + M*(yIndex-1) + N*M*zIndex; if (A[indexW] == 0) indexW = index;
+    int indexE = xIndex + M*(yIndex+1) + N*M*zIndex; if (A[indexE] == 0) indexE = index;
+    int indexU = xIndex + M*yIndex + N*M*(zIndex-1); if (A[indexU] == 0) indexU = index;
+    int indexD = xIndex + M*yIndex + N*M*(zIndex+1); if (A[indexD] == 0) indexD = index;
+    
+    gradXXc = B[indexN] + B[indexS] - 2*B[index] ;
+    gradYYc = B[indexW] + B[indexE] - 2*B[index] ;
+    gradZZc = B[indexU] + B[indexD] - 2*B[index] ;   
+        
+    A[index]  = A[index] - tau*((A[index] - A0[index]) + lambda*(gradXXc + gradYYc + gradZZc));    
+}
+
+__global__ void Diff4th3D(float* A, float* B, int N, int M, int Z, float sigma, int iter, float tau)
+{
+    float gradX, gradX_sq, gradY, gradY_sq, gradZ, gradZ_sq, gradXX, gradYY, gradZZ, gradXY, gradXZ, gradYZ, sq_sum, xy_2, xyz_1, xyz_2, V_norm, V_orth, c, c_sq;
+    
+    int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
+    int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
+    int zIndex = blockDim.z * blockIdx.z + threadIdx.z;
+    
+    int index = xIndex + M*yIndex + N*M*zIndex;
+    V_norm = 0.0f; V_orth = 0.0f;
+    
+    if (((xIndex < 1) || (xIndex > N-2)) || ((yIndex < 1) || (yIndex > M-2)) || ((zIndex < 1) || (zIndex > Z-2))) {
+        return;    }
+    
+    B[index] = 0;
+    
+    int indexN = (xIndex-1) + M*yIndex + N*M*zIndex; if (A[indexN] == 0) indexN = index;
+    int indexS = (xIndex+1) + M*yIndex + N*M*zIndex; if (A[indexS] == 0) indexS = index;
+    int indexW = xIndex + M*(yIndex-1) + N*M*zIndex; if (A[indexW] == 0) indexW = index;
+    int indexE = xIndex + M*(yIndex+1) + N*M*zIndex; if (A[indexE] == 0) indexE = index;
+    int indexU = xIndex + M*yIndex + N*M*(zIndex-1); if (A[indexU] == 0) indexU = index;
+    int indexD = xIndex + M*yIndex + N*M*(zIndex+1); if (A[indexD] == 0) indexD = index;
+    
+    int indexNW = (xIndex-1) + M*(yIndex-1) + N*M*zIndex;  if (A[indexNW] == 0) indexNW = index;
+    int indexNE = (xIndex-1) + M*(yIndex+1) + N*M*zIndex;  if (A[indexNE] == 0) indexNE = index;
+    int indexWS =  (xIndex+1) + M*(yIndex-1) + N*M*zIndex; if (A[indexWS] == 0) indexWS = index;
+    int indexES = (xIndex+1) + M*(yIndex+1) + N*M*zIndex;  if (A[indexES] == 0) indexES = index;
+    
+    int indexUW = (xIndex-1) + M*(yIndex) + N*M*(zIndex-1); if (A[indexUW] == 0) indexUW = index;
+    int indexUE = (xIndex+1) + M*(yIndex) + N*M*(zIndex-1); if (A[indexUE] == 0) indexUE = index;
+    int indexDW =  (xIndex-1) + M*(yIndex) + N*M*(zIndex+1); if (A[indexDW] == 0) indexDW = index;
+    int indexDE = (xIndex+1) + M*(yIndex) + N*M*(zIndex+1); if (A[indexDE] == 0) indexDE = index;
+    
+    int indexUN = (xIndex) + M*(yIndex-1) + N*M*(zIndex-1);  if (A[indexUN] == 0) indexUN = index;
+    int indexUS = (xIndex) + M*(yIndex+1) + N*M*(zIndex-1);  if (A[indexUS] == 0) indexUS = index;
+    int indexDN =  (xIndex) + M*(yIndex-1) + N*M*(zIndex+1); if (A[indexDN] == 0) indexDN = index;
+    int indexDS = (xIndex) + M*(yIndex+1) + N*M*(zIndex+1);  if (A[indexDS] == 0) indexDS = index;
+    
+    gradX = 0.5f*(A[indexN]-A[indexS]);
+    gradX_sq = gradX*gradX;
+    gradXX = A[indexN] + A[indexS] - 2*A[index];
+    
+    gradY = 0.5f*(A[indexW]-A[indexE]);
+    gradY_sq = gradY*gradY;
+    gradYY = A[indexW] + A[indexE] - 2*A[index];
+    
+    gradZ = 0.5f*(A[indexU]-A[indexD]);
+    gradZ_sq = gradZ*gradZ;
+    gradZZ = A[indexU] + A[indexD] - 2*A[index];
+    
+    gradXY = 0.25f*(A[indexNW] - A[indexNE] - A[indexWS] + A[indexES]);
+    gradXZ = 0.25f*(A[indexUW] - A[indexUE] - A[indexDW] + A[indexDE]);
+    gradYZ = 0.25f*(A[indexUN] - A[indexUS] - A[indexDN] + A[indexDS]);
+    
+    xy_2  = 2.0f*gradX*gradY*gradXY;
+    xyz_1 = 2.0f*gradX*gradZ*gradXZ;
+    xyz_2 = 2.0f*gradY*gradZ*gradYZ;
+    
+    sq_sum =  gradX_sq + gradY_sq + gradZ_sq;
+    
+    if (sq_sum <= epsilon) {
+        V_norm = (gradXX*gradX_sq + gradYY*gradY_sq + gradZZ*gradZ_sq + xy_2 + xyz_1 + xyz_2)/epsilon;
+        V_orth = ((gradY_sq + gradZ_sq)*gradXX + (gradX_sq + gradZ_sq)*gradYY + (gradX_sq + gradY_sq)*gradZZ - xy_2 - xyz_1 - xyz_2)/epsilon;  }
+    else  {
+        V_norm = (gradXX*gradX_sq + gradYY*gradY_sq + gradZZ*gradZ_sq + xy_2 + xyz_1 + xyz_2)/sq_sum;
+        V_orth = ((gradY_sq + gradZ_sq)*gradXX + (gradX_sq + gradZ_sq)*gradYY + (gradX_sq + gradY_sq)*gradZZ - xy_2 - xyz_1 - xyz_2)/sq_sum;  }
+    
+    c = 1;
+    if ((1.0f + sq_sum/sigma) != 0.0f)  {c = 1.0f/(1.0f + sq_sum/sigma);}
+    
+    c_sq = c*c;
+    B[index] =  c_sq*V_norm + c*V_orth;
+}
+
+/******************************************************/
+/********* HOST FUNCTION*************/
+extern "C" void Diff4th_GPU_kernel(float* A, float* B, int N, int M, int Z, float sigma, int iter, float tau, float lambda)
+{
+     int deviceCount = -1; // number of devices
+    cudaGetDeviceCount(&deviceCount);
+    if (deviceCount == 0) {
+        fprintf(stderr, "No CUDA devices found\n");
+        return;
+    }    
+    
+      int BLKXSIZE, BLKYSIZE,BLKZSIZE;
+      float *Ad, *Bd, *Cd;
+      sigma = sigma*sigma;
+    
+    if (Z == 0){
+        // 4th order diffusion for 2D case     
+        BLKXSIZE = 8;
+        BLKYSIZE = 16;
+        
+        dim3 dimBlock(BLKXSIZE,BLKYSIZE);
+        dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE));
+        
+        checkCudaErrors(cudaMalloc((void**)&Ad,N*M*sizeof(float)));
+        checkCudaErrors(cudaMalloc((void**)&Bd,N*M*sizeof(float)));
+        checkCudaErrors(cudaMalloc((void**)&Cd,N*M*sizeof(float)));
+        
+        checkCudaErrors(cudaMemcpy(Ad,A,N*M*sizeof(float),cudaMemcpyHostToDevice));
+        checkCudaErrors(cudaMemcpy(Bd,A,N*M*sizeof(float),cudaMemcpyHostToDevice));
+        checkCudaErrors(cudaMemcpy(Cd,A,N*M*sizeof(float),cudaMemcpyHostToDevice));
+        
+        int n = 1;
+        while (n <= iter) {
+            Diff4th2D<<<dimGrid,dimBlock>>>(Bd, Cd, N, M, sigma, iter, tau);
+            cudaDeviceSynchronize();
+            checkCudaErrors( cudaPeekAtLastError() );
+            Diff4th2D_derriv<<<dimGrid,dimBlock>>>(Cd, Bd, Ad, N, M, sigma, iter, tau, lambda);
+            cudaDeviceSynchronize();
+            checkCudaErrors( cudaPeekAtLastError() );
+            n++;
+        }
+        checkCudaErrors(cudaMemcpy(B,Bd,N*M*sizeof(float),cudaMemcpyDeviceToHost));
+        cudaFree(Ad); cudaFree(Bd); cudaFree(Cd);
+    }
+    
+    if (Z != 0){
+        // 4th order diffusion for 3D case
+        BLKXSIZE = 8;
+        BLKYSIZE = 8;
+        BLKZSIZE = 8;        
+        
+        dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE);
+        dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE),idivup(Z,BLKXSIZE));
+        
+        checkCudaErrors(cudaMalloc((void**)&Ad,N*M*Z*sizeof(float)));
+        checkCudaErrors(cudaMalloc((void**)&Bd,N*M*Z*sizeof(float)));
+        checkCudaErrors(cudaMalloc((void**)&Cd,N*M*Z*sizeof(float)));
+        
+        checkCudaErrors(cudaMemcpy(Ad,A,N*M*Z*sizeof(float),cudaMemcpyHostToDevice));
+        checkCudaErrors(cudaMemcpy(Bd,A,N*M*Z*sizeof(float),cudaMemcpyHostToDevice));        
+        checkCudaErrors(cudaMemcpy(Cd,A,N*M*Z*sizeof(float),cudaMemcpyHostToDevice));
+        
+        int n = 1;
+        while (n <= iter) {
+            Diff4th3D<<<dimGrid,dimBlock>>>(Bd, Cd, N, M, Z, sigma, iter, tau);
+            cudaDeviceSynchronize();
+            checkCudaErrors( cudaPeekAtLastError() );
+            Diff4th3D_derriv<<<dimGrid,dimBlock>>>(Cd, Bd, Ad, N, M, Z, sigma, iter, tau, lambda);
+            cudaDeviceSynchronize();
+            checkCudaErrors( cudaPeekAtLastError() );
+            n++;
+        }
+        checkCudaErrors(cudaMemcpy(B,Bd,N*M*Z*sizeof(float),cudaMemcpyDeviceToHost));
+        cudaFree(Ad); cudaFree(Bd); cudaFree(Cd);
+    }
+}
\ No newline at end of file
diff --git a/Wrappers/Matlab/mex_compile/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.h b/Wrappers/Matlab/mex_compile/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.h
new file mode 100644
index 0000000..cfbb45a
--- /dev/null
+++ b/Wrappers/Matlab/mex_compile/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.h
@@ -0,0 +1,6 @@
+#ifndef __DIFF_HO_H_
+#define __DIFF_HO_H_
+
+extern "C" void Diff4th_GPU_kernel(float* A, float* B, int N, int M, int Z, float sigma, int iter, float tau, float lambda);
+
+#endif 
diff --git a/Wrappers/Matlab/mex_compile/regularizers_GPU/NL_Regul/NLM_GPU.cpp b/Wrappers/Matlab/mex_compile/regularizers_GPU/NL_Regul/NLM_GPU.cpp
new file mode 100644
index 0000000..ff0cc90
--- /dev/null
+++ b/Wrappers/Matlab/mex_compile/regularizers_GPU/NL_Regul/NLM_GPU.cpp
@@ -0,0 +1,171 @@
+#include "mex.h"
+#include <matrix.h>
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include <iostream>
+#include "NLM_GPU_kernel.h"
+
+/* CUDA implementation of the patch-based (PB) regularization  for 2D and 3D images/volumes  
+ * This method finds self-similar patches in data and performs one fixed point iteration to mimimize the PB penalty function
+ * 
+ * References: 1. Yang Z. & Jacob M. "Nonlocal Regularization of Inverse Problems"
+ *             2. Kazantsev D. at. all "4D-CT reconstruction with unified spatial-temporal patch-based regularization"
+ *
+ * Input Parameters (mandatory):
+ * 1. Image/volume (2D/3D)
+ * 2. ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window)
+ * 3. ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window)
+ * 4. h - parameter for the PB penalty function
+ * 5. lambda - regularization parameter 
+
+ * Output:
+ * 1. regularized (denoised) Image/volume (N x N x N)
+ *
+ * In matlab check what kind of GPU you have with "gpuDevice" command,
+ * then set your ComputeCapability, here I use -arch compute_35
+ *
+ * Quick 2D denoising example in Matlab:   
+   Im = double(imread('lena_gray_256.tif'))/255;  % loading image
+   u0 = Im + .03*randn(size(Im)); u0(u0<0) = 0; % adding noise
+   ImDen = NLM_GPU(single(u0), 3, 2, 0.15, 1);
+ 
+ * Linux/Matlab compilation:
+ * compile in terminal: nvcc -Xcompiler -fPIC -shared -o NLM_GPU_kernel.o NLM_GPU_kernel.cu
+ * then compile in Matlab: mex -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart NLM_GPU.cpp NLM_GPU_kernel.o
+ *
+ * D. Kazantsev 
+ * 2014-17
+ * Harwell/Manchester UK
+ */
+
+float pad_crop(float *A, float *Ap, int OldSizeX, int OldSizeY,  int OldSizeZ, int NewSizeX, int NewSizeY, int NewSizeZ, int padXY, int switchpad_crop);
+
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+{
+    int N, M, Z, i_n, j_n, k_n, numdims, SearchW, SimilW, SearchW_real, padXY, newsizeX, newsizeY, newsizeZ, switchpad_crop, count, SearchW_full, SimilW_full;
+    const int  *dims;
+    float *A, *B=NULL, *Ap=NULL, *Bp=NULL, *Eucl_Vec, h, h2, lambda, val, denh2;
+    
+    numdims = mxGetNumberOfDimensions(prhs[0]);
+    dims = mxGetDimensions(prhs[0]);
+    
+    N = dims[0];
+    M = dims[1];
+    Z = dims[2];
+    
+    if ((numdims < 2) || (numdims > 3)) {mexErrMsgTxt("The input should be 2D image or 3D volume");}
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input in single precision is required"); }
+    
+    if(nrhs != 5) mexErrMsgTxt("Five inputs reqired: Image(2D,3D), SearchW, SimilW, Threshold, Regularization parameter");
+    
+    /*Handling inputs*/
+    A  = (float *) mxGetData(prhs[0]);    /* the image to regularize/filter */
+    SearchW_real  = (int) mxGetScalar(prhs[1]); /* the searching window ratio */
+    SimilW =  (int) mxGetScalar(prhs[2]);  /* the similarity window ratio */
+    h =  (float) mxGetScalar(prhs[3]);  /* parameter for the PB filtering function */
+    lambda = (float) mxGetScalar(prhs[4]);
+    
+    if (h <= 0) mexErrMsgTxt("Parmeter for the PB penalty function should be > 0");
+      
+    SearchW = SearchW_real + 2*SimilW;
+    
+    SearchW_full = 2*SearchW + 1; /* the full searching window  size */
+    SimilW_full = 2*SimilW + 1;   /* the full similarity window  size */
+    h2 = h*h;
+    
+    padXY = SearchW + 2*SimilW; /* padding sizes */
+    newsizeX = N + 2*(padXY); /* the X size of the padded array */
+    newsizeY = M + 2*(padXY); /* the Y size of the padded array */
+    newsizeZ = Z + 2*(padXY); /* the Z size of the padded array */
+    int N_dims[] = {newsizeX, newsizeY, newsizeZ};
+    
+    /******************************2D case ****************************/
+    if (numdims == 2) {
+        /*Handling output*/
+        B = (float*)mxGetData(plhs[0] = mxCreateNumericMatrix(N, M, mxSINGLE_CLASS, mxREAL));
+        /*allocating memory for the padded arrays */
+        Ap = (float*)mxGetData(mxCreateNumericMatrix(newsizeX, newsizeY, mxSINGLE_CLASS, mxREAL));
+        Bp = (float*)mxGetData(mxCreateNumericMatrix(newsizeX, newsizeY, mxSINGLE_CLASS, mxREAL));
+        Eucl_Vec = (float*)mxGetData(mxCreateNumericMatrix(SimilW_full*SimilW_full, 1, mxSINGLE_CLASS, mxREAL));
+        
+        /*Gaussian kernel */
+        count = 0;
+        for(i_n=-SimilW; i_n<=SimilW; i_n++) {
+            for(j_n=-SimilW; j_n<=SimilW; j_n++) {
+                val = (float)(i_n*i_n + j_n*j_n)/(2*SimilW*SimilW);
+                Eucl_Vec[count] = exp(-val);
+                count = count + 1;
+            }} /*main neighb loop */
+        
+        /**************************************************************************/
+        /*Perform padding of image A to the size of [newsizeX * newsizeY] */
+        switchpad_crop = 0; /*padding*/
+        pad_crop(A, Ap, M, N, 0, newsizeY, newsizeX, 0, padXY, switchpad_crop);
+        
+        /* Do PB regularization with the padded array  */
+        NLM_GPU_kernel(Ap, Bp, Eucl_Vec, newsizeY, newsizeX, 0, numdims, SearchW, SimilW, SearchW_real, (float)h2, (float)lambda);
+        
+        switchpad_crop = 1; /*cropping*/
+        pad_crop(Bp, B, M, N, 0, newsizeY, newsizeX, 0, padXY, switchpad_crop);
+    }
+    else
+    {
+        /******************************3D case ****************************/
+        /*Handling output*/
+        B = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dims, mxSINGLE_CLASS, mxREAL));
+        /*allocating memory for the padded arrays */
+        Ap = (float*)mxGetPr(mxCreateNumericArray(3, N_dims, mxSINGLE_CLASS, mxREAL));
+        Bp = (float*)mxGetPr(mxCreateNumericArray(3, N_dims, mxSINGLE_CLASS, mxREAL));
+        Eucl_Vec = (float*)mxGetData(mxCreateNumericMatrix(SimilW_full*SimilW_full*SimilW_full, 1, mxSINGLE_CLASS, mxREAL));
+        
+        /*Gaussian kernel */
+        count = 0;
+        for(i_n=-SimilW; i_n<=SimilW; i_n++) {
+            for(j_n=-SimilW; j_n<=SimilW; j_n++) {
+                for(k_n=-SimilW; k_n<=SimilW; k_n++) {
+                    val = (float)(i_n*i_n + j_n*j_n + k_n*k_n)/(2*SimilW*SimilW*SimilW);
+                    Eucl_Vec[count] = exp(-val);
+                    count = count + 1;
+                }}} /*main neighb loop */
+        /**************************************************************************/
+        /*Perform padding of image A to the size of [newsizeX * newsizeY * newsizeZ] */
+        switchpad_crop = 0; /*padding*/
+        pad_crop(A, Ap, M, N, Z, newsizeY, newsizeX, newsizeZ, padXY, switchpad_crop);
+        
+        /* Do PB regularization with the padded array  */
+        NLM_GPU_kernel(Ap, Bp, Eucl_Vec, newsizeY, newsizeX, newsizeZ, numdims, SearchW, SimilW, SearchW_real, (float)h2, (float)lambda);
+        
+        switchpad_crop = 1; /*cropping*/
+        pad_crop(Bp, B, M, N, Z, newsizeY, newsizeX, newsizeZ, padXY, switchpad_crop);
+    } /*end else ndims*/
+}
+
+float pad_crop(float *A, float *Ap, int OldSizeX, int OldSizeY, int OldSizeZ, int NewSizeX, int NewSizeY, int NewSizeZ, int padXY, int switchpad_crop)
+{
+    /* padding-cropping function */
+    int i,j,k;    
+    if (NewSizeZ > 1) {    
+           for (i=0; i < NewSizeX; i++) {
+            for (j=0; j < NewSizeY; j++) {
+              for (k=0; k < NewSizeZ; k++) {
+                if (((i >= padXY) && (i < NewSizeX-padXY)) &&  ((j >= padXY) && (j < NewSizeY-padXY)) &&  ((k >= padXY) && (k < NewSizeZ-padXY))) {
+                    if (switchpad_crop == 0)  Ap[NewSizeX*NewSizeY*k + i*NewSizeY+j] = A[OldSizeX*OldSizeY*(k - padXY) + (i-padXY)*(OldSizeY)+(j-padXY)];
+                    else  Ap[OldSizeX*OldSizeY*(k - padXY) + (i-padXY)*(OldSizeY)+(j-padXY)] = A[NewSizeX*NewSizeY*k + i*NewSizeY+j];
+                }
+            }}}   
+    }
+    else {
+        for (i=0; i < NewSizeX; i++) {
+            for (j=0; j < NewSizeY; j++) {
+                if (((i >= padXY) && (i < NewSizeX-padXY)) &&  ((j >= padXY) && (j < NewSizeY-padXY))) {
+                    if (switchpad_crop == 0)  Ap[i*NewSizeY+j] = A[(i-padXY)*(OldSizeY)+(j-padXY)];
+                    else  Ap[(i-padXY)*(OldSizeY)+(j-padXY)] = A[i*NewSizeY+j];
+                }
+            }}
+    }
+    return *Ap;
+}
\ No newline at end of file
diff --git a/Wrappers/Matlab/mex_compile/regularizers_GPU/NL_Regul/NLM_GPU_kernel.cu b/Wrappers/Matlab/mex_compile/regularizers_GPU/NL_Regul/NLM_GPU_kernel.cu
new file mode 100644
index 0000000..17da3a8
--- /dev/null
+++ b/Wrappers/Matlab/mex_compile/regularizers_GPU/NL_Regul/NLM_GPU_kernel.cu
@@ -0,0 +1,239 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include "NLM_GPU_kernel.h"
+
+#define checkCudaErrors(err)           __checkCudaErrors (err, __FILE__, __LINE__)
+
+inline void __checkCudaErrors(cudaError err, const char *file, const int line)
+{
+    if (cudaSuccess != err)
+    {
+        fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",
+                file, line, (int)err, cudaGetErrorString(err));
+        exit(EXIT_FAILURE);
+    }
+}
+
+extern __shared__ float sharedmem[];
+
+// run PB den kernel here
+__global__ void NLM_kernel(float *Ad, float* Bd, float *Eucl_Vec_d, int N, int M, int Z, int SearchW, int SimilW, int SearchW_real, int SearchW_full, int SimilW_full, int padXY, float h2, float lambda,  dim3 imagedim, dim3 griddim, dim3 kerneldim, dim3 sharedmemdim, int nUpdatePerThread, float neighborsize)
+{
+    
+    int  i1, j1, k1, i2, j2, k2, i3, j3, k3, i_l, j_l, k_l, count;
+    float value, Weight_norm, normsum, Weight;
+    
+    int bidx = blockIdx.x;
+    int bidy = blockIdx.y%griddim.y;
+    int bidz = (int)((blockIdx.y)/griddim.y);
+    
+    // global index for block endpoint
+    int beidx = __mul24(bidx,blockDim.x);
+    int beidy = __mul24(bidy,blockDim.y);
+    int beidz = __mul24(bidz,blockDim.z);
+    
+    int tid = __mul24(threadIdx.z,__mul24(blockDim.x,blockDim.y)) +
+            __mul24(threadIdx.y,blockDim.x) + threadIdx.x;
+    
+    #ifdef __DEVICE_EMULATION__
+            printf("tid : %d", tid);
+    #endif
+            
+    // update shared memory
+    int nthreads = blockDim.x*blockDim.y*blockDim.z;
+    int sharedMemSize = sharedmemdim.x * sharedmemdim.y * sharedmemdim.z;
+    for(int i=0; i<nUpdatePerThread; i++)
+    {
+        int sid = tid + i*nthreads; // index in shared memory
+        if (sid < sharedMemSize)
+        {
+            // global x/y/z index in volume
+            int gidx, gidy, gidz;
+            int sidx, sidy, sidz, tid;
+            
+            sidz = sid / (sharedmemdim.x*sharedmemdim.y);
+            tid  = sid - sidz*(sharedmemdim.x*sharedmemdim.y);
+            sidy = tid / (sharedmemdim.x);
+            sidx = tid - sidy*(sharedmemdim.x);
+            
+            gidx = (int)sidx - (int)kerneldim.x + (int)beidx;
+            gidy = (int)sidy - (int)kerneldim.y + (int)beidy;
+            gidz = (int)sidz - (int)kerneldim.z + (int)beidz;
+            
+            // Neumann boundary condition
+            int cx = (int) min(max(0,gidx),imagedim.x-1);
+            int cy = (int) min(max(0,gidy),imagedim.y-1);
+            int cz = (int) min(max(0,gidz),imagedim.z-1);
+            
+            int gid = cz*imagedim.x*imagedim.y + cy*imagedim.x + cx;
+            
+            sharedmem[sid] = Ad[gid];
+        }
+    }
+    __syncthreads();
+    
+    // global index of the current voxel in the input volume
+    int idx = beidx + threadIdx.x;
+    int idy = beidy + threadIdx.y;
+    int idz = beidz + threadIdx.z;
+    
+    if (Z == 1) {
+        /* 2D case */
+        /*checking boundaries to be within the image and avoid padded spaces */
+        if( idx >= padXY && idx < (imagedim.x - padXY) &&
+                idy >= padXY && idy < (imagedim.y - padXY))
+        {
+            int i_centr = threadIdx.x + (SearchW); /*indices of the centrilized (main) pixel */
+            int j_centr = threadIdx.y + (SearchW); /*indices of the centrilized (main) pixel */
+            
+            if ((i_centr > 0) && (i_centr < N) && (j_centr > 0) && (j_centr < M)) {
+                
+                Weight_norm = 0; value = 0.0;
+                /* Massive Search window loop */
+                for(i1 = i_centr - SearchW_real ; i1 <= i_centr + SearchW_real; i1++) {
+                    for(j1 = j_centr - SearchW_real ; j1<= j_centr + SearchW_real ; j1++) {
+                        /* if inside the searching window */
+                        count = 0; normsum = 0.0;
+                        for(i_l=-SimilW; i_l<=SimilW; i_l++) {
+                            for(j_l=-SimilW; j_l<=SimilW; j_l++) {
+                                i2 = i1+i_l; j2 = j1+j_l;
+                                i3 = i_centr+i_l; j3 = j_centr+j_l;  /*coordinates of the inner patch loop */                                
+                                if ((i2 > 0) && (i2 < N) && (j2 > 0) && (j2 < M)) {
+                                       if ((i3 > 0) && (i3 < N) && (j3 > 0) && (j3 < M)) {
+                                            normsum += Eucl_Vec_d[count]*pow((sharedmem[(j3)*sharedmemdim.x+(i3)] - sharedmem[j2*sharedmemdim.x+i2]), 2);
+                                            }}
+                                        count++;
+                                }}
+                                if (normsum != 0) Weight = (expf(-normsum/h2));
+                                else Weight = 0.0;
+                                Weight_norm += Weight;
+                                value += sharedmem[j1*sharedmemdim.x+i1]*Weight;
+                            }}      
+                                
+                if (Weight_norm != 0) Bd[idz*imagedim.x*imagedim.y + idy*imagedim.x + idx] = value/Weight_norm;
+                else Bd[idz*imagedim.x*imagedim.y + idy*imagedim.x + idx] = Ad[idz*imagedim.x*imagedim.y + idy*imagedim.x + idx];
+            }
+        }      /*boundary conditions end*/
+    }
+    else {
+        /*3D case*/
+        /*checking boundaries to be within the image and avoid padded spaces */
+        if( idx >= padXY && idx < (imagedim.x - padXY) &&
+                idy >= padXY && idy < (imagedim.y - padXY) &&
+                idz >= padXY && idz < (imagedim.z - padXY) )
+        {
+            int i_centr = threadIdx.x + SearchW; /*indices of the centrilized (main) pixel */
+            int j_centr = threadIdx.y + SearchW; /*indices of the centrilized (main) pixel */
+            int k_centr = threadIdx.z + SearchW; /*indices of the centrilized (main) pixel */
+            
+            if ((i_centr > 0) && (i_centr < N) && (j_centr > 0) && (j_centr < M) && (k_centr > 0) && (k_centr < Z)) {
+                
+                Weight_norm = 0; value = 0.0;
+                /* Massive Search window loop */
+                for(i1 = i_centr - SearchW_real ; i1 <= i_centr + SearchW_real; i1++) {
+                    for(j1 = j_centr - SearchW_real ; j1<= j_centr + SearchW_real ; j1++) {
+                        for(k1 = k_centr - SearchW_real ; k1<= k_centr + SearchW_real ; k1++) {
+                            /* if inside the searching window */
+                            count = 0; normsum = 0.0;
+                            for(i_l=-SimilW; i_l<=SimilW; i_l++) {
+                                for(j_l=-SimilW; j_l<=SimilW; j_l++) {
+                                    for(k_l=-SimilW; k_l<=SimilW; k_l++) {
+                                        i2 = i1+i_l; j2 = j1+j_l; k2 = k1+k_l;
+                                        i3 = i_centr+i_l; j3 = j_centr+j_l; k3 = k_centr+k_l;   /*coordinates of the inner patch loop */                              
+                                                    if ((i2 > 0) && (i2 < N) && (j2 > 0) && (j2 < M) && (k2 > 0) && (k2 < Z)) {
+                                                        if ((i3 > 0) && (i3 < N) && (j3 > 0) && (j3 < M) && (k3 > 0) && (k3 < Z)) {
+                                                            normsum += Eucl_Vec_d[count]*pow((sharedmem[(k3)*sharedmemdim.x*sharedmemdim.y + (j3)*sharedmemdim.x+(i3)] - sharedmem[(k2)*sharedmemdim.x*sharedmemdim.y + j2*sharedmemdim.x+i2]), 2);
+                                                        }}
+                                                    count++;
+                                                }}}
+                                       if (normsum != 0) Weight = (expf(-normsum/h2));
+                                       else Weight = 0.0;
+                                       Weight_norm += Weight;
+                                       value += sharedmem[k1*sharedmemdim.x*sharedmemdim.y + j1*sharedmemdim.x+i1]*Weight;                                                                
+                        }}}      /* BIG search window loop end*/
+                
+               
+                if (Weight_norm != 0) Bd[idz*imagedim.x*imagedim.y + idy*imagedim.x + idx] = value/Weight_norm;
+                else Bd[idz*imagedim.x*imagedim.y + idy*imagedim.x + idx] = Ad[idz*imagedim.x*imagedim.y + idy*imagedim.x + idx];
+            }
+        }      /* boundary conditions end */
+    }
+}
+        
+/////////////////////////////////////////////////
+// HOST FUNCTION
+extern "C" void NLM_GPU_kernel(float *A, float* B, float *Eucl_Vec, int N, int M, int Z, int dimension, int SearchW, int SimilW, int SearchW_real, float h2, float lambda)
+{
+    int deviceCount = -1; // number of devices
+    cudaGetDeviceCount(&deviceCount);
+    if (deviceCount == 0) {
+        fprintf(stderr, "No CUDA devices found\n");
+        return;
+    }
+    
+//     cudaDeviceReset();
+    
+    int padXY, SearchW_full, SimilW_full,  blockWidth, blockHeight, blockDepth, nBlockX, nBlockY, nBlockZ, kernel_depth;
+    float *Ad, *Bd, *Eucl_Vec_d;
+    
+    if (dimension == 2) {
+        blockWidth  = 16;
+        blockHeight = 16;
+        blockDepth  = 1;
+        Z = 1;
+        kernel_depth = 0;
+    }
+    else {
+        blockWidth  = 8;
+        blockHeight = 8;
+        blockDepth  = 8;
+        kernel_depth = SearchW;
+    }
+    
+    // compute how many blocks are needed
+    nBlockX = ceil((float)N / (float)blockWidth);
+    nBlockY = ceil((float)M / (float)blockHeight);
+    nBlockZ = ceil((float)Z / (float)blockDepth);
+    
+    dim3 dimGrid(nBlockX,nBlockY*nBlockZ);
+    dim3 dimBlock(blockWidth, blockHeight, blockDepth);
+    dim3 imagedim(N,M,Z);
+    dim3 griddim(nBlockX,nBlockY,nBlockZ);
+    
+    dim3 kerneldim(SearchW,SearchW,kernel_depth);
+    dim3 sharedmemdim((SearchW*2)+blockWidth,(SearchW*2)+blockHeight,(kernel_depth*2)+blockDepth);
+    int sharedmemsize = sizeof(float)*sharedmemdim.x*sharedmemdim.y*sharedmemdim.z;
+    int updateperthread = ceil((float)(sharedmemdim.x*sharedmemdim.y*sharedmemdim.z)/(float)(blockWidth*blockHeight*blockDepth));
+    float neighborsize = (2*SearchW+1)*(2*SearchW+1)*(2*kernel_depth+1);
+    
+    padXY = SearchW + 2*SimilW; /* padding sizes */
+    
+    SearchW_full = 2*SearchW + 1; /* the full searching window  size */
+    SimilW_full = 2*SimilW + 1;   /* the full similarity window  size */
+    
+    /*allocate space for images on device*/
+    checkCudaErrors( cudaMalloc((void**)&Ad,N*M*Z*sizeof(float)) );
+    checkCudaErrors( cudaMalloc((void**)&Bd,N*M*Z*sizeof(float)) );
+    /*allocate space for vectors on device*/
+    if (dimension == 2) {
+        checkCudaErrors( cudaMalloc((void**)&Eucl_Vec_d,SimilW_full*SimilW_full*sizeof(float)) );
+        checkCudaErrors( cudaMemcpy(Eucl_Vec_d,Eucl_Vec,SimilW_full*SimilW_full*sizeof(float),cudaMemcpyHostToDevice) );
+    }
+    else {
+        checkCudaErrors( cudaMalloc((void**)&Eucl_Vec_d,SimilW_full*SimilW_full*SimilW_full*sizeof(float)) );
+        checkCudaErrors( cudaMemcpy(Eucl_Vec_d,Eucl_Vec,SimilW_full*SimilW_full*SimilW_full*sizeof(float),cudaMemcpyHostToDevice) );
+    }
+    
+    /* copy data from the host to device */
+    checkCudaErrors( cudaMemcpy(Ad,A,N*M*Z*sizeof(float),cudaMemcpyHostToDevice) );
+    
+    // Run CUDA kernel here
+    NLM_kernel<<<dimGrid,dimBlock,sharedmemsize>>>(Ad, Bd, Eucl_Vec_d, M, N, Z, SearchW, SimilW, SearchW_real, SearchW_full, SimilW_full, padXY, h2, lambda, imagedim, griddim, kerneldim, sharedmemdim, updateperthread, neighborsize);
+    
+    checkCudaErrors( cudaPeekAtLastError() );
+//     gpuErrchk( cudaDeviceSynchronize() );
+    
+    checkCudaErrors( cudaMemcpy(B,Bd,N*M*Z*sizeof(float),cudaMemcpyDeviceToHost) );
+    cudaFree(Ad); cudaFree(Bd); cudaFree(Eucl_Vec_d);
+}
diff --git a/Wrappers/Matlab/mex_compile/regularizers_GPU/NL_Regul/NLM_GPU_kernel.h b/Wrappers/Matlab/mex_compile/regularizers_GPU/NL_Regul/NLM_GPU_kernel.h
new file mode 100644
index 0000000..bc9d4a3
--- /dev/null
+++ b/Wrappers/Matlab/mex_compile/regularizers_GPU/NL_Regul/NLM_GPU_kernel.h
@@ -0,0 +1,6 @@
+#ifndef __NLMREG_KERNELS_H_
+#define __NLMREG_KERNELS_H_
+
+extern "C" void NLM_GPU_kernel(float *A, float* B, float *Eucl_Vec, int N, int M, int Z, int dimension, int SearchW, int SimilW, int SearchW_real, float denh2, float lambda);
+
+#endif 
diff --git a/Wrappers/Matlab/studentst.m b/Wrappers/Matlab/studentst.m
deleted file mode 100644
index 99fed1e..0000000
--- a/Wrappers/Matlab/studentst.m
+++ /dev/null
@@ -1,47 +0,0 @@
-function [f,g,h,s,k] = studentst(r,k,s)
-% Students T penalty with 'auto-tuning'
-%
-% use:
-%   [f,g,h,{k,{s}}] = studentst(r)     - automatically fits s and k
-%   [f,g,h,{k,{s}}] = studentst(r,k)   - automatically fits s
-%   [f,g,h,{k,{s}}] = studentst(r,k,s) - use given s and k
-%
-% input:
-%   r - residual as column vector
-%   s - scale (optional)
-%   k - degrees of freedom (optional)
-% 
-% output:
-%   f - misfit (scalar)
-%   g - gradient (column vector)
-%   h - positive approximation of the Hessian (column vector, Hessian is a diagonal matrix)
-%   s,k - scale and degrees of freedom
-%
-% Tristan van Leeuwen, 2012.
-% tleeuwen@eos.ubc.ca
-
-% fit both s and k
-if nargin == 1
-    opts = optimset('maxFunEvals',1e2);
-    tmp = fminsearch(@(x)st(r,x(1),x(2)),[1;2],opts);
-    s   = tmp(1);
-    k   = tmp(2);
-end
-
-
-if nargin == 2
-    opts = optimset('maxFunEvals',1e2);
-    tmp = fminsearch(@(x)st(r,x,k),[1],opts);
-    s   = tmp(1);
-end
-
-% evaulate penalty
-[f,g,h] = st(r,s,k);
-
-
-function [f,g,h] = st(r,s,k)
-n = length(r);
-c = -n*(gammaln((k+1)/2) - gammaln(k/2) - .5*log(pi*s*k));
-f = c + .5*(k+1)*sum(log(1 + conj(r).*r/(s*k)));
-g = (k+1)*r./(s*k + conj(r).*r);
-h = (k+1)./(s*k + conj(r).*r);
diff --git a/Wrappers/Matlab/supp/RMSE.m b/Wrappers/Matlab/supp/RMSE.m
new file mode 100644
index 0000000..002f776
--- /dev/null
+++ b/Wrappers/Matlab/supp/RMSE.m
@@ -0,0 +1,7 @@
+function err = RMSE(signal1, signal2)
+%RMSE Root Mean Squared Error
+
+err = sum((signal1 - signal2).^2)/length(signal1);  % MSE
+err = sqrt(err);                                    % RMSE
+
+end
\ No newline at end of file
diff --git a/Wrappers/Matlab/supp/my_red_yellowMAP.mat b/Wrappers/Matlab/supp/my_red_yellowMAP.mat
new file mode 100644
index 0000000..c2a5b87
Binary files /dev/null and b/Wrappers/Matlab/supp/my_red_yellowMAP.mat differ
diff --git a/Wrappers/Matlab/supp/sino_add_artifacts.m b/Wrappers/Matlab/supp/sino_add_artifacts.m
new file mode 100644
index 0000000..f601914
--- /dev/null
+++ b/Wrappers/Matlab/supp/sino_add_artifacts.m
@@ -0,0 +1,33 @@
+function sino_artifacts = sino_add_artifacts(sino,artifact_type)
+% function to add various distortions to the sinogram space, current
+% version includes: random rings and zingers (streaks)
+% Input: 
+% 1. sinogram
+% 2. artifact type: 'rings' or 'zingers' (streaks)
+
+
+[Detectors, anglesNumb, SlicesZ] = size(sino);
+fprintf('%s %i %s %i %s %i %s \n', 'Sinogram has a dimension of', Detectors, 'detectors;', anglesNumb, 'projections;', SlicesZ, 'vertical slices.');
+
+sino_artifacts = sino;
+
+if (strcmp(artifact_type,'rings'))
+    fprintf('%s \n', 'Adding rings...');    
+    NumRings = round(Detectors/20); % Number of rings relatively to the size of Detectors
+    IntenOff = linspace(0.05,0.5,NumRings); % the intensity of rings in the selected range
+    
+    for k = 1:SlicesZ
+        % generate random indices to propagate rings
+        RandInd = randperm(Detectors,Detectors);
+        for jj = 1:NumRings
+            ind_c = RandInd(jj);
+            sino_artifacts(ind_c,1:end,k) = sino_artifacts(ind_c,1:end,k) + IntenOff(jj).*sino_artifacts(ind_c,1:end,k); % generate a constant offset            
+        end
+        
+    end
+elseif (strcmp(artifact_type,'zingers'))
+    fprintf('%s \n', 'Adding zingers...');
+else
+    fprintf('%s \n', 'Nothing selected, the same sinogram returned...');
+end
+end
\ No newline at end of file
diff --git a/Wrappers/Matlab/supp/studentst.m b/Wrappers/Matlab/supp/studentst.m
new file mode 100644
index 0000000..99fed1e
--- /dev/null
+++ b/Wrappers/Matlab/supp/studentst.m
@@ -0,0 +1,47 @@
+function [f,g,h,s,k] = studentst(r,k,s)
+% Students T penalty with 'auto-tuning'
+%
+% use:
+%   [f,g,h,{k,{s}}] = studentst(r)     - automatically fits s and k
+%   [f,g,h,{k,{s}}] = studentst(r,k)   - automatically fits s
+%   [f,g,h,{k,{s}}] = studentst(r,k,s) - use given s and k
+%
+% input:
+%   r - residual as column vector
+%   s - scale (optional)
+%   k - degrees of freedom (optional)
+% 
+% output:
+%   f - misfit (scalar)
+%   g - gradient (column vector)
+%   h - positive approximation of the Hessian (column vector, Hessian is a diagonal matrix)
+%   s,k - scale and degrees of freedom
+%
+% Tristan van Leeuwen, 2012.
+% tleeuwen@eos.ubc.ca
+
+% fit both s and k
+if nargin == 1
+    opts = optimset('maxFunEvals',1e2);
+    tmp = fminsearch(@(x)st(r,x(1),x(2)),[1;2],opts);
+    s   = tmp(1);
+    k   = tmp(2);
+end
+
+
+if nargin == 2
+    opts = optimset('maxFunEvals',1e2);
+    tmp = fminsearch(@(x)st(r,x,k),[1],opts);
+    s   = tmp(1);
+end
+
+% evaulate penalty
+[f,g,h] = st(r,s,k);
+
+
+function [f,g,h] = st(r,s,k)
+n = length(r);
+c = -n*(gammaln((k+1)/2) - gammaln(k/2) - .5*log(pi*s*k));
+f = c + .5*(k+1)*sum(log(1 + conj(r).*r/(s*k)));
+g = (k+1)*r./(s*k + conj(r).*r);
+h = (k+1)./(s*k + conj(r).*r);
diff --git a/Wrappers/Matlab/supp/zing_rings_add.m b/Wrappers/Matlab/supp/zing_rings_add.m
new file mode 100644
index 0000000..d197b1f
--- /dev/null
+++ b/Wrappers/Matlab/supp/zing_rings_add.m
@@ -0,0 +1,91 @@
+%  uncomment this part of script to generate data with different noise characterisitcs
+
+fprintf('%s\n', 'Generating Projection Data...');
+
+% Creating RHS (b) - the sinogram (using a strip projection model)
+% vol_geom = astra_create_vol_geom(N, N);
+% proj_geom = astra_create_proj_geom('parallel', 1.0, P, theta_rad);
+% proj_id_temp = astra_create_projector('strip', proj_geom, vol_geom);
+% [sinogram_id, sinogramIdeal] = astra_create_sino(phantom, proj_id_temp);
+% astra_mex_data2d('delete',sinogram_id);
+% astra_mex_algorithm('delete',proj_id_temp);
+
+%%
+% inverse crime data generation
+[sino_id, sinogramIdeal] = astra_create_sino3d_cuda(phantom, proj_geom, vol_geom);
+astra_mex_data3d('delete', sino_id);
+
+% [id,x] = astra_create_backprojection3d_cuda(sinogramIdeal, proj_geom, vol_geom);
+% astra_mex_data3d('delete', id);
+%%
+%
+% % adding Gaussian noise
+% eta = 0.04;  % Relative noise level
+% E = randn(size(sinogram));
+% sinogram = sinogram + eta*norm(sinogram,'fro')*E/norm(E,'fro');  % adding noise to the sinogram
+% sinogram(sinogram<0) = 0;
+% clear E;
+
+%%
+% adding zingers
+val_offset = 0;
+sino_zing = sinogramIdeal';
+vec1 = [60, 80, 80, 70, 70, 90, 90, 40, 130, 145, 155, 125];
+vec2 = [350, 450, 190, 500, 250, 530, 330, 230, 550, 250, 450, 195];
+for jj = 1:length(vec1)
+    for i1 = -2:2
+        for j1 = -2:2
+            sino_zing(vec1(jj)+i1, vec2(jj)+j1) = val_offset;
+        end
+    end
+end
+
+% adding stripes into the signogram
+sino_zing_rings = sino_zing;
+coeff = linspace2(0.01,0.15,180);
+vmax = max(sinogramIdeal(:));
+sino_zing_rings(1:180,120) = sino_zing_rings(1:180,120) + vmax*0.13;
+sino_zing_rings(80:180,209) = sino_zing_rings(80:180,209) + vmax*0.14;
+sino_zing_rings(50:110,210) = sino_zing_rings(50:110,210) + vmax*0.12;
+sino_zing_rings(1:180,211) = sino_zing_rings(1:180,211) + vmax*0.14;
+sino_zing_rings(1:180,300) = sino_zing_rings(1:180,300) + vmax*coeff(:);
+sino_zing_rings(1:180,301) = sino_zing_rings(1:180,301) + vmax*0.14;
+sino_zing_rings(10:100,302) = sino_zing_rings(10:100,302) + vmax*0.15;
+sino_zing_rings(90:180,350) = sino_zing_rings(90:180,350) + vmax*0.11;
+sino_zing_rings(60:140,410) = sino_zing_rings(60:140,410) + vmax*0.12;
+sino_zing_rings(1:180,411) = sino_zing_rings(1:180,411) + vmax*0.14;
+sino_zing_rings(1:180,412) = sino_zing_rings(1:180,412) + vmax*coeff(:);
+sino_zing_rings(1:180,413) = sino_zing_rings(1:180,413) + vmax*coeff(:);
+sino_zing_rings(1:180,500) = sino_zing_rings(1:180,500) - vmax*0.12;
+sino_zing_rings(1:180,501) = sino_zing_rings(1:180,501) - vmax*0.12;
+sino_zing_rings(1:180,550) = sino_zing_rings(1:180,550) + vmax*0.11;
+sino_zing_rings(1:180,551) = sino_zing_rings(1:180,551) + vmax*0.11;
+sino_zing_rings(1:180,552) = sino_zing_rings(1:180,552) + vmax*0.11;
+
+sino_zing_rings(sino_zing_rings < 0) = 0;
+%%
+
+% adding Poisson noise
+dose = 50000;
+multifactor = 0.002;
+
+dataExp = dose.*exp(-sino_zing_rings*multifactor); % noiseless raw data
+dataPnoise = astra_add_noise_to_sino(dataExp, dose); % pre-log noisy raw data (weights)
+sino_zing_rings = log(dose./max(dataPnoise,1))/multifactor; %log corrected data -> sinogram
+Dweights = dataPnoise'; % statistical weights
+sino_zing_rings = sino_zing_rings';
+clear dataPnoise dataExp 
+
+% w = dose./exp(sinogram*multifactor); % getting back raw data from log-cor
+
+% figure(1);
+% set(gcf, 'Position', get(0,'Screensize'));
+% subplot(1,2,1); imshow(phantom,[0 0.6]); title('Ideal Phantom'); colorbar;
+% subplot(1,2,2); imshow(sinogram,[0 180]);  title('Noisy Sinogram');  colorbar;
+% colormap(cmapnew);
+
+% figure;
+% set(gcf, 'Position', get(0,'Screensize'));
+% subplot(1,2,1); imshow(sinogramIdeal,[0 180]);  title('Ideal Sinogram');  colorbar;
+% imshow(sino_zing_rings,[0 180]); title('Noisy Sinogram with zingers and stripes');  colorbar;
+% colormap(cmapnew);
\ No newline at end of file
diff --git a/demos/Demo_Phantom3D_Cone.m b/demos/Demo_Phantom3D_Cone.m
deleted file mode 100644
index a8f2c92..0000000
--- a/demos/Demo_Phantom3D_Cone.m
+++ /dev/null
@@ -1,67 +0,0 @@
-% A demo script to reconstruct 3D synthetic data using FISTA method for
-% CONE BEAM geometry
-% requirements: ASTRA-toolbox and TomoPhantom toolbox
-
-close all;clc;clear all;
-% adding paths
-addpath('../data/');
-addpath('../main_func/'); addpath('../main_func/regularizers_CPU/'); addpath('../main_func/regularizers_GPU/NL_Regul/'); addpath('../main_func/regularizers_GPU/Diffus_HO/');
-addpath('../supp/');
-
-%%
-% build 3D phantom using TomoPhantom 
-modelNo = 3; % see Phantom3DLibrary.dat file in TomoPhantom
-N = 256; % x-y-z size (cubic image)
-angles = 0:1.5:360; % angles vector in degrees
-angles_rad = angles*(pi/180); % conversion to radians
-det_size = round(sqrt(2)*N); % detector size
-
-%---------TomoPhantom routines---------%
-pathTP = '/home/algol/Documents/MATLAB/TomoPhantom/functions/models/Phantom3DLibrary.dat'; % path to TomoPhantom parameters file
-TomoPhantom = buildPhantom3D(modelNo,N,pathTP); % generate 3D phantom
-%--------------------------------------%
-%%
-% using ASTRA-toolbox to set the projection geometry (cone beam)
-% eg: astra.create_proj_geom('cone', 1.0 (resol), 1.0 (resol), detectorRowCount, detectorColCount, angles, originToSource, originToDetector)
-vol_geom = astra_create_vol_geom(N,N,N);
-proj_geom = astra_create_proj_geom('cone', 1.0, 1.0, N, det_size, angles_rad, 2000, 2160);
-%% 
-% do forward projection using ASTRA
-% inverse crime data generation
-[sino_id, SinoCone3D] = astra_create_sino3d_cuda(TomoPhantom, proj_geom, vol_geom);
-astra_mex_data3d('delete', sino_id);
-%%
-fprintf('%s\n', 'Reconstructing with CGLS using ASTRA-toolbox ...');
-vol_id = astra_mex_data3d('create', '-vol', vol_geom, 0);
-proj_id = astra_mex_data3d('create', '-proj3d', proj_geom, SinoCone3D); 
-cfg = astra_struct('CGLS3D_CUDA');
-cfg.ProjectionDataId = proj_id;
-cfg.ReconstructionDataId = vol_id;
-cfg.option.MinConstraint = 0;
-alg_id = astra_mex_algorithm('create', cfg);
-astra_mex_algorithm('iterate', alg_id, 15);
-reconASTRA_3D = astra_mex_data3d('get', vol_id);
-%%
-fprintf('%s\n', 'Reconstruction using FISTA-LS without regularization...');
-clear params
-% define parameters
-params.proj_geom = proj_geom; % pass geometry to the function
-params.vol_geom = vol_geom;
-params.sino = single(SinoCone3D); % sinogram
-params.iterFISTA = 30; %max number of outer iterations
-params.X_ideal = TomoPhantom; % ideal phantom
-params.show = 1; % visualize reconstruction on each iteration
-params.slice = round(N/2); params.maxvalplot = 1; 
-tic; [X_FISTA, output] = FISTA_REC(params); toc; 
-
-error_FISTA = output.Resid_error; obj_FISTA = output.objective;
-fprintf('%s %.4f\n', 'Min RMSE for FISTA-LS reconstruction is:', min(error_FISTA(:)));
-
-Resid3D = (TomoPhantom - X_FISTA).^2;
-figure(2);
-subplot(1,2,1); imshow(X_FISTA(:,:,params.slice),[0 params.maxvalplot]); title('FISTA-LS reconstruction'); colorbar;
-subplot(1,2,2); imshow(Resid3D(:,:,params.slice),[0 0.1]);  title('residual'); colorbar;
-figure(3);
-subplot(1,2,1); plot(error_FISTA);  title('RMSE plot'); colorbar;
-subplot(1,2,2); plot(obj_FISTA);  title('Objective plot'); colorbar;
-%%
\ No newline at end of file
diff --git a/demos/Demo_Phantom3D_Parallel.m b/demos/Demo_Phantom3D_Parallel.m
deleted file mode 100644
index 4219bd1..0000000
--- a/demos/Demo_Phantom3D_Parallel.m
+++ /dev/null
@@ -1,121 +0,0 @@
-% A demo script to reconstruct 3D synthetic data using FISTA method for
-% PARALLEL BEAM geometry
-% requirements: ASTRA-toolbox and TomoPhantom toolbox
-
-close all;clc;clear;
-% adding paths
-addpath('../data/');
-addpath('../main_func/'); addpath('../main_func/regularizers_CPU/'); addpath('../main_func/regularizers_GPU/NL_Regul/'); addpath('../main_func/regularizers_GPU/Diffus_HO/');
-addpath('../supp/');
-
-%%
-% Main reconstruction/data generation parameters 
-modelNo = 2; % see Phantom3DLibrary.dat file in TomoPhantom
-N = 256; % x-y-z size (cubic image)
-angles = 1:0.5:180; % angles vector in degrees
-angles_rad = angles*(pi/180); % conversion to radians
-det_size = round(sqrt(2)*N); % detector size
-
-%---------TomoPhantom routines---------%
-pathTP = '/home/algol/Documents/MATLAB/TomoPhantom/functions/models/Phantom3DLibrary.dat'; % path to TomoPhantom parameters file
-TomoPhantom = buildPhantom3D(modelNo,N,pathTP); % generate 3D phantom
-sino_tomophan3D = buildSino3D(modelNo, N, det_size, single(angles),pathTP); % generate ideal data
-%--------------------------------------%
-% Adding noise and distortions if required
-sino_tomophan3D = sino_add_artifacts(sino_tomophan3D,'rings');
-% adding Poisson noise
-dose =  3e9; % photon flux (controls noise level)
-multifactor = max(sino_tomophan3D(:));
-dataExp = dose.*exp(-sino_tomophan3D/multifactor); % noiseless raw data
-dataRaw = astra_add_noise_to_sino(dataExp, dose); % pre-log noisy raw data (weights)
-sino3D_log = log(dose./max(dataRaw,1))*multifactor; %log corrected data -> sinogram
-clear dataExp sino_tomophan3D
-%
-%%
-%-------------Astra toolbox------------%
-% one can generate data using ASTRA toolbox
-proj_geom = astra_create_proj_geom('parallel', 1, det_size, angles_rad);
-vol_geom = astra_create_vol_geom(N,N);
-sino_ASTRA3D = zeros(det_size, length(angles), N, 'single');
-for i = 1:N
-[sino_id, sinoT] = astra_create_sino_cuda(TomoPhantom(:,:,i), proj_geom, vol_geom);
-sino_ASTRA3D(:,:,i) = sinoT';
-astra_mex_data2d('delete', sino_id);
-end
-%--------------------------------------%
-%%
-% using ASTRA-toolbox to set the projection geometry (parallel beam)
-proj_geom = astra_create_proj_geom('parallel', 1, det_size, angles_rad);
-vol_geom = astra_create_vol_geom(N,N);
-%%
-fprintf('%s\n', 'Reconstructing with FBP using ASTRA-toolbox ...');
-reconASTRA_3D = zeros(size(TomoPhantom),'single');
-for k = 1:N
-vol_id = astra_mex_data2d('create', '-vol', vol_geom, 0);
-proj_id = astra_mex_data2d('create', '-sino', proj_geom, sino3D_log(:,:,k)'); 
-cfg = astra_struct('FBP_CUDA');
-cfg.ProjectionDataId = proj_id;
-cfg.ReconstructionDataId = vol_id;
-cfg.option.MinConstraint = 0;
-alg_id = astra_mex_algorithm('create', cfg);
-astra_mex_algorithm('iterate', alg_id, 1);
-rec = astra_mex_data2d('get', vol_id);
-reconASTRA_3D(:,:,k) = single(rec);
-end
-figure; imshow(reconASTRA_3D(:,:,128), [0 1.3]);
-%% 
-%% 
-fprintf('%s\n', 'Reconstruction using OS-FISTA-PWLS without regularization...');
-clear params
-% define parameters
-params.proj_geom = proj_geom; % pass geometry to the function
-params.vol_geom = vol_geom;
-params.sino = single(sino3D_log); % sinogram
-params.iterFISTA = 15; %max number of outer iterations
-params.X_ideal = TomoPhantom; % ideal phantom
-params.weights = dataRaw./max(dataRaw(:)); % statistical weight for PWLS
-params.subsets = 12; % the number of subsets
-params.show = 1; % visualize reconstruction on each iteration
-params.slice = 128; params.maxvalplot = 1.3; 
-tic; [X_FISTA, output] = FISTA_REC(params); toc; 
-
-error_FISTA = output.Resid_error; obj_FISTA = output.objective;
-fprintf('%s %.4f\n', 'Min RMSE for FISTA-PWLS reconstruction is:', min(error_FISTA(:)));
-
-Resid3D = (TomoPhantom - X_FISTA).^2;
-figure(2);
-subplot(1,2,1); imshow(X_FISTA(:,:,params.slice),[0 params.maxvalplot]); title('FISTA-LS reconstruction'); colorbar;
-subplot(1,2,2); imshow(Resid3D(:,:,params.slice),[0 0.1]);  title('residual'); colorbar;
-figure(3);
-subplot(1,2,1); plot(error_FISTA);  title('RMSE plot'); 
-subplot(1,2,2); plot(obj_FISTA);  title('Objective plot'); 
-%%
-%% 
-fprintf('%s\n', 'Reconstruction using OS-FISTA-GH with FGP-TV regularization...');
-clear params
-% define parameters
-params.proj_geom = proj_geom; % pass geometry to the function
-params.vol_geom = vol_geom;
-params.sino = single(sino3D_log); % sinogram
-params.iterFISTA = 15; %max number of outer iterations
-params.X_ideal = TomoPhantom; % ideal phantom
-params.weights = dataRaw./max(dataRaw(:)); % statistical weights for PWLS
-params.subsets = 12; % the number of subsets
-params.Regul_Lambda_FGPTV = 100; % TV regularization parameter for FGP-TV
-params.Ring_LambdaR_L1 = 0.02; % Soft-Thresh L1 ring variable parameter
-params.Ring_Alpha = 21; % to boost ring removal procedure
-params.show = 1; % visualize reconstruction on each iteration
-params.slice = 128; params.maxvalplot = 1.3; 
-tic; [X_FISTA_GH_TV, output] = FISTA_REC(params); toc; 
-
-error_FISTA_GH_TV = output.Resid_error; obj_FISTA_GH_TV = output.objective;
-fprintf('%s %.4f\n', 'Min RMSE for FISTA-PWLS reconstruction is:', min(error_FISTA_GH_TV(:)));
-
-Resid3D = (TomoPhantom - X_FISTA_GH_TV).^2;
-figure(2);
-subplot(1,2,1); imshow(X_FISTA_GH_TV(:,:,params.slice),[0 params.maxvalplot]); title('FISTA-LS reconstruction'); colorbar;
-subplot(1,2,2); imshow(Resid3D(:,:,params.slice),[0 0.1]);  title('residual'); colorbar;
-figure(3);
-subplot(1,2,1); plot(error_FISTA_GH_TV);  title('RMSE plot'); 
-subplot(1,2,2); plot(obj_FISTA_GH_TV);  title('Objective plot'); 
-%%
\ No newline at end of file
diff --git a/demos/Demo_RealData3D_Parallel.m b/demos/Demo_RealData3D_Parallel.m
deleted file mode 100644
index f82e0b0..0000000
--- a/demos/Demo_RealData3D_Parallel.m
+++ /dev/null
@@ -1,186 +0,0 @@
-% Demonstration of tomographic 3D reconstruction from X-ray synchrotron
-% dataset (dendrites) using various data fidelities
-% ! It is advisable not to run the whole script, it will take lots of time to reconstruct the whole 3D data using many algorithms !
-clear
-close all
-%%
-% % adding paths
-addpath('../data/');
-addpath('../main_func/'); addpath('../main_func/regularizers_CPU/'); addpath('../main_func/regularizers_GPU/NL_Regul/'); addpath('../main_func/regularizers_GPU/Diffus_HO/');
-addpath('../supp/');
-
-load('DendrRawData.mat') % load raw data of 3D dendritic set
-angles_rad = angles*(pi/180); % conversion to radians
-det_size = size(data_raw3D,1); % detectors dim
-angSize = size(data_raw3D, 2); % angles dim
-slices_tot = size(data_raw3D, 3); % no of slices
-recon_size = 950; % reconstruction size
-
-Sino3D = zeros(det_size, angSize, slices_tot, 'single'); % log-corrected sino
-% normalizing the data
-for  jj = 1:slices_tot
-    sino = data_raw3D(:,:,jj);
-    for ii = 1:angSize
-        Sino3D(:,ii,jj) = log((flats_ar(:,jj)-darks_ar(:,jj))./(single(sino(:,ii)) - darks_ar(:,jj)));
-    end
-end
-
-Sino3D = Sino3D.*1000;
-Weights3D = single(data_raw3D); % weights for PW model
-clear data_raw3D
-%%
-% set projection/reconstruction geometry here
-proj_geom = astra_create_proj_geom('parallel', 1, det_size, angles_rad);
-vol_geom = astra_create_vol_geom(recon_size,recon_size);
-%%
-fprintf('%s\n', 'Reconstruction using FBP...');
-FBP = iradon(Sino3D(:,:,10), angles,recon_size);
-figure; imshow(FBP , [0, 3]); title ('FBP reconstruction');
-
-%--------FISTA_REC modular reconstruction alogrithms---------
-%%
-fprintf('%s\n', 'Reconstruction using FISTA-OS-PWLS without regularization...');
-clear params
-params.proj_geom = proj_geom; % pass geometry to the function
-params.vol_geom = vol_geom;
-params.sino = Sino3D;
-params.iterFISTA  = 18;
-params.weights = Weights3D;
-params.subsets = 8; % the number of ordered subsets 
-params.show = 1;
-params.maxvalplot = 2.5; params.slice = 1;
-
-tic; [X_fista, outputFISTA] = FISTA_REC(params); toc;
-figure; imshow(X_fista(:,:,params.slice) , [0, 2.5]); title ('FISTA-OS-PWLS reconstruction');
-%%
-fprintf('%s\n', 'Reconstruction using FISTA-OS-PWLS-TV...');
-clear params
-params.proj_geom = proj_geom; % pass geometry to the function
-params.vol_geom = vol_geom;
-params.sino = Sino3D;
-params.iterFISTA  = 18;
-params.Regul_Lambda_FGPTV = 5.0000e+6; % TV regularization parameter for FGP-TV
-params.weights = Weights3D;
-params.subsets = 8; % the number of ordered subsets 
-params.show = 1;
-params.maxvalplot = 2.5; params.slice = 10;
-
-tic; [X_fista_TV, outputTV] = FISTA_REC(params); toc;
-figure; imshow(X_fista_TV(:,:,params.slice) , [0, 2.5]); title ('FISTA-OS-PWLS-TV reconstruction');
-%%
-fprintf('%s\n', 'Reconstruction using FISTA-OS-GH-TV...');
-clear params
-params.proj_geom = proj_geom; % pass geometry to the function
-params.vol_geom = vol_geom;
-params.sino = Sino3D(:,:,10);
-params.iterFISTA  = 18;
-params.Regul_Lambda_FGPTV = 5.0000e+6; % TV regularization parameter for FGP-TV
-params.Ring_LambdaR_L1 = 0.002; % Soft-Thresh L1 ring variable parameter
-params.Ring_Alpha = 21; % to boost ring removal procedure
-params.weights = Weights3D(:,:,10);
-params.subsets = 8; % the number of ordered subsets 
-params.show = 1;
-params.maxvalplot = 2.5; params.slice = 1;
-
-tic; [X_fista_GH_TV, outputGHTV] = FISTA_REC(params); toc;
-figure; imshow(X_fista_GH_TV(:,:,params.slice) , [0, 2.5]); title ('FISTA-OS-GH-TV reconstruction');
-%%
-fprintf('%s\n', 'Reconstruction using FISTA-OS-GH-TV-LLT...');
-clear params
-params.proj_geom = proj_geom; % pass geometry to the function
-params.vol_geom = vol_geom;
-params.sino = Sino3D;
-params.iterFISTA  = 12;
-params.Regul_Lambda_FGPTV = 5.0000e+6; % TV regularization parameter for FGP-TV
-params.Regul_LambdaLLT = 100;  % regularization parameter for LLT problem
-params.Regul_tauLLT = 0.0005; % time-step parameter for the explicit scheme
-params.Ring_LambdaR_L1 = 0.002; % Soft-Thresh L1 ring variable parameter
-params.Ring_Alpha = 21; % to boost ring removal procedure
-params.weights = Weights3D;
-params.subsets = 16; % the number of ordered subsets 
-params.show = 1;
-params.maxvalplot = 2.5; params.slice = 2;
-
-tic; [X_fista_GH_TVLLT, outputGH_TVLLT] = FISTA_REC(params); toc;
-figure; imshow(X_fista_GH_TVLLT(:,:,params.slice) , [0, 2.5]); title ('FISTA-OS-GH-TV-LLT reconstruction');
-
-%%
-fprintf('%s\n', 'Reconstruction using FISTA-OS-GH-HigherOrderDiffusion...');
-% !GPU version!
-clear params
-params.proj_geom = proj_geom; % pass geometry to the function
-params.vol_geom = vol_geom;
-params.sino = Sino3D(:,:,1:5);
-params.iterFISTA  = 25;
-params.Regul_LambdaDiffHO = 2; % DiffHO regularization parameter 
-params.Regul_DiffHO_EdgePar = 0.05; % threshold parameter
-params.Regul_Iterations = 150;
-params.Ring_LambdaR_L1 = 0.002; % Soft-Thresh L1 ring variable parameter
-params.Ring_Alpha = 21; % to boost ring removal procedure
-params.weights = Weights3D(:,:,1:5);
-params.subsets = 16; % the number of ordered subsets 
-params.show = 1;
-params.maxvalplot = 2.5; params.slice = 1;
- 
-tic; [X_fista_GH_HO, outputHO] = FISTA_REC(params); toc;
-figure; imshow(X_fista_GH_HO(:,:,params.slice) , [0, 2.5]); title ('FISTA-OS-HigherOrderDiffusion reconstruction');
-
-%%
-fprintf('%s\n', 'Reconstruction using FISTA-PB...');
-% !GPU version!
-clear params
-params.proj_geom = proj_geom; % pass geometry to the function
-params.vol_geom = vol_geom;
-params.sino = Sino3D(:,:,1);
-params.iterFISTA  = 25;
-params.Regul_LambdaPatchBased_GPU = 3; % PB regularization parameter 
-params.Regul_PB_h = 0.04; % threhsold parameter
-params.Regul_PB_SearchW = 3;
-params.Regul_PB_SimilW  = 1;
-params.Ring_LambdaR_L1 = 0.002; % Soft-Thresh L1 ring variable parameter
-params.Ring_Alpha = 21; % to boost ring removal procedure
-params.weights = Weights3D(:,:,1);
-params.show = 1;
-params.maxvalplot = 2.5; params.slice = 1;
- 
-tic; [X_fista_GH_PB, outputPB] = FISTA_REC(params); toc;
-figure; imshow(X_fista_GH_PB(:,:,params.slice) , [0, 2.5]); title ('FISTA-OS-PB reconstruction');
-%%
-fprintf('%s\n', 'Reconstruction using FISTA-OS-GH-TGV...');
-% still testing...
-clear params
-params.proj_geom = proj_geom; % pass geometry to the function
-params.vol_geom = vol_geom;
-params.sino = Sino3D;
-params.iterFISTA  = 12;
-params.Regul_LambdaTGV = 0.5; % TGV regularization parameter 
-params.Regul_Iterations = 5;
-params.Ring_LambdaR_L1 = 0.002; % Soft-Thresh L1 ring variable parameter
-params.Ring_Alpha = 21; % to boost ring removal procedure
-params.weights = Weights3D;
-params.subsets = 16; % the number of ordered subsets 
-params.show = 1;
-params.maxvalplot = 2.5; params.slice = 1;
-
-tic; [X_fista_GH_TGV, outputTGV] = FISTA_REC(params); toc;
-figure; imshow(X_fista_GH_TGV(:,:,params.slice) , [0, 2.5]); title ('FISTA-OS-GH-TGV reconstruction');
-
-
-%%
-% fprintf('%s\n', 'Reconstruction using FISTA-Student-TV...');
-% clear params
-% params.proj_geom = proj_geom; % pass geometry to the function
-% params.vol_geom = vol_geom;
-% params.sino = Sino3D(:,:,10);
-% params.iterFISTA  = 50;
-% params.L_const = 0.01; % Lipshitz constant
-% params.Regul_LambdaTV = 0.008; % TV regularization parameter for FISTA-TV
-% params.fidelity = 'student'; % choosing Student t penalty
-% params.weights = Weights3D(:,:,10);
-% params.show = 0;
-% params.initialize = 1;
-% params.maxvalplot = 2.5; params.slice = 1;
-% 
-% tic; [X_fistaStudentTV] = FISTA_REC(params); toc;
-% figure; imshow(X_fistaStudentTV(:,:,1), [0, 2.5]); title ('FISTA-Student-TV reconstruction');
-%%
diff --git a/demos/DendrData.h5 b/demos/DendrData.h5
deleted file mode 100644
index f048268..0000000
Binary files a/demos/DendrData.h5 and /dev/null differ
diff --git a/demos/exportDemoRD2Data.m b/demos/exportDemoRD2Data.m
deleted file mode 100644
index 028353b..0000000
--- a/demos/exportDemoRD2Data.m
+++ /dev/null
@@ -1,35 +0,0 @@
-clear all
-close all
-%%
-% % adding paths
-addpath('../data/');
-addpath('../main_func/'); addpath('../main_func/regularizers_CPU/');
-addpath('../supp/');
-
-load('DendrRawData.mat') % load raw data of 3D dendritic set
-angles_rad = angles*(pi/180); % conversion to radians
-size_det = size(data_raw3D,1); % detectors dim
-angSize = size(data_raw3D, 2); % angles dim
-slices_tot = size(data_raw3D, 3); % no of slices
-recon_size = 950; % reconstruction size
-
-Sino3D = zeros(size_det, angSize, slices_tot, 'single'); % log-corrected sino
-% normalizing the data
-for  jj = 1:slices_tot
-    sino = data_raw3D(:,:,jj);
-    for ii = 1:angSize
-        Sino3D(:,ii,jj) = log((flats_ar(:,jj)-darks_ar(:,jj))./(single(sino(:,ii)) - darks_ar(:,jj)));
-    end
-end
-
-Sino3D = Sino3D.*1000;
-Weights3D = single(data_raw3D); % weights for PW model
-clear data_raw3D
-
-hdf5write('DendrData.h5', '/Weights3D', Weights3D)
-hdf5write('DendrData.h5', '/Sino3D', Sino3D, 'WriteMode', 'append')
-hdf5write('DendrData.h5', '/angles_rad', angles_rad,  'WriteMode', 'append')
-hdf5write('DendrData.h5', '/size_det', size_det,  'WriteMode', 'append')
-hdf5write('DendrData.h5', '/angSize', angSize,  'WriteMode', 'append')
-hdf5write('DendrData.h5', '/slices_tot', slices_tot,  'WriteMode', 'append')
-hdf5write('DendrData.h5', '/recon_size', recon_size,  'WriteMode', 'append')
\ No newline at end of file
diff --git a/main_func/FISTA_REC.m b/main_func/FISTA_REC.m
deleted file mode 100644
index d717a03..0000000
--- a/main_func/FISTA_REC.m
+++ /dev/null
@@ -1,704 +0,0 @@
-function [X,  output] = FISTA_REC(params)
-
-% <<<< FISTA-based reconstruction routine using ASTRA-toolbox >>>>
-% This code solves regularised PWLS problem using FISTA approach.
-% The code contains multiple regularisation penalties as well as it can be
-% accelerated by using ordered-subset version. Various projection
-% geometries supported.
-
-% DISCLAIMER
-% It is recommended to use ASTRA version 1.8 or later in order to avoid
-% crashing due to GPU memory overflow for big datasets
-
-% ___Input___:
-% params.[] file:
-%----------------General Parameters------------------------
-%       - .proj_geom (geometry of the projector) [required]
-%       - .vol_geom (geometry of the reconstructed object) [required]
-%       - .sino (2D or 3D sinogram) [required]
-%       - .iterFISTA (iterations for the main loop, default 40)
-%       - .L_const (Lipschitz constant, default Power method)                                                                                                    )
-%       - .X_ideal (ideal image, if given)
-%       - .weights (statisitcal weights for the PWLS model, size of the sinogram)
-%       - .fidelity (use 'studentt' fidelity)
-%       - .ROI (Region-of-interest, only if X_ideal is given)
-%       - .initialize (a 'warm start' using SIRT method from ASTRA)
-%----------------Regularization choices------------------------
-%       1 .Regul_Lambda_FGPTV (FGP-TV regularization parameter)
-%       2 .Regul_Lambda_SBTV (SplitBregman-TV regularization parameter)
-%       3 .Regul_LambdaLLT (Higher order LLT regularization parameter)
-%          3.1 .Regul_tauLLT (time step parameter for LLT (HO) term)
-%       4 .Regul_LambdaPatchBased_CPU (Patch-based nonlocal regularization parameter)
-%          4.1  .Regul_PB_SearchW (ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window))
-%          4.2  .Regul_PB_SimilW (ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window))
-%          4.3  .Regul_PB_h (PB penalty function threshold)
-%       5 .Regul_LambdaPatchBased_GPU (Patch-based nonlocal regularization parameter)
-%          5.1  .Regul_PB_SearchW (ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window))
-%          5.2  .Regul_PB_SimilW (ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window))
-%          5.3  .Regul_PB_h (PB penalty function threshold)
-%       6 .Regul_LambdaDiffHO (Higher-Order Diffusion regularization parameter)
-%          6.1  .Regul_DiffHO_EdgePar (edge-preserving noise related parameter)
-%       7 .Regul_LambdaTGV (Total Generalized variation regularization parameter)
-%       - .Regul_tol (tolerance to terminate regul iterations, default 1.0e-04)
-%       - .Regul_Iterations (iterations for the selected penalty, default 25)
-%       - .Regul_Dimension ('2D' or '3D' way to apply regularization, '3D' is the default)
-%----------------Ring removal------------------------
-%       - .Ring_LambdaR_L1 (regularization parameter for L1-ring minimization, if lambdaR_L1 > 0 then switch on ring removal)
-%       - .Ring_Alpha (larger values can accelerate convergence but check stability, default 1)
-%----------------Visualization parameters------------------------
-%       - .show (visualize reconstruction 1/0, (0 default))
-%       - .maxvalplot (maximum value to use for imshow[0 maxvalplot])
-%       - .slice (for 3D volumes - slice number to imshow)
-% ___Output___:
-% 1. X - reconstructed image/volume
-% 2. output - a structure with
-%    - .Resid_error - residual error (if X_ideal is given)
-%    - .objective: value of the objective function
-%    - .L_const: Lipshitz constant to avoid recalculations
-
-% References:
-% 1. "A Fast Iterative Shrinkage-Thresholding Algorithm for Linear Inverse
-% Problems" by A. Beck and M Teboulle
-% 2. "Ring artifacts correction in compressed sensing..." by P. Paleo
-% 3. "A novel tomographic reconstruction method based on the robust
-% Student's t function for suppressing data outliers" D. Kazantsev et.al.
-% D. Kazantsev, 2016-17
-
-% Dealing with input parameters
-if (isfield(params,'proj_geom') == 0)
-    error('%s \n', 'Please provide ASTRA projection geometry - proj_geom');
-else
-    proj_geom = params.proj_geom;
-end
-if (isfield(params,'vol_geom') == 0)
-    error('%s \n', 'Please provide ASTRA object geometry - vol_geom');
-else
-    vol_geom = params.vol_geom;
-end
-N = params.vol_geom.GridColCount;
-if (isfield(params,'sino'))
-    sino = params.sino;
-    [Detectors, anglesNumb, SlicesZ] = size(sino);
-    fprintf('%s %i %s %i %s %i %s \n', 'Sinogram has a dimension of', Detectors, 'detectors;', anglesNumb, 'projections;', SlicesZ, 'vertical slices.');
-else
-    error('%s \n', 'Please provide a sinogram');
-end
-if (isfield(params,'iterFISTA'))
-    iterFISTA = params.iterFISTA;
-else
-    iterFISTA = 40;
-end
-if (isfield(params,'weights'))
-    weights = params.weights;
-else
-    weights = ones(size(sino));
-end
-if (isfield(params,'fidelity'))
-    studentt = 0;
-    if (strcmp(params.fidelity,'studentt') == 1)
-        studentt = 1;
-    end
-else
-    studentt = 0;
-end
-if (isfield(params,'L_const'))
-    L_const = params.L_const;
-else
-    % using Power method (PM) to establish L constant
-    fprintf('%s %s %s \n', 'Calculating Lipshitz constant for',proj_geom.type, 'beam geometry...');
-    if (strcmp(proj_geom.type,'parallel') || strcmp(proj_geom.type,'fanflat') || strcmp(proj_geom.type,'fanflat_vec'))
-        % for 2D geometry we can do just one selected slice
-        niter = 15; % number of iteration for the PM
-        x1 = rand(N,N,1);
-        sqweight = sqrt(weights(:,:,1));
-        [sino_id, y] = astra_create_sino_cuda(x1, proj_geom, vol_geom);
-        y = sqweight.*y';
-        astra_mex_data2d('delete', sino_id);
-        for i = 1:niter
-            [x1] = astra_create_backprojection_cuda((sqweight.*y)', proj_geom, vol_geom);
-            s = norm(x1(:));
-            x1 = x1./s;
-            [sino_id, y] = astra_create_sino_cuda(x1, proj_geom, vol_geom);
-            y = sqweight.*y';
-            astra_mex_data2d('delete', sino_id);
-        end
-    elseif (strcmp(proj_geom.type,'cone') || strcmp(proj_geom.type,'parallel3d') || strcmp(proj_geom.type,'parallel3d_vec') || strcmp(proj_geom.type,'cone_vec'))
-        % 3D geometry
-        niter = 8; % number of iteration for PM
-        x1 = rand(N,N,SlicesZ);
-        sqweight = sqrt(weights);
-        [sino_id, y] = astra_create_sino3d_cuda(x1, proj_geom, vol_geom);
-        y = sqweight.*y;
-        astra_mex_data3d('delete', sino_id);
-        
-        for i = 1:niter
-            [id,x1] = astra_create_backprojection3d_cuda(sqweight.*y, proj_geom, vol_geom);
-            s = norm(x1(:));
-            x1 = x1/s;
-            [sino_id, y] = astra_create_sino3d_cuda(x1, proj_geom, vol_geom);
-            y = sqweight.*y;
-            astra_mex_data3d('delete', sino_id);
-            astra_mex_data3d('delete', id);
-        end
-        clear x1
-    else
-        error('%s \n', 'No suitable geometry has been found!');
-    end
-    L_const = s;
-end
-if (isfield(params,'X_ideal'))
-    X_ideal = params.X_ideal;
-else
-    X_ideal = 'none';
-end
-if (isfield(params,'ROI'))
-    ROI = params.ROI;
-else
-    ROI = find(X_ideal>=0.0);
-end
-if (isfield(params,'Regul_Lambda_FGPTV'))
-    lambdaFGP_TV = params.Regul_Lambda_FGPTV;
-else
-    lambdaFGP_TV = 0;
-end
-if (isfield(params,'Regul_Lambda_SBTV'))
-    lambdaSB_TV = params.Regul_Lambda_SBTV;
-else
-    lambdaSB_TV = 0;
-end
-if (isfield(params,'Regul_tol'))
-    tol = params.Regul_tol;
-else
-    tol = 1.0e-05;
-end
-if (isfield(params,'Regul_Iterations'))
-    IterationsRegul = params.Regul_Iterations;
-else
-    IterationsRegul = 45;
-end
-if (isfield(params,'Regul_LambdaLLT'))
-    lambdaHO = params.Regul_LambdaLLT;
-else
-    lambdaHO = 0;
-end
-if (isfield(params,'Regul_iterHO'))
-    iterHO = params.Regul_iterHO;
-else
-    iterHO = 50;
-end
-if (isfield(params,'Regul_tauLLT'))
-    tauHO = params.Regul_tauLLT;
-else
-    tauHO = 0.0001;
-end
-if (isfield(params,'Regul_LambdaPatchBased_CPU'))
-    lambdaPB = params.Regul_LambdaPatchBased_CPU;
-else
-    lambdaPB = 0;
-end
-if (isfield(params,'Regul_LambdaPatchBased_GPU'))
-    lambdaPB_GPU = params.Regul_LambdaPatchBased_GPU;
-else
-    lambdaPB_GPU = 0;
-end
-if (isfield(params,'Regul_PB_SearchW'))
-    SearchW = params.Regul_PB_SearchW;
-else
-    SearchW = 3; % default
-end
-if (isfield(params,'Regul_PB_SimilW'))
-    SimilW = params.Regul_PB_SimilW;
-else
-    SimilW = 1; % default
-end
-if (isfield(params,'Regul_PB_h'))
-    h_PB = params.Regul_PB_h;
-else
-    h_PB = 0.1; % default
-end
-if (isfield(params,'Regul_LambdaDiffHO'))
-    LambdaDiff_HO = params.Regul_LambdaDiffHO;
-else
-    LambdaDiff_HO = 0;
-end
-if (isfield(params,'Regul_DiffHO_EdgePar'))
-    LambdaDiff_HO_EdgePar = params.Regul_DiffHO_EdgePar;
-else
-    LambdaDiff_HO_EdgePar = 0.01;
-end
-if (isfield(params,'Regul_LambdaTGV'))
-    LambdaTGV = params.Regul_LambdaTGV;
-else
-    LambdaTGV = 0;
-end
-if (isfield(params,'Ring_LambdaR_L1'))
-    lambdaR_L1 = params.Ring_LambdaR_L1;
-else
-    lambdaR_L1 = 0;
-end
-if (isfield(params,'Ring_Alpha'))
-    alpha_ring = params.Ring_Alpha; % higher values can accelerate ring removal procedure
-else
-    alpha_ring = 1;
-end
-if (isfield(params,'Regul_Dimension'))
-    Dimension = params.Regul_Dimension;
-    if ((strcmp('2D', Dimension) ~= 1) && (strcmp('3D', Dimension) ~= 1))
-        Dimension = '3D';
-    end
-else
-    Dimension = '3D';
-end
-if (isfield(params,'show'))
-    show = params.show;
-else
-    show = 0;
-end
-if (isfield(params,'maxvalplot'))
-    maxvalplot = params.maxvalplot;
-else
-    maxvalplot = 1;
-end
-if (isfield(params,'slice'))
-    slice = params.slice;
-else
-    slice = 1;
-end
-if (isfield(params,'initialize'))
-    X  = params.initialize;
-    if ((size(X,1) ~= N) || (size(X,2) ~= N) || (size(X,3) ~= SlicesZ))
-        error('%s \n', 'The initialized volume has different dimensions!');
-    end
-else
-    X = zeros(N,N,SlicesZ, 'single'); % storage for the solution
-end
-if (isfield(params,'subsets'))
-    % Ordered Subsets reorganisation of data and angles
-    subsets = params.subsets; % subsets number
-    angles = proj_geom.ProjectionAngles;
-    binEdges = linspace(min(angles),max(angles),subsets+1);
-    
-    % assign values to bins
-    [binsDiscr,~] = histc(angles, [binEdges(1:end-1) Inf]);
-    
-    % get rearranged subset indices
-    IndicesReorg = zeros(length(angles),1);
-    counterM = 0;
-    for ii = 1:max(binsDiscr(:))
-        counter = 0;
-        for jj = 1:subsets
-            curr_index = ii+jj-1 + counter;
-            if (binsDiscr(jj) >= ii)
-                counterM = counterM + 1;
-                IndicesReorg(counterM) = curr_index;
-            end
-            counter = (counter + binsDiscr(jj)) - 1;
-        end
-    end
-else
-    subsets = 0; % Classical FISTA
-end
-
-%----------------Reconstruction part------------------------
-Resid_error = zeros(iterFISTA,1); % errors vector (if the ground truth is given)
-objective = zeros(iterFISTA,1); % objective function values vector
-
-
-if (subsets == 0)
-    % Classical FISTA
-    t = 1;
-    X_t = X;
-    
-    r = zeros(Detectors,SlicesZ, 'single'); % 2D array (for 3D data) of sparse "ring" vectors
-    r_x = r; % another ring variable
-    residual = zeros(size(sino),'single');
-    
-    % Outer FISTA iterations loop
-    for i = 1:iterFISTA
-        
-        X_old = X;
-        t_old = t;
-        r_old = r;
-        
-        
-        if (strcmp(proj_geom.type,'parallel') || strcmp(proj_geom.type,'fanflat') || strcmp(proj_geom.type,'fanflat_vec'))
-            % if geometry is 2D use slice-by-slice projection-backprojection routine
-            sino_updt = zeros(size(sino),'single');
-            for kkk = 1:SlicesZ
-                [sino_id, sinoT] = astra_create_sino_cuda(X_t(:,:,kkk), proj_geom, vol_geom);
-                sino_updt(:,:,kkk) = sinoT';
-                astra_mex_data2d('delete', sino_id);
-            end
-        else
-            % for 3D geometry (watch the GPU memory overflow in earlier ASTRA versions < 1.8)
-            [sino_id, sino_updt] = astra_create_sino3d_cuda(X_t, proj_geom, vol_geom);
-            astra_mex_data3d('delete', sino_id);
-        end
-        
-        if (lambdaR_L1 > 0)
-            % the ring removal part (Group-Huber fidelity)
-            for kkk = 1:anglesNumb
-                residual(:,kkk,:) =  squeeze(weights(:,kkk,:)).*(squeeze(sino_updt(:,kkk,:)) - (squeeze(sino(:,kkk,:)) - alpha_ring.*r_x));
-            end
-            vec = sum(residual,2);
-            if (SlicesZ > 1)
-                vec = squeeze(vec(:,1,:));
-            end
-            r = r_x - (1./L_const).*vec;
-            objective(i) = (0.5*sum(residual(:).^2)); % for the objective function output
-        elseif (studentt > 0)
-            % artifacts removal with Students t penalty
-            residual = weights.*(sino_updt - sino);
-            for kkk = 1:SlicesZ
-                res_vec = reshape(residual(:,:,kkk), Detectors*anglesNumb, 1); % 1D vectorized sinogram
-                %s = 100;
-                %gr = (2)*res_vec./(s*2 + conj(res_vec).*res_vec);
-                [ff, gr] = studentst(res_vec, 1);
-                residual(:,:,kkk) = reshape(gr, Detectors, anglesNumb);
-            end
-            objective(i) = ff; % for the objective function output
-        else
-            % no ring removal (LS model)
-            residual = weights.*(sino_updt - sino);
-            objective(i) = 0.5*norm(residual(:)); % for the objective function output
-        end
-        
-        % if the geometry is 2D use slice-by-slice projection-backprojection routine
-        if (strcmp(proj_geom.type,'parallel') || strcmp(proj_geom.type,'fanflat') || strcmp(proj_geom.type,'fanflat_vec'))
-            x_temp = zeros(size(X),'single');
-            for kkk = 1:SlicesZ
-                [x_temp(:,:,kkk)] = astra_create_backprojection_cuda(squeeze(residual(:,:,kkk))', proj_geom, vol_geom);
-            end
-        else
-            [id, x_temp] = astra_create_backprojection3d_cuda(residual, proj_geom, vol_geom);
-            astra_mex_data3d('delete', id);
-        end
-        X = X_t - (1/L_const).*x_temp;
-        
-        % ----------------Regularization part------------------------%
-        if (lambdaFGP_TV > 0)
-            % FGP-TV regularization
-            if ((strcmp('2D', Dimension) == 1))
-                % 2D regularization
-                for kkk = 1:SlicesZ
-                    [X(:,:,kkk), f_val] = FGP_TV(single(X(:,:,kkk)), lambdaFGP_TV/L_const, IterationsRegul, tol, 'iso');
-                end
-            else
-                % 3D regularization
-                [X, f_val] = FGP_TV(single(X), lambdaFGP_TV/L_const, IterationsRegul, tol, 'iso');
-            end
-            objective(i) = (objective(i) + f_val)./(Detectors*anglesNumb*SlicesZ);
-        end
-        if (lambdaSB_TV > 0)
-            % Split Bregman regularization
-            if ((strcmp('2D', Dimension) == 1))
-                % 2D regularization
-                for kkk = 1:SlicesZ
-                    X(:,:,kkk) = SplitBregman_TV(single(X(:,:,kkk)), lambdaSB_TV/L_const, IterationsRegul, tol);  % (more memory efficent)
-                end
-            else
-                % 3D regularization
-                X = SplitBregman_TV(single(X), lambdaSB_TV/L_const, IterationsRegul, tol);  % (more memory efficent)
-            end
-        end
-        if (lambdaHO > 0)
-            % Higher Order (LLT) regularization
-            X2 = zeros(N,N,SlicesZ,'single');
-            if ((strcmp('2D', Dimension) == 1))
-                % 2D regularization
-                for kkk = 1:SlicesZ
-                    X2(:,:,kkk) = LLT_model(single(X(:,:,kkk)), lambdaHO/L_const, tauHO, iterHO, 3.0e-05, 0);
-                end
-            else
-                % 3D regularization
-                X2 = LLT_model(single(X), lambdaHO/L_const, tauHO, iterHO, 3.0e-05, 0);
-            end
-            X = 0.5.*(X + X2); % averaged combination of two solutions
-            
-        end
-        if (lambdaPB > 0)
-            % Patch-Based regularization (can be very slow on CPU)
-            if ((strcmp('2D', Dimension) == 1))
-                % 2D regularization
-                for kkk = 1:SlicesZ
-                    X(:,:,kkk) = PatchBased_Regul(single(X(:,:,kkk)), SearchW, SimilW, h_PB, lambdaPB/L_const);
-                end
-            else
-                X = PatchBased_Regul(single(X), SearchW, SimilW, h_PB, lambdaPB/L_const);
-            end
-        end
-        if (lambdaPB_GPU > 0)
-            % Patch-Based regularization (GPU CUDA implementation)
-            if ((strcmp('2D', Dimension) == 1))
-                % 2D regularization
-                for kkk = 1:SlicesZ
-                    X(:,:,kkk) = NLM_GPU(single(X(:,:,kkk)), SearchW, SimilW, h_PB, lambdaPB_GPU/L_const);
-                end
-            else
-                X = NLM_GPU(single(X), SearchW, SimilW, h_PB, lambdaPB_GPU/L_const);
-            end
-        end
-        if (LambdaDiff_HO > 0)
-            % Higher-order diffusion penalty (GPU CUDA implementation)
-            if ((strcmp('2D', Dimension) == 1))
-                % 2D regularization
-                for kkk = 1:SlicesZ
-                    X(:,:,kkk) = Diff4thHajiaboli_GPU(single(X(:,:,kkk)), LambdaDiff_HO_EdgePar, LambdaDiff_HO/L_const, IterationsRegul);
-                end
-            else
-                X = Diff4thHajiaboli_GPU(X, LambdaDiff_HO_EdgePar, LambdaDiff_HO/L_const, IterationsRegul);
-            end
-        end
-        if (LambdaTGV > 0)
-            % Total Generalized variation (currently only 2D)
-            lamTGV1 = 1.1; % smoothing trade-off parameters, see Pock's paper
-            lamTGV2 = 0.8; % second-order term
-            for kkk = 1:SlicesZ
-                X(:,:,kkk) = TGV_PD(single(X(:,:,kkk)), LambdaTGV/L_const, lamTGV1, lamTGV2, IterationsRegul);
-            end
-        end
-        
-        if (lambdaR_L1 > 0)
-            r =  max(abs(r)-lambdaR_L1, 0).*sign(r); % soft-thresholding operator for ring vector
-        end
-        
-        t = (1 + sqrt(1 + 4*t^2))/2; % updating t
-        X_t = X + ((t_old-1)/t).*(X - X_old); % updating X
-        
-        if (lambdaR_L1 > 0)
-            r_x = r + ((t_old-1)/t).*(r - r_old); % updating r
-        end
-        
-        if (show == 1)
-            figure(10); imshow(X(:,:,slice), [0 maxvalplot]);
-            if (lambdaR_L1 > 0)
-                figure(11); plot(r); title('Rings offset vector')
-            end
-            pause(0.01);
-        end
-        if (strcmp(X_ideal, 'none' ) == 0)
-            Resid_error(i) = RMSE(X(ROI), X_ideal(ROI));
-            fprintf('%s %i %s %s %.4f  %s %s %f \n', 'Iteration Number:', i, '|', 'Error RMSE:', Resid_error(i), '|', 'Objective:', objective(i));
-        else
-            fprintf('%s %i  %s %s %f \n', 'Iteration Number:', i, '|', 'Objective:', objective(i));
-        end
-    end
-else
-    % Ordered Subsets (OS) FISTA reconstruction routine (normally one order of magnitude faster than the classical version)
-    t = 1;
-    X_t = X;
-    proj_geomSUB = proj_geom;
-    
-    r = zeros(Detectors,SlicesZ, 'single'); % 2D array (for 3D data) of sparse "ring" vectors
-    r_x = r; % another ring variable
-    residual2 = zeros(size(sino),'single');
-    sino_updt_FULL = zeros(size(sino),'single');
-    
-    
-    % Outer FISTA iterations loop
-    for i = 1:iterFISTA
-        
-        if ((i > 1) && (lambdaR_L1 > 0))
-            % in order to make Group-Huber fidelity work with ordered subsets
-            % we still need to work with full sinogram
-            
-            % the offset variable must be calculated for the whole
-            % updated sinogram - sino_updt_FULL
-            for kkk = 1:anglesNumb
-                residual2(:,kkk,:) = squeeze(weights(:,kkk,:)).*(squeeze(sino_updt_FULL(:,kkk,:)) - (squeeze(sino(:,kkk,:)) - alpha_ring.*r_x));
-            end
-            
-            r_old = r;
-            vec = sum(residual2,2);
-            if (SlicesZ > 1)
-                vec = squeeze(vec(:,1,:));
-            end
-            r = r_x - (1./L_const).*vec; % update ring variable
-        end
-                
-       % subsets loop
-        counterInd = 1;        
-        for ss = 1:subsets
-            X_old = X;
-            t_old = t;
-            
-            numProjSub = binsDiscr(ss); % the number of projections per subset
-            sino_updt_Sub = zeros(Detectors, numProjSub, SlicesZ,'single');
-            CurrSubIndeces = IndicesReorg(counterInd:(counterInd + numProjSub - 1)); % extract indeces attached to the subset
-            proj_geomSUB.ProjectionAngles = angles(CurrSubIndeces);
-            
-            if (strcmp(proj_geom.type,'parallel') || strcmp(proj_geom.type,'fanflat') || strcmp(proj_geom.type,'fanflat_vec'))
-                % if geometry is 2D use slice-by-slice projection-backprojection routine
-                for kkk = 1:SlicesZ
-                    [sino_id, sinoT] = astra_create_sino_cuda(X_t(:,:,kkk), proj_geomSUB, vol_geom);
-                    sino_updt_Sub(:,:,kkk) = sinoT';
-                    astra_mex_data2d('delete', sino_id);
-                end
-            else
-                % for 3D geometry (watch the GPU memory overflow in earlier ASTRA versions < 1.8)
-                [sino_id, sino_updt_Sub] = astra_create_sino3d_cuda(X_t, proj_geomSUB, vol_geom);
-                astra_mex_data3d('delete', sino_id);
-            end
-            
-            if (lambdaR_L1 > 0)
-                % Group-Huber fidelity (ring removal)
-                residualSub = zeros(Detectors, numProjSub, SlicesZ,'single'); % residual for a chosen subset
-                for kkk = 1:numProjSub
-                    indC = CurrSubIndeces(kkk);
-                    residualSub(:,kkk,:) =  squeeze(weights(:,indC,:)).*(squeeze(sino_updt_Sub(:,kkk,:)) - (squeeze(sino(:,indC,:)) - alpha_ring.*r_x));
-                    sino_updt_FULL(:,indC,:) = squeeze(sino_updt_Sub(:,kkk,:)); % filling the full sinogram
-                end
-                
-            elseif (studentt > 0)
-                % student t data fidelity
-                
-                % artifacts removal with Students t penalty
-                residualSub = squeeze(weights(:,CurrSubIndeces,:)).*(sino_updt_Sub - squeeze(sino(:,CurrSubIndeces,:)));
-                
-                for kkk = 1:SlicesZ
-                    res_vec = reshape(residualSub(:,:,kkk), Detectors*numProjSub, 1); % 1D vectorized sinogram
-                    %s = 100;
-                    %gr = (2)*res_vec./(s*2 + conj(res_vec).*res_vec);
-                    [ff, gr] = studentst(res_vec, 1);
-                    residualSub(:,:,kkk) = reshape(gr, Detectors, numProjSub);
-                end
-                objective(i) = ff; % for the objective function output
-            else
-                % PWLS model                
-                residualSub = squeeze(weights(:,CurrSubIndeces,:)).*(sino_updt_Sub - squeeze(sino(:,CurrSubIndeces,:)));
-                objective(i) = 0.5*norm(residualSub(:)); % for the objective function output
-            end
-            
-            % perform backprojection of a subset
-            if (strcmp(proj_geom.type,'parallel') || strcmp(proj_geom.type,'fanflat') || strcmp(proj_geom.type,'fanflat_vec'))
-                % if geometry is 2D use slice-by-slice projection-backprojection routine
-                x_temp = zeros(size(X),'single');
-                for kkk = 1:SlicesZ
-                    [x_temp(:,:,kkk)] = astra_create_backprojection_cuda(squeeze(residualSub(:,:,kkk))', proj_geomSUB, vol_geom);
-                end
-            else
-                [id, x_temp] = astra_create_backprojection3d_cuda(residualSub, proj_geomSUB, vol_geom);
-                astra_mex_data3d('delete', id);
-            end
-            
-            X = X_t - (1/L_const).*x_temp;
-            
-            % ----------------Regularization part------------------------%
-            if (lambdaFGP_TV > 0)
-                % FGP-TV regularization
-                if ((strcmp('2D', Dimension) == 1))
-                    % 2D regularization
-                    for kkk = 1:SlicesZ
-                        [X(:,:,kkk), f_val] = FGP_TV(single(X(:,:,kkk)), lambdaFGP_TV/(subsets*L_const), IterationsRegul, tol, 'iso');
-                    end
-                else
-                    % 3D regularization
-                    [X, f_val] = FGP_TV(single(X), lambdaFGP_TV/(subsets*L_const), IterationsRegul, tol, 'iso');
-                end
-                objective(i) = objective(i) + f_val;
-            end
-            if (lambdaSB_TV > 0)
-                % Split Bregman regularization
-                if ((strcmp('2D', Dimension) == 1))
-                    % 2D regularization
-                    for kkk = 1:SlicesZ
-                        X(:,:,kkk) = SplitBregman_TV(single(X(:,:,kkk)), lambdaSB_TV/(subsets*L_const), IterationsRegul, tol);  % (more memory efficent)
-                    end
-                else
-                    % 3D regularization
-                    X = SplitBregman_TV(single(X), lambdaSB_TV/(subsets*L_const), IterationsRegul, tol);  % (more memory efficent)
-                end
-            end
-            if (lambdaHO > 0)
-                % Higher Order (LLT) regularization
-                X2 = zeros(N,N,SlicesZ,'single');
-                if ((strcmp('2D', Dimension) == 1))
-                    % 2D regularization
-                    for kkk = 1:SlicesZ
-                        X2(:,:,kkk) = LLT_model(single(X(:,:,kkk)), lambdaHO/(subsets*L_const), tauHO/subsets, iterHO, 2.0e-05, 0);
-                    end
-                else
-                    % 3D regularization
-                    X2 = LLT_model(single(X), lambdaHO/(subsets*L_const), tauHO/subsets, iterHO, 2.0e-05, 0);
-                end
-                X = 0.5.*(X + X2); % the averaged combination of two solutions
-            end
-            if (lambdaPB > 0)
-                % Patch-Based regularization (can be slow on CPU)
-                if ((strcmp('2D', Dimension) == 1))
-                    % 2D regularization
-                    for kkk = 1:SlicesZ
-                        X(:,:,kkk) = PatchBased_Regul(single(X(:,:,kkk)), SearchW, SimilW, h_PB, lambdaPB/(subsets*L_const));
-                    end
-                else
-                    X = PatchBased_Regul(single(X), SearchW, SimilW, h_PB, lambdaPB/(subsets*L_const));
-                end
-            end
-            if (lambdaPB_GPU > 0)
-                % Patch-Based regularization (GPU CUDA implementation)
-                if ((strcmp('2D', Dimension) == 1))
-                    % 2D regularization
-                    for kkk = 1:SlicesZ
-                        X(:,:,kkk) = NLM_GPU(single(X(:,:,kkk)), SearchW, SimilW, h_PB, lambdaPB_GPU/(subsets*L_const));
-                    end
-                else
-                    X = NLM_GPU(single(X), SearchW, SimilW, h_PB, lambdaPB_GPU/(subsets*L_const));
-                end
-            end
-            if (LambdaDiff_HO > 0)
-                % Higher-order diffusion penalty (GPU CUDA implementation)
-                if ((strcmp('2D', Dimension) == 1))
-                    % 2D regularization
-                    for kkk = 1:SlicesZ
-                        X(:,:,kkk) = Diff4thHajiaboli_GPU(single(X(:,:,kkk)), LambdaDiff_HO_EdgePar, LambdaDiff_HO/(subsets*L_const), round(IterationsRegul/subsets));
-                    end
-                else
-                    X = Diff4thHajiaboli_GPU(X, LambdaDiff_HO_EdgePar, LambdaDiff_HO/(subsets*L_const), round(IterationsRegul/subsets));
-                end
-            end
-            if (LambdaTGV > 0)
-                % Total Generalized variation (currently only 2D)
-                lamTGV1 = 1.1; % smoothing trade-off parameters, see Pock's paper
-                lamTGV2 = 0.5; % second-order term
-                for kkk = 1:SlicesZ
-                    X(:,:,kkk) = TGV_PD(single(X(:,:,kkk)), LambdaTGV/(subsets*L_const), lamTGV1, lamTGV2, IterationsRegul);
-                end
-            end
-            
-            t = (1 + sqrt(1 + 4*t^2))/2; % updating t
-            X_t = X + ((t_old-1)/t).*(X - X_old); % updating X
-            counterInd = counterInd + numProjSub;
-        end
-        
-        if (i == 1)
-            r_old = r;
-        end
-        
-        % working with a 'ring vector'
-        if (lambdaR_L1 > 0)
-            r =  max(abs(r)-lambdaR_L1, 0).*sign(r); % soft-thresholding operator for ring vector
-            r_x = r + ((t_old-1)/t).*(r - r_old); % updating r
-        end
-        
-        if (show == 1)
-            figure(10); imshow(X(:,:,slice), [0 maxvalplot]);
-            if (lambdaR_L1 > 0)
-                figure(11); plot(r); title('Rings offset vector')
-            end
-            pause(0.01);
-        end
-        
-        if (strcmp(X_ideal, 'none' ) == 0)
-            Resid_error(i) = RMSE(X(ROI), X_ideal(ROI));
-            fprintf('%s %i %s %s %.4f  %s %s %f \n', 'Iteration Number:', i, '|', 'Error RMSE:', Resid_error(i), '|', 'Objective:', objective(i));
-        else
-            fprintf('%s %i  %s %s %f \n', 'Iteration Number:', i, '|', 'Objective:', objective(i));
-        end
-    end
-end
-
-output.Resid_error = Resid_error;
-output.objective = objective;
-output.L_const = L_const;
-
-end
diff --git a/main_func/compile_mex.m b/main_func/compile_mex.m
deleted file mode 100644
index 1353859..0000000
--- a/main_func/compile_mex.m
+++ /dev/null
@@ -1,11 +0,0 @@
-% compile mex's in Matlab once
-cd regularizers_CPU/
-
-mex LLT_model.c LLT_model_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
-mex FGP_TV.c FGP_TV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
-mex SplitBregman_TV.c SplitBregman_TV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
-mex TGV_PD.c TGV_PD_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
-mex PatchBased_Regul.c PatchBased_Regul_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
-
-cd ../../
-cd demos
diff --git a/main_func/regularizers_CPU/FGP_TV.c b/main_func/regularizers_CPU/FGP_TV.c
deleted file mode 100644
index 30cea1a..0000000
--- a/main_func/regularizers_CPU/FGP_TV.c
+++ /dev/null
@@ -1,216 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-#include "matrix.h"
-#include "mex.h"
-#include "FGP_TV_core.h"
-
-/* C-OMP implementation of FGP-TV [1] denoising/regularization model (2D/3D case)
- *
- * Input Parameters:
- * 1. Noisy image/volume [REQUIRED]
- * 2. lambda - regularization parameter [REQUIRED]
- * 3. Number of iterations [OPTIONAL parameter]
- * 4. eplsilon: tolerance constant [OPTIONAL parameter]
- * 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
- *
- * Output:
- * [1] Filtered/regularized image
- * [2] last function value 
- *
- * Example of image denoising:
- * figure;
- * Im = double(imread('lena_gray_256.tif'))/255;  % loading image
- * u0 = Im + .05*randn(size(Im)); % adding noise
- * u = FGP_TV(single(u0), 0.05, 100, 1e-04);
- *
- * to compile with OMP support: mex FGP_TV.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
- * This function is based on the Matlab's code and paper by
- * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
- *
- * D. Kazantsev, 2016-17
- *
- */
-
-
-void mexFunction(
-        int nlhs, mxArray *plhs[],
-        int nrhs, const mxArray *prhs[])
-        
-{
-    int number_of_dims, iter, dimX, dimY, dimZ, ll, j, count, methTV;
-    const int  *dim_array;
-    float *A, *D=NULL, *D_old=NULL, *P1=NULL, *P2=NULL, *P3=NULL, *P1_old=NULL, *P2_old=NULL, *P3_old=NULL, *R1=NULL, *R2=NULL, *R3=NULL, lambda, tk, tkp1, re, re1, re_old, epsil;
-    
-    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
-    dim_array = mxGetDimensions(prhs[0]);
-    
-    /*Handling Matlab input data*/
-    if ((nrhs < 2) || (nrhs > 5)) mexErrMsgTxt("At least 2 parameters is required: Image(2D/3D), Regularization parameter. The full list of parameters: Image(2D/3D), Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1')");
-    
-    A  = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */
-    lambda =  (float) mxGetScalar(prhs[1]); /* regularization parameter */
-    iter = 50; /* default iterations number */
-    epsil = 0.0001; /* default tolerance constant */
-    methTV = 0;  /* default isotropic TV penalty */
-    
-    if ((nrhs == 3) || (nrhs == 4) || (nrhs == 5))  iter = (int) mxGetScalar(prhs[2]); /* iterations number */
-    if ((nrhs == 4) || (nrhs == 5))  epsil =  (float) mxGetScalar(prhs[3]); /* tolerance constant */
-    if (nrhs == 5)  {
-        char *penalty_type;
-        penalty_type = mxArrayToString(prhs[4]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */
-        if ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0)) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',");
-        if (strcmp(penalty_type, "l1") == 0)  methTV = 1;  /* enable 'l1' penalty */
-        mxFree(penalty_type);
-    }
-    /*output function value (last iteration) */
-    plhs[1] = mxCreateNumericMatrix(1, 1, mxSINGLE_CLASS, mxREAL);  
-    float *funcvalA = (float *) mxGetData(plhs[1]);
-        
-    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
-    
-    /*Handling Matlab output data*/
-    dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2];
-    
-    tk = 1.0f;
-    tkp1=1.0f;
-    count = 0;
-    re_old = 0.0f;
-    
-    if (number_of_dims == 2) {
-        dimZ = 1; /*2D case*/
-        D = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        D_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        P1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        P2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        P1_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        P2_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        R1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        R2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        
-        /* begin iterations */
-        for(ll=0; ll<iter; ll++) {
-            
-            /* computing the gradient of the objective function */
-            Obj_func2D(A, D, R1, R2, lambda, dimX, dimY);
-            
-            /*Taking a step towards minus of the gradient*/
-            Grad_func2D(P1, P2, D, R1, R2, lambda, dimX, dimY);
-            
-            /* projection step */
-            Proj_func2D(P1, P2, methTV, dimX, dimY);
-            
-            /*updating R and t*/
-            tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
-            Rupd_func2D(P1, P1_old, P2, P2_old, R1, R2, tkp1, tk, dimX, dimY);                
-            
-            /* calculate norm */
-            re = 0.0f; re1 = 0.0f;
-            for(j=0; j<dimX*dimY*dimZ; j++)
-            {
-                re += pow(D[j] - D_old[j],2);
-                re1 += pow(D[j],2);
-            }
-            re = sqrt(re)/sqrt(re1);
-            if (re < epsil)  count++;
-            if (count > 4) {
-                Obj_func_CALC2D(A, D, funcvalA, lambda, dimX, dimY); 
-                break; }
-            
-            /* check that the residual norm is decreasing */
-            if (ll > 2) {
-                if (re > re_old) {
-                    Obj_func_CALC2D(A, D, funcvalA, lambda, dimX, dimY);                                   
-                    break; }}            
-            re_old = re;
-            /*printf("%f %i %i \n", re, ll, count); */                      
-          
-            /*storing old values*/
-            copyIm(D, D_old, dimX, dimY, dimZ);
-            copyIm(P1, P1_old, dimX, dimY, dimZ);
-            copyIm(P2, P2_old, dimX, dimY, dimZ);
-            tk = tkp1;
-            
-            /* calculating the objective function value */
-            if (ll == (iter-1)) Obj_func_CALC2D(A, D, funcvalA, lambda, dimX, dimY);            
-        }
-        printf("FGP-TV iterations stopped at iteration %i with the function value %f \n", ll, funcvalA[0]);
-    }
-    if (number_of_dims == 3) {
-        D = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        D_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        P1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        P2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        P3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        P1_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        P2_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        P3_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        R1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        R2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        R3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        
-        /* begin iterations */
-        for(ll=0; ll<iter; ll++) {
-            
-            /* computing the gradient of the objective function */
-            Obj_func3D(A, D, R1, R2, R3,lambda, dimX, dimY, dimZ);
-            
-            /*Taking a step towards minus of the gradient*/
-            Grad_func3D(P1, P2, P3, D, R1, R2, R3, lambda, dimX, dimY, dimZ);
-            
-            /* projection step */
-            Proj_func3D(P1, P2, P3, dimX, dimY, dimZ);
-            
-            /*updating R and t*/
-            tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
-            Rupd_func3D(P1, P1_old, P2, P2_old, P3, P3_old, R1, R2, R3, tkp1, tk, dimX, dimY, dimZ);
-            
-            /* calculate norm - stopping rules*/
-            re = 0.0f; re1 = 0.0f;
-            for(j=0; j<dimX*dimY*dimZ; j++)
-            {
-                re += pow(D[j] - D_old[j],2);
-                re1 += pow(D[j],2);
-            }
-            re = sqrt(re)/sqrt(re1);
-            /* stop if the norm residual is less than the tolerance EPS */
-            if (re < epsil)  count++;
-            if (count > 3) {
-                Obj_func_CALC3D(A, D, funcvalA, lambda, dimX, dimY, dimZ);                            
-                break;}
-            
-            /* check that the residual norm is decreasing */
-            if (ll > 2) {
-                if (re > re_old) {
-                Obj_func_CALC3D(A, D, funcvalA, lambda, dimX, dimY, dimZ);
-                }}            
-            re_old = re;
-            /*printf("%f %i %i \n", re, ll, count); */
-            
-            /*storing old values*/
-            copyIm(D, D_old, dimX, dimY, dimZ);
-            copyIm(P1, P1_old, dimX, dimY, dimZ);
-            copyIm(P2, P2_old, dimX, dimY, dimZ);
-            copyIm(P3, P3_old, dimX, dimY, dimZ);
-            tk = tkp1;
-            
-            if (ll == (iter-1)) Obj_func_CALC3D(A, D, funcvalA, lambda, dimX, dimY, dimZ);            
-        }
-        printf("FGP-TV iterations stopped at iteration %i with the function value %f \n", ll, funcvalA[0]);
-    }
-}
diff --git a/main_func/regularizers_CPU/FGP_TV_core.c b/main_func/regularizers_CPU/FGP_TV_core.c
deleted file mode 100644
index 03cd445..0000000
--- a/main_func/regularizers_CPU/FGP_TV_core.c
+++ /dev/null
@@ -1,266 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#include "FGP_TV_core.h"
-
-/* C-OMP implementation of FGP-TV [1] denoising/regularization model (2D/3D case)
- *
- * Input Parameters:
- * 1. Noisy image/volume [REQUIRED]
- * 2. lambda - regularization parameter [REQUIRED]
- * 3. Number of iterations [OPTIONAL parameter]
- * 4. eplsilon: tolerance constant [OPTIONAL parameter]
- * 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
- *
- * Output:
- * [1] Filtered/regularized image
- * [2] last function value 
- *
- * Example of image denoising:
- * figure;
- * Im = double(imread('lena_gray_256.tif'))/255;  % loading image
- * u0 = Im + .05*randn(size(Im)); % adding noise
- * u = FGP_TV(single(u0), 0.05, 100, 1e-04);
- *
- * This function is based on the Matlab's code and paper by
- * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
- *
- * D. Kazantsev, 2016-17
- *
- */
-
-/* 2D-case related Functions */
-/*****************************************************************/
-float Obj_func_CALC2D(float *A, float *D, float *funcvalA, float lambda, int dimX, int dimY)
-{   
-    int i,j;
-    float f1, f2, val1, val2;
-    
-    /*data-related term */
-    f1 = 0.0f;
-    for(i=0; i<dimX*dimY; i++) f1 += pow(D[i] - A[i],2);    
-    
-    /*TV-related term */
-    f2 = 0.0f;
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            /* boundary conditions  */
-            if (i == dimX-1) {val1 = 0.0f;} else {val1 = A[(i+1)*dimY + (j)] - A[(i)*dimY + (j)];}
-            if (j == dimY-1) {val2 = 0.0f;} else {val2 = A[(i)*dimY + (j+1)] - A[(i)*dimY + (j)];}    
-            f2 += sqrt(pow(val1,2) + pow(val2,2));
-        }}  
-    
-    /* sum of two terms */
-    funcvalA[0] = 0.5f*f1 + lambda*f2;     
-    return *funcvalA;
-}
-
-float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, int dimX, int dimY)
-{
-	float val1, val2;
-	int i, j;
-#pragma omp parallel for shared(A,D,R1,R2) private(i,j,val1,val2)
-	for (i = 0; i<dimX; i++) {
-		for (j = 0; j<dimY; j++) {
-			/* boundary conditions  */
-			if (i == 0) { val1 = 0.0f; }
-			else { val1 = R1[(i - 1)*dimY + (j)]; }
-			if (j == 0) { val2 = 0.0f; }
-			else { val2 = R2[(i)*dimY + (j - 1)]; }
-			D[(i)*dimY + (j)] = A[(i)*dimY + (j)] - lambda*(R1[(i)*dimY + (j)] + R2[(i)*dimY + (j)] - val1 - val2);
-		}
-	}
-	return *D;
-}
-float Grad_func2D(float *P1, float *P2, float *D, float *R1, float *R2, float lambda, int dimX, int dimY)
-{
-	float val1, val2, multip;
-	int i, j;
-	multip = (1.0f / (8.0f*lambda));
-#pragma omp parallel for shared(P1,P2,D,R1,R2,multip) private(i,j,val1,val2)
-	for (i = 0; i<dimX; i++) {
-		for (j = 0; j<dimY; j++) {
-			/* boundary conditions */
-			if (i == dimX - 1) val1 = 0.0f; else val1 = D[(i)*dimY + (j)] - D[(i + 1)*dimY + (j)];
-			if (j == dimY - 1) val2 = 0.0f; else val2 = D[(i)*dimY + (j)] - D[(i)*dimY + (j + 1)];
-			P1[(i)*dimY + (j)] = R1[(i)*dimY + (j)] + multip*val1;
-			P2[(i)*dimY + (j)] = R2[(i)*dimY + (j)] + multip*val2;
-		}
-	}
-	return 1;
-}
-float Proj_func2D(float *P1, float *P2, int methTV, int dimX, int dimY)
-{
-	float val1, val2, denom;
-	int i, j;
-	if (methTV == 0) {
-		/* isotropic TV*/
-#pragma omp parallel for shared(P1,P2) private(i,j,denom)
-		for (i = 0; i<dimX; i++) {
-			for (j = 0; j<dimY; j++) {
-				denom = pow(P1[(i)*dimY + (j)], 2) + pow(P2[(i)*dimY + (j)], 2);
-				if (denom > 1) {
-					P1[(i)*dimY + (j)] = P1[(i)*dimY + (j)] / sqrt(denom);
-					P2[(i)*dimY + (j)] = P2[(i)*dimY + (j)] / sqrt(denom);
-				}
-			}
-		}
-	}
-	else {
-		/* anisotropic TV*/
-#pragma omp parallel for shared(P1,P2) private(i,j,val1,val2)
-		for (i = 0; i<dimX; i++) {
-			for (j = 0; j<dimY; j++) {
-				val1 = fabs(P1[(i)*dimY + (j)]);
-				val2 = fabs(P2[(i)*dimY + (j)]);
-				if (val1 < 1.0f) { val1 = 1.0f; }
-				if (val2 < 1.0f) { val2 = 1.0f; }
-				P1[(i)*dimY + (j)] = P1[(i)*dimY + (j)] / val1;
-				P2[(i)*dimY + (j)] = P2[(i)*dimY + (j)] / val2;
-			}
-		}
-	}
-	return 1;
-}
-float Rupd_func2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, int dimX, int dimY)
-{
-	int i, j;
-	float multip;
-	multip = ((tk - 1.0f) / tkp1);
-#pragma omp parallel for shared(P1,P2,P1_old,P2_old,R1,R2,multip) private(i,j)
-	for (i = 0; i<dimX; i++) {
-		for (j = 0; j<dimY; j++) {
-			R1[(i)*dimY + (j)] = P1[(i)*dimY + (j)] + multip*(P1[(i)*dimY + (j)] - P1_old[(i)*dimY + (j)]);
-			R2[(i)*dimY + (j)] = P2[(i)*dimY + (j)] + multip*(P2[(i)*dimY + (j)] - P2_old[(i)*dimY + (j)]);
-		}
-	}
-	return 1;
-}
-
-/* 3D-case related Functions */
-/*****************************************************************/
-float Obj_func_CALC3D(float *A, float *D, float *funcvalA, float lambda, int dimX, int dimY, int dimZ)
-{   
-    int i,j,k;
-    float f1, f2, val1, val2, val3;
-    
-    /*data-related term */
-    f1 = 0.0f;
-    for(i=0; i<dimX*dimY*dimZ; i++) f1 += pow(D[i] - A[i],2);    
-    
-    /*TV-related term */
-    f2 = 0.0f;
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
-            /* boundary conditions  */
-            if (i == dimX-1) {val1 = 0.0f;} else {val1 = A[(dimX*dimY)*k + (i+1)*dimY + (j)] - A[(dimX*dimY)*k + (i)*dimY + (j)];}
-            if (j == dimY-1) {val2 = 0.0f;} else {val2 = A[(dimX*dimY)*k + (i)*dimY + (j+1)] - A[(dimX*dimY)*k + (i)*dimY + (j)];}    
-            if (k == dimZ-1) {val3 = 0.0f;} else {val3 = A[(dimX*dimY)*(k+1) + (i)*dimY + (j)] - A[(dimX*dimY)*k + (i)*dimY + (j)];}    
-            f2 += sqrt(pow(val1,2) + pow(val2,2)  + pow(val3,2));
-        }}}     
-    /* sum of two terms */
-    funcvalA[0] = 0.5f*f1 + lambda*f2;     
-    return *funcvalA;
-}
-
-float Obj_func3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ)
-{
-	float val1, val2, val3;
-	int i, j, k;
-#pragma omp parallel for shared(A,D,R1,R2,R3) private(i,j,k,val1,val2,val3)
-	for (i = 0; i<dimX; i++) {
-		for (j = 0; j<dimY; j++) {
-			for (k = 0; k<dimZ; k++) {
-				/* boundary conditions */
-				if (i == 0) { val1 = 0.0f; }
-				else { val1 = R1[(dimX*dimY)*k + (i - 1)*dimY + (j)]; }
-				if (j == 0) { val2 = 0.0f; }
-				else { val2 = R2[(dimX*dimY)*k + (i)*dimY + (j - 1)]; }
-				if (k == 0) { val3 = 0.0f; }
-				else { val3 = R3[(dimX*dimY)*(k - 1) + (i)*dimY + (j)]; }
-				D[(dimX*dimY)*k + (i)*dimY + (j)] = A[(dimX*dimY)*k + (i)*dimY + (j)] - lambda*(R1[(dimX*dimY)*k + (i)*dimY + (j)] + R2[(dimX*dimY)*k + (i)*dimY + (j)] + R3[(dimX*dimY)*k + (i)*dimY + (j)] - val1 - val2 - val3);
-			}
-		}
-	}
-	return *D;
-}
-float Grad_func3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ)
-{
-	float val1, val2, val3, multip;
-	int i, j, k;
-	multip = (1.0f / (8.0f*lambda));
-#pragma omp parallel for shared(P1,P2,P3,D,R1,R2,R3,multip) private(i,j,k,val1,val2,val3)
-	for (i = 0; i<dimX; i++) {
-		for (j = 0; j<dimY; j++) {
-			for (k = 0; k<dimZ; k++) {
-				/* boundary conditions */
-				if (i == dimX - 1) val1 = 0.0f; else val1 = D[(dimX*dimY)*k + (i)*dimY + (j)] - D[(dimX*dimY)*k + (i + 1)*dimY + (j)];
-				if (j == dimY - 1) val2 = 0.0f; else val2 = D[(dimX*dimY)*k + (i)*dimY + (j)] - D[(dimX*dimY)*k + (i)*dimY + (j + 1)];
-				if (k == dimZ - 1) val3 = 0.0f; else val3 = D[(dimX*dimY)*k + (i)*dimY + (j)] - D[(dimX*dimY)*(k + 1) + (i)*dimY + (j)];
-				P1[(dimX*dimY)*k + (i)*dimY + (j)] = R1[(dimX*dimY)*k + (i)*dimY + (j)] + multip*val1;
-				P2[(dimX*dimY)*k + (i)*dimY + (j)] = R2[(dimX*dimY)*k + (i)*dimY + (j)] + multip*val2;
-				P3[(dimX*dimY)*k + (i)*dimY + (j)] = R3[(dimX*dimY)*k + (i)*dimY + (j)] + multip*val3;
-			}
-		}
-	}
-	return 1;
-}
-float Proj_func3D(float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ)
-{
-	float val1, val2, val3;
-	int i, j, k;
-#pragma omp parallel for shared(P1,P2,P3) private(i,j,k,val1,val2,val3)
-	for (i = 0; i<dimX; i++) {
-		for (j = 0; j<dimY; j++) {
-			for (k = 0; k<dimZ; k++) {
-				val1 = fabs(P1[(dimX*dimY)*k + (i)*dimY + (j)]);
-				val2 = fabs(P2[(dimX*dimY)*k + (i)*dimY + (j)]);
-				val3 = fabs(P3[(dimX*dimY)*k + (i)*dimY + (j)]);
-				if (val1 < 1.0f) { val1 = 1.0f; }
-				if (val2 < 1.0f) { val2 = 1.0f; }
-				if (val3 < 1.0f) { val3 = 1.0f; }
-
-				P1[(dimX*dimY)*k + (i)*dimY + (j)] = P1[(dimX*dimY)*k + (i)*dimY + (j)] / val1;
-				P2[(dimX*dimY)*k + (i)*dimY + (j)] = P2[(dimX*dimY)*k + (i)*dimY + (j)] / val2;
-				P3[(dimX*dimY)*k + (i)*dimY + (j)] = P3[(dimX*dimY)*k + (i)*dimY + (j)] / val3;
-			}
-		}
-	}
-	return 1;
-}
-float Rupd_func3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, int dimX, int dimY, int dimZ)
-{
-	int i, j, k;
-	float multip;
-	multip = ((tk - 1.0f) / tkp1);
-#pragma omp parallel for shared(P1,P2,P3,P1_old,P2_old,P3_old,R1,R2,R3,multip) private(i,j,k)
-	for (i = 0; i<dimX; i++) {
-		for (j = 0; j<dimY; j++) {
-			for (k = 0; k<dimZ; k++) {
-				R1[(dimX*dimY)*k + (i)*dimY + (j)] = P1[(dimX*dimY)*k + (i)*dimY + (j)] + multip*(P1[(dimX*dimY)*k + (i)*dimY + (j)] - P1_old[(dimX*dimY)*k + (i)*dimY + (j)]);
-				R2[(dimX*dimY)*k + (i)*dimY + (j)] = P2[(dimX*dimY)*k + (i)*dimY + (j)] + multip*(P2[(dimX*dimY)*k + (i)*dimY + (j)] - P2_old[(dimX*dimY)*k + (i)*dimY + (j)]);
-				R3[(dimX*dimY)*k + (i)*dimY + (j)] = P3[(dimX*dimY)*k + (i)*dimY + (j)] + multip*(P3[(dimX*dimY)*k + (i)*dimY + (j)] - P3_old[(dimX*dimY)*k + (i)*dimY + (j)]);
-			}
-		}
-	}
-	return 1;
-}
-
-
diff --git a/main_func/regularizers_CPU/FGP_TV_core.h b/main_func/regularizers_CPU/FGP_TV_core.h
deleted file mode 100644
index 6430bf2..0000000
--- a/main_func/regularizers_CPU/FGP_TV_core.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-//#include <matrix.h>
-#include <math.h>
-#include <stdlib.h>
-#include <memory.h>
-#include <stdio.h>
-#include "omp.h"
-#include "utils.h"
-
-/* C-OMP implementation of FGP-TV [1] denoising/regularization model (2D/3D case)
-*
-* Input Parameters:
-* 1. Noisy image/volume [REQUIRED]
-* 2. lambda - regularization parameter [REQUIRED]
-* 3. Number of iterations [OPTIONAL parameter]
-* 4. eplsilon: tolerance constant [OPTIONAL parameter]
-* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
-*
-* Output:
-* [1] Filtered/regularized image
-* [2] last function value
-*
-* Example of image denoising:
-* figure;
-* Im = double(imread('lena_gray_256.tif'))/255;  % loading image
-* u0 = Im + .05*randn(size(Im)); % adding noise
-* u = FGP_TV(single(u0), 0.05, 100, 1e-04);
-*
-* to compile with OMP support: mex FGP_TV.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
-* This function is based on the Matlab's code and paper by
-* [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
-*
-* D. Kazantsev, 2016-17
-*
-*/
-#ifdef __cplusplus
-extern "C" {
-#endif
-//float copyIm(float *A, float *B, int dimX, int dimY, int dimZ);
-float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, int dimX, int dimY);
-float Grad_func2D(float *P1, float *P2, float *D, float *R1, float *R2, float lambda, int dimX, int dimY);
-float Proj_func2D(float *P1, float *P2, int methTV, int dimX, int dimY);
-float Rupd_func2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, int dimX, int dimY);
-float Obj_func_CALC2D(float *A, float *D, float *funcvalA, float lambda, int dimX, int dimY);
-
-float Obj_func3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ);
-float Grad_func3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ);
-float Proj_func3D(float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ);
-float Rupd_func3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, int dimX, int dimY, int dimZ);
-float Obj_func_CALC3D(float *A, float *D, float *funcvalA, float lambda, int dimX, int dimY, int dimZ);
-#ifdef __cplusplus
-}
-#endif
\ No newline at end of file
diff --git a/main_func/regularizers_CPU/LLT_model.c b/main_func/regularizers_CPU/LLT_model.c
deleted file mode 100644
index 0b07b47..0000000
--- a/main_func/regularizers_CPU/LLT_model.c
+++ /dev/null
@@ -1,169 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#include "mex.h"
-#include "matrix.h"
-#include "LLT_model_core.h"
-
-/* C-OMP implementation of Lysaker, Lundervold and Tai (LLT) model of higher order regularization penalty
-*
-* Input Parameters:
-* 1. U0 - original noise image/volume
-* 2. lambda - regularization parameter
-* 3. tau - time-step  for explicit scheme
-* 4. iter - iterations number
-* 5. epsil  - tolerance constant (to terminate earlier)
-* 6. switcher - default is 0, switch to (1) to restrictive smoothing in Z dimension (in test)
-*
-* Output:
-* Filtered/regularized image
-*
-* Example:
-* figure;
-* Im = double(imread('lena_gray_256.tif'))/255;  % loading image
-* u0 = Im + .03*randn(size(Im)); % adding noise
-* [Den] = LLT_model(single(u0), 10, 0.1, 1);
-*
-*
-* to compile with OMP support: mex LLT_model.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
-* References: Lysaker, Lundervold and Tai (LLT) 2003, IEEE
-*
-* 28.11.16/Harwell
-*/
-
-void mexFunction(
-        int nlhs, mxArray *plhs[],
-        int nrhs, const mxArray *prhs[])
-        
-{
-    int number_of_dims, iter, dimX, dimY, dimZ, ll, j, count, switcher;
-    const int  *dim_array;
-    float *U0, *U=NULL, *U_old=NULL, *D1=NULL, *D2=NULL, *D3=NULL, lambda, tau, re, re1, epsil, re_old;
-    unsigned short *Map=NULL;
-    
-    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
-    dim_array = mxGetDimensions(prhs[0]);
-    
-    /*Handling Matlab input data*/
-    U0  = (float *) mxGetData(prhs[0]); /*origanal noise image/volume*/
-    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input in single precision is required"); }
-    lambda =  (float) mxGetScalar(prhs[1]); /*regularization parameter*/
-    tau =  (float) mxGetScalar(prhs[2]); /* time-step */
-    iter =  (int) mxGetScalar(prhs[3]); /*iterations number*/
-    epsil =  (float) mxGetScalar(prhs[4]); /* tolerance constant */
-    switcher =  (int) mxGetScalar(prhs[5]); /*switch on (1) restrictive smoothing in Z dimension*/
-     
-    /*Handling Matlab output data*/
-    dimX = dim_array[0]; dimY = dim_array[1];  dimZ = 1;
-    
-    if (number_of_dims == 2) {
-        /*2D case*/       
-        U = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        U_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        D1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        D2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-    }
-    else if (number_of_dims == 3) {
-        /*3D case*/
-        dimZ = dim_array[2];
-        U = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        U_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        D1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        D2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        D3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        if (switcher != 0) {
-        Map = (unsigned short*)mxGetPr(plhs[1] = mxCreateNumericArray(3, dim_array, mxUINT16_CLASS, mxREAL));       
-        }
-    }
-    else {mexErrMsgTxt("The input data should be 2D or 3D");}
-    
-    /*Copy U0 to U*/
-    copyIm(U0, U, dimX, dimY, dimZ);
-    
-    count = 1;
-    re_old = 0.0f; 
-    if (number_of_dims == 2) {
-        for(ll = 0; ll < iter; ll++) {
-            
-            copyIm(U, U_old, dimX, dimY, dimZ);
-            
-            /*estimate inner derrivatives */
-            der2D(U, D1, D2, dimX, dimY, dimZ);
-            /* calculate div^2 and update */
-            div_upd2D(U0, U, D1, D2, dimX, dimY, dimZ, lambda, tau);
-            
-            /* calculate norm to terminate earlier */
-            re = 0.0f; re1 = 0.0f;
-            for(j=0; j<dimX*dimY*dimZ; j++)
-            {
-                re += pow(U_old[j] - U[j],2);
-                re1 += pow(U_old[j],2);
-            }
-            re = sqrt(re)/sqrt(re1);           
-            if (re < epsil)  count++;
-            if (count > 4) break;            
-            
-            /* check that the residual norm is decreasing */
-            if (ll > 2) {
-                if (re > re_old) break; 
-            }
-            re_old = re;         
-        
-        } /*end of iterations*/
-        printf("HO iterations stopped at iteration: %i\n", ll);          
-    }
-    /*3D version*/
-    if (number_of_dims == 3) {
-        
-        if (switcher == 1) {
-            /* apply restrictive smoothing */            
-            calcMap(U, Map, dimX, dimY, dimZ);
-            /*clear outliers */
-            cleanMap(Map, dimX, dimY, dimZ);           
-        }
-        for(ll = 0; ll < iter; ll++) {
-            
-            copyIm(U, U_old, dimX, dimY, dimZ);
-            
-            /*estimate inner derrivatives */
-            der3D(U, D1, D2, D3, dimX, dimY, dimZ);          
-            /* calculate div^2 and update */
-            div_upd3D(U0, U, D1, D2, D3, Map, switcher, dimX, dimY, dimZ, lambda, tau);                 
-            
-            /* calculate norm to terminate earlier */
-            re = 0.0f; re1 = 0.0f;
-            for(j=0; j<dimX*dimY*dimZ; j++)
-            {
-                re += pow(U_old[j] - U[j],2);
-                re1 += pow(U_old[j],2);
-            }
-            re = sqrt(re)/sqrt(re1);           
-            if (re < epsil)  count++;
-            if (count > 4) break;            
-            
-            /* check that the residual norm is decreasing */
-            if (ll > 2) {
-                if (re > re_old) break; 
-            }
-            re_old = re;     
-            
-        } /*end of iterations*/
-        printf("HO iterations stopped at iteration: %i\n", ll);       
-    }
-}
diff --git a/main_func/regularizers_CPU/LLT_model_core.c b/main_func/regularizers_CPU/LLT_model_core.c
deleted file mode 100644
index 3a853d2..0000000
--- a/main_func/regularizers_CPU/LLT_model_core.c
+++ /dev/null
@@ -1,318 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#include "LLT_model_core.h"
-
-/* C-OMP implementation of Lysaker, Lundervold and Tai (LLT) model of higher order regularization penalty
-*
-* Input Parameters:
-* 1. U0 - origanal noise image/volume
-* 2. lambda - regularization parameter
-* 3. tau - time-step  for explicit scheme
-* 4. iter - iterations number
-* 5. epsil  - tolerance constant (to terminate earlier)
-* 6. switcher - default is 0, switch to (1) to restrictive smoothing in Z dimension (in test)
-*
-* Output:
-* Filtered/regularized image
-*
-* Example:
-* figure;
-* Im = double(imread('lena_gray_256.tif'))/255;  % loading image
-* u0 = Im + .03*randn(size(Im)); % adding noise
-* [Den] = LLT_model(single(u0), 10, 0.1, 1);
-*
-* References: Lysaker, Lundervold and Tai (LLT) 2003, IEEE
-*
-* 28.11.16/Harwell
-*/
-
-
-float der2D(float *U, float *D1, float *D2, int dimX, int dimY, int dimZ)
-{
-	int i, j, i_p, i_m, j_m, j_p;
-	float dxx, dyy, denom_xx, denom_yy;
-#pragma omp parallel for shared(U,D1,D2) private(i, j, i_p, i_m, j_m, j_p, denom_xx, denom_yy, dxx, dyy)
-	for (i = 0; i<dimX; i++) {
-		for (j = 0; j<dimY; j++) {
-			/* symmetric boundary conditions (Neuman) */
-			i_p = i + 1; if (i_p == dimX) i_p = i - 1;
-			i_m = i - 1; if (i_m < 0) i_m = i + 1;
-			j_p = j + 1; if (j_p == dimY) j_p = j - 1;
-			j_m = j - 1; if (j_m < 0) j_m = j + 1;
-
-			dxx = U[i_p*dimY + j] - 2.0f*U[i*dimY + j] + U[i_m*dimY + j];
-			dyy = U[i*dimY + j_p] - 2.0f*U[i*dimY + j] + U[i*dimY + j_m];
-
-			denom_xx = fabs(dxx) + EPS;
-			denom_yy = fabs(dyy) + EPS;
-
-			D1[i*dimY + j] = dxx / denom_xx;
-			D2[i*dimY + j] = dyy / denom_yy;
-		}
-	}
-	return 1;
-}
-float div_upd2D(float *U0, float *U, float *D1, float *D2, int dimX, int dimY, int dimZ, float lambda, float tau)
-{
-	int i, j, i_p, i_m, j_m, j_p;
-	float div, dxx, dyy;
-#pragma omp parallel for shared(U,U0,D1,D2) private(i, j, i_p, i_m, j_m, j_p, div, dxx, dyy)
-	for (i = 0; i<dimX; i++) {
-		for (j = 0; j<dimY; j++) {
-			/* symmetric boundary conditions (Neuman) */
-			i_p = i + 1; if (i_p == dimX) i_p = i - 1;
-			i_m = i - 1; if (i_m < 0) i_m = i + 1;
-			j_p = j + 1; if (j_p == dimY) j_p = j - 1;
-			j_m = j - 1; if (j_m < 0) j_m = j + 1;
-
-			dxx = D1[i_p*dimY + j] - 2.0f*D1[i*dimY + j] + D1[i_m*dimY + j];
-			dyy = D2[i*dimY + j_p] - 2.0f*D2[i*dimY + j] + D2[i*dimY + j_m];
-
-			div = dxx + dyy;
-
-			U[i*dimY + j] = U[i*dimY + j] - tau*div - tau*lambda*(U[i*dimY + j] - U0[i*dimY + j]);
-		}
-	}
-	return *U0;
-}
-
-float der3D(float *U, float *D1, float *D2, float *D3, int dimX, int dimY, int dimZ)
-{
-	int i, j, k, i_p, i_m, j_m, j_p, k_p, k_m;
-	float dxx, dyy, dzz, denom_xx, denom_yy, denom_zz;
-#pragma omp parallel for shared(U,D1,D2,D3) private(i, j, k, i_p, i_m, j_m, j_p, k_p, k_m, denom_xx, denom_yy, denom_zz, dxx, dyy, dzz)
-	for (i = 0; i<dimX; i++) {
-		/* symmetric boundary conditions (Neuman) */
-		i_p = i + 1; if (i_p == dimX) i_p = i - 1;
-		i_m = i - 1; if (i_m < 0) i_m = i + 1;
-		for (j = 0; j<dimY; j++) {
-			j_p = j + 1; if (j_p == dimY) j_p = j - 1;
-			j_m = j - 1; if (j_m < 0) j_m = j + 1;
-			for (k = 0; k<dimZ; k++) {
-				k_p = k + 1; if (k_p == dimZ) k_p = k - 1;
-				k_m = k - 1; if (k_m < 0) k_m = k + 1;
-
-				dxx = U[dimX*dimY*k + i_p*dimY + j] - 2.0f*U[dimX*dimY*k + i*dimY + j] + U[dimX*dimY*k + i_m*dimY + j];
-				dyy = U[dimX*dimY*k + i*dimY + j_p] - 2.0f*U[dimX*dimY*k + i*dimY + j] + U[dimX*dimY*k + i*dimY + j_m];
-				dzz = U[dimX*dimY*k_p + i*dimY + j] - 2.0f*U[dimX*dimY*k + i*dimY + j] + U[dimX*dimY*k_m + i*dimY + j];
-
-				denom_xx = fabs(dxx) + EPS;
-				denom_yy = fabs(dyy) + EPS;
-				denom_zz = fabs(dzz) + EPS;
-
-				D1[dimX*dimY*k + i*dimY + j] = dxx / denom_xx;
-				D2[dimX*dimY*k + i*dimY + j] = dyy / denom_yy;
-				D3[dimX*dimY*k + i*dimY + j] = dzz / denom_zz;
-
-			}
-		}
-	}
-	return 1;
-}
-
-float div_upd3D(float *U0, float *U, float *D1, float *D2, float *D3, unsigned short *Map, int switcher, int dimX, int dimY, int dimZ, float lambda, float tau)
-{
-	int i, j, k, i_p, i_m, j_m, j_p, k_p, k_m;
-	float div, dxx, dyy, dzz;
-#pragma omp parallel for shared(U,U0,D1,D2,D3) private(i, j, k, i_p, i_m, j_m, j_p, k_p, k_m, div, dxx, dyy, dzz)
-	for (i = 0; i<dimX; i++) {
-		/* symmetric boundary conditions (Neuman) */
-		i_p = i + 1; if (i_p == dimX) i_p = i - 1;
-		i_m = i - 1; if (i_m < 0) i_m = i + 1;
-		for (j = 0; j<dimY; j++) {
-			j_p = j + 1; if (j_p == dimY) j_p = j - 1;
-			j_m = j - 1; if (j_m < 0) j_m = j + 1;
-			for (k = 0; k<dimZ; k++) {
-				k_p = k + 1; if (k_p == dimZ) k_p = k - 1;
-				k_m = k - 1; if (k_m < 0) k_m = k + 1;
-				//                 k_p1 = k + 2; if (k_p1 >= dimZ) k_p1 = k - 2;
-				//                 k_m1 = k - 2; if (k_m1 < 0) k_m1 = k + 2;   
-
-				dxx = D1[dimX*dimY*k + i_p*dimY + j] - 2.0f*D1[dimX*dimY*k + i*dimY + j] + D1[dimX*dimY*k + i_m*dimY + j];
-				dyy = D2[dimX*dimY*k + i*dimY + j_p] - 2.0f*D2[dimX*dimY*k + i*dimY + j] + D2[dimX*dimY*k + i*dimY + j_m];
-				dzz = D3[dimX*dimY*k_p + i*dimY + j] - 2.0f*D3[dimX*dimY*k + i*dimY + j] + D3[dimX*dimY*k_m + i*dimY + j];
-
-				if ((switcher == 1) && (Map[dimX*dimY*k + i*dimY + j] == 0)) dzz = 0;
-				div = dxx + dyy + dzz;
-
-				//                 if (switcher == 1) {                    
-				// if (Map2[dimX*dimY*k + i*dimY + j] == 0) dzz2 = 0;
-				//else dzz2 = D4[dimX*dimY*k_p1 + i*dimY + j] - 2.0f*D4[dimX*dimY*k + i*dimY + j] + D4[dimX*dimY*k_m1 + i*dimY + j];
-				//                     div = dzz + dzz2;
-				//                 }
-
-				//                 dzz = D3[dimX*dimY*k_p + i*dimY + j] - 2.0f*D3[dimX*dimY*k + i*dimY + j] + D3[dimX*dimY*k_m + i*dimY + j];
-				//                 dzz2 = D4[dimX*dimY*k_p1 + i*dimY + j] - 2.0f*D4[dimX*dimY*k + i*dimY + j] + D4[dimX*dimY*k_m1 + i*dimY + j];  
-				//                 div = dzz + dzz2;
-
-				U[dimX*dimY*k + i*dimY + j] = U[dimX*dimY*k + i*dimY + j] - tau*div - tau*lambda*(U[dimX*dimY*k + i*dimY + j] - U0[dimX*dimY*k + i*dimY + j]);
-			}
-		}
-	}
-	return *U0;
-}
-
-// float der3D_2(float *U, float *D1, float *D2, float *D3, float *D4, int dimX, int dimY, int dimZ)
-// {
-//     int i, j, k, i_p, i_m, j_m, j_p, k_p, k_m, k_p1, k_m1;
-//     float dxx, dyy, dzz, dzz2, denom_xx, denom_yy, denom_zz, denom_zz2;
-// #pragma omp parallel for shared(U,D1,D2,D3,D4) private(i, j, k, i_p, i_m, j_m, j_p, k_p, k_m, denom_xx, denom_yy, denom_zz, denom_zz2, dxx, dyy, dzz, dzz2, k_p1, k_m1)
-//     for(i=0; i<dimX; i++) {
-//         /* symmetric boundary conditions (Neuman) */
-//         i_p = i + 1; if (i_p == dimX) i_p = i - 1;
-//         i_m = i - 1; if (i_m < 0) i_m = i + 1;
-//         for(j=0; j<dimY; j++) {
-//             j_p = j + 1; if (j_p == dimY) j_p = j - 1;
-//             j_m = j - 1; if (j_m < 0) j_m = j + 1;
-//             for(k=0; k<dimZ; k++) {
-//                 k_p = k + 1; if (k_p == dimZ) k_p = k - 1;
-//                 k_m = k - 1; if (k_m < 0) k_m = k + 1;
-//                 k_p1 = k + 2; if (k_p1 >= dimZ) k_p1 = k - 2;
-//                 k_m1 = k - 2; if (k_m1 < 0) k_m1 = k + 2;                
-//                 
-//                 dxx = U[dimX*dimY*k + i_p*dimY + j] - 2.0f*U[dimX*dimY*k + i*dimY + j] + U[dimX*dimY*k + i_m*dimY + j];
-//                 dyy = U[dimX*dimY*k + i*dimY + j_p] - 2.0f*U[dimX*dimY*k + i*dimY + j] + U[dimX*dimY*k + i*dimY + j_m];
-//                 dzz = U[dimX*dimY*k_p + i*dimY + j] - 2.0f*U[dimX*dimY*k + i*dimY + j] + U[dimX*dimY*k_m + i*dimY + j];                
-//                 dzz2 = U[dimX*dimY*k_p1 + i*dimY + j] - 2.0f*U[dimX*dimY*k + i*dimY + j] + U[dimX*dimY*k_m1 + i*dimY + j];                
-//                 
-//                 denom_xx = fabs(dxx) + EPS;
-//                 denom_yy = fabs(dyy) + EPS;
-//                 denom_zz = fabs(dzz) + EPS;
-//                 denom_zz2 = fabs(dzz2) + EPS;
-//                 
-//                 D1[dimX*dimY*k + i*dimY + j] = dxx/denom_xx;
-//                 D2[dimX*dimY*k + i*dimY + j] = dyy/denom_yy;
-//                 D3[dimX*dimY*k + i*dimY + j] = dzz/denom_zz;               
-//                 D4[dimX*dimY*k + i*dimY + j] = dzz2/denom_zz2;                               
-//             }}}
-//     return 1;
-// }
-
-float calcMap(float *U, unsigned short *Map, int dimX, int dimY, int dimZ)
-{
-	int i, j, k, i1, j1, i2, j2, windowSize;
-	float val1, val2, thresh_val, maxval;
-	windowSize = 1;
-	thresh_val = 0.0001; /*thresh_val = 0.0035;*/
-
-						 /* normalize volume first */
-	maxval = 0.0f;
-	for (i = 0; i<dimX; i++) {
-		for (j = 0; j<dimY; j++) {
-			for (k = 0; k<dimZ; k++) {
-				if (U[dimX*dimY*k + i*dimY + j] > maxval) maxval = U[dimX*dimY*k + i*dimY + j];
-			}
-		}
-	}
-
-	if (maxval != 0.0f) {
-		for (i = 0; i<dimX; i++) {
-			for (j = 0; j<dimY; j++) {
-				for (k = 0; k<dimZ; k++) {
-					U[dimX*dimY*k + i*dimY + j] = U[dimX*dimY*k + i*dimY + j] / maxval;
-				}
-			}
-		}
-	}
-	else {
-		printf("%s \n", "Maximum value is zero!");
-		return 0;
-	}
-
-#pragma omp parallel for shared(U,Map) private(i, j, k, i1, j1, i2, j2, val1, val2)
-	for (i = 0; i<dimX; i++) {
-		for (j = 0; j<dimY; j++) {
-			for (k = 0; k<dimZ; k++) {
-
-				Map[dimX*dimY*k + i*dimY + j] = 0;
-				//                 Map2[dimX*dimY*k + i*dimY + j] = 0; 
-
-				val1 = 0.0f; val2 = 0.0f;
-				for (i1 = -windowSize; i1 <= windowSize; i1++) {
-					for (j1 = -windowSize; j1 <= windowSize; j1++) {
-						i2 = i + i1;
-						j2 = j + j1;
-
-						if ((i2 >= 0) && (i2 < dimX) && (j2 >= 0) && (j2 < dimY)) {
-							if (k == 0) {
-								val1 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k + 1) + i2*dimY + j2], 2);
-								//                           val3 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k+2) + i2*dimY + j2],2);                                                  
-							}
-							else if (k == dimZ - 1) {
-								val1 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k - 1) + i2*dimY + j2], 2);
-								//                           val3 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k-2) + i2*dimY + j2],2);                           
-							}
-							//                       else if (k == 1) {
-							//                           val1 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k-1) + i2*dimY + j2],2); 
-							//                           val2 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k+1) + i2*dimY + j2],2);  
-							//                           val3 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k+2) + i2*dimY + j2],2);                           
-							//                       }
-							//                       else if (k == dimZ-2) {
-							//                           val1 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k-1) + i2*dimY + j2],2); 
-							//                           val2 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k+1) + i2*dimY + j2],2);      
-							//                           val3 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k-2) + i2*dimY + j2],2);                           
-							//                       }                      
-							else {
-								val1 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k - 1) + i2*dimY + j2], 2);
-								val2 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k + 1) + i2*dimY + j2], 2);
-								//                           val3 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k-2) + i2*dimY + j2],2); 
-								//                           val4 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k+2) + i2*dimY + j2],2);  
-							}
-						}
-					}
-				}
-
-				val1 = 0.111f*val1; val2 = 0.111f*val2;
-				//                  val3 = 0.111f*val3; val4 = 0.111f*val4;
-				if ((val1 <= thresh_val) && (val2 <= thresh_val)) Map[dimX*dimY*k + i*dimY + j] = 1;
-				//                  if ((val3 <= thresh_val) && (val4 <= thresh_val)) Map2[dimX*dimY*k + i*dimY + j] = 1;                        
-			}
-		}
-	}
-	return 1;
-}
-
-float cleanMap(unsigned short *Map, int dimX, int dimY, int dimZ)
-{
-	int i, j, k, i1, j1, i2, j2, counter;
-#pragma omp parallel for shared(Map) private(i, j, k, i1, j1, i2, j2, counter)
-	for (i = 0; i<dimX; i++) {
-		for (j = 0; j<dimY; j++) {
-			for (k = 0; k<dimZ; k++) {
-
-				counter = 0;
-				for (i1 = -3; i1 <= 3; i1++) {
-					for (j1 = -3; j1 <= 3; j1++) {
-						i2 = i + i1;
-						j2 = j + j1;
-						if ((i2 >= 0) && (i2 < dimX) && (j2 >= 0) && (j2 < dimY)) {
-							if (Map[dimX*dimY*k + i2*dimY + j2] == 0) counter++;
-						}
-					}
-				}
-				if (counter < 24) Map[dimX*dimY*k + i*dimY + j] = 1;
-			}
-		}
-	}
-	return *Map;
-}
-
-
-/*********************3D *********************/
\ No newline at end of file
diff --git a/main_func/regularizers_CPU/LLT_model_core.h b/main_func/regularizers_CPU/LLT_model_core.h
deleted file mode 100644
index 13fce5a..0000000
--- a/main_func/regularizers_CPU/LLT_model_core.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-//#include <matrix.h>
-#include <math.h>
-#include <stdlib.h>
-#include <memory.h>
-#include <stdio.h>
-#include "omp.h"
-#include "utils.h"
-
-#define EPS 0.01
-
-/* 2D functions */
-#ifdef __cplusplus
-extern "C" {
-#endif
-float der2D(float *U, float *D1, float *D2, int dimX, int dimY, int dimZ);
-float div_upd2D(float *U0, float *U, float *D1, float *D2, int dimX, int dimY, int dimZ, float lambda, float tau);
-
-float der3D(float *U, float *D1, float *D2, float *D3, int dimX, int dimY, int dimZ);
-float div_upd3D(float *U0, float *U, float *D1, float *D2, float *D3, unsigned short *Map, int switcher, int dimX, int dimY, int dimZ, float lambda, float tau);
-
-float calcMap(float *U, unsigned short *Map, int dimX, int dimY, int dimZ);
-float cleanMap(unsigned short *Map, int dimX, int dimY, int dimZ);
-
-//float copyIm(float *A, float *U, int dimX, int dimY, int dimZ);
-#ifdef __cplusplus
-}
-#endif
\ No newline at end of file
diff --git a/main_func/regularizers_CPU/PatchBased_Regul.c b/main_func/regularizers_CPU/PatchBased_Regul.c
deleted file mode 100644
index 9c925df..0000000
--- a/main_func/regularizers_CPU/PatchBased_Regul.c
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#include "mex.h"
-#include "matrix.h"
-#include "PatchBased_Regul_core.h"
-
-
-/* C-OMP implementation of  patch-based (PB) regularization (2D and 3D cases). 
- * This method finds self-similar patches in data and performs one fixed point iteration to mimimize the PB penalty function
- * 
- * References: 1. Yang Z. & Jacob M. "Nonlocal Regularization of Inverse Problems"
- *             2. Kazantsev D. et al. "4D-CT reconstruction with unified spatial-temporal patch-based regularization"
- *
- * Input Parameters:
- * 1. Image (2D or 3D) [required]
- * 2. ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window) [optional]
- * 3. ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window) [optional]
- * 4. h - parameter for the PB penalty function [optional]
- * 5. lambda - regularization parameter  [optional]
-
- * Output:
- * 1. regularized (denoised) Image (N x N)/volume (N x N x N)
- *
- * 2D denoising example in Matlab:   
-   Im = double(imread('lena_gray_256.tif'))/255;  % loading image
-   u0 = Im + .03*randn(size(Im)); u0(u0<0) = 0; % adding noise
-   ImDen = PatchBased_Regul(single(u0), 3, 1, 0.08, 0.05); 
- *
- * Matlab + C/mex compilers needed
- * to compile with OMP support: mex PatchBased_Regul.c CFLAGS="\$CFLAGS -fopenmp -Wall" LDFLAGS="\$LDFLAGS -fopenmp"
- *
- * D. Kazantsev *
- * 02/07/2014
- * Harwell, UK
- */
-
-
-void mexFunction(
-        int nlhs, mxArray *plhs[],
-        int nrhs, const mxArray *prhs[]) 
-{    
-    int N, M, Z, numdims, SearchW, SimilW, SearchW_real, padXY, newsizeX, newsizeY, newsizeZ, switchpad_crop;
-    const int  *dims;
-    float *A, *B=NULL, *Ap=NULL, *Bp=NULL, h, lambda;
-    
-    numdims = mxGetNumberOfDimensions(prhs[0]);
-    dims = mxGetDimensions(prhs[0]);
-    
-    N = dims[0];
-    M = dims[1];
-    Z = dims[2];
-    
-    if ((numdims < 2) || (numdims > 3)) {mexErrMsgTxt("The input is 2D image or 3D volume");}
-    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input in single precision is required"); }
-    
-    if(nrhs != 5) mexErrMsgTxt("Five inputs reqired: Image(2D,3D), SearchW, SimilW, Threshold, Regularization parameter");
-    
-    /*Handling inputs*/
-    A  = (float *) mxGetData(prhs[0]);    /* the image/volume to regularize/filter */
-    SearchW_real = 3; /*default value*/
-    SimilW = 1; /*default value*/
-    h = 0.1; 
-    lambda = 0.1;
-    
-    if ((nrhs == 2) || (nrhs == 3) || (nrhs == 4) || (nrhs == 5))   SearchW_real  = (int) mxGetScalar(prhs[1]); /* the searching window ratio */
-    if ((nrhs == 3) || (nrhs == 4) || (nrhs == 5))   SimilW =  (int) mxGetScalar(prhs[2]);  /* the similarity window ratio */
-    if ((nrhs == 4) || (nrhs == 5))  h =  (float) mxGetScalar(prhs[3]);  /* parameter for the PB filtering function */
-    if ((nrhs == 5))  lambda = (float) mxGetScalar(prhs[4]); /* regularization parameter */   
-
-
-    if (h <= 0) mexErrMsgTxt("Parmeter for the PB penalty function should be > 0");
-    if (lambda <= 0) mexErrMsgTxt(" Regularization parmeter should be > 0");
-       
-    SearchW = SearchW_real + 2*SimilW;
-    
-    /* SearchW_full = 2*SearchW + 1; */ /* the full searching window  size */
-    /* SimilW_full = 2*SimilW + 1;  */  /* the full similarity window  size */
-    
-    padXY = SearchW + 2*SimilW; /* padding sizes */
-    newsizeX = N + 2*(padXY); /* the X size of the padded array */
-    newsizeY = M + 2*(padXY); /* the Y size of the padded array */
-    newsizeZ = Z + 2*(padXY); /* the Z size of the padded array */
-    int N_dims[] = {newsizeX, newsizeY, newsizeZ};
-    
-    /******************************2D case ****************************/
-    if (numdims == 2) {
-        /*Handling output*/
-        B = (float*)mxGetData(plhs[0] = mxCreateNumericMatrix(N, M, mxSINGLE_CLASS, mxREAL));
-        /*allocating memory for the padded arrays */
-        Ap = (float*)mxGetData(mxCreateNumericMatrix(newsizeX, newsizeY, mxSINGLE_CLASS, mxREAL));
-        Bp = (float*)mxGetData(mxCreateNumericMatrix(newsizeX, newsizeY, mxSINGLE_CLASS, mxREAL));
-        /**************************************************************************/
-        /*Perform padding of image A to the size of [newsizeX * newsizeY] */
-        switchpad_crop = 0; /*padding*/
-        pad_crop(A, Ap, M, N, 0, newsizeY, newsizeX, 0, padXY, switchpad_crop);
-        
-        /* Do PB regularization with the padded array  */
-        PB_FUNC2D(Ap, Bp, newsizeY, newsizeX, padXY, SearchW, SimilW, (float)h, (float)lambda);
-        
-        switchpad_crop = 1; /*cropping*/
-        pad_crop(Bp, B, M, N, 0, newsizeY, newsizeX, 0, padXY, switchpad_crop);
-    }
-    else
-    {
-        /******************************3D case ****************************/
-        /*Handling output*/
-        B = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dims, mxSINGLE_CLASS, mxREAL));
-        /*allocating memory for the padded arrays */
-        Ap = (float*)mxGetPr(mxCreateNumericArray(3, N_dims, mxSINGLE_CLASS, mxREAL));
-        Bp = (float*)mxGetPr(mxCreateNumericArray(3, N_dims, mxSINGLE_CLASS, mxREAL));
-        /**************************************************************************/
-        
-        /*Perform padding of image A to the size of [newsizeX * newsizeY * newsizeZ] */
-        switchpad_crop = 0; /*padding*/
-        pad_crop(A, Ap, M, N, Z, newsizeY, newsizeX, newsizeZ, padXY, switchpad_crop);
-        
-        /* Do PB regularization with the padded array  */
-        PB_FUNC3D(Ap, Bp, newsizeY, newsizeX, newsizeZ, padXY, SearchW, SimilW, (float)h, (float)lambda);
-        
-        switchpad_crop = 1; /*cropping*/
-        pad_crop(Bp, B, M, N, Z, newsizeY, newsizeX, newsizeZ, padXY, switchpad_crop);
-    } /*end else ndims*/ 
-}    
diff --git a/main_func/regularizers_CPU/PatchBased_Regul_core.c b/main_func/regularizers_CPU/PatchBased_Regul_core.c
deleted file mode 100644
index acfb464..0000000
--- a/main_func/regularizers_CPU/PatchBased_Regul_core.c
+++ /dev/null
@@ -1,213 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazanteev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#include "PatchBased_Regul_core.h"
-
-/* C-OMP implementation of  patch-based (PB) regularization (2D and 3D cases). 
- * This method finds self-similar patches in data and performs one fixed point iteration to mimimize the PB penalty function
- * 
- * References: 1. Yang Z. & Jacob M. "Nonlocal Regularization of Inverse Problems"
- *             2. Kazantsev D. et al. "4D-CT reconstruction with unified spatial-temporal patch-based regularization"
- *
- * Input Parameters:
- * 1. Image (2D or 3D) [required]
- * 2. ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window) [optional]
- * 3. ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window) [optional]
- * 4. h - parameter for the PB penalty function [optional]
- * 5. lambda - regularization parameter  [optional]
-
- * Output:
- * 1. regularized (denoised) Image (N x N)/volume (N x N x N)
- *
- * 2D denoising example in Matlab:   
-   Im = double(imread('lena_gray_256.tif'))/255;  % loading image
-   u0 = Im + .03*randn(size(Im)); u0(u0<0) = 0; % adding noise
-   ImDen = PatchBased_Regul(single(u0), 3, 1, 0.08, 0.05); 
- 
- * D. Kazantsev *
- * 02/07/2014
- * Harwell, UK
- */
-
-/*2D version function */
-float PB_FUNC2D(float *A, float *B, int dimX, int dimY, int padXY, int SearchW, int SimilW, float h, float lambda)
-{
-    int i, j, i_n, j_n, i_m, j_m, i_p, j_p, i_l, j_l, i1, j1, i2, j2, i3, j3, i5,j5, count, SimilW_full;
-    float *Eucl_Vec, h2, denh2, normsum, Weight, Weight_norm, value, denom, WeightGlob, t1;
-                 
-    /*SearchW_full = 2*SearchW + 1; */ /* the full searching window  size */
-    SimilW_full = 2*SimilW + 1;   /* the full similarity window  size */
-    h2 = h*h;
-    denh2 = 1/(2*h2);   
-    
-     /*Gaussian kernel */
-    Eucl_Vec = (float*) calloc (SimilW_full*SimilW_full,sizeof(float));
-    count = 0;
-    for(i_n=-SimilW; i_n<=SimilW; i_n++) {
-        for(j_n=-SimilW; j_n<=SimilW; j_n++) {
-            t1 = pow(((float)i_n), 2) + pow(((float)j_n), 2);
-            Eucl_Vec[count] = exp(-(t1)/(2*SimilW*SimilW));
-            count = count + 1;                       
-        }} /*main neighb loop */   
-    
-    /*The NLM code starts here*/         
-    /* setting OMP here */
-    #pragma omp parallel for shared (A, B, dimX, dimY, Eucl_Vec, lambda, denh2) private(denom, i, j, WeightGlob, count,  i1, j1, i2, j2, i3, j3, i5, j5, Weight_norm, normsum, i_m, j_m, i_n, j_n, i_l, j_l, i_p, j_p, Weight,  value)
-    
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-             if (((i >= padXY) && (i < dimX-padXY)) &&  ((j >= padXY) && (j < dimY-padXY))) {
-          
-                /* Massive Search window loop */
-                Weight_norm = 0; value = 0.0;
-                for(i_m=-SearchW; i_m<=SearchW; i_m++) {
-                    for(j_m=-SearchW; j_m<=SearchW; j_m++) {
-                       /*checking boundaries*/
-                        i1 = i+i_m; j1 = j+j_m;
-                        
-                        WeightGlob = 0.0;
-                        /* if inside the searching window */                        
-                         for(i_l=-SimilW; i_l<=SimilW; i_l++) {
-                             for(j_l=-SimilW; j_l<=SimilW; j_l++) {
-                                 i2 = i1+i_l; j2 = j1+j_l;
-                                 
-                                 i3 = i+i_l; j3 = j+j_l;   /*coordinates of the inner patch loop */
-                                
-                                 count = 0; normsum = 0.0;
-                                 for(i_p=-SimilW; i_p<=SimilW; i_p++) {
-                                     for(j_p=-SimilW; j_p<=SimilW; j_p++) {
-                                         i5 = i2 + i_p; j5 = j2 + j_p;
-                                         normsum = normsum + Eucl_Vec[count]*pow(A[(i3+i_p)*dimY+(j3+j_p)]-A[i5*dimY+j5], 2);        
-                                         count = count + 1;
-                                     }}
-                                  if (normsum != 0) Weight = (exp(-normsum*denh2)); 
-                                  else Weight = 0.0;
-                                 WeightGlob += Weight;
-                             }}                      
-      
-                         value += A[i1*dimY+j1]*WeightGlob;
-                         Weight_norm += WeightGlob;           
-                    }}      /*search window loop end*/
-                
-                /* the final loop to average all values in searching window with weights */
-                denom = 1 + lambda*Weight_norm;
-                B[i*dimY+j] = (A[i*dimY+j] + lambda*value)/denom;         
-             }
-        }}   /*main loop*/     
-    return (*B);
-    free(Eucl_Vec);    
-}
- 
-/*3D version*/ 
- float PB_FUNC3D(float *A, float *B, int dimX, int dimY, int dimZ, int padXY, int SearchW, int SimilW, float h, float lambda)       
- {
-    int SimilW_full, count, i, j, k,  i_n, j_n, k_n, i_m, j_m, k_m, i_p, j_p, k_p, i_l, j_l, k_l, i1, j1, k1, i2, j2, k2, i3, j3, k3, i5, j5, k5;
-    float *Eucl_Vec, h2, denh2, normsum, Weight, Weight_norm, value, denom, WeightGlob;
-        
-    /*SearchW_full = 2*SearchW + 1; */ /* the full searching window  size */
-    SimilW_full = 2*SimilW + 1;   /* the full similarity window  size */
-    h2 = h*h;
-    denh2 = 1/(2*h2);
-    
-    /*Gaussian kernel */
-    Eucl_Vec = (float*) calloc (SimilW_full*SimilW_full*SimilW_full,sizeof(float));
-    count = 0;
-    for(i_n=-SimilW; i_n<=SimilW; i_n++) {
-        for(j_n=-SimilW; j_n<=SimilW; j_n++) {
-            for(k_n=-SimilW; k_n<=SimilW; k_n++) {
-                Eucl_Vec[count] = exp(-(pow((float)i_n, 2) + pow((float)j_n, 2) + pow((float)k_n, 2))/(2*SimilW*SimilW*SimilW));
-                count = count + 1;
-            }}} /*main neighb loop */
-    
-    /*The NLM code starts here*/         
-    /* setting OMP here */
-    #pragma omp parallel for shared (A, B, dimX, dimY, dimZ, Eucl_Vec, lambda, denh2) private(denom, i, j, k, WeightGlob,count,  i1, j1, k1, i2, j2, k2, i3, j3, k3, i5, j5, k5, Weight_norm, normsum, i_m, j_m, k_m, i_n, j_n, k_n, i_l, j_l, k_l, i_p, j_p, k_p, Weight, value)    
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
-            if (((i >= padXY) && (i < dimX-padXY)) &&  ((j >= padXY) && (j < dimY-padXY)) &&  ((k >= padXY) && (k < dimZ-padXY))) {
-            /* take all elements around the pixel of interest */                             
-               /* Massive Search window loop */
-                Weight_norm = 0;  value = 0.0;
-                for(i_m=-SearchW; i_m<=SearchW; i_m++) {
-                    for(j_m=-SearchW; j_m<=SearchW; j_m++) {
-                        for(k_m=-SearchW; k_m<=SearchW; k_m++) {
-                         /*checking boundaries*/
-                        i1 = i+i_m; j1 = j+j_m; k1 = k+k_m;
-                        
-                        WeightGlob = 0.0;
-                        /* if inside the searching window */                        
-                         for(i_l=-SimilW; i_l<=SimilW; i_l++) {
-                             for(j_l=-SimilW; j_l<=SimilW; j_l++) {
-                                 for(k_l=-SimilW; k_l<=SimilW; k_l++) {                                 
-                                 i2 = i1+i_l; j2 = j1+j_l; k2 = k1+k_l;
-                                 
-                                 i3 = i+i_l; j3 = j+j_l; k3 = k+k_l;   /*coordinates of the inner patch loop */
-                                
-                                 count = 0; normsum = 0.0;
-                                 for(i_p=-SimilW; i_p<=SimilW; i_p++) {
-                                     for(j_p=-SimilW; j_p<=SimilW; j_p++) {
-                                         for(k_p=-SimilW; k_p<=SimilW; k_p++) {
-                                         i5 = i2 + i_p; j5 = j2 + j_p; k5 = k2 + k_p;
-                                         normsum = normsum + Eucl_Vec[count]*pow(A[(dimX*dimY)*(k3+k_p)+(i3+i_p)*dimY+(j3+j_p)]-A[(dimX*dimY)*k5 + i5*dimY+j5], 2);        
-                                         count = count + 1;
-                                     }}}
-                                  if (normsum != 0) Weight = (exp(-normsum*denh2)); 
-                                  else Weight = 0.0;
-                                 WeightGlob += Weight;
-                             }}}                                                 
-                         value += A[(dimX*dimY)*k1 + i1*dimY+j1]*WeightGlob;
-                         Weight_norm += WeightGlob;
-             
-                    }}}      /*search window loop end*/
-                
-                /* the final loop to average all values in searching window with weights */
-                denom = 1 + lambda*Weight_norm;               
-                B[(dimX*dimY)*k + i*dimY+j] = (A[(dimX*dimY)*k + i*dimY+j] + lambda*value)/denom;      
-            }            
-        }}}   /*main loop*/              
-       free(Eucl_Vec);        
-       return *B;
-}
-
-float pad_crop(float *A, float *Ap, int OldSizeX, int OldSizeY, int OldSizeZ, int NewSizeX, int NewSizeY, int NewSizeZ, int padXY, int switchpad_crop)
-{
-    /* padding-cropping function */
-    int i,j,k;    
-    if (NewSizeZ > 1) {    
-           for (i=0; i < NewSizeX; i++) {
-            for (j=0; j < NewSizeY; j++) {
-              for (k=0; k < NewSizeZ; k++) {
-                if (((i >= padXY) && (i < NewSizeX-padXY)) &&  ((j >= padXY) && (j < NewSizeY-padXY)) &&  ((k >= padXY) && (k < NewSizeZ-padXY))) {
-                    if (switchpad_crop == 0)  Ap[NewSizeX*NewSizeY*k + i*NewSizeY+j] = A[OldSizeX*OldSizeY*(k - padXY) + (i-padXY)*(OldSizeY)+(j-padXY)];
-                    else  Ap[OldSizeX*OldSizeY*(k - padXY) + (i-padXY)*(OldSizeY)+(j-padXY)] = A[NewSizeX*NewSizeY*k + i*NewSizeY+j];
-                }
-            }}}   
-    }
-    else {
-        for (i=0; i < NewSizeX; i++) {
-            for (j=0; j < NewSizeY; j++) {
-                if (((i >= padXY) && (i < NewSizeX-padXY)) &&  ((j >= padXY) && (j < NewSizeY-padXY))) {
-                    if (switchpad_crop == 0)  Ap[i*NewSizeY+j] = A[(i-padXY)*(OldSizeY)+(j-padXY)];
-                    else  Ap[(i-padXY)*(OldSizeY)+(j-padXY)] = A[i*NewSizeY+j];
-                }
-            }}
-    }
-    return *Ap;
-}
\ No newline at end of file
diff --git a/main_func/regularizers_CPU/PatchBased_Regul_core.h b/main_func/regularizers_CPU/PatchBased_Regul_core.h
deleted file mode 100644
index d4a8a46..0000000
--- a/main_func/regularizers_CPU/PatchBased_Regul_core.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazanteev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#define _USE_MATH_DEFINES
-
-//#include <matrix.h>
-#include <math.h>
-#include <stdlib.h>
-#include <memory.h>
-#include <stdio.h>
-#include "omp.h"
-
-/* C-OMP implementation of  patch-based (PB) regularization (2D and 3D cases).
-* This method finds self-similar patches in data and performs one fixed point iteration to mimimize the PB penalty function
-*
-* References: 1. Yang Z. & Jacob M. "Nonlocal Regularization of Inverse Problems"
-*             2. Kazantsev D. et al. "4D-CT reconstruction with unified spatial-temporal patch-based regularization"
-*
-* Input Parameters (mandatory):
-* 1. Image (2D or 3D)
-* 2. ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window)
-* 3. ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window)
-* 4. h - parameter for the PB penalty function
-* 5. lambda - regularization parameter
-
-* Output:
-* 1. regularized (denoised) Image (N x N)/volume (N x N x N)
-*
-* Quick 2D denoising example in Matlab:
-Im = double(imread('lena_gray_256.tif'))/255;  % loading image
-u0 = Im + .03*randn(size(Im)); u0(u0<0) = 0; % adding noise
-ImDen = PB_Regul_CPU(single(u0), 3, 1, 0.08, 0.05);
-*
-* Please see more tests in a file:
-TestTemporalSmoothing.m
-
-*
-* Matlab + C/mex compilers needed
-* to compile with OMP support: mex PB_Regul_CPU.c CFLAGS="\$CFLAGS -fopenmp -Wall" LDFLAGS="\$LDFLAGS -fopenmp"
-*
-* D. Kazantsev *
-* 02/07/2014
-* Harwell, UK
-*/
-#ifdef __cplusplus
-extern "C" {
-#endif
-float pad_crop(float *A, float *Ap, int OldSizeX, int OldSizeY, int OldSizeZ, int NewSizeX, int NewSizeY, int NewSizeZ, int padXY, int switchpad_crop);
-float PB_FUNC2D(float *A, float *B, int dimX, int dimY, int padXY, int SearchW, int SimilW, float h, float lambda);
-float PB_FUNC3D(float *A, float *B, int dimX, int dimY, int dimZ, int padXY, int SearchW, int SimilW, float h, float lambda);
-#ifdef __cplusplus
-}
-#endif
\ No newline at end of file
diff --git a/main_func/regularizers_CPU/SplitBregman_TV.c b/main_func/regularizers_CPU/SplitBregman_TV.c
deleted file mode 100644
index 38f6a9d..0000000
--- a/main_func/regularizers_CPU/SplitBregman_TV.c
+++ /dev/null
@@ -1,179 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#include "mex.h"
-#include <matrix.h>
-#include "SplitBregman_TV_core.h"
-
-/* C-OMP implementation of Split Bregman - TV denoising-regularization model (2D/3D)
- *
- * Input Parameters:
- * 1. Noisy image/volume
- * 2. lambda - regularization parameter
- * 3. Number of iterations [OPTIONAL parameter]
- * 4. eplsilon - tolerance constant [OPTIONAL parameter]
- * 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
- *
- * Output:
- * Filtered/regularized image
- *
- * Example:
- * figure;
- * Im = double(imread('lena_gray_256.tif'))/255;  % loading image
- * u0 = Im + .05*randn(size(Im)); u0(u0 < 0) = 0;
- * u = SplitBregman_TV(single(u0), 10, 30, 1e-04);
- *
- * to compile with OMP support: mex SplitBregman_TV.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
- * References:
- * The Split Bregman Method for L1 Regularized Problems, by Tom Goldstein and Stanley Osher.
- * D. Kazantsev, 2016*
- */
-
-
-void mexFunction(
-        int nlhs, mxArray *plhs[],
-        int nrhs, const mxArray *prhs[])
-        
-{
-    int number_of_dims, iter, dimX, dimY, dimZ, ll, j, count, methTV;
-    const int  *dim_array;
-    float *A, *U=NULL, *U_old=NULL, *Dx=NULL, *Dy=NULL, *Dz=NULL, *Bx=NULL, *By=NULL, *Bz=NULL, lambda, mu, epsil, re, re1, re_old;
-    
-    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
-    dim_array = mxGetDimensions(prhs[0]);
-    
-    /*Handling Matlab input data*/
-    if ((nrhs < 2) || (nrhs > 5)) mexErrMsgTxt("At least 2 parameters is required: Image(2D/3D), Regularization parameter. The full list of parameters: Image(2D/3D), Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1')");
-    
-    /*Handling Matlab input data*/
-    A  = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */
-    mu =  (float) mxGetScalar(prhs[1]); /* regularization parameter */
-    iter = 35; /* default iterations number */
-    epsil = 0.0001; /* default tolerance constant */
-    methTV = 0;  /* default isotropic TV penalty */
-    if ((nrhs == 3) || (nrhs == 4) || (nrhs == 5))  iter = (int) mxGetScalar(prhs[2]); /* iterations number */
-    if ((nrhs == 4) || (nrhs == 5))  epsil =  (float) mxGetScalar(prhs[3]); /* tolerance constant */
-    if (nrhs == 5)  {
-        char *penalty_type;
-        penalty_type = mxArrayToString(prhs[4]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */
-        if ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0)) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',");
-        if (strcmp(penalty_type, "l1") == 0)  methTV = 1;  /* enable 'l1' penalty */
-        mxFree(penalty_type);
-    }
-    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
-    
-    lambda = 2.0f*mu;
-    count = 1;
-    re_old = 0.0f;
-    /*Handling Matlab output data*/
-    dimY = dim_array[0]; dimX = dim_array[1]; dimZ = dim_array[2];
-    
-    if (number_of_dims == 2) {
-        dimZ = 1; /*2D case*/
-        U = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        U_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        Dx = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        Dy = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        Bx = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        By = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        
-        copyIm(A, U, dimX, dimY, dimZ); /*initialize */
-        
-        /* begin outer SB iterations */
-        for(ll=0; ll<iter; ll++) {
-            
-            /*storing old values*/
-            copyIm(U, U_old, dimX, dimY, dimZ);
-            
-            /*GS iteration */
-            gauss_seidel2D(U, A, Dx, Dy, Bx, By, dimX, dimY, lambda, mu);
-            
-            if (methTV == 1)  updDxDy_shrinkAniso2D(U, Dx, Dy, Bx, By, dimX, dimY, lambda);
-            else updDxDy_shrinkIso2D(U, Dx, Dy, Bx, By, dimX, dimY, lambda);
-            
-            updBxBy2D(U, Dx, Dy, Bx, By, dimX, dimY);
-            
-            /* calculate norm to terminate earlier */
-            re = 0.0f; re1 = 0.0f;
-            for(j=0; j<dimX*dimY*dimZ; j++)
-            {
-                re += pow(U_old[j] - U[j],2);
-                re1 += pow(U_old[j],2);
-            }
-            re = sqrt(re)/sqrt(re1);
-            if (re < epsil)  count++;
-            if (count > 4) break;
-            
-            /* check that the residual norm is decreasing */
-            if (ll > 2) {
-                if (re > re_old) break;
-            }
-            re_old = re;
-            /*printf("%f %i %i \n", re, ll, count); */
-            
-            /*copyIm(U_old, U, dimX, dimY, dimZ); */
-        }
-        printf("SB iterations stopped at iteration: %i\n", ll);
-    }
-    if (number_of_dims == 3) {
-        U = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        U_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        Dx = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        Dy = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        Dz = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        Bx = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        By = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        Bz = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        
-        copyIm(A, U, dimX, dimY, dimZ); /*initialize */
-        
-        /* begin outer SB iterations */
-        for(ll=0; ll<iter; ll++) {
-            
-            /*storing old values*/
-            copyIm(U, U_old, dimX, dimY, dimZ);
-            
-            /*GS iteration */
-            gauss_seidel3D(U, A, Dx, Dy, Dz, Bx, By, Bz, dimX, dimY, dimZ, lambda, mu);
-            
-            if (methTV == 1) updDxDyDz_shrinkAniso3D(U, Dx, Dy, Dz, Bx, By, Bz, dimX, dimY, dimZ, lambda);
-            else updDxDyDz_shrinkIso3D(U, Dx, Dy, Dz, Bx, By, Bz, dimX, dimY, dimZ, lambda);
-            
-            updBxByBz3D(U, Dx, Dy, Dz, Bx, By, Bz, dimX, dimY, dimZ);
-            
-            /* calculate norm to terminate earlier */
-            re = 0.0f; re1 = 0.0f;
-            for(j=0; j<dimX*dimY*dimZ; j++)
-            {
-                re += pow(U[j] - U_old[j],2);
-                re1 += pow(U[j],2);
-            }
-            re = sqrt(re)/sqrt(re1);
-            if (re < epsil)  count++;
-            if (count > 4) break;
-            
-            /* check that the residual norm is decreasing */
-            if (ll > 2) {
-                if (re > re_old) break; }
-            /*printf("%f %i %i \n", re, ll, count); */
-            re_old = re;
-        }
-        printf("SB iterations stopped at iteration: %i\n", ll);
-    }
-}
\ No newline at end of file
diff --git a/main_func/regularizers_CPU/SplitBregman_TV_core.c b/main_func/regularizers_CPU/SplitBregman_TV_core.c
deleted file mode 100644
index 4109a4b..0000000
--- a/main_func/regularizers_CPU/SplitBregman_TV_core.c
+++ /dev/null
@@ -1,259 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#include "SplitBregman_TV_core.h"
-
-/* C-OMP implementation of Split Bregman - TV denoising-regularization model (2D/3D)
-*
-* Input Parameters:
-* 1. Noisy image/volume
-* 2. lambda - regularization parameter
-* 3. Number of iterations [OPTIONAL parameter]
-* 4. eplsilon - tolerance constant [OPTIONAL parameter]
-* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
-*
-* Output:
-* Filtered/regularized image
-*
-* Example:
-* figure;
-* Im = double(imread('lena_gray_256.tif'))/255;  % loading image
-* u0 = Im + .05*randn(size(Im)); u0(u0 < 0) = 0;
-* u = SplitBregman_TV(single(u0), 10, 30, 1e-04);
-*
-* References:
-* The Split Bregman Method for L1 Regularized Problems, by Tom Goldstein and Stanley Osher.
-* D. Kazantsev, 2016*
-*/
-
-
-/* 2D-case related Functions */
-/*****************************************************************/
-float gauss_seidel2D(float *U, float *A, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda, float mu)
-{
-    float sum, normConst;
-    int i,j,i1,i2,j1,j2;
-    normConst = 1.0f/(mu + 4.0f*lambda);
-    
-#pragma omp parallel for shared(U) private(i,j,i1,i2,j1,j2,sum)
-    for(i=0; i<dimX; i++) {
-        /* symmetric boundary conditions (Neuman) */
-        i1 = i+1; if (i1 == dimX) i1 = i-1;
-        i2 = i-1; if (i2 < 0) i2 = i+1;
-        for(j=0; j<dimY; j++) {
-            /* symmetric boundary conditions (Neuman) */
-            j1 = j+1; if (j1 == dimY) j1 = j-1;
-            j2 = j-1; if (j2 < 0) j2 = j+1;
-            
-            sum = Dx[(i2)*dimY + (j)] - Dx[(i)*dimY + (j)] + Dy[(i)*dimY + (j2)] - Dy[(i)*dimY + (j)] - Bx[(i2)*dimY + (j)] + Bx[(i)*dimY + (j)] - By[(i)*dimY + (j2)] + By[(i)*dimY + (j)];
-            sum += (U[(i1)*dimY + (j)] + U[(i2)*dimY + (j)] + U[(i)*dimY + (j1)] + U[(i)*dimY + (j2)]);
-            sum *= lambda;
-            sum += mu*A[(i)*dimY + (j)];
-            U[(i)*dimY + (j)] = normConst*sum;
-        }}
-    return *U;
-}
-
-float updDxDy_shrinkAniso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda)
-{
-    int i,j,i1,j1;
-    float val1, val11, val2, val22, denom_lam;
-    denom_lam = 1.0f/lambda;
-#pragma omp parallel for shared(U,denom_lam) private(i,j,i1,j1,val1,val11,val2,val22)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            /* symmetric boundary conditions (Neuman) */
-            i1 = i+1; if (i1 == dimX) i1 = i-1;
-            j1 = j+1; if (j1 == dimY) j1 = j-1;
-            
-            val1 = (U[(i1)*dimY + (j)] - U[(i)*dimY + (j)]) + Bx[(i)*dimY + (j)];
-            val2 = (U[(i)*dimY + (j1)] - U[(i)*dimY + (j)]) + By[(i)*dimY + (j)];
-            
-            val11 = fabs(val1) - denom_lam; if (val11 < 0) val11 = 0;
-            val22 = fabs(val2) - denom_lam; if (val22 < 0) val22 = 0;
-            
-            if (val1 !=0) Dx[(i)*dimY + (j)] = (val1/fabs(val1))*val11; else Dx[(i)*dimY + (j)] = 0;
-            if (val2 !=0) Dy[(i)*dimY + (j)] = (val2/fabs(val2))*val22; else Dy[(i)*dimY + (j)] = 0;
-            
-        }}
-    return 1;
-}
-float updDxDy_shrinkIso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda)
-{
-    int i,j,i1,j1;
-    float val1, val11, val2, denom, denom_lam;
-    denom_lam = 1.0f/lambda;
-    
-#pragma omp parallel for shared(U,denom_lam) private(i,j,i1,j1,val1,val11,val2,denom)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            /* symmetric boundary conditions (Neuman) */
-            i1 = i+1; if (i1 == dimX) i1 = i-1;
-            j1 = j+1; if (j1 == dimY) j1 = j-1;
-            
-            val1 = (U[(i1)*dimY + (j)] - U[(i)*dimY + (j)]) + Bx[(i)*dimY + (j)];
-            val2 = (U[(i)*dimY + (j1)] - U[(i)*dimY + (j)]) + By[(i)*dimY + (j)];
-            
-            denom = sqrt(val1*val1 + val2*val2);
-            
-            val11 = (denom - denom_lam); if (val11 < 0) val11 = 0.0f;
-            
-            if (denom != 0.0f) {
-                Dx[(i)*dimY + (j)] = val11*(val1/denom);
-                Dy[(i)*dimY + (j)] = val11*(val2/denom);
-            }
-            else {
-                Dx[(i)*dimY + (j)] = 0;
-                Dy[(i)*dimY + (j)] = 0;
-            }
-        }}
-    return 1;
-}
-float updBxBy2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY)
-{
-    int i,j,i1,j1;
-#pragma omp parallel for shared(U) private(i,j,i1,j1)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            /* symmetric boundary conditions (Neuman) */
-            i1 = i+1; if (i1 == dimX) i1 = i-1;
-            j1 = j+1; if (j1 == dimY) j1 = j-1;
-            
-            Bx[(i)*dimY + (j)] = Bx[(i)*dimY + (j)] + ((U[(i1)*dimY + (j)] - U[(i)*dimY + (j)]) - Dx[(i)*dimY + (j)]);
-            By[(i)*dimY + (j)] = By[(i)*dimY + (j)] + ((U[(i)*dimY + (j1)] - U[(i)*dimY + (j)]) - Dy[(i)*dimY + (j)]);
-        }}
-    return 1;
-}
-
-
-/* 3D-case related Functions */
-/*****************************************************************/
-float gauss_seidel3D(float *U, float *A, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda, float mu)
-{
-    float normConst, d_val, b_val, sum;
-    int i,j,i1,i2,j1,j2,k,k1,k2;
-    normConst = 1.0f/(mu + 6.0f*lambda);
-#pragma omp parallel for shared(U) private(i,j,i1,i2,j1,j2,k,k1,k2,d_val,b_val,sum)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
-                /* symmetric boundary conditions (Neuman) */
-                i1 = i+1; if (i1 == dimX) i1 = i-1;
-                i2 = i-1; if (i2 < 0) i2 = i+1;
-                j1 = j+1; if (j1 == dimY) j1 = j-1;
-                j2 = j-1; if (j2 < 0) j2 = j+1;
-                k1 = k+1; if (k1 == dimZ) k1 = k-1;
-                k2 = k-1; if (k2 < 0) k2 = k+1;
-                
-                d_val = Dx[(dimX*dimY)*k + (i2)*dimY + (j)] - Dx[(dimX*dimY)*k + (i)*dimY + (j)] + Dy[(dimX*dimY)*k + (i)*dimY + (j2)] - Dy[(dimX*dimY)*k + (i)*dimY + (j)] + Dz[(dimX*dimY)*k2 + (i)*dimY + (j)] - Dz[(dimX*dimY)*k + (i)*dimY + (j)];
-                b_val = -Bx[(dimX*dimY)*k + (i2)*dimY + (j)] + Bx[(dimX*dimY)*k + (i)*dimY + (j)] - By[(dimX*dimY)*k + (i)*dimY + (j2)] + By[(dimX*dimY)*k + (i)*dimY + (j)] - Bz[(dimX*dimY)*k2 + (i)*dimY + (j)] + Bz[(dimX*dimY)*k + (i)*dimY + (j)];
-                sum =  d_val + b_val;
-                sum += U[(dimX*dimY)*k + (i1)*dimY + (j)] + U[(dimX*dimY)*k + (i2)*dimY + (j)] + U[(dimX*dimY)*k + (i)*dimY + (j1)] + U[(dimX*dimY)*k + (i)*dimY + (j2)] + U[(dimX*dimY)*k1 + (i)*dimY + (j)] + U[(dimX*dimY)*k2 + (i)*dimY + (j)];
-                sum *= lambda;
-                sum += mu*A[(dimX*dimY)*k + (i)*dimY + (j)];
-                U[(dimX*dimY)*k + (i)*dimY + (j)] = normConst*sum;
-            }}}
-    return *U;
-}
-
-float updDxDyDz_shrinkAniso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda)
-{
-    int i,j,i1,j1,k,k1,index;
-    float val1, val11, val2, val22, val3, val33, denom_lam;
-    denom_lam = 1.0f/lambda;
-#pragma omp parallel for shared(U,denom_lam) private(index,i,j,i1,j1,k,k1,val1,val11,val2,val22,val3,val33)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
-                index = (dimX*dimY)*k + (i)*dimY + (j);
-                /* symmetric boundary conditions (Neuman) */
-                i1 = i+1; if (i1 == dimX) i1 = i-1;
-                j1 = j+1; if (j1 == dimY) j1 = j-1;
-                k1 = k+1; if (k1 == dimZ) k1 = k-1;
-                
-                val1 = (U[(dimX*dimY)*k + (i1)*dimY + (j)] - U[index]) + Bx[index];
-                val2 = (U[(dimX*dimY)*k + (i)*dimY + (j1)] - U[index]) + By[index];
-                val3 = (U[(dimX*dimY)*k1 + (i)*dimY + (j)] - U[index]) + Bz[index];
-                
-                val11 = fabs(val1) - denom_lam; if (val11 < 0) val11 = 0;
-                val22 = fabs(val2) - denom_lam; if (val22 < 0) val22 = 0;
-                val33 = fabs(val3) - denom_lam; if (val33 < 0) val33 = 0;
-                
-                if (val1 !=0) Dx[index] = (val1/fabs(val1))*val11; else Dx[index] = 0;
-                if (val2 !=0) Dy[index] = (val2/fabs(val2))*val22; else Dy[index] = 0;
-                if (val3 !=0) Dz[index] = (val3/fabs(val3))*val33; else Dz[index] = 0;
-                
-            }}}
-    return 1;
-}
-float updDxDyDz_shrinkIso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda)
-{
-    int i,j,i1,j1,k,k1,index;
-    float val1, val11, val2, val3, denom, denom_lam;
-    denom_lam = 1.0f/lambda;
-#pragma omp parallel for shared(U,denom_lam) private(index,denom,i,j,i1,j1,k,k1,val1,val11,val2,val3)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
-                index = (dimX*dimY)*k + (i)*dimY + (j);
-                /* symmetric boundary conditions (Neuman) */
-                i1 = i+1; if (i1 == dimX) i1 = i-1;
-                j1 = j+1; if (j1 == dimY) j1 = j-1;
-                k1 = k+1; if (k1 == dimZ) k1 = k-1;
-                
-                val1 = (U[(dimX*dimY)*k + (i1)*dimY + (j)] - U[index]) + Bx[index];
-                val2 = (U[(dimX*dimY)*k + (i)*dimY + (j1)] - U[index]) + By[index];
-                val3 = (U[(dimX*dimY)*k1 + (i)*dimY + (j)] - U[index]) + Bz[index];
-                
-                denom = sqrt(val1*val1 + val2*val2 + val3*val3);
-                
-                val11 = (denom - denom_lam); if (val11 < 0) val11 = 0.0f;
-                
-                if (denom != 0.0f) {
-                    Dx[index] = val11*(val1/denom);
-                    Dy[index] = val11*(val2/denom);
-                    Dz[index] = val11*(val3/denom);
-                }
-                else {
-                    Dx[index] = 0;
-                    Dy[index] = 0;
-                    Dz[index] = 0;
-                }               
-            }}}
-    return 1;
-}
-float updBxByBz3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ)
-{
-    int i,j,k,i1,j1,k1;
-#pragma omp parallel for shared(U) private(i,j,k,i1,j1,k1)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
-                /* symmetric boundary conditions (Neuman) */
-                i1 = i+1; if (i1 == dimX) i1 = i-1;
-                j1 = j+1; if (j1 == dimY) j1 = j-1;
-                k1 = k+1; if (k1 == dimZ) k1 = k-1;
-                
-                Bx[(dimX*dimY)*k + (i)*dimY + (j)] = Bx[(dimX*dimY)*k + (i)*dimY + (j)] + ((U[(dimX*dimY)*k + (i1)*dimY + (j)] - U[(dimX*dimY)*k + (i)*dimY + (j)]) - Dx[(dimX*dimY)*k + (i)*dimY + (j)]);
-                By[(dimX*dimY)*k + (i)*dimY + (j)] = By[(dimX*dimY)*k + (i)*dimY + (j)] + ((U[(dimX*dimY)*k + (i)*dimY + (j1)] - U[(dimX*dimY)*k + (i)*dimY + (j)]) - Dy[(dimX*dimY)*k + (i)*dimY + (j)]);
-                Bz[(dimX*dimY)*k + (i)*dimY + (j)] = Bz[(dimX*dimY)*k + (i)*dimY + (j)] + ((U[(dimX*dimY)*k1 + (i)*dimY + (j)] - U[(dimX*dimY)*k + (i)*dimY + (j)]) - Dz[(dimX*dimY)*k + (i)*dimY + (j)]);
-                
-            }}}
-    return 1;
-}
diff --git a/main_func/regularizers_CPU/SplitBregman_TV_core.h b/main_func/regularizers_CPU/SplitBregman_TV_core.h
deleted file mode 100644
index 6ed3ff9..0000000
--- a/main_func/regularizers_CPU/SplitBregman_TV_core.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-//#include <matrix.h>
-#include <math.h>
-#include <stdlib.h>
-#include <memory.h>
-#include <stdio.h>
-#include "omp.h"
-
-#include "utils.h"
-
-/* C-OMP implementation of Split Bregman - TV denoising-regularization model (2D/3D)
-*
-* Input Parameters:
-* 1. Noisy image/volume
-* 2. lambda - regularization parameter
-* 3. Number of iterations [OPTIONAL parameter]
-* 4. eplsilon - tolerance constant [OPTIONAL parameter]
-* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
-*
-* Output:
-* Filtered/regularized image
-*
-* Example:
-* figure;
-* Im = double(imread('lena_gray_256.tif'))/255;  % loading image
-* u0 = Im + .05*randn(size(Im)); u0(u0 < 0) = 0;
-* u = SplitBregman_TV(single(u0), 10, 30, 1e-04);
-*
-* to compile with OMP support: mex SplitBregman_TV.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
-* References:
-* The Split Bregman Method for L1 Regularized Problems, by Tom Goldstein and Stanley Osher.
-* D. Kazantsev, 2016*
-*/
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-//float copyIm(float *A, float *B, int dimX, int dimY, int dimZ);
-float gauss_seidel2D(float *U, float *A, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda, float mu);
-float updDxDy_shrinkAniso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda);
-float updDxDy_shrinkIso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda);
-float updBxBy2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY);
-
-float gauss_seidel3D(float *U, float *A, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda, float mu);
-float updDxDyDz_shrinkAniso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda);
-float updDxDyDz_shrinkIso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda);
-float updBxByBz3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ);
-
-#ifdef __cplusplus
-}
-#endif
\ No newline at end of file
diff --git a/main_func/regularizers_CPU/TGV_PD.c b/main_func/regularizers_CPU/TGV_PD.c
deleted file mode 100644
index c9cb440..0000000
--- a/main_func/regularizers_CPU/TGV_PD.c
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#include "TGV_PD_core.h"
-#include "mex.h"
-
-/* C-OMP implementation of Primal-Dual denoising method for 
- * Total Generilized Variation (TGV)-L2 model (2D case only)
- *
- * Input Parameters:
- * 1. Noisy image/volume (2D)
- * 2. lambda - regularization parameter
- * 3. parameter to control first-order term (alpha1)
- * 4. parameter to control the second-order term (alpha0)
- * 5. Number of CP iterations
- *
- * Output:
- * Filtered/regularized image 
- *
- * Example:
- * figure;
- * Im = double(imread('lena_gray_256.tif'))/255;  % loading image
- * u0 = Im + .03*randn(size(Im)); % adding noise
- * tic; u = TGV_PD(single(u0), 0.02, 1.3, 1, 550); toc;
- *
- * to compile with OMP support: mex TGV_PD.c TGV_PD_core.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
- * References:
- * K. Bredies "Total Generalized Variation"
- *
- * 28.11.16/Harwell
- */
- 
-void mexFunction(
-        int nlhs, mxArray *plhs[],
-        int nrhs, const mxArray *prhs[])
-        
-{
-    int number_of_dims, iter, dimX, dimY, dimZ, ll;
-    const int  *dim_array;
-    float *A, *U, *U_old, *P1, *P2, *Q1, *Q2, *Q3, *V1, *V1_old, *V2, *V2_old, lambda, L2, tau, sigma,  alpha1, alpha0;
-    
-    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
-    dim_array = mxGetDimensions(prhs[0]);
-    
-    /*Handling Matlab input data*/
-    A  = (float *) mxGetData(prhs[0]); /*origanal noise image/volume*/
-    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input in single precision is required"); }
-    lambda =  (float) mxGetScalar(prhs[1]); /*regularization parameter*/
-    alpha1 =  (float) mxGetScalar(prhs[2]); /*first-order term*/
-    alpha0 =  (float) mxGetScalar(prhs[3]); /*second-order term*/
-    iter =  (int) mxGetScalar(prhs[4]); /*iterations number*/
-    if(nrhs != 5) mexErrMsgTxt("Five input parameters is reqired: Image(2D/3D), Regularization parameter, alpha1, alpha0, Iterations");
-    
-    /*Handling Matlab output data*/
-    dimX = dim_array[0]; dimY = dim_array[1];
-    
-    if (number_of_dims == 2) {
-        /*2D case*/
-        dimZ = 1;
-        U = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        
-        /*dual variables*/
-        P1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        P2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        
-        Q1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        Q2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        Q3 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        
-        U_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        
-        V1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        V1_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        V2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        V2_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));   
-        
-        
-         /*printf("%i \n", i);*/
-    L2 = 12.0f; /*Lipshitz constant*/
-    tau = 1.0/pow(L2,0.5);
-    sigma = 1.0/pow(L2,0.5);
-    
-    /*Copy A to U*/
-    copyIm(A, U, dimX, dimY, dimZ);
-    
-        /* Here primal-dual iterations begin for 2D */
-        for(ll = 0; ll < iter; ll++) {
-            
-            /* Calculate Dual Variable P */
-            DualP_2D(U, V1, V2, P1, P2, dimX, dimY, dimZ, sigma);
-            
-            /*Projection onto convex set for P*/
-            ProjP_2D(P1, P2, dimX, dimY, dimZ, alpha1);
-            
-            /* Calculate Dual Variable Q */
-            DualQ_2D(V1, V2, Q1, Q2, Q3, dimX, dimY, dimZ, sigma);
-            
-            /*Projection onto convex set for Q*/
-            ProjQ_2D(Q1, Q2, Q3, dimX, dimY, dimZ, alpha0);
-            
-            /*saving U into U_old*/
-            copyIm(U, U_old, dimX, dimY, dimZ);
-            
-            /*adjoint operation  -> divergence and projection of P*/
-            DivProjP_2D(U, A, P1, P2, dimX, dimY, dimZ, lambda, tau);
-            
-            /*get updated solution U*/
-            newU(U, U_old, dimX, dimY, dimZ);
-            
-            /*saving V into V_old*/
-            copyIm(V1, V1_old, dimX, dimY, dimZ);
-            copyIm(V2, V2_old, dimX, dimY, dimZ);
-            
-            /* upd V*/
-            UpdV_2D(V1, V2, P1, P2, Q1, Q2, Q3, dimX, dimY, dimZ, tau);
-            
-            /*get new V*/
-            newU(V1, V1_old, dimX, dimY, dimZ);
-            newU(V2, V2_old, dimX, dimY, dimZ);
-        } /*end of iterations*/        
-    }
-    else if (number_of_dims == 3) {
-        mexErrMsgTxt("The input data should be a 2D array");
-        /*3D case*/
-    }
-    else {mexErrMsgTxt("The input data should be a 2D array");}    
-   
-}
diff --git a/main_func/regularizers_CPU/TGV_PD_core.c b/main_func/regularizers_CPU/TGV_PD_core.c
deleted file mode 100644
index 4139d10..0000000
--- a/main_func/regularizers_CPU/TGV_PD_core.c
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazanteev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#include "TGV_PD_core.h"
-
-/* C-OMP implementation of Primal-Dual denoising method for 
- * Total Generilized Variation (TGV)-L2 model (2D case only)
- *
- * Input Parameters:
- * 1. Noisy image/volume (2D)
- * 2. lambda - regularization parameter
- * 3. parameter to control first-order term (alpha1)
- * 4. parameter to control the second-order term (alpha0)
- * 5. Number of CP iterations
- *
- * Output:
- * Filtered/regularized image 
- *
- * Example:
- * figure;
- * Im = double(imread('lena_gray_256.tif'))/255;  % loading image
- * u0 = Im + .03*randn(size(Im)); % adding noise
- * tic; u = PrimalDual_TGV(single(u0), 0.02, 1.3, 1, 550); toc;
- *
- * References:
- * K. Bredies "Total Generalized Variation"
- *
- * 28.11.16/Harwell
- */
- 
-
-
-
-/*Calculating dual variable P (using forward differences)*/
-float DualP_2D(float *U, float *V1, float *V2, float *P1, float *P2, int dimX, int dimY, int dimZ, float sigma)
-{
-    int i,j;
-#pragma omp parallel for shared(U,V1,V2,P1,P2) private(i,j)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            /* symmetric boundary conditions (Neuman) */
-            if (i == dimX-1) P1[i*dimY + (j)] = P1[i*dimY + (j)] + sigma*((U[(i-1)*dimY + (j)] - U[i*dimY + (j)])  - V1[i*dimY + (j)]);
-            else P1[i*dimY + (j)] = P1[i*dimY + (j)] + sigma*((U[(i + 1)*dimY + (j)] - U[i*dimY + (j)])  - V1[i*dimY + (j)]);
-            if (j == dimY-1) P2[i*dimY + (j)] = P2[i*dimY + (j)] + sigma*((U[(i)*dimY + (j-1)] - U[i*dimY + (j)])  - V2[i*dimY + (j)]);
-            else  P2[i*dimY + (j)] = P2[i*dimY + (j)] + sigma*((U[(i)*dimY + (j+1)] - U[i*dimY + (j)])  - V2[i*dimY + (j)]);
-        }}
-    return 1;
-}
-/*Projection onto convex set for P*/
-float ProjP_2D(float *P1, float *P2, int dimX, int dimY, int dimZ, float alpha1)
-{
-    float grad_magn;
-    int i,j;
-#pragma omp parallel for shared(P1,P2) private(i,j,grad_magn)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            grad_magn = sqrt(pow(P1[i*dimY + (j)],2) + pow(P2[i*dimY + (j)],2));
-            grad_magn = grad_magn/alpha1;
-            if (grad_magn > 1.0) {
-                P1[i*dimY + (j)] = P1[i*dimY + (j)]/grad_magn;
-                P2[i*dimY + (j)] = P2[i*dimY + (j)]/grad_magn;
-            }
-        }}
-    return 1;
-}
-/*Calculating dual variable Q (using forward differences)*/
-float DualQ_2D(float *V1, float *V2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, float sigma)
-{
-    int i,j;
-    float q1, q2, q11, q22;
-#pragma omp parallel for shared(Q1,Q2,Q3,V1,V2) private(i,j,q1,q2,q11,q22)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            /* symmetric boundary conditions (Neuman) */
-            if (i == dimX-1)
-            { q1 = (V1[(i-1)*dimY + (j)] - V1[i*dimY + (j)]);
-              q11 = (V2[(i-1)*dimY + (j)] - V2[i*dimY + (j)]);
-            }
-            else {
-                q1 = (V1[(i+1)*dimY + (j)] - V1[i*dimY + (j)]);
-                q11 = (V2[(i+1)*dimY + (j)] - V2[i*dimY + (j)]);
-            }
-            if (j == dimY-1) {
-                q2 = (V2[(i)*dimY + (j-1)] - V2[i*dimY + (j)]);
-                q22 = (V1[(i)*dimY + (j-1)] - V1[i*dimY + (j)]);
-            }
-            else {
-                q2 = (V2[(i)*dimY + (j+1)] - V2[i*dimY + (j)]);
-                q22 = (V1[(i)*dimY + (j+1)] - V1[i*dimY + (j)]);
-            }
-            Q1[i*dimY + (j)] = Q1[i*dimY + (j)] + sigma*(q1);
-            Q2[i*dimY + (j)] = Q2[i*dimY + (j)] + sigma*(q2);
-            Q3[i*dimY + (j)] = Q3[i*dimY + (j)]  + sigma*(0.5f*(q11 + q22));
-        }}
-    return 1;
-}
-
-float ProjQ_2D(float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, float alpha0)
-{
-    float grad_magn;
-    int i,j;
-#pragma omp parallel for shared(Q1,Q2,Q3) private(i,j,grad_magn)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            grad_magn = sqrt(pow(Q1[i*dimY + (j)],2) + pow(Q2[i*dimY + (j)],2) + 2*pow(Q3[i*dimY + (j)],2));
-            grad_magn = grad_magn/alpha0;
-            if (grad_magn > 1.0) {
-                Q1[i*dimY + (j)] = Q1[i*dimY + (j)]/grad_magn;
-                Q2[i*dimY + (j)] = Q2[i*dimY + (j)]/grad_magn;
-                Q3[i*dimY + (j)] = Q3[i*dimY + (j)]/grad_magn;
-            }
-        }}
-    return 1;
-}
-/* Divergence and projection for P*/
-float DivProjP_2D(float *U, float *A, float *P1, float *P2, int dimX, int dimY, int dimZ, float lambda, float tau)
-{
-    int i,j;
-    float P_v1, P_v2, div;
-#pragma omp parallel for shared(U,A,P1,P2) private(i,j,P_v1,P_v2,div)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            if (i == 0) P_v1 = (P1[i*dimY + (j)]);
-            else P_v1 = (P1[i*dimY + (j)] - P1[(i-1)*dimY + (j)]);
-            if (j == 0) P_v2 = (P2[i*dimY + (j)]);
-            else  P_v2 = (P2[i*dimY + (j)] - P2[(i)*dimY + (j-1)]);
-            div = P_v1 + P_v2;
-            U[i*dimY + (j)] = (lambda*(U[i*dimY + (j)] + tau*div) + tau*A[i*dimY + (j)])/(lambda + tau);
-        }}
-    return *U;
-}
-/*get updated solution U*/
-float newU(float *U, float *U_old, int dimX, int dimY, int dimZ)
-{
-    int i;
-#pragma omp parallel for shared(U,U_old) private(i)
-    for(i=0; i<dimX*dimY*dimZ; i++) U[i] = 2*U[i] - U_old[i];
-    return *U;
-}
-
-/*get update for V*/
-float UpdV_2D(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, float tau)
-{
-    int i,j;
-    float q1, q11, q2, q22, div1, div2;
-#pragma omp parallel for shared(V1,V2,P1,P2,Q1,Q2,Q3) private(i,j, q1, q11, q2, q22, div1, div2)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            /* symmetric boundary conditions (Neuman) */
-            if (i == 0) {
-                q1 = (Q1[i*dimY + (j)]);
-                q11 = (Q3[i*dimY + (j)]);
-            }
-            else {
-                q1 = (Q1[i*dimY + (j)] - Q1[(i-1)*dimY + (j)]);
-                q11 = (Q3[i*dimY + (j)] - Q3[(i-1)*dimY + (j)]);
-            }
-            if (j == 0) {
-                q2 = (Q2[i*dimY + (j)]);
-                q22 = (Q3[i*dimY + (j)]);
-            }
-            else  {
-                q2 = (Q2[i*dimY + (j)] - Q2[(i)*dimY + (j-1)]);
-                q22 = (Q3[i*dimY + (j)] - Q3[(i)*dimY + (j-1)]);
-            }
-            div1 = q1 + q22;
-            div2 = q2 + q11;
-            V1[i*dimY + (j)] = V1[i*dimY + (j)] + tau*(P1[i*dimY + (j)] + div1);
-            V2[i*dimY + (j)] = V2[i*dimY + (j)] + tau*(P2[i*dimY + (j)] + div2);
-        }}
-    return 1;
-}
-/*********************3D *********************/
-
-/*Calculating dual variable P (using forward differences)*/
-float DualP_3D(float *U, float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ, float sigma)
-{
-    int i,j,k;
-#pragma omp parallel for shared(U,V1,V2,V3,P1,P2,P3) private(i,j,k)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
-                /* symmetric boundary conditions (Neuman) */
-                if (i == dimX-1) P1[dimX*dimY*k + i*dimY + (j)] = P1[dimX*dimY*k + i*dimY + (j)] + sigma*((U[dimX*dimY*k + (i-1)*dimY + (j)] - U[dimX*dimY*k + i*dimY + (j)])  - V1[dimX*dimY*k + i*dimY + (j)]);
-                else P1[dimX*dimY*k + i*dimY + (j)] = P1[dimX*dimY*k + i*dimY + (j)] + sigma*((U[dimX*dimY*k + (i + 1)*dimY + (j)] - U[dimX*dimY*k + i*dimY + (j)])  - V1[dimX*dimY*k + i*dimY + (j)]);
-                if (j == dimY-1) P2[dimX*dimY*k + i*dimY + (j)] = P2[dimX*dimY*k + i*dimY + (j)] + sigma*((U[dimX*dimY*k + (i)*dimY + (j-1)] - U[dimX*dimY*k + i*dimY + (j)])  - V2[dimX*dimY*k + i*dimY + (j)]);
-                else  P2[dimX*dimY*k + i*dimY + (j)] = P2[dimX*dimY*k + i*dimY + (j)] + sigma*((U[dimX*dimY*k + (i)*dimY + (j+1)] - U[dimX*dimY*k + i*dimY + (j)])  - V2[dimX*dimY*k + i*dimY + (j)]);
-                if (k == dimZ-1) P3[dimX*dimY*k + i*dimY + (j)] = P3[dimX*dimY*k + i*dimY + (j)] + sigma*((U[dimX*dimY*(k-1) + (i)*dimY + (j)] - U[dimX*dimY*k + i*dimY + (j)])  - V3[dimX*dimY*k + i*dimY + (j)]);
-                else  P3[dimX*dimY*k + i*dimY + (j)] = P3[dimX*dimY*k + i*dimY + (j)] + sigma*((U[dimX*dimY*(k+1) + (i)*dimY + (j)] - U[dimX*dimY*k + i*dimY + (j)])  - V3[dimX*dimY*k + i*dimY + (j)]);
-            }}}
-    return 1;
-}
\ No newline at end of file
diff --git a/main_func/regularizers_CPU/TGV_PD_core.h b/main_func/regularizers_CPU/TGV_PD_core.h
deleted file mode 100644
index d5378df..0000000
--- a/main_func/regularizers_CPU/TGV_PD_core.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-//#include <matrix.h>
-#include <math.h>
-#include <stdlib.h>
-#include <memory.h>
-#include <stdio.h>
-#include "omp.h"
-#include "utils.h"
-
-/* C-OMP implementation of Primal-Dual denoising method for
-* Total Generilized Variation (TGV)-L2 model (2D case only)
-*
-* Input Parameters:
-* 1. Noisy image/volume (2D)
-* 2. lambda - regularization parameter
-* 3. parameter to control first-order term (alpha1)
-* 4. parameter to control the second-order term (alpha0)
-* 5. Number of CP iterations
-*
-* Output:
-* Filtered/regularized image
-*
-* Example:
-* figure;
-* Im = double(imread('lena_gray_256.tif'))/255;  % loading image
-* u0 = Im + .03*randn(size(Im)); % adding noise
-* tic; u = PrimalDual_TGV(single(u0), 0.02, 1.3, 1, 550); toc;
-*
-* to compile with OMP support: mex TGV_PD.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
-* References:
-* K. Bredies "Total Generalized Variation"
-*
-* 28.11.16/Harwell
-*/
-#ifdef __cplusplus
-extern "C" {
-#endif
-/* 2D functions */
-float DualP_2D(float *U, float *V1, float *V2, float *P1, float *P2, int dimX, int dimY, int dimZ, float sigma);
-float ProjP_2D(float *P1, float *P2, int dimX, int dimY, int dimZ, float alpha1);
-float DualQ_2D(float *V1, float *V2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, float sigma);
-float ProjQ_2D(float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, float alpha0);
-float DivProjP_2D(float *U, float *A, float *P1, float *P2, int dimX, int dimY, int dimZ, float lambda, float tau);
-float UpdV_2D(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, float tau);
-float newU(float *U, float *U_old, int dimX, int dimY, int dimZ);
-//float copyIm(float *A, float *U, int dimX, int dimY, int dimZ);
-#ifdef __cplusplus
-}
-#endif
diff --git a/main_func/regularizers_CPU/utils.c b/main_func/regularizers_CPU/utils.c
deleted file mode 100644
index 0e83d2c..0000000
--- a/main_func/regularizers_CPU/utils.c
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazanteev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#include "utils.h"
-
-/* Copy Image */
-float copyIm(float *A, float *U, int dimX, int dimY, int dimZ)
-{
-	int j;
-#pragma omp parallel for shared(A, U) private(j)
-	for (j = 0; j<dimX*dimY*dimZ; j++)  U[j] = A[j];
-	return *U;
-}
\ No newline at end of file
diff --git a/main_func/regularizers_CPU/utils.h b/main_func/regularizers_CPU/utils.h
deleted file mode 100644
index 53463a3..0000000
--- a/main_func/regularizers_CPU/utils.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-//#include <matrix.h>
-//#include <math.h>
-#include <stdlib.h>
-#include <memory.h>
-//#include <stdio.h>
-#include "omp.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
-float copyIm(float *A, float *U, int dimX, int dimY, int dimZ);
-#ifdef __cplusplus
-}
-#endif
diff --git a/main_func/regularizers_GPU/Diffus_HO/Diff4thHajiaboli_GPU.cpp b/main_func/regularizers_GPU/Diffus_HO/Diff4thHajiaboli_GPU.cpp
deleted file mode 100644
index 5a8c7c0..0000000
--- a/main_func/regularizers_GPU/Diffus_HO/Diff4thHajiaboli_GPU.cpp
+++ /dev/null
@@ -1,114 +0,0 @@
-#include "mex.h"
-#include <matrix.h>
-#include <math.h>
-#include <stdlib.h>
-#include <memory.h>
-#include <stdio.h>
-#include <iostream>
-#include "Diff4th_GPU_kernel.h"
-
-/*
- * 2D and 3D CUDA implementation of the 4th order PDE denoising model by Hajiaboli
- *
- * Reference :
- * "An anisotropic fourth-order diffusion filter for image noise removal" by M. Hajiaboli
- * 
- * Example
- * figure;
- * Im = double(imread('lena_gray_256.tif'))/255;  % loading image
- * u0 = Im + .05*randn(size(Im)); % adding noise
- * u = Diff4thHajiaboli_GPU(single(u0), 0.02, 150);
- * subplot (1,2,1); imshow(u0,[ ]); title('Noisy Image')
- * subplot (1,2,2); imshow(u,[ ]); title('Denoised Image')
- *
- *
- * Linux/Matlab compilation:
- * compile in terminal: nvcc -Xcompiler -fPIC -shared -o Diff4th_GPU_kernel.o Diff4th_GPU_kernel.cu
- * then compile in Matlab: mex -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart Diff4thHajiaboli_GPU.cpp Diff4th_GPU_kernel.o
- */
-
-void mexFunction(
-        int nlhs, mxArray *plhs[],
-        int nrhs, const mxArray *prhs[])
-{
-    int numdims, dimZ, size;
-    float *A, *B, *A_L, *B_L;
-    const int *dims;    
-    
-    numdims = mxGetNumberOfDimensions(prhs[0]);
-    dims = mxGetDimensions(prhs[0]);       
-    
-    float sigma = (float)mxGetScalar(prhs[1]); /* edge-preserving parameter */
-    float lambda = (float)mxGetScalar(prhs[2]); /* regularization parameter */
-    int iter = (int)mxGetScalar(prhs[3]); /* iterations number */
-    
-    if (numdims == 2)  {
-        
-        int N, M, Z, i, j;
-        Z = 0; // for the 2D case
-        float tau = 0.01; // time step is sufficiently small for an explicit methods
-        
-        /*Input data*/
-        A = (float*)mxGetData(prhs[0]);        
-        N = dims[0] + 2;
-        M = dims[1] + 2;
-        A_L = (float*)mxGetData(mxCreateNumericMatrix(N, M, mxSINGLE_CLASS, mxREAL));
-        B_L = (float*)mxGetData(mxCreateNumericMatrix(N, M, mxSINGLE_CLASS, mxREAL));
-        
-        /*Output data*/
-        B = (float*)mxGetData(plhs[0] = mxCreateNumericMatrix(dims[0], dims[1], mxSINGLE_CLASS, mxREAL));        
-        
-        // copy A to the bigger A_L with boundaries
-        #pragma omp parallel for shared(A_L, A) private(i,j)
-        for (i=0; i < N; i++) {
-            for (j=0; j < M; j++) {
-                if (((i > 0) && (i < N-1)) &&  ((j > 0) && (j < M-1)))  A_L[i*M+j] = A[(i-1)*(dims[1])+(j-1)];
-            }}
-        
-        // Running CUDA code here
-        Diff4th_GPU_kernel(A_L, B_L, N, M, Z, (float)sigma, iter, (float)tau, lambda);
-        
-        // copy the processed B_L to a smaller B
-        #pragma omp parallel for shared(B_L, B) private(i,j)
-        for (i=0; i < N; i++) {
-            for (j=0; j < M; j++) {
-                if (((i > 0) && (i < N-1)) &&  ((j > 0) && (j < M-1)))   B[(i-1)*(dims[1])+(j-1)] = B_L[i*M+j];
-            }}
-    }
-    if (numdims == 3)  {
-        //  3D image denoising / regularization
-        int N, M, Z, i, j, k;
-        float tau = 0.0007; // Time Step is small for an explicit methods
-        A = (float*)mxGetData(prhs[0]);
-        N = dims[0] + 2;
-        M = dims[1] + 2;
-        Z = dims[2] + 2;
-        int N_dims[] = {N, M, Z};
-        A_L = (float*)mxGetPr(mxCreateNumericArray(3, N_dims, mxSINGLE_CLASS, mxREAL));
-        B_L = (float*)mxGetPr(mxCreateNumericArray(3, N_dims, mxSINGLE_CLASS, mxREAL));
-        
-        /* output data */
-        B = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dims, mxSINGLE_CLASS, mxREAL));
-        
-        // copy A to the bigger A_L with boundaries
-        #pragma omp parallel for shared(A_L, A) private(i,j,k)
-        for (i=0; i < N; i++) {
-            for (j=0; j < M; j++) {
-                for (k=0; k < Z; k++) {
-                    if (((i > 0) && (i < N-1)) &&  ((j > 0) && (j < M-1)) &&  ((k > 0) && (k < Z-1))) {
-                        A_L[(N*M)*(k)+(i)*M+(j)] = A[(dims[0]*dims[1])*(k-1)+(i-1)*dims[1]+(j-1)];
-                    }}}}
-        
-        // Running CUDA kernel here for diffusivity
-        Diff4th_GPU_kernel(A_L, B_L, N, M, Z, (float)sigma, iter, (float)tau, lambda);
-        
-        // copy the processed B_L to a smaller B
-        #pragma omp parallel for shared(B_L, B) private(i,j,k)
-        for (i=0; i < N; i++) {
-            for (j=0; j < M; j++) {
-                for (k=0; k < Z; k++) {
-                    if (((i > 0) && (i < N-1)) &&  ((j > 0) && (j < M-1)) &&  ((k > 0) && (k < Z-1))) {
-                        B[(dims[0]*dims[1])*(k-1)+(i-1)*dims[1]+(j-1)] = B_L[(N*M)*(k)+(i)*M+(j)];
-                    }}}}
-    }
-}
\ No newline at end of file
diff --git a/main_func/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.cu b/main_func/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.cu
deleted file mode 100644
index 178af00..0000000
--- a/main_func/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.cu
+++ /dev/null
@@ -1,270 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <memory.h>
-#include "Diff4th_GPU_kernel.h"
-
-#define checkCudaErrors(err)           __checkCudaErrors (err, __FILE__, __LINE__)
-
-inline void __checkCudaErrors(cudaError err, const char *file, const int line)
-{
-    if (cudaSuccess != err)
-    {
-        fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",
-                file, line, (int)err, cudaGetErrorString(err));
-        exit(EXIT_FAILURE);
-    }
-}
-
-#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
-#define sizeT (sizeX*sizeY*sizeZ)
-#define epsilon 0.00000001
-
-/////////////////////////////////////////////////
-// 2D Image denosing - Second Step (The second derrivative)
-__global__ void Diff4th2D_derriv(float* B, float* A, float *A0, int N, int M, float sigma, int iter, float tau, float lambda)
-{
-    float gradXXc = 0, gradYYc = 0;
-    int i = blockIdx.x*blockDim.x + threadIdx.x;
-    int j = blockIdx.y*blockDim.y + threadIdx.y;
-    
-    int index = j + i*N;
-    
-    if (((i < 1) || (i > N-2)) || ((j < 1) || (j > M-2))) {
-        return;    }
-    
-    int indexN = (j)+(i-1)*(N); if (A[indexN] == 0) indexN = index;
-    int indexS = (j)+(i+1)*(N); if (A[indexS] == 0) indexS = index;
-    int indexW = (j-1)+(i)*(N); if (A[indexW] == 0) indexW = index;
-    int indexE = (j+1)+(i)*(N); if (A[indexE] == 0) indexE = index;
-    
-    gradXXc = B[indexN] + B[indexS] - 2*B[index] ;
-    gradYYc = B[indexW] + B[indexE] - 2*B[index] ;
-    A[index]  = A[index] - tau*((A[index] - A0[index]) + lambda*(gradXXc + gradYYc));
-}
-
-// 2D Image denosing - The First Step
-__global__ void Diff4th2D(float* A, float* B, int N, int M, float sigma, int iter, float tau)
-{
-    float gradX, gradX_sq, gradY, gradY_sq, gradXX, gradYY, gradXY, sq_sum, xy_2,  V_norm, V_orth, c, c_sq;
-    
-    int i = blockIdx.x*blockDim.x + threadIdx.x;
-    int j = blockIdx.y*blockDim.y + threadIdx.y;
-    
-    int index = j + i*N;
-    
-    V_norm = 0.0f; V_orth = 0.0f;
-    
-    if (((i < 1) || (i > N-2)) || ((j < 1) || (j > M-2))) {
-        return;    }
-    
-    int indexN = (j)+(i-1)*(N); if (A[indexN] == 0) indexN = index;
-    int indexS = (j)+(i+1)*(N); if (A[indexS] == 0) indexS = index;
-    int indexW = (j-1)+(i)*(N); if (A[indexW] == 0) indexW = index;
-    int indexE = (j+1)+(i)*(N); if (A[indexE] == 0) indexE = index;
-    int indexNW = (j-1)+(i-1)*(N); if (A[indexNW] == 0) indexNW = index;
-    int indexNE = (j+1)+(i-1)*(N); if (A[indexNE] == 0) indexNE = index;
-    int indexWS = (j-1)+(i+1)*(N); if (A[indexWS] == 0) indexWS = index;
-    int indexES = (j+1)+(i+1)*(N); if (A[indexES] == 0) indexES = index;
-    
-    gradX = 0.5f*(A[indexN]-A[indexS]);
-    gradX_sq = gradX*gradX;
-    gradXX = A[indexN] + A[indexS] - 2*A[index];
-    
-    gradY = 0.5f*(A[indexW]-A[indexE]);
-    gradY_sq = gradY*gradY;
-    gradYY = A[indexW] + A[indexE] - 2*A[index];
-    
-    gradXY = 0.25f*(A[indexNW] - A[indexNE] - A[indexWS] + A[indexES]);
-    xy_2 = 2.0f*gradX*gradY*gradXY;
-    sq_sum =  gradX_sq + gradY_sq;
-    
-    if (sq_sum <= epsilon) {
-        V_norm = (gradXX*gradX_sq + xy_2 + gradYY*gradY_sq)/epsilon;
-        V_orth = (gradXX*gradY_sq - xy_2 + gradYY*gradX_sq)/epsilon; }
-    else  {
-        V_norm = (gradXX*gradX_sq + xy_2 + gradYY*gradY_sq)/sq_sum;
-        V_orth = (gradXX*gradY_sq - xy_2 + gradYY*gradX_sq)/sq_sum;  }
-    
-    c = 1.0f/(1.0f + sq_sum/sigma);
-    c_sq = c*c;
-    B[index] =  c_sq*V_norm + c*V_orth;
-}
-
-/////////////////////////////////////////////////
-// 3D data parocerssing
-__global__ void Diff4th3D_derriv(float *B, float *A, float *A0, int N, int M, int Z, float sigma, int iter, float tau, float lambda)
-{
-    float gradXXc = 0, gradYYc = 0, gradZZc = 0;
-    int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
-    int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
-    int zIndex = blockDim.z * blockIdx.z + threadIdx.z;
-    
-    int index = xIndex + M*yIndex + N*M*zIndex;
-    
-    if (((xIndex < 1) || (xIndex > N-2)) || ((yIndex < 1) || (yIndex > M-2)) || ((zIndex < 1) || (zIndex > Z-2))) {
-        return;    }
-    
-    int indexN = (xIndex-1) + M*yIndex + N*M*zIndex; if (A[indexN] == 0) indexN = index;
-    int indexS = (xIndex+1) + M*yIndex + N*M*zIndex; if (A[indexS] == 0) indexS = index;
-    int indexW = xIndex + M*(yIndex-1) + N*M*zIndex; if (A[indexW] == 0) indexW = index;
-    int indexE = xIndex + M*(yIndex+1) + N*M*zIndex; if (A[indexE] == 0) indexE = index;
-    int indexU = xIndex + M*yIndex + N*M*(zIndex-1); if (A[indexU] == 0) indexU = index;
-    int indexD = xIndex + M*yIndex + N*M*(zIndex+1); if (A[indexD] == 0) indexD = index;
-    
-    gradXXc = B[indexN] + B[indexS] - 2*B[index] ;
-    gradYYc = B[indexW] + B[indexE] - 2*B[index] ;
-    gradZZc = B[indexU] + B[indexD] - 2*B[index] ;   
-        
-    A[index]  = A[index] - tau*((A[index] - A0[index]) + lambda*(gradXXc + gradYYc + gradZZc));    
-}
-
-__global__ void Diff4th3D(float* A, float* B, int N, int M, int Z, float sigma, int iter, float tau)
-{
-    float gradX, gradX_sq, gradY, gradY_sq, gradZ, gradZ_sq, gradXX, gradYY, gradZZ, gradXY, gradXZ, gradYZ, sq_sum, xy_2, xyz_1, xyz_2, V_norm, V_orth, c, c_sq;
-    
-    int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
-    int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
-    int zIndex = blockDim.z * blockIdx.z + threadIdx.z;
-    
-    int index = xIndex + M*yIndex + N*M*zIndex;
-    V_norm = 0.0f; V_orth = 0.0f;
-    
-    if (((xIndex < 1) || (xIndex > N-2)) || ((yIndex < 1) || (yIndex > M-2)) || ((zIndex < 1) || (zIndex > Z-2))) {
-        return;    }
-    
-    B[index] = 0;
-    
-    int indexN = (xIndex-1) + M*yIndex + N*M*zIndex; if (A[indexN] == 0) indexN = index;
-    int indexS = (xIndex+1) + M*yIndex + N*M*zIndex; if (A[indexS] == 0) indexS = index;
-    int indexW = xIndex + M*(yIndex-1) + N*M*zIndex; if (A[indexW] == 0) indexW = index;
-    int indexE = xIndex + M*(yIndex+1) + N*M*zIndex; if (A[indexE] == 0) indexE = index;
-    int indexU = xIndex + M*yIndex + N*M*(zIndex-1); if (A[indexU] == 0) indexU = index;
-    int indexD = xIndex + M*yIndex + N*M*(zIndex+1); if (A[indexD] == 0) indexD = index;
-    
-    int indexNW = (xIndex-1) + M*(yIndex-1) + N*M*zIndex;  if (A[indexNW] == 0) indexNW = index;
-    int indexNE = (xIndex-1) + M*(yIndex+1) + N*M*zIndex;  if (A[indexNE] == 0) indexNE = index;
-    int indexWS =  (xIndex+1) + M*(yIndex-1) + N*M*zIndex; if (A[indexWS] == 0) indexWS = index;
-    int indexES = (xIndex+1) + M*(yIndex+1) + N*M*zIndex;  if (A[indexES] == 0) indexES = index;
-    
-    int indexUW = (xIndex-1) + M*(yIndex) + N*M*(zIndex-1); if (A[indexUW] == 0) indexUW = index;
-    int indexUE = (xIndex+1) + M*(yIndex) + N*M*(zIndex-1); if (A[indexUE] == 0) indexUE = index;
-    int indexDW =  (xIndex-1) + M*(yIndex) + N*M*(zIndex+1); if (A[indexDW] == 0) indexDW = index;
-    int indexDE = (xIndex+1) + M*(yIndex) + N*M*(zIndex+1); if (A[indexDE] == 0) indexDE = index;
-    
-    int indexUN = (xIndex) + M*(yIndex-1) + N*M*(zIndex-1);  if (A[indexUN] == 0) indexUN = index;
-    int indexUS = (xIndex) + M*(yIndex+1) + N*M*(zIndex-1);  if (A[indexUS] == 0) indexUS = index;
-    int indexDN =  (xIndex) + M*(yIndex-1) + N*M*(zIndex+1); if (A[indexDN] == 0) indexDN = index;
-    int indexDS = (xIndex) + M*(yIndex+1) + N*M*(zIndex+1);  if (A[indexDS] == 0) indexDS = index;
-    
-    gradX = 0.5f*(A[indexN]-A[indexS]);
-    gradX_sq = gradX*gradX;
-    gradXX = A[indexN] + A[indexS] - 2*A[index];
-    
-    gradY = 0.5f*(A[indexW]-A[indexE]);
-    gradY_sq = gradY*gradY;
-    gradYY = A[indexW] + A[indexE] - 2*A[index];
-    
-    gradZ = 0.5f*(A[indexU]-A[indexD]);
-    gradZ_sq = gradZ*gradZ;
-    gradZZ = A[indexU] + A[indexD] - 2*A[index];
-    
-    gradXY = 0.25f*(A[indexNW] - A[indexNE] - A[indexWS] + A[indexES]);
-    gradXZ = 0.25f*(A[indexUW] - A[indexUE] - A[indexDW] + A[indexDE]);
-    gradYZ = 0.25f*(A[indexUN] - A[indexUS] - A[indexDN] + A[indexDS]);
-    
-    xy_2  = 2.0f*gradX*gradY*gradXY;
-    xyz_1 = 2.0f*gradX*gradZ*gradXZ;
-    xyz_2 = 2.0f*gradY*gradZ*gradYZ;
-    
-    sq_sum =  gradX_sq + gradY_sq + gradZ_sq;
-    
-    if (sq_sum <= epsilon) {
-        V_norm = (gradXX*gradX_sq + gradYY*gradY_sq + gradZZ*gradZ_sq + xy_2 + xyz_1 + xyz_2)/epsilon;
-        V_orth = ((gradY_sq + gradZ_sq)*gradXX + (gradX_sq + gradZ_sq)*gradYY + (gradX_sq + gradY_sq)*gradZZ - xy_2 - xyz_1 - xyz_2)/epsilon;  }
-    else  {
-        V_norm = (gradXX*gradX_sq + gradYY*gradY_sq + gradZZ*gradZ_sq + xy_2 + xyz_1 + xyz_2)/sq_sum;
-        V_orth = ((gradY_sq + gradZ_sq)*gradXX + (gradX_sq + gradZ_sq)*gradYY + (gradX_sq + gradY_sq)*gradZZ - xy_2 - xyz_1 - xyz_2)/sq_sum;  }
-    
-    c = 1;
-    if ((1.0f + sq_sum/sigma) != 0.0f)  {c = 1.0f/(1.0f + sq_sum/sigma);}
-    
-    c_sq = c*c;
-    B[index] =  c_sq*V_norm + c*V_orth;
-}
-
-/******************************************************/
-/********* HOST FUNCTION*************/
-extern "C" void Diff4th_GPU_kernel(float* A, float* B, int N, int M, int Z, float sigma, int iter, float tau, float lambda)
-{
-     int deviceCount = -1; // number of devices
-    cudaGetDeviceCount(&deviceCount);
-    if (deviceCount == 0) {
-        fprintf(stderr, "No CUDA devices found\n");
-        return;
-    }    
-    
-      int BLKXSIZE, BLKYSIZE,BLKZSIZE;
-      float *Ad, *Bd, *Cd;
-      sigma = sigma*sigma;
-    
-    if (Z == 0){
-        // 4th order diffusion for 2D case     
-        BLKXSIZE = 8;
-        BLKYSIZE = 16;
-        
-        dim3 dimBlock(BLKXSIZE,BLKYSIZE);
-        dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE));
-        
-        checkCudaErrors(cudaMalloc((void**)&Ad,N*M*sizeof(float)));
-        checkCudaErrors(cudaMalloc((void**)&Bd,N*M*sizeof(float)));
-        checkCudaErrors(cudaMalloc((void**)&Cd,N*M*sizeof(float)));
-        
-        checkCudaErrors(cudaMemcpy(Ad,A,N*M*sizeof(float),cudaMemcpyHostToDevice));
-        checkCudaErrors(cudaMemcpy(Bd,A,N*M*sizeof(float),cudaMemcpyHostToDevice));
-        checkCudaErrors(cudaMemcpy(Cd,A,N*M*sizeof(float),cudaMemcpyHostToDevice));
-        
-        int n = 1;
-        while (n <= iter) {
-            Diff4th2D<<<dimGrid,dimBlock>>>(Bd, Cd, N, M, sigma, iter, tau);
-            cudaDeviceSynchronize();
-            checkCudaErrors( cudaPeekAtLastError() );
-            Diff4th2D_derriv<<<dimGrid,dimBlock>>>(Cd, Bd, Ad, N, M, sigma, iter, tau, lambda);
-            cudaDeviceSynchronize();
-            checkCudaErrors( cudaPeekAtLastError() );
-            n++;
-        }
-        checkCudaErrors(cudaMemcpy(B,Bd,N*M*sizeof(float),cudaMemcpyDeviceToHost));
-        cudaFree(Ad); cudaFree(Bd); cudaFree(Cd);
-    }
-    
-    if (Z != 0){
-        // 4th order diffusion for 3D case
-        BLKXSIZE = 8;
-        BLKYSIZE = 8;
-        BLKZSIZE = 8;        
-        
-        dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE);
-        dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE),idivup(Z,BLKXSIZE));
-        
-        checkCudaErrors(cudaMalloc((void**)&Ad,N*M*Z*sizeof(float)));
-        checkCudaErrors(cudaMalloc((void**)&Bd,N*M*Z*sizeof(float)));
-        checkCudaErrors(cudaMalloc((void**)&Cd,N*M*Z*sizeof(float)));
-        
-        checkCudaErrors(cudaMemcpy(Ad,A,N*M*Z*sizeof(float),cudaMemcpyHostToDevice));
-        checkCudaErrors(cudaMemcpy(Bd,A,N*M*Z*sizeof(float),cudaMemcpyHostToDevice));        
-        checkCudaErrors(cudaMemcpy(Cd,A,N*M*Z*sizeof(float),cudaMemcpyHostToDevice));
-        
-        int n = 1;
-        while (n <= iter) {
-            Diff4th3D<<<dimGrid,dimBlock>>>(Bd, Cd, N, M, Z, sigma, iter, tau);
-            cudaDeviceSynchronize();
-            checkCudaErrors( cudaPeekAtLastError() );
-            Diff4th3D_derriv<<<dimGrid,dimBlock>>>(Cd, Bd, Ad, N, M, Z, sigma, iter, tau, lambda);
-            cudaDeviceSynchronize();
-            checkCudaErrors( cudaPeekAtLastError() );
-            n++;
-        }
-        checkCudaErrors(cudaMemcpy(B,Bd,N*M*Z*sizeof(float),cudaMemcpyDeviceToHost));
-        cudaFree(Ad); cudaFree(Bd); cudaFree(Cd);
-    }
-}
\ No newline at end of file
diff --git a/main_func/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.h b/main_func/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.h
deleted file mode 100644
index cfbb45a..0000000
--- a/main_func/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __DIFF_HO_H_
-#define __DIFF_HO_H_
-
-extern "C" void Diff4th_GPU_kernel(float* A, float* B, int N, int M, int Z, float sigma, int iter, float tau, float lambda);
-
-#endif 
diff --git a/main_func/regularizers_GPU/NL_Regul/NLM_GPU.cpp b/main_func/regularizers_GPU/NL_Regul/NLM_GPU.cpp
deleted file mode 100644
index ff0cc90..0000000
--- a/main_func/regularizers_GPU/NL_Regul/NLM_GPU.cpp
+++ /dev/null
@@ -1,171 +0,0 @@
-#include "mex.h"
-#include <matrix.h>
-#include <math.h>
-#include <stdlib.h>
-#include <memory.h>
-#include <stdio.h>
-#include <iostream>
-#include "NLM_GPU_kernel.h"
-
-/* CUDA implementation of the patch-based (PB) regularization  for 2D and 3D images/volumes  
- * This method finds self-similar patches in data and performs one fixed point iteration to mimimize the PB penalty function
- * 
- * References: 1. Yang Z. & Jacob M. "Nonlocal Regularization of Inverse Problems"
- *             2. Kazantsev D. at. all "4D-CT reconstruction with unified spatial-temporal patch-based regularization"
- *
- * Input Parameters (mandatory):
- * 1. Image/volume (2D/3D)
- * 2. ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window)
- * 3. ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window)
- * 4. h - parameter for the PB penalty function
- * 5. lambda - regularization parameter 
-
- * Output:
- * 1. regularized (denoised) Image/volume (N x N x N)
- *
- * In matlab check what kind of GPU you have with "gpuDevice" command,
- * then set your ComputeCapability, here I use -arch compute_35
- *
- * Quick 2D denoising example in Matlab:   
-   Im = double(imread('lena_gray_256.tif'))/255;  % loading image
-   u0 = Im + .03*randn(size(Im)); u0(u0<0) = 0; % adding noise
-   ImDen = NLM_GPU(single(u0), 3, 2, 0.15, 1);
- 
- * Linux/Matlab compilation:
- * compile in terminal: nvcc -Xcompiler -fPIC -shared -o NLM_GPU_kernel.o NLM_GPU_kernel.cu
- * then compile in Matlab: mex -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart NLM_GPU.cpp NLM_GPU_kernel.o
- *
- * D. Kazantsev 
- * 2014-17
- * Harwell/Manchester UK
- */
-
-float pad_crop(float *A, float *Ap, int OldSizeX, int OldSizeY,  int OldSizeZ, int NewSizeX, int NewSizeY, int NewSizeZ, int padXY, int switchpad_crop);
-
-void mexFunction(
-        int nlhs, mxArray *plhs[],
-        int nrhs, const mxArray *prhs[])
-{
-    int N, M, Z, i_n, j_n, k_n, numdims, SearchW, SimilW, SearchW_real, padXY, newsizeX, newsizeY, newsizeZ, switchpad_crop, count, SearchW_full, SimilW_full;
-    const int  *dims;
-    float *A, *B=NULL, *Ap=NULL, *Bp=NULL, *Eucl_Vec, h, h2, lambda, val, denh2;
-    
-    numdims = mxGetNumberOfDimensions(prhs[0]);
-    dims = mxGetDimensions(prhs[0]);
-    
-    N = dims[0];
-    M = dims[1];
-    Z = dims[2];
-    
-    if ((numdims < 2) || (numdims > 3)) {mexErrMsgTxt("The input should be 2D image or 3D volume");}
-    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input in single precision is required"); }
-    
-    if(nrhs != 5) mexErrMsgTxt("Five inputs reqired: Image(2D,3D), SearchW, SimilW, Threshold, Regularization parameter");
-    
-    /*Handling inputs*/
-    A  = (float *) mxGetData(prhs[0]);    /* the image to regularize/filter */
-    SearchW_real  = (int) mxGetScalar(prhs[1]); /* the searching window ratio */
-    SimilW =  (int) mxGetScalar(prhs[2]);  /* the similarity window ratio */
-    h =  (float) mxGetScalar(prhs[3]);  /* parameter for the PB filtering function */
-    lambda = (float) mxGetScalar(prhs[4]);
-    
-    if (h <= 0) mexErrMsgTxt("Parmeter for the PB penalty function should be > 0");
-      
-    SearchW = SearchW_real + 2*SimilW;
-    
-    SearchW_full = 2*SearchW + 1; /* the full searching window  size */
-    SimilW_full = 2*SimilW + 1;   /* the full similarity window  size */
-    h2 = h*h;
-    
-    padXY = SearchW + 2*SimilW; /* padding sizes */
-    newsizeX = N + 2*(padXY); /* the X size of the padded array */
-    newsizeY = M + 2*(padXY); /* the Y size of the padded array */
-    newsizeZ = Z + 2*(padXY); /* the Z size of the padded array */
-    int N_dims[] = {newsizeX, newsizeY, newsizeZ};
-    
-    /******************************2D case ****************************/
-    if (numdims == 2) {
-        /*Handling output*/
-        B = (float*)mxGetData(plhs[0] = mxCreateNumericMatrix(N, M, mxSINGLE_CLASS, mxREAL));
-        /*allocating memory for the padded arrays */
-        Ap = (float*)mxGetData(mxCreateNumericMatrix(newsizeX, newsizeY, mxSINGLE_CLASS, mxREAL));
-        Bp = (float*)mxGetData(mxCreateNumericMatrix(newsizeX, newsizeY, mxSINGLE_CLASS, mxREAL));
-        Eucl_Vec = (float*)mxGetData(mxCreateNumericMatrix(SimilW_full*SimilW_full, 1, mxSINGLE_CLASS, mxREAL));
-        
-        /*Gaussian kernel */
-        count = 0;
-        for(i_n=-SimilW; i_n<=SimilW; i_n++) {
-            for(j_n=-SimilW; j_n<=SimilW; j_n++) {
-                val = (float)(i_n*i_n + j_n*j_n)/(2*SimilW*SimilW);
-                Eucl_Vec[count] = exp(-val);
-                count = count + 1;
-            }} /*main neighb loop */
-        
-        /**************************************************************************/
-        /*Perform padding of image A to the size of [newsizeX * newsizeY] */
-        switchpad_crop = 0; /*padding*/
-        pad_crop(A, Ap, M, N, 0, newsizeY, newsizeX, 0, padXY, switchpad_crop);
-        
-        /* Do PB regularization with the padded array  */
-        NLM_GPU_kernel(Ap, Bp, Eucl_Vec, newsizeY, newsizeX, 0, numdims, SearchW, SimilW, SearchW_real, (float)h2, (float)lambda);
-        
-        switchpad_crop = 1; /*cropping*/
-        pad_crop(Bp, B, M, N, 0, newsizeY, newsizeX, 0, padXY, switchpad_crop);
-    }
-    else
-    {
-        /******************************3D case ****************************/
-        /*Handling output*/
-        B = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dims, mxSINGLE_CLASS, mxREAL));
-        /*allocating memory for the padded arrays */
-        Ap = (float*)mxGetPr(mxCreateNumericArray(3, N_dims, mxSINGLE_CLASS, mxREAL));
-        Bp = (float*)mxGetPr(mxCreateNumericArray(3, N_dims, mxSINGLE_CLASS, mxREAL));
-        Eucl_Vec = (float*)mxGetData(mxCreateNumericMatrix(SimilW_full*SimilW_full*SimilW_full, 1, mxSINGLE_CLASS, mxREAL));
-        
-        /*Gaussian kernel */
-        count = 0;
-        for(i_n=-SimilW; i_n<=SimilW; i_n++) {
-            for(j_n=-SimilW; j_n<=SimilW; j_n++) {
-                for(k_n=-SimilW; k_n<=SimilW; k_n++) {
-                    val = (float)(i_n*i_n + j_n*j_n + k_n*k_n)/(2*SimilW*SimilW*SimilW);
-                    Eucl_Vec[count] = exp(-val);
-                    count = count + 1;
-                }}} /*main neighb loop */
-        /**************************************************************************/
-        /*Perform padding of image A to the size of [newsizeX * newsizeY * newsizeZ] */
-        switchpad_crop = 0; /*padding*/
-        pad_crop(A, Ap, M, N, Z, newsizeY, newsizeX, newsizeZ, padXY, switchpad_crop);
-        
-        /* Do PB regularization with the padded array  */
-        NLM_GPU_kernel(Ap, Bp, Eucl_Vec, newsizeY, newsizeX, newsizeZ, numdims, SearchW, SimilW, SearchW_real, (float)h2, (float)lambda);
-        
-        switchpad_crop = 1; /*cropping*/
-        pad_crop(Bp, B, M, N, Z, newsizeY, newsizeX, newsizeZ, padXY, switchpad_crop);
-    } /*end else ndims*/
-}
-
-float pad_crop(float *A, float *Ap, int OldSizeX, int OldSizeY, int OldSizeZ, int NewSizeX, int NewSizeY, int NewSizeZ, int padXY, int switchpad_crop)
-{
-    /* padding-cropping function */
-    int i,j,k;    
-    if (NewSizeZ > 1) {    
-           for (i=0; i < NewSizeX; i++) {
-            for (j=0; j < NewSizeY; j++) {
-              for (k=0; k < NewSizeZ; k++) {
-                if (((i >= padXY) && (i < NewSizeX-padXY)) &&  ((j >= padXY) && (j < NewSizeY-padXY)) &&  ((k >= padXY) && (k < NewSizeZ-padXY))) {
-                    if (switchpad_crop == 0)  Ap[NewSizeX*NewSizeY*k + i*NewSizeY+j] = A[OldSizeX*OldSizeY*(k - padXY) + (i-padXY)*(OldSizeY)+(j-padXY)];
-                    else  Ap[OldSizeX*OldSizeY*(k - padXY) + (i-padXY)*(OldSizeY)+(j-padXY)] = A[NewSizeX*NewSizeY*k + i*NewSizeY+j];
-                }
-            }}}   
-    }
-    else {
-        for (i=0; i < NewSizeX; i++) {
-            for (j=0; j < NewSizeY; j++) {
-                if (((i >= padXY) && (i < NewSizeX-padXY)) &&  ((j >= padXY) && (j < NewSizeY-padXY))) {
-                    if (switchpad_crop == 0)  Ap[i*NewSizeY+j] = A[(i-padXY)*(OldSizeY)+(j-padXY)];
-                    else  Ap[(i-padXY)*(OldSizeY)+(j-padXY)] = A[i*NewSizeY+j];
-                }
-            }}
-    }
-    return *Ap;
-}
\ No newline at end of file
diff --git a/main_func/regularizers_GPU/NL_Regul/NLM_GPU_kernel.cu b/main_func/regularizers_GPU/NL_Regul/NLM_GPU_kernel.cu
deleted file mode 100644
index 17da3a8..0000000
--- a/main_func/regularizers_GPU/NL_Regul/NLM_GPU_kernel.cu
+++ /dev/null
@@ -1,239 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <memory.h>
-#include "NLM_GPU_kernel.h"
-
-#define checkCudaErrors(err)           __checkCudaErrors (err, __FILE__, __LINE__)
-
-inline void __checkCudaErrors(cudaError err, const char *file, const int line)
-{
-    if (cudaSuccess != err)
-    {
-        fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",
-                file, line, (int)err, cudaGetErrorString(err));
-        exit(EXIT_FAILURE);
-    }
-}
-
-extern __shared__ float sharedmem[];
-
-// run PB den kernel here
-__global__ void NLM_kernel(float *Ad, float* Bd, float *Eucl_Vec_d, int N, int M, int Z, int SearchW, int SimilW, int SearchW_real, int SearchW_full, int SimilW_full, int padXY, float h2, float lambda,  dim3 imagedim, dim3 griddim, dim3 kerneldim, dim3 sharedmemdim, int nUpdatePerThread, float neighborsize)
-{
-    
-    int  i1, j1, k1, i2, j2, k2, i3, j3, k3, i_l, j_l, k_l, count;
-    float value, Weight_norm, normsum, Weight;
-    
-    int bidx = blockIdx.x;
-    int bidy = blockIdx.y%griddim.y;
-    int bidz = (int)((blockIdx.y)/griddim.y);
-    
-    // global index for block endpoint
-    int beidx = __mul24(bidx,blockDim.x);
-    int beidy = __mul24(bidy,blockDim.y);
-    int beidz = __mul24(bidz,blockDim.z);
-    
-    int tid = __mul24(threadIdx.z,__mul24(blockDim.x,blockDim.y)) +
-            __mul24(threadIdx.y,blockDim.x) + threadIdx.x;
-    
-    #ifdef __DEVICE_EMULATION__
-            printf("tid : %d", tid);
-    #endif
-            
-    // update shared memory
-    int nthreads = blockDim.x*blockDim.y*blockDim.z;
-    int sharedMemSize = sharedmemdim.x * sharedmemdim.y * sharedmemdim.z;
-    for(int i=0; i<nUpdatePerThread; i++)
-    {
-        int sid = tid + i*nthreads; // index in shared memory
-        if (sid < sharedMemSize)
-        {
-            // global x/y/z index in volume
-            int gidx, gidy, gidz;
-            int sidx, sidy, sidz, tid;
-            
-            sidz = sid / (sharedmemdim.x*sharedmemdim.y);
-            tid  = sid - sidz*(sharedmemdim.x*sharedmemdim.y);
-            sidy = tid / (sharedmemdim.x);
-            sidx = tid - sidy*(sharedmemdim.x);
-            
-            gidx = (int)sidx - (int)kerneldim.x + (int)beidx;
-            gidy = (int)sidy - (int)kerneldim.y + (int)beidy;
-            gidz = (int)sidz - (int)kerneldim.z + (int)beidz;
-            
-            // Neumann boundary condition
-            int cx = (int) min(max(0,gidx),imagedim.x-1);
-            int cy = (int) min(max(0,gidy),imagedim.y-1);
-            int cz = (int) min(max(0,gidz),imagedim.z-1);
-            
-            int gid = cz*imagedim.x*imagedim.y + cy*imagedim.x + cx;
-            
-            sharedmem[sid] = Ad[gid];
-        }
-    }
-    __syncthreads();
-    
-    // global index of the current voxel in the input volume
-    int idx = beidx + threadIdx.x;
-    int idy = beidy + threadIdx.y;
-    int idz = beidz + threadIdx.z;
-    
-    if (Z == 1) {
-        /* 2D case */
-        /*checking boundaries to be within the image and avoid padded spaces */
-        if( idx >= padXY && idx < (imagedim.x - padXY) &&
-                idy >= padXY && idy < (imagedim.y - padXY))
-        {
-            int i_centr = threadIdx.x + (SearchW); /*indices of the centrilized (main) pixel */
-            int j_centr = threadIdx.y + (SearchW); /*indices of the centrilized (main) pixel */
-            
-            if ((i_centr > 0) && (i_centr < N) && (j_centr > 0) && (j_centr < M)) {
-                
-                Weight_norm = 0; value = 0.0;
-                /* Massive Search window loop */
-                for(i1 = i_centr - SearchW_real ; i1 <= i_centr + SearchW_real; i1++) {
-                    for(j1 = j_centr - SearchW_real ; j1<= j_centr + SearchW_real ; j1++) {
-                        /* if inside the searching window */
-                        count = 0; normsum = 0.0;
-                        for(i_l=-SimilW; i_l<=SimilW; i_l++) {
-                            for(j_l=-SimilW; j_l<=SimilW; j_l++) {
-                                i2 = i1+i_l; j2 = j1+j_l;
-                                i3 = i_centr+i_l; j3 = j_centr+j_l;  /*coordinates of the inner patch loop */                                
-                                if ((i2 > 0) && (i2 < N) && (j2 > 0) && (j2 < M)) {
-                                       if ((i3 > 0) && (i3 < N) && (j3 > 0) && (j3 < M)) {
-                                            normsum += Eucl_Vec_d[count]*pow((sharedmem[(j3)*sharedmemdim.x+(i3)] - sharedmem[j2*sharedmemdim.x+i2]), 2);
-                                            }}
-                                        count++;
-                                }}
-                                if (normsum != 0) Weight = (expf(-normsum/h2));
-                                else Weight = 0.0;
-                                Weight_norm += Weight;
-                                value += sharedmem[j1*sharedmemdim.x+i1]*Weight;
-                            }}      
-                                
-                if (Weight_norm != 0) Bd[idz*imagedim.x*imagedim.y + idy*imagedim.x + idx] = value/Weight_norm;
-                else Bd[idz*imagedim.x*imagedim.y + idy*imagedim.x + idx] = Ad[idz*imagedim.x*imagedim.y + idy*imagedim.x + idx];
-            }
-        }      /*boundary conditions end*/
-    }
-    else {
-        /*3D case*/
-        /*checking boundaries to be within the image and avoid padded spaces */
-        if( idx >= padXY && idx < (imagedim.x - padXY) &&
-                idy >= padXY && idy < (imagedim.y - padXY) &&
-                idz >= padXY && idz < (imagedim.z - padXY) )
-        {
-            int i_centr = threadIdx.x + SearchW; /*indices of the centrilized (main) pixel */
-            int j_centr = threadIdx.y + SearchW; /*indices of the centrilized (main) pixel */
-            int k_centr = threadIdx.z + SearchW; /*indices of the centrilized (main) pixel */
-            
-            if ((i_centr > 0) && (i_centr < N) && (j_centr > 0) && (j_centr < M) && (k_centr > 0) && (k_centr < Z)) {
-                
-                Weight_norm = 0; value = 0.0;
-                /* Massive Search window loop */
-                for(i1 = i_centr - SearchW_real ; i1 <= i_centr + SearchW_real; i1++) {
-                    for(j1 = j_centr - SearchW_real ; j1<= j_centr + SearchW_real ; j1++) {
-                        for(k1 = k_centr - SearchW_real ; k1<= k_centr + SearchW_real ; k1++) {
-                            /* if inside the searching window */
-                            count = 0; normsum = 0.0;
-                            for(i_l=-SimilW; i_l<=SimilW; i_l++) {
-                                for(j_l=-SimilW; j_l<=SimilW; j_l++) {
-                                    for(k_l=-SimilW; k_l<=SimilW; k_l++) {
-                                        i2 = i1+i_l; j2 = j1+j_l; k2 = k1+k_l;
-                                        i3 = i_centr+i_l; j3 = j_centr+j_l; k3 = k_centr+k_l;   /*coordinates of the inner patch loop */                              
-                                                    if ((i2 > 0) && (i2 < N) && (j2 > 0) && (j2 < M) && (k2 > 0) && (k2 < Z)) {
-                                                        if ((i3 > 0) && (i3 < N) && (j3 > 0) && (j3 < M) && (k3 > 0) && (k3 < Z)) {
-                                                            normsum += Eucl_Vec_d[count]*pow((sharedmem[(k3)*sharedmemdim.x*sharedmemdim.y + (j3)*sharedmemdim.x+(i3)] - sharedmem[(k2)*sharedmemdim.x*sharedmemdim.y + j2*sharedmemdim.x+i2]), 2);
-                                                        }}
-                                                    count++;
-                                                }}}
-                                       if (normsum != 0) Weight = (expf(-normsum/h2));
-                                       else Weight = 0.0;
-                                       Weight_norm += Weight;
-                                       value += sharedmem[k1*sharedmemdim.x*sharedmemdim.y + j1*sharedmemdim.x+i1]*Weight;                                                                
-                        }}}      /* BIG search window loop end*/
-                
-               
-                if (Weight_norm != 0) Bd[idz*imagedim.x*imagedim.y + idy*imagedim.x + idx] = value/Weight_norm;
-                else Bd[idz*imagedim.x*imagedim.y + idy*imagedim.x + idx] = Ad[idz*imagedim.x*imagedim.y + idy*imagedim.x + idx];
-            }
-        }      /* boundary conditions end */
-    }
-}
-        
-/////////////////////////////////////////////////
-// HOST FUNCTION
-extern "C" void NLM_GPU_kernel(float *A, float* B, float *Eucl_Vec, int N, int M, int Z, int dimension, int SearchW, int SimilW, int SearchW_real, float h2, float lambda)
-{
-    int deviceCount = -1; // number of devices
-    cudaGetDeviceCount(&deviceCount);
-    if (deviceCount == 0) {
-        fprintf(stderr, "No CUDA devices found\n");
-        return;
-    }
-    
-//     cudaDeviceReset();
-    
-    int padXY, SearchW_full, SimilW_full,  blockWidth, blockHeight, blockDepth, nBlockX, nBlockY, nBlockZ, kernel_depth;
-    float *Ad, *Bd, *Eucl_Vec_d;
-    
-    if (dimension == 2) {
-        blockWidth  = 16;
-        blockHeight = 16;
-        blockDepth  = 1;
-        Z = 1;
-        kernel_depth = 0;
-    }
-    else {
-        blockWidth  = 8;
-        blockHeight = 8;
-        blockDepth  = 8;
-        kernel_depth = SearchW;
-    }
-    
-    // compute how many blocks are needed
-    nBlockX = ceil((float)N / (float)blockWidth);
-    nBlockY = ceil((float)M / (float)blockHeight);
-    nBlockZ = ceil((float)Z / (float)blockDepth);
-    
-    dim3 dimGrid(nBlockX,nBlockY*nBlockZ);
-    dim3 dimBlock(blockWidth, blockHeight, blockDepth);
-    dim3 imagedim(N,M,Z);
-    dim3 griddim(nBlockX,nBlockY,nBlockZ);
-    
-    dim3 kerneldim(SearchW,SearchW,kernel_depth);
-    dim3 sharedmemdim((SearchW*2)+blockWidth,(SearchW*2)+blockHeight,(kernel_depth*2)+blockDepth);
-    int sharedmemsize = sizeof(float)*sharedmemdim.x*sharedmemdim.y*sharedmemdim.z;
-    int updateperthread = ceil((float)(sharedmemdim.x*sharedmemdim.y*sharedmemdim.z)/(float)(blockWidth*blockHeight*blockDepth));
-    float neighborsize = (2*SearchW+1)*(2*SearchW+1)*(2*kernel_depth+1);
-    
-    padXY = SearchW + 2*SimilW; /* padding sizes */
-    
-    SearchW_full = 2*SearchW + 1; /* the full searching window  size */
-    SimilW_full = 2*SimilW + 1;   /* the full similarity window  size */
-    
-    /*allocate space for images on device*/
-    checkCudaErrors( cudaMalloc((void**)&Ad,N*M*Z*sizeof(float)) );
-    checkCudaErrors( cudaMalloc((void**)&Bd,N*M*Z*sizeof(float)) );
-    /*allocate space for vectors on device*/
-    if (dimension == 2) {
-        checkCudaErrors( cudaMalloc((void**)&Eucl_Vec_d,SimilW_full*SimilW_full*sizeof(float)) );
-        checkCudaErrors( cudaMemcpy(Eucl_Vec_d,Eucl_Vec,SimilW_full*SimilW_full*sizeof(float),cudaMemcpyHostToDevice) );
-    }
-    else {
-        checkCudaErrors( cudaMalloc((void**)&Eucl_Vec_d,SimilW_full*SimilW_full*SimilW_full*sizeof(float)) );
-        checkCudaErrors( cudaMemcpy(Eucl_Vec_d,Eucl_Vec,SimilW_full*SimilW_full*SimilW_full*sizeof(float),cudaMemcpyHostToDevice) );
-    }
-    
-    /* copy data from the host to device */
-    checkCudaErrors( cudaMemcpy(Ad,A,N*M*Z*sizeof(float),cudaMemcpyHostToDevice) );
-    
-    // Run CUDA kernel here
-    NLM_kernel<<<dimGrid,dimBlock,sharedmemsize>>>(Ad, Bd, Eucl_Vec_d, M, N, Z, SearchW, SimilW, SearchW_real, SearchW_full, SimilW_full, padXY, h2, lambda, imagedim, griddim, kerneldim, sharedmemdim, updateperthread, neighborsize);
-    
-    checkCudaErrors( cudaPeekAtLastError() );
-//     gpuErrchk( cudaDeviceSynchronize() );
-    
-    checkCudaErrors( cudaMemcpy(B,Bd,N*M*Z*sizeof(float),cudaMemcpyDeviceToHost) );
-    cudaFree(Ad); cudaFree(Bd); cudaFree(Eucl_Vec_d);
-}
diff --git a/main_func/regularizers_GPU/NL_Regul/NLM_GPU_kernel.h b/main_func/regularizers_GPU/NL_Regul/NLM_GPU_kernel.h
deleted file mode 100644
index bc9d4a3..0000000
--- a/main_func/regularizers_GPU/NL_Regul/NLM_GPU_kernel.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __NLMREG_KERNELS_H_
-#define __NLMREG_KERNELS_H_
-
-extern "C" void NLM_GPU_kernel(float *A, float* B, float *Eucl_Vec, int N, int M, int Z, int dimension, int SearchW, int SimilW, int SearchW_real, float denh2, float lambda);
-
-#endif 
diff --git a/main_func/studentst.m b/main_func/studentst.m
deleted file mode 100644
index 93e0a0a..0000000
--- a/main_func/studentst.m
+++ /dev/null
@@ -1,47 +0,0 @@
-function [f,g,h,s,k] = studentst(r,k,s)
-% Students T penalty with 'auto-tuning'
-%
-% use:
-%   [f,g,h,{k,{s}}] = studentst(r)     - automatically fits s and k
-%   [f,g,h,{k,{s}}] = studentst(r,k)   - automatically fits s
-%   [f,g,h,{k,{s}}] = studentst(r,k,s) - use given s and k
-%
-% input:
-%   r - residual as column vector
-%   s - scale (optional)
-%   k - degrees of freedom (optional)
-% 
-% output:
-%   f - misfit (scalar)
-%   g - gradient (column vector)
-%   h - positive approximation of the Hessian (column vector, Hessian is a diagonal matrix)
-%   s,k - scale and degrees of freedom
-%
-% Tristan van Leeuwen, 2012.
-% tleeuwen@eos.ubc.ca
-
-% fit both s and k
-if nargin == 1
-    opts = optimset('maxFunEvals',1e2);
-    tmp = fminsearch(@(x)st(r,x(1),x(2)),[1;2],opts);
-    s   = tmp(1);
-    k   = tmp(2);
-end
-
-
-if nargin == 2
-    opts = optimset('maxFunEvals',1e2);
-    tmp = fminsearch(@(x)st(r,x,k),[1],opts);
-    s   = tmp(1);
-end
-
-% evaulate penalty
-[f,g,h] = st(r,s,k);
-
-
-function [f,g,h] = st(r,s,k)
-n = length(r);
-c = -n*(gammaln((k+1)/2) - gammaln(k/2) - .5*log(pi*s*k));
-f = c + .5*(k+1)*sum(log(1 + conj(r).*r/(s*k)));
-g = (k+1)*r./(s*k + conj(r).*r);
-h = (k+1)./(s*k + conj(r).*r);
diff --git a/src/Python/conda-recipe/meta.yaml b/src/Python/conda-recipe/meta.yaml
index 7068e9d..9ef9156 100644
--- a/src/Python/conda-recipe/meta.yaml
+++ b/src/Python/conda-recipe/meta.yaml
@@ -14,8 +14,8 @@ requirements:
     - python
     - numpy
     - setuptools
-    - boost ==1.64
-    - boost-cpp ==1.64
+    - boost ==1.65
+    - boost-cpp ==1.65
     - cython
 
   run:
diff --git a/supp/RMSE.m b/supp/RMSE.m
deleted file mode 100644
index 002f776..0000000
--- a/supp/RMSE.m
+++ /dev/null
@@ -1,7 +0,0 @@
-function err = RMSE(signal1, signal2)
-%RMSE Root Mean Squared Error
-
-err = sum((signal1 - signal2).^2)/length(signal1);  % MSE
-err = sqrt(err);                                    % RMSE
-
-end
\ No newline at end of file
diff --git a/supp/my_red_yellowMAP.mat b/supp/my_red_yellowMAP.mat
deleted file mode 100644
index c2a5b87..0000000
Binary files a/supp/my_red_yellowMAP.mat and /dev/null differ
diff --git a/supp/sino_add_artifacts.m b/supp/sino_add_artifacts.m
deleted file mode 100644
index f601914..0000000
--- a/supp/sino_add_artifacts.m
+++ /dev/null
@@ -1,33 +0,0 @@
-function sino_artifacts = sino_add_artifacts(sino,artifact_type)
-% function to add various distortions to the sinogram space, current
-% version includes: random rings and zingers (streaks)
-% Input: 
-% 1. sinogram
-% 2. artifact type: 'rings' or 'zingers' (streaks)
-
-
-[Detectors, anglesNumb, SlicesZ] = size(sino);
-fprintf('%s %i %s %i %s %i %s \n', 'Sinogram has a dimension of', Detectors, 'detectors;', anglesNumb, 'projections;', SlicesZ, 'vertical slices.');
-
-sino_artifacts = sino;
-
-if (strcmp(artifact_type,'rings'))
-    fprintf('%s \n', 'Adding rings...');    
-    NumRings = round(Detectors/20); % Number of rings relatively to the size of Detectors
-    IntenOff = linspace(0.05,0.5,NumRings); % the intensity of rings in the selected range
-    
-    for k = 1:SlicesZ
-        % generate random indices to propagate rings
-        RandInd = randperm(Detectors,Detectors);
-        for jj = 1:NumRings
-            ind_c = RandInd(jj);
-            sino_artifacts(ind_c,1:end,k) = sino_artifacts(ind_c,1:end,k) + IntenOff(jj).*sino_artifacts(ind_c,1:end,k); % generate a constant offset            
-        end
-        
-    end
-elseif (strcmp(artifact_type,'zingers'))
-    fprintf('%s \n', 'Adding zingers...');
-else
-    fprintf('%s \n', 'Nothing selected, the same sinogram returned...');
-end
-end
\ No newline at end of file
diff --git a/supp/zing_rings_add.m b/supp/zing_rings_add.m
deleted file mode 100644
index d197b1f..0000000
--- a/supp/zing_rings_add.m
+++ /dev/null
@@ -1,91 +0,0 @@
-%  uncomment this part of script to generate data with different noise characterisitcs
-
-fprintf('%s\n', 'Generating Projection Data...');
-
-% Creating RHS (b) - the sinogram (using a strip projection model)
-% vol_geom = astra_create_vol_geom(N, N);
-% proj_geom = astra_create_proj_geom('parallel', 1.0, P, theta_rad);
-% proj_id_temp = astra_create_projector('strip', proj_geom, vol_geom);
-% [sinogram_id, sinogramIdeal] = astra_create_sino(phantom, proj_id_temp);
-% astra_mex_data2d('delete',sinogram_id);
-% astra_mex_algorithm('delete',proj_id_temp);
-
-%%
-% inverse crime data generation
-[sino_id, sinogramIdeal] = astra_create_sino3d_cuda(phantom, proj_geom, vol_geom);
-astra_mex_data3d('delete', sino_id);
-
-% [id,x] = astra_create_backprojection3d_cuda(sinogramIdeal, proj_geom, vol_geom);
-% astra_mex_data3d('delete', id);
-%%
-%
-% % adding Gaussian noise
-% eta = 0.04;  % Relative noise level
-% E = randn(size(sinogram));
-% sinogram = sinogram + eta*norm(sinogram,'fro')*E/norm(E,'fro');  % adding noise to the sinogram
-% sinogram(sinogram<0) = 0;
-% clear E;
-
-%%
-% adding zingers
-val_offset = 0;
-sino_zing = sinogramIdeal';
-vec1 = [60, 80, 80, 70, 70, 90, 90, 40, 130, 145, 155, 125];
-vec2 = [350, 450, 190, 500, 250, 530, 330, 230, 550, 250, 450, 195];
-for jj = 1:length(vec1)
-    for i1 = -2:2
-        for j1 = -2:2
-            sino_zing(vec1(jj)+i1, vec2(jj)+j1) = val_offset;
-        end
-    end
-end
-
-% adding stripes into the signogram
-sino_zing_rings = sino_zing;
-coeff = linspace2(0.01,0.15,180);
-vmax = max(sinogramIdeal(:));
-sino_zing_rings(1:180,120) = sino_zing_rings(1:180,120) + vmax*0.13;
-sino_zing_rings(80:180,209) = sino_zing_rings(80:180,209) + vmax*0.14;
-sino_zing_rings(50:110,210) = sino_zing_rings(50:110,210) + vmax*0.12;
-sino_zing_rings(1:180,211) = sino_zing_rings(1:180,211) + vmax*0.14;
-sino_zing_rings(1:180,300) = sino_zing_rings(1:180,300) + vmax*coeff(:);
-sino_zing_rings(1:180,301) = sino_zing_rings(1:180,301) + vmax*0.14;
-sino_zing_rings(10:100,302) = sino_zing_rings(10:100,302) + vmax*0.15;
-sino_zing_rings(90:180,350) = sino_zing_rings(90:180,350) + vmax*0.11;
-sino_zing_rings(60:140,410) = sino_zing_rings(60:140,410) + vmax*0.12;
-sino_zing_rings(1:180,411) = sino_zing_rings(1:180,411) + vmax*0.14;
-sino_zing_rings(1:180,412) = sino_zing_rings(1:180,412) + vmax*coeff(:);
-sino_zing_rings(1:180,413) = sino_zing_rings(1:180,413) + vmax*coeff(:);
-sino_zing_rings(1:180,500) = sino_zing_rings(1:180,500) - vmax*0.12;
-sino_zing_rings(1:180,501) = sino_zing_rings(1:180,501) - vmax*0.12;
-sino_zing_rings(1:180,550) = sino_zing_rings(1:180,550) + vmax*0.11;
-sino_zing_rings(1:180,551) = sino_zing_rings(1:180,551) + vmax*0.11;
-sino_zing_rings(1:180,552) = sino_zing_rings(1:180,552) + vmax*0.11;
-
-sino_zing_rings(sino_zing_rings < 0) = 0;
-%%
-
-% adding Poisson noise
-dose = 50000;
-multifactor = 0.002;
-
-dataExp = dose.*exp(-sino_zing_rings*multifactor); % noiseless raw data
-dataPnoise = astra_add_noise_to_sino(dataExp, dose); % pre-log noisy raw data (weights)
-sino_zing_rings = log(dose./max(dataPnoise,1))/multifactor; %log corrected data -> sinogram
-Dweights = dataPnoise'; % statistical weights
-sino_zing_rings = sino_zing_rings';
-clear dataPnoise dataExp 
-
-% w = dose./exp(sinogram*multifactor); % getting back raw data from log-cor
-
-% figure(1);
-% set(gcf, 'Position', get(0,'Screensize'));
-% subplot(1,2,1); imshow(phantom,[0 0.6]); title('Ideal Phantom'); colorbar;
-% subplot(1,2,2); imshow(sinogram,[0 180]);  title('Noisy Sinogram');  colorbar;
-% colormap(cmapnew);
-
-% figure;
-% set(gcf, 'Position', get(0,'Screensize'));
-% subplot(1,2,1); imshow(sinogramIdeal,[0 180]);  title('Ideal Sinogram');  colorbar;
-% imshow(sino_zing_rings,[0 180]); title('Noisy Sinogram with zingers and stripes');  colorbar;
-% colormap(cmapnew);
\ No newline at end of file
-- 
cgit v1.2.3