From ef41d45779400ae029a14822df79d5d8910ef03d Mon Sep 17 00:00:00 2001 From: algol Date: Thu, 25 Jan 2018 17:21:37 +0000 Subject: #15 solves dependencies on core C/CUDA functions for Matlab, all duplicates deleted --- Wrappers/Matlab/mex_compile/compile_mex.m | 11 + .../mex_compile/regularizers_CPU/FGP_TV_core.c | 266 ----------------- .../mex_compile/regularizers_CPU/FGP_TV_core.h | 71 ----- .../mex_compile/regularizers_CPU/LLT_model_core.c | 318 --------------------- .../mex_compile/regularizers_CPU/LLT_model_core.h | 46 --- .../regularizers_CPU/PatchBased_Regul_core.c | 213 -------------- .../regularizers_CPU/PatchBased_Regul_core.h | 69 ----- .../regularizers_CPU/SplitBregman_TV_core.c | 259 ----------------- .../regularizers_CPU/SplitBregman_TV_core.h | 69 ----- .../mex_compile/regularizers_CPU/TGV_PD_core.c | 208 -------------- .../mex_compile/regularizers_CPU/TGV_PD_core.h | 67 ----- .../Matlab/mex_compile/regularizers_CPU/utils.c | 29 -- .../Matlab/mex_compile/regularizers_CPU/utils.h | 32 --- .../Diffus_HO/Diff4th_GPU_kernel.cu | 270 ----------------- .../Diffus_HO/Diff4th_GPU_kernel.h | 6 - .../regularizers_GPU/NL_Regul/NLM_GPU_kernel.cu | 239 ---------------- .../regularizers_GPU/NL_Regul/NLM_GPU_kernel.h | 6 - 17 files changed, 11 insertions(+), 2168 deletions(-) delete mode 100644 Wrappers/Matlab/mex_compile/regularizers_CPU/FGP_TV_core.c delete mode 100644 Wrappers/Matlab/mex_compile/regularizers_CPU/FGP_TV_core.h delete mode 100644 Wrappers/Matlab/mex_compile/regularizers_CPU/LLT_model_core.c delete mode 100644 Wrappers/Matlab/mex_compile/regularizers_CPU/LLT_model_core.h delete mode 100644 Wrappers/Matlab/mex_compile/regularizers_CPU/PatchBased_Regul_core.c delete mode 100644 Wrappers/Matlab/mex_compile/regularizers_CPU/PatchBased_Regul_core.h delete mode 100644 Wrappers/Matlab/mex_compile/regularizers_CPU/SplitBregman_TV_core.c delete mode 100644 Wrappers/Matlab/mex_compile/regularizers_CPU/SplitBregman_TV_core.h delete mode 100644 Wrappers/Matlab/mex_compile/regularizers_CPU/TGV_PD_core.c delete mode 100644 Wrappers/Matlab/mex_compile/regularizers_CPU/TGV_PD_core.h delete mode 100644 Wrappers/Matlab/mex_compile/regularizers_CPU/utils.c delete mode 100644 Wrappers/Matlab/mex_compile/regularizers_CPU/utils.h delete mode 100644 Wrappers/Matlab/mex_compile/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.cu delete mode 100644 Wrappers/Matlab/mex_compile/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.h delete mode 100644 Wrappers/Matlab/mex_compile/regularizers_GPU/NL_Regul/NLM_GPU_kernel.cu delete mode 100644 Wrappers/Matlab/mex_compile/regularizers_GPU/NL_Regul/NLM_GPU_kernel.h diff --git a/Wrappers/Matlab/mex_compile/compile_mex.m b/Wrappers/Matlab/mex_compile/compile_mex.m index 1353859..e1debf3 100644 --- a/Wrappers/Matlab/mex_compile/compile_mex.m +++ b/Wrappers/Matlab/mex_compile/compile_mex.m @@ -1,11 +1,22 @@ % compile mex's in Matlab once +copyfile ../../../Core/regularizers_CPU/ regularizers_CPU/ +copyfile ../../../Core/regularizers_GPU/ regularizers_GPU/ +copyfile ../../../Core/CCPiDefines.h regularizers_CPU/ + cd regularizers_CPU/ +% compile C regularizers + mex LLT_model.c LLT_model_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" mex FGP_TV.c FGP_TV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" mex SplitBregman_TV.c SplitBregman_TV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" mex TGV_PD.c TGV_PD_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" mex PatchBased_Regul.c PatchBased_Regul_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" +delete LLT_model_core.c LLT_model_core.h FGP_TV_core.c FGP_TV_core.h SplitBregman_TV_core.c SplitBregman_TV_core.h TGV_PD_core.c TGV_PD_core.h PatchBased_Regul_core.c PatchBased_Regul_core.h utils.c utils.h CCPiDefines.h + +% compile CUDA-based regularizers +%cd regularizers_GPU/ + cd ../../ cd demos diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/FGP_TV_core.c b/Wrappers/Matlab/mex_compile/regularizers_CPU/FGP_TV_core.c deleted file mode 100644 index 03cd445..0000000 --- a/Wrappers/Matlab/mex_compile/regularizers_CPU/FGP_TV_core.c +++ /dev/null @@ -1,266 +0,0 @@ -/* -This work is part of the Core Imaging Library developed by -Visual Analytics and Imaging System Group of the Science Technology -Facilities Council, STFC - -Copyright 2017 Daniil Kazantsev -Copyright 2017 Srikanth Nagella, Edoardo Pasca - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -#include "FGP_TV_core.h" - -/* C-OMP implementation of FGP-TV [1] denoising/regularization model (2D/3D case) - * - * Input Parameters: - * 1. Noisy image/volume [REQUIRED] - * 2. lambda - regularization parameter [REQUIRED] - * 3. Number of iterations [OPTIONAL parameter] - * 4. eplsilon: tolerance constant [OPTIONAL parameter] - * 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter] - * - * Output: - * [1] Filtered/regularized image - * [2] last function value - * - * Example of image denoising: - * figure; - * Im = double(imread('lena_gray_256.tif'))/255; % loading image - * u0 = Im + .05*randn(size(Im)); % adding noise - * u = FGP_TV(single(u0), 0.05, 100, 1e-04); - * - * This function is based on the Matlab's code and paper by - * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems" - * - * D. Kazantsev, 2016-17 - * - */ - -/* 2D-case related Functions */ -/*****************************************************************/ -float Obj_func_CALC2D(float *A, float *D, float *funcvalA, float lambda, int dimX, int dimY) -{ - int i,j; - float f1, f2, val1, val2; - - /*data-related term */ - f1 = 0.0f; - for(i=0; i 1) { - P1[(i)*dimY + (j)] = P1[(i)*dimY + (j)] / sqrt(denom); - P2[(i)*dimY + (j)] = P2[(i)*dimY + (j)] / sqrt(denom); - } - } - } - } - else { - /* anisotropic TV*/ -#pragma omp parallel for shared(P1,P2) private(i,j,val1,val2) - for (i = 0; i -#include -#include -#include -#include -#include "omp.h" -#include "utils.h" - -/* C-OMP implementation of FGP-TV [1] denoising/regularization model (2D/3D case) -* -* Input Parameters: -* 1. Noisy image/volume [REQUIRED] -* 2. lambda - regularization parameter [REQUIRED] -* 3. Number of iterations [OPTIONAL parameter] -* 4. eplsilon: tolerance constant [OPTIONAL parameter] -* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter] -* -* Output: -* [1] Filtered/regularized image -* [2] last function value -* -* Example of image denoising: -* figure; -* Im = double(imread('lena_gray_256.tif'))/255; % loading image -* u0 = Im + .05*randn(size(Im)); % adding noise -* u = FGP_TV(single(u0), 0.05, 100, 1e-04); -* -* to compile with OMP support: mex FGP_TV.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" -* This function is based on the Matlab's code and paper by -* [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems" -* -* D. Kazantsev, 2016-17 -* -*/ -#ifdef __cplusplus -extern "C" { -#endif -//float copyIm(float *A, float *B, int dimX, int dimY, int dimZ); -float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, int dimX, int dimY); -float Grad_func2D(float *P1, float *P2, float *D, float *R1, float *R2, float lambda, int dimX, int dimY); -float Proj_func2D(float *P1, float *P2, int methTV, int dimX, int dimY); -float Rupd_func2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, int dimX, int dimY); -float Obj_func_CALC2D(float *A, float *D, float *funcvalA, float lambda, int dimX, int dimY); - -float Obj_func3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ); -float Grad_func3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ); -float Proj_func3D(float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ); -float Rupd_func3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, int dimX, int dimY, int dimZ); -float Obj_func_CALC3D(float *A, float *D, float *funcvalA, float lambda, int dimX, int dimY, int dimZ); -#ifdef __cplusplus -} -#endif \ No newline at end of file diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/LLT_model_core.c b/Wrappers/Matlab/mex_compile/regularizers_CPU/LLT_model_core.c deleted file mode 100644 index 3a853d2..0000000 --- a/Wrappers/Matlab/mex_compile/regularizers_CPU/LLT_model_core.c +++ /dev/null @@ -1,318 +0,0 @@ -/* -This work is part of the Core Imaging Library developed by -Visual Analytics and Imaging System Group of the Science Technology -Facilities Council, STFC - -Copyright 2017 Daniil Kazantsev -Copyright 2017 Srikanth Nagella, Edoardo Pasca - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -#include "LLT_model_core.h" - -/* C-OMP implementation of Lysaker, Lundervold and Tai (LLT) model of higher order regularization penalty -* -* Input Parameters: -* 1. U0 - origanal noise image/volume -* 2. lambda - regularization parameter -* 3. tau - time-step for explicit scheme -* 4. iter - iterations number -* 5. epsil - tolerance constant (to terminate earlier) -* 6. switcher - default is 0, switch to (1) to restrictive smoothing in Z dimension (in test) -* -* Output: -* Filtered/regularized image -* -* Example: -* figure; -* Im = double(imread('lena_gray_256.tif'))/255; % loading image -* u0 = Im + .03*randn(size(Im)); % adding noise -* [Den] = LLT_model(single(u0), 10, 0.1, 1); -* -* References: Lysaker, Lundervold and Tai (LLT) 2003, IEEE -* -* 28.11.16/Harwell -*/ - - -float der2D(float *U, float *D1, float *D2, int dimX, int dimY, int dimZ) -{ - int i, j, i_p, i_m, j_m, j_p; - float dxx, dyy, denom_xx, denom_yy; -#pragma omp parallel for shared(U,D1,D2) private(i, j, i_p, i_m, j_m, j_p, denom_xx, denom_yy, dxx, dyy) - for (i = 0; i= dimZ) k_p1 = k - 2; - // k_m1 = k - 2; if (k_m1 < 0) k_m1 = k + 2; - - dxx = D1[dimX*dimY*k + i_p*dimY + j] - 2.0f*D1[dimX*dimY*k + i*dimY + j] + D1[dimX*dimY*k + i_m*dimY + j]; - dyy = D2[dimX*dimY*k + i*dimY + j_p] - 2.0f*D2[dimX*dimY*k + i*dimY + j] + D2[dimX*dimY*k + i*dimY + j_m]; - dzz = D3[dimX*dimY*k_p + i*dimY + j] - 2.0f*D3[dimX*dimY*k + i*dimY + j] + D3[dimX*dimY*k_m + i*dimY + j]; - - if ((switcher == 1) && (Map[dimX*dimY*k + i*dimY + j] == 0)) dzz = 0; - div = dxx + dyy + dzz; - - // if (switcher == 1) { - // if (Map2[dimX*dimY*k + i*dimY + j] == 0) dzz2 = 0; - //else dzz2 = D4[dimX*dimY*k_p1 + i*dimY + j] - 2.0f*D4[dimX*dimY*k + i*dimY + j] + D4[dimX*dimY*k_m1 + i*dimY + j]; - // div = dzz + dzz2; - // } - - // dzz = D3[dimX*dimY*k_p + i*dimY + j] - 2.0f*D3[dimX*dimY*k + i*dimY + j] + D3[dimX*dimY*k_m + i*dimY + j]; - // dzz2 = D4[dimX*dimY*k_p1 + i*dimY + j] - 2.0f*D4[dimX*dimY*k + i*dimY + j] + D4[dimX*dimY*k_m1 + i*dimY + j]; - // div = dzz + dzz2; - - U[dimX*dimY*k + i*dimY + j] = U[dimX*dimY*k + i*dimY + j] - tau*div - tau*lambda*(U[dimX*dimY*k + i*dimY + j] - U0[dimX*dimY*k + i*dimY + j]); - } - } - } - return *U0; -} - -// float der3D_2(float *U, float *D1, float *D2, float *D3, float *D4, int dimX, int dimY, int dimZ) -// { -// int i, j, k, i_p, i_m, j_m, j_p, k_p, k_m, k_p1, k_m1; -// float dxx, dyy, dzz, dzz2, denom_xx, denom_yy, denom_zz, denom_zz2; -// #pragma omp parallel for shared(U,D1,D2,D3,D4) private(i, j, k, i_p, i_m, j_m, j_p, k_p, k_m, denom_xx, denom_yy, denom_zz, denom_zz2, dxx, dyy, dzz, dzz2, k_p1, k_m1) -// for(i=0; i= dimZ) k_p1 = k - 2; -// k_m1 = k - 2; if (k_m1 < 0) k_m1 = k + 2; -// -// dxx = U[dimX*dimY*k + i_p*dimY + j] - 2.0f*U[dimX*dimY*k + i*dimY + j] + U[dimX*dimY*k + i_m*dimY + j]; -// dyy = U[dimX*dimY*k + i*dimY + j_p] - 2.0f*U[dimX*dimY*k + i*dimY + j] + U[dimX*dimY*k + i*dimY + j_m]; -// dzz = U[dimX*dimY*k_p + i*dimY + j] - 2.0f*U[dimX*dimY*k + i*dimY + j] + U[dimX*dimY*k_m + i*dimY + j]; -// dzz2 = U[dimX*dimY*k_p1 + i*dimY + j] - 2.0f*U[dimX*dimY*k + i*dimY + j] + U[dimX*dimY*k_m1 + i*dimY + j]; -// -// denom_xx = fabs(dxx) + EPS; -// denom_yy = fabs(dyy) + EPS; -// denom_zz = fabs(dzz) + EPS; -// denom_zz2 = fabs(dzz2) + EPS; -// -// D1[dimX*dimY*k + i*dimY + j] = dxx/denom_xx; -// D2[dimX*dimY*k + i*dimY + j] = dyy/denom_yy; -// D3[dimX*dimY*k + i*dimY + j] = dzz/denom_zz; -// D4[dimX*dimY*k + i*dimY + j] = dzz2/denom_zz2; -// }}} -// return 1; -// } - -float calcMap(float *U, unsigned short *Map, int dimX, int dimY, int dimZ) -{ - int i, j, k, i1, j1, i2, j2, windowSize; - float val1, val2, thresh_val, maxval; - windowSize = 1; - thresh_val = 0.0001; /*thresh_val = 0.0035;*/ - - /* normalize volume first */ - maxval = 0.0f; - for (i = 0; i maxval) maxval = U[dimX*dimY*k + i*dimY + j]; - } - } - } - - if (maxval != 0.0f) { - for (i = 0; i= 0) && (i2 < dimX) && (j2 >= 0) && (j2 < dimY)) { - if (k == 0) { - val1 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k + 1) + i2*dimY + j2], 2); - // val3 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k+2) + i2*dimY + j2],2); - } - else if (k == dimZ - 1) { - val1 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k - 1) + i2*dimY + j2], 2); - // val3 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k-2) + i2*dimY + j2],2); - } - // else if (k == 1) { - // val1 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k-1) + i2*dimY + j2],2); - // val2 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k+1) + i2*dimY + j2],2); - // val3 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k+2) + i2*dimY + j2],2); - // } - // else if (k == dimZ-2) { - // val1 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k-1) + i2*dimY + j2],2); - // val2 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k+1) + i2*dimY + j2],2); - // val3 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k-2) + i2*dimY + j2],2); - // } - else { - val1 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k - 1) + i2*dimY + j2], 2); - val2 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k + 1) + i2*dimY + j2], 2); - // val3 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k-2) + i2*dimY + j2],2); - // val4 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k+2) + i2*dimY + j2],2); - } - } - } - } - - val1 = 0.111f*val1; val2 = 0.111f*val2; - // val3 = 0.111f*val3; val4 = 0.111f*val4; - if ((val1 <= thresh_val) && (val2 <= thresh_val)) Map[dimX*dimY*k + i*dimY + j] = 1; - // if ((val3 <= thresh_val) && (val4 <= thresh_val)) Map2[dimX*dimY*k + i*dimY + j] = 1; - } - } - } - return 1; -} - -float cleanMap(unsigned short *Map, int dimX, int dimY, int dimZ) -{ - int i, j, k, i1, j1, i2, j2, counter; -#pragma omp parallel for shared(Map) private(i, j, k, i1, j1, i2, j2, counter) - for (i = 0; i= 0) && (i2 < dimX) && (j2 >= 0) && (j2 < dimY)) { - if (Map[dimX*dimY*k + i2*dimY + j2] == 0) counter++; - } - } - } - if (counter < 24) Map[dimX*dimY*k + i*dimY + j] = 1; - } - } - } - return *Map; -} - - -/*********************3D *********************/ \ No newline at end of file diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/LLT_model_core.h b/Wrappers/Matlab/mex_compile/regularizers_CPU/LLT_model_core.h deleted file mode 100644 index 13fce5a..0000000 --- a/Wrappers/Matlab/mex_compile/regularizers_CPU/LLT_model_core.h +++ /dev/null @@ -1,46 +0,0 @@ -/* -This work is part of the Core Imaging Library developed by -Visual Analytics and Imaging System Group of the Science Technology -Facilities Council, STFC - -Copyright 2017 Daniil Kazantsev -Copyright 2017 Srikanth Nagella, Edoardo Pasca - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -//#include -#include -#include -#include -#include -#include "omp.h" -#include "utils.h" - -#define EPS 0.01 - -/* 2D functions */ -#ifdef __cplusplus -extern "C" { -#endif -float der2D(float *U, float *D1, float *D2, int dimX, int dimY, int dimZ); -float div_upd2D(float *U0, float *U, float *D1, float *D2, int dimX, int dimY, int dimZ, float lambda, float tau); - -float der3D(float *U, float *D1, float *D2, float *D3, int dimX, int dimY, int dimZ); -float div_upd3D(float *U0, float *U, float *D1, float *D2, float *D3, unsigned short *Map, int switcher, int dimX, int dimY, int dimZ, float lambda, float tau); - -float calcMap(float *U, unsigned short *Map, int dimX, int dimY, int dimZ); -float cleanMap(unsigned short *Map, int dimX, int dimY, int dimZ); - -//float copyIm(float *A, float *U, int dimX, int dimY, int dimZ); -#ifdef __cplusplus -} -#endif \ No newline at end of file diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/PatchBased_Regul_core.c b/Wrappers/Matlab/mex_compile/regularizers_CPU/PatchBased_Regul_core.c deleted file mode 100644 index acfb464..0000000 --- a/Wrappers/Matlab/mex_compile/regularizers_CPU/PatchBased_Regul_core.c +++ /dev/null @@ -1,213 +0,0 @@ -/* -This work is part of the Core Imaging Library developed by -Visual Analytics and Imaging System Group of the Science Technology -Facilities Council, STFC - -Copyright 2017 Daniil Kazanteev -Copyright 2017 Srikanth Nagella, Edoardo Pasca - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -#include "PatchBased_Regul_core.h" - -/* C-OMP implementation of patch-based (PB) regularization (2D and 3D cases). - * This method finds self-similar patches in data and performs one fixed point iteration to mimimize the PB penalty function - * - * References: 1. Yang Z. & Jacob M. "Nonlocal Regularization of Inverse Problems" - * 2. Kazantsev D. et al. "4D-CT reconstruction with unified spatial-temporal patch-based regularization" - * - * Input Parameters: - * 1. Image (2D or 3D) [required] - * 2. ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window) [optional] - * 3. ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window) [optional] - * 4. h - parameter for the PB penalty function [optional] - * 5. lambda - regularization parameter [optional] - - * Output: - * 1. regularized (denoised) Image (N x N)/volume (N x N x N) - * - * 2D denoising example in Matlab: - Im = double(imread('lena_gray_256.tif'))/255; % loading image - u0 = Im + .03*randn(size(Im)); u0(u0<0) = 0; % adding noise - ImDen = PatchBased_Regul(single(u0), 3, 1, 0.08, 0.05); - - * D. Kazantsev * - * 02/07/2014 - * Harwell, UK - */ - -/*2D version function */ -float PB_FUNC2D(float *A, float *B, int dimX, int dimY, int padXY, int SearchW, int SimilW, float h, float lambda) -{ - int i, j, i_n, j_n, i_m, j_m, i_p, j_p, i_l, j_l, i1, j1, i2, j2, i3, j3, i5,j5, count, SimilW_full; - float *Eucl_Vec, h2, denh2, normsum, Weight, Weight_norm, value, denom, WeightGlob, t1; - - /*SearchW_full = 2*SearchW + 1; */ /* the full searching window size */ - SimilW_full = 2*SimilW + 1; /* the full similarity window size */ - h2 = h*h; - denh2 = 1/(2*h2); - - /*Gaussian kernel */ - Eucl_Vec = (float*) calloc (SimilW_full*SimilW_full,sizeof(float)); - count = 0; - for(i_n=-SimilW; i_n<=SimilW; i_n++) { - for(j_n=-SimilW; j_n<=SimilW; j_n++) { - t1 = pow(((float)i_n), 2) + pow(((float)j_n), 2); - Eucl_Vec[count] = exp(-(t1)/(2*SimilW*SimilW)); - count = count + 1; - }} /*main neighb loop */ - - /*The NLM code starts here*/ - /* setting OMP here */ - #pragma omp parallel for shared (A, B, dimX, dimY, Eucl_Vec, lambda, denh2) private(denom, i, j, WeightGlob, count, i1, j1, i2, j2, i3, j3, i5, j5, Weight_norm, normsum, i_m, j_m, i_n, j_n, i_l, j_l, i_p, j_p, Weight, value) - - for(i=0; i= padXY) && (i < dimX-padXY)) && ((j >= padXY) && (j < dimY-padXY))) { - - /* Massive Search window loop */ - Weight_norm = 0; value = 0.0; - for(i_m=-SearchW; i_m<=SearchW; i_m++) { - for(j_m=-SearchW; j_m<=SearchW; j_m++) { - /*checking boundaries*/ - i1 = i+i_m; j1 = j+j_m; - - WeightGlob = 0.0; - /* if inside the searching window */ - for(i_l=-SimilW; i_l<=SimilW; i_l++) { - for(j_l=-SimilW; j_l<=SimilW; j_l++) { - i2 = i1+i_l; j2 = j1+j_l; - - i3 = i+i_l; j3 = j+j_l; /*coordinates of the inner patch loop */ - - count = 0; normsum = 0.0; - for(i_p=-SimilW; i_p<=SimilW; i_p++) { - for(j_p=-SimilW; j_p<=SimilW; j_p++) { - i5 = i2 + i_p; j5 = j2 + j_p; - normsum = normsum + Eucl_Vec[count]*pow(A[(i3+i_p)*dimY+(j3+j_p)]-A[i5*dimY+j5], 2); - count = count + 1; - }} - if (normsum != 0) Weight = (exp(-normsum*denh2)); - else Weight = 0.0; - WeightGlob += Weight; - }} - - value += A[i1*dimY+j1]*WeightGlob; - Weight_norm += WeightGlob; - }} /*search window loop end*/ - - /* the final loop to average all values in searching window with weights */ - denom = 1 + lambda*Weight_norm; - B[i*dimY+j] = (A[i*dimY+j] + lambda*value)/denom; - } - }} /*main loop*/ - return (*B); - free(Eucl_Vec); -} - -/*3D version*/ - float PB_FUNC3D(float *A, float *B, int dimX, int dimY, int dimZ, int padXY, int SearchW, int SimilW, float h, float lambda) - { - int SimilW_full, count, i, j, k, i_n, j_n, k_n, i_m, j_m, k_m, i_p, j_p, k_p, i_l, j_l, k_l, i1, j1, k1, i2, j2, k2, i3, j3, k3, i5, j5, k5; - float *Eucl_Vec, h2, denh2, normsum, Weight, Weight_norm, value, denom, WeightGlob; - - /*SearchW_full = 2*SearchW + 1; */ /* the full searching window size */ - SimilW_full = 2*SimilW + 1; /* the full similarity window size */ - h2 = h*h; - denh2 = 1/(2*h2); - - /*Gaussian kernel */ - Eucl_Vec = (float*) calloc (SimilW_full*SimilW_full*SimilW_full,sizeof(float)); - count = 0; - for(i_n=-SimilW; i_n<=SimilW; i_n++) { - for(j_n=-SimilW; j_n<=SimilW; j_n++) { - for(k_n=-SimilW; k_n<=SimilW; k_n++) { - Eucl_Vec[count] = exp(-(pow((float)i_n, 2) + pow((float)j_n, 2) + pow((float)k_n, 2))/(2*SimilW*SimilW*SimilW)); - count = count + 1; - }}} /*main neighb loop */ - - /*The NLM code starts here*/ - /* setting OMP here */ - #pragma omp parallel for shared (A, B, dimX, dimY, dimZ, Eucl_Vec, lambda, denh2) private(denom, i, j, k, WeightGlob,count, i1, j1, k1, i2, j2, k2, i3, j3, k3, i5, j5, k5, Weight_norm, normsum, i_m, j_m, k_m, i_n, j_n, k_n, i_l, j_l, k_l, i_p, j_p, k_p, Weight, value) - for(i=0; i= padXY) && (i < dimX-padXY)) && ((j >= padXY) && (j < dimY-padXY)) && ((k >= padXY) && (k < dimZ-padXY))) { - /* take all elements around the pixel of interest */ - /* Massive Search window loop */ - Weight_norm = 0; value = 0.0; - for(i_m=-SearchW; i_m<=SearchW; i_m++) { - for(j_m=-SearchW; j_m<=SearchW; j_m++) { - for(k_m=-SearchW; k_m<=SearchW; k_m++) { - /*checking boundaries*/ - i1 = i+i_m; j1 = j+j_m; k1 = k+k_m; - - WeightGlob = 0.0; - /* if inside the searching window */ - for(i_l=-SimilW; i_l<=SimilW; i_l++) { - for(j_l=-SimilW; j_l<=SimilW; j_l++) { - for(k_l=-SimilW; k_l<=SimilW; k_l++) { - i2 = i1+i_l; j2 = j1+j_l; k2 = k1+k_l; - - i3 = i+i_l; j3 = j+j_l; k3 = k+k_l; /*coordinates of the inner patch loop */ - - count = 0; normsum = 0.0; - for(i_p=-SimilW; i_p<=SimilW; i_p++) { - for(j_p=-SimilW; j_p<=SimilW; j_p++) { - for(k_p=-SimilW; k_p<=SimilW; k_p++) { - i5 = i2 + i_p; j5 = j2 + j_p; k5 = k2 + k_p; - normsum = normsum + Eucl_Vec[count]*pow(A[(dimX*dimY)*(k3+k_p)+(i3+i_p)*dimY+(j3+j_p)]-A[(dimX*dimY)*k5 + i5*dimY+j5], 2); - count = count + 1; - }}} - if (normsum != 0) Weight = (exp(-normsum*denh2)); - else Weight = 0.0; - WeightGlob += Weight; - }}} - value += A[(dimX*dimY)*k1 + i1*dimY+j1]*WeightGlob; - Weight_norm += WeightGlob; - - }}} /*search window loop end*/ - - /* the final loop to average all values in searching window with weights */ - denom = 1 + lambda*Weight_norm; - B[(dimX*dimY)*k + i*dimY+j] = (A[(dimX*dimY)*k + i*dimY+j] + lambda*value)/denom; - } - }}} /*main loop*/ - free(Eucl_Vec); - return *B; -} - -float pad_crop(float *A, float *Ap, int OldSizeX, int OldSizeY, int OldSizeZ, int NewSizeX, int NewSizeY, int NewSizeZ, int padXY, int switchpad_crop) -{ - /* padding-cropping function */ - int i,j,k; - if (NewSizeZ > 1) { - for (i=0; i < NewSizeX; i++) { - for (j=0; j < NewSizeY; j++) { - for (k=0; k < NewSizeZ; k++) { - if (((i >= padXY) && (i < NewSizeX-padXY)) && ((j >= padXY) && (j < NewSizeY-padXY)) && ((k >= padXY) && (k < NewSizeZ-padXY))) { - if (switchpad_crop == 0) Ap[NewSizeX*NewSizeY*k + i*NewSizeY+j] = A[OldSizeX*OldSizeY*(k - padXY) + (i-padXY)*(OldSizeY)+(j-padXY)]; - else Ap[OldSizeX*OldSizeY*(k - padXY) + (i-padXY)*(OldSizeY)+(j-padXY)] = A[NewSizeX*NewSizeY*k + i*NewSizeY+j]; - } - }}} - } - else { - for (i=0; i < NewSizeX; i++) { - for (j=0; j < NewSizeY; j++) { - if (((i >= padXY) && (i < NewSizeX-padXY)) && ((j >= padXY) && (j < NewSizeY-padXY))) { - if (switchpad_crop == 0) Ap[i*NewSizeY+j] = A[(i-padXY)*(OldSizeY)+(j-padXY)]; - else Ap[(i-padXY)*(OldSizeY)+(j-padXY)] = A[i*NewSizeY+j]; - } - }} - } - return *Ap; -} \ No newline at end of file diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/PatchBased_Regul_core.h b/Wrappers/Matlab/mex_compile/regularizers_CPU/PatchBased_Regul_core.h deleted file mode 100644 index d4a8a46..0000000 --- a/Wrappers/Matlab/mex_compile/regularizers_CPU/PatchBased_Regul_core.h +++ /dev/null @@ -1,69 +0,0 @@ -/* -This work is part of the Core Imaging Library developed by -Visual Analytics and Imaging System Group of the Science Technology -Facilities Council, STFC - -Copyright 2017 Daniil Kazanteev -Copyright 2017 Srikanth Nagella, Edoardo Pasca - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -#define _USE_MATH_DEFINES - -//#include -#include -#include -#include -#include -#include "omp.h" - -/* C-OMP implementation of patch-based (PB) regularization (2D and 3D cases). -* This method finds self-similar patches in data and performs one fixed point iteration to mimimize the PB penalty function -* -* References: 1. Yang Z. & Jacob M. "Nonlocal Regularization of Inverse Problems" -* 2. Kazantsev D. et al. "4D-CT reconstruction with unified spatial-temporal patch-based regularization" -* -* Input Parameters (mandatory): -* 1. Image (2D or 3D) -* 2. ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window) -* 3. ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window) -* 4. h - parameter for the PB penalty function -* 5. lambda - regularization parameter - -* Output: -* 1. regularized (denoised) Image (N x N)/volume (N x N x N) -* -* Quick 2D denoising example in Matlab: -Im = double(imread('lena_gray_256.tif'))/255; % loading image -u0 = Im + .03*randn(size(Im)); u0(u0<0) = 0; % adding noise -ImDen = PB_Regul_CPU(single(u0), 3, 1, 0.08, 0.05); -* -* Please see more tests in a file: -TestTemporalSmoothing.m - -* -* Matlab + C/mex compilers needed -* to compile with OMP support: mex PB_Regul_CPU.c CFLAGS="\$CFLAGS -fopenmp -Wall" LDFLAGS="\$LDFLAGS -fopenmp" -* -* D. Kazantsev * -* 02/07/2014 -* Harwell, UK -*/ -#ifdef __cplusplus -extern "C" { -#endif -float pad_crop(float *A, float *Ap, int OldSizeX, int OldSizeY, int OldSizeZ, int NewSizeX, int NewSizeY, int NewSizeZ, int padXY, int switchpad_crop); -float PB_FUNC2D(float *A, float *B, int dimX, int dimY, int padXY, int SearchW, int SimilW, float h, float lambda); -float PB_FUNC3D(float *A, float *B, int dimX, int dimY, int dimZ, int padXY, int SearchW, int SimilW, float h, float lambda); -#ifdef __cplusplus -} -#endif \ No newline at end of file diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/SplitBregman_TV_core.c b/Wrappers/Matlab/mex_compile/regularizers_CPU/SplitBregman_TV_core.c deleted file mode 100644 index 4109a4b..0000000 --- a/Wrappers/Matlab/mex_compile/regularizers_CPU/SplitBregman_TV_core.c +++ /dev/null @@ -1,259 +0,0 @@ -/* -This work is part of the Core Imaging Library developed by -Visual Analytics and Imaging System Group of the Science Technology -Facilities Council, STFC - -Copyright 2017 Daniil Kazantsev -Copyright 2017 Srikanth Nagella, Edoardo Pasca - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -#include "SplitBregman_TV_core.h" - -/* C-OMP implementation of Split Bregman - TV denoising-regularization model (2D/3D) -* -* Input Parameters: -* 1. Noisy image/volume -* 2. lambda - regularization parameter -* 3. Number of iterations [OPTIONAL parameter] -* 4. eplsilon - tolerance constant [OPTIONAL parameter] -* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter] -* -* Output: -* Filtered/regularized image -* -* Example: -* figure; -* Im = double(imread('lena_gray_256.tif'))/255; % loading image -* u0 = Im + .05*randn(size(Im)); u0(u0 < 0) = 0; -* u = SplitBregman_TV(single(u0), 10, 30, 1e-04); -* -* References: -* The Split Bregman Method for L1 Regularized Problems, by Tom Goldstein and Stanley Osher. -* D. Kazantsev, 2016* -*/ - - -/* 2D-case related Functions */ -/*****************************************************************/ -float gauss_seidel2D(float *U, float *A, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda, float mu) -{ - float sum, normConst; - int i,j,i1,i2,j1,j2; - normConst = 1.0f/(mu + 4.0f*lambda); - -#pragma omp parallel for shared(U) private(i,j,i1,i2,j1,j2,sum) - for(i=0; i -#include -#include -#include -#include -#include "omp.h" - -#include "utils.h" - -/* C-OMP implementation of Split Bregman - TV denoising-regularization model (2D/3D) -* -* Input Parameters: -* 1. Noisy image/volume -* 2. lambda - regularization parameter -* 3. Number of iterations [OPTIONAL parameter] -* 4. eplsilon - tolerance constant [OPTIONAL parameter] -* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter] -* -* Output: -* Filtered/regularized image -* -* Example: -* figure; -* Im = double(imread('lena_gray_256.tif'))/255; % loading image -* u0 = Im + .05*randn(size(Im)); u0(u0 < 0) = 0; -* u = SplitBregman_TV(single(u0), 10, 30, 1e-04); -* -* to compile with OMP support: mex SplitBregman_TV.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" -* References: -* The Split Bregman Method for L1 Regularized Problems, by Tom Goldstein and Stanley Osher. -* D. Kazantsev, 2016* -*/ - -#ifdef __cplusplus -extern "C" { -#endif - -//float copyIm(float *A, float *B, int dimX, int dimY, int dimZ); -float gauss_seidel2D(float *U, float *A, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda, float mu); -float updDxDy_shrinkAniso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda); -float updDxDy_shrinkIso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda); -float updBxBy2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY); - -float gauss_seidel3D(float *U, float *A, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda, float mu); -float updDxDyDz_shrinkAniso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda); -float updDxDyDz_shrinkIso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda); -float updBxByBz3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ); - -#ifdef __cplusplus -} -#endif \ No newline at end of file diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/TGV_PD_core.c b/Wrappers/Matlab/mex_compile/regularizers_CPU/TGV_PD_core.c deleted file mode 100644 index 4139d10..0000000 --- a/Wrappers/Matlab/mex_compile/regularizers_CPU/TGV_PD_core.c +++ /dev/null @@ -1,208 +0,0 @@ -/* -This work is part of the Core Imaging Library developed by -Visual Analytics and Imaging System Group of the Science Technology -Facilities Council, STFC - -Copyright 2017 Daniil Kazanteev -Copyright 2017 Srikanth Nagella, Edoardo Pasca - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -#include "TGV_PD_core.h" - -/* C-OMP implementation of Primal-Dual denoising method for - * Total Generilized Variation (TGV)-L2 model (2D case only) - * - * Input Parameters: - * 1. Noisy image/volume (2D) - * 2. lambda - regularization parameter - * 3. parameter to control first-order term (alpha1) - * 4. parameter to control the second-order term (alpha0) - * 5. Number of CP iterations - * - * Output: - * Filtered/regularized image - * - * Example: - * figure; - * Im = double(imread('lena_gray_256.tif'))/255; % loading image - * u0 = Im + .03*randn(size(Im)); % adding noise - * tic; u = PrimalDual_TGV(single(u0), 0.02, 1.3, 1, 550); toc; - * - * References: - * K. Bredies "Total Generalized Variation" - * - * 28.11.16/Harwell - */ - - - - -/*Calculating dual variable P (using forward differences)*/ -float DualP_2D(float *U, float *V1, float *V2, float *P1, float *P2, int dimX, int dimY, int dimZ, float sigma) -{ - int i,j; -#pragma omp parallel for shared(U,V1,V2,P1,P2) private(i,j) - for(i=0; i 1.0) { - P1[i*dimY + (j)] = P1[i*dimY + (j)]/grad_magn; - P2[i*dimY + (j)] = P2[i*dimY + (j)]/grad_magn; - } - }} - return 1; -} -/*Calculating dual variable Q (using forward differences)*/ -float DualQ_2D(float *V1, float *V2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, float sigma) -{ - int i,j; - float q1, q2, q11, q22; -#pragma omp parallel for shared(Q1,Q2,Q3,V1,V2) private(i,j,q1,q2,q11,q22) - for(i=0; i 1.0) { - Q1[i*dimY + (j)] = Q1[i*dimY + (j)]/grad_magn; - Q2[i*dimY + (j)] = Q2[i*dimY + (j)]/grad_magn; - Q3[i*dimY + (j)] = Q3[i*dimY + (j)]/grad_magn; - } - }} - return 1; -} -/* Divergence and projection for P*/ -float DivProjP_2D(float *U, float *A, float *P1, float *P2, int dimX, int dimY, int dimZ, float lambda, float tau) -{ - int i,j; - float P_v1, P_v2, div; -#pragma omp parallel for shared(U,A,P1,P2) private(i,j,P_v1,P_v2,div) - for(i=0; i -#include -#include -#include -#include -#include "omp.h" -#include "utils.h" - -/* C-OMP implementation of Primal-Dual denoising method for -* Total Generilized Variation (TGV)-L2 model (2D case only) -* -* Input Parameters: -* 1. Noisy image/volume (2D) -* 2. lambda - regularization parameter -* 3. parameter to control first-order term (alpha1) -* 4. parameter to control the second-order term (alpha0) -* 5. Number of CP iterations -* -* Output: -* Filtered/regularized image -* -* Example: -* figure; -* Im = double(imread('lena_gray_256.tif'))/255; % loading image -* u0 = Im + .03*randn(size(Im)); % adding noise -* tic; u = PrimalDual_TGV(single(u0), 0.02, 1.3, 1, 550); toc; -* -* to compile with OMP support: mex TGV_PD.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" -* References: -* K. Bredies "Total Generalized Variation" -* -* 28.11.16/Harwell -*/ -#ifdef __cplusplus -extern "C" { -#endif -/* 2D functions */ -float DualP_2D(float *U, float *V1, float *V2, float *P1, float *P2, int dimX, int dimY, int dimZ, float sigma); -float ProjP_2D(float *P1, float *P2, int dimX, int dimY, int dimZ, float alpha1); -float DualQ_2D(float *V1, float *V2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, float sigma); -float ProjQ_2D(float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, float alpha0); -float DivProjP_2D(float *U, float *A, float *P1, float *P2, int dimX, int dimY, int dimZ, float lambda, float tau); -float UpdV_2D(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, float tau); -float newU(float *U, float *U_old, int dimX, int dimY, int dimZ); -//float copyIm(float *A, float *U, int dimX, int dimY, int dimZ); -#ifdef __cplusplus -} -#endif diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/utils.c b/Wrappers/Matlab/mex_compile/regularizers_CPU/utils.c deleted file mode 100644 index 0e83d2c..0000000 --- a/Wrappers/Matlab/mex_compile/regularizers_CPU/utils.c +++ /dev/null @@ -1,29 +0,0 @@ -/* -This work is part of the Core Imaging Library developed by -Visual Analytics and Imaging System Group of the Science Technology -Facilities Council, STFC - -Copyright 2017 Daniil Kazanteev -Copyright 2017 Srikanth Nagella, Edoardo Pasca - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -#include "utils.h" - -/* Copy Image */ -float copyIm(float *A, float *U, int dimX, int dimY, int dimZ) -{ - int j; -#pragma omp parallel for shared(A, U) private(j) - for (j = 0; j -//#include -#include -#include -//#include -#include "omp.h" -#ifdef __cplusplus -extern "C" { -#endif -float copyIm(float *A, float *U, int dimX, int dimY, int dimZ); -#ifdef __cplusplus -} -#endif diff --git a/Wrappers/Matlab/mex_compile/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.cu b/Wrappers/Matlab/mex_compile/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.cu deleted file mode 100644 index 178af00..0000000 --- a/Wrappers/Matlab/mex_compile/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.cu +++ /dev/null @@ -1,270 +0,0 @@ -#include -#include -#include -#include "Diff4th_GPU_kernel.h" - -#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__) - -inline void __checkCudaErrors(cudaError err, const char *file, const int line) -{ - if (cudaSuccess != err) - { - fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n", - file, line, (int)err, cudaGetErrorString(err)); - exit(EXIT_FAILURE); - } -} - -#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) ) -#define sizeT (sizeX*sizeY*sizeZ) -#define epsilon 0.00000001 - -///////////////////////////////////////////////// -// 2D Image denosing - Second Step (The second derrivative) -__global__ void Diff4th2D_derriv(float* B, float* A, float *A0, int N, int M, float sigma, int iter, float tau, float lambda) -{ - float gradXXc = 0, gradYYc = 0; - int i = blockIdx.x*blockDim.x + threadIdx.x; - int j = blockIdx.y*blockDim.y + threadIdx.y; - - int index = j + i*N; - - if (((i < 1) || (i > N-2)) || ((j < 1) || (j > M-2))) { - return; } - - int indexN = (j)+(i-1)*(N); if (A[indexN] == 0) indexN = index; - int indexS = (j)+(i+1)*(N); if (A[indexS] == 0) indexS = index; - int indexW = (j-1)+(i)*(N); if (A[indexW] == 0) indexW = index; - int indexE = (j+1)+(i)*(N); if (A[indexE] == 0) indexE = index; - - gradXXc = B[indexN] + B[indexS] - 2*B[index] ; - gradYYc = B[indexW] + B[indexE] - 2*B[index] ; - A[index] = A[index] - tau*((A[index] - A0[index]) + lambda*(gradXXc + gradYYc)); -} - -// 2D Image denosing - The First Step -__global__ void Diff4th2D(float* A, float* B, int N, int M, float sigma, int iter, float tau) -{ - float gradX, gradX_sq, gradY, gradY_sq, gradXX, gradYY, gradXY, sq_sum, xy_2, V_norm, V_orth, c, c_sq; - - int i = blockIdx.x*blockDim.x + threadIdx.x; - int j = blockIdx.y*blockDim.y + threadIdx.y; - - int index = j + i*N; - - V_norm = 0.0f; V_orth = 0.0f; - - if (((i < 1) || (i > N-2)) || ((j < 1) || (j > M-2))) { - return; } - - int indexN = (j)+(i-1)*(N); if (A[indexN] == 0) indexN = index; - int indexS = (j)+(i+1)*(N); if (A[indexS] == 0) indexS = index; - int indexW = (j-1)+(i)*(N); if (A[indexW] == 0) indexW = index; - int indexE = (j+1)+(i)*(N); if (A[indexE] == 0) indexE = index; - int indexNW = (j-1)+(i-1)*(N); if (A[indexNW] == 0) indexNW = index; - int indexNE = (j+1)+(i-1)*(N); if (A[indexNE] == 0) indexNE = index; - int indexWS = (j-1)+(i+1)*(N); if (A[indexWS] == 0) indexWS = index; - int indexES = (j+1)+(i+1)*(N); if (A[indexES] == 0) indexES = index; - - gradX = 0.5f*(A[indexN]-A[indexS]); - gradX_sq = gradX*gradX; - gradXX = A[indexN] + A[indexS] - 2*A[index]; - - gradY = 0.5f*(A[indexW]-A[indexE]); - gradY_sq = gradY*gradY; - gradYY = A[indexW] + A[indexE] - 2*A[index]; - - gradXY = 0.25f*(A[indexNW] - A[indexNE] - A[indexWS] + A[indexES]); - xy_2 = 2.0f*gradX*gradY*gradXY; - sq_sum = gradX_sq + gradY_sq; - - if (sq_sum <= epsilon) { - V_norm = (gradXX*gradX_sq + xy_2 + gradYY*gradY_sq)/epsilon; - V_orth = (gradXX*gradY_sq - xy_2 + gradYY*gradX_sq)/epsilon; } - else { - V_norm = (gradXX*gradX_sq + xy_2 + gradYY*gradY_sq)/sq_sum; - V_orth = (gradXX*gradY_sq - xy_2 + gradYY*gradX_sq)/sq_sum; } - - c = 1.0f/(1.0f + sq_sum/sigma); - c_sq = c*c; - B[index] = c_sq*V_norm + c*V_orth; -} - -///////////////////////////////////////////////// -// 3D data parocerssing -__global__ void Diff4th3D_derriv(float *B, float *A, float *A0, int N, int M, int Z, float sigma, int iter, float tau, float lambda) -{ - float gradXXc = 0, gradYYc = 0, gradZZc = 0; - int xIndex = blockDim.x * blockIdx.x + threadIdx.x; - int yIndex = blockDim.y * blockIdx.y + threadIdx.y; - int zIndex = blockDim.z * blockIdx.z + threadIdx.z; - - int index = xIndex + M*yIndex + N*M*zIndex; - - if (((xIndex < 1) || (xIndex > N-2)) || ((yIndex < 1) || (yIndex > M-2)) || ((zIndex < 1) || (zIndex > Z-2))) { - return; } - - int indexN = (xIndex-1) + M*yIndex + N*M*zIndex; if (A[indexN] == 0) indexN = index; - int indexS = (xIndex+1) + M*yIndex + N*M*zIndex; if (A[indexS] == 0) indexS = index; - int indexW = xIndex + M*(yIndex-1) + N*M*zIndex; if (A[indexW] == 0) indexW = index; - int indexE = xIndex + M*(yIndex+1) + N*M*zIndex; if (A[indexE] == 0) indexE = index; - int indexU = xIndex + M*yIndex + N*M*(zIndex-1); if (A[indexU] == 0) indexU = index; - int indexD = xIndex + M*yIndex + N*M*(zIndex+1); if (A[indexD] == 0) indexD = index; - - gradXXc = B[indexN] + B[indexS] - 2*B[index] ; - gradYYc = B[indexW] + B[indexE] - 2*B[index] ; - gradZZc = B[indexU] + B[indexD] - 2*B[index] ; - - A[index] = A[index] - tau*((A[index] - A0[index]) + lambda*(gradXXc + gradYYc + gradZZc)); -} - -__global__ void Diff4th3D(float* A, float* B, int N, int M, int Z, float sigma, int iter, float tau) -{ - float gradX, gradX_sq, gradY, gradY_sq, gradZ, gradZ_sq, gradXX, gradYY, gradZZ, gradXY, gradXZ, gradYZ, sq_sum, xy_2, xyz_1, xyz_2, V_norm, V_orth, c, c_sq; - - int xIndex = blockDim.x * blockIdx.x + threadIdx.x; - int yIndex = blockDim.y * blockIdx.y + threadIdx.y; - int zIndex = blockDim.z * blockIdx.z + threadIdx.z; - - int index = xIndex + M*yIndex + N*M*zIndex; - V_norm = 0.0f; V_orth = 0.0f; - - if (((xIndex < 1) || (xIndex > N-2)) || ((yIndex < 1) || (yIndex > M-2)) || ((zIndex < 1) || (zIndex > Z-2))) { - return; } - - B[index] = 0; - - int indexN = (xIndex-1) + M*yIndex + N*M*zIndex; if (A[indexN] == 0) indexN = index; - int indexS = (xIndex+1) + M*yIndex + N*M*zIndex; if (A[indexS] == 0) indexS = index; - int indexW = xIndex + M*(yIndex-1) + N*M*zIndex; if (A[indexW] == 0) indexW = index; - int indexE = xIndex + M*(yIndex+1) + N*M*zIndex; if (A[indexE] == 0) indexE = index; - int indexU = xIndex + M*yIndex + N*M*(zIndex-1); if (A[indexU] == 0) indexU = index; - int indexD = xIndex + M*yIndex + N*M*(zIndex+1); if (A[indexD] == 0) indexD = index; - - int indexNW = (xIndex-1) + M*(yIndex-1) + N*M*zIndex; if (A[indexNW] == 0) indexNW = index; - int indexNE = (xIndex-1) + M*(yIndex+1) + N*M*zIndex; if (A[indexNE] == 0) indexNE = index; - int indexWS = (xIndex+1) + M*(yIndex-1) + N*M*zIndex; if (A[indexWS] == 0) indexWS = index; - int indexES = (xIndex+1) + M*(yIndex+1) + N*M*zIndex; if (A[indexES] == 0) indexES = index; - - int indexUW = (xIndex-1) + M*(yIndex) + N*M*(zIndex-1); if (A[indexUW] == 0) indexUW = index; - int indexUE = (xIndex+1) + M*(yIndex) + N*M*(zIndex-1); if (A[indexUE] == 0) indexUE = index; - int indexDW = (xIndex-1) + M*(yIndex) + N*M*(zIndex+1); if (A[indexDW] == 0) indexDW = index; - int indexDE = (xIndex+1) + M*(yIndex) + N*M*(zIndex+1); if (A[indexDE] == 0) indexDE = index; - - int indexUN = (xIndex) + M*(yIndex-1) + N*M*(zIndex-1); if (A[indexUN] == 0) indexUN = index; - int indexUS = (xIndex) + M*(yIndex+1) + N*M*(zIndex-1); if (A[indexUS] == 0) indexUS = index; - int indexDN = (xIndex) + M*(yIndex-1) + N*M*(zIndex+1); if (A[indexDN] == 0) indexDN = index; - int indexDS = (xIndex) + M*(yIndex+1) + N*M*(zIndex+1); if (A[indexDS] == 0) indexDS = index; - - gradX = 0.5f*(A[indexN]-A[indexS]); - gradX_sq = gradX*gradX; - gradXX = A[indexN] + A[indexS] - 2*A[index]; - - gradY = 0.5f*(A[indexW]-A[indexE]); - gradY_sq = gradY*gradY; - gradYY = A[indexW] + A[indexE] - 2*A[index]; - - gradZ = 0.5f*(A[indexU]-A[indexD]); - gradZ_sq = gradZ*gradZ; - gradZZ = A[indexU] + A[indexD] - 2*A[index]; - - gradXY = 0.25f*(A[indexNW] - A[indexNE] - A[indexWS] + A[indexES]); - gradXZ = 0.25f*(A[indexUW] - A[indexUE] - A[indexDW] + A[indexDE]); - gradYZ = 0.25f*(A[indexUN] - A[indexUS] - A[indexDN] + A[indexDS]); - - xy_2 = 2.0f*gradX*gradY*gradXY; - xyz_1 = 2.0f*gradX*gradZ*gradXZ; - xyz_2 = 2.0f*gradY*gradZ*gradYZ; - - sq_sum = gradX_sq + gradY_sq + gradZ_sq; - - if (sq_sum <= epsilon) { - V_norm = (gradXX*gradX_sq + gradYY*gradY_sq + gradZZ*gradZ_sq + xy_2 + xyz_1 + xyz_2)/epsilon; - V_orth = ((gradY_sq + gradZ_sq)*gradXX + (gradX_sq + gradZ_sq)*gradYY + (gradX_sq + gradY_sq)*gradZZ - xy_2 - xyz_1 - xyz_2)/epsilon; } - else { - V_norm = (gradXX*gradX_sq + gradYY*gradY_sq + gradZZ*gradZ_sq + xy_2 + xyz_1 + xyz_2)/sq_sum; - V_orth = ((gradY_sq + gradZ_sq)*gradXX + (gradX_sq + gradZ_sq)*gradYY + (gradX_sq + gradY_sq)*gradZZ - xy_2 - xyz_1 - xyz_2)/sq_sum; } - - c = 1; - if ((1.0f + sq_sum/sigma) != 0.0f) {c = 1.0f/(1.0f + sq_sum/sigma);} - - c_sq = c*c; - B[index] = c_sq*V_norm + c*V_orth; -} - -/******************************************************/ -/********* HOST FUNCTION*************/ -extern "C" void Diff4th_GPU_kernel(float* A, float* B, int N, int M, int Z, float sigma, int iter, float tau, float lambda) -{ - int deviceCount = -1; // number of devices - cudaGetDeviceCount(&deviceCount); - if (deviceCount == 0) { - fprintf(stderr, "No CUDA devices found\n"); - return; - } - - int BLKXSIZE, BLKYSIZE,BLKZSIZE; - float *Ad, *Bd, *Cd; - sigma = sigma*sigma; - - if (Z == 0){ - // 4th order diffusion for 2D case - BLKXSIZE = 8; - BLKYSIZE = 16; - - dim3 dimBlock(BLKXSIZE,BLKYSIZE); - dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE)); - - checkCudaErrors(cudaMalloc((void**)&Ad,N*M*sizeof(float))); - checkCudaErrors(cudaMalloc((void**)&Bd,N*M*sizeof(float))); - checkCudaErrors(cudaMalloc((void**)&Cd,N*M*sizeof(float))); - - checkCudaErrors(cudaMemcpy(Ad,A,N*M*sizeof(float),cudaMemcpyHostToDevice)); - checkCudaErrors(cudaMemcpy(Bd,A,N*M*sizeof(float),cudaMemcpyHostToDevice)); - checkCudaErrors(cudaMemcpy(Cd,A,N*M*sizeof(float),cudaMemcpyHostToDevice)); - - int n = 1; - while (n <= iter) { - Diff4th2D<<>>(Bd, Cd, N, M, sigma, iter, tau); - cudaDeviceSynchronize(); - checkCudaErrors( cudaPeekAtLastError() ); - Diff4th2D_derriv<<>>(Cd, Bd, Ad, N, M, sigma, iter, tau, lambda); - cudaDeviceSynchronize(); - checkCudaErrors( cudaPeekAtLastError() ); - n++; - } - checkCudaErrors(cudaMemcpy(B,Bd,N*M*sizeof(float),cudaMemcpyDeviceToHost)); - cudaFree(Ad); cudaFree(Bd); cudaFree(Cd); - } - - if (Z != 0){ - // 4th order diffusion for 3D case - BLKXSIZE = 8; - BLKYSIZE = 8; - BLKZSIZE = 8; - - dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE); - dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE),idivup(Z,BLKXSIZE)); - - checkCudaErrors(cudaMalloc((void**)&Ad,N*M*Z*sizeof(float))); - checkCudaErrors(cudaMalloc((void**)&Bd,N*M*Z*sizeof(float))); - checkCudaErrors(cudaMalloc((void**)&Cd,N*M*Z*sizeof(float))); - - checkCudaErrors(cudaMemcpy(Ad,A,N*M*Z*sizeof(float),cudaMemcpyHostToDevice)); - checkCudaErrors(cudaMemcpy(Bd,A,N*M*Z*sizeof(float),cudaMemcpyHostToDevice)); - checkCudaErrors(cudaMemcpy(Cd,A,N*M*Z*sizeof(float),cudaMemcpyHostToDevice)); - - int n = 1; - while (n <= iter) { - Diff4th3D<<>>(Bd, Cd, N, M, Z, sigma, iter, tau); - cudaDeviceSynchronize(); - checkCudaErrors( cudaPeekAtLastError() ); - Diff4th3D_derriv<<>>(Cd, Bd, Ad, N, M, Z, sigma, iter, tau, lambda); - cudaDeviceSynchronize(); - checkCudaErrors( cudaPeekAtLastError() ); - n++; - } - checkCudaErrors(cudaMemcpy(B,Bd,N*M*Z*sizeof(float),cudaMemcpyDeviceToHost)); - cudaFree(Ad); cudaFree(Bd); cudaFree(Cd); - } -} \ No newline at end of file diff --git a/Wrappers/Matlab/mex_compile/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.h b/Wrappers/Matlab/mex_compile/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.h deleted file mode 100644 index cfbb45a..0000000 --- a/Wrappers/Matlab/mex_compile/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __DIFF_HO_H_ -#define __DIFF_HO_H_ - -extern "C" void Diff4th_GPU_kernel(float* A, float* B, int N, int M, int Z, float sigma, int iter, float tau, float lambda); - -#endif diff --git a/Wrappers/Matlab/mex_compile/regularizers_GPU/NL_Regul/NLM_GPU_kernel.cu b/Wrappers/Matlab/mex_compile/regularizers_GPU/NL_Regul/NLM_GPU_kernel.cu deleted file mode 100644 index 17da3a8..0000000 --- a/Wrappers/Matlab/mex_compile/regularizers_GPU/NL_Regul/NLM_GPU_kernel.cu +++ /dev/null @@ -1,239 +0,0 @@ -#include -#include -#include -#include "NLM_GPU_kernel.h" - -#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__) - -inline void __checkCudaErrors(cudaError err, const char *file, const int line) -{ - if (cudaSuccess != err) - { - fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n", - file, line, (int)err, cudaGetErrorString(err)); - exit(EXIT_FAILURE); - } -} - -extern __shared__ float sharedmem[]; - -// run PB den kernel here -__global__ void NLM_kernel(float *Ad, float* Bd, float *Eucl_Vec_d, int N, int M, int Z, int SearchW, int SimilW, int SearchW_real, int SearchW_full, int SimilW_full, int padXY, float h2, float lambda, dim3 imagedim, dim3 griddim, dim3 kerneldim, dim3 sharedmemdim, int nUpdatePerThread, float neighborsize) -{ - - int i1, j1, k1, i2, j2, k2, i3, j3, k3, i_l, j_l, k_l, count; - float value, Weight_norm, normsum, Weight; - - int bidx = blockIdx.x; - int bidy = blockIdx.y%griddim.y; - int bidz = (int)((blockIdx.y)/griddim.y); - - // global index for block endpoint - int beidx = __mul24(bidx,blockDim.x); - int beidy = __mul24(bidy,blockDim.y); - int beidz = __mul24(bidz,blockDim.z); - - int tid = __mul24(threadIdx.z,__mul24(blockDim.x,blockDim.y)) + - __mul24(threadIdx.y,blockDim.x) + threadIdx.x; - - #ifdef __DEVICE_EMULATION__ - printf("tid : %d", tid); - #endif - - // update shared memory - int nthreads = blockDim.x*blockDim.y*blockDim.z; - int sharedMemSize = sharedmemdim.x * sharedmemdim.y * sharedmemdim.z; - for(int i=0; i= padXY && idx < (imagedim.x - padXY) && - idy >= padXY && idy < (imagedim.y - padXY)) - { - int i_centr = threadIdx.x + (SearchW); /*indices of the centrilized (main) pixel */ - int j_centr = threadIdx.y + (SearchW); /*indices of the centrilized (main) pixel */ - - if ((i_centr > 0) && (i_centr < N) && (j_centr > 0) && (j_centr < M)) { - - Weight_norm = 0; value = 0.0; - /* Massive Search window loop */ - for(i1 = i_centr - SearchW_real ; i1 <= i_centr + SearchW_real; i1++) { - for(j1 = j_centr - SearchW_real ; j1<= j_centr + SearchW_real ; j1++) { - /* if inside the searching window */ - count = 0; normsum = 0.0; - for(i_l=-SimilW; i_l<=SimilW; i_l++) { - for(j_l=-SimilW; j_l<=SimilW; j_l++) { - i2 = i1+i_l; j2 = j1+j_l; - i3 = i_centr+i_l; j3 = j_centr+j_l; /*coordinates of the inner patch loop */ - if ((i2 > 0) && (i2 < N) && (j2 > 0) && (j2 < M)) { - if ((i3 > 0) && (i3 < N) && (j3 > 0) && (j3 < M)) { - normsum += Eucl_Vec_d[count]*pow((sharedmem[(j3)*sharedmemdim.x+(i3)] - sharedmem[j2*sharedmemdim.x+i2]), 2); - }} - count++; - }} - if (normsum != 0) Weight = (expf(-normsum/h2)); - else Weight = 0.0; - Weight_norm += Weight; - value += sharedmem[j1*sharedmemdim.x+i1]*Weight; - }} - - if (Weight_norm != 0) Bd[idz*imagedim.x*imagedim.y + idy*imagedim.x + idx] = value/Weight_norm; - else Bd[idz*imagedim.x*imagedim.y + idy*imagedim.x + idx] = Ad[idz*imagedim.x*imagedim.y + idy*imagedim.x + idx]; - } - } /*boundary conditions end*/ - } - else { - /*3D case*/ - /*checking boundaries to be within the image and avoid padded spaces */ - if( idx >= padXY && idx < (imagedim.x - padXY) && - idy >= padXY && idy < (imagedim.y - padXY) && - idz >= padXY && idz < (imagedim.z - padXY) ) - { - int i_centr = threadIdx.x + SearchW; /*indices of the centrilized (main) pixel */ - int j_centr = threadIdx.y + SearchW; /*indices of the centrilized (main) pixel */ - int k_centr = threadIdx.z + SearchW; /*indices of the centrilized (main) pixel */ - - if ((i_centr > 0) && (i_centr < N) && (j_centr > 0) && (j_centr < M) && (k_centr > 0) && (k_centr < Z)) { - - Weight_norm = 0; value = 0.0; - /* Massive Search window loop */ - for(i1 = i_centr - SearchW_real ; i1 <= i_centr + SearchW_real; i1++) { - for(j1 = j_centr - SearchW_real ; j1<= j_centr + SearchW_real ; j1++) { - for(k1 = k_centr - SearchW_real ; k1<= k_centr + SearchW_real ; k1++) { - /* if inside the searching window */ - count = 0; normsum = 0.0; - for(i_l=-SimilW; i_l<=SimilW; i_l++) { - for(j_l=-SimilW; j_l<=SimilW; j_l++) { - for(k_l=-SimilW; k_l<=SimilW; k_l++) { - i2 = i1+i_l; j2 = j1+j_l; k2 = k1+k_l; - i3 = i_centr+i_l; j3 = j_centr+j_l; k3 = k_centr+k_l; /*coordinates of the inner patch loop */ - if ((i2 > 0) && (i2 < N) && (j2 > 0) && (j2 < M) && (k2 > 0) && (k2 < Z)) { - if ((i3 > 0) && (i3 < N) && (j3 > 0) && (j3 < M) && (k3 > 0) && (k3 < Z)) { - normsum += Eucl_Vec_d[count]*pow((sharedmem[(k3)*sharedmemdim.x*sharedmemdim.y + (j3)*sharedmemdim.x+(i3)] - sharedmem[(k2)*sharedmemdim.x*sharedmemdim.y + j2*sharedmemdim.x+i2]), 2); - }} - count++; - }}} - if (normsum != 0) Weight = (expf(-normsum/h2)); - else Weight = 0.0; - Weight_norm += Weight; - value += sharedmem[k1*sharedmemdim.x*sharedmemdim.y + j1*sharedmemdim.x+i1]*Weight; - }}} /* BIG search window loop end*/ - - - if (Weight_norm != 0) Bd[idz*imagedim.x*imagedim.y + idy*imagedim.x + idx] = value/Weight_norm; - else Bd[idz*imagedim.x*imagedim.y + idy*imagedim.x + idx] = Ad[idz*imagedim.x*imagedim.y + idy*imagedim.x + idx]; - } - } /* boundary conditions end */ - } -} - -///////////////////////////////////////////////// -// HOST FUNCTION -extern "C" void NLM_GPU_kernel(float *A, float* B, float *Eucl_Vec, int N, int M, int Z, int dimension, int SearchW, int SimilW, int SearchW_real, float h2, float lambda) -{ - int deviceCount = -1; // number of devices - cudaGetDeviceCount(&deviceCount); - if (deviceCount == 0) { - fprintf(stderr, "No CUDA devices found\n"); - return; - } - -// cudaDeviceReset(); - - int padXY, SearchW_full, SimilW_full, blockWidth, blockHeight, blockDepth, nBlockX, nBlockY, nBlockZ, kernel_depth; - float *Ad, *Bd, *Eucl_Vec_d; - - if (dimension == 2) { - blockWidth = 16; - blockHeight = 16; - blockDepth = 1; - Z = 1; - kernel_depth = 0; - } - else { - blockWidth = 8; - blockHeight = 8; - blockDepth = 8; - kernel_depth = SearchW; - } - - // compute how many blocks are needed - nBlockX = ceil((float)N / (float)blockWidth); - nBlockY = ceil((float)M / (float)blockHeight); - nBlockZ = ceil((float)Z / (float)blockDepth); - - dim3 dimGrid(nBlockX,nBlockY*nBlockZ); - dim3 dimBlock(blockWidth, blockHeight, blockDepth); - dim3 imagedim(N,M,Z); - dim3 griddim(nBlockX,nBlockY,nBlockZ); - - dim3 kerneldim(SearchW,SearchW,kernel_depth); - dim3 sharedmemdim((SearchW*2)+blockWidth,(SearchW*2)+blockHeight,(kernel_depth*2)+blockDepth); - int sharedmemsize = sizeof(float)*sharedmemdim.x*sharedmemdim.y*sharedmemdim.z; - int updateperthread = ceil((float)(sharedmemdim.x*sharedmemdim.y*sharedmemdim.z)/(float)(blockWidth*blockHeight*blockDepth)); - float neighborsize = (2*SearchW+1)*(2*SearchW+1)*(2*kernel_depth+1); - - padXY = SearchW + 2*SimilW; /* padding sizes */ - - SearchW_full = 2*SearchW + 1; /* the full searching window size */ - SimilW_full = 2*SimilW + 1; /* the full similarity window size */ - - /*allocate space for images on device*/ - checkCudaErrors( cudaMalloc((void**)&Ad,N*M*Z*sizeof(float)) ); - checkCudaErrors( cudaMalloc((void**)&Bd,N*M*Z*sizeof(float)) ); - /*allocate space for vectors on device*/ - if (dimension == 2) { - checkCudaErrors( cudaMalloc((void**)&Eucl_Vec_d,SimilW_full*SimilW_full*sizeof(float)) ); - checkCudaErrors( cudaMemcpy(Eucl_Vec_d,Eucl_Vec,SimilW_full*SimilW_full*sizeof(float),cudaMemcpyHostToDevice) ); - } - else { - checkCudaErrors( cudaMalloc((void**)&Eucl_Vec_d,SimilW_full*SimilW_full*SimilW_full*sizeof(float)) ); - checkCudaErrors( cudaMemcpy(Eucl_Vec_d,Eucl_Vec,SimilW_full*SimilW_full*SimilW_full*sizeof(float),cudaMemcpyHostToDevice) ); - } - - /* copy data from the host to device */ - checkCudaErrors( cudaMemcpy(Ad,A,N*M*Z*sizeof(float),cudaMemcpyHostToDevice) ); - - // Run CUDA kernel here - NLM_kernel<<>>(Ad, Bd, Eucl_Vec_d, M, N, Z, SearchW, SimilW, SearchW_real, SearchW_full, SimilW_full, padXY, h2, lambda, imagedim, griddim, kerneldim, sharedmemdim, updateperthread, neighborsize); - - checkCudaErrors( cudaPeekAtLastError() ); -// gpuErrchk( cudaDeviceSynchronize() ); - - checkCudaErrors( cudaMemcpy(B,Bd,N*M*Z*sizeof(float),cudaMemcpyDeviceToHost) ); - cudaFree(Ad); cudaFree(Bd); cudaFree(Eucl_Vec_d); -} diff --git a/Wrappers/Matlab/mex_compile/regularizers_GPU/NL_Regul/NLM_GPU_kernel.h b/Wrappers/Matlab/mex_compile/regularizers_GPU/NL_Regul/NLM_GPU_kernel.h deleted file mode 100644 index bc9d4a3..0000000 --- a/Wrappers/Matlab/mex_compile/regularizers_GPU/NL_Regul/NLM_GPU_kernel.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __NLMREG_KERNELS_H_ -#define __NLMREG_KERNELS_H_ - -extern "C" void NLM_GPU_kernel(float *A, float* B, float *Eucl_Vec, int N, int M, int Z, int dimension, int SearchW, int SimilW, int SearchW_real, float denh2, float lambda); - -#endif -- cgit v1.2.3