From bb86cf3cb44fa66a2def258d346ebb68fe14ed61 Mon Sep 17 00:00:00 2001 From: Daniil Kazantsev Date: Mon, 9 Apr 2018 09:38:35 +0100 Subject: fixes a memory leak in FGP-TV(CPU)#43, matlab CPU/GPU wrappers and demos --- Core/CMakeLists.txt | 4 +- Core/regularizers_CPU/FGP_TV_core.c | 38 +- Core/regularizers_CPU/ROF_TV_core.c | 14 +- Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.cu | 561 --------------------- Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.h | 10 - Core/regularizers_GPU/TV_FGP_GPU_core.cu | 561 +++++++++++++++++++++ Core/regularizers_GPU/TV_FGP_GPU_core.h | 10 + Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.cu | 368 -------------- Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.h | 8 - Core/regularizers_GPU/TV_ROF_GPU_core.cu | 368 ++++++++++++++ Core/regularizers_GPU/TV_ROF_GPU_core.h | 8 + Readme.md | 30 +- Wrappers/Matlab/demos/demoMatlab_denoise.m | 38 ++ Wrappers/Matlab/demos/demoMatlab_denoise.m~ | 31 ++ Wrappers/Matlab/mex_compile/compileCPU_mex.m | 19 + Wrappers/Matlab/mex_compile/compileGPU_mex.m | 30 ++ Wrappers/Matlab/mex_compile/compile_mex.m | 19 - .../Matlab/mex_compile/regularizers_CPU/FGP_TV.c | 96 ++-- .../mex_compile/regularizers_CPU/LLT_model.c | 169 ------- .../regularizers_CPU/PatchBased_Regul.c | 140 ----- .../Matlab/mex_compile/regularizers_CPU/ROF_TV.c | 73 +-- .../mex_compile/regularizers_CPU/SplitBregman_TV.c | 179 ------- .../Matlab/mex_compile/regularizers_CPU/TGV_PD.c | 144 ------ .../Diffus_HO/Diff4thHajiaboli_GPU.cpp | 125 ----- .../mex_compile/regularizers_GPU/FGP_TV_GPU.cpp | 95 ++++ .../regularizers_GPU/NL_Regul/NLM_GPU.cpp | 171 ------- .../mex_compile/regularizers_GPU/ROF_TV_GPU.cpp | 73 +++ 27 files changed, 1348 insertions(+), 2034 deletions(-) delete mode 100755 Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.cu delete mode 100755 Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.h create mode 100755 Core/regularizers_GPU/TV_FGP_GPU_core.cu create mode 100755 Core/regularizers_GPU/TV_FGP_GPU_core.h delete mode 100755 Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.cu delete mode 100755 Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.h create mode 100755 Core/regularizers_GPU/TV_ROF_GPU_core.cu create mode 100755 Core/regularizers_GPU/TV_ROF_GPU_core.h create mode 100644 Wrappers/Matlab/demos/demoMatlab_denoise.m create mode 100644 Wrappers/Matlab/demos/demoMatlab_denoise.m~ create mode 100644 Wrappers/Matlab/mex_compile/compileCPU_mex.m create mode 100644 Wrappers/Matlab/mex_compile/compileGPU_mex.m delete mode 100644 Wrappers/Matlab/mex_compile/compile_mex.m delete mode 100644 Wrappers/Matlab/mex_compile/regularizers_CPU/LLT_model.c delete mode 100644 Wrappers/Matlab/mex_compile/regularizers_CPU/PatchBased_Regul.c delete mode 100644 Wrappers/Matlab/mex_compile/regularizers_CPU/SplitBregman_TV.c delete mode 100644 Wrappers/Matlab/mex_compile/regularizers_CPU/TGV_PD.c delete mode 100644 Wrappers/Matlab/mex_compile/regularizers_GPU/Diffus_HO/Diff4thHajiaboli_GPU.cpp create mode 100644 Wrappers/Matlab/mex_compile/regularizers_GPU/FGP_TV_GPU.cpp delete mode 100644 Wrappers/Matlab/mex_compile/regularizers_GPU/NL_Regul/NLM_GPU.cpp create mode 100644 Wrappers/Matlab/mex_compile/regularizers_GPU/ROF_TV_GPU.cpp diff --git a/Core/CMakeLists.txt b/Core/CMakeLists.txt index f31454d..f53c538 100644 --- a/Core/CMakeLists.txt +++ b/Core/CMakeLists.txt @@ -127,8 +127,8 @@ if (CUDA_FOUND) set(CUDA_NVCC_FLAGS "-Xcompiler -fPIC -shared -D_FORCE_INLINES") message("CUDA FLAGS ${CUDA_NVCC_FLAGS}") CUDA_ADD_LIBRARY(cilregcuda SHARED - ${CMAKE_CURRENT_SOURCE_DIR}/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.cu - ${CMAKE_CURRENT_SOURCE_DIR}/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.cu + ${CMAKE_CURRENT_SOURCE_DIR}/regularizers_GPU/TV_ROF_GPU_core.cu + ${CMAKE_CURRENT_SOURCE_DIR}/regularizers_GPU/TV_FGP_GPU_core.cu ) if (UNIX) message ("I'd install into ${CMAKE_INSTALL_PREFIX}/lib") diff --git a/Core/regularizers_CPU/FGP_TV_core.c b/Core/regularizers_CPU/FGP_TV_core.c index 7a8ce48..462c3f7 100644 --- a/Core/regularizers_CPU/FGP_TV_core.c +++ b/Core/regularizers_CPU/FGP_TV_core.c @@ -50,13 +50,13 @@ float TV_FGP_CPU_main(float *Input, float *Output, float lambdaPar, int iteratio float *Output_prev=NULL, *P1=NULL, *P2=NULL, *P1_prev=NULL, *P2_prev=NULL, *R1=NULL, *R2=NULL; DimTotal = dimX*dimY; - Output_prev = (float *) malloc( DimTotal * sizeof(float) ); - P1 = (float *) malloc( DimTotal * sizeof(float) ); - P2 = (float *) malloc( DimTotal * sizeof(float) ); - P1_prev = (float *) malloc( DimTotal * sizeof(float) ); - P2_prev = (float *) malloc( DimTotal * sizeof(float) ); - R1 = (float *) malloc( DimTotal * sizeof(float) ); - R2 = (float *) malloc( DimTotal * sizeof(float) ); + Output_prev = calloc(DimTotal, sizeof(float)); + P1 = calloc(DimTotal, sizeof(float)); + P2 = calloc(DimTotal, sizeof(float)); + P1_prev = calloc(DimTotal, sizeof(float)); + P2_prev = calloc(DimTotal, sizeof(float)); + R1 = calloc(DimTotal, sizeof(float)); + R2 = calloc(DimTotal, sizeof(float)); /* begin iterations */ for(ll=0; ll -#include - -/* CUDA implementation of FGP-TV [1] denoising/regularization model (2D/3D case) - * - * Input Parameters: - * 1. Noisy image/volume - * 2. lambdaPar - regularization parameter - * 3. Number of iterations - * 4. eplsilon: tolerance constant - * 5. TV-type: methodTV - 'iso' (0) or 'l1' (1) - * 6. nonneg: 'nonnegativity (0 is OFF by default) - * 7. print information: 0 (off) or 1 (on) - * - * Output: - * [1] Filtered/regularized image - * - * This function is based on the Matlab's code and paper by - * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems" - */ - -// This will output the proper CUDA error strings in the event that a CUDA host call returns an error -#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__) - -inline void __checkCudaErrors(cudaError err, const char *file, const int line) -{ - if (cudaSuccess != err) - { - fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n", - file, line, (int)err, cudaGetErrorString(err)); - exit(EXIT_FAILURE); - } -} - -#define BLKXSIZE2D 16 -#define BLKYSIZE2D 16 - -#define BLKXSIZE 8 -#define BLKYSIZE 8 -#define BLKZSIZE 8 - -#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) ) -struct square { __host__ __device__ float operator()(float x) { return x * x; } }; - -/************************************************/ -/*****************2D modules*********************/ -/************************************************/ -__global__ void Obj_func2D_kernel(float *Ad, float *D, float *R1, float *R2, int N, int M, int ImSize, float lambda) -{ - - float val1,val2; - - //calculate each thread global index - const int xIndex=blockIdx.x*blockDim.x+threadIdx.x; - const int yIndex=blockIdx.y*blockDim.y+threadIdx.y; - - int index = xIndex + N*yIndex; - - if ((xIndex < N) && (yIndex < M)) { - if (xIndex <= 0) {val1 = 0.0f;} else {val1 = R1[(xIndex-1) + N*yIndex];} - if (yIndex <= 0) {val2 = 0.0f;} else {val2 = R2[xIndex + N*(yIndex-1)];} - //Write final result to global memory - D[index] = Ad[index] - lambda*(R1[index] + R2[index] - val1 - val2); - } - return; -} - -__global__ void Grad_func2D_kernel(float *P1, float *P2, float *D, float *R1, float *R2, int N, int M, int ImSize, float multip) -{ - - float val1,val2; - - //calculate each thread global index - const int xIndex=blockIdx.x*blockDim.x+threadIdx.x; - const int yIndex=blockIdx.y*blockDim.y+threadIdx.y; - - int index = xIndex + N*yIndex; - - if ((xIndex < N) && (yIndex < M)) { - - /* boundary conditions */ - if (xIndex >= N-1) val1 = 0.0f; else val1 = D[index] - D[(xIndex+1) + N*yIndex]; - if (yIndex >= M-1) val2 = 0.0f; else val2 = D[index] - D[(xIndex) + N*(yIndex + 1)]; - - //Write final result to global memory - P1[index] = R1[index] + multip*val1; - P2[index] = R2[index] + multip*val2; - } - return; -} - -__global__ void Proj_func2D_iso_kernel(float *P1, float *P2, int N, int M, int ImSize) -{ - - float denom; - //calculate each thread global index - const int xIndex=blockIdx.x*blockDim.x+threadIdx.x; - const int yIndex=blockIdx.y*blockDim.y+threadIdx.y; - - int index = xIndex + N*yIndex; - - if ((xIndex < N) && (yIndex < M)) { - denom = pow(P1[index],2) + pow(P2[index],2); - if (denom > 1.0f) { - P1[index] = P1[index]/sqrt(denom); - P2[index] = P2[index]/sqrt(denom); - } - } - return; -} -__global__ void Proj_func2D_aniso_kernel(float *P1, float *P2, int N, int M, int ImSize) -{ - - float val1, val2; - //calculate each thread global index - const int xIndex=blockIdx.x*blockDim.x+threadIdx.x; - const int yIndex=blockIdx.y*blockDim.y+threadIdx.y; - - int index = xIndex + N*yIndex; - - if ((xIndex < N) && (yIndex < M)) { - val1 = abs(P1[index]); - val2 = abs(P2[index]); - if (val1 < 1.0f) {val1 = 1.0f;} - if (val2 < 1.0f) {val2 = 1.0f;} - P1[index] = P1[index]/val1; - P2[index] = P2[index]/val2; - } - return; -} -__global__ void Rupd_func2D_kernel(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, float multip2, int N, int M, int ImSize) -{ - //calculate each thread global index - const int xIndex=blockIdx.x*blockDim.x+threadIdx.x; - const int yIndex=blockIdx.y*blockDim.y+threadIdx.y; - - int index = xIndex + N*yIndex; - - if ((xIndex < N) && (yIndex < M)) { - R1[index] = P1[index] + multip2*(P1[index] - P1_old[index]); - R2[index] = P2[index] + multip2*(P2[index] - P2_old[index]); - } - return; -} -__global__ void nonneg2D_kernel(float* Output, int N, int M, int num_total) -{ - int xIndex = blockDim.x * blockIdx.x + threadIdx.x; - int yIndex = blockDim.y * blockIdx.y + threadIdx.y; - - int index = xIndex + N*yIndex; - - if (index < num_total) { - if (Output[index] < 0.0f) Output[index] = 0.0f; - } -} -__global__ void copy_kernel2D(float *Input, float* Output, int N, int M, int num_total) -{ - int xIndex = blockDim.x * blockIdx.x + threadIdx.x; - int yIndex = blockDim.y * blockIdx.y + threadIdx.y; - - int index = xIndex + N*yIndex; - - if (index < num_total) { - Output[index] = Input[index]; - } -} -__global__ void ResidCalc2D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int num_total) -{ - int xIndex = blockDim.x * blockIdx.x + threadIdx.x; - int yIndex = blockDim.y * blockIdx.y + threadIdx.y; - - int index = xIndex + N*yIndex; - - if (index < num_total) { - Output[index] = Input1[index] - Input2[index]; - } -} -/************************************************/ -/*****************3D modules*********************/ -/************************************************/ -__global__ void Obj_func3D_kernel(float *Ad, float *D, float *R1, float *R2, float *R3, int N, int M, int Z, int ImSize, float lambda) -{ - - float val1,val2,val3; - - //calculate each thread global index - int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; - int k = blockDim.z * blockIdx.z + threadIdx.z; - - int index = (N*M)*k + i + N*j; - - if ((i < N) && (j < M) && (k < Z)) { - if (i <= 0) {val1 = 0.0f;} else {val1 = R1[(N*M)*(k) + (i-1) + N*j];} - if (j <= 0) {val2 = 0.0f;} else {val2 = R2[(N*M)*(k) + i + N*(j-1)];} - if (k <= 0) {val3 = 0.0f;} else {val3 = R3[(N*M)*(k-1) + i + N*j];} - //Write final result to global memory - D[index] = Ad[index] - lambda*(R1[index] + R2[index] + R3[index] - val1 - val2 - val3); - } - return; -} - -__global__ void Grad_func3D_kernel(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, int N, int M, int Z, int ImSize, float multip) -{ - - float val1,val2,val3; - - //calculate each thread global index - int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; - int k = blockDim.z * blockIdx.z + threadIdx.z; - - int index = (N*M)*k + i + N*j; - - if ((i < N) && (j < M) && (k < Z)) { - /* boundary conditions */ - if (i >= N-1) val1 = 0.0f; else val1 = D[index] - D[(N*M)*(k) + (i+1) + N*j]; - if (j >= M-1) val2 = 0.0f; else val2 = D[index] - D[(N*M)*(k) + i + N*(j+1)]; - if (k >= Z-1) val3 = 0.0f; else val3 = D[index] - D[(N*M)*(k+1) + i + N*j]; - - //Write final result to global memory - P1[index] = R1[index] + multip*val1; - P2[index] = R2[index] + multip*val2; - P3[index] = R3[index] + multip*val3; - } - return; -} - -__global__ void Proj_func3D_iso_kernel(float *P1, float *P2, float *P3, int N, int M, int Z, int ImSize) -{ - - float denom,sq_denom; - //calculate each thread global index - int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; - int k = blockDim.z * blockIdx.z + threadIdx.z; - - int index = (N*M)*k + i + N*j; - - if ((i < N) && (j < M) && (k < Z)) { - denom = pow(P1[index],2) + pow(P2[index],2) + pow(P3[index],2); - - if (denom > 1.0f) { - sq_denom = 1.0f/sqrt(denom); - P1[index] = P1[index]*sq_denom; - P2[index] = P2[index]*sq_denom; - P3[index] = P3[index]*sq_denom; - } - } - return; -} - -__global__ void Proj_func3D_aniso_kernel(float *P1, float *P2, float *P3, int N, int M, int Z, int ImSize) -{ - - float val1, val2, val3; - //calculate each thread global index - int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; - int k = blockDim.z * blockIdx.z + threadIdx.z; - - int index = (N*M)*k + i + N*j; - - if ((i < N) && (j < M) && (k < Z)) { - val1 = abs(P1[index]); - val2 = abs(P2[index]); - val3 = abs(P3[index]); - if (val1 < 1.0f) {val1 = 1.0f;} - if (val2 < 1.0f) {val2 = 1.0f;} - if (val3 < 1.0f) {val3 = 1.0f;} - P1[index] = P1[index]/val1; - P2[index] = P2[index]/val2; - P3[index] = P3[index]/val3; - } - return; -} - - -__global__ void Rupd_func3D_kernel(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, float multip2, int N, int M, int Z, int ImSize) -{ - //calculate each thread global index - int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; - int k = blockDim.z * blockIdx.z + threadIdx.z; - - int index = (N*M)*k + i + N*j; - - if ((i < N) && (j < M) && (k < Z)) { - R1[index] = P1[index] + multip2*(P1[index] - P1_old[index]); - R2[index] = P2[index] + multip2*(P2[index] - P2_old[index]); - R3[index] = P3[index] + multip2*(P3[index] - P3_old[index]); - } - return; -} - -__global__ void nonneg3D_kernel(float* Output, int N, int M, int Z, int num_total) -{ - int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; - int k = blockDim.z * blockIdx.z + threadIdx.z; - - int index = (N*M)*k + i + N*j; - - if (index < num_total) { - if (Output[index] < 0.0f) Output[index] = 0.0f; - } -} - -__global__ void copy_kernel3D(float *Input, float* Output, int N, int M, int Z, int num_total) -{ - int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; - int k = blockDim.z * blockIdx.z + threadIdx.z; - - int index = (N*M)*k + i + N*j; - - if (index < num_total) { - Output[index] = Input[index]; - } -} -/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ - -////////////MAIN HOST FUNCTION /////////////// -extern "C" void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ) -{ - int deviceCount = -1; // number of devices - cudaGetDeviceCount(&deviceCount); - if (deviceCount == 0) { - fprintf(stderr, "No CUDA devices found\n"); - return; - } - - int count = 0, i; - float re, multip,multip2; - float tk = 1.0f; - float tkp1=1.0f; - - if (dimZ <= 1) { - /*2D verson*/ - int ImSize = dimX*dimY; - float *d_input, *d_update=NULL, *d_update_prev=NULL, *P1=NULL, *P2=NULL, *P1_prev=NULL, *P2_prev=NULL, *R1=NULL, *R2=NULL; - - dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D); - dim3 dimGrid(idivup(dimX,BLKXSIZE2D), idivup(dimY,BLKYSIZE2D)); - - /*allocate space for images on device*/ - checkCudaErrors( cudaMalloc((void**)&d_input,ImSize*sizeof(float)) ); - checkCudaErrors( cudaMalloc((void**)&d_update,ImSize*sizeof(float)) ); - if (epsil != 0.0f) checkCudaErrors( cudaMalloc((void**)&d_update_prev,ImSize*sizeof(float)) ); - checkCudaErrors( cudaMalloc((void**)&P1,ImSize*sizeof(float)) ); - checkCudaErrors( cudaMalloc((void**)&P2,ImSize*sizeof(float)) ); - checkCudaErrors( cudaMalloc((void**)&P1_prev,ImSize*sizeof(float)) ); - checkCudaErrors( cudaMalloc((void**)&P2_prev,ImSize*sizeof(float)) ); - checkCudaErrors( cudaMalloc((void**)&R1,ImSize*sizeof(float)) ); - checkCudaErrors( cudaMalloc((void**)&R2,ImSize*sizeof(float)) ); - - checkCudaErrors( cudaMemcpy(d_input,Input,ImSize*sizeof(float),cudaMemcpyHostToDevice)); - cudaMemset(P1, 0, ImSize*sizeof(float)); - cudaMemset(P2, 0, ImSize*sizeof(float)); - cudaMemset(P1_prev, 0, ImSize*sizeof(float)); - cudaMemset(P2_prev, 0, ImSize*sizeof(float)); - cudaMemset(R1, 0, ImSize*sizeof(float)); - cudaMemset(R2, 0, ImSize*sizeof(float)); - - /********************** Run CUDA 2D kernel here ********************/ - multip = (1.0f/(8.0f*lambdaPar)); - - /* The main kernel */ - for (i = 0; i < iter; i++) { - - /* computing the gradient of the objective function */ - Obj_func2D_kernel<<>>(d_input, d_update, R1, R2, dimX, dimY, ImSize, lambdaPar); - checkCudaErrors( cudaDeviceSynchronize() ); - checkCudaErrors(cudaPeekAtLastError() ); - - if (nonneg != 0) { - nonneg2D_kernel<<>>(d_update, dimX, dimY, ImSize); - checkCudaErrors( cudaDeviceSynchronize() ); - checkCudaErrors(cudaPeekAtLastError() ); } - - /*Taking a step towards minus of the gradient*/ - Grad_func2D_kernel<<>>(P1, P2, d_update, R1, R2, dimX, dimY, ImSize, multip); - checkCudaErrors( cudaDeviceSynchronize() ); - checkCudaErrors(cudaPeekAtLastError() ); - - /* projection step */ - if (methodTV == 0) Proj_func2D_iso_kernel<<>>(P1, P2, dimX, dimY, ImSize); /*isotropic TV*/ - else Proj_func2D_aniso_kernel<<>>(P1, P2, dimX, dimY, ImSize); /*anisotropic TV*/ - checkCudaErrors( cudaDeviceSynchronize() ); - checkCudaErrors(cudaPeekAtLastError() ); - - tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f; - multip2 = ((tk-1.0f)/tkp1); - - Rupd_func2D_kernel<<>>(P1, P1_prev, P2, P2_prev, R1, R2, tkp1, tk, multip2, dimX, dimY, ImSize); - checkCudaErrors( cudaDeviceSynchronize() ); - checkCudaErrors(cudaPeekAtLastError() ); - - if (epsil != 0.0f) { - /* calculate norm - stopping rules using the Thrust library */ - ResidCalc2D_kernel<<>>(d_update, d_update_prev, P1_prev, dimX, dimY, ImSize); - checkCudaErrors( cudaDeviceSynchronize() ); - checkCudaErrors(cudaPeekAtLastError() ); - - thrust::device_vector d_vec(P1_prev, P1_prev + ImSize); - float reduction = sqrt(thrust::transform_reduce(d_vec.begin(), d_vec.end(), square(), 0.0f, thrust::plus())); - thrust::device_vector d_vec2(d_update, d_update + ImSize); - float reduction2 = sqrt(thrust::transform_reduce(d_vec2.begin(), d_vec2.end(), square(), 0.0f, thrust::plus())); - - re = (reduction/reduction2); - if (re < epsil) count++; - if (count > 4) break; - - copy_kernel2D<<>>(d_update, d_update_prev, dimX, dimY, ImSize); - checkCudaErrors( cudaDeviceSynchronize() ); - checkCudaErrors(cudaPeekAtLastError() ); - } - - copy_kernel2D<<>>(P1, P1_prev, dimX, dimY, ImSize); - checkCudaErrors( cudaDeviceSynchronize() ); - checkCudaErrors(cudaPeekAtLastError() ); - - copy_kernel2D<<>>(P2, P2_prev, dimX, dimY, ImSize); - checkCudaErrors( cudaDeviceSynchronize() ); - checkCudaErrors(cudaPeekAtLastError() ); - - tk = tkp1; - } - if (printM == 1) printf("FGP-TV iterations stopped at iteration %i \n", i); - /***************************************************************/ - //copy result matrix from device to host memory - cudaMemcpy(Output,d_update,ImSize*sizeof(float),cudaMemcpyDeviceToHost); - - cudaFree(d_input); - cudaFree(d_update); - if (epsil != 0.0f) cudaFree(d_update_prev); - cudaFree(P1); - cudaFree(P2); - cudaFree(P1_prev); - cudaFree(P2_prev); - cudaFree(R1); - cudaFree(R2); - } - else { - /*3D verson*/ - int ImSize = dimX*dimY*dimZ; - float *d_input, *d_update=NULL, *P1=NULL, *P2=NULL, *P3=NULL, *P1_prev=NULL, *P2_prev=NULL, *P3_prev=NULL, *R1=NULL, *R2=NULL, *R3=NULL; - - dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE); - dim3 dimGrid(idivup(dimX,BLKXSIZE), idivup(dimY,BLKYSIZE),idivup(dimZ,BLKZSIZE)); - - /*allocate space for images on device*/ - checkCudaErrors( cudaMalloc((void**)&d_input,ImSize*sizeof(float)) ); - checkCudaErrors( cudaMalloc((void**)&d_update,ImSize*sizeof(float)) ); - checkCudaErrors( cudaMalloc((void**)&P1,ImSize*sizeof(float)) ); - checkCudaErrors( cudaMalloc((void**)&P2,ImSize*sizeof(float)) ); - checkCudaErrors( cudaMalloc((void**)&P3,ImSize*sizeof(float)) ); - checkCudaErrors( cudaMalloc((void**)&P1_prev,ImSize*sizeof(float)) ); - checkCudaErrors( cudaMalloc((void**)&P2_prev,ImSize*sizeof(float)) ); - checkCudaErrors( cudaMalloc((void**)&P3_prev,ImSize*sizeof(float)) ); - checkCudaErrors( cudaMalloc((void**)&R1,ImSize*sizeof(float)) ); - checkCudaErrors( cudaMalloc((void**)&R2,ImSize*sizeof(float)) ); - checkCudaErrors( cudaMalloc((void**)&R3,ImSize*sizeof(float)) ); - - checkCudaErrors( cudaMemcpy(d_input,Input,ImSize*sizeof(float),cudaMemcpyHostToDevice)); - cudaMemset(P1, 0, ImSize*sizeof(float)); - cudaMemset(P2, 0, ImSize*sizeof(float)); - cudaMemset(P3, 0, ImSize*sizeof(float)); - cudaMemset(P1_prev, 0, ImSize*sizeof(float)); - cudaMemset(P2_prev, 0, ImSize*sizeof(float)); - cudaMemset(P3_prev, 0, ImSize*sizeof(float)); - cudaMemset(R1, 0, ImSize*sizeof(float)); - cudaMemset(R2, 0, ImSize*sizeof(float)); - cudaMemset(R3, 0, ImSize*sizeof(float)); - /********************** Run CUDA 3D kernel here ********************/ - multip = (1.0f/(8.0f*lambdaPar)); - - /* The main kernel */ - for (i = 0; i < iter; i++) { - - /* computing the gradient of the objective function */ - Obj_func3D_kernel<<>>(d_input, d_update, R1, R2, R3, dimX, dimY, dimZ, ImSize, lambdaPar); - checkCudaErrors( cudaDeviceSynchronize() ); - checkCudaErrors(cudaPeekAtLastError() ); - - if (nonneg != 0) { - nonneg3D_kernel<<>>(d_update, dimX, dimY, dimZ, ImSize); - checkCudaErrors( cudaDeviceSynchronize() ); - checkCudaErrors(cudaPeekAtLastError() ); } - - /*Taking a step towards minus of the gradient*/ - Grad_func3D_kernel<<>>(P1, P2, P3, d_update, R1, R2, R3, dimX, dimY, dimZ, ImSize, multip); - checkCudaErrors( cudaDeviceSynchronize() ); - checkCudaErrors(cudaPeekAtLastError() ); - - /* projection step */ - if (methodTV == 0) Proj_func3D_iso_kernel<<>>(P1, P2, P3, dimX, dimY, dimZ, ImSize); /* isotropic kernel */ - else Proj_func3D_aniso_kernel<<>>(P1, P2, P3, dimX, dimY, dimZ, ImSize); /* anisotropic kernel */ - checkCudaErrors( cudaDeviceSynchronize() ); - checkCudaErrors(cudaPeekAtLastError() ); - - tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f; - multip2 = ((tk-1.0f)/tkp1); - - Rupd_func3D_kernel<<>>(P1, P1_prev, P2, P2_prev, P3, P3_prev, R1, R2, R3, tkp1, tk, multip2, dimX, dimY, dimZ, ImSize); - checkCudaErrors( cudaDeviceSynchronize() ); - checkCudaErrors(cudaPeekAtLastError() ); - - copy_kernel3D<<>>(P1, P1_prev, dimX, dimY, dimZ, ImSize); - checkCudaErrors( cudaDeviceSynchronize() ); - checkCudaErrors(cudaPeekAtLastError() ); - - copy_kernel3D<<>>(P2, P2_prev, dimX, dimY, dimZ, ImSize); - checkCudaErrors( cudaDeviceSynchronize() ); - checkCudaErrors(cudaPeekAtLastError() ); - - copy_kernel3D<<>>(P3, P3_prev, dimX, dimY, dimZ, ImSize); - checkCudaErrors( cudaDeviceSynchronize() ); - checkCudaErrors(cudaPeekAtLastError() ); - - tk = tkp1; - } - if (printM == 1) printf("FGP-TV iterations stopped at iteration %i \n", i); - /***************************************************************/ - //copy result matrix from device to host memory - cudaMemcpy(Output,d_update,ImSize*sizeof(float),cudaMemcpyDeviceToHost); - - cudaFree(d_input); - cudaFree(d_update); - cudaFree(P1); - cudaFree(P2); - cudaFree(P3); - cudaFree(P1_prev); - cudaFree(P2_prev); - cudaFree(P3_prev); - cudaFree(R1); - cudaFree(R2); - cudaFree(R3); - } - cudaDeviceReset(); -} diff --git a/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.h b/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.h deleted file mode 100755 index 107d243..0000000 --- a/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.h +++ /dev/null @@ -1,10 +0,0 @@ -#include -#include -#include - -#ifndef _TV_FGP_GPU_ -#define _TV_FGP_GPU_ - -extern "C" void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ); - -#endif diff --git a/Core/regularizers_GPU/TV_FGP_GPU_core.cu b/Core/regularizers_GPU/TV_FGP_GPU_core.cu new file mode 100755 index 0000000..61097fb --- /dev/null +++ b/Core/regularizers_GPU/TV_FGP_GPU_core.cu @@ -0,0 +1,561 @@ + /* +This work is part of the Core Imaging Library developed by +Visual Analytics and Imaging System Group of the Science Technology +Facilities Council, STFC + +Copyright 2017 Daniil Kazantsev +Copyright 2017 Srikanth Nagella, Edoardo Pasca + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at +http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#include "TV_FGP_GPU_core.h" +#include +#include + +/* CUDA implementation of FGP-TV [1] denoising/regularization model (2D/3D case) + * + * Input Parameters: + * 1. Noisy image/volume + * 2. lambdaPar - regularization parameter + * 3. Number of iterations + * 4. eplsilon: tolerance constant + * 5. TV-type: methodTV - 'iso' (0) or 'l1' (1) + * 6. nonneg: 'nonnegativity (0 is OFF by default) + * 7. print information: 0 (off) or 1 (on) + * + * Output: + * [1] Filtered/regularized image + * + * This function is based on the Matlab's code and paper by + * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems" + */ + +// This will output the proper CUDA error strings in the event that a CUDA host call returns an error +#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__) + +inline void __checkCudaErrors(cudaError err, const char *file, const int line) +{ + if (cudaSuccess != err) + { + fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n", + file, line, (int)err, cudaGetErrorString(err)); + exit(EXIT_FAILURE); + } +} + +#define BLKXSIZE2D 16 +#define BLKYSIZE2D 16 + +#define BLKXSIZE 8 +#define BLKYSIZE 8 +#define BLKZSIZE 8 + +#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) ) +struct square { __host__ __device__ float operator()(float x) { return x * x; } }; + +/************************************************/ +/*****************2D modules*********************/ +/************************************************/ +__global__ void Obj_func2D_kernel(float *Ad, float *D, float *R1, float *R2, int N, int M, int ImSize, float lambda) +{ + + float val1,val2; + + //calculate each thread global index + const int xIndex=blockIdx.x*blockDim.x+threadIdx.x; + const int yIndex=blockIdx.y*blockDim.y+threadIdx.y; + + int index = xIndex + N*yIndex; + + if ((xIndex < N) && (yIndex < M)) { + if (xIndex <= 0) {val1 = 0.0f;} else {val1 = R1[(xIndex-1) + N*yIndex];} + if (yIndex <= 0) {val2 = 0.0f;} else {val2 = R2[xIndex + N*(yIndex-1)];} + //Write final result to global memory + D[index] = Ad[index] - lambda*(R1[index] + R2[index] - val1 - val2); + } + return; +} + +__global__ void Grad_func2D_kernel(float *P1, float *P2, float *D, float *R1, float *R2, int N, int M, int ImSize, float multip) +{ + + float val1,val2; + + //calculate each thread global index + const int xIndex=blockIdx.x*blockDim.x+threadIdx.x; + const int yIndex=blockIdx.y*blockDim.y+threadIdx.y; + + int index = xIndex + N*yIndex; + + if ((xIndex < N) && (yIndex < M)) { + + /* boundary conditions */ + if (xIndex >= N-1) val1 = 0.0f; else val1 = D[index] - D[(xIndex+1) + N*yIndex]; + if (yIndex >= M-1) val2 = 0.0f; else val2 = D[index] - D[(xIndex) + N*(yIndex + 1)]; + + //Write final result to global memory + P1[index] = R1[index] + multip*val1; + P2[index] = R2[index] + multip*val2; + } + return; +} + +__global__ void Proj_func2D_iso_kernel(float *P1, float *P2, int N, int M, int ImSize) +{ + + float denom; + //calculate each thread global index + const int xIndex=blockIdx.x*blockDim.x+threadIdx.x; + const int yIndex=blockIdx.y*blockDim.y+threadIdx.y; + + int index = xIndex + N*yIndex; + + if ((xIndex < N) && (yIndex < M)) { + denom = pow(P1[index],2) + pow(P2[index],2); + if (denom > 1.0f) { + P1[index] = P1[index]/sqrt(denom); + P2[index] = P2[index]/sqrt(denom); + } + } + return; +} +__global__ void Proj_func2D_aniso_kernel(float *P1, float *P2, int N, int M, int ImSize) +{ + + float val1, val2; + //calculate each thread global index + const int xIndex=blockIdx.x*blockDim.x+threadIdx.x; + const int yIndex=blockIdx.y*blockDim.y+threadIdx.y; + + int index = xIndex + N*yIndex; + + if ((xIndex < N) && (yIndex < M)) { + val1 = abs(P1[index]); + val2 = abs(P2[index]); + if (val1 < 1.0f) {val1 = 1.0f;} + if (val2 < 1.0f) {val2 = 1.0f;} + P1[index] = P1[index]/val1; + P2[index] = P2[index]/val2; + } + return; +} +__global__ void Rupd_func2D_kernel(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, float multip2, int N, int M, int ImSize) +{ + //calculate each thread global index + const int xIndex=blockIdx.x*blockDim.x+threadIdx.x; + const int yIndex=blockIdx.y*blockDim.y+threadIdx.y; + + int index = xIndex + N*yIndex; + + if ((xIndex < N) && (yIndex < M)) { + R1[index] = P1[index] + multip2*(P1[index] - P1_old[index]); + R2[index] = P2[index] + multip2*(P2[index] - P2_old[index]); + } + return; +} +__global__ void nonneg2D_kernel(float* Output, int N, int M, int num_total) +{ + int xIndex = blockDim.x * blockIdx.x + threadIdx.x; + int yIndex = blockDim.y * blockIdx.y + threadIdx.y; + + int index = xIndex + N*yIndex; + + if (index < num_total) { + if (Output[index] < 0.0f) Output[index] = 0.0f; + } +} +__global__ void copy_kernel2D(float *Input, float* Output, int N, int M, int num_total) +{ + int xIndex = blockDim.x * blockIdx.x + threadIdx.x; + int yIndex = blockDim.y * blockIdx.y + threadIdx.y; + + int index = xIndex + N*yIndex; + + if (index < num_total) { + Output[index] = Input[index]; + } +} +__global__ void ResidCalc2D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int num_total) +{ + int xIndex = blockDim.x * blockIdx.x + threadIdx.x; + int yIndex = blockDim.y * blockIdx.y + threadIdx.y; + + int index = xIndex + N*yIndex; + + if (index < num_total) { + Output[index] = Input1[index] - Input2[index]; + } +} +/************************************************/ +/*****************3D modules*********************/ +/************************************************/ +__global__ void Obj_func3D_kernel(float *Ad, float *D, float *R1, float *R2, float *R3, int N, int M, int Z, int ImSize, float lambda) +{ + + float val1,val2,val3; + + //calculate each thread global index + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if ((i < N) && (j < M) && (k < Z)) { + if (i <= 0) {val1 = 0.0f;} else {val1 = R1[(N*M)*(k) + (i-1) + N*j];} + if (j <= 0) {val2 = 0.0f;} else {val2 = R2[(N*M)*(k) + i + N*(j-1)];} + if (k <= 0) {val3 = 0.0f;} else {val3 = R3[(N*M)*(k-1) + i + N*j];} + //Write final result to global memory + D[index] = Ad[index] - lambda*(R1[index] + R2[index] + R3[index] - val1 - val2 - val3); + } + return; +} + +__global__ void Grad_func3D_kernel(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, int N, int M, int Z, int ImSize, float multip) +{ + + float val1,val2,val3; + + //calculate each thread global index + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if ((i < N) && (j < M) && (k < Z)) { + /* boundary conditions */ + if (i >= N-1) val1 = 0.0f; else val1 = D[index] - D[(N*M)*(k) + (i+1) + N*j]; + if (j >= M-1) val2 = 0.0f; else val2 = D[index] - D[(N*M)*(k) + i + N*(j+1)]; + if (k >= Z-1) val3 = 0.0f; else val3 = D[index] - D[(N*M)*(k+1) + i + N*j]; + + //Write final result to global memory + P1[index] = R1[index] + multip*val1; + P2[index] = R2[index] + multip*val2; + P3[index] = R3[index] + multip*val3; + } + return; +} + +__global__ void Proj_func3D_iso_kernel(float *P1, float *P2, float *P3, int N, int M, int Z, int ImSize) +{ + + float denom,sq_denom; + //calculate each thread global index + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if ((i < N) && (j < M) && (k < Z)) { + denom = pow(P1[index],2) + pow(P2[index],2) + pow(P3[index],2); + + if (denom > 1.0f) { + sq_denom = 1.0f/sqrt(denom); + P1[index] = P1[index]*sq_denom; + P2[index] = P2[index]*sq_denom; + P3[index] = P3[index]*sq_denom; + } + } + return; +} + +__global__ void Proj_func3D_aniso_kernel(float *P1, float *P2, float *P3, int N, int M, int Z, int ImSize) +{ + + float val1, val2, val3; + //calculate each thread global index + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if ((i < N) && (j < M) && (k < Z)) { + val1 = abs(P1[index]); + val2 = abs(P2[index]); + val3 = abs(P3[index]); + if (val1 < 1.0f) {val1 = 1.0f;} + if (val2 < 1.0f) {val2 = 1.0f;} + if (val3 < 1.0f) {val3 = 1.0f;} + P1[index] = P1[index]/val1; + P2[index] = P2[index]/val2; + P3[index] = P3[index]/val3; + } + return; +} + + +__global__ void Rupd_func3D_kernel(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, float multip2, int N, int M, int Z, int ImSize) +{ + //calculate each thread global index + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if ((i < N) && (j < M) && (k < Z)) { + R1[index] = P1[index] + multip2*(P1[index] - P1_old[index]); + R2[index] = P2[index] + multip2*(P2[index] - P2_old[index]); + R3[index] = P3[index] + multip2*(P3[index] - P3_old[index]); + } + return; +} + +__global__ void nonneg3D_kernel(float* Output, int N, int M, int Z, int num_total) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if (index < num_total) { + if (Output[index] < 0.0f) Output[index] = 0.0f; + } +} + +__global__ void copy_kernel3D(float *Input, float* Output, int N, int M, int Z, int num_total) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if (index < num_total) { + Output[index] = Input[index]; + } +} +/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ + +////////////MAIN HOST FUNCTION /////////////// +extern "C" void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ) +{ + int deviceCount = -1; // number of devices + cudaGetDeviceCount(&deviceCount); + if (deviceCount == 0) { + fprintf(stderr, "No CUDA devices found\n"); + return; + } + + int count = 0, i; + float re, multip,multip2; + float tk = 1.0f; + float tkp1=1.0f; + + if (dimZ <= 1) { + /*2D verson*/ + int ImSize = dimX*dimY; + float *d_input, *d_update=NULL, *d_update_prev=NULL, *P1=NULL, *P2=NULL, *P1_prev=NULL, *P2_prev=NULL, *R1=NULL, *R2=NULL; + + dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D); + dim3 dimGrid(idivup(dimX,BLKXSIZE2D), idivup(dimY,BLKYSIZE2D)); + + /*allocate space for images on device*/ + checkCudaErrors( cudaMalloc((void**)&d_input,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&d_update,ImSize*sizeof(float)) ); + if (epsil != 0.0f) checkCudaErrors( cudaMalloc((void**)&d_update_prev,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P1,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P2,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P1_prev,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P2_prev,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&R1,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&R2,ImSize*sizeof(float)) ); + + checkCudaErrors( cudaMemcpy(d_input,Input,ImSize*sizeof(float),cudaMemcpyHostToDevice)); + cudaMemset(P1, 0, ImSize*sizeof(float)); + cudaMemset(P2, 0, ImSize*sizeof(float)); + cudaMemset(P1_prev, 0, ImSize*sizeof(float)); + cudaMemset(P2_prev, 0, ImSize*sizeof(float)); + cudaMemset(R1, 0, ImSize*sizeof(float)); + cudaMemset(R2, 0, ImSize*sizeof(float)); + + /********************** Run CUDA 2D kernel here ********************/ + multip = (1.0f/(8.0f*lambdaPar)); + + /* The main kernel */ + for (i = 0; i < iter; i++) { + + /* computing the gradient of the objective function */ + Obj_func2D_kernel<<>>(d_input, d_update, R1, R2, dimX, dimY, ImSize, lambdaPar); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + if (nonneg != 0) { + nonneg2D_kernel<<>>(d_update, dimX, dimY, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); } + + /*Taking a step towards minus of the gradient*/ + Grad_func2D_kernel<<>>(P1, P2, d_update, R1, R2, dimX, dimY, ImSize, multip); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + /* projection step */ + if (methodTV == 0) Proj_func2D_iso_kernel<<>>(P1, P2, dimX, dimY, ImSize); /*isotropic TV*/ + else Proj_func2D_aniso_kernel<<>>(P1, P2, dimX, dimY, ImSize); /*anisotropic TV*/ + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f; + multip2 = ((tk-1.0f)/tkp1); + + Rupd_func2D_kernel<<>>(P1, P1_prev, P2, P2_prev, R1, R2, tkp1, tk, multip2, dimX, dimY, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + if (epsil != 0.0f) { + /* calculate norm - stopping rules using the Thrust library */ + ResidCalc2D_kernel<<>>(d_update, d_update_prev, P1_prev, dimX, dimY, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + thrust::device_vector d_vec(P1_prev, P1_prev + ImSize); + float reduction = sqrt(thrust::transform_reduce(d_vec.begin(), d_vec.end(), square(), 0.0f, thrust::plus())); + thrust::device_vector d_vec2(d_update, d_update + ImSize); + float reduction2 = sqrt(thrust::transform_reduce(d_vec2.begin(), d_vec2.end(), square(), 0.0f, thrust::plus())); + + re = (reduction/reduction2); + if (re < epsil) count++; + if (count > 4) break; + + copy_kernel2D<<>>(d_update, d_update_prev, dimX, dimY, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + } + + copy_kernel2D<<>>(P1, P1_prev, dimX, dimY, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + copy_kernel2D<<>>(P2, P2_prev, dimX, dimY, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + tk = tkp1; + } + if (printM == 1) printf("FGP-TV iterations stopped at iteration %i \n", i); + /***************************************************************/ + //copy result matrix from device to host memory + cudaMemcpy(Output,d_update,ImSize*sizeof(float),cudaMemcpyDeviceToHost); + + cudaFree(d_input); + cudaFree(d_update); + if (epsil != 0.0f) cudaFree(d_update_prev); + cudaFree(P1); + cudaFree(P2); + cudaFree(P1_prev); + cudaFree(P2_prev); + cudaFree(R1); + cudaFree(R2); + } + else { + /*3D verson*/ + int ImSize = dimX*dimY*dimZ; + float *d_input, *d_update=NULL, *P1=NULL, *P2=NULL, *P3=NULL, *P1_prev=NULL, *P2_prev=NULL, *P3_prev=NULL, *R1=NULL, *R2=NULL, *R3=NULL; + + dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE); + dim3 dimGrid(idivup(dimX,BLKXSIZE), idivup(dimY,BLKYSIZE),idivup(dimZ,BLKZSIZE)); + + /*allocate space for images on device*/ + checkCudaErrors( cudaMalloc((void**)&d_input,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&d_update,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P1,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P2,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P3,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P1_prev,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P2_prev,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P3_prev,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&R1,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&R2,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&R3,ImSize*sizeof(float)) ); + + checkCudaErrors( cudaMemcpy(d_input,Input,ImSize*sizeof(float),cudaMemcpyHostToDevice)); + cudaMemset(P1, 0, ImSize*sizeof(float)); + cudaMemset(P2, 0, ImSize*sizeof(float)); + cudaMemset(P3, 0, ImSize*sizeof(float)); + cudaMemset(P1_prev, 0, ImSize*sizeof(float)); + cudaMemset(P2_prev, 0, ImSize*sizeof(float)); + cudaMemset(P3_prev, 0, ImSize*sizeof(float)); + cudaMemset(R1, 0, ImSize*sizeof(float)); + cudaMemset(R2, 0, ImSize*sizeof(float)); + cudaMemset(R3, 0, ImSize*sizeof(float)); + /********************** Run CUDA 3D kernel here ********************/ + multip = (1.0f/(8.0f*lambdaPar)); + + /* The main kernel */ + for (i = 0; i < iter; i++) { + + /* computing the gradient of the objective function */ + Obj_func3D_kernel<<>>(d_input, d_update, R1, R2, R3, dimX, dimY, dimZ, ImSize, lambdaPar); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + if (nonneg != 0) { + nonneg3D_kernel<<>>(d_update, dimX, dimY, dimZ, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); } + + /*Taking a step towards minus of the gradient*/ + Grad_func3D_kernel<<>>(P1, P2, P3, d_update, R1, R2, R3, dimX, dimY, dimZ, ImSize, multip); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + /* projection step */ + if (methodTV == 0) Proj_func3D_iso_kernel<<>>(P1, P2, P3, dimX, dimY, dimZ, ImSize); /* isotropic kernel */ + else Proj_func3D_aniso_kernel<<>>(P1, P2, P3, dimX, dimY, dimZ, ImSize); /* anisotropic kernel */ + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f; + multip2 = ((tk-1.0f)/tkp1); + + Rupd_func3D_kernel<<>>(P1, P1_prev, P2, P2_prev, P3, P3_prev, R1, R2, R3, tkp1, tk, multip2, dimX, dimY, dimZ, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + copy_kernel3D<<>>(P1, P1_prev, dimX, dimY, dimZ, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + copy_kernel3D<<>>(P2, P2_prev, dimX, dimY, dimZ, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + copy_kernel3D<<>>(P3, P3_prev, dimX, dimY, dimZ, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + tk = tkp1; + } + if (printM == 1) printf("FGP-TV iterations stopped at iteration %i \n", i); + /***************************************************************/ + //copy result matrix from device to host memory + cudaMemcpy(Output,d_update,ImSize*sizeof(float),cudaMemcpyDeviceToHost); + + cudaFree(d_input); + cudaFree(d_update); + cudaFree(P1); + cudaFree(P2); + cudaFree(P3); + cudaFree(P1_prev); + cudaFree(P2_prev); + cudaFree(P3_prev); + cudaFree(R1); + cudaFree(R2); + cudaFree(R3); + } + cudaDeviceReset(); +} diff --git a/Core/regularizers_GPU/TV_FGP_GPU_core.h b/Core/regularizers_GPU/TV_FGP_GPU_core.h new file mode 100755 index 0000000..107d243 --- /dev/null +++ b/Core/regularizers_GPU/TV_FGP_GPU_core.h @@ -0,0 +1,10 @@ +#include +#include +#include + +#ifndef _TV_FGP_GPU_ +#define _TV_FGP_GPU_ + +extern "C" void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ); + +#endif diff --git a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.cu b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.cu deleted file mode 100755 index a30a89b..0000000 --- a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.cu +++ /dev/null @@ -1,368 +0,0 @@ - /* -This work is part of the Core Imaging Library developed by -Visual Analytics and Imaging System Group of the Science Technology -Facilities Council, STFC - -Copyright 2017 Daniil Kazantsev -Copyright 2017 Srikanth Nagella, Edoardo Pasca - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -#include "TV_ROF_GPU_core.h" - -/* C-OMP implementation of ROF-TV denoising/regularization model [1] (2D/3D case) -* -* Input Parameters: -* 1. Noisy image/volume [REQUIRED] -* 2. lambda - regularization parameter [REQUIRED] -* 3. tau - marching step for explicit scheme, ~0.1 is recommended [REQUIRED] -* 4. Number of iterations, for explicit scheme >= 150 is recommended [REQUIRED] -* -* Output: -* [1] Regularized image/volume - - * This function is based on the paper by -* [1] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms" -* -* D. Kazantsev, 2016-18 -*/ - -#define CHECK(call) \ -{ \ - const cudaError_t error = call; \ - if (error != cudaSuccess) \ - { \ - fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \ - fprintf(stderr, "code: %d, reason: %s\n", error, \ - cudaGetErrorString(error)); \ - exit(1); \ - } \ -} - -#define BLKXSIZE 8 -#define BLKYSIZE 8 -#define BLKZSIZE 8 - -#define BLKXSIZE2D 16 -#define BLKYSIZE2D 16 -#define EPS 1.0e-5 - -#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) ) - -#define MAX(x, y) (((x) > (y)) ? (x) : (y)) -#define MIN(x, y) (((x) < (y)) ? (x) : (y)) - -__host__ __device__ int sign (float x) -{ - return (x > 0) - (x < 0); -} - -/*********************2D case****************************/ - - /* differences 1 */ - __global__ void D1_func2D(float* Input, float* D1, int N, int M) - { - int i1, j1, i2; - float NOMx_1,NOMy_1,NOMy_0,denom1,denom2,T1; - int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; - - int index = i + N*j; - - if ((i >= 0) && (i < N) && (j >= 0) && (j < M)) { - - /* boundary conditions (Neumann reflections) */ - i1 = i + 1; if (i1 >= N) i1 = i-1; - i2 = i - 1; if (i2 < 0) i2 = i+1; - j1 = j + 1; if (j1 >= M) j1 = j-1; - - /* Forward-backward differences */ - NOMx_1 = Input[j1*N + i] - Input[index]; /* x+ */ - NOMy_1 = Input[j*N + i1] - Input[index]; /* y+ */ - NOMy_0 = Input[index] - Input[j*N + i2]; /* y- */ - - denom1 = NOMx_1*NOMx_1; - denom2 = 0.5f*(sign((float)NOMy_1) + sign((float)NOMy_0))*(MIN(abs((float)NOMy_1),abs((float)NOMy_0))); - denom2 = denom2*denom2; - T1 = sqrt(denom1 + denom2 + EPS); - D1[index] = NOMx_1/T1; - } - } - - /* differences 2 */ - __global__ void D2_func2D(float* Input, float* D2, int N, int M) - { - int i1, j1, j2; - float NOMx_1,NOMy_1,NOMx_0,denom1,denom2,T2; - int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; - - int index = i + N*j; - - if ((i >= 0) && (i < (N)) && (j >= 0) && (j < (M))) { - - /* boundary conditions (Neumann reflections) */ - i1 = i + 1; if (i1 >= N) i1 = i-1; - j1 = j + 1; if (j1 >= M) j1 = j-1; - j2 = j - 1; if (j2 < 0) j2 = j+1; - - /* Forward-backward differences */ - NOMx_1 = Input[j1*N + i] - Input[index]; /* x+ */ - NOMy_1 = Input[j*N + i1] - Input[index]; /* y+ */ - NOMx_0 = Input[index] - Input[j2*N + i]; /* x- */ - - denom1 = NOMy_1*NOMy_1; - denom2 = 0.5f*(sign((float)NOMx_1) + sign((float)NOMx_0))*(MIN(abs((float)NOMx_1),abs((float)NOMx_0))); - denom2 = denom2*denom2; - T2 = sqrt(denom1 + denom2 + EPS); - D2[index] = NOMy_1/T2; - } - } - - __global__ void TV_kernel2D(float *D1, float *D2, float *Update, float *Input, float lambda, float tau, int N, int M) - { - int i2, j2; - float dv1,dv2; - int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; - - int index = i + N*j; - - if ((i >= 0) && (i < (N)) && (j >= 0) && (j < (M))) { - - /* boundary conditions (Neumann reflections) */ - i2 = i - 1; if (i2 < 0) i2 = i+1; - j2 = j - 1; if (j2 < 0) j2 = j+1; - - /* divergence components */ - dv1 = D1[index] - D1[j2*N + i]; - dv2 = D2[index] - D2[j*N + i2]; - - Update[index] = Update[index] + tau*(2.0f*lambda*(dv1 + dv2) - (Update[index] - Input[index])); - - } - } -/*********************3D case****************************/ - - /* differences 1 */ - __global__ void D1_func3D(float* Input, float* D1, int dimX, int dimY, int dimZ) - { - float NOMx_1, NOMy_1, NOMy_0, NOMz_1, NOMz_0, denom1, denom2,denom3, T1; - int i1,i2,k1,j1,j2,k2; - - int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; - int k = blockDim.z * blockIdx.z + threadIdx.z; - - int index = (dimX*dimY)*k + j*dimX+i; - - if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) { - - /* symmetric boundary conditions (Neuman) */ - i1 = i + 1; if (i1 >= dimX) i1 = i-1; - i2 = i - 1; if (i2 < 0) i2 = i+1; - j1 = j + 1; if (j1 >= dimY) j1 = j-1; - j2 = j - 1; if (j2 < 0) j2 = j+1; - k1 = k + 1; if (k1 >= dimZ) k1 = k-1; - k2 = k - 1; if (k2 < 0) k2 = k+1; - - /* Forward-backward differences */ - NOMx_1 = Input[(dimX*dimY)*k + j1*dimX + i] - Input[index]; /* x+ */ - NOMy_1 = Input[(dimX*dimY)*k + j*dimX + i1] - Input[index]; /* y+ */ - NOMy_0 = Input[index] - Input[(dimX*dimY)*k + j*dimX + i2]; /* y- */ - - NOMz_1 = Input[(dimX*dimY)*k1 + j*dimX + i] - Input[index]; /* z+ */ - NOMz_0 = Input[index] - Input[(dimX*dimY)*k2 + j*dimX + i]; /* z- */ - - - denom1 = NOMx_1*NOMx_1; - denom2 = 0.5*(sign(NOMy_1) + sign(NOMy_0))*(MIN(abs(NOMy_1),abs(NOMy_0))); - denom2 = denom2*denom2; - denom3 = 0.5*(sign(NOMz_1) + sign(NOMz_0))*(MIN(abs(NOMz_1),abs(NOMz_0))); - denom3 = denom3*denom3; - T1 = sqrt(denom1 + denom2 + denom3 + EPS); - D1[index] = NOMx_1/T1; - } - } - - /* differences 2 */ - __global__ void D2_func3D(float* Input, float* D2, int dimX, int dimY, int dimZ) - { - float NOMx_1, NOMy_1, NOMx_0, NOMz_1, NOMz_0, denom1, denom2, denom3, T2; - int i1,i2,k1,j1,j2,k2; - - int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; - int k = blockDim.z * blockIdx.z + threadIdx.z; - - int index = (dimX*dimY)*k + j*dimX+i; - - if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) { - /* symmetric boundary conditions (Neuman) */ - i1 = i + 1; if (i1 >= dimX) i1 = i-1; - i2 = i - 1; if (i2 < 0) i2 = i+1; - j1 = j + 1; if (j1 >= dimY) j1 = j-1; - j2 = j - 1; if (j2 < 0) j2 = j+1; - k1 = k + 1; if (k1 >= dimZ) k1 = k-1; - k2 = k - 1; if (k2 < 0) k2 = k+1; - - - /* Forward-backward differences */ - NOMx_1 = Input[(dimX*dimY)*k + (j1)*dimX + i] - Input[index]; /* x+ */ - NOMy_1 = Input[(dimX*dimY)*k + (j)*dimX + i1] - Input[index]; /* y+ */ - NOMx_0 = Input[index] - Input[(dimX*dimY)*k + (j2)*dimX + i]; /* x- */ - NOMz_1 = Input[(dimX*dimY)*k1 + j*dimX + i] - Input[index]; /* z+ */ - NOMz_0 = Input[index] - Input[(dimX*dimY)*k2 + (j)*dimX + i]; /* z- */ - - - denom1 = NOMy_1*NOMy_1; - denom2 = 0.5*(sign(NOMx_1) + sign(NOMx_0))*(MIN(abs(NOMx_1),abs(NOMx_0))); - denom2 = denom2*denom2; - denom3 = 0.5*(sign(NOMz_1) + sign(NOMz_0))*(MIN(abs(NOMz_1),abs(NOMz_0))); - denom3 = denom3*denom3; - T2 = sqrt(denom1 + denom2 + denom3 + EPS); - D2[index] = NOMy_1/T2; - } - } - - /* differences 3 */ - __global__ void D3_func3D(float* Input, float* D3, int dimX, int dimY, int dimZ) - { - float NOMx_1, NOMy_1, NOMx_0, NOMy_0, NOMz_1, denom1, denom2, denom3, T3; - int i1,i2,k1,j1,j2,k2; - - int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; - int k = blockDim.z * blockIdx.z + threadIdx.z; - - int index = (dimX*dimY)*k + j*dimX+i; - - if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) { - - i1 = i + 1; if (i1 >= dimX) i1 = i-1; - i2 = i - 1; if (i2 < 0) i2 = i+1; - j1 = j + 1; if (j1 >= dimY) j1 = j-1; - j2 = j - 1; if (j2 < 0) j2 = j+1; - k1 = k + 1; if (k1 >= dimZ) k1 = k-1; - k2 = k - 1; if (k2 < 0) k2 = k+1; - - /* Forward-backward differences */ - NOMx_1 = Input[(dimX*dimY)*k + (j1)*dimX + i] - Input[index]; /* x+ */ - NOMy_1 = Input[(dimX*dimY)*k + (j)*dimX + i1] - Input[index]; /* y+ */ - NOMy_0 = Input[index] - Input[(dimX*dimY)*k + (j)*dimX + i2]; /* y- */ - NOMx_0 = Input[index] - Input[(dimX*dimY)*k + (j2)*dimX + i]; /* x- */ - NOMz_1 = Input[(dimX*dimY)*k1 + j*dimX + i] - Input[index]; /* z+ */ - - denom1 = NOMz_1*NOMz_1; - denom2 = 0.5*(sign(NOMx_1) + sign(NOMx_0))*(MIN(abs(NOMx_1),abs(NOMx_0))); - denom2 = denom2*denom2; - denom3 = 0.5*(sign(NOMy_1) + sign(NOMy_0))*(MIN(abs(NOMy_1),abs(NOMy_0))); - denom3 = denom3*denom3; - T3 = sqrt(denom1 + denom2 + denom3 + EPS); - D3[index] = NOMz_1/T3; - } - } - - __global__ void TV_kernel3D(float *D1, float *D2, float *D3, float *Update, float *Input, float lambda, float tau, int dimX, int dimY, int dimZ) - { - float dv1, dv2, dv3; - int i1,i2,k1,j1,j2,k2; - int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; - int k = blockDim.z * blockIdx.z + threadIdx.z; - - int index = (dimX*dimY)*k + j*dimX+i; - - if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) { - - /* symmetric boundary conditions (Neuman) */ - i1 = i + 1; if (i1 >= dimX) i1 = i-1; - i2 = i - 1; if (i2 < 0) i2 = i+1; - j1 = j + 1; if (j1 >= dimY) j1 = j-1; - j2 = j - 1; if (j2 < 0) j2 = j+1; - k1 = k + 1; if (k1 >= dimZ) k1 = k-1; - k2 = k - 1; if (k2 < 0) k2 = k+1; - - /*divergence components */ - dv1 = D1[index] - D1[(dimX*dimY)*k + j2*dimX+i]; - dv2 = D2[index] - D2[(dimX*dimY)*k + j*dimX+i2]; - dv3 = D3[index] - D3[(dimX*dimY)*k2 + j*dimX+i]; - - Update[index] = Update[index] + tau*(2.0f*lambda*(dv1 + dv2 + dv3) - (Update[index] - Input[index])); - - } - } - -///////////////////////////////////////////////// -// HOST FUNCTION -extern "C" void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z) -{ - // set up device - int dev = 0; - CHECK(cudaSetDevice(dev)); - float *d_input, *d_update, *d_D1, *d_D2; - - if (Z == 0) Z = 1; - CHECK(cudaMalloc((void**)&d_input,N*M*Z*sizeof(float))); - CHECK(cudaMalloc((void**)&d_update,N*M*Z*sizeof(float))); - CHECK(cudaMalloc((void**)&d_D1,N*M*Z*sizeof(float))); - CHECK(cudaMalloc((void**)&d_D2,N*M*Z*sizeof(float))); - - CHECK(cudaMemcpy(d_input,Input,N*M*Z*sizeof(float),cudaMemcpyHostToDevice)); - CHECK(cudaMemcpy(d_update,Input,N*M*Z*sizeof(float),cudaMemcpyHostToDevice)); - - if (Z > 1) { - // TV - 3D case - dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE); - dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE),idivup(Z,BLKXSIZE)); - - float *d_D3; - CHECK(cudaMalloc((void**)&d_D3,N*M*Z*sizeof(float))); - - for(int n=0; n < iter; n++) { - /* calculate differences */ - D1_func3D<<>>(d_update, d_D1, N, M, Z); - CHECK(cudaDeviceSynchronize()); - D2_func3D<<>>(d_update, d_D2, N, M, Z); - CHECK(cudaDeviceSynchronize()); - D3_func3D<<>>(d_update, d_D3, N, M, Z); - CHECK(cudaDeviceSynchronize()); - /*running main kernel*/ - TV_kernel3D<<>>(d_D1, d_D2, d_D3, d_update, d_input, lambdaPar, tau, N, M, Z); - CHECK(cudaDeviceSynchronize()); - } - - CHECK(cudaFree(d_D3)); - } - else { - // TV - 2D case - dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D); - dim3 dimGrid(idivup(N,BLKXSIZE2D), idivup(M,BLKYSIZE2D)); - - for(int n=0; n < iter; n++) { - /* calculate differences */ - D1_func2D<<>>(d_update, d_D1, N, M); - CHECK(cudaDeviceSynchronize()); - D2_func2D<<>>(d_update, d_D2, N, M); - CHECK(cudaDeviceSynchronize()); - /*running main kernel*/ - TV_kernel2D<<>>(d_D1, d_D2, d_update, d_input, lambdaPar, tau, N, M); - CHECK(cudaDeviceSynchronize()); - } - } - CHECK(cudaMemcpy(Output,d_update,N*M*Z*sizeof(float),cudaMemcpyDeviceToHost)); - CHECK(cudaFree(d_input)); - CHECK(cudaFree(d_update)); - CHECK(cudaFree(d_D1)); - CHECK(cudaFree(d_D2)); - cudaDeviceReset(); -} diff --git a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.h b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.h deleted file mode 100755 index d772aba..0000000 --- a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef __TVGPU_H__ -#define __TVGPU_H__ -#include "CCPiDefines.h" -#include - -extern "C" CCPI_EXPORT void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z); - -#endif diff --git a/Core/regularizers_GPU/TV_ROF_GPU_core.cu b/Core/regularizers_GPU/TV_ROF_GPU_core.cu new file mode 100755 index 0000000..1a54d02 --- /dev/null +++ b/Core/regularizers_GPU/TV_ROF_GPU_core.cu @@ -0,0 +1,368 @@ + /* +This work is part of the Core Imaging Library developed by +Visual Analytics and Imaging System Group of the Science Technology +Facilities Council, STFC + +Copyright 2017 Daniil Kazantsev +Copyright 2017 Srikanth Nagella, Edoardo Pasca + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at +http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#include "TV_ROF_GPU_core.h" + +/* C-OMP implementation of ROF-TV denoising/regularization model [1] (2D/3D case) +* +* Input Parameters: +* 1. Noisy image/volume [REQUIRED] +* 2. lambda - regularization parameter [REQUIRED] +* 3. tau - marching step for explicit scheme, ~0.1 is recommended [REQUIRED] +* 4. Number of iterations, for explicit scheme >= 150 is recommended [REQUIRED] +* +* Output: +* [1] Regularized image/volume + + * This function is based on the paper by +* [1] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms" +* +* D. Kazantsev, 2016-18 +*/ + +#define CHECK(call) \ +{ \ + const cudaError_t error = call; \ + if (error != cudaSuccess) \ + { \ + fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \ + fprintf(stderr, "code: %d, reason: %s\n", error, \ + cudaGetErrorString(error)); \ + exit(1); \ + } \ +} + +#define BLKXSIZE 8 +#define BLKYSIZE 8 +#define BLKZSIZE 8 + +#define BLKXSIZE2D 16 +#define BLKYSIZE2D 16 +#define EPS 1.0e-5 + +#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) ) + +#define MAX(x, y) (((x) > (y)) ? (x) : (y)) +#define MIN(x, y) (((x) < (y)) ? (x) : (y)) + +__host__ __device__ int sign (float x) +{ + return (x > 0) - (x < 0); +} + +/*********************2D case****************************/ + + /* differences 1 */ + __global__ void D1_func2D(float* Input, float* D1, int N, int M) + { + int i1, j1, i2; + float NOMx_1,NOMy_1,NOMy_0,denom1,denom2,T1; + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + + int index = i + N*j; + + if ((i >= 0) && (i < N) && (j >= 0) && (j < M)) { + + /* boundary conditions (Neumann reflections) */ + i1 = i + 1; if (i1 >= N) i1 = i-1; + i2 = i - 1; if (i2 < 0) i2 = i+1; + j1 = j + 1; if (j1 >= M) j1 = j-1; + + /* Forward-backward differences */ + NOMx_1 = Input[j1*N + i] - Input[index]; /* x+ */ + NOMy_1 = Input[j*N + i1] - Input[index]; /* y+ */ + NOMy_0 = Input[index] - Input[j*N + i2]; /* y- */ + + denom1 = NOMx_1*NOMx_1; + denom2 = 0.5f*(sign((float)NOMy_1) + sign((float)NOMy_0))*(MIN(abs((float)NOMy_1),abs((float)NOMy_0))); + denom2 = denom2*denom2; + T1 = sqrt(denom1 + denom2 + EPS); + D1[index] = NOMx_1/T1; + } + } + + /* differences 2 */ + __global__ void D2_func2D(float* Input, float* D2, int N, int M) + { + int i1, j1, j2; + float NOMx_1,NOMy_1,NOMx_0,denom1,denom2,T2; + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + + int index = i + N*j; + + if ((i >= 0) && (i < (N)) && (j >= 0) && (j < (M))) { + + /* boundary conditions (Neumann reflections) */ + i1 = i + 1; if (i1 >= N) i1 = i-1; + j1 = j + 1; if (j1 >= M) j1 = j-1; + j2 = j - 1; if (j2 < 0) j2 = j+1; + + /* Forward-backward differences */ + NOMx_1 = Input[j1*N + i] - Input[index]; /* x+ */ + NOMy_1 = Input[j*N + i1] - Input[index]; /* y+ */ + NOMx_0 = Input[index] - Input[j2*N + i]; /* x- */ + + denom1 = NOMy_1*NOMy_1; + denom2 = 0.5f*(sign((float)NOMx_1) + sign((float)NOMx_0))*(MIN(abs((float)NOMx_1),abs((float)NOMx_0))); + denom2 = denom2*denom2; + T2 = sqrt(denom1 + denom2 + EPS); + D2[index] = NOMy_1/T2; + } + } + + __global__ void TV_kernel2D(float *D1, float *D2, float *Update, float *Input, float lambda, float tau, int N, int M) + { + int i2, j2; + float dv1,dv2; + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + + int index = i + N*j; + + if ((i >= 0) && (i < (N)) && (j >= 0) && (j < (M))) { + + /* boundary conditions (Neumann reflections) */ + i2 = i - 1; if (i2 < 0) i2 = i+1; + j2 = j - 1; if (j2 < 0) j2 = j+1; + + /* divergence components */ + dv1 = D1[index] - D1[j2*N + i]; + dv2 = D2[index] - D2[j*N + i2]; + + Update[index] = Update[index] + tau*(lambda*(dv1 + dv2) - (Update[index] - Input[index])); + + } + } +/*********************3D case****************************/ + + /* differences 1 */ + __global__ void D1_func3D(float* Input, float* D1, int dimX, int dimY, int dimZ) + { + float NOMx_1, NOMy_1, NOMy_0, NOMz_1, NOMz_0, denom1, denom2,denom3, T1; + int i1,i2,k1,j1,j2,k2; + + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (dimX*dimY)*k + j*dimX+i; + + if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) { + + /* symmetric boundary conditions (Neuman) */ + i1 = i + 1; if (i1 >= dimX) i1 = i-1; + i2 = i - 1; if (i2 < 0) i2 = i+1; + j1 = j + 1; if (j1 >= dimY) j1 = j-1; + j2 = j - 1; if (j2 < 0) j2 = j+1; + k1 = k + 1; if (k1 >= dimZ) k1 = k-1; + k2 = k - 1; if (k2 < 0) k2 = k+1; + + /* Forward-backward differences */ + NOMx_1 = Input[(dimX*dimY)*k + j1*dimX + i] - Input[index]; /* x+ */ + NOMy_1 = Input[(dimX*dimY)*k + j*dimX + i1] - Input[index]; /* y+ */ + NOMy_0 = Input[index] - Input[(dimX*dimY)*k + j*dimX + i2]; /* y- */ + + NOMz_1 = Input[(dimX*dimY)*k1 + j*dimX + i] - Input[index]; /* z+ */ + NOMz_0 = Input[index] - Input[(dimX*dimY)*k2 + j*dimX + i]; /* z- */ + + + denom1 = NOMx_1*NOMx_1; + denom2 = 0.5*(sign(NOMy_1) + sign(NOMy_0))*(MIN(abs(NOMy_1),abs(NOMy_0))); + denom2 = denom2*denom2; + denom3 = 0.5*(sign(NOMz_1) + sign(NOMz_0))*(MIN(abs(NOMz_1),abs(NOMz_0))); + denom3 = denom3*denom3; + T1 = sqrt(denom1 + denom2 + denom3 + EPS); + D1[index] = NOMx_1/T1; + } + } + + /* differences 2 */ + __global__ void D2_func3D(float* Input, float* D2, int dimX, int dimY, int dimZ) + { + float NOMx_1, NOMy_1, NOMx_0, NOMz_1, NOMz_0, denom1, denom2, denom3, T2; + int i1,i2,k1,j1,j2,k2; + + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (dimX*dimY)*k + j*dimX+i; + + if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) { + /* symmetric boundary conditions (Neuman) */ + i1 = i + 1; if (i1 >= dimX) i1 = i-1; + i2 = i - 1; if (i2 < 0) i2 = i+1; + j1 = j + 1; if (j1 >= dimY) j1 = j-1; + j2 = j - 1; if (j2 < 0) j2 = j+1; + k1 = k + 1; if (k1 >= dimZ) k1 = k-1; + k2 = k - 1; if (k2 < 0) k2 = k+1; + + + /* Forward-backward differences */ + NOMx_1 = Input[(dimX*dimY)*k + (j1)*dimX + i] - Input[index]; /* x+ */ + NOMy_1 = Input[(dimX*dimY)*k + (j)*dimX + i1] - Input[index]; /* y+ */ + NOMx_0 = Input[index] - Input[(dimX*dimY)*k + (j2)*dimX + i]; /* x- */ + NOMz_1 = Input[(dimX*dimY)*k1 + j*dimX + i] - Input[index]; /* z+ */ + NOMz_0 = Input[index] - Input[(dimX*dimY)*k2 + (j)*dimX + i]; /* z- */ + + + denom1 = NOMy_1*NOMy_1; + denom2 = 0.5*(sign(NOMx_1) + sign(NOMx_0))*(MIN(abs(NOMx_1),abs(NOMx_0))); + denom2 = denom2*denom2; + denom3 = 0.5*(sign(NOMz_1) + sign(NOMz_0))*(MIN(abs(NOMz_1),abs(NOMz_0))); + denom3 = denom3*denom3; + T2 = sqrt(denom1 + denom2 + denom3 + EPS); + D2[index] = NOMy_1/T2; + } + } + + /* differences 3 */ + __global__ void D3_func3D(float* Input, float* D3, int dimX, int dimY, int dimZ) + { + float NOMx_1, NOMy_1, NOMx_0, NOMy_0, NOMz_1, denom1, denom2, denom3, T3; + int i1,i2,k1,j1,j2,k2; + + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (dimX*dimY)*k + j*dimX+i; + + if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) { + + i1 = i + 1; if (i1 >= dimX) i1 = i-1; + i2 = i - 1; if (i2 < 0) i2 = i+1; + j1 = j + 1; if (j1 >= dimY) j1 = j-1; + j2 = j - 1; if (j2 < 0) j2 = j+1; + k1 = k + 1; if (k1 >= dimZ) k1 = k-1; + k2 = k - 1; if (k2 < 0) k2 = k+1; + + /* Forward-backward differences */ + NOMx_1 = Input[(dimX*dimY)*k + (j1)*dimX + i] - Input[index]; /* x+ */ + NOMy_1 = Input[(dimX*dimY)*k + (j)*dimX + i1] - Input[index]; /* y+ */ + NOMy_0 = Input[index] - Input[(dimX*dimY)*k + (j)*dimX + i2]; /* y- */ + NOMx_0 = Input[index] - Input[(dimX*dimY)*k + (j2)*dimX + i]; /* x- */ + NOMz_1 = Input[(dimX*dimY)*k1 + j*dimX + i] - Input[index]; /* z+ */ + + denom1 = NOMz_1*NOMz_1; + denom2 = 0.5*(sign(NOMx_1) + sign(NOMx_0))*(MIN(abs(NOMx_1),abs(NOMx_0))); + denom2 = denom2*denom2; + denom3 = 0.5*(sign(NOMy_1) + sign(NOMy_0))*(MIN(abs(NOMy_1),abs(NOMy_0))); + denom3 = denom3*denom3; + T3 = sqrt(denom1 + denom2 + denom3 + EPS); + D3[index] = NOMz_1/T3; + } + } + + __global__ void TV_kernel3D(float *D1, float *D2, float *D3, float *Update, float *Input, float lambda, float tau, int dimX, int dimY, int dimZ) + { + float dv1, dv2, dv3; + int i1,i2,k1,j1,j2,k2; + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (dimX*dimY)*k + j*dimX+i; + + if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) { + + /* symmetric boundary conditions (Neuman) */ + i1 = i + 1; if (i1 >= dimX) i1 = i-1; + i2 = i - 1; if (i2 < 0) i2 = i+1; + j1 = j + 1; if (j1 >= dimY) j1 = j-1; + j2 = j - 1; if (j2 < 0) j2 = j+1; + k1 = k + 1; if (k1 >= dimZ) k1 = k-1; + k2 = k - 1; if (k2 < 0) k2 = k+1; + + /*divergence components */ + dv1 = D1[index] - D1[(dimX*dimY)*k + j2*dimX+i]; + dv2 = D2[index] - D2[(dimX*dimY)*k + j*dimX+i2]; + dv3 = D3[index] - D3[(dimX*dimY)*k2 + j*dimX+i]; + + Update[index] = Update[index] + tau*(lambda*(dv1 + dv2 + dv3) - (Update[index] - Input[index])); + + } + } + +///////////////////////////////////////////////// +// HOST FUNCTION +extern "C" void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z) +{ + // set up device + int dev = 0; + CHECK(cudaSetDevice(dev)); + float *d_input, *d_update, *d_D1, *d_D2; + + if (Z == 0) Z = 1; + CHECK(cudaMalloc((void**)&d_input,N*M*Z*sizeof(float))); + CHECK(cudaMalloc((void**)&d_update,N*M*Z*sizeof(float))); + CHECK(cudaMalloc((void**)&d_D1,N*M*Z*sizeof(float))); + CHECK(cudaMalloc((void**)&d_D2,N*M*Z*sizeof(float))); + + CHECK(cudaMemcpy(d_input,Input,N*M*Z*sizeof(float),cudaMemcpyHostToDevice)); + CHECK(cudaMemcpy(d_update,Input,N*M*Z*sizeof(float),cudaMemcpyHostToDevice)); + + if (Z > 1) { + // TV - 3D case + dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE); + dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE),idivup(Z,BLKXSIZE)); + + float *d_D3; + CHECK(cudaMalloc((void**)&d_D3,N*M*Z*sizeof(float))); + + for(int n=0; n < iter; n++) { + /* calculate differences */ + D1_func3D<<>>(d_update, d_D1, N, M, Z); + CHECK(cudaDeviceSynchronize()); + D2_func3D<<>>(d_update, d_D2, N, M, Z); + CHECK(cudaDeviceSynchronize()); + D3_func3D<<>>(d_update, d_D3, N, M, Z); + CHECK(cudaDeviceSynchronize()); + /*running main kernel*/ + TV_kernel3D<<>>(d_D1, d_D2, d_D3, d_update, d_input, lambdaPar, tau, N, M, Z); + CHECK(cudaDeviceSynchronize()); + } + + CHECK(cudaFree(d_D3)); + } + else { + // TV - 2D case + dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D); + dim3 dimGrid(idivup(N,BLKXSIZE2D), idivup(M,BLKYSIZE2D)); + + for(int n=0; n < iter; n++) { + /* calculate differences */ + D1_func2D<<>>(d_update, d_D1, N, M); + CHECK(cudaDeviceSynchronize()); + D2_func2D<<>>(d_update, d_D2, N, M); + CHECK(cudaDeviceSynchronize()); + /*running main kernel*/ + TV_kernel2D<<>>(d_D1, d_D2, d_update, d_input, lambdaPar, tau, N, M); + CHECK(cudaDeviceSynchronize()); + } + } + CHECK(cudaMemcpy(Output,d_update,N*M*Z*sizeof(float),cudaMemcpyDeviceToHost)); + CHECK(cudaFree(d_input)); + CHECK(cudaFree(d_update)); + CHECK(cudaFree(d_D1)); + CHECK(cudaFree(d_D2)); + cudaDeviceReset(); +} diff --git a/Core/regularizers_GPU/TV_ROF_GPU_core.h b/Core/regularizers_GPU/TV_ROF_GPU_core.h new file mode 100755 index 0000000..d772aba --- /dev/null +++ b/Core/regularizers_GPU/TV_ROF_GPU_core.h @@ -0,0 +1,8 @@ +#ifndef __TVGPU_H__ +#define __TVGPU_H__ +#include "CCPiDefines.h" +#include + +extern "C" CCPI_EXPORT void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z); + +#endif diff --git a/Readme.md b/Readme.md index d91e420..3ec20dc 100644 --- a/Readme.md +++ b/Readme.md @@ -1,24 +1,27 @@ -# CCPi-Regularisation Toolkit (CCPi-RGL) +# CCPi-Regularization Toolkit (CCPi-RGL) -**Iterative image reconstruction (IIR) methods normally require regularisation to stabilise convergence and make the reconstruction problem more well-posed. -CCPi-RGL software consist of 2D/3D regularisation modules which frequently used for IIR. -The core modules are written in C-OMP and CUDA languages and wrappers for Matlab and Python are provided.** +**Iterative image reconstruction (IIR) methods normally require regularization to stabilize the convergence and make the reconstruction problem more well-posed. +CCPi-RGL software consist of 2D/3D regularization modules for single-channel and multi-channel reconstruction problems. The modules especially suited for IIR, however, +can also be used as image denoising iterative filters. The core modules are written in C-OMP and CUDA languages and wrappers for Matlab and Python are provided.** ## Prerequisites: - * MATLAB (www.mathworks.com/products/matlab/) - * Python (ver. 3.5); Cython + * MATLAB (www.mathworks.com/products/matlab/) OR + * Python (tested ver. 3.5); Cython * C compilers * nvcc (CUDA SDK) compilers ## Package modules (regularisers): -1. Rudin-Osher-Fatemi Total Variation (explicit PDE minimisation scheme) [2D/3D GPU/CPU] -2. Fast-Gradient-Projection Total Variation [2D/3D GPU/CPU] +### Single-channel +1. Rudin-Osher-Fatemi (ROF) Total Variation (explicit PDE minimisation scheme) [2D/3D GPU/CPU]; (Ref. 1) +2. Fast-Gradient-Projection (FGP) Total Variation [2D/3D GPU/CPU]; (Ref. 2) -### Installation: +### Multi-channel -#### Python (conda-build) +## Installation: + +### Python (conda-build) ``` export CIL_VERSION=0.9.2 conda build recipes/regularizers --numpy 1.12 --python 3.5 @@ -29,7 +32,12 @@ The core modules are written in C-OMP and CUDA languages and wrappers for Matlab cd test/ python test_cpu_vs_gpu_regularizers.py ``` -#### Matlab +### Matlab +``` + cd /Wrappers/Matlab/mex_compile + compileCPU_mex.m % to compile CPU modules + compileGPU_mex.m % to compile GPU modules (see instructions in the file) +``` ### References: 1. Rudin, L.I., Osher, S. and Fatemi, E., 1992. Nonlinear total variation based noise removal algorithms. Physica D: nonlinear phenomena, 60(1-4), pp.259-268. diff --git a/Wrappers/Matlab/demos/demoMatlab_denoise.m b/Wrappers/Matlab/demos/demoMatlab_denoise.m new file mode 100644 index 0000000..7258e5e --- /dev/null +++ b/Wrappers/Matlab/demos/demoMatlab_denoise.m @@ -0,0 +1,38 @@ +% Image (2D) denoising demo using CCPi-RGL + +addpath('../mex_compile/installed'); +addpath('../../../data/'); + +Im = double(imread('lena_gray_256.tif'))/255; % loading image +u0 = Im + .05*randn(size(Im)); u0(u0 < 0) = 0; +figure; imshow(u0, [0 1]); title('Noisy image'); + +%% +fprintf('Denoise using ROF-TV model (CPU) \n'); +lambda_rof = 0.03; % regularization parameter +tau_rof = 0.0025; % time-marching constant +iter_rof = 2000; % number of ROF iterations +tic; u_rof = ROF_TV(single(u0), lambda_rof, iter_rof, tau_rof); toc; +figure; imshow(u_rof, [0 1]); title('ROF-TV denoised image (CPU)'); +%% +% fprintf('Denoise using ROF-TV model (GPU) \n'); +% lambda_rof = 0.03; % regularization parameter +% tau_rof = 0.0025; % time-marching constant +% iter_rof = 2000; % number of ROF iterations +% tic; u_rofG = ROF_TV_GPU(single(u0), lambda_rof, iter_rof, tau_rof); toc; +% figure; imshow(u_rofG, [0 1]); title('ROF-TV denoised image (GPU)'); +%% +fprintf('Denoise using FGP-TV model (CPU) \n'); +lambda_fgp = 0.03; % regularization parameter +iter_fgp = 1000; % number of FGP iterations +epsil_tol = 1.0e-05; % tolerance +tic; u_fgp = FGP_TV(single(u0), lambda_fgp, iter_fgp, epsil_tol); toc; +figure; imshow(u_fgp, [0 1]); title('FGP-TV denoised image (CPU)'); +%% +% fprintf('Denoise using FGP-TV model (GPU) \n'); +% lambda_fgp = 0.03; % regularization parameter +% iter_fgp = 1000; % number of FGP iterations +% epsil_tol = 1.0e-05; % tolerance +% tic; u_fgpG = FGP_TV_GPU(single(u0), lambda_fgp, iter_fgp, epsil_tol); toc; +% figure; imshow(u_fgpG, [0 1]); title('FGP-TV denoised image (GPU)'); +%% \ No newline at end of file diff --git a/Wrappers/Matlab/demos/demoMatlab_denoise.m~ b/Wrappers/Matlab/demos/demoMatlab_denoise.m~ new file mode 100644 index 0000000..3f4403e --- /dev/null +++ b/Wrappers/Matlab/demos/demoMatlab_denoise.m~ @@ -0,0 +1,31 @@ +% Image (2D) denoising demo using CCPi-RGL + +addpath('../mex_compile/installed'); +addpath('../../../data/'); + +Im = double(imread('lena_gray_256.tif'))/255; % loading image +u0 = Im + .05*randn(size(Im)); u0(u0 < 0) = 0; +figure; imshow(u0, [0 1]); title('Noisy image'); + +%% +fprintf('Denoise using ROF-TV model (CPU) \n'); +lambda_rof = 0.02; % regularization parameter +tau_rof = 0.0025; % time-marching constant +iter_rof = 2000; % number of ROF iterations +tic; u_rof = ROF_TV(single(u0), lambda_rof, iter_rof, tau_rof); toc; +figure; imshow(u_rof, [0 1]); title('ROF-TV denoised image (CPU)'); +%% +% fprintf('Denoise using ROF-TV model (GPU) \n'); +% lambda_rof = 0.02; % regularization parameter +% tau_rof = 0.0025; % time-marching constant +% iter_rof = 2000; % number of ROF iterations +% tic; u_rof = ROF_TV_GPU(single(u0), lambda_rof, iter_rof, tau_rof); toc; +% figure; imshow(u_rof, [0 1]); title('ROF-TV denoised image (GPU)'); +%% +fprintf('Denoise using FGP-TV model (CPU) \n'); +lambda_fgp = 0.02; % regularization parameter +iter_fgp = 2000; % number of FGP iterations +epsil_tol = 1.0e-05; % tolerance +tic; u_fgp = FGP_TV(single(u0), lambda_fgp, iter_fgp, epsil_tol); toc; +figure; imshow(u_rof, [0 1]); title('ROF-TV denoised image (CPU)'); +%% \ No newline at end of file diff --git a/Wrappers/Matlab/mex_compile/compileCPU_mex.m b/Wrappers/Matlab/mex_compile/compileCPU_mex.m new file mode 100644 index 0000000..fcee53a --- /dev/null +++ b/Wrappers/Matlab/mex_compile/compileCPU_mex.m @@ -0,0 +1,19 @@ +% execute this mex file in Matlab once +copyfile ../../../Core/regularizers_CPU/ regularizers_CPU/ +copyfile ../../../Core/CCPiDefines.h regularizers_CPU/ + +cd regularizers_CPU/ + +fprintf('%s \n', 'Compiling CPU regularizers...'); +mex ROF_TV.c ROF_TV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" +movefile ROF_TV.mex* ../installed/ + +mex FGP_TV.c FGP_TV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" +movefile FGP_TV.mex* ../installed/ + +delete ROF_TV_core* FGP_TV_core* utils.c utils.h CCPiDefines.h + +fprintf('%s \n', 'All successfully compiled!'); + +cd ../../ +cd demos diff --git a/Wrappers/Matlab/mex_compile/compileGPU_mex.m b/Wrappers/Matlab/mex_compile/compileGPU_mex.m new file mode 100644 index 0000000..df29a3e --- /dev/null +++ b/Wrappers/Matlab/mex_compile/compileGPU_mex.m @@ -0,0 +1,30 @@ +% execute this mex file in Matlab once + +%>>>>>>>>>>>>>>Important<<<<<<<<<<<<<<<<<<< +% In order to compile CUDA modules one needs to have nvcc-compiler +% installed (see CUDA SDK) +% check it under MATLAB with !nvcc --version +% In the code bellow we provide a full path to nvcc compiler +% ! paths to matlab and CUDA sdk can be different, modify accordingly ! + +% tested on Ubuntu 16.04/MATLAB 2016b + +copyfile ../../../Core/regularizers_GPU/ regularizers_GPU/ +copyfile ../../../Core/CCPiDefines.h regularizers_GPU/ + +cd regularizers_GPU/ + +fprintf('%s \n', 'Compiling GPU regularizers (CUDA)...'); +!/usr/local/cuda/bin/nvcc -O0 -c TV_ROF_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/ +mex -g -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart -lcufft -lmwgpu ROF_TV_GPU.cpp TV_ROF_GPU_core.o +movefile ROF_TV_GPU.mex* ../installed/ + +!/usr/local/cuda/bin/nvcc -O0 -c TV_FGP_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/ +mex -g -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart -lcufft -lmwgpu FGP_TV_GPU.cpp TV_FGP_GPU_core.o +movefile FGP_TV_GPU.mex* ../installed/ + +delete TV_ROF_GPU_core* TV_FGP_GPU_core* CCPiDefines.h +fprintf('%s \n', 'All successfully compiled!'); + +cd ../../ +cd demos diff --git a/Wrappers/Matlab/mex_compile/compile_mex.m b/Wrappers/Matlab/mex_compile/compile_mex.m deleted file mode 100644 index d45ac04..0000000 --- a/Wrappers/Matlab/mex_compile/compile_mex.m +++ /dev/null @@ -1,19 +0,0 @@ -% compile mex's in Matlab once -copyfile ../../../Core/regularizers_CPU/ regularizers_CPU/ -copyfile ../../../Core/regularizers_GPU/ regularizers_GPU/ -copyfile ../../../Core/CCPiDefines.h regularizers_CPU/ - -cd regularizers_CPU/ - -% compile C regularizers - -mex ROF_TV.c ROF_TV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" -mex FGP_TV.c FGP_TV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" - -delete ROF_TV_core.c ROF_TV_core.h FGP_TV_core.c FGP_TV_core.h utils.c utils.h CCPiDefines.h - -% compile CUDA-based regularizers -%cd regularizers_GPU/ - -cd ../../ -cd demos diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/FGP_TV.c b/Wrappers/Matlab/mex_compile/regularizers_CPU/FGP_TV.c index 7cc861a..ba06cc7 100644 --- a/Wrappers/Matlab/mex_compile/regularizers_CPU/FGP_TV.c +++ b/Wrappers/Matlab/mex_compile/regularizers_CPU/FGP_TV.c @@ -1,21 +1,21 @@ /* -This work is part of the Core Imaging Library developed by -Visual Analytics and Imaging System Group of the Science Technology -Facilities Council, STFC - -Copyright 2017 Daniil Kazantsev -Copyright 2017 Srikanth Nagella, Edoardo Pasca - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ + * This work is part of the Core Imaging Library developed by + * Visual Analytics and Imaging System Group of the Science Technology + * Facilities Council, STFC + * + * Copyright 2017 Daniil Kazantsev + * Copyright 2017 Srikanth Nagella, Edoardo Pasca + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #include "matrix.h" #include "mex.h" #include "FGP_TV_core.h" @@ -23,28 +23,19 @@ limitations under the License. /* C-OMP implementation of FGP-TV [1] denoising/regularization model (2D/3D case) * * Input Parameters: - * 1. Noisy image/volume [REQUIRED] - * 2. lambda - regularization parameter [REQUIRED] - * 3. Number of iterations [OPTIONAL parameter] - * 4. eplsilon: tolerance constant [OPTIONAL parameter] - * 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter] + * 1. Noisy image/volume + * 2. lambdaPar - regularization parameter + * 3. Number of iterations + * 4. eplsilon: tolerance constant + * 5. TV-type: methodTV - 'iso' (0) or 'l1' (1) + * 6. nonneg: 'nonnegativity (0 is OFF by default) + * 7. print information: 0 (off) or 1 (on) * * Output: * [1] Filtered/regularized image - * [2] last function value - * - * Example of image denoising: - * figure; - * Im = double(imread('lena_gray_256.tif'))/255; % loading image - * u0 = Im + .05*randn(size(Im)); % adding noise - * u = FGP_TV(single(u0), 0.05, 100, 1e-04); * - * to compile with OMP support: mex FGP_TV.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" * This function is based on the Matlab's code and paper by * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems" - * - * D. Kazantsev, 2016-17 - * */ @@ -53,42 +44,53 @@ void mexFunction( int nrhs, const mxArray *prhs[]) { - int number_of_dims, iter, dimX, dimY, dimZ, methTV; + int number_of_dims, iter, dimX, dimY, dimZ, methTV, printswitch, nonneg; const int *dim_array; - float *Input, *Output, lambda, epsil; + float *Input, *Output=NULL, lambda, epsil; number_of_dims = mxGetNumberOfDimensions(prhs[0]); dim_array = mxGetDimensions(prhs[0]); /*Handling Matlab input data*/ - if ((nrhs < 2) || (nrhs > 5)) mexErrMsgTxt("At least 2 parameters is required: Image(2D/3D), Regularization parameter. The full list of parameters: Image(2D/3D), Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1')"); + if ((nrhs < 2) || (nrhs > 7)) mexErrMsgTxt("At least 2 parameters is required, all parameters are: Image(2D/3D), Regularization parameter. The full list of parameters: Image(2D/3D), Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1'), nonnegativity switch, print switch"); Input = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */ lambda = (float) mxGetScalar(prhs[1]); /* regularization parameter */ iter = 300; /* default iterations number */ epsil = 0.0001; /* default tolerance constant */ methTV = 0; /* default isotropic TV penalty */ + nonneg = 0; /* default nonnegativity switch, off - 0 */ + printswitch = 0; /*default print is switched, off - 0 */ - if ((nrhs == 3) || (nrhs == 4) || (nrhs == 5)) iter = (int) mxGetScalar(prhs[2]); /* iterations number */ - if ((nrhs == 4) || (nrhs == 5)) epsil = (float) mxGetScalar(prhs[3]); /* tolerance constant */ - if (nrhs == 5) { + if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); } + + if ((nrhs == 3) || (nrhs == 4) || (nrhs == 5) || (nrhs == 6) || (nrhs == 7)) iter = (int) mxGetScalar(prhs[2]); /* iterations number */ + if ((nrhs == 4) || (nrhs == 5) || (nrhs == 6) || (nrhs == 7)) epsil = (float) mxGetScalar(prhs[3]); /* tolerance constant */ + if ((nrhs == 5) || (nrhs == 6) || (nrhs == 7)) { char *penalty_type; penalty_type = mxArrayToString(prhs[4]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */ if ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0)) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',"); if (strcmp(penalty_type, "l1") == 0) methTV = 1; /* enable 'l1' penalty */ mxFree(penalty_type); } - - if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); } + if ((nrhs == 6) || (nrhs == 7)) { + nonneg = (int) mxGetScalar(prhs[5]); + if ((nonneg != 0) && (nonneg != 1)) mexErrMsgTxt("Nonnegativity constraint can be enabled by choosing 1 or off - 0"); + } + if (nrhs == 7) { + printswitch = (int) mxGetScalar(prhs[6]); + if ((printswitch != 0) && (printswitch != 1)) mexErrMsgTxt("Print can be enabled by choosing 1 or off - 0"); + } /*Handling Matlab output data*/ - dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2]; + dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2]; if (number_of_dims == 2) { dimZ = 1; /*2D case*/ Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - TV_FGP_CPU_main(Input, Output, lambda, iter, epsil, methTV, 0, 0, dimX, dimY, dimZ); - } - if (number_of_dims == 3) { - } -} + } + if (number_of_dims == 3) Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); + + /* running the function */ + TV_FGP_CPU_main(Input, Output, lambda, iter, epsil, methTV, nonneg, printswitch, dimX, dimY, dimZ); +} \ No newline at end of file diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/LLT_model.c b/Wrappers/Matlab/mex_compile/regularizers_CPU/LLT_model.c deleted file mode 100644 index 0b07b47..0000000 --- a/Wrappers/Matlab/mex_compile/regularizers_CPU/LLT_model.c +++ /dev/null @@ -1,169 +0,0 @@ -/* -This work is part of the Core Imaging Library developed by -Visual Analytics and Imaging System Group of the Science Technology -Facilities Council, STFC - -Copyright 2017 Daniil Kazantsev -Copyright 2017 Srikanth Nagella, Edoardo Pasca - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -#include "mex.h" -#include "matrix.h" -#include "LLT_model_core.h" - -/* C-OMP implementation of Lysaker, Lundervold and Tai (LLT) model of higher order regularization penalty -* -* Input Parameters: -* 1. U0 - original noise image/volume -* 2. lambda - regularization parameter -* 3. tau - time-step for explicit scheme -* 4. iter - iterations number -* 5. epsil - tolerance constant (to terminate earlier) -* 6. switcher - default is 0, switch to (1) to restrictive smoothing in Z dimension (in test) -* -* Output: -* Filtered/regularized image -* -* Example: -* figure; -* Im = double(imread('lena_gray_256.tif'))/255; % loading image -* u0 = Im + .03*randn(size(Im)); % adding noise -* [Den] = LLT_model(single(u0), 10, 0.1, 1); -* -* -* to compile with OMP support: mex LLT_model.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" -* References: Lysaker, Lundervold and Tai (LLT) 2003, IEEE -* -* 28.11.16/Harwell -*/ - -void mexFunction( - int nlhs, mxArray *plhs[], - int nrhs, const mxArray *prhs[]) - -{ - int number_of_dims, iter, dimX, dimY, dimZ, ll, j, count, switcher; - const int *dim_array; - float *U0, *U=NULL, *U_old=NULL, *D1=NULL, *D2=NULL, *D3=NULL, lambda, tau, re, re1, epsil, re_old; - unsigned short *Map=NULL; - - number_of_dims = mxGetNumberOfDimensions(prhs[0]); - dim_array = mxGetDimensions(prhs[0]); - - /*Handling Matlab input data*/ - U0 = (float *) mxGetData(prhs[0]); /*origanal noise image/volume*/ - if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input in single precision is required"); } - lambda = (float) mxGetScalar(prhs[1]); /*regularization parameter*/ - tau = (float) mxGetScalar(prhs[2]); /* time-step */ - iter = (int) mxGetScalar(prhs[3]); /*iterations number*/ - epsil = (float) mxGetScalar(prhs[4]); /* tolerance constant */ - switcher = (int) mxGetScalar(prhs[5]); /*switch on (1) restrictive smoothing in Z dimension*/ - - /*Handling Matlab output data*/ - dimX = dim_array[0]; dimY = dim_array[1]; dimZ = 1; - - if (number_of_dims == 2) { - /*2D case*/ - U = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - U_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - D1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - D2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - } - else if (number_of_dims == 3) { - /*3D case*/ - dimZ = dim_array[2]; - U = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - U_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - D1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - D2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - D3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - if (switcher != 0) { - Map = (unsigned short*)mxGetPr(plhs[1] = mxCreateNumericArray(3, dim_array, mxUINT16_CLASS, mxREAL)); - } - } - else {mexErrMsgTxt("The input data should be 2D or 3D");} - - /*Copy U0 to U*/ - copyIm(U0, U, dimX, dimY, dimZ); - - count = 1; - re_old = 0.0f; - if (number_of_dims == 2) { - for(ll = 0; ll < iter; ll++) { - - copyIm(U, U_old, dimX, dimY, dimZ); - - /*estimate inner derrivatives */ - der2D(U, D1, D2, dimX, dimY, dimZ); - /* calculate div^2 and update */ - div_upd2D(U0, U, D1, D2, dimX, dimY, dimZ, lambda, tau); - - /* calculate norm to terminate earlier */ - re = 0.0f; re1 = 0.0f; - for(j=0; j 4) break; - - /* check that the residual norm is decreasing */ - if (ll > 2) { - if (re > re_old) break; - } - re_old = re; - - } /*end of iterations*/ - printf("HO iterations stopped at iteration: %i\n", ll); - } - /*3D version*/ - if (number_of_dims == 3) { - - if (switcher == 1) { - /* apply restrictive smoothing */ - calcMap(U, Map, dimX, dimY, dimZ); - /*clear outliers */ - cleanMap(Map, dimX, dimY, dimZ); - } - for(ll = 0; ll < iter; ll++) { - - copyIm(U, U_old, dimX, dimY, dimZ); - - /*estimate inner derrivatives */ - der3D(U, D1, D2, D3, dimX, dimY, dimZ); - /* calculate div^2 and update */ - div_upd3D(U0, U, D1, D2, D3, Map, switcher, dimX, dimY, dimZ, lambda, tau); - - /* calculate norm to terminate earlier */ - re = 0.0f; re1 = 0.0f; - for(j=0; j 4) break; - - /* check that the residual norm is decreasing */ - if (ll > 2) { - if (re > re_old) break; - } - re_old = re; - - } /*end of iterations*/ - printf("HO iterations stopped at iteration: %i\n", ll); - } -} diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/PatchBased_Regul.c b/Wrappers/Matlab/mex_compile/regularizers_CPU/PatchBased_Regul.c deleted file mode 100644 index 9c925df..0000000 --- a/Wrappers/Matlab/mex_compile/regularizers_CPU/PatchBased_Regul.c +++ /dev/null @@ -1,140 +0,0 @@ -/* -This work is part of the Core Imaging Library developed by -Visual Analytics and Imaging System Group of the Science Technology -Facilities Council, STFC - -Copyright 2017 Daniil Kazantsev -Copyright 2017 Srikanth Nagella, Edoardo Pasca - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -#include "mex.h" -#include "matrix.h" -#include "PatchBased_Regul_core.h" - - -/* C-OMP implementation of patch-based (PB) regularization (2D and 3D cases). - * This method finds self-similar patches in data and performs one fixed point iteration to mimimize the PB penalty function - * - * References: 1. Yang Z. & Jacob M. "Nonlocal Regularization of Inverse Problems" - * 2. Kazantsev D. et al. "4D-CT reconstruction with unified spatial-temporal patch-based regularization" - * - * Input Parameters: - * 1. Image (2D or 3D) [required] - * 2. ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window) [optional] - * 3. ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window) [optional] - * 4. h - parameter for the PB penalty function [optional] - * 5. lambda - regularization parameter [optional] - - * Output: - * 1. regularized (denoised) Image (N x N)/volume (N x N x N) - * - * 2D denoising example in Matlab: - Im = double(imread('lena_gray_256.tif'))/255; % loading image - u0 = Im + .03*randn(size(Im)); u0(u0<0) = 0; % adding noise - ImDen = PatchBased_Regul(single(u0), 3, 1, 0.08, 0.05); - * - * Matlab + C/mex compilers needed - * to compile with OMP support: mex PatchBased_Regul.c CFLAGS="\$CFLAGS -fopenmp -Wall" LDFLAGS="\$LDFLAGS -fopenmp" - * - * D. Kazantsev * - * 02/07/2014 - * Harwell, UK - */ - - -void mexFunction( - int nlhs, mxArray *plhs[], - int nrhs, const mxArray *prhs[]) -{ - int N, M, Z, numdims, SearchW, SimilW, SearchW_real, padXY, newsizeX, newsizeY, newsizeZ, switchpad_crop; - const int *dims; - float *A, *B=NULL, *Ap=NULL, *Bp=NULL, h, lambda; - - numdims = mxGetNumberOfDimensions(prhs[0]); - dims = mxGetDimensions(prhs[0]); - - N = dims[0]; - M = dims[1]; - Z = dims[2]; - - if ((numdims < 2) || (numdims > 3)) {mexErrMsgTxt("The input is 2D image or 3D volume");} - if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input in single precision is required"); } - - if(nrhs != 5) mexErrMsgTxt("Five inputs reqired: Image(2D,3D), SearchW, SimilW, Threshold, Regularization parameter"); - - /*Handling inputs*/ - A = (float *) mxGetData(prhs[0]); /* the image/volume to regularize/filter */ - SearchW_real = 3; /*default value*/ - SimilW = 1; /*default value*/ - h = 0.1; - lambda = 0.1; - - if ((nrhs == 2) || (nrhs == 3) || (nrhs == 4) || (nrhs == 5)) SearchW_real = (int) mxGetScalar(prhs[1]); /* the searching window ratio */ - if ((nrhs == 3) || (nrhs == 4) || (nrhs == 5)) SimilW = (int) mxGetScalar(prhs[2]); /* the similarity window ratio */ - if ((nrhs == 4) || (nrhs == 5)) h = (float) mxGetScalar(prhs[3]); /* parameter for the PB filtering function */ - if ((nrhs == 5)) lambda = (float) mxGetScalar(prhs[4]); /* regularization parameter */ - - - if (h <= 0) mexErrMsgTxt("Parmeter for the PB penalty function should be > 0"); - if (lambda <= 0) mexErrMsgTxt(" Regularization parmeter should be > 0"); - - SearchW = SearchW_real + 2*SimilW; - - /* SearchW_full = 2*SearchW + 1; */ /* the full searching window size */ - /* SimilW_full = 2*SimilW + 1; */ /* the full similarity window size */ - - padXY = SearchW + 2*SimilW; /* padding sizes */ - newsizeX = N + 2*(padXY); /* the X size of the padded array */ - newsizeY = M + 2*(padXY); /* the Y size of the padded array */ - newsizeZ = Z + 2*(padXY); /* the Z size of the padded array */ - int N_dims[] = {newsizeX, newsizeY, newsizeZ}; - - /******************************2D case ****************************/ - if (numdims == 2) { - /*Handling output*/ - B = (float*)mxGetData(plhs[0] = mxCreateNumericMatrix(N, M, mxSINGLE_CLASS, mxREAL)); - /*allocating memory for the padded arrays */ - Ap = (float*)mxGetData(mxCreateNumericMatrix(newsizeX, newsizeY, mxSINGLE_CLASS, mxREAL)); - Bp = (float*)mxGetData(mxCreateNumericMatrix(newsizeX, newsizeY, mxSINGLE_CLASS, mxREAL)); - /**************************************************************************/ - /*Perform padding of image A to the size of [newsizeX * newsizeY] */ - switchpad_crop = 0; /*padding*/ - pad_crop(A, Ap, M, N, 0, newsizeY, newsizeX, 0, padXY, switchpad_crop); - - /* Do PB regularization with the padded array */ - PB_FUNC2D(Ap, Bp, newsizeY, newsizeX, padXY, SearchW, SimilW, (float)h, (float)lambda); - - switchpad_crop = 1; /*cropping*/ - pad_crop(Bp, B, M, N, 0, newsizeY, newsizeX, 0, padXY, switchpad_crop); - } - else - { - /******************************3D case ****************************/ - /*Handling output*/ - B = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dims, mxSINGLE_CLASS, mxREAL)); - /*allocating memory for the padded arrays */ - Ap = (float*)mxGetPr(mxCreateNumericArray(3, N_dims, mxSINGLE_CLASS, mxREAL)); - Bp = (float*)mxGetPr(mxCreateNumericArray(3, N_dims, mxSINGLE_CLASS, mxREAL)); - /**************************************************************************/ - - /*Perform padding of image A to the size of [newsizeX * newsizeY * newsizeZ] */ - switchpad_crop = 0; /*padding*/ - pad_crop(A, Ap, M, N, Z, newsizeY, newsizeX, newsizeZ, padXY, switchpad_crop); - - /* Do PB regularization with the padded array */ - PB_FUNC3D(Ap, Bp, newsizeY, newsizeX, newsizeZ, padXY, SearchW, SimilW, (float)h, (float)lambda); - - switchpad_crop = 1; /*cropping*/ - pad_crop(Bp, B, M, N, Z, newsizeY, newsizeX, newsizeZ, padXY, switchpad_crop); - } /*end else ndims*/ -} diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/ROF_TV.c b/Wrappers/Matlab/mex_compile/regularizers_CPU/ROF_TV.c index a800add..6b9e1ea 100644 --- a/Wrappers/Matlab/mex_compile/regularizers_CPU/ROF_TV.c +++ b/Wrappers/Matlab/mex_compile/regularizers_CPU/ROF_TV.c @@ -20,20 +20,21 @@ #include "mex.h" #include "ROF_TV_core.h" -/* C-OMP implementation of ROF-TV denoising/regularization model [1] (2D/3D case) - * +/* ROF-TV denoising/regularization model [1] (2D/3D case) + * (MEX wrapper for MATLAB) + * * Input Parameters: * 1. Noisy image/volume [REQUIRED] * 2. lambda - regularization parameter [REQUIRED] - * 3. tau - marching step for explicit scheme, ~0.001 is recommended [REQUIRED] - * 4. Number of iterations, for explicit scheme >= 150 is recommended [REQUIRED] + * 3. Number of iterations, for explicit scheme >= 150 is recommended [REQUIRED] + * 4. tau - marching step for explicit scheme, ~1 is recommended [REQUIRED] * * Output: - * [1] Regularized image/volume + * [1] Regularized image/volume * * This function is based on the paper by * [1] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms" - * compile: mex ROF_TV.c ROF_TV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" + * * D. Kazantsev, 2016-18 */ @@ -42,65 +43,31 @@ void mexFunction( int nrhs, const mxArray *prhs[]) { - int i, number_of_dims, iter_numb, dimX, dimY, dimZ; + int number_of_dims, iter_numb, dimX, dimY, dimZ; const int *dim_array; - float *A, *B, *D1, *D2, *D3, lambda, tau; + float *Input, *Output=NULL, lambda, tau; dim_array = mxGetDimensions(prhs[0]); number_of_dims = mxGetNumberOfDimensions(prhs[0]); /*Handling Matlab input data*/ - A = (float *) mxGetData(prhs[0]); + Input = (float *) mxGetData(prhs[0]); lambda = (float) mxGetScalar(prhs[1]); /* regularization parameter */ - tau = (float) mxGetScalar(prhs[2]); /* marching step parameter */ - iter_numb = (int) mxGetScalar(prhs[3]); /* iterations number */ + iter_numb = (int) mxGetScalar(prhs[2]); /* iterations number */ + tau = (float) mxGetScalar(prhs[3]); /* marching step parameter */ if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); } + if(nrhs != 4) mexErrMsgTxt("Four inputs reqired: Image(2D,3D), regularization parameter, iterations number, marching step constant"); /*Handling Matlab output data*/ dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2]; /* output arrays*/ if (number_of_dims == 2) { - dimZ = 0; /*2D case*/ - B = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - D1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - D2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - - /* copy into B */ - copyIm(A, B, dimX, dimY, 1); - - /* start TV iterations */ - for(i=0; i < iter_numb; i++) { - - /* calculate differences */ - D1_func(B, D1, dimX, dimY, dimZ); - D2_func(B, D2, dimX, dimY, dimZ); - - /* calculate divergence and image update*/ - TV_main(D1, D2, D2, B, A, lambda, tau, dimX, dimY, dimZ); - } - } - - if (number_of_dims == 3) { - B = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - D1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - D2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - D3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - - /* copy into B */ - copyIm(A, B, dimX, dimY, dimZ); - - /* start TV iterations */ - for(i=0; i < iter_numb; i++) { - - /* calculate differences */ - D1_func(B, D1, dimX, dimY, dimZ); - D2_func(B, D2, dimX, dimY, dimZ); - D3_func(B, D3, dimX, dimY, dimZ); - - /* calculate divergence and image update*/ - TV_main(D1, D2, D3, B, A, lambda, tau, dimX, dimY, dimZ); - } - } + dimZ = 1; /*2D case*/ + /* output image/volume */ + Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); + } + if (number_of_dims == 3) Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); -} + TV_ROF_CPU_main(Input, Output, lambda, iter_numb, tau, dimX, dimY, dimZ); +} \ No newline at end of file diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/SplitBregman_TV.c b/Wrappers/Matlab/mex_compile/regularizers_CPU/SplitBregman_TV.c deleted file mode 100644 index 38f6a9d..0000000 --- a/Wrappers/Matlab/mex_compile/regularizers_CPU/SplitBregman_TV.c +++ /dev/null @@ -1,179 +0,0 @@ -/* -This work is part of the Core Imaging Library developed by -Visual Analytics and Imaging System Group of the Science Technology -Facilities Council, STFC - -Copyright 2017 Daniil Kazantsev -Copyright 2017 Srikanth Nagella, Edoardo Pasca - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -#include "mex.h" -#include -#include "SplitBregman_TV_core.h" - -/* C-OMP implementation of Split Bregman - TV denoising-regularization model (2D/3D) - * - * Input Parameters: - * 1. Noisy image/volume - * 2. lambda - regularization parameter - * 3. Number of iterations [OPTIONAL parameter] - * 4. eplsilon - tolerance constant [OPTIONAL parameter] - * 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter] - * - * Output: - * Filtered/regularized image - * - * Example: - * figure; - * Im = double(imread('lena_gray_256.tif'))/255; % loading image - * u0 = Im + .05*randn(size(Im)); u0(u0 < 0) = 0; - * u = SplitBregman_TV(single(u0), 10, 30, 1e-04); - * - * to compile with OMP support: mex SplitBregman_TV.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" - * References: - * The Split Bregman Method for L1 Regularized Problems, by Tom Goldstein and Stanley Osher. - * D. Kazantsev, 2016* - */ - - -void mexFunction( - int nlhs, mxArray *plhs[], - int nrhs, const mxArray *prhs[]) - -{ - int number_of_dims, iter, dimX, dimY, dimZ, ll, j, count, methTV; - const int *dim_array; - float *A, *U=NULL, *U_old=NULL, *Dx=NULL, *Dy=NULL, *Dz=NULL, *Bx=NULL, *By=NULL, *Bz=NULL, lambda, mu, epsil, re, re1, re_old; - - number_of_dims = mxGetNumberOfDimensions(prhs[0]); - dim_array = mxGetDimensions(prhs[0]); - - /*Handling Matlab input data*/ - if ((nrhs < 2) || (nrhs > 5)) mexErrMsgTxt("At least 2 parameters is required: Image(2D/3D), Regularization parameter. The full list of parameters: Image(2D/3D), Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1')"); - - /*Handling Matlab input data*/ - A = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */ - mu = (float) mxGetScalar(prhs[1]); /* regularization parameter */ - iter = 35; /* default iterations number */ - epsil = 0.0001; /* default tolerance constant */ - methTV = 0; /* default isotropic TV penalty */ - if ((nrhs == 3) || (nrhs == 4) || (nrhs == 5)) iter = (int) mxGetScalar(prhs[2]); /* iterations number */ - if ((nrhs == 4) || (nrhs == 5)) epsil = (float) mxGetScalar(prhs[3]); /* tolerance constant */ - if (nrhs == 5) { - char *penalty_type; - penalty_type = mxArrayToString(prhs[4]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */ - if ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0)) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',"); - if (strcmp(penalty_type, "l1") == 0) methTV = 1; /* enable 'l1' penalty */ - mxFree(penalty_type); - } - if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); } - - lambda = 2.0f*mu; - count = 1; - re_old = 0.0f; - /*Handling Matlab output data*/ - dimY = dim_array[0]; dimX = dim_array[1]; dimZ = dim_array[2]; - - if (number_of_dims == 2) { - dimZ = 1; /*2D case*/ - U = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - U_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - Dx = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - Dy = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - Bx = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - By = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - - copyIm(A, U, dimX, dimY, dimZ); /*initialize */ - - /* begin outer SB iterations */ - for(ll=0; ll 4) break; - - /* check that the residual norm is decreasing */ - if (ll > 2) { - if (re > re_old) break; - } - re_old = re; - /*printf("%f %i %i \n", re, ll, count); */ - - /*copyIm(U_old, U, dimX, dimY, dimZ); */ - } - printf("SB iterations stopped at iteration: %i\n", ll); - } - if (number_of_dims == 3) { - U = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - U_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - Dx = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - Dy = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - Dz = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - Bx = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - By = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - Bz = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - - copyIm(A, U, dimX, dimY, dimZ); /*initialize */ - - /* begin outer SB iterations */ - for(ll=0; ll 4) break; - - /* check that the residual norm is decreasing */ - if (ll > 2) { - if (re > re_old) break; } - /*printf("%f %i %i \n", re, ll, count); */ - re_old = re; - } - printf("SB iterations stopped at iteration: %i\n", ll); - } -} \ No newline at end of file diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/TGV_PD.c b/Wrappers/Matlab/mex_compile/regularizers_CPU/TGV_PD.c deleted file mode 100644 index c9cb440..0000000 --- a/Wrappers/Matlab/mex_compile/regularizers_CPU/TGV_PD.c +++ /dev/null @@ -1,144 +0,0 @@ -/* -This work is part of the Core Imaging Library developed by -Visual Analytics and Imaging System Group of the Science Technology -Facilities Council, STFC - -Copyright 2017 Daniil Kazantsev -Copyright 2017 Srikanth Nagella, Edoardo Pasca - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -#include "TGV_PD_core.h" -#include "mex.h" - -/* C-OMP implementation of Primal-Dual denoising method for - * Total Generilized Variation (TGV)-L2 model (2D case only) - * - * Input Parameters: - * 1. Noisy image/volume (2D) - * 2. lambda - regularization parameter - * 3. parameter to control first-order term (alpha1) - * 4. parameter to control the second-order term (alpha0) - * 5. Number of CP iterations - * - * Output: - * Filtered/regularized image - * - * Example: - * figure; - * Im = double(imread('lena_gray_256.tif'))/255; % loading image - * u0 = Im + .03*randn(size(Im)); % adding noise - * tic; u = TGV_PD(single(u0), 0.02, 1.3, 1, 550); toc; - * - * to compile with OMP support: mex TGV_PD.c TGV_PD_core.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" - * References: - * K. Bredies "Total Generalized Variation" - * - * 28.11.16/Harwell - */ - -void mexFunction( - int nlhs, mxArray *plhs[], - int nrhs, const mxArray *prhs[]) - -{ - int number_of_dims, iter, dimX, dimY, dimZ, ll; - const int *dim_array; - float *A, *U, *U_old, *P1, *P2, *Q1, *Q2, *Q3, *V1, *V1_old, *V2, *V2_old, lambda, L2, tau, sigma, alpha1, alpha0; - - number_of_dims = mxGetNumberOfDimensions(prhs[0]); - dim_array = mxGetDimensions(prhs[0]); - - /*Handling Matlab input data*/ - A = (float *) mxGetData(prhs[0]); /*origanal noise image/volume*/ - if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input in single precision is required"); } - lambda = (float) mxGetScalar(prhs[1]); /*regularization parameter*/ - alpha1 = (float) mxGetScalar(prhs[2]); /*first-order term*/ - alpha0 = (float) mxGetScalar(prhs[3]); /*second-order term*/ - iter = (int) mxGetScalar(prhs[4]); /*iterations number*/ - if(nrhs != 5) mexErrMsgTxt("Five input parameters is reqired: Image(2D/3D), Regularization parameter, alpha1, alpha0, Iterations"); - - /*Handling Matlab output data*/ - dimX = dim_array[0]; dimY = dim_array[1]; - - if (number_of_dims == 2) { - /*2D case*/ - dimZ = 1; - U = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - - /*dual variables*/ - P1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - P2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - - Q1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - Q2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - Q3 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - - U_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - - V1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - V1_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - V2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - V2_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - - - /*printf("%i \n", i);*/ - L2 = 12.0f; /*Lipshitz constant*/ - tau = 1.0/pow(L2,0.5); - sigma = 1.0/pow(L2,0.5); - - /*Copy A to U*/ - copyIm(A, U, dimX, dimY, dimZ); - - /* Here primal-dual iterations begin for 2D */ - for(ll = 0; ll < iter; ll++) { - - /* Calculate Dual Variable P */ - DualP_2D(U, V1, V2, P1, P2, dimX, dimY, dimZ, sigma); - - /*Projection onto convex set for P*/ - ProjP_2D(P1, P2, dimX, dimY, dimZ, alpha1); - - /* Calculate Dual Variable Q */ - DualQ_2D(V1, V2, Q1, Q2, Q3, dimX, dimY, dimZ, sigma); - - /*Projection onto convex set for Q*/ - ProjQ_2D(Q1, Q2, Q3, dimX, dimY, dimZ, alpha0); - - /*saving U into U_old*/ - copyIm(U, U_old, dimX, dimY, dimZ); - - /*adjoint operation -> divergence and projection of P*/ - DivProjP_2D(U, A, P1, P2, dimX, dimY, dimZ, lambda, tau); - - /*get updated solution U*/ - newU(U, U_old, dimX, dimY, dimZ); - - /*saving V into V_old*/ - copyIm(V1, V1_old, dimX, dimY, dimZ); - copyIm(V2, V2_old, dimX, dimY, dimZ); - - /* upd V*/ - UpdV_2D(V1, V2, P1, P2, Q1, Q2, Q3, dimX, dimY, dimZ, tau); - - /*get new V*/ - newU(V1, V1_old, dimX, dimY, dimZ); - newU(V2, V2_old, dimX, dimY, dimZ); - } /*end of iterations*/ - } - else if (number_of_dims == 3) { - mexErrMsgTxt("The input data should be a 2D array"); - /*3D case*/ - } - else {mexErrMsgTxt("The input data should be a 2D array");} - -} diff --git a/Wrappers/Matlab/mex_compile/regularizers_GPU/Diffus_HO/Diff4thHajiaboli_GPU.cpp b/Wrappers/Matlab/mex_compile/regularizers_GPU/Diffus_HO/Diff4thHajiaboli_GPU.cpp deleted file mode 100644 index 7fd6077..0000000 --- a/Wrappers/Matlab/mex_compile/regularizers_GPU/Diffus_HO/Diff4thHajiaboli_GPU.cpp +++ /dev/null @@ -1,125 +0,0 @@ -#include "mex.h" -#include -#include -#include -#include -#include -#include -#include "Diff4th_GPU_kernel.h" - -/* - * 2D and 3D CUDA implementation of the 4th order PDE denoising model by Hajiaboli - * - * Reference : - * "An anisotropic fourth-order diffusion filter for image noise removal" by M. Hajiaboli - * - * Example - * figure; - * Im = double(imread('lena_gray_256.tif'))/255; % loading image - * u0 = Im + .05*randn(size(Im)); % adding noise - * u = Diff4thHajiaboli_GPU(single(u0), 0.02, 150); - * subplot (1,2,1); imshow(u0,[ ]); title('Noisy Image') - * subplot (1,2,2); imshow(u,[ ]); title('Denoised Image') - * - * - * Linux/Matlab compilation: - * compile in terminal: nvcc -Xcompiler -fPIC -shared -o Diff4th_GPU_kernel.o Diff4th_GPU_kernel.cu - * then compile in Matlab: mex -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart Diff4thHajiaboli_GPU.cpp Diff4th_GPU_kernel.o - - void mexFunction( -int nlhs, mxArray *plhs[], -int nrhs, const mxArray *prhs[]) -where: -prhs Array of pointers to the INPUT mxArrays -nrhs int number of INPUT mxArrays - -nlhs Array of pointers to the OUTPUT mxArrays -plhs int number of OUTPUT mxArrays - - */ - -void mexFunction( - int nlhs, mxArray *plhs[], - int nrhs, const mxArray *prhs[]) -{ - int numdims, dimZ, size; - float *A, *B, *A_L, *B_L; - const int *dims; - - numdims = mxGetNumberOfDimensions(prhs[0]); - dims = mxGetDimensions(prhs[0]); - - float sigma = (float)mxGetScalar(prhs[1]); /* edge-preserving parameter */ - float lambda = (float)mxGetScalar(prhs[2]); /* regularization parameter */ - int iter = (int)mxGetScalar(prhs[3]); /* iterations number */ - - if (numdims == 2) { - - int N, M, Z, i, j; - Z = 0; // for the 2D case - float tau = 0.01; // time step is sufficiently small for an explicit methods - - /*Input data*/ - A = (float*)mxGetData(prhs[0]); - N = dims[0] + 2; - M = dims[1] + 2; - A_L = (float*)mxGetData(mxCreateNumericMatrix(N, M, mxSINGLE_CLASS, mxREAL)); - B_L = (float*)mxGetData(mxCreateNumericMatrix(N, M, mxSINGLE_CLASS, mxREAL)); - - /*Output data*/ - B = (float*)mxGetData(plhs[0] = mxCreateNumericMatrix(dims[0], dims[1], mxSINGLE_CLASS, mxREAL)); - - // copy A to the bigger A_L with boundaries - #pragma omp parallel for shared(A_L, A) private(i,j) - for (i=0; i < N; i++) { - for (j=0; j < M; j++) { - if (((i > 0) && (i < N-1)) && ((j > 0) && (j < M-1))) A_L[i*M+j] = A[(i-1)*(dims[1])+(j-1)]; - }} - - // Running CUDA code here - Diff4th_GPU_kernel(A_L, B_L, N, M, Z, (float)sigma, iter, (float)tau, lambda); - - // copy the processed B_L to a smaller B - #pragma omp parallel for shared(B_L, B) private(i,j) - for (i=0; i < N; i++) { - for (j=0; j < M; j++) { - if (((i > 0) && (i < N-1)) && ((j > 0) && (j < M-1))) B[(i-1)*(dims[1])+(j-1)] = B_L[i*M+j]; - }} - } - if (numdims == 3) { - // 3D image denoising / regularization - int N, M, Z, i, j, k; - float tau = 0.0007; // Time Step is small for an explicit methods - A = (float*)mxGetData(prhs[0]); - N = dims[0] + 2; - M = dims[1] + 2; - Z = dims[2] + 2; - int N_dims[] = {N, M, Z}; - A_L = (float*)mxGetPr(mxCreateNumericArray(3, N_dims, mxSINGLE_CLASS, mxREAL)); - B_L = (float*)mxGetPr(mxCreateNumericArray(3, N_dims, mxSINGLE_CLASS, mxREAL)); - - /* output data */ - B = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dims, mxSINGLE_CLASS, mxREAL)); - - // copy A to the bigger A_L with boundaries - #pragma omp parallel for shared(A_L, A) private(i,j,k) - for (i=0; i < N; i++) { - for (j=0; j < M; j++) { - for (k=0; k < Z; k++) { - if (((i > 0) && (i < N-1)) && ((j > 0) && (j < M-1)) && ((k > 0) && (k < Z-1))) { - A_L[(N*M)*(k)+(i)*M+(j)] = A[(dims[0]*dims[1])*(k-1)+(i-1)*dims[1]+(j-1)]; - }}}} - - // Running CUDA kernel here for diffusivity - Diff4th_GPU_kernel(A_L, B_L, N, M, Z, (float)sigma, iter, (float)tau, lambda); - - // copy the processed B_L to a smaller B - #pragma omp parallel for shared(B_L, B) private(i,j,k) - for (i=0; i < N; i++) { - for (j=0; j < M; j++) { - for (k=0; k < Z; k++) { - if (((i > 0) && (i < N-1)) && ((j > 0) && (j < M-1)) && ((k > 0) && (k < Z-1))) { - B[(dims[0]*dims[1])*(k-1)+(i-1)*dims[1]+(j-1)] = B_L[(N*M)*(k)+(i)*M+(j)]; - }}}} - } -} \ No newline at end of file diff --git a/Wrappers/Matlab/mex_compile/regularizers_GPU/FGP_TV_GPU.cpp b/Wrappers/Matlab/mex_compile/regularizers_GPU/FGP_TV_GPU.cpp new file mode 100644 index 0000000..9ed9ae0 --- /dev/null +++ b/Wrappers/Matlab/mex_compile/regularizers_GPU/FGP_TV_GPU.cpp @@ -0,0 +1,95 @@ +/* + * This work is part of the Core Imaging Library developed by + * Visual Analytics and Imaging System Group of the Science Technology + * Facilities Council, STFC + * + * Copyright 2017 Daniil Kazantsev + * Copyright 2017 Srikanth Nagella, Edoardo Pasca + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "matrix.h" +#include "mex.h" +#include "TV_FGP_GPU_core.h" + +/* GPU (CUDA) implementation of FGP-TV [1] denoising/regularization model (2D/3D case) + * + * Input Parameters: + * 1. Noisy image/volume + * 2. lambdaPar - regularization parameter + * 3. Number of iterations + * 4. eplsilon: tolerance constant + * 5. TV-type: methodTV - 'iso' (0) or 'l1' (1) + * 6. nonneg: 'nonnegativity (0 is OFF by default) + * 7. print information: 0 (off) or 1 (on) + * + * Output: + * [1] Filtered/regularized image + * + * This function is based on the Matlab's code and paper by + * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems" + */ + +void mexFunction( + int nlhs, mxArray *plhs[], + int nrhs, const mxArray *prhs[]) + +{ + int number_of_dims, iter, dimX, dimY, dimZ, methTV, printswitch, nonneg; + const int *dim_array; + float *Input, *Output=NULL, lambda, epsil; + + number_of_dims = mxGetNumberOfDimensions(prhs[0]); + dim_array = mxGetDimensions(prhs[0]); + + /*Handling Matlab input data*/ + if ((nrhs < 2) || (nrhs > 7)) mexErrMsgTxt("At least 2 parameters is required, all parameters are: Image(2D/3D), Regularization parameter. The full list of parameters: Image(2D/3D), Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1'), nonnegativity switch, print switch"); + + Input = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */ + lambda = (float) mxGetScalar(prhs[1]); /* regularization parameter */ + iter = 300; /* default iterations number */ + epsil = 0.0001; /* default tolerance constant */ + methTV = 0; /* default isotropic TV penalty */ + nonneg = 0; /* default nonnegativity switch, off - 0 */ + printswitch = 0; /*default print is switched, off - 0 */ + + if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); } + + if ((nrhs == 3) || (nrhs == 4) || (nrhs == 5) || (nrhs == 6) || (nrhs == 7)) iter = (int) mxGetScalar(prhs[2]); /* iterations number */ + if ((nrhs == 4) || (nrhs == 5) || (nrhs == 6) || (nrhs == 7)) epsil = (float) mxGetScalar(prhs[3]); /* tolerance constant */ + if ((nrhs == 5) || (nrhs == 6) || (nrhs == 7)) { + char *penalty_type; + penalty_type = mxArrayToString(prhs[4]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */ + if ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0)) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',"); + if (strcmp(penalty_type, "l1") == 0) methTV = 1; /* enable 'l1' penalty */ + mxFree(penalty_type); + } + if ((nrhs == 6) || (nrhs == 7)) { + nonneg = (int) mxGetScalar(prhs[5]); + if ((nonneg != 0) && (nonneg != 1)) mexErrMsgTxt("Nonnegativity constraint can be enabled by choosing 1 or off - 0"); + } + if (nrhs == 7) { + printswitch = (int) mxGetScalar(prhs[6]); + if ((printswitch != 0) && (printswitch != 1)) mexErrMsgTxt("Print can be enabled by choosing 1 or off - 0"); + } + + /*Handling Matlab output data*/ + dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2]; + + if (number_of_dims == 2) { + dimZ = 1; /*2D case*/ + Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); + } + if (number_of_dims == 3) Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); + + /* running the function */ + TV_FGP_GPU_main(Input, Output, lambda, iter, epsil, methTV, nonneg, printswitch, dimX, dimY, dimZ); +} \ No newline at end of file diff --git a/Wrappers/Matlab/mex_compile/regularizers_GPU/NL_Regul/NLM_GPU.cpp b/Wrappers/Matlab/mex_compile/regularizers_GPU/NL_Regul/NLM_GPU.cpp deleted file mode 100644 index ff0cc90..0000000 --- a/Wrappers/Matlab/mex_compile/regularizers_GPU/NL_Regul/NLM_GPU.cpp +++ /dev/null @@ -1,171 +0,0 @@ -#include "mex.h" -#include -#include -#include -#include -#include -#include -#include "NLM_GPU_kernel.h" - -/* CUDA implementation of the patch-based (PB) regularization for 2D and 3D images/volumes - * This method finds self-similar patches in data and performs one fixed point iteration to mimimize the PB penalty function - * - * References: 1. Yang Z. & Jacob M. "Nonlocal Regularization of Inverse Problems" - * 2. Kazantsev D. at. all "4D-CT reconstruction with unified spatial-temporal patch-based regularization" - * - * Input Parameters (mandatory): - * 1. Image/volume (2D/3D) - * 2. ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window) - * 3. ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window) - * 4. h - parameter for the PB penalty function - * 5. lambda - regularization parameter - - * Output: - * 1. regularized (denoised) Image/volume (N x N x N) - * - * In matlab check what kind of GPU you have with "gpuDevice" command, - * then set your ComputeCapability, here I use -arch compute_35 - * - * Quick 2D denoising example in Matlab: - Im = double(imread('lena_gray_256.tif'))/255; % loading image - u0 = Im + .03*randn(size(Im)); u0(u0<0) = 0; % adding noise - ImDen = NLM_GPU(single(u0), 3, 2, 0.15, 1); - - * Linux/Matlab compilation: - * compile in terminal: nvcc -Xcompiler -fPIC -shared -o NLM_GPU_kernel.o NLM_GPU_kernel.cu - * then compile in Matlab: mex -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart NLM_GPU.cpp NLM_GPU_kernel.o - * - * D. Kazantsev - * 2014-17 - * Harwell/Manchester UK - */ - -float pad_crop(float *A, float *Ap, int OldSizeX, int OldSizeY, int OldSizeZ, int NewSizeX, int NewSizeY, int NewSizeZ, int padXY, int switchpad_crop); - -void mexFunction( - int nlhs, mxArray *plhs[], - int nrhs, const mxArray *prhs[]) -{ - int N, M, Z, i_n, j_n, k_n, numdims, SearchW, SimilW, SearchW_real, padXY, newsizeX, newsizeY, newsizeZ, switchpad_crop, count, SearchW_full, SimilW_full; - const int *dims; - float *A, *B=NULL, *Ap=NULL, *Bp=NULL, *Eucl_Vec, h, h2, lambda, val, denh2; - - numdims = mxGetNumberOfDimensions(prhs[0]); - dims = mxGetDimensions(prhs[0]); - - N = dims[0]; - M = dims[1]; - Z = dims[2]; - - if ((numdims < 2) || (numdims > 3)) {mexErrMsgTxt("The input should be 2D image or 3D volume");} - if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input in single precision is required"); } - - if(nrhs != 5) mexErrMsgTxt("Five inputs reqired: Image(2D,3D), SearchW, SimilW, Threshold, Regularization parameter"); - - /*Handling inputs*/ - A = (float *) mxGetData(prhs[0]); /* the image to regularize/filter */ - SearchW_real = (int) mxGetScalar(prhs[1]); /* the searching window ratio */ - SimilW = (int) mxGetScalar(prhs[2]); /* the similarity window ratio */ - h = (float) mxGetScalar(prhs[3]); /* parameter for the PB filtering function */ - lambda = (float) mxGetScalar(prhs[4]); - - if (h <= 0) mexErrMsgTxt("Parmeter for the PB penalty function should be > 0"); - - SearchW = SearchW_real + 2*SimilW; - - SearchW_full = 2*SearchW + 1; /* the full searching window size */ - SimilW_full = 2*SimilW + 1; /* the full similarity window size */ - h2 = h*h; - - padXY = SearchW + 2*SimilW; /* padding sizes */ - newsizeX = N + 2*(padXY); /* the X size of the padded array */ - newsizeY = M + 2*(padXY); /* the Y size of the padded array */ - newsizeZ = Z + 2*(padXY); /* the Z size of the padded array */ - int N_dims[] = {newsizeX, newsizeY, newsizeZ}; - - /******************************2D case ****************************/ - if (numdims == 2) { - /*Handling output*/ - B = (float*)mxGetData(plhs[0] = mxCreateNumericMatrix(N, M, mxSINGLE_CLASS, mxREAL)); - /*allocating memory for the padded arrays */ - Ap = (float*)mxGetData(mxCreateNumericMatrix(newsizeX, newsizeY, mxSINGLE_CLASS, mxREAL)); - Bp = (float*)mxGetData(mxCreateNumericMatrix(newsizeX, newsizeY, mxSINGLE_CLASS, mxREAL)); - Eucl_Vec = (float*)mxGetData(mxCreateNumericMatrix(SimilW_full*SimilW_full, 1, mxSINGLE_CLASS, mxREAL)); - - /*Gaussian kernel */ - count = 0; - for(i_n=-SimilW; i_n<=SimilW; i_n++) { - for(j_n=-SimilW; j_n<=SimilW; j_n++) { - val = (float)(i_n*i_n + j_n*j_n)/(2*SimilW*SimilW); - Eucl_Vec[count] = exp(-val); - count = count + 1; - }} /*main neighb loop */ - - /**************************************************************************/ - /*Perform padding of image A to the size of [newsizeX * newsizeY] */ - switchpad_crop = 0; /*padding*/ - pad_crop(A, Ap, M, N, 0, newsizeY, newsizeX, 0, padXY, switchpad_crop); - - /* Do PB regularization with the padded array */ - NLM_GPU_kernel(Ap, Bp, Eucl_Vec, newsizeY, newsizeX, 0, numdims, SearchW, SimilW, SearchW_real, (float)h2, (float)lambda); - - switchpad_crop = 1; /*cropping*/ - pad_crop(Bp, B, M, N, 0, newsizeY, newsizeX, 0, padXY, switchpad_crop); - } - else - { - /******************************3D case ****************************/ - /*Handling output*/ - B = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dims, mxSINGLE_CLASS, mxREAL)); - /*allocating memory for the padded arrays */ - Ap = (float*)mxGetPr(mxCreateNumericArray(3, N_dims, mxSINGLE_CLASS, mxREAL)); - Bp = (float*)mxGetPr(mxCreateNumericArray(3, N_dims, mxSINGLE_CLASS, mxREAL)); - Eucl_Vec = (float*)mxGetData(mxCreateNumericMatrix(SimilW_full*SimilW_full*SimilW_full, 1, mxSINGLE_CLASS, mxREAL)); - - /*Gaussian kernel */ - count = 0; - for(i_n=-SimilW; i_n<=SimilW; i_n++) { - for(j_n=-SimilW; j_n<=SimilW; j_n++) { - for(k_n=-SimilW; k_n<=SimilW; k_n++) { - val = (float)(i_n*i_n + j_n*j_n + k_n*k_n)/(2*SimilW*SimilW*SimilW); - Eucl_Vec[count] = exp(-val); - count = count + 1; - }}} /*main neighb loop */ - /**************************************************************************/ - /*Perform padding of image A to the size of [newsizeX * newsizeY * newsizeZ] */ - switchpad_crop = 0; /*padding*/ - pad_crop(A, Ap, M, N, Z, newsizeY, newsizeX, newsizeZ, padXY, switchpad_crop); - - /* Do PB regularization with the padded array */ - NLM_GPU_kernel(Ap, Bp, Eucl_Vec, newsizeY, newsizeX, newsizeZ, numdims, SearchW, SimilW, SearchW_real, (float)h2, (float)lambda); - - switchpad_crop = 1; /*cropping*/ - pad_crop(Bp, B, M, N, Z, newsizeY, newsizeX, newsizeZ, padXY, switchpad_crop); - } /*end else ndims*/ -} - -float pad_crop(float *A, float *Ap, int OldSizeX, int OldSizeY, int OldSizeZ, int NewSizeX, int NewSizeY, int NewSizeZ, int padXY, int switchpad_crop) -{ - /* padding-cropping function */ - int i,j,k; - if (NewSizeZ > 1) { - for (i=0; i < NewSizeX; i++) { - for (j=0; j < NewSizeY; j++) { - for (k=0; k < NewSizeZ; k++) { - if (((i >= padXY) && (i < NewSizeX-padXY)) && ((j >= padXY) && (j < NewSizeY-padXY)) && ((k >= padXY) && (k < NewSizeZ-padXY))) { - if (switchpad_crop == 0) Ap[NewSizeX*NewSizeY*k + i*NewSizeY+j] = A[OldSizeX*OldSizeY*(k - padXY) + (i-padXY)*(OldSizeY)+(j-padXY)]; - else Ap[OldSizeX*OldSizeY*(k - padXY) + (i-padXY)*(OldSizeY)+(j-padXY)] = A[NewSizeX*NewSizeY*k + i*NewSizeY+j]; - } - }}} - } - else { - for (i=0; i < NewSizeX; i++) { - for (j=0; j < NewSizeY; j++) { - if (((i >= padXY) && (i < NewSizeX-padXY)) && ((j >= padXY) && (j < NewSizeY-padXY))) { - if (switchpad_crop == 0) Ap[i*NewSizeY+j] = A[(i-padXY)*(OldSizeY)+(j-padXY)]; - else Ap[(i-padXY)*(OldSizeY)+(j-padXY)] = A[i*NewSizeY+j]; - } - }} - } - return *Ap; -} \ No newline at end of file diff --git a/Wrappers/Matlab/mex_compile/regularizers_GPU/ROF_TV_GPU.cpp b/Wrappers/Matlab/mex_compile/regularizers_GPU/ROF_TV_GPU.cpp new file mode 100644 index 0000000..7bbe3af --- /dev/null +++ b/Wrappers/Matlab/mex_compile/regularizers_GPU/ROF_TV_GPU.cpp @@ -0,0 +1,73 @@ +/* + * This work is part of the Core Imaging Library developed by + * Visual Analytics and Imaging System Group of the Science Technology + * Facilities Council, STFC + * + * Copyright 2017 Daniil Kazantsev + * Copyright 2017 Srikanth Nagella, Edoardo Pasca + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "matrix.h" +#include "mex.h" +#include "TV_ROF_GPU_core.h" + +/* ROF-TV denoising/regularization model [1] (2D/3D case) + * (MEX wrapper for MATLAB) + * + * Input Parameters: + * 1. Noisy image/volume [REQUIRED] + * 2. lambda - regularization parameter [REQUIRED] + * 3. Number of iterations, for explicit scheme >= 150 is recommended [REQUIRED] + * 4. tau - marching step for explicit scheme, ~1 is recommended [REQUIRED] + * + * Output: + * [1] Regularized image/volume + * + * This function is based on the paper by + * [1] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms" + * + * D. Kazantsev, 2016-18 + */ + +void mexFunction( + int nlhs, mxArray *plhs[], + int nrhs, const mxArray *prhs[]) + +{ + int number_of_dims, iter_numb, dimX, dimY, dimZ; + const int *dim_array; + float *Input, *Output=NULL, lambda, tau; + + dim_array = mxGetDimensions(prhs[0]); + number_of_dims = mxGetNumberOfDimensions(prhs[0]); + + /*Handling Matlab input data*/ + Input = (float *) mxGetData(prhs[0]); + lambda = (float) mxGetScalar(prhs[1]); /* regularization parameter */ + iter_numb = (int) mxGetScalar(prhs[2]); /* iterations number */ + tau = (float) mxGetScalar(prhs[3]); /* marching step parameter */ + + if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); } + if(nrhs != 4) mexErrMsgTxt("Four inputs reqired: Image(2D,3D), regularization parameter, iterations number, marching step constant"); + /*Handling Matlab output data*/ + dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2]; + + /* output arrays*/ + if (number_of_dims == 2) { + dimZ = 1; /*2D case*/ + /* output image/volume */ + Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); + } + if (number_of_dims == 3) Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); + + TV_ROF_GPU_main(Input, Output, lambda, iter_numb, tau, dimX, dimY, dimZ); +} \ No newline at end of file -- cgit v1.2.3