From 49423f47c7282672fea979cd122c063c0f5ee465 Mon Sep 17 00:00:00 2001 From: Daniil Kazantsev Date: Wed, 21 Feb 2018 12:06:12 +0000 Subject: ROF_TV demo --- Wrappers/Python/test/test_gpu_regularizers.py | 44 +++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/Wrappers/Python/test/test_gpu_regularizers.py b/Wrappers/Python/test/test_gpu_regularizers.py index 735a25d..3346fb2 100644 --- a/Wrappers/Python/test/test_gpu_regularizers.py +++ b/Wrappers/Python/test/test_gpu_regularizers.py @@ -6,14 +6,12 @@ Created on Tue Jan 30 10:24:26 2018 @author: ofn77899 """ - - import matplotlib.pyplot as plt import numpy as np import os from enum import Enum import timeit -from ccpi.filters.gpu_regularizers import Diff4thHajiaboli, NML +from ccpi.filters.gpu_regularizers import Diff4thHajiaboli, NML, ROF_TV_GPU ############################################################################### def printParametersToString(pars): txt = r'' @@ -146,3 +144,43 @@ imgplot = plt.imshow((nml - u0)**2, cmap="gray") plt.show() + +## Rudin-Osher-Fatemi (ROF) TV regularization +start_time = timeit.default_timer() + +pars = { + 'input' : u0, + 'regularization_parameter': 1,\ + 'time_marching_parameter': 0.003, \ + 'number_of_iterations':300 + } + +rof_tv = ROF_TV_GPU(pars['input'], + pars['number_of_iterations'], + pars['time_marching_parameter'], + pars['number_of_iterations']) + +rms = rmse(Im, rof_tv) +pars['rmse'] = rms +txtstr = printParametersToString(pars) +txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) +print (txtstr) +a=fig.add_subplot(2,4,1) + +# these are matplotlib.patch.Patch properties +props = dict(boxstyle='round', facecolor='wheat', alpha=0.75) +# place a text box in upper left in axes coords +a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=12, + verticalalignment='top', bbox=props) +imgplot = plt.imshow(nml, cmap="gray") + +a=fig.add_subplot(2,4,2) + +# these are matplotlib.patch.Patch properties +props = dict(boxstyle='round', facecolor='wheat', alpha=0.5) +# place a text box in upper left in axes coords +a.text(0.05, 0.95, 'rof_tv - u0', transform=a.transAxes, fontsize=14, + verticalalignment='top', bbox=props) +imgplot = plt.imshow((rof_tv - u0)**2, cmap="gray") + +plt.show() -- cgit v1.2.3 From 75917255b4f0b8aae2e6ce9492a75e0f749bfb3e Mon Sep 17 00:00:00 2001 From: Edoardo Pasca Date: Wed, 21 Feb 2018 12:49:59 +0000 Subject: added TV_ROF --- Core/CMakeLists.txt | 3 ++- Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu | 2 +- Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.h | 3 ++- Wrappers/Python/src/gpu_regularizers.pyx | 8 ++++---- Wrappers/Python/test/test_gpu_regularizers.py | 2 +- 5 files changed, 10 insertions(+), 8 deletions(-) diff --git a/Core/CMakeLists.txt b/Core/CMakeLists.txt index 3202581..3e3f89e 100644 --- a/Core/CMakeLists.txt +++ b/Core/CMakeLists.txt @@ -88,7 +88,7 @@ add_library(cilreg SHARED ${CMAKE_CURRENT_SOURCE_DIR}/regularizers_CPU/PatchBased_Regul_core.c ${CMAKE_CURRENT_SOURCE_DIR}/regularizers_CPU/SplitBregman_TV_core.c ${CMAKE_CURRENT_SOURCE_DIR}/regularizers_CPU/TGV_PD_core.c - ${CMAKE_CURRENT_SOURCE_DIR}/regularizers_CPU/ROF_TV_core.c + ${CMAKE_CURRENT_SOURCE_DIR}/regularizers_CPU/ROF_TV_core.c ${CMAKE_CURRENT_SOURCE_DIR}/regularizers_CPU/utils.c ) target_link_libraries(cilreg ${EXTRA_LIBRARIES} ) @@ -129,6 +129,7 @@ if (CUDA_FOUND) CUDA_ADD_LIBRARY(cilregcuda SHARED ${CMAKE_CURRENT_SOURCE_DIR}/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.cu ${CMAKE_CURRENT_SOURCE_DIR}/regularizers_GPU/NL_Regul/NLM_GPU_kernel.cu + ${CMAKE_CURRENT_SOURCE_DIR}/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu ) if (UNIX) message ("I'd install into ${CMAKE_INSTALL_PREFIX}/lib") diff --git a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu index 633bf63..73a52e1 100755 --- a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu +++ b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu @@ -306,7 +306,7 @@ __host__ __device__ int sign (float x) ///////////////////////////////////////////////// // HOST FUNCTION -extern "C" void TV_ROF_GPU(float* Input, float* Output, int N, int M, int Z, int iter, float tau, float lambda) +extern "C" void TV_ROF_GPU_kernel(float* Input, float* Output, int N, int M, int Z, int iter, float tau, float lambda) { // set up device int dev = 0; diff --git a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.h b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.h index 3163482..2938d2f 100755 --- a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.h +++ b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.h @@ -1,7 +1,8 @@ #ifndef __TVGPU_H__ #define __TVGPU_H__ #include "CCPiDefines.h" +#include -extern "C" CCPI_EXPORT void TV_ROF_GPU(float* Input, float* Output, int N, int M, int Z, int iter, float tau, float lambda); +extern "C" CCPI_EXPORT void TV_ROF_GPU_kernel(float* Input, float* Output, int N, int M, int Z, int iter, float tau, float lambda); #endif diff --git a/Wrappers/Python/src/gpu_regularizers.pyx b/Wrappers/Python/src/gpu_regularizers.pyx index 5a5d274..fcb91cc 100644 --- a/Wrappers/Python/src/gpu_regularizers.pyx +++ b/Wrappers/Python/src/gpu_regularizers.pyx @@ -25,7 +25,7 @@ cdef extern void NLM_GPU_kernel(float *A, float* B, float *Eucl_Vec, int N, int M, int Z, int dimension, int SearchW, int SimilW, int SearchW_real, float denh2, float lambdaf); -cdef extern void TV_ROF_GPU(float* A, float* B, int N, int M, int Z, int iter, float tau, float lambdaf); +cdef extern void TV_ROF_GPU_kernel(float* A, float* B, int N, int M, int Z, int iter, float tau, float lambdaf); cdef extern float pad_crop(float *A, float *Ap, int OldSizeX, int OldSizeY, int OldSizeZ, int NewSizeX, int NewSizeY, int NewSizeZ, @@ -67,7 +67,7 @@ def NML(inputData, h, lambdaf) -def ROF_TV_GPU(inputData, +def GPU_ROF_TV(inputData, iterations, time_marching_parameter, regularization_parameter): @@ -343,7 +343,7 @@ def ROFTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, np.zeros([dims[0],dims[1]], dtype='float32') # Running CUDA code here - TV_ROF_GPU( + TV_ROF_GPU_kernel( &inputData[0,0], &B[0,0], dims[0], dims[1], 0, iterations , @@ -366,7 +366,7 @@ def ROFTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, np.zeros([dims[0],dims[1],dims[2]], dtype='float32') # Running CUDA code here - TV_ROF_GPU( + TV_ROF_GPU_kernel( &inputData[0,0,0], &B[0,0,0], dims[0], dims[1], dims[2], iterations , diff --git a/Wrappers/Python/test/test_gpu_regularizers.py b/Wrappers/Python/test/test_gpu_regularizers.py index 735a25d..75127a6 100644 --- a/Wrappers/Python/test/test_gpu_regularizers.py +++ b/Wrappers/Python/test/test_gpu_regularizers.py @@ -13,7 +13,7 @@ import numpy as np import os from enum import Enum import timeit -from ccpi.filters.gpu_regularizers import Diff4thHajiaboli, NML +from ccpi.filters.gpu_regularizers import Diff4thHajiaboli, NML, GPU_ROF_TV ############################################################################### def printParametersToString(pars): txt = r'' -- cgit v1.2.3 From ce313c7dcf45edaf4c824690d8caa7df5df90120 Mon Sep 17 00:00:00 2001 From: Edoardo Pasca Date: Wed, 21 Feb 2018 14:43:35 +0000 Subject: modified gpu regularizers build --- Wrappers/Python/setup-regularizers.py.in | 6 ++++-- Wrappers/Python/test/test_gpu_regularizers.py | 30 ++++++++++++++------------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/Wrappers/Python/setup-regularizers.py.in b/Wrappers/Python/setup-regularizers.py.in index a125261..8655a2e 100644 --- a/Wrappers/Python/setup-regularizers.py.in +++ b/Wrappers/Python/setup-regularizers.py.in @@ -34,7 +34,9 @@ extra_libraries = ['cilreg'] extra_include_dirs += [os.path.join(".." , ".." , "Core"), os.path.join(".." , ".." , "Core", "regularizers_CPU"), - os.path.join(".." , ".." , "Core", "regularizers_GPU") , + os.path.join(".." , ".." , "Core", "regularizers_GPU" , "Diffus_HO" ) , + os.path.join(".." , ".." , "Core", "regularizers_GPU" , "NL_Regul" ) , + os.path.join(".." , ".." , "Core", "regularizers_GPU" , "TV_ROF" ) , "."] if platform.system() == 'Windows': @@ -81,4 +83,4 @@ setup( ) -@SETUP_GPU_WRAPPERS@ \ No newline at end of file +@SETUP_GPU_WRAPPERS@ diff --git a/Wrappers/Python/test/test_gpu_regularizers.py b/Wrappers/Python/test/test_gpu_regularizers.py index ff8af44..9e37627 100644 --- a/Wrappers/Python/test/test_gpu_regularizers.py +++ b/Wrappers/Python/test/test_gpu_regularizers.py @@ -50,14 +50,15 @@ u0 = f(u0).astype('float32') ## plot fig = plt.figure() -a=fig.add_subplot(2,3,1) +a=fig.add_subplot(2,4,1) a.set_title('noise') imgplot = plt.imshow(u0,cmap="gray") ## Diff4thHajiaboli start_time = timeit.default_timer() -pars = {'algorithm' : Diff4thHajiaboli , \ +pars = { +'algorithm' : Diff4thHajiaboli , \ 'input' : u0, 'edge_preserv_parameter':0.1 , \ 'number_of_iterations' :250 ,\ @@ -76,7 +77,7 @@ pars['rmse'] = rms txtstr = printParametersToString(pars) txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) print (txtstr) -a=fig.add_subplot(2,3,2) +a=fig.add_subplot(2,4,2) # these are matplotlib.patch.Patch properties props = dict(boxstyle='round', facecolor='wheat', alpha=0.75) @@ -85,14 +86,14 @@ a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=12, verticalalignment='top', bbox=props) imgplot = plt.imshow(d4h, cmap="gray") -a=fig.add_subplot(2,3,5) +a=fig.add_subplot(2,4,6) # these are matplotlib.patch.Patch properties props = dict(boxstyle='round', facecolor='wheat', alpha=0.5) # place a text box in upper left in axes coords a.text(0.05, 0.95, 'd4h - u0', transform=a.transAxes, fontsize=12, verticalalignment='top', bbox=props) -imgplot = plt.imshow((d4h - u0)**2, cmap="gray") +imgplot = plt.imshow((d4h - u0), cmap="gray") ## Patch Based Regul NML @@ -107,6 +108,7 @@ pars = {'algorithm' : NML , \ } """ pars = { +'algorithm' : NML , \ 'input' : u0, 'regularization_parameter': 0.01,\ 'searching_window_ratio':3, \ @@ -124,7 +126,7 @@ pars['rmse'] = rms txtstr = printParametersToString(pars) txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) print (txtstr) -a=fig.add_subplot(2,3,3) +a=fig.add_subplot(2,4,3) # these are matplotlib.patch.Patch properties props = dict(boxstyle='round', facecolor='wheat', alpha=0.75) @@ -133,22 +135,22 @@ a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=12, verticalalignment='top', bbox=props) imgplot = plt.imshow(nml, cmap="gray") -a=fig.add_subplot(2,3,6) +a=fig.add_subplot(2,4,7) # these are matplotlib.patch.Patch properties props = dict(boxstyle='round', facecolor='wheat', alpha=0.5) # place a text box in upper left in axes coords a.text(0.05, 0.95, 'nml - u0', transform=a.transAxes, fontsize=14, verticalalignment='top', bbox=props) -imgplot = plt.imshow((nml - u0)**2, cmap="gray") +imgplot = plt.imshow((nml - u0), cmap="gray") -plt.show() ## Rudin-Osher-Fatemi (ROF) TV regularization start_time = timeit.default_timer() pars = { +'algorithm' : GPU_ROF_TV , \ 'input' : u0, 'regularization_parameter': 1,\ 'time_marching_parameter': 0.003, \ @@ -158,29 +160,29 @@ pars = { rof_tv = GPU_ROF_TV(pars['input'], pars['number_of_iterations'], pars['time_marching_parameter'], - pars['number_of_iterations']) + pars['regularization_parameter']) rms = rmse(Im, rof_tv) pars['rmse'] = rms txtstr = printParametersToString(pars) txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) print (txtstr) -a=fig.add_subplot(2,4,1) +a=fig.add_subplot(2,4,4) # these are matplotlib.patch.Patch properties props = dict(boxstyle='round', facecolor='wheat', alpha=0.75) # place a text box in upper left in axes coords a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=12, verticalalignment='top', bbox=props) -imgplot = plt.imshow(nml, cmap="gray") +imgplot = plt.imshow(rof_tv, cmap="gray") -a=fig.add_subplot(2,4,2) +a=fig.add_subplot(2,4,8) # these are matplotlib.patch.Patch properties props = dict(boxstyle='round', facecolor='wheat', alpha=0.5) # place a text box in upper left in axes coords a.text(0.05, 0.95, 'rof_tv - u0', transform=a.transAxes, fontsize=14, verticalalignment='top', bbox=props) -imgplot = plt.imshow((rof_tv - u0)**2, cmap="gray") +imgplot = plt.imshow((rof_tv - u0), cmap="gray") plt.show() -- cgit v1.2.3 From d4d12945f5396b10259fdeeb2b09f99b0e2c6afd Mon Sep 17 00:00:00 2001 From: algol Date: Thu, 22 Feb 2018 12:34:39 +0000 Subject: GPU test fixed, cpu vs gpu test added for ROF-TV --- Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu | 2 +- Wrappers/Python/demo/test_cpu_regularizers.py | 10 +- Wrappers/Python/src/gpu_regularizers.pyx | 2 +- .../Python/test/test_cpu_vs_gpu_regularizers.py | 123 +++++++++++++++++++++ Wrappers/Python/test/test_gpu_regularizers.py | 16 +-- 5 files changed, 138 insertions(+), 15 deletions(-) create mode 100644 Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py diff --git a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu index 73a52e1..897b5d0 100755 --- a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu +++ b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu @@ -312,7 +312,7 @@ extern "C" void TV_ROF_GPU_kernel(float* Input, float* Output, int N, int M, int int dev = 0; CHECK(cudaSetDevice(dev)); - float *d_input, *d_update, *d_D1, *d_D2; + float *d_input, *d_update, *d_D1, *d_D2; CHECK(cudaMalloc((void**)&d_input,N*M*Z*sizeof(float))); CHECK(cudaMalloc((void**)&d_update,N*M*Z*sizeof(float))); diff --git a/Wrappers/Python/demo/test_cpu_regularizers.py b/Wrappers/Python/demo/test_cpu_regularizers.py index 5908c3c..d147b85 100644 --- a/Wrappers/Python/demo/test_cpu_regularizers.py +++ b/Wrappers/Python/demo/test_cpu_regularizers.py @@ -284,9 +284,9 @@ start_time = timeit.default_timer() pars = {'algorithm': ROF_TV , \ 'input' : u0,\ - 'regularization_parameter':1,\ - 'marching_step': 0.003,\ - 'number_of_iterations': 300 + 'regularization_parameter':25,\ + 'marching_step': 0.001,\ + 'number_of_iterations': 350 } rof = ROF_TV(pars['input'], pars['number_of_iterations'], @@ -307,9 +307,7 @@ props = dict(boxstyle='round', facecolor='wheat', alpha=0.5) # place a text box in upper left in axes coords a.text(0.05, 0.95, txtstr, transform=a.transAxes, fontsize=14, verticalalignment='top', bbox=props) -imgplot = plt.imshow(tgv, cmap="gray") - - +imgplot = plt.imshow(rof, cmap="gray") plt.show() diff --git a/Wrappers/Python/src/gpu_regularizers.pyx b/Wrappers/Python/src/gpu_regularizers.pyx index fcb91cc..c724471 100644 --- a/Wrappers/Python/src/gpu_regularizers.pyx +++ b/Wrappers/Python/src/gpu_regularizers.pyx @@ -345,7 +345,7 @@ def ROFTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, # Running CUDA code here TV_ROF_GPU_kernel( &inputData[0,0], &B[0,0], - dims[0], dims[1], 0, + dims[0], dims[1], 1, iterations , time_marching_parameter, regularization_parameter); diff --git a/Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py b/Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py new file mode 100644 index 0000000..8c91c73 --- /dev/null +++ b/Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Thu Feb 22 11:39:43 2018 + +Testing CPU implementation against GPU one + +@author: algol +""" + +import matplotlib.pyplot as plt +import numpy as np +import os +import timeit +from ccpi.filters.gpu_regularizers import Diff4thHajiaboli, NML, GPU_ROF_TV +from ccpi.filters.cpu_regularizers_cython import ROF_TV +############################################################################### +def printParametersToString(pars): + txt = r'' + for key, value in pars.items(): + if key== 'algorithm' : + txt += "{0} = {1}".format(key, value.__name__) + elif key == 'input': + txt += "{0} = {1}".format(key, np.shape(value)) + else: + txt += "{0} = {1}".format(key, value) + txt += '\n' + return txt +############################################################################### +def rmse(im1, im2): + a, b = im1.shape + rmse = np.sqrt(np.sum((im1 - im2) ** 2) / float(a * b)) + return rmse + +filename = os.path.join(".." , ".." , ".." , "data" ,"lena_gray_512.tif") + +# read image +Im = plt.imread(filename) +Im = np.asarray(Im, dtype='float32') + +Im = Im/255 +perc = 0.075 +u0 = Im + np.random.normal(loc = Im , + scale = perc * Im , + size = np.shape(Im)) +# map the u0 u0->u0>0 +f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1) +u0 = f(u0).astype('float32') + +## plot +fig = plt.figure(1) +plt.suptitle('Comparison of ROF-TV regularizer using CPU and GPU implementations') +a=fig.add_subplot(1,4,1) +a.set_title('Noisy Image') +imgplot = plt.imshow(u0,cmap="gray") + + +# set parameters +pars = {'algorithm': ROF_TV , \ + 'input' : u0,\ + 'regularization_parameter':12,\ + 'time_marching_parameter': 0.001,\ + 'number_of_iterations': 600 + } +print ("#################ROF TV CPU#####################") +start_time = timeit.default_timer() +rof_cpu = ROF_TV(pars['input'], + pars['number_of_iterations'], + pars['regularization_parameter'], + pars['time_marching_parameter'] + ) +#tgv = out +rms = rmse(Im, rof_cpu) +pars['rmse'] = rms + +txtstr = printParametersToString(pars) +txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) +print (txtstr) +a=fig.add_subplot(1,4,2) + +# these are matplotlib.patch.Patch properties +props = dict(boxstyle='round', facecolor='wheat', alpha=0.75) +# place a text box in upper left in axes coords +a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14, + verticalalignment='top', bbox=props) +imgplot = plt.imshow(rof_cpu, cmap="gray") +plt.title('{}'.format('CPU results')) + + +print ("#################ROF TV GPU#####################") +start_time = timeit.default_timer() +rof_gpu = GPU_ROF_TV(pars['input'], + pars['number_of_iterations'], + pars['time_marching_parameter'], + pars['regularization_parameter']) + +rms = rmse(Im, rof_gpu) +pars['rmse'] = rms +txtstr = printParametersToString(pars) +txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) +print (txtstr) +a=fig.add_subplot(1,4,3) + +# these are matplotlib.patch.Patch properties +props = dict(boxstyle='round', facecolor='wheat', alpha=0.75) +# place a text box in upper left in axes coords +a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14, + verticalalignment='top', bbox=props) +imgplot = plt.imshow(rof_gpu, cmap="gray") +plt.title('{}'.format('GPU results')) + + +print ("--------Compare the results--------") +tolerance = 1e-06 +diff_im = abs(rof_cpu - rof_gpu) +diff_im[diff_im > tolerance] = 1 +a=fig.add_subplot(1,4,4) +imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray") +plt.title('{}'.format('Pixels larger threshold difference')) +if (diff_im.sum() > 1): + print ("Arrays do not match!") +else: + print ("Arrays match") \ No newline at end of file diff --git a/Wrappers/Python/test/test_gpu_regularizers.py b/Wrappers/Python/test/test_gpu_regularizers.py index 9e37627..29f5bad 100644 --- a/Wrappers/Python/test/test_gpu_regularizers.py +++ b/Wrappers/Python/test/test_gpu_regularizers.py @@ -93,7 +93,8 @@ props = dict(boxstyle='round', facecolor='wheat', alpha=0.5) # place a text box in upper left in axes coords a.text(0.05, 0.95, 'd4h - u0', transform=a.transAxes, fontsize=12, verticalalignment='top', bbox=props) -imgplot = plt.imshow((d4h - u0), cmap="gray") +imgplot = plt.imshow((d4h - u0)**2, vmin=0, vmax=0.03, cmap="gray") +plt.colorbar(ticks=[0, 0.03], orientation='vertical') ## Patch Based Regul NML @@ -142,7 +143,8 @@ props = dict(boxstyle='round', facecolor='wheat', alpha=0.5) # place a text box in upper left in axes coords a.text(0.05, 0.95, 'nml - u0', transform=a.transAxes, fontsize=14, verticalalignment='top', bbox=props) -imgplot = plt.imshow((nml - u0), cmap="gray") +imgplot = plt.imshow((nml - u0)**2, vmin=0, vmax=0.03, cmap="gray") +plt.colorbar(ticks=[0, 0.03], orientation='vertical') @@ -152,9 +154,9 @@ start_time = timeit.default_timer() pars = { 'algorithm' : GPU_ROF_TV , \ 'input' : u0, - 'regularization_parameter': 1,\ - 'time_marching_parameter': 0.003, \ - 'number_of_iterations':300 + 'regularization_parameter': 25,\ + 'time_marching_parameter': 0.001, \ + 'number_of_iterations':350 } rof_tv = GPU_ROF_TV(pars['input'], @@ -183,6 +185,6 @@ props = dict(boxstyle='round', facecolor='wheat', alpha=0.5) # place a text box in upper left in axes coords a.text(0.05, 0.95, 'rof_tv - u0', transform=a.transAxes, fontsize=14, verticalalignment='top', bbox=props) -imgplot = plt.imshow((rof_tv - u0), cmap="gray") - +imgplot = plt.imshow((rof_tv - u0)**2, vmin=0, vmax=0.03, cmap="gray") +plt.colorbar(ticks=[0, 0.03], orientation='vertical') plt.show() -- cgit v1.2.3 From 0ebf1f949b7150692e356627c455e905b642af97 Mon Sep 17 00:00:00 2001 From: Daniil Kazantsev Date: Sun, 25 Feb 2018 17:54:03 +0000 Subject: updates to CPU/GPU core for ROF and demos --- Core/regularizers_CPU/ROF_TV_core.c | 9 ++++----- Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu | 19 +++++++++---------- Wrappers/Python/demo/test_cpu_regularizers.py | 6 +++--- Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py | 6 +++--- Wrappers/Python/test/test_gpu_regularizers.py | 6 +++--- 5 files changed, 22 insertions(+), 24 deletions(-) diff --git a/Core/regularizers_CPU/ROF_TV_core.c b/Core/regularizers_CPU/ROF_TV_core.c index ba7fe48..fd47c3f 100644 --- a/Core/regularizers_CPU/ROF_TV_core.c +++ b/Core/regularizers_CPU/ROF_TV_core.c @@ -268,7 +268,7 @@ float TV_kernel(float *D1, float *D2, float *D3, float *B, float *A, float lambd dv2 = D2[index] - D2[(dimX*dimY)*k + j*dimX+i2]; dv3 = D3[index] - D3[(dimX*dimY)*k2 + j*dimX+i]; - B[index] = B[index] + tau*lambda*(dv1 + dv2 + dv3) + tau*(A[index] - B[index]); + B[index] = B[index] + tau*(2.0f*lambda*(dv1 + dv2 + dv3) - (B[index] - A[index])); }}} } else { @@ -284,10 +284,9 @@ float TV_kernel(float *D1, float *D2, float *D3, float *B, float *A, float lambd /* divergence components */ dv1 = D1[index] - D1[j2*dimX + i]; - dv2 = D2[index] - D2[j*dimX + i2]; - - //B[index] = B[index] + tau*lambda*(dv1 + dv2) + tau*(A[index] - B[index]); - B[index] = B[index] + tau*((dv1 + dv2) - lambda*(B[index] - A[index])); + dv2 = D2[index] - D2[j*dimX + i2]; + + B[index] = B[index] + tau*(2.0f*lambda*(dv1 + dv2) - (B[index] - A[index])); }} } return *B; diff --git a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu index 897b5d0..b67b53b 100755 --- a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu +++ b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu @@ -54,7 +54,7 @@ limitations under the License. #define BLKXSIZE2D 16 #define BLKYSIZE2D 16 -#define EPS 1.0e-4 +#define EPS 1.0e-5 #define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) ) @@ -71,7 +71,7 @@ __host__ __device__ int sign (float x) /* differences 1 */ __global__ void D1_func2D(float* Input, float* D1, int N, int M) { - int i1, j1, i2, j2; + int i1, j1, i2; float NOMx_1,NOMy_1,NOMy_0,denom1,denom2,T1; int i = blockDim.x * blockIdx.x + threadIdx.x; int j = blockDim.y * blockIdx.y + threadIdx.y; @@ -84,7 +84,6 @@ __host__ __device__ int sign (float x) i1 = i + 1; if (i1 >= N) i1 = i-1; i2 = i - 1; if (i2 < 0) i2 = i+1; j1 = j + 1; if (j1 >= M) j1 = j-1; - j2 = j - 1; if (j2 < 0) j2 = j+1; /* Forward-backward differences */ NOMx_1 = Input[j1*N + i] - Input[index]; /* x+ */ @@ -102,7 +101,7 @@ __host__ __device__ int sign (float x) /* differences 2 */ __global__ void D2_func2D(float* Input, float* D2, int N, int M) { - int i1, j1, i2, j2; + int i1, j1, j2; float NOMx_1,NOMy_1,NOMx_0,denom1,denom2,T2; int i = blockDim.x * blockIdx.x + threadIdx.x; int j = blockDim.y * blockIdx.y + threadIdx.y; @@ -113,7 +112,6 @@ __host__ __device__ int sign (float x) /* boundary conditions (Neumann reflections) */ i1 = i + 1; if (i1 >= N) i1 = i-1; - i2 = i - 1; if (i2 < 0) i2 = i+1; j1 = j + 1; if (j1 >= M) j1 = j-1; j2 = j - 1; if (j2 < 0) j2 = j+1; @@ -149,7 +147,7 @@ __host__ __device__ int sign (float x) dv1 = D1[index] - D1[j2*N + i]; dv2 = D2[index] - D2[j*N + i2]; - Update[index] = Update[index] + tau*((dv1 + dv2) - lambda*(Update[index] - Input[index])); + Update[index] = Update[index] + tau*(2.0f*lambda*(dv1 + dv2) - (Update[index] - Input[index])); } } @@ -299,21 +297,22 @@ __host__ __device__ int sign (float x) dv2 = D2[index] - D2[(dimX*dimY)*k + j*dimX+i2]; dv3 = D3[index] - D3[(dimX*dimY)*k2 + j*dimX+i]; - Update[index] = Update[index] + tau*lambda*(dv1 + dv2 + dv3) + tau*(Update[index] - Input[index]); + Update[index] = Update[index] + tau*(2.0f*lambda*(dv1 + dv2 + dv3) - (Update[index] - Input[index])); } } ///////////////////////////////////////////////// // HOST FUNCTION -extern "C" void TV_ROF_GPU_kernel(float* Input, float* Output, int N, int M, int Z, int iter, float tau, float lambda) +extern "C" void TV_ROF_GPU(float* Input, float* Output, int N, int M, int Z, int iter, float tau, float lambda) { // set up device int dev = 0; CHECK(cudaSetDevice(dev)); - float *d_input, *d_update, *d_D1, *d_D2; + float *d_input, *d_update, *d_D1, *d_D2; + if (Z == 0) Z = 1; CHECK(cudaMalloc((void**)&d_input,N*M*Z*sizeof(float))); CHECK(cudaMalloc((void**)&d_update,N*M*Z*sizeof(float))); CHECK(cudaMalloc((void**)&d_D1,N*M*Z*sizeof(float))); @@ -346,7 +345,7 @@ extern "C" void TV_ROF_GPU_kernel(float* Input, float* Output, int N, int M, int CHECK(cudaFree(d_D3)); } else { - // TV - 2D case + // TV - 2D case dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D); dim3 dimGrid(idivup(N,BLKXSIZE2D), idivup(M,BLKYSIZE2D)); diff --git a/Wrappers/Python/demo/test_cpu_regularizers.py b/Wrappers/Python/demo/test_cpu_regularizers.py index d147b85..b08c418 100644 --- a/Wrappers/Python/demo/test_cpu_regularizers.py +++ b/Wrappers/Python/demo/test_cpu_regularizers.py @@ -284,9 +284,9 @@ start_time = timeit.default_timer() pars = {'algorithm': ROF_TV , \ 'input' : u0,\ - 'regularization_parameter':25,\ - 'marching_step': 0.001,\ - 'number_of_iterations': 350 + 'regularization_parameter':0.04,\ + 'marching_step': 0.0025,\ + 'number_of_iterations': 300 } rof = ROF_TV(pars['input'], pars['number_of_iterations'], diff --git a/Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py b/Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py index 8c91c73..d742a7f 100644 --- a/Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py +++ b/Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py @@ -58,8 +58,8 @@ imgplot = plt.imshow(u0,cmap="gray") # set parameters pars = {'algorithm': ROF_TV , \ 'input' : u0,\ - 'regularization_parameter':12,\ - 'time_marching_parameter': 0.001,\ + 'regularization_parameter':0.04,\ + 'time_marching_parameter': 0.0025,\ 'number_of_iterations': 600 } print ("#################ROF TV CPU#####################") @@ -120,4 +120,4 @@ plt.title('{}'.format('Pixels larger threshold difference')) if (diff_im.sum() > 1): print ("Arrays do not match!") else: - print ("Arrays match") \ No newline at end of file + print ("Arrays match") diff --git a/Wrappers/Python/test/test_gpu_regularizers.py b/Wrappers/Python/test/test_gpu_regularizers.py index 29f5bad..c473270 100644 --- a/Wrappers/Python/test/test_gpu_regularizers.py +++ b/Wrappers/Python/test/test_gpu_regularizers.py @@ -154,9 +154,9 @@ start_time = timeit.default_timer() pars = { 'algorithm' : GPU_ROF_TV , \ 'input' : u0, - 'regularization_parameter': 25,\ - 'time_marching_parameter': 0.001, \ - 'number_of_iterations':350 + 'regularization_parameter': 0.04,\ + 'time_marching_parameter': 0.0025, \ + 'number_of_iterations':300 } rof_tv = GPU_ROF_TV(pars['input'], -- cgit v1.2.3 From b9562bbfdf6ca2cc8775e242ae13cbfaeeb2479b Mon Sep 17 00:00:00 2001 From: Daniil Kazantsev Date: Mon, 26 Feb 2018 11:16:25 +0000 Subject: FGP CPU parameters passing simplified, FGP GPU added --- Core/regularizers_CPU/FGP_TV_core.c | 481 +++++++++++--------- Core/regularizers_CPU/FGP_TV_core.h | 51 +-- Core/regularizers_GPU/TV_FGP/FGP_TV_GPU_core.cu | 561 ++++++++++++++++++++++++ Core/regularizers_GPU/TV_FGP/FGP_TV_GPU_core.h | 10 + 4 files changed, 867 insertions(+), 236 deletions(-) create mode 100755 Core/regularizers_GPU/TV_FGP/FGP_TV_GPU_core.cu create mode 100755 Core/regularizers_GPU/TV_FGP/FGP_TV_GPU_core.h diff --git a/Core/regularizers_CPU/FGP_TV_core.c b/Core/regularizers_CPU/FGP_TV_core.c index 03cd445..304848d 100644 --- a/Core/regularizers_CPU/FGP_TV_core.c +++ b/Core/regularizers_CPU/FGP_TV_core.c @@ -22,245 +22,314 @@ limitations under the License. /* C-OMP implementation of FGP-TV [1] denoising/regularization model (2D/3D case) * * Input Parameters: - * 1. Noisy image/volume [REQUIRED] - * 2. lambda - regularization parameter [REQUIRED] - * 3. Number of iterations [OPTIONAL parameter] - * 4. eplsilon: tolerance constant [OPTIONAL parameter] - * 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter] + * 1. Noisy image/volume + * 2. lambda - regularization parameter + * 3. Number of iterations + * 4. eplsilon: tolerance constant + * 5. TV-type: methodTV - 'iso' (0) or 'l1' (1) + * 6. nonneg: 'nonnegativity (0 is OFF by default) + * 7. print information: 0 (off) or 1 (on) * * Output: * [1] Filtered/regularized image - * [2] last function value - * - * Example of image denoising: - * figure; - * Im = double(imread('lena_gray_256.tif'))/255; % loading image - * u0 = Im + .05*randn(size(Im)); % adding noise - * u = FGP_TV(single(u0), 0.05, 100, 1e-04); * * This function is based on the Matlab's code and paper by * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems" - * - * D. Kazantsev, 2016-17 - * */ - -/* 2D-case related Functions */ -/*****************************************************************/ -float Obj_func_CALC2D(float *A, float *D, float *funcvalA, float lambda, int dimX, int dimY) -{ - int i,j; - float f1, f2, val1, val2; - - /*data-related term */ - f1 = 0.0f; - for(i=0; i 4) break; + + /*storing old values*/ + copyIm(Output, Output_prev, dimX, dimY, 1); + copyIm(P1, P1_prev, dimX, dimY, 1); + copyIm(P2, P2_prev, dimX, dimY, 1); + tk = tkp1; + } + if (printM == 1) printf("FGP-TV iterations stopped at iteration %i \n", ll); + free(Output_prev); free(P1); free(P2); free(P1_prev); free(P2_prev); free(R1); free(R2); + } + else { + /*3D case*/ + float *Output_prev=NULL, *P1=NULL, *P2=NULL, *P3=NULL, *P1_prev=NULL, *P2_prev=NULL, *P3_prev=NULL, *R1=NULL, *R2=NULL, *R3=NULL; + DimTotal = dimX*dimY*dimZ; + + Output_prev = (float *) malloc( DimTotal * sizeof(float) ); + P1 = (float *) malloc( DimTotal * sizeof(float) ); + P2 = (float *) malloc( DimTotal * sizeof(float) ); + P3 = (float *) malloc( DimTotal * sizeof(float) ); + P1_prev = (float *) malloc( DimTotal * sizeof(float) ); + P2_prev = (float *) malloc( DimTotal * sizeof(float) ); + P3_prev = (float *) malloc( DimTotal * sizeof(float) ); + R1 = (float *) malloc( DimTotal * sizeof(float) ); + R2 = (float *) malloc( DimTotal * sizeof(float) ); + R3 = (float *) malloc( DimTotal * sizeof(float) ); + + /* begin iterations */ + for(ll=0; ll 4) break; + + /*storing old values*/ + copyIm(Output, Output_prev, dimX, dimY, dimZ); + copyIm(P1, P1_prev, dimX, dimY, dimZ); + copyIm(P2, P2_prev, dimX, dimY, dimZ); + copyIm(P3, P3_prev, dimX, dimY, dimZ); + tk = tkp1; + } + if (printM == 1) printf("FGP-TV iterations stopped at iteration %i \n", ll); + free(Output_prev); free(P1); free(P2); free(P3); free(P1_prev); free(P2_prev); free(P3_prev); free(R1); free(R2); free(R3); + } + return *Output; } float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, int dimX, int dimY) { - float val1, val2; - int i, j; -#pragma omp parallel for shared(A,D,R1,R2) private(i,j,val1,val2) - for (i = 0; i 1) { - P1[(i)*dimY + (j)] = P1[(i)*dimY + (j)] / sqrt(denom); - P2[(i)*dimY + (j)] = P2[(i)*dimY + (j)] / sqrt(denom); - } - } - } - } - else { - /* anisotropic TV*/ -#pragma omp parallel for shared(P1,P2) private(i,j,val1,val2) - for (i = 0; i 1.0f) { + sq_denom = 1.0f/sqrt(denom); + P1[index] = P1[index]*sq_denom; + P2[index] = P2[index]*sq_denom; + } + }} + } + else { + /* anisotropic TV*/ +#pragma omp parallel for shared(P1,P2) private(index,i,j,val1,val2) + for(i=0; i 1.0f) { + sq_denom = 1.0f/sqrt(denom); + P1[index] = P1[index]*sq_denom; + P2[index] = P2[index]*sq_denom; + P3[index] = P3[index]*sq_denom; + } + }}} + } + else { + /* anisotropic TV*/ +#pragma omp parallel for shared(P1,P2,P3) private(index,i,j,k,val1,val2,val3) + for(i=0; i +#include + +/* CUDA implementation of FGP-TV [1] denoising/regularization model (2D/3D case) + * + * Input Parameters: + * 1. Noisy image/volume + * 2. lambda - regularization parameter + * 3. Number of iterations + * 4. eplsilon: tolerance constant + * 5. TV-type: methodTV - 'iso' (0) or 'l1' (1) + * 6. nonneg: 'nonnegativity (0 is OFF by default) + * 7. print information: 0 (off) or 1 (on) + * + * Output: + * [1] Filtered/regularized image + * + * This function is based on the Matlab's code and paper by + * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems" + */ + +// This will output the proper CUDA error strings in the event that a CUDA host call returns an error +#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__) + +inline void __checkCudaErrors(cudaError err, const char *file, const int line) +{ + if (cudaSuccess != err) + { + fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n", + file, line, (int)err, cudaGetErrorString(err)); + exit(EXIT_FAILURE); + } +} + +#define BLKXSIZE2D 16 +#define BLKYSIZE2D 16 + +#define BLKXSIZE 8 +#define BLKYSIZE 8 +#define BLKZSIZE 8 + +#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) ) +struct square { __host__ __device__ float operator()(float x) { return x * x; } }; + +/************************************************/ +/*****************2D modules*********************/ +/************************************************/ +__global__ void Obj_func2D_kernel(float *Ad, float *D, float *R1, float *R2, int N, int M, int ImSize, float lambda) +{ + + float val1,val2; + + //calculate each thread global index + const int xIndex=blockIdx.x*blockDim.x+threadIdx.x; + const int yIndex=blockIdx.y*blockDim.y+threadIdx.y; + + int index = xIndex + N*yIndex; + + if ((xIndex < N) && (yIndex < M)) { + if (xIndex <= 0) {val1 = 0.0f;} else {val1 = R1[(xIndex-1) + N*yIndex];} + if (yIndex <= 0) {val2 = 0.0f;} else {val2 = R2[xIndex + N*(yIndex-1)];} + //Write final result to global memory + D[index] = Ad[index] - lambda*(R1[index] + R2[index] - val1 - val2); + } + return; +} + +__global__ void Grad_func2D_kernel(float *P1, float *P2, float *D, float *R1, float *R2, int N, int M, int ImSize, float multip) +{ + + float val1,val2; + + //calculate each thread global index + const int xIndex=blockIdx.x*blockDim.x+threadIdx.x; + const int yIndex=blockIdx.y*blockDim.y+threadIdx.y; + + int index = xIndex + N*yIndex; + + if ((xIndex < N) && (yIndex < M)) { + + /* boundary conditions */ + if (xIndex >= N-1) val1 = 0.0f; else val1 = D[index] - D[(xIndex+1) + N*yIndex]; + if (yIndex >= M-1) val2 = 0.0f; else val2 = D[index] - D[(xIndex) + N*(yIndex + 1)]; + + //Write final result to global memory + P1[index] = R1[index] + multip*val1; + P2[index] = R2[index] + multip*val2; + } + return; +} + +__global__ void Proj_func2D_iso_kernel(float *P1, float *P2, int N, int M, int ImSize) +{ + + float denom; + //calculate each thread global index + const int xIndex=blockIdx.x*blockDim.x+threadIdx.x; + const int yIndex=blockIdx.y*blockDim.y+threadIdx.y; + + int index = xIndex + N*yIndex; + + if ((xIndex < N) && (yIndex < M)) { + denom = pow(P1[index],2) + pow(P2[index],2); + if (denom > 1.0f) { + P1[index] = P1[index]/sqrt(denom); + P2[index] = P2[index]/sqrt(denom); + } + } + return; +} +__global__ void Proj_func2D_aniso_kernel(float *P1, float *P2, int N, int M, int ImSize) +{ + + float val1, val2; + //calculate each thread global index + const int xIndex=blockIdx.x*blockDim.x+threadIdx.x; + const int yIndex=blockIdx.y*blockDim.y+threadIdx.y; + + int index = xIndex + N*yIndex; + + if ((xIndex < N) && (yIndex < M)) { + val1 = abs(P1[index]); + val2 = abs(P2[index]); + if (val1 < 1.0f) {val1 = 1.0f;} + if (val2 < 1.0f) {val2 = 1.0f;} + P1[index] = P1[index]/val1; + P2[index] = P2[index]/val2; + } + return; +} +__global__ void Rupd_func2D_kernel(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, float multip2, int N, int M, int ImSize) +{ + //calculate each thread global index + const int xIndex=blockIdx.x*blockDim.x+threadIdx.x; + const int yIndex=blockIdx.y*blockDim.y+threadIdx.y; + + int index = xIndex + N*yIndex; + + if ((xIndex < N) && (yIndex < M)) { + R1[index] = P1[index] + multip2*(P1[index] - P1_old[index]); + R2[index] = P2[index] + multip2*(P2[index] - P2_old[index]); + } + return; +} +__global__ void nonneg2D_kernel(float* Output, int N, int M, int num_total) +{ + int xIndex = blockDim.x * blockIdx.x + threadIdx.x; + int yIndex = blockDim.y * blockIdx.y + threadIdx.y; + + int index = xIndex + N*yIndex; + + if (index < num_total) { + if (Output[index] < 0.0f) Output[index] = 0.0f; + } +} +__global__ void copy_kernel2D(float *Input, float* Output, int N, int M, int num_total) +{ + int xIndex = blockDim.x * blockIdx.x + threadIdx.x; + int yIndex = blockDim.y * blockIdx.y + threadIdx.y; + + int index = xIndex + N*yIndex; + + if (index < num_total) { + Output[index] = Input[index]; + } +} +__global__ void ResidCalc2D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int num_total) +{ + int xIndex = blockDim.x * blockIdx.x + threadIdx.x; + int yIndex = blockDim.y * blockIdx.y + threadIdx.y; + + int index = xIndex + N*yIndex; + + if (index < num_total) { + Output[index] = Input1[index] - Input2[index]; + } +} +/************************************************/ +/*****************3D modules*********************/ +/************************************************/ +__global__ void Obj_func3D_kernel(float *Ad, float *D, float *R1, float *R2, float *R3, int N, int M, int Z, int ImSize, float lambda) +{ + + float val1,val2,val3; + + //calculate each thread global index + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if ((i < N) && (j < M) && (k < Z)) { + if (i <= 0) {val1 = 0.0f;} else {val1 = R1[(N*M)*(k) + (i-1) + N*j];} + if (j <= 0) {val2 = 0.0f;} else {val2 = R2[(N*M)*(k) + i + N*(j-1)];} + if (k <= 0) {val3 = 0.0f;} else {val3 = R3[(N*M)*(k-1) + i + N*j];} + //Write final result to global memory + D[index] = Ad[index] - lambda*(R1[index] + R2[index] + R3[index] - val1 - val2 - val3); + } + return; +} + +__global__ void Grad_func3D_kernel(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, int N, int M, int Z, int ImSize, float multip) +{ + + float val1,val2,val3; + + //calculate each thread global index + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if ((i < N) && (j < M) && (k < Z)) { + /* boundary conditions */ + if (i >= N-1) val1 = 0.0f; else val1 = D[index] - D[(N*M)*(k) + (i+1) + N*j]; + if (j >= M-1) val2 = 0.0f; else val2 = D[index] - D[(N*M)*(k) + i + N*(j+1)]; + if (k >= Z-1) val3 = 0.0f; else val3 = D[index] - D[(N*M)*(k+1) + i + N*j]; + + //Write final result to global memory + P1[index] = R1[index] + multip*val1; + P2[index] = R2[index] + multip*val2; + P3[index] = R3[index] + multip*val3; + } + return; +} + +__global__ void Proj_func3D_iso_kernel(float *P1, float *P2, float *P3, int N, int M, int Z, int ImSize) +{ + + float denom,sq_denom; + //calculate each thread global index + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if ((i < N) && (j < M) && (k < Z)) { + denom = pow(P1[index],2) + pow(P2[index],2) + pow(P3[index],2); + + if (denom > 1.0f) { + sq_denom = 1.0f/sqrt(denom); + P1[index] = P1[index]*sq_denom; + P2[index] = P2[index]*sq_denom; + P3[index] = P3[index]*sq_denom; + } + } + return; +} + +__global__ void Proj_func3D_aniso_kernel(float *P1, float *P2, float *P3, int N, int M, int Z, int ImSize) +{ + + float val1, val2, val3; + //calculate each thread global index + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if ((i < N) && (j < M) && (k < Z)) { + val1 = abs(P1[index]); + val2 = abs(P2[index]); + val3 = abs(P3[index]); + if (val1 < 1.0f) {val1 = 1.0f;} + if (val2 < 1.0f) {val2 = 1.0f;} + if (val3 < 1.0f) {val3 = 1.0f;} + P1[index] = P1[index]/val1; + P2[index] = P2[index]/val2; + P3[index] = P3[index]/val3; + } + return; +} + + +__global__ void Rupd_func3D_kernel(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, float multip2, int N, int M, int Z, int ImSize) +{ + //calculate each thread global index + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if ((i < N) && (j < M) && (k < Z)) { + R1[index] = P1[index] + multip2*(P1[index] - P1_old[index]); + R2[index] = P2[index] + multip2*(P2[index] - P2_old[index]); + R3[index] = P3[index] + multip2*(P3[index] - P3_old[index]); + } + return; +} + +__global__ void nonneg3D_kernel(float* Output, int N, int M, int Z, int num_total) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if (index < num_total) { + if (Output[index] < 0.0f) Output[index] = 0.0f; + } +} + +__global__ void copy_kernel3D(float *Input, float* Output, int N, int M, int Z, int num_total) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if (index < num_total) { + Output[index] = Input[index]; + } +} +/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ + +////////////MAIN HOST FUNCTION /////////////// +extern "C" void FGP_TV_GPU(float *Input, float *Output, float lambda, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ) +{ + int deviceCount = -1; // number of devices + cudaGetDeviceCount(&deviceCount); + if (deviceCount == 0) { + fprintf(stderr, "No CUDA devices found\n"); + return; + } + + int count = 0, i; + float re, multip,multip2; + float tk = 1.0f; + float tkp1=1.0f; + + if (dimZ <= 1) { + /*2D verson*/ + int ImSize = dimX*dimY; + float *d_input, *d_update=NULL, *d_update_prev=NULL, *P1=NULL, *P2=NULL, *P1_prev=NULL, *P2_prev=NULL, *R1=NULL, *R2=NULL; + + dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D); + dim3 dimGrid(idivup(dimX,BLKXSIZE2D), idivup(dimY,BLKYSIZE2D)); + + /*allocate space for images on device*/ + checkCudaErrors( cudaMalloc((void**)&d_input,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&d_update,ImSize*sizeof(float)) ); + if (epsil != 0.0f) checkCudaErrors( cudaMalloc((void**)&d_update_prev,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P1,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P2,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P1_prev,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P2_prev,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&R1,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&R2,ImSize*sizeof(float)) ); + + checkCudaErrors( cudaMemcpy(d_input,Input,ImSize*sizeof(float),cudaMemcpyHostToDevice)); + cudaMemset(P1, 0, ImSize*sizeof(float)); + cudaMemset(P2, 0, ImSize*sizeof(float)); + cudaMemset(P1_prev, 0, ImSize*sizeof(float)); + cudaMemset(P2_prev, 0, ImSize*sizeof(float)); + cudaMemset(R1, 0, ImSize*sizeof(float)); + cudaMemset(R2, 0, ImSize*sizeof(float)); + + /********************** Run CUDA 2D kernel here ********************/ + multip = (1.0f/(8.0f*lambda)); + + /* The main kernel */ + for (i = 0; i < iter; i++) { + + /* computing the gradient of the objective function */ + Obj_func2D_kernel<<>>(d_input, d_update, R1, R2, dimX, dimY, ImSize, lambda); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + if (nonneg != 0) { + nonneg2D_kernel<<>>(d_update, dimX, dimY, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); } + + /*Taking a step towards minus of the gradient*/ + Grad_func2D_kernel<<>>(P1, P2, d_update, R1, R2, dimX, dimY, ImSize, multip); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + /* projection step */ + if (methodTV == 0) Proj_func2D_iso_kernel<<>>(P1, P2, dimX, dimY, ImSize); /*isotropic TV*/ + else Proj_func2D_aniso_kernel<<>>(P1, P2, dimX, dimY, ImSize); /*anisotropic TV*/ + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f; + multip2 = ((tk-1.0f)/tkp1); + + Rupd_func2D_kernel<<>>(P1, P1_prev, P2, P2_prev, R1, R2, tkp1, tk, multip2, dimX, dimY, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + if (epsil != 0.0f) { + /* calculate norm - stopping rules using the Thrust library */ + ResidCalc2D_kernel<<>>(d_update, d_update_prev, P1_prev, dimX, dimY, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + thrust::device_vector d_vec(P1_prev, P1_prev + ImSize); + float reduction = sqrt(thrust::transform_reduce(d_vec.begin(), d_vec.end(), square(), 0.0f, thrust::plus())); + thrust::device_vector d_vec2(d_update, d_update + ImSize); + float reduction2 = sqrt(thrust::transform_reduce(d_vec2.begin(), d_vec2.end(), square(), 0.0f, thrust::plus())); + + re = (reduction/reduction2); + if (re < epsil) count++; + if (count > 4) break; + + copy_kernel2D<<>>(d_update, d_update_prev, dimX, dimY, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + } + + copy_kernel2D<<>>(P1, P1_prev, dimX, dimY, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + copy_kernel2D<<>>(P2, P2_prev, dimX, dimY, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + tk = tkp1; + } + if (printM == 1) printf("FGP-TV iterations stopped at iteration %i \n", i); + /***************************************************************/ + //copy result matrix from device to host memory + cudaMemcpy(Output,d_update,ImSize*sizeof(float),cudaMemcpyDeviceToHost); + + cudaFree(d_input); + cudaFree(d_update); + if (epsil != 0.0f) cudaFree(d_update_prev); + cudaFree(P1); + cudaFree(P2); + cudaFree(P1_prev); + cudaFree(P2_prev); + cudaFree(R1); + cudaFree(R2); + } + else { + /*3D verson*/ + int ImSize = dimX*dimY*dimZ; + float *d_input, *d_update=NULL, *P1=NULL, *P2=NULL, *P3=NULL, *P1_prev=NULL, *P2_prev=NULL, *P3_prev=NULL, *R1=NULL, *R2=NULL, *R3=NULL; + + dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE); + dim3 dimGrid(idivup(dimX,BLKXSIZE), idivup(dimY,BLKYSIZE),idivup(dimZ,BLKZSIZE)); + + /*allocate space for images on device*/ + checkCudaErrors( cudaMalloc((void**)&d_input,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&d_update,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P1,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P2,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P3,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P1_prev,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P2_prev,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P3_prev,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&R1,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&R2,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&R3,ImSize*sizeof(float)) ); + + checkCudaErrors( cudaMemcpy(d_input,Input,ImSize*sizeof(float),cudaMemcpyHostToDevice)); + cudaMemset(P1, 0, ImSize*sizeof(float)); + cudaMemset(P2, 0, ImSize*sizeof(float)); + cudaMemset(P3, 0, ImSize*sizeof(float)); + cudaMemset(P1_prev, 0, ImSize*sizeof(float)); + cudaMemset(P2_prev, 0, ImSize*sizeof(float)); + cudaMemset(P3_prev, 0, ImSize*sizeof(float)); + cudaMemset(R1, 0, ImSize*sizeof(float)); + cudaMemset(R2, 0, ImSize*sizeof(float)); + cudaMemset(R3, 0, ImSize*sizeof(float)); + /********************** Run CUDA 3D kernel here ********************/ + multip = (1.0f/(8.0f*lambda)); + + /* The main kernel */ + for (i = 0; i < iter; i++) { + + /* computing the gradient of the objective function */ + Obj_func3D_kernel<<>>(d_input, d_update, R1, R2, R3, dimX, dimY, dimZ, ImSize, lambda); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + if (nonneg != 0) { + nonneg3D_kernel<<>>(d_update, dimX, dimY, dimZ, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); } + + /*Taking a step towards minus of the gradient*/ + Grad_func3D_kernel<<>>(P1, P2, P3, d_update, R1, R2, R3, dimX, dimY, dimZ, ImSize, multip); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + /* projection step */ + if (methodTV == 0) Proj_func3D_iso_kernel<<>>(P1, P2, P3, dimX, dimY, dimZ, ImSize); /* isotropic kernel */ + else Proj_func3D_aniso_kernel<<>>(P1, P2, P3, dimX, dimY, dimZ, ImSize); /* anisotropic kernel */ + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f; + multip2 = ((tk-1.0f)/tkp1); + + Rupd_func3D_kernel<<>>(P1, P1_prev, P2, P2_prev, P3, P3_prev, R1, R2, R3, tkp1, tk, multip2, dimX, dimY, dimZ, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + copy_kernel3D<<>>(P1, P1_prev, dimX, dimY, dimZ, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + copy_kernel3D<<>>(P2, P2_prev, dimX, dimY, dimZ, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + copy_kernel3D<<>>(P3, P3_prev, dimX, dimY, dimZ, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + tk = tkp1; + } + if (printM == 1) printf("FGP-TV iterations stopped at iteration %i \n", i); + /***************************************************************/ + //copy result matrix from device to host memory + cudaMemcpy(Output,d_update,ImSize*sizeof(float),cudaMemcpyDeviceToHost); + + cudaFree(d_input); + cudaFree(d_update); + cudaFree(P1); + cudaFree(P2); + cudaFree(P3); + cudaFree(P1_prev); + cudaFree(P2_prev); + cudaFree(P3_prev); + cudaFree(R1); + cudaFree(R2); + cudaFree(R3); + } + cudaDeviceReset(); +} diff --git a/Core/regularizers_GPU/TV_FGP/FGP_TV_GPU_core.h b/Core/regularizers_GPU/TV_FGP/FGP_TV_GPU_core.h new file mode 100755 index 0000000..a5d3f73 --- /dev/null +++ b/Core/regularizers_GPU/TV_FGP/FGP_TV_GPU_core.h @@ -0,0 +1,10 @@ +#include +#include +#include + +#ifndef _FGP_TV_GPU_ +#define _FGP_TV_GPU_ + +extern "C" void FGP_TV_GPU(float *Input, float *Output, float lambda, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ); + +#endif -- cgit v1.2.3 From bcdf186ccdca54a3df383512ad5a117004500a60 Mon Sep 17 00:00:00 2001 From: Daniil Kazantsev Date: Mon, 26 Feb 2018 12:50:58 +0000 Subject: CPU-GPU naming consistency --- Core/regularizers_CPU/FGP_TV_core.c | 2 +- Core/regularizers_CPU/FGP_TV_core.h | 2 +- Core/regularizers_CPU/ROF_TV_core.c | 2 +- Core/regularizers_CPU/ROF_TV_core.h | 2 +- Core/regularizers_GPU/TV_FGP/FGP_TV_GPU_core.cu | 561 --------------------- Core/regularizers_GPU/TV_FGP/FGP_TV_GPU_core.h | 10 - Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.cu | 561 +++++++++++++++++++++ Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.h | 10 + Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu | 369 -------------- Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.h | 8 - Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.cu | 369 ++++++++++++++ Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.h | 8 + Wrappers/Python/src/cpu_regularizers.cpp | 546 ++++++++++---------- Wrappers/Python/src/gpu_regularizers.pyx | 69 ++- Wrappers/Python/test/test_cpu_vs_gpu.py | 10 + .../Python/test/test_cpu_vs_gpu_regularizers.py | 8 +- 16 files changed, 1305 insertions(+), 1232 deletions(-) delete mode 100755 Core/regularizers_GPU/TV_FGP/FGP_TV_GPU_core.cu delete mode 100755 Core/regularizers_GPU/TV_FGP/FGP_TV_GPU_core.h create mode 100755 Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.cu create mode 100755 Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.h delete mode 100755 Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu delete mode 100755 Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.h create mode 100755 Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.cu create mode 100755 Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.h create mode 100644 Wrappers/Python/test/test_cpu_vs_gpu.py diff --git a/Core/regularizers_CPU/FGP_TV_core.c b/Core/regularizers_CPU/FGP_TV_core.c index 304848d..2f1439d 100644 --- a/Core/regularizers_CPU/FGP_TV_core.c +++ b/Core/regularizers_CPU/FGP_TV_core.c @@ -37,7 +37,7 @@ limitations under the License. * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems" */ -float FGP_TV_CPU(float *Input, float *Output, float lambda, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ) +float TV_FGP_CPU(float *Input, float *Output, float lambda, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ) { int ll, j, DimTotal; float re, re1; diff --git a/Core/regularizers_CPU/FGP_TV_core.h b/Core/regularizers_CPU/FGP_TV_core.h index b591819..98ceaec 100644 --- a/Core/regularizers_CPU/FGP_TV_core.h +++ b/Core/regularizers_CPU/FGP_TV_core.h @@ -47,7 +47,7 @@ limitations under the License. #ifdef __cplusplus extern "C" { #endif -CCPI_EXPORT float FGP_TV_CPU(float *Input, float *Output, float lambda, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ); +CCPI_EXPORT float TV_FGP_CPU(float *Input, float *Output, float lambda, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ); CCPI_EXPORT float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, int dimX, int dimY); CCPI_EXPORT float Grad_func2D(float *P1, float *P2, float *D, float *R1, float *R2, float lambda, int dimX, int dimY); diff --git a/Core/regularizers_CPU/ROF_TV_core.c b/Core/regularizers_CPU/ROF_TV_core.c index fd47c3f..b2c6f00 100644 --- a/Core/regularizers_CPU/ROF_TV_core.c +++ b/Core/regularizers_CPU/ROF_TV_core.c @@ -46,7 +46,7 @@ int sign(float x) { */ /* Running iterations of TV-ROF function */ -float TV_ROF(float *Input, float *Output, int dimX, int dimY, int dimZ, int iterationsNumb, float tau, float lambda) +float TV_ROF_CPU(float *Input, float *Output, int dimX, int dimY, int dimZ, int iterationsNumb, float tau, float lambda) { float *D1, *D2, *D3; int i, DimTotal; diff --git a/Core/regularizers_CPU/ROF_TV_core.h b/Core/regularizers_CPU/ROF_TV_core.h index 5d69d27..b32d0d5 100644 --- a/Core/regularizers_CPU/ROF_TV_core.h +++ b/Core/regularizers_CPU/ROF_TV_core.h @@ -47,7 +47,7 @@ limitations under the License. extern "C" { #endif CCPI_EXPORT float TV_kernel(float *D1, float *D2, float *D3, float *B, float *A, float lambda, float tau, int dimY, int dimX, int dimZ); -CCPI_EXPORT float TV_ROF(float *Input, float *Output, int dimX, int dimY, int dimZ, int iterationsNumb, float tau, float lambda); +CCPI_EXPORT float TV_ROF_CPU(float *Input, float *Output, int dimX, int dimY, int dimZ, int iterationsNumb, float tau, float lambda); CCPI_EXPORT float D1_func(float *A, float *D1, int dimY, int dimX, int dimZ); CCPI_EXPORT float D2_func(float *A, float *D2, int dimY, int dimX, int dimZ); CCPI_EXPORT float D3_func(float *A, float *D3, int dimY, int dimX, int dimZ); diff --git a/Core/regularizers_GPU/TV_FGP/FGP_TV_GPU_core.cu b/Core/regularizers_GPU/TV_FGP/FGP_TV_GPU_core.cu deleted file mode 100755 index 21a95c9..0000000 --- a/Core/regularizers_GPU/TV_FGP/FGP_TV_GPU_core.cu +++ /dev/null @@ -1,561 +0,0 @@ - /* -This work is part of the Core Imaging Library developed by -Visual Analytics and Imaging System Group of the Science Technology -Facilities Council, STFC - -Copyright 2017 Daniil Kazantsev -Copyright 2017 Srikanth Nagella, Edoardo Pasca - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -#include "FGP_TV_GPU_core.h" -#include -#include - -/* CUDA implementation of FGP-TV [1] denoising/regularization model (2D/3D case) - * - * Input Parameters: - * 1. Noisy image/volume - * 2. lambda - regularization parameter - * 3. Number of iterations - * 4. eplsilon: tolerance constant - * 5. TV-type: methodTV - 'iso' (0) or 'l1' (1) - * 6. nonneg: 'nonnegativity (0 is OFF by default) - * 7. print information: 0 (off) or 1 (on) - * - * Output: - * [1] Filtered/regularized image - * - * This function is based on the Matlab's code and paper by - * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems" - */ - -// This will output the proper CUDA error strings in the event that a CUDA host call returns an error -#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__) - -inline void __checkCudaErrors(cudaError err, const char *file, const int line) -{ - if (cudaSuccess != err) - { - fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n", - file, line, (int)err, cudaGetErrorString(err)); - exit(EXIT_FAILURE); - } -} - -#define BLKXSIZE2D 16 -#define BLKYSIZE2D 16 - -#define BLKXSIZE 8 -#define BLKYSIZE 8 -#define BLKZSIZE 8 - -#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) ) -struct square { __host__ __device__ float operator()(float x) { return x * x; } }; - -/************************************************/ -/*****************2D modules*********************/ -/************************************************/ -__global__ void Obj_func2D_kernel(float *Ad, float *D, float *R1, float *R2, int N, int M, int ImSize, float lambda) -{ - - float val1,val2; - - //calculate each thread global index - const int xIndex=blockIdx.x*blockDim.x+threadIdx.x; - const int yIndex=blockIdx.y*blockDim.y+threadIdx.y; - - int index = xIndex + N*yIndex; - - if ((xIndex < N) && (yIndex < M)) { - if (xIndex <= 0) {val1 = 0.0f;} else {val1 = R1[(xIndex-1) + N*yIndex];} - if (yIndex <= 0) {val2 = 0.0f;} else {val2 = R2[xIndex + N*(yIndex-1)];} - //Write final result to global memory - D[index] = Ad[index] - lambda*(R1[index] + R2[index] - val1 - val2); - } - return; -} - -__global__ void Grad_func2D_kernel(float *P1, float *P2, float *D, float *R1, float *R2, int N, int M, int ImSize, float multip) -{ - - float val1,val2; - - //calculate each thread global index - const int xIndex=blockIdx.x*blockDim.x+threadIdx.x; - const int yIndex=blockIdx.y*blockDim.y+threadIdx.y; - - int index = xIndex + N*yIndex; - - if ((xIndex < N) && (yIndex < M)) { - - /* boundary conditions */ - if (xIndex >= N-1) val1 = 0.0f; else val1 = D[index] - D[(xIndex+1) + N*yIndex]; - if (yIndex >= M-1) val2 = 0.0f; else val2 = D[index] - D[(xIndex) + N*(yIndex + 1)]; - - //Write final result to global memory - P1[index] = R1[index] + multip*val1; - P2[index] = R2[index] + multip*val2; - } - return; -} - -__global__ void Proj_func2D_iso_kernel(float *P1, float *P2, int N, int M, int ImSize) -{ - - float denom; - //calculate each thread global index - const int xIndex=blockIdx.x*blockDim.x+threadIdx.x; - const int yIndex=blockIdx.y*blockDim.y+threadIdx.y; - - int index = xIndex + N*yIndex; - - if ((xIndex < N) && (yIndex < M)) { - denom = pow(P1[index],2) + pow(P2[index],2); - if (denom > 1.0f) { - P1[index] = P1[index]/sqrt(denom); - P2[index] = P2[index]/sqrt(denom); - } - } - return; -} -__global__ void Proj_func2D_aniso_kernel(float *P1, float *P2, int N, int M, int ImSize) -{ - - float val1, val2; - //calculate each thread global index - const int xIndex=blockIdx.x*blockDim.x+threadIdx.x; - const int yIndex=blockIdx.y*blockDim.y+threadIdx.y; - - int index = xIndex + N*yIndex; - - if ((xIndex < N) && (yIndex < M)) { - val1 = abs(P1[index]); - val2 = abs(P2[index]); - if (val1 < 1.0f) {val1 = 1.0f;} - if (val2 < 1.0f) {val2 = 1.0f;} - P1[index] = P1[index]/val1; - P2[index] = P2[index]/val2; - } - return; -} -__global__ void Rupd_func2D_kernel(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, float multip2, int N, int M, int ImSize) -{ - //calculate each thread global index - const int xIndex=blockIdx.x*blockDim.x+threadIdx.x; - const int yIndex=blockIdx.y*blockDim.y+threadIdx.y; - - int index = xIndex + N*yIndex; - - if ((xIndex < N) && (yIndex < M)) { - R1[index] = P1[index] + multip2*(P1[index] - P1_old[index]); - R2[index] = P2[index] + multip2*(P2[index] - P2_old[index]); - } - return; -} -__global__ void nonneg2D_kernel(float* Output, int N, int M, int num_total) -{ - int xIndex = blockDim.x * blockIdx.x + threadIdx.x; - int yIndex = blockDim.y * blockIdx.y + threadIdx.y; - - int index = xIndex + N*yIndex; - - if (index < num_total) { - if (Output[index] < 0.0f) Output[index] = 0.0f; - } -} -__global__ void copy_kernel2D(float *Input, float* Output, int N, int M, int num_total) -{ - int xIndex = blockDim.x * blockIdx.x + threadIdx.x; - int yIndex = blockDim.y * blockIdx.y + threadIdx.y; - - int index = xIndex + N*yIndex; - - if (index < num_total) { - Output[index] = Input[index]; - } -} -__global__ void ResidCalc2D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int num_total) -{ - int xIndex = blockDim.x * blockIdx.x + threadIdx.x; - int yIndex = blockDim.y * blockIdx.y + threadIdx.y; - - int index = xIndex + N*yIndex; - - if (index < num_total) { - Output[index] = Input1[index] - Input2[index]; - } -} -/************************************************/ -/*****************3D modules*********************/ -/************************************************/ -__global__ void Obj_func3D_kernel(float *Ad, float *D, float *R1, float *R2, float *R3, int N, int M, int Z, int ImSize, float lambda) -{ - - float val1,val2,val3; - - //calculate each thread global index - int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; - int k = blockDim.z * blockIdx.z + threadIdx.z; - - int index = (N*M)*k + i + N*j; - - if ((i < N) && (j < M) && (k < Z)) { - if (i <= 0) {val1 = 0.0f;} else {val1 = R1[(N*M)*(k) + (i-1) + N*j];} - if (j <= 0) {val2 = 0.0f;} else {val2 = R2[(N*M)*(k) + i + N*(j-1)];} - if (k <= 0) {val3 = 0.0f;} else {val3 = R3[(N*M)*(k-1) + i + N*j];} - //Write final result to global memory - D[index] = Ad[index] - lambda*(R1[index] + R2[index] + R3[index] - val1 - val2 - val3); - } - return; -} - -__global__ void Grad_func3D_kernel(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, int N, int M, int Z, int ImSize, float multip) -{ - - float val1,val2,val3; - - //calculate each thread global index - int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; - int k = blockDim.z * blockIdx.z + threadIdx.z; - - int index = (N*M)*k + i + N*j; - - if ((i < N) && (j < M) && (k < Z)) { - /* boundary conditions */ - if (i >= N-1) val1 = 0.0f; else val1 = D[index] - D[(N*M)*(k) + (i+1) + N*j]; - if (j >= M-1) val2 = 0.0f; else val2 = D[index] - D[(N*M)*(k) + i + N*(j+1)]; - if (k >= Z-1) val3 = 0.0f; else val3 = D[index] - D[(N*M)*(k+1) + i + N*j]; - - //Write final result to global memory - P1[index] = R1[index] + multip*val1; - P2[index] = R2[index] + multip*val2; - P3[index] = R3[index] + multip*val3; - } - return; -} - -__global__ void Proj_func3D_iso_kernel(float *P1, float *P2, float *P3, int N, int M, int Z, int ImSize) -{ - - float denom,sq_denom; - //calculate each thread global index - int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; - int k = blockDim.z * blockIdx.z + threadIdx.z; - - int index = (N*M)*k + i + N*j; - - if ((i < N) && (j < M) && (k < Z)) { - denom = pow(P1[index],2) + pow(P2[index],2) + pow(P3[index],2); - - if (denom > 1.0f) { - sq_denom = 1.0f/sqrt(denom); - P1[index] = P1[index]*sq_denom; - P2[index] = P2[index]*sq_denom; - P3[index] = P3[index]*sq_denom; - } - } - return; -} - -__global__ void Proj_func3D_aniso_kernel(float *P1, float *P2, float *P3, int N, int M, int Z, int ImSize) -{ - - float val1, val2, val3; - //calculate each thread global index - int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; - int k = blockDim.z * blockIdx.z + threadIdx.z; - - int index = (N*M)*k + i + N*j; - - if ((i < N) && (j < M) && (k < Z)) { - val1 = abs(P1[index]); - val2 = abs(P2[index]); - val3 = abs(P3[index]); - if (val1 < 1.0f) {val1 = 1.0f;} - if (val2 < 1.0f) {val2 = 1.0f;} - if (val3 < 1.0f) {val3 = 1.0f;} - P1[index] = P1[index]/val1; - P2[index] = P2[index]/val2; - P3[index] = P3[index]/val3; - } - return; -} - - -__global__ void Rupd_func3D_kernel(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, float multip2, int N, int M, int Z, int ImSize) -{ - //calculate each thread global index - int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; - int k = blockDim.z * blockIdx.z + threadIdx.z; - - int index = (N*M)*k + i + N*j; - - if ((i < N) && (j < M) && (k < Z)) { - R1[index] = P1[index] + multip2*(P1[index] - P1_old[index]); - R2[index] = P2[index] + multip2*(P2[index] - P2_old[index]); - R3[index] = P3[index] + multip2*(P3[index] - P3_old[index]); - } - return; -} - -__global__ void nonneg3D_kernel(float* Output, int N, int M, int Z, int num_total) -{ - int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; - int k = blockDim.z * blockIdx.z + threadIdx.z; - - int index = (N*M)*k + i + N*j; - - if (index < num_total) { - if (Output[index] < 0.0f) Output[index] = 0.0f; - } -} - -__global__ void copy_kernel3D(float *Input, float* Output, int N, int M, int Z, int num_total) -{ - int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; - int k = blockDim.z * blockIdx.z + threadIdx.z; - - int index = (N*M)*k + i + N*j; - - if (index < num_total) { - Output[index] = Input[index]; - } -} -/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ - -////////////MAIN HOST FUNCTION /////////////// -extern "C" void FGP_TV_GPU(float *Input, float *Output, float lambda, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ) -{ - int deviceCount = -1; // number of devices - cudaGetDeviceCount(&deviceCount); - if (deviceCount == 0) { - fprintf(stderr, "No CUDA devices found\n"); - return; - } - - int count = 0, i; - float re, multip,multip2; - float tk = 1.0f; - float tkp1=1.0f; - - if (dimZ <= 1) { - /*2D verson*/ - int ImSize = dimX*dimY; - float *d_input, *d_update=NULL, *d_update_prev=NULL, *P1=NULL, *P2=NULL, *P1_prev=NULL, *P2_prev=NULL, *R1=NULL, *R2=NULL; - - dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D); - dim3 dimGrid(idivup(dimX,BLKXSIZE2D), idivup(dimY,BLKYSIZE2D)); - - /*allocate space for images on device*/ - checkCudaErrors( cudaMalloc((void**)&d_input,ImSize*sizeof(float)) ); - checkCudaErrors( cudaMalloc((void**)&d_update,ImSize*sizeof(float)) ); - if (epsil != 0.0f) checkCudaErrors( cudaMalloc((void**)&d_update_prev,ImSize*sizeof(float)) ); - checkCudaErrors( cudaMalloc((void**)&P1,ImSize*sizeof(float)) ); - checkCudaErrors( cudaMalloc((void**)&P2,ImSize*sizeof(float)) ); - checkCudaErrors( cudaMalloc((void**)&P1_prev,ImSize*sizeof(float)) ); - checkCudaErrors( cudaMalloc((void**)&P2_prev,ImSize*sizeof(float)) ); - checkCudaErrors( cudaMalloc((void**)&R1,ImSize*sizeof(float)) ); - checkCudaErrors( cudaMalloc((void**)&R2,ImSize*sizeof(float)) ); - - checkCudaErrors( cudaMemcpy(d_input,Input,ImSize*sizeof(float),cudaMemcpyHostToDevice)); - cudaMemset(P1, 0, ImSize*sizeof(float)); - cudaMemset(P2, 0, ImSize*sizeof(float)); - cudaMemset(P1_prev, 0, ImSize*sizeof(float)); - cudaMemset(P2_prev, 0, ImSize*sizeof(float)); - cudaMemset(R1, 0, ImSize*sizeof(float)); - cudaMemset(R2, 0, ImSize*sizeof(float)); - - /********************** Run CUDA 2D kernel here ********************/ - multip = (1.0f/(8.0f*lambda)); - - /* The main kernel */ - for (i = 0; i < iter; i++) { - - /* computing the gradient of the objective function */ - Obj_func2D_kernel<<>>(d_input, d_update, R1, R2, dimX, dimY, ImSize, lambda); - checkCudaErrors( cudaDeviceSynchronize() ); - checkCudaErrors(cudaPeekAtLastError() ); - - if (nonneg != 0) { - nonneg2D_kernel<<>>(d_update, dimX, dimY, ImSize); - checkCudaErrors( cudaDeviceSynchronize() ); - checkCudaErrors(cudaPeekAtLastError() ); } - - /*Taking a step towards minus of the gradient*/ - Grad_func2D_kernel<<>>(P1, P2, d_update, R1, R2, dimX, dimY, ImSize, multip); - checkCudaErrors( cudaDeviceSynchronize() ); - checkCudaErrors(cudaPeekAtLastError() ); - - /* projection step */ - if (methodTV == 0) Proj_func2D_iso_kernel<<>>(P1, P2, dimX, dimY, ImSize); /*isotropic TV*/ - else Proj_func2D_aniso_kernel<<>>(P1, P2, dimX, dimY, ImSize); /*anisotropic TV*/ - checkCudaErrors( cudaDeviceSynchronize() ); - checkCudaErrors(cudaPeekAtLastError() ); - - tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f; - multip2 = ((tk-1.0f)/tkp1); - - Rupd_func2D_kernel<<>>(P1, P1_prev, P2, P2_prev, R1, R2, tkp1, tk, multip2, dimX, dimY, ImSize); - checkCudaErrors( cudaDeviceSynchronize() ); - checkCudaErrors(cudaPeekAtLastError() ); - - if (epsil != 0.0f) { - /* calculate norm - stopping rules using the Thrust library */ - ResidCalc2D_kernel<<>>(d_update, d_update_prev, P1_prev, dimX, dimY, ImSize); - checkCudaErrors( cudaDeviceSynchronize() ); - checkCudaErrors(cudaPeekAtLastError() ); - - thrust::device_vector d_vec(P1_prev, P1_prev + ImSize); - float reduction = sqrt(thrust::transform_reduce(d_vec.begin(), d_vec.end(), square(), 0.0f, thrust::plus())); - thrust::device_vector d_vec2(d_update, d_update + ImSize); - float reduction2 = sqrt(thrust::transform_reduce(d_vec2.begin(), d_vec2.end(), square(), 0.0f, thrust::plus())); - - re = (reduction/reduction2); - if (re < epsil) count++; - if (count > 4) break; - - copy_kernel2D<<>>(d_update, d_update_prev, dimX, dimY, ImSize); - checkCudaErrors( cudaDeviceSynchronize() ); - checkCudaErrors(cudaPeekAtLastError() ); - } - - copy_kernel2D<<>>(P1, P1_prev, dimX, dimY, ImSize); - checkCudaErrors( cudaDeviceSynchronize() ); - checkCudaErrors(cudaPeekAtLastError() ); - - copy_kernel2D<<>>(P2, P2_prev, dimX, dimY, ImSize); - checkCudaErrors( cudaDeviceSynchronize() ); - checkCudaErrors(cudaPeekAtLastError() ); - - tk = tkp1; - } - if (printM == 1) printf("FGP-TV iterations stopped at iteration %i \n", i); - /***************************************************************/ - //copy result matrix from device to host memory - cudaMemcpy(Output,d_update,ImSize*sizeof(float),cudaMemcpyDeviceToHost); - - cudaFree(d_input); - cudaFree(d_update); - if (epsil != 0.0f) cudaFree(d_update_prev); - cudaFree(P1); - cudaFree(P2); - cudaFree(P1_prev); - cudaFree(P2_prev); - cudaFree(R1); - cudaFree(R2); - } - else { - /*3D verson*/ - int ImSize = dimX*dimY*dimZ; - float *d_input, *d_update=NULL, *P1=NULL, *P2=NULL, *P3=NULL, *P1_prev=NULL, *P2_prev=NULL, *P3_prev=NULL, *R1=NULL, *R2=NULL, *R3=NULL; - - dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE); - dim3 dimGrid(idivup(dimX,BLKXSIZE), idivup(dimY,BLKYSIZE),idivup(dimZ,BLKZSIZE)); - - /*allocate space for images on device*/ - checkCudaErrors( cudaMalloc((void**)&d_input,ImSize*sizeof(float)) ); - checkCudaErrors( cudaMalloc((void**)&d_update,ImSize*sizeof(float)) ); - checkCudaErrors( cudaMalloc((void**)&P1,ImSize*sizeof(float)) ); - checkCudaErrors( cudaMalloc((void**)&P2,ImSize*sizeof(float)) ); - checkCudaErrors( cudaMalloc((void**)&P3,ImSize*sizeof(float)) ); - checkCudaErrors( cudaMalloc((void**)&P1_prev,ImSize*sizeof(float)) ); - checkCudaErrors( cudaMalloc((void**)&P2_prev,ImSize*sizeof(float)) ); - checkCudaErrors( cudaMalloc((void**)&P3_prev,ImSize*sizeof(float)) ); - checkCudaErrors( cudaMalloc((void**)&R1,ImSize*sizeof(float)) ); - checkCudaErrors( cudaMalloc((void**)&R2,ImSize*sizeof(float)) ); - checkCudaErrors( cudaMalloc((void**)&R3,ImSize*sizeof(float)) ); - - checkCudaErrors( cudaMemcpy(d_input,Input,ImSize*sizeof(float),cudaMemcpyHostToDevice)); - cudaMemset(P1, 0, ImSize*sizeof(float)); - cudaMemset(P2, 0, ImSize*sizeof(float)); - cudaMemset(P3, 0, ImSize*sizeof(float)); - cudaMemset(P1_prev, 0, ImSize*sizeof(float)); - cudaMemset(P2_prev, 0, ImSize*sizeof(float)); - cudaMemset(P3_prev, 0, ImSize*sizeof(float)); - cudaMemset(R1, 0, ImSize*sizeof(float)); - cudaMemset(R2, 0, ImSize*sizeof(float)); - cudaMemset(R3, 0, ImSize*sizeof(float)); - /********************** Run CUDA 3D kernel here ********************/ - multip = (1.0f/(8.0f*lambda)); - - /* The main kernel */ - for (i = 0; i < iter; i++) { - - /* computing the gradient of the objective function */ - Obj_func3D_kernel<<>>(d_input, d_update, R1, R2, R3, dimX, dimY, dimZ, ImSize, lambda); - checkCudaErrors( cudaDeviceSynchronize() ); - checkCudaErrors(cudaPeekAtLastError() ); - - if (nonneg != 0) { - nonneg3D_kernel<<>>(d_update, dimX, dimY, dimZ, ImSize); - checkCudaErrors( cudaDeviceSynchronize() ); - checkCudaErrors(cudaPeekAtLastError() ); } - - /*Taking a step towards minus of the gradient*/ - Grad_func3D_kernel<<>>(P1, P2, P3, d_update, R1, R2, R3, dimX, dimY, dimZ, ImSize, multip); - checkCudaErrors( cudaDeviceSynchronize() ); - checkCudaErrors(cudaPeekAtLastError() ); - - /* projection step */ - if (methodTV == 0) Proj_func3D_iso_kernel<<>>(P1, P2, P3, dimX, dimY, dimZ, ImSize); /* isotropic kernel */ - else Proj_func3D_aniso_kernel<<>>(P1, P2, P3, dimX, dimY, dimZ, ImSize); /* anisotropic kernel */ - checkCudaErrors( cudaDeviceSynchronize() ); - checkCudaErrors(cudaPeekAtLastError() ); - - tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f; - multip2 = ((tk-1.0f)/tkp1); - - Rupd_func3D_kernel<<>>(P1, P1_prev, P2, P2_prev, P3, P3_prev, R1, R2, R3, tkp1, tk, multip2, dimX, dimY, dimZ, ImSize); - checkCudaErrors( cudaDeviceSynchronize() ); - checkCudaErrors(cudaPeekAtLastError() ); - - copy_kernel3D<<>>(P1, P1_prev, dimX, dimY, dimZ, ImSize); - checkCudaErrors( cudaDeviceSynchronize() ); - checkCudaErrors(cudaPeekAtLastError() ); - - copy_kernel3D<<>>(P2, P2_prev, dimX, dimY, dimZ, ImSize); - checkCudaErrors( cudaDeviceSynchronize() ); - checkCudaErrors(cudaPeekAtLastError() ); - - copy_kernel3D<<>>(P3, P3_prev, dimX, dimY, dimZ, ImSize); - checkCudaErrors( cudaDeviceSynchronize() ); - checkCudaErrors(cudaPeekAtLastError() ); - - tk = tkp1; - } - if (printM == 1) printf("FGP-TV iterations stopped at iteration %i \n", i); - /***************************************************************/ - //copy result matrix from device to host memory - cudaMemcpy(Output,d_update,ImSize*sizeof(float),cudaMemcpyDeviceToHost); - - cudaFree(d_input); - cudaFree(d_update); - cudaFree(P1); - cudaFree(P2); - cudaFree(P3); - cudaFree(P1_prev); - cudaFree(P2_prev); - cudaFree(P3_prev); - cudaFree(R1); - cudaFree(R2); - cudaFree(R3); - } - cudaDeviceReset(); -} diff --git a/Core/regularizers_GPU/TV_FGP/FGP_TV_GPU_core.h b/Core/regularizers_GPU/TV_FGP/FGP_TV_GPU_core.h deleted file mode 100755 index a5d3f73..0000000 --- a/Core/regularizers_GPU/TV_FGP/FGP_TV_GPU_core.h +++ /dev/null @@ -1,10 +0,0 @@ -#include -#include -#include - -#ifndef _FGP_TV_GPU_ -#define _FGP_TV_GPU_ - -extern "C" void FGP_TV_GPU(float *Input, float *Output, float lambda, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ); - -#endif diff --git a/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.cu b/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.cu new file mode 100755 index 0000000..0533a85 --- /dev/null +++ b/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.cu @@ -0,0 +1,561 @@ + /* +This work is part of the Core Imaging Library developed by +Visual Analytics and Imaging System Group of the Science Technology +Facilities Council, STFC + +Copyright 2017 Daniil Kazantsev +Copyright 2017 Srikanth Nagella, Edoardo Pasca + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at +http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#include "TV_FGP_GPU_core.h" +#include +#include + +/* CUDA implementation of FGP-TV [1] denoising/regularization model (2D/3D case) + * + * Input Parameters: + * 1. Noisy image/volume + * 2. lambda - regularization parameter + * 3. Number of iterations + * 4. eplsilon: tolerance constant + * 5. TV-type: methodTV - 'iso' (0) or 'l1' (1) + * 6. nonneg: 'nonnegativity (0 is OFF by default) + * 7. print information: 0 (off) or 1 (on) + * + * Output: + * [1] Filtered/regularized image + * + * This function is based on the Matlab's code and paper by + * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems" + */ + +// This will output the proper CUDA error strings in the event that a CUDA host call returns an error +#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__) + +inline void __checkCudaErrors(cudaError err, const char *file, const int line) +{ + if (cudaSuccess != err) + { + fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n", + file, line, (int)err, cudaGetErrorString(err)); + exit(EXIT_FAILURE); + } +} + +#define BLKXSIZE2D 16 +#define BLKYSIZE2D 16 + +#define BLKXSIZE 8 +#define BLKYSIZE 8 +#define BLKZSIZE 8 + +#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) ) +struct square { __host__ __device__ float operator()(float x) { return x * x; } }; + +/************************************************/ +/*****************2D modules*********************/ +/************************************************/ +__global__ void Obj_func2D_kernel(float *Ad, float *D, float *R1, float *R2, int N, int M, int ImSize, float lambda) +{ + + float val1,val2; + + //calculate each thread global index + const int xIndex=blockIdx.x*blockDim.x+threadIdx.x; + const int yIndex=blockIdx.y*blockDim.y+threadIdx.y; + + int index = xIndex + N*yIndex; + + if ((xIndex < N) && (yIndex < M)) { + if (xIndex <= 0) {val1 = 0.0f;} else {val1 = R1[(xIndex-1) + N*yIndex];} + if (yIndex <= 0) {val2 = 0.0f;} else {val2 = R2[xIndex + N*(yIndex-1)];} + //Write final result to global memory + D[index] = Ad[index] - lambda*(R1[index] + R2[index] - val1 - val2); + } + return; +} + +__global__ void Grad_func2D_kernel(float *P1, float *P2, float *D, float *R1, float *R2, int N, int M, int ImSize, float multip) +{ + + float val1,val2; + + //calculate each thread global index + const int xIndex=blockIdx.x*blockDim.x+threadIdx.x; + const int yIndex=blockIdx.y*blockDim.y+threadIdx.y; + + int index = xIndex + N*yIndex; + + if ((xIndex < N) && (yIndex < M)) { + + /* boundary conditions */ + if (xIndex >= N-1) val1 = 0.0f; else val1 = D[index] - D[(xIndex+1) + N*yIndex]; + if (yIndex >= M-1) val2 = 0.0f; else val2 = D[index] - D[(xIndex) + N*(yIndex + 1)]; + + //Write final result to global memory + P1[index] = R1[index] + multip*val1; + P2[index] = R2[index] + multip*val2; + } + return; +} + +__global__ void Proj_func2D_iso_kernel(float *P1, float *P2, int N, int M, int ImSize) +{ + + float denom; + //calculate each thread global index + const int xIndex=blockIdx.x*blockDim.x+threadIdx.x; + const int yIndex=blockIdx.y*blockDim.y+threadIdx.y; + + int index = xIndex + N*yIndex; + + if ((xIndex < N) && (yIndex < M)) { + denom = pow(P1[index],2) + pow(P2[index],2); + if (denom > 1.0f) { + P1[index] = P1[index]/sqrt(denom); + P2[index] = P2[index]/sqrt(denom); + } + } + return; +} +__global__ void Proj_func2D_aniso_kernel(float *P1, float *P2, int N, int M, int ImSize) +{ + + float val1, val2; + //calculate each thread global index + const int xIndex=blockIdx.x*blockDim.x+threadIdx.x; + const int yIndex=blockIdx.y*blockDim.y+threadIdx.y; + + int index = xIndex + N*yIndex; + + if ((xIndex < N) && (yIndex < M)) { + val1 = abs(P1[index]); + val2 = abs(P2[index]); + if (val1 < 1.0f) {val1 = 1.0f;} + if (val2 < 1.0f) {val2 = 1.0f;} + P1[index] = P1[index]/val1; + P2[index] = P2[index]/val2; + } + return; +} +__global__ void Rupd_func2D_kernel(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, float multip2, int N, int M, int ImSize) +{ + //calculate each thread global index + const int xIndex=blockIdx.x*blockDim.x+threadIdx.x; + const int yIndex=blockIdx.y*blockDim.y+threadIdx.y; + + int index = xIndex + N*yIndex; + + if ((xIndex < N) && (yIndex < M)) { + R1[index] = P1[index] + multip2*(P1[index] - P1_old[index]); + R2[index] = P2[index] + multip2*(P2[index] - P2_old[index]); + } + return; +} +__global__ void nonneg2D_kernel(float* Output, int N, int M, int num_total) +{ + int xIndex = blockDim.x * blockIdx.x + threadIdx.x; + int yIndex = blockDim.y * blockIdx.y + threadIdx.y; + + int index = xIndex + N*yIndex; + + if (index < num_total) { + if (Output[index] < 0.0f) Output[index] = 0.0f; + } +} +__global__ void copy_kernel2D(float *Input, float* Output, int N, int M, int num_total) +{ + int xIndex = blockDim.x * blockIdx.x + threadIdx.x; + int yIndex = blockDim.y * blockIdx.y + threadIdx.y; + + int index = xIndex + N*yIndex; + + if (index < num_total) { + Output[index] = Input[index]; + } +} +__global__ void ResidCalc2D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int num_total) +{ + int xIndex = blockDim.x * blockIdx.x + threadIdx.x; + int yIndex = blockDim.y * blockIdx.y + threadIdx.y; + + int index = xIndex + N*yIndex; + + if (index < num_total) { + Output[index] = Input1[index] - Input2[index]; + } +} +/************************************************/ +/*****************3D modules*********************/ +/************************************************/ +__global__ void Obj_func3D_kernel(float *Ad, float *D, float *R1, float *R2, float *R3, int N, int M, int Z, int ImSize, float lambda) +{ + + float val1,val2,val3; + + //calculate each thread global index + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if ((i < N) && (j < M) && (k < Z)) { + if (i <= 0) {val1 = 0.0f;} else {val1 = R1[(N*M)*(k) + (i-1) + N*j];} + if (j <= 0) {val2 = 0.0f;} else {val2 = R2[(N*M)*(k) + i + N*(j-1)];} + if (k <= 0) {val3 = 0.0f;} else {val3 = R3[(N*M)*(k-1) + i + N*j];} + //Write final result to global memory + D[index] = Ad[index] - lambda*(R1[index] + R2[index] + R3[index] - val1 - val2 - val3); + } + return; +} + +__global__ void Grad_func3D_kernel(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, int N, int M, int Z, int ImSize, float multip) +{ + + float val1,val2,val3; + + //calculate each thread global index + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if ((i < N) && (j < M) && (k < Z)) { + /* boundary conditions */ + if (i >= N-1) val1 = 0.0f; else val1 = D[index] - D[(N*M)*(k) + (i+1) + N*j]; + if (j >= M-1) val2 = 0.0f; else val2 = D[index] - D[(N*M)*(k) + i + N*(j+1)]; + if (k >= Z-1) val3 = 0.0f; else val3 = D[index] - D[(N*M)*(k+1) + i + N*j]; + + //Write final result to global memory + P1[index] = R1[index] + multip*val1; + P2[index] = R2[index] + multip*val2; + P3[index] = R3[index] + multip*val3; + } + return; +} + +__global__ void Proj_func3D_iso_kernel(float *P1, float *P2, float *P3, int N, int M, int Z, int ImSize) +{ + + float denom,sq_denom; + //calculate each thread global index + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if ((i < N) && (j < M) && (k < Z)) { + denom = pow(P1[index],2) + pow(P2[index],2) + pow(P3[index],2); + + if (denom > 1.0f) { + sq_denom = 1.0f/sqrt(denom); + P1[index] = P1[index]*sq_denom; + P2[index] = P2[index]*sq_denom; + P3[index] = P3[index]*sq_denom; + } + } + return; +} + +__global__ void Proj_func3D_aniso_kernel(float *P1, float *P2, float *P3, int N, int M, int Z, int ImSize) +{ + + float val1, val2, val3; + //calculate each thread global index + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if ((i < N) && (j < M) && (k < Z)) { + val1 = abs(P1[index]); + val2 = abs(P2[index]); + val3 = abs(P3[index]); + if (val1 < 1.0f) {val1 = 1.0f;} + if (val2 < 1.0f) {val2 = 1.0f;} + if (val3 < 1.0f) {val3 = 1.0f;} + P1[index] = P1[index]/val1; + P2[index] = P2[index]/val2; + P3[index] = P3[index]/val3; + } + return; +} + + +__global__ void Rupd_func3D_kernel(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, float multip2, int N, int M, int Z, int ImSize) +{ + //calculate each thread global index + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if ((i < N) && (j < M) && (k < Z)) { + R1[index] = P1[index] + multip2*(P1[index] - P1_old[index]); + R2[index] = P2[index] + multip2*(P2[index] - P2_old[index]); + R3[index] = P3[index] + multip2*(P3[index] - P3_old[index]); + } + return; +} + +__global__ void nonneg3D_kernel(float* Output, int N, int M, int Z, int num_total) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if (index < num_total) { + if (Output[index] < 0.0f) Output[index] = 0.0f; + } +} + +__global__ void copy_kernel3D(float *Input, float* Output, int N, int M, int Z, int num_total) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if (index < num_total) { + Output[index] = Input[index]; + } +} +/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ + +////////////MAIN HOST FUNCTION /////////////// +extern "C" void TV_FGP_GPU(float *Input, float *Output, float lambda, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ) +{ + int deviceCount = -1; // number of devices + cudaGetDeviceCount(&deviceCount); + if (deviceCount == 0) { + fprintf(stderr, "No CUDA devices found\n"); + return; + } + + int count = 0, i; + float re, multip,multip2; + float tk = 1.0f; + float tkp1=1.0f; + + if (dimZ <= 1) { + /*2D verson*/ + int ImSize = dimX*dimY; + float *d_input, *d_update=NULL, *d_update_prev=NULL, *P1=NULL, *P2=NULL, *P1_prev=NULL, *P2_prev=NULL, *R1=NULL, *R2=NULL; + + dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D); + dim3 dimGrid(idivup(dimX,BLKXSIZE2D), idivup(dimY,BLKYSIZE2D)); + + /*allocate space for images on device*/ + checkCudaErrors( cudaMalloc((void**)&d_input,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&d_update,ImSize*sizeof(float)) ); + if (epsil != 0.0f) checkCudaErrors( cudaMalloc((void**)&d_update_prev,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P1,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P2,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P1_prev,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P2_prev,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&R1,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&R2,ImSize*sizeof(float)) ); + + checkCudaErrors( cudaMemcpy(d_input,Input,ImSize*sizeof(float),cudaMemcpyHostToDevice)); + cudaMemset(P1, 0, ImSize*sizeof(float)); + cudaMemset(P2, 0, ImSize*sizeof(float)); + cudaMemset(P1_prev, 0, ImSize*sizeof(float)); + cudaMemset(P2_prev, 0, ImSize*sizeof(float)); + cudaMemset(R1, 0, ImSize*sizeof(float)); + cudaMemset(R2, 0, ImSize*sizeof(float)); + + /********************** Run CUDA 2D kernel here ********************/ + multip = (1.0f/(8.0f*lambda)); + + /* The main kernel */ + for (i = 0; i < iter; i++) { + + /* computing the gradient of the objective function */ + Obj_func2D_kernel<<>>(d_input, d_update, R1, R2, dimX, dimY, ImSize, lambda); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + if (nonneg != 0) { + nonneg2D_kernel<<>>(d_update, dimX, dimY, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); } + + /*Taking a step towards minus of the gradient*/ + Grad_func2D_kernel<<>>(P1, P2, d_update, R1, R2, dimX, dimY, ImSize, multip); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + /* projection step */ + if (methodTV == 0) Proj_func2D_iso_kernel<<>>(P1, P2, dimX, dimY, ImSize); /*isotropic TV*/ + else Proj_func2D_aniso_kernel<<>>(P1, P2, dimX, dimY, ImSize); /*anisotropic TV*/ + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f; + multip2 = ((tk-1.0f)/tkp1); + + Rupd_func2D_kernel<<>>(P1, P1_prev, P2, P2_prev, R1, R2, tkp1, tk, multip2, dimX, dimY, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + if (epsil != 0.0f) { + /* calculate norm - stopping rules using the Thrust library */ + ResidCalc2D_kernel<<>>(d_update, d_update_prev, P1_prev, dimX, dimY, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + thrust::device_vector d_vec(P1_prev, P1_prev + ImSize); + float reduction = sqrt(thrust::transform_reduce(d_vec.begin(), d_vec.end(), square(), 0.0f, thrust::plus())); + thrust::device_vector d_vec2(d_update, d_update + ImSize); + float reduction2 = sqrt(thrust::transform_reduce(d_vec2.begin(), d_vec2.end(), square(), 0.0f, thrust::plus())); + + re = (reduction/reduction2); + if (re < epsil) count++; + if (count > 4) break; + + copy_kernel2D<<>>(d_update, d_update_prev, dimX, dimY, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + } + + copy_kernel2D<<>>(P1, P1_prev, dimX, dimY, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + copy_kernel2D<<>>(P2, P2_prev, dimX, dimY, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + tk = tkp1; + } + if (printM == 1) printf("FGP-TV iterations stopped at iteration %i \n", i); + /***************************************************************/ + //copy result matrix from device to host memory + cudaMemcpy(Output,d_update,ImSize*sizeof(float),cudaMemcpyDeviceToHost); + + cudaFree(d_input); + cudaFree(d_update); + if (epsil != 0.0f) cudaFree(d_update_prev); + cudaFree(P1); + cudaFree(P2); + cudaFree(P1_prev); + cudaFree(P2_prev); + cudaFree(R1); + cudaFree(R2); + } + else { + /*3D verson*/ + int ImSize = dimX*dimY*dimZ; + float *d_input, *d_update=NULL, *P1=NULL, *P2=NULL, *P3=NULL, *P1_prev=NULL, *P2_prev=NULL, *P3_prev=NULL, *R1=NULL, *R2=NULL, *R3=NULL; + + dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE); + dim3 dimGrid(idivup(dimX,BLKXSIZE), idivup(dimY,BLKYSIZE),idivup(dimZ,BLKZSIZE)); + + /*allocate space for images on device*/ + checkCudaErrors( cudaMalloc((void**)&d_input,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&d_update,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P1,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P2,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P3,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P1_prev,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P2_prev,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P3_prev,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&R1,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&R2,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&R3,ImSize*sizeof(float)) ); + + checkCudaErrors( cudaMemcpy(d_input,Input,ImSize*sizeof(float),cudaMemcpyHostToDevice)); + cudaMemset(P1, 0, ImSize*sizeof(float)); + cudaMemset(P2, 0, ImSize*sizeof(float)); + cudaMemset(P3, 0, ImSize*sizeof(float)); + cudaMemset(P1_prev, 0, ImSize*sizeof(float)); + cudaMemset(P2_prev, 0, ImSize*sizeof(float)); + cudaMemset(P3_prev, 0, ImSize*sizeof(float)); + cudaMemset(R1, 0, ImSize*sizeof(float)); + cudaMemset(R2, 0, ImSize*sizeof(float)); + cudaMemset(R3, 0, ImSize*sizeof(float)); + /********************** Run CUDA 3D kernel here ********************/ + multip = (1.0f/(8.0f*lambda)); + + /* The main kernel */ + for (i = 0; i < iter; i++) { + + /* computing the gradient of the objective function */ + Obj_func3D_kernel<<>>(d_input, d_update, R1, R2, R3, dimX, dimY, dimZ, ImSize, lambda); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + if (nonneg != 0) { + nonneg3D_kernel<<>>(d_update, dimX, dimY, dimZ, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); } + + /*Taking a step towards minus of the gradient*/ + Grad_func3D_kernel<<>>(P1, P2, P3, d_update, R1, R2, R3, dimX, dimY, dimZ, ImSize, multip); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + /* projection step */ + if (methodTV == 0) Proj_func3D_iso_kernel<<>>(P1, P2, P3, dimX, dimY, dimZ, ImSize); /* isotropic kernel */ + else Proj_func3D_aniso_kernel<<>>(P1, P2, P3, dimX, dimY, dimZ, ImSize); /* anisotropic kernel */ + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f; + multip2 = ((tk-1.0f)/tkp1); + + Rupd_func3D_kernel<<>>(P1, P1_prev, P2, P2_prev, P3, P3_prev, R1, R2, R3, tkp1, tk, multip2, dimX, dimY, dimZ, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + copy_kernel3D<<>>(P1, P1_prev, dimX, dimY, dimZ, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + copy_kernel3D<<>>(P2, P2_prev, dimX, dimY, dimZ, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + copy_kernel3D<<>>(P3, P3_prev, dimX, dimY, dimZ, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + tk = tkp1; + } + if (printM == 1) printf("FGP-TV iterations stopped at iteration %i \n", i); + /***************************************************************/ + //copy result matrix from device to host memory + cudaMemcpy(Output,d_update,ImSize*sizeof(float),cudaMemcpyDeviceToHost); + + cudaFree(d_input); + cudaFree(d_update); + cudaFree(P1); + cudaFree(P2); + cudaFree(P3); + cudaFree(P1_prev); + cudaFree(P2_prev); + cudaFree(P3_prev); + cudaFree(R1); + cudaFree(R2); + cudaFree(R3); + } + cudaDeviceReset(); +} diff --git a/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.h b/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.h new file mode 100755 index 0000000..15c7120 --- /dev/null +++ b/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.h @@ -0,0 +1,10 @@ +#include +#include +#include + +#ifndef _TV_FGP_GPU_ +#define _TV_FGP_GPU_ + +extern "C" void TV_FGP_GPU(float *Input, float *Output, float lambda, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ); + +#endif diff --git a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu deleted file mode 100755 index b67b53b..0000000 --- a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu +++ /dev/null @@ -1,369 +0,0 @@ - /* -This work is part of the Core Imaging Library developed by -Visual Analytics and Imaging System Group of the Science Technology -Facilities Council, STFC - -Copyright 2017 Daniil Kazantsev -Copyright 2017 Srikanth Nagella, Edoardo Pasca - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -#include "TV_ROF_GPU.h" - -/* C-OMP implementation of ROF-TV denoising/regularization model [1] (2D/3D case) -* -* Input Parameters: -* 1. Noisy image/volume [REQUIRED] -* 2. lambda - regularization parameter [REQUIRED] -* 3. tau - marching step for explicit scheme, ~0.1 is recommended [REQUIRED] -* 4. Number of iterations, for explicit scheme >= 150 is recommended [REQUIRED] -* -* Output: -* [1] Regularized image/volume - - * This function is based on the paper by -* [1] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms" -* -* D. Kazantsev, 2016-18 -*/ - -#define CHECK(call) \ -{ \ - const cudaError_t error = call; \ - if (error != cudaSuccess) \ - { \ - fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \ - fprintf(stderr, "code: %d, reason: %s\n", error, \ - cudaGetErrorString(error)); \ - exit(1); \ - } \ -} - -#define BLKXSIZE 8 -#define BLKYSIZE 8 -#define BLKZSIZE 8 - -#define BLKXSIZE2D 16 -#define BLKYSIZE2D 16 -#define EPS 1.0e-5 - -#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) ) - -#define MAX(x, y) (((x) > (y)) ? (x) : (y)) -#define MIN(x, y) (((x) < (y)) ? (x) : (y)) - -__host__ __device__ int sign (float x) -{ - return (x > 0) - (x < 0); -} - -/*********************2D case****************************/ - - /* differences 1 */ - __global__ void D1_func2D(float* Input, float* D1, int N, int M) - { - int i1, j1, i2; - float NOMx_1,NOMy_1,NOMy_0,denom1,denom2,T1; - int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; - - int index = i + N*j; - - if ((i >= 0) && (i < N) && (j >= 0) && (j < M)) { - - /* boundary conditions (Neumann reflections) */ - i1 = i + 1; if (i1 >= N) i1 = i-1; - i2 = i - 1; if (i2 < 0) i2 = i+1; - j1 = j + 1; if (j1 >= M) j1 = j-1; - - /* Forward-backward differences */ - NOMx_1 = Input[j1*N + i] - Input[index]; /* x+ */ - NOMy_1 = Input[j*N + i1] - Input[index]; /* y+ */ - NOMy_0 = Input[index] - Input[j*N + i2]; /* y- */ - - denom1 = NOMx_1*NOMx_1; - denom2 = 0.5f*(sign((float)NOMy_1) + sign((float)NOMy_0))*(MIN(abs((float)NOMy_1),abs((float)NOMy_0))); - denom2 = denom2*denom2; - T1 = sqrt(denom1 + denom2 + EPS); - D1[index] = NOMx_1/T1; - } - } - - /* differences 2 */ - __global__ void D2_func2D(float* Input, float* D2, int N, int M) - { - int i1, j1, j2; - float NOMx_1,NOMy_1,NOMx_0,denom1,denom2,T2; - int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; - - int index = i + N*j; - - if ((i >= 0) && (i < (N)) && (j >= 0) && (j < (M))) { - - /* boundary conditions (Neumann reflections) */ - i1 = i + 1; if (i1 >= N) i1 = i-1; - j1 = j + 1; if (j1 >= M) j1 = j-1; - j2 = j - 1; if (j2 < 0) j2 = j+1; - - /* Forward-backward differences */ - NOMx_1 = Input[j1*N + i] - Input[index]; /* x+ */ - NOMy_1 = Input[j*N + i1] - Input[index]; /* y+ */ - NOMx_0 = Input[index] - Input[j2*N + i]; /* x- */ - - denom1 = NOMy_1*NOMy_1; - denom2 = 0.5f*(sign((float)NOMx_1) + sign((float)NOMx_0))*(MIN(abs((float)NOMx_1),abs((float)NOMx_0))); - denom2 = denom2*denom2; - T2 = sqrt(denom1 + denom2 + EPS); - D2[index] = NOMy_1/T2; - } - } - - __global__ void TV_kernel2D(float *D1, float *D2, float *Update, float *Input, float lambda, float tau, int N, int M) - { - int i2, j2; - float dv1,dv2; - int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; - - int index = i + N*j; - - if ((i >= 0) && (i < (N)) && (j >= 0) && (j < (M))) { - - /* boundary conditions (Neumann reflections) */ - i2 = i - 1; if (i2 < 0) i2 = i+1; - j2 = j - 1; if (j2 < 0) j2 = j+1; - - /* divergence components */ - dv1 = D1[index] - D1[j2*N + i]; - dv2 = D2[index] - D2[j*N + i2]; - - Update[index] = Update[index] + tau*(2.0f*lambda*(dv1 + dv2) - (Update[index] - Input[index])); - - } - } -/*********************3D case****************************/ - - /* differences 1 */ - __global__ void D1_func3D(float* Input, float* D1, int dimX, int dimY, int dimZ) - { - float NOMx_1, NOMy_1, NOMy_0, NOMz_1, NOMz_0, denom1, denom2,denom3, T1; - int i1,i2,k1,j1,j2,k2; - - int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; - int k = blockDim.z * blockIdx.z + threadIdx.z; - - int index = (dimX*dimY)*k + j*dimX+i; - - if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) { - - /* symmetric boundary conditions (Neuman) */ - i1 = i + 1; if (i1 >= dimX) i1 = i-1; - i2 = i - 1; if (i2 < 0) i2 = i+1; - j1 = j + 1; if (j1 >= dimY) j1 = j-1; - j2 = j - 1; if (j2 < 0) j2 = j+1; - k1 = k + 1; if (k1 >= dimZ) k1 = k-1; - k2 = k - 1; if (k2 < 0) k2 = k+1; - - /* Forward-backward differences */ - NOMx_1 = Input[(dimX*dimY)*k + j1*dimX + i] - Input[index]; /* x+ */ - NOMy_1 = Input[(dimX*dimY)*k + j*dimX + i1] - Input[index]; /* y+ */ - NOMy_0 = Input[index] - Input[(dimX*dimY)*k + j*dimX + i2]; /* y- */ - - NOMz_1 = Input[(dimX*dimY)*k1 + j*dimX + i] - Input[index]; /* z+ */ - NOMz_0 = Input[index] - Input[(dimX*dimY)*k2 + j*dimX + i]; /* z- */ - - - denom1 = NOMx_1*NOMx_1; - denom2 = 0.5*(sign(NOMy_1) + sign(NOMy_0))*(MIN(abs(NOMy_1),abs(NOMy_0))); - denom2 = denom2*denom2; - denom3 = 0.5*(sign(NOMz_1) + sign(NOMz_0))*(MIN(abs(NOMz_1),abs(NOMz_0))); - denom3 = denom3*denom3; - T1 = sqrt(denom1 + denom2 + denom3 + EPS); - D1[index] = NOMx_1/T1; - } - } - - /* differences 2 */ - __global__ void D2_func3D(float* Input, float* D2, int dimX, int dimY, int dimZ) - { - float NOMx_1, NOMy_1, NOMx_0, NOMz_1, NOMz_0, denom1, denom2, denom3, T2; - int i1,i2,k1,j1,j2,k2; - - int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; - int k = blockDim.z * blockIdx.z + threadIdx.z; - - int index = (dimX*dimY)*k + j*dimX+i; - - if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) { - /* symmetric boundary conditions (Neuman) */ - i1 = i + 1; if (i1 >= dimX) i1 = i-1; - i2 = i - 1; if (i2 < 0) i2 = i+1; - j1 = j + 1; if (j1 >= dimY) j1 = j-1; - j2 = j - 1; if (j2 < 0) j2 = j+1; - k1 = k + 1; if (k1 >= dimZ) k1 = k-1; - k2 = k - 1; if (k2 < 0) k2 = k+1; - - - /* Forward-backward differences */ - NOMx_1 = Input[(dimX*dimY)*k + (j1)*dimX + i] - Input[index]; /* x+ */ - NOMy_1 = Input[(dimX*dimY)*k + (j)*dimX + i1] - Input[index]; /* y+ */ - NOMx_0 = Input[index] - Input[(dimX*dimY)*k + (j2)*dimX + i]; /* x- */ - NOMz_1 = Input[(dimX*dimY)*k1 + j*dimX + i] - Input[index]; /* z+ */ - NOMz_0 = Input[index] - Input[(dimX*dimY)*k2 + (j)*dimX + i]; /* z- */ - - - denom1 = NOMy_1*NOMy_1; - denom2 = 0.5*(sign(NOMx_1) + sign(NOMx_0))*(MIN(abs(NOMx_1),abs(NOMx_0))); - denom2 = denom2*denom2; - denom3 = 0.5*(sign(NOMz_1) + sign(NOMz_0))*(MIN(abs(NOMz_1),abs(NOMz_0))); - denom3 = denom3*denom3; - T2 = sqrt(denom1 + denom2 + denom3 + EPS); - D2[index] = NOMy_1/T2; - } - } - - /* differences 3 */ - __global__ void D3_func3D(float* Input, float* D3, int dimX, int dimY, int dimZ) - { - float NOMx_1, NOMy_1, NOMx_0, NOMy_0, NOMz_1, denom1, denom2, denom3, T3; - int i1,i2,k1,j1,j2,k2; - - int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; - int k = blockDim.z * blockIdx.z + threadIdx.z; - - int index = (dimX*dimY)*k + j*dimX+i; - - if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) { - - i1 = i + 1; if (i1 >= dimX) i1 = i-1; - i2 = i - 1; if (i2 < 0) i2 = i+1; - j1 = j + 1; if (j1 >= dimY) j1 = j-1; - j2 = j - 1; if (j2 < 0) j2 = j+1; - k1 = k + 1; if (k1 >= dimZ) k1 = k-1; - k2 = k - 1; if (k2 < 0) k2 = k+1; - - /* Forward-backward differences */ - NOMx_1 = Input[(dimX*dimY)*k + (j1)*dimX + i] - Input[index]; /* x+ */ - NOMy_1 = Input[(dimX*dimY)*k + (j)*dimX + i1] - Input[index]; /* y+ */ - NOMy_0 = Input[index] - Input[(dimX*dimY)*k + (j)*dimX + i2]; /* y- */ - NOMx_0 = Input[index] - Input[(dimX*dimY)*k + (j2)*dimX + i]; /* x- */ - NOMz_1 = Input[(dimX*dimY)*k1 + j*dimX + i] - Input[index]; /* z+ */ - - denom1 = NOMz_1*NOMz_1; - denom2 = 0.5*(sign(NOMx_1) + sign(NOMx_0))*(MIN(abs(NOMx_1),abs(NOMx_0))); - denom2 = denom2*denom2; - denom3 = 0.5*(sign(NOMy_1) + sign(NOMy_0))*(MIN(abs(NOMy_1),abs(NOMy_0))); - denom3 = denom3*denom3; - T3 = sqrt(denom1 + denom2 + denom3 + EPS); - D3[index] = NOMz_1/T3; - } - } - - __global__ void TV_kernel3D(float *D1, float *D2, float *D3, float *Update, float *Input, float lambda, float tau, int dimX, int dimY, int dimZ) - { - float dv1, dv2, dv3; - int i1,i2,k1,j1,j2,k2; - int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; - int k = blockDim.z * blockIdx.z + threadIdx.z; - - int index = (dimX*dimY)*k + j*dimX+i; - - if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) { - - /* symmetric boundary conditions (Neuman) */ - i1 = i + 1; if (i1 >= dimX) i1 = i-1; - i2 = i - 1; if (i2 < 0) i2 = i+1; - j1 = j + 1; if (j1 >= dimY) j1 = j-1; - j2 = j - 1; if (j2 < 0) j2 = j+1; - k1 = k + 1; if (k1 >= dimZ) k1 = k-1; - k2 = k - 1; if (k2 < 0) k2 = k+1; - - /*divergence components */ - dv1 = D1[index] - D1[(dimX*dimY)*k + j2*dimX+i]; - dv2 = D2[index] - D2[(dimX*dimY)*k + j*dimX+i2]; - dv3 = D3[index] - D3[(dimX*dimY)*k2 + j*dimX+i]; - - Update[index] = Update[index] + tau*(2.0f*lambda*(dv1 + dv2 + dv3) - (Update[index] - Input[index])); - - } - } - -///////////////////////////////////////////////// -// HOST FUNCTION -extern "C" void TV_ROF_GPU(float* Input, float* Output, int N, int M, int Z, int iter, float tau, float lambda) -{ - // set up device - int dev = 0; - CHECK(cudaSetDevice(dev)); - - float *d_input, *d_update, *d_D1, *d_D2; - - if (Z == 0) Z = 1; - CHECK(cudaMalloc((void**)&d_input,N*M*Z*sizeof(float))); - CHECK(cudaMalloc((void**)&d_update,N*M*Z*sizeof(float))); - CHECK(cudaMalloc((void**)&d_D1,N*M*Z*sizeof(float))); - CHECK(cudaMalloc((void**)&d_D2,N*M*Z*sizeof(float))); - - CHECK(cudaMemcpy(d_input,Input,N*M*Z*sizeof(float),cudaMemcpyHostToDevice)); - CHECK(cudaMemcpy(d_update,Input,N*M*Z*sizeof(float),cudaMemcpyHostToDevice)); - - if (Z > 1) { - // TV - 3D case - dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE); - dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE),idivup(Z,BLKXSIZE)); - - float *d_D3; - CHECK(cudaMalloc((void**)&d_D3,N*M*Z*sizeof(float))); - - for(int n=0; n < iter; n++) { - /* calculate differences */ - D1_func3D<<>>(d_update, d_D1, N, M, Z); - CHECK(cudaDeviceSynchronize()); - D2_func3D<<>>(d_update, d_D2, N, M, Z); - CHECK(cudaDeviceSynchronize()); - D3_func3D<<>>(d_update, d_D3, N, M, Z); - CHECK(cudaDeviceSynchronize()); - /*running main kernel*/ - TV_kernel3D<<>>(d_D1, d_D2, d_D3, d_update, d_input, lambda, tau, N, M, Z); - CHECK(cudaDeviceSynchronize()); - } - - CHECK(cudaFree(d_D3)); - } - else { - // TV - 2D case - dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D); - dim3 dimGrid(idivup(N,BLKXSIZE2D), idivup(M,BLKYSIZE2D)); - - for(int n=0; n < iter; n++) { - /* calculate differences */ - D1_func2D<<>>(d_update, d_D1, N, M); - CHECK(cudaDeviceSynchronize()); - D2_func2D<<>>(d_update, d_D2, N, M); - CHECK(cudaDeviceSynchronize()); - /*running main kernel*/ - TV_kernel2D<<>>(d_D1, d_D2, d_update, d_input, lambda, tau, N, M); - CHECK(cudaDeviceSynchronize()); - } - } - CHECK(cudaMemcpy(Output,d_update,N*M*Z*sizeof(float),cudaMemcpyDeviceToHost)); - CHECK(cudaFree(d_input)); - CHECK(cudaFree(d_update)); - CHECK(cudaFree(d_D1)); - CHECK(cudaFree(d_D2)); - cudaDeviceReset(); -} diff --git a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.h b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.h deleted file mode 100755 index 2938d2f..0000000 --- a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef __TVGPU_H__ -#define __TVGPU_H__ -#include "CCPiDefines.h" -#include - -extern "C" CCPI_EXPORT void TV_ROF_GPU_kernel(float* Input, float* Output, int N, int M, int Z, int iter, float tau, float lambda); - -#endif diff --git a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.cu b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.cu new file mode 100755 index 0000000..480855f --- /dev/null +++ b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.cu @@ -0,0 +1,369 @@ + /* +This work is part of the Core Imaging Library developed by +Visual Analytics and Imaging System Group of the Science Technology +Facilities Council, STFC + +Copyright 2017 Daniil Kazantsev +Copyright 2017 Srikanth Nagella, Edoardo Pasca + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at +http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#include "TV_ROF_GPU_core.h" + +/* C-OMP implementation of ROF-TV denoising/regularization model [1] (2D/3D case) +* +* Input Parameters: +* 1. Noisy image/volume [REQUIRED] +* 2. lambda - regularization parameter [REQUIRED] +* 3. tau - marching step for explicit scheme, ~0.1 is recommended [REQUIRED] +* 4. Number of iterations, for explicit scheme >= 150 is recommended [REQUIRED] +* +* Output: +* [1] Regularized image/volume + + * This function is based on the paper by +* [1] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms" +* +* D. Kazantsev, 2016-18 +*/ + +#define CHECK(call) \ +{ \ + const cudaError_t error = call; \ + if (error != cudaSuccess) \ + { \ + fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \ + fprintf(stderr, "code: %d, reason: %s\n", error, \ + cudaGetErrorString(error)); \ + exit(1); \ + } \ +} + +#define BLKXSIZE 8 +#define BLKYSIZE 8 +#define BLKZSIZE 8 + +#define BLKXSIZE2D 16 +#define BLKYSIZE2D 16 +#define EPS 1.0e-5 + +#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) ) + +#define MAX(x, y) (((x) > (y)) ? (x) : (y)) +#define MIN(x, y) (((x) < (y)) ? (x) : (y)) + +__host__ __device__ int sign (float x) +{ + return (x > 0) - (x < 0); +} + +/*********************2D case****************************/ + + /* differences 1 */ + __global__ void D1_func2D(float* Input, float* D1, int N, int M) + { + int i1, j1, i2; + float NOMx_1,NOMy_1,NOMy_0,denom1,denom2,T1; + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + + int index = i + N*j; + + if ((i >= 0) && (i < N) && (j >= 0) && (j < M)) { + + /* boundary conditions (Neumann reflections) */ + i1 = i + 1; if (i1 >= N) i1 = i-1; + i2 = i - 1; if (i2 < 0) i2 = i+1; + j1 = j + 1; if (j1 >= M) j1 = j-1; + + /* Forward-backward differences */ + NOMx_1 = Input[j1*N + i] - Input[index]; /* x+ */ + NOMy_1 = Input[j*N + i1] - Input[index]; /* y+ */ + NOMy_0 = Input[index] - Input[j*N + i2]; /* y- */ + + denom1 = NOMx_1*NOMx_1; + denom2 = 0.5f*(sign((float)NOMy_1) + sign((float)NOMy_0))*(MIN(abs((float)NOMy_1),abs((float)NOMy_0))); + denom2 = denom2*denom2; + T1 = sqrt(denom1 + denom2 + EPS); + D1[index] = NOMx_1/T1; + } + } + + /* differences 2 */ + __global__ void D2_func2D(float* Input, float* D2, int N, int M) + { + int i1, j1, j2; + float NOMx_1,NOMy_1,NOMx_0,denom1,denom2,T2; + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + + int index = i + N*j; + + if ((i >= 0) && (i < (N)) && (j >= 0) && (j < (M))) { + + /* boundary conditions (Neumann reflections) */ + i1 = i + 1; if (i1 >= N) i1 = i-1; + j1 = j + 1; if (j1 >= M) j1 = j-1; + j2 = j - 1; if (j2 < 0) j2 = j+1; + + /* Forward-backward differences */ + NOMx_1 = Input[j1*N + i] - Input[index]; /* x+ */ + NOMy_1 = Input[j*N + i1] - Input[index]; /* y+ */ + NOMx_0 = Input[index] - Input[j2*N + i]; /* x- */ + + denom1 = NOMy_1*NOMy_1; + denom2 = 0.5f*(sign((float)NOMx_1) + sign((float)NOMx_0))*(MIN(abs((float)NOMx_1),abs((float)NOMx_0))); + denom2 = denom2*denom2; + T2 = sqrt(denom1 + denom2 + EPS); + D2[index] = NOMy_1/T2; + } + } + + __global__ void TV_kernel2D(float *D1, float *D2, float *Update, float *Input, float lambda, float tau, int N, int M) + { + int i2, j2; + float dv1,dv2; + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + + int index = i + N*j; + + if ((i >= 0) && (i < (N)) && (j >= 0) && (j < (M))) { + + /* boundary conditions (Neumann reflections) */ + i2 = i - 1; if (i2 < 0) i2 = i+1; + j2 = j - 1; if (j2 < 0) j2 = j+1; + + /* divergence components */ + dv1 = D1[index] - D1[j2*N + i]; + dv2 = D2[index] - D2[j*N + i2]; + + Update[index] = Update[index] + tau*(2.0f*lambda*(dv1 + dv2) - (Update[index] - Input[index])); + + } + } +/*********************3D case****************************/ + + /* differences 1 */ + __global__ void D1_func3D(float* Input, float* D1, int dimX, int dimY, int dimZ) + { + float NOMx_1, NOMy_1, NOMy_0, NOMz_1, NOMz_0, denom1, denom2,denom3, T1; + int i1,i2,k1,j1,j2,k2; + + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (dimX*dimY)*k + j*dimX+i; + + if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) { + + /* symmetric boundary conditions (Neuman) */ + i1 = i + 1; if (i1 >= dimX) i1 = i-1; + i2 = i - 1; if (i2 < 0) i2 = i+1; + j1 = j + 1; if (j1 >= dimY) j1 = j-1; + j2 = j - 1; if (j2 < 0) j2 = j+1; + k1 = k + 1; if (k1 >= dimZ) k1 = k-1; + k2 = k - 1; if (k2 < 0) k2 = k+1; + + /* Forward-backward differences */ + NOMx_1 = Input[(dimX*dimY)*k + j1*dimX + i] - Input[index]; /* x+ */ + NOMy_1 = Input[(dimX*dimY)*k + j*dimX + i1] - Input[index]; /* y+ */ + NOMy_0 = Input[index] - Input[(dimX*dimY)*k + j*dimX + i2]; /* y- */ + + NOMz_1 = Input[(dimX*dimY)*k1 + j*dimX + i] - Input[index]; /* z+ */ + NOMz_0 = Input[index] - Input[(dimX*dimY)*k2 + j*dimX + i]; /* z- */ + + + denom1 = NOMx_1*NOMx_1; + denom2 = 0.5*(sign(NOMy_1) + sign(NOMy_0))*(MIN(abs(NOMy_1),abs(NOMy_0))); + denom2 = denom2*denom2; + denom3 = 0.5*(sign(NOMz_1) + sign(NOMz_0))*(MIN(abs(NOMz_1),abs(NOMz_0))); + denom3 = denom3*denom3; + T1 = sqrt(denom1 + denom2 + denom3 + EPS); + D1[index] = NOMx_1/T1; + } + } + + /* differences 2 */ + __global__ void D2_func3D(float* Input, float* D2, int dimX, int dimY, int dimZ) + { + float NOMx_1, NOMy_1, NOMx_0, NOMz_1, NOMz_0, denom1, denom2, denom3, T2; + int i1,i2,k1,j1,j2,k2; + + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (dimX*dimY)*k + j*dimX+i; + + if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) { + /* symmetric boundary conditions (Neuman) */ + i1 = i + 1; if (i1 >= dimX) i1 = i-1; + i2 = i - 1; if (i2 < 0) i2 = i+1; + j1 = j + 1; if (j1 >= dimY) j1 = j-1; + j2 = j - 1; if (j2 < 0) j2 = j+1; + k1 = k + 1; if (k1 >= dimZ) k1 = k-1; + k2 = k - 1; if (k2 < 0) k2 = k+1; + + + /* Forward-backward differences */ + NOMx_1 = Input[(dimX*dimY)*k + (j1)*dimX + i] - Input[index]; /* x+ */ + NOMy_1 = Input[(dimX*dimY)*k + (j)*dimX + i1] - Input[index]; /* y+ */ + NOMx_0 = Input[index] - Input[(dimX*dimY)*k + (j2)*dimX + i]; /* x- */ + NOMz_1 = Input[(dimX*dimY)*k1 + j*dimX + i] - Input[index]; /* z+ */ + NOMz_0 = Input[index] - Input[(dimX*dimY)*k2 + (j)*dimX + i]; /* z- */ + + + denom1 = NOMy_1*NOMy_1; + denom2 = 0.5*(sign(NOMx_1) + sign(NOMx_0))*(MIN(abs(NOMx_1),abs(NOMx_0))); + denom2 = denom2*denom2; + denom3 = 0.5*(sign(NOMz_1) + sign(NOMz_0))*(MIN(abs(NOMz_1),abs(NOMz_0))); + denom3 = denom3*denom3; + T2 = sqrt(denom1 + denom2 + denom3 + EPS); + D2[index] = NOMy_1/T2; + } + } + + /* differences 3 */ + __global__ void D3_func3D(float* Input, float* D3, int dimX, int dimY, int dimZ) + { + float NOMx_1, NOMy_1, NOMx_0, NOMy_0, NOMz_1, denom1, denom2, denom3, T3; + int i1,i2,k1,j1,j2,k2; + + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (dimX*dimY)*k + j*dimX+i; + + if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) { + + i1 = i + 1; if (i1 >= dimX) i1 = i-1; + i2 = i - 1; if (i2 < 0) i2 = i+1; + j1 = j + 1; if (j1 >= dimY) j1 = j-1; + j2 = j - 1; if (j2 < 0) j2 = j+1; + k1 = k + 1; if (k1 >= dimZ) k1 = k-1; + k2 = k - 1; if (k2 < 0) k2 = k+1; + + /* Forward-backward differences */ + NOMx_1 = Input[(dimX*dimY)*k + (j1)*dimX + i] - Input[index]; /* x+ */ + NOMy_1 = Input[(dimX*dimY)*k + (j)*dimX + i1] - Input[index]; /* y+ */ + NOMy_0 = Input[index] - Input[(dimX*dimY)*k + (j)*dimX + i2]; /* y- */ + NOMx_0 = Input[index] - Input[(dimX*dimY)*k + (j2)*dimX + i]; /* x- */ + NOMz_1 = Input[(dimX*dimY)*k1 + j*dimX + i] - Input[index]; /* z+ */ + + denom1 = NOMz_1*NOMz_1; + denom2 = 0.5*(sign(NOMx_1) + sign(NOMx_0))*(MIN(abs(NOMx_1),abs(NOMx_0))); + denom2 = denom2*denom2; + denom3 = 0.5*(sign(NOMy_1) + sign(NOMy_0))*(MIN(abs(NOMy_1),abs(NOMy_0))); + denom3 = denom3*denom3; + T3 = sqrt(denom1 + denom2 + denom3 + EPS); + D3[index] = NOMz_1/T3; + } + } + + __global__ void TV_kernel3D(float *D1, float *D2, float *D3, float *Update, float *Input, float lambda, float tau, int dimX, int dimY, int dimZ) + { + float dv1, dv2, dv3; + int i1,i2,k1,j1,j2,k2; + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (dimX*dimY)*k + j*dimX+i; + + if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) { + + /* symmetric boundary conditions (Neuman) */ + i1 = i + 1; if (i1 >= dimX) i1 = i-1; + i2 = i - 1; if (i2 < 0) i2 = i+1; + j1 = j + 1; if (j1 >= dimY) j1 = j-1; + j2 = j - 1; if (j2 < 0) j2 = j+1; + k1 = k + 1; if (k1 >= dimZ) k1 = k-1; + k2 = k - 1; if (k2 < 0) k2 = k+1; + + /*divergence components */ + dv1 = D1[index] - D1[(dimX*dimY)*k + j2*dimX+i]; + dv2 = D2[index] - D2[(dimX*dimY)*k + j*dimX+i2]; + dv3 = D3[index] - D3[(dimX*dimY)*k2 + j*dimX+i]; + + Update[index] = Update[index] + tau*(2.0f*lambda*(dv1 + dv2 + dv3) - (Update[index] - Input[index])); + + } + } + +///////////////////////////////////////////////// +// HOST FUNCTION +extern "C" void TV_ROF_GPU(float* Input, float* Output, int N, int M, int Z, int iter, float tau, float lambda) +{ + // set up device + int dev = 0; + CHECK(cudaSetDevice(dev)); + + float *d_input, *d_update, *d_D1, *d_D2; + + if (Z == 0) Z = 1; + CHECK(cudaMalloc((void**)&d_input,N*M*Z*sizeof(float))); + CHECK(cudaMalloc((void**)&d_update,N*M*Z*sizeof(float))); + CHECK(cudaMalloc((void**)&d_D1,N*M*Z*sizeof(float))); + CHECK(cudaMalloc((void**)&d_D2,N*M*Z*sizeof(float))); + + CHECK(cudaMemcpy(d_input,Input,N*M*Z*sizeof(float),cudaMemcpyHostToDevice)); + CHECK(cudaMemcpy(d_update,Input,N*M*Z*sizeof(float),cudaMemcpyHostToDevice)); + + if (Z > 1) { + // TV - 3D case + dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE); + dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE),idivup(Z,BLKXSIZE)); + + float *d_D3; + CHECK(cudaMalloc((void**)&d_D3,N*M*Z*sizeof(float))); + + for(int n=0; n < iter; n++) { + /* calculate differences */ + D1_func3D<<>>(d_update, d_D1, N, M, Z); + CHECK(cudaDeviceSynchronize()); + D2_func3D<<>>(d_update, d_D2, N, M, Z); + CHECK(cudaDeviceSynchronize()); + D3_func3D<<>>(d_update, d_D3, N, M, Z); + CHECK(cudaDeviceSynchronize()); + /*running main kernel*/ + TV_kernel3D<<>>(d_D1, d_D2, d_D3, d_update, d_input, lambda, tau, N, M, Z); + CHECK(cudaDeviceSynchronize()); + } + + CHECK(cudaFree(d_D3)); + } + else { + // TV - 2D case + dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D); + dim3 dimGrid(idivup(N,BLKXSIZE2D), idivup(M,BLKYSIZE2D)); + + for(int n=0; n < iter; n++) { + /* calculate differences */ + D1_func2D<<>>(d_update, d_D1, N, M); + CHECK(cudaDeviceSynchronize()); + D2_func2D<<>>(d_update, d_D2, N, M); + CHECK(cudaDeviceSynchronize()); + /*running main kernel*/ + TV_kernel2D<<>>(d_D1, d_D2, d_update, d_input, lambda, tau, N, M); + CHECK(cudaDeviceSynchronize()); + } + } + CHECK(cudaMemcpy(Output,d_update,N*M*Z*sizeof(float),cudaMemcpyDeviceToHost)); + CHECK(cudaFree(d_input)); + CHECK(cudaFree(d_update)); + CHECK(cudaFree(d_D1)); + CHECK(cudaFree(d_D2)); + cudaDeviceReset(); +} diff --git a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.h b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.h new file mode 100755 index 0000000..8b64d99 --- /dev/null +++ b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.h @@ -0,0 +1,8 @@ +#ifndef __TVGPU_H__ +#define __TVGPU_H__ +#include "CCPiDefines.h" +#include + +extern "C" CCPI_EXPORT void TV_ROF_GPU(float* Input, float* Output, int N, int M, int Z, int iter, float tau, float lambda); + +#endif diff --git a/Wrappers/Python/src/cpu_regularizers.cpp b/Wrappers/Python/src/cpu_regularizers.cpp index e311570..43d5d11 100644 --- a/Wrappers/Python/src/cpu_regularizers.cpp +++ b/Wrappers/Python/src/cpu_regularizers.cpp @@ -27,7 +27,7 @@ limitations under the License. #include "boost/tuple/tuple.hpp" #include "SplitBregman_TV_core.h" -#include "FGP_TV_core.h" +//#include "FGP_TV_core.h" #include "LLT_model_core.h" #include "PatchBased_Regul_core.h" #include "TGV_PD_core.h" @@ -305,289 +305,289 @@ bp::list SplitBregman_TV(np::ndarray input, double d_mu, int iter, double d_epsi -bp::list FGP_TV(np::ndarray input, double d_mu, int iter, double d_epsil, int methTV) { +//bp::list FGP_TV(np::ndarray input, double d_mu, int iter, double d_epsil, int methTV) { - // the result is in the following list - bp::list result; + //// the result is in the following list + //bp::list result; - int number_of_dims, dimX, dimY, dimZ, ll, j, count; - float *A, *D = NULL, *D_old = NULL, *P1 = NULL, *P2 = NULL, *P3 = NULL, *P1_old = NULL, *P2_old = NULL, *P3_old = NULL, *R1 = NULL, *R2 = NULL, *R3 = NULL; - float lambda, tk, tkp1, re, re1, re_old, epsil, funcval; + //int number_of_dims, dimX, dimY, dimZ, ll, j, count; + //float *A, *D = NULL, *D_old = NULL, *P1 = NULL, *P2 = NULL, *P3 = NULL, *P1_old = NULL, *P2_old = NULL, *P3_old = NULL, *R1 = NULL, *R2 = NULL, *R3 = NULL; + //float lambda, tk, tkp1, re, re1, re_old, epsil, funcval; - //number_of_dims = mxGetNumberOfDimensions(prhs[0]); - //dim_array = mxGetDimensions(prhs[0]); - - number_of_dims = input.get_nd(); - int dim_array[3]; + ////number_of_dims = mxGetNumberOfDimensions(prhs[0]); + ////dim_array = mxGetDimensions(prhs[0]); - dim_array[0] = input.shape(0); - dim_array[1] = input.shape(1); - if (number_of_dims == 2) { - dim_array[2] = -1; - } - else { - dim_array[2] = input.shape(2); - } - // Parameter handling is be done in Python - ///*Handling Matlab input data*/ - //if ((nrhs < 2) || (nrhs > 5)) mexErrMsgTxt("At least 2 parameters is required: Image(2D/3D), Regularization parameter. The full list of parameters: Image(2D/3D), Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1')"); - - ///*Handling Matlab input data*/ - //A = (float *)mxGetData(prhs[0]); /*noisy image (2D/3D) */ - A = reinterpret_cast(input.get_data()); - - //mu = (float)mxGetScalar(prhs[1]); /* regularization parameter */ - lambda = (float)d_mu; - - //iter = 35; /* default iterations number */ + //number_of_dims = input.get_nd(); + //int dim_array[3]; - //epsil = 0.0001; /* default tolerance constant */ - epsil = (float)d_epsil; - //methTV = 0; /* default isotropic TV penalty */ - //if ((nrhs == 3) || (nrhs == 4) || (nrhs == 5)) iter = (int)mxGetScalar(prhs[2]); /* iterations number */ - //if ((nrhs == 4) || (nrhs == 5)) epsil = (float)mxGetScalar(prhs[3]); /* tolerance constant */ - //if (nrhs == 5) { - // char *penalty_type; - // penalty_type = mxArrayToString(prhs[4]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */ - // if ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0)) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',"); - // if (strcmp(penalty_type, "l1") == 0) methTV = 1; /* enable 'l1' penalty */ - // mxFree(penalty_type); + //dim_array[0] = input.shape(0); + //dim_array[1] = input.shape(1); + //if (number_of_dims == 2) { + //dim_array[2] = -1; //} - //if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) { mexErrMsgTxt("The input image must be in a single precision"); } - - //plhs[1] = mxCreateNumericMatrix(1, 1, mxSINGLE_CLASS, mxREAL); - bp::tuple shape1 = bp::make_tuple(dim_array[0], dim_array[1]); - np::dtype dtype = np::dtype::get_builtin(); - np::ndarray out1 = np::zeros(shape1, dtype); + //else { + //dim_array[2] = input.shape(2); + //} + //// Parameter handling is be done in Python + /////*Handling Matlab input data*/ + ////if ((nrhs < 2) || (nrhs > 5)) mexErrMsgTxt("At least 2 parameters is required: Image(2D/3D), Regularization parameter. The full list of parameters: Image(2D/3D), Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1')"); + + /////*Handling Matlab input data*/ + ////A = (float *)mxGetData(prhs[0]); /*noisy image (2D/3D) */ + //A = reinterpret_cast(input.get_data()); + + ////mu = (float)mxGetScalar(prhs[1]); /* regularization parameter */ + //lambda = (float)d_mu; + + ////iter = 35; /* default iterations number */ + + ////epsil = 0.0001; /* default tolerance constant */ + //epsil = (float)d_epsil; + ////methTV = 0; /* default isotropic TV penalty */ + ////if ((nrhs == 3) || (nrhs == 4) || (nrhs == 5)) iter = (int)mxGetScalar(prhs[2]); /* iterations number */ + ////if ((nrhs == 4) || (nrhs == 5)) epsil = (float)mxGetScalar(prhs[3]); /* tolerance constant */ + ////if (nrhs == 5) { + //// char *penalty_type; + //// penalty_type = mxArrayToString(prhs[4]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */ + //// if ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0)) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',"); + //// if (strcmp(penalty_type, "l1") == 0) methTV = 1; /* enable 'l1' penalty */ + //// mxFree(penalty_type); + ////} + ////if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) { mexErrMsgTxt("The input image must be in a single precision"); } + + ////plhs[1] = mxCreateNumericMatrix(1, 1, mxSINGLE_CLASS, mxREAL); + //bp::tuple shape1 = bp::make_tuple(dim_array[0], dim_array[1]); + //np::dtype dtype = np::dtype::get_builtin(); + //np::ndarray out1 = np::zeros(shape1, dtype); - //float *funcvalA = (float *)mxGetData(plhs[1]); - float * funcvalA = reinterpret_cast(out1.get_data()); - //if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) { mexErrMsgTxt("The input image must be in a single precision"); } - - /*Handling Matlab output data*/ - dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2]; - - tk = 1.0f; - tkp1 = 1.0f; - count = 1; - re_old = 0.0f; - - if (number_of_dims == 2) { - dimZ = 1; /*2D case*/ - /*D = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - D_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - P1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - P2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - P1_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - P2_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - R1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - R2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));*/ - - bp::tuple shape = bp::make_tuple(dim_array[0], dim_array[1]); - np::dtype dtype = np::dtype::get_builtin(); - - - np::ndarray npD = np::zeros(shape, dtype); - np::ndarray npD_old = np::zeros(shape, dtype); - np::ndarray npP1 = np::zeros(shape, dtype); - np::ndarray npP2 = np::zeros(shape, dtype); - np::ndarray npP1_old = np::zeros(shape, dtype); - np::ndarray npP2_old = np::zeros(shape, dtype); - np::ndarray npR1 = np::zeros(shape, dtype); - np::ndarray npR2 = np::zeros(shape, dtype); - - D = reinterpret_cast(npD.get_data()); - D_old = reinterpret_cast(npD_old.get_data()); - P1 = reinterpret_cast(npP1.get_data()); - P2 = reinterpret_cast(npP2.get_data()); - P1_old = reinterpret_cast(npP1_old.get_data()); - P2_old = reinterpret_cast(npP2_old.get_data()); - R1 = reinterpret_cast(npR1.get_data()); - R2 = reinterpret_cast(npR2.get_data()); - - /* begin iterations */ - for (ll = 0; ll(out1.get_data()); + ////if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) { mexErrMsgTxt("The input image must be in a single precision"); } + + ///*Handling Matlab output data*/ + //dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2]; + + //tk = 1.0f; + //tkp1 = 1.0f; + //count = 1; + //re_old = 0.0f; + + //if (number_of_dims == 2) { + //dimZ = 1; /*2D case*/ + ///*D = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); + //D_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); + //P1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); + //P2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); + //P1_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); + //P2_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); + //R1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); + //R2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));*/ + + //bp::tuple shape = bp::make_tuple(dim_array[0], dim_array[1]); + //np::dtype dtype = np::dtype::get_builtin(); + + + //np::ndarray npD = np::zeros(shape, dtype); + //np::ndarray npD_old = np::zeros(shape, dtype); + //np::ndarray npP1 = np::zeros(shape, dtype); + //np::ndarray npP2 = np::zeros(shape, dtype); + //np::ndarray npP1_old = np::zeros(shape, dtype); + //np::ndarray npP2_old = np::zeros(shape, dtype); + //np::ndarray npR1 = np::zeros(shape, dtype); + //np::ndarray npR2 = np::zeros(shape, dtype); + + //D = reinterpret_cast(npD.get_data()); + //D_old = reinterpret_cast(npD_old.get_data()); + //P1 = reinterpret_cast(npP1.get_data()); + //P2 = reinterpret_cast(npP2.get_data()); + //P1_old = reinterpret_cast(npP1_old.get_data()); + //P2_old = reinterpret_cast(npP2_old.get_data()); + //R1 = reinterpret_cast(npR1.get_data()); + //R2 = reinterpret_cast(npR2.get_data()); + + ///* begin iterations */ + //for (ll = 0; ll 4) { - Obj_func2D(A, D, P1, P2, lambda, dimX, dimY); - funcval = 0.0f; - for (j = 0; j 2) { - if (re > re_old) { - Obj_func2D(A, D, P1, P2, lambda, dimX, dimY); - funcval = 0.0f; - for (j = 0; j(npD); - result.append(out1); - result.append(ll); - } - if (number_of_dims == 3) { - /*D = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - D_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - P1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - P2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - P3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - P1_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - P2_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - P3_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - R1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - R2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - R3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));*/ - bp::tuple shape = bp::make_tuple(dim_array[0], dim_array[1], dim_array[2]); - np::dtype dtype = np::dtype::get_builtin(); + ///*updating R and t*/ + //tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f; + //Rupd_func2D(P1, P1_old, P2, P2_old, R1, R2, tkp1, tk, dimX, dimY); + + ///* calculate norm */ + //re = 0.0f; re1 = 0.0f; + //for (j = 0; j 4) { + //Obj_func2D(A, D, P1, P2, lambda, dimX, dimY); + //funcval = 0.0f; + //for (j = 0; j 2) { + //if (re > re_old) { + //Obj_func2D(A, D, P1, P2, lambda, dimX, dimY); + //funcval = 0.0f; + //for (j = 0; j(npD); + //result.append(out1); + //result.append(ll); + //} + //if (number_of_dims == 3) { + ///*D = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); + //D_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); + //P1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); + //P2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); + //P3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); + //P1_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); + //P2_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); + //P3_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); + //R1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); + //R2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); + //R3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));*/ + //bp::tuple shape = bp::make_tuple(dim_array[0], dim_array[1], dim_array[2]); + //np::dtype dtype = np::dtype::get_builtin(); - np::ndarray npD = np::zeros(shape, dtype); - np::ndarray npD_old = np::zeros(shape, dtype); - np::ndarray npP1 = np::zeros(shape, dtype); - np::ndarray npP2 = np::zeros(shape, dtype); - np::ndarray npP3 = np::zeros(shape, dtype); - np::ndarray npP1_old = np::zeros(shape, dtype); - np::ndarray npP2_old = np::zeros(shape, dtype); - np::ndarray npP3_old = np::zeros(shape, dtype); - np::ndarray npR1 = np::zeros(shape, dtype); - np::ndarray npR2 = np::zeros(shape, dtype); - np::ndarray npR3 = np::zeros(shape, dtype); - - D = reinterpret_cast(npD.get_data()); - D_old = reinterpret_cast(npD_old.get_data()); - P1 = reinterpret_cast(npP1.get_data()); - P2 = reinterpret_cast(npP2.get_data()); - P3 = reinterpret_cast(npP3.get_data()); - P1_old = reinterpret_cast(npP1_old.get_data()); - P2_old = reinterpret_cast(npP2_old.get_data()); - P3_old = reinterpret_cast(npP3_old.get_data()); - R1 = reinterpret_cast(npR1.get_data()); - R2 = reinterpret_cast(npR2.get_data()); - R3 = reinterpret_cast(npR3.get_data()); - /* begin iterations */ - for (ll = 0; ll 3) { - Obj_func3D(A, D, P1, P2, P3, lambda, dimX, dimY, dimZ); - funcval = 0.0f; - for (j = 0; j 2) { - if (re > re_old) { - Obj_func3D(A, D, P1, P2, P3, lambda, dimX, dimY, dimZ); - funcval = 0.0f; - for (j = 0; j(npD); - result.append(out1); - result.append(ll); - } + //np::ndarray npD = np::zeros(shape, dtype); + //np::ndarray npD_old = np::zeros(shape, dtype); + //np::ndarray npP1 = np::zeros(shape, dtype); + //np::ndarray npP2 = np::zeros(shape, dtype); + //np::ndarray npP3 = np::zeros(shape, dtype); + //np::ndarray npP1_old = np::zeros(shape, dtype); + //np::ndarray npP2_old = np::zeros(shape, dtype); + //np::ndarray npP3_old = np::zeros(shape, dtype); + //np::ndarray npR1 = np::zeros(shape, dtype); + //np::ndarray npR2 = np::zeros(shape, dtype); + //np::ndarray npR3 = np::zeros(shape, dtype); + + //D = reinterpret_cast(npD.get_data()); + //D_old = reinterpret_cast(npD_old.get_data()); + //P1 = reinterpret_cast(npP1.get_data()); + //P2 = reinterpret_cast(npP2.get_data()); + //P3 = reinterpret_cast(npP3.get_data()); + //P1_old = reinterpret_cast(npP1_old.get_data()); + //P2_old = reinterpret_cast(npP2_old.get_data()); + //P3_old = reinterpret_cast(npP3_old.get_data()); + //R1 = reinterpret_cast(npR1.get_data()); + //R2 = reinterpret_cast(npR2.get_data()); + //R3 = reinterpret_cast(npR3.get_data()); + ///* begin iterations */ + //for (ll = 0; ll 3) { + //Obj_func3D(A, D, P1, P2, P3, lambda, dimX, dimY, dimZ); + //funcval = 0.0f; + //for (j = 0; j 2) { + //if (re > re_old) { + //Obj_func3D(A, D, P1, P2, P3, lambda, dimX, dimY, dimZ); + //funcval = 0.0f; + //for (j = 0; j(npD); + //result.append(out1); + //result.append(ll); + //} - return result; -} + //return result; +//} bp::list LLT_model(np::ndarray input, double d_lambda, double d_tau, int iter, double d_epsil, int switcher) { // the result is in the following list diff --git a/Wrappers/Python/src/gpu_regularizers.pyx b/Wrappers/Python/src/gpu_regularizers.pyx index c724471..263fa4a 100644 --- a/Wrappers/Python/src/gpu_regularizers.pyx +++ b/Wrappers/Python/src/gpu_regularizers.pyx @@ -25,7 +25,9 @@ cdef extern void NLM_GPU_kernel(float *A, float* B, float *Eucl_Vec, int N, int M, int Z, int dimension, int SearchW, int SimilW, int SearchW_real, float denh2, float lambdaf); -cdef extern void TV_ROF_GPU_kernel(float* A, float* B, int N, int M, int Z, int iter, float tau, float lambdaf); +cdef extern void TV_ROF_GPU(float* Input, float* Output, int N, int M, int Z, int iter, float tau, float lambdaf); +cdef extern void TV_FGP_GPU(float *Input, float *Output, float lambda, int iter, float epsil, int methodTV, int nonneg, int printM, int N, int M, int Z); + cdef extern float pad_crop(float *A, float *Ap, int OldSizeX, int OldSizeY, int OldSizeZ, int NewSizeX, int NewSizeY, int NewSizeZ, @@ -343,7 +345,7 @@ def ROFTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, np.zeros([dims[0],dims[1]], dtype='float32') # Running CUDA code here - TV_ROF_GPU_kernel( + TV_ROF_GPU( &inputData[0,0], &B[0,0], dims[0], dims[1], 1, iterations , @@ -366,7 +368,7 @@ def ROFTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, np.zeros([dims[0],dims[1],dims[2]], dtype='float32') # Running CUDA code here - TV_ROF_GPU_kernel( + TV_ROF_GPU( &inputData[0,0,0], &B[0,0,0], dims[0], dims[1], dims[2], iterations , @@ -374,3 +376,64 @@ def ROFTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, regularization_parameter); return B + + +def TVFGP2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, + float regularization_parameter, + int iterations, + float tolerance_param, + int methodTV, + int nonneg, + int printM): + + cdef long dims[2] + dims[0] = inputData.shape[0] + dims[1] = inputData.shape[1] + + cdef np.ndarray[np.float32_t, ndim=2, mode="c"] B = \ + np.zeros([dims[0],dims[1]], dtype='float32') + + # Running CUDA code here + TV_FGP_GPU( + &inputData[0,0], &B[0,0], + regularization_parameter , + iterations, + tolerance_param, + methodTV, + nonneg, + printM, + dims[0], dims[1], 1); + + return B + +def TVFGP3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, + float regularization_parameter, + int iterations, + float tolerance_param, + int methodTV, + int nonneg, + int printM): + + cdef long dims[3] + dims[0] = inputData.shape[0] + dims[1] = inputData.shape[1] + dims[2] = inputData.shape[2] + + cdef np.ndarray[np.float32_t, ndim=3, mode="c"] B = \ + np.zeros([dims[0],dims[1],dims[2]], dtype='float32') + + # Running CUDA code here + TV_FGP_GPU( + &inputData[0,0,0], &B[0,0,0], + regularization_parameter , + iterations, + tolerance_param, + methodTV, + nonneg, + printM, + dims[0], dims[1], dims[2]); + + return B + + + diff --git a/Wrappers/Python/test/test_cpu_vs_gpu.py b/Wrappers/Python/test/test_cpu_vs_gpu.py new file mode 100644 index 0000000..74d65dd --- /dev/null +++ b/Wrappers/Python/test/test_cpu_vs_gpu.py @@ -0,0 +1,10 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Wed Feb 21 12:12:22 2018 + +# CPU vs GPU comparison tests + +@author: algol +""" + diff --git a/Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py b/Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py index d742a7f..6344021 100644 --- a/Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py +++ b/Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py @@ -12,8 +12,8 @@ import matplotlib.pyplot as plt import numpy as np import os import timeit -from ccpi.filters.gpu_regularizers import Diff4thHajiaboli, NML, GPU_ROF_TV -from ccpi.filters.cpu_regularizers_cython import ROF_TV +from ccpi.filters.gpu_regularizers import Diff4thHajiaboli, NML, TV_ROF_GPU +from ccpi.filters.cpu_regularizers_cython import TV_ROF_CPU ############################################################################### def printParametersToString(pars): txt = r'' @@ -64,7 +64,7 @@ pars = {'algorithm': ROF_TV , \ } print ("#################ROF TV CPU#####################") start_time = timeit.default_timer() -rof_cpu = ROF_TV(pars['input'], +rof_cpu = TV_ROF_CPU(pars['input'], pars['number_of_iterations'], pars['regularization_parameter'], pars['time_marching_parameter'] @@ -89,7 +89,7 @@ plt.title('{}'.format('CPU results')) print ("#################ROF TV GPU#####################") start_time = timeit.default_timer() -rof_gpu = GPU_ROF_TV(pars['input'], +rof_gpu = TV_ROF_GPU(pars['input'], pars['number_of_iterations'], pars['time_marching_parameter'], pars['regularization_parameter']) -- cgit v1.2.3 From 8082a76d4dfd9588590bab3fd26eae976b744a94 Mon Sep 17 00:00:00 2001 From: Daniil Kazantsev Date: Mon, 26 Feb 2018 13:20:33 +0000 Subject: cpu-related cython file updated #33 --- Wrappers/Python/src/cpu_regularizers.pyx | 77 +++++++++++++++++++++++++++----- Wrappers/Python/src/gpu_regularizers.pyx | 2 +- 2 files changed, 68 insertions(+), 11 deletions(-) diff --git a/Wrappers/Python/src/cpu_regularizers.pyx b/Wrappers/Python/src/cpu_regularizers.pyx index e7ff78c..b8089a8 100644 --- a/Wrappers/Python/src/cpu_regularizers.pyx +++ b/Wrappers/Python/src/cpu_regularizers.pyx @@ -18,20 +18,20 @@ import cython import numpy as np cimport numpy as np -cdef extern float TV_ROF(float *A, float *B, int dimX, int dimY, int dimZ, - int iterationsNumb, float tau, float flambda); +cdef extern float TV_ROF_CPU(float *Input, float *Output, int dimX, int dimY, int dimZ, int iterationsNumb, float tau, float flambda); +cdef extern float TV_FGP_CPU(float *Input, float *Output, float lambda, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ); - -def ROF_TV(inputData, iterations, regularization_parameter, +# Can we use the same name here in "def" as the C function? +def TV_ROF_CPU(inputData, iterations, regularization_parameter, marching_step_parameter): if inputData.ndim == 2: - return ROF_TV_2D(inputData, iterations, regularization_parameter, + return TV_ROF_2D(inputData, iterations, regularization_parameter, marching_step_parameter) elif inputData.ndim == 3: - return ROF_TV_3D(inputData, iterations, regularization_parameter, + return TV_ROF_3D(inputData, iterations, regularization_parameter, marching_step_parameter) -def ROF_TV_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, +def TV_ROF_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, int iterations, float regularization_parameter, float marching_step_parameter @@ -45,13 +45,13 @@ def ROF_TV_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, np.zeros([dims[0],dims[1]], dtype='float32') #/* Run ROF iterations for 2D data */ - TV_ROF(&inputData[0,0], &B[0,0], dims[0], dims[1], 1, iterations, + TV_ROF_CPU(&inputData[0,0], &B[0,0], dims[0], dims[1], 1, iterations, marching_step_parameter, regularization_parameter) return B -def ROF_TV_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, +def TV_ROF_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, int iterations, float regularization_parameter, float marching_step_parameter @@ -65,8 +65,65 @@ def ROF_TV_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, np.zeros([dims[0],dims[1],dims[2]], dtype='float32') #/* Run ROF iterations for 3D data */ - TV_ROF(&inputData[0,0, 0], &B[0,0, 0], dims[0], dims[1], dims[2], iterations, + TV_FGP_CPU(&inputData[0,0,0], &B[0,0,0], dims[0], dims[1], dims[2], iterations, marching_step_parameter, regularization_parameter) return B +def TV_FGP_CPU(inputData, regularization_parameter, iterations, tolerance_param, methodTV, nonneg, printM): + if inputData.ndim == 2: + return TV_FGP_2D(inputData, regularization_parameter, iterations, tolerance_param, methodTV, nonneg, printM) + elif inputData.ndim == 3: + return TV_FGP_3D(inputData, regularization_parameter, iterations, tolerance_param, methodTV, nonneg, printM) + +def TV_FGP_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, + float regularization_parameter, + int iterations, + float tolerance_param, + int methodTV, + int nonneg, + int printM): + + cdef long dims[2] + dims[0] = inputData.shape[0] + dims[1] = inputData.shape[1] + + cdef np.ndarray[np.float32_t, ndim=2, mode="c"] B = \ + np.zeros([dims[0],dims[1]], dtype='float32') + + #/* Run ROF iterations for 2D data */ + TV_FGP_CPU(&inputData[0,0], &B[0,0], regularization_parameter, + iterations, + tolerance_param, + methodTV, + nonneg, + printM, + dims[0], dims[1], 1) + + return B + + +def TV_FGP_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, + float regularization_parameter, + int iterations, + float tolerance_param, + int methodTV, + int nonneg, + int printM): + cdef long dims[2] + dims[0] = inputData.shape[0] + dims[1] = inputData.shape[1] + dims[2] = inputData.shape[2] + + cdef np.ndarray[np.float32_t, ndim=3, mode="c"] B = \ + np.zeros([dims[0],dims[1],dims[2]], dtype='float32') + + #/* Run ROF iterations for 3D data */ + TV_FGP_CPU(&inputData[0,0, 0], &B[0,0, 0], iterations, + tolerance_param, + methodTV, + nonneg, + printM, + dims[0], dims[1], dims[2]) + + return B diff --git a/Wrappers/Python/src/gpu_regularizers.pyx b/Wrappers/Python/src/gpu_regularizers.pyx index 263fa4a..a14a20d 100644 --- a/Wrappers/Python/src/gpu_regularizers.pyx +++ b/Wrappers/Python/src/gpu_regularizers.pyx @@ -396,7 +396,7 @@ def TVFGP2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, # Running CUDA code here TV_FGP_GPU( &inputData[0,0], &B[0,0], - regularization_parameter , + regularization_parameter, iterations, tolerance_param, methodTV, -- cgit v1.2.3 From 74ff5b5f319b2f7ea3e078c62041ec8a1bb28335 Mon Sep 17 00:00:00 2001 From: algol Date: Mon, 5 Mar 2018 18:12:01 +0000 Subject: Cmake/Cython fixes to compile ROF-FGP --- Core/CMakeLists.txt | 3 ++- Core/regularizers_CPU/FGP_TV_core.c | 12 +++++------ Core/regularizers_CPU/FGP_TV_core.h | 2 +- Core/regularizers_CPU/ROF_TV_core.c | 4 ++-- Core/regularizers_CPU/ROF_TV_core.h | 2 +- Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.cu | 12 +++++------ Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.h | 2 +- Wrappers/Python/demo/test_cpu_regularizers.py | 25 ++++++++++++---------- Wrappers/Python/src/cpu_regularizers.cpp | 1 - Wrappers/Python/src/cpu_regularizers.pyx | 25 +++++++++++----------- Wrappers/Python/src/gpu_regularizers.pyx | 2 +- .../Python/test/test_cpu_vs_gpu_regularizers.py | 9 ++++---- 12 files changed, 51 insertions(+), 48 deletions(-) diff --git a/Core/CMakeLists.txt b/Core/CMakeLists.txt index 3e3f89e..68b7111 100644 --- a/Core/CMakeLists.txt +++ b/Core/CMakeLists.txt @@ -129,7 +129,8 @@ if (CUDA_FOUND) CUDA_ADD_LIBRARY(cilregcuda SHARED ${CMAKE_CURRENT_SOURCE_DIR}/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.cu ${CMAKE_CURRENT_SOURCE_DIR}/regularizers_GPU/NL_Regul/NLM_GPU_kernel.cu - ${CMAKE_CURRENT_SOURCE_DIR}/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu + ${CMAKE_CURRENT_SOURCE_DIR}/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.cu + ${CMAKE_CURRENT_SOURCE_DIR}/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.cu ) if (UNIX) message ("I'd install into ${CMAKE_INSTALL_PREFIX}/lib") diff --git a/Core/regularizers_CPU/FGP_TV_core.c b/Core/regularizers_CPU/FGP_TV_core.c index 2f1439d..5365631 100644 --- a/Core/regularizers_CPU/FGP_TV_core.c +++ b/Core/regularizers_CPU/FGP_TV_core.c @@ -23,7 +23,7 @@ limitations under the License. * * Input Parameters: * 1. Noisy image/volume - * 2. lambda - regularization parameter + * 2. lambdaPar - regularization parameter * 3. Number of iterations * 4. eplsilon: tolerance constant * 5. TV-type: methodTV - 'iso' (0) or 'l1' (1) @@ -37,7 +37,7 @@ limitations under the License. * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems" */ -float TV_FGP_CPU(float *Input, float *Output, float lambda, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ) +float TV_FGP_CPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ) { int ll, j, DimTotal; float re, re1; @@ -62,13 +62,13 @@ float TV_FGP_CPU(float *Input, float *Output, float lambda, int iter, float epsi for(ll=0; ll (y)) ? (x) : (y)) #define MIN(x, y) (((x) < (y)) ? (x) : (y)) @@ -46,7 +46,7 @@ int sign(float x) { */ /* Running iterations of TV-ROF function */ -float TV_ROF_CPU(float *Input, float *Output, int dimX, int dimY, int dimZ, int iterationsNumb, float tau, float lambda) +float TV_ROF_CPU_main(float *Input, float *Output, int dimX, int dimY, int dimZ, int iterationsNumb, float tau, float lambda) { float *D1, *D2, *D3; int i, DimTotal; diff --git a/Core/regularizers_CPU/ROF_TV_core.h b/Core/regularizers_CPU/ROF_TV_core.h index b32d0d5..63c0419 100644 --- a/Core/regularizers_CPU/ROF_TV_core.h +++ b/Core/regularizers_CPU/ROF_TV_core.h @@ -47,7 +47,7 @@ limitations under the License. extern "C" { #endif CCPI_EXPORT float TV_kernel(float *D1, float *D2, float *D3, float *B, float *A, float lambda, float tau, int dimY, int dimX, int dimZ); -CCPI_EXPORT float TV_ROF_CPU(float *Input, float *Output, int dimX, int dimY, int dimZ, int iterationsNumb, float tau, float lambda); +CCPI_EXPORT float TV_ROF_CPU_main(float *Input, float *Output, int dimX, int dimY, int dimZ, int iterationsNumb, float tau, float lambda); CCPI_EXPORT float D1_func(float *A, float *D1, int dimY, int dimX, int dimZ); CCPI_EXPORT float D2_func(float *A, float *D2, int dimY, int dimX, int dimZ); CCPI_EXPORT float D3_func(float *A, float *D3, int dimY, int dimX, int dimZ); diff --git a/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.cu b/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.cu index 0533a85..cbe9b5e 100755 --- a/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.cu +++ b/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.cu @@ -25,7 +25,7 @@ limitations under the License. * * Input Parameters: * 1. Noisy image/volume - * 2. lambda - regularization parameter + * 2. lambdaPar - regularization parameter * 3. Number of iterations * 4. eplsilon: tolerance constant * 5. TV-type: methodTV - 'iso' (0) or 'l1' (1) @@ -341,7 +341,7 @@ __global__ void copy_kernel3D(float *Input, float* Output, int N, int M, int Z, /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ ////////////MAIN HOST FUNCTION /////////////// -extern "C" void TV_FGP_GPU(float *Input, float *Output, float lambda, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ) +extern "C" void TV_FGP_GPU(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ) { int deviceCount = -1; // number of devices cudaGetDeviceCount(&deviceCount); @@ -383,13 +383,13 @@ extern "C" void TV_FGP_GPU(float *Input, float *Output, float lambda, int iter, cudaMemset(R2, 0, ImSize*sizeof(float)); /********************** Run CUDA 2D kernel here ********************/ - multip = (1.0f/(8.0f*lambda)); + multip = (1.0f/(8.0f*lambdaPar)); /* The main kernel */ for (i = 0; i < iter; i++) { /* computing the gradient of the objective function */ - Obj_func2D_kernel<<>>(d_input, d_update, R1, R2, dimX, dimY, ImSize, lambda); + Obj_func2D_kernel<<>>(d_input, d_update, R1, R2, dimX, dimY, ImSize, lambdaPar); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); @@ -493,13 +493,13 @@ extern "C" void TV_FGP_GPU(float *Input, float *Output, float lambda, int iter, cudaMemset(R2, 0, ImSize*sizeof(float)); cudaMemset(R3, 0, ImSize*sizeof(float)); /********************** Run CUDA 3D kernel here ********************/ - multip = (1.0f/(8.0f*lambda)); + multip = (1.0f/(8.0f*lambdaPar)); /* The main kernel */ for (i = 0; i < iter; i++) { /* computing the gradient of the objective function */ - Obj_func3D_kernel<<>>(d_input, d_update, R1, R2, R3, dimX, dimY, dimZ, ImSize, lambda); + Obj_func3D_kernel<<>>(d_input, d_update, R1, R2, R3, dimX, dimY, dimZ, ImSize, lambdaPar); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); diff --git a/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.h b/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.h index 15c7120..2b8fdcf 100755 --- a/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.h +++ b/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.h @@ -5,6 +5,6 @@ #ifndef _TV_FGP_GPU_ #define _TV_FGP_GPU_ -extern "C" void TV_FGP_GPU(float *Input, float *Output, float lambda, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ); +extern "C" void TV_FGP_GPU(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ); #endif diff --git a/Wrappers/Python/demo/test_cpu_regularizers.py b/Wrappers/Python/demo/test_cpu_regularizers.py index b08c418..53b8538 100644 --- a/Wrappers/Python/demo/test_cpu_regularizers.py +++ b/Wrappers/Python/demo/test_cpu_regularizers.py @@ -11,10 +11,9 @@ import numpy as np import os from enum import Enum import timeit -from ccpi.filters.cpu_regularizers_boost import SplitBregman_TV , FGP_TV ,\ - LLT_model, PatchBased_Regul ,\ +from ccpi.filters.cpu_regularizers_boost import SplitBregman_TV, LLT_model, PatchBased_Regul ,\ TGV_PD -from ccpi.filters.cpu_regularizers_cython import ROF_TV +from ccpi.filters.cpu_regularizers_cython import TV_ROF_CPU, TV_FGP_CPU ############################################################################### #https://stackoverflow.com/questions/13875989/comparing-image-in-url-to-image-in-filesystem-in-python/13884956#13884956 @@ -128,21 +127,25 @@ imgplot = plt.imshow(splitbregman,\ ) ###################### FGP_TV ######################################### -# u = FGP_TV(single(u0), 0.05, 100, 1e-04); + start_time = timeit.default_timer() -pars = {'algorithm' : FGP_TV , \ +pars = {'algorithm' : TV_FGP_CPU , \ 'input' : u0, 'regularization_parameter':0.05, \ 'number_of_iterations' :200 ,\ - 'tolerance_constant':1e-4,\ - 'TV_penalty': 0 + 'tolerance_constant':1e-5,\ + 'methodTV': 0 ,\ + 'nonneg': 0 ,\ + 'printingOut': 0 } -out = FGP_TV (pars['input'], +out = TV_FGP_CPU (pars['input'], pars['regularization_parameter'], pars['number_of_iterations'], pars['tolerance_constant'], - pars['TV_penalty']) + pars['methodTV'], + pars['nonneg'], + pars['printingOut']) fgp = out[0] rms = rmse(Im, fgp) @@ -282,13 +285,13 @@ imgplot = plt.imshow(tgv, cmap="gray") start_time = timeit.default_timer() -pars = {'algorithm': ROF_TV , \ +pars = {'algorithm': TV_ROF_CPU , \ 'input' : u0,\ 'regularization_parameter':0.04,\ 'marching_step': 0.0025,\ 'number_of_iterations': 300 } -rof = ROF_TV(pars['input'], +rof = TV_ROF_CPU(pars['input'], pars['number_of_iterations'], pars['regularization_parameter'], pars['marching_step'] diff --git a/Wrappers/Python/src/cpu_regularizers.cpp b/Wrappers/Python/src/cpu_regularizers.cpp index 43d5d11..b8156d5 100644 --- a/Wrappers/Python/src/cpu_regularizers.cpp +++ b/Wrappers/Python/src/cpu_regularizers.cpp @@ -1040,7 +1040,6 @@ BOOST_PYTHON_MODULE(cpu_regularizers_boost) np::dtype dt2 = np::dtype::get_builtin(); def("SplitBregman_TV", SplitBregman_TV); - def("FGP_TV", FGP_TV); def("LLT_model", LLT_model); def("PatchBased_Regul", PatchBased_Regul); def("TGV_PD", TGV_PD); diff --git a/Wrappers/Python/src/cpu_regularizers.pyx b/Wrappers/Python/src/cpu_regularizers.pyx index b8089a8..448da31 100644 --- a/Wrappers/Python/src/cpu_regularizers.pyx +++ b/Wrappers/Python/src/cpu_regularizers.pyx @@ -18,8 +18,8 @@ import cython import numpy as np cimport numpy as np -cdef extern float TV_ROF_CPU(float *Input, float *Output, int dimX, int dimY, int dimZ, int iterationsNumb, float tau, float flambda); -cdef extern float TV_FGP_CPU(float *Input, float *Output, float lambda, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ); +cdef extern float TV_ROF_CPU_main(float *Input, float *Output, int dimX, int dimY, int dimZ, int iterationsNumb, float tau, float flambda); +cdef extern float TV_FGP_CPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ); # Can we use the same name here in "def" as the C function? def TV_ROF_CPU(inputData, iterations, regularization_parameter, @@ -45,11 +45,10 @@ def TV_ROF_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, np.zeros([dims[0],dims[1]], dtype='float32') #/* Run ROF iterations for 2D data */ - TV_ROF_CPU(&inputData[0,0], &B[0,0], dims[0], dims[1], 1, iterations, + TV_ROF_CPU_main(&inputData[0,0], &B[0,0], dims[0], dims[1], 1, iterations, marching_step_parameter, regularization_parameter) - return B - + return B def TV_ROF_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, int iterations, @@ -65,7 +64,7 @@ def TV_ROF_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, np.zeros([dims[0],dims[1],dims[2]], dtype='float32') #/* Run ROF iterations for 3D data */ - TV_FGP_CPU(&inputData[0,0,0], &B[0,0,0], dims[0], dims[1], dims[2], iterations, + TV_ROF_CPU_main(&inputData[0,0,0], &B[0,0,0], dims[0], dims[1], dims[2], iterations, marching_step_parameter, regularization_parameter) return B @@ -88,11 +87,11 @@ def TV_FGP_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, dims[0] = inputData.shape[0] dims[1] = inputData.shape[1] - cdef np.ndarray[np.float32_t, ndim=2, mode="c"] B = \ + cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \ np.zeros([dims[0],dims[1]], dtype='float32') #/* Run ROF iterations for 2D data */ - TV_FGP_CPU(&inputData[0,0], &B[0,0], regularization_parameter, + TV_FGP_CPU_main(&inputData[0,0], &outputData[0,0], regularization_parameter, iterations, tolerance_param, methodTV, @@ -100,8 +99,7 @@ def TV_FGP_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, printM, dims[0], dims[1], 1) - return B - + return outputData def TV_FGP_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, float regularization_parameter, @@ -115,15 +113,16 @@ def TV_FGP_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, dims[1] = inputData.shape[1] dims[2] = inputData.shape[2] - cdef np.ndarray[np.float32_t, ndim=3, mode="c"] B = \ + cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \ np.zeros([dims[0],dims[1],dims[2]], dtype='float32') #/* Run ROF iterations for 3D data */ - TV_FGP_CPU(&inputData[0,0, 0], &B[0,0, 0], iterations, + TV_FGP_CPU_main(&inputData[0,0,0], &outputData[0,0, 0], regularization_parameter, + iterations, tolerance_param, methodTV, nonneg, printM, dims[0], dims[1], dims[2]) - return B + return outputData diff --git a/Wrappers/Python/src/gpu_regularizers.pyx b/Wrappers/Python/src/gpu_regularizers.pyx index a14a20d..e99bfa7 100644 --- a/Wrappers/Python/src/gpu_regularizers.pyx +++ b/Wrappers/Python/src/gpu_regularizers.pyx @@ -26,7 +26,7 @@ cdef extern void NLM_GPU_kernel(float *A, float* B, float *Eucl_Vec, int SearchW, int SimilW, int SearchW_real, float denh2, float lambdaf); cdef extern void TV_ROF_GPU(float* Input, float* Output, int N, int M, int Z, int iter, float tau, float lambdaf); -cdef extern void TV_FGP_GPU(float *Input, float *Output, float lambda, int iter, float epsil, int methodTV, int nonneg, int printM, int N, int M, int Z); +cdef extern void TV_FGP_GPU(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int N, int M, int Z); cdef extern float pad_crop(float *A, float *Ap, int OldSizeX, int OldSizeY, int OldSizeZ, diff --git a/Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py b/Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py index 6344021..e162afa 100644 --- a/Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py +++ b/Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py @@ -12,7 +12,7 @@ import matplotlib.pyplot as plt import numpy as np import os import timeit -from ccpi.filters.gpu_regularizers import Diff4thHajiaboli, NML, TV_ROF_GPU +from ccpi.filters.gpu_regularizers import Diff4thHajiaboli, NML, GPU_ROF_TV from ccpi.filters.cpu_regularizers_cython import TV_ROF_CPU ############################################################################### def printParametersToString(pars): @@ -56,11 +56,11 @@ imgplot = plt.imshow(u0,cmap="gray") # set parameters -pars = {'algorithm': ROF_TV , \ +pars = {'algorithm': TV_ROF_CPU , \ 'input' : u0,\ 'regularization_parameter':0.04,\ 'time_marching_parameter': 0.0025,\ - 'number_of_iterations': 600 + 'number_of_iterations': 1200 } print ("#################ROF TV CPU#####################") start_time = timeit.default_timer() @@ -89,13 +89,14 @@ plt.title('{}'.format('CPU results')) print ("#################ROF TV GPU#####################") start_time = timeit.default_timer() -rof_gpu = TV_ROF_GPU(pars['input'], +rof_gpu = GPU_ROF_TV(pars['input'], pars['number_of_iterations'], pars['time_marching_parameter'], pars['regularization_parameter']) rms = rmse(Im, rof_gpu) pars['rmse'] = rms +pars['algorithm'] = GPU_ROF_TV txtstr = printParametersToString(pars) txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) print (txtstr) -- cgit v1.2.3 From ccf9b61bba1004af783c6333d58ea9611c0f81f2 Mon Sep 17 00:00:00 2001 From: Daniil Kazantsev Date: Tue, 6 Mar 2018 11:16:41 +0000 Subject: ROF_FGP_unified syntax --- Core/regularizers_CPU/FGP_TV_core.c | 8 ++--- Core/regularizers_CPU/FGP_TV_core.h | 2 +- Core/regularizers_CPU/ROF_TV_core.c | 4 +-- Core/regularizers_CPU/ROF_TV_core.h | 6 ++-- Wrappers/Python/src/cpu_regularizers.pyx | 59 +++++++++++++++----------------- 5 files changed, 37 insertions(+), 42 deletions(-) diff --git a/Core/regularizers_CPU/FGP_TV_core.c b/Core/regularizers_CPU/FGP_TV_core.c index 5365631..9c0fcfc 100644 --- a/Core/regularizers_CPU/FGP_TV_core.c +++ b/Core/regularizers_CPU/FGP_TV_core.c @@ -37,7 +37,7 @@ limitations under the License. * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems" */ -float TV_FGP_CPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ) +float TV_FGP_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ) { int ll, j, DimTotal; float re, re1; @@ -47,7 +47,7 @@ float TV_FGP_CPU_main(float *Input, float *Output, float lambdaPar, int iter, fl if (dimZ <= 1) { /*2D case */ - float *Output_prev=NULL, *P1=NULL, *P2=NULL, *P1_prev=NULL, *P2_prev=NULL, *R1=NULL, *R2=NULL; + float *Output_prev=NULL, *P1=NULL, *P2=NULL, *P1_prev=NULL, *P2_prev=NULL, *R1=NULL, *R2=NULL; DimTotal = dimX*dimY; Output_prev = (float *) malloc( DimTotal * sizeof(float) ); @@ -59,7 +59,7 @@ float TV_FGP_CPU_main(float *Input, float *Output, float lambdaPar, int iter, fl R2 = (float *) malloc( DimTotal * sizeof(float) ); /* begin iterations */ - for(ll=0; ll 1) D3_func(Output, D3, dimX, dimY, dimZ); - TV_kernel(D1, D2, D3, Output, Input, lambda, tau, dimX, dimY, dimZ); + TV_kernel(D1, D2, D3, Output, Input, lambdaPar, tau, dimX, dimY, dimZ); } free(D1);free(D2); free(D3); return *Output; diff --git a/Core/regularizers_CPU/ROF_TV_core.h b/Core/regularizers_CPU/ROF_TV_core.h index 63c0419..14daf58 100644 --- a/Core/regularizers_CPU/ROF_TV_core.h +++ b/Core/regularizers_CPU/ROF_TV_core.h @@ -31,8 +31,8 @@ limitations under the License. * Input Parameters: * 1. Noisy image/volume [REQUIRED] * 2. lambda - regularization parameter [REQUIRED] - * 3. tau - marching step for explicit scheme, ~1 is recommended [REQUIRED] - * 4. Number of iterations, for explicit scheme >= 150 is recommended [REQUIRED] + * 3. Number of iterations, for explicit scheme >= 150 is recommended [REQUIRED] + * 4. tau - marching step for explicit scheme, ~1 is recommended [REQUIRED] * * Output: * [1] Regularized image/volume @@ -47,7 +47,7 @@ limitations under the License. extern "C" { #endif CCPI_EXPORT float TV_kernel(float *D1, float *D2, float *D3, float *B, float *A, float lambda, float tau, int dimY, int dimX, int dimZ); -CCPI_EXPORT float TV_ROF_CPU_main(float *Input, float *Output, int dimX, int dimY, int dimZ, int iterationsNumb, float tau, float lambda); +CCPI_EXPORT float TV_ROF_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float tau, int dimX, int dimY, int dimZ); CCPI_EXPORT float D1_func(float *A, float *D1, int dimY, int dimX, int dimZ); CCPI_EXPORT float D2_func(float *A, float *D2, int dimY, int dimX, int dimZ); CCPI_EXPORT float D3_func(float *A, float *D3, int dimY, int dimX, int dimZ); diff --git a/Wrappers/Python/src/cpu_regularizers.pyx b/Wrappers/Python/src/cpu_regularizers.pyx index 448da31..2654831 100644 --- a/Wrappers/Python/src/cpu_regularizers.pyx +++ b/Wrappers/Python/src/cpu_regularizers.pyx @@ -11,73 +11,68 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -Author: Edoardo Pasca +Author: Edoardo Pasca, Daniil Kazantsev """ import cython import numpy as np cimport numpy as np -cdef extern float TV_ROF_CPU_main(float *Input, float *Output, int dimX, int dimY, int dimZ, int iterationsNumb, float tau, float flambda); -cdef extern float TV_FGP_CPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ); +cdef extern float TV_ROF_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float tau, int dimX, int dimY, int dimZ); +cdef extern float TV_FGP_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ); -# Can we use the same name here in "def" as the C function? -def TV_ROF_CPU(inputData, iterations, regularization_parameter, +def TV_ROF_CPU(inputData, regularization_parameter, iterationsNumb marching_step_parameter): if inputData.ndim == 2: - return TV_ROF_2D(inputData, iterations, regularization_parameter, + return TV_ROF_2D(inputData, regularization_parameter, iterationsNumb marching_step_parameter) elif inputData.ndim == 3: - return TV_ROF_3D(inputData, iterations, regularization_parameter, + return TV_ROF_3D(inputData, regularization_parameter, iterationsNumb marching_step_parameter) def TV_ROF_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, - int iterations, float regularization_parameter, - float marching_step_parameter - ): - + int iterationsNumb, + float marching_step_parameter): cdef long dims[2] dims[0] = inputData.shape[0] dims[1] = inputData.shape[1] - cdef np.ndarray[np.float32_t, ndim=2, mode="c"] B = \ + cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \ np.zeros([dims[0],dims[1]], dtype='float32') - #/* Run ROF iterations for 2D data */ - TV_ROF_CPU_main(&inputData[0,0], &B[0,0], dims[0], dims[1], 1, iterations, - marching_step_parameter, regularization_parameter) + # Run ROF iterations for 2D data + TV_ROF_CPU_main(&inputData[0,0], &outputData[0,0], regularization_parameter, iterationsNumb, marching_step_parameter, dims[0], dims[1], 1) - return B + return outputData def TV_ROF_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, int iterations, float regularization_parameter, float marching_step_parameter ): - cdef long dims[2] + cdef long dims[3] dims[0] = inputData.shape[0] dims[1] = inputData.shape[1] dims[2] = inputData.shape[2] - cdef np.ndarray[np.float32_t, ndim=3, mode="c"] B = \ + cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \ np.zeros([dims[0],dims[1],dims[2]], dtype='float32') - #/* Run ROF iterations for 3D data */ - TV_ROF_CPU_main(&inputData[0,0,0], &B[0,0,0], dims[0], dims[1], dims[2], iterations, - marching_step_parameter, regularization_parameter) + # Run ROF iterations for 3D data + TV_ROF_CPU_main(&inputData[0,0,0], &outputData[0,0,0], regularization_parameter, iterationsNumb, marching_step_parameter, dims[0], dims[1], dims[2]) - return B + return outputData -def TV_FGP_CPU(inputData, regularization_parameter, iterations, tolerance_param, methodTV, nonneg, printM): +def TV_FGP_CPU(inputData, regularization_parameter, iterationsNumb, tolerance_param, methodTV, nonneg, printM): if inputData.ndim == 2: - return TV_FGP_2D(inputData, regularization_parameter, iterations, tolerance_param, methodTV, nonneg, printM) + return TV_FGP_2D(inputData, regularization_parameter, iterationsNumb, tolerance_param, methodTV, nonneg, printM) elif inputData.ndim == 3: - return TV_FGP_3D(inputData, regularization_parameter, iterations, tolerance_param, methodTV, nonneg, printM) + return TV_FGP_3D(inputData, regularization_parameter, iterationsNumb, tolerance_param, methodTV, nonneg, printM) def TV_FGP_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, float regularization_parameter, - int iterations, + int iterationsNumb, float tolerance_param, int methodTV, int nonneg, @@ -92,7 +87,7 @@ def TV_FGP_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, #/* Run ROF iterations for 2D data */ TV_FGP_CPU_main(&inputData[0,0], &outputData[0,0], regularization_parameter, - iterations, + iterationsNumb, tolerance_param, methodTV, nonneg, @@ -103,22 +98,22 @@ def TV_FGP_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, def TV_FGP_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, float regularization_parameter, - int iterations, + int iterationsNumb, float tolerance_param, int methodTV, int nonneg, int printM): - cdef long dims[2] + cdef long dims[3] dims[0] = inputData.shape[0] dims[1] = inputData.shape[1] dims[2] = inputData.shape[2] cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \ - np.zeros([dims[0],dims[1],dims[2]], dtype='float32') + np.zeros([dims[0], dims[1], dims[2]], dtype='float32') #/* Run ROF iterations for 3D data */ - TV_FGP_CPU_main(&inputData[0,0,0], &outputData[0,0, 0], regularization_parameter, - iterations, + TV_FGP_CPU_main(&inputData[0,0,0], &outputData[0,0,0], regularization_parameter, + iterationsNumb, tolerance_param, methodTV, nonneg, -- cgit v1.2.3 From 8d310478254f3cda63f3663729b416f425ad70b6 Mon Sep 17 00:00:00 2001 From: algol Date: Tue, 6 Mar 2018 11:45:53 +0000 Subject: work on FGP intergration --- Core/CMakeLists.txt | 2 +- Core/regularizers_CPU/ROF_TV_core.c | 22 +++++++++++----------- Core/regularizers_CPU/utils.c | 8 ++++---- Wrappers/Python/demo/test_cpu_regularizers.py | 17 +++++++++-------- Wrappers/Python/src/cpu_regularizers.pyx | 14 +++++--------- 5 files changed, 30 insertions(+), 33 deletions(-) diff --git a/Core/CMakeLists.txt b/Core/CMakeLists.txt index 68b7111..4e85002 100644 --- a/Core/CMakeLists.txt +++ b/Core/CMakeLists.txt @@ -88,7 +88,7 @@ add_library(cilreg SHARED ${CMAKE_CURRENT_SOURCE_DIR}/regularizers_CPU/PatchBased_Regul_core.c ${CMAKE_CURRENT_SOURCE_DIR}/regularizers_CPU/SplitBregman_TV_core.c ${CMAKE_CURRENT_SOURCE_DIR}/regularizers_CPU/TGV_PD_core.c - ${CMAKE_CURRENT_SOURCE_DIR}/regularizers_CPU/ROF_TV_core.c + ${CMAKE_CURRENT_SOURCE_DIR}/regularizers_CPU/ROF_TV_core.c ${CMAKE_CURRENT_SOURCE_DIR}/regularizers_CPU/utils.c ) target_link_libraries(cilreg ${EXTRA_LIBRARIES} ) diff --git a/Core/regularizers_CPU/ROF_TV_core.c b/Core/regularizers_CPU/ROF_TV_core.c index a59a3c6..a4f82a6 100644 --- a/Core/regularizers_CPU/ROF_TV_core.c +++ b/Core/regularizers_CPU/ROF_TV_core.c @@ -46,7 +46,7 @@ int sign(float x) { */ /* Running iterations of TV-ROF function */ -float TV_ROF_CPU_mainfloat TV_ROF_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float tau, int dimX, int dimY, int dimZ) +float TV_ROF_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float tau, int dimX, int dimY, int dimZ) { float *D1, *D2, *D3; int i, DimTotal; @@ -132,9 +132,9 @@ float D1_func(float *A, float *D1, int dimX, int dimY, int dimZ) NOMy_0 = A[index] - A[(j)*dimX + i2]; /* y- */ denom1 = NOMx_1*NOMx_1; - denom2 = 0.5*(sign(NOMy_1) + sign(NOMy_0))*(MIN(fabs(NOMy_1),fabs(NOMy_0))); + denom2 = 0.5f*(sign(NOMy_1) + sign(NOMy_0))*(MIN(fabs(NOMy_1),fabs(NOMy_0))); denom2 = denom2*denom2; - T1 = sqrt(denom1 + denom2 + EPS); + T1 = sqrtf(denom1 + denom2 + EPS); D1[index] = NOMx_1/T1; }} } @@ -170,11 +170,11 @@ float D2_func(float *A, float *D2, int dimX, int dimY, int dimZ) denom1 = NOMy_1*NOMy_1; - denom2 = 0.5*(sign(NOMx_1) + sign(NOMx_0))*(MIN(fabs(NOMx_1),fabs(NOMx_0))); + denom2 = 0.5f*(sign(NOMx_1) + sign(NOMx_0))*(MIN(fabs(NOMx_1),fabs(NOMx_0))); denom2 = denom2*denom2; - denom3 = 0.5*(sign(NOMz_1) + sign(NOMz_0))*(MIN(fabs(NOMz_1),fabs(NOMz_0))); + denom3 = 0.5f*(sign(NOMz_1) + sign(NOMz_0))*(MIN(fabs(NOMz_1),fabs(NOMz_0))); denom3 = denom3*denom3; - T2 = sqrt(denom1 + denom2 + denom3 + EPS); + T2 = sqrtf(denom1 + denom2 + denom3 + EPS); D2[index] = NOMy_1/T2; }}} } @@ -196,9 +196,9 @@ float D2_func(float *A, float *D2, int dimX, int dimY, int dimZ) /*NOMy_0 = A[(i)*dimY + j] - A[(i)*dimY + j2]; */ /* y- */ denom1 = NOMy_1*NOMy_1; - denom2 = 0.5*(sign(NOMx_1) + sign(NOMx_0))*(MIN(fabs(NOMx_1),fabs(NOMx_0))); + denom2 = 0.5f*(sign(NOMx_1) + sign(NOMx_0))*(MIN(fabs(NOMx_1),fabs(NOMx_0))); denom2 = denom2*denom2; - T2 = sqrt(denom1 + denom2 + EPS); + T2 = sqrtf(denom1 + denom2 + EPS); D2[index] = NOMy_1/T2; }} } @@ -233,11 +233,11 @@ float D3_func(float *A, float *D3, int dimY, int dimX, int dimZ) /*NOMz_0 = A[(dimX*dimY)*k + (i)*dimY + j] - A[(dimX*dimY)*k2 + (i)*dimY + j]; */ /* z- */ denom1 = NOMz_1*NOMz_1; - denom2 = 0.5*(sign(NOMx_1) + sign(NOMx_0))*(MIN(fabs(NOMx_1),fabs(NOMx_0))); + denom2 = 0.5f*(sign(NOMx_1) + sign(NOMx_0))*(MIN(fabs(NOMx_1),fabs(NOMx_0))); denom2 = denom2*denom2; - denom3 = 0.5*(sign(NOMy_1) + sign(NOMy_0))*(MIN(fabs(NOMy_1),fabs(NOMy_0))); + denom3 = 0.5f*(sign(NOMy_1) + sign(NOMy_0))*(MIN(fabs(NOMy_1),fabs(NOMy_0))); denom3 = denom3*denom3; - T3 = sqrt(denom1 + denom2 + denom3 + EPS); + T3 = sqrtf(denom1 + denom2 + denom3 + EPS); D3[index] = NOMz_1/T3; }}} return *D3; diff --git a/Core/regularizers_CPU/utils.c b/Core/regularizers_CPU/utils.c index 951fb91..cdf3d0e 100644 --- a/Core/regularizers_CPU/utils.c +++ b/Core/regularizers_CPU/utils.c @@ -45,10 +45,10 @@ float TV_energy2D(float *U, float *U0, float *E_val, float lambda, int dimX, int j1 = j + 1; if (j == dimY-1) j1 = j; /* Forward differences */ - NOMx_2 = pow(U[j1*dimX + i] - U[index],2); /* x+ */ - NOMy_2 = pow(U[j*dimX + i1] - U[index],2); /* y+ */ - E_Grad += sqrt(NOMx_2 + NOMy_2); /* gradient term energy */ - E_Data += 0.5f * lambda*(pow((U[index]-U0[index]),2)); /* fidelity term energy */ + NOMx_2 = powf(U[j1*dimX + i] - U[index],2); /* x+ */ + NOMy_2 = powf(U[j*dimX + i1] - U[index],2); /* y+ */ + E_Grad += sqrtf(NOMx_2 + NOMy_2); /* gradient term energy */ + E_Data += 0.5f * lambda*(powf((U[index]-U0[index]),2)); /* fidelity term energy */ } } E_val[0] = E_Grad + E_Data; diff --git a/Wrappers/Python/demo/test_cpu_regularizers.py b/Wrappers/Python/demo/test_cpu_regularizers.py index 53b8538..f1eb3c3 100644 --- a/Wrappers/Python/demo/test_cpu_regularizers.py +++ b/Wrappers/Python/demo/test_cpu_regularizers.py @@ -131,9 +131,9 @@ imgplot = plt.imshow(splitbregman,\ start_time = timeit.default_timer() pars = {'algorithm' : TV_FGP_CPU , \ 'input' : u0, - 'regularization_parameter':0.05, \ - 'number_of_iterations' :200 ,\ - 'tolerance_constant':1e-5,\ + 'regularization_parameter':0.07, \ + 'number_of_iterations' :300 ,\ + 'tolerance_constant':0.00001,\ 'methodTV': 0 ,\ 'nonneg': 0 ,\ 'printingOut': 0 @@ -156,7 +156,7 @@ txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) print (txtstr) -a=fig.add_subplot(2,4,3) +a=fig.add_subplot(2,4,4) # these are matplotlib.patch.Patch properties props = dict(boxstyle='round', facecolor='wheat', alpha=0.5) @@ -168,8 +168,9 @@ imgplot = plt.imshow(fgp, \ a.text(0.05, 0.95, txtstr, transform=a.transAxes, fontsize=14, verticalalignment='top', bbox=props) -###################### LLT_model ######################################### +###################### LLT_model ######################################### +""" start_time = timeit.default_timer() pars = {'algorithm': LLT_model , \ @@ -204,7 +205,7 @@ a.text(0.05, 0.95, txtstr, transform=a.transAxes, fontsize=14, imgplot = plt.imshow(llt,\ cmap="gray" ) - +""" # ###################### PatchBased_Regul ######################################### # # Quick 2D denoising example in Matlab: @@ -292,8 +293,8 @@ pars = {'algorithm': TV_ROF_CPU , \ 'number_of_iterations': 300 } rof = TV_ROF_CPU(pars['input'], - pars['number_of_iterations'], - pars['regularization_parameter'], + pars['regularization_parameter'], + pars['number_of_iterations'], pars['marching_step'] ) #tgv = out diff --git a/Wrappers/Python/src/cpu_regularizers.pyx b/Wrappers/Python/src/cpu_regularizers.pyx index 2654831..d62ca59 100644 --- a/Wrappers/Python/src/cpu_regularizers.pyx +++ b/Wrappers/Python/src/cpu_regularizers.pyx @@ -21,14 +21,11 @@ cimport numpy as np cdef extern float TV_ROF_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float tau, int dimX, int dimY, int dimZ); cdef extern float TV_FGP_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ); -def TV_ROF_CPU(inputData, regularization_parameter, iterationsNumb - marching_step_parameter): +def TV_ROF_CPU(inputData, regularization_parameter, iterationsNumb, marching_step_parameter): if inputData.ndim == 2: - return TV_ROF_2D(inputData, regularization_parameter, iterationsNumb - marching_step_parameter) + return TV_ROF_2D(inputData, regularization_parameter, iterationsNumb, marching_step_parameter) elif inputData.ndim == 3: - return TV_ROF_3D(inputData, regularization_parameter, iterationsNumb - marching_step_parameter) + return TV_ROF_3D(inputData, regularization_parameter, iterationsNumb, marching_step_parameter) def TV_ROF_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, float regularization_parameter, @@ -47,10 +44,9 @@ def TV_ROF_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, return outputData def TV_ROF_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, - int iterations, + int iterationsNumb, float regularization_parameter, - float marching_step_parameter - ): + float marching_step_parameter): cdef long dims[3] dims[0] = inputData.shape[0] dims[1] = inputData.shape[1] -- cgit v1.2.3 From 309d84445b5ec2980db7c79832958a6481343f17 Mon Sep 17 00:00:00 2001 From: Daniil Kazantsev Date: Tue, 6 Mar 2018 12:46:09 +0000 Subject: FGP/ROF updates --- Core/regularizers_CPU/FGP_TV_core.c | 139 +++++++++----------- Core/regularizers_CPU/FGP_TV_core.h | 8 +- Core/regularizers_CPU/utils.c | 13 +- .../Matlab/mex_compile/regularizers_CPU/FGP_TV.c | 142 ++------------------- Wrappers/Python/demo/test_cpu_regularizers.py | 10 +- Wrappers/Python/src/cpu_regularizers.pyx | 5 +- 6 files changed, 89 insertions(+), 228 deletions(-) diff --git a/Core/regularizers_CPU/FGP_TV_core.c b/Core/regularizers_CPU/FGP_TV_core.c index 9c0fcfc..7a8ce48 100644 --- a/Core/regularizers_CPU/FGP_TV_core.c +++ b/Core/regularizers_CPU/FGP_TV_core.c @@ -46,7 +46,7 @@ float TV_FGP_CPU_main(float *Input, float *Output, float lambdaPar, int iteratio int count = 0; if (dimZ <= 1) { - /*2D case */ + /*2D case */ float *Output_prev=NULL, *P1=NULL, *P2=NULL, *P1_prev=NULL, *P2_prev=NULL, *R1=NULL, *R2=NULL; DimTotal = dimX*dimY; @@ -65,28 +65,28 @@ float TV_FGP_CPU_main(float *Input, float *Output, float lambdaPar, int iteratio Obj_func2D(Input, Output, R1, R2, lambdaPar, dimX, dimY); /* apply nonnegativity */ - if (nonneg == 1) for(j=0; j 4) break; + if (count > 4) break; /*storing old values*/ copyIm(Output, Output_prev, dimX, dimY, 1); @@ -120,21 +120,21 @@ float TV_FGP_CPU_main(float *Input, float *Output, float lambdaPar, int iteratio Obj_func3D(Input, Output, R1, R2, R3, lambdaPar, dimX, dimY, dimZ); /* apply nonnegativity */ - if (nonneg == 1) for(j=0; j 1.0f) { - sq_denom = 1.0f/sqrt(denom); - P1[index] = P1[index]*sq_denom; - P2[index] = P2[index]*sq_denom; + sq_denom = 1.0f/sqrtf(denom); + P1[i] = P1[i]*sq_denom; + P2[i] = P2[i]*sq_denom; } - }} + } } else { /* anisotropic TV*/ -#pragma omp parallel for shared(P1,P2) private(index,i,j,val1,val2) - for(i=0; i 1.0f) { - sq_denom = 1.0f/sqrt(denom); - P1[index] = P1[index]*sq_denom; - P2[index] = P2[index]*sq_denom; - P3[index] = P3[index]*sq_denom; - } - }}} + sq_denom = 1.0f/sqrtf(denom); + P1[i] = P1[i]*sq_denom; + P2[i] = P2[i]*sq_denom; + P3[i] = P3[i]*sq_denom; + } + } } else { /* anisotropic TV*/ -#pragma omp parallel for shared(P1,P2,P3) private(index,i,j,k,val1,val2,val3) - for(i=0; i /* Copy Image */ float copyIm(float *A, float *U, int dimX, int dimY, int dimZ) @@ -34,7 +35,7 @@ float copyIm(float *A, float *U, int dimX, int dimY, int dimZ) float TV_energy2D(float *U, float *U0, float *E_val, float lambda, int dimX, int dimY) { int i, j, i1, j1, index; - float NOMx_2, NOMy_2, E_Grad, E_Data; + float NOMx_2, NOMy_2, E_Grad=0.0f, E_Data=0.0f; /* first calculate \grad U_xy*/ for(j=0; j 5)) mexErrMsgTxt("At least 2 parameters is required: Image(2D/3D), Regularization parameter. The full list of parameters: Image(2D/3D), Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1')"); - A = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */ + Input = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */ lambda = (float) mxGetScalar(prhs[1]); /* regularization parameter */ - iter = 50; /* default iterations number */ + iter = 300; /* default iterations number */ epsil = 0.0001; /* default tolerance constant */ methTV = 0; /* default isotropic TV penalty */ @@ -78,139 +78,17 @@ void mexFunction( if (strcmp(penalty_type, "l1") == 0) methTV = 1; /* enable 'l1' penalty */ mxFree(penalty_type); } - /*output function value (last iteration) */ - plhs[1] = mxCreateNumericMatrix(1, 1, mxSINGLE_CLASS, mxREAL); - float *funcvalA = (float *) mxGetData(plhs[1]); if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); } /*Handling Matlab output data*/ - dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2]; - - tk = 1.0f; - tkp1=1.0f; - count = 0; - re_old = 0.0f; + dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2]; if (number_of_dims == 2) { dimZ = 1; /*2D case*/ - D = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - D_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - P1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - P2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - P1_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - P2_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - R1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - R2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - - /* begin iterations */ - for(ll=0; ll 4) { - Obj_func_CALC2D(A, D, funcvalA, lambda, dimX, dimY); - break; } - - /* check that the residual norm is decreasing */ - if (ll > 2) { - if (re > re_old) { - Obj_func_CALC2D(A, D, funcvalA, lambda, dimX, dimY); - break; }} - re_old = re; - /*printf("%f %i %i \n", re, ll, count); */ - - /*storing old values*/ - copyIm(D, D_old, dimX, dimY, dimZ); - copyIm(P1, P1_old, dimX, dimY, dimZ); - copyIm(P2, P2_old, dimX, dimY, dimZ); - tk = tkp1; - - /* calculating the objective function value */ - if (ll == (iter-1)) Obj_func_CALC2D(A, D, funcvalA, lambda, dimX, dimY); - } - printf("FGP-TV iterations stopped at iteration %i with the function value %f \n", ll, funcvalA[0]); - } - if (number_of_dims == 3) { - D = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - D_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - P1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - P2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - P3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - P1_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - P2_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - P3_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - R1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - R2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - R3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - - /* begin iterations */ - for(ll=0; ll 3) { - Obj_func_CALC3D(A, D, funcvalA, lambda, dimX, dimY, dimZ); - break;} - - /* check that the residual norm is decreasing */ - if (ll > 2) { - if (re > re_old) { - Obj_func_CALC3D(A, D, funcvalA, lambda, dimX, dimY, dimZ); - }} - re_old = re; - /*printf("%f %i %i \n", re, ll, count); */ - - /*storing old values*/ - copyIm(D, D_old, dimX, dimY, dimZ); - copyIm(P1, P1_old, dimX, dimY, dimZ); - copyIm(P2, P2_old, dimX, dimY, dimZ); - copyIm(P3, P3_old, dimX, dimY, dimZ); - tk = tkp1; - - if (ll == (iter-1)) Obj_func_CALC3D(A, D, funcvalA, lambda, dimX, dimY, dimZ); - } - printf("FGP-TV iterations stopped at iteration %i with the function value %f \n", ll, funcvalA[0]); - } + Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); + TV_FGP_CPU_main(Input, Output, lambda, iter, epsil, methTV, 0, 0, dimX, dimY, dimZ); + } + if (number_of_dims == 3) { + } } diff --git a/Wrappers/Python/demo/test_cpu_regularizers.py b/Wrappers/Python/demo/test_cpu_regularizers.py index f1eb3c3..7f08605 100644 --- a/Wrappers/Python/demo/test_cpu_regularizers.py +++ b/Wrappers/Python/demo/test_cpu_regularizers.py @@ -111,12 +111,10 @@ rms = rmse(Im, splitbregman) pars['rmse'] = rms txtstr = printParametersToString(pars) txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) -print (txtstr) - +print (txtstr) a=fig.add_subplot(2,4,2) - # these are matplotlib.patch.Patch properties props = dict(boxstyle='round', facecolor='wheat', alpha=0.5) # place a text box in upper left in axes coords @@ -130,14 +128,14 @@ imgplot = plt.imshow(splitbregman,\ start_time = timeit.default_timer() pars = {'algorithm' : TV_FGP_CPU , \ - 'input' : u0, + 'input' : u0,\ 'regularization_parameter':0.07, \ 'number_of_iterations' :300 ,\ 'tolerance_constant':0.00001,\ 'methodTV': 0 ,\ 'nonneg': 0 ,\ - 'printingOut': 0 -} + 'printingOut': 0 + } out = TV_FGP_CPU (pars['input'], pars['regularization_parameter'], diff --git a/Wrappers/Python/src/cpu_regularizers.pyx b/Wrappers/Python/src/cpu_regularizers.pyx index d62ca59..60e8627 100644 --- a/Wrappers/Python/src/cpu_regularizers.pyx +++ b/Wrappers/Python/src/cpu_regularizers.pyx @@ -93,7 +93,7 @@ def TV_FGP_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, return outputData def TV_FGP_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, - float regularization_parameter, + float regularization_parameter, int iterationsNumb, float tolerance_param, int methodTV, @@ -109,11 +109,10 @@ def TV_FGP_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, #/* Run ROF iterations for 3D data */ TV_FGP_CPU_main(&inputData[0,0,0], &outputData[0,0,0], regularization_parameter, - iterationsNumb, + iterationsNumb, tolerance_param, methodTV, nonneg, printM, dims[0], dims[1], dims[2]) - return outputData -- cgit v1.2.3 From 69ecdd57434d591eb3fa4afefb72174d3e025fb9 Mon Sep 17 00:00:00 2001 From: algol Date: Tue, 6 Mar 2018 12:48:21 +0000 Subject: FGP_CPU (Cythonized) now works in demo --- Wrappers/Python/demo/test_cpu_regularizers.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/Wrappers/Python/demo/test_cpu_regularizers.py b/Wrappers/Python/demo/test_cpu_regularizers.py index 7f08605..1d97857 100644 --- a/Wrappers/Python/demo/test_cpu_regularizers.py +++ b/Wrappers/Python/demo/test_cpu_regularizers.py @@ -137,7 +137,7 @@ pars = {'algorithm' : TV_FGP_CPU , \ 'printingOut': 0 } -out = TV_FGP_CPU (pars['input'], +fgp = TV_FGP_CPU(pars['input'], pars['regularization_parameter'], pars['number_of_iterations'], pars['tolerance_constant'], @@ -145,7 +145,6 @@ out = TV_FGP_CPU (pars['input'], pars['nonneg'], pars['printingOut']) -fgp = out[0] rms = rmse(Im, fgp) pars['rmse'] = rms @@ -154,7 +153,7 @@ txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) print (txtstr) -a=fig.add_subplot(2,4,4) +a=fig.add_subplot(2,4,3) # these are matplotlib.patch.Patch properties props = dict(boxstyle='round', facecolor='wheat', alpha=0.5) @@ -168,7 +167,6 @@ a.text(0.05, 0.95, txtstr, transform=a.transAxes, fontsize=14, ###################### LLT_model ######################################### -""" start_time = timeit.default_timer() pars = {'algorithm': LLT_model , \ @@ -203,8 +201,6 @@ a.text(0.05, 0.95, txtstr, transform=a.transAxes, fontsize=14, imgplot = plt.imshow(llt,\ cmap="gray" ) -""" - # ###################### PatchBased_Regul ######################################### # # Quick 2D denoising example in Matlab: # # Im = double(imread('lena_gray_256.tif'))/255; % loading image @@ -286,7 +282,7 @@ start_time = timeit.default_timer() pars = {'algorithm': TV_ROF_CPU , \ 'input' : u0,\ - 'regularization_parameter':0.04,\ + 'regularization_parameter':0.07,\ 'marching_step': 0.0025,\ 'number_of_iterations': 300 } -- cgit v1.2.3 From 5411ebbd4165c81b398b010d6ad9d11d2e973aad Mon Sep 17 00:00:00 2001 From: Daniil Kazantsev Date: Tue, 6 Mar 2018 14:40:11 +0000 Subject: work on cythonization2 --- Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.cu | 17 ++--- Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.h | 2 +- Wrappers/Python/src/gpu_regularizers.pyx | 97 ++++++++++++++----------- 3 files changed, 65 insertions(+), 51 deletions(-) diff --git a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.cu b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.cu index 480855f..a30a89b 100755 --- a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.cu +++ b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.cu @@ -304,12 +304,11 @@ __host__ __device__ int sign (float x) ///////////////////////////////////////////////// // HOST FUNCTION -extern "C" void TV_ROF_GPU(float* Input, float* Output, int N, int M, int Z, int iter, float tau, float lambda) +extern "C" void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z) { // set up device int dev = 0; CHECK(cudaSetDevice(dev)); - float *d_input, *d_update, *d_D1, *d_D2; if (Z == 0) Z = 1; @@ -331,14 +330,14 @@ extern "C" void TV_ROF_GPU(float* Input, float* Output, int N, int M, int Z, int for(int n=0; n < iter; n++) { /* calculate differences */ - D1_func3D<<>>(d_update, d_D1, N, M, Z); + D1_func3D<<>>(d_update, d_D1, N, M, Z); CHECK(cudaDeviceSynchronize()); - D2_func3D<<>>(d_update, d_D2, N, M, Z); + D2_func3D<<>>(d_update, d_D2, N, M, Z); CHECK(cudaDeviceSynchronize()); - D3_func3D<<>>(d_update, d_D3, N, M, Z); + D3_func3D<<>>(d_update, d_D3, N, M, Z); CHECK(cudaDeviceSynchronize()); /*running main kernel*/ - TV_kernel3D<<>>(d_D1, d_D2, d_D3, d_update, d_input, lambda, tau, N, M, Z); + TV_kernel3D<<>>(d_D1, d_D2, d_D3, d_update, d_input, lambdaPar, tau, N, M, Z); CHECK(cudaDeviceSynchronize()); } @@ -351,12 +350,12 @@ extern "C" void TV_ROF_GPU(float* Input, float* Output, int N, int M, int Z, int for(int n=0; n < iter; n++) { /* calculate differences */ - D1_func2D<<>>(d_update, d_D1, N, M); + D1_func2D<<>>(d_update, d_D1, N, M); CHECK(cudaDeviceSynchronize()); - D2_func2D<<>>(d_update, d_D2, N, M); + D2_func2D<<>>(d_update, d_D2, N, M); CHECK(cudaDeviceSynchronize()); /*running main kernel*/ - TV_kernel2D<<>>(d_D1, d_D2, d_update, d_input, lambda, tau, N, M); + TV_kernel2D<<>>(d_D1, d_D2, d_update, d_input, lambdaPar, tau, N, M); CHECK(cudaDeviceSynchronize()); } } diff --git a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.h b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.h index 8b64d99..d772aba 100755 --- a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.h +++ b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.h @@ -3,6 +3,6 @@ #include "CCPiDefines.h" #include -extern "C" CCPI_EXPORT void TV_ROF_GPU(float* Input, float* Output, int N, int M, int Z, int iter, float tau, float lambda); +extern "C" CCPI_EXPORT void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z); #endif diff --git a/Wrappers/Python/src/gpu_regularizers.pyx b/Wrappers/Python/src/gpu_regularizers.pyx index e99bfa7..cb94e86 100644 --- a/Wrappers/Python/src/gpu_regularizers.pyx +++ b/Wrappers/Python/src/gpu_regularizers.pyx @@ -11,7 +11,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -Author: Edoardo Pasca +Author: Edoardo Pasca, Daniil Kazantsev """ import cython @@ -25,14 +25,16 @@ cdef extern void NLM_GPU_kernel(float *A, float* B, float *Eucl_Vec, int N, int M, int Z, int dimension, int SearchW, int SimilW, int SearchW_real, float denh2, float lambdaf); -cdef extern void TV_ROF_GPU(float* Input, float* Output, int N, int M, int Z, int iter, float tau, float lambdaf); -cdef extern void TV_FGP_GPU(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int N, int M, int Z); +cdef extern void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z); +#cdef extern void TV_FGP_GPU(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int N, int M, int Z); +# correct the function +cdef extern void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int N, int M, int Z); cdef extern float pad_crop(float *A, float *Ap, int OldSizeX, int OldSizeY, int OldSizeZ, int NewSizeX, int NewSizeY, int NewSizeZ, int padXY, int switchpad_crop); - +#Diffusion 4th order regularizer def Diff4thHajiaboli(inputData, edge_preserv_parameter, iterations, @@ -50,7 +52,7 @@ def Diff4thHajiaboli(inputData, iterations, time_marching_parameter, regularization_parameter) - +# patch-based nonlocal regularization def NML(inputData, SearchW_real, SimilW, @@ -68,23 +70,37 @@ def NML(inputData, SimilW, h, lambdaf) - -def GPU_ROF_TV(inputData, +# Total-variation Rudin-Osher-Fatemi (ROF) +def TV_ROF_GPU(inputData, + regularization_parameter, iterations, - time_marching_parameter, - regularization_parameter): + time_marching_parameter): if inputData.ndim == 2: return ROFTV2D(inputData, - iterations, - time_marching_parameter, - regularization_parameter) + regularization_parameter, + iterations, + time_marching_parameter) elif inputData.ndim == 3: return ROFTV3D(inputData, + regularization_parameter iterations, - time_marching_parameter, - regularization_parameter) - - + time_marching_parameter) +# Total-variation Fast-Gradient-Projection (FGP) +def TV_FGP_GPU(inputData, + regularization_parameter, + iterations, + time_marching_parameter): + if inputData.ndim == 2: + return FGPTV2D(inputData, + regularization_parameter, + iterations, + time_marching_parameter) + elif inputData.ndim == 3: + return FGPTV3D(inputData, + regularization_parameter + iterations, + time_marching_parameter) +#****************************************************************# def Diff4thHajiaboli2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, float edge_preserv_parameter, int iterations, @@ -333,52 +349,51 @@ def NML3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, return B def ROFTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, + float regularization_parameter, int iterations, - float time_marching_parameter, - float regularization_parameter): + float time_marching_parameter): cdef long dims[2] dims[0] = inputData.shape[0] dims[1] = inputData.shape[1] - cdef np.ndarray[np.float32_t, ndim=2, mode="c"] B = \ + cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \ np.zeros([dims[0],dims[1]], dtype='float32') # Running CUDA code here - TV_ROF_GPU( - &inputData[0,0], &B[0,0], - dims[0], dims[1], 1, + TV_ROF_GPU_main( + &inputData[0,0], &outputData[0,0], + regularization_parameter, iterations , time_marching_parameter, - regularization_parameter); + dims[0], dims[1], 1); - return B + return outputData def ROFTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, + float regularization_parameter, int iterations, - float time_marching_parameter, - float regularization_parameter): + float time_marching_parameter): cdef long dims[3] dims[0] = inputData.shape[0] dims[1] = inputData.shape[1] dims[2] = inputData.shape[2] - cdef np.ndarray[np.float32_t, ndim=3, mode="c"] B = \ + cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \ np.zeros([dims[0],dims[1],dims[2]], dtype='float32') # Running CUDA code here - TV_ROF_GPU( - &inputData[0,0,0], &B[0,0,0], - dims[0], dims[1], dims[2], + TV_ROF_GPU_main( + &inputData[0,0,0], &outputData[0,0,0], + regularization_parameter, iterations , time_marching_parameter, - regularization_parameter); + dims[0], dims[1], dims[2]); - return B - + return outputData -def TVFGP2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, +def FGPTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, float regularization_parameter, int iterations, float tolerance_param, @@ -390,12 +405,12 @@ def TVFGP2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, dims[0] = inputData.shape[0] dims[1] = inputData.shape[1] - cdef np.ndarray[np.float32_t, ndim=2, mode="c"] B = \ + cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \ np.zeros([dims[0],dims[1]], dtype='float32') # Running CUDA code here TV_FGP_GPU( - &inputData[0,0], &B[0,0], + &inputData[0,0], &outputData[0,0], regularization_parameter, iterations, tolerance_param, @@ -404,9 +419,9 @@ def TVFGP2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, printM, dims[0], dims[1], 1); - return B + return outputData -def TVFGP3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, +def FGPTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, float regularization_parameter, int iterations, float tolerance_param, @@ -419,12 +434,12 @@ def TVFGP3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, dims[1] = inputData.shape[1] dims[2] = inputData.shape[2] - cdef np.ndarray[np.float32_t, ndim=3, mode="c"] B = \ + cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \ np.zeros([dims[0],dims[1],dims[2]], dtype='float32') # Running CUDA code here TV_FGP_GPU( - &inputData[0,0,0], &B[0,0,0], + &inputData[0,0,0], &outputData[0,0,0], regularization_parameter , iterations, tolerance_param, @@ -433,7 +448,7 @@ def TVFGP3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, printM, dims[0], dims[1], dims[2]); - return B + return outputData -- cgit v1.2.3 From 63ad8306c261a90c572d95084bf1dd8db2b3dce7 Mon Sep 17 00:00:00 2001 From: algol Date: Tue, 6 Mar 2018 16:18:20 +0000 Subject: FGP/ROF-CPU/GPU fully working with new interfaces and regularizers.py script. This closes #39 and #38 --- Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.cu | 2 +- Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.h | 2 +- Wrappers/Python/ccpi/filters/regularizers.py | 44 +++++++ Wrappers/Python/src/gpu_regularizers.pyx | 43 ++++--- Wrappers/Python/test/test_cpu_vs_gpu.py | 10 -- .../Python/test/test_cpu_vs_gpu_regularizers.py | 137 ++++++++++++++++++--- Wrappers/Python/test/test_gpu_regularizers.py | 16 +-- 7 files changed, 198 insertions(+), 56 deletions(-) create mode 100644 Wrappers/Python/ccpi/filters/regularizers.py delete mode 100644 Wrappers/Python/test/test_cpu_vs_gpu.py diff --git a/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.cu b/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.cu index cbe9b5e..61097fb 100755 --- a/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.cu +++ b/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.cu @@ -341,7 +341,7 @@ __global__ void copy_kernel3D(float *Input, float* Output, int N, int M, int Z, /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ ////////////MAIN HOST FUNCTION /////////////// -extern "C" void TV_FGP_GPU(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ) +extern "C" void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ) { int deviceCount = -1; // number of devices cudaGetDeviceCount(&deviceCount); diff --git a/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.h b/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.h index 2b8fdcf..107d243 100755 --- a/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.h +++ b/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.h @@ -5,6 +5,6 @@ #ifndef _TV_FGP_GPU_ #define _TV_FGP_GPU_ -extern "C" void TV_FGP_GPU(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ); +extern "C" void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ); #endif diff --git a/Wrappers/Python/ccpi/filters/regularizers.py b/Wrappers/Python/ccpi/filters/regularizers.py new file mode 100644 index 0000000..d6dfa8c --- /dev/null +++ b/Wrappers/Python/ccpi/filters/regularizers.py @@ -0,0 +1,44 @@ +""" +script which assigns a proper device core function based on a flag ('cpu' or 'gpu') +""" + +from ccpi.filters.cpu_regularizers_cython import TV_ROF_CPU, TV_FGP_CPU +from ccpi.filters.gpu_regularizers import TV_ROF_GPU, TV_FGP_GPU + +def ROF_TV(inputData, regularization_parameter, iterations, + time_marching_parameter,device='cpu'): + if device == 'cpu': + return TV_ROF_CPU(inputData, + regularization_parameter, + iterations, + time_marching_parameter) + elif device == 'gpu': + return TV_ROF_GPU(inputData, + regularization_parameter, + iterations, + time_marching_parameter) + else: + raise ValueError('Unknown device {0}. Expecting gpu or cpu'\ + .format(device)) + +def FGP_TV(inputData, regularization_parameter,iterations, + tolerance_param, methodTV, nonneg, printM, device='cpu'): + if device == 'cpu': + return TV_FGP_CPU(inputData, + regularization_parameter, + iterations, + tolerance_param, + methodTV, + nonneg, + printM) + elif device == 'gpu': + return TV_FGP_GPU(inputData, + regularization_parameter, + iterations, + tolerance_param, + methodTV, + nonneg, + printM) + else: + raise ValueError('Unknown device {0}. Expecting gpu or cpu'\ + .format(device)) \ No newline at end of file diff --git a/Wrappers/Python/src/gpu_regularizers.pyx b/Wrappers/Python/src/gpu_regularizers.pyx index cb94e86..f96156a 100644 --- a/Wrappers/Python/src/gpu_regularizers.pyx +++ b/Wrappers/Python/src/gpu_regularizers.pyx @@ -26,14 +26,14 @@ cdef extern void NLM_GPU_kernel(float *A, float* B, float *Eucl_Vec, int SearchW, int SimilW, int SearchW_real, float denh2, float lambdaf); cdef extern void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z); -#cdef extern void TV_FGP_GPU(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int N, int M, int Z); -# correct the function cdef extern void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int N, int M, int Z); +# padding function cdef extern float pad_crop(float *A, float *Ap, int OldSizeX, int OldSizeY, int OldSizeZ, int NewSizeX, int NewSizeY, int NewSizeZ, int padXY, int switchpad_crop); + #Diffusion 4th order regularizer def Diff4thHajiaboli(inputData, edge_preserv_parameter, @@ -70,6 +70,7 @@ def NML(inputData, SimilW, h, lambdaf) + # Total-variation Rudin-Osher-Fatemi (ROF) def TV_ROF_GPU(inputData, regularization_parameter, @@ -82,24 +83,34 @@ def TV_ROF_GPU(inputData, time_marching_parameter) elif inputData.ndim == 3: return ROFTV3D(inputData, - regularization_parameter + regularization_parameter, iterations, time_marching_parameter) + # Total-variation Fast-Gradient-Projection (FGP) def TV_FGP_GPU(inputData, regularization_parameter, iterations, - time_marching_parameter): + tolerance_param, + methodTV, + nonneg, + printM): if inputData.ndim == 2: - return FGPTV2D(inputData, + return FGPTV2D(inputData, regularization_parameter, - iterations, - time_marching_parameter) + iterations, + tolerance_param, + methodTV, + nonneg, + printM) elif inputData.ndim == 3: - return FGPTV3D(inputData, - regularization_parameter + return FGPTV3D(inputData, + regularization_parameter, iterations, - time_marching_parameter) + tolerance_param, + methodTV, + nonneg, + printM) #****************************************************************# def Diff4thHajiaboli2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, float edge_preserv_parameter, @@ -347,7 +358,8 @@ def NML3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, switchpad_crop) return B - + +# Total-variation ROF def ROFTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, float regularization_parameter, int iterations, @@ -393,6 +405,7 @@ def ROFTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, return outputData +# Total-variation Fast-Gradient-Projection (FGP) def FGPTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, float regularization_parameter, int iterations, @@ -409,8 +422,7 @@ def FGPTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, np.zeros([dims[0],dims[1]], dtype='float32') # Running CUDA code here - TV_FGP_GPU( - &inputData[0,0], &outputData[0,0], + TV_FGP_GPU_main(&inputData[0,0], &outputData[0,0], regularization_parameter, iterations, tolerance_param, @@ -438,7 +450,7 @@ def FGPTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, np.zeros([dims[0],dims[1],dims[2]], dtype='float32') # Running CUDA code here - TV_FGP_GPU( + TV_FGP_GPU_main( &inputData[0,0,0], &outputData[0,0,0], regularization_parameter , iterations, @@ -449,6 +461,3 @@ def FGPTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, dims[0], dims[1], dims[2]); return outputData - - - diff --git a/Wrappers/Python/test/test_cpu_vs_gpu.py b/Wrappers/Python/test/test_cpu_vs_gpu.py deleted file mode 100644 index 74d65dd..0000000 --- a/Wrappers/Python/test/test_cpu_vs_gpu.py +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Wed Feb 21 12:12:22 2018 - -# CPU vs GPU comparison tests - -@author: algol -""" - diff --git a/Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py b/Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py index e162afa..a9d0f31 100644 --- a/Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py +++ b/Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py @@ -5,15 +5,17 @@ Created on Thu Feb 22 11:39:43 2018 Testing CPU implementation against GPU one -@author: algol +@author: Daniil Kazantsev """ import matplotlib.pyplot as plt import numpy as np -import os +import os import timeit -from ccpi.filters.gpu_regularizers import Diff4thHajiaboli, NML, GPU_ROF_TV -from ccpi.filters.cpu_regularizers_cython import TV_ROF_CPU +#from ccpi.filters.cpu_regularizers_cython import TV_ROF_CPU, TV_FGP_CPU +#from ccpi.filters.gpu_regularizers import TV_ROF_GPU, TV_FGP_GPU +from ccpi.filters.regularizers import ROF_TV, FGP_TV + ############################################################################### def printParametersToString(pars): txt = r'' @@ -47,6 +49,11 @@ u0 = Im + np.random.normal(loc = Im , f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1) u0 = f(u0).astype('float32') + +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") +print ("____________ROF-TV bench___________________") +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") + ## plot fig = plt.figure(1) plt.suptitle('Comparison of ROF-TV regularizer using CPU and GPU implementations') @@ -54,22 +61,19 @@ a=fig.add_subplot(1,4,1) a.set_title('Noisy Image') imgplot = plt.imshow(u0,cmap="gray") - # set parameters -pars = {'algorithm': TV_ROF_CPU , \ +pars = {'algorithm': ROF_TV, \ 'input' : u0,\ 'regularization_parameter':0.04,\ - 'time_marching_parameter': 0.0025,\ - 'number_of_iterations': 1200 + 'number_of_iterations': 1200,\ + 'time_marching_parameter': 0.0025 } -print ("#################ROF TV CPU#####################") +print ("#############ROF TV CPU####################") start_time = timeit.default_timer() -rof_cpu = TV_ROF_CPU(pars['input'], - pars['number_of_iterations'], +rof_cpu = ROF_TV(pars['input'], pars['regularization_parameter'], - pars['time_marching_parameter'] - ) -#tgv = out + pars['number_of_iterations'], + pars['time_marching_parameter'],'cpu') rms = rmse(Im, rof_cpu) pars['rmse'] = rms @@ -87,16 +91,16 @@ imgplot = plt.imshow(rof_cpu, cmap="gray") plt.title('{}'.format('CPU results')) -print ("#################ROF TV GPU#####################") +print ("##############ROF TV GPU##################") start_time = timeit.default_timer() -rof_gpu = GPU_ROF_TV(pars['input'], +rof_gpu = ROF_TV(pars['input'], + pars['regularization_parameter'], pars['number_of_iterations'], - pars['time_marching_parameter'], - pars['regularization_parameter']) + pars['time_marching_parameter'],'gpu') rms = rmse(Im, rof_gpu) pars['rmse'] = rms -pars['algorithm'] = GPU_ROF_TV +pars['algorithm'] = ROF_TV txtstr = printParametersToString(pars) txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) print (txtstr) @@ -112,7 +116,8 @@ plt.title('{}'.format('GPU results')) print ("--------Compare the results--------") -tolerance = 1e-06 +tolerance = 1e-05 +diff_im = np.zeros(np.shape(rof_cpu)) diff_im = abs(rof_cpu - rof_gpu) diff_im[diff_im > tolerance] = 1 a=fig.add_subplot(1,4,4) @@ -122,3 +127,95 @@ if (diff_im.sum() > 1): print ("Arrays do not match!") else: print ("Arrays match") + +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") +print ("____________FGP-TV bench___________________") +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") + +## plot +fig = plt.figure(2) +plt.suptitle('Comparison of FGP-TV regularizer using CPU and GPU implementations') +a=fig.add_subplot(1,4,1) +a.set_title('Noisy Image') +imgplot = plt.imshow(u0,cmap="gray") + +# set parameters +pars = {'algorithm' : FGP_TV, \ + 'input' : u0,\ + 'regularization_parameter':0.04, \ + 'number_of_iterations' :1200 ,\ + 'tolerance_constant':0.00001,\ + 'methodTV': 0 ,\ + 'nonneg': 0 ,\ + 'printingOut': 0 + } + +print ("#############FGP TV CPU####################") +start_time = timeit.default_timer() +fgp_cpu = FGP_TV(pars['input'], + pars['regularization_parameter'], + pars['number_of_iterations'], + pars['tolerance_constant'], + pars['methodTV'], + pars['nonneg'], + pars['printingOut'],'cpu') + + +rms = rmse(Im, fgp_cpu) +pars['rmse'] = rms + +txtstr = printParametersToString(pars) +txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) +print (txtstr) +a=fig.add_subplot(1,4,2) + +# these are matplotlib.patch.Patch properties +props = dict(boxstyle='round', facecolor='wheat', alpha=0.75) +# place a text box in upper left in axes coords +a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14, + verticalalignment='top', bbox=props) +imgplot = plt.imshow(fgp_cpu, cmap="gray") +plt.title('{}'.format('CPU results')) + + +print ("##############FGP TV GPU##################") +start_time = timeit.default_timer() +fgp_gpu = FGP_TV(pars['input'], + pars['regularization_parameter'], + pars['number_of_iterations'], + pars['tolerance_constant'], + pars['methodTV'], + pars['nonneg'], + pars['printingOut'],'gpu') + +rms = rmse(Im, fgp_gpu) +pars['rmse'] = rms +pars['algorithm'] = FGP_TV +txtstr = printParametersToString(pars) +txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) +print (txtstr) +a=fig.add_subplot(1,4,3) + +# these are matplotlib.patch.Patch properties +props = dict(boxstyle='round', facecolor='wheat', alpha=0.75) +# place a text box in upper left in axes coords +a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14, + verticalalignment='top', bbox=props) +imgplot = plt.imshow(fgp_gpu, cmap="gray") +plt.title('{}'.format('GPU results')) + + +print ("--------Compare the results--------") +tolerance = 1e-05 +diff_im = np.zeros(np.shape(rof_cpu)) +diff_im = abs(fgp_cpu - fgp_gpu) +diff_im[diff_im > tolerance] = 1 +a=fig.add_subplot(1,4,4) +imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray") +plt.title('{}'.format('Pixels larger threshold difference')) +if (diff_im.sum() > 1): + print ("Arrays do not match!") +else: + print ("Arrays match") + + diff --git a/Wrappers/Python/test/test_gpu_regularizers.py b/Wrappers/Python/test/test_gpu_regularizers.py index c473270..04aeeb4 100644 --- a/Wrappers/Python/test/test_gpu_regularizers.py +++ b/Wrappers/Python/test/test_gpu_regularizers.py @@ -11,7 +11,8 @@ import numpy as np import os from enum import Enum import timeit -from ccpi.filters.gpu_regularizers import Diff4thHajiaboli, NML, GPU_ROF_TV +from ccpi.filters.gpu_regularizers import Diff4thHajiaboli, NML, TV_ROF_GPU + ############################################################################### def printParametersToString(pars): txt = r'' @@ -152,17 +153,18 @@ plt.colorbar(ticks=[0, 0.03], orientation='vertical') start_time = timeit.default_timer() pars = { -'algorithm' : GPU_ROF_TV , \ +'algorithm' : TV_ROF_GPU , \ 'input' : u0, 'regularization_parameter': 0.04,\ - 'time_marching_parameter': 0.0025, \ - 'number_of_iterations':300 + 'number_of_iterations':300,\ + 'time_marching_parameter': 0.0025 + } -rof_tv = GPU_ROF_TV(pars['input'], +rof_tv = TV_ROF_GPU(pars['input'], + pars['regularization_parameter'], pars['number_of_iterations'], - pars['time_marching_parameter'], - pars['regularization_parameter']) + pars['time_marching_parameter']) rms = rmse(Im, rof_tv) pars['rmse'] = rms -- cgit v1.2.3 From 4593040d228fec5ef1f1a301e511708d47f0d55e Mon Sep 17 00:00:00 2001 From: algol Date: Tue, 6 Mar 2018 16:51:31 +0000 Subject: demos fixed #40 --- Wrappers/Python/demo/test_cpu_regularizers.py | 21 +- Wrappers/Python/src/cpu_regularizers.cpp | 289 --------------------- .../Python/test/test_cpu_vs_gpu_regularizers.py | 4 +- Wrappers/Python/test/test_gpu_regularizers.py | 59 ++++- 4 files changed, 63 insertions(+), 310 deletions(-) diff --git a/Wrappers/Python/demo/test_cpu_regularizers.py b/Wrappers/Python/demo/test_cpu_regularizers.py index 1d97857..76b9de7 100644 --- a/Wrappers/Python/demo/test_cpu_regularizers.py +++ b/Wrappers/Python/demo/test_cpu_regularizers.py @@ -11,10 +11,8 @@ import numpy as np import os from enum import Enum import timeit -from ccpi.filters.cpu_regularizers_boost import SplitBregman_TV, LLT_model, PatchBased_Regul ,\ - TGV_PD -from ccpi.filters.cpu_regularizers_cython import TV_ROF_CPU, TV_FGP_CPU - +from ccpi.filters.cpu_regularizers_boost import SplitBregman_TV, LLT_model, PatchBased_Regul, TGV_PD +from ccpi.filters.regularizers import ROF_TV, FGP_TV ############################################################################### #https://stackoverflow.com/questions/13875989/comparing-image-in-url-to-image-in-filesystem-in-python/13884956#13884956 #NRMSE a normalization of the root of the mean squared error @@ -127,7 +125,7 @@ imgplot = plt.imshow(splitbregman,\ ###################### FGP_TV ######################################### start_time = timeit.default_timer() -pars = {'algorithm' : TV_FGP_CPU , \ +pars = {'algorithm' : FGP_TV , \ 'input' : u0,\ 'regularization_parameter':0.07, \ 'number_of_iterations' :300 ,\ @@ -137,13 +135,13 @@ pars = {'algorithm' : TV_FGP_CPU , \ 'printingOut': 0 } -fgp = TV_FGP_CPU(pars['input'], +fgp = FGP_TV(pars['input'], pars['regularization_parameter'], pars['number_of_iterations'], pars['tolerance_constant'], pars['methodTV'], pars['nonneg'], - pars['printingOut']) + pars['printingOut'], 'cpu') rms = rmse(Im, fgp) pars['rmse'] = rms @@ -280,18 +278,17 @@ imgplot = plt.imshow(tgv, cmap="gray") start_time = timeit.default_timer() -pars = {'algorithm': TV_ROF_CPU , \ +pars = {'algorithm': ROF_TV , \ 'input' : u0,\ 'regularization_parameter':0.07,\ 'marching_step': 0.0025,\ 'number_of_iterations': 300 } -rof = TV_ROF_CPU(pars['input'], +rof = ROF_TV(pars['input'], pars['regularization_parameter'], pars['number_of_iterations'], - pars['marching_step'] - ) -#tgv = out + pars['marching_step'], 'cpu') + rms = rmse(Im, rof) pars['rmse'] = rms diff --git a/Wrappers/Python/src/cpu_regularizers.cpp b/Wrappers/Python/src/cpu_regularizers.cpp index b8156d5..88b7c3c 100644 --- a/Wrappers/Python/src/cpu_regularizers.cpp +++ b/Wrappers/Python/src/cpu_regularizers.cpp @@ -303,292 +303,6 @@ bp::list SplitBregman_TV(np::ndarray input, double d_mu, int iter, double d_epsi } - - -//bp::list FGP_TV(np::ndarray input, double d_mu, int iter, double d_epsil, int methTV) { - - //// the result is in the following list - //bp::list result; - - //int number_of_dims, dimX, dimY, dimZ, ll, j, count; - //float *A, *D = NULL, *D_old = NULL, *P1 = NULL, *P2 = NULL, *P3 = NULL, *P1_old = NULL, *P2_old = NULL, *P3_old = NULL, *R1 = NULL, *R2 = NULL, *R3 = NULL; - //float lambda, tk, tkp1, re, re1, re_old, epsil, funcval; - - ////number_of_dims = mxGetNumberOfDimensions(prhs[0]); - ////dim_array = mxGetDimensions(prhs[0]); - - //number_of_dims = input.get_nd(); - //int dim_array[3]; - - //dim_array[0] = input.shape(0); - //dim_array[1] = input.shape(1); - //if (number_of_dims == 2) { - //dim_array[2] = -1; - //} - //else { - //dim_array[2] = input.shape(2); - //} - //// Parameter handling is be done in Python - /////*Handling Matlab input data*/ - ////if ((nrhs < 2) || (nrhs > 5)) mexErrMsgTxt("At least 2 parameters is required: Image(2D/3D), Regularization parameter. The full list of parameters: Image(2D/3D), Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1')"); - - /////*Handling Matlab input data*/ - ////A = (float *)mxGetData(prhs[0]); /*noisy image (2D/3D) */ - //A = reinterpret_cast(input.get_data()); - - ////mu = (float)mxGetScalar(prhs[1]); /* regularization parameter */ - //lambda = (float)d_mu; - - ////iter = 35; /* default iterations number */ - - ////epsil = 0.0001; /* default tolerance constant */ - //epsil = (float)d_epsil; - ////methTV = 0; /* default isotropic TV penalty */ - ////if ((nrhs == 3) || (nrhs == 4) || (nrhs == 5)) iter = (int)mxGetScalar(prhs[2]); /* iterations number */ - ////if ((nrhs == 4) || (nrhs == 5)) epsil = (float)mxGetScalar(prhs[3]); /* tolerance constant */ - ////if (nrhs == 5) { - //// char *penalty_type; - //// penalty_type = mxArrayToString(prhs[4]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */ - //// if ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0)) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',"); - //// if (strcmp(penalty_type, "l1") == 0) methTV = 1; /* enable 'l1' penalty */ - //// mxFree(penalty_type); - ////} - ////if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) { mexErrMsgTxt("The input image must be in a single precision"); } - - ////plhs[1] = mxCreateNumericMatrix(1, 1, mxSINGLE_CLASS, mxREAL); - //bp::tuple shape1 = bp::make_tuple(dim_array[0], dim_array[1]); - //np::dtype dtype = np::dtype::get_builtin(); - //np::ndarray out1 = np::zeros(shape1, dtype); - - ////float *funcvalA = (float *)mxGetData(plhs[1]); - //float * funcvalA = reinterpret_cast(out1.get_data()); - ////if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) { mexErrMsgTxt("The input image must be in a single precision"); } - - ///*Handling Matlab output data*/ - //dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2]; - - //tk = 1.0f; - //tkp1 = 1.0f; - //count = 1; - //re_old = 0.0f; - - //if (number_of_dims == 2) { - //dimZ = 1; /*2D case*/ - ///*D = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - //D_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - //P1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - //P2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - //P1_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - //P2_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - //R1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - //R2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));*/ - - //bp::tuple shape = bp::make_tuple(dim_array[0], dim_array[1]); - //np::dtype dtype = np::dtype::get_builtin(); - - - //np::ndarray npD = np::zeros(shape, dtype); - //np::ndarray npD_old = np::zeros(shape, dtype); - //np::ndarray npP1 = np::zeros(shape, dtype); - //np::ndarray npP2 = np::zeros(shape, dtype); - //np::ndarray npP1_old = np::zeros(shape, dtype); - //np::ndarray npP2_old = np::zeros(shape, dtype); - //np::ndarray npR1 = np::zeros(shape, dtype); - //np::ndarray npR2 = np::zeros(shape, dtype); - - //D = reinterpret_cast(npD.get_data()); - //D_old = reinterpret_cast(npD_old.get_data()); - //P1 = reinterpret_cast(npP1.get_data()); - //P2 = reinterpret_cast(npP2.get_data()); - //P1_old = reinterpret_cast(npP1_old.get_data()); - //P2_old = reinterpret_cast(npP2_old.get_data()); - //R1 = reinterpret_cast(npR1.get_data()); - //R2 = reinterpret_cast(npR2.get_data()); - - ///* begin iterations */ - //for (ll = 0; ll 4) { - //Obj_func2D(A, D, P1, P2, lambda, dimX, dimY); - //funcval = 0.0f; - //for (j = 0; j 2) { - //if (re > re_old) { - //Obj_func2D(A, D, P1, P2, lambda, dimX, dimY); - //funcval = 0.0f; - //for (j = 0; j(npD); - //result.append(out1); - //result.append(ll); - //} - //if (number_of_dims == 3) { - ///*D = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - //D_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - //P1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - //P2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - //P3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - //P1_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - //P2_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - //P3_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - //R1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - //R2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - //R3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));*/ - //bp::tuple shape = bp::make_tuple(dim_array[0], dim_array[1], dim_array[2]); - //np::dtype dtype = np::dtype::get_builtin(); - - //np::ndarray npD = np::zeros(shape, dtype); - //np::ndarray npD_old = np::zeros(shape, dtype); - //np::ndarray npP1 = np::zeros(shape, dtype); - //np::ndarray npP2 = np::zeros(shape, dtype); - //np::ndarray npP3 = np::zeros(shape, dtype); - //np::ndarray npP1_old = np::zeros(shape, dtype); - //np::ndarray npP2_old = np::zeros(shape, dtype); - //np::ndarray npP3_old = np::zeros(shape, dtype); - //np::ndarray npR1 = np::zeros(shape, dtype); - //np::ndarray npR2 = np::zeros(shape, dtype); - //np::ndarray npR3 = np::zeros(shape, dtype); - - //D = reinterpret_cast(npD.get_data()); - //D_old = reinterpret_cast(npD_old.get_data()); - //P1 = reinterpret_cast(npP1.get_data()); - //P2 = reinterpret_cast(npP2.get_data()); - //P3 = reinterpret_cast(npP3.get_data()); - //P1_old = reinterpret_cast(npP1_old.get_data()); - //P2_old = reinterpret_cast(npP2_old.get_data()); - //P3_old = reinterpret_cast(npP3_old.get_data()); - //R1 = reinterpret_cast(npR1.get_data()); - //R2 = reinterpret_cast(npR2.get_data()); - //R3 = reinterpret_cast(npR3.get_data()); - ///* begin iterations */ - //for (ll = 0; ll 3) { - //Obj_func3D(A, D, P1, P2, P3, lambda, dimX, dimY, dimZ); - //funcval = 0.0f; - //for (j = 0; j 2) { - //if (re > re_old) { - //Obj_func3D(A, D, P1, P2, P3, lambda, dimX, dimY, dimZ); - //funcval = 0.0f; - //for (j = 0; j(npD); - //result.append(out1); - //result.append(ll); - //} - - //return result; -//} - bp::list LLT_model(np::ndarray input, double d_lambda, double d_tau, int iter, double d_epsil, int switcher) { // the result is in the following list bp::list result; @@ -1022,9 +736,6 @@ bp::list TGV_PD(np::ndarray input, double d_lambda, double d_alpha1, double d_al result.append(npU); } - - - return result; } diff --git a/Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py b/Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py index a9d0f31..63be1a0 100644 --- a/Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py +++ b/Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py @@ -3,7 +3,7 @@ """ Created on Thu Feb 22 11:39:43 2018 -Testing CPU implementation against GPU one +Testing CPU implementation against the GPU one @author: Daniil Kazantsev """ @@ -12,8 +12,6 @@ import matplotlib.pyplot as plt import numpy as np import os import timeit -#from ccpi.filters.cpu_regularizers_cython import TV_ROF_CPU, TV_FGP_CPU -#from ccpi.filters.gpu_regularizers import TV_ROF_GPU, TV_FGP_GPU from ccpi.filters.regularizers import ROF_TV, FGP_TV ############################################################################### diff --git a/Wrappers/Python/test/test_gpu_regularizers.py b/Wrappers/Python/test/test_gpu_regularizers.py index 04aeeb4..640b3f9 100644 --- a/Wrappers/Python/test/test_gpu_regularizers.py +++ b/Wrappers/Python/test/test_gpu_regularizers.py @@ -11,8 +11,8 @@ import numpy as np import os from enum import Enum import timeit -from ccpi.filters.gpu_regularizers import Diff4thHajiaboli, NML, TV_ROF_GPU - +from ccpi.filters.gpu_regularizers import Diff4thHajiaboli, NML +from ccpi.filters.regularizers import ROF_TV, FGP_TV ############################################################################### def printParametersToString(pars): txt = r'' @@ -151,20 +151,19 @@ plt.colorbar(ticks=[0, 0.03], orientation='vertical') ## Rudin-Osher-Fatemi (ROF) TV regularization start_time = timeit.default_timer() - pars = { -'algorithm' : TV_ROF_GPU , \ +'algorithm' : ROF_TV , \ 'input' : u0, 'regularization_parameter': 0.04,\ 'number_of_iterations':300,\ 'time_marching_parameter': 0.0025 } - + rof_tv = TV_ROF_GPU(pars['input'], pars['regularization_parameter'], pars['number_of_iterations'], - pars['time_marching_parameter']) + pars['time_marching_parameter'],'gpu') rms = rmse(Im, rof_tv) pars['rmse'] = rms @@ -190,3 +189,51 @@ a.text(0.05, 0.95, 'rof_tv - u0', transform=a.transAxes, fontsize=14, imgplot = plt.imshow((rof_tv - u0)**2, vmin=0, vmax=0.03, cmap="gray") plt.colorbar(ticks=[0, 0.03], orientation='vertical') plt.show() + +## Fast-Gradient Projection TV regularization +""" +start_time = timeit.default_timer() + +pars = {'algorithm' : FGP_TV, \ + 'input' : u0,\ + 'regularization_parameter':0.04, \ + 'number_of_iterations' :1200 ,\ + 'tolerance_constant':0.00001,\ + 'methodTV': 0 ,\ + 'nonneg': 0 ,\ + 'printingOut': 0 + } + +fgp_gpu = FGP_TV(pars['input'], + pars['regularization_parameter'], + pars['number_of_iterations'], + pars['tolerance_constant'], + pars['methodTV'], + pars['nonneg'], + pars['printingOut'],'gpu') + +rms = rmse(Im, fgp_gpu) +pars['rmse'] = rms +txtstr = printParametersToString(pars) +txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) +print (txtstr) +a=fig.add_subplot(2,4,4) + +# these are matplotlib.patch.Patch properties +props = dict(boxstyle='round', facecolor='wheat', alpha=0.75) +# place a text box in upper left in axes coords +a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=12, + verticalalignment='top', bbox=props) +imgplot = plt.imshow(fgp_gpu, cmap="gray") + +a=fig.add_subplot(2,4,8) + +# these are matplotlib.patch.Patch properties +props = dict(boxstyle='round', facecolor='wheat', alpha=0.5) +# place a text box in upper left in axes coords +a.text(0.05, 0.95, 'fgp_gpu - u0', transform=a.transAxes, fontsize=14, + verticalalignment='top', bbox=props) +imgplot = plt.imshow((fgp_gpu - u0)**2, vmin=0, vmax=0.03, cmap="gray") +plt.colorbar(ticks=[0, 0.03], orientation='vertical') +plt.show() +""" -- cgit v1.2.3 From fbb7a12f7714978c251f02bf84ab9a66c762f428 Mon Sep 17 00:00:00 2001 From: algol Date: Wed, 7 Mar 2018 10:15:13 +0000 Subject: small correct #40 --- Wrappers/Python/src/cpu_regularizers.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/Wrappers/Python/src/cpu_regularizers.cpp b/Wrappers/Python/src/cpu_regularizers.cpp index 88b7c3c..3529ebd 100644 --- a/Wrappers/Python/src/cpu_regularizers.cpp +++ b/Wrappers/Python/src/cpu_regularizers.cpp @@ -27,7 +27,6 @@ limitations under the License. #include "boost/tuple/tuple.hpp" #include "SplitBregman_TV_core.h" -//#include "FGP_TV_core.h" #include "LLT_model_core.h" #include "PatchBased_Regul_core.h" #include "TGV_PD_core.h" -- cgit v1.2.3