From 0ebf1f949b7150692e356627c455e905b642af97 Mon Sep 17 00:00:00 2001 From: Daniil Kazantsev Date: Sun, 25 Feb 2018 17:54:03 +0000 Subject: updates to CPU/GPU core for ROF and demos --- Core/regularizers_CPU/ROF_TV_core.c | 9 ++++----- Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu | 19 +++++++++---------- Wrappers/Python/demo/test_cpu_regularizers.py | 6 +++--- Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py | 6 +++--- Wrappers/Python/test/test_gpu_regularizers.py | 6 +++--- 5 files changed, 22 insertions(+), 24 deletions(-) diff --git a/Core/regularizers_CPU/ROF_TV_core.c b/Core/regularizers_CPU/ROF_TV_core.c index ba7fe48..fd47c3f 100644 --- a/Core/regularizers_CPU/ROF_TV_core.c +++ b/Core/regularizers_CPU/ROF_TV_core.c @@ -268,7 +268,7 @@ float TV_kernel(float *D1, float *D2, float *D3, float *B, float *A, float lambd dv2 = D2[index] - D2[(dimX*dimY)*k + j*dimX+i2]; dv3 = D3[index] - D3[(dimX*dimY)*k2 + j*dimX+i]; - B[index] = B[index] + tau*lambda*(dv1 + dv2 + dv3) + tau*(A[index] - B[index]); + B[index] = B[index] + tau*(2.0f*lambda*(dv1 + dv2 + dv3) - (B[index] - A[index])); }}} } else { @@ -284,10 +284,9 @@ float TV_kernel(float *D1, float *D2, float *D3, float *B, float *A, float lambd /* divergence components */ dv1 = D1[index] - D1[j2*dimX + i]; - dv2 = D2[index] - D2[j*dimX + i2]; - - //B[index] = B[index] + tau*lambda*(dv1 + dv2) + tau*(A[index] - B[index]); - B[index] = B[index] + tau*((dv1 + dv2) - lambda*(B[index] - A[index])); + dv2 = D2[index] - D2[j*dimX + i2]; + + B[index] = B[index] + tau*(2.0f*lambda*(dv1 + dv2) - (B[index] - A[index])); }} } return *B; diff --git a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu index 897b5d0..b67b53b 100755 --- a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu +++ b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu @@ -54,7 +54,7 @@ limitations under the License. #define BLKXSIZE2D 16 #define BLKYSIZE2D 16 -#define EPS 1.0e-4 +#define EPS 1.0e-5 #define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) ) @@ -71,7 +71,7 @@ __host__ __device__ int sign (float x) /* differences 1 */ __global__ void D1_func2D(float* Input, float* D1, int N, int M) { - int i1, j1, i2, j2; + int i1, j1, i2; float NOMx_1,NOMy_1,NOMy_0,denom1,denom2,T1; int i = blockDim.x * blockIdx.x + threadIdx.x; int j = blockDim.y * blockIdx.y + threadIdx.y; @@ -84,7 +84,6 @@ __host__ __device__ int sign (float x) i1 = i + 1; if (i1 >= N) i1 = i-1; i2 = i - 1; if (i2 < 0) i2 = i+1; j1 = j + 1; if (j1 >= M) j1 = j-1; - j2 = j - 1; if (j2 < 0) j2 = j+1; /* Forward-backward differences */ NOMx_1 = Input[j1*N + i] - Input[index]; /* x+ */ @@ -102,7 +101,7 @@ __host__ __device__ int sign (float x) /* differences 2 */ __global__ void D2_func2D(float* Input, float* D2, int N, int M) { - int i1, j1, i2, j2; + int i1, j1, j2; float NOMx_1,NOMy_1,NOMx_0,denom1,denom2,T2; int i = blockDim.x * blockIdx.x + threadIdx.x; int j = blockDim.y * blockIdx.y + threadIdx.y; @@ -113,7 +112,6 @@ __host__ __device__ int sign (float x) /* boundary conditions (Neumann reflections) */ i1 = i + 1; if (i1 >= N) i1 = i-1; - i2 = i - 1; if (i2 < 0) i2 = i+1; j1 = j + 1; if (j1 >= M) j1 = j-1; j2 = j - 1; if (j2 < 0) j2 = j+1; @@ -149,7 +147,7 @@ __host__ __device__ int sign (float x) dv1 = D1[index] - D1[j2*N + i]; dv2 = D2[index] - D2[j*N + i2]; - Update[index] = Update[index] + tau*((dv1 + dv2) - lambda*(Update[index] - Input[index])); + Update[index] = Update[index] + tau*(2.0f*lambda*(dv1 + dv2) - (Update[index] - Input[index])); } } @@ -299,21 +297,22 @@ __host__ __device__ int sign (float x) dv2 = D2[index] - D2[(dimX*dimY)*k + j*dimX+i2]; dv3 = D3[index] - D3[(dimX*dimY)*k2 + j*dimX+i]; - Update[index] = Update[index] + tau*lambda*(dv1 + dv2 + dv3) + tau*(Update[index] - Input[index]); + Update[index] = Update[index] + tau*(2.0f*lambda*(dv1 + dv2 + dv3) - (Update[index] - Input[index])); } } ///////////////////////////////////////////////// // HOST FUNCTION -extern "C" void TV_ROF_GPU_kernel(float* Input, float* Output, int N, int M, int Z, int iter, float tau, float lambda) +extern "C" void TV_ROF_GPU(float* Input, float* Output, int N, int M, int Z, int iter, float tau, float lambda) { // set up device int dev = 0; CHECK(cudaSetDevice(dev)); - float *d_input, *d_update, *d_D1, *d_D2; + float *d_input, *d_update, *d_D1, *d_D2; + if (Z == 0) Z = 1; CHECK(cudaMalloc((void**)&d_input,N*M*Z*sizeof(float))); CHECK(cudaMalloc((void**)&d_update,N*M*Z*sizeof(float))); CHECK(cudaMalloc((void**)&d_D1,N*M*Z*sizeof(float))); @@ -346,7 +345,7 @@ extern "C" void TV_ROF_GPU_kernel(float* Input, float* Output, int N, int M, int CHECK(cudaFree(d_D3)); } else { - // TV - 2D case + // TV - 2D case dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D); dim3 dimGrid(idivup(N,BLKXSIZE2D), idivup(M,BLKYSIZE2D)); diff --git a/Wrappers/Python/demo/test_cpu_regularizers.py b/Wrappers/Python/demo/test_cpu_regularizers.py index d147b85..b08c418 100644 --- a/Wrappers/Python/demo/test_cpu_regularizers.py +++ b/Wrappers/Python/demo/test_cpu_regularizers.py @@ -284,9 +284,9 @@ start_time = timeit.default_timer() pars = {'algorithm': ROF_TV , \ 'input' : u0,\ - 'regularization_parameter':25,\ - 'marching_step': 0.001,\ - 'number_of_iterations': 350 + 'regularization_parameter':0.04,\ + 'marching_step': 0.0025,\ + 'number_of_iterations': 300 } rof = ROF_TV(pars['input'], pars['number_of_iterations'], diff --git a/Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py b/Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py index 8c91c73..d742a7f 100644 --- a/Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py +++ b/Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py @@ -58,8 +58,8 @@ imgplot = plt.imshow(u0,cmap="gray") # set parameters pars = {'algorithm': ROF_TV , \ 'input' : u0,\ - 'regularization_parameter':12,\ - 'time_marching_parameter': 0.001,\ + 'regularization_parameter':0.04,\ + 'time_marching_parameter': 0.0025,\ 'number_of_iterations': 600 } print ("#################ROF TV CPU#####################") @@ -120,4 +120,4 @@ plt.title('{}'.format('Pixels larger threshold difference')) if (diff_im.sum() > 1): print ("Arrays do not match!") else: - print ("Arrays match") \ No newline at end of file + print ("Arrays match") diff --git a/Wrappers/Python/test/test_gpu_regularizers.py b/Wrappers/Python/test/test_gpu_regularizers.py index 29f5bad..c473270 100644 --- a/Wrappers/Python/test/test_gpu_regularizers.py +++ b/Wrappers/Python/test/test_gpu_regularizers.py @@ -154,9 +154,9 @@ start_time = timeit.default_timer() pars = { 'algorithm' : GPU_ROF_TV , \ 'input' : u0, - 'regularization_parameter': 25,\ - 'time_marching_parameter': 0.001, \ - 'number_of_iterations':350 + 'regularization_parameter': 0.04,\ + 'time_marching_parameter': 0.0025, \ + 'number_of_iterations':300 } rof_tv = GPU_ROF_TV(pars['input'], -- cgit v1.2.3