summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authordkazanc <dkazanc@hotmail.com>2018-12-04 16:13:38 +0000
committerdkazanc <dkazanc@hotmail.com>2018-12-04 16:13:38 +0000
commitc9ee9ecc84881595b04f19280c93bcd587171270 (patch)
tree5074abd308c3e2f4425ee27251d242f3273f1dd4
parent8b8dfc68fa6b70ec7eefcdfb928fb383196bec97 (diff)
downloadregularization-c9ee9ecc84881595b04f19280c93bcd587171270.tar.gz
regularization-c9ee9ecc84881595b04f19280c93bcd587171270.tar.bz2
regularization-c9ee9ecc84881595b04f19280c93bcd587171270.tar.xz
regularization-c9ee9ecc84881595b04f19280c93bcd587171270.zip
GPU version, this completes implementation of nltv #68
-rw-r--r--Core/CMakeLists.txt1
-rw-r--r--Core/regularisers_CPU/PatchSelect_core.c100
-rw-r--r--Core/regularisers_GPU/PatchSelect_GPU_core.cu468
-rw-r--r--Core/regularisers_GPU/PatchSelect_GPU_core.h8
-rw-r--r--Readme.md2
-rw-r--r--Wrappers/Matlab/demos/demoMatlab_denoise.m10
-rw-r--r--Wrappers/Python/ccpi/filters/regularisers.py8
-rwxr-xr-xWrappers/Python/conda-recipe/run_test.py2
-rw-r--r--Wrappers/Python/demos/demo_cpu_regularisers.py18
-rw-r--r--Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py55
-rw-r--r--Wrappers/Python/demos/demo_gpu_regularisers.py74
-rw-r--r--Wrappers/Python/setup-regularisers.py.in1
-rw-r--r--Wrappers/Python/src/cpu_regularisers.pyx6
-rw-r--r--Wrappers/Python/src/gpu_regularisers.pyx32
14 files changed, 716 insertions, 69 deletions
diff --git a/Core/CMakeLists.txt b/Core/CMakeLists.txt
index df01bb7..d92db82 100644
--- a/Core/CMakeLists.txt
+++ b/Core/CMakeLists.txt
@@ -120,6 +120,7 @@ if (BUILD_CUDA)
${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/dTV_FGP_GPU_core.cu
${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/NonlDiff_GPU_core.cu
${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/Diffus_4thO_GPU_core.cu
+ ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/PatchSelect_GPU_core.cu
)
if (UNIX)
message ("I'd install into ${CMAKE_INSTALL_PREFIX}/lib")
diff --git a/Core/regularisers_CPU/PatchSelect_core.c b/Core/regularisers_CPU/PatchSelect_core.c
index 45cf8c8..cf5cdc7 100644
--- a/Core/regularisers_CPU/PatchSelect_core.c
+++ b/Core/regularisers_CPU/PatchSelect_core.c
@@ -44,6 +44,19 @@
* 4. Weights_ijk - associated weights
*/
+/* Exchange the two float values referenced by xp and yp. */
+void swap(float *xp, float *yp)
+{
+    const float first = *xp;
+    const float second = *yp;
+    *xp = second;
+    *yp = first;
+}
+
+/* Exchange the two unsigned short values referenced by xp and yp. */
+void swapUS(unsigned short *xp, unsigned short *yp)
+{
+    const unsigned short first = *xp;
+    const unsigned short second = *yp;
+    *xp = second;
+    *yp = first;
+}
/**************************************************/
float PatchSelect_CPU_main(float *A, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int SearchWindow, int SimilarWin, int NumNeighb, float h, int switchM)
@@ -51,8 +64,7 @@ float PatchSelect_CPU_main(float *A, unsigned short *H_i, unsigned short *H_j, u
int counterG;
long i, j, k;
float *Eucl_Vec, h2;
- h2 = h*h;
-
+ h2 = h*h;
/****************2D INPUT ***************/
if (dimZ == 0) {
/* generate a 2D Gaussian kernel for NLM procedure */
@@ -67,14 +79,14 @@ float PatchSelect_CPU_main(float *A, unsigned short *H_i, unsigned short *H_j, u
if (switchM == 1) {
#pragma omp parallel for shared (A, Weights, H_i, H_j) private(i,j)
for(i=0; i<(long)(dimX); i++) {
- for(j=0; j<(long)(dimY); j++) {
+ for(j=0; j<(long)(dimY); j++) {
Indeces2D_p(A, H_i, H_j, Weights, i, j, (long)(dimX), (long)(dimY), Eucl_Vec, NumNeighb, SearchWindow, SimilarWin, h2);
}}
}
else {
#pragma omp parallel for shared (A, Weights, H_i, H_j) private(i,j)
for(i=0; i<(long)(dimX); i++) {
- for(j=0; j<(long)(dimY); j++) {
+ for(j=0; j<(long)(dimY); j++) {
Indeces2D(A, H_i, H_j, Weights, i, j, (long)(dimX), (long)(dimY), Eucl_Vec, NumNeighb, SearchWindow, SimilarWin, h2);
}}
}
@@ -116,8 +128,8 @@ float PatchSelect_CPU_main(float *A, unsigned short *H_i, unsigned short *H_j, u
float Indeces2D(float *Aorig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, float *Eucl_Vec, int NumNeighb, int SearchWindow, int SimilarWin, float h2)
{
long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, index, sizeWin_tot, counterG;
- float *Weight_Vec, normsum, temp;
- unsigned short *ind_i, *ind_j, temp_i, temp_j;
+ float *Weight_Vec, normsum;
+ unsigned short *ind_i, *ind_j;
sizeWin_tot = (2*SearchWindow + 1)*(2*SearchWindow + 1);
@@ -146,51 +158,43 @@ float Indeces2D(float *Aorig, unsigned short *H_i, unsigned short *H_j, float *W
}}
/* writing temporarily into vectors */
- if (normsum > EPS) {
+ if (normsum > EPS) {
Weight_Vec[counter] = expf(-normsum/h2);
ind_i[counter] = i1;
- ind_j[counter] = j1;
+ ind_j[counter] = j1;
counter++;
}
}
- }}
+ }}
/* do sorting to choose the most prominent weights [HIGH to LOW] */
/* and re-arrange indeces accordingly */
- for (x = 0; x < counter; x++) {
- for (y = 0; y < counter; y++) {
- if (Weight_Vec[y] < Weight_Vec[x]) {
- temp = Weight_Vec[y+1];
- temp_i = ind_i[y+1];
- temp_j = ind_j[y+1];
- Weight_Vec[y+1] = Weight_Vec[y];
- Weight_Vec[y] = temp;
- ind_i[y+1] = ind_i[y];
- ind_i[y] = temp_i;
- ind_j[y+1] = ind_j[y];
- ind_j[y] = temp_j;
- }}}
- /*sorting loop finished*/
-
- /*now select the NumNeighb more prominent weights and store into arrays */
+ for (x = 0; x < counter-1; x++) {
+ for (y = 0; y < counter-x-1; y++) {
+ if (Weight_Vec[y] < Weight_Vec[y+1]) {
+ swap(&Weight_Vec[y], &Weight_Vec[y+1]);
+ swapUS(&ind_i[y], &ind_i[y+1]);
+ swapUS(&ind_j[y], &ind_j[y+1]);
+ }
+ }
+ }
+ /*sorting loop finished*/
+ /*now select the NumNeighb more prominent weights and store into pre-allocated arrays */
for(x=0; x < NumNeighb; x++) {
- index = (dimX*dimY*x) + j*dimX+i;
+ index = (dimX*dimY*x) + j*dimX+i;
H_i[index] = ind_i[x];
H_j[index] = ind_j[x];
Weights[index] = Weight_Vec[x];
- }
-
+ }
free(ind_i);
free(ind_j);
free(Weight_Vec);
return 1;
}
-
-
float Indeces2D_p(float *Aorig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, float *Eucl_Vec, int NumNeighb, int SearchWindow, int SimilarWin, float h2)
{
long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, index, sizeWin_tot, counterG;
- float *Weight_Vec, normsum, temp;
- unsigned short *ind_i, *ind_j, temp_i, temp_j;
+ float *Weight_Vec, normsum;
+ unsigned short *ind_i, *ind_j;
sizeWin_tot = (2*SearchWindow + 1)*(2*SearchWindow + 1);
@@ -228,32 +232,26 @@ float Indeces2D_p(float *Aorig, unsigned short *H_i, unsigned short *H_j, float
}
}
}}
- /* do sorting to choose the most prominent weights [HIGH to LOW] */
+ /* do sorting to choose the most prominent weights [HIGH to LOW] */
/* and re-arrange indeces accordingly */
- for (x = 0; x < counter; x++) {
- for (y = 0; y < counter; y++) {
- if (Weight_Vec[y] < Weight_Vec[x]) {
- temp = Weight_Vec[y+1];
- temp_i = ind_i[y+1];
- temp_j = ind_j[y+1];
- Weight_Vec[y+1] = Weight_Vec[y];
- Weight_Vec[y] = temp;
- ind_i[y+1] = ind_i[y];
- ind_i[y] = temp_i;
- ind_j[y+1] = ind_j[y];
- ind_j[y] = temp_j;
- }}}
+ for (x = 0; x < counter-1; x++) {
+ for (y = 0; y < counter-x-1; y++) {
+ if (Weight_Vec[y] < Weight_Vec[y+1]) {
+ swap(&Weight_Vec[y], &Weight_Vec[y+1]);
+ swapUS(&ind_i[y], &ind_i[y+1]);
+ swapUS(&ind_j[y], &ind_j[y+1]);
+ }
+ }
+ }
/*sorting loop finished*/
- /*now select the NumNeighb more prominent weights and store into arrays */
+ /*now select the NumNeighb more prominent weights and store into pre-allocated arrays */
for(x=0; x < NumNeighb; x++) {
- //index = (dimX*dimY*x) + j*dimX+i;
- index = (dimX*dimY*x) + i*dimY+j;
+ index = (dimX*dimY*x) + i*dimY+j;
H_i[index] = ind_i[x];
H_j[index] = ind_j[x];
Weights[index] = Weight_Vec[x];
- }
-
+ }
free(ind_i);
free(ind_j);
free(Weight_Vec);
diff --git a/Core/regularisers_GPU/PatchSelect_GPU_core.cu b/Core/regularisers_GPU/PatchSelect_GPU_core.cu
new file mode 100644
index 0000000..f558b0f
--- /dev/null
+++ b/Core/regularisers_GPU/PatchSelect_GPU_core.cu
@@ -0,0 +1,468 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC and Diamond Light Source Ltd.
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ * Copyright 2018 Diamond Light Source Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PatchSelect_GPU_core.h"
+
+/* CUDA implementation of non-local weight pre-calculation for non-local priors
+ * Weights and associated indices are stored into pre-allocated arrays and passed
+ * to the regulariser
+ *
+ *
+ * Input Parameters:
+ * 1. 2D grayscale image (classical 3D version will not be supported but rather 2D + dim extension (TODO))
+ * 2. Searching window (half-size of the main bigger searching window, e.g. 11)
+ * 3. Similarity window (half-size of the patch window, e.g. 2)
+ * 4. The number of neighbours to take (the most prominent after sorting neighbours will be taken)
+ * 5. noise-related parameter to calculate non-local weights
+ *
+ * Output [2D]:
+ * 1. H_i - indices of i neighbours
+ * 2. H_j - indices of j neighbours
+ * 3. Weights - associated weights
+ */
+
+// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
+#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__)
+
+/* Report a failed CUDA runtime call (source file, line, numeric code and error string)
+ * on stderr and terminate the process; returns silently when err is cudaSuccess. */
+inline void __checkCudaErrors(cudaError err, const char *file, const int line)
+{
+    if (err == cudaSuccess) {
+        return;
+    }
+    fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",
+            file, line, (int)err, cudaGetErrorString(err));
+    exit(EXIT_FAILURE);
+}
+
+/* CUDA thread-block dimensions used for the kernel launches (x and y) */
+#define BLKXSIZE 16
+#define BLKYSIZE 16
+/* integer division rounded up: number of blocks of size b needed to cover a elements */
+#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
+/* NOTE(review): M_PI is not referenced by the code visible in this file - confirm it is needed */
+#define M_PI 3.14159265358979323846
+/* candidates whose patch distance does not exceed EPS are not stored as neighbours */
+#define EPS 1.0e-8
+/* per-thread candidate buffer sizes: CONSTVECSIZE<k> = (2*k + 1)^2 is the number of
+   positions in a search window of half-size k (k = 5, 7, 9, 11, 13) */
+#define CONSTVECSIZE5 121
+#define CONSTVECSIZE7 225
+#define CONSTVECSIZE9 361
+#define CONSTVECSIZE11 529
+#define CONSTVECSIZE13 729
+
+/* Device helper: exchange the two float values referenced by xp and yp. */
+__device__ void swap(float *xp, float *yp)
+{
+    const float first = *xp;
+    const float second = *yp;
+    *xp = second;
+    *yp = first;
+}
+/* Device helper: exchange the two unsigned short values referenced by xp and yp. */
+__device__ void swapUS(unsigned short *xp, unsigned short *yp)
+{
+    const unsigned short first = *xp;
+    const unsigned short second = *yp;
+    *xp = second;
+    *yp = first;
+}
+
+/********************************************************************************/
+/* Neighbour selection for SearchWindow == 5, one thread per pixel (i,j) of the N x M image.
+ * Every candidate (i1,j1) of the (2*SearchWindow+1)^2 search window gets the weight
+ * exp(-d/h2), where d is the Eucl_Vec_d-weighted sum of squared differences between the
+ * patches (half-size SimilarWin) around (i,j) and (i1,j1). Candidates with d <= EPS
+ * (which includes the pixel itself) are skipped. The candidates are sorted by weight
+ * [HIGH to LOW] and the first NumNeighb are written to H_i_d, H_j_d and Weights_d at
+ * offset N*M*x + i*M + j. When fewer than NumNeighb candidates exist, the remaining
+ * slots are filled with the pixel's own coordinates and a zero weight.
+ * SearchW_full is not used by the kernel. */
+__global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
+{
+
+    long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;
+    float normsum;
+
+    float Weight_Vec[CONSTVECSIZE5];
+    unsigned short ind_i[CONSTVECSIZE5];
+    unsigned short ind_j[CONSTVECSIZE5];
+
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+
+    /* the launch grid is rounded up to whole blocks, so threads beyond the image must
+       do nothing: otherwise they write outside the output arrays (i >= N) or over the
+       results of another pixel (j >= M) */
+    if ((i >= N) || (j >= M)) return;
+
+    /* offsets are computed in long to avoid int overflow on large images */
+    const long slice = (long)N*(long)M;
+    const long index = (long)i*(long)M + j;
+
+    counter = 0;
+    for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
+        for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
+            i1 = i+i_m;
+            j1 = j+j_m;
+            if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) {
+                normsum = 0.0f; counterG = 0;
+                for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
+                    for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
+                        i2 = i1 + i_c;
+                        j2 = j1 + j_c;
+                        i3 = i + i_c;
+                        j3 = j + j_c;
+                        if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M))) {
+                            if (((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) {
+                                normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2);
+                                counterG++;
+                            }}
+                    }}
+                /* writing temporarily into vectors */
+                if (normsum > EPS) {
+                    Weight_Vec[counter] = __expf(-normsum/h2);
+                    ind_i[counter] = i1;
+                    ind_j[counter] = j1;
+                    counter++;
+                }
+            }
+        }}
+
+    /* do sorting to choose the most prominent weights [HIGH to LOW] */
+    /* and re-arrange indices accordingly */
+    for (x = 0; x < counter-1; x++) {
+        for (y = 0; y < counter-x-1; y++) {
+            if (Weight_Vec[y] < Weight_Vec[y+1]) {
+                swap(&Weight_Vec[y], &Weight_Vec[y+1]);
+                swapUS(&ind_i[y], &ind_i[y+1]);
+                swapUS(&ind_j[y], &ind_j[y+1]);
+            }
+        }
+    }
+    /*sorting loop finished*/
+    /*now select the NumNeighb more prominent weights and store into arrays */
+    for(x=0; x < NumNeighb; x++) {
+        index2 = slice*x + index;
+        if (x < counter) {
+            H_i_d[index2] = ind_i[x];
+            H_j_d[index2] = ind_j[x];
+            Weights_d[index2] = Weight_Vec[x];
+        }
+        else {
+            /* fewer candidates than requested: never copy uninitialised entries;
+               a zero weight marks the slot as carrying no neighbour information */
+            H_i_d[index2] = (unsigned short)i;
+            H_j_d[index2] = (unsigned short)j;
+            Weights_d[index2] = 0.0f;
+        }
+    }
+}
+/********************************************************************************/
+/* Neighbour selection for SearchWindow == 7, one thread per pixel (i,j) of the N x M image.
+ * Every candidate (i1,j1) of the (2*SearchWindow+1)^2 search window gets the weight
+ * exp(-d/h2), where d is the Eucl_Vec_d-weighted sum of squared differences between the
+ * patches (half-size SimilarWin) around (i,j) and (i1,j1). Candidates with d <= EPS
+ * (which includes the pixel itself) are skipped. The candidates are sorted by weight
+ * [HIGH to LOW] and the first NumNeighb are written to H_i_d, H_j_d and Weights_d at
+ * offset N*M*x + i*M + j. When fewer than NumNeighb candidates exist, the remaining
+ * slots are filled with the pixel's own coordinates and a zero weight.
+ * SearchW_full is not used by the kernel. */
+__global__ void IndexSelect2D_7_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
+{
+
+    long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;
+    float normsum;
+
+    float Weight_Vec[CONSTVECSIZE7];
+    unsigned short ind_i[CONSTVECSIZE7];
+    unsigned short ind_j[CONSTVECSIZE7];
+
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+
+    /* the launch grid is rounded up to whole blocks, so threads beyond the image must
+       do nothing: otherwise they write outside the output arrays (i >= N) or over the
+       results of another pixel (j >= M) */
+    if ((i >= N) || (j >= M)) return;
+
+    /* offsets are computed in long to avoid int overflow on large images */
+    const long slice = (long)N*(long)M;
+    const long index = (long)i*(long)M + j;
+
+    counter = 0;
+    for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
+        for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
+            i1 = i+i_m;
+            j1 = j+j_m;
+            if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) {
+                normsum = 0.0f; counterG = 0;
+                for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
+                    for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
+                        i2 = i1 + i_c;
+                        j2 = j1 + j_c;
+                        i3 = i + i_c;
+                        j3 = j + j_c;
+                        if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M))) {
+                            if (((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) {
+                                normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2);
+                                counterG++;
+                            }}
+                    }}
+                /* writing temporarily into vectors */
+                if (normsum > EPS) {
+                    Weight_Vec[counter] = __expf(-normsum/h2);
+                    ind_i[counter] = i1;
+                    ind_j[counter] = j1;
+                    counter++;
+                }
+            }
+        }}
+
+    /* do sorting to choose the most prominent weights [HIGH to LOW] */
+    /* and re-arrange indices accordingly */
+    for (x = 0; x < counter-1; x++) {
+        for (y = 0; y < counter-x-1; y++) {
+            if (Weight_Vec[y] < Weight_Vec[y+1]) {
+                swap(&Weight_Vec[y], &Weight_Vec[y+1]);
+                swapUS(&ind_i[y], &ind_i[y+1]);
+                swapUS(&ind_j[y], &ind_j[y+1]);
+            }
+        }
+    }
+    /*sorting loop finished*/
+    /*now select the NumNeighb more prominent weights and store into arrays */
+    for(x=0; x < NumNeighb; x++) {
+        index2 = slice*x + index;
+        if (x < counter) {
+            H_i_d[index2] = ind_i[x];
+            H_j_d[index2] = ind_j[x];
+            Weights_d[index2] = Weight_Vec[x];
+        }
+        else {
+            /* fewer candidates than requested: never copy uninitialised entries;
+               a zero weight marks the slot as carrying no neighbour information */
+            H_i_d[index2] = (unsigned short)i;
+            H_j_d[index2] = (unsigned short)j;
+            Weights_d[index2] = 0.0f;
+        }
+    }
+}
+/* Neighbour selection for SearchWindow == 9, one thread per pixel (i,j) of the N x M image.
+ * Every candidate (i1,j1) of the (2*SearchWindow+1)^2 search window gets the weight
+ * exp(-d/h2), where d is the Eucl_Vec_d-weighted sum of squared differences between the
+ * patches (half-size SimilarWin) around (i,j) and (i1,j1). Candidates with d <= EPS
+ * (which includes the pixel itself) are skipped. The candidates are sorted by weight
+ * [HIGH to LOW] and the first NumNeighb are written to H_i_d, H_j_d and Weights_d at
+ * offset N*M*x + i*M + j. When fewer than NumNeighb candidates exist, the remaining
+ * slots are filled with the pixel's own coordinates and a zero weight.
+ * SearchW_full is not used by the kernel. */
+__global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
+{
+
+    long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;
+    float normsum;
+
+    float Weight_Vec[CONSTVECSIZE9];
+    unsigned short ind_i[CONSTVECSIZE9];
+    unsigned short ind_j[CONSTVECSIZE9];
+
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+
+    /* the launch grid is rounded up to whole blocks, so threads beyond the image must
+       do nothing: otherwise they write outside the output arrays (i >= N) or over the
+       results of another pixel (j >= M) */
+    if ((i >= N) || (j >= M)) return;
+
+    /* offsets are computed in long to avoid int overflow on large images */
+    const long slice = (long)N*(long)M;
+    const long index = (long)i*(long)M + j;
+
+    counter = 0;
+    for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
+        for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
+            i1 = i+i_m;
+            j1 = j+j_m;
+            if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) {
+                normsum = 0.0f; counterG = 0;
+                for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
+                    for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
+                        i2 = i1 + i_c;
+                        j2 = j1 + j_c;
+                        i3 = i + i_c;
+                        j3 = j + j_c;
+                        if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M))) {
+                            if (((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) {
+                                normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2);
+                                counterG++;
+                            }}
+                    }}
+                /* writing temporarily into vectors */
+                if (normsum > EPS) {
+                    /* NOTE(review): the 5/7/11/13 kernels call __expf here while this
+                       one calls expf - confirm which of the two is intended */
+                    Weight_Vec[counter] = expf(-normsum/h2);
+                    ind_i[counter] = i1;
+                    ind_j[counter] = j1;
+                    counter++;
+                }
+            }
+        }}
+
+    /* do sorting to choose the most prominent weights [HIGH to LOW] */
+    /* and re-arrange indices accordingly */
+    for (x = 0; x < counter-1; x++) {
+        for (y = 0; y < counter-x-1; y++) {
+            if (Weight_Vec[y] < Weight_Vec[y+1]) {
+                swap(&Weight_Vec[y], &Weight_Vec[y+1]);
+                swapUS(&ind_i[y], &ind_i[y+1]);
+                swapUS(&ind_j[y], &ind_j[y+1]);
+            }
+        }
+    }
+    /*sorting loop finished*/
+    /*now select the NumNeighb more prominent weights and store into arrays */
+    for(x=0; x < NumNeighb; x++) {
+        index2 = slice*x + index;
+        if (x < counter) {
+            H_i_d[index2] = ind_i[x];
+            H_j_d[index2] = ind_j[x];
+            Weights_d[index2] = Weight_Vec[x];
+        }
+        else {
+            /* fewer candidates than requested: never copy uninitialised entries;
+               a zero weight marks the slot as carrying no neighbour information */
+            H_i_d[index2] = (unsigned short)i;
+            H_j_d[index2] = (unsigned short)j;
+            Weights_d[index2] = 0.0f;
+        }
+    }
+}
+/* Neighbour selection for SearchWindow == 11, one thread per pixel (i,j) of the N x M image.
+ * Every candidate (i1,j1) of the (2*SearchWindow+1)^2 search window gets the weight
+ * exp(-d/h2), where d is the Eucl_Vec_d-weighted sum of squared differences between the
+ * patches (half-size SimilarWin) around (i,j) and (i1,j1). Candidates with d <= EPS
+ * (which includes the pixel itself) are skipped. The candidates are sorted by weight
+ * [HIGH to LOW] and the first NumNeighb are written to H_i_d, H_j_d and Weights_d at
+ * offset N*M*x + i*M + j. When fewer than NumNeighb candidates exist, the remaining
+ * slots are filled with the pixel's own coordinates and a zero weight.
+ * SearchW_full is not used by the kernel. */
+__global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
+{
+
+    long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;
+    float normsum;
+
+    float Weight_Vec[CONSTVECSIZE11];
+    unsigned short ind_i[CONSTVECSIZE11];
+    unsigned short ind_j[CONSTVECSIZE11];
+
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+
+    /* the launch grid is rounded up to whole blocks, so threads beyond the image must
+       do nothing: otherwise they write outside the output arrays (i >= N) or over the
+       results of another pixel (j >= M) */
+    if ((i >= N) || (j >= M)) return;
+
+    /* offsets are computed in long to avoid int overflow on large images */
+    const long slice = (long)N*(long)M;
+    const long index = (long)i*(long)M + j;
+
+    counter = 0;
+    for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
+        for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
+            i1 = i+i_m;
+            j1 = j+j_m;
+            if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) {
+                normsum = 0.0f; counterG = 0;
+                for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
+                    for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
+                        i2 = i1 + i_c;
+                        j2 = j1 + j_c;
+                        i3 = i + i_c;
+                        j3 = j + j_c;
+                        if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M))) {
+                            if (((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) {
+                                normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2);
+                                counterG++;
+                            }}
+                    }}
+                /* writing temporarily into vectors */
+                if (normsum > EPS) {
+                    Weight_Vec[counter] = __expf(-normsum/h2);
+                    ind_i[counter] = i1;
+                    ind_j[counter] = j1;
+                    counter++;
+                }
+            }
+        }}
+
+    /* do sorting to choose the most prominent weights [HIGH to LOW] */
+    /* and re-arrange indices accordingly */
+    for (x = 0; x < counter-1; x++) {
+        for (y = 0; y < counter-x-1; y++) {
+            if (Weight_Vec[y] < Weight_Vec[y+1]) {
+                swap(&Weight_Vec[y], &Weight_Vec[y+1]);
+                swapUS(&ind_i[y], &ind_i[y+1]);
+                swapUS(&ind_j[y], &ind_j[y+1]);
+            }
+        }
+    }
+    /*sorting loop finished*/
+    /*now select the NumNeighb more prominent weights and store into arrays */
+    for(x=0; x < NumNeighb; x++) {
+        index2 = slice*x + index;
+        if (x < counter) {
+            H_i_d[index2] = ind_i[x];
+            H_j_d[index2] = ind_j[x];
+            Weights_d[index2] = Weight_Vec[x];
+        }
+        else {
+            /* fewer candidates than requested: never copy uninitialised entries;
+               a zero weight marks the slot as carrying no neighbour information */
+            H_i_d[index2] = (unsigned short)i;
+            H_j_d[index2] = (unsigned short)j;
+            Weights_d[index2] = 0.0f;
+        }
+    }
+}
+/* Neighbour selection for SearchWindow == 13, one thread per pixel (i,j) of the N x M image.
+ * Every candidate (i1,j1) of the (2*SearchWindow+1)^2 search window gets the weight
+ * exp(-d/h2), where d is the Eucl_Vec_d-weighted sum of squared differences between the
+ * patches (half-size SimilarWin) around (i,j) and (i1,j1). Candidates with d <= EPS
+ * (which includes the pixel itself) are skipped. The candidates are sorted by weight
+ * [HIGH to LOW] and the first NumNeighb are written to H_i_d, H_j_d and Weights_d at
+ * offset N*M*x + i*M + j. When fewer than NumNeighb candidates exist, the remaining
+ * slots are filled with the pixel's own coordinates and a zero weight.
+ * SearchW_full is not used by the kernel. */
+__global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
+{
+
+    long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;
+    float normsum;
+
+    float Weight_Vec[CONSTVECSIZE13];
+    unsigned short ind_i[CONSTVECSIZE13];
+    unsigned short ind_j[CONSTVECSIZE13];
+
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+
+    /* the launch grid is rounded up to whole blocks, so threads beyond the image must
+       do nothing: otherwise they write outside the output arrays (i >= N) or over the
+       results of another pixel (j >= M) */
+    if ((i >= N) || (j >= M)) return;
+
+    /* offsets are computed in long to avoid int overflow on large images */
+    const long slice = (long)N*(long)M;
+    const long index = (long)i*(long)M + j;
+
+    counter = 0;
+    for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
+        for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
+            i1 = i+i_m;
+            j1 = j+j_m;
+            if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) {
+                normsum = 0.0f; counterG = 0;
+                for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
+                    for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
+                        i2 = i1 + i_c;
+                        j2 = j1 + j_c;
+                        i3 = i + i_c;
+                        j3 = j + j_c;
+                        if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M))) {
+                            if (((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) {
+                                normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2);
+                                counterG++;
+                            }}
+                    }}
+                /* writing temporarily into vectors */
+                if (normsum > EPS) {
+                    Weight_Vec[counter] = __expf(-normsum/h2);
+                    ind_i[counter] = i1;
+                    ind_j[counter] = j1;
+                    counter++;
+                }
+            }
+        }}
+
+    /* do sorting to choose the most prominent weights [HIGH to LOW] */
+    /* and re-arrange indices accordingly */
+    for (x = 0; x < counter-1; x++) {
+        for (y = 0; y < counter-x-1; y++) {
+            if (Weight_Vec[y] < Weight_Vec[y+1]) {
+                swap(&Weight_Vec[y], &Weight_Vec[y+1]);
+                swapUS(&ind_i[y], &ind_i[y+1]);
+                swapUS(&ind_j[y], &ind_j[y+1]);
+            }
+        }
+    }
+    /*sorting loop finished*/
+    /*now select the NumNeighb more prominent weights and store into arrays */
+    for(x=0; x < NumNeighb; x++) {
+        index2 = slice*x + index;
+        if (x < counter) {
+            H_i_d[index2] = ind_i[x];
+            H_j_d[index2] = ind_j[x];
+            Weights_d[index2] = Weight_Vec[x];
+        }
+        else {
+            /* fewer candidates than requested: never copy uninitialised entries;
+               a zero weight marks the slot as carrying no neighbour information */
+            H_i_d[index2] = (unsigned short)i;
+            H_j_d[index2] = (unsigned short)j;
+            Weights_d[index2] = 0.0f;
+        }
+    }
+}
+
+
+/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
+/********************* MAIN HOST FUNCTION ******************/
+/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
+/* Host entry point of the GPU patch selection.
+ *
+ * A            - input image, N*M floats (host memory)
+ * H_i, H_j     - outputs, N*M*NumNeighb unsigned shorts each (host memory)
+ * Weights      - output, N*M*NumNeighb floats (host memory)
+ * SearchWindow - half-size of the searching window; only 5, 7, 9, 11 and 13 are supported
+ * SimilarWin   - half-size of the similarity (patch) window
+ * NumNeighb    - number of neighbours stored per pixel
+ * h            - noise-related parameter; the kernels receive h*h
+ *
+ * The function prints a message to stderr and returns without touching the outputs when
+ * no CUDA device is found, when SearchWindow is unsupported, or when the host Gaussian
+ * kernel cannot be allocated. A failing CUDA runtime call terminates the process through
+ * checkCudaErrors. */
+extern "C" void PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h)
+{
+    int deviceCount = -1; // number of devices
+    /* a failed query leaves deviceCount untouched, so the status is checked as well */
+    if ((cudaGetDeviceCount(&deviceCount) != cudaSuccess) || (deviceCount <= 0)) {
+        fprintf(stderr, "No CUDA devices found\n");
+        return;
+    }
+
+    /* reject unsupported window sizes before anything is allocated,
+       so that this early return cannot leak host or device memory */
+    if ((SearchWindow != 5) && (SearchWindow != 7) && (SearchWindow != 9) && (SearchWindow != 11) && (SearchWindow != 13)) {
+        fprintf(stderr, "Select the searching window size from 5, 7, 9, 11 or 13\n");
+        return;
+    }
+
+    int SearchW_full, SimilW_full, counterG, i, j;
+    float *Ad, *Weights_d, h2, *Eucl_Vec, *Eucl_Vec_d;
+    unsigned short *H_i_d, *H_j_d;
+    h2 = h*h;
+
+    dim3 dimBlock(BLKXSIZE,BLKYSIZE);
+    dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE));
+
+    SearchW_full = (2*SearchWindow + 1)*(2*SearchWindow + 1); /* the full searching window size */
+    SimilW_full = (2*SimilarWin + 1)*(2*SimilarWin + 1); /* the full similarity window size */
+
+    /* element counts in size_t so that the byte sizes below cannot overflow int */
+    const size_t numPixels = (size_t)N*(size_t)M;
+    const size_t numOutputs = numPixels*(size_t)NumNeighb;
+
+    /* generate a 2D Gaussian kernel for NLM procedure */
+    Eucl_Vec = (float*) calloc (SimilW_full,sizeof(float));
+    if (Eucl_Vec == NULL) {
+        fprintf(stderr, "Cannot allocate memory for the Gaussian kernel\n");
+        return;
+    }
+    counterG = 0;
+    for(i=-SimilarWin; i<=SimilarWin; i++) {
+        for(j=-SimilarWin; j<=SimilarWin; j++) {
+            Eucl_Vec[counterG] = (float)exp(-(pow(((float) i), 2) + pow(((float) j), 2))/(2.0*SimilarWin*SimilarWin));
+            counterG++;
+        }} /*main neighb loop */
+
+
+    /*allocate space on the device*/
+    checkCudaErrors( cudaMalloc((void**)&Ad, numPixels*sizeof(float)) );
+    checkCudaErrors( cudaMalloc((void**)&H_i_d, numOutputs*sizeof(unsigned short)) );
+    checkCudaErrors( cudaMalloc((void**)&H_j_d, numOutputs*sizeof(unsigned short)) );
+    checkCudaErrors( cudaMalloc((void**)&Weights_d, numOutputs*sizeof(float)) );
+    checkCudaErrors( cudaMalloc((void**)&Eucl_Vec_d, SimilW_full*sizeof(float)) );
+
+    /* copy data from the host to the device */
+    checkCudaErrors( cudaMemcpy(Ad,A,numPixels*sizeof(float),cudaMemcpyHostToDevice) );
+    checkCudaErrors( cudaMemcpy(Eucl_Vec_d,Eucl_Vec,SimilW_full*sizeof(float),cudaMemcpyHostToDevice) );
+
+    /********************** Run CUDA kernel here ********************/
+    if (SearchWindow == 5) IndexSelect2D_5_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2);
+    else if (SearchWindow == 7) IndexSelect2D_7_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2);
+    else if (SearchWindow == 9) IndexSelect2D_9_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2);
+    else if (SearchWindow == 11) IndexSelect2D_11_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2);
+    else /* SearchWindow == 13, guaranteed by the validation above */
+        IndexSelect2D_13_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2);
+    checkCudaErrors(cudaPeekAtLastError() );
+    checkCudaErrors(cudaDeviceSynchronize());
+    /***************************************************************/
+
+    checkCudaErrors(cudaMemcpy(H_i, H_i_d, numOutputs*sizeof(unsigned short),cudaMemcpyDeviceToHost) );
+    checkCudaErrors(cudaMemcpy(H_j, H_j_d, numOutputs*sizeof(unsigned short),cudaMemcpyDeviceToHost) );
+    checkCudaErrors(cudaMemcpy(Weights, Weights_d, numOutputs*sizeof(float),cudaMemcpyDeviceToHost) );
+
+    cudaFree(Ad);
+    cudaFree(H_i_d);
+    cudaFree(H_j_d);
+    cudaFree(Weights_d);
+    cudaFree(Eucl_Vec_d);
+    free(Eucl_Vec); /* host copy of the Gaussian kernel */
+}
diff --git a/Core/regularisers_GPU/PatchSelect_GPU_core.h b/Core/regularisers_GPU/PatchSelect_GPU_core.h
new file mode 100644
index 0000000..d20fe9f
--- /dev/null
+++ b/Core/regularisers_GPU/PatchSelect_GPU_core.h
@@ -0,0 +1,8 @@
+#ifndef __NLREG_KERNELS_H_
+#define __NLREG_KERNELS_H_
+#include "CCPiDefines.h"
+#include <stdio.h>
+
+/* GPU pre-calculation of non-local weights and neighbour indices for a 2D image.
+ * A is the N*M input image; H_i, H_j and Weights are caller-allocated outputs of
+ * N*M*NumNeighb elements each. SearchWindow is the half-size of the searching window
+ * (5, 7, 9, 11 or 13), SimilarWin the half-size of the patch window, NumNeighb the
+ * number of neighbours stored per pixel and h the noise-related weight parameter. */
+extern "C" CCPI_EXPORT void PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h);
+
+#endif
diff --git a/Readme.md b/Readme.md
index cc74dbc..01d4586 100644
--- a/Readme.md
+++ b/Readme.md
@@ -27,7 +27,7 @@
5. Linear and nonlinear diffusion (explicit PDE minimisation scheme) **2D/3D CPU/GPU** (Ref. *8*)
6. Anisotropic Fourth-Order Diffusion (explicit PDE minimisation) **2D/3D CPU/GPU** (Ref. *9*)
7. A joint ROF-LLT (Lysaker-Lundervold-Tai) model for higher-order regularisation **2D/3D CPU/GPU** (Ref. *10,11*)
-8. Nonlocal Total Variation regularisation (GS fixed point iteration) **2D/3D CPU/GPU** (Ref. *12*)
+8. Nonlocal Total Variation regularisation (GS fixed point iteration) **2D CPU/GPU** (Ref. *12*)
### Multi-channel (denoising):
1. Fast-Gradient-Projection (FGP) Directional Total Variation **2D/3D CPU/GPU** (Ref. *3,4,2*)
diff --git a/Wrappers/Matlab/demos/demoMatlab_denoise.m b/Wrappers/Matlab/demos/demoMatlab_denoise.m
index 54b8bac..3506cca 100644
--- a/Wrappers/Matlab/demos/demoMatlab_denoise.m
+++ b/Wrappers/Matlab/demos/demoMatlab_denoise.m
@@ -138,17 +138,17 @@ figure; imshow(u_diff4, [0 1]); title('Diffusion 4thO denoised image (CPU)');
fprintf('Weights pre-calculation for Non-local TV (takes time on CPU) \n');
SearchingWindow = 7;
PatchWindow = 2;
-NeighboursNumber = 15; % the number of neibours to include
+NeighboursNumber = 20; % the number of neibours to include
h = 0.23; % edge related parameter for NLM
-[H_i, H_j, Weights] = PatchSelect(single(u0), SearchingWindow, PatchWindow, NeighboursNumber, h);
+tic; [H_i, H_j, Weights] = PatchSelect(single(u0), SearchingWindow, PatchWindow, NeighboursNumber, h); toc;
%%
fprintf('Denoise using Non-local Total Variation (CPU) \n');
-iter_nltv = 2; % number of nltv iterations
-lambda_nltv = 0.085; % regularisation parameter for nltv
+iter_nltv = 3; % number of nltv iterations
+lambda_nltv = 0.05; % regularisation parameter for nltv
tic; u_nltv = Nonlocal_TV(single(u0), H_i, H_j, 0, Weights, lambda_nltv, iter_nltv); toc;
rmse_nltv = (RMSE(u_nltv(:),Im(:)));
fprintf('%s %f \n', 'RMSE error for Non-local Total Variation is:', rmse_nltv);
-figure; imshow(u_nltv, [0 1]); title('Non-local Total Variation denoised image (CPU)');
+figure; imagesc(u_nltv, [0 1]); colormap(gray); daspect([1 1 1]); title('Non-local Total Variation denoised image (CPU)');
%%
%>>>>>>>>>>>>>> MULTI-CHANNEL priors <<<<<<<<<<<<<<< %
diff --git a/Wrappers/Python/ccpi/filters/regularisers.py b/Wrappers/Python/ccpi/filters/regularisers.py
index bf7e23c..0a65590 100644
--- a/Wrappers/Python/ccpi/filters/regularisers.py
+++ b/Wrappers/Python/ccpi/filters/regularisers.py
@@ -4,7 +4,7 @@ script which assigns a proper device core function based on a flag ('cpu' or 'gp
from ccpi.filters.cpu_regularisers import TV_ROF_CPU, TV_FGP_CPU, TV_SB_CPU, dTV_FGP_CPU, TNV_CPU, NDF_CPU, Diff4th_CPU, TGV_CPU, LLT_ROF_CPU, PATCHSEL_CPU, NLTV_CPU
try:
- from ccpi.filters.gpu_regularisers import TV_ROF_GPU, TV_FGP_GPU, TV_SB_GPU, dTV_FGP_GPU, NDF_GPU, Diff4th_GPU, TGV_GPU, LLT_ROF_GPU
+ from ccpi.filters.gpu_regularisers import TV_ROF_GPU, TV_FGP_GPU, TV_SB_GPU, dTV_FGP_GPU, NDF_GPU, Diff4th_GPU, TGV_GPU, LLT_ROF_GPU, PATCHSEL_GPU
gpu_enabled = True
except ImportError:
gpu_enabled = False
@@ -153,7 +153,11 @@ def PatchSelect(inputData, searchwindow, patchwindow, neighbours, edge_parameter
neighbours,
edge_parameter)
elif device == 'gpu' and gpu_enabled:
- return 1
+ return PATCHSEL_GPU(inputData,
+ searchwindow,
+ patchwindow,
+ neighbours,
+ edge_parameter)
else:
if not gpu_enabled and device == 'gpu':
raise ValueError ('GPU is not available')
diff --git a/Wrappers/Python/conda-recipe/run_test.py b/Wrappers/Python/conda-recipe/run_test.py
index 6ffaca1..499ae7f 100755
--- a/Wrappers/Python/conda-recipe/run_test.py
+++ b/Wrappers/Python/conda-recipe/run_test.py
@@ -2,7 +2,7 @@ import unittest
import numpy as np
import os
import timeit
-from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, DIFF4th
+from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, DIFF4th
from PIL import Image
class TiffReader(object):
diff --git a/Wrappers/Python/demos/demo_cpu_regularisers.py b/Wrappers/Python/demos/demo_cpu_regularisers.py
index 31e4cad..78e9aff 100644
--- a/Wrappers/Python/demos/demo_cpu_regularisers.py
+++ b/Wrappers/Python/demos/demo_cpu_regularisers.py
@@ -400,20 +400,29 @@ plt.title('{}'.format('CPU results'))
print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
print ("___Nonlocal patches pre-calculation____")
print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+start_time = timeit.default_timer()
# set parameters
pars = {'algorithm' : PatchSelect, \
'input' : u0,\
'searchwindow': 7, \
'patchwindow': 2,\
'neighbours' : 15 ,\
- 'edge_parameter':0.23}
+ 'edge_parameter':0.18}
H_i, H_j, Weights = PatchSelect(pars['input'],
pars['searchwindow'],
pars['patchwindow'],
pars['neighbours'],
pars['edge_parameter'],'cpu')
-
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+"""
+plt.figure()
+plt.imshow(Weights[0,:,:],cmap="gray",interpolation="nearest",vmin=0, vmax=1)
+plt.show()
+"""
#%%
print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
print ("___Nonlocal Total Variation penalty____")
@@ -431,10 +440,9 @@ pars2 = {'algorithm' : NLTV, \
'H_j': H_j,\
'H_k' : 0,\
'Weights' : Weights,\
- 'regularisation_parameter': 0.085,\
- 'iterations': 2
+ 'regularisation_parameter': 0.04,\
+ 'iterations': 3
}
-#%%
start_time = timeit.default_timer()
nltv_cpu = NLTV(pars2['input'],
pars2['H_i'],
diff --git a/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py b/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py
index 3d6e92f..616eab0 100644
--- a/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py
+++ b/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py
@@ -13,6 +13,7 @@ import numpy as np
import os
import timeit
from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, DIFF4th
+from ccpi.filters.regularisers import PatchSelect
from qualitymetrics import rmse
###############################################################################
def printParametersToString(pars):
@@ -732,4 +733,58 @@ if (diff_im.sum() > 1):
print ("Arrays do not match!")
else:
print ("Arrays match")
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("____Non-local regularisation bench_________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot
+fig = plt.figure()
+plt.suptitle('Comparison of Nonlocal TV regulariser using CPU and GPU implementations')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+pars = {'algorithm' : PatchSelect, \
+ 'input' : u0,\
+ 'searchwindow': 7, \
+ 'patchwindow': 2,\
+ 'neighbours' : 15 ,\
+ 'edge_parameter':0.18}
+
+print ("############## Nonlocal Patches on CPU##################")
+start_time = timeit.default_timer()
+H_i, H_j, WeightsCPU = PatchSelect(pars['input'],
+ pars['searchwindow'],
+ pars['patchwindow'],
+ pars['neighbours'],
+ pars['edge_parameter'],'cpu')
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+
+print ("############## Nonlocal Patches on GPU##################")
+# start the timer once, immediately before the GPU call being measured
+start_time = timeit.default_timer()
+H_i, H_j, WeightsGPU = PatchSelect(pars['input'],
+              pars['searchwindow'],
+              pars['patchwindow'],
+              pars['neighbours'],
+              pars['edge_parameter'],'gpu')
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+
+print ("--------Compare the results--------")
+tolerance = 1e-05
+diff_im = np.zeros(np.shape(u0))
+diff_im = abs(WeightsCPU[0,:,:] - WeightsGPU[0,:,:])
+diff_im[diff_im > tolerance] = 1
+a=fig.add_subplot(1,2,2)
+imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray")
+plt.title('{}'.format('Pixels larger threshold difference'))
+if (diff_im.sum() > 1):
+ print ("Arrays do not match!")
+else:
+ print ("Arrays match")
#%% \ No newline at end of file
diff --git a/Wrappers/Python/demos/demo_gpu_regularisers.py b/Wrappers/Python/demos/demo_gpu_regularisers.py
index de0cbde..2ada559 100644
--- a/Wrappers/Python/demos/demo_gpu_regularisers.py
+++ b/Wrappers/Python/demos/demo_gpu_regularisers.py
@@ -13,6 +13,7 @@ import numpy as np
import os
import timeit
from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, DIFF4th
+from ccpi.filters.regularisers import PatchSelect, NLTV
from qualitymetrics import rmse
###############################################################################
def printParametersToString(pars):
@@ -84,7 +85,7 @@ pars = {'algorithm': ROF_TV, \
'input' : u0,\
'regularisation_parameter':0.04,\
'number_of_iterations': 1200,\
- 'time_marching_parameter': 0.0025
+ 'time_marching_parameter': 0.0025
}
print ("##############ROF TV GPU##################")
start_time = timeit.default_timer()
@@ -394,6 +395,77 @@ plt.title('{}'.format('GPU results'))
#%%
print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("___Nonlocal patches pre-calculation____")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+# Start timing before the parameters are built, so the reported elapsed time
+# covers the dictionary construction as well as the PatchSelect call.
+start_time = timeit.default_timer()
+# set parameters
+pars = {'algorithm' : PatchSelect, \
+ 'input' : u0,\
+ 'searchwindow': 7, \
+ 'patchwindow': 2,\
+ 'neighbours' : 15 ,\
+ 'edge_parameter':0.18}
+
+# Run the patch selection with the 'gpu' device flag; the three results
+# (H_i, H_j, Weights) are reused by the NLTV section further down.
+H_i, H_j, Weights = PatchSelect(pars['input'],
+ pars['searchwindow'],
+ pars['patchwindow'],
+ pars['neighbours'],
+ pars['edge_parameter'],'gpu')
+
+# Report the parameters together with the elapsed wall-clock time.
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+# The triple-quoted block below is a disabled preview of the first weight slice.
+"""
+plt.figure()
+plt.imshow(Weights[0,:,:],cmap="gray",interpolation="nearest",vmin=0, vmax=1)
+plt.show()
+"""
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("___Nonlocal Total Variation penalty____")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+## plot
+# Left panel: the noisy input image.
+fig = plt.figure()
+plt.suptitle('Performance of NLTV regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# NLTV inputs: the neighbour indices and weights come from the PatchSelect
+# call made earlier in this script.
+# NOTE(review): H_k is the scalar 0 here - presumably unused for 2D input; confirm.
+pars2 = {'algorithm' : NLTV, \
+ 'input' : u0,\
+ 'H_i': H_i, \
+ 'H_j': H_j,\
+ 'H_k' : 0,\
+ 'Weights' : Weights,\
+ 'regularisation_parameter': 0.02,\
+ 'iterations': 3
+ }
+# Timing starts after pars2 is built, so it covers the NLTV call and the rmse.
+start_time = timeit.default_timer()
+nltv_cpu = NLTV(pars2['input'],
+ pars2['H_i'],
+ pars2['H_j'],
+ pars2['H_k'],
+ pars2['Weights'],
+ pars2['regularisation_parameter'],
+ pars2['iterations'])
+
+# Error of the regularised result against the reference image Im.
+rms = rmse(Im, nltv_cpu)
+# NOTE(review): the rmse is stored in `pars` (the PatchSelect parameters) and
+# `pars` is what gets printed below, not `pars2` - confirm this is intended.
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+ verticalalignment='top', bbox=props)
+imgplot = plt.imshow(nltv_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
print ("____________FGP-dTV bench___________________")
print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
diff --git a/Wrappers/Python/setup-regularisers.py.in b/Wrappers/Python/setup-regularisers.py.in
index 542dcb4..462edda 100644
--- a/Wrappers/Python/setup-regularisers.py.in
+++ b/Wrappers/Python/setup-regularisers.py.in
@@ -45,6 +45,7 @@ extra_include_dirs += [os.path.join(".." , ".." , "Core"),
os.path.join(".." , ".." , "Core", "regularisers_GPU" , "NDF" ) ,
os.path.join(".." , ".." , "Core", "regularisers_GPU" , "dTV_FGP" ) ,
os.path.join(".." , ".." , "Core", "regularisers_GPU" , "DIFF4th" ) ,
+ os.path.join(".." , ".." , "Core", "regularisers_GPU" , "PatchSelect" ) ,
"."]
if platform.system() == 'Windows':
diff --git a/Wrappers/Python/src/cpu_regularisers.pyx b/Wrappers/Python/src/cpu_regularisers.pyx
index e51e6d8..4aa3251 100644
--- a/Wrappers/Python/src/cpu_regularisers.pyx
+++ b/Wrappers/Python/src/cpu_regularisers.pyx
@@ -456,7 +456,7 @@ def PATCHSEL_CPU(inputData, searchwindow, patchwindow, neighbours, edge_paramete
if inputData.ndim == 2:
return PatchSel_2D(inputData, searchwindow, patchwindow, neighbours, edge_parameter)
elif inputData.ndim == 3:
- return PatchSel_3D(inputData, searchwindow, patchwindow, neighbours, edge_parameter)
+ return 1
def PatchSel_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
int searchwindow,
int patchwindow,
@@ -480,7 +480,7 @@ def PatchSel_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
# Run patch-based weight selection function
PatchSelect_CPU_main(&inputData[0,0], &H_j[0,0,0], &H_i[0,0,0], &H_i[0,0,0], &Weights[0,0,0], dims[2], dims[1], 0, searchwindow, patchwindow, neighbours, edge_parameter, 1)
return H_i, H_j, Weights
-
+"""
def PatchSel_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
int searchwindow,
int patchwindow,
@@ -507,7 +507,7 @@ def PatchSel_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
# Run patch-based weight selection function
PatchSelect_CPU_main(&inputData[0,0,0], &H_i[0,0,0,0], &H_j[0,0,0,0], &H_k[0,0,0,0], &Weights[0,0,0,0], dims[2], dims[1], dims[0], searchwindow, patchwindow, neighbours, edge_parameter, 1)
return H_i, H_j, H_k, Weights
-
+"""
#****************************************************************#
#***************Non-local Total Variation******************#
diff --git a/Wrappers/Python/src/gpu_regularisers.pyx b/Wrappers/Python/src/gpu_regularisers.pyx
index 82d3e01..302727e 100644
--- a/Wrappers/Python/src/gpu_regularisers.pyx
+++ b/Wrappers/Python/src/gpu_regularisers.pyx
@@ -26,6 +26,7 @@ cdef extern void LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF,
cdef extern void NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z);
cdef extern void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int printM, int N, int M, int Z);
cdef extern void Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int N, int M, int Z);
+cdef extern void PatchSelect_GPU_main(float *Input, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h);
# Total-variation Rudin-Osher-Fatemi (ROF)
def TV_ROF_GPU(inputData,
@@ -542,3 +543,34 @@ def Diff4th_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
Diffus4th_GPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, dims[2], dims[1], dims[0])
return outputData
+#****************************************************************#
+#************Patch-based weights pre-selection******************#
+#****************************************************************#
+def PATCHSEL_GPU(inputData, searchwindow, patchwindow, neighbours, edge_parameter):
+    """
+    Dispatch GPU patch selection on the dimensionality of inputData.
+
+    2D input is forwarded to PatchSel_2D and its result is returned.
+    3D input returns the placeholder value 1 (no 3D call is made here).
+    Any other dimensionality falls through and returns None.
+    """
+    ndim = inputData.ndim
+    if ndim == 3:
+        # Placeholder: no 3D routine is invoked from this wrapper.
+        return 1
+    if ndim == 2:
+        return PatchSel_2D(inputData, searchwindow, patchwindow, neighbours, edge_parameter)
+def PatchSel_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
+                int searchwindow,
+                int patchwindow,
+                int neighbours,
+                float edge_parameter):
+    """
+    Run GPU patch selection on a 2D, C-contiguous float32 image.
+
+    Three zero-initialised arrays of shape
+    (neighbours, inputData.shape[0], inputData.shape[1]) are allocated:
+    uint16 H_i and H_j, and float32 Weights. Pointers to them are handed to
+    PatchSelect_GPU_main (presumably filled in place - confirm against the
+    CUDA source), and the arrays are returned as (H_i, H_j, Weights).
+    """
+    # Output extents: one slice per neighbour, then the two image dimensions.
+    cdef long dims[3]
+    dims[0] = neighbours
+    dims[1] = inputData.shape[0]
+    dims[2] = inputData.shape[1]
+
+    cdef np.ndarray[np.uint16_t, ndim=3, mode="c"] H_i = \
+            np.zeros((dims[0], dims[1], dims[2]), dtype=np.uint16)
+
+    cdef np.ndarray[np.uint16_t, ndim=3, mode="c"] H_j = \
+            np.zeros((dims[0], dims[1], dims[2]), dtype=np.uint16)
+
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] Weights = \
+            np.zeros((dims[0], dims[1], dims[2]), dtype=np.float32)
+
+    # Run patch-based weight selection function.
+    # NOTE(review): H_j is passed in the prototype's H_i slot and H_i in its
+    # H_j slot, and the sizes go in as (dims[2], dims[1]) - presumably a
+    # deliberate row/column convention swap; confirm before changing.
+    PatchSelect_GPU_main(&inputData[0,0], &H_j[0,0,0], &H_i[0,0,0], &Weights[0,0,0], dims[2], dims[1], searchwindow, patchwindow, neighbours, edge_parameter)
+
+    return H_i, H_j, Weights