summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDaniil Kazantsev <dkazanc@hotmail.com>2018-02-16 14:09:52 +0000
committerEdoardo Pasca <edo.paskino@gmail.com>2018-02-20 16:18:46 +0000
commitdeac578f1919cba530d4dd0a92ea4975eb8ed43b (patch)
tree8e01f61286ee33a9c48d33a3fb362b6b7d4caf38
parentfe095e13aed4831c372a542ba83c409f6c71af87 (diff)
downloadregularization-deac578f1919cba530d4dd0a92ea4975eb8ed43b.tar.gz
regularization-deac578f1919cba530d4dd0a92ea4975eb8ed43b.tar.bz2
regularization-deac578f1919cba530d4dd0a92ea4975eb8ed43b.tar.xz
regularization-deac578f1919cba530d4dd0a92ea4975eb8ed43b.zip
ROF_TV modified and GPU code added, all cython related files updated, but not testes
-rw-r--r--Core/regularizers_CPU/ROF_TV_core.c249
-rw-r--r--Core/regularizers_CPU/ROF_TV_core.h32
-rw-r--r--Core/regularizers_CPU/utils.c29
-rw-r--r--Core/regularizers_CPU/utils.h1
-rwxr-xr-xCore/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu370
-rwxr-xr-xCore/regularizers_GPU/TV_ROF/TV_ROF_GPU.h7
-rw-r--r--Wrappers/Python/src/cpu_regularizers.pyx57
-rw-r--r--Wrappers/Python/src/gpu_regularizers.pyx62
8 files changed, 638 insertions, 169 deletions
diff --git a/Core/regularizers_CPU/ROF_TV_core.c b/Core/regularizers_CPU/ROF_TV_core.c
index 0b24806..ba7fe48 100644
--- a/Core/regularizers_CPU/ROF_TV_core.c
+++ b/Core/regularizers_CPU/ROF_TV_core.c
@@ -19,7 +19,7 @@
#include "ROF_TV_core.h"
-#define EPS 0.000001
+#define EPS 1.0e-4
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
@@ -29,14 +29,15 @@ int sign(float x) {
/* C-OMP implementation of ROF-TV denoising/regularization model [1] (2D/3D case)
*
+ *
* Input Parameters:
* 1. Noisy image/volume [REQUIRED]
* 2. lambda - regularization parameter [REQUIRED]
- * 3. tau - marching step for explicit scheme, ~0.001 is recommended [REQUIRED]
+ * 3. tau - marching step for explicit scheme, ~1 is recommended [REQUIRED]
* 4. Number of iterations, for explicit scheme >= 150 is recommended [REQUIRED]
*
* Output:
- * [1] Regularized image/volume
+ * [1] Regularized image/volume
*
* This function is based on the paper by
* [1] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
@@ -44,57 +45,64 @@ int sign(float x) {
* D. Kazantsev, 2016-18
*/
+/* Running iterations of TV-ROF function */
+float TV_ROF(float *Input, float *Output, int dimX, int dimY, int dimZ, int iterationsNumb, float tau, float lambda)
+{
+ float *D1, *D2, *D3;
+ int i, DimTotal;
+ DimTotal = dimX*dimY*dimZ;
+
+
+ D1 = (float *) malloc( DimTotal * sizeof(float) );
+ D2 = (float *) malloc( DimTotal * sizeof(float) );
+ D3 = (float *) malloc( DimTotal * sizeof(float) );
+
+
+ /* copy into output */
+ copyIm(Input, Output, dimX, dimY, dimZ);
+
+ /* start TV iterations */
+ for(i=0; i < iterationsNumb; i++) {
+
+ /* calculate differences */
+ D1_func(Output, D1, dimX, dimY, dimZ);
+ D2_func(Output, D2, dimX, dimY, dimZ);
+ if (dimZ > 1) D3_func(Output, D3, dimX, dimY, dimZ);
+ TV_kernel(D1, D2, D3, Output, Input, lambda, tau, dimX, dimY, dimZ);
+ }
+ free(D1);free(D2); free(D3);
+ return *Output;
+}
+
/* calculate differences 1 */
-float D1_func(float *A, float *D1, int dimY, int dimX, int dimZ)
+float D1_func(float *A, float *D1, int dimX, int dimY, int dimZ)
{
float NOMx_1, NOMy_1, NOMy_0, NOMz_1, NOMz_0, denom1, denom2,denom3, T1;
- int i,j,k,i1,i2,k1,j1,j2,k2;
+ int i,j,k,i1,i2,k1,j1,j2,k2,index;
- if (dimZ == 0) {
-#pragma omp parallel for shared (A, D1, dimX, dimY) private(i, j, i1, j1, i2, j2,NOMx_1,NOMy_1,NOMy_0,denom1,denom2,T1)
- for(j=0; j<dimY; j++) {
- for(i=0; i<dimX; i++) {
- /* symmetric boundary conditions (Neuman) */
- i1 = i + 1; if (i1 >= dimY) i1 = i-1;
- i2 = i - 1; if (i2 < 0) i2 = i+1;
- j1 = j + 1; if (j1 >= dimX) j1 = j-1;
- j2 = j - 1; if (j2 < 0) j2 = j+1;
-
- /* Forward-backward differences */
- NOMx_1 = A[i1*dimY + j] - A[(i)*dimY + j]; /* x+ */
- NOMy_1 = A[i*dimY + j1] - A[(i)*dimY + j]; /* y+ */
- /*NOMx_0 = (A[(i)*dimY + j] - A[(i2)*dimY + j]); */ /* x- */
- NOMy_0 = A[(i)*dimY + j] - A[(i)*dimY + j2]; /* y- */
-
- denom1 = NOMx_1*NOMx_1;
- denom2 = 0.5*(sign(NOMy_1) + sign(NOMy_0))*(MIN(fabs(NOMy_1),fabs(NOMy_0)));
- denom2 = denom2*denom2;
- T1 = sqrt(denom1 + denom2 + EPS);
- D1[i*dimY+j] = NOMx_1/T1;
- }}
- }
- else {
-#pragma omp parallel for shared (A, D1, dimX, dimY, dimZ) private(i, j, k, i1, j1, k1, i2, j2, k2, NOMx_1,NOMy_1,NOMy_0,NOMz_1,NOMz_0,denom1,denom2,denom3,T1)
+ if (dimZ > 1) {
+#pragma omp parallel for shared (A, D1, dimX, dimY, dimZ) private(index, i, j, k, i1, j1, k1, i2, j2, k2, NOMx_1,NOMy_1,NOMy_0,NOMz_1,NOMz_0,denom1,denom2,denom3,T1)
for(j=0; j<dimY; j++) {
for(i=0; i<dimX; i++) {
for(k=0; k<dimZ; k++) {
+ index = (dimX*dimY)*k + j*dimX+i;
/* symmetric boundary conditions (Neuman) */
- i1 = i + 1; if (i1 >= dimY) i1 = i-1;
+ i1 = i + 1; if (i1 >= dimX) i1 = i-1;
i2 = i - 1; if (i2 < 0) i2 = i+1;
- j1 = j + 1; if (j1 >= dimX) j1 = j-1;
+ j1 = j + 1; if (j1 >= dimY) j1 = j-1;
j2 = j - 1; if (j2 < 0) j2 = j+1;
k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
k2 = k - 1; if (k2 < 0) k2 = k+1;
/*B[(dimX*dimY)*k + i*dimY+j] = 0.25*(A[(dimX*dimY)*k + (i1)*dimY + j] + A[(dimX*dimY)*k + (i2)*dimY + j] + A[(dimX*dimY)*k + (i)*dimY + j1] + A[(dimX*dimY)*k + (i)*dimY + j2]) - A[(dimX*dimY)*k + i*dimY + j];*/
/* Forward-backward differences */
- NOMx_1 = A[(dimX*dimY)*k + (i1)*dimY + j] - A[(dimX*dimY)*k + (i)*dimY + j]; /* x+ */
- NOMy_1 = A[(dimX*dimY)*k + (i)*dimY + j1] - A[(dimX*dimY)*k + (i)*dimY + j]; /* y+ */
+ NOMx_1 = A[(dimX*dimY)*k + j1*dimX + i] - A[index]; /* x+ */
+ NOMy_1 = A[(dimX*dimY)*k + j*dimX + i1] - A[index]; /* y+ */
/*NOMx_0 = (A[(i)*dimY + j] - A[(i2)*dimY + j]); */ /* x- */
- NOMy_0 = A[(dimX*dimY)*k + (i)*dimY + j] - A[(dimX*dimY)*k + (i)*dimY + j2]; /* y- */
+ NOMy_0 = A[index] - A[(dimX*dimY)*k + j*dimX + i2]; /* y- */
- NOMz_1 = A[(dimX*dimY)*k1 + (i)*dimY + j] - A[(dimX*dimY)*k + (i)*dimY + j]; /* z+ */
- NOMz_0 = A[(dimX*dimY)*k + (i)*dimY + j] - A[(dimX*dimY)*k2 + (i)*dimY + j]; /* z- */
+ NOMz_1 = A[(dimX*dimY)*k1 + j*dimX + i] - A[index]; /* z+ */
+ NOMz_0 = A[index] - A[(dimX*dimY)*k2 + j*dimX + i]; /* z- */
denom1 = NOMx_1*NOMx_1;
@@ -103,60 +111,62 @@ float D1_func(float *A, float *D1, int dimY, int dimX, int dimZ)
denom3 = 0.5*(sign(NOMz_1) + sign(NOMz_0))*(MIN(fabs(NOMz_1),fabs(NOMz_0)));
denom3 = denom3*denom3;
T1 = sqrt(denom1 + denom2 + denom3 + EPS);
- D1[(dimX*dimY)*k + i*dimY+j] = NOMx_1/T1;
+ D1[index] = NOMx_1/T1;
}}}
}
- return *D1;
-}
-/* calculate differences 2 */
-float D2_func(float *A, float *D2, int dimY, int dimX, int dimZ)
-{
- float NOMx_1, NOMy_1, NOMx_0, NOMz_1, NOMz_0, denom1, denom2, denom3, T2;
- int i,j,k,i1,i2,k1,j1,j2,k2;
-
- if (dimZ == 0) {
-#pragma omp parallel for shared (A, D2, dimX, dimY) private(i, j, i1, j1, i2, j2, NOMx_1,NOMy_1,NOMx_0,denom1,denom2,T2)
+ else {
+#pragma omp parallel for shared (A, D1, dimX, dimY) private(i, j, i1, j1, i2, j2,NOMx_1,NOMy_1,NOMy_0,denom1,denom2,T1,index)
for(j=0; j<dimY; j++) {
for(i=0; i<dimX; i++) {
+ index = j*dimX+i;
/* symmetric boundary conditions (Neuman) */
- i1 = i + 1; if (i1 >= dimY) i1 = i-1;
+ i1 = i + 1; if (i1 >= dimX) i1 = i-1;
i2 = i - 1; if (i2 < 0) i2 = i+1;
- j1 = j + 1; if (j1 >= dimX) j1 = j-1;
+ j1 = j + 1; if (j1 >= dimY) j1 = j-1;
j2 = j - 1; if (j2 < 0) j2 = j+1;
/* Forward-backward differences */
- NOMx_1 = A[(i1)*dimY + j] - A[(i)*dimY + j]; /* x+ */
- NOMy_1 = A[i*dimY + j1] - A[(i)*dimY + j]; /* y+ */
- NOMx_0 = A[(i)*dimY + j] - A[(i2)*dimY + j]; /* x- */
- /*NOMy_0 = A[(i)*dimY + j] - A[(i)*dimY + j2]; */ /* y- */
+ NOMx_1 = A[j1*dimX + i] - A[index]; /* x+ */
+ NOMy_1 = A[j*dimX + i1] - A[index]; /* y+ */
+ /*NOMx_0 = (A[(i)*dimY + j] - A[(i2)*dimY + j]); */ /* x- */
+ NOMy_0 = A[index] - A[(j)*dimX + i2]; /* y- */
- denom1 = NOMy_1*NOMy_1;
- denom2 = 0.5*(sign(NOMx_1) + sign(NOMx_0))*(MIN(fabs(NOMx_1),fabs(NOMx_0)));
+ denom1 = NOMx_1*NOMx_1;
+ denom2 = 0.5*(sign(NOMy_1) + sign(NOMy_0))*(MIN(fabs(NOMy_1),fabs(NOMy_0)));
denom2 = denom2*denom2;
- T2 = sqrt(denom1 + denom2 + EPS);
- D2[i*dimY+j] = NOMy_1/T2;
+ T1 = sqrt(denom1 + denom2 + EPS);
+ D1[index] = NOMx_1/T1;
}}
}
- else {
-#pragma omp parallel for shared (A, D2, dimX, dimY, dimZ) private(i, j, k, i1, j1, k1, i2, j2, k2, NOMx_1, NOMy_1, NOMx_0, NOMz_1, NOMz_0, denom1, denom2, denom3, T2)
+ return *D1;
+}
+/* calculate differences 2 */
+float D2_func(float *A, float *D2, int dimX, int dimY, int dimZ)
+{
+ float NOMx_1, NOMy_1, NOMx_0, NOMz_1, NOMz_0, denom1, denom2, denom3, T2;
+ int i,j,k,i1,i2,k1,j1,j2,k2,index;
+
+ if (dimZ > 1) {
+#pragma omp parallel for shared (A, D2, dimX, dimY, dimZ) private(index, i, j, k, i1, j1, k1, i2, j2, k2, NOMx_1, NOMy_1, NOMx_0, NOMz_1, NOMz_0, denom1, denom2, denom3, T2)
for(j=0; j<dimY; j++) {
for(i=0; i<dimX; i++) {
for(k=0; k<dimZ; k++) {
+ index = (dimX*dimY)*k + j*dimX+i;
/* symmetric boundary conditions (Neuman) */
- i1 = i + 1; if (i1 >= dimY) i1 = i-1;
+ i1 = i + 1; if (i1 >= dimX) i1 = i-1;
i2 = i - 1; if (i2 < 0) i2 = i+1;
- j1 = j + 1; if (j1 >= dimX) j1 = j-1;
+ j1 = j + 1; if (j1 >= dimY) j1 = j-1;
j2 = j - 1; if (j2 < 0) j2 = j+1;
k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
k2 = k - 1; if (k2 < 0) k2 = k+1;
/* Forward-backward differences */
- NOMx_1 = A[(dimX*dimY)*k + (i1)*dimY + j] - A[(dimX*dimY)*k + (i)*dimY + j]; /* x+ */
- NOMy_1 = A[(dimX*dimY)*k + (i)*dimY + j1] - A[(dimX*dimY)*k + (i)*dimY + j]; /* y+ */
- NOMx_0 = A[(dimX*dimY)*k + (i)*dimY + j] - A[(dimX*dimY)*k + (i2)*dimY + j]; /* x- */
- NOMz_1 = A[(dimX*dimY)*k1 + (i)*dimY + j] - A[(dimX*dimY)*k + (i)*dimY + j]; /* z+ */
- NOMz_0 = A[(dimX*dimY)*k + (i)*dimY + j] - A[(dimX*dimY)*k2 + (i)*dimY + j]; /* z- */
+ NOMx_1 = A[(dimX*dimY)*k + (j1)*dimX + i] - A[index]; /* x+ */
+ NOMy_1 = A[(dimX*dimY)*k + (j)*dimX + i1] - A[index]; /* y+ */
+ NOMx_0 = A[index] - A[(dimX*dimY)*k + (j2)*dimX + i]; /* x- */
+ NOMz_1 = A[(dimX*dimY)*k1 + j*dimX + i] - A[index]; /* z+ */
+ NOMz_0 = A[index] - A[(dimX*dimY)*k2 + (j)*dimX + i]; /* z- */
denom1 = NOMy_1*NOMy_1;
@@ -165,9 +175,33 @@ float D2_func(float *A, float *D2, int dimY, int dimX, int dimZ)
denom3 = 0.5*(sign(NOMz_1) + sign(NOMz_0))*(MIN(fabs(NOMz_1),fabs(NOMz_0)));
denom3 = denom3*denom3;
T2 = sqrt(denom1 + denom2 + denom3 + EPS);
- D2[(dimX*dimY)*k + i*dimY+j] = NOMy_1/T2;
+ D2[index] = NOMy_1/T2;
}}}
}
+ else {
+#pragma omp parallel for shared (A, D2, dimX, dimY) private(i, j, i1, j1, i2, j2, NOMx_1,NOMy_1,NOMx_0,denom1,denom2,T2,index)
+ for(j=0; j<dimY; j++) {
+ for(i=0; i<dimX; i++) {
+ index = j*dimX+i;
+ /* symmetric boundary conditions (Neuman) */
+ i1 = i + 1; if (i1 >= dimX) i1 = i-1;
+ i2 = i - 1; if (i2 < 0) i2 = i+1;
+ j1 = j + 1; if (j1 >= dimY) j1 = j-1;
+ j2 = j - 1; if (j2 < 0) j2 = j+1;
+
+ /* Forward-backward differences */
+ NOMx_1 = A[j1*dimX + i] - A[index]; /* x+ */
+ NOMy_1 = A[j*dimX + i1] - A[index]; /* y+ */
+ NOMx_0 = A[index] - A[j2*dimX + i]; /* x- */
+ /*NOMy_0 = A[(i)*dimY + j] - A[(i)*dimY + j2]; */ /* y- */
+
+ denom1 = NOMy_1*NOMy_1;
+ denom2 = 0.5*(sign(NOMx_1) + sign(NOMx_0))*(MIN(fabs(NOMx_1),fabs(NOMx_0)));
+ denom2 = denom2*denom2;
+ T2 = sqrt(denom1 + denom2 + EPS);
+ D2[index] = NOMy_1/T2;
+ }}
+ }
return *D2;
}
@@ -175,26 +209,27 @@ float D2_func(float *A, float *D2, int dimY, int dimX, int dimZ)
float D3_func(float *A, float *D3, int dimY, int dimX, int dimZ)
{
float NOMx_1, NOMy_1, NOMx_0, NOMy_0, NOMz_1, denom1, denom2, denom3, T3;
- int i,j,k,i1,i2,k1,j1,j2,k2;
+ int index,i,j,k,i1,i2,k1,j1,j2,k2;
-#pragma omp parallel for shared (A, D3, dimX, dimY, dimZ) private(i, j, k, i1, j1, k1, i2, j2, k2, NOMx_1, NOMy_1, NOMy_0, NOMx_0, NOMz_1, denom1, denom2, denom3, T3)
+#pragma omp parallel for shared (A, D3, dimX, dimY, dimZ) private(index, i, j, k, i1, j1, k1, i2, j2, k2, NOMx_1, NOMy_1, NOMy_0, NOMx_0, NOMz_1, denom1, denom2, denom3, T3)
for(j=0; j<dimY; j++) {
for(i=0; i<dimX; i++) {
for(k=0; k<dimZ; k++) {
+ index = (dimX*dimY)*k + j*dimX+i;
/* symmetric boundary conditions (Neuman) */
- i1 = i + 1; if (i1 >= dimY) i1 = i-1;
+ i1 = i + 1; if (i1 >= dimX) i1 = i-1;
i2 = i - 1; if (i2 < 0) i2 = i+1;
- j1 = j + 1; if (j1 >= dimX) j1 = j-1;
+ j1 = j + 1; if (j1 >= dimY) j1 = j-1;
j2 = j - 1; if (j2 < 0) j2 = j+1;
k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
k2 = k - 1; if (k2 < 0) k2 = k+1;
/* Forward-backward differences */
- NOMx_1 = A[(dimX*dimY)*k + (i1)*dimY + j] - A[(dimX*dimY)*k + (i)*dimY + j]; /* x+ */
- NOMy_1 = A[(dimX*dimY)*k + (i)*dimY + j1] - A[(dimX*dimY)*k + (i)*dimY + j]; /* y+ */
- NOMy_0 = A[(dimX*dimY)*k + (i)*dimY + j] - A[(dimX*dimY)*k + (i)*dimY + j2]; /* y- */
- NOMx_0 = A[(dimX*dimY)*k + (i)*dimY + j] - A[(dimX*dimY)*k + (i2)*dimY + j]; /* x- */
- NOMz_1 = A[(dimX*dimY)*k1 + (i)*dimY + j] - A[(dimX*dimY)*k + (i)*dimY + j]; /* z+ */
+ NOMx_1 = A[(dimX*dimY)*k + (j1)*dimX + i] - A[index]; /* x+ */
+ NOMy_1 = A[(dimX*dimY)*k + (j)*dimX + i1] - A[index]; /* y+ */
+ NOMy_0 = A[index] - A[(dimX*dimY)*k + (j)*dimX + i2]; /* y- */
+ NOMx_0 = A[index] - A[(dimX*dimY)*k + (j2)*dimX + i]; /* x- */
+ NOMz_1 = A[(dimX*dimY)*k1 + j*dimX + i] - A[index]; /* z+ */
/*NOMz_0 = A[(dimX*dimY)*k + (i)*dimY + j] - A[(dimX*dimY)*k2 + (i)*dimY + j]; */ /* z- */
denom1 = NOMz_1*NOMz_1;
@@ -203,57 +238,57 @@ float D3_func(float *A, float *D3, int dimY, int dimX, int dimZ)
denom3 = 0.5*(sign(NOMy_1) + sign(NOMy_0))*(MIN(fabs(NOMy_1),fabs(NOMy_0)));
denom3 = denom3*denom3;
T3 = sqrt(denom1 + denom2 + denom3 + EPS);
- D3[(dimX*dimY)*k + i*dimY+j] = NOMz_1/T3;
+ D3[index] = NOMz_1/T3;
}}}
return *D3;
}
/* calculate divergence */
-float TV_main(float *D1, float *D2, float *D3, float *B, float *A, float lambda, float tau, int dimY, int dimX, int dimZ)
+float TV_kernel(float *D1, float *D2, float *D3, float *B, float *A, float lambda, float tau, int dimX, int dimY, int dimZ)
{
float dv1, dv2, dv3;
int index,i,j,k,i1,i2,k1,j1,j2,k2;
- if (dimZ == 0) {
-#pragma omp parallel for shared (D1, D2, B, dimX, dimY) private(index, i, j, i1, j1, i2, j2,dv1,dv2)
- for(j=0; j<dimY; j++) {
- for(i=0; i<dimX; i++) {
- index = (i)*dimY + j;
- /* symmetric boundary conditions (Neuman) */
- i1 = i + 1; if (i1 >= dimY) i1 = i-1;
- i2 = i - 1; if (i2 < 0) i2 = i+1;
- j1 = j + 1; if (j1 >= dimX) j1 = j-1;
- j2 = j - 1; if (j2 < 0) j2 = j+1;
-
- /* divergence components */
- dv1 = D1[index] - D1[(i2)*dimY + j];
- dv2 = D2[index] - D2[(i)*dimY + j2];
-
- B[index] = B[index] + tau*lambda*(dv1 + dv2) + tau*(A[index] - B[index]);
-
- }}
- }
- else {
+ if (dimZ > 1) {
#pragma omp parallel for shared (D1, D2, D3, B, dimX, dimY, dimZ) private(index, i, j, k, i1, j1, k1, i2, j2, k2, dv1,dv2,dv3)
for(j=0; j<dimY; j++) {
for(i=0; i<dimX; i++) {
for(k=0; k<dimZ; k++) {
- index = (dimX*dimY)*k + i*dimY+j;
+ index = (dimX*dimY)*k + j*dimX+i;
/* symmetric boundary conditions (Neuman) */
- i1 = i + 1; if (i1 >= dimY) i1 = i-1;
+ i1 = i + 1; if (i1 >= dimX) i1 = i-1;
i2 = i - 1; if (i2 < 0) i2 = i+1;
- j1 = j + 1; if (j1 >= dimX) j1 = j-1;
+ j1 = j + 1; if (j1 >= dimY) j1 = j-1;
j2 = j - 1; if (j2 < 0) j2 = j+1;
k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
k2 = k - 1; if (k2 < 0) k2 = k+1;
/*divergence components */
- dv1 = D1[index] - D1[(dimX*dimY)*k + i2*dimY+j];
- dv2 = D2[index] - D2[(dimX*dimY)*k + i*dimY+j2];
- dv3 = D3[index] - D3[(dimX*dimY)*k2 + i*dimY+j];
+ dv1 = D1[index] - D1[(dimX*dimY)*k + j2*dimX+i];
+ dv2 = D2[index] - D2[(dimX*dimY)*k + j*dimX+i2];
+ dv3 = D3[index] - D3[(dimX*dimY)*k2 + j*dimX+i];
B[index] = B[index] + tau*lambda*(dv1 + dv2 + dv3) + tau*(A[index] - B[index]);
- }}}
+ }}}
+ }
+ else {
+#pragma omp parallel for shared (D1, D2, B, dimX, dimY) private(index, i, j, i1, j1, i2, j2,dv1,dv2)
+ for(j=0; j<dimY; j++) {
+ for(i=0; i<dimX; i++) {
+ index = j*dimX+i;
+ /* symmetric boundary conditions (Neuman) */
+ i1 = i + 1; if (i1 >= dimX) i1 = i-1;
+ i2 = i - 1; if (i2 < 0) i2 = i+1;
+ j1 = j + 1; if (j1 >= dimY) j1 = j-1;
+ j2 = j - 1; if (j2 < 0) j2 = j+1;
+
+ /* divergence components */
+ dv1 = D1[index] - D1[j2*dimX + i];
+ dv2 = D2[index] - D2[j*dimX + i2];
+
+ //B[index] = B[index] + tau*lambda*(dv1 + dv2) + tau*(A[index] - B[index]);
+ B[index] = B[index] + tau*((dv1 + dv2) - lambda*(B[index] - A[index]));
+ }}
}
return *B;
-} \ No newline at end of file
+}
diff --git a/Core/regularizers_CPU/ROF_TV_core.h b/Core/regularizers_CPU/ROF_TV_core.h
index 6e4f961..5d69d27 100644
--- a/Core/regularizers_CPU/ROF_TV_core.h
+++ b/Core/regularizers_CPU/ROF_TV_core.h
@@ -26,29 +26,31 @@ limitations under the License.
#include "CCPiDefines.h"
/* C-OMP implementation of ROF-TV denoising/regularization model [1] (2D/3D case)
-*
-* Input Parameters:
+ *
+ *
+ * Input Parameters:
* 1. Noisy image/volume [REQUIRED]
* 2. lambda - regularization parameter [REQUIRED]
- * 3. tau - marching step for explicit scheme, ~0.001 is recommended [REQUIRED]
- * 4. Number of iterations, for explicit scheme >= 150 is recommended [REQUIRED]
-*
-* Output:
-* [1] Regularized image/volume
-
+ * 3. tau - marching step for explicit scheme, ~1 is recommended [REQUIRED]
+ * 4. Number of iterations, for explicit scheme >= 150 is recommended [REQUIRED]
+ *
+ * Output:
+ * [1] Regularized image/volume
+ *
* This function is based on the paper by
-* [1] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
-*
-* D. Kazantsev, 2016-18
-*/
+ * [1] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
+ *
+ * D. Kazantsev, 2016-18
+ */
+
#ifdef __cplusplus
extern "C" {
#endif
-//float copyIm(float *A, float *B, int dimX, int dimY, int dimZ);
-CCPI_EXPORT float TV_main(float *D1, float *D2, float *D3, float *B, float *A, float lambda, float tau, int dimY, int dimX, int dimZ);
+CCPI_EXPORT float TV_kernel(float *D1, float *D2, float *D3, float *B, float *A, float lambda, float tau, int dimY, int dimX, int dimZ);
+CCPI_EXPORT float TV_ROF(float *Input, float *Output, int dimX, int dimY, int dimZ, int iterationsNumb, float tau, float lambda);
CCPI_EXPORT float D1_func(float *A, float *D1, int dimY, int dimX, int dimZ);
CCPI_EXPORT float D2_func(float *A, float *D2, int dimY, int dimX, int dimZ);
CCPI_EXPORT float D3_func(float *A, float *D3, int dimY, int dimX, int dimZ);
#ifdef __cplusplus
}
-#endif \ No newline at end of file
+#endif
diff --git a/Core/regularizers_CPU/utils.c b/Core/regularizers_CPU/utils.c
index 0e83d2c..951fb91 100644
--- a/Core/regularizers_CPU/utils.c
+++ b/Core/regularizers_CPU/utils.c
@@ -26,4 +26,31 @@ float copyIm(float *A, float *U, int dimX, int dimY, int dimZ)
#pragma omp parallel for shared(A, U) private(j)
for (j = 0; j<dimX*dimY*dimZ; j++) U[j] = A[j];
return *U;
-} \ No newline at end of file
+}
+
+/* function that calculates TV energy (ROF model)
+ * min||\nabla u|| + 0.5*lambda*||u -u0||^2
+ * */
+float TV_energy2D(float *U, float *U0, float *E_val, float lambda, int dimX, int dimY)
+{
+ int i, j, i1, j1, index;
+ float NOMx_2, NOMy_2, E_Grad, E_Data;
+
+ /* first calculate \grad U_xy*/
+ for(j=0; j<dimY; j++) {
+ for(i=0; i<dimX; i++) {
+ index = j*dimX+i;
+ /* boundary conditions */
+ i1 = i + 1; if (i == dimX-1) i1 = i;
+ j1 = j + 1; if (j == dimY-1) j1 = j;
+
+ /* Forward differences */
+ NOMx_2 = pow(U[j1*dimX + i] - U[index],2); /* x+ */
+ NOMy_2 = pow(U[j*dimX + i1] - U[index],2); /* y+ */
+ E_Grad += sqrt(NOMx_2 + NOMy_2); /* gradient term energy */
+ E_Data += 0.5f * lambda*(pow((U[index]-U0[index]),2)); /* fidelity term energy */
+ }
+ }
+ E_val[0] = E_Grad + E_Data;
+ return *E_val;
+}
diff --git a/Core/regularizers_CPU/utils.h b/Core/regularizers_CPU/utils.h
index 91b8209..bd76bf0 100644
--- a/Core/regularizers_CPU/utils.h
+++ b/Core/regularizers_CPU/utils.h
@@ -28,6 +28,7 @@ limitations under the License.
extern "C" {
#endif
CCPI_EXPORT float copyIm(float *A, float *U, int dimX, int dimY, int dimZ);
+CCPI_EXPORT float TV_energy2D(float *U, float *U0, float *E_val, float lambda, int dimX, int dimY);
#ifdef __cplusplus
}
#endif
diff --git a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu
new file mode 100755
index 0000000..633bf63
--- /dev/null
+++ b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu
@@ -0,0 +1,370 @@
+ /*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "TV_ROF_GPU.h"
+
+/* C-OMP implementation of ROF-TV denoising/regularization model [1] (2D/3D case)
+*
+* Input Parameters:
+* 1. Noisy image/volume [REQUIRED]
+* 2. lambda - regularization parameter [REQUIRED]
+* 3. tau - marching step for explicit scheme, ~0.1 is recommended [REQUIRED]
+* 4. Number of iterations, for explicit scheme >= 150 is recommended [REQUIRED]
+*
+* Output:
+* [1] Regularized image/volume
+
+ * This function is based on the paper by
+* [1] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
+*
+* D. Kazantsev, 2016-18
+*/
+
+#define CHECK(call) \
+{ \
+ const cudaError_t error = call; \
+ if (error != cudaSuccess) \
+ { \
+ fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \
+ fprintf(stderr, "code: %d, reason: %s\n", error, \
+ cudaGetErrorString(error)); \
+ exit(1); \
+ } \
+}
+
+#define BLKXSIZE 8
+#define BLKYSIZE 8
+#define BLKZSIZE 8
+
+#define BLKXSIZE2D 16
+#define BLKYSIZE2D 16
+#define EPS 1.0e-4
+
+#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
+
+#define MAX(x, y) (((x) > (y)) ? (x) : (y))
+#define MIN(x, y) (((x) < (y)) ? (x) : (y))
+
+__host__ __device__ int sign (float x)
+{
+ return (x > 0) - (x < 0);
+}
+
+/*********************2D case****************************/
+
+ /* differences 1 */
+ __global__ void D1_func2D(float* Input, float* D1, int N, int M)
+ {
+ int i1, j1, i2, j2;
+ float NOMx_1,NOMy_1,NOMy_0,denom1,denom2,T1;
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+
+ int index = i + N*j;
+
+ if ((i >= 0) && (i < N) && (j >= 0) && (j < M)) {
+
+ /* boundary conditions (Neumann reflections) */
+ i1 = i + 1; if (i1 >= N) i1 = i-1;
+ i2 = i - 1; if (i2 < 0) i2 = i+1;
+ j1 = j + 1; if (j1 >= M) j1 = j-1;
+ j2 = j - 1; if (j2 < 0) j2 = j+1;
+
+ /* Forward-backward differences */
+ NOMx_1 = Input[j1*N + i] - Input[index]; /* x+ */
+ NOMy_1 = Input[j*N + i1] - Input[index]; /* y+ */
+ NOMy_0 = Input[index] - Input[j*N + i2]; /* y- */
+
+ denom1 = NOMx_1*NOMx_1;
+ denom2 = 0.5f*(sign((float)NOMy_1) + sign((float)NOMy_0))*(MIN(abs((float)NOMy_1),abs((float)NOMy_0)));
+ denom2 = denom2*denom2;
+ T1 = sqrt(denom1 + denom2 + EPS);
+ D1[index] = NOMx_1/T1;
+ }
+ }
+
+ /* differences 2 */
+ __global__ void D2_func2D(float* Input, float* D2, int N, int M)
+ {
+ int i1, j1, i2, j2;
+ float NOMx_1,NOMy_1,NOMx_0,denom1,denom2,T2;
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+
+ int index = i + N*j;
+
+ if ((i >= 0) && (i < (N)) && (j >= 0) && (j < (M))) {
+
+ /* boundary conditions (Neumann reflections) */
+ i1 = i + 1; if (i1 >= N) i1 = i-1;
+ i2 = i - 1; if (i2 < 0) i2 = i+1;
+ j1 = j + 1; if (j1 >= M) j1 = j-1;
+ j2 = j - 1; if (j2 < 0) j2 = j+1;
+
+ /* Forward-backward differences */
+ NOMx_1 = Input[j1*N + i] - Input[index]; /* x+ */
+ NOMy_1 = Input[j*N + i1] - Input[index]; /* y+ */
+ NOMx_0 = Input[index] - Input[j2*N + i]; /* x- */
+
+ denom1 = NOMy_1*NOMy_1;
+ denom2 = 0.5f*(sign((float)NOMx_1) + sign((float)NOMx_0))*(MIN(abs((float)NOMx_1),abs((float)NOMx_0)));
+ denom2 = denom2*denom2;
+ T2 = sqrt(denom1 + denom2 + EPS);
+ D2[index] = NOMy_1/T2;
+ }
+ }
+
+ __global__ void TV_kernel2D(float *D1, float *D2, float *Update, float *Input, float lambda, float tau, int N, int M)
+ {
+ int i2, j2;
+ float dv1,dv2;
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+
+ int index = i + N*j;
+
+ if ((i >= 0) && (i < (N)) && (j >= 0) && (j < (M))) {
+
+ /* boundary conditions (Neumann reflections) */
+ i2 = i - 1; if (i2 < 0) i2 = i+1;
+ j2 = j - 1; if (j2 < 0) j2 = j+1;
+
+ /* divergence components */
+ dv1 = D1[index] - D1[j2*N + i];
+ dv2 = D2[index] - D2[j*N + i2];
+
+ Update[index] = Update[index] + tau*((dv1 + dv2) - lambda*(Update[index] - Input[index]));
+
+ }
+ }
+/*********************3D case****************************/
+
+ /* differences 1 */
+ __global__ void D1_func3D(float* Input, float* D1, int dimX, int dimY, int dimZ)
+ {
+ float NOMx_1, NOMy_1, NOMy_0, NOMz_1, NOMz_0, denom1, denom2,denom3, T1;
+ int i1,i2,k1,j1,j2,k2;
+
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+ int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+ int index = (dimX*dimY)*k + j*dimX+i;
+
+ if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
+
+ /* symmetric boundary conditions (Neuman) */
+ i1 = i + 1; if (i1 >= dimX) i1 = i-1;
+ i2 = i - 1; if (i2 < 0) i2 = i+1;
+ j1 = j + 1; if (j1 >= dimY) j1 = j-1;
+ j2 = j - 1; if (j2 < 0) j2 = j+1;
+ k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
+ k2 = k - 1; if (k2 < 0) k2 = k+1;
+
+ /* Forward-backward differences */
+ NOMx_1 = Input[(dimX*dimY)*k + j1*dimX + i] - Input[index]; /* x+ */
+ NOMy_1 = Input[(dimX*dimY)*k + j*dimX + i1] - Input[index]; /* y+ */
+ NOMy_0 = Input[index] - Input[(dimX*dimY)*k + j*dimX + i2]; /* y- */
+
+ NOMz_1 = Input[(dimX*dimY)*k1 + j*dimX + i] - Input[index]; /* z+ */
+ NOMz_0 = Input[index] - Input[(dimX*dimY)*k2 + j*dimX + i]; /* z- */
+
+
+ denom1 = NOMx_1*NOMx_1;
+ denom2 = 0.5*(sign(NOMy_1) + sign(NOMy_0))*(MIN(abs(NOMy_1),abs(NOMy_0)));
+ denom2 = denom2*denom2;
+ denom3 = 0.5*(sign(NOMz_1) + sign(NOMz_0))*(MIN(abs(NOMz_1),abs(NOMz_0)));
+ denom3 = denom3*denom3;
+ T1 = sqrt(denom1 + denom2 + denom3 + EPS);
+ D1[index] = NOMx_1/T1;
+ }
+ }
+
+ /* differences 2 */
+ __global__ void D2_func3D(float* Input, float* D2, int dimX, int dimY, int dimZ)
+ {
+ float NOMx_1, NOMy_1, NOMx_0, NOMz_1, NOMz_0, denom1, denom2, denom3, T2;
+ int i1,i2,k1,j1,j2,k2;
+
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+ int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+ int index = (dimX*dimY)*k + j*dimX+i;
+
+ if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
+ /* symmetric boundary conditions (Neuman) */
+ i1 = i + 1; if (i1 >= dimX) i1 = i-1;
+ i2 = i - 1; if (i2 < 0) i2 = i+1;
+ j1 = j + 1; if (j1 >= dimY) j1 = j-1;
+ j2 = j - 1; if (j2 < 0) j2 = j+1;
+ k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
+ k2 = k - 1; if (k2 < 0) k2 = k+1;
+
+
+ /* Forward-backward differences */
+ NOMx_1 = Input[(dimX*dimY)*k + (j1)*dimX + i] - Input[index]; /* x+ */
+ NOMy_1 = Input[(dimX*dimY)*k + (j)*dimX + i1] - Input[index]; /* y+ */
+ NOMx_0 = Input[index] - Input[(dimX*dimY)*k + (j2)*dimX + i]; /* x- */
+ NOMz_1 = Input[(dimX*dimY)*k1 + j*dimX + i] - Input[index]; /* z+ */
+ NOMz_0 = Input[index] - Input[(dimX*dimY)*k2 + (j)*dimX + i]; /* z- */
+
+
+ denom1 = NOMy_1*NOMy_1;
+ denom2 = 0.5*(sign(NOMx_1) + sign(NOMx_0))*(MIN(abs(NOMx_1),abs(NOMx_0)));
+ denom2 = denom2*denom2;
+ denom3 = 0.5*(sign(NOMz_1) + sign(NOMz_0))*(MIN(abs(NOMz_1),abs(NOMz_0)));
+ denom3 = denom3*denom3;
+ T2 = sqrt(denom1 + denom2 + denom3 + EPS);
+ D2[index] = NOMy_1/T2;
+ }
+ }
+
+ /* differences 3 */
+ __global__ void D3_func3D(float* Input, float* D3, int dimX, int dimY, int dimZ)
+ {
+ float NOMx_1, NOMy_1, NOMx_0, NOMy_0, NOMz_1, denom1, denom2, denom3, T3;
+ int i1,i2,k1,j1,j2,k2;
+
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+ int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+ int index = (dimX*dimY)*k + j*dimX+i;
+
+ if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
+
+ i1 = i + 1; if (i1 >= dimX) i1 = i-1;
+ i2 = i - 1; if (i2 < 0) i2 = i+1;
+ j1 = j + 1; if (j1 >= dimY) j1 = j-1;
+ j2 = j - 1; if (j2 < 0) j2 = j+1;
+ k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
+ k2 = k - 1; if (k2 < 0) k2 = k+1;
+
+ /* Forward-backward differences */
+ NOMx_1 = Input[(dimX*dimY)*k + (j1)*dimX + i] - Input[index]; /* x+ */
+ NOMy_1 = Input[(dimX*dimY)*k + (j)*dimX + i1] - Input[index]; /* y+ */
+ NOMy_0 = Input[index] - Input[(dimX*dimY)*k + (j)*dimX + i2]; /* y- */
+ NOMx_0 = Input[index] - Input[(dimX*dimY)*k + (j2)*dimX + i]; /* x- */
+ NOMz_1 = Input[(dimX*dimY)*k1 + j*dimX + i] - Input[index]; /* z+ */
+
+ denom1 = NOMz_1*NOMz_1;
+ denom2 = 0.5*(sign(NOMx_1) + sign(NOMx_0))*(MIN(abs(NOMx_1),abs(NOMx_0)));
+ denom2 = denom2*denom2;
+ denom3 = 0.5*(sign(NOMy_1) + sign(NOMy_0))*(MIN(abs(NOMy_1),abs(NOMy_0)));
+ denom3 = denom3*denom3;
+ T3 = sqrt(denom1 + denom2 + denom3 + EPS);
+ D3[index] = NOMz_1/T3;
+ }
+ }
+
+ __global__ void TV_kernel3D(float *D1, float *D2, float *D3, float *Update, float *Input, float lambda, float tau, int dimX, int dimY, int dimZ)
+ {
+ float dv1, dv2, dv3;
+ int i1,i2,k1,j1,j2,k2;
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+ int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+ int index = (dimX*dimY)*k + j*dimX+i;
+
+ if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
+
+ /* symmetric boundary conditions (Neuman) */
+ i1 = i + 1; if (i1 >= dimX) i1 = i-1;
+ i2 = i - 1; if (i2 < 0) i2 = i+1;
+ j1 = j + 1; if (j1 >= dimY) j1 = j-1;
+ j2 = j - 1; if (j2 < 0) j2 = j+1;
+ k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
+ k2 = k - 1; if (k2 < 0) k2 = k+1;
+
+ /*divergence components */
+ dv1 = D1[index] - D1[(dimX*dimY)*k + j2*dimX+i];
+ dv2 = D2[index] - D2[(dimX*dimY)*k + j*dimX+i2];
+ dv3 = D3[index] - D3[(dimX*dimY)*k2 + j*dimX+i];
+
+ Update[index] = Update[index] + tau*lambda*(dv1 + dv2 + dv3) + tau*(Update[index] - Input[index]);
+
+ }
+ }
+
+/////////////////////////////////////////////////
+// HOST FUNCTION
+extern "C" void TV_ROF_GPU(float* Input, float* Output, int N, int M, int Z, int iter, float tau, float lambda)
+{
+ // set up device
+ int dev = 0;
+ CHECK(cudaSetDevice(dev));
+
+ float *d_input, *d_update, *d_D1, *d_D2;
+
+ CHECK(cudaMalloc((void**)&d_input,N*M*Z*sizeof(float)));
+ CHECK(cudaMalloc((void**)&d_update,N*M*Z*sizeof(float)));
+ CHECK(cudaMalloc((void**)&d_D1,N*M*Z*sizeof(float)));
+ CHECK(cudaMalloc((void**)&d_D2,N*M*Z*sizeof(float)));
+
+ CHECK(cudaMemcpy(d_input,Input,N*M*Z*sizeof(float),cudaMemcpyHostToDevice));
+ CHECK(cudaMemcpy(d_update,Input,N*M*Z*sizeof(float),cudaMemcpyHostToDevice));
+
+ if (Z > 1) {
+ // TV - 3D case
+ dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE);
+ dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE),idivup(Z,BLKXSIZE));
+
+ float *d_D3;
+ CHECK(cudaMalloc((void**)&d_D3,N*M*Z*sizeof(float)));
+
+ for(int n=0; n < iter; n++) {
+ /* calculate differences */
+ D1_func3D<<<dimGrid,dimBlock>>>(d_update, d_D1, N, M, Z);
+ CHECK(cudaDeviceSynchronize());
+ D2_func3D<<<dimGrid,dimBlock>>>(d_update, d_D2, N, M, Z);
+ CHECK(cudaDeviceSynchronize());
+ D3_func3D<<<dimGrid,dimBlock>>>(d_update, d_D3, N, M, Z);
+ CHECK(cudaDeviceSynchronize());
+ /*running main kernel*/
+ TV_kernel3D<<<dimGrid,dimBlock>>>(d_D1, d_D2, d_D3, d_update, d_input, lambda, tau, N, M, Z);
+ CHECK(cudaDeviceSynchronize());
+ }
+
+ CHECK(cudaFree(d_D3));
+ }
+ else {
+ // TV - 2D case
+ dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D);
+ dim3 dimGrid(idivup(N,BLKXSIZE2D), idivup(M,BLKYSIZE2D));
+
+ for(int n=0; n < iter; n++) {
+ /* calculate differences */
+ D1_func2D<<<dimGrid,dimBlock>>>(d_update, d_D1, N, M);
+ CHECK(cudaDeviceSynchronize());
+ D2_func2D<<<dimGrid,dimBlock>>>(d_update, d_D2, N, M);
+ CHECK(cudaDeviceSynchronize());
+ /*running main kernel*/
+ TV_kernel2D<<<dimGrid,dimBlock>>>(d_D1, d_D2, d_update, d_input, lambda, tau, N, M);
+ CHECK(cudaDeviceSynchronize());
+ }
+ }
+ CHECK(cudaMemcpy(Output,d_update,N*M*Z*sizeof(float),cudaMemcpyDeviceToHost));
+ CHECK(cudaFree(d_input));
+ CHECK(cudaFree(d_update));
+ CHECK(cudaFree(d_D1));
+ CHECK(cudaFree(d_D2));
+ cudaDeviceReset();
+}
diff --git a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.h b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.h
new file mode 100755
index 0000000..3163482
--- /dev/null
+++ b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.h
@@ -0,0 +1,7 @@
+#ifndef __TVGPU_H__
+#define __TVGPU_H__
+#include "CCPiDefines.h"
+
+extern "C" CCPI_EXPORT void TV_ROF_GPU(float* Input, float* Output, int N, int M, int Z, int iter, float tau, float lambda);
+
+#endif
diff --git a/Wrappers/Python/src/cpu_regularizers.pyx b/Wrappers/Python/src/cpu_regularizers.pyx
index b22e603..e306ab3 100644
--- a/Wrappers/Python/src/cpu_regularizers.pyx
+++ b/Wrappers/Python/src/cpu_regularizers.pyx
@@ -18,12 +18,7 @@ import cython
import numpy as np
cimport numpy as np
-cdef extern float TV_main(float *D1, float *D2, float *D3, float *B, float *A,
- float lambda, float tau, int dimY, int dimX, int dimZ);
-cdef extern float D1_func(float *A, float *D1, int dimY, int dimX, int dimZ);
-cdef extern float D2_func(float *A, float *D2, int dimY, int dimX, int dimZ);
-cdef extern float D3_func(float *A, float *D3, int dimY, int dimX, int dimZ);
-cdef extern void copyIm (float *A, float *U, int dimX, int dimY, int dimZ);
+cdef extern float TV_ROF(float *A, float *B, int dimX, int dimY, int dimZ, int iterationsNumb, float tau, float lambda);
def ROF_TV(inputData, iterations, regularization_parameter, marching_step_parameter):
@@ -39,30 +34,17 @@ def ROF_TV_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
float regularization_parameter
float marching_step_parameter
):
+
cdef long dims[2]
dims[0] = inputData.shape[0]
dims[1] = inputData.shape[1]
cdef np.ndarray[np.float32_t, ndim=2, mode="c"] B = \
np.zeros([dims[0],dims[1]], dtype='float32')
- cdef np.ndarray[np.float32_t, ndim=2, mode="c"] D1 = \
- np.zeros([dims[0],dims[1]], dtype='float32')
- cdef np.ndarray[np.float32_t, ndim=2, mode="c"] D2 = \
- np.zeros([dims[0],dims[1]], dtype='float32')
-
- copyIm(&inputData[0,0], &B[0,0], dims[0], dims[1], 1);
- #/* start TV iterations */
- cdef int i = 0;
- for i in range(iterations):
-
- #/* calculate differences */
- D1_func(&B[0,0], &D1[0,0], dims[0], dims[1], 1);
- D2_func(&B[0,0], &D2[0,0], dims[0], dims[1], 1);
-
- #/* calculate divergence and image update*/
- TV_main(&D1[0,0], &D2[0,0], &D2[0,0], &B[0,0], &A[0,0],
- regularization_parameter, marching_step_parameter,
- dims[0], dims[1], 1)
+
+ #/* Run ROF iterations for 2D data */
+ TV_ROF(&A[0,0], &B[0,0], dims[0], dims[1], 1, iterations, marching_step_parameter, regularization_parameter)
+
return B
@@ -78,26 +60,9 @@ def ROF_TV_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
cdef np.ndarray[np.float32_t, ndim=3, mode="c"] B = \
np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
- cdef np.ndarray[np.float32_t, ndim=3, mode="c"] D1 = \
- np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
- cdef np.ndarray[np.float32_t, ndim=3, mode="c"] D2 = \
- np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
- cdef np.ndarray[np.float32_t, ndim=3, mode="c"] D3 = \
- np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
-
- copyIm(&inputData[0,0,0], &B[0,0,0], dims[0], dims[1], dims[2]);
- #/* start TV iterations */
- cdef int i = 0;
- for i in range(iterations):
-
- #/* calculate differences */
- D1_func(&B[0,0,0], &D1[0,0,0], dims[0], dims[1], dims[2]);
- D2_func(&B[0,0,0], &D2[0,0,0], dims[0], dims[1], dims[2]);
- D3_func(&B[0,0,0], &D3[0,0,0], dims[0], dims[1], dims[2]);
-
- #/* calculate divergence and image update*/
- TV_main(&D1[0,0,0], &D2[0,0,0], &D3[0,0,0], &B[0,0,0], &A[0,0,0],
- regularization_parameter, marching_step_parameter,
- dims[0], dims[1], dims[2])
+
+ #/* Run ROF iterations for 3D data */
+ TV_ROF(&A[0,0, 0], &B[0,0, 0], dims[0], dims[1], dims[2], iterations, marching_step_parameter, regularization_parameter)
+
return B
- \ No newline at end of file
+
diff --git a/Wrappers/Python/src/gpu_regularizers.pyx b/Wrappers/Python/src/gpu_regularizers.pyx
index 84fc30a..649015e 100644
--- a/Wrappers/Python/src/gpu_regularizers.pyx
+++ b/Wrappers/Python/src/gpu_regularizers.pyx
@@ -25,6 +25,7 @@ cdef extern void NLM_GPU_kernel(float *A, float* B, float *Eucl_Vec,
int N, int M, int Z, int dimension,
int SearchW, int SimilW,
int SearchW_real, float denh2, float lambdaf);
+cdef extern void TV_ROF_GPU(float* A, float* B, int N, int M, int Z, int iter, float tau, float lambdaf);
cdef extern float pad_crop(float *A, float *Ap,
int OldSizeX, int OldSizeY, int OldSizeZ,
int NewSizeX, int NewSizeY, int NewSizeZ,
@@ -65,6 +66,22 @@ def NML(inputData,
SimilW,
h,
lambdaf)
+
+def ROF_TV(inputData,
+ iterations,
+ time_marching_parameter,
+ regularization_parameter):
+ if inputData.ndim == 2:
+ return ROFTV2D(inputData,
+ iterations,
+ time_marching_parameter,
+ regularization_parameter)
+ elif inputData.ndim == 3:
+ return ROFTV3D(inputData,
+ iterations,
+ time_marching_parameter,
+ regularization_parameter)
+
def Diff4thHajiaboli2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
float edge_preserv_parameter,
@@ -311,4 +328,49 @@ def NML3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
padXY,
switchpad_crop)
+ return B
+
+def ROFTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
+ int iterations,
+ float time_marching_parameter,
+ float regularization_parameter):
+
+ cdef long dims[2]
+ dims[0] = inputData.shape[0]
+ dims[1] = inputData.shape[1]
+
+ cdef np.ndarray[np.float32_t, ndim=2, mode="c"] B = \
+ np.zeros([dims[0],dims[1]], dtype='float32')
+
+ # Running CUDA code here
+ TV_ROF_GPU(
+ &inputData[0,0], &B[0,0],
+ dims[0], dims[1], 0,
+ iterations ,
+ time_marching_parameter,
+ regularization_parameter);
+
+ return B
+
+def ROFTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
+ int iterations,
+ float time_marching_parameter,
+ float regularization_parameter):
+
+ cdef long dims[3]
+ dims[0] = inputData.shape[0]
+ dims[1] = inputData.shape[1]
+ dims[2] = inputData.shape[2]
+
+ cdef np.ndarray[np.float32_t, ndim=3, mode="c"] B = \
+ np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
+
+ # Running CUDA code here
+ TV_ROF_GPU(
+ &inputData[0,0,0], &B[0,0,0],
+ dims[0], dims[1], dims[2],
+ iterations ,
+ time_marching_parameter,
+ regularization_parameter);
+
return B