diff options
author | Daniil Kazantsev <dkazanc@hotmail.com> | 2018-02-16 14:09:52 +0000 |
---|---|---|
committer | Edoardo Pasca <edo.paskino@gmail.com> | 2018-02-20 16:18:46 +0000 |
commit | deac578f1919cba530d4dd0a92ea4975eb8ed43b (patch) | |
tree | 8e01f61286ee33a9c48d33a3fb362b6b7d4caf38 | |
parent | fe095e13aed4831c372a542ba83c409f6c71af87 (diff) | |
download | regularization-deac578f1919cba530d4dd0a92ea4975eb8ed43b.tar.gz regularization-deac578f1919cba530d4dd0a92ea4975eb8ed43b.tar.bz2 regularization-deac578f1919cba530d4dd0a92ea4975eb8ed43b.tar.xz regularization-deac578f1919cba530d4dd0a92ea4975eb8ed43b.zip |
ROF_TV modified and GPU code added, all cython related files updated, but not testes
-rw-r--r-- | Core/regularizers_CPU/ROF_TV_core.c | 249 | ||||
-rw-r--r-- | Core/regularizers_CPU/ROF_TV_core.h | 32 | ||||
-rw-r--r-- | Core/regularizers_CPU/utils.c | 29 | ||||
-rw-r--r-- | Core/regularizers_CPU/utils.h | 1 | ||||
-rwxr-xr-x | Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu | 370 | ||||
-rwxr-xr-x | Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.h | 7 | ||||
-rw-r--r-- | Wrappers/Python/src/cpu_regularizers.pyx | 57 | ||||
-rw-r--r-- | Wrappers/Python/src/gpu_regularizers.pyx | 62 |
8 files changed, 638 insertions, 169 deletions
diff --git a/Core/regularizers_CPU/ROF_TV_core.c b/Core/regularizers_CPU/ROF_TV_core.c index 0b24806..ba7fe48 100644 --- a/Core/regularizers_CPU/ROF_TV_core.c +++ b/Core/regularizers_CPU/ROF_TV_core.c @@ -19,7 +19,7 @@ #include "ROF_TV_core.h" -#define EPS 0.000001 +#define EPS 1.0e-4 #define MAX(x, y) (((x) > (y)) ? (x) : (y)) #define MIN(x, y) (((x) < (y)) ? (x) : (y)) @@ -29,14 +29,15 @@ int sign(float x) { /* C-OMP implementation of ROF-TV denoising/regularization model [1] (2D/3D case) * + * * Input Parameters: * 1. Noisy image/volume [REQUIRED] * 2. lambda - regularization parameter [REQUIRED] - * 3. tau - marching step for explicit scheme, ~0.001 is recommended [REQUIRED] + * 3. tau - marching step for explicit scheme, ~1 is recommended [REQUIRED] * 4. Number of iterations, for explicit scheme >= 150 is recommended [REQUIRED] * * Output: - * [1] Regularized image/volume + * [1] Regularized image/volume * * This function is based on the paper by * [1] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms" @@ -44,57 +45,64 @@ int sign(float x) { * D. Kazantsev, 2016-18 */ +/* Running iterations of TV-ROF function */ +float TV_ROF(float *Input, float *Output, int dimX, int dimY, int dimZ, int iterationsNumb, float tau, float lambda) +{ + float *D1, *D2, *D3; + int i, DimTotal; + DimTotal = dimX*dimY*dimZ; + + + D1 = (float *) malloc( DimTotal * sizeof(float) ); + D2 = (float *) malloc( DimTotal * sizeof(float) ); + D3 = (float *) malloc( DimTotal * sizeof(float) ); + + + /* copy into output */ + copyIm(Input, Output, dimX, dimY, dimZ); + + /* start TV iterations */ + for(i=0; i < iterationsNumb; i++) { + + /* calculate differences */ + D1_func(Output, D1, dimX, dimY, dimZ); + D2_func(Output, D2, dimX, dimY, dimZ); + if (dimZ > 1) D3_func(Output, D3, dimX, dimY, dimZ); + TV_kernel(D1, D2, D3, Output, Input, lambda, tau, dimX, dimY, dimZ); + } + free(D1);free(D2); free(D3); + return *Output; +} + /* calculate differences 1 */ -float D1_func(float *A, float *D1, int dimY, int dimX, int dimZ) +float D1_func(float *A, float *D1, int dimX, int dimY, int dimZ) { float NOMx_1, NOMy_1, NOMy_0, NOMz_1, NOMz_0, denom1, denom2,denom3, T1; - int i,j,k,i1,i2,k1,j1,j2,k2; + int i,j,k,i1,i2,k1,j1,j2,k2,index; - if (dimZ == 0) { -#pragma omp parallel for shared (A, D1, dimX, dimY) private(i, j, i1, j1, i2, j2,NOMx_1,NOMy_1,NOMy_0,denom1,denom2,T1) - for(j=0; j<dimY; j++) { - for(i=0; i<dimX; i++) { - /* symmetric boundary conditions (Neuman) */ - i1 = i + 1; if (i1 >= dimY) i1 = i-1; - i2 = i - 1; if (i2 < 0) i2 = i+1; - j1 = j + 1; if (j1 >= dimX) j1 = j-1; - j2 = j - 1; if (j2 < 0) j2 = j+1; - - /* Forward-backward differences */ - NOMx_1 = A[i1*dimY + j] - A[(i)*dimY + j]; /* x+ */ - NOMy_1 = A[i*dimY + j1] - A[(i)*dimY + j]; /* y+ */ - /*NOMx_0 = (A[(i)*dimY + j] - A[(i2)*dimY + j]); */ /* x- */ - NOMy_0 = A[(i)*dimY + j] - A[(i)*dimY + j2]; /* y- */ - - denom1 = NOMx_1*NOMx_1; - denom2 = 0.5*(sign(NOMy_1) + sign(NOMy_0))*(MIN(fabs(NOMy_1),fabs(NOMy_0))); - denom2 = denom2*denom2; - T1 = sqrt(denom1 + denom2 + EPS); - D1[i*dimY+j] = NOMx_1/T1; - }} - } - else { -#pragma omp parallel for shared (A, D1, dimX, dimY, dimZ) private(i, j, k, i1, j1, k1, i2, j2, k2, NOMx_1,NOMy_1,NOMy_0,NOMz_1,NOMz_0,denom1,denom2,denom3,T1) + if (dimZ > 1) { +#pragma omp parallel for shared (A, D1, dimX, dimY, dimZ) private(index, i, j, k, i1, j1, k1, i2, j2, k2, NOMx_1,NOMy_1,NOMy_0,NOMz_1,NOMz_0,denom1,denom2,denom3,T1) for(j=0; j<dimY; j++) { for(i=0; i<dimX; i++) { for(k=0; k<dimZ; k++) { + index = (dimX*dimY)*k + j*dimX+i; /* symmetric boundary conditions (Neuman) */ - i1 = i + 1; if (i1 >= dimY) i1 = i-1; + i1 = i + 1; if (i1 >= dimX) i1 = i-1; i2 = i - 1; if (i2 < 0) i2 = i+1; - j1 = j + 1; if (j1 >= dimX) j1 = j-1; + j1 = j + 1; if (j1 >= dimY) j1 = j-1; j2 = j - 1; if (j2 < 0) j2 = j+1; k1 = k + 1; if (k1 >= dimZ) k1 = k-1; k2 = k - 1; if (k2 < 0) k2 = k+1; /*B[(dimX*dimY)*k + i*dimY+j] = 0.25*(A[(dimX*dimY)*k + (i1)*dimY + j] + A[(dimX*dimY)*k + (i2)*dimY + j] + A[(dimX*dimY)*k + (i)*dimY + j1] + A[(dimX*dimY)*k + (i)*dimY + j2]) - A[(dimX*dimY)*k + i*dimY + j];*/ /* Forward-backward differences */ - NOMx_1 = A[(dimX*dimY)*k + (i1)*dimY + j] - A[(dimX*dimY)*k + (i)*dimY + j]; /* x+ */ - NOMy_1 = A[(dimX*dimY)*k + (i)*dimY + j1] - A[(dimX*dimY)*k + (i)*dimY + j]; /* y+ */ + NOMx_1 = A[(dimX*dimY)*k + j1*dimX + i] - A[index]; /* x+ */ + NOMy_1 = A[(dimX*dimY)*k + j*dimX + i1] - A[index]; /* y+ */ /*NOMx_0 = (A[(i)*dimY + j] - A[(i2)*dimY + j]); */ /* x- */ - NOMy_0 = A[(dimX*dimY)*k + (i)*dimY + j] - A[(dimX*dimY)*k + (i)*dimY + j2]; /* y- */ + NOMy_0 = A[index] - A[(dimX*dimY)*k + j*dimX + i2]; /* y- */ - NOMz_1 = A[(dimX*dimY)*k1 + (i)*dimY + j] - A[(dimX*dimY)*k + (i)*dimY + j]; /* z+ */ - NOMz_0 = A[(dimX*dimY)*k + (i)*dimY + j] - A[(dimX*dimY)*k2 + (i)*dimY + j]; /* z- */ + NOMz_1 = A[(dimX*dimY)*k1 + j*dimX + i] - A[index]; /* z+ */ + NOMz_0 = A[index] - A[(dimX*dimY)*k2 + j*dimX + i]; /* z- */ denom1 = NOMx_1*NOMx_1; @@ -103,60 +111,62 @@ float D1_func(float *A, float *D1, int dimY, int dimX, int dimZ) denom3 = 0.5*(sign(NOMz_1) + sign(NOMz_0))*(MIN(fabs(NOMz_1),fabs(NOMz_0))); denom3 = denom3*denom3; T1 = sqrt(denom1 + denom2 + denom3 + EPS); - D1[(dimX*dimY)*k + i*dimY+j] = NOMx_1/T1; + D1[index] = NOMx_1/T1; }}} } - return *D1; -} -/* calculate differences 2 */ -float D2_func(float *A, float *D2, int dimY, int dimX, int dimZ) -{ - float NOMx_1, NOMy_1, NOMx_0, NOMz_1, NOMz_0, denom1, denom2, denom3, T2; - int i,j,k,i1,i2,k1,j1,j2,k2; - - if (dimZ == 0) { -#pragma omp parallel for shared (A, D2, dimX, dimY) private(i, j, i1, j1, i2, j2, NOMx_1,NOMy_1,NOMx_0,denom1,denom2,T2) + else { +#pragma omp parallel for shared (A, D1, dimX, dimY) private(i, j, i1, j1, i2, j2,NOMx_1,NOMy_1,NOMy_0,denom1,denom2,T1,index) for(j=0; j<dimY; j++) { for(i=0; i<dimX; i++) { + index = j*dimX+i; /* symmetric boundary conditions (Neuman) */ - i1 = i + 1; if (i1 >= dimY) i1 = i-1; + i1 = i + 1; if (i1 >= dimX) i1 = i-1; i2 = i - 1; if (i2 < 0) i2 = i+1; - j1 = j + 1; if (j1 >= dimX) j1 = j-1; + j1 = j + 1; if (j1 >= dimY) j1 = j-1; j2 = j - 1; if (j2 < 0) j2 = j+1; /* Forward-backward differences */ - NOMx_1 = A[(i1)*dimY + j] - A[(i)*dimY + j]; /* x+ */ - NOMy_1 = A[i*dimY + j1] - A[(i)*dimY + j]; /* y+ */ - NOMx_0 = A[(i)*dimY + j] - A[(i2)*dimY + j]; /* x- */ - /*NOMy_0 = A[(i)*dimY + j] - A[(i)*dimY + j2]; */ /* y- */ + NOMx_1 = A[j1*dimX + i] - A[index]; /* x+ */ + NOMy_1 = A[j*dimX + i1] - A[index]; /* y+ */ + /*NOMx_0 = (A[(i)*dimY + j] - A[(i2)*dimY + j]); */ /* x- */ + NOMy_0 = A[index] - A[(j)*dimX + i2]; /* y- */ - denom1 = NOMy_1*NOMy_1; - denom2 = 0.5*(sign(NOMx_1) + sign(NOMx_0))*(MIN(fabs(NOMx_1),fabs(NOMx_0))); + denom1 = NOMx_1*NOMx_1; + denom2 = 0.5*(sign(NOMy_1) + sign(NOMy_0))*(MIN(fabs(NOMy_1),fabs(NOMy_0))); denom2 = denom2*denom2; - T2 = sqrt(denom1 + denom2 + EPS); - D2[i*dimY+j] = NOMy_1/T2; + T1 = sqrt(denom1 + denom2 + EPS); + D1[index] = NOMx_1/T1; }} } - else { -#pragma omp parallel for shared (A, D2, dimX, dimY, dimZ) private(i, j, k, i1, j1, k1, i2, j2, k2, NOMx_1, NOMy_1, NOMx_0, NOMz_1, NOMz_0, denom1, denom2, denom3, T2) + return *D1; +} +/* calculate differences 2 */ +float D2_func(float *A, float *D2, int dimX, int dimY, int dimZ) +{ + float NOMx_1, NOMy_1, NOMx_0, NOMz_1, NOMz_0, denom1, denom2, denom3, T2; + int i,j,k,i1,i2,k1,j1,j2,k2,index; + + if (dimZ > 1) { +#pragma omp parallel for shared (A, D2, dimX, dimY, dimZ) private(index, i, j, k, i1, j1, k1, i2, j2, k2, NOMx_1, NOMy_1, NOMx_0, NOMz_1, NOMz_0, denom1, denom2, denom3, T2) for(j=0; j<dimY; j++) { for(i=0; i<dimX; i++) { for(k=0; k<dimZ; k++) { + index = (dimX*dimY)*k + j*dimX+i; /* symmetric boundary conditions (Neuman) */ - i1 = i + 1; if (i1 >= dimY) i1 = i-1; + i1 = i + 1; if (i1 >= dimX) i1 = i-1; i2 = i - 1; if (i2 < 0) i2 = i+1; - j1 = j + 1; if (j1 >= dimX) j1 = j-1; + j1 = j + 1; if (j1 >= dimY) j1 = j-1; j2 = j - 1; if (j2 < 0) j2 = j+1; k1 = k + 1; if (k1 >= dimZ) k1 = k-1; k2 = k - 1; if (k2 < 0) k2 = k+1; /* Forward-backward differences */ - NOMx_1 = A[(dimX*dimY)*k + (i1)*dimY + j] - A[(dimX*dimY)*k + (i)*dimY + j]; /* x+ */ - NOMy_1 = A[(dimX*dimY)*k + (i)*dimY + j1] - A[(dimX*dimY)*k + (i)*dimY + j]; /* y+ */ - NOMx_0 = A[(dimX*dimY)*k + (i)*dimY + j] - A[(dimX*dimY)*k + (i2)*dimY + j]; /* x- */ - NOMz_1 = A[(dimX*dimY)*k1 + (i)*dimY + j] - A[(dimX*dimY)*k + (i)*dimY + j]; /* z+ */ - NOMz_0 = A[(dimX*dimY)*k + (i)*dimY + j] - A[(dimX*dimY)*k2 + (i)*dimY + j]; /* z- */ + NOMx_1 = A[(dimX*dimY)*k + (j1)*dimX + i] - A[index]; /* x+ */ + NOMy_1 = A[(dimX*dimY)*k + (j)*dimX + i1] - A[index]; /* y+ */ + NOMx_0 = A[index] - A[(dimX*dimY)*k + (j2)*dimX + i]; /* x- */ + NOMz_1 = A[(dimX*dimY)*k1 + j*dimX + i] - A[index]; /* z+ */ + NOMz_0 = A[index] - A[(dimX*dimY)*k2 + (j)*dimX + i]; /* z- */ denom1 = NOMy_1*NOMy_1; @@ -165,9 +175,33 @@ float D2_func(float *A, float *D2, int dimY, int dimX, int dimZ) denom3 = 0.5*(sign(NOMz_1) + sign(NOMz_0))*(MIN(fabs(NOMz_1),fabs(NOMz_0))); denom3 = denom3*denom3; T2 = sqrt(denom1 + denom2 + denom3 + EPS); - D2[(dimX*dimY)*k + i*dimY+j] = NOMy_1/T2; + D2[index] = NOMy_1/T2; }}} } + else { +#pragma omp parallel for shared (A, D2, dimX, dimY) private(i, j, i1, j1, i2, j2, NOMx_1,NOMy_1,NOMx_0,denom1,denom2,T2,index) + for(j=0; j<dimY; j++) { + for(i=0; i<dimX; i++) { + index = j*dimX+i; + /* symmetric boundary conditions (Neuman) */ + i1 = i + 1; if (i1 >= dimX) i1 = i-1; + i2 = i - 1; if (i2 < 0) i2 = i+1; + j1 = j + 1; if (j1 >= dimY) j1 = j-1; + j2 = j - 1; if (j2 < 0) j2 = j+1; + + /* Forward-backward differences */ + NOMx_1 = A[j1*dimX + i] - A[index]; /* x+ */ + NOMy_1 = A[j*dimX + i1] - A[index]; /* y+ */ + NOMx_0 = A[index] - A[j2*dimX + i]; /* x- */ + /*NOMy_0 = A[(i)*dimY + j] - A[(i)*dimY + j2]; */ /* y- */ + + denom1 = NOMy_1*NOMy_1; + denom2 = 0.5*(sign(NOMx_1) + sign(NOMx_0))*(MIN(fabs(NOMx_1),fabs(NOMx_0))); + denom2 = denom2*denom2; + T2 = sqrt(denom1 + denom2 + EPS); + D2[index] = NOMy_1/T2; + }} + } return *D2; } @@ -175,26 +209,27 @@ float D2_func(float *A, float *D2, int dimY, int dimX, int dimZ) float D3_func(float *A, float *D3, int dimY, int dimX, int dimZ) { float NOMx_1, NOMy_1, NOMx_0, NOMy_0, NOMz_1, denom1, denom2, denom3, T3; - int i,j,k,i1,i2,k1,j1,j2,k2; + int index,i,j,k,i1,i2,k1,j1,j2,k2; -#pragma omp parallel for shared (A, D3, dimX, dimY, dimZ) private(i, j, k, i1, j1, k1, i2, j2, k2, NOMx_1, NOMy_1, NOMy_0, NOMx_0, NOMz_1, denom1, denom2, denom3, T3) +#pragma omp parallel for shared (A, D3, dimX, dimY, dimZ) private(index, i, j, k, i1, j1, k1, i2, j2, k2, NOMx_1, NOMy_1, NOMy_0, NOMx_0, NOMz_1, denom1, denom2, denom3, T3) for(j=0; j<dimY; j++) { for(i=0; i<dimX; i++) { for(k=0; k<dimZ; k++) { + index = (dimX*dimY)*k + j*dimX+i; /* symmetric boundary conditions (Neuman) */ - i1 = i + 1; if (i1 >= dimY) i1 = i-1; + i1 = i + 1; if (i1 >= dimX) i1 = i-1; i2 = i - 1; if (i2 < 0) i2 = i+1; - j1 = j + 1; if (j1 >= dimX) j1 = j-1; + j1 = j + 1; if (j1 >= dimY) j1 = j-1; j2 = j - 1; if (j2 < 0) j2 = j+1; k1 = k + 1; if (k1 >= dimZ) k1 = k-1; k2 = k - 1; if (k2 < 0) k2 = k+1; /* Forward-backward differences */ - NOMx_1 = A[(dimX*dimY)*k + (i1)*dimY + j] - A[(dimX*dimY)*k + (i)*dimY + j]; /* x+ */ - NOMy_1 = A[(dimX*dimY)*k + (i)*dimY + j1] - A[(dimX*dimY)*k + (i)*dimY + j]; /* y+ */ - NOMy_0 = A[(dimX*dimY)*k + (i)*dimY + j] - A[(dimX*dimY)*k + (i)*dimY + j2]; /* y- */ - NOMx_0 = A[(dimX*dimY)*k + (i)*dimY + j] - A[(dimX*dimY)*k + (i2)*dimY + j]; /* x- */ - NOMz_1 = A[(dimX*dimY)*k1 + (i)*dimY + j] - A[(dimX*dimY)*k + (i)*dimY + j]; /* z+ */ + NOMx_1 = A[(dimX*dimY)*k + (j1)*dimX + i] - A[index]; /* x+ */ + NOMy_1 = A[(dimX*dimY)*k + (j)*dimX + i1] - A[index]; /* y+ */ + NOMy_0 = A[index] - A[(dimX*dimY)*k + (j)*dimX + i2]; /* y- */ + NOMx_0 = A[index] - A[(dimX*dimY)*k + (j2)*dimX + i]; /* x- */ + NOMz_1 = A[(dimX*dimY)*k1 + j*dimX + i] - A[index]; /* z+ */ /*NOMz_0 = A[(dimX*dimY)*k + (i)*dimY + j] - A[(dimX*dimY)*k2 + (i)*dimY + j]; */ /* z- */ denom1 = NOMz_1*NOMz_1; @@ -203,57 +238,57 @@ float D3_func(float *A, float *D3, int dimY, int dimX, int dimZ) denom3 = 0.5*(sign(NOMy_1) + sign(NOMy_0))*(MIN(fabs(NOMy_1),fabs(NOMy_0))); denom3 = denom3*denom3; T3 = sqrt(denom1 + denom2 + denom3 + EPS); - D3[(dimX*dimY)*k + i*dimY+j] = NOMz_1/T3; + D3[index] = NOMz_1/T3; }}} return *D3; } /* calculate divergence */ -float TV_main(float *D1, float *D2, float *D3, float *B, float *A, float lambda, float tau, int dimY, int dimX, int dimZ) +float TV_kernel(float *D1, float *D2, float *D3, float *B, float *A, float lambda, float tau, int dimX, int dimY, int dimZ) { float dv1, dv2, dv3; int index,i,j,k,i1,i2,k1,j1,j2,k2; - if (dimZ == 0) { -#pragma omp parallel for shared (D1, D2, B, dimX, dimY) private(index, i, j, i1, j1, i2, j2,dv1,dv2) - for(j=0; j<dimY; j++) { - for(i=0; i<dimX; i++) { - index = (i)*dimY + j; - /* symmetric boundary conditions (Neuman) */ - i1 = i + 1; if (i1 >= dimY) i1 = i-1; - i2 = i - 1; if (i2 < 0) i2 = i+1; - j1 = j + 1; if (j1 >= dimX) j1 = j-1; - j2 = j - 1; if (j2 < 0) j2 = j+1; - - /* divergence components */ - dv1 = D1[index] - D1[(i2)*dimY + j]; - dv2 = D2[index] - D2[(i)*dimY + j2]; - - B[index] = B[index] + tau*lambda*(dv1 + dv2) + tau*(A[index] - B[index]); - - }} - } - else { + if (dimZ > 1) { #pragma omp parallel for shared (D1, D2, D3, B, dimX, dimY, dimZ) private(index, i, j, k, i1, j1, k1, i2, j2, k2, dv1,dv2,dv3) for(j=0; j<dimY; j++) { for(i=0; i<dimX; i++) { for(k=0; k<dimZ; k++) { - index = (dimX*dimY)*k + i*dimY+j; + index = (dimX*dimY)*k + j*dimX+i; /* symmetric boundary conditions (Neuman) */ - i1 = i + 1; if (i1 >= dimY) i1 = i-1; + i1 = i + 1; if (i1 >= dimX) i1 = i-1; i2 = i - 1; if (i2 < 0) i2 = i+1; - j1 = j + 1; if (j1 >= dimX) j1 = j-1; + j1 = j + 1; if (j1 >= dimY) j1 = j-1; j2 = j - 1; if (j2 < 0) j2 = j+1; k1 = k + 1; if (k1 >= dimZ) k1 = k-1; k2 = k - 1; if (k2 < 0) k2 = k+1; /*divergence components */ - dv1 = D1[index] - D1[(dimX*dimY)*k + i2*dimY+j]; - dv2 = D2[index] - D2[(dimX*dimY)*k + i*dimY+j2]; - dv3 = D3[index] - D3[(dimX*dimY)*k2 + i*dimY+j]; + dv1 = D1[index] - D1[(dimX*dimY)*k + j2*dimX+i]; + dv2 = D2[index] - D2[(dimX*dimY)*k + j*dimX+i2]; + dv3 = D3[index] - D3[(dimX*dimY)*k2 + j*dimX+i]; B[index] = B[index] + tau*lambda*(dv1 + dv2 + dv3) + tau*(A[index] - B[index]); - }}} + }}} + } + else { +#pragma omp parallel for shared (D1, D2, B, dimX, dimY) private(index, i, j, i1, j1, i2, j2,dv1,dv2) + for(j=0; j<dimY; j++) { + for(i=0; i<dimX; i++) { + index = j*dimX+i; + /* symmetric boundary conditions (Neuman) */ + i1 = i + 1; if (i1 >= dimX) i1 = i-1; + i2 = i - 1; if (i2 < 0) i2 = i+1; + j1 = j + 1; if (j1 >= dimY) j1 = j-1; + j2 = j - 1; if (j2 < 0) j2 = j+1; + + /* divergence components */ + dv1 = D1[index] - D1[j2*dimX + i]; + dv2 = D2[index] - D2[j*dimX + i2]; + + //B[index] = B[index] + tau*lambda*(dv1 + dv2) + tau*(A[index] - B[index]); + B[index] = B[index] + tau*((dv1 + dv2) - lambda*(B[index] - A[index])); + }} } return *B; -}
\ No newline at end of file +} diff --git a/Core/regularizers_CPU/ROF_TV_core.h b/Core/regularizers_CPU/ROF_TV_core.h index 6e4f961..5d69d27 100644 --- a/Core/regularizers_CPU/ROF_TV_core.h +++ b/Core/regularizers_CPU/ROF_TV_core.h @@ -26,29 +26,31 @@ limitations under the License. #include "CCPiDefines.h" /* C-OMP implementation of ROF-TV denoising/regularization model [1] (2D/3D case) -* -* Input Parameters: + * + * + * Input Parameters: * 1. Noisy image/volume [REQUIRED] * 2. lambda - regularization parameter [REQUIRED] - * 3. tau - marching step for explicit scheme, ~0.001 is recommended [REQUIRED] - * 4. Number of iterations, for explicit scheme >= 150 is recommended [REQUIRED] -* -* Output: -* [1] Regularized image/volume - + * 3. tau - marching step for explicit scheme, ~1 is recommended [REQUIRED] + * 4. Number of iterations, for explicit scheme >= 150 is recommended [REQUIRED] + * + * Output: + * [1] Regularized image/volume + * * This function is based on the paper by -* [1] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms" -* -* D. Kazantsev, 2016-18 -*/ + * [1] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms" + * + * D. Kazantsev, 2016-18 + */ + #ifdef __cplusplus extern "C" { #endif -//float copyIm(float *A, float *B, int dimX, int dimY, int dimZ); -CCPI_EXPORT float TV_main(float *D1, float *D2, float *D3, float *B, float *A, float lambda, float tau, int dimY, int dimX, int dimZ); +CCPI_EXPORT float TV_kernel(float *D1, float *D2, float *D3, float *B, float *A, float lambda, float tau, int dimY, int dimX, int dimZ); +CCPI_EXPORT float TV_ROF(float *Input, float *Output, int dimX, int dimY, int dimZ, int iterationsNumb, float tau, float lambda); CCPI_EXPORT float D1_func(float *A, float *D1, int dimY, int dimX, int dimZ); CCPI_EXPORT float D2_func(float *A, float *D2, int dimY, int dimX, int dimZ); CCPI_EXPORT float D3_func(float *A, float *D3, int dimY, int dimX, int dimZ); #ifdef __cplusplus } -#endif
\ No newline at end of file +#endif diff --git a/Core/regularizers_CPU/utils.c b/Core/regularizers_CPU/utils.c index 0e83d2c..951fb91 100644 --- a/Core/regularizers_CPU/utils.c +++ b/Core/regularizers_CPU/utils.c @@ -26,4 +26,31 @@ float copyIm(float *A, float *U, int dimX, int dimY, int dimZ) #pragma omp parallel for shared(A, U) private(j) for (j = 0; j<dimX*dimY*dimZ; j++) U[j] = A[j]; return *U; -}
\ No newline at end of file +} + +/* function that calculates TV energy (ROF model) + * min||\nabla u|| + 0.5*lambda*||u -u0||^2 + * */ +float TV_energy2D(float *U, float *U0, float *E_val, float lambda, int dimX, int dimY) +{ + int i, j, i1, j1, index; + float NOMx_2, NOMy_2, E_Grad, E_Data; + + /* first calculate \grad U_xy*/ + for(j=0; j<dimY; j++) { + for(i=0; i<dimX; i++) { + index = j*dimX+i; + /* boundary conditions */ + i1 = i + 1; if (i == dimX-1) i1 = i; + j1 = j + 1; if (j == dimY-1) j1 = j; + + /* Forward differences */ + NOMx_2 = pow(U[j1*dimX + i] - U[index],2); /* x+ */ + NOMy_2 = pow(U[j*dimX + i1] - U[index],2); /* y+ */ + E_Grad += sqrt(NOMx_2 + NOMy_2); /* gradient term energy */ + E_Data += 0.5f * lambda*(pow((U[index]-U0[index]),2)); /* fidelity term energy */ + } + } + E_val[0] = E_Grad + E_Data; + return *E_val; +} diff --git a/Core/regularizers_CPU/utils.h b/Core/regularizers_CPU/utils.h index 91b8209..bd76bf0 100644 --- a/Core/regularizers_CPU/utils.h +++ b/Core/regularizers_CPU/utils.h @@ -28,6 +28,7 @@ limitations under the License. extern "C" { #endif CCPI_EXPORT float copyIm(float *A, float *U, int dimX, int dimY, int dimZ); +CCPI_EXPORT float TV_energy2D(float *U, float *U0, float *E_val, float lambda, int dimX, int dimY); #ifdef __cplusplus } #endif diff --git a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu new file mode 100755 index 0000000..633bf63 --- /dev/null +++ b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu @@ -0,0 +1,370 @@ + /* +This work is part of the Core Imaging Library developed by +Visual Analytics and Imaging System Group of the Science Technology +Facilities Council, STFC + +Copyright 2017 Daniil Kazantsev +Copyright 2017 Srikanth Nagella, Edoardo Pasca + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at +http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#include "TV_ROF_GPU.h" + +/* C-OMP implementation of ROF-TV denoising/regularization model [1] (2D/3D case) +* +* Input Parameters: +* 1. Noisy image/volume [REQUIRED] +* 2. lambda - regularization parameter [REQUIRED] +* 3. tau - marching step for explicit scheme, ~0.1 is recommended [REQUIRED] +* 4. Number of iterations, for explicit scheme >= 150 is recommended [REQUIRED] +* +* Output: +* [1] Regularized image/volume + + * This function is based on the paper by +* [1] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms" +* +* D. Kazantsev, 2016-18 +*/ + +#define CHECK(call) \ +{ \ + const cudaError_t error = call; \ + if (error != cudaSuccess) \ + { \ + fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \ + fprintf(stderr, "code: %d, reason: %s\n", error, \ + cudaGetErrorString(error)); \ + exit(1); \ + } \ +} + +#define BLKXSIZE 8 +#define BLKYSIZE 8 +#define BLKZSIZE 8 + +#define BLKXSIZE2D 16 +#define BLKYSIZE2D 16 +#define EPS 1.0e-4 + +#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) ) + +#define MAX(x, y) (((x) > (y)) ? (x) : (y)) +#define MIN(x, y) (((x) < (y)) ? (x) : (y)) + +__host__ __device__ int sign (float x) +{ + return (x > 0) - (x < 0); +} + +/*********************2D case****************************/ + + /* differences 1 */ + __global__ void D1_func2D(float* Input, float* D1, int N, int M) + { + int i1, j1, i2, j2; + float NOMx_1,NOMy_1,NOMy_0,denom1,denom2,T1; + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + + int index = i + N*j; + + if ((i >= 0) && (i < N) && (j >= 0) && (j < M)) { + + /* boundary conditions (Neumann reflections) */ + i1 = i + 1; if (i1 >= N) i1 = i-1; + i2 = i - 1; if (i2 < 0) i2 = i+1; + j1 = j + 1; if (j1 >= M) j1 = j-1; + j2 = j - 1; if (j2 < 0) j2 = j+1; + + /* Forward-backward differences */ + NOMx_1 = Input[j1*N + i] - Input[index]; /* x+ */ + NOMy_1 = Input[j*N + i1] - Input[index]; /* y+ */ + NOMy_0 = Input[index] - Input[j*N + i2]; /* y- */ + + denom1 = NOMx_1*NOMx_1; + denom2 = 0.5f*(sign((float)NOMy_1) + sign((float)NOMy_0))*(MIN(abs((float)NOMy_1),abs((float)NOMy_0))); + denom2 = denom2*denom2; + T1 = sqrt(denom1 + denom2 + EPS); + D1[index] = NOMx_1/T1; + } + } + + /* differences 2 */ + __global__ void D2_func2D(float* Input, float* D2, int N, int M) + { + int i1, j1, i2, j2; + float NOMx_1,NOMy_1,NOMx_0,denom1,denom2,T2; + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + + int index = i + N*j; + + if ((i >= 0) && (i < (N)) && (j >= 0) && (j < (M))) { + + /* boundary conditions (Neumann reflections) */ + i1 = i + 1; if (i1 >= N) i1 = i-1; + i2 = i - 1; if (i2 < 0) i2 = i+1; + j1 = j + 1; if (j1 >= M) j1 = j-1; + j2 = j - 1; if (j2 < 0) j2 = j+1; + + /* Forward-backward differences */ + NOMx_1 = Input[j1*N + i] - Input[index]; /* x+ */ + NOMy_1 = Input[j*N + i1] - Input[index]; /* y+ */ + NOMx_0 = Input[index] - Input[j2*N + i]; /* x- */ + + denom1 = NOMy_1*NOMy_1; + denom2 = 0.5f*(sign((float)NOMx_1) + sign((float)NOMx_0))*(MIN(abs((float)NOMx_1),abs((float)NOMx_0))); + denom2 = denom2*denom2; + T2 = sqrt(denom1 + denom2 + EPS); + D2[index] = NOMy_1/T2; + } + } + + __global__ void TV_kernel2D(float *D1, float *D2, float *Update, float *Input, float lambda, float tau, int N, int M) + { + int i2, j2; + float dv1,dv2; + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + + int index = i + N*j; + + if ((i >= 0) && (i < (N)) && (j >= 0) && (j < (M))) { + + /* boundary conditions (Neumann reflections) */ + i2 = i - 1; if (i2 < 0) i2 = i+1; + j2 = j - 1; if (j2 < 0) j2 = j+1; + + /* divergence components */ + dv1 = D1[index] - D1[j2*N + i]; + dv2 = D2[index] - D2[j*N + i2]; + + Update[index] = Update[index] + tau*((dv1 + dv2) - lambda*(Update[index] - Input[index])); + + } + } +/*********************3D case****************************/ + + /* differences 1 */ + __global__ void D1_func3D(float* Input, float* D1, int dimX, int dimY, int dimZ) + { + float NOMx_1, NOMy_1, NOMy_0, NOMz_1, NOMz_0, denom1, denom2,denom3, T1; + int i1,i2,k1,j1,j2,k2; + + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (dimX*dimY)*k + j*dimX+i; + + if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) { + + /* symmetric boundary conditions (Neuman) */ + i1 = i + 1; if (i1 >= dimX) i1 = i-1; + i2 = i - 1; if (i2 < 0) i2 = i+1; + j1 = j + 1; if (j1 >= dimY) j1 = j-1; + j2 = j - 1; if (j2 < 0) j2 = j+1; + k1 = k + 1; if (k1 >= dimZ) k1 = k-1; + k2 = k - 1; if (k2 < 0) k2 = k+1; + + /* Forward-backward differences */ + NOMx_1 = Input[(dimX*dimY)*k + j1*dimX + i] - Input[index]; /* x+ */ + NOMy_1 = Input[(dimX*dimY)*k + j*dimX + i1] - Input[index]; /* y+ */ + NOMy_0 = Input[index] - Input[(dimX*dimY)*k + j*dimX + i2]; /* y- */ + + NOMz_1 = Input[(dimX*dimY)*k1 + j*dimX + i] - Input[index]; /* z+ */ + NOMz_0 = Input[index] - Input[(dimX*dimY)*k2 + j*dimX + i]; /* z- */ + + + denom1 = NOMx_1*NOMx_1; + denom2 = 0.5*(sign(NOMy_1) + sign(NOMy_0))*(MIN(abs(NOMy_1),abs(NOMy_0))); + denom2 = denom2*denom2; + denom3 = 0.5*(sign(NOMz_1) + sign(NOMz_0))*(MIN(abs(NOMz_1),abs(NOMz_0))); + denom3 = denom3*denom3; + T1 = sqrt(denom1 + denom2 + denom3 + EPS); + D1[index] = NOMx_1/T1; + } + } + + /* differences 2 */ + __global__ void D2_func3D(float* Input, float* D2, int dimX, int dimY, int dimZ) + { + float NOMx_1, NOMy_1, NOMx_0, NOMz_1, NOMz_0, denom1, denom2, denom3, T2; + int i1,i2,k1,j1,j2,k2; + + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (dimX*dimY)*k + j*dimX+i; + + if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) { + /* symmetric boundary conditions (Neuman) */ + i1 = i + 1; if (i1 >= dimX) i1 = i-1; + i2 = i - 1; if (i2 < 0) i2 = i+1; + j1 = j + 1; if (j1 >= dimY) j1 = j-1; + j2 = j - 1; if (j2 < 0) j2 = j+1; + k1 = k + 1; if (k1 >= dimZ) k1 = k-1; + k2 = k - 1; if (k2 < 0) k2 = k+1; + + + /* Forward-backward differences */ + NOMx_1 = Input[(dimX*dimY)*k + (j1)*dimX + i] - Input[index]; /* x+ */ + NOMy_1 = Input[(dimX*dimY)*k + (j)*dimX + i1] - Input[index]; /* y+ */ + NOMx_0 = Input[index] - Input[(dimX*dimY)*k + (j2)*dimX + i]; /* x- */ + NOMz_1 = Input[(dimX*dimY)*k1 + j*dimX + i] - Input[index]; /* z+ */ + NOMz_0 = Input[index] - Input[(dimX*dimY)*k2 + (j)*dimX + i]; /* z- */ + + + denom1 = NOMy_1*NOMy_1; + denom2 = 0.5*(sign(NOMx_1) + sign(NOMx_0))*(MIN(abs(NOMx_1),abs(NOMx_0))); + denom2 = denom2*denom2; + denom3 = 0.5*(sign(NOMz_1) + sign(NOMz_0))*(MIN(abs(NOMz_1),abs(NOMz_0))); + denom3 = denom3*denom3; + T2 = sqrt(denom1 + denom2 + denom3 + EPS); + D2[index] = NOMy_1/T2; + } + } + + /* differences 3 */ + __global__ void D3_func3D(float* Input, float* D3, int dimX, int dimY, int dimZ) + { + float NOMx_1, NOMy_1, NOMx_0, NOMy_0, NOMz_1, denom1, denom2, denom3, T3; + int i1,i2,k1,j1,j2,k2; + + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (dimX*dimY)*k + j*dimX+i; + + if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) { + + i1 = i + 1; if (i1 >= dimX) i1 = i-1; + i2 = i - 1; if (i2 < 0) i2 = i+1; + j1 = j + 1; if (j1 >= dimY) j1 = j-1; + j2 = j - 1; if (j2 < 0) j2 = j+1; + k1 = k + 1; if (k1 >= dimZ) k1 = k-1; + k2 = k - 1; if (k2 < 0) k2 = k+1; + + /* Forward-backward differences */ + NOMx_1 = Input[(dimX*dimY)*k + (j1)*dimX + i] - Input[index]; /* x+ */ + NOMy_1 = Input[(dimX*dimY)*k + (j)*dimX + i1] - Input[index]; /* y+ */ + NOMy_0 = Input[index] - Input[(dimX*dimY)*k + (j)*dimX + i2]; /* y- */ + NOMx_0 = Input[index] - Input[(dimX*dimY)*k + (j2)*dimX + i]; /* x- */ + NOMz_1 = Input[(dimX*dimY)*k1 + j*dimX + i] - Input[index]; /* z+ */ + + denom1 = NOMz_1*NOMz_1; + denom2 = 0.5*(sign(NOMx_1) + sign(NOMx_0))*(MIN(abs(NOMx_1),abs(NOMx_0))); + denom2 = denom2*denom2; + denom3 = 0.5*(sign(NOMy_1) + sign(NOMy_0))*(MIN(abs(NOMy_1),abs(NOMy_0))); + denom3 = denom3*denom3; + T3 = sqrt(denom1 + denom2 + denom3 + EPS); + D3[index] = NOMz_1/T3; + } + } + + __global__ void TV_kernel3D(float *D1, float *D2, float *D3, float *Update, float *Input, float lambda, float tau, int dimX, int dimY, int dimZ) + { + float dv1, dv2, dv3; + int i1,i2,k1,j1,j2,k2; + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (dimX*dimY)*k + j*dimX+i; + + if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) { + + /* symmetric boundary conditions (Neuman) */ + i1 = i + 1; if (i1 >= dimX) i1 = i-1; + i2 = i - 1; if (i2 < 0) i2 = i+1; + j1 = j + 1; if (j1 >= dimY) j1 = j-1; + j2 = j - 1; if (j2 < 0) j2 = j+1; + k1 = k + 1; if (k1 >= dimZ) k1 = k-1; + k2 = k - 1; if (k2 < 0) k2 = k+1; + + /*divergence components */ + dv1 = D1[index] - D1[(dimX*dimY)*k + j2*dimX+i]; + dv2 = D2[index] - D2[(dimX*dimY)*k + j*dimX+i2]; + dv3 = D3[index] - D3[(dimX*dimY)*k2 + j*dimX+i]; + + Update[index] = Update[index] + tau*lambda*(dv1 + dv2 + dv3) + tau*(Update[index] - Input[index]); + + } + } + +///////////////////////////////////////////////// +// HOST FUNCTION +extern "C" void TV_ROF_GPU(float* Input, float* Output, int N, int M, int Z, int iter, float tau, float lambda) +{ + // set up device + int dev = 0; + CHECK(cudaSetDevice(dev)); + + float *d_input, *d_update, *d_D1, *d_D2; + + CHECK(cudaMalloc((void**)&d_input,N*M*Z*sizeof(float))); + CHECK(cudaMalloc((void**)&d_update,N*M*Z*sizeof(float))); + CHECK(cudaMalloc((void**)&d_D1,N*M*Z*sizeof(float))); + CHECK(cudaMalloc((void**)&d_D2,N*M*Z*sizeof(float))); + + CHECK(cudaMemcpy(d_input,Input,N*M*Z*sizeof(float),cudaMemcpyHostToDevice)); + CHECK(cudaMemcpy(d_update,Input,N*M*Z*sizeof(float),cudaMemcpyHostToDevice)); + + if (Z > 1) { + // TV - 3D case + dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE); + dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE),idivup(Z,BLKXSIZE)); + + float *d_D3; + CHECK(cudaMalloc((void**)&d_D3,N*M*Z*sizeof(float))); + + for(int n=0; n < iter; n++) { + /* calculate differences */ + D1_func3D<<<dimGrid,dimBlock>>>(d_update, d_D1, N, M, Z); + CHECK(cudaDeviceSynchronize()); + D2_func3D<<<dimGrid,dimBlock>>>(d_update, d_D2, N, M, Z); + CHECK(cudaDeviceSynchronize()); + D3_func3D<<<dimGrid,dimBlock>>>(d_update, d_D3, N, M, Z); + CHECK(cudaDeviceSynchronize()); + /*running main kernel*/ + TV_kernel3D<<<dimGrid,dimBlock>>>(d_D1, d_D2, d_D3, d_update, d_input, lambda, tau, N, M, Z); + CHECK(cudaDeviceSynchronize()); + } + + CHECK(cudaFree(d_D3)); + } + else { + // TV - 2D case + dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D); + dim3 dimGrid(idivup(N,BLKXSIZE2D), idivup(M,BLKYSIZE2D)); + + for(int n=0; n < iter; n++) { + /* calculate differences */ + D1_func2D<<<dimGrid,dimBlock>>>(d_update, d_D1, N, M); + CHECK(cudaDeviceSynchronize()); + D2_func2D<<<dimGrid,dimBlock>>>(d_update, d_D2, N, M); + CHECK(cudaDeviceSynchronize()); + /*running main kernel*/ + TV_kernel2D<<<dimGrid,dimBlock>>>(d_D1, d_D2, d_update, d_input, lambda, tau, N, M); + CHECK(cudaDeviceSynchronize()); + } + } + CHECK(cudaMemcpy(Output,d_update,N*M*Z*sizeof(float),cudaMemcpyDeviceToHost)); + CHECK(cudaFree(d_input)); + CHECK(cudaFree(d_update)); + CHECK(cudaFree(d_D1)); + CHECK(cudaFree(d_D2)); + cudaDeviceReset(); +} diff --git a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.h b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.h new file mode 100755 index 0000000..3163482 --- /dev/null +++ b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.h @@ -0,0 +1,7 @@ +#ifndef __TVGPU_H__ +#define __TVGPU_H__ +#include "CCPiDefines.h" + +extern "C" CCPI_EXPORT void TV_ROF_GPU(float* Input, float* Output, int N, int M, int Z, int iter, float tau, float lambda); + +#endif diff --git a/Wrappers/Python/src/cpu_regularizers.pyx b/Wrappers/Python/src/cpu_regularizers.pyx index b22e603..e306ab3 100644 --- a/Wrappers/Python/src/cpu_regularizers.pyx +++ b/Wrappers/Python/src/cpu_regularizers.pyx @@ -18,12 +18,7 @@ import cython import numpy as np cimport numpy as np -cdef extern float TV_main(float *D1, float *D2, float *D3, float *B, float *A, - float lambda, float tau, int dimY, int dimX, int dimZ); -cdef extern float D1_func(float *A, float *D1, int dimY, int dimX, int dimZ); -cdef extern float D2_func(float *A, float *D2, int dimY, int dimX, int dimZ); -cdef extern float D3_func(float *A, float *D3, int dimY, int dimX, int dimZ); -cdef extern void copyIm (float *A, float *U, int dimX, int dimY, int dimZ); +cdef extern float TV_ROF(float *A, float *B, int dimX, int dimY, int dimZ, int iterationsNumb, float tau, float lambda); def ROF_TV(inputData, iterations, regularization_parameter, marching_step_parameter): @@ -39,30 +34,17 @@ def ROF_TV_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, float regularization_parameter float marching_step_parameter ): + cdef long dims[2] dims[0] = inputData.shape[0] dims[1] = inputData.shape[1] cdef np.ndarray[np.float32_t, ndim=2, mode="c"] B = \ np.zeros([dims[0],dims[1]], dtype='float32') - cdef np.ndarray[np.float32_t, ndim=2, mode="c"] D1 = \ - np.zeros([dims[0],dims[1]], dtype='float32') - cdef np.ndarray[np.float32_t, ndim=2, mode="c"] D2 = \ - np.zeros([dims[0],dims[1]], dtype='float32') - - copyIm(&inputData[0,0], &B[0,0], dims[0], dims[1], 1); - #/* start TV iterations */ - cdef int i = 0; - for i in range(iterations): - - #/* calculate differences */ - D1_func(&B[0,0], &D1[0,0], dims[0], dims[1], 1); - D2_func(&B[0,0], &D2[0,0], dims[0], dims[1], 1); - - #/* calculate divergence and image update*/ - TV_main(&D1[0,0], &D2[0,0], &D2[0,0], &B[0,0], &A[0,0], - regularization_parameter, marching_step_parameter, - dims[0], dims[1], 1) + + #/* Run ROF iterations for 2D data */ + TV_ROF(&A[0,0], &B[0,0], dims[0], dims[1], 1, iterations, marching_step_parameter, regularization_parameter) + return B @@ -78,26 +60,9 @@ def ROF_TV_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, cdef np.ndarray[np.float32_t, ndim=3, mode="c"] B = \ np.zeros([dims[0],dims[1],dims[2]], dtype='float32') - cdef np.ndarray[np.float32_t, ndim=3, mode="c"] D1 = \ - np.zeros([dims[0],dims[1],dims[2]], dtype='float32') - cdef np.ndarray[np.float32_t, ndim=3, mode="c"] D2 = \ - np.zeros([dims[0],dims[1],dims[2]], dtype='float32') - cdef np.ndarray[np.float32_t, ndim=3, mode="c"] D3 = \ - np.zeros([dims[0],dims[1],dims[2]], dtype='float32') - - copyIm(&inputData[0,0,0], &B[0,0,0], dims[0], dims[1], dims[2]); - #/* start TV iterations */ - cdef int i = 0; - for i in range(iterations): - - #/* calculate differences */ - D1_func(&B[0,0,0], &D1[0,0,0], dims[0], dims[1], dims[2]); - D2_func(&B[0,0,0], &D2[0,0,0], dims[0], dims[1], dims[2]); - D3_func(&B[0,0,0], &D3[0,0,0], dims[0], dims[1], dims[2]); - - #/* calculate divergence and image update*/ - TV_main(&D1[0,0,0], &D2[0,0,0], &D3[0,0,0], &B[0,0,0], &A[0,0,0], - regularization_parameter, marching_step_parameter, - dims[0], dims[1], dims[2]) + + #/* Run ROF iterations for 3D data */ + TV_ROF(&A[0,0, 0], &B[0,0, 0], dims[0], dims[1], dims[2], iterations, marching_step_parameter, regularization_parameter) + return B -
\ No newline at end of file + diff --git a/Wrappers/Python/src/gpu_regularizers.pyx b/Wrappers/Python/src/gpu_regularizers.pyx index 84fc30a..649015e 100644 --- a/Wrappers/Python/src/gpu_regularizers.pyx +++ b/Wrappers/Python/src/gpu_regularizers.pyx @@ -25,6 +25,7 @@ cdef extern void NLM_GPU_kernel(float *A, float* B, float *Eucl_Vec, int N, int M, int Z, int dimension, int SearchW, int SimilW, int SearchW_real, float denh2, float lambdaf); +cdef extern void TV_ROF_GPU(float* A, float* B, int N, int M, int Z, int iter, float tau, float lambdaf); cdef extern float pad_crop(float *A, float *Ap, int OldSizeX, int OldSizeY, int OldSizeZ, int NewSizeX, int NewSizeY, int NewSizeZ, @@ -65,6 +66,22 @@ def NML(inputData, SimilW, h, lambdaf) + +def ROF_TV(inputData, + iterations, + time_marching_parameter, + regularization_parameter): + if inputData.ndim == 2: + return ROFTV2D(inputData, + iterations, + time_marching_parameter, + regularization_parameter) + elif inputData.ndim == 3: + return ROFTV3D(inputData, + iterations, + time_marching_parameter, + regularization_parameter) + def Diff4thHajiaboli2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, float edge_preserv_parameter, @@ -311,4 +328,49 @@ def NML3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, padXY, switchpad_crop) + return B + +def ROFTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, + int iterations, + float time_marching_parameter, + float regularization_parameter): + + cdef long dims[2] + dims[0] = inputData.shape[0] + dims[1] = inputData.shape[1] + + cdef np.ndarray[np.float32_t, ndim=2, mode="c"] B = \ + np.zeros([dims[0],dims[1]], dtype='float32') + + # Running CUDA code here + TV_ROF_GPU( + &inputData[0,0], &B[0,0], + dims[0], dims[1], 0, + iterations , + time_marching_parameter, + regularization_parameter); + + return B + +def ROFTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, + int iterations, + float time_marching_parameter, + float regularization_parameter): + + cdef long dims[3] + dims[0] = inputData.shape[0] + dims[1] = inputData.shape[1] + dims[2] = inputData.shape[2] + + cdef np.ndarray[np.float32_t, ndim=3, mode="c"] B = \ + np.zeros([dims[0],dims[1],dims[2]], dtype='float32') + + # Running CUDA code here + TV_ROF_GPU( + &inputData[0,0,0], &B[0,0,0], + dims[0], dims[1], dims[2], + iterations , + time_marching_parameter, + regularization_parameter); + return B |