summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Core/CMakeLists.txt4
-rw-r--r--Core/regularizers_CPU/FGP_TV_core.c470
-rw-r--r--Core/regularizers_CPU/FGP_TV_core.h57
-rw-r--r--Core/regularizers_CPU/ROF_TV_core.c35
-rw-r--r--Core/regularizers_CPU/ROF_TV_core.h6
-rw-r--r--Core/regularizers_CPU/utils.c13
-rwxr-xr-xCore/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.cu561
-rwxr-xr-xCore/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.h10
-rwxr-xr-xCore/regularizers_GPU/TV_ROF/TV_ROF_GPU.h7
-rwxr-xr-xCore/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.cu (renamed from Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu)34
-rwxr-xr-xCore/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.h8
-rw-r--r--Wrappers/Matlab/mex_compile/regularizers_CPU/FGP_TV.c142
-rw-r--r--Wrappers/Python/ccpi/filters/regularizers.py44
-rw-r--r--Wrappers/Python/demo/test_cpu_regularizers.py55
-rw-r--r--Wrappers/Python/setup-regularizers.py.in6
-rw-r--r--Wrappers/Python/src/cpu_regularizers.cpp291
-rw-r--r--Wrappers/Python/src/cpu_regularizers.pyx106
-rw-r--r--Wrappers/Python/src/gpu_regularizers.pyx151
-rw-r--r--Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py219
-rw-r--r--Wrappers/Python/test/test_gpu_regularizers.py113
20 files changed, 1509 insertions, 823 deletions
diff --git a/Core/CMakeLists.txt b/Core/CMakeLists.txt
index 3202581..4e85002 100644
--- a/Core/CMakeLists.txt
+++ b/Core/CMakeLists.txt
@@ -88,7 +88,7 @@ add_library(cilreg SHARED
${CMAKE_CURRENT_SOURCE_DIR}/regularizers_CPU/PatchBased_Regul_core.c
${CMAKE_CURRENT_SOURCE_DIR}/regularizers_CPU/SplitBregman_TV_core.c
${CMAKE_CURRENT_SOURCE_DIR}/regularizers_CPU/TGV_PD_core.c
- ${CMAKE_CURRENT_SOURCE_DIR}/regularizers_CPU/ROF_TV_core.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/regularizers_CPU/ROF_TV_core.c
${CMAKE_CURRENT_SOURCE_DIR}/regularizers_CPU/utils.c
)
target_link_libraries(cilreg ${EXTRA_LIBRARIES} )
@@ -129,6 +129,8 @@ if (CUDA_FOUND)
CUDA_ADD_LIBRARY(cilregcuda SHARED
${CMAKE_CURRENT_SOURCE_DIR}/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.cu
${CMAKE_CURRENT_SOURCE_DIR}/regularizers_GPU/NL_Regul/NLM_GPU_kernel.cu
+ ${CMAKE_CURRENT_SOURCE_DIR}/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.cu
+ ${CMAKE_CURRENT_SOURCE_DIR}/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.cu
)
if (UNIX)
message ("I'd install into ${CMAKE_INSTALL_PREFIX}/lib")
diff --git a/Core/regularizers_CPU/FGP_TV_core.c b/Core/regularizers_CPU/FGP_TV_core.c
index 03cd445..7a8ce48 100644
--- a/Core/regularizers_CPU/FGP_TV_core.c
+++ b/Core/regularizers_CPU/FGP_TV_core.c
@@ -22,245 +22,299 @@ limitations under the License.
/* C-OMP implementation of FGP-TV [1] denoising/regularization model (2D/3D case)
*
* Input Parameters:
- * 1. Noisy image/volume [REQUIRED]
- * 2. lambda - regularization parameter [REQUIRED]
- * 3. Number of iterations [OPTIONAL parameter]
- * 4. eplsilon: tolerance constant [OPTIONAL parameter]
- * 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
+ * 1. Noisy image/volume
+ * 2. lambdaPar - regularization parameter
+ * 3. Number of iterations
+ * 4. eplsilon: tolerance constant
+ * 5. TV-type: methodTV - 'iso' (0) or 'l1' (1)
+ * 6. nonneg: 'nonnegativity (0 is OFF by default)
+ * 7. print information: 0 (off) or 1 (on)
*
* Output:
* [1] Filtered/regularized image
- * [2] last function value
- *
- * Example of image denoising:
- * figure;
- * Im = double(imread('lena_gray_256.tif'))/255; % loading image
- * u0 = Im + .05*randn(size(Im)); % adding noise
- * u = FGP_TV(single(u0), 0.05, 100, 1e-04);
*
* This function is based on the Matlab's code and paper by
* [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
- *
- * D. Kazantsev, 2016-17
- *
*/
-
-/* 2D-case related Functions */
-/*****************************************************************/
-float Obj_func_CALC2D(float *A, float *D, float *funcvalA, float lambda, int dimX, int dimY)
-{
- int i,j;
- float f1, f2, val1, val2;
-
- /*data-related term */
- f1 = 0.0f;
- for(i=0; i<dimX*dimY; i++) f1 += pow(D[i] - A[i],2);
-
- /*TV-related term */
- f2 = 0.0f;
- for(i=0; i<dimX; i++) {
- for(j=0; j<dimY; j++) {
- /* boundary conditions */
- if (i == dimX-1) {val1 = 0.0f;} else {val1 = A[(i+1)*dimY + (j)] - A[(i)*dimY + (j)];}
- if (j == dimY-1) {val2 = 0.0f;} else {val2 = A[(i)*dimY + (j+1)] - A[(i)*dimY + (j)];}
- f2 += sqrt(pow(val1,2) + pow(val2,2));
- }}
-
- /* sum of two terms */
- funcvalA[0] = 0.5f*f1 + lambda*f2;
- return *funcvalA;
+
+float TV_FGP_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ)
+{
+ int ll, j, DimTotal;
+ float re, re1;
+ float tk = 1.0f;
+ float tkp1=1.0f;
+ int count = 0;
+
+ if (dimZ <= 1) {
+ /*2D case */
+ float *Output_prev=NULL, *P1=NULL, *P2=NULL, *P1_prev=NULL, *P2_prev=NULL, *R1=NULL, *R2=NULL;
+ DimTotal = dimX*dimY;
+
+ Output_prev = (float *) malloc( DimTotal * sizeof(float) );
+ P1 = (float *) malloc( DimTotal * sizeof(float) );
+ P2 = (float *) malloc( DimTotal * sizeof(float) );
+ P1_prev = (float *) malloc( DimTotal * sizeof(float) );
+ P2_prev = (float *) malloc( DimTotal * sizeof(float) );
+ R1 = (float *) malloc( DimTotal * sizeof(float) );
+ R2 = (float *) malloc( DimTotal * sizeof(float) );
+
+ /* begin iterations */
+ for(ll=0; ll<iterationsNumb; ll++) {
+
+ /* computing the gradient of the objective function */
+ Obj_func2D(Input, Output, R1, R2, lambdaPar, dimX, dimY);
+
+ /* apply nonnegativity */
+ if (nonneg == 1) for(j=0; j<DimTotal; j++) {if (Output[j] < 0.0f) Output[j] = 0.0f;}
+
+ /*Taking a step towards minus of the gradient*/
+ Grad_func2D(P1, P2, Output, R1, R2, lambdaPar, dimX, dimY);
+
+ /* projection step */
+ Proj_func2D(P1, P2, methodTV, DimTotal);
+
+ /*updating R and t*/
+ tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
+ Rupd_func2D(P1, P1_prev, P2, P2_prev, R1, R2, tkp1, tk, DimTotal);
+
+ /* check early stopping criteria */
+ re = 0.0f; re1 = 0.0f;
+ for(j=0; j<DimTotal; j++)
+ {
+ re += pow(Output[j] - Output_prev[j],2);
+ re1 += pow(Output[j],2);
+ }
+ re = sqrt(re)/sqrt(re1);
+ if (re < epsil) count++;
+ if (count > 4) break;
+
+ /*storing old values*/
+ copyIm(Output, Output_prev, dimX, dimY, 1);
+ copyIm(P1, P1_prev, dimX, dimY, 1);
+ copyIm(P2, P2_prev, dimX, dimY, 1);
+ tk = tkp1;
+ }
+ if (printM == 1) printf("FGP-TV iterations stopped at iteration %i \n", ll);
+ free(Output_prev); free(P1); free(P2); free(P1_prev); free(P2_prev); free(R1); free(R2);
+ }
+ else {
+ /*3D case*/
+ float *Output_prev=NULL, *P1=NULL, *P2=NULL, *P3=NULL, *P1_prev=NULL, *P2_prev=NULL, *P3_prev=NULL, *R1=NULL, *R2=NULL, *R3=NULL;
+ DimTotal = dimX*dimY*dimZ;
+
+ Output_prev = (float *) malloc( DimTotal * sizeof(float) );
+ P1 = (float *) malloc( DimTotal * sizeof(float) );
+ P2 = (float *) malloc( DimTotal * sizeof(float) );
+ P3 = (float *) malloc( DimTotal * sizeof(float) );
+ P1_prev = (float *) malloc( DimTotal * sizeof(float) );
+ P2_prev = (float *) malloc( DimTotal * sizeof(float) );
+ P3_prev = (float *) malloc( DimTotal * sizeof(float) );
+ R1 = (float *) malloc( DimTotal * sizeof(float) );
+ R2 = (float *) malloc( DimTotal * sizeof(float) );
+ R3 = (float *) malloc( DimTotal * sizeof(float) );
+
+ /* begin iterations */
+ for(ll=0; ll<iterationsNumb; ll++) {
+
+ /* computing the gradient of the objective function */
+ Obj_func3D(Input, Output, R1, R2, R3, lambdaPar, dimX, dimY, dimZ);
+
+ /* apply nonnegativity */
+ if (nonneg == 1) for(j=0; j<DimTotal; j++) {if (Output[j] < 0.0f) Output[j] = 0.0f;}
+
+ /*Taking a step towards minus of the gradient*/
+ Grad_func3D(P1, P2, P3, Output, R1, R2, R3, lambdaPar, dimX, dimY, dimZ);
+
+ /* projection step */
+ Proj_func3D(P1, P2, P3, methodTV, DimTotal);
+
+ /*updating R and t*/
+ tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
+ Rupd_func3D(P1, P1_prev, P2, P2_prev, P3, P3_prev, R1, R2, R3, tkp1, tk, DimTotal);
+
+ /* calculate norm - stopping rules*/
+ re = 0.0f; re1 = 0.0f;
+ for(j=0; j<DimTotal; j++)
+ {
+ re += pow(Output[j] - Output_prev[j],2);
+ re1 += pow(Output[j],2);
+ }
+ re = sqrt(re)/sqrt(re1);
+ /* stop if the norm residual is less than the tolerance EPS */
+ if (re < epsil) count++;
+ if (count > 4) break;
+
+ /*storing old values*/
+ copyIm(Output, Output_prev, dimX, dimY, dimZ);
+ copyIm(P1, P1_prev, dimX, dimY, dimZ);
+ copyIm(P2, P2_prev, dimX, dimY, dimZ);
+ copyIm(P3, P3_prev, dimX, dimY, dimZ);
+ tk = tkp1;
+ }
+ if (printM == 1) printf("FGP-TV iterations stopped at iteration %i \n", ll);
+ free(Output_prev); free(P1); free(P2); free(P3); free(P1_prev); free(P2_prev); free(P3_prev); free(R1); free(R2); free(R3);
+ }
+ return *Output;
}
float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, int dimX, int dimY)
{
- float val1, val2;
- int i, j;
-#pragma omp parallel for shared(A,D,R1,R2) private(i,j,val1,val2)
- for (i = 0; i<dimX; i++) {
- for (j = 0; j<dimY; j++) {
- /* boundary conditions */
- if (i == 0) { val1 = 0.0f; }
- else { val1 = R1[(i - 1)*dimY + (j)]; }
- if (j == 0) { val2 = 0.0f; }
- else { val2 = R2[(i)*dimY + (j - 1)]; }
- D[(i)*dimY + (j)] = A[(i)*dimY + (j)] - lambda*(R1[(i)*dimY + (j)] + R2[(i)*dimY + (j)] - val1 - val2);
- }
- }
- return *D;
+ float val1, val2;
+ int i,j,index;
+#pragma omp parallel for shared(A,D,R1,R2) private(index,i,j,val1,val2)
+ for(i=0; i<dimX; i++) {
+ for(j=0; j<dimY; j++) {
+ index = j*dimX+i;
+ /* boundary conditions */
+ if (i == 0) {val1 = 0.0f;} else {val1 = R1[j*dimX + (i-1)];}
+ if (j == 0) {val2 = 0.0f;} else {val2 = R2[(j-1)*dimX + i];}
+ D[index] = A[index] - lambda*(R1[index] + R2[index] - val1 - val2);
+ }}
+ return *D;
}
float Grad_func2D(float *P1, float *P2, float *D, float *R1, float *R2, float lambda, int dimX, int dimY)
{
- float val1, val2, multip;
- int i, j;
- multip = (1.0f / (8.0f*lambda));
-#pragma omp parallel for shared(P1,P2,D,R1,R2,multip) private(i,j,val1,val2)
- for (i = 0; i<dimX; i++) {
- for (j = 0; j<dimY; j++) {
- /* boundary conditions */
- if (i == dimX - 1) val1 = 0.0f; else val1 = D[(i)*dimY + (j)] - D[(i + 1)*dimY + (j)];
- if (j == dimY - 1) val2 = 0.0f; else val2 = D[(i)*dimY + (j)] - D[(i)*dimY + (j + 1)];
- P1[(i)*dimY + (j)] = R1[(i)*dimY + (j)] + multip*val1;
- P2[(i)*dimY + (j)] = R2[(i)*dimY + (j)] + multip*val2;
- }
- }
- return 1;
+ float val1, val2, multip;
+ int i,j,index;
+ multip = (1.0f/(8.0f*lambda));
+#pragma omp parallel for shared(P1,P2,D,R1,R2,multip) private(index,i,j,val1,val2)
+ for(i=0; i<dimX; i++) {
+ for(j=0; j<dimY; j++) {
+ index = j*dimX+i;
+ /* boundary conditions */
+ if (i == dimX-1) val1 = 0.0f; else val1 = D[index] - D[j*dimX + (i+1)];
+ if (j == dimY-1) val2 = 0.0f; else val2 = D[index] - D[(j+1)*dimX + i];
+ P1[index] = R1[index] + multip*val1;
+ P2[index] = R2[index] + multip*val2;
+ }}
+ return 1;
}
-float Proj_func2D(float *P1, float *P2, int methTV, int dimX, int dimY)
+float Proj_func2D(float *P1, float *P2, int methTV, int DimTotal)
{
- float val1, val2, denom;
- int i, j;
- if (methTV == 0) {
- /* isotropic TV*/
-#pragma omp parallel for shared(P1,P2) private(i,j,denom)
- for (i = 0; i<dimX; i++) {
- for (j = 0; j<dimY; j++) {
- denom = pow(P1[(i)*dimY + (j)], 2) + pow(P2[(i)*dimY + (j)], 2);
- if (denom > 1) {
- P1[(i)*dimY + (j)] = P1[(i)*dimY + (j)] / sqrt(denom);
- P2[(i)*dimY + (j)] = P2[(i)*dimY + (j)] / sqrt(denom);
- }
- }
- }
- }
- else {
- /* anisotropic TV*/
-#pragma omp parallel for shared(P1,P2) private(i,j,val1,val2)
- for (i = 0; i<dimX; i++) {
- for (j = 0; j<dimY; j++) {
- val1 = fabs(P1[(i)*dimY + (j)]);
- val2 = fabs(P2[(i)*dimY + (j)]);
- if (val1 < 1.0f) { val1 = 1.0f; }
- if (val2 < 1.0f) { val2 = 1.0f; }
- P1[(i)*dimY + (j)] = P1[(i)*dimY + (j)] / val1;
- P2[(i)*dimY + (j)] = P2[(i)*dimY + (j)] / val2;
- }
- }
- }
- return 1;
+ float val1, val2, denom, sq_denom;
+ int i;
+ if (methTV == 0) {
+ /* isotropic TV*/
+#pragma omp parallel for shared(P1,P2) private(i,denom,sq_denom)
+ for(i=0; i<DimTotal; i++) {
+ denom = powf(P1[i],2) + powf(P2[i],2);
+ if (denom > 1.0f) {
+ sq_denom = 1.0f/sqrtf(denom);
+ P1[i] = P1[i]*sq_denom;
+ P2[i] = P2[i]*sq_denom;
+ }
+ }
+ }
+ else {
+ /* anisotropic TV*/
+#pragma omp parallel for shared(P1,P2) private(i,val1,val2)
+ for(i=0; i<DimTotal; i++) {
+ val1 = fabs(P1[i]);
+ val2 = fabs(P2[i]);
+ if (val1 < 1.0f) {val1 = 1.0f;}
+ if (val2 < 1.0f) {val2 = 1.0f;}
+ P1[i] = P1[i]/val1;
+ P2[i] = P2[i]/val2;
+ }
+ }
+ return 1;
}
-float Rupd_func2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, int dimX, int dimY)
+float Rupd_func2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, int DimTotal)
{
- int i, j;
- float multip;
- multip = ((tk - 1.0f) / tkp1);
-#pragma omp parallel for shared(P1,P2,P1_old,P2_old,R1,R2,multip) private(i,j)
- for (i = 0; i<dimX; i++) {
- for (j = 0; j<dimY; j++) {
- R1[(i)*dimY + (j)] = P1[(i)*dimY + (j)] + multip*(P1[(i)*dimY + (j)] - P1_old[(i)*dimY + (j)]);
- R2[(i)*dimY + (j)] = P2[(i)*dimY + (j)] + multip*(P2[(i)*dimY + (j)] - P2_old[(i)*dimY + (j)]);
- }
- }
- return 1;
+ int i;
+ float multip;
+ multip = ((tk-1.0f)/tkp1);
+#pragma omp parallel for shared(P1,P2,P1_old,P2_old,R1,R2,multip) private(i)
+ for(i=0; i<DimTotal; i++) {
+ R1[i] = P1[i] + multip*(P1[i] - P1_old[i]);
+ R2[i] = P2[i] + multip*(P2[i] - P2_old[i]);
+ }
+ return 1;
}
/* 3D-case related Functions */
/*****************************************************************/
-float Obj_func_CALC3D(float *A, float *D, float *funcvalA, float lambda, int dimX, int dimY, int dimZ)
-{
- int i,j,k;
- float f1, f2, val1, val2, val3;
-
- /*data-related term */
- f1 = 0.0f;
- for(i=0; i<dimX*dimY*dimZ; i++) f1 += pow(D[i] - A[i],2);
-
- /*TV-related term */
- f2 = 0.0f;
+float Obj_func3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ)
+{
+ float val1, val2, val3;
+ int i,j,k,index;
+#pragma omp parallel for shared(A,D,R1,R2,R3) private(index,i,j,k,val1,val2,val3)
for(i=0; i<dimX; i++) {
for(j=0; j<dimY; j++) {
for(k=0; k<dimZ; k++) {
- /* boundary conditions */
- if (i == dimX-1) {val1 = 0.0f;} else {val1 = A[(dimX*dimY)*k + (i+1)*dimY + (j)] - A[(dimX*dimY)*k + (i)*dimY + (j)];}
- if (j == dimY-1) {val2 = 0.0f;} else {val2 = A[(dimX*dimY)*k + (i)*dimY + (j+1)] - A[(dimX*dimY)*k + (i)*dimY + (j)];}
- if (k == dimZ-1) {val3 = 0.0f;} else {val3 = A[(dimX*dimY)*(k+1) + (i)*dimY + (j)] - A[(dimX*dimY)*k + (i)*dimY + (j)];}
- f2 += sqrt(pow(val1,2) + pow(val2,2) + pow(val3,2));
- }}}
- /* sum of two terms */
- funcvalA[0] = 0.5f*f1 + lambda*f2;
- return *funcvalA;
-}
-
-float Obj_func3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ)
-{
- float val1, val2, val3;
- int i, j, k;
-#pragma omp parallel for shared(A,D,R1,R2,R3) private(i,j,k,val1,val2,val3)
- for (i = 0; i<dimX; i++) {
- for (j = 0; j<dimY; j++) {
- for (k = 0; k<dimZ; k++) {
- /* boundary conditions */
- if (i == 0) { val1 = 0.0f; }
- else { val1 = R1[(dimX*dimY)*k + (i - 1)*dimY + (j)]; }
- if (j == 0) { val2 = 0.0f; }
- else { val2 = R2[(dimX*dimY)*k + (i)*dimY + (j - 1)]; }
- if (k == 0) { val3 = 0.0f; }
- else { val3 = R3[(dimX*dimY)*(k - 1) + (i)*dimY + (j)]; }
- D[(dimX*dimY)*k + (i)*dimY + (j)] = A[(dimX*dimY)*k + (i)*dimY + (j)] - lambda*(R1[(dimX*dimY)*k + (i)*dimY + (j)] + R2[(dimX*dimY)*k + (i)*dimY + (j)] + R3[(dimX*dimY)*k + (i)*dimY + (j)] - val1 - val2 - val3);
- }
- }
- }
- return *D;
+ index = (dimX*dimY)*k + j*dimX+i;
+ /* boundary conditions */
+ if (i == 0) {val1 = 0.0f;} else {val1 = R1[(dimX*dimY)*k + j*dimX + (i-1)];}
+ if (j == 0) {val2 = 0.0f;} else {val2 = R2[(dimX*dimY)*k + (j-1)*dimX + i];}
+ if (k == 0) {val3 = 0.0f;} else {val3 = R3[(dimX*dimY)*(k-1) + j*dimX + i];}
+ D[index] = A[index] - lambda*(R1[index] + R2[index] + R3[index] - val1 - val2 - val3);
+ }}}
+ return *D;
}
float Grad_func3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ)
{
- float val1, val2, val3, multip;
- int i, j, k;
- multip = (1.0f / (8.0f*lambda));
-#pragma omp parallel for shared(P1,P2,P3,D,R1,R2,R3,multip) private(i,j,k,val1,val2,val3)
- for (i = 0; i<dimX; i++) {
- for (j = 0; j<dimY; j++) {
- for (k = 0; k<dimZ; k++) {
- /* boundary conditions */
- if (i == dimX - 1) val1 = 0.0f; else val1 = D[(dimX*dimY)*k + (i)*dimY + (j)] - D[(dimX*dimY)*k + (i + 1)*dimY + (j)];
- if (j == dimY - 1) val2 = 0.0f; else val2 = D[(dimX*dimY)*k + (i)*dimY + (j)] - D[(dimX*dimY)*k + (i)*dimY + (j + 1)];
- if (k == dimZ - 1) val3 = 0.0f; else val3 = D[(dimX*dimY)*k + (i)*dimY + (j)] - D[(dimX*dimY)*(k + 1) + (i)*dimY + (j)];
- P1[(dimX*dimY)*k + (i)*dimY + (j)] = R1[(dimX*dimY)*k + (i)*dimY + (j)] + multip*val1;
- P2[(dimX*dimY)*k + (i)*dimY + (j)] = R2[(dimX*dimY)*k + (i)*dimY + (j)] + multip*val2;
- P3[(dimX*dimY)*k + (i)*dimY + (j)] = R3[(dimX*dimY)*k + (i)*dimY + (j)] + multip*val3;
- }
- }
- }
- return 1;
+ float val1, val2, val3, multip;
+ int i,j,k, index;
+ multip = (1.0f/(8.0f*lambda));
+#pragma omp parallel for shared(P1,P2,P3,D,R1,R2,R3,multip) private(index,i,j,k,val1,val2,val3)
+ for(i=0; i<dimX; i++) {
+ for(j=0; j<dimY; j++) {
+ for(k=0; k<dimZ; k++) {
+ index = (dimX*dimY)*k + j*dimX+i;
+ /* boundary conditions */
+ if (i == dimX-1) val1 = 0.0f; else val1 = D[index] - D[(dimX*dimY)*k + j*dimX + (i+1)];
+ if (j == dimY-1) val2 = 0.0f; else val2 = D[index] - D[(dimX*dimY)*k + (j+1)*dimX + i];
+ if (k == dimZ-1) val3 = 0.0f; else val3 = D[index] - D[(dimX*dimY)*(k+1) + j*dimX + i];
+ P1[index] = R1[index] + multip*val1;
+ P2[index] = R2[index] + multip*val2;
+ P3[index] = R3[index] + multip*val3;
+ }}}
+ return 1;
}
-float Proj_func3D(float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ)
-{
- float val1, val2, val3;
- int i, j, k;
-#pragma omp parallel for shared(P1,P2,P3) private(i,j,k,val1,val2,val3)
- for (i = 0; i<dimX; i++) {
- for (j = 0; j<dimY; j++) {
- for (k = 0; k<dimZ; k++) {
- val1 = fabs(P1[(dimX*dimY)*k + (i)*dimY + (j)]);
- val2 = fabs(P2[(dimX*dimY)*k + (i)*dimY + (j)]);
- val3 = fabs(P3[(dimX*dimY)*k + (i)*dimY + (j)]);
- if (val1 < 1.0f) { val1 = 1.0f; }
- if (val2 < 1.0f) { val2 = 1.0f; }
- if (val3 < 1.0f) { val3 = 1.0f; }
-
- P1[(dimX*dimY)*k + (i)*dimY + (j)] = P1[(dimX*dimY)*k + (i)*dimY + (j)] / val1;
- P2[(dimX*dimY)*k + (i)*dimY + (j)] = P2[(dimX*dimY)*k + (i)*dimY + (j)] / val2;
- P3[(dimX*dimY)*k + (i)*dimY + (j)] = P3[(dimX*dimY)*k + (i)*dimY + (j)] / val3;
+float Proj_func3D(float *P1, float *P2, float *P3, int methTV, int DimTotal)
+{
+ float val1, val2, val3, denom, sq_denom;
+ int i;
+ if (methTV == 0) {
+ /* isotropic TV*/
+ #pragma omp parallel for shared(P1,P2,P3) private(i,val1,val2,val3,sq_denom)
+ for(i=0; i<DimTotal; i++) {
+ denom = powf(P1[i],2) + powf(P2[i],2) + powf(P3[i],2);
+ if (denom > 1.0f) {
+ sq_denom = 1.0f/sqrtf(denom);
+ P1[i] = P1[i]*sq_denom;
+ P2[i] = P2[i]*sq_denom;
+ P3[i] = P3[i]*sq_denom;
+ }
}
+ }
+ else {
+ /* anisotropic TV*/
+#pragma omp parallel for shared(P1,P2,P3) private(i,val1,val2,val3)
+ for(i=0; i<DimTotal; i++) {
+ val1 = fabs(P1[i]);
+ val2 = fabs(P2[i]);
+ val3 = fabs(P3[i]);
+ if (val1 < 1.0f) {val1 = 1.0f;}
+ if (val2 < 1.0f) {val2 = 1.0f;}
+ if (val3 < 1.0f) {val3 = 1.0f;}
+ P1[i] = P1[i]/val1;
+ P2[i] = P2[i]/val2;
+ P3[i] = P3[i]/val3;
+ }
}
- }
- return 1;
+ return 1;
}
-float Rupd_func3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, int dimX, int dimY, int dimZ)
+float Rupd_func3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, int DimTotal)
{
- int i, j, k;
- float multip;
- multip = ((tk - 1.0f) / tkp1);
-#pragma omp parallel for shared(P1,P2,P3,P1_old,P2_old,P3_old,R1,R2,R3,multip) private(i,j,k)
- for (i = 0; i<dimX; i++) {
- for (j = 0; j<dimY; j++) {
- for (k = 0; k<dimZ; k++) {
- R1[(dimX*dimY)*k + (i)*dimY + (j)] = P1[(dimX*dimY)*k + (i)*dimY + (j)] + multip*(P1[(dimX*dimY)*k + (i)*dimY + (j)] - P1_old[(dimX*dimY)*k + (i)*dimY + (j)]);
- R2[(dimX*dimY)*k + (i)*dimY + (j)] = P2[(dimX*dimY)*k + (i)*dimY + (j)] + multip*(P2[(dimX*dimY)*k + (i)*dimY + (j)] - P2_old[(dimX*dimY)*k + (i)*dimY + (j)]);
- R3[(dimX*dimY)*k + (i)*dimY + (j)] = P3[(dimX*dimY)*k + (i)*dimY + (j)] + multip*(P3[(dimX*dimY)*k + (i)*dimY + (j)] - P3_old[(dimX*dimY)*k + (i)*dimY + (j)]);
- }
- }
- }
- return 1;
+ int i;
+ float multip;
+ multip = ((tk-1.0f)/tkp1);
+#pragma omp parallel for shared(P1,P2,P3,P1_old,P2_old,P3_old,R1,R2,R3,multip) private(i)
+ for(i=0; i<DimTotal; i++) {
+ R1[i] = P1[i] + multip*(P1[i] - P1_old[i]);
+ R2[i] = P2[i] + multip*(P2[i] - P2_old[i]);
+ R3[i] = P3[i] + multip*(P3[i] - P3_old[i]);
+ }
+ return 1;
}
-
-
diff --git a/Core/regularizers_CPU/FGP_TV_core.h b/Core/regularizers_CPU/FGP_TV_core.h
index 10ea224..37e32f7 100644
--- a/Core/regularizers_CPU/FGP_TV_core.h
+++ b/Core/regularizers_CPU/FGP_TV_core.h
@@ -27,46 +27,37 @@ limitations under the License.
#include "CCPiDefines.h"
/* C-OMP implementation of FGP-TV [1] denoising/regularization model (2D/3D case)
-*
-* Input Parameters:
-* 1. Noisy image/volume [REQUIRED]
-* 2. lambda - regularization parameter [REQUIRED]
-* 3. Number of iterations [OPTIONAL parameter]
-* 4. eplsilon: tolerance constant [OPTIONAL parameter]
-* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
-*
-* Output:
-* [1] Filtered/regularized image
-* [2] last function value
-*
-* Example of image denoising:
-* figure;
-* Im = double(imread('lena_gray_256.tif'))/255; % loading image
-* u0 = Im + .05*randn(size(Im)); % adding noise
-* u = FGP_TV(single(u0), 0.05, 100, 1e-04);
-*
-* to compile with OMP support: mex FGP_TV.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
-* This function is based on the Matlab's code and paper by
-* [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
-*
-* D. Kazantsev, 2016-17
-*
-*/
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume
+ * 2. lambda - regularization parameter
+ * 3. Number of iterations
+ * 4. eplsilon: tolerance constant
+ * 5. TV-type: methodTV - 'iso' (0) or 'l1' (1)
+ * 6. nonneg: 'nonnegativity (0 is OFF by default)
+ * 7. print information: 0 (off) or 1 (on)
+ *
+ * Output:
+ * [1] Filtered/regularized image
+ *
+ * This function is based on the Matlab's code and paper by
+ * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
+ */
+
#ifdef __cplusplus
extern "C" {
#endif
-//float copyIm(float *A, float *B, int dimX, int dimY, int dimZ);
+CCPI_EXPORT float TV_FGP_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
+
CCPI_EXPORT float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, int dimX, int dimY);
CCPI_EXPORT float Grad_func2D(float *P1, float *P2, float *D, float *R1, float *R2, float lambda, int dimX, int dimY);
-CCPI_EXPORT float Proj_func2D(float *P1, float *P2, int methTV, int dimX, int dimY);
-CCPI_EXPORT float Rupd_func2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, int dimX, int dimY);
-CCPI_EXPORT float Obj_func_CALC2D(float *A, float *D, float *funcvalA, float lambda, int dimX, int dimY);
+CCPI_EXPORT float Proj_func2D(float *P1, float *P2, int methTV, int DimTotal);
+CCPI_EXPORT float Rupd_func2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, int DimTotal);
CCPI_EXPORT float Obj_func3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ);
CCPI_EXPORT float Grad_func3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ);
-CCPI_EXPORT float Proj_func3D(float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ);
-CCPI_EXPORT float Rupd_func3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, int dimX, int dimY, int dimZ);
-CCPI_EXPORT float Obj_func_CALC3D(float *A, float *D, float *funcvalA, float lambda, int dimX, int dimY, int dimZ);
+CCPI_EXPORT float Proj_func3D(float *P1, float *P2, float *P3, int methTV, int DimTotal);
+CCPI_EXPORT float Rupd_func3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, int DimTotal);
#ifdef __cplusplus
}
-#endif \ No newline at end of file
+#endif
diff --git a/Core/regularizers_CPU/ROF_TV_core.c b/Core/regularizers_CPU/ROF_TV_core.c
index ba7fe48..a4f82a6 100644
--- a/Core/regularizers_CPU/ROF_TV_core.c
+++ b/Core/regularizers_CPU/ROF_TV_core.c
@@ -19,7 +19,7 @@
#include "ROF_TV_core.h"
-#define EPS 1.0e-4
+#define EPS 1.0e-5
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
@@ -46,7 +46,7 @@ int sign(float x) {
*/
/* Running iterations of TV-ROF function */
-float TV_ROF(float *Input, float *Output, int dimX, int dimY, int dimZ, int iterationsNumb, float tau, float lambda)
+float TV_ROF_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float tau, int dimX, int dimY, int dimZ)
{
float *D1, *D2, *D3;
int i, DimTotal;
@@ -68,7 +68,7 @@ float TV_ROF(float *Input, float *Output, int dimX, int dimY, int dimZ, int iter
D1_func(Output, D1, dimX, dimY, dimZ);
D2_func(Output, D2, dimX, dimY, dimZ);
if (dimZ > 1) D3_func(Output, D3, dimX, dimY, dimZ);
- TV_kernel(D1, D2, D3, Output, Input, lambda, tau, dimX, dimY, dimZ);
+ TV_kernel(D1, D2, D3, Output, Input, lambdaPar, tau, dimX, dimY, dimZ);
}
free(D1);free(D2); free(D3);
return *Output;
@@ -132,9 +132,9 @@ float D1_func(float *A, float *D1, int dimX, int dimY, int dimZ)
NOMy_0 = A[index] - A[(j)*dimX + i2]; /* y- */
denom1 = NOMx_1*NOMx_1;
- denom2 = 0.5*(sign(NOMy_1) + sign(NOMy_0))*(MIN(fabs(NOMy_1),fabs(NOMy_0)));
+ denom2 = 0.5f*(sign(NOMy_1) + sign(NOMy_0))*(MIN(fabs(NOMy_1),fabs(NOMy_0)));
denom2 = denom2*denom2;
- T1 = sqrt(denom1 + denom2 + EPS);
+ T1 = sqrtf(denom1 + denom2 + EPS);
D1[index] = NOMx_1/T1;
}}
}
@@ -170,11 +170,11 @@ float D2_func(float *A, float *D2, int dimX, int dimY, int dimZ)
denom1 = NOMy_1*NOMy_1;
- denom2 = 0.5*(sign(NOMx_1) + sign(NOMx_0))*(MIN(fabs(NOMx_1),fabs(NOMx_0)));
+ denom2 = 0.5f*(sign(NOMx_1) + sign(NOMx_0))*(MIN(fabs(NOMx_1),fabs(NOMx_0)));
denom2 = denom2*denom2;
- denom3 = 0.5*(sign(NOMz_1) + sign(NOMz_0))*(MIN(fabs(NOMz_1),fabs(NOMz_0)));
+ denom3 = 0.5f*(sign(NOMz_1) + sign(NOMz_0))*(MIN(fabs(NOMz_1),fabs(NOMz_0)));
denom3 = denom3*denom3;
- T2 = sqrt(denom1 + denom2 + denom3 + EPS);
+ T2 = sqrtf(denom1 + denom2 + denom3 + EPS);
D2[index] = NOMy_1/T2;
}}}
}
@@ -196,9 +196,9 @@ float D2_func(float *A, float *D2, int dimX, int dimY, int dimZ)
/*NOMy_0 = A[(i)*dimY + j] - A[(i)*dimY + j2]; */ /* y- */
denom1 = NOMy_1*NOMy_1;
- denom2 = 0.5*(sign(NOMx_1) + sign(NOMx_0))*(MIN(fabs(NOMx_1),fabs(NOMx_0)));
+ denom2 = 0.5f*(sign(NOMx_1) + sign(NOMx_0))*(MIN(fabs(NOMx_1),fabs(NOMx_0)));
denom2 = denom2*denom2;
- T2 = sqrt(denom1 + denom2 + EPS);
+ T2 = sqrtf(denom1 + denom2 + EPS);
D2[index] = NOMy_1/T2;
}}
}
@@ -233,11 +233,11 @@ float D3_func(float *A, float *D3, int dimY, int dimX, int dimZ)
/*NOMz_0 = A[(dimX*dimY)*k + (i)*dimY + j] - A[(dimX*dimY)*k2 + (i)*dimY + j]; */ /* z- */
denom1 = NOMz_1*NOMz_1;
- denom2 = 0.5*(sign(NOMx_1) + sign(NOMx_0))*(MIN(fabs(NOMx_1),fabs(NOMx_0)));
+ denom2 = 0.5f*(sign(NOMx_1) + sign(NOMx_0))*(MIN(fabs(NOMx_1),fabs(NOMx_0)));
denom2 = denom2*denom2;
- denom3 = 0.5*(sign(NOMy_1) + sign(NOMy_0))*(MIN(fabs(NOMy_1),fabs(NOMy_0)));
+ denom3 = 0.5f*(sign(NOMy_1) + sign(NOMy_0))*(MIN(fabs(NOMy_1),fabs(NOMy_0)));
denom3 = denom3*denom3;
- T3 = sqrt(denom1 + denom2 + denom3 + EPS);
+ T3 = sqrtf(denom1 + denom2 + denom3 + EPS);
D3[index] = NOMz_1/T3;
}}}
return *D3;
@@ -268,7 +268,7 @@ float TV_kernel(float *D1, float *D2, float *D3, float *B, float *A, float lambd
dv2 = D2[index] - D2[(dimX*dimY)*k + j*dimX+i2];
dv3 = D3[index] - D3[(dimX*dimY)*k2 + j*dimX+i];
- B[index] = B[index] + tau*lambda*(dv1 + dv2 + dv3) + tau*(A[index] - B[index]);
+ B[index] = B[index] + tau*(2.0f*lambda*(dv1 + dv2 + dv3) - (B[index] - A[index]));
}}}
}
else {
@@ -284,10 +284,9 @@ float TV_kernel(float *D1, float *D2, float *D3, float *B, float *A, float lambd
/* divergence components */
dv1 = D1[index] - D1[j2*dimX + i];
- dv2 = D2[index] - D2[j*dimX + i2];
-
- //B[index] = B[index] + tau*lambda*(dv1 + dv2) + tau*(A[index] - B[index]);
- B[index] = B[index] + tau*((dv1 + dv2) - lambda*(B[index] - A[index]));
+ dv2 = D2[index] - D2[j*dimX + i2];
+
+ B[index] = B[index] + tau*(2.0f*lambda*(dv1 + dv2) - (B[index] - A[index]));
}}
}
return *B;
diff --git a/Core/regularizers_CPU/ROF_TV_core.h b/Core/regularizers_CPU/ROF_TV_core.h
index 5d69d27..14daf58 100644
--- a/Core/regularizers_CPU/ROF_TV_core.h
+++ b/Core/regularizers_CPU/ROF_TV_core.h
@@ -31,8 +31,8 @@ limitations under the License.
* Input Parameters:
* 1. Noisy image/volume [REQUIRED]
* 2. lambda - regularization parameter [REQUIRED]
- * 3. tau - marching step for explicit scheme, ~1 is recommended [REQUIRED]
- * 4. Number of iterations, for explicit scheme >= 150 is recommended [REQUIRED]
+ * 3. Number of iterations, for explicit scheme >= 150 is recommended [REQUIRED]
+ * 4. tau - marching step for explicit scheme, ~1 is recommended [REQUIRED]
*
* Output:
* [1] Regularized image/volume
@@ -47,7 +47,7 @@ limitations under the License.
extern "C" {
#endif
CCPI_EXPORT float TV_kernel(float *D1, float *D2, float *D3, float *B, float *A, float lambda, float tau, int dimY, int dimX, int dimZ);
-CCPI_EXPORT float TV_ROF(float *Input, float *Output, int dimX, int dimY, int dimZ, int iterationsNumb, float tau, float lambda);
+CCPI_EXPORT float TV_ROF_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float tau, int dimX, int dimY, int dimZ);
CCPI_EXPORT float D1_func(float *A, float *D1, int dimY, int dimX, int dimZ);
CCPI_EXPORT float D2_func(float *A, float *D2, int dimY, int dimX, int dimZ);
CCPI_EXPORT float D3_func(float *A, float *D3, int dimY, int dimX, int dimZ);
diff --git a/Core/regularizers_CPU/utils.c b/Core/regularizers_CPU/utils.c
index 951fb91..0c02c45 100644
--- a/Core/regularizers_CPU/utils.c
+++ b/Core/regularizers_CPU/utils.c
@@ -18,6 +18,7 @@ limitations under the License.
*/
#include "utils.h"
+#include <math.h>
/* Copy Image */
float copyIm(float *A, float *U, int dimX, int dimY, int dimZ)
@@ -34,7 +35,7 @@ float copyIm(float *A, float *U, int dimX, int dimY, int dimZ)
float TV_energy2D(float *U, float *U0, float *E_val, float lambda, int dimX, int dimY)
{
int i, j, i1, j1, index;
- float NOMx_2, NOMy_2, E_Grad, E_Data;
+ float NOMx_2, NOMy_2, E_Grad=0.0f, E_Data=0.0f;
/* first calculate \grad U_xy*/
for(j=0; j<dimY; j++) {
@@ -44,11 +45,11 @@ float TV_energy2D(float *U, float *U0, float *E_val, float lambda, int dimX, int
i1 = i + 1; if (i == dimX-1) i1 = i;
j1 = j + 1; if (j == dimY-1) j1 = j;
- /* Forward differences */
- NOMx_2 = pow(U[j1*dimX + i] - U[index],2); /* x+ */
- NOMy_2 = pow(U[j*dimX + i1] - U[index],2); /* y+ */
- E_Grad += sqrt(NOMx_2 + NOMy_2); /* gradient term energy */
- E_Data += 0.5f * lambda*(pow((U[index]-U0[index]),2)); /* fidelity term energy */
+ /* Forward differences */
+ NOMx_2 = powf((float)(U[j1*dimX + i] - U[index]),2); /* x+ */
+ NOMy_2 = powf((float)(U[j*dimX + i1] - U[index]),2); /* y+ */
+ E_Grad += sqrtf((float)(NOMx_2) + (float)(NOMy_2)); /* gradient term energy */
+ E_Data += 0.5f * lambda*(powf((float)(U[index]-U0[index]),2)); /* fidelity term energy */
}
}
E_val[0] = E_Grad + E_Data;
diff --git a/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.cu b/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.cu
new file mode 100755
index 0000000..61097fb
--- /dev/null
+++ b/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.cu
@@ -0,0 +1,561 @@
+ /*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "TV_FGP_GPU_core.h"
+#include <thrust/device_vector.h>
+#include <thrust/transform_reduce.h>
+
+/* CUDA implementation of FGP-TV [1] denoising/regularization model (2D/3D case)
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume
+ * 2. lambdaPar - regularization parameter
+ * 3. Number of iterations
+ * 4. eplsilon: tolerance constant
+ * 5. TV-type: methodTV - 'iso' (0) or 'l1' (1)
+ * 6. nonneg: 'nonnegativity (0 is OFF by default)
+ * 7. print information: 0 (off) or 1 (on)
+ *
+ * Output:
+ * [1] Filtered/regularized image
+ *
+ * This function is based on the Matlab's code and paper by
+ * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
+ */
+
+// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
+#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__)
+
+inline void __checkCudaErrors(cudaError err, const char *file, const int line)
+{
+ if (cudaSuccess != err)
+ {
+ fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",
+ file, line, (int)err, cudaGetErrorString(err));
+ exit(EXIT_FAILURE);
+ }
+}
+
+#define BLKXSIZE2D 16
+#define BLKYSIZE2D 16
+
+#define BLKXSIZE 8
+#define BLKYSIZE 8
+#define BLKZSIZE 8
+
+#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
+struct square { __host__ __device__ float operator()(float x) { return x * x; } };
+
+/************************************************/
+/*****************2D modules*********************/
+/************************************************/
+__global__ void Obj_func2D_kernel(float *Ad, float *D, float *R1, float *R2, int N, int M, int ImSize, float lambda)
+{
+
+ float val1,val2;
+
+ //calculate each thread global index
+ const int xIndex=blockIdx.x*blockDim.x+threadIdx.x;
+ const int yIndex=blockIdx.y*blockDim.y+threadIdx.y;
+
+ int index = xIndex + N*yIndex;
+
+ if ((xIndex < N) && (yIndex < M)) {
+ if (xIndex <= 0) {val1 = 0.0f;} else {val1 = R1[(xIndex-1) + N*yIndex];}
+ if (yIndex <= 0) {val2 = 0.0f;} else {val2 = R2[xIndex + N*(yIndex-1)];}
+ //Write final result to global memory
+ D[index] = Ad[index] - lambda*(R1[index] + R2[index] - val1 - val2);
+ }
+ return;
+}
+
+__global__ void Grad_func2D_kernel(float *P1, float *P2, float *D, float *R1, float *R2, int N, int M, int ImSize, float multip)
+{
+
+ float val1,val2;
+
+ //calculate each thread global index
+ const int xIndex=blockIdx.x*blockDim.x+threadIdx.x;
+ const int yIndex=blockIdx.y*blockDim.y+threadIdx.y;
+
+ int index = xIndex + N*yIndex;
+
+ if ((xIndex < N) && (yIndex < M)) {
+
+ /* boundary conditions */
+ if (xIndex >= N-1) val1 = 0.0f; else val1 = D[index] - D[(xIndex+1) + N*yIndex];
+ if (yIndex >= M-1) val2 = 0.0f; else val2 = D[index] - D[(xIndex) + N*(yIndex + 1)];
+
+ //Write final result to global memory
+ P1[index] = R1[index] + multip*val1;
+ P2[index] = R2[index] + multip*val2;
+ }
+ return;
+}
+
+__global__ void Proj_func2D_iso_kernel(float *P1, float *P2, int N, int M, int ImSize)
+{
+
+ float denom;
+ //calculate each thread global index
+ const int xIndex=blockIdx.x*blockDim.x+threadIdx.x;
+ const int yIndex=blockIdx.y*blockDim.y+threadIdx.y;
+
+ int index = xIndex + N*yIndex;
+
+ if ((xIndex < N) && (yIndex < M)) {
+ denom = pow(P1[index],2) + pow(P2[index],2);
+ if (denom > 1.0f) {
+ P1[index] = P1[index]/sqrt(denom);
+ P2[index] = P2[index]/sqrt(denom);
+ }
+ }
+ return;
+}
+__global__ void Proj_func2D_aniso_kernel(float *P1, float *P2, int N, int M, int ImSize)
+{
+
+ float val1, val2;
+ //calculate each thread global index
+ const int xIndex=blockIdx.x*blockDim.x+threadIdx.x;
+ const int yIndex=blockIdx.y*blockDim.y+threadIdx.y;
+
+ int index = xIndex + N*yIndex;
+
+ if ((xIndex < N) && (yIndex < M)) {
+ val1 = abs(P1[index]);
+ val2 = abs(P2[index]);
+ if (val1 < 1.0f) {val1 = 1.0f;}
+ if (val2 < 1.0f) {val2 = 1.0f;}
+ P1[index] = P1[index]/val1;
+ P2[index] = P2[index]/val2;
+ }
+ return;
+}
+__global__ void Rupd_func2D_kernel(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, float multip2, int N, int M, int ImSize)
+{
+ //calculate each thread global index
+ const int xIndex=blockIdx.x*blockDim.x+threadIdx.x;
+ const int yIndex=blockIdx.y*blockDim.y+threadIdx.y;
+
+ int index = xIndex + N*yIndex;
+
+ if ((xIndex < N) && (yIndex < M)) {
+ R1[index] = P1[index] + multip2*(P1[index] - P1_old[index]);
+ R2[index] = P2[index] + multip2*(P2[index] - P2_old[index]);
+ }
+ return;
+}
+__global__ void nonneg2D_kernel(float* Output, int N, int M, int num_total)
+{
+ int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
+ int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
+
+ int index = xIndex + N*yIndex;
+
+ if (index < num_total) {
+ if (Output[index] < 0.0f) Output[index] = 0.0f;
+ }
+}
+__global__ void copy_kernel2D(float *Input, float* Output, int N, int M, int num_total)
+{
+ int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
+ int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
+
+ int index = xIndex + N*yIndex;
+
+ if (index < num_total) {
+ Output[index] = Input[index];
+ }
+}
+__global__ void ResidCalc2D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int num_total)
+{
+ int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
+ int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
+
+ int index = xIndex + N*yIndex;
+
+ if (index < num_total) {
+ Output[index] = Input1[index] - Input2[index];
+ }
+}
+/************************************************/
+/*****************3D modules*********************/
+/************************************************/
+__global__ void Obj_func3D_kernel(float *Ad, float *D, float *R1, float *R2, float *R3, int N, int M, int Z, int ImSize, float lambda)
+{
+
+ float val1,val2,val3;
+
+ //calculate each thread global index
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+ int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+ int index = (N*M)*k + i + N*j;
+
+ if ((i < N) && (j < M) && (k < Z)) {
+ if (i <= 0) {val1 = 0.0f;} else {val1 = R1[(N*M)*(k) + (i-1) + N*j];}
+ if (j <= 0) {val2 = 0.0f;} else {val2 = R2[(N*M)*(k) + i + N*(j-1)];}
+ if (k <= 0) {val3 = 0.0f;} else {val3 = R3[(N*M)*(k-1) + i + N*j];}
+ //Write final result to global memory
+ D[index] = Ad[index] - lambda*(R1[index] + R2[index] + R3[index] - val1 - val2 - val3);
+ }
+ return;
+}
+
+__global__ void Grad_func3D_kernel(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, int N, int M, int Z, int ImSize, float multip)
+{
+
+ float val1,val2,val3;
+
+ //calculate each thread global index
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+ int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+ int index = (N*M)*k + i + N*j;
+
+ if ((i < N) && (j < M) && (k < Z)) {
+ /* boundary conditions */
+ if (i >= N-1) val1 = 0.0f; else val1 = D[index] - D[(N*M)*(k) + (i+1) + N*j];
+ if (j >= M-1) val2 = 0.0f; else val2 = D[index] - D[(N*M)*(k) + i + N*(j+1)];
+ if (k >= Z-1) val3 = 0.0f; else val3 = D[index] - D[(N*M)*(k+1) + i + N*j];
+
+ //Write final result to global memory
+ P1[index] = R1[index] + multip*val1;
+ P2[index] = R2[index] + multip*val2;
+ P3[index] = R3[index] + multip*val3;
+ }
+ return;
+}
+
+__global__ void Proj_func3D_iso_kernel(float *P1, float *P2, float *P3, int N, int M, int Z, int ImSize)
+{
+
+ float denom,sq_denom;
+ //calculate each thread global index
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+ int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+ int index = (N*M)*k + i + N*j;
+
+ if ((i < N) && (j < M) && (k < Z)) {
+ denom = pow(P1[index],2) + pow(P2[index],2) + pow(P3[index],2);
+
+ if (denom > 1.0f) {
+ sq_denom = 1.0f/sqrt(denom);
+ P1[index] = P1[index]*sq_denom;
+ P2[index] = P2[index]*sq_denom;
+ P3[index] = P3[index]*sq_denom;
+ }
+ }
+ return;
+}
+
+__global__ void Proj_func3D_aniso_kernel(float *P1, float *P2, float *P3, int N, int M, int Z, int ImSize)
+{
+
+ float val1, val2, val3;
+ //calculate each thread global index
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+ int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+ int index = (N*M)*k + i + N*j;
+
+ if ((i < N) && (j < M) && (k < Z)) {
+ val1 = abs(P1[index]);
+ val2 = abs(P2[index]);
+ val3 = abs(P3[index]);
+ if (val1 < 1.0f) {val1 = 1.0f;}
+ if (val2 < 1.0f) {val2 = 1.0f;}
+ if (val3 < 1.0f) {val3 = 1.0f;}
+ P1[index] = P1[index]/val1;
+ P2[index] = P2[index]/val2;
+ P3[index] = P3[index]/val3;
+ }
+ return;
+}
+
+
+__global__ void Rupd_func3D_kernel(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, float multip2, int N, int M, int Z, int ImSize)
+{
+ //calculate each thread global index
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+ int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+ int index = (N*M)*k + i + N*j;
+
+ if ((i < N) && (j < M) && (k < Z)) {
+ R1[index] = P1[index] + multip2*(P1[index] - P1_old[index]);
+ R2[index] = P2[index] + multip2*(P2[index] - P2_old[index]);
+ R3[index] = P3[index] + multip2*(P3[index] - P3_old[index]);
+ }
+ return;
+}
+
+__global__ void nonneg3D_kernel(float* Output, int N, int M, int Z, int num_total)
+{
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+ int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+ int index = (N*M)*k + i + N*j;
+
+ if (index < num_total) {
+ if (Output[index] < 0.0f) Output[index] = 0.0f;
+ }
+}
+
+__global__ void copy_kernel3D(float *Input, float* Output, int N, int M, int Z, int num_total)
+{
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+ int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+ int index = (N*M)*k + i + N*j;
+
+ if (index < num_total) {
+ Output[index] = Input[index];
+ }
+}
+/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
+
+////////////MAIN HOST FUNCTION ///////////////
+extern "C" void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ)
+{
+ int deviceCount = -1; // number of devices
+ cudaGetDeviceCount(&deviceCount);
+ if (deviceCount == 0) {
+ fprintf(stderr, "No CUDA devices found\n");
+ return;
+ }
+
+ int count = 0, i;
+ float re, multip,multip2;
+ float tk = 1.0f;
+ float tkp1=1.0f;
+
+ if (dimZ <= 1) {
+ /*2D verson*/
+ int ImSize = dimX*dimY;
+ float *d_input, *d_update=NULL, *d_update_prev=NULL, *P1=NULL, *P2=NULL, *P1_prev=NULL, *P2_prev=NULL, *R1=NULL, *R2=NULL;
+
+ dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D);
+ dim3 dimGrid(idivup(dimX,BLKXSIZE2D), idivup(dimY,BLKYSIZE2D));
+
+ /*allocate space for images on device*/
+ checkCudaErrors( cudaMalloc((void**)&d_input,ImSize*sizeof(float)) );
+ checkCudaErrors( cudaMalloc((void**)&d_update,ImSize*sizeof(float)) );
+ if (epsil != 0.0f) checkCudaErrors( cudaMalloc((void**)&d_update_prev,ImSize*sizeof(float)) );
+ checkCudaErrors( cudaMalloc((void**)&P1,ImSize*sizeof(float)) );
+ checkCudaErrors( cudaMalloc((void**)&P2,ImSize*sizeof(float)) );
+ checkCudaErrors( cudaMalloc((void**)&P1_prev,ImSize*sizeof(float)) );
+ checkCudaErrors( cudaMalloc((void**)&P2_prev,ImSize*sizeof(float)) );
+ checkCudaErrors( cudaMalloc((void**)&R1,ImSize*sizeof(float)) );
+ checkCudaErrors( cudaMalloc((void**)&R2,ImSize*sizeof(float)) );
+
+ checkCudaErrors( cudaMemcpy(d_input,Input,ImSize*sizeof(float),cudaMemcpyHostToDevice));
+ cudaMemset(P1, 0, ImSize*sizeof(float));
+ cudaMemset(P2, 0, ImSize*sizeof(float));
+ cudaMemset(P1_prev, 0, ImSize*sizeof(float));
+ cudaMemset(P2_prev, 0, ImSize*sizeof(float));
+ cudaMemset(R1, 0, ImSize*sizeof(float));
+ cudaMemset(R2, 0, ImSize*sizeof(float));
+
+ /********************** Run CUDA 2D kernel here ********************/
+ multip = (1.0f/(8.0f*lambdaPar));
+
+ /* The main kernel */
+ for (i = 0; i < iter; i++) {
+
+ /* computing the gradient of the objective function */
+ Obj_func2D_kernel<<<dimGrid,dimBlock>>>(d_input, d_update, R1, R2, dimX, dimY, ImSize, lambdaPar);
+ checkCudaErrors( cudaDeviceSynchronize() );
+ checkCudaErrors(cudaPeekAtLastError() );
+
+ if (nonneg != 0) {
+ nonneg2D_kernel<<<dimGrid,dimBlock>>>(d_update, dimX, dimY, ImSize);
+ checkCudaErrors( cudaDeviceSynchronize() );
+ checkCudaErrors(cudaPeekAtLastError() ); }
+
+ /*Taking a step towards minus of the gradient*/
+ Grad_func2D_kernel<<<dimGrid,dimBlock>>>(P1, P2, d_update, R1, R2, dimX, dimY, ImSize, multip);
+ checkCudaErrors( cudaDeviceSynchronize() );
+ checkCudaErrors(cudaPeekAtLastError() );
+
+ /* projection step */
+ if (methodTV == 0) Proj_func2D_iso_kernel<<<dimGrid,dimBlock>>>(P1, P2, dimX, dimY, ImSize); /*isotropic TV*/
+ else Proj_func2D_aniso_kernel<<<dimGrid,dimBlock>>>(P1, P2, dimX, dimY, ImSize); /*anisotropic TV*/
+ checkCudaErrors( cudaDeviceSynchronize() );
+ checkCudaErrors(cudaPeekAtLastError() );
+
+ tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
+ multip2 = ((tk-1.0f)/tkp1);
+
+ Rupd_func2D_kernel<<<dimGrid,dimBlock>>>(P1, P1_prev, P2, P2_prev, R1, R2, tkp1, tk, multip2, dimX, dimY, ImSize);
+ checkCudaErrors( cudaDeviceSynchronize() );
+ checkCudaErrors(cudaPeekAtLastError() );
+
+ if (epsil != 0.0f) {
+ /* calculate norm - stopping rules using the Thrust library */
+ ResidCalc2D_kernel<<<dimGrid,dimBlock>>>(d_update, d_update_prev, P1_prev, dimX, dimY, ImSize);
+ checkCudaErrors( cudaDeviceSynchronize() );
+ checkCudaErrors(cudaPeekAtLastError() );
+
+ thrust::device_vector<float> d_vec(P1_prev, P1_prev + ImSize);
+ float reduction = sqrt(thrust::transform_reduce(d_vec.begin(), d_vec.end(), square(), 0.0f, thrust::plus<float>()));
+ thrust::device_vector<float> d_vec2(d_update, d_update + ImSize);
+ float reduction2 = sqrt(thrust::transform_reduce(d_vec2.begin(), d_vec2.end(), square(), 0.0f, thrust::plus<float>()));
+
+ re = (reduction/reduction2);
+ if (re < epsil) count++;
+ if (count > 4) break;
+
+ copy_kernel2D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, ImSize);
+ checkCudaErrors( cudaDeviceSynchronize() );
+ checkCudaErrors(cudaPeekAtLastError() );
+ }
+
+ copy_kernel2D<<<dimGrid,dimBlock>>>(P1, P1_prev, dimX, dimY, ImSize);
+ checkCudaErrors( cudaDeviceSynchronize() );
+ checkCudaErrors(cudaPeekAtLastError() );
+
+ copy_kernel2D<<<dimGrid,dimBlock>>>(P2, P2_prev, dimX, dimY, ImSize);
+ checkCudaErrors( cudaDeviceSynchronize() );
+ checkCudaErrors(cudaPeekAtLastError() );
+
+ tk = tkp1;
+ }
+ if (printM == 1) printf("FGP-TV iterations stopped at iteration %i \n", i);
+ /***************************************************************/
+ //copy result matrix from device to host memory
+ cudaMemcpy(Output,d_update,ImSize*sizeof(float),cudaMemcpyDeviceToHost);
+
+ cudaFree(d_input);
+ cudaFree(d_update);
+ if (epsil != 0.0f) cudaFree(d_update_prev);
+ cudaFree(P1);
+ cudaFree(P2);
+ cudaFree(P1_prev);
+ cudaFree(P2_prev);
+ cudaFree(R1);
+ cudaFree(R2);
+ }
+ else {
+ /*3D verson*/
+ int ImSize = dimX*dimY*dimZ;
+ float *d_input, *d_update=NULL, *P1=NULL, *P2=NULL, *P3=NULL, *P1_prev=NULL, *P2_prev=NULL, *P3_prev=NULL, *R1=NULL, *R2=NULL, *R3=NULL;
+
+ dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE);
+ dim3 dimGrid(idivup(dimX,BLKXSIZE), idivup(dimY,BLKYSIZE),idivup(dimZ,BLKZSIZE));
+
+ /*allocate space for images on device*/
+ checkCudaErrors( cudaMalloc((void**)&d_input,ImSize*sizeof(float)) );
+ checkCudaErrors( cudaMalloc((void**)&d_update,ImSize*sizeof(float)) );
+ checkCudaErrors( cudaMalloc((void**)&P1,ImSize*sizeof(float)) );
+ checkCudaErrors( cudaMalloc((void**)&P2,ImSize*sizeof(float)) );
+ checkCudaErrors( cudaMalloc((void**)&P3,ImSize*sizeof(float)) );
+ checkCudaErrors( cudaMalloc((void**)&P1_prev,ImSize*sizeof(float)) );
+ checkCudaErrors( cudaMalloc((void**)&P2_prev,ImSize*sizeof(float)) );
+ checkCudaErrors( cudaMalloc((void**)&P3_prev,ImSize*sizeof(float)) );
+ checkCudaErrors( cudaMalloc((void**)&R1,ImSize*sizeof(float)) );
+ checkCudaErrors( cudaMalloc((void**)&R2,ImSize*sizeof(float)) );
+ checkCudaErrors( cudaMalloc((void**)&R3,ImSize*sizeof(float)) );
+
+ checkCudaErrors( cudaMemcpy(d_input,Input,ImSize*sizeof(float),cudaMemcpyHostToDevice));
+ cudaMemset(P1, 0, ImSize*sizeof(float));
+ cudaMemset(P2, 0, ImSize*sizeof(float));
+ cudaMemset(P3, 0, ImSize*sizeof(float));
+ cudaMemset(P1_prev, 0, ImSize*sizeof(float));
+ cudaMemset(P2_prev, 0, ImSize*sizeof(float));
+ cudaMemset(P3_prev, 0, ImSize*sizeof(float));
+ cudaMemset(R1, 0, ImSize*sizeof(float));
+ cudaMemset(R2, 0, ImSize*sizeof(float));
+ cudaMemset(R3, 0, ImSize*sizeof(float));
+ /********************** Run CUDA 3D kernel here ********************/
+ multip = (1.0f/(8.0f*lambdaPar));
+
+ /* The main kernel */
+ for (i = 0; i < iter; i++) {
+
+ /* computing the gradient of the objective function */
+ Obj_func3D_kernel<<<dimGrid,dimBlock>>>(d_input, d_update, R1, R2, R3, dimX, dimY, dimZ, ImSize, lambdaPar);
+ checkCudaErrors( cudaDeviceSynchronize() );
+ checkCudaErrors(cudaPeekAtLastError() );
+
+ if (nonneg != 0) {
+ nonneg3D_kernel<<<dimGrid,dimBlock>>>(d_update, dimX, dimY, dimZ, ImSize);
+ checkCudaErrors( cudaDeviceSynchronize() );
+ checkCudaErrors(cudaPeekAtLastError() ); }
+
+ /*Taking a step towards minus of the gradient*/
+ Grad_func3D_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, d_update, R1, R2, R3, dimX, dimY, dimZ, ImSize, multip);
+ checkCudaErrors( cudaDeviceSynchronize() );
+ checkCudaErrors(cudaPeekAtLastError() );
+
+ /* projection step */
+ if (methodTV == 0) Proj_func3D_iso_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, dimX, dimY, dimZ, ImSize); /* isotropic kernel */
+ else Proj_func3D_aniso_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, dimX, dimY, dimZ, ImSize); /* anisotropic kernel */
+ checkCudaErrors( cudaDeviceSynchronize() );
+ checkCudaErrors(cudaPeekAtLastError() );
+
+ tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
+ multip2 = ((tk-1.0f)/tkp1);
+
+ Rupd_func3D_kernel<<<dimGrid,dimBlock>>>(P1, P1_prev, P2, P2_prev, P3, P3_prev, R1, R2, R3, tkp1, tk, multip2, dimX, dimY, dimZ, ImSize);
+ checkCudaErrors( cudaDeviceSynchronize() );
+ checkCudaErrors(cudaPeekAtLastError() );
+
+ copy_kernel3D<<<dimGrid,dimBlock>>>(P1, P1_prev, dimX, dimY, dimZ, ImSize);
+ checkCudaErrors( cudaDeviceSynchronize() );
+ checkCudaErrors(cudaPeekAtLastError() );
+
+ copy_kernel3D<<<dimGrid,dimBlock>>>(P2, P2_prev, dimX, dimY, dimZ, ImSize);
+ checkCudaErrors( cudaDeviceSynchronize() );
+ checkCudaErrors(cudaPeekAtLastError() );
+
+ copy_kernel3D<<<dimGrid,dimBlock>>>(P3, P3_prev, dimX, dimY, dimZ, ImSize);
+ checkCudaErrors( cudaDeviceSynchronize() );
+ checkCudaErrors(cudaPeekAtLastError() );
+
+ tk = tkp1;
+ }
+ if (printM == 1) printf("FGP-TV iterations stopped at iteration %i \n", i);
+ /***************************************************************/
+ //copy result matrix from device to host memory
+ cudaMemcpy(Output,d_update,ImSize*sizeof(float),cudaMemcpyDeviceToHost);
+
+ cudaFree(d_input);
+ cudaFree(d_update);
+ cudaFree(P1);
+ cudaFree(P2);
+ cudaFree(P3);
+ cudaFree(P1_prev);
+ cudaFree(P2_prev);
+ cudaFree(P3_prev);
+ cudaFree(R1);
+ cudaFree(R2);
+ cudaFree(R3);
+ }
+ cudaDeviceReset();
+}
diff --git a/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.h b/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.h
new file mode 100755
index 0000000..107d243
--- /dev/null
+++ b/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.h
@@ -0,0 +1,10 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+
+#ifndef _TV_FGP_GPU_
+#define _TV_FGP_GPU_
+
+extern "C" void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
+
+#endif
diff --git a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.h b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.h
deleted file mode 100755
index 3163482..0000000
--- a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef __TVGPU_H__
-#define __TVGPU_H__
-#include "CCPiDefines.h"
-
-extern "C" CCPI_EXPORT void TV_ROF_GPU(float* Input, float* Output, int N, int M, int Z, int iter, float tau, float lambda);
-
-#endif
diff --git a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.cu
index 633bf63..a30a89b 100755
--- a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu
+++ b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.cu
@@ -17,7 +17,7 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
-#include "TV_ROF_GPU.h"
+#include "TV_ROF_GPU_core.h"
/* C-OMP implementation of ROF-TV denoising/regularization model [1] (2D/3D case)
*
@@ -54,7 +54,7 @@ limitations under the License.
#define BLKXSIZE2D 16
#define BLKYSIZE2D 16
-#define EPS 1.0e-4
+#define EPS 1.0e-5
#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
@@ -71,7 +71,7 @@ __host__ __device__ int sign (float x)
/* differences 1 */
__global__ void D1_func2D(float* Input, float* D1, int N, int M)
{
- int i1, j1, i2, j2;
+ int i1, j1, i2;
float NOMx_1,NOMy_1,NOMy_0,denom1,denom2,T1;
int i = blockDim.x * blockIdx.x + threadIdx.x;
int j = blockDim.y * blockIdx.y + threadIdx.y;
@@ -84,7 +84,6 @@ __host__ __device__ int sign (float x)
i1 = i + 1; if (i1 >= N) i1 = i-1;
i2 = i - 1; if (i2 < 0) i2 = i+1;
j1 = j + 1; if (j1 >= M) j1 = j-1;
- j2 = j - 1; if (j2 < 0) j2 = j+1;
/* Forward-backward differences */
NOMx_1 = Input[j1*N + i] - Input[index]; /* x+ */
@@ -102,7 +101,7 @@ __host__ __device__ int sign (float x)
/* differences 2 */
__global__ void D2_func2D(float* Input, float* D2, int N, int M)
{
- int i1, j1, i2, j2;
+ int i1, j1, j2;
float NOMx_1,NOMy_1,NOMx_0,denom1,denom2,T2;
int i = blockDim.x * blockIdx.x + threadIdx.x;
int j = blockDim.y * blockIdx.y + threadIdx.y;
@@ -113,7 +112,6 @@ __host__ __device__ int sign (float x)
/* boundary conditions (Neumann reflections) */
i1 = i + 1; if (i1 >= N) i1 = i-1;
- i2 = i - 1; if (i2 < 0) i2 = i+1;
j1 = j + 1; if (j1 >= M) j1 = j-1;
j2 = j - 1; if (j2 < 0) j2 = j+1;
@@ -149,7 +147,7 @@ __host__ __device__ int sign (float x)
dv1 = D1[index] - D1[j2*N + i];
dv2 = D2[index] - D2[j*N + i2];
- Update[index] = Update[index] + tau*((dv1 + dv2) - lambda*(Update[index] - Input[index]));
+ Update[index] = Update[index] + tau*(2.0f*lambda*(dv1 + dv2) - (Update[index] - Input[index]));
}
}
@@ -299,21 +297,21 @@ __host__ __device__ int sign (float x)
dv2 = D2[index] - D2[(dimX*dimY)*k + j*dimX+i2];
dv3 = D3[index] - D3[(dimX*dimY)*k2 + j*dimX+i];
- Update[index] = Update[index] + tau*lambda*(dv1 + dv2 + dv3) + tau*(Update[index] - Input[index]);
+ Update[index] = Update[index] + tau*(2.0f*lambda*(dv1 + dv2 + dv3) - (Update[index] - Input[index]));
}
}
/////////////////////////////////////////////////
// HOST FUNCTION
-extern "C" void TV_ROF_GPU(float* Input, float* Output, int N, int M, int Z, int iter, float tau, float lambda)
+extern "C" void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z)
{
// set up device
int dev = 0;
CHECK(cudaSetDevice(dev));
-
float *d_input, *d_update, *d_D1, *d_D2;
+ if (Z == 0) Z = 1;
CHECK(cudaMalloc((void**)&d_input,N*M*Z*sizeof(float)));
CHECK(cudaMalloc((void**)&d_update,N*M*Z*sizeof(float)));
CHECK(cudaMalloc((void**)&d_D1,N*M*Z*sizeof(float)));
@@ -332,32 +330,32 @@ extern "C" void TV_ROF_GPU(float* Input, float* Output, int N, int M, int Z, int
for(int n=0; n < iter; n++) {
/* calculate differences */
- D1_func3D<<<dimGrid,dimBlock>>>(d_update, d_D1, N, M, Z);
+ D1_func3D<<<dimGrid,dimBlock>>>(d_update, d_D1, N, M, Z);
CHECK(cudaDeviceSynchronize());
- D2_func3D<<<dimGrid,dimBlock>>>(d_update, d_D2, N, M, Z);
+ D2_func3D<<<dimGrid,dimBlock>>>(d_update, d_D2, N, M, Z);
CHECK(cudaDeviceSynchronize());
- D3_func3D<<<dimGrid,dimBlock>>>(d_update, d_D3, N, M, Z);
+ D3_func3D<<<dimGrid,dimBlock>>>(d_update, d_D3, N, M, Z);
CHECK(cudaDeviceSynchronize());
/*running main kernel*/
- TV_kernel3D<<<dimGrid,dimBlock>>>(d_D1, d_D2, d_D3, d_update, d_input, lambda, tau, N, M, Z);
+ TV_kernel3D<<<dimGrid,dimBlock>>>(d_D1, d_D2, d_D3, d_update, d_input, lambdaPar, tau, N, M, Z);
CHECK(cudaDeviceSynchronize());
}
CHECK(cudaFree(d_D3));
}
else {
- // TV - 2D case
+ // TV - 2D case
dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D);
dim3 dimGrid(idivup(N,BLKXSIZE2D), idivup(M,BLKYSIZE2D));
for(int n=0; n < iter; n++) {
/* calculate differences */
- D1_func2D<<<dimGrid,dimBlock>>>(d_update, d_D1, N, M);
+ D1_func2D<<<dimGrid,dimBlock>>>(d_update, d_D1, N, M);
CHECK(cudaDeviceSynchronize());
- D2_func2D<<<dimGrid,dimBlock>>>(d_update, d_D2, N, M);
+ D2_func2D<<<dimGrid,dimBlock>>>(d_update, d_D2, N, M);
CHECK(cudaDeviceSynchronize());
/*running main kernel*/
- TV_kernel2D<<<dimGrid,dimBlock>>>(d_D1, d_D2, d_update, d_input, lambda, tau, N, M);
+ TV_kernel2D<<<dimGrid,dimBlock>>>(d_D1, d_D2, d_update, d_input, lambdaPar, tau, N, M);
CHECK(cudaDeviceSynchronize());
}
}
diff --git a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.h b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.h
new file mode 100755
index 0000000..d772aba
--- /dev/null
+++ b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.h
@@ -0,0 +1,8 @@
+#ifndef __TVGPU_H__
+#define __TVGPU_H__
+#include "CCPiDefines.h"
+#include <stdio.h>
+
+extern "C" CCPI_EXPORT void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z);
+
+#endif
diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/FGP_TV.c b/Wrappers/Matlab/mex_compile/regularizers_CPU/FGP_TV.c
index 30cea1a..7cc861a 100644
--- a/Wrappers/Matlab/mex_compile/regularizers_CPU/FGP_TV.c
+++ b/Wrappers/Matlab/mex_compile/regularizers_CPU/FGP_TV.c
@@ -53,9 +53,9 @@ void mexFunction(
int nrhs, const mxArray *prhs[])
{
- int number_of_dims, iter, dimX, dimY, dimZ, ll, j, count, methTV;
+ int number_of_dims, iter, dimX, dimY, dimZ, methTV;
const int *dim_array;
- float *A, *D=NULL, *D_old=NULL, *P1=NULL, *P2=NULL, *P3=NULL, *P1_old=NULL, *P2_old=NULL, *P3_old=NULL, *R1=NULL, *R2=NULL, *R3=NULL, lambda, tk, tkp1, re, re1, re_old, epsil;
+ float *Input, *Output, lambda, epsil;
number_of_dims = mxGetNumberOfDimensions(prhs[0]);
dim_array = mxGetDimensions(prhs[0]);
@@ -63,9 +63,9 @@ void mexFunction(
/*Handling Matlab input data*/
if ((nrhs < 2) || (nrhs > 5)) mexErrMsgTxt("At least 2 parameters is required: Image(2D/3D), Regularization parameter. The full list of parameters: Image(2D/3D), Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1')");
- A = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */
+ Input = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */
lambda = (float) mxGetScalar(prhs[1]); /* regularization parameter */
- iter = 50; /* default iterations number */
+ iter = 300; /* default iterations number */
epsil = 0.0001; /* default tolerance constant */
methTV = 0; /* default isotropic TV penalty */
@@ -78,139 +78,17 @@ void mexFunction(
if (strcmp(penalty_type, "l1") == 0) methTV = 1; /* enable 'l1' penalty */
mxFree(penalty_type);
}
- /*output function value (last iteration) */
- plhs[1] = mxCreateNumericMatrix(1, 1, mxSINGLE_CLASS, mxREAL);
- float *funcvalA = (float *) mxGetData(plhs[1]);
if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
/*Handling Matlab output data*/
- dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2];
-
- tk = 1.0f;
- tkp1=1.0f;
- count = 0;
- re_old = 0.0f;
+ dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2];
if (number_of_dims == 2) {
dimZ = 1; /*2D case*/
- D = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
- D_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
- P1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
- P2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
- P1_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
- P2_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
- R1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
- R2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-
- /* begin iterations */
- for(ll=0; ll<iter; ll++) {
-
- /* computing the gradient of the objective function */
- Obj_func2D(A, D, R1, R2, lambda, dimX, dimY);
-
- /*Taking a step towards minus of the gradient*/
- Grad_func2D(P1, P2, D, R1, R2, lambda, dimX, dimY);
-
- /* projection step */
- Proj_func2D(P1, P2, methTV, dimX, dimY);
-
- /*updating R and t*/
- tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
- Rupd_func2D(P1, P1_old, P2, P2_old, R1, R2, tkp1, tk, dimX, dimY);
-
- /* calculate norm */
- re = 0.0f; re1 = 0.0f;
- for(j=0; j<dimX*dimY*dimZ; j++)
- {
- re += pow(D[j] - D_old[j],2);
- re1 += pow(D[j],2);
- }
- re = sqrt(re)/sqrt(re1);
- if (re < epsil) count++;
- if (count > 4) {
- Obj_func_CALC2D(A, D, funcvalA, lambda, dimX, dimY);
- break; }
-
- /* check that the residual norm is decreasing */
- if (ll > 2) {
- if (re > re_old) {
- Obj_func_CALC2D(A, D, funcvalA, lambda, dimX, dimY);
- break; }}
- re_old = re;
- /*printf("%f %i %i \n", re, ll, count); */
-
- /*storing old values*/
- copyIm(D, D_old, dimX, dimY, dimZ);
- copyIm(P1, P1_old, dimX, dimY, dimZ);
- copyIm(P2, P2_old, dimX, dimY, dimZ);
- tk = tkp1;
-
- /* calculating the objective function value */
- if (ll == (iter-1)) Obj_func_CALC2D(A, D, funcvalA, lambda, dimX, dimY);
- }
- printf("FGP-TV iterations stopped at iteration %i with the function value %f \n", ll, funcvalA[0]);
- }
- if (number_of_dims == 3) {
- D = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
- D_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
- P1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
- P2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
- P3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
- P1_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
- P2_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
- P3_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
- R1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
- R2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
- R3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-
- /* begin iterations */
- for(ll=0; ll<iter; ll++) {
-
- /* computing the gradient of the objective function */
- Obj_func3D(A, D, R1, R2, R3,lambda, dimX, dimY, dimZ);
-
- /*Taking a step towards minus of the gradient*/
- Grad_func3D(P1, P2, P3, D, R1, R2, R3, lambda, dimX, dimY, dimZ);
-
- /* projection step */
- Proj_func3D(P1, P2, P3, dimX, dimY, dimZ);
-
- /*updating R and t*/
- tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
- Rupd_func3D(P1, P1_old, P2, P2_old, P3, P3_old, R1, R2, R3, tkp1, tk, dimX, dimY, dimZ);
-
- /* calculate norm - stopping rules*/
- re = 0.0f; re1 = 0.0f;
- for(j=0; j<dimX*dimY*dimZ; j++)
- {
- re += pow(D[j] - D_old[j],2);
- re1 += pow(D[j],2);
- }
- re = sqrt(re)/sqrt(re1);
- /* stop if the norm residual is less than the tolerance EPS */
- if (re < epsil) count++;
- if (count > 3) {
- Obj_func_CALC3D(A, D, funcvalA, lambda, dimX, dimY, dimZ);
- break;}
-
- /* check that the residual norm is decreasing */
- if (ll > 2) {
- if (re > re_old) {
- Obj_func_CALC3D(A, D, funcvalA, lambda, dimX, dimY, dimZ);
- }}
- re_old = re;
- /*printf("%f %i %i \n", re, ll, count); */
-
- /*storing old values*/
- copyIm(D, D_old, dimX, dimY, dimZ);
- copyIm(P1, P1_old, dimX, dimY, dimZ);
- copyIm(P2, P2_old, dimX, dimY, dimZ);
- copyIm(P3, P3_old, dimX, dimY, dimZ);
- tk = tkp1;
-
- if (ll == (iter-1)) Obj_func_CALC3D(A, D, funcvalA, lambda, dimX, dimY, dimZ);
- }
- printf("FGP-TV iterations stopped at iteration %i with the function value %f \n", ll, funcvalA[0]);
- }
+ Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+ TV_FGP_CPU_main(Input, Output, lambda, iter, epsil, methTV, 0, 0, dimX, dimY, dimZ);
+ }
+ if (number_of_dims == 3) {
+ }
}
diff --git a/Wrappers/Python/ccpi/filters/regularizers.py b/Wrappers/Python/ccpi/filters/regularizers.py
new file mode 100644
index 0000000..d6dfa8c
--- /dev/null
+++ b/Wrappers/Python/ccpi/filters/regularizers.py
@@ -0,0 +1,44 @@
+"""
+script which assigns a proper device core function based on a flag ('cpu' or 'gpu')
+"""
+
+from ccpi.filters.cpu_regularizers_cython import TV_ROF_CPU, TV_FGP_CPU
+from ccpi.filters.gpu_regularizers import TV_ROF_GPU, TV_FGP_GPU
+
+def ROF_TV(inputData, regularization_parameter, iterations,
+ time_marching_parameter,device='cpu'):
+ if device == 'cpu':
+ return TV_ROF_CPU(inputData,
+ regularization_parameter,
+ iterations,
+ time_marching_parameter)
+ elif device == 'gpu':
+ return TV_ROF_GPU(inputData,
+ regularization_parameter,
+ iterations,
+ time_marching_parameter)
+ else:
+ raise ValueError('Unknown device {0}. Expecting gpu or cpu'\
+ .format(device))
+
+def FGP_TV(inputData, regularization_parameter,iterations,
+ tolerance_param, methodTV, nonneg, printM, device='cpu'):
+ if device == 'cpu':
+ return TV_FGP_CPU(inputData,
+ regularization_parameter,
+ iterations,
+ tolerance_param,
+ methodTV,
+ nonneg,
+ printM)
+ elif device == 'gpu':
+ return TV_FGP_GPU(inputData,
+ regularization_parameter,
+ iterations,
+ tolerance_param,
+ methodTV,
+ nonneg,
+ printM)
+ else:
+ raise ValueError('Unknown device {0}. Expecting gpu or cpu'\
+ .format(device)) \ No newline at end of file
diff --git a/Wrappers/Python/demo/test_cpu_regularizers.py b/Wrappers/Python/demo/test_cpu_regularizers.py
index 5908c3c..76b9de7 100644
--- a/Wrappers/Python/demo/test_cpu_regularizers.py
+++ b/Wrappers/Python/demo/test_cpu_regularizers.py
@@ -11,11 +11,8 @@ import numpy as np
import os
from enum import Enum
import timeit
-from ccpi.filters.cpu_regularizers_boost import SplitBregman_TV , FGP_TV ,\
- LLT_model, PatchBased_Regul ,\
- TGV_PD
-from ccpi.filters.cpu_regularizers_cython import ROF_TV
-
+from ccpi.filters.cpu_regularizers_boost import SplitBregman_TV, LLT_model, PatchBased_Regul, TGV_PD
+from ccpi.filters.regularizers import ROF_TV, FGP_TV
###############################################################################
#https://stackoverflow.com/questions/13875989/comparing-image-in-url-to-image-in-filesystem-in-python/13884956#13884956
#NRMSE a normalization of the root of the mean squared error
@@ -112,12 +109,10 @@ rms = rmse(Im, splitbregman)
pars['rmse'] = rms
txtstr = printParametersToString(pars)
txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-
+print (txtstr)
a=fig.add_subplot(2,4,2)
-
# these are matplotlib.patch.Patch properties
props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
# place a text box in upper left in axes coords
@@ -128,23 +123,26 @@ imgplot = plt.imshow(splitbregman,\
)
###################### FGP_TV #########################################
-# u = FGP_TV(single(u0), 0.05, 100, 1e-04);
+
start_time = timeit.default_timer()
pars = {'algorithm' : FGP_TV , \
- 'input' : u0,
- 'regularization_parameter':0.05, \
- 'number_of_iterations' :200 ,\
- 'tolerance_constant':1e-4,\
- 'TV_penalty': 0
-}
+ 'input' : u0,\
+ 'regularization_parameter':0.07, \
+ 'number_of_iterations' :300 ,\
+ 'tolerance_constant':0.00001,\
+ 'methodTV': 0 ,\
+ 'nonneg': 0 ,\
+ 'printingOut': 0
+ }
-out = FGP_TV (pars['input'],
+fgp = FGP_TV(pars['input'],
pars['regularization_parameter'],
pars['number_of_iterations'],
pars['tolerance_constant'],
- pars['TV_penalty'])
+ pars['methodTV'],
+ pars['nonneg'],
+ pars['printingOut'], 'cpu')
-fgp = out[0]
rms = rmse(Im, fgp)
pars['rmse'] = rms
@@ -165,8 +163,8 @@ imgplot = plt.imshow(fgp, \
a.text(0.05, 0.95, txtstr, transform=a.transAxes, fontsize=14,
verticalalignment='top', bbox=props)
-###################### LLT_model #########################################
+###################### LLT_model #########################################
start_time = timeit.default_timer()
pars = {'algorithm': LLT_model , \
@@ -201,8 +199,6 @@ a.text(0.05, 0.95, txtstr, transform=a.transAxes, fontsize=14,
imgplot = plt.imshow(llt,\
cmap="gray"
)
-
-
# ###################### PatchBased_Regul #########################################
# # Quick 2D denoising example in Matlab:
# # Im = double(imread('lena_gray_256.tif'))/255; % loading image
@@ -284,16 +280,15 @@ start_time = timeit.default_timer()
pars = {'algorithm': ROF_TV , \
'input' : u0,\
- 'regularization_parameter':1,\
- 'marching_step': 0.003,\
+ 'regularization_parameter':0.07,\
+ 'marching_step': 0.0025,\
'number_of_iterations': 300
}
rof = ROF_TV(pars['input'],
- pars['number_of_iterations'],
- pars['regularization_parameter'],
- pars['marching_step']
- )
-#tgv = out
+ pars['regularization_parameter'],
+ pars['number_of_iterations'],
+ pars['marching_step'], 'cpu')
+
rms = rmse(Im, rof)
pars['rmse'] = rms
@@ -307,9 +302,7 @@ props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
# place a text box in upper left in axes coords
a.text(0.05, 0.95, txtstr, transform=a.transAxes, fontsize=14,
verticalalignment='top', bbox=props)
-imgplot = plt.imshow(tgv, cmap="gray")
-
-
+imgplot = plt.imshow(rof, cmap="gray")
plt.show()
diff --git a/Wrappers/Python/setup-regularizers.py.in b/Wrappers/Python/setup-regularizers.py.in
index a125261..8655a2e 100644
--- a/Wrappers/Python/setup-regularizers.py.in
+++ b/Wrappers/Python/setup-regularizers.py.in
@@ -34,7 +34,9 @@ extra_libraries = ['cilreg']
extra_include_dirs += [os.path.join(".." , ".." , "Core"),
os.path.join(".." , ".." , "Core", "regularizers_CPU"),
- os.path.join(".." , ".." , "Core", "regularizers_GPU") ,
+ os.path.join(".." , ".." , "Core", "regularizers_GPU" , "Diffus_HO" ) ,
+ os.path.join(".." , ".." , "Core", "regularizers_GPU" , "NL_Regul" ) ,
+ os.path.join(".." , ".." , "Core", "regularizers_GPU" , "TV_ROF" ) ,
"."]
if platform.system() == 'Windows':
@@ -81,4 +83,4 @@ setup(
)
-@SETUP_GPU_WRAPPERS@ \ No newline at end of file
+@SETUP_GPU_WRAPPERS@
diff --git a/Wrappers/Python/src/cpu_regularizers.cpp b/Wrappers/Python/src/cpu_regularizers.cpp
index e311570..3529ebd 100644
--- a/Wrappers/Python/src/cpu_regularizers.cpp
+++ b/Wrappers/Python/src/cpu_regularizers.cpp
@@ -27,7 +27,6 @@ limitations under the License.
#include "boost/tuple/tuple.hpp"
#include "SplitBregman_TV_core.h"
-#include "FGP_TV_core.h"
#include "LLT_model_core.h"
#include "PatchBased_Regul_core.h"
#include "TGV_PD_core.h"
@@ -303,292 +302,6 @@ bp::list SplitBregman_TV(np::ndarray input, double d_mu, int iter, double d_epsi
}
-
-
-bp::list FGP_TV(np::ndarray input, double d_mu, int iter, double d_epsil, int methTV) {
-
- // the result is in the following list
- bp::list result;
-
- int number_of_dims, dimX, dimY, dimZ, ll, j, count;
- float *A, *D = NULL, *D_old = NULL, *P1 = NULL, *P2 = NULL, *P3 = NULL, *P1_old = NULL, *P2_old = NULL, *P3_old = NULL, *R1 = NULL, *R2 = NULL, *R3 = NULL;
- float lambda, tk, tkp1, re, re1, re_old, epsil, funcval;
-
- //number_of_dims = mxGetNumberOfDimensions(prhs[0]);
- //dim_array = mxGetDimensions(prhs[0]);
-
- number_of_dims = input.get_nd();
- int dim_array[3];
-
- dim_array[0] = input.shape(0);
- dim_array[1] = input.shape(1);
- if (number_of_dims == 2) {
- dim_array[2] = -1;
- }
- else {
- dim_array[2] = input.shape(2);
- }
- // Parameter handling is be done in Python
- ///*Handling Matlab input data*/
- //if ((nrhs < 2) || (nrhs > 5)) mexErrMsgTxt("At least 2 parameters is required: Image(2D/3D), Regularization parameter. The full list of parameters: Image(2D/3D), Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1')");
-
- ///*Handling Matlab input data*/
- //A = (float *)mxGetData(prhs[0]); /*noisy image (2D/3D) */
- A = reinterpret_cast<float *>(input.get_data());
-
- //mu = (float)mxGetScalar(prhs[1]); /* regularization parameter */
- lambda = (float)d_mu;
-
- //iter = 35; /* default iterations number */
-
- //epsil = 0.0001; /* default tolerance constant */
- epsil = (float)d_epsil;
- //methTV = 0; /* default isotropic TV penalty */
- //if ((nrhs == 3) || (nrhs == 4) || (nrhs == 5)) iter = (int)mxGetScalar(prhs[2]); /* iterations number */
- //if ((nrhs == 4) || (nrhs == 5)) epsil = (float)mxGetScalar(prhs[3]); /* tolerance constant */
- //if (nrhs == 5) {
- // char *penalty_type;
- // penalty_type = mxArrayToString(prhs[4]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */
- // if ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0)) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',");
- // if (strcmp(penalty_type, "l1") == 0) methTV = 1; /* enable 'l1' penalty */
- // mxFree(penalty_type);
- //}
- //if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) { mexErrMsgTxt("The input image must be in a single precision"); }
-
- //plhs[1] = mxCreateNumericMatrix(1, 1, mxSINGLE_CLASS, mxREAL);
- bp::tuple shape1 = bp::make_tuple(dim_array[0], dim_array[1]);
- np::dtype dtype = np::dtype::get_builtin<float>();
- np::ndarray out1 = np::zeros(shape1, dtype);
-
- //float *funcvalA = (float *)mxGetData(plhs[1]);
- float * funcvalA = reinterpret_cast<float *>(out1.get_data());
- //if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) { mexErrMsgTxt("The input image must be in a single precision"); }
-
- /*Handling Matlab output data*/
- dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2];
-
- tk = 1.0f;
- tkp1 = 1.0f;
- count = 1;
- re_old = 0.0f;
-
- if (number_of_dims == 2) {
- dimZ = 1; /*2D case*/
- /*D = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
- D_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
- P1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
- P2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
- P1_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
- P2_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
- R1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
- R2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));*/
-
- bp::tuple shape = bp::make_tuple(dim_array[0], dim_array[1]);
- np::dtype dtype = np::dtype::get_builtin<float>();
-
-
- np::ndarray npD = np::zeros(shape, dtype);
- np::ndarray npD_old = np::zeros(shape, dtype);
- np::ndarray npP1 = np::zeros(shape, dtype);
- np::ndarray npP2 = np::zeros(shape, dtype);
- np::ndarray npP1_old = np::zeros(shape, dtype);
- np::ndarray npP2_old = np::zeros(shape, dtype);
- np::ndarray npR1 = np::zeros(shape, dtype);
- np::ndarray npR2 = np::zeros(shape, dtype);
-
- D = reinterpret_cast<float *>(npD.get_data());
- D_old = reinterpret_cast<float *>(npD_old.get_data());
- P1 = reinterpret_cast<float *>(npP1.get_data());
- P2 = reinterpret_cast<float *>(npP2.get_data());
- P1_old = reinterpret_cast<float *>(npP1_old.get_data());
- P2_old = reinterpret_cast<float *>(npP2_old.get_data());
- R1 = reinterpret_cast<float *>(npR1.get_data());
- R2 = reinterpret_cast<float *>(npR2.get_data());
-
- /* begin iterations */
- for (ll = 0; ll<iter; ll++) {
- /* computing the gradient of the objective function */
- Obj_func2D(A, D, R1, R2, lambda, dimX, dimY);
-
- /*Taking a step towards minus of the gradient*/
- Grad_func2D(P1, P2, D, R1, R2, lambda, dimX, dimY);
-
- /* projection step */
- Proj_func2D(P1, P2, methTV, dimX, dimY);
-
- /*updating R and t*/
- tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
- Rupd_func2D(P1, P1_old, P2, P2_old, R1, R2, tkp1, tk, dimX, dimY);
-
- /* calculate norm */
- re = 0.0f; re1 = 0.0f;
- for (j = 0; j<dimX*dimY*dimZ; j++)
- {
- re += pow(D[j] - D_old[j], 2);
- re1 += pow(D[j], 2);
- }
- re = sqrt(re) / sqrt(re1);
- if (re < epsil) count++;
- if (count > 4) {
- Obj_func2D(A, D, P1, P2, lambda, dimX, dimY);
- funcval = 0.0f;
- for (j = 0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j], 2);
- //funcvalA[0] = sqrt(funcval);
- float fv = sqrt(funcval);
- std::memcpy(funcvalA, &fv, sizeof(float));
- break;
- }
-
- /* check that the residual norm is decreasing */
- if (ll > 2) {
- if (re > re_old) {
- Obj_func2D(A, D, P1, P2, lambda, dimX, dimY);
- funcval = 0.0f;
- for (j = 0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j], 2);
- //funcvalA[0] = sqrt(funcval);
- float fv = sqrt(funcval);
- std::memcpy(funcvalA, &fv, sizeof(float));
- break;
- }
- }
- re_old = re;
- /*printf("%f %i %i \n", re, ll, count); */
-
- /*storing old values*/
- copyIm(D, D_old, dimX, dimY, dimZ);
- copyIm(P1, P1_old, dimX, dimY, dimZ);
- copyIm(P2, P2_old, dimX, dimY, dimZ);
- tk = tkp1;
-
- /* calculating the objective function value */
- if (ll == (iter - 1)) {
- Obj_func2D(A, D, P1, P2, lambda, dimX, dimY);
- funcval = 0.0f;
- for (j = 0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j], 2);
- //funcvalA[0] = sqrt(funcval);
- float fv = sqrt(funcval);
- std::memcpy(funcvalA, &fv, sizeof(float));
- }
- }
- //printf("FGP-TV iterations stopped at iteration %i with the function value %f \n", ll, funcvalA[0]);
- result.append<np::ndarray>(npD);
- result.append<np::ndarray>(out1);
- result.append<int>(ll);
- }
- if (number_of_dims == 3) {
- /*D = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
- D_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
- P1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
- P2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
- P3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
- P1_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
- P2_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
- P3_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
- R1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
- R2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
- R3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));*/
- bp::tuple shape = bp::make_tuple(dim_array[0], dim_array[1], dim_array[2]);
- np::dtype dtype = np::dtype::get_builtin<float>();
-
- np::ndarray npD = np::zeros(shape, dtype);
- np::ndarray npD_old = np::zeros(shape, dtype);
- np::ndarray npP1 = np::zeros(shape, dtype);
- np::ndarray npP2 = np::zeros(shape, dtype);
- np::ndarray npP3 = np::zeros(shape, dtype);
- np::ndarray npP1_old = np::zeros(shape, dtype);
- np::ndarray npP2_old = np::zeros(shape, dtype);
- np::ndarray npP3_old = np::zeros(shape, dtype);
- np::ndarray npR1 = np::zeros(shape, dtype);
- np::ndarray npR2 = np::zeros(shape, dtype);
- np::ndarray npR3 = np::zeros(shape, dtype);
-
- D = reinterpret_cast<float *>(npD.get_data());
- D_old = reinterpret_cast<float *>(npD_old.get_data());
- P1 = reinterpret_cast<float *>(npP1.get_data());
- P2 = reinterpret_cast<float *>(npP2.get_data());
- P3 = reinterpret_cast<float *>(npP3.get_data());
- P1_old = reinterpret_cast<float *>(npP1_old.get_data());
- P2_old = reinterpret_cast<float *>(npP2_old.get_data());
- P3_old = reinterpret_cast<float *>(npP3_old.get_data());
- R1 = reinterpret_cast<float *>(npR1.get_data());
- R2 = reinterpret_cast<float *>(npR2.get_data());
- R3 = reinterpret_cast<float *>(npR3.get_data());
- /* begin iterations */
- for (ll = 0; ll<iter; ll++) {
- /* computing the gradient of the objective function */
- Obj_func3D(A, D, R1, R2, R3, lambda, dimX, dimY, dimZ);
- /*Taking a step towards minus of the gradient*/
- Grad_func3D(P1, P2, P3, D, R1, R2, R3, lambda, dimX, dimY, dimZ);
-
- /* projection step */
- Proj_func3D(P1, P2, P3, dimX, dimY, dimZ);
-
- /*updating R and t*/
- tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
- Rupd_func3D(P1, P1_old, P2, P2_old, P3, P3_old, R1, R2, R3, tkp1, tk, dimX, dimY, dimZ);
-
- /* calculate norm - stopping rules*/
- re = 0.0f; re1 = 0.0f;
- for (j = 0; j<dimX*dimY*dimZ; j++)
- {
- re += pow(D[j] - D_old[j], 2);
- re1 += pow(D[j], 2);
- }
- re = sqrt(re) / sqrt(re1);
- /* stop if the norm residual is less than the tolerance EPS */
- if (re < epsil) count++;
- if (count > 3) {
- Obj_func3D(A, D, P1, P2, P3, lambda, dimX, dimY, dimZ);
- funcval = 0.0f;
- for (j = 0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j], 2);
- //funcvalA[0] = sqrt(funcval);
- float fv = sqrt(funcval);
- std::memcpy(funcvalA, &fv, sizeof(float));
- break;
- }
-
- /* check that the residual norm is decreasing */
- if (ll > 2) {
- if (re > re_old) {
- Obj_func3D(A, D, P1, P2, P3, lambda, dimX, dimY, dimZ);
- funcval = 0.0f;
- for (j = 0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j], 2);
- //funcvalA[0] = sqrt(funcval);
- float fv = sqrt(funcval);
- std::memcpy(funcvalA, &fv, sizeof(float));
- break;
- }
- }
-
- re_old = re;
- /*printf("%f %i %i \n", re, ll, count); */
-
- /*storing old values*/
- copyIm(D, D_old, dimX, dimY, dimZ);
- copyIm(P1, P1_old, dimX, dimY, dimZ);
- copyIm(P2, P2_old, dimX, dimY, dimZ);
- copyIm(P3, P3_old, dimX, dimY, dimZ);
- tk = tkp1;
-
- if (ll == (iter - 1)) {
- Obj_func3D(A, D, P1, P2, P3, lambda, dimX, dimY, dimZ);
- funcval = 0.0f;
- for (j = 0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j], 2);
- //funcvalA[0] = sqrt(funcval);
- float fv = sqrt(funcval);
- std::memcpy(funcvalA, &fv, sizeof(float));
- }
-
- }
- //printf("FGP-TV iterations stopped at iteration %i with the function value %f \n", ll, funcvalA[0]);
- result.append<np::ndarray>(npD);
- result.append<np::ndarray>(out1);
- result.append<int>(ll);
- }
-
- return result;
-}
-
bp::list LLT_model(np::ndarray input, double d_lambda, double d_tau, int iter, double d_epsil, int switcher) {
// the result is in the following list
bp::list result;
@@ -1022,9 +735,6 @@ bp::list TGV_PD(np::ndarray input, double d_lambda, double d_alpha1, double d_al
result.append<np::ndarray>(npU);
}
-
-
-
return result;
}
@@ -1040,7 +750,6 @@ BOOST_PYTHON_MODULE(cpu_regularizers_boost)
np::dtype dt2 = np::dtype::get_builtin<uint16_t>();
def("SplitBregman_TV", SplitBregman_TV);
- def("FGP_TV", FGP_TV);
def("LLT_model", LLT_model);
def("PatchBased_Regul", PatchBased_Regul);
def("TGV_PD", TGV_PD);
diff --git a/Wrappers/Python/src/cpu_regularizers.pyx b/Wrappers/Python/src/cpu_regularizers.pyx
index e7ff78c..60e8627 100644
--- a/Wrappers/Python/src/cpu_regularizers.pyx
+++ b/Wrappers/Python/src/cpu_regularizers.pyx
@@ -11,62 +11,108 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-Author: Edoardo Pasca
+Author: Edoardo Pasca, Daniil Kazantsev
"""
import cython
import numpy as np
cimport numpy as np
-cdef extern float TV_ROF(float *A, float *B, int dimX, int dimY, int dimZ,
- int iterationsNumb, float tau, float flambda);
+cdef extern float TV_ROF_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float tau, int dimX, int dimY, int dimZ);
+cdef extern float TV_FGP_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
+def TV_ROF_CPU(inputData, regularization_parameter, iterationsNumb, marching_step_parameter):
+ if inputData.ndim == 2:
+ return TV_ROF_2D(inputData, regularization_parameter, iterationsNumb, marching_step_parameter)
+ elif inputData.ndim == 3:
+ return TV_ROF_3D(inputData, regularization_parameter, iterationsNumb, marching_step_parameter)
+
+def TV_ROF_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
+ float regularization_parameter,
+ int iterationsNumb,
+ float marching_step_parameter):
+ cdef long dims[2]
+ dims[0] = inputData.shape[0]
+ dims[1] = inputData.shape[1]
+
+ cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
+ np.zeros([dims[0],dims[1]], dtype='float32')
+
+ # Run ROF iterations for 2D data
+ TV_ROF_CPU_main(&inputData[0,0], &outputData[0,0], regularization_parameter, iterationsNumb, marching_step_parameter, dims[0], dims[1], 1)
+
+ return outputData
+
+def TV_ROF_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
+ int iterationsNumb,
+ float regularization_parameter,
+ float marching_step_parameter):
+ cdef long dims[3]
+ dims[0] = inputData.shape[0]
+ dims[1] = inputData.shape[1]
+ dims[2] = inputData.shape[2]
+
+ cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
+ np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
+
+ # Run ROF iterations for 3D data
+ TV_ROF_CPU_main(&inputData[0,0,0], &outputData[0,0,0], regularization_parameter, iterationsNumb, marching_step_parameter, dims[0], dims[1], dims[2])
-def ROF_TV(inputData, iterations, regularization_parameter,
- marching_step_parameter):
+ return outputData
+
+def TV_FGP_CPU(inputData, regularization_parameter, iterationsNumb, tolerance_param, methodTV, nonneg, printM):
if inputData.ndim == 2:
- return ROF_TV_2D(inputData, iterations, regularization_parameter,
- marching_step_parameter)
+ return TV_FGP_2D(inputData, regularization_parameter, iterationsNumb, tolerance_param, methodTV, nonneg, printM)
elif inputData.ndim == 3:
- return ROF_TV_3D(inputData, iterations, regularization_parameter,
- marching_step_parameter)
+ return TV_FGP_3D(inputData, regularization_parameter, iterationsNumb, tolerance_param, methodTV, nonneg, printM)
-def ROF_TV_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
- int iterations,
+def TV_FGP_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
float regularization_parameter,
- float marching_step_parameter
- ):
+ int iterationsNumb,
+ float tolerance_param,
+ int methodTV,
+ int nonneg,
+ int printM):
cdef long dims[2]
dims[0] = inputData.shape[0]
dims[1] = inputData.shape[1]
- cdef np.ndarray[np.float32_t, ndim=2, mode="c"] B = \
+ cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
np.zeros([dims[0],dims[1]], dtype='float32')
#/* Run ROF iterations for 2D data */
- TV_ROF(&inputData[0,0], &B[0,0], dims[0], dims[1], 1, iterations,
- marching_step_parameter, regularization_parameter)
+ TV_FGP_CPU_main(&inputData[0,0], &outputData[0,0], regularization_parameter,
+ iterationsNumb,
+ tolerance_param,
+ methodTV,
+ nonneg,
+ printM,
+ dims[0], dims[1], 1)
- return B
-
+ return outputData
-def ROF_TV_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
- int iterations,
+def TV_FGP_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
float regularization_parameter,
- float marching_step_parameter
- ):
- cdef long dims[2]
+ int iterationsNumb,
+ float tolerance_param,
+ int methodTV,
+ int nonneg,
+ int printM):
+ cdef long dims[3]
dims[0] = inputData.shape[0]
dims[1] = inputData.shape[1]
dims[2] = inputData.shape[2]
- cdef np.ndarray[np.float32_t, ndim=3, mode="c"] B = \
- np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
+ cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
+ np.zeros([dims[0], dims[1], dims[2]], dtype='float32')
#/* Run ROF iterations for 3D data */
- TV_ROF(&inputData[0,0, 0], &B[0,0, 0], dims[0], dims[1], dims[2], iterations,
- marching_step_parameter, regularization_parameter)
-
- return B
-
+ TV_FGP_CPU_main(&inputData[0,0,0], &outputData[0,0,0], regularization_parameter,
+ iterationsNumb,
+ tolerance_param,
+ methodTV,
+ nonneg,
+ printM,
+ dims[0], dims[1], dims[2])
+ return outputData
diff --git a/Wrappers/Python/src/gpu_regularizers.pyx b/Wrappers/Python/src/gpu_regularizers.pyx
index 5a5d274..f96156a 100644
--- a/Wrappers/Python/src/gpu_regularizers.pyx
+++ b/Wrappers/Python/src/gpu_regularizers.pyx
@@ -11,7 +11,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-Author: Edoardo Pasca
+Author: Edoardo Pasca, Daniil Kazantsev
"""
import cython
@@ -25,12 +25,16 @@ cdef extern void NLM_GPU_kernel(float *A, float* B, float *Eucl_Vec,
int N, int M, int Z, int dimension,
int SearchW, int SimilW,
int SearchW_real, float denh2, float lambdaf);
-cdef extern void TV_ROF_GPU(float* A, float* B, int N, int M, int Z, int iter, float tau, float lambdaf);
+cdef extern void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z);
+cdef extern void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int N, int M, int Z);
+
+# padding function
cdef extern float pad_crop(float *A, float *Ap,
int OldSizeX, int OldSizeY, int OldSizeZ,
int NewSizeX, int NewSizeY, int NewSizeZ,
int padXY, int switchpad_crop);
-
+
+#Diffusion 4th order regularizer
def Diff4thHajiaboli(inputData,
edge_preserv_parameter,
iterations,
@@ -48,7 +52,7 @@ def Diff4thHajiaboli(inputData,
iterations,
time_marching_parameter,
regularization_parameter)
-
+# patch-based nonlocal regularization
def NML(inputData,
SearchW_real,
SimilW,
@@ -66,23 +70,48 @@ def NML(inputData,
SimilW,
h,
lambdaf)
-
-def ROF_TV_GPU(inputData,
+
+# Total-variation Rudin-Osher-Fatemi (ROF)
+def TV_ROF_GPU(inputData,
+ regularization_parameter,
iterations,
- time_marching_parameter,
- regularization_parameter):
+ time_marching_parameter):
if inputData.ndim == 2:
return ROFTV2D(inputData,
- iterations,
- time_marching_parameter,
- regularization_parameter)
+ regularization_parameter,
+ iterations,
+ time_marching_parameter)
elif inputData.ndim == 3:
return ROFTV3D(inputData,
+ regularization_parameter,
iterations,
- time_marching_parameter,
- regularization_parameter)
-
-
+ time_marching_parameter)
+
+# Total-variation Fast-Gradient-Projection (FGP)
+def TV_FGP_GPU(inputData,
+ regularization_parameter,
+ iterations,
+ tolerance_param,
+ methodTV,
+ nonneg,
+ printM):
+ if inputData.ndim == 2:
+ return FGPTV2D(inputData,
+ regularization_parameter,
+ iterations,
+ tolerance_param,
+ methodTV,
+ nonneg,
+ printM)
+ elif inputData.ndim == 3:
+ return FGPTV3D(inputData,
+ regularization_parameter,
+ iterations,
+ tolerance_param,
+ methodTV,
+ nonneg,
+ printM)
+#****************************************************************#
def Diff4thHajiaboli2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
float edge_preserv_parameter,
int iterations,
@@ -329,48 +358,106 @@ def NML3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
switchpad_crop)
return B
-
+
+# Total-variation ROF
def ROFTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
+ float regularization_parameter,
int iterations,
- float time_marching_parameter,
- float regularization_parameter):
+ float time_marching_parameter):
cdef long dims[2]
dims[0] = inputData.shape[0]
dims[1] = inputData.shape[1]
- cdef np.ndarray[np.float32_t, ndim=2, mode="c"] B = \
+ cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
np.zeros([dims[0],dims[1]], dtype='float32')
# Running CUDA code here
- TV_ROF_GPU(
- &inputData[0,0], &B[0,0],
- dims[0], dims[1], 0,
+ TV_ROF_GPU_main(
+ &inputData[0,0], &outputData[0,0],
+ regularization_parameter,
iterations ,
time_marching_parameter,
- regularization_parameter);
+ dims[0], dims[1], 1);
- return B
+ return outputData
def ROFTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
+ float regularization_parameter,
int iterations,
- float time_marching_parameter,
- float regularization_parameter):
+ float time_marching_parameter):
cdef long dims[3]
dims[0] = inputData.shape[0]
dims[1] = inputData.shape[1]
dims[2] = inputData.shape[2]
- cdef np.ndarray[np.float32_t, ndim=3, mode="c"] B = \
+ cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
# Running CUDA code here
- TV_ROF_GPU(
- &inputData[0,0,0], &B[0,0,0],
- dims[0], dims[1], dims[2],
+ TV_ROF_GPU_main(
+ &inputData[0,0,0], &outputData[0,0,0],
+ regularization_parameter,
iterations ,
time_marching_parameter,
- regularization_parameter);
+ dims[0], dims[1], dims[2]);
- return B
+ return outputData
+
+# Total-variation Fast-Gradient-Projection (FGP)
+def FGPTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
+ float regularization_parameter,
+ int iterations,
+ float tolerance_param,
+ int methodTV,
+ int nonneg,
+ int printM):
+
+ cdef long dims[2]
+ dims[0] = inputData.shape[0]
+ dims[1] = inputData.shape[1]
+
+ cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
+ np.zeros([dims[0],dims[1]], dtype='float32')
+
+ # Running CUDA code here
+ TV_FGP_GPU_main(&inputData[0,0], &outputData[0,0],
+ regularization_parameter,
+ iterations,
+ tolerance_param,
+ methodTV,
+ nonneg,
+ printM,
+ dims[0], dims[1], 1);
+
+ return outputData
+
+def FGPTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
+ float regularization_parameter,
+ int iterations,
+ float tolerance_param,
+ int methodTV,
+ int nonneg,
+ int printM):
+
+ cdef long dims[3]
+ dims[0] = inputData.shape[0]
+ dims[1] = inputData.shape[1]
+ dims[2] = inputData.shape[2]
+
+ cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
+ np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
+
+ # Running CUDA code here
+ TV_FGP_GPU_main(
+ &inputData[0,0,0], &outputData[0,0,0],
+ regularization_parameter ,
+ iterations,
+ tolerance_param,
+ methodTV,
+ nonneg,
+ printM,
+ dims[0], dims[1], dims[2]);
+
+ return outputData
diff --git a/Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py b/Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py
new file mode 100644
index 0000000..63be1a0
--- /dev/null
+++ b/Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py
@@ -0,0 +1,219 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Feb 22 11:39:43 2018
+
+Testing CPU implementation against the GPU one
+
+@author: Daniil Kazantsev
+"""
+
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+import timeit
+from ccpi.filters.regularizers import ROF_TV, FGP_TV
+
+###############################################################################
+def printParametersToString(pars):
+ txt = r''
+ for key, value in pars.items():
+ if key== 'algorithm' :
+ txt += "{0} = {1}".format(key, value.__name__)
+ elif key == 'input':
+ txt += "{0} = {1}".format(key, np.shape(value))
+ else:
+ txt += "{0} = {1}".format(key, value)
+ txt += '\n'
+ return txt
+###############################################################################
+def rmse(im1, im2):
+ a, b = im1.shape
+ rmse = np.sqrt(np.sum((im1 - im2) ** 2) / float(a * b))
+ return rmse
+
+filename = os.path.join(".." , ".." , ".." , "data" ,"lena_gray_512.tif")
+
+# read image
+Im = plt.imread(filename)
+Im = np.asarray(Im, dtype='float32')
+
+Im = Im/255
+perc = 0.075
+u0 = Im + np.random.normal(loc = Im ,
+ scale = perc * Im ,
+ size = np.shape(Im))
+# map the u0 u0->u0>0
+f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
+u0 = f(u0).astype('float32')
+
+
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("____________ROF-TV bench___________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot
+fig = plt.figure(1)
+plt.suptitle('Comparison of ROF-TV regularizer using CPU and GPU implementations')
+a=fig.add_subplot(1,4,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm': ROF_TV, \
+ 'input' : u0,\
+ 'regularization_parameter':0.04,\
+ 'number_of_iterations': 1200,\
+ 'time_marching_parameter': 0.0025
+ }
+print ("#############ROF TV CPU####################")
+start_time = timeit.default_timer()
+rof_cpu = ROF_TV(pars['input'],
+ pars['regularization_parameter'],
+ pars['number_of_iterations'],
+ pars['time_marching_parameter'],'cpu')
+rms = rmse(Im, rof_cpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,4,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+ verticalalignment='top', bbox=props)
+imgplot = plt.imshow(rof_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+
+
+print ("##############ROF TV GPU##################")
+start_time = timeit.default_timer()
+rof_gpu = ROF_TV(pars['input'],
+ pars['regularization_parameter'],
+ pars['number_of_iterations'],
+ pars['time_marching_parameter'],'gpu')
+
+rms = rmse(Im, rof_gpu)
+pars['rmse'] = rms
+pars['algorithm'] = ROF_TV
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,4,3)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+ verticalalignment='top', bbox=props)
+imgplot = plt.imshow(rof_gpu, cmap="gray")
+plt.title('{}'.format('GPU results'))
+
+
+print ("--------Compare the results--------")
+tolerance = 1e-05
+diff_im = np.zeros(np.shape(rof_cpu))
+diff_im = abs(rof_cpu - rof_gpu)
+diff_im[diff_im > tolerance] = 1
+a=fig.add_subplot(1,4,4)
+imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray")
+plt.title('{}'.format('Pixels larger threshold difference'))
+if (diff_im.sum() > 1):
+ print ("Arrays do not match!")
+else:
+ print ("Arrays match")
+
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("____________FGP-TV bench___________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot
+fig = plt.figure(2)
+plt.suptitle('Comparison of FGP-TV regularizer using CPU and GPU implementations')
+a=fig.add_subplot(1,4,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : FGP_TV, \
+ 'input' : u0,\
+ 'regularization_parameter':0.04, \
+ 'number_of_iterations' :1200 ,\
+ 'tolerance_constant':0.00001,\
+ 'methodTV': 0 ,\
+ 'nonneg': 0 ,\
+ 'printingOut': 0
+ }
+
+print ("#############FGP TV CPU####################")
+start_time = timeit.default_timer()
+fgp_cpu = FGP_TV(pars['input'],
+ pars['regularization_parameter'],
+ pars['number_of_iterations'],
+ pars['tolerance_constant'],
+ pars['methodTV'],
+ pars['nonneg'],
+ pars['printingOut'],'cpu')
+
+
+rms = rmse(Im, fgp_cpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,4,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+ verticalalignment='top', bbox=props)
+imgplot = plt.imshow(fgp_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+
+
+print ("##############FGP TV GPU##################")
+start_time = timeit.default_timer()
+fgp_gpu = FGP_TV(pars['input'],
+ pars['regularization_parameter'],
+ pars['number_of_iterations'],
+ pars['tolerance_constant'],
+ pars['methodTV'],
+ pars['nonneg'],
+ pars['printingOut'],'gpu')
+
+rms = rmse(Im, fgp_gpu)
+pars['rmse'] = rms
+pars['algorithm'] = FGP_TV
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,4,3)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+ verticalalignment='top', bbox=props)
+imgplot = plt.imshow(fgp_gpu, cmap="gray")
+plt.title('{}'.format('GPU results'))
+
+
+print ("--------Compare the results--------")
+tolerance = 1e-05
+diff_im = np.zeros(np.shape(rof_cpu))
+diff_im = abs(fgp_cpu - fgp_gpu)
+diff_im[diff_im > tolerance] = 1
+a=fig.add_subplot(1,4,4)
+imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray")
+plt.title('{}'.format('Pixels larger threshold difference'))
+if (diff_im.sum() > 1):
+ print ("Arrays do not match!")
+else:
+ print ("Arrays match")
+
+
diff --git a/Wrappers/Python/test/test_gpu_regularizers.py b/Wrappers/Python/test/test_gpu_regularizers.py
index 735a25d..640b3f9 100644
--- a/Wrappers/Python/test/test_gpu_regularizers.py
+++ b/Wrappers/Python/test/test_gpu_regularizers.py
@@ -6,14 +6,13 @@ Created on Tue Jan 30 10:24:26 2018
@author: ofn77899
"""
-
-
import matplotlib.pyplot as plt
import numpy as np
import os
from enum import Enum
import timeit
from ccpi.filters.gpu_regularizers import Diff4thHajiaboli, NML
+from ccpi.filters.regularizers import ROF_TV, FGP_TV
###############################################################################
def printParametersToString(pars):
txt = r''
@@ -52,14 +51,15 @@ u0 = f(u0).astype('float32')
## plot
fig = plt.figure()
-a=fig.add_subplot(2,3,1)
+a=fig.add_subplot(2,4,1)
a.set_title('noise')
imgplot = plt.imshow(u0,cmap="gray")
## Diff4thHajiaboli
start_time = timeit.default_timer()
-pars = {'algorithm' : Diff4thHajiaboli , \
+pars = {
+'algorithm' : Diff4thHajiaboli , \
'input' : u0,
'edge_preserv_parameter':0.1 , \
'number_of_iterations' :250 ,\
@@ -78,7 +78,7 @@ pars['rmse'] = rms
txtstr = printParametersToString(pars)
txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
print (txtstr)
-a=fig.add_subplot(2,3,2)
+a=fig.add_subplot(2,4,2)
# these are matplotlib.patch.Patch properties
props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
@@ -87,14 +87,15 @@ a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=12,
verticalalignment='top', bbox=props)
imgplot = plt.imshow(d4h, cmap="gray")
-a=fig.add_subplot(2,3,5)
+a=fig.add_subplot(2,4,6)
# these are matplotlib.patch.Patch properties
props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
# place a text box in upper left in axes coords
a.text(0.05, 0.95, 'd4h - u0', transform=a.transAxes, fontsize=12,
verticalalignment='top', bbox=props)
-imgplot = plt.imshow((d4h - u0)**2, cmap="gray")
+imgplot = plt.imshow((d4h - u0)**2, vmin=0, vmax=0.03, cmap="gray")
+plt.colorbar(ticks=[0, 0.03], orientation='vertical')
## Patch Based Regul NML
@@ -109,6 +110,7 @@ pars = {'algorithm' : NML , \
}
"""
pars = {
+'algorithm' : NML , \
'input' : u0,
'regularization_parameter': 0.01,\
'searching_window_ratio':3, \
@@ -126,7 +128,7 @@ pars['rmse'] = rms
txtstr = printParametersToString(pars)
txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
print (txtstr)
-a=fig.add_subplot(2,3,3)
+a=fig.add_subplot(2,4,3)
# these are matplotlib.patch.Patch properties
props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
@@ -135,14 +137,103 @@ a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=12,
verticalalignment='top', bbox=props)
imgplot = plt.imshow(nml, cmap="gray")
-a=fig.add_subplot(2,3,6)
+a=fig.add_subplot(2,4,7)
# these are matplotlib.patch.Patch properties
props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
# place a text box in upper left in axes coords
a.text(0.05, 0.95, 'nml - u0', transform=a.transAxes, fontsize=14,
verticalalignment='top', bbox=props)
-imgplot = plt.imshow((nml - u0)**2, cmap="gray")
+imgplot = plt.imshow((nml - u0)**2, vmin=0, vmax=0.03, cmap="gray")
+plt.colorbar(ticks=[0, 0.03], orientation='vertical')
-plt.show()
+
+## Rudin-Osher-Fatemi (ROF) TV regularization
+start_time = timeit.default_timer()
+pars = {
+'algorithm' : ROF_TV , \
+ 'input' : u0,
+ 'regularization_parameter': 0.04,\
+ 'number_of_iterations':300,\
+ 'time_marching_parameter': 0.0025
+
+ }
+
+rof_tv = TV_ROF_GPU(pars['input'],
+ pars['regularization_parameter'],
+ pars['number_of_iterations'],
+ pars['time_marching_parameter'],'gpu')
+
+rms = rmse(Im, rof_tv)
+pars['rmse'] = rms
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(2,4,4)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=12,
+ verticalalignment='top', bbox=props)
+imgplot = plt.imshow(rof_tv, cmap="gray")
+
+a=fig.add_subplot(2,4,8)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
+# place a text box in upper left in axes coords
+a.text(0.05, 0.95, 'rof_tv - u0', transform=a.transAxes, fontsize=14,
+ verticalalignment='top', bbox=props)
+imgplot = plt.imshow((rof_tv - u0)**2, vmin=0, vmax=0.03, cmap="gray")
+plt.colorbar(ticks=[0, 0.03], orientation='vertical')
+plt.show()
+
+## Fast-Gradient Projection TV regularization
+"""
+start_time = timeit.default_timer()
+
+pars = {'algorithm' : FGP_TV, \
+ 'input' : u0,\
+ 'regularization_parameter':0.04, \
+ 'number_of_iterations' :1200 ,\
+ 'tolerance_constant':0.00001,\
+ 'methodTV': 0 ,\
+ 'nonneg': 0 ,\
+ 'printingOut': 0
+ }
+
+fgp_gpu = FGP_TV(pars['input'],
+ pars['regularization_parameter'],
+ pars['number_of_iterations'],
+ pars['tolerance_constant'],
+ pars['methodTV'],
+ pars['nonneg'],
+ pars['printingOut'],'gpu')
+
+rms = rmse(Im, fgp_gpu)
+pars['rmse'] = rms
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(2,4,4)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=12,
+ verticalalignment='top', bbox=props)
+imgplot = plt.imshow(fgp_gpu, cmap="gray")
+
+a=fig.add_subplot(2,4,8)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
+# place a text box in upper left in axes coords
+a.text(0.05, 0.95, 'fgp_gpu - u0', transform=a.transAxes, fontsize=14,
+ verticalalignment='top', bbox=props)
+imgplot = plt.imshow((fgp_gpu - u0)**2, vmin=0, vmax=0.03, cmap="gray")
+plt.colorbar(ticks=[0, 0.03], orientation='vertical')
+plt.show()
+"""