diff options
20 files changed, 1509 insertions, 823 deletions
diff --git a/Core/CMakeLists.txt b/Core/CMakeLists.txt index 3202581..4e85002 100644 --- a/Core/CMakeLists.txt +++ b/Core/CMakeLists.txt @@ -88,7 +88,7 @@ add_library(cilreg SHARED ${CMAKE_CURRENT_SOURCE_DIR}/regularizers_CPU/PatchBased_Regul_core.c ${CMAKE_CURRENT_SOURCE_DIR}/regularizers_CPU/SplitBregman_TV_core.c ${CMAKE_CURRENT_SOURCE_DIR}/regularizers_CPU/TGV_PD_core.c - ${CMAKE_CURRENT_SOURCE_DIR}/regularizers_CPU/ROF_TV_core.c + ${CMAKE_CURRENT_SOURCE_DIR}/regularizers_CPU/ROF_TV_core.c ${CMAKE_CURRENT_SOURCE_DIR}/regularizers_CPU/utils.c ) target_link_libraries(cilreg ${EXTRA_LIBRARIES} ) @@ -129,6 +129,8 @@ if (CUDA_FOUND) CUDA_ADD_LIBRARY(cilregcuda SHARED ${CMAKE_CURRENT_SOURCE_DIR}/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.cu ${CMAKE_CURRENT_SOURCE_DIR}/regularizers_GPU/NL_Regul/NLM_GPU_kernel.cu + ${CMAKE_CURRENT_SOURCE_DIR}/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.cu + ${CMAKE_CURRENT_SOURCE_DIR}/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.cu ) if (UNIX) message ("I'd install into ${CMAKE_INSTALL_PREFIX}/lib") diff --git a/Core/regularizers_CPU/FGP_TV_core.c b/Core/regularizers_CPU/FGP_TV_core.c index 03cd445..7a8ce48 100644 --- a/Core/regularizers_CPU/FGP_TV_core.c +++ b/Core/regularizers_CPU/FGP_TV_core.c @@ -22,245 +22,299 @@ limitations under the License. /* C-OMP implementation of FGP-TV [1] denoising/regularization model (2D/3D case) * * Input Parameters: - * 1. Noisy image/volume [REQUIRED] - * 2. lambda - regularization parameter [REQUIRED] - * 3. Number of iterations [OPTIONAL parameter] - * 4. eplsilon: tolerance constant [OPTIONAL parameter] - * 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter] + * 1. Noisy image/volume + * 2. lambdaPar - regularization parameter + * 3. Number of iterations + * 4. eplsilon: tolerance constant + * 5. TV-type: methodTV - 'iso' (0) or 'l1' (1) + * 6. nonneg: 'nonnegativity (0 is OFF by default) + * 7. print information: 0 (off) or 1 (on) * * Output: * [1] Filtered/regularized image - * [2] last function value - * - * Example of image denoising: - * figure; - * Im = double(imread('lena_gray_256.tif'))/255; % loading image - * u0 = Im + .05*randn(size(Im)); % adding noise - * u = FGP_TV(single(u0), 0.05, 100, 1e-04); * * This function is based on the Matlab's code and paper by * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems" - * - * D. Kazantsev, 2016-17 - * */ - -/* 2D-case related Functions */ -/*****************************************************************/ -float Obj_func_CALC2D(float *A, float *D, float *funcvalA, float lambda, int dimX, int dimY) -{ - int i,j; - float f1, f2, val1, val2; - - /*data-related term */ - f1 = 0.0f; - for(i=0; i<dimX*dimY; i++) f1 += pow(D[i] - A[i],2); - - /*TV-related term */ - f2 = 0.0f; - for(i=0; i<dimX; i++) { - for(j=0; j<dimY; j++) { - /* boundary conditions */ - if (i == dimX-1) {val1 = 0.0f;} else {val1 = A[(i+1)*dimY + (j)] - A[(i)*dimY + (j)];} - if (j == dimY-1) {val2 = 0.0f;} else {val2 = A[(i)*dimY + (j+1)] - A[(i)*dimY + (j)];} - f2 += sqrt(pow(val1,2) + pow(val2,2)); - }} - - /* sum of two terms */ - funcvalA[0] = 0.5f*f1 + lambda*f2; - return *funcvalA; + +float TV_FGP_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ) +{ + int ll, j, DimTotal; + float re, re1; + float tk = 1.0f; + float tkp1=1.0f; + int count = 0; + + if (dimZ <= 1) { + /*2D case */ + float *Output_prev=NULL, *P1=NULL, *P2=NULL, *P1_prev=NULL, *P2_prev=NULL, *R1=NULL, *R2=NULL; + DimTotal = dimX*dimY; + + Output_prev = (float *) malloc( DimTotal * sizeof(float) ); + P1 = (float *) malloc( DimTotal * sizeof(float) ); + P2 = (float *) malloc( DimTotal * sizeof(float) ); + P1_prev = (float *) malloc( DimTotal * sizeof(float) ); + P2_prev = (float *) malloc( DimTotal * sizeof(float) ); + R1 = (float *) malloc( DimTotal * sizeof(float) ); + R2 = (float *) malloc( DimTotal * sizeof(float) ); + + /* begin iterations */ + for(ll=0; ll<iterationsNumb; ll++) { + + /* computing the gradient of the objective function */ + Obj_func2D(Input, Output, R1, R2, lambdaPar, dimX, dimY); + + /* apply nonnegativity */ + if (nonneg == 1) for(j=0; j<DimTotal; j++) {if (Output[j] < 0.0f) Output[j] = 0.0f;} + + /*Taking a step towards minus of the gradient*/ + Grad_func2D(P1, P2, Output, R1, R2, lambdaPar, dimX, dimY); + + /* projection step */ + Proj_func2D(P1, P2, methodTV, DimTotal); + + /*updating R and t*/ + tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f; + Rupd_func2D(P1, P1_prev, P2, P2_prev, R1, R2, tkp1, tk, DimTotal); + + /* check early stopping criteria */ + re = 0.0f; re1 = 0.0f; + for(j=0; j<DimTotal; j++) + { + re += pow(Output[j] - Output_prev[j],2); + re1 += pow(Output[j],2); + } + re = sqrt(re)/sqrt(re1); + if (re < epsil) count++; + if (count > 4) break; + + /*storing old values*/ + copyIm(Output, Output_prev, dimX, dimY, 1); + copyIm(P1, P1_prev, dimX, dimY, 1); + copyIm(P2, P2_prev, dimX, dimY, 1); + tk = tkp1; + } + if (printM == 1) printf("FGP-TV iterations stopped at iteration %i \n", ll); + free(Output_prev); free(P1); free(P2); free(P1_prev); free(P2_prev); free(R1); free(R2); + } + else { + /*3D case*/ + float *Output_prev=NULL, *P1=NULL, *P2=NULL, *P3=NULL, *P1_prev=NULL, *P2_prev=NULL, *P3_prev=NULL, *R1=NULL, *R2=NULL, *R3=NULL; + DimTotal = dimX*dimY*dimZ; + + Output_prev = (float *) malloc( DimTotal * sizeof(float) ); + P1 = (float *) malloc( DimTotal * sizeof(float) ); + P2 = (float *) malloc( DimTotal * sizeof(float) ); + P3 = (float *) malloc( DimTotal * sizeof(float) ); + P1_prev = (float *) malloc( DimTotal * sizeof(float) ); + P2_prev = (float *) malloc( DimTotal * sizeof(float) ); + P3_prev = (float *) malloc( DimTotal * sizeof(float) ); + R1 = (float *) malloc( DimTotal * sizeof(float) ); + R2 = (float *) malloc( DimTotal * sizeof(float) ); + R3 = (float *) malloc( DimTotal * sizeof(float) ); + + /* begin iterations */ + for(ll=0; ll<iterationsNumb; ll++) { + + /* computing the gradient of the objective function */ + Obj_func3D(Input, Output, R1, R2, R3, lambdaPar, dimX, dimY, dimZ); + + /* apply nonnegativity */ + if (nonneg == 1) for(j=0; j<DimTotal; j++) {if (Output[j] < 0.0f) Output[j] = 0.0f;} + + /*Taking a step towards minus of the gradient*/ + Grad_func3D(P1, P2, P3, Output, R1, R2, R3, lambdaPar, dimX, dimY, dimZ); + + /* projection step */ + Proj_func3D(P1, P2, P3, methodTV, DimTotal); + + /*updating R and t*/ + tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f; + Rupd_func3D(P1, P1_prev, P2, P2_prev, P3, P3_prev, R1, R2, R3, tkp1, tk, DimTotal); + + /* calculate norm - stopping rules*/ + re = 0.0f; re1 = 0.0f; + for(j=0; j<DimTotal; j++) + { + re += pow(Output[j] - Output_prev[j],2); + re1 += pow(Output[j],2); + } + re = sqrt(re)/sqrt(re1); + /* stop if the norm residual is less than the tolerance EPS */ + if (re < epsil) count++; + if (count > 4) break; + + /*storing old values*/ + copyIm(Output, Output_prev, dimX, dimY, dimZ); + copyIm(P1, P1_prev, dimX, dimY, dimZ); + copyIm(P2, P2_prev, dimX, dimY, dimZ); + copyIm(P3, P3_prev, dimX, dimY, dimZ); + tk = tkp1; + } + if (printM == 1) printf("FGP-TV iterations stopped at iteration %i \n", ll); + free(Output_prev); free(P1); free(P2); free(P3); free(P1_prev); free(P2_prev); free(P3_prev); free(R1); free(R2); free(R3); + } + return *Output; } float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, int dimX, int dimY) { - float val1, val2; - int i, j; -#pragma omp parallel for shared(A,D,R1,R2) private(i,j,val1,val2) - for (i = 0; i<dimX; i++) { - for (j = 0; j<dimY; j++) { - /* boundary conditions */ - if (i == 0) { val1 = 0.0f; } - else { val1 = R1[(i - 1)*dimY + (j)]; } - if (j == 0) { val2 = 0.0f; } - else { val2 = R2[(i)*dimY + (j - 1)]; } - D[(i)*dimY + (j)] = A[(i)*dimY + (j)] - lambda*(R1[(i)*dimY + (j)] + R2[(i)*dimY + (j)] - val1 - val2); - } - } - return *D; + float val1, val2; + int i,j,index; +#pragma omp parallel for shared(A,D,R1,R2) private(index,i,j,val1,val2) + for(i=0; i<dimX; i++) { + for(j=0; j<dimY; j++) { + index = j*dimX+i; + /* boundary conditions */ + if (i == 0) {val1 = 0.0f;} else {val1 = R1[j*dimX + (i-1)];} + if (j == 0) {val2 = 0.0f;} else {val2 = R2[(j-1)*dimX + i];} + D[index] = A[index] - lambda*(R1[index] + R2[index] - val1 - val2); + }} + return *D; } float Grad_func2D(float *P1, float *P2, float *D, float *R1, float *R2, float lambda, int dimX, int dimY) { - float val1, val2, multip; - int i, j; - multip = (1.0f / (8.0f*lambda)); -#pragma omp parallel for shared(P1,P2,D,R1,R2,multip) private(i,j,val1,val2) - for (i = 0; i<dimX; i++) { - for (j = 0; j<dimY; j++) { - /* boundary conditions */ - if (i == dimX - 1) val1 = 0.0f; else val1 = D[(i)*dimY + (j)] - D[(i + 1)*dimY + (j)]; - if (j == dimY - 1) val2 = 0.0f; else val2 = D[(i)*dimY + (j)] - D[(i)*dimY + (j + 1)]; - P1[(i)*dimY + (j)] = R1[(i)*dimY + (j)] + multip*val1; - P2[(i)*dimY + (j)] = R2[(i)*dimY + (j)] + multip*val2; - } - } - return 1; + float val1, val2, multip; + int i,j,index; + multip = (1.0f/(8.0f*lambda)); +#pragma omp parallel for shared(P1,P2,D,R1,R2,multip) private(index,i,j,val1,val2) + for(i=0; i<dimX; i++) { + for(j=0; j<dimY; j++) { + index = j*dimX+i; + /* boundary conditions */ + if (i == dimX-1) val1 = 0.0f; else val1 = D[index] - D[j*dimX + (i+1)]; + if (j == dimY-1) val2 = 0.0f; else val2 = D[index] - D[(j+1)*dimX + i]; + P1[index] = R1[index] + multip*val1; + P2[index] = R2[index] + multip*val2; + }} + return 1; } -float Proj_func2D(float *P1, float *P2, int methTV, int dimX, int dimY) +float Proj_func2D(float *P1, float *P2, int methTV, int DimTotal) { - float val1, val2, denom; - int i, j; - if (methTV == 0) { - /* isotropic TV*/ -#pragma omp parallel for shared(P1,P2) private(i,j,denom) - for (i = 0; i<dimX; i++) { - for (j = 0; j<dimY; j++) { - denom = pow(P1[(i)*dimY + (j)], 2) + pow(P2[(i)*dimY + (j)], 2); - if (denom > 1) { - P1[(i)*dimY + (j)] = P1[(i)*dimY + (j)] / sqrt(denom); - P2[(i)*dimY + (j)] = P2[(i)*dimY + (j)] / sqrt(denom); - } - } - } - } - else { - /* anisotropic TV*/ -#pragma omp parallel for shared(P1,P2) private(i,j,val1,val2) - for (i = 0; i<dimX; i++) { - for (j = 0; j<dimY; j++) { - val1 = fabs(P1[(i)*dimY + (j)]); - val2 = fabs(P2[(i)*dimY + (j)]); - if (val1 < 1.0f) { val1 = 1.0f; } - if (val2 < 1.0f) { val2 = 1.0f; } - P1[(i)*dimY + (j)] = P1[(i)*dimY + (j)] / val1; - P2[(i)*dimY + (j)] = P2[(i)*dimY + (j)] / val2; - } - } - } - return 1; + float val1, val2, denom, sq_denom; + int i; + if (methTV == 0) { + /* isotropic TV*/ +#pragma omp parallel for shared(P1,P2) private(i,denom,sq_denom) + for(i=0; i<DimTotal; i++) { + denom = powf(P1[i],2) + powf(P2[i],2); + if (denom > 1.0f) { + sq_denom = 1.0f/sqrtf(denom); + P1[i] = P1[i]*sq_denom; + P2[i] = P2[i]*sq_denom; + } + } + } + else { + /* anisotropic TV*/ +#pragma omp parallel for shared(P1,P2) private(i,val1,val2) + for(i=0; i<DimTotal; i++) { + val1 = fabs(P1[i]); + val2 = fabs(P2[i]); + if (val1 < 1.0f) {val1 = 1.0f;} + if (val2 < 1.0f) {val2 = 1.0f;} + P1[i] = P1[i]/val1; + P2[i] = P2[i]/val2; + } + } + return 1; } -float Rupd_func2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, int dimX, int dimY) +float Rupd_func2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, int DimTotal) { - int i, j; - float multip; - multip = ((tk - 1.0f) / tkp1); -#pragma omp parallel for shared(P1,P2,P1_old,P2_old,R1,R2,multip) private(i,j) - for (i = 0; i<dimX; i++) { - for (j = 0; j<dimY; j++) { - R1[(i)*dimY + (j)] = P1[(i)*dimY + (j)] + multip*(P1[(i)*dimY + (j)] - P1_old[(i)*dimY + (j)]); - R2[(i)*dimY + (j)] = P2[(i)*dimY + (j)] + multip*(P2[(i)*dimY + (j)] - P2_old[(i)*dimY + (j)]); - } - } - return 1; + int i; + float multip; + multip = ((tk-1.0f)/tkp1); +#pragma omp parallel for shared(P1,P2,P1_old,P2_old,R1,R2,multip) private(i) + for(i=0; i<DimTotal; i++) { + R1[i] = P1[i] + multip*(P1[i] - P1_old[i]); + R2[i] = P2[i] + multip*(P2[i] - P2_old[i]); + } + return 1; } /* 3D-case related Functions */ /*****************************************************************/ -float Obj_func_CALC3D(float *A, float *D, float *funcvalA, float lambda, int dimX, int dimY, int dimZ) -{ - int i,j,k; - float f1, f2, val1, val2, val3; - - /*data-related term */ - f1 = 0.0f; - for(i=0; i<dimX*dimY*dimZ; i++) f1 += pow(D[i] - A[i],2); - - /*TV-related term */ - f2 = 0.0f; +float Obj_func3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ) +{ + float val1, val2, val3; + int i,j,k,index; +#pragma omp parallel for shared(A,D,R1,R2,R3) private(index,i,j,k,val1,val2,val3) for(i=0; i<dimX; i++) { for(j=0; j<dimY; j++) { for(k=0; k<dimZ; k++) { - /* boundary conditions */ - if (i == dimX-1) {val1 = 0.0f;} else {val1 = A[(dimX*dimY)*k + (i+1)*dimY + (j)] - A[(dimX*dimY)*k + (i)*dimY + (j)];} - if (j == dimY-1) {val2 = 0.0f;} else {val2 = A[(dimX*dimY)*k + (i)*dimY + (j+1)] - A[(dimX*dimY)*k + (i)*dimY + (j)];} - if (k == dimZ-1) {val3 = 0.0f;} else {val3 = A[(dimX*dimY)*(k+1) + (i)*dimY + (j)] - A[(dimX*dimY)*k + (i)*dimY + (j)];} - f2 += sqrt(pow(val1,2) + pow(val2,2) + pow(val3,2)); - }}} - /* sum of two terms */ - funcvalA[0] = 0.5f*f1 + lambda*f2; - return *funcvalA; -} - -float Obj_func3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ) -{ - float val1, val2, val3; - int i, j, k; -#pragma omp parallel for shared(A,D,R1,R2,R3) private(i,j,k,val1,val2,val3) - for (i = 0; i<dimX; i++) { - for (j = 0; j<dimY; j++) { - for (k = 0; k<dimZ; k++) { - /* boundary conditions */ - if (i == 0) { val1 = 0.0f; } - else { val1 = R1[(dimX*dimY)*k + (i - 1)*dimY + (j)]; } - if (j == 0) { val2 = 0.0f; } - else { val2 = R2[(dimX*dimY)*k + (i)*dimY + (j - 1)]; } - if (k == 0) { val3 = 0.0f; } - else { val3 = R3[(dimX*dimY)*(k - 1) + (i)*dimY + (j)]; } - D[(dimX*dimY)*k + (i)*dimY + (j)] = A[(dimX*dimY)*k + (i)*dimY + (j)] - lambda*(R1[(dimX*dimY)*k + (i)*dimY + (j)] + R2[(dimX*dimY)*k + (i)*dimY + (j)] + R3[(dimX*dimY)*k + (i)*dimY + (j)] - val1 - val2 - val3); - } - } - } - return *D; + index = (dimX*dimY)*k + j*dimX+i; + /* boundary conditions */ + if (i == 0) {val1 = 0.0f;} else {val1 = R1[(dimX*dimY)*k + j*dimX + (i-1)];} + if (j == 0) {val2 = 0.0f;} else {val2 = R2[(dimX*dimY)*k + (j-1)*dimX + i];} + if (k == 0) {val3 = 0.0f;} else {val3 = R3[(dimX*dimY)*(k-1) + j*dimX + i];} + D[index] = A[index] - lambda*(R1[index] + R2[index] + R3[index] - val1 - val2 - val3); + }}} + return *D; } float Grad_func3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ) { - float val1, val2, val3, multip; - int i, j, k; - multip = (1.0f / (8.0f*lambda)); -#pragma omp parallel for shared(P1,P2,P3,D,R1,R2,R3,multip) private(i,j,k,val1,val2,val3) - for (i = 0; i<dimX; i++) { - for (j = 0; j<dimY; j++) { - for (k = 0; k<dimZ; k++) { - /* boundary conditions */ - if (i == dimX - 1) val1 = 0.0f; else val1 = D[(dimX*dimY)*k + (i)*dimY + (j)] - D[(dimX*dimY)*k + (i + 1)*dimY + (j)]; - if (j == dimY - 1) val2 = 0.0f; else val2 = D[(dimX*dimY)*k + (i)*dimY + (j)] - D[(dimX*dimY)*k + (i)*dimY + (j + 1)]; - if (k == dimZ - 1) val3 = 0.0f; else val3 = D[(dimX*dimY)*k + (i)*dimY + (j)] - D[(dimX*dimY)*(k + 1) + (i)*dimY + (j)]; - P1[(dimX*dimY)*k + (i)*dimY + (j)] = R1[(dimX*dimY)*k + (i)*dimY + (j)] + multip*val1; - P2[(dimX*dimY)*k + (i)*dimY + (j)] = R2[(dimX*dimY)*k + (i)*dimY + (j)] + multip*val2; - P3[(dimX*dimY)*k + (i)*dimY + (j)] = R3[(dimX*dimY)*k + (i)*dimY + (j)] + multip*val3; - } - } - } - return 1; + float val1, val2, val3, multip; + int i,j,k, index; + multip = (1.0f/(8.0f*lambda)); +#pragma omp parallel for shared(P1,P2,P3,D,R1,R2,R3,multip) private(index,i,j,k,val1,val2,val3) + for(i=0; i<dimX; i++) { + for(j=0; j<dimY; j++) { + for(k=0; k<dimZ; k++) { + index = (dimX*dimY)*k + j*dimX+i; + /* boundary conditions */ + if (i == dimX-1) val1 = 0.0f; else val1 = D[index] - D[(dimX*dimY)*k + j*dimX + (i+1)]; + if (j == dimY-1) val2 = 0.0f; else val2 = D[index] - D[(dimX*dimY)*k + (j+1)*dimX + i]; + if (k == dimZ-1) val3 = 0.0f; else val3 = D[index] - D[(dimX*dimY)*(k+1) + j*dimX + i]; + P1[index] = R1[index] + multip*val1; + P2[index] = R2[index] + multip*val2; + P3[index] = R3[index] + multip*val3; + }}} + return 1; } -float Proj_func3D(float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ) -{ - float val1, val2, val3; - int i, j, k; -#pragma omp parallel for shared(P1,P2,P3) private(i,j,k,val1,val2,val3) - for (i = 0; i<dimX; i++) { - for (j = 0; j<dimY; j++) { - for (k = 0; k<dimZ; k++) { - val1 = fabs(P1[(dimX*dimY)*k + (i)*dimY + (j)]); - val2 = fabs(P2[(dimX*dimY)*k + (i)*dimY + (j)]); - val3 = fabs(P3[(dimX*dimY)*k + (i)*dimY + (j)]); - if (val1 < 1.0f) { val1 = 1.0f; } - if (val2 < 1.0f) { val2 = 1.0f; } - if (val3 < 1.0f) { val3 = 1.0f; } - - P1[(dimX*dimY)*k + (i)*dimY + (j)] = P1[(dimX*dimY)*k + (i)*dimY + (j)] / val1; - P2[(dimX*dimY)*k + (i)*dimY + (j)] = P2[(dimX*dimY)*k + (i)*dimY + (j)] / val2; - P3[(dimX*dimY)*k + (i)*dimY + (j)] = P3[(dimX*dimY)*k + (i)*dimY + (j)] / val3; +float Proj_func3D(float *P1, float *P2, float *P3, int methTV, int DimTotal) +{ + float val1, val2, val3, denom, sq_denom; + int i; + if (methTV == 0) { + /* isotropic TV*/ + #pragma omp parallel for shared(P1,P2,P3) private(i,val1,val2,val3,sq_denom) + for(i=0; i<DimTotal; i++) { + denom = powf(P1[i],2) + powf(P2[i],2) + powf(P3[i],2); + if (denom > 1.0f) { + sq_denom = 1.0f/sqrtf(denom); + P1[i] = P1[i]*sq_denom; + P2[i] = P2[i]*sq_denom; + P3[i] = P3[i]*sq_denom; + } } + } + else { + /* anisotropic TV*/ +#pragma omp parallel for shared(P1,P2,P3) private(i,val1,val2,val3) + for(i=0; i<DimTotal; i++) { + val1 = fabs(P1[i]); + val2 = fabs(P2[i]); + val3 = fabs(P3[i]); + if (val1 < 1.0f) {val1 = 1.0f;} + if (val2 < 1.0f) {val2 = 1.0f;} + if (val3 < 1.0f) {val3 = 1.0f;} + P1[i] = P1[i]/val1; + P2[i] = P2[i]/val2; + P3[i] = P3[i]/val3; + } } - } - return 1; + return 1; } -float Rupd_func3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, int dimX, int dimY, int dimZ) +float Rupd_func3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, int DimTotal) { - int i, j, k; - float multip; - multip = ((tk - 1.0f) / tkp1); -#pragma omp parallel for shared(P1,P2,P3,P1_old,P2_old,P3_old,R1,R2,R3,multip) private(i,j,k) - for (i = 0; i<dimX; i++) { - for (j = 0; j<dimY; j++) { - for (k = 0; k<dimZ; k++) { - R1[(dimX*dimY)*k + (i)*dimY + (j)] = P1[(dimX*dimY)*k + (i)*dimY + (j)] + multip*(P1[(dimX*dimY)*k + (i)*dimY + (j)] - P1_old[(dimX*dimY)*k + (i)*dimY + (j)]); - R2[(dimX*dimY)*k + (i)*dimY + (j)] = P2[(dimX*dimY)*k + (i)*dimY + (j)] + multip*(P2[(dimX*dimY)*k + (i)*dimY + (j)] - P2_old[(dimX*dimY)*k + (i)*dimY + (j)]); - R3[(dimX*dimY)*k + (i)*dimY + (j)] = P3[(dimX*dimY)*k + (i)*dimY + (j)] + multip*(P3[(dimX*dimY)*k + (i)*dimY + (j)] - P3_old[(dimX*dimY)*k + (i)*dimY + (j)]); - } - } - } - return 1; + int i; + float multip; + multip = ((tk-1.0f)/tkp1); +#pragma omp parallel for shared(P1,P2,P3,P1_old,P2_old,P3_old,R1,R2,R3,multip) private(i) + for(i=0; i<DimTotal; i++) { + R1[i] = P1[i] + multip*(P1[i] - P1_old[i]); + R2[i] = P2[i] + multip*(P2[i] - P2_old[i]); + R3[i] = P3[i] + multip*(P3[i] - P3_old[i]); + } + return 1; } - - diff --git a/Core/regularizers_CPU/FGP_TV_core.h b/Core/regularizers_CPU/FGP_TV_core.h index 10ea224..37e32f7 100644 --- a/Core/regularizers_CPU/FGP_TV_core.h +++ b/Core/regularizers_CPU/FGP_TV_core.h @@ -27,46 +27,37 @@ limitations under the License. #include "CCPiDefines.h" /* C-OMP implementation of FGP-TV [1] denoising/regularization model (2D/3D case) -* -* Input Parameters: -* 1. Noisy image/volume [REQUIRED] -* 2. lambda - regularization parameter [REQUIRED] -* 3. Number of iterations [OPTIONAL parameter] -* 4. eplsilon: tolerance constant [OPTIONAL parameter] -* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter] -* -* Output: -* [1] Filtered/regularized image -* [2] last function value -* -* Example of image denoising: -* figure; -* Im = double(imread('lena_gray_256.tif'))/255; % loading image -* u0 = Im + .05*randn(size(Im)); % adding noise -* u = FGP_TV(single(u0), 0.05, 100, 1e-04); -* -* to compile with OMP support: mex FGP_TV.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" -* This function is based on the Matlab's code and paper by -* [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems" -* -* D. Kazantsev, 2016-17 -* -*/ + * + * Input Parameters: + * 1. Noisy image/volume + * 2. lambda - regularization parameter + * 3. Number of iterations + * 4. eplsilon: tolerance constant + * 5. TV-type: methodTV - 'iso' (0) or 'l1' (1) + * 6. nonneg: 'nonnegativity (0 is OFF by default) + * 7. print information: 0 (off) or 1 (on) + * + * Output: + * [1] Filtered/regularized image + * + * This function is based on the Matlab's code and paper by + * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems" + */ + #ifdef __cplusplus extern "C" { #endif -//float copyIm(float *A, float *B, int dimX, int dimY, int dimZ); +CCPI_EXPORT float TV_FGP_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ); + CCPI_EXPORT float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, int dimX, int dimY); CCPI_EXPORT float Grad_func2D(float *P1, float *P2, float *D, float *R1, float *R2, float lambda, int dimX, int dimY); -CCPI_EXPORT float Proj_func2D(float *P1, float *P2, int methTV, int dimX, int dimY); -CCPI_EXPORT float Rupd_func2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, int dimX, int dimY); -CCPI_EXPORT float Obj_func_CALC2D(float *A, float *D, float *funcvalA, float lambda, int dimX, int dimY); +CCPI_EXPORT float Proj_func2D(float *P1, float *P2, int methTV, int DimTotal); +CCPI_EXPORT float Rupd_func2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, int DimTotal); CCPI_EXPORT float Obj_func3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ); CCPI_EXPORT float Grad_func3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ); -CCPI_EXPORT float Proj_func3D(float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ); -CCPI_EXPORT float Rupd_func3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, int dimX, int dimY, int dimZ); -CCPI_EXPORT float Obj_func_CALC3D(float *A, float *D, float *funcvalA, float lambda, int dimX, int dimY, int dimZ); +CCPI_EXPORT float Proj_func3D(float *P1, float *P2, float *P3, int methTV, int DimTotal); +CCPI_EXPORT float Rupd_func3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, int DimTotal); #ifdef __cplusplus } -#endif
\ No newline at end of file +#endif diff --git a/Core/regularizers_CPU/ROF_TV_core.c b/Core/regularizers_CPU/ROF_TV_core.c index ba7fe48..a4f82a6 100644 --- a/Core/regularizers_CPU/ROF_TV_core.c +++ b/Core/regularizers_CPU/ROF_TV_core.c @@ -19,7 +19,7 @@ #include "ROF_TV_core.h" -#define EPS 1.0e-4 +#define EPS 1.0e-5 #define MAX(x, y) (((x) > (y)) ? (x) : (y)) #define MIN(x, y) (((x) < (y)) ? (x) : (y)) @@ -46,7 +46,7 @@ int sign(float x) { */ /* Running iterations of TV-ROF function */ -float TV_ROF(float *Input, float *Output, int dimX, int dimY, int dimZ, int iterationsNumb, float tau, float lambda) +float TV_ROF_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float tau, int dimX, int dimY, int dimZ) { float *D1, *D2, *D3; int i, DimTotal; @@ -68,7 +68,7 @@ float TV_ROF(float *Input, float *Output, int dimX, int dimY, int dimZ, int iter D1_func(Output, D1, dimX, dimY, dimZ); D2_func(Output, D2, dimX, dimY, dimZ); if (dimZ > 1) D3_func(Output, D3, dimX, dimY, dimZ); - TV_kernel(D1, D2, D3, Output, Input, lambda, tau, dimX, dimY, dimZ); + TV_kernel(D1, D2, D3, Output, Input, lambdaPar, tau, dimX, dimY, dimZ); } free(D1);free(D2); free(D3); return *Output; @@ -132,9 +132,9 @@ float D1_func(float *A, float *D1, int dimX, int dimY, int dimZ) NOMy_0 = A[index] - A[(j)*dimX + i2]; /* y- */ denom1 = NOMx_1*NOMx_1; - denom2 = 0.5*(sign(NOMy_1) + sign(NOMy_0))*(MIN(fabs(NOMy_1),fabs(NOMy_0))); + denom2 = 0.5f*(sign(NOMy_1) + sign(NOMy_0))*(MIN(fabs(NOMy_1),fabs(NOMy_0))); denom2 = denom2*denom2; - T1 = sqrt(denom1 + denom2 + EPS); + T1 = sqrtf(denom1 + denom2 + EPS); D1[index] = NOMx_1/T1; }} } @@ -170,11 +170,11 @@ float D2_func(float *A, float *D2, int dimX, int dimY, int dimZ) denom1 = NOMy_1*NOMy_1; - denom2 = 0.5*(sign(NOMx_1) + sign(NOMx_0))*(MIN(fabs(NOMx_1),fabs(NOMx_0))); + denom2 = 0.5f*(sign(NOMx_1) + sign(NOMx_0))*(MIN(fabs(NOMx_1),fabs(NOMx_0))); denom2 = denom2*denom2; - denom3 = 0.5*(sign(NOMz_1) + sign(NOMz_0))*(MIN(fabs(NOMz_1),fabs(NOMz_0))); + denom3 = 0.5f*(sign(NOMz_1) + sign(NOMz_0))*(MIN(fabs(NOMz_1),fabs(NOMz_0))); denom3 = denom3*denom3; - T2 = sqrt(denom1 + denom2 + denom3 + EPS); + T2 = sqrtf(denom1 + denom2 + denom3 + EPS); D2[index] = NOMy_1/T2; }}} } @@ -196,9 +196,9 @@ float D2_func(float *A, float *D2, int dimX, int dimY, int dimZ) /*NOMy_0 = A[(i)*dimY + j] - A[(i)*dimY + j2]; */ /* y- */ denom1 = NOMy_1*NOMy_1; - denom2 = 0.5*(sign(NOMx_1) + sign(NOMx_0))*(MIN(fabs(NOMx_1),fabs(NOMx_0))); + denom2 = 0.5f*(sign(NOMx_1) + sign(NOMx_0))*(MIN(fabs(NOMx_1),fabs(NOMx_0))); denom2 = denom2*denom2; - T2 = sqrt(denom1 + denom2 + EPS); + T2 = sqrtf(denom1 + denom2 + EPS); D2[index] = NOMy_1/T2; }} } @@ -233,11 +233,11 @@ float D3_func(float *A, float *D3, int dimY, int dimX, int dimZ) /*NOMz_0 = A[(dimX*dimY)*k + (i)*dimY + j] - A[(dimX*dimY)*k2 + (i)*dimY + j]; */ /* z- */ denom1 = NOMz_1*NOMz_1; - denom2 = 0.5*(sign(NOMx_1) + sign(NOMx_0))*(MIN(fabs(NOMx_1),fabs(NOMx_0))); + denom2 = 0.5f*(sign(NOMx_1) + sign(NOMx_0))*(MIN(fabs(NOMx_1),fabs(NOMx_0))); denom2 = denom2*denom2; - denom3 = 0.5*(sign(NOMy_1) + sign(NOMy_0))*(MIN(fabs(NOMy_1),fabs(NOMy_0))); + denom3 = 0.5f*(sign(NOMy_1) + sign(NOMy_0))*(MIN(fabs(NOMy_1),fabs(NOMy_0))); denom3 = denom3*denom3; - T3 = sqrt(denom1 + denom2 + denom3 + EPS); + T3 = sqrtf(denom1 + denom2 + denom3 + EPS); D3[index] = NOMz_1/T3; }}} return *D3; @@ -268,7 +268,7 @@ float TV_kernel(float *D1, float *D2, float *D3, float *B, float *A, float lambd dv2 = D2[index] - D2[(dimX*dimY)*k + j*dimX+i2]; dv3 = D3[index] - D3[(dimX*dimY)*k2 + j*dimX+i]; - B[index] = B[index] + tau*lambda*(dv1 + dv2 + dv3) + tau*(A[index] - B[index]); + B[index] = B[index] + tau*(2.0f*lambda*(dv1 + dv2 + dv3) - (B[index] - A[index])); }}} } else { @@ -284,10 +284,9 @@ float TV_kernel(float *D1, float *D2, float *D3, float *B, float *A, float lambd /* divergence components */ dv1 = D1[index] - D1[j2*dimX + i]; - dv2 = D2[index] - D2[j*dimX + i2]; - - //B[index] = B[index] + tau*lambda*(dv1 + dv2) + tau*(A[index] - B[index]); - B[index] = B[index] + tau*((dv1 + dv2) - lambda*(B[index] - A[index])); + dv2 = D2[index] - D2[j*dimX + i2]; + + B[index] = B[index] + tau*(2.0f*lambda*(dv1 + dv2) - (B[index] - A[index])); }} } return *B; diff --git a/Core/regularizers_CPU/ROF_TV_core.h b/Core/regularizers_CPU/ROF_TV_core.h index 5d69d27..14daf58 100644 --- a/Core/regularizers_CPU/ROF_TV_core.h +++ b/Core/regularizers_CPU/ROF_TV_core.h @@ -31,8 +31,8 @@ limitations under the License. * Input Parameters: * 1. Noisy image/volume [REQUIRED] * 2. lambda - regularization parameter [REQUIRED] - * 3. tau - marching step for explicit scheme, ~1 is recommended [REQUIRED] - * 4. Number of iterations, for explicit scheme >= 150 is recommended [REQUIRED] + * 3. Number of iterations, for explicit scheme >= 150 is recommended [REQUIRED] + * 4. tau - marching step for explicit scheme, ~1 is recommended [REQUIRED] * * Output: * [1] Regularized image/volume @@ -47,7 +47,7 @@ limitations under the License. extern "C" { #endif CCPI_EXPORT float TV_kernel(float *D1, float *D2, float *D3, float *B, float *A, float lambda, float tau, int dimY, int dimX, int dimZ); -CCPI_EXPORT float TV_ROF(float *Input, float *Output, int dimX, int dimY, int dimZ, int iterationsNumb, float tau, float lambda); +CCPI_EXPORT float TV_ROF_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float tau, int dimX, int dimY, int dimZ); CCPI_EXPORT float D1_func(float *A, float *D1, int dimY, int dimX, int dimZ); CCPI_EXPORT float D2_func(float *A, float *D2, int dimY, int dimX, int dimZ); CCPI_EXPORT float D3_func(float *A, float *D3, int dimY, int dimX, int dimZ); diff --git a/Core/regularizers_CPU/utils.c b/Core/regularizers_CPU/utils.c index 951fb91..0c02c45 100644 --- a/Core/regularizers_CPU/utils.c +++ b/Core/regularizers_CPU/utils.c @@ -18,6 +18,7 @@ limitations under the License. */ #include "utils.h" +#include <math.h> /* Copy Image */ float copyIm(float *A, float *U, int dimX, int dimY, int dimZ) @@ -34,7 +35,7 @@ float copyIm(float *A, float *U, int dimX, int dimY, int dimZ) float TV_energy2D(float *U, float *U0, float *E_val, float lambda, int dimX, int dimY) { int i, j, i1, j1, index; - float NOMx_2, NOMy_2, E_Grad, E_Data; + float NOMx_2, NOMy_2, E_Grad=0.0f, E_Data=0.0f; /* first calculate \grad U_xy*/ for(j=0; j<dimY; j++) { @@ -44,11 +45,11 @@ float TV_energy2D(float *U, float *U0, float *E_val, float lambda, int dimX, int i1 = i + 1; if (i == dimX-1) i1 = i; j1 = j + 1; if (j == dimY-1) j1 = j; - /* Forward differences */ - NOMx_2 = pow(U[j1*dimX + i] - U[index],2); /* x+ */ - NOMy_2 = pow(U[j*dimX + i1] - U[index],2); /* y+ */ - E_Grad += sqrt(NOMx_2 + NOMy_2); /* gradient term energy */ - E_Data += 0.5f * lambda*(pow((U[index]-U0[index]),2)); /* fidelity term energy */ + /* Forward differences */ + NOMx_2 = powf((float)(U[j1*dimX + i] - U[index]),2); /* x+ */ + NOMy_2 = powf((float)(U[j*dimX + i1] - U[index]),2); /* y+ */ + E_Grad += sqrtf((float)(NOMx_2) + (float)(NOMy_2)); /* gradient term energy */ + E_Data += 0.5f * lambda*(powf((float)(U[index]-U0[index]),2)); /* fidelity term energy */ } } E_val[0] = E_Grad + E_Data; diff --git a/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.cu b/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.cu new file mode 100755 index 0000000..61097fb --- /dev/null +++ b/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.cu @@ -0,0 +1,561 @@ + /* +This work is part of the Core Imaging Library developed by +Visual Analytics and Imaging System Group of the Science Technology +Facilities Council, STFC + +Copyright 2017 Daniil Kazantsev +Copyright 2017 Srikanth Nagella, Edoardo Pasca + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at +http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#include "TV_FGP_GPU_core.h" +#include <thrust/device_vector.h> +#include <thrust/transform_reduce.h> + +/* CUDA implementation of FGP-TV [1] denoising/regularization model (2D/3D case) + * + * Input Parameters: + * 1. Noisy image/volume + * 2. lambdaPar - regularization parameter + * 3. Number of iterations + * 4. eplsilon: tolerance constant + * 5. TV-type: methodTV - 'iso' (0) or 'l1' (1) + * 6. nonneg: 'nonnegativity (0 is OFF by default) + * 7. print information: 0 (off) or 1 (on) + * + * Output: + * [1] Filtered/regularized image + * + * This function is based on the Matlab's code and paper by + * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems" + */ + +// This will output the proper CUDA error strings in the event that a CUDA host call returns an error +#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__) + +inline void __checkCudaErrors(cudaError err, const char *file, const int line) +{ + if (cudaSuccess != err) + { + fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n", + file, line, (int)err, cudaGetErrorString(err)); + exit(EXIT_FAILURE); + } +} + +#define BLKXSIZE2D 16 +#define BLKYSIZE2D 16 + +#define BLKXSIZE 8 +#define BLKYSIZE 8 +#define BLKZSIZE 8 + +#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) ) +struct square { __host__ __device__ float operator()(float x) { return x * x; } }; + +/************************************************/ +/*****************2D modules*********************/ +/************************************************/ +__global__ void Obj_func2D_kernel(float *Ad, float *D, float *R1, float *R2, int N, int M, int ImSize, float lambda) +{ + + float val1,val2; + + //calculate each thread global index + const int xIndex=blockIdx.x*blockDim.x+threadIdx.x; + const int yIndex=blockIdx.y*blockDim.y+threadIdx.y; + + int index = xIndex + N*yIndex; + + if ((xIndex < N) && (yIndex < M)) { + if (xIndex <= 0) {val1 = 0.0f;} else {val1 = R1[(xIndex-1) + N*yIndex];} + if (yIndex <= 0) {val2 = 0.0f;} else {val2 = R2[xIndex + N*(yIndex-1)];} + //Write final result to global memory + D[index] = Ad[index] - lambda*(R1[index] + R2[index] - val1 - val2); + } + return; +} + +__global__ void Grad_func2D_kernel(float *P1, float *P2, float *D, float *R1, float *R2, int N, int M, int ImSize, float multip) +{ + + float val1,val2; + + //calculate each thread global index + const int xIndex=blockIdx.x*blockDim.x+threadIdx.x; + const int yIndex=blockIdx.y*blockDim.y+threadIdx.y; + + int index = xIndex + N*yIndex; + + if ((xIndex < N) && (yIndex < M)) { + + /* boundary conditions */ + if (xIndex >= N-1) val1 = 0.0f; else val1 = D[index] - D[(xIndex+1) + N*yIndex]; + if (yIndex >= M-1) val2 = 0.0f; else val2 = D[index] - D[(xIndex) + N*(yIndex + 1)]; + + //Write final result to global memory + P1[index] = R1[index] + multip*val1; + P2[index] = R2[index] + multip*val2; + } + return; +} + +__global__ void Proj_func2D_iso_kernel(float *P1, float *P2, int N, int M, int ImSize) +{ + + float denom; + //calculate each thread global index + const int xIndex=blockIdx.x*blockDim.x+threadIdx.x; + const int yIndex=blockIdx.y*blockDim.y+threadIdx.y; + + int index = xIndex + N*yIndex; + + if ((xIndex < N) && (yIndex < M)) { + denom = pow(P1[index],2) + pow(P2[index],2); + if (denom > 1.0f) { + P1[index] = P1[index]/sqrt(denom); + P2[index] = P2[index]/sqrt(denom); + } + } + return; +} +__global__ void Proj_func2D_aniso_kernel(float *P1, float *P2, int N, int M, int ImSize) +{ + + float val1, val2; + //calculate each thread global index + const int xIndex=blockIdx.x*blockDim.x+threadIdx.x; + const int yIndex=blockIdx.y*blockDim.y+threadIdx.y; + + int index = xIndex + N*yIndex; + + if ((xIndex < N) && (yIndex < M)) { + val1 = abs(P1[index]); + val2 = abs(P2[index]); + if (val1 < 1.0f) {val1 = 1.0f;} + if (val2 < 1.0f) {val2 = 1.0f;} + P1[index] = P1[index]/val1; + P2[index] = P2[index]/val2; + } + return; +} +__global__ void Rupd_func2D_kernel(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, float multip2, int N, int M, int ImSize) +{ + //calculate each thread global index + const int xIndex=blockIdx.x*blockDim.x+threadIdx.x; + const int yIndex=blockIdx.y*blockDim.y+threadIdx.y; + + int index = xIndex + N*yIndex; + + if ((xIndex < N) && (yIndex < M)) { + R1[index] = P1[index] + multip2*(P1[index] - P1_old[index]); + R2[index] = P2[index] + multip2*(P2[index] - P2_old[index]); + } + return; +} +__global__ void nonneg2D_kernel(float* Output, int N, int M, int num_total) +{ + int xIndex = blockDim.x * blockIdx.x + threadIdx.x; + int yIndex = blockDim.y * blockIdx.y + threadIdx.y; + + int index = xIndex + N*yIndex; + + if (index < num_total) { + if (Output[index] < 0.0f) Output[index] = 0.0f; + } +} +__global__ void copy_kernel2D(float *Input, float* Output, int N, int M, int num_total) +{ + int xIndex = blockDim.x * blockIdx.x + threadIdx.x; + int yIndex = blockDim.y * blockIdx.y + threadIdx.y; + + int index = xIndex + N*yIndex; + + if (index < num_total) { + Output[index] = Input[index]; + } +} +__global__ void ResidCalc2D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int num_total) +{ + int xIndex = blockDim.x * blockIdx.x + threadIdx.x; + int yIndex = blockDim.y * blockIdx.y + threadIdx.y; + + int index = xIndex + N*yIndex; + + if (index < num_total) { + Output[index] = Input1[index] - Input2[index]; + } +} +/************************************************/ +/*****************3D modules*********************/ +/************************************************/ +__global__ void Obj_func3D_kernel(float *Ad, float *D, float *R1, float *R2, float *R3, int N, int M, int Z, int ImSize, float lambda) +{ + + float val1,val2,val3; + + //calculate each thread global index + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if ((i < N) && (j < M) && (k < Z)) { + if (i <= 0) {val1 = 0.0f;} else {val1 = R1[(N*M)*(k) + (i-1) + N*j];} + if (j <= 0) {val2 = 0.0f;} else {val2 = R2[(N*M)*(k) + i + N*(j-1)];} + if (k <= 0) {val3 = 0.0f;} else {val3 = R3[(N*M)*(k-1) + i + N*j];} + //Write final result to global memory + D[index] = Ad[index] - lambda*(R1[index] + R2[index] + R3[index] - val1 - val2 - val3); + } + return; +} + +__global__ void Grad_func3D_kernel(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, int N, int M, int Z, int ImSize, float multip) +{ + + float val1,val2,val3; + + //calculate each thread global index + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if ((i < N) && (j < M) && (k < Z)) { + /* boundary conditions */ + if (i >= N-1) val1 = 0.0f; else val1 = D[index] - D[(N*M)*(k) + (i+1) + N*j]; + if (j >= M-1) val2 = 0.0f; else val2 = D[index] - D[(N*M)*(k) + i + N*(j+1)]; + if (k >= Z-1) val3 = 0.0f; else val3 = D[index] - D[(N*M)*(k+1) + i + N*j]; + + //Write final result to global memory + P1[index] = R1[index] + multip*val1; + P2[index] = R2[index] + multip*val2; + P3[index] = R3[index] + multip*val3; + } + return; +} + +__global__ void Proj_func3D_iso_kernel(float *P1, float *P2, float *P3, int N, int M, int Z, int ImSize) +{ + + float denom,sq_denom; + //calculate each thread global index + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if ((i < N) && (j < M) && (k < Z)) { + denom = pow(P1[index],2) + pow(P2[index],2) + pow(P3[index],2); + + if (denom > 1.0f) { + sq_denom = 1.0f/sqrt(denom); + P1[index] = P1[index]*sq_denom; + P2[index] = P2[index]*sq_denom; + P3[index] = P3[index]*sq_denom; + } + } + return; +} + +__global__ void Proj_func3D_aniso_kernel(float *P1, float *P2, float *P3, int N, int M, int Z, int ImSize) +{ + + float val1, val2, val3; + //calculate each thread global index + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if ((i < N) && (j < M) && (k < Z)) { + val1 = abs(P1[index]); + val2 = abs(P2[index]); + val3 = abs(P3[index]); + if (val1 < 1.0f) {val1 = 1.0f;} + if (val2 < 1.0f) {val2 = 1.0f;} + if (val3 < 1.0f) {val3 = 1.0f;} + P1[index] = P1[index]/val1; + P2[index] = P2[index]/val2; + P3[index] = P3[index]/val3; + } + return; +} + + +__global__ void Rupd_func3D_kernel(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, float multip2, int N, int M, int Z, int ImSize) +{ + //calculate each thread global index + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if ((i < N) && (j < M) && (k < Z)) { + R1[index] = P1[index] + multip2*(P1[index] - P1_old[index]); + R2[index] = P2[index] + multip2*(P2[index] - P2_old[index]); + R3[index] = P3[index] + multip2*(P3[index] - P3_old[index]); + } + return; +} + +__global__ void nonneg3D_kernel(float* Output, int N, int M, int Z, int num_total) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if (index < num_total) { + if (Output[index] < 0.0f) Output[index] = 0.0f; + } +} + +__global__ void copy_kernel3D(float *Input, float* Output, int N, int M, int Z, int num_total) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if (index < num_total) { + Output[index] = Input[index]; + } +} +/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ + +////////////MAIN HOST FUNCTION /////////////// +extern "C" void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ) +{ + int deviceCount = -1; // number of devices + cudaGetDeviceCount(&deviceCount); + if (deviceCount == 0) { + fprintf(stderr, "No CUDA devices found\n"); + return; + } + + int count = 0, i; + float re, multip,multip2; + float tk = 1.0f; + float tkp1=1.0f; + + if (dimZ <= 1) { + /*2D verson*/ + int ImSize = dimX*dimY; + float *d_input, *d_update=NULL, *d_update_prev=NULL, *P1=NULL, *P2=NULL, *P1_prev=NULL, *P2_prev=NULL, *R1=NULL, *R2=NULL; + + dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D); + dim3 dimGrid(idivup(dimX,BLKXSIZE2D), idivup(dimY,BLKYSIZE2D)); + + /*allocate space for images on device*/ + checkCudaErrors( cudaMalloc((void**)&d_input,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&d_update,ImSize*sizeof(float)) ); + if (epsil != 0.0f) checkCudaErrors( cudaMalloc((void**)&d_update_prev,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P1,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P2,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P1_prev,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P2_prev,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&R1,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&R2,ImSize*sizeof(float)) ); + + checkCudaErrors( cudaMemcpy(d_input,Input,ImSize*sizeof(float),cudaMemcpyHostToDevice)); + cudaMemset(P1, 0, ImSize*sizeof(float)); + cudaMemset(P2, 0, ImSize*sizeof(float)); + cudaMemset(P1_prev, 0, ImSize*sizeof(float)); + cudaMemset(P2_prev, 0, ImSize*sizeof(float)); + cudaMemset(R1, 0, ImSize*sizeof(float)); + cudaMemset(R2, 0, ImSize*sizeof(float)); + + /********************** Run CUDA 2D kernel here ********************/ + multip = (1.0f/(8.0f*lambdaPar)); + + /* The main kernel */ + for (i = 0; i < iter; i++) { + + /* computing the gradient of the objective function */ + Obj_func2D_kernel<<<dimGrid,dimBlock>>>(d_input, d_update, R1, R2, dimX, dimY, ImSize, lambdaPar); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + if (nonneg != 0) { + nonneg2D_kernel<<<dimGrid,dimBlock>>>(d_update, dimX, dimY, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); } + + /*Taking a step towards minus of the gradient*/ + Grad_func2D_kernel<<<dimGrid,dimBlock>>>(P1, P2, d_update, R1, R2, dimX, dimY, ImSize, multip); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + /* projection step */ + if (methodTV == 0) Proj_func2D_iso_kernel<<<dimGrid,dimBlock>>>(P1, P2, dimX, dimY, ImSize); /*isotropic TV*/ + else Proj_func2D_aniso_kernel<<<dimGrid,dimBlock>>>(P1, P2, dimX, dimY, ImSize); /*anisotropic TV*/ + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f; + multip2 = ((tk-1.0f)/tkp1); + + Rupd_func2D_kernel<<<dimGrid,dimBlock>>>(P1, P1_prev, P2, P2_prev, R1, R2, tkp1, tk, multip2, dimX, dimY, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + if (epsil != 0.0f) { + /* calculate norm - stopping rules using the Thrust library */ + ResidCalc2D_kernel<<<dimGrid,dimBlock>>>(d_update, d_update_prev, P1_prev, dimX, dimY, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + thrust::device_vector<float> d_vec(P1_prev, P1_prev + ImSize); + float reduction = sqrt(thrust::transform_reduce(d_vec.begin(), d_vec.end(), square(), 0.0f, thrust::plus<float>())); + thrust::device_vector<float> d_vec2(d_update, d_update + ImSize); + float reduction2 = sqrt(thrust::transform_reduce(d_vec2.begin(), d_vec2.end(), square(), 0.0f, thrust::plus<float>())); + + re = (reduction/reduction2); + if (re < epsil) count++; + if (count > 4) break; + + copy_kernel2D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + } + + copy_kernel2D<<<dimGrid,dimBlock>>>(P1, P1_prev, dimX, dimY, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + copy_kernel2D<<<dimGrid,dimBlock>>>(P2, P2_prev, dimX, dimY, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + tk = tkp1; + } + if (printM == 1) printf("FGP-TV iterations stopped at iteration %i \n", i); + /***************************************************************/ + //copy result matrix from device to host memory + cudaMemcpy(Output,d_update,ImSize*sizeof(float),cudaMemcpyDeviceToHost); + + cudaFree(d_input); + cudaFree(d_update); + if (epsil != 0.0f) cudaFree(d_update_prev); + cudaFree(P1); + cudaFree(P2); + cudaFree(P1_prev); + cudaFree(P2_prev); + cudaFree(R1); + cudaFree(R2); + } + else { + /*3D verson*/ + int ImSize = dimX*dimY*dimZ; + float *d_input, *d_update=NULL, *P1=NULL, *P2=NULL, *P3=NULL, *P1_prev=NULL, *P2_prev=NULL, *P3_prev=NULL, *R1=NULL, *R2=NULL, *R3=NULL; + + dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE); + dim3 dimGrid(idivup(dimX,BLKXSIZE), idivup(dimY,BLKYSIZE),idivup(dimZ,BLKZSIZE)); + + /*allocate space for images on device*/ + checkCudaErrors( cudaMalloc((void**)&d_input,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&d_update,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P1,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P2,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P3,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P1_prev,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P2_prev,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P3_prev,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&R1,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&R2,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&R3,ImSize*sizeof(float)) ); + + checkCudaErrors( cudaMemcpy(d_input,Input,ImSize*sizeof(float),cudaMemcpyHostToDevice)); + cudaMemset(P1, 0, ImSize*sizeof(float)); + cudaMemset(P2, 0, ImSize*sizeof(float)); + cudaMemset(P3, 0, ImSize*sizeof(float)); + cudaMemset(P1_prev, 0, ImSize*sizeof(float)); + cudaMemset(P2_prev, 0, ImSize*sizeof(float)); + cudaMemset(P3_prev, 0, ImSize*sizeof(float)); + cudaMemset(R1, 0, ImSize*sizeof(float)); + cudaMemset(R2, 0, ImSize*sizeof(float)); + cudaMemset(R3, 0, ImSize*sizeof(float)); + /********************** Run CUDA 3D kernel here ********************/ + multip = (1.0f/(8.0f*lambdaPar)); + + /* The main kernel */ + for (i = 0; i < iter; i++) { + + /* computing the gradient of the objective function */ + Obj_func3D_kernel<<<dimGrid,dimBlock>>>(d_input, d_update, R1, R2, R3, dimX, dimY, dimZ, ImSize, lambdaPar); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + if (nonneg != 0) { + nonneg3D_kernel<<<dimGrid,dimBlock>>>(d_update, dimX, dimY, dimZ, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); } + + /*Taking a step towards minus of the gradient*/ + Grad_func3D_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, d_update, R1, R2, R3, dimX, dimY, dimZ, ImSize, multip); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + /* projection step */ + if (methodTV == 0) Proj_func3D_iso_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, dimX, dimY, dimZ, ImSize); /* isotropic kernel */ + else Proj_func3D_aniso_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, dimX, dimY, dimZ, ImSize); /* anisotropic kernel */ + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f; + multip2 = ((tk-1.0f)/tkp1); + + Rupd_func3D_kernel<<<dimGrid,dimBlock>>>(P1, P1_prev, P2, P2_prev, P3, P3_prev, R1, R2, R3, tkp1, tk, multip2, dimX, dimY, dimZ, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + copy_kernel3D<<<dimGrid,dimBlock>>>(P1, P1_prev, dimX, dimY, dimZ, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + copy_kernel3D<<<dimGrid,dimBlock>>>(P2, P2_prev, dimX, dimY, dimZ, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + copy_kernel3D<<<dimGrid,dimBlock>>>(P3, P3_prev, dimX, dimY, dimZ, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + tk = tkp1; + } + if (printM == 1) printf("FGP-TV iterations stopped at iteration %i \n", i); + /***************************************************************/ + //copy result matrix from device to host memory + cudaMemcpy(Output,d_update,ImSize*sizeof(float),cudaMemcpyDeviceToHost); + + cudaFree(d_input); + cudaFree(d_update); + cudaFree(P1); + cudaFree(P2); + cudaFree(P3); + cudaFree(P1_prev); + cudaFree(P2_prev); + cudaFree(P3_prev); + cudaFree(R1); + cudaFree(R2); + cudaFree(R3); + } + cudaDeviceReset(); +} diff --git a/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.h b/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.h new file mode 100755 index 0000000..107d243 --- /dev/null +++ b/Core/regularizers_GPU/TV_FGP/TV_FGP_GPU_core.h @@ -0,0 +1,10 @@ +#include <stdio.h> +#include <stdlib.h> +#include <memory.h> + +#ifndef _TV_FGP_GPU_ +#define _TV_FGP_GPU_ + +extern "C" void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ); + +#endif diff --git a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.h b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.h deleted file mode 100755 index 3163482..0000000 --- a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef __TVGPU_H__ -#define __TVGPU_H__ -#include "CCPiDefines.h" - -extern "C" CCPI_EXPORT void TV_ROF_GPU(float* Input, float* Output, int N, int M, int Z, int iter, float tau, float lambda); - -#endif diff --git a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.cu index 633bf63..a30a89b 100755 --- a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU.cu +++ b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.cu @@ -17,7 +17,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -#include "TV_ROF_GPU.h" +#include "TV_ROF_GPU_core.h" /* C-OMP implementation of ROF-TV denoising/regularization model [1] (2D/3D case) * @@ -54,7 +54,7 @@ limitations under the License. #define BLKXSIZE2D 16 #define BLKYSIZE2D 16 -#define EPS 1.0e-4 +#define EPS 1.0e-5 #define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) ) @@ -71,7 +71,7 @@ __host__ __device__ int sign (float x) /* differences 1 */ __global__ void D1_func2D(float* Input, float* D1, int N, int M) { - int i1, j1, i2, j2; + int i1, j1, i2; float NOMx_1,NOMy_1,NOMy_0,denom1,denom2,T1; int i = blockDim.x * blockIdx.x + threadIdx.x; int j = blockDim.y * blockIdx.y + threadIdx.y; @@ -84,7 +84,6 @@ __host__ __device__ int sign (float x) i1 = i + 1; if (i1 >= N) i1 = i-1; i2 = i - 1; if (i2 < 0) i2 = i+1; j1 = j + 1; if (j1 >= M) j1 = j-1; - j2 = j - 1; if (j2 < 0) j2 = j+1; /* Forward-backward differences */ NOMx_1 = Input[j1*N + i] - Input[index]; /* x+ */ @@ -102,7 +101,7 @@ __host__ __device__ int sign (float x) /* differences 2 */ __global__ void D2_func2D(float* Input, float* D2, int N, int M) { - int i1, j1, i2, j2; + int i1, j1, j2; float NOMx_1,NOMy_1,NOMx_0,denom1,denom2,T2; int i = blockDim.x * blockIdx.x + threadIdx.x; int j = blockDim.y * blockIdx.y + threadIdx.y; @@ -113,7 +112,6 @@ __host__ __device__ int sign (float x) /* boundary conditions (Neumann reflections) */ i1 = i + 1; if (i1 >= N) i1 = i-1; - i2 = i - 1; if (i2 < 0) i2 = i+1; j1 = j + 1; if (j1 >= M) j1 = j-1; j2 = j - 1; if (j2 < 0) j2 = j+1; @@ -149,7 +147,7 @@ __host__ __device__ int sign (float x) dv1 = D1[index] - D1[j2*N + i]; dv2 = D2[index] - D2[j*N + i2]; - Update[index] = Update[index] + tau*((dv1 + dv2) - lambda*(Update[index] - Input[index])); + Update[index] = Update[index] + tau*(2.0f*lambda*(dv1 + dv2) - (Update[index] - Input[index])); } } @@ -299,21 +297,21 @@ __host__ __device__ int sign (float x) dv2 = D2[index] - D2[(dimX*dimY)*k + j*dimX+i2]; dv3 = D3[index] - D3[(dimX*dimY)*k2 + j*dimX+i]; - Update[index] = Update[index] + tau*lambda*(dv1 + dv2 + dv3) + tau*(Update[index] - Input[index]); + Update[index] = Update[index] + tau*(2.0f*lambda*(dv1 + dv2 + dv3) - (Update[index] - Input[index])); } } ///////////////////////////////////////////////// // HOST FUNCTION -extern "C" void TV_ROF_GPU(float* Input, float* Output, int N, int M, int Z, int iter, float tau, float lambda) +extern "C" void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z) { // set up device int dev = 0; CHECK(cudaSetDevice(dev)); - float *d_input, *d_update, *d_D1, *d_D2; + if (Z == 0) Z = 1; CHECK(cudaMalloc((void**)&d_input,N*M*Z*sizeof(float))); CHECK(cudaMalloc((void**)&d_update,N*M*Z*sizeof(float))); CHECK(cudaMalloc((void**)&d_D1,N*M*Z*sizeof(float))); @@ -332,32 +330,32 @@ extern "C" void TV_ROF_GPU(float* Input, float* Output, int N, int M, int Z, int for(int n=0; n < iter; n++) { /* calculate differences */ - D1_func3D<<<dimGrid,dimBlock>>>(d_update, d_D1, N, M, Z); + D1_func3D<<<dimGrid,dimBlock>>>(d_update, d_D1, N, M, Z); CHECK(cudaDeviceSynchronize()); - D2_func3D<<<dimGrid,dimBlock>>>(d_update, d_D2, N, M, Z); + D2_func3D<<<dimGrid,dimBlock>>>(d_update, d_D2, N, M, Z); CHECK(cudaDeviceSynchronize()); - D3_func3D<<<dimGrid,dimBlock>>>(d_update, d_D3, N, M, Z); + D3_func3D<<<dimGrid,dimBlock>>>(d_update, d_D3, N, M, Z); CHECK(cudaDeviceSynchronize()); /*running main kernel*/ - TV_kernel3D<<<dimGrid,dimBlock>>>(d_D1, d_D2, d_D3, d_update, d_input, lambda, tau, N, M, Z); + TV_kernel3D<<<dimGrid,dimBlock>>>(d_D1, d_D2, d_D3, d_update, d_input, lambdaPar, tau, N, M, Z); CHECK(cudaDeviceSynchronize()); } CHECK(cudaFree(d_D3)); } else { - // TV - 2D case + // TV - 2D case dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D); dim3 dimGrid(idivup(N,BLKXSIZE2D), idivup(M,BLKYSIZE2D)); for(int n=0; n < iter; n++) { /* calculate differences */ - D1_func2D<<<dimGrid,dimBlock>>>(d_update, d_D1, N, M); + D1_func2D<<<dimGrid,dimBlock>>>(d_update, d_D1, N, M); CHECK(cudaDeviceSynchronize()); - D2_func2D<<<dimGrid,dimBlock>>>(d_update, d_D2, N, M); + D2_func2D<<<dimGrid,dimBlock>>>(d_update, d_D2, N, M); CHECK(cudaDeviceSynchronize()); /*running main kernel*/ - TV_kernel2D<<<dimGrid,dimBlock>>>(d_D1, d_D2, d_update, d_input, lambda, tau, N, M); + TV_kernel2D<<<dimGrid,dimBlock>>>(d_D1, d_D2, d_update, d_input, lambdaPar, tau, N, M); CHECK(cudaDeviceSynchronize()); } } diff --git a/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.h b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.h new file mode 100755 index 0000000..d772aba --- /dev/null +++ b/Core/regularizers_GPU/TV_ROF/TV_ROF_GPU_core.h @@ -0,0 +1,8 @@ +#ifndef __TVGPU_H__ +#define __TVGPU_H__ +#include "CCPiDefines.h" +#include <stdio.h> + +extern "C" CCPI_EXPORT void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z); + +#endif diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/FGP_TV.c b/Wrappers/Matlab/mex_compile/regularizers_CPU/FGP_TV.c index 30cea1a..7cc861a 100644 --- a/Wrappers/Matlab/mex_compile/regularizers_CPU/FGP_TV.c +++ b/Wrappers/Matlab/mex_compile/regularizers_CPU/FGP_TV.c @@ -53,9 +53,9 @@ void mexFunction( int nrhs, const mxArray *prhs[]) { - int number_of_dims, iter, dimX, dimY, dimZ, ll, j, count, methTV; + int number_of_dims, iter, dimX, dimY, dimZ, methTV; const int *dim_array; - float *A, *D=NULL, *D_old=NULL, *P1=NULL, *P2=NULL, *P3=NULL, *P1_old=NULL, *P2_old=NULL, *P3_old=NULL, *R1=NULL, *R2=NULL, *R3=NULL, lambda, tk, tkp1, re, re1, re_old, epsil; + float *Input, *Output, lambda, epsil; number_of_dims = mxGetNumberOfDimensions(prhs[0]); dim_array = mxGetDimensions(prhs[0]); @@ -63,9 +63,9 @@ void mexFunction( /*Handling Matlab input data*/ if ((nrhs < 2) || (nrhs > 5)) mexErrMsgTxt("At least 2 parameters is required: Image(2D/3D), Regularization parameter. The full list of parameters: Image(2D/3D), Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1')"); - A = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */ + Input = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */ lambda = (float) mxGetScalar(prhs[1]); /* regularization parameter */ - iter = 50; /* default iterations number */ + iter = 300; /* default iterations number */ epsil = 0.0001; /* default tolerance constant */ methTV = 0; /* default isotropic TV penalty */ @@ -78,139 +78,17 @@ void mexFunction( if (strcmp(penalty_type, "l1") == 0) methTV = 1; /* enable 'l1' penalty */ mxFree(penalty_type); } - /*output function value (last iteration) */ - plhs[1] = mxCreateNumericMatrix(1, 1, mxSINGLE_CLASS, mxREAL); - float *funcvalA = (float *) mxGetData(plhs[1]); if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); } /*Handling Matlab output data*/ - dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2]; - - tk = 1.0f; - tkp1=1.0f; - count = 0; - re_old = 0.0f; + dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2]; if (number_of_dims == 2) { dimZ = 1; /*2D case*/ - D = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - D_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - P1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - P2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - P1_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - P2_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - R1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - R2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - - /* begin iterations */ - for(ll=0; ll<iter; ll++) { - - /* computing the gradient of the objective function */ - Obj_func2D(A, D, R1, R2, lambda, dimX, dimY); - - /*Taking a step towards minus of the gradient*/ - Grad_func2D(P1, P2, D, R1, R2, lambda, dimX, dimY); - - /* projection step */ - Proj_func2D(P1, P2, methTV, dimX, dimY); - - /*updating R and t*/ - tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f; - Rupd_func2D(P1, P1_old, P2, P2_old, R1, R2, tkp1, tk, dimX, dimY); - - /* calculate norm */ - re = 0.0f; re1 = 0.0f; - for(j=0; j<dimX*dimY*dimZ; j++) - { - re += pow(D[j] - D_old[j],2); - re1 += pow(D[j],2); - } - re = sqrt(re)/sqrt(re1); - if (re < epsil) count++; - if (count > 4) { - Obj_func_CALC2D(A, D, funcvalA, lambda, dimX, dimY); - break; } - - /* check that the residual norm is decreasing */ - if (ll > 2) { - if (re > re_old) { - Obj_func_CALC2D(A, D, funcvalA, lambda, dimX, dimY); - break; }} - re_old = re; - /*printf("%f %i %i \n", re, ll, count); */ - - /*storing old values*/ - copyIm(D, D_old, dimX, dimY, dimZ); - copyIm(P1, P1_old, dimX, dimY, dimZ); - copyIm(P2, P2_old, dimX, dimY, dimZ); - tk = tkp1; - - /* calculating the objective function value */ - if (ll == (iter-1)) Obj_func_CALC2D(A, D, funcvalA, lambda, dimX, dimY); - } - printf("FGP-TV iterations stopped at iteration %i with the function value %f \n", ll, funcvalA[0]); - } - if (number_of_dims == 3) { - D = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - D_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - P1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - P2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - P3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - P1_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - P2_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - P3_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - R1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - R2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - R3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - - /* begin iterations */ - for(ll=0; ll<iter; ll++) { - - /* computing the gradient of the objective function */ - Obj_func3D(A, D, R1, R2, R3,lambda, dimX, dimY, dimZ); - - /*Taking a step towards minus of the gradient*/ - Grad_func3D(P1, P2, P3, D, R1, R2, R3, lambda, dimX, dimY, dimZ); - - /* projection step */ - Proj_func3D(P1, P2, P3, dimX, dimY, dimZ); - - /*updating R and t*/ - tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f; - Rupd_func3D(P1, P1_old, P2, P2_old, P3, P3_old, R1, R2, R3, tkp1, tk, dimX, dimY, dimZ); - - /* calculate norm - stopping rules*/ - re = 0.0f; re1 = 0.0f; - for(j=0; j<dimX*dimY*dimZ; j++) - { - re += pow(D[j] - D_old[j],2); - re1 += pow(D[j],2); - } - re = sqrt(re)/sqrt(re1); - /* stop if the norm residual is less than the tolerance EPS */ - if (re < epsil) count++; - if (count > 3) { - Obj_func_CALC3D(A, D, funcvalA, lambda, dimX, dimY, dimZ); - break;} - - /* check that the residual norm is decreasing */ - if (ll > 2) { - if (re > re_old) { - Obj_func_CALC3D(A, D, funcvalA, lambda, dimX, dimY, dimZ); - }} - re_old = re; - /*printf("%f %i %i \n", re, ll, count); */ - - /*storing old values*/ - copyIm(D, D_old, dimX, dimY, dimZ); - copyIm(P1, P1_old, dimX, dimY, dimZ); - copyIm(P2, P2_old, dimX, dimY, dimZ); - copyIm(P3, P3_old, dimX, dimY, dimZ); - tk = tkp1; - - if (ll == (iter-1)) Obj_func_CALC3D(A, D, funcvalA, lambda, dimX, dimY, dimZ); - } - printf("FGP-TV iterations stopped at iteration %i with the function value %f \n", ll, funcvalA[0]); - } + Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); + TV_FGP_CPU_main(Input, Output, lambda, iter, epsil, methTV, 0, 0, dimX, dimY, dimZ); + } + if (number_of_dims == 3) { + } } diff --git a/Wrappers/Python/ccpi/filters/regularizers.py b/Wrappers/Python/ccpi/filters/regularizers.py new file mode 100644 index 0000000..d6dfa8c --- /dev/null +++ b/Wrappers/Python/ccpi/filters/regularizers.py @@ -0,0 +1,44 @@ +""" +script which assigns a proper device core function based on a flag ('cpu' or 'gpu') +""" + +from ccpi.filters.cpu_regularizers_cython import TV_ROF_CPU, TV_FGP_CPU +from ccpi.filters.gpu_regularizers import TV_ROF_GPU, TV_FGP_GPU + +def ROF_TV(inputData, regularization_parameter, iterations, + time_marching_parameter,device='cpu'): + if device == 'cpu': + return TV_ROF_CPU(inputData, + regularization_parameter, + iterations, + time_marching_parameter) + elif device == 'gpu': + return TV_ROF_GPU(inputData, + regularization_parameter, + iterations, + time_marching_parameter) + else: + raise ValueError('Unknown device {0}. Expecting gpu or cpu'\ + .format(device)) + +def FGP_TV(inputData, regularization_parameter,iterations, + tolerance_param, methodTV, nonneg, printM, device='cpu'): + if device == 'cpu': + return TV_FGP_CPU(inputData, + regularization_parameter, + iterations, + tolerance_param, + methodTV, + nonneg, + printM) + elif device == 'gpu': + return TV_FGP_GPU(inputData, + regularization_parameter, + iterations, + tolerance_param, + methodTV, + nonneg, + printM) + else: + raise ValueError('Unknown device {0}. Expecting gpu or cpu'\ + .format(device))
\ No newline at end of file diff --git a/Wrappers/Python/demo/test_cpu_regularizers.py b/Wrappers/Python/demo/test_cpu_regularizers.py index 5908c3c..76b9de7 100644 --- a/Wrappers/Python/demo/test_cpu_regularizers.py +++ b/Wrappers/Python/demo/test_cpu_regularizers.py @@ -11,11 +11,8 @@ import numpy as np import os from enum import Enum import timeit -from ccpi.filters.cpu_regularizers_boost import SplitBregman_TV , FGP_TV ,\ - LLT_model, PatchBased_Regul ,\ - TGV_PD -from ccpi.filters.cpu_regularizers_cython import ROF_TV - +from ccpi.filters.cpu_regularizers_boost import SplitBregman_TV, LLT_model, PatchBased_Regul, TGV_PD +from ccpi.filters.regularizers import ROF_TV, FGP_TV ############################################################################### #https://stackoverflow.com/questions/13875989/comparing-image-in-url-to-image-in-filesystem-in-python/13884956#13884956 #NRMSE a normalization of the root of the mean squared error @@ -112,12 +109,10 @@ rms = rmse(Im, splitbregman) pars['rmse'] = rms txtstr = printParametersToString(pars) txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) -print (txtstr) - +print (txtstr) a=fig.add_subplot(2,4,2) - # these are matplotlib.patch.Patch properties props = dict(boxstyle='round', facecolor='wheat', alpha=0.5) # place a text box in upper left in axes coords @@ -128,23 +123,26 @@ imgplot = plt.imshow(splitbregman,\ ) ###################### FGP_TV ######################################### -# u = FGP_TV(single(u0), 0.05, 100, 1e-04); + start_time = timeit.default_timer() pars = {'algorithm' : FGP_TV , \ - 'input' : u0, - 'regularization_parameter':0.05, \ - 'number_of_iterations' :200 ,\ - 'tolerance_constant':1e-4,\ - 'TV_penalty': 0 -} + 'input' : u0,\ + 'regularization_parameter':0.07, \ + 'number_of_iterations' :300 ,\ + 'tolerance_constant':0.00001,\ + 'methodTV': 0 ,\ + 'nonneg': 0 ,\ + 'printingOut': 0 + } -out = FGP_TV (pars['input'], +fgp = FGP_TV(pars['input'], pars['regularization_parameter'], pars['number_of_iterations'], pars['tolerance_constant'], - pars['TV_penalty']) + pars['methodTV'], + pars['nonneg'], + pars['printingOut'], 'cpu') -fgp = out[0] rms = rmse(Im, fgp) pars['rmse'] = rms @@ -165,8 +163,8 @@ imgplot = plt.imshow(fgp, \ a.text(0.05, 0.95, txtstr, transform=a.transAxes, fontsize=14, verticalalignment='top', bbox=props) -###################### LLT_model ######################################### +###################### LLT_model ######################################### start_time = timeit.default_timer() pars = {'algorithm': LLT_model , \ @@ -201,8 +199,6 @@ a.text(0.05, 0.95, txtstr, transform=a.transAxes, fontsize=14, imgplot = plt.imshow(llt,\ cmap="gray" ) - - # ###################### PatchBased_Regul ######################################### # # Quick 2D denoising example in Matlab: # # Im = double(imread('lena_gray_256.tif'))/255; % loading image @@ -284,16 +280,15 @@ start_time = timeit.default_timer() pars = {'algorithm': ROF_TV , \ 'input' : u0,\ - 'regularization_parameter':1,\ - 'marching_step': 0.003,\ + 'regularization_parameter':0.07,\ + 'marching_step': 0.0025,\ 'number_of_iterations': 300 } rof = ROF_TV(pars['input'], - pars['number_of_iterations'], - pars['regularization_parameter'], - pars['marching_step'] - ) -#tgv = out + pars['regularization_parameter'], + pars['number_of_iterations'], + pars['marching_step'], 'cpu') + rms = rmse(Im, rof) pars['rmse'] = rms @@ -307,9 +302,7 @@ props = dict(boxstyle='round', facecolor='wheat', alpha=0.5) # place a text box in upper left in axes coords a.text(0.05, 0.95, txtstr, transform=a.transAxes, fontsize=14, verticalalignment='top', bbox=props) -imgplot = plt.imshow(tgv, cmap="gray") - - +imgplot = plt.imshow(rof, cmap="gray") plt.show() diff --git a/Wrappers/Python/setup-regularizers.py.in b/Wrappers/Python/setup-regularizers.py.in index a125261..8655a2e 100644 --- a/Wrappers/Python/setup-regularizers.py.in +++ b/Wrappers/Python/setup-regularizers.py.in @@ -34,7 +34,9 @@ extra_libraries = ['cilreg'] extra_include_dirs += [os.path.join(".." , ".." , "Core"), os.path.join(".." , ".." , "Core", "regularizers_CPU"), - os.path.join(".." , ".." , "Core", "regularizers_GPU") , + os.path.join(".." , ".." , "Core", "regularizers_GPU" , "Diffus_HO" ) , + os.path.join(".." , ".." , "Core", "regularizers_GPU" , "NL_Regul" ) , + os.path.join(".." , ".." , "Core", "regularizers_GPU" , "TV_ROF" ) , "."] if platform.system() == 'Windows': @@ -81,4 +83,4 @@ setup( ) -@SETUP_GPU_WRAPPERS@
\ No newline at end of file +@SETUP_GPU_WRAPPERS@ diff --git a/Wrappers/Python/src/cpu_regularizers.cpp b/Wrappers/Python/src/cpu_regularizers.cpp index e311570..3529ebd 100644 --- a/Wrappers/Python/src/cpu_regularizers.cpp +++ b/Wrappers/Python/src/cpu_regularizers.cpp @@ -27,7 +27,6 @@ limitations under the License. #include "boost/tuple/tuple.hpp" #include "SplitBregman_TV_core.h" -#include "FGP_TV_core.h" #include "LLT_model_core.h" #include "PatchBased_Regul_core.h" #include "TGV_PD_core.h" @@ -303,292 +302,6 @@ bp::list SplitBregman_TV(np::ndarray input, double d_mu, int iter, double d_epsi } - - -bp::list FGP_TV(np::ndarray input, double d_mu, int iter, double d_epsil, int methTV) { - - // the result is in the following list - bp::list result; - - int number_of_dims, dimX, dimY, dimZ, ll, j, count; - float *A, *D = NULL, *D_old = NULL, *P1 = NULL, *P2 = NULL, *P3 = NULL, *P1_old = NULL, *P2_old = NULL, *P3_old = NULL, *R1 = NULL, *R2 = NULL, *R3 = NULL; - float lambda, tk, tkp1, re, re1, re_old, epsil, funcval; - - //number_of_dims = mxGetNumberOfDimensions(prhs[0]); - //dim_array = mxGetDimensions(prhs[0]); - - number_of_dims = input.get_nd(); - int dim_array[3]; - - dim_array[0] = input.shape(0); - dim_array[1] = input.shape(1); - if (number_of_dims == 2) { - dim_array[2] = -1; - } - else { - dim_array[2] = input.shape(2); - } - // Parameter handling is be done in Python - ///*Handling Matlab input data*/ - //if ((nrhs < 2) || (nrhs > 5)) mexErrMsgTxt("At least 2 parameters is required: Image(2D/3D), Regularization parameter. The full list of parameters: Image(2D/3D), Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1')"); - - ///*Handling Matlab input data*/ - //A = (float *)mxGetData(prhs[0]); /*noisy image (2D/3D) */ - A = reinterpret_cast<float *>(input.get_data()); - - //mu = (float)mxGetScalar(prhs[1]); /* regularization parameter */ - lambda = (float)d_mu; - - //iter = 35; /* default iterations number */ - - //epsil = 0.0001; /* default tolerance constant */ - epsil = (float)d_epsil; - //methTV = 0; /* default isotropic TV penalty */ - //if ((nrhs == 3) || (nrhs == 4) || (nrhs == 5)) iter = (int)mxGetScalar(prhs[2]); /* iterations number */ - //if ((nrhs == 4) || (nrhs == 5)) epsil = (float)mxGetScalar(prhs[3]); /* tolerance constant */ - //if (nrhs == 5) { - // char *penalty_type; - // penalty_type = mxArrayToString(prhs[4]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */ - // if ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0)) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',"); - // if (strcmp(penalty_type, "l1") == 0) methTV = 1; /* enable 'l1' penalty */ - // mxFree(penalty_type); - //} - //if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) { mexErrMsgTxt("The input image must be in a single precision"); } - - //plhs[1] = mxCreateNumericMatrix(1, 1, mxSINGLE_CLASS, mxREAL); - bp::tuple shape1 = bp::make_tuple(dim_array[0], dim_array[1]); - np::dtype dtype = np::dtype::get_builtin<float>(); - np::ndarray out1 = np::zeros(shape1, dtype); - - //float *funcvalA = (float *)mxGetData(plhs[1]); - float * funcvalA = reinterpret_cast<float *>(out1.get_data()); - //if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) { mexErrMsgTxt("The input image must be in a single precision"); } - - /*Handling Matlab output data*/ - dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2]; - - tk = 1.0f; - tkp1 = 1.0f; - count = 1; - re_old = 0.0f; - - if (number_of_dims == 2) { - dimZ = 1; /*2D case*/ - /*D = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - D_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - P1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - P2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - P1_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - P2_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - R1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - R2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));*/ - - bp::tuple shape = bp::make_tuple(dim_array[0], dim_array[1]); - np::dtype dtype = np::dtype::get_builtin<float>(); - - - np::ndarray npD = np::zeros(shape, dtype); - np::ndarray npD_old = np::zeros(shape, dtype); - np::ndarray npP1 = np::zeros(shape, dtype); - np::ndarray npP2 = np::zeros(shape, dtype); - np::ndarray npP1_old = np::zeros(shape, dtype); - np::ndarray npP2_old = np::zeros(shape, dtype); - np::ndarray npR1 = np::zeros(shape, dtype); - np::ndarray npR2 = np::zeros(shape, dtype); - - D = reinterpret_cast<float *>(npD.get_data()); - D_old = reinterpret_cast<float *>(npD_old.get_data()); - P1 = reinterpret_cast<float *>(npP1.get_data()); - P2 = reinterpret_cast<float *>(npP2.get_data()); - P1_old = reinterpret_cast<float *>(npP1_old.get_data()); - P2_old = reinterpret_cast<float *>(npP2_old.get_data()); - R1 = reinterpret_cast<float *>(npR1.get_data()); - R2 = reinterpret_cast<float *>(npR2.get_data()); - - /* begin iterations */ - for (ll = 0; ll<iter; ll++) { - /* computing the gradient of the objective function */ - Obj_func2D(A, D, R1, R2, lambda, dimX, dimY); - - /*Taking a step towards minus of the gradient*/ - Grad_func2D(P1, P2, D, R1, R2, lambda, dimX, dimY); - - /* projection step */ - Proj_func2D(P1, P2, methTV, dimX, dimY); - - /*updating R and t*/ - tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f; - Rupd_func2D(P1, P1_old, P2, P2_old, R1, R2, tkp1, tk, dimX, dimY); - - /* calculate norm */ - re = 0.0f; re1 = 0.0f; - for (j = 0; j<dimX*dimY*dimZ; j++) - { - re += pow(D[j] - D_old[j], 2); - re1 += pow(D[j], 2); - } - re = sqrt(re) / sqrt(re1); - if (re < epsil) count++; - if (count > 4) { - Obj_func2D(A, D, P1, P2, lambda, dimX, dimY); - funcval = 0.0f; - for (j = 0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j], 2); - //funcvalA[0] = sqrt(funcval); - float fv = sqrt(funcval); - std::memcpy(funcvalA, &fv, sizeof(float)); - break; - } - - /* check that the residual norm is decreasing */ - if (ll > 2) { - if (re > re_old) { - Obj_func2D(A, D, P1, P2, lambda, dimX, dimY); - funcval = 0.0f; - for (j = 0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j], 2); - //funcvalA[0] = sqrt(funcval); - float fv = sqrt(funcval); - std::memcpy(funcvalA, &fv, sizeof(float)); - break; - } - } - re_old = re; - /*printf("%f %i %i \n", re, ll, count); */ - - /*storing old values*/ - copyIm(D, D_old, dimX, dimY, dimZ); - copyIm(P1, P1_old, dimX, dimY, dimZ); - copyIm(P2, P2_old, dimX, dimY, dimZ); - tk = tkp1; - - /* calculating the objective function value */ - if (ll == (iter - 1)) { - Obj_func2D(A, D, P1, P2, lambda, dimX, dimY); - funcval = 0.0f; - for (j = 0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j], 2); - //funcvalA[0] = sqrt(funcval); - float fv = sqrt(funcval); - std::memcpy(funcvalA, &fv, sizeof(float)); - } - } - //printf("FGP-TV iterations stopped at iteration %i with the function value %f \n", ll, funcvalA[0]); - result.append<np::ndarray>(npD); - result.append<np::ndarray>(out1); - result.append<int>(ll); - } - if (number_of_dims == 3) { - /*D = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - D_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - P1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - P2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - P3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - P1_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - P2_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - P3_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - R1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - R2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - R3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));*/ - bp::tuple shape = bp::make_tuple(dim_array[0], dim_array[1], dim_array[2]); - np::dtype dtype = np::dtype::get_builtin<float>(); - - np::ndarray npD = np::zeros(shape, dtype); - np::ndarray npD_old = np::zeros(shape, dtype); - np::ndarray npP1 = np::zeros(shape, dtype); - np::ndarray npP2 = np::zeros(shape, dtype); - np::ndarray npP3 = np::zeros(shape, dtype); - np::ndarray npP1_old = np::zeros(shape, dtype); - np::ndarray npP2_old = np::zeros(shape, dtype); - np::ndarray npP3_old = np::zeros(shape, dtype); - np::ndarray npR1 = np::zeros(shape, dtype); - np::ndarray npR2 = np::zeros(shape, dtype); - np::ndarray npR3 = np::zeros(shape, dtype); - - D = reinterpret_cast<float *>(npD.get_data()); - D_old = reinterpret_cast<float *>(npD_old.get_data()); - P1 = reinterpret_cast<float *>(npP1.get_data()); - P2 = reinterpret_cast<float *>(npP2.get_data()); - P3 = reinterpret_cast<float *>(npP3.get_data()); - P1_old = reinterpret_cast<float *>(npP1_old.get_data()); - P2_old = reinterpret_cast<float *>(npP2_old.get_data()); - P3_old = reinterpret_cast<float *>(npP3_old.get_data()); - R1 = reinterpret_cast<float *>(npR1.get_data()); - R2 = reinterpret_cast<float *>(npR2.get_data()); - R3 = reinterpret_cast<float *>(npR3.get_data()); - /* begin iterations */ - for (ll = 0; ll<iter; ll++) { - /* computing the gradient of the objective function */ - Obj_func3D(A, D, R1, R2, R3, lambda, dimX, dimY, dimZ); - /*Taking a step towards minus of the gradient*/ - Grad_func3D(P1, P2, P3, D, R1, R2, R3, lambda, dimX, dimY, dimZ); - - /* projection step */ - Proj_func3D(P1, P2, P3, dimX, dimY, dimZ); - - /*updating R and t*/ - tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f; - Rupd_func3D(P1, P1_old, P2, P2_old, P3, P3_old, R1, R2, R3, tkp1, tk, dimX, dimY, dimZ); - - /* calculate norm - stopping rules*/ - re = 0.0f; re1 = 0.0f; - for (j = 0; j<dimX*dimY*dimZ; j++) - { - re += pow(D[j] - D_old[j], 2); - re1 += pow(D[j], 2); - } - re = sqrt(re) / sqrt(re1); - /* stop if the norm residual is less than the tolerance EPS */ - if (re < epsil) count++; - if (count > 3) { - Obj_func3D(A, D, P1, P2, P3, lambda, dimX, dimY, dimZ); - funcval = 0.0f; - for (j = 0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j], 2); - //funcvalA[0] = sqrt(funcval); - float fv = sqrt(funcval); - std::memcpy(funcvalA, &fv, sizeof(float)); - break; - } - - /* check that the residual norm is decreasing */ - if (ll > 2) { - if (re > re_old) { - Obj_func3D(A, D, P1, P2, P3, lambda, dimX, dimY, dimZ); - funcval = 0.0f; - for (j = 0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j], 2); - //funcvalA[0] = sqrt(funcval); - float fv = sqrt(funcval); - std::memcpy(funcvalA, &fv, sizeof(float)); - break; - } - } - - re_old = re; - /*printf("%f %i %i \n", re, ll, count); */ - - /*storing old values*/ - copyIm(D, D_old, dimX, dimY, dimZ); - copyIm(P1, P1_old, dimX, dimY, dimZ); - copyIm(P2, P2_old, dimX, dimY, dimZ); - copyIm(P3, P3_old, dimX, dimY, dimZ); - tk = tkp1; - - if (ll == (iter - 1)) { - Obj_func3D(A, D, P1, P2, P3, lambda, dimX, dimY, dimZ); - funcval = 0.0f; - for (j = 0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j], 2); - //funcvalA[0] = sqrt(funcval); - float fv = sqrt(funcval); - std::memcpy(funcvalA, &fv, sizeof(float)); - } - - } - //printf("FGP-TV iterations stopped at iteration %i with the function value %f \n", ll, funcvalA[0]); - result.append<np::ndarray>(npD); - result.append<np::ndarray>(out1); - result.append<int>(ll); - } - - return result; -} - bp::list LLT_model(np::ndarray input, double d_lambda, double d_tau, int iter, double d_epsil, int switcher) { // the result is in the following list bp::list result; @@ -1022,9 +735,6 @@ bp::list TGV_PD(np::ndarray input, double d_lambda, double d_alpha1, double d_al result.append<np::ndarray>(npU); } - - - return result; } @@ -1040,7 +750,6 @@ BOOST_PYTHON_MODULE(cpu_regularizers_boost) np::dtype dt2 = np::dtype::get_builtin<uint16_t>(); def("SplitBregman_TV", SplitBregman_TV); - def("FGP_TV", FGP_TV); def("LLT_model", LLT_model); def("PatchBased_Regul", PatchBased_Regul); def("TGV_PD", TGV_PD); diff --git a/Wrappers/Python/src/cpu_regularizers.pyx b/Wrappers/Python/src/cpu_regularizers.pyx index e7ff78c..60e8627 100644 --- a/Wrappers/Python/src/cpu_regularizers.pyx +++ b/Wrappers/Python/src/cpu_regularizers.pyx @@ -11,62 +11,108 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -Author: Edoardo Pasca +Author: Edoardo Pasca, Daniil Kazantsev """ import cython import numpy as np cimport numpy as np -cdef extern float TV_ROF(float *A, float *B, int dimX, int dimY, int dimZ, - int iterationsNumb, float tau, float flambda); +cdef extern float TV_ROF_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float tau, int dimX, int dimY, int dimZ); +cdef extern float TV_FGP_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ); +def TV_ROF_CPU(inputData, regularization_parameter, iterationsNumb, marching_step_parameter): + if inputData.ndim == 2: + return TV_ROF_2D(inputData, regularization_parameter, iterationsNumb, marching_step_parameter) + elif inputData.ndim == 3: + return TV_ROF_3D(inputData, regularization_parameter, iterationsNumb, marching_step_parameter) + +def TV_ROF_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, + float regularization_parameter, + int iterationsNumb, + float marching_step_parameter): + cdef long dims[2] + dims[0] = inputData.shape[0] + dims[1] = inputData.shape[1] + + cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \ + np.zeros([dims[0],dims[1]], dtype='float32') + + # Run ROF iterations for 2D data + TV_ROF_CPU_main(&inputData[0,0], &outputData[0,0], regularization_parameter, iterationsNumb, marching_step_parameter, dims[0], dims[1], 1) + + return outputData + +def TV_ROF_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, + int iterationsNumb, + float regularization_parameter, + float marching_step_parameter): + cdef long dims[3] + dims[0] = inputData.shape[0] + dims[1] = inputData.shape[1] + dims[2] = inputData.shape[2] + + cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \ + np.zeros([dims[0],dims[1],dims[2]], dtype='float32') + + # Run ROF iterations for 3D data + TV_ROF_CPU_main(&inputData[0,0,0], &outputData[0,0,0], regularization_parameter, iterationsNumb, marching_step_parameter, dims[0], dims[1], dims[2]) -def ROF_TV(inputData, iterations, regularization_parameter, - marching_step_parameter): + return outputData + +def TV_FGP_CPU(inputData, regularization_parameter, iterationsNumb, tolerance_param, methodTV, nonneg, printM): if inputData.ndim == 2: - return ROF_TV_2D(inputData, iterations, regularization_parameter, - marching_step_parameter) + return TV_FGP_2D(inputData, regularization_parameter, iterationsNumb, tolerance_param, methodTV, nonneg, printM) elif inputData.ndim == 3: - return ROF_TV_3D(inputData, iterations, regularization_parameter, - marching_step_parameter) + return TV_FGP_3D(inputData, regularization_parameter, iterationsNumb, tolerance_param, methodTV, nonneg, printM) -def ROF_TV_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, - int iterations, +def TV_FGP_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, float regularization_parameter, - float marching_step_parameter - ): + int iterationsNumb, + float tolerance_param, + int methodTV, + int nonneg, + int printM): cdef long dims[2] dims[0] = inputData.shape[0] dims[1] = inputData.shape[1] - cdef np.ndarray[np.float32_t, ndim=2, mode="c"] B = \ + cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \ np.zeros([dims[0],dims[1]], dtype='float32') #/* Run ROF iterations for 2D data */ - TV_ROF(&inputData[0,0], &B[0,0], dims[0], dims[1], 1, iterations, - marching_step_parameter, regularization_parameter) + TV_FGP_CPU_main(&inputData[0,0], &outputData[0,0], regularization_parameter, + iterationsNumb, + tolerance_param, + methodTV, + nonneg, + printM, + dims[0], dims[1], 1) - return B - + return outputData -def ROF_TV_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, - int iterations, +def TV_FGP_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, float regularization_parameter, - float marching_step_parameter - ): - cdef long dims[2] + int iterationsNumb, + float tolerance_param, + int methodTV, + int nonneg, + int printM): + cdef long dims[3] dims[0] = inputData.shape[0] dims[1] = inputData.shape[1] dims[2] = inputData.shape[2] - cdef np.ndarray[np.float32_t, ndim=3, mode="c"] B = \ - np.zeros([dims[0],dims[1],dims[2]], dtype='float32') + cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \ + np.zeros([dims[0], dims[1], dims[2]], dtype='float32') #/* Run ROF iterations for 3D data */ - TV_ROF(&inputData[0,0, 0], &B[0,0, 0], dims[0], dims[1], dims[2], iterations, - marching_step_parameter, regularization_parameter) - - return B - + TV_FGP_CPU_main(&inputData[0,0,0], &outputData[0,0,0], regularization_parameter, + iterationsNumb, + tolerance_param, + methodTV, + nonneg, + printM, + dims[0], dims[1], dims[2]) + return outputData diff --git a/Wrappers/Python/src/gpu_regularizers.pyx b/Wrappers/Python/src/gpu_regularizers.pyx index 5a5d274..f96156a 100644 --- a/Wrappers/Python/src/gpu_regularizers.pyx +++ b/Wrappers/Python/src/gpu_regularizers.pyx @@ -11,7 +11,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -Author: Edoardo Pasca +Author: Edoardo Pasca, Daniil Kazantsev """ import cython @@ -25,12 +25,16 @@ cdef extern void NLM_GPU_kernel(float *A, float* B, float *Eucl_Vec, int N, int M, int Z, int dimension, int SearchW, int SimilW, int SearchW_real, float denh2, float lambdaf); -cdef extern void TV_ROF_GPU(float* A, float* B, int N, int M, int Z, int iter, float tau, float lambdaf); +cdef extern void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z); +cdef extern void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int N, int M, int Z); + +# padding function cdef extern float pad_crop(float *A, float *Ap, int OldSizeX, int OldSizeY, int OldSizeZ, int NewSizeX, int NewSizeY, int NewSizeZ, int padXY, int switchpad_crop); - + +#Diffusion 4th order regularizer def Diff4thHajiaboli(inputData, edge_preserv_parameter, iterations, @@ -48,7 +52,7 @@ def Diff4thHajiaboli(inputData, iterations, time_marching_parameter, regularization_parameter) - +# patch-based nonlocal regularization def NML(inputData, SearchW_real, SimilW, @@ -66,23 +70,48 @@ def NML(inputData, SimilW, h, lambdaf) - -def ROF_TV_GPU(inputData, + +# Total-variation Rudin-Osher-Fatemi (ROF) +def TV_ROF_GPU(inputData, + regularization_parameter, iterations, - time_marching_parameter, - regularization_parameter): + time_marching_parameter): if inputData.ndim == 2: return ROFTV2D(inputData, - iterations, - time_marching_parameter, - regularization_parameter) + regularization_parameter, + iterations, + time_marching_parameter) elif inputData.ndim == 3: return ROFTV3D(inputData, + regularization_parameter, iterations, - time_marching_parameter, - regularization_parameter) - - + time_marching_parameter) + +# Total-variation Fast-Gradient-Projection (FGP) +def TV_FGP_GPU(inputData, + regularization_parameter, + iterations, + tolerance_param, + methodTV, + nonneg, + printM): + if inputData.ndim == 2: + return FGPTV2D(inputData, + regularization_parameter, + iterations, + tolerance_param, + methodTV, + nonneg, + printM) + elif inputData.ndim == 3: + return FGPTV3D(inputData, + regularization_parameter, + iterations, + tolerance_param, + methodTV, + nonneg, + printM) +#****************************************************************# def Diff4thHajiaboli2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, float edge_preserv_parameter, int iterations, @@ -329,48 +358,106 @@ def NML3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, switchpad_crop) return B - + +# Total-variation ROF def ROFTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, + float regularization_parameter, int iterations, - float time_marching_parameter, - float regularization_parameter): + float time_marching_parameter): cdef long dims[2] dims[0] = inputData.shape[0] dims[1] = inputData.shape[1] - cdef np.ndarray[np.float32_t, ndim=2, mode="c"] B = \ + cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \ np.zeros([dims[0],dims[1]], dtype='float32') # Running CUDA code here - TV_ROF_GPU( - &inputData[0,0], &B[0,0], - dims[0], dims[1], 0, + TV_ROF_GPU_main( + &inputData[0,0], &outputData[0,0], + regularization_parameter, iterations , time_marching_parameter, - regularization_parameter); + dims[0], dims[1], 1); - return B + return outputData def ROFTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, + float regularization_parameter, int iterations, - float time_marching_parameter, - float regularization_parameter): + float time_marching_parameter): cdef long dims[3] dims[0] = inputData.shape[0] dims[1] = inputData.shape[1] dims[2] = inputData.shape[2] - cdef np.ndarray[np.float32_t, ndim=3, mode="c"] B = \ + cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \ np.zeros([dims[0],dims[1],dims[2]], dtype='float32') # Running CUDA code here - TV_ROF_GPU( - &inputData[0,0,0], &B[0,0,0], - dims[0], dims[1], dims[2], + TV_ROF_GPU_main( + &inputData[0,0,0], &outputData[0,0,0], + regularization_parameter, iterations , time_marching_parameter, - regularization_parameter); + dims[0], dims[1], dims[2]); - return B + return outputData + +# Total-variation Fast-Gradient-Projection (FGP) +def FGPTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, + float regularization_parameter, + int iterations, + float tolerance_param, + int methodTV, + int nonneg, + int printM): + + cdef long dims[2] + dims[0] = inputData.shape[0] + dims[1] = inputData.shape[1] + + cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \ + np.zeros([dims[0],dims[1]], dtype='float32') + + # Running CUDA code here + TV_FGP_GPU_main(&inputData[0,0], &outputData[0,0], + regularization_parameter, + iterations, + tolerance_param, + methodTV, + nonneg, + printM, + dims[0], dims[1], 1); + + return outputData + +def FGPTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, + float regularization_parameter, + int iterations, + float tolerance_param, + int methodTV, + int nonneg, + int printM): + + cdef long dims[3] + dims[0] = inputData.shape[0] + dims[1] = inputData.shape[1] + dims[2] = inputData.shape[2] + + cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \ + np.zeros([dims[0],dims[1],dims[2]], dtype='float32') + + # Running CUDA code here + TV_FGP_GPU_main( + &inputData[0,0,0], &outputData[0,0,0], + regularization_parameter , + iterations, + tolerance_param, + methodTV, + nonneg, + printM, + dims[0], dims[1], dims[2]); + + return outputData diff --git a/Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py b/Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py new file mode 100644 index 0000000..63be1a0 --- /dev/null +++ b/Wrappers/Python/test/test_cpu_vs_gpu_regularizers.py @@ -0,0 +1,219 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Thu Feb 22 11:39:43 2018 + +Testing CPU implementation against the GPU one + +@author: Daniil Kazantsev +""" + +import matplotlib.pyplot as plt +import numpy as np +import os +import timeit +from ccpi.filters.regularizers import ROF_TV, FGP_TV + +############################################################################### +def printParametersToString(pars): + txt = r'' + for key, value in pars.items(): + if key== 'algorithm' : + txt += "{0} = {1}".format(key, value.__name__) + elif key == 'input': + txt += "{0} = {1}".format(key, np.shape(value)) + else: + txt += "{0} = {1}".format(key, value) + txt += '\n' + return txt +############################################################################### +def rmse(im1, im2): + a, b = im1.shape + rmse = np.sqrt(np.sum((im1 - im2) ** 2) / float(a * b)) + return rmse + +filename = os.path.join(".." , ".." , ".." , "data" ,"lena_gray_512.tif") + +# read image +Im = plt.imread(filename) +Im = np.asarray(Im, dtype='float32') + +Im = Im/255 +perc = 0.075 +u0 = Im + np.random.normal(loc = Im , + scale = perc * Im , + size = np.shape(Im)) +# map the u0 u0->u0>0 +f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1) +u0 = f(u0).astype('float32') + + +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") +print ("____________ROF-TV bench___________________") +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") + +## plot +fig = plt.figure(1) +plt.suptitle('Comparison of ROF-TV regularizer using CPU and GPU implementations') +a=fig.add_subplot(1,4,1) +a.set_title('Noisy Image') +imgplot = plt.imshow(u0,cmap="gray") + +# set parameters +pars = {'algorithm': ROF_TV, \ + 'input' : u0,\ + 'regularization_parameter':0.04,\ + 'number_of_iterations': 1200,\ + 'time_marching_parameter': 0.0025 + } +print ("#############ROF TV CPU####################") +start_time = timeit.default_timer() +rof_cpu = ROF_TV(pars['input'], + pars['regularization_parameter'], + pars['number_of_iterations'], + pars['time_marching_parameter'],'cpu') +rms = rmse(Im, rof_cpu) +pars['rmse'] = rms + +txtstr = printParametersToString(pars) +txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) +print (txtstr) +a=fig.add_subplot(1,4,2) + +# these are matplotlib.patch.Patch properties +props = dict(boxstyle='round', facecolor='wheat', alpha=0.75) +# place a text box in upper left in axes coords +a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14, + verticalalignment='top', bbox=props) +imgplot = plt.imshow(rof_cpu, cmap="gray") +plt.title('{}'.format('CPU results')) + + +print ("##############ROF TV GPU##################") +start_time = timeit.default_timer() +rof_gpu = ROF_TV(pars['input'], + pars['regularization_parameter'], + pars['number_of_iterations'], + pars['time_marching_parameter'],'gpu') + +rms = rmse(Im, rof_gpu) +pars['rmse'] = rms +pars['algorithm'] = ROF_TV +txtstr = printParametersToString(pars) +txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) +print (txtstr) +a=fig.add_subplot(1,4,3) + +# these are matplotlib.patch.Patch properties +props = dict(boxstyle='round', facecolor='wheat', alpha=0.75) +# place a text box in upper left in axes coords +a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14, + verticalalignment='top', bbox=props) +imgplot = plt.imshow(rof_gpu, cmap="gray") +plt.title('{}'.format('GPU results')) + + +print ("--------Compare the results--------") +tolerance = 1e-05 +diff_im = np.zeros(np.shape(rof_cpu)) +diff_im = abs(rof_cpu - rof_gpu) +diff_im[diff_im > tolerance] = 1 +a=fig.add_subplot(1,4,4) +imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray") +plt.title('{}'.format('Pixels larger threshold difference')) +if (diff_im.sum() > 1): + print ("Arrays do not match!") +else: + print ("Arrays match") + +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") +print ("____________FGP-TV bench___________________") +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") + +## plot +fig = plt.figure(2) +plt.suptitle('Comparison of FGP-TV regularizer using CPU and GPU implementations') +a=fig.add_subplot(1,4,1) +a.set_title('Noisy Image') +imgplot = plt.imshow(u0,cmap="gray") + +# set parameters +pars = {'algorithm' : FGP_TV, \ + 'input' : u0,\ + 'regularization_parameter':0.04, \ + 'number_of_iterations' :1200 ,\ + 'tolerance_constant':0.00001,\ + 'methodTV': 0 ,\ + 'nonneg': 0 ,\ + 'printingOut': 0 + } + +print ("#############FGP TV CPU####################") +start_time = timeit.default_timer() +fgp_cpu = FGP_TV(pars['input'], + pars['regularization_parameter'], + pars['number_of_iterations'], + pars['tolerance_constant'], + pars['methodTV'], + pars['nonneg'], + pars['printingOut'],'cpu') + + +rms = rmse(Im, fgp_cpu) +pars['rmse'] = rms + +txtstr = printParametersToString(pars) +txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) +print (txtstr) +a=fig.add_subplot(1,4,2) + +# these are matplotlib.patch.Patch properties +props = dict(boxstyle='round', facecolor='wheat', alpha=0.75) +# place a text box in upper left in axes coords +a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14, + verticalalignment='top', bbox=props) +imgplot = plt.imshow(fgp_cpu, cmap="gray") +plt.title('{}'.format('CPU results')) + + +print ("##############FGP TV GPU##################") +start_time = timeit.default_timer() +fgp_gpu = FGP_TV(pars['input'], + pars['regularization_parameter'], + pars['number_of_iterations'], + pars['tolerance_constant'], + pars['methodTV'], + pars['nonneg'], + pars['printingOut'],'gpu') + +rms = rmse(Im, fgp_gpu) +pars['rmse'] = rms +pars['algorithm'] = FGP_TV +txtstr = printParametersToString(pars) +txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) +print (txtstr) +a=fig.add_subplot(1,4,3) + +# these are matplotlib.patch.Patch properties +props = dict(boxstyle='round', facecolor='wheat', alpha=0.75) +# place a text box in upper left in axes coords +a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14, + verticalalignment='top', bbox=props) +imgplot = plt.imshow(fgp_gpu, cmap="gray") +plt.title('{}'.format('GPU results')) + + +print ("--------Compare the results--------") +tolerance = 1e-05 +diff_im = np.zeros(np.shape(rof_cpu)) +diff_im = abs(fgp_cpu - fgp_gpu) +diff_im[diff_im > tolerance] = 1 +a=fig.add_subplot(1,4,4) +imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray") +plt.title('{}'.format('Pixels larger threshold difference')) +if (diff_im.sum() > 1): + print ("Arrays do not match!") +else: + print ("Arrays match") + + diff --git a/Wrappers/Python/test/test_gpu_regularizers.py b/Wrappers/Python/test/test_gpu_regularizers.py index 735a25d..640b3f9 100644 --- a/Wrappers/Python/test/test_gpu_regularizers.py +++ b/Wrappers/Python/test/test_gpu_regularizers.py @@ -6,14 +6,13 @@ Created on Tue Jan 30 10:24:26 2018 @author: ofn77899 """ - - import matplotlib.pyplot as plt import numpy as np import os from enum import Enum import timeit from ccpi.filters.gpu_regularizers import Diff4thHajiaboli, NML +from ccpi.filters.regularizers import ROF_TV, FGP_TV ############################################################################### def printParametersToString(pars): txt = r'' @@ -52,14 +51,15 @@ u0 = f(u0).astype('float32') ## plot fig = plt.figure() -a=fig.add_subplot(2,3,1) +a=fig.add_subplot(2,4,1) a.set_title('noise') imgplot = plt.imshow(u0,cmap="gray") ## Diff4thHajiaboli start_time = timeit.default_timer() -pars = {'algorithm' : Diff4thHajiaboli , \ +pars = { +'algorithm' : Diff4thHajiaboli , \ 'input' : u0, 'edge_preserv_parameter':0.1 , \ 'number_of_iterations' :250 ,\ @@ -78,7 +78,7 @@ pars['rmse'] = rms txtstr = printParametersToString(pars) txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) print (txtstr) -a=fig.add_subplot(2,3,2) +a=fig.add_subplot(2,4,2) # these are matplotlib.patch.Patch properties props = dict(boxstyle='round', facecolor='wheat', alpha=0.75) @@ -87,14 +87,15 @@ a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=12, verticalalignment='top', bbox=props) imgplot = plt.imshow(d4h, cmap="gray") -a=fig.add_subplot(2,3,5) +a=fig.add_subplot(2,4,6) # these are matplotlib.patch.Patch properties props = dict(boxstyle='round', facecolor='wheat', alpha=0.5) # place a text box in upper left in axes coords a.text(0.05, 0.95, 'd4h - u0', transform=a.transAxes, fontsize=12, verticalalignment='top', bbox=props) -imgplot = plt.imshow((d4h - u0)**2, cmap="gray") +imgplot = plt.imshow((d4h - u0)**2, vmin=0, vmax=0.03, cmap="gray") +plt.colorbar(ticks=[0, 0.03], orientation='vertical') ## Patch Based Regul NML @@ -109,6 +110,7 @@ pars = {'algorithm' : NML , \ } """ pars = { +'algorithm' : NML , \ 'input' : u0, 'regularization_parameter': 0.01,\ 'searching_window_ratio':3, \ @@ -126,7 +128,7 @@ pars['rmse'] = rms txtstr = printParametersToString(pars) txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) print (txtstr) -a=fig.add_subplot(2,3,3) +a=fig.add_subplot(2,4,3) # these are matplotlib.patch.Patch properties props = dict(boxstyle='round', facecolor='wheat', alpha=0.75) @@ -135,14 +137,103 @@ a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=12, verticalalignment='top', bbox=props) imgplot = plt.imshow(nml, cmap="gray") -a=fig.add_subplot(2,3,6) +a=fig.add_subplot(2,4,7) # these are matplotlib.patch.Patch properties props = dict(boxstyle='round', facecolor='wheat', alpha=0.5) # place a text box in upper left in axes coords a.text(0.05, 0.95, 'nml - u0', transform=a.transAxes, fontsize=14, verticalalignment='top', bbox=props) -imgplot = plt.imshow((nml - u0)**2, cmap="gray") +imgplot = plt.imshow((nml - u0)**2, vmin=0, vmax=0.03, cmap="gray") +plt.colorbar(ticks=[0, 0.03], orientation='vertical') -plt.show() + +## Rudin-Osher-Fatemi (ROF) TV regularization +start_time = timeit.default_timer() +pars = { +'algorithm' : ROF_TV , \ + 'input' : u0, + 'regularization_parameter': 0.04,\ + 'number_of_iterations':300,\ + 'time_marching_parameter': 0.0025 + + } + +rof_tv = TV_ROF_GPU(pars['input'], + pars['regularization_parameter'], + pars['number_of_iterations'], + pars['time_marching_parameter'],'gpu') + +rms = rmse(Im, rof_tv) +pars['rmse'] = rms +txtstr = printParametersToString(pars) +txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) +print (txtstr) +a=fig.add_subplot(2,4,4) + +# these are matplotlib.patch.Patch properties +props = dict(boxstyle='round', facecolor='wheat', alpha=0.75) +# place a text box in upper left in axes coords +a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=12, + verticalalignment='top', bbox=props) +imgplot = plt.imshow(rof_tv, cmap="gray") + +a=fig.add_subplot(2,4,8) + +# these are matplotlib.patch.Patch properties +props = dict(boxstyle='round', facecolor='wheat', alpha=0.5) +# place a text box in upper left in axes coords +a.text(0.05, 0.95, 'rof_tv - u0', transform=a.transAxes, fontsize=14, + verticalalignment='top', bbox=props) +imgplot = plt.imshow((rof_tv - u0)**2, vmin=0, vmax=0.03, cmap="gray") +plt.colorbar(ticks=[0, 0.03], orientation='vertical') +plt.show() + +## Fast-Gradient Projection TV regularization +""" +start_time = timeit.default_timer() + +pars = {'algorithm' : FGP_TV, \ + 'input' : u0,\ + 'regularization_parameter':0.04, \ + 'number_of_iterations' :1200 ,\ + 'tolerance_constant':0.00001,\ + 'methodTV': 0 ,\ + 'nonneg': 0 ,\ + 'printingOut': 0 + } + +fgp_gpu = FGP_TV(pars['input'], + pars['regularization_parameter'], + pars['number_of_iterations'], + pars['tolerance_constant'], + pars['methodTV'], + pars['nonneg'], + pars['printingOut'],'gpu') + +rms = rmse(Im, fgp_gpu) +pars['rmse'] = rms +txtstr = printParametersToString(pars) +txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) +print (txtstr) +a=fig.add_subplot(2,4,4) + +# these are matplotlib.patch.Patch properties +props = dict(boxstyle='round', facecolor='wheat', alpha=0.75) +# place a text box in upper left in axes coords +a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=12, + verticalalignment='top', bbox=props) +imgplot = plt.imshow(fgp_gpu, cmap="gray") + +a=fig.add_subplot(2,4,8) + +# these are matplotlib.patch.Patch properties +props = dict(boxstyle='round', facecolor='wheat', alpha=0.5) +# place a text box in upper left in axes coords +a.text(0.05, 0.95, 'fgp_gpu - u0', transform=a.transAxes, fontsize=14, + verticalalignment='top', bbox=props) +imgplot = plt.imshow((fgp_gpu - u0)**2, vmin=0, vmax=0.03, cmap="gray") +plt.colorbar(ticks=[0, 0.03], orientation='vertical') +plt.show() +""" |