diff options
-rw-r--r-- | Core/regularisers_CPU/TGV_core.c | 342 | ||||
-rw-r--r-- | Core/regularisers_CPU/TGV_core.h | 21 | ||||
-rw-r--r-- | Core/regularisers_GPU/TGV_GPU_core.cu | 455 | ||||
-rw-r--r-- | Core/regularisers_GPU/TGV_GPU_core.h | 2 | ||||
-rw-r--r-- | Readme.md | 10 | ||||
-rw-r--r-- | Wrappers/Matlab/demos/demoMatlab_3Ddenoise.m | 15 | ||||
-rw-r--r-- | Wrappers/Matlab/demos/demoMatlab_denoise.m | 16 | ||||
-rw-r--r-- | Wrappers/Matlab/mex_compile/regularisers_CPU/TGV.c | 29 | ||||
-rw-r--r-- | Wrappers/Python/conda-recipe/conda_build_config.yaml | 2 | ||||
-rwxr-xr-x | Wrappers/Python/conda-recipe/run_test.py | 12 | ||||
-rw-r--r-- | Wrappers/Python/demos/demo_cpu_regularisers.py | 14 | ||||
-rw-r--r-- | Wrappers/Python/demos/demo_cpu_regularisers3D.py | 60 | ||||
-rw-r--r-- | Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py | 14 | ||||
-rw-r--r-- | Wrappers/Python/demos/demo_gpu_regularisers.py | 12 | ||||
-rw-r--r-- | Wrappers/Python/demos/demo_gpu_regularisers3D.py | 55 | ||||
-rw-r--r-- | Wrappers/Python/src/cpu_regularisers.pyx | 35 | ||||
-rw-r--r-- | Wrappers/Python/src/gpu_regularisers.pyx | 39 |
17 files changed, 920 insertions, 213 deletions
diff --git a/Core/regularisers_CPU/TGV_core.c b/Core/regularisers_CPU/TGV_core.c index 477e909..805c3d4 100644 --- a/Core/regularisers_CPU/TGV_core.c +++ b/Core/regularisers_CPU/TGV_core.c @@ -20,31 +20,35 @@ limitations under the License. #include "TGV_core.h" /* C-OMP implementation of Primal-Dual denoising method for - * Total Generilized Variation (TGV)-L2 model [1] (2D case only) + * Total Generilized Variation (TGV)-L2 model [1] (2D/3D case) * * Input Parameters: - * 1. Noisy image (2D) + * 1. Noisy image/volume (2D/3D) * 2. lambda - regularisation parameter * 3. parameter to control the first-order term (alpha1) * 4. parameter to control the second-order term (alpha0) * 5. Number of Chambolle-Pock (Primal-Dual) iterations * 6. Lipshitz constant (default is 12) - * + * * Output: - * Filtered/regulariaed image + * Filtered/regularised image/volume * * References: * [1] K. Bredies "Total Generalized Variation" + * */ -float TGV_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iter, float L2, int dimX, int dimY) +float TGV_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iter, float L2, int dimX, int dimY, int dimZ) { long DimTotal; - int ll; + int ll; float *U_old, *P1, *P2, *Q1, *Q2, *Q3, *V1, *V1_old, *V2, *V2_old, tau, sigma; - - DimTotal = (long)(dimX*dimY); - + + DimTotal = (long)(dimX*dimY*dimZ); + copyIm(U0, U, (long)(dimX), (long)(dimY), (long)(dimZ)); /* initialize */ + tau = pow(L2,-0.5); + sigma = pow(L2,-0.5); + /* dual variables */ P1 = calloc(DimTotal, sizeof(float)); P2 = calloc(DimTotal, sizeof(float)); @@ -59,12 +63,10 @@ float TGV_main(float *U0, float *U, float lambda, float alpha1, float alpha0, in V1_old = calloc(DimTotal, sizeof(float)); V2 = calloc(DimTotal, sizeof(float)); V2_old = calloc(DimTotal, sizeof(float)); - - copyIm(U0, U, (long)(dimX), (long)(dimY), 1l); /* initialize */ - - tau = pow(L2,-0.5); - sigma = pow(L2,-0.5); - + + if (dimZ == 1) { + /*2D case*/ + /* Primal-dual iterations begin here */ for(ll = 0; ll < iter; ll++) { @@ -100,6 +102,54 @@ float TGV_main(float *U0, float *U, float lambda, float alpha1, float alpha0, in newU(V1, V1_old, (long)(dimX), (long)(dimY)); newU(V2, V2_old, (long)(dimX), (long)(dimY)); } /*end of iterations*/ + } + else { + /*3D case*/ + float *P3, *Q4, *Q5, *Q6, *V3, *V3_old; + + P3 = calloc(DimTotal, sizeof(float)); + Q4 = calloc(DimTotal, sizeof(float)); + Q5 = calloc(DimTotal, sizeof(float)); + Q6 = calloc(DimTotal, sizeof(float)); + V3 = calloc(DimTotal, sizeof(float)); + V3_old = calloc(DimTotal, sizeof(float)); + + /* Primal-dual iterations begin here */ + for(ll = 0; ll < iter; ll++) { + + /* Calculate Dual Variable P */ + DualP_3D(U, V1, V2, V3, P1, P2, P3, (long)(dimX), (long)(dimY), (long)(dimZ), sigma); + + /*Projection onto convex set for P*/ + ProjP_3D(P1, P2, P3, (long)(dimX), (long)(dimY), (long)(dimZ), alpha1); + + /* Calculate Dual Variable Q */ + DualQ_3D(V1, V2, V3, Q1, Q2, Q3, Q4, Q5, Q6, (long)(dimX), (long)(dimY), (long)(dimZ), sigma); + + /*Projection onto convex set for Q*/ + ProjQ_3D(Q1, Q2, Q3, Q4, Q5, Q6, (long)(dimX), (long)(dimY), (long)(dimZ), alpha0); + + /*saving U into U_old*/ + copyIm(U, U_old, (long)(dimX), (long)(dimY), (long)(dimZ)); + + /*adjoint operation -> divergence and projection of P*/ + DivProjP_3D(U, U0, P1, P2, P3, (long)(dimX), (long)(dimY), (long)(dimZ), lambda, tau); + + /*get updated solution U*/ + newU3D(U, U_old, (long)(dimX), (long)(dimY), (long)(dimZ)); + + /*saving V into V_old*/ + copyIm_3Ar(V1, V2, V3, V1_old, V2_old, V3_old, (long)(dimX), (long)(dimY), (long)(dimZ)); + + /* upd V*/ + UpdV_3D(V1, V2, V3, P1, P2, P3, Q1, Q2, Q3, Q4, Q5, Q6, (long)(dimX), (long)(dimY), (long)(dimZ), tau); + + /*get new V*/ + newU3D_3Ar(V1, V2, V3, V1_old, V2_old, V3_old, (long)(dimX), (long)(dimY), (long)(dimZ)); + } /*end of iterations*/ + free(P3);free(Q4);free(Q5);free(Q6);free(V3);free(V3_old); + } + /*freeing*/ free(P1);free(P2);free(Q1);free(Q2);free(Q3);free(U_old); free(V1);free(V2);free(V1_old);free(V2_old); @@ -134,10 +184,9 @@ float ProjP_2D(float *P1, float *P2, long dimX, long dimY, float alpha1) #pragma omp parallel for shared(P1,P2) private(i,j,index,grad_magn) for(i=0; i<dimX; i++) { for(j=0; j<dimY; j++) { - index = j*dimX+i; - grad_magn = sqrt(pow(P1[index],2) + pow(P2[index],2)); - grad_magn = grad_magn/alpha1; - if (grad_magn > 1.0) { + index = j*dimX+i; + grad_magn = (sqrtf(pow(P1[index],2) + pow(P2[index],2)))/alpha1; + if (grad_magn > 1.0f) { P1[index] /= grad_magn; P2[index] /= grad_magn; } @@ -152,21 +201,14 @@ float DualQ_2D(float *V1, float *V2, float *Q1, float *Q2, float *Q3, long dimX, #pragma omp parallel for shared(Q1,Q2,Q3,V1,V2) private(i,j,index,q1,q2,q11,q22) for(i=0; i<dimX; i++) { for(j=0; j<dimY; j++) { - index = j*dimX+i; - /* symmetric boundary conditions (Neuman) */ - if (i == dimX-1) - { q1 = (V1[j*dimX+(i-1)] - V1[index]); - q11 = (V2[j*dimX+(i-1)] - V2[index]); + index = j*dimX+i; + q1 = 0.0f; q11 = 0.0f; q2 = 0.0f; q22 = 0.0f; + /* boundary conditions (Neuman) */ + if (i != dimX-1){ + q1 = V1[j*dimX+(i+1)] - V1[index]; + q11 = V2[j*dimX+(i+1)] - V2[index]; } - else { - q1 = (V1[j*dimX+(i+1)] - V1[index]); - q11 = (V2[j*dimX+(i+1)] - V2[index]); - } - if (j == dimY-1) { - q2 = (V2[(j-1)*dimX+i] - V2[index]); - q22 = (V1[(j-1)*dimX+i] - V1[index]); - } - else { + if (j != dimY-1) { q2 = V2[(j+1)*dimX+i] - V2[index]; q22 = V1[(j+1)*dimX+i] - V1[index]; } @@ -183,10 +225,10 @@ float ProjQ_2D(float *Q1, float *Q2, float *Q3, long dimX, long dimY, float alph #pragma omp parallel for shared(Q1,Q2,Q3) private(i,j,index,grad_magn) for(i=0; i<dimX; i++) { for(j=0; j<dimY; j++) { - index = j*dimX+i; - grad_magn = sqrt(pow(Q1[index],2) + pow(Q2[index],2) + 2*pow(Q3[index],2)); + index = j*dimX+i; + grad_magn = sqrtf(pow(Q1[index],2) + pow(Q2[index],2) + 2*pow(Q3[index],2)); grad_magn = grad_magn/alpha0; - if (grad_magn > 1.0) { + if (grad_magn > 1.0f) { Q1[index] /= grad_magn; Q2[index] /= grad_magn; Q3[index] /= grad_magn; @@ -202,7 +244,7 @@ float DivProjP_2D(float *U, float *U0, float *P1, float *P2, long dimX, long dim #pragma omp parallel for shared(U,U0,P1,P2) private(i,j,index,P_v1,P_v2,div) for(i=0; i<dimX; i++) { for(j=0; j<dimY; j++) { - index = j*dimX+i; + index = j*dimX+i; if (i == 0) P_v1 = P1[index]; else P_v1 = P1[index] - P1[j*dimX+(i-1)]; if (j == 0) P_v2 = P2[index]; @@ -224,32 +266,222 @@ float newU(float *U, float *U_old, long dimX, long dimY) float UpdV_2D(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, float *Q3, long dimX, long dimY, float tau) { long i, j, index; - float q1, q11, q2, q22, div1, div2; -#pragma omp parallel for shared(V1,V2,P1,P2,Q1,Q2,Q3) private(i, j, index, q1, q11, q2, q22, div1, div2) + float q1, q3_x, q3_y, q2, div1, div2; +#pragma omp parallel for shared(V1,V2,P1,P2,Q1,Q2,Q3) private(i, j, index, q1, q3_x, q3_y, q2, div1, div2) for(i=0; i<dimX; i++) { for(j=0; j<dimY; j++) { - index = j*dimX+i; - /* symmetric boundary conditions (Neuman) */ - if (i == 0) { - q1 = Q1[index]; - q11 = Q3[index]; - } - else { + index = j*dimX+i; + q2 = 0.0f; q3_y = 0.0f; q1 = 0.0f; q3_x = 0.0; + /* boundary conditions (Neuman) */ + if (i != 0) { q1 = Q1[index] - Q1[j*dimX+(i-1)]; - q11 = Q3[index] - Q3[j*dimX+(i-1)]; + q3_x = Q3[index] - Q3[j*dimX+(i-1)]; } - if (j == 0) { - q2 = Q2[index]; - q22 = Q3[index]; - } - else { + if (j != 0) { q2 = Q2[index] - Q2[(j-1)*dimX+i]; - q22 = Q3[index] - Q3[(j-1)*dimX+i]; + q3_y = Q3[index] - Q3[(j-1)*dimX+i]; } - div1 = q1 + q22; - div2 = q2 + q11; + div1 = q1 + q3_y; + div2 = q3_x + q2; V1[index] += tau*(P1[index] + div1); V2[index] += tau*(P2[index] + div2); }} return 1; } + +/********************************************************************/ +/***************************3D Functions*****************************/ +/********************************************************************/ +/*Calculating dual variable P (using forward differences)*/ +float DualP_3D(float *U, float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, long dimX, long dimY, long dimZ, float sigma) +{ + long i,j,k, index; +#pragma omp parallel for shared(U,V1,V2,V3,P1,P2,P3) private(i,j,k,index) + for(i=0; i<dimX; i++) { + for(j=0; j<dimY; j++) { + for(k=0; k<dimZ; k++) { + index = (dimX*dimY)*k + j*dimX+i; + /* symmetric boundary conditions (Neuman) */ + if (i == dimX-1) P1[index] += sigma*((U[(dimX*dimY)*k + j*dimX+(i-1)] - U[index]) - V1[index]); + else P1[index] += sigma*((U[(dimX*dimY)*k + j*dimX+(i+1)] - U[index]) - V1[index]); + if (j == dimY-1) P2[index] += sigma*((U[(dimX*dimY)*k + (j-1)*dimX+i] - U[index]) - V2[index]); + else P2[index] += sigma*((U[(dimX*dimY)*k + (j+1)*dimX+i] - U[index]) - V2[index]); + if (k == dimZ-1) P3[index] += sigma*((U[(dimX*dimY)*(k-1) + j*dimX+i] - U[index]) - V3[index]); + else P3[index] += sigma*((U[(dimX*dimY)*(k+1) + j*dimX+i] - U[index]) - V3[index]); + }}} + return 1; +} +/*Projection onto convex set for P*/ +float ProjP_3D(float *P1, float *P2, float *P3, long dimX, long dimY, long dimZ, float alpha1) +{ + float grad_magn; + long i,j,k,index; +#pragma omp parallel for shared(P1,P2,P3) private(i,j,k,index,grad_magn) + for(i=0; i<dimX; i++) { + for(j=0; j<dimY; j++) { + for(k=0; k<dimZ; k++) { + index = (dimX*dimY)*k + j*dimX+i; + grad_magn = (sqrtf(pow(P1[index],2) + pow(P2[index],2) + pow(P3[index],2)))/alpha1; + if (grad_magn > 1.0f) { + P1[index] /= grad_magn; + P2[index] /= grad_magn; + P3[index] /= grad_magn; + } + }}} + return 1; +} +/*Calculating dual variable Q (using forward differences)*/ +float DualQ_3D(float *V1, float *V2, float *V3, float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, long dimX, long dimY, long dimZ, float sigma) +{ + long i,j,k,index; + float q1, q2, q3, q11, q22, q33, q44, q55, q66; +#pragma omp parallel for shared(Q1,Q2,Q3,Q4,Q5,Q6,V1,V2,V3) private(i,j,k,index,q1,q2,q3,q11,q22,q33,q44,q55,q66) + for(i=0; i<dimX; i++) { + for(j=0; j<dimY; j++) { + for(k=0; k<dimZ; k++) { + index = (dimX*dimY)*k + j*dimX+i; + q1 = 0.0f; q11 = 0.0f; q33 = 0.0f; q2 = 0.0f; q22 = 0.0f; q55 = 0.0f; q3 = 0.0f; q44 = 0.0f; q66 = 0.0f; + /* symmetric boundary conditions (Neuman) */ + if (i != dimX-1){ + q1 = V1[(dimX*dimY)*k + j*dimX+(i+1)] - V1[index]; + q11 = V2[(dimX*dimY)*k + j*dimX+(i+1)] - V2[index]; + q33 = V3[(dimX*dimY)*k + j*dimX+(i+1)] - V3[index]; + } + if (j != dimY-1) { + q2 = V2[(dimX*dimY)*k + (j+1)*dimX+i] - V2[index]; + q22 = V1[(dimX*dimY)*k + (j+1)*dimX+i] - V1[index]; + q55 = V3[(dimX*dimY)*k + (j+1)*dimX+i] - V3[index]; + } + if (k != dimZ-1) { + q3 = V3[(dimX*dimY)*(k+1) + j*dimX+i] - V3[index]; + q44 = V1[(dimX*dimY)*(k+1) + j*dimX+i] - V1[index]; + q66 = V2[(dimX*dimY)*(k+1) + j*dimX+i] - V2[index]; + } + + Q1[index] += sigma*(q1); /*Q11*/ + Q2[index] += sigma*(q2); /*Q22*/ + Q3[index] += sigma*(q3); /*Q33*/ + Q4[index] += sigma*(0.5f*(q11 + q22)); /* Q21 / Q12 */ + Q5[index] += sigma*(0.5f*(q33 + q44)); /* Q31 / Q13 */ + Q6[index] += sigma*(0.5f*(q55 + q66)); /* Q32 / Q23 */ + }}} + return 1; +} +float ProjQ_3D(float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, long dimX, long dimY, long dimZ, float alpha0) +{ + float grad_magn; + long i,j,k,index; +#pragma omp parallel for shared(Q1,Q2,Q3,Q4,Q5,Q6) private(i,j,k,index,grad_magn) + for(i=0; i<dimX; i++) { + for(j=0; j<dimY; j++) { + for(k=0; k<dimZ; k++) { + index = (dimX*dimY)*k + j*dimX+i; + grad_magn = sqrtf(pow(Q1[index],2) + pow(Q2[index],2) + pow(Q3[index],2) + 2.0f*pow(Q4[index],2) + 2.0f*pow(Q5[index],2) + 2.0f*pow(Q6[index],2)); + grad_magn = grad_magn/alpha0; + if (grad_magn > 1.0f) { + Q1[index] /= grad_magn; + Q2[index] /= grad_magn; + Q3[index] /= grad_magn; + Q4[index] /= grad_magn; + Q5[index] /= grad_magn; + Q6[index] /= grad_magn; + } + }}} + return 1; +} +/* Divergence and projection for P*/ +float DivProjP_3D(float *U, float *U0, float *P1, float *P2, float *P3, long dimX, long dimY, long dimZ, float lambda, float tau) +{ + long i,j,k,index; + float P_v1, P_v2, P_v3, div; +#pragma omp parallel for shared(U,U0,P1,P2,P3) private(i,j,k,index,P_v1,P_v2,P_v3,div) + for(i=0; i<dimX; i++) { + for(j=0; j<dimY; j++) { + for(k=0; k<dimZ; k++) { + index = (dimX*dimY)*k + j*dimX+i; + if (i == 0) P_v1 = P1[index]; + else P_v1 = P1[index] - P1[(dimX*dimY)*k + j*dimX+(i-1)]; + if (j == 0) P_v2 = P2[index]; + else P_v2 = P2[index] - P2[(dimX*dimY)*k + (j-1)*dimX+i]; + if (k == 0) P_v3 = P3[index]; + else P_v3 = P3[index] - P3[(dimX*dimY)*(k-1) + (j)*dimX+i]; + + div = P_v1 + P_v2 + P_v3; + U[index] = (lambda*(U[index] + tau*div) + tau*U0[index])/(lambda + tau); + }}} + return *U; +} +/*get update for V*/ +float UpdV_3D(float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, long dimX, long dimY, long dimZ, float tau) +{ + long i,j,k,index; + float q1, q4x, q5x, q2, q4y, q6y, q6z, q5z, q3, div1, div2, div3; +#pragma omp parallel for shared(V1,V2,V3,P1,P2,P3,Q1,Q2,Q3,Q4,Q5,Q6) private(i,j,k,index,q1,q4x,q5x,q2,q4y,q6y,q6z,q5z,q3,div1,div2,div3) + for(i=0; i<dimX; i++) { + for(j=0; j<dimY; j++) { + for(k=0; k<dimZ; k++) { + index = (dimX*dimY)*k + j*dimX+i; + q1 = 0.0f; q4x= 0.0f; q5x= 0.0f; q2= 0.0f; q4y= 0.0f; q6y= 0.0f; q6z= 0.0f; q5z= 0.0f; q3= 0.0f; + /* Q1 - Q11, Q2 - Q22, Q3 - Q33, Q4 - Q21/Q12, Q5 - Q31/Q13, Q6 - Q32/Q23*/ + /* symmetric boundary conditions (Neuman) */ + if (i != 0) { + q1 = Q1[index] - Q1[(dimX*dimY)*k + j*dimX+(i-1)]; + q4x = Q4[index] - Q4[(dimX*dimY)*k + j*dimX+(i-1)]; + q5x = Q5[index] - Q5[(dimX*dimY)*k + j*dimX+(i-1)]; + } + if (j != 0) { + q2 = Q2[index] - Q2[(dimX*dimY)*k + (j-1)*dimX+i]; + q4y = Q4[index] - Q4[(dimX*dimY)*k + (j-1)*dimX+i]; + q6y = Q6[index] - Q6[(dimX*dimY)*k + (j-1)*dimX+i]; + } + if (k != 0) { + q6z = Q6[index] - Q6[(dimX*dimY)*(k-1) + (j)*dimX+i]; + q5z = Q5[index] - Q5[(dimX*dimY)*(k-1) + (j)*dimX+i]; + q3 = Q3[index] - Q3[(dimX*dimY)*(k-1) + (j)*dimX+i]; + } + div1 = q1 + q4y + q5z; + div2 = q4x + q2 + q6z; + div3 = q5x + q6y + q3; + + V1[index] += tau*(P1[index] + div1); + V2[index] += tau*(P2[index] + div2); + V3[index] += tau*(P3[index] + div3); + }}} + return 1; +} + +float copyIm_3Ar(float *V1, float *V2, float *V3, float *V1_old, float *V2_old, float *V3_old, long dimX, long dimY, long dimZ) +{ + long j; +#pragma omp parallel for shared(V1, V2, V3, V1_old, V2_old, V3_old) private(j) + for (j = 0; j<dimX*dimY*dimZ; j++) { + V1_old[j] = V1[j]; + V2_old[j] = V2[j]; + V3_old[j] = V3[j]; + } + return 1; +} + +/*get updated solution U*/ +float newU3D(float *U, float *U_old, long dimX, long dimY, long dimZ) +{ + long i; +#pragma omp parallel for shared(U, U_old) private(i) + for(i=0; i<dimX*dimY*dimZ; i++) U[i] = 2.0f*U[i] - U_old[i]; + return *U; +} + + +/*get updated solution U*/ +float newU3D_3Ar(float *V1, float *V2, float *V3, float *V1_old, float *V2_old, float *V3_old, long dimX, long dimY, long dimZ) +{ + long i; +#pragma omp parallel for shared(V1, V2, V3, V1_old, V2_old, V3_old) private(i) + for(i=0; i<dimX*dimY*dimZ; i++) { + V1[i] = 2.0f*V1[i] - V1_old[i]; + V2[i] = 2.0f*V2[i] - V2_old[i]; + V3[i] = 2.0f*V3[i] - V3_old[i]; + } + return 1; +} + diff --git a/Core/regularisers_CPU/TGV_core.h b/Core/regularisers_CPU/TGV_core.h index 29482ad..11b12c1 100644 --- a/Core/regularisers_CPU/TGV_core.h +++ b/Core/regularisers_CPU/TGV_core.h @@ -26,10 +26,10 @@ limitations under the License. #include "CCPiDefines.h" /* C-OMP implementation of Primal-Dual denoising method for - * Total Generilized Variation (TGV)-L2 model [1] (2D case only) + * Total Generilized Variation (TGV)-L2 model [1] (2D/3D) * * Input Parameters: - * 1. Noisy image (2D) + * 1. Noisy image/volume (2D/3D) * 2. lambda - regularisation parameter * 3. parameter to control the first-order term (alpha1) * 4. parameter to control the second-order term (alpha0) @@ -37,7 +37,7 @@ limitations under the License. * 6. Lipshitz constant (default is 12) * * Output: - * Filtered/regulariaed image + * Filtered/regularised image/volume * * References: * [1] K. Bredies "Total Generalized Variation" @@ -47,9 +47,10 @@ limitations under the License. #ifdef __cplusplus extern "C" { #endif -/* 2D functions */ -CCPI_EXPORT float TGV_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iter, float L2, int dimX, int dimY); +CCPI_EXPORT float TGV_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iter, float L2, int dimX, int dimY, int dimZ); + +/* 2D functions */ CCPI_EXPORT float DualP_2D(float *U, float *V1, float *V2, float *P1, float *P2, long dimX, long dimY, float sigma); CCPI_EXPORT float ProjP_2D(float *P1, float *P2, long dimX, long dimY, float alpha1); CCPI_EXPORT float DualQ_2D(float *V1, float *V2, float *Q1, float *Q2, float *Q3, long dimX, long dimY, float sigma); @@ -57,6 +58,16 @@ CCPI_EXPORT float ProjQ_2D(float *Q1, float *Q2, float *Q3, long dimX, long dimY CCPI_EXPORT float DivProjP_2D(float *U, float *U0, float *P1, float *P2, long dimX, long dimY, float lambda, float tau); CCPI_EXPORT float UpdV_2D(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, float *Q3, long dimX, long dimY, float tau); CCPI_EXPORT float newU(float *U, float *U_old, long dimX, long dimY); +/* 3D functions */ +CCPI_EXPORT float DualP_3D(float *U, float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, long dimX, long dimY, long dimZ, float sigma); +CCPI_EXPORT float ProjP_3D(float *P1, float *P2, float *P3, long dimX, long dimY, long dimZ, float alpha1); +CCPI_EXPORT float DualQ_3D(float *V1, float *V2, float *V3, float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, long dimX, long dimY, long dimZ, float sigma); +CCPI_EXPORT float ProjQ_3D(float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, long dimX, long dimY, long dimZ, float alpha0); +CCPI_EXPORT float DivProjP_3D(float *U, float *U0, float *P1, float *P2, float *P3, long dimX, long dimY, long dimZ, float lambda, float tau); +CCPI_EXPORT float UpdV_3D(float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, long dimX, long dimY, long dimZ, float tau); +CCPI_EXPORT float newU3D(float *U, float *U_old, long dimX, long dimY, long dimZ); +CCPI_EXPORT float copyIm_3Ar(float *V1, float *V2, float *V3, float *V1_old, float *V2_old, float *V3_old, long dimX, long dimY, long dimZ); +CCPI_EXPORT float newU3D_3Ar(float *V1, float *V2, float *V3, float *V1_old, float *V2_old, float *V3_old, long dimX, long dimY, long dimZ); #ifdef __cplusplus } #endif diff --git a/Core/regularisers_GPU/TGV_GPU_core.cu b/Core/regularisers_GPU/TGV_GPU_core.cu index 73232a9..58b2c41 100644 --- a/Core/regularisers_GPU/TGV_GPU_core.cu +++ b/Core/regularisers_GPU/TGV_GPU_core.cu @@ -21,10 +21,10 @@ limitations under the License. #include "shared.h" /* CUDA implementation of Primal-Dual denoising method for - * Total Generilized Variation (TGV)-L2 model [1] (2D case only) + * Total Generilized Variation (TGV)-L2 model [1] (2D/3D case) * * Input Parameters: - * 1. Noisy image (2D) + * 1. Noisy image/volume (2D/3D) * 2. lambda - regularisation parameter * 3. parameter to control the first-order term (alpha1) * 4. parameter to control the second-order term (alpha0) @@ -38,6 +38,10 @@ limitations under the License. * [1] K. Bredies "Total Generalized Variation" */ +#define BLKXSIZE 8 +#define BLKYSIZE 8 +#define BLKZSIZE 8 + #define BLKXSIZE2D 16 #define BLKYSIZE2D 16 #define EPS 1.0e-7 @@ -48,9 +52,8 @@ limitations under the License. /***************************2D Functions*****************************/ /********************************************************************/ __global__ void DualP_2D_kernel(float *U, float *V1, float *V2, float *P1, float *P2, int dimX, int dimY, float sigma) -{ - - int i = blockDim.x * blockIdx.x + threadIdx.x; +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; int j = blockDim.y * blockIdx.y + threadIdx.y; int index = i + dimX*j; @@ -67,9 +70,9 @@ __global__ void DualP_2D_kernel(float *U, float *V1, float *V2, float *P1, float __global__ void ProjP_2D_kernel(float *P1, float *P2, int dimX, int dimY, float alpha1) { - float grad_magn; + float grad_magn; - int i = blockDim.x * blockIdx.x + threadIdx.x; + int i = blockDim.x * blockIdx.x + threadIdx.x; int j = blockDim.y * blockIdx.y + threadIdx.y; int index = i + dimX*j; @@ -78,7 +81,7 @@ __global__ void ProjP_2D_kernel(float *P1, float *P2, int dimX, int dimY, float grad_magn = sqrt(pow(P1[index],2) + pow(P2[index],2)); grad_magn = grad_magn/alpha1; - if (grad_magn > 1.0) { + if (grad_magn > 1.0f) { P1[index] /= grad_magn; P2[index] /= grad_magn; } @@ -90,64 +93,56 @@ __global__ void DualQ_2D_kernel(float *V1, float *V2, float *Q1, float *Q2, floa { float q1, q2, q11, q22; - int i = blockDim.x * blockIdx.x + threadIdx.x; + int i = blockDim.x * blockIdx.x + threadIdx.x; int j = blockDim.y * blockIdx.y + threadIdx.y; int index = i + dimX*j; - if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY)) { - + if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY)) { /* symmetric boundary conditions (Neuman) */ - if (i == dimX-1) - { q1 = (V1[j*dimX+(i-1)] - V1[index]); - q11 = (V2[j*dimX+(i-1)] - V2[index]); + q1 = 0.0f; q11 = 0.0f; q2 = 0.0f; q22 = 0.0f; + /* boundary conditions (Neuman) */ + if (i != dimX-1){ + q1 = V1[j*dimX+(i+1)] - V1[index]; + q11 = V2[j*dimX+(i+1)] - V2[index]; } - else { - q1 = (V1[j*dimX+(i+1)] - V1[index]); - q11 = (V2[j*dimX+(i+1)] - V2[index]); - } - if (j == dimY-1) { - q2 = (V2[(j-1)*dimX+i] - V2[index]); - q22 = (V1[(j-1)*dimX+i] - V1[index]); - } - else { + if (j != dimY-1) { q2 = V2[(j+1)*dimX+i] - V2[index]; q22 = V1[(j+1)*dimX+i] - V1[index]; } Q1[index] += sigma*(q1); Q2[index] += sigma*(q2); Q3[index] += sigma*(0.5f*(q11 + q22)); - } + } return; } __global__ void ProjQ_2D_kernel(float *Q1, float *Q2, float *Q3, int dimX, int dimY, float alpha0) { - float grad_magn; + float grad_magn; - int i = blockDim.x * blockIdx.x + threadIdx.x; + int i = blockDim.x * blockIdx.x + threadIdx.x; int j = blockDim.y * blockIdx.y + threadIdx.y; int index = i + dimX*j; - if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY)) { - + if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY)) { grad_magn = sqrt(pow(Q1[index],2) + pow(Q2[index],2) + 2*pow(Q3[index],2)); grad_magn = grad_magn/alpha0; - if (grad_magn > 1.0) { + if (grad_magn > 1.0f) { Q1[index] /= grad_magn; Q2[index] /= grad_magn; Q3[index] /= grad_magn; - } - } + } + } return; } __global__ void DivProjP_2D_kernel(float *U, float *U0, float *P1, float *P2, int dimX, int dimY, float lambda, float tau) { - float P_v1, P_v2, div; + float P_v1, P_v2, div; - int i = blockDim.x * blockIdx.x + threadIdx.x; + int i = blockDim.x * blockIdx.x + threadIdx.x; int j = blockDim.y * blockIdx.y + threadIdx.y; int index = i + dimX*j; @@ -166,49 +161,54 @@ __global__ void DivProjP_2D_kernel(float *U, float *U0, float *P1, float *P2, in __global__ void UpdV_2D_kernel(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, float tau) { - float q1, q11, q2, q22, div1, div2; + float q1, q3_x, q2, q3_y, div1, div2; - int i = blockDim.x * blockIdx.x + threadIdx.x; + int i = blockDim.x * blockIdx.x + threadIdx.x; int j = blockDim.y * blockIdx.y + threadIdx.y; int index = i + dimX*j; - if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY)) { - - /* symmetric boundary conditions (Neuman) */ - if (i == 0) { - q1 = Q1[index]; - q11 = Q3[index]; - } - else { + if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY)) { + q2 = 0.0f; q3_y = 0.0f; q1 = 0.0f; q3_x = 0.0; + /* boundary conditions (Neuman) */ + if (i != 0) { q1 = Q1[index] - Q1[j*dimX+(i-1)]; - q11 = Q3[index] - Q3[j*dimX+(i-1)]; - } - if (j == 0) { - q2 = Q2[index]; - q22 = Q3[index]; + q3_x = Q3[index] - Q3[j*dimX+(i-1)]; } - else { + if (j != 0) { q2 = Q2[index] - Q2[(j-1)*dimX+i]; - q22 = Q3[index] - Q3[(j-1)*dimX+i]; + q3_y = Q3[index] - Q3[(j-1)*dimX+i]; } - div1 = q1 + q22; - div2 = q2 + q11; + div1 = q1 + q3_y; + div2 = q3_x + q2; V1[index] += tau*(P1[index] + div1); V2[index] += tau*(P2[index] + div2); - } + } return; } -__global__ void copyIm_TGV_kernel(float *Input, float* Output, int N, int M, int num_total) +__global__ void copyIm_TGV_kernel(float *U, float *U_old, int N, int M, int num_total) { int xIndex = blockDim.x * blockIdx.x + threadIdx.x; int yIndex = blockDim.y * blockIdx.y + threadIdx.y; int index = xIndex + N*yIndex; - if (index < num_total) { - Output[index] = Input[index]; + if (index < num_total) { + U_old[index] = U[index]; + } +} + +__global__ void copyIm_TGV_kernel_ar2(float *V1, float *V2, float *V1_old, float *V2_old, int N, int M, int num_total) +{ + int xIndex = blockDim.x * blockIdx.x + threadIdx.x; + int yIndex = blockDim.y * blockIdx.y + threadIdx.y; + + int index = xIndex + N*yIndex; + + if (index < num_total) { + V1_old[index] = V1[index]; + V2_old[index] = V2[index]; } } @@ -220,18 +220,276 @@ __global__ void newU_kernel(float *U, float *U_old, int N, int M, int num_total) int index = xIndex + N*yIndex; if (index < num_total) { - U[index] = 2.0f*U[index] - U_old[index]; + U[index] = 2.0f*U[index] - U_old[index]; + } +} + + +__global__ void newU_kernel_ar2(float *V1, float *V2, float *V1_old, float *V2_old, int N, int M, int num_total) +{ + int xIndex = blockDim.x * blockIdx.x + threadIdx.x; + int yIndex = blockDim.y * blockIdx.y + threadIdx.y; + + int index = xIndex + N*yIndex; + + if (index < num_total) { + V1[index] = 2.0f*V1[index] - V1_old[index]; + V2[index] = 2.0f*V2[index] - V2_old[index]; + } +} +/********************************************************************/ +/***************************3D Functions*****************************/ +/********************************************************************/ +__global__ void DualP_3D_kernel(float *U, float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ, float sigma) +{ + int index; + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) { + + index = (dimX*dimY)*k + j*dimX+i; + /* symmetric boundary conditions (Neuman) */ + if (i == dimX-1) P1[index] += sigma*((U[(dimX*dimY)*k + j*dimX+(i-1)] - U[index]) - V1[index]); + else P1[index] += sigma*((U[(dimX*dimY)*k + j*dimX+(i+1)] - U[index]) - V1[index]); + if (j == dimY-1) P2[index] += sigma*((U[(dimX*dimY)*k + (j-1)*dimX+i] - U[index]) - V2[index]); + else P2[index] += sigma*((U[(dimX*dimY)*k + (j+1)*dimX+i] - U[index]) - V2[index]); + if (k == dimZ-1) P3[index] += sigma*((U[(dimX*dimY)*(k-1) + j*dimX+i] - U[index]) - V3[index]); + else P3[index] += sigma*((U[(dimX*dimY)*(k+1) + j*dimX+i] - U[index]) - V3[index]); + } + return; +} + +__global__ void ProjP_3D_kernel(float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ, float alpha1) +{ + float grad_magn; + int index; + + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) { + index = (dimX*dimY)*k + j*dimX+i; + + grad_magn = (sqrtf(pow(P1[index],2) + pow(P2[index],2) + pow(P3[index],2)))/alpha1; + if (grad_magn > 1.0f) { + P1[index] /= grad_magn; + P2[index] /= grad_magn; + P3[index] /= grad_magn; + } + } + return; +} + +__global__ void DualQ_3D_kernel(float *V1, float *V2, float *V3, float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, int dimX, int dimY, int dimZ, float sigma) +{ + int index; + float q1, q2, q3, q11, q22, q33, q44, q55, q66; + + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) { + + index = (dimX*dimY)*k + j*dimX+i; + q1 = 0.0f; q11 = 0.0f; q33 = 0.0f; q2 = 0.0f; q22 = 0.0f; q55 = 0.0f; q3 = 0.0f; q44 = 0.0f; q66 = 0.0f; + /* symmetric boundary conditions (Neuman) */ + if (i != dimX-1){ + q1 = V1[(dimX*dimY)*k + j*dimX+(i+1)] - V1[index]; + q11 = V2[(dimX*dimY)*k + j*dimX+(i+1)] - V2[index]; + q33 = V3[(dimX*dimY)*k + j*dimX+(i+1)] - V3[index]; + } + if (j != dimY-1) { + q2 = V2[(dimX*dimY)*k + (j+1)*dimX+i] - V2[index]; + q22 = V1[(dimX*dimY)*k + (j+1)*dimX+i] - V1[index]; + q55 = V3[(dimX*dimY)*k + (j+1)*dimX+i] - V3[index]; + } + if (k != dimZ-1) { + q3 = V3[(dimX*dimY)*(k+1) + j*dimX+i] - V3[index]; + q44 = V1[(dimX*dimY)*(k+1) + j*dimX+i] - V1[index]; + q66 = V2[(dimX*dimY)*(k+1) + j*dimX+i] - V2[index]; + } + + Q1[index] += sigma*(q1); /*Q11*/ + Q2[index] += sigma*(q2); /*Q22*/ + Q3[index] += sigma*(q3); /*Q33*/ + Q4[index] += sigma*(0.5f*(q11 + q22)); /* Q21 / Q12 */ + Q5[index] += sigma*(0.5f*(q33 + q44)); /* Q31 / Q13 */ + Q6[index] += sigma*(0.5f*(q55 + q66)); /* Q32 / Q23 */ + } + return; +} + + +__global__ void ProjQ_3D_kernel(float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, int dimX, int dimY, int dimZ, float alpha0) +{ + float grad_magn; + int index; + + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) { + + index = (dimX*dimY)*k + j*dimX+i; + + grad_magn = sqrtf(pow(Q1[index],2) + pow(Q2[index],2) + pow(Q3[index],2) + 2.0f*pow(Q4[index],2) + 2.0f*pow(Q5[index],2) + 2.0f*pow(Q6[index],2)); + grad_magn = grad_magn/alpha0; + if (grad_magn > 1.0f) { + Q1[index] /= grad_magn; + Q2[index] /= grad_magn; + Q3[index] /= grad_magn; + Q4[index] /= grad_magn; + Q5[index] /= grad_magn; + Q6[index] /= grad_magn; + } + } + return; +} +__global__ void DivProjP_3D_kernel(float *U, float *U0, float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ, float lambda, float tau) +{ + float P_v1, P_v2, P_v3, div; + int index; + + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) { + + index = (dimX*dimY)*k + j*dimX+i; + + if (i == 0) P_v1 = P1[index]; + else P_v1 = P1[index] - P1[(dimX*dimY)*k + j*dimX+(i-1)]; + if (j == 0) P_v2 = P2[index]; + else P_v2 = P2[index] - P2[(dimX*dimY)*k + (j-1)*dimX+i]; + if (k == 0) P_v3 = P3[index]; + else P_v3 = P3[index] - P3[(dimX*dimY)*(k-1) + (j)*dimX+i]; + + div = P_v1 + P_v2 + P_v3; + U[index] = (lambda*(U[index] + tau*div) + tau*U0[index])/(lambda + tau); + } + return; +} +__global__ void UpdV_3D_kernel(float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, int dimX, int dimY, int dimZ, float tau) +{ + float q1, q4x, q5x, q2, q4y, q6y, q6z, q5z, q3, div1, div2, div3; + int index; + + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) { + + index = (dimX*dimY)*k + j*dimX+i; + + q1 = 0.0f; q4x= 0.0f; q5x= 0.0f; q2= 0.0f; q4y= 0.0f; q6y= 0.0f; q6z= 0.0f; q5z= 0.0f; q3= 0.0f; + /* Q1 - Q11, Q2 - Q22, Q3 - Q33, Q4 - Q21/Q12, Q5 - Q31/Q13, Q6 - Q32/Q23*/ + /* symmetric boundary conditions (Neuman) */ + if (i != 0) { + q1 = Q1[index] - Q1[(dimX*dimY)*k + j*dimX+(i-1)]; + q4x = Q4[index] - Q4[(dimX*dimY)*k + j*dimX+(i-1)]; + q5x = Q5[index] - Q5[(dimX*dimY)*k + j*dimX+(i-1)]; + } + if (j != 0) { + q2 = Q2[index] - Q2[(dimX*dimY)*k + (j-1)*dimX+i]; + q4y = Q4[index] - Q4[(dimX*dimY)*k + (j-1)*dimX+i]; + q6y = Q6[index] - Q6[(dimX*dimY)*k + (j-1)*dimX+i]; + } + if (k != 0) { + q6z = Q6[index] - Q6[(dimX*dimY)*(k-1) + (j)*dimX+i]; + q5z = Q5[index] - Q5[(dimX*dimY)*(k-1) + (j)*dimX+i]; + q3 = Q3[index] - Q3[(dimX*dimY)*(k-1) + (j)*dimX+i]; + } + div1 = q1 + q4y + q5z; + div2 = q4x + q2 + q6z; + div3 = q5x + q6y + q3; + + V1[index] += tau*(P1[index] + div1); + V2[index] += tau*(P2[index] + div2); + V3[index] += tau*(P3[index] + div3); + } + return; +} + +__global__ void copyIm_TGV_kernel3D(float *U, float *U_old, int dimX, int dimY, int dimZ, int num_total) +{ + int index; + + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + index = (dimX*dimY)*k + j*dimX+i; + + if (index < num_total) { + U_old[index] = U[index]; } } +__global__ void copyIm_TGV_kernel3D_ar3(float *V1, float *V2, float *V3, float *V1_old, float *V2_old, float *V3_old, int dimX, int dimY, int dimZ, int num_total) +{ + int index; + + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + index = (dimX*dimY)*k + j*dimX+i; + + if (index < num_total) { + V1_old[index] = V1[index]; + V2_old[index] = V2[index]; + V3_old[index] = V3[index]; + } +} + +__global__ void newU_kernel3D(float *U, float *U_old, int dimX, int dimY, int dimZ, int num_total) +{ + int index; + + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + index = (dimX*dimY)*k + j*dimX+i; + + if (index < num_total) { + U[index] = 2.0f*U[index] - U_old[index]; + } +} + +__global__ void newU_kernel3D_ar3(float *V1, float *V2, float *V3, float *V1_old, float *V2_old, float *V3_old, int dimX, int dimY, int dimZ, int num_total) +{ + int index; + + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + index = (dimX*dimY)*k + j*dimX+i; + + if (index < num_total) { + V1[index] = 2.0f*V1[index] - V1_old[index]; + V2[index] = 2.0f*V2[index] - V2_old[index]; + V3[index] = 2.0f*V3[index] - V3_old[index]; + } +} + /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ -/********************* MAIN HOST FUNCTION ******************/ +/************************ MAIN HOST FUNCTION ***********************/ /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ -extern "C" int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY) +extern "C" int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY, int dimZ) { - int dimTotal, dev = 0; - CHECK(cudaSetDevice(dev)); - dimTotal = dimX*dimY; + int dimTotal, dev = 0; + CHECK(cudaSetDevice(dev)); + + dimTotal = dimX*dimY*dimZ; float *U_old, *d_U0, *d_U, *P1, *P2, *Q1, *Q2, *Q3, *V1, *V1_old, *V2, *V2_old, tau, sigma; tau = pow(L2,-0.5); @@ -254,16 +512,17 @@ extern "C" int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, flo CHECK(cudaMemcpy(d_U0,U0,dimTotal*sizeof(float),cudaMemcpyHostToDevice)); CHECK(cudaMemcpy(d_U,U0,dimTotal*sizeof(float),cudaMemcpyHostToDevice)); - /*2D case */ + if (dimZ == 1) { + /*2D case */ dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D); dim3 dimGrid(idivup(dimX,BLKXSIZE2D), idivup(dimY,BLKYSIZE2D)); for(int n=0; n < iterationsNumb; n++) { - /* Calculate Dual Variable P */ + /* Calculate Dual Variable P */ DualP_2D_kernel<<<dimGrid,dimBlock>>>(d_U, V1, V2, P1, P2, dimX, dimY, sigma); - CHECK(cudaDeviceSynchronize()); - /*Projection onto convex set for P*/ + CHECK(cudaDeviceSynchronize()); + /*Projection onto convex set for P*/ ProjP_2D_kernel<<<dimGrid,dimBlock>>>(P1, P2, dimX, dimY, alpha1); CHECK(cudaDeviceSynchronize()); /* Calculate Dual Variable Q */ @@ -282,18 +541,72 @@ extern "C" int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, flo newU_kernel<<<dimGrid,dimBlock>>>(d_U, U_old, dimX, dimY, dimTotal); CHECK(cudaDeviceSynchronize()); /*saving V into V_old*/ - copyIm_TGV_kernel<<<dimGrid,dimBlock>>>(V1, V1_old, dimX, dimY, dimTotal); - copyIm_TGV_kernel<<<dimGrid,dimBlock>>>(V2, V2_old, dimX, dimY, dimTotal); + copyIm_TGV_kernel_ar2<<<dimGrid,dimBlock>>>(V1, V2, V1_old, V2_old, dimX, dimY, dimTotal); CHECK(cudaDeviceSynchronize()); /* upd V*/ UpdV_2D_kernel<<<dimGrid,dimBlock>>>(V1, V2, P1, P2, Q1, Q2, Q3, dimX, dimY, tau); CHECK(cudaDeviceSynchronize()); /*get new V*/ - newU_kernel<<<dimGrid,dimBlock>>>(V1, V1_old, dimX, dimY, dimTotal); - newU_kernel<<<dimGrid,dimBlock>>>(V2, V2_old, dimX, dimY, dimTotal); + newU_kernel_ar2<<<dimGrid,dimBlock>>>(V1, V2, V1_old, V2_old, dimX, dimY, dimTotal); CHECK(cudaDeviceSynchronize()); + } } - + else { + /*3D case */ + dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE); + dim3 dimGrid(idivup(dimX,BLKXSIZE), idivup(dimY,BLKYSIZE),idivup(dimZ,BLKXSIZE)); + + float *P3, *Q4, *Q5, *Q6, *V3, *V3_old; + + CHECK(cudaMalloc((void**)&P3,dimTotal*sizeof(float))); + CHECK(cudaMalloc((void**)&Q4,dimTotal*sizeof(float))); + CHECK(cudaMalloc((void**)&Q5,dimTotal*sizeof(float))); + CHECK(cudaMalloc((void**)&Q6,dimTotal*sizeof(float))); + CHECK(cudaMalloc((void**)&V3,dimTotal*sizeof(float))); + CHECK(cudaMalloc((void**)&V3_old,dimTotal*sizeof(float))); + + for(int n=0; n < iterationsNumb; n++) { + + /* Calculate Dual Variable P */ + DualP_3D_kernel<<<dimGrid,dimBlock>>>(d_U, V1, V2, V3, P1, P2, P3, dimX, dimY, dimZ, sigma); + CHECK(cudaDeviceSynchronize()); + /*Projection onto convex set for P*/ + ProjP_3D_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, dimX, dimY, dimZ, alpha1); + CHECK(cudaDeviceSynchronize()); + /* Calculate Dual Variable Q */ + DualQ_3D_kernel<<<dimGrid,dimBlock>>>(V1, V2, V3, Q1, Q2, Q3, Q4, Q5, Q6, dimX, dimY, dimZ, sigma); + CHECK(cudaDeviceSynchronize()); + /*Projection onto convex set for Q*/ + ProjQ_3D_kernel<<<dimGrid,dimBlock>>>(Q1, Q2, Q3, Q4, Q5, Q6, dimX, dimY, dimZ, alpha0); + CHECK(cudaDeviceSynchronize()); + /*saving U into U_old*/ + copyIm_TGV_kernel3D<<<dimGrid,dimBlock>>>(d_U, U_old, dimX, dimY, dimZ, dimTotal); + CHECK(cudaDeviceSynchronize()); + /*adjoint operation -> divergence and projection of P*/ + DivProjP_3D_kernel<<<dimGrid,dimBlock>>>(d_U, d_U0, P1, P2, P3, dimX, dimY, dimZ, lambda, tau); + CHECK(cudaDeviceSynchronize()); + /*get updated solution U*/ + newU_kernel3D<<<dimGrid,dimBlock>>>(d_U, U_old, dimX, dimY, dimZ, dimTotal); + CHECK(cudaDeviceSynchronize()); + /*saving V into V_old*/ + copyIm_TGV_kernel3D_ar3<<<dimGrid,dimBlock>>>(V1, V2, V3, V1_old, V2_old, V3_old, dimX, dimY, dimZ, dimTotal); + CHECK(cudaDeviceSynchronize()); + /* upd V*/ + UpdV_3D_kernel<<<dimGrid,dimBlock>>>(V1, V2, V3, P1, P2, P3, Q1, Q2, Q3, Q4, Q5, Q6, dimX, dimY, dimZ, tau); + CHECK(cudaDeviceSynchronize()); + /*get new V*/ + newU_kernel3D_ar3<<<dimGrid,dimBlock>>>(V1, V2, V3, V1_old, V2_old, V3_old, dimX, dimY, dimZ, dimTotal); + CHECK(cudaDeviceSynchronize()); + } + + CHECK(cudaFree(Q4)); + CHECK(cudaFree(Q5)); + CHECK(cudaFree(Q6)); + CHECK(cudaFree(P3)); + CHECK(cudaFree(V3)); + CHECK(cudaFree(V3_old)); + } + CHECK(cudaMemcpy(U,d_U,dimTotal*sizeof(float),cudaMemcpyDeviceToHost)); CHECK(cudaFree(d_U0)); CHECK(cudaFree(d_U)); diff --git a/Core/regularisers_GPU/TGV_GPU_core.h b/Core/regularisers_GPU/TGV_GPU_core.h index 5a4eb76..9f73d1c 100644 --- a/Core/regularisers_GPU/TGV_GPU_core.h +++ b/Core/regularisers_GPU/TGV_GPU_core.h @@ -3,6 +3,6 @@ #include "CCPiDefines.h" #include <stdio.h> -extern "C" CCPI_EXPORT int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY); +extern "C" CCPI_EXPORT int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY, int dimZ); #endif @@ -2,9 +2,9 @@ -| Master | Development | -|--------|-------------| -| [![Build Status](https://anvil.softeng-support.ac.uk/jenkins/buildStatus/icon?job=CILsingle/CCPi-Regularisation-Toolkit)](https://anvil.softeng-support.ac.uk/jenkins/job/CILsingle/job/CCPi-Regularisation-Toolkit/) | [![Build Status](https://anvil.softeng-support.ac.uk/jenkins/buildStatus/icon?job=CILsingle/CCPi-Regularisation-Toolkit-dev)](https://anvil.softeng-support.ac.uk/jenkins/job/CILsingle/job/CCPi-Regularisation-Toolkit-dev/) | +| Master | Development | Anaconda binaries | +|--------|-------------|-------------------| +| [![Build Status](https://anvil.softeng-support.ac.uk/jenkins/buildStatus/icon?job=CILsingle/CCPi-Regularisation-Toolkit)](https://anvil.softeng-support.ac.uk/jenkins/job/CILsingle/job/CCPi-Regularisation-Toolkit/) | [![Build Status](https://anvil.softeng-support.ac.uk/jenkins/buildStatus/icon?job=CILsingle/CCPi-Regularisation-Toolkit-dev)](https://anvil.softeng-support.ac.uk/jenkins/job/CILsingle/job/CCPi-Regularisation-Toolkit-dev/) | ![conda version](https://anaconda.org/ccpi/ccpi-regulariser/badges/version.svg) ![conda last release](https://anaconda.org/ccpi/ccpi-regulariser/badges/latest_release_date.svg) [![conda platforms](https://anaconda.org/ccpi/ccpi-regulariser/badges/platforms.svg) ![conda dowloads](https://anaconda.org/ccpi/ccpi-regulariser/badges/downloads.svg)](https://anaconda.org/ccpi/ccpi-regulariser) | **Iterative image reconstruction (IIR) methods normally require regularisation to stabilise the convergence and make the reconstruction problem (inverse problem) more well-posed. The CCPi-RGL software provides 2D/3D and multi-channel regularisation strategies to ensure better performance of IIR methods. The regularisation modules are well-suited to use with [splitting algorithms](https://en.wikipedia.org/wiki/Augmented_Lagrangian_method#Alternating_direction_method_of_multipliers), such as, [ADMM](https://github.com/dkazanc/ADMM-tomo) and [FISTA](https://github.com/dkazanc/FISTA-tomo). Furthermore, the toolkit can be used for simpler inversion tasks, such as, image denoising, inpaiting, deconvolution etc. The core modules are written in C-OMP and CUDA languages and wrappers for Matlab and Python are provided.** @@ -33,7 +33,7 @@ 1. Rudin-Osher-Fatemi (ROF) Total Variation (explicit PDE minimisation scheme) **2D/3D CPU/GPU** (Ref. *1*) 2. Fast-Gradient-Projection (FGP) Total Variation **2D/3D CPU/GPU** (Ref. *2*) 3. Split-Bregman (SB) Total Variation **2D/3D CPU/GPU** (Ref. *5*) -4. Total Generalised Variation (TGV) model for higher-order regularisation **2D CPU/GPU** (Ref. *6*) +4. Total Generalised Variation (TGV) model for higher-order regularisation **2D/3D CPU/GPU** (Ref. *6*) 5. Linear and nonlinear diffusion (explicit PDE minimisation scheme) **2D/3D CPU/GPU** (Ref. *8*) 6. Anisotropic Fourth-Order Diffusion (explicit PDE minimisation) **2D/3D CPU/GPU** (Ref. *9*) 7. A joint ROF-LLT (Lysaker-Lundervold-Tai) model for higher-order regularisation **2D/3D CPU/GPU** (Ref. *10,11*) @@ -93,7 +93,7 @@ conda install ccpi-regulariser -c ccpi -c conda-forge #### Python (conda-build) ``` - export CIL_VERSION=0.10.3 + export CIL_VERSION=0.10.4 conda build Wrappers/Python/conda-recipe --numpy 1.12 --python 3.5 conda install ccpi-regulariser=${CIL_VERSION} --use-local --force cd demos/ diff --git a/Wrappers/Matlab/demos/demoMatlab_3Ddenoise.m b/Wrappers/Matlab/demos/demoMatlab_3Ddenoise.m index 5cc47b3..0c331a4 100644 --- a/Wrappers/Matlab/demos/demoMatlab_3Ddenoise.m +++ b/Wrappers/Matlab/demos/demoMatlab_3Ddenoise.m @@ -2,11 +2,13 @@ clear; close all Path1 = sprintf(['..' filesep 'mex_compile' filesep 'installed'], 1i); Path2 = sprintf(['..' filesep '..' filesep '..' filesep 'data' filesep], 1i); +Path3 = sprintf(['..' filesep 'supp'], 1i); addpath(Path1); addpath(Path2); +addpath(Path3); N = 512; -slices = 15; +slices = 7; vol3D = zeros(N,N,slices, 'single'); Ideal3D = zeros(N,N,slices, 'single'); Im = double(imread('lena_gray_512.tif'))/255; % loading image @@ -131,7 +133,16 @@ figure; imshow(u_diff4(:,:,7), [0 1]); title('Diffusion 4thO denoised volume (CP % fprintf('%s %f \n', 'RMSE error for Anis.Diff of 4th order is:', rmse_diff4); % figure; imshow(u_diff4_g(:,:,7), [0 1]); title('Diffusion 4thO denoised volume (GPU)'); %% - +fprintf('Denoise using the TGV model (CPU) \n'); +lambda_TGV = 0.03; % regularisation parameter +alpha1 = 1.0; % parameter to control the first-order term +alpha0 = 2.0; % parameter to control the second-order term +iter_TGV = 500; % number of Primal-Dual iterations for TGV +tic; u_tgv = TGV(single(vol3D), lambda_TGV, alpha1, alpha0, iter_TGV); toc; +rmseTGV = RMSE(Ideal3D(:),u_tgv(:)); +fprintf('%s %f \n', 'RMSE error for TGV is:', rmseTGV); +figure; imshow(u_tgv(:,:,3), [0 1]); title('TGV denoised volume (CPU)'); +%% %>>>>>>>>>>>>>> MULTI-CHANNEL priors <<<<<<<<<<<<<<< % fprintf('Denoise a volume using the FGP-dTV model (CPU) \n'); diff --git a/Wrappers/Matlab/demos/demoMatlab_denoise.m b/Wrappers/Matlab/demos/demoMatlab_denoise.m index 3506cca..14d3096 100644 --- a/Wrappers/Matlab/demos/demoMatlab_denoise.m +++ b/Wrappers/Matlab/demos/demoMatlab_denoise.m @@ -60,20 +60,20 @@ figure; imshow(u_sb, [0 1]); title('SB-TV denoised image (CPU)'); % figure; imshow(u_sbG, [0 1]); title('SB-TV denoised image (GPU)'); %% fprintf('Denoise using the TGV model (CPU) \n'); -lambda_TGV = 0.04; % regularisation parameter -alpha1 = 1; % parameter to control the first-order term -alpha0 = 0.7; % parameter to control the second-order term -iter_TGV = 500; % number of Primal-Dual iterations for TGV +lambda_TGV = 0.045; % regularisation parameter +alpha1 = 1.0; % parameter to control the first-order term +alpha0 = 2.0; % parameter to control the second-order term +iter_TGV = 2000; % number of Primal-Dual iterations for TGV tic; u_tgv = TGV(single(u0), lambda_TGV, alpha1, alpha0, iter_TGV); toc; rmseTGV = (RMSE(u_tgv(:),Im(:))); fprintf('%s %f \n', 'RMSE error for TGV is:', rmseTGV); figure; imshow(u_tgv, [0 1]); title('TGV denoised image (CPU)'); %% % fprintf('Denoise using the TGV model (GPU) \n'); -% lambda_TGV = 0.04; % regularisation parameter -% alpha1 = 1; % parameter to control the first-order term -% alpha0 = 0.7; % parameter to control the second-order term -% iter_TGV = 500; % number of Primal-Dual iterations for TGV +% lambda_TGV = 0.045; % regularisation parameter +% alpha1 = 1.0; % parameter to control the first-order term +% alpha0 = 2.0; % parameter to control the second-order term +% iter_TGV = 2000; % number of Primal-Dual iterations for TGV % tic; u_tgv_gpu = TGV_GPU(single(u0), lambda_TGV, alpha1, alpha0, iter_TGV); toc; % rmseTGV_gpu = (RMSE(u_tgv_gpu(:),Im(:))); % fprintf('%s %f \n', 'RMSE error for TGV is:', rmseTGV_gpu); diff --git a/Wrappers/Matlab/mex_compile/regularisers_CPU/TGV.c b/Wrappers/Matlab/mex_compile/regularisers_CPU/TGV.c index 5459bf5..aa4eed4 100644 --- a/Wrappers/Matlab/mex_compile/regularisers_CPU/TGV.c +++ b/Wrappers/Matlab/mex_compile/regularisers_CPU/TGV.c @@ -21,14 +21,14 @@ limitations under the License. #include "TGV_core.h" /* C-OMP implementation of Primal-Dual denoising method for - * Total Generilized Variation (TGV)-L2 model [1] (2D case only) + * Total Generilized Variation (TGV)-L2 model [1] (2D/3D) * * Input Parameters: - * 1. Noisy image (2D) (required) - * 2. lambda - regularisation parameter (required) - * 3. parameter to control the first-order term (alpha1) (default - 1) - * 4. parameter to control the second-order term (alpha0) (default - 0.5) - * 5. Number of Chambolle-Pock (Primal-Dual) iterations (default is 300) + * 1. Noisy image/volume (2D/3D) + * 2. lambda - regularisation parameter + * 3. parameter to control the first-order term (alpha1) + * 4. parameter to control the second-order term (alpha0) + * 5. Number of Chambolle-Pock (Primal-Dual) iterations * 6. Lipshitz constant (default is 12) * * Output: @@ -44,7 +44,7 @@ void mexFunction( { int number_of_dims, iter; - mwSize dimX, dimY; + mwSize dimX, dimY, dimZ; const mwSize *dim_array; float *Input, *Output=NULL, lambda, alpha0, alpha1, L2; @@ -55,7 +55,7 @@ void mexFunction( /*Handling Matlab input data*/ if ((nrhs < 2) || (nrhs > 6)) mexErrMsgTxt("At least 2 parameters is required, all parameters are: Image(2D), Regularisation parameter, alpha0, alpha1, iterations number, Lipshitz Constant"); - Input = (float *) mxGetData(prhs[0]); /*noisy image (2D) */ + Input = (float *) mxGetData(prhs[0]); /*noisy image/volume */ lambda = (float) mxGetScalar(prhs[1]); /* regularisation parameter */ alpha1 = 1.0f; /* parameter to control the first-order term */ alpha0 = 0.5f; /* parameter to control the second-order term */ @@ -69,12 +69,15 @@ void mexFunction( if (nrhs == 6) L2 = (float) mxGetScalar(prhs[5]); /* Lipshitz constant */ /*Handling Matlab output data*/ - dimX = dim_array[0]; dimY = dim_array[1]; + dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2]; if (number_of_dims == 2) { - Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - /* running the function */ - TGV_main(Input, Output, lambda, alpha1, alpha0, iter, L2, dimX, dimY); + dimZ = 1; /*2D case*/ + Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); } - if (number_of_dims == 3) {mexErrMsgTxt("Only 2D images accepted");} + if (number_of_dims == 3) { + Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); + } + /* running the function */ + TGV_main(Input, Output, lambda, alpha1, alpha0, iter, L2, dimX, dimY, dimZ); } diff --git a/Wrappers/Python/conda-recipe/conda_build_config.yaml b/Wrappers/Python/conda-recipe/conda_build_config.yaml index b7977f3..d6a3915 100644 --- a/Wrappers/Python/conda-recipe/conda_build_config.yaml +++ b/Wrappers/Python/conda-recipe/conda_build_config.yaml @@ -2,6 +2,8 @@ python: - 2.7 # [not win] - 3.5 - 3.6 + - 3.7 numpy: - 1.12 + - 1.14 - 1.15 diff --git a/Wrappers/Python/conda-recipe/run_test.py b/Wrappers/Python/conda-recipe/run_test.py index cfb3f53..21f3216 100755 --- a/Wrappers/Python/conda-recipe/run_test.py +++ b/Wrappers/Python/conda-recipe/run_test.py @@ -2,7 +2,7 @@ import unittest import numpy as np
import os
import timeit
-from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, DIFF4th
+from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, Diff4th
from PIL import Image
class TiffReader(object):
@@ -303,7 +303,7 @@ class TestRegularisers(unittest.TestCase): 'input' : u0,\
'regularisation_parameter':0.04, \
'alpha1':1.0,\
- 'alpha0':0.7,\
+ 'alpha0':2.0,\
'number_of_iterations' :250 ,\
'LipshitzConstant' :12 ,\
}
@@ -530,7 +530,7 @@ class TestRegularisers(unittest.TestCase): print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
# set parameters
- pars = {'algorithm' : DIFF4th, \
+ pars = {'algorithm' : Diff4th, \
'input' : u0,\
'regularisation_parameter':3.5, \
'edge_parameter':0.02,\
@@ -540,7 +540,7 @@ class TestRegularisers(unittest.TestCase): print ("#############Diff4th CPU####################")
start_time = timeit.default_timer()
- diff4th_cpu = DIFF4th(pars['input'],
+ diff4th_cpu = Diff4th(pars['input'],
pars['regularisation_parameter'],
pars['edge_parameter'],
pars['number_of_iterations'],
@@ -555,7 +555,7 @@ class TestRegularisers(unittest.TestCase): print ("##############Diff4th GPU##################")
start_time = timeit.default_timer()
try:
- diff4th_gpu = DIFF4th(pars['input'],
+ diff4th_gpu = Diff4th(pars['input'],
pars['regularisation_parameter'],
pars['edge_parameter'],
pars['number_of_iterations'],
@@ -565,7 +565,7 @@ class TestRegularisers(unittest.TestCase): self.skipTest("Results not comparable. GPU computing error.")
rms = rmse(Im, diff4th_gpu)
pars['rmse'] = rms
- pars['algorithm'] = DIFF4th
+ pars['algorithm'] = Diff4th
txtstr = printParametersToString(pars)
txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
print (txtstr)
diff --git a/Wrappers/Python/demos/demo_cpu_regularisers.py b/Wrappers/Python/demos/demo_cpu_regularisers.py index 78e9aff..e6befa9 100644 --- a/Wrappers/Python/demos/demo_cpu_regularisers.py +++ b/Wrappers/Python/demos/demo_cpu_regularisers.py @@ -12,7 +12,7 @@ import matplotlib.pyplot as plt import numpy as np import os import timeit -from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, TNV, NDF, DIFF4th +from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, TNV, NDF, Diff4th from ccpi.filters.regularisers import PatchSelect, NLTV from qualitymetrics import rmse ############################################################################### @@ -225,8 +225,8 @@ pars = {'algorithm' : TGV, \ 'input' : u0,\ 'regularisation_parameter':0.04, \ 'alpha1':1.0,\ - 'alpha0':0.7,\ - 'number_of_iterations' :250 ,\ + 'alpha0':2.0,\ + 'number_of_iterations' :1350 ,\ 'LipshitzConstant' :12 ,\ } @@ -358,13 +358,13 @@ print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") ## plot fig = plt.figure() -plt.suptitle('Performance of DIFF4th regulariser using the CPU') +plt.suptitle('Performance of Diff4th regulariser using the CPU') a=fig.add_subplot(1,2,1) a.set_title('Noisy Image') imgplot = plt.imshow(u0,cmap="gray") # set parameters -pars = {'algorithm' : DIFF4th, \ +pars = {'algorithm' : Diff4th, \ 'input' : u0,\ 'regularisation_parameter':3.5, \ 'edge_parameter':0.02,\ @@ -372,9 +372,9 @@ pars = {'algorithm' : DIFF4th, \ 'time_marching_parameter':0.0015 } -print ("#############DIFF4th CPU################") +print ("#############Diff4th CPU################") start_time = timeit.default_timer() -diff4_cpu = DIFF4th(pars['input'], +diff4_cpu = Diff4th(pars['input'], pars['regularisation_parameter'], pars['edge_parameter'], pars['number_of_iterations'], diff --git a/Wrappers/Python/demos/demo_cpu_regularisers3D.py b/Wrappers/Python/demos/demo_cpu_regularisers3D.py index 9c28de1..2d2fc22 100644 --- a/Wrappers/Python/demos/demo_cpu_regularisers3D.py +++ b/Wrappers/Python/demos/demo_cpu_regularisers3D.py @@ -12,7 +12,7 @@ import matplotlib.pyplot as plt import numpy as np import os import timeit -from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, LLT_ROF, FGP_dTV, NDF, DIFF4th +from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, Diff4th from qualitymetrics import rmse ############################################################################### def printParametersToString(pars): @@ -68,7 +68,7 @@ Im2[:,0:M] = Im[:,0:M] Im = Im2 del Im2 """ -slices = 20 +slices = 15 noisyVol = np.zeros((slices,N,M),dtype='float32') noisyRef = np.zeros((slices,N,M),dtype='float32') @@ -96,7 +96,7 @@ pars = {'algorithm': ROF_TV, \ 'input' : noisyVol,\ 'regularisation_parameter':0.04,\ 'number_of_iterations': 500,\ - 'time_marching_parameter': 0.0025 + 'time_marching_parameter': 0.0025 } print ("#############ROF TV CPU####################") start_time = timeit.default_timer() @@ -264,6 +264,54 @@ plt.title('{}'.format('Recovered volume on the CPU using LLT-ROF')) #%% print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") +print ("_______________TGV (3D)_________________") +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") + +## plot +fig = plt.figure() +plt.suptitle('Performance of TGV regulariser using the CPU') +a=fig.add_subplot(1,2,1) +a.set_title('Noisy Image') +imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray") + +# set parameters +pars = {'algorithm' : TGV, \ + 'input' : noisyVol,\ + 'regularisation_parameter':0.04, \ + 'alpha1':1.0,\ + 'alpha0':2.0,\ + 'number_of_iterations' :250 ,\ + 'LipshitzConstant' :12 ,\ + } + +print ("#############TGV CPU####################") +start_time = timeit.default_timer() +tgv_cpu3D = TGV(pars['input'], + pars['regularisation_parameter'], + pars['alpha1'], + pars['alpha0'], + pars['number_of_iterations'], + pars['LipshitzConstant'],'cpu') + + +rms = rmse(idealVol, tgv_cpu3D) +pars['rmse'] = rms + +txtstr = printParametersToString(pars) +txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) +print (txtstr) +a=fig.add_subplot(1,2,2) + +# these are matplotlib.patch.Patch properties +props = dict(boxstyle='round', facecolor='wheat', alpha=0.75) +# place a text box in upper left in axes coords +a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14, + verticalalignment='top', bbox=props) +imgplot = plt.imshow(tgv_cpu3D[10,:,:], cmap="gray") +plt.title('{}'.format('Recovered volume on the CPU using TGV')) + +#%% +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") print ("________________NDF (3D)___________________") print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") @@ -322,7 +370,7 @@ a.set_title('Noisy volume') imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray") # set parameters -pars = {'algorithm' : DIFF4th, \ +pars = {'algorithm' : Diff4th, \ 'input' : noisyVol,\ 'regularisation_parameter':3.5, \ 'edge_parameter':0.02,\ @@ -330,9 +378,9 @@ pars = {'algorithm' : DIFF4th, \ 'time_marching_parameter':0.0015 } -print ("#############DIFF4th CPU################") +print ("#############Diff4th CPU################") start_time = timeit.default_timer() -diff4th_cpu3D = DIFF4th(pars['input'], +diff4th_cpu3D = Diff4th(pars['input'], pars['regularisation_parameter'], pars['edge_parameter'], pars['number_of_iterations'], diff --git a/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py b/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py index 6529b5c..230a761 100644 --- a/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py +++ b/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py @@ -12,7 +12,7 @@ import matplotlib.pyplot as plt import numpy as np import os import timeit -from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, DIFF4th +from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, Diff4th from ccpi.filters.regularisers import PatchSelect from qualitymetrics import rmse ############################################################################### @@ -323,8 +323,8 @@ pars = {'algorithm' : TGV, \ 'input' : u0,\ 'regularisation_parameter':0.04, \ 'alpha1':1.0,\ - 'alpha0':0.7,\ - 'number_of_iterations' :250 ,\ + 'alpha0':2.0,\ + 'number_of_iterations' :400 ,\ 'LipshitzConstant' :12 ,\ } @@ -570,7 +570,7 @@ a.set_title('Noisy Image') imgplot = plt.imshow(u0,cmap="gray") # set parameters -pars = {'algorithm' : DIFF4th, \ +pars = {'algorithm' : Diff4th, \ 'input' : u0,\ 'regularisation_parameter':3.5, \ 'edge_parameter':0.02,\ @@ -580,7 +580,7 @@ pars = {'algorithm' : DIFF4th, \ print ("#############Diff4th CPU####################") start_time = timeit.default_timer() -diff4th_cpu = DIFF4th(pars['input'], +diff4th_cpu = Diff4th(pars['input'], pars['regularisation_parameter'], pars['edge_parameter'], pars['number_of_iterations'], @@ -604,7 +604,7 @@ plt.title('{}'.format('CPU results')) print ("##############Diff4th GPU##################") start_time = timeit.default_timer() -diff4th_gpu = DIFF4th(pars['input'], +diff4th_gpu = Diff4th(pars['input'], pars['regularisation_parameter'], pars['edge_parameter'], pars['number_of_iterations'], @@ -612,7 +612,7 @@ diff4th_gpu = DIFF4th(pars['input'], rms = rmse(Im, diff4th_gpu) pars['rmse'] = rms -pars['algorithm'] = DIFF4th +pars['algorithm'] = Diff4th txtstr = printParametersToString(pars) txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) print (txtstr) diff --git a/Wrappers/Python/demos/demo_gpu_regularisers.py b/Wrappers/Python/demos/demo_gpu_regularisers.py index 2ada559..e1c6575 100644 --- a/Wrappers/Python/demos/demo_gpu_regularisers.py +++ b/Wrappers/Python/demos/demo_gpu_regularisers.py @@ -12,7 +12,7 @@ import matplotlib.pyplot as plt import numpy as np import os import timeit -from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, DIFF4th +from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, Diff4th from ccpi.filters.regularisers import PatchSelect, NLTV from qualitymetrics import rmse ############################################################################### @@ -223,8 +223,8 @@ pars = {'algorithm' : TGV, \ 'input' : u0,\ 'regularisation_parameter':0.04, \ 'alpha1':1.0,\ - 'alpha0':0.7,\ - 'number_of_iterations' :250 ,\ + 'alpha0':2.0,\ + 'number_of_iterations' :1250 ,\ 'LipshitzConstant' :12 ,\ } @@ -355,13 +355,13 @@ print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") ## plot fig = plt.figure() -plt.suptitle('Performance of DIFF4th regulariser using the GPU') +plt.suptitle('Performance of Diff4th regulariser using the GPU') a=fig.add_subplot(1,2,1) a.set_title('Noisy Image') imgplot = plt.imshow(u0,cmap="gray") # set parameters -pars = {'algorithm' : DIFF4th, \ +pars = {'algorithm' : Diff4th, \ 'input' : u0,\ 'regularisation_parameter':3.5, \ 'edge_parameter':0.02,\ @@ -371,7 +371,7 @@ pars = {'algorithm' : DIFF4th, \ print ("#############DIFF4th CPU################") start_time = timeit.default_timer() -diff4_gpu = DIFF4th(pars['input'], +diff4_gpu = Diff4th(pars['input'], pars['regularisation_parameter'], pars['edge_parameter'], pars['number_of_iterations'], diff --git a/Wrappers/Python/demos/demo_gpu_regularisers3D.py b/Wrappers/Python/demos/demo_gpu_regularisers3D.py index d5f9a39..b6058d2 100644 --- a/Wrappers/Python/demos/demo_gpu_regularisers3D.py +++ b/Wrappers/Python/demos/demo_gpu_regularisers3D.py @@ -12,7 +12,7 @@ import matplotlib.pyplot as plt import numpy as np import os import timeit -from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, LLT_ROF, FGP_dTV, NDF, DIFF4th +from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, Diff4th from qualitymetrics import rmse ############################################################################### def printParametersToString(pars): @@ -67,7 +67,7 @@ Im = Im2 del Im2 """ -#%% + slices = 20 filename = os.path.join(".." , ".." , ".." , "data" ,"lena_gray_512.tif") @@ -268,6 +268,53 @@ plt.title('{}'.format('Recovered volume on the GPU using LLT-ROF')) #%% print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") +print ("_______________TGV (3D)_________________") +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") + +## plot +fig = plt.figure() +plt.suptitle('Performance of TGV regulariser using the GPU') +a=fig.add_subplot(1,2,1) +a.set_title('Noisy Image') +imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray") + +# set parameters +pars = {'algorithm' : TGV, \ + 'input' : noisyVol,\ + 'regularisation_parameter':0.04, \ + 'alpha1':1.0,\ + 'alpha0':2.0,\ + 'number_of_iterations' :600 ,\ + 'LipshitzConstant' :12 ,\ + } + +print ("#############TGV GPU####################") +start_time = timeit.default_timer() +tgv_gpu3D = TGV(pars['input'], + pars['regularisation_parameter'], + pars['alpha1'], + pars['alpha0'], + pars['number_of_iterations'], + pars['LipshitzConstant'],'gpu') + + +rms = rmse(idealVol, tgv_gpu3D) +pars['rmse'] = rms + +txtstr = printParametersToString(pars) +txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) +print (txtstr) +a=fig.add_subplot(1,2,2) + +# these are matplotlib.patch.Patch properties +props = dict(boxstyle='round', facecolor='wheat', alpha=0.75) +# place a text box in upper left in axes coords +a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14, + verticalalignment='top', bbox=props) +imgplot = plt.imshow(tgv_gpu3D[10,:,:], cmap="gray") +plt.title('{}'.format('Recovered volume on the GPU using TGV')) +#%% +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") print ("_______________NDF-TV (3D)_________________") print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") @@ -326,7 +373,7 @@ a.set_title('Noisy Image') imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray") # set parameters -pars = {'algorithm' : DIFF4th, \ +pars = {'algorithm' : Diff4th, \ 'input' : noisyVol,\ 'regularisation_parameter':3.5, \ 'edge_parameter':0.02,\ @@ -336,7 +383,7 @@ pars = {'algorithm' : DIFF4th, \ print ("#############DIFF4th CPU################") start_time = timeit.default_timer() -diff4_gpu3D = DIFF4th(pars['input'], +diff4_gpu3D = Diff4th(pars['input'], pars['regularisation_parameter'], pars['edge_parameter'], pars['number_of_iterations'], diff --git a/Wrappers/Python/src/cpu_regularisers.pyx b/Wrappers/Python/src/cpu_regularisers.pyx index 7d57ed1..11a0617 100644 --- a/Wrappers/Python/src/cpu_regularisers.pyx +++ b/Wrappers/Python/src/cpu_regularisers.pyx @@ -22,7 +22,7 @@ cdef extern float TV_ROF_CPU_main(float *Input, float *Output, float lambdaPar, cdef extern float TV_FGP_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ); cdef extern float SB_TV_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ); cdef extern float LLT_ROF_CPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int dimX, int dimY, int dimZ); -cdef extern float TGV_main(float *Input, float *Output, float lambdaPar, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY); +cdef extern float TGV_main(float *Input, float *Output, float lambdaPar, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY, int dimZ); cdef extern float Diffusion_CPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int dimX, int dimY, int dimZ); cdef extern float Diffus4th_CPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int dimX, int dimY, int dimZ); cdef extern float TNV_CPU_main(float *Input, float *u, float lambdaPar, int maxIter, float tol, int dimX, int dimY, int dimZ); @@ -202,12 +202,8 @@ def TGV_CPU(inputData, regularisation_parameter, alpha1, alpha0, iterations, Lip return TGV_2D(inputData, regularisation_parameter, alpha1, alpha0, iterations, LipshitzConst) elif inputData.ndim == 3: - shape = inputData.shape - out = inputData.copy() - for i in range(shape[0]): - out[i,:,:] = TGV_2D(inputData[i,:,:], regularisation_parameter, - alpha1, alpha0, iterations, LipshitzConst) - return out + return TGV_3D(inputData, regularisation_parameter, alpha1, alpha0, + iterations, LipshitzConst) def TGV_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, float regularisation_parameter, @@ -229,7 +225,30 @@ def TGV_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, alpha0, iterationsNumb, LipshitzConst, - dims[1],dims[0]) + dims[1],dims[0],1) + return outputData +def TGV_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, + float regularisation_parameter, + float alpha1, + float alpha0, + int iterationsNumb, + float LipshitzConst): + + cdef long dims[3] + dims[0] = inputData.shape[0] + dims[1] = inputData.shape[1] + dims[2] = inputData.shape[2] + + cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \ + np.zeros([dims[0], dims[1], dims[2]], dtype='float32') + + #/* Run TGV iterations for 3D data */ + TGV_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameter, + alpha1, + alpha0, + iterationsNumb, + LipshitzConst, + dims[2], dims[1], dims[0]) return outputData #***************************************************************# diff --git a/Wrappers/Python/src/gpu_regularisers.pyx b/Wrappers/Python/src/gpu_regularisers.pyx index 47a6149..b52f669 100644 --- a/Wrappers/Python/src/gpu_regularisers.pyx +++ b/Wrappers/Python/src/gpu_regularisers.pyx @@ -23,7 +23,7 @@ CUDAErrorMessage = 'CUDA error' cdef extern int TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z); cdef extern int TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int N, int M, int Z); cdef extern int TV_SB_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int printM, int N, int M, int Z); -cdef extern int TGV_GPU_main(float *Input, float *Output, float lambdaPar, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY); +cdef extern int TGV_GPU_main(float *Input, float *Output, float lambdaPar, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY, int dimZ); cdef extern int LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int N, int M, int Z); cdef extern int NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z); cdef extern int dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int printM, int N, int M, int Z); @@ -102,12 +102,7 @@ def TGV_GPU(inputData, regularisation_parameter, alpha1, alpha0, iterations, Lip if inputData.ndim == 2: return TGV2D(inputData, regularisation_parameter, alpha1, alpha0, iterations, LipshitzConst) elif inputData.ndim == 3: - shape = inputData.shape - out = inputData.copy() - for i in range(shape[0]): - out[i,:,:] = TGV2D(inputData[i,:,:], regularisation_parameter, - alpha1, alpha0, iterations, LipshitzConst) - return out + return TGV3D(inputData, regularisation_parameter, alpha1, alpha0, iterations, LipshitzConst) # Directional Total-variation Fast-Gradient-Projection (FGP) def dTV_FGP_GPU(inputData, refdata, @@ -393,7 +388,6 @@ def LLT_ROF_GPU3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, raise ValueError(CUDAErrorMessage); - #***************************************************************# #***************** Total Generalised Variation *****************# #***************************************************************# @@ -417,11 +411,38 @@ def TGV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, alpha0, iterationsNumb, LipshitzConst, - dims[1],dims[0])==0): + dims[1],dims[0], 1)==0): return outputData else: raise ValueError(CUDAErrorMessage); +def TGV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, + float regularisation_parameter, + float alpha1, + float alpha0, + int iterationsNumb, + float LipshitzConst): + + cdef long dims[3] + dims[0] = inputData.shape[0] + dims[1] = inputData.shape[1] + dims[2] = inputData.shape[2] + + cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \ + np.zeros([dims[0],dims[1],dims[2]], dtype='float32') + + # Running CUDA code here + if (TGV_GPU_main( + &inputData[0,0,0], &outputData[0,0,0], regularisation_parameter, + alpha1, + alpha0, + iterationsNumb, + LipshitzConst, + dims[2], dims[1], dims[0])==0): + return outputData; + else: + raise ValueError(CUDAErrorMessage); + #****************************************************************# #**************Directional Total-variation FGP ******************# |