summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDaniil Kazantsev <dkazanc3@googlemail.com>2019-02-19 22:14:48 +0000
committerGitHub <noreply@github.com>2019-02-19 22:14:48 +0000
commit8e71dd67abef0caddb24caa365321d3874529254 (patch)
tree108c7e25e3d741ca04ef45aa29eb61a9732075f4
parent8f2e86726669b9dadb3c788e0ea681d397a2eeb7 (diff)
parent53d5508915709245d0573e0335de83fc24313b5a (diff)
downloadregularization-8e71dd67abef0caddb24caa365321d3874529254.tar.gz
regularization-8e71dd67abef0caddb24caa365321d3874529254.tar.bz2
regularization-8e71dd67abef0caddb24caa365321d3874529254.tar.xz
regularization-8e71dd67abef0caddb24caa365321d3874529254.zip
Merge pull request #99 from vais-ral/TGV_bugfix
TGV bugfix
-rw-r--r--Core/regularisers_CPU/TGV_core.c415
-rw-r--r--Core/regularisers_GPU/TGV_GPU_core.cu409
-rw-r--r--Readme.md2
-rw-r--r--Wrappers/Matlab/demos/demoMatlab_3Ddenoise.m16
-rw-r--r--Wrappers/Matlab/demos/demoMatlab_denoise.m16
-rw-r--r--Wrappers/Matlab/mex_compile/regularisers_GPU/TGV_GPU.cpp32
-rw-r--r--run.sh19
7 files changed, 536 insertions, 373 deletions
diff --git a/Core/regularisers_CPU/TGV_core.c b/Core/regularisers_CPU/TGV_core.c
index 805c3d4..136e0bd 100644
--- a/Core/regularisers_CPU/TGV_core.c
+++ b/Core/regularisers_CPU/TGV_core.c
@@ -1,25 +1,25 @@
/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2019 Daniil Kazantsev
+ * Copyright 2019 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
#include "TGV_core.h"
-/* C-OMP implementation of Primal-Dual denoising method for
+/* C-OMP implementation of Primal-Dual denoising method for
* Total Generilized Variation (TGV)-L2 model [1] (2D/3D case)
*
* Input Parameters:
@@ -29,44 +29,44 @@ limitations under the License.
* 4. parameter to control the second-order term (alpha0)
* 5. Number of Chambolle-Pock (Primal-Dual) iterations
* 6. Lipshitz constant (default is 12)
- *
+ *
* Output:
* Filtered/regularised image/volume
*
* References:
* [1] K. Bredies "Total Generalized Variation"
- *
+ *
*/
-
+
float TGV_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iter, float L2, int dimX, int dimY, int dimZ)
{
- long DimTotal;
- int ll;
- float *U_old, *P1, *P2, *Q1, *Q2, *Q3, *V1, *V1_old, *V2, *V2_old, tau, sigma;
-
- DimTotal = (long)(dimX*dimY*dimZ);
- copyIm(U0, U, (long)(dimX), (long)(dimY), (long)(dimZ)); /* initialize */
- tau = pow(L2,-0.5);
- sigma = pow(L2,-0.5);
-
- /* dual variables */
- P1 = calloc(DimTotal, sizeof(float));
- P2 = calloc(DimTotal, sizeof(float));
-
- Q1 = calloc(DimTotal, sizeof(float));
- Q2 = calloc(DimTotal, sizeof(float));
- Q3 = calloc(DimTotal, sizeof(float));
-
- U_old = calloc(DimTotal, sizeof(float));
+ long DimTotal;
+ int ll;
+ float *U_old, *P1, *P2, *Q1, *Q2, *Q3, *V1, *V1_old, *V2, *V2_old, tau, sigma;
+
+ DimTotal = (long)(dimX*dimY*dimZ);
+ copyIm(U0, U, (long)(dimX), (long)(dimY), (long)(dimZ)); /* initialize */
+ tau = pow(L2,-0.5);
+ sigma = pow(L2,-0.5);
+
+ /* dual variables */
+ P1 = calloc(DimTotal, sizeof(float));
+ P2 = calloc(DimTotal, sizeof(float));
+
+ Q1 = calloc(DimTotal, sizeof(float));
+ Q2 = calloc(DimTotal, sizeof(float));
+ Q3 = calloc(DimTotal, sizeof(float));
+
+ U_old = calloc(DimTotal, sizeof(float));
+
+ V1 = calloc(DimTotal, sizeof(float));
+ V1_old = calloc(DimTotal, sizeof(float));
+ V2 = calloc(DimTotal, sizeof(float));
+ V2_old = calloc(DimTotal, sizeof(float));
+
+ if (dimZ == 1) {
+ /*2D case*/
- V1 = calloc(DimTotal, sizeof(float));
- V1_old = calloc(DimTotal, sizeof(float));
- V2 = calloc(DimTotal, sizeof(float));
- V2_old = calloc(DimTotal, sizeof(float));
-
- if (dimZ == 1) {
- /*2D case*/
-
/* Primal-dual iterations begin here */
for(ll = 0; ll < iter; ll++) {
@@ -102,8 +102,8 @@ float TGV_main(float *U0, float *U, float lambda, float alpha1, float alpha0, in
newU(V1, V1_old, (long)(dimX), (long)(dimY));
newU(V2, V2_old, (long)(dimX), (long)(dimY));
} /*end of iterations*/
- }
- else {
+ }
+ else {
/*3D case*/
float *P3, *Q4, *Q5, *Q6, *V3, *V3_old;
@@ -114,7 +114,7 @@ float TGV_main(float *U0, float *U, float lambda, float alpha1, float alpha0, in
V3 = calloc(DimTotal, sizeof(float));
V3_old = calloc(DimTotal, sizeof(float));
- /* Primal-dual iterations begin here */
+ /* Primal-dual iterations begin here */
for(ll = 0; ll < iter; ll++) {
/* Calculate Dual Variable P */
@@ -145,21 +145,20 @@ float TGV_main(float *U0, float *U, float lambda, float alpha1, float alpha0, in
UpdV_3D(V1, V2, V3, P1, P2, P3, Q1, Q2, Q3, Q4, Q5, Q6, (long)(dimX), (long)(dimY), (long)(dimZ), tau);
/*get new V*/
- newU3D_3Ar(V1, V2, V3, V1_old, V2_old, V3_old, (long)(dimX), (long)(dimY), (long)(dimZ));
- } /*end of iterations*/
+ newU3D_3Ar(V1, V2, V3, V1_old, V2_old, V3_old, (long)(dimX), (long)(dimY), (long)(dimZ));
+ } /*end of iterations*/
free(P3);free(Q4);free(Q5);free(Q6);free(V3);free(V3_old);
- }
-
+ }
+
/*freeing*/
free(P1);free(P2);free(Q1);free(Q2);free(Q3);free(U_old);
free(V1);free(V2);free(V1_old);free(V2_old);
- return *U;
+ return *U;
}
/********************************************************************/
/***************************2D Functions*****************************/
/********************************************************************/
-
/*Calculating dual variable P (using forward differences)*/
float DualP_2D(float *U, float *V1, float *V2, float *P1, float *P2, long dimX, long dimY, float sigma)
{
@@ -167,12 +166,13 @@ float DualP_2D(float *U, float *V1, float *V2, float *P1, float *P2, long dimX,
#pragma omp parallel for shared(U,V1,V2,P1,P2) private(i,j,index)
for(i=0; i<dimX; i++) {
for(j=0; j<dimY; j++) {
- index = j*dimX+i;
+ index = j*dimX+i;
/* symmetric boundary conditions (Neuman) */
- if (i == dimX-1) P1[index] += sigma*((U[j*dimX+(i-1)] - U[index]) - V1[index]);
- else P1[index] += sigma*((U[j*dimX+(i+1)] - U[index]) - V1[index]);
- if (j == dimY-1) P2[index] += sigma*((U[(j-1)*dimX+i] - U[index]) - V2[index]);
+ if (i == dimX-1) P1[index] += sigma*(-V1[index]);
+ else P1[index] += sigma*((U[j*dimX+(i+1)] - U[index]) - V1[index]);
+ if (j == dimY-1) P2[index] += sigma*(-V2[index]);
else P2[index] += sigma*((U[(j+1)*dimX+i] - U[index]) - V2[index]);
+
}}
return 1;
}
@@ -184,7 +184,7 @@ float ProjP_2D(float *P1, float *P2, long dimX, long dimY, float alpha1)
#pragma omp parallel for shared(P1,P2) private(i,j,index,grad_magn)
for(i=0; i<dimX; i++) {
for(j=0; j<dimY; j++) {
- index = j*dimX+i;
+ index = j*dimX+i;
grad_magn = (sqrtf(pow(P1[index],2) + pow(P2[index],2)))/alpha1;
if (grad_magn > 1.0f) {
P1[index] /= grad_magn;
@@ -201,8 +201,8 @@ float DualQ_2D(float *V1, float *V2, float *Q1, float *Q2, float *Q3, long dimX,
#pragma omp parallel for shared(Q1,Q2,Q3,V1,V2) private(i,j,index,q1,q2,q11,q22)
for(i=0; i<dimX; i++) {
for(j=0; j<dimY; j++) {
- index = j*dimX+i;
- q1 = 0.0f; q11 = 0.0f; q2 = 0.0f; q22 = 0.0f;
+ index = j*dimX+i;
+ q1 = 0.0f; q11 = 0.0f; q2 = 0.0f; q22 = 0.0f;
/* boundary conditions (Neuman) */
if (i != dimX-1){
q1 = V1[j*dimX+(i+1)] - V1[index];
@@ -225,7 +225,7 @@ float ProjQ_2D(float *Q1, float *Q2, float *Q3, long dimX, long dimY, float alph
#pragma omp parallel for shared(Q1,Q2,Q3) private(i,j,index,grad_magn)
for(i=0; i<dimX; i++) {
for(j=0; j<dimY; j++) {
- index = j*dimX+i;
+ index = j*dimX+i;
grad_magn = sqrtf(pow(Q1[index],2) + pow(Q2[index],2) + 2*pow(Q3[index],2));
grad_magn = grad_magn/alpha0;
if (grad_magn > 1.0f) {
@@ -236,7 +236,7 @@ float ProjQ_2D(float *Q1, float *Q2, float *Q3, long dimX, long dimY, float alph
}}
return 1;
}
-/* Divergence and projection for P*/
+/* Divergence and projection for P (backward differences)*/
float DivProjP_2D(float *U, float *U0, float *P1, float *P2, long dimX, long dimY, float lambda, float tau)
{
long i,j,index;
@@ -244,11 +244,16 @@ float DivProjP_2D(float *U, float *U0, float *P1, float *P2, long dimX, long dim
#pragma omp parallel for shared(U,U0,P1,P2) private(i,j,index,P_v1,P_v2,div)
for(i=0; i<dimX; i++) {
for(j=0; j<dimY; j++) {
- index = j*dimX+i;
+ index = j*dimX+i;
+
if (i == 0) P_v1 = P1[index];
+ else if (i == dimX-1) P_v1 = -P1[j*dimX+(i-1)];
else P_v1 = P1[index] - P1[j*dimX+(i-1)];
+
if (j == 0) P_v2 = P2[index];
- else P_v2 = P2[index] - P2[(j-1)*dimX+i];
+ else if (j == dimY-1) P_v2 = -P2[(j-1)*dimX+i];
+ else P_v2 = P2[index] - P2[(j-1)*dimX+i];
+
div = P_v1 + P_v2;
U[index] = (lambda*(U[index] + tau*div) + tau*U0[index])/(lambda + tau);
}}
@@ -262,7 +267,7 @@ float newU(float *U, float *U_old, long dimX, long dimY)
for(i=0; i<dimX*dimY; i++) U[i] = 2*U[i] - U_old[i];
return *U;
}
-/*get update for V*/
+/*get update for V (backward differences)*/
float UpdV_2D(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, float *Q3, long dimX, long dimY, float tau)
{
long i, j, index;
@@ -270,17 +275,30 @@ float UpdV_2D(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2,
#pragma omp parallel for shared(V1,V2,P1,P2,Q1,Q2,Q3) private(i, j, index, q1, q3_x, q3_y, q2, div1, div2)
for(i=0; i<dimX; i++) {
for(j=0; j<dimY; j++) {
- index = j*dimX+i;
- q2 = 0.0f; q3_y = 0.0f; q1 = 0.0f; q3_x = 0.0;
+ index = j*dimX+i;
+
/* boundary conditions (Neuman) */
- if (i != 0) {
+ if (i == 0) {
+ q1 = Q1[index];
+ q3_x = Q3[index]; }
+ else if (i == dimX-1) {
+ q1 = -Q1[j*dimX+(i-1)];
+ q3_x = -Q3[j*dimX+(i-1)]; }
+ else {
q1 = Q1[index] - Q1[j*dimX+(i-1)];
- q3_x = Q3[index] - Q3[j*dimX+(i-1)];
- }
- if (j != 0) {
+ q3_x = Q3[index] - Q3[j*dimX+(i-1)]; }
+
+ if (j == 0) {
+ q2 = Q2[index];
+ q3_y = Q3[index]; }
+ else if (j == dimY-1) {
+ q2 = -Q2[(j-1)*dimX+i];
+ q3_y = -Q3[(j-1)*dimX+i]; }
+ else {
q2 = Q2[index] - Q2[(j-1)*dimX+i];
- q3_y = Q3[index] - Q3[(j-1)*dimX+i];
- }
+ q3_y = Q3[index] - Q3[(j-1)*dimX+i]; }
+
+
div1 = q1 + q3_y;
div2 = q3_x + q2;
V1[index] += tau*(P1[index] + div1);
@@ -299,16 +317,16 @@ float DualP_3D(float *U, float *V1, float *V2, float *V3, float *P1, float *P2,
#pragma omp parallel for shared(U,V1,V2,V3,P1,P2,P3) private(i,j,k,index)
for(i=0; i<dimX; i++) {
for(j=0; j<dimY; j++) {
- for(k=0; k<dimZ; k++) {
- index = (dimX*dimY)*k + j*dimX+i;
- /* symmetric boundary conditions (Neuman) */
- if (i == dimX-1) P1[index] += sigma*((U[(dimX*dimY)*k + j*dimX+(i-1)] - U[index]) - V1[index]);
- else P1[index] += sigma*((U[(dimX*dimY)*k + j*dimX+(i+1)] - U[index]) - V1[index]);
- if (j == dimY-1) P2[index] += sigma*((U[(dimX*dimY)*k + (j-1)*dimX+i] - U[index]) - V2[index]);
- else P2[index] += sigma*((U[(dimX*dimY)*k + (j+1)*dimX+i] - U[index]) - V2[index]);
- if (k == dimZ-1) P3[index] += sigma*((U[(dimX*dimY)*(k-1) + j*dimX+i] - U[index]) - V3[index]);
- else P3[index] += sigma*((U[(dimX*dimY)*(k+1) + j*dimX+i] - U[index]) - V3[index]);
- }}}
+ for(k=0; k<dimZ; k++) {
+ index = (dimX*dimY)*k + j*dimX+i;
+ /* symmetric boundary conditions (Neuman) */
+ if (i == dimX-1) P1[index] += sigma*(-V1[index]);
+ else P1[index] += sigma*((U[(dimX*dimY)*k + j*dimX+(i+1)] - U[index]) - V1[index]);
+ if (j == dimY-1) P2[index] += sigma*(-V2[index]);
+ else P2[index] += sigma*((U[(dimX*dimY)*k + (j+1)*dimX+i] - U[index]) - V2[index]);
+ if (k == dimZ-1) P3[index] += sigma*(-V3[index]);
+ else P3[index] += sigma*((U[(dimX*dimY)*(k+1) + j*dimX+i] - U[index]) - V3[index]);
+ }}}
return 1;
}
/*Projection onto convex set for P*/
@@ -319,15 +337,15 @@ float ProjP_3D(float *P1, float *P2, float *P3, long dimX, long dimY, long dimZ,
#pragma omp parallel for shared(P1,P2,P3) private(i,j,k,index,grad_magn)
for(i=0; i<dimX; i++) {
for(j=0; j<dimY; j++) {
- for(k=0; k<dimZ; k++) {
- index = (dimX*dimY)*k + j*dimX+i;
- grad_magn = (sqrtf(pow(P1[index],2) + pow(P2[index],2) + pow(P3[index],2)))/alpha1;
- if (grad_magn > 1.0f) {
- P1[index] /= grad_magn;
- P2[index] /= grad_magn;
- P3[index] /= grad_magn;
- }
- }}}
+ for(k=0; k<dimZ; k++) {
+ index = (dimX*dimY)*k + j*dimX+i;
+ grad_magn = (sqrtf(pow(P1[index],2) + pow(P2[index],2) + pow(P3[index],2)))/alpha1;
+ if (grad_magn > 1.0f) {
+ P1[index] /= grad_magn;
+ P2[index] /= grad_magn;
+ P3[index] /= grad_magn;
+ }
+ }}}
return 1;
}
/*Calculating dual variable Q (using forward differences)*/
@@ -338,33 +356,33 @@ float DualQ_3D(float *V1, float *V2, float *V3, float *Q1, float *Q2, float *Q3,
#pragma omp parallel for shared(Q1,Q2,Q3,Q4,Q5,Q6,V1,V2,V3) private(i,j,k,index,q1,q2,q3,q11,q22,q33,q44,q55,q66)
for(i=0; i<dimX; i++) {
for(j=0; j<dimY; j++) {
- for(k=0; k<dimZ; k++) {
- index = (dimX*dimY)*k + j*dimX+i;
- q1 = 0.0f; q11 = 0.0f; q33 = 0.0f; q2 = 0.0f; q22 = 0.0f; q55 = 0.0f; q3 = 0.0f; q44 = 0.0f; q66 = 0.0f;
- /* symmetric boundary conditions (Neuman) */
- if (i != dimX-1){
- q1 = V1[(dimX*dimY)*k + j*dimX+(i+1)] - V1[index];
- q11 = V2[(dimX*dimY)*k + j*dimX+(i+1)] - V2[index];
- q33 = V3[(dimX*dimY)*k + j*dimX+(i+1)] - V3[index];
- }
- if (j != dimY-1) {
- q2 = V2[(dimX*dimY)*k + (j+1)*dimX+i] - V2[index];
- q22 = V1[(dimX*dimY)*k + (j+1)*dimX+i] - V1[index];
- q55 = V3[(dimX*dimY)*k + (j+1)*dimX+i] - V3[index];
- }
- if (k != dimZ-1) {
- q3 = V3[(dimX*dimY)*(k+1) + j*dimX+i] - V3[index];
- q44 = V1[(dimX*dimY)*(k+1) + j*dimX+i] - V1[index];
- q66 = V2[(dimX*dimY)*(k+1) + j*dimX+i] - V2[index];
- }
-
- Q1[index] += sigma*(q1); /*Q11*/
- Q2[index] += sigma*(q2); /*Q22*/
- Q3[index] += sigma*(q3); /*Q33*/
- Q4[index] += sigma*(0.5f*(q11 + q22)); /* Q21 / Q12 */
- Q5[index] += sigma*(0.5f*(q33 + q44)); /* Q31 / Q13 */
- Q6[index] += sigma*(0.5f*(q55 + q66)); /* Q32 / Q23 */
- }}}
+ for(k=0; k<dimZ; k++) {
+ index = (dimX*dimY)*k + j*dimX+i;
+ q1 = 0.0f; q11 = 0.0f; q33 = 0.0f; q2 = 0.0f; q22 = 0.0f; q55 = 0.0f; q3 = 0.0f; q44 = 0.0f; q66 = 0.0f;
+ /* symmetric boundary conditions (Neuman) */
+ if (i != dimX-1){
+ q1 = V1[(dimX*dimY)*k + j*dimX+(i+1)] - V1[index];
+ q11 = V2[(dimX*dimY)*k + j*dimX+(i+1)] - V2[index];
+ q33 = V3[(dimX*dimY)*k + j*dimX+(i+1)] - V3[index];
+ }
+ if (j != dimY-1) {
+ q2 = V2[(dimX*dimY)*k + (j+1)*dimX+i] - V2[index];
+ q22 = V1[(dimX*dimY)*k + (j+1)*dimX+i] - V1[index];
+ q55 = V3[(dimX*dimY)*k + (j+1)*dimX+i] - V3[index];
+ }
+ if (k != dimZ-1) {
+ q3 = V3[(dimX*dimY)*(k+1) + j*dimX+i] - V3[index];
+ q44 = V1[(dimX*dimY)*(k+1) + j*dimX+i] - V1[index];
+ q66 = V2[(dimX*dimY)*(k+1) + j*dimX+i] - V2[index];
+ }
+
+ Q1[index] += sigma*(q1); /*Q11*/
+ Q2[index] += sigma*(q2); /*Q22*/
+ Q3[index] += sigma*(q3); /*Q33*/
+ Q4[index] += sigma*(0.5f*(q11 + q22)); /* Q21 / Q12 */
+ Q5[index] += sigma*(0.5f*(q33 + q44)); /* Q31 / Q13 */
+ Q6[index] += sigma*(0.5f*(q55 + q66)); /* Q32 / Q23 */
+ }}}
return 1;
}
float ProjQ_3D(float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, long dimX, long dimY, long dimZ, float alpha0)
@@ -374,19 +392,19 @@ float ProjQ_3D(float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6,
#pragma omp parallel for shared(Q1,Q2,Q3,Q4,Q5,Q6) private(i,j,k,index,grad_magn)
for(i=0; i<dimX; i++) {
for(j=0; j<dimY; j++) {
- for(k=0; k<dimZ; k++) {
- index = (dimX*dimY)*k + j*dimX+i;
- grad_magn = sqrtf(pow(Q1[index],2) + pow(Q2[index],2) + pow(Q3[index],2) + 2.0f*pow(Q4[index],2) + 2.0f*pow(Q5[index],2) + 2.0f*pow(Q6[index],2));
- grad_magn = grad_magn/alpha0;
- if (grad_magn > 1.0f) {
- Q1[index] /= grad_magn;
- Q2[index] /= grad_magn;
- Q3[index] /= grad_magn;
- Q4[index] /= grad_magn;
- Q5[index] /= grad_magn;
- Q6[index] /= grad_magn;
- }
- }}}
+ for(k=0; k<dimZ; k++) {
+ index = (dimX*dimY)*k + j*dimX+i;
+ grad_magn = sqrtf(pow(Q1[index],2) + pow(Q2[index],2) + pow(Q3[index],2) + 2.0f*pow(Q4[index],2) + 2.0f*pow(Q5[index],2) + 2.0f*pow(Q6[index],2));
+ grad_magn = grad_magn/alpha0;
+ if (grad_magn > 1.0f) {
+ Q1[index] /= grad_magn;
+ Q2[index] /= grad_magn;
+ Q3[index] /= grad_magn;
+ Q4[index] /= grad_magn;
+ Q5[index] /= grad_magn;
+ Q6[index] /= grad_magn;
+ }
+ }}}
return 1;
}
/* Divergence and projection for P*/
@@ -397,18 +415,22 @@ float DivProjP_3D(float *U, float *U0, float *P1, float *P2, float *P3, long dim
#pragma omp parallel for shared(U,U0,P1,P2,P3) private(i,j,k,index,P_v1,P_v2,P_v3,div)
for(i=0; i<dimX; i++) {
for(j=0; j<dimY; j++) {
- for(k=0; k<dimZ; k++) {
- index = (dimX*dimY)*k + j*dimX+i;
- if (i == 0) P_v1 = P1[index];
- else P_v1 = P1[index] - P1[(dimX*dimY)*k + j*dimX+(i-1)];
- if (j == 0) P_v2 = P2[index];
- else P_v2 = P2[index] - P2[(dimX*dimY)*k + (j-1)*dimX+i];
- if (k == 0) P_v3 = P3[index];
- else P_v3 = P3[index] - P3[(dimX*dimY)*(k-1) + (j)*dimX+i];
-
- div = P_v1 + P_v2 + P_v3;
- U[index] = (lambda*(U[index] + tau*div) + tau*U0[index])/(lambda + tau);
- }}}
+ for(k=0; k<dimZ; k++) {
+ index = (dimX*dimY)*k + j*dimX+i;
+
+ if (i == 0) P_v1 = P1[index];
+ else if (i == dimX-1) P_v1 = -P1[(dimX*dimY)*k + j*dimX+(i-1)];
+ else P_v1 = P1[index] - P1[(dimX*dimY)*k + j*dimX+(i-1)];
+ if (j == 0) P_v2 = P2[index];
+ else if (j == dimY-1) P_v2 = -P2[(dimX*dimY)*k + (j-1)*dimX+i];
+ else P_v2 = P2[index] - P2[(dimX*dimY)*k + (j-1)*dimX+i];
+ if (k == 0) P_v3 = P3[index];
+ else if (k == dimZ-1) P_v3 = -P3[(dimX*dimY)*(k-1) + (j)*dimX+i];
+ else P_v3 = P3[index] - P3[(dimX*dimY)*(k-1) + (j)*dimX+i];
+
+ div = P_v1 + P_v2 + P_v3;
+ U[index] = (lambda*(U[index] + tau*div) + tau*U0[index])/(lambda + tau);
+ }}}
return *U;
}
/*get update for V*/
@@ -419,47 +441,70 @@ float UpdV_3D(float *V1, float *V2, float *V3, float *P1, float *P2, float *P3,
#pragma omp parallel for shared(V1,V2,V3,P1,P2,P3,Q1,Q2,Q3,Q4,Q5,Q6) private(i,j,k,index,q1,q4x,q5x,q2,q4y,q6y,q6z,q5z,q3,div1,div2,div3)
for(i=0; i<dimX; i++) {
for(j=0; j<dimY; j++) {
- for(k=0; k<dimZ; k++) {
- index = (dimX*dimY)*k + j*dimX+i;
- q1 = 0.0f; q4x= 0.0f; q5x= 0.0f; q2= 0.0f; q4y= 0.0f; q6y= 0.0f; q6z= 0.0f; q5z= 0.0f; q3= 0.0f;
- /* Q1 - Q11, Q2 - Q22, Q3 - Q33, Q4 - Q21/Q12, Q5 - Q31/Q13, Q6 - Q32/Q23*/
- /* symmetric boundary conditions (Neuman) */
- if (i != 0) {
- q1 = Q1[index] - Q1[(dimX*dimY)*k + j*dimX+(i-1)];
- q4x = Q4[index] - Q4[(dimX*dimY)*k + j*dimX+(i-1)];
- q5x = Q5[index] - Q5[(dimX*dimY)*k + j*dimX+(i-1)];
- }
- if (j != 0) {
- q2 = Q2[index] - Q2[(dimX*dimY)*k + (j-1)*dimX+i];
- q4y = Q4[index] - Q4[(dimX*dimY)*k + (j-1)*dimX+i];
- q6y = Q6[index] - Q6[(dimX*dimY)*k + (j-1)*dimX+i];
- }
- if (k != 0) {
- q6z = Q6[index] - Q6[(dimX*dimY)*(k-1) + (j)*dimX+i];
- q5z = Q5[index] - Q5[(dimX*dimY)*(k-1) + (j)*dimX+i];
- q3 = Q3[index] - Q3[(dimX*dimY)*(k-1) + (j)*dimX+i];
- }
- div1 = q1 + q4y + q5z;
- div2 = q4x + q2 + q6z;
- div3 = q5x + q6y + q3;
-
- V1[index] += tau*(P1[index] + div1);
- V2[index] += tau*(P2[index] + div2);
- V3[index] += tau*(P3[index] + div3);
- }}}
+ for(k=0; k<dimZ; k++) {
+ index = (dimX*dimY)*k + j*dimX+i;
+ q1 = 0.0f; q4x= 0.0f; q5x= 0.0f; q2= 0.0f; q4y= 0.0f; q6y= 0.0f; q6z= 0.0f; q5z= 0.0f; q3= 0.0f;
+ /* Q1 - Q11, Q2 - Q22, Q3 - Q33, Q4 - Q21/Q12, Q5 - Q31/Q13, Q6 - Q32/Q23*/
+ /* symmetric boundary conditions (Neuman) */
+
+ if (i == 0) {
+ q1 = Q1[index];
+ q4x = Q4[index];
+ q5x = Q5[index]; }
+ else if (i == dimX-1) {
+ q1 = -Q1[(dimX*dimY)*k + j*dimX+(i-1)];
+ q4x = -Q4[(dimX*dimY)*k + j*dimX+(i-1)];
+ q5x = -Q5[(dimX*dimY)*k + j*dimX+(i-1)]; }
+ else {
+ q1 = Q1[index] - Q1[(dimX*dimY)*k + j*dimX+(i-1)];
+ q4x = Q4[index] - Q4[(dimX*dimY)*k + j*dimX+(i-1)];
+ q5x = Q5[index] - Q5[(dimX*dimY)*k + j*dimX+(i-1)]; }
+ if (j == 0) {
+ q2 = Q2[index];
+ q4y = Q4[index];
+ q6y = Q6[index]; }
+ else if (j == dimY-1) {
+ q2 = -Q2[(dimX*dimY)*k + (j-1)*dimX+i];
+ q4y = -Q4[(dimX*dimY)*k + (j-1)*dimX+i];
+ q6y = -Q6[(dimX*dimY)*k + (j-1)*dimX+i]; }
+ else {
+ q2 = Q2[index] - Q2[(dimX*dimY)*k + (j-1)*dimX+i];
+ q4y = Q4[index] - Q4[(dimX*dimY)*k + (j-1)*dimX+i];
+ q6y = Q6[index] - Q6[(dimX*dimY)*k + (j-1)*dimX+i]; }
+ if (k == 0) {
+ q6z = Q6[index];
+ q5z = Q5[index];
+ q3 = Q3[index]; }
+ else if (k == dimZ-1) {
+ q6z = -Q6[(dimX*dimY)*(k-1) + (j)*dimX+i];
+ q5z = -Q5[(dimX*dimY)*(k-1) + (j)*dimX+i];
+ q3 = -Q3[(dimX*dimY)*(k-1) + (j)*dimX+i]; }
+ else {
+ q6z = Q6[index] - Q6[(dimX*dimY)*(k-1) + (j)*dimX+i];
+ q5z = Q5[index] - Q5[(dimX*dimY)*(k-1) + (j)*dimX+i];
+ q3 = Q3[index] - Q3[(dimX*dimY)*(k-1) + (j)*dimX+i]; }
+
+ div1 = q1 + q4y + q5z;
+ div2 = q4x + q2 + q6z;
+ div3 = q5x + q6y + q3;
+
+ V1[index] += tau*(P1[index] + div1);
+ V2[index] += tau*(P2[index] + div2);
+ V3[index] += tau*(P3[index] + div3);
+ }}}
return 1;
}
float copyIm_3Ar(float *V1, float *V2, float *V3, float *V1_old, float *V2_old, float *V3_old, long dimX, long dimY, long dimZ)
{
- long j;
+ long j;
#pragma omp parallel for shared(V1, V2, V3, V1_old, V2_old, V3_old) private(j)
- for (j = 0; j<dimX*dimY*dimZ; j++) {
- V1_old[j] = V1[j];
- V2_old[j] = V2[j];
- V3_old[j] = V3[j];
- }
- return 1;
+ for (j = 0; j<dimX*dimY*dimZ; j++) {
+ V1_old[j] = V1[j];
+ V2_old[j] = V2[j];
+ V3_old[j] = V3[j];
+ }
+ return 1;
}
/*get updated solution U*/
@@ -478,9 +523,9 @@ float newU3D_3Ar(float *V1, float *V2, float *V3, float *V1_old, float *V2_old,
long i;
#pragma omp parallel for shared(V1, V2, V3, V1_old, V2_old, V3_old) private(i)
for(i=0; i<dimX*dimY*dimZ; i++) {
- V1[i] = 2.0f*V1[i] - V1_old[i];
- V2[i] = 2.0f*V2[i] - V2_old[i];
- V3[i] = 2.0f*V3[i] - V3_old[i];
+ V1[i] = 2.0f*V1[i] - V1_old[i];
+ V2[i] = 2.0f*V2[i] - V2_old[i];
+ V3[i] = 2.0f*V3[i] - V3_old[i];
}
return 1;
}
diff --git a/Core/regularisers_GPU/TGV_GPU_core.cu b/Core/regularisers_GPU/TGV_GPU_core.cu
index 58b2c41..e4abf72 100644
--- a/Core/regularisers_GPU/TGV_GPU_core.cu
+++ b/Core/regularisers_GPU/TGV_GPU_core.cu
@@ -3,8 +3,8 @@ This work is part of the Core Imaging Library developed by
Visual Analytics and Imaging System Group of the Science Technology
Facilities Council, STFC
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
+Copyright 2019 Daniil Kazantsev
+Copyright 2019 Srikanth Nagella, Edoardo Pasca
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -32,7 +32,7 @@ limitations under the License.
* 6. Lipshitz constant (default is 12)
*
* Output:
- * Filtered/regulariaed image
+ * Filtered/regularised image
*
* References:
* [1] K. Bredies "Total Generalized Variation"
@@ -42,8 +42,8 @@ limitations under the License.
#define BLKYSIZE 8
#define BLKZSIZE 8
-#define BLKXSIZE2D 16
-#define BLKYSIZE2D 16
+#define BLKXSIZE2D 8
+#define BLKYSIZE2D 8
#define EPS 1.0e-7
#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
@@ -53,80 +53,84 @@ limitations under the License.
/********************************************************************/
__global__ void DualP_2D_kernel(float *U, float *V1, float *V2, float *P1, float *P2, int dimX, int dimY, float sigma)
{
- int i = blockDim.x * blockIdx.x + threadIdx.x;
- int j = blockDim.y * blockIdx.y + threadIdx.y;
+ int num_total = dimX*dimY;
+ const int i = blockDim.x * blockIdx.x + threadIdx.x;
+ const int j = blockDim.y * blockIdx.y + threadIdx.y;
- int index = i + dimX*j;
+ int index = i + dimX*j;
- if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY)) {
- /* symmetric boundary conditions (Neuman) */
- if (i == dimX-1) P1[index] += sigma*((U[j*dimX+(i-1)] - U[index]) - V1[index]);
- else P1[index] += sigma*((U[j*dimX+(i+1)] - U[index]) - V1[index]);
- if (j == dimY-1) P2[index] += sigma*((U[(j-1)*dimX+i] - U[index]) - V2[index]);
- else P2[index] += sigma*((U[(j+1)*dimX+i] - U[index]) - V2[index]);
- }
+ if (index < num_total) {
+ /* symmetric boundary conditions (Neuman) */
+ if ((i >= 0) && (i < dimX-1)) P1[index] += sigma*((U[(i+1) + dimX*j] - U[index]) - V1[index]);
+ else P1[index] += sigma*(-V1[index]);
+ if ((j >= 0) && (j < dimY-1)) P2[index] += sigma*((U[i + dimX*(j+1)] - U[index]) - V2[index]);
+ else P2[index] += sigma*(-V2[index]);
+ }
return;
}
__global__ void ProjP_2D_kernel(float *P1, float *P2, int dimX, int dimY, float alpha1)
{
float grad_magn;
-
- int i = blockDim.x * blockIdx.x + threadIdx.x;
- int j = blockDim.y * blockIdx.y + threadIdx.y;
+ int num_total = dimX*dimY;
+
+ const int i = blockDim.x * blockIdx.x + threadIdx.x;
+ const int j = blockDim.y * blockIdx.y + threadIdx.y;
int index = i + dimX*j;
-
- if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY)) {
-
- grad_magn = sqrt(pow(P1[index],2) + pow(P2[index],2));
+
+ if (index < num_total) {
+ grad_magn = sqrtf(pow(P1[index],2) + pow(P2[index],2));
grad_magn = grad_magn/alpha1;
if (grad_magn > 1.0f) {
P1[index] /= grad_magn;
P2[index] /= grad_magn;
}
- }
+ }
return;
}
__global__ void DualQ_2D_kernel(float *V1, float *V2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, float sigma)
{
float q1, q2, q11, q22;
-
- int i = blockDim.x * blockIdx.x + threadIdx.x;
- int j = blockDim.y * blockIdx.y + threadIdx.y;
+ int num_total = dimX*dimY;
+
+ const int i = blockDim.x * blockIdx.x + threadIdx.x;
+ const int j = blockDim.y * blockIdx.y + threadIdx.y;
- int index = i + dimX*j;
+ int index = i + dimX*j;
- if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY)) {
- /* symmetric boundary conditions (Neuman) */
- q1 = 0.0f; q11 = 0.0f; q2 = 0.0f; q22 = 0.0f;
- /* boundary conditions (Neuman) */
- if (i != dimX-1){
- q1 = V1[j*dimX+(i+1)] - V1[index];
- q11 = V2[j*dimX+(i+1)] - V2[index];
- }
- if (j != dimY-1) {
- q2 = V2[(j+1)*dimX+i] - V2[index];
- q22 = V1[(j+1)*dimX+i] - V1[index];
- }
+ if (index < num_total) {
+ q1 = 0.0f; q2 = 0.0f; q11 = 0.0f; q22 = 0.0f;
+
+ if ((i >= 0) && (i < dimX-1)) {
+ /* boundary conditions (Neuman) */
+ q1 = V1[(i+1) + dimX*j] - V1[index];
+ q11 = V2[(i+1) + dimX*j] - V2[index];
+ }
+ if ((j >= 0) && (j < dimY-1)) {
+ q2 = V2[i + dimX*(j+1)] - V2[index];
+ q22 = V1[i + dimX*(j+1)] - V1[index];
+ }
+
Q1[index] += sigma*(q1);
Q2[index] += sigma*(q2);
Q3[index] += sigma*(0.5f*(q11 + q22));
- }
+ }
return;
}
__global__ void ProjQ_2D_kernel(float *Q1, float *Q2, float *Q3, int dimX, int dimY, float alpha0)
{
float grad_magn;
+ int num_total = dimX*dimY;
- int i = blockDim.x * blockIdx.x + threadIdx.x;
- int j = blockDim.y * blockIdx.y + threadIdx.y;
-
- int index = i + dimX*j;
+ const int i = blockDim.x * blockIdx.x + threadIdx.x;
+ const int j = blockDim.y * blockIdx.y + threadIdx.y;
- if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY)) {
+ int index = i + dimX*j;
+
+ if (index < num_total) {
grad_magn = sqrt(pow(Q1[index],2) + pow(Q2[index],2) + 2*pow(Q3[index],2));
grad_magn = grad_magn/alpha0;
if (grad_magn > 1.0f) {
@@ -141,44 +145,75 @@ __global__ void ProjQ_2D_kernel(float *Q1, float *Q2, float *Q3, int dimX, int d
__global__ void DivProjP_2D_kernel(float *U, float *U0, float *P1, float *P2, int dimX, int dimY, float lambda, float tau)
{
float P_v1, P_v2, div;
-
- int i = blockDim.x * blockIdx.x + threadIdx.x;
- int j = blockDim.y * blockIdx.y + threadIdx.y;
+ int num_total = dimX*dimY;
+
+ const int i = blockDim.x * blockIdx.x + threadIdx.x;
+ const int j = blockDim.y * blockIdx.y + threadIdx.y;
int index = i + dimX*j;
+
+ if (index < num_total) {
+ P_v1 = 0.0f; P_v2 = 0.0f;
- if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY)) {
-
- if (i == 0) P_v1 = P1[index];
- else P_v1 = P1[index] - P1[j*dimX+(i-1)];
- if (j == 0) P_v2 = P2[index];
- else P_v2 = P2[index] - P2[(j-1)*dimX+i];
- div = P_v1 + P_v2;
- U[index] = (lambda*(U[index] + tau*div) + tau*U0[index])/(lambda + tau);
- }
+ if (i == 0) P_v1 = P1[index];
+ if (i == dimX-1) P_v1 = -P1[(i-1) + dimX*j];
+ if ((i > 0) && (i < dimX-1)) P_v1 = P1[index] - P1[(i-1) + dimX*j];
+
+ if (j == 0) P_v2 = P2[index];
+ if (j == dimY-1) P_v2 = -P2[i + dimX*(j-1)];
+ if ((j > 0) && (j < dimY-1)) P_v2 = P2[index] - P2[i + dimX*(j-1)];
+
+ div = P_v1 + P_v2;
+ U[index] = (lambda*(U[index] + tau*div) + tau*U0[index])/(lambda + tau);
+ }
return;
}
__global__ void UpdV_2D_kernel(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, float tau)
{
float q1, q3_x, q2, q3_y, div1, div2;
-
- int i = blockDim.x * blockIdx.x + threadIdx.x;
- int j = blockDim.y * blockIdx.y + threadIdx.y;
-
- int index = i + dimX*j;
+ int num_total = dimX*dimY;
+ int i1, j1;
+
+ const int i = blockDim.x * blockIdx.x + threadIdx.x;
+ const int j = blockDim.y * blockIdx.y + threadIdx.y;
- if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY)) {
- q2 = 0.0f; q3_y = 0.0f; q1 = 0.0f; q3_x = 0.0;
- /* boundary conditions (Neuman) */
- if (i != 0) {
- q1 = Q1[index] - Q1[j*dimX+(i-1)];
- q3_x = Q3[index] - Q3[j*dimX+(i-1)];
- }
- if (j != 0) {
- q2 = Q2[index] - Q2[(j-1)*dimX+i];
- q3_y = Q3[index] - Q3[(j-1)*dimX+i];
- }
+ int index = i + dimX*j;
+
+ if (index < num_total) {
+
+ i1 = (i-1) + dimX*j;
+ j1 = (i) + dimX*(j-1);
+
+ /* boundary conditions (Neuman) */
+ if ((i > 0) && (i < dimX-1)) {
+ q1 = Q1[index] - Q1[i1];
+ q3_x = Q3[index] - Q3[i1]; }
+ else if (i == 0) {
+ q1 = Q1[index];
+ q3_x = Q3[index]; }
+ else if (i == dimX-1) {
+ q1 = -Q1[i1];
+ q3_x = -Q3[i1]; }
+ else {
+ q1 = 0.0f;
+ q3_x = 0.0f;
+ }
+
+ if ((j > 0) && (j < dimY-1)) {
+ q2 = Q2[index] - Q2[j1];
+ q3_y = Q3[index] - Q3[j1]; }
+ else if (j == dimY-1) {
+ q2 = -Q2[j1];
+ q3_y = -Q3[j1]; }
+ else if (j == 0) {
+ q2 = Q2[index];
+ q3_y = Q3[index]; }
+ else {
+ q2 = 0.0f;
+ q3_y = 0.0f;
+ }
+
div1 = q1 + q3_y;
div2 = q3_x + q2;
V1[index] += tau*(P1[index] + div1);
@@ -243,21 +278,22 @@ __global__ void newU_kernel_ar2(float *V1, float *V2, float *V1_old, float *V2_o
__global__ void DualP_3D_kernel(float *U, float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ, float sigma)
{
int index;
- int i = blockDim.x * blockIdx.x + threadIdx.x;
- int j = blockDim.y * blockIdx.y + threadIdx.y;
- int k = blockDim.z * blockIdx.z + threadIdx.z;
+ const int i = blockDim.x * blockIdx.x + threadIdx.x;
+ const int j = blockDim.y * blockIdx.y + threadIdx.y;
+ const int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+ int num_total = dimX*dimY*dimZ;
- if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
-
- index = (dimX*dimY)*k + j*dimX+i;
- /* symmetric boundary conditions (Neuman) */
- if (i == dimX-1) P1[index] += sigma*((U[(dimX*dimY)*k + j*dimX+(i-1)] - U[index]) - V1[index]);
- else P1[index] += sigma*((U[(dimX*dimY)*k + j*dimX+(i+1)] - U[index]) - V1[index]);
- if (j == dimY-1) P2[index] += sigma*((U[(dimX*dimY)*k + (j-1)*dimX+i] - U[index]) - V2[index]);
- else P2[index] += sigma*((U[(dimX*dimY)*k + (j+1)*dimX+i] - U[index]) - V2[index]);
- if (k == dimZ-1) P3[index] += sigma*((U[(dimX*dimY)*(k-1) + j*dimX+i] - U[index]) - V3[index]);
- else P3[index] += sigma*((U[(dimX*dimY)*(k+1) + j*dimX+i] - U[index]) - V3[index]);
- }
+ index = (dimX*dimY)*k + i*dimX+j;
+ if (index < num_total) {
+ /* symmetric boundary conditions (Neuman) */
+ if ((i >= 0) && (i < dimX-1)) P1[index] += sigma*((U[(dimX*dimY)*k + (i+1)*dimX+j] - U[index]) - V1[index]);
+ else P1[index] += sigma*(-V1[index]);
+ if ((j >= 0) && (j < dimY-1)) P2[index] += sigma*((U[(dimX*dimY)*k + i*dimX+(j+1)] - U[index]) - V2[index]);
+ else P2[index] += sigma*(-V2[index]);
+ if ((k >= 0) && (k < dimZ-1)) P3[index] += sigma*((U[(dimX*dimY)*(k+1) + i*dimX+(j)] - U[index]) - V3[index]);
+ else P3[index] += sigma*(-V3[index]);
+ }
return;
}
@@ -265,14 +301,14 @@ __global__ void ProjP_3D_kernel(float *P1, float *P2, float *P3, int dimX, int d
{
float grad_magn;
int index;
+ int num_total = dimX*dimY*dimZ;
int i = blockDim.x * blockIdx.x + threadIdx.x;
int j = blockDim.y * blockIdx.y + threadIdx.y;
int k = blockDim.z * blockIdx.z + threadIdx.z;
- if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
- index = (dimX*dimY)*k + j*dimX+i;
-
+ index = (dimX*dimY)*k + i*dimX+j;
+ if (index < num_total) {
grad_magn = (sqrtf(pow(P1[index],2) + pow(P2[index],2) + pow(P3[index],2)))/alpha1;
if (grad_magn > 1.0f) {
P1[index] /= grad_magn;
@@ -288,55 +324,57 @@ __global__ void DualQ_3D_kernel(float *V1, float *V2, float *V3, float *Q1, floa
int index;
float q1, q2, q3, q11, q22, q33, q44, q55, q66;
+ int num_total = dimX*dimY*dimZ;
+
int i = blockDim.x * blockIdx.x + threadIdx.x;
int j = blockDim.y * blockIdx.y + threadIdx.y;
int k = blockDim.z * blockIdx.z + threadIdx.z;
- if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
-
- index = (dimX*dimY)*k + j*dimX+i;
- q1 = 0.0f; q11 = 0.0f; q33 = 0.0f; q2 = 0.0f; q22 = 0.0f; q55 = 0.0f; q3 = 0.0f; q44 = 0.0f; q66 = 0.0f;
- /* symmetric boundary conditions (Neuman) */
- if (i != dimX-1){
- q1 = V1[(dimX*dimY)*k + j*dimX+(i+1)] - V1[index];
- q11 = V2[(dimX*dimY)*k + j*dimX+(i+1)] - V2[index];
- q33 = V3[(dimX*dimY)*k + j*dimX+(i+1)] - V3[index];
- }
- if (j != dimY-1) {
- q2 = V2[(dimX*dimY)*k + (j+1)*dimX+i] - V2[index];
- q22 = V1[(dimX*dimY)*k + (j+1)*dimX+i] - V1[index];
- q55 = V3[(dimX*dimY)*k + (j+1)*dimX+i] - V3[index];
- }
- if (k != dimZ-1) {
- q3 = V3[(dimX*dimY)*(k+1) + j*dimX+i] - V3[index];
- q44 = V1[(dimX*dimY)*(k+1) + j*dimX+i] - V1[index];
- q66 = V2[(dimX*dimY)*(k+1) + j*dimX+i] - V2[index];
- }
-
+ index = (dimX*dimY)*k + i*dimX+j;
+ int i1 = (dimX*dimY)*k + (i+1)*dimX+j;
+ int j1 = (dimX*dimY)*k + (i)*dimX+(j+1);
+ int k1 = (dimX*dimY)*(k+1) + (i)*dimX+(j);
+
+ if (index < num_total) {
+ q1 = 0.0f; q11 = 0.0f; q33 = 0.0f; q2 = 0.0f; q22 = 0.0f; q55 = 0.0f; q3 = 0.0f; q44 = 0.0f; q66 = 0.0f;
+
+ /* boundary conditions (Neuman) */
+ if ((i >= 0) && (i < dimX-1)) {
+ q1 = V1[i1] - V1[index];
+ q11 = V2[i1] - V2[index];
+ q33 = V3[i1] - V3[index]; }
+ if ((j >= 0) && (j < dimY-1)) {
+ q2 = V2[j1] - V2[index];
+ q22 = V1[j1] - V1[index];
+ q55 = V3[j1] - V3[index]; }
+ if ((k >= 0) && (k < dimZ-1)) {
+ q3 = V3[k1] - V3[index];
+ q44 = V1[k1] - V1[index];
+ q66 = V2[k1] - V2[index]; }
+
Q1[index] += sigma*(q1); /*Q11*/
Q2[index] += sigma*(q2); /*Q22*/
Q3[index] += sigma*(q3); /*Q33*/
Q4[index] += sigma*(0.5f*(q11 + q22)); /* Q21 / Q12 */
Q5[index] += sigma*(0.5f*(q33 + q44)); /* Q31 / Q13 */
Q6[index] += sigma*(0.5f*(q55 + q66)); /* Q32 / Q23 */
- }
+ }
return;
}
-
__global__ void ProjQ_3D_kernel(float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, int dimX, int dimY, int dimZ, float alpha0)
{
float grad_magn;
int index;
-
+ int num_total = dimX*dimY*dimZ;
+
int i = blockDim.x * blockIdx.x + threadIdx.x;
int j = blockDim.y * blockIdx.y + threadIdx.y;
- int k = blockDim.z * blockIdx.z + threadIdx.z;
+ int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+ index = (dimX*dimY)*k + i*dimX+j;
- if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
-
- index = (dimX*dimY)*k + j*dimX+i;
-
+ if (index < num_total) {
grad_magn = sqrtf(pow(Q1[index],2) + pow(Q2[index],2) + pow(Q3[index],2) + 2.0f*pow(Q4[index],2) + 2.0f*pow(Q5[index],2) + 2.0f*pow(Q6[index],2));
grad_magn = grad_magn/alpha0;
if (grad_magn > 1.0f) {
@@ -354,21 +392,33 @@ __global__ void DivProjP_3D_kernel(float *U, float *U0, float *P1, float *P2, fl
{
float P_v1, P_v2, P_v3, div;
int index;
-
+ int num_total = dimX*dimY*dimZ;
+
+
int i = blockDim.x * blockIdx.x + threadIdx.x;
int j = blockDim.y * blockIdx.y + threadIdx.y;
- int k = blockDim.z * blockIdx.z + threadIdx.z;
+ int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+ index = (dimX*dimY)*k + i*dimX+j;
+ int i1 = (dimX*dimY)*k + (i-1)*dimX+j;
+ int j1 = (dimX*dimY)*k + (i)*dimX+(j-1);
+ int k1 = (dimX*dimY)*(k-1) + (i)*dimX+(j);
+
+ if (index < num_total) {
+ P_v1 = 0.0f; P_v2 = 0.0f; P_v3 = 0.0f;
- if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
-
- index = (dimX*dimY)*k + j*dimX+i;
-
if (i == 0) P_v1 = P1[index];
- else P_v1 = P1[index] - P1[(dimX*dimY)*k + j*dimX+(i-1)];
+ if (i == dimX-1) P_v1 = -P1[i1];
+ if ((i > 0) && (i < dimX-1)) P_v1 = P1[index] - P1[i1];
+
if (j == 0) P_v2 = P2[index];
- else P_v2 = P2[index] - P2[(dimX*dimY)*k + (j-1)*dimX+i];
+ if (j == dimY-1) P_v2 = -P2[j1];
+ if ((j > 0) && (j < dimY-1)) P_v2 = P2[index] - P2[j1];
+
if (k == 0) P_v3 = P3[index];
- else P_v3 = P3[index] - P3[(dimX*dimY)*(k-1) + (j)*dimX+i];
+ if (k == dimZ-1) P_v3 = -P3[k1];
+ if ((k > 0) && (k < dimZ-1)) P_v3 = P3[index] - P3[k1];
+
div = P_v1 + P_v2 + P_v3;
U[index] = (lambda*(U[index] + tau*div) + tau*U0[index])/(lambda + tau);
@@ -379,33 +429,73 @@ __global__ void UpdV_3D_kernel(float *V1, float *V2, float *V3, float *P1, float
{
float q1, q4x, q5x, q2, q4y, q6y, q6z, q5z, q3, div1, div2, div3;
int index;
+ int num_total = dimX*dimY*dimZ;
int i = blockDim.x * blockIdx.x + threadIdx.x;
int j = blockDim.y * blockIdx.y + threadIdx.y;
int k = blockDim.z * blockIdx.z + threadIdx.z;
- if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
-
- index = (dimX*dimY)*k + j*dimX+i;
+ index = (dimX*dimY)*k + i*dimX+j;
+ int i1 = (dimX*dimY)*k + (i-1)*dimX+j;
+ int j1 = (dimX*dimY)*k + (i)*dimX+(j-1);
+ int k1 = (dimX*dimY)*(k-1) + (i)*dimX+(j);
- q1 = 0.0f; q4x= 0.0f; q5x= 0.0f; q2= 0.0f; q4y= 0.0f; q6y= 0.0f; q6z= 0.0f; q5z= 0.0f; q3= 0.0f;
- /* Q1 - Q11, Q2 - Q22, Q3 - Q33, Q4 - Q21/Q12, Q5 - Q31/Q13, Q6 - Q32/Q23*/
- /* symmetric boundary conditions (Neuman) */
- if (i != 0) {
- q1 = Q1[index] - Q1[(dimX*dimY)*k + j*dimX+(i-1)];
- q4x = Q4[index] - Q4[(dimX*dimY)*k + j*dimX+(i-1)];
- q5x = Q5[index] - Q5[(dimX*dimY)*k + j*dimX+(i-1)];
- }
- if (j != 0) {
- q2 = Q2[index] - Q2[(dimX*dimY)*k + (j-1)*dimX+i];
- q4y = Q4[index] - Q4[(dimX*dimY)*k + (j-1)*dimX+i];
- q6y = Q6[index] - Q6[(dimX*dimY)*k + (j-1)*dimX+i];
- }
- if (k != 0) {
- q6z = Q6[index] - Q6[(dimX*dimY)*(k-1) + (j)*dimX+i];
- q5z = Q5[index] - Q5[(dimX*dimY)*(k-1) + (j)*dimX+i];
- q3 = Q3[index] - Q3[(dimX*dimY)*(k-1) + (j)*dimX+i];
- }
+ /* Q1 - Q11, Q2 - Q22, Q3 - Q33, Q4 - Q21/Q12, Q5 - Q31/Q13, Q6 - Q32/Q23*/
+ if (index < num_total) {
+
+ /* boundary conditions (Neuman) */
+ if ((i > 0) && (i < dimX-1)) {
+ q1 = Q1[index] - Q1[i1];
+ q4x = Q4[index] - Q4[i1];
+ q5x = Q5[index] - Q5[i1]; }
+ else if (i == 0) {
+ q1 = Q1[index];
+ q4x = Q4[index];
+ q5x = Q5[index]; }
+ else if (i == dimX-1) {
+ q1 = -Q1[i1];
+ q4x = -Q4[i1];
+ q5x = -Q5[i1]; }
+ else {
+ q1 = 0.0f;
+ q4x = 0.0f;
+ q5x = 0.0f; }
+
+ if ((j > 0) && (j < dimY-1)) {
+ q2 = Q2[index] - Q2[j1];
+ q4y = Q4[index] - Q4[j1];
+ q6y = Q6[index] - Q6[j1]; }
+ else if (j == dimY-1) {
+ q2 = -Q2[j1];
+ q4y = -Q4[j1];
+ q6y = -Q6[j1]; }
+ else if (j == 0) {
+ q2 = Q2[index];
+ q4y = Q4[index];
+ q6y = Q6[index]; }
+ else {
+ q2 = 0.0f;
+ q4y = 0.0f;
+ q6y = 0.0f;
+ }
+
+ if ((k > 0) && (k < dimZ-1)) {
+ q6z = Q6[index] - Q6[k1];
+ q5z = Q5[index] - Q5[k1];
+ q3 = Q3[index] - Q3[k1]; }
+ else if (k == dimZ-1) {
+ q6z = -Q6[k1];
+ q5z = -Q5[k1];
+ q3 = -Q3[k1]; }
+ else if (k == 0) {
+ q6z = Q6[index];
+ q5z = Q5[index];
+ q3 = Q3[index]; }
+ else {
+ q6z = 0.0f;
+ q5z = 0.0f;
+ q3 = 0.0f; }
+
div1 = q1 + q4y + q5z;
div2 = q4x + q2 + q6z;
div3 = q5x + q6y + q3;
@@ -510,7 +600,14 @@ extern "C" int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, flo
CHECK(cudaMalloc((void**)&V2_old,dimTotal*sizeof(float)));
CHECK(cudaMemcpy(d_U0,U0,dimTotal*sizeof(float),cudaMemcpyHostToDevice));
- CHECK(cudaMemcpy(d_U,U0,dimTotal*sizeof(float),cudaMemcpyHostToDevice));
+ CHECK(cudaMemcpy(d_U,U0,dimTotal*sizeof(float),cudaMemcpyHostToDevice));
+ cudaMemset(P1, 0, dimTotal*sizeof(float));
+ cudaMemset(P2, 0, dimTotal*sizeof(float));
+ cudaMemset(Q1, 0, dimTotal*sizeof(float));
+ cudaMemset(Q2, 0, dimTotal*sizeof(float));
+ cudaMemset(Q3, 0, dimTotal*sizeof(float));
+ cudaMemset(V1, 0, dimTotal*sizeof(float));
+ cudaMemset(V2, 0, dimTotal*sizeof(float));
if (dimZ == 1) {
/*2D case */
@@ -521,14 +618,14 @@ extern "C" int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, flo
/* Calculate Dual Variable P */
DualP_2D_kernel<<<dimGrid,dimBlock>>>(d_U, V1, V2, P1, P2, dimX, dimY, sigma);
- CHECK(cudaDeviceSynchronize());
+ CHECK(cudaDeviceSynchronize());
/*Projection onto convex set for P*/
ProjP_2D_kernel<<<dimGrid,dimBlock>>>(P1, P2, dimX, dimY, alpha1);
CHECK(cudaDeviceSynchronize());
/* Calculate Dual Variable Q */
DualQ_2D_kernel<<<dimGrid,dimBlock>>>(V1, V2, Q1, Q2, Q3, dimX, dimY, sigma);
CHECK(cudaDeviceSynchronize());
- /*Projection onto convex set for Q*/
+ /*Projection onto convex set for Q*/
ProjQ_2D_kernel<<<dimGrid,dimBlock>>>(Q1, Q2, Q3, dimX, dimY, alpha0);
CHECK(cudaDeviceSynchronize());
/*saving U into U_old*/
@@ -549,7 +646,7 @@ extern "C" int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, flo
/*get new V*/
newU_kernel_ar2<<<dimGrid,dimBlock>>>(V1, V2, V1_old, V2_old, dimX, dimY, dimTotal);
CHECK(cudaDeviceSynchronize());
- }
+ }
}
else {
/*3D case */
@@ -565,6 +662,12 @@ extern "C" int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, flo
CHECK(cudaMalloc((void**)&V3,dimTotal*sizeof(float)));
CHECK(cudaMalloc((void**)&V3_old,dimTotal*sizeof(float)));
+ cudaMemset(Q4, 0.0f, dimTotal*sizeof(float));
+ cudaMemset(Q5, 0.0f, dimTotal*sizeof(float));
+ cudaMemset(Q6, 0.0f, dimTotal*sizeof(float));
+ cudaMemset(P3, 0.0f, dimTotal*sizeof(float));
+ cudaMemset(V3, 0.0f, dimTotal*sizeof(float));
+
for(int n=0; n < iterationsNumb; n++) {
/* Calculate Dual Variable P */
diff --git a/Readme.md b/Readme.md
index ebd4d20..aeefffd 100644
--- a/Readme.md
+++ b/Readme.md
@@ -93,7 +93,7 @@ conda install ccpi-regulariser -c ccpi -c conda-forge
#### Python (conda-build)
```
- export CIL_VERSION=0.10.4
+ export CIL_VERSION=19.02
conda build Wrappers/Python/conda-recipe --numpy 1.12 --python 3.5
conda install ccpi-regulariser=${CIL_VERSION} --use-local --force
cd demos/
diff --git a/Wrappers/Matlab/demos/demoMatlab_3Ddenoise.m b/Wrappers/Matlab/demos/demoMatlab_3Ddenoise.m
index 0c331a4..ac8e1ba 100644
--- a/Wrappers/Matlab/demos/demoMatlab_3Ddenoise.m
+++ b/Wrappers/Matlab/demos/demoMatlab_3Ddenoise.m
@@ -8,7 +8,7 @@ addpath(Path2);
addpath(Path3);
N = 512;
-slices = 7;
+slices = 15;
vol3D = zeros(N,N,slices, 'single');
Ideal3D = zeros(N,N,slices, 'single');
Im = double(imread('lena_gray_512.tif'))/255; % loading image
@@ -17,9 +17,7 @@ vol3D(:,:,i) = Im + .05*randn(size(Im));
Ideal3D(:,:,i) = Im;
end
vol3D(vol3D < 0) = 0;
-figure; imshow(vol3D(:,:,15), [0 1]); title('Noisy image');
-
-
+figure; imshow(vol3D(:,:,7), [0 1]); title('Noisy image');
lambda_reg = 0.03; % regularsation parameter for all methods
%%
fprintf('Denoise a volume using the ROF-TV model (CPU) \n');
@@ -143,6 +141,16 @@ rmseTGV = RMSE(Ideal3D(:),u_tgv(:));
fprintf('%s %f \n', 'RMSE error for TGV is:', rmseTGV);
figure; imshow(u_tgv(:,:,3), [0 1]); title('TGV denoised volume (CPU)');
%%
+% fprintf('Denoise using the TGV model (GPU) \n');
+% lambda_TGV = 0.03; % regularisation parameter
+% alpha1 = 1.0; % parameter to control the first-order term
+% alpha0 = 2.0; % parameter to control the second-order term
+% iter_TGV = 500; % number of Primal-Dual iterations for TGV
+% tic; u_tgv_gpu = TGV_GPU(single(vol3D), lambda_TGV, alpha1, alpha0, iter_TGV); toc;
+% rmseTGV = RMSE(Ideal3D(:),u_tgv_gpu(:));
+% fprintf('%s %f \n', 'RMSE error for TGV is:', rmseTGV);
+% figure; imshow(u_tgv_gpu(:,:,3), [0 1]); title('TGV denoised volume (GPU)');
+%%
%>>>>>>>>>>>>>> MULTI-CHANNEL priors <<<<<<<<<<<<<<< %
fprintf('Denoise a volume using the FGP-dTV model (CPU) \n');
diff --git a/Wrappers/Matlab/demos/demoMatlab_denoise.m b/Wrappers/Matlab/demos/demoMatlab_denoise.m
index 14d3096..62e5834 100644
--- a/Wrappers/Matlab/demos/demoMatlab_denoise.m
+++ b/Wrappers/Matlab/demos/demoMatlab_denoise.m
@@ -5,7 +5,9 @@ fsep = '/';
Path1 = sprintf(['..' fsep 'mex_compile' fsep 'installed'], 1i);
Path2 = sprintf(['..' fsep '..' fsep '..' fsep 'data' fsep], 1i);
Path3 = sprintf(['..' fsep 'supp'], 1i);
-addpath(Path1); addpath(Path2); addpath(Path3);
+addpath(Path1);
+addpath(Path2);
+addpath(Path3);
Im = double(imread('lena_gray_512.tif'))/255; % loading image
u0 = Im + .05*randn(size(Im)); u0(u0 < 0) = 0;
@@ -29,7 +31,7 @@ figure; imshow(u_rof, [0 1]); title('ROF-TV denoised image (CPU)');
% figure; imshow(u_rofG, [0 1]); title('ROF-TV denoised image (GPU)');
%%
fprintf('Denoise using the FGP-TV model (CPU) \n');
-iter_fgp = 1000; % number of FGP iterations
+iter_fgp = 1300; % number of FGP iterations
epsil_tol = 1.0e-06; % tolerance
tic; u_fgp = FGP_TV(single(u0), lambda_reg, iter_fgp, epsil_tol); toc;
energyfunc_val_fgp = TV_energy(single(u_fgp),single(u0),lambda_reg, 1); % get energy function value
@@ -39,8 +41,8 @@ figure; imshow(u_fgp, [0 1]); title('FGP-TV denoised image (CPU)');
%%
% fprintf('Denoise using the FGP-TV model (GPU) \n');
-% iter_fgp = 1000; % number of FGP iterations
-% epsil_tol = 1.0e-05; % tolerance
+% iter_fgp = 1300; % number of FGP iterations
+% epsil_tol = 1.0e-06; % tolerance
% tic; u_fgpG = FGP_TV_GPU(single(u0), lambda_reg, iter_fgp, epsil_tol); toc;
% figure; imshow(u_fgpG, [0 1]); title('FGP-TV denoised image (GPU)');
%%
@@ -63,17 +65,17 @@ fprintf('Denoise using the TGV model (CPU) \n');
lambda_TGV = 0.045; % regularisation parameter
alpha1 = 1.0; % parameter to control the first-order term
alpha0 = 2.0; % parameter to control the second-order term
-iter_TGV = 2000; % number of Primal-Dual iterations for TGV
+iter_TGV = 1500; % number of Primal-Dual iterations for TGV
tic; u_tgv = TGV(single(u0), lambda_TGV, alpha1, alpha0, iter_TGV); toc;
rmseTGV = (RMSE(u_tgv(:),Im(:)));
fprintf('%s %f \n', 'RMSE error for TGV is:', rmseTGV);
figure; imshow(u_tgv, [0 1]); title('TGV denoised image (CPU)');
-%%
+
% fprintf('Denoise using the TGV model (GPU) \n');
% lambda_TGV = 0.045; % regularisation parameter
% alpha1 = 1.0; % parameter to control the first-order term
% alpha0 = 2.0; % parameter to control the second-order term
-% iter_TGV = 2000; % number of Primal-Dual iterations for TGV
+% iter_TGV = 1500; % number of Primal-Dual iterations for TGV
% tic; u_tgv_gpu = TGV_GPU(single(u0), lambda_TGV, alpha1, alpha0, iter_TGV); toc;
% rmseTGV_gpu = (RMSE(u_tgv_gpu(:),Im(:)));
% fprintf('%s %f \n', 'RMSE error for TGV is:', rmseTGV_gpu);
diff --git a/Wrappers/Matlab/mex_compile/regularisers_GPU/TGV_GPU.cpp b/Wrappers/Matlab/mex_compile/regularisers_GPU/TGV_GPU.cpp
index edb551d..1173282 100644
--- a/Wrappers/Matlab/mex_compile/regularisers_GPU/TGV_GPU.cpp
+++ b/Wrappers/Matlab/mex_compile/regularisers_GPU/TGV_GPU.cpp
@@ -21,18 +21,18 @@ limitations under the License.
#include "TGV_GPU_core.h"
/* CUDA implementation of Primal-Dual denoising method for
- * Total Generilized Variation (TGV)-L2 model [1] (2D case only)
+ * Total Generilized Variation (TGV)-L2 model [1] (2D/3D)
*
* Input Parameters:
- * 1. Noisy image (2D) (required)
- * 2. lambda - regularisation parameter (required)
- * 3. parameter to control the first-order term (alpha1) (default - 1)
- * 4. parameter to control the second-order term (alpha0) (default - 0.5)
- * 5. Number of Chambolle-Pock (Primal-Dual) iterations (default is 300)
+ * 1. Noisy image/volume (2D/3D)
+ * 2. lambda - regularisation parameter
+ * 3. parameter to control the first-order term (alpha1)
+ * 4. parameter to control the second-order term (alpha0)
+ * 5. Number of Chambolle-Pock (Primal-Dual) iterations
* 6. Lipshitz constant (default is 12)
*
* Output:
- * Filtered/regulariaed image
+ * Filtered/regularised image
*
* References:
* [1] K. Bredies "Total Generalized Variation"
@@ -44,7 +44,7 @@ void mexFunction(
{
int number_of_dims, iter;
- mwSize dimX, dimY;
+ mwSize dimX, dimY, dimZ;
const mwSize *dim_array;
float *Input, *Output=NULL, lambda, alpha0, alpha1, L2;
@@ -57,8 +57,8 @@ void mexFunction(
Input = (float *) mxGetData(prhs[0]); /*noisy image (2D) */
lambda = (float) mxGetScalar(prhs[1]); /* regularisation parameter */
alpha1 = 1.0f; /* parameter to control the first-order term */
- alpha0 = 0.5f; /* parameter to control the second-order term */
- iter = 300; /* Iterations number */
+ alpha0 = 2.0f; /* parameter to control the second-order term */
+ iter = 500; /* Iterations number */
L2 = 12.0f; /* Lipshitz constant */
if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
@@ -68,12 +68,14 @@ void mexFunction(
if (nrhs == 6) L2 = (float) mxGetScalar(prhs[5]); /* Lipshitz constant */
/*Handling Matlab output data*/
- dimX = dim_array[0]; dimY = dim_array[1];
+ dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2];
if (number_of_dims == 2) {
- Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
- /* running the function */
- TGV_GPU_main(Input, Output, lambda, alpha1, alpha0, iter, L2, dimX, dimY);
+ dimZ = 1; /*2D case*/
+ Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
}
- if (number_of_dims == 3) {mexErrMsgTxt("Only 2D images accepted");}
+ if (number_of_dims == 3) Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+
+ /* running the function */
+ TGV_GPU_main(Input, Output, lambda, alpha1, alpha0, iter, L2, dimX, dimY, dimZ);
}
diff --git a/run.sh b/run.sh
index a8e5555..332d660 100644
--- a/run.sh
+++ b/run.sh
@@ -1,19 +1,22 @@
#!/bin/bash
echo "Building CCPi-regularisation Toolkit using CMake"
-# rm -r build
+rm -r build
# Requires Cython, install it first:
# pip install cython
-# mkdir build
+mkdir build
cd build/
make clean
-# install Python modules only without CUDA
-cmake ../ -DBUILD_PYTHON_WRAPPER=ON -DBUILD_MATLAB_WRAPPER=OFF -DBUILD_CUDA=OFF -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=./install
-# install Python modules only with CUDA
+# install Python modules without CUDA
+#cmake ../ -DBUILD_PYTHON_WRAPPER=ON -DBUILD_MATLAB_WRAPPER=OFF -DBUILD_CUDA=OFF -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=./install
+# install Python modules with CUDA
# cmake ../ -DBUILD_PYTHON_WRAPPER=ON -DBUILD_MATLAB_WRAPPER=OFF -DBUILD_CUDA=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=./install
+# install Matlab modules with CUDA
+cmake ../ -DBUILD_PYTHON_WRAPPER=OFF -DMatlab_ROOT_DIR=/dls_sw/apps/matlab/r2014a/ -DBUILD_MATLAB_WRAPPER=ON -DBUILD_CUDA=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=./install
make install
# cp install/lib/libcilreg.so install/python/ccpi/filters
-cd install/python
-export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../lib
+#cd install/python
+#export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../lib
# spyder
# one can also run Matlab in Linux as:
-# PATH="/path/to/mex/:$PATH" LD_LIBRARY_PATH="/path/to/library:$LD_LIBRARY_PATH" matlab
+#PATH="/path/to/mex/:$PATH" LD_LIBRARY_PATH="/path/to/library:$LD_LIBRARY_PATH" matlab
+PATH="/home/kjy41806/Documents/SOFT/CCPi-Regularisation-Toolkit/build/install/matlab/:$PATH" LD_LIBRARY_PATH="/home/kjy41806/Documents/SOFT/CCPi-Regularisation-Toolkit/build/install/lib:$LD_LIBRARY_PATH" matlab