some TGV updates and demos

author: dkazanc <dkazanc@hotmail.com> 2019-02-25 17:13:28 +0000
committer: dkazanc <dkazanc@hotmail.com> 2019-02-25 17:13:28 +0000
commit: 0587f96cafac66d9ee04005a80b43514c6d2a753 (patch)
tree: 59481436cee835066241d74cf595a77bb2f960a4
parent: dc5a92771dcf9bfd262ba34b0fc8a1c2df40897d (diff)
download: regularization-0587f96cafac66d9ee04005a80b43514c6d2a753.tar.gz
regularization-0587f96cafac66d9ee04005a80b43514c6d2a753.tar.bz2
regularization-0587f96cafac66d9ee04005a80b43514c6d2a753.tar.xz
regularization-0587f96cafac66d9ee04005a80b43514c6d2a753.zip
4 files changed, 261 insertions, 158 deletions
diff --git a/Core/regularisers_GPU/TGV_GPU_core.cu b/Core/regularisers_GPU/TGV_GPU_core.cu
index e4abf72..9b43c21 100644
--- a/Core/regularisers_GPU/TGV_GPU_core.cu
+++ b/Core/regularisers_GPU/TGV_GPU_core.cu
@@ -38,28 +38,27 @@ limitations under the License.
  * [1] K. Bredies "Total Generalized Variation"
  */
     
-#define BLKXSIZE 8
-#define BLKYSIZE 8
-#define BLKZSIZE 8    
-    
+   
 #define BLKXSIZE2D 8
 #define BLKYSIZE2D 8
-#define EPS 1.0e-7
-#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
 
+#define BLKXSIZE 8
+#define BLKYSIZE 8
+#define BLKZSIZE 8
+
+#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
 
 /********************************************************************/
 /***************************2D Functions*****************************/
 /********************************************************************/
-__global__ void DualP_2D_kernel(float *U, float *V1, float *V2, float *P1, float *P2, int dimX, int dimY, float sigma)
+__global__ void DualP_2D_kernel(float *U, float *V1, float *V2, float *P1, float *P2, long dimX, long dimY, long num_total, float sigma)
 {    
-	int num_total = dimX*dimY;
-	const int i = blockDim.x * blockIdx.x + threadIdx.x;
-        const int j = blockDim.y * blockIdx.y + threadIdx.y;
+	const long i = blockDim.x * blockIdx.x + threadIdx.x;
+        const long j = blockDim.y * blockIdx.y + threadIdx.y;
         
-        int index = i + dimX*j;       
+        long index = i + (dimX)*j;       
         
-        if (index < num_total) {  
+        if (index < num_total) {     
         /* symmetric boundary conditions (Neuman) */            
         if ((i >= 0) && (i < dimX-1))  P1[index] += sigma*((U[(i+1) + dimX*j] - U[index])  - V1[index]); 
         else P1[index] += sigma*(-V1[index]); 
@@ -69,17 +68,16 @@ __global__ void DualP_2D_kernel(float *U, float *V1, float *V2, float *P1, float
 	return;
 } 
 
-__global__ void ProjP_2D_kernel(float *P1, float *P2, int dimX, int dimY, float alpha1)
+__global__ void ProjP_2D_kernel(float *P1, float *P2, long dimX, long dimY, long num_total, float alpha1)
 {
    	float grad_magn;
-	int num_total = dimX*dimY;
 	
-	const int i = blockDim.x * blockIdx.x + threadIdx.x;
-        const int j = blockDim.y * blockIdx.y + threadIdx.y;
+	const long i = blockDim.x * blockIdx.x + threadIdx.x;
+        const long j = blockDim.y * blockIdx.y + threadIdx.y;
         
-        int index = i + dimX*j;
+        long index = i + (dimX)*j;    
                 
-        if (index < num_total) {            
+        if (index < num_total) {             
             grad_magn = sqrtf(pow(P1[index],2) + pow(P2[index],2));
             grad_magn = grad_magn/alpha1;
             if (grad_magn > 1.0f) {
@@ -90,17 +88,15 @@ __global__ void ProjP_2D_kernel(float *P1, float *P2, int dimX, int dimY, float
 	return;
 } 
 
-__global__ void DualQ_2D_kernel(float *V1, float *V2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, float sigma)
+__global__ void DualQ_2D_kernel(float *V1, float *V2, float *Q1, float *Q2, float *Q3, long dimX, long dimY, long num_total, float sigma)
 {
         float q1, q2, q11, q22;
-	int num_total = dimX*dimY;
-	
-	const int i = blockDim.x * blockIdx.x + threadIdx.x;
-        const int j = blockDim.y * blockIdx.y + threadIdx.y;
+	const long i = blockDim.x * blockIdx.x + threadIdx.x;
+        const long j = blockDim.y * blockIdx.y + threadIdx.y;
         
-        int index = i + dimX*j;      
+        long index = i + (dimX)*j;          
         
-        if (index < num_total) {
+        if (index < num_total) {     
          q1 = 0.0f; q2  = 0.0f; q11  = 0.0f; q22  = 0.0f;
          
 	        if ((i >= 0) && (i < dimX-1))  {            
@@ -120,17 +116,15 @@ __global__ void DualQ_2D_kernel(float *V1, float *V2, float *Q1, float *Q2, floa
 	return;
 } 
 
-__global__ void ProjQ_2D_kernel(float *Q1, float *Q2, float *Q3, int dimX, int dimY, float alpha0)
+__global__ void ProjQ_2D_kernel(float *Q1, float *Q2, float *Q3, long dimX, long dimY, long num_total, float alpha0)
 {
 	float grad_magn;
-        int num_total = dimX*dimY;
-
-	const int i = blockDim.x * blockIdx.x + threadIdx.x;
-        const int j = blockDim.y * blockIdx.y + threadIdx.y;
+	const long i = blockDim.x * blockIdx.x + threadIdx.x;
+        const long j = blockDim.y * blockIdx.y + threadIdx.y;
         
-        int index = i + dimX*j;        
+        long index = i + (dimX)*j;     
        
-        if (index < num_total) {
+        if (index < num_total) {     
             grad_magn = sqrt(pow(Q1[index],2) + pow(Q2[index],2) + 2*pow(Q3[index],2));
             grad_magn = grad_magn/alpha0;
             if (grad_magn > 1.0f) {
@@ -142,26 +136,27 @@ __global__ void ProjQ_2D_kernel(float *Q1, float *Q2, float *Q3, int dimX, int d
 	return;
 } 
 
-__global__ void DivProjP_2D_kernel(float *U, float *U0, float *P1, float *P2, int dimX, int dimY, float lambda, float tau)
+__global__ void DivProjP_2D_kernel(float *U, float *U0, float *P1, float *P2, long dimX, long dimY, long num_total, float lambda, float tau)
 {
 	float P_v1, P_v2, div;
-	int num_total = dimX*dimY;
-	
-	const int i = blockDim.x * blockIdx.x + threadIdx.x;
-        const int j = blockDim.y * blockIdx.y + threadIdx.y;       
+	const long i = blockDim.x * blockIdx.x + threadIdx.x;
+        const long j = blockDim.y * blockIdx.y + threadIdx.y;
         
-        int index = i + dimX*j;
+        long index = i + (dimX)*j;    
 
-        if (index < num_total) { 
+        if (index < num_total) {     
 	P_v1 = 0.0f; P_v2 = 0.0f;
         
-        if (i == 0) P_v1 = P1[index];
-        if (i == dimX-1) P_v1 = -P1[(i-1) + dimX*j];
         if ((i > 0) && (i < dimX-1)) P_v1 = P1[index] - P1[(i-1) + dimX*j];
+        else if (i == dimX-1) P_v1 = -P1[(i-1) + dimX*j];
+        else if (i == 0) P_v1 = P1[index];
+        else P_v1 = 0.0f;
 
-        if (j == 0) P_v2 = P2[index];
-        if (j == dimY-1) P_v2 = -P2[i + dimX*(j-1)];
       	if ((j > 0) && (j < dimY-1))  P_v2 = P2[index] - P2[i + dimX*(j-1)];
+      	else if (j == dimY-1) P_v2 = -P2[i + dimX*(j-1)];
+      	else if (j == 0) P_v2 = P2[index];
+      	else P_v2 = 0.0f;
+
          
         div = P_v1 + P_v2;
         U[index] = (lambda*(U[index] + tau*div) + tau*U0[index])/(lambda + tau);
@@ -169,18 +164,19 @@ __global__ void DivProjP_2D_kernel(float *U, float *U0, float *P1, float *P2, in
 	return;
 } 
 
-__global__ void UpdV_2D_kernel(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, float tau)
+__global__ void UpdV_2D_kernel(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, float *Q3, long dimX, long dimY, long num_total, float tau)
 {
 	float q1, q3_x, q2, q3_y, div1, div2;
-	int num_total = dimX*dimY;
-	int i1, j1;
+	long i1, j1;
 	
-	const int i = blockDim.x * blockIdx.x + threadIdx.x;
-        const int j = blockDim.y * blockIdx.y + threadIdx.y;
+	const long i = blockDim.x * blockIdx.x + threadIdx.x;
+        const long j = blockDim.y * blockIdx.y + threadIdx.y;
         
-        int index = i + dimX*j;      
+        long index = i + (dimX)*j;        
       
-	if (index < num_total) {   
+        if (index < num_total) {     
+
+	q1 = 0.0f; q3_x = 0.0f; q2 = 0.0f; q3_y = 0.0f; div1 = 0.0f; div2= 0.0f;
 	
 	    i1 = (i-1) + dimX*j;
             j1 = (i) + dimX*(j-1);
@@ -222,24 +218,24 @@ __global__ void UpdV_2D_kernel(float *V1, float *V2, float *P1, float *P2, float
 	return;
 } 
 
-__global__ void copyIm_TGV_kernel(float *U, float *U_old, int N, int M, int num_total)
+__global__ void copyIm_TGV_kernel(float *U, float *U_old, long dimX, long dimY, long num_total)
 {
-    int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
-    int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
-    
-    int index = xIndex + N*yIndex;
+	const long i = blockDim.x * blockIdx.x + threadIdx.x;
+        const long j = blockDim.y * blockIdx.y + threadIdx.y;
+        
+        long index = i + (dimX)*j;    
     
     if (index < num_total)   {
         U_old[index] = U[index];
     }
 }
 
-__global__ void copyIm_TGV_kernel_ar2(float *V1, float *V2, float *V1_old, float *V2_old, int N, int M, int num_total)
+__global__ void copyIm_TGV_kernel_ar2(float *V1, float *V2, float *V1_old, float *V2_old, long dimX, long dimY, long num_total)
 {
-    int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
-    int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
-    
-    int index = xIndex + N*yIndex;
+	const long i = blockDim.x * blockIdx.x + threadIdx.x;
+        const long j = blockDim.y * blockIdx.y + threadIdx.y;
+        
+        long index = i + (dimX)*j;    
     
     if (index < num_total)   {
         V1_old[index] = V1[index];
@@ -247,12 +243,12 @@ __global__ void copyIm_TGV_kernel_ar2(float *V1, float *V2, float *V1_old, float
     }
 }
 
-__global__ void newU_kernel(float *U, float *U_old, int N, int M, int num_total)
+__global__ void newU_kernel(float *U, float *U_old, long dimX, long dimY, long num_total)
 {
-    int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
-    int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
-    
-    int index = xIndex + N*yIndex;
+	const long i = blockDim.x * blockIdx.x + threadIdx.x;
+        const long j = blockDim.y * blockIdx.y + threadIdx.y;
+        
+        long index = i + (dimX)*j;    
     
     if (index < num_total)	{
         U[index] = 2.0f*U[index] - U_old[index];
@@ -260,12 +256,12 @@ __global__ void newU_kernel(float *U, float *U_old, int N, int M, int num_total)
 }
 
 
-__global__ void newU_kernel_ar2(float *V1, float *V2, float *V1_old, float *V2_old, int N, int M, int num_total)
+__global__ void newU_kernel_ar2(float *V1, float *V2, float *V1_old, float *V2_old, long dimX, long dimY, long num_total)
 {
-    int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
-    int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
-    
-    int index = xIndex + N*yIndex;
+	const long i = blockDim.x * blockIdx.x + threadIdx.x;
+        const long j = blockDim.y * blockIdx.y + threadIdx.y;
+        
+        long index = i + (dimX)*j;    
     
     if (index < num_total)	{
         V1[index] = 2.0f*V1[index] - V1_old[index];
@@ -576,14 +572,20 @@ __global__ void newU_kernel3D_ar3(float *V1, float *V2, float *V3, float *V1_old
 /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
 extern "C" int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY, int dimZ)
 {
-	int dimTotal, dev = 0;
-	CHECK(cudaSetDevice(dev));
-	
-	dimTotal = dimX*dimY*dimZ;
+
+        int deviceCount = -1; // number of devices
+        cudaGetDeviceCount(&deviceCount);
+        if (deviceCount == 0) {
+         fprintf(stderr, "No CUDA devices found\n");
+        return -1;
+        }	
+
+	long dimTotal = (long)(dimX*dimY*dimZ);
+
        
         float *U_old, *d_U0, *d_U, *P1, *P2, *Q1, *Q2, *Q3, *V1, *V1_old, *V2, *V2_old, tau, sigma;
-        tau = pow(L2,-0.5);
-        sigma = pow(L2,-0.5);
+        tau = powf(L2,-0.5f);
+        sigma = tau;
                                       
         CHECK(cudaMalloc((void**)&d_U0,dimTotal*sizeof(float)));
         CHECK(cudaMalloc((void**)&d_U,dimTotal*sizeof(float)));
@@ -611,41 +613,51 @@ extern "C" int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, flo
         
         if (dimZ == 1) {
 	/*2D case */
-        dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D);
-        dim3 dimGrid(idivup(dimX,BLKXSIZE2D), idivup(dimY,BLKYSIZE2D));
+	dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D);
+	dim3 dimGrid(idivup(dimX,BLKXSIZE2D), idivup(dimY,BLKYSIZE2D));
              
         for(int n=0; n < iterationsNumb; n++) {
 			
 	    /* Calculate Dual Variable P */
-            DualP_2D_kernel<<<dimGrid,dimBlock>>>(d_U, V1, V2, P1, P2, dimX, dimY, sigma);
-      	    CHECK(cudaDeviceSynchronize());
+            DualP_2D_kernel<<<dimGrid,dimBlock>>>(d_U, V1, V2, P1, P2, (long)(dimX), (long)(dimY), dimTotal, sigma);
+      	    checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
             /*Projection onto convex set for P*/
-            ProjP_2D_kernel<<<dimGrid,dimBlock>>>(P1, P2, dimX, dimY, alpha1);
-            CHECK(cudaDeviceSynchronize());
+            ProjP_2D_kernel<<<dimGrid,dimBlock>>>(P1, P2, (long)(dimX), (long)(dimY), dimTotal, alpha1);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );            
             /* Calculate Dual Variable Q */
-            DualQ_2D_kernel<<<dimGrid,dimBlock>>>(V1, V2, Q1, Q2, Q3, dimX, dimY, sigma);
-            CHECK(cudaDeviceSynchronize());
+            DualQ_2D_kernel<<<dimGrid,dimBlock>>>(V1, V2, Q1, Q2, Q3, (long)(dimX), (long)(dimY), dimTotal, sigma);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
             /*Projection onto convex set for Q*/
-            ProjQ_2D_kernel<<<dimGrid,dimBlock>>>(Q1, Q2, Q3, dimX, dimY, alpha0);
-            CHECK(cudaDeviceSynchronize());
+            ProjQ_2D_kernel<<<dimGrid,dimBlock>>>(Q1, Q2, Q3, (long)(dimX), (long)(dimY), dimTotal, alpha0);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
             /*saving U into U_old*/
-            copyIm_TGV_kernel<<<dimGrid,dimBlock>>>(d_U, U_old, dimX, dimY, dimTotal);
-            CHECK(cudaDeviceSynchronize());
+            copyIm_TGV_kernel<<<dimGrid,dimBlock>>>(d_U, U_old, (long)(dimX), (long)(dimY), dimTotal);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
             /*adjoint operation  -> divergence and projection of P*/
-            DivProjP_2D_kernel<<<dimGrid,dimBlock>>>(d_U, d_U0, P1, P2, dimX, dimY, lambda, tau);
-            CHECK(cudaDeviceSynchronize());
+            DivProjP_2D_kernel<<<dimGrid,dimBlock>>>(d_U, d_U0, P1, P2, (long)(dimX), (long)(dimY), dimTotal, lambda, tau);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
             /*get updated solution U*/
-            newU_kernel<<<dimGrid,dimBlock>>>(d_U, U_old, dimX, dimY, dimTotal);
-            CHECK(cudaDeviceSynchronize());
+            newU_kernel<<<dimGrid,dimBlock>>>(d_U, U_old, (long)(dimX), (long)(dimY), dimTotal);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
             /*saving V into V_old*/
-            copyIm_TGV_kernel_ar2<<<dimGrid,dimBlock>>>(V1, V2, V1_old, V2_old, dimX, dimY, dimTotal);
-            CHECK(cudaDeviceSynchronize());
+            copyIm_TGV_kernel_ar2<<<dimGrid,dimBlock>>>(V1, V2, V1_old, V2_old, (long)(dimX), (long)(dimY), dimTotal);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
             /* upd V*/
-            UpdV_2D_kernel<<<dimGrid,dimBlock>>>(V1, V2, P1, P2, Q1, Q2, Q3, dimX, dimY, tau);
-            CHECK(cudaDeviceSynchronize());
+            UpdV_2D_kernel<<<dimGrid,dimBlock>>>(V1, V2, P1, P2, Q1, Q2, Q3, (long)(dimX), (long)(dimY), dimTotal, tau);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
             /*get new V*/
-            newU_kernel_ar2<<<dimGrid,dimBlock>>>(V1, V2, V1_old, V2_old, dimX, dimY, dimTotal);
-            CHECK(cudaDeviceSynchronize());            
+            newU_kernel_ar2<<<dimGrid,dimBlock>>>(V1, V2, V1_old, V2_old, (long)(dimX), (long)(dimY), dimTotal);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
 	    }
         }
         else {
@@ -672,34 +684,44 @@ extern "C" int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, flo
 			
 	    /* Calculate Dual Variable P */
             DualP_3D_kernel<<<dimGrid,dimBlock>>>(d_U, V1, V2, V3, P1, P2, P3, dimX, dimY, dimZ, sigma);
-	    CHECK(cudaDeviceSynchronize());
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
             /*Projection onto convex set for P*/
             ProjP_3D_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, dimX, dimY, dimZ, alpha1);
-            CHECK(cudaDeviceSynchronize());
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
             /* Calculate Dual Variable Q */
             DualQ_3D_kernel<<<dimGrid,dimBlock>>>(V1, V2, V3, Q1, Q2, Q3, Q4, Q5, Q6, dimX, dimY, dimZ, sigma);
-            CHECK(cudaDeviceSynchronize());
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
              /*Projection onto convex set for Q*/
             ProjQ_3D_kernel<<<dimGrid,dimBlock>>>(Q1, Q2, Q3, Q4, Q5, Q6, dimX, dimY, dimZ, alpha0);
-            CHECK(cudaDeviceSynchronize());
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
             /*saving U into U_old*/
             copyIm_TGV_kernel3D<<<dimGrid,dimBlock>>>(d_U, U_old, dimX, dimY, dimZ, dimTotal);
-            CHECK(cudaDeviceSynchronize());
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
             /*adjoint operation  -> divergence and projection of P*/
             DivProjP_3D_kernel<<<dimGrid,dimBlock>>>(d_U, d_U0, P1, P2, P3, dimX, dimY, dimZ, lambda, tau);
-            CHECK(cudaDeviceSynchronize());
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
             /*get updated solution U*/
             newU_kernel3D<<<dimGrid,dimBlock>>>(d_U, U_old, dimX, dimY, dimZ, dimTotal);
-            CHECK(cudaDeviceSynchronize());
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
             /*saving V into V_old*/
             copyIm_TGV_kernel3D_ar3<<<dimGrid,dimBlock>>>(V1, V2, V3, V1_old, V2_old, V3_old, dimX, dimY, dimZ, dimTotal);           
-            CHECK(cudaDeviceSynchronize());
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
             /* upd V*/
             UpdV_3D_kernel<<<dimGrid,dimBlock>>>(V1, V2, V3, P1, P2, P3, Q1, Q2, Q3, Q4, Q5, Q6, dimX, dimY, dimZ, tau);
-            CHECK(cudaDeviceSynchronize());
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
             /*get new V*/
             newU_kernel3D_ar3<<<dimGrid,dimBlock>>>(V1, V2, V3, V1_old, V2_old, V3_old, dimX, dimY, dimZ, dimTotal);
-            CHECK(cudaDeviceSynchronize());            
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
 	        }
 	        
         CHECK(cudaFree(Q4));
@@ -724,5 +746,7 @@ extern "C" int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, flo
         CHECK(cudaFree(V2));
         CHECK(cudaFree(V1_old));
         CHECK(cudaFree(V2_old));
+
+        cudaDeviceReset();
         return 0;
 }
diff --git a/Core/regularisers_GPU/TGV_GPU_core.h b/Core/regularisers_GPU/TGV_GPU_core.h
index 9f73d1c..e8f9c6e 100644
--- a/Core/regularisers_GPU/TGV_GPU_core.h
+++ b/Core/regularisers_GPU/TGV_GPU_core.h
@@ -1,6 +1,8 @@
 #ifndef __TGV_GPU_H__
 #define __TGV_GPU_H__
+
 #include "CCPiDefines.h"
+#include <memory.h>
 #include <stdio.h>
 
 extern "C" CCPI_EXPORT int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY, int dimZ);
diff --git a/Wrappers/Python/demos/SoftwareX_supp/Demo_RealData_Recon_SX.py b/Wrappers/Python/demos/SoftwareX_supp/Demo_RealData_Recon_SX.py
index 8a11f04..4cd680e 100644
--- a/Wrappers/Python/demos/SoftwareX_supp/Demo_RealData_Recon_SX.py
+++ b/Wrappers/Python/demos/SoftwareX_supp/Demo_RealData_Recon_SX.py
@@ -22,7 +22,8 @@ import numpy as np
 import matplotlib.pyplot as plt
 import h5py
 from tomorec.supp.suppTools import normaliser
-
+import time
+from libtiff import TIFF
 
 # load dendritic projection data
 h5f = h5py.File('data/DendrData_3D.h5','r')
@@ -36,7 +37,7 @@ h5f.close()
 data_norm = normaliser(dataRaw, flats, darks, log='log')
 del dataRaw, darks, flats
 
-intens_max = 2
+intens_max = 2.3
 plt.figure() 
 plt.subplot(131)
 plt.imshow(data_norm[:,150,:],vmin=0, vmax=intens_max)
@@ -49,29 +50,38 @@ plt.imshow(data_norm[:,:,600],vmin=0, vmax=intens_max)
 plt.title('Tangentogram view')
 plt.show()
 
-
 detectorHoriz = np.size(data_norm,2)
 det_y_crop = [i for i in range(0,detectorHoriz-22)]
 N_size = 950 # reconstruction domain
+time_label = int(time.time())
 #%%
+"""
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 print ("%%%%%%%%%%%%Reconstructing with FBP method %%%%%%%%%%%%%%%%%")
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 from tomorec.methodsDIR import RecToolsDIR
 
 RectoolsDIR = RecToolsDIR(DetectorsDimH = np.size(det_y_crop),  # DetectorsDimH # detector dimension (horizontal)
-                    DetectorsDimV = 10,  # DetectorsDimV # detector dimension (vertical) for 3D case only
+                    DetectorsDimV = 200,  # DetectorsDimV # detector dimension (vertical) for 3D case only
                     AnglesVec = angles_rad, # array of angles in radians
                     ObjSize = N_size, # a scalar to define reconstructed object dimensions
                     device='gpu')
 
-FBPrec = RectoolsDIR.FBP(data_norm[0:10,:,det_y_crop])
+FBPrec = RectoolsDIR.FBP(data_norm[20:220,:,det_y_crop])
 
 plt.figure()
-#plt.imshow(FBPrec[0,150:550,150:550], vmin=0, vmax=0.005, cmap="gray")
 plt.imshow(FBPrec[0,:,:], vmin=0, vmax=0.005, cmap="gray")
 plt.title('FBP reconstruction')
 
+FBPrec += np.abs(np.min(FBPrec))
+multiplier = (int)(65535/(np.max(FBPrec)))
+
+# saving to tiffs (16bit)
+for i in range(0,np.size(FBPrec,0)):
+    tiff = TIFF.open('Dendr_FBP'+'_'+str(i)+'.tiff', mode='w')
+    tiff.write_image(np.uint16(FBPrec[i,:,:]*multiplier))
+    tiff.close()
+"""
 #%%
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 print ("Reconstructing with ADMM method using TomoRec software")
@@ -79,7 +89,7 @@ print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 # initialise TomoRec ITERATIVE reconstruction class ONCE
 from tomorec.methodsIR import RecToolsIR
 RectoolsIR = RecToolsIR(DetectorsDimH =  np.size(det_y_crop),  # DetectorsDimH # detector dimension (horizontal)
-                    DetectorsDimV = 5,  # DetectorsDimV # detector dimension (vertical) for 3D case only
+                    DetectorsDimV = 200,  # DetectorsDimV # detector dimension (vertical) for 3D case only
                     AnglesVec = angles_rad, # array of angles in radians
                     ObjSize = N_size, # a scalar to define reconstructed object dimensions
                     datafidelity='LS',# data fidelity, choose LS, PWLS (wip), GH (wip), Student (wip)
@@ -88,29 +98,125 @@ RectoolsIR = RecToolsIR(DetectorsDimH =  np.size(det_y_crop),  # DetectorsDimH #
                     tolerance = 1e-08, # tolerance to stop outer iterations earlier
                     device='gpu')
 #%%
-print ("Reconstructing with ADMM method using ROF-TV penalty")
+print ("Reconstructing with ADMM method using SB-TV penalty")
+RecADMM_reg_sbtv = RectoolsIR.ADMM(data_norm[20:220,:,det_y_crop],
+                              rho_const = 2000.0, \
+                              iterationsADMM = 15, \
+                              regularisation = 'SB_TV', \
+                              regularisation_parameter = 0.00085,\
+                              regularisation_iterations = 50)
+
+sliceSel = 5
+max_val = 0.003
+plt.figure() 
+plt.subplot(131)
+plt.imshow(RecADMM_reg_sbtv[sliceSel,:,:],vmin=0, vmax=max_val, cmap="gray")
+plt.title('3D ADMM-SB-TV Reconstruction, axial view')
+
+plt.subplot(132)
+plt.imshow(RecADMM_reg_sbtv[:,sliceSel,:],vmin=0, vmax=max_val, cmap="gray")
+plt.title('3D ADMM-SB-TV Reconstruction, coronal view')
+
+plt.subplot(133)
+plt.imshow(RecADMM_reg_sbtv[:,:,sliceSel],vmin=0, vmax=max_val, cmap="gray")
+plt.title('3D ADMM-SB-TV Reconstruction, sagittal view')
+plt.show()
+
+multiplier = (int)(65535/(np.max(RecADMM_reg_sbtv)))
+
+# saving to tiffs (16bit)
+for i in range(0,np.size(RecADMM_reg_sbtv,0)):
+    tiff = TIFF.open('Dendr_ADMM_SBTV'+'_'+str(i)+'.tiff', mode='w')
+    tiff.write_image(np.uint16(RecADMM_reg_sbtv[i,:,:]*multiplier))
+    tiff.close()
 
-RecADMM_reg_roftv = RectoolsIR.ADMM(data_norm[0:5,:,det_y_crop],
+# Saving recpnstructed data with a unique time label
+np.save('Dendr_ADMM_SBTV'+str(time_label)+'.npy', RecADMM_reg_sbtv)
+del RecADMM_reg_sbtv
+#%%
+print ("Reconstructing with ADMM method using ROF-LLT penalty")
+RecADMM_reg_rofllt = RectoolsIR.ADMM(data_norm[20:220,:,det_y_crop],
                               rho_const = 2000.0, \
-                              iterationsADMM = 3, \
-                              regularisation = 'FGP_TV', \
-                              regularisation_parameter = 0.001,\
-                              regularisation_iterations = 80)
+                              iterationsADMM = 15, \
+                              regularisation = 'LLT_ROF', \
+                              regularisation_parameter = 0.0009,\
+                              regularisation_parameter2 = 0.0007,\
+                              time_marching_parameter = 0.001,\
+                              regularisation_iterations = 450)
+
+sliceSel = 5
+max_val = 0.003
+plt.figure() 
+plt.subplot(131)
+plt.imshow(RecADMM_reg_rofllt[sliceSel,:,:],vmin=0, vmax=max_val)
+plt.title('3D ADMM-ROFLLT Reconstruction, axial view')
+
+plt.subplot(132)
+plt.imshow(RecADMM_reg_rofllt[:,sliceSel,:],vmin=0, vmax=max_val)
+plt.title('3D ADMM-ROFLLT Reconstruction, coronal view')
+
+plt.subplot(133)
+plt.imshow(RecADMM_reg_rofllt[:,:,sliceSel],vmin=0, vmax=max_val)
+plt.title('3D ADMM-ROFLLT Reconstruction, sagittal view')
+plt.show()
+
+multiplier = (int)(65535/(np.max(RecADMM_reg_rofllt)))
+
+# saving to tiffs (16bit)
+for i in range(0,np.size(RecADMM_reg_rofllt,0)):
+    tiff = TIFF.open('Dendr_ADMM_ROFLLT'+'_'+str(i)+'.tiff', mode='w')
+    tiff.write_image(np.uint16(RecADMM_reg_rofllt[i,:,:]*multiplier))
+    tiff.close()
+
+
+# Saving recpnstructed data with a unique time label
+np.save('Dendr_ADMM_ROFLLT'+str(time_label)+'.npy', RecADMM_reg_rofllt)
+del RecADMM_reg_rofllt
+#%%
+RectoolsIR = RecToolsIR(DetectorsDimH =  np.size(det_y_crop),  # DetectorsDimH # detector dimension (horizontal)
+                    DetectorsDimV = 10,  # DetectorsDimV # detector dimension (vertical) for 3D case only
+                    AnglesVec = angles_rad, # array of angles in radians
+                    ObjSize = N_size, # a scalar to define reconstructed object dimensions
+                    datafidelity='LS',# data fidelity, choose LS, PWLS (wip), GH (wip), Student (wip)
+                    nonnegativity='ENABLE', # enable nonnegativity constraint (set to 'ENABLE')
+                    OS_number = None, # the number of subsets, NONE/(or > 1) ~ classical / ordered subsets
+                    tolerance = 1e-08, # tolerance to stop outer iterations earlier
+                    device='cpu')
 
+print ("Reconstructing with ADMM method using TGV penalty")
+RecADMM_reg_tgv = RectoolsIR.ADMM(data_norm[0:10,:,det_y_crop],
+                              rho_const = 2000.0, \
+                              iterationsADMM = 15, \
+                              regularisation = 'TGV', \
+                              regularisation_parameter = 0.01,\
+                              regularisation_iterations = 450)
 
-sliceSel = 2
-max_val = 0.005
+sliceSel = 7
+max_val = 0.003
 plt.figure() 
 plt.subplot(131)
-plt.imshow(RecADMM_reg_roftv[sliceSel,:,:],vmin=0, vmax=max_val)
-plt.title('3D ADMM-ROF-TV Reconstruction, axial view')
+plt.imshow(RecADMM_reg_tgv[sliceSel,:,:],vmin=0, vmax=max_val)
+plt.title('3D ADMM-TGV Reconstruction, axial view')
 
 plt.subplot(132)
-plt.imshow(RecADMM_reg_roftv[:,sliceSel,:],vmin=0, vmax=max_val)
-plt.title('3D ADMM-ROF-TV Reconstruction, coronal view')
+plt.imshow(RecADMM_reg_tgv[:,sliceSel,:],vmin=0, vmax=max_val)
+plt.title('3D ADMM-TGV Reconstruction, coronal view')
 
 plt.subplot(133)
-plt.imshow(RecADMM_reg_roftv[:,:,sliceSel],vmin=0, vmax=max_val)
-plt.title('3D ADMM-ROF-TV Reconstruction, sagittal view')
+plt.imshow(RecADMM_reg_tgv[:,:,sliceSel],vmin=0, vmax=max_val)
+plt.title('3D ADMM-TGV Reconstruction, sagittal view')
 plt.show()
+
+multiplier = (int)(65535/(np.max(RecADMM_reg_tgv)))
+
+# saving to tiffs (16bit)
+for i in range(0,np.size(RecADMM_reg_tgv,0)):
+    tiff = TIFF.open('Dendr_ADMM_TGV'+'_'+str(i)+'.tiff', mode='w')
+    tiff.write_image(np.uint16(RecADMM_reg_tgv[i,:,:]*multiplier))
+    tiff.close()
+
+
+# Saving recpnstructed data with a unique time label
+#np.save('Dendr_ADMM_TGV'+str(time_label)+'.npy', RecADMM_reg_tgv)
+del RecADMM_reg_tgv
 #%%
 \ No newline at end of file
diff --git a/Wrappers/Python/demos/SoftwareX_supp/Demo_SimulData_Recon_SX.py b/Wrappers/Python/demos/SoftwareX_supp/Demo_SimulData_Recon_SX.py
index 5dbd436..a022ad7 100644
--- a/Wrappers/Python/demos/SoftwareX_supp/Demo_SimulData_Recon_SX.py
+++ b/Wrappers/Python/demos/SoftwareX_supp/Demo_SimulData_Recon_SX.py
@@ -197,32 +197,3 @@ Qtools = QualityTools(phantom, RecADMM_reg_tgv)
 RMSE_admm_tgv = Qtools.rmse()
 print("Root Mean Square Error for ADMM-TGV is {}".format(RMSE_admm_tgv))
 #%%
-print ("Reconstructing with ADMM method using Diff4th penalty")
-RecADMM_reg_diff4th = RectoolsIR.ADMM(projdata_norm,
-                              rho_const = 2000.0, \
-                              iterationsADMM = 30, \
-                              regularisation = 'Diff4th', \
-                              regularisation_parameter = 0.0005,\
-                              regularisation_iterations = 200)
-
-sliceSel = int(0.5*N_size)
-max_val = 1
-plt.figure() 
-plt.subplot(131)
-plt.imshow(RecADMM_reg_diff4th[sliceSel,:,:],vmin=0, vmax=max_val)
-plt.title('3D ADMM-Diff4th Reconstruction, axial view')
-
-plt.subplot(132)
-plt.imshow(RecADMM_reg_diff4th[:,sliceSel,:],vmin=0, vmax=max_val)
-plt.title('3D ADMM-Diff4th Reconstruction, coronal view')
-
-plt.subplot(133)
-plt.imshow(RecADMM_reg_diff4th[:,:,sliceSel],vmin=0, vmax=max_val)
-plt.title('3D ADMM-Diff4th Reconstruction, sagittal view')
-plt.show()
-
-# calculate errors 
-Qtools = QualityTools(phantom, RecADMM_reg_diff4th)
-RMSE_admm_diff4th = Qtools.rmse()
-print("Root Mean Square Error for ADMM-Diff4th is {}".format(RMSE_admm_diff4th))
-#%%
author	dkazanc <dkazanc@hotmail.com>	2019-02-25 17:13:28 +0000
committer	dkazanc <dkazanc@hotmail.com>	2019-02-25 17:13:28 +0000
commit	0587f96cafac66d9ee04005a80b43514c6d2a753 (patch)
tree	59481436cee835066241d74cf595a77bb2f960a4
parent	dc5a92771dcf9bfd262ba34b0fc8a1c2df40897d (diff)
download	regularization-0587f96cafac66d9ee04005a80b43514c6d2a753.tar.gz regularization-0587f96cafac66d9ee04005a80b43514c6d2a753.tar.bz2 regularization-0587f96cafac66d9ee04005a80b43514c6d2a753.tar.xz regularization-0587f96cafac66d9ee04005a80b43514c6d2a753.zip