summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authordkazanc <dkazanc@hotmail.com>2019-02-26 15:17:33 +0000
committerdkazanc <dkazanc@hotmail.com>2019-02-26 15:17:33 +0000
commita596196c1e2569a2b530463f1dcf14e59e293376 (patch)
treea9115f56436f0bbfce7c032d42313f2892f240f0
parente05bcdce76e4e20154228e6ccf56207412340c96 (diff)
downloadregularization-a596196c1e2569a2b530463f1dcf14e59e293376.tar.gz
regularization-a596196c1e2569a2b530463f1dcf14e59e293376.tar.bz2
regularization-a596196c1e2569a2b530463f1dcf14e59e293376.tar.xz
regularization-a596196c1e2569a2b530463f1dcf14e59e293376.zip
tgv 3 cudad fix
-rw-r--r--Core/regularisers_GPU/TGV_GPU_core.cu270
1 files changed, 130 insertions, 140 deletions
diff --git a/Core/regularisers_GPU/TGV_GPU_core.cu b/Core/regularisers_GPU/TGV_GPU_core.cu
index 1f660ad..3f3c67c 100644
--- a/Core/regularisers_GPU/TGV_GPU_core.cu
+++ b/Core/regularisers_GPU/TGV_GPU_core.cu
@@ -51,9 +51,9 @@ limitations under the License.
/********************************************************************/
/***************************2D Functions*****************************/
/********************************************************************/
-__global__ void DualP_2D_kernel(float *U, float *V1, float *V2, float *P1, float *P2, long dimX, long dimY, long num_total, float sigma)
+__global__ void DualP_2D_kernel(float *U, float *V1, float *V2, float *P1, float *P2, long dimX, long dimY, float sigma)
{
- const long i = blockDim.x * blockIdx.x + threadIdx.x;
+ const long i = blockDim.x * blockIdx.x + threadIdx.x;
const long j = blockDim.y * blockIdx.y + threadIdx.y;
long index = i + (dimX)*j;
@@ -80,7 +80,7 @@ __global__ void ProjP_2D_kernel(float *P1, float *P2, long dimX, long dimY, long
long index = i + (dimX)*j;
if ((i < dimX) && (j < dimY)) {
- grad_magn = sqrtf(pow(P1[index],2) + pow(P2[index],2));
+ grad_magn = sqrtf(powf(P1[index],2) + powf(P2[index],2));
grad_magn = grad_magn/alpha1;
if (grad_magn > 1.0f) {
P1[index] /= grad_magn;
@@ -90,7 +90,7 @@ __global__ void ProjP_2D_kernel(float *P1, float *P2, long dimX, long dimY, long
return;
}
-__global__ void DualQ_2D_kernel(float *V1, float *V2, float *Q1, float *Q2, float *Q3, long dimX, long dimY, long num_total, float sigma)
+__global__ void DualQ_2D_kernel(float *V1, float *V2, float *Q1, float *Q2, float *Q3, long dimX, long dimY, float sigma)
{
float q1, q2, q11, q22;
const long i = blockDim.x * blockIdx.x + threadIdx.x;
@@ -118,7 +118,7 @@ __global__ void DualQ_2D_kernel(float *V1, float *V2, float *Q1, float *Q2, floa
return;
}
-__global__ void ProjQ_2D_kernel(float *Q1, float *Q2, float *Q3, long dimX, long dimY, long num_total, float alpha0)
+__global__ void ProjQ_2D_kernel(float *Q1, float *Q2, float *Q3, long dimX, long dimY, float alpha0)
{
float grad_magn;
const long i = blockDim.x * blockIdx.x + threadIdx.x;
@@ -127,7 +127,7 @@ __global__ void ProjQ_2D_kernel(float *Q1, float *Q2, float *Q3, long dimX, long
long index = i + (dimX)*j;
if ((i < dimX) && (j < dimY)) {
- grad_magn = sqrt(pow(Q1[index],2) + pow(Q2[index],2) + 2*pow(Q3[index],2));
+ grad_magn = sqrt(powf(Q1[index],2) + powf(Q2[index],2) + 2*powf(Q3[index],2));
grad_magn = grad_magn/alpha0;
if (grad_magn > 1.0f) {
Q1[index] /= grad_magn;
@@ -138,7 +138,7 @@ __global__ void ProjQ_2D_kernel(float *Q1, float *Q2, float *Q3, long dimX, long
return;
}
-__global__ void DivProjP_2D_kernel(float *U, float *U0, float *P1, float *P2, long dimX, long dimY, long num_total, float lambda, float tau)
+__global__ void DivProjP_2D_kernel(float *U, float *U0, float *P1, float *P2, long dimX, long dimY, float lambda, float tau)
{
float P_v1, P_v2, div;
const long i = blockDim.x * blockIdx.x + threadIdx.x;
@@ -165,7 +165,7 @@ __global__ void DivProjP_2D_kernel(float *U, float *U0, float *P1, float *P2, lo
return;
}
-__global__ void UpdV_2D_kernel(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, float *Q3, long dimX, long dimY, long num_total, float tau)
+__global__ void UpdV_2D_kernel(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, float *Q3, long dimX, long dimY, float tau)
{
float q1, q3_x, q2, q3_y, div1, div2;
long i1, j1;
@@ -219,7 +219,7 @@ __global__ void UpdV_2D_kernel(float *V1, float *V2, float *P1, float *P2, float
return;
}
-__global__ void copyIm_TGV_kernel(float *U, float *U_old, long dimX, long dimY, long num_total)
+__global__ void copyIm_TGV_kernel(float *U, float *U_old, long dimX, long dimY)
{
const long i = blockDim.x * blockIdx.x + threadIdx.x;
const long j = blockDim.y * blockIdx.y + threadIdx.y;
@@ -231,7 +231,7 @@ __global__ void copyIm_TGV_kernel(float *U, float *U_old, long dimX, long dimY,
}
}
-__global__ void copyIm_TGV_kernel_ar2(float *V1, float *V2, float *V1_old, float *V2_old, long dimX, long dimY, long num_total)
+__global__ void copyIm_TGV_kernel_ar2(float *V1, float *V2, float *V1_old, float *V2_old, long dimX, long dimY)
{
const long i = blockDim.x * blockIdx.x + threadIdx.x;
const long j = blockDim.y * blockIdx.y + threadIdx.y;
@@ -244,7 +244,7 @@ __global__ void copyIm_TGV_kernel_ar2(float *V1, float *V2, float *V1_old, float
}
}
-__global__ void newU_kernel(float *U, float *U_old, long dimX, long dimY, long num_total)
+__global__ void newU_kernel(float *U, float *U_old, long dimX, long dimY)
{
const long i = blockDim.x * blockIdx.x + threadIdx.x;
const long j = blockDim.y * blockIdx.y + threadIdx.y;
@@ -257,7 +257,7 @@ __global__ void newU_kernel(float *U, float *U_old, long dimX, long dimY, long n
}
-__global__ void newU_kernel_ar2(float *V1, float *V2, float *V1_old, float *V2_old, long dimX, long dimY, long num_total)
+__global__ void newU_kernel_ar2(float *V1, float *V2, float *V1_old, float *V2_old, long dimX, long dimY)
{
const long i = blockDim.x * blockIdx.x + threadIdx.x;
const long j = blockDim.y * blockIdx.y + threadIdx.y;
@@ -269,44 +269,45 @@ __global__ void newU_kernel_ar2(float *V1, float *V2, float *V1_old, float *V2_o
V2[index] = 2.0f*V2[index] - V2_old[index];
}
}
+
/********************************************************************/
/***************************3D Functions*****************************/
/********************************************************************/
-__global__ void DualP_3D_kernel(float *U, float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ, float sigma)
+__global__ void DualP_3D_kernel(float *U, float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, long dimX, long dimY, long dimZ, float sigma)
{
- int index;
- const int i = blockDim.x * blockIdx.x + threadIdx.x;
- const int j = blockDim.y * blockIdx.y + threadIdx.y;
- const int k = blockDim.z * blockIdx.z + threadIdx.z;
-
- int num_total = dimX*dimY*dimZ;
-
- index = (dimX*dimY)*k + i*dimX+j;
- if (index < num_total) {
+ long index;
+ const long i = blockDim.x * blockIdx.x + threadIdx.x;
+ const long j = blockDim.y * blockIdx.y + threadIdx.y;
+ const long k = blockDim.z * blockIdx.z + threadIdx.z;
+
+ index = (dimX*dimY)*k + i*dimX+j;
+
+ if ((i < dimX) && (j < dimY) && (k < dimZ)) {
/* symmetric boundary conditions (Neuman) */
if ((i >= 0) && (i < dimX-1)) P1[index] += sigma*((U[(dimX*dimY)*k + (i+1)*dimX+j] - U[index]) - V1[index]);
- else P1[index] += sigma*(-V1[index]);
+ else if (i == dimX-1) P1[index] -= sigma*(V1[index]);
+ else P1[index] = 0.0f;
if ((j >= 0) && (j < dimY-1)) P2[index] += sigma*((U[(dimX*dimY)*k + i*dimX+(j+1)] - U[index]) - V2[index]);
- else P2[index] += sigma*(-V2[index]);
+ else if P2[index] -= sigma*(V2[index]);
+ else P2[index] = 0.0f;
if ((k >= 0) && (k < dimZ-1)) P3[index] += sigma*((U[(dimX*dimY)*(k+1) + i*dimX+(j)] - U[index]) - V3[index]);
- else P3[index] += sigma*(-V3[index]);
- }
+ else if P3[index] -= sigma*(V3[index]);
+ else P3[index] = 0.0f;
+ }
return;
-}
+}
-__global__ void ProjP_3D_kernel(float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ, float alpha1)
+__global__ void ProjP_3D_kernel(float *P1, float *P2, float *P3, long dimX, long dimY, long dimZ, float alpha1)
{
float grad_magn;
- int index;
- int num_total = dimX*dimY*dimZ;
-
- int i = blockDim.x * blockIdx.x + threadIdx.x;
- int j = blockDim.y * blockIdx.y + threadIdx.y;
- int k = blockDim.z * blockIdx.z + threadIdx.z;
+ long index;
+ const long i = blockDim.x * blockIdx.x + threadIdx.x;
+ const long j = blockDim.y * blockIdx.y + threadIdx.y;
+ const long k = blockDim.z * blockIdx.z + threadIdx.z;
index = (dimX*dimY)*k + i*dimX+j;
- if (index < num_total) {
- grad_magn = (sqrtf(pow(P1[index],2) + pow(P2[index],2) + pow(P3[index],2)))/alpha1;
+ if ((i < dimX) && (j < dimY) && (k < dimZ)) {
+ grad_magn = (sqrtf(powf(P1[index],2) + powf(P2[index],2) + powf(P3[index],2)))/alpha1;
if (grad_magn > 1.0f) {
P1[index] /= grad_magn;
P2[index] /= grad_magn;
@@ -316,23 +317,22 @@ __global__ void ProjP_3D_kernel(float *P1, float *P2, float *P3, int dimX, int d
return;
}
-__global__ void DualQ_3D_kernel(float *V1, float *V2, float *V3, float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, int dimX, int dimY, int dimZ, float sigma)
+__global__ void DualQ_3D_kernel(float *V1, float *V2, float *V3, float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, long dimX, long dimY, long dimZ, float sigma)
{
- int index;
- float q1, q2, q3, q11, q22, q33, q44, q55, q66;
- int num_total = dimX*dimY*dimZ;
-
- int i = blockDim.x * blockIdx.x + threadIdx.x;
- int j = blockDim.y * blockIdx.y + threadIdx.y;
- int k = blockDim.z * blockIdx.z + threadIdx.z;
+ float q1, q2, q3, q11, q22, q33, q44, q55, q66;
+ long index;
+
+ const long i = blockDim.x * blockIdx.x + threadIdx.x;
+ const long j = blockDim.y * blockIdx.y + threadIdx.y;
+ const long k = blockDim.z * blockIdx.z + threadIdx.z;
index = (dimX*dimY)*k + i*dimX+j;
- int i1 = (dimX*dimY)*k + (i+1)*dimX+j;
- int j1 = (dimX*dimY)*k + (i)*dimX+(j+1);
- int k1 = (dimX*dimY)*(k+1) + (i)*dimX+(j);
+ long i1 = (dimX*dimY)*k + (i+1)*dimX+j;
+ long j1 = (dimX*dimY)*k + (i)*dimX+(j+1);
+ long k1 = (dimX*dimY)*(k+1) + (i)*dimX+(j);
- if (index < num_total) {
+ if ((i < dimX) && (j < dimY) && (k < dimZ)) {
q1 = 0.0f; q11 = 0.0f; q33 = 0.0f; q2 = 0.0f; q22 = 0.0f; q55 = 0.0f; q3 = 0.0f; q44 = 0.0f; q66 = 0.0f;
/* boundary conditions (Neuman) */
@@ -359,20 +359,18 @@ __global__ void DualQ_3D_kernel(float *V1, float *V2, float *V3, float *Q1, floa
return;
}
-__global__ void ProjQ_3D_kernel(float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, int dimX, int dimY, int dimZ, float alpha0)
+__global__ void ProjQ_3D_kernel(float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, long dimX, long dimY, long dimZ, float alpha0)
{
float grad_magn;
- int index;
- int num_total = dimX*dimY*dimZ;
-
- int i = blockDim.x * blockIdx.x + threadIdx.x;
- int j = blockDim.y * blockIdx.y + threadIdx.y;
- int k = blockDim.z * blockIdx.z + threadIdx.z;
+ long index;
+ const long i = blockDim.x * blockIdx.x + threadIdx.x;
+ const long j = blockDim.y * blockIdx.y + threadIdx.y;
+ const long k = blockDim.z * blockIdx.z + threadIdx.z;
index = (dimX*dimY)*k + i*dimX+j;
- if (index < num_total) {
- grad_magn = sqrtf(pow(Q1[index],2) + pow(Q2[index],2) + pow(Q3[index],2) + 2.0f*pow(Q4[index],2) + 2.0f*pow(Q5[index],2) + 2.0f*pow(Q6[index],2));
+ if ((i < dimX) && (j < dimY) && (k < dimZ)) {
+ grad_magn = sqrtf(powf(Q1[index],2) + powf(Q2[index],2) + powf(Q3[index],2) + 2.0f*powf(Q4[index],2) + 2.0f*powf(Q5[index],2) + 2.0f*powf(Q6[index],2));
grad_magn = grad_magn/alpha0;
if (grad_magn > 1.0f) {
Q1[index] /= grad_magn;
@@ -385,60 +383,56 @@ __global__ void ProjQ_3D_kernel(float *Q1, float *Q2, float *Q3, float *Q4, floa
}
return;
}
-__global__ void DivProjP_3D_kernel(float *U, float *U0, float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ, float lambda, float tau)
+__global__ void DivProjP_3D_kernel(float *U, float *U0, float *P1, float *P2, float *P3, long dimX, long dimY, long dimZ, float lambda, float tau)
{
float P_v1, P_v2, P_v3, div;
- int index;
- int num_total = dimX*dimY*dimZ;
-
-
- int i = blockDim.x * blockIdx.x + threadIdx.x;
- int j = blockDim.y * blockIdx.y + threadIdx.y;
- int k = blockDim.z * blockIdx.z + threadIdx.z;
+ long index;
+ const long i = blockDim.x * blockIdx.x + threadIdx.x;
+ const long j = blockDim.y * blockIdx.y + threadIdx.y;
+ const long k = blockDim.z * blockIdx.z + threadIdx.z;
index = (dimX*dimY)*k + i*dimX+j;
- int i1 = (dimX*dimY)*k + (i-1)*dimX+j;
- int j1 = (dimX*dimY)*k + (i)*dimX+(j-1);
- int k1 = (dimX*dimY)*(k-1) + (i)*dimX+(j);
-
- if (index < num_total) {
- P_v1 = 0.0f; P_v2 = 0.0f; P_v3 = 0.0f;
+ long i1 = (dimX*dimY)*k + (i-1)*dimX+j;
+ long j1 = (dimX*dimY)*k + (i)*dimX+(j-1);
+ long k1 = (dimX*dimY)*(k-1) + (i)*dimX+(j);
- if (i == 0) P_v1 = P1[index];
- if (i == dimX-1) P_v1 = -P1[i1];
+ if ((i < dimX) && (j < dimY) && (k < dimZ)) {
+
if ((i > 0) && (i < dimX-1)) P_v1 = P1[index] - P1[i1];
+ else if (i == dimX-1) P_v1 = -P1[i1];
+ else if (i == 0) P_v1 = P1[index];
+ else P_v1 = 0.0f;
- if (j == 0) P_v2 = P2[index];
- if (j == dimY-1) P_v2 = -P2[j1];
if ((j > 0) && (j < dimY-1)) P_v2 = P2[index] - P2[j1];
-
- if (k == 0) P_v3 = P3[index];
- if (k == dimZ-1) P_v3 = -P3[k1];
- if ((k > 0) && (k < dimZ-1)) P_v3 = P3[index] - P3[k1];
-
-
+ else if (j == dimY-1) P_v2 = -P2[j1];
+ else if (j == 0) P_v2 = P2[index];
+ else P_v2 = 0.0f;
+
+ if ((k > 0) && (k < dimZ-1)) P_v3 = P3[index] - P3[k1];
+ else if (k == dimZ-1) P_v3 = -P3[k1];
+ else if (k == 0) P_v3 = P3[index];
+ else P_v3 = 0.0f;
+
div = P_v1 + P_v2 + P_v3;
U[index] = (lambda*(U[index] + tau*div) + tau*U0[index])/(lambda + tau);
}
return;
}
-__global__ void UpdV_3D_kernel(float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, int dimX, int dimY, int dimZ, float tau)
+__global__ void UpdV_3D_kernel(float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, long dimX, long dimY, long dimZ, float tau)
{
float q1, q4x, q5x, q2, q4y, q6y, q6z, q5z, q3, div1, div2, div3;
- int index;
- int num_total = dimX*dimY*dimZ;
-
- int i = blockDim.x * blockIdx.x + threadIdx.x;
- int j = blockDim.y * blockIdx.y + threadIdx.y;
- int k = blockDim.z * blockIdx.z + threadIdx.z;
+ long index;
+ const long i = blockDim.x * blockIdx.x + threadIdx.x;
+ const long j = blockDim.y * blockIdx.y + threadIdx.y;
+ const long k = blockDim.z * blockIdx.z + threadIdx.z;
index = (dimX*dimY)*k + i*dimX+j;
- int i1 = (dimX*dimY)*k + (i-1)*dimX+j;
- int j1 = (dimX*dimY)*k + (i)*dimX+(j-1);
- int k1 = (dimX*dimY)*(k-1) + (i)*dimX+(j);
+ long i1 = (dimX*dimY)*k + (i-1)*dimX+j;
+ long j1 = (dimX*dimY)*k + (i)*dimX+(j-1);
+ long k1 = (dimX*dimY)*(k-1) + (i)*dimX+(j);
/* Q1 - Q11, Q2 - Q22, Q3 - Q33, Q4 - Q21/Q12, Q5 - Q31/Q13, Q6 - Q32/Q23*/
- if (index < num_total) {
+ if ((i < dimX) && (j < dimY) && (k < dimZ)) {
/* boundary conditions (Neuman) */
if ((i > 0) && (i < dimX-1)) {
@@ -504,64 +498,60 @@ __global__ void UpdV_3D_kernel(float *V1, float *V2, float *V3, float *P1, float
return;
}
-__global__ void copyIm_TGV_kernel3D(float *U, float *U_old, int dimX, int dimY, int dimZ, int num_total)
+__global__ void copyIm_TGV_kernel3D(float *U, float *U_old, long dimX, long dimY, long dimZ)
{
- int index;
-
- int i = blockDim.x * blockIdx.x + threadIdx.x;
- int j = blockDim.y * blockIdx.y + threadIdx.y;
- int k = blockDim.z * blockIdx.z + threadIdx.z;
+ long index;
+ const long i = blockDim.x * blockIdx.x + threadIdx.x;
+ const long j = blockDim.y * blockIdx.y + threadIdx.y;
+ const long k = blockDim.z * blockIdx.z + threadIdx.z;
index = (dimX*dimY)*k + j*dimX+i;
- if (index < num_total) {
+ if ((i < dimX) && (j < dimY) && (k < dimZ)) {
U_old[index] = U[index];
}
}
-__global__ void copyIm_TGV_kernel3D_ar3(float *V1, float *V2, float *V3, float *V1_old, float *V2_old, float *V3_old, int dimX, int dimY, int dimZ, int num_total)
+__global__ void copyIm_TGV_kernel3D_ar3(float *V1, float *V2, float *V3, float *V1_old, float *V2_old, float *V3_old, long dimX, long dimY, long dimZ)
{
- int index;
-
- int i = blockDim.x * blockIdx.x + threadIdx.x;
- int j = blockDim.y * blockIdx.y + threadIdx.y;
- int k = blockDim.z * blockIdx.z + threadIdx.z;
+ long index;
+ const long i = blockDim.x * blockIdx.x + threadIdx.x;
+ const long j = blockDim.y * blockIdx.y + threadIdx.y;
+ const long k = blockDim.z * blockIdx.z + threadIdx.z;
index = (dimX*dimY)*k + j*dimX+i;
- if (index < num_total) {
+ if ((i < dimX) && (j < dimY) && (k < dimZ)) {
V1_old[index] = V1[index];
V2_old[index] = V2[index];
V3_old[index] = V3[index];
}
}
-__global__ void newU_kernel3D(float *U, float *U_old, int dimX, int dimY, int dimZ, int num_total)
+__global__ void newU_kernel3D(float *U, float *U_old, int dimX, int dimY, int dimZ)
{
- int index;
-
- int i = blockDim.x * blockIdx.x + threadIdx.x;
- int j = blockDim.y * blockIdx.y + threadIdx.y;
- int k = blockDim.z * blockIdx.z + threadIdx.z;
+ long index;
+ const long i = blockDim.x * blockIdx.x + threadIdx.x;
+ const long j = blockDim.y * blockIdx.y + threadIdx.y;
+ const long k = blockDim.z * blockIdx.z + threadIdx.z;
index = (dimX*dimY)*k + j*dimX+i;
- if (index < num_total) {
+ if ((i < dimX) && (j < dimY) && (k < dimZ)) {
U[index] = 2.0f*U[index] - U_old[index];
}
}
-__global__ void newU_kernel3D_ar3(float *V1, float *V2, float *V3, float *V1_old, float *V2_old, float *V3_old, int dimX, int dimY, int dimZ, int num_total)
+__global__ void newU_kernel3D_ar3(float *V1, float *V2, float *V3, float *V1_old, float *V2_old, float *V3_old, long dimX, long dimY, long dimZ)
{
- int index;
-
- int i = blockDim.x * blockIdx.x + threadIdx.x;
- int j = blockDim.y * blockIdx.y + threadIdx.y;
- int k = blockDim.z * blockIdx.z + threadIdx.z;
+ long index;
+ const long i = blockDim.x * blockIdx.x + threadIdx.x;
+ const long j = blockDim.y * blockIdx.y + threadIdx.y;
+ const long k = blockDim.z * blockIdx.z + threadIdx.z;
index = (dimX*dimY)*k + j*dimX+i;
- if (index < num_total) {
+ if ((i < dimX) && (j < dimY) && (k < dimZ)) {
V1[index] = 2.0f*V1[index] - V1_old[index];
V2[index] = 2.0f*V2[index] - V2_old[index];
V3[index] = 2.0f*V3[index] - V3_old[index];
@@ -620,43 +610,43 @@ extern "C" int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, flo
for(int n=0; n < iterationsNumb; n++) {
/* Calculate Dual Variable P */
- DualP_2D_kernel<<<dimGrid,dimBlock>>>(d_U, V1, V2, P1, P2, (long)(dimX), (long)(dimY), dimTotal, sigma);
+ DualP_2D_kernel<<<dimGrid,dimBlock>>>(d_U, V1, V2, P1, P2, (long)(dimX), (long)(dimY), sigma);
checkCudaErrors( cudaDeviceSynchronize() );
checkCudaErrors(cudaPeekAtLastError() );
/*Projection onto convex set for P*/
- ProjP_2D_kernel<<<dimGrid,dimBlock>>>(P1, P2, (long)(dimX), (long)(dimY), dimTotal, alpha1);
+ ProjP_2D_kernel<<<dimGrid,dimBlock>>>(P1, P2, (long)(dimX), (long)(dimY), alpha1);
checkCudaErrors( cudaDeviceSynchronize() );
checkCudaErrors(cudaPeekAtLastError() );
/* Calculate Dual Variable Q */
- DualQ_2D_kernel<<<dimGrid,dimBlock>>>(V1, V2, Q1, Q2, Q3, (long)(dimX), (long)(dimY), dimTotal, sigma);
+ DualQ_2D_kernel<<<dimGrid,dimBlock>>>(V1, V2, Q1, Q2, Q3, (long)(dimX), (long)(dimY), sigma);
checkCudaErrors( cudaDeviceSynchronize() );
checkCudaErrors(cudaPeekAtLastError() );
/*Projection onto convex set for Q*/
- ProjQ_2D_kernel<<<dimGrid,dimBlock>>>(Q1, Q2, Q3, (long)(dimX), (long)(dimY), dimTotal, alpha0);
+ ProjQ_2D_kernel<<<dimGrid,dimBlock>>>(Q1, Q2, Q3, (long)(dimX), (long)(dimY), alpha0);
checkCudaErrors( cudaDeviceSynchronize() );
checkCudaErrors(cudaPeekAtLastError() );
/*saving U into U_old*/
- copyIm_TGV_kernel<<<dimGrid,dimBlock>>>(d_U, U_old, (long)(dimX), (long)(dimY), dimTotal);
+ copyIm_TGV_kernel<<<dimGrid,dimBlock>>>(d_U, U_old, (long)(dimX), (long)(dimY));
checkCudaErrors( cudaDeviceSynchronize() );
checkCudaErrors(cudaPeekAtLastError() );
/*adjoint operation -> divergence and projection of P*/
- DivProjP_2D_kernel<<<dimGrid,dimBlock>>>(d_U, d_U0, P1, P2, (long)(dimX), (long)(dimY), dimTotal, lambda, tau);
+ DivProjP_2D_kernel<<<dimGrid,dimBlock>>>(d_U, d_U0, P1, P2, (long)(dimX), (long)(dimY), lambda, tau);
checkCudaErrors( cudaDeviceSynchronize() );
checkCudaErrors(cudaPeekAtLastError() );
/*get updated solution U*/
- newU_kernel<<<dimGrid,dimBlock>>>(d_U, U_old, (long)(dimX), (long)(dimY), dimTotal);
+ newU_kernel<<<dimGrid,dimBlock>>>(d_U, U_old, (long)(dimX), (long)(dimY));
checkCudaErrors( cudaDeviceSynchronize() );
checkCudaErrors(cudaPeekAtLastError() );
/*saving V into V_old*/
- copyIm_TGV_kernel_ar2<<<dimGrid,dimBlock>>>(V1, V2, V1_old, V2_old, (long)(dimX), (long)(dimY), dimTotal);
+ copyIm_TGV_kernel_ar2<<<dimGrid,dimBlock>>>(V1, V2, V1_old, V2_old, (long)(dimX), (long)(dimY));
checkCudaErrors( cudaDeviceSynchronize() );
checkCudaErrors(cudaPeekAtLastError() );
/* upd V*/
- UpdV_2D_kernel<<<dimGrid,dimBlock>>>(V1, V2, P1, P2, Q1, Q2, Q3, (long)(dimX), (long)(dimY), dimTotal, tau);
+ UpdV_2D_kernel<<<dimGrid,dimBlock>>>(V1, V2, P1, P2, Q1, Q2, Q3, (long)(dimX), (long)(dimY), tau);
checkCudaErrors( cudaDeviceSynchronize() );
checkCudaErrors(cudaPeekAtLastError() );
/*get new V*/
- newU_kernel_ar2<<<dimGrid,dimBlock>>>(V1, V2, V1_old, V2_old, (long)(dimX), (long)(dimY), dimTotal);
+ newU_kernel_ar2<<<dimGrid,dimBlock>>>(V1, V2, V1_old, V2_old, (long)(dimX), (long)(dimY));
checkCudaErrors( cudaDeviceSynchronize() );
checkCudaErrors(cudaPeekAtLastError() );
}
@@ -684,43 +674,43 @@ extern "C" int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, flo
for(int n=0; n < iterationsNumb; n++) {
/* Calculate Dual Variable P */
- DualP_3D_kernel<<<dimGrid,dimBlock>>>(d_U, V1, V2, V3, P1, P2, P3, dimX, dimY, dimZ, sigma);
+ DualP_3D_kernel<<<dimGrid,dimBlock>>>(d_U, V1, V2, V3, P1, P2, P3, (long)(dimX), (long)(dimY), (long)(dimZ), sigma);
checkCudaErrors( cudaDeviceSynchronize() );
checkCudaErrors(cudaPeekAtLastError() );
/*Projection onto convex set for P*/
- ProjP_3D_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, dimX, dimY, dimZ, alpha1);
+ ProjP_3D_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, (long)(dimX), (long)(dimY), (long)(dimZ), alpha1);
checkCudaErrors( cudaDeviceSynchronize() );
checkCudaErrors(cudaPeekAtLastError() );
/* Calculate Dual Variable Q */
- DualQ_3D_kernel<<<dimGrid,dimBlock>>>(V1, V2, V3, Q1, Q2, Q3, Q4, Q5, Q6, dimX, dimY, dimZ, sigma);
+ DualQ_3D_kernel<<<dimGrid,dimBlock>>>(V1, V2, V3, Q1, Q2, Q3, Q4, Q5, Q6, (long)(dimX), (long)(dimY), (long)(dimZ), sigma);
checkCudaErrors( cudaDeviceSynchronize() );
checkCudaErrors(cudaPeekAtLastError() );
/*Projection onto convex set for Q*/
- ProjQ_3D_kernel<<<dimGrid,dimBlock>>>(Q1, Q2, Q3, Q4, Q5, Q6, dimX, dimY, dimZ, alpha0);
+ ProjQ_3D_kernel<<<dimGrid,dimBlock>>>(Q1, Q2, Q3, Q4, Q5, Q6, (long)(dimX), (long)(dimY), (long)(dimZ), alpha0);
checkCudaErrors( cudaDeviceSynchronize() );
checkCudaErrors(cudaPeekAtLastError() );
/*saving U into U_old*/
- copyIm_TGV_kernel3D<<<dimGrid,dimBlock>>>(d_U, U_old, dimX, dimY, dimZ, dimTotal);
+ copyIm_TGV_kernel3D<<<dimGrid,dimBlock>>>(d_U, U_old, (long)(dimX), (long)(dimY), (long)(dimZ));
checkCudaErrors( cudaDeviceSynchronize() );
checkCudaErrors(cudaPeekAtLastError() );
/*adjoint operation -> divergence and projection of P*/
- DivProjP_3D_kernel<<<dimGrid,dimBlock>>>(d_U, d_U0, P1, P2, P3, dimX, dimY, dimZ, lambda, tau);
+ DivProjP_3D_kernel<<<dimGrid,dimBlock>>>(d_U, d_U0, P1, P2, P3, (long)(dimX), (long)(dimY), (long)(dimZ), lambda, tau);
checkCudaErrors( cudaDeviceSynchronize() );
checkCudaErrors(cudaPeekAtLastError() );
/*get updated solution U*/
- newU_kernel3D<<<dimGrid,dimBlock>>>(d_U, U_old, dimX, dimY, dimZ, dimTotal);
+ newU_kernel3D<<<dimGrid,dimBlock>>>(d_U, U_old, (long)(dimX), (long)(dimY), (long)(dimZ));
checkCudaErrors( cudaDeviceSynchronize() );
checkCudaErrors(cudaPeekAtLastError() );
/*saving V into V_old*/
- copyIm_TGV_kernel3D_ar3<<<dimGrid,dimBlock>>>(V1, V2, V3, V1_old, V2_old, V3_old, dimX, dimY, dimZ, dimTotal);
+ copyIm_TGV_kernel3D_ar3<<<dimGrid,dimBlock>>>(V1, V2, V3, V1_old, V2_old, V3_old, (long)(dimX), (long)(dimY), (long)(dimZ));
checkCudaErrors( cudaDeviceSynchronize() );
checkCudaErrors(cudaPeekAtLastError() );
/* upd V*/
- UpdV_3D_kernel<<<dimGrid,dimBlock>>>(V1, V2, V3, P1, P2, P3, Q1, Q2, Q3, Q4, Q5, Q6, dimX, dimY, dimZ, tau);
+ UpdV_3D_kernel<<<dimGrid,dimBlock>>>(V1, V2, V3, P1, P2, P3, Q1, Q2, Q3, Q4, Q5, Q6, (long)(dimX), (long)(dimY), (long)(dimZ), tau);
checkCudaErrors( cudaDeviceSynchronize() );
checkCudaErrors(cudaPeekAtLastError() );
/*get new V*/
- newU_kernel3D_ar3<<<dimGrid,dimBlock>>>(V1, V2, V3, V1_old, V2_old, V3_old, dimX, dimY, dimZ, dimTotal);
+ newU_kernel3D_ar3<<<dimGrid,dimBlock>>>(V1, V2, V3, V1_old, V2_old, V3_old, (long)(dimX), (long)(dimY), (long)(dimZ));
checkCudaErrors( cudaDeviceSynchronize() );
checkCudaErrors(cudaPeekAtLastError() );
}