diff options
| author | Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl> | 2014-06-23 10:08:37 +0000 | 
|---|---|---|
| committer | wpalenst <Willem.Jan.Palenstijn@cwi.nl> | 2014-06-23 10:08:37 +0000 | 
| commit | 00767fae7142a66b182508448951816f9c95f189 (patch) | |
| tree | 9f55239cd8960ce703aabc513f765afdbb91374e | |
| parent | 97ba7288f6d665c4442b3c9873128529c7dcf508 (diff) | |
| download | astra-00767fae7142a66b182508448951816f9c95f189.tar.gz astra-00767fae7142a66b182508448951816f9c95f189.tar.bz2 astra-00767fae7142a66b182508448951816f9c95f189.tar.xz astra-00767fae7142a66b182508448951816f9c95f189.zip | |
Remove angle limits in cone
| -rw-r--r-- | cuda/3d/cone_bp.cu | 83 | ||||
| -rw-r--r-- | cuda/3d/cone_fp.cu | 37 | 
2 files changed, 72 insertions, 48 deletions
| diff --git a/cuda/3d/cone_bp.cu b/cuda/3d/cone_bp.cu index 1b0154d..3b0fd70 100644 --- a/cuda/3d/cone_bp.cu +++ b/cuda/3d/cone_bp.cu @@ -87,13 +87,13 @@ bool bindProjDataTexture(const cudaArray* array)  } -__global__ void dev_cone_BP(void* D_volData, unsigned int volPitch, int startAngle, const SDimensions3D dims) +__global__ void dev_cone_BP(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims)  {  	float* volData = (float*)D_volData;  	int endAngle = startAngle + g_anglesPerBlock; -	if (endAngle > dims.iProjAngles) -		endAngle = dims.iProjAngles; +	if (endAngle > dims.iProjAngles - angleOffset) +		endAngle = dims.iProjAngles - angleOffset;  	// threadIdx: x = rel x  	//            y = rel y @@ -126,7 +126,7 @@ __global__ void dev_cone_BP(void* D_volData, unsigned int volPitch, int startAng  	{  		float fVal = 0.0f; -		float fAngle = startAngle + 0.5f; +		float fAngle = startAngle + angleOffset + 0.5f;  		for (int angle = startAngle; angle < endAngle; ++angle, fAngle += 1.0f)  		{ @@ -161,13 +161,13 @@ __global__ void dev_cone_BP(void* D_volData, unsigned int volPitch, int startAng  }  // supersampling version -__global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, const SDimensions3D dims) +__global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims)  {  	float* volData = (float*)D_volData;  	int endAngle = startAngle + g_anglesPerBlock; -	if (endAngle > dims.iProjAngles) -		endAngle = dims.iProjAngles; +	if (endAngle > dims.iProjAngles - angleOffset) +		endAngle = dims.iProjAngles - angleOffset;  	// threadIdx: x = rel x  	//            y = rel y @@ -201,7 +201,7 @@ __global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int start  	{  		float fVal = 0.0f; -		float fAngle = startAngle + 0.5f; +		float fAngle = startAngle + angleOffset + 0.5f;  		for (int angle = startAngle; angle < endAngle; ++angle, fAngle += 1.0f)  		{ @@ -256,49 +256,60 @@ bool ConeBP_Array(cudaPitchedPtr D_volumeData,  {  	bindProjDataTexture(D_projArray); +	for (unsigned int th = 0; th < dims.iProjAngles; th += g_MaxAngles) { +		unsigned int angleCount = g_MaxAngles; +		if (th + angleCount > dims.iProjAngles) +			angleCount = dims.iProjAngles - th; -	// transfer angles to constant memory -	float* tmp = new float[dims.iProjAngles]; +		// transfer angles to constant memory +		float* tmp = new float[angleCount]; -#define TRANSFER_TO_CONSTANT(expr,name) do { for (unsigned int i = 0; i < dims.iProjAngles; ++i) tmp[i] = (expr) ; cudaMemcpyToSymbol(gC_##name, tmp, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); } while (0) -	TRANSFER_TO_CONSTANT( (angles[i].fDetSZ - angles[i].fSrcZ)*angles[i].fDetVY - (angles[i].fDetSY - angles[i].fSrcY)*angles[i].fDetVZ , Cux ); -	TRANSFER_TO_CONSTANT( (angles[i].fDetSX - angles[i].fSrcX)*angles[i].fDetVZ -(angles[i].fDetSZ - angles[i].fSrcZ)*angles[i].fDetVX , Cuy ); -	TRANSFER_TO_CONSTANT( (angles[i].fDetSY - angles[i].fSrcY)*angles[i].fDetVX - (angles[i].fDetSX - angles[i].fSrcX)*angles[i].fDetVY , Cuz ); -	TRANSFER_TO_CONSTANT( (angles[i].fDetSY*angles[i].fDetVZ - angles[i].fDetSZ*angles[i].fDetVY)*angles[i].fSrcX - (angles[i].fDetSX*angles[i].fDetVZ - angles[i].fDetSZ*angles[i].fDetVX)*angles[i].fSrcY + (angles[i].fDetSX*angles[i].fDetVY - angles[i].fDetSY*angles[i].fDetVX)*angles[i].fSrcZ , Cuc ); +		// NB: We increment angles at the end of the loop body. -	TRANSFER_TO_CONSTANT( (angles[i].fDetSY - angles[i].fSrcY)*angles[i].fDetUZ-(angles[i].fDetSZ - angles[i].fSrcZ)*angles[i].fDetUY, Cvx ); -	TRANSFER_TO_CONSTANT( (angles[i].fDetSZ - angles[i].fSrcZ)*angles[i].fDetUX - (angles[i].fDetSX - angles[i].fSrcX)*angles[i].fDetUZ , Cvy ); -	TRANSFER_TO_CONSTANT((angles[i].fDetSX - angles[i].fSrcX)*angles[i].fDetUY-(angles[i].fDetSY - angles[i].fSrcY)*angles[i].fDetUX , Cvz ); -	TRANSFER_TO_CONSTANT( -(angles[i].fDetSY*angles[i].fDetUZ - angles[i].fDetSZ*angles[i].fDetUY)*angles[i].fSrcX + (angles[i].fDetSX*angles[i].fDetUZ - angles[i].fDetSZ*angles[i].fDetUX)*angles[i].fSrcY - (angles[i].fDetSX*angles[i].fDetUY - angles[i].fDetSY*angles[i].fDetUX)*angles[i].fSrcZ , Cvc ); +#define TRANSFER_TO_CONSTANT(expr,name) do { for (unsigned int i = 0; i < angleCount; ++i) tmp[i] = (expr) ; cudaMemcpyToSymbol(gC_##name, tmp, angleCount*sizeof(float), 0, cudaMemcpyHostToDevice); } while (0) -	TRANSFER_TO_CONSTANT( angles[i].fDetUY*angles[i].fDetVZ - angles[i].fDetUZ*angles[i].fDetVY , Cdx ); -	TRANSFER_TO_CONSTANT( angles[i].fDetUZ*angles[i].fDetVX - angles[i].fDetUX*angles[i].fDetVZ , Cdy ); -	TRANSFER_TO_CONSTANT( angles[i].fDetUX*angles[i].fDetVY - angles[i].fDetUY*angles[i].fDetVX , Cdz ); -	TRANSFER_TO_CONSTANT( -angles[i].fSrcX * (angles[i].fDetUY*angles[i].fDetVZ - angles[i].fDetUZ*angles[i].fDetVY) - angles[i].fSrcY * (angles[i].fDetUZ*angles[i].fDetVX - angles[i].fDetUX*angles[i].fDetVZ) - angles[i].fSrcZ * (angles[i].fDetUX*angles[i].fDetVY - angles[i].fDetUY*angles[i].fDetVX) , Cdc ); +		TRANSFER_TO_CONSTANT( (angles[i].fDetSZ - angles[i].fSrcZ)*angles[i].fDetVY - (angles[i].fDetSY - angles[i].fSrcY)*angles[i].fDetVZ , Cux ); +		TRANSFER_TO_CONSTANT( (angles[i].fDetSX - angles[i].fSrcX)*angles[i].fDetVZ -(angles[i].fDetSZ - angles[i].fSrcZ)*angles[i].fDetVX , Cuy ); +		TRANSFER_TO_CONSTANT( (angles[i].fDetSY - angles[i].fSrcY)*angles[i].fDetVX - (angles[i].fDetSX - angles[i].fSrcX)*angles[i].fDetVY , Cuz ); +		TRANSFER_TO_CONSTANT( (angles[i].fDetSY*angles[i].fDetVZ - angles[i].fDetSZ*angles[i].fDetVY)*angles[i].fSrcX - (angles[i].fDetSX*angles[i].fDetVZ - angles[i].fDetSZ*angles[i].fDetVX)*angles[i].fSrcY + (angles[i].fDetSX*angles[i].fDetVY - angles[i].fDetSY*angles[i].fDetVX)*angles[i].fSrcZ , Cuc ); + +		TRANSFER_TO_CONSTANT( (angles[i].fDetSY - angles[i].fSrcY)*angles[i].fDetUZ-(angles[i].fDetSZ - angles[i].fSrcZ)*angles[i].fDetUY, Cvx ); +		TRANSFER_TO_CONSTANT( (angles[i].fDetSZ - angles[i].fSrcZ)*angles[i].fDetUX - (angles[i].fDetSX - angles[i].fSrcX)*angles[i].fDetUZ , Cvy ); +		TRANSFER_TO_CONSTANT((angles[i].fDetSX - angles[i].fSrcX)*angles[i].fDetUY-(angles[i].fDetSY - angles[i].fSrcY)*angles[i].fDetUX , Cvz ); +		TRANSFER_TO_CONSTANT( -(angles[i].fDetSY*angles[i].fDetUZ - angles[i].fDetSZ*angles[i].fDetUY)*angles[i].fSrcX + (angles[i].fDetSX*angles[i].fDetUZ - angles[i].fDetSZ*angles[i].fDetUX)*angles[i].fSrcY - (angles[i].fDetSX*angles[i].fDetUY - angles[i].fDetSY*angles[i].fDetUX)*angles[i].fSrcZ , Cvc ); + +		TRANSFER_TO_CONSTANT( angles[i].fDetUY*angles[i].fDetVZ - angles[i].fDetUZ*angles[i].fDetVY , Cdx ); +		TRANSFER_TO_CONSTANT( angles[i].fDetUZ*angles[i].fDetVX - angles[i].fDetUX*angles[i].fDetVZ , Cdy ); +		TRANSFER_TO_CONSTANT( angles[i].fDetUX*angles[i].fDetVY - angles[i].fDetUY*angles[i].fDetVX , Cdz ); +		TRANSFER_TO_CONSTANT( -angles[i].fSrcX * (angles[i].fDetUY*angles[i].fDetVZ - angles[i].fDetUZ*angles[i].fDetVY) - angles[i].fSrcY * (angles[i].fDetUZ*angles[i].fDetVX - angles[i].fDetUX*angles[i].fDetVZ) - angles[i].fSrcZ * (angles[i].fDetUX*angles[i].fDetVY - angles[i].fDetUY*angles[i].fDetVX) , Cdc );  #undef TRANSFER_TO_CONSTANT -	delete[] tmp; +		delete[] tmp; -	dim3 dimBlock(g_volBlockX, g_volBlockY); +		dim3 dimBlock(g_volBlockX, g_volBlockY); -	dim3 dimGrid(((dims.iVolX+g_volBlockX-1)/g_volBlockX)*((dims.iVolY+g_volBlockY-1)/g_volBlockY), (dims.iVolZ+g_volBlockZ-1)/g_volBlockZ); +		dim3 dimGrid(((dims.iVolX+g_volBlockX-1)/g_volBlockX)*((dims.iVolY+g_volBlockY-1)/g_volBlockY), (dims.iVolZ+g_volBlockZ-1)/g_volBlockZ); -	// timeval t; -	// tic(t); +		// timeval t; +		// tic(t); -	for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) { +		for (unsigned int i = 0; i < angleCount; i += g_anglesPerBlock) {  		// printf("Calling BP: %d, %dx%d, %dx%d to %p\n", i, dimBlock.x, dimBlock.y, dimGrid.x, dimGrid.y, (void*)D_volumeData.ptr);  -		if (dims.iRaysPerVoxelDim == 1) -			dev_cone_BP<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, dims); -		else -			dev_cone_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, dims); -	} +			if (dims.iRaysPerVoxelDim == 1) +				dev_cone_BP<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims); +			else +				dev_cone_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims); +		} -	cudaTextForceKernelsCompletion(); +		cudaTextForceKernelsCompletion(); + +		angles = angles + angleCount; +		// printf("%f\n", toc(t)); + +	} -	// printf("%f\n", toc(t));  	return true;  } diff --git a/cuda/3d/cone_fp.cu b/cuda/3d/cone_fp.cu index 0b1f012..d049151 100644 --- a/cuda/3d/cone_fp.cu +++ b/cuda/3d/cone_fp.cu @@ -299,18 +299,14 @@ __global__ void cone_FP_SS_t(float* D_projData, unsigned int projPitch,  } - -bool ConeFP_Array(cudaArray *D_volArray, -                  cudaPitchedPtr D_projData, -                  const SDimensions3D& dims, const SConeProjection* angles, +bool ConeFP_Array_internal(cudaPitchedPtr D_projData, +                  const SDimensions3D& dims, unsigned int angleCount, const SConeProjection* angles,                    float fOutputScale)  { -	bindVolumeDataTexture(D_volArray); -  	// transfer angles to constant memory -	float* tmp = new float[dims.iProjAngles]; +	float* tmp = new float[angleCount]; -#define TRANSFER_TO_CONSTANT(name) do { for (unsigned int i = 0; i < dims.iProjAngles; ++i) tmp[i] = angles[i].f##name ; cudaMemcpyToSymbol(gC_##name, tmp, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); } while (0) +#define TRANSFER_TO_CONSTANT(name) do { for (unsigned int i = 0; i < angleCount; ++i) tmp[i] = angles[i].f##name ; cudaMemcpyToSymbol(gC_##name, tmp, angleCount*sizeof(float), 0, cudaMemcpyHostToDevice); } while (0)  	TRANSFER_TO_CONSTANT(SrcX);  	TRANSFER_TO_CONSTANT(SrcY); @@ -343,9 +339,9 @@ bool ConeFP_Array(cudaArray *D_volArray,  	// timeval t;  	// tic(t); -	for (unsigned int a = 0; a <= dims.iProjAngles; ++a) { +	for (unsigned int a = 0; a <= angleCount; ++a) {  		int dir; -		if (a != dims.iProjAngles) { +		if (a != angleCount) {  			float dX = fabsf(angles[a].fSrcX - (angles[a].fDetSX + dims.iProjU*angles[a].fDetUX*0.5f + dims.iProjV*angles[a].fDetVX*0.5f));  			float dY = fabsf(angles[a].fSrcY - (angles[a].fDetSY + dims.iProjU*angles[a].fDetUY*0.5f + dims.iProjV*angles[a].fDetVY*0.5f));  			float dZ = fabsf(angles[a].fSrcZ - (angles[a].fDetSZ + dims.iProjU*angles[a].fDetUZ*0.5f + dims.iProjV*angles[a].fDetVZ*0.5f)); @@ -358,7 +354,7 @@ bool ConeFP_Array(cudaArray *D_volArray,  				dir = 2;  		} -		if (a == dims.iProjAngles || dir != blockDirection) { +		if (a == angleCount || dir != blockDirection) {  			// block done  			blockEnd = a; @@ -414,6 +410,7 @@ bool ConeFP_Array(cudaArray *D_volArray,  	return true;  } +  bool ConeFP(cudaPitchedPtr D_volumeData,              cudaPitchedPtr D_projData,              const SDimensions3D& dims, const SConeProjection* angles, @@ -423,8 +420,24 @@ bool ConeFP(cudaPitchedPtr D_volumeData,  	cudaArray* cuArray = allocateVolumeArray(dims);  	transferVolumeToArray(D_volumeData, cuArray, dims); +	bindVolumeDataTexture(cuArray); + +	bool ret; -	bool ret = ConeFP_Array(cuArray, D_projData, dims, angles, fOutputScale); +	for (unsigned int iAngle = 0; iAngle < dims.iProjAngles; iAngle += g_MaxAngles) { +		unsigned int iEndAngle = iAngle + g_MaxAngles; +		if (iEndAngle >= dims.iProjAngles) +			iEndAngle = dims.iProjAngles; + +		cudaPitchedPtr D_subprojData = D_projData; +		D_subprojData.ptr = (char*)D_projData.ptr + iAngle * D_projData.pitch; + +		ret = ConeFP_Array_internal(D_subprojData, +		                            dims, iEndAngle - iAngle, angles + iAngle, +		                            fOutputScale); +		if (!ret) +			break; +	}  	cudaFreeArray(cuArray); | 
