From 673bf2fd8fd40053ac6baba229953f2e58b7cb42 Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Wed, 6 Jan 2016 14:27:13 +0100 Subject: Fix whitespace --- src/CompositeGeometryManager.cpp | 42 ++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/src/CompositeGeometryManager.cpp b/src/CompositeGeometryManager.cpp index 41f6319..eed06c4 100644 --- a/src/CompositeGeometryManager.cpp +++ b/src/CompositeGeometryManager.cpp @@ -305,37 +305,37 @@ CCompositeGeometryManager::CPart* CCompositeGeometryManager::CVolumePart::reduce static size_t ceildiv(size_t a, size_t b) { - return (a + b - 1) / b; + return (a + b - 1) / b; } static size_t computeVerticalSplit(size_t maxBlock, int div, size_t sliceCount) { - size_t blockSize = maxBlock; - size_t blockCount = ceildiv(sliceCount, blockSize); + size_t blockSize = maxBlock; + size_t blockCount = ceildiv(sliceCount, blockSize); - // Increase number of blocks to be divisible by div - size_t divCount = div * ceildiv(blockCount, div); + // Increase number of blocks to be divisible by div + size_t divCount = div * ceildiv(blockCount, div); - // If divCount is above sqrt(number of slices), then - // we can't guarantee divisibility by div, but let's try anyway - if (ceildiv(sliceCount, ceildiv(sliceCount, divCount)) % div == 0) { - blockCount = divCount; - } else { - // If divisibility isn't achievable, we may want to optimize - // differently. - // TODO: Figure out how to model and optimize this. - } + // If divCount is above sqrt(number of slices), then + // we can't guarantee divisibility by div, but let's try anyway + if (ceildiv(sliceCount, ceildiv(sliceCount, divCount)) % div == 0) { + blockCount = divCount; + } else { + // If divisibility isn't achievable, we may want to optimize + // differently. + // TODO: Figure out how to model and optimize this. + } - // Final adjustment to make blocks more evenly sized - // (This can't make the blocks larger) - blockSize = ceildiv(sliceCount, blockCount); + // Final adjustment to make blocks more evenly sized + // (This can't make the blocks larger) + blockSize = ceildiv(sliceCount, blockCount); - ASTRA_DEBUG("%ld %ld -> %ld * %ld", sliceCount, maxBlock, blockCount, blockSize); + ASTRA_DEBUG("%ld %ld -> %ld * %ld", sliceCount, maxBlock, blockCount, blockSize); - assert(blockSize <= maxBlock); - assert((divCount * divCount > sliceCount) || (blockCount % div) == 0); + assert(blockSize <= maxBlock); + assert((divCount * divCount > sliceCount) || (blockCount % div) == 0); - return blockSize; + return blockSize; } template -- cgit v1.2.3 From 8e68248bd587456325101911a927b206b5450b31 Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Wed, 6 Jan 2016 13:29:01 +0100 Subject: Remove noisy debugging output --- src/ConeProjectionGeometry3D.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/ConeProjectionGeometry3D.cpp b/src/ConeProjectionGeometry3D.cpp index 99b4bf4..96b04fb 100644 --- a/src/ConeProjectionGeometry3D.cpp +++ b/src/ConeProjectionGeometry3D.cpp @@ -256,9 +256,6 @@ void CConeProjectionGeometry3D::projectPoint(double fX, double fY, double fZ, // Scale fS to detector plane fU = detectorOffsetXToColIndexFloat( (fS * (m_fOriginSourceDistance + m_fOriginDetectorDistance)) / fD ); - - ASTRA_DEBUG("alpha: %f, D: %f, V: %f, S: %f, U: %f", alpha, fD, fV, fS, fU); - } void CConeProjectionGeometry3D::backprojectPointX(int iAngleIndex, double fU, double fV, -- cgit v1.2.3 From 687c5e244e46e51786afad77f5015cae9abad129 Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Wed, 6 Jan 2016 15:10:34 +0100 Subject: Add multi-GPU support to CompositeGeometryManager --- cuda/3d/mem3d.h | 2 + include/astra/CompositeGeometryManager.h | 16 ++ matlab/mex/astra_mex_c.cpp | 45 +++- src/CompositeGeometryManager.cpp | 434 +++++++++++++++++++++++-------- 4 files changed, 378 insertions(+), 119 deletions(-) diff --git a/cuda/3d/mem3d.h b/cuda/3d/mem3d.h index 82bad19..acb72cb 100644 --- a/cuda/3d/mem3d.h +++ b/cuda/3d/mem3d.h @@ -87,6 +87,8 @@ bool copyFromGPUMemory(float *dst, MemHandle3D src, const SSubDimensions3D &pos) bool freeGPUMemory(MemHandle3D handle); +bool setGPUIndex(int index); + bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iDetectorSuperSampling, astra::Cuda3DProjectionKernel projKernel); diff --git a/include/astra/CompositeGeometryManager.h b/include/astra/CompositeGeometryManager.h index 6610151..49d02a7 100644 --- a/include/astra/CompositeGeometryManager.h +++ b/include/astra/CompositeGeometryManager.h @@ -50,9 +50,16 @@ class CProjectionGeometry3D; class CProjector3D; +struct SGPUParams { + std::vector GPUIndices; + size_t memory; +}; + class _AstraExport CCompositeGeometryManager { public: + CCompositeGeometryManager(); + class CPart; typedef std::list > TPartList; class CPart { @@ -139,10 +146,19 @@ public: bool doFP(CProjector3D *pProjector, const std::vector& volData, const std::vector& projData); bool doBP(CProjector3D *pProjector, const std::vector& volData, const std::vector& projData); + void setGPUIndices(const std::vector& GPUIndices); + + static void setGlobalGPUParams(const SGPUParams& params); + protected: bool splitJobs(TJobSet &jobs, size_t maxSize, int div, TJobSet &split); + std::vector m_GPUIndices; + size_t m_iMaxSize; + + + static SGPUParams* s_params; }; } diff --git a/matlab/mex/astra_mex_c.cpp b/matlab/mex/astra_mex_c.cpp index d34334c..fdf4f33 100644 --- a/matlab/mex/astra_mex_c.cpp +++ b/matlab/mex/astra_mex_c.cpp @@ -38,6 +38,7 @@ $Id$ #include "astra/Globals.h" #ifdef ASTRA_CUDA #include "../cuda/2d/darthelper.h" +#include "astra/CompositeGeometryManager.h" #endif using namespace std; using namespace astra; @@ -83,12 +84,46 @@ void astra_mex_use_cuda(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs * Set active GPU */ void astra_mex_set_gpu_index(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) -{ +{ #ifdef ASTRA_CUDA - if (nrhs >= 2) { - bool ret = astraCUDA::setGPUIndex((int)mxGetScalar(prhs[1])); - if (!ret) - mexPrintf("Failed to set GPU %d\n", (int)mxGetScalar(prhs[1])); + bool usage = false; + if (nrhs != 2 && nrhs != 4) { + usage = true; + } + + astra::SGPUParams params; + params.memory = 0; + + if (!usage && nrhs >= 4) { + std::string s = mexToString(prhs[2]); + if (s != "memory") { + usage = true; + } else { + params.memory = (size_t)mxGetScalar(prhs[3]); + } + } + + if (!usage && nrhs >= 2) { + int n = mxGetN(prhs[1]) * mxGetM(prhs[1]); + params.GPUIndices.resize(n); + double* pdMatlabData = mxGetPr(prhs[1]); + for (int i = 0; i < n; ++i) + params.GPUIndices[i] = (int)pdMatlabData[i]; + + + astra::CCompositeGeometryManager::setGlobalGPUParams(params); + + + // Set first GPU + if (n >= 1) { + bool ret = astraCUDA::setGPUIndex((int)pdMatlabData[0]); + if (!ret) + mexPrintf("Failed to set GPU %d\n", (int)pdMatlabData[0]); + } + } + + if (usage) { + mexPrintf("Usage: astra_mex('set_gpu_index', index/indices [, 'memory', memory])"); } #endif } diff --git a/src/CompositeGeometryManager.cpp b/src/CompositeGeometryManager.cpp index eed06c4..d1b713e 100644 --- a/src/CompositeGeometryManager.cpp +++ b/src/CompositeGeometryManager.cpp @@ -44,11 +44,31 @@ along with the ASTRA Toolbox. If not, see . #include "../cuda/3d/mem3d.h" #include +#include + +#ifndef USE_PTHREADS +#include +#include +#endif namespace astra { + +SGPUParams* CCompositeGeometryManager::s_params = 0; + +CCompositeGeometryManager::CCompositeGeometryManager() +{ + m_iMaxSize = 0; + + if (s_params) { + m_iMaxSize = s_params->memory; + m_GPUIndices = s_params->GPUIndices; + } +} + + // JOB: -// +// // VolumePart // ProjectionPart // FP-or-BP @@ -76,7 +96,6 @@ namespace astra { // (First approach: 0.5/0.5) - bool CCompositeGeometryManager::splitJobs(TJobSet &jobs, size_t maxSize, int div, TJobSet &split) { split.clear(); @@ -848,6 +867,260 @@ bool CCompositeGeometryManager::doBP(CProjector3D *pProjector, const std::vector +static bool doJob(const CCompositeGeometryManager::TJobSet::const_iterator& iter) +{ + CCompositeGeometryManager::CPart* output = iter->first; + const CCompositeGeometryManager::TJobList& L = iter->second; + + assert(!L.empty()); + + bool zero = L.begin()->eMode == CCompositeGeometryManager::SJob::MODE_SET; + + size_t outx, outy, outz; + output->getDims(outx, outy, outz); + + if (L.begin()->eType == CCompositeGeometryManager::SJob::JOB_NOP) { + // just zero output? + if (zero) { + for (size_t z = 0; z < outz; ++z) { + for (size_t y = 0; y < outy; ++y) { + float* ptr = output->pData->getData(); + ptr += (z + output->subX) * (size_t)output->pData->getHeight() * (size_t)output->pData->getWidth(); + ptr += (y + output->subY) * (size_t)output->pData->getWidth(); + ptr += output->subX; + memset(ptr, 0, sizeof(float) * outx); + } + } + } + return true; + } + + + astraCUDA3d::SSubDimensions3D dstdims; + dstdims.nx = output->pData->getWidth(); + dstdims.pitch = dstdims.nx; + dstdims.ny = output->pData->getHeight(); + dstdims.nz = output->pData->getDepth(); + dstdims.subnx = outx; + dstdims.subny = outy; + dstdims.subnz = outz; + ASTRA_DEBUG("dstdims: %d,%d,%d in %d,%d,%d", dstdims.subnx, dstdims.subny, dstdims.subnz, dstdims.nx, dstdims.ny, dstdims.nz); + dstdims.subx = output->subX; + dstdims.suby = output->subY; + dstdims.subz = output->subZ; + float *dst = output->pData->getData(); + + astraCUDA3d::MemHandle3D outputMem = astraCUDA3d::allocateGPUMemory(outx, outy, outz, zero ? astraCUDA3d::INIT_ZERO : astraCUDA3d::INIT_NO); + bool ok = outputMem; + + for (CCompositeGeometryManager::TJobList::const_iterator i = L.begin(); i != L.end(); ++i) { + const CCompositeGeometryManager::SJob &j = *i; + + assert(j.pInput); + + CCudaProjector3D *projector = dynamic_cast(j.pProjector); + Cuda3DProjectionKernel projKernel = ker3d_default; + int detectorSuperSampling = 1; + int voxelSuperSampling = 1; + if (projector) { + projKernel = projector->getProjectionKernel(); + detectorSuperSampling = projector->getDetectorSuperSampling(); + voxelSuperSampling = projector->getVoxelSuperSampling(); + } + + size_t inx, iny, inz; + j.pInput->getDims(inx, iny, inz); + astraCUDA3d::MemHandle3D inputMem = astraCUDA3d::allocateGPUMemory(inx, iny, inz, astraCUDA3d::INIT_NO); + + astraCUDA3d::SSubDimensions3D srcdims; + srcdims.nx = j.pInput->pData->getWidth(); + srcdims.pitch = srcdims.nx; + srcdims.ny = j.pInput->pData->getHeight(); + srcdims.nz = j.pInput->pData->getDepth(); + srcdims.subnx = inx; + srcdims.subny = iny; + srcdims.subnz = inz; + srcdims.subx = j.pInput->subX; + srcdims.suby = j.pInput->subY; + srcdims.subz = j.pInput->subZ; + const float *src = j.pInput->pData->getDataConst(); + + ok = astraCUDA3d::copyToGPUMemory(src, inputMem, srcdims); + if (!ok) ASTRA_ERROR("Error copying input data to GPU"); + + if (j.eType == CCompositeGeometryManager::SJob::JOB_FP) { + assert(dynamic_cast(j.pInput.get())); + assert(dynamic_cast(j.pOutput.get())); + + ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing FP"); + + ok = astraCUDA3d::FP(((CCompositeGeometryManager::CProjectionPart*)j.pOutput.get())->pGeom, outputMem, ((CCompositeGeometryManager::CVolumePart*)j.pInput.get())->pGeom, inputMem, detectorSuperSampling, projKernel); + if (!ok) ASTRA_ERROR("Error performing sub-FP"); + ASTRA_DEBUG("CCompositeGeometryManager::doJobs: FP done"); + } else if (j.eType == CCompositeGeometryManager::SJob::JOB_BP) { + assert(dynamic_cast(j.pOutput.get())); + assert(dynamic_cast(j.pInput.get())); + + ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing BP"); + + ok = astraCUDA3d::BP(((CCompositeGeometryManager::CProjectionPart*)j.pInput.get())->pGeom, inputMem, ((CCompositeGeometryManager::CVolumePart*)j.pOutput.get())->pGeom, outputMem, voxelSuperSampling); + if (!ok) ASTRA_ERROR("Error performing sub-BP"); + ASTRA_DEBUG("CCompositeGeometryManager::doJobs: BP done"); + } else { + assert(false); + } + + ok = astraCUDA3d::freeGPUMemory(inputMem); + if (!ok) ASTRA_ERROR("Error freeing GPU memory"); + + } + + ok = astraCUDA3d::copyFromGPUMemory(dst, outputMem, dstdims); + if (!ok) ASTRA_ERROR("Error copying output data from GPU"); + + ok = astraCUDA3d::freeGPUMemory(outputMem); + if (!ok) ASTRA_ERROR("Error freeing GPU memory"); + + return true; +} + + +class WorkQueue { +public: + WorkQueue(CCompositeGeometryManager::TJobSet &_jobs) : m_jobs(_jobs) { +#ifdef USE_PTHREADS + pthread_mutex_init(&m_mutex, 0); +#endif + m_iter = m_jobs.begin(); + } + bool receive(CCompositeGeometryManager::TJobSet::const_iterator &i) { + lock(); + + if (m_iter == m_jobs.end()) { + unlock(); + return false; + } + + i = m_iter++; + + unlock(); + + return true; + } +#ifdef USE_PTHREADS + void lock() { + // TODO: check mutex op return values + pthread_mutex_lock(&m_mutex); + } + void unlock() { + // TODO: check mutex op return values + pthread_mutex_unlock(&m_mutex); + } +#else + void lock() { + m_mutex.lock(); + } + void unlock() { + m_mutex.unlock(); + } +#endif + +private: + CCompositeGeometryManager::TJobSet &m_jobs; + CCompositeGeometryManager::TJobSet::const_iterator m_iter; +#ifdef USE_PTHREADS + pthread_mutex_t m_mutex; +#else + boost::mutex m_mutex; +#endif +}; + +struct WorkThreadInfo { + WorkQueue* m_queue; + unsigned int m_iGPU; +}; + +#ifndef USE_PTHREADS + +void runEntries_boost(WorkThreadInfo* info) +{ + ASTRA_DEBUG("Launching thread on GPU %d\n", info->m_iGPU); + CCompositeGeometryManager::TJobSet::const_iterator i; + while (info->m_queue->receive(i)) { + ASTRA_DEBUG("Running block on GPU %d\n", info->m_iGPU); + astraCUDA3d::setGPUIndex(info->m_iGPU); + boost::this_thread::interruption_point(); + doJob(i); + boost::this_thread::interruption_point(); + } + ASTRA_DEBUG("Finishing thread on GPU %d\n", info->m_iGPU); +} + + +#else + +void* runEntries_pthreads(void* data) { + WorkThreadInfo* info = (WorkThreadInfo*)data; + + ASTRA_DEBUG("Launching thread on GPU %d\n", info->m_iGPU); + + CCompositeGeometryManager::TJobSet::const_iterator i; + + while (info->m_queue->receive(i)) { + ASTRA_DEBUG("Running block on GPU %d\n", info->m_iGPU); + astraCUDA3d::setGPUIndex(info->m_iGPU); + pthread_testcancel(); + doJob(i); + pthread_testcancel(); + } + ASTRA_DEBUG("Finishing thread on GPU %d\n", info->m_iGPU); + + return 0; +} + +#endif + + +void runWorkQueue(WorkQueue &queue, const std::vector & iGPUIndices) { + int iThreadCount = iGPUIndices.size(); + + std::vector infos; +#ifdef USE_PTHREADS + std::vector threads; +#else + std::vector threads; +#endif + infos.resize(iThreadCount); + threads.resize(iThreadCount); + + for (int i = 0; i < iThreadCount; ++i) { + infos[i].m_queue = &queue; + infos[i].m_iGPU = iGPUIndices[i]; +#ifdef USE_PTHREADS + pthread_create(&threads[i], 0, runEntries_pthreads, (void*)&infos[i]); +#else + threads[i] = new boost::thread(runEntries_boost, &infos[i]); +#endif + } + + // Wait for them to finish + for (int i = 0; i < iThreadCount; ++i) { +#ifdef USE_PTHREADS + pthread_join(threads[i], 0); +#else + threads[i]->join(); + delete threads[i]; + threads[i] = 0; +#endif + } +} + + +void CCompositeGeometryManager::setGPUIndices(const std::vector& GPUIndices) +{ + m_GPUIndices = GPUIndices; +} + bool CCompositeGeometryManager::doJobs(TJobList &jobs) { ASTRA_DEBUG("CCompositeGeometryManager::doJobs"); @@ -859,140 +1132,53 @@ bool CCompositeGeometryManager::doJobs(TJobList &jobs) jobset[i->pOutput.get()].push_back(*i); } - size_t maxSize = astraCUDA3d::availableGPUMemory(); + size_t maxSize = m_iMaxSize; if (maxSize == 0) { - ASTRA_WARN("Unable to get available GPU memory. Defaulting to 1GB."); - maxSize = 1024 * 1024 * 1024; + // Get memory from first GPU. Not optimal... + if (!m_GPUIndices.empty()) + astraCUDA3d::setGPUIndex(m_GPUIndices[0]); + maxSize = astraCUDA3d::availableGPUMemory(); + if (maxSize == 0) { + ASTRA_WARN("Unable to get available GPU memory. Defaulting to 1GB."); + maxSize = 1024 * 1024 * 1024; + } else { + ASTRA_DEBUG("Detected %lu bytes of GPU memory", maxSize); + } } else { - ASTRA_DEBUG("Detected %lu bytes of GPU memory", maxSize); + ASTRA_DEBUG("Set to %lu bytes of GPU memory", maxSize); } maxSize = (maxSize * 9) / 10; maxSize /= sizeof(float); int div = 1; - - // TODO: Multi-GPU support + if (!m_GPUIndices.empty()) + div = m_GPUIndices.size(); // Split jobs to fit TJobSet split; splitJobs(jobset, maxSize, div, split); jobset.clear(); - // Run jobs - - for (TJobSet::iterator iter = split.begin(); iter != split.end(); ++iter) { - - CPart* output = iter->first; - TJobList& L = iter->second; - - assert(!L.empty()); + if (m_GPUIndices.size() <= 1) { - bool zero = L.begin()->eMode == SJob::MODE_SET; + // Run jobs + ASTRA_DEBUG("Running single-threaded"); - size_t outx, outy, outz; - output->getDims(outx, outy, outz); + if (!m_GPUIndices.empty()) + astraCUDA3d::setGPUIndex(m_GPUIndices[0]); - if (L.begin()->eType == SJob::JOB_NOP) { - // just zero output? - if (zero) { - for (size_t z = 0; z < outz; ++z) { - for (size_t y = 0; y < outy; ++y) { - float* ptr = output->pData->getData(); - ptr += (z + output->subX) * (size_t)output->pData->getHeight() * (size_t)output->pData->getWidth(); - ptr += (y + output->subY) * (size_t)output->pData->getWidth(); - ptr += output->subX; - memset(ptr, 0, sizeof(float) * outx); - } - } - } - continue; + for (TJobSet::const_iterator iter = split.begin(); iter != split.end(); ++iter) { + doJob(iter); } + } else { - astraCUDA3d::SSubDimensions3D dstdims; - dstdims.nx = output->pData->getWidth(); - dstdims.pitch = dstdims.nx; - dstdims.ny = output->pData->getHeight(); - dstdims.nz = output->pData->getDepth(); - dstdims.subnx = outx; - dstdims.subny = outy; - dstdims.subnz = outz; - ASTRA_DEBUG("dstdims: %d,%d,%d in %d,%d,%d", dstdims.subnx, dstdims.subny, dstdims.subnz, dstdims.nx, dstdims.ny, dstdims.nz); - dstdims.subx = output->subX; - dstdims.suby = output->subY; - dstdims.subz = output->subZ; - float *dst = output->pData->getData(); - - astraCUDA3d::MemHandle3D outputMem = astraCUDA3d::allocateGPUMemory(outx, outy, outz, zero ? astraCUDA3d::INIT_ZERO : astraCUDA3d::INIT_NO); - bool ok = outputMem; - - for (TJobList::iterator i = L.begin(); i != L.end(); ++i) { - SJob &j = *i; - - assert(j.pInput); - - CCudaProjector3D *projector = dynamic_cast(j.pProjector); - Cuda3DProjectionKernel projKernel = ker3d_default; - int detectorSuperSampling = 1; - int voxelSuperSampling = 1; - if (projector) { - projKernel = projector->getProjectionKernel(); - detectorSuperSampling = projector->getDetectorSuperSampling(); - voxelSuperSampling = projector->getVoxelSuperSampling(); - } - - size_t inx, iny, inz; - j.pInput->getDims(inx, iny, inz); - astraCUDA3d::MemHandle3D inputMem = astraCUDA3d::allocateGPUMemory(inx, iny, inz, astraCUDA3d::INIT_NO); - - astraCUDA3d::SSubDimensions3D srcdims; - srcdims.nx = j.pInput->pData->getWidth(); - srcdims.pitch = srcdims.nx; - srcdims.ny = j.pInput->pData->getHeight(); - srcdims.nz = j.pInput->pData->getDepth(); - srcdims.subnx = inx; - srcdims.subny = iny; - srcdims.subnz = inz; - srcdims.subx = j.pInput->subX; - srcdims.suby = j.pInput->subY; - srcdims.subz = j.pInput->subZ; - const float *src = j.pInput->pData->getDataConst(); - - ok = astraCUDA3d::copyToGPUMemory(src, inputMem, srcdims); - if (!ok) ASTRA_ERROR("Error copying input data to GPU"); - - if (j.eType == SJob::JOB_FP) { - assert(dynamic_cast(j.pInput.get())); - assert(dynamic_cast(j.pOutput.get())); - - ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing FP"); - - ok = astraCUDA3d::FP(((CProjectionPart*)j.pOutput.get())->pGeom, outputMem, ((CVolumePart*)j.pInput.get())->pGeom, inputMem, detectorSuperSampling, projKernel); - if (!ok) ASTRA_ERROR("Error performing sub-FP"); - ASTRA_DEBUG("CCompositeGeometryManager::doJobs: FP done"); - } else if (j.eType == SJob::JOB_BP) { - assert(dynamic_cast(j.pOutput.get())); - assert(dynamic_cast(j.pInput.get())); - - ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing BP"); - - ok = astraCUDA3d::BP(((CProjectionPart*)j.pInput.get())->pGeom, inputMem, ((CVolumePart*)j.pOutput.get())->pGeom, outputMem, voxelSuperSampling); - if (!ok) ASTRA_ERROR("Error performing sub-BP"); - ASTRA_DEBUG("CCompositeGeometryManager::doJobs: BP done"); - } else { - assert(false); - } + ASTRA_DEBUG("Running multi-threaded"); - ok = astraCUDA3d::freeGPUMemory(inputMem); - if (!ok) ASTRA_ERROR("Error freeing GPU memory"); + WorkQueue wq(split); - } + runWorkQueue(wq, m_GPUIndices); - ok = astraCUDA3d::copyFromGPUMemory(dst, outputMem, dstdims); - if (!ok) ASTRA_ERROR("Error copying output data from GPU"); - - ok = astraCUDA3d::freeGPUMemory(outputMem); - if (!ok) ASTRA_ERROR("Error freeing GPU memory"); } return true; @@ -1000,6 +1186,26 @@ bool CCompositeGeometryManager::doJobs(TJobList &jobs) + +//static +void CCompositeGeometryManager::setGlobalGPUParams(const SGPUParams& params) +{ + delete s_params; + + s_params = new SGPUParams; + *s_params = params; + + ASTRA_DEBUG("CompositeGeometryManager: Setting global GPU params:"); + std::ostringstream s; + s << "GPU indices:"; + for (unsigned int i = 0; i < params.GPUIndices.size(); ++i) + s << " " << params.GPUIndices[i]; + std::string ss = s.str(); + ASTRA_DEBUG(ss.c_str()); + ASTRA_DEBUG("Memory: %llu", params.memory); +} + + } #endif -- cgit v1.2.3 From 76d182ace088b7ab6d64019079c049a0f9a29e69 Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Wed, 6 Jan 2016 16:40:26 +0100 Subject: Change python set_gpu_index to match --- python/astra/astra.py | 4 ++-- python/astra/astra_c.pyx | 21 ++++++++++++++++++--- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/python/astra/astra.py b/python/astra/astra.py index 26b1ff0..9328b6b 100644 --- a/python/astra/astra.py +++ b/python/astra/astra.py @@ -49,10 +49,10 @@ def version(printToScreen=False): """ return a.version(printToScreen) -def set_gpu_index(idx): +def set_gpu_index(idx, memory=0): """Set default GPU index to use. :param idx: GPU index :type idx: :class:`int` """ - a.set_gpu_index(idx) + a.set_gpu_index(idx, memory) diff --git a/python/astra/astra_c.pyx b/python/astra/astra_c.pyx index 6b246b6..2a9c816 100644 --- a/python/astra/astra_c.pyx +++ b/python/astra/astra_c.pyx @@ -31,6 +31,7 @@ import six from .utils import wrap_from_bytes from libcpp.string cimport string +from libcpp.vector cimport vector from libcpp cimport bool cdef extern from "astra/Globals.h" namespace "astra": int getVersion() @@ -43,6 +44,12 @@ IF HAVE_CUDA==True: ELSE: def setGPUIndex(): pass +cdef extern from "astra/CompositeGeometryManager.h" namespace "astra": + cdef cppclass SGPUParams: + vector[int] GPUIndices + size_t memory +cdef extern from "astra/CompositeGeometryManager.h" namespace "astra::CCompositeGeometryManager": + void setGlobalGPUParams(SGPUParams&) def credits(): six.print_("""The ASTRA Toolbox has been developed at the University of Antwerp and CWI, Amsterdam by @@ -70,8 +77,16 @@ def version(printToScreen=False): else: return getVersion() -def set_gpu_index(idx): +def set_gpu_index(idx, memory=0): + import types + import collections + cdef SGPUParams params if use_cuda()==True: - ret = setGPUIndex(idx) + if not isinstance(idx, collections.Iterable) or isinstance(idx, types.StringTypes): + idx = (idx,) + params.memory = memory + params.GPUIndices = idx + setGlobalGPUParams(params) + ret = setGPUIndex(params.GPUIndices[0]) if not ret: - six.print_("Failed to set GPU " + str(idx)) + six.print_("Failed to set GPU " + str(params.GPUIndices[0])) -- cgit v1.2.3 From 5bc9c23c84a3e7f079b75d0bee50c570a7372920 Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Wed, 6 Jan 2016 16:49:56 +0100 Subject: Allow multiple GPUs in ASTRA_GPU_INDEX envvar --- python/astra/__init__.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/astra/__init__.py b/python/astra/__init__.py index 10ed74d..515d9a2 100644 --- a/python/astra/__init__.py +++ b/python/astra/__init__.py @@ -39,7 +39,7 @@ from . import log from .optomo import OpTomo import os -try: - astra.set_gpu_index(int(os.environ['ASTRA_GPU_INDEX'])) -except KeyError: - pass + +if 'ASTRA_GPU_INDEX' in os.environ: + L = [ int(x) for x in os.environ['ASTRA_GPU_INDEX'].split(',') ] + astra.set_gpu_index(L) -- cgit v1.2.3 From f2227eaca7248b6b01a5776f0cb750cff4c0279a Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Thu, 7 Jan 2016 13:29:37 +0100 Subject: Rename sample with conflicting filename --- samples/python/s018_experimental_multires.py | 84 ---------------------------- samples/python/s019_experimental_multires.py | 84 ++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 84 deletions(-) delete mode 100644 samples/python/s018_experimental_multires.py create mode 100644 samples/python/s019_experimental_multires.py diff --git a/samples/python/s018_experimental_multires.py b/samples/python/s018_experimental_multires.py deleted file mode 100644 index cf38e53..0000000 --- a/samples/python/s018_experimental_multires.py +++ /dev/null @@ -1,84 +0,0 @@ -#----------------------------------------------------------------------- -#Copyright 2013 Centrum Wiskunde & Informatica, Amsterdam -# -#Author: Daniel M. Pelt -#Contact: D.M.Pelt@cwi.nl -#Website: http://dmpelt.github.io/pyastratoolbox/ -# -# -#This file is part of the Python interface to the -#All Scale Tomographic Reconstruction Antwerp Toolbox ("ASTRA Toolbox"). -# -#The Python interface to the ASTRA Toolbox is free software: you can redistribute it and/or modify -#it under the terms of the GNU General Public License as published by -#the Free Software Foundation, either version 3 of the License, or -#(at your option) any later version. -# -#The Python interface to the ASTRA Toolbox is distributed in the hope that it will be useful, -#but WITHOUT ANY WARRANTY; without even the implied warranty of -#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -#GNU General Public License for more details. -# -#You should have received a copy of the GNU General Public License -#along with the Python interface to the ASTRA Toolbox. If not, see . -# -#----------------------------------------------------------------------- - -import astra -import numpy as np -from astra.experimental import do_composite_FP - -astra.log.setOutputScreen(astra.log.STDERR, astra.log.DEBUG) - -# low res part (voxels of 4x4x4) -vol_geom1 = astra.create_vol_geom(32, 16, 32, -64, 0, -64, 64, -64, 64) - -# high res part (voxels of 1x1x1) -vol_geom2 = astra.create_vol_geom(128, 64, 128, 0, 64, -64, 64, -64, 64) - - -# Split the output in two parts as well, for demonstration purposes -angles1 = np.linspace(0, np.pi/2, 90, False) -angles2 = np.linspace(np.pi/2, np.pi, 90, False) -proj_geom1 = astra.create_proj_geom('parallel3d', 1.0, 1.0, 128, 192, angles1) -proj_geom2 = astra.create_proj_geom('parallel3d', 1.0, 1.0, 128, 192, angles2) - -# Create a simple hollow cube phantom -cube1 = np.zeros((32,32,16)) -cube1[4:28,4:28,4:16] = 1 - -cube2 = np.zeros((128,128,64)) -cube2[16:112,16:112,0:112] = 1 -cube2[33:97,33:97,4:28] = 0 - -vol1 = astra.data3d.create('-vol', vol_geom1, cube1) -vol2 = astra.data3d.create('-vol', vol_geom2, cube2) - -proj1 = astra.data3d.create('-proj3d', proj_geom1, 0) -proj2 = astra.data3d.create('-proj3d', proj_geom2, 0) - -# The actual geometries don't matter for this composite FP/BP case -projector = astra.create_projector('cuda3d', proj_geom1, vol_geom1) - -do_composite_FP(projector, [vol1, vol2], [proj1, proj2]) - -proj_data1 = astra.data3d.get(proj1) -proj_data2 = astra.data3d.get(proj2) - -# Display a single projection image -import pylab -pylab.gray() -pylab.figure(1) -pylab.imshow(proj_data1[:,0,:]) -pylab.figure(2) -pylab.imshow(proj_data2[:,0,:]) -pylab.show() - - -# Clean up. Note that GPU memory is tied up in the algorithm object, -# and main RAM in the data objects. -astra.data3d.delete(vol1) -astra.data3d.delete(vol2) -astra.data3d.delete(proj1) -astra.data3d.delete(proj2) -astra.projector3d.delete(projector) diff --git a/samples/python/s019_experimental_multires.py b/samples/python/s019_experimental_multires.py new file mode 100644 index 0000000..cf38e53 --- /dev/null +++ b/samples/python/s019_experimental_multires.py @@ -0,0 +1,84 @@ +#----------------------------------------------------------------------- +#Copyright 2013 Centrum Wiskunde & Informatica, Amsterdam +# +#Author: Daniel M. Pelt +#Contact: D.M.Pelt@cwi.nl +#Website: http://dmpelt.github.io/pyastratoolbox/ +# +# +#This file is part of the Python interface to the +#All Scale Tomographic Reconstruction Antwerp Toolbox ("ASTRA Toolbox"). +# +#The Python interface to the ASTRA Toolbox is free software: you can redistribute it and/or modify +#it under the terms of the GNU General Public License as published by +#the Free Software Foundation, either version 3 of the License, or +#(at your option) any later version. +# +#The Python interface to the ASTRA Toolbox is distributed in the hope that it will be useful, +#but WITHOUT ANY WARRANTY; without even the implied warranty of +#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +#GNU General Public License for more details. +# +#You should have received a copy of the GNU General Public License +#along with the Python interface to the ASTRA Toolbox. If not, see . +# +#----------------------------------------------------------------------- + +import astra +import numpy as np +from astra.experimental import do_composite_FP + +astra.log.setOutputScreen(astra.log.STDERR, astra.log.DEBUG) + +# low res part (voxels of 4x4x4) +vol_geom1 = astra.create_vol_geom(32, 16, 32, -64, 0, -64, 64, -64, 64) + +# high res part (voxels of 1x1x1) +vol_geom2 = astra.create_vol_geom(128, 64, 128, 0, 64, -64, 64, -64, 64) + + +# Split the output in two parts as well, for demonstration purposes +angles1 = np.linspace(0, np.pi/2, 90, False) +angles2 = np.linspace(np.pi/2, np.pi, 90, False) +proj_geom1 = astra.create_proj_geom('parallel3d', 1.0, 1.0, 128, 192, angles1) +proj_geom2 = astra.create_proj_geom('parallel3d', 1.0, 1.0, 128, 192, angles2) + +# Create a simple hollow cube phantom +cube1 = np.zeros((32,32,16)) +cube1[4:28,4:28,4:16] = 1 + +cube2 = np.zeros((128,128,64)) +cube2[16:112,16:112,0:112] = 1 +cube2[33:97,33:97,4:28] = 0 + +vol1 = astra.data3d.create('-vol', vol_geom1, cube1) +vol2 = astra.data3d.create('-vol', vol_geom2, cube2) + +proj1 = astra.data3d.create('-proj3d', proj_geom1, 0) +proj2 = astra.data3d.create('-proj3d', proj_geom2, 0) + +# The actual geometries don't matter for this composite FP/BP case +projector = astra.create_projector('cuda3d', proj_geom1, vol_geom1) + +do_composite_FP(projector, [vol1, vol2], [proj1, proj2]) + +proj_data1 = astra.data3d.get(proj1) +proj_data2 = astra.data3d.get(proj2) + +# Display a single projection image +import pylab +pylab.gray() +pylab.figure(1) +pylab.imshow(proj_data1[:,0,:]) +pylab.figure(2) +pylab.imshow(proj_data2[:,0,:]) +pylab.show() + + +# Clean up. Note that GPU memory is tied up in the algorithm object, +# and main RAM in the data objects. +astra.data3d.delete(vol1) +astra.data3d.delete(vol2) +astra.data3d.delete(proj1) +astra.data3d.delete(proj2) +astra.projector3d.delete(projector) -- cgit v1.2.3 From b529ff854f0e1191108e31d6be294d31b50c666e Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Thu, 7 Jan 2016 13:36:02 +0100 Subject: Add multi-GPU sample --- samples/matlab/s020_3d_multiGPU.m | 38 +++++++++++++++++++++++++ samples/python/s020_3d_multiGPU.py | 57 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+) create mode 100644 samples/matlab/s020_3d_multiGPU.m create mode 100644 samples/python/s020_3d_multiGPU.py diff --git a/samples/matlab/s020_3d_multiGPU.m b/samples/matlab/s020_3d_multiGPU.m new file mode 100644 index 0000000..bade325 --- /dev/null +++ b/samples/matlab/s020_3d_multiGPU.m @@ -0,0 +1,38 @@ +% ----------------------------------------------------------------------- +% This file is part of the ASTRA Toolbox +% +% Copyright: 2010-2015, iMinds-Vision Lab, University of Antwerp +% 2014-2015, CWI, Amsterdam +% License: Open Source under GPLv3 +% Contact: astra@uantwerpen.be +% Website: http://sf.net/projects/astra-toolbox +% ----------------------------------------------------------------------- + + +% Set up multi-GPU usage. +% This only works for 3D GPU forward projection and back projection. +astra_mex('set_gpu_index', [0 1]); + +% Optionally, you can also restrict the amount of GPU memory ASTRA will use. +% The line commented below sets this to 1GB. +%astra_mex('set_gpu_index', [0 1], 'memory', 1024*1024*1024); + +vol_geom = astra_create_vol_geom(1024, 1024, 1024); + +angles = linspace2(0, pi, 1024); +proj_geom = astra_create_proj_geom('parallel3d', 1.0, 1.0, 1024, 1024, angles); + +% Create a simple hollow cube phantom +cube = zeros(1024,1024,1024); +cube(129:896,129:896,129:896) = 1; +cube(257:768,257:768,257:768) = 0; + +% Create projection data from this +[proj_id, proj_data] = astra_create_sino3d_cuda(cube, proj_geom, vol_geom); + +% Backproject projection data +[bproj_id, bproj_data] = astra_create_backprojection3d_cuda(proj_data, proj_geom, vol_geom); + +astra_mex_data3d('delete', proj_id); +astra_mex_data3d('delete', bproj_id); + diff --git a/samples/python/s020_3d_multiGPU.py b/samples/python/s020_3d_multiGPU.py new file mode 100644 index 0000000..d6799c4 --- /dev/null +++ b/samples/python/s020_3d_multiGPU.py @@ -0,0 +1,57 @@ +#----------------------------------------------------------------------- +#Copyright 2013 Centrum Wiskunde & Informatica, Amsterdam +# +#Author: Daniel M. Pelt +#Contact: D.M.Pelt@cwi.nl +#Website: http://dmpelt.github.io/pyastratoolbox/ +# +# +#This file is part of the Python interface to the +#All Scale Tomographic Reconstruction Antwerp Toolbox ("ASTRA Toolbox"). +# +#The Python interface to the ASTRA Toolbox is free software: you can redistribute it and/or modify +#it under the terms of the GNU General Public License as published by +#the Free Software Foundation, either version 3 of the License, or +#(at your option) any later version. +# +#The Python interface to the ASTRA Toolbox is distributed in the hope that it will be useful, +#but WITHOUT ANY WARRANTY; without even the implied warranty of +#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +#GNU General Public License for more details. +# +#You should have received a copy of the GNU General Public License +#along with the Python interface to the ASTRA Toolbox. If not, see . +# +#----------------------------------------------------------------------- + +import astra +import numpy as np + +# Set up multi-GPU usage. +# This only works for 3D GPU forward projection and back projection. +astra.astra.set_gpu_index([0,1]) + +# Optionally, you can also restrict the amount of GPU memory ASTRA will use. +# The line commented below sets this to 1GB. +#astra.astra.set_gpu_index([0,1], memory=1024*1024*1024) + +vol_geom = astra.create_vol_geom(1024, 1024, 1024) + +angles = np.linspace(0, np.pi, 1024,False) +proj_geom = astra.create_proj_geom('parallel3d', 1.0, 1.0, 1024, 1024, angles) + +# Create a simple hollow cube phantom +cube = np.zeros((1024,1024,1024)) +cube[128:895,128:895,128:895] = 1 +cube[256:767,256:767,256:767] = 0 + +# Create projection data from this +proj_id, proj_data = astra.create_sino3d_gpu(cube, proj_geom, vol_geom) + +# Backproject projection data +bproj_id, bproj_data = astra.create_backprojection3d_gpu(proj_data, proj_geom, vol_geom) + +# Clean up. Note that GPU memory is tied up in the algorithm object, +# and main RAM in the data objects. +astra.data3d.delete(proj_id) +astra.data3d.delete(bproj_id) -- cgit v1.2.3