diff options
author | Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl> | 2014-10-30 17:41:58 +0100 |
---|---|---|
committer | Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl> | 2015-01-19 11:18:24 +0100 |
commit | 50598bfd0c4959da66780fe695755e2be1a28b87 (patch) | |
tree | a12ab13e2005478fb01804525b1124de7641cab7 /matlab/mex/mexCopyDataHelpFunctions.cpp | |
parent | ed56403b30389501d1df7fbbeb763abbe7b09654 (diff) | |
download | astra-50598bfd0c4959da66780fe695755e2be1a28b87.tar.gz astra-50598bfd0c4959da66780fe695755e2be1a28b87.tar.bz2 astra-50598bfd0c4959da66780fe695755e2be1a28b87.tar.xz astra-50598bfd0c4959da66780fe695755e2be1a28b87.zip |
Refactor astra_mex_data3d
(Modified) patch by Nicola ViganĂ²
Diffstat (limited to 'matlab/mex/mexCopyDataHelpFunctions.cpp')
-rw-r--r-- | matlab/mex/mexCopyDataHelpFunctions.cpp | 257 |
1 files changed, 257 insertions, 0 deletions
diff --git a/matlab/mex/mexCopyDataHelpFunctions.cpp b/matlab/mex/mexCopyDataHelpFunctions.cpp new file mode 100644 index 0000000..bbcad30 --- /dev/null +++ b/matlab/mex/mexCopyDataHelpFunctions.cpp @@ -0,0 +1,257 @@ +/* +----------------------------------------------------------------------- +Copyright 2013 iMinds-Vision Lab, University of Antwerp + +Contact: astra@ua.ac.be +Website: http://astra.ua.ac.be + + +This file is part of the +All Scale Tomographic Reconstruction Antwerp Toolbox ("ASTRA Toolbox"). + +The ASTRA Toolbox is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +The ASTRA Toolbox is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>. + +----------------------------------------------------------------------- +$Id$ +*/ + +#include "mexCopyDataHelpFunctions.h" + +#include "mexHelpFunctions.h" + +#define HAVE_OMP + +#define ROUND_DOWN(x, s) ((x) & ~((s)-1)) + +#ifdef HAVE_OMP +# include <omp.h> +#endif + +#if defined(__SSE2__) +# include <emmintrin.h> + +# define STORE_32F_64F_CORE8(in, out, count, base) \ + {\ + const __m128 inV0 = *((const __m128 *) &in[count + 0 + base]);\ + const __m128 inV1 = *((const __m128 *) &in[count + 4 + base]);\ +\ + *((__m128d *)&out[count + 0 + base]) = _mm_cvtps_pd(inV0);\ + *((__m128d *)&out[count + 2 + base]) = _mm_cvtps_pd(_mm_movehl_ps(inV0, inV0));\ +\ + *((__m128d *)&out[count + 4 + base]) = _mm_cvtps_pd(inV1);\ + *((__m128d *)&out[count + 6 + base]) = _mm_cvtps_pd(_mm_movehl_ps(inV1, inV1));\ + } +# define STORE_64F_32F_CORE8(in, out, count, base) \ + {\ + const __m128d inV0 = *((const __m128d *) &in[count + 0 + base]);\ + const __m128d inV1 = *((const __m128d *) &in[count + 2 + base]);\ +\ + const __m128d inV2 = *((const __m128d *) &in[count + 4 + base]);\ + const __m128d inV3 = *((const __m128d *) &in[count + 6 + base]);\ +\ + *((__m128 *)&out[count + 0 + base]) = _mm_movelh_ps(_mm_cvtpd_ps(inV0), _mm_cvtpd_ps(inV1));\ + *((__m128 *)&out[count + 4 + base]) = _mm_movelh_ps(_mm_cvtpd_ps(inV2), _mm_cvtpd_ps(inV3));\ + } +# define STORE_32F_32F_CORE8(in, out, count, base) \ + {\ + *((__m128 *)&out[count + 0 + base]) = *((const __m128 *)&in[count + 0 + base]);\ + *((__m128 *)&out[count + 4 + base]) = *((const __m128 *)&in[count + 4 + base]);\ + } +# define STORE_CONST_32F_CORE8(val, out, count, base) \ + {\ + *((__m128 *)&out[count + 0 + base]) = val;\ + *((__m128 *)&out[count + 4 + base]) = val;\ + } +#else +# define STORE_32F_64F_CORE8(in, out, count, base) \ + {\ + out[count + 0 + base] = (double)in[count + 0 + base];\ + out[count + 1 + base] = (double)in[count + 1 + base];\ + out[count + 2 + base] = (double)in[count + 2 + base];\ + out[count + 3 + base] = (double)in[count + 3 + base];\ + out[count + 4 + base] = (double)in[count + 4 + base];\ + out[count + 5 + base] = (double)in[count + 5 + base];\ + out[count + 6 + base] = (double)in[count + 6 + base];\ + out[count + 7 + base] = (double)in[count + 7 + base];\ + } +# define STORE_64F_32F_CORE8(in, out, count, base) \ + {\ + out[count + 0 + base] = (float)in[count + 0 + base];\ + out[count + 1 + base] = (float)in[count + 1 + base];\ + out[count + 2 + base] = (float)in[count + 2 + base];\ + out[count + 3 + base] = (float)in[count + 3 + base];\ + out[count + 4 + base] = (float)in[count + 4 + base];\ + out[count + 5 + base] = (float)in[count + 5 + base];\ + out[count + 6 + base] = (float)in[count + 6 + base];\ + out[count + 7 + base] = (float)in[count + 7 + base];\ + } +# define STORE_32F_32F_CORE8(in, out, count, base) \ + {\ + out[count + 0 + base] = in[count + 0 + base];\ + out[count + 1 + base] = in[count + 1 + base];\ + out[count + 2 + base] = in[count + 2 + base];\ + out[count + 3 + base] = in[count + 3 + base];\ + out[count + 4 + base] = in[count + 4 + base];\ + out[count + 5 + base] = in[count + 5 + base];\ + out[count + 6 + base] = in[count + 6 + base];\ + out[count + 7 + base] = in[count + 7 + base];\ + } +#endif + +const char * warnDataTypeNotSupported = "Data type not supported: nothing was copied"; + +void +_copyMexToCFloat32Array(const mxArray * const inArray, astra::float32 * const out) +{ + const long long tot_size = mxGetNumberOfElements(inArray); + const long long block = 32; + const long long totRoundedPixels = ROUND_DOWN(tot_size, block); + + if (mxIsDouble(inArray)) { + const double * const pdMatlabData = mxGetPr(inArray); + +#pragma omp for nowait + for (long long count = 0; count < totRoundedPixels; count += block) { + STORE_64F_32F_CORE8(pdMatlabData, out, count, 0); + STORE_64F_32F_CORE8(pdMatlabData, out, count, 8); + STORE_64F_32F_CORE8(pdMatlabData, out, count, 16); + STORE_64F_32F_CORE8(pdMatlabData, out, count, 24); + } +#pragma omp for nowait + for (long long count = totRoundedPixels; count < tot_size; count++) { + out[count] = pdMatlabData[count]; + } + } + else if (mxIsSingle(inArray)) { + const float * const pfMatlabData = (const float *)mxGetData(inArray); + +#pragma omp for nowait + for (long long count = 0; count < totRoundedPixels; count += block) { + STORE_32F_32F_CORE8(pfMatlabData, out, count, 0); + STORE_32F_32F_CORE8(pfMatlabData, out, count, 8); + STORE_32F_32F_CORE8(pfMatlabData, out, count, 16); + STORE_32F_32F_CORE8(pfMatlabData, out, count, 24); + } +#pragma omp for nowait + for (long long count = totRoundedPixels; count < tot_size; count++) { + out[count] = pfMatlabData[count]; + } + } + else { +#pragma omp single nowait + mexWarnMsgIdAndTxt("ASTRA_MEX:wrong_datatype", warnDataTypeNotSupported); + } +} + +void +_copyCFloat32ArrayToMex(const astra::float32 * const in, mxArray * const outArray) +{ + const long long tot_size = mxGetNumberOfElements(outArray); + const long long block = 32; + const long long tot_rounded_size = ROUND_DOWN(tot_size, block); + + if (mxIsDouble(outArray)) { + double * const pdMatlabData = mxGetPr(outArray); + +#pragma omp for nowait + for (long long count = 0; count < tot_rounded_size; count += block) { + STORE_32F_64F_CORE8(in, pdMatlabData, count, 0); + STORE_32F_64F_CORE8(in, pdMatlabData, count, 8); + STORE_32F_64F_CORE8(in, pdMatlabData, count, 16); + STORE_32F_64F_CORE8(in, pdMatlabData, count, 24); + } +#pragma omp for nowait + for (long long count = tot_rounded_size; count < tot_size; count++) { + pdMatlabData[count] = in[count]; + } + } + else if (mxIsSingle(outArray)) { + float * const pfMatlabData = (float *) mxGetData(outArray); + +#pragma omp for nowait + for (long long count = 0; count < tot_rounded_size; count += block) { + STORE_32F_32F_CORE8(in, pfMatlabData, count, 0); + STORE_32F_32F_CORE8(in, pfMatlabData, count, 8); + STORE_32F_32F_CORE8(in, pfMatlabData, count, 16); + STORE_32F_32F_CORE8(in, pfMatlabData, count, 24); + } +#pragma omp for nowait + for (long long count = tot_rounded_size; count < tot_size; count++) { + pfMatlabData[count] = in[count]; + } + } + else { +#pragma omp single nowait + mexWarnMsgIdAndTxt("ASTRA_MEX:wrong_datatype", warnDataTypeNotSupported); + } +} + +void +_initializeCFloat32Array(const astra::float32 & val, astra::float32 * const out, + const size_t & tot_size) +{ +#ifdef __SSE2__ + const long long block = 32; + const long long tot_rounded_size = ROUND_DOWN(tot_size, block); + + const __m128 vecVal = _mm_set1_ps(val); + + { +#pragma omp for nowait + for (long long count = 0; count < tot_rounded_size; count += block) { + STORE_CONST_32F_CORE8(vecVal, out, count, 0); + STORE_CONST_32F_CORE8(vecVal, out, count, 8); + STORE_CONST_32F_CORE8(vecVal, out, count, 16); + STORE_CONST_32F_CORE8(vecVal, out, count, 24); + } +#else + const long long tot_rounded_size = 0; + { +#endif +#pragma omp for nowait + for (long long count = tot_rounded_size; count < (long long)tot_size; count++) { + out[count] = val; + } + } +} + +void +copyMexToCFloat32Array(const mxArray * const in, + astra::float32 * const out, const size_t &tot_size) +{ +#pragma omp parallel + { + // fill with scalar value + if (mex_is_scalar(in)) { + astra::float32 fValue = 0.f; + if (!mxIsEmpty(in)) { + fValue = (astra::float32)mxGetScalar(in); + } + _initializeCFloat32Array(fValue, out, tot_size); + } + // fill with array value + else { + _copyMexToCFloat32Array(in, out); + } + } +} + +void +copyCFloat32ArrayToMex(const float * const in, mxArray * const outArray) +{ +#pragma omp parallel + { + _copyCFloat32ArrayToMex(in, outArray); + } +} |