summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEdoardo Pasca <edo.paskino@gmail.com>2020-01-24 13:59:09 +0000
committerGitHub <noreply@github.com>2020-01-24 13:59:09 +0000
commit6e60b7802bb0369cc9dd8b1715073a1ff3c18f03 (patch)
treed57ae73a28e56d59461d1fa62284d3d94e630b90
parent894f35c9be404bc2c13f90f4a6184a545029181a (diff)
downloadframework-6e60b7802bb0369cc9dd8b1715073a1ff3c18f03.tar.gz
framework-6e60b7802bb0369cc9dd8b1715073a1ff3c18f03.tar.bz2
framework-6e60b7802bb0369cc9dd8b1715073a1ff3c18f03.tar.xz
framework-6e60b7802bb0369cc9dd8b1715073a1ff3c18f03.zip
axpby as concrete method in DataContainer and BlockDataContainer (#489)
* axpby as concrete method in DataContainer and BlockDataContainer * fixed axpby and added unittest * PDHG to use axpby * pass num_threads to axpby * void commit * add seed to random in test * NUM_THREADS can be imported from ccpi.utilities * added test to axpby with num_threads
-rw-r--r--Wrappers/Python/ccpi/framework/BlockDataContainer.py47
-rw-r--r--Wrappers/Python/ccpi/framework/framework.py31
-rw-r--r--Wrappers/Python/ccpi/optimisation/algorithms/PDHG.py16
-rw-r--r--Wrappers/Python/ccpi/optimisation/operators/GradientOperator.py7
-rw-r--r--Wrappers/Python/ccpi/utilities/__init__.py22
-rwxr-xr-xWrappers/Python/test/test_BlockDataContainer.py155
-rwxr-xr-xWrappers/Python/test/test_DataContainer.py15
-rwxr-xr-xWrappers/Python/test/test_Gradient.py4
-rw-r--r--src/Core/CMakeLists.txt5
-rw-r--r--src/Core/FiniteDifferenceLibrary.c11
-rwxr-xr-xsrc/Core/axpby.c84
-rw-r--r--src/Core/include/FiniteDifferenceLibrary.h3
-rw-r--r--src/Core/include/axpby.h13
-rw-r--r--src/Core/include/utilities.h3
-rw-r--r--src/Core/utilities.c14
15 files changed, 300 insertions, 130 deletions
diff --git a/Wrappers/Python/ccpi/framework/BlockDataContainer.py b/Wrappers/Python/ccpi/framework/BlockDataContainer.py
index 22cee03..a0d139b 100644
--- a/Wrappers/Python/ccpi/framework/BlockDataContainer.py
+++ b/Wrappers/Python/ccpi/framework/BlockDataContainer.py
@@ -23,6 +23,7 @@ import numpy
from numbers import Number
import functools
from ccpi.framework import DataContainer
+from ccpi.utilities import NUM_THREADS
#from ccpi.framework import AcquisitionData, ImageData
#from ccpi.optimisation.operators import Operator, LinearOperator
@@ -50,11 +51,12 @@ class BlockDataContainer(object):
A * 3 = [ 3 * [B,C] , 3* D] = [ [ 3*B, 3*C] , 3*D ]
'''
- ADD = 'add'
+ ADD = 'add'
SUBTRACT = 'subtract'
MULTIPLY = 'multiply'
- DIVIDE = 'divide'
- POWER = 'power'
+ DIVIDE = 'divide'
+ POWER = 'power'
+ AXPBY = 'axpby'
__array_priority__ = 1
__container_priority__ = 2
def __init__(self, *args, **kwargs):
@@ -173,7 +175,22 @@ class BlockDataContainer(object):
self.binary_operations(BlockDataContainer.DIVIDE, other, *args, **kwargs)
else:
return self.binary_operations(BlockDataContainer.DIVIDE, other, *args, **kwargs)
-
+ def axpby(self, a, b, y, out, dtype=numpy.float32, num_threads = NUM_THREADS):
+ r'''performs axpby element-wise on the BlockDataContainer containers
+
+ Does the operation .. math:: a*x+b*y and stores the result in out, where x is self
+
+ :param a: scalar
+ :param b: scalar
+ :param y: compatible (Block)DataContainer
+ :param out: (Block)DataContainer to store the result
+ :param dtype: optional, data type of the DataContainers
+ '''
+ if out is None:
+ raise ValueError("out container cannot be None")
+ kwargs = {'a':a, 'b':b, 'out':out, 'dtype': dtype, 'num_threads': NUM_THREADS}
+ self.binary_operations(BlockDataContainer.AXPBY, y, **kwargs)
+
def binary_operations(self, operation, other, *args, **kwargs):
'''Algebra: generic method of algebric operation with BlockDataContainer with number/DataContainer or BlockDataContainer
@@ -234,11 +251,19 @@ class BlockDataContainer(object):
op = el.divide
elif operation == BlockDataContainer.POWER:
op = el.power
+ elif operation == BlockDataContainer.AXPBY:
+ if not isinstance(other, BlockDataContainer):
+ raise ValueError("{} cannot handle {}".format(operation, type(other)))
+ op = el.axpby
else:
raise ValueError('Unsupported operation', operation)
if out is not None:
kw['out'] = out.get_item(i)
- op(ot, *args, **kw)
+ if operation == BlockDataContainer.AXPBY:
+ kw['y'] = ot
+ el.axpby(kw['a'], kw['b'], kw['y'], kw['out'], kw['dtype'], kw['num_threads'])
+ else:
+ op(ot, *args, **kw)
else:
res.append(op(ot, *args, **kw))
if out is not None:
@@ -249,6 +274,12 @@ class BlockDataContainer(object):
else:
# try to do algebra with one DataContainer. Will raise error if not compatible
kw = kwargs.copy()
+ if operation != BlockDataContainer.AXPBY:
+ # remove keyworded argument related to AXPBY
+ for k in ['a','b','y', 'num_threads', 'dtype']:
+ if k in kw.keys():
+ kw.pop(k)
+
res = []
for i,el in enumerate(self.containers):
if operation == BlockDataContainer.ADD:
@@ -261,6 +292,12 @@ class BlockDataContainer(object):
op = el.divide
elif operation == BlockDataContainer.POWER:
op = el.power
+ elif operation == BlockDataContainer.AXPBY:
+ # As out cannot be None, it is safe to continue the
+ # for loop after the call to axpby
+ kw['out'] = out.get_item(i)
+ el.axpby(kw['a'], kw['b'], other, kw['out'], kw['dtype'], kw['num_threads'])
+ continue
else:
raise ValueError('Unsupported operation', operation)
if out is not None:
diff --git a/Wrappers/Python/ccpi/framework/framework.py b/Wrappers/Python/ccpi/framework/framework.py
index 65121d2..6f1ed1c 100644
--- a/Wrappers/Python/ccpi/framework/framework.py
+++ b/Wrappers/Python/ccpi/framework/framework.py
@@ -26,6 +26,7 @@ import warnings
from functools import reduce
from numbers import Number
import ctypes, platform
+from ccpi.utilities import NUM_THREADS
# dll = os.path.abspath(os.path.join(
# os.path.abspath(os.path.dirname(__file__)),
@@ -45,6 +46,11 @@ else:
#print ("dll location", dll)
cilacc = ctypes.cdll.LoadLibrary(dll)
+#default nThreads
+# import multiprocessing
+# cpus = multiprocessing.cpu_count()
+# NUM_THREADS = max(int(cpus/2),1)
+
def find_key(dic, val):
"""return the key of dictionary dic given the value"""
@@ -828,24 +834,27 @@ class DataContainer(object):
def minimum(self,x2, out=None, *args, **kwargs):
return self.pixel_wise_binary(numpy.minimum, x2=x2, out=out, *args, **kwargs)
- @staticmethod
- def axpby(a,x,b,y,out,dtype=numpy.float32):
+ def axpby(self, a, b, y, out, dtype=numpy.float32, num_threads=NUM_THREADS):
'''performs axpby with cilacc C library
- Does the operation .. math:: a*x+b*y and stores the result in out
+ Does the operation .. math:: a*x+b*y and stores the result in out, where x is self
:param a: scalar
- :param x: DataContainer
+ :type a: float
:param b: scalar
+ :type b: float
:param y: DataContainer
- :param out: DataContainer to store the result
- :param dtype: optional, data type of the DataContainers
+ :param out: DataContainer instance to store the result
+ :param dtype: data type of the DataContainers
+ :type dtype: numpy type, optional, default numpy.float32
+ :param num_threads: number of threads to run on
+ :type num_threads: int, optional, default 1/2 CPU of the system
'''
c_float_p = ctypes.POINTER(ctypes.c_float)
c_double_p = ctypes.POINTER(ctypes.c_double)
# get the reference to the data
- ndx = x.as_array()
+ ndx = self.as_array()
ndy = y.as_array()
ndout = out.as_array()
@@ -879,15 +888,17 @@ class DataContainer(object):
ctypes.POINTER(ctypes.c_float), # pointer to the third array
ctypes.c_float, # type of A (float)
ctypes.c_float, # type of B (float)
- ctypes.c_long] # type of size of first array
+ ctypes.c_long, # type of size of first array
+ ctypes.c_int] # number of threads
cilacc.daxpby.argtypes = [ctypes.POINTER(ctypes.c_double), # pointer to the first array
ctypes.POINTER(ctypes.c_double), # pointer to the second array
ctypes.POINTER(ctypes.c_double), # pointer to the third array
ctypes.c_double, # type of A (c_double)
ctypes.c_double, # type of B (c_double)
- ctypes.c_long] # type of size of first array
+ ctypes.c_long, # type of size of first array
+ ctypes.c_int] # number of threads
- if f(x_p, y_p, out_p, a, b, ndx.size) != 0:
+ if f(x_p, y_p, out_p, a, b, ndx.size, num_threads) != 0:
raise RuntimeError('axpby execution failed')
diff --git a/Wrappers/Python/ccpi/optimisation/algorithms/PDHG.py b/Wrappers/Python/ccpi/optimisation/algorithms/PDHG.py
index cc384e3..dcb9298 100644
--- a/Wrappers/Python/ccpi/optimisation/algorithms/PDHG.py
+++ b/Wrappers/Python/ccpi/optimisation/algorithms/PDHG.py
@@ -130,24 +130,26 @@ class PDHG(Algorithm):
# Gradient ascent for the dual variable
self.operator.direct(self.xbar, out=self.y_tmp)
- self.y_tmp *= self.sigma
- self.y_tmp += self.y_old
+ # self.y_tmp *= self.sigma
+ # self.y_tmp += self.y_old
+ self.y_tmp.axpby(self.sigma, 1 , self.y_old, self.y_tmp)
# self.y = self.f.proximal_conjugate(self.y_old, self.sigma)
self.f.proximal_conjugate(self.y_tmp, self.sigma, out=self.y)
# Gradient descent for the primal variable
self.operator.adjoint(self.y, out=self.x_tmp)
- self.x_tmp *= -1*self.tau
- self.x_tmp += self.x_old
+ # self.x_tmp *= -1*self.tau
+ # self.x_tmp += self.x_old
+ self.x_tmp.axpby(-self.tau, 1. , self.x_old, self.x_tmp)
self.g.proximal(self.x_tmp, self.tau, out=self.x)
# Update
self.x.subtract(self.x_old, out=self.xbar)
- self.xbar *= self.theta
- self.xbar += self.x
-
+ # self.xbar *= self.theta
+ # self.xbar += self.x
+ self.xbar.axpby(self.theta, 1 , self.x, self.xbar)
def update_objective(self):
diff --git a/Wrappers/Python/ccpi/optimisation/operators/GradientOperator.py b/Wrappers/Python/ccpi/optimisation/operators/GradientOperator.py
index a45c3d2..a5feca3 100644
--- a/Wrappers/Python/ccpi/optimisation/operators/GradientOperator.py
+++ b/Wrappers/Python/ccpi/optimisation/operators/GradientOperator.py
@@ -22,13 +22,14 @@ from __future__ import print_function
from ccpi.optimisation.operators import Operator, LinearOperator, ScaledOperator
from ccpi.optimisation.operators import FiniteDiff, SparseFiniteDiff
from ccpi.framework import ImageData, ImageGeometry, BlockGeometry, BlockDataContainer
+from ccpi.utilities import NUM_THREADS
import numpy
import warnings
#default nThreads
-import multiprocessing
-cpus = multiprocessing.cpu_count()
-NUM_THREADS = max(int(cpus/2),1)
+# import multiprocessing
+# cpus = multiprocessing.cpu_count()
+# NUM_THREADS = max(int(cpus/2),1)
NEUMANN = 'Neumann'
PERIODIC = 'Periodic'
diff --git a/Wrappers/Python/ccpi/utilities/__init__.py b/Wrappers/Python/ccpi/utilities/__init__.py
index e69de29..79eaa98 100644
--- a/Wrappers/Python/ccpi/utilities/__init__.py
+++ b/Wrappers/Python/ccpi/utilities/__init__.py
@@ -0,0 +1,22 @@
+# -*- coding: utf-8 -*-
+# This work is part of the Core Imaging Library developed by
+# Visual Analytics and Imaging System Group of the Science Technology
+# Facilities Council, STFC
+
+# Copyright 2020 Edoardo Pasca
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#default nThreads
+
+import multiprocessing
+NUM_THREADS = max(int(multiprocessing.cpu_count()/2),1)
diff --git a/Wrappers/Python/test/test_BlockDataContainer.py b/Wrappers/Python/test/test_BlockDataContainer.py
index bc0e83a..a8e59b0 100755
--- a/Wrappers/Python/test/test_BlockDataContainer.py
+++ b/Wrappers/Python/test/test_BlockDataContainer.py
@@ -485,4 +485,159 @@ class TestBlockDataContainer(unittest.TestCase):
print(err)
self.assertTrue(res)
+ def test_axpby(self):
+ # test axpby between BlockDataContainers
+ ig0 = ImageGeometry(2,3,4)
+ ig1 = ImageGeometry(2,3,5)
+
+ data0 = ig0.allocate(-1)
+ data2 = ig0.allocate(1)
+
+ data1 = ig0.allocate(2)
+ data3 = ig0.allocate(3)
+
+ cp0 = BlockDataContainer(data0,data2)
+ cp1 = BlockDataContainer(data1,data3)
+
+ out = cp0 * 0. - 10
+
+ cp0.axpby(3,-2,cp1,out, num_threads=4)
+
+ # operation should be [ 3 * -1 + (-2) * 2 , 3 * 1 + (-2) * 3 ]
+ # output should be [ -7 , -3 ]
+ res0 = ig0.allocate(-7)
+ res2 = ig0.allocate(-3)
+ res = BlockDataContainer(res0, res2)
+
+ print ("res0", res0.as_array())
+ print ("res2", res2.as_array())
+
+ print ("###############################")
+
+ print ("out_0", out.get_item(0).as_array())
+ print ("out_1", out.get_item(1).as_array())
+ self.assertBlockDataContainerEqual(out, res)
+
+ def test_axpby2(self):
+ # test axpby with BlockDataContainer and DataContainer
+ ig0 = ImageGeometry(2,3,4)
+ # ig1 = ImageGeometry(2,3,5)
+
+ data0 = ig0.allocate(-1)
+ data2 = ig0.allocate(1)
+
+ data1 = ig0.allocate(2)
+ # data3 = ig1.allocate(3)
+
+ cp0 = BlockDataContainer(data0,data2)
+ # cp1 = BlockDataContainer(data1,data3)
+
+ out = cp0 * 0. - 10
+
+ cp0.axpby(3,-2,data1,out)
+
+ # operation should be [ 3 * -1 + (-2) * 2 , 3 * 1 + (-2) * 2 ]
+ # output should be [ -7 , -1 ]
+ res0 = ig0.allocate(-7)
+ res2 = ig0.allocate(-1)
+ res = BlockDataContainer(res0, res2)
+
+ print ("res0", res0.as_array())
+ print ("res2", res2.as_array())
+
+ print ("###############################")
+
+ print ("out_0", out.get_item(0).as_array())
+ print ("out_1", out.get_item(1).as_array())
+ self.assertBlockDataContainerEqual(out, res)
+
+
+ def test_axpby3(self):
+ # test axpby with nested BlockDataContainer
+ ig0 = ImageGeometry(2,3,4)
+ ig1 = ImageGeometry(2,3,5)
+
+ data0 = ig0.allocate(-1)
+ data2 = ig0.allocate(1)
+
+ # data1 = ig0.allocate(2)
+ data3 = ig1.allocate(3)
+
+ cp0 = BlockDataContainer(data0,data2)
+ cp1 = BlockDataContainer(cp0 *0. + [2, -2], data3)
+ print (cp1.get_item(0).get_item(0).as_array())
+ print (cp1.get_item(0).get_item(1).as_array())
+ print (cp1.get_item(1).as_array())
+ print ("###############################")
+
+
+
+ out = cp1 * 0.
+ cp2 = out + [1,3]
+
+ print (cp2.get_item(0).get_item(0).as_array())
+ print (cp2.get_item(0).get_item(1).as_array())
+ print (cp2.get_item(1).as_array())
+
+ cp2.axpby(3,-2, cp1 ,out)
+
+ # output should be [ [ -1 , 7 ] , 3]
+ res0 = ig0.allocate(-1)
+ res2 = ig0.allocate(7)
+ res3 = ig1.allocate(3)
+ res = BlockDataContainer(BlockDataContainer(res0, res2), res3)
+
+ # print ("res0", res0.as_array())
+ # print ("res2", res2.as_array())
+
+ print ("###############################")
+
+ # print ("out_0", out.get_item(0).as_array())
+ # print ("out_1", out.get_item(1).as_array())
+ self.assertBlockDataContainerEqual(out, res)
+
+ def test_axpby4(self):
+ # test axpby with nested BlockDataContainer
+ ig0 = ImageGeometry(2,3,4)
+ ig1 = ImageGeometry(2,3,5)
+
+ data0 = ig0.allocate(-1)
+ data2 = ig0.allocate(1)
+
+ # data1 = ig0.allocate(2)
+ data3 = ig1.allocate(3)
+
+ cp0 = BlockDataContainer(data0,data2)
+ cp1 = BlockDataContainer(cp0 *0. + [2, -2], data3)
+ print (cp1.get_item(0).get_item(0).as_array())
+ print (cp1.get_item(0).get_item(1).as_array())
+ print (cp1.get_item(1).as_array())
+ print ("###############################")
+
+
+
+ out = cp1 * 0.
+ cp2 = out + [1,3]
+
+
+ print (cp2.get_item(0).get_item(0).as_array())
+ print (cp2.get_item(0).get_item(1).as_array())
+ print (cp2.get_item(1).as_array())
+
+ cp2.axpby(3,-2, cp1 ,out, num_threads=4)
+
+ # output should be [ [ -1 , 7 ] , 3]
+ res0 = ig0.allocate(-1)
+ res2 = ig0.allocate(7)
+ res3 = ig1.allocate(3)
+ res = BlockDataContainer(BlockDataContainer(res0, res2), res3)
+
+ # print ("res0", res0.as_array())
+ # print ("res2", res2.as_array())
+
+ print ("###############################")
+
+ # print ("out_0", out.get_item(0).as_array())
+ # print ("out_1", out.get_item(1).as_array())
+ self.assertBlockDataContainerEqual(out, res)
diff --git a/Wrappers/Python/test/test_DataContainer.py b/Wrappers/Python/test/test_DataContainer.py
index 6e297ee..4a8a6d1 100755
--- a/Wrappers/Python/test/test_DataContainer.py
+++ b/Wrappers/Python/test/test_DataContainer.py
@@ -740,7 +740,20 @@ class TestDataContainer(unittest.TestCase):
d2 = ig.allocate(2)
out = ig.allocate(None)
# equals to 2 * [1] + 1 * [2] = [4]
- DataContainer.axpby(2,d1,1,d2,out)
+ d1.axpby(2,1,d2,out)
+ res = numpy.ones_like(d1.as_array()) * 4.
+ numpy.testing.assert_array_equal(res, out.as_array())
+ def test_axpby2(self):
+ print ("test axpby2")
+ N = 100
+ ig = ImageGeometry(N,2*N,N*10)
+ d1 = ig.allocate(1)
+ d2 = ig.allocate(2)
+ out = ig.allocate(None)
+ print ("allocated")
+ # equals to 2 * [1] + 1 * [2] = [4]
+ d1.axpby(2,1,d2,out, num_threads=4)
+ print ("calculated")
res = numpy.ones_like(d1.as_array()) * 4.
numpy.testing.assert_array_equal(res, out.as_array())
diff --git a/Wrappers/Python/test/test_Gradient.py b/Wrappers/Python/test/test_Gradient.py
index 5aeede0..78fc261 100755
--- a/Wrappers/Python/test/test_Gradient.py
+++ b/Wrappers/Python/test/test_Gradient.py
@@ -30,6 +30,8 @@ class TestGradient(unittest.TestCase):
N, M, K = 20, 30, 40
channels = 10
+ numpy.random.seed(1)
+
# check range geometry, examples
ig1 = ImageGeometry(voxel_num_x = M, voxel_num_y = N)
@@ -235,4 +237,4 @@ class TestGradient(unittest.TestCase):
grad = Gradient(ig, bnd_cond='Periodic', correlation='SpaceChannels', backend='numpy')
self.assertTrue(LinearOperator.dot_test(grad))
- \ No newline at end of file
+
diff --git a/src/Core/CMakeLists.txt b/src/Core/CMakeLists.txt
index e828fe5..9c9a89d 100644
--- a/src/Core/CMakeLists.txt
+++ b/src/Core/CMakeLists.txt
@@ -93,8 +93,9 @@ message("CMAKE_STATIC_LINKER_FLAGS ${CMAKE_STATIC_LINKER_FLAGS}")
-add_library(cilacc SHARED ${CMAKE_CURRENT_SOURCE_DIR}/axpby.c
- ${CMAKE_CURRENT_SOURCE_DIR}/FiniteDifferenceLibrary.c )
+add_library(cilacc SHARED ${CMAKE_CURRENT_SOURCE_DIR}/utilities.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/FiniteDifferenceLibrary.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/axpby.c )
target_link_libraries(cilacc ${OpenMP_C_LIB_NAMES} )
include_directories(cilacc PUBLIC
diff --git a/src/Core/FiniteDifferenceLibrary.c b/src/Core/FiniteDifferenceLibrary.c
index fbf2646..244e170 100644
--- a/src/Core/FiniteDifferenceLibrary.c
+++ b/src/Core/FiniteDifferenceLibrary.c
@@ -16,17 +16,6 @@ DLL_EXPORT int openMPtest(int nThreads)
}
return nThreads_running;
}
-void threads_setup(int nThreads_requested, int *nThreads_current)
-{
-#pragma omp parallel
- {
- if (omp_get_thread_num() == 0)
- {
- *nThreads_current = omp_get_num_threads();
- }
- }
- omp_set_num_threads(nThreads_requested);
-}
int fdiff_direct_neumann(const float *inimagefull, float *outimageXfull, float *outimageYfull, float *outimageZfull, float *outimageCfull, long nx, long ny, long nz, long nc)
{
diff --git a/src/Core/axpby.c b/src/Core/axpby.c
index c4d162d..54a597f 100755
--- a/src/Core/axpby.c
+++ b/src/Core/axpby.c
@@ -1,87 +1,12 @@
#include "axpby.h"
-DLL_EXPORT int padd(float * x, float * y, float * out, long size){
+DLL_EXPORT int saxpby(float * x, float * y, float * out, float a, float b, long size, int nThreads){
long i = 0;
-#pragma omp parallel for
- for (i=0; i < size; i++)
- {
- *(out + i ) = *(x + i) + *(y+i);
- }
- return 0;
-}
-
-DLL_EXPORT int psubtract(float * x, float * y, float * out, long size){
- long i = 0;
-#pragma omp parallel
-{
-//#pragma omp single
-//{
-// printf("current number of threads %d\n", omp_get_num_threads());
-//}
-#pragma omp for
- for (i=0; i < size; i++)
- {
- *(out + i ) = *(x + i) - *(y+i);
- }
-}
- return 0;
-
-}
-
-DLL_EXPORT int pmultiply(float * x, float * y, float * out, long size){
- long i = 0;
-#pragma omp parallel for
- for (i=0; i < size; i++)
- {
- *(out + i ) = *(x + i) * *(y+i);
- }
- return 0;
-}
-
-DLL_EXPORT int pdivide(float * x, float * y, float * out, long size, float default_value)
-{
- long i = 0;
-#pragma omp parallel for
- for (i=0; i < size; i++)
- {
- *(out + i ) = *(y+i) ? *(x + i) / *(y+i) : default_value;
- }
- return 0;
-}
-DLL_EXPORT int ppower(float * x, float * y, float * out, long size){
- long i = 0;
-#pragma omp parallel for
- for (i=0; i < size; i++)
- {
- *(out + i ) = (float)pow(*(x + i) , *(y+i)) ;
- }
- return 0;
-}
-
-DLL_EXPORT int pminimum(float * x, float * y, float * out, long size){
- long i = 0;
-#pragma omp parallel for
- for (i=0; i < size; i++)
- {
- *(out + i ) = *(y+i) > (*x+i) ? *(x + i) : *(y+i);
- }
- return 0;
-}
-
-DLL_EXPORT int pmaximum(float * x, float * y, float * out, long size) {
- long i = 0;
-#pragma omp parallel for
- for (i = 0; i < size; i++)
- {
- *(out + i) = *(y + i) < (*x + i) ? *(x + i) : *(y + i);
- }
- return 0;
-}
+ int nThreads_initial;
+ threads_setup(nThreads, &nThreads_initial);
-DLL_EXPORT int saxpby(float * x, float * y, float * out, float a, float b, long size){
- long i = 0;
#pragma omp parallel
{
#pragma omp for
@@ -90,11 +15,12 @@ DLL_EXPORT int saxpby(float * x, float * y, float * out, float a, float b, long
*(out + i ) = a * ( *(x + i) ) + b * ( *(y + i) );
}
}
+ omp_set_num_threads(nThreads_initial);
return 0;
}
-DLL_EXPORT int daxpby(double * x, double * y, double * out, double a, double b, long size) {
+DLL_EXPORT int daxpby(double * x, double * y, double * out, double a, double b, long size, int nThreads) {
long i = 0;
#pragma omp parallel
{
diff --git a/src/Core/include/FiniteDifferenceLibrary.h b/src/Core/include/FiniteDifferenceLibrary.h
index 6e426af..b8e6c4f 100644
--- a/src/Core/include/FiniteDifferenceLibrary.h
+++ b/src/Core/include/FiniteDifferenceLibrary.h
@@ -3,4 +3,5 @@
#include <stdio.h>
#include "omp.h"
//#include "ipp.h"
-#include "dll_export.h" \ No newline at end of file
+#include "dll_export.h"
+#include "utilities.h" \ No newline at end of file
diff --git a/src/Core/include/axpby.h b/src/Core/include/axpby.h
index 2849547..e13d6e1 100644
--- a/src/Core/include/axpby.h
+++ b/src/Core/include/axpby.h
@@ -3,15 +3,8 @@
#include <stdio.h>
#include "omp.h"
#include "dll_export.h"
+#include "utilities.h"
-DLL_EXPORT int padd(float * x, float * y, float * out, long size);
-DLL_EXPORT int psubtract(float * x, float * y, float * out, long size);
-DLL_EXPORT int pmultiply(float * x, float * y, float * out, long size);
-DLL_EXPORT int pdivide(float * x, float * y, float * out, long size, float default_value);
-DLL_EXPORT int ppower(float * x, float * y, float * out, long size);
-DLL_EXPORT int pminimum(float * x, float * y, float * out, long size);
-DLL_EXPORT int pmaximum(float * x, float * y, float * out, long size);
-
-DLL_EXPORT int saxpby(float * x, float * y, float * out, float a, float b, long size);
-DLL_EXPORT int daxpby(double * x, double * y, double * out, double a, double b, long size);
+DLL_EXPORT int saxpby(float * x, float * y, float * out, float a, float b, long size, int nThreads);
+DLL_EXPORT int daxpby(double * x, double * y, double * out, double a, double b, long size, int nThreads);
diff --git a/src/Core/include/utilities.h b/src/Core/include/utilities.h
new file mode 100644
index 0000000..c3003d6
--- /dev/null
+++ b/src/Core/include/utilities.h
@@ -0,0 +1,3 @@
+#include "omp.h"
+
+void threads_setup(int nThreads_requested, int *nThreads_current); \ No newline at end of file
diff --git a/src/Core/utilities.c b/src/Core/utilities.c
new file mode 100644
index 0000000..86b23e8
--- /dev/null
+++ b/src/Core/utilities.c
@@ -0,0 +1,14 @@
+#include "utilities.h"
+
+
+void threads_setup(int nThreads_requested, int *nThreads_current)
+{
+#pragma omp parallel
+ {
+ if (omp_get_thread_num() == 0)
+ {
+ *nThreads_current = omp_get_num_threads();
+ }
+ }
+ omp_set_num_threads(nThreads_requested);
+}