6 files changed, 254 insertions(+), 31 deletions(-)
diff --git a/Wrappers/Python/ccpi/optimisation/algorithms/Algorithm.py b/Wrappers/Python/ccpi/optimisation/algorithms/Algorithm.py
index df8dbf5..408a904 100755
--- a/Wrappers/Python/ccpi/optimisation/algorithms/Algorithm.py
+++ b/Wrappers/Python/ccpi/optimisation/algorithms/Algorithm.py
@@ -197,6 +197,10 @@ class Algorithm(object):
                     if verbose:
                         print (self.verbose_output(very_verbose))
                 break
+        if verbose:
+            if self.update_objective_interval != 1:
+                print (self.verbose_output(very_verbose))
+            print ("Stop criterion has been reached.")

     def verbose_output(self, verbose=False):
         '''Creates a nice tabulated output'''
diff --git a/Wrappers/Python/ccpi/optimisation/algorithms/GradientDescent.py b/Wrappers/Python/ccpi/optimisation/algorithms/GradientDescent.py
index 8f9c958..5e7284a 100755
--- a/Wrappers/Python/ccpi/optimisation/algorithms/GradientDescent.py
+++ b/Wrappers/Python/ccpi/optimisation/algorithms/GradientDescent.py
@@ -24,7 +24,7 @@
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 from __future__ import unicode_literals
-
+import numpy
 from ccpi.optimisation.algorithms import Algorithm

 class GradientDescent(Algorithm):
@@ -35,7 +35,7 @@ class GradientDescent(Algorithm):

     '''

-    def __init__(self, x_init=None, objective_function=None, rate=None, **kwargs):
+    def __init__(self, x_init=None, objective_function=None, step_size=None, **kwargs):
         '''GradientDescent algorithm creator

         initialisation can be done at creation time if all
@@ -43,51 +43,135 @@ class GradientDescent(Algorithm):

         :param x_init: initial guess
         :param objective_function: objective function to be minimised
-        :param rate: step rate
+        :param step_size: step size for gradient descent iteration
+        :param alpha: optional parameter to start the backtracking algorithm, default 1e6
+        :param beta: optional parameter defining the reduction of step, default 0.5.
+                     Its value can be in (0,1)
+        :param rtol: optional parameter defining the relative tolerance comparing the
+                     current objective function to 0, default 1e-5, see numpy.isclose
+        :param atol: optional parameter defining the absolute tolerance comparing the
+                     current objective function to 0, default 1e-8, see numpy.isclose
         '''
         super(GradientDescent, self).__init__(**kwargs)
-
-        if x_init is not None and objective_function is not None and rate is not None:
-            self.set_up(x_init=x_init, objective_function=objective_function, rate=rate)
-
-    def should_stop(self):
-        '''stopping criterion, currently only based on number of iterations'''
-        return self.iteration >= self.max_iteration
+        self.alpha = kwargs.get('alpha', 1e6)
+        self.beta = kwargs.get('beta', 0.5)
+        self.rtol = kwargs.get('rtol', 1e-5)
+        self.atol = kwargs.get('atol', 1e-8)
+        if x_init is not None and objective_function is not None:
+            self.set_up(x_init=x_init, objective_function=objective_function, step_size=step_size)

-    def set_up(self, x_init, objective_function, rate):
+    def set_up(self, x_init, objective_function, step_size):
         '''initialisation of the algorithm

         :param x_init: initial guess
         :param objective_function: objective function to be minimised
-        :param rate: step rate'''
+        :param step_size: step size'''
         print("{} setting up".format(self.__class__.__name__, ))

         self.x = x_init.copy()
         self.objective_function = objective_function
-        self.rate = rate

-        self.loss.append(objective_function(x_init))
+        if step_size is None:
+            self.k = 0
+            self.update_step_size = True
+            self.x_armijo = x_init.copy()
+            # self.rate = self.armijo_rule() * 2
+            # print (self.rate)
+        else:
+            self.step_size = step_size
+            self.update_step_size = False
+
+        self.update_objective()
         self.iteration = 0
-        try:
-            self.memopt = self.objective_function.memopt
-        except AttributeError as ae:
-            self.memopt = False
-        if self.memopt:
-            self.x_update = x_init.copy()
+        self.x_update = x_init.copy()

         self.configured = True
         print("{} configured".format(self.__class__.__name__, ))

     def update(self):
         '''Single iteration'''
-        if self.memopt:
-            self.objective_function.gradient(self.x, out=self.x_update)
-            self.x_update *= -self.rate
-            self.x += self.x_update
+        self.objective_function.gradient(self.x, out=self.x_update)
+
+        if self.update_step_size:
+            # the next update and solution are calculated within the armijo_rule
+            self.step_size = self.armijo_rule()
         else:
-            self.x += -self.rate * self.objective_function.gradient(self.x)
+            self.x_update *= -self.step_size
+            self.x += self.x_update

     def update_objective(self):
         self.loss.append(self.objective_function(self.x))
+
+    def armijo_rule(self):
+        '''Applies the Armijo rule to calculate the step size (step_size)
+
+        https://projecteuclid.org/download/pdf_1/euclid.pjm/1102995080
+
+        The Armijo rule runs a while loop to find the appropriate step_size by starting
+        from a very large number (alpha). The step_size is found by dividing alpha by 2
+        in an iterative way until a certain criterion is met. To avoid infinite loops, we
+        add a maximum number of times the while loop is run.
+
+        This rule would allow one to reach a minimum step_size of 1/alpha.
+
+        if
+        alpha = numpy.power(10, gamma)
+        delta = 3
+        step_size = numpy.power(10, -delta)
+        with the armijo rule we can get to step_size from the initial alpha by repeating the while loop k times,
+        where
+        alpha / 2^k = step_size
+        10^gamma / 2^k = 10^-delta
+        2^k = 10^(gamma+delta)
+        k = (gamma+delta) / log10(2) \\approx 3.3 * (gamma+delta)
+
+        if we take by default delta = gamma
+        kmax = numpy.ceil(2 * gamma / numpy.log10(2))
+        kmax = numpy.ceil(2 * numpy.log10(alpha) / numpy.log10(2))
+        '''
+        f_x = self.objective_function(self.x)
+        if not hasattr(self, 'x_update'):
+            self.x_update = self.objective_function.gradient(self.x)
+
+        while self.k < self.kmax:
+            # x_armijo = self.x - alpha * self.x_update
+            self.x_update.multiply(self.alpha, out=self.x_armijo)
+            self.x.subtract(self.x_armijo, out=self.x_armijo)
+
+            f_x_a = self.objective_function(self.x_armijo)
+            sqnorm = self.x_update.squared_norm()
+            if f_x_a - f_x <= - (self.alpha / 2.) * sqnorm:
+                self.x.fill(self.x_armijo)
+                break
+            else:
+                self.k += 1.
+                # we don't want to update kmax
+                self._alpha *= self.beta
+
+        if self.k == self.kmax:
+            raise ValueError('Could not find a proper step_size in {} loops. Consider increasing alpha.'.format(self.kmax))
+        return self.alpha
+
+    @property
+    def alpha(self):
+        return self._alpha
+    @alpha.setter
+    def alpha(self, alpha):
+        self._alpha = alpha
+        self.kmax = numpy.ceil(2 * numpy.log10(alpha) / numpy.log10(2))
+        self.k = 0
+
+    def should_stop(self):
+        return self.max_iteration_stop_cryterion() or \
+            numpy.isclose(self.get_last_objective(), 0., rtol=self.rtol,
+                atol=self.atol, equal_nan=False)
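The backtracking loop in armijo_rule is easier to follow outside the class machinery. Below is a minimal NumPy-only sketch of the same rule, assuming the objective and its gradient are plain callables f and grad_f (hypothetical stand-ins for objective_function and its gradient method); the sufficient-decrease test and the kmax cap mirror the diff above:

    import numpy

    def armijo_step(f, grad_f, x, alpha=1e6, beta=0.5):
        # cap on the number of reductions, as computed in the alpha setter above
        kmax = numpy.ceil(2 * numpy.log10(alpha) / numpy.log10(2))
        g = grad_f(x)              # the update direction is -g
        f_x = f(x)
        sqnorm = numpy.dot(g, g)   # ||g||^2, cf. x_update.squared_norm()
        k = 0
        while k < kmax:
            x_new = x - alpha * g
            # sufficient decrease: f(x - alpha*g) - f(x) <= -(alpha/2) * ||g||^2
            if f(x_new) - f_x <= -(alpha / 2.) * sqnorm:
                return alpha, x_new
            alpha *= beta          # shrink the candidate step and retry
            k += 1
        raise ValueError('no acceptable step_size in {} reductions'.format(int(kmax)))

For example, armijo_step(lambda v: numpy.dot(v, v), lambda v: 2 * v, numpy.array([10., -3.])) accepts a step after about 21 halvings of alpha=1e6, consistent with the 3.3 * (gamma+delta) estimate in the docstring.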
diff --git a/Wrappers/Python/ccpi/optimisation/functions/Rosenbrock.py b/Wrappers/Python/ccpi/optimisation/functions/Rosenbrock.py
new file mode 100755
index 0000000..03abf7a
--- /dev/null
+++ b/Wrappers/Python/ccpi/optimisation/functions/Rosenbrock.py
@@ -0,0 +1,78 @@
+# -*- coding: utf-8 -*-
+#=========================================================================
+# Copyright 2019 Science Technology Facilities Council
+# Copyright 2019 University of Manchester
+#
+# This work is part of the Core Imaging Library developed by Science Technology
+# Facilities Council and University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0.txt
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#=========================================================================
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+import numpy
+from ccpi.optimisation.functions import Function
+from ccpi.framework import VectorData, VectorGeometry
+
+class Rosenbrock(Function):
+    r'''Rosenbrock function
+
+    .. math::
+
+        F(x,y) = (\alpha - x)^2 + \beta (y - x^2)^2
+
+    The function has a global minimum at :math:`(x,y) = (\alpha, \alpha^2)`
+
+    '''
+    def __init__(self, alpha, beta):
+        super(Rosenbrock, self).__init__()
+
+        self.alpha = alpha
+        self.beta = beta
+
+    def __call__(self, x):
+        if not isinstance(x, VectorData):
+            raise TypeError('Rosenbrock function works on VectorData only')
+        vec = x.as_array()
+        a = (self.alpha - vec[0])
+        b = (vec[1] - (vec[0]*vec[0]))
+        return a * a + self.beta * b * b
+
+    def gradient(self, x, out=None):
+        r'''Gradient of the Rosenbrock function
+
+        .. math::
+
+            \nabla f(x,y) = \left[ 2((x-\alpha) - 2\beta x(y-x^2)),\ 2\beta (y-x^2) \right]
+
+        '''
+        if not isinstance(x, VectorData):
+            raise TypeError('Rosenbrock function works on VectorData only')
+
+        vec = x.as_array()
+        a = (vec[0] - self.alpha)
+        b = (vec[1] - (vec[0]*vec[0]))
+
+        res = numpy.empty_like(vec)
+        res[0] = 2 * (a - 2 * self.beta * vec[0] * b)
+        res[1] = 2 * self.beta * b
+
+        if out is not None:
+            out.fill(res)
+        else:
+            return VectorData(res)
diff --git a/Wrappers/Python/ccpi/optimisation/functions/__init__.py b/Wrappers/Python/ccpi/optimisation/functions/__init__.py
index 67abeef..d968539 100644
--- a/Wrappers/Python/ccpi/optimisation/functions/__init__.py
+++ b/Wrappers/Python/ccpi/optimisation/functions/__init__.py
@@ -26,3 +26,4 @@ from .MixedL21Norm import MixedL21Norm
 from .IndicatorBox import IndicatorBox
 from .KullbackLeibler import KullbackLeibler
 from .Norm2Sq import Norm2Sq
+from .Rosenbrock import Rosenbrock
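As a quick sanity check of the new function, the known minimum can be verified directly with the API introduced in this diff; with alpha=1 and beta=100 the global minimum sits at (1, 1), where both the value and the gradient vanish:

    import numpy
    from ccpi.framework import VectorData
    from ccpi.optimisation.functions import Rosenbrock

    f = Rosenbrock(alpha=1., beta=100.)
    x = VectorData(numpy.asarray([1., 1.]))   # the global minimum (alpha, alpha^2)
    print(f(x))                               # 0.0
    print(f.gradient(x).as_array())           # [0. 0.]

This is the same check that test_Rosenbrock in test_functions.py below performs with assertions.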
diff --git a/Wrappers/Python/test/test_algorithms.py b/Wrappers/Python/test/test_algorithms.py
index db13b97..7a312f9 100755
--- a/Wrappers/Python/test/test_algorithms.py
+++ b/Wrappers/Python/test/test_algorithms.py
@@ -66,24 +66,73 @@ class TestAlgorithms(unittest.TestCase):
         identity = Identity(ig)

         norm2sq = Norm2Sq(identity, b)
-        rate = 0.3
         rate = norm2sq.L / 3.

         alg = GradientDescent(x_init=x_init,
                               objective_function=norm2sq,
-                              rate=rate)
+                              rate=rate, atol=1e-9, rtol=1e-6)
+        alg.max_iteration = 1000
+        alg.run()
+        self.assertNumpyArrayAlmostEqual(alg.x.as_array(), b.as_array())
+        alg = GradientDescent(x_init=x_init,
+                              objective_function=norm2sq,
+                              rate=rate, max_iteration=20,
+                              update_objective_interval=2,
+                              atol=1e-9, rtol=1e-6)
         alg.max_iteration = 20
+        self.assertTrue(alg.max_iteration == 20)
+        self.assertTrue(alg.update_objective_interval == 2)
         alg.run(20, verbose=True)
         self.assertNumpyArrayAlmostEqual(alg.x.as_array(), b.as_array())
+    def test_GradientDescentArmijo(self):
+        print ("Test GradientDescent")
+        ig = ImageGeometry(12,13,14)
+        x_init = ig.allocate()
+        # b = x_init.copy()
+        # fill with random numbers
+        # b.fill(numpy.random.random(x_init.shape))
+        b = ig.allocate('random')
+        identity = Identity(ig)
+
+        norm2sq = Norm2Sq(identity, b)
+        rate = None
+
+        alg = GradientDescent(x_init=x_init,
+                              objective_function=norm2sq, rate=rate)
+        alg.max_iteration = 100
+        alg.run()
+        self.assertNumpyArrayAlmostEqual(alg.x.as_array(), b.as_array())
         alg = GradientDescent(x_init=x_init,
                               objective_function=norm2sq,
-                              rate=rate, max_iteration=20,
+                              max_iteration=20,
                               update_objective_interval=2)
-        alg.max_iteration = 20
+        #alg.max_iteration = 20
         self.assertTrue(alg.max_iteration == 20)
         self.assertTrue(alg.update_objective_interval == 2)
         alg.run(20, verbose=True)
         self.assertNumpyArrayAlmostEqual(alg.x.as_array(), b.as_array())
+    def test_GradientDescentArmijo2(self):
+        from ccpi.optimisation.functions import Rosenbrock
+        from ccpi.framework import VectorData, VectorGeometry
+
+        f = Rosenbrock(alpha=1., beta=100.)
+        vg = VectorGeometry(2)
+        x = vg.allocate('random_int', seed=2)
+        # x = vg.allocate('random', seed=1)
+        x.fill(numpy.asarray([10., -3.]))
+
+        max_iter = 1000000
+        update_interval = 100000
+
+        alg = GradientDescent(x, f, max_iteration=max_iter, update_objective_interval=update_interval, alpha=1e6)
+
+        alg.run()
+
+        print (alg.get_output().as_array(), alg.step_size, alg.kmax, alg.k)
+
+        numpy.testing.assert_array_almost_equal(alg.get_output().as_array(), [1, 1], decimal=1)
+        numpy.testing.assert_array_almost_equal(alg.get_output().as_array(), [0.982744, 0.965725], decimal=6)
+
     def test_CGLS(self):
         print ("Test CGLS")
         #ig = ImageGeometry(124,153,154)
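test_GradientDescentArmijo2 doubles as an end-to-end recipe for the new code path. Here is a trimmed sketch for interactive use (iteration counts are reduced here purely to keep the run short, so convergence to (1, 1) may be looser than the test's tolerances; vg.allocate() is assumed to return a zero-filled VectorData, which fill then overwrites):

    import numpy
    from ccpi.framework import VectorGeometry
    from ccpi.optimisation.functions import Rosenbrock
    from ccpi.optimisation.algorithms import GradientDescent

    f = Rosenbrock(alpha=1., beta=100.)
    vg = VectorGeometry(2)
    x = vg.allocate()
    x.fill(numpy.asarray([10., -3.]))

    # no step_size given, so update() routes through armijo_rule every iteration
    alg = GradientDescent(x, f, max_iteration=100000,
                          update_objective_interval=10000)
    alg.run()
    print(alg.get_output().as_array())   # tends towards [1. 1.]

Passing an explicit step_size instead restores the old fixed-rate behaviour, with the update x -= step_size * gradient.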
diff --git a/Wrappers/Python/test/test_functions.py b/Wrappers/Python/test/test_functions.py
index 550f1de..d295234 100644
--- a/Wrappers/Python/test/test_functions.py
+++ b/Wrappers/Python/test/test_functions.py
@@ -17,8 +17,8 @@
 #   limitations under the License.

 import numpy as np
-from ccpi.optimisation.functions import Function, KullbackLeibler
-from ccpi.framework import DataContainer, ImageData, ImageGeometry
+from ccpi.optimisation.functions import Function, KullbackLeibler, Rosenbrock
+from ccpi.framework import DataContainer, ImageData, ImageGeometry, VectorData
 from ccpi.optimisation.operators import Identity
 from ccpi.optimisation.operators import BlockOperator
 from ccpi.framework import BlockDataContainer
@@ -371,3 +371,10 @@ class TestFunction(unittest.TestCase):
         proxc = f.proximal_conjugate(x, 1.2)
         f.proximal_conjugate(x, 1.2, out=out)
         numpy.testing.assert_array_equal(proxc.as_array(), out.as_array())
+
+    def test_Rosenbrock(self):
+        f = Rosenbrock(alpha=1, beta=100)
+        x = VectorData(numpy.asarray([1, 1]))
+        assert f(x) == 0.
+        numpy.testing.assert_array_almost_equal(f.gradient(x).as_array(), numpy.zeros(shape=(2,), dtype=numpy.float32))
+
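One behavioural change worth calling out: should_stop now combines the iteration cap with a proximity-to-zero test on the last objective value, so run() can exit before max_iteration on problems whose optimal objective is exactly zero, as in the Norm2Sq and Rosenbrock tests above. A minimal sketch of just that test, with last_obj standing in for get_last_objective():

    import numpy

    def converged(last_obj, rtol=1e-5, atol=1e-8):
        # mirrors the numpy.isclose branch of GradientDescent.should_stop
        return numpy.isclose(last_obj, 0., rtol=rtol, atol=atol, equal_nan=False)

    print(converged(1e-3))   # False: objective still far from zero
    print(converged(1e-9))   # True: within atol of zero

For objectives whose minimum is nonzero, the isclose branch never fires and the iteration cap alone ends the run.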
