From a1341271589a5bd3dad0def8198c49c37d1c197d Mon Sep 17 00:00:00 2001
From: Edoardo Pasca
Date: Tue, 19 Nov 2019 10:00:25 +0000
Subject: added armijo rule (#390)

* added armijo rule
* added info and kwarg for alpha
* updated Armijo rule
* modified stop criterion to GD
* added kmax to limit the while True loop
* fix raise comment
* change default stopping criterion
* updated test
* added Rosenbrock function and test Gradient descent on that
* removed test code
* added missing import
---
 .../ccpi/optimisation/algorithms/Algorithm.py            |   4 +
 .../optimisation/algorithms/GradientDescent.py           | 134 +++++++++++++++++----
 .../ccpi/optimisation/functions/Rosenbrock.py            |  78 ++++++++++++
 Wrappers/Python/ccpi/optimisation/functions/__init__.py  |   1 +
 Wrappers/Python/test/test_algorithms.py                  |  57 ++++++++-
 Wrappers/Python/test/test_functions.py                   |  11 +-
 6 files changed, 254 insertions(+), 31 deletions(-)
 create mode 100755 Wrappers/Python/ccpi/optimisation/functions/Rosenbrock.py

diff --git a/Wrappers/Python/ccpi/optimisation/algorithms/Algorithm.py b/Wrappers/Python/ccpi/optimisation/algorithms/Algorithm.py
index df8dbf5..408a904 100755
--- a/Wrappers/Python/ccpi/optimisation/algorithms/Algorithm.py
+++ b/Wrappers/Python/ccpi/optimisation/algorithms/Algorithm.py
@@ -197,6 +197,10 @@ class Algorithm(object):
                 if verbose:
                     print (self.verbose_output(very_verbose))
                 break
+        if verbose:
+            if self.update_objective_interval != 1:
+                print (self.verbose_output(very_verbose))
+            print ("Stop criterion has been reached.")
 
     def verbose_output(self, verbose=False):
         '''Creates a nice tabulated output'''
diff --git a/Wrappers/Python/ccpi/optimisation/algorithms/GradientDescent.py b/Wrappers/Python/ccpi/optimisation/algorithms/GradientDescent.py
index 8f9c958..5e7284a 100755
--- a/Wrappers/Python/ccpi/optimisation/algorithms/GradientDescent.py
+++ b/Wrappers/Python/ccpi/optimisation/algorithms/GradientDescent.py
@@ -24,7 +24,7 @@
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 from __future__ import unicode_literals
-
+import numpy
 from ccpi.optimisation.algorithms import Algorithm
 
@@ -35,7 +35,7 @@ class GradientDescent(Algorithm):
 
     '''
 
-    def __init__(self, x_init=None, objective_function=None, rate=None, **kwargs):
+    def __init__(self, x_init=None, objective_function=None, step_size=None, **kwargs):
         '''GradientDescent algorithm creator
 
         initialisation can be done at creation time if all
@@ -43,51 +43,135 @@ class GradientDescent(Algorithm):
 
         :param x_init: initial guess
        :param objective_function: objective function to be minimised
-        :param rate: step rate
+        :param step_size: step size for gradient descent iteration
+        :param alpha: optional parameter to start the backtracking algorithm, default 1e6
+        :param beta: optional parameter defining the reduction of step, default 0.5.
+            Its value can be in (0,1)
+        :param rtol: optional parameter defining the relative tolerance comparing the
+            current objective function to 0, default 1e-5, see numpy.isclose
+        :param atol: optional parameter defining the absolute tolerance comparing the
+            current objective function to 0, default 1e-8, see numpy.isclose
         '''
         super(GradientDescent, self).__init__(**kwargs)
-
-        if x_init is not None and objective_function is not None and rate is not None:
-            self.set_up(x_init=x_init, objective_function=objective_function, rate=rate)
-
-    def should_stop(self):
-        '''stopping criterion, currently only based on number of iterations'''
-        return self.iteration >= self.max_iteration
+        self.alpha = kwargs.get('alpha', 1e6)
+        self.beta = kwargs.get('beta', 0.5)
+        self.rtol = kwargs.get('rtol', 1e-5)
+        self.atol = kwargs.get('atol', 1e-8)
+        if x_init is not None and objective_function is not None:
+            self.set_up(x_init=x_init, objective_function=objective_function, step_size=step_size)
 
-    def set_up(self, x_init, objective_function, rate):
+    def set_up(self, x_init, objective_function, step_size):
         '''initialisation of the algorithm
 
         :param x_init: initial guess
         :param objective_function: objective function to be minimised
-        :param rate: step rate'''
+        :param step_size: step size'''
         print("{} setting up".format(self.__class__.__name__, ))
 
         self.x = x_init.copy()
         self.objective_function = objective_function
-        self.rate = rate
+
 
-        self.loss.append(objective_function(x_init))
+        if step_size is None:
+            self.k = 0
+            self.update_step_size = True
+            self.x_armijo = x_init.copy()
+            # self.rate = self.armijo_rule() * 2
+            # print (self.rate)
+        else:
+            self.step_size = step_size
+            self.update_step_size = False
+
+        self.update_objective()
         self.iteration = 0
 
-        try:
-            self.memopt = self.objective_function.memopt
-        except AttributeError as ae:
-            self.memopt = False
-        if self.memopt:
-            self.x_update = x_init.copy()
+        self.x_update = x_init.copy()
 
         self.configured = True
         print("{} configured".format(self.__class__.__name__, ))
 
     def update(self):
         '''Single iteration'''
-        if self.memopt:
-            self.objective_function.gradient(self.x, out=self.x_update)
-            self.x_update *= -self.rate
-            self.x += self.x_update
+
+        self.objective_function.gradient(self.x, out=self.x_update)
+
+        if self.update_step_size:
+            # the step size and the next solution are calculated within armijo_rule
+            self.step_size = self.armijo_rule()
         else:
-            self.x += -self.rate * self.objective_function.gradient(self.x)
+            self.x_update *= -self.step_size
+            self.x += self.x_update
 
     def update_objective(self):
         self.loss.append(self.objective_function(self.x))
+
+    def armijo_rule(self):
+        '''Applies the Armijo rule to calculate the step size (step_size)
+
+        https://projecteuclid.org/download/pdf_1/euclid.pjm/1102995080
+
+        The Armijo rule runs a while loop to find the appropriate step_size by starting
+        from a very large number (alpha). The step_size is found by repeatedly multiplying
+        alpha by beta (default 0.5, i.e. halving it) until a sufficient-decrease criterion
+        is met. To avoid infinite loops, we add a maximum number of times (kmax) the while
+        loop may run.
+
+        With the default beta, this rule can reduce the step_size to roughly 1/alpha,
+        i.e. 10^-gamma when alpha = 10^gamma, before kmax is reached.
+
+        For example, if
+            alpha = numpy.power(10, gamma)
+            delta = 3
+            step_size = numpy.power(10, -delta)
+        the Armijo rule can get from the initial alpha to step_size by repeating the
+        while loop k times, where
+            alpha / 2^k = step_size
+            10^gamma / 2^k = 10^-delta
+            2^k = 10^(gamma+delta)
+            k = (gamma + delta) / log10(2), approximately 3.3 * (gamma + delta)
+
+        If by default we take delta = gamma, then
+            kmax = numpy.ceil(2 * gamma / numpy.log10(2))
+                 = numpy.ceil(2 * numpy.log10(alpha) / numpy.log10(2))
+        '''
+        f_x = self.objective_function(self.x)
+        if not hasattr(self, 'x_update'):
+            self.x_update = self.objective_function.gradient(self.x)
+
+        while self.k < self.kmax:
+            # x_armijo = self.x - alpha * self.x_update
+            self.x_update.multiply(self.alpha, out=self.x_armijo)
+            self.x.subtract(self.x_armijo, out=self.x_armijo)
+
+            f_x_a = self.objective_function(self.x_armijo)
+            sqnorm = self.x_update.squared_norm()
+            if f_x_a - f_x <= - (self.alpha / 2.) * sqnorm:
+                self.x.fill(self.x_armijo)
+                break
+            else:
+                self.k += 1
+                # we don't want to update kmax
+                self._alpha *= self.beta
+
+        if self.k == self.kmax:
+            raise ValueError('Could not find a proper step_size in {} loops. Consider increasing alpha.'.format(self.kmax))
+        return self.alpha
+
+    @property
+    def alpha(self):
+        return self._alpha
+
+    @alpha.setter
+    def alpha(self, alpha):
+        self._alpha = alpha
+        self.kmax = numpy.ceil(2 * numpy.log10(alpha) / numpy.log10(2))
+        self.k = 0
+
+    def should_stop(self):
+        return self.max_iteration_stop_cryterion() or \
+            numpy.isclose(self.get_last_objective(), 0., rtol=self.rtol,
+                          atol=self.atol, equal_nan=False)
diff --git a/Wrappers/Python/ccpi/optimisation/functions/Rosenbrock.py b/Wrappers/Python/ccpi/optimisation/functions/Rosenbrock.py
new file mode 100755
index 0000000..03abf7a
--- /dev/null
+++ b/Wrappers/Python/ccpi/optimisation/functions/Rosenbrock.py
@@ -0,0 +1,78 @@
+# -*- coding: utf-8 -*-
+#========================================================================
+# Copyright 2019 Science and Technology Facilities Council
+# Copyright 2019 University of Manchester
+#
+# This work is part of the Core Imaging Library developed by Science and
+# Technology Facilities Council and University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0.txt
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#=========================================================================
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+import numpy
+from ccpi.optimisation.functions import Function
+from ccpi.framework import VectorData, VectorGeometry
+
+class Rosenbrock(Function):
+    r'''Rosenbrock function
+
+    .. math::
+
+        F(x,y) = (\alpha - x)^2 + \beta (y - x^2)^2
+
+    The function has a global minimum at :math:`(x,y)=(\alpha, \alpha^2)`.
+
+    '''
+    def __init__(self, alpha, beta):
+        super(Rosenbrock, self).__init__()
+
+        self.alpha = alpha
+        self.beta = beta
+
+    def __call__(self, x):
+        if not isinstance(x, VectorData):
+            raise TypeError('Rosenbrock function works on VectorData only')
+        vec = x.as_array()
+        a = (self.alpha - vec[0])
+        b = (vec[1] - (vec[0]*vec[0]))
+        return a * a + self.beta * b * b
+
+    def gradient(self, x, out=None):
+        r'''Gradient of the Rosenbrock function
+
+        .. math::
+
+            \nabla f(x,y) = \left[ 2((x-\alpha) - 2\beta x(y-x^2)),\; 2\beta (y - x^2) \right]
+
+        '''
+        if not isinstance(x, VectorData):
+            raise TypeError('Rosenbrock function works on VectorData only')
+
+        vec = x.as_array()
+        a = (vec[0] - self.alpha)
+        b = (vec[1] - (vec[0]*vec[0]))
+
+        res = numpy.empty_like(vec)
+        res[0] = 2 * (a - 2 * self.beta * vec[0] * b)
+        res[1] = 2 * self.beta * b
+
+        if out is not None:
+            out.fill(res)
+        else:
+            return VectorData(res)
diff --git a/Wrappers/Python/ccpi/optimisation/functions/__init__.py b/Wrappers/Python/ccpi/optimisation/functions/__init__.py
index 67abeef..d968539 100644
--- a/Wrappers/Python/ccpi/optimisation/functions/__init__.py
+++ b/Wrappers/Python/ccpi/optimisation/functions/__init__.py
@@ -26,3 +26,4 @@ from .MixedL21Norm import MixedL21Norm
 from .IndicatorBox import IndicatorBox
 from .KullbackLeibler import KullbackLeibler
 from .Norm2Sq import Norm2Sq
+from .Rosenbrock import Rosenbrock
diff --git a/Wrappers/Python/test/test_algorithms.py b/Wrappers/Python/test/test_algorithms.py
index db13b97..7a312f9 100755
--- a/Wrappers/Python/test/test_algorithms.py
+++ b/Wrappers/Python/test/test_algorithms.py
@@ -66,24 +66,73 @@ class TestAlgorithms(unittest.TestCase):
         identity = Identity(ig)
 
         norm2sq = Norm2Sq(identity, b)
-        rate = 0.3
         rate = norm2sq.L / 3.
 
         alg = GradientDescent(x_init=x_init,
                               objective_function=norm2sq,
-                              rate=rate)
+                              step_size=rate, atol=1e-9, rtol=1e-6)
+        alg.max_iteration = 1000
+        alg.run()
+        self.assertNumpyArrayAlmostEqual(alg.x.as_array(), b.as_array())
+        alg = GradientDescent(x_init=x_init,
+                              objective_function=norm2sq,
+                              step_size=rate, max_iteration=20,
+                              update_objective_interval=2,
+                              atol=1e-9, rtol=1e-6)
         alg.max_iteration = 20
+        self.assertTrue(alg.max_iteration == 20)
+        self.assertTrue(alg.update_objective_interval==2)
         alg.run(20, verbose=True)
         self.assertNumpyArrayAlmostEqual(alg.x.as_array(), b.as_array())
+
+    def test_GradientDescentArmijo(self):
+        print ("Test GradientDescentArmijo")
+        ig = ImageGeometry(12,13,14)
+        x_init = ig.allocate()
+        # b = x_init.copy()
+        # fill with random numbers
+        # b.fill(numpy.random.random(x_init.shape))
+        b = ig.allocate('random')
+        identity = Identity(ig)
+
+        norm2sq = Norm2Sq(identity, b)
+        rate = None
+
+        alg = GradientDescent(x_init=x_init,
+                              objective_function=norm2sq, step_size=rate)
+        alg.max_iteration = 100
+        alg.run()
+        self.assertNumpyArrayAlmostEqual(alg.x.as_array(), b.as_array())
         alg = GradientDescent(x_init=x_init,
                               objective_function=norm2sq,
-                              rate=rate, max_iteration=20,
+                              max_iteration=20,
                               update_objective_interval=2)
-        alg.max_iteration = 20
+        #alg.max_iteration = 20
         self.assertTrue(alg.max_iteration == 20)
         self.assertTrue(alg.update_objective_interval==2)
         alg.run(20, verbose=True)
         self.assertNumpyArrayAlmostEqual(alg.x.as_array(), b.as_array())
+
+    def test_GradientDescentArmijo2(self):
+        from ccpi.optimisation.functions import Rosenbrock
+        from ccpi.framework import VectorData, VectorGeometry
+
+        f = Rosenbrock(alpha=1., beta=100.)
+        vg = VectorGeometry(2)
+        x = vg.allocate('random_int', seed=2)
+        # x = vg.allocate('random', seed=1)
+        x.fill(numpy.asarray([10., -3.]))
+
+        max_iter = 1000000
+        update_interval = 100000
+
+        alg = GradientDescent(x, f, max_iteration=max_iter, update_objective_interval=update_interval, alpha=1e6)
+
+        alg.run()
+
+        print (alg.get_output().as_array(), alg.step_size, alg.kmax, alg.k)
+
+        numpy.testing.assert_array_almost_equal(alg.get_output().as_array(), [1, 1], decimal=1)
+        numpy.testing.assert_array_almost_equal(alg.get_output().as_array(), [0.982744, 0.965725], decimal=6)
+
     def test_CGLS(self):
         print ("Test CGLS")
         #ig = ImageGeometry(124,153,154)
diff --git a/Wrappers/Python/test/test_functions.py b/Wrappers/Python/test/test_functions.py
index 550f1de..d295234 100644
--- a/Wrappers/Python/test/test_functions.py
+++ b/Wrappers/Python/test/test_functions.py
@@ -17,8 +17,8 @@
 # limitations under the License.
 
 import numpy as np
-from ccpi.optimisation.functions import Function, KullbackLeibler
-from ccpi.framework import DataContainer, ImageData, ImageGeometry
+from ccpi.optimisation.functions import Function, KullbackLeibler, Rosenbrock
+from ccpi.framework import DataContainer, ImageData, ImageGeometry, VectorData
 from ccpi.optimisation.operators import Identity
 from ccpi.optimisation.operators import BlockOperator
 from ccpi.framework import BlockDataContainer
@@ -371,3 +371,10 @@ class TestFunction(unittest.TestCase):
         proxc = f.proximal_conjugate(x,1.2)
         f.proximal_conjugate(x, 1.2, out=out)
         numpy.testing.assert_array_equal(proxc.as_array(), out.as_array())
+
+    def test_Rosenbrock(self):
+        f = Rosenbrock(alpha=1, beta=100)
+        x = VectorData(numpy.asarray([1, 1]))
+        assert f(x) == 0.
+        numpy.testing.assert_array_almost_equal(f.gradient(x).as_array(), numpy.zeros(shape=(2,), dtype=numpy.float32))
--
cgit v1.2.3
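Usage note: the pattern exercised by test_GradientDescentArmijo2 above is the intended entry point for the new code path. Leaving step_size at its default of None makes update() call armijo_rule() on every iteration, while an explicit step_size preserves the old fixed-step behaviour. A minimal sketch, assuming a CIL installation that provides these modules:

    from ccpi.optimisation.algorithms import GradientDescent
    from ccpi.optimisation.functions import Rosenbrock
    from ccpi.framework import VectorGeometry
    import numpy

    f = Rosenbrock(alpha=1., beta=100.)
    x0 = VectorGeometry(2).allocate()
    x0.fill(numpy.asarray([10., -3.]))

    # step_size omitted (None): armijo_rule() chooses the step at each iteration
    alg = GradientDescent(x_init=x0, objective_function=f,
                          max_iteration=1000000,
                          update_objective_interval=100000,
                          alpha=1e6)
    alg.run()
    print(alg.get_output().as_array())  # tends towards the minimum at [1, 1]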
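The backtracking search in armijo_rule can also be followed without a CIL build. The sketch below is an illustrative re-implementation in plain numpy, not part of the patch: the names rosenbrock, rosenbrock_grad and armijo_step are hypothetical, and, unlike the class above, alpha here restarts from alpha0 on every call instead of carrying alpha and the counter k over between update() calls. With alpha0 = 1e6 (gamma = 6) the loop bound is kmax = ceil(2 * 6 / log10(2)) = 40, matching the formula in the docstring.

    import numpy

    def rosenbrock(v, alpha=1., beta=100.):
        # F(x, y) = (alpha - x)^2 + beta * (y - x^2)^2
        x, y = v
        return (alpha - x) ** 2 + beta * (y - x * x) ** 2

    def rosenbrock_grad(v, alpha=1., beta=100.):
        x, y = v
        b = y - x * x
        return numpy.array([2. * (x - alpha) - 4. * beta * x * b,
                            2. * beta * b])

    def armijo_step(f, grad, x, alpha0=1e6, beta=0.5):
        # shrink the candidate step until the sufficient-decrease test
        #   f(x - alpha * g) - f(x) <= -(alpha / 2) * ||g||^2
        # holds; kmax bounds the loop exactly as in the class docstring
        g = grad(x)
        fx = f(x)
        sqnorm = g.dot(g)
        kmax = int(numpy.ceil(2 * numpy.log10(alpha0) / numpy.log10(2)))
        alpha = alpha0
        for _ in range(kmax):
            if f(x - alpha * g) - fx <= -(alpha / 2.) * sqnorm:
                return alpha
            alpha *= beta
        raise ValueError('no suitable step size found; consider increasing alpha0')

    x = numpy.array([10., -3.])
    for _ in range(5000):
        x = x - armijo_step(rosenbrock, rosenbrock_grad, x) * rosenbrock_grad(x)
    print(x)  # creeps along the banana valley towards (1, 1)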
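The gradient formula in Rosenbrock.gradient, which test_Rosenbrock only verifies at the stationary point, can be spot-checked anywhere with central finite differences, reusing the illustrative helpers above:

    eps = 1e-6
    v = numpy.array([10., -3.])
    num = numpy.array([
        (rosenbrock(v + [eps, 0.]) - rosenbrock(v - [eps, 0.])) / (2 * eps),
        (rosenbrock(v + [0., eps]) - rosenbrock(v - [0., eps])) / (2 * eps)])
    numpy.testing.assert_allclose(num, rosenbrock_grad(v), rtol=1e-4)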