r"""Solver Utilities.
This module provides some utilities used by various solvers.
"""
from __future__ import division
__all__ = ['line_search', 'IterationError', 'NoDescentError']
import math
import numpy as np
import scipy as sp
import scipy.optimize
from mmf.solve.solver_interface import IterationError, Mesg
_FINFO = np.finfo(float)
_TINY = _FINFO.tiny
_MESGS = Mesg()
class NoDescentError(IterationError):
"""No downhill step could be found."""
def exact_line_search(g, g0, g1=None, mesgs=_MESGS, max_steps=500,
rel_tol=1e-5):
r"""Return `(l, g(l))` where `g(l)` is minimized."""
l = sp.optimize.brent(g, brack=(0, 1), tol=rel_tol,
maxiter=max_steps)
return (l, g(l))
def line_search(g, g0, g1=None,
trust_dg=False,
allow_backstep=True,
mesgs=_MESGS,
max_steps=10,
avg_descent_factor=1e-4, min_step=1e-5,
min_scale_factor=0.1, max_scale_factor=0.5,
use_cubic_factor=0.5,
allow_bad_step=False):
r"""Return `(l, g(l))` where `g(l)` decreases sufficiently.
The idea here is that one is trying to minimize a
function `g(x)` where `x = x0 + l*dx` and `dx` is known to be
a local downhill direction. Taking a large step `l=1` could
work well if the function is linear, but if the function
fluctuates on length scales comparable to `|dx|`, then one
needs to take a shorter step.
The magnitude of the step is monotonically reduced, and a
quadratic or cubic interpolation scheme is used to estimate the
optimal step in a minimal number of function evaluations.
The standard line search algorithm can use this with `g(l) =
g(x0 + l*dx)` but more general search algorithms can also be
used.
Parameters
----------
g : function
This should return the objective function `g(l)` where `l`
is the scale parameter. The magnitude of `l` decreases
monotonically from `1` (the sign may change if backstepping is
allowed).
g0 : float
Value of objective `g(0)` at `l=0`.
g1 : float, optional
Value of objective `g(1)` at `l=1`.
trust_dg : bool, optional
If `True`, then we assume that the model for `g(l)` is correct
(see below) and use $g'(0) = -2g(0)$ in order to perform
extrapolations for the optimal `l`. This will ensure `l` is
always positive. Otherwise, a quadratic approximation might be
used which allows for negative `l`.
allow_backstep : bool, optional
If `True` (default), then take the second step in the negative
direction if not `trust_dg`. This may scale the problem in an
inefficient manner, so we provide this option to suppress
backsteps until some information is built up about the function.
mesgs : IMesg
Messages are displayed using this.
allow_bad_step : bool, optional
If `True`, then accept bad steps (i.e. do nothing other than
compute the new ``g(1)``).
Other Parameters
----------------
avg_descent_factor : float
A successful step is one which decreases the objective
function by at least this relative amount::
g < g0*(1 - l*(2-l)*avg_descent_factor)
where we take `l` to be positive here. This prevents infinite
loops.
.. note:: This form is predicated on the assumption that
.. math::
g(l) = \norm{G(x_0 + l \d{x})}^2
where $\d{x} = -\mat{J}^{-1}G(x_0)$ is the Newton step. If
this is the case, then for a linear function $G(x_0)$ (or
for sufficiently small `l`),
.. math::
g(l) = g(0)(1-l)^2
This should still work well with the relaxed assumptions that:
1) `g` is non-negative with an ultimate goal of `g=0`.
2) That in the linear regime for small `l` one has:
.. math::
g_l \approx g_0(1 - 2l).
min_step : float
This is the minimum value of `l` to try before giving up.
min_scale_factor, max_scale_factor : float
`l` is reduced by a fraction between `min_scale_factor` and
`max_scale_factor`. The lower scale prevents excessively small
steps while the upper scale ensures termination in a reasonable
number of steps. If the function is not at a minimum and
`-2*g0` is a good approximation of the derivative, then
arbitrarily small steps will always be accepted. See the
Examples below for a demonstration.
max_steps : int
Maximum number of line-search steps to take. Raises
:exc:`NoDescentError` if a descent direction has not been found
after this many steps.
use_cubic_factor : float
If `trust_dg` is `False`, then we use a quadratic
interpolation to estimate $g'(0)$. If the relative error in
this estimate is less than `use_cubic_factor`, then we use the
cubic interpolation.
Raises
------
:class:`NoDescentError`
If a step cannot be found because the function does not decrease
fast enough or the step size becomes too small.
Examples
--------
Here is a badly scaled problem: the Newton step `dx` is about
`500`, but a change of only about `1` is needed, so `l` must
shrink to roughly `1/500 = 0.002`.
>>> f = lambda x: x**2 - 1
>>> df = lambda x: 2*x
>>> x0 = 0.001
>>> dx = - f(x0)/df(x0)
>>> def g(l):
... print l
... return abs(f(x0 + l*dx))**2
>>> l, g_l = line_search(g, g0=g(0), mesgs=_MESGS) # doctest: +ELLIPSIS
0
1
-0.5
-0.166...
-0.076...
-0.033...
-0.014...
-0.006...
-0.002...
-0.001...
There are quite a few steps here because `l` must become small.
The number of steps can be reduced by reducing the
`*_scale_factor` parameters, but this runs the risk of
overshooting and producing extremely small steps. (If the
Jacobian is correct, then small enough steps will always be
accepted unless at a local minimum).
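For contrast, here is a minimal sketch of a well-scaled problem (a
hypothetical scalar residual `G` whose Newton step `dx` is exact);
the full step `l=1` is accepted immediately:

>>> G = lambda x: x - 2.0
>>> x0, dx = 0.0, 2.0
>>> g = lambda l: abs(G(x0 + l*dx))**2
>>> l, g_l = line_search(g, g0=g(0), mesgs=_MESGS)
>>> l, g_l
(1, 0.0)
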
Note that one can still get hung up at local extrema if not
sufficiently close to the solution. In this case,
:exc:`NoDescentError` is raised:
>>> f = lambda x: (x - 3)*(x*x + 1)
>>> df = lambda x: 3*x**2 - 6*x + 1
>>> x0 = 1 - 6**(1/2)/3
>>> dx = 0.1 # Correct direction
>>> def g(l):
... return abs(f(x0 + l*dx))**2
>>> l, g_l = line_search(g, g0=g(0), mesgs=_MESGS) # doctest: +ELLIPSIS
Traceback (most recent call last):
...
NoDescentError: Minimum step-size reached l = ... < 1e-05, err = 2.9...
Perhaps a local minimum has been found?
Notes
-----
If the functional form is justified, then we know the slope
$g'(0) = -2g(0)$, so we can use two points to fit a cubic polynomial
in order to find the optimal $l$:
.. math::
g(l) = a_3 l^3 + b_3 l^2 - 2 g(0) l + g(0).
If we only have one point, then we set $a_3=0$.
If the functional form is not justified (for example, during a
Broyden algorithm where the inverse Jacobian is approximate or
incorrect) then this may fail because the slope $g'(0)$ is
incorrect (possibly even with an incorrect sign). In this case,
we fit a quadratic to the previous two points:
.. math::
g(l) = a_2 l^2 + b_2 l + g(0)
A test is made to see if $b_2 \approx -2g(0)$. If so, then the
cubic approximation is used; otherwise we fall back to the
quadratic approximation (which may allow $l < 0$).
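Concretely, with the two most recent samples $(l_1, g_1)$ and
$(l_2, g_2)$ where $g_i = g(l_i)$, the cubic coefficients follow
from the linear system (a sketch of the system solved with
:func:`numpy.linalg.solve` in the implementation):

.. math::
   \begin{pmatrix} l_1^3 & l_1^2\\ l_2^3 & l_2^2 \end{pmatrix}
   \begin{pmatrix} a_3\\ b_3 \end{pmatrix}
   =
   \begin{pmatrix} g_1 + (2l_1 - 1)g(0)\\ g_2 + (2l_2 - 1)g(0) \end{pmatrix}

and the trial step is the root of $g'(l) = 3a_3 l^2 + 2b_3 l -
2g(0) = 0$ corresponding to a minimum (for $a_3 = 0$ this reduces
to $l = g(0)/b_3$).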
"""
# Vars
# l, g_l: Current step and g(l)
# l_, g_: Prev step and g(l_)
assert(min_scale_factor <= max_scale_factor)
l = 1 # Current step
l_ = 1 # Prev step
g_l = None # Current g(l)
g_ = None # Prev g(l_)
if g0 == 0:
return 0.0, g0
steps = 0
while True:
if l == 1 and g1 is not None:
g_l = g1
else:
try:
g_l = g(l) # Current g(l)
except:
if l < 0:
# Try positive step
l = -l
continue
else:
raise
if not np.isfinite(g_l):
if l < 0:
# Try positive step
l = -l
continue
steps += 1
mesgs.iter("Line search... l, old_err, new_err: %g, %g, %g"%(
l, math.sqrt(g0), math.sqrt(g_l)))
if g_l <= g0 - avg_descent_factor*abs(l)*g0 or allow_bad_step:
break
elif abs(l) <= min_step:
msg = ('\n'.join([
"Minimum step-size reached l = %g < %g, err = %g"
% (abs(l), min_step, np.sqrt(g_l)),
"Perhaps a local minimum has been found?"]))
mesgs.error(msg)
raise NoDescentError(msg)
elif max_steps <= steps:
msg = ("Maximum line-search steps ls_max_steps=%i reached, err = %g"
% (max_steps, np.sqrt(g_l)))
mesgs.error(msg)
raise NoDescentError(msg)
elif 1 == l:
# First time not good enough: Try back-stepping.
if trust_dg:
# If we trust g'(0) = -2g(0), then we use a quadratic
# interpolation:
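# With the trusted slope the model is g(l) = (g0 + g_l)*l**2 - 2*g0*l + g0
# (fit through g(1) = g_l), whose minimum lies at
# l = g0/(g0 + g_l) = 1/(1 + g_l/g0), which is always positive.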
l_temp = 1/(1 + g_l/(g0 + _TINY))
elif allow_backstep:
# Otherwise, we try a negative step
l_temp = -l*max_scale_factor
else:
# Otherwise, positive step:
l_temp = l*max_scale_factor
else:
# Even the second attempt was not good enough: use a
# quadratic or cubic interpolation about l=0 to try and
# optimize step size.
# First we try the cubic interpolation where we know the
# initial position g(0) = g0 and trust the initial
# gradient g'(0) = -2*g0.
#
# y = a3*l^3 + b3*l^2 - 2*l*g0 + g0
#
# [l1**3, l1**2] [a3] = [g1 + 2*l1*g0 - g0]
# [l2**3, l2**2] [b3] [g2 + 2*l2*g0 - g0]
t = np.array([[l**3, l**2],
[l_**3, l_**2]])
a3, b3 = np.linalg.solve(t, [g_l - (1 - 2*l)*g0,
g_ - (1 - 2*l_)*g0])
if not trust_dg:
# Try the quadratic interpolation
# y = a2*l^2 + b2*l + g0
#
# [l1**2, l1] [a2] = [g1 - g0]
# [l2**2, l2] [b2] = [g2 - g0]
#
t = np.array([[l**2, l],
[l_**2, l_]])
a2, b2 = np.linalg.solve(t, [g_l - g0,
g_ - g0])
# Find minimum at root g' = 0
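# b2 is the quadratic estimate of the slope g'(0); compare it with the
# trusted value -2*g0 to decide whether the cubic model can be used.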
if trust_dg or abs((b2 + 2*g0)/g0) < use_cubic_factor:
# Use cubic interpolation
# y = 3*a*step^2 + 2*b*step - 2*g0 = 0
# step_temp = (-b + sqrt(b^2 + 6*a*g0))/(3*a)
if 0 == a3:
l_temp = g0/b3
else:
discriminant = b3*b3 + 6*a3*g0
if discriminant < 0:
l_temp = abs(l)*min_scale_factor
elif a3 < 0:
l_temp = (-b3 + math.sqrt(discriminant))/3/a3
else:
l_temp = 2*g0/(b3 + math.sqrt(discriminant))
else:
# Use quadratic interpolation
if a2 <= 0:
# Quadratic has wrong shape. Step in opposite
# direction.
l_temp = -l*max_scale_factor
else:
l_temp = -b2/2/a2
l_ = l
g_ = g_l
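# Keep the sign of the proposed step, but clamp its magnitude to lie
# between min_scale_factor and max_scale_factor times the previous |l|.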
l_sign = np.sign(l_temp)
l_temp = min(abs(l)*max_scale_factor, abs(l_temp))
l = l_sign*max(abs(l)*min_scale_factor, l_temp)
return l, g_l