Source code for mmf.solve.broyden_solvers

r"""Iterative solvers based on the Broyden method.

This module contains methods for solving root-finding problems with
various Broyden algorithms.

The problem is expressed in fixed-point form `x = F(x)`, where the
Jacobian of `F(x)` near the solution should be close to zero (in other
words, the iteration `x -> F(x)` should be as close to convergent as
possible).  This is because the Broyden iterations start from the
identity as the initial approximation to the Jacobian `J` of
`G(x) = x - F(x)`.

.. todo:: Check usage of `max_step`.  This is now administered in
   :meth:`Solver.solve` and should accept a vector.
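
As a rough sketch (independent of the classes below, and not this
module's actual implementation), a "good Broyden" update of the inverse
Jacobian for `G(x) = x - F(x)`, starting from the identity, looks like::

    import numpy as np

    def broyden_fixed_point(F, x, tol=1e-12, max_iter=100):
        # Minimal illustrative Broyden iteration for x = F(x).
        G = lambda x: x - F(x)
        j_inv = np.eye(len(x))      # initial inverse Jacobian ~ identity
        g = G(x)
        for _ in range(max_iter):
            dx = -j_inv.dot(g)      # quasi-Newton step
            x_new = x + dx
            g_new = G(x_new)
            dg = g_new - g
            # Sherman-Morrison form of the "good" Broyden update.
            denom = dx.dot(j_inv.dot(dg))
            if abs(denom) > 1e-30:
                j_inv += np.outer(dx - j_inv.dot(dg),
                                  dx.dot(j_inv))/denom
            x, g = x_new, g_new
            if np.linalg.norm(g) < tol:
                break
        return x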

"""
from __future__ import division

from copy import copy

import numpy
np = numpy

from mmf.objects import process_vars
from mmf.objects import Required, Delegate, Excluded, Computed, Deleted

import broyden
import solver_interface, solver_utils
from solver_interface import Mesg

from mmf.interfaces import implements

_ABS_TOL = 1e-12
_REL_TOL = 1e-12

_FINFO = np.finfo(float)
_TINY = _FINFO.tiny

_MESGS = Mesg()       # Default object.  Should generally not be used.
_DEBUG = False

class BroydenData(solver_interface.SolverData):
    """Container for Broyden data.

    The core for this method is defined in
    :class:`mmf.solve.JinvBroyden` and a proxy object is used to
    maintain the state and compute the updates.
    """
    implements(solver_interface.ISolverData)
    _state_vars = [
        ('x0', None, "Previous value of `x`"),
        ('g0', None, "Previous value of `g = x - F(x)`"),
        ('skip_0', None, "Previous skipped parameters"),
        ('x_good_0', None,
         r"""Previous good step at which some parameter became bad in
         the sense that the change in the errors was no longer small.
         This is used as the reference point for updating the
         Jacobian of sensitive parameters."""),
        ('g_good_0', None, "`g = x - F(x)` at `x_good_0`"),
        ('x_good_1', None, "Current good step (see :attr:`x_good_0`)"),
        ('g_good_1', None, "`g = x - F(x)` at `x_good_1`"),
        ('skip_good', None,
         r"""These are the parameters that were bad at the time that
         `x_good` was saved or that have gone bad since."""),
        ('j_inv_F', Delegate(broyden.JinvBroyden,
                             ['dyadic', 'x', 'g', 'n_prev'])),
        ('old_err', np.inf, "Previous absolute error."),
        ('dyadic=j_inv_F.dyadic', True),
        ('inv_jacobian_err', np.inf,
         r"""This flag is used to indicate the quality of the
         Jacobian.  It represents a measure of the relative change in
         the Jacobian inverse from the previous step.  If this is
         small (sufficiently less than 1), then the approximation is
         probably reasonable for determining a step.  See
         :meth:`mmf.solve.broyden.JinvBroyden.update` for details.""")
    ]
    process_vars()

    def change_basis(self, f, f_inv_t=None):
        """Use the function `x_new = f(x)` to update the data
        corresponding to a change of basis.  Raise an
        :exc:`AttributeError` if the change cannot be implemented (or
        don't define this method.)

        Parameters
        ----------
        f : function
           Change of basis transformation (should be essentially
           matrix multiplication, but for large bases one might want
           to prevent forming this matrix explicitly.)
        f_inv_t : function, optional
           Transpose of the inverse of the function.  If this is
           provided, then it should be used to transform the bras,
           otherwise, use `f`.  *Presently ignored!*
        """
        if self.x0 is not None:
            self.x0 = f(self.x0)
            self.g0 = None      # This should be recalculated.
        self.j_inv_F.change_basis(f)
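
# Illustrative sketch (not part of the class above): `change_basis`
# expects `f` to act like a matrix multiplication.  With a hypothetical
# transformation matrix `A` and a `BroydenData` instance `solver_data`,
# the call might look like:
#
#     A = np.array([[0.0, 1.0],
#                   [1.0, 0.0]])                     # swap two components
#     f = lambda x: A.dot(x)                         # x_new = f(x)
#     f_inv_t = lambda x: np.linalg.inv(A).T.dot(x)  # presently ignored
#     solver_data.change_basis(f, f_inv_t)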
class _Broyden(solver_interface.Solver): """Common interface for Broyden solvers. Sensitive Parameters -------------------- Sensitive parameters are those for which convergence of the other parameters is required before the objective function corresponding to these parameters is trustworthy. To deal with these, we hold these fixed, performing a regular search in the space of the parameters until the change in the errors is small enough. This criterion is determined by :meth:`_get_skip_inds` which returns the indices that should be skipped. Iteration proceeds in the sector without sensitive parameters until the errors stabilize and non of the sensitive parameters should be skipped. This state is stored as a "good" state `x_good_1`. From this state a line-search is made including the sensitive parameters until a suitable step is made that changes the sensitive parameters. One question here is how should the Jacobian be updated. We are presently taking the strategy that if a reasonable approximation to the Jacobian at this good point is found, the non-sensitive sector will still be okay (the potential problem is that this block will be messed up during this search). Typically, after this step, some of the parameters will no longer be good. If this is the case and there was a previous good state `x_good_0`, then the difference between this and `x_good_1` is used to do an update to correct the global structure of the Jacobian. Once this is done, we repeat, starting in the sector of non-sensitive parameters. """ _state_vars = [ ('bad_step_factor', 10,\ """If the error increases by more than a factor of :attr:`bad_step_factor` from the previous step, then a weighted step is used. The default is a large value so this does not happen much. If you know that the weighted step will be in the correct direction, then make this smaller. This must be greater than 1 otherwise the method may loop forever. """), ('weight', 0.5,\ """If the broyden step fails, then the new step is `x - w*G(x)`."""), ('ls_avg_descent_factor', 1e-4, r"""To ensure that we terminate, the line search algorithm requires that each step descend with at least this slope, otherwise the routine will terminate with an error indicating that a local minimum was found rather than a root."""), ('ls_min_scale_factor', 0.1, r"""Minimum factor by which the step size is reduced in the line search. See :func:`solver_utils.line_search`."""), ('ls_max_scale_factor', 0.5, r"""Minimum factor by which the step size is reduced in the line search. See :func:`solver_utils.line_search`."""), ('ls_linear', True, r"""If `True` then the line searches will be along a line. This ensures that bad updates to the inverse Jacobian are completely overwritten, but if the initial direction is not downhill, then this can fail. As a recourse, if the linear search fails, then one pass with the non-linear search is attempted to see if the Jacobian can be compensated. See also :attr:`ls_linear_min_step`. If `False`, then new directions are used at each step as the inverse Jacobian is updated. Hopefully this will lead to an improved step that is ultimately downhill. Previous bad updates to the inverse Jacobian may not be completely erased."""), ('ls_allow_backstep', True, r"""If `True`, then the second line search step (if the first step fails) will be in the negative direction. 
This may scale the problem inappropriately (especially if a monotonic decrease in a control parameter is desired), in which case one should set this to `False`."""), ('ls_min_step', 1e-12, r"""To ensure termination, if the relative step size becomes smaller than this, then the routine will terminate with an error."""), ('ls_linear_min_step', None, r"""If defined, then this will be the bailout criterion for the linear line-search algorithm. This allows for a much earlier bailout from the linear search leaving the high-precision test and ultimate bailout for the subsequent non-linear search. In general, this should be somewhat larger than :attr:`ls_min_step`. If it is not specified, then it is a factor of 100 times larger than :attr:`ls_min_step`."""), ('ls_strategy', 'default', r"""Choose the line-search strategy. See :meth:`iterate` for possibilities."""), ('ls_n_bad_steps', 10, r"""Maximum number of bad steps to try during a line-search phase before giving up."""), ('ls_allow_bad_steps', False, r"""Set to allow the line search to allow bad steps. Used mainly for debugging, not for production use."""), ('max_weighted_step', np.inf, """Maximum length of the weighted steps (also respects :attr:`max_step`). If this is zero, then weighted steps are never used (except for the first step)."""), ('try_weighted_step', True, """If this is `True`, then a weighed step will be attempted if the other steps fail. This should be `True` if the iteration is naturally somewhat convergent (i.e. if the objective function has the form `G(x) = x - F(x)` where the iteration `x -> F(x)` is somewhat convergent) and should be `False` if the objective function is unrelated to the steps."""), ('initial_recession_factor', 0.2,\ """When the error increases by more than :attr:`bad_step_factor`, then the step is regressed toward the previous step by this factor. The regression factor is then further reduced by :attr:`regression_factor`."""), ('recession_factor', 0.5,\ """When the error increases by more than :attr:`bad_step_factor`, then the step is regressed toward the previous step by this factor until the error is suitably reduced."""), ('j_inv_incompatibility', 0.1, r"""Maximum incompatibility for an update of the inverse Jacobian to be accepted. See :meth:`set_point`."""), ('solver_data0', Delegate(BroydenData, ['dyadic', 'n_prev'])), ('solver_data0.dyadic', 10), ('solver_data0.n_prev', 10), ('reset_Jinv', Excluded(False),\ """If `True`, then on the next iteration, reset `Jinv`. Only works once. This can be used in :attr:`solver_interface.IProblem.schedule` to force the Jacobian to be reset."""), ('ls_max', 10, """Maximum number of steps allowed during line search. If the standard iteration fails to improve the solution, then a line search may be used to find the local minimum in the search direction. This can be used to limit or disable this."""), ('sensitive_rel_tol', 1e-1, r"""Sensitive parameters are only adjusted significantly if the relative change in the error of those components is less than this."""), ('sensitive_abs_tol', 1e-2, r"""Sensitive parameters are only adjusted significantly if the absolute change in the error of those components is less than this."""), ('Jinv_tol', 1e-2, r"""This is the tolerance required on the relative change of the predicted step in order to ascertain that the inverse Jacobian is accurate. 
See :meth:`_line_search_d`."""), ] process_vars() def __init__(self, *v, **kw): if (0 >= self.initial_recession_factor or 1 <= self.initial_recession_factor): raise ValueError("initial_recession_factor must be in (0,1).") if (0 >= self.recession_factor or 1 <= self.recession_factor): raise ValueError("recession_factor must be in (0,1).") solver_interface.Solver.__init__(self, *v, **kw) def _correct_step(self, dx, max_dx=None): """Return a step that satisfies the constraints `max_dx`.""" if max_dx is None: max_dx = self.max_step length = np.linalg.norm(dx) if length > max_dx: dx *= max_dx/length return dx def set_point(self, j_inv, x, g, skip_inds=None, force=False): r"""Update the inverse Jacobian if the step is suitable compatible and set the current point to `(x,g)`.""" j_inv.set_point(x=x, g=g, skip_inds=skip_inds, include_skip=False, mags=abs(g)) def line_search_broyden(self, x, x0, F0, # x1, x0, F0, problem, problem, solver_data, mesgs, min_step=None, skip_inds=None, extrapolate=None): r"""Return `(x1, F1)` corresponding to a pure broyden step. This version always takes the full broyden step `x` irrespective of whether of not the step improves the solution. """ j_inv = solver_data.j_inv_F G0 = x0 - F0 j_inv.update(x0=x0, g0=G0) if extrapolate is None: def extrapolate(x_controls): return j_inv.get_x(x_controls=x_controls, min_step=self.min_abs_step, max_step=self.max_step) if skip_inds is None: skip_inds = np.array([], dtype=int) (x1, _G1, _dx1, F1) = self.problem_step( problem=problem, solver_data=solver_data, x=x, x0=x0, G0=None, dx0=None, F0=F0, compute_F=True, extrapolate=extrapolate, iter_data=solver_data.iter_data, abs_tol=self.G_tol, mesgs=mesgs) self.set_point(j_inv=solver_data.j_inv_F, x=x1, g=x1 - F1, skip_inds=skip_inds) return x1, F1 def line_search_jacobian(self, x, x0, F0, # x1, x0, F0, problem, problem, solver_data, min_step, mesgs, skip_inds=None, extrapolate=None, ): r"""Return `(x1, F1)`. This version searches along a line until a good update to the Jacobian is found along this direction. By restricting to a linear search, poor updates to the Jacobian are completely corrected. Once a good update for the Jacobian is found, the recommended step is `(x1 + x0)/2`, half-way along the path towards this point so that the new point has a good approximation (the finite-difference approximation for the derivative at this new point has the properties of a centered difference). This may take the function uphill, but should leave the iteration at a point where the Jacobian is a better approximation. 
""" j_inv = solver_data.j_inv_F G0 = x0 - F0 x1, F1 = self.line_search_broyden( x=x, x0=x0, F0=F0, # x1, x0, F0, problem, problem=problem, solver_data=solver_data, mesgs=mesgs, skip_inds=skip_inds, extrapolate=extrapolate) l = 1 dx = x1 - x0 ndx = np.linalg.norm(dx) min_l = min_step/ndx while True: G1 = x1 - F1 incompatibility = j_inv.update( x=x0 + l*dx, g=G1, x0=x0, g0=G0, skip_inds=skip_inds, mags=abs(F1) + abs(x1)) if incompatibility <= self.j_inv_incompatibility: break l /= 2 if l < min_l: raise solver_utils.NoDescentError( """Could not find a good to updating Jacbian with step size greater than min_step = %g""" % (min_step,)) (x1, _G1, _dx1, F1) = self.problem_step( problem=problem, solver_data=solver_data, x=x0 + l*dx, x0=x0, G0=None, dx0=None, F0=F0, compute_F=True, extrapolate=None, abs_tol=self.G_tol, mesgs=mesgs) return x1, F1 def line_search_linear(self, x, x0, F0, # x1, x0, F0, problem, problem, solver_data, min_step, mesgs, skip_inds=None, extrapolate=None): r"""Return `(x1, F1)` where the step size diminishes sufficiently to ensure convergence. If a good step is not found, then an :exc:`solver_utils.NoDescentError` exception is raised. """ G0 = x0 - F0 dx = x - x0 dx_norm = np.linalg.norm(dx) x_F_new = [None, None] g0 = np.linalg.norm(G0/problem.G_scale, ord=self.norm_p)**2 if g0 == 0: return x0, F0 def g(l, dx=dx): """Function passed to line search algorithm.""" (x1_, _G1, _dx1, F1_) = self.problem_step( problem=problem, solver_data=solver_data, x=x0 + l*dx, x0=x0, G0=None, dx0=None, F0=F0, compute_F=True, extrapolate=extrapolate, abs_tol=self.G_tol, mesgs=mesgs) x_F_new[:] = x1_, F1_ G_ = x1_ - F1_ g_ = np.linalg.norm(G_/problem.G_scale, ord=self.norm_p)**2 return g_ l, g_l = solver_utils.line_search( g=g, g0=g0, avg_descent_factor=self.ls_avg_descent_factor, min_step=min_step, min_scale_factor=self.ls_min_scale_factor, max_scale_factor=self.ls_max_scale_factor, max_steps=self.ls_max, allow_backstep=self.ls_allow_backstep, mesgs=mesgs) x1, F1 = x_F_new return x1, F1 def line_search_non_linear(self, x, x0, F0, # x1, x0, F0, problem, problem, solver_data, min_step, mesgs, skip_inds=None, extrapolate=None): r"""Return `(x1, F1)` where the step size diminishes sufficiently to ensure convergence. If a good step is not found, then an :exc:`solver_utils.NoDescentError` exception is raised. This is similar to the Linear search, but a new step is computed with each update to the Jacobian and the search is performed along this new direction. This has the advantage of being able to find a good step even if the original direction was not down-hill, but has the disadvantage that if the steps are too large, then the Jacobian might be "polluted". It is a good idea to use this routine if :meth:`ls_linear` has failed, but to start with a small step to prevent polluting the Jacobian. 
""" G0 = x0 - F0 #dx = x - x0 #dx_norm = np.linalg.norm(dx) x_F_new = [None, None] g0 = np.linalg.norm(G0/problem.G_scale, ord=self.norm_p)**2 if g0 == 0: return x0, F0 def g(l): """Function passed to line search algorithm.""" dx = solver_data.j_inv_F.get_x( min_step=self.min_abs_step, max_step=self.max_step) - x0 #dx *= dx_norm/np.max(dx_norm, np.linalg.norm(dx)) (x1_, _G1, _dx1, F1_) = self.problem_step( problem=problem, solver_data=solver_data, x=x0 + l*dx, x0=x0, G0=None, dx0=None, F0=F0, compute_F=True, extrapolate=extrapolate, abs_tol=self.G_tol, mesgs=mesgs) x_F_new[:] = x1_, F1_ G1_ = x1_ - F1_ solver_data.j_inv_F.add_point(x=x1_, g=G1_) solver_data.j_inv_F.update( x0=x0, g0=G0, skip_inds=skip_inds, mags=(abs(x0) + abs(x1_) + abs(F0) + abs(F1_))) g_ = np.linalg.norm(G1_/problem.G_scale, ord=self.norm_p)**2 return g_ l, g_l = solver_utils.line_search( g=g, g0=g0, avg_descent_factor=self.ls_avg_descent_factor, min_step=min_step, min_scale_factor=self.ls_min_scale_factor, max_scale_factor=self.ls_max_scale_factor, max_steps=self.ls_max, allow_backstep=self.ls_allow_backstep, mesgs=mesgs) x1, F1 = x_F_new return x1, F1 def line_search(self, x, x0, F0, # x1, x0, F0, problem, problem, solver_data, min_step, mesgs, skip_inds=None, extrapolate=None, allow_bad_step=False): r"""Return `(x1, F1)` where the step size diminishes sufficiently to ensure convergence. If a good step is not found, then an :exc:`solver_utils.NoDescentError` exception is raised. This uses a combination of the linear and non-linear algorithms. First a linear search is performed until the updates to the jacobian are consistent, then it switches to the non-linear search which selects a new downhill direction. Parameters ---------- skip_inds : None or array Indices of parameters to hold fixed. If the problem contains sensitive parameters as specified in `problem.state0.sensitive`, then this must not be `None`. """ G0 = x0 - F0 dx = x - x0 # This needs to be mutated as it is used in # extrapolation functions. 
if skip_inds is not None and 0 < len(skip_inds): x_controls = (skip_inds, [x0[skip_inds]]) dx[skip_inds] = 0.0 G0[skip_inds] = 0.0 else: x_controls = None dx_norm = np.linalg.norm(dx) x_F_new = [None, None] g0 = np.linalg.norm(G0/problem.G_scale, ord=self.norm_p)**2 if g0 == 0: return x0, F0 d_ = {'incompatibility': np.inf} # Dictionary for persistent # results j_inv = solver_data.j_inv_F extrapolate_J, extrapolate_dx = self._get_extrapolate( x0=x0, G0=G0, j_inv=j_inv, dx =dx) def g(l): """Function passed to line search algorithm.""" if d_['incompatibility'] <= self.j_inv_incompatibility: dx[:] = j_inv.get_x(x_controls=x_controls, min_step=self.min_abs_step, max_step=self.max_step) - x0 l = abs(l) extrapolate = extrapolate_J else: extrapolate = extrapolate_dx (x1_, _G1, _dx1, F1_) = self.problem_step( problem=problem, solver_data=solver_data, x=x0 + l*dx, x0=x0, G0=None, dx0=None, F0=F0, compute_F=True, extrapolate=extrapolate, abs_tol=self.G_tol, mesgs=mesgs) x_F_new[:] = x1_, F1_ G1_ = x1_ - F1_ #dx_ = x1_ - x0 #dg_ = G1_ - G0 d_['incompatibility'] = j_inv.incompatibility( x=x1_, g=G1_, x0=x0, g0=G0) j_inv.add_point(x=x1_, g=G1_) j_inv.update(x0=x0, g0=G0, skip_inds=skip_inds, mags=(abs(x0) + abs(x1_) + abs(F0) + abs(F1_))) G1_[skip_inds] = 0.0 g_ = np.linalg.norm(G1_/problem.G_scale, ord=self.norm_p)**2 return g_ analyze = False # **** Change This if analyze: def set_dx(): dx[:] = j_inv.get_x(min_step=self.min_abs_step, max_step=self.max_step) - x0 @np.vectorize def f(l, ord=self.norm_p): (x1_, _G1, _dx1, F1_) = problem.step( problem=problem, solver_data=solver_data, x=x0 + l*dx, x0=x0, G0=None, dx0=None, F0=F0, compute_F=True, extrapolate=extrapolate, abs_tol=self.G_tol, mesgs=mesgs) x_F_new[:] = x1_, F1_ G1_ = x1_ - F1_ print j_inv.update( x=x1_, g=G1_, skip_inds=skip_inds, mags=(abs(x0) + abs(x1_) + abs(F0) + abs(F1_))) d_['dg_'] = G1_ - G0 d_['dx_'] = x1_ - x0 g_ = np.linalg.norm(G1_/problem.G_scale, ord=ord)**2 return g_ import pylab as plt ls = np.linspace(-2,2,100) ls1 = np.linspace(-0.2,0.2,100) def pl(long=False, ord=2): _f = plt.gcf() plt.figure(2) plt.plot(ls, g0 + (ls*ls - 2*ls)*g0, ':') if long: plt.plot(ls, f(ls, ord)) plt.axis([-2,2,0,5*g0]) else: plt.plot(ls1, f(ls1, ord)) plt.axis([-0.2,0.2,0,3*g0]) plt.figure(_f.number) def get_J(): J = problem._J(x0, lambda x: x - problem.F(x)) B = np.linalg.inv(J) B0 = solver_data.j_inv_F.j_inv.asarray() return B, B0 #import pdb;pdb.set_trace() l, g_l = solver_utils.line_search( g=g, g0=g0, avg_descent_factor=self.ls_avg_descent_factor, min_step=min_step, min_scale_factor=self.ls_min_scale_factor, max_scale_factor=self.ls_max_scale_factor, max_steps=self.ls_max, allow_backstep=self.ls_allow_backstep, allow_bad_step=allow_bad_step, mesgs=mesgs) x1, F1 = x_F_new return x1, F1 def _get_extrapolate(self, x0, G0, j_inv, dx): """Return `(extrapolate_J, extrapolate_dx)` where each is an appropriate extrapolation function. Parameters ---------- x0, G0 : array Original point and `G0 = G(x0)` to extrapolate from. j_inv : matrix-like Inverse Jacobian. Calls `j_inv.get_x()`. dx : array Desired step used by simple extrapolation. Returns ------- extrapolate_J : function Jacobian based extrapolation extrapolate_dx : function Simple extrapolation (no Jacobian used). 
""" def extrapolate_J(x_controls): """Use Jacobian to extrapolate.""" return j_inv.get_x(x_controls=x_controls, min_step=self.min_abs_step, max_step=self.max_step) def extrapolate_dx(x_controls): """Default extrapolation if Jacobian is not good.""" return self._extrapolate(x=x0 + dx, x0=x0, x_controls=x_controls) return extrapolate_J, extrapolate_dx def _line_search(self): r"""Perform line search. There are several strategies for line-searches available here. A couple of generic options are provided controlling how the line-search is performed. In particular, one must determine how the backtracking is performed. Options include: 1) *Constant reduction.* The step-length `l` is decreased by a constant factor `-1 < c < 1`. Negative factors allow for steps in the opposite direction to be attempted. 2) *Cubic fit.* Here we fit a cubic polynomial through function $g(l) = \norm{G(x+l\delta_x)}^2$ and extrapolate to find the minimum. An option is to assume that $\delta_x = -\mat{J}^{-1}G(x)$ which fixes the slope $g'(0) = -2g(0)$ (thereby requiring only previous two points), or to use the previous three points. Parameters must be provided to limit the step size to prevent `l` from decreasing too slowly (spending too much time in the line search) or too quickly (resulting in extremely small step sizes). """ def _get_skip_inds(self, problem, solver_data): """Return `skip_inds`. Returns ------- skip_inds : list The indices that should be skipped for this iteration. x1, F1 : array Function evaluated at `F1 = F(x1)`. """ x0 = solver_data.x0 g0 = solver_data.g0 x1 = solver_data.x g1 = solver_data.g sensitive_inds = np.asarray(problem.state0.sensitive, dtype=int) if g0 is None or g1 is None: # Need at least two steps before we vary sensitive parameters skip_inds = sensitive_inds else: err0_s = np.abs(g0[sensitive_inds]) err1_s = np.abs(g1[sensitive_inds]) skip_inds = sensitive_inds[ np.where(abs(err1_s - err0_s)/abs(err1_s) >= self.sensitive_rel_tol)] abs_err = copy(g1) abs_err[sensitive_inds] = 0 rel_err = abs_err/(abs(x1) + _TINY) err = np.minimum(abs_err/self.sensitive_abs_tol, rel_err/self.sensitive_rel_tol) if err.max() < 1: # Things have started to converge. Only skip bad parameters. pass else: skip_inds = sensitive_inds #print skip_inds, x1[-1] return skip_inds def iterate(self, x, problem, solver_data, x0=None, G0=None, dx0=None, F0=None, mesgs=_MESGS): r"""Perform a single iteration and return the triplet `(x_new, (x, G, dx, F))`. The line-search strategy is dictated by :attr:`ls_strategy` which may take on one of the following values. 'pure_broyden' : Perform pure Broyden steps irrespective of whether or not they increase the error. Termination occurs if more than :attr:`ls_n_bad_step` bad steps are taken. '1' : Notes ----- In order to enjoy the good convergence properties of the Broyden methods, we first try a full Broyden step (unless The big concern with Broyden methods is that the approximate Jacobian inverse may be inaccurate. To deal with this, we use :meth:`_line_search_J` which does a modified line search until a reasonable approximation of the Jacobian is obtained. """ #import pdb;pdb.set_trace() G1 = dx1 = None if self.reset_Jinv: solver_data.j_inv_F.reset() self.reset_Jinv = False skip_inds = np.asarray(problem.state0.sensitive, dtype=int) if (solver_data.x is None or solver_data.g is None): # First iteration: Here we first make an evaluation, and # then perform a line-search to update the Jacobian. # We use self._extrapolate as we have no base point yet. 
def extrapolate(xc): return self._extrapolate(x=x, x0=None, x_controls=xc) x0, F0 = self._problem_step( x, x0=None, F0=None, problem=problem, solver_data=solver_data, extrapolate=extrapolate, mesgs=mesgs, skip_inds=skip_inds) self.set_point(j_inv=solver_data.j_inv_F, x=x0, g=x0 - F0, skip_inds=skip_inds) x = F0 elif x0 is None or F0 is None: # Restarting from a saved solution, so use old stored # values. x0 = solver_data.x F0 = x0 - solver_data.g x = F0 else: # Subsequent steps: pass if (np.linalg.norm((x0 - F0)/problem.G_scale, ord=self.norm_p) < self.G_tol): # Converged. Don't need any more. x1 = x0 F1 = F0 G1 = dx1 = None x = F1 return (x, (x1, G1, dx1, F1)) skip_inds = self._get_skip_inds(problem, solver_data) if self.ls_strategy == 'pure_broyden' and True: x1, F1 = self.line_search_broyden(x=x, x0=x0, F0=F0, problem=problem, solver_data=solver_data, mesgs=mesgs, skip_inds=skip_inds) elif self.ls_strategy == 'pure_broyden' and False: g0 = np.linalg.norm((x0 - F0)/problem.G_scale, ord=self.norm_p) g1 = np.inf step_found = False for n in xrange(self.ls_n_bad_steps): mesgs.iter( "Trying %i of %i broyden steps: " % (n, self.ls_n_bad_steps) + "Error %g >= %g" % (g1, g0)) x1, F1 = self.line_search_broyden(x=x, x0=x0, F0=F0, problem=problem, solver_data=solver_data, mesgs=mesgs, skip_inds=skip_inds) G1 = x1 - F1 self.set_point(j_inv=solver_data.j_inv_F, x=x1, g=G1, skip_inds=skip_inds) x = solver_data.j_inv_F.get_x( x_controls=[skip_inds, x[skip_inds]], min_step=self.min_abs_step, max_step=self.max_step) g1 = np.linalg.norm((x1 - F1)/problem.G_scale, ord=self.norm_p) if g1 < g0: step_found = True break if not step_found and not self.ls_allow_bad_steps: msg = ("No downhill step found after ls_n_bad_steps" + " = %i broyden steps" % (self.ls_n_bad_steps,)) mesgs.error(msg) raise solver_utils.NoDescentError(msg) elif self.ls_strategy == 'default': if (0 < len(problem.state0.sensitive) and 0 == len(skip_inds)): # We have sensitive parameters but all parameters are # good so we record the good step, then search mesgs.info("Good step found. Saving...") solver_data.x_good_0 = solver_data.x_good_1 solver_data.g_good_0 = solver_data.g_good_1 solver_data.x_good_1 = x0 solver_data.g_good_1 = x0 - F0 mesgs.info("Good step found. Finding new step " + "including sensitive parameters...") ls_min_step = self.ls_min_step allow_bad_step = ( self.ls_allow_bad_steps and solver_data.iteration_number < self.ls_n_bad_steps) x1, F1 = self.line_search(x=x, x0=x0, F0=F0, problem=problem, solver_data=solver_data, mesgs=mesgs, min_step=self.ls_min_step, skip_inds=skip_inds, allow_bad_step=allow_bad_step ) if (0 < len(problem.state0.sensitive) and 0 == len(skip_inds) and solver_data.x_good_0 is not None): # We have sensitive parameters but all parameters were # good. 
The line_search may have messed up the # Jacobian a bit, so we use the previous good step (if # it exists) to perform an update without any mesgs.info("Updating Jacobian with previous good step...") self.set_point( j_inv=solver_data.j_inv_F, x=solver_data.x_good_1, g=solver_data.g_good_1, force=True) elif self.ls_strategy == 'jacobian': x1, F1 = self.line_search_jacobian(x=x, x0=x0, F0=F0, problem=problem, solver_data=solver_data, min_step=self.ls_min_step, mesgs=mesgs, skip_inds=skip_inds) elif self.ls_strategy == 'linear': x1, F1 = self.line_search_linear(x=x, x0=x0, F0=F0, problem=problem, solver_data=solver_data, min_step=self.ls_min_step, mesgs=mesgs, skip_inds=skip_inds) elif self.ls_strategy == 'non-linear': x1, F1 = self.line_search_non_linear(x=x, x0=x0, F0=F0, problem=problem, solver_data=solver_data, min_step=self.ls_min_step, mesgs=mesgs, skip_inds=skip_inds) elif self.ls_strategy == 'two_step': ls_min_step = self.ls_linear_min_step if ls_min_step is None: ls_min_step = self.ls_min_step*100 try: x1, F1 = self.line_search_linear(x=x, x0=x0, F0=F0, problem=problem, solver_data=solver_data, mesgs=mesgs, min_step=ls_min_step, skip_inds=None) except solver_utils.NoDescentError: if True: if self.verbosity >= 10: mesgs.append("Trying bad step...") dx = x - x0 x1, F1 = self.line_search_broyden( x=x0 + dx/2**self.ls_max, x0=x0, F0=F0, problem=problem, solver_data=solver_data, mesgs=mesgs, skip_inds=None) else: x1_ = x # Try once again without linear algorithm. if self.verbosity >= 10: mesgs.append("No downhill step found: " + "Trying non-linear line-search...") try: x1, F1 = self.line_search_non_linear( x=x1_, x0=x0, F0=F0, problem=problem, solver_data=solver_data, mesgs=mesgs, min_step=self.ls_min_step, skip_inds=None) except solver_utils.NoDescentError: # Try resetting Jacobian. 
if self.verbosity >= 10: mesgs.append("No downhill step found: " + "Trying to reset Jacobian...") solver_data.j_inv_F.reset(m0=None) x1, F1 = self.line_search_non_linear( x=x1_, x0=x0, F0=F0, problem=problem, solver_data=solver_data, mesgs=mesgs, min_step=self.ls_min_step, skip_inds=None) else: raise ValueError("Line search strategy %s not supported" % (self.ls_strategy,)) self.set_point(j_inv=solver_data.j_inv_F, x=x1, g=x1 - F1, skip_inds=skip_inds) x_new = solver_data.j_inv_F.get_x(x_controls=[skip_inds, x[skip_inds]], min_step=self.min_abs_step, max_step=self.max_step) solver_data.skip_0 = skip_inds solver_data.g0 = solver_data.g solver_data.x0 = solver_data.x return (x_new, (x1, G1, dx1, F1)) G0 = x0 - F0 dx = x - x0 dx_norm = np.linalg.norm(dx) x_F_new = [None, None] g0 = np.linalg.norm(G0/problem.G_scale, ord=self.norm_p)**2 if g0 == 0: return (x0, (x0, G1, dx1, F0)) ls_linear = self.ls_linear def g(l, dx=dx): """Function passed to line search algorithm.""" if not ls_linear: # Get new step from broyden dx = solver_data.j_inv_F.get_x(min_step=self.min_abs_step, max_step=self.max_step) - x0 #dx *= dx_norm/np.max(dx_norm, np.linalg.norm(dx)) _x = x0 + l*dx x_F_new[:] = self._problem_step( _x, x0=x0, F0=F0, problem=problem, solver_data=solver_data, mesgs=mesgs) x1, F1 = x_F_new _G = x1 - F1 _g = np.linalg.norm(_G/problem.G_scale, ord=self.norm_p)**2 return _g try: ls_min_step = self.ls_min_step if ls_linear: ls_min_step = self.ls_linear_min_step if ls_min_step is None: ls_min_step = self.ls_min_step*100 l, g_l = solver_utils.line_search( g=g, g0=g0, avg_descent_factor=self.ls_avg_descent_factor, min_step=ls_min_step, min_scale_factor=self.ls_min_scale_factor, max_scale_factor=self.ls_max_scale_factor, max_steps=self.ls_max, allow_backstep=self.ls_allow_backstep, mesgs=mesgs) except solver_utils.NoDescentError: if ls_linear: # Try once again without linear algorithm. if self.verbosity >= 10: mesgs.append("No downhill step found: " + "Trying non-linear line-search...") ls_linear = False # for n in xrange(11): # try: # l, g_l = solver_utils.line_search( # g=g, g0=g0, # avg_descent_factor=self.ls_avg_descent_factor, # min_step=self.ls_min_step, # min_scale_factor=self.ls_min_scale_factor, # max_scale_factor=self.ls_max_scale_factor, # max_steps=self.ls_max, # mesgs=mesgs) # except solver_utils.NoDescentError: # if n < 10: # continue # else: # raise try: l, g_l = solver_utils.line_search( g=g, g0=g0, avg_descent_factor=self.ls_avg_descent_factor, min_step=self.ls_min_step, min_scale_factor=self.ls_min_scale_factor, max_scale_factor=self.ls_max_scale_factor, max_steps=self.ls_max, allow_backstep=self.ls_allow_backstep, mesgs=mesgs) except solver_utils.NoDescentError: # Try resetting Jacobian. 
if self.verbosity >= 10: mesgs.append("No downhill step found: " + "Trying to reset Jacobian...") solver_data.j_inv_F.reset(m0=None) l, g_l = solver_utils.line_search( g=g, g0=g0, avg_descent_factor=self.ls_avg_descent_factor, min_step=self.ls_min_step, min_scale_factor=self.ls_min_scale_factor, max_scale_factor=self.ls_max_scale_factor, max_steps=self.ls_max, allow_backstep=self.ls_allow_backstep, mesgs=mesgs) else: raise x1, F1 = x_F_new x_new = solver_data.j_inv_F.get_x( x_controls=[skip_inds, x[skip_inds]], min_step=self.min_abs_step, max_step=self.max_step) mesgs.append("Abs Error: %g" % ( np.linalg.norm((x1 - F1)/problem.G_scale, ord=self.norm_p))) #solver_data.x = x1 #solver_data.g = x1 - F1 return (x_new, (x1, G1, dx1, F1)) ########################################################## # Required methods: These need to be defined by subclasses. def _step(self, problem, x, x0, F0, solver_data, mesgs): """Return `(x1, F1, skip_inds)`. We intervene here to ensure that the error decreases with each step (otherwise we use a weighted step.) We use one of the following algorithms. A) First we compute `x1` and `G1` corresponding to the full Brodyen step A) 1) Try full Broyden. 2) If not okay, try half step. 1) Try full Broyden step. If this reduces the error, then accept. 2) If not, then check for sensitive parameters. These should only be changed if the error in the component of G for these has stabilized. 2) If the error increases by too much, then check if there are any sensitive parameters. If so, then try holding these fixed. 3) If this fails, then try taking a weighted step (if allowed). 4) If this fails, then perform a line search. """ return self._problem_step( x, x0=None, F0=None, problem=problem, solver_data=solver_data, mesgs=mesgs) print x, x0 import pdb;pdb.set_trace() if self.verbosity >= 10: mesgs.append("Trying Broyden step...") x1, F1 = self._problem_step( x, x0=None, F0=None, problem=problem, solver_data=solver_data, mesgs=mesgs) g0, dx0 = G0_dx # Now check sensitive parameters sensitive_inds = np.asarray(problem.state0.sensitive, dtype=int) skip_inds = [] # Indices to skip if 0 < len(sensitive_inds): skip_inds = self._get_skip_inds(problem=problem, solver_data=solver_data) x_controls = (skip_inds, x0[skip_inds]) if self.verbosity >= 10: mesgs.append("Holding %i sensitive parameters fixed." % (len(skip_inds),)) x1 = solver_data.j_inv_F.get_x( x_controls=x_controls, min_step=self.min_abs_step, max_step=self.max_step) x1, F1 = self._problem_step( x, x0=None, F0=None, problem=problem, solver_data=solver_data, mesgs=mesgs, skip_inds=skip_inds) g0_ = copy(g0) G1_ = copy(G1) g0_[skip_inds] = 0 G1_[skip_inds] = 0 err0 = np.abs(g0_).max() err1 = np.abs(G1_).max() if err1 < err0: # Error decreased, done. if self.verbosity >= 10: mesgs.append("Broyden step succeeded.") return (x1, G1, skip_inds) if len(skip_inds) < len(sensitive_inds): # First try holding all sensitive parameters fixed (if this # has not already been done) x_controls = (sensitive_inds, x0[sensitive_inds]) if self.verbosity >= 10: mesgs.append("Trying sensitive step") # Should we maybe set skip_inds = sensitive_inds here? Or # possibly recompute skip_inds based on new update? 
skip_inds = sensitive_inds x1 = x + solver_data.j_inv_F.get_x( x_controls=x_controls, min_step=self.min_abs_step, max_step=self.max_step) x1, F1 = self._problem_step( x, x0=None, F0=None, problem=problem, solver_data=solver_data, mesgs=mesgs, skip_inds=skip_inds) # Check errors: g0_ = copy(g0) G1_ = copy(G1) g0_[skip_inds] = 0 G1_[skip_inds] = 0 err0 = np.abs(g0_).max() err1 = np.abs(G1_).max() err0 = np.abs(g0).max() err1 = np.abs(G1).max() if err1 < err0: # Error decreased, done. if self.verbosity >= 10: mesgs.append("Sensitive step succeeded.") return (x1, G1, skip_inds) step_ok = err1 < err0*self.bad_step_factor if (not step_ok and self.max_weighted_step > 0 and self.try_weighted_step): # Try a weighted step if self.verbosity >= 10: mesgs.append("Trying weighted step") x1 = x1 - self._correct_step(self.weight*G1, max_dx=min(self.max_step, self.max_weighted_step)) x1, F1 = self._problem_step( x, x0=None, F0=None, problem=problem, solver_data=solver_data, mesgs=mesgs, skip_inds=skip_inds) G1_[skip_inds] = 0 err1 = np.abs(G1_).max() step_ok = err1 <= self.bad_step_factor*err0 if step_ok: if self.verbosity >= 10: mesgs.append("Weighted step succeeded.") return (x1, G1, skip_inds) if 0 < self.ls_max: # Do line search if self.verbosity >= 10: mesgs.append("Doing line search") x1, G1 = self._line_search(x0, x1, G1_, err0, err1, skip_inds, solver_data, problem, mesgs) return (x1, G1, skip_inds) def _step(self, problem, x, x0, F0, solver_data, mesgs): x1, F1 = self._problem_step( x, x0=None, F0=None, problem=problem, solver_data=solver_data, mesgs=mesgs) return x1, F1, np.array([], dtype=int) return self._line_search_d(problem, x, x0, G0_dx, solver_data, mesgs) def _problem_step(self, x1, x0, F0, problem, solver_data, mesgs, skip_inds=None, extrapolate=None): r"""Return result of `problem.step`, updating the Jacobian if the step is not too big.""" if extrapolate is None: def extrapolate(x_controls): return solver_data.x + solver_data.j_inv_F.get_x( x_controls=x_controls, min_step=self.min_abs_step, max_step=self.max_step) if skip_inds is None: skip_inds = np.array([], dtype=int) if x0 is not None: x1 = x0 + self._correct_step(x1 - x0, max_dx=self.max_step) (x1, _G1, _dx1, F1) = self.problem_step( problem=problem, solver_data=solver_data, x=x1, x0=x0, G0=None, dx0=None, F0=F0, compute_F=True, extrapolate=extrapolate, iter_data=solver_data.iter_data, abs_tol=self.G_tol, mesgs=mesgs) if x0 is not None and np.linalg.norm(x1 - x0) <= self.max_step: solver_data.j_inv_F.update( x=x1, g=x1 - F1, skip_inds=skip_inds, mags=(abs(x0) + abs(x1) + abs(F0) + abs(F1))) return x1, F1
def tst():
    from solver_interface import Problem, BareState

    class Sqrt(Problem):
        '''Iterative solution to sqrt(n).'''
        _state_vars = [('n', Required, 'To compute sqrt(n)'),
                       ('state0', Computed)]
        process_vars()

        def __init__(self, *v, **kw):
            self.n = np.array(self.n)
            self.state0 = self.get_initial_state()

        def get_initial_state(self):
            return BareState(x_=copy(self.n))

        def F(self, x, iter_data=None, abs_tol=None, rel_tol=None,
              mesgs=_MESGS):
            return x - (x*x - self.n)

        def G(self, x, compute_dx=True, iter_data=None, abs_tol=None,
              rel_tol=None, mesgs=_MESGS):
            return (x*x - self.n), -(x*x - self.n)/(2*x)

        def Jinv(self, x):
            return np.diag(1/(2*x))

        def step(self, x, x0=None, G0=None, dx0=None, F0=None,
                 compute_G=False, compute_dx=False, compute_F=False,
                 extrapolate=None, iter_data=None,
                 abs_tol=1e-12, rel_tol=1e-12, mesgs=_MESGS):
            'Keep x positive'
            print x
            x1 = x
            okay = lambda x: all(x >= 0)
            if not okay(x1):
                x_controls = zip(*[(i_, abs(x_))
                                   for i_, x_ in enumerate(x1)
                                   if x_ < 0])
                x1 = extrapolate(x_controls)
                if not okay(x1):
                    x1 = abs(x1)
            F1 = self.F(abs(x), None, abs_tol, rel_tol, mesgs)
            G1 = dx1 = None
            return (x1, G1, dx1, F1)

    x_tol = 0.01
    G_tol = 0.01
    solver = Broyden(x_tol=x_tol, G_tol=G_tol)
    problem = Sqrt(n=[0.01, 1.0, 100.0])
    state0 = problem.get_initial_state()

    print("Newton iterations...")
    x = 1*state0.x_
    for _ in xrange(5):
        x += problem.G(x)[1]
        print x
    print

    print("Straight iterations...")
    x = 1*state0.x_
    for _ in xrange(10):
        x = problem.F(x)
        print x
    print

    print("Straight broyden...")
    import broyden
    x = 1*state0.x_
    b = broyden.Broyden(x0=x, G0=x - problem.F(x))
    for _ in xrange(21):
        b.update(b.x - problem.F(b.x))
        print b.x

    x = 1*state0.x_
    b = broyden.Broyden(x0=x, G0=x - problem.F(x))
    for _ in xrange(21):
        b.update(b.x - problem.F(b.x))
        print b.x

    print("Our Broyden...")
    solver.ls_linear = True
    state, niter = solver.solve(problem, state0)
    print("Our Broyden: %i" % niter)
class Broyden(_Broyden):
    """This solver uses the simple Broyden algorithm.

    Examples
    --------
    >>> from solver_interface import Problem, BareState
    >>> class Sqrt(Problem):
    ...     '''Iterative solution to sqrt(n).'''
    ...     _state_vars = [('n', Required, 'To compute sqrt(n)'),
    ...                    ('state0', Computed)]
    ...     process_vars()
    ...     def __init__(self, *v, **kw):
    ...         self.n = np.array(self.n)
    ...         self.state0 = self.get_initial_state()
    ...     def get_initial_state(self):
    ...         return BareState(x_=copy(self.n))
    ...     def F(self, x, iter_data=None, abs_tol=None, rel_tol=None,
    ...           mesgs=_MESGS):
    ...         return x - (x*x - self.n)
    ...     def Jinv(self, x):
    ...         return np.diag(1/(2*x))
    ...     def step(self, x, x0=None, G0=None, dx0=None, F0=None,
    ...              compute_G=False, compute_dx=False, compute_F=False,
    ...              extrapolate=None, iter_data=None,
    ...              abs_tol=1e-12, rel_tol=1e-12, mesgs=_MESGS):
    ...         'Keep x positive'
    ...         x1 = x
    ...         okay = lambda x: all(x >= 0)
    ...         if not okay(x1):
    ...             x_controls = zip(*[(i_, abs(x_))
    ...                                for i_, x_ in enumerate(x1)
    ...                                if x_ < 0])
    ...             x1 = extrapolate(x_controls)
    ...             if not okay(x1):
    ...                 x1 = abs(x1)
    ...         F1 = self.F(abs(x), None, abs_tol, rel_tol, mesgs)
    ...         G1 = dx1 = None
    ...         return (x1, G1, dx1, F1)
    >>> x_tol = 0.01
    >>> G_tol = 0.01
    >>> solver = Broyden(x_tol=x_tol, G_tol=G_tol, ls_max=100,
    ...                  ls_strategy='default', dyadic=2, n_prev=2)
    >>> problem = Sqrt(n=[0.01, 1.0, 100.0])
    >>> state0 = problem.get_initial_state()
    >>> state, niter = solver.solve(problem, state0)
    >>> niter              # Newton takes 5, straight Broyden takes 21
    10
    >>> abs(state.x_[0] - 0.1) < 1.2*x_tol
    True
    >>> abs(state.x_[1] - 1.0) < 1.2*x_tol
    True
    >>> abs(state.x_[2] - 10.0) < x_tol
    True
    >>> G, dx = problem.G(state.x_)
    >>> abs(G).max() < G_tol
    True
    """
class ModifiedBroyden(_Broyden):
    """This solver uses the Modified Broyden algorithm.

    >> from solver_interface import Problem, BareState
    >> class Sqrt(Problem):
    ...     '''Iterative solution to sqrt(n).'''
    ...     _state_vars = [('n', Required, 'To compute sqrt(n)')]
    ...     process_vars()
    ...     def __init__(self, *v, **kw): self.n = np.array(self.n)
    ...     def get_initial_state(self):
    ...         return BareState(x_=copy(self.n))
    ...     def G(self, x, iter_data, abs_tol, rel_tol, mesgs):
    ...         return x*x - self.n
    ...     'Keep x positive'
    ...     x1 = x
    ...     okay = lambda x: all(x >= 0)
    ...     if not okay(x1):
    ...         x_control = zip(*[(i_, abs(x_))
    ...                           for i_, x_ in enumerate(x1)
    ...                           if x_ < 0])
    ...         x1 = extrapolate(x_controls)
    ...         if not okay(x1):
    ...             x1 = abs(x1)
    ...     G1 = self.G(abs(x), None, abs_tol, rel_tol, mesgs)
    ...     return (x1, G1)
    >> abs_xtol = 0.01
    >> rel_xtol = 0.01
    >> solver = ModifiedBroyden(abs_xtol=abs_xtol, rel_xtol=rel_xtol)
    >> problem = Sqrt(n=[0.01, 1.0, 100.0])
    >> state0 = problem.get_initial_state()
    >> state, niter = solver.solve(problem, state0)
    >> abs(state.x_[0] - 0.1) < abs_xtol
    True
    >> abs(state.x_[1] - 1.0) < abs_xtol
    True
    >> abs(state.x_[2] - 10.0) < abs_xtol
    True
    >> abs(state.x_[2] - 10.0)/10.0 < rel_xtol
    True
    """
    _state_vars = [('alpha', 1.0),
                   ('W', None),
                   ('DIIS', 10, "Size of DIIS subspace"),
                   ('mmf_method', True,
                    "Use my DIIS method rather than the usual.")]
    process_vars()

    def _get_broyden(self, x, gx):
        """Return an initialized broyden object"""
        return broyden.ModifiedBroyden(x0=x, G0=gx,
                                       alpha=self.alpha,
                                       W=self.W,
                                       DIIS=self.DIIS,
                                       mmf_method=self.mmf_method)
class ExtendedBroydenData(BroydenData):
    """Container for Extended Broyden data.

    The core for this method is defined in
    :class:`mmf.solve.JinvExtendedBroydenFull` and a proxy object is
    used to maintain the state and compute the updates.
    """
    implements(solver_interface.ISolverData)
    _ignore_name_clash = set(['j_inv_F', 'dyadic'])
    _state_vars = [
        ('j_inv_F.dyadic', Deleted),
        ('j_inv_F', Delegate(broyden.JinvExtendedBroydenFull,
                             ['xs', 'gs', 'j_inv0', 'n_prev'])),
        ('dyadic', False),
    ]
    process_vars()

    def __init__(self, *v, **kw):
        if self.dyadic:
            raise NotImplementedError(
                "Extended Broyden only supports `dyadic=False`")

    def change_basis(self, f, f_inv_t=None):
        """Use the function `x_new = f(x)` to update the data
        corresponding to a change of basis.  Raise an
        :exc:`AttributeError` if the change cannot be implemented (or
        don't define this method.)

        Parameters
        ----------
        f : function
           Change of basis transformation (should be essentially
           matrix multiplication, but for large bases one might want
           to prevent forming this matrix explicitly.)
        f_inv_t : function, optional
           Transpose of the inverse of the function.  If this is
           provided, then it should be used to transform the bras,
           otherwise, use `f`.  *Presently ignored!*
        """
        self.j_inv_F.change_basis(f)
class ExtendedBroyden(_Broyden):
    """This solver uses the Extended Broyden algorithm with dynamic
    restarts toward the specified `j_inv0` when the algorithm starts
    to diverge.

    .. note:: In order for this to work reasonably, you must provide
       a reasonable estimate for :attr:`x_scale` which sets the
       typical scale over which the function `G(x)` is linear.  If
       not provided, then :attr:`IProblem.x_scale` will be used.

    >>> from solver_interface import Problem, BareState
    >>> from mmf.objects import ClassVar
    >>> class Sqrt(Problem):
    ...     '''Iterative solution to sqrt(n).'''
    ...     _no_name_clash = set(['state0'])
    ...     _state_vars = [('n', Required, 'To compute sqrt(n)'),
    ...                    ('state0', Computed)]
    ...     process_vars()
    ...     def __init__(self, *v, **kw):
    ...         self.n = np.array(self.n)
    ...         self.state0 = self.get_initial_state()
    ...     def get_initial_state(self):
    ...         return BareState(x_=copy(self.n))
    ...     def F(self, x, iter_data=None, abs_tol=None, rel_tol=None,
    ...           mesgs=[]):
    ...         return x - (x*x - self.n)
    ...     def Jinv(self, x):
    ...         return np.diag(1/(2*x))
    ...     def step(self, x, x0=None, G0=None, dx0=None, F0=None,
    ...              compute_G=False, compute_dx=False, compute_F=False,
    ...              extrapolate=None, iter_data=None,
    ...              abs_tol=1e-12, rel_tol=1e-12, mesgs=[]):
    ...         'Keep x positive'
    ...         x1 = x
    ...         okay = lambda x: all(x >= 0)
    ...         if not okay(x1):
    ...             x_controls = zip(*[(i_, abs(x_))
    ...                                for i_, x_ in enumerate(x1)
    ...                                if x_ < 0])
    ...             x1 = extrapolate(x_controls)
    ...             if not okay(x1):
    ...                 x1 = abs(x1)
    ...         F1 = self.F(abs(x), None, abs_tol, rel_tol, mesgs)
    ...         G1 = dx1 = None
    ...         return (x1, G1, dx1, F1)
    >>> abs_xtol = 0.01
    >>> rel_xtol = 0.01
    >>> solver = ExtendedBroyden(x_tol=abs_xtol, x_tol_rel=rel_xtol,
    ...                          no_restart=True)
    >>> problem = Sqrt(n=[0.01, 1.0, 100.0])
    >>> state0 = problem.get_initial_state()
    >>> state, niter = solver.solve(problem, state0)
    >>> abs(state.x_[0] - 0.1) < abs_xtol
    True
    >>> abs(state.x_[1] - 1.0) < abs_xtol
    True
    >>> abs(state.x_[2] - 10.0) < abs_xtol
    True
    >>> abs(state.x_[2] - 10.0)/10.0 < rel_xtol
    True
    """
    _state_vars = [
        ('dyadic', False),
        ('solver_data0', Delegate(ExtendedBroydenData,
                                  ['j_inv0', 'n_prev'])),
        ('method=solver_data0.j_inv_F.method'),
        ('solver_data0.dyadic', Deleted),
        ('solver_data0.n_prev', 10),
        ('n_bad', 10,
         r"""Number of bad steps to allow.  When, after this many
         steps, the best solution has not been improved, then we
         bail."""),
        ('alpha=solver_data0.j_inv0', 0.5,
         r"""Linear mixing parameter.  If bad steps are taken, then
         the Jacobian is pulled back to that which will give a linear
         mixing $x \mapsto (1-\alpha) x + \alpha F(x)$."""),
        ('x_scale', None,
         r"""Typical scale (or vector of scales for each component)
         over which the function `G(x)` is approximately linear.
         This is important for ensuring that the dynamic restart
         mechanism only restarts components from far away.  If
         `None`, then :attr:`IProblem.x_scale` will be used."""),
        ('no_restart', False,
         r"""If `True` then dynamic restarts are not used.  Set this
         if there is no good approximation for `j_inv0`."""),
    ]
    process_vars()
    def __init__(self, *v, **kw):
        # Ensure we keep at least one more point than n_bad
        self.solver_data0.n_prev = max(
            self.solver_data0.n_prev, self.n_bad + 1)
[docs] def iterate(self, x, problem, solver_data, x0=None, G0=None, dx0=None, F0=None, mesgs=_MESGS): r"""Perform a single iteration and return the triplet `(x_new, (x, G, dx, F))`. The line-search strategy is dictated by :attr:`ls_strategy` which may take on one of the following values. 'pure_broyden' : Perform pure Broyden steps irrespective of whether or not they increase the error. Termination occurs if more than :attr:`ls_n_bad_step` bad steps are taken. '1' : """ G1 = dx1 = None j_inv = solver_data.j_inv_F # Set j_inv0 j0_diag = np.asarray(problem.j0_diag) if j0_diag.shape == (): j0_diag = np.ones(len(x), dtype=float)*j0_diag j_inv.j_inv0 = self.alpha*np.diag( np.divide(1, j0_diag)) skip_inds = np.asarray(problem.state0.sensitive, dtype=int) if (solver_data.x is None or solver_data.g is None): # First iteration: Here we first make an evaluation, and # then perform a line-search to update the Jacobian. # We use self._extrapolate as we have no base point yet. def _extrapolate(xc): return self._extrapolate(x=x, x0=None, x_controls=xc) x0, F0 = self._problem_step( x, x0=None, F0=None, problem=problem, solver_data=solver_data, extrapolate=_extrapolate, mesgs=mesgs, skip_inds=skip_inds) G0 = x0 - F0 j_inv.set_point(x=x0, g=G0, skip_inds=skip_inds, mags=abs(G0)) x = F0 elif x0 is None or F0 is None: # Restarting from a saved solution, so use old stored # values. x0 = solver_data.x F0 = x0 - solver_data.g x = F0 else: # Subsequent steps: pass if (np.linalg.norm((x0 - F0)/problem.G_scale, ord=self.norm_p) < self.G_tol): # Converged. Don't need any more. x1 = x0 F1 = F0 G1 = dx1 = None x = F1 return (x, (x1, G1, dx1, F1)) skip_inds = self._get_skip_inds(problem, solver_data) # Now take a step: #x1, F1 = self.line_search_broyden(x=x, x0=x0, F0=F0, # problem=problem, # solver_data=solver_data, # mesgs=mesgs, # skip_inds=skip_inds) G0 = x0 - F0 j_inv.update(x0=x0, g0=G0) def _extrapolate(x_controls): return j_inv.get_x(x_controls=x_controls, min_step=self.min_abs_step, max_step=self.max_step) (x1, _G1, _dx1, F1) = self.problem_step( problem=problem, solver_data=solver_data, x=x, x0=x0, G0=None, dx0=None, F0=F0, compute_F=True, extrapolate=_extrapolate, iter_data=solver_data.iter_data, abs_tol=self.G_tol, mesgs=mesgs) G1 = x1 - F1 # Check if the step is better than the best previous solution. If not # then see how far back the best previous solution is. If it is more # than n_bad_steps back then bail or start something more drastic like # line searches. if self.n_bad <= len(solver_data.xs): errs = [np.linalg.norm(_g/problem.G_scale, ord=self.norm_p) for _g in solver_data.gs[-self.n_bad:]] errs = np.array(errs) min_ind = np.argmin(errs) if 0 == min_ind: # The best solution has been pushed until the n_bad'th # step. If we don't improve with this step, then we # fail (or do something else) curr_err = np.linalg.norm(G1/problem.G_scale, ord=self.norm_p) if errs[min_ind] < curr_err: x0 = solver_data.xs[-self.n_bad:][min_ind] G0 = solver_data.gs[-self.n_bad:][min_ind] F0 = x0 - G0 j_inv.set_point(x=x0, g=G0) x = j_inv.get_x(x_controls=[skip_inds, x[skip_inds]], min_step=self.min_abs_step, max_step=self.max_step) mesgs.iter( "Best state now n_bad=%i steps old." 
% (self.n_bad,) + "Trying line search...") try: if _DEBUG and max(abs(F0 - problem.F(x0))) > 1e-12: import pdb;pdb.set_trace() (x1, F1) = self.line_search_linear( x=x, x0=x0, F0=F0, problem=problem, solver_data=solver_data, min_step=self.min_abs_step, mesgs=mesgs, skip_inds=skip_inds, extrapolate=_extrapolate) G1 = x1 - F1 except solver_utils.NoDescentError, err: msg = "\n".join( [err.message, "Line search failed after best solution" + " n_bad=%i steps old." % self.n_bad]) err.args = (msg,) + err.args[1:] raise else: pass else: # Still have some attempts pass # Check if the step is better than the previous step. If not, then # perform a dynamic restart. prev_err = np.linalg.norm( solver_data.gs[-1]/problem.G_scale, ord=self.norm_p) curr_err = np.linalg.norm( G1/problem.G_scale, ord=self.norm_p) if self.no_restart: restart=False else: restart = curr_err > prev_err x_scale = self.x_scale if x_scale is None: x_scale = problem.x_scale j_inv.set_point(x=x1, g=G1, restart=restart, x_scale=x_scale, skip_inds=skip_inds, mags=abs(G1)) x_new = j_inv.get_x(x_controls=[skip_inds, x[skip_inds]], min_step=self.min_abs_step, max_step=self.max_step) solver_data.skip_0 = skip_inds return (x_new, (x1, G1, dx1, F1))
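
# Illustrative note (not part of the solver above): the `alpha` parameter
# of :class:`ExtendedBroyden` corresponds to restarting toward plain
# linear mixing.  On its own, such a mixing iteration is simply:
#
#     def linear_mixing(F, x, alpha=0.5, n_iter=100):
#         # Damped fixed-point iteration x -> (1 - alpha)*x + alpha*F(x).
#         for _ in range(n_iter):
#             x = (1 - alpha)*x + alpha*F(x)
#         return x
#
# which converges when the iteration x -> F(x) is close enough to a
# contraction; the extended Broyden solver falls back toward the
# corresponding diagonal inverse Jacobian when its updates start to
# diverge.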