Source code for mmf.solve.broyden_solvers

r"""Iterative solvers based on the Broyden method.

This module contains methods for solving root-finding problems with
various Broyden algorithms.

The problem is expressed in fixed-point form `x = F(x)`, where the
Jacobian of `F(x)` near the solution should be close to zero (in other
words, the iteration `x -> F(x)` should be as close to convergent as
possible).  This is because the Broyden iterations start from the
identity as the initial approximation to the Jacobian `J` of
`G(x) = x - F(x)`.

.. todo:: Check usage of `max_step`.  This is now administered in
   :meth:`Solver.solve` and should accept a vector.
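
As a rough sketch (independent of the classes below, and not this
module's actual implementation), a "good Broyden" update of the inverse
Jacobian for `G(x) = x - F(x)`, starting from the identity, looks like::

    import numpy as np

    def broyden_fixed_point(F, x, tol=1e-12, max_iter=100):
        # Minimal illustrative Broyden iteration for x = F(x).
        G = lambda x: x - F(x)
        j_inv = np.eye(len(x))      # initial inverse Jacobian ~ identity
        g = G(x)
        for _ in range(max_iter):
            dx = -j_inv.dot(g)      # quasi-Newton step
            x_new = x + dx
            g_new = G(x_new)
            dg = g_new - g
            # Sherman-Morrison form of the "good" Broyden update.
            denom = dx.dot(j_inv.dot(dg))
            if abs(denom) > 1e-30:
                j_inv += np.outer(dx - j_inv.dot(dg),
                                  dx.dot(j_inv))/denom
            x, g = x_new, g_new
            if np.linalg.norm(g) < tol:
                break
        return x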

"""
from __future__ import division

from copy import copy

import numpy
np = numpy

from mmf.objects import process_vars
from mmf.objects import Required, Delegate, Excluded, Computed, Deleted

import broyden
import solver_interface, solver_utils
from solver_interface import Mesg

from mmf.interfaces import implements

_ABS_TOL = 1e-12
_REL_TOL = 1e-12

_FINFO = np.finfo(float)
_TINY = _FINFO.tiny

_MESGS = Mesg()       # Default object.  Should generally not be used.
_DEBUG = False

class BroydenData(solver_interface.SolverData):
    """Container for Broyden data.

    The core for this method is defined in
    :class:`mmf.solve.JinvBroyden` and a proxy object is used to
    maintain the state and compute the updates.
    """
    implements(solver_interface.ISolverData)
    _state_vars = [
        ('x0', None, "Previous value of `x`"),
        ('g0', None, "Previous value of `g = x - F(x)`"),
        ('skip_0', None, "Previous skipped parameters"),
        ('x_good_0', None,
         r"""Previous good step at which some parameter became bad in
         the sense that the change in the errors was no longer small.
         This is used as the reference point for updating the
         Jacobian of sensitive parameters."""),
        ('g_good_0', None, "`g = x - F(x)` at `x_good_0`"),
        ('x_good_1', None, "Current good step (see :attr:`x_good_0`)"),
        ('g_good_1', None, "`g = x - F(x)` at `x_good_1`"),
        ('skip_good', None,
         r"""These are the parameters that were bad at the time that
         `x_good` was saved or that have gone bad since."""),
        ('j_inv_F', Delegate(broyden.JinvBroyden,
                             ['dyadic', 'x', 'g', 'n_prev'])),
        ('old_err', np.inf, "Previous absolute error."),
        ('dyadic=j_inv_F.dyadic', True),
        ('inv_jacobian_err', np.inf,
         r"""This flag is used to indicate the quality of the
         Jacobian.  It represents a measure of the relative change in
         the Jacobian inverse from the previous step.  If this is
         small (sufficiently less than 1), then the approximation is
         probably reasonable for determining a step.  See
         :meth:`mmf.solve.broyden.JinvBroyden.update` for details.""")
    ]
    process_vars()

    def change_basis(self, f, f_inv_t=None):
        """Use the function `x_new = f(x)` to update the data
        corresponding to a change of basis.  Raise an
        :exc:`AttributeError` if the change cannot be implemented (or
        don't define this method.)

        Parameters
        ----------
        f : function
           Change of basis transformation (should be essentially
           matrix multiplication, but for large bases one might want
           to prevent forming this matrix explicitly.)
        f_inv_t : function, optional
           Transpose of the inverse of the function.  If this is
           provided, then it should be used to transform the bras,
           otherwise, use `f`.  *Presently ignored!*
        """
        if self.x0 is not None:
            self.x0 = f(self.x0)
            self.g0 = None      # This should be recalculated.
        self.j_inv_F.change_basis(f)
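
# Illustrative sketch (not part of the class above): `change_basis`
# expects `f` to act like a matrix multiplication.  With a hypothetical
# transformation matrix `A` and a `BroydenData` instance `solver_data`,
# the call might look like:
#
#     A = np.array([[0.0, 1.0],
#                   [1.0, 0.0]])                     # swap two components
#     f = lambda x: A.dot(x)                         # x_new = f(x)
#     f_inv_t = lambda x: np.linalg.inv(A).T.dot(x)  # presently ignored
#     solver_data.change_basis(f, f_inv_t)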
class _Broyden(solver_interface.Solver): """Common interface for Broyden solvers. Sensitive Parameters -------------------- Sensitive parameters are those for which convergence of the other parameters is required before the objective function corresponding to these parameters is trustworthy. To deal with these, we hold these fixed, performing a regular search in the space of the parameters until the change in the errors is small enough. This criterion is determined by :meth:`_get_skip_inds` which returns the indices that should be skipped. Iteration proceeds in the sector without sensitive parameters until the errors stabilize and non of the sensitive parameters should be skipped. This state is stored as a "good" state `x_good_1`. From this state a line-search is made including the sensitive parameters until a suitable step is made that changes the sensitive parameters. One question here is how should the Jacobian be updated. We are presently taking the strategy that if a reasonable approximation to the Jacobian at this good point is found, the non-sensitive sector will still be okay (the potential problem is that this block will be messed up during this search). Typically, after this step, some of the parameters will no longer be good. If this is the case and there was a previous good state `x_good_0`, then the difference between this and `x_good_1` is used to do an update to correct the global structure of the Jacobian. Once this is done, we repeat, starting in the sector of non-sensitive parameters. """ _state_vars = [ ('bad_step_factor', 10,\ """If the error increases by more than a factor of :attr:`bad_step_factor` from the previous step, then a weighted step is used. The default is a large value so this does not happen much. If you know that the weighted step will be in the correct direction, then make this smaller. This must be greater than 1 otherwise the method may loop forever. """), ('weight', 0.5,\ """If the broyden step fails, then the new step is `x - w*G(x)`."""), ('ls_avg_descent_factor', 1e-4, r"""To ensure that we terminate, the line search algorithm requires that each step descend with at least this slope, otherwise the routine will terminate with an error indicating that a local minimum was found rather than a root."""), ('ls_min_scale_factor', 0.1, r"""Minimum factor by which the step size is reduced in the line search. See :func:`solver_utils.line_search`."""), ('ls_max_scale_factor', 0.5, r"""Minimum factor by which the step size is reduced in the line search. See :func:`solver_utils.line_search`."""), ('ls_linear', True, r"""If `True` then the line searches will be along a line. This ensures that bad updates to the inverse Jacobian are completely overwritten, but if the initial direction is not downhill, then this can fail. As a recourse, if the linear search fails, then one pass with the non-linear search is attempted to see if the Jacobian can be compensated. See also :attr:`ls_linear_min_step`. If `False`, then new directions are used at each step as the inverse Jacobian is updated. Hopefully this will lead to an improved step that is ultimately downhill. Previous bad updates to the inverse Jacobian may not be completely erased."""), ('ls_allow_backstep', True, r"""If `True`, then the second line search step (if the first step fails) will be in the negative direction. 
This may scale the problem inappropriately (especially if a monotonic decrease in a control parameter is desired), in which case one should set this to `False`."""), ('ls_min_step', 1e-12, r"""To ensure termination, if the relative step size becomes smaller than this, then the routine will terminate with an error."""), ('ls_linear_min_step', None, r"""If defined, then this will be the bailout criterion for the linear line-search algorithm. This allows for a much earlier bailout from the linear search leaving the high-precision test and ultimate bailout for the subsequent non-linear search. In general, this should be somewhat larger than :attr:`ls_min_step`. If it is not specified, then it is a factor of 100 times larger than :attr:`ls_min_step`."""), ('ls_strategy', 'default', r"""Choose the line-search strategy. See :meth:`iterate` for possibilities."""), ('ls_n_bad_steps', 10, r"""Maximum number of bad steps to try during a line-search phase before giving up."""), ('ls_allow_bad_steps', False, r"""Set to allow the line search to allow bad steps. Used mainly for debugging, not for production use."""), ('max_weighted_step', np.inf, """Maximum length of the weighted steps (also respects :attr:`max_step`). If this is zero, then weighted steps are never used (except for the first step)."""), ('try_weighted_step', True, """If this is `True`, then a weighed step will be attempted if the other steps fail. This should be `True` if the iteration is naturally somewhat convergent (i.e. if the objective function has the form `G(x) = x - F(x)` where the iteration `x -> F(x)` is somewhat convergent) and should be `False` if the objective function is unrelated to the steps."""), ('initial_recession_factor', 0.2,\ """When the error increases by more than :attr:`bad_step_factor`, then the step is regressed toward the previous step by this factor. The regression factor is then further reduced by :attr:`regression_factor`."""), ('recession_factor', 0.5,\ """When the error increases by more than :attr:`bad_step_factor`, then the step is regressed toward the previous step by this factor until the error is suitably reduced."""), ('j_inv_incompatibility', 0.1, r"""Maximum incompatibility for an update of the inverse Jacobian to be accepted. See :meth:`set_point`."""), ('solver_data0', Delegate(BroydenData, ['dyadic', 'n_prev'])), ('solver_data0.dyadic', 10), ('solver_data0.n_prev', 10), ('reset_Jinv', Excluded(False),\ """If `True`, then on the next iteration, reset `Jinv`. Only works once. This can be used in :attr:`solver_interface.IProblem.schedule` to force the Jacobian to be reset."""), ('ls_max', 10, """Maximum number of steps allowed during line search. If the standard iteration fails to improve the solution, then a line search may be used to find the local minimum in the search direction. This can be used to limit or disable this."""), ('sensitive_rel_tol', 1e-1, r"""Sensitive parameters are only adjusted significantly if the relative change in the error of those components is less than this."""), ('sensitive_abs_tol', 1e-2, r"""Sensitive parameters are only adjusted significantly if the absolute change in the error of those components is less than this."""), ('Jinv_tol', 1e-2, r"""This is the tolerance required on the relative change of the predicted step in order to ascertain that the inverse Jacobian is accurate. 
See :meth:`_line_search_d`."""), ] process_vars() def __init__(self, *v, **kw): if (0 >= self.initial_recession_factor or 1 <= self.initial_recession_factor): raise ValueError("initial_recession_factor must be in (0,1).") if (0 >= self.recession_factor or 1 <= self.recession_factor): raise ValueError("recession_factor must be in (0,1).") solver_interface.Solver.__init__(self, *v, **kw) def _correct_step(self, dx, max_dx=None): """Return a step that satisfies the constraints `max_dx`.""" if max_dx is None: max_dx = self.max_step length = np.linalg.norm(dx) if length > max_dx: dx *= max_dx/length return dx def set_point(self, j_inv, x, g, skip_inds=None, force=False): r"""Update the inverse Jacobian if the step is suitable compatible and set the current point to `(x,g)`.""" j_inv.set_point(x=x, g=g, skip_inds=skip_inds, include_skip=False, mags=abs(g)) def line_search_broyden(self, x, x0, F0, # x1, x0, F0, problem, problem, solver_data, mesgs, min_step=None, skip_inds=None, extrapolate=None): r"""Return `(x1, F1)` corresponding to a pure broyden step. This version always takes the full broyden step `x` irrespective of whether of not the step improves the solution. """ j_inv = solver_data.j_inv_F G0 = x0 - F0 j_inv.update(x0=x0, g0=G0) if extrapolate is None: def extrapolate(x_controls): return j_inv.get_x(x_controls=x_controls, min_step=self.min_abs_step, max_step=self.max_step) if skip_inds is None: skip_inds = np.array([], dtype=int) (x1, _G1, _dx1, F1) = self.problem_step( problem=problem, solver_data=solver_data, x=x, x0=x0, G0=None, dx0=None, F0=F0, compute_F=True, extrapolate=extrapolate, iter_data=solver_data.iter_data, abs_tol=self.G_tol, mesgs=mesgs) self.set_point(j_inv=solver_data.j_inv_F, x=x1, g=x1 - F1, skip_inds=skip_inds) return x1, F1 def line_search_jacobian(self, x, x0, F0, # x1, x0, F0, problem, problem, solver_data, min_step, mesgs, skip_inds=None, extrapolate=None, ): r"""Return `(x1, F1)`. This version searches along a line until a good update to the Jacobian is found along this direction. By restricting to a linear search, poor updates to the Jacobian are completely corrected. Once a good update for the Jacobian is found, the recommended step is `(x1 + x0)/2`, half-way along the path towards this point so that the new point has a good approximation (the finite-difference approximation for the derivative at this new point has the properties of a centered difference). This may take the function uphill, but should leave the iteration at a point where the Jacobian is a better approximation. 
""" j_inv = solver_data.j_inv_F G0 = x0 - F0 x1, F1 = self.line_search_broyden( x=x, x0=x0, F0=F0, # x1, x0, F0, problem, problem=problem, solver_data=solver_data, mesgs=mesgs, skip_inds=skip_inds, extrapolate=extrapolate) l = 1 dx = x1 - x0 ndx = np.linalg.norm(dx) min_l = min_step/ndx while True: G1 = x1 - F1 incompatibility = j_inv.update( x=x0 + l*dx, g=G1, x0=x0, g0=G0, skip_inds=skip_inds, mags=abs(F1) + abs(x1)) if incompatibility <= self.j_inv_incompatibility: break l /= 2 if l < min_l: raise solver_utils.NoDescentError( """Could not find a good to updating Jacbian with step size greater than min_step = %g""" % (min_step,)) (x1, _G1, _dx1, F1) = self.problem_step( problem=problem, solver_data=solver_data, x=x0 + l*dx, x0=x0, G0=None, dx0=None, F0=F0, compute_F=True, extrapolate=None, abs_tol=self.G_tol, mesgs=mesgs) return x1, F1 def line_search_linear(self, x, x0, F0, # x1, x0, F0, problem, problem, solver_data, min_step, mesgs, skip_inds=None, extrapolate=None): r"""Return `(x1, F1)` where the step size diminishes sufficiently to ensure convergence. If a good step is not found, then an :exc:`solver_utils.NoDescentError` exception is raised. """ G0 = x0 - F0 dx = x - x0 dx_norm = np.linalg.norm(dx) x_F_new = [None, None] g0 = np.linalg.norm(G0/problem.G_scale, ord=self.norm_p)**2 if g0 == 0: return x0, F0 def g(l, dx=dx): """Function passed to line search algorithm.""" (x1_, _G1, _dx1, F1_) = self.problem_step( problem=problem, solver_data=solver_data, x=x0 + l*dx, x0=x0, G0=None, dx0=None, F0=F0, compute_F=True, extrapolate=extrapolate, abs_tol=self.G_tol, mesgs=mesgs) x_F_new[:] = x1_, F1_ G_ = x1_ - F1_ g_ = np.linalg.norm(G_/problem.G_scale, ord=self.norm_p)**2 return g_ l, g_l = solver_utils.line_search( g=g, g0=g0, avg_descent_factor=self.ls_avg_descent_factor, min_step=min_step, min_scale_factor=self.ls_min_scale_factor, max_scale_factor=self.ls_max_scale_factor, max_steps=self.ls_max, allow_backstep=self.ls_allow_backstep, mesgs=mesgs) x1, F1 = x_F_new return x1, F1 def line_search_non_linear(self, x, x0, F0, # x1, x0, F0, problem, problem, solver_data, min_step, mesgs, skip_inds=None, extrapolate=None): r"""Return `(x1, F1)` where the step size diminishes sufficiently to ensure convergence. If a good step is not found, then an :exc:`solver_utils.NoDescentError` exception is raised. This is similar to the Linear search, but a new step is computed with each update to the Jacobian and the search is performed along this new direction. This has the advantage of being able to find a good step even if the original direction was not down-hill, but has the disadvantage that if the steps are too large, then the Jacobian might be "polluted". It is a good idea to use this routine if :meth:`ls_linear` has failed, but to start with a small step to prevent polluting the Jacobian. 
""" G0 = x0 - F0 #dx = x - x0 #dx_norm = np.linalg.norm(dx) x_F_new = [None, None] g0 = np.linalg.norm(G0/problem.G_scale, ord=self.norm_p)**2 if g0 == 0: return x0, F0 def g(l): """Function passed to line search algorithm.""" dx = solver_data.j_inv_F.get_x( min_step=self.min_abs_step, max_step=self.max_step) - x0 #dx *= dx_norm/np.max(dx_norm, np.linalg.norm(dx)) (x1_, _G1, _dx1, F1_) = self.problem_step( problem=problem, solver_data=solver_data, x=x0 + l*dx, x0=x0, G0=None, dx0=None, F0=F0, compute_F=True, extrapolate=extrapolate, abs_tol=self.G_tol, mesgs=mesgs) x_F_new[:] = x1_, F1_ G1_ = x1_ - F1_ solver_data.j_inv_F.add_point(x=x1_, g=G1_) solver_data.j_inv_F.update( x0=x0, g0=G0, skip_inds=skip_inds, mags=(abs(x0) + abs(x1_) + abs(F0) + abs(F1_))) g_ = np.linalg.norm(G1_/problem.G_scale, ord=self.norm_p)**2 return g_ l, g_l = solver_utils.line_search( g=g, g0=g0, avg_descent_factor=self.ls_avg_descent_factor, min_step=min_step, min_scale_factor=self.ls_min_scale_factor, max_scale_factor=self.ls_max_scale_factor, max_steps=self.ls_max, allow_backstep=self.ls_allow_backstep, mesgs=mesgs) x1, F1 = x_F_new return x1, F1 def line_search(self, x, x0, F0, # x1, x0, F0, problem, problem, solver_data, min_step, mesgs, skip_inds=None, extrapolate=None, allow_bad_step=False): r"""Return `(x1, F1)` where the step size diminishes sufficiently to ensure convergence. If a good step is not found, then an :exc:`solver_utils.NoDescentError` exception is raised. This uses a combination of the linear and non-linear algorithms. First a linear search is performed until the updates to the jacobian are consistent, then it switches to the non-linear search which selects a new downhill direction. Parameters ---------- skip_inds : None or array Indices of parameters to hold fixed. If the problem contains sensitive parameters as specified in `problem.state0.sensitive`, then this must not be `None`. """ G0 = x0 - F0 dx = x - x0 # This needs to be mutated as it is used in # extrapolation functions. 
if skip_inds is not None and 0 < len(skip_inds): x_controls = (skip_inds, [x0[skip_inds]]) dx[skip_inds] = 0.0 G0[skip_inds] = 0.0 else: x_controls = None dx_norm = np.linalg.norm(dx) x_F_new = [None, None] g0 = np.linalg.norm(G0/problem.G_scale, ord=self.norm_p)**2 if g0 == 0: return x0, F0 d_ = {'incompatibility': np.inf} # Dictionary for persistent # results j_inv = solver_data.j_inv_F extrapolate_J, extrapolate_dx = self._get_extrapolate( x0=x0, G0=G0, j_inv=j_inv, dx =dx) def g(l): """Function passed to line search algorithm.""" if d_['incompatibility'] <= self.j_inv_incompatibility: dx[:] = j_inv.get_x(x_controls=x_controls, min_step=self.min_abs_step, max_step=self.max_step) - x0 l = abs(l) extrapolate = extrapolate_J else: extrapolate = extrapolate_dx (x1_, _G1, _dx1, F1_) = self.problem_step( problem=problem, solver_data=solver_data, x=x0 + l*dx, x0=x0, G0=None, dx0=None, F0=F0, compute_F=True, extrapolate=extrapolate, abs_tol=self.G_tol, mesgs=mesgs) x_F_new[:] = x1_, F1_ G1_ = x1_ - F1_ #dx_ = x1_ - x0 #dg_ = G1_ - G0 d_['incompatibility'] = j_inv.incompatibility( x=x1_, g=G1_, x0=x0, g0=G0) j_inv.add_point(x=x1_, g=G1_) j_inv.update(x0=x0, g0=G0, skip_inds=skip_inds, mags=(abs(x0) + abs(x1_) + abs(F0) + abs(F1_))) G1_[skip_inds] = 0.0 g_ = np.linalg.norm(G1_/problem.G_scale, ord=self.norm_p)**2 return g_ analyze = False # **** Change This if analyze: def set_dx(): dx[:] = j_inv.get_x(min_step=self.min_abs_step, max_step=self.max_step) - x0 @np.vectorize def f(l, ord=self.norm_p): (x1_, _G1, _dx1, F1_) = problem.step( problem=problem, solver_data=solver_data, x=x0 + l*dx, x0=x0, G0=None, dx0=None, F0=F0, compute_F=True, extrapolate=extrapolate, abs_tol=self.G_tol, mesgs=mesgs) x_F_new[:] = x1_, F1_ G1_ = x1_ - F1_ print j_inv.update( x=x1_, g=G1_, skip_inds=skip_inds, mags=(abs(x0) + abs(x1_) + abs(F0) + abs(F1_))) d_['dg_'] = G1_ - G0 d_['dx_'] = x1_ - x0 g_ = np.linalg.norm(G1_/problem.G_scale, ord=ord)**2 return g_ import pylab as plt ls = np.linspace(-2,2,100) ls1 = np.linspace(-0.2,0.2,100) def pl(long=False, ord=2): _f = plt.gcf() plt.figure(2) plt.plot(ls, g0 + (ls*ls - 2*ls)*g0, ':') if long: plt.plot(ls, f(ls, ord)) plt.axis([-2,2,0,5*g0]) else: plt.plot(ls1, f(ls1, ord)) plt.axis([-0.2,0.2,0,3*g0]) plt.figure(_f.number) def get_J(): J = problem._J(x0, lambda x: x - problem.F(x)) B = np.linalg.inv(J) B0 = solver_data.j_inv_F.j_inv.asarray() return B, B0 #import pdb;pdb.set_trace() l, g_l = solver_utils.line_search( g=g, g0=g0, avg_descent_factor=self.ls_avg_descent_factor, min_step=min_step, min_scale_factor=self.ls_min_scale_factor, max_scale_factor=self.ls_max_scale_factor, max_steps=self.ls_max, allow_backstep=self.ls_allow_backstep, allow_bad_step=allow_bad_step, mesgs=mesgs) x1, F1 = x_F_new return x1, F1 def _get_extrapolate(self, x0, G0, j_inv, dx): """Return `(extrapolate_J, extrapolate_dx)` where each is an appropriate extrapolation function. Parameters ---------- x0, G0 : array Original point and `G0 = G(x0)` to extrapolate from. j_inv : matrix-like Inverse Jacobian. Calls `j_inv.get_x()`. dx : array Desired step used by simple extrapolation. Returns ------- extrapolate_J : function Jacobian based extrapolation extrapolate_dx : function Simple extrapolation (no Jacobian used). 
""" def extrapolate_J(x_controls): """Use Jacobian to extrapolate.""" return j_inv.get_x(x_controls=x_controls, min_step=self.min_abs_step, max_step=self.max_step) def extrapolate_dx(x_controls): """Default extrapolation if Jacobian is not good.""" return self._extrapolate(x=x0 + dx, x0=x0, x_controls=x_controls) return extrapolate_J, extrapolate_dx def _line_search(self): r"""Perform line search. There are several strategies for line-searches available here. A couple of generic options are provided controlling how the line-search is performed. In particular, one must determine how the backtracking is performed. Options include: 1) *Constant reduction.* The step-length `l` is decreased by a constant factor `-1 < c < 1`. Negative factors allow for steps in the opposite direction to be attempted. 2) *Cubic fit.* Here we fit a cubic polynomial through function $g(l) = \norm{G(x+l\delta_x)}^2$ and extrapolate to find the minimum. An option is to assume that $\delta_x = -\mat{J}^{-1}G(x)$ which fixes the slope $g'(0) = -2g(0)$ (thereby requiring only previous two points), or to use the previous three points. Parameters must be provided to limit the step size to prevent `l` from decreasing too slowly (spending too much time in the line search) or too quickly (resulting in extremely small step sizes). """ def _get_skip_inds(self, problem, solver_data): """Return `skip_inds`. Returns ------- skip_inds : list The indices that should be skipped for this iteration. x1, F1 : array Function evaluated at `F1 = F(x1)`. """ x0 = solver_data.x0 g0 = solver_data.g0 x1 = solver_data.x g1 = solver_data.g sensitive_inds = np.asarray(problem.state0.sensitive, dtype=int) if g0 is None or g1 is None: # Need at least two steps before we vary sensitive parameters skip_inds = sensitive_inds else: err0_s = np.abs(g0[sensitive_inds]) err1_s = np.abs(g1[sensitive_inds]) skip_inds = sensitive_inds[ np.where(abs(err1_s - err0_s)/abs(err1_s) >= self.sensitive_rel_tol)] abs_err = copy(g1) abs_err[sensitive_inds] = 0 rel_err = abs_err/(abs(x1) + _TINY) err = np.minimum(abs_err/self.sensitive_abs_tol, rel_err/self.sensitive_rel_tol) if err.max() < 1: # Things have started to converge. Only skip bad parameters. pass else: skip_inds = sensitive_inds #print skip_inds, x1[-1] return skip_inds def iterate(self, x, problem, solver_data, x0=None, G0=None, dx0=None, F0=None, mesgs=_MESGS): r"""Perform a single iteration and return the triplet `(x_new, (x, G, dx, F))`. The line-search strategy is dictated by :attr:`ls_strategy` which may take on one of the following values. 'pure_broyden' : Perform pure Broyden steps irrespective of whether or not they increase the error. Termination occurs if more than :attr:`ls_n_bad_step` bad steps are taken. '1' : Notes ----- In order to enjoy the good convergence properties of the Broyden methods, we first try a full Broyden step (unless The big concern with Broyden methods is that the approximate Jacobian inverse may be inaccurate. To deal with this, we use :meth:`_line_search_J` which does a modified line search until a reasonable approximation of the Jacobian is obtained. """ #import pdb;pdb.set_trace() G1 = dx1 = None if self.reset_Jinv: solver_data.j_inv_F.reset() self.reset_Jinv = False skip_inds = np.asarray(problem.state0.sensitive, dtype=int) if (solver_data.x is None or solver_data.g is None): # First iteration: Here we first make an evaluation, and # then perform a line-search to update the Jacobian. # We use self._extrapolate as we have no base point yet. 
def extrapolate(xc): return self._extrapolate(x=x, x0=None, x_controls=xc) x0, F0 = self._problem_step( x, x0=None, F0=None, problem=problem, solver_data=solver_data, extrapolate=extrapolate, mesgs=mesgs, skip_inds=skip_inds) self.set_point(j_inv=solver_data.j_inv_F, x=x0, g=x0 - F0, skip_inds=skip_inds) x = F0 elif x0 is None or F0 is None: # Restarting from a saved solution, so use old stored # values. x0 = solver_data.x F0 = x0 - solver_data.g x = F0 else: # Subsequent steps: pass if (np.linalg.norm((x0 - F0)/problem.G_scale, ord=self.norm_p) < self.G_tol): # Converged. Don't need any more. x1 = x0 F1 = F0 G1 = dx1 = None x = F1 return (x, (x1, G1, dx1, F1)) skip_inds = self._get_skip_inds(problem, solver_data) if self.ls_strategy == 'pure_broyden' and True: x1, F1 = self.line_search_broyden(x=x, x0=x0, F0=F0, problem=problem, solver_data=solver_data, mesgs=mesgs, skip_inds=skip_inds) elif self.ls_strategy == 'pure_broyden' and False: g0 = np.linalg.norm((x0 - F0)/problem.G_scale, ord=self.norm_p) g1 = np.inf step_found = False for n in xrange(self.ls_n_bad_steps): mesgs.iter( "Trying %i of %i broyden steps: " % (n, self.ls_n_bad_steps) + "Error %g >= %g" % (g1, g0)) x1, F1 = self.line_search_broyden(x=x, x0=x0, F0=F0, problem=problem, solver_data=solver_data, mesgs=mesgs, skip_inds=skip_inds) G1 = x1 - F1 self.set_point(j_inv=solver_data.j_inv_F, x=x1, g=G1, skip_inds=skip_inds) x = solver_data.j_inv_F.get_x( x_controls=[skip_inds, x[skip_inds]], min_step=self.min_abs_step, max_step=self.max_step) g1 = np.linalg.norm((x1 - F1)/problem.G_scale, ord=self.norm_p) if g1 < g0: step_found = True break if not step_found and not self.ls_allow_bad_steps: msg = ("No downhill step found after ls_n_bad_steps" + " = %i broyden steps" % (self.ls_n_bad_steps,)) mesgs.error(msg) raise solver_utils.NoDescentError(msg) elif self.ls_strategy == 'default': if (0 < len(problem.state0.sensitive) and 0 == len(skip_inds)): # We have sensitive parameters but all parameters are # good so we record the good step, then search mesgs.info("Good step found. Saving...") solver_data.x_good_0 = solver_data.x_good_1 solver_data.g_good_0 = solver_data.g_good_1 solver_data.x_good_1 = x0 solver_data.g_good_1 = x0 - F0 mesgs.info("Good step found. Finding new step " + "including sensitive parameters...") ls_min_step = self.ls_min_step allow_bad_step = ( self.ls_allow_bad_steps and solver_data.iteration_number < self.ls_n_bad_steps) x1, F1 = self.line_search(x=x, x0=x0, F0=F0, problem=problem, solver_data=solver_data, mesgs=mesgs, min_step=self.ls_min_step, skip_inds=skip_inds, allow_bad_step=allow_bad_step ) if (0 < len(problem.state0.sensitive) and 0 == len(skip_inds) and solver_data.x_good_0 is not None): # We have sensitive parameters but all parameters were # good. 
The line_search may have messed up the # Jacobian a bit, so we use the previous good step (if # it exists) to perform an update without any mesgs.info("Updating Jacobian with previous good step...") self.set_point( j_inv=solver_data.j_inv_F, x=solver_data.x_good_1, g=solver_data.g_good_1, force=True) elif self.ls_strategy == 'jacobian': x1, F1 = self.line_search_jacobian(x=x, x0=x0, F0=F0, problem=problem, solver_data=solver_data, min_step=self.ls_min_step, mesgs=mesgs, skip_inds=skip_inds) elif self.ls_strategy == 'linear': x1, F1 = self.line_search_linear(x=x, x0=x0, F0=F0, problem=problem, solver_data=solver_data, min_step=self.ls_min_step, mesgs=mesgs, skip_inds=skip_inds) elif self.ls_strategy == 'non-linear': x1, F1 = self.line_search_non_linear(x=x, x0=x0, F0=F0, problem=problem, solver_data=solver_data, min_step=self.ls_min_step, mesgs=mesgs, skip_inds=skip_inds) elif self.ls_strategy == 'two_step': ls_min_step = self.ls_linear_min_step if ls_min_step is None: ls_min_step = self.ls_min_step*100 try: x1, F1 = self.line_search_linear(x=x, x0=x0, F0=F0, problem=problem, solver_data=solver_data, mesgs=mesgs, min_step=ls_min_step, skip_inds=None) except solver_utils.NoDescentError: if True: if self.verbosity >= 10: mesgs.append("Trying bad step...") dx = x - x0 x1, F1 = self.line_search_broyden( x=x0 + dx/2**self.ls_max, x0=x0, F0=F0, problem=problem, solver_data=solver_data, mesgs=mesgs, skip_inds=None) else: x1_ = x # Try once again without linear algorithm. if self.verbosity >= 10: mesgs.append("No downhill step found: " + "Trying non-linear line-search...") try: x1, F1 = self.line_search_non_linear( x=x1_, x0=x0, F0=F0, problem=problem, solver_data=solver_data, mesgs=mesgs, min_step=self.ls_min_step, skip_inds=None) except solver_utils.NoDescentError: # Try resetting Jacobian. 
if self.verbosity >= 10: mesgs.append("No downhill step found: " + "Trying to reset Jacobian...") solver_data.j_inv_F.reset(m0=None) x1, F1 = self.line_search_non_linear( x=x1_, x0=x0, F0=F0, problem=problem, solver_data=solver_data, mesgs=mesgs, min_step=self.ls_min_step, skip_inds=None) else: raise ValueError("Line search strategy %s not supported" % (self.ls_strategy,)) self.set_point(j_inv=solver_data.j_inv_F, x=x1, g=x1 - F1, skip_inds=skip_inds) x_new = solver_data.j_inv_F.get_x(x_controls=[skip_inds, x[skip_inds]], min_step=self.min_abs_step, max_step=self.max_step) solver_data.skip_0 = skip_inds solver_data.g0 = solver_data.g solver_data.x0 = solver_data.x return (x_new, (x1, G1, dx1, F1)) G0 = x0 - F0 dx = x - x0 dx_norm = np.linalg.norm(dx) x_F_new = [None, None] g0 = np.linalg.norm(G0/problem.G_scale, ord=self.norm_p)**2 if g0 == 0: return (x0, (x0, G1, dx1, F0)) ls_linear = self.ls_linear def g(l, dx=dx): """Function passed to line search algorithm.""" if not ls_linear: # Get new step from broyden dx = solver_data.j_inv_F.get_x(min_step=self.min_abs_step, max_step=self.max_step) - x0 #dx *= dx_norm/np.max(dx_norm, np.linalg.norm(dx)) _x = x0 + l*dx x_F_new[:] = self._problem_step( _x, x0=x0, F0=F0, problem=problem, solver_data=solver_data, mesgs=mesgs) x1, F1 = x_F_new _G = x1 - F1 _g = np.linalg.norm(_G/problem.G_scale, ord=self.norm_p)**2 return _g try: ls_min_step = self.ls_min_step if ls_linear: ls_min_step = self.ls_linear_min_step if ls_min_step is None: ls_min_step = self.ls_min_step*100 l, g_l = solver_utils.line_search( g=g, g0=g0, avg_descent_factor=self.ls_avg_descent_factor, min_step=ls_min_step, min_scale_factor=self.ls_min_scale_factor, max_scale_factor=self.ls_max_scale_factor, max_steps=self.ls_max, allow_backstep=self.ls_allow_backstep, mesgs=mesgs) except solver_utils.NoDescentError: if ls_linear: # Try once again without linear algorithm. if self.verbosity >= 10: mesgs.append("No downhill step found: " + "Trying non-linear line-search...") ls_linear = False # for n in xrange(11): # try: # l, g_l = solver_utils.line_search( # g=g, g0=g0, # avg_descent_factor=self.ls_avg_descent_factor, # min_step=self.ls_min_step, # min_scale_factor=self.ls_min_scale_factor, # max_scale_factor=self.ls_max_scale_factor, # max_steps=self.ls_max, # mesgs=mesgs) # except solver_utils.NoDescentError: # if n < 10: # continue # else: # raise try: l, g_l = solver_utils.line_search( g=g, g0=g0, avg_descent_factor=self.ls_avg_descent_factor, min_step=self.ls_min_step, min_scale_factor=self.ls_min_scale_factor, max_scale_factor=self.ls_max_scale_factor, max_steps=self.ls_max, allow_backstep=self.ls_allow_backstep, mesgs=mesgs) except solver_utils.NoDescentError: # Try resetting Jacobian. 
if self.verbosity >= 10: mesgs.append("No downhill step found: " + "Trying to reset Jacobian...") solver_data.j_inv_F.reset(m0=None) l, g_l = solver_utils.line_search( g=g, g0=g0, avg_descent_factor=self.ls_avg_descent_factor, min_step=self.ls_min_step, min_scale_factor=self.ls_min_scale_factor, max_scale_factor=self.ls_max_scale_factor, max_steps=self.ls_max, allow_backstep=self.ls_allow_backstep, mesgs=mesgs) else: raise x1, F1 = x_F_new x_new = solver_data.j_inv_F.get_x( x_controls=[skip_inds, x[skip_inds]], min_step=self.min_abs_step, max_step=self.max_step) mesgs.append("Abs Error: %g" % ( np.linalg.norm((x1 - F1)/problem.G_scale, ord=self.norm_p))) #solver_data.x = x1 #solver_data.g = x1 - F1 return (x_new, (x1, G1, dx1, F1)) ########################################################## # Required methods: These need to be defined by subclasses. def _step(self, problem, x, x0, F0, solver_data, mesgs): """Return `(x1, F1, skip_inds)`. We intervene here to ensure that the error decreases with each step (otherwise we use a weighted step.) We use one of the following algorithms. A) First we compute `x1` and `G1` corresponding to the full Brodyen step A) 1) Try full Broyden. 2) If not okay, try half step. 1) Try full Broyden step. If this reduces the error, then accept. 2) If not, then check for sensitive parameters. These should only be changed if the error in the component of G for these has stabilized. 2) If the error increases by too much, then check if there are any sensitive parameters. If so, then try holding these fixed. 3) If this fails, then try taking a weighted step (if allowed). 4) If this fails, then perform a line search. """ return self._problem_step( x, x0=None, F0=None, problem=problem, solver_data=solver_data, mesgs=mesgs) print x, x0 import pdb;pdb.set_trace() if self.verbosity >= 10: mesgs.append("Trying Broyden step...") x1, F1 = self._problem_step( x, x0=None, F0=None, problem=problem, solver_data=solver_data, mesgs=mesgs) g0, dx0 = G0_dx # Now check sensitive parameters sensitive_inds = np.asarray(problem.state0.sensitive, dtype=int) skip_inds = [] # Indices to skip if 0 < len(sensitive_inds): skip_inds = self._get_skip_inds(problem=problem, solver_data=solver_data) x_controls = (skip_inds, x0[skip_inds]) if self.verbosity >= 10: mesgs.append("Holding %i sensitive parameters fixed." % (len(skip_inds),)) x1 = solver_data.j_inv_F.get_x( x_controls=x_controls, min_step=self.min_abs_step, max_step=self.max_step) x1, F1 = self._problem_step( x, x0=None, F0=None, problem=problem, solver_data=solver_data, mesgs=mesgs, skip_inds=skip_inds) g0_ = copy(g0) G1_ = copy(G1) g0_[skip_inds] = 0 G1_[skip_inds] = 0 err0 = np.abs(g0_).max() err1 = np.abs(G1_).max() if err1 < err0: # Error decreased, done. if self.verbosity >= 10: mesgs.append("Broyden step succeeded.") return (x1, G1, skip_inds) if len(skip_inds) < len(sensitive_inds): # First try holding all sensitive parameters fixed (if this # has not already been done) x_controls = (sensitive_inds, x0[sensitive_inds]) if self.verbosity >= 10: mesgs.append("Trying sensitive step") # Should we maybe set skip_inds = sensitive_inds here? Or # possibly recompute skip_inds based on new update? 
skip_inds = sensitive_inds x1 = x + solver_data.j_inv_F.get_x( x_controls=x_controls, min_step=self.min_abs_step, max_step=self.max_step) x1, F1 = self._problem_step( x, x0=None, F0=None, problem=problem, solver_data=solver_data, mesgs=mesgs, skip_inds=skip_inds) # Check errors: g0_ = copy(g0) G1_ = copy(G1) g0_[skip_inds] = 0 G1_[skip_inds] = 0 err0 = np.abs(g0_).max() err1 = np.abs(G1_).max() err0 = np.abs(g0).max() err1 = np.abs(G1).max() if err1 < err0: # Error decreased, done. if self.verbosity >= 10: mesgs.append("Sensitive step succeeded.") return (x1, G1, skip_inds) step_ok = err1 < err0*self.bad_step_factor if (not step_ok and self.max_weighted_step > 0 and self.try_weighted_step): # Try a weighted step if self.verbosity >= 10: mesgs.append("Trying weighted step") x1 = x1 - self._correct_step(self.weight*G1, max_dx=min(self.max_step, self.max_weighted_step)) x1, F1 = self._problem_step( x, x0=None, F0=None, problem=problem, solver_data=solver_data, mesgs=mesgs, skip_inds=skip_inds) G1_[skip_inds] = 0 err1 = np.abs(G1_).max() step_ok = err1 <= self.bad_step_factor*err0 if step_ok: if self.verbosity >= 10: mesgs.append("Weighted step succeeded.") return (x1, G1, skip_inds) if 0 < self.ls_max: # Do line search if self.verbosity >= 10: mesgs.append("Doing line search") x1, G1 = self._line_search(x0, x1, G1_, err0, err1, skip_inds, solver_data, problem, mesgs) return (x1, G1, skip_inds) def _step(self, problem, x, x0, F0, solver_data, mesgs): x1, F1 = self._problem_step( x, x0=None, F0=None, problem=problem, solver_data=solver_data, mesgs=mesgs) return x1, F1, np.array([], dtype=int) return self._line_search_d(problem, x, x0, G0_dx, solver_data, mesgs) def _problem_step(self, x1, x0, F0, problem, solver_data, mesgs, skip_inds=None, extrapolate=None): r"""Return result of `problem.step`, updating the Jacobian if the step is not too big.""" if extrapolate is None: def extrapolate(x_controls): return solver_data.x + solver_data.j_inv_F.get_x( x_controls=x_controls, min_step=self.min_abs_step, max_step=self.max_step) if skip_inds is None: skip_inds = np.array([], dtype=int) if x0 is not None: x1 = x0 + self._correct_step(x1 - x0, max_dx=self.max_step) (x1, _G1, _dx1, F1) = self.problem_step( problem=problem, solver_data=solver_data, x=x1, x0=x0, G0=None, dx0=None, F0=F0, compute_F=True, extrapolate=extrapolate, iter_data=solver_data.iter_data, abs_tol=self.G_tol, mesgs=mesgs) if x0 is not None and np.linalg.norm(x1 - x0) <= self.max_step: solver_data.j_inv_F.update( x=x1, g=x1 - F1, skip_inds=skip_inds, mags=(abs(x0) + abs(x1) + abs(F0) + abs(F1))) return x1, F1
def tst():
    from solver_interface import Problem, BareState

    class Sqrt(Problem):
        '''Iterative solution to sqrt(n).'''
        _state_vars = [('n', Required, 'To compute sqrt(n)'),
                       ('state0', Computed)]
        process_vars()

        def __init__(self, *v, **kw):
            self.n = np.array(self.n)
            self.state0 = self.get_initial_state()

        def get_initial_state(self):
            return BareState(x_=copy(self.n))

        def F(self, x, iter_data=None, abs_tol=None, rel_tol=None,
              mesgs=_MESGS):
            return x - (x*x - self.n)

        def G(self, x, compute_dx=True, iter_data=None, abs_tol=None,
              rel_tol=None, mesgs=_MESGS):
            return (x*x - self.n), -(x*x - self.n)/(2*x)

        def Jinv(self, x):
            return np.diag(1/(2*x))

        def step(self, x, x0=None, G0=None, dx0=None, F0=None,
                 compute_G=False, compute_dx=False, compute_F=False,
                 extrapolate=None, iter_data=None,
                 abs_tol=1e-12, rel_tol=1e-12, mesgs=_MESGS):
            'Keep x positive'
            print x
            x1 = x
            okay = lambda x: all(x >= 0)
            if not okay(x1):
                x_controls = zip(*[(i_, abs(x_))
                                   for i_, x_ in enumerate(x1)
                                   if x_ < 0])
                x1 = extrapolate(x_controls)
                if not okay(x1):
                    x1 = abs(x1)
            F1 = self.F(abs(x), None, abs_tol, rel_tol, mesgs)
            G1 = dx1 = None
            return (x1, G1, dx1, F1)

    x_tol = 0.01
    G_tol = 0.01
    solver = Broyden(x_tol=x_tol, G_tol=G_tol)
    problem = Sqrt(n=[0.01, 1.0, 100.0])
    state0 = problem.get_initial_state()

    print("Newton iterations...")
    x = 1*state0.x_
    for _ in xrange(5):
        x += problem.G(x)[1]
        print x
    print

    print("Straight iterations...")
    x = 1*state0.x_
    for _ in xrange(10):
        x = problem.F(x)
        print x
    print

    print("Straight broyden...")
    import broyden
    x = 1*state0.x_
    b = broyden.Broyden(x0=x, G0=x - problem.F(x))
    for _ in xrange(21):
        b.update(b.x - problem.F(b.x))
        print b.x

    x = 1*state0.x_
    b = broyden.Broyden(x0=x, G0=x - problem.F(x))
    for _ in xrange(21):
        b.update(b.x - problem.F(b.x))
        print b.x

    print("Our Broyden...")
    solver.ls_linear = True
    state, niter = solver.solve(problem, state0)
    print("Our Broyden: %i" % niter)
class Broyden(_Broyden):
    """This solver uses the simple Broyden algorithm.

    Examples
    --------
    >>> from solver_interface import Problem, BareState
    >>> class Sqrt(Problem):
    ...     '''Iterative solution to sqrt(n).'''
    ...     _state_vars = [('n', Required, 'To compute sqrt(n)'),
    ...                    ('state0', Computed)]
    ...     process_vars()
    ...     def __init__(self, *v, **kw):
    ...         self.n = np.array(self.n)
    ...         self.state0 = self.get_initial_state()
    ...     def get_initial_state(self):
    ...         return BareState(x_=copy(self.n))
    ...     def F(self, x, iter_data=None, abs_tol=None, rel_tol=None,
    ...           mesgs=_MESGS):
    ...         return x - (x*x - self.n)
    ...     def Jinv(self, x):
    ...         return np.diag(1/(2*x))
    ...     def step(self, x, x0=None, G0=None, dx0=None, F0=None,
    ...              compute_G=False, compute_dx=False, compute_F=False,
    ...              extrapolate=None, iter_data=None,
    ...              abs_tol=1e-12, rel_tol=1e-12, mesgs=_MESGS):
    ...         'Keep x positive'
    ...         x1 = x
    ...         okay = lambda x: all(x >= 0)
    ...         if not okay(x1):
    ...             x_controls = zip(*[(i_, abs(x_))
    ...                                for i_, x_ in enumerate(x1)
    ...                                if x_ < 0])
    ...             x1 = extrapolate(x_controls)
    ...             if not okay(x1):
    ...                 x1 = abs(x1)
    ...         F1 = self.F(abs(x), None, abs_tol, rel_tol, mesgs)
    ...         G1 = dx1 = None
    ...         return (x1, G1, dx1, F1)
    >>> x_tol = 0.01
    >>> G_tol = 0.01
    >>> solver = Broyden(x_tol=x_tol, G_tol=G_tol, ls_max=100,
    ...                  ls_strategy='default', dyadic=2, n_prev=2)
    >>> problem = Sqrt(n=[0.01, 1.0, 100.0])
    >>> state0 = problem.get_initial_state()
    >>> state, niter = solver.solve(problem, state0)
    >>> niter              # Newton takes 5, straight Broyden takes 21
    10
    >>> abs(state.x_[0] - 0.1) < 1.2*x_tol
    True
    >>> abs(state.x_[1] - 1.0) < 1.2*x_tol
    True
    >>> abs(state.x_[2] - 10.0) < x_tol
    True
    >>> G, dx = problem.G(state.x_)
    >>> abs(G).max() < G_tol
    True
    """
class ModifiedBroyden(_Broyden):
    """This solver uses the Modified Broyden algorithm.

    >> from solver_interface import Problem, BareState
    >> class Sqrt(Problem):
    ...     '''Iterative solution to sqrt(n).'''
    ...     _state_vars = [('n', Required, 'To compute sqrt(n)')]
    ...     process_vars()
    ...     def __init__(self, *v, **kw): self.n = np.array(self.n)
    ...     def get_initial_state(self):
    ...         return BareState(x_=copy(self.n))
    ...     def G(self, x, iter_data, abs_tol, rel_tol, mesgs):
    ...         return x*x - self.n
    ...     'Keep x positive'
    ...     x1 = x
    ...     okay = lambda x: all(x >= 0)
    ...     if not okay(x1):
    ...         x_control = zip(*[(i_, abs(x_))
    ...                           for i_, x_ in enumerate(x1)
    ...                           if x_ < 0])
    ...         x1 = extrapolate(x_controls)
    ...         if not okay(x1):
    ...             x1 = abs(x1)
    ...     G1 = self.G(abs(x), None, abs_tol, rel_tol, mesgs)
    ...     return (x1, G1)
    >> abs_xtol = 0.01
    >> rel_xtol = 0.01
    >> solver = ModifiedBroyden(abs_xtol=abs_xtol, rel_xtol=rel_xtol)
    >> problem = Sqrt(n=[0.01, 1.0, 100.0])
    >> state0 = problem.get_initial_state()
    >> state, niter = solver.solve(problem, state0)
    >> abs(state.x_[0] - 0.1) < abs_xtol
    True
    >> abs(state.x_[1] - 1.0) < abs_xtol
    True
    >> abs(state.x_[2] - 10.0) < abs_xtol
    True
    >> abs(state.x_[2] - 10.0)/10.0 < rel_xtol
    True
    """
    _state_vars = [('alpha', 1.0),
                   ('W', None),
                   ('DIIS', 10, "Size of DIIS subspace"),
                   ('mmf_method', True,
                    "Use my DIIS method rather than the usual.")]
    process_vars()

    def _get_broyden(self, x, gx):
        """Return an initialized broyden object"""
        return broyden.ModifiedBroyden(x0=x, G0=gx,
                                       alpha=self.alpha,
                                       W=self.W,
                                       DIIS=self.DIIS,
                                       mmf_method=self.mmf_method)
class ExtendedBroydenData(BroydenData):
    """Container for Extended Broyden data.

    The core for this method is defined in
    :class:`mmf.solve.JinvExtendedBroydenFull` and a proxy object is
    used to maintain the state and compute the updates.
    """
    implements(solver_interface.ISolverData)
    _ignore_name_clash = set(['j_inv_F', 'dyadic'])
    _state_vars = [
        ('j_inv_F.dyadic', Deleted),
        ('j_inv_F', Delegate(broyden.JinvExtendedBroydenFull,
                             ['xs', 'gs', 'j_inv0', 'n_prev'])),
        ('dyadic', False),
    ]
    process_vars()

    def __init__(self, *v, **kw):
        if self.dyadic:
            raise NotImplementedError(
                "Extended Broyden only supports `dyadic=False`")

    def change_basis(self, f, f_inv_t=None):
        """Use the function `x_new = f(x)` to update the data
        corresponding to a change of basis.  Raise an
        :exc:`AttributeError` if the change cannot be implemented (or
        don't define this method.)

        Parameters
        ----------
        f : function
           Change of basis transformation (should be essentially
           matrix multiplication, but for large bases one might want
           to prevent forming this matrix explicitly.)
        f_inv_t : function, optional
           Transpose of the inverse of the function.  If this is
           provided, then it should be used to transform the bras,
           otherwise, use `f`.  *Presently ignored!*
        """
        self.j_inv_F.change_basis(f)
class ExtendedBroyden(_Broyden):
    """This solver uses the Extended Broyden algorithm with dynamic
    restarts toward the specified `j_inv0` when the algorithm starts
    to diverge.

    .. note:: In order for this to work reasonably, you must provide
       a reasonable estimate for :attr:`x_scale` which sets the
       typical scale over which the function `G(x)` is linear.  If
       not provided, then :attr:`IProblem.x_scale` will be used.

    >>> from solver_interface import Problem, BareState
    >>> from mmf.objects import ClassVar
    >>> class Sqrt(Problem):
    ...     '''Iterative solution to sqrt(n).'''
    ...     _no_name_clash = set(['state0'])
    ...     _state_vars = [('n', Required, 'To compute sqrt(n)'),
    ...                    ('state0', Computed)]
    ...     process_vars()
    ...     def __init__(self, *v, **kw):
    ...         self.n = np.array(self.n)
    ...         self.state0 = self.get_initial_state()
    ...     def get_initial_state(self):
    ...         return BareState(x_=copy(self.n))
    ...     def F(self, x, iter_data=None, abs_tol=None, rel_tol=None,
    ...           mesgs=[]):
    ...         return x - (x*x - self.n)
    ...     def Jinv(self, x):
    ...         return np.diag(1/(2*x))
    ...     def step(self, x, x0=None, G0=None, dx0=None, F0=None,
    ...              compute_G=False, compute_dx=False, compute_F=False,
    ...              extrapolate=None, iter_data=None,
    ...              abs_tol=1e-12, rel_tol=1e-12, mesgs=[]):
    ...         'Keep x positive'
    ...         x1 = x
    ...         okay = lambda x: all(x >= 0)
    ...         if not okay(x1):
    ...             x_controls = zip(*[(i_, abs(x_))
    ...                                for i_, x_ in enumerate(x1)
    ...                                if x_ < 0])
    ...             x1 = extrapolate(x_controls)
    ...             if not okay(x1):
    ...                 x1 = abs(x1)
    ...         F1 = self.F(abs(x), None, abs_tol, rel_tol, mesgs)
    ...         G1 = dx1 = None
    ...         return (x1, G1, dx1, F1)
    >>> abs_xtol = 0.01
    >>> rel_xtol = 0.01
    >>> solver = ExtendedBroyden(x_tol=abs_xtol, x_tol_rel=rel_xtol,
    ...                          no_restart=True)
    >>> problem = Sqrt(n=[0.01, 1.0, 100.0])
    >>> state0 = problem.get_initial_state()
    >>> state, niter = solver.solve(problem, state0)
    >>> abs(state.x_[0] - 0.1) < abs_xtol
    True
    >>> abs(state.x_[1] - 1.0) < abs_xtol
    True
    >>> abs(state.x_[2] - 10.0) < abs_xtol
    True
    >>> abs(state.x_[2] - 10.0)/10.0 < rel_xtol
    True
    """
    _state_vars = [
        ('dyadic', False),
        ('solver_data0', Delegate(ExtendedBroydenData,
                                  ['j_inv0', 'n_prev'])),
        ('method=solver_data0.j_inv_F.method'),
        ('solver_data0.dyadic', Deleted),
        ('solver_data0.n_prev', 10),
        ('n_bad', 10,
         r"""Number of bad steps to allow.  When, after this many
         steps, the best solution has not been improved, then we
         bail."""),
        ('alpha=solver_data0.j_inv0', 0.5,
         r"""Linear mixing parameter.  If bad steps are taken, then
         the Jacobian is pulled back to that which will give a linear
         mixing $x \mapsto (1-\alpha) x + \alpha F(x)$."""),
        ('x_scale', None,
         r"""Typical scale (or vector of scales for each component)
         over which the function `G(x)` is approximately linear.
         This is important for ensuring that the dynamic restart
         mechanism only restarts components from far away.  If
         `None`, then :attr:`IProblem.x_scale` will be used."""),
        ('no_restart', False,
         r"""If `True` then dynamic restarts are not used.  Set this
         if there is no good approximation for `j_inv0`."""),
    ]
    process_vars()
    def __init__(self, *v, **kw):
        # Ensure we keep at least one more point than n_bad
        self.solver_data0.n_prev = max(
            self.solver_data0.n_prev, self.n_bad + 1)
[docs] def iterate(self, x, problem, solver_data, x0=None, G0=None, dx0=None, F0=None, mesgs=_MESGS): r"""Perform a single iteration and return the triplet `(x_new, (x, G, dx, F))`. The line-search strategy is dictated by :attr:`ls_strategy` which may take on one of the following values. 'pure_broyden' : Perform pure Broyden steps irrespective of whether or not they increase the error. Termination occurs if more than :attr:`ls_n_bad_step` bad steps are taken. '1' : """ G1 = dx1 = None j_inv = solver_data.j_inv_F # Set j_inv0 j0_diag = np.asarray(problem.j0_diag) if j0_diag.shape == (): j0_diag = np.ones(len(x), dtype=float)*j0_diag j_inv.j_inv0 = self.alpha*np.diag( np.divide(1, j0_diag)) skip_inds = np.asarray(problem.state0.sensitive, dtype=int) if (solver_data.x is None or solver_data.g is None): # First iteration: Here we first make an evaluation, and # then perform a line-search to update the Jacobian. # We use self._extrapolate as we have no base point yet. def _extrapolate(xc): return self._extrapolate(x=x, x0=None, x_controls=xc) x0, F0 = self._problem_step( x, x0=None, F0=None, problem=problem, solver_data=solver_data, extrapolate=_extrapolate, mesgs=mesgs, skip_inds=skip_inds) G0 = x0 - F0 j_inv.set_point(x=x0, g=G0, skip_inds=skip_inds, mags=abs(G0)) x = F0 elif x0 is None or F0 is None: # Restarting from a saved solution, so use old stored # values. x0 = solver_data.x F0 = x0 - solver_data.g x = F0 else: # Subsequent steps: pass if (np.linalg.norm((x0 - F0)/problem.G_scale, ord=self.norm_p) < self.G_tol): # Converged. Don't need any more. x1 = x0 F1 = F0 G1 = dx1 = None x = F1 return (x, (x1, G1, dx1, F1)) skip_inds = self._get_skip_inds(problem, solver_data) # Now take a step: #x1, F1 = self.line_search_broyden(x=x, x0=x0, F0=F0, # problem=problem, # solver_data=solver_data, # mesgs=mesgs, # skip_inds=skip_inds) G0 = x0 - F0 j_inv.update(x0=x0, g0=G0) def _extrapolate(x_controls): return j_inv.get_x(x_controls=x_controls, min_step=self.min_abs_step, max_step=self.max_step) (x1, _G1, _dx1, F1) = self.problem_step( problem=problem, solver_data=solver_data, x=x, x0=x0, G0=None, dx0=None, F0=F0, compute_F=True, extrapolate=_extrapolate, iter_data=solver_data.iter_data, abs_tol=self.G_tol, mesgs=mesgs) G1 = x1 - F1 # Check if the step is better than the best previous solution. If not # then see how far back the best previous solution is. If it is more # than n_bad_steps back then bail or start something more drastic like # line searches. if self.n_bad <= len(solver_data.xs): errs = [np.linalg.norm(_g/problem.G_scale, ord=self.norm_p) for _g in solver_data.gs[-self.n_bad:]] errs = np.array(errs) min_ind = np.argmin(errs) if 0 == min_ind: # The best solution has been pushed until the n_bad'th # step. If we don't improve with this step, then we # fail (or do something else) curr_err = np.linalg.norm(G1/problem.G_scale, ord=self.norm_p) if errs[min_ind] < curr_err: x0 = solver_data.xs[-self.n_bad:][min_ind] G0 = solver_data.gs[-self.n_bad:][min_ind] F0 = x0 - G0 j_inv.set_point(x=x0, g=G0) x = j_inv.get_x(x_controls=[skip_inds, x[skip_inds]], min_step=self.min_abs_step, max_step=self.max_step) mesgs.iter( "Best state now n_bad=%i steps old." 
% (self.n_bad,) + "Trying line search...") try: if _DEBUG and max(abs(F0 - problem.F(x0))) > 1e-12: import pdb;pdb.set_trace() (x1, F1) = self.line_search_linear( x=x, x0=x0, F0=F0, problem=problem, solver_data=solver_data, min_step=self.min_abs_step, mesgs=mesgs, skip_inds=skip_inds, extrapolate=_extrapolate) G1 = x1 - F1 except solver_utils.NoDescentError, err: msg = "\n".join( [err.message, "Line search failed after best solution" + " n_bad=%i steps old." % self.n_bad]) err.args = (msg,) + err.args[1:] raise else: pass else: # Still have some attempts pass # Check if the step is better than the previous step. If not, then # perform a dynamic restart. prev_err = np.linalg.norm( solver_data.gs[-1]/problem.G_scale, ord=self.norm_p) curr_err = np.linalg.norm( G1/problem.G_scale, ord=self.norm_p) if self.no_restart: restart=False else: restart = curr_err > prev_err x_scale = self.x_scale if x_scale is None: x_scale = problem.x_scale j_inv.set_point(x=x1, g=G1, restart=restart, x_scale=x_scale, skip_inds=skip_inds, mags=abs(G1)) x_new = j_inv.get_x(x_controls=[skip_inds, x[skip_inds]], min_step=self.min_abs_step, max_step=self.max_step) solver_data.skip_0 = skip_inds return (x_new, (x1, G1, dx1, F1))
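
# Illustrative note (not part of the solver above): the `alpha` parameter
# of :class:`ExtendedBroyden` corresponds to restarting toward plain
# linear mixing.  On its own, such a mixing iteration is simply:
#
#     def linear_mixing(F, x, alpha=0.5, n_iter=100):
#         # Damped fixed-point iteration x -> (1 - alpha)*x + alpha*F(x).
#         for _ in range(n_iter):
#             x = (1 - alpha)*x + alpha*F(x)
#         return x
#
# which converges when the iteration x -> F(x) is close enough to a
# contraction; the extended Broyden solver falls back toward the
# corresponding diagonal inverse Jacobian when its updates start to
# diverge.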