r"""Iterative solvers based on the Broyden method.
This module contains methods for solving root-finding problems with
various Broyden algorithms.
The problem is expressed as `x = F(x)` where, close to the solution,
the Jacobian of `F(x)` should be small (in other words, the iteration
`x -> F(x)` should be as close to convergent as possible) because the
Broyden iterations always start from an identity approximation to the
Jacobian `J` of `G(x) = x - F(x)`.
.. todo:: Check usage of `max_step`. This is now administered in
:meth:`Solver.solve` and should accept a vector.
"""
from __future__ import division
from copy import copy
import numpy
np = numpy
from mmf.objects import process_vars
from mmf.objects import Required, Delegate, Excluded, Computed, Deleted
import broyden
import solver_interface, solver_utils
from solver_interface import Mesg
from mmf.interfaces import implements
_ABS_TOL = 1e-12
_REL_TOL = 1e-12
_FINFO = np.finfo(float)
_TINY = _FINFO.tiny
_MESGS = Mesg() # Default object. Should generally not be used.
_DEBUG = False
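# Illustrative sketch (not used by the solvers below, whose updates live in
# :mod:`mmf.solve.broyden`): the module docstring frames the problem as the
# fixed point `x = F(x)` with residual `G(x) = x - F(x)` and an initial
# inverse Jacobian equal to the identity.  The helper name and the use of the
# rank-one "good" Broyden update of a dense inverse Jacobian `B` are
# assumptions for illustration only.
def _broyden_step_sketch(F, x, B):
    """Illustrative only: one Broyden step for ``x = F(x)`` with dense ``B``."""
    g = x - F(x)                    # residual G(x)
    x_new = x - B.dot(g)            # quasi-Newton step with B ~ inv(J)
    g_new = x_new - F(x_new)
    dx, dg = x_new - x, g_new - g
    Bdg = B.dot(dg)
    denom = dx.dot(Bdg)
    if abs(denom) > _TINY:          # skip ill-conditioned updates
        B = B + np.outer(dx - Bdg, dx.dot(B))/denom
    return x_new, B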
class BroydenData(solver_interface.SolverData):
"""Container for Broyden data.
The core for this method is defined in
:class:`mmf.solve.broyden.JinvBroyden` and a proxy object is used to
maintain the state and compute the updates.
"""
implements(solver_interface.ISolverData)
_state_vars = [
('x0', None, "Previous value of `x`"),
('g0', None, "Previous value of `g = x - F(x)`"),
('skip_0', None, "Previous skipped parameters"),
('x_good_0', None,
r"""Previous good step at which some parameter became
bad in the sense that the change in the errors was
no-longer small. This is used as the reference point for
updating the Jacobian of sensitive parameters."""),
('g_good_0', None, "`g = x - F(x)` at `x_good_0`"),
('x_good_1', None, "Current good step (see :attr:`x_good_0`)"),
('g_good_1', None, "`g = x - F(x)` at `x_good_1`"),
('skip_good', None,
r"""These are the parameters that were bad at the time
that `x_good` was saved or that have gone bad since."""),
('j_inv_F', Delegate(broyden.JinvBroyden,
['dyadic', 'x', 'g', 'n_prev'])),
('old_err', np.inf, "Previous absolute error."),
('dyadic=j_inv_F.dyadic', True),
('inv_jacobian_err', np.inf,
r"""This flag is used to indicate the quality of the
Jacobian. It represents a measure of the relative change
in the Jacobian inverse from the previous step. If this
is small (sufficiently less than 1), then the
approximation is probably reasonable for determining a
step. See :meth:`mmf.solve.broyden.JinvBroyden.update` for
details.""")
]
process_vars()
def change_basis(self, f, f_inv_t=None):
"""Use the function `x_new = f(x)` to update the data
corresponding to a change of basis. Raise an
:exc:`AttributeError` if the change cannot be implemented
(or don't define this method.)
Parameters
----------
f : function
Change of basis transformation (should be essentially
matrix multiplication, but for large bases one might want
to prevent forming this matrix explicitly.)
f_inv_t : function, optional
Transpose of the inverse of the function. If this is
provided, then it should be used to transform the bra's,
otherwise, use `f`. *Presently ignored!*
"""
if self.x0 is not None:
self.x0 = f(self.x0)
self.g0 = None # This should be recalculated.
self.j_inv_F.change_basis(f)
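# A minimal usage sketch for :meth:`BroydenData.change_basis` (the names `U`
# and `solver_data` are hypothetical): `f` is essentially multiplication by
# the new-basis matrix, e.g.
#
#     U = np.array([[0., 1.], [1., 0.]])          # swap the two components
#     solver_data.change_basis(lambda x: U.dot(x))
#
# `g0` is dropped so that it is recomputed in the new basis on the next step.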
class _Broyden(solver_interface.Solver):
"""Common interface for Broyden solvers.
Sensitive Parameters
--------------------
Sensitive parameters are those for which convergence of the other
parameters is required before the objective function corresponding to
these parameters is trustworthy.
To deal with these, we hold these fixed, performing a regular
search in the space of the parameters until the change in the
errors is small enough. This criterion is determined by
:meth:`_get_skip_inds` which returns the indices that should be
skipped.
Iteration proceeds in the sector without sensitive parameters
until the errors stabilize and none of the sensitive parameters
should be skipped. This state is stored as a "good" state `x_good_1`.
From this state a line-search is made including the sensitive
parameters until a suitable step is made that changes the
sensitive parameters. One question here is how should the
Jacobian be updated. We are presently taking the strategy that
if a reasonable approximation to the Jacobian at this good point
is found, the non-sensitive sector will still be okay (the
potential problem is that this block will be messed up during this
search).
Typically, after this step, some of the parameters will no longer
be good. If this is the case and there was a previous good state
`x_good_0`, then the difference between this and `x_good_1` is
used to do an update to correct the global structure of the
Jacobian.
Once this is done, we repeat, starting in the sector of
non-sensitive parameters.
"""
_state_vars = [
('bad_step_factor', 10,\
"""If the error increases by more than a factor of
:attr:`bad_step_factor` from the previous step, then a
weighted step is used. The default is a large value so
this does not happen much. If you know that the weighted
step will be in the correct direction, then make this
smaller. This must be greater than 1; otherwise the
method may loop forever.
"""),
('weight', 0.5,\
"""If the broyden step fails, then the new step is `x -
w*G(x)`."""),
('ls_avg_descent_factor', 1e-4,
r"""To ensure that we terminate, the line search algorithm
requires that each step descend with at least this slope,
otherwise the routine will terminate with an error
indicating that a local minimum was found rather than a
root."""),
('ls_min_scale_factor', 0.1,
r"""Minimum factor by which the step size is reduced in
the line search. See :func:`solver_utils.line_search`."""),
('ls_max_scale_factor', 0.5,
r"""Minimum factor by which the step size is reduced in
the line search. See
:func:`solver_utils.line_search`."""),
('ls_linear', True,
r"""If `True` then the line searches will be along a
line. This ensures that bad updates to the inverse
Jacobian are completely overwritten, but if the initial
direction is not downhill, then this can fail. As a
recourse, if the linear search fails, then one pass with
the non-linear search is attempted to see if the Jacobian
can be compensated. See also :attr:`ls_linear_min_step`.
If `False`, then new directions are used at each step as
the inverse Jacobian is updated. Hopefully this will lead
to an improved step that is ultimately downhill.
Previous bad updates to the inverse Jacobian may not be
completely erased."""),
('ls_allow_backstep', True,
r"""If `True`, then the second line search step (if the
first step fails) will be in the negative direction. This
may scale the problem inappropriately (especially if a
monotonic decrease in a control parameter is desired), in
which case one should set this to `False`."""),
('ls_min_step', 1e-12,
r"""To ensure termination, if the relative step size
becomes smaller than this, then the routine will terminate
with an error."""),
('ls_linear_min_step', None,
r"""If defined, then this will be the bailout criterion
for the linear line-search algorithm. This allows for a
much earlier bailout from the linear search leaving the
high-precision test and ultimate bailout for the
subsequent non-linear search.
In general, this should be somewhat larger than
:attr:`ls_min_step`. If it is not specified, then it is a
factor of 100 times larger than :attr:`ls_min_step`."""),
('ls_strategy', 'default',
r"""Choose the line-search strategy. See :meth:`iterate`
for possibilities."""),
('ls_n_bad_steps', 10,
r"""Maximum number of bad steps to try during a
line-search phase before giving up."""),
('ls_allow_bad_steps', False,
r"""Set to allow the line search to allow bad steps.
Used mainly for debugging, not for production use."""),
('max_weighted_step', np.inf,
"""Maximum length of the weighted steps (also respects
:attr:`max_step`). If this is zero, then weighted steps
are never used (except for the first step)."""),
('try_weighted_step', True,
"""If this is `True`, then a weighed step will be
attempted if the other steps fail. This should be `True`
if the iteration is naturally somewhat convergent
(i.e. if the objective function has the form `G(x) = x -
F(x)` where the iteration `x -> F(x)` is somewhat convergent) and
should be `False` if the objective function is unrelated
to the steps."""),
('initial_recession_factor', 0.2,\
"""When the error increases by more than
:attr:`bad_step_factor`, then the step is regressed
toward the previous step by this factor. The recession
factor is then further reduced by
:attr:`recession_factor`."""),
('recession_factor', 0.5,\
"""When the error increases by more than
:attr:`bad_step_factor`, then the step is regressed
toward the previous step by this factor until the error
is suitably reduced."""),
('j_inv_incompatibility', 0.1,
r"""Maximum incompatibility for an update of the inverse
Jacobian to be accepted. See :meth:`set_point`."""),
('solver_data0', Delegate(BroydenData, ['dyadic', 'n_prev'])),
('solver_data0.dyadic', 10),
('solver_data0.n_prev', 10),
('reset_Jinv', Excluded(False),\
"""If `True`, then on the next iteration, reset `Jinv`.
Only works once. This can be used in
:attr:`solver_interface.IProblem.schedule` to force the
Jacobian to be reset."""),
('ls_max', 10,
"""Maximum number of steps allowed during line search. If
the standard iteration fails to improve the solution, then
a line search may be used to find the local minimum in the
search direction. This can be used to limit or disable
this."""),
('sensitive_rel_tol', 1e-1,
r"""Sensitive parameters are only adjusted significantly if
the relative change in the error of those components is
less than this."""),
('sensitive_abs_tol', 1e-2,
r"""Sensitive parameters are only adjusted significantly if
the absolute change in the error of those components is
less than this."""),
('Jinv_tol', 1e-2,
r"""This is the tolerance required on the relative change
of the predicted step in order to ascertain that the
inverse Jacobian is accurate. See
:meth:`_line_search_d`."""),
]
process_vars()
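# Note (illustrative): since the objective is assumed to have the form
# `G(x) = x - F(x)`, the weighted step `x - w*G(x)` taken when the Broyden
# step fails is just damped fixed-point iteration,
#
#     x - w*(x - F(x)) == (1 - w)*x + w*F(x)
#
# so :attr:`try_weighted_step` should only be `True` when the plain iteration
# `x -> F(x)` is itself somewhat convergent (see :attr:`try_weighted_step`
# above).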
def __init__(self, *v, **kw):
if (0 >= self.initial_recession_factor or
1 <= self.initial_recession_factor):
raise ValueError("initial_recession_factor must be in (0,1).")
if (0 >= self.recession_factor or
1 <= self.recession_factor):
raise ValueError("recession_factor must be in (0,1).")
solver_interface.Solver.__init__(self, *v, **kw)
def _correct_step(self, dx, max_dx=None):
"""Return a step that satisfies the constraints `max_dx`."""
if max_dx is None:
max_dx = self.max_step
length = np.linalg.norm(dx)
if length > max_dx:
dx *= max_dx/length
return dx
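# For example (illustrative numbers): with `max_dx = 1.0` a proposed step
# `dx = np.array([3.0, 4.0])` has norm 5 and is rescaled in place to
# `[0.6, 0.8]`; only the length is clipped, the direction is preserved.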
def set_point(self, j_inv, x, g, skip_inds=None,
force=False):
r"""Update the inverse Jacobian if the step is suitable
compatible and set the current point to `(x,g)`."""
j_inv.set_point(x=x, g=g,
skip_inds=skip_inds,
include_skip=False,
mags=abs(g))
def line_search_broyden(self, x, x0, F0, # x1, x0, F0, problem,
problem, solver_data, mesgs,
min_step=None, skip_inds=None,
extrapolate=None):
r"""Return `(x1, F1)` corresponding to a pure broyden step.
This version always takes the full Broyden step `x` irrespective
of whether or not the step improves the solution.
"""
j_inv = solver_data.j_inv_F
G0 = x0 - F0
j_inv.update(x0=x0, g0=G0)
if extrapolate is None:
def extrapolate(x_controls):
return j_inv.get_x(x_controls=x_controls,
min_step=self.min_abs_step,
max_step=self.max_step)
if skip_inds is None:
skip_inds = np.array([], dtype=int)
(x1, _G1, _dx1, F1) = self.problem_step(
problem=problem, solver_data=solver_data,
x=x, x0=x0, G0=None, dx0=None, F0=F0,
compute_F=True, extrapolate=extrapolate,
iter_data=solver_data.iter_data, abs_tol=self.G_tol,
mesgs=mesgs)
self.set_point(j_inv=solver_data.j_inv_F,
x=x1, g=x1 - F1, skip_inds=skip_inds)
return x1, F1
def line_search_jacobian(self, x, x0, F0, # x1, x0, F0, problem,
problem, solver_data, min_step,
mesgs, skip_inds=None, extrapolate=None,
):
r"""Return `(x1, F1)`.
This version searches along a line until a good update to the
Jacobian is found along this direction. By restricting to a
linear search, poor updates to the Jacobian are completely
corrected. Once a good update for the Jacobian is found, the
recommended step is `(x1 + x0)/2`, half-way along the path
towards this point so that the new point has a good
approximation (the finite-difference approximation for the
derivative at this new point has the properties of a centered
difference). This may take the function uphill, but should
leave the iteration at a point where the Jacobian is a better
approximation.
"""
j_inv = solver_data.j_inv_F
G0 = x0 - F0
x1, F1 = self.line_search_broyden(
x=x, x0=x0, F0=F0, # x1, x0, F0, problem,
problem=problem, solver_data=solver_data,
mesgs=mesgs, skip_inds=skip_inds,
extrapolate=extrapolate)
l = 1
dx = x1 - x0
ndx = np.linalg.norm(dx)
min_l = min_step/ndx
while True:
G1 = x1 - F1
incompatibility = j_inv.update(
x=x0 + l*dx, g=G1, x0=x0, g0=G0,
skip_inds=skip_inds, mags=abs(F1) + abs(x1))
if incompatibility <= self.j_inv_incompatibility:
break
l /= 2
if l < min_l:
raise solver_utils.NoDescentError(
"""Could not find a good to updating Jacbian with
step size greater than min_step = %g""" % (min_step,))
(x1, _G1, _dx1, F1) = self.problem_step(
problem=problem, solver_data=solver_data,
x=x0 + l*dx, x0=x0, G0=None, dx0=None, F0=F0,
compute_F=True, extrapolate=None,
abs_tol=self.G_tol, mesgs=mesgs)
return x1, F1
def line_search_linear(self, x, x0, F0, # x1, x0, F0, problem,
problem, solver_data, min_step,
mesgs, skip_inds=None, extrapolate=None):
r"""Return `(x1, F1)` where the step size diminishes sufficiently
to ensure convergence. If a good step is not found, then an
:exc:`solver_utils.NoDescentError` exception is raised.
"""
G0 = x0 - F0
dx = x - x0
dx_norm = np.linalg.norm(dx)
x_F_new = [None, None]
g0 = np.linalg.norm(G0/problem.G_scale, ord=self.norm_p)**2
if g0 == 0:
return x0, F0
def g(l, dx=dx):
"""Function passed to line search algorithm."""
(x1_, _G1, _dx1, F1_) = self.problem_step(
problem=problem, solver_data=solver_data,
x=x0 + l*dx, x0=x0, G0=None, dx0=None, F0=F0,
compute_F=True, extrapolate=extrapolate,
abs_tol=self.G_tol, mesgs=mesgs)
x_F_new[:] = x1_, F1_
G_ = x1_ - F1_
g_ = np.linalg.norm(G_/problem.G_scale, ord=self.norm_p)**2
return g_
l, g_l = solver_utils.line_search(
g=g, g0=g0,
avg_descent_factor=self.ls_avg_descent_factor,
min_step=min_step,
min_scale_factor=self.ls_min_scale_factor,
max_scale_factor=self.ls_max_scale_factor,
max_steps=self.ls_max,
allow_backstep=self.ls_allow_backstep,
mesgs=mesgs)
x1, F1 = x_F_new
return x1, F1
def line_search_non_linear(self, x, x0, F0, # x1, x0, F0, problem,
problem, solver_data, min_step,
mesgs, skip_inds=None, extrapolate=None):
r"""Return `(x1, F1)` where the step size diminishes sufficiently
to ensure convergence. If a good step is not found, then an
:exc:`solver_utils.NoDescentError` exception is raised.
This is similar to the linear search, but a new step is
computed with each update to the Jacobian and the search is
performed along this new direction. This has the advantage of
being able to find a good step even if the original direction
was not downhill, but has the disadvantage that if the steps
are too large, then the Jacobian might be "polluted". It is a
good idea to use this routine if :meth:`line_search_linear` has
failed, but to start with a small step to prevent polluting the
Jacobian.
"""
G0 = x0 - F0
#dx = x - x0
#dx_norm = np.linalg.norm(dx)
x_F_new = [None, None]
g0 = np.linalg.norm(G0/problem.G_scale, ord=self.norm_p)**2
if g0 == 0:
return x0, F0
def g(l):
"""Function passed to line search algorithm."""
dx = solver_data.j_inv_F.get_x(
min_step=self.min_abs_step,
max_step=self.max_step) - x0
#dx *= dx_norm/np.max(dx_norm, np.linalg.norm(dx))
(x1_, _G1, _dx1, F1_) = self.problem_step(
problem=problem, solver_data=solver_data,
x=x0 + l*dx, x0=x0, G0=None, dx0=None, F0=F0,
compute_F=True, extrapolate=extrapolate,
abs_tol=self.G_tol, mesgs=mesgs)
x_F_new[:] = x1_, F1_
G1_ = x1_ - F1_
solver_data.j_inv_F.add_point(x=x1_, g=G1_)
solver_data.j_inv_F.update(
x0=x0, g0=G0,
skip_inds=skip_inds,
mags=(abs(x0) + abs(x1_) + abs(F0) + abs(F1_)))
g_ = np.linalg.norm(G1_/problem.G_scale, ord=self.norm_p)**2
return g_
l, g_l = solver_utils.line_search(
g=g, g0=g0,
avg_descent_factor=self.ls_avg_descent_factor,
min_step=min_step,
min_scale_factor=self.ls_min_scale_factor,
max_scale_factor=self.ls_max_scale_factor,
max_steps=self.ls_max,
allow_backstep=self.ls_allow_backstep,
mesgs=mesgs)
x1, F1 = x_F_new
return x1, F1
def line_search(self, x, x0, F0, # x1, x0, F0, problem,
problem, solver_data, min_step,
mesgs, skip_inds=None, extrapolate=None,
allow_bad_step=False):
r"""Return `(x1, F1)` where the step size diminishes sufficiently
to ensure convergence. If a good step is not found, then an
:exc:`solver_utils.NoDescentError` exception is raised.
This uses a combination of the linear and non-linear
algorithms. First a linear search is performed until the
updates to the Jacobian are consistent, then it switches to
the non-linear search which selects a new downhill direction.
Parameters
----------
skip_inds : None or array
Indices of parameters to hold fixed. If the problem
contains sensitive parameters as specified in
`problem.state0.sensitive`, then this must not be `None`.
"""
G0 = x0 - F0
dx = x - x0 # This needs to be mutated as it is used in
# extrapolation functions.
if skip_inds is not None and 0 < len(skip_inds):
x_controls = (skip_inds, [x0[skip_inds]])
dx[skip_inds] = 0.0
G0[skip_inds] = 0.0
else:
x_controls = None
dx_norm = np.linalg.norm(dx)
x_F_new = [None, None]
g0 = np.linalg.norm(G0/problem.G_scale, ord=self.norm_p)**2
if g0 == 0:
return x0, F0
d_ = {'incompatibility': np.inf} # Dictionary for persistent
# results
j_inv = solver_data.j_inv_F
extrapolate_J, extrapolate_dx = self._get_extrapolate(
x0=x0, G0=G0, j_inv=j_inv, dx=dx)
def g(l):
"""Function passed to line search algorithm."""
if d_['incompatibility'] <= self.j_inv_incompatibility:
dx[:] = j_inv.get_x(x_controls=x_controls,
min_step=self.min_abs_step,
max_step=self.max_step) - x0
l = abs(l)
extrapolate = extrapolate_J
else:
extrapolate = extrapolate_dx
(x1_, _G1, _dx1, F1_) = self.problem_step(
problem=problem, solver_data=solver_data,
x=x0 + l*dx, x0=x0, G0=None, dx0=None, F0=F0,
compute_F=True, extrapolate=extrapolate,
abs_tol=self.G_tol, mesgs=mesgs)
x_F_new[:] = x1_, F1_
G1_ = x1_ - F1_
#dx_ = x1_ - x0
#dg_ = G1_ - G0
d_['incompatibility'] = j_inv.incompatibility(
x=x1_, g=G1_, x0=x0, g0=G0)
j_inv.add_point(x=x1_, g=G1_)
j_inv.update(x0=x0, g0=G0,
skip_inds=skip_inds,
mags=(abs(x0) + abs(x1_) + abs(F0) + abs(F1_)))
if skip_inds is not None and 0 < len(skip_inds):
    G1_[skip_inds] = 0.0
g_ = np.linalg.norm(G1_/problem.G_scale, ord=self.norm_p)**2
return g_
analyze = False # **** Change This
if analyze:
def set_dx():
dx[:] = j_inv.get_x(min_step=self.min_abs_step,
max_step=self.max_step) - x0
@np.vectorize
def f(l, ord=self.norm_p):
(x1_, _G1, _dx1, F1_) = problem.step(
problem=problem, solver_data=solver_data,
x=x0 + l*dx, x0=x0, G0=None, dx0=None, F0=F0,
compute_F=True, extrapolate=extrapolate,
abs_tol=self.G_tol, mesgs=mesgs)
x_F_new[:] = x1_, F1_
G1_ = x1_ - F1_
print j_inv.update(
x=x1_, g=G1_, skip_inds=skip_inds,
mags=(abs(x0) + abs(x1_) + abs(F0) + abs(F1_)))
d_['dg_'] = G1_ - G0
d_['dx_'] = x1_ - x0
g_ = np.linalg.norm(G1_/problem.G_scale, ord=ord)**2
return g_
import pylab as plt
ls = np.linspace(-2,2,100)
ls1 = np.linspace(-0.2,0.2,100)
def pl(long=False, ord=2):
_f = plt.gcf()
plt.figure(2)
plt.plot(ls, g0 + (ls*ls - 2*ls)*g0, ':')
if long:
plt.plot(ls, f(ls, ord))
plt.axis([-2,2,0,5*g0])
else:
plt.plot(ls1, f(ls1, ord))
plt.axis([-0.2,0.2,0,3*g0])
plt.figure(_f.number)
def get_J():
J = problem._J(x0, lambda x: x - problem.F(x))
B = np.linalg.inv(J)
B0 = solver_data.j_inv_F.j_inv.asarray()
return B, B0
#import pdb;pdb.set_trace()
l, g_l = solver_utils.line_search(
g=g, g0=g0,
avg_descent_factor=self.ls_avg_descent_factor,
min_step=min_step,
min_scale_factor=self.ls_min_scale_factor,
max_scale_factor=self.ls_max_scale_factor,
max_steps=self.ls_max,
allow_backstep=self.ls_allow_backstep,
allow_bad_step=allow_bad_step,
mesgs=mesgs)
x1, F1 = x_F_new
return x1, F1
def _get_extrapolate(self, x0, G0, j_inv, dx):
"""Return `(extrapolate_J, extrapolate_dx)` where each is an
appropriate extrapolation function.
Parameters
----------
x0, G0 : array
Original point and `G0 = G(x0)` to extrapolate from.
j_inv : matrix-like
Inverse Jacobian. Calls `j_inv.get_x()`.
dx : array
Desired step used by simple extrapolation.
Returns
-------
extrapolate_J : function
Jacobian based extrapolation
extrapolate_dx : function
Simple extrapolation (no Jacobian used).
"""
def extrapolate_J(x_controls):
"""Use Jacobian to extrapolate."""
return j_inv.get_x(x_controls=x_controls,
min_step=self.min_abs_step,
max_step=self.max_step)
def extrapolate_dx(x_controls):
"""Default extrapolation if Jacobian is not good."""
return self._extrapolate(x=x0 + dx,
x0=x0, x_controls=x_controls)
return extrapolate_J, extrapolate_dx
def _line_search(self):
r"""Perform line search.
There are several strategies for line-searches available here.
A couple of generic options are provided controlling how the
line-search is performed. In particular, one must determine
how the backtracking is performed. Options include:
1) *Constant reduction.* The step-length `l` is decreased by
a constant factor `-1 < c < 1`. Negative factors allow for
steps in the opposite direction to be attempted.
2) *Cubic fit.* Here we fit a cubic polynomial through
function $g(l) = \norm{G(x+l\delta_x)}^2$ and extrapolate
to find the minimum. An option is to assume that $\delta_x
= -\mat{J}^{-1}G(x)$ which fixes the slope $g'(0) =
-2g(0)$ (thereby requiring only the previous two points), or to
use the previous three points. Parameters must be provided
to limit the step size to prevent `l` from decreasing too
slowly (spending too much time in the line search) or too
quickly (resulting in extremely small step sizes).
"""
def _get_skip_inds(self, problem, solver_data):
"""Return `skip_inds`.
Returns
-------
skip_inds : list
The indices that should be skipped for this iteration.
"""
x0 = solver_data.x0
g0 = solver_data.g0
x1 = solver_data.x
g1 = solver_data.g
sensitive_inds = np.asarray(problem.state0.sensitive, dtype=int)
if g0 is None or g1 is None:
# Need at least two steps before we vary sensitive parameters
skip_inds = sensitive_inds
else:
err0_s = np.abs(g0[sensitive_inds])
err1_s = np.abs(g1[sensitive_inds])
skip_inds = sensitive_inds[
np.where(abs(err1_s - err0_s)/abs(err1_s)
>= self.sensitive_rel_tol)]
abs_err = copy(g1)
abs_err[sensitive_inds] = 0
rel_err = abs_err/(abs(x1) + _TINY)
err = np.minimum(abs_err/self.sensitive_abs_tol,
rel_err/self.sensitive_rel_tol)
if err.max() < 1:
# Things have started to converge. Only skip bad parameters.
pass
else:
skip_inds = sensitive_inds
#print skip_inds, x1[-1]
return skip_inds
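# Illustrative numbers: with `sensitive_rel_tol = 0.1`, a sensitive component
# whose error moved from 1.00 to 0.95 (relative change ~0.05 < 0.1) is
# released for adjustment, while one that moved from 1.00 to 0.50 is still
# skipped.  Independently, all sensitive parameters remain skipped until the
# non-sensitive errors pass the `sensitive_abs_tol`/`sensitive_rel_tol` test
# computed above.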
def iterate(self, x, problem, solver_data,
x0=None, G0=None, dx0=None, F0=None,
mesgs=_MESGS):
r"""Perform a single iteration and return the triplet
`(x_new, (x, G, dx, F))`.
The line-search strategy is dictated by :attr:`ls_strategy`
which may take on one of the following values.
'pure_broyden' : Perform pure Broyden steps irrespective of
whether or not they increase the error. Termination occurs
if more than :attr:`ls_n_bad_steps` bad steps are taken.
'default' : Combined search (see :meth:`line_search`): a linear
search is used until the inverse Jacobian updates are
consistent, then new downhill directions are recomputed.
Sensitive parameters are held fixed via the skip indices, and
good steps are recorded to repair the Jacobian afterwards.
'jacobian' : Search along a line until a compatible update to
the inverse Jacobian is found (see
:meth:`line_search_jacobian`).
'linear' : Backtracking search along the original direction
(see :meth:`line_search_linear`).
'non-linear' : Backtracking search that recomputes the
direction after each inverse Jacobian update (see
:meth:`line_search_non_linear`).
'two_step' : First try the linear search with an earlier
bailout (:attr:`ls_linear_min_step`); if no downhill step is
found, fall back on a strongly damped Broyden step.
Notes
-----
In order to enjoy the good convergence properties of the
Broyden methods, we first try a full Broyden step.
The big concern with Broyden methods is that the approximate
Jacobian inverse may be inaccurate. To deal with this, we use
:meth:`line_search_jacobian` which does a modified line search until
a reasonable approximation of the Jacobian is obtained.
"""
#import pdb;pdb.set_trace()
G1 = dx1 = None
if self.reset_Jinv:
solver_data.j_inv_F.reset()
self.reset_Jinv = False
skip_inds = np.asarray(problem.state0.sensitive, dtype=int)
if (solver_data.x is None or solver_data.g is None):
# First iteration: Here we first make an evaluation, and
# then perform a line-search to update the Jacobian.
# We use self._extrapolate as we have no base point yet.
def extrapolate(xc):
return self._extrapolate(x=x, x0=None, x_controls=xc)
x0, F0 = self._problem_step(
x, x0=None, F0=None,
problem=problem, solver_data=solver_data,
extrapolate=extrapolate,
mesgs=mesgs, skip_inds=skip_inds)
self.set_point(j_inv=solver_data.j_inv_F,
x=x0, g=x0 - F0, skip_inds=skip_inds)
x = F0
elif x0 is None or F0 is None:
# Restarting from a saved solution, so use old stored
# values.
x0 = solver_data.x
F0 = x0 - solver_data.g
x = F0
else:
# Subsequent steps:
pass
if (np.linalg.norm((x0 - F0)/problem.G_scale, ord=self.norm_p)
< self.G_tol):
# Converged. Don't need any more.
x1 = x0
F1 = F0
G1 = dx1 = None
x = F1
return (x, (x1, G1, dx1, F1))
skip_inds = self._get_skip_inds(problem, solver_data)
if self.ls_strategy == 'pure_broyden' and True:
x1, F1 = self.line_search_broyden(x=x, x0=x0, F0=F0,
problem=problem,
solver_data=solver_data,
mesgs=mesgs,
skip_inds=skip_inds)
elif self.ls_strategy == 'pure_broyden' and False:
g0 = np.linalg.norm((x0 - F0)/problem.G_scale, ord=self.norm_p)
g1 = np.inf
step_found = False
for n in xrange(self.ls_n_bad_steps):
mesgs.iter(
"Trying %i of %i broyden steps: "
% (n, self.ls_n_bad_steps) +
"Error %g >= %g" % (g1, g0))
x1, F1 = self.line_search_broyden(x=x, x0=x0, F0=F0,
problem=problem,
solver_data=solver_data,
mesgs=mesgs,
skip_inds=skip_inds)
G1 = x1 - F1
self.set_point(j_inv=solver_data.j_inv_F,
x=x1, g=G1, skip_inds=skip_inds)
x = solver_data.j_inv_F.get_x(
x_controls=[skip_inds, x[skip_inds]],
min_step=self.min_abs_step, max_step=self.max_step)
g1 = np.linalg.norm((x1 - F1)/problem.G_scale,
ord=self.norm_p)
if g1 < g0:
step_found = True
break
if not step_found and not self.ls_allow_bad_steps:
msg = ("No downhill step found after ls_n_bad_steps" +
" = %i broyden steps" % (self.ls_n_bad_steps,))
mesgs.error(msg)
raise solver_utils.NoDescentError(msg)
elif self.ls_strategy == 'default':
if (0 < len(problem.state0.sensitive)
and 0 == len(skip_inds)):
# We have sensitive parameters but all parameters are
# good so we record the good step, then search
mesgs.info("Good step found. Saving...")
solver_data.x_good_0 = solver_data.x_good_1
solver_data.g_good_0 = solver_data.g_good_1
solver_data.x_good_1 = x0
solver_data.g_good_1 = x0 - F0
mesgs.info("Good step found. Finding new step " +
"including sensitive parameters...")
ls_min_step = self.ls_min_step
allow_bad_step = (
self.ls_allow_bad_steps and
solver_data.iteration_number < self.ls_n_bad_steps)
x1, F1 = self.line_search(x=x, x0=x0, F0=F0,
problem=problem,
solver_data=solver_data,
mesgs=mesgs,
min_step=self.ls_min_step,
skip_inds=skip_inds,
allow_bad_step=allow_bad_step
)
if (0 < len(problem.state0.sensitive)
and 0 == len(skip_inds)
and solver_data.x_good_0 is not None):
# We have sensitive parameters but all parameters were
# good. The line_search may have messed up the
# Jacobian a bit, so we use the previous good step (if
# it exists) to perform an update without any
mesgs.info("Updating Jacobian with previous good step...")
self.set_point(
j_inv=solver_data.j_inv_F,
x=solver_data.x_good_1,
g=solver_data.g_good_1,
force=True)
elif self.ls_strategy == 'jacobian':
x1, F1 = self.line_search_jacobian(x=x, x0=x0, F0=F0,
problem=problem,
solver_data=solver_data,
min_step=self.ls_min_step,
mesgs=mesgs,
skip_inds=skip_inds)
elif self.ls_strategy == 'linear':
x1, F1 = self.line_search_linear(x=x, x0=x0, F0=F0,
problem=problem,
solver_data=solver_data,
min_step=self.ls_min_step,
mesgs=mesgs,
skip_inds=skip_inds)
elif self.ls_strategy == 'non-linear':
x1, F1 = self.line_search_non_linear(x=x, x0=x0, F0=F0,
problem=problem,
solver_data=solver_data,
min_step=self.ls_min_step,
mesgs=mesgs,
skip_inds=skip_inds)
elif self.ls_strategy == 'two_step':
ls_min_step = self.ls_linear_min_step
if ls_min_step is None:
ls_min_step = self.ls_min_step*100
try:
x1, F1 = self.line_search_linear(x=x, x0=x0, F0=F0,
problem=problem,
solver_data=solver_data,
mesgs=mesgs,
min_step=ls_min_step,
skip_inds=None)
except solver_utils.NoDescentError:
if True:
if self.verbosity >= 10:
mesgs.append("Trying bad step...")
dx = x - x0
x1, F1 = self.line_search_broyden(
x=x0 + dx/2**self.ls_max, x0=x0, F0=F0,
problem=problem,
solver_data=solver_data,
mesgs=mesgs, skip_inds=None)
else:
x1_ = x
# Try once again without linear algorithm.
if self.verbosity >= 10:
mesgs.append("No downhill step found: " +
"Trying non-linear line-search...")
try:
x1, F1 = self.line_search_non_linear(
x=x1_, x0=x0, F0=F0,
problem=problem, solver_data=solver_data,
mesgs=mesgs, min_step=self.ls_min_step,
skip_inds=None)
except solver_utils.NoDescentError:
# Try resetting Jacobian.
if self.verbosity >= 10:
mesgs.append("No downhill step found: " +
"Trying to reset Jacobian...")
solver_data.j_inv_F.reset(m0=None)
x1, F1 = self.line_search_non_linear(
x=x1_, x0=x0, F0=F0,
problem=problem, solver_data=solver_data,
mesgs=mesgs, min_step=self.ls_min_step,
skip_inds=None)
else:
raise ValueError("Line search strategy %s not supported" %
(self.ls_strategy,))
self.set_point(j_inv=solver_data.j_inv_F,
x=x1, g=x1 - F1, skip_inds=skip_inds)
x_new = solver_data.j_inv_F.get_x(x_controls=[skip_inds, x[skip_inds]],
min_step=self.min_abs_step,
max_step=self.max_step)
solver_data.skip_0 = skip_inds
solver_data.g0 = solver_data.g
solver_data.x0 = solver_data.x
return (x_new, (x1, G1, dx1, F1))
G0 = x0 - F0
dx = x - x0
dx_norm = np.linalg.norm(dx)
x_F_new = [None, None]
g0 = np.linalg.norm(G0/problem.G_scale, ord=self.norm_p)**2
if g0 == 0:
return (x0, (x0, G1, dx1, F0))
ls_linear = self.ls_linear
def g(l, dx=dx):
"""Function passed to line search algorithm."""
if not ls_linear:
# Get new step from broyden
dx = solver_data.j_inv_F.get_x(min_step=self.min_abs_step,
max_step=self.max_step) - x0
#dx *= dx_norm/np.max(dx_norm, np.linalg.norm(dx))
_x = x0 + l*dx
x_F_new[:] = self._problem_step(
_x, x0=x0, F0=F0,
problem=problem, solver_data=solver_data,
mesgs=mesgs)
x1, F1 = x_F_new
_G = x1 - F1
_g = np.linalg.norm(_G/problem.G_scale, ord=self.norm_p)**2
return _g
try:
ls_min_step = self.ls_min_step
if ls_linear:
ls_min_step = self.ls_linear_min_step
if ls_min_step is None:
ls_min_step = self.ls_min_step*100
l, g_l = solver_utils.line_search(
g=g, g0=g0,
avg_descent_factor=self.ls_avg_descent_factor,
min_step=ls_min_step,
min_scale_factor=self.ls_min_scale_factor,
max_scale_factor=self.ls_max_scale_factor,
max_steps=self.ls_max,
allow_backstep=self.ls_allow_backstep,
mesgs=mesgs)
except solver_utils.NoDescentError:
if ls_linear:
# Try once again without linear algorithm.
if self.verbosity >= 10:
mesgs.append("No downhill step found: " +
"Trying non-linear line-search...")
ls_linear = False
# for n in xrange(11):
# try:
# l, g_l = solver_utils.line_search(
# g=g, g0=g0,
# avg_descent_factor=self.ls_avg_descent_factor,
# min_step=self.ls_min_step,
# min_scale_factor=self.ls_min_scale_factor,
# max_scale_factor=self.ls_max_scale_factor,
# max_steps=self.ls_max,
# mesgs=mesgs)
# except solver_utils.NoDescentError:
# if n < 10:
# continue
# else:
# raise
try:
l, g_l = solver_utils.line_search(
g=g, g0=g0,
avg_descent_factor=self.ls_avg_descent_factor,
min_step=self.ls_min_step,
min_scale_factor=self.ls_min_scale_factor,
max_scale_factor=self.ls_max_scale_factor,
max_steps=self.ls_max,
allow_backstep=self.ls_allow_backstep,
mesgs=mesgs)
except solver_utils.NoDescentError:
# Try resetting Jacobian.
if self.verbosity >= 10:
mesgs.append("No downhill step found: " +
"Trying to reset Jacobian...")
solver_data.j_inv_F.reset(m0=None)
l, g_l = solver_utils.line_search(
g=g, g0=g0,
avg_descent_factor=self.ls_avg_descent_factor,
min_step=self.ls_min_step,
min_scale_factor=self.ls_min_scale_factor,
max_scale_factor=self.ls_max_scale_factor,
max_steps=self.ls_max,
allow_backstep=self.ls_allow_backstep,
mesgs=mesgs)
else:
raise
x1, F1 = x_F_new
x_new = solver_data.j_inv_F.get_x(
x_controls=[skip_inds, x[skip_inds]],
min_step=self.min_abs_step, max_step=self.max_step)
mesgs.append("Abs Error: %g" % (
np.linalg.norm((x1 - F1)/problem.G_scale,
ord=self.norm_p)))
#solver_data.x = x1
#solver_data.g = x1 - F1
return (x_new, (x1, G1, dx1, F1))
##########################################################
# Required methods: These need to be defined by subclasses.
def _step(self, problem, x, x0, F0, solver_data, mesgs):
"""Return `(x1, F1, skip_inds)`. We intervene here to ensure
that the error decreases with each step (otherwise we use a
weighted step.)
We use one of the following algorithms.
A) First we compute `x1` and `G1` corresponding to the full
Broyden step:
1) Try the full Broyden step.
2) If not okay, try a half step.
B)
1) Try the full Broyden step. If this reduces the error, then
accept.
2) If not, then check for sensitive parameters. These should
only be changed if the error in the component of `G` for
these has stabilized.
3) If the error increases by too much, then check if there are
any sensitive parameters. If so, then try holding these
fixed.
4) If this fails, then try taking a weighted step (if allowed).
5) If this fails, then perform a line search.
"""
return self._problem_step(
x, x0=None, F0=None,
problem=problem, solver_data=solver_data,
mesgs=mesgs)
print x, x0
import pdb;pdb.set_trace()
if self.verbosity >= 10:
mesgs.append("Trying Broyden step...")
x1, F1 = self._problem_step(
x, x0=None, F0=None,
problem=problem, solver_data=solver_data,
mesgs=mesgs)
g0, dx0 = G0_dx
# Now check sensitive parameters
sensitive_inds = np.asarray(problem.state0.sensitive,
dtype=int)
skip_inds = [] # Indices to skip
if 0 < len(sensitive_inds):
skip_inds = self._get_skip_inds(problem=problem, solver_data=solver_data)
x_controls = (skip_inds, x0[skip_inds])
if self.verbosity >= 10:
mesgs.append("Holding %i sensitive parameters fixed." %
(len(skip_inds),))
x1 = solver_data.j_inv_F.get_x(
x_controls=x_controls,
min_step=self.min_abs_step, max_step=self.max_step)
x1, F1 = self._problem_step(
x, x0=None, F0=None,
problem=problem, solver_data=solver_data,
mesgs=mesgs, skip_inds=skip_inds)
g0_ = copy(g0)
G1_ = copy(G1)
g0_[skip_inds] = 0
G1_[skip_inds] = 0
err0 = np.abs(g0_).max()
err1 = np.abs(G1_).max()
if err1 < err0:
# Error decreased, done.
if self.verbosity >= 10:
mesgs.append("Broyden step succeeded.")
return (x1, G1, skip_inds)
if len(skip_inds) < len(sensitive_inds):
# First try holding all sensitive parameters fixed (if this
# has not already been done)
x_controls = (sensitive_inds, x0[sensitive_inds])
if self.verbosity >= 10:
mesgs.append("Trying sensitive step")
# Should we maybe set skip_inds = sensitive_inds here? Or
# possibly recompute skip_inds based on new update?
skip_inds = sensitive_inds
x1 = x + solver_data.j_inv_F.get_x(
x_controls=x_controls,
min_step=self.min_abs_step, max_step=self.max_step)
x1, F1 = self._problem_step(
x, x0=None, F0=None,
problem=problem, solver_data=solver_data,
mesgs=mesgs, skip_inds=skip_inds)
# Check errors:
g0_ = copy(g0)
G1_ = copy(G1)
g0_[skip_inds] = 0
G1_[skip_inds] = 0
err0 = np.abs(g0_).max()
err1 = np.abs(G1_).max()
err0 = np.abs(g0).max()
err1 = np.abs(G1).max()
if err1 < err0:
# Error decreased, done.
if self.verbosity >= 10:
mesgs.append("Sensitive step succeeded.")
return (x1, G1, skip_inds)
step_ok = err1 < err0*self.bad_step_factor
if (not step_ok
and self.max_weighted_step > 0
and self.try_weighted_step):
# Try a weighted step
if self.verbosity >= 10:
mesgs.append("Trying weighted step")
x1 = x1 - self._correct_step(self.weight*G1,
max_dx=min(self.max_step,
self.max_weighted_step))
x1, F1 = self._problem_step(
x, x0=None, F0=None,
problem=problem, solver_data=solver_data,
mesgs=mesgs, skip_inds=skip_inds)
G1_[skip_inds] = 0
err1 = np.abs(G1_).max()
step_ok = err1 <= self.bad_step_factor*err0
if step_ok:
if self.verbosity >= 10:
mesgs.append("Weighted step succeeded.")
return (x1, G1, skip_inds)
if 0 < self.ls_max:
# Do line search
if self.verbosity >= 10:
mesgs.append("Doing line search")
x1, G1 = self._line_search(x0, x1, G1_, err0,
err1, skip_inds, solver_data,
problem,
mesgs)
return (x1, G1, skip_inds)
def _step(self, problem, x, x0, F0, solver_data, mesgs):
x1, F1 = self._problem_step(
x, x0=None, F0=None,
problem=problem, solver_data=solver_data,
mesgs=mesgs)
return x1, F1, np.array([], dtype=int)
return self._line_search_d(problem, x, x0, G0_dx, solver_data,
mesgs)
def _problem_step(self, x1, x0, F0, problem, solver_data,
mesgs, skip_inds=None, extrapolate=None):
r"""Return result of `problem.step`, updating the Jacobian if
the step is not too big."""
if extrapolate is None:
def extrapolate(x_controls):
return solver_data.x + solver_data.j_inv_F.get_x(
x_controls=x_controls,
min_step=self.min_abs_step,
max_step=self.max_step)
if skip_inds is None:
skip_inds = np.array([], dtype=int)
if x0 is not None:
x1 = x0 + self._correct_step(x1 - x0, max_dx=self.max_step)
(x1, _G1, _dx1, F1) = self.problem_step(
problem=problem, solver_data=solver_data,
x=x1, x0=x0, G0=None, dx0=None, F0=F0,
compute_F=True,
extrapolate=extrapolate,
iter_data=solver_data.iter_data,
abs_tol=self.G_tol,
mesgs=mesgs)
if x0 is not None and np.linalg.norm(x1 - x0) <= self.max_step:
solver_data.j_inv_F.update(
x=x1, g=x1 - F1, skip_inds=skip_inds,
mags=(abs(x0) + abs(x1) + abs(F0) + abs(F1)))
return x1, F1
def tst():
from solver_interface import Problem, BareState
class Sqrt(Problem):
'''Iterative solution to sqrt(n).'''
_state_vars = [('n', Required, 'To compute sqrt(n)'),
('state0', Computed)]
process_vars()
def __init__(self, *v, **kw):
self.n = np.array(self.n)
self.state0 = self.get_initial_state()
def get_initial_state(self):
return BareState(x_=copy(self.n))
def F(self, x, iter_data=None, abs_tol=None, rel_tol=None,
mesgs=_MESGS):
return x - (x*x - self.n)
def G(self, x, compute_dx=True,
iter_data=None, abs_tol=None, rel_tol=None,
mesgs=_MESGS):
return (x*x - self.n), -(x*x - self.n)/(2*x)
def Jinv(self, x):
return np.diag(1/(2*x))
def step(self, x, x0=None, G0=None, dx0=None, F0=None,
compute_G=False, compute_dx=False, compute_F=False,
extrapolate=None, iter_data=None,
abs_tol=1e-12, rel_tol=1e-12, mesgs=_MESGS):
'Keep x positive'
print x
x1 = x
okay = lambda x: all(x >= 0)
if not okay(x1):
x_controls = zip(*[(i_, abs(x_))
for i_, x_ in enumerate(x1)
if x_ < 0])
x1 = extrapolate(x_controls)
if not okay(x1):
x1 = abs(x1)
F1 = self.F(abs(x), None, abs_tol, rel_tol, mesgs)
G1 = dx1 = None
return (x1, G1, dx1, F1)
x_tol = 0.01
G_tol = 0.01
solver = Broyden(x_tol=x_tol, G_tol=G_tol)
problem = Sqrt(n=[0.01, 1.0, 100.0])
state0 = problem.get_initial_state()
print("Newton iterations...")
x = 1*state0.x_
for _ in xrange(5):
x += problem.G(x)[1]
print x
print
print("Straight iterations...")
x = 1*state0.x_
for _ in xrange(10):
x = problem.F(x)
print x
print
print("Straight broyden...")
import broyden
x = 1*state0.x_
b = broyden.Broyden(x0=x, G0=x - problem.F(x))
for _ in xrange(21):
b.update(b.x - problem.F(b.x))
print b.x
print("Our Broyden...")
solver.ls_linear = True
state, niter = solver.solve(problem, state0)
print("Our Broyden: %i" % niter)
class Broyden(_Broyden):
"""This solver uses the simple Broyden algorithm.
Examples
--------
>>> from solver_interface import Problem, BareState
>>> class Sqrt(Problem):
... '''Iterative solution to sqrt(n).'''
... _state_vars = [('n', Required, 'To compute sqrt(n)'),
... ('state0', Computed)]
... process_vars()
... def __init__(self, *v, **kw):
... self.n = np.array(self.n)
... self.state0 = self.get_initial_state()
... def get_initial_state(self):
... return BareState(x_=copy(self.n))
... def F(self, x, iter_data=None, abs_tol=None, rel_tol=None,
... mesgs=_MESGS):
... return x - (x*x - self.n)
... def Jinv(self, x):
... return np.diag(1/(2*x))
... def step(self, x, x0=None, G0=None, dx0=None, F0=None,
... compute_G=False, compute_dx=False, compute_F=False,
... extrapolate=None, iter_data=None,
... abs_tol=1e-12, rel_tol=1e-12, mesgs=_MESGS):
... 'Keep x positive'
... x1 = x
... okay = lambda x: all(x >= 0)
... if not okay(x1):
... x_controls = zip(*[(i_, abs(x_))
... for i_, x_ in enumerate(x1)
... if x_ < 0])
... x1 = extrapolate(x_controls)
... if not okay(x1):
... x1 = abs(x1)
... F1 = self.F(abs(x), None, abs_tol, rel_tol, mesgs)
... G1 = dx1 = None
... return (x1, G1, dx1, F1)
>>> x_tol = 0.01
>>> G_tol = 0.01
>>> solver = Broyden(x_tol=x_tol, G_tol=G_tol, ls_max=100,
... ls_strategy='default', dyadic=2, n_prev=2)
>>> problem = Sqrt(n=[0.01, 1.0, 100.0])
>>> state0 = problem.get_initial_state()
>>> state, niter = solver.solve(problem, state0)
>>> niter # Newton takes 5, straight Broyden takes 21
10
>>> abs(state.x_[0] - 0.1) < 1.2*x_tol
True
>>> abs(state.x_[1] - 1.0) < 1.2*x_tol
True
>>> abs(state.x_[2] - 10.0) < x_tol
True
>>> G, dx = problem.G(state.x_)
>>> abs(G).max() < G_tol
True
"""
class ModifiedBroyden(_Broyden):
"""This solver uses the Modifed Broyden algorithm.
>> from solver_interface import Problem, BareState
>> class Sqrt(Problem):
... '''Iterative solution to sqrt(n).'''
... _state_vars = [('n', Required, 'To compute sqrt(n)')]
... process_vars()
... def __init__(self, *v, **kw): self.n = np.array(self.n)
... def get_initial_state(self):
... return BareState(x_=copy(self.n))
... def G(self, x, iter_data, abs_tol, rel_tol, mesgs):
... return x*x - self.n
... 'Keep x positive'
... x1 = x
... okay = lambda x: all(x >= 0)
... if not okay(x1):
... x_controls = zip(*[(i_, abs(x_))
... for i_, x_ in enumerate(x1)
... if x_ < 0])
... x1 = extrapolate(x_controls)
... if not okay(x1):
... x1 = abs(x1)
... G1 = self.G(abs(x), None, abs_tol, rel_tol, mesgs)
... return (x1, G1)
>> abs_xtol = 0.01
>> rel_xtol = 0.01
>> solver = ModifiedBroyden(abs_xtol=abs_xtol, rel_xtol=rel_xtol)
>> problem = Sqrt(n=[0.01, 1.0, 100.0])
>> state0 = problem.get_initial_state()
>> state, niter = solver.solve(problem, state0)
>> abs(state.x_[0] - 0.1) < abs_xtol
True
>> abs(state.x_[1] - 1.0) < abs_xtol
True
>> abs(state.x_[2] - 10.0) < abs_xtol
True
>> abs(state.x_[2] - 10.0)/10.0 < rel_xtol
True
"""
_state_vars = [('alpha', 1.0),
('W', None),
('DIIS', 10, "Size of DIIS subspace"),
('mmf_method', True,
"Use my DIIS method rather than the usual.")]
process_vars()
def _get_broyden(self, x, gx):
"""Return an initialized broyden object"""
return broyden.ModifiedBroyden(x0=x,
G0=gx,
alpha=self.alpha,
W=self.W,
DIIS=self.DIIS,
mmf_method=self.mmf_method)
class ExtendedBroydenData(BroydenData):
"""Container for Extended Broyden data.
The core for this method is defined in
:class:`mmf.solve.broyden.JinvExtendedBroydenFull` and a proxy object is
used to maintain the state and compute the updates.
"""
implements(solver_interface.ISolverData)
_ignore_name_clash = set(['j_inv_F', 'dyadic'])
_state_vars = [
('j_inv_F.dyadic', Deleted),
('j_inv_F', Delegate(broyden.JinvExtendedBroydenFull,
['xs', 'gs', 'j_inv0', 'n_prev'])),
('dyadic', False),
]
process_vars()
def __init__(self, *v, **kw):
if self.dyadic:
raise NotImplementedError(
"Extended Broyden only supports `dyadic=False`")
def change_basis(self, f, f_inv_t=None):
"""Use the function `x_new = f(x)` to update the data
corresponding to a change of basis. Raise an
:exc:`AttributeError` if the change cannot be implemented
(or don't define this method.)
Parameters
----------
f : function
Change of basis transformation (should be essentially
matrix multiplication, but for large bases one might want
to prevent forming this matrix explicitly.)
f_inv_t : function, optional
Transpose of the inverse of the function. If this is
provided, then it should be used to transform the bra's,
otherwise, use `f`. *Presently ignored!*
"""
self.j_inv_F.change_basis(f)
class ExtendedBroyden(_Broyden):
"""This solver uses the Extended Broyden algorithm with dynamic restarts
toward the specified `j_inv0` when the algorithm starts to diverge.
.. note:: In order for this to work reasonably, you must provide a
reasonable estimate for :attr:`x_scale` which sets the typical scale over
which the function `G(x)` is linear. If not provided, then
:attr:`IProblem.x_scale` will be used.
>>> from solver_interface import Problem, BareState
>>> from mmf.objects import ClassVar
>>> class Sqrt(Problem):
... '''Iterative solution to sqrt(n).'''
... _no_name_clash = set(['state0'])
... _state_vars = [('n', Required, 'To compute sqrt(n)'),
... ('state0', Computed)]
... process_vars()
... def __init__(self, *v, **kw):
... self.n = np.array(self.n)
... self.state0 = self.get_initial_state()
... def get_initial_state(self):
... return BareState(x_=copy(self.n))
... def F(self, x, iter_data=None, abs_tol=None, rel_tol=None,
... mesgs=[]):
... return x - (x*x - self.n)
... def Jinv(self, x):
... return np.diag(1/(2*x))
... def step(self, x, x0=None, G0=None, dx0=None, F0=None,
... compute_G=False, compute_dx=False, compute_F=False,
... extrapolate=None, iter_data=None,
... abs_tol=1e-12, rel_tol=1e-12, mesgs=[]):
... 'Keep x positive'
... x1 = x
... okay = lambda x: all(x >= 0)
... if not okay(x1):
... x_controls = zip(*[(i_, abs(x_))
... for i_, x_ in enumerate(x1)
... if x_ < 0])
... x1 = extrapolate(x_controls)
... if not okay(x1):
... x1 = abs(x1)
... F1 = self.F(abs(x), None, abs_tol, rel_tol, mesgs)
... G1 = dx1 = None
... return (x1, G1, dx1, F1)
>>> abs_xtol = 0.01
>>> rel_xtol = 0.01
>>> solver = ExtendedBroyden(x_tol=abs_xtol, x_tol_rel=rel_xtol,
... no_restart=True)
>>> problem = Sqrt(n=[0.01, 1.0, 100.0])
>>> state0 = problem.get_initial_state()
>>> state, niter = solver.solve(problem, state0)
>>> abs(state.x_[0] - 0.1) < abs_xtol
True
>>> abs(state.x_[1] - 1.0) < abs_xtol
True
>>> abs(state.x_[2] - 10.0) < abs_xtol
True
>>> abs(state.x_[2] - 10.0)/10.0 < rel_xtol
True
"""
_state_vars = [
('dyadic', False),
('solver_data0', Delegate(ExtendedBroydenData,
['j_inv0', 'n_prev'])),
('method=solver_data0.j_inv_F.method'),
('solver_data0.dyadic', Deleted),
('solver_data0.n_prev', 10),
('n_bad', 10,\
r"""Number of bad steps to allow. When, after this many
steps, the best solution has not been improved, then we
bail."""),
('alpha=solver_data0.j_inv0', 0.5,\
r"""Linear mixing parameter. If bad steps are taken,
then the Jacobian is pulled back to that which will give
a linear mixing $x \mapsto (1-\alpha) x + \alpha F(x)$."""),
('x_scale', None,\
r"""Typical scale (or vector of scales for each component) over
which the function `G(x)` is approximately linear. This is
important for ensuring that the dynamic restart mechanism only
restarts components from far away. If `None`, then
:attr:`IProblem.x_scale` will be used."""),
('no_restart', False,\
r"""If `True` then dynamic restarts are not used. Set this if
there is no good approximation for `j_inv0`."""),
]
process_vars()
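# Sketch of the restart target set up in :meth:`iterate` below: with a
# scalar `problem.j0_diag` the fallback inverse Jacobian is
#
#     j_inv.j_inv0 = alpha*np.diag(1.0/(j0_diag*np.ones(len(x))))
#
# so a component restarted toward `j_inv0` steps as
# `x -> x - alpha*G(x)/j0_diag`, i.e. plain linear mixing
# `x -> (1 - alpha)*x + alpha*F(x)` when `j0_diag == 1`.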
def __init__(self, *v, **kw):
# Ensure we keep at least one more point than n_bad
self.solver_data0.n_prev = max(
self.solver_data0.n_prev, self.n_bad + 1)
def iterate(self, x, problem, solver_data,
x0=None, G0=None, dx0=None, F0=None,
mesgs=_MESGS):
r"""Perform a single iteration and return the triplet
`(x_new, (x, G, dx, F))`.
A full Broyden step is taken on each iteration. If the best
previous solution is :attr:`n_bad` steps old and the current
step does not improve on it, a linear line search is attempted
from that best solution. If the step increases the error and
:attr:`no_restart` is `False`, the inverse Jacobian is
dynamically restarted toward `j_inv0`.
"""
G1 = dx1 = None
j_inv = solver_data.j_inv_F
# Set j_inv0
j0_diag = np.asarray(problem.j0_diag)
if j0_diag.shape == ():
j0_diag = np.ones(len(x), dtype=float)*j0_diag
j_inv.j_inv0 = self.alpha*np.diag(
np.divide(1, j0_diag))
skip_inds = np.asarray(problem.state0.sensitive, dtype=int)
if (solver_data.x is None or solver_data.g is None):
# First iteration: Here we first make an evaluation, and
# then perform a line-search to update the Jacobian.
# We use self._extrapolate as we have no base point yet.
def _extrapolate(xc):
return self._extrapolate(x=x, x0=None, x_controls=xc)
x0, F0 = self._problem_step(
x, x0=None, F0=None,
problem=problem, solver_data=solver_data,
extrapolate=_extrapolate,
mesgs=mesgs, skip_inds=skip_inds)
G0 = x0 - F0
j_inv.set_point(x=x0, g=G0,
skip_inds=skip_inds,
mags=abs(G0))
x = F0
elif x0 is None or F0 is None:
# Restarting from a saved solution, so use old stored
# values.
x0 = solver_data.x
F0 = x0 - solver_data.g
x = F0
else:
# Subsequent steps:
pass
if (np.linalg.norm((x0 - F0)/problem.G_scale, ord=self.norm_p)
< self.G_tol):
# Converged. Don't need any more.
x1 = x0
F1 = F0
G1 = dx1 = None
x = F1
return (x, (x1, G1, dx1, F1))
skip_inds = self._get_skip_inds(problem, solver_data)
# Now take a step:
#x1, F1 = self.line_search_broyden(x=x, x0=x0, F0=F0,
# problem=problem,
# solver_data=solver_data,
# mesgs=mesgs,
# skip_inds=skip_inds)
G0 = x0 - F0
j_inv.update(x0=x0, g0=G0)
def _extrapolate(x_controls):
return j_inv.get_x(x_controls=x_controls,
min_step=self.min_abs_step,
max_step=self.max_step)
(x1, _G1, _dx1, F1) = self.problem_step(
problem=problem, solver_data=solver_data,
x=x, x0=x0, G0=None, dx0=None, F0=F0,
compute_F=True, extrapolate=_extrapolate,
iter_data=solver_data.iter_data, abs_tol=self.G_tol,
mesgs=mesgs)
G1 = x1 - F1
# Check if the step is better than the best previous solution. If not
# then see how far back the best previous solution is. If it is more
# than n_bad_steps back then bail or start something more drastic like
# line searches.
if self.n_bad <= len(solver_data.xs):
errs = [np.linalg.norm(_g/problem.G_scale,
ord=self.norm_p)
for _g in solver_data.gs[-self.n_bad:]]
errs = np.array(errs)
min_ind = np.argmin(errs)
if 0 == min_ind:
# The best solution has been pushed until the n_bad'th
# step. If we don't improve with this step, then we
# fail (or do something else)
curr_err = np.linalg.norm(G1/problem.G_scale,
ord=self.norm_p)
if errs[min_ind] < curr_err:
x0 = solver_data.xs[-self.n_bad:][min_ind]
G0 = solver_data.gs[-self.n_bad:][min_ind]
F0 = x0 - G0
j_inv.set_point(x=x0, g=G0)
x = j_inv.get_x(x_controls=[skip_inds, x[skip_inds]],
min_step=self.min_abs_step,
max_step=self.max_step)
mesgs.iter(
"Best state now n_bad=%i steps old." % (self.n_bad,) +
"Trying line search...")
try:
if _DEBUG and max(abs(F0 - problem.F(x0))) > 1e-12:
import pdb;pdb.set_trace()
(x1, F1) = self.line_search_linear(
x=x, x0=x0, F0=F0,
problem=problem, solver_data=solver_data,
min_step=self.min_abs_step,
mesgs=mesgs, skip_inds=skip_inds,
extrapolate=_extrapolate)
G1 = x1 - F1
except solver_utils.NoDescentError, err:
msg = "\n".join(
[err.message,
"Line search failed after best solution" +
" n_bad=%i steps old." % self.n_bad])
err.args = (msg,) + err.args[1:]
raise
else:
pass
else:
# Still have some attempts
pass
# Check if the step is better than the previous step. If not, then
# perform a dynamic restart.
prev_err = np.linalg.norm(
solver_data.gs[-1]/problem.G_scale,
ord=self.norm_p)
curr_err = np.linalg.norm(
G1/problem.G_scale,
ord=self.norm_p)
if self.no_restart:
restart = False
else:
restart = curr_err > prev_err
x_scale = self.x_scale
if x_scale is None:
x_scale = problem.x_scale
j_inv.set_point(x=x1, g=G1,
restart=restart,
x_scale=x_scale,
skip_inds=skip_inds,
mags=abs(G1))
x_new = j_inv.get_x(x_controls=[skip_inds, x[skip_inds]],
min_step=self.min_abs_step,
max_step=self.max_step)
solver_data.skip_0 = skip_inds
return (x_new, (x1, G1, dx1, F1))
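# Hedged usage sketch for :class:`ExtendedBroyden` (names taken from the
# doctest above): provide `x_scale` so that dynamic restarts only reset
# components that have wandered outside the region where `G(x)` is roughly
# linear, e.g.
#
#     solver = ExtendedBroyden(x_tol=1e-4, x_tol_rel=1e-4, x_scale=1.0)
#     state, niter = solver.solve(problem, problem.get_initial_state())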