r"""Modified Cholesky.
This module provides different methods for computing the Modified
Cholesky Factorization of a symmetric matrix $\mat{A} = \mat{A}^{T}$:
.. math::
\mat{A} + \diag(\vect{e}) = \mat{L}\mat{L}^{T} = \mat{M}.
These algorithms attempt to satisfy the following goals:
1) $\mat{M}$ is safely symmetric positive definite (SPD) and well
conditioned.
2) $\vect{e}$ is as small as possible, meaning that it is zero if $\mat{A}$ is
already SPD and not much larger than the most negative eigenvalue
$\lambda_{0}$ of $\mat{A}$. The figure of merit in this sense is
usually $-\norm{\vect{e}}_{\infty}/\lambda_{0}$.
3) Efficient computational efficiency.
The original algorithms
.. [GM74] Philip E. Gill and Walter Murray, `"Newton-type
methods for unconstrained and linearly constrained optimization"
<http://dx.doi.org/10.1007/BF01585529>`_, Mathematical Programming
7(1), 311 -- 350 (1974).
.. [GMW81] P. E. Gill, W. Murray, and M. H. Wright, `"Practical Optimization"
<http://dx.doi.org/10.1137/S105262349833266X>`_, Academic Press,
London, (1981).
.. [SE90] Robert B. Schnabel and Elizabeth Eskow, `"A New Modified
Cholesky Factorization" <http://dx.doi.org/10.1137/0911064>`_, SIAM
Journal on Scientific and Statistical Computing 11(6), 1136-1158
(1990).
.. [SE99] Robert B. Schnabel and Elizabeth Eskow, `"A Revised Modified
Cholesky Factorization Algorithm" <>`_, SIAM Journal on
Optimization 9(4), 1135 -- 1148 (1999).
Codes
-----
GMW81::
proc gmchol(A);
/* calculates the Gill-Murray generalized choleski decomposition */
/* input matrix A must be non-singular and symmetric */
/* Author: Jeff Gill. Part of the Hessian Project. */
local i,j,k,n,sum,R,theta_j,norm_A,phi_j,delta,xi_j,gamm,E,beta_j;
n = rows(A);
R = eye(n);
E = zeros(n,n);
norm_A = maxc(sumc(abs(A)));
gamm = maxc(abs(diag(A)));
delta = maxc(maxc(__macheps*norm_A~__macheps));
for j (1, n, 1);
theta_j = 0;
for i (1, n, 1);
sum = 0;
for k (1, (i-1), 1);
sum = sum + R[k,i]*R[k,j];
endfor;
R[i,j] = (A[i,j] - sum)/R[i,i];
if (A[i,j] -sum) > theta_j;
theta_j = A[i,j] - sum;
endif;
if i > j;
R[i,j] = 0;
endif;
endfor;
sum = 0;
for k (1, (j-1), 1);
sum = sum + R[k,j]^2;
endfor;
phi_j = A[j,j] - sum;
if (j+1) <= n;
xi_j = maxc(abs(A[(j+1):n,j]));
else;
xi_j = maxc(abs(A[n,j]));
endif;
beta_j = sqrt(maxc(maxc(gamm~(xi_j/n)~__macheps)));
if delta >= maxc(abs(phi_j)~((theta_j^2)/(beta_j^2)));
E[j,j] = delta - phi_j;
elseif abs(phi_j) >= maxc(((delta^2)/(beta_j^2))~delta);
E[j,j] = abs(phi_j) - phi_j;
elseif ((theta_j^2)/(beta_j^2)) >= maxc(delta~abs(phi_j));
E[j,j] = ((theta_j^2)/(beta_j^2)) - phi_j;
endif;
R[j,j] = sqrt(A[j,j] - sum + E[j,j]);
endfor;
retp(R'R);
endp;
"""
__all__ = ['se99']
[docs]def se99(A):
"""Return `(L, e)`: the Schnabel-Eskow generalized Cholesky
decomposition of `M = A + diag(e) = dot(L, L.T)` where
1) `M` is safely symmetric positive definite (SPD) and well
conditioned.
2) `e` is small (zero if `A` is already SPD and not much larger
than the most negative eigenvalue of `A`)
.. math::
\mat{A} + \diag{e} = \mat{L}\mat{L}^{T}
Parameters
----------
A : array
Must be a non-singular and symmetric matrix
test : bool
If `True`, use the directly translated iterative code for
testing.
Returns
-------
L : 2d array
Lower triangular Cholesky factor.
e : 1d array
Diagonals of correction matrix `E`.
Examples
--------
>>> np.random.seed(3)
>>> A = np.random.rand(100,100)*2 - 1
>>> A = A + A.T
>>> L, e = sechol(A)
>>> np.allclose(np.dot(L,L.T), (A + np.diag(e)))
True
>>> np.linalg.eigvalsh(A).min()
()
>>> A2 = np.array([[-0.451, -0.041, 0.124],
... [-0.041, -0.265, 0.061],
... [ 0.124, 0.061, -0.517]])
>>> L, e = sechol(A2, test=True)
>>> e.max()/np.linalg.eigvalsh(A).min()
This is based on the code in
"""
n = A.shape[0]
phase_one = True
gamma = abs(A.diagonal()).max()
j = 0
tau = _EPS**(1/3)
taut = tau*tau
mu = 0.1
# Phase one, A potentially positive-definite
L = A
E = np.zeros(n, dtype=float)
while j < n and phase_one:
a_max = A.diagonal()[j:].max()
a_min = A.diagonal()[j:].min
if (a_max < taut*gamma or a_min < -mu*a_max):
phase_one = False
else:
# Pivot on maximum diagonal of remaining submatrix
i = j + np.argmax(A.diagonal()[j:])
if i != j:
A[[i,j],:] = A[[j,i],:]
A[:, [i,j]] = A[:,[j,i]]
if ((A.diagonal()[j+1:] - A[j+1:,j]**2/A.diagonal()[j]).min()
< -mu*gamma):
phase_one = False # go to phase two
else:
# perform jth iteration of factorization
L[j,j] = np.sqrt(A[j,j]) # L[j j] overwrites A[j j]
for i in xrange(j+1, n):
L[j,j] = A[i,j]/L[j,j] # L[i j] overwrites A[i j]
A[i,j+1:i+1] -= L[i,j]*L[j+1:i+1,j]
A[i,i] -= L[i,j]**2
j += 1
# End phase 1
# Phase two, A not positive-definite
if not phase_one and j == n - 1:
E[-1] = delta = -A[-1,-1] + max(-tau*A[-1,-1]/(1-tau),
taut*gamma)
A[-1,-1] += delta
L[-1,-1] = np.sqrt(A[-1,-1])
delta_p = 0
g = np.zeros(n, dtype=float)
if not phase_one and j < n - 1:
k = j - 1 # k = number of iterations performed in phase one;
# Calculate lower Gerschgorin bounds of A[k+1]
for i in xrange(k + 1, n):
g[i] = A[i,i] - abs(A[i,k+1:i]).sum() - abs(A[i+1:,i]).sum()
# Modified Cholesky Decomposition
for j in xrange(k + 1, n - 2):
# Pivot on maximum lower Gerschgorin bound estimate
i = j + np.argmax(g[j:])
if i != j:
A[[i,j],:] = A[[j,i],:]
A[:, [i,j]] = A[:,[j,i]]
# Calculate E[j,j] and add to diagonal
norm_j = abs(A[j+1:n,j]).sum()
E[j] = delta = max(0,
-A[j,j] + max(norm_j, taut*gamma),
delta_p)
if delta > 0:
A[j,j] += delta
delta_p = delta # delta_p will contain E_inf
# Update Gerschgorin bound estimates
if A[j,j] != norm_j:
temp = 1 - norm_j/A[j,j]
g[j+1:] += abs(A.diagonal()[j+1:])*temp
# Perform j th iteration of factorization
L[j,j] = np.sqrt(A[j,j]) # L[j j] overwrites A[j j]
for i in xrange(j+1, n):
L[j,j] = A[i,j]/L[j,j] # L[i j] overwrites A[i j]
A[i,j+1:i+1] -= L[i,j]*L[j+1:i+1,j]
# Final 2 by 2 submatrix
e = np.linalg.eigvalsh(A[-2:,-2:])
e.sort()
delta = max(0,
-e[0] + max(tau*(e[1] - e[0])/(1 - tau),
taut*gamma),
delta_p)
if delta > 0:
A[-2,-2] += delta
A[-1,-1] += delta
delta_p = delta
L[-2,-2] = np.sqrt(A[-2,-2]) # overwrites A[-2,-2]
L[-1,-2] = A[-1, -2]/L[-2,-2] # overwrites A[-1,-2]
L[-1,-1] = np.sqrt(A[-1,-1] - L[-1,-2]**2) # overwrites A[-1,-1]
return np.tril(L), E
"""
local i,j,k,m,n,gamm,tau,delta,deltaprev,sum1,sum2,eigvals,dlist,dmax,
P,Ptemp,Pprod,L,norm_A,normj,g,gmax;
n = rows(A);
m = cols(A);
L = zeros(n,n);
deltaprev=0;
gamm = maxc(abs(diag(A)));
tau = __macheps^(1/3);
if minc(eig(A)') > 0;
/* print("not pivoting"); */
tau = -1000000;
endif;
if m ne n;
print("sechol: input matrix must be square");
retp(A);
endif;
norm_A = maxc(sumc(abs(A)));
gamm = maxc(abs(diag(A)));
delta = maxc(maxc(__macheps*norm_A~__macheps));
Pprod = eye(n);
if n > 2;
for k (1,(n-2),1);
trap 1,1;
if minc((diag(A[(k+1):n,(k+1):n])' - A[k,(k+1):n]^2/A[k,k])') < tau*gamm
and minc(eig(A[(k+1):n,(k+1):n])) < 0;
dmax = maxindc(diag(A[k:n,k:n]));
if (A[(k+dmax-1),(k+dmax-1)] > A[k,k]);
/* print("pivot using dmax:"); print(dmax); */
P = eye(n);
Ptemp = P[k,.]; P[k,.] = P[(k+dmax-1),.]; P[(k+dmax-1),.] = Ptemp;
A = P*A*P;
L = P*L*P;
Pprod = P*Pprod;
endif;
g = zeros(n-(k-1),1);
for i ((k), (n), 1);
if i == 1;
sum1 = 0;
else;
sum1 = sumc(abs(A[i,k:(i-1)])');
endif;
if i == n;
sum2 = 0;
else;
sum2 = sumc(abs(A[(i+1):n,i]));
endif;
g[i-(k-1)] = A[i,i] - sum1 - sum2;
endfor;
gmax = maxindc(g);
if gmax /= k;
/* print("gerschgorin pivot on cycle:"); print(k); */
P = eye(n);
Ptemp = P[k,.]; P[k,.] = P[(k+dmax-1),.]; P[(k+dmax-1),.] = Ptemp;
A = P*A*P;
L = P*L*P;
Pprod = P*Pprod;
endif;
normj = sumc(abs(A[(k+1):n,k]));
delta = maxc((0~deltaprev~-A[k,k]+(maxc(normj~tau*gamm))')');
if delta > 0;
A[k,k] = A[k,k] + delta;
deltaprev = delta;
endif;
endif;
A[k,k] = sqrt(A[k,k]);
L[k,k] = A[k,k];
for i ((k+1), (n), 1);
if L[k,k] > __macheps; A[i,k] = A[i,k]/L[k,k]; endif;
L[i,k] = A[i,k];
A[i,(k+1):i] = A[i,(k+1):i] - L[i,k]*L[(k+1):i,k]';
if A[i,i] < 0; A[i,i] = 0; endif;
endfor;
endfor;
endif;
A[(n-1),n] = A[n,(n-1)];
eigvals = eig(A[(n-1):n,(n-1):n]);
dlist = ( (0|deltaprev|-minc(eigvals)+tau*maxc( (1/(1-tau))*(maxc(eigvals)-minc(eigvals))|gamm)) );
if dlist[1] > dlist[2];
delta = dlist[1];
else;
delta = dlist[2];
endif;
if delta < dlist[3];
delta = dlist[3];
endif;
if delta > 0;
A[(n-1),(n-1)] = A[(n-1),(n-1)] + delta;
A[n,n] = A[n,n] + delta;
deltaprev = delta;
endif;
A[(n-1),(n-1)] = sqrt(A[(n-1),(n-1)]);
L[(n-1),(n-1)] = A[(n-1),(n-1)];
A[n,(n-1)] = A[n,(n-1)]/L[(n-1),(n-1)];
L[n,(n-1)] = A[n,(n-1)];
A[n,n] = sqrt(A[n,n] - L[n,(n-1)]^2);
L[n,n] = A[n,n];
retp(Pprod'*L'*Pprod');
"""
"""
% CHOLMOD Modified Cholesky factorization
% R = cholmod(A) returns the upper Cholesky factor of A (same as CHOL)
% if A is (sufficiently) positive definite, and otherwise returns a
% modified factor R with diagonal enries >= sqrt(delta) and
% offdiagonal entries <= beta in absolute value,
% where delta and beta are defined in terms of size of diagonal and
% offdiagonal entries of A and the machine precision; see below.
% The idea is to ensure that E = A - R'*R is reasonably small if A is
% not too far from being indefinite. If A is sparse, so is R.
% The output parameter indef is set to 0 if A is sufficiently positive
% definite and to 1 if the factorization is modified.
%
% The point of modified Cholesky is to avoid computing eigenvalues,
% but for dense matrices, MODCHOL takes longer than calling the built-in
% EIG, because of the cost of interpreting the code, even though it
% only has one loop and uses vector operations.
% reference: Nocedal and Wright, Algorithm 3.4 and subsequent discussion
% (not Algorithm 3.5, which is more complicated)
% original algorithm is due to Gill and Murray, 1974
% written by M. Overton, overton@cs.nyu.edu, last modified Feb 2005
% convenient to work with A = LDL' where D is diagonal, L is unit
% lower triangular, and so R = (LD^(1/2))'
%
if sum(sum(abs(A-A'))) > 0
error('A is not symmetric')
end
% set parameters governing bounds on L and D (eps is machine epsilon)
n = length(A);
diagA = diag(A);
gamma = max(abs(diagA)); % max diagonal entry
xi = max(max(abs(A - diag(diagA)))); % max offidagonal entry
delta = eps*(max([gamma+xi, 1]));
beta = sqrt(max([gamma, xi/n, eps]));
indef = 0;
% initialize d and L
d = zeros(n,1);
if issparse(A)
L = speye(n); % sparse identity
else
L = eye(n);
end
% there are no inner for loops, everything implemented with
% vector operations for a reasonable level of efficiency
for j = 1:n
K = 1:j-1; % column index: all columns to left of diagonal
% d(K) doesn't work in case K is empty
djtemp = A(j,j) - L(j,K)*(d(K,1).*L(j,K)'); % C(j,j) in book
if j < n
I = j+1:n; % row index: all rows below diagonal
Ccol = A(I,j) - L(I,K)*(d(K,1).*L(j,K)'); % C(I,j) in book
theta = max(abs(Ccol));
% guarantees d(j) not too small and L(I,j) not too big
% in sufficiently positive definite case, d(j) = djtemp
d(j) = max([abs(djtemp), (theta/beta)^2, delta]);
L(I,j) = Ccol/d(j);
else
d(j) = max([abs(djtemp), delta]);
end
if d(j) > djtemp % A was not sufficiently positive definite
indef = 1;
end
end
% convert to usual output format: replace L by L*sqrt(D) and transpose
for j=1:n
L(:,j) = L(:,j)*sqrt(d(j)); % L = L*diag(sqrt(d)) bad in sparse case
end;
R = L';
if nargout == 3
E = A - R'*R;
end
"""