"""
Rank-1 approximation of a partial Hermitian matrix.
"""
import logging
import numpy as np
import matplotlib.pyplot as plt
from hynet.types_ import hynet_float_, hynet_complex_
from hynet.utilities.base import (create_sparse_matrix,
create_sparse_diag_matrix,
Timer)
from hynet.utilities.graph import traverse_graph, get_graph_components
_log = logging.getLogger(__name__)
[docs]def rank1approx_via_traversal(V, edges, roots):
"""
Return a rank-1 approximation ``vv^H`` of ``V`` based on a graph traversal.
Assuming that ``V = vv^H`` (i.e., ``rank(V) = 1``), the off-diagonal
element in row ``i`` and column ``j`` is ``V_ij = v_i*conj(v_j)``. Thus,
``|v_j| = sqrt(V_jj)`` and ``arg(v_j) = arg(v_i) - arg(V_ij)``. This is
utilized to recover ``v`` by setting its element's magnitude to the square
root of the diagonal elements of ``V`` and reconstructing the angle of its
elements by accumulating the angle differences from the respective root
node of in the graph associated with ``V``. The angle at the root node(s)
is set to zero.
"""
V = V.tocsr() # Conversion to CSR for indexing
dim_v = V.shape[0]
v = np.zeros(dim_v, dtype=hynet_complex_)
np.sqrt(V.diagonal().real, out=v)
def update_angle(node, node_pre, cycle):
"""Callback for the graph traversal to update the angles."""
if cycle or np.isnan(node_pre):
return
angle = np.angle(v[node_pre]) - np.angle(V[node_pre, node])
v[node] *= np.exp(1j * angle)
traverse_graph(np.arange(dim_v), edges, update_angle, roots)
return v
[docs]def calc_rank1approx_rel_err(V, v, edges):
"""Calculate the relative reconstruction error for all edges."""
(e_src, e_dst) = edges
if not e_src.size:
return np.ndarray(0, dtype=hynet_float_)
vv_ = np.multiply(v[e_src], v[e_dst].conj())
V_ = np.asarray(V.tocsr()[e_src, e_dst])[0]
return np.true_divide(np.abs(vv_ - V_), np.abs(vv_))
[docs]def calc_rank1approx_mse(V, v, edges):
"""
Calculate the mean squared error for the rank-1 approximation.
Returns the mean of the squared error of the elements on the diagonal as
well as those on sparsity pattern defined by ``edges``.
"""
# ATTENTION: Use caution when modifying this function as the least
# squares rank-1 approximation depends on this particular MSE calculation.
(e_src, e_dst) = edges
vv_ = np.concatenate((np.square(np.abs(v)),
np.multiply(v[e_src], v[e_dst].conj()),
np.multiply(v[e_dst], v[e_src].conj())))
# The case distinction is necessary due to inconsistencies of SciPy...
V = V.tocsr() # Conversion to CSR for indexing
V_ = V.diagonal()
if e_src.size:
V_ = np.concatenate((V_,
np.asarray(V[e_src, e_dst])[0],
np.asarray(V[e_dst, e_src])[0]))
return np.square(np.abs(vv_ - V_)).mean()
[docs]def rank1approx_via_least_squares(V, edges, roots, grad_thres=1e-7,
mse_rel_thres=0.002, max_iter=300,
show_convergence_plot=False):
"""
Return a rank-1 approximation ``vv^H`` of ``V`` by minimizing the squared error.
The rank-1 approximation may be put as the following optimization problem::
minimize ||P(vv^H - V)||_F^2
v in C^N
Therein, ``P(X)`` is the projection of the matrix ``X`` onto the sparsity
pattern defined by ``edges``, i.e., all diagonal elements and the
off-diagonal elements ``(edges[k][0], edges[k][1])`` and
``(edges[k][1], edges[k][0])`` for ``k = 0,...,K-1``, where ``K`` is the
number of edges. ``||.||_F`` denotes the Frobenius norm. For ``V >= 0``
(psd) and ``V != 0``, this problem is nonconvex, i.e., the objective is not
convex over the entire ``C^N``. (If ``rank(V) = 1`` and ``V = xx^H``, then
the objective is locally convex at ``x``.)
This function finds a (local) optimizer of this problem using a Wirtinger
calculus based gradient descent with an Armijo-Goldstein step size control,
which is initialized with the vector ``v`` obtained from the graph
traversal rank-1 approximation method. (During the iterations it is ensured
that the least squares error decreases, otherwise the iterations are
aborted with a warning. Consequently, in terms of the least squares error,
this approximation is at least as accurate as the traversal-based
approximation.)
Parameters
----------
V : .hynet_sparse_
Matrix that should be approximated by ``vv^H``.
edges : (numpy.ndarray[.hynet_int_], numpy.ndarray[.hynet_int_])
Specification of the sparsity pattern of the matrix ``V``.
roots : numpy.ndarray[.hynet_int_]
Root nodes for the graph components of the sparsity pattern. The
absolute angle in ``v`` is adjusted such that the absolute angle
at the root nodes is zero.
grad_thres : .hynet_float_, optional
Absolute threshold on the 2-norm of the objective's gradient w.r.t.
``v`` divided by ``N``. (Termination criterion on the first order
condition for local optimality.)
mse_rel_thres : .hynet_float_, optional
Threshold on the relative improvement of ``||P(vv^H - V)||_F^2 / N``
from the candidate solution ``v`` in iteration ``i`` to ``v`` in
iteration ``i + 1``. (Termination criterion on stalling progress.)
max_iter : .hynet_int_, optional
Maximum number of iterations. (Fallback in case the other termination
criteria are not met in a reasonable number of iterations.)
show_convergence_plot : bool, optional
If True, a plot is shown to illustrate the convergence behavior.
Returns
-------
v : numpy.ndarray
Vector ``v``, where ``vv^H`` approximates ``V`` on the sparsity pattern.
"""
timer = Timer()
V = V.tocsr() # Conversion to CSR for indexing
(e_src, e_dst) = edges
N = V.shape[0]
K = len(e_src)
f = lambda x: calc_rank1approx_mse(V, x, edges) * (N + 2*K)
gamma = 0.5
iteration = 0
v = rank1approx_via_traversal(V, edges, roots)
mse = calc_rank1approx_mse(V, v, edges)
if show_convergence_plot:
iter_mse = [mse]
iter_grad = [np.nan]
vv_offd = np.ones(K, dtype=hynet_complex_)
P_vv = create_sparse_diag_matrix(np.ones(N)) \
+ create_sparse_matrix(e_src, e_dst, vv_offd, N, N) \
+ create_sparse_matrix(e_dst, e_src, vv_offd.conj(), N, N)
# The case distinction is necessary due to inconsistencies of SciPy...
P_V = create_sparse_diag_matrix(V.diagonal())
if K != 0:
P_V += create_sparse_matrix(e_src, e_dst,
np.asarray(V[e_src, e_dst])[0], N, N) \
+ create_sparse_matrix(e_dst, e_src,
np.asarray(V[e_dst, e_src])[0], N, N)
while iteration < max_iter:
np.multiply(v[e_src], v[e_dst].conj(), out=vv_offd)
P_vv[range(N), range(N)] = np.square(np.abs(v))
P_vv[e_src, e_dst] = vv_offd
P_vv[e_dst, e_src] = vv_offd.conj()
grad_v = 2 * (P_vv - P_V).dot(v)
grad_v_mse = np.square(np.abs(grad_v)).mean()
if grad_v_mse <= grad_thres:
_log.debug("LS rank-1 approx. ~ Threshold on gradient was reached.")
break
gamma = get_armijo_step_size(f, v, grad_v, 2 * gamma)
v_new = v - gamma * grad_v
mse_new = calc_rank1approx_mse(V, v_new, edges)
if mse_new > mse:
_log.warning(("The gradient descent method was aborted after {0} "
"iterations and a gradient 'MSE' of {1:.3g} as the "
"objective increased.").format(iteration, grad_v_mse))
break
if 1 - mse_new/mse < mse_rel_thres:
_log.debug("LS rank-1 approx. ~ Progress stalled, iteration aborted.")
break
if show_convergence_plot:
iter_mse.append(mse_new)
iter_grad.append(grad_v_mse)
(v, mse) = (v_new, mse_new)
iteration += 1
if iteration == max_iter:
_log.debug("LS rank-1 approx. ~ Maximum number of iterations reached.")
# Adjust absolute phase in all components
components = get_graph_components(np.arange(N), edges, roots)
for component in components:
v[component] *= np.exp(-1j * np.angle(v[component[0]]))
if show_convergence_plot:
fig = plt.figure(figsize=(10, 6))
fig.suptitle("Least-Squares Rank-1 Approximation via Gradient Descent")
ax = fig.add_subplot(2, 1, 1)
ax.plot(iter_mse, color='xkcd:sea blue', linestyle='-', linewidth=1)
ax.margins(x=0)
ax.set_xlim(left=0)
ax.set_title("Reconstruction MSE")
ax = fig.add_subplot(2, 1, 2)
ax.plot(iter_grad, color='xkcd:sea blue', linestyle='-', linewidth=1)
ax.margins(x=0)
ax.set_xlim(left=0)
ax.set_title("2-norm of the gradient over the number of dimensions")
ax.set_xlabel('Iteration')
fig.subplots_adjust(hspace=0.3)
fig.show()
_log.debug("LS rank-1 approx. ~ Ended with gradient 'MSE' of "
"{0:.3e} after {1} iterations ({2:.3f} sec.)"
.format(grad_v_mse, iteration, timer.total()))
return v
[docs]def get_armijo_step_size(f, x, grad_fx, gamma=1, epsilon=0.2, alpha=2):
"""
Return a step size based on the Armijo-Goldstein condition.
This step size is designed for a Wirtinger calculus based gradient descent
method that minimizes the function ``f(x)``, where ``x in C^N`` is the
current candidate solution, ``grad_fx`` is the gradient vector w.r.t.
``conj(x)`` evaluated at ``x``, and the step direction ``d = -grad_fx``.
This function returns a step size ``s in R_(+)``, such that the algorithmic
map reads ``x -> x + s * d``.
"""
# LITERATURE:
# 1) Armijo's rule is described, e.g., in "Nonlinear Programming - Theory
# and Algorithms" by Bazaraa, Sherali, and Shetty (3rd ed., ch. 8.3).
# 2) Relevant aspects of Wirtinger calculus are described, e.g., in
# "Complex-Valued Adaptive Signal Processing Using Nonlinear Functions"
# by Li and Adali (EURASIP Journal on Advances in Signal Processing)
fx = f(x)
dd = -epsilon * 2 * np.sum(np.square(np.abs(grad_fx)))
theta = lambda gamma: f(x - gamma*grad_fx)
theta_tld = lambda gamma: fx + gamma * dd
while gamma > 1e-10:
delta = theta(gamma) - theta_tld(gamma)
delta_bar = theta(alpha*gamma) - theta_tld(alpha*gamma)
if delta <= 0 < delta_bar:
return gamma
if delta <= 0:
gamma *= alpha
else:
gamma /= alpha
_log.warning("The step size is very small.")
return gamma