Source code for skerch.a_posteriori

#!/usr/bin/env python
# -*- coding: utf-8 -*-


"""A-posteriori utilities for sketched decompositions.

This module implements scalable utilities to assess the quality of a
sketched approximation, once it has been obtained.
"""

import math

import torch

from .algorithms import SketchedAlgorithmDispatcher
from .utils import complex_dtype_to_real


################################################################################
# # A POSTERIORI ERROR ESTIMATION
# ##############################################################################

[docs]
def apost_error_bounds(num_measurements, rel_err, is_complex=False):
    """Probabilistic bounds for a-posteriori sketch error estimation.

    The Frobenius error between any two linear operators can be estimated
    from sketches using :func:`apost_error`. But this estimation is random
    and subject to error.

    This function retrieves the probabilistic error bounds presented in
    `[TYUC2019, 6.4] <https://arxiv.org/abs/1902.08651>`_, which tell us
    the probability that the estimated error is wrong by a given amount.
    Interestingly, these bounds only depend on the number of measurements
    and data type, and *not* on the operator dimension.

    .. note::

      This function does not perform the error estimation itself, it merely
      informs about the probabilistic bounds associated with a given error.
      For the error estimation, see :func:`apost_error`.

    :param int num_measurements: How many measurements (assumed Gaussian iid)
      will be performed for the a-posteriori error estimation. More
      measurements means tighter error bounds (i.e. the output of our error
      estimation is more reliable).
    :param float rel_err: A float between 0 and 1, indicating the relative
      error that we want to consider. If ``x`` is the actual error we want to
      estimate, and ``y`` is our estimation, this function will return the
      probability that ``y`` is in the range ``[(1-rel_err)x, (1+rel_err)x]``.
    :param is_complex: The returned probabilities will be tighter if the
      error is measured on complex linear operators, using complex Gaussian
      noise. This boolean flag allows to specify this (assumed real-valued if
      false).
    :returns: Probabilities that, given the indicated number
      of measurements, the a-posteriori method yields values outside of
      ``actual_error * (1 +/- rel_err)``. Ideally, we want a sufficient number
      of measurements, such that the returned probabilities for small
      ``rel_err`` are themselves small (this means that the corresponding error
      estimation is tight).
    """
    if num_measurements < 1:
        raise ValueError("Num measurements must be >=1")

    if rel_err < 0 or rel_err > 1:
        raise ValueError("rel_err expected between 0 and 1")
    beta = 2 if is_complex else 1
    experr = math.exp(rel_err)
    beta_meas_half = beta * num_measurements / 2
    #
    lo_p = (experr * (1 - rel_err)) ** beta_meas_half
    hi_p = (experr / (1 + rel_err)) ** (-beta_meas_half)
    #
    result = {
        f"LOWER: P(err<={1 - rel_err}x)": lo_p,
        f"HIGHER: P(err>={1 + rel_err}x)": hi_p,
    }
    return result




[docs]
def apost_error(
    lop1,
    lop2,
    device,
    dtype,
    num_meas=5,
    seed=0b1110101001010101011,
    noise_type="gaussian",
    meas_blocksize=None,
    dispatcher=SketchedAlgorithmDispatcher,
    adj_meas=None,
):
    r"""A-posteriori sketched error estimation.

    This function implements the error estimation procedure discussed in
    `[TYUC2019, 6] <https://arxiv.org/abs/1902.08651>`_.
    Given two linear operators :math:`A, \hat{A}`, it performs
    ``num_measurements`` random vector multiplications
    :math:`y_i = Av_i, \hat{y}_i = \hat{A}v_i`, and the :math:`y` outputs can
    then be used to estimate the norms
    :math:`\lVert A \rVert_F^2, \lVert \hat{A} \rVert_F^2`, as well as the
    error :math:`\lVert A - \hat{A} \rVert_F`.

    .. note::
      The test measurements performed here are assumed to be random and
      independent of how either ``lop`` was obtained. For this reason, it is
      important to pick a combination of ``noise_type`` and ``seed`` that does
      not overlap with any other noise generation procedure used before.

    :param lop1: Linear operator with a ``shape=(h, w)`` attribute and
      implementing a left (``x @ mat``) or right ``(mat @ x)`` matmul op.
      It is assumed to match the given ``device`` and ``dtype``.
    :param lop2: Linear operator to compare ``lop1`` against. It must match
      in shape, device and dtype.
    :param num_meas: Number of test measurements that will be performed on
      ``lop1`` and ``lop2``.
    :param adjoint: If true, left-matmul is used, otherwise right-matmul.
    :returns: A tuple ``((f1, f2, e), (F1, F2, E))``, where ``f1, f2`` are the
      estimated Frobenius norms squared of ``lop1, lop2`` and ``e`` is the
      estimated Frobenius error squared. The uppercase counterparts are lists
      containing all ``num_meas`` individual measurements (which are averaged
      to obtain the estimates). To get confidence bounds on this estimate,
      use :func:`apost_error_bounds`.
    """
    h, w = lop1.shape
    h2, w2 = lop2.shape
    if lop1.shape != lop2.shape:
        raise ValueError("Linear operators must be of same shape!")
    if num_meas <= 0 or num_meas > min(h, w):
        raise ValueError(
            "Measurements must be between 1 and number of rows/columns!"
        )
    if meas_blocksize is None:
        meas_blocksize = num_meas
    if adj_meas is None:
        adj_meas = h > w  # this seems to be more accurate
    #
    rdtype = complex_dtype_to_real(dtype)
    frob1_all = torch.empty(num_meas, dtype=rdtype, device=device)
    frob2_all = torch.empty_like(frob1_all)
    dist_all = torch.empty_like(frob1_all)
    mop = dispatcher.mop(
        noise_type,
        (h if adj_meas else w, num_meas),
        seed,
        dtype,
        meas_blocksize,
        register=False,
    )
    #
    for block, idxs in mop.get_blocks(dtype, device):
        # we need that block @ block.H approximates I
        # assuming block is by-column!
        block = block.T * ((len(idxs) ** 0.5) / block.norm(dim=1))
        # block has been transposed
        meas1 = block @ lop1 if adj_meas else lop1 @ block.T
        meas2 = block @ lop2 if adj_meas else lop2 @ block.T
        #
        frob1 = (meas1 * meas1.conj()).real.sum(dim=1 if adj_meas else 0)
        frob2 = (meas2 * meas2.conj()).real.sum(dim=1 if adj_meas else 0)
        meas1 -= meas2
        dist = (meas1 * meas1.conj()).real.sum(dim=1 if adj_meas else 0)
        #
        frob1_all[idxs] = frob1
        frob2_all[idxs] = frob2
        dist_all[idxs] = dist
    #
    f1, f2, d = frob1_all.mean(), frob2_all.mean(), dist_all.mean()
    return ((f1, f2, d), (frob1_all, frob2_all, dist_all))



# ##############################################################################
# # SCREE
# ##############################################################################

[docs]
def scree_bounds(S, err_frob):
    """A-posteriori scree bounds for low-rank approximations.

    Reference: `[TYUC2019, 6.5] <https://arxiv.org/abs/1902.08651>`_.

    When we do a sketched eigen- or singular-decomposition of a linear
    operator, we often don't know what is the effective rank of said operator,
    and whether we took enough sketched measurements to cover it.
    A scree analysis is a tool to estimate the smallest rank that captures
    a sufficient amount of the actual matrix, typically by looking at
    inflection points in the curve associated with sharp decays in
    spectral norm.

    A common issue with sketched methods is that we don't have access to the
    actual spectrum, and thus we can't perform a scree analysis. Instead,
    it is possible to use estimates from :func:`apost_error` to retrieve
    upper and lower scree bounds, which ideally would still inform us about
    the effective rank of the original operator.

    :param S: Vector of the estimated eigen/singular values, expected to
      contain entries in non-ascending magnitude.
    :param err_frob: A-posteriori estimate of ``frob_norm(M - M')``, where
      ``M'`` is the recovery of ``M`` with singular values ``S``, as returned
      by :func:`apost_error` (note that here the norm is not squared).

    Usage example::

      U, S, Vh = ssvd(A)
      Ahat = CompositeLinOp((("US", U * S), ("Vh", Vh)))
      (f1, _, err), _ = apost_error(A, Ahat, "...")
      scree_lo, scree_hi = scree_bounds(S, err**0.5)
    """
    if (abs(S).diff() > 0).any():
        raise ValueError("Provided S must be given in non-ascending magnitude!")
    approx_norm = S.norm()
    # Nonincreasing curve with the (estimated) residual energies, going from
    # approx_norm down to 0
    residuals = (S**2).flip(0).cumsum(0).flip(0) ** 0.5
    # the lower scree bound is assumed to be given by the normalized
    # residuals, since the least-squares recovery scree tends to decay faster
    # than hte actual scree. Note that we divide by approx_norm, because the
    # lower bound must start at 1 (dividing estimated residuals by original
    # norm causes a mismatch and the resulting curve is not a scree)
    lo_scree = (residuals / approx_norm) ** 2
    # now the upper scree bound is given by adding the estimated error on
    # top of the lower bound.
    hi_scree = ((residuals + err_frob) / approx_norm) ** 2
    #
    return lo_scree, hi_scree