Source code for pympcc.sensitivity

"""Parametric sensitivity analysis for MPCC solutions (§5.1).

At a converged MPCC solution where MPCC-LICQ holds (no biactive pairs and
the active constraint Jacobian has full row rank), the implicit function
theorem applies to the KKT system of the **tightened NLP** (TNLP) — the
NLP obtained by fixing each comp pair's active side as an equality:

    ∇_x f(x; p) + J_c(x; p)^T λ = 0
    c(x; p) = 0

Differentiating with respect to a parameter vector ``p`` yields the saddle-
point linear system::

    ┌ ∇²_xx L   J_c^T ┐ ┌ dx*/dp ┐     ┌ ∂(∇_x L)/∂p ┐
    │                 │ │        │ = − │              │
    └ J_c       0     ┘ └ dλ*/dp ┘     └ ∂c/∂p       ┘

where ``c`` collects the equality, equality-pinned-G, equality-pinned-H,
active-inequality, and active-bound constraints (in that order — see
:func:`active_row_labels`).

This module exposes the linear-solve primitive only.  Callers supply the
two RHS blocks evaluated at ``x*`` directly; the higher-level convenience
that builds those blocks from JAX-traceable callables is part of §5.6.

Parameter-vector convention
---------------------------
``∂(∇_x L)/∂p`` is the partial derivative of the *Lagrangian gradient*
holding ``(x, λ)`` fixed.  When parameters enter only the objective, this
reduces to ``∂(∇_x f)/∂p``.  When constraints are also parametric, the
caller must include the cross-term ``Σ_i λ_i · ∂(∇_x c_i)/∂p``.

Multiplier convention follows :mod:`pympcc._sosc`: the Lagrangian Hessian
builder consumes raw IPOPT-convention multipliers, so callers do not need
to negate.  TNLP-refined multipliers are picked up automatically when
:func:`pympcc.solve` was invoked with ``tnlp_refine=True``; otherwise the
fallback is the zero vector with a ``UserWarning``.
"""
from __future__ import annotations

import logging
import warnings
from dataclasses import dataclass
from typing import Optional

import numpy as np

from ._constants import BIACTIVE_TOL as _BIACTIVE_TOL
from ._diagnostics import _stack_active_gradient_matrix, active_sets
from ._sosc import _build_hessian
from .problem import MPCCProblem
from .result import MPCCResult

_log = logging.getLogger(__name__)

__all__ = ["SensitivityResult", "sensitivity", "active_row_labels"]


_SKIP_NOT_CONVERGED = "not_converged"
_SKIP_BIACTIVE = "biactive_pairs"
_SKIP_HESSIAN = "no_hessian_callable_and_fd_failed"



[docs]
@dataclass
class SensitivityResult:
    """Result of a parametric sensitivity solve.

    Attributes
    ----------
    dx_dp : ndarray, shape (n, n_p)
        Parametric derivative of the primal solution.  Shape ``(n, 0)``
        when the check was skipped.
    dlam_dp : ndarray, shape (n_active, n_p)
        Parametric derivative of the Lagrange multipliers on the active
        constraint rows, in the order described by ``active_row_labels``.
    active_row_labels : list[tuple]
        Per-row labels for the active constraint Jacobian.  Each entry
        is one of ``("h", k)``, ``("G", i)``, ``("H", i)``, ``("g", j)``,
        ``("xL", j)``, or ``("xU", j)``.  The caller's ``dc_dp`` rows
        must match this order.
    rank_deficit : int
        ``0`` when the KKT matrix is non-singular.  Positive only when
        the regularised lstsq fallback was used.
    used_pseudoinverse : bool
        ``True`` when the dense ``solve`` failed and ``lstsq`` with
        Tikhonov regularisation was used instead.
    kkt_condition : float or None
        ``np.linalg.cond`` of the KKT matrix; ``None`` when the matrix
        was empty or the check was skipped.
    skipped_reason : str or None
        Set when prerequisites for IFT were not met:
        ``"not_converged"`` (``result.success`` was ``False``);
        ``"biactive_pairs"`` (``I_00 ≠ ∅`` — IFT requires LICQ);
        ``"no_hessian_callable_and_fd_failed"`` (FD fallback raised).
    """

    dx_dp: np.ndarray
    dlam_dp: np.ndarray
    active_row_labels: list
    rank_deficit: int = 0
    used_pseudoinverse: bool = False
    kkt_condition: Optional[float] = None
    skipped_reason: Optional[str] = None



def _build_active_row_labels(problem: MPCCProblem, sets: dict) -> list:
    """Per-row labels matching :func:`_stack_active_gradient_matrix` row order.

    Order: ``h`` (all eq) | ``G_{I_G}`` | ``H_{I_H}`` | ``g_{I_g}`` |
    ``bnd`` (sorted ``I_xL ∪ I_xU``).  When a variable appears in both
    ``I_xL`` and ``I_xU`` (pinned), prefers ``"xL"`` since the underlying
    matrix produces a single identity row per variable in the union.
    """
    labels: list = []
    for k in range(problem.n_eq):
        labels.append(("h", int(k)))
    for i in sets["I_G"]:
        labels.append(("G", int(i)))
    for i in sets["I_H"]:
        labels.append(("H", int(i)))
    for j in sets["I_g"]:
        labels.append(("g", int(j)))
    bound_idx = np.union1d(sets["I_xL"], sets["I_xU"]).astype(np.intp)
    I_xL_set = {int(j) for j in sets["I_xL"]}
    for j in bound_idx:
        labels.append(("xL" if int(j) in I_xL_set else "xU", int(j)))
    return labels



[docs]
def active_row_labels(
    result: MPCCResult,
    problem: MPCCProblem,
    *,
    active_tol: float = _BIACTIVE_TOL,
) -> list[str]:
    """Return active-row labels in the order :func:`sensitivity` expects ``dc_dp``.

    Useful for assembling the ``dc_dp`` matrix without first calling
    :func:`sensitivity`.

    Parameters
    ----------
    result, problem : MPCCResult, MPCCProblem
        Solved result and its source problem.
    active_tol : float
        Constraint tolerance for active-set detection (default ``1e-6``).

    Returns
    -------
    list of (str, int)
        Per-row labels.  See :class:`SensitivityResult.active_row_labels`.
    """
    sets = active_sets(result, problem, tol=active_tol)
    return _build_active_row_labels(problem, sets)



def _gather_multipliers(
    result: MPCCResult, problem: MPCCProblem,
) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, bool]:
    """Pick the best available Lagrangian multipliers for Hessian construction.

    Prefers TNLP-refined multipliers (signed and certified by the §2.6
    re-solve); falls back to zero vectors when no TNLP refinement is
    attached.  Returns ``(lam_g, lam_h, lam_G, lam_H, used_tnlp)``.
    """
    p = problem
    lam_g = np.zeros(p.n_ineq)
    lam_h = np.zeros(p.n_eq)
    lam_G = np.zeros(p.n_comp)
    lam_H = np.zeros(p.n_comp)

    tnlp = getattr(result, "tnlp_refined", None)
    if tnlp is not None and getattr(tnlp, "success", False):
        lam_G = np.asarray(tnlp.mult_comp_G, dtype=float)
        lam_H = np.asarray(tnlp.mult_comp_H, dtype=float)
        if tnlp.mult_ineq is not None:
            lam_g = np.asarray(tnlp.mult_ineq, dtype=float)
        if tnlp.mult_eq is not None:
            lam_h = np.asarray(tnlp.mult_eq, dtype=float)
        return lam_g, lam_h, lam_G, lam_H, True

    return lam_g, lam_h, lam_G, lam_H, False



[docs]
def sensitivity(
    result: MPCCResult,
    problem: MPCCProblem,
    *,
    dgrad_L_dp,
    dc_dp,
    fd_h: Optional[float] = None,
    regularize_eps: float = 1e-12,
    active_tol: float = _BIACTIVE_TOL,
) -> SensitivityResult:
    """Compute ``dx*/dp`` via implicit differentiation through the KKT system.

    Solves the saddle-point system

        [[H,  J_c^T], [J_c, 0]] · [dx; dλ] = − [dgrad_L_dp; dc_dp]

    where ``H = ∇²_xx L(x*, λ*)`` and ``J_c`` is the active constraint
    Jacobian.  Skips with ``skipped_reason`` set when MPCC-LICQ
    prerequisites fail (non-converged result, or biactive pairs present).

    Parameters
    ----------
    result : MPCCResult
        Converged solve result.  When ``tnlp_refine=True`` was used, the
        TNLP-refined multipliers are picked up automatically; otherwise
        a zero-multiplier fallback is used and a ``UserWarning`` is
        emitted (only when no analytic Hessian is available).
    problem : MPCCProblem
        Source problem (original space, not presolve-reduced).
    dgrad_L_dp : array-like, shape (n,) or (n, n_p)
        ``∂(∇_x L)/∂p`` at ``x*`` with ``(x, λ)`` held fixed.  When
        parameters enter only ``f``, this is ``∂(∇_x f)/∂p``; otherwise
        callers must include the cross-term
        ``Σ_i λ_i · ∂(∇_x c_i)/∂p``.  A 1-D input is treated as a single
        parameter column.
    dc_dp : array-like, shape (n_active,) or (n_active, n_p)
        ``∂c/∂p`` at ``x*``, with rows ordered to match
        :func:`active_row_labels`.  See :class:`SensitivityResult` for
        the per-row label format.
    fd_h : float, optional
        Step size for the central-difference Hessian fallback when
        ``problem.lagrangian_hessian`` is not provided.  Defaults to
        ``problem.fd_h``.
    regularize_eps : float
        Tikhonov ridge added to the KKT diagonal when the dense solve
        fails.  Default ``1e-12``.
    active_tol : float
        Active-set detection tolerance (default ``1e-6``).

    Returns
    -------
    SensitivityResult
    """
    p = problem
    n = p.n
    n_p = _infer_n_p(dgrad_L_dp, dc_dp)

    def _empty(reason: str) -> SensitivityResult:
        return SensitivityResult(
            dx_dp=np.zeros((n, n_p)),
            dlam_dp=np.zeros((0, n_p)),
            active_row_labels=[],
            skipped_reason=reason,
        )

    if not result.success:
        return _empty(_SKIP_NOT_CONVERGED)

    x = np.asarray(result.x, dtype=float)
    sets = active_sets(result, p, tol=active_tol)
    if sets["I_00"].size > 0:
        return _empty(_SKIP_BIACTIVE)

    Jc, _ = _stack_active_gradient_matrix(p, x, sets)
    labels = _build_active_row_labels(p, sets)
    n_active = int(Jc.shape[0])

    dgrad_L_dp_arr = _validate_rhs(
        dgrad_L_dp, expected_rows=n, n_p=n_p, name="dgrad_L_dp",
    )
    dc_dp_arr = _validate_rhs(
        dc_dp, expected_rows=n_active, n_p=n_p, name="dc_dp",
    )

    lam_g, lam_h, lam_G, lam_H, used_tnlp = _gather_multipliers(result, p)
    if not used_tnlp and p.lagrangian_hessian is None:
        warnings.warn(
            "sensitivity: no TNLP-refined multipliers available; the "
            "FD-Hessian fallback is using zero multipliers, which is "
            "exact only when constraints are linear in x. Pass "
            "tnlp_refine=True to pympcc.solve() for accurate sensitivity.",
            UserWarning,
            stacklevel=2,
        )
    _fd_h = fd_h if fd_h is not None else p.fd_h
    try:
        H = _build_hessian(x, p, lam_g, lam_h, lam_G, lam_H, _fd_h)
    except (ArithmeticError, AttributeError, TypeError, ValueError,
            RuntimeError, np.linalg.LinAlgError) as exc:
        _log.debug("sensitivity: _build_hessian failed (%s: %s); "
                   "returning SKIP_HESSIAN sentinel.",
                   type(exc).__name__, exc)
        return _empty(_SKIP_HESSIAN)

    K = np.block([
        [H,  Jc.T],
        [Jc, np.zeros((n_active, n_active))],
    ])
    rhs = -np.vstack([dgrad_L_dp_arr, dc_dp_arr])

    cond = float(np.linalg.cond(K)) if K.size else None

    used_pinv = False
    rank_def = 0
    try:
        sol = np.linalg.solve(K, rhs)
    except np.linalg.LinAlgError:
        used_pinv = True
        K_reg = K + regularize_eps * np.eye(K.shape[0])
        sol, _, rank, _ = np.linalg.lstsq(K_reg, rhs, rcond=None)
        rank_def = K.shape[0] - int(rank)

    return SensitivityResult(
        dx_dp=sol[:n],
        dlam_dp=sol[n:],
        active_row_labels=labels,
        rank_deficit=rank_def,
        used_pseudoinverse=used_pinv,
        kkt_condition=cond,
        skipped_reason=None,
    )



def _infer_n_p(dgrad_L_dp, dc_dp) -> int:
    """Infer the parameter count from either RHS block, treating 1-D as 1."""
    arr = np.asarray(dgrad_L_dp)
    if arr.ndim == 1:
        return 1
    if arr.ndim == 2:
        return arr.shape[1]
    raise ValueError(
        f"dgrad_L_dp must be 1-D or 2-D; got ndim={arr.ndim}"
    )


def _validate_rhs(arr, *, expected_rows: int, n_p: int, name: str) -> np.ndarray:
    """Coerce ``arr`` to shape ``(expected_rows, n_p)`` or raise."""
    a = np.asarray(arr, dtype=float)
    if a.ndim == 1:
        a = a.reshape(-1, 1)
    if a.ndim != 2:
        raise ValueError(f"{name} must be 1-D or 2-D; got ndim={a.ndim}")
    if a.shape[0] != expected_rows:
        raise ValueError(
            f"{name} has {a.shape[0]} rows; expected {expected_rows} "
            f"(rows must follow active_row_labels order)"
        )
    if a.shape[1] != n_p:
        raise ValueError(
            f"{name} has {a.shape[1]} parameter columns; expected {n_p}"
        )
    return a