"""This module implements a scikit-learn compatible, quantum support vector machine."""

from __future__ import annotations

import warnings
from collections.abc import Callable, Mapping
from typing import (
    TYPE_CHECKING,
    Any,
    cast,
)

import numpy as np
from numpy.typing import ArrayLike, NDArray
from sklearn.base import ClassifierMixin, ClassifierTags, Tags
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.multiclass import type_of_target, unique_labels
from sklearn.utils.validation import check_is_fitted, check_X_y, validate_data
from tno.quantum.ml.components import QUBOEstimator, get_default_solver_config_if_none
from tno.quantum.optimization.qubo.components import QUBO, SolverConfig
from tno.quantum.utils.serialization import Serializable
from tno.quantum.utils.validation import check_int, check_real, check_string

from tno.quantum.ml.classifiers.svm.kernels import get_kernel

if TYPE_CHECKING:
    from typing import Self

    from tno.quantum.utils import BitVector


class ComparableLabelEncoder(LabelEncoder):  # type: ignore[misc]
    """LabelEncoder that can be compared to another LabelEncoder."""

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, LabelEncoder):
            return False
        return np.array_equal(self.classes_, other.classes_)

    def __hash__(self) -> int:
        error_msg = "unhashable type: 'ComparableLabelEncoder'"
        raise NotImplementedError(error_msg)


class SupportVectorMachine(ClassifierMixin, QUBOEstimator):  # type: ignore[misc]
    """Support vector machine."""

    def __init__(  # noqa: PLR0913
        self,
        *,
        K: int = 2,  # noqa: N803
        C: float = 1.0,  # noqa: N803
        xi: str | float = "auto",
        kernel: str | Callable[[ArrayLike, ArrayLike], float] = "rbf",
        gamma: str | float = "scale",
        degree: int = 3,
        coef0: float = 0,
        solver_config: SolverConfig | Mapping[str, Any] | None = None,
    ) -> None:
        r"""Init of the SupportVectorMachine.

        The SupportVectorMachine minimizes the following objective function:

        .. _QUBO cost function:

        .. math::

            \frac{C}{2^K-1}\sum_{n,m,k,j}a_{n,k}a_{m,j}2^{k+j-1}y_ny_mk(x_n,x_m)
            -\sum_{n,k}a_{n,k}2^k
            + \xi \frac{C}{2^K-1}\left(\sum_{n,k}2^ka_{n,k}y_n\right)^2.

        **Details:**

        * $a_{n,k}$ are the binary decision variables.
        * $x_{n}$ is the $n^{th}$ feature vector, i.e., the $n^{th}$ row of the
          feature matrix $X$.
        * $y_n$ is the $n^{th}$ target value. The classes of the input vector `y`
          are converted such that $y_n\in\{-1,1\}$.
        * $k(\cdot,\cdot)$ is the kernel function.

        Args:
            K: Number of variables used in the variable encoding. The total number
                of variables in the objective function of the SVM is
                ``n_samples * K``. `K` must be a strictly positive integer.
                Defaults to 2.
            C: Upper limit of the variables. `C` must be a strictly positive real
                number. Default is 1.
            xi: Penalty parameter to enforce the constraint
                $\sum_i y_i\alpha_i = 0$. `xi` is relative to the largest absolute
                term of the unconstrained problem. `xi` must be a positive real
                number or ``'auto'``. If ``'auto'`` is used, `xi` is set to
                ``2.5 * K``. Default is ``'auto'``.
            kernel: Kernel to use, choose from
                ``{'rbf', 'linear', 'poly', 'sigmoid'}`` or a custom callable.
                Default is ``'rbf'``. For more information on the kernels, see the
                documentation of :py:mod:`~tno.quantum.ml.classifiers.svm.kernels`.
            gamma: Kernel parameter of the ``'rbf'``, ``'poly'`` and ``'sigmoid'``
                kernels. If ``'scale'`` is given, then
                ``gamma = 1 / (n_features_in * X_train.var())``. If ``'auto'`` is
                given, then ``gamma = 1 / n_features_in``. If a real number is
                given, then this value will be used for gamma. Default is
                ``'scale'``. For more information, see the documentation of
                :py:mod:`~tno.quantum.ml.classifiers.svm.kernels`.
            degree: Degree of the ``'poly'`` kernel. Default is 3. For more
                information, see the documentation of
                :py:func:`~tno.quantum.ml.classifiers.svm.kernels.poly_kernel`.
            coef0: Offset in the ``'sigmoid'`` or ``'poly'`` kernel. Default is 0.
                For more information, see the documentation of
                :py:func:`~tno.quantum.ml.classifiers.svm.kernels.sigmoid_kernel`
                or :py:func:`~tno.quantum.ml.classifiers.svm.kernels.poly_kernel`.
            solver_config: A QUBO solver configuration or ``None``. In the former
                case, it includes a name and options. If ``None`` is given, the
                default solver configuration as defined in
                :py:func:`~tno.quantum.ml.components.get_default_solver_config_if_none`
                will be used. Default is ``None``.

        Attributes:
            gamma_: Kernel parameter of the ``'rbf'``, ``'poly'`` and ``'sigmoid'``
                kernels.
            y_spin_: Spin-encoded labels.
            alphas_: Coefficients of the SVM.
            bias_: Bias of the SVM.
            label_encoder_: Label encoder for the classes.
            classes_: Classes of the SVM.
        """  # noqa: E501
        self.K = K
        self.C = C
        self.xi = xi
        self.kernel = kernel
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        super().__init__(solver_config=solver_config)

        self.gamma_: None | float
        self.y_spin_: NDArray[np.int8]
        self.alphas_: NDArray[np.float64]
        self.bias_: float
        self.label_encoder_: ComparableLabelEncoder
        self.classes_: NDArray[np.int_]

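    # A minimal usage sketch (illustrative only; assumes the class is
    # re-exported from ``tno.quantum.ml.classifiers.svm`` and that the default
    # QUBO solver is available):
    #
    #     >>> import numpy as np
    #     >>> from tno.quantum.ml.classifiers.svm import SupportVectorMachine
    #     >>> X = np.array([[0.0, 0.0], [0.2, 0.1], [1.0, 1.0], [0.9, 1.1]])
    #     >>> y = np.array([0, 0, 1, 1])
    #     >>> svm = SupportVectorMachine(K=2, C=1.0, kernel="rbf")
    #     >>> y_pred = svm.fit(X, y).predict(X)
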
    def _validate_params(self) -> None:
        """Validate the parameters of the SVM.

        Raises:
            ValueError: If the SVM parameters are not valid.
        """
        check_int(self.K, "K", l_bound=1)
        check_real(self.C, "C", l_bound=0, l_inclusive=False)
        check_int(self.degree, "degree")
        check_real(self.coef0, "coef0")

    def _make_qubo(
        self, X: NDArray[np.float64], y: NDArray[np.float64] | None = None
    ) -> QUBO:
        """Make a SVM QUBO in the QUBO format of the optimization pipeline.

        Args:
            X: The input data.
            y: The target values. The array must contain values in {-1, 1}.

        Returns:
            QUBO of the SVM.
        """
        if y is None:
            error_msg = "y must be provided for making the QUBO."
            raise ValueError(error_msg)

        # Calculate gamma
        self.gamma_ = self._check_and_calc_gamma()
        kernel = get_kernel(
            self.kernel, gamma=self.gamma_, degree=self.degree, coef0=self.coef0
        )

        n_variables = X.shape[0] * self.K
        scalar = self.C / (2**self.K - 1)

        # Linear and quadratic terms of the unconstrained objective. On the
        # diagonal, y[n] * y[n] == 1 for spin labels, so it is omitted.
        qubo_base = np.zeros((n_variables, n_variables))
        for a_nk in range(n_variables):
            n, k = divmod(a_nk, self.K)
            qubo_base[a_nk, a_nk] -= 2**k
            qubo_base[a_nk, a_nk] += scalar * 2 ** (2 * k - 1) * kernel(X[n], X[n])
            for a_mj in range(a_nk + 1, n_variables):
                m, j = divmod(a_mj, self.K)
                qubo_base[a_nk, a_mj] += (
                    scalar * 2 ** (k + j) * y[n] * y[m] * kernel(X[n], X[m])
                )

        # Quadratic penalty enforcing the constraint sum_{n,k} 2^k a_{n,k} y_n = 0.
        qubo_constraint = np.zeros((n_variables, n_variables))
        for a_nk in range(n_variables):
            n, k = divmod(a_nk, self.K)
            qubo_constraint[a_nk, a_nk] += 2 ** (2 * k - 1) * scalar
            for a_mj in range(a_nk + 1, n_variables):
                m, j = divmod(a_mj, self.K)
                qubo_constraint[a_nk, a_mj] += scalar * 2 ** (k + j) * y[n] * y[m]

        penalty = max(-qubo_base.min(), qubo_base.max()) * self._check_and_calc_xi()
        return QUBO(qubo_base + penalty * qubo_constraint)

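    # Indexing sketch for the QUBO above (illustrative only): each sample n
    # owns K consecutive binary variables, so a flat variable index decodes
    # via divmod. With K = 2:
    #
    #     >>> [divmod(idx, 2) for idx in range(6)]  # (sample n, bit k)
    #     [(0, 0), (0, 1), (1, 0), (1, 1), (2, 0), (2, 1)]
    #
    # Bit k of sample n carries weight 2**k in the encoded coefficient.
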
    def fit(self, X: ArrayLike, y: ArrayLike | None = None) -> Self:
        """Fit the model to data matrix X and targets y.

        The model is fitted by minimizing the `QUBO cost function`_ shown in the
        ``__init__``.

        Args:
            X: The input data. Should be a 2-dimensional real-valued ArrayLike
                containing the features. Must be the same length as `y`.
            y: The target values. There must be exactly 2 classes (unique
                values). Must be the same length as `X`.

        Returns:
            Self, a trained classifier.
        """
        if y is None:
            error_msg = "requires y to be passed, but the target y is None"
            raise ValueError(error_msg)
        self._validate_params()

        # Fitting is done on spin labels.
        self.y_spin_ = self._compute_spin_labels(y)
        svm = cast("SupportVectorMachine", super().fit(X, y=self.y_spin_))

        # Overwrite y_ with the original y.
        self.y_ = np.asarray(y)
        return svm

    def predict(self, X: ArrayLike) -> NDArray[Any]:
        """Predict the class labels for the provided data.

        Predictions are theoretically based on
        :py:func:`~svm.SupportVectorMachine.predict_proba`, but this method
        uses a more efficient implementation based on the sign of the decision
        function.

        Args:
            X: Test samples.

        Returns:
            Array containing the predicted class labels.
        """
        # Check if fit has been called
        check_is_fitted(self)

        # Input validation
        X = np.asarray(validate_data(self, X=X, reset=False))

        y_pred_spin = np.sign(np.array([self._predict_svm(x) for x in X]))
        y_pred_binary = ((y_pred_spin + 1) / 2).astype(np.int8)
        y_pred = self.label_encoder_.inverse_transform(y_pred_binary)
        return np.asarray(y_pred)

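    # Decoding sketch (illustrative only): a decision value of -0.8 has sign
    # -1 (spin class), which (spin + 1) / 2 maps to binary class 0, and
    # ``label_encoder_.inverse_transform`` then maps back to the original
    # label.
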
    def predict_proba(self, X: ArrayLike) -> NDArray[np.float64]:
        r"""Return probability-like estimates for the test data X.

        .. warning::
            The values returned by this method are **not true calibrated
            probabilities**. If a true probabilistic interpretation is needed,
            consider applying a post-hoc calibration method such as Platt
            scaling.

        The predictions are based on the following formula:

        .. _prediction probability function:

        .. math::

            \begin{align}
                p(x) &= \frac{1-b}{2}
                - \frac{C}{2\left(2^K-1\right)}\sum_n \sum_k 2^k a_{n,k}
                y_n^\text{train}k(x_n^\text{train}, x)\\
                \text{, where}\\
                b &= \frac{
                    \sum_n\left(\sum_k2^ka_{n,k}\right)
                    \left(1-\frac{\sum_k2^ka_{n,k}}{2^K-1}\right)
                    \left(y^\text{train}_n-\frac{C}{2^K-1}\sum_m\sum_k2^ka_{m,k}
                    y^\text{train}_mk(x^\text{train}_n,x^\text{train}_m)\right)
                }{
                    \sum_n\left(\sum_k2^ka_{n,k}\right)
                    \left(1-\frac{\sum_k2^ka_{n,k}}{2^K-1}\right)
                }
            \end{align}

        **Details:**

        * $a_{n,k}$ are the binary solutions of the fitted QUBO.
        * $x_{n}^\text{train}$ is the $n^{th}$ feature vector of the training
          data.
        * $y_n^\text{train}$ is the $n^{th}$ target value of the training data.
          The classes of the input vector `y` are converted such that
          $y_n^\text{train}\in\{-1,1\}$.
        * $k(\cdot,\cdot)$ is the kernel function.

        Args:
            X: Test samples.

        Returns:
            The class probabilities of the input samples. Classes are ordered
            in lexicographic order.
        """  # noqa: E501
        # Check if fit has been called
        check_is_fitted(self)

        # Input validation
        X = np.asarray(validate_data(self, X=X, reset=False))

        proba_array = np.zeros([X.shape[0], 2])
        for i, x in enumerate(X):
            proba_array[i, 0] = 0.5 - 0.5 * self._predict_svm(x)
        proba_array[:, 1] = 1 - proba_array[:, 0]
        return proba_array

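    # Probability sketch (illustrative only): for a decision value f(x), the
    # returned row is [0.5 - 0.5 * f(x), 0.5 + 0.5 * f(x)], which always sums
    # to 1. E.g. f(x) = 0.4 yields approximately [0.3, 0.7], leaning towards
    # the second (lexicographically larger) class.
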
    def _check_X_and_y(
        self, X: NDArray[np.float64], y: NDArray[np.float64] | None = None
    ) -> None:
        """Check if `X` and `y` are valid and compatible with binary classification.

        Args:
            X: Feature matrix.
            y: The target values.

        Raises:
            ValueError: If `X` and `y` are not compatible.
            ValueError: If `y` has more than 2 classes.
        """
        # Check that X and y have correct shape
        check_X_y(X, y)

        # Check that we have exactly 2 classes
        y_type = type_of_target(y, input_name="y", raise_unknown=True)
        if y_type != "binary":
            error_msg = (
                "Only binary classification is supported. The type of the target "
                f"is {y_type}."
            )
            raise ValueError(error_msg)

    def _compute_spin_labels(self, y: ArrayLike) -> NDArray[np.int8]:
        """Compute spin labels {-1, +1} from class labels.

        Args:
            y: Original target labels.

        Returns:
            Spin-encoded labels.
        """
        label_encoder = ComparableLabelEncoder()
        self.classes_ = unique_labels(y)
        label_encoder.fit(self.classes_)
        y_binary = label_encoder.transform(y)

        # Store encoder
        self.label_encoder_ = label_encoder
        return np.asarray(y_binary * 2 - 1, dtype=np.int8)

    def _check_and_calc_gamma(self) -> None | float:
        """Compute gamma if needed.

        Returns:
            Kernel parameter of the ``'rbf'``, ``'sigmoid'`` and ``'poly'``
            kernels.
        """
        if callable(self.kernel):
            return None
        kernel_id = check_string(self.kernel, "kernel", lower=True)
        if kernel_id not in ["rbf", "sigmoid", "poly"]:
            return None

        try:
            return float(check_real(self.gamma, "gamma"))
        except TypeError:
            gamma_id = check_string(self.gamma, "gamma", lower=True)
            num_features = int(self.n_features_in_)
            if gamma_id == "scale":
                return float(1 / (num_features * self.X_.var()))
            if gamma_id == "auto":
                return 1 / num_features

        error_msg = (
            f"gamma should be a real number, 'scale' or 'auto', but was {self.gamma}"
        )
        raise ValueError(error_msg)

    def _check_and_calc_xi(self) -> float:
        """Compute xi.

        Returns:
            Penalty parameter for enforcing the constraint.
        """
        try:
            return float(check_real(self.xi, "xi", l_bound=0))
        except TypeError:
            xi_id = check_string(self.xi, "xi", lower=True)
            if xi_id == "auto":
                return 2.5 * self.K

        error_msg = f"xi should be a positive real number or 'auto', but was {self.xi}"
        raise ValueError(error_msg)

    def _predict_svm(self, x: ArrayLike) -> float:
        """Return the prediction of the test data x.

        Args:
            x: A single test sample.

        Returns:
            Real-valued SVM decision function output.
        """
        kernel = get_kernel(
            self.kernel, gamma=self.gamma_, degree=self.degree, coef0=self.coef0
        )
        f = self.bias_
        for n, alpha in enumerate(self.alphas_):
            f += alpha * self.y_spin_[n] * kernel(self.X_[n], x)
        return f

    def _check_constraints(self, bit_vector: BitVector) -> bool:
        """Check if the found bit vector satisfies the imposed constraints.

        Args:
            bit_vector: Bit vector containing the solution to the QUBO.

        Returns:
            True if there are no violations, False otherwise.
        """
        constraint_value = 0
        for i, indicator in enumerate(bit_vector.bits):
            if indicator:
                n, k = divmod(i, self.K)
                constraint_value += 2**k * self.y_spin_[n]
        if not constraint_value:
            return True
        warnings.warn("Constraints were violated.", stacklevel=2)
        return False

    def _decode_bit_vector(self, bit_vector: BitVector) -> Self:
        """Decode bit vector and set attributes.

        More specifically, decode the bit vector to the decoded alpha
        (real-valued) coefficients of the SVM.

        Args:
            bit_vector: A bit vector containing the solution to the QUBO.

        Attributes:
            alphas_: Decoded alpha coefficients of the SVM.
            bias_: Decoded bias coefficient of the SVM.

        Returns:
            Self.
        """
        int_alphas = np.zeros(self.X_.shape[0])
        for n in range(self.X_.shape[0]):
            for k in range(self.K):
                int_alphas[n] += (2**k) * bit_vector.bits[self.K * n + k]
        scaling = float(self.C / (2**self.K - 1))
        self.alphas_ = scaling * int_alphas
        self.bias_ = self._calc_bias()
        return self

    def _calc_bias(self) -> float:
        """Calculate the bias of the SVM.

        Returns:
            Bias of the SVM.
        """
        X = self.X_
        y = self.y_spin_
        alphas = self.alphas_
        kernel = get_kernel(
            self.kernel, gamma=self.gamma_, degree=self.degree, coef0=self.coef0
        )

        bias = 0
        for n, alpha_n in enumerate(alphas):
            sample_bias = y[n]
            for m, alpha_m in enumerate(alphas):
                sample_bias -= alpha_m * y[m] * kernel(X[m], X[n])
            sample_bias *= alpha_n * (self.C - alpha_n)
            bias += sample_bias

        if bias == 0:
            return float(bias)
        normalising_constant = sum(alpha * (self.C - alpha) for alpha in self.alphas_)
        return float(bias / normalising_constant)

    def __sklearn_tags__(self) -> Tags:
        """Return estimator tags."""
        tags = super().__sklearn_tags__()

        # Dynamic tag based on solver
        solver_config = get_default_solver_config_if_none(self.solver_config)
        solver = solver_config.get_instance()
        tags.non_deterministic = solver.non_deterministic

        # Static tags
        tags.estimator_type = "classifier"
        tags.classifier_tags = ClassifierTags(multi_class=False)
        return tags


# Register `ComparableLabelEncoder` as serializable
def _serialize_comparable_label_encoder(
    value: ComparableLabelEncoder,
) -> dict[str, list[Any]]:
    return {"classes_": value.classes_.tolist()}


def _deserialize_comparable_label_encoder(
    data: dict[str, list[Any]],
) -> ComparableLabelEncoder:
    label_encoder = ComparableLabelEncoder()
    label_encoder.classes_ = np.array(data["classes_"])
    return label_encoder


Serializable.register(
    ComparableLabelEncoder,
    _serialize_comparable_label_encoder,
    _deserialize_comparable_label_encoder,
)
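
# Round-trip sketch for the registered (de)serializers (illustrative only):
#
#     >>> enc = ComparableLabelEncoder()
#     >>> enc.classes_ = np.array([0, 1])
#     >>> data = _serialize_comparable_label_encoder(enc)
#     >>> data
#     {'classes_': [0, 1]}
#     >>> _deserialize_comparable_label_encoder(data) == enc
#     True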