"""This module implements a scikit-learn compatible, quantum support vector machine."""
from __future__ import annotations
import warnings
from collections.abc import Callable, Mapping
from typing import (
TYPE_CHECKING,
Any,
cast,
)
import numpy as np
from numpy.typing import ArrayLike, NDArray
from sklearn.base import ClassifierMixin, ClassifierTags, Tags
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.multiclass import type_of_target, unique_labels
from sklearn.utils.validation import check_is_fitted, check_X_y, validate_data
from tno.quantum.ml.components import QUBOEstimator, get_default_solver_config_if_none
from tno.quantum.optimization.qubo.components import QUBO, SolverConfig
from tno.quantum.utils.serialization import Serializable
from tno.quantum.utils.validation import check_int, check_real, check_string
from tno.quantum.ml.classifiers.svm.kernels import get_kernel
if TYPE_CHECKING:
from typing import Self
from tno.quantum.utils import BitVector
class ComparableLabelEncoder(LabelEncoder): # type: ignore[misc]
"""LabelEncoder that can be compared to another LabelEncoder."""
def __eq__(self, other: object) -> bool:
if not isinstance(other, LabelEncoder):
return False
return np.array_equal(self.classes_, other.classes_)
def __hash__(self) -> int:
error_msg = "unhashable type: 'ComparableLabelEncoder'"
raise NotImplementedError(error_msg)
[docs]
class SupportVectorMachine(ClassifierMixin, QUBOEstimator): # type: ignore[misc]
"""Support vector machine."""
[docs]
def __init__( # noqa: PLR0913
self,
*,
K: int = 2, # noqa: N803
C: float = 1.0, # noqa: N803
xi: str | float = "auto",
kernel: str | Callable[[ArrayLike, ArrayLike], float] = "rbf",
gamma: str | float = "scale",
degree: int = 3,
coef0: float = 0,
solver_config: SolverConfig | Mapping[str, Any] | None = None,
) -> None:
r"""Init of the SupportVectorMachine.
The SupportVectorMachine minimizes the following objective function:
.. _QUBO cost function:
.. math::
\frac{C}{2^K-1}\sum_{n,m,k,j}a_{n,k}a_{m,j}2^{k+j-1}y_ny_mk(x_n,x_m)
-\sum_{n,k}a_{n,k}2^k
+ \xi \frac{C}{2^K-1}\left(\sum_{n,k}2^ka_{n,k}y_n\right)^2.
**Details:**
* $a_{n,k}$ are the binary decision variables.
* $x_{n}$ is the $n^{th}$ feature vector, i.e., the $n^{th}$ row of the feature matrix $X$.
* $y_n$ is the $n^{th}$ target value. The classes of the input vector `y` are converted such that $y_n\in\{-1,1\}$.
* $k(\cdot,\cdot)$ is the kernel function.
Args:
K: Number of variables used in the variable encoding. The total number of
variables the objective function of the SVM is ``n_samples * K``. K must
be a strictly positive integer. Defaults to 2.
C: Upper limit of the variables. C must be a strictly positive real number.
Default is 1.
xi: Penalty parameter to enforce the constraint $\sum_i y_i\alpha_i = 0$.
`xi` is relative to the largest absolute term of the unconstrained
problem. `xi` must be a positive real number or ``'auto'``. If
``'auto'`` is used, `xi` is set to 2.5K. Default is ``'auto'``.
kernel: Kernel to use, choose from ``{'rbf', 'linear', 'poly', 'sigmoid'}``
or custom callable. Default is ``'rbf'``. For more information on the
kernels see the documentation of
:py:mod:`~tno.quantum.ml.classifiers.svm.kernels`.
gamma: Kernel parameter of the ``'rbf'``, ``'poly'`` and ``'sigmoid'``
kernels. If ``'scaled'`` is given, then
``gamma=1 / n_features_in * X_train.var()``. If ``'auto'`` is given,
then ``gamma=1 / n_features_in``. If a real number is given then this
float will be used for gamma. Default is ``'scaled'``. For more
information see the documentation of
:py:mod:`~tno.quantum.ml.classifiers.svm.kernels`.
degree: Degree of the ``'poly'`` kernel. Default is 3. For more information,
see the documentation of :py:func:`~tno.quantum.ml.classifiers.svm.kernels.poly_kernel`.
coef0: Offset in the 'sigmoid' or ``'poly'`` kernel. Default is 0. For more
information see the documentation of
:py:func:`~tno.quantum.ml.classifiers.svm.kernels.sigmoid_kernel` or
:py:func:`~tno.quantum.ml.classifiers.svm.kernels.poly_kernel`.
solver_config: A QUBO solver configuration or None. In the former case
includes name and options. If ``None`` is given, the default solver
configuration as defined in
:py:func:`~tno.quantum.ml.components.get_default_solver_config_if_none`
will be used. Default is ``None``.
Attributes:
gamma_: kernel parameter of the ``'rbf'``, ``'poly'`` and ``'sigmoid'``
kernels.
y_spin_: Spin-encoded labels.
alphas_: Coefficients of the SVM.
bias_: Bias of the SVM.
label_encoder_: Label encoder for the classes.
classes_: Classes of the SVM.
""" # noqa: E501
self.K = K
self.C = C
self.xi = xi
self.kernel = kernel
self.gamma = gamma
self.degree = degree
self.coef0 = coef0
super().__init__(solver_config=solver_config)
self.gamma_: None | float
self.y_spin_: NDArray[np.int8]
self.alphas_: NDArray[np.float64]
self.bias_: float
self.label_encoder_: ComparableLabelEncoder
self.classes_: NDArray[np.int_]
def _validate_params(self) -> None:
"""Validate the parameters of the SVM.
Raises:
ValueError: If the SVM parameters are not valid.
"""
check_int(self.K, "K", l_bound=1)
check_real(self.C, "C", l_bound=0, l_inclusive=False)
check_int(self.degree, "degree")
check_real(self.coef0, "coef0")
def _make_qubo(
self, X: NDArray[np.float64], y: NDArray[np.float64] | None = None
) -> QUBO:
"""Make a SVM QUBO in the QUBO format of the optimization pipeline.
Args:
X: The input data.
y: The target values. The array must contain values of (-1,1).
Returns:
qubo: QUBO of the SVM
"""
if y is None:
error_msg = "y must be provided for making the QUBO."
raise ValueError(error_msg)
# Calculate gamma
self.gamma_ = self._check_and_calc_gamma()
kernel = get_kernel(
self.kernel, gamma=self.gamma_, degree=self.degree, coef0=self.coef0
)
n_variables = X.shape[0] * self.K
scalar = self.C / (2**self.K - 1)
qubo_base = np.zeros((n_variables, n_variables))
for a_nk in range(n_variables):
n, k = divmod(a_nk, self.K)
qubo_base[a_nk, a_nk] -= 2**k
qubo_base[a_nk, a_nk] += scalar * 2 ** (2 * k - 1) * kernel(X[n], X[n])
for a_mj in range(a_nk + 1, n_variables):
m, j = divmod(a_mj, self.K)
qubo_base[a_nk, a_mj] += (
scalar * 2 ** (k + j) * y[n] * y[m] * kernel(X[n], X[m])
)
qubo_constraint = np.zeros((n_variables, n_variables))
for a_nk in range(n_variables):
n, k = divmod(a_nk, self.K)
qubo_constraint[(a_nk, a_nk)] += (2 ** (2 * k - 1)) * scalar
for a_mj in range(a_nk + 1, n_variables):
m, j = divmod(a_mj, self.K)
qubo_constraint[(a_nk, a_mj)] += scalar * ((2 ** (k + j)) * y[n] * y[m])
penalty = max(-qubo_base.min(), qubo_base.max()) * self._check_and_calc_xi()
return QUBO(qubo_base + penalty * qubo_constraint)
[docs]
def fit(self, X: ArrayLike, y: ArrayLike | None = None) -> Self:
"""Fit the model to data matrix X and targets y.
The model is fitted by minimizing the `QUBO cost function`_ shown in the
``__init__``.
Args:
X: The input data. Should be a 2 dimensional real valued ArrayLike
containing the features. Must be the same length as `y`.
y: The target values. There must be exactly 2 classes (unique values). Must
be the same length as `X`.
Returns:
Self, a trained classifier.
"""
if y is None:
error_msg = "requires y to be passed, but the target y is None"
raise ValueError(error_msg)
self._validate_params()
# Fitting is done on spin labels.
self.y_spin_ = self._compute_spin_labels(y)
svm = cast("SupportVectorMachine", super().fit(X, y=self.y_spin_))
# Overwrite y_ with the original y.
self.y_ = np.asarray(y)
return svm
[docs]
def predict(self, X: ArrayLike) -> NDArray[Any]:
"""Predict the class labels for the provided data.
Predictions are theoretically based on
:py:func:`~svm.SupportVectorMachine.predict_proba`, but this method uses a more
efficient implementation based on the sign of the decision function.
Args:
X: Test samples.
Returns:
Array containing the predicted class labels.
"""
# Check is fit has been called
check_is_fitted(self)
# Input validation
X = np.asarray(validate_data(self, X=X, reset=False))
y_pred_spin = np.sign(np.array([self._predict_svm(x) for x in X]))
y_pred_binary = ((y_pred_spin + 1) / 2).astype(np.int8)
y_pred = self.label_encoder_.inverse_transform(y_pred_binary)
return np.asarray(y_pred)
[docs]
def predict_proba(self, X: ArrayLike) -> NDArray[np.float64]:
r"""Return probability-like estimates for the test data X.
.. warning::
The values returned by this method are **not true calibrated probability**.
If true probabilistic interpretation is needed, consider applying a post-hoc
calibration method such as Platt scaling.
The predictions are based on the following formula:
.. _prediction probability function:
.. math::
\begin{align}
p(x) &= \frac{1-b}{2}
- \frac{1}{2}\sum_n \sum_k a_{nk}y_n^\text{train}k(x_n^\text{train}, x)\\
\text{, where}\\
b &= \frac{
\sum_n\left(\sum_k2^ka_{nk}\right)
\left(1-\frac{\sum_k2^ka_{nk}}{2^K-1}\right)
\left(y^\text{train}_n-\frac{C}{2^K-1}\sum_m\sum_ka_{m,k}y^\text{train}_mk
(x^\text{train}_n,x^\text{train}_m)\right)
}{
\sum_n\left(\sum_k2^ka_{nk}\right)
\left(1-\frac{\sum_k2^ka_{nk}}{2^K-1}\right)
}
\end{align}
**Details:**
* $a_{n,k}$ are the binary solutions of the fitted QUBO.
* $x_{n}^\text{train}$ is the $n^{th}$ feature vector of the training data.
* $y_n^\text{train}$ is the $n^{th}$ target value of the training data. The classes of the input vector `y` are converted such that $y_n^\text{train}\in\{-1,1\}$.
* $k(\cdot,\cdot)$ is the kernel function.
Args:
X: Test samples.
Returns:
The class probabilities of the input samples. Classes are ordered by
lexicographic order.
""" # noqa: E501
# Check is fit had been called
check_is_fitted(self)
# Input validation
X = np.asarray(validate_data(self, X=X, reset=False))
proba_array = np.zeros([X.shape[0], 2])
for i, x in enumerate(X):
proba_array[i, 0] = 0.5 - 0.5 * self._predict_svm(x)
proba_array[:, 1] = 1 - proba_array[:, 0]
return proba_array
def _check_X_and_y(
self, X: NDArray[np.float64], y: NDArray[np.float64] | None = None
) -> None:
"""Check if `X` and `y` are valid and compatible with binary classification.
Args:
X: Feature matrix.
y: The target values
Raises:
ValueError: If `X` and `y` are not compatible.
ValueError: If `y` has more than 2 classes.
"""
# Check that X and y have correct shape
check_X_y(X, y)
# Check that we have exactly 2 classes
y_type = type_of_target(y, input_name="y", raise_unknown=True)
if y_type != "binary":
error_msg = (
"Only binary classification is supported. The type of the target "
f"is {y_type}."
)
raise ValueError(error_msg)
def _compute_spin_labels(self, y: ArrayLike) -> NDArray[np.int8]:
"""Compute spin labels {-1, +1} from class labels.
Args:
y: Original target labels.
Returns:
Spin-encoded labels.
"""
label_encoder = ComparableLabelEncoder()
self.classes_ = unique_labels(y)
label_encoder.fit(self.classes_)
y_binary = label_encoder.transform(y)
# Store encoder
self.label_encoder_ = label_encoder
return np.asarray(y_binary * 2 - 1, dtype=np.int8)
def _check_and_calc_gamma(self) -> None | float:
"""Compute gamma if needed.
Returns:
gamma: kernel parameter of the 'rbf', 'sigmoid' and 'poly' kernels.
"""
if callable(self.kernel):
return None
kernel_id = check_string(self.kernel, "kernel", lower=True)
if kernel_id not in ["rbf", "sigmoid", "poly"]:
return None
try:
return float(check_real(self.gamma, "gamma"))
except TypeError:
gamma_id = check_string(self.gamma, "gamma", lower=True)
num_features = int(self.n_features_in_)
if gamma_id == "scale":
return float(1 / (num_features * self.X_.var()))
if gamma_id == "auto":
return 1 / num_features
error_msg = (
f"gamma should be a real number, 'scale' or 'auto', but was {self.gamma}"
)
raise ValueError(error_msg)
def _check_and_calc_xi(self) -> float:
"""Compute xi.
Returns:
xi: penalty parameter for enforcing the constraint.
"""
try:
return float(check_real(self.xi, "xi", l_bound=0))
except TypeError:
xi_id = check_string(self.xi, "xi", lower=True)
if xi_id == "auto":
return 2.5 * self.K
error_msg = f"xi should be a positive real number or 'auto', but was {self.xi}"
raise ValueError(error_msg)
def _predict_svm(self, x: ArrayLike) -> float:
"""Return the prediction of the test data x.
Args:
x: a single test sample.
Returns:
decision value: real-valued SVM decision function output.
"""
kernel = get_kernel(
self.kernel, gamma=self.gamma_, degree=self.degree, coef0=self.coef0
)
f = self.bias_
for n, alpha in enumerate(self.alphas_):
f += alpha * self.y_spin_[n] * kernel(self.X_[n], x)
return f
def _check_constraints(self, bit_vector: BitVector) -> bool:
"""Check if the found bit vector satisfies the imposed constraints.
Args:
bit_vector: Bitvector containing the solution to the QUBO.
Returns:
True if there are no violations, False otherwise.
"""
constraint_value = 0
for i, indicator in enumerate(bit_vector.bits):
if indicator:
n, k = divmod(i, self.K)
constraint_value += 2**k * self.y_spin_[n]
if not constraint_value:
return True
warnings.warn("Constraints were violated.", stacklevel=2)
return False
def _decode_bit_vector(self, bit_vector: BitVector) -> Self:
"""Decode bit vector and set attributes.
More specifically, decode the bit vector to the decoded alpha
(real valued) coefficients of the SVM.
Args:
bit_vector : A bit vector containing the solution to the QUBO.
Attributes:
alphas: decoded alpha coefficients of the SVM.
bias_: decoded bias coefficient of the SVM.
"""
int_alphas = np.zeros(self.X_.shape[0])
for n in range(self.X_.shape[0]):
for k in range(self.K):
int_alphas[n] += (2**k) * bit_vector.bits[self.K * n + k]
scaling = float(self.C / (2**self.K - 1))
self.alphas_ = scaling * int_alphas
self.bias_ = self._calc_bias()
return self
def _calc_bias(self) -> float:
"""Calculate the bias of the SVM.
Returns:
bias: bias of the SVM.
"""
X = self.X_
y = self.y_spin_
alphas = self.alphas_
kernel = get_kernel(
self.kernel, gamma=self.gamma_, degree=self.degree, coef0=self.coef0
)
bias = 0
for n, alpha_n in enumerate(alphas):
sample_bias = y[n]
for m, alpha_m in enumerate(alphas):
sample_bias -= alpha_m * y[m] * kernel(X[m], X[n])
sample_bias *= alpha_n * (self.C - alpha_n)
bias += sample_bias
if bias == 0:
return float(bias)
normalising_constant = sum(alpha * (self.C - alpha) for alpha in self.alphas_)
return float(bias / normalising_constant)
# Register `ComparableLabelEncoder` as serializable
def _serialize_comparable_label_encoder(
value: ComparableLabelEncoder,
) -> dict[str, list[Any]]:
return {"classes_": value.classes_.tolist()}
def _deserialize_comparable_label_encoder(
data: dict[str, list[Any]],
) -> ComparableLabelEncoder:
label_encoder = ComparableLabelEncoder()
label_encoder.classes_ = np.array(data["classes_"])
return label_encoder
Serializable.register(
ComparableLabelEncoder,
_serialize_comparable_label_encoder,
_deserialize_comparable_label_encoder,
)