Source code for tno.quantum.ml.datasets._blobs_clustering

"""Module for blobs clustering dataset."""

from __future__ import annotations

import numpy as np
from numpy.typing import NDArray
from sklearn import datasets
from tno.quantum.utils.validation import check_int


def get_blobs_clustering_dataset(
    n_samples: int, n_features: int, n_centers: int, random_seed: int = 42
) -> tuple[NDArray[np.float64], NDArray[np.float64]]:
    r"""Load a blobs clustering dataset.

    This function wraps :func:`~sklearn.datasets.make_blobs` of
    :mod:`sklearn.datasets` with a fixed cluster standard deviation of 0.1.

    The cluster centers are generated deterministically: center ``i`` has ``i``
    as its first coordinate, and its remaining ``n_features - 1`` coordinates
    alternate between 0 and 1, starting at ``i % 2``. Centers therefore always
    differ in their first coordinate.

    Example usage:

        >>> from tno.quantum.ml.datasets import get_blobs_clustering_dataset
        >>> X, true_labels = get_blobs_clustering_dataset(100, 3, 2)
        >>> print(f"{X.shape=}\n{true_labels.shape=}")
        X.shape=(100, 3)
        true_labels.shape=(100,)

    Args:
        n_samples: Number of samples. Must be at least 1.
        n_features: Number of features. Must be at least 1.
        n_centers: Number of centers. Must be at least 1.
        random_seed: Seed to give to the random number generator. Must be
            non-negative. Defaults to ``42``.

    Returns:
        A tuple containing ``X`` and ``true_labels`` of a blobs clustering dataset.
        ``true_labels`` holds, per sample, the index of the center it was drawn
        from (integer labels as produced by ``make_blobs``).

    Raises:
        Whatever :func:`~tno.quantum.utils.validation.check_int` raises when an
        argument is not an integer or violates its lower bound (presumably
        ``TypeError`` / ``ValueError`` -- confirm against that helper).
    """
    # Validate input. The ``name`` passed to ``check_int`` must match the
    # parameter being checked so that error messages point at the right argument.
    n_samples = check_int(n_samples, name="n_samples", l_bound=1)
    n_features = check_int(n_features, name="n_features", l_bound=1)
    n_centers = check_int(n_centers, name="n_centers", l_bound=1)
    random_seed = check_int(random_seed, name="random_seed", l_bound=0)

    # Row ``i`` is the center of cluster ``i``: first coordinate ``i``, followed
    # by an alternating 0/1 pattern whose phase depends on the parity of ``i``.
    centers = np.array(
        [
            [i] + [(f + i) % 2 for f in range(n_features - 1)]
            for i in range(n_centers)
        ]
    )

    # The number of features is implied by the shape of ``centers``.
    X, true_labels = datasets.make_blobs(
        n_samples=n_samples,
        centers=centers,
        cluster_std=0.1,
        random_state=random_seed,
    )
    return X, true_labels