# Source code for tno.quantum.ml.datasets._bars_and_stripes

"""Module for bars and stripes dataset."""

from __future__ import annotations

import numpy as np
from numpy.typing import ArrayLike, NDArray
from tno.quantum.utils.validation import check_arraylike, check_int

from tno.quantum.ml.datasets._utils import _safe_train_test_split


def _add_noise(
    img: ArrayLike, noise_size: int, rng: np.random.Generator
) -> NDArray[np.uint8]:
    """Helper function to apply noise to binary pattern image(s).

    The input is interpreted as a binary pattern (``0`` = black, ``1`` = white)
    and is mapped to 8-bit grey values:

    - Black pixels (0) get values in [0, noise_size].
    - White pixels (1) get values in [255 - noise_size, 255].

    For ``noise_size == 0`` the pattern is simply scaled by 255. For
    ``noise_size > 0`` pixels that are neither ``0`` nor ``1`` are left as is.

    The input is never modified; a new array is always returned.

    Args:
        img: input pattern image(s) with pixel values 0 or 1.
        noise_size: magnitude of the noise, expected to lie in [0, 255].
        rng: random number generator instance for reproducibility.

    Returns:
        Noisy image(s) as a new ``uint8`` array with the same shape as ``img``.
    """
    # ``np.array`` (unlike ``np.asarray``) always copies, so the masked writes
    # below can never leak into an array owned by the caller.
    arr = np.array(img, dtype=np.uint8)

    if noise_size == 0:
        return (arr * 255).astype(np.uint8)

    # Both masks are taken before any write so that freshly drawn noise values
    # that happen to equal 0 or 1 are not reclassified.
    white_mask = arr == 1
    black_mask = arr == 0

    # Draw order (white first, then black) is kept fixed so that results for a
    # given seed stay reproducible.
    arr[white_mask] = rng.integers(255 - noise_size, 256, size=white_mask.sum())
    arr[black_mask] = rng.integers(0, noise_size + 1, size=black_mask.sum())
    return arr


def get_bars_and_stripes_dataset(
    n_samples: int | None = 100,
    shape: ArrayLike | None = None,
    noise_size: int = 0,
    random_seed: int = 42,
    test_size: float | int | None = None,
) -> tuple[NDArray[np.uint8], NDArray[np.int_], NDArray[np.uint8], NDArray[np.int_]]:
    r"""Create bars and stripes images dataset.

    Example usage:

        >>> from tno.quantum.ml.datasets import get_bars_and_stripes_dataset
        >>> X_train, y_train, X_val, y_val = get_bars_and_stripes_dataset(
        ...     n_samples=100, shape=(4, 4), noise_size=10,
        ... )
        >>> print(f"{X_train.shape=}\n{y_train.shape=}\n{X_val.shape=}\n{y_val.shape=}")
        X_train.shape=(75, 4, 4)
        y_train.shape=(75,)
        X_val.shape=(25, 4, 4)
        y_val.shape=(25,)

    Args:
        n_samples: Number of samples. If ``None``, generate the full Bars and
            Stripes dataset of size ``2^rows + 2^cols - 4``.
        shape: Shape of the generated images (rows, cols), defaults to (4, 4).
            Both entries must be at least 2, otherwise no non-trivial pattern
            exists along that direction.
        noise_size: Amount of pixel noise intensity to add. By default
            (``noise_size=0``) the pixels are strictly binary with value ``0``
            for black and value ``255`` for white pixels. For ``noise_size>0``,
            pixel values are sampled uniformly within an interval around these
            extremes.
        random_seed: Seed to give to the random number generator. Defaults to
            ``42``.
        test_size: The proportion of the dataset that is included in the
            test-split. Either represented by a percentage in the range
            [0.0, 1.0) or as absolute number of test samples in the range
            [1, inf). ``None`` (the default) is forwarded unchanged to the
            split helper; as the example above shows, this yields a 0.25
            test split.

    Returns:
        A tuple containing ``X_training``, ``y_training``, ``X_validation`` and
        ``y_validation``.

    Raises:
        ValueError: If one of the entries of ``shape`` is smaller than 2.
    """
    rng = np.random.default_rng(random_seed)

    # Validate input
    if n_samples is not None:
        n_samples = check_int(n_samples, "n_samples", l_bound=1)
    # Test for ``None`` explicitly: ``shape or (4, 4)`` would raise for a NumPy
    # array because the truth value of a multi-element array is ambiguous.
    shape = check_arraylike(
        (4, 4) if shape is None else shape, "shape", ndim=1, shape=(2,)
    )
    n_rows, n_cols = shape
    if n_rows < 2 or n_cols < 2:
        # A single-pixel pattern is always all-0 or all-1, so the rejection
        # loop below could never terminate for such a dimension.
        msg = (
            "Both dimensions of `shape` must be at least 2 to allow non-trivial "
            f"bars and stripes patterns, but got ({n_rows}, {n_cols})."
        )
        raise ValueError(msg)
    noise_size = check_int(noise_size, "noise_size", l_bound=0, u_bound=255)

    if n_samples is None:
        # Generate full dataset
        X, y = _generate_full_bars_and_stripes_patterns(n_rows, n_cols)
    else:
        # Random subset sampling. The loop is intentionally kept sequential so
        # that the random stream (and thus the dataset for a given seed) is
        # unchanged.
        X = np.empty((n_samples, n_rows, n_cols), dtype=np.uint8)
        y = rng.integers(2, size=n_samples, dtype=np.int_)

        for i, y_i in enumerate(y):
            if y_i:  # Stripes: one random row repeated over all rows
                size = (1, n_cols)
                reps = (n_rows, 1)
            else:  # Bars: one random column repeated over all columns
                size = (n_rows, 1)
                reps = (1, n_cols)

            pattern = rng.integers(0, 2, size=size, dtype=np.uint8)
            while np.all(pattern == 0) or np.all(pattern == 1):  # avoid trivial
                pattern = rng.integers(0, 2, size=size, dtype=np.uint8)

            X[i] = np.tile(pattern, reps)

    # Apply noise
    X = _add_noise(X, noise_size, rng)

    # Split into training and validation data sets
    return _safe_train_test_split(X, y, test_size=test_size, random_state=random_seed)


def _nontrivial_bit_patterns(n_bits: int) -> NDArray[np.uint8]:
    """Return every ``n_bits``-wide bit string except all-0 and all-1.

    Row ``k`` holds the binary representation of ``k + 1``, most significant
    bit first. The result has shape ``(max(2^n_bits - 2, 0), n_bits)``.
    """
    values = np.arange(1, 2**n_bits - 1, dtype=np.int64)
    shifts = np.arange(n_bits - 1, -1, -1, dtype=np.int64)
    return ((values[:, None] >> shifts) & 1).astype(np.uint8)


def _generate_full_bars_and_stripes_patterns(
    n_rows: int, n_cols: int
) -> tuple[NDArray[np.uint8], NDArray[np.int_]]:
    """Generate all patterns for the full bars and stripes dataset.

    Dataset has size ``2^rows + 2^cols - 4``: every non-trivial assignment of
    the rows (label ``0``) followed by every non-trivial assignment of the
    columns (label ``1``). Within each group the patterns are ordered by the
    integer value of their bit string.

    Args:
        n_rows: number of rows
        n_cols: number of columns

    Returns:
        Image patterns (no noise applied yet) of shape
        ``(n_patterns, n_rows, n_cols)`` with pixel values 0/1, and the
        corresponding labels (bar/stripe).
    """
    row_bits = _nontrivial_bit_patterns(n_rows)  # (n_bars, n_rows)
    col_bits = _nontrivial_bit_patterns(n_cols)  # (n_stripes, n_cols)

    # Label 0: each image row is constant, i.e. the bit column is repeated
    # along the column axis.
    bars = np.repeat(row_bits[:, :, None], n_cols, axis=2)
    # Label 1: each image column is constant, i.e. the bit row is repeated
    # along the row axis.
    stripes = np.repeat(col_bits[:, None, :], n_rows, axis=1)

    # Both parts are always 3-D, so concatenation also works when one of them
    # is empty (a dimension of size 1 has no non-trivial pattern).
    X = np.concatenate([bars, stripes], axis=0)
    y = np.concatenate(
        [
            np.zeros(len(bars), dtype=np.int_),
            np.ones(len(stripes), dtype=np.int_),
        ],
        axis=0,
    )
    return X, y