"""This module implements the tracking feature of ultralytics 8.
It provides classes to handle detections per frame and sequences of such detections,
which help in analysing the tracking results and integrating the re_id functionality.
"""
from __future__ import annotations
from typing import TYPE_CHECKING, cast
import numpy as np
from numpy.typing import NDArray
if TYPE_CHECKING:
import pandas as pd
from ultralytics.trackers import BYTETracker
[docs]
class FrameDets:
    """Class containing all detections in one frame.

    Attributes:
        frame_number: Frame number.
        conf: Confidence scores of the detections.
        cls: Classes of the detections.
        xywh: Box coordinates with shape (n_boxes, 4) using xywh format
            (coord of center, width, height).
        xyxy: Box coordinates with shape (n_boxes, 4) using xyxy format
            (top left, bottom right).
        ids: Ids associated with the box.
        size: Number of detections in the frame.
    """

    def __init__(self) -> None:
        """Initializes the FrameDets instance with default (empty) values."""
        self.frame_number: int = 0
        self.cls: NDArray[np.int64] = np.zeros(0, dtype=np.int64)
        self.xywh: NDArray[np.float64] = np.zeros((0, 4), dtype=np.float64)
        self.xyxy: NDArray[np.float64] = np.zeros((0, 4), dtype=np.float64)
        self.conf: NDArray[np.float64] = np.zeros(0, dtype=np.float64)
        self.ids: NDArray[np.int64] = np.zeros(0, dtype=np.int64)
        self.size: int = 0

    @classmethod
    def from_dataframe(cls, detections: pd.DataFrame) -> FrameDets:
        """Creates an instance of the class from a pandas DataFrame.

        Args:
            detections: A DataFrame containing the detections of a single frame.
                The columns should be structured as follows:
                - 0: Frame number
                - 1: Detection ID (not used in this implementation)
                - 2: x center of the bounding box
                - 3: y center of the bounding box
                - 4: Width of the bounding box
                - 5: Height of the bounding box
                - 6: Confidence score of the detection

        Returns:
            An instance of FrameDets with detections for a single frame.
            Classes and ids are all set to zero. For an empty DataFrame the
            frame number keeps its default of 0.
        """
        frame_dets = cls()
        if not detections.empty:
            # All rows belong to one frame, so the first entry of the frame
            # column is the frame number. ``cast`` only informs the type
            # checker; ``int`` turns the numpy scalar into a plain int.
            frame_dets.frame_number = int(cast("int", detections[0].iloc[0]))
        frame_dets.xywh = detections.iloc[:, [2, 3, 4, 5]].to_numpy(dtype=np.float64)
        frame_dets.xyxy = transform_xywh_to_xyxy(frame_dets.xywh)
        frame_dets.conf = detections.iloc[:, 6].to_numpy(dtype=np.float64)
        # Assuming class is not used, set to zero
        frame_dets.cls = np.zeros(frame_dets.conf.shape, dtype=np.int64)
        frame_dets.ids = np.zeros(frame_dets.conf.shape, dtype=np.int64)
        frame_dets.size = len(frame_dets.conf)
        return frame_dets

    @classmethod
    def from_ultralytics_tracker(cls, tracks: NDArray[np.float64]) -> FrameDets:
        """Creates an instance of the class from the output of the ultralytics tracker.

        Args:
            tracks: An array of shape (n_tracks, 6) where each row contains:
                - x1: x coordinate of the top-left corner
                - y1: y coordinate of the top-left corner
                - x2: x coordinate of the bottom-right corner
                - y2: y coordinate of the bottom-right corner
                - id: Detection ID
                - conf: Confidence score
                Only the first six columns are read; any further columns are
                ignored. An empty array (of any shape) yields an empty instance.

        Returns:
            An instance of FrameDets with detections for a single frame.
            The frame number is not part of ``tracks`` and stays at 0.
        """
        frame_dets = cls()
        tracks = np.asarray(tracks)
        if tracks.size == 0:
            # An empty result may be 1-D, which cannot be column-indexed.
            return frame_dets

        corner_min = tracks[:, :2]
        corner_max = tracks[:, 2:4]
        frame_dets.ids = tracks[:, 4].astype(np.int64)
        frame_dets.xyxy = tracks[:, :4]
        # Keep xywh in sync with xyxy so both box formats describe the same boxes.
        frame_dets.xywh = np.hstack(
            ((corner_min + corner_max) / 2, corner_max - corner_min)
        )
        frame_dets.conf = tracks[:, 5]
        # Assuming class is not used, set to zero
        frame_dets.cls = np.zeros(frame_dets.conf.shape, dtype=np.int64)
        frame_dets.size = tracks.shape[0]
        return frame_dets

    @classmethod
    def from_dict(cls, framedets_dict: dict[str, int]) -> FrameDets:
        """Creates an instance of the class from a dictionary.

        Args:
            framedets_dict: A dictionary with one key per FrameDets attribute
                ("ids", "xyxy", "xywh", "conf", "cls", "size", "frame_number").
                The array attributes are converted with ``np.array``; "size"
                and "frame_number" are stored as given.

        Returns:
            An instance of FrameDets with detections for a single frame.
        """
        frame_dets = cls()
        frame_dets.ids = np.array(framedets_dict["ids"], dtype=np.int64)
        # reshape(-1, 4) keeps the (n_boxes, 4) layout even when the stored
        # list is empty (np.array([]) alone would have shape (0,)).
        frame_dets.xyxy = np.array(framedets_dict["xyxy"], dtype=np.float64).reshape(-1, 4)
        frame_dets.xywh = np.array(framedets_dict["xywh"], dtype=np.float64).reshape(-1, 4)
        frame_dets.conf = np.array(framedets_dict["conf"], dtype=np.float64)
        frame_dets.cls = np.array(framedets_dict["cls"], dtype=np.int64)
        frame_dets.size = framedets_dict["size"]
        frame_dets.frame_number = framedets_dict["frame_number"]
        return frame_dets
[docs]
class DetectionSequence(list[FrameDets]):
    """Sequence of FrameDets, one entry per frame.

    This is a ``list`` subclass: the FrameDets objects are the list elements
    themselves, there is no separate attribute holding them. Element types are
    only validated at construction time.
    """

    def __init__(self, det_sequence: list[FrameDets]) -> None:
        """Initializes the sequence with a list of frame detections.

        Args:
            det_sequence: A list of FrameDets.

        Raises:
            TypeError: If any element in det_sequence is not an instance of FrameDets.
        """
        if not all(isinstance(dets, FrameDets) for dets in det_sequence):
            msg = "Only FrameDets instances are allowed in DetectionSequence."
            raise TypeError(msg)
        super().__init__(det_sequence)

    @classmethod
    def from_dataframe(cls, detections: pd.DataFrame) -> DetectionSequence:
        """Creates an instance of the class from a pandas DataFrame.

        Args:
            detections: A DataFrame containing detections.
                Uses the from_dataframe method from the FrameDets class, so the
                DataFrame should be structured as described in the FrameDets class.
                Column 0 holds the frame number used to split the rows.

        Returns:
            An instance of DetectionSequence consisting of FrameDets for each frame,
            ordered by first appearance of the frame number in the DataFrame.
        """
        # One grouping pass instead of one boolean mask over the whole frame
        # per unique frame number; sort=False keeps first-appearance order.
        per_frame = detections.groupby(detections[0], sort=False)
        return cls([FrameDets.from_dataframe(rows) for _, rows in per_frame])
[docs]
def ultralytics_track(
    det_sequence: DetectionSequence, tracker: BYTETracker
) -> DetectionSequence:
    """Runs the ultralytics tracking algorithm on the already extracted detections.

    The tracker is updated once per frame, in sequence order, and its output
    for each frame is converted back into a FrameDets.

    Args:
        det_sequence: sequence of detections per frame.
        tracker: tracker object.

    Returns:
        A new DetectionSequence built from the tracker output, with id's and
        with the frame number of the corresponding input frame.
    """
    updated_det_sequence = []
    for dets in det_sequence:
        tracked = FrameDets.from_ultralytics_tracker(tracker.update(dets))
        # The tracker output carries no frame index, so copy it from the
        # input frame instead of leaving it at the default of 0.
        tracked.frame_number = dets.frame_number
        updated_det_sequence.append(tracked)
    return DetectionSequence(updated_det_sequence)
def transform_xywh_to_xyxy(boxes: NDArray[np.float64]) -> NDArray[np.float64]:
    """Converts boxes from center/size (xywh) format to corner (xyxy) format.

    Args:
        boxes: array of shape (n_boxes, 4) holding, per row, the x and y
            coordinate of the box center followed by its width and height.

    Returns:
        An array with one row per box containing (x1, y1, x2, y2), where
        (x1, y1) is the center minus half the size and (x2, y2) is the
        center plus half the size.
    """
    centers = boxes[:, [0, 1]]
    half_extents = boxes[:, [2, 3]] / 2
    return np.hstack((centers - half_extents, centers + half_extents))