Skip to content

OcSort

Bases: BaseTracker

OCSort Tracker: A tracking algorithm that utilizes motion-based tracking.

Parameters:

Name Type Description Default
per_class bool

Whether to perform per-class tracking. If True, tracks are maintained separately for each object class.

False
min_conf float

Minimum detection confidence. Detections with confidence at or below this value are discarded before either association round.

0.1
det_thresh float

Detection confidence threshold. Detections below this threshold are ignored in the first association step.

0.2
max_age int

Maximum number of frames to keep a track alive without any detections.

30
min_hits int

Minimum number of hits required to confirm a track.

3
asso_threshold float

Threshold for the association step in data association. Controls the maximum distance allowed between tracklets and detections for a match.

0.3
delta_t int

Time delta for velocity estimation in Kalman Filter.

3
asso_func str

Association function to use for data association. Options include "iou" for IoU-based association.

'iou'
inertia float

Weight for inertia in motion modeling. Higher values make tracks less responsive to changes.

0.2
use_byte bool

Whether to use BYTE association in the second association step.

False
Q_xy_scaling float

Scaling factor for the process noise covariance in the Kalman Filter for position coordinates.

0.01
Q_s_scaling float

Scaling factor for the process noise covariance in the Kalman Filter for scale coordinates.

0.0001
Source code in boxmot/trackers/ocsort/ocsort.py
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
class OcSort(BaseTracker):
    """
    OCSort Tracker: A tracking algorithm that utilizes motion-based tracking.

    Args:
        per_class (bool, optional): Whether to perform per-class tracking. If True, tracks are maintained separately for each object class.
        min_conf (float, optional): Minimum detection confidence. Detections with confidence at or below this value are discarded before either association round.
        det_thresh (float, optional): Detection confidence threshold. Detections below this threshold are ignored in the first association step.
        max_age (int, optional): Maximum number of frames to keep a track alive without any detections.
        min_hits (int, optional): Minimum number of hits required to confirm a track.
        asso_threshold (float, optional): Threshold for the association step in data association. Controls the maximum distance allowed between tracklets and detections for a match.
        delta_t (int, optional): Time delta for velocity estimation in Kalman Filter.
        asso_func (str, optional): Association function to use for data association. Options include "iou" for IoU-based association.
        inertia (float, optional): Weight for inertia in motion modeling. Higher values make tracks less responsive to changes.
        use_byte (bool, optional): Whether to use BYTE association in the second association step.
        Q_xy_scaling (float, optional): Scaling factor for the process noise covariance in the Kalman Filter for position coordinates.
        Q_s_scaling (float, optional): Scaling factor for the process noise covariance in the Kalman Filter for scale coordinates.
    """

    def __init__(
        self,
        per_class: bool = False,
        min_conf: float = 0.1,
        det_thresh: float = 0.2,
        max_age: int = 30,
        min_hits: int = 3,
        asso_threshold: float = 0.3,
        delta_t: int = 3,
        asso_func: str = "iou",
        inertia: float = 0.2,
        use_byte: bool = False,
        Q_xy_scaling: float = 0.01,
        Q_s_scaling: float = 0.0001,
    ):
        super().__init__(max_age=max_age, per_class=per_class, asso_func=asso_func)
        """
        Sets key parameters for SORT
        """
        self.per_class = per_class
        self.min_conf = min_conf
        self.max_age = max_age
        self.min_hits = min_hits
        self.asso_threshold = asso_threshold
        self.frame_count = 0  # number of frames processed so far; incremented each update() call
        self.det_thresh = det_thresh
        self.delta_t = delta_t
        self.inertia = inertia
        self.use_byte = use_byte
        self.Q_xy_scaling = Q_xy_scaling
        self.Q_s_scaling = Q_s_scaling
        # Reset the class-level counter so a fresh tracker instance starts
        # numbering from zero (presumably the source of track IDs — confirm
        # in KalmanBoxTracker).
        KalmanBoxTracker.count = 0

    @BaseTracker.setup_decorator
    @BaseTracker.per_class_decorator
    def update(
        self, dets: np.ndarray, img: np.ndarray, embs: np.ndarray = None
    ) -> np.ndarray:
        """
        Params:
          dets - a numpy array of detections in the format [[x1,y1,x2,y2,score],[x1,y1,x2,y2,score],...]
          img - the current frame; only its height/width are read here
          embs - accepted for interface compatibility; unused by this motion-only tracker
        Requires: this method must be called once for each frame even with empty detections
        (use np.empty((0, 5)) for frames without detections).
        Returns a similar array, where the last column is the object ID.
        NOTE: The number of objects returned may differ from the number of detections provided.
        """

        self.check_inputs(dets, img)

        self.frame_count += 1
        h, w = img.shape[0:2]

        # Append each detection's original row index as a trailing column so it
        # can be carried through association and reported alongside the track.
        dets = np.hstack([dets, np.arange(len(dets)).reshape(-1, 1)])
        # Confidence column; oriented boxes (is_obb) carry one extra coordinate,
        # which shifts the column index by one.
        confs = dets[:, 4 + self.is_obb]

        inds_low = confs > self.min_conf
        inds_high = confs < self.det_thresh
        inds_second = np.logical_and(
            inds_low, inds_high
        )  # min_conf < score < det_thresh: low-confidence detections kept for BYTE's second matching
        dets_second = dets[inds_second]  # detections for second matching
        remain_inds = confs > self.det_thresh
        dets = dets[remain_inds]

        # get predicted locations from existing trackers.
        trks = np.zeros((len(self.active_tracks), 5 + self.is_obb))
        to_del = []
        ret = []
        for t, trk in enumerate(trks):
            pos = self.active_tracks[t].predict()[0]
            trk[:] = [pos[i] for i in range(4 + self.is_obb)] + [0]
            if np.any(np.isnan(pos)):
                to_del.append(t)
        # Drop rows whose Kalman prediction produced NaNs, and remove the
        # corresponding tracks (reversed so pop indices stay valid).
        trks = np.ma.compress_rows(np.ma.masked_invalid(trks))
        for t in reversed(to_del):
            self.active_tracks.pop(t)

        # Per-track velocity estimates used by the direction-consistency term
        # in associate(); zeros for tracks with no velocity yet.
        velocities = np.array(
            [
                trk.velocity if trk.velocity is not None else np.array((0, 0))
                for trk in self.active_tracks
            ]
        )
        # Most recent matched observation per track; used by the fallback
        # IoU association over unmatched tracks below.
        last_boxes = np.array([trk.last_observation for trk in self.active_tracks])

        # Historical observation per track selected by k_previous_obs
        # (delta_t frames back), fed to associate() for motion direction.
        k_observations = np.array(
            [
                k_previous_obs(
                    trk.observations, trk.age, self.delta_t, is_obb=self.is_obb
                )
                for trk in self.active_tracks
            ]
        )

        """
            First round of association
        """
        matched, unmatched_dets, unmatched_trks = associate(
            dets[:, 0 : 5 + self.is_obb],
            trks,
            self.asso_func,
            self.asso_threshold,
            velocities,
            k_observations,
            self.inertia,
            w,
            h,
        )
        # m = [det_index, trk_index]; det row layout is [box..., conf, cls, det_ind],
        # hence the (:-2, -2, -1) split into (box+conf, cls, det_ind).
        for m in matched:
            self.active_tracks[m[1]].update(
                dets[m[0], :-2], dets[m[0], -2], dets[m[0], -1]
            )

        """
            Second round of associaton by OCR
        """
        # BYTE association
        if self.use_byte and len(dets_second) > 0 and unmatched_trks.shape[0] > 0:
            u_trks = trks[unmatched_trks]
            iou_left = self.asso_func(
                dets_second, u_trks
            )  # iou between low score detections and unmatched tracks
            iou_left = np.array(iou_left)
            if iou_left.max() > self.asso_threshold:
                """
                NOTE: by using a lower threshold, e.g., self.asso_threshold - 0.1, you may
                get a higher performance especially on MOT17/MOT20 datasets. But we keep it
                uniform here for simplicity
                """
                matched_indices = linear_assignment(-iou_left)
                to_remove_trk_indices = []
                for m in matched_indices:
                    det_ind, trk_ind = m[0], unmatched_trks[m[1]]
                    # Reject assignment pairs below the threshold even though
                    # the linear assignment proposed them.
                    if iou_left[m[0], m[1]] < self.asso_threshold:
                        continue
                    self.active_tracks[trk_ind].update(
                        dets_second[det_ind, :-2],
                        dets_second[det_ind, -2],
                        dets_second[det_ind, -1],
                    )
                    to_remove_trk_indices.append(trk_ind)
                unmatched_trks = np.setdiff1d(
                    unmatched_trks, np.array(to_remove_trk_indices)
                )

        # Fallback association: match remaining detections against the last
        # observed boxes (not the Kalman predictions) of remaining tracks.
        if unmatched_dets.shape[0] > 0 and unmatched_trks.shape[0] > 0:
            left_dets = dets[unmatched_dets]
            left_trks = last_boxes[unmatched_trks]
            iou_left = self.asso_func(left_dets, left_trks)
            iou_left = np.array(iou_left)
            if iou_left.max() > self.asso_threshold:
                """
                NOTE: by using a lower threshold, e.g., self.asso_threshold - 0.1, you may
                get a higher performance especially on MOT17/MOT20 datasets. But we keep it
                uniform here for simplicity
                """
                rematched_indices = linear_assignment(-iou_left)
                to_remove_det_indices = []
                to_remove_trk_indices = []
                for m in rematched_indices:
                    det_ind, trk_ind = unmatched_dets[m[0]], unmatched_trks[m[1]]
                    if iou_left[m[0], m[1]] < self.asso_threshold:
                        continue
                    self.active_tracks[trk_ind].update(
                        dets[det_ind, :-2], dets[det_ind, -2], dets[det_ind, -1]
                    )
                    to_remove_det_indices.append(det_ind)
                    to_remove_trk_indices.append(trk_ind)
                unmatched_dets = np.setdiff1d(
                    unmatched_dets, np.array(to_remove_det_indices)
                )
                unmatched_trks = np.setdiff1d(
                    unmatched_trks, np.array(to_remove_trk_indices)
                )

        # Tracks that stayed unmatched get a no-observation update
        # (advances their time_since_update bookkeeping).
        for m in unmatched_trks:
            self.active_tracks[m].update(None, None, None)

        # create and initialise new trackers for unmatched detections
        for i in unmatched_dets:
            if self.is_obb:
                trk = KalmanBoxTrackerOBB(
                    dets[i, :-2],
                    dets[i, -2],
                    dets[i, -1],
                    delta_t=self.delta_t,
                    Q_xy_scaling=self.Q_xy_scaling,
                    Q_a_scaling=self.Q_s_scaling,
                    max_obs=self.max_obs,
                )
            else:
                trk = KalmanBoxTracker(
                    dets[i, :5],
                    dets[i, 5],
                    dets[i, 6],
                    delta_t=self.delta_t,
                    Q_xy_scaling=self.Q_xy_scaling,
                    Q_s_scaling=self.Q_s_scaling,
                    max_obs=self.max_obs,
                )
            self.active_tracks.append(trk)
        # Iterate tracks from the end while decrementing i so pop(i) during
        # the reverse traversal removes the correct element.
        i = len(self.active_tracks)
        for trk in reversed(self.active_tracks):
            # A negative sum presumably marks "no observation recorded yet";
            # fall back to the Kalman state — confirm sentinel in KalmanBoxTracker.
            if trk.last_observation.sum() < 0:
                d = trk.get_state()[0]
            else:
                """
                this is optional to use the recent observation or the kalman filter prediction,
                we didn't notice significant difference here
                """
                d = trk.last_observation[: 4 + self.is_obb]
            # Emit only tracks updated this frame that are either confirmed
            # (enough hits) or still within the warm-up period.
            if (trk.time_since_update < 1) and (
                trk.hit_streak >= self.min_hits or self.frame_count <= self.min_hits
            ):
                # +1 as MOT benchmark requires positive
                ret.append(
                    np.concatenate(
                        (d, [trk.id + 1], [trk.conf], [trk.cls], [trk.det_ind])
                    ).reshape(1, -1)
                )
            i -= 1
            # remove dead tracklet
            if trk.time_since_update > self.max_age:
                self.active_tracks.pop(i)
        if len(ret) > 0:
            return np.concatenate(ret)
        # No reportable tracks this frame: empty (0,)-shaped array.
        return np.array([])

update(dets, img, embs=None)

Requires: this method must be called once for each frame even with empty detections (use np.empty((0, 5)) for frames without detections). Returns a similar array, where the last column is the object ID. NOTE: The number of objects returned may differ from the number of detections provided.

Source code in boxmot/trackers/ocsort/ocsort.py
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
@BaseTracker.setup_decorator
@BaseTracker.per_class_decorator
def update(
    self, dets: np.ndarray, img: np.ndarray, embs: np.ndarray = None
) -> np.ndarray:
    """
    Params:
      dets - a numpy array of detections in the format [[x1,y1,x2,y2,score],[x1,y1,x2,y2,score],...]
      img - the current frame; only its height/width are read here
      embs - accepted for interface compatibility; unused by this motion-only tracker
    Requires: this method must be called once for each frame even with empty detections
    (use np.empty((0, 5)) for frames without detections).
    Returns a similar array, where the last column is the object ID.
    NOTE: The number of objects returned may differ from the number of detections provided.
    """

    self.check_inputs(dets, img)

    self.frame_count += 1
    h, w = img.shape[0:2]

    # Append each detection's original row index as a trailing column so it
    # can be carried through association and reported alongside the track.
    dets = np.hstack([dets, np.arange(len(dets)).reshape(-1, 1)])
    # Confidence column; oriented boxes (is_obb) carry one extra coordinate,
    # which shifts the column index by one.
    confs = dets[:, 4 + self.is_obb]

    inds_low = confs > self.min_conf
    inds_high = confs < self.det_thresh
    inds_second = np.logical_and(
        inds_low, inds_high
    )  # min_conf < score < det_thresh: low-confidence detections kept for BYTE's second matching
    dets_second = dets[inds_second]  # detections for second matching
    remain_inds = confs > self.det_thresh
    dets = dets[remain_inds]

    # get predicted locations from existing trackers.
    trks = np.zeros((len(self.active_tracks), 5 + self.is_obb))
    to_del = []
    ret = []
    for t, trk in enumerate(trks):
        pos = self.active_tracks[t].predict()[0]
        trk[:] = [pos[i] for i in range(4 + self.is_obb)] + [0]
        if np.any(np.isnan(pos)):
            to_del.append(t)
    # Drop rows whose Kalman prediction produced NaNs, and remove the
    # corresponding tracks (reversed so pop indices stay valid).
    trks = np.ma.compress_rows(np.ma.masked_invalid(trks))
    for t in reversed(to_del):
        self.active_tracks.pop(t)

    # Per-track velocity estimates used by the direction-consistency term
    # in associate(); zeros for tracks with no velocity yet.
    velocities = np.array(
        [
            trk.velocity if trk.velocity is not None else np.array((0, 0))
            for trk in self.active_tracks
        ]
    )
    # Most recent matched observation per track; used by the fallback
    # IoU association over unmatched tracks below.
    last_boxes = np.array([trk.last_observation for trk in self.active_tracks])

    # Historical observation per track selected by k_previous_obs
    # (delta_t frames back), fed to associate() for motion direction.
    k_observations = np.array(
        [
            k_previous_obs(
                trk.observations, trk.age, self.delta_t, is_obb=self.is_obb
            )
            for trk in self.active_tracks
        ]
    )

    """
        First round of association
    """
    matched, unmatched_dets, unmatched_trks = associate(
        dets[:, 0 : 5 + self.is_obb],
        trks,
        self.asso_func,
        self.asso_threshold,
        velocities,
        k_observations,
        self.inertia,
        w,
        h,
    )
    # m = [det_index, trk_index]; det row layout is [box..., conf, cls, det_ind],
    # hence the (:-2, -2, -1) split into (box+conf, cls, det_ind).
    for m in matched:
        self.active_tracks[m[1]].update(
            dets[m[0], :-2], dets[m[0], -2], dets[m[0], -1]
        )

    """
        Second round of associaton by OCR
    """
    # BYTE association
    if self.use_byte and len(dets_second) > 0 and unmatched_trks.shape[0] > 0:
        u_trks = trks[unmatched_trks]
        iou_left = self.asso_func(
            dets_second, u_trks
        )  # iou between low score detections and unmatched tracks
        iou_left = np.array(iou_left)
        if iou_left.max() > self.asso_threshold:
            """
            NOTE: by using a lower threshold, e.g., self.asso_threshold - 0.1, you may
            get a higher performance especially on MOT17/MOT20 datasets. But we keep it
            uniform here for simplicity
            """
            matched_indices = linear_assignment(-iou_left)
            to_remove_trk_indices = []
            for m in matched_indices:
                det_ind, trk_ind = m[0], unmatched_trks[m[1]]
                # Reject assignment pairs below the threshold even though
                # the linear assignment proposed them.
                if iou_left[m[0], m[1]] < self.asso_threshold:
                    continue
                self.active_tracks[trk_ind].update(
                    dets_second[det_ind, :-2],
                    dets_second[det_ind, -2],
                    dets_second[det_ind, -1],
                )
                to_remove_trk_indices.append(trk_ind)
            unmatched_trks = np.setdiff1d(
                unmatched_trks, np.array(to_remove_trk_indices)
            )

    # Fallback association: match remaining detections against the last
    # observed boxes (not the Kalman predictions) of remaining tracks.
    if unmatched_dets.shape[0] > 0 and unmatched_trks.shape[0] > 0:
        left_dets = dets[unmatched_dets]
        left_trks = last_boxes[unmatched_trks]
        iou_left = self.asso_func(left_dets, left_trks)
        iou_left = np.array(iou_left)
        if iou_left.max() > self.asso_threshold:
            """
            NOTE: by using a lower threshold, e.g., self.asso_threshold - 0.1, you may
            get a higher performance especially on MOT17/MOT20 datasets. But we keep it
            uniform here for simplicity
            """
            rematched_indices = linear_assignment(-iou_left)
            to_remove_det_indices = []
            to_remove_trk_indices = []
            for m in rematched_indices:
                det_ind, trk_ind = unmatched_dets[m[0]], unmatched_trks[m[1]]
                if iou_left[m[0], m[1]] < self.asso_threshold:
                    continue
                self.active_tracks[trk_ind].update(
                    dets[det_ind, :-2], dets[det_ind, -2], dets[det_ind, -1]
                )
                to_remove_det_indices.append(det_ind)
                to_remove_trk_indices.append(trk_ind)
            unmatched_dets = np.setdiff1d(
                unmatched_dets, np.array(to_remove_det_indices)
            )
            unmatched_trks = np.setdiff1d(
                unmatched_trks, np.array(to_remove_trk_indices)
            )

    # Tracks that stayed unmatched get a no-observation update
    # (advances their time_since_update bookkeeping).
    for m in unmatched_trks:
        self.active_tracks[m].update(None, None, None)

    # create and initialise new trackers for unmatched detections
    for i in unmatched_dets:
        if self.is_obb:
            trk = KalmanBoxTrackerOBB(
                dets[i, :-2],
                dets[i, -2],
                dets[i, -1],
                delta_t=self.delta_t,
                Q_xy_scaling=self.Q_xy_scaling,
                Q_a_scaling=self.Q_s_scaling,
                max_obs=self.max_obs,
            )
        else:
            trk = KalmanBoxTracker(
                dets[i, :5],
                dets[i, 5],
                dets[i, 6],
                delta_t=self.delta_t,
                Q_xy_scaling=self.Q_xy_scaling,
                Q_s_scaling=self.Q_s_scaling,
                max_obs=self.max_obs,
            )
        self.active_tracks.append(trk)
    # Iterate tracks from the end while decrementing i so pop(i) during
    # the reverse traversal removes the correct element.
    i = len(self.active_tracks)
    for trk in reversed(self.active_tracks):
        # A negative sum presumably marks "no observation recorded yet";
        # fall back to the Kalman state — confirm sentinel in KalmanBoxTracker.
        if trk.last_observation.sum() < 0:
            d = trk.get_state()[0]
        else:
            """
            this is optional to use the recent observation or the kalman filter prediction,
            we didn't notice significant difference here
            """
            d = trk.last_observation[: 4 + self.is_obb]
        # Emit only tracks updated this frame that are either confirmed
        # (enough hits) or still within the warm-up period.
        if (trk.time_since_update < 1) and (
            trk.hit_streak >= self.min_hits or self.frame_count <= self.min_hits
        ):
            # +1 as MOT benchmark requires positive
            ret.append(
                np.concatenate(
                    (d, [trk.id + 1], [trk.conf], [trk.cls], [trk.det_ind])
                ).reshape(1, -1)
            )
        i -= 1
        # remove dead tracklet
        if trk.time_since_update > self.max_age:
            self.active_tracks.pop(i)
    if len(ret) > 0:
        return np.concatenate(ret)
    # No reportable tracks this frame: empty (0,)-shaped array.
    return np.array([])