Skip to content

OcSort

Bases: BaseTracker

OCSort Tracker: A tracking algorithm that utilizes motion-based tracking.

Parameters:

Name Type Description Default
per_class bool

Whether to perform per-class tracking. If True, tracks are maintained separately for each object class.

False
min_conf float

Minimum detection confidence. Detections with confidence at or below this value are discarded before either association round.

0.1
det_thresh float

Detection confidence threshold. Detections below this threshold are ignored in the first association step.

0.2
max_age int

Maximum number of frames to keep a track alive without any detections.

30
min_hits int

Minimum number of hits required to confirm a track.

3
asso_threshold float

Threshold for the association step in data association. Controls the maximum distance allowed between tracklets and detections for a match.

0.3
delta_t int

Time delta for velocity estimation in Kalman Filter.

3
asso_func str

Association function to use for data association. Options include "iou" for IoU-based association.

'iou'
inertia float

Weight for inertia in motion modeling. Higher values make tracks less responsive to changes.

0.2
use_byte bool

Whether to use BYTE association in the second association step.

False
Q_xy_scaling float

Scaling factor for the process noise covariance in the Kalman Filter for position coordinates.

0.01
Q_s_scaling float

Scaling factor for the process noise covariance in the Kalman Filter for scale coordinates.

0.0001
Source code in boxmot/trackers/ocsort/ocsort.py
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
class OcSort(BaseTracker):
    """
    OCSort Tracker: A tracking algorithm that utilizes motion-based tracking.

    Args:
        per_class (bool, optional): Whether to perform per-class tracking. If True, tracks are maintained separately for each object class.
        min_conf (float, optional): Minimum detection confidence. Detections with confidence at or below this value are discarded before either association round.
        det_thresh (float, optional): Detection confidence threshold. Detections below this threshold are ignored in the first association step.
        max_age (int, optional): Maximum number of frames to keep a track alive without any detections.
        min_hits (int, optional): Minimum number of hits required to confirm a track.
        asso_threshold (float, optional): Threshold for the association step in data association. Controls the maximum distance allowed between tracklets and detections for a match.
        delta_t (int, optional): Time delta for velocity estimation in Kalman Filter.
        asso_func (str, optional): Association function to use for data association. Options include "iou" for IoU-based association.
        inertia (float, optional): Weight for inertia in motion modeling. Higher values make tracks less responsive to changes.
        use_byte (bool, optional): Whether to use BYTE association in the second association step.
        Q_xy_scaling (float, optional): Scaling factor for the process noise covariance in the Kalman Filter for position coordinates.
        Q_s_scaling (float, optional): Scaling factor for the process noise covariance in the Kalman Filter for scale coordinates.
    """

    def __init__(
        self,
        per_class: bool = False,
        min_conf: float = 0.1,
        det_thresh: float = 0.2,
        max_age: int = 30,
        min_hits: int = 3,
        asso_threshold: float = 0.3,
        delta_t: int = 3,
        asso_func: str = "iou",
        inertia: float = 0.2,
        use_byte: bool = False,
        Q_xy_scaling: float = 0.01,
        Q_s_scaling: float = 0.0001,
    ):
        super().__init__(max_age=max_age, per_class=per_class, asso_func=asso_func)
        """
        Sets key parameters for SORT
        """
        self.per_class = per_class
        self.min_conf = min_conf
        self.max_age = max_age
        self.min_hits = min_hits
        self.asso_threshold = asso_threshold
        self.frame_count = 0  # number of frames processed so far; incremented each update() call
        self.det_thresh = det_thresh
        self.delta_t = delta_t
        self.inertia = inertia
        self.use_byte = use_byte
        self.Q_xy_scaling = Q_xy_scaling
        self.Q_s_scaling = Q_s_scaling
        # Reset the class-level counter so a fresh tracker instance starts
        # numbering from zero (presumably the source of track IDs — confirm
        # in KalmanBoxTracker).
        KalmanBoxTracker.count = 0

    @BaseTracker.setup_decorator
    @BaseTracker.per_class_decorator
    def update(
        self, dets: np.ndarray, img: np.ndarray, embs: np.ndarray = None
    ) -> np.ndarray:
        """
        Params:
          dets - a numpy array of detections in the format [[x1,y1,x2,y2,score],[x1,y1,x2,y2,score],...]
          img - the current frame; only its height/width are read here
          embs - accepted for interface compatibility; unused by this motion-only tracker
        Requires: this method must be called once for each frame even with empty detections
        (use np.empty((0, 5)) for frames without detections).
        Returns a similar array, where the last column is the object ID.
        NOTE: The number of objects returned may differ from the number of detections provided.
        """

        self.check_inputs(dets, img)

        self.frame_count += 1
        h, w = img.shape[0:2]

        # Append each detection's original row index as a trailing column so it
        # can be carried through association and reported alongside the track.
        dets = np.hstack([dets, np.arange(len(dets)).reshape(-1, 1)])
        # Confidence column; oriented boxes (is_obb) carry one extra coordinate,
        # which shifts the column index by one.
        confs = dets[:, 4 + self.is_obb]

        inds_low = confs > self.min_conf
        inds_high = confs < self.det_thresh
        inds_second = np.logical_and(
            inds_low, inds_high
        )  # min_conf < score < det_thresh: low-confidence detections kept for BYTE's second matching
        dets_second = dets[inds_second]  # detections for second matching
        remain_inds = confs > self.det_thresh
        dets = dets[remain_inds]

        # get predicted locations from existing trackers.
        trks = np.zeros((len(self.active_tracks), 5 + self.is_obb))
        to_del = []
        ret = []
        for t, trk in enumerate(trks):
            pos = self.active_tracks[t].predict()[0]
            trk[:] = [pos[i] for i in range(4 + self.is_obb)] + [0]
            if np.any(np.isnan(pos)):
                to_del.append(t)
        # Drop rows whose Kalman prediction produced NaNs, and remove the
        # corresponding tracks (reversed so pop indices stay valid).
        trks = np.ma.compress_rows(np.ma.masked_invalid(trks))
        for t in reversed(to_del):
            self.active_tracks.pop(t)

        # Per-track velocity estimates used by the direction-consistency term
        # in associate(); zeros for tracks with no velocity yet.
        velocities = np.array(
            [
                trk.velocity if trk.velocity is not None else np.array((0, 0))
                for trk in self.active_tracks
            ]
        )
        # Most recent matched observation per track; used by the fallback
        # IoU association over unmatched tracks below.
        last_boxes = np.array([trk.last_observation for trk in self.active_tracks])

        # Historical observation per track selected by k_previous_obs
        # (delta_t frames back), fed to associate() for motion direction.
        k_observations = np.array(
            [
                k_previous_obs(
                    trk.observations, trk.age, self.delta_t, is_obb=self.is_obb
                )
                for trk in self.active_tracks
            ]
        )

        """
            First round of association
        """
        matched, unmatched_dets, unmatched_trks = associate(
            dets[:, 0 : 5 + self.is_obb],
            trks,
            self.asso_func,
            self.asso_threshold,
            velocities,
            k_observations,
            self.inertia,
            w,
            h,
        )
        # m = [det_index, trk_index]; det row layout is [box..., conf, cls, det_ind],
        # hence the (:-2, -2, -1) split into (box+conf, cls, det_ind).
        for m in matched:
            self.active_tracks[m[1]].update(
                dets[m[0], :-2], dets[m[0], -2], dets[m[0], -1]
            )

        """
            Second round of associaton by OCR
        """
        # BYTE association
        if self.use_byte and len(dets_second) > 0 and unmatched_trks.shape[0] > 0:
            u_trks = trks[unmatched_trks]
            iou_left = self.asso_func(
                dets_second, u_trks
            )  # iou between low score detections and unmatched tracks
            iou_left = np.array(iou_left)
            if iou_left.max() > self.asso_threshold:
                """
                NOTE: by using a lower threshold, e.g., self.asso_threshold - 0.1, you may
                get a higher performance especially on MOT17/MOT20 datasets. But we keep it
                uniform here for simplicity
                """
                matched_indices = linear_assignment(-iou_left)
                to_remove_trk_indices = []
                for m in matched_indices:
                    det_ind, trk_ind = m[0], unmatched_trks[m[1]]
                    # Reject assignment pairs below the threshold even though
                    # the linear assignment proposed them.
                    if iou_left[m[0], m[1]] < self.asso_threshold:
                        continue
                    self.active_tracks[trk_ind].update(
                        dets_second[det_ind, :-2],
                        dets_second[det_ind, -2],
                        dets_second[det_ind, -1],
                    )
                    to_remove_trk_indices.append(trk_ind)
                unmatched_trks = np.setdiff1d(
                    unmatched_trks, np.array(to_remove_trk_indices)
                )

        # Fallback association: match remaining detections against the last
        # observed boxes (not the Kalman predictions) of remaining tracks.
        if unmatched_dets.shape[0] > 0 and unmatched_trks.shape[0] > 0:
            left_dets = dets[unmatched_dets]
            left_trks = last_boxes[unmatched_trks]
            iou_left = self.asso_func(left_dets, left_trks)
            iou_left = np.array(iou_left)
            if iou_left.max() > self.asso_threshold:
                """
                NOTE: by using a lower threshold, e.g., self.asso_threshold - 0.1, you may
                get a higher performance especially on MOT17/MOT20 datasets. But we keep it
                uniform here for simplicity
                """
                rematched_indices = linear_assignment(-iou_left)
                to_remove_det_indices = []
                to_remove_trk_indices = []
                for m in rematched_indices:
                    det_ind, trk_ind = unmatched_dets[m[0]], unmatched_trks[m[1]]
                    if iou_left[m[0], m[1]] < self.asso_threshold:
                        continue
                    self.active_tracks[trk_ind].update(
                        dets[det_ind, :-2], dets[det_ind, -2], dets[det_ind, -1]
                    )
                    to_remove_det_indices.append(det_ind)
                    to_remove_trk_indices.append(trk_ind)
                unmatched_dets = np.setdiff1d(
                    unmatched_dets, np.array(to_remove_det_indices)
                )
                unmatched_trks = np.setdiff1d(
                    unmatched_trks, np.array(to_remove_trk_indices)
                )

        # Tracks that stayed unmatched get a no-observation update
        # (advances their time_since_update bookkeeping).
        for m in unmatched_trks:
            self.active_tracks[m].update(None, None, None)

        # create and initialise new trackers for unmatched detections
        for i in unmatched_dets:
            if self.is_obb:
                trk = KalmanBoxTrackerOBB(
                    dets[i, :-2],
                    dets[i, -2],
                    dets[i, -1],
                    delta_t=self.delta_t,
                    Q_xy_scaling=self.Q_xy_scaling,
                    Q_a_scaling=self.Q_s_scaling,
                    max_obs=self.max_obs,
                )
            else:
                trk = KalmanBoxTracker(
                    dets[i, :5],
                    dets[i, 5],
                    dets[i, 6],
                    delta_t=self.delta_t,
                    Q_xy_scaling=self.Q_xy_scaling,
                    Q_s_scaling=self.Q_s_scaling,
                    max_obs=self.max_obs,
                )
            self.active_tracks.append(trk)
        # Iterate tracks from the end while decrementing i so pop(i) during
        # the reverse traversal removes the correct element.
        i = len(self.active_tracks)
        for trk in reversed(self.active_tracks):
            # A negative sum presumably marks "no observation recorded yet";
            # fall back to the Kalman state — confirm sentinel in KalmanBoxTracker.
            if trk.last_observation.sum() < 0:
                d = trk.get_state()[0]
            else:
                """
                this is optional to use the recent observation or the kalman filter prediction,
                we didn't notice significant difference here
                """
                d = trk.last_observation[: 4 + self.is_obb]
            # Emit only tracks updated this frame that are either confirmed
            # (enough hits) or still within the warm-up period.
            if (trk.time_since_update < 1) and (
                trk.hit_streak >= self.min_hits or self.frame_count <= self.min_hits
            ):
                # +1 as MOT benchmark requires positive
                ret.append(
                    np.concatenate(
                        (d, [trk.id + 1], [trk.conf], [trk.cls], [trk.det_ind])
                    ).reshape(1, -1)
                )
            i -= 1
            # remove dead tracklet
            if trk.time_since_update > self.max_age:
                self.active_tracks.pop(i)
        if len(ret) > 0:
            return np.concatenate(ret)
        # No reportable tracks this frame: empty (0,)-shaped array.
        return np.array([])

update(dets, img, embs=None)

Requires: this method must be called once for each frame even with empty detections (use np.empty((0, 5)) for frames without detections). Returns a similar array, where the last column is the object ID. NOTE: The number of objects returned may differ from the number of detections provided.

Source code in boxmot/trackers/ocsort/ocsort.py
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
@BaseTracker.setup_decorator
@BaseTracker.per_class_decorator
def update(
    self, dets: np.ndarray, img: np.ndarray, embs: np.ndarray = None
) -> np.ndarray:
    """
    Params:
      dets - a numpy array of detections in the format [[x1,y1,x2,y2,score],[x1,y1,x2,y2,score],...]
      img - the current frame; only its height/width are read here
      embs - accepted for interface compatibility; unused by this motion-only tracker
    Requires: this method must be called once for each frame even with empty detections
    (use np.empty((0, 5)) for frames without detections).
    Returns a similar array, where the last column is the object ID.
    NOTE: The number of objects returned may differ from the number of detections provided.
    """

    self.check_inputs(dets, img)

    self.frame_count += 1
    h, w = img.shape[0:2]

    # Append each detection's original row index as a trailing column so it
    # can be carried through association and reported alongside the track.
    dets = np.hstack([dets, np.arange(len(dets)).reshape(-1, 1)])
    # Confidence column; oriented boxes (is_obb) carry one extra coordinate,
    # which shifts the column index by one.
    confs = dets[:, 4 + self.is_obb]

    inds_low = confs > self.min_conf
    inds_high = confs < self.det_thresh
    inds_second = np.logical_and(
        inds_low, inds_high
    )  # min_conf < score < det_thresh: low-confidence detections kept for BYTE's second matching
    dets_second = dets[inds_second]  # detections for second matching
    remain_inds = confs > self.det_thresh
    dets = dets[remain_inds]

    # get predicted locations from existing trackers.
    trks = np.zeros((len(self.active_tracks), 5 + self.is_obb))
    to_del = []
    ret = []
    for t, trk in enumerate(trks):
        pos = self.active_tracks[t].predict()[0]
        trk[:] = [pos[i] for i in range(4 + self.is_obb)] + [0]
        if np.any(np.isnan(pos)):
            to_del.append(t)
    # Drop rows whose Kalman prediction produced NaNs, and remove the
    # corresponding tracks (reversed so pop indices stay valid).
    trks = np.ma.compress_rows(np.ma.masked_invalid(trks))
    for t in reversed(to_del):
        self.active_tracks.pop(t)

    # Per-track velocity estimates used by the direction-consistency term
    # in associate(); zeros for tracks with no velocity yet.
    velocities = np.array(
        [
            trk.velocity if trk.velocity is not None else np.array((0, 0))
            for trk in self.active_tracks
        ]
    )
    # Most recent matched observation per track; used by the fallback
    # IoU association over unmatched tracks below.
    last_boxes = np.array([trk.last_observation for trk in self.active_tracks])

    # Historical observation per track selected by k_previous_obs
    # (delta_t frames back), fed to associate() for motion direction.
    k_observations = np.array(
        [
            k_previous_obs(
                trk.observations, trk.age, self.delta_t, is_obb=self.is_obb
            )
            for trk in self.active_tracks
        ]
    )

    """
        First round of association
    """
    matched, unmatched_dets, unmatched_trks = associate(
        dets[:, 0 : 5 + self.is_obb],
        trks,
        self.asso_func,
        self.asso_threshold,
        velocities,
        k_observations,
        self.inertia,
        w,
        h,
    )
    # m = [det_index, trk_index]; det row layout is [box..., conf, cls, det_ind],
    # hence the (:-2, -2, -1) split into (box+conf, cls, det_ind).
    for m in matched:
        self.active_tracks[m[1]].update(
            dets[m[0], :-2], dets[m[0], -2], dets[m[0], -1]
        )

    """
        Second round of associaton by OCR
    """
    # BYTE association
    if self.use_byte and len(dets_second) > 0 and unmatched_trks.shape[0] > 0:
        u_trks = trks[unmatched_trks]
        iou_left = self.asso_func(
            dets_second, u_trks
        )  # iou between low score detections and unmatched tracks
        iou_left = np.array(iou_left)
        if iou_left.max() > self.asso_threshold:
            """
            NOTE: by using a lower threshold, e.g., self.asso_threshold - 0.1, you may
            get a higher performance especially on MOT17/MOT20 datasets. But we keep it
            uniform here for simplicity
            """
            matched_indices = linear_assignment(-iou_left)
            to_remove_trk_indices = []
            for m in matched_indices:
                det_ind, trk_ind = m[0], unmatched_trks[m[1]]
                # Reject assignment pairs below the threshold even though
                # the linear assignment proposed them.
                if iou_left[m[0], m[1]] < self.asso_threshold:
                    continue
                self.active_tracks[trk_ind].update(
                    dets_second[det_ind, :-2],
                    dets_second[det_ind, -2],
                    dets_second[det_ind, -1],
                )
                to_remove_trk_indices.append(trk_ind)
            unmatched_trks = np.setdiff1d(
                unmatched_trks, np.array(to_remove_trk_indices)
            )

    # Fallback association: match remaining detections against the last
    # observed boxes (not the Kalman predictions) of remaining tracks.
    if unmatched_dets.shape[0] > 0 and unmatched_trks.shape[0] > 0:
        left_dets = dets[unmatched_dets]
        left_trks = last_boxes[unmatched_trks]
        iou_left = self.asso_func(left_dets, left_trks)
        iou_left = np.array(iou_left)
        if iou_left.max() > self.asso_threshold:
            """
            NOTE: by using a lower threshold, e.g., self.asso_threshold - 0.1, you may
            get a higher performance especially on MOT17/MOT20 datasets. But we keep it
            uniform here for simplicity
            """
            rematched_indices = linear_assignment(-iou_left)
            to_remove_det_indices = []
            to_remove_trk_indices = []
            for m in rematched_indices:
                det_ind, trk_ind = unmatched_dets[m[0]], unmatched_trks[m[1]]
                if iou_left[m[0], m[1]] < self.asso_threshold:
                    continue
                self.active_tracks[trk_ind].update(
                    dets[det_ind, :-2], dets[det_ind, -2], dets[det_ind, -1]
                )
                to_remove_det_indices.append(det_ind)
                to_remove_trk_indices.append(trk_ind)
            unmatched_dets = np.setdiff1d(
                unmatched_dets, np.array(to_remove_det_indices)
            )
            unmatched_trks = np.setdiff1d(
                unmatched_trks, np.array(to_remove_trk_indices)
            )

    # Tracks that stayed unmatched get a no-observation update
    # (advances their time_since_update bookkeeping).
    for m in unmatched_trks:
        self.active_tracks[m].update(None, None, None)

    # create and initialise new trackers for unmatched detections
    for i in unmatched_dets:
        if self.is_obb:
            trk = KalmanBoxTrackerOBB(
                dets[i, :-2],
                dets[i, -2],
                dets[i, -1],
                delta_t=self.delta_t,
                Q_xy_scaling=self.Q_xy_scaling,
                Q_a_scaling=self.Q_s_scaling,
                max_obs=self.max_obs,
            )
        else:
            trk = KalmanBoxTracker(
                dets[i, :5],
                dets[i, 5],
                dets[i, 6],
                delta_t=self.delta_t,
                Q_xy_scaling=self.Q_xy_scaling,
                Q_s_scaling=self.Q_s_scaling,
                max_obs=self.max_obs,
            )
        self.active_tracks.append(trk)
    # Iterate tracks from the end while decrementing i so pop(i) during
    # the reverse traversal removes the correct element.
    i = len(self.active_tracks)
    for trk in reversed(self.active_tracks):
        # A negative sum presumably marks "no observation recorded yet";
        # fall back to the Kalman state — confirm sentinel in KalmanBoxTracker.
        if trk.last_observation.sum() < 0:
            d = trk.get_state()[0]
        else:
            """
            this is optional to use the recent observation or the kalman filter prediction,
            we didn't notice significant difference here
            """
            d = trk.last_observation[: 4 + self.is_obb]
        # Emit only tracks updated this frame that are either confirmed
        # (enough hits) or still within the warm-up period.
        if (trk.time_since_update < 1) and (
            trk.hit_streak >= self.min_hits or self.frame_count <= self.min_hits
        ):
            # +1 as MOT benchmark requires positive
            ret.append(
                np.concatenate(
                    (d, [trk.id + 1], [trk.conf], [trk.cls], [trk.det_ind])
                ).reshape(1, -1)
            )
        i -= 1
        # remove dead tracklet
        if trk.time_since_update > self.max_age:
            self.active_tracks.pop(i)
    if len(ret) > 0:
        return np.concatenate(ret)
    # No reportable tracks this frame: empty (0,)-shaped array.
    return np.array([])