
YOLOv8 Nightshift

Dataset

Teledyne FLIR Free ADAS Thermal Dataset v2: The Teledyne FLIR free starter thermal dataset provides fully annotated thermal and visible spectrum frames for development of object detection neural networks. This data was constructed to encourage research on visible + thermal spectrum sensor fusion algorithms ("RGBT") in order to advance the safety of autonomous vehicles. A total of 26,442 fully-annotated frames are included with 15 different object classes.

Baseline Model: Baseline accuracy for object detection was established using the YOLOX-m neural network designed for 640 × 640 images. Both the RGB and thermal detectors were pre-trained on MSCOCO data (see the paper "YOLOX: Exceeding YOLO Series in 2021" and the YOLOX repository). The base neural networks were trained on the training set provided in this dataset and tested on the video test data that is also part of this dataset.

from glob import glob
import json
import matplotlib.pyplot as plt
import numpy as np
import os
import shutil
from tqdm import tqdm

Dataset Exploration

# read in dataset
images_thermal = glob('./datasets/video_thermal_test/images/*.jpg')
images_rgb = glob('./datasets/video_rgb_test/images/*.jpg')
print(len(images_thermal), len(images_rgb))

3749 3749

# plot multiple random thermal images
ran_gen = np.random.default_rng()

plt.figure(figsize=(16, 14))
plt.suptitle('Thermal Images')
for i in range(12):
    ax = plt.subplot(4, 4, i+1)
    # draw a random image index (high is exclusive)
    random_index = ran_gen.integers(low=0, high=len(images_thermal), size=1)
    idx = random_index[0]
    img_loc = images_thermal[idx]
    # the file name encodes video id, frame number and frame id
    img_title = 'video: ' + images_thermal[idx][-52:-35] + '\n' + 'frame: ' + images_thermal[idx][-28:-22] + '\n' + 'id: ' + images_thermal[idx][-21:-4]
    image = plt.imread(img_loc)
    plt.imshow(image, cmap=plt.cm.binary)
    plt.title(img_title, fontsize='small')
    plt.axis(False)


# plot multiple random rgb images
ran_gen = np.random.default_rng()

plt.figure(figsize=(16, 14))
plt.suptitle('RGB Images')
for i in range(12):
    ax = plt.subplot(4, 4, i+1)
    random_index = ran_gen.integers(low=0, high=len(images_rgb), size=1)
    idx = random_index[0]
    img_loc = images_rgb[idx]
    img_title = 'video: ' + images_rgb[idx][-52:-35] + '\n' + 'frame: ' + images_rgb[idx][-28:-22] + '\n' + 'id: ' + images_rgb[idx][-21:-4]
    image = plt.imread(img_loc)
    plt.imshow(image, cmap=plt.cm.binary)
    plt.title(img_title, fontsize='small')
    plt.axis(False)

Training the YOLOv8 Model (RGB)

Label Conversion JSON2YOLO

  • "file_name": "data/video-BzZspxAweF8AnKhWK-frame-000745-SSCRtAHcFjphNPczJ.jpg", -> "file_name": "video-BzZspxAweF8AnKhWK-frame-000745-SSCRtAHcFjphNPczJ.jpg",

YOLOv8 expects all images to be located in an images directory with the txt-format annotations in a labels folder next to it. This dataset kept all images in a directory named data and shipped COCO JSON annotations. I renamed the folder, created the missing one and removed the "data/" prefix from all file names in the JSON file.
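Stripping the prefix is easy to script. A minimal sketch, assuming the file names look like the example above (Python 3.9+ for removeprefix):

import json

# strip the leading "data/" from every file_name in a COCO annotation file
def strip_data_prefix(json_file):
    with open(json_file) as f:
        coco = json.load(f)
    for image in coco['images']:
        image['file_name'] = image['file_name'].removeprefix('data/')
    with open(json_file, 'w') as f:
        json.dump(coco, f)

# strip_data_prefix('./datasets/video_rgb_test/coco.json')

With the folders and file names fixed I am able to run the conversion: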

def make_folders(output_path):
    # start with a clean output directory
    if os.path.exists(output_path):
        shutil.rmtree(output_path)
    os.makedirs(output_path)
    return output_path


def convert_bbox_coco2yolo(img_width, img_height, bbox):
    """
    Convert bounding box from COCO format to YOLO format

    Parameters
    ----------
    img_width : int
        width of image
    img_height : int
        height of image
    bbox : list[int]
        bounding box annotation in COCO format:
        [top left x position, top left y position, width, height]

    Returns
    -------
    list[float]
        bounding box annotation in YOLO format:
        [x_center_rel, y_center_rel, width_rel, height_rel]
    """

    # YOLO bounding box format: [x_center, y_center, width, height]
    # (float values relative to width and height of image)
    x_tl, y_tl, w, h = bbox

    dw = 1.0 / img_width
    dh = 1.0 / img_height

    x_center = x_tl + w / 2.0
    y_center = y_tl + h / 2.0

    x = x_center * dw
    y = y_center * dh
    w = w * dw
    h = h * dh

    return [x, y, w, h]
def convert_coco_json_to_yolo_txt(output_path, json_file):

    path = make_folders(output_path)

    with open(json_file) as f:
        json_data = json.load(f)

    # write _darknet.labels, which holds names of all classes (one class per line)
    label_file = os.path.join(output_path, "_darknet.labels")
    with open(label_file, "w") as f:
        for category in tqdm(json_data["categories"], desc="Categories"):
            category_name = category["name"]
            f.write(f"{category_name}\n")

    for image in tqdm(json_data["images"], desc="Annotation txt for each image"):
        img_id = image["id"]
        img_name = image["file_name"]
        img_width = image["width"]
        img_height = image["height"]

        anno_in_image = [anno for anno in json_data["annotations"] if anno["image_id"] == img_id]
        anno_txt = os.path.join(output_path, img_name.split(".")[0] + ".txt")
        with open(anno_txt, "w") as f:
            for anno in anno_in_image:
                category = anno["category_id"]
                bbox_COCO = anno["bbox"]
                x, y, w, h = convert_bbox_coco2yolo(img_width, img_height, bbox_COCO)
                f.write(f"{category} {x:.6f} {y:.6f} {w:.6f} {h:.6f}\n")

    print("Converting COCO Json to YOLO txt finished!")

Video RGB Test Dataset

convert_coco_json_to_yolo_txt("./datasets/video_rgb_test/labels", "./datasets/video_rgb_test/coco.json")

Categories: 100%|██████████| 16/16 [00:00, 253241.00it/s]
Annotation txt for each image: 100%|██████████| 3749/3749 [00:38, 98.23it/s]

Converting COCO Json to YOLO txt finished!

Video Thermal Test Dataset

convert_coco_json_to_yolo_txt("./datasets/video_thermal_test/labels", "./datasets/video_thermal_test/coco.json")

Categories: 100%|██████████| 16/16 [00:00, 430185.03it/s]
Annotation txt for each image: 100%|██████████| 3749/3749 [00:25, 145.99it/s]

Converting COCO Json to YOLO txt finished!

Images RGB Train Dataset

convert_coco_json_to_yolo_txt("./datasets/images_rgb_train/labels", "./datasets/images_rgb_train/coco.json")

Categories: 100%|██████████| 16/16 [00:00, 175218.97it/s]
Annotation txt for each image: 100%|██████████| 10318/10318 [03:18, 51.86it/s]

Converting COCO Json to YOLO txt finished!

Images Thermal Train Dataset

convert_coco_json_to_yolo_txt("./datasets/images_thermal_train/labels", "./datasets/images_thermal_train/coco.json")

Categories: 100%|██████████| 16/16 [00:00, 394758.02it/s]
Annotation txt for each image: 100%|██████████| 10742/10742 [03:07, 57.44it/s]

Converting COCO Json to YOLO txt finished!

Images RGB Val Dataset

convert_coco_json_to_yolo_txt("./datasets/images_rgb_val/labels", "./datasets/images_rgb_val/coco.json")

Categories: 100%|██████████| 16/16 [00:00, 281970.02it/s]
Annotation txt for each image: 100%|██████████| 1085/1085 [00:02, 452.60it/s]

Converting COCO Json to YOLO txt finished!

Images Thermal Val Dataset

convert_coco_json_to_yolo_txt("./datasets/images_thermal_val/labels", "./datasets/images_thermal_val/coco.json")

Categories: 100%|██████████| 16/16 [00:00, 377016.09it/s]
Annotation txt for each image: 100%|██████████| 1144/1144 [00:02, 472.82it/s]

Converting COCO Json to YOLO txt finished!

Dataset Configuration

The coco.yaml file that came with the dataset contained all 80 COCO classes. I removed all classes that were not part of the annotations and assigned new category_ids from 0-15 for the 16 remaining categories. If you want to use the configuration files below to train your YOLO model, you need to remap the annotations accordingly (a sketch follows the config files) - check the ./config folder.

  • config/data_thermal.yaml
train: ../images_thermal_train/images
val: ../images_thermal_val/images
test: ../video_thermal_test/images

nc: 16
names: [
'person',
'bike',
'car',
'motor',
'bus',
'train',
'truck',
'light',
'hydrant',
'sign',
'dog',
'deer',
'skateboard',
'stroller',
'scooter',
'other vehicle'
]
  • config/data_rgb.yaml
train: /opt/app/datasets/images_rgb_train/images
val: /opt/app/datasets/images_rgb_val/images
test: /opt/app/datasets/video_rgb_test/images

nc: 16
names: [
'person',
'bike',
'car',
'motor',
'bus',
'train',
'truck',
'light',
'hydrant',
'sign',
'dog',
'deer',
'skateboard',
'stroller',
'scooter',
'other vehicle'
]
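The remapping of the category_ids itself is mechanical. A hedged sketch - the old-to-new id mapping below is a placeholder and has to be derived from your own coco.yaml:

import json

# hypothetical mapping from original COCO category ids to the new 0-15 range
id_map = {1: 0, 2: 1, 3: 2}  # placeholder values only

with open('./datasets/video_rgb_test/coco.json') as f:
    coco = json.load(f)

for anno in coco['annotations']:
    anno['category_id'] = id_map[anno['category_id']]

with open('./datasets/video_rgb_test/coco.json', 'w') as f:
    json.dump(coco, f)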
# load the COCO annotations for all six splits
with open('./config/images_rgb_val_coco.json') as f_rgb: # => './datasets/images_rgb_val/coco.json'
    data_rgb_val = json.load(f_rgb)
with open('./config/images_thermal_val_coco.json') as f_thermal: # => './datasets/images_thermal_val/coco.json'
    data_thermal_val = json.load(f_thermal)

with open('./config/images_rgb_train_coco.json') as f_rgb: # => './datasets/images_rgb_train/coco.json'
    data_rgb_train = json.load(f_rgb)
with open('./config/images_thermal_train_coco.json') as f_thermal: # => './datasets/images_thermal_train/coco.json'
    data_thermal_train = json.load(f_thermal)

with open('./config/video_rgb_test_coco.json') as f_rgb: # => './datasets/video_rgb_test/coco.json'
    data_rgb_test = json.load(f_rgb)
with open('./config/video_thermal_test_coco.json') as f_thermal: # => './datasets/video_thermal_test/coco.json'
    data_thermal_test = json.load(f_thermal)
# Iterating through the json list - check that all annotations are between 0 and 15

categories = []

for detection in data_rgb_val['annotations']:
    categories.append(detection['category_id'])

print(np.unique(categories))

[ 0 1 2 3 5 6 8 9 12 13 15]

categories = []

for detection in data_thermal_val['annotations']:
    categories.append(detection['category_id'])

print(np.unique(categories))

[ 0 1 2 3 5 6 8 9 12 13 15]

categories = []

for detection in data_rgb_train['annotations']:
    categories.append(detection['category_id'])

print(np.unique(categories))

[ 0 1 2 3 5 6 8 9 12 13 14 15]

categories = []

for detection in data_thermal_train['annotations']:
    categories.append(detection['category_id'])

print(np.unique(categories))

[ 0 1 2 3 5 6 8 9 10 11 12 13 14 15]

categories = []

for detection in data_rgb_test['annotations']:
    categories.append(detection['category_id'])

print(np.unique(categories))

[ 0 1 2 3 6 7 8 9 10 15]

categories = []

for detection in data_thermal_test['annotations']:
    categories.append(detection['category_id'])

print(np.unique(categories))

[ 0 1 2 3 6 8 9 10 15]
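Since the same three-line check runs for every split, a small helper - just a refactoring sketch of the loops above - does it in one pass:

import numpy as np

def unique_category_ids(coco_data):
    # sorted set of category_ids used in a COCO annotation dict
    return np.unique([anno['category_id'] for anno in coco_data['annotations']])

for name, split in [('rgb_val', data_rgb_val), ('thermal_val', data_thermal_val),
                    ('rgb_train', data_rgb_train), ('thermal_train', data_thermal_train),
                    ('rgb_test', data_rgb_test), ('thermal_test', data_thermal_test)]:
    print(name, unique_category_ids(split))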

Training the YOLOv8 Model (RGB / IR)

# missing yolo dep
!pip install "lapx>=0.5.2"

WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv 

import cv2 as cv
from glob import glob
import matplotlib.pyplot as plt
import os
import random
from ultralytics import YOLO

YOLOv8 Nano (RGB)

# unzip downloaded dataset to `./datasets`
dataset_rgb = 'datasets/data_rgb.yaml'

# load a model
backbone_nano = YOLO("yolov8n.yaml") # build a new model from scratch
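YOLO("yolov8n.yaml") builds the nano architecture with random weights. If you would rather fine-tune from the MSCOCO checkpoint, the same constructor also accepts a weights file - a sketch (all results below were produced from scratch):

# alternative: start from the MSCOCO pre-trained checkpoint instead
# (ultralytics downloads yolov8n.pt automatically on first use)
backbone_nano = YOLO("yolov8n.pt")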

Model Training

import json

# Opening JSON file
with open('./datasets/images_rgb_train/coco.json') as f:
    data = json.load(f)

# Iterating through the json - collect every category_id
# (avoid shadowing the built-in `list`)
category_ids = []
for anno in data['annotations']:
    category_ids.append(anno['category_id'])
len(category_ids)

169174

import numpy as np

print(np.unique(category_ids))

[ 0 1 2 3 5 6 8 9 12 13 14 15]

# Train the model
results_n = backbone_nano.train(data=dataset_rgb, epochs=20)

20 epochs completed in 1.521 hours.

Epoch     GPU_mem   box_loss   cls_loss   dfl_loss   Instances   Size
10/20     3.07G     1.829      1.375      1.254      328         640

Class     Images    Instances   P       R       mAP50   mAP50-95
all       1085      16909       0.525   0.16    0.156   0.077

Epoch     GPU_mem   box_loss   cls_loss   dfl_loss   Instances   Size
20/20     2.63G     1.595      1.117      1.146      223         640

Class     Images    Instances   P       R       mAP50   mAP50-95
all       1085      16909       0.579   0.185   0.196   0.102

YOLOv8n summary (fused): 168 layers, 3008768 parameters, 0 gradients

Class           Images   Instances   P       R        mAP50    mAP50-95
all             1085     16909       0.578   0.186    0.196    0.102
person          1085     3223        0.501   0.375    0.389    0.167
bike            1085     193         0.201   0.197    0.101    0.0438
car             1085     7285        0.662   0.575    0.621    0.397
motor           1085     77          0.418   0.26     0.298    0.164
train           1085     183         0.458   0.246    0.253    0.153
truck           1085     2190        0.458   0.198    0.206    0.0686
hydrant         1085     126         0.744   0.0232   0.0797   0.0265
sign            1085     3581        0.564   0.143    0.171    0.0824
skateboard      1085     4           1       0        0        0
stroller        1085     7           1       0        0.018    0.0144
other vehicle   1085     40          0.348   0.025    0.0231   0.00793

Speed: 0.2ms preprocess, 4.2ms inference, 0.0ms loss, 0.6ms postprocess per image


plt.figure(figsize=(24, 14))

im_batch0_labels = plt.imread('./assets/backbone_rgb_nano_val_batch0_labels.webp')
im_batch0_pred = plt.imread('./assets/backbone_rgb_nano_val_batch0_pred.webp')
im_batch1_labels = plt.imread('./assets/backbone_rgb_nano_val_batch1_labels.webp')
im_batch1_pred = plt.imread('./assets/backbone_rgb_nano_val_batch1_pred.webp')
im_batch2_labels = plt.imread('./assets/backbone_rgb_nano_val_batch2_labels.webp')
im_batch2_pred = plt.imread('./assets/backbone_rgb_nano_val_batch2_pred.webp')

ax = plt.subplot(2, 3, 1)
plt.title('batch0_labels')
plt.imshow(im_batch0_labels)
plt.axis("off")

ax = plt.subplot(2, 3, 2)
plt.title('batch1_labels')
plt.imshow(im_batch1_labels)
plt.axis("off")

ax = plt.subplot(2, 3, 3)
plt.title('batch2_labels')
plt.imshow(im_batch2_labels)
plt.axis("off")

ax = plt.subplot(2, 3, 4)
plt.title('batch0_pred')
plt.imshow(im_batch0_pred)
plt.axis("off")

ax = plt.subplot(2, 3, 5)
plt.title('batch1_pred')
plt.imshow(im_batch1_pred)
plt.axis("off")

ax = plt.subplot(2, 3, 6)
plt.title('batch2_pred')
plt.imshow(im_batch2_pred)
plt.axis("off")

(-0.5, 1919.5, 1647.5, -0.5)


Model Evaluation

# Evaluate the model's performance on the validation set
results_n = backbone_nano.val()
Class           Images   Instances   P       R       mAP50    mAP50-95
all             1085     16909       0.578   0.185   0.198    0.104
person          1085     3223        0.505   0.375   0.391    0.167
bike            1085     193         0.2     0.197   0.102    0.044
car             1085     7285        0.663   0.574   0.621    0.398
motor           1085     77          0.419   0.26    0.3      0.166
train           1085     183         0.455   0.246   0.252    0.155
truck           1085     2190        0.458   0.197   0.205    0.0686
hydrant         1085     126         0.741   0.023   0.0801   0.0274
sign            1085     3581        0.563   0.142   0.17     0.0824
skateboard      1085     4           1       0       0        0
stroller        1085     7           1       0       0.0353   0.023
other vehicle   1085     40          0.355   0.025   0.0231   0.00793

Speed: 0.3ms preprocess, 5.0ms inference, 0.0ms loss, 0.7ms postprocess per image


# Export the model to ONNX format
# success = backbone_nano.export(imgsz=(640, 480), format='onnx', opset=12, optimize=False, half=False)
# Export to PyTorch format
success = backbone_nano.export(imgsz=640, format='torchscript', optimize=False, half=False, int8=False)
# TorchScript: export success ✅ 1.3s, saved as 'runs/detect/train4/weights/best.torchscript' (11.9 MB)
# pick pre-trained model
n_model = YOLO('runs/detect/train6/weights/best.torchscript')

# source videos to run the tracker on (glob pattern assumed - point it at your own files)
videos = glob('./videos/*.mp4')

# read video by index
video = cv.VideoCapture(videos[1])
ret, frame = video.read()

# get video dims
frame_width = int(video.get(3))
frame_height = int(video.get(4))
size = (frame_width, frame_height)

# Define the codec and create VideoWriter object
fourcc = cv.VideoWriter_fourcc(*'DIVX')
out = cv.VideoWriter('./outputs/backbone_nano_rgb.avi', fourcc, 20.0, size)

# read frames
ret = True

while ret:
    ret, frame = video.read()

    if ret:
        # detect & track objects
        results = n_model.track(frame, persist=True)

        # plot results
        composed = results[0].plot()

        # save video
        out.write(composed)

out.release()
video.release()
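If the DIVX/AVI combination gives your player trouble, OpenCV can write an MP4 container just as well - same VideoWriter API, different four-character code:

# alternative output container/codec
fourcc = cv.VideoWriter_fourcc(*'mp4v')
out = cv.VideoWriter('./outputs/backbone_nano_rgb.mp4', fourcc, 20.0, size)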

YOLOv8 Small (RGB)

# unzip downloaded dataset to `./datasets`
dataset_rgb = 'datasets/data_rgb.yaml'

# load a model
backbone_small = YOLO("yolov8s.yaml") # build a new model from scratch

Model Training

# Train the model
results_s = backbone_small.train(data=dataset_rgb, epochs=20)

20 epochs completed in 2.438 hours.

Epoch     GPU_mem   box_loss   cls_loss   dfl_loss   Instances   Size
10/20     4.84G     1.569      1.098      1.195      328         640

Class     Images    Instances   P       R       mAP50   mAP50-95
all       1085      16909       0.596   0.211   0.245   0.128

Epoch     GPU_mem   box_loss   cls_loss   dfl_loss   Instances   Size
20/20     4.67G     1.367      0.8879     1.083      223         640

Class     Images    Instances   P       R       mAP50   mAP50-95
all       1085      16909       0.608   0.25    0.291   0.158

YOLOv8s summary (fused): 168 layers, 11131776 parameters, 0 gradients

Class           Images   Instances   P       R       mAP50    mAP50-95
all             1085     16909       0.523   0.255   0.291    0.159
person          1085     3223        0.618   0.428   0.481    0.225
bike            1085     193         0.248   0.326   0.239    0.121
car             1085     7285        0.718   0.63    0.683    0.454
motor           1085     77          0.566   0.338   0.382    0.22
train           1085     183         0.577   0.344   0.409    0.276
truck           1085     2190        0.593   0.318   0.336    0.119
hydrant         1085     126         0.8     0.175   0.293    0.129
sign            1085     3581        0.632   0.243   0.291    0.149
skateboard      1085     4           0       0       0        0
stroller        1085     7           1       0       0.0687   0.0477
other vehicle   1085     40          0       0       0.0135   0.00526

Speed: 0.3ms preprocess, 9.6ms inference, 0.0ms loss, 0.5ms postprocess per image


plt.figure(figsize=(24, 14))

im_batch0_labels = plt.imread('./assets/backbone_rgb_small_val_batch0_labels.webp')
im_batch0_pred = plt.imread('./assets/backbone_rgb_small_val_batch0_pred.webp')
im_batch1_labels = plt.imread('./assets/backbone_rgb_small_val_batch1_labels.webp')
im_batch1_pred = plt.imread('./assets/backbone_rgb_small_val_batch1_pred.webp')
im_batch2_labels = plt.imread('./assets/backbone_rgb_small_val_batch2_labels.webp')
im_batch2_pred = plt.imread('./assets/backbone_rgb_small_val_batch2_pred.webp')

ax = plt.subplot(2, 3, 1)
plt.title('batch0_labels')
plt.imshow(im_batch0_labels)
plt.axis("off")

ax = plt.subplot(2, 3, 2)
plt.title('batch1_labels')
plt.imshow(im_batch1_labels)
plt.axis("off")

ax = plt.subplot(2, 3, 3)
plt.title('batch2_labels')
plt.imshow(im_batch2_labels)
plt.axis("off")

ax = plt.subplot(2, 3, 4)
plt.title('batch0_pred')
plt.imshow(im_batch0_pred)
plt.axis("off")

ax = plt.subplot(2, 3, 5)
plt.title('batch1_pred')
plt.imshow(im_batch1_pred)
plt.axis("off")

ax = plt.subplot(2, 3, 6)
plt.title('batch2_pred')
plt.imshow(im_batch2_pred)
plt.axis("off")

(-0.5, 1919.5, 1647.5, -0.5)


Model Evaluation

# Evaluate the model's performance on the validation set
results_s = backbone_small.val()
Class           Images   Instances   P       R       mAP50    mAP50-95
all             1085     16909       0.524   0.253   0.29     0.159
person          1085     3223        0.622   0.428   0.481    0.225
bike            1085     193         0.247   0.321   0.239    0.121
car             1085     7285        0.722   0.629   0.683    0.454
motor           1085     77          0.563   0.338   0.382    0.219
train           1085     183         0.575   0.339   0.41     0.276
truck           1085     2190        0.6     0.315   0.333    0.12
hydrant         1085     126         0.8     0.175   0.292    0.129
sign            1085     3581        0.635   0.243   0.292    0.149
skateboard      1085     4           0       0       0        0
stroller        1085     7           1       0       0.069    0.0479
other vehicle   1085     40          0       0       0.0136   0.00526

Speed: 0.4ms preprocess, 10.9ms inference, 0.0ms loss, 0.6ms postprocess per image


# Export the model to ONNX format
# success = backbone_nano.export(imgsz=(640, 480), format='onnx', opset=12, optimize=False, half=False)
# Export to PyTorch format
success = backbone_small.export(imgsz=640, format='torchscript', optimize=False, half=False, int8=False)
TorchScript: export success ✅ 2.1s, saved as 'runs/detect/train5/weights/best.torchscript' (42.9 MB)

YOLOv8 Nano (IR)

# unzip downloaded dataset to `./datasets`
dataset_ir = 'datasets/data_thermal.yaml'

# load a model
backbone_ir_nano = YOLO("yolov8n.yaml") # build a new model from scratch

Model Training

# Train the model
results_ir_n = backbone_ir_nano.train(data=dataset_ir, epochs=20)

20 epochs completed in 1.337 hours.

Epoch     GPU_mem   box_loss   cls_loss   dfl_loss   Instances   Size
10/20     3.33G     1.746      1.263      1.211      104         640

Class     Images    Instances   P       R       mAP50   mAP50-95
all       1144      16688       0.466   0.186   0.226   0.112

Epoch     GPU_mem   box_loss   cls_loss   dfl_loss   Instances   Size
20/20     2.5G      1.518      1.016      1.111      102         640

Class     Images    Instances   P       R       mAP50   mAP50-95
all       1144      16688       0.513   0.249   0.276   0.146

YOLOv8n summary (fused): 168 layers, 3008768 parameters, 0 gradients

Class           Images   Instances   P        R        mAP50    mAP50-95
all             1144     16688       0.514    0.249    0.276    0.146
person          1144     4470        0.628    0.555    0.594    0.276
bike            1144     170         0.278    0.25     0.219    0.11
car             1144     7128        0.691    0.65     0.71     0.449
motor           1144     55          0.569    0.364    0.39     0.19
train           1144     179         0.741    0.383    0.455    0.284
truck           1144     2048        0.467    0.259    0.274    0.105
hydrant         1144     94          0.678    0.0638   0.12     0.0535
sign            1144     2472        0.557    0.2      0.255    0.132
skateboard      1144     3           0        0        0        0
stroller        1144     6           1        0        0        0
other vehicle   1144     63          0.0423   0.0159   0.0194   0.00652

Speed: 0.3ms preprocess, 3.8ms inference, 0.0ms loss, 0.6ms postprocess per image


plt.figure(figsize=(24, 14))

im_batch0_labels = plt.imread('./assets/backbone_ir_nano_val_batch0_labels.webp')
im_batch0_pred = plt.imread('./assets/backbone_ir_nano_val_batch0_pred.webp')
im_batch1_labels = plt.imread('./assets/backbone_ir_nano_val_batch1_labels.webp')
im_batch1_pred = plt.imread('./assets/backbone_ir_nano_val_batch1_pred.webp')
im_batch2_labels = plt.imread('./assets/backbone_ir_nano_val_batch2_labels.webp')
im_batch2_pred = plt.imread('./assets/backbone_ir_nano_val_batch2_pred.webp')

ax = plt.subplot(2, 3, 1)
plt.title('batch0_labels')
plt.imshow(im_batch0_labels)
plt.axis("off")

ax = plt.subplot(2, 3, 2)
plt.title('batch1_labels')
plt.imshow(im_batch1_labels)
plt.axis("off")

ax = plt.subplot(2, 3, 3)
plt.title('batch2_labels')
plt.imshow(im_batch2_labels)
plt.axis("off")

ax = plt.subplot(2, 3, 4)
plt.title('batch0_pred')
plt.imshow(im_batch0_pred)
plt.axis("off")

ax = plt.subplot(2, 3, 5)
plt.title('batch1_pred')
plt.imshow(im_batch1_pred)
plt.axis("off")

ax = plt.subplot(2, 3, 6)
plt.title('batch2_pred')
plt.imshow(im_batch2_pred)
plt.axis("off")


Model Evaluation

# Evaluate the model's performance on the validation set
results_ir_n = backbone_ir_nano.val()
Class           Images   Instances   P        R        mAP50    mAP50-95
all             1144     16688       0.516    0.249    0.276    0.146
person          1144     4470        0.631    0.556    0.595    0.276
bike            1144     170         0.288    0.253    0.222    0.111
car             1144     7128        0.696    0.65     0.711    0.449
motor           1144     55          0.57     0.364    0.39     0.189
train           1144     179         0.746    0.378    0.455    0.283
truck           1144     2048        0.462    0.256    0.271    0.104
hydrant         1144     94          0.679    0.0638   0.12     0.0526
sign            1144     2472        0.557    0.199    0.256    0.132
skateboard      1144     3           0        0        0        0
stroller        1144     6           1        0        0        0
other vehicle   1144     63          0.0425   0.0159   0.0193   0.00637

Speed: 0.3ms preprocess, 4.5ms inference, 0.0ms loss, 0.7ms postprocess per image


# Export the model to ONNX format
# success = backbone_nano.export(imgsz=(640, 480), format='onnx', opset=12, optimize=False, half=False)
# Export to PyTorch format
success = backbone_ir_nano.export(imgsz=640, format='torchscript', optimize=False, half=False, int8=False)
# TorchScript: export success ✅ 1.6s, saved as 'runs/detect/train6/weights/best.torchscript' (12.4 MB)

YOLOv8 Small (IR)

# unzip downloaded dataset to `./datasets`
dataset_ir = 'datasets/data_thermal.yaml'

# load a model
backbone_ir_small = YOLO("yolov8s.yaml") # build a new model from scratch

Model Training

# Train the model
results_ir_s = backbone_ir_small.train(data=dataset_ir, epochs=20)

20 epochs completed in 2.827 hours.

Epoch     GPU_mem   box_loss   cls_loss   dfl_loss   Instances   Size
10/20     4.83G     1.508      1.018      1.16       104         640

Class     Images    Instances   P       R       mAP50   mAP50-95
all       1144      16688       0.489   0.286   0.313   0.168

Epoch     GPU_mem   box_loss   cls_loss   dfl_loss   Instances   Size
20/20     4.67G     1.317      0.8207     1.064      102         640

Class     Images    Instances   P       R       mAP50   mAP50-95
all       1144      16688       0.554   0.322   0.358   0.2

YOLOv8s summary (fused): 168 layers, 11131776 parameters, 0 gradients

Class           Images   Instances   P       R        mAP50    mAP50-95
all             1144     16688       0.554   0.322    0.358    0.2
person          1144     4470        0.687   0.634    0.688    0.355
bike            1144     170         0.364   0.347    0.308    0.174
car             1144     7128        0.74    0.725    0.781    0.527
motor           1144     55          0.608   0.509    0.552    0.25
train           1144     179         0.683   0.419    0.526    0.358
truck           1144     2048        0.601   0.385    0.415    0.178
hydrant         1144     94          0.687   0.149    0.274    0.147
sign            1144     2472        0.608   0.313    0.362    0.195
skateboard      1144     3           0       0        0        0
stroller        1144     6           1       0        0        0
other vehicle   1144     63          0.112   0.0635   0.0254   0.0151

Speed: 0.3ms preprocess, 9.6ms inference, 0.0ms loss, 0.6ms postprocess per image


plt.figure(figsize=(24, 14))

im_batch0_labels = plt.imread('./assets/backbone_ir_small_val_batch0_labels.webp')
im_batch0_pred = plt.imread('./assets/backbone_ir_small_val_batch0_pred.webp')
im_batch1_labels = plt.imread('./assets/backbone_ir_small_val_batch1_labels.webp')
im_batch1_pred = plt.imread('./assets/backbone_ir_small_val_batch1_pred.webp')
im_batch2_labels = plt.imread('./assets/backbone_ir_small_val_batch2_labels.webp')
im_batch2_pred = plt.imread('./assets/backbone_ir_small_val_batch2_pred.webp')

ax = plt.subplot(2, 3, 1)
plt.title('batch0_labels')
plt.imshow(im_batch0_labels)
plt.axis("off")

ax = plt.subplot(2, 3, 2)
plt.title('batch1_labels')
plt.imshow(im_batch1_labels)
plt.axis("off")

ax = plt.subplot(2, 3, 3)
plt.title('batch2_labels')
plt.imshow(im_batch2_labels)
plt.axis("off")

ax = plt.subplot(2, 3, 4)
plt.title('batch0_pred')
plt.imshow(im_batch0_pred)
plt.axis("off")

ax = plt.subplot(2, 3, 5)
plt.title('batch1_pred')
plt.imshow(im_batch1_pred)
plt.axis("off")

ax = plt.subplot(2, 3, 6)
plt.title('batch2_pred')
plt.imshow(im_batch2_pred)
plt.axis("off")


Model Evaluation

# Evaluate the model's performance on the validation set
results_ir_s = backbone_ir_small.val()
Class           Images   Instances   P       R        mAP50    mAP50-95
all             1144     16688       0.555   0.322    0.358    0.2
person          1144     4470        0.691   0.632    0.687    0.356
bike            1144     170         0.369   0.353    0.309    0.174
car             1144     7128        0.743   0.725    0.781    0.527
motor           1144     55          0.593   0.504    0.551    0.251
train           1144     179         0.683   0.419    0.527    0.361
truck           1144     2048        0.608   0.386    0.418    0.178
hydrant         1144     94          0.695   0.149    0.274    0.148
sign            1144     2472        0.614   0.313    0.362    0.195
skateboard      1144     3           0       0        0        0
stroller        1144     6           1       0        0        0
other vehicle   1144     63          0.112   0.0635   0.0254   0.0151


# Export the model to ONNX format
# success = backbone_nano.export(imgsz=(640, 480), format='onnx', opset=12, optimize=False, half=False)
# Export to PyTorch format
success = backbone_ir_small.export(imgsz=640, format='torchscript', optimize=False, half=False, int8=False)
# TorchScript: export success ✅ 1.6s, saved as 'runs/detect/train6/weights/best.torchscript' (12.4 MB)

Training the YOLOv8 Mixed Model (RGB + IR)

# missing yolo dep
!pip install "lapx>=0.5.2"
import cv2 as cv
from glob import glob
import matplotlib.pyplot as plt
import os
import random
from ultralytics import YOLO

YOLOv8 Nano (RGB+IR)

# unzip downloaded dataset to `./datasets`
dataset_combined = 'datasets/data_combined.yaml'

# load a model
backbone_nano = YOLO("yolov8n.yaml") # build a new model from scratch
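The combined configuration file is not shown above. A plausible config/data_combined.yaml - an assumption on my part, relying on the merged images_combined_val folder that shows up in the validation scan later and on the fact that Ultralytics accepts lists of image directories - could look like this:

train: [
../images_thermal_train/images,
../images_rgb_train/images
]
val: ../images_combined_val/images
test: [
../video_thermal_test/images,
../video_rgb_test/images
]

nc: 16
names: [
'person', 'bike', 'car', 'motor', 'bus', 'train', 'truck', 'light',
'hydrant', 'sign', 'dog', 'deer', 'skateboard', 'stroller', 'scooter',
'other vehicle'
]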

Model Training

# Train the model
results_n = backbone_nano.train(data=dataset_combined, epochs=20)

20 epochs completed in 2.531 hours.

Epoch     GPU_mem   box_loss   cls_loss   dfl_loss   Instances
10/20     3.18G     1.655      1.208      1.154      179

Class     Images    Instances   P       R       mAP50
all       2229      33597       0.545   0.208   0.226

Epoch     GPU_mem   box_loss   cls_loss   dfl_loss   Instances
20/20     2.6G      1.458      0.9884     1.073      52

Class     Images    Instances   P       R       mAP50
all       2229      33597       0.52    0.242   0.272

YOLOv8n summary (fused): 168 layers, 3008768 parameters, 0 gradients

Class           Images   Instances   P        R        mAP50    mAP50-95
all             2229     33597       0.522    0.241    0.272    0.147
person          2229     7693        0.624    0.48     0.526    0.253
bike            2229     363         0.288    0.27     0.239    0.128
car             2229     14413       0.689    0.644    0.696    0.46
motor           2229     132         0.611    0.364    0.419    0.193
train           2229     362         0.675    0.344    0.425    0.281
truck           2229     4238        0.492    0.235    0.255    0.0951
hydrant         2229     220         0.688    0.0955   0.162    0.0659
sign            2229     6053        0.592    0.205    0.252    0.13
skateboard      2229     7           0        0        0        0
stroller        2229     13          1        0        0        0
other vehicle   2229     103         0.0798   0.0194   0.0232   0.00977

Speed: 0.2ms preprocess, 4.0ms inference, 0.0ms loss, 0.6ms postprocess per image


plt.figure(figsize=(24, 14))

im_batch0_labels = plt.imread('./assets/backbone_combined_nano_val_batch0_labels.webp')
im_batch0_pred = plt.imread('./assets/backbone_combined_nano_val_batch0_pred.webp')
im_batch1_labels = plt.imread('./assets/backbone_combined_nano_val_batch1_labels.webp')
im_batch1_pred = plt.imread('./assets/backbone_combined_nano_val_batch1_pred.webp')
im_batch2_labels = plt.imread('./assets/backbone_combined_nano_val_batch2_labels.webp')
im_batch2_pred = plt.imread('./assets/backbone_combined_nano_val_batch2_pred.webp')

ax = plt.subplot(2, 3, 1)
plt.title('batch0_labels')
plt.imshow(im_batch0_labels)
plt.axis("off")

ax = plt.subplot(2, 3, 2)
plt.title('batch1_labels')
plt.imshow(im_batch1_labels)
plt.axis("off")

ax = plt.subplot(2, 3, 3)
plt.title('batch2_labels')
plt.imshow(im_batch2_labels)
plt.axis("off")

ax = plt.subplot(2, 3, 4)
plt.title('batch0_pred')
plt.imshow(im_batch0_pred)
plt.axis("off")

ax = plt.subplot(2, 3, 5)
plt.title('batch1_pred')
plt.imshow(im_batch1_pred)
plt.axis("off")

ax = plt.subplot(2, 3, 6)
plt.title('batch2_pred')
plt.imshow(im_batch2_pred)
plt.axis("off")


Model Evaluation

# Evaluate the model's performance on the validation set
results_n = backbone_nano.val()

Ultralytics YOLOv8.0.173 🚀 Python-3.10.11 torch-2.0.1 CUDA:0 (NVIDIA GeForce GTX 1060 6GB, 6070MiB)
YOLOv8n summary (fused): 168 layers, 3008768 parameters, 0 gradients
val: Scanning /opt/app/datasets/images_combined_val/labels.cache... 2229 images, 32 backgrounds, 0 corrupt: 100%|██████████| 2229/2229
Class Images Instances Box(P R mAP50 mAP50-95): 100%|██████████| 140/140 [00:31, 4.47it/s]
Results saved to runs/detect/val5

Class           Images   Instances   P        R        mAP50   mAP50-95
all             2229     33597       0.519    0.242    0.272   0.147
person          2229     7693        0.623    0.482    0.527   0.253
bike            2229     363         0.281    0.267    0.239   0.13
car             2229     14413       0.686    0.646    0.697   0.461
motor           2229     132         0.607    0.364    0.417   0.193
train           2229     362         0.666    0.348    0.424   0.279
truck           2229     4238        0.493    0.237    0.256   0.096
hydrant         2229     220         0.685    0.0955   0.162   0.0676
sign            2229     6053        0.588    0.205    0.251   0.131
skateboard      2229     7           0        0        0       0
stroller        2229     13          1        0        0       0
other vehicle   2229     103         0.0795   0.0194   0.023   0.00967

Speed: 0.3ms preprocess, 4.5ms inference, 0.0ms loss, 0.6ms postprocess per image


# Export the model to ONNX format
# success = backbone_nano.export(imgsz=(640, 480), format='onnx', opset=12, optimize=False, half=False)
# Export to PyTorch format
success = backbone_nano.export(imgsz=640, format='torchscript', optimize=False, half=False, int8=False)
# TorchScript: export success ✅ 1.2s, saved as 'runs/detect/train10/weights/best.torchscript' (11.9 MB)

Ultralytics YOLOv8.0.173 🚀 Python-3.10.11 torch-2.0.1 CPU (Intel Core(TM) i7-7700 3.60GHz)

PyTorch: starting from 'runs/detect/train10/weights/best.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 20, 8400) (6.0 MB)

TorchScript: starting export with torch 2.0.1...
TorchScript: export success ✅ 1.2s, saved as 'runs/detect/train10/weights/best.torchscript' (11.9 MB)

Export complete (2.5s)
Results saved to /opt/app/runs/detect/train10/weights
Predict:   yolo predict task=detect model=runs/detect/train10/weights/best.torchscript imgsz=640
Validate:  yolo val task=detect model=runs/detect/train10/weights/best.torchscript imgsz=640 data=datasets/data_combined.yaml
Visualize: https://netron.app

YOLOv8 Small (RGB + IR)

# unzip downloaded dataset to `./datasets`
dataset_combined = 'datasets/data_combined.yaml'

# load a model
backbone_small = YOLO("yolov8s.yaml") # build a new model from scratch

Model Training

# Train the model
results_s = backbone_small.train(data=dataset_combined, epochs=20)

20 epochs completed in 4.965 hours.

Epoch     GPU_mem   box_loss   cls_loss   dfl_loss   Instances
10/20     4.88G     1.445      0.9915     1.085      179

Class     Images    Instances   P       R       mAP50
all       2229      33597       0.548   0.277   0.314

Epoch     GPU_mem   box_loss   cls_loss   dfl_loss   Instances
20/20     4.86G     1.265      0.7992     1.011      52

Class     Images    Instances   P       R       mAP50
all       2229      33597       0.651   0.324   0.36

YOLOv8s summary (fused): 168 layers, 11131776 parameters, 0 gradients

Class           Images   Instances   P       R        mAP50    mAP50-95
all             2229     33597       0.652   0.323    0.36     0.204
person          2229     7693        0.687   0.566    0.628    0.325
bike            2229     363         0.35    0.383    0.353    0.199
car             2229     14413       0.735   0.712    0.764    0.528
motor           2229     132         0.645   0.439    0.513    0.268
train           2229     362         0.703   0.478    0.555    0.383
truck           2229     4238        0.589   0.389    0.404    0.167
hydrant         2229     220         0.696   0.177    0.266    0.121
sign            2229     6053        0.62    0.329    0.372    0.205
skateboard      2229     7           1       0        0        0
stroller        2229     13          1       0        0.0386   0.0297
other vehicle   2229     103         0.15    0.0777   0.0616   0.0202

Speed: 0.2ms preprocess, 9.1ms inference, 0.0ms loss, 0.5ms postprocess per image


plt.figure(figsize=(24, 14))

im_batch0_labels = plt.imread('./assets/backbone_combined_small_val_batch0_labels.webp')
im_batch0_pred = plt.imread('./assets/backbone_combined_small_val_batch0_pred.webp')
im_batch1_labels = plt.imread('./assets/backbone_combined_small_val_batch1_labels.webp')
im_batch1_pred = plt.imread('./assets/backbone_combined_small_val_batch1_pred.webp')
im_batch2_labels = plt.imread('./assets/backbone_combined_small_val_batch2_labels.webp')
im_batch2_pred = plt.imread('./assets/backbone_combined_small_val_batch2_pred.webp')

ax = plt.subplot(2, 3, 1)
plt.title('batch0_labels')
plt.imshow(im_batch0_labels)
plt.axis("off")

ax = plt.subplot(2, 3, 2)
plt.title('batch1_labels')
plt.imshow(im_batch1_labels)
plt.axis("off")

ax = plt.subplot(2, 3, 3)
plt.title('batch2_labels')
plt.imshow(im_batch2_labels)
plt.axis("off")

ax = plt.subplot(2, 3, 4)
plt.title('batch0_pred')
plt.imshow(im_batch0_pred)
plt.axis("off")

ax = plt.subplot(2, 3, 5)
plt.title('batch1_pred')
plt.imshow(im_batch1_pred)
plt.axis("off")

ax = plt.subplot(2, 3, 6)
plt.title('batch2_pred')
plt.imshow(im_batch2_pred)
plt.axis("off")


Model Evaluation

# Evaluate the model's performance on the validation set
results_s = backbone_small.val()

Ultralytics YOLOv8.0.173 🚀 Python-3.10.11 torch-2.0.1 CUDA:0 (NVIDIA GeForce GTX 1060 6GB, 6070MiB)
YOLOv8s summary (fused): 168 layers, 11131776 parameters, 0 gradients
val: Scanning /opt/app/datasets/images_combined_val/labels.cache... 2229 images, 32 backgrounds, 0 corrupt: 100%|██████████| 2229/2229
Class Images Instances Box(P R mAP50 mAP50-95): 100%|██████████| 140/140 [00:42, 3.28it/s]
Results saved to runs/detect/val6

Class           Images   Instances   P       R        mAP50    mAP50-95
all             2229     33597       0.651   0.324    0.359    0.204
person          2229     7693        0.687   0.569    0.63     0.326
bike            2229     363         0.346   0.383    0.351    0.198
car             2229     14413       0.734   0.713    0.764    0.529
motor           2229     132         0.633   0.439    0.514    0.267
train           2229     362         0.707   0.481    0.556    0.384
truck           2229     4238        0.59    0.391    0.407    0.167
hydrant         2229     220         0.697   0.178    0.266    0.122
sign            2229     6053        0.617   0.329    0.371    0.205
skateboard      2229     7           1       0        0        0
stroller        2229     13          1       0        0.0323   0.024
other vehicle   2229     103         0.148   0.0777   0.0618   0.0193

Speed: 0.3ms preprocess, 10.3ms inference, 0.0ms loss, 0.7ms postprocess per image


# Export the model to ONNX format
# success = backbone_nano.export(imgsz=(640, 480), format='onnx', opset=12, optimize=False, half=False)
# Export to PyTorch format
success = backbone_small.export(imgsz=640, format='torchscript', optimize=False, half=False, int8=False)
# TorchScript: export success ✅ 1.7s, saved as 'runs/detect/train11/weights/best.torchscript' (42.9 MB)

Model Evaluation

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

YOLOv8n & RGB Dataset

data_index = ['all', 'person', 'bike', 'car', 'motor', 'train', 'truck', 'hydrant', 'sign', 'skateboard', 'stroller', 'other vehicle']
data_columns = ['Model', 'Images', 'Instances', 'P', 'R', 'mAP50', 'mAP50-95']

rgb_nano = [
    ['rgb_nano', 1085, 16909, 0.578, 0.185, 0.198, 0.104],
    ['rgb_nano', 1085, 3223, 0.505, 0.375, 0.391, 0.167],
    ['rgb_nano', 1085, 193, 0.2, 0.197, 0.102, 0.044],
    ['rgb_nano', 1085, 7285, 0.663, 0.574, 0.621, 0.398],
    ['rgb_nano', 1085, 77, 0.419, 0.26, 0.3, 0.166],
    ['rgb_nano', 1085, 183, 0.455, 0.246, 0.252, 0.155],
    ['rgb_nano', 1085, 2190, 0.458, 0.197, 0.205, 0.0686],
    ['rgb_nano', 1085, 126, 0.741, 0.023, 0.0801, 0.0274],
    ['rgb_nano', 1085, 3581, 0.563, 0.142, 0.17, 0.0824],
    ['rgb_nano', 1085, 4, 1, 0, 0, 0],
    ['rgb_nano', 1085, 7, 1, 0, 0.0353, 0.023],
    ['rgb_nano', 1085, 40, 0.355, 0.025, 0.0231, 0.00793]
]
rgb_nano_df = pd.DataFrame(rgb_nano, data_index, data_columns)
rgb_nano_df
                  Model  Images  Instances      P      R   mAP50  mAP50-95
all            rgb_nano    1085      16909  0.578  0.185  0.1980   0.10400
person         rgb_nano    1085       3223  0.505  0.375  0.3910   0.16700
bike           rgb_nano    1085        193  0.200  0.197  0.1020   0.04400
car            rgb_nano    1085       7285  0.663  0.574  0.6210   0.39800
motor          rgb_nano    1085         77  0.419  0.260  0.3000   0.16600
train          rgb_nano    1085        183  0.455  0.246  0.2520   0.15500
truck          rgb_nano    1085       2190  0.458  0.197  0.2050   0.06860
hydrant        rgb_nano    1085        126  0.741  0.023  0.0801   0.02740
sign           rgb_nano    1085       3581  0.563  0.142  0.1700   0.08240
skateboard     rgb_nano    1085          4  1.000  0.000  0.0000   0.00000
stroller       rgb_nano    1085          7  1.000  0.000  0.0353   0.02300
other vehicle  rgb_nano    1085         40  0.355  0.025  0.0231   0.00793
plt.figure(figsize=(16, 5))
plt.title('Average Precision by Class & Instances')
sns.set(style='darkgrid')
sns.barplot(
    data=rgb_nano_df.reset_index(),
    x='Instances',
    y='mAP50',
    errorbar='sd',
    hue='index',
    palette='nipy_spectral'
)
plt.legend(bbox_to_anchor=(1.01,1.01))


YOLOv8s & RGB Dataset

rgb_small = [
    ['rgb_small', 1085, 16909, 0.524, 0.253, 0.29, 0.159],
    ['rgb_small', 1085, 3223, 0.622, 0.428, 0.481, 0.225],
    ['rgb_small', 1085, 193, 0.247, 0.321, 0.239, 0.121],
    ['rgb_small', 1085, 7285, 0.722, 0.629, 0.683, 0.454],
    ['rgb_small', 1085, 77, 0.563, 0.338, 0.382, 0.219],
    ['rgb_small', 1085, 183, 0.575, 0.339, 0.41, 0.276],
    ['rgb_small', 1085, 2190, 0.6, 0.315, 0.333, 0.12],
    ['rgb_small', 1085, 126, 0.8, 0.175, 0.292, 0.129],
    ['rgb_small', 1085, 3581, 0.635, 0.243, 0.292, 0.149],
    ['rgb_small', 1085, 4, 0, 0, 0, 0],
    ['rgb_small', 1085, 7, 1, 0, 0.069, 0.0479],
    ['rgb_small', 1085, 40, 0, 0, 0.0136, 0.00526]
]
rgb_small_df = pd.DataFrame(rgb_small, data_index, data_columns)
rgb_small_df
                   Model  Images  Instances      P      R   mAP50  mAP50-95
all            rgb_small    1085      16909  0.524  0.253  0.2900   0.15900
person         rgb_small    1085       3223  0.622  0.428  0.4810   0.22500
bike           rgb_small    1085        193  0.247  0.321  0.2390   0.12100
car            rgb_small    1085       7285  0.722  0.629  0.6830   0.45400
motor          rgb_small    1085         77  0.563  0.338  0.3820   0.21900
train          rgb_small    1085        183  0.575  0.339  0.4100   0.27600
truck          rgb_small    1085       2190  0.600  0.315  0.3330   0.12000
hydrant        rgb_small    1085        126  0.800  0.175  0.2920   0.12900
sign           rgb_small    1085       3581  0.635  0.243  0.2920   0.14900
skateboard     rgb_small    1085          4  0.000  0.000  0.0000   0.00000
stroller       rgb_small    1085          7  1.000  0.000  0.0690   0.04790
other vehicle  rgb_small    1085         40  0.000  0.000  0.0136   0.00526
plt.figure(figsize=(16, 5))
plt.title('Average Precision by Class & Instances')
sns.set(style='darkgrid')
sns.barplot(
    data=rgb_small_df.reset_index(),
    x='Instances',
    y='mAP50',
    errorbar='sd',
    hue='index',
    palette='nipy_spectral'
)
plt.legend(bbox_to_anchor=(1.01,1.01))


YOLOv8n & IR Dataset

ir_nano = [
    ['ir_nano', 1144, 16688, 0.516, 0.249, 0.276, 0.146],
    ['ir_nano', 1144, 4470, 0.631, 0.556, 0.595, 0.276],
    ['ir_nano', 1144, 170, 0.288, 0.253, 0.222, 0.111],
    ['ir_nano', 1144, 7128, 0.696, 0.65, 0.711, 0.449],
    ['ir_nano', 1144, 55, 0.57, 0.364, 0.39, 0.189],
    ['ir_nano', 1144, 179, 0.746, 0.378, 0.455, 0.283],
    ['ir_nano', 1144, 2048, 0.462, 0.256, 0.271, 0.104],
    ['ir_nano', 1144, 94, 0.679, 0.0638, 0.12, 0.0526],
    ['ir_nano', 1144, 2472, 0.557, 0.199, 0.256, 0.132],
    ['ir_nano', 1144, 3, 0, 0, 0, 0],
    ['ir_nano', 1144, 6, 1, 0, 0, 0],
    ['ir_nano', 1144, 63, 0.0425, 0.0159, 0.0193, 0.00637]
]
ir_nano_df = pd.DataFrame(ir_nano, data_index, data_columns)
ir_nano_df
                 Model  Images  Instances       P       R   mAP50  mAP50-95
all            ir_nano    1144      16688  0.5160  0.2490  0.2760   0.14600
person         ir_nano    1144       4470  0.6310  0.5560  0.5950   0.27600
bike           ir_nano    1144        170  0.2880  0.2530  0.2220   0.11100
car            ir_nano    1144       7128  0.6960  0.6500  0.7110   0.44900
motor          ir_nano    1144         55  0.5700  0.3640  0.3900   0.18900
train          ir_nano    1144        179  0.7460  0.3780  0.4550   0.28300
truck          ir_nano    1144       2048  0.4620  0.2560  0.2710   0.10400
hydrant        ir_nano    1144         94  0.6790  0.0638  0.1200   0.05260
sign           ir_nano    1144       2472  0.5570  0.1990  0.2560   0.13200
skateboard     ir_nano    1144          3  0.0000  0.0000  0.0000   0.00000
stroller       ir_nano    1144          6  1.0000  0.0000  0.0000   0.00000
other vehicle  ir_nano    1144         63  0.0425  0.0159  0.0193   0.00637
plt.figure(figsize=(16, 5))
plt.title('Average Precision by Class & Instances')
sns.set(style='darkgrid')
sns.barplot(
    data=ir_nano_df.reset_index(),
    x='Instances',
    y='mAP50',
    errorbar='sd',
    hue='index',
    palette='nipy_spectral'
)
plt.legend(bbox_to_anchor=(1.01,1.01))


YOLOv8s & IR Dataset

ir_small = [
    ['ir_small', 1144, 16688, 0.555, 0.322, 0.358, 0.2],
    ['ir_small', 1144, 4470, 0.691, 0.632, 0.687, 0.356],
    ['ir_small', 1144, 170, 0.369, 0.353, 0.309, 0.174],
    ['ir_small', 1144, 7128, 0.743, 0.725, 0.781, 0.527],
    ['ir_small', 1144, 55, 0.593, 0.504, 0.551, 0.251],
    ['ir_small', 1144, 179, 0.683, 0.419, 0.527, 0.361],
    ['ir_small', 1144, 2048, 0.608, 0.386, 0.418, 0.178],
    ['ir_small', 1144, 94, 0.695, 0.149, 0.274, 0.148],
    ['ir_small', 1144, 2472, 0.614, 0.313, 0.362, 0.195],
    ['ir_small', 1144, 3, 0, 0, 0, 0],
    ['ir_small', 1144, 6, 1, 0, 0, 0],
    ['ir_small', 1144, 63, 0.112, 0.0635, 0.0254, 0.0151]
]
ir_small_df = pd.DataFrame(ir_small, data_index, data_columns)
ir_small_df
                  Model  Images  Instances      P       R   mAP50  mAP50-95
all            ir_small    1144      16688  0.555  0.3220  0.3580    0.2000
person         ir_small    1144       4470  0.691  0.6320  0.6870    0.3560
bike           ir_small    1144        170  0.369  0.3530  0.3090    0.1740
car            ir_small    1144       7128  0.743  0.7250  0.7810    0.5270
motor          ir_small    1144         55  0.593  0.5040  0.5510    0.2510
train          ir_small    1144        179  0.683  0.4190  0.5270    0.3610
truck          ir_small    1144       2048  0.608  0.3860  0.4180    0.1780
hydrant        ir_small    1144         94  0.695  0.1490  0.2740    0.1480
sign           ir_small    1144       2472  0.614  0.3130  0.3620    0.1950
skateboard     ir_small    1144          3  0.000  0.0000  0.0000    0.0000
stroller       ir_small    1144          6  1.000  0.0000  0.0000    0.0000
other vehicle  ir_small    1144         63  0.112  0.0635  0.0254    0.0151
plt.figure(figsize=(16, 5))
plt.title('Average Precision by Class & Instances')
sns.set(style='darkgrid')
sns.barplot(
    data=ir_small_df.reset_index(),
    x='Instances',
    y='mAP50',
    errorbar='sd',
    hue='index',
    palette='nipy_spectral'
)
plt.legend(bbox_to_anchor=(1.01,1.01))


YOLOv8n & Combined Dataset

combined_nano = [
    ['combined_nano', 2229, 33597, 0.519, 0.242, 0.272, 0.147],
    ['combined_nano', 2229, 7693, 0.623, 0.482, 0.527, 0.253],
    ['combined_nano', 2229, 363, 0.281, 0.267, 0.239, 0.13],
    ['combined_nano', 2229, 14413, 0.686, 0.646, 0.697, 0.461],
    ['combined_nano', 2229, 132, 0.607, 0.364, 0.417, 0.193],
    ['combined_nano', 2229, 362, 0.666, 0.348, 0.424, 0.279],
    ['combined_nano', 2229, 4238, 0.493, 0.237, 0.256, 0.096],
    ['combined_nano', 2229, 220, 0.685, 0.0955, 0.162, 0.0676],
    ['combined_nano', 2229, 6053, 0.588, 0.205, 0.251, 0.131],
    ['combined_nano', 2229, 7, 0, 0, 0, 0],
    ['combined_nano', 2229, 13, 1, 0, 0, 0],
    ['combined_nano', 2229, 103, 0.0795, 0.0194, 0.023, 0.00967]
]
combined_nano_df = pd.DataFrame(combined_nano, data_index, data_columns)
combined_nano_df
                       Model  Images  Instances       P       R  mAP50  mAP50-95
all            combined_nano    2229      33597  0.5190  0.2420  0.272   0.14700
person         combined_nano    2229       7693  0.6230  0.4820  0.527   0.25300
bike           combined_nano    2229        363  0.2810  0.2670  0.239   0.13000
car            combined_nano    2229      14413  0.6860  0.6460  0.697   0.46100
motor          combined_nano    2229        132  0.6070  0.3640  0.417   0.19300
train          combined_nano    2229        362  0.6660  0.3480  0.424   0.27900
truck          combined_nano    2229       4238  0.4930  0.2370  0.256   0.09600
hydrant        combined_nano    2229        220  0.6850  0.0955  0.162   0.06760
sign           combined_nano    2229       6053  0.5880  0.2050  0.251   0.13100
skateboard     combined_nano    2229          7  0.0000  0.0000  0.000   0.00000
stroller       combined_nano    2229         13  1.0000  0.0000  0.000   0.00000
other vehicle  combined_nano    2229        103  0.0795  0.0194  0.023   0.00967
plt.figure(figsize=(16, 5))
plt.title('Average Precision by Class & Instances')
sns.set(style='darkgrid')
sns.barplot(
    data=combined_nano_df.reset_index(),
    x='Instances',
    y='mAP50',
    errorbar='sd',
    hue='index',
    palette='nipy_spectral'
)
plt.legend(bbox_to_anchor=(1.01,1.01))


YOLOv8s & Combined Dataset

combined_small = [
    ['combined_small', 2229, 33597, 0.651, 0.324, 0.359, 0.204],
    ['combined_small', 2229, 7693, 0.687, 0.569, 0.63, 0.326],
    ['combined_small', 2229, 363, 0.346, 0.383, 0.351, 0.198],
    ['combined_small', 2229, 14413, 0.734, 0.713, 0.764, 0.529],
    ['combined_small', 2229, 132, 0.633, 0.439, 0.514, 0.267],
    ['combined_small', 2229, 362, 0.707, 0.481, 0.556, 0.384],
    ['combined_small', 2229, 4238, 0.59, 0.391, 0.407, 0.167],
    ['combined_small', 2229, 220, 0.697, 0.178, 0.266, 0.122],
    ['combined_small', 2229, 6053, 0.617, 0.329, 0.371, 0.205],
    ['combined_small', 2229, 7, 1, 0, 0, 0],
    ['combined_small', 2229, 13, 1, 0, 0.0323, 0.024],
    ['combined_small', 2229, 103, 0.148, 0.0777, 0.0618, 0.0193]
]
combined_small_df = pd.DataFrame(combined_small, data_index, data_columns)
combined_small_df
                        Model  Images  Instances      P       R   mAP50  mAP50-95
all            combined_small    2229      33597  0.651  0.3240  0.3590    0.2040
person         combined_small    2229       7693  0.687  0.5690  0.6300    0.3260
bike           combined_small    2229        363  0.346  0.3830  0.3510    0.1980
car            combined_small    2229      14413  0.734  0.7130  0.7640    0.5290
motor          combined_small    2229        132  0.633  0.4390  0.5140    0.2670
train          combined_small    2229        362  0.707  0.4810  0.5560    0.3840
truck          combined_small    2229       4238  0.590  0.3910  0.4070    0.1670
hydrant        combined_small    2229        220  0.697  0.1780  0.2660    0.1220
sign           combined_small    2229       6053  0.617  0.3290  0.3710    0.2050
skateboard     combined_small    2229          7  1.000  0.0000  0.0000    0.0000
stroller       combined_small    2229         13  1.000  0.0000  0.0323    0.0240
other vehicle  combined_small    2229        103  0.148  0.0777  0.0618    0.0193
plt.figure(figsize=(16, 5))
plt.title('Average Precision by Class & Instances')
sns.set(style='darkgrid')
sns.barplot(
    data=combined_small_df.reset_index(),
    x='Instances',
    y='mAP50',
    errorbar='sd',
    hue='index',
    palette='nipy_spectral'
)
plt.legend(bbox_to_anchor=(1.01,1.01))


Combining Results

combined_df = pd.concat([rgb_nano_df, rgb_small_df, ir_nano_df, ir_small_df, combined_nano_df, combined_small_df], axis=0)
combined_df = combined_df.reset_index()
plt.figure(figsize=(16, 5))
plt.title('Average Precision by Class')
sns.set(style='darkgrid')
sns.barplot(
    data=combined_df.reset_index(),
    x='index',
    y='mAP50',
    errorbar='sd',
    hue='Model',
    palette='seismic'
)
plt.legend(bbox_to_anchor=(1.01,1.01))
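For a tabular view of the same comparison, pivoting the combined frame puts one model per column (plain pandas, nothing YOLO-specific):

# mAP50 per class (rows) and model (columns), side by side
print(combined_df.pivot(index='index', columns='Model', values='mAP50'))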


plt.figure(figsize=(24, 10))

sns.set(style='darkgrid')

sns.scatterplot(
    data=combined_df.reset_index(),
    x='R',
    y='P',
    s=300,
    alpha=0.8,
    hue='Model',
    palette='nipy_spectral',
    style='index'
).set_title('Precision Recall')


combined_df[combined_df['index'] == 'car']
    index  Model           Images  Instances      P      R  mAP50  mAP50-95
3   car    rgb_nano          1085       7285  0.663  0.574  0.621     0.398
15  car    rgb_small         1085       7285  0.722  0.629  0.683     0.454
27  car    ir_nano           1144       7128  0.696  0.650  0.711     0.449
39  car    ir_small          1144       7128  0.743  0.725  0.781     0.527
51  car    combined_nano     2229      14413  0.686  0.646  0.697     0.461
63  car    combined_small    2229      14413  0.734  0.713  0.764     0.529
plt.figure(figsize=(8, 5))
plt.title('Average Precision by Class')
sns.set(style='darkgrid')
sns.barplot(
    data=combined_df[combined_df['index'] == 'car'],
    x='index',
    y='mAP50',
    errorbar='sd',
    hue='Model',
    palette='seismic'
)
plt.legend(bbox_to_anchor=(1.01,1.01))


combined_df[combined_df['index'] == 'person']
    index   Model           Images  Instances      P      R  mAP50  mAP50-95
1   person  rgb_nano          1085       3223  0.505  0.375  0.391     0.167
13  person  rgb_small         1085       3223  0.622  0.428  0.481     0.225
25  person  ir_nano           1144       4470  0.631  0.556  0.595     0.276
37  person  ir_small          1144       4470  0.691  0.632  0.687     0.356
49  person  combined_nano     2229       7693  0.623  0.482  0.527     0.253
61  person  combined_small    2229       7693  0.687  0.569  0.630     0.326
plt.figure(figsize=(8, 5))
plt.title('Average Precision by Class')
sns.set(style='darkgrid')
sns.barplot(
    data=combined_df[combined_df['index'] == 'person'],
    x='index',
    y='mAP50',
    errorbar='sd',
    hue='Model',
    palette='seismic'
)
plt.legend(bbox_to_anchor=(1.01,1.01))


combined_df[combined_df['index'] == 'motor']
    index  Model           Images  Instances      P      R  mAP50  mAP50-95
4   motor  rgb_nano          1085         77  0.419  0.260  0.300     0.166
16  motor  rgb_small         1085         77  0.563  0.338  0.382     0.219
28  motor  ir_nano           1144         55  0.570  0.364  0.390     0.189
40  motor  ir_small          1144         55  0.593  0.504  0.551     0.251
52  motor  combined_nano     2229        132  0.607  0.364  0.417     0.193
64  motor  combined_small    2229        132  0.633  0.439  0.514     0.267
plt.figure(figsize=(8, 5))
plt.title('Average Precision by Class')
sns.set(style='darkgrid')
sns.barplot(
    data=combined_df[combined_df['index'] == 'motor'],
    x='index',
    y='mAP50',
    errorbar='sd',
    hue='Model',
    palette='seismic'
)
plt.legend(bbox_to_anchor=(1.01,1.01))


combined_df[combined_df['index'] == 'bike']
    index  Model           Images  Instances      P      R  mAP50  mAP50-95
2   bike   rgb_nano          1085        193  0.200  0.197  0.102     0.044
14  bike   rgb_small         1085        193  0.247  0.321  0.239     0.121
26  bike   ir_nano           1144        170  0.288  0.253  0.222     0.111
38  bike   ir_small          1144        170  0.369  0.353  0.309     0.174
50  bike   combined_nano     2229        363  0.281  0.267  0.239     0.130
62  bike   combined_small    2229        363  0.346  0.383  0.351     0.198
plt.figure(figsize=(8, 5))
plt.title('Average Precision by Class')
sns.set(style='darkgrid')
sns.barplot(
    data=combined_df[combined_df['index'] == 'bike'],
    x='index',
    y='mAP50',
    errorbar='sd',
    hue='Model',
    palette='seismic'
)
plt.legend(bbox_to_anchor=(1.01,1.01))


combined_df[combined_df['index'] == 'truck']
    index  Model           Images  Instances      P      R  mAP50  mAP50-95
6   truck  rgb_nano          1085       2190  0.458  0.197  0.205    0.0686
18  truck  rgb_small         1085       2190  0.600  0.315  0.333    0.1200
30  truck  ir_nano           1144       2048  0.462  0.256  0.271    0.1040
42  truck  ir_small          1144       2048  0.608  0.386  0.418    0.1780
54  truck  combined_nano     2229       4238  0.493  0.237  0.256    0.0960
66  truck  combined_small    2229       4238  0.590  0.391  0.407    0.1670
plt.figure(figsize=(8, 5))
plt.title('Average Precision by Class')
sns.set(style='darkgrid')
sns.barplot(
    data=combined_df[combined_df['index'] == 'truck'],
    x='index',
    y='mAP50',
    errorbar='sd',
    hue='Model',
    palette='seismic'
)
plt.legend(bbox_to_anchor=(1.01,1.01))


Evaluate Bounding Boxes

# read images
images = glob('./datasets/images_combined_val/images/*.jpg')
print(len(images))

2229

# select image
img = cv.imread(images[0])
height, width, _ = img.shape

print(images[0][38:-4], height, width)

video-57kWWRyeqqHs3Byei-frame-000816-b6tuLjNco8MfoBs3d 512 640

# select label file
path = './datasets/images_combined_val/labels/' + images[0][38:-4] + '.txt'
labels = open(path, 'r')

data = labels.readlines()
labels.close()

print(data)

['9 0.696875 0.375000 0.025000 0.039062\n', '9 0.696094 0.314453 0.032813 0.082031\n', '9 0.168750 0.395508 0.040625 0.013672\n', '0 0.464063 0.457031 0.009375 0.027344\n', '0 0.004688 0.491211 0.009375 0.041016\n', '0 0.165625 0.489258 0.009375 0.033203\n', '1 0.316406 0.500000 0.014063 0.035156\n', '2 0.600781 0.511719 0.117188 0.113281\n', '2 0.524219 0.481445 0.060938 0.064453\n', '2 0.481250 0.469727 0.037500 0.033203\n', '2 0.426563 0.454102 0.015625 0.017578\n', '2 0.412500 0.463867 0.015625 0.025391\n', '2 0.376563 0.474609 0.018750 0.023438\n', '2 0.364063 0.477539 0.021875 0.033203\n', '2 0.342188 0.477539 0.034375 0.041016\n', '0 0.315625 0.483398 0.021875 0.044922\n', '8 0.105469 0.500977 0.007812 0.021484\n']

# create one colour for every COCO class
colours = []
number_colours = 80

for j in range(number_colours):
    colour = (np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255))
    colours.append(colour)

print(len(colours), colours)

80 [(129, 83, 161), (220, 116, 220), (47, 113, 141), (185, 137, 77), (212, 208, 251), (36, 83, 204), (4, 40, 112), (61, 18, 39), (25, 132, 21), (239, 67, 234), (140, 253, 52), (207, 196, 72), (144, 32, 112), (138, 29, 227), (101, 17, 45), (102, 118, 7), (210, 51, 160), (59, 158, 131), (37, 145, 69), (68, 56, 71), (28, 96, 25), (72, 189, 118), (190, 67, 118), (152, 48, 33), (153, 138, 248), (218, 94, 242), (236, 229, 215), (133, 186, 102), (33, 198, 167), (223, 32, 103), (16, 209, 160), (83, 89, 91), (194, 46, 110), (243, 47, 47), (187, 11, 41), (193, 188, 6), (107, 119, 230), (116, 118, 109), (65, 155, 110), (12, 151, 145), (135, 138, 197), (43, 19, 174), (52, 203, 214), (72, 178, 172), (10, 247, 17), (108, 90, 185), (134, 29, 207), (217, 96, 179), (2, 38, 161), (245, 175, 254), (254, 57, 175), (84, 184, 46), (249, 195, 60), (246, 67, 127), (51, 89, 138), (12, 162, 182), (176, 89, 187), (165, 40, 110), (141, 76, 226), (245, 187, 119), (47, 237, 138), (173, 176, 50), (49, 101, 36), (171, 235, 78), (125, 105, 250), (123, 83, 13), (18, 47, 133), (196, 102, 109), (234, 204, 106), (55, 110, 131), (116, 209, 240), (147, 203, 253), (115, 246, 60), (17, 245, 112), (50, 250, 19), (254, 233, 18), (122, 211, 221), (229, 12, 236), (86, 169, 186), (13, 189, 38)]

Show Labels

# draw every label box onto the image
for line in data:

    # Split string to float: "class x_center y_center width height"
    cls, x, y, w, h = map(float, line.split(' '))

    # de-normalize to pixel corner coordinates
    l = int((x - w / 2) * width)
    r = int((x + w / 2) * width)
    t = int((y - h / 2) * height)
    b = int((y + h / 2) * height)

    # clip boxes to the image bounds
    if l < 0:
        l = 0
    if r > width - 1:
        r = width - 1
    if t < 0:
        t = 0
    if b > height - 1:
        b = height - 1

    # colour the box by its class id
    image = cv.rectangle(img, (l, t), (r, b), colours[int(cls)], thickness=2)

plt.title('Image with original Labels')
plt.axis('off')
plt.imshow(image)
plt.show()


Show Predictions

from PIL import Image

# Load the best model
backbone_combined_small = YOLO('./runs/detect/backbone_combined_small.torchscript')
# Run batched inference on a list of images
results = backbone_combined_small(img) # return a list of Results objects

for r in results:
    im_array = r.plot() # plot a BGR numpy array of predictions
    im = Image.fromarray(im_array[..., ::-1]) # RGB PIL image
    im.show() # show image
    im.save('results.jpg') # save image

0: 640x640 5 persons, 1 bike, 11 cars, 3 signs, 16.8ms
Speed: 8.9ms preprocess, 16.8ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 640)
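Beyond the rendered overlay, the Results object also exposes the raw detections; a quick sketch using the standard Ultralytics attributes (boxes.cls, boxes.conf, boxes.xyxy):

# inspect the raw detections of the first result
r = results[0]
for box in r.boxes:
    cls_id = int(box.cls)                  # class index
    conf = float(box.conf)                 # confidence score
    x1, y1, x2, y2 = box.xyxy[0].tolist()  # pixel corner coordinates
    print(f"{r.names[cls_id]:>12} {conf:.2f} ({x1:.0f}, {y1:.0f}, {x2:.0f}, {y2:.0f})")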


Show Labels

# get another image
img = cv.imread(images[1337])
height, width, _ = img.shape

# get labels
path = './datasets/images_combined_val/labels/' + images[1337][38:-4] + '.txt'
labels = open(path, 'r')

data = labels.readlines()
labels.close()
# draw every label box onto the image
for line in data:

    # Split string to float: "class x_center y_center width height"
    cls, x, y, w, h = map(float, line.split(' '))

    l = int((x - w / 2) * width)
    r = int((x + w / 2) * width)
    t = int((y - h / 2) * height)
    b = int((y + h / 2) * height)

    if l < 0:
        l = 0
    if r > width - 1:
        r = width - 1
    if t < 0:
        t = 0
    if b > height - 1:
        b = height - 1

    # colour the box by its class id
    image = cv.rectangle(img, (l, t), (r, b), colours[int(cls)], thickness=2)

plt.title('Image with original Labels')
plt.axis('off')
plt.imshow(image)
plt.show()


Show Predictions

# Run batched inference on a list of images
results = backbone_combined_small(img) # return a list of Results objects

for r in results:
    im_array = r.plot() # plot a BGR numpy array of predictions
    im = Image.fromarray(im_array[..., ::-1]) # RGB PIL image
    im.show() # show image
    im.save('results.jpg') # save image

0: 640x640 2 persons, 1 car, 7 trucks, 2 signs, 17.7ms
Speed: 12.7ms preprocess, 17.7ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 640)


Show Labels

# get another image
img = cv.imread(images[666])
height, width, _ = img.shape

# get labels
path = './datasets/images_combined_val/labels/' + images[666][38:-4] + '.txt'
labels = open(path, 'r')

data = labels.readlines()
labels.close()
# draw every label box onto the image
for line in data:

    # Split string to float: "class x_center y_center width height"
    cls, x, y, w, h = map(float, line.split(' '))

    l = int((x - w / 2) * width)
    r = int((x + w / 2) * width)
    t = int((y - h / 2) * height)
    b = int((y + h / 2) * height)

    if l < 0:
        l = 0
    if r > width - 1:
        r = width - 1
    if t < 0:
        t = 0
    if b > height - 1:
        b = height - 1

    # colour the box by its class id
    image = cv.rectangle(img, (l, t), (r, b), colours[int(cls)], thickness=2)

plt.title('Image with original Labels')
plt.axis('off')
plt.imshow(image)
plt.show()


Show Predictions

# Run batched inference on a list of images
results = backbone_combined_small(img) # return a list of Results objects

for r in results:
    im_array = r.plot() # plot a BGR numpy array of predictions
    im = Image.fromarray(im_array[..., ::-1]) # RGB PIL image
    im.show() # show image
    im.save('results.jpg') # save image

0: 640x640 2 persons, 3 bikes, 7 cars, 1 sign, 16.9ms
Speed: 13.9ms preprocess, 16.9ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)
