
YOLOv8 Nightshift

Dataset

Teledyne FLIR Free ADAS Thermal Dataset v2: The Teledyne FLIR free starter thermal dataset provides fully annotated thermal and visible spectrum frames for development of object detection neural networks. This data was constructed to encourage research on visible + thermal spectrum sensor fusion algorithms ("RGBT") in order to advance the safety of autonomous vehicles. A total of 26,442 fully-annotated frames are included with 15 different object classes.

Baseline Model: Baseline accuracy for object detection was established using the YOLOX-m neural network designed for 640 × 640 images. Both the RGB and thermal detectors were pre-trained on MSCOCO data (see the paper "YOLOX: Exceeding YOLO Series in 2021" and the YOLOX repository). The base neural networks were then trained on the training split provided in this dataset and tested on the video test data, also provided in this dataset.

from glob import glob
import json
import matplotlib.pyplot as plt
import numpy as np
import os
import shutil
from tqdm import tqdm

Dataset Exploration

# read in dataset
images_thermal = glob('./datasets/video_thermal_test/images/*.jpg')
images_rgb = glob('./datasets/video_rgb_test/images/*.jpg')
print(len(images_thermal), len(images_rgb))

3749 3749

# plot multiple random thermal images
ran_gen = np.random.default_rng()

plt.figure(figsize=(16, 14))
plt.suptitle('Thermal Images')
for i in range(12):
    ax = plt.subplot(4, 4, i+1)
    # `high` is exclusive, so this covers all 3749 images
    random_index = ran_gen.integers(low=0, high=len(images_thermal))
    img_loc = images_thermal[random_index]
    img_title = 'video: ' + img_loc[-52:-35] + '\n' + 'frame: ' + img_loc[-28:-22] + '\n' + 'id: ' + img_loc[-21:-4]
    image = plt.imread(img_loc)
    plt.imshow(image, cmap=plt.cm.binary)
    plt.title(img_title, fontsize='small')
    plt.axis('off')


# plot multiple random rgb images
ran_gen = np.random.default_rng()

plt.figure(figsize=(16, 14))
plt.suptitle('RGB Images')
for i in range(12):
    ax = plt.subplot(4, 4, i+1)
    random_index = ran_gen.integers(low=0, high=len(images_rgb))
    img_loc = images_rgb[random_index]
    img_title = 'video: ' + img_loc[-52:-35] + '\n' + 'frame: ' + img_loc[-28:-22] + '\n' + 'id: ' + img_loc[-21:-4]
    image = plt.imread(img_loc)
    plt.imshow(image, cmap=plt.cm.binary)
    plt.title(img_title, fontsize='small')
    plt.axis('off')


Label Conversion JSON2YOLO

  • "file_name": "data/video-BzZspxAweF8AnKhWK-frame-000745-SSCRtAHcFjphNPczJ.jpg", -> "file_name": "video-BzZspxAweF8AnKhWK-frame-000745-SSCRtAHcFjphNPczJ.jpg",

YOLOv8 expects all images to live in an images directory with the txt-format annotations in a labels folder next to it. This dataset kept all images in a directory named data and shipped COCO JSON annotations. I renamed the folder, created the missing labels folder and removed the "data/" prefix from all file names in the JSON file (see the sketch below). Now I am able to run a conversion:
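The file-name cleanup can also be scripted instead of done by hand; a minimal sketch of the same edit in Python, using the video_rgb_test annotations as an example (the other coco.json files are handled the same way):

import json

# strip the "data/" prefix from every file_name in a COCO annotation file
coco_path = './datasets/video_rgb_test/coco.json'

with open(coco_path) as f:
    coco = json.load(f)

for image in coco['images']:
    image['file_name'] = image['file_name'].removeprefix('data/')

with open(coco_path, 'w') as f:
    json.dump(coco, f)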

def make_folders(output_path):
    if os.path.exists(output_path):
        shutil.rmtree(output_path)
    os.makedirs(output_path)
    return output_path


def convert_bbox_coco2yolo(img_width, img_height, bbox):
    """
    Convert bounding box from COCO  format to YOLO format

    Parameters
    ----------
    img_width : int
        width of image
    img_height : int
        height of image
    bbox : list[int]
        bounding box annotation in COCO format: 
        [top left x position, top left y position, width, height]

    Returns
    -------
    list[float]
        bounding box annotation in YOLO format: 
        [x_center_rel, y_center_rel, width_rel, height_rel]
    """
    
    # YOLO bounding box format: [x_center, y_center, width, height]
    # (float values relative to width and height of image)
    x_tl, y_tl, w, h = bbox

    dw = 1.0 / img_width
    dh = 1.0 / img_height

    x_center = x_tl + w / 2.0
    y_center = y_tl + h / 2.0

    x = x_center * dw
    y = y_center * dh
    w = w * dw
    h = h * dh

    return [x, y, w, h]
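A quick sanity check of the conversion: a 100×50 px box with its top-left corner at (50, 100) inside a 640×512 frame (the frame size of the thermal images) becomes:

print(convert_bbox_coco2yolo(640, 512, [50, 100, 100, 50]))
# [0.15625, 0.244140625, 0.15625, 0.09765625]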
def convert_coco_json_to_yolo_txt(output_path, json_file):

    path = make_folders(output_path)

    with open(json_file) as f:
        json_data = json.load(f)

    # write _darknet.labels, which holds names of all classes (one class per line)
    label_file = os.path.join(output_path, "_darknet.labels")
    with open(label_file, "w") as f:
        for category in tqdm(json_data["categories"], desc="Categories"):
            category_name = category["name"]
            f.write(f"{category_name}\n")

    for image in tqdm(json_data["images"], desc="Annotation txt for each image"):
        img_id = image["id"]
        img_name = image["file_name"]
        img_width = image["width"]
        img_height = image["height"]

        anno_in_image = [anno for anno in json_data["annotations"] if anno["image_id"] == img_id]
        anno_txt = os.path.join(output_path, img_name.split(".")[0] + ".txt")
        with open(anno_txt, "w") as f:
            for anno in anno_in_image:
                category = anno["category_id"]
                bbox_COCO = anno["bbox"]
                x, y, w, h = convert_bbox_coco2yolo(img_width, img_height, bbox_COCO)
                f.write(f"{category} {x:.6f} {y:.6f} {w:.6f} {h:.6f}\n")

    print("Converting COCO Json to YOLO txt finished!")

Video RGB Test Dataset

convert_coco_json_to_yolo_txt("./datasets/video_rgb_test/labels", "./datasets/video_rgb_test/coco.json")

Categories: 100%|██████████| 16/16 [00:00, 253241.00it/s]
Annotation txt for each image: 100%|██████████| 3749/3749 [00:38, 98.23it/s]

Converting COCO Json to YOLO txt finished!

Video Thermal Test Dataset

convert_coco_json_to_yolo_txt("./datasets/video_thermal_test/labels", "./datasets/video_thermal_test/coco.json")

Categories: 100%|██████████| 16/16 [00:00, 430185.03it/s]
Annotation txt for each image: 100%|██████████| 3749/3749 [00:25, 145.99it/s]

Converting COCO Json to YOLO txt finished!

Images RGB Train Dataset

convert_coco_json_to_yolo_txt("./datasets/images_rgb_train/labels", "./datasets/images_rgb_train/coco.json")

Categories: 100%|██████████| 16/16 [00:00, 175218.97it/s]
Annotation txt for each image: 100%|██████████| 10318/10318 [03:18, 51.86it/s]

Converting COCO Json to YOLO txt finished!

Images Thermal Train Dataset

convert_coco_json_to_yolo_txt("./datasets/images_thermal_train/labels", "./datasets/images_thermal_train/coco.json")

Categories: 100%|██████████| 16/16 [00:00, 394758.02it/s]
Annotation txt for each image: 100%|██████████| 10742/10742 [03:07, 57.44it/s]

Converting COCO Json to YOLO txt finished!

Images RGB Val Dataset

convert_coco_json_to_yolo_txt("./datasets/images_rgb_val/labels", "./datasets/images_rgb_val/coco.json")

Categories: 100%|██████████| 16/16 [00:00, 281970.02it/s]
Annotation txt for each image: 100%|██████████| 1085/1085 [00:02, 452.60it/s]

Converting COCO Json to YOLO txt finished!

Images Thermal Val Dataset

convert_coco_json_to_yolo_txt("./datasets/images_thermal_val/labels", "./datasets/images_thermal_val/coco.json")

Categories: 100%|██████████| 16/16 [00:00, 377016.09it/s]
Annotation txt for each image: 100%|██████████| 1144/1144 [00:02, 472.82it/s]

Converting COCO Json to YOLO txt finished!

Dataset Configuration

The coco.yaml file that came with the dataset contained all 80 COCO classes. I removed all classes that were not part of the annotations and assigned new category_ids from 0-15 for the 16 categories. If you want to use the configuration files below to train your YOLO model, you need to remap the annotations accordingly (a sketch follows the config files) - check the ./config folder.

  • config/data_thermal.yaml
train: ../images_thermal_train/images
val: ../images_thermal_val/images
test: ../video_thermal_test/images

nc: 16
names: [
  'person',
  'bike',
  'car',
  'motor',
  'bus',
  'train',
  'truck',
  'light',
  'hydrant',
  'sign',
  'dog',
  'deer',
  'skateboard',
  'stroller',
  'scooter',
  'other vehicle'
  ]
  • config/data_rgb.yaml
train: /opt/app/datasets/images_rgb_train/images
val: /opt/app/datasets/images_rgb_val/images
test: /opt/app/datasets/video_rgb_test/images

nc: 16
names: [
  'person',
  'bike',
  'car',
  'motor',
  'bus',
  'train',
  'truck',
  'light',
  'hydrant',
  'sign',
  'dog',
  'deer',
  'skateboard',
  'stroller',
  'scooter',
  'other vehicle'
  ]
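The category_id remapping mentioned above can be scripted as well; a minimal sketch, assuming the categories list in each coco.json has already been reduced to the 16 classes:

import json

json_file = './datasets/images_thermal_train/coco.json'

with open(json_file) as f:
    coco = json.load(f)

# map the original category ids onto a dense 0-15 range, keeping the order of the categories list
old_to_new = {cat['id']: i for i, cat in enumerate(coco['categories'])}

for cat in coco['categories']:
    cat['id'] = old_to_new[cat['id']]
for anno in coco['annotations']:
    anno['category_id'] = old_to_new[anno['category_id']]

with open(json_file, 'w') as f:
    json.dump(coco, f)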
# load the validation annotations as dictionaries
with open('./config/images_rgb_val_coco.json') as f_rgb:            # => './datasets/images_rgb_val/coco.json'
    data_rgb_val = json.load(f_rgb)
with open('./config/images_thermal_val_coco.json') as f_thermal:    # => './datasets/images_thermal_val/coco.json'
    data_thermal_val = json.load(f_thermal)

# load the training annotations
with open('./config/images_rgb_train_coco.json') as f_rgb:          # => './datasets/images_rgb_train/coco.json'
    data_rgb_train = json.load(f_rgb)
with open('./config/images_thermal_train_coco.json') as f_thermal:  # => './datasets/images_thermal_train/coco.json'
    data_thermal_train = json.load(f_thermal)

# load the test annotations
with open('./config/video_rgb_test_coco.json') as f_rgb:            # => './datasets/video_rgb_test/coco.json'
    data_rgb_test = json.load(f_rgb)
with open('./config/video_thermal_test_coco.json') as f_thermal:    # => './datasets/video_thermal_test/coco.json'
    data_thermal_test = json.load(f_thermal)
# iterate through each annotation list - check that all category_ids are between 0 and 15

categories = []

for detection in data_rgb_val['annotations']:
    categories.append(detection['category_id'])

print(np.unique(categories))

[ 0 1 2 3 5 6 8 9 12 13 15]

categories = []

for detection in data_thermal_val['annotations']:
    categories.append(detection['category_id'])

print(np.unique(categories))

[ 0 1 2 3 5 6 8 9 12 13 15]

categories = []

for detection in data_rgb_train['annotations']:
    categories.append(detection['category_id'])

print(np.unique(categories))

[ 0 1 2 3 5 6 8 9 12 13 14 15]

categories = []

for detection in data_thermal_train['annotations']:
    categories.append(detection['category_id'])

print(np.unique(categories))

[ 0 1 2 3 5 6 8 9 10 11 12 13 14 15]

categories = []

for detection in data_rgb_test['annotations']:
    categories.append(detection['category_id'])

print(np.unique(categories))

[ 0 1 2 3 6 7 8 9 10 15]

categories = []

for detection in data_thermal_test['annotations']:
    categories.append(detection['category_id'])

print(np.unique(categories))

[ 0 1 2 3 6 8 9 10 15]
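The six checks above all follow the same pattern, so they can be folded into a small helper:

def unique_category_ids(coco_data):
    # sorted, de-duplicated category_ids used by the annotations
    return np.unique([anno['category_id'] for anno in coco_data['annotations']])

splits = {
    'rgb_val': data_rgb_val, 'thermal_val': data_thermal_val,
    'rgb_train': data_rgb_train, 'thermal_train': data_thermal_train,
    'rgb_test': data_rgb_test, 'thermal_test': data_thermal_test,
}
for name, split in splits.items():
    print(name, unique_category_ids(split))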

Training the YOLOv8 Model (RGB / IR)

# missing yolo dep
!pip install "lapx>=0.5.2"

WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv 

import cv2 as cv
from glob import glob
import matplotlib.pyplot as plt
import os
import random
from ultralytics import YOLO

YOLOv8 Nano (RGB)

# unzip downloaded dataset to `./datasets`
dataset_rgb = 'datasets/data_rgb.yaml'

# load a model
backbone_nano = YOLO("yolov8n.yaml")  # build a new model from scratch

Model Training

import json

# Opening JSON file
f = open('./datasets/images_rgb_train/coco.json')

# returns JSON object as a dictionary
data = json.load(f)

# iterate through the annotations and collect every category_id
category_ids = []
for anno in data['annotations']:
    category_ids.append(anno['category_id'])
len(category_ids)

169174

import numpy as np

print(np.unique(category_ids))

# Closing file
f.close()

[ 0 1 2 3 5 6 8 9 12 13 14 15]

# Train the model
results_n = backbone_nano.train(data=dataset_rgb, epochs=20)

20 epochs completed in 1.521 hours.

Epoch    GPU_mem   box_loss   cls_loss   dfl_loss   Instances   Size
10/20    3.07G     1.829      1.375      1.254      328         640

Class    Images    Instances   P         R         mAP50     mAP50-95
all      1085      16909       0.525     0.16      0.156     0.077

Epoch    GPU_mem   box_loss   cls_loss   dfl_loss   Instances   Size
20/20    2.63G     1.595      1.117      1.146      223         640

Class    Images    Instances   P         R         mAP50     mAP50-95
all      1085      16909       0.579     0.185     0.196     0.102

YOLOv8n summary (fused): 168 layers, 3008768 parameters, 0 gradients

Class           Images   Instances   P       R        mAP50    mAP50-95
all             1085     16909       0.578   0.186    0.196    0.102
person          1085     3223        0.501   0.375    0.389    0.167
bike            1085     193         0.201   0.197    0.101    0.0438
car             1085     7285        0.662   0.575    0.621    0.397
motor           1085     77          0.418   0.26     0.298    0.164
train           1085     183         0.458   0.246    0.253    0.153
truck           1085     2190        0.458   0.198    0.206    0.0686
hydrant         1085     126         0.744   0.0232   0.0797   0.0265
sign            1085     3581        0.564   0.143    0.171    0.0824
skateboard      1085     4           1       0        0        0
stroller        1085     7           1       0        0.018    0.0144
other vehicle   1085     40          0.348   0.025    0.0231   0.00793

Speed: 0.2ms preprocess, 4.2ms inference, 0.0ms loss, 0.6ms postprocess per image


plt.figure(figsize=(24, 14))

im_batch0_labels = plt.imread('./assets/backbone_rgb_nano_val_batch0_labels.webp')
im_batch0_pred = plt.imread('./assets/backbone_rgb_nano_val_batch0_pred.webp')
im_batch1_labels = plt.imread('./assets/backbone_rgb_nano_val_batch1_labels.webp')
im_batch1_pred = plt.imread('./assets/backbone_rgb_nano_val_batch1_pred.webp')
im_batch2_labels = plt.imread('./assets/backbone_rgb_nano_val_batch2_labels.webp')
im_batch2_pred = plt.imread('./assets/backbone_rgb_nano_val_batch2_pred.webp')

ax = plt.subplot(2, 3, 1)
plt.title('batch0_labels')
plt.imshow(im_batch0_labels)
plt.axis("off")

ax = plt.subplot(2, 3, 2)
plt.title('batch1_labels')
plt.imshow(im_batch1_labels)
plt.axis("off")

ax = plt.subplot(2, 3, 3)
plt.title('batch2_labels')
plt.imshow(im_batch2_labels)
plt.axis("off")

ax = plt.subplot(2, 3, 4)
plt.title('batch0_pred')
plt.imshow(im_batch0_pred)
plt.axis("off")

ax = plt.subplot(2, 3, 5)
plt.title('batch1_pred')
plt.imshow(im_batch1_pred)
plt.axis("off")

ax = plt.subplot(2, 3, 6)
plt.title('batch2_pred')
plt.imshow(im_batch2_pred)
plt.axis("off")

(-0.5, 1919.5, 1647.5, -0.5)


Model Evaluation

# Evaluate the model's performance on the validation set
results_n = backbone_nano.val()
Class           Images   Instances   P       R        mAP50    mAP50-95
all             1085     16909       0.578   0.185    0.198    0.104
person          1085     3223        0.505   0.375    0.391    0.167
bike            1085     193         0.2     0.197    0.102    0.044
car             1085     7285        0.663   0.574    0.621    0.398
motor           1085     77          0.419   0.26     0.3      0.166
train           1085     183         0.455   0.246    0.252    0.155
truck           1085     2190        0.458   0.197    0.205    0.0686
hydrant         1085     126         0.741   0.023    0.0801   0.0274
sign            1085     3581        0.563   0.142    0.17     0.0824
skateboard      1085     4           1       0        0        0
stroller        1085     7           1       0        0.0353   0.023
other vehicle   1085     40          0.355   0.025    0.0231   0.00793

Speed: 0.3ms preprocess, 5.0ms inference, 0.0ms loss, 0.7ms postprocess per image


# Export the model to ONNX format
# success = backbone_nano.export(imgsz=(640, 480), format='onnx', opset=12, optimize=False, half=False)
# Export to PyTorch format
success = backbone_nano.export(imgsz=640, format='torchscript', optimize=False, half=False, int8=False)
# TorchScript: export success ✅ 1.3s, saved as 'runs/detect/train4/weights/best.torchscript' (11.9 MB)
# pick the exported model
n_model = YOLO('runs/detect/train6/weights/best.torchscript')
# source videos to run the tracker on (assumption: adjust the glob to wherever your test videos live)
videos = glob('./videos/*.mp4')
# read video by index
video = cv.VideoCapture(videos[1])
ret, frame = video.read()

# get video dims
frame_width = int(video.get(cv.CAP_PROP_FRAME_WIDTH))
frame_height = int(video.get(cv.CAP_PROP_FRAME_HEIGHT))
size = (frame_width, frame_height)

# Define the codec and create VideoWriter object
fourcc = cv.VideoWriter_fourcc(*'DIVX')
out = cv.VideoWriter('./outputs/backbone_nano_rgb.avi', fourcc, 20.0, size)

# read frames until the video is exhausted
ret = True

while ret:
    ret, frame = video.read()

    if ret:
        # detect & track objects
        results = n_model.track(frame, persist=True)

        # plot results
        composed = results[0].plot()

        # save video
        out.write(composed)

out.release()
video.release()

YOLOv8 Small (RGB)

# unzip downloaded dataset to `./datasets`
dataset_rgb = 'datasets/data_rgb.yaml'

# load a model
backbone_small = YOLO("yolov8s.yaml")  # build a new model from scratch

Model Training

# Train the model
results_s = backbone_small.train(data=dataset_rgb, epochs=20)

20 epochs completed in 2.438 hours.

Epoch    GPU_mem   box_loss   cls_loss   dfl_loss   Instances   Size
10/20    4.84G     1.569      1.098      1.195      328         640

Class    Images    Instances   P         R         mAP50     mAP50-95
all      1085      16909       0.596     0.211     0.245     0.128

Epoch    GPU_mem   box_loss   cls_loss   dfl_loss   Instances   Size
20/20    4.67G     1.367      0.8879     1.083      223         640

Class    Images    Instances   P         R         mAP50     mAP50-95
all      1085      16909       0.608     0.25      0.291     0.158

YOLOv8s summary (fused): 168 layers, 11131776 parameters, 0 gradients

Class           Images   Instances   P       R       mAP50    mAP50-95
all             1085     16909       0.523   0.255   0.291    0.159
person          1085     3223        0.618   0.428   0.481    0.225
bike            1085     193         0.248   0.326   0.239    0.121
car             1085     7285        0.718   0.63    0.683    0.454
motor           1085     77          0.566   0.338   0.382    0.22
train           1085     183         0.577   0.344   0.409    0.276
truck           1085     2190        0.593   0.318   0.336    0.119
hydrant         1085     126         0.8     0.175   0.293    0.129
sign            1085     3581        0.632   0.243   0.291    0.149
skateboard      1085     4           0       0       0        0
stroller        1085     7           1       0       0.0687   0.0477
other vehicle   1085     40          0       0       0.0135   0.00526

Speed: 0.3ms preprocess, 9.6ms inference, 0.0ms loss, 0.5ms postprocess per image


plt.figure(figsize=(24, 14))

im_batch0_labels = plt.imread('./assets/backbone_rgb_small_val_batch0_labels.webp')
im_batch0_pred = plt.imread('./assets/backbone_rgb_small_val_batch0_pred.webp')
im_batch1_labels = plt.imread('./assets/backbone_rgb_small_val_batch1_labels.webp')
im_batch1_pred = plt.imread('./assets/backbone_rgb_small_val_batch1_pred.webp')
im_batch2_labels = plt.imread('./assets/backbone_rgb_small_val_batch2_labels.webp')
im_batch2_pred = plt.imread('./assets/backbone_rgb_small_val_batch2_pred.webp')

ax = plt.subplot(2, 3, 1)
plt.title('batch0_labels')
plt.imshow(im_batch0_labels)
plt.axis("off")

ax = plt.subplot(2, 3, 2)
plt.title('batch1_labels')
plt.imshow(im_batch1_labels)
plt.axis("off")

ax = plt.subplot(2, 3, 3)
plt.title('batch2_labels')
plt.imshow(im_batch2_labels)
plt.axis("off")

ax = plt.subplot(2, 3, 4)
plt.title('batch0_pred')
plt.imshow(im_batch0_pred)
plt.axis("off")

ax = plt.subplot(2, 3, 5)
plt.title('batch1_pred')
plt.imshow(im_batch1_pred)
plt.axis("off")

ax = plt.subplot(2, 3, 6)
plt.title('batch2_pred')
plt.imshow(im_batch2_pred)
plt.axis("off")

(-0.5, 1919.5, 1647.5, -0.5)


Model Evaluation

# Evaluate the model's performance on the validation set
results_s = backbone_small.val()
Class           Images   Instances   P       R       mAP50    mAP50-95
all             1085     16909       0.524   0.253   0.29     0.159
person          1085     3223        0.622   0.428   0.481    0.225
bike            1085     193         0.247   0.321   0.239    0.121
car             1085     7285        0.722   0.629   0.683    0.454
motor           1085     77          0.563   0.338   0.382    0.219
train           1085     183         0.575   0.339   0.41     0.276
truck           1085     2190        0.6     0.315   0.333    0.12
hydrant         1085     126         0.8     0.175   0.292    0.129
sign            1085     3581        0.635   0.243   0.292    0.149
skateboard      1085     4           0       0       0        0
stroller        1085     7           1       0       0.069    0.0479
other vehicle   1085     40          0       0       0.0136   0.00526

Speed: 0.4ms preprocess, 10.9ms inference, 0.0ms loss, 0.6ms postprocess per image


# Export the model to ONNX format
# success = backbone_nano.export(imgsz=(640, 480), format='onnx', opset=12, optimize=False, half=False)
# Export to PyTorch format
success = backbone_small.export(imgsz=640, format='torchscript', optimize=False, half=False, int8=False)
TorchScript: export success ✅ 2.1s, saved as 'runs/detect/train5/weights/best.torchscript' (42.9 MB)

YOLOv8 Nano (IR)

# unzip downloaded dataset to `./datasets`
dataset_ir = 'datasets/data_thermal.yaml'

# load a model
backbone_ir_nano = YOLO("yolov8n.yaml")  # build a new model from scratch

Model Training

# Train the model
results_ir_n = backbone_ir_nano.train(data=dataset_ir, epochs=20)

20 epochs completed in 1.337 hours.

Epoch    GPU_mem   box_loss   cls_loss   dfl_loss   Instances   Size
10/20    3.33G     1.746      1.263      1.211      104         640

Class    Images    Instances   P         R         mAP50     mAP50-95
all      1144      16688       0.466     0.186     0.226     0.112

Epoch    GPU_mem   box_loss   cls_loss   dfl_loss   Instances   Size
20/20    2.5G      1.518      1.016      1.111      102         640

Class    Images    Instances   P         R         mAP50     mAP50-95
all      1144      16688       0.513     0.249     0.276     0.146

YOLOv8n summary (fused): 168 layers, 3008768 parameters, 0 gradients

Class           Images   Instances   P        R        mAP50    mAP50-95
all             1144     16688       0.514    0.249    0.276    0.146
person          1144     4470        0.628    0.555    0.594    0.276
bike            1144     170         0.278    0.25     0.219    0.11
car             1144     7128        0.691    0.65     0.71     0.449
motor           1144     55          0.569    0.364    0.39     0.19
train           1144     179         0.741    0.383    0.455    0.284
truck           1144     2048        0.467    0.259    0.274    0.105
hydrant         1144     94          0.678    0.0638   0.12     0.0535
sign            1144     2472        0.557    0.2      0.255    0.132
skateboard      1144     3           0        0        0        0
stroller        1144     6           1        0        0        0
other vehicle   1144     63          0.0423   0.0159   0.0194   0.00652

Speed: 0.3ms preprocess, 3.8ms inference, 0.0ms loss, 0.6ms postprocess per image


plt.figure(figsize=(24, 14))

im_batch0_labels = plt.imread('./assets/backbone_ir_nano_val_batch0_labels.webp')
im_batch0_pred = plt.imread('./assets/backbone_ir_nano_val_batch0_pred.webp')
im_batch1_labels = plt.imread('./assets/backbone_ir_nano_val_batch1_labels.webp')
im_batch1_pred = plt.imread('./assets/backbone_ir_nano_val_batch1_pred.webp')
im_batch2_labels = plt.imread('./assets/backbone_ir_nano_val_batch2_labels.webp')
im_batch2_pred = plt.imread('./assets/backbone_ir_nano_val_batch2_pred.webp')

ax = plt.subplot(2, 3, 1)
plt.title('batch0_labels')
plt.imshow(im_batch0_labels)
plt.axis("off")

ax = plt.subplot(2, 3, 2)
plt.title('batch1_labels')
plt.imshow(im_batch1_labels)
plt.axis("off")

ax = plt.subplot(2, 3, 3)
plt.title('batch2_labels')
plt.imshow(im_batch2_labels)
plt.axis("off")

ax = plt.subplot(2, 3, 4)
plt.title('batch0_pred')
plt.imshow(im_batch0_pred)
plt.axis("off")

ax = plt.subplot(2, 3, 5)
plt.title('batch1_pred')
plt.imshow(im_batch1_pred)
plt.axis("off")

ax = plt.subplot(2, 3, 6)
plt.title('batch2_pred')
plt.imshow(im_batch2_pred)
plt.axis("off")


Model Evaluation

# Evaluate the model's performance on the validation set
results_ir_n = backbone_ir_nano.val()
Class           Images   Instances   P        R        mAP50    mAP50-95
all             1144     16688       0.516    0.249    0.276    0.146
person          1144     4470        0.631    0.556    0.595    0.276
bike            1144     170         0.288    0.253    0.222    0.111
car             1144     7128        0.696    0.65     0.711    0.449
motor           1144     55          0.57     0.364    0.39     0.189
train           1144     179         0.746    0.378    0.455    0.283
truck           1144     2048        0.462    0.256    0.271    0.104
hydrant         1144     94          0.679    0.0638   0.12     0.0526
sign            1144     2472        0.557    0.199    0.256    0.132
skateboard      1144     3           0        0        0        0
stroller        1144     6           1        0        0        0
other vehicle   1144     63          0.0425   0.0159   0.0193   0.00637

Speed: 0.3ms preprocess, 4.5ms inference, 0.0ms loss, 0.7ms postprocess per image


# Export the model to ONNX format
# success = backbone_nano.export(imgsz=(640, 480), format='onnx', opset=12, optimize=False, half=False)
# Export to PyTorch format
success = backbone_ir_nano.export(imgsz=640, format='torchscript', optimize=False, half=False, int8=False)
# TorchScript: export success ✅ 1.6s, saved as 'runs/detect/train6/weights/best.torchscript' (12.4 MB)

YOLOv8 Small (IR)

# unzip downloaded dataset to `./datasets`
dataset_ir = 'datasets/data_thermal.yaml'

# load a model
backbone_ir_small = YOLO("yolov8s.yaml")  # build a new model from scratch

Model Training

# Train the model
results_ir_s = backbone_ir_small.train(data=dataset_ir, epochs=20)

20 epochs completed in 2.827 hours.

Epoch    GPU_mem   box_loss   cls_loss   dfl_loss   Instances   Size
10/20    4.83G     1.508      1.018      1.16       104         640

Class    Images    Instances   P         R         mAP50     mAP50-95
all      1144      16688       0.489     0.286     0.313     0.168

Epoch    GPU_mem   box_loss   cls_loss   dfl_loss   Instances   Size
20/20    4.67G     1.317      0.8207     1.064      102         640

Class    Images    Instances   P         R         mAP50     mAP50-95
all      1144      16688       0.554     0.322     0.358     0.2

YOLOv8s summary (fused): 168 layers, 11131776 parameters, 0 gradients

Class           Images   Instances   P       R        mAP50    mAP50-95
all             1144     16688       0.554   0.322    0.358    0.2
person          1144     4470        0.687   0.634    0.688    0.355
bike            1144     170         0.364   0.347    0.308    0.174
car             1144     7128        0.74    0.725    0.781    0.527
motor           1144     55          0.608   0.509    0.552    0.25
train           1144     179         0.683   0.419    0.526    0.358
truck           1144     2048        0.601   0.385    0.415    0.178
hydrant         1144     94          0.687   0.149    0.274    0.147
sign            1144     2472        0.608   0.313    0.362    0.195
skateboard      1144     3           0       0        0        0
stroller        1144     6           1       0        0        0
other vehicle   1144     63          0.112   0.0635   0.0254   0.0151

Speed: 0.3ms preprocess, 9.6ms inference, 0.0ms loss, 0.6ms postprocess per image


plt.figure(figsize=(24, 14))

im_batch0_labels = plt.imread('./assets/backbone_ir_small_val_batch0_labels.webp')
im_batch0_pred = plt.imread('./assets/backbone_ir_small_val_batch0_pred.webp')
im_batch1_labels = plt.imread('./assets/backbone_ir_small_val_batch1_labels.webp')
im_batch1_pred = plt.imread('./assets/backbone_ir_small_val_batch1_pred.webp')
im_batch2_labels = plt.imread('./assets/backbone_ir_small_val_batch2_labels.webp')
im_batch2_pred = plt.imread('./assets/backbone_ir_small_val_batch2_pred.webp')

ax = plt.subplot(2, 3, 1)
plt.title('batch0_labels')
plt.imshow(im_batch0_labels)
plt.axis("off")

ax = plt.subplot(2, 3, 2)
plt.title('batch1_labels')
plt.imshow(im_batch1_labels)
plt.axis("off")

ax = plt.subplot(2, 3, 3)
plt.title('batch2_labels')
plt.imshow(im_batch2_labels)
plt.axis("off")

ax = plt.subplot(2, 3, 4)
plt.title('batch0_pred')
plt.imshow(im_batch0_pred)
plt.axis("off")

ax = plt.subplot(2, 3, 5)
plt.title('batch1_pred')
plt.imshow(im_batch1_pred)
plt.axis("off")

ax = plt.subplot(2, 3, 6)
plt.title('batch2_pred')
plt.imshow(im_batch2_pred)
plt.axis("off")


Model Evaluation

# Evaluate the model's performance on the validation set
results_ir_s = backbone_ir_small.val()
Class           Images   Instances   P       R        mAP50    mAP50-95
all             1144     16688       0.555   0.322    0.358    0.2
person          1144     4470        0.691   0.632    0.687    0.356
bike            1144     170         0.369   0.353    0.309    0.174
car             1144     7128        0.743   0.725    0.781    0.527
motor           1144     55          0.593   0.504    0.551    0.251
train           1144     179         0.683   0.419    0.527    0.361
truck           1144     2048        0.608   0.386    0.418    0.178
hydrant         1144     94          0.695   0.149    0.274    0.148
sign            1144     2472        0.614   0.313    0.362    0.195
skateboard      1144     3           0       0        0        0
stroller        1144     6           1       0        0        0
other vehicle   1144     63          0.112   0.0635   0.0254   0.0151


# Export the model to ONNX format
# success = backbone_nano.export(imgsz=(640, 480), format='onnx', opset=12, optimize=False, half=False)
# Export to PyTorch format
success = backbone_ir_small.export(imgsz=640, format='torchscript', optimize=False, half=False, int8=False)
# TorchScript: export success ✅ 1.6s, saved as 'runs/detect/train6/weights/best.torchscript' (12.4 MB)

Training the YOLOv8 Mixed Model (RGB + IR)

# missing yolo dep
!pip install "lapx>=0.5.2"
import cv2 as cv
from glob import glob
import matplotlib.pyplot as plt
import os
import random
from ultralytics import YOLO

YOLOv8 Nano (RGB+IR)

# unzip downloaded dataset to `./datasets`
dataset_combined = 'datasets/data_combined.yaml'

# load a model
backbone_nano = YOLO("yolov8n.yaml")  # build a new model from scratch
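How the combined dataset was assembled is not shown here; a minimal sketch of one way to build it, assuming the RGB and thermal splits are simply copied side by side into shared images_combined_* folders (the FLIR frame ids differ per modality, so the file names should not collide) and that data_combined.yaml mirrors data_rgb.yaml but points at these folders:

import os
import shutil
from glob import glob

# merge the RGB and thermal train/val splits into shared folders
for split in ['train', 'val']:
    for modality in ['rgb', 'thermal']:
        for sub in ['images', 'labels']:
            src = f'./datasets/images_{modality}_{split}/{sub}'
            dst = f'./datasets/images_combined_{split}/{sub}'
            os.makedirs(dst, exist_ok=True)
            for path in glob(os.path.join(src, '*')):
                shutil.copy(path, dst)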

Model Training

# Train the model
results_n = backbone_nano.train(data=dataset_combined, epochs=20)

20 epochs completed in 2.531 hours.

Epoch    GPU_mem   box_loss   cls_loss   dfl_loss   Instances
10/20    3.18G     1.655      1.208      1.154      179

Class    Images    Instances   P         R         mAP50
all      2229      33597       0.545     0.208     0.226

Epoch    GPU_mem   box_loss   cls_loss   dfl_loss   Instances
20/20    2.6G      1.458      0.9884     1.073      52

Class    Images    Instances   P         R         mAP50
all      2229      33597       0.52      0.242     0.272

YOLOv8n summary (fused): 168 layers, 3008768 parameters, 0 gradients

Class           Images   Instances   P        R        mAP50    mAP50-95
all             2229     33597       0.522    0.241    0.272    0.147
person          2229     7693        0.624    0.48     0.526    0.253
bike            2229     363         0.288    0.27     0.239    0.128
car             2229     14413       0.689    0.644    0.696    0.46
motor           2229     132         0.611    0.364    0.419    0.193
train           2229     362         0.675    0.344    0.425    0.281
truck           2229     4238        0.492    0.235    0.255    0.0951
hydrant         2229     220         0.688    0.0955   0.162    0.0659
sign            2229     6053        0.592    0.205    0.252    0.13
skateboard      2229     7           0        0        0        0
stroller        2229     13          1        0        0        0
other vehicle   2229     103         0.0798   0.0194   0.0232   0.00977

Speed: 0.2ms preprocess, 4.0ms inference, 0.0ms loss, 0.6ms postprocess per image


plt.figure(figsize=(24, 14))

im_batch0_labels = plt.imread('./assets/backbone_combined_nano_val_batch0_labels.webp')
im_batch0_pred = plt.imread('./assets/backbone_combined_nano_val_batch0_pred.webp')
im_batch1_labels = plt.imread('./assets/backbone_combined_nano_val_batch1_labels.webp')
im_batch1_pred = plt.imread('./assets/backbone_combined_nano_val_batch1_pred.webp')
im_batch2_labels = plt.imread('./assets/backbone_combined_nano_val_batch2_labels.webp')
im_batch2_pred = plt.imread('./assets/backbone_combined_nano_val_batch2_pred.webp')

ax = plt.subplot(2, 3, 1)
plt.title('batch0_labels')
plt.imshow(im_batch0_labels)
plt.axis("off")

ax = plt.subplot(2, 3, 2)
plt.title('batch1_labels')
plt.imshow(im_batch1_labels)
plt.axis("off")

ax = plt.subplot(2, 3, 3)
plt.title('batch2_labels')
plt.imshow(im_batch2_labels)
plt.axis("off")

ax = plt.subplot(2, 3, 4)
plt.title('batch0_pred')
plt.imshow(im_batch0_pred)
plt.axis("off")

ax = plt.subplot(2, 3, 5)
plt.title('batch1_pred')
plt.imshow(im_batch1_pred)
plt.axis("off")

ax = plt.subplot(2, 3, 6)
plt.title('batch2_pred')
plt.imshow(im_batch2_pred)
plt.axis("off")


Model Evaluation

# Evaluate the model's performance on the validation set
results_n = backbone_nano.val()

Ultralytics YOLOv8.0.173 🚀 Python-3.10.11 torch-2.0.1 CUDA:0 (NVIDIA GeForce GTX 1060 6GB, 6070MiB)
YOLOv8n summary (fused): 168 layers, 3008768 parameters, 0 gradients
val: Scanning /opt/app/datasets/images_combined_val/labels.cache... 2229 images, 32 backgrounds, 0 corrupt: 100%|██████████| 2229/2229
Class Images Instances Box(P R mAP50 mAP50-95): 100%|██████████| 140/140 [00:31, 4.47it/s]
Results saved to runs/detect/val5

Class           Images   Instances   P        R        mAP50    mAP50-95
all             2229     33597       0.519    0.242    0.272    0.147
person          2229     7693        0.623    0.482    0.527    0.253
bike            2229     363         0.281    0.267    0.239    0.13
car             2229     14413       0.686    0.646    0.697    0.461
motor           2229     132         0.607    0.364    0.417    0.193
train           2229     362         0.666    0.348    0.424    0.279
truck           2229     4238        0.493    0.237    0.256    0.096
hydrant         2229     220         0.685    0.0955   0.162    0.0676
sign            2229     6053        0.588    0.205    0.251    0.131
skateboard      2229     7           0        0        0        0
stroller        2229     13          1        0        0        0
other vehicle   2229     103         0.0795   0.0194   0.023    0.00967

Speed: 0.3ms preprocess, 4.5ms inference, 0.0ms loss, 0.6ms postprocess per image


# Export the model to ONNX format
# success = backbone_nano.export(imgsz=(640, 480), format='onnx', opset=12, optimize=False, half=False)
# Export to PyTorch format
success = backbone_nano.export(imgsz=640, format='torchscript', optimize=False, half=False, int8=False)
# TorchScript: export success ✅ 1.2s, saved as 'runs/detect/train10/weights/best.torchscript' (11.9 MB)

Ultralytics YOLOv8.0.173 🚀 Python-3.10.11 torch-2.0.1 CPU (Intel Core(TM) i7-7700 3.60GHz)

PyTorch: starting from 'runs/detect/train10/weights/best.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 20, 8400) (6.0 MB)

TorchScript: starting export with torch 2.0.1...
TorchScript: export success ✅ 1.2s, saved as 'runs/detect/train10/weights/best.torchscript' (11.9 MB)

Export complete (2.5s)
Results saved to /opt/app/runs/detect/train10/weights
Predict:   yolo predict task=detect model=runs/detect/train10/weights/best.torchscript imgsz=640
Validate:  yolo val task=detect model=runs/detect/train10/weights/best.torchscript imgsz=640 data=datasets/data_combined.yaml
Visualize: https://netron.app

YOLOv8 Small (RGB + IR)

# unzip downloaded dataset to `./datasets`
dataset_combined = 'datasets/data_combined.yaml'

# load a model
backbone_small = YOLO("yolov8s.yaml")  # build a new model from scratch

Model Training

# Train the model
results_s = backbone_small.train(data=dataset_combined, epochs=20)

20 epochs completed in 4.965 hours.

Epoch    GPU_mem   box_loss   cls_loss   dfl_loss   Instances
10/20    4.88G     1.445      0.9915     1.085      179

Class    Images    Instances   P         R         mAP50
all      2229      33597       0.548     0.277     0.314

Epoch    GPU_mem   box_loss   cls_loss   dfl_loss   Instances
20/20    4.86G     1.265      0.7992     1.011      52

Class    Images    Instances   P         R         mAP50
all      2229      33597       0.651     0.324     0.36

YOLOv8s summary (fused): 168 layers, 11131776 parameters, 0 gradients

Class           Images   Instances   P       R        mAP50    mAP50-95
all             2229     33597       0.652   0.323    0.36     0.204
person          2229     7693        0.687   0.566    0.628    0.325
bike            2229     363         0.35    0.383    0.353    0.199
car             2229     14413       0.735   0.712    0.764    0.528
motor           2229     132         0.645   0.439    0.513    0.268
train           2229     362         0.703   0.478    0.555    0.383
truck           2229     4238        0.589   0.389    0.404    0.167
hydrant         2229     220         0.696   0.177    0.266    0.121
sign            2229     6053        0.62    0.329    0.372    0.205
skateboard      2229     7           1       0        0        0
stroller        2229     13          1       0        0.0386   0.0297
other vehicle   2229     103         0.15    0.0777   0.0616   0.0202

Speed: 0.2ms preprocess, 9.1ms inference, 0.0ms loss, 0.5ms postprocess per image


plt.figure(figsize=(24, 14))

im_batch0_labels = plt.imread('./assets/backbone_combined_small_val_batch0_labels.webp')
im_batch0_pred = plt.imread('./assets/backbone_combined_small_val_batch0_pred.webp')
im_batch1_labels = plt.imread('./assets/backbone_combined_small_val_batch1_labels.webp')
im_batch1_pred = plt.imread('./assets/backbone_combined_small_val_batch1_pred.webp')
im_batch2_labels = plt.imread('./assets/backbone_combined_small_val_batch2_labels.webp')
im_batch2_pred = plt.imread('./assets/backbone_combined_small_val_batch2_pred.webp')

ax = plt.subplot(2, 3, 1)
plt.title('batch0_labels')
plt.imshow(im_batch0_labels)
plt.axis("off")

ax = plt.subplot(2, 3, 2)
plt.title('batch1_labels')
plt.imshow(im_batch1_labels)
plt.axis("off")

ax = plt.subplot(2, 3, 3)
plt.title('batch2_labels')
plt.imshow(im_batch2_labels)
plt.axis("off")

ax = plt.subplot(2, 3, 4)
plt.title('batch0_pred')
plt.imshow(im_batch0_pred)
plt.axis("off")

ax = plt.subplot(2, 3, 5)
plt.title('batch1_pred')
plt.imshow(im_batch1_pred)
plt.axis("off")

ax = plt.subplot(2, 3, 6)
plt.title('batch2_pred')
plt.imshow(im_batch2_pred)
plt.axis("off")


Model Evaluation

# Evaluate the model's performance on the validation set
results_s = backbone_small.val()

Ultralytics YOLOv8.0.173 🚀 Python-3.10.11 torch-2.0.1 CUDA:0 (NVIDIA GeForce GTX 1060 6GB, 6070MiB)
YOLOv8s summary (fused): 168 layers, 11131776 parameters, 0 gradients
val: Scanning /opt/app/datasets/images_combined_val/labels.cache... 2229 images, 32 backgrounds, 0 corrupt: 100%|██████████| 2229/2229
Class Images Instances Box(P R mAP50 mAP50-95): 100%|██████████| 140/140 [00:42, 3.28it/s]
Results saved to runs/detect/val6

Class           Images   Instances   P       R        mAP50    mAP50-95
all             2229     33597       0.651   0.324    0.359    0.204
person          2229     7693        0.687   0.569    0.63     0.326
bike            2229     363         0.346   0.383    0.351    0.198
car             2229     14413       0.734   0.713    0.764    0.529
motor           2229     132         0.633   0.439    0.514    0.267
train           2229     362         0.707   0.481    0.556    0.384
truck           2229     4238        0.59    0.391    0.407    0.167
hydrant         2229     220         0.697   0.178    0.266    0.122
sign            2229     6053        0.617   0.329    0.371    0.205
skateboard      2229     7           1       0        0        0
stroller        2229     13          1       0        0.0323   0.024
other vehicle   2229     103         0.148   0.0777   0.0618   0.0193

Speed: 0.3ms preprocess, 10.3ms inference, 0.0ms loss, 0.7ms postprocess per image


# Export the model to ONNX format
# success = backbone_nano.export(imgsz=(640, 480), format='onnx', opset=12, optimize=False, half=False)
# Export to PyTorch format
success = backbone_small.export(imgsz=640, format='torchscript', optimize=False, half=False, int8=False)
# TorchScript: export success ✅ 1.7s, saved as 'runs/detect/train11/weights/best.torchscript' (42.9 MB)

Model Evaluation

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

YOLOv8n & RGB Dataset

data_index = ['all', 'person', 'bike', 'car', 'motor', 'train', 'truck', 'hydrant', 'sign', 'skateboard', 'stroller', 'other vehicle']
data_columns = ['Model', 'Images', 'Instances', 'P', 'R', 'mAP50', 'mAP50-95']
rgb_nano = [
    ['rgb_nano', 1085, 16909, 0.578, 0.185, 0.198, 0.104],
    ['rgb_nano', 1085, 3223, 0.505, 0.375, 0.391, 0.167],
    ['rgb_nano', 1085, 193, 0.2, 0.197, 0.102, 0.044],
    ['rgb_nano', 1085, 7285, 0.663, 0.574, 0.621, 0.398],
    ['rgb_nano', 1085, 77, 0.419, 0.26, 0.3, 0.166],
    ['rgb_nano', 1085, 183, 0.455, 0.246, 0.252, 0.155],
    ['rgb_nano', 1085, 2190, 0.458, 0.197, 0.205, 0.0686],
    ['rgb_nano', 1085, 126, 0.741, 0.023, 0.0801, 0.0274],
    ['rgb_nano', 1085, 3581, 0.563, 0.142, 0.17, 0.0824],
    ['rgb_nano', 1085, 4, 1, 0, 0, 0],
    ['rgb_nano', 1085, 7, 1, 0, 0.0353, 0.023],
    ['rgb_nano', 1085, 40, 0.355, 0.025, 0.0231, 0.00793]
]
rgb_nano_df = pd.DataFrame(rgb_nano, data_index, data_columns)
rgb_nano_df
               Model     Images  Instances      P      R   mAP50  mAP50-95
all            rgb_nano    1085      16909  0.578  0.185  0.1980   0.10400
person         rgb_nano    1085       3223  0.505  0.375  0.3910   0.16700
bike           rgb_nano    1085        193  0.200  0.197  0.1020   0.04400
car            rgb_nano    1085       7285  0.663  0.574  0.6210   0.39800
motor          rgb_nano    1085         77  0.419  0.260  0.3000   0.16600
train          rgb_nano    1085        183  0.455  0.246  0.2520   0.15500
truck          rgb_nano    1085       2190  0.458  0.197  0.2050   0.06860
hydrant        rgb_nano    1085        126  0.741  0.023  0.0801   0.02740
sign           rgb_nano    1085       3581  0.563  0.142  0.1700   0.08240
skateboard     rgb_nano    1085          4  1.000  0.000  0.0000   0.00000
stroller       rgb_nano    1085          7  1.000  0.000  0.0353   0.02300
other vehicle  rgb_nano    1085         40  0.355  0.025  0.0231   0.00793
plt.figure(figsize=(16, 5))
plt.title('Average Precision by Class & Instances')
sns.set(style='darkgrid')
sns.barplot(
    data=rgb_nano_df.reset_index(),
    x='Instances',
    y='mAP50',
    errorbar='sd',
    hue='index',
    palette='nipy_spectral'
)
plt.legend(bbox_to_anchor=(1.01,1.01))


YOLOv8s & RGB Dataset

rgb_small = [
    ['rgb_small', 1085, 16909, 0.524, 0.253, 0.29, 0.159],
    ['rgb_small', 1085, 3223, 0.622, 0.428, 0.481, 0.225],
    ['rgb_small', 1085, 193, 0.247, 0.321, 0.239, 0.121],
    ['rgb_small', 1085, 7285, 0.722, 0.629, 0.683, 0.454],
    ['rgb_small', 1085, 77, 0.563, 0.338, 0.382, 0.219],
    ['rgb_small', 1085, 183, 0.575, 0.339, 0.41, 0.276],
    ['rgb_small', 1085, 2190, 0.6, 0.315, 0.333, 0.12],
    ['rgb_small', 1085, 126, 0.8, 0.175, 0.292, 0.129],
    ['rgb_small', 1085, 3581, 0.635, 0.243, 0.292, 0.149],
    ['rgb_small', 1085, 4, 0, 0, 0, 0],
    ['rgb_small', 1085, 7, 1, 0, 0.069, 0.0479],
    ['rgb_small', 1085, 40, 0, 0, 0.0136, 0.00526]
]
rgb_small_df = pd.DataFrame(rgb_small, data_index, data_columns)
rgb_small_df
               Model      Images  Instances      P      R   mAP50  mAP50-95
all            rgb_small    1085      16909  0.524  0.253  0.2900   0.15900
person         rgb_small    1085       3223  0.622  0.428  0.4810   0.22500
bike           rgb_small    1085        193  0.247  0.321  0.2390   0.12100
car            rgb_small    1085       7285  0.722  0.629  0.6830   0.45400
motor          rgb_small    1085         77  0.563  0.338  0.3820   0.21900
train          rgb_small    1085        183  0.575  0.339  0.4100   0.27600
truck          rgb_small    1085       2190  0.600  0.315  0.3330   0.12000
hydrant        rgb_small    1085        126  0.800  0.175  0.2920   0.12900
sign           rgb_small    1085       3581  0.635  0.243  0.2920   0.14900
skateboard     rgb_small    1085          4  0.000  0.000  0.0000   0.00000
stroller       rgb_small    1085          7  1.000  0.000  0.0690   0.04790
other vehicle  rgb_small    1085         40  0.000  0.000  0.0136   0.00526
plt.figure(figsize=(16, 5))
plt.title('Average Precision by Class & Instances')
sns.set(style='darkgrid')
sns.barplot(
    data=rgb_small_df.reset_index(),
    x='Instances',
    y='mAP50',
    errorbar='sd',
    hue='index',
    palette='nipy_spectral'
)
plt.legend(bbox_to_anchor=(1.01,1.01))


YOLOv8n & IR Dataset

ir_nano = [
    ['ir_nano', 1144, 16688, 0.516, 0.249, 0.276, 0.146],
    ['ir_nano', 1144, 4470, 0.631, 0.556, 0.595, 0.276],
    ['ir_nano', 1144, 170, 0.288, 0.253, 0.222, 0.111],
    ['ir_nano', 1144, 7128, 0.696, 0.65, 0.711, 0.449],
    ['ir_nano', 1144, 55, 0.57, 0.364, 0.39, 0.189],
    ['ir_nano', 1144, 179, 0.746, 0.378, 0.455, 0.283],
    ['ir_nano', 1144, 2048, 0.462, 0.256, 0.271, 0.104],
    ['ir_nano', 1144, 94, 0.679, 0.0638, 0.12, 0.0526],
    ['ir_nano', 1144, 2472, 0.557, 0.199, 0.256, 0.132],
    ['ir_nano', 1144, 3, 0, 0, 0, 0],
    ['ir_nano', 1144, 6, 1, 0, 0, 0],
    ['ir_nano', 1144, 63, 0.0425, 0.0159, 0.0193, 0.00637]
]
ir_nano_df = pd.DataFrame(ir_nano, data_index, data_columns)
ir_nano_df
               Model    Images  Instances       P       R   mAP50  mAP50-95
all            ir_nano    1144      16688  0.5160  0.2490  0.2760   0.14600
person         ir_nano    1144       4470  0.6310  0.5560  0.5950   0.27600
bike           ir_nano    1144        170  0.2880  0.2530  0.2220   0.11100
car            ir_nano    1144       7128  0.6960  0.6500  0.7110   0.44900
motor          ir_nano    1144         55  0.5700  0.3640  0.3900   0.18900
train          ir_nano    1144        179  0.7460  0.3780  0.4550   0.28300
truck          ir_nano    1144       2048  0.4620  0.2560  0.2710   0.10400
hydrant        ir_nano    1144         94  0.6790  0.0638  0.1200   0.05260
sign           ir_nano    1144       2472  0.5570  0.1990  0.2560   0.13200
skateboard     ir_nano    1144          3  0.0000  0.0000  0.0000   0.00000
stroller       ir_nano    1144          6  1.0000  0.0000  0.0000   0.00000
other vehicle  ir_nano    1144         63  0.0425  0.0159  0.0193   0.00637
plt.figure(figsize=(16, 5))
plt.title('Average Precision by Class & Instances')
sns.set(style='darkgrid')
sns.barplot(
    data=ir_nano_df.reset_index(),
    x='Instances',
    y='mAP50',
    errorbar='sd',
    hue='index',
    palette='nipy_spectral'
)
plt.legend(bbox_to_anchor=(1.01,1.01))


YOLOv8s & IR Dataset

ir_small = [
    ['ir_small', 1144, 16688, 0.555, 0.322, 0.358, 0.2],
    ['ir_small', 1144, 4470, 0.691, 0.632, 0.687, 0.356],
    ['ir_small', 1144, 170, 0.369, 0.353, 0.309, 0.174],
    ['ir_small', 1144, 7128, 0.743, 0.725, 0.781, 0.527],
    ['ir_small', 1144, 55, 0.593, 0.504, 0.551, 0.251],
    ['ir_small', 1144, 179, 0.683, 0.419, 0.527, 0.361],
    ['ir_small', 1144, 2048, 0.608, 0.386, 0.418, 0.178],
    ['ir_small', 1144, 94, 0.695, 0.149, 0.274, 0.148],
    ['ir_small', 1144, 2472, 0.614, 0.313, 0.362, 0.195],
    ['ir_small', 1144, 3, 0, 0, 0, 0],
    ['ir_small', 1144, 6, 1, 0, 0, 0],
    ['ir_small', 1144, 63, 0.112, 0.0635, 0.0254, 0.0151]
]
ir_small_df = pd.DataFrame(ir_small, data_index, data_columns)
ir_small_df
               Model     Images  Instances      P       R   mAP50  mAP50-95
all            ir_small    1144      16688  0.555  0.3220  0.3580    0.2000
person         ir_small    1144       4470  0.691  0.6320  0.6870    0.3560
bike           ir_small    1144        170  0.369  0.3530  0.3090    0.1740
car            ir_small    1144       7128  0.743  0.7250  0.7810    0.5270
motor          ir_small    1144         55  0.593  0.5040  0.5510    0.2510
train          ir_small    1144        179  0.683  0.4190  0.5270    0.3610
truck          ir_small    1144       2048  0.608  0.3860  0.4180    0.1780
hydrant        ir_small    1144         94  0.695  0.1490  0.2740    0.1480
sign           ir_small    1144       2472  0.614  0.3130  0.3620    0.1950
skateboard     ir_small    1144          3  0.000  0.0000  0.0000    0.0000
stroller       ir_small    1144          6  1.000  0.0000  0.0000    0.0000
other vehicle  ir_small    1144         63  0.112  0.0635  0.0254    0.0151
plt.figure(figsize=(16, 5))
plt.title('Average Precision by Class & Instances')
sns.set(style='darkgrid')
sns.barplot(
    data=ir_small_df.reset_index(),
    x='Instances',
    y='mAP50',
    errorbar='sd',
    hue='index',
    palette='nipy_spectral'
)
plt.legend(bbox_to_anchor=(1.01,1.01))


YOLOv8n & Combined Dataset

combined_nano = [
    ['combined_nano', 2229, 33597, 0.519, 0.242, 0.272, 0.147],
    ['combined_nano', 2229, 7693, 0.623, 0.482, 0.527, 0.253],
    ['combined_nano', 2229, 363, 0.281, 0.267, 0.239, 0.13],
    ['combined_nano', 2229, 14413, 0.686, 0.646, 0.697, 0.461],
    ['combined_nano', 2229, 132, 0.607, 0.364, 0.417, 0.193],
    ['combined_nano', 2229, 362, 0.666, 0.348, 0.424, 0.279],
    ['combined_nano', 2229, 4238, 0.493, 0.237, 0.256, 0.096],
    ['combined_nano', 2229, 220, 0.685, 0.0955, 0.162, 0.0676],
    ['combined_nano', 2229, 6053, 0.588, 0.205, 0.251, 0.131],
    ['combined_nano', 2229, 7, 0, 0, 0, 0],
    ['combined_nano', 2229, 13, 1, 0, 0, 0],
    ['combined_nano', 2229, 103, 0.0795, 0.0194, 0.023, 0.00967]
]
combined_nano_df = pd.DataFrame(combined_nano, data_index, data_columns)
combined_nano_df
               Model          Images  Instances       P       R  mAP50  mAP50-95
all            combined_nano    2229      33597  0.5190  0.2420  0.272   0.14700
person         combined_nano    2229       7693  0.6230  0.4820  0.527   0.25300
bike           combined_nano    2229        363  0.2810  0.2670  0.239   0.13000
car            combined_nano    2229      14413  0.6860  0.6460  0.697   0.46100
motor          combined_nano    2229        132  0.6070  0.3640  0.417   0.19300
train          combined_nano    2229        362  0.6660  0.3480  0.424   0.27900
truck          combined_nano    2229       4238  0.4930  0.2370  0.256   0.09600
hydrant        combined_nano    2229        220  0.6850  0.0955  0.162   0.06760
sign           combined_nano    2229       6053  0.5880  0.2050  0.251   0.13100
skateboard     combined_nano    2229          7  0.0000  0.0000  0.000   0.00000
stroller       combined_nano    2229         13  1.0000  0.0000  0.000   0.00000
other vehicle  combined_nano    2229        103  0.0795  0.0194  0.023   0.00967
plt.figure(figsize=(16, 5))
plt.title('Average Precision by Class & Instances')
sns.set(style='darkgrid')
sns.barplot(
    data=combined_nano_df.reset_index(),
    x='Instances',
    y='mAP50',
    errorbar='sd',
    hue='index',
    palette='nipy_spectral'
)
plt.legend(bbox_to_anchor=(1.01,1.01))


YOLOv8s & Combined Dataset

combined_small = [
    ['combined_small', 2229, 33597, 0.651, 0.324, 0.359, 0.204],
    ['combined_small', 2229, 7693, 0.687, 0.569, 0.63, 0.326],
    ['combined_small', 2229, 363, 0.346, 0.383, 0.351, 0.198],
    ['combined_small', 2229, 14413, 0.734, 0.713, 0.764, 0.529],
    ['combined_small', 2229, 132, 0.633, 0.439, 0.514, 0.267],
    ['combined_small', 2229, 362, 0.707, 0.481, 0.556, 0.384],
    ['combined_small', 2229, 4238, 0.59, 0.391, 0.407, 0.167],
    ['combined_small', 2229, 220, 0.697, 0.178, 0.266, 0.122],
    ['combined_small', 2229, 6053, 0.617, 0.329, 0.371, 0.205],
    ['combined_small', 2229, 7, 1, 0, 0, 0],
    ['combined_small', 2229, 13, 1, 0, 0.0323, 0.024],
    ['combined_small', 2229, 103, 0.148, 0.0777, 0.0618, 0.0193]
]
combined_small_df = pd.DataFrame(combined_small, data_index, data_columns)
combined_small_df
               Model           Images  Instances      P       R   mAP50  mAP50-95
all            combined_small    2229      33597  0.651  0.3240  0.3590    0.2040
person         combined_small    2229       7693  0.687  0.5690  0.6300    0.3260
bike           combined_small    2229        363  0.346  0.3830  0.3510    0.1980
car            combined_small    2229      14413  0.734  0.7130  0.7640    0.5290
motor          combined_small    2229        132  0.633  0.4390  0.5140    0.2670
train          combined_small    2229        362  0.707  0.4810  0.5560    0.3840
truck          combined_small    2229       4238  0.590  0.3910  0.4070    0.1670
hydrant        combined_small    2229        220  0.697  0.1780  0.2660    0.1220
sign           combined_small    2229       6053  0.617  0.3290  0.3710    0.2050
skateboard     combined_small    2229          7  1.000  0.0000  0.0000    0.0000
stroller       combined_small    2229         13  1.000  0.0000  0.0323    0.0240
other vehicle  combined_small    2229        103  0.148  0.0777  0.0618    0.0193
plt.figure(figsize=(16, 5))
plt.title('Average Precision by Class & Instances')
sns.set(style='darkgrid')
sns.barplot(
    data=combined_small_df.reset_index(),
    x='Instances',
    y='mAP50',
    errorbar='sd',
    hue='index',
    palette='nipy_spectral'
)
plt.legend(bbox_to_anchor=(1.01,1.01))


Combining Results

combined_df = pd.concat([rgb_nano_df, rgb_small_df, ir_nano_df, ir_small_df, combined_nano_df, combined_small_df], axis=0)
combined_df = combined_df.reset_index()
plt.figure(figsize=(16, 5))
plt.title('Average Precision by Class')
sns.set(style='darkgrid')
sns.barplot(
    data=combined_df.reset_index(),
    x='index',
    y='mAP50',
    errorbar='sd',
    hue='Model',
    palette='seismic'
)
plt.legend(bbox_to_anchor=(1.01,1.01))


plt.figure(figsize=(24, 10))

sns.set(style='darkgrid')

sns.scatterplot(
    data=combined_df.reset_index(),
    x='R',
    y='P',
    s=300,
    alpha=0.8,
    hue='Model',
    palette='nipy_spectral',
    style='index'
).set_title('Precision Recall')


combined_df[combined_df['index'] == 'car']
    index  Model           Images  Instances      P      R  mAP50  mAP50-95
3   car    rgb_nano          1085       7285  0.663  0.574  0.621     0.398
15  car    rgb_small         1085       7285  0.722  0.629  0.683     0.454
27  car    ir_nano           1144       7128  0.696  0.650  0.711     0.449
39  car    ir_small          1144       7128  0.743  0.725  0.781     0.527
51  car    combined_nano     2229      14413  0.686  0.646  0.697     0.461
63  car    combined_small    2229      14413  0.734  0.713  0.764     0.529
plt.figure(figsize=(8, 5))
plt.title('Average Precision by Class')
sns.set(style='darkgrid')
sns.barplot(
    data=combined_df[combined_df['index'] == 'car'],
    x='index',
    y='mAP50',
    errorbar='sd',
    hue='Model',
    palette='seismic'
)
plt.legend(bbox_to_anchor=(1.01,1.01))


combined_df[combined_df['index'] == 'person']
    index   Model           Images  Instances      P      R  mAP50  mAP50-95
1   person  rgb_nano          1085       3223  0.505  0.375  0.391     0.167
13  person  rgb_small         1085       3223  0.622  0.428  0.481     0.225
25  person  ir_nano           1144       4470  0.631  0.556  0.595     0.276
37  person  ir_small          1144       4470  0.691  0.632  0.687     0.356
49  person  combined_nano     2229       7693  0.623  0.482  0.527     0.253
61  person  combined_small    2229       7693  0.687  0.569  0.630     0.326
plt.figure(figsize=(8, 5))
plt.title('Average Precision by Class')
sns.set(style='darkgrid')
sns.barplot(
    data=combined_df[combined_df['index'] == 'person'],
    x='index',
    y='mAP50',
    errorbar='sd',
    hue='Model',
    palette='seismic'
)
plt.legend(bbox_to_anchor=(1.01,1.01))


combined_df[combined_df['index'] == 'motor']
    index  Model           Images  Instances      P      R  mAP50  mAP50-95
4   motor  rgb_nano          1085         77  0.419  0.260  0.300     0.166
16  motor  rgb_small         1085         77  0.563  0.338  0.382     0.219
28  motor  ir_nano           1144         55  0.570  0.364  0.390     0.189
40  motor  ir_small          1144         55  0.593  0.504  0.551     0.251
52  motor  combined_nano     2229        132  0.607  0.364  0.417     0.193
64  motor  combined_small    2229        132  0.633  0.439  0.514     0.267
plt.figure(figsize=(8, 5))
plt.title('Average Precision by Class')
sns.set(style='darkgrid')
sns.barplot(
    data=combined_df[combined_df['index'] == 'motor'],
    x='index',
    y='mAP50',
    errorbar='sd',
    hue='Model',
    palette='seismic'
)
plt.legend(bbox_to_anchor=(1.01,1.01))


combined_df[combined_df['index'] == 'bike']
    index  Model           Images  Instances      P      R  mAP50  mAP50-95
2   bike   rgb_nano          1085        193  0.200  0.197  0.102     0.044
14  bike   rgb_small         1085        193  0.247  0.321  0.239     0.121
26  bike   ir_nano           1144        170  0.288  0.253  0.222     0.111
38  bike   ir_small          1144        170  0.369  0.353  0.309     0.174
50  bike   combined_nano     2229        363  0.281  0.267  0.239     0.130
62  bike   combined_small    2229        363  0.346  0.383  0.351     0.198
plt.figure(figsize=(8, 5))
plt.title('Average Precision by Class')
sns.set(style='darkgrid')
sns.barplot(
    data=combined_df[combined_df['index'] == 'bike'],
    x='index',
    y='mAP50',
    errorbar='sd',
    hue='Model',
    palette='seismic'
)
plt.legend(bbox_to_anchor=(1.01,1.01))


combined_df[combined_df['index'] == 'truck']
    index  Model           Images  Instances      P      R  mAP50  mAP50-95
6   truck  rgb_nano          1085       2190  0.458  0.197  0.205    0.0686
18  truck  rgb_small         1085       2190  0.600  0.315  0.333    0.1200
30  truck  ir_nano           1144       2048  0.462  0.256  0.271    0.1040
42  truck  ir_small          1144       2048  0.608  0.386  0.418    0.1780
54  truck  combined_nano     2229       4238  0.493  0.237  0.256    0.0960
66  truck  combined_small    2229       4238  0.590  0.391  0.407    0.1670
plt.figure(figsize=(8, 5))
plt.title('Average Precision by Class')
sns.set(style='darkgrid')
sns.barplot(
    data=combined_df[combined_df['index'] == 'truck'],
    x='index',
    y='mAP50',
    errorbar='sd',
    hue='Model',
    palette='seismic'
)
plt.legend(bbox_to_anchor=(1.01,1.01))


Evaluate Bounding Boxes

# read images
images = glob('./datasets/images_combined_val/images/*.jpg')
print(len(images))

2229

# select image
img = cv.imread(images[0])
height, width, _ = img.shape

print(images[0][38:-4], height, width)

video-57kWWRyeqqHs3Byei-frame-000816-b6tuLjNco8MfoBs3d 512 640

# select label file
path = './datasets/images_combined_val/labels/' + images[0][38:-4] + '.txt'
labels = open(path, 'r')

data = labels.readlines()
labels.close()

print(data)

['9 0.696875 0.375000 0.025000 0.039062\n', '9 0.696094 0.314453 0.032813 0.082031\n', '9 0.168750 0.395508 0.040625 0.013672\n', '0 0.464063 0.457031 0.009375 0.027344\n', '0 0.004688 0.491211 0.009375 0.041016\n', '0 0.165625 0.489258 0.009375 0.033203\n', '1 0.316406 0.500000 0.014063 0.035156\n', '2 0.600781 0.511719 0.117188 0.113281\n', '2 0.524219 0.481445 0.060938 0.064453\n', '2 0.481250 0.469727 0.037500 0.033203\n', '2 0.426563 0.454102 0.015625 0.017578\n', '2 0.412500 0.463867 0.015625 0.025391\n', '2 0.376563 0.474609 0.018750 0.023438\n', '2 0.364063 0.477539 0.021875 0.033203\n', '2 0.342188 0.477539 0.034375 0.041016\n', '0 0.315625 0.483398 0.021875 0.044922\n', '8 0.105469 0.500977 0.007812 0.021484\n']

# create one colour for every COCO class
colours = []
number_colours = 80

for j in range(number_colours):
    colour = (np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255))
    colours.append(colour)

print(len(colours), colours)

80 [(129, 83, 161), (220, 116, 220), (47, 113, 141), (185, 137, 77), (212, 208, 251), (36, 83, 204), (4, 40, 112), (61, 18, 39), (25, 132, 21), (239, 67, 234), (140, 253, 52), (207, 196, 72), (144, 32, 112), (138, 29, 227), (101, 17, 45), (102, 118, 7), (210, 51, 160), (59, 158, 131), (37, 145, 69), (68, 56, 71), (28, 96, 25), (72, 189, 118), (190, 67, 118), (152, 48, 33), (153, 138, 248), (218, 94, 242), (236, 229, 215), (133, 186, 102), (33, 198, 167), (223, 32, 103), (16, 209, 160), (83, 89, 91), (194, 46, 110), (243, 47, 47), (187, 11, 41), (193, 188, 6), (107, 119, 230), (116, 118, 109), (65, 155, 110), (12, 151, 145), (135, 138, 197), (43, 19, 174), (52, 203, 214), (72, 178, 172), (10, 247, 17), (108, 90, 185), (134, 29, 207), (217, 96, 179), (2, 38, 161), (245, 175, 254), (254, 57, 175), (84, 184, 46), (249, 195, 60), (246, 67, 127), (51, 89, 138), (12, 162, 182), (176, 89, 187), (165, 40, 110), (141, 76, 226), (245, 187, 119), (47, 237, 138), (173, 176, 50), (49, 101, 36), (171, 235, 78), (125, 105, 250), (123, 83, 13), (18, 47, 133), (196, 102, 109), (234, 204, 106), (55, 110, 131), (116, 209, 240), (147, 203, 253), (115, 246, 60), (17, 245, 112), (50, 250, 19), (254, 233, 18), (122, 211, 221), (229, 12, 236), (86, 169, 186), (13, 189, 38)]

Show Labels

index = 0

for line in data:

    # Split string to float
    _, x, y, w, h = map(float, line.split(' '))
    
    l = int((x - w / 2) * width)
    r = int((x + w / 2) * width)
    t = int((y - h / 2) * height)
    b = int((y + h / 2) * height)
    
    if l < 0:
        l = 0
    if r > width - 1:
        r = width - 1
    if t < 0:
        t = 0
    if b > height - 1:
        b = height - 1

    image = cv.rectangle(img, (l, t), (r, b), colours[index], thickness=2)
    index += 1

plt.title('Image with original Labels')
plt.axis('off')
plt.imshow(image)
plt.show()


Show Predictions

from PIL import Image

# Load the best model
backbone_combined_small = YOLO('./runs/detect/backbone_combined_small.torchscript')
# Run batched inference on a list of images
results = backbone_combined_small(img)  # return a list of Results objects

for r in results:
    im_array = r.plot()  # plot a BGR numpy array of predictions
    im = Image.fromarray(im_array[..., ::-1])  # RGB PIL image
    im.show()  # show image
    im.save('results.jpg')  # save image

0: 640x640 5 persons, 1 bike, 11 cars, 3 signs, 16.8ms
Speed: 8.9ms preprocess, 16.8ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 640)


Show Labels

# get another image
img = cv.imread(images[1337])
height, width, _ = img.shape

# get labels
path = './datasets/images_combined_val/labels/' + images[1337][38:-4] + '.txt'
labels = open(path, 'r')

data = labels.readlines()
labels.close()
index = 0

for line in data:

    # Split string to float
    _, x, y, w, h = map(float, line.split(' '))
    
    l = int((x - w / 2) * width)
    r = int((x + w / 2) * width)
    t = int((y - h / 2) * height)
    b = int((y + h / 2) * height)
    
    if l < 0:
        l = 0
    if r > width - 1:
        r = width - 1
    if t < 0:
        t = 0
    if b > height - 1:
        b = height - 1

    image = cv.rectangle(img, (l, t), (r, b), colours[index], thickness=2)
    index += 1

plt.title('Image with original Labels')
plt.axis('off')
plt.imshow(image)
plt.show()


Show Predictions

# Run batched inference on a list of images
results = backbone_combined_small(img)  # return a list of Results objects

for r in results:
    im_array = r.plot()  # plot a BGR numpy array of predictions
    im = Image.fromarray(im_array[..., ::-1])  # RGB PIL image
    im.show()  # show image
    im.save('results.jpg')  # save image

0: 640x640 2 persons, 1 car, 7 trucks, 2 signs, 17.7ms
Speed: 12.7ms preprocess, 17.7ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 640)


Show Labels

# get another image
img = cv.imread(images[666])
height, width, _ = img.shape

# get labels
path = './datasets/images_combined_val/labels/' + images[666][38:-4] + '.txt'
labels = open(path, 'r')

data = labels.readlines()
labels.close()
index = 0

for line in data:

    # Split string to float
    _, x, y, w, h = map(float, line.split(' '))
    
    l = int((x - w / 2) * width)
    r = int((x + w / 2) * width)
    t = int((y - h / 2) * height)
    b = int((y + h / 2) * height)
    
    if l < 0:
        l = 0
    if r > width - 1:
        r = width - 1
    if t < 0:
        t = 0
    if b > height - 1:
        b = height - 1

    image = cv.rectangle(img, (l, t), (r, b), colours[index], thickness=2)
    index += 1

plt.title('Image with original Labels')
plt.axis('off')
plt.imshow(image)
plt.show()


Show Predictions

# Run batched inference on a list of images
results = backbone_combined_small(img)  # return a list of Results objects

for r in results:
    im_array = r.plot()  # plot a BGR numpy array of predictions
    im = Image.fromarray(im_array[..., ::-1])  # RGB PIL image
    im.show()  # show image
    im.save('results.jpg')  # save image

0: 640x640 2 persons, 3 bikes, 7 cars, 1 sign, 16.9ms
Speed: 13.9ms preprocess, 16.9ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)
