YOLOv8 Nightshift
Dataset
Teledyne FLIR Free ADAS Thermal Dataset v2: The Teledyne FLIR free starter thermal dataset provides fully annotated thermal and visible spectrum frames for development of object detection neural networks. This data was constructed to encourage research on visible + thermal spectrum sensor fusion algorithms ("RGBT") in order to advance the safety of autonomous vehicles. A total of 26,442 fully-annotated frames are included with 15 different object classes.
Baseline Model: Baseline accuracy for object detection was established using the YOLOX-m neural network designed for 640 × 640 images. Both the RGB and thermal detectors were pre-trained on MSCOCO data (see "YOLOX: Exceeding YOLO Series in 2021"). The base neural networks were trained on the training set provided with this dataset and tested on the video test data also provided with this dataset.
from glob import glob
import json
import matplotlib.pyplot as plt
import numpy as np
import os
import shutil
from tqdm import tqdm
Dataset Exploration
# read in dataset
images_thermal = glob('./datasets/video_thermal_test/images/*.jpg')
images_rgb = glob('./datasets/video_rgb_test/images/*.jpg')
print(len(images_thermal), len(images_rgb))
3749 3749
# plot multiple random thermal images
ran_gen = np.random.default_rng()
plt.figure(figsize=(16, 14))
plt.suptitle('Thermal Images')
for i in range(12):
    ax = plt.subplot(4, 4, i+1)
    # draw a random index over the full range of images
    random_index = ran_gen.integers(low=0, high=len(images_thermal), size=1)
    idx = random_index[0]
    img_loc = images_thermal[idx]
    img_title = 'video: ' + images_thermal[idx][-52:-35] + '\n' + 'frame: ' + images_thermal[idx][-28:-22] + '\n' + 'id: ' + images_thermal[idx][-21:-4]
    image = plt.imread(img_loc)
    plt.imshow(image, cmap=plt.cm.binary)
    plt.title(img_title, fontsize='small')
    plt.axis(False)
# plot multiple random rgb images
ran_gen = np.random.default_rng()
plt.figure(figsize=(16, 14))
plt.suptitle('RGB Images')
for i in range(12):
    ax = plt.subplot(4, 4, i+1)
    # draw a random index over the full range of images
    random_index = ran_gen.integers(low=0, high=len(images_rgb), size=1)
    idx = random_index[0]
    img_loc = images_rgb[idx]
    img_title = 'video: ' + images_rgb[idx][-52:-35] + '\n' + 'frame: ' + images_rgb[idx][-28:-22] + '\n' + 'id: ' + images_rgb[idx][-21:-4]
    image = plt.imread(img_loc)
    plt.imshow(image, cmap=plt.cm.binary)
    plt.title(img_title, fontsize='small')
    plt.axis(False)
Label Conversion JSON2YOLO
"file_name": "data/video-BzZspxAweF8AnKhWK-frame-000745-SSCRtAHcFjphNPczJ.jpg",
->"file_name": "video-BzZspxAweF8AnKhWK-frame-000745-SSCRtAHcFjphNPczJ.jpg",
YOLOv8 expects all images to be located in an images
dir and the txt format annotation in a labels
folder next to it. The dataset was using a dirname of data
for all images and had COCO JSON annotations. I renamed the folder, created the missing one and removed the "data/" from all the filenames in the JSON file. Now I am able to run a conversion:
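A minimal sketch of that clean-up step - the path is an assumption, point it at each `coco.json` you want to fix:

import json

def strip_data_prefix(json_file):
    # rewrite "data/video-...jpg" -> "video-...jpg" in place
    with open(json_file) as f:
        coco = json.load(f)
    for image in coco['images']:
        image['file_name'] = image['file_name'].removeprefix('data/')
    with open(json_file, 'w') as f:
        json.dump(coco, f)

strip_data_prefix('./datasets/video_rgb_test/coco.json')

Now I am able to run a conversion: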
def make_folders(output_path):
    if os.path.exists(output_path):
        shutil.rmtree(output_path)
    os.makedirs(output_path)
    return output_path
def convert_bbox_coco2yolo(img_width, img_height, bbox):
    """
    Convert bounding box from COCO format to YOLO format

    Parameters
    ----------
    img_width : int
        width of image
    img_height : int
        height of image
    bbox : list[int]
        bounding box annotation in COCO format:
        [top left x position, top left y position, width, height]

    Returns
    -------
    list[float]
        bounding box annotation in YOLO format:
        [x_center_rel, y_center_rel, width_rel, height_rel]
    """
    # YOLO bounding box format: [x_center, y_center, width, height]
    # (float values relative to width and height of image)
    x_tl, y_tl, w, h = bbox

    dw = 1.0 / img_width
    dh = 1.0 / img_height

    x_center = x_tl + w / 2.0
    y_center = y_tl + h / 2.0

    x = x_center * dw
    y = y_center * dh
    w = w * dw
    h = h * dh

    return [x, y, w, h]
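A quick sanity check of the conversion on a 640 x 512 frame like the ones in this dataset:

# COCO box [x_tl=100, y_tl=50, w=64, h=128] -> relative centre format
print(convert_bbox_coco2yolo(640, 512, [100, 50, 64, 128]))
# -> [0.20625, 0.22265625, 0.1, 0.25]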
def convert_coco_json_to_yolo_txt(output_path, json_file):
    path = make_folders(output_path)
    with open(json_file) as f:
        json_data = json.load(f)
    # write _darknet.labels, which holds names of all classes (one class per line)
    label_file = os.path.join(output_path, "_darknet.labels")
    with open(label_file, "w") as f:
        for category in tqdm(json_data["categories"], desc="Categories"):
            category_name = category["name"]
            f.write(f"{category_name}\n")
    for image in tqdm(json_data["images"], desc="Annotation txt for each image"):
        img_id = image["id"]
        img_name = image["file_name"]
        img_width = image["width"]
        img_height = image["height"]
        anno_in_image = [anno for anno in json_data["annotations"] if anno["image_id"] == img_id]
        anno_txt = os.path.join(output_path, img_name.split(".")[0] + ".txt")
        with open(anno_txt, "w") as f:
            for anno in anno_in_image:
                category = anno["category_id"]
                bbox_COCO = anno["bbox"]
                x, y, w, h = convert_bbox_coco2yolo(img_width, img_height, bbox_COCO)
                f.write(f"{category} {x:.6f} {y:.6f} {w:.6f} {h:.6f}\n")
    print("Converting COCO Json to YOLO txt finished!")
Video RGB Test Dataset
convert_coco_json_to_yolo_txt("./datasets/video_rgb_test/labels", "./datasets/video_rgb_test/coco.json")
Categories: 100% 16/16 | Annotation txt for each image: 100% 3749/3749 [00:38, 98.23it/s]
Converting COCO Json to YOLO txt finished!
Video Thermal Test Dataset
convert_coco_json_to_yolo_txt("./datasets/video_thermal_test/labels", "./datasets/video_thermal_test/coco.json")
Categories: 100% 16/16 | Annotation txt for each image: 100% 3749/3749 [00:25, 145.99it/s]
Converting COCO Json to YOLO txt finished!
Images RGB Train Dataset
convert_coco_json_to_yolo_txt("./datasets/images_rgb_train/labels", "./datasets/images_rgb_train/coco.json")
Categories: 100% 16/16 | Annotation txt for each image: 100% 10318/10318 [03:18, 51.86it/s]
Converting COCO Json to YOLO txt finished!
Images Thermal Train Dataset
convert_coco_json_to_yolo_txt("./datasets/images_thermal_train/labels", "./datasets/images_thermal_train/coco.json")
Categories: 100% 16/16 | Annotation txt for each image: 100% 10742/10742 [03:07, 57.44it/s]
Converting COCO Json to YOLO txt finished!
Images RGB Val Dataset
convert_coco_json_to_yolo_txt("./datasets/images_rgb_val/labels", "./datasets/images_rgb_val/coco.json")
Categories: 100% 16/16 | Annotation txt for each image: 100% 1085/1085 [00:02, 452.60it/s]
Converting COCO Json to YOLO txt finished!
Images Thermal Val Dataset
convert_coco_json_to_yolo_txt("./datasets/images_thermal_val/labels", "./datasets/images_thermal_val/coco.json")
Categories: 100% 16/16 | Annotation txt for each image: 100% 1144/1144 [00:02, 472.82it/s]
Converting COCO Json to YOLO txt finished!
Dataset Configuration
The `coco.yaml` file that came with the dataset contained all 80 COCO classes - I removed all classes that were not part of the annotations and assigned new `category_id`'s from `0` to `15` for the 16 categories. If you want to use the configuration files below to train your YOLO model, you need to replace the annotations accordingly - check the `./config` folder.
config/data_thermal.yaml
train: ../images_thermal_train/images
val: ../images_thermal_val/images
test: ../video_thermal_test/images
nc: 16
names: [
'person',
'bike',
'car',
'motor',
'bus',
'train',
'truck',
'light',
'hydrant',
'sign',
'dog',
'deer',
'skateboard',
'stroller',
'scooter',
'other vehicle'
]
config/data_rgb.yaml
train: /opt/app/datasets/images_rgb_train/images
val: /opt/app/datasets/images_rgb_val/images
test: /opt/app/datasets/video_rgb_test/images
nc: 16
names: [
'person',
'bike',
'car',
'motor',
'bus',
'train',
'truck',
'light',
'hydrant',
'sign',
'dog',
'deer',
'skateboard',
'stroller',
'scooter',
'other vehicle'
]
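The combined RGB + IR model further down trains against a `datasets/data_combined.yaml` that is not shown above. Following the same pattern it would look like the sketch below - the `images_combined_train` and `video_combined_test` paths are assumptions (only `images_combined_val` shows up in the validation logs later):

config/data_combined.yaml

train: /opt/app/datasets/images_combined_train/images
val: /opt/app/datasets/images_combined_val/images
test: /opt/app/datasets/video_combined_test/images
nc: 16
names: [
'person',
'bike',
'car',
'motor',
'bus',
'train',
'truck',
'light',
'hydrant',
'sign',
'dog',
'deer',
'skateboard',
'stroller',
'scooter',
'other vehicle'
]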
f_rgb = open('./config/images_rgb_val_coco.json') # =>'./datasets/images_rgb_val/coco.json'
f_thermal = open('./config/images_thermal_val_coco.json') # => './datasets/images_thermal_val/coco.json'
# returns JSON object as a dictionary
data_rgb_val = json.load(f_rgb)
data_thermal_val = json.load(f_thermal)
# closing files
f_rgb.close()
f_thermal.close()
f_rgb = open('./config/images_rgb_train_coco.json') # => './datasets/images_rgb_train/coco.json'
f_thermal = open('./config/images_thermal_train_coco.json') # => './datasets/images_thermal_train/coco.json'
# returns JSON object as a dictionary
data_rgb_train = json.load(f_rgb)
data_thermal_train = json.load(f_thermal)
# closing files
f_rgb.close()
f_thermal.close()
f_rgb = open('./config/video_rgb_test_coco.json') # => './datasets/video_rgb_test/coco.json'
f_thermal = open('./config/video_thermal_test_coco.json') # => './datasets/video_thermal_test/coco.json'
# returns JSON object as a dictionary
data_rgb_test = json.load(f_rgb)
data_thermal_test = json.load(f_thermal)
# closing files
f_rgb.close()
f_thermal.close()
# Iterating through the json list - check that all annotations are between 0 and 15
categories = []
for detection in data_rgb_val['annotations']:
categories.append(detection['category_id'])
print(np.unique(categories))
[ 0 1 2 3 5 6 8 9 12 13 15]
categories = []
for detection in data_thermal_val['annotations']:
categories.append(detection['category_id'])
print(np.unique(categories))
[ 0 1 2 3 5 6 8 9 12 13 15]
categories = []
for detection in data_rgb_train['annotations']:
categories.append(detection['category_id'])
print(np.unique(categories))
[ 0 1 2 3 5 6 8 9 12 13 14 15]
categories = []
for detection in data_thermal_train['annotations']:
categories.append(detection['category_id'])
print(np.unique(categories))
[ 0 1 2 3 5 6 8 9 10 11 12 13 14 15]
categories = []
for detection in data_rgb_test['annotations']:
categories.append(detection['category_id'])
print(np.unique(categories))
[ 0 1 2 3 6 7 8 9 10 15]
categories = []
for detection in data_thermal_test['annotations']:
categories.append(detection['category_id'])
print(np.unique(categories))
[ 0 1 2 3 6 8 9 10 15]
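The same check can be written once over all six splits - a small convenience loop (the split names are only labels for printing):

splits = {
    'rgb_val': data_rgb_val, 'thermal_val': data_thermal_val,
    'rgb_train': data_rgb_train, 'thermal_train': data_thermal_train,
    'rgb_test': data_rgb_test, 'thermal_test': data_thermal_test,
}
for name, split in splits.items():
    ids = np.unique([anno['category_id'] for anno in split['annotations']])
    # every split must stay inside the 16 remapped categories
    assert ids.min() >= 0 and ids.max() <= 15, name
    print(name, ids)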
Training the YOLOv8 Model (RGB / IR)
# missing yolo dep
!pip install "lapx>=0.5.2"
import cv2 as cv
from glob import glob
import matplotlib.pyplot as plt
import os
import random
from ultralytics import YOLO
YOLOv8 Nano (RGB)
# unzip downloaded dataset to `./datasets`
dataset_rgb = 'datasets/data_rgb.yaml'
# load a model
backbone_nano = YOLO("yolov8n.yaml") # build a new model from scratch
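Building the model from `yolov8n.yaml` starts from randomly initialized weights. If you would rather fine-tune from COCO-pretrained weights, Ultralytics also accepts the checkpoint name directly - a one-line alternative:

# alternative (not used here): start from COCO-pretrained weights
backbone_nano = YOLO("yolov8n.pt")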
Model Training
import json
# Opening JSON file
f = open('./datasets/images_rgb_train/coco.json')
# returns JSON object as a dictionary
data = json.load(f)
# Iterating through the json annotations
category_ids = []
for anno in data['annotations']:
    category_ids.append(anno['category_id'])
len(category_ids)
169174
import numpy as np
print(np.unique(category_ids))
# Closing file
f.close()
[ 0 1 2 3 5 6 8 9 12 13 14 15]
# Train the model
results_n = backbone_nano.train(data=dataset_rgb, epochs=20)
20 epochs completed in 1.521 hours.
Epoch | GPU_mem | box_loss | cls_loss | dfl_loss | Instances | Size |
---|---|---|---|---|---|---|
10/20 | 3.07G | 1.829 | 1.375 | 1.254 | 328 | 640 |
Class | Images | Instances | P | R | mAP50 | mAP50-95 |
---|---|---|---|---|---|---|
all | 1085 | 16909 | 0.525 | 0.16 | 0.156 | 0.077 |
Epoch | GPU_mem | box_loss | cls_loss | dfl_loss | Instances | Size |
---|---|---|---|---|---|---|
20/20 | 2.63G | 1.595 | 1.117 | 1.146 | 223 | 640 |
Class | Images | Instances | P | R | mAP50 | mAP50-95 |
---|---|---|---|---|---|---|
all | 1085 | 16909 | 0.579 | 0.185 | 0.196 | 0.102 |
YOLOv8n summary (fused): 168 layers, 3008768 parameters, 0 gradients
Class | Images | Instances | P | R | mAP50 | mAP50-95 |
---|---|---|---|---|---|---|
all | 1085 | 16909 | 0.578 | 0.186 | 0.196 | 0.102 |
person | 1085 | 3223 | 0.501 | 0.375 | 0.389 | 0.167 |
bike | 1085 | 193 | 0.201 | 0.197 | 0.101 | 0.0438 |
car | 1085 | 7285 | 0.662 | 0.575 | 0.621 | 0.397 |
motor | 1085 | 77 | 0.418 | 0.26 | 0.298 | 0.164 |
train | 1085 | 183 | 0.458 | 0.246 | 0.253 | 0.153 |
truck | 1085 | 2190 | 0.458 | 0.198 | 0.206 | 0.0686 |
hydrant | 1085 | 126 | 0.744 | 0.0232 | 0.0797 | 0.0265 |
sign | 1085 | 3581 | 0.564 | 0.143 | 0.171 | 0.0824 |
skateboard | 1085 | 4 | 1 | 0 | 0 | 0 |
stroller | 1085 | 7 | 1 | 0 | 0.018 | 0.0144 |
other vehicle | 1085 | 40 | 0.348 | 0.025 | 0.0231 | 0.00793 |
Speed: 0.2ms preprocess, 4.2ms inference, 0.0ms loss, 0.6ms postprocess per image
plt.figure(figsize=(24, 14))
im_batch0_labels = plt.imread('./assets/backbone_rgb_nano_val_batch0_labels.webp')
im_batch0_pred = plt.imread('./assets/backbone_rgb_nano_val_batch0_pred.webp')
im_batch1_labels = plt.imread('./assets/backbone_rgb_nano_val_batch1_labels.webp')
im_batch1_pred = plt.imread('./assets/backbone_rgb_nano_val_batch1_pred.webp')
im_batch2_labels = plt.imread('./assets/backbone_rgb_nano_val_batch2_labels.webp')
im_batch2_pred = plt.imread('./assets/backbone_rgb_nano_val_batch2_pred.webp')
ax = plt.subplot(2, 3, 1)
plt.title('batch0_labels')
plt.imshow(im_batch0_labels)
plt.axis("off")
ax = plt.subplot(2, 3, 2)
plt.title('batch1_labels')
plt.imshow(im_batch1_labels)
plt.axis("off")
ax = plt.subplot(2, 3, 3)
plt.title('batch2_labels')
plt.imshow(im_batch2_labels)
plt.axis("off")
ax = plt.subplot(2, 3, 4)
plt.title('batch0_pred')
plt.imshow(im_batch0_pred)
plt.axis("off")
ax = plt.subplot(2, 3, 5)
plt.title('batch1_pred')
plt.imshow(im_batch1_pred)
plt.axis("off")
ax = plt.subplot(2, 3, 6)
plt.title('batch2_pred')
plt.imshow(im_batch2_pred)
plt.axis("off")
(-0.5, 1919.5, 1647.5, -0.5)
Model Evaluation
# Evaluate the model's performance on the validation set
results_n = backbone_nano.val()
Class | Images | Instances | P | R | mAP50 | mAP50-95 |
---|---|---|---|---|---|---|
all | 1085 | 16909 | 0.578 | 0.185 | 0.198 | 0.104 |
person | 1085 | 3223 | 0.505 | 0.375 | 0.391 | 0.167 |
bike | 1085 | 193 | 0.2 | 0.197 | 0.102 | 0.044 |
car | 1085 | 7285 | 0.663 | 0.574 | 0.621 | 0.398 |
motor | 1085 | 77 | 0.419 | 0.26 | 0.3 | 0.166 |
train | 1085 | 183 | 0.455 | 0.246 | 0.252 | 0.155 |
truck | 1085 | 2190 | 0.458 | 0.197 | 0.205 | 0.0686 |
hydrant | 1085 | 126 | 0.741 | 0.023 | 0.0801 | 0.0274 |
sign | 1085 | 3581 | 0.563 | 0.142 | 0.17 | 0.0824 |
skateboard | 1085 | 4 | 1 | 0 | 0 | 0 |
stroller | 1085 | 7 | 1 | 0 | 0.0353 | 0.023 |
other vehicle | 1085 | 40 | 0.355 | 0.025 | 0.0231 | 0.00793 |
Speed: 0.3ms preprocess, 5.0ms inference, 0.0ms loss, 0.7ms postprocess per image
# Export the model to ONNX format
# success = backbone_nano.export(imgsz=(640, 480), format='onnx', opset=12, optimize=False, half=False)
# Export to TorchScript format
success = backbone_nano.export(imgsz=640, format='torchscript', optimize=False, half=False, int8=False)
# TorchScript: export success ✅ 1.3s, saved as 'runs/detect/train4/weights/best.torchscript' (11.9 MB)
# pick pre-trained model
n_model = YOLO('runs/detect/train6/weights/best.torchscript')
# list of source videos (the path is an assumption - point it at your own video files)
videos = glob('./videos/*.mp4')
# read video by index
video = cv.VideoCapture(videos[1])
# get video dims
frame_width = int(video.get(cv.CAP_PROP_FRAME_WIDTH))
frame_height = int(video.get(cv.CAP_PROP_FRAME_HEIGHT))
size = (frame_width, frame_height)
# Define the codec and create VideoWriter object
fourcc = cv.VideoWriter_fourcc(*'DIVX')
out = cv.VideoWriter('./outputs/backbone_nano_rgb.avi', fourcc, 20.0, size)
# read frames
ret = True
while ret:
    ret, frame = video.read()
    if ret:
        # detect & track objects
        results = n_model.track(frame, persist=True)
        # plot results
        composed = results[0].plot()
        # save video
        out.write(composed)
out.release()
video.release()
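Ultralytics can also consume a video file directly instead of the manual OpenCV read loop. A sketch of the same tracking run (reusing the `videos` list from above, with a fresh `out` writer):

# alternative: let Ultralytics decode the video and stream results frame by frame
for result in n_model.track(source=videos[1], stream=True, persist=True):
    out.write(result.plot())
out.release()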
YOLOv8 Small (RGB)
# unzip downloaded dataset to `./datasets`
dataset_rgb = 'datasets/data_rgb.yaml'
# load a model
backbone_small = YOLO("yolov8s.yaml") # build a new model from scratch
Model Training
# Train the model
results_s = backbone_small.train(data=dataset_rgb, epochs=20)
20 epochs completed in 2.438 hours.
Epoch | GPU_mem | box_loss | cls_loss | dfl_loss | Instances | Size |
---|---|---|---|---|---|---|
10/20 | 4.84G | 1.569 | 1.098 | 1.195 | 328 | 640 |
Class | Images | Instances | P | R | mAP50 | mAP50-95 |
---|---|---|---|---|---|---|
all | 1085 | 16909 | 0.596 | 0.211 | 0.245 | 0.128 |
Epoch | GPU_mem | box_loss | cls_loss | dfl_loss | Instances | Size |
---|---|---|---|---|---|---|
20/20 | 4.67G | 1.367 | 0.8879 | 1.083 | 223 | 640 |
Class | Images | Instances | P | R | mAP50 | mAP50-95 |
---|---|---|---|---|---|---|
all | 1085 | 16909 | 0.608 | 0.25 | 0.291 | 0.158 |
YOLOv8s summary (fused): 168 layers, 11131776 parameters, 0 gradients
Class | Images | Instances | P | R | mAP50 | mAP50-95 |
---|---|---|---|---|---|---|
all | 1085 | 16909 | 0.523 | 0.255 | 0.291 | 0.159 |
person | 1085 | 3223 | 0.618 | 0.428 | 0.481 | 0.225 |
bike | 1085 | 193 | 0.248 | 0.326 | 0.239 | 0.121 |
car | 1085 | 7285 | 0.718 | 0.63 | 0.683 | 0.454 |
motor | 1085 | 77 | 0.566 | 0.338 | 0.382 | 0.22 |
train | 1085 | 183 | 0.577 | 0.344 | 0.409 | 0.276 |
truck | 1085 | 2190 | 0.593 | 0.318 | 0.336 | 0.119 |
hydrant | 1085 | 126 | 0.8 | 0.175 | 0.293 | 0.129 |
sign | 1085 | 3581 | 0.632 | 0.243 | 0.291 | 0.149 |
skateboard | 1085 | 4 | 0 | 0 | 0 | 0 |
stroller | 1085 | 7 | 1 | 0 | 0.0687 | 0.0477 |
other vehicle | 1085 | 40 | 0 | 0 | 0.0135 | 0.00526 |
Speed: 0.3ms preprocess, 9.6ms inference, 0.0ms loss, 0.5ms postprocess per image
plt.figure(figsize=(24, 14))
im_batch0_labels = plt.imread('./assets/backbone_rgb_small_val_batch0_labels.webp')
im_batch0_pred = plt.imread('./assets/backbone_rgb_small_val_batch0_pred.webp')
im_batch1_labels = plt.imread('./assets/backbone_rgb_small_val_batch1_labels.webp')
im_batch1_pred = plt.imread('./assets/backbone_rgb_small_val_batch1_pred.webp')
im_batch2_labels = plt.imread('./assets/backbone_rgb_small_val_batch2_labels.webp')
im_batch2_pred = plt.imread('./assets/backbone_rgb_small_val_batch2_pred.webp')
ax = plt.subplot(2, 3, 1)
plt.title('batch0_labels')
plt.imshow(im_batch0_labels)
plt.axis("off")
ax = plt.subplot(2, 3, 2)
plt.title('batch1_labels')
plt.imshow(im_batch1_labels)
plt.axis("off")
ax = plt.subplot(2, 3, 3)
plt.title('batch2_labels')
plt.imshow(im_batch2_labels)
plt.axis("off")
ax = plt.subplot(2, 3, 4)
plt.title('batch0_pred')
plt.imshow(im_batch0_pred)
plt.axis("off")
ax = plt.subplot(2, 3, 5)
plt.title('batch1_pred')
plt.imshow(im_batch1_pred)
plt.axis("off")
ax = plt.subplot(2, 3, 6)
plt.title('batch2_pred')
plt.imshow(im_batch2_pred)
plt.axis("off")
(-0.5, 1919.5, 1647.5, -0.5)
Model Evaluation
# Evaluate the model's performance on the validation set
results_s = backbone_small.val()
Class | Images | Instances | P | R | mAP50 | mAP50-95 |
---|---|---|---|---|---|---|
all | 1085 | 16909 | 0.524 | 0.253 | 0.29 | 0.159 |
person | 1085 | 3223 | 0.622 | 0.428 | 0.481 | 0.225 |
bike | 1085 | 193 | 0.247 | 0.321 | 0.239 | 0.121 |
car | 1085 | 7285 | 0.722 | 0.629 | 0.683 | 0.454 |
motor | 1085 | 77 | 0.563 | 0.338 | 0.382 | 0.219 |
train | 1085 | 183 | 0.575 | 0.339 | 0.41 | 0.276 |
truck | 1085 | 2190 | 0.6 | 0.315 | 0.333 | 0.12 |
hydrant | 1085 | 126 | 0.8 | 0.175 | 0.292 | 0.129 |
sign | 1085 | 3581 | 0.635 | 0.243 | 0.292 | 0.149 |
skateboard | 1085 | 4 | 0 | 0 | 0 | 0 |
stroller | 1085 | 7 | 1 | 0 | 0.069 | 0.0479 |
other vehicle | 1085 | 40 | 0 | 0 | 0.0136 | 0.00526 |
Speed: 0.4ms preprocess, 10.9ms inference, 0.0ms loss, 0.6ms postprocess per image
# Export the model to ONNX format
# success = backbone_nano.export(imgsz=(640, 480), format='onnx', opset=12, optimize=False, half=False)
# Export to TorchScript format
success = backbone_small.export(imgsz=640, format='torchscript', optimize=False, half=False, int8=False)
TorchScript: export success ✅ 2.1s, saved as 'runs/detect/train5/weights/best.torchscript' (42.9 MB)
YOLOv8 Nano (IR)
# unzip downloaded dataset to `./datasets`
dataset_ir = 'datasets/data_thermal.yaml'
# load a model
backbone_ir_nano = YOLO("yolov8n.yaml") # build a new model from scratch
Model Training
# Train the model
results_ir_n = backbone_ir_nano.train(data=dataset_ir, epochs=20)
20 epochs completed in 1.337 hours.
Epoch | GPU_mem | box_loss | cls_loss | dfl_loss | Instances | Size |
---|---|---|---|---|---|---|
10/20 | 3.33G | 1.746 | 1.263 | 1.211 | 104 | 640 |
Class | Images | Instances | P | R | mAP50 | mAP50-95 |
---|---|---|---|---|---|---|
all | 1144 | 16688 | 0.466 | 0.186 | 0.226 | 0.112 |
Epoch | GPU_mem | box_loss | cls_loss | dfl_loss | Instances | Size |
---|---|---|---|---|---|---|
20/20 | 2.5G | 1.518 | 1.016 | 1.111 | 102 | 640 |
Class | Images | Instances | P | R | mAP50 | mAP50-95 |
---|---|---|---|---|---|---|
all | 1144 | 16688 | 0.513 | 0.249 | 0.276 | 0.146 |
YOLOv8n summary (fused): 168 layers, 3008768 parameters, 0 gradients
Class | Images | Instances | P | R | mAP50 | mAP50-95 |
---|---|---|---|---|---|---|
all | 1144 | 16688 | 0.514 | 0.249 | 0.276 | 0.146 |
person | 1144 | 4470 | 0.628 | 0.555 | 0.594 | 0.276 |
bike | 1144 | 170 | 0.278 | 0.25 | 0.219 | 0.11 |
car | 1144 | 7128 | 0.691 | 0.65 | 0.71 | 0.449 |
motor | 1144 | 55 | 0.569 | 0.364 | 0.39 | 0.19 |
train | 1144 | 179 | 0.741 | 0.383 | 0.455 | 0.284 |
truck | 1144 | 2048 | 0.467 | 0.259 | 0.274 | 0.105 |
hydrant | 1144 | 94 | 0.678 | 0.0638 | 0.12 | 0.0535 |
sign | 1144 | 2472 | 0.557 | 0.2 | 0.255 | 0.132 |
skateboard | 1144 | 3 | 0 | 0 | 0 | 0 |
stroller | 1144 | 6 | 1 | 0 | 0 | 0 |
other vehicle | 1144 | 63 | 0.0423 | 0.0159 | 0.0194 | 0.00652 |
Speed: 0.3ms preprocess, 3.8ms inference, 0.0ms loss, 0.6ms postprocess per image
plt.figure(figsize=(24, 14))
im_batch0_labels = plt.imread('./assets/backbone_ir_nano_val_batch0_labels.webp')
im_batch0_pred = plt.imread('./assets/backbone_ir_nano_val_batch0_pred.webp')
im_batch1_labels = plt.imread('./assets/backbone_ir_nano_val_batch1_labels.webp')
im_batch1_pred = plt.imread('./assets/backbone_ir_nano_val_batch1_pred.webp')
im_batch2_labels = plt.imread('./assets/backbone_ir_nano_val_batch2_labels.webp')
im_batch2_pred = plt.imread('./assets/backbone_ir_nano_val_batch2_pred.webp')
ax = plt.subplot(2, 3, 1)
plt.title('batch0_labels')
plt.imshow(im_batch0_labels)
plt.axis("off")
ax = plt.subplot(2, 3, 2)
plt.title('batch1_labels')
plt.imshow(im_batch1_labels)
plt.axis("off")
ax = plt.subplot(2, 3, 3)
plt.title('batch2_labels')
plt.imshow(im_batch2_labels)
plt.axis("off")
ax = plt.subplot(2, 3, 4)
plt.title('batch0_pred')
plt.imshow(im_batch0_pred)
plt.axis("off")
ax = plt.subplot(2, 3, 5)
plt.title('batch1_pred')
plt.imshow(im_batch1_pred)
plt.axis("off")
ax = plt.subplot(2, 3, 6)
plt.title('batch2_pred')
plt.imshow(im_batch2_pred)
plt.axis("off")
Model Evaluation
# Evaluate the model's performance on the validation set
results_ir_n = backbone_ir_nano.val()
Class | Images | Instances | P | R | mAP50 | mAP50-95 |
---|---|---|---|---|---|---|
all | 1144 | 16688 | 0.516 | 0.249 | 0.276 | 0.146 |
person | 1144 | 4470 | 0.631 | 0.556 | 0.595 | 0.276 |
bike | 1144 | 170 | 0.288 | 0.253 | 0.222 | 0.111 |
car | 1144 | 7128 | 0.696 | 0.65 | 0.711 | 0.449 |
motor | 1144 | 55 | 0.57 | 0.364 | 0.39 | 0.189 |
train | 1144 | 179 | 0.746 | 0.378 | 0.455 | 0.283 |
truck | 1144 | 2048 | 0.462 | 0.256 | 0.271 | 0.104 |
hydrant | 1144 | 94 | 0.679 | 0.0638 | 0.12 | 0.0526 |
sign | 1144 | 2472 | 0.557 | 0.199 | 0.256 | 0.132 |
skateboard | 1144 | 3 | 0 | 0 | 0 | 0 |
stroller | 1144 | 6 | 1 | 0 | 0 | 0 |
other vehicle | 1144 | 63 | 0.0425 | 0.0159 | 0.0193 | 0.00637 |
Speed: 0.3ms preprocess, 4.5ms inference, 0.0ms loss, 0.7ms postprocess per image
# Export the model to ONNX format
# success = backbone_nano.export(imgsz=(640, 480), format='onnx', opset=12, optimize=False, half=False)
# Export to TorchScript format
success = backbone_ir_nano.export(imgsz=640, format='torchscript', optimize=False, half=False, int8=False)
# TorchScript: export success ✅ 1.6s, saved as 'runs/detect/train6/weights/best.torchscript' (12.4 MB)
YOLOv8 Small (IR)
# unzip downloaded dataset to `./datasets`
dataset_ir = 'datasets/data_thermal.yaml'
# load a model
backbone_ir_small = YOLO("yolov8s.yaml") # build a new model from scratch
Model Training
# Train the model
results_ir_s = backbone_ir_small.train(data=dataset_ir, epochs=20)
20 epochs completed in 2.827 hours.
Epoch | GPU_mem | box_loss | cls_loss | dfl_loss | Instances | Size |
---|---|---|---|---|---|---|
10/20 | 4.83G | 1.508 | 1.018 | 1.16 | 104 | 640 |
Class | Images | Instances | P | R | mAP50 | mAP50-95 |
---|---|---|---|---|---|---|
all | 1144 | 16688 | 0.489 | 0.286 | 0.313 | 0.168 |
Epoch | GPU_mem | box_loss | cls_loss | dfl_loss | Instances | Size |
---|---|---|---|---|---|---|
20/20 | 4.67G | 1.317 | 0.8207 | 1.064 | 102 | 640 |
Class | Images | Instances | P | R | mAP50 | mAP50-95 |
---|---|---|---|---|---|---|
all | 1144 | 16688 | 0.554 | 0.322 | 0.358 | 0.2 |
YOLOv8s summary (fused): 168 layers, 11131776 parameters, 0 gradients
Class | Images | Instances | P | R | mAP50 | mAP50-95 |
---|---|---|---|---|---|---|
all | 1144 | 16688 | 0.554 | 0.322 | 0.358 | 0.2 |
person | 1144 | 4470 | 0.687 | 0.634 | 0.688 | 0.355 |
bike | 1144 | 170 | 0.364 | 0.347 | 0.308 | 0.174 |
car | 1144 | 7128 | 0.74 | 0.725 | 0.781 | 0.527 |
motor | 1144 | 55 | 0.608 | 0.509 | 0.552 | 0.25 |
train | 1144 | 179 | 0.683 | 0.419 | 0.526 | 0.358 |
truck | 1144 | 2048 | 0.601 | 0.385 | 0.415 | 0.178 |
hydrant | 1144 | 94 | 0.687 | 0.149 | 0.274 | 0.147 |
sign | 1144 | 2472 | 0.608 | 0.313 | 0.362 | 0.195 |
skateboard | 1144 | 3 | 0 | 0 | 0 | 0 |
stroller | 1144 | 6 | 1 | 0 | 0 | 0 |
other vehicle | 1144 | 63 | 0.112 | 0.0635 | 0.0254 | 0.0151 |
Speed: 0.3ms preprocess, 9.6ms inference, 0.0ms loss, 0.6ms postprocess per image
plt.figure(figsize=(24, 14))
im_batch0_labels = plt.imread('./assets/backbone_ir_small_val_batch0_labels.webp')
im_batch0_pred = plt.imread('./assets/backbone_ir_small_val_batch0_pred.webp')
im_batch1_labels = plt.imread('./assets/backbone_ir_small_val_batch1_labels.webp')
im_batch1_pred = plt.imread('./assets/backbone_ir_small_val_batch1_pred.webp')
im_batch2_labels = plt.imread('./assets/backbone_ir_small_val_batch2_labels.webp')
im_batch2_pred = plt.imread('./assets/backbone_ir_small_val_batch2_pred.webp')
ax = plt.subplot(2, 3, 1)
plt.title('batch0_labels')
plt.imshow(im_batch0_labels)
plt.axis("off")
ax = plt.subplot(2, 3, 2)
plt.title('batch1_labels')
plt.imshow(im_batch1_labels)
plt.axis("off")
ax = plt.subplot(2, 3, 3)
plt.title('batch2_labels')
plt.imshow(im_batch2_labels)
plt.axis("off")
ax = plt.subplot(2, 3, 4)
plt.title('batch0_pred')
plt.imshow(im_batch0_pred)
plt.axis("off")
ax = plt.subplot(2, 3, 5)
plt.title('batch1_pred')
plt.imshow(im_batch1_pred)
plt.axis("off")
ax = plt.subplot(2, 3, 6)
plt.title('batch2_pred')
plt.imshow(im_batch2_pred)
plt.axis("off")
Model Evaluation
# Evaluate the model's performance on the validation set
results_ir_s = backbone_ir_small.val()
Class | Images | Instances | P | R | mAP50 | mAP50-95 |
---|---|---|---|---|---|---|
all | 1144 | 16688 | 0.555 | 0.322 | 0.358 | 0.2 |
person | 1144 | 4470 | 0.691 | 0.632 | 0.687 | 0.356 |
bike | 1144 | 170 | 0.369 | 0.353 | 0.309 | 0.174 |
car | 1144 | 7128 | 0.743 | 0.725 | 0.781 | 0.527 |
motor | 1144 | 55 | 0.593 | 0.504 | 0.551 | 0.251 |
train | 1144 | 179 | 0.683 | 0.419 | 0.527 | 0.361 |
truck | 1144 | 2048 | 0.608 | 0.386 | 0.418 | 0.178 |
hydrant | 1144 | 94 | 0.695 | 0.149 | 0.274 | 0.148 |
sign | 1144 | 2472 | 0.614 | 0.313 | 0.362 | 0.195 |
skateboard | 1144 | 3 | 0 | 0 | 0 | 0 |
stroller | 1144 | 6 | 1 | 0 | 0 | 0 |
other vehicle | 1144 | 63 | 0.112 | 0.0635 | 0.0254 | 0.0151 |
# Export the model to ONNX format
# success = backbone_nano.export(imgsz=(640, 480), format='onnx', opset=12, optimize=False, half=False)
# Export to TorchScript format
success = backbone_ir_small.export(imgsz=640, format='torchscript', optimize=False, half=False, int8=False)
# TorchScript: export success ✅ 1.6s, saved as 'runs/detect/train6/weights/best.torchscript' (12.4 MB)
Training the YOLOv8 Mixed Model (RGB + IR)
# missing yolo dep
!pip install "lapx>=0.5.2"
import cv2 as cv
from glob import glob
import matplotlib.pyplot as plt
import os
import random
from ultralytics import YOLO
YOLOv8 Nano (RGB+IR)
# unzip downloaded dataset to `./datasets`
dataset_combined = 'datasets/data_combined.yaml'
# load a model
backbone_nano = YOLO("yolov8n.yaml") # build a new model from scratch
Model Training
# Train the model
results_n = backbone_nano.train(data=dataset_combined, epochs=20)
20 epochs completed in 2.531 hours.
Epoch | GPU_mem | box_loss | cls_loss | dfl_loss | Instances |
---|---|---|---|---|---|
10/20 | 3.18G | 1.655 | 1.208 | 1.154 | 179 |
Class | Images | Instances | P | R | mAP50 |
---|---|---|---|---|---|
all | 2229 | 33597 | 0.545 | 0.208 | 0.226 |
Epoch | GPU_mem | box_loss | cls_loss | dfl_loss | Instances |
---|---|---|---|---|---|
20/20 | 2.6G | 1.458 | 0.9884 | 1.073 | 52 |
Class | Images | Instances | P | R | mAP50 |
---|---|---|---|---|---|
all | 2229 | 33597 | 0.52 | 0.242 | 0.272 |
YOLOv8n summary (fused): 168 layers, 3008768 parameters, 0 gradients
Class | Images | Instances | P | R | mAP50 | mAP50-95 |
---|---|---|---|---|---|---|
all | 2229 | 33597 | 0.522 | 0.241 | 0.272 | 0.147 |
person | 2229 | 7693 | 0.624 | 0.48 | 0.526 | 0.253 |
bike | 2229 | 363 | 0.288 | 0.27 | 0.239 | 0.128 |
car | 2229 | 14413 | 0.689 | 0.644 | 0.696 | 0.46 |
motor | 2229 | 132 | 0.611 | 0.364 | 0.419 | 0.193 |
train | 2229 | 362 | 0.675 | 0.344 | 0.425 | 0.281 |
truck | 2229 | 4238 | 0.492 | 0.235 | 0.255 | 0.0951 |
hydrant | 2229 | 220 | 0.688 | 0.0955 | 0.162 | 0.0659 |
sign | 2229 | 6053 | 0.592 | 0.205 | 0.252 | 0.13 |
skateboard | 2229 | 7 | 0 | 0 | 0 | 0 |
stroller | 2229 | 13 | 1 | 0 | 0 | 0 |
other vehicle | 2229 | 103 | 0.0798 | 0.0194 | 0.0232 | 0.00977 |
Speed: 0.2ms preprocess, 4.0ms inference, 0.0ms loss, 0.6ms postprocess per image
plt.figure(figsize=(24, 14))
im_batch0_labels = plt.imread('./assets/backbone_combined_nano_val_batch0_labels.webp')
im_batch0_pred = plt.imread('./assets/backbone_combined_nano_val_batch0_pred.webp')
im_batch1_labels = plt.imread('./assets/backbone_combined_nano_val_batch1_labels.webp')
im_batch1_pred = plt.imread('./assets/backbone_combined_nano_val_batch1_pred.webp')
im_batch2_labels = plt.imread('./assets/backbone_combined_nano_val_batch2_labels.webp')
im_batch2_pred = plt.imread('./assets/backbone_combined_nano_val_batch2_pred.webp')
ax = plt.subplot(2, 3, 1)
plt.title('batch0_labels')
plt.imshow(im_batch0_labels)
plt.axis("off")
ax = plt.subplot(2, 3, 2)
plt.title('batch1_labels')
plt.imshow(im_batch1_labels)
plt.axis("off")
ax = plt.subplot(2, 3, 3)
plt.title('batch2_labels')
plt.imshow(im_batch2_labels)
plt.axis("off")
ax = plt.subplot(2, 3, 4)
plt.title('batch0_pred')
plt.imshow(im_batch0_pred)
plt.axis("off")
ax = plt.subplot(2, 3, 5)
plt.title('batch1_pred')
plt.imshow(im_batch1_pred)
plt.axis("off")
ax = plt.subplot(2, 3, 6)
plt.title('batch2_pred')
plt.imshow(im_batch2_pred)
plt.axis("off")
Model Evaluation
# Evaluate the model's performance on the validation set
results_n = backbone_nano.val()
Ultralytics YOLOv8.0.173 🚀 Python-3.10.11 torch-2.0.1 CUDA:0 (NVIDIA GeForce GTX 1060 6GB, 6070MiB)
val: Scanning /opt/app/datasets/images_combined_val/labels.cache... 2229 images, 32 backgrounds, 0 corrupt
Results saved to runs/detect/val5
Class | Images | Instances | P | R | mAP50 | mAP50-95 |
---|---|---|---|---|---|---|
all | 2229 | 33597 | 0.519 | 0.242 | 0.272 | 0.147 |
person | 2229 | 7693 | 0.623 | 0.482 | 0.527 | 0.253 |
bike | 2229 | 363 | 0.281 | 0.267 | 0.239 | 0.13 |
car | 2229 | 14413 | 0.686 | 0.646 | 0.697 | 0.461 |
motor | 2229 | 132 | 0.607 | 0.364 | 0.417 | 0.193 |
train | 2229 | 362 | 0.666 | 0.348 | 0.424 | 0.279 |
truck | 2229 | 4238 | 0.493 | 0.237 | 0.256 | 0.096 |
hydrant | 2229 | 220 | 0.685 | 0.0955 | 0.162 | 0.0676 |
sign | 2229 | 6053 | 0.588 | 0.205 | 0.251 | 0.131 |
skateboard | 2229 | 7 | 0 | 0 | 0 | 0 |
stroller | 2229 | 13 | 1 | 0 | 0 | 0 |
other vehicle | 2229 | 103 | 0.0795 | 0.0194 | 0.023 | 0.00967 |
Speed: 0.3ms preprocess, 4.5ms inference, 0.0ms loss, 0.6ms postprocess per image
# Export the model to ONNX format
# success = backbone_nano.export(imgsz=(640, 480), format='onnx', opset=12, optimize=False, half=False)
# Export to TorchScript format
success = backbone_nano.export(imgsz=640, format='torchscript', optimize=False, half=False, int8=False)
# TorchScript: export success ✅ 1.2s, saved as 'runs/detect/train10/weights/best.torchscript' (11.9 MB)
Ultralytics YOLOv8.0.173 🚀 Python-3.10.11 torch-2.0.1 CPU (Intel Core(TM) i7-7700 3.60GHz)
PyTorch: starting from 'runs/detect/train10/weights/best.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 20, 8400) (6.0 MB)
TorchScript: starting export with torch 2.0.1... export success ✅ 1.2s, saved as 'runs/detect/train10/weights/best.torchscript' (11.9 MB)
Export complete (2.5s)
Results saved to /opt/app/runs/detect/train10/weights
Predict: yolo predict task=detect model=runs/detect/train10/weights/best.torchscript imgsz=640
Validate: yolo val task=detect model=runs/detect/train10/weights/best.torchscript imgsz=640 data=datasets/data_combined.yaml
Visualize: https://netron.app
YOLOv8 Small (RGB + IR)
# unzip downloaded dataset to `./datasets`
dataset_combined = 'datasets/data_combined.yaml'
# load a model
backbone_small = YOLO("yolov8s.yaml") # build a new model from scratch
Model Training
# Train the model
results_s = backbone_small.train(data=dataset_combined, epochs=20)
20 epochs completed in 4.965 hours.
Epoch | GPU_mem | box_loss | cls_loss | dfl_loss | Instances |
---|---|---|---|---|---|
10/20 | 4.88G | 1.445 | 0.9915 | 1.085 | 179 |
Class | Images | Instances | P | R | mAP50 |
---|---|---|---|---|---|
all | 2229 | 33597 | 0.548 | 0.277 | 0.314 |
Epoch | GPU_mem | box_loss | cls_loss | dfl_loss | Instances |
---|---|---|---|---|---|
20/20 | 4.86G | 1.265 | 0.7992 | 1.011 | 52 |
Class | Images | Instances | P | R | mAP50 |
---|---|---|---|---|---|
all | 2229 | 33597 | 0.651 | 0.324 | 0.36 |
YOLOv8s summary (fused): 168 layers, 11131776 parameters, 0 gradients
Class | Images | Instances | P | R | mAP50 | mAP50-95 |
---|---|---|---|---|---|---|
all | 2229 | 33597 | 0.652 | 0.323 | 0.36 | 0.204 |
person | 2229 | 7693 | 0.687 | 0.566 | 0.628 | 0.325 |
bike | 2229 | 363 | 0.35 | 0.383 | 0.353 | 0.199 |
car | 2229 | 14413 | 0.735 | 0.712 | 0.764 | 0.528 |
motor | 2229 | 132 | 0.645 | 0.439 | 0.513 | 0.268 |
train | 2229 | 362 | 0.703 | 0.478 | 0.555 | 0.383 |
truck | 2229 | 4238 | 0.589 | 0.389 | 0.404 | 0.167 |
hydrant | 2229 | 220 | 0.696 | 0.177 | 0.266 | 0.121 |
sign | 2229 | 6053 | 0.62 | 0.329 | 0.372 | 0.205 |
skateboard | 2229 | 7 | 1 | 0 | 0 | 0 |
stroller | 2229 | 13 | 1 | 0 | 0.0386 | 0.0297 |
other vehicle | 2229 | 103 | 0.15 | 0.0777 | 0.0616 | 0.0202 |
Speed: 0.2ms preprocess, 9.1ms inference, 0.0ms loss, 0.5ms postprocess per image
plt.figure(figsize=(24, 14))
im_batch0_labels = plt.imread('./assets/backbone_combined_small_val_batch0_labels.webp')
im_batch0_pred = plt.imread('./assets/backbone_combined_small_val_batch0_pred.webp')
im_batch1_labels = plt.imread('./assets/backbone_combined_small_val_batch1_labels.webp')
im_batch1_pred = plt.imread('./assets/backbone_combined_small_val_batch1_pred.webp')
im_batch2_labels = plt.imread('./assets/backbone_combined_small_val_batch2_labels.webp')
im_batch2_pred = plt.imread('./assets/backbone_combined_small_val_batch2_pred.webp')
ax = plt.subplot(2, 3, 1)
plt.title('batch0_labels')
plt.imshow(im_batch0_labels)
plt.axis("off")
ax = plt.subplot(2, 3, 2)
plt.title('batch1_labels')
plt.imshow(im_batch1_labels)
plt.axis("off")
ax = plt.subplot(2, 3, 3)
plt.title('batch2_labels')
plt.imshow(im_batch2_labels)
plt.axis("off")
ax = plt.subplot(2, 3, 4)
plt.title('batch0_pred')
plt.imshow(im_batch0_pred)
plt.axis("off")
ax = plt.subplot(2, 3, 5)
plt.title('batch1_pred')
plt.imshow(im_batch1_pred)
plt.axis("off")
ax = plt.subplot(2, 3, 6)
plt.title('batch2_pred')
plt.imshow(im_batch2_pred)
plt.axis("off")
Model Evaluation
# Evaluate the model's performance on the validation set
results_s = backbone_small.val()
Ultralytics YOLOv8.0.173 🚀 Python-3.10.11 torch-2.0.1 CUDA:0 (NVIDIA GeForce GTX 1060 6GB, 6070MiB)
val: Scanning /opt/app/datasets/images_combined_val/labels.cache... 2229 images, 32 backgrounds, 0 corrupt
Results saved to runs/detect/val6
Class | Images | Instances | P | R | mAP50 | mAP50-95 |
---|---|---|---|---|---|---|
all | 2229 | 33597 | 0.651 | 0.324 | 0.359 | 0.204 |
person | 2229 | 7693 | 0.687 | 0.569 | 0.63 | 0.326 |
bike | 2229 | 363 | 0.346 | 0.383 | 0.351 | 0.198 |
car | 2229 | 14413 | 0.734 | 0.713 | 0.764 | 0.529 |
motor | 2229 | 132 | 0.633 | 0.439 | 0.514 | 0.267 |
train | 2229 | 362 | 0.707 | 0.481 | 0.556 | 0.384 |
truck | 2229 | 4238 | 0.59 | 0.391 | 0.407 | 0.167 |
hydrant | 2229 | 220 | 0.697 | 0.178 | 0.266 | 0.122 |
sign | 2229 | 6053 | 0.617 | 0.329 | 0.371 | 0.205 |
skateboard | 2229 | 7 | 1 | 0 | 0 | 0 |
stroller | 2229 | 13 | 1 | 0 | 0.0323 | 0.024 |
other vehicle | 2229 | 103 | 0.148 | 0.0777 | 0.0618 | 0.0193 |
Speed: 0.3ms preprocess, 10.3ms inference, 0.0ms loss, 0.7ms postprocess per image
# Export the model to ONNX format
# success = backbone_nano.export(imgsz=(640, 480), format='onnx', opset=12, optimize=False, half=False)
# Export to TorchScript format
success = backbone_small.export(imgsz=640, format='torchscript', optimize=False, half=False, int8=False)
# TorchScript: export success ✅ 1.7s, saved as 'runs/detect/train11/weights/best.torchscript' (42.9 MB)
Model Evaluation
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
YOLOv8n & RGB Dataset
data_index = ['all', 'person', 'bike', 'car', 'motor', 'train', 'truck', 'hydrant', 'sign', 'skateboard', 'stroller', 'other vehicle']
data_columns = ['Model', 'Images', 'Instances', 'P', 'R', 'mAP50', 'mAP50-95']
rgb_nano = [
['rgb_nano', 1085, 16909, 0.578, 0.185, 0.198, 0.104],
['rgb_nano', 1085, 3223, 0.505, 0.375, 0.391, 0.167],
['rgb_nano', 1085, 193, 0.2, 0.197, 0.102, 0.044],
['rgb_nano', 1085, 7285, 0.663, 0.574, 0.621, 0.398],
['rgb_nano', 1085, 77, 0.419, 0.26, 0.3, 0.166],
['rgb_nano', 1085, 183, 0.455, 0.246, 0.252, 0.155],
['rgb_nano', 1085, 2190, 0.458, 0.197, 0.205, 0.0686],
['rgb_nano', 1085, 126, 0.741, 0.023, 0.0801, 0.0274],
['rgb_nano', 1085, 3581, 0.563, 0.142, 0.17, 0.0824],
['rgb_nano', 1085, 4, 1, 0, 0, 0],
['rgb_nano', 1085, 7, 1, 0, 0.0353, 0.023],
['rgb_nano', 1085, 40, 0.355, 0.025, 0.0231, 0.00793]
]
rgb_nano_df = pd.DataFrame(rgb_nano, data_index, data_columns)
rgb_nano_df
Model | Images | Instances | P | R | mAP50 | mAP50-95 | |
---|---|---|---|---|---|---|---|
all | rgb_nano | 1085 | 16909 | 0.578 | 0.185 | 0.1980 | 0.10400 |
person | rgb_nano | 1085 | 3223 | 0.505 | 0.375 | 0.3910 | 0.16700 |
bike | rgb_nano | 1085 | 193 | 0.200 | 0.197 | 0.1020 | 0.04400 |
car | rgb_nano | 1085 | 7285 | 0.663 | 0.574 | 0.6210 | 0.39800 |
motor | rgb_nano | 1085 | 77 | 0.419 | 0.260 | 0.3000 | 0.16600 |
train | rgb_nano | 1085 | 183 | 0.455 | 0.246 | 0.2520 | 0.15500 |
truck | rgb_nano | 1085 | 2190 | 0.458 | 0.197 | 0.2050 | 0.06860 |
hydrant | rgb_nano | 1085 | 126 | 0.741 | 0.023 | 0.0801 | 0.02740 |
sign | rgb_nano | 1085 | 3581 | 0.563 | 0.142 | 0.1700 | 0.08240 |
skateboard | rgb_nano | 1085 | 4 | 1.000 | 0.000 | 0.0000 | 0.00000 |
stroller | rgb_nano | 1085 | 7 | 1.000 | 0.000 | 0.0353 | 0.02300 |
other vehicle | rgb_nano | 1085 | 40 | 0.355 | 0.025 | 0.0231 | 0.00793 |
plt.figure(figsize=(16, 5))
plt.title('Average Precision by Class & Instances')
sns.set(style='darkgrid')
sns.barplot(
data=rgb_nano_df.reset_index(),
x='Instances',
y='mAP50',
errorbar='sd',
hue='index',
palette='nipy_spectral'
)
plt.legend(bbox_to_anchor=(1.01,1.01))
YOLOv8s & RGB Dataset
rgb_small = [
['rgb_small', 1085, 16909, 0.524, 0.253, 0.29, 0.159],
['rgb_small', 1085, 3223, 0.622, 0.428, 0.481, 0.225],
['rgb_small', 1085, 193, 0.247, 0.321, 0.239, 0.121],
['rgb_small', 1085, 7285, 0.722, 0.629, 0.683, 0.454],
['rgb_small', 1085, 77, 0.563, 0.338, 0.382, 0.219],
['rgb_small', 1085, 183, 0.575, 0.339, 0.41, 0.276],
['rgb_small', 1085, 2190, 0.6, 0.315, 0.333, 0.12],
['rgb_small', 1085, 126, 0.8, 0.175, 0.292, 0.129],
['rgb_small', 1085, 3581, 0.635, 0.243, 0.292, 0.149],
['rgb_small', 1085, 4, 0, 0, 0, 0],
['rgb_small', 1085, 7, 1, 0, 0.069, 0.0479],
['rgb_small', 1085, 40, 0, 0, 0.0136, 0.00526]
]
rgb_small_df = pd.DataFrame(rgb_small, data_index, data_columns)
rgb_small_df
Model | Images | Instances | P | R | mAP50 | mAP50-95 | |
---|---|---|---|---|---|---|---|
all | rgb_small | 1085 | 16909 | 0.524 | 0.253 | 0.2900 | 0.15900 |
person | rgb_small | 1085 | 3223 | 0.622 | 0.428 | 0.4810 | 0.22500 |
bike | rgb_small | 1085 | 193 | 0.247 | 0.321 | 0.2390 | 0.12100 |
car | rgb_small | 1085 | 7285 | 0.722 | 0.629 | 0.6830 | 0.45400 |
motor | rgb_small | 1085 | 77 | 0.563 | 0.338 | 0.3820 | 0.21900 |
train | rgb_small | 1085 | 183 | 0.575 | 0.339 | 0.4100 | 0.27600 |
truck | rgb_small | 1085 | 2190 | 0.600 | 0.315 | 0.3330 | 0.12000 |
hydrant | rgb_small | 1085 | 126 | 0.800 | 0.175 | 0.2920 | 0.12900 |
sign | rgb_small | 1085 | 3581 | 0.635 | 0.243 | 0.2920 | 0.14900 |
skateboard | rgb_small | 1085 | 4 | 0.000 | 0.000 | 0.0000 | 0.00000 |
stroller | rgb_small | 1085 | 7 | 1.000 | 0.000 | 0.0690 | 0.04790 |
other vehicle | rgb_small | 1085 | 40 | 0.000 | 0.000 | 0.0136 | 0.00526 |
plt.figure(figsize=(16, 5))
plt.title('Average Precision by Class & Instances')
sns.set(style='darkgrid')
sns.barplot(
data=rgb_small_df.reset_index(),
x='Instances',
y='mAP50',
errorbar='sd',
hue='index',
palette='nipy_spectral'
)
plt.legend(bbox_to_anchor=(1.01,1.01))
YOLOv8n & IR Dataset
ir_nano = [
['ir_nano', 1144, 16688, 0.516, 0.249, 0.276, 0.146],
['ir_nano', 1144, 4470, 0.631, 0.556, 0.595, 0.276],
['ir_nano', 1144, 170, 0.288, 0.253, 0.222, 0.111],
['ir_nano', 1144, 7128, 0.696, 0.65, 0.711, 0.449],
['ir_nano', 1144, 55, 0.57, 0.364, 0.39, 0.189],
['ir_nano', 1144, 179, 0.746, 0.378, 0.455, 0.283],
['ir_nano', 1144, 2048, 0.462, 0.256, 0.271, 0.104],
['ir_nano', 1144, 94, 0.679, 0.0638, 0.12, 0.0526],
['ir_nano', 1144, 2472, 0.557, 0.199, 0.256, 0.132],
['ir_nano', 1144, 3, 0, 0, 0, 0],
['ir_nano', 1144, 6, 1, 0, 0, 0],
['ir_nano', 1144, 63, 0.0425, 0.0159, 0.0193, 0.00637]
]
ir_nano_df = pd.DataFrame(ir_nano, data_index, data_columns)
ir_nano_df
Model | Images | Instances | P | R | mAP50 | mAP50-95 | |
---|---|---|---|---|---|---|---|
all | ir_nano | 1144 | 16688 | 0.5160 | 0.2490 | 0.2760 | 0.14600 |
person | ir_nano | 1144 | 4470 | 0.6310 | 0.5560 | 0.5950 | 0.27600 |
bike | ir_nano | 1144 | 170 | 0.2880 | 0.2530 | 0.2220 | 0.11100 |
car | ir_nano | 1144 | 7128 | 0.6960 | 0.6500 | 0.7110 | 0.44900 |
motor | ir_nano | 1144 | 55 | 0.5700 | 0.3640 | 0.3900 | 0.18900 |
train | ir_nano | 1144 | 179 | 0.7460 | 0.3780 | 0.4550 | 0.28300 |
truck | ir_nano | 1144 | 2048 | 0.4620 | 0.2560 | 0.2710 | 0.10400 |
hydrant | ir_nano | 1144 | 94 | 0.6790 | 0.0638 | 0.1200 | 0.05260 |
sign | ir_nano | 1144 | 2472 | 0.5570 | 0.1990 | 0.2560 | 0.13200 |
skateboard | ir_nano | 1144 | 3 | 0.0000 | 0.0000 | 0.0000 | 0.00000 |
stroller | ir_nano | 1144 | 6 | 1.0000 | 0.0000 | 0.0000 | 0.00000 |
other vehicle | ir_nano | 1144 | 63 | 0.0425 | 0.0159 | 0.0193 | 0.00637 |
plt.figure(figsize=(16, 5))
plt.title('Average Precision by Class & Instances')
sns.set(style='darkgrid')
sns.barplot(
data=ir_nano_df.reset_index(),
x='Instances',
y='mAP50',
errorbar='sd',
hue='index',
palette='nipy_spectral'
)
plt.legend(bbox_to_anchor=(1.01,1.01))
YOLOv8s & IR Dataset
ir_small = [
['ir_small', 1144, 16688, 0.555, 0.322, 0.358, 0.2],
['ir_small', 1144, 4470, 0.691, 0.632, 0.687, 0.356],
['ir_small', 1144, 170, 0.369, 0.353, 0.309, 0.174],
['ir_small', 1144, 7128, 0.743, 0.725, 0.781, 0.527],
['ir_small', 1144, 55, 0.593, 0.504, 0.551, 0.251],
['ir_small', 1144, 179, 0.683, 0.419, 0.527, 0.361],
['ir_small', 1144, 2048, 0.608, 0.386, 0.418, 0.178],
['ir_small', 1144, 94, 0.695, 0.149, 0.274, 0.148],
['ir_small', 1144, 2472, 0.614, 0.313, 0.362, 0.195],
['ir_small', 1144, 3, 0, 0, 0, 0],
['ir_small', 1144, 6, 1, 0, 0, 0],
['ir_small', 1144, 63, 0.112, 0.0635, 0.0254, 0.0151]
]
ir_small_df = pd.DataFrame(ir_small, data_index, data_columns)
ir_small_df
Model | Images | Instances | P | R | mAP50 | mAP50-95 | |
---|---|---|---|---|---|---|---|
all | ir_small | 1144 | 16688 | 0.555 | 0.3220 | 0.3580 | 0.2000 |
person | ir_small | 1144 | 4470 | 0.691 | 0.6320 | 0.6870 | 0.3560 |
bike | ir_small | 1144 | 170 | 0.369 | 0.3530 | 0.3090 | 0.1740 |
car | ir_small | 1144 | 7128 | 0.743 | 0.7250 | 0.7810 | 0.5270 |
motor | ir_small | 1144 | 55 | 0.593 | 0.5040 | 0.5510 | 0.2510 |
train | ir_small | 1144 | 179 | 0.683 | 0.4190 | 0.5270 | 0.3610 |
truck | ir_small | 1144 | 2048 | 0.608 | 0.3860 | 0.4180 | 0.1780 |
hydrant | ir_small | 1144 | 94 | 0.695 | 0.1490 | 0.2740 | 0.1480 |
sign | ir_small | 1144 | 2472 | 0.614 | 0.3130 | 0.3620 | 0.1950 |
skateboard | ir_small | 1144 | 3 | 0.000 | 0.0000 | 0.0000 | 0.0000 |
stroller | ir_small | 1144 | 6 | 1.000 | 0.0000 | 0.0000 | 0.0000 |
other vehicle | ir_small | 1144 | 63 | 0.112 | 0.0635 | 0.0254 | 0.0151 |
plt.figure(figsize=(16, 5))
plt.title('Average Precision by Class & Instances')
sns.set(style='darkgrid')
sns.barplot(
data=ir_small_df.reset_index(),
x='Instances',
y='mAP50',
errorbar='sd',
hue='index',
palette='nipy_spectral'
)
plt.legend(bbox_to_anchor=(1.01,1.01))
YOLOv8n & Combined Dataset
combined_nano = [
['combined_nano', 2229, 33597, 0.519, 0.242, 0.272, 0.147],
['combined_nano', 2229, 7693, 0.623, 0.482, 0.527, 0.253],
['combined_nano', 2229, 363, 0.281, 0.267, 0.239, 0.13],
['combined_nano', 2229, 14413, 0.686, 0.646, 0.697, 0.461],
['combined_nano', 2229, 132, 0.607, 0.364, 0.417, 0.193],
['combined_nano', 2229, 362, 0.666, 0.348, 0.424, 0.279],
['combined_nano', 2229, 4238, 0.493, 0.237, 0.256, 0.096],
['combined_nano', 2229, 220, 0.685, 0.0955, 0.162, 0.0676],
['combined_nano', 2229, 6053, 0.588, 0.205, 0.251, 0.131],
['combined_nano', 2229, 7, 0, 0, 0, 0],
['combined_nano', 2229, 13, 1, 0, 0, 0],
['combined_nano', 2229, 103, 0.0795, 0.0194, 0.023, 0.00967]
]
combined_nano_df = pd.DataFrame(combined_nano, data_index, data_columns)
combined_nano_df
Model | Images | Instances | P | R | mAP50 | mAP50-95 | |
---|---|---|---|---|---|---|---|
all | combined_nano | 2229 | 33597 | 0.5190 | 0.2420 | 0.272 | 0.14700 |
person | combined_nano | 2229 | 7693 | 0.6230 | 0.4820 | 0.527 | 0.25300 |
bike | combined_nano | 2229 | 363 | 0.2810 | 0.2670 | 0.239 | 0.13000 |
car | combined_nano | 2229 | 14413 | 0.6860 | 0.6460 | 0.697 | 0.46100 |
motor | combined_nano | 2229 | 132 | 0.6070 | 0.3640 | 0.417 | 0.19300 |
train | combined_nano | 2229 | 362 | 0.6660 | 0.3480 | 0.424 | 0.27900 |
truck | combined_nano | 2229 | 4238 | 0.4930 | 0.2370 | 0.256 | 0.09600 |
hydrant | combined_nano | 2229 | 220 | 0.6850 | 0.0955 | 0.162 | 0.06760 |
sign | combined_nano | 2229 | 6053 | 0.5880 | 0.2050 | 0.251 | 0.13100 |
skateboard | combined_nano | 2229 | 7 | 0.0000 | 0.0000 | 0.000 | 0.00000 |
stroller | combined_nano | 2229 | 13 | 1.0000 | 0.0000 | 0.000 | 0.00000 |
other vehicle | combined_nano | 2229 | 103 | 0.0795 | 0.0194 | 0.023 | 0.00967 |
plt.figure(figsize=(16, 5))
plt.title('Average Precision by Class & Instances')
sns.set(style='darkgrid')
sns.barplot(
data=combined_nano_df.reset_index(),
x='Instances',
y='mAP50',
errorbar='sd',
hue='index',
palette='nipy_spectral'
)
plt.legend(bbox_to_anchor=(1.01,1.01))
YOLOv8s & Combined Dataset
combined_small = [
['combined_small', 2229, 33597, 0.651, 0.324, 0.359, 0.204],
['combined_small', 2229, 7693, 0.687, 0.569, 0.63, 0.326],
['combined_small', 2229, 363, 0.346, 0.383, 0.351, 0.198],
['combined_small', 2229, 14413, 0.734, 0.713, 0.764, 0.529],
['combined_small', 2229, 132, 0.633, 0.439, 0.514, 0.267],
['combined_small', 2229, 362, 0.707, 0.481, 0.556, 0.384],
['combined_small', 2229, 4238, 0.59, 0.391, 0.407, 0.167],
['combined_small', 2229, 220, 0.697, 0.178, 0.266, 0.122],
['combined_small', 2229, 6053, 0.617, 0.329, 0.371, 0.205],
['combined_small', 2229, 7, 1, 0, 0, 0],
['combined_small', 2229, 13, 1, 0, 0.0323, 0.024],
['combined_small', 2229, 103, 0.148, 0.0777, 0.0618, 0.0193]
]
combined_small_df = pd.DataFrame(combined_small, data_index, data_columns)
combined_small_df
Model | Images | Instances | P | R | mAP50 | mAP50-95 | |
---|---|---|---|---|---|---|---|
all | combined_small | 2229 | 33597 | 0.651 | 0.3240 | 0.3590 | 0.2040 |
person | combined_small | 2229 | 7693 | 0.687 | 0.5690 | 0.6300 | 0.3260 |
bike | combined_small | 2229 | 363 | 0.346 | 0.3830 | 0.3510 | 0.1980 |
car | combined_small | 2229 | 14413 | 0.734 | 0.7130 | 0.7640 | 0.5290 |
motor | combined_small | 2229 | 132 | 0.633 | 0.4390 | 0.5140 | 0.2670 |
train | combined_small | 2229 | 362 | 0.707 | 0.4810 | 0.5560 | 0.3840 |
truck | combined_small | 2229 | 4238 | 0.590 | 0.3910 | 0.4070 | 0.1670 |
hydrant | combined_small | 2229 | 220 | 0.697 | 0.1780 | 0.2660 | 0.1220 |
sign | combined_small | 2229 | 6053 | 0.617 | 0.3290 | 0.3710 | 0.2050 |
skateboard | combined_small | 2229 | 7 | 1.000 | 0.0000 | 0.0000 | 0.0000 |
stroller | combined_small | 2229 | 13 | 1.000 | 0.0000 | 0.0323 | 0.0240 |
other vehicle | combined_small | 2229 | 103 | 0.148 | 0.0777 | 0.0618 | 0.0193 |
plt.figure(figsize=(16, 5))
plt.title('Average Precision by Class & Instances')
sns.set(style='darkgrid')
sns.barplot(
data=combined_small_df.reset_index(),
x='Instances',
y='mAP50',
errorbar='sd',
hue='index',
palette='nipy_spectral'
)
plt.legend(bbox_to_anchor=(1.01,1.01))
Combining Results
combined_df = pd.concat([rgb_nano_df, rgb_small_df, ir_nano_df, ir_small_df, combined_nano_df, combined_small_df], axis=0)
combined_df = combined_df.reset_index()
plt.figure(figsize=(16, 5))
plt.title('Average Precision by Class')
sns.set(style='darkgrid')
sns.barplot(
data=combined_df.reset_index(),
x='index',
y='mAP50',
errorbar='sd',
hue='Model',
palette='seismic'
)
plt.legend(bbox_to_anchor=(1.01,1.01))
plt.figure(figsize=(24, 10))
sns.set(style='darkgrid')
sns.scatterplot(
data=combined_df.reset_index(),
x='R',
y='P',
s=300,
alpha=0.8,
hue='Model',
palette='nipy_spectral',
style='index'
).set_title('Precision Recall')
combined_df[combined_df['index'] == 'car']
index | Model | Images | Instances | P | R | mAP50 | mAP50-95 | |
---|---|---|---|---|---|---|---|---|
3 | car | rgb_nano | 1085 | 7285 | 0.663 | 0.574 | 0.621 | 0.398 |
15 | car | rgb_small | 1085 | 7285 | 0.722 | 0.629 | 0.683 | 0.454 |
27 | car | ir_nano | 1144 | 7128 | 0.696 | 0.650 | 0.711 | 0.449 |
39 | car | ir_small | 1144 | 7128 | 0.743 | 0.725 | 0.781 | 0.527 |
51 | car | combined_nano | 2229 | 14413 | 0.686 | 0.646 | 0.697 | 0.461 |
63 | car | combined_small | 2229 | 14413 | 0.734 | 0.713 | 0.764 | 0.529 |
plt.figure(figsize=(8, 5))
plt.title('Average Precision by Class')
sns.set(style='darkgrid')
sns.barplot(
data=combined_df[combined_df['index'] == 'car'],
x='index',
y='mAP50',
errorbar='sd',
hue='Model',
palette='seismic'
)
plt.legend(bbox_to_anchor=(1.01,1.01))
combined_df[combined_df['index'] == 'person']
index | Model | Images | Instances | P | R | mAP50 | mAP50-95 | |
---|---|---|---|---|---|---|---|---|
1 | person | rgb_nano | 1085 | 3223 | 0.505 | 0.375 | 0.391 | 0.167 |
13 | person | rgb_small | 1085 | 3223 | 0.622 | 0.428 | 0.481 | 0.225 |
25 | person | ir_nano | 1144 | 4470 | 0.631 | 0.556 | 0.595 | 0.276 |
37 | person | ir_small | 1144 | 4470 | 0.691 | 0.632 | 0.687 | 0.356 |
49 | person | combined_nano | 2229 | 7693 | 0.623 | 0.482 | 0.527 | 0.253 |
61 | person | combined_small | 2229 | 7693 | 0.687 | 0.569 | 0.630 | 0.326 |
plt.figure(figsize=(8, 5))
plt.title('Average Precision by Class')
sns.set(style='darkgrid')
sns.barplot(
data=combined_df[combined_df['index'] == 'person'],
x='index',
y='mAP50',
errorbar='sd',
hue='Model',
palette='seismic'
)
plt.legend(bbox_to_anchor=(1.01,1.01))
combined_df[combined_df['index'] == 'motor']
index | Model | Images | Instances | P | R | mAP50 | mAP50-95 | |
---|---|---|---|---|---|---|---|---|
4 | motor | rgb_nano | 1085 | 77 | 0.419 | 0.260 | 0.300 | 0.166 |
16 | motor | rgb_small | 1085 | 77 | 0.563 | 0.338 | 0.382 | 0.219 |
28 | motor | ir_nano | 1144 | 55 | 0.570 | 0.364 | 0.390 | 0.189 |
40 | motor | ir_small | 1144 | 55 | 0.593 | 0.504 | 0.551 | 0.251 |
52 | motor | combined_nano | 2229 | 132 | 0.607 | 0.364 | 0.417 | 0.193 |
64 | motor | combined_small | 2229 | 132 | 0.633 | 0.439 | 0.514 | 0.267 |
plt.figure(figsize=(8, 5))
plt.title('Average Precision by Class')
sns.set(style='darkgrid')
sns.barplot(
data=combined_df[combined_df['index'] == 'motor'],
x='index',
y='mAP50',
errorbar='sd',
hue='Model',
palette='seismic'
)
plt.legend(bbox_to_anchor=(1.01,1.01))
combined_df[combined_df['index'] == 'bike']
index | Model | Images | Instances | P | R | mAP50 | mAP50-95 | |
---|---|---|---|---|---|---|---|---|
2 | bike | rgb_nano | 1085 | 193 | 0.200 | 0.197 | 0.102 | 0.044 |
14 | bike | rgb_small | 1085 | 193 | 0.247 | 0.321 | 0.239 | 0.121 |
26 | bike | ir_nano | 1144 | 170 | 0.288 | 0.253 | 0.222 | 0.111 |
38 | bike | ir_small | 1144 | 170 | 0.369 | 0.353 | 0.309 | 0.174 |
50 | bike | combined_nano | 2229 | 363 | 0.281 | 0.267 | 0.239 | 0.130 |
62 | bike | combined_small | 2229 | 363 | 0.346 | 0.383 | 0.351 | 0.198 |
plt.figure(figsize=(8, 5))
plt.title('Average Precision by Class')
sns.set(style='darkgrid')
sns.barplot(
data=combined_df[combined_df['index'] == 'bike'],
x='index',
y='mAP50',
errorbar='sd',
hue='Model',
palette='seismic'
)
plt.legend(bbox_to_anchor=(1.01,1.01))
combined_df[combined_df['index'] == 'truck']
index | Model | Images | Instances | P | R | mAP50 | mAP50-95 | |
---|---|---|---|---|---|---|---|---|
6 | truck | rgb_nano | 1085 | 2190 | 0.458 | 0.197 | 0.205 | 0.0686 |
18 | truck | rgb_small | 1085 | 2190 | 0.600 | 0.315 | 0.333 | 0.1200 |
30 | truck | ir_nano | 1144 | 2048 | 0.462 | 0.256 | 0.271 | 0.1040 |
42 | truck | ir_small | 1144 | 2048 | 0.608 | 0.386 | 0.418 | 0.1780 |
54 | truck | combined_nano | 2229 | 4238 | 0.493 | 0.237 | 0.256 | 0.0960 |
66 | truck | combined_small | 2229 | 4238 | 0.590 | 0.391 | 0.407 | 0.1670 |
plt.figure(figsize=(8, 5))
plt.title('Average Precision by Class')
sns.set(style='darkgrid')
sns.barplot(
data=combined_df[combined_df['index'] == 'truck'],
x='index',
y='mAP50',
errorbar='sd',
hue='Model',
palette='seismic'
)
plt.legend(bbox_to_anchor=(1.01,1.01))
Evaluate Bounding Boxes
# read images
images = glob('./datasets/images_combined_val/images/*.jpg')
print(len(images))
2229
# select image
img = cv.imread(images[0])
height, width, _ = img.shape
print(images[0][38:-4], height, width)
video-57kWWRyeqqHs3Byei-frame-000816-b6tuLjNco8MfoBs3d 512 640
# select label file
path = './datasets/images_combined_val/labels/' + images[0][38:-4] + '.txt'
labels = open(path, 'r')
data = labels.readlines()
labels.close()
print(data)
['9 0.696875 0.375000 0.025000 0.039062\n', '9 0.696094 0.314453 0.032813 0.082031\n', '9 0.168750 0.395508 0.040625 0.013672\n', '0 0.464063 0.457031 0.009375 0.027344\n', '0 0.004688 0.491211 0.009375 0.041016\n', '0 0.165625 0.489258 0.009375 0.033203\n', '1 0.316406 0.500000 0.014063 0.035156\n', '2 0.600781 0.511719 0.117188 0.113281\n', '2 0.524219 0.481445 0.060938 0.064453\n', '2 0.481250 0.469727 0.037500 0.033203\n', '2 0.426563 0.454102 0.015625 0.017578\n', '2 0.412500 0.463867 0.015625 0.025391\n', '2 0.376563 0.474609 0.018750 0.023438\n', '2 0.364063 0.477539 0.021875 0.033203\n', '2 0.342188 0.477539 0.034375 0.041016\n', '0 0.315625 0.483398 0.021875 0.044922\n', '8 0.105469 0.500977 0.007812 0.021484\n']
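Each line holds a class id followed by the box in relative YOLO coordinates (`class x_center y_center width height`). Decoding the first line back to pixel corners on this 640 x 512 frame works like this:

# decode the first label line: class 9 ('sign') plus its relative centre box
cls, x, y, w, h = map(float, data[0].split())
l, r = int((x - w / 2) * width), int((x + w / 2) * width)
t, b = int((y - h / 2) * height), int((y + h / 2) * height)
print(int(cls), (l, t), (r, b))  # -> 9 (438, 182) (454, 202)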
# create one colour for every COCO class
colours = []
number_colours = 80
for j in range(number_colours):
    colour = (np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255))
    colours.append(colour)
print(len(colours), colours)
80 [(129, 83, 161), (220, 116, 220), (47, 113, 141), (185, 137, 77), (212, 208, 251), (36, 83, 204), (4, 40, 112), (61, 18, 39), (25, 132, 21), (239, 67, 234), (140, 253, 52), (207, 196, 72), (144, 32, 112), (138, 29, 227), (101, 17, 45), (102, 118, 7), (210, 51, 160), (59, 158, 131), (37, 145, 69), (68, 56, 71), (28, 96, 25), (72, 189, 118), (190, 67, 118), (152, 48, 33), (153, 138, 248), (218, 94, 242), (236, 229, 215), (133, 186, 102), (33, 198, 167), (223, 32, 103), (16, 209, 160), (83, 89, 91), (194, 46, 110), (243, 47, 47), (187, 11, 41), (193, 188, 6), (107, 119, 230), (116, 118, 109), (65, 155, 110), (12, 151, 145), (135, 138, 197), (43, 19, 174), (52, 203, 214), (72, 178, 172), (10, 247, 17), (108, 90, 185), (134, 29, 207), (217, 96, 179), (2, 38, 161), (245, 175, 254), (254, 57, 175), (84, 184, 46), (249, 195, 60), (246, 67, 127), (51, 89, 138), (12, 162, 182), (176, 89, 187), (165, 40, 110), (141, 76, 226), (245, 187, 119), (47, 237, 138), (173, 176, 50), (49, 101, 36), (171, 235, 78), (125, 105, 250), (123, 83, 13), (18, 47, 133), (196, 102, 109), (234, 204, 106), (55, 110, 131), (116, 209, 240), (147, 203, 253), (115, 246, 60), (17, 245, 112), (50, 250, 19), (254, 233, 18), (122, 211, 221), (229, 12, 236), (86, 169, 186), (13, 189, 38)]
Show Labels
for line in data:
    # split string to floats: class id + YOLO bbox (relative cx, cy, w, h)
    cls, x, y, w, h = map(float, line.split(' '))
    l = int((x - w / 2) * width)
    r = int((x + w / 2) * width)
    t = int((y - h / 2) * height)
    b = int((y + h / 2) * height)
    # clip boxes to the image boundaries
    l = max(l, 0)
    r = min(r, width - 1)
    t = max(t, 0)
    b = min(b, height - 1)
    # colour every box by its class id
    image = cv.rectangle(img, (l, t), (r, b), colours[int(cls)], thickness=2)
plt.title('Image with original Labels')
plt.axis('off')
plt.imshow(image)
plt.show()
Show Predictions
from PIL import Image

# Load the best model
backbone_combined_small = YOLO('./runs/detect/backbone_combined_small.torchscript')
# Run inference on the selected image
results = backbone_combined_small(img)  # returns a list of Results objects
for r in results:
    im_array = r.plot()  # plot a BGR numpy array of predictions
    im = Image.fromarray(im_array[..., ::-1])  # RGB PIL image
    im.show()  # show image
    im.save('results.jpg')  # save image
0: 640x640 5 persons, 1 bike, 11 cars, 3 signs, 16.8ms
Speed: 8.9ms preprocess, 16.8ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 640)
Show Labels
# get another image
img = cv.imread(images[1337])
height, width, _ = img.shape
# get labels
path = './datasets/images_combined_val/labels/' + images[1337][38:-4] + '.txt'
labels = open(path, 'r')
data = labels.readlines()
labels.close()
for line in data:
    # split string to floats: class id + YOLO bbox (relative cx, cy, w, h)
    cls, x, y, w, h = map(float, line.split(' '))
    l = int((x - w / 2) * width)
    r = int((x + w / 2) * width)
    t = int((y - h / 2) * height)
    b = int((y + h / 2) * height)
    # clip boxes to the image boundaries
    l = max(l, 0)
    r = min(r, width - 1)
    t = max(t, 0)
    b = min(b, height - 1)
    # colour every box by its class id
    image = cv.rectangle(img, (l, t), (r, b), colours[int(cls)], thickness=2)
plt.title('Image with original Labels')
plt.axis('off')
plt.imshow(image)
plt.show()
Show Predictions
# Run inference on the next image
results = backbone_combined_small(img)  # returns a list of Results objects
for r in results:
    im_array = r.plot()  # plot a BGR numpy array of predictions
    im = Image.fromarray(im_array[..., ::-1])  # RGB PIL image
    im.show()  # show image
    im.save('results.jpg')  # save image
0: 640x640 2 persons, 1 car, 7 trucks, 2 signs, 17.7ms
Speed: 12.7ms preprocess, 17.7ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 640)
Show Labels
# get another image
img = cv.imread(images[666])
height, width, _ = img.shape
# get labels
path = './datasets/images_combined_val/labels/' + images[666][38:-4] + '.txt'
labels = open(path, 'r')
data = labels.readlines()
labels.close()
for line in data:
    # split string to floats: class id + YOLO bbox (relative cx, cy, w, h)
    cls, x, y, w, h = map(float, line.split(' '))
    l = int((x - w / 2) * width)
    r = int((x + w / 2) * width)
    t = int((y - h / 2) * height)
    b = int((y + h / 2) * height)
    # clip boxes to the image boundaries
    l = max(l, 0)
    r = min(r, width - 1)
    t = max(t, 0)
    b = min(b, height - 1)
    # colour every box by its class id
    image = cv.rectangle(img, (l, t), (r, b), colours[int(cls)], thickness=2)
plt.title('Image with original Labels')
plt.axis('off')
plt.imshow(image)
plt.show()
Show Predictions
# Run inference on the next image
results = backbone_combined_small(img)  # returns a list of Results objects
for r in results:
    im_array = r.plot()  # plot a BGR numpy array of predictions
    im = Image.fromarray(im_array[..., ::-1])  # RGB PIL image
    im.show()  # show image
    im.save('results.jpg')  # save image
0: 640x640 2 persons, 3 bikes, 7 cars, 1 sign, 16.9ms
Speed: 13.9ms preprocess, 16.9ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)