
YOLOv8 License Plate Detection

Using the YOLOv8 Object Tracker and EasyOCR to record License Plates.
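The walkthrough assumes the usual packages are installed - a sketch of the setup, with unpinned versions (the original does not specify them):

# assumed environment - the libraries used throughout this notebook
# pip install ultralytics easyocr opencv-python pandas numpy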

import cv2 as cv
from glob import glob
import os
import random
from ultralytics import YOLO

Pre-Trained YOLOv8

The regular YOLOv8 training weights do not contain a number_plate class, so they cannot be used directly for number plate detection.
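You can verify this by printing the class map that ships with the weights - a quick sketch (names is the Ultralytics index-to-label dictionary):

# sketch: inspect the COCO classes baked into the pre-trained weights
model_pretrained = YOLO('yolov8n.pt')
print(model_pretrained.names)
# {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', ...} - no plate class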

# read in video paths
videos = glob('inputs/*.mp4')
print(videos)

['inputs/uk_dash_1.mp4', 'inputs/uk_dash_2.mp4']

# pick pre-trained model
model_pretrained = YOLO('yolov8n.pt')
# read video by index
video = cv.VideoCapture(videos[1])

# get video dims
frame_width = int(video.get(3))
frame_height = int(video.get(4))
size = (frame_width, frame_height)

# Define the codec and create VideoWriter object
fourcc = cv.VideoWriter_fourcc(*'DIVX')
out = cv.VideoWriter('./outputs/uk_dash_2.avi', fourcc, 20.0, size)

# read frames
ret = True

while ret:
    ret, frame = video.read()

    if ret:
        # detect & track objects
        results = model_pretrained.track(frame, persist=True)

        # plot results
        composed = results[0].plot()

        # save video
        out.write(composed)

out.release()
video.release()

YOLOv8 License Plate Detection

Retraining YOLOv8

Download the dataset with YOLOv8 annotations and point YOLO to the data.yaml file that comes with the dataset:

train: ../train/images
val: ../valid/images
test: ../test/images

nc: 1
names: ['License_Plate']

roboflow:
workspace: roboflow-universe-projects
project: license-plate-recognition-rxg4e
version: 4
license: CC BY 4.0
url: https://universe.roboflow.com/roboflow-universe-projects/license-plate-recognition-rxg4e/dataset/4

# unzip downloaded dataset to `./datasets`
dataset = 'datasets/data.yaml'

# load a model
# backbone = YOLO("yolov8n.yaml") # build a new model from scratch
backbone = YOLO("yolov8n.pt") # load a pre-trained model (recommended for training)
# Use the model
results = backbone.train(data=dataset, epochs=20) # train the model

20 epochs completed in 2.530 hours.
Optimizer stripped from runs/detect/train11/weights/last.pt, 6.2MB
Optimizer stripped from runs/detect/train11/weights/best.pt, 6.2MB

Validating runs/detect/train11/weights/best.pt...
Ultralytics YOLOv8.0.173 🚀 Python-3.10.11 torch-2.0.1 CUDA:0 (NVIDIA GeForce GTX 1060 6GB, 6070MiB)
Model summary (fused): 168 layers, 3005843 parameters, 0 gradients
Class   Images  Instances  Box(P   R      mAP50  mAP50-95): 100%|██████████| 64/64 [00:17, 3.61it/s]
all     2046    2132       0.986   0.954  0.984  0.701
Speed: 0.3ms preprocess, 5.1ms inference, 0.0ms loss, 0.5ms postprocess per image
Results saved to runs/detect/train11

YOLOv8 License Plate Detection

Class   Images  Instances  Box(P)  R      mAP50  mAP50-95
all     2046    2132       0.986   0.954  0.984  0.701

Speed: 0.3ms preprocess, 5.1ms inference, 0.0ms loss, 0.5ms postprocess per image
Model summary (fused): 168 layers, 3005843 parameters, 0 gradients
# Evaluate the model's performance on the validation set
results = backbone.val()

Ultralytics YOLOv8.0.173 🚀 Python-3.10.11 torch-2.0.1 CUDA:0 (NVIDIA GeForce GTX 1060 6GB, 6070MiB)
Model summary (fused): 168 layers, 3005843 parameters, 0 gradients
val: Scanning /opt/app/03_object_detection_with_text_extraction_easyocr/datasets/valid/labels.cache... 2046 images, 3 backgrounds, 0 corrupt: 100%|████
Class   Images  Instances  Box(P   R      mAP50  mAP50-95): 100%|██████████| 128/128 [00:18, 6.74it/s]
all     2046    2132       0.986   0.954  0.984  0.701
Speed: 0.3ms preprocess, 5.7ms inference, 0.0ms loss, 0.6ms postprocess per image
Results saved to runs/detect/val

YOLOv8 License Plate Detection

Class   Images  Instances  Box(P)  R      mAP50  mAP50-95
all     2046    2132       0.986   0.954  0.984  0.701

Speed: 0.3ms preprocess, 5.7ms inference, 0.0ms loss, 0.6ms postprocess per image
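The same numbers are also available programmatically from the object returned by val() - a small sketch using the Ultralytics metrics attributes:

# sketch: the object returned by val() exposes the metrics programmatically
metrics = backbone.val()
print(metrics.box.map50)  # mAP@0.5
print(metrics.box.map)    # mAP@0.5:0.95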
# Perform object detection on an image using the model
results = backbone('inputs/cars.png')

image 1/1 /opt/app/03_object_detection_with_text_extraction_easyocr/inputs/cars.png: 384x640 2 License_Plates, 35.9ms
Speed: 1.7ms preprocess, 35.9ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

# Export the model to ONNX format
# success = model.export(imgsz=(640, 480), format='onnx', opset=12, optimize=False, half=False)
# Export to TorchScript format
success = backbone.export(imgsz=640, format='torchscript', optimize=False, half=False, int8=False)
# TorchScript: export success ✅ 1.5s, saved as 'runs/detect/train11/weights/best.torchscript' (11.9 MB)

Ultralytics YOLOv8.0.173 🚀 Python-3.10.11 torch-2.0.1 CPU (Intel Core(TM) i7-7700 3.60GHz)

PyTorch: starting from 'runs/detect/train11/weights/best.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 5, 8400) (6.0 MB)

TorchScript: starting export with torch 2.0.1...
TorchScript: export success ✅ 1.3s, saved as 'runs/detect/train11/weights/best.torchscript' (11.9 MB)

Export complete (2.6s)
Results saved to /opt/app/03_object_detection_with_text_extraction_easyocr/runs/detect/train11/weights
Predict: yolo predict task=detect model=runs/detect/train11/weights/best.torchscript imgsz=640
Validate: yolo val task=detect model=runs/detect/train11/weights/best.torchscript imgsz=640 data=datasets/data.yaml
Visualize: https://netron.app

# pick pre-trained model
np_model = YOLO('runs/detect/train11/weights/best.torchscript')
# read video by index
video = cv.VideoCapture(videos[1])
ret, frame = video.read()

# get video dims
frame_width = int(video.get(3))
frame_height = int(video.get(4))
size = (frame_width, frame_height)

# Define the codec and create VideoWriter object
fourcc = cv.VideoWriter_fourcc(*'DIVX')
out = cv.VideoWriter('./outputs/uk_dash_np_2.avi', fourcc, 20.0, size)

# read frames
ret = True

while ret:
    ret, frame = video.read()

    if ret:
        # detect & track objects
        results = np_model.track(frame, persist=True)

        # plot results
        composed = results[0].plot()

        # save video
        out.write(composed)

out.release()
video.release()

And now we have a model that is only interested in number plates:

YOLOv8 License Plate Detection

Though, the confusion matrix shows that it also sees a lot of plates that do not exist (false positives) - but at least it does not miss many real ones (false negatives are rare):

YOLOv8 License Plate Detection
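Ultralytics also writes this confusion matrix (along with the other training plots) into the run directory, so it can be inspected without re-running the validation - a sketch, assuming the training run from above:

# sketch: the run folder contains ready-made plots such as
# confusion_matrix.png, results.png and the PR curves
cm = cv.imread('runs/detect/train11/confusion_matrix.png')
cv.imshow('confusion matrix', cm)
cv.waitKey(0)
cv.destroyAllWindows()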

Improving Training Results

# unzip downloaded dataset to `./datasets`
dataset = 'datasets/data.yaml'

# load a model
# backbone = YOLO("yolov8n.yaml") # build a new model from scratch
backbone_small = YOLO("yolov8s.pt") # load a pre-trained model (recommended for training)
# Use the model
results_small = backbone_small.train(data=dataset, epochs=100) # train the model

Epoch    GPU_mem  box_loss  cls_loss  dfl_loss  Instances  Size
57/100   4.57G    0.9052    0.3966    1.064     7          640: 100%|██████████| 1324/1324 [13:43, 1.61it/s]
Class    Images   Instances  Box(P    R      mAP50  mAP50-95): 100%|██████████| 64/64 [00:26, 2.38it/s]
all      2046     2132       0.981    0.968  0.984  0.709

# pick pre-trained model
np2_model = YOLO('runs/detect/train4/weights/best.pt')
# Evaluate the model's performance on the validation set
results = np2_model.val()

Ultralytics YOLOv8.0.173 🚀 Python-3.10.11 torch-2.0.1 CUDA:0 (NVIDIA GeForce GTX 1060 6GB, 6070MiB)
Model summary (fused): 168 layers, 11125971 parameters, 0 gradients
Downloading https://ultralytics.com/assets/Arial.ttf to '/root/.config/Ultralytics/Arial.ttf'... 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 755k/755k [00:00, 2.35MB/s]
Class   Images  Instances  Box(P   R      mAP50  mAP50-95): 100%|██████████| 128/128 [00:35, 3.59it/s]
all     2046    2132       0.981   0.968  0.984  0.71
Speed: 0.3ms preprocess, 13.4ms inference, 0.0ms loss, 0.6ms postprocess per image
Results saved to runs/detect/val2

License Plate Detection

import ast
import cv2 as cv
import easyocr
from glob import glob
import numpy as np
import pandas as pd
import string
from ultralytics import YOLO

As seen during training - the model, using just the COCO weights, is very capable of detecting cars, trucks and buses. But number plates are harder - the model often mistakes street signs or plain background noise for a car registration plate. The positive is that it rarely misses a plate.

# regular pre-trained yolov8 model for car recognition
# coco_model = YOLO('yolov8n.pt')
coco_model = YOLO('yolov8s.pt')
# yolov8 model trained to detect number plates
np_model = YOLO('runs/detect/train4/weights/best.pt')
# read in test video paths
videos = glob('inputs/*.mp4')
print(videos)

['inputs/uk_dash_1.mp4', 'inputs/uk_dash_2.mp4']

STEP 1 Implementing the Car Detection

Get the bounding boxes of all vehicles in your video recording, with prediction confidence scores and object tracking IDs.

# read video by index
video = cv.VideoCapture(videos[1])

ret = True
frame_number = -1
# vehicle class IDs from the COCO dataset (2: car, 3: motorcycle, 5: bus)
# https://docs.ultralytics.com/datasets/detect/coco/#dataset-yaml
vehicles = [2,3,5]
vehicle_bounding_boxes = []

# read the first 10 frames
while ret:
    frame_number += 1
    ret, frame = video.read()

    if ret and frame_number < 10:
        # use track() to identify instances and track them frame by frame
        detections = coco_model.track(frame, persist=True)[0]
        # save cropped detections
        # detections.save_crop('outputs')
        # print model predictions for debugging
        # print(detections)

        for detection in detections.boxes.data.tolist():
            # print detection bounding boxes for debugging
            # print(detection)
            x1, y1, x2, y2, track_id, score, class_id = detection
            # I am only interested in class IDs that belong to vehicles
            if int(class_id) in vehicles and score > 0.5:
                vehicle_bounding_boxes.append([x1, y1, x2, y2, track_id, score])

# print found bounding boxes for debugging
print(vehicle_bounding_boxes)
video.release()

This code collects all vehicle bounding boxes from the video and writes them into the vehicle_bounding_boxes list. Besides the bbox coordinates, the list contains the tracking ID of each detected vehicle - it should stay the same frame-to-frame and serves as a unique identifier - and the score: how confident the model is that this bbox actually contains a vehicle, with values from 0 to 1:

[[762.6422729492188, 614.1869506835938, 1121.368408203125, 911.6900024414062, 1.0, 0.9254793524742126], [1196.008056640625, 635.3404541015625, 1526.3975830078125, 828.6062622070312, 2.0, 0.8488578200340271], [1672.98193359375, 613.9304809570312, 1912.382080078125, 819.3222045898438, 3.0, 0.5385741591453552], [758.7203369140625, 612.6467895507812, 1119.0892333984375, 917.7677612304688, 1.0, 0.925308883190155], [1195.9505615234375, 635.4146118164062, 1527.97412109375, 830.3245239257812, 2.0, 0.865635871887207], [1692.5439453125, 613.0050659179688, 1917.7542724609375, 819.2852783203125, 3.0, 0.5493771433830261], [754.7435302734375, 612.3658447265625, 1115.0045166015625, 919.653076171875, 1.0, 0.9127519130706787], [1194.00341796875, 634.9168090820312, 1527.029541015625, 832.383544921875, 2.0, 0.8814489841461182], [1688.8155517578125, 615.6485595703125, 1920.0, 812.7891235351562, 3.0, 0.6132197976112366], [752.8799438476562, 611.2362060546875, 1111.976318359375, 920.200927734375, 1.0, 0.9137689471244812], [1192.805908203125, 634.3713989257812, 1526.1273193359375, 832.46337890625, 2.0, 0.8671290278434753], [1680.4443359375, 616.384033203125, 1920.0, 813.3687744140625, 3.0, 0.6371273994445801], [750.9274291992188, 611.5806884765625, 1110.2657470703125, 915.3110961914062, 1.0, 0.9381350874900818], [1189.63916015625, 634.7803955078125, 1525.4072265625, 833.2440185546875, 2.0, 0.888615071773529], [1669.8206787109375, 616.796142578125, 1920.0, 808.6288452148438, 3.0, 0.5068169236183167], [748.747802734375, 609.5638427734375, 1109.0101318359375, 912.808837890625, 1.0, 0.9158740639686584], [1187.832275390625, 634.11328125, 1524.633056640625, 832.628173828125, 2.0, 0.8583219647407532], [1659.7103271484375, 615.9025268554688, 1920.0, 823.25048828125, 3.0, 0.7755634784698486], [745.1077270507812, 609.5160522460938, 1107.8214111328125, 912.8062133789062, 1.0, 0.9354495406150818], [1186.91455078125, 634.5582885742188, 1524.004150390625, 832.4244995117188, 2.0, 0.8758277297019958], [1650.2227783203125, 613.749267578125, 1920.0, 828.9586791992188, 3.0, 0.7407982349395752], [742.2940673828125, 610.4445190429688, 1106.597900390625, 912.6227416992188, 1.0, 0.9281387329101562], [1186.1158447265625, 634.223876953125, 1523.3406982421875, 832.6515502929688, 2.0, 0.8710047006607056], [1638.47705078125, 614.6183471679688, 1919.968017578125, 833.5314331054688, 3.0, 0.8480165600776672], [741.3974609375, 610.8768920898438, 1105.543701171875, 912.5601806640625, 1.0, 0.9410984516143799], [1185.1246337890625, 633.4691162109375, 1523.3682861328125, 832.612060546875, 2.0, 0.8842733502388], [1627.5872802734375, 616.9085693359375, 1919.9117431640625, 829.2400512695312, 3.0, 0.85666424036026], [741.3576049804688, 610.9183959960938, 1103.5601806640625, 914.4734497070312, 1.0, 0.9404377937316895], [1183.273681640625, 633.708984375, 1522.5953369140625, 833.3422241210938, 2.0, 0.8721591234207153], [1618.3934326171875, 619.4539794921875, 1919.864013671875, 827.6344604492188, 3.0, 0.8759608864784241]]

Using the save_crop() function shows me that the first 10 frames of my video contain three different cars:

YOLOv8 License Plate Detection
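save_crop() is the commented-out call in the loop above; enabled, it writes one crop per detection into per-class subfolders - a minimal sketch:

# sketch: write crops of all detections into ./outputs/<class_name>/
video = cv.VideoCapture(videos[1])
ret, frame = video.read()
coco_model.track(frame, persist=True)[0].save_crop('outputs')
video.release()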

STEP 2 Implementing the License Plate Detection

Take the bounding box of each vehicle and use the number plate detector model to find the corresponding plate within the confines of that box.

# read video by index
video = cv.VideoCapture(videos[0])

ret = True
frame_number = -1
vehicles = [2,3,5]

# read the first 10 frames
while ret:
    frame_number += 1
    ret, frame = video.read()

    if ret and frame_number < 10:

        # vehicle detector
        detections = coco_model.track(frame, persist=True)[0]
        for detection in detections.boxes.data.tolist():
            x1, y1, x2, y2, track_id, score, class_id = detection
            if int(class_id) in vehicles and score > 0.5:
                vehicle_bounding_boxes = []
                vehicle_bounding_boxes.append([x1, y1, x2, y2, track_id, score])
                for bbox in vehicle_bounding_boxes:
                    print(bbox)
                    roi = frame[int(y1):int(y2), int(x1):int(x2)]
                    # debugging: check if bbox lines up with detected vehicles
                    # (should be identical to save_crop() above)
                    # cv.imwrite(str(track_id) + '.jpg', roi)

                    # license plate detector for region of interest
                    license_plates = np_model(roi)[0]
                    # check every bounding box for a license plate
                    for license_plate in license_plates.boxes.data.tolist():
                        plate_x1, plate_y1, plate_x2, plate_y2, plate_score, _ = license_plate
                        # verify detections
                        print(license_plate, 'track_id: ' + str(bbox[4]))
                        plate = roi[int(plate_y1):int(plate_y2), int(plate_x1):int(plate_x2)]
                        cv.imwrite(str(track_id) + '.jpg', plate)

video.release()

By using the tracking ID I can make sure that every license plate - as seen above, the video contains many frames of the same 3 cars - is only returned once:

YOLOv8 License Plate Detection
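The tracking ID can be pushed further: instead of overwriting the crop on every frame, keep only the highest-confidence plate per vehicle - a minimal sketch (the best_plates dictionary is my addition, not part of the original code):

# sketch: deduplicate plate crops by tracking ID
best_plates = {}  # track_id -> (plate_score, plate_crop)

# inside the detection loop, replace the cv.imwrite() with:
# if track_id not in best_plates or plate_score > best_plates[track_id][0]:
#     best_plates[track_id] = (plate_score, plate)

# after the loop: one image per tracked vehicle
for track_id, (plate_score, plate) in best_plates.items():
    cv.imwrite(str(int(track_id)) + '_best.jpg', plate)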

STEP 3 Preprocess License Plates

# read video by index
video = cv.VideoCapture(videos[0])

ret = True
frame_number = -1
vehicles = [2,3,5]

# read the first 100 frames
while ret:
    frame_number += 1
    ret, frame = video.read()

    if ret and frame_number < 100:

        # vehicle detector
        detections = coco_model.track(frame, persist=True)[0]
        for detection in detections.boxes.data.tolist():
            x1, y1, x2, y2, track_id, score, class_id = detection
            if int(class_id) in vehicles and score > 0.5:
                vehicle_bounding_boxes = []
                vehicle_bounding_boxes.append([x1, y1, x2, y2, track_id, score])
                for bbox in vehicle_bounding_boxes:
                    print(bbox)
                    roi = frame[int(y1):int(y2), int(x1):int(x2)]

                    # license plate detector for region of interest
                    license_plates = np_model(roi)[0]
                    # process license plate
                    for license_plate in license_plates.boxes.data.tolist():
                        plate_x1, plate_y1, plate_x2, plate_y2, plate_score, _ = license_plate
                        # crop plate from region of interest
                        plate = roi[int(plate_y1):int(plate_y2), int(plate_x1):int(plate_x2)]
                        # de-colorize
                        plate_gray = cv.cvtColor(plate, cv.COLOR_BGR2GRAY)
                        # posterize
                        _, plate_threshold = cv.threshold(plate_gray, 64, 255, cv.THRESH_BINARY_INV)
                        cv.imwrite(str(track_id) + '_gray.jpg', plate_gray)
                        cv.imwrite(str(track_id) + '_thresh.jpg', plate_threshold)

video.release()

YOLOv8 License Plate Detection
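The fixed threshold of 64 works for these clips, but it is sensitive to lighting. Otsu's method picks the cut-off per image instead - a sketch of a drop-in replacement for the cv.threshold() line above (not used in the original pipeline):

# sketch: let Otsu's method choose the binarization threshold per plate
# the fixed threshold argument (0 here) is ignored when THRESH_OTSU is set
_, plate_otsu = cv.threshold(plate_gray, 0, 255, cv.THRESH_BINARY_INV + cv.THRESH_OTSU)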

STEP 4 Read License Plates

# Initialize the OCR reader
reader = easyocr.Reader(['en'], gpu=True)

def read_license_plate(license_plate_crop):
    detections = reader.readtext(license_plate_crop)

    for detection in detections:
        bbox, text, score = detection

        text = text.upper().replace(' ', '')

        return text, score

    return None, None

def write_csv(results, output_path):

    with open(output_path, 'w') as f:
        f.write('{},{},{},{},{},{},{},{}\n'.format(
            'frame_number', 'track_id', 'car_bbox', 'car_bbox_score',
            'license_plate_bbox', 'license_plate_bbox_score', 'license_plate_number',
            'license_text_score'))

        for frame_number in results.keys():
            for track_id in results[frame_number].keys():
                print(results[frame_number][track_id])
                if 'car' in results[frame_number][track_id].keys() and \
                   'license_plate' in results[frame_number][track_id].keys() and \
                   'number' in results[frame_number][track_id]['license_plate'].keys():
                    f.write('{},{},{},{},{},{},{},{}\n'.format(
                        frame_number,
                        track_id,
                        '[{} {} {} {}]'.format(
                            results[frame_number][track_id]['car']['bbox'][0],
                            results[frame_number][track_id]['car']['bbox'][1],
                            results[frame_number][track_id]['car']['bbox'][2],
                            results[frame_number][track_id]['car']['bbox'][3]
                        ),
                        results[frame_number][track_id]['car']['bbox_score'],
                        '[{} {} {} {}]'.format(
                            results[frame_number][track_id]['license_plate']['bbox'][0],
                            results[frame_number][track_id]['license_plate']['bbox'][1],
                            results[frame_number][track_id]['license_plate']['bbox'][2],
                            results[frame_number][track_id]['license_plate']['bbox'][3]
                        ),
                        results[frame_number][track_id]['license_plate']['bbox_score'],
                        results[frame_number][track_id]['license_plate']['number'],
                        results[frame_number][track_id]['license_plate']['text_score'])
                    )
results = {}

# read video by index
video = cv.VideoCapture(videos[0])

ret = True
frame_number = -1
vehicles = [2,3,5]

# read the first 100 frames
while ret:
    frame_number += 1
    ret, frame = video.read()

    if ret and frame_number < 100:
        results[frame_number] = {}

        # vehicle detector
        detections = coco_model.track(frame, persist=True)[0]
        for detection in detections.boxes.data.tolist():
            x1, y1, x2, y2, track_id, score, class_id = detection
            if int(class_id) in vehicles and score > 0.5:
                vehicle_bounding_boxes = []
                vehicle_bounding_boxes.append([x1, y1, x2, y2, track_id, score])
                for bbox in vehicle_bounding_boxes:
                    print(bbox)
                    roi = frame[int(y1):int(y2), int(x1):int(x2)]

                    # license plate detector for region of interest
                    license_plates = np_model(roi)[0]
                    # process license plate
                    for license_plate in license_plates.boxes.data.tolist():
                        plate_x1, plate_y1, plate_x2, plate_y2, plate_score, _ = license_plate
                        # crop plate from region of interest
                        plate = roi[int(plate_y1):int(plate_y2), int(plate_x1):int(plate_x2)]
                        # de-colorize
                        plate_gray = cv.cvtColor(plate, cv.COLOR_BGR2GRAY)
                        # posterize
                        _, plate_threshold = cv.threshold(plate_gray, 64, 255, cv.THRESH_BINARY_INV)

                        # OCR
                        np_text, np_score = read_license_plate(plate_threshold)
                        # if plate could be read, write results
                        if np_text is not None:
                            results[frame_number][track_id] = {
                                'car': {
                                    'bbox': [x1, y1, x2, y2],
                                    'bbox_score': score
                                },
                                'license_plate': {
                                    'bbox': [plate_x1, plate_y1, plate_x2, plate_y2],
                                    'bbox_score': plate_score,
                                    'number': np_text,
                                    'text_score': np_score
                                }
                            }

write_csv(results, './results.csv')
video.release()

This returns a list with bounding box metrics for every frame with a successful detection:


frame_number | track_id | car_bbox | license_plate_bbox | license_plate_bbox_score | license_number | license_number_score
0 | 1.0 | [760.1986694335938 614.2100830078125 1123.09130859375 914.9498901367188] | [110.20427703857422 133.25326538085938 238.5574493408203 175.96791076660156] | 0.7692280411720276 | BPG6UXN | 0.7290849695998655
1 | 1.0 | [758.7349243164062 612.4984741210938 1122.470458984375 919.1956787109375] | [109.57369995117188 134.78448486328125 238.8947296142578 179.6195831298828] | 0.767607569694519 | BP6EUXN | 0.27891552972114064
2 | 1.0 | [755.6078491210938 612.161865234375 1118.7542724609375 920.3657836914062] | [109.76798248291016 134.661376953125 239.85276794433594 180.43345642089844] | 0.7666334509849548 | BP66UXN | 0.7696779876170268
3 | 1.0 | [753.9749755859375 611.0296630859375 1115.607421875 920.6179809570312] | [109.80683898925781 134.79702758789062 239.79380798339844 180.0568389892578] | 0.7609436511993408 | BPG6UXN | 0.5947437696221942

STEP 5 Clean-Up License Plate Format

# Mapping dictionaries for character conversion
# characters that can easily be confused can be
# verified by their location - an `O` in a place
# where a number is expected is probably a `0`
dict_char_to_int = {'O': '0',
                    'I': '1',
                    'J': '3',
                    'A': '4',
                    'G': '6',
                    'S': '5'}

dict_int_to_char = {'0': 'O',
                    '1': 'I',
                    '3': 'J',
                    '4': 'A',
                    '6': 'G',
                    '5': 'S'}

def license_complies_format(text):
    # True if the license plate complies with the format, False otherwise.
    if len(text) != 7:
        return False

    if (text[0] in string.ascii_uppercase or text[0] in dict_int_to_char.keys()) and \
       (text[1] in string.ascii_uppercase or text[1] in dict_int_to_char.keys()) and \
       (text[2] in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] or text[2] in dict_char_to_int.keys()) and \
       (text[3] in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] or text[3] in dict_char_to_int.keys()) and \
       (text[4] in string.ascii_uppercase or text[4] in dict_int_to_char.keys()) and \
       (text[5] in string.ascii_uppercase or text[5] in dict_int_to_char.keys()) and \
       (text[6] in string.ascii_uppercase or text[6] in dict_int_to_char.keys()):
        return True
    else:
        return False

def format_license(text):
    license_plate_ = ''
    mapping = {0: dict_int_to_char, 1: dict_int_to_char, 4: dict_int_to_char, 5: dict_int_to_char, 6: dict_int_to_char,
               2: dict_char_to_int, 3: dict_char_to_int}
    for j in [0, 1, 2, 3, 4, 5, 6]:
        if text[j] in mapping[j].keys():
            license_plate_ += mapping[j][text[j]]
        else:
            license_plate_ += text[j]

    return license_plate_

def read_license_plate(license_plate_crop):
    detections = reader.readtext(license_plate_crop)

    for detection in detections:
        bbox, text, score = detection

        text = text.upper().replace(' ', '')

        # verify that the text conforms to a standard license plate
        if license_complies_format(text):
            # bring the text into the default license plate format
            return format_license(text), score

    return None, None
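A quick worked example: the third character of a UK-style plate must be a digit, so an OCR read of BPG6UXN (seen in the results above) has its G corrected to a 6:

# illustration: a 'G' in a digit position is mapped to '6'
print(license_complies_format('BPG6UXN'))  # True - 'G' is allowed via dict_char_to_int
print(format_license('BPG6UXN'))           # 'BP66UXN'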
results = {}

# read video by index
video = cv.VideoCapture(videos[1])

ret = True
frame_number = -1
vehicles = [2,3,5]

# read the entire video
while ret:
    ret, frame = video.read()
    frame_number += 1
    if ret:
        results[frame_number] = {}

        # vehicle detector
        detections = coco_model.track(frame, persist=True)[0]
        for detection in detections.boxes.data.tolist():
            x1, y1, x2, y2, track_id, score, class_id = detection
            if int(class_id) in vehicles and score > 0.5:
                vehicle_bounding_boxes = []
                vehicle_bounding_boxes.append([x1, y1, x2, y2, track_id, score])
                for bbox in vehicle_bounding_boxes:
                    print(bbox)
                    roi = frame[int(y1):int(y2), int(x1):int(x2)]

                    # license plate detector for region of interest
                    license_plates = np_model(roi)[0]
                    # process license plate
                    for license_plate in license_plates.boxes.data.tolist():
                        plate_x1, plate_y1, plate_x2, plate_y2, plate_score, _ = license_plate
                        # crop plate from region of interest
                        plate = roi[int(plate_y1):int(plate_y2), int(plate_x1):int(plate_x2)]
                        # de-colorize
                        plate_gray = cv.cvtColor(plate, cv.COLOR_BGR2GRAY)
                        # posterize
                        _, plate_threshold = cv.threshold(plate_gray, 64, 255, cv.THRESH_BINARY_INV)

                        # OCR
                        np_text, np_score = read_license_plate(plate_threshold)
                        # if plate could be read, write results
                        if np_text is not None:
                            results[frame_number][track_id] = {
                                'car': {
                                    'bbox': [x1, y1, x2, y2],
                                    'bbox_score': score
                                },
                                'license_plate': {
                                    'bbox': [plate_x1, plate_y1, plate_x2, plate_y2],
                                    'bbox_score': plate_score,
                                    'number': np_text,
                                    'text_score': np_score
                                }
                            }

write_csv(results, './results.csv')
video.release()
results = pd.read_csv('./results.csv')

# show results for tracking ID `298` - sort by OCR prediction confidence
results[results['track_id'] == 298.].sort_values(by='license_text_score', ascending=False)
    | frame_number | track_id | car_bbox | car_bbox_score | license_plate_bbox | license_plate_bbox_score | license_plate_number | license_text_score
175 | 839  | 298.0 | [775.8486938476562 504.52294921875 1095.532592... | 0.925278 | [102.20135498046875 212.2305908203125 218.7746... | 0.752586 | NL60GXO | 0.988261
29  | 50   | 298.0 | [846.99609375 521.3043823242188 1254.532104492... | 0.931958 | [133.1925506591797 275.73577880859375 280.6121... | 0.740573 | NL60GXO | 0.966773
146 | 799  | 298.0 | [810.77734375 522.0484008789062 1130.535888671... | 0.914011 | [102.2442626953125 215.42474365234375 218.7385... | 0.752845 | NL60GXO | 0.953542
147 | 800  | 298.0 | [810.7708740234375 521.808349609375 1130.57128... | 0.912922 | [102.17294311523438 215.99032592773438 218.767... | 0.754186 | NL60GXO | 0.953522
284 | 1337 | 298.0 | [843.4232788085938 523.5321044921875 1257.2657... | 0.910718 | [163.98861694335938 263.2216796875 300.1403503... | 0.757695 | NL60GXO | 0.934405
... | ...  | ...   | ... | ... | ... | ... | ... | ...
191 | 1010 | 298.0 | [865.4359741210938 488.0260009765625 1060.5764... | 0.861625 | [65.1905517578125 123.86817169189453 130.26571... | 0.761225 | KL60GZO | 0.043224
355 | 1462 | 298.0 | [685.121826171875 514.077880859375 888.8001098... | 0.832969 | [92.80020904541016 110.36637115478516 153.4690... | 0.739499 | HI60CIO | 0.036080
392 | 2306 | 298.0 | [462.48388671875 512.8485717773438 933.4752197... | 0.929456 | [121.44440460205078 294.94183349609375 269.183... | 0.722692 | WL60YNL | 0.031725
517 | 2684 | 298.0 | [856.17333984375 514.7470703125 1043.54296875 ... | 0.887135 | [59.788631439208984 116.58961486816406 126.729... | 0.738799 | HL60CKD | 0.030968
515 | 2682 | 298.0 | [852.64794921875 512.9298095703125 1043.665893... | 0.893880 | [61.673500061035156 121.25975799560547 129.707... | 0.746015 | ML60CZO | 0.016401

488 rows × 8 columns
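Since every vehicle track yields hundreds of reads, a natural last step is to keep only the read with the best OCR confidence per tracking ID - a sketch using pandas (my suggestion, not part of the original pipeline):

# sketch: one plate number per vehicle - the highest-confidence OCR read
best_reads = results.loc[
    results.groupby('track_id')['license_text_score'].idxmax(),
    ['track_id', 'license_plate_number', 'license_text_score']
]
print(best_reads)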

STEP 6 Visualize the Results

def draw_border(img, top_left, bottom_right, color=(0, 255, 0), thickness=6, line_length_x=200, line_length_y=200):
    x1, y1 = top_left
    x2, y2 = bottom_right

    cv.line(img, (x1, y1), (x1, y1 + line_length_y), color, thickness)  #-- top-left
    cv.line(img, (x1, y1), (x1 + line_length_x, y1), color, thickness)

    cv.line(img, (x1, y2), (x1, y2 - line_length_y), color, thickness)  #-- bottom-left
    cv.line(img, (x1, y2), (x1 + line_length_x, y2), color, thickness)

    cv.line(img, (x2, y1), (x2 - line_length_x, y1), color, thickness)  #-- top-right
    cv.line(img, (x2, y1), (x2, y1 + line_length_y), color, thickness)

    cv.line(img, (x2, y2), (x2, y2 - line_length_y), color, thickness)  #-- bottom-right
    cv.line(img, (x2, y2), (x2 - line_length_x, y2), color, thickness)

    return img
# read video by index
video = cv.VideoCapture(videos[1])

# get video dims
frame_width = int(video.get(3))
frame_height = int(video.get(4))
size = (frame_width, frame_height)

# Define the codec and create VideoWriter object
fourcc = cv.VideoWriter_fourcc(*'DIVX')
out = cv.VideoWriter('./outputs/processed.avi', fourcc, 20.0, size)

# reset video before you re-run cell below
frame_number = -1
video.set(cv.CAP_PROP_POS_FRAMES, 0)

True

ret = True

while ret:
    ret, frame = video.read()
    frame_number += 1
    if ret:
        df_ = results[results['frame_number'] == frame_number]
        for index in range(len(df_)):
            # draw car
            vhcl_x1, vhcl_y1, vhcl_x2, vhcl_y2 = ast.literal_eval(
                df_.iloc[index]['car_bbox']
                .replace('[ ', '[').replace('   ', ' ').replace('  ', ' ').replace(' ', ','))

            draw_border(
                frame, (int(vhcl_x1), int(vhcl_y1)),
                (int(vhcl_x2), int(vhcl_y2)), (0, 255, 0),
                12, line_length_x=200, line_length_y=200)

            # draw license plate
            plate_x1, plate_y1, plate_x2, plate_y2 = ast.literal_eval(
                df_.iloc[index]['license_plate_bbox']
                .replace('[ ', '[').replace('   ', ' ').replace('  ', ' ').replace(' ', ','))

            # region of interest
            roi = frame[int(vhcl_y1):int(vhcl_y2), int(vhcl_x1):int(vhcl_x2)]
            cv.rectangle(roi, (int(plate_x1), int(plate_y1)), (int(plate_x2), int(plate_y2)), (0, 0, 255), 6)

            # write detected number
            (text_width, text_height), _ = cv.getTextSize(
                df_.iloc[index]['license_plate_number'],
                cv.FONT_HERSHEY_SIMPLEX,
                2,
                6)

            cv.putText(
                frame,
                df_.iloc[index]['license_plate_number'],
                (int((vhcl_x2 + vhcl_x1 - text_width) / 2), int(vhcl_y1 - text_height)),
                cv.FONT_HERSHEY_SIMPLEX,
                2,
                (0, 255, 0),
                6
            )

        out.write(frame)
        frame = cv.resize(frame, (1280, 720))

out.release()
video.release()

(processed.webp - the final annotated output video)