Using Tensorflow Models in OpenCV

import cv2 as cv
import json
import matplotlib.pyplot as plt
import numpy as np
import random

Model

!cd model && wget http://download.tensorflow.org/models/object_detection/mask_rcnn_inception_v2_coco_2018_01_28.tar.gz
!cd model && tar zxvf mask_rcnn_inception_v2_coco_2018_01_28.tar.gz
!cd model && wget https://github.com/vjgpt/Object-Detection/raw/master/mask_rcnn_inception_v2_coco_2018_01_28.pbtxt
!cd model && wget https://github.com/vjgpt/Object-Detection/blob/master/mscoco_labels.names
# model configuration and weights
cfg_path = './model/mask_rcnn_inception_v2_coco_2018_01_28.pbtxt'
weight_path = './model/mask_rcnn_inception_v2_coco_2018_01_28/frozen_inference_graph.pb'
classes_path = './model/mscoco_labels.names'
# the labels file above was saved from the GitHub blob page rather than the raw file,
# so it is parsed as JSON and the label lines are pulled out of the page payload
with open(classes_path) as json_data:
    data = json.load(json_data)
print(data['payload']['blob']['rawLines'])

['person\r', 'bicycle\r', 'car\r', 'motorcycle\r', 'airplane\r', 'bus\r', 'train\r', 'truck\r', 'boat\r', 'traffic light\r', 'fire hydrant\r', '\r', 'stop sign\r', 'parking meter\r', 'bench\r', 'bird\r', 'cat\r', 'dog\r', 'horse\r', 'sheep\r', 'cow\r', 'elephant\r', 'bear\r', 'zebra\r', 'giraffe\r', '\r', 'backpack\r', 'umbrella\r', '\r', '\r', 'handbag\r', 'tie\r', 'suitcase\r', 'frisbee\r', 'skis\r', 'snowboard\r', 'sports ball\r', 'kite\r', 'baseball bat\r', 'baseball glove\r', 'skateboard\r', 'surfboard\r', 'tennis racket\r', 'bottle\r', '\r', 'wine glass\r', 'cup\r', 'fork\r', 'knife\r', 'spoon\r', 'bowl\r', 'banana\r', 'apple\r', 'sandwich\r', 'orange\r', 'broccoli\r', 'carrot\r', 'hot dog\r', 'pizza\r', 'donut\r', 'cake\r', 'chair\r', 'couch\r', 'potted plant\r', 'bed\r', '\r', 'dining table\r', '\r', '\r', 'toilet\r', '\r', 'tv\r', 'laptop\r', 'mouse\r', 'remote\r', 'keyboard\r', 'cell phone\r', 'microwave\r', 'oven\r', 'toaster\r', 'sink\r', 'refrigerator\r', '\r', 'book\r', 'clock\r', 'vase\r', 'scissors\r', 'teddy bear\r', 'hair drier\r', 'toothbrush']

class_names = data['payload']['blob']['rawLines']
len(class_names)

90
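Note that the extracted list still carries trailing '\r' characters and a few empty placeholder lines (COCO ids the model does not use). Below is a minimal clean-up sketch that keeps every entry in place so the class ids returned by the network still index correctly; the raw-file URL in the comment is an assumed alternative to the blob-page download above.

# strip trailing whitespace but keep the blank placeholder entries, so indices stay aligned
class_names = [name.strip() for name in class_names]

# assumed alternative: fetch the raw labels file instead of the GitHub blob page
# !cd model && wget https://raw.githubusercontent.com/vjgpt/Object-Detection/master/mscoco_labels.names
# with open(classes_path) as f:
#     class_names = [line.strip() for line in f]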

model = cv.dnn.readNetFromTensorflow(weight_path, cfg_path)
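Optionally, the DNN module lets you pin the compute backend and target explicitly; the defaults already run on the CPU, so this is only a sketch of the available knobs.

# optional: make the default OpenCV backend / CPU target explicit
model.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
model.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)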

Preprocessing

# load test image
img_path = './assets/bus.jpg'
img = cv.imread(img_path)
height, width, channels = img.shape
plt.imshow(img)

[output image: the test image]
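cv.imread returns the image in BGR channel order while matplotlib expects RGB, which is why the colours above look swapped. A minimal sketch to display it with the correct colours:

# convert OpenCV's BGR channel order to RGB before plotting with matplotlib
plt.imshow(cv.cvtColor(img, cv.COLOR_BGR2RGB))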

blob = cv.dnn.blobFromImage(img)
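blobFromImage packs the image into a 4-dimensional NCHW float tensor that the network consumes. A quick sanity check of the shape follows; the commented-out call only sketches the optional swapRB/crop arguments in case the network expects RGB input (the defaults are kept above).

# the blob is a 4D NCHW tensor: (batch, channels, rows, cols)
print(blob.shape)

# optional arguments, shown for reference only:
# blob = cv.dnn.blobFromImage(img, swapRB=True, crop=False)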

Model Predictions

def get_predictions(model, blob):
    model.setInput(blob)
    boxes, masks = model.forward(['detection_out_final', 'detection_masks'])
    return boxes, masks
boxes, masks = get_predictions(model, blob)
print(len(boxes), len(masks))
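Both outputs are plain NumPy arrays. With this model the detection tensor typically has the shape (1, 1, 100, 7), i.e. 100 candidate detections with 7 values each, and the mask tensor (100, 90, 15, 15), one 15x15 mask per class per detection. A quick sanity check:

# inspect the raw output shapes
print(boxes.shape)  # e.g. (1, 1, 100, 7)
print(masks.shape)  # e.g. (100, 90, 15, 15)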

Prediction Visualization

print(height, width, channels)
canvas = np.zeros((height, width, channels))
for j in range(len(masks)):
    bbox = boxes[0, 0, j]
    mask = masks[j]

    print(bbox)

class_id = bbox[1]
score = bbox[2]
# filter 100 detections by adding a detection confidence threshold
threshold = 0.5
# generate random colours for each class
colours = [(random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)) for j in range(len(class_names))]

Bounding Boxes

for j in range(len(masks)):
    bbox = boxes[0, 0, j]
    mask = masks[j]

    class_label = bbox[1]
    score = bbox[2]

    if score > threshold:
        # debug
        print(class_label)
        print(score)
        print(mask.shape)
        print(bbox.shape)

        # bbox corner positions in relative/normalized coordinates * img dimensions as int = pixel position
        x1, y1, x2, y2 = int(bbox[3] * width), int(bbox[4] * height), int(bbox[5] * width), int(bbox[6] * height)

        output = cv.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0))

plt.imshow(output)

# only keep mask for detected class label
mask = mask[int(class_label)]

# debug output e.g.
# 2.0 => class label (index into the 90 labels)
# 0.9532703 => prediction confidence
# (90, 15, 15) => one 15x15 mask per class (the 90 classes the model was trained with);
#                 each mask is relative to its bounding box and is resized to the box later
# (7,) => each detection holds 7 values: [batch_id, class_id, confidence, left, top, right, bottom]
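To make the 7-value layout concrete, here is a small sketch that decodes a single detection row into named fields; the index 0 and the variable names are illustrative only.

# illustrative only: unpack one detection row
det = boxes[0, 0, 0]  # [batch_id, class_id, confidence, left, top, right, bottom]
class_id, confidence = int(det[1]), det[2]
left, top = int(det[3] * width), int(det[4] * height)
right, bottom = int(det[5] * width), int(det[6] * height)
print(class_names[class_id], confidence, (left, top, right, bottom))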

[output image: bounding boxes drawn on the test image]

Segmentation Masks

for j in range(len(masks)):
    bbox = boxes[0, 0, j]
    score = bbox[2]

    if score > threshold:
        class_label = bbox[1]
        mask = masks[j]

        # bbox corner positions in relative/normalized coordinates * img dimensions as int = pixel position
        x1, y1, x2, y2 = int(bbox[3] * width), int(bbox[4] * height), int(bbox[5] * width), int(bbox[6] * height)

        # only keep the mask for the detected class label
        mask = mask[int(class_label)]
        # resize the 15x15 mask to the pixel size of its bounding box
        mask = cv.resize(mask, (x2 - x1, y2 - y1))
        # debug
        # print(mask.shape)

        # binarize the soft mask at 0.5
        _, mask = cv.threshold(mask, 0.5, 1, cv.THRESH_BINARY)

        for c in range(channels):
            # paint the mask region with the random colour assigned to this class
            canvas[y1:y2, x1:x2, c] = mask * colours[int(class_label)][c]

# cast to uint8 so matplotlib renders the 0-255 colour values instead of clipping floats
plt.imshow(canvas.astype('uint8'))

[output image: segmentation masks drawn on a black canvas]

overlay = ((0.8 * canvas) + (0.2 * img)).astype('uint8')
plt.imshow(overlay)

[output image: segmentation masks blended over the test image]
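The same blend can also be done with cv.addWeighted, which expects both inputs to share a dtype; the 0.8/0.2 weights mirror the line above, and the BGR-to-RGB conversion is only there so matplotlib shows the true colours.

# alternative blend with cv.addWeighted (both inputs as uint8)
overlay = cv.addWeighted(canvas.astype('uint8'), 0.8, img, 0.2, 0)
plt.imshow(cv.cvtColor(overlay, cv.COLOR_BGR2RGB))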