
Object Detection with Instance Segmentation

Detectron2 is a platform for object detection, segmentation and other visual recognition tasks.

  • Includes new capabilities such as panoptic segmentation, DensePose, Cascade R-CNN, rotated bounding boxes, PointRend, DeepLab, ViTDet, MViTv2, etc.
  • Can be used as a library to support building research projects on top of it (a minimal usage sketch follows this list).
  • Models can be exported to TorchScript format or Caffe2 format for deployment.
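
As a quick illustration of the library-style usage, here is a minimal sketch: load a pretrained COCO Mask R-CNN config from the model zoo and run it on a single image with DefaultPredictor. The image path test.jpg is only a placeholder.

import cv2
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # keep reasonably confident detections only

predictor = DefaultPredictor(cfg)            # handles preprocessing + single-image inference
outputs = predictor(cv2.imread("test.jpg"))  # expects a BGR image, returns {"instances": Instances}
print(outputs["instances"].pred_classes)     # COCO class ids of the detections
print(outputs["instances"].pred_boxes)       # absolute XYXY boxes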
!pip install opencv-python matplotlib
!python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'

Detectron2 :: Mask R-CNN R101 FPN

import detectron2

#https://github.com/facebookresearch/detectron2/blob/main/MODEL_ZOO.md
from detectron2 import model_zoo
from detectron2.data import DatasetCatalog, build_detection_test_loader
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.engine import DefaultTrainer, DefaultPredictor
from detectron2.config import get_cfg
from detectron2.structures import BoxMode
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog

import matplotlib.pyplot as plt
import numpy as np
import torch, os, json, cv2, random
CLASS_LABELS = ["balloon"]

Custom Dataset

dataset_path = '../datasets/balloon/'
# # if your dataset is in COCO format:
# from detectron2.data.datasets import register_coco_instances
# register_coco_instances("dataset_train", {}, "../datasets/balloon/train/via_region_data.json", "../datasets/balloon/train/")
# register_coco_instances("dataset_val", {}, "../datasets/balloon/val/via_region_data.json", "../datasets/balloon/val/")
def get_ds_dicts(dataset_path):
    json_file = os.path.join(dataset_path, "via_region_data.json")

    with open(json_file) as f:
        imgs_anns = json.load(f)

    dataset_dicts = []

    for idx, v in enumerate(imgs_anns.values()):
        record = {}
        filename = os.path.join(dataset_path, v["filename"])
        height, width = cv2.imread(filename).shape[:2]

        record["file_name"] = filename
        record["image_id"] = idx
        record["height"] = height
        record["width"] = width

        annos = v["regions"]
        objs = []

        for _, anno in annos.items():
            assert not anno["region_attributes"]
            anno = anno["shape_attributes"]
            px = anno["all_points_x"]
            py = anno["all_points_y"]
            poly = [(x + 0.5, y + 0.5) for x, y in zip(px, py)]
            poly = [p for x in poly for p in x]

            obj = {
                "bbox": [np.min(px), np.min(py), np.max(px), np.max(py)],
                "bbox_mode": BoxMode.XYXY_ABS,
                "segmentation": [poly],
                "category_id": 0,
            }

            objs.append(obj)

        record["annotations"] = objs
        dataset_dicts.append(record)

    return dataset_dicts
for d in ["train", "val"]:
DatasetCatalog.register("balloon_" + d, lambda d=d: get_ds_dicts(dataset_path + d))
MetadataCatalog.get("balloon_" + d).set(thing_classes=CLASS_LABELS)
metadata = MetadataCatalog.get("balloon_train")
dataset_dicts = get_ds_dicts(dataset_path + "train")
dataset_dicts[0]["file_name"]

'../datasets/balloon/train/34020010494_e5cb88e1c4_k.jpg'
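
As a quick sanity check, the records produced above follow Detectron2's standard dataset-dict format; a small sketch to inspect one record (printed values will depend on the image):

sample = dataset_dicts[0]
print(sample.keys())                          # file_name, image_id, height, width, annotations
print(len(sample["annotations"]))             # number of balloon instances in this image
print(sample["annotations"][0]["bbox_mode"])  # BoxMode.XYXY_ABS, matching get_ds_dicts()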

for d in random.sample(dataset_dicts, 1):
    img = plt.imread(d["file_name"])
    # plt.imshow(img)
    visualizer = Visualizer(img[:, :, ::-1], metadata=metadata, scale=0.5)
    out = visualizer.draw_dataset_dict(d)
    plt.imshow(out.get_image()[:, :, ::-1])


Model Training

# https://github.com/facebookresearch/detectron2/tree/main/configs/COCO-InstanceSegmentation
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("balloon_train",)
cfg.DATASETS.TEST = ()
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml") # initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025
cfg.SOLVER.MAX_ITER = 500
cfg.SOLVER.STEPS = [] # do not decay learning rate
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128 # faster, and good enough for this toy dataset
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1 # only one class (balloon)
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()
  • iter: 499 total_loss: 0.2681 loss_cls: 0.04987 loss_box_reg: 0.1311 loss_mask: 0.06432 loss_rpn_cls: 0.003482 loss_rpn_loc: 0.009058 time: 0.9189 last_time: 0.7537 data_time: 0.0041 last_data_time: 0.0046 lr: 0.0002495 max_mem: 3528M
  • Overall training speed: 498 iterations in 0:07:37 (0.9189 s / it)
  • Total training time: 0:07:48 (0:00:10 on hooks)
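
cfg.DATASETS.TEST was left empty above, so DefaultTrainer runs no validation during training. If periodic validation metrics are wanted, one common (optional) pattern is to subclass DefaultTrainer and override build_evaluator; a sketch:

class BalloonTrainer(DefaultTrainer):
    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        # Called when cfg.DATASETS.TEST is set and cfg.TEST.EVAL_PERIOD > 0
        if output_folder is None:
            output_folder = os.path.join(cfg.OUTPUT_DIR, "eval")
        return COCOEvaluator(dataset_name, output_dir=output_folder)

# Usage sketch: set cfg.DATASETS.TEST = ("balloon_val",) and cfg.TEST.EVAL_PERIOD = 100,
# then train with BalloonTrainer(cfg) instead of DefaultTrainer(cfg).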

Inference & Evaluation

%load_ext tensorboard
%tensorboard --logdir output

Detectron2 :: Mask R-CNN R101 FPN

cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7 # set the testing threshold for this model
cfg.DATASETS.TEST = ("balloon_val", )
predictor = DefaultPredictor(cfg)

[08/26 12:15:35 d2.checkpoint.detection_checkpoint]: [DetectionCheckpointer] Loading from ./output/model_final.pth ...

dataset_dicts = get_ds_dicts(dataset_path + "val")
plt.figure(figsize=(14, 10))

c = 1

for i in random.sample(dataset_dicts, 4):
    im = cv2.imread(i["file_name"])
    outputs = predictor(im)

    v = Visualizer(im[:, :, ::-1], metadata=metadata, scale=0.8)
    v = v.draw_instance_predictions(outputs["instances"].to("cpu"))

    ax = plt.subplot(2, 2, c)
    plt.imshow(cv2.cvtColor(v.get_image()[:, :, ::-1], cv2.COLOR_BGR2RGB))
    plt.axis("off")

    c += 1

plt.savefig("./assets/Object_Detection_Detectron2_05.webp", bbox_inches='tight')

Detectron2 :: Mask R-CNN R101 FPN

evaluator = COCOEvaluator("balloon_val", ("bbox",), False, output_dir="./output/")
val_loader = build_detection_test_loader(cfg, "balloon_val")

print(inference_on_dataset(trainer.model, val_loader, evaluator))

Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.801
Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.934
Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.900
Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.379
Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.665
Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.909
Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.240
Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.810
Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.834
Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.500
Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.729
Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.927

Evaluation results for bbox:

|   AP   |  AP50  |  AP75  |  APs   |  APm   |  APl   |
|:------:|:------:|:------:|:------:|:------:|:------:|
| 80.116 | 93.379 | 89.982 | 37.850 | 66.528 | 90.944 |

OrderedDict([('bbox', {'AP': 80.11559882221059, 'AP50': 93.37913947975808, 'AP75': 89.98231932673679, 'APs': 37.85007072135785, 'APm': 66.5283878216012, 'APl': 90.94365376771259})])
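
Note that the evaluator above only reports box AP; the mask head can be scored as well by adding the segm task. A sketch (not run here):

evaluator = COCOEvaluator("balloon_val", ("bbox", "segm"), False, output_dir="./output/")
val_loader = build_detection_test_loader(cfg, "balloon_val")
print(inference_on_dataset(predictor.model, val_loader, evaluator))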