Related:
- Image Segmentation with PyTorch
- Semantic Segmentation Detectron2 Model Zoo
- Semantic Segmentation Detectron2 Model Zoo: Faster RCNN
- Semantic Segmentation Detectron2 Model Zoo: Mask RCNN
- Detectron Object Detection with OpenImages Dataset (WIP)
Object Detection with Instance Segmentation
Detectron2 is a platform for object detection, segmentation and other visual recognition tasks.
- Includes new capabilities such as panoptic segmentation, DensePose, Cascade R-CNN, rotated bounding boxes, PointRend, DeepLab, ViTDet, MViTv2, etc.
- Used as a library to support building research projects on top of it.
- Models can be exported to TorchScript format or Caffe2 format for deployment.
# Notebook shell commands: install the image/plotting dependencies, then
# Detectron2 itself directly from its GitHub repository.
!pip install opencv-python matplotlib
!python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'
Detectron2 :: Mask RCNN R101 FPN
import detectron2
#https://github.com/facebookresearch/detectron2/blob/main/MODEL_ZOO.md
from detectron2 import model_zoo
from detectron2.data import DatasetCatalog, build_detection_test_loader
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.engine import DefaultTrainer, DefaultPredictor
from detectron2.config import get_cfg
from detectron2.structures import BoxMode
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog
import matplotlib.pyplot as plt
import numpy as np
import torch, os, json, cv2, random
# Class names of the custom dataset, registered below as `thing_classes`.
# The loader assigns category_id 0 to every annotation, i.e. the first entry.
CLASS_LABELS = ["balloon"]
Custom Dataset
# Root folder of the balloon dataset. The notebook appends "train"/"val" by
# plain string concatenation (e.g. dataset_path + "train"), so keep the
# trailing slash.
dataset_path = '../datasets/balloon/'
# Disabled alternative for datasets whose annotations are already in COCO format.
# NOTE(review): the json paths below are the VIA files this notebook parses by
# hand; presumably they would have to be replaced by real COCO json files.
# from detectron2.data.datasets import register_coco_instances
# register_coco_instances("dataset_train", {}, "../datasets/balloon/train/via_region_data.json", "../datasets/balloon/train/")
# register_coco_instances("dataset_val", {}, "../datasets/balloon/val/via_region_data.json", "../datasets/balloon/val/")
def get_ds_dicts(dataset_path):
    """Build Detectron2 dataset dicts from a VIA-annotated image folder.

    Reads ``via_region_data.json`` inside ``dataset_path`` and creates one
    record per image entry, with one polygon annotation per region.

    Args:
        dataset_path: Directory containing the images and the
            ``via_region_data.json`` annotation file.

    Returns:
        A list of dicts with the keys ``file_name``, ``image_id``,
        ``height``, ``width`` and ``annotations``. Every annotation holds
        an absolute XYXY ``bbox``, a flat ``segmentation`` polygon and
        ``category_id`` 0.

    Raises:
        FileNotFoundError: If an image named in the json cannot be read.
        AssertionError: If a region carries non-empty region attributes.
    """
    json_file = os.path.join(dataset_path, "via_region_data.json")
    with open(json_file, encoding="utf-8") as f:
        imgs_anns = json.load(f)

    dataset_dicts = []
    for idx, v in enumerate(imgs_anns.values()):
        filename = os.path.join(dataset_path, v["filename"])

        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than on `.shape` below.
        image = cv2.imread(filename)
        if image is None:
            raise FileNotFoundError(
                f"Could not read image listed in {json_file}: {filename}"
            )
        height, width = image.shape[:2]

        # "regions" may be a dict keyed by region index or a plain list,
        # depending on how the annotations were exported; accept both.
        regions = v["regions"]
        if isinstance(regions, dict):
            regions = regions.values()

        objs = []
        for region in regions:
            assert not region["region_attributes"], (
                f"Unexpected region attributes for {filename}"
            )
            shape = region["shape_attributes"]
            px = shape["all_points_x"]
            py = shape["all_points_y"]
            # Flatten to [x0, y0, x1, y1, ...]; the 0.5 offset presumably
            # moves integer pixel indices to pixel centres (TODO confirm).
            poly = [
                coord
                for x, y in zip(px, py)
                for coord in (x + 0.5, y + 0.5)
            ]
            objs.append(
                {
                    "bbox": [np.min(px), np.min(py), np.max(px), np.max(py)],
                    "bbox_mode": BoxMode.XYXY_ABS,
                    "segmentation": [poly],
                    "category_id": 0,
                }
            )

        dataset_dicts.append(
            {
                "file_name": filename,
                "image_id": idx,
                "height": height,
                "width": width,
                "annotations": objs,
            }
        )
    return dataset_dicts
# Register the train/val splits with Detectron2 and attach the class names.
for d in ["train", "val"]:
    split_name = "balloon_" + d
    # DatasetCatalog.register rejects a name that is already registered;
    # drop the stale entry so this cell can be re-run in the same kernel.
    if split_name in DatasetCatalog.list():
        DatasetCatalog.remove(split_name)
    # `d=d` binds the current split now; a plain closure would see only the
    # last loop value when the catalog calls the loader later.
    DatasetCatalog.register(
        split_name, lambda d=d: get_ds_dicts(os.path.join(dataset_path, d))
    )
    MetadataCatalog.get(split_name).set(thing_classes=CLASS_LABELS)
# Metadata handle reused by the visualisation cells.
metadata = MetadataCatalog.get("balloon_train")
# Load the training split once for inspection; the bare expression on the
# last line displays the first image path as the cell output.
dataset_dicts = get_ds_dicts(dataset_path + "train")
dataset_dicts[0]["file_name"]
'../datasets/balloon/train/34020010494_e5cb88e1c4_k.jpg'
# Preview one random training image with its ground-truth annotations drawn.
for d in random.sample(dataset_dicts, 1):
    # plt.imread yields RGB for these JPEG files, which is the channel order
    # Visualizer expects and plt.imshow displays, so no BGR flip is needed
    # on the way in or out.
    img = plt.imread(d["file_name"])
    visualizer = Visualizer(img, metadata=metadata, scale=0.5)
    out = visualizer.draw_dataset_dict(d)
    plt.imshow(out.get_image())
Model Training
# https://github.com/facebookresearch/detectron2/tree/main/configs/COCO-InstanceSegmentation
# Start from Detectron2's default config and overlay the model-zoo settings
# for Mask R-CNN R101-FPN (COCO instance segmentation, 3x schedule).
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml"))
# Train on the registered balloon training split; no test set during training.
cfg.DATASETS.TRAIN = ("balloon_train",)
cfg.DATASETS.TEST = ()
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml") # initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025
cfg.SOLVER.MAX_ITER = 500
cfg.SOLVER.STEPS = [] # do not decay learning rate
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128 # faster, and good enough for this toy dataset
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1 # only has one class (balloon)
# The inference cell later loads model_final.pth from cfg.OUTPUT_DIR, so the
# directory must exist before training starts.
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg)
# resume=False: presumably starts from cfg.MODEL.WEIGHTS instead of picking up
# an earlier checkpoint in OUTPUT_DIR -- confirm against the Detectron2 docs.
trainer.resume_or_load(resume=False)
trainer.train()
- iter: 499 total_loss: 0.2681 loss_cls: 0.04987 loss_box_reg: 0.1311 loss_mask: 0.06432 loss_rpn_cls: 0.003482 loss_rpn_loc: 0.009058 time: 0.9189 last_time: 0.7537 data_time: 0.0041 last_data_time: 0.0046 lr: 0.0002495 max_mem: 3528M
- Overall training speed: 498 iterations in 0:07:37 (0.9189 s / it)
- Total training time: 0:07:48 (0:00:10 on hooks)
Inference & Evaluation
# Notebook magics: load the TensorBoard extension and open it on the
# "output" folder.
%load_ext tensorboard
%tensorboard --logdir output
# Re-use the training config for inference: point the weights at the
# checkpoint in the output directory and select the validation split.
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7 # set the testing threshold for this model
cfg.DATASETS.TEST = ("balloon_val", )
# Predictor built from the updated config; called on single images below.
predictor = DefaultPredictor(cfg)
[08/26 12:15:35 d2.checkpoint.detection_checkpoint]: [DetectionCheckpointer] Loading from ./output/model_final.pth ...
# Run the predictor on random validation images and save a 2x2 grid of the
# drawn predictions.
dataset_dicts = get_ds_dicts(dataset_path + "val")
plt.figure(figsize=(14, 10))
# The grid has four cells; sample fewer if the split is smaller, because
# random.sample raises when asked for more items than exist.
samples = random.sample(dataset_dicts, min(4, len(dataset_dicts)))
for c, sample in enumerate(samples, start=1):
    im = cv2.imread(sample["file_name"])  # BGR, passed unchanged to the predictor
    outputs = predictor(im)
    # Visualizer works in RGB, so flip the BGR image once on the way in.
    vis = Visualizer(im[:, :, ::-1], metadata=metadata, scale=0.8)
    drawn = vis.draw_instance_predictions(outputs["instances"].to("cpu"))
    plt.subplot(2, 2, c)
    # get_image() is already RGB, which is what plt.imshow expects.
    plt.imshow(drawn.get_image())
    plt.axis("off")
# savefig does not create missing parent directories.
os.makedirs("./assets", exist_ok=True)
plt.savefig("./assets/Object_Detection_Detectron2_05.webp", bbox_inches='tight')
# Score bounding-box predictions on the validation split with COCO metrics;
# evaluator artefacts are written to ./output/.
evaluator = COCOEvaluator(
    "balloon_val",
    tasks=("bbox",),
    distributed=False,
    output_dir="./output/",
)
val_loader = build_detection_test_loader(cfg, "balloon_val")
eval_results = inference_on_dataset(trainer.model, val_loader, evaluator)
print(eval_results)
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.801
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.934
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.900
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.379
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.665
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.909
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.240
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.810
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.834
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.500
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.729
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.927
Evaluation results for bbox:
AP | AP50 | AP75 | APs | APm | APl |
---|---|---|---|---|---|
80.116 | 93.379 | 89.982 | 37.850 | 66.528 | 90.944 |
OrderedDict([('bbox', {'AP': 80.11559882221059, 'AP50': 93.37913947975808, 'AP75': 89.98231932673679, 'APs': 37.85007072135785, 'APm': 66.5283878216012, 'APl': 90.94365376771259})]) |
Average Precision | (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.801 |
Average Precision | (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.934 |
Average Precision | (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.900 |
Average Precision | (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.379 |
Average Precision | (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.665 |
Average Precision | (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.909 |
Average Recall | (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.240 |
Average Recall | (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.810 |
Average Recall | (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.834 |
Average Recall | (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.500 |
Average Recall | (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.729 |
Average Recall | (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.927 |
Evaluation results for bbox:
AP | AP50 | AP75 | APs | APm | APl |
---|---|---|---|---|---|
80.116 | 93.379 | 89.982 | 37.850 | 66.528 | 90.944 |