from torchvision.models.detection import maskrcnn_resnet50_fpn, MaskRCNN_ResNet50_FPN_Weights
from torchvision.utils import draw_bounding_boxes, draw_segmentation_masks, make_grid
weights = MaskRCNN_ResNet50_FPN_Weights.DEFAULT
transforms = weights.transforms()
images = [transforms(d) for d in bird_list]
model = maskrcnn_resnet50_fpn(weights=weights, progress=False)
model = model.eval()
output = model(images)
# print(output)
score_threshold = .75
proba_threshold = 0.5  # mask pixels above this probability are kept
boolean_masks = [
    out['masks'][out['scores'] > score_threshold] > proba_threshold
    for out in output
]
boxes = [
    out['boxes'][out['scores'] > score_threshold]
    for out in output
]
bird_with_boxes = [
    draw_bounding_boxes(img, boxes=box, width=2, labels=['bird', 'bird'])  # labels are hard-coded here; COCO class names can be looked up from the predicted label indices
    for img, box in zip(bird_list, boxes)
]
bird_with_masksandboxes = [
    draw_segmentation_masks(img, mask.squeeze(1), alpha=0.7)
    for img, mask in zip(bird_with_boxes, boolean_masks)
]
# compare with the original images
bird_with_masksandboxes.append(bird_int)
imgs = make_grid(bird_with_masksandboxes)
show(imgs)
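The show helper called above is not defined in this excerpt; a minimal sketch of what it is assumed to do (convert a uint8 (C, H, W) image tensor to PIL and display it with matplotlib) could look like this:

import matplotlib.pyplot as plt
import torchvision.transforms.functional as F

def show(img):
    # img: uint8 image tensor of shape (C, H, W), e.g. the output of make_grid
    plt.imshow(F.to_pil_image(img.detach()))
    plt.axis("off")
    plt.show()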
Result:


Model loading and invocation:
model = maskrcnn_resnet50_fpn(weights=MaskRCNN_ResNet50_FPN_Weights.DEFAULT, progress=False)
model = model.eval()
output = model(images)
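The model returns one dict per input image with the keys boxes, labels, scores, and masks. A quick way to inspect the structure (shapes shown in the comment are illustrative, N being the number of detections):

for out in output:
    print({k: tuple(v.shape) for k, v in out.items()})
# e.g. {'boxes': (N, 4), 'labels': (N,), 'scores': (N,), 'masks': (N, 1, H, W)}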
Inference process:
## torchvision/models/detection/generalized_rcnn.py
class GeneralizedRCNN(nn.Module):
    def forward(self, images, targets=None):
        # 1. Resize the input images to a uniform size (799x1202 in this example)
        #    so that the ResNet backbone extracts fixed-size feature maps
        images, targets = self.transform(images, targets)
        # 2. The backbone extracts the feature maps (ResNet by default)
        features = self.backbone(images.tensors)
        # 3. The RPN takes the feature maps and outputs anchor-box proposals
        proposals, proposal_losses = self.rpn(images, features, targets)
        # 4. roi_heads processes the proposals and feature maps and produces the final
        #    instance boxes, a class label per object, and 28x28 segmentation masks
        detections, detector_losses = self.roi_heads(features, proposals, images.image_sizes, targets)
        # 5. Map the results back to the original image size: masks become per-pixel
        #    instance assignments, boxes become coordinates in the original image
        detections = self.transform.postprocess(detections, images.image_sizes, original_image_sizes)
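Step 1 can be inspected directly: model.transform is a GeneralizedRCNNTransform that rescales each image (shorter side to roughly min_size, longer side capped at max_size) and batches them into one padded tensor. A small sketch, assuming the model and images objects from the code above:

import torch

with torch.no_grad():
    image_list, _ = model.transform(images)   # step 1 of forward()
print(model.transform)             # repr shows the min_size / max_size configuration
print(image_list.tensors.shape)    # padded batch tensor, e.g. (2, 3, 799, 1202)
print(image_list.image_sizes)      # per-image sizes before padding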
Main structure of the RPN:



First, a 3*3 convolution is applied to the feature map; then, for each of the k anchors proposed at every position, the head predicts an objectness classification (instance or not) and the box offsets.
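A simplified sketch of that head structure (mirroring the idea of torchvision's RPNHead; the class and parameter names here are illustrative, not the library's own):

import torch
from torch import nn

class SimpleRPNHead(nn.Module):
    def __init__(self, in_channels: int, num_anchors: int):
        super().__init__()
        # shared 3x3 convolution over the feature map
        self.conv = nn.Conv2d(in_channels, in_channels, kernel_size=3, padding=1)
        # per-anchor objectness score (instance vs. background)
        self.cls_logits = nn.Conv2d(in_channels, num_anchors, kernel_size=1)
        # per-anchor box offsets (dx, dy, dw, dh)
        self.bbox_pred = nn.Conv2d(in_channels, num_anchors * 4, kernel_size=1)

    def forward(self, features):
        # features: list of feature maps, one per FPN scale
        objectness, bbox_deltas = [], []
        for feat in features:
            t = torch.relu(self.conv(feat))
            objectness.append(self.cls_logits(t))
            bbox_deltas.append(self.bbox_pred(t))
        return objectness, bbox_deltas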


In the original Faster R-CNN paper, 3*3 = 9 anchors are generated at each feature-map position, corresponding to 3 scales and 3 aspect ratios. The official PyTorch implementation instead uses multi-scale (FPN) feature maps and generates 3 anchors at each position of each feature map. After generation, anchors with low objectness probability are filtered out, leaving the final box-coordinate proposals; running this example yields 1000 proposal coordinates.
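This configuration can be checked on the pretrained model itself (assuming the model object from above; the values in the comments are what the default configuration is expected to print):

ag = model.rpn.anchor_generator
print(ag.sizes)                        # e.g. ((32,), (64,), (128,), (256,), (512,)): one size per FPN level
print(ag.aspect_ratios)                # e.g. ((0.5, 1.0, 2.0), ...): three ratios per level
print(ag.num_anchors_per_location())   # e.g. [3, 3, 3, 3, 3]
print(model.rpn.post_nms_top_n())      # e.g. 1000 proposals kept after NMS in eval mode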