faster rcnn源码理解(二)之AnchorTargetLayer(网络中的rpn_data)

faster用python版本的 https://github.com/rbgirshick/py-faster-rcnn

AnchorTargetLayer源码在 https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/rpn/anchor_target_layer.py

源码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
# --------------------------------------------------------
# Faster R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick and Sean Bell
# --------------------------------------------------------

import os
import caffe
import yaml
from fast_rcnn.config import cfg
import numpy as np
import numpy.random as npr
from generate_anchors import generate_anchors
from utils.cython_bbox import bbox_overlaps
from fast_rcnn.bbox_transform import bbox_transform

# Set to True to print anchor / label diagnostics from AnchorTargetLayer.
DEBUG = False

class AnchorTargetLayer(caffe.Layer):
    """
    Assign anchors to ground-truth targets. Produces anchor classification
    labels and bounding-box regression targets.

    Bottom blobs read by this layer:
        bottom[0]: a map whose last two dimensions (H, W) define the grid of
                   anchor locations (only its shape is used)
        bottom[1]: ground-truth boxes, one (x1, y1, x2, y2, label) row each
        bottom[2]: im_info; row 0 is indexed as (height, width, scale)

    Top blobs written by forward():
        top[0]: labels, shape (1, 1, A * H, W); 1 = positive, 0 = negative,
                -1 = don't care
        top[1]: bbox_targets, shape (1, A * 4, H, W)
        top[2]: bbox_inside_weights, shape (1, A * 4, H, W)
        top[3]: bbox_outside_weights, shape (1, A * 4, H, W)

    where A is the number of base anchors.
    """

    def setup(self, bottom, top):
        """Parse the layer parameters, build the base anchors and give the
        top blobs their initial shapes."""
        # NOTE(review): yaml.load can construct arbitrary Python objects. The
        # parameter string comes from the network definition, so only load
        # definitions you trust (or consider yaml.safe_load).
        layer_params = yaml.load(self.param_str_)
        anchor_scales = layer_params.get('scales', (8, 16, 32))
        # Base anchors, one (x1, y1, x2, y2) row per anchor.
        self._anchors = generate_anchors(scales=np.array(anchor_scales))
        # Number of base anchors placed at every grid location.
        self._num_anchors = self._anchors.shape[0]
        # Step, in image pixels, between neighbouring grid locations
        # (required parameter; there is no default).
        self._feat_stride = layer_params['feat_stride']

        if DEBUG:
            print('anchors:')
            print(self._anchors)
            print('anchor shapes:')
            print(np.hstack((
                self._anchors[:, 2::4] - self._anchors[:, 0::4],
                self._anchors[:, 3::4] - self._anchors[:, 1::4],
            )))
            # Running statistics that are only updated by the DEBUG
            # sections of forward().
            self._counts = cfg.EPS
            self._sums = np.zeros((1, 4))
            self._squared_sums = np.zeros((1, 4))
            self._fg_sum = 0
            self._bg_sum = 0
            self._count = 0

        # allow boxes to sit over the edge by a small amount
        self._allowed_border = layer_params.get('allowed_border', 0)

        # Only the spatial size of bottom[0] is needed here.
        height, width = bottom[0].data.shape[-2:]
        if DEBUG:
            print('AnchorTargetLayer: height {} width {}'.format(
                height, width))

        A = self._num_anchors
        # labels
        top[0].reshape(1, 1, A * height, width)
        # bbox_targets
        top[1].reshape(1, A * 4, height, width)
        # bbox_inside_weights
        top[2].reshape(1, A * 4, height, width)
        # bbox_outside_weights
        top[3].reshape(1, A * 4, height, width)

    def forward(self, bottom, top):
        """Label every anchor and compute its regression target and weights.

        Algorithm:
          1. place the A base anchors at each of the H * W grid locations
          2. drop anchors that cross the image boundary
          3. measure the overlap of the remaining anchors with the GT boxes
          4. label anchors as positive / negative / don't care and subsample
             them down to the configured batch size
          5. compute regression targets and loss weights
          6. scatter the results back to the full anchor set and write them
             to the top blobs
        """
        assert bottom[0].data.shape[0] == 1, \
            'Only single item batches are supported'

        # map of shape (..., H, W)
        height, width = bottom[0].data.shape[-2:]
        # GT boxes (x1, y1, x2, y2, label); the number of rows varies
        gt_boxes = bottom[1].data
        # im_info
        im_info = bottom[2].data[0, :]

        if DEBUG:
            print('')
            print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
            print('scale: {}'.format(im_info[2]))
            print('height, width: ({}, {})'.format(height, width))
            print('rpn: gt_boxes.shape {}'.format(gt_boxes.shape))
            print('rpn: gt_boxes {}'.format(gt_boxes))

        # 1. Generate the shifted anchors for every grid location
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]  # K = width * height grid locations
        all_anchors = (self._anchors.reshape((1, A, 4)) +
                       shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
        all_anchors = all_anchors.reshape((K * A, 4))
        # Number of anchors including those that cross the image boundary.
        total_anchors = int(K * A)

        # only keep anchors inside the image
        inds_inside = np.where(
            (all_anchors[:, 0] >= -self._allowed_border) &
            (all_anchors[:, 1] >= -self._allowed_border) &
            (all_anchors[:, 2] < im_info[1] + self._allowed_border) &  # width
            (all_anchors[:, 3] < im_info[0] + self._allowed_border)    # height
        )[0]

        if DEBUG:
            print('total_anchors {}'.format(total_anchors))
            print('inds_inside {}'.format(len(inds_inside)))

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]
        if DEBUG:
            print('anchors.shape {}'.format(anchors.shape))

        # label: 1 is positive, 0 is negative, -1 is dont care
        labels = np.empty((len(inds_inside), ), dtype=np.float32)
        labels.fill(-1)

        # overlaps between the anchors and the gt boxes
        # overlaps (ex, gt)
        # np.float64 is what the removed np.float alias resolved to.
        overlaps = bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float64),
            np.ascontiguousarray(gt_boxes, dtype=np.float64))
        # For every anchor: index of, and overlap with, its best GT box.
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        # For every GT box: the best overlap achieved by any anchor ...
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        # ... and the row index of every anchor that reaches that best
        # overlap, so ties are all kept.
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1

        # fg label: above threshold IOU
        labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber
            # positives
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # subsample positive labels if we have too many
        num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
        fg_inds = np.where(labels == 1)[0]
        if len(fg_inds) > num_fg:
            disable_inds = npr.choice(
                fg_inds, size=(len(fg_inds) - num_fg), replace=False)
            labels[disable_inds] = -1

        # subsample negative labels if we have too many
        num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
        bg_inds = np.where(labels == 0)[0]
        if len(bg_inds) > num_bg:
            disable_inds = npr.choice(
                bg_inds, size=(len(bg_inds) - num_bg), replace=False)
            labels[disable_inds] = -1

        # Regression target of every inside anchor towards its best GT box.
        bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

        # Inside weights are non-zero for positive anchors only.
        bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
        bbox_inside_weights[labels == 1, :] = np.array(
            cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

        bbox_outside_weights = np.zeros((len(inds_inside), 4),
                                        dtype=np.float32)
        if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
            # uniform weighting of examples (given non-uniform sampling)
            num_examples = np.sum(labels >= 0)
            positive_weights = np.ones((1, 4)) * 1.0 / num_examples
            negative_weights = np.ones((1, 4)) * 1.0 / num_examples
        else:
            assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                    (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
            positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                                np.sum(labels == 1))
            negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                                np.sum(labels == 0))
        bbox_outside_weights[labels == 1, :] = positive_weights
        bbox_outside_weights[labels == 0, :] = negative_weights

        if DEBUG:
            self._sums += bbox_targets[labels == 1, :].sum(axis=0)
            self._squared_sums += (bbox_targets[labels == 1, :] ** 2).sum(axis=0)
            self._counts += np.sum(labels == 1)
            means = self._sums / self._counts
            stds = np.sqrt(self._squared_sums / self._counts - means ** 2)
            print('means:')
            print(means)
            print('stdevs:')
            print(stds)

        # map up to original set of anchors
        labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
        bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
        bbox_inside_weights = _unmap(
            bbox_inside_weights, total_anchors, inds_inside, fill=0)
        bbox_outside_weights = _unmap(
            bbox_outside_weights, total_anchors, inds_inside, fill=0)

        if DEBUG:
            print('rpn: max max_overlap {}'.format(np.max(max_overlaps)))
            print('rpn: num_positive {}'.format(np.sum(labels == 1)))
            print('rpn: num_negative {}'.format(np.sum(labels == 0)))
            self._fg_sum += np.sum(labels == 1)
            self._bg_sum += np.sum(labels == 0)
            self._count += 1
            print('rpn: num_positive avg {}'.format(
                self._fg_sum / self._count))
            print('rpn: num_negative avg {}'.format(
                self._bg_sum / self._count))

        # labels
        labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
        labels = labels.reshape((1, 1, A * height, width))
        top[0].reshape(*labels.shape)
        top[0].data[...] = labels

        # bbox_targets
        bbox_targets = bbox_targets \
            .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
        top[1].reshape(*bbox_targets.shape)
        top[1].data[...] = bbox_targets

        # bbox_inside_weights
        bbox_inside_weights = bbox_inside_weights \
            .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
        assert bbox_inside_weights.shape[2] == height
        assert bbox_inside_weights.shape[3] == width
        top[2].reshape(*bbox_inside_weights.shape)
        top[2].data[...] = bbox_inside_weights

        # bbox_outside_weights
        bbox_outside_weights = bbox_outside_weights \
            .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
        assert bbox_outside_weights.shape[2] == height
        assert bbox_outside_weights.shape[3] == width
        top[3].reshape(*bbox_outside_weights.shape)
        top[3].data[...] = bbox_outside_weights

    def backward(self, top, propagate_down, bottom):
        """This layer does not propagate gradients."""
        pass

    def reshape(self, bottom, top):
        """Reshaping happens during the call to forward."""
        pass


def _unmap(data, count, inds, fill=0):
""" Unmap a subset of item (data) back to the original set of items (of
size count) """
if len(data.shape) == 1:
ret = np.empty((count, ), dtype=np.float32)
ret.fill(fill)
ret[inds] = data
else:
ret = np.empty((count, ) + data.shape[1:], dtype=np.float32)
ret.fill(fill)
ret[inds, :] = data
return ret


def _compute_targets(ex_rois, gt_rois):
    """Compute bounding-box regression targets for an image.

    Args:
        ex_rois: (N, 4) array of example boxes (the anchors).
        gt_rois: (N, 5) array with the ground-truth box paired with each
            example box; only the first four columns are used, the fifth
            is dropped before the transform.

    Returns:
        The result of ``bbox_transform`` on the paired boxes, as float32.
    """
    # Rows must be paired one-to-one and have the expected widths.
    assert ex_rois.shape[0] == gt_rois.shape[0]
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 5

    # copy=False avoids a second allocation when the result is already
    # float32.
    targets = bbox_transform(ex_rois, gt_rois[:, :4])
    return targets.astype(np.float32, copy=False)

代码讲解

rpn-data是AnchorTargetLayer
bottom长度为4:bottom[0],map(本层只用到它的高和宽);bottom[1],gt boxes,每行为(x1, y1, x2, y2, label);bottom[2],im_info(图片高、宽、缩放比例);bottom[3],图片数据(本层代码中没有读取)
self._feat_stride:网络中参数16
self._anchors:九个基础anchor的坐标,每行为(x1, y1, x2, y2);对原始的wh做横向纵向变化(长宽比),并按scales放大缩小得到九个
self._num_anchors:anchor的个数

inds_inside:没有过界的anchors索引
anchors:没有过界的anchors
argmax_overlaps:overlaps每行最大值索引
total_anchors: K × A,所有anchors个数,包括越界的
K: width × height
A: 9

gt_boxes:长度不定

bbox_overlaps: 返回overlaps,形状为(len(inds_inside), len(gt_boxes)),第i行第j列是第i个anchor与第j个gt box的重叠度(IoU)

论文笔记
我们分配正标签给两类anchor:
(i)与某个ground truth(GT)包围盒有最高的IoU(Intersection-over-Union,交集并集之比)重叠的anchor(也许不到0.7)
(ii)与任意GT包围盒有大于0.7的IoU交叠的anchor

labels:0,bg;1,fg;-1,don't care;长度为len(inds_inside);overlaps每列最大值对应的行(anchor)=1;overlaps行最大值 >= 0.7(cfg.TRAIN.RPN_POSITIVE_OVERLAP)的行=1;overlaps行最大值 < 0.3(cfg.TRAIN.RPN_NEGATIVE_OVERLAP)的行=0
正样本数量由他们控制:cfg.TRAIN.RPN_FG_FRACTION × cfg.TRAIN.RPN_BATCHSIZE(128),小于等于
负样本数量。。。。。:cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
cfg.TRAIN.RPN_BATCHSIZE: 256,控制最终参与RPN训练的anchor(正样本+负样本)总数
超出数量的anchor会被随机选中并把label置为-1(不参与训练)

bbox_inside_weights: label等于1的行,它的值等于cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS(1.0);其他等于0;(len(inds_inside), 4);相当于损失函数中的pi*
cfg.TRAIN.RPN_POSITIVE_WEIGHT: -1.0
bbox_outside_weights:fg,bg=np.ones((1, 4)) × 1.0 / sum(fg+bg),其他为0;(len(inds_inside), 4)

_unmap: 建立一个total_anchors × 第一个参数列的数组;全用fill填充;再把inds_inside对应的行用第一个参数对应的行填充

文章作者:Lily

原始链接:/2018/04/08/faster%20rcnn%E6%BA%90%E7%A0%81%E7%90%86%E8%A7%A3%EF%BC%88%E4%BA%8C%EF%BC%89%E4%B9%8BAnchorTargetLayer%EF%BC%88%E7%BD%91%E7%BB%9C%E4%B8%AD%E7%9A%84rpn_data%EF%BC%89/

版权说明:转载请保留原文链接及作者。