【Paper Code】 Feature Pyramid Networks - Code Review
Figure: diagram explaining the code (image omitted)
Code
'''
FPN in PyTorch.
See the paper "Feature Pyramid Networks for Object Detection" for more details.
'''
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
# 4. The block == Bottleneck (used for the bottom-up pathway)
class Bottleneck(nn.Module):
    expansion = 4

    # (in_planes per block), (planes per block), (stride per block) ->
    # layer1 : (64, 256), (64, 64), (1, 1) // layer2 : (256, 512), (128, 128), (2, 1) // layer3 : (512, 1024), (256, 256), (2, 1)
    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*planes)

        # 5. Define the shortcut = the ResNet residual connection.
        self.shortcut = nn.Sequential()
        # A 1x1 projection shortcut is used whenever the stride or the channel count changes,
        # i.e. in the first block of each of layer1-4; otherwise the identity shortcut is kept.
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))    # channels after conv1 == planes -> layer1 : 64 // layer2 : 128 // layer3 : 256 // layer4 : 512
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))          # channels after conv3 -> layer1 : 256 // layer2 : 512 // layer3 : 1024 // layer4 : 2048
        # This is where the ResNet residual structure works in the bottom-up pathway; how the
        # channel/stride mismatch between x and out is handled is defined by self.shortcut above.
        out += self.shortcut(x)
        out = F.relu(out)
        return out
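# Illustrative only (not part of the original code): a single Bottleneck in the layer2
# configuration maps in_planes=256 to expansion*planes=512 channels and, with stride=2,
# halves the spatial size, e.g.
#   blk = Bottleneck(in_planes=256, planes=128, stride=2)
#   blk(torch.randn(1, 256, 56, 56)).size()   # -> torch.Size([1, 512, 28, 28])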
class FPN(nn.Module):
    def __init__(self, block, num_blocks):
        super(FPN, self).__init__()
        self.in_planes = 64

        # 1. Input channels 3 (RGB) // output channels 64 // kernel size 7x7.
        #    With stride 1 the output would keep the input size, but stride = 2 halves it.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)

        # 2. Bottom-up layers. After passing through all four layers the result has 2048 channels,
        #    and the width/height are reduced by roughly 4 (step #1) * 8 (layer2, 3, 4) = 32,
        #    since each strided _make_layer halves the spatial size.
        self.layer1 = self._make_layer(block,  64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)

        # Top layer
        self.toplayer = nn.Conv2d(2048, 256, kernel_size=1, stride=1, padding=0)  # Reduce channels

        # Smooth layers
        self.smooth1 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
        self.smooth2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
        self.smooth3 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)

        # Lateral layers
        self.latlayer1 = nn.Conv2d(1024, 256, kernel_size=1, stride=1, padding=0)
        self.latlayer2 = nn.Conv2d( 512, 256, kernel_size=1, stride=1, padding=0)
        self.latlayer3 = nn.Conv2d( 256, 256, kernel_size=1, stride=1, padding=0)
    # 3. Build each bottom-up stage by stacking `block`s.
    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)  # e.g. with num_blocks == 2: [1 or 2] + [1] = [1 or 2, 1]
        layers = []
        for stride in strides:
            # (in_planes, planes, stride) per block ->
            # layer1 : (64, 256), (64, 64), (1, 1) // layer2 : (256, 512), (128, 128), (2, 1) // layer3 : (512, 1024), (256, 256), (2, 1)
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)
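    # Illustrative trace (not part of the original code), assuming the FPN101() configuration
    # below where num_blocks == 2 for every stage: _make_layer(block, 128, 2, stride=2) builds
    #   Bottleneck(256, 128, stride=2)  -> 512 output channels, spatial size halved
    #   Bottleneck(512, 128, stride=1)  -> 512 output channels, spatial size kept
    # and leaves self.in_planes == 512 for the next stage.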
    def _upsample_add(self, x, y):
        # Upsample the top-down feature map x to the spatial size of the lateral map y, then add.
        # Upsampling mode explanation: https://pytorch.org/docs/0.3.1/_modules/torch/nn/modules/upsampling.html#Upsample
        # (F.upsample is deprecated in recent PyTorch releases; F.interpolate is the replacement.)
        _, _, H, W = y.size()
        return F.upsample(x, size=(H, W), mode='bilinear') + y
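    # Illustrative only (not part of the original code): in the top-down pass below,
    # _upsample_add(p5, self.latlayer1(c4)) bilinearly resizes p5 from 19x29 up to c4's 38x57
    # and adds it to the 1x1-projected c4, giving p4 of shape [1, 256, 38, 57]
    # (sizes taken from the 600x900 test input at the bottom of this file).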
    def forward(self, x):
        # Bottom-up
        c1 = F.relu(self.bn1(self.conv1(x)))
        # 1. The 7x7 stride-2 conv above plus this stride-2 max pool reduce width/height by a factor of 4.
        c1 = F.max_pool2d(c1, kernel_size=3, stride=2, padding=1)
        c2 = self.layer1(c1)  # c2 size = torch.Size([1, 256, 150, 225])
        c3 = self.layer2(c2)  # c3 size = torch.Size([1, 512, 75, 113])
        c4 = self.layer3(c3)  # c4 size = torch.Size([1, 1024, 38, 57])
        c5 = self.layer4(c4)  # c5 size = torch.Size([1, 2048, 19, 29])
        # Top-down
        p5 = self.toplayer(c5)
        p4 = self._upsample_add(p5, self.latlayer1(c4))
        p3 = self._upsample_add(p4, self.latlayer2(c3))
        p2 = self._upsample_add(p3, self.latlayer3(c2))
        # Smooth
        p4 = self.smooth1(p4)
        p3 = self.smooth2(p3)
        p2 = self.smooth3(p2)
        return p2, p3, p4, p5
def FPN101():
    # Note: a true ResNet-101 backbone uses [3, 4, 23, 3] blocks per stage; the smaller
    # [2, 2, 2, 2] configuration below is used so the test runs quickly.
    # return FPN(Bottleneck, [3, 4, 23, 3])
    return FPN(Bottleneck, [2, 2, 2, 2])


def test():
    net = FPN101()
    # torch.randn(1, 3, 600, 900) is a dummy RGB input. Variable (torch.autograd) wraps a tensor
    # so that autograd can track gradients such as dL/dw; since PyTorch 0.4 plain tensors do this
    # themselves and Variable is deprecated.
    fms = net(Variable(torch.randn(1, 3, 600, 900)))
    for fm in fms:
        print(fm.size())

test()
"""
input : torch.size([1, 3, 600, 900])
fms[0] = torch.Size([1, 256, 150, 225])
fms[1] = torch.Size([1, 256, 75, 113])
fms[2] = torch.Size([1, 256, 38, 57])
fms[3] = torch.Size([1, 256, 19, 29])
"""