【Tensorflow】Running Faster RCNN Inference + GPU Resource Cautions
Tensorflow 1.3. Object Detection with the Faster RCNN API
See /DLCV/Detection/fast_rcnn/Tensorflow_FasterRCNN_ObjectDetection.ipynb
0. Tensorflow Inference Procedure
- Read the image.
- Read in just the .pb file - using tf.gfile.FastGFile and graph_def = tf.GraphDef()
- Start a session - with tf.Session() as sess:
- Import the graph inside the session - tf.import_graph_def(graph_def, name='')
- Run the forward pass with sess.run and fetch the outputs you want - out = sess.run(...)
- Extract the information for each detected object and visualize it - for i in range(int(out[0][0])): (the steps are put together in the sketch below)
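Putting those steps together: a minimal skeleton of the TF1 frozen-graph inference flow. The two paths here are placeholders; the actual paths used in this notebook appear in section 2 below.

import cv2
import tensorflow as tf

# Placeholder paths; the real paths appear in the cells below.
with tf.gfile.FastGFile('path/to/frozen_inference_graph.pb', 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())

with tf.Session() as sess:
    tf.import_graph_def(graph_def, name='')
    img = cv2.imread('path/to/image.jpg')[:, :, ::-1]  # BGR -> RGB
    out = sess.run([sess.graph.get_tensor_by_name('num_detections:0'),
                    sess.graph.get_tensor_by_name('detection_scores:0'),
                    sess.graph.get_tensor_by_name('detection_boxes:0'),
                    sess.graph.get_tensor_by_name('detection_classes:0')],
                   feed_dict={'image_tensor:0': img[None, ...]})  # add batch dim
    for i in range(int(out[0][0])):  # one iteration per detected object
        print(out[3][0][i], out[1][0][i], out[2][0][i])  # class, score, box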
1. GPU Resource Cautions
Remedies
- Check $ nvidia-smi periodically, and before starting any training:
- Jupyter - Running - shut down notebooks you are not using
- Notebook - Restart & Clear Output
- Among the processes listed by nvidia-smi, kill heavy GPU users with kill -9 <process ID>
- Kill the Jupyter notebook server in the terminal and start it again (~/start_nb.sh)
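On top of those habits: a TF1 session grabs most of the GPU memory by default, which is usually why a second notebook fails to start. A small sketch of the standard TF1 config options to rein that in; these can be passed to every tf.Session call used in this notebook:

import tensorflow as tf

config = tf.ConfigProto()
config.gpu_options.allow_growth = True  # allocate GPU memory on demand instead of all at once
# config.gpu_options.per_process_gpu_memory_fraction = 0.5  # or cap usage at a fraction

with tf.Session(config=config) as sess:
    pass  # same inference code as in the sections below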
2. Object Detection with Tensorflow
1. Single-Image Object Detection
import cv2
import matplotlib.pyplot as plt
%matplotlib inline
img = cv2.imread('../../data/image/john_wick01.jpg')
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
print('image shape:', img.shape)
plt.figure(figsize=(12, 12))
plt.imshow(img_rgb)
image shape: (450, 814, 3)
labels_to_names = {1:'person',2:'bicycle',3:'car',4:'motorcycle',5:'airplane',6:'bus',7:'train',8:'truck',9:'boat',10:'traffic light',
11:'fire hydrant',12:'street sign',13:'stop sign',14:'parking meter',15:'bench',16:'bird',17:'cat',18:'dog',19:'horse',20:'sheep',
21:'cow',22:'elephant',23:'bear',24:'zebra',25:'giraffe',26:'hat',27:'backpack',28:'umbrella',29:'shoe',30:'eye glasses',
31:'handbag',32:'tie',33:'suitcase',34:'frisbee',35:'skis',36:'snowboard',37:'sports ball',38:'kite',39:'baseball bat',40:'baseball glove',
41:'skateboard',42:'surfboard',43:'tennis racket',44:'bottle',45:'plate',46:'wine glass',47:'cup',48:'fork',49:'knife',50:'spoon',
51:'bowl',52:'banana',53:'apple',54:'sandwich',55:'orange',56:'broccoli',57:'carrot',58:'hot dog',59:'pizza',60:'donut',
61:'cake',62:'chair',63:'couch',64:'potted plant',65:'bed',66:'mirror',67:'dining table',68:'window',69:'desk',70:'toilet',
71:'door',72:'tv',73:'laptop',74:'mouse',75:'remote',76:'keyboard',77:'cell phone',78:'microwave',79:'oven',80:'toaster',
81:'sink',82:'refrigerator',83:'blender',84:'book',85:'clock',86:'vase',87:'scissors',88:'teddy bear',89:'hair drier',90:'toothbrush',
91:'hair brush'}
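labels_to_names maps the 91 COCO class ids to names. One thing to keep in mind for the code below: the model returns class ids as float32, so cast to int before the lookup, e.g.:

class_id = 1.0                         # detection_classes come back as float32
print(labels_to_names[int(class_id)])  # person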
# !mkdir pretrained; cd pretrained
# !wget http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet50_coco_2018_01_28.tar.gz
# !tar -xzf faster_rcnn_resnet50_coco_2018_01_28.tar.gz
# !wget https://raw.githubusercontent.com/opencv/opencv_extra/master/testdata/dnn/faster_rcnn_resnet50_coco_2018_01_28.pbtxt
# cd faster_rcnn_resnet50_coco_2018_01_28; mv ../faster_rcnn_resnet50_coco_2018_01_28.pbtxt graph.pbtxt
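If the download and extraction went through, the files the code below expects should be in place; a quick sanity check (the paths match the ones used in the cells below):

import os
model_dir = './pretrained/faster_rcnn_resnet50_coco_2018_01_28'
for fname in ['frozen_inference_graph.pb', 'graph.pbtxt']:
    print(fname, '->', os.path.exists(os.path.join(model_dir, fname)))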
import numpy as np
import tensorflow as tf
import cv2
import time
import matplotlib.pyplot as plt
%matplotlib inline
# Read the frozen inference graph (.pb).
with tf.gfile.FastGFile('./pretrained/faster_rcnn_resnet50_coco_2018_01_28/frozen_inference_graph.pb', 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())

with tf.Session() as sess:
    # Start the session and load the inference graph into it.
    sess.graph.as_default()
    tf.import_graph_def(graph_def, name='')
    # From here the graph lives inside the session; later code pulls graph info through sess.

    # Load the input image and convert BGR to RGB.
    img = cv2.imread('../../data/image/beatles01.jpg')
    draw_img = img.copy()
    rows = img.shape[0]
    cols = img.shape[1]
    input_img = img[:, :, [2, 1, 0]]  # BGR -> RGB

    start = time.time()
    # Run object detection.
    # sess.run fetches, via graph.get_tensor_by_name, exactly the outputs we want,
    # in order: [num detections, scores, box coordinates, classes].
    out = sess.run([sess.graph.get_tensor_by_name('num_detections:0'),
                    sess.graph.get_tensor_by_name('detection_scores:0'),
                    sess.graph.get_tensor_by_name('detection_boxes:0'),
                    sess.graph.get_tensor_by_name('detection_classes:0')],
                   feed_dict={'image_tensor:0': input_img.reshape(1, input_img.shape[0], input_img.shape[1], 3)})  # batched input: shape (1, H, W, 3)

    print('type of out:', type(out), 'length of out:', len(out))  # list(4) = [num detections, scores, boxes, classes]
    print(out)

    green_color = (0, 255, 0)
    red_color = (0, 0, 255)

    # Visualize the bounding boxes.
    num_detections = int(out[0][0])
    for i in range(num_detections):
        classId = int(out[3][0][i])
        score = float(out[1][0][i])
        bbox = [float(v) for v in out[2][0][i]]
        if score > 0.5:
            # Boxes are normalized [ymin, xmin, ymax, xmax]; scale to pixel coordinates.
            left = bbox[1] * cols
            top = bbox[0] * rows
            right = bbox[3] * cols
            bottom = bbox[2] * rows
            cv2.rectangle(draw_img, (int(left), int(top)), (int(right), int(bottom)), green_color, thickness=2)
            caption = "{}: {:.4f}".format(labels_to_names[classId], score)
            print(caption)
            cv2.putText(draw_img, caption, (int(left), int(top - 5)), cv2.FONT_HERSHEY_SIMPLEX, 0.4, red_color, 1)

    print('Detection time:', round(time.time() - start, 2), 'sec')

img_rgb = cv2.cvtColor(draw_img, cv2.COLOR_BGR2RGB)
plt.figure(figsize=(12, 12))
plt.imshow(img_rgb)
# The NMS filtering seems to have a slight problem here... the results look a bit off.
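On that remark: if the graph's built-in NMS leaves overlapping boxes, one hedged option (not part of the original notebook) is a second NMS pass over the fetched outputs using OpenCV's cv2.dnn.NMSBoxes. This sketch assumes out, rows, and cols from the cell above:

boxes_xywh = []
scores = []
for i in range(int(out[0][0])):
    score = float(out[1][0][i])
    if score > 0.5:
        ymin, xmin, ymax, xmax = [float(v) for v in out[2][0][i]]
        # NMSBoxes expects pixel-space [x, y, w, h].
        boxes_xywh.append([int(xmin * cols), int(ymin * rows),
                           int((xmax - xmin) * cols), int((ymax - ymin) * rows)])
        scores.append(score)
keep = cv2.dnn.NMSBoxes(boxes_xywh, scores, score_threshold=0.5, nms_threshold=0.4)
print('indices surviving the extra NMS pass:', keep)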
type of out: <class 'list'> length of out: 4
Contents below: [num detections, scores, box coordinates, classes]
[array([19.], dtype=float32),
array([[0.99974984, 0.99930644, 0.9980475 , 0.9970795 , 0.9222008 ,
0.8515703 , 0.8055376 , 0.7321974 , 0.7169089 , 0.6350252 ,
0.6057731 , 0.5482028 , 0.51252437, 0.46408176, 0.43892667,
0.41287616, 0.4075464 , 0.39610404, 0.3171757 , 0. ,
0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. ,
...
0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. ]],
dtype=float32),
array([[[0.40270284, 0.2723695 , 0.8693631 , 0.46764165],
[0.40439418, 0.06080557, 0.88185936, 0.24013077],
[0.40899867, 0.68438506, 0.9282361 , 0.9033634 ],
[0.42774147, 0.4751278 , 0.8887425 , 0.7367553 ],
[0.3681334 , 0.5855469 , 0.41420895, 0.6274197 ],
[0.36090973, 0.7612593 , 0.46531847, 0.78825235],
[0.35362682, 0.5422665 , 0.3779468 , 0.56790847],
[0.35872525, 0.47497243, 0.37832502, 0.4952262 ],
[0.39067298, 0.17564818, 0.54261357, 0.31135702],
[0.3596046 , 0.6206162 , 0.4659364 , 0.7180736 ],
[0.36052787, 0.7542875 , 0.45949724, 0.7803741 ],
[0.35740715, 0.55126834, 0.38326728, 0.57657194],
[0.36718863, 0.5769864 , 0.40654665, 0.61239254],
[0.35574582, 0.4798463 , 0.37322614, 0.4985193 ],
[0.35036406, 0.5329462 , 0.3708444 , 0.5514975 ],
[0.367587 , 0.39456058, 0.41583234, 0.43441534],
[0.3562084 , 0.47724184, 0.37217227, 0.49240994],
[0.36195153, 0.6252996 , 0.46575055, 0.72400415],
[0.36365557, 0.5674811 , 0.39475283, 0.59136254],
[0. , 0. , 0. , 0. ],
[0. , 0. , 0. , 0. ],
[0. , 0. , 0. , 0. ],
[0. , 0. , 0. , 0. ],
[0. , 0. , 0. , 0. ],
[0. , 0. , 0. , 0. ],
...
[0. , 0. , 0. , 0. ],
[0. , 0. , 0. , 0. ]]], dtype=float32),
array([[1., 1., 1., 1., 3., 1., 3., 3., 3., 8., 1., 3., 3., 3., 3., 3.,
3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
1., 1., 1., 1.]], dtype=float32)]
person: 0.9997
person: 0.9993
person: 0.9980
person: 0.9971
car: 0.9222
person: 0.8516
car: 0.8055
car: 0.7322
car: 0.7169
truck: 0.6350
person: 0.6058
car: 0.5482
car: 0.5125
Detection time: 12.99 sec
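For reference, the fetched arrays have fixed shapes for this export: standard TF Object Detection API models pad to 100 candidate detections, which is consistent with the 100 class entries printed above. A quick check, run right after the sess.run call:

for name, arr in zip(['num_detections', 'detection_scores', 'detection_boxes', 'detection_classes'], out):
    print(name, arr.shape)
# num_detections     (1,)
# detection_scores   (1, 100)     sorted descending, zero-padded
# detection_boxes    (1, 100, 4)  normalized [ymin, xmin, ymax, xmax]
# detection_classes  (1, 100)     float COCO class ids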
2. Wrapping the Above Steps in a Function
def get_tensor_detected_image(sess, img_array, use_copied_array):
    rows = img_array.shape[0]
    cols = img_array.shape[1]

    # Draw on a copy or in place, depending on the flag.
    if use_copied_array:
        draw_img_array = img_array.copy()
    else:
        draw_img_array = img_array

    input_img = img_array[:, :, [2, 1, 0]]  # BGR -> RGB

    start = time.time()
    # Run object detection.
    out = sess.run([sess.graph.get_tensor_by_name('num_detections:0'),
                    sess.graph.get_tensor_by_name('detection_scores:0'),
                    sess.graph.get_tensor_by_name('detection_boxes:0'),
                    sess.graph.get_tensor_by_name('detection_classes:0')],
                   feed_dict={'image_tensor:0': input_img.reshape(1, input_img.shape[0], input_img.shape[1], 3)})

    green_color = (0, 255, 0)
    red_color = (0, 0, 255)

    # Visualize the bounding boxes.
    num_detections = int(out[0][0])
    for i in range(num_detections):
        classId = int(out[3][0][i])
        score = float(out[1][0][i])
        bbox = [float(v) for v in out[2][0][i]]
        if score > 0.5:
            left = bbox[1] * cols
            top = bbox[0] * rows
            right = bbox[3] * cols
            bottom = bbox[2] * rows
            cv2.rectangle(draw_img_array, (int(left), int(top)), (int(right), int(bottom)), green_color, thickness=2)
            caption = "{}: {:.4f}".format(labels_to_names[classId], score)
            cv2.putText(draw_img_array, caption, (int(left), int(top - 5)), cv2.FONT_HERSHEY_SIMPLEX, 0.4, red_color, 1)
            #print(caption)

    print('Detection time:', round(time.time() - start, 2), 'sec')
    return draw_img_array
# end of function
Run image object detection using the function just defined:
with tf.gfile.FastGFile('./pretrained/faster_rcnn_resnet50_coco_2018_01_28/frozen_inference_graph.pb', 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())

with tf.Session() as sess:
    # Start the session and load the inference graph.
    sess.graph.as_default()
    tf.import_graph_def(graph_def, name='')

    # Load the input image; the function handles the BGR-to-RGB conversion.
    img = cv2.imread('../../data/image/john_wick01.jpg')
    draw_img = get_tensor_detected_image(sess, img, True)

img_rgb = cv2.cvtColor(draw_img, cv2.COLOR_BGR2RGB)
plt.figure(figsize=(12, 12))
plt.imshow(img_rgb)
Detection time: 15.58 sec
3. Video Object Detection with the Function Above
video_input_path = '../../data/video/John_Wick_small.mp4'
# On Linux the video output extension must be set to .avi.
video_output_path = '../../data/output/John_Wick_small_tensor01.avi'

cap = cv2.VideoCapture(video_input_path)
codec = cv2.VideoWriter_fourcc(*'XVID')
vid_size = (round(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), round(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
vid_fps = cap.get(cv2.CAP_PROP_FPS)
vid_writer = cv2.VideoWriter(video_output_path, codec, vid_fps, vid_size)

frame_cnt = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
print('Total frame count:', frame_cnt)

# Read the frozen inference graph (.pb).
with tf.gfile.FastGFile('./pretrained/faster_rcnn_resnet50_coco_2018_01_28/frozen_inference_graph.pb', 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())

with tf.Session() as sess:
    # Start the session and load the inference graph.
    sess.graph.as_default()
    tf.import_graph_def(graph_def, name='')

    while True:
        hasFrame, img_frame = cap.read()
        if not hasFrame:
            print('No more frames to process.')
            break
        # Draw in place (use_copied_array=False) to avoid copying every frame.
        draw_img_frame = get_tensor_detected_image(sess, img_frame, False)
        vid_writer.write(draw_img_frame)
    # end of while loop

vid_writer.release()
cap.release()
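The single-image runs above took roughly 13-15 seconds each, so processing every frame of even a short clip is slow. A common workaround (not part of the original notebook) is to run detection only on every Nth frame and write the remaining frames through untouched. A sketch of that loop, meant to replace the while loop inside the tf.Session block above:

detect_every = 5  # hypothetical stride; tune for your clip
idx = 0
while True:
    hasFrame, img_frame = cap.read()
    if not hasFrame:
        break
    if idx % detect_every == 0:
        # Detect and draw in place on this frame only.
        img_frame = get_tensor_detected_image(sess, img_frame, False)
    vid_writer.write(img_frame)
    idx += 1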
!gsutil cp ../../data/output/John_Wick_small_tensor01.avi gs://my_bucket_dlcv/data/output/John_Wick_small_tensor01.avi