Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

how can i get the fps=120 on nvidia tx2? please help me #72

Open
oujieww opened this issue Mar 30, 2019 · 1 comment
Open

how can i get the fps=120 on nvidia tx2? please help me #72

oujieww opened this issue Mar 30, 2019 · 1 comment

Comments

@oujieww
Copy link

oujieww commented Mar 30, 2019

I wrote a webcam demo for this, but I cannot get 120 FPS as reported in the paper. Can anyone help me?
('cap read frame time : ', 0.03454303741455078)
('detect time: ', 0.1441190242767334)

this is my code
import numpy as np
import matplotlib.pyplot as plt
import time

import os
import caffe
import cv2
from google.protobuf import text_format
from caffe.proto import caffe_pb2
caffe.set_mode_gpu()

# Load the PASCAL VOC label map used to translate numeric class ids
# into human-readable display names.
labelmap_file = 'model/voc/labelmap_voc.prototxt'
labelmap = caffe_pb2.LabelMap()
# Context manager closes the handle (the original left the file open and
# shadowed the builtin name `file`).
with open(labelmap_file, 'r') as labelmap_fp:
    text_format.Merge(str(labelmap_fp.read()), labelmap)

def get_labelname(labelmap, labels):
    """Map numeric detection labels to human-readable display names.

    Parameters
    ----------
    labelmap : object with an ``item`` sequence whose entries expose
        ``label`` (int) and ``display_name`` (str), e.g. a
        ``caffe_pb2.LabelMap``.
    labels : a single label or a list of labels. Float labels (as emitted
        by the detector) work too, since ``7.0`` hashes/compares equal
        to ``7``.

    Returns
    -------
    list of str -- one display name per input label, in order.

    Raises
    ------
    KeyError
        If a label is not present in the label map (replaces the original
        ``assert found == True``, which is stripped under ``python -O``).
    """
    # Build the lookup table once instead of re-scanning the whole label
    # map for every queried label (the original was O(n) per label).
    name_by_label = {item.label: item.display_name for item in labelmap.item}
    if type(labels) is not list:
        labels = [labels]
    return [name_by_label[label] for label in labels]

# Paths to the merged Pelee SSD deploy definition and trained weights.
model_def = 'model/voc/deploy_merged.prototxt'
model_weights = 'model/voc/pelee_merged.caffemodel'

net = caffe.Net(model_def,      # defines the structure of the model
                model_weights,  # contains the trained weights
                caffe.TEST)     # use test mode (e.g. don't perform dropout)

# Input preprocessing: 'data' is the name of the input blob == net.inputs[0].
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
transformer.set_transpose('data', (2, 0, 1))  # HWC -> CHW
transformer.set_input_scale('data', 0.017)    # scale applied after mean subtraction
transformer.set_mean('data', np.array([103.94, 116.78, 123.68]))  # mean pixel
# The reference model operates on images in [0, 255] instead of [0, 1].
transformer.set_raw_scale('data', 255)
# The reference model has channels in BGR order instead of RGB.
transformer.set_channel_swap('data', (2, 1, 0))

# Font used when drawing detection labels on the preview frame.
font = cv2.FONT_HERSHEY_SIMPLEX
def open_cam_onboard(width, height, capture_width=800, capture_height=600, fps=5):
    """Open the Jetson onboard CSI camera via a GStreamer pipeline.

    Parameters
    ----------
    width, height : int
        Output frame size delivered to OpenCV (after nvvidconv scaling).
    capture_width, capture_height : int
        Sensor capture size. Defaults keep the original hard-coded 800x600.
    fps : int
        Requested capture framerate. The original hard-coded 5/1, which
        caps the whole pipeline at 5 FPS regardless of inference speed --
        raise this to benchmark the detector properly.

    Returns
    -------
    cv2.VideoCapture reading BGR frames from the pipeline.
    """
    # On versions of L4T previous to L4T 28.1, use flip-method=2.
    gst_str = (
        "nvcamerasrc ! "
        "video/x-raw(memory:NVMM), width=(int){}, height=(int){}, "
        "format=(string)I420, framerate=(fraction){}/1 ! "
        "nvvidconv ! video/x-raw, width=(int){}, height=(int){}, "
        "format=(string)BGRx ! "
        "videoconvert ! appsink"
    ).format(capture_width, capture_height, fps, width, height)
    return cv2.VideoCapture(gst_str, cv2.CAP_GSTREAMER)
def do_detect(image, img2, conf_thresh=0.4):
    """Run the SSD network on `image` and draw detections onto `img2`.

    Parameters
    ----------
    image : ndarray
        RGB float image in [0, 1]; the module-level `transformer` handles
        resizing, channel swap, mean subtraction and scaling.
    img2 : ndarray
        BGR uint8 frame that boxes and labels are drawn on (modified in
        place and returned).
    conf_thresh : float
        Minimum confidence for a detection to be drawn. Default 0.4
        matches the original hard-coded value.

    Returns
    -------
    img2 with rectangles and class labels drawn on it.
    """
    net.blobs['data'].data[...] = transformer.preprocess('data', image)

    # Forward pass. 'detection_out' rows are
    # [image_id, label, confidence, xmin, ymin, xmax, ymax], with
    # normalized [0, 1] box coordinates.
    detections = net.forward()['detection_out']

    det_label = detections[0, 0, :, 1]
    det_conf = detections[0, 0, :, 2]
    det_xmin = detections[0, 0, :, 3]
    det_ymin = detections[0, 0, :, 4]
    det_xmax = detections[0, 0, :, 5]
    det_ymax = detections[0, 0, :, 6]

    # Keep detections above the confidence threshold (vectorized instead
    # of the original per-element Python list comprehension).
    top_indices = np.where(det_conf >= conf_thresh)[0]

    top_conf = det_conf[top_indices]
    top_labels = get_labelname(labelmap, det_label[top_indices].tolist())
    top_xmin = det_xmin[top_indices]
    top_ymin = det_ymin[top_indices]
    top_xmax = det_xmax[top_indices]
    top_ymax = det_ymax[top_indices]

    # `range` instead of Python-2-only `xrange` (works on both).
    for i in range(top_conf.shape[0]):
        xmin = int(round(top_xmin[i] * image.shape[1]))
        ymin = int(round(top_ymin[i] * image.shape[0]))
        xmax = int(round(top_xmax[i] * image.shape[1]))
        ymax = int(round(top_ymax[i] * image.shape[0]))
        score = top_conf[i]
        label_name = top_labels[i]
        img2 = cv2.rectangle(img2, (xmin, ymin), (xmax, ymax), (0, 255, 0))
        img2 = cv2.putText(img2, label_name + ':' + str(score),
                           (xmin, ymin - 5), font, 2, (0, 0, 255), 1)
    return img2

# Set the net to batch size 1 at the model's input resolution.
image_resize = 304
net.blobs['data'].reshape(1, 3, image_resize, image_resize)

capture = open_cam_onboard(800, 600)
if not capture.isOpened():
    raise RuntimeError('failed to open onboard camera')
try:
    while True:
        time0 = time.time()
        ret, frame = capture.read()
        if not ret:
            # Dropped frame / closed stream: the original would crash in
            # cvtColor on a None frame.
            break
        # BGR -> RGB and scale to [0, 1], as the transformer expects.
        frame1 = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame1 = frame1 / 255.
        print("cap read frame time : ", time.time() - time0)
        time1 = time.time()
        res = do_detect(frame1, frame)
        print("detect time: ", time.time() - time1)
        cv2.imshow('frame', res)
        if cv2.waitKey(1) == ord('q'):
            break
finally:
    # Release the camera and close the preview window even on error
    # (the original never released either).
    capture.release()
    cv2.destroyAllWindows()

@sparshgarg23
Copy link

sparshgarg23 commented Nov 19, 2021

What FPS are you getting?
In the paper it's mentioned

The speed is calculated by the average time of processing 100 pictures with 1 batch size.
We run 100 picture processing for 10 times separately and average the time.
The paper also uses FP16 instead of FP32 to achieve the reported FPS.
Would be great if the author could confirm this

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants