Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

how can i get the fps=120 on nvidia tx2? please help me #72

Open
oujieww opened this issue Mar 30, 2019 · 1 comment
Open

how can i get the fps=120 on nvidia tx2? please help me #72

oujieww opened this issue Mar 30, 2019 · 1 comment

Comments

@oujieww
Copy link

oujieww commented Mar 30, 2019

I wrote a webcam demo for this, but I cannot get 120 FPS as reported in the paper. Can anyone help me?
('cap read frame time : ', 0.03454303741455078)
('detect time: ', 0.1441190242767334)

this is my code
import numpy as np
import matplotlib.pyplot as plt
import time

import os
import caffe
import cv2
from google.protobuf import text_format
from caffe.proto import caffe_pb2
caffe.set_mode_gpu()

# Load the PASCAL VOC label map used to translate numeric class ids
# into human-readable display names.
labelmap_file = 'model/voc/labelmap_voc.prototxt'
labelmap = caffe_pb2.LabelMap()
# Context manager closes the handle (the original left the file open and
# shadowed the builtin name `file`).
with open(labelmap_file, 'r') as labelmap_fp:
    text_format.Merge(str(labelmap_fp.read()), labelmap)

def get_labelname(labelmap, labels):
    """Map numeric detection labels to human-readable display names.

    Parameters
    ----------
    labelmap : object with an ``item`` sequence whose entries expose
        ``label`` (int) and ``display_name`` (str), e.g. a
        ``caffe_pb2.LabelMap``.
    labels : a single label or a list of labels. Float labels (as emitted
        by the detector) work too, since ``7.0`` hashes/compares equal
        to ``7``.

    Returns
    -------
    list of str -- one display name per input label, in order.

    Raises
    ------
    KeyError
        If a label is not present in the label map (replaces the original
        ``assert found == True``, which is stripped under ``python -O``).
    """
    # Build the lookup table once instead of re-scanning the whole label
    # map for every queried label (the original was O(n) per label).
    name_by_label = {item.label: item.display_name for item in labelmap.item}
    if type(labels) is not list:
        labels = [labels]
    return [name_by_label[label] for label in labels]

# Paths to the merged Pelee SSD deploy definition and trained weights.
model_def = 'model/voc/deploy_merged.prototxt'
model_weights = 'model/voc/pelee_merged.caffemodel'

net = caffe.Net(model_def,      # defines the structure of the model
                model_weights,  # contains the trained weights
                caffe.TEST)     # use test mode (e.g. don't perform dropout)

# Input preprocessing: 'data' is the name of the input blob == net.inputs[0].
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
transformer.set_transpose('data', (2, 0, 1))  # HWC -> CHW
transformer.set_input_scale('data', 0.017)    # scale applied after mean subtraction
transformer.set_mean('data', np.array([103.94, 116.78, 123.68]))  # mean pixel
# The reference model operates on images in [0, 255] instead of [0, 1].
transformer.set_raw_scale('data', 255)
# The reference model has channels in BGR order instead of RGB.
transformer.set_channel_swap('data', (2, 1, 0))

# Font used when drawing detection labels on the preview frame.
font = cv2.FONT_HERSHEY_SIMPLEX
def open_cam_onboard(width, height, capture_width=800, capture_height=600, fps=5):
    """Open the Jetson onboard CSI camera via a GStreamer pipeline.

    Parameters
    ----------
    width, height : int
        Output frame size delivered to OpenCV (after nvvidconv scaling).
    capture_width, capture_height : int
        Sensor capture size. Defaults keep the original hard-coded 800x600.
    fps : int
        Requested capture framerate. The original hard-coded 5/1, which
        caps the whole pipeline at 5 FPS regardless of inference speed --
        raise this to benchmark the detector properly.

    Returns
    -------
    cv2.VideoCapture reading BGR frames from the pipeline.
    """
    # On versions of L4T previous to L4T 28.1, use flip-method=2.
    gst_str = (
        "nvcamerasrc ! "
        "video/x-raw(memory:NVMM), width=(int){}, height=(int){}, "
        "format=(string)I420, framerate=(fraction){}/1 ! "
        "nvvidconv ! video/x-raw, width=(int){}, height=(int){}, "
        "format=(string)BGRx ! "
        "videoconvert ! appsink"
    ).format(capture_width, capture_height, fps, width, height)
    return cv2.VideoCapture(gst_str, cv2.CAP_GSTREAMER)
def do_detect(image, img2, conf_thresh=0.4):
    """Run the SSD network on `image` and draw detections onto `img2`.

    Parameters
    ----------
    image : ndarray
        RGB float image in [0, 1]; the module-level `transformer` handles
        resizing, channel swap, mean subtraction and scaling.
    img2 : ndarray
        BGR uint8 frame that boxes and labels are drawn on (modified in
        place and returned).
    conf_thresh : float
        Minimum confidence for a detection to be drawn. Default 0.4
        matches the original hard-coded value.

    Returns
    -------
    img2 with rectangles and class labels drawn on it.
    """
    net.blobs['data'].data[...] = transformer.preprocess('data', image)

    # Forward pass. 'detection_out' rows are
    # [image_id, label, confidence, xmin, ymin, xmax, ymax], with
    # normalized [0, 1] box coordinates.
    detections = net.forward()['detection_out']

    det_label = detections[0, 0, :, 1]
    det_conf = detections[0, 0, :, 2]
    det_xmin = detections[0, 0, :, 3]
    det_ymin = detections[0, 0, :, 4]
    det_xmax = detections[0, 0, :, 5]
    det_ymax = detections[0, 0, :, 6]

    # Keep detections above the confidence threshold (vectorized instead
    # of the original per-element Python list comprehension).
    top_indices = np.where(det_conf >= conf_thresh)[0]

    top_conf = det_conf[top_indices]
    top_labels = get_labelname(labelmap, det_label[top_indices].tolist())
    top_xmin = det_xmin[top_indices]
    top_ymin = det_ymin[top_indices]
    top_xmax = det_xmax[top_indices]
    top_ymax = det_ymax[top_indices]

    # `range` instead of Python-2-only `xrange` (works on both).
    for i in range(top_conf.shape[0]):
        xmin = int(round(top_xmin[i] * image.shape[1]))
        ymin = int(round(top_ymin[i] * image.shape[0]))
        xmax = int(round(top_xmax[i] * image.shape[1]))
        ymax = int(round(top_ymax[i] * image.shape[0]))
        score = top_conf[i]
        label_name = top_labels[i]
        img2 = cv2.rectangle(img2, (xmin, ymin), (xmax, ymax), (0, 255, 0))
        img2 = cv2.putText(img2, label_name + ':' + str(score),
                           (xmin, ymin - 5), font, 2, (0, 0, 255), 1)
    return img2

# Set the net to batch size 1 at the model's input resolution.
image_resize = 304
net.blobs['data'].reshape(1, 3, image_resize, image_resize)

capture = open_cam_onboard(800, 600)
if not capture.isOpened():
    raise RuntimeError('failed to open onboard camera')
try:
    while True:
        time0 = time.time()
        ret, frame = capture.read()
        if not ret:
            # Dropped frame / closed stream: the original would crash in
            # cvtColor on a None frame.
            break
        # BGR -> RGB and scale to [0, 1], as the transformer expects.
        frame1 = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame1 = frame1 / 255.
        print("cap read frame time : ", time.time() - time0)
        time1 = time.time()
        res = do_detect(frame1, frame)
        print("detect time: ", time.time() - time1)
        cv2.imshow('frame', res)
        if cv2.waitKey(1) == ord('q'):
            break
finally:
    # Release the camera and close the preview window even on error
    # (the original never released either).
    capture.release()
    cv2.destroyAllWindows()

@sparshgarg23
Copy link

sparshgarg23 commented Nov 19, 2021

What FPS are you getting?
In the paper it's mentioned

The speed is calculated by the average time of processing 100 pictures with 1 batch size.
We run 100 picture processing for 10 times separately and average the time.
The paper also uses FP16 instead of FP32 to achieve the reported FPS.
Would be great if the author could confirm this

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants