Skip to content

Commit

Permalink
EDGECLOUD-3740 GPU Benchmarking script updates (#251)
Browse files Browse the repository at this point in the history
* EDGECLOUD-3740 GPU Benchmarking script updates

* minor updates

* Avoid session to session stat accumulation. Add simple shell script.
  • Loading branch information
brucearmstrong authored Nov 25, 2020
1 parent 8a1a200 commit cf0dff9
Show file tree
Hide file tree
Showing 13 changed files with 635 additions and 234 deletions.
4 changes: 3 additions & 1 deletion ComputerVisionServer/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ python manage.py collectstatic --noinput
cd pytorch_objectdetecttrack/config/
wget http://opencv.facetraining.mobiledgex.net/files/yolov3.weights
cd ../..
uvicorn moedx.asgi:application --host 0.0.0.0 --port 8008
gunicorn moedx.asgi:application --bind 0.0.0.0:8008 -k uvicorn.workers.UvicornWorker
#or
python manage.py runserver 0:8008
```
### How to install OpenPose on a GPU-enabled server
This assumes CUDA and CUDNN are already installed.
Expand Down
123 changes: 78 additions & 45 deletions ComputerVisionServer/moedx/client/multi_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,10 @@
import cv2
import argparse
from threading import Thread
try:
from stats import RunningStats
except Exception as e:
from .stats import RunningStats

util_dir = "../utilities"
sys.path.append(os.path.join(os.path.dirname(__file__), util_dir))
from stats import RunningStats

WEBSOCKET_OPCODE_BINARY = 0x2
PING_INTERVAL = 1 # Seconds
Expand Down Expand Up @@ -74,10 +74,14 @@ class Client:
stats_latency_full_process = RunningStats()
stats_latency_network_only = RunningStats()
stats_server_processing_time = RunningStats()
stats_cpu_utilization = RunningStats()
stats_mem_utilization = RunningStats()
stats_gpu_utilization = RunningStats()
stats_gpu_mem_utilization = RunningStats()
stats_cpu_util = RunningStats()
stats_mem_util = RunningStats()
stats_gpu_util = RunningStats()
stats_gpu_util_max = RunningStats()
stats_gpu_util_avg = RunningStats()
stats_gpu_mem_util = RunningStats()
stats_gpu_mem_util_max = RunningStats()
stats_gpu_mem_util_avg = RunningStats()

def __init__(self, host, port):
# Initialize instance variables.
Expand All @@ -89,10 +93,14 @@ def __init__(self, host, port):
self.stats_latency_full_process = RunningStats()
self.stats_latency_network_only = RunningStats()
self.stats_server_processing_time = RunningStats()
self.stats_cpu_utilization = RunningStats()
self.stats_mem_utilization = RunningStats()
self.stats_gpu_utilization = RunningStats()
self.stats_gpu_mem_utilization = RunningStats()
self.stats_cpu_util = RunningStats()
self.stats_mem_util = RunningStats()
self.stats_gpu_util = RunningStats()
self.stats_gpu_util_max = RunningStats()
self.stats_gpu_util_avg = RunningStats()
self.stats_gpu_mem_util = RunningStats()
self.stats_gpu_mem_util_max = RunningStats()
self.stats_gpu_mem_util_avg = RunningStats()
self.media_file_name = None
self.latency_start_time = 0
self.loop_count = 0
Expand Down Expand Up @@ -181,18 +189,30 @@ def measure_server_stats(self):
time.sleep(SERVER_STATS_DELAY)
while self.running:
decoded_json = json.loads(requests.get(url).content)
if 'cpu_utilization' in decoded_json:
self.stats_cpu_utilization.push(float(decoded_json['cpu_utilization']))
Client.stats_cpu_utilization.push(float(decoded_json['cpu_utilization']))
if 'mem_utilization' in decoded_json:
self.stats_mem_utilization.push(float(decoded_json['mem_utilization']))
Client.stats_mem_utilization.push(float(decoded_json['mem_utilization']))
if 'gpu_utilization' in decoded_json:
self.stats_gpu_utilization.push(float(decoded_json['gpu_utilization']))
Client.stats_gpu_utilization.push(float(decoded_json['gpu_utilization']))
if 'gpu_mem_utilization' in decoded_json:
self.stats_gpu_mem_utilization.push(float(decoded_json['gpu_mem_utilization']))
Client.stats_gpu_mem_utilization.push(float(decoded_json['gpu_mem_utilization']))
if 'cpu_util' in decoded_json:
self.stats_cpu_util.push(float(decoded_json['cpu_util']))
Client.stats_cpu_util.push(float(decoded_json['cpu_util']))
if 'mem_util' in decoded_json:
self.stats_mem_util.push(float(decoded_json['mem_util']))
Client.stats_mem_util.push(float(decoded_json['mem_util']))
if 'gpu_util' in decoded_json:
self.stats_gpu_util.push(float(decoded_json['gpu_util']))
Client.stats_gpu_util.push(float(decoded_json['gpu_util']))
if 'gpu_util_max' in decoded_json:
self.stats_gpu_util_max.push(float(decoded_json['gpu_util_max']))
Client.stats_gpu_util_max.push(float(decoded_json['gpu_util_max']))
if 'gpu_util_avg' in decoded_json:
self.stats_gpu_util_avg.push(float(decoded_json['gpu_util_avg']))
Client.stats_gpu_util_avg.push(float(decoded_json['gpu_util_avg']))
if 'gpu_mem_util' in decoded_json:
self.stats_gpu_mem_util.push(float(decoded_json['gpu_mem_util']))
Client.stats_gpu_mem_util.push(float(decoded_json['gpu_mem_util']))
if 'gpu_mem_util_max' in decoded_json:
self.stats_gpu_mem_util_max.push(float(decoded_json['gpu_mem_util_max']))
Client.stats_gpu_mem_util_max.push(float(decoded_json['gpu_mem_util_max']))
if 'gpu_mem_util_avg' in decoded_json:
self.stats_gpu_mem_util_avg.push(float(decoded_json['gpu_mem_util_avg']))
Client.stats_gpu_mem_util_avg.push(float(decoded_json['gpu_mem_util_avg']))
if self.show_responses:
logger.info(requests.get(url).content)
time.sleep(SERVER_STATS_INTERVAL)
Expand Down Expand Up @@ -447,7 +467,7 @@ def benchmark(arguments=None, django=False):
# can be accessed with log_stream.getvalue()
log_stream = StringIO()
sh = logging.StreamHandler(log_stream)
formatter = logging.Formatter('%(asctime)s - %(message)s')
formatter = logging.Formatter('%(asctime)s - %(process)d - %(message)s')
sh.setLevel(logging.INFO)
sh.setFormatter(formatter)
logger.addHandler(sh)
Expand Down Expand Up @@ -477,6 +497,18 @@ def benchmark(arguments=None, django=False):
parser.add_argument("--server-stats", action='store_true', help="Get server stats every Nth frame.")
args = parser.parse_args(arguments)

# Clear the Class variables. Otherwise, in the case we are instantiated by
# a Django view, the accumulation of stats would continue session to session.
Client.stats_latency_full_process.clear()
Client.stats_latency_network_only.clear()
Client.stats_server_processing_time.clear()
Client.stats_gpu_util.clear()
Client.stats_gpu_util_max.clear()
Client.stats_gpu_util_avg.clear()
Client.stats_gpu_mem_util.clear()
Client.stats_gpu_mem_util_max.clear()
Client.stats_gpu_mem_util_avg.clear()

start_time = time.time()

if args.threads > 1:
Expand Down Expand Up @@ -535,14 +567,6 @@ def benchmark(arguments=None, django=False):
client.tls = args.tls
client.tls_verify = not args.noverify

Client.stats_latency_full_process.clear()
Client.stats_latency_network_only.clear()
Client.stats_server_processing_time.clear()
Client.stats_cpu_utilization.clear()
Client.stats_mem_utilization.clear()
Client.stats_gpu_utilization.clear()
Client.stats_gpu_mem_utilization.clear()

thread = Thread(target=client.start)
thread.start()
logger.debug("Started %s" %thread)
Expand Down Expand Up @@ -580,22 +604,31 @@ def benchmark(arguments=None, django=False):
if Client.stats_server_processing_time.n > 0:
logger.info("====> Average Server Processing Time=%.3f ms (stddev=%.3f)" %(Client.stats_server_processing_time.mean(), Client.stats_server_processing_time.stddev()))

if Client.stats_cpu_utilization.n > 0:
logger.info("====> Average CPU Utilization=%.1f%%" %(Client.stats_cpu_utilization.mean()))
if Client.stats_mem_utilization.n > 0:
logger.info("====> Average Memory Utilization=%.1f%%" %(Client.stats_mem_utilization.mean()))
if Client.stats_gpu_utilization.n > 0:
logger.info("====> Average GPU Utilization=%.1f%%" %(Client.stats_gpu_utilization.mean()))
if Client.stats_gpu_mem_utilization.n > 0:
logger.info("====> Average GPU Memory Utilization=%.1f%%" %(Client.stats_gpu_mem_utilization.mean()))
if Client.stats_cpu_util.n > 0:
logger.info("====> Average CPU Utilization=%.1f%%" %(Client.stats_cpu_util.mean()))
if Client.stats_mem_util.n > 0:
logger.info("====> Average Memory Utilization=%.1f%%" %(Client.stats_mem_util.mean()))
if Client.stats_gpu_util.n > 0:
logger.info("====> Average GPU Utilization=%.1f%%" %(Client.stats_gpu_util.mean()))
if Client.stats_gpu_util_max.n > 0:
logger.info("====> Average GPU Utilization Max=%.1f%%" %(Client.stats_gpu_util_max.mean()))
if Client.stats_gpu_util_avg.n > 0:
logger.info("====> Average GPU Utilization Avg=%.1f%%" %(Client.stats_gpu_util_avg.current))
if Client.stats_gpu_mem_util.n > 0:
logger.info("====> Average GPU Memory Utilization=%.1f%%" %(Client.stats_gpu_mem_util.mean()))
if Client.stats_gpu_mem_util_max.n > 0:
logger.info("====> Average GPU Memory Utilization Max=%.1f%%" %(Client.stats_gpu_mem_util_max.mean()))
if Client.stats_gpu_mem_util_avg.n > 0:
logger.info("====> Average GPU Memory Utilization Avg=%.1f%%" %(Client.stats_gpu_mem_util_avg.current))


# The following line outputs CSV data that can be imported to a spreadsheet.
logger.info("")
logger.info("Server, Full Process, Network Only, Server Time, CPU Util, Mem Util, GPU Util, GPU Mem Util")
logger.info("%s, %.3f, %.3f, %.3f, %.3f, %.3f, %.3f, %.3f" %(args.server, Client.stats_latency_full_process.mean(),
Client.stats_latency_network_only.mean(), Client.stats_server_processing_time.mean(), Client.stats_cpu_utilization.mean(),
Client.stats_mem_utilization.mean(), Client.stats_gpu_utilization.mean(), Client.stats_gpu_mem_utilization.mean()))
logger.info("Server, Full Process, Network Only, Server Time, CPU Util, Mem Util, GPU Util, GPU Util Max, GPU Util Avg, GPU Mem Util, GPU Mem Util Max, GPU Mem Util Avg")
logger.info("%s, %.3f, %.3f, %.3f, %.3f, %.3f, %.3f, %.3f, %.3f, %.3f, %.3f, %.3f" %(args.server, Client.stats_latency_full_process.mean(),
Client.stats_latency_network_only.mean(), Client.stats_server_processing_time.mean(), Client.stats_cpu_util.mean(),
Client.stats_mem_util.mean(), Client.stats_gpu_util.mean(), Client.stats_gpu_util_max.mean(), Client.stats_gpu_util_avg.current,
Client.stats_gpu_mem_util.mean(), Client.stats_gpu_mem_util_max.mean(), Client.stats_gpu_mem_util_avg.current))

logger.info("TEST_PASS=%r" %TEST_PASS)
else:
Expand Down
112 changes: 93 additions & 19 deletions ComputerVisionServer/moedx/client/remote_bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,47 +8,121 @@
import argparse
import requests
from threading import Thread
util_dir = "../utilities"
sys.path.append(os.path.join(os.path.dirname(__file__), util_dir))
from stats import RunningStats

LAUNCH_INTERVAL = 1 # Seconds
# filename = "objects_320x180_x2.mp4"
stats_latency_full_process = RunningStats()
stats_latency_network_only = RunningStats()
stats_server_processing_time = RunningStats()
stats_cpu_util = RunningStats()
stats_mem_util = RunningStats()
stats_gpu_util = RunningStats()
stats_gpu_util_max = RunningStats()
stats_gpu_util_avg = RunningStats()
stats_gpu_mem_util = RunningStats()
stats_gpu_mem_util_max = RunningStats()
stats_gpu_mem_util_avg = RunningStats()

LAUNCH_INTERVAL = 2 # Seconds
filename = "objects_320x180.mp4"
# filename = "objects.mp4"

def launch_remote_benchmark(url):
# Comment or uncomment lines to change test options.
body = '-s cv-gpu-cluster.fairview-main.gddt.mobiledgex.net --tls -e /object/detect/ -c websocket -f %s -n PING --server-stats' %filename
# body = '-s cv-gpu-cluster.fairview-main.gddt.mobiledgex.net --tls -e /object/detect/ -c websocket -f %s -n PING --server-stats --skip-frames 5' %filename
body = '-s cv-gpu-cluster.fairview-main.gddt.mobiledgex.net --tls -e /object/detect/ -c websocket -f %s -n PING --server-stats ' %filename
# body = '-s 80.187.140.9 -e /object/detect/ -c websocket -f %s -n PING --server-stats' %filename
# body = '-s cv-gpu-cluster.fairview-main.gddt.mobiledgex.net --tls -e /object/detect/ -c websocket -f %s -n PING --fullsize' %filename
# body = '-s cv-gpu-cluster.fairview-main.gddt.mobiledgex.net --tls -e /object/detect/ -c websocket -f %s -n PING' %filename
# body = '-s cv-gpu-cluster.fairview-main.gddt.mobiledgex.net --tls -e /object/detect/ -c rest -f %s -n PING --skip-frames 4' %filename
# body = '-s 80.187.140.9 -e /object/detect/ -c websocket -f %s -n PING --server-stats' %filename
response = requests.post("%s/client/benchmark/" %url, data=body)
print("Response for %s:" %url)
print(response.content.decode("utf-8"))
data = response.content.decode("utf-8")
print(data)

# Get the line of the output that contains the CSV stats, and add them to our
# RunningStats instances so we can average them when all responses have arrived.
lines = data.split(os.linesep)
stats_line = lines[-3]
stats = stats_line.split(',')
# 2020-11-12 21:59:10,741 - Server, Full Process, Network Only, Server Time, CPU Util, Mem Util, GPU Util, GPU Util Max, GPU Util Avg, GPU Mem Util, GPU Mem Util Max, GPU Mem Util Avg
# 2020-11-12 21:59:10,741 - cv-gpu-cluster.fairview-main.gddt.mobiledgex.net, 433.526, 9.040, 33.395, 26.626, 16.320, 26.833, 61.635, 33.974, 15.759, 34.826, 19.487

stats_latency_full_process.push(float(stats[2]))
stats_latency_network_only.push(float(stats[3]))
stats_server_processing_time.push(float(stats[4]))
stats_cpu_util.push(float(stats[5]))
stats_mem_util.push(float(stats[6]))
stats_gpu_util.push(float(stats[7]))
stats_gpu_util_max.push(float(stats[8]))
stats_gpu_util_avg.push(float(stats[9]))
stats_gpu_mem_util.push(float(stats[10]))
stats_gpu_mem_util_max.push(float(stats[11]))
stats_gpu_mem_util_avg.push(float(stats[12]))
print('{}/{} clients reporting:'.format(stats_latency_full_process.n, args.num_clients))

# Full Details
header = "Num Clients, Full Process, FPS/Client, Total FPS, Network Only, Server Time, % CPU, %MEM, %GPU, %GPU Max, %GPU Avg, %GPU Mem Util, %GPU Mem Max, %GPU Mem Avg"
csv = '{}, {:.2f}, {:.2f}, {:.2f}, {:.2f}, {:.2f}, {:.2f}, {:.2f}, {:.2f}, {:.2f}, {:.2f}, {:.2f}, {:.2f}, {:.2f}'.format(
stats_latency_full_process.n, stats_latency_full_process.mean(),
1/stats_latency_full_process.mean()*1000, 1/stats_latency_full_process.mean()*1000*stats_latency_full_process.n,
stats_latency_network_only.mean(), stats_server_processing_time.mean(), stats_cpu_util.mean(), stats_mem_util.mean(),
stats_gpu_util.mean(), stats_gpu_util_max.mean(), stats_gpu_util_avg.mean(),
stats_gpu_mem_util.mean(), stats_gpu_mem_util_max.mean(), stats_gpu_mem_util_avg.mean())
print(header)
print(csv)

# Only what we want to graph
header = "Num Clients, FPS/Client, Total FPS, % CPU, %Mem, %GPU, %GPU Mem"
csv = '{}, {:.2f}, {:.2f}, {:.2f}, {:.2f}, {:.2f}, {:.2f}'.format(
stats_latency_full_process.n,
1/stats_latency_full_process.mean()*1000, 1/stats_latency_full_process.mean()*1000*stats_latency_full_process.n,
stats_cpu_util.mean(), stats_mem_util.mean(),
stats_gpu_util.mean(),
stats_gpu_mem_util.mean())
print(header)
print(csv)

if stats_latency_full_process.n == args.num_clients:
print("__CSV__: %s" %csv)


def remote_download(url):
# params = {'url': 'http://acrotopia.com/mobiledgex/%s' %filename}
params = {'url': 'http://opencv.facetraining.mobiledgex.net/videos/landscape/%s' %filename}
params = {'url': 'http://acrotopia.com/mobiledgex/%s' %filename}
# params = {'url': 'http://opencv.facetraining.mobiledgex.net/videos/landscape/%s' %filename}
response = requests.post("%s/client/download/" %url, data=params)
print("Response for %s:" %url)
print(response.content.decode("utf-8"))

urls = [
"http://80.187.140.9:8008",
"https://cv-gpu-cluster.hawkins-main.gddt.mobiledgex.net:8008",
# "https://cv-gpu-cluster.hawkins-main.gddt.mobiledgex.net:8008",
"https://cv-gpu-cluster.beacon-main.gddt.mobiledgex.net:8008",
"https://cv-gpu-cluster.sunnydale-main.gddt.mobiledgex.net:8008",
"https://cv-gpu-cluster.paradise-main.gddt.mobiledgex.net:8008",
"https://cv-cluster.beacon-main.gddt.mobiledgex.net:8008",
]

# for url in urls:
# print("Starting %s" %url)
# remote_download(url)
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("-n", "--num-clients", type=int, default=1, help="Number of clients to launch")
parser.add_argument("-d", "--download", action='store_true', help="Download media file to client machine instead of launching")
args = parser.parse_args()

if args.download:
for url in urls[:args.num_clients]:
print("Starting download for %s" %url)
remote_download(url)
sys.exit()

for url in urls[:args.num_clients]:
print("Starting %s" %url)
thread = Thread(target=launch_remote_benchmark, args=(url,))
thread.start()
time.sleep(LAUNCH_INTERVAL)

for url in urls:
print("Starting %s" %url)
thread = Thread(target=launch_remote_benchmark, args=(url,))
thread.start()
time.sleep(LAUNCH_INTERVAL)
print("All started")
print()

print("All started")
print()
thread.join()
5 changes: 5 additions & 0 deletions ComputerVisionServer/moedx/client/remote_bench_all.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash
# Run the remote GPU benchmark with an increasing number of clients (1 through 5),
# timing each full run. remote_bench.py prints its own per-run stats to stdout;
# `time` adds the wall-clock duration of each sweep.
for num_clients in 1 2 3 4 5; do
    time python remote_bench.py -n "$num_clients"
done
9 changes: 8 additions & 1 deletion ComputerVisionServer/moedx/client/test_dme_rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# dme = "262-01.dme.mobiledgex.net"
# dme = "us-mexdemo.dme.mobiledgex.net"
dme = "eu-mexdemo.dme.mobiledgex.net"
# dme = "emeraldeyeconstruct.mywire.org"
# dme = "eu-tef.dme.mobiledgex.net"
# dme = "eu-qa.dme.mobiledgex.net"
# dme = "eu-stage.dme.mobiledgex.net"
Expand Down Expand Up @@ -54,6 +55,12 @@
# app_vers = "02.00.rc2-2923"
# carrier_name = "GDDT"

# dme = "eu-qa.dme.mobiledgex.net"
# app_name = "automation-sdk-porttest"
# org_name = "MobiledgeX"
# app_vers = "1.0"
# carrier_name = "GDDT"

print("dme: %s, app_name: %s, org_name: %s, app_vers: %s, carrier_name: %s" %(dme, app_name, org_name, app_vers, carrier_name))
url = "https://%s:38001/v1/registerclient" %dme
data = {
Expand All @@ -66,7 +73,7 @@
# print(resp.content)
decoded_json = json.loads(resp.content)
if "status" not in decoded_json:
print("'status' not returned:\n"+resp.content)
print("'status' not returned:\n%s" %resp.content)
sys.exit()
status = decoded_json["status"]
session_cookie = decoded_json["session_cookie"]
Expand Down
Loading

0 comments on commit cf0dff9

Please sign in to comment.