Skip to content

Commit

Permalink
add long-lived stress test
Browse files Browse the repository at this point in the history
  • Loading branch information
simonlingoogle committed Sep 3, 2022
1 parent 0ab7444 commit ac05588
Show file tree
Hide file tree
Showing 8 changed files with 150 additions and 6 deletions.
10 changes: 9 additions & 1 deletion .github/workflows/stress.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,15 @@ jobs:
matrix:
python-version: [3.7]
go-version: [1.18]
suite: ["network-forming", "commissioning", "connectivity", "network-latency", "multicast-performance", "otns-performance", "network-limits"]
suite:
- "network-forming"
- "commissioning"
- "connectivity"
- "network-latency"
- "multicast-performance"
- "otns-performance"
- "network-limits"
- "long-duration"
runs-on: ubuntu-20.04
env:
HOMEBREW_NO_AUTO_UPDATE: 1
Expand Down
7 changes: 4 additions & 3 deletions dispatcher/Node.go
Original file line number Diff line number Diff line change
Expand Up @@ -187,15 +187,16 @@ func (node *Node) onPingReply(timestamp uint64, dstaddr string, datasize int, ho
// if datasize < 4, timestamp is 0, these ping replies are ignored
return
}
const maxPingDelayUs uint64 = 10 * 1000000

pingTimeout := node.D.cfg.PingTimeout
var leftPingRequests []*pingRequest
for _, req := range node.pendingPings {
if req.Timestamp == timestamp && req.Dst == dstaddr {
// ping replied
node.addPingResult(req.Dst, req.DataSize, node.D.CurTime-req.Timestamp)
} else if req.Timestamp+maxPingDelayUs < node.D.CurTime {
} else if req.Timestamp+pingTimeout < node.D.CurTime {
// ping timeout
node.addPingResult(req.Dst, req.DataSize, maxPingDelayUs)
node.addPingResult(req.Dst, req.DataSize, pingTimeout)
} else {
leftPingRequests = append(leftPingRequests, req)
}
Expand Down
2 changes: 2 additions & 0 deletions dispatcher/dispatcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ type Config struct {
Port int
DumpPackets bool
NoPcap bool
PingTimeout uint64
}

func DefaultConfig() *Config {
Expand All @@ -81,6 +82,7 @@ func DefaultConfig() *Config {
Host: "localhost",
Port: threadconst.InitialDispatcherPort,
DumpPackets: false,
PingTimeout: 10 * 1000000,
}
}

Expand Down
3 changes: 3 additions & 0 deletions otns_main/otns_main.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ type MainArgs struct {
DumpPackets bool
NoPcap bool
NoReplay bool
PingTimeout float64
}

var (
Expand All @@ -101,6 +102,7 @@ func parseArgs() {
flag.BoolVar(&args.DumpPackets, "dump-packets", false, "dump packets")
flag.BoolVar(&args.NoPcap, "no-pcap", false, "do not generate Pcap")
flag.BoolVar(&args.NoReplay, "no-replay", false, "do not generate Replay")
flag.Float64Var(&args.PingTimeout, "ping-timeout", 10, "set ping timeout")

flag.Parse()
}
Expand Down Expand Up @@ -254,6 +256,7 @@ func createSimulation(ctx *progctx.ProgCtx) *simulation.Simulation {

dispatcherCfg := dispatcher.DefaultConfig()
dispatcherCfg.NoPcap = args.NoPcap
dispatcherCfg.PingTimeout = uint64(args.PingTimeout * 1000000) // Dispatcher uses microseconds

sim, err := simulation.NewSimulation(ctx, simcfg, dispatcherCfg)
simplelogger.FatalIfError(err)
Expand Down
7 changes: 7 additions & 0 deletions pylibs/otns/cli/OTNS.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,13 @@ def get_poll_period(self, nodeid: int) -> float:
ms = self._expect_int(self.node_cmd(nodeid, 'pollperiod'))
return ms / 1000.0

def set_child_timeout(self, nodeid: int, timeout: int) -> None:
    """
    Set the child timeout of a node by issuing its `childtimeout` CLI command.

    :param nodeid: ID of the node to configure
    :param timeout: new child timeout value (presumably in seconds, as in the OpenThread CLI - confirm)
    """
    command = 'childtimeout {}'.format(timeout)
    self.node_cmd(nodeid, command)

def get_child_timeout(self, nodeid: int) -> int:
    """
    Query the child timeout of a node through its `childtimeout` CLI command.

    :param nodeid: ID of the node to query
    :return: the child timeout parsed as an integer (unit presumably seconds - confirm)
    """
    output = self.node_cmd(nodeid, 'childtimeout')
    return self._expect_int(output)

@staticmethod
def _detect_otns_path() -> str:
env_otns_path = os.getenv('OTNS')
Expand Down
7 changes: 5 additions & 2 deletions pylibs/stress_tests/BaseStressTest.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,10 @@
import time
import traceback
from functools import wraps
from typing import Collection

from otns.cli import OTNS
from otns.cli.errors import UnexpectedError
from typing import Collection

from StressTestResult import StressTestResult
from errors import UnexpectedNodeAddr
Expand Down Expand Up @@ -66,11 +67,13 @@ def run_wrapper(self: 'BaseStressTest', report=True):


class BaseStressTest(object, metaclass=StressTestMetaclass):
def __init__(self, name, headers, raw=False):
def __init__(self, name, headers, raw=False, ping_timeout: float = None):
self.name = name
self._otns_args = []
if raw:
self._otns_args.append('-raw')
if ping_timeout is not None:
self._otns_args += ['-ping-timeout', str(ping_timeout)]
self.ns = OTNS(otns_args=self._otns_args)
self.ns.speed = float('inf')
self.ns.web()
Expand Down
119 changes: 119 additions & 0 deletions pylibs/stress_tests/long_lived.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
#!/usr/bin/env python3
#
# Copyright (c) 2022, The OTNS Authors.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. Neither the name of the copyright holder nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
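# OTNS Long Duration Stress test:
# Simulate a small network at max speed for 10 days of simulated time (multiplied by STRESS_LEVEL).
# The MED and the SED ping the first Router periodically, and all nodes are moved to new random
# positions every simulated hour. Measure the execution (real) time.
# Topology:
# Router x2, MED x1, SED x1, placed at random positions
# Fault Injections:
# None
# Pass Criteria:
# Speed Up (simulation time / execution time) >= 3000
# MED and SED each have a successful ping within the last simulated day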
#
import random
import time
import os

from BaseStressTest import BaseStressTest

# Radio range given to every node added by the test.
RADIO_RANGE = 200
# Upper bounds (inclusive) of the random X / Y coordinates nodes are placed at.
XMAX = YMAX = 300

# No packet loss is injected in this test.
PACKET_LOSS_RATIO = 0.0

# All durations below are in simulated seconds.
# Total simulated time: 10 days, scaled by the STRESS_LEVEL environment variable (default 1).
TOTAL_SIMULATION_TIME = int(os.getenv("STRESS_LEVEL", "1")) * 86400 * 10
# Nodes are moved to new random positions once per MOVE_INTERVAL.
MOVE_INTERVAL = 3600
# Interval between two pings sent by the same node, and the ping payload size.
PING_INTERVAL = 300
PING_DATA_SIZE = 64

# A ping is given one full ping interval to be answered before it counts as timed out.
PING_TIMEOUT = PING_INTERVAL

# The whole run is covered by a single ping command per node, so the ping count must stay
# within 65535 (presumably a 16-bit limit of the ping count - confirm).
assert TOTAL_SIMULATION_TIME // PING_INTERVAL <= 0xFFFF, "too many ping count"

class LongDurationStressTest(BaseStressTest):
    """
    Long-running stress test: two Routers, one MED and one SED are simulated for
    TOTAL_SIMULATION_TIME while the MED and SED keep pinging the first Router and all
    nodes are moved to random positions every MOVE_INTERVAL.

    The test fails when the speed-up is below 3000 or when the MED / SED had no
    successful ping during the last 86400 units of simulated time.
    """
    SUITE = 'long-duration'

    def __init__(self):
        result_headers = ['Simulation Time', 'Execution Time', 'Speed Up']
        super(LongDurationStressTest, self).__init__("Long-Duration stress test", result_headers,
                                                     ping_timeout=PING_TIMEOUT)
        # Simulated time elapsed so far, advanced by run() after every go().
        self._cur_time = 0
        # Maps node ID -> simulated time at which its latest successful ping was collected.
        self._last_ping_succ_time = {}

    def rand_pos(self):
        """Return a random (x, y) position with 0 <= x <= XMAX and 0 <= y <= YMAX."""
        x = random.randint(0, XMAX)
        y = random.randint(0, YMAX)
        return x, y

    def run(self):
        """Build the network, run the whole simulation and record the result row and failures."""
        ns = self.ns
        ns.packet_loss_ratio = PACKET_LOSS_RATIO

        child_timeout = PING_INTERVAL * 3
        ping_count = TOTAL_SIMULATION_TIME // PING_INTERVAL

        # First Router is the ping destination; its ML-EID is obtained right after it is added
        # (second argument 10 is presumably a wait timeout - confirm against BaseStressTest).
        x, y = self.rand_pos()
        router1 = ns.add("router", x, y, radio_range=RADIO_RANGE)
        router1_addr = self.expect_node_mleid(router1, 10)

        x, y = self.rand_pos()
        router2 = ns.add("router", x, y, radio_range=RADIO_RANGE)

        x, y = self.rand_pos()
        med = ns.add("med", x, y, radio_range=RADIO_RANGE)
        ns.set_child_timeout(med, child_timeout)

        x, y = self.rand_pos()
        sed = ns.add("sed", x, y, radio_range=RADIO_RANGE)
        ns.set_poll_period(sed, 60)
        ns.set_child_timeout(sed, child_timeout)

        # Both children issue one ping command that spans the whole simulation.
        pingers = (med, sed)
        for pinger in pingers:
            self._last_ping_succ_time[pinger] = 0
            ns.ping(pinger, router1_addr, datasize=PING_DATA_SIZE, count=ping_count, interval=PING_INTERVAL)

        all_nodes = (router1, router2, med, sed)
        rounds = TOTAL_SIMULATION_TIME // MOVE_INTERVAL

        started_at = time.time()

        for _ in range(rounds):
            ns.go(MOVE_INTERVAL)
            self._cur_time += MOVE_INTERVAL

            self._collect_pings()

            # Shuffle the topology before the next round.
            for node in all_nodes:
                new_x, new_y = self.rand_pos()
                ns.move(node, new_x, new_y)

        duration = time.time() - started_at
        speed_up = TOTAL_SIMULATION_TIME / duration

        self.result.append_row('%ds' % TOTAL_SIMULATION_TIME, '%ds' % duration, '%d' % speed_up)
        self.result.fail_if(speed_up < 3000, "Speed Up < 3000")

        # A child must have had a successful ping within the last 86400 units of simulated time.
        oldest_allowed = self._cur_time - 86400
        for child, message in ((med, "MED not connected for a long time"),
                               (sed, "SED not connected for a long time")):
            self.result.fail_if(self._last_ping_succ_time[child] < oldest_allowed, message)

    def _collect_pings(self):
        """Drain the ping results and stamp every node that had a successful ping with the current time."""
        for srcid, _dstaddr, _datasize, delay in self.ns.pings():
            # NOTE(review): PING_TIMEOUT is in seconds; confirm that ns.pings() reports `delay`
            # in the same unit, otherwise this threshold does not mean "timed out".
            timed_out = delay >= PING_TIMEOUT
            if not timed_out:
                self._last_ping_succ_time[srcid] = self._cur_time


# Run the long-duration stress test when this file is executed as a script.
if __name__ == '__main__':
    stress_test = LongDurationStressTest()
    stress_test.run()
1 change: 1 addition & 0 deletions script/test
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ build_openthread()
"-DOT_SERVICE=ON"
"-DOT_COAP=ON"
"-DOT_THREAD_VERSION=${THREAD_VERSION:-1.2}"
"-DOT_UPTIME=ON"
)

local COVERAGE=${COVERAGE:-0}
Expand Down

0 comments on commit ac05588

Please sign in to comment.