Skip to content

Commit

Permalink
add long-lived stress test
Browse files Browse the repository at this point in the history
  • Loading branch information
simonlingoogle committed Aug 21, 2022
1 parent 648958b commit a5075e5
Show file tree
Hide file tree
Showing 8 changed files with 150 additions and 5 deletions.
10 changes: 9 additions & 1 deletion .github/workflows/stress.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,15 @@ jobs:
matrix:
python-version: [3.7]
go-version: [1.18]
suite: ["network-forming", "commissioning", "connectivity", "network-latency", "multicast-performance", "otns-performance", "network-limits"]
suite:
- "network-forming"
- "commissioning"
- "connectivity"
- "network-latency"
- "multicast-performance"
- "otns-performance"
- "network-limits"
- "long-duration"
runs-on: ubuntu-20.04
env:
HOMEBREW_NO_AUTO_UPDATE: 1
Expand Down
7 changes: 4 additions & 3 deletions dispatcher/Node.go
Original file line number Diff line number Diff line change
Expand Up @@ -187,15 +187,16 @@ func (node *Node) onPingReply(timestamp uint64, dstaddr string, datasize int, ho
// if datasize < 4, timestamp is 0, these ping replies are ignored
return
}
const maxPingDelayUs uint64 = 10 * 1000000

pingTimeout := node.D.cfg.PingTimeout
var leftPingRequests []*pingRequest
for _, req := range node.pendingPings {
if req.Timestamp == timestamp && req.Dst == dstaddr {
// ping replied
node.addPingResult(req.Dst, req.DataSize, node.D.CurTime-req.Timestamp)
} else if req.Timestamp+maxPingDelayUs < node.D.CurTime {
} else if req.Timestamp+pingTimeout < node.D.CurTime {
// ping timeout
node.addPingResult(req.Dst, req.DataSize, maxPingDelayUs)
node.addPingResult(req.Dst, req.DataSize, pingTimeout)
} else {
leftPingRequests = append(leftPingRequests, req)
}
Expand Down
3 changes: 3 additions & 0 deletions dispatcher/dispatcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ type Config struct {
Port int
DumpPackets bool
NoPcap bool
PingTimeout uint64
}

func DefaultConfig() *Config {
Expand All @@ -81,6 +82,7 @@ func DefaultConfig() *Config {
Host: "localhost",
Port: threadconst.InitialDispatcherPort,
DumpPackets: false,
PingTimeout: 10 * 1000000,
}
}

Expand Down Expand Up @@ -152,6 +154,7 @@ func NewDispatcher(ctx *progctx.ProgCtx, cfg *Config, cbHandler CallbackHandler)
_ = ln.SetWriteBuffer(25 * 1024 * 1024)
_ = ln.SetReadBuffer(25 * 1024 * 1024)
simplelogger.Infof("dispatcher listening on %s ...", udpAddr)
simplelogger.Warnf("Dispatcher ping timeout: %v", cfg.PingTimeout)

simplelogger.AssertNil(err)

Expand Down
3 changes: 3 additions & 0 deletions otns_main/otns_main.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ type MainArgs struct {
DumpPackets bool
NoPcap bool
NoReplay bool
PingTimeout uint64
}

var (
Expand All @@ -101,6 +102,7 @@ func parseArgs() {
flag.BoolVar(&args.DumpPackets, "dump-packets", false, "dump packets")
flag.BoolVar(&args.NoPcap, "no-pcap", false, "do not generate Pcap")
flag.BoolVar(&args.NoReplay, "no-replay", false, "do not generate Replay")
flag.Uint64Var(&args.PingTimeout, "ping-timeout", 10*1000000, "set ping timeout")

flag.Parse()
}
Expand Down Expand Up @@ -254,6 +256,7 @@ func createSimulation(ctx *progctx.ProgCtx) *simulation.Simulation {

dispatcherCfg := dispatcher.DefaultConfig()
dispatcherCfg.NoPcap = args.NoPcap
dispatcherCfg.PingTimeout = args.PingTimeout

sim, err := simulation.NewSimulation(ctx, simcfg, dispatcherCfg)
simplelogger.FatalIfError(err)
Expand Down
7 changes: 7 additions & 0 deletions pylibs/otns/cli/OTNS.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,13 @@ def get_poll_period(self, nodeid: int) -> float:
ms = self._expect_int(self.node_cmd(nodeid, 'pollperiod'))
return ms / 1000.0

def set_child_timeout(self, nodeid: int, timeout: int) -> None:
    """
    Set the child timeout of a node by issuing its `childtimeout <timeout>` command.

    :param nodeid: ID of the node to configure
    :param timeout: new child timeout value (presumably seconds, as in the OpenThread CLI -- verify)
    """
    command = f'childtimeout {timeout}'
    self.node_cmd(nodeid, command)

def get_child_timeout(self, nodeid: int) -> int:
    """
    Query the child timeout of a node with its `childtimeout` command.

    :param nodeid: ID of the node to query
    :return: the child timeout parsed from the command output as an integer
             (presumably seconds, as in the OpenThread CLI -- verify)
    """
    return self._expect_int(self.node_cmd(nodeid, 'childtimeout'))

@staticmethod
def _detect_otns_path() -> str:
env_otns_path = os.getenv('OTNS')
Expand Down
4 changes: 3 additions & 1 deletion pylibs/stress_tests/BaseStressTest.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,13 @@ def run_wrapper(self: 'BaseStressTest', report=True):


class BaseStressTest(object, metaclass=StressTestMetaclass):
def __init__(self, name, headers, raw=False):
def __init__(self, name, headers, raw=False, ping_timeout=None):
self.name = name
self._otns_args = []
if raw:
self._otns_args.append('-raw')
if ping_timeout is not None:
self._otns_args += ['-ping-timeout', str(ping_timeout * 1000)]
self.ns = OTNS(otns_args=self._otns_args)
self.ns.speed = float('inf')
self.ns.web()
Expand Down
120 changes: 120 additions & 0 deletions pylibs/stress_tests/long_lived.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
#!/usr/bin/env python3
#
# Copyright (c) 2022, The OTNS Authors.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. Neither the name of the copyright holder nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
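# OTNS Long Duration Stress test:
#   Simulate a small network at max speed for 10 days of simulated time (multiplied by STRESS_LEVEL).
#   The MED and SED ping the first router every 300s while all nodes are moved to new random
#   positions every 3600s. Measure the execution (real) time.
# Topology:
#   Router x2, MED x1, SED x1, all at random positions
# Fault Injections:
#   None
# Pass Criteria:
#   Speed up (simulation time / execution time) >= 3000
#   MED and SED each have a successful ping within the last 86400s of the simulation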
#
import random
import time
import os
from collections import Counter

from BaseStressTest import BaseStressTest

# Radio range passed to every node added to the simulation.
RADIO_RANGE = 200
# Inclusive upper bounds of the random X / Y coordinates used to place and move nodes.
XMAX = 300
YMAX = 300

# Packet loss ratio configured on the simulation.
PACKET_LOSS_RATIO = 0.0
# Total simulated time (reported with an "s" suffix in the result table): 10 days, scaled by the
# STRESS_LEVEL environment variable (defaults to 1).
TOTAL_SIMULATION_TIME = 10 * 86400 * int(os.getenv("STRESS_LEVEL", "1"))
# Simulated time between two rounds of moving every node to a new random position.
MOVE_INTERVAL = 3600
# Interval between consecutive pings sent by each child, in the same unit as the simulation time.
PING_INTERVAL = 300
# Data size of each ping.
PING_DATA_SIZE = 64

# Ping results whose delay reaches this value are treated as failed pings.
PING_TIMEOUT = 300*1000 # Unit: ms

# One ping command per child covers the whole run, so the resulting ping count is capped at 65535
# (presumably the largest count the ping command accepts -- TODO confirm).
assert TOTAL_SIMULATION_TIME // PING_INTERVAL <= 65535, "too many ping count"

class LongDurationStressTest(BaseStressTest):
    """
    Long-running stress test of a small network whose nodes keep moving.

    Two routers, one MED and one SED are added at random positions. The MED and the SED ping the
    first router every PING_INTERVAL for the whole simulation, and every MOVE_INTERVAL all four
    nodes are moved to new random positions. The test fails when the speed-up (simulated time
    divided by real execution time) is below 3000, or when either child has no successful ping
    within the last 86400s of simulated time.
    """
    SUITE = 'long-duration'

    def __init__(self):
        super(LongDurationStressTest, self).__init__("Long-Duration stress test",
                                                     ['Simulation Time', 'Execution Time', 'Speed Up'],
                                                     ping_timeout=PING_TIMEOUT)
        # Simulated time elapsed so far, advanced by MOVE_INTERVAL after every `go`.
        self._cur_time = 0
        # Maps node ID -> simulated time at which its most recent successful ping was collected.
        self._last_ping_succ_time = {}

    def rand_pos(self):
        """Return a random (x, y) position with 0 <= x <= XMAX and 0 <= y <= YMAX."""
        return random.randint(0, XMAX), random.randint(0, YMAX)

    def run(self):
        """Build the network, run the whole simulation and record the pass/fail results."""
        ns = self.ns
        ns.packet_loss_ratio = PACKET_LOSS_RATIO

        router1 = ns.add("router", *self.rand_pos(), radio_range=RADIO_RANGE)
        router1_addr = self.expect_node_mleid(router1, 10)

        router2 = ns.add("router", *self.rand_pos(), radio_range=RADIO_RANGE)
        med = ns.add("med", *self.rand_pos(), radio_range=RADIO_RANGE)
        ns.set_child_timeout(med, PING_INTERVAL * 3)

        sed = ns.add("sed", *self.rand_pos(), radio_range=RADIO_RANGE)
        ns.set_poll_period(sed, 60)
        ns.set_child_timeout(sed, PING_INTERVAL * 3)

        # A single ping command per child schedules all pings for the whole simulation.
        ping_count = TOTAL_SIMULATION_TIME // PING_INTERVAL
        for nodeid in (med, sed):
            self._last_ping_succ_time[nodeid] = 0
            ns.ping(nodeid, router1_addr, datasize=PING_DATA_SIZE, count=ping_count, interval=PING_INTERVAL)

        # Use a monotonic clock: wall-clock adjustments must not distort the measured execution time.
        t0 = time.monotonic()

        for _ in range(TOTAL_SIMULATION_TIME // MOVE_INTERVAL):
            ns.go(MOVE_INTERVAL)
            self._cur_time += MOVE_INTERVAL

            self._collect_pings()

            for nodeid in (router1, router2, med, sed):
                ns.move(nodeid, *self.rand_pos())

        # Clamp to a tiny positive value so a zero-length measurement cannot raise ZeroDivisionError.
        duration = max(time.monotonic() - t0, 1e-9)
        speed_up = TOTAL_SIMULATION_TIME / duration

        self.result.append_row('%ds' % TOTAL_SIMULATION_TIME, '%ds' % duration, '%d' % speed_up)
        self.result.fail_if(speed_up < 3000, "Speed Up < 3000")

        # Each child must have had a successful ping within the last day of simulated time.
        oldest_accepted = self._cur_time - 86400
        self.result.fail_if(self._last_ping_succ_time[med] < oldest_accepted, "MED not connected for a long time")
        self.result.fail_if(self._last_ping_succ_time[sed] < oldest_accepted, "SED not connected for a long time")

    def _collect_pings(self):
        """Stamp the current simulated time on every node that reported a successful ping."""
        for srcid, _dstaddr, _datasize, delay in self.ns.pings():
            # A delay that reaches PING_TIMEOUT marks a failed ping; only successes are recorded.
            if delay < PING_TIMEOUT:
                self._last_ping_succ_time[srcid] = self._cur_time

# Run the long-duration stress test when this file is executed as a script.
if __name__ == '__main__':
    LongDurationStressTest().run()
1 change: 1 addition & 0 deletions script/test
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ build_openthread()
"-DOT_SERVICE=ON"
"-DOT_COAP=ON"
"-DOT_THREAD_VERSION=${THREAD_VERSION:-1.2}"
"-DOT_UPTIME=ON"
)

local COVERAGE=${COVERAGE:-0}
Expand Down

0 comments on commit a5075e5

Please sign in to comment.