From b06b1ba0bb77fff5647c1b4ea2b8f6dcea3ec187 Mon Sep 17 00:00:00 2001
From: Kristian Larsson
Date: Tue, 12 Nov 2024 00:21:27 +0100
Subject: [PATCH] Add support for golden testing

We now support golden testing, i.e. comparing the output of a test with an
expected value, the golden value. Golden values are saved in
test/golden/MODULE_NAME/TEST_NAME.

Errors, i.e. mismatches, are reported as NotEqualErrors, e.g.:

  kll@Boxy:~/dt/acton/test/stdlib_tests$ acton test --module test_testing3
  Tests - module test_testing3:
    foo: FAIL: 215 runs in 50.217ms
      testing.NotEqualError: Test output does not match expected golden value.
      Actual : foobarBAR
      Expected: None

  1 out of 1 tests failed (0.410s)

The expected output can be updated with --golden-update.

We only support strings as golden values, although this could be expanded in
the future. Only unit tests and synchronous actor tests support golden tests.
This could also be expanded to support the asynchronous and env tests.
---
 base/src/testing.act    | 120 ++++++++++++++++++++++++++++++++--------
 cli/src/acton.act       |  60 ++++++++++++++------
 compiler/Acton/Types.hs |   2 +-
 3 files changed, 142 insertions(+), 40 deletions(-)
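
Note below the diffstat (kept out of the commit message): the following is a
minimal sketch of what a golden test could look like with this change. It
assumes the usual convention that test functions carry a _test_ prefix and are
reported under the name without the prefix; the module name test_testing3
matches the example output above, while the test body and its golden value are
made up for illustration.

    # test_testing3.act -- hypothetical example module
    def _test_foo() -> str:
        # Returning a str (rather than None) makes this a golden test: the
        # returned value is compared against the contents of
        # test/golden/test_testing3/foo, if that file exists.
        return "foo" + "bar" + "BAR"

Assumed workflow with the new flag:

    acton test --module test_testing3                  # fails with NotEqualError while no golden value exists
    acton test --module test_testing3 --golden-update  # stores the current output as the golden value
    acton test --module test_testing3                  # passes for as long as the output stays the same
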
diff --git a/base/src/testing.act b/base/src/testing.act
index a0520ecc5..5ed9f9f09 100644
--- a/base/src/testing.act
+++ b/base/src/testing.act
@@ -1,6 +1,7 @@
 import acton.rts
 import argparse
+import file
 import json
 import logging
 import term

@@ -229,18 +230,29 @@ def error(msg: ?str):

 # -------------------------------------------------------------------------------

+def eq_opt[T(Eq)](a: ?T, b: ?T) -> bool:
+    return a is not None and b is not None and a == b or a is None and b is None
+
 class TestLogger(logging.Logger):
     pass

 class Test(object):
+    module: ?str
     name: str
     desc: str

     def __init__(self, name: str, desc: str):
         self.name = name
         self.desc = desc
+        self.module = None
+
+    def get_module(self) -> str:
+        mod = self.module
+        if mod is not None:
+            return mod
+        raise ValueError("Test: Module not set")

-    def run(self, report_result: action(?bool, ?Exception) -> None, env: Env, log_handler: logging.Handler):
+    def run(self, report_result: action(?bool, ?Exception, ?str) -> None, env: Env, log_handler: logging.Handler):
         if isinstance(self, UnitTest):
             self.run_test(report_result, env, log_handler)
         elif isinstance(self, SyncActorTest):
@@ -271,12 +283,14 @@ class UnitTest(Test):
         self.fn = fn
         self.name = name
         self.desc = desc
+        self.module = None

-    def run_test(self, report_result: action(?bool, ?Exception) -> None, env: Env, log_handler: logging.Handler):
+    def run_test(self, report_result: action(?bool, ?Exception, ?str) -> None, env: Env, log_handler: logging.Handler):
+        output = None
         success = None
         exception = None
         try:
-            self.fn()
+            output = self.fn()
             success = True
             exception = None
         except AssertionError as e:
@@ -285,19 +299,21 @@ class UnitTest(Test):
         except Exception as e:
             success = None
             exception = e
-        report_result(success, exception)
+        report_result(success, exception, output)

 class SyncActorTest(Test):
     def __init__(self, fn: proc(logging.Handler) -> None, name: str, desc: str):
         self.fn = fn
         self.name = name
         self.desc = desc
+        self.module = None

-    def run_test(self, report_result: action(?bool, ?Exception) -> None, env: Env, log_handler: logging.Handler):
+    def run_test(self, report_result: action(?bool, ?Exception, ?str) -> None, env: Env, log_handler: logging.Handler):
+        output = None
         success = None
         exception = None
         try:
-            self.fn(log_handler)
+            output = self.fn(log_handler)
             success = True
             exception = None
         except AssertionError as e:
@@ -306,32 +322,38 @@ class SyncActorTest(Test):
         except Exception as e:
             success = None
             exception = e
-        report_result(success, exception)
+        report_result(success, exception, output)

 class AsyncActorTest(Test):
     def __init__(self, fn: proc(action(?bool, ?Exception) -> None, logging.Handler) -> None, name: str, desc: str):
         self.fn = fn
         self.name = name
         self.desc = desc
+        self.module = None

-    def run_test(self, report_result: action(?bool, ?Exception) -> None, env: Env, log_handler: logging.Handler):
-        self.fn(report_result, log_handler)
+    def run_test(self, report_result: action(?bool, ?Exception, ?str) -> None, env: Env, log_handler: logging.Handler):
+        def repres(success: ?bool, exception: ?Exception):
+            report_result(success, exception, None)
+        self.fn(repres, log_handler)

 class EnvTest(Test):
     def __init__(self, fn: proc(action(?bool, ?Exception) -> None, Env, logging.Handler) -> None, name: str, desc: str):
         self.fn = fn
         self.name = name
         self.desc = desc
+        self.module = None

-    def run_test(self, report_result: action(?bool, ?Exception) -> None, env: Env, log_handler: logging.Handler):
-        self.fn(report_result, env, log_handler)
+    def run_test(self, report_result: action(?bool, ?Exception, ?str) -> None, env: Env, log_handler: logging.Handler):
+        def repres(success: ?bool, exception: ?Exception):
+            report_result(success, exception, None)
+        self.fn(repres, env, log_handler)

 class TestResult(object):
     """
     There are three possible outcomes for a test:
     - success: the test ran to completion with the expected results
-      - for unit tests & synchronous actor tests, it means it returned `None`
+      - for unit tests & synchronous actor tests, it means the function returned
      - for asynchronous actor & env tests, the report_result callback was called with TestResult(success=True, exception=None)
    - failure: the test encountered an unexpected value
      - for unit tests & synchronous actor tests, an AssertionError (or child thereof) was raiesd
@@ -342,13 +364,15 @@ class TestResult(object):
     """
     success: ?bool
     exception: ?str
+    output: ?str
     duration: float
     mem_usage_delta: int
     non_gc_mem_usage_delta: int

-    def __init__(self, success: ?bool, exception: ?str, duration: float, mem_usage_delta: int, non_gc_mem_usage_delta: int):
+    def __init__(self, success: ?bool, exception: ?str, output: ?str, duration: float, mem_usage_delta: int, non_gc_mem_usage_delta: int):
         self.success = success
         self.exception = exception
+        self.output = output
         self.duration = duration
         self.mem_usage_delta = mem_usage_delta
         self.non_gc_mem_usage_delta = non_gc_mem_usage_delta
@@ -357,6 +381,7 @@ class TestResult(object):
         return {
             "success": self.success,
             "exception": self.exception,
+            "output": self.output,
             "duration": self.duration,
             "mem_usage_delta": self.mem_usage_delta,
             "non_gc_mem_usage_delta": self.non_gc_mem_usage_delta,
@@ -366,16 +391,18 @@ def from_json(data: dict[str, str]) -> TestResult:
         success = data["success"]
         exception = data["exception"]
+        output = data["output"]
         duration = data["duration"]
         mem_usage_delta = data["mem_usage_delta"]
         non_gc_mem_usage_delta = data["non_gc_mem_usage_delta"]

         if (isinstance(success, bool)
             and isinstance(exception, str)
+            and isinstance(output, str)
             and isinstance(duration, float)
             and isinstance(mem_usage_delta, int)
             and isinstance(non_gc_mem_usage_delta, int)
            ):
-            return TestResult(success, exception, duration, mem_usage_delta, non_gc_mem_usage_delta)
+            return TestResult(success, exception, output, duration, mem_usage_delta, non_gc_mem_usage_delta)
         raise ValueError("Invalid TestResult JSON")

@@ -384,6 +411,8 @@ class TestInfo(object):
     complete: bool
     success: ?bool
     exception: ?str
+    output: ?str
+    flaky_output: bool
     flaky: bool
     leaky: bool
     min_duration: float
@@ -404,6 +433,7 @@ class TestInfo(object):
                  complete: bool=False,
                  success: ?bool=None,
                  exception: ?str=None,
+                 output: ?str=None,
                  flaky: bool=False,
                  min_duration: float=-1.0,
                  max_duration: float=-1.0,
@@ -421,6 +451,8 @@ class TestInfo(object):
         self.complete = complete
         self.success = success
         self.exception = exception
+        self.output = output
+        self.flaky_output = False
         self.flaky = flaky
         self.leaky = False
         self.min_duration = min_duration
@@ -438,10 +470,17 @@ class TestInfo(object):

     def update(self, complete, result: TestResult, test_duration: float=-1.0):
         self.complete = complete
+
+        if len(self.results) == 0:
+            # First result
+            self.output = result.output
+            self.exception = result.exception
+
         self.results.append(result)
-        exc = result.exception
-        if exc is not None:
-            self.exception = exc
+
+        if not eq_opt(self.output, result.output):
+            self.flaky_output = True
+
         if test_duration > 0.0:
             self.test_duration = test_duration

@@ -534,6 +573,7 @@ class TestInfo(object):
             "complete": self.complete,
             "success": self.success,
             "exception": self.exception,
+            "output": self.output,
             "flaky": self.flaky,
             "min_duration": self.min_duration,
             "max_duration": self.max_duration,
@@ -567,6 +607,10 @@ class TestInfo(object):
         exception: ?str = None
         if exc is not None and isinstance(exc, str):
             exception = exc
+        out = json_data["output"]
+        output: ?str = None
+        if out is not None and isinstance(out, str):
+            output = out
         flaky = json_data["flaky"]
         min_duration = json_data["min_duration"]
         max_duration = json_data["max_duration"]
@@ -601,6 +645,7 @@ class TestInfo(object):
             complete,
             success,
             exception,
+            output,
             flaky,
             min_duration,
             max_duration,
@@ -634,15 +679,27 @@ class TestRunnerConfig(object):

 # TODO: add a timeout to this
-actor test_executor(syscap, config, get_test: () -> Test, report_complete, env):
+actor TestExecutor(syscap, config, get_test: () -> Test, report_complete, env):
     """The actual executor of tests
     """
     log_handler = logging.Handler("TestRunner")
+    fcap = file.FileCap(env.cap)
+    rfcap = file.ReadFileCap(fcap)
+    fs = file.FS(fcap)
     var test_sw = time.Stopwatch()
     var last_report = time.Stopwatch()
     var test_info = None

-    action def _report_result(test: Test, sw, non_gc_mem_usage_before, gc_total_bytes_start, gc_time_start, success: ?bool, exception: ?Exception):
+    def get_expected(module: str, test: str) -> ?str:
+        filename = file.join_path([fs.cwd(), "test", "golden", module, test])
+        try:
+            exp_file = file.ReadFile(rfcap, filename)
+            exp_data = exp_file.read().decode()
+            return exp_data
+        except:
+            return None
+
+    action def _report_result(test: Test, sw, non_gc_mem_usage_before, gc_total_bytes_start, gc_time_start, success: ?bool, exception: ?Exception, val: ?str):
         full_dur = sw.elapsed().to_float() * 1000.0
         gc_time_end = acton.rts.get_gc_time(syscap).total
         gc_dur = float(gc_time_end - gc_time_start)
@@ -659,7 +716,7 @@ actor test_executor(syscap, config, get_test: () -> Test, report_complete, env):
         complete = True if test_dur > config.min_test_duration else False
         if test_info is not None:
             exc = str(exception) if exception is not None else None
-            test_info.update(complete, TestResult(success, exc, testiter_dur, mem_usage_delta, non_gc_mem_usage_delta), test_dur*1000.0)
+            test_info.update(complete, TestResult(success, exc, val, testiter_dur, mem_usage_delta, non_gc_mem_usage_delta), test_dur*1000.0)
         if last_report.elapsed().to_float() > 0.05 or complete:
             if test_info is not None and config.output_enabled:
                 print(json.encode({"test_info": test_info.to_json()}), err=True)
@@ -679,12 +736,23 @@ actor test_executor(syscap, config, get_test: () -> Test, report_complete, env):
         gc_total_bytes_start = int(acton.rts.get_gc_total_bytes(syscap))
         gc_time_start = acton.rts.get_gc_time(syscap).total
         sw = time.Stopwatch()
+
+        def repres(s: ?bool, e: ?Exception, val: ?str) -> None:
+            # Compare expected golden value
+            if val is not None:
+                exp_val = get_expected(t.get_module(), t.name)
+                if exp_val is None or exp_val is not None and val != exp_val:
+                    exc = NotEqualError(val, exp_val, "Test output does not match expected golden value.\nActual : %s\nExpected: %s" % (val, exp_val if exp_val is not None else "None"))
+                    _report_result(t, sw, non_gc_mem_usage_before, gc_total_bytes_start, gc_time_start, False, exc, val)
+                    return
+            _report_result(t, sw, non_gc_mem_usage_before, gc_total_bytes_start, gc_time_start, s, e, val)
+
         try:
-            t.run(lambda s, e: _report_result(t, sw, non_gc_mem_usage_before, gc_total_bytes_start, gc_time_start, s, e), env, log_handler)
+            t.run(repres, env, log_handler)
         except AssertionError as e:
-            _report_result(t, sw, non_gc_mem_usage_before, gc_total_bytes_start, gc_time_start, False, e)
+            _report_result(t, sw, non_gc_mem_usage_before, gc_total_bytes_start, gc_time_start, False, e, None)
         except Exception as e:
-            _report_result(t, sw, non_gc_mem_usage_before, gc_total_bytes_start, gc_time_start, None, e)
+            _report_result(t, sw, non_gc_mem_usage_before, gc_total_bytes_start, gc_time_start, None, e, None)

     def _run_next():
         """Get the next available test and run it"""
@@ -1036,12 +1104,16 @@ actor test_runner(env: Env,

     def _init_results(args):
         for name, t in unit_tests.items():
+            t.module = args.get_str("modname")
             all_tests[name] = t
         for name, t in sync_actor_tests.items():
+            t.module = args.get_str("modname")
             all_tests[name] = t
         for name, t in async_actor_tests.items():
+            t.module = args.get_str("modname")
             all_tests[name] = t
         for name, t in env_tests.items():
+            t.module = args.get_str("modname")
             all_tests[name] = t

         tests = _filter_tests(all_tests, args)
@@ -1112,6 +1184,8 @@ actor test_runner(env: Env,
     p = argparse.Parser()
     p.add_bool("json", "Output results as JSON")
     p.add_bool("no_output", "No result output")
+    # TODO: remove modname arg and get it from __modname__ that compiler should provide
+    p.add_option("modname", "str", help="Name of module")
     p.add_option("name", "strlist", nargs="+", default=[], help="Filter tests by name")
     lp = p.add_cmd("list", "list tests", _list_tests)
     tp = p.add_cmd("test", "Run tests", _run_tests)
diff --git a/cli/src/acton.act b/cli/src/acton.act
index c56320785..2bc7dac3f 100644
--- a/cli/src/acton.act
+++ b/cli/src/acton.act
@@ -2,6 +2,7 @@ import argparse
 import file
 import json
 import process
+import term
 import testing

 from buildy import *
@@ -425,7 +426,7 @@ actor RunTestList(env, args):
         _expected_modules = set(module_names)

         for module_name in module_names:
-            t = RunModuleTest(process_cap, module_name, ["list"], lambda x: _on_json_output(module_name, x), _on_test_error)
+            t = RunModuleTest(process_cap, module_name, ["list", "--modname", module_name], lambda x: _on_json_output(module_name, x), _on_test_error)

     def _on_build_success(stdout_buf: str):
         test_modules = []
@@ -460,6 +461,7 @@ actor RunTestTest(env: Env, args, perf_mode: bool=False):
     var _module_tests = {}
     var modules_to_test = set()
     var perf_data = "{}"
+    fs = file.FS(file.FileCap(env.cap))

     test_cmd_args = []
     for name_filter in args.get_strlist("name"):
@@ -479,7 +481,26 @@ actor RunTestTest(env: Env, args, perf_mode: bool=False):
             perf_wfile = file.WriteFile(file.WriteFileCap(file.FileCap(env.cap)), "perf_data")
             perf_wfile.write(ptr.to_json().encode())
             perf_wfile.close()
+        if args.get_bool("golden-update"):
+            for module_name, tests in ptr.results.items():
+                for test_name, test_info in tests.items():
+                    exc = test_info.exception
+                    output = test_info.output
+                    if exc is not None and output is not None and exc.startswith("testing.NotEqualError: Test output does not match expected golden value"):
+                        rpath = ["test", "golden", module_name]
+                        filename = file.join_path([fs.cwd()] + rpath + [test_name])
+                        for idx in range(1, len(rpath)+1):
+                            mkdir_path = file.join_path([fs.cwd()] + rpath[0:idx])
+                            try:
+                                a = fs.mkdir(mkdir_path)
+                            except:
+                                pass
+                        golden_file = file.WriteFile(file.WriteFileCap(file.FileCap(env.cap)), filename)
+                        golden_file.write(output.encode())
+                        await async golden_file.close()
+        env.exit(r)
+        return

         after 0.05: _periodic_show()

     def _on_json_output(module_name, data):
@@ -494,13 +515,12 @@ actor RunTestTest(env: Env, args, perf_mode: bool=False):
                 ptr.update_module(module_name, tests)
                 expected_modules_list.discard(module_name)
                 if len(expected_modules_list) == 0:
+                    # We have received the test list from all modules, show
+                    # results to get empty skeleton and then run tests.
+                    # NOTE: in perf mode we run a single module at a time and
+                    # that module in turn limits concurrency to 1
                     _periodic_show()
-                    if perf_mode:
-                        _run_module_tests()
-                    else:
-                        # Run all tests in parallel
-                        for module_name in modules_to_test:
-                            t = RunModuleTest(process_cap, module_name, ["test"] + test_cmd_args, lambda x: _on_json_output(module_name, x), lambda x, y, z: _on_test_error(module_name, x, y, z))
+                    _run_module_tests(run_all=not perf_mode)

             elif "test_info" in data:
                 test_info = TestInfo.from_json(data["test_info"])
@@ -534,21 +554,27 @@ actor RunTestTest(env: Env, args, perf_mode: bool=False):
         if len(modules_to_test) == 0:
             print("No tests found")
             env.exit(0)
+            return

         # List all tests first, which we can run in parallel. Once we have the
         # list of all tests we can start running them one at a time in sequence.
         for module_name in modules_to_test:
-            t = RunModuleTest(process_cap, module_name, ["list"] + test_cmd_args, lambda x: _on_json_output(module_name, x), lambda x, y, z: _on_test_error(module_name, x, y, z))
+            t = RunModuleTest(process_cap, module_name, ["list", "--modname", module_name] + test_cmd_args, lambda x: _on_json_output(module_name, x), lambda x, y, z: _on_test_error(module_name, x, y, z))

-    def _run_module_tests():
+    def _run_module_tests(run_all=False):
         try:
             module_name = modules_to_test.pop()
-            if module_name is not None:
-                t = RunModuleTest(process_cap, module_name, ["test", "perf"] + test_cmd_args, lambda x: _on_json_output(module_name, x), lambda x, y, z: _on_test_error(module_name, x, y, z))
-            else:
-                _periodic_show()
-        except:
-            pass
+            cmd = ["test"]
+            if perf_mode:
+                cmd += ["perf"]
+            cmd += ["--modname", module_name]
+            cmd += test_cmd_args
+            t = RunModuleTest(process_cap, module_name, cmd, lambda x: _on_json_output(module_name, x), lambda x, y, z: _on_test_error(module_name, x, y, z))
+        except ValueError:
+            _periodic_show()
+            return
+        if run_all:
+            _run_module_tests(run_all)

     def _on_build_success(stdout_buf: str):
         print(term.clearline + term.up() + term.clearline, end="")
@@ -580,7 +606,8 @@ actor RunTestTest(env: Env, args, perf_mode: bool=False):
 def build_cmd_args(args):
     cmdargs = []
     for argname, arg in args.options.items():
-        if argname in {"file", "record"}:
+        # TODO: reverse this logic, we should only pass in a small set of options, not all
+        if argname in {"file", "record", "golden-update"}:
             continue
         if arg.type == "bool":
             if args.get_bool(argname):
@@ -1096,6 +1123,7 @@ actor main(env):

     testp = p.add_cmd("test", "Test", _cmd_test)
     testp.add_bool("record", "Record test performance results")
+    testp.add_bool("golden-update", "Update expected golden values based on current values")
     testp.add_option("module", "strlist", "+", [], "Filter on test module name")
     testp.add_option("name", "strlist", "+", [], "Filter on test name")

diff --git a/compiler/Acton/Types.hs b/compiler/Acton/Types.hs
index a88253226..9fc84db6c 100644
--- a/compiler/Acton/Types.hs
+++ b/compiler/Acton/Types.hs
@@ -1725,7 +1725,7 @@ data TestType = UnitType | SyncType | AsyncType | EnvType
   deriving (Eq,Show,Read)

 testType (NDef (TSchema _ [] (TFun _ fx (TNil _ PRow) k res)) _)
-  | res /= tNone = Nothing
+  | res /= tNone && res /= tStr = Nothing
   | otherwise = case row2list k of
                   [] -> if fx == fxPure || fx == fxMut then Just UnitType else Nothing
                   [t] -> if t == logging_handler then Just SyncType else Nothing