diff --git a/base/src/testing.act b/base/src/testing.act index a0520ecc..5ed9f9f0 100644 --- a/base/src/testing.act +++ b/base/src/testing.act @@ -1,6 +1,7 @@ import acton.rts import argparse +import file import json import logging import term @@ -229,18 +230,29 @@ def error(msg: ?str): # ------------------------------------------------------------------------------- +def eq_opt[T(Eq)](a: ?T, b: ?T) -> bool: + return a is not None and b is not None and a == b or a is None and b is None + class TestLogger(logging.Logger): pass class Test(object): + module: ?str name: str desc: str def __init__(self, name: str, desc: str): self.name = name self.desc = desc + self.module = None + + def get_module(self) -> str: + mod = self.module + if mod is not None: + return mod + raise ValueError("Test: Module not set") - def run(self, report_result: action(?bool, ?Exception) -> None, env: Env, log_handler: logging.Handler): + def run(self, report_result: action(?bool, ?Exception, ?str) -> None, env: Env, log_handler: logging.Handler): if isinstance(self, UnitTest): self.run_test(report_result, env, log_handler) elif isinstance(self, SyncActorTest): @@ -271,12 +283,14 @@ class UnitTest(Test): self.fn = fn self.name = name self.desc = desc + self.module = None - def run_test(self, report_result: action(?bool, ?Exception) -> None, env: Env, log_handler: logging.Handler): + def run_test(self, report_result: action(?bool, ?Exception, ?str) -> None, env: Env, log_handler: logging.Handler): + output = None success = None exception = None try: - self.fn() + output = self.fn() success = True exception = None except AssertionError as e: @@ -285,19 +299,21 @@ class UnitTest(Test): except Exception as e: success = None exception = e - report_result(success, exception) + report_result(success, exception, output) class SyncActorTest(Test): def __init__(self, fn: proc(logging.Handler) -> None, name: str, desc: str): self.fn = fn self.name = name self.desc = desc + self.module = None - def run_test(self, report_result: action(?bool, ?Exception) -> None, env: Env, log_handler: logging.Handler): + def run_test(self, report_result: action(?bool, ?Exception, ?str) -> None, env: Env, log_handler: logging.Handler): + output = None success = None exception = None try: - self.fn(log_handler) + output = self.fn(log_handler) success = True exception = None except AssertionError as e: @@ -306,32 +322,38 @@ class SyncActorTest(Test): except Exception as e: success = None exception = e - report_result(success, exception) + report_result(success, exception, output) class AsyncActorTest(Test): def __init__(self, fn: proc(action(?bool, ?Exception) -> None, logging.Handler) -> None, name: str, desc: str): self.fn = fn self.name = name self.desc = desc + self.module = None - def run_test(self, report_result: action(?bool, ?Exception) -> None, env: Env, log_handler: logging.Handler): - self.fn(report_result, log_handler) + def run_test(self, report_result: action(?bool, ?Exception, ?str) -> None, env: Env, log_handler: logging.Handler): + def repres(success: ?bool, exception: ?Exception): + report_result(success, exception, None) + self.fn(repres, log_handler) class EnvTest(Test): def __init__(self, fn: proc(action(?bool, ?Exception) -> None, Env, logging.Handler) -> None, name: str, desc: str): self.fn = fn self.name = name self.desc = desc + self.module = None - def run_test(self, report_result: action(?bool, ?Exception) -> None, env: Env, log_handler: logging.Handler): - self.fn(report_result, env, log_handler) + def run_test(self, 
report_result: action(?bool, ?Exception, ?str) -> None, env: Env, log_handler: logging.Handler): + def repres(success: ?bool, exception: ?Exception): + report_result(success, exception, None) + self.fn(repres, env, log_handler) class TestResult(object): """ There are three possible outcomes for a test: - success: the test ran to completion with the expected results - - for unit tests & synchronous actor tests, it means it returned `None` + - for unit tests & synchronous actor tests, it means the function returned - for asynchronous actor & env tests, the report_result callback was called with TestResult(success=True, exception=None) - failure: the test encountered an unexpected value - for unit tests & synchronous actor tests, an AssertionError (or child thereof) was raiesd @@ -342,13 +364,15 @@ class TestResult(object): """ success: ?bool exception: ?str + output: ?str duration: float mem_usage_delta: int non_gc_mem_usage_delta: int - def __init__(self, success: ?bool, exception: ?str, duration: float, mem_usage_delta: int, non_gc_mem_usage_delta: int): + def __init__(self, success: ?bool, exception: ?str, output: ?str, duration: float, mem_usage_delta: int, non_gc_mem_usage_delta: int): self.success = success self.exception = exception + self.output = output self.duration = duration self.mem_usage_delta = mem_usage_delta self.non_gc_mem_usage_delta = non_gc_mem_usage_delta @@ -357,6 +381,7 @@ class TestResult(object): return { "success": self.success, "exception": self.exception, + "output": self.output, "duration": self.duration, "mem_usage_delta": self.mem_usage_delta, "non_gc_mem_usage_delta": self.non_gc_mem_usage_delta, @@ -366,16 +391,18 @@ class TestResult(object): def from_json(data: dict[str, str]) -> TestResult: success = data["success"] exception = data["exception"] + output = data["output"] duration = data["duration"] mem_usage_delta = data["mem_usage_delta"] non_gc_mem_usage_delta = data["non_gc_mem_usage_delta"] if (isinstance(success, bool) and isinstance(exception, str) + and isinstance(output, str) and isinstance(duration, float) and isinstance(mem_usage_delta, int) and isinstance(non_gc_mem_usage_delta, int) ): - return TestResult(success, exception, duration, mem_usage_delta, non_gc_mem_usage_delta) + return TestResult(success, exception, output, duration, mem_usage_delta, non_gc_mem_usage_delta) raise ValueError("Invalid TestResult JSON") @@ -384,6 +411,8 @@ class TestInfo(object): complete: bool success: ?bool exception: ?str + output: ?str + flaky_output: bool flaky: bool leaky: bool min_duration: float @@ -404,6 +433,7 @@ class TestInfo(object): complete: bool=False, success: ?bool=None, exception: ?str=None, + output: ?str=None, flaky: bool=False, min_duration: float=-1.0, max_duration: float=-1.0, @@ -421,6 +451,8 @@ class TestInfo(object): self.complete = complete self.success = success self.exception = exception + self.output = output + self.flaky_output = False self.flaky = flaky self.leaky = False self.min_duration = min_duration @@ -438,10 +470,17 @@ class TestInfo(object): def update(self, complete, result: TestResult, test_duration: float=-1.0): self.complete = complete + + if len(self.results) == 0: + # First result + self.output = result.output + self.exception = result.exception + self.results.append(result) - exc = result.exception - if exc is not None: - self.exception = exc + + if not eq_opt(self.output, result.output): + self.flaky_output = True + if test_duration > 0.0: self.test_duration = test_duration @@ -534,6 +573,7 @@ class TestInfo(object): 
"complete": self.complete, "success": self.success, "exception": self.exception, + "output": self.output, "flaky": self.flaky, "min_duration": self.min_duration, "max_duration": self.max_duration, @@ -567,6 +607,10 @@ class TestInfo(object): exception: ?str = None if exc is not None and isinstance(exc, str): exception = exc + out = json_data["output"] + output: ?str = None + if out is not None and isinstance(out, str): + output = out flaky = json_data["flaky"] min_duration = json_data["min_duration"] max_duration = json_data["max_duration"] @@ -601,6 +645,7 @@ class TestInfo(object): complete, success, exception, + output, flaky, min_duration, max_duration, @@ -634,15 +679,27 @@ class TestRunnerConfig(object): # TODO: add a timeout to this -actor test_executor(syscap, config, get_test: () -> Test, report_complete, env): +actor TestExecutor(syscap, config, get_test: () -> Test, report_complete, env): """The actual executor of tests """ log_handler = logging.Handler("TestRunner") + fcap = file.FileCap(env.cap) + rfcap = file.ReadFileCap(fcap) + fs = file.FS(fcap) var test_sw = time.Stopwatch() var last_report = time.Stopwatch() var test_info = None - action def _report_result(test: Test, sw, non_gc_mem_usage_before, gc_total_bytes_start, gc_time_start, success: ?bool, exception: ?Exception): + def get_expected(module: str, test: str) -> ?str: + filename = file.join_path([fs.cwd(), "test", "golden", module, test]) + try: + exp_file = file.ReadFile(rfcap, filename) + exp_data = exp_file.read().decode() + return exp_data + except: + return None + + action def _report_result(test: Test, sw, non_gc_mem_usage_before, gc_total_bytes_start, gc_time_start, success: ?bool, exception: ?Exception, val: ?str): full_dur = sw.elapsed().to_float() * 1000.0 gc_time_end = acton.rts.get_gc_time(syscap).total gc_dur = float(gc_time_end - gc_time_start) @@ -659,7 +716,7 @@ actor test_executor(syscap, config, get_test: () -> Test, report_complete, env): complete = True if test_dur > config.min_test_duration else False if test_info is not None: exc = str(exception) if exception is not None else None - test_info.update(complete, TestResult(success, exc, testiter_dur, mem_usage_delta, non_gc_mem_usage_delta), test_dur*1000.0) + test_info.update(complete, TestResult(success, exc, val, testiter_dur, mem_usage_delta, non_gc_mem_usage_delta), test_dur*1000.0) if last_report.elapsed().to_float() > 0.05 or complete: if test_info is not None and config.output_enabled: print(json.encode({"test_info": test_info.to_json()}), err=True) @@ -679,12 +736,23 @@ actor test_executor(syscap, config, get_test: () -> Test, report_complete, env): gc_total_bytes_start = int(acton.rts.get_gc_total_bytes(syscap)) gc_time_start = acton.rts.get_gc_time(syscap).total sw = time.Stopwatch() + + def repres(s: ?bool, e: ?Exception, val: ?str) -> None: + # Compare expected golden value + if val is not None: + exp_val = get_expected(t.get_module(), t.name) + if exp_val is None or exp_val is not None and val != exp_val: + exc = NotEqualError(val, exp_val, "Test output does not match expected golden value.\nActual : %s\nExpected: %s" % (val, exp_val if exp_val is not None else "None")) + _report_result(t, sw, non_gc_mem_usage_before, gc_total_bytes_start, gc_time_start, False, exc, val) + return + _report_result(t, sw, non_gc_mem_usage_before, gc_total_bytes_start, gc_time_start, s, e, val) + try: - t.run(lambda s, e: _report_result(t, sw, non_gc_mem_usage_before, gc_total_bytes_start, gc_time_start, s, e), env, log_handler) + t.run(repres, env, 
log_handler) except AssertionError as e: - _report_result(t, sw, non_gc_mem_usage_before, gc_total_bytes_start, gc_time_start, False, e) + _report_result(t, sw, non_gc_mem_usage_before, gc_total_bytes_start, gc_time_start, False, e, None) except Exception as e: - _report_result(t, sw, non_gc_mem_usage_before, gc_total_bytes_start, gc_time_start, None, e) + _report_result(t, sw, non_gc_mem_usage_before, gc_total_bytes_start, gc_time_start, None, e, None) def _run_next(): """Get the next available test and run it""" @@ -1036,12 +1104,16 @@ actor test_runner(env: Env, def _init_results(args): for name, t in unit_tests.items(): + t.module = args.get_str("modname") all_tests[name] = t for name, t in sync_actor_tests.items(): + t.module = args.get_str("modname") all_tests[name] = t for name, t in async_actor_tests.items(): + t.module = args.get_str("modname") all_tests[name] = t for name, t in env_tests.items(): + t.module = args.get_str("modname") all_tests[name] = t tests = _filter_tests(all_tests, args) @@ -1112,6 +1184,8 @@ actor test_runner(env: Env, p = argparse.Parser() p.add_bool("json", "Output results as JSON") p.add_bool("no_output", "No result output") + # TODO: remove modname arg and get it from __modname__ that compiler should provide + p.add_option("modname", "str", help="Name of module") p.add_option("name", "strlist", nargs="+", default=[], help="Filter tests by name") lp = p.add_cmd("list", "list tests", _list_tests) tp = p.add_cmd("test", "Run tests", _run_tests) diff --git a/cli/src/acton.act b/cli/src/acton.act index c5632078..2bc7dac3 100644 --- a/cli/src/acton.act +++ b/cli/src/acton.act @@ -2,6 +2,7 @@ import argparse import file import json import process +import term import testing from buildy import * @@ -425,7 +426,7 @@ actor RunTestList(env, args): _expected_modules = set(module_names) for module_name in module_names: - t = RunModuleTest(process_cap, module_name, ["list"], lambda x: _on_json_output(module_name, x), _on_test_error) + t = RunModuleTest(process_cap, module_name, ["list", "--modname", module_name], lambda x: _on_json_output(module_name, x), _on_test_error) def _on_build_success(stdout_buf: str): test_modules = [] @@ -460,6 +461,7 @@ actor RunTestTest(env: Env, args, perf_mode: bool=False): var _module_tests = {} var modules_to_test = set() var perf_data = "{}" + fs = file.FS(file.FileCap(env.cap)) test_cmd_args = [] for name_filter in args.get_strlist("name"): @@ -479,7 +481,26 @@ actor RunTestTest(env: Env, args, perf_mode: bool=False): perf_wfile = file.WriteFile(file.WriteFileCap(file.FileCap(env.cap)), "perf_data") perf_wfile.write(ptr.to_json().encode()) perf_wfile.close() + if args.get_bool("golden-update"): + for module_name, tests in ptr.results.items(): + for test_name, test_info in tests.items(): + exc = test_info.exception + output = test_info.output + if exc is not None and output is not None and exc.startswith("testing.NotEqualError: Test output does not match expected golden value"): + rpath = ["test", "golden", module_name] + filename = file.join_path([fs.cwd()] + rpath + [test_name]) + for idx in range(1, len(rpath)+1): + mkdir_path = file.join_path([fs.cwd()] + rpath[0:idx]) + try: + a = fs.mkdir(mkdir_path) + except: + pass + golden_file = file.WriteFile(file.WriteFileCap(file.FileCap(env.cap)), filename) + golden_file.write(output.encode()) + await async golden_file.close() + env.exit(r) + return after 0.05: _periodic_show() def _on_json_output(module_name, data): @@ -494,13 +515,12 @@ actor RunTestTest(env: Env, args, perf_mode: 
bool=False): ptr.update_module(module_name, tests) expected_modules_list.discard(module_name) if len(expected_modules_list) == 0: + # We have received the test list from all modules, show + # results to get empty skeleton and then run tests. + # NOTE: in perf mode we run a single module at a time and + # that module in turn limits concurrency to 1 _periodic_show() - if perf_mode: - _run_module_tests() - else: - # Run all tests in parallel - for module_name in modules_to_test: - t = RunModuleTest(process_cap, module_name, ["test"] + test_cmd_args, lambda x: _on_json_output(module_name, x), lambda x, y, z: _on_test_error(module_name, x, y, z)) + _run_module_tests(run_all=not perf_mode) elif "test_info" in data: test_info = TestInfo.from_json(data["test_info"]) @@ -534,21 +554,27 @@ actor RunTestTest(env: Env, args, perf_mode: bool=False): if len(modules_to_test) == 0: print("No tests found") env.exit(0) + return # List all tests first, which we can run in parallel. Once we have the # list of all tests we can start running them one at a time in sequence. for module_name in modules_to_test: - t = RunModuleTest(process_cap, module_name, ["list"] + test_cmd_args, lambda x: _on_json_output(module_name, x), lambda x, y, z: _on_test_error(module_name, x, y, z)) + t = RunModuleTest(process_cap, module_name, ["list", "--modname", module_name] + test_cmd_args, lambda x: _on_json_output(module_name, x), lambda x, y, z: _on_test_error(module_name, x, y, z)) - def _run_module_tests(): + def _run_module_tests(run_all=False): try: module_name = modules_to_test.pop() - if module_name is not None: - t = RunModuleTest(process_cap, module_name, ["test", "perf"] + test_cmd_args, lambda x: _on_json_output(module_name, x), lambda x, y, z: _on_test_error(module_name, x, y, z)) - else: - _periodic_show() - except: - pass + cmd = ["test"] + if perf_mode: + cmd += ["perf"] + cmd += ["--modname", module_name] + cmd += test_cmd_args + t = RunModuleTest(process_cap, module_name, cmd, lambda x: _on_json_output(module_name, x), lambda x, y, z: _on_test_error(module_name, x, y, z)) + except ValueError: + _periodic_show() + return + if run_all: + _run_module_tests(run_all) def _on_build_success(stdout_buf: str): print(term.clearline + term.up() + term.clearline, end="") @@ -580,7 +606,8 @@ actor RunTestTest(env: Env, args, perf_mode: bool=False): def build_cmd_args(args): cmdargs = [] for argname, arg in args.options.items(): - if argname in {"file", "record"}: + # TODO: reverse this logic, we should only pass in a small set of options, not all + if argname in {"file", "record", "golden-update"}: continue if arg.type == "bool": if args.get_bool(argname): @@ -1096,6 +1123,7 @@ actor main(env): testp = p.add_cmd("test", "Test", _cmd_test) testp.add_bool("record", "Record test performance results") + testp.add_bool("golden-update", "Update expected golden values based on current values") testp.add_option("module", "strlist", "+", [], "Filter on test module name") testp.add_option("name", "strlist", "+", [], "Filter on test name") diff --git a/compiler/Acton/Types.hs b/compiler/Acton/Types.hs index a8825322..9fc84db6 100644 --- a/compiler/Acton/Types.hs +++ b/compiler/Acton/Types.hs @@ -1725,7 +1725,7 @@ data TestType = UnitType | SyncType | AsyncType | EnvType deriving (Eq,Show,Read) testType (NDef (TSchema _ [] (TFun _ fx (TNil _ PRow) k res)) _) - | res /= tNone = Nothing + | res /= tNone && res /= tStr = Nothing | otherwise = case row2list k of [] -> if fx == fxPure || fx == fxMut then Just UnitType else Nothing [t] -> if t 
== logging_handler then Just SyncType else Nothing
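
Usage sketch (illustrative, not part of the patch): with the Types.hs change above, a test function may type-check with a `str` return instead of `None`, and the runner compares that string against the golden file under test/golden/<module>/<test name>, which `acton test --golden-update` creates or refreshes on a mismatch. A minimal sketch of such a test, assuming the usual `_test_` naming convention and a hypothetical module; the exact test-name-to-file mapping is an assumption here:

    def _test_greeting() -> str:
        # The returned string becomes the test's recorded output and is
        # checked against test/golden/<module>/<test name> (assumed path).
        return "Hello, world!"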