From 64e7095578ff0e03cd9a8ed39eaae3674caede6d Mon Sep 17 00:00:00 2001 From: "William D. Jones" Date: Sun, 18 Aug 2024 22:37:38 -0400 Subject: [PATCH] Ensure that 0B changes are hidden from text diffs too. --- src/borg/archiver/diff_cmd.py | 37 +++++++++++++------- src/borg/testsuite/archiver/__init__.py | 6 ++++ src/borg/testsuite/archiver/diff_cmd_test.py | 26 +++++++++----- 3 files changed, 48 insertions(+), 21 deletions(-) diff --git a/src/borg/archiver/diff_cmd.py b/src/borg/archiver/diff_cmd.py index 6bfb8daa71..0e6c8d5712 100644 --- a/src/borg/archiver/diff_cmd.py +++ b/src/borg/archiver/diff_cmd.py @@ -19,17 +19,19 @@ class DiffMixIn: def do_diff(self, args, repository, manifest): """Diff contents of two archives""" - def print_json_output(diff): - def actual_change(j): - j = j.to_dict() - if j["type"] == "modified": - # It's useful to show 0 added and 0 removed for text output - # but for JSON this is essentially noise. Additionally, the - # JSON key is "changes", and this is not actually a change. - return not (j["added"] == 0 and j["removed"] == 0) - else: - return True + def actual_change(j): + j = j.to_dict() + if j["type"] == "modified": + # Added/removed keys will not exist if chunker params differ + # between the two archives. Err on the side of caution and assume + # a real modification in this case (short-circuiting retrieving + # non-existent keys). + return not {"added", "removed"} <= j.keys() or not (j["added"] == 0 and j["removed"] == 0) + else: + # All other change types are indeed changes. + return True + def print_json_output(diff): print( json.dumps( { @@ -45,6 +47,17 @@ def actual_change(j): ) ) + def print_text_output(diff, formatter): + actual_changes = dict( + (name, change) + for name, change in diff.changes().items() + if actual_change(change) and (not args.content_only or (name not in DiffFormatter.METADATA)) + ) + diff._changes = actual_changes + res: str = formatter.format_item(diff) + if res.strip(): + sys.stdout.write(res) + if args.format is not None: format = args.format elif args.content_only: @@ -85,9 +98,7 @@ def actual_change(j): if args.json_lines: print_json_output(diff) else: - res: str = formatter.format_item(diff) - if res.strip(): - sys.stdout.write(res) + print_text_output(diff, formatter) for pattern in matcher.get_unmatched_include_patterns(): self.print_warning_instance(IncludePatternNeverMatchedWarning(pattern)) diff --git a/src/borg/testsuite/archiver/__init__.py b/src/borg/testsuite/archiver/__init__.py index 44cd80866e..c3620a50bb 100644 --- a/src/borg/testsuite/archiver/__init__.py +++ b/src/borg/testsuite/archiver/__init__.py @@ -358,6 +358,12 @@ def assert_line_exists(lines, expected_regexpr): assert any(re.search(expected_regexpr, line) for line in lines), f"no match for {expected_regexpr} in {lines}" +def assert_line_not_exists(lines, expected_regexpr): + assert not any( + re.search(expected_regexpr, line) for line in lines + ), f"unexpected match for {expected_regexpr} in {lines}" + + def _assert_dirs_equal_cmp(diff, ignore_flags=False, ignore_xattrs=False, ignore_ns=False): assert diff.left_only == [] assert diff.right_only == [] diff --git a/src/borg/testsuite/archiver/diff_cmd_test.py b/src/borg/testsuite/archiver/diff_cmd_test.py index bf72b23856..8658ad5da6 100644 --- a/src/borg/testsuite/archiver/diff_cmd_test.py +++ b/src/borg/testsuite/archiver/diff_cmd_test.py @@ -7,7 +7,14 @@ from ...constants import * # NOQA from .. import are_symlinks_supported, are_hardlinks_supported from ...platformflags import is_win32, is_darwin -from . import cmd, create_regular_file, RK_ENCRYPTION, assert_line_exists, generate_archiver_tests +from . import ( + cmd, + create_regular_file, + RK_ENCRYPTION, + assert_line_exists, + generate_archiver_tests, + assert_line_not_exists, +) pytest_generate_tests = lambda metafunc: generate_archiver_tests(metafunc, kinds="local,remote,binary") # NOQA @@ -105,17 +112,20 @@ def do_asserts(output, can_compare_ids, content_only=False): # pointing to the file is not changed. change = "modified.*0 B" if can_compare_ids else r"modified: \(can't get size\)" assert_line_exists(lines, f"{change}.*input/empty") + + # Do not show a 0 byte change for a file whose contents weren't modified. + assert_line_not_exists(lines, "0 B.*input/file_touched") + if not content_only: + assert_line_exists(lines, "[cm]time:.*input/file_touched") + else: + # And if we're doing content-only, don't show the file at all. + assert "input/file_touched" not in output + if are_hardlinks_supported(): assert_line_exists(lines, f"{change}.*input/hardlink_contents_changed") if are_symlinks_supported(): assert "input/link_target_contents_changed" not in output - # Show a 0 byte change for a file whose contents weren't modified for text output. - if content_only: - assert "input/file_touched" not in output - else: - assert_line_exists(lines, f"{change}.*input/file_touched") - # Added a new file and a hard link to it. Both links to the same # inode should appear as separate files. assert "added: 2.05 kB input/file_added" in output @@ -159,7 +169,7 @@ def get_changes(filename, data): # File unchanged assert not any(get_changes("input/file_unchanged", joutput)) - # Do NOT show a 0 byte change for a file whose contents weren't modified for JSON output. + # Do not show a 0 byte change for a file whose contents weren't modified. unexpected = {"type": "modified", "added": 0, "removed": 0} assert unexpected not in get_changes("input/file_touched", joutput) if not content_only: