Item: remove .chunks_healthy, fixes #8559

Well, it's not totally removed: some code in Item, Archive and borg transfer --from-borg1 needs to stay in place, so that we can pick the CORRECT chunks list that is in .chunks_healthy for all-zero-replacement-chunk-patched items when transferring archives from borg1 to borg2 repos. FUSE fs read: IOError or all-zero result. Other reads: TODO.
borgbackup · Nov 25, 2024 · 9ed75ca · 9ed75ca
1 parent 84744ac
commit 9ed75ca
Show file tree

Hide file tree

Showing 12 changed files with 88 additions and 230 deletions.
diff --git a/src/borg/archive.py b/src/borg/archive.py
@@ -281,7 +281,7 @@ def unpack_many(self, ids, *, filter=None, preload=False):
                 item = Item(internal_dict=_item)
                 if "chunks" in item:
                     item.chunks = [ChunkListEntry(*e) for e in item.chunks]
-                if "chunks_healthy" in item:
+                if "chunks_healthy" in item:  # legacy
                     item.chunks_healthy = [ChunkListEntry(*e) for e in item.chunks_healthy]
                 if filter and not filter(item):
                     continue
@@ -744,7 +744,6 @@ def same_item(item, st):
             # if a previous extraction was interrupted between setting the mtime and setting non-default flags.
             return True
 
-        has_damaged_chunks = "chunks_healthy" in item
         if dry_run or stdout:
             with self.extract_helper(item, "", hlm, dry_run=dry_run or stdout) as hardlink_set:
                 if not hardlink_set:
@@ -771,8 +770,6 @@ def same_item(item, st):
                                         item_size, item_chunks_size
                                     )
                                 )
-            if has_damaged_chunks:
-                raise BackupError("File has damaged (all-zero) chunks. Try running borg check --repair.")
             return
 
         dest = self.cwd
@@ -827,8 +824,6 @@ def make_parent(path):
                         raise BackupError(
                             f"Size inconsistency detected: size {item_size}, chunks size {item_chunks_size}"
                         )
-                if has_damaged_chunks:
-                    raise BackupError("File has damaged (all-zero) chunks. Try running borg check --repair.")
             return
         with backup_io:
             # No repository access beyond this point.
@@ -1141,10 +1136,6 @@ def chunk_processor(chunk):
                 return chunk_entry
 
         item.chunks = []
-        # if we rechunkify, we'll get a fundamentally different chunks list, thus we need
-        # to get rid of .chunks_healthy, as it might not correspond to .chunks any more.
-        if self.rechunkify and "chunks_healthy" in item:
-            del item.chunks_healthy
         for chunk in chunk_iter:
             chunk_entry = chunk_processor(chunk)
             item.chunks.append(chunk_entry)
@@ -1761,13 +1752,10 @@ def verify_data(self):
         if defect_chunks:
             if self.repair:
                 # if we kill the defect chunk here, subsequent actions within this "borg check"
-                # run will find missing chunks and replace them with all-zero replacement
-                # chunks and flag the files as "repaired".
-                # if another backup is done later and the missing chunks get backed up again,
-                # a "borg check" afterwards can heal all files where this chunk was missing.
+                # run will find missing chunks.
                 logger.warning(
-                    "Found defect chunks. They will be deleted now, so affected files can "
-                    "get repaired now and maybe healed later."
+                    "Found defect chunks and will delete them now. "
+                    "Reading files referencing these chunks will result in an I/O error."
                 )
                 for defect_chunk in defect_chunks:
                     # remote repo (ssh): retry might help for strange network / NIC / RAM errors
@@ -1787,10 +1775,7 @@ def verify_data(self):
                     else:
                         logger.warning("chunk %s not deleted, did not consistently fail.", bin_to_hex(defect_chunk))
             else:
-                logger.warning(
-                    "Found defect chunks. With --repair, they would get deleted, so affected "
-                    "files could get repaired then and maybe healed later."
-                )
+                logger.warning("Found defect chunks. With --repair, they would get deleted.")
                 for defect_chunk in defect_chunks:
                     logger.debug("chunk %s is defect.", bin_to_hex(defect_chunk))
         log = logger.error if errors else logger.info
@@ -1901,80 +1886,18 @@ def add_reference(id_, size, cdata):
                     self.repository.put(id_, cdata)
 
         def verify_file_chunks(archive_name, item):
-            """Verifies that all file chunks are present.
-
-            Missing file chunks will be replaced with new chunks of the same length containing all zeros.
-            If a previously missing file chunk re-appears, the replacement chunk is replaced by the correct one.
-            """
-
-            def replacement_chunk(size):
-                chunk = Chunk(None, allocation=CH_ALLOC, size=size)
-                chunk_id, data = cached_hash(chunk, self.key.id_hash)
-                cdata = self.repo_objs.format(chunk_id, {}, data, ro_type=ROBJ_FILE_STREAM)
-                return chunk_id, size, cdata
-
+            """Verifies that all file chunks are present. Missing file chunks will be logged."""
             offset = 0
-            chunk_list = []
-            chunks_replaced = False
-            has_chunks_healthy = "chunks_healthy" in item
-            chunks_current = item.chunks
-            chunks_healthy = item.chunks_healthy if has_chunks_healthy else chunks_current
-            if has_chunks_healthy and len(chunks_current) != len(chunks_healthy):
-                # should never happen, but there was issue #3218.
-                logger.warning(f"{archive_name}: {item.path}: Invalid chunks_healthy metadata removed!")
-                del item.chunks_healthy
-                has_chunks_healthy = False
-                chunks_healthy = chunks_current
-            for chunk_current, chunk_healthy in zip(chunks_current, chunks_healthy):
-                chunk_id, size = chunk_healthy
+            for chunk in item.chunks:
+                chunk_id, size = chunk
                 if chunk_id not in self.chunks:
-                    # a chunk of the healthy list is missing
-                    if chunk_current == chunk_healthy:
-                        logger.error(
-                            "{}: {}: New missing file chunk detected (Byte {}-{}, Chunk {}). "
-                            "Replacing with all-zero chunk.".format(
-                                archive_name, item.path, offset, offset + size, bin_to_hex(chunk_id)
-                            )
+                    logger.error(
+                        "{}: {}: Missing file chunk detected (Byte {}-{}, Chunk {}).".format(
+                            archive_name, item.path, offset, offset + size, bin_to_hex(chunk_id)
                         )
-                        self.error_found = chunks_replaced = True
-                        chunk_id, size, cdata = replacement_chunk(size)
-                        add_reference(chunk_id, size, cdata)
-                    else:
-                        logger.info(
-                            "{}: {}: Previously missing file chunk is still missing (Byte {}-{}, Chunk {}). "
-                            "It has an all-zero replacement chunk already.".format(
-                                archive_name, item.path, offset, offset + size, bin_to_hex(chunk_id)
-                            )
-                        )
-                        chunk_id, size = chunk_current
-                        if chunk_id not in self.chunks:
-                            logger.warning(
-                                "{}: {}: Missing all-zero replacement chunk detected (Byte {}-{}, Chunk {}). "
-                                "Generating new replacement chunk.".format(
-                                    archive_name, item.path, offset, offset + size, bin_to_hex(chunk_id)
-                                )
-                            )
-                            self.error_found = chunks_replaced = True
-                            chunk_id, size, cdata = replacement_chunk(size)
-                            add_reference(chunk_id, size, cdata)
-                else:
-                    if chunk_current == chunk_healthy:
-                        pass  # normal case, all fine.
-                    else:
-                        logger.info(
-                            "{}: {}: Healed previously missing file chunk! (Byte {}-{}, Chunk {}).".format(
-                                archive_name, item.path, offset, offset + size, bin_to_hex(chunk_id)
-                            )
-                        )
-                chunk_list.append([chunk_id, size])  # list-typed element as chunks_healthy is list-of-lists
+                    )
+                    self.error_found = True
                 offset += size
-            if chunks_replaced and not has_chunks_healthy:
-                # if this is first repair, remember the correct chunk IDs, so we can maybe heal the file later
-                item.chunks_healthy = item.chunks
-            if has_chunks_healthy and chunk_list == chunks_healthy:
-                logger.info(f"{archive_name}: {item.path}: Completely healed previously damaged file!")
-                del item.chunks_healthy
-            item.chunks = chunk_list
             if "size" in item:
                 item_size = item.size
                 item_chunks_size = item.get_size(from_chunks=True)

diff --git a/src/borg/archiver/check_cmd.py b/src/borg/archiver/check_cmd.py
@@ -168,28 +168,7 @@ def build_parser_check(self, subparsers, common_parser, mid_common_parser):
 
         2. When checking the consistency and correctness of archives, repair mode might
            remove whole archives from the manifest if their archive metadata chunk is
-           corrupt or lost. On a chunk level (i.e. the contents of files), repair mode
-           will replace corrupt or lost chunks with a same-size replacement chunk of
-           zeroes. If a previously zeroed chunk reappears, repair mode will restore
-           this lost chunk using the new chunk.
-
-        Most steps taken by repair mode have a one-time effect on the repository, like
-        removing a lost archive from the repository. However, replacing a corrupt or
-        lost chunk with an all-zero replacement will have an ongoing effect on the
-        repository: When attempting to extract a file referencing an all-zero chunk,
-        the ``extract`` command will distinctly warn about it. The FUSE filesystem
-        created by the ``mount`` command will reject reading such a "zero-patched"
-        file unless a special mount option is given.
-
-        As mentioned earlier, Borg might be able to "heal" a "zero-patched" file in
-        repair mode, if all its previously lost chunks reappear (e.g. via a later
-        backup). This is achieved by Borg not only keeping track of the all-zero
-        replacement chunks, but also by keeping metadata about the lost chunks. In
-        repair mode Borg will check whether a previously lost chunk reappeared and will
-        replace the all-zero replacement chunk by the reappeared chunk. If all lost
-        chunks of a "zero-patched" file reappear, this effectively "heals" the file.
-        Consequently, if lost chunks were repaired earlier, it is advised to run
-        ``--repair`` a second time after creating some new backups.
+           corrupt or lost. Borg will also report files that reference missing chunks.
 
         If ``--repair --find-lost-archives`` is given, previously lost entries will
         be recreated in the archive directory. This is only possible before

diff --git a/src/borg/archiver/compact_cmd.py b/src/borg/archiver/compact_cmd.py
@@ -6,7 +6,7 @@
 from ..cache import write_chunkindex_to_repo_cache, build_chunkindex_from_repo
 from ..constants import *  # NOQA
 from ..hashindex import ChunkIndex, ChunkIndexEntry
-from ..helpers import set_ec, EXIT_WARNING, EXIT_ERROR, format_file_size, bin_to_hex
+from ..helpers import set_ec, EXIT_ERROR, format_file_size, bin_to_hex
 from ..helpers import ProgressIndicatorPercent
 from ..manifest import Manifest
 from ..remote import RemoteRepository
@@ -39,9 +39,7 @@ def garbage_collect(self):
         logger.info("Starting compaction / garbage collection...")
         self.chunks = self.get_repository_chunks()
         logger.info("Computing object IDs used by archives...")
-        (self.missing_chunks, self.reappeared_chunks, self.total_files, self.total_size, self.archives_count) = (
-            self.analyze_archives()
-        )
+        (self.missing_chunks, self.total_files, self.total_size, self.archives_count) = self.analyze_archives()
         self.report_and_delete()
         self.save_chunk_index()
         logger.info("Finished compaction / garbage collection...")
@@ -73,28 +71,24 @@ def save_chunk_index(self):
             self.chunks.clear()  # we already have updated the repo cache in get_repository_chunks
         self.chunks = None  # nothing there (cleared!)
 
-    def analyze_archives(self) -> Tuple[Set, Set, int, int, int]:
-        """Iterate over all items in all archives, create the dicts id -> size of all used/wanted chunks."""
+    def analyze_archives(self) -> Tuple[Set, int, int, int]:
+        """Iterate over all items in all archives, create the dicts id -> size of all used chunks."""
 
-        def use_it(id, *, wanted=False):
+        def use_it(id):
             entry = self.chunks.get(id)
             if entry is not None:
                 # the chunk is in the repo, mark it used.
                 self.chunks[id] = entry._replace(flags=entry.flags | ChunkIndex.F_USED)
-                if wanted:
-                    # chunk id is from chunks_healthy list: a lost chunk has re-appeared!
-                    reappeared_chunks.add(id)
             else:
                 # with --stats: we do NOT have this chunk in the repository!
                 # without --stats: we do not have this chunk or the chunks index is incomplete.
                 missing_chunks.add(id)
 
         missing_chunks: set[bytes] = set()
-        reappeared_chunks: set[bytes] = set()
         archive_infos = self.manifest.archives.list(sort_by=["ts"])
         num_archives = len(archive_infos)
         pi = ProgressIndicatorPercent(
-            total=num_archives, msg="Computing used/wanted chunks %3.1f%%", step=0.1, msgid="compact.analyze_archives"
+            total=num_archives, msg="Computing used chunks %3.1f%%", step=0.1, msgid="compact.analyze_archives"
         )
         total_size, total_files = 0, 0
         for i, info in enumerate(archive_infos):
@@ -114,25 +108,14 @@ def use_it(id, *, wanted=False):
                     for id, size in item.chunks:
                         total_size += size  # original, uncompressed file content size
                         use_it(id)
-                    if "chunks_healthy" in item:
-                        # we also consider the chunks_healthy chunks as referenced - do not throw away
-                        # anything that borg check --repair might still need.
-                        for id, size in item.chunks_healthy:
-                            use_it(id, wanted=True)
         pi.finish()
-        return missing_chunks, reappeared_chunks, total_files, total_size, num_archives
+        return missing_chunks, total_files, total_size, num_archives
 
     def report_and_delete(self):
-        run_repair = " Run borg check --repair!"
-
         if self.missing_chunks:
-            logger.error(f"Repository has {len(self.missing_chunks)} missing objects." + run_repair)
+            logger.error(f"Repository has {len(self.missing_chunks)} missing objects!")
             set_ec(EXIT_ERROR)
 
-        if self.reappeared_chunks:
-            logger.warning(f"{len(self.reappeared_chunks)} previously missing objects re-appeared!" + run_repair)
-            set_ec(EXIT_WARNING)
-
         logger.info("Cleaning archives directory from soft-deleted archives...")
         archive_infos = self.manifest.archives.list(sort_by=["ts"], deleted=True)
         for archive_info in archive_infos:

diff --git a/src/borg/archiver/mount_cmds.py b/src/borg/archiver/mount_cmds.py
@@ -104,9 +104,9 @@ def build_parser_mount_umount(self, subparsers, common_parser, mid_common_parser
 
         - ``versions``: when used with a repository mount, this gives a merged, versioned
           view of the files in the archives. EXPERIMENTAL, layout may change in future.
-        - ``allow_damaged_files``: by default damaged files (where missing chunks were
-          replaced with runs of zeros by ``borg check --repair``) are not readable and
-          return EIO (I/O error). Set this option to read such files.
+        - ``allow_damaged_files``: by default damaged files (where chunks are missing)
+          will return EIO (I/O error) when trying to read the related parts of the file.
+          Set this option to replace the missing parts with all-zero bytes.
         - ``ignore_permissions``: for security reasons the ``default_permissions`` mount
           option is internally enforced by borg. ``ignore_permissions`` can be given to
           not enforce ``default_permissions``.

diff --git a/src/borg/archiver/recreate_cmd.py b/src/borg/archiver/recreate_cmd.py
@@ -95,16 +95,10 @@ def build_parser_recreate(self, subparsers, common_parser, mid_common_parser):
         at least the entire deduplicated size of the archives using the previous
         chunker params.
 
-        If you recently ran borg check --repair and it had to fix lost chunks with all-zero
-        replacement chunks, please first run another backup for the same data and re-run
-        borg check --repair afterwards to heal any archives that had lost chunks which are
-        still generated from the input data.
-
-        Important: running borg recreate to re-chunk will remove the chunks_healthy
-        metadata of all items with replacement chunks, so healing will not be possible
-        any more after re-chunking (it is also unlikely it would ever work: due to the
-        change of chunking parameters, the missing chunk likely will never be seen again
-        even if you still have the data that produced it).
+        If your most recent borg check found missing chunks, please first run another
+        backup for the same data, before doing any rechunking. If you are lucky, that
+        will re-create the missing chunks. Optionally, do another borg check, to see
+        if the chunks are still missing.
         """
         )
         subparser = subparsers.add_parser(