Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix shadow index update for double-put (1.2-maint) #7896

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 12 additions & 13 deletions src/borg/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -1223,11 +1223,11 @@ def put(self, id, data, wait=True):
except KeyError:
pass
else:
# note: doing a delete first will do some bookkeeping.
# we do not want to update the shadow_index here, because
# we know already that we will PUT to this id, so it will
# be in the repo index (and we won't need it in the shadow_index).
self._delete(id, segment, offset, update_shadow_index=False)
# this put call supersedes a previous put to same id.
# it is essential to do a delete first to get correct quota bookkeeping
# and also a correctly updated shadow_index, so that the compaction code
# does not wrongly resurrect an old PUT by dropping a DEL that is still needed.
self._delete(id, segment, offset)
segment, offset = self.io.write_put(id, data)
self.storage_quota_use += len(data) + self.io.put_header_fmt.size
self.segments.setdefault(segment, 0)
Expand All @@ -1250,16 +1250,15 @@ def delete(self, id, wait=True):
segment, offset = self.index.pop(id)
except KeyError:
raise self.ObjectNotFound(id, self.path) from None
# if we get here, there is an object with this id in the repo,
# we write a DEL here that shadows the respective PUT.
# after the delete, the object is not in the repo index any more,
# for the compaction code, we need to update the shadow_index in this case.
self._delete(id, segment, offset, update_shadow_index=True)
self._delete(id, segment, offset)

def _delete(self, id, segment, offset, *, update_shadow_index):
def _delete(self, id, segment, offset):
# common code used by put and delete
if update_shadow_index:
self.shadow_index.setdefault(id, []).append(segment)
# because we'll write a DEL tag to the repository, we must update the shadow index.
# this is always true, no matter whether we are called from put() or delete().
# the compaction code needs this to not drop DEL tags if they are still required
# to keep a PUT in an earlier segment in the "effectively deleted" state.
self.shadow_index.setdefault(id, []).append(segment)
self.segments[segment] -= 1
size = self.io.read(segment, offset, id, read_data=False)
self.compact[segment] += size
Expand Down
51 changes: 50 additions & 1 deletion src/borg/testsuite/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,8 @@ def test_moved_deletes_are_tracked(self):
# we have no put or del any more for H(1), so we lost knowledge about H(1).
assert H(1) not in self.repository.shadow_index

def test_shadowed_entries_are_preserved(self):
def test_shadowed_entries_are_preserved1(self):
# this tests the shadowing-by-del behaviour
get_latest_segment = self.repository.io.get_latest_segment
self.repository.put(H(1), b'1')
# This is the segment with our original PUT of interest
Expand Down Expand Up @@ -379,6 +380,54 @@ def test_shadowed_entries_are_preserved(self):
# Must not reappear
assert H(1) not in self.repository

def test_shadowed_entries_are_preserved2(self):
# this tests the shadowing-by-double-put behaviour, see issue #5661
# assume this repo state:
# seg1: PUT H1
# seg2: COMMIT
# seg3: DEL H1, PUT H1, DEL H1, PUT H2
# seg4: COMMIT
# Note how due to the final DEL H1 in seg3, H1 is effectively deleted.
#
# compaction of only seg3:
# PUT H1 gets dropped because it is not needed any more.
# DEL H1 must be kept, because there is still a PUT H1 in seg1 which must not
# "reappear" in the index if the index gets rebuilt.
get_latest_segment = self.repository.io.get_latest_segment
self.repository.put(H(1), b'1')
# This is the segment with our original PUT of interest
put_segment = get_latest_segment()
self.repository.commit(compact=False)

# We now put H(1) again (which implicitly does DEL(H(1)) followed by PUT(H(1), ...)),
# delete H(1) afterwards, and force this segment to not be compacted, which can happen
# if it's not sparse enough (symbolized by H(2) here).
self.repository.put(H(1), b'1')
self.repository.delete(H(1))
self.repository.put(H(2), b'1')
delete_segment = get_latest_segment()

# We pretend these are mostly dense (not sparse) and won't be compacted
del self.repository.compact[put_segment]
del self.repository.compact[delete_segment]

self.repository.commit(compact=True)

# Now we perform an unrelated operation on the segment containing the DELETE,
# causing it to be compacted.
self.repository.delete(H(2))
self.repository.commit(compact=True)

assert self.repository.io.segment_exists(put_segment)
assert not self.repository.io.segment_exists(delete_segment)

# Basic case, since the index survived this must be ok
assert H(1) not in self.repository
# Nuke index, force replay
os.unlink(os.path.join(self.repository.path, 'index.%d' % get_latest_segment()))
# Must not reappear
assert H(1) not in self.repository

def test_shadow_index_rollback(self):
self.repository.put(H(1), b'1')
self.repository.delete(H(1))
Expand Down
Loading