Skip to content

Commit 3bff0c3

Browse files
Merge pull request #9024 from ThomasWaldmann/transfer-corrupts-src-repo
fix borg transfer corrupting the src repo index
2 parents 5b85f47 + 4a0b71c commit 3bff0c3

File tree

4 files changed

+73
-39
lines changed

4 files changed

+73
-39
lines changed

src/borg/hashindex.pyx

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,10 @@ class NSIndex1(HTProxyMixin, MutableMapping):
201201
used = len(self.ht)
202202
header_bytes = struct.pack(self.HEADER_FMT, self.MAGIC, used, used, self.KEY_SIZE, self.VALUE_SIZE)
203203
fd.write(header_bytes)
204+
# record the header as a separate integrity-hash part if supported
205+
hash_part = getattr(fd, "hash_part", None)
206+
if hash_part:
207+
hash_part("HashHeader")
204208
count = 0
205209
for key, _ in self.ht.items():
206210
value = self.ht._get_raw(key)
@@ -214,6 +218,10 @@ class NSIndex1(HTProxyMixin, MutableMapping):
214218
header_bytes = fd.read(header_size)
215219
if len(header_bytes) < header_size:
216220
raise ValueError(f"Invalid file, file is too short (header).")
221+
# verify the header as a separate integrity-hash part if supported
222+
hash_part = getattr(fd, "hash_part", None)
223+
if hash_part:
224+
hash_part("HashHeader")
217225
magic, entries, buckets, ksize, vsize = struct.unpack(self.HEADER_FMT, header_bytes)
218226
if magic != self.MAGIC:
219227
raise ValueError(f"Invalid file, magic {self.MAGIC.decode()} not found.")
@@ -228,6 +236,10 @@ class NSIndex1(HTProxyMixin, MutableMapping):
228236
for i in range(buckets):
229237
key = fd.read(ksize)
230238
value = fd.read(vsize)
239+
if value.startswith(b'\xFF\xFF\xFF\xFF'): # LE for 0xffffffff (empty/unused bucket)
240+
continue
241+
if value.startswith(b'\xFE\xFF\xFF\xFF'): # LE for 0xfffffffe (deleted/tombstone bucket)
242+
continue
231243
self.ht._set_raw(key, value)
232244
pos = fd.tell()
233245
assert pos == end_of_file

src/borg/legacyrepository.py

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -515,23 +515,13 @@ def _read_integrity(self, transaction_id, key):
515515
return
516516
return integrity[key]
517517

518-
def open_index(self, transaction_id, auto_recover=True):
518+
def open_index(self, transaction_id):
519519
if transaction_id is None:
520520
return NSIndex1()
521521
index_path = os.path.join(self.path, "index.%d" % transaction_id)
522522
integrity_data = self._read_integrity(transaction_id, "index")
523-
try:
524-
with IntegrityCheckedFile(index_path, write=False, integrity_data=integrity_data) as fd:
525-
return NSIndex1.read(fd)
526-
except (ValueError, OSError, FileIntegrityError) as exc:
527-
logger.warning("Repository index missing or corrupted, trying to recover from: %s", exc)
528-
os.unlink(index_path)
529-
if not auto_recover:
530-
raise
531-
self.prepare_txn(self.get_transaction_id())
532-
# don't leave an open transaction around
533-
self.commit(compact=False)
534-
return self.open_index(self.get_transaction_id())
523+
with IntegrityCheckedFile(index_path, write=False, integrity_data=integrity_data) as fd:
524+
return NSIndex1.read(fd)
535525

536526
def _unpack_hints(self, transaction_id):
537527
hints_path = os.path.join(self.path, "hints.%d" % transaction_id)
@@ -560,11 +550,11 @@ def prepare_txn(self, transaction_id, do_cleanup=True):
560550
raise
561551
if not self.index or transaction_id is None:
562552
try:
563-
self.index = self.open_index(transaction_id, auto_recover=False)
553+
self.index = self.open_index(transaction_id)
564554
except (ValueError, OSError, FileIntegrityError) as exc:
565555
logger.warning("Checking repository transaction due to previous error: %s", exc)
566556
self.check_transaction()
567-
self.index = self.open_index(transaction_id, auto_recover=False)
557+
self.index = self.open_index(transaction_id)
568558
if transaction_id is None:
569559
self.segments = {} # XXX bad name: usage_count_of_segment_x = self.segments[x]
570560
self.compact = FreeSpace() # XXX bad name: freeable_space_of_segment_x = self.compact[x]

src/borg/testsuite/archiver/transfer_cmd_test.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import glob
12
import hashlib
23
import json
34
import os
@@ -469,3 +470,58 @@ def test_transfer_rechunk(archivers, request, monkeypatch):
469470
# Verify that the file hash is identical to the source
470471
assert item.path in source_file_hashes, f"File {item.path} not found in source archive"
471472
assert dest_hash == source_file_hashes[item.path], f"Content hash mismatch for {item.path}"
473+
474+
475+
def test_issue_9022(archivers, request, monkeypatch):
476+
"""
477+
Regression test for borgbackup/borg#9022: After "borg transfer --from-borg1",
478+
the source Borg 1.x repository index must not be changed.
479+
"""
480+
archiver = request.getfixturevalue(archivers)
481+
if archiver.get_kind() in ["remote", "binary"]:
482+
pytest.skip("only works locally")
483+
484+
# Prepare source (borg 1.2) repo from tarball next to this test file
485+
repo12_tar = os.path.join(os.path.dirname(__file__), "repo12.tar.gz")
486+
487+
original_location = archiver.repository_location
488+
extract_dir = f"{original_location}1"
489+
os.makedirs(extract_dir)
490+
with tarfile.open(repo12_tar) as tf:
491+
tf.extractall(extract_dir)
492+
493+
def index_meta(repo_path):
494+
index_files = sorted(glob.glob(os.path.join(repo_path, "index.*")))
495+
assert len(index_files) == 1, f"Expected exactly 1 index file before transfer, found {len(index_files)}"
496+
st = os.stat(index_files[0])
497+
# Return (mtime_ns, size, inode). Use fallbacks where attributes may not exist on some platforms.
498+
mtime_ns = getattr(st, "st_mtime_ns", int(st.st_mtime * 1e9))
499+
inode = getattr(st, "st_ino", None)
500+
return (mtime_ns, st.st_size, inode)
501+
502+
# Record pre-transfer index file metadata
503+
pre_meta = index_meta(extract_dir)
504+
505+
other_repo1 = f"--other-repo={original_location}1"
506+
507+
# Destination repo where we transfer to (borg 2 repo)
508+
archiver.repository_location = f"{original_location}2"
509+
510+
# Set passphrases: repo12 testdata uses "waytooeasyonlyfortests"
511+
monkeypatch.setenv("BORG_PASSPHRASE", "pw2")
512+
monkeypatch.setenv("BORG_OTHER_PASSPHRASE", "waytooeasyonlyfortests")
513+
# For this test, we must not weaken KDF, otherwise borg2 couldn't decrypt the borg1 key
514+
os.environ["BORG_TESTONLY_WEAKEN_KDF"] = "0"
515+
516+
# Create destination repo and run transfer from borg1 source
517+
cmd(archiver, "repo-create", RK_ENCRYPTION, other_repo1, "--from-borg1")
518+
cmd(archiver, "transfer", other_repo1, "--from-borg1")
519+
520+
# After transfer, ensure the source borg1 index file looks valid and unchanged.
521+
post_meta = index_meta(extract_dir)
522+
523+
assert post_meta == pre_meta, (
524+
f"Index file metadata changed after transfer!\n"
525+
f"Before: mtime_ns={pre_meta[0]}, size={pre_meta[1]}, inode={pre_meta[2]}\n"
526+
f"After: mtime_ns={post_meta[0]}, size={post_meta[1]}, inode={post_meta[2]}"
527+
)

src/borg/testsuite/legacyrepository_test.py

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -571,21 +571,6 @@ def test_unreadable_hints(repository):
571571
do_commit(repository)
572572

573573

574-
def test_index(repository):
575-
make_auxiliary(repository)
576-
with open(os.path.join(repository.path, "index.1"), "wb") as fd:
577-
fd.write(b"123456789")
578-
do_commit(repository)
579-
580-
581-
def test_index_outside_transaction(repository):
582-
make_auxiliary(repository)
583-
with open(os.path.join(repository.path, "index.1"), "wb") as fd:
584-
fd.write(b"123456789")
585-
with repository:
586-
assert len(repository) == 1
587-
588-
589574
def _corrupt_index(repository):
590575
# HashIndex is able to detect incorrect headers and file lengths,
591576
# but on its own it can't tell if the data is correct.
@@ -601,15 +586,6 @@ def _corrupt_index(repository):
601586
fd.write(corrupted_index_data)
602587

603588

604-
def test_index_corrupted(repository):
605-
make_auxiliary(repository)
606-
_corrupt_index(repository)
607-
with repository:
608-
# data corruption is detected due to mismatching checksums, and fixed by rebuilding the index.
609-
assert len(repository) == 1
610-
assert pdchunk(repository.get(H(0))) == b"foo"
611-
612-
613589
def test_index_corrupted_without_integrity(repository):
614590
make_auxiliary(repository)
615591
_corrupt_index(repository)

0 commit comments

Comments
 (0)