Skip to content

Commit 33f823d

Browse files
extract: --skip-errors ignores corrupted chunks (w/ log message), see #840
Forward port of a change implemented by @enkore back in 2016: enkore@09b21b1
1 parent 80c08ab commit 33f823d

File tree

4 files changed

+72
-14
lines changed

4 files changed

+72
-14
lines changed

setup.cfg

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -126,6 +126,7 @@ per_file_ignores =
126126
src/borg/archiver/debug_cmd.py:F405
127127
src/borg/archiver/delete_cmd.py:F405
128128
src/borg/archiver/diff_cmd.py:F405
129+
src/borg/archiver/extract_cmd.py:F405
129130
src/borg/archiver/help_cmd.py:E501,F405
130131
src/borg/archiver/key_cmds.py:F405
131132
src/borg/archiver/prune_cmd.py:F405

src/borg/archive.py

Lines changed: 39 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -791,6 +791,7 @@ def extract_item(
791791
stripped_components=0,
792792
original_path=None,
793793
pi=None,
794+
skip_integrity_errors=False,
794795
):
795796
"""
796797
Extract archive item.
@@ -804,6 +805,8 @@ def extract_item(
804805
:param stripped_components: stripped leading path components to correct hard link extraction
805806
:param original_path: 'path' key as stored in archive
806807
:param pi: ProgressIndicatorPercent (or similar) for file extraction progress (in bytes)
808+
:param skip_integrity_errors: skip over corrupted chunks instead of raising IntegrityError
809+
(ignored for dry_run and stdout)
807810
"""
808811
has_damaged_chunks = "chunks_healthy" in item
809812
if dry_run or stdout:
@@ -832,7 +835,7 @@ def extract_item(
832835
)
833836
if has_damaged_chunks:
834837
raise BackupError("File has damaged (all-zero) chunks. Try running borg check --repair.")
835-
return
838+
return True
836839

837840
original_path = original_path or item.path
838841
dest = self.cwd
@@ -867,15 +870,38 @@ def make_parent(path):
867870
fd = open(path, "wb")
868871
with fd:
869872
ids = [c.id for c in item.chunks]
870-
for data in self.pipeline.fetch_many(ids, is_preloaded=True):
873+
chunk_index = -1
874+
chunk_iterator = self.pipeline.fetch_many(ids, is_preloaded=True)
875+
skipped_errors = False
876+
while True:
877+
try:
878+
chunk_index += 1
879+
data = next(chunk_iterator)
880+
except StopIteration:
881+
break
882+
except IntegrityError as err:
883+
if not skip_integrity_errors:
884+
raise
885+
c = item.chunks[chunk_index]
886+
size = c.size
887+
logger.warning("%s: chunk %s: %s", remove_surrogates(item.path), bin_to_hex(c.id), err)
888+
with backup_io("seek"):
889+
fd.seek(size, 1)
890+
skipped_errors = True
891+
# restart chunk data generator
892+
ids = [c.id for c in item.chunks[chunk_index + 1 :]]
893+
chunk_iterator = self.pipeline.fetch_many(ids, is_preloaded=True)
894+
else:
895+
with backup_io("write"):
896+
size = len(data)
897+
if sparse and zeros.startswith(data):
898+
# all-zero chunk: create a hole in a sparse file
899+
fd.seek(size, 1)
900+
else:
901+
fd.write(data)
871902
if pi:
872-
pi.show(increase=len(data), info=[remove_surrogates(item.path)])
873-
with backup_io("write"):
874-
if sparse and zeros.startswith(data):
875-
# all-zero chunk: create a hole in a sparse file
876-
fd.seek(len(data), 1)
877-
else:
878-
fd.write(data)
903+
pi.show(increase=size, info=[remove_surrogates(item.path)])
904+
879905
with backup_io("truncate_and_attrs"):
880906
pos = item_chunks_size = fd.tell()
881907
fd.truncate(pos)
@@ -889,7 +915,7 @@ def make_parent(path):
889915
)
890916
if has_damaged_chunks:
891917
raise BackupError("File has damaged (all-zero) chunks. Try running borg check --repair.")
892-
return
918+
return not skipped_errors
893919
with backup_io:
894920
# No repository access beyond this point.
895921
if stat.S_ISDIR(mode):
@@ -914,18 +940,19 @@ def make_parent(path):
914940
make_parent(path)
915941
with self.extract_helper(item, path, hlm) as hardlink_set:
916942
if hardlink_set:
917-
return
943+
return True
918944
os.mkfifo(path)
919945
self.restore_attrs(path, item)
920946
elif stat.S_ISCHR(mode) or stat.S_ISBLK(mode):
921947
make_parent(path)
922948
with self.extract_helper(item, path, hlm) as hardlink_set:
923949
if hardlink_set:
924-
return
950+
return True
925951
os.mknod(path, item.mode, item.rdev)
926952
self.restore_attrs(path, item)
927953
else:
928954
raise Exception("Unknown archive item type %r" % item.mode)
955+
return True
929956

930957
def restore_attrs(self, path, item, symlink=False, fd=None):
931958
"""

src/borg/archiver/extract_cmd.py

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,7 @@ def do_extract(self, args, repository, manifest, archive):
3939
progress = args.progress
4040
output_list = args.output_list
4141
dry_run = args.dry_run
42+
skip_errors = args.skip_errors
4243
stdout = args.stdout
4344
sparse = args.sparse
4445
strip_components = args.strip_components
@@ -75,15 +76,17 @@ def do_extract(self, args, repository, manifest, archive):
7576
dirs.append(item)
7677
archive.extract_item(item, stdout=stdout, restore_attrs=False)
7778
else:
78-
archive.extract_item(
79+
if not archive.extract_item(
7980
item,
8081
stdout=stdout,
8182
sparse=sparse,
8283
hlm=hlm,
8384
stripped_components=strip_components,
8485
original_path=orig_path,
8586
pi=pi,
86-
)
87+
skip_integrity_errors=skip_errors,
88+
):
89+
self.exit_code = EXIT_WARNING
8790
except (BackupOSError, BackupError) as e:
8891
self.print_warning("%s: %s", remove_surrogates(orig_path), e)
8992

@@ -174,6 +177,13 @@ def build_parser_extract(self, subparsers, common_parser, mid_common_parser):
174177
action="store_true",
175178
help="create holes in output sparse file from all-zero chunks",
176179
)
180+
subparser.add_argument(
181+
"--skip-errors",
182+
dest="skip_errors",
183+
action="store_true",
184+
help="skip corrupted chunks with a log message (exit 1) instead of aborting "
185+
"(no effect for --dry-run and --stdout)",
186+
)
177187
subparser.add_argument("name", metavar="NAME", type=archivename_validator, help="specify the archive name")
178188
subparser.add_argument(
179189
"paths", metavar="PATH", nargs="*", type=str, help="paths to extract; patterns are supported"

src/borg/testsuite/archiver/extract_cmd.py

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -585,6 +585,26 @@ def test_overwrite(self):
585585
with changedir("output"):
586586
self.cmd(f"--repo={self.repository_location}", "extract", "test", exit_code=1)
587587

588+
def test_extract_skip_errors(self):
589+
self.create_regular_file("file1", contents=b"a" * 280 + b"b" * 280)
590+
self.cmd(f"--repo={self.repository_location}", "rcreate", "-e" "none")
591+
self.cmd(f"--repo={self.repository_location}", "create", "--chunker-params", "7,9,8,128", "test", "input")
592+
segment_files = sorted(os.listdir(os.path.join(self.repository_path, "data", "0")), reverse=True)
593+
print(
594+
", ".join(
595+
f"{fn}: {os.stat(os.path.join(self.repository_path, 'data', '0', fn)).st_size}b" for fn in segment_files
596+
)
597+
)
598+
name = segment_files[3] # must be the segment file that has the file's chunks
599+
with open(os.path.join(self.repository_path, "data", "0", name), "r+b") as fd:
600+
fd.seek(100)
601+
fd.write(b"XXXX")
602+
with changedir("output"):
603+
output = self.cmd(f"--repo={self.repository_location}", "extract", "--skip-errors", "test", exit_code=1)
604+
assert "input/file1: chunk" in output
605+
assert os.stat("input/file1").st_size == 560
606+
self.cmd(f"--repo={self.repository_location}", "check", exit_code=1)
607+
588608
# derived from test_extract_xattrs_errors()
589609
@pytest.mark.skipif(
590610
not xattr.XATTR_FAKEROOT, reason="xattr not supported on this system or on this version of fakeroot"

0 commit comments

Comments
 (0)