Skip to content

Commit b0b3c6b

Browse files
buzhash64: optional rolling hash encryption with AES-128
Encrypting the rolling hash should make it more resistant to attacks. Uses AES-128-ECB (slow, not optimized).
1 parent 07183f1 commit b0b3c6b

File tree

7 files changed

+164
-12
lines changed

7 files changed

+164
-12
lines changed

setup.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,13 +179,18 @@ def lib_ext_kwargs(pc, prefix_env_var, lib_name, lib_pkg_name, pc_version, lib_s
179179
dict(sources=[platform_linux_source], libraries=["acl"], extra_compile_args=cflags)
180180
)
181181

182+
# Configure buzhash64 with OpenSSL support
183+
buzhash64_ext_kwargs = members_appended(
184+
dict(sources=[buzhash64_source]), crypto_ext_lib, dict(extra_compile_args=cflags)
185+
)
186+
182187
ext_modules += [
183188
Extension("borg.crypto.low_level", **crypto_ext_kwargs),
184189
Extension("borg.compress", **compress_ext_kwargs),
185190
Extension("borg.hashindex", [hashindex_source], extra_compile_args=cflags),
186191
Extension("borg.item", [item_source], extra_compile_args=cflags),
187192
Extension("borg.chunkers.buzhash", [buzhash_source], extra_compile_args=cflags),
188-
Extension("borg.chunkers.buzhash64", [buzhash64_source], extra_compile_args=cflags),
193+
Extension("borg.chunkers.buzhash64", **buzhash64_ext_kwargs),
189194
Extension("borg.chunkers.reader", [reader_source], extra_compile_args=cflags),
190195
Extension("borg.checksums", **checksums_ext_kwargs),
191196
]

src/borg/archiver/benchmark_cmd.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -153,8 +153,15 @@ def chunkit(ch):
153153
),
154154
# note: the buzhash64 chunker creation is rather slow, so we must keep it in setup
155155
(
156-
"buzhash64,19,23,21,4095",
157-
"ch = get_chunker('buzhash64', 19, 23, 21, 4095, sparse=False)",
156+
"buzhash64,19,23,21,4095,enc=0",
157+
"ch = get_chunker('buzhash64', 19, 23, 21, 4095, sparse=False, do_encrypt=False)",
158+
"chunkit(ch)",
159+
locals(),
160+
),
161+
# note: the buzhash64 chunker creation is rather slow, so we must keep it in setup
162+
(
163+
"buzhash64,19,23,21,4095,enc=1",
164+
"ch = get_chunker('buzhash64', 19, 23, 21, 4095, sparse=False, do_encrypt=True)",
158165
"chunkit(ch)",
159166
locals(),
160167
),

src/borg/chunkers/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
def get_chunker(algo, *params, **kw):
1111
key = kw.get("key", None)
1212
sparse = kw.get("sparse", False)
13+
do_encrypt = kw.get("do_encrypt", 0) # 0 is much faster, 1 is more secure
1314
# key.chunk_seed only has 32 bits
1415
seed = key.chunk_seed if key is not None else 0
1516
# for buzhash64, we want a much longer key, so we derive it from the id key
@@ -19,7 +20,7 @@ def get_chunker(algo, *params, **kw):
1920
if algo == "buzhash":
2021
return Chunker(seed, *params, sparse=sparse)
2122
if algo == "buzhash64":
22-
return ChunkerBuzHash64(bh64_key, *params, sparse=sparse)
23+
return ChunkerBuzHash64(bh64_key, *params, sparse=sparse, do_encrypt=do_encrypt)
2324
if algo == "fixed":
2425
return ChunkerFixed(*params, sparse=sparse)
2526
if algo == "fail":

src/borg/chunkers/buzhash64.pyx

Lines changed: 121 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,39 @@ import time
88
from cpython.bytes cimport PyBytes_AsString
99
from libc.stdint cimport uint8_t, uint64_t
1010
from libc.stdlib cimport malloc, free
11-
from libc.string cimport memcpy, memmove
11+
from libc.string cimport memcpy, memmove, memset
1212

1313
from ..crypto.low_level import CSPRNG
1414

1515
from ..constants import CH_DATA, CH_ALLOC, CH_HOLE, zeros
1616
from .reader import FileReader, Chunk
1717

18+
# OpenSSL imports for AES encryption
19+
cdef extern from "openssl/evp.h":
20+
ctypedef struct EVP_CIPHER:
21+
pass
22+
ctypedef struct EVP_CIPHER_CTX:
23+
pass
24+
ctypedef struct ENGINE:
25+
pass
26+
27+
const EVP_CIPHER * EVP_aes_128_ecb()
28+
29+
EVP_CIPHER_CTX *EVP_CIPHER_CTX_new()
30+
void EVP_CIPHER_CTX_free(EVP_CIPHER_CTX *a)
31+
32+
int EVP_EncryptInit_ex(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher, ENGINE *impl,
33+
const unsigned char *key, const unsigned char *iv) nogil
34+
int EVP_DecryptInit_ex(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher, ENGINE *impl,
35+
const unsigned char *key, const unsigned char *iv) nogil
36+
int EVP_EncryptUpdate(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl,
37+
const unsigned char *in_, int inl) nogil
38+
int EVP_DecryptUpdate(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl,
39+
const unsigned char *in_, int inl) nogil
40+
int EVP_EncryptFinal_ex(EVP_CIPHER_CTX* ctx, unsigned char* out, int* outl) nogil
41+
int EVP_DecryptFinal_ex(EVP_CIPHER_CTX* ctx, unsigned char* out, int* outl) nogil
42+
int EVP_CIPHER_CTX_set_padding(EVP_CIPHER_CTX *ctx, int pad) nogil
43+
1844
# Cyclic polynomial / buzhash
1945
#
2046
# https://en.wikipedia.org/wiki/Rolling_hash
@@ -117,7 +143,11 @@ cdef class ChunkerBuzHash64:
117143
cdef size_t reader_block_size
118144
cdef bint sparse
119145

120-
def __cinit__(self, bytes key, int chunk_min_exp, int chunk_max_exp, int hash_mask_bits, int hash_window_size, bint sparse=False):
146+
# optional AES encryption for rolling hash based chunking decision
147+
cdef bint do_encrypt
148+
cdef Crypter crypter
149+
150+
def __cinit__(self, bytes key, int chunk_min_exp, int chunk_max_exp, int hash_mask_bits, int hash_window_size, bint sparse=False, bint do_encrypt=False):
121151
min_size = 1 << chunk_min_exp
122152
max_size = 1 << chunk_max_exp
123153
assert max_size <= len(zeros)
@@ -143,6 +173,10 @@ cdef class ChunkerBuzHash64:
143173
self.reader_block_size = 1024 * 1024
144174
self.sparse = sparse
145175

176+
self.do_encrypt = do_encrypt
177+
if do_encrypt:
178+
self.crypter = Crypter(key[:16])
179+
146180
def __dealloc__(self):
147181
"""Free the chunker's resources."""
148182
if self.table != NULL:
@@ -188,11 +222,12 @@ cdef class ChunkerBuzHash64:
188222

189223
cdef object process(self) except *:
190224
"""Process the chunker's buffer and return the next chunk."""
191-
cdef uint64_t sum, chunk_mask = self.chunk_mask
225+
cdef uint64_t sum, esum, chunk_mask = self.chunk_mask
192226
cdef size_t n, old_last, min_size = self.min_size, window_size = self.window_size
193227
cdef uint8_t* p
194228
cdef uint8_t* stop_at
195229
cdef size_t did_bytes
230+
cdef bint do_encrypt = self.do_encrypt
196231

197232
if self.done:
198233
if self.bytes_read == self.bytes_yielded:
@@ -223,13 +258,15 @@ cdef class ChunkerBuzHash64:
223258
self.position += min_size
224259
self.remaining -= min_size
225260
sum = _buzhash64(self.data + self.position, window_size, self.table)
261+
esum = self.crypter.encrypt64(sum) if do_encrypt else sum
226262

227-
while self.remaining > window_size and (sum & chunk_mask) and not (self.eof and self.remaining <= window_size):
263+
while self.remaining > window_size and (esum & chunk_mask) and not (self.eof and self.remaining <= window_size):
228264
p = self.data + self.position
229265
stop_at = p + self.remaining - window_size
230266

231-
while p < stop_at and (sum & chunk_mask):
267+
while p < stop_at and (esum & chunk_mask):
232268
sum = _buzhash64_update(sum, p[0], p[window_size], window_size, self.table)
269+
esum = self.crypter.encrypt64(sum) if do_encrypt else sum
233270
p += 1
234271

235272
did_bytes = p - (self.data + self.position)
@@ -318,3 +355,82 @@ def buzhash64_get_table(bytes key):
318355
return [table[i] for i in range(256)]
319356
finally:
320357
free(table)
358+
359+
360+
cdef class Crypter:
    """AES-128-ECB wrapper that processes exactly one 16-byte block per call.

    The key is installed into the OpenSSL contexts once, at construction time,
    so the per-call cost is a single EVP_*Update on one block. Padding is
    disabled; with a block-aligned input the Update call emits the whole block
    and no Final call is required.

    Encryption and decryption use separate contexts so that the two directions
    can be interleaved freely without re-keying.
    """
    cdef EVP_CIPHER_CTX * ctx       # encryption context, keyed in __cinit__
    cdef EVP_CIPHER_CTX * dec_ctx   # decryption context, keyed in __cinit__
    cdef const EVP_CIPHER * cipher
    cdef uint8_t key[16]

    def __cinit__(self, bytes key):
        """Allocate and key both cipher contexts.

        __cinit__ (rather than __init__) is used because it runs exactly once
        per object, so the contexts can neither leak through a repeated
        __init__ call nor be left NULL when __init__ is bypassed.

        :param key: the 16-byte AES-128 key.
        :raises ValueError: if *key* is not exactly 16 bytes long.
        :raises MemoryError: if OpenSSL cannot allocate a cipher context.
        :raises RuntimeError: if OpenSSL rejects the cipher setup.
        """
        if len(key) != 16:
            raise ValueError("bad key size")
        self.key = key[:16]
        self.cipher = EVP_aes_128_ecb()
        # If anything below raises, __dealloc__ still runs and frees whatever
        # was allocated (unset pointers are NULL).
        self.ctx = EVP_CIPHER_CTX_new()
        self.dec_ctx = EVP_CIPHER_CTX_new()
        if self.ctx == NULL or self.dec_ctx == NULL:
            raise MemoryError("Failed to create cipher context")
        if EVP_EncryptInit_ex(self.ctx, self.cipher, NULL, <const uint8_t *> <char *> self.key, NULL) != 1:
            raise RuntimeError("Failed to initialize AES-128-ECB encryption")
        # Padding must be configured after the context has been initialised.
        if EVP_CIPHER_CTX_set_padding(self.ctx, 0) != 1:
            raise RuntimeError("Failed to disable padding for encryption")
        if EVP_DecryptInit_ex(self.dec_ctx, self.cipher, NULL, <const uint8_t *> <char *> self.key, NULL) != 1:
            raise RuntimeError("Failed to initialize AES-128-ECB decryption")
        if EVP_CIPHER_CTX_set_padding(self.dec_ctx, 0) != 1:
            raise RuntimeError("Failed to disable padding for decryption")

    def __dealloc__(self):
        """Release both OpenSSL cipher contexts."""
        if self.ctx != NULL:
            EVP_CIPHER_CTX_free(self.ctx)
            self.ctx = NULL
        if self.dec_ctx != NULL:
            EVP_CIPHER_CTX_free(self.dec_ctx)
            self.dec_ctx = NULL

    cdef inline int encrypt(self, const uint8_t *plaintext, uint8_t *ciphertext):
        """Encrypt one 16-byte block from *plaintext* into *ciphertext*.

        Returns 0 on success, 3 if EVP_EncryptUpdate failed and 4 if it did
        not produce exactly 16 bytes.
        """
        cdef int out_len = 0
        if EVP_EncryptUpdate(self.ctx, ciphertext, &out_len, plaintext, 16) != 1:
            return 3
        if out_len != 16:
            return 4
        return 0  # OK

    cdef int decrypt(self, uint8_t *ciphertext, uint8_t *plaintext):
        """Decrypt one 16-byte block from *ciphertext* into *plaintext*.

        Returns 0 on success, 3 if EVP_DecryptUpdate failed and 4 if it did
        not produce exactly 16 bytes.
        """
        cdef int out_len = 0
        if EVP_DecryptUpdate(self.dec_ctx, plaintext, &out_len, ciphertext, 16) != 1:
            return 3
        if out_len != 16:
            return 4
        return 0

    cdef inline uint64_t encrypt64(self, uint64_t v) except? 0:
        """Encrypt the 64-bit value *v* and return 64 bits of the ciphertext.

        *v* fills the first half of the AES block and the second half is zero;
        the first 64 bits of the resulting ciphertext block are returned.

        Raises RuntimeError if the cipher call fails. The ``except? 0`` clause
        lets that exception reach the caller instead of being swallowed and
        uninitialised data being used as a hash value.
        """
        cdef uint64_t plaintext[2]
        cdef uint64_t ciphertext[2]
        cdef int rc
        plaintext[0] = v
        plaintext[1] = 0  # or v?
        rc = self.encrypt(<uint8_t *> plaintext, <uint8_t *> ciphertext)
        if rc != 0:
            raise RuntimeError(f"encrypt failed with rc={rc}")
        return ciphertext[0]  # ^ ciphertext[1]?

    def encrypt_bytes(self, bytes plaintext) -> bytes:  # Python callable for tests
        """Encrypt a single 16-byte block and return the 16-byte ciphertext.

        :raises ValueError: if *plaintext* is not exactly 16 bytes long.
        :raises RuntimeError: if the cipher call fails.
        """
        cdef uint8_t _plaintext[16]
        cdef uint8_t _ciphertext[16]
        cdef int rc
        if len(plaintext) != 16:
            raise ValueError("invalid plaintext length")
        _plaintext = plaintext[:16]
        rc = self.encrypt(_plaintext, _ciphertext)
        if rc != 0:
            raise RuntimeError(f"encrypt failed with rc={rc}")
        return _ciphertext[:16]

    def decrypt_bytes(self, bytes ciphertext) -> bytes:  # Python callable for tests
        """Decrypt a single 16-byte block and return the 16-byte plaintext.

        :raises ValueError: if *ciphertext* is not exactly 16 bytes long.
        :raises RuntimeError: if the cipher call fails.
        """
        cdef uint8_t _ciphertext[16]
        cdef uint8_t _plaintext[16]
        cdef int rc
        if len(ciphertext) != 16:
            raise ValueError("invalid ciphertext length")
        _ciphertext = ciphertext[:16]
        rc = self.decrypt(_ciphertext, _plaintext)
        if rc != 0:
            raise RuntimeError(f"decrypt failed with rc={rc}")
        return _plaintext[:16]

src/borg/testsuite/chunkers/buzhash64_self_test.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33

44
from io import BytesIO
55

6-
from ...chunkers import get_chunker
76
from ...chunkers.buzhash64 import buzhash64, buzhash64_update, ChunkerBuzHash64
87
from ...constants import * # NOQA
98
from ...helpers import hex_to_bin
@@ -78,6 +77,10 @@ def read(self, nbytes):
7877
self.input = self.input[:-1]
7978
return self.input[:1]
8079

81-
chunker = get_chunker(*CHUNKER64_PARAMS, sparse=False)
80+
# Explicitly create the chunker with the same parameters as CHUNKER64_PARAMS
81+
# but also specify do_encrypt=True.
82+
chunker = ChunkerBuzHash64(
83+
b"0" * 32, CHUNK_MIN_EXP, CHUNK_MAX_EXP, HASH_MASK_BITS, HASH_WINDOW_SIZE, sparse=False, do_encrypt=True
84+
)
8285
reconstructed = b"".join(cf(chunker.chunkify(SmallReadFile())))
8386
assert reconstructed == b"a" * 20

src/borg/testsuite/chunkers/buzhash64_test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def twist(size):
3838
for maskbits in (4, 7, 10, 12):
3939
for key in (key0, key1):
4040
fh = BytesIO(data)
41-
chunker = ChunkerBuzHash64(key, minexp, maxexp, maskbits, winsize)
41+
chunker = ChunkerBuzHash64(key, minexp, maxexp, maskbits, winsize, do_encrypt=False)
4242
chunks = [H(c) for c in cf(chunker.chunkify(fh, -1))]
4343
runs.append(H(b"".join(chunks)))
4444

src/borg/testsuite/crypto/crypto_test.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,3 +332,23 @@ def test_derive_key_from_different_keys(self):
332332
derived_key_from_id = key.derive_key(salt=salt, domain=domain, size=size, from_id_key=True)
333333
derived_key_from_crypt = key.derive_key(salt=salt, domain=domain, size=size, from_id_key=False)
334334
assert derived_key_from_id != derived_key_from_crypt
335+
336+
337+
def test_chunker_buzhash64_encryption():
    """Check the AES-128-ECB Crypter used by the buzhash64 chunker.

    Covers three things: a round-trip restores the plaintext, the ciphertext
    actually differs from the plaintext (so a no-op cipher cannot pass), and
    the output matches the AES-128 example vector from FIPS-197 Appendix C.1.
    """
    from ...chunkers.buzhash64 import Crypter

    key = b"0123456789ABCDEF"
    assert len(key) == 16
    c = Crypter(key)

    plaintext = b"abcdef0123456789"
    assert len(plaintext) == 16

    ciphertext = c.encrypt_bytes(plaintext)
    assert len(ciphertext) == 16
    # A cipher that returns its input unchanged would survive the round-trip check below.
    assert ciphertext != plaintext
    # ECB with a fixed key is deterministic: the same block encrypts identically.
    assert c.encrypt_bytes(plaintext) == ciphertext

    decrypted = c.decrypt_bytes(ciphertext)
    assert len(decrypted) == 16

    assert decrypted == plaintext

    # Known-answer test: FIPS-197 Appendix C.1 (AES-128).
    kat_key = bytes.fromhex("000102030405060708090a0b0c0d0e0f")
    kat_plaintext = bytes.fromhex("00112233445566778899aabbccddeeff")
    kat_ciphertext = bytes.fromhex("69c4e0d86a7b0430d8cdb78070b4c55a")
    kat = Crypter(kat_key)
    assert kat.encrypt_bytes(kat_plaintext) == kat_ciphertext
    assert kat.decrypt_bytes(kat_ciphertext) == kat_plaintext

0 commit comments

Comments
 (0)