11from hashlib import (
22 sha1 ,
33)
4+ from itertools import (
5+ accumulate ,
6+ )
47import math
58from typing import (
69 Any ,
1821
1922from azul import (
2023 R ,
24+ cached_property ,
2125)
2226from azul .types import (
2327 JSON ,
@@ -181,43 +185,94 @@ class UUIDPartition(metaclass=UUIDPartitionMeta):
181185 #:
182186 prefix : int
183187
188+ #: The canonical string representation of UUIDs has five groups of
189+ #: hexadecimal digits separated by dashes. The first group is eight digits
190+ #: long, the last group twelve and the three groups in between are four
191+ #: digits long. The first and the last group are best suited for a random
192+ #: distribution of v4 and v5 UUIDs across partitions. By default, UUID
193+ #: partitions use the first group.
194+ #:
195+ group : int = 0
196+
184197 #: The partition that includes all UUIDs. Since this attribute holds an
185198 #: instance of this class, we can't initialize it here, but have to do so in
186199 #: the metaclass constructor.
187200 #:
188201 root : ClassVar [Self ]
189202
203+ #: The width of each group in bits.
204+ #:
205+ group_lengths : ClassVar [tuple [int , ...]]
206+ group_lengths = tuple (4 * n for n in [8 , 4 , 4 , 4 , 12 ])
207+
208+ #: For each group, the number of bits that precede the group in the binary,
209+ #: 128-bit-wide representation of a UUID, i.e., the distance between the
210+ #: most significant bit of the UUID and the most significant bit of the group.
211+ #:
212+ group_shifts : ClassVar [tuple [int , ...]]
213+ group_shifts = tuple (accumulate (group_lengths [:- 1 ], initial = 0 ))
214+
def __attrs_post_init__(self):
    """
    Validate the field values of a newly constructed partition.

    The prefix length must not be negative, a prefix of length zero must be
    zero itself, the group must be a valid index into the UUID groups, the
    prefix must not be longer than the selected group and the prefix must
    fit into the number of bits given by the prefix length. The first
    violated constraint raises an AssertionError.

    >>> UUIDPartition(prefix_length=0, prefix=1)
    ... # doctest: +NORMALIZE_WHITESPACE
    Traceback (most recent call last):
    ...
    AssertionError: R('If prefix length is 0, the prefix must be, too',
                      UUIDPartition(prefix_length=0, prefix=1, group=0))

    >>> UUIDPartition(prefix_length=1, prefix=3)
    ... # doctest: +NORMALIZE_WHITESPACE
    Traceback (most recent call last):
    ...
    AssertionError: R('Prefix has extra high-order bits set',
                      UUIDPartition(prefix_length=1, prefix=3, group=0))

    >>> UUIDPartition(prefix_length=1, prefix=0, group=5)
    ... # doctest: +NORMALIZE_WHITESPACE
    Traceback (most recent call last):
    ...
    AssertionError: R('Invalid group',
                      UUIDPartition(prefix_length=1, prefix=0, group=5))

    >>> UUIDPartition(prefix_length=1, prefix=0, group=-1)
    ... # doctest: +NORMALIZE_WHITESPACE
    Traceback (most recent call last):
    ...
    AssertionError: R('Invalid group',
                      UUIDPartition(prefix_length=1, prefix=0, group=-1))

    >>> UUIDPartition(prefix_length=49, prefix=0, group=4)
    Traceback (most recent call last):
    ...
    AssertionError: R('Length of prefix exceeds that of group', 49, 48)

    >>> UUIDPartition(prefix_length=17, prefix=0, group=1)
    Traceback (most recent call last):
    ...
    AssertionError: R('Length of prefix exceeds that of group', 17, 16)
    """
    # Without this check, a negative prefix length combined with a prefix of
    # zero would slip through: `2 ** -1` evaluates to the float 0.5, so the
    # range check on the prefix at the end of this method would still hold.
    assert self.prefix_length >= 0, R(
        'Prefix length must not be negative', self)
    assert self.prefix_length != 0 or self.prefix == 0, R(
        'If prefix length is 0, the prefix must be, too', self)
    # The group must be validated before it is used as an index below,
    # otherwise a negative group would silently select a group from the end.
    assert 0 <= self.group < len(self.group_shifts), R(
        'Invalid group', self)
    group_length = self.group_lengths[self.group]
    assert self.prefix_length <= group_length, R(
        'Length of prefix exceeds that of group', self.prefix_length, group_length)
    assert 0 <= self.prefix < 2 ** self.prefix_length, R(
        'Prefix has extra high-order bits set', self)
211264
@classmethod
def from_json(cls, json: JSON) -> Self:
    """
    Create a partition from its JSON representation.

    :param json: a JSON object with the integer entries ``prefix_length``
                 and ``prefix`` and, optionally, ``group``

    :return: a partition with the field values taken from the given object
    """
    # JSON that was serialized before the `group` field existed has no such
    # entry. Fall back to 0, the default value of that field, so that such
    # JSON still yields the partition it originally represented.
    return cls(prefix_length=json_int(json['prefix_length']),
               prefix=json_int(json['prefix']),
               group=json_int(json.get('group', 0)))
216270
def to_json(self) -> MutableJSON:
    """
    Return a new JSON object holding the ``prefix_length``, ``prefix`` and
    ``group`` of this partition, in that order.
    """
    return dict(prefix_length=self.prefix_length,
                prefix=self.prefix,
                group=self.group)
222277
223278 def contains (self , member : UUID ) -> bool :
@@ -229,10 +284,30 @@ def contains(self, member: UUID) -> bool:
229284 True
230285 >>> p.contains(UUID('ffd4524e-14c4-41d7-9071-6cadab09d75c'))
231286 True
287+
288+ >>> p = UUIDPartition(prefix_length=5, prefix=0b0110_0, group=4)
289+ >>> p.contains(UUID('fdd4524e-14c4-41d7-9071-66adab09d75c'))
290+ True
291+ >>> p.contains(UUID('fdd4524e-14c4-41d7-9071-67adab09d75c'))
292+ True
293+ >>> p.contains(UUID('fdd4524e-14c4-41d7-9071-68adab09d75c'))
294+ False
295+
296+ >>> p = UUIDPartition(prefix_length=48, prefix=0x68adab09d75c, group=4)
297+ >>> p.contains(UUID('fdd4524e-14c4-41d7-9071-68adab09d75c'))
298+ True
299+ >>> p.contains(UUID('fdd4524e-14c4-41d7-9071-68adab09d75d'))
300+ False
232301 """
233- # UUIDs are 128 bit integers
234- shift = 128 - self .prefix_length
235- return member .int >> shift == self .prefix
302+ mask , shift = self ._mask_and_shift
303+ return (member .int & mask ) >> shift == self .prefix
304+
@cached_property
def _mask_and_shift(self) -> tuple[int, int]:
    """
    A tuple of the bit mask and the right-shift distance that, applied in
    that order to the 128-bit integer value of a UUID, yield the bits of
    that UUID to be compared against the prefix of this partition.
    """
    offset = self.group_shifts[self.group]
    # The number of bits from the first bit of the group through the least
    # significant bit of the UUID
    remaining = 128 - offset
    # The mask clears all bits ahead of the group, the shift discards all
    # bits that follow the leading `prefix_length` bits of the group.
    return (1 << remaining) - 1, remaining - self.prefix_length
236311
237312 def divide (self , num_divisions : int ) -> list [Self ]:
238313 """
@@ -250,18 +325,24 @@ def divide(self, num_divisions: int) -> list[Self]:
250325
251326 >>> sorted(UUIDPartition.root.divide(3))
252327 ... # doctest: +NORMALIZE_WHITESPACE
253- [UUIDPartition(prefix_length=2, prefix=0),
254- UUIDPartition(prefix_length=2, prefix=1),
255- UUIDPartition(prefix_length=2, prefix=2),
256- UUIDPartition(prefix_length=2, prefix=3)]
328+ [UUIDPartition(prefix_length=2, prefix=0, group=0),
329+ UUIDPartition(prefix_length=2, prefix=1, group=0),
330+ UUIDPartition(prefix_length=2, prefix=2, group=0),
331+ UUIDPartition(prefix_length=2, prefix=3, group=0)]
332+
333+ >>> UUIDPartition(prefix_length=2, prefix=0, group=4).divide(2)
334+ ... # doctest: +NORMALIZE_WHITESPACE
335+ [UUIDPartition(prefix_length=3, prefix=0, group=4),
336+ UUIDPartition(prefix_length=3, prefix=1, group=4)]
257337 """
258338 assert num_divisions > 0 , R ('Number of divisions must be 1 or more' )
259339 prefix_length = math .ceil (math .log2 (num_divisions ))
260340 num_divisions = 2 ** prefix_length
261341 cls = type (self )
262342 return [
263343 cls (prefix_length = self .prefix_length + prefix_length ,
264- prefix = (self .prefix << prefix_length ) + prefix )
344+ prefix = (self .prefix << prefix_length ) + prefix ,
345+ group = self .group )
265346 for prefix in range (num_divisions )
266347 ]
267348
@@ -273,28 +354,28 @@ def __str__(self) -> str:
273354 returned by this function.
274355
275356 >>> str(UUIDPartition.root)
276- '-'
357+ '-@0 '
277358
278359 0b1111_1110 == 0xfe
279360 0b1111_1111 == 0xff
280- >>> str(UUIDPartition(prefix_length=7, prefix=0b1111_111))
281- 'fe-ff'
361+ >>> str(UUIDPartition(prefix_length=7, prefix=0b1111_111, group=4 ))
362+ 'fe-ff@4 '
282363
283364 Leading zeroes in the high and low end of the range:
284365
285366 0b0000_1110 == 0x0e
286367 0b0000_1111 == 0x0f
287- >>> str(UUIDPartition(prefix_length=7, prefix=0b0000_111))
288- '0e-0f'
368+ >>> str(UUIDPartition(prefix_length=7, prefix=0b0000_111, group=4 ))
369+ '0e-0f@4 '
289370
290371 A partition twice as big (a binary prefix that's one bit shorter):
291372
292373 0b0000_1100 = 0x0c
293374 0b0000_1101 = 0x0d
294375 0b0000_1110 = 0x0e
295376 0b0000_1111 = 0x0f
296- >>> str(UUIDPartition(prefix_length=6, prefix=0b0000_11))
297- '0c-0f'
377+ >>> str(UUIDPartition(prefix_length=6, prefix=0b0000_11, group=4 ))
378+ '0c-0f@4 '
298379 """
299380 shift = 4 - self .prefix_length % 4 # shift to align at nibble boundary
300381 all_ones = (1 << shift ) - 1
@@ -306,7 +387,7 @@ def __str__(self) -> str:
306387 def hex (i ):
307388 return format (i , f'0{ hex_len } x' )[:hex_len ]
308389
309- return '-' . join ( map ( hex , (lo , hi )))
390+ return f' { hex (lo ) } - { hex ( hi )} @ { self . group } '
310391
311392
312393def uuid5_for_bytes (namespace : UUID , name : bytes ) -> UUID :
0 commit comments