Skip to content

Commit bcb7440

Browse files
authored
Add functions to skip forward/backward with an iterator, and use it to simplify roaring_bitmap_range_uint32_array (#741)
* feat: add roaring_uint32_iterator_skip{,_backward} This allows efficiently skipping over items in an iterator. * refactor: implement roaring_bitmap_range_uint32_array with new function Greatly simplify the implementation of `roaring_bitmap_range_uint32_array` using `roaring_uint32_iterator_skip`. This also eliminates an unnecessary allocation, and a possible failure. For backward compatibility, `roaring_bitmap_range_uint32_array` still returns a bool, but it always returns true since it can no longer fail.
1 parent d9b4c86 commit bcb7440

File tree

8 files changed

+599
-90
lines changed

8 files changed

+599
-90
lines changed

include/roaring/containers/containers.h

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2477,6 +2477,40 @@ bool container_iterator_read_into_uint64(const container_t *c, uint8_t typecode,
24772477
uint32_t count, uint32_t *consumed,
24782478
uint16_t *value_out);
24792479

2480+
/**
2481+
* Skips the next `skip_count` entries in the container iterator. Returns true
2482+
* and sets `value_out` if a value is present after skipping. Returns false if
2483+
* the end of the container is reached during the skip operation. Sets
2484+
* consumed_count to the number of values actually skipped (which may be less
2485+
* than skip_count if the end of the container is reached).
2486+
*
2487+
* value_out must be initialized to the previous value yielded by the iterator.
2488+
*
2489+
* skip_count must be greater than zero.
2490+
*/
2491+
bool container_iterator_skip(const container_t *c, uint8_t typecode,
2492+
roaring_container_iterator_t *it,
2493+
uint32_t skip_count, uint32_t *consumed_count,
2494+
uint16_t *value_out);
2495+
2496+
/**
2497+
* Skips the previous `skip_count` entries in the container iterator (moves
2498+
* backwards). Returns true and sets `value_out` if a value is present after
2499+
* skipping backwards. Returns false if the beginning of the container is
2500+
* reached during the skip operation. Sets consumed_count to the number of
2501+
* values actually skipped backwards (which may be less than skip_count if
2502+
* the beginning of the container is reached).
2503+
*
2504+
* value_out must be initialized to the current value yielded by the iterator.
2505+
*
2506+
* skip_count must be greater than zero.
2507+
*/
2508+
bool container_iterator_skip_backward(const container_t *c, uint8_t typecode,
2509+
roaring_container_iterator_t *it,
2510+
uint32_t skip_count,
2511+
uint32_t *consumed_count,
2512+
uint16_t *value_out);
2513+
24802514
#ifdef __cplusplus
24812515
}
24822516
}

include/roaring/roaring.h

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -563,7 +563,11 @@ bool roaring_bitmap_to_bitset(const roaring_bitmap_t *r, bitset_t *bitset);
563563
*
564564
* ans = malloc(roaring_bitmap_get_cardinality(limit) * sizeof(uint32_t));
565565
*
566-
* Return false in case of failure (e.g., insufficient memory)
566+
* This function always returns `true`
567+
*
568+
* For more control, see `roaring_uint32_iterator_skip` and
569+
* `roaring_uint32_iterator_read`, which can be used to e.g. tell how many
570+
* values were actually read.
567571
*/
568572
bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *r, size_t offset,
569573
size_t limit, uint32_t *ans);
@@ -1190,7 +1194,7 @@ CROARING_DEPRECATED static inline void roaring_free_uint32_iterator(
11901194
roaring_uint32_iterator_free(it);
11911195
}
11921196

1193-
/*
1197+
/**
11941198
* Reads next ${count} values from iterator into user-supplied ${buf}.
11951199
* Returns the number of read elements.
11961200
* This number can be smaller than ${count}, which means that iterator is
@@ -1210,6 +1214,30 @@ CROARING_DEPRECATED static inline uint32_t roaring_read_uint32_iterator(
12101214
return roaring_uint32_iterator_read(it, buf, count);
12111215
}
12121216

1217+
/**
1218+
* Skip the next ${count} values from iterator.
1219+
* Returns the number of values actually skipped.
1220+
* The number can be smaller than ${count}, which means that iterator is
1221+
* drained.
1222+
*
1223+
* This function is equivalent to calling `roaring_uint32_iterator_advance()`
1224+
* ${count} times but is much more efficient.
1225+
*/
1226+
uint32_t roaring_uint32_iterator_skip(roaring_uint32_iterator_t *it,
1227+
uint32_t count);
1228+
1229+
/**
1230+
* Skip the previous ${count} values from iterator (move backwards).
1231+
* Returns the number of values actually skipped backwards.
1232+
* The number can be smaller than ${count}, which means that iterator reached
1233+
* the beginning.
1234+
*
1235+
* This function is equivalent to calling `roaring_uint32_iterator_previous()`
1236+
* ${count} times but is much more efficient.
1237+
*/
1238+
uint32_t roaring_uint32_iterator_skip_backward(roaring_uint32_iterator_t *it,
1239+
uint32_t count);
1240+
12131241
#ifdef __cplusplus
12141242
}
12151243
}

include/roaring/roaring_array.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -199,9 +199,6 @@ inline void ra_replace_key_and_container_at_index(roaring_array_t *ra,
199199
// write set bits to an array
200200
void ra_to_uint32_array(const roaring_array_t *ra, uint32_t *ans);
201201

202-
bool ra_range_uint32_array(const roaring_array_t *ra, size_t offset,
203-
size_t limit, uint32_t *ans);
204-
205202
/**
206203
* write a bitmap to a buffer. This is meant to be compatible with
207204
* the

src/containers/containers.c

Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -706,6 +706,205 @@ bool container_iterator_read_into_uint64(const container_t *c, uint8_t typecode,
706706
}
707707
}
708708

709+
bool container_iterator_skip(const container_t *c, uint8_t typecode,
710+
roaring_container_iterator_t *it,
711+
uint32_t skip_count, uint32_t *consumed_count,
712+
uint16_t *value_out) {
713+
uint32_t actually_skipped;
714+
bool has_value;
715+
skip_count = minimum_uint32(skip_count, (uint32_t)UINT16_MAX + 1);
716+
switch (typecode) {
717+
case ARRAY_CONTAINER_TYPE: {
718+
const array_container_t *ac = const_CAST_array(c);
719+
actually_skipped =
720+
minimum_uint32(ac->cardinality - it->index, skip_count);
721+
it->index += actually_skipped;
722+
has_value = it->index < ac->cardinality;
723+
if (has_value) {
724+
*value_out = ac->array[it->index];
725+
}
726+
break;
727+
}
728+
case BITSET_CONTAINER_TYPE: {
729+
const bitset_container_t *bc = const_CAST_bitset(c);
730+
731+
uint32_t remaining_skip = skip_count;
732+
uint32_t current_index = it->index;
733+
uint64_t word_mask = UINT64_MAX << (current_index % 64);
734+
has_value = false;
735+
736+
for (uint32_t word_index = current_index / 64;
737+
word_index < BITSET_CONTAINER_SIZE_IN_WORDS; word_index++) {
738+
uint64_t word = bc->words[word_index] & word_mask;
739+
word_mask = ~0; // Only apply mask for the first word
740+
741+
uint32_t bits_in_word = roaring_hamming(word);
742+
if (bits_in_word > remaining_skip) {
743+
// Unset the lowest bit `remaining_skip` times
744+
for (; remaining_skip > 0; --remaining_skip) {
745+
word &= word - 1;
746+
}
747+
has_value = true;
748+
*value_out = it->index =
749+
roaring_trailing_zeroes(word) + word_index * 64;
750+
break;
751+
}
752+
// Skip all set bits in this word
753+
remaining_skip -= bits_in_word;
754+
}
755+
actually_skipped = skip_count - remaining_skip;
756+
break;
757+
}
758+
case RUN_CONTAINER_TYPE: {
759+
const run_container_t *rc = const_CAST_run(c);
760+
761+
uint16_t current_value = *value_out;
762+
uint32_t remaining_skip = skip_count;
763+
int32_t run_index;
764+
765+
// Process skips by iterating through runs
766+
for (run_index = it->index;
767+
remaining_skip > 0 && run_index < rc->n_runs; run_index++) {
768+
// max value (inclusive) in current run
769+
uint32_t run_max_inc =
770+
rc->runs[run_index].value + rc->runs[run_index].length;
771+
// Max to skip in this run (we can skip from the current value
772+
// to the last value in the run, plus one to move past this run)
773+
uint32_t max_skip_this_run = run_max_inc - current_value + 1;
774+
uint32_t consume =
775+
minimum_uint32(remaining_skip, max_skip_this_run);
776+
remaining_skip -= consume;
777+
if (consume < max_skip_this_run) {
778+
current_value += consume;
779+
break;
780+
}
781+
// Skip past the end of this run, to the next if there is one
782+
if (run_index + 1 < rc->n_runs) {
783+
current_value = rc->runs[run_index + 1].value;
784+
}
785+
}
786+
787+
// Update final state
788+
it->index = run_index;
789+
actually_skipped = skip_count - remaining_skip;
790+
has_value = run_index < rc->n_runs;
791+
if (has_value) {
792+
*value_out = current_value;
793+
}
794+
break;
795+
}
796+
default:
797+
assert(false);
798+
roaring_unreachable;
799+
return false;
800+
}
801+
*consumed_count = actually_skipped;
802+
return has_value;
803+
}
804+
805+
bool container_iterator_skip_backward(const container_t *c, uint8_t typecode,
806+
roaring_container_iterator_t *it,
807+
uint32_t skip_count,
808+
uint32_t *consumed_count,
809+
uint16_t *value_out) {
810+
uint32_t actually_skipped;
811+
bool has_value;
812+
skip_count = minimum_uint32(skip_count, (uint32_t)UINT16_MAX + 1);
813+
switch (typecode) {
814+
case ARRAY_CONTAINER_TYPE: {
815+
const array_container_t *ac = const_CAST_array(c);
816+
// Allow skipping back to -1
817+
actually_skipped = minimum_uint32(it->index + 1, skip_count);
818+
it->index -= actually_skipped;
819+
has_value = it->index >= 0;
820+
if (has_value) {
821+
*value_out = ac->array[it->index];
822+
}
823+
break;
824+
}
825+
case BITSET_CONTAINER_TYPE: {
826+
const bitset_container_t *bc = const_CAST_bitset(c);
827+
828+
uint32_t remaining_skip = skip_count;
829+
uint32_t current_index = it->index;
830+
uint64_t word_mask = UINT64_MAX >> (63 - (current_index % 64));
831+
has_value = false;
832+
833+
// Start from the word containing current index and go backwards
834+
for (int32_t word_index = current_index / 64; word_index >= 0;
835+
word_index--) {
836+
uint64_t word = bc->words[word_index] & word_mask;
837+
word_mask = ~0; // Only apply mask for the first word
838+
839+
uint32_t bits_in_word = roaring_hamming(word);
840+
if (bits_in_word > remaining_skip) {
841+
// Unset the highest bit `remaining_skip` times
842+
for (; remaining_skip > 0; --remaining_skip) {
843+
uint64_t high_bit =
844+
UINT64_C(1) << (63 - roaring_leading_zeroes(word));
845+
// Clear the highest set bit
846+
word &= ~high_bit;
847+
}
848+
has_value = true;
849+
*value_out = it->index =
850+
(63 - roaring_leading_zeroes(word)) + word_index * 64;
851+
break;
852+
}
853+
// Skip all set bits in this word
854+
remaining_skip -= bits_in_word;
855+
}
856+
actually_skipped = skip_count - remaining_skip;
857+
break;
858+
}
859+
case RUN_CONTAINER_TYPE: {
860+
const run_container_t *rc = const_CAST_run(c);
861+
862+
uint16_t current_value = *value_out;
863+
uint32_t remaining_skip = skip_count;
864+
int32_t run_index;
865+
866+
// Process skips by iterating through runs backwards
867+
for (run_index = it->index; remaining_skip > 0 && run_index >= 0;
868+
run_index--) {
869+
// min value (inclusive) in current run
870+
uint32_t run_min_inc = rc->runs[run_index].value;
871+
// Max to skip in this run (we can skip from the current value
872+
// back to the first value in the run, plus one to move before
873+
// this run)
874+
uint32_t max_skip_this_run = current_value - run_min_inc + 1;
875+
uint32_t consume =
876+
minimum_uint32(remaining_skip, max_skip_this_run);
877+
remaining_skip -= consume;
878+
if (consume < max_skip_this_run) {
879+
current_value -= consume;
880+
break;
881+
}
882+
// Skip past the beginning of this run, to the previous if there
883+
// is one
884+
if (run_index - 1 >= 0) {
885+
current_value = rc->runs[run_index - 1].value +
886+
rc->runs[run_index - 1].length;
887+
}
888+
}
889+
890+
// Update final state
891+
it->index = run_index;
892+
actually_skipped = skip_count - remaining_skip;
893+
has_value = run_index >= 0;
894+
if (has_value) {
895+
*value_out = current_value;
896+
}
897+
break;
898+
}
899+
default:
900+
assert(false);
901+
roaring_unreachable;
902+
return false;
903+
}
904+
*consumed_count = actually_skipped;
905+
return has_value;
906+
}
907+
709908
#ifdef __cplusplus
710909
}
711910
}

src/roaring.c

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1430,7 +1430,13 @@ void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *r, uint32_t *ans) {
14301430

14311431
bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *r, size_t offset,
14321432
size_t limit, uint32_t *ans) {
1433-
return ra_range_uint32_array(&r->high_low_container, offset, limit, ans);
1433+
roaring_uint32_iterator_t it;
1434+
roaring_iterator_init(r, &it);
1435+
roaring_uint32_iterator_skip(&it, offset);
1436+
roaring_uint32_iterator_read(&it, ans, limit);
1437+
1438+
// This function always succeeds
1439+
return true;
14341440
}
14351441

14361442
/** convert array and bitmap containers to run containers when it is more
@@ -1863,15 +1869,67 @@ uint32_t roaring_uint32_iterator_read(roaring_uint32_iterator_t *it,
18631869
if (has_value) {
18641870
it->has_value = true;
18651871
it->current_value = it->highbits | low16;
1872+
// If the container still has values, we must have stopped because
1873+
// we read enough values.
1874+
assert(ret == count);
1875+
return ret;
1876+
}
1877+
it->container_index++;
1878+
it->has_value = loadfirstvalue(it);
1879+
}
1880+
return ret;
1881+
}
1882+
1883+
/**
 * Advances `it` over up to `count` values and returns how many were actually
 * stepped over. A result smaller than `count` means the iterator ran out of
 * values.
 */
uint32_t roaring_uint32_iterator_skip(roaring_uint32_iterator_t *it,
                                      uint32_t count) {
    uint32_t total = 0;

    while (it->has_value && total < count) {
        uint32_t step;
        uint16_t low_bits = (uint16_t)it->current_value;
        const bool in_container = container_iterator_skip(
            it->container, it->typecode, &it->container_it, count - total,
            &step, &low_bits);
        total += step;

        if (!in_container) {
            // The current container is exhausted: continue from the first
            // value of the next one. The skip count is not adjusted here.
            it->container_index++;
            it->has_value = loadfirstvalue(it);
            continue;
        }

        // Still inside the container, which can only happen once the whole
        // request has been satisfied.
        it->has_value = true;
        it->current_value = it->highbits | low_bits;
        assert(total == count);
        return total;
    }

    return total;
}
18741909

1910+
/**
 * Moves `it` backward over up to `count` values and returns how many were
 * actually stepped over. A result smaller than `count` means the iterator ran
 * out of values while moving backward.
 */
uint32_t roaring_uint32_iterator_skip_backward(roaring_uint32_iterator_t *it,
                                               uint32_t count) {
    uint32_t total = 0;

    while (it->has_value && total < count) {
        uint32_t step;
        uint16_t low_bits = (uint16_t)it->current_value;
        const bool in_container = container_iterator_skip_backward(
            it->container, it->typecode, &it->container_it, count - total,
            &step, &low_bits);
        total += step;

        if (!in_container) {
            // The skip ran off the front of the current container: continue
            // from the last value of the previous one. Nothing beyond the
            // container-level count is added to the total here.
            it->container_index--;
            it->has_value = loadlastvalue(it);
            continue;
        }

        it->has_value = true;
        it->current_value = it->highbits | low_bits;
        return total;
    }

    return total;
}
1932+
18751933
void roaring_uint32_iterator_free(roaring_uint32_iterator_t *it) {
18761934
roaring_free(it);
18771935
}

0 commit comments

Comments
 (0)