diff --git a/src/ailego/buffer/buffer_pool.cc b/src/ailego/buffer/buffer_pool.cc
new file mode 100644
index 00000000..bdbf0a03
--- /dev/null
+++ b/src/ailego/buffer/buffer_pool.cc
@@ -0,0 +1,239 @@
+#include
+#include
+
+namespace zvec {
+namespace ailego {
+
+int LRUCache::init(size_t block_size) {
+  block_size_ = block_size;
+  for (size_t i = 0; i < CATCH_QUEUE_NUM; i++) {
+    queues_.push_back(ConcurrentQueue(block_size));
+  }
+  return 0;
+}
+
+bool LRUCache::evict_single_block(BlockType &item) {
+  bool found = false;
+  for (size_t i = 0; i < CATCH_QUEUE_NUM; i++) {
+    found = queues_[i].try_dequeue(item);
+    if (found) {
+      break;
+    }
+  }
+  return found;
+}
+
+bool LRUCache::add_single_block(const LPMap *lp_map, const BlockType &block,
+                                int block_type) {
+  bool ok = queues_[block_type].try_enqueue(block);
+  evict_queue_insertions_.fetch_add(1, std::memory_order_relaxed);
+  if (evict_queue_insertions_ % block_size_ == 0) {
+    this->clear_dead_node(lp_map);
+  }
+  return ok;
+}
+
+void LRUCache::clear_dead_node(const LPMap *lp_map) {
+  for (int i = 0; i < CATCH_QUEUE_NUM; i++) {
+    int clear_size = block_size_ * 2;
+    if (queues_[i].size_approx() < clear_size * 4) {
+      continue;
+    }
+    int clear_count = 0;
+    ConcurrentQueue tmp(block_size_);
+    BlockType item;
+    while (queues_[i].try_dequeue(item) && (clear_count++ < clear_size)) {
+      if (!lp_map->isDeadBlock(item)) {
+        tmp.try_enqueue(item);
+      }
+    }
+    while (tmp.try_dequeue(item)) {
+      if (!lp_map->isDeadBlock(item)) {
+        queues_[i].try_enqueue(item);
+      }
+    }
+  }
+}
+
+void LPMap::init(size_t entry_num) {
+  if (entries_) {
+    delete[] entries_;
+  }
+  entry_num_ = entry_num;
+  entries_ = new Entry[entry_num_];
+  for (size_t i = 0; i < entry_num_; i++) {
+    entries_[i].ref_count.store(std::numeric_limits<int>::min());
+    entries_[i].load_count.store(0);
+    entries_[i].buffer = nullptr;
+  }
+  cache_.init(entry_num * 4);
+}
+
+char *LPMap::acquire_block(block_id_t block_id) {
+  assert(block_id < entry_num_);
+  Entry &entry = entries_[block_id];
+  if (entry.ref_count.load(std::memory_order_relaxed) == 0) {
+    entry.load_count.fetch_add(1, std::memory_order_relaxed);
+  }
+  entry.ref_count.fetch_add(1, std::memory_order_relaxed);
+  if (entry.ref_count.load(std::memory_order_relaxed) < 0) {
+    return nullptr;
+  }
+  return entry.buffer;
+}
+
+void LPMap::release_block(block_id_t block_id) {
+  assert(block_id < entry_num_);
+  Entry &entry = entries_[block_id];
+
+  if (entry.ref_count.fetch_sub(1, std::memory_order_release) == 1) {
+    std::atomic_thread_fence(std::memory_order_acquire);
+    LRUCache::BlockType block;
+    block.first = block_id;
+    block.second = entry.load_count.load();
+    cache_.add_single_block(this, block, 0);
+  }
+}
+
+char *LPMap::evict_block(block_id_t block_id) {
+  assert(block_id < entry_num_);
+  Entry &entry = entries_[block_id];
+  int expected = 0;
+  if (entry.ref_count.compare_exchange_strong(
+          expected, std::numeric_limits<int>::min())) {
+    char *buffer = entry.buffer;
+    entry.buffer = nullptr;
+    return buffer;
+  } else {
+    return nullptr;
+  }
+}
+
+char *LPMap::set_block_acquired(block_id_t block_id, char *buffer) {
+  assert(block_id < entry_num_);
+  Entry &entry = entries_[block_id];
+  if (entry.ref_count.load(std::memory_order_relaxed) >= 0) {
+    entry.ref_count.fetch_add(1, std::memory_order_relaxed);
+    return entry.buffer;
+  }
+  entry.buffer = buffer;
+  entry.ref_count.store(1, std::memory_order_relaxed);
+  entry.load_count.fetch_add(1, std::memory_order_relaxed);
+  return buffer;
+}
+
+void LPMap::recycle(moodycamel::ConcurrentQueue<char *> &free_buffers) {
+  LRUCache::BlockType block;
+  do {
+    bool ok = cache_.evict_single_block(block);
+    if (!ok) {
+      return;
+    }
+  } while (isDeadBlock(block));
+  char *buffer = evict_block(block.first);
+  if (buffer) {
+    free_buffers.try_enqueue(buffer);
+  }
+}
+
+VecBufferPool::VecBufferPool(const std::string &filename) {
+  fd_ = open(filename.c_str(), O_RDONLY);
+  if (fd_ < 0) {
+    throw std::runtime_error("Failed to open file: " + filename);
+  }
+  struct stat st;
+  if (fstat(fd_, &st) < 0) {
+    throw std::runtime_error("Failed to stat file: " + filename);
+  }
+  file_size_ = st.st_size;
+}
+
+int VecBufferPool::init(size_t pool_capacity, size_t block_size) {
+  pool_capacity_ = pool_capacity;
+  size_t buffer_num = pool_capacity_ / block_size + 10;
+  size_t block_num = file_size_ / block_size + 10;
+  lp_map_.init(block_num);
+  for (size_t i = 0; i < buffer_num; i++) {
+    char *buffer = (char *)aligned_alloc(64, block_size);
+    if (buffer != nullptr) {
+      bool ok = free_buffers_.try_enqueue(buffer);
+    }
+  }
+  LOG_DEBUG("Buffer pool num: %zu, entry num: %zu", buffer_num,
+            lp_map_.entry_num());
+  return 0;
+}
+
+VecBufferPoolHandle VecBufferPool::get_handle() {
+  return VecBufferPoolHandle(*this);
+}
+
+char *VecBufferPool::acquire_buffer(block_id_t block_id, size_t offset,
+                                    size_t size, int retry) {
+  char *buffer = lp_map_.acquire_block(block_id);
+  if (buffer) {
+    return buffer;
+  }
+  {
+    bool found = free_buffers_.try_dequeue(buffer);
+    if (!found) {
+      for (int i = 0; i < retry; i++) {
+        lp_map_.recycle(free_buffers_);
+        found = free_buffers_.try_dequeue(buffer);
+        if (found) {
+          break;
+        }
+      }
+    }
+    if (!found) {
+      LOG_ERROR("Buffer pool failed to get free buffer");
+      return nullptr;
+    }
+  }
+
+  ssize_t read_bytes = pread(fd_, buffer, size, offset);
+  if (read_bytes != static_cast<ssize_t>(size)) {
+    LOG_ERROR("Buffer pool failed to read file at offset: %zu", offset);
+    return nullptr;
+  }
+  char *placed_buffer = nullptr;
+  {
+    std::lock_guard<std::mutex> lock(mutex_);
+    placed_buffer = lp_map_.set_block_acquired(block_id, buffer);
+  }
+  if (placed_buffer != buffer) {
+    // another thread has set the block
+    free_buffers_.try_enqueue(buffer);
+  }
+  return placed_buffer;
+}
+
+int VecBufferPool::get_meta(size_t offset, size_t length, char *buffer) {
+  ssize_t read_bytes = pread(fd_, buffer, length, offset);
+  if (read_bytes != static_cast<ssize_t>(length)) {
+    LOG_ERROR("Buffer pool failed to read file at offset: %zu", offset);
+    return -1;
+  }
+  return 0;
+}
+
+char *VecBufferPoolHandle::get_block(size_t offset, size_t size,
+                                     size_t block_id) {
+  char *buffer = pool.acquire_buffer(block_id, offset, size, 5);
+  return buffer;
+}
+
+int VecBufferPoolHandle::get_meta(size_t offset, size_t length, char *buffer) {
+  return pool.get_meta(offset, length, buffer);
+}
+
+void VecBufferPoolHandle::release_one(block_id_t block_id) {
+  pool.lp_map_.release_block(block_id);
+}
+
+void VecBufferPoolHandle::acquire_one(block_id_t block_id) {
+  pool.lp_map_.acquire_block(block_id);
+}
+
+} // namespace ailego
+} // namespace zvec
\ No newline at end of file
diff --git a/src/core/algorithm/flat/flat_streamer_context.h b/src/core/algorithm/flat/flat_streamer_context.h
index 24cfd9e5..22a1106a 100644
--- a/src/core/algorithm/flat/flat_streamer_context.h
+++ b/src/core/algorithm/flat/flat_streamer_context.h
@@ -190,10 +190,18 @@ class FlatStreamerContext : public IndexStreamer::Context {
     group_topk_heaps_.clear();
   }
 
-  void reset() override {}
+  void reset() override {
+    for (auto &it : results_) {
+      it.clear();
+    }
+    for (auto &it : group_results_) {
+      it.clear();
+    }
+  }
 
   //! Reset the context
   void reset(const FlatStreamer *owner) {
+    this->reset();
     magic_ = owner->magic();
     feature_size_ = owner->meta().element_size();
diff --git a/src/core/algorithm/hnsw/hnsw_context.h b/src/core/algorithm/hnsw/hnsw_context.h
index 22bcfaad..e776b81a 100644
--- a/src/core/algorithm/hnsw/hnsw_context.h
+++ b/src/core/algorithm/hnsw/hnsw_context.h
@@ -335,6 +335,7 @@ class HnswContext : public IndexContext {
 
   //! Reset context
   void reset(void) override {
+    this->clear();
     set_filter(nullptr);
     reset_threshold();
     set_fetch_vector(false);
@@ -422,6 +423,9 @@ class HnswContext : public IndexContext {
     for (auto &it : results_) {
       it.clear();
     }
+    for (auto &it : group_results_) {
+      it.clear();
+    }
   }
 
   uint32_t *mutable_stats_get_neighbors() {
diff --git a/src/core/algorithm/hnsw/hnsw_entity.h b/src/core/algorithm/hnsw/hnsw_entity.h
index e5f2077f..70ea3dcc 100644
--- a/src/core/algorithm/hnsw/hnsw_entity.h
+++ b/src/core/algorithm/hnsw/hnsw_entity.h
@@ -147,8 +147,7 @@ struct Neighbors {
   Neighbors(uint32_t cnt_in, const node_id_t *data_in)
       : cnt{cnt_in}, data{data_in} {}
 
-  Neighbors(IndexStorage::MemoryBlock &&mem_block)
-      : neighbor_block{std::move(mem_block)} {
+  Neighbors(IndexStorage::MemoryBlock &mem_block) : neighbor_block{mem_block} {
     auto hd = reinterpret_cast(neighbor_block.data());
     cnt = hd->neighbor_cnt;
     data = hd->neighbors;
diff --git a/src/core/algorithm/hnsw/hnsw_streamer_entity.cc b/src/core/algorithm/hnsw/hnsw_streamer_entity.cc
index feafa573..734f11f1 100644
--- a/src/core/algorithm/hnsw/hnsw_streamer_entity.cc
+++ b/src/core/algorithm/hnsw/hnsw_streamer_entity.cc
@@ -127,7 +127,7 @@ const Neighbors HnswStreamerEntity::get_neighbors(level_t level,
     LOG_ERROR("Read neighbor header failed, ret=%zu", size);
     return Neighbors();
   }
-  return Neighbors(std::move(neighbor_block));
+  return Neighbors(neighbor_block);
 }
 
 //! Get vector data by key
diff --git a/src/core/interface/index.cc b/src/core/interface/index.cc
index 038f67d4..72005bc9 100644
--- a/src/core/interface/index.cc
+++ b/src/core/interface/index.cc
@@ -406,8 +406,9 @@ int Index::Search(const VectorData &vector_data,
   }
 
   // dense support refiner, but sparse doesn't
+  int ret = 0;
   if (search_param->refiner_param == nullptr) {
-    return _dense_search(vector_data, search_param, result, context);
+    ret = _dense_search(vector_data, search_param, result, context);
   } else {
     auto &reference_index = search_param->refiner_param->reference_index;
     if (reference_index == nullptr) {
@@ -441,8 +442,10 @@ int Index::Search(const VectorData &vector_data,
     // TODO: should copy other params?
     flat_search_param->bf_pks =
         std::make_shared>(keys);
-    return reference_index->Search(vector_data, flat_search_param, result);
+    ret = reference_index->Search(vector_data, flat_search_param, result);
   }
+  context->reset();
+  return ret;
 }
diff --git a/src/core/utility/buffer_storage.cc b/src/core/utility/buffer_storage.cc
index bccf07e2..d339553a 100644
--- a/src/core/utility/buffer_storage.cc
+++ b/src/core/utility/buffer_storage.cc
@@ -12,8 +12,10 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include
 #include
-#include
+#include
+#include
 #include
 #include
 #include
@@ -38,9 +40,10 @@ class BufferStorage : public IndexStorage {
   //!
Constructor WrappedSegment(BufferStorage *owner, IndexMapping::Segment *segment, uint64_t segment_header_start_offset, - IndexFormat::MetaHeader *segment_header) + IndexFormat::MetaHeader *segment_header, size_t segment_id) : segment_(segment), owner_(owner), + segment_id_(segment_id), capacity_(static_cast(segment->meta()->data_size + segment->meta()->padding_size)), segment_header_start_offset_(segment_header_start_offset), @@ -77,9 +80,9 @@ class BufferStorage : public IndexStorage { } len = meta->data_size - offset; } - ailego::BufferHandle buffer_handle = - owner_->get_buffer_handle(offset, len); - memmove(buf, (const uint8_t *)buffer_handle.pin_vector_data() + offset, + memmove(buf, + (const uint8_t *)(owner_->get_buffer(offset, len, segment_id_)) + + offset, len); return len; } @@ -95,10 +98,9 @@ class BufferStorage : public IndexStorage { } size_t buffer_offset = segment_header_start_offset_ + segment_header_->content_offset + - segment_->meta()->data_index + offset; - ailego::BufferHandle buffer_handle = - owner_->get_buffer_handle(buffer_offset, len); - *data = buffer_handle.pin_vector_data(); + segment_->meta()->data_index; + *data = + owner_->get_buffer(buffer_offset, capacity_, segment_id_) + offset; return len; } @@ -112,15 +114,16 @@ class BufferStorage : public IndexStorage { } size_t buffer_offset = segment_header_start_offset_ + segment_header_->content_offset + - segment_->meta()->data_index + offset; - data.reset(owner_->get_buffer_handle_ptr(buffer_offset, len)); + segment_->meta()->data_index; + data.reset( + owner_->buffer_pool_handle_.get(), segment_id_, + owner_->get_buffer(buffer_offset, capacity_, segment_id_) + offset); + // data.reset(owner_->get_buffer(buffer_offset, capacity_, segment_id_) + + // offset); if (data.data()) { return len; } else { - LOG_ERROR( - "Buffer handle is null, now used memory: %zu, new: %zu", - (size_t)ailego::BufferManager::Instance().total_size_in_bytes(), - len); + LOG_ERROR("read error."); return -1; } } @@ -150,6 +153,7 @@ class BufferStorage : public IndexStorage { private: BufferStorage *owner_{nullptr}; + size_t segment_id_{}; size_t capacity_{}; uint64_t segment_header_start_offset_; IndexFormat::MetaHeader *segment_header_; @@ -174,28 +178,34 @@ class BufferStorage : public IndexStorage { //! 
Open storage int open(const std::string &path, bool /*create*/) override { file_name_ = path; - return ParseToMapping(); + buffer_pool_ = std::make_shared(path); + buffer_pool_handle_ = std::make_shared( + buffer_pool_->get_handle()); + int ret = ParseToMapping(); + if (ret != 0) { + return ret; + } + ret = buffer_pool_->init(20lu * 1024 * 1024 * 1024, max_segment_size_); + if (ret != 0) { + return ret; + } + return 0; } - ailego::BufferHandle get_buffer_handle(int offset, int length) { - ailego::BufferID buffer_id = - ailego::BufferID::VectorID(file_name_, offset, length); - return ailego::BufferManager::Instance().acquire(buffer_id); + char *get_buffer(size_t offset, size_t length, size_t block_id) { + return buffer_pool_handle_->get_block(offset, length, block_id); } - ailego::BufferHandle::Pointer get_buffer_handle_ptr(int offset, int length) { - ailego::BufferID buffer_id = - ailego::BufferID::VectorID(file_name_, offset, length); - return ailego::BufferManager::Instance().acquire_ptr(buffer_id); + int get_meta(size_t offset, size_t length, char *out) { + return buffer_pool_handle_->get_meta(offset, length, out); } - int ParseHeader(int offset) { - ailego::BufferHandle header_handle = - get_buffer_handle(offset, sizeof(header_)); - void *buffer = header_handle.pin_vector_data(); + int ParseHeader(size_t offset) { + char *buffer = new char[sizeof(header_)]; + get_meta(offset, sizeof(header_), buffer); uint8_t *header_ptr = reinterpret_cast(buffer); memcpy(&header_, header_ptr, sizeof(header_)); - header_handle.unpin_vector_data(); + delete[] buffer; if (header_.meta_header_size != sizeof(IndexFormat::MetaHeader)) { LOG_ERROR("Header meta size is invalid."); return IndexError_InvalidLength; @@ -208,14 +218,13 @@ class BufferStorage : public IndexStorage { return 0; } - int ParseFooter(int offset) { - ailego::BufferHandle footer_handle = - get_buffer_handle(offset, sizeof(footer_)); - void *buffer = footer_handle.pin_vector_data(); + int ParseFooter(size_t offset) { + char *buffer = new char[sizeof(footer_)]; + get_meta(offset, sizeof(footer_), buffer); uint8_t *footer_ptr = reinterpret_cast(buffer); memcpy(&footer_, footer_ptr, sizeof(footer_)); - footer_handle.unpin_vector_data(); - if (offset < (int)footer_.segments_meta_size) { + delete[] buffer; + if (offset < (size_t)footer_.segments_meta_size) { LOG_ERROR("Footer meta size is invalid."); return IndexError_InvalidLength; } @@ -227,17 +236,16 @@ class BufferStorage : public IndexStorage { return 0; } - int ParseSegment(int offset) { - ailego::BufferHandle segment_start_handle = - get_buffer_handle(offset, footer_.segments_meta_size); - void *segment_buffer = segment_start_handle.pin_vector_data(); - if (ailego::Crc32c::Hash(segment_buffer, footer_.segments_meta_size, 0u) != - footer_.segments_meta_crc) { + int ParseSegment(size_t offset) { + segment_buffer_ = std::make_unique(footer_.segments_meta_size); + get_meta(offset, footer_.segments_meta_size, segment_buffer_.get()); + if (ailego::Crc32c::Hash(segment_buffer_.get(), footer_.segments_meta_size, + 0u) != footer_.segments_meta_crc) { LOG_ERROR("Index segments meta checksum is invalid."); return IndexError_InvalidChecksum; } IndexFormat::SegmentMeta *segment_start = - reinterpret_cast(segment_buffer); + reinterpret_cast(segment_buffer_.get()); uint32_t segment_ids_offset = footer_.segments_meta_size; for (IndexFormat::SegmentMeta *iter = segment_start, *end = segment_start + footer_.segment_count; @@ -255,11 +263,17 @@ class BufferStorage : public IndexStorage { if 
(iter->segment_id_offset < segment_ids_offset) { segment_ids_offset = iter->segment_id_offset; } + id_hash_.emplace( + std::string(reinterpret_cast(segment_start) + + iter->segment_id_offset), + segments_.size()); segments_.emplace( std::string(reinterpret_cast(segment_start) + iter->segment_id_offset), IndexMapping::SegmentInfo{IndexMapping::Segment{iter}, current_header_start_offset_, &header_}); + max_segment_size_ = + std::max(max_segment_size_, iter->data_size + iter->padding_size); if (sizeof(IndexFormat::SegmentMeta) * footer_.segment_count > footer_.segments_meta_size) { return IndexError_InvalidLength; @@ -358,7 +372,7 @@ class BufferStorage : public IndexStorage { } return std::make_shared( this, &segment_info->segment, segment_info->segment_header_start_offset, - segment_info->segment_header); + segment_info->segment_header, id_hash_[id]); } //! Test if it a segment exists @@ -397,22 +411,14 @@ class BufferStorage : public IndexStorage { //! Initialize index file int init_index(const std::string &path) { - int error_code = mapping_.create(path, segment_meta_capacity_); - if (error_code != 0) { - return error_code; - } - // Add index version - error_code = this->init_version_segment(); + int error_code = this->init_version_segment(); if (error_code != 0) { return error_code; } // Refresh mapping this->refresh_index(0); - - // Close mapping - mapping_.close(); return 0; } @@ -436,6 +442,7 @@ class BufferStorage : public IndexStorage { segments_.clear(); memset(&header_, 0, sizeof(header_)); memset(&footer_, 0, sizeof(footer_)); + segment_buffer_.release(); } //! Append a segment into storage @@ -460,21 +467,20 @@ class BufferStorage : public IndexStorage { } private: - // mmap - uint32_t segment_meta_capacity_{1024 * 1024}; - // bool copy_on_write_{false}; - // bool force_flush_{false}; - // bool memory_locked_{false}; - // bool memory_warmup_{false}; bool index_dirty_{false}; - mutable IndexMapping mapping_{}; mutable std::mutex mapping_mutex_{}; // buffer manager std::string file_name_; - IndexFormat::MetaHeader header_{}; - IndexFormat::MetaFooter footer_{}; + IndexFormat::MetaHeader header_; + IndexFormat::MetaFooter footer_; std::map segments_{}; + std::map id_hash_{}; + uint64_t max_segment_size_{0}; + std::unique_ptr segment_buffer_{nullptr}; + + ailego::VecBufferPool::Pointer buffer_pool_{nullptr}; + ailego::VecBufferPoolHandle::Pointer buffer_pool_handle_{nullptr}; uint64_t current_header_start_offset_{0u}; }; diff --git a/src/include/zvec/ailego/buffer/buffer_pool.h b/src/include/zvec/ailego/buffer/buffer_pool.h new file mode 100644 index 00000000..c27065a2 --- /dev/null +++ b/src/include/zvec/ailego/buffer/buffer_pool.h @@ -0,0 +1,155 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "concurrentqueue.h" + +namespace zvec { +namespace ailego { + +using block_id_t = size_t; +using version_t = size_t; + +class LPMap; + +class LRUCache { + public: + typedef std::pair BlockType; + typedef moodycamel::ConcurrentQueue ConcurrentQueue; + + int init(size_t block_size); + + bool evict_single_block(BlockType &item); + + bool add_single_block(const LPMap *lp_map, const BlockType &block, + int block_type); + + void clear_dead_node(const LPMap *lp_map); + + private: + constexpr static size_t CATCH_QUEUE_NUM = 3; + int block_size_; + std::vector queues_; + alignas(64) std::atomic evict_queue_insertions_{0}; +}; + +class LPMap { + struct 
Entry {
+    alignas(64) std::atomic<int> ref_count;
+    alignas(64) std::atomic<version_t> load_count;
+    char *buffer;
+  };
+
+ public:
+  LPMap() : entry_num_(0), entries_(nullptr) {}
+  ~LPMap() {
+    delete[] entries_;
+  }
+
+  void init(size_t entry_num);
+
+  char *acquire_block(block_id_t block_id);
+
+  void release_block(block_id_t block_id);
+
+  // need be called under lock
+  char *evict_block(block_id_t block_id);
+
+  // need be called under lock
+  char *set_block_acquired(block_id_t block_id, char *buffer);
+
+  // need be called under lock
+  void recycle(moodycamel::ConcurrentQueue<char *> &free_buffers);
+
+  size_t entry_num() const {
+    return entry_num_;
+  }
+
+  bool isDeadBlock(LRUCache::BlockType block) const {
+    Entry &entry = entries_[block.first];
+    return block.second != entry.load_count.load();
+  }
+
+ private:
+  size_t entry_num_{0};
+  Entry *entries_{nullptr};
+  LRUCache cache_;
+};
+
+class VecBufferPoolHandle;
+
+class VecBufferPool {
+ public:
+  typedef std::shared_ptr<VecBufferPool> Pointer;
+
+  VecBufferPool(const std::string &filename);
+  ~VecBufferPool() {
+    close(fd_);
+  }
+
+  int init(size_t pool_capacity, size_t block_size);
+
+  VecBufferPoolHandle get_handle();
+
+  char *acquire_buffer(block_id_t block_id, size_t offset, size_t size,
+                       int retry = 0);
+
+  int get_meta(size_t offset, size_t length, char *buffer);
+
+  size_t file_size() const {
+    return file_size_;
+  }
+
+ private:
+  int fd_;
+  size_t file_size_;
+  size_t pool_capacity_;
+
+ public:
+  LPMap lp_map_;
+
+ private:
+  std::mutex mutex_;
+  moodycamel::ConcurrentQueue<char *> free_buffers_;
+};
+
+struct VecBufferPoolHandle {
+  VecBufferPoolHandle(VecBufferPool &pool) : pool(pool), hit_num_(0) {};
+  VecBufferPoolHandle(VecBufferPoolHandle &&other)
+      : pool(other.pool), hit_num_(other.hit_num_) {
+    other.hit_num_ = 0;
+  }
+
+  ~VecBufferPoolHandle() = default;
+
+  typedef std::shared_ptr<VecBufferPoolHandle> Pointer;
+
+  char *get_block(size_t offset, size_t size, size_t block_id);
+
+  int get_meta(size_t offset, size_t length, char *buffer);
+
+  void release_one(block_id_t block_id);
+
+  void acquire_one(block_id_t block_id);
+
+  VecBufferPool &pool;
+  int hit_num_;
+};
+
+} // namespace ailego
+} // namespace zvec
\ No newline at end of file
diff --git a/src/include/zvec/ailego/buffer/concurrentqueue.h b/src/include/zvec/ailego/buffer/concurrentqueue.h
new file mode 100644
index 00000000..f7f3d77e
--- /dev/null
+++ b/src/include/zvec/ailego/buffer/concurrentqueue.h
@@ -0,0 +1,4410 @@
+// Provides a C++11 implementation of a multi-producer, multi-consumer lock-free
+// queue. An overview, including benchmark results, is provided here:
+// http://moodycamel.com/blog/2014/a-fast-general-purpose-lock-free-queue-for-c++
+// The full design is also described in excruciating detail at:
+// http://moodycamel.com/blog/2014/detailed-design-of-a-lock-free-queue
+
+// Simplified BSD license:
+// Copyright (c) 2013-2020, Cameron Desrochers.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. + +// Also dual-licensed under the Boost Software License (see LICENSE.md) + +#pragma once + +#if defined(__GNUC__) && !defined(__INTEL_COMPILER) +// Disable -Wconversion warnings (spuriously triggered when Traits::size_t and +// Traits::index_t are set to < 32 bits, causing integer promotion, causing +// warnings upon assigning any computed values) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" + +#ifdef MCDBGQ_USE_RELACY +#pragma GCC diagnostic ignored "-Wint-to-pointer-cast" +#endif +#endif + +#if defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17) +// VS2019 with /W4 warns about constant conditional expressions but unless +// /std=c++17 or higher does not support `if constexpr`, so we have no choice +// but to simply disable the warning +#pragma warning(push) +#pragma warning(disable : 4127) // conditional expression is constant +#endif + +#if defined(__APPLE__) +#include "TargetConditionals.h" +#endif + +#ifdef MCDBGQ_USE_RELACY +#include "relacy/relacy_std.hpp" +#include "relacy_shims.h" +// We only use malloc/free anyway, and the delete macro messes up `= delete` +// method declarations. We'll override the default trait malloc ourselves +// without a macro. +#undef new +#undef delete +#undef malloc +#undef free +#else +#include // Requires C++11. Sorry VS2010. 
+#include +#endif +#include +#include +#include // for CHAR_BIT +#include // for max_align_t +#include +#include +#include +#include // used for thread exit synchronization +#include // partly for __WINPTHREADS_VERSION if on MinGW-w64 w/ POSIX threading +#include +#include + +// Platform-specific definitions of a numeric thread ID type and an invalid +// value +namespace moodycamel { +namespace details { +template +struct thread_id_converter { + typedef thread_id_t thread_id_numeric_size_t; + typedef thread_id_t thread_id_hash_t; + static thread_id_hash_t prehash(thread_id_t const &x) { + return x; + } +}; +} // namespace details +} // namespace moodycamel +#if defined(MCDBGQ_USE_RELACY) +namespace moodycamel { +namespace details { +typedef std::uint32_t thread_id_t; +static const thread_id_t invalid_thread_id = 0xFFFFFFFFU; +static const thread_id_t invalid_thread_id2 = 0xFFFFFFFEU; +static inline thread_id_t thread_id() { + return rl::thread_index(); +} +} // namespace details +} // namespace moodycamel +#elif defined(_WIN32) || defined(__WINDOWS__) || defined(__WIN32__) +// No sense pulling in windows.h in a header, we'll manually declare the +// function we use and rely on backwards-compatibility for this not to break +extern "C" + __declspec(dllimport) unsigned long __stdcall GetCurrentThreadId(void); +namespace moodycamel { +namespace details { +static_assert(sizeof(unsigned long) == sizeof(std::uint32_t), + "Expected size of unsigned long to be 32 bits on Windows"); +typedef std::uint32_t thread_id_t; +static const thread_id_t invalid_thread_id = + 0; // See http://blogs.msdn.com/b/oldnewthing/archive/2004/02/23/78395.aspx +static const thread_id_t invalid_thread_id2 = + 0xFFFFFFFFU; // Not technically guaranteed to be invalid, but is never used + // in practice. Note that all Win32 thread IDs are presently + // multiples of 4. +static inline thread_id_t thread_id() { + return static_cast(::GetCurrentThreadId()); +} +} // namespace details +} // namespace moodycamel +#elif defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || \ + (defined(__APPLE__) && TARGET_OS_IPHONE) || defined(__MVS__) || \ + defined(MOODYCAMEL_NO_THREAD_LOCAL) +namespace moodycamel { +namespace details { +static_assert(sizeof(std::thread::id) == 4 || sizeof(std::thread::id) == 8, + "std::thread::id is expected to be either 4 or 8 bytes"); + +typedef std::thread::id thread_id_t; +static const thread_id_t invalid_thread_id; // Default ctor creates invalid ID + +// Note we don't define a invalid_thread_id2 since std::thread::id doesn't have +// one; it's only used if MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is defined +// anyway, which it won't be. 
+static inline thread_id_t thread_id() { + return std::this_thread::get_id(); +} + +template +struct thread_id_size {}; +template <> +struct thread_id_size<4> { + typedef std::uint32_t numeric_t; +}; +template <> +struct thread_id_size<8> { + typedef std::uint64_t numeric_t; +}; + +template <> +struct thread_id_converter { + typedef thread_id_size::numeric_t + thread_id_numeric_size_t; +#ifndef __APPLE__ + typedef std::size_t thread_id_hash_t; +#else + typedef thread_id_numeric_size_t thread_id_hash_t; +#endif + + static thread_id_hash_t prehash(thread_id_t const &x) { +#ifndef __APPLE__ + return std::hash()(x); +#else + return *reinterpret_cast(&x); +#endif + } +}; +} +} +#else +// Use a nice trick from this answer: http://stackoverflow.com/a/8438730/21475 +// In order to get a numeric thread ID in a platform-independent way, we use a +// thread-local static variable's address as a thread identifier :-) +#if defined(__GNUC__) || defined(__INTEL_COMPILER) +#define MOODYCAMEL_THREADLOCAL __thread +#elif defined(_MSC_VER) +#define MOODYCAMEL_THREADLOCAL __declspec(thread) +#else +// Assume C++11 compliant compiler +#define MOODYCAMEL_THREADLOCAL thread_local +#endif +namespace moodycamel { +namespace details { +typedef std::uintptr_t thread_id_t; +static const thread_id_t invalid_thread_id = 0; // Address can't be nullptr +static const thread_id_t invalid_thread_id2 = + 1; // Member accesses off a null pointer are also generally invalid. Plus + // it's not aligned. +inline thread_id_t thread_id() { + static MOODYCAMEL_THREADLOCAL int x; + return reinterpret_cast(&x); +} +} +} +#endif + +// Constexpr if +#ifndef MOODYCAMEL_CONSTEXPR_IF +#if (defined(_MSC_VER) && defined(_HAS_CXX17) && _HAS_CXX17) || \ + __cplusplus > 201402L +#define MOODYCAMEL_CONSTEXPR_IF if constexpr +#define MOODYCAMEL_MAYBE_UNUSED [[maybe_unused]] +#else +#define MOODYCAMEL_CONSTEXPR_IF if +#define MOODYCAMEL_MAYBE_UNUSED +#endif +#endif + +// Exceptions +#ifndef MOODYCAMEL_EXCEPTIONS_ENABLED +#if (defined(_MSC_VER) && defined(_CPPUNWIND)) || \ + (defined(__GNUC__) && defined(__EXCEPTIONS)) || \ + (!defined(_MSC_VER) && !defined(__GNUC__)) +#define MOODYCAMEL_EXCEPTIONS_ENABLED +#endif +#endif +#ifdef MOODYCAMEL_EXCEPTIONS_ENABLED +#define MOODYCAMEL_TRY try +#define MOODYCAMEL_CATCH(...) catch (__VA_ARGS__) +#define MOODYCAMEL_RETHROW throw +#define MOODYCAMEL_THROW(expr) throw(expr) +#else +#define MOODYCAMEL_TRY MOODYCAMEL_CONSTEXPR_IF(true) +#define MOODYCAMEL_CATCH(...) else MOODYCAMEL_CONSTEXPR_IF(false) +#define MOODYCAMEL_RETHROW +#define MOODYCAMEL_THROW(expr) +#endif + +#ifndef MOODYCAMEL_NOEXCEPT +#if !defined(MOODYCAMEL_EXCEPTIONS_ENABLED) +#define MOODYCAMEL_NOEXCEPT +#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) true +#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) true +#elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1800 +// VS2012's std::is_nothrow_[move_]constructible is broken and returns true when +// it shouldn't :-( We have to assume *all* non-trivial constructors may throw +// on VS2012! +#define MOODYCAMEL_NOEXCEPT _NOEXCEPT +#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) \ + (std::is_rvalue_reference::value && \ + std::is_move_constructible::value \ + ? std::is_trivially_move_constructible::value \ + : std::is_trivially_copy_constructible::value) +#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) \ + ((std::is_rvalue_reference::value && \ + std::is_move_assignable::value \ + ? 
std::is_trivially_move_assignable::value || \ + std::is_nothrow_move_assignable::value \ + : std::is_trivially_copy_assignable::value || \ + std::is_nothrow_copy_assignable::value) && \ + MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr)) +#elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1900 +#define MOODYCAMEL_NOEXCEPT _NOEXCEPT +#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) \ + (std::is_rvalue_reference::value && \ + std::is_move_constructible::value \ + ? std::is_trivially_move_constructible::value || \ + std::is_nothrow_move_constructible::value \ + : std::is_trivially_copy_constructible::value || \ + std::is_nothrow_copy_constructible::value) +#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) \ + ((std::is_rvalue_reference::value && \ + std::is_move_assignable::value \ + ? std::is_trivially_move_assignable::value || \ + std::is_nothrow_move_assignable::value \ + : std::is_trivially_copy_assignable::value || \ + std::is_nothrow_copy_assignable::value) && \ + MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr)) +#else +#define MOODYCAMEL_NOEXCEPT noexcept +#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) noexcept(expr) +#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) noexcept(expr) +#endif +#endif + +#ifndef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED +#ifdef MCDBGQ_USE_RELACY +#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED +#else +// VS2013 doesn't support `thread_local`, and MinGW-w64 w/ POSIX threading has a +// crippling bug: http://sourceforge.net/p/mingw-w64/bugs/445 g++ <=4.7 doesn't +// support thread_local either. Finally, iOS/ARM doesn't have support for it +// either, and g++/ARM allows it to compile but it's unconfirmed to actually +// work +#if (!defined(_MSC_VER) || _MSC_VER >= 1900) && \ + (!defined(__MINGW32__) && !defined(__MINGW64__) || \ + !defined(__WINPTHREADS_VERSION)) && \ + (!defined(__GNUC__) || __GNUC__ > 4 || \ + (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) && \ + (!defined(__APPLE__) || !TARGET_OS_IPHONE) && !defined(__arm__) && \ + !defined(_M_ARM) && !defined(__aarch64__) && !defined(__MVS__) +// Assume `thread_local` is fully supported in all other C++11 +// compilers/platforms +#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED // tentatively enabled for now; + // years ago several users + // report having problems with + // it on +#endif +#endif +#endif + +// VS2012 doesn't support deleted functions. +// In this case, we declare the function normally but don't define it. A link +// error will be generated if the function is called. 
+#ifndef MOODYCAMEL_DELETE_FUNCTION +#if defined(_MSC_VER) && _MSC_VER < 1800 +#define MOODYCAMEL_DELETE_FUNCTION +#else +#define MOODYCAMEL_DELETE_FUNCTION = delete +#endif +#endif + +namespace moodycamel { +namespace details { +#ifndef MOODYCAMEL_ALIGNAS +// VS2013 doesn't support alignas or alignof, and align() requires a constant +// literal +#if defined(_MSC_VER) && _MSC_VER <= 1800 +#define MOODYCAMEL_ALIGNAS(alignment) __declspec(align(alignment)) +#define MOODYCAMEL_ALIGNOF(obj) __alignof(obj) +#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) \ + typename details::Vs2013Aligned::value, T>::type +template +struct Vs2013Aligned {}; // default, unsupported alignment +template +struct Vs2013Aligned<1, T> { + typedef __declspec(align(1)) T type; +}; +template +struct Vs2013Aligned<2, T> { + typedef __declspec(align(2)) T type; +}; +template +struct Vs2013Aligned<4, T> { + typedef __declspec(align(4)) T type; +}; +template +struct Vs2013Aligned<8, T> { + typedef __declspec(align(8)) T type; +}; +template +struct Vs2013Aligned<16, T> { + typedef __declspec(align(16)) T type; +}; +template +struct Vs2013Aligned<32, T> { + typedef __declspec(align(32)) T type; +}; +template +struct Vs2013Aligned<64, T> { + typedef __declspec(align(64)) T type; +}; +template +struct Vs2013Aligned<128, T> { + typedef __declspec(align(128)) T type; +}; +template +struct Vs2013Aligned<256, T> { + typedef __declspec(align(256)) T type; +}; +#else +template +struct identity { + typedef T type; +}; +#define MOODYCAMEL_ALIGNAS(alignment) alignas(alignment) +#define MOODYCAMEL_ALIGNOF(obj) alignof(obj) +#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) \ + alignas(alignof(obj)) typename details::identity::type +#endif +#endif +} // namespace details +} // namespace moodycamel + + +// TSAN can false report races in lock-free code. To enable TSAN to be used +// from projects that use this one, we can apply per-function compile-time +// suppression. See +// https://clang.llvm.org/docs/ThreadSanitizer.html#has-feature-thread-sanitizer +#define MOODYCAMEL_NO_TSAN +#if defined(__has_feature) +#if __has_feature(thread_sanitizer) +#undef MOODYCAMEL_NO_TSAN +#define MOODYCAMEL_NO_TSAN __attribute__((no_sanitize("thread"))) +#endif // TSAN +#endif // TSAN + +// Compiler-specific likely/unlikely hints +namespace moodycamel { +namespace details { +#if defined(__GNUC__) +static inline bool(likely)(bool x) { + return __builtin_expect((x), true); +} +static inline bool(unlikely)(bool x) { + return __builtin_expect((x), false); +} +#else +static inline bool(likely)(bool x) { + return x; +} +static inline bool(unlikely)(bool x) { + return x; +} +#endif +} // namespace details +} // namespace moodycamel + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG +#include "internal/concurrentqueue_internal_debug.h" +#endif + +namespace moodycamel { +namespace details { +template +struct const_numeric_max { + static_assert(std::is_integral::value, + "const_numeric_max can only be used with integers"); + static const T value = + std::numeric_limits::is_signed + ? (static_cast(1) << (sizeof(T) * CHAR_BIT - 1)) - + static_cast(1) + : static_cast(-1); +}; + +#if defined(__GLIBCXX__) +typedef ::max_align_t + std_max_align_t; // libstdc++ forgot to add it to std:: for a while +#else +typedef std::max_align_t std_max_align_t; // Others (e.g. MSVC) insist it can + // *only* be accessed via std:: +#endif + +// Some platforms have incorrectly set max_align_t to a type with <8 bytes +// alignment even while supporting 8-byte aligned scalar values (*cough* 32-bit +// iOS). 
Work around this with our own union. See issue #64. +typedef union { + std_max_align_t x; + long long y; + void *z; +} max_align_t; +} // namespace details + +// Default traits for the ConcurrentQueue. To change some of the +// traits without re-implementing all of them, inherit from this +// struct and shadow the declarations you wish to be different; +// since the traits are used as a template type parameter, the +// shadowed declarations will be used where defined, and the defaults +// otherwise. +struct ConcurrentQueueDefaultTraits { + // General-purpose size type. std::size_t is strongly recommended. + typedef std::size_t size_t; + + // The type used for the enqueue and dequeue indices. Must be at least as + // large as size_t. Should be significantly larger than the number of elements + // you expect to hold at once, especially if you have a high turnover rate; + // for example, on 32-bit x86, if you expect to have over a hundred million + // elements or pump several million elements through your queue in a very + // short space of time, using a 32-bit type *may* trigger a race condition. + // A 64-bit int type is recommended in that case, and in practice will + // prevent a race condition no matter the usage of the queue. Note that + // whether the queue is lock-free with a 64-int type depends on the whether + // std::atomic is lock-free, which is platform-specific. + typedef std::size_t index_t; + + // Internally, all elements are enqueued and dequeued from multi-element + // blocks; this is the smallest controllable unit. If you expect few elements + // but many producers, a smaller block size should be favoured. For few + // producers and/or many elements, a larger block size is preferred. A sane + // default is provided. Must be a power of 2. + static const size_t BLOCK_SIZE = 32; + + // For explicit producers (i.e. when using a producer token), the block is + // checked for being empty by iterating through a list of flags, one per + // element. For large block sizes, this is too inefficient, and switching to + // an atomic counter-based approach is faster. The switch is made for block + // sizes strictly larger than this threshold. + static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = 32; + + // How many full blocks can be expected for a single explicit producer? This + // should reflect that number's maximum for optimal performance. Must be a + // power of 2. + static const size_t EXPLICIT_INITIAL_INDEX_SIZE = 32; + + // How many full blocks can be expected for a single implicit producer? This + // should reflect that number's maximum for optimal performance. Must be a + // power of 2. + static const size_t IMPLICIT_INITIAL_INDEX_SIZE = 32; + + // The initial size of the hash table mapping thread IDs to implicit + // producers. Note that the hash is resized every time it becomes half full. + // Must be a power of two, and either 0 or at least 1. If 0, implicit + // production (using the enqueue methods without an explicit producer token) + // is disabled. + static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = 32; + + // Controls the number of items that an explicit consumer (i.e. one with a + // token) must consume before it causes all consumers to rotate and move on to + // the next internal queue. + static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = + 256; + + // The maximum number of elements (inclusive) that can be enqueued to a + // sub-queue. Enqueue operations that would cause this limit to be surpassed + // will fail. 
Note that this limit is enforced at the block level (for + // performance reasons), i.e. it's rounded up to the nearest block size. + static const size_t MAX_SUBQUEUE_SIZE = + details::const_numeric_max::value; + + // The number of times to spin before sleeping when waiting on a semaphore. + // Recommended values are on the order of 1000-10000 unless the number of + // consumer threads exceeds the number of idle cores (in which case try + // 0-100). Only affects instances of the BlockingConcurrentQueue. + static const int MAX_SEMA_SPINS = 10000; + + // Whether to recycle dynamically-allocated blocks into an internal free list + // or not. If false, only pre-allocated blocks (controlled by the constructor + // arguments) will be recycled, and all others will be `free`d back to the + // heap. Note that blocks consumed by explicit producers are only freed on + // destruction of the queue (not following destruction of the token) + // regardless of this trait. + static const bool RECYCLE_ALLOCATED_BLOCKS = false; + + +#ifndef MCDBGQ_USE_RELACY + // Memory allocation can be customized if needed. + // malloc should return nullptr on failure, and handle alignment like + // std::malloc. +#if defined(malloc) || defined(free) + // Gah, this is 2015, stop defining macros that break standard code already! + // Work around malloc/free being special macros: + static inline void *WORKAROUND_malloc(size_t size) { + return malloc(size); + } + static inline void WORKAROUND_free(void *ptr) { + return free(ptr); + } + static inline void *(malloc)(size_t size) { + return WORKAROUND_malloc(size); + } + static inline void(free)(void *ptr) { + return WORKAROUND_free(ptr); + } +#else + static inline void *malloc(size_t size) { + return std::malloc(size); + } + static inline void free(void *ptr) { + return std::free(ptr); + } +#endif +#else + // Debug versions when running under the Relacy race detector (ignore + // these in user code) + static inline void *malloc(size_t size) { + return rl::rl_malloc(size, $); + } + static inline void free(void *ptr) { + return rl::rl_free(ptr, $); + } +#endif +}; + + +// When producing or consuming many elements, the most efficient way is to: +// 1) Use one of the bulk-operation methods of the queue with a token +// 2) Failing that, use the bulk-operation methods without a token +// 3) Failing that, create a token and use that with the single-item methods +// 4) Failing that, use the single-parameter methods of the queue +// Having said that, don't create tokens willy-nilly -- ideally there should be +// a maximum of one token per thread (of each kind). 
+struct ProducerToken; +struct ConsumerToken; + +template +class ConcurrentQueue; +template +class BlockingConcurrentQueue; +class ConcurrentQueueTests; + + +namespace details { +struct ConcurrentQueueProducerTypelessBase { + ConcurrentQueueProducerTypelessBase *next; + std::atomic inactive; + ProducerToken *token; + + ConcurrentQueueProducerTypelessBase() + : next(nullptr), inactive(false), token(nullptr) {} +}; + +template +struct _hash_32_or_64 { + static inline std::uint32_t hash(std::uint32_t h) { + // MurmurHash3 finalizer -- see + // https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp + // Since the thread ID is already unique, all we really want to do is + // propagate that uniqueness evenly across all the bits, so that we can use + // a subset of the bits while reducing collisions significantly + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + return h ^ (h >> 16); + } +}; +template <> +struct _hash_32_or_64<1> { + static inline std::uint64_t hash(std::uint64_t h) { + h ^= h >> 33; + h *= 0xff51afd7ed558ccd; + h ^= h >> 33; + h *= 0xc4ceb9fe1a85ec53; + return h ^ (h >> 33); + } +}; +template +struct hash_32_or_64 : public _hash_32_or_64<(size > 4)> {}; + +static inline size_t hash_thread_id(thread_id_t id) { + static_assert( + sizeof(thread_id_t) <= 8, + "Expected a platform where thread IDs are at most 64-bit values"); + return static_cast( + hash_32_or_64::thread_id_hash_t)>:: + hash(thread_id_converter::prehash(id))); +} + +template +static inline bool circular_less_than(T a, T b) { + static_assert( + std::is_integral::value && !std::numeric_limits::is_signed, + "circular_less_than is intended to be used only with unsigned integer " + "types"); + return static_cast(a - b) > + static_cast(static_cast(1) + << (static_cast(sizeof(T) * CHAR_BIT - 1))); + // Note: extra parens around rhs of operator<< is MSVC bug: + // https://developercommunity2.visualstudio.com/t/C4554-triggers-when-both-lhs-and-rhs-is/10034931 + // silencing the bug requires #pragma warning(disable: 4554) around the + // calling code and has no effect when done here. 
+} + +template +static inline char *align_for(char *ptr) { + const std::size_t alignment = std::alignment_of::value; + return ptr + + (alignment - (reinterpret_cast(ptr) % alignment)) % + alignment; +} + +template +static inline T ceil_to_pow_2(T x) { + static_assert( + std::is_integral::value && !std::numeric_limits::is_signed, + "ceil_to_pow_2 is intended to be used only with unsigned integer types"); + + // Adapted from + // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 + --x; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + for (std::size_t i = 1; i < sizeof(T); i <<= 1) { + x |= x >> (i << 3); + } + ++x; + return x; +} + +template +static inline void swap_relaxed(std::atomic &left, std::atomic &right) { + T temp = left.load(std::memory_order_relaxed); + left.store(right.load(std::memory_order_relaxed), std::memory_order_relaxed); + right.store(temp, std::memory_order_relaxed); +} + +template +static inline T const &nomove(T const &x) { + return x; +} + +template +struct nomove_if { + template + static inline T const &eval(T const &x) { + return x; + } +}; + +template <> +struct nomove_if { + template + static inline auto eval(U &&x) -> decltype(std::forward(x)) { + return std::forward(x); + } +}; + +template +static inline auto deref_noexcept(It &it) MOODYCAMEL_NOEXCEPT -> decltype(*it) { + return *it; +} + +#if defined(__clang__) || !defined(__GNUC__) || __GNUC__ > 4 || \ + (__GNUC__ == 4 && __GNUC_MINOR__ >= 8) +template +struct is_trivially_destructible : std::is_trivially_destructible {}; +#else +template +struct is_trivially_destructible : std::has_trivial_destructor {}; +#endif + +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED +#ifdef MCDBGQ_USE_RELACY +typedef RelacyThreadExitListener ThreadExitListener; +typedef RelacyThreadExitNotifier ThreadExitNotifier; +#else +class ThreadExitNotifier; + +struct ThreadExitListener { + typedef void (*callback_t)(void *); + callback_t callback; + void *userData; + + ThreadExitListener *next; // reserved for use by the ThreadExitNotifier + ThreadExitNotifier *chain; // reserved for use by the ThreadExitNotifier +}; + +class ThreadExitNotifier { + public: + static void subscribe(ThreadExitListener *listener) { + auto &tlsInst = instance(); + std::lock_guard guard(mutex()); + listener->next = tlsInst.tail; + listener->chain = &tlsInst; + tlsInst.tail = listener; + } + + static void unsubscribe(ThreadExitListener *listener) { + std::lock_guard guard(mutex()); + if (!listener->chain) { + return; // race with ~ThreadExitNotifier + } + auto &tlsInst = *listener->chain; + listener->chain = nullptr; + ThreadExitListener **prev = &tlsInst.tail; + for (auto ptr = tlsInst.tail; ptr != nullptr; ptr = ptr->next) { + if (ptr == listener) { + *prev = ptr->next; + break; + } + prev = &ptr->next; + } + } + + private: + ThreadExitNotifier() : tail(nullptr) {} + ThreadExitNotifier(ThreadExitNotifier const &) MOODYCAMEL_DELETE_FUNCTION; + ThreadExitNotifier &operator=(ThreadExitNotifier const &) + MOODYCAMEL_DELETE_FUNCTION; + + ~ThreadExitNotifier() { + // This thread is about to exit, let everyone know! + assert(this == &instance() && + "If this assert fails, you likely have a buggy compiler! 
Change the " + "preprocessor conditions such that " + "MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is no longer defined."); + std::lock_guard guard(mutex()); + for (auto ptr = tail; ptr != nullptr; ptr = ptr->next) { + ptr->chain = nullptr; + ptr->callback(ptr->userData); + } + } + + // Thread-local + static inline ThreadExitNotifier &instance() { + static thread_local ThreadExitNotifier notifier; + return notifier; + } + + static inline std::mutex &mutex() { + // Must be static because the ThreadExitNotifier could be destroyed while + // unsubscribe is called + static std::mutex mutex; + return mutex; + } + + private: + ThreadExitListener *tail; +}; +#endif +#endif + +template +struct static_is_lock_free_num { + enum { value = 0 }; +}; +template <> +struct static_is_lock_free_num { + enum { value = ATOMIC_CHAR_LOCK_FREE }; +}; +template <> +struct static_is_lock_free_num { + enum { value = ATOMIC_SHORT_LOCK_FREE }; +}; +template <> +struct static_is_lock_free_num { + enum { value = ATOMIC_INT_LOCK_FREE }; +}; +template <> +struct static_is_lock_free_num { + enum { value = ATOMIC_LONG_LOCK_FREE }; +}; +template <> +struct static_is_lock_free_num { + enum { value = ATOMIC_LLONG_LOCK_FREE }; +}; +template +struct static_is_lock_free + : static_is_lock_free_num::type> {}; +template <> +struct static_is_lock_free { + enum { value = ATOMIC_BOOL_LOCK_FREE }; +}; +template +struct static_is_lock_free { + enum { value = ATOMIC_POINTER_LOCK_FREE }; +}; +} // namespace details + + +struct ProducerToken { + template + explicit ProducerToken(ConcurrentQueue &queue); + + template + explicit ProducerToken(BlockingConcurrentQueue &queue); + + ProducerToken(ProducerToken &&other) MOODYCAMEL_NOEXCEPT + : producer(other.producer) { + other.producer = nullptr; + if (producer != nullptr) { + producer->token = this; + } + } + + inline ProducerToken &operator=(ProducerToken &&other) MOODYCAMEL_NOEXCEPT { + swap(other); + return *this; + } + + void swap(ProducerToken &other) MOODYCAMEL_NOEXCEPT { + std::swap(producer, other.producer); + if (producer != nullptr) { + producer->token = this; + } + if (other.producer != nullptr) { + other.producer->token = &other; + } + } + + // A token is always valid unless: + // 1) Memory allocation failed during construction + // 2) It was moved via the move constructor + // (Note: assignment does a swap, leaving both potentially valid) + // 3) The associated queue was destroyed + // Note that if valid() returns true, that only indicates + // that the token is valid for use with a specific queue, + // but not which one; that's up to the user to track. 
+ inline bool valid() const { + return producer != nullptr; + } + + ~ProducerToken() { + if (producer != nullptr) { + producer->token = nullptr; + producer->inactive.store(true, std::memory_order_release); + } + } + + // Disable copying and assignment + ProducerToken(ProducerToken const &) MOODYCAMEL_DELETE_FUNCTION; + ProducerToken &operator=(ProducerToken const &) MOODYCAMEL_DELETE_FUNCTION; + + private: + template + friend class ConcurrentQueue; + friend class ConcurrentQueueTests; + + protected: + details::ConcurrentQueueProducerTypelessBase *producer; +}; + + +struct ConsumerToken { + template + explicit ConsumerToken(ConcurrentQueue &q); + + template + explicit ConsumerToken(BlockingConcurrentQueue &q); + + ConsumerToken(ConsumerToken &&other) MOODYCAMEL_NOEXCEPT + : initialOffset(other.initialOffset), + lastKnownGlobalOffset(other.lastKnownGlobalOffset), + itemsConsumedFromCurrent(other.itemsConsumedFromCurrent), + currentProducer(other.currentProducer), + desiredProducer(other.desiredProducer) {} + + inline ConsumerToken &operator=(ConsumerToken &&other) MOODYCAMEL_NOEXCEPT { + swap(other); + return *this; + } + + void swap(ConsumerToken &other) MOODYCAMEL_NOEXCEPT { + std::swap(initialOffset, other.initialOffset); + std::swap(lastKnownGlobalOffset, other.lastKnownGlobalOffset); + std::swap(itemsConsumedFromCurrent, other.itemsConsumedFromCurrent); + std::swap(currentProducer, other.currentProducer); + std::swap(desiredProducer, other.desiredProducer); + } + + // Disable copying and assignment + ConsumerToken(ConsumerToken const &) MOODYCAMEL_DELETE_FUNCTION; + ConsumerToken &operator=(ConsumerToken const &) MOODYCAMEL_DELETE_FUNCTION; + + private: + template + friend class ConcurrentQueue; + friend class ConcurrentQueueTests; + + private: // but shared with ConcurrentQueue + std::uint32_t initialOffset; + std::uint32_t lastKnownGlobalOffset; + std::uint32_t itemsConsumedFromCurrent; + details::ConcurrentQueueProducerTypelessBase *currentProducer; + details::ConcurrentQueueProducerTypelessBase *desiredProducer; +}; + +// Need to forward-declare this swap because it's in a namespace. +// See +// http://stackoverflow.com/questions/4492062/why-does-a-c-friend-class-need-a-forward-declaration-only-in-other-namespaces +template +inline void swap(typename ConcurrentQueue::ImplicitProducerKVP &a, + typename ConcurrentQueue::ImplicitProducerKVP &b) + MOODYCAMEL_NOEXCEPT; + + +template +class ConcurrentQueue { + public: + typedef ::moodycamel::ProducerToken producer_token_t; + typedef ::moodycamel::ConsumerToken consumer_token_t; + + typedef typename Traits::index_t index_t; + typedef typename Traits::size_t size_t; + + static const size_t BLOCK_SIZE = static_cast(Traits::BLOCK_SIZE); + static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = + static_cast(Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD); + static const size_t EXPLICIT_INITIAL_INDEX_SIZE = + static_cast(Traits::EXPLICIT_INITIAL_INDEX_SIZE); + static const size_t IMPLICIT_INITIAL_INDEX_SIZE = + static_cast(Traits::IMPLICIT_INITIAL_INDEX_SIZE); + static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = + static_cast(Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE); + static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = + static_cast( + Traits::EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE); +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4307) // + integral constant overflow (that's what + // the ternary expression is for!) 
+#pragma warning(disable : 4309) // static_cast: Truncation of constant value +#endif + static const size_t MAX_SUBQUEUE_SIZE = + (details::const_numeric_max::value - + static_cast(Traits::MAX_SUBQUEUE_SIZE) < + BLOCK_SIZE) + ? details::const_numeric_max::value + : ((static_cast(Traits::MAX_SUBQUEUE_SIZE) + + (BLOCK_SIZE - 1)) / + BLOCK_SIZE * BLOCK_SIZE); +#ifdef _MSC_VER +#pragma warning(pop) +#endif + + static_assert(!std::numeric_limits::is_signed && + std::is_integral::value, + "Traits::size_t must be an unsigned integral type"); + static_assert(!std::numeric_limits::is_signed && + std::is_integral::value, + "Traits::index_t must be an unsigned integral type"); + static_assert(sizeof(index_t) >= sizeof(size_t), + "Traits::index_t must be at least as wide as Traits::size_t"); + static_assert((BLOCK_SIZE > 1) && !(BLOCK_SIZE & (BLOCK_SIZE - 1)), + "Traits::BLOCK_SIZE must be a power of 2 (and at least 2)"); + static_assert((EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD > 1) && + !(EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD & + (EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD - 1)), + "Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD must be a " + "power of 2 (and greater than 1)"); + static_assert((EXPLICIT_INITIAL_INDEX_SIZE > 1) && + !(EXPLICIT_INITIAL_INDEX_SIZE & + (EXPLICIT_INITIAL_INDEX_SIZE - 1)), + "Traits::EXPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and " + "greater than 1)"); + static_assert((IMPLICIT_INITIAL_INDEX_SIZE > 1) && + !(IMPLICIT_INITIAL_INDEX_SIZE & + (IMPLICIT_INITIAL_INDEX_SIZE - 1)), + "Traits::IMPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and " + "greater than 1)"); + static_assert( + (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) || + !(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE & + (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE - 1)), + "Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be a power of 2"); + static_assert(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0 || + INITIAL_IMPLICIT_PRODUCER_HASH_SIZE >= 1, + "Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be at least " + "1 (or 0 to disable implicit enqueueing)"); + + public: + // Creates a queue with at least `capacity` element slots; note that the + // actual number of elements that can be inserted without additional memory + // allocation depends on the number of producers and the block size (e.g. if + // the block size is equal to `capacity`, only a single block will be + // allocated up-front, which means only a single producer will be able to + // enqueue elements without an extra allocation -- blocks aren't shared + // between producers). This method is not thread safe -- it is up to the user + // to ensure that the queue is fully constructed before it starts being used + // by other threads (this includes making the memory effects of construction + // visible, possibly with a memory barrier). + explicit ConcurrentQueue(size_t capacity = 32 * BLOCK_SIZE) + : producerListTail(nullptr), + producerCount(0), + initialBlockPoolIndex(0), + nextExplicitConsumerId(0), + globalExplicitConsumerOffset(0) { + implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); + populate_initial_implicit_producer_hash(); + populate_initial_block_list(capacity / BLOCK_SIZE + + ((capacity & (BLOCK_SIZE - 1)) == 0 ? 0 : 1)); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + // Track all the producers using a fully-resolved typed list for + // each kind; this makes it possible to debug them starting from + // the root queue object (otherwise wacky casts are needed that + // don't compile in the debugger's expression evaluator). 
+ explicitProducers.store(nullptr, std::memory_order_relaxed); + implicitProducers.store(nullptr, std::memory_order_relaxed); +#endif + } + + // Computes the correct amount of pre-allocated blocks for you based + // on the minimum number of elements you want available at any given + // time, and the maximum concurrent number of each type of producer. + ConcurrentQueue(size_t minCapacity, size_t maxExplicitProducers, + size_t maxImplicitProducers) + : producerListTail(nullptr), + producerCount(0), + initialBlockPoolIndex(0), + nextExplicitConsumerId(0), + globalExplicitConsumerOffset(0) { + implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); + populate_initial_implicit_producer_hash(); + size_t blocks = (((minCapacity + BLOCK_SIZE - 1) / BLOCK_SIZE) - 1) * + (maxExplicitProducers + 1) + + 2 * (maxExplicitProducers + maxImplicitProducers); + populate_initial_block_list(blocks); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + explicitProducers.store(nullptr, std::memory_order_relaxed); + implicitProducers.store(nullptr, std::memory_order_relaxed); +#endif + } + + // Note: The queue should not be accessed concurrently while it's + // being deleted. It's up to the user to synchronize this. + // This method is not thread safe. + ~ConcurrentQueue() { + // Destroy producers + auto ptr = producerListTail.load(std::memory_order_relaxed); + while (ptr != nullptr) { + auto next = ptr->next_prod(); + if (ptr->token != nullptr) { + ptr->token->producer = nullptr; + } + destroy(ptr); + ptr = next; + } + + // Destroy implicit producer hash tables + MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE != 0) { + auto hash = implicitProducerHash.load(std::memory_order_relaxed); + while (hash != nullptr) { + auto prev = hash->prev; + if (prev != nullptr) { // The last hash is part of this object and was + // not allocated dynamically + for (size_t i = 0; i != hash->capacity; ++i) { + hash->entries[i].~ImplicitProducerKVP(); + } + hash->~ImplicitProducerHash(); + (Traits::free)(hash); + } + hash = prev; + } + } + + // Destroy global free list + auto block = freeList.head_unsafe(); + while (block != nullptr) { + auto next = block->freeListNext.load(std::memory_order_relaxed); + if (block->dynamicallyAllocated) { + destroy(block); + } + block = next; + } + + // Destroy initial free list + destroy_array(initialBlockPool, initialBlockPoolSize); + } + + // Disable copying and copy assignment + ConcurrentQueue(ConcurrentQueue const &) MOODYCAMEL_DELETE_FUNCTION; + ConcurrentQueue &operator=(ConcurrentQueue const &) + MOODYCAMEL_DELETE_FUNCTION; + + // Moving is supported, but note that it is *not* a thread-safe operation. + // Nobody can use the queue while it's being moved, and the memory effects + // of that move must be propagated to other threads before they can use it. + // Note: When a queue is moved, its tokens are still valid but can only be + // used with the destination queue (i.e. semantically they are moved along + // with the queue itself). 
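+ // Added commentary (illustrative sketch, not part of the original header):
+ // moving a queue, assuming int elements and hypothetical names src/dst.
+ //
+ //   moodycamel::ConcurrentQueue<int> src;
+ //   src.enqueue(42);
+ //   moodycamel::ConcurrentQueue<int> dst(std::move(src));  // not thread-safe
+ //   int v;
+ //   bool ok = dst.try_dequeue(v);  // ok == true, v == 42; src is now empty
+ //
+ // Any tokens that were created against src must only be used with dst after
+ // the move, as described above.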
+ ConcurrentQueue(ConcurrentQueue &&other) MOODYCAMEL_NOEXCEPT + : producerListTail( + other.producerListTail.load(std::memory_order_relaxed)), + producerCount(other.producerCount.load(std::memory_order_relaxed)), + initialBlockPoolIndex( + other.initialBlockPoolIndex.load(std::memory_order_relaxed)), + initialBlockPool(other.initialBlockPool), + initialBlockPoolSize(other.initialBlockPoolSize), + freeList(std::move(other.freeList)), + nextExplicitConsumerId( + other.nextExplicitConsumerId.load(std::memory_order_relaxed)), + globalExplicitConsumerOffset(other.globalExplicitConsumerOffset.load( + std::memory_order_relaxed)) { + // Move the other one into this, and leave the other one as an empty queue + implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); + populate_initial_implicit_producer_hash(); + swap_implicit_producer_hashes(other); + + other.producerListTail.store(nullptr, std::memory_order_relaxed); + other.producerCount.store(0, std::memory_order_relaxed); + other.nextExplicitConsumerId.store(0, std::memory_order_relaxed); + other.globalExplicitConsumerOffset.store(0, std::memory_order_relaxed); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + explicitProducers.store( + other.explicitProducers.load(std::memory_order_relaxed), + std::memory_order_relaxed); + other.explicitProducers.store(nullptr, std::memory_order_relaxed); + implicitProducers.store( + other.implicitProducers.load(std::memory_order_relaxed), + std::memory_order_relaxed); + other.implicitProducers.store(nullptr, std::memory_order_relaxed); +#endif + + other.initialBlockPoolIndex.store(0, std::memory_order_relaxed); + other.initialBlockPoolSize = 0; + other.initialBlockPool = nullptr; + + reown_producers(); + } + + inline ConcurrentQueue &operator=(ConcurrentQueue &&other) + MOODYCAMEL_NOEXCEPT { + return swap_internal(other); + } + + // Swaps this queue's state with the other's. Not thread-safe. + // Swapping two queues does not invalidate their tokens, however + // the tokens that were created for one queue must be used with + // only the swapped queue (i.e. the tokens are tied to the + // queue's movable state, not the object itself). + inline void swap(ConcurrentQueue &other) MOODYCAMEL_NOEXCEPT { + swap_internal(other); + } + + private: + ConcurrentQueue &swap_internal(ConcurrentQueue &other) { + if (this == &other) { + return *this; + } + + details::swap_relaxed(producerListTail, other.producerListTail); + details::swap_relaxed(producerCount, other.producerCount); + details::swap_relaxed(initialBlockPoolIndex, other.initialBlockPoolIndex); + std::swap(initialBlockPool, other.initialBlockPool); + std::swap(initialBlockPoolSize, other.initialBlockPoolSize); + freeList.swap(other.freeList); + details::swap_relaxed(nextExplicitConsumerId, other.nextExplicitConsumerId); + details::swap_relaxed(globalExplicitConsumerOffset, + other.globalExplicitConsumerOffset); + + swap_implicit_producer_hashes(other); + + reown_producers(); + other.reown_producers(); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + details::swap_relaxed(explicitProducers, other.explicitProducers); + details::swap_relaxed(implicitProducers, other.implicitProducers); +#endif + + return *this; + } + + public: + // Enqueues a single item (by copying it). + // Allocates memory if required. Only fails if memory allocation fails (or + // implicit production is disabled because + // Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, or + // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. 
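+ // Added commentary (illustrative usage sketch, not part of the original
+ // header), assuming int elements and a hypothetical queue q:
+ //
+ //   moodycamel::ConcurrentQueue<int> q;
+ //   q.enqueue(1);                  // copies; may allocate a block
+ //   q.enqueue(2);
+ //   int item;
+ //   while (q.try_dequeue(item)) {  // dequeuing never allocates
+ //     // consume item
+ //   }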
+ inline bool enqueue(T const &item) { + MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) + return false; + else return inner_enqueue(item); + } + + // Enqueues a single item (by moving it, if possible). + // Allocates memory if required. Only fails if memory allocation fails (or + // implicit production is disabled because + // Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, or + // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(T &&item) { + MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) + return false; + else return inner_enqueue(std::move(item)); + } + + // Enqueues a single item (by copying it) using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails (or + // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(producer_token_t const &token, T const &item) { + return inner_enqueue(token, item); + } + + // Enqueues a single item (by moving it, if possible) using an explicit + // producer token. Allocates memory if required. Only fails if memory + // allocation fails (or Traits::MAX_SUBQUEUE_SIZE has been defined and would + // be surpassed). Thread-safe. + inline bool enqueue(producer_token_t const &token, T &&item) { + return inner_enqueue(token, std::move(item)); + } + + // Enqueues several items. + // Allocates memory if required. Only fails if memory allocation fails (or + // implicit production is disabled because + // Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, or + // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). Note: + // Use std::make_move_iterator if the elements should be moved instead of + // copied. Thread-safe. + template + bool enqueue_bulk(It itemFirst, size_t count) { + MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) + return false; + else return inner_enqueue_bulk(itemFirst, count); + } + + // Enqueues several items using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails + // (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + bool enqueue_bulk(producer_token_t const &token, It itemFirst, size_t count) { + return inner_enqueue_bulk(token, itemFirst, count); + } + + // Enqueues a single item (by copying it). + // Does not allocate memory. Fails if not enough room to enqueue (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE + // is 0). + // Thread-safe. + inline bool try_enqueue(T const &item) { + MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) + return false; + else return inner_enqueue(item); + } + + // Enqueues a single item (by moving it, if possible). + // Does not allocate memory (except for one-time implicit producer). + // Fails if not enough room to enqueue (or implicit production is + // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). + // Thread-safe. + inline bool try_enqueue(T &&item) { + MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) + return false; + else return inner_enqueue(std::move(item)); + } + + // Enqueues a single item (by copying it) using an explicit producer token. + // Does not allocate memory. Fails if not enough room to enqueue. + // Thread-safe. 
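+ // Added commentary (illustrative sketch, not part of the original header):
+ // pairing a pre-sized queue with an explicit producer token so that
+ // try_enqueue never touches the heap. Assumes int elements and hypothetical
+ // names:
+ //
+ //   moodycamel::ConcurrentQueue<int> q(1024);  // pre-allocates blocks
+ //   moodycamel::ProducerToken ptok(q);
+ //   if (!q.try_enqueue(ptok, 7)) {
+ //     // no free block was available for this producer; nothing was allocated
+ //   }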
+ inline bool try_enqueue(producer_token_t const &token, T const &item) { + return inner_enqueue(token, item); + } + + // Enqueues a single item (by moving it, if possible) using an explicit + // producer token. Does not allocate memory. Fails if not enough room to + // enqueue. Thread-safe. + inline bool try_enqueue(producer_token_t const &token, T &&item) { + return inner_enqueue(token, std::move(item)); + } + + // Enqueues several items. + // Does not allocate memory (except for one-time implicit producer). + // Fails if not enough room to enqueue (or implicit production is + // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + bool try_enqueue_bulk(It itemFirst, size_t count) { + MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) + return false; + else return inner_enqueue_bulk(itemFirst, count); + } + + // Enqueues several items using an explicit producer token. + // Does not allocate memory. Fails if not enough room to enqueue. + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + bool try_enqueue_bulk(producer_token_t const &token, It itemFirst, + size_t count) { + return inner_enqueue_bulk(token, itemFirst, count); + } + + + // Attempts to dequeue from the queue. + // Returns false if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + bool try_dequeue(U &item) { + // Instead of simply trying each producer in turn (which could cause + // needless contention on the first producer), we score them heuristically. + size_t nonEmptyCount = 0; + ProducerBase *best = nullptr; + size_t bestSize = 0; + for (auto ptr = producerListTail.load(std::memory_order_acquire); + nonEmptyCount < 3 && ptr != nullptr; ptr = ptr->next_prod()) { + auto size = ptr->size_approx(); + if (size > 0) { + if (size > bestSize) { + bestSize = size; + best = ptr; + } + ++nonEmptyCount; + } + } + + // If there was at least one non-empty queue but it appears empty at the + // time we try to dequeue from it, we need to make sure every queue's been + // tried + if (nonEmptyCount > 0) { + if ((details::likely)(best->dequeue(item))) { + return true; + } + for (auto ptr = producerListTail.load(std::memory_order_acquire); + ptr != nullptr; ptr = ptr->next_prod()) { + if (ptr != best && ptr->dequeue(item)) { + return true; + } + } + } + return false; + } + + // Attempts to dequeue from the queue. + // Returns false if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // This differs from the try_dequeue(item) method in that this one does + // not attempt to reduce contention by interleaving the order that producer + // streams are dequeued from. So, using this method can reduce overall + // throughput under contention, but will give more predictable results in + // single-threaded consumer scenarios. This is mostly only useful for internal + // unit tests. Never allocates. Thread-safe. + template + bool try_dequeue_non_interleaved(U &item) { + for (auto ptr = producerListTail.load(std::memory_order_acquire); + ptr != nullptr; ptr = ptr->next_prod()) { + if (ptr->dequeue(item)) { + return true; + } + } + return false; + } + + // Attempts to dequeue from the queue using an explicit consumer token. 
+ // Returns false if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + bool try_dequeue(consumer_token_t &token, U &item) { + // The idea is roughly as follows: + // Every 256 items from one producer, make everyone rotate (increase the + // global offset) -> this means the highest efficiency consumer dictates the + // rotation speed of everyone else, more or less If you see that the global + // offset has changed, you must reset your consumption counter and move to + // your designated place If there's no items where you're supposed to be, + // keep moving until you find a producer with some items If the global + // offset has not changed but you've run out of items to consume, move over + // from your current position until you find an producer with something in + // it + + if (token.desiredProducer == nullptr || + token.lastKnownGlobalOffset != + globalExplicitConsumerOffset.load(std::memory_order_relaxed)) { + if (!update_current_producer_after_rotation(token)) { + return false; + } + } + + // If there was at least one non-empty queue but it appears empty at the + // time we try to dequeue from it, we need to make sure every queue's been + // tried + if (static_cast(token.currentProducer)->dequeue(item)) { + if (++token.itemsConsumedFromCurrent == + EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) { + globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed); + } + return true; + } + + auto tail = producerListTail.load(std::memory_order_acquire); + auto ptr = static_cast(token.currentProducer)->next_prod(); + if (ptr == nullptr) { + ptr = tail; + } + while (ptr != static_cast(token.currentProducer)) { + if (ptr->dequeue(item)) { + token.currentProducer = ptr; + token.itemsConsumedFromCurrent = 1; + return true; + } + ptr = ptr->next_prod(); + if (ptr == nullptr) { + ptr = tail; + } + } + return false; + } + + // Attempts to dequeue several elements from the queue. + // Returns the number of items actually dequeued. + // Returns 0 if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + size_t try_dequeue_bulk(It itemFirst, size_t max) { + size_t count = 0; + for (auto ptr = producerListTail.load(std::memory_order_acquire); + ptr != nullptr; ptr = ptr->next_prod()) { + count += ptr->dequeue_bulk(itemFirst, max - count); + if (count == max) { + break; + } + } + return count; + } + + // Attempts to dequeue several elements from the queue using an explicit + // consumer token. Returns the number of items actually dequeued. Returns 0 if + // all producer streams appeared empty at the time they were checked (so, the + // queue is likely but not guaranteed to be empty). Never allocates. + // Thread-safe. 
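+ // Added commentary (illustrative sketch, not part of the original header):
+ // bulk dequeue with a consumer token, assuming int elements and hypothetical
+ // names:
+ //
+ //   moodycamel::ConcurrentQueue<int> q;
+ //   moodycamel::ConsumerToken ctok(q);
+ //   int buf[64];
+ //   size_t n = q.try_dequeue_bulk(ctok, buf, 64);  // n <= 64 items written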
+ template + size_t try_dequeue_bulk(consumer_token_t &token, It itemFirst, size_t max) { + if (token.desiredProducer == nullptr || + token.lastKnownGlobalOffset != + globalExplicitConsumerOffset.load(std::memory_order_relaxed)) { + if (!update_current_producer_after_rotation(token)) { + return 0; + } + } + + size_t count = static_cast(token.currentProducer) + ->dequeue_bulk(itemFirst, max); + if (count == max) { + if ((token.itemsConsumedFromCurrent += static_cast(max)) >= + EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) { + globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed); + } + return max; + } + token.itemsConsumedFromCurrent += static_cast(count); + max -= count; + + auto tail = producerListTail.load(std::memory_order_acquire); + auto ptr = static_cast(token.currentProducer)->next_prod(); + if (ptr == nullptr) { + ptr = tail; + } + while (ptr != static_cast(token.currentProducer)) { + auto dequeued = ptr->dequeue_bulk(itemFirst, max); + count += dequeued; + if (dequeued != 0) { + token.currentProducer = ptr; + token.itemsConsumedFromCurrent = static_cast(dequeued); + } + if (dequeued == max) { + break; + } + max -= dequeued; + ptr = ptr->next_prod(); + if (ptr == nullptr) { + ptr = tail; + } + } + return count; + } + + + // Attempts to dequeue from a specific producer's inner queue. + // If you happen to know which producer you want to dequeue from, this + // is significantly faster than using the general-case try_dequeue methods. + // Returns false if the producer's queue appeared empty at the time it + // was checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + inline bool try_dequeue_from_producer(producer_token_t const &producer, + U &item) { + return static_cast(producer.producer)->dequeue(item); + } + + // Attempts to dequeue several elements from a specific producer's inner + // queue. Returns the number of items actually dequeued. If you happen to know + // which producer you want to dequeue from, this is significantly faster than + // using the general-case try_dequeue methods. Returns 0 if the producer's + // queue appeared empty at the time it was checked (so, the queue is likely + // but not guaranteed to be empty). Never allocates. Thread-safe. + template + inline size_t try_dequeue_bulk_from_producer(producer_token_t const &producer, + It itemFirst, size_t max) { + return static_cast(producer.producer) + ->dequeue_bulk(itemFirst, max); + } + + + // Returns an estimate of the total number of elements currently in the queue. + // This estimate is only accurate if the queue has completely stabilized + // before it is called (i.e. all enqueue and dequeue operations have completed + // and their memory effects are visible on the calling thread, and no further + // operations start while this method is being called). Thread-safe. + size_t size_approx() const { + size_t size = 0; + for (auto ptr = producerListTail.load(std::memory_order_acquire); + ptr != nullptr; ptr = ptr->next_prod()) { + size += ptr->size_approx(); + } + return size; + } + + + // Returns true if the underlying atomic variables used by + // the queue are lock-free (they should be on most platforms). + // Thread-safe. 
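+ // Added commentary (illustrative sketch, not part of the original header):
+ // since is_lock_free() is constexpr, lock-freedom can be checked at compile
+ // time on platforms where it is expected to hold:
+ //
+ //   static_assert(moodycamel::ConcurrentQueue<int>::is_lock_free(),
+ //                 "expected lock-free atomics on this platform");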
+ static constexpr bool is_lock_free() { + return details::static_is_lock_free::value == 2 && + details::static_is_lock_free::value == 2 && + details::static_is_lock_free::value == 2 && + details::static_is_lock_free::value == 2 && + details::static_is_lock_free::value == 2 && + details::static_is_lock_free::thread_id_numeric_size_t>::value == 2; + } + + + private: + friend struct ProducerToken; + friend struct ConsumerToken; + struct ExplicitProducer; + friend struct ExplicitProducer; + struct ImplicitProducer; + friend struct ImplicitProducer; + friend class ConcurrentQueueTests; + + enum AllocationMode { CanAlloc, CannotAlloc }; + + + /////////////////////////////// + // Queue methods + /////////////////////////////// + + template + inline bool inner_enqueue(producer_token_t const &token, U &&element) { + return static_cast(token.producer) + ->ConcurrentQueue::ExplicitProducer::template enqueue( + std::forward(element)); + } + + template + inline bool inner_enqueue(U &&element) { + auto producer = get_or_add_implicit_producer(); + return producer == nullptr + ? false + : producer->ConcurrentQueue::ImplicitProducer::template enqueue< + canAlloc>(std::forward(element)); + } + + template + inline bool inner_enqueue_bulk(producer_token_t const &token, It itemFirst, + size_t count) { + return static_cast(token.producer) + ->ConcurrentQueue::ExplicitProducer::template enqueue_bulk( + itemFirst, count); + } + + template + inline bool inner_enqueue_bulk(It itemFirst, size_t count) { + auto producer = get_or_add_implicit_producer(); + return producer == nullptr + ? false + : producer->ConcurrentQueue::ImplicitProducer:: + template enqueue_bulk(itemFirst, count); + } + + inline bool update_current_producer_after_rotation(consumer_token_t &token) { + // Ah, there's been a rotation, figure out where we should be! + auto tail = producerListTail.load(std::memory_order_acquire); + if (token.desiredProducer == nullptr && tail == nullptr) { + return false; + } + auto prodCount = producerCount.load(std::memory_order_relaxed); + auto globalOffset = + globalExplicitConsumerOffset.load(std::memory_order_relaxed); + if ((details::unlikely)(token.desiredProducer == nullptr)) { + // Aha, first time we're dequeueing anything. + // Figure out our local position + // Note: offset is from start, not end, but we're traversing from end -- + // subtract from count first + std::uint32_t offset = prodCount - 1 - (token.initialOffset % prodCount); + token.desiredProducer = tail; + for (std::uint32_t i = 0; i != offset; ++i) { + token.desiredProducer = + static_cast(token.desiredProducer)->next_prod(); + if (token.desiredProducer == nullptr) { + token.desiredProducer = tail; + } + } + } + + std::uint32_t delta = globalOffset - token.lastKnownGlobalOffset; + if (delta >= prodCount) { + delta = delta % prodCount; + } + for (std::uint32_t i = 0; i != delta; ++i) { + token.desiredProducer = + static_cast(token.desiredProducer)->next_prod(); + if (token.desiredProducer == nullptr) { + token.desiredProducer = tail; + } + } + + token.lastKnownGlobalOffset = globalOffset; + token.currentProducer = token.desiredProducer; + token.itemsConsumedFromCurrent = 0; + return true; + } + + + /////////////////////////// + // Free list + /////////////////////////// + + template + struct FreeListNode { + FreeListNode() : freeListRefs(0), freeListNext(nullptr) {} + + std::atomic freeListRefs; + std::atomic freeListNext; + }; + + // A simple CAS-based lock-free free list. 
Not the fastest thing in the world + // under heavy contention, but simple and correct (assuming nodes are never + // freed until after the free list is destroyed), and fairly speedy under low + // contention. + template // N must inherit FreeListNode or have the same fields + // (and initialization of them) + struct FreeList { + FreeList() : freeListHead(nullptr) {} + FreeList(FreeList &&other) + : freeListHead(other.freeListHead.load(std::memory_order_relaxed)) { + other.freeListHead.store(nullptr, std::memory_order_relaxed); + } + void swap(FreeList &other) { + details::swap_relaxed(freeListHead, other.freeListHead); + } + + FreeList(FreeList const &) MOODYCAMEL_DELETE_FUNCTION; + FreeList &operator=(FreeList const &) MOODYCAMEL_DELETE_FUNCTION; + + inline void add(N *node) { +#ifdef MCDBGQ_NOLOCKFREE_FREELIST + debug::DebugLock lock(mutex); +#endif + // We know that the should-be-on-freelist bit is 0 at this point, so it's + // safe to set it using a fetch_add + if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST, + std::memory_order_acq_rel) == 0) { + // Oh look! We were the last ones referencing this node, and we know + // we want to add it to the free list, so let's do it! + add_knowing_refcount_is_zero(node); + } + } + + inline N *try_get() { +#ifdef MCDBGQ_NOLOCKFREE_FREELIST + debug::DebugLock lock(mutex); +#endif + auto head = freeListHead.load(std::memory_order_acquire); + while (head != nullptr) { + auto prevHead = head; + auto refs = head->freeListRefs.load(std::memory_order_relaxed); + if ((refs & REFS_MASK) == 0 || + !head->freeListRefs.compare_exchange_strong( + refs, refs + 1, std::memory_order_acquire)) { + head = freeListHead.load(std::memory_order_acquire); + continue; + } + + // Good, reference count has been incremented (it wasn't at zero), which + // means we can read the next and not worry about it changing between + // now and the time we do the CAS + auto next = head->freeListNext.load(std::memory_order_relaxed); + if (freeListHead.compare_exchange_strong(head, next, + std::memory_order_acquire, + std::memory_order_relaxed)) { + // Yay, got the node. This means it was on the list, which means + // shouldBeOnFreeList must be false no matter the refcount (because + // nobody else knows it's been taken off yet, it can't have been put + // back on). + assert((head->freeListRefs.load(std::memory_order_relaxed) & + SHOULD_BE_ON_FREELIST) == 0); + + // Decrease refcount twice, once for our ref, and once for the list's + // ref + head->freeListRefs.fetch_sub(2, std::memory_order_release); + return head; + } + + // OK, the head must have changed on us, but we still need to decrease + // the refcount we increased. Note that we don't need to release any + // memory effects, but we do need to ensure that the reference count + // decrement happens-after the CAS on the head. + refs = prevHead->freeListRefs.fetch_sub(1, std::memory_order_acq_rel); + if (refs == SHOULD_BE_ON_FREELIST + 1) { + add_knowing_refcount_is_zero(prevHead); + } + } + + return nullptr; + } + + // Useful for traversing the list when there's no contention (e.g. to + // destroy remaining nodes) + N *head_unsafe() const { + return freeListHead.load(std::memory_order_relaxed); + } + + private: + inline void add_knowing_refcount_is_zero(N *node) { + // Since the refcount is zero, and nobody can increase it once it's zero + // (except us, and we run only one copy of this method per node at a time, + // i.e. 
the single thread case), then we know we can safely change the + // next pointer of the node; however, once the refcount is back above + // zero, then other threads could increase it (happens under heavy + // contention, when the refcount goes to zero in between a load and a + // refcount increment of a node in try_get, then back up to something + // non-zero, then the refcount increment is done by the other thread) -- + // so, if the CAS to add the node to the actual list fails, decrease the + // refcount and leave the add operation to the next thread who puts the + // refcount back at zero (which could be us, hence the loop). + auto head = freeListHead.load(std::memory_order_relaxed); + while (true) { + node->freeListNext.store(head, std::memory_order_relaxed); + node->freeListRefs.store(1, std::memory_order_release); + if (!freeListHead.compare_exchange_strong(head, node, + std::memory_order_release, + std::memory_order_relaxed)) { + // Hmm, the add failed, but we can only try again when the refcount + // goes back to zero + if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST - 1, + std::memory_order_acq_rel) == 1) { + continue; + } + } + return; + } + } + + private: + // Implemented like a stack, but where node order doesn't matter (nodes are + // inserted out of order under contention) + std::atomic freeListHead; + + static const std::uint32_t REFS_MASK = 0x7FFFFFFF; + static const std::uint32_t SHOULD_BE_ON_FREELIST = 0x80000000; + +#ifdef MCDBGQ_NOLOCKFREE_FREELIST + debug::DebugMutex mutex; +#endif + }; + + + /////////////////////////// + // Block + /////////////////////////// + + enum InnerQueueContext { implicit_context = 0, explicit_context = 1 }; + + struct Block { + Block() + : next(nullptr), + elementsCompletelyDequeued(0), + freeListRefs(0), + freeListNext(nullptr), + dynamicallyAllocated(true) { +#ifdef MCDBGQ_TRACKMEM + owner = nullptr; +#endif + } + + template + inline bool is_empty() const { + MOODYCAMEL_CONSTEXPR_IF(context == explicit_context && + BLOCK_SIZE <= + EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Check flags + for (size_t i = 0; i < BLOCK_SIZE; ++i) { + if (!emptyFlags[i].load(std::memory_order_relaxed)) { + return false; + } + } + + // Aha, empty; make sure we have all other memory effects that happened + // before the empty flags were set + std::atomic_thread_fence(std::memory_order_acquire); + return true; + } + else { + // Check counter + if (elementsCompletelyDequeued.load(std::memory_order_relaxed) == + BLOCK_SIZE) { + std::atomic_thread_fence(std::memory_order_acquire); + return true; + } + assert(elementsCompletelyDequeued.load(std::memory_order_relaxed) <= + BLOCK_SIZE); + return false; + } + } + + // Returns true if the block is now empty (does not apply in explicit + // context) + template + inline bool set_empty(MOODYCAMEL_MAYBE_UNUSED index_t i) { + MOODYCAMEL_CONSTEXPR_IF(context == explicit_context && + BLOCK_SIZE <= + EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Set flag + assert(!emptyFlags[BLOCK_SIZE - 1 - + static_cast( + i & static_cast(BLOCK_SIZE - 1))] + .load(std::memory_order_relaxed)); + emptyFlags[BLOCK_SIZE - 1 - + static_cast(i & + static_cast(BLOCK_SIZE - 1))] + .store(true, std::memory_order_release); + return false; + } + else { + // Increment counter + auto prevVal = + elementsCompletelyDequeued.fetch_add(1, std::memory_order_acq_rel); + assert(prevVal < BLOCK_SIZE); + return prevVal == BLOCK_SIZE - 1; + } + } + + // Sets multiple contiguous item statuses to 'empty' (assumes no wrapping + // and count > 0). 
Returns true if the block is now empty (does not apply in + // explicit context). + template + inline bool set_many_empty(MOODYCAMEL_MAYBE_UNUSED index_t i, + size_t count) { + MOODYCAMEL_CONSTEXPR_IF(context == explicit_context && + BLOCK_SIZE <= + EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Set flags + std::atomic_thread_fence(std::memory_order_release); + i = BLOCK_SIZE - 1 - + static_cast(i & static_cast(BLOCK_SIZE - 1)) - + count + 1; + for (size_t j = 0; j != count; ++j) { + assert(!emptyFlags[i + j].load(std::memory_order_relaxed)); + emptyFlags[i + j].store(true, std::memory_order_relaxed); + } + return false; + } + else { + // Increment counter + auto prevVal = elementsCompletelyDequeued.fetch_add( + count, std::memory_order_acq_rel); + assert(prevVal + count <= BLOCK_SIZE); + return prevVal + count == BLOCK_SIZE; + } + } + + template + inline void set_all_empty() { + MOODYCAMEL_CONSTEXPR_IF(context == explicit_context && + BLOCK_SIZE <= + EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Set all flags + for (size_t i = 0; i != BLOCK_SIZE; ++i) { + emptyFlags[i].store(true, std::memory_order_relaxed); + } + } + else { + // Reset counter + elementsCompletelyDequeued.store(BLOCK_SIZE, std::memory_order_relaxed); + } + } + + template + inline void reset_empty() { + MOODYCAMEL_CONSTEXPR_IF(context == explicit_context && + BLOCK_SIZE <= + EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Reset flags + for (size_t i = 0; i != BLOCK_SIZE; ++i) { + emptyFlags[i].store(false, std::memory_order_relaxed); + } + } + else { + // Reset counter + elementsCompletelyDequeued.store(0, std::memory_order_relaxed); + } + } + + inline T *operator[](index_t idx) MOODYCAMEL_NOEXCEPT { + return static_cast(static_cast(elements)) + + static_cast(idx & static_cast(BLOCK_SIZE - 1)); + } + inline T const *operator[](index_t idx) const MOODYCAMEL_NOEXCEPT { + return static_cast(static_cast(elements)) + + static_cast(idx & static_cast(BLOCK_SIZE - 1)); + } + + private: + static_assert(std::alignment_of::value <= sizeof(T), + "The queue does not support types with an alignment greater " + "than their size at this time"); + MOODYCAMEL_ALIGNED_TYPE_LIKE(char[sizeof(T) * BLOCK_SIZE], T) elements; + + public: + Block *next; + std::atomic elementsCompletelyDequeued; + std::atomic emptyFlags + [BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD ? 
BLOCK_SIZE : 1]; + + public: + std::atomic freeListRefs; + std::atomic freeListNext; + bool dynamicallyAllocated; // Perhaps a better name for this would be + // 'isNotPartOfInitialBlockPool' + +#ifdef MCDBGQ_TRACKMEM + void *owner; +#endif + }; + static_assert(std::alignment_of::value >= std::alignment_of::value, + "Internal error: Blocks must be at least as aligned as the " + "type they are wrapping"); + + +#ifdef MCDBGQ_TRACKMEM + public: + struct MemStats; + + private: +#endif + + /////////////////////////// + // Producer base + /////////////////////////// + + struct ProducerBase : public details::ConcurrentQueueProducerTypelessBase { + ProducerBase(ConcurrentQueue *parent_, bool isExplicit_) + : tailIndex(0), + headIndex(0), + dequeueOptimisticCount(0), + dequeueOvercommit(0), + tailBlock(nullptr), + isExplicit(isExplicit_), + parent(parent_) {} + + virtual ~ProducerBase() {} + + template + inline bool dequeue(U &element) { + if (isExplicit) { + return static_cast(this)->dequeue(element); + } else { + return static_cast(this)->dequeue(element); + } + } + + template + inline size_t dequeue_bulk(It &itemFirst, size_t max) { + if (isExplicit) { + return static_cast(this)->dequeue_bulk(itemFirst, + max); + } else { + return static_cast(this)->dequeue_bulk(itemFirst, + max); + } + } + + inline ProducerBase *next_prod() const { + return static_cast(next); + } + + inline size_t size_approx() const { + auto tail = tailIndex.load(std::memory_order_relaxed); + auto head = headIndex.load(std::memory_order_relaxed); + return details::circular_less_than(head, tail) + ? static_cast(tail - head) + : 0; + } + + inline index_t getTail() const { + return tailIndex.load(std::memory_order_relaxed); + } + + protected: + std::atomic tailIndex; // Where to enqueue to next + std::atomic headIndex; // Where to dequeue from next + + std::atomic dequeueOptimisticCount; + std::atomic dequeueOvercommit; + + Block *tailBlock; + + public: + bool isExplicit; + ConcurrentQueue *parent; + + protected: +#ifdef MCDBGQ_TRACKMEM + friend struct MemStats; +#endif + }; + + + /////////////////////////// + // Explicit queue + /////////////////////////// + + struct ExplicitProducer : public ProducerBase { + explicit ExplicitProducer(ConcurrentQueue *parent_) + : ProducerBase(parent_, true), + blockIndex(nullptr), + pr_blockIndexSlotsUsed(0), + pr_blockIndexSize(EXPLICIT_INITIAL_INDEX_SIZE >> 1), + pr_blockIndexFront(0), + pr_blockIndexEntries(nullptr), + pr_blockIndexRaw(nullptr) { + size_t poolBasedIndexSize = + details::ceil_to_pow_2(parent_->initialBlockPoolSize) >> 1; + if (poolBasedIndexSize > pr_blockIndexSize) { + pr_blockIndexSize = poolBasedIndexSize; + } + + new_block_index(0); // This creates an index with double the number of + // current entries, i.e. EXPLICIT_INITIAL_INDEX_SIZE + } + + ~ExplicitProducer() { + // Destruct any elements not yet dequeued. + // Since we're in the destructor, we can assume all elements + // are either completely dequeued or completely not (no halfways). 
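+ // Added commentary (illustrative numbers only, e.g. BLOCK_SIZE == 32): if
+ // headIndex == 70, then headIndex & (BLOCK_SIZE - 1) == 6, so the head is not
+ // on a block boundary; the block whose base index is 64 is only partially
+ // dequeued, and element destruction inside it starts at offset 6.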
+ if (this->tailBlock != + nullptr) { // Note this means there must be a block index too + // First find the block that's partially dequeued, if any + Block *halfDequeuedBlock = nullptr; + if ((this->headIndex.load(std::memory_order_relaxed) & + static_cast(BLOCK_SIZE - 1)) != 0) { + // The head's not on a block boundary, meaning a block somewhere is + // partially dequeued (or the head block is the tail block and was + // fully dequeued, but the head/tail are still not on a boundary) + size_t i = (pr_blockIndexFront - pr_blockIndexSlotsUsed) & + (pr_blockIndexSize - 1); + while (details::circular_less_than( + pr_blockIndexEntries[i].base + BLOCK_SIZE, + this->headIndex.load(std::memory_order_relaxed))) { + i = (i + 1) & (pr_blockIndexSize - 1); + } + assert(details::circular_less_than( + pr_blockIndexEntries[i].base, + this->headIndex.load(std::memory_order_relaxed))); + halfDequeuedBlock = pr_blockIndexEntries[i].block; + } + + // Start at the head block (note the first line in the loop gives us the + // head from the tail on the first iteration) + auto block = this->tailBlock; + do { + block = block->next; + if (block->ConcurrentQueue::Block::template is_empty< + explicit_context>()) { + continue; + } + + size_t i = 0; // Offset into block + if (block == halfDequeuedBlock) { + i = static_cast( + this->headIndex.load(std::memory_order_relaxed) & + static_cast(BLOCK_SIZE - 1)); + } + + // Walk through all the items in the block; if this is the tail block, + // we need to stop when we reach the tail index + auto lastValidIndex = + (this->tailIndex.load(std::memory_order_relaxed) & + static_cast(BLOCK_SIZE - 1)) == 0 + ? BLOCK_SIZE + : static_cast( + this->tailIndex.load(std::memory_order_relaxed) & + static_cast(BLOCK_SIZE - 1)); + while (i != BLOCK_SIZE && + (block != this->tailBlock || i != lastValidIndex)) { + (*block)[i++]->~T(); + } + } while (block != this->tailBlock); + } + + // Destroy all blocks that we own + if (this->tailBlock != nullptr) { + auto block = this->tailBlock; + do { + auto nextBlock = block->next; + this->parent->add_block_to_free_list(block); + block = nextBlock; + } while (block != this->tailBlock); + } + + // Destroy the block indices + auto header = static_cast(pr_blockIndexRaw); + while (header != nullptr) { + auto prev = static_cast(header->prev); + header->~BlockIndexHeader(); + (Traits::free)(header); + header = prev; + } + } + + template + inline bool enqueue(U &&element) { + index_t currentTailIndex = + this->tailIndex.load(std::memory_order_relaxed); + index_t newTailIndex = 1 + currentTailIndex; + if ((currentTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) { + // We reached the end of a block, start a new one + auto startBlock = this->tailBlock; + auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed; + if (this->tailBlock != nullptr && + this->tailBlock->next->ConcurrentQueue::Block::template is_empty< + explicit_context>()) { + // We can re-use the block ahead of us, it's empty! + this->tailBlock = this->tailBlock->next; + this->tailBlock->ConcurrentQueue::Block::template reset_empty< + explicit_context>(); + + // We'll put the block on the block index (guaranteed to be room since + // we're conceptually removing the last block from it first -- except + // instead of removing then adding, we can just overwrite). 
Note that + // there must be a valid block index here, since even if allocation + // failed in the ctor, it would have been re-attempted when adding the + // first block to the queue; since there is such a block, a block + // index must have been successfully allocated. + } else { + // Whatever head value we see here is >= the last value we saw here + // (relatively), and <= its current value. Since we have the most + // recent tail, the head must be + // <= to it. + auto head = this->headIndex.load(std::memory_order_relaxed); + assert(!details::circular_less_than(currentTailIndex, head)); + if (!details::circular_less_than( + head, currentTailIndex + BLOCK_SIZE) || + (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value && + (MAX_SUBQUEUE_SIZE == 0 || + MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) { + // We can't enqueue in another block because there's not enough + // leeway -- the tail could surpass the head by the time the block + // fills up! (Or we'll exceed the size limit, if the second part of + // the condition was true.) + return false; + } + // We're going to need a new block; check that the block index has + // room + if (pr_blockIndexRaw == nullptr || + pr_blockIndexSlotsUsed == pr_blockIndexSize) { + // Hmm, the circular block index is already full -- we'll need + // to allocate a new index. Note pr_blockIndexRaw can only be + // nullptr if the initial allocation failed in the constructor. + + MOODYCAMEL_CONSTEXPR_IF(allocMode == CannotAlloc) { + return false; + } + else if (!new_block_index(pr_blockIndexSlotsUsed)) { + return false; + } + } + + // Insert a new block in the circular linked list + auto newBlock = + this->parent + ->ConcurrentQueue::template requisition_block(); + if (newBlock == nullptr) { + return false; + } +#ifdef MCDBGQ_TRACKMEM + newBlock->owner = this; +#endif + newBlock->ConcurrentQueue::Block::template reset_empty< + explicit_context>(); + if (this->tailBlock == nullptr) { + newBlock->next = newBlock; + } else { + newBlock->next = this->tailBlock->next; + this->tailBlock->next = newBlock; + } + this->tailBlock = newBlock; + ++pr_blockIndexSlotsUsed; + } + + MOODYCAMEL_CONSTEXPR_IF(!MOODYCAMEL_NOEXCEPT_CTOR( + T, U, + new (static_cast(nullptr)) T(std::forward(element)))) { + // The constructor may throw. We want the element not to appear in the + // queue in that case (without corrupting the queue): + MOODYCAMEL_TRY { + new ((*this->tailBlock)[currentTailIndex]) + T(std::forward(element)); + } + MOODYCAMEL_CATCH(...) { + // Revert change to the current block, but leave the new block + // available for next time + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = + startBlock == nullptr ? 
this->tailBlock : startBlock; + MOODYCAMEL_RETHROW; + } + } + else { + (void)startBlock; + (void)originalBlockIndexSlotsUsed; + } + + // Add block to block index + auto &entry = blockIndex.load(std::memory_order_relaxed) + ->entries[pr_blockIndexFront]; + entry.base = currentTailIndex; + entry.block = this->tailBlock; + blockIndex.load(std::memory_order_relaxed) + ->front.store(pr_blockIndexFront, std::memory_order_release); + pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); + + MOODYCAMEL_CONSTEXPR_IF(!MOODYCAMEL_NOEXCEPT_CTOR( + T, U, + new (static_cast(nullptr)) T(std::forward(element)))) { + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + } + + // Enqueue + new ((*this->tailBlock)[currentTailIndex]) T(std::forward(element)); + + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + + template + bool dequeue(U &element) { + auto tail = this->tailIndex.load(std::memory_order_relaxed); + auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); + if (details::circular_less_than( + this->dequeueOptimisticCount.load(std::memory_order_relaxed) - + overcommit, + tail)) { + // Might be something to dequeue, let's give it a try + + // Note that this if is purely for performance purposes in the common + // case when the queue is empty and the values are eventually consistent + // -- we may enter here spuriously. + + // Note that whatever the values of overcommit and tail are, they are + // not going to change (unless we change them) and must be the same + // value at this point (inside the if) as when the if condition was + // evaluated. + + // We insert an acquire fence here to synchronize-with the release upon + // incrementing dequeueOvercommit below. This ensures that whatever the + // value we got loaded into overcommit, the load of dequeueOptisticCount + // in the fetch_add below will result in a value at least as recent as + // that (and therefore at least as large). Note that I believe a + // compiler (signal) fence here would be sufficient due to the nature of + // fetch_add (all read-modify-write operations are guaranteed to work on + // the latest value in the modification order), but unfortunately that + // can't be shown to be correct using only the C++11 standard. See + // http://stackoverflow.com/questions/18223161/what-are-the-c11-memory-ordering-guarantees-in-this-corner-case + std::atomic_thread_fence(std::memory_order_acquire); + + // Increment optimistic counter, then check if it went over the boundary + auto myDequeueCount = this->dequeueOptimisticCount.fetch_add( + 1, std::memory_order_relaxed); + + // Note that since dequeueOvercommit must be <= dequeueOptimisticCount + // (because dequeueOvercommit is only ever incremented after + // dequeueOptimisticCount -- this is enforced in the `else` block + // below), and since we now have a version of dequeueOptimisticCount + // that is at least as recent as overcommit (due to the release upon + // incrementing dequeueOvercommit and the acquire above that + // synchronizes with it), overcommit <= myDequeueCount. However, we + // can't assert this since both dequeueOptimisticCount and + // dequeueOvercommit may (independently) overflow; in such a case, + // though, the logic still holds since the difference between the two is + // maintained. 
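+ // Added commentary (illustrative numbers only): suppose tail == 5,
+ // dequeueOptimisticCount == 3 and overcommit == 0. Then 3 - 0 < 5, so the
+ // attempt proceeds; the first two racing consumers observe myDequeueCount
+ // values of 3 and 4 and pass the recheck below, while any further consumers
+ // fail it and each add 1 to dequeueOvercommit, keeping
+ // (dequeueOptimisticCount - dequeueOvercommit) in step with the number of
+ // dequeues that actually succeeded.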
+ + // Note that we reload tail here in case it changed; it will be the same + // value as before or greater, since this load is sequenced after + // (happens after) the earlier load above. This is supported by + // read-read coherency (as defined in the standard), explained here: + // http://en.cppreference.com/w/cpp/atomic/memory_order + tail = this->tailIndex.load(std::memory_order_acquire); + if ((details::likely)(details::circular_less_than( + myDequeueCount - overcommit, tail))) { + // Guaranteed to be at least one element to dequeue! + + // Get the index. Note that since there's guaranteed to be at least + // one element, this will never exceed tail. We need to do an + // acquire-release fence here since it's possible that whatever + // condition got us to this point was for an earlier enqueued element + // (that we already see the memory effects for), but that by the time + // we increment somebody else has incremented it, and we need to see + // the memory effects for *that* element, which is in such a case is + // necessarily visible on the thread that incremented it in the first + // place with the more current condition (they must have acquired a + // tail that is at least as recent). + auto index = this->headIndex.fetch_add(1, std::memory_order_acq_rel); + + + // Determine which block the element is in + + auto localBlockIndex = blockIndex.load(std::memory_order_acquire); + auto localBlockIndexHead = + localBlockIndex->front.load(std::memory_order_acquire); + + // We need to be careful here about subtracting and dividing because + // of index wrap-around. When an index wraps, we need to preserve the + // sign of the offset when dividing it by the block size (in order to + // get a correct signed block count offset in all cases): + auto headBase = localBlockIndex->entries[localBlockIndexHead].base; + auto blockBaseIndex = index & ~static_cast(BLOCK_SIZE - 1); + auto offset = static_cast( + static_cast::type>( + blockBaseIndex - headBase) / + static_cast::type>( + BLOCK_SIZE)); + auto block = localBlockIndex + ->entries[(localBlockIndexHead + offset) & + (localBlockIndex->size - 1)] + .block; + + // Dequeue + auto &el = *((*block)[index]); + if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T &&, element = std::move(el))) { + // Make sure the element is still fully dequeued and destroyed even + // if the assignment throws + struct Guard { + Block *block; + index_t index; + + ~Guard() { + (*block)[index]->~T(); + block->ConcurrentQueue::Block::template set_empty< + explicit_context>(index); + } + } guard = {block, index}; + + element = std::move(el); // NOLINT + } else { + element = std::move(el); // NOLINT + el.~T(); // NOLINT + block->ConcurrentQueue::Block::template set_empty( + index); + } + + return true; + } else { + // Wasn't anything to dequeue after all; make the effective dequeue + // count eventually consistent + this->dequeueOvercommit.fetch_add( + 1, std::memory_order_release); // Release so that the fetch_add + // on dequeueOptimisticCount is + // guaranteed to happen before + // this write + } + } + + return false; + } + + template + bool MOODYCAMEL_NO_TSAN enqueue_bulk(It itemFirst, size_t count) { + // First, we need to make sure we have enough room to enqueue all of the + // elements; this means pre-allocating blocks and putting them in the + // block index (but only if all the allocations succeeded). 
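+ // Added commentary (illustrative numbers only, e.g. BLOCK_SIZE == 32): with
+ // startTailIndex == 40 and count == 100 the elements occupy indices 40..139,
+ // so blockBaseDiff = (139 & ~31) - (39 & ~31) = 128 - 32 = 96, i.e. three
+ // additional blocks (bases 64, 96 and 128) must be secured -- reused from
+ // ahead in the circular list or requisitioned -- before any element is
+ // constructed.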
+ index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed); + auto startBlock = this->tailBlock; + auto originalBlockIndexFront = pr_blockIndexFront; + auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed; + + Block *firstAllocatedBlock = nullptr; + + // Figure out how many blocks we'll need to allocate, and do so + size_t blockBaseDiff = + ((startTailIndex + count - 1) & + ~static_cast(BLOCK_SIZE - 1)) - + ((startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1)); + index_t currentTailIndex = + (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); + if (blockBaseDiff > 0) { + // Allocate as many blocks as possible from ahead + while (blockBaseDiff > 0 && this->tailBlock != nullptr && + this->tailBlock->next != firstAllocatedBlock && + this->tailBlock->next->ConcurrentQueue::Block::template is_empty< + explicit_context>()) { + blockBaseDiff -= static_cast(BLOCK_SIZE); + currentTailIndex += static_cast(BLOCK_SIZE); + + this->tailBlock = this->tailBlock->next; + firstAllocatedBlock = firstAllocatedBlock == nullptr + ? this->tailBlock + : firstAllocatedBlock; + + auto &entry = blockIndex.load(std::memory_order_relaxed) + ->entries[pr_blockIndexFront]; + entry.base = currentTailIndex; + entry.block = this->tailBlock; + pr_blockIndexFront = + (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); + } + + // Now allocate as many blocks as necessary from the block pool + while (blockBaseDiff > 0) { + blockBaseDiff -= static_cast(BLOCK_SIZE); + currentTailIndex += static_cast(BLOCK_SIZE); + + auto head = this->headIndex.load(std::memory_order_relaxed); + assert(!details::circular_less_than(currentTailIndex, head)); + bool full = + !details::circular_less_than( + head, currentTailIndex + BLOCK_SIZE) || + (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value && + (MAX_SUBQUEUE_SIZE == 0 || + MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head)); + if (pr_blockIndexRaw == nullptr || + pr_blockIndexSlotsUsed == pr_blockIndexSize || full) { + MOODYCAMEL_CONSTEXPR_IF(allocMode == CannotAlloc) { + // Failed to allocate, undo changes (but keep injected blocks) + pr_blockIndexFront = originalBlockIndexFront; + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = + startBlock == nullptr ? firstAllocatedBlock : startBlock; + return false; + } + else if (full || !new_block_index(originalBlockIndexSlotsUsed)) { + // Failed to allocate, undo changes (but keep injected blocks) + pr_blockIndexFront = originalBlockIndexFront; + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = + startBlock == nullptr ? firstAllocatedBlock : startBlock; + return false; + } + + // pr_blockIndexFront is updated inside new_block_index, so we need + // to update our fallback value too (since we keep the new index + // even if we later fail) + originalBlockIndexFront = originalBlockIndexSlotsUsed; + } + + // Insert a new block in the circular linked list + auto newBlock = + this->parent + ->ConcurrentQueue::template requisition_block(); + if (newBlock == nullptr) { + pr_blockIndexFront = originalBlockIndexFront; + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = + startBlock == nullptr ? 
firstAllocatedBlock : startBlock; + return false; + } + +#ifdef MCDBGQ_TRACKMEM + newBlock->owner = this; +#endif + newBlock->ConcurrentQueue::Block::template set_all_empty< + explicit_context>(); + if (this->tailBlock == nullptr) { + newBlock->next = newBlock; + } else { + newBlock->next = this->tailBlock->next; + this->tailBlock->next = newBlock; + } + this->tailBlock = newBlock; + firstAllocatedBlock = firstAllocatedBlock == nullptr + ? this->tailBlock + : firstAllocatedBlock; + + ++pr_blockIndexSlotsUsed; + + auto &entry = blockIndex.load(std::memory_order_relaxed) + ->entries[pr_blockIndexFront]; + entry.base = currentTailIndex; + entry.block = this->tailBlock; + pr_blockIndexFront = + (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); + } + + // Excellent, all allocations succeeded. Reset each block's emptiness + // before we fill them up, and publish the new block index front + auto block = firstAllocatedBlock; + while (true) { + block->ConcurrentQueue::Block::template reset_empty< + explicit_context>(); + if (block == this->tailBlock) { + break; + } + block = block->next; + } + + MOODYCAMEL_CONSTEXPR_IF(MOODYCAMEL_NOEXCEPT_CTOR( + T, decltype(*itemFirst), + new (static_cast(nullptr)) + T(details::deref_noexcept(itemFirst)))) { + blockIndex.load(std::memory_order_relaxed) + ->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), + std::memory_order_release); + } + } + + // Enqueue, one block at a time + index_t newTailIndex = startTailIndex + static_cast(count); + currentTailIndex = startTailIndex; + auto endBlock = this->tailBlock; + this->tailBlock = startBlock; + assert((startTailIndex & static_cast(BLOCK_SIZE - 1)) != 0 || + firstAllocatedBlock != nullptr || count == 0); + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0 && + firstAllocatedBlock != nullptr) { + this->tailBlock = firstAllocatedBlock; + } + while (true) { + index_t stopIndex = + (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + + static_cast(BLOCK_SIZE); + if (details::circular_less_than(newTailIndex, stopIndex)) { + stopIndex = newTailIndex; + } + MOODYCAMEL_CONSTEXPR_IF(MOODYCAMEL_NOEXCEPT_CTOR( + T, decltype(*itemFirst), + new (static_cast(nullptr)) + T(details::deref_noexcept(itemFirst)))) { + while (currentTailIndex != stopIndex) { + new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++); + } + } + else { + MOODYCAMEL_TRY { + while (currentTailIndex != stopIndex) { + // Must use copy constructor even if move constructor is available + // because we may have to revert if there's an exception. + // Sorry about the horrible templated next line, but it was the + // only way to disable moving *at compile time*, which is + // important because a type may only define a (noexcept) move + // constructor, and so calls to the cctor will not compile, even + // if they are in an if branch that will never be executed + new ((*this->tailBlock)[currentTailIndex]) T( + details::nomove_if(nullptr)) T(details::deref_noexcept( + itemFirst)))>::eval(*itemFirst)); + ++currentTailIndex; + ++itemFirst; + } + } + MOODYCAMEL_CATCH(...) { + // Oh dear, an exception's been thrown -- destroy the elements that + // were enqueued so far and revert the entire bulk operation (we'll + // keep any allocated blocks in our linked list for later, though). + auto constructedStopIndex = currentTailIndex; + auto lastBlockEnqueued = this->tailBlock; + + pr_blockIndexFront = originalBlockIndexFront; + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = + startBlock == nullptr ? 
firstAllocatedBlock : startBlock; + + if (!details::is_trivially_destructible::value) { + auto block = startBlock; + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == + 0) { + block = firstAllocatedBlock; + } + currentTailIndex = startTailIndex; + while (true) { + stopIndex = + (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + + static_cast(BLOCK_SIZE); + if (details::circular_less_than(constructedStopIndex, + stopIndex)) { + stopIndex = constructedStopIndex; + } + while (currentTailIndex != stopIndex) { + (*block)[currentTailIndex++]->~T(); + } + if (block == lastBlockEnqueued) { + break; + } + block = block->next; + } + } + MOODYCAMEL_RETHROW; + } + } + + if (this->tailBlock == endBlock) { + assert(currentTailIndex == newTailIndex); + break; + } + this->tailBlock = this->tailBlock->next; + } + + MOODYCAMEL_CONSTEXPR_IF(!MOODYCAMEL_NOEXCEPT_CTOR( + T, decltype(*itemFirst), + new (static_cast(nullptr)) + T(details::deref_noexcept(itemFirst)))) { + if (firstAllocatedBlock != nullptr) + blockIndex.load(std::memory_order_relaxed) + ->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), + std::memory_order_release); + } + + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + + template + size_t dequeue_bulk(It &itemFirst, size_t max) { + auto tail = this->tailIndex.load(std::memory_order_relaxed); + auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); + auto desiredCount = static_cast( + tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) - + overcommit)); + if (details::circular_less_than(0, desiredCount)) { + desiredCount = desiredCount < max ? desiredCount : max; + std::atomic_thread_fence(std::memory_order_acquire); + + auto myDequeueCount = this->dequeueOptimisticCount.fetch_add( + desiredCount, std::memory_order_relaxed); + + tail = this->tailIndex.load(std::memory_order_acquire); + auto actualCount = + static_cast(tail - (myDequeueCount - overcommit)); + if (details::circular_less_than(0, actualCount)) { + actualCount = desiredCount < actualCount ? desiredCount : actualCount; + if (actualCount < desiredCount) { + this->dequeueOvercommit.fetch_add(desiredCount - actualCount, + std::memory_order_release); + } + + // Get the first index. Note that since there's guaranteed to be at + // least actualCount elements, this will never exceed tail. + auto firstIndex = + this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel); + + // Determine which block the first element is in + auto localBlockIndex = blockIndex.load(std::memory_order_acquire); + auto localBlockIndexHead = + localBlockIndex->front.load(std::memory_order_acquire); + + auto headBase = localBlockIndex->entries[localBlockIndexHead].base; + auto firstBlockBaseIndex = + firstIndex & ~static_cast(BLOCK_SIZE - 1); + auto offset = static_cast( + static_cast::type>( + firstBlockBaseIndex - headBase) / + static_cast::type>( + BLOCK_SIZE)); + auto indexIndex = + (localBlockIndexHead + offset) & (localBlockIndex->size - 1); + + // Iterate the blocks and dequeue + auto index = firstIndex; + do { + auto firstIndexInBlock = index; + index_t endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + + static_cast(BLOCK_SIZE); + endIndex = + details::circular_less_than( + firstIndex + static_cast(actualCount), endIndex) + ? 
firstIndex + static_cast(actualCount) + : endIndex; + auto block = localBlockIndex->entries[indexIndex].block; + if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T &&, + details::deref_noexcept(itemFirst) = + std::move((*(*block)[index])))) { + while (index != endIndex) { + auto &el = *((*block)[index]); + *itemFirst++ = std::move(el); + el.~T(); + ++index; + } + } else { + MOODYCAMEL_TRY { + while (index != endIndex) { + auto &el = *((*block)[index]); + *itemFirst = std::move(el); + ++itemFirst; + el.~T(); + ++index; + } + } + MOODYCAMEL_CATCH(...) { + // It's too late to revert the dequeue, but we can make sure + // that all the dequeued objects are properly destroyed and the + // block index (and empty count) are properly updated before we + // propagate the exception + do { + block = localBlockIndex->entries[indexIndex].block; + while (index != endIndex) { + (*block)[index++]->~T(); + } + block->ConcurrentQueue::Block::template set_many_empty< + explicit_context>( + firstIndexInBlock, + static_cast(endIndex - firstIndexInBlock)); + indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1); + + firstIndexInBlock = index; + endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + + static_cast(BLOCK_SIZE); + endIndex = + details::circular_less_than( + firstIndex + static_cast(actualCount), + endIndex) + ? firstIndex + static_cast(actualCount) + : endIndex; + } while (index != firstIndex + actualCount); + + MOODYCAMEL_RETHROW; + } + } + block->ConcurrentQueue::Block::template set_many_empty< + explicit_context>( + firstIndexInBlock, + static_cast(endIndex - firstIndexInBlock)); + indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1); + } while (index != firstIndex + actualCount); + + return actualCount; + } else { + // Wasn't anything to dequeue after all; make the effective dequeue + // count eventually consistent + this->dequeueOvercommit.fetch_add(desiredCount, + std::memory_order_release); + } + } + + return 0; + } + + private: + struct BlockIndexEntry { + index_t base; + Block *block; + }; + + struct BlockIndexHeader { + size_t size; + std::atomic + front; // Current slot (not next, like pr_blockIndexFront) + BlockIndexEntry *entries; + void *prev; + }; + + + bool new_block_index(size_t numberOfFilledSlotsToExpose) { + auto prevBlockSizeMask = pr_blockIndexSize - 1; + + // Create the new block + pr_blockIndexSize <<= 1; + auto newRawPtr = static_cast((Traits::malloc)( + sizeof(BlockIndexHeader) + std::alignment_of::value - + 1 + sizeof(BlockIndexEntry) * pr_blockIndexSize)); + if (newRawPtr == nullptr) { + pr_blockIndexSize >>= 1; // Reset to allow graceful retry + return false; + } + + auto newBlockIndexEntries = reinterpret_cast( + details::align_for(newRawPtr + + sizeof(BlockIndexHeader))); + + // Copy in all the old indices, if any + size_t j = 0; + if (pr_blockIndexSlotsUsed != 0) { + auto i = + (pr_blockIndexFront - pr_blockIndexSlotsUsed) & prevBlockSizeMask; + do { + newBlockIndexEntries[j++] = pr_blockIndexEntries[i]; + i = (i + 1) & prevBlockSizeMask; + } while (i != pr_blockIndexFront); + } + + // Update everything + auto header = new (newRawPtr) BlockIndexHeader; + header->size = pr_blockIndexSize; + header->front.store(numberOfFilledSlotsToExpose - 1, + std::memory_order_relaxed); + header->entries = newBlockIndexEntries; + header->prev = pr_blockIndexRaw; // we link the new block to the old one + // so we can free it later + + pr_blockIndexFront = j; + pr_blockIndexEntries = newBlockIndexEntries; + pr_blockIndexRaw = newRawPtr; + blockIndex.store(header, std::memory_order_release); 
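For context, the resize pattern above (double the power-of-two capacity, copy the live entries in logical order, keep the old allocation reachable through prev, then publish with a release store) can be reduced to a small single-producer sketch. Everything here (Ring, Node, the int payload) is invented for illustration and is not part of the patch; the real index stores BlockIndexEntry records and frees the prev chain in the producer's destructor.

#include <cstddef>
#include <vector>

struct Ring {
  struct Node {
    std::size_t capacity;      // always a power of two
    std::vector<int> entries;  // stand-in for the BlockIndexEntry array
    Node *prev;                // older, smaller index kept alive for readers
  };

  Node *head = nullptr;
  std::size_t used = 0;   // number of live entries
  std::size_t front = 0;  // next slot to fill (like pr_blockIndexFront)

  ~Ring() {
    while (head != nullptr) {  // free the whole prev chain, newest first
      Node *older = head->prev;
      delete head;
      head = older;
    }
  }

  void grow(std::size_t initial_capacity = 4) {
    std::size_t cap = head ? head->capacity * 2 : initial_capacity;
    Node *bigger = new Node{cap, std::vector<int>(cap), head};
    if (head != nullptr && used != 0) {
      // Copy the old entries oldest-first, like the loop in the patch that
      // starts walking at (pr_blockIndexFront - pr_blockIndexSlotsUsed).
      std::size_t mask = head->capacity - 1;
      std::size_t i = (front - used) & mask;
      std::size_t j = 0;
      do {
        bigger->entries[j++] = head->entries[i];
        i = (i + 1) & mask;
      } while (i != front);
      front = j;
    } else {
      front = 0;
    }
    head = bigger;  // the patch publishes this with a release store instead
  }

  void push(int value) {
    if (head == nullptr || used == head->capacity) grow();
    head->entries[front] = value;
    front = (front + 1) & (head->capacity - 1);
    ++used;
  }
};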
+ + return true; + } + + private: + std::atomic blockIndex; + + // To be used by producer only -- consumer must use the ones in referenced + // by blockIndex + size_t pr_blockIndexSlotsUsed; + size_t pr_blockIndexSize; + size_t pr_blockIndexFront; // Next slot (not current) + BlockIndexEntry *pr_blockIndexEntries; + void *pr_blockIndexRaw; + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + public: + ExplicitProducer *nextExplicitProducer; + + private: +#endif + +#ifdef MCDBGQ_TRACKMEM + friend struct MemStats; +#endif + }; + + + ////////////////////////////////// + // Implicit queue + ////////////////////////////////// + + struct ImplicitProducer : public ProducerBase { + ImplicitProducer(ConcurrentQueue *parent_) + : ProducerBase(parent_, false), + nextBlockIndexCapacity(IMPLICIT_INITIAL_INDEX_SIZE), + blockIndex(nullptr) { + new_block_index(); + } + + ~ImplicitProducer() { + // Note that since we're in the destructor we can assume that all + // enqueue/dequeue operations completed already; this means that all + // undequeued elements are placed contiguously across contiguous blocks, + // and that only the first and last remaining blocks can be only partially + // empty (all other remaining blocks must be completely full). + +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + // Unregister ourselves for thread termination notification + if (!this->inactive.load(std::memory_order_relaxed)) { + details::ThreadExitNotifier::unsubscribe(&threadExitListener); + } +#endif + + // Destroy all remaining elements! + auto tail = this->tailIndex.load(std::memory_order_relaxed); + auto index = this->headIndex.load(std::memory_order_relaxed); + Block *block = nullptr; + assert(index == tail || details::circular_less_than(index, tail)); + bool forceFreeLastBlock = + index != tail; // If we enter the loop, then the last (tail) block + // will not be freed + while (index != tail) { + if ((index & static_cast(BLOCK_SIZE - 1)) == 0 || + block == nullptr) { + if (block != nullptr) { + // Free the old block + this->parent->add_block_to_free_list(block); + } + + block = get_block_index_entry_for_index(index)->value.load( + std::memory_order_relaxed); + } + + ((*block)[index])->~T(); + ++index; + } + // Even if the queue is empty, there's still one block that's not on the + // free list (unless the head index reached the end of it, in which case + // the tail will be poised to create a new block). 
+ if (this->tailBlock != nullptr && + (forceFreeLastBlock || + (tail & static_cast(BLOCK_SIZE - 1)) != 0)) { + this->parent->add_block_to_free_list(this->tailBlock); + } + + // Destroy block index + auto localBlockIndex = blockIndex.load(std::memory_order_relaxed); + if (localBlockIndex != nullptr) { + for (size_t i = 0; i != localBlockIndex->capacity; ++i) { + localBlockIndex->index[i]->~BlockIndexEntry(); + } + do { + auto prev = localBlockIndex->prev; + localBlockIndex->~BlockIndexHeader(); + (Traits::free)(localBlockIndex); + localBlockIndex = prev; + } while (localBlockIndex != nullptr); + } + } + + template + inline bool enqueue(U &&element) { + index_t currentTailIndex = + this->tailIndex.load(std::memory_order_relaxed); + index_t newTailIndex = 1 + currentTailIndex; + if ((currentTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) { + // We reached the end of a block, start a new one + auto head = this->headIndex.load(std::memory_order_relaxed); + assert(!details::circular_less_than(currentTailIndex, head)); + if (!details::circular_less_than( + head, currentTailIndex + BLOCK_SIZE) || + (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value && + (MAX_SUBQUEUE_SIZE == 0 || + MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) { + return false; + } +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + // Find out where we'll be inserting this block in the block index + BlockIndexEntry *idxEntry; + if (!insert_block_index_entry(idxEntry, currentTailIndex)) { + return false; + } + + // Get ahold of a new block + auto newBlock = + this->parent + ->ConcurrentQueue::template requisition_block(); + if (newBlock == nullptr) { + rewind_block_index_tail(); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + return false; + } +#ifdef MCDBGQ_TRACKMEM + newBlock->owner = this; +#endif + newBlock + ->ConcurrentQueue::Block::template reset_empty(); + + MOODYCAMEL_CONSTEXPR_IF(!MOODYCAMEL_NOEXCEPT_CTOR( + T, U, + new (static_cast(nullptr)) T(std::forward(element)))) { + // May throw, try to insert now before we publish the fact that we + // have this new block + MOODYCAMEL_TRY { + new ((*newBlock)[currentTailIndex]) T(std::forward(element)); + } + MOODYCAMEL_CATCH(...) 
{ + rewind_block_index_tail(); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + this->parent->add_block_to_free_list(newBlock); + MOODYCAMEL_RETHROW; + } + } + + // Insert the new block into the index + idxEntry->value.store(newBlock, std::memory_order_relaxed); + + this->tailBlock = newBlock; + + MOODYCAMEL_CONSTEXPR_IF(!MOODYCAMEL_NOEXCEPT_CTOR( + T, U, + new (static_cast(nullptr)) T(std::forward(element)))) { + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + } + + // Enqueue + new ((*this->tailBlock)[currentTailIndex]) T(std::forward(element)); + + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + + template + bool dequeue(U &element) { + // See ExplicitProducer::dequeue for rationale and explanation + index_t tail = this->tailIndex.load(std::memory_order_relaxed); + index_t overcommit = + this->dequeueOvercommit.load(std::memory_order_relaxed); + if (details::circular_less_than( + this->dequeueOptimisticCount.load(std::memory_order_relaxed) - + overcommit, + tail)) { + std::atomic_thread_fence(std::memory_order_acquire); + + index_t myDequeueCount = this->dequeueOptimisticCount.fetch_add( + 1, std::memory_order_relaxed); + tail = this->tailIndex.load(std::memory_order_acquire); + if ((details::likely)(details::circular_less_than( + myDequeueCount - overcommit, tail))) { + index_t index = + this->headIndex.fetch_add(1, std::memory_order_acq_rel); + + // Determine which block the element is in + auto entry = get_block_index_entry_for_index(index); + + // Dequeue + auto block = entry->value.load(std::memory_order_relaxed); + auto &el = *((*block)[index]); + + if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T &&, element = std::move(el))) { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + // Note: Acquiring the mutex with every dequeue instead of only when + // a block is released is very sub-optimal, but it is, after all, + // purely debug code. + debug::DebugLock lock(producer->mutex); +#endif + struct Guard { + Block *block; + index_t index; + BlockIndexEntry *entry; + ConcurrentQueue *parent; + + ~Guard() { + (*block)[index]->~T(); + if (block->ConcurrentQueue::Block::template set_empty< + implicit_context>(index)) { + entry->value.store(nullptr, std::memory_order_relaxed); + parent->add_block_to_free_list(block); + } + } + } guard = {block, index, entry, this->parent}; + + element = std::move(el); // NOLINT + } else { + element = std::move(el); // NOLINT + el.~T(); // NOLINT + + if (block->ConcurrentQueue::Block::template set_empty< + implicit_context>(index)) { + { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + // Add the block back into the global free pool (and remove from + // block index) + entry->value.store(nullptr, std::memory_order_relaxed); + } + this->parent->add_block_to_free_list( + block); // releases the above store + } + } + + return true; + } else { + this->dequeueOvercommit.fetch_add(1, std::memory_order_release); + } + } + + return false; + } + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4706) // assignment within conditional expression +#endif + template + bool enqueue_bulk(It itemFirst, size_t count) { + // First, we need to make sure we have enough room to enqueue all of the + // elements; this means pre-allocating blocks and putting them in the + // block index (but only if all the allocations succeeded). 
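As a side note, the blockBaseDiff arithmetic that follows can be checked with a few concrete numbers. This is an editorial sketch, not part of the patch: kBlockSize and blocks_needed are made-up names, the block size of 32 is just an example, and count is assumed to be at least 1.

#include <cstdint>

constexpr std::uint64_t kBlockSize = 32;  // example value; must be a power of two

constexpr std::uint64_t blocks_needed(std::uint64_t startTailIndex,
                                      std::uint64_t count) {
  // Block base of the last element to be written, minus the block base of
  // the last element already written, in units of whole blocks: that is how
  // many fresh blocks the bulk enqueue has to allocate and index.
  std::uint64_t last_base  = (startTailIndex + count - 1) & ~(kBlockSize - 1);
  std::uint64_t prior_base = (startTailIndex - 1) & ~(kBlockSize - 1);
  return (last_base - prior_base) / kBlockSize;
}

static_assert(blocks_needed(32, 1) == 1, "tail at a block boundary needs a new block");
static_assert(blocks_needed(33, 31) == 0, "a partial block absorbs the whole batch");
static_assert(blocks_needed(33, 32) == 1, "one element spills into the next block");
static_assert(blocks_needed(32, 96) == 3, "three whole blocks from a boundary");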
+ + // Note that the tailBlock we start off with may not be owned by us any + // more; this happens if it was filled up exactly to the top (setting + // tailIndex to the first index of the next block which is not yet + // allocated), then dequeued completely (putting it on the free list) + // before we enqueue again. + + index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed); + auto startBlock = this->tailBlock; + Block *firstAllocatedBlock = nullptr; + auto endBlock = this->tailBlock; + + // Figure out how many blocks we'll need to allocate, and do so + size_t blockBaseDiff = + ((startTailIndex + count - 1) & + ~static_cast(BLOCK_SIZE - 1)) - + ((startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1)); + index_t currentTailIndex = + (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); + if (blockBaseDiff > 0) { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + do { + blockBaseDiff -= static_cast(BLOCK_SIZE); + currentTailIndex += static_cast(BLOCK_SIZE); + + // Find out where we'll be inserting this block in the block index + BlockIndexEntry *idxEntry = + nullptr; // initialization here unnecessary but compiler can't + // always tell + Block *newBlock; + bool indexInserted = false; + auto head = this->headIndex.load(std::memory_order_relaxed); + assert(!details::circular_less_than(currentTailIndex, head)); + bool full = + !details::circular_less_than( + head, currentTailIndex + BLOCK_SIZE) || + (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value && + (MAX_SUBQUEUE_SIZE == 0 || + MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head)); + + if (full || + !(indexInserted = insert_block_index_entry( + idxEntry, currentTailIndex)) || + (newBlock = + this->parent->ConcurrentQueue::template requisition_block< + allocMode>()) == nullptr) { + // Index allocation or block allocation failed; revert any other + // allocations and index insertions done so far for this operation + if (indexInserted) { + rewind_block_index_tail(); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + } + currentTailIndex = + (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); + for (auto block = firstAllocatedBlock; block != nullptr; + block = block->next) { + currentTailIndex += static_cast(BLOCK_SIZE); + idxEntry = get_block_index_entry_for_index(currentTailIndex); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + rewind_block_index_tail(); + } + this->parent->add_blocks_to_free_list(firstAllocatedBlock); + this->tailBlock = startBlock; + + return false; + } + +#ifdef MCDBGQ_TRACKMEM + newBlock->owner = this; +#endif + newBlock->ConcurrentQueue::Block::template reset_empty< + implicit_context>(); + newBlock->next = nullptr; + + // Insert the new block into the index + idxEntry->value.store(newBlock, std::memory_order_relaxed); + + // Store the chain of blocks so that we can undo if later allocations + // fail, and so that we can find the blocks when we do the actual + // enqueueing + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) != 0 || + firstAllocatedBlock != nullptr) { + assert(this->tailBlock != nullptr); + this->tailBlock->next = newBlock; + } + this->tailBlock = newBlock; + endBlock = newBlock; + firstAllocatedBlock = + firstAllocatedBlock == nullptr ? 
newBlock : firstAllocatedBlock; + } while (blockBaseDiff > 0); + } + + // Enqueue, one block at a time + index_t newTailIndex = startTailIndex + static_cast(count); + currentTailIndex = startTailIndex; + this->tailBlock = startBlock; + assert((startTailIndex & static_cast(BLOCK_SIZE - 1)) != 0 || + firstAllocatedBlock != nullptr || count == 0); + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0 && + firstAllocatedBlock != nullptr) { + this->tailBlock = firstAllocatedBlock; + } + while (true) { + index_t stopIndex = + (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + + static_cast(BLOCK_SIZE); + if (details::circular_less_than(newTailIndex, stopIndex)) { + stopIndex = newTailIndex; + } + MOODYCAMEL_CONSTEXPR_IF(MOODYCAMEL_NOEXCEPT_CTOR( + T, decltype(*itemFirst), + new (static_cast(nullptr)) + T(details::deref_noexcept(itemFirst)))) { + while (currentTailIndex != stopIndex) { + new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++); + } + } + else { + MOODYCAMEL_TRY { + while (currentTailIndex != stopIndex) { + new ((*this->tailBlock)[currentTailIndex]) T( + details::nomove_if(nullptr)) T(details::deref_noexcept( + itemFirst)))>::eval(*itemFirst)); + ++currentTailIndex; + ++itemFirst; + } + } + MOODYCAMEL_CATCH(...) { + auto constructedStopIndex = currentTailIndex; + auto lastBlockEnqueued = this->tailBlock; + + if (!details::is_trivially_destructible::value) { + auto block = startBlock; + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == + 0) { + block = firstAllocatedBlock; + } + currentTailIndex = startTailIndex; + while (true) { + stopIndex = + (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + + static_cast(BLOCK_SIZE); + if (details::circular_less_than(constructedStopIndex, + stopIndex)) { + stopIndex = constructedStopIndex; + } + while (currentTailIndex != stopIndex) { + (*block)[currentTailIndex++]->~T(); + } + if (block == lastBlockEnqueued) { + break; + } + block = block->next; + } + } + + currentTailIndex = + (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); + for (auto block = firstAllocatedBlock; block != nullptr; + block = block->next) { + currentTailIndex += static_cast(BLOCK_SIZE); + auto idxEntry = get_block_index_entry_for_index(currentTailIndex); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + rewind_block_index_tail(); + } + this->parent->add_blocks_to_free_list(firstAllocatedBlock); + this->tailBlock = startBlock; + MOODYCAMEL_RETHROW; + } + } + + if (this->tailBlock == endBlock) { + assert(currentTailIndex == newTailIndex); + break; + } + this->tailBlock = this->tailBlock->next; + } + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } +#ifdef _MSC_VER +#pragma warning(pop) +#endif + + template + size_t dequeue_bulk(It &itemFirst, size_t max) { + auto tail = this->tailIndex.load(std::memory_order_relaxed); + auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); + auto desiredCount = static_cast( + tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) - + overcommit)); + if (details::circular_less_than(0, desiredCount)) { + desiredCount = desiredCount < max ? 
desiredCount : max; + std::atomic_thread_fence(std::memory_order_acquire); + + auto myDequeueCount = this->dequeueOptimisticCount.fetch_add( + desiredCount, std::memory_order_relaxed); + + tail = this->tailIndex.load(std::memory_order_acquire); + auto actualCount = + static_cast(tail - (myDequeueCount - overcommit)); + if (details::circular_less_than(0, actualCount)) { + actualCount = desiredCount < actualCount ? desiredCount : actualCount; + if (actualCount < desiredCount) { + this->dequeueOvercommit.fetch_add(desiredCount - actualCount, + std::memory_order_release); + } + + // Get the first index. Note that since there's guaranteed to be at + // least actualCount elements, this will never exceed tail. + auto firstIndex = + this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel); + + // Iterate the blocks and dequeue + auto index = firstIndex; + BlockIndexHeader *localBlockIndex; + auto indexIndex = + get_block_index_index_for_index(index, localBlockIndex); + do { + auto blockStartIndex = index; + index_t endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + + static_cast(BLOCK_SIZE); + endIndex = + details::circular_less_than( + firstIndex + static_cast(actualCount), endIndex) + ? firstIndex + static_cast(actualCount) + : endIndex; + + auto entry = localBlockIndex->index[indexIndex]; + auto block = entry->value.load(std::memory_order_relaxed); + if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T &&, + details::deref_noexcept(itemFirst) = + std::move((*(*block)[index])))) { + while (index != endIndex) { + auto &el = *((*block)[index]); + *itemFirst++ = std::move(el); + el.~T(); + ++index; + } + } else { + MOODYCAMEL_TRY { + while (index != endIndex) { + auto &el = *((*block)[index]); + *itemFirst = std::move(el); + ++itemFirst; + el.~T(); + ++index; + } + } + MOODYCAMEL_CATCH(...) { + do { + entry = localBlockIndex->index[indexIndex]; + block = entry->value.load(std::memory_order_relaxed); + while (index != endIndex) { + (*block)[index++]->~T(); + } + + if (block->ConcurrentQueue::Block::template set_many_empty< + implicit_context>( + blockStartIndex, + static_cast(endIndex - blockStartIndex))) { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + entry->value.store(nullptr, std::memory_order_relaxed); + this->parent->add_block_to_free_list(block); + } + indexIndex = + (indexIndex + 1) & (localBlockIndex->capacity - 1); + + blockStartIndex = index; + endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + + static_cast(BLOCK_SIZE); + endIndex = + details::circular_less_than( + firstIndex + static_cast(actualCount), + endIndex) + ? firstIndex + static_cast(actualCount) + : endIndex; + } while (index != firstIndex + actualCount); + + MOODYCAMEL_RETHROW; + } + } + if (block->ConcurrentQueue::Block::template set_many_empty< + implicit_context>( + blockStartIndex, + static_cast(endIndex - blockStartIndex))) { + { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + // Note that the set_many_empty above did a release, meaning + // that anybody who acquires the block we're about to free can + // use it safely since our writes (and reads!) will have + // happened-before then. 
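The happens-before claim in the comment above is the standard release/acquire pairing; the following standalone sketch (editorial, not part of the patch, with made-up names payload/published) shows the same guarantee with a single flag.

#include <atomic>
#include <cassert>
#include <thread>

int payload = 0;                         // ordinary, non-atomic data
std::atomic<bool> published{false};

int main() {
  std::thread producer([] {
    payload = 42;                        // written before the release store
    published.store(true, std::memory_order_release);
  });
  std::thread consumer([] {
    while (!published.load(std::memory_order_acquire)) {
      // spin until the release store becomes visible
    }
    // The acquire load synchronizes with the release store, so the write to
    // payload happened-before this read -- the same argument made above for
    // reusing a block after set_many_empty's release.
    assert(payload == 42);
  });
  producer.join();
  consumer.join();
  return 0;
}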
+ entry->value.store(nullptr, std::memory_order_relaxed); + } + this->parent->add_block_to_free_list( + block); // releases the above store + } + indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1); + } while (index != firstIndex + actualCount); + + return actualCount; + } else { + this->dequeueOvercommit.fetch_add(desiredCount, + std::memory_order_release); + } + } + + return 0; + } + + private: + // The block size must be > 1, so any number with the low bit set is an + // invalid block base index + static const index_t INVALID_BLOCK_BASE = 1; + + struct BlockIndexEntry { + std::atomic key; + std::atomic value; + }; + + struct BlockIndexHeader { + size_t capacity; + std::atomic tail; + BlockIndexEntry *entries; + BlockIndexEntry **index; + BlockIndexHeader *prev; + }; + + template + inline bool insert_block_index_entry(BlockIndexEntry *&idxEntry, + index_t blockStartIndex) { + auto localBlockIndex = + blockIndex.load(std::memory_order_relaxed); // We're the only writer + // thread, relaxed is OK + if (localBlockIndex == nullptr) { + return false; // this can happen if new_block_index failed in the + // constructor + } + size_t newTail = + (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & + (localBlockIndex->capacity - 1); + idxEntry = localBlockIndex->index[newTail]; + if (idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE || + idxEntry->value.load(std::memory_order_relaxed) == nullptr) { + idxEntry->key.store(blockStartIndex, std::memory_order_relaxed); + localBlockIndex->tail.store(newTail, std::memory_order_release); + return true; + } + + // No room in the old block index, try to allocate another one! + MOODYCAMEL_CONSTEXPR_IF(allocMode == CannotAlloc) { + return false; + } + else if (!new_block_index()) { + return false; + } + else { + localBlockIndex = blockIndex.load(std::memory_order_relaxed); + newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & + (localBlockIndex->capacity - 1); + idxEntry = localBlockIndex->index[newTail]; + assert(idxEntry->key.load(std::memory_order_relaxed) == + INVALID_BLOCK_BASE); + idxEntry->key.store(blockStartIndex, std::memory_order_relaxed); + localBlockIndex->tail.store(newTail, std::memory_order_release); + return true; + } + } + + inline void rewind_block_index_tail() { + auto localBlockIndex = blockIndex.load(std::memory_order_relaxed); + localBlockIndex->tail.store( + (localBlockIndex->tail.load(std::memory_order_relaxed) - 1) & + (localBlockIndex->capacity - 1), + std::memory_order_relaxed); + } + + inline BlockIndexEntry *get_block_index_entry_for_index( + index_t index) const { + BlockIndexHeader *localBlockIndex; + auto idx = get_block_index_index_for_index(index, localBlockIndex); + return localBlockIndex->index[idx]; + } + + inline size_t get_block_index_index_for_index( + index_t index, BlockIndexHeader *&localBlockIndex) const { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + index &= ~static_cast(BLOCK_SIZE - 1); + localBlockIndex = blockIndex.load(std::memory_order_acquire); + auto tail = localBlockIndex->tail.load(std::memory_order_acquire); + auto tailBase = + localBlockIndex->index[tail]->key.load(std::memory_order_relaxed); + assert(tailBase != INVALID_BLOCK_BASE); + // Note: Must use division instead of shift because the index may wrap + // around, causing a negative offset, whose negativity we want to preserve + auto offset = static_cast( + static_cast::type>(index - + tailBase) / + static_cast::type>(BLOCK_SIZE)); + size_t idx 
= (tail + offset) & (localBlockIndex->capacity - 1); + assert(localBlockIndex->index[idx]->key.load(std::memory_order_relaxed) == + index && + localBlockIndex->index[idx]->value.load( + std::memory_order_relaxed) != nullptr); + return idx; + } + + bool new_block_index() { + auto prev = blockIndex.load(std::memory_order_relaxed); + size_t prevCapacity = prev == nullptr ? 0 : prev->capacity; + auto entryCount = prev == nullptr ? nextBlockIndexCapacity : prevCapacity; + auto raw = static_cast((Traits::malloc)( + sizeof(BlockIndexHeader) + std::alignment_of::value - + 1 + sizeof(BlockIndexEntry) * entryCount + + std::alignment_of::value - 1 + + sizeof(BlockIndexEntry *) * nextBlockIndexCapacity)); + if (raw == nullptr) { + return false; + } + + auto header = new (raw) BlockIndexHeader; + auto entries = reinterpret_cast( + details::align_for(raw + sizeof(BlockIndexHeader))); + auto index = reinterpret_cast( + details::align_for( + reinterpret_cast(entries) + + sizeof(BlockIndexEntry) * entryCount)); + if (prev != nullptr) { + auto prevTail = prev->tail.load(std::memory_order_relaxed); + auto prevPos = prevTail; + size_t i = 0; + do { + prevPos = (prevPos + 1) & (prev->capacity - 1); + index[i++] = prev->index[prevPos]; + } while (prevPos != prevTail); + assert(i == prevCapacity); + } + for (size_t i = 0; i != entryCount; ++i) { + new (entries + i) BlockIndexEntry; + entries[i].key.store(INVALID_BLOCK_BASE, std::memory_order_relaxed); + index[prevCapacity + i] = entries + i; + } + header->prev = prev; + header->entries = entries; + header->index = index; + header->capacity = nextBlockIndexCapacity; + header->tail.store((prevCapacity - 1) & (nextBlockIndexCapacity - 1), + std::memory_order_relaxed); + + blockIndex.store(header, std::memory_order_release); + + nextBlockIndexCapacity <<= 1; + + return true; + } + + private: + size_t nextBlockIndexCapacity; + std::atomic blockIndex; + +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + public: + details::ThreadExitListener threadExitListener; + + private: +#endif + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + public: + ImplicitProducer *nextImplicitProducer; + + private: +#endif + +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + mutable debug::DebugMutex mutex; +#endif +#ifdef MCDBGQ_TRACKMEM + friend struct MemStats; +#endif + }; + + + ////////////////////////////////// + // Block pool manipulation + ////////////////////////////////// + + void populate_initial_block_list(size_t blockCount) { + initialBlockPoolSize = blockCount; + if (initialBlockPoolSize == 0) { + initialBlockPool = nullptr; + return; + } + + initialBlockPool = create_array(blockCount); + if (initialBlockPool == nullptr) { + initialBlockPoolSize = 0; + } + for (size_t i = 0; i < initialBlockPoolSize; ++i) { + initialBlockPool[i].dynamicallyAllocated = false; + } + } + + inline Block *try_get_block_from_initial_pool() { + if (initialBlockPoolIndex.load(std::memory_order_relaxed) >= + initialBlockPoolSize) { + return nullptr; + } + + auto index = initialBlockPoolIndex.fetch_add(1, std::memory_order_relaxed); + + return index < initialBlockPoolSize ? 
(initialBlockPool + index) : nullptr; + } + + inline void add_block_to_free_list(Block *block) { +#ifdef MCDBGQ_TRACKMEM + block->owner = nullptr; +#endif + if (!Traits::RECYCLE_ALLOCATED_BLOCKS && block->dynamicallyAllocated) { + destroy(block); + } else { + freeList.add(block); + } + } + + inline void add_blocks_to_free_list(Block *block) { + while (block != nullptr) { + auto next = block->next; + add_block_to_free_list(block); + block = next; + } + } + + inline Block *try_get_block_from_free_list() { + return freeList.try_get(); + } + + // Gets a free block from one of the memory pools, or allocates a new one (if + // applicable) + template + Block *requisition_block() { + auto block = try_get_block_from_initial_pool(); + if (block != nullptr) { + return block; + } + + block = try_get_block_from_free_list(); + if (block != nullptr) { + return block; + } + + MOODYCAMEL_CONSTEXPR_IF(canAlloc == CanAlloc) { + return create(); + } + else { + return nullptr; + } + } + + +#ifdef MCDBGQ_TRACKMEM + public: + struct MemStats { + size_t allocatedBlocks; + size_t usedBlocks; + size_t freeBlocks; + size_t ownedBlocksExplicit; + size_t ownedBlocksImplicit; + size_t implicitProducers; + size_t explicitProducers; + size_t elementsEnqueued; + size_t blockClassBytes; + size_t queueClassBytes; + size_t implicitBlockIndexBytes; + size_t explicitBlockIndexBytes; + + friend class ConcurrentQueue; + + private: + static MemStats getFor(ConcurrentQueue *q) { + MemStats stats = {0}; + + stats.elementsEnqueued = q->size_approx(); + + auto block = q->freeList.head_unsafe(); + while (block != nullptr) { + ++stats.allocatedBlocks; + ++stats.freeBlocks; + block = block->freeListNext.load(std::memory_order_relaxed); + } + + for (auto ptr = q->producerListTail.load(std::memory_order_acquire); + ptr != nullptr; ptr = ptr->next_prod()) { + bool implicit = dynamic_cast(ptr) != nullptr; + stats.implicitProducers += implicit ? 1 : 0; + stats.explicitProducers += implicit ? 
0 : 1; + + if (implicit) { + auto prod = static_cast(ptr); + stats.queueClassBytes += sizeof(ImplicitProducer); + auto head = prod->headIndex.load(std::memory_order_relaxed); + auto tail = prod->tailIndex.load(std::memory_order_relaxed); + auto hash = prod->blockIndex.load(std::memory_order_relaxed); + if (hash != nullptr) { + for (size_t i = 0; i != hash->capacity; ++i) { + if (hash->index[i]->key.load(std::memory_order_relaxed) != + ImplicitProducer::INVALID_BLOCK_BASE && + hash->index[i]->value.load(std::memory_order_relaxed) != + nullptr) { + ++stats.allocatedBlocks; + ++stats.ownedBlocksImplicit; + } + } + stats.implicitBlockIndexBytes += + hash->capacity * + sizeof(typename ImplicitProducer::BlockIndexEntry); + for (; hash != nullptr; hash = hash->prev) { + stats.implicitBlockIndexBytes += + sizeof(typename ImplicitProducer::BlockIndexHeader) + + hash->capacity * + sizeof(typename ImplicitProducer::BlockIndexEntry *); + } + } + for (; details::circular_less_than(head, tail); + head += BLOCK_SIZE) { + // auto block = prod->get_block_index_entry_for_index(head); + ++stats.usedBlocks; + } + } else { + auto prod = static_cast(ptr); + stats.queueClassBytes += sizeof(ExplicitProducer); + auto tailBlock = prod->tailBlock; + bool wasNonEmpty = false; + if (tailBlock != nullptr) { + auto block = tailBlock; + do { + ++stats.allocatedBlocks; + if (!block->ConcurrentQueue::Block::template is_empty< + explicit_context>() || + wasNonEmpty) { + ++stats.usedBlocks; + wasNonEmpty = wasNonEmpty || block != tailBlock; + } + ++stats.ownedBlocksExplicit; + block = block->next; + } while (block != tailBlock); + } + auto index = prod->blockIndex.load(std::memory_order_relaxed); + while (index != nullptr) { + stats.explicitBlockIndexBytes += + sizeof(typename ExplicitProducer::BlockIndexHeader) + + index->size * + sizeof(typename ExplicitProducer::BlockIndexEntry); + index = static_cast( + index->prev); + } + } + } + + auto freeOnInitialPool = + q->initialBlockPoolIndex.load(std::memory_order_relaxed) >= + q->initialBlockPoolSize + ? 0 + : q->initialBlockPoolSize - + q->initialBlockPoolIndex.load(std::memory_order_relaxed); + stats.allocatedBlocks += freeOnInitialPool; + stats.freeBlocks += freeOnInitialPool; + + stats.blockClassBytes = sizeof(Block) * stats.allocatedBlocks; + stats.queueClassBytes += sizeof(ConcurrentQueue); + + return stats; + } + }; + + // For debugging only. Not thread-safe. + MemStats getMemStats() { + return MemStats::getFor(this); + } + + private: + friend struct MemStats; +#endif + + + ////////////////////////////////// + // Producer list manipulation + ////////////////////////////////// + + ProducerBase *recycle_or_create_producer(bool isExplicit) { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH + debug::DebugLock lock(implicitProdMutex); +#endif + // Try to re-use one first + for (auto ptr = producerListTail.load(std::memory_order_acquire); + ptr != nullptr; ptr = ptr->next_prod()) { + if (ptr->inactive.load(std::memory_order_relaxed) && + ptr->isExplicit == isExplicit) { + bool expected = true; + if (ptr->inactive.compare_exchange_strong(expected, /* desired */ false, + std::memory_order_acquire, + std::memory_order_relaxed)) { + // We caught one! It's been marked as activated, the caller can have + // it + return ptr; + } + } + } + + return add_producer( + isExplicit ? 
static_cast(create(this)) + : create(this)); + } + + ProducerBase *add_producer(ProducerBase *producer) { + // Handle failed memory allocation + if (producer == nullptr) { + return nullptr; + } + + producerCount.fetch_add(1, std::memory_order_relaxed); + + // Add it to the lock-free list + auto prevTail = producerListTail.load(std::memory_order_relaxed); + do { + producer->next = prevTail; + } while (!producerListTail.compare_exchange_weak( + prevTail, producer, std::memory_order_release, + std::memory_order_relaxed)); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + if (producer->isExplicit) { + auto prevTailExplicit = explicitProducers.load(std::memory_order_relaxed); + do { + static_cast(producer)->nextExplicitProducer = + prevTailExplicit; + } while (!explicitProducers.compare_exchange_weak( + prevTailExplicit, static_cast(producer), + std::memory_order_release, std::memory_order_relaxed)); + } else { + auto prevTailImplicit = implicitProducers.load(std::memory_order_relaxed); + do { + static_cast(producer)->nextImplicitProducer = + prevTailImplicit; + } while (!implicitProducers.compare_exchange_weak( + prevTailImplicit, static_cast(producer), + std::memory_order_release, std::memory_order_relaxed)); + } +#endif + + return producer; + } + + void reown_producers() { + // After another instance is moved-into/swapped-with this one, all the + // producers we stole still think their parents are the other queue. + // So fix them up! + for (auto ptr = producerListTail.load(std::memory_order_relaxed); + ptr != nullptr; ptr = ptr->next_prod()) { + ptr->parent = this; + } + } + + + ////////////////////////////////// + // Implicit producer hash + ////////////////////////////////// + + struct ImplicitProducerKVP { + std::atomic key; + ImplicitProducer *value; // No need for atomicity since it's only read by + // the thread that sets it in the first place + + ImplicitProducerKVP() : value(nullptr) {} + + ImplicitProducerKVP(ImplicitProducerKVP &&other) MOODYCAMEL_NOEXCEPT { + key.store(other.key.load(std::memory_order_relaxed), + std::memory_order_relaxed); + value = other.value; + } + + inline ImplicitProducerKVP &operator=(ImplicitProducerKVP &&other) + MOODYCAMEL_NOEXCEPT { + swap(other); + return *this; + } + + inline void swap(ImplicitProducerKVP &other) MOODYCAMEL_NOEXCEPT { + if (this != &other) { + details::swap_relaxed(key, other.key); + std::swap(value, other.value); + } + } + }; + + template + friend void moodycamel::swap( + typename ConcurrentQueue::ImplicitProducerKVP &, + typename ConcurrentQueue::ImplicitProducerKVP &) + MOODYCAMEL_NOEXCEPT; + + struct ImplicitProducerHash { + size_t capacity; + ImplicitProducerKVP *entries; + ImplicitProducerHash *prev; + }; + + inline void populate_initial_implicit_producer_hash() { + MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) { + return; + } + else { + implicitProducerHashCount.store(0, std::memory_order_relaxed); + auto hash = &initialImplicitProducerHash; + hash->capacity = INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; + hash->entries = &initialImplicitProducerHashEntries[0]; + for (size_t i = 0; i != INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; ++i) { + initialImplicitProducerHashEntries[i].key.store( + details::invalid_thread_id, std::memory_order_relaxed); + } + hash->prev = nullptr; + implicitProducerHash.store(hash, std::memory_order_relaxed); + } + } + + void swap_implicit_producer_hashes(ConcurrentQueue &other) { + MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) { + return; + } + else { + // Swap (assumes our implicit 
producer hash is initialized) + initialImplicitProducerHashEntries.swap( + other.initialImplicitProducerHashEntries); + initialImplicitProducerHash.entries = + &initialImplicitProducerHashEntries[0]; + other.initialImplicitProducerHash.entries = + &other.initialImplicitProducerHashEntries[0]; + + details::swap_relaxed(implicitProducerHashCount, + other.implicitProducerHashCount); + + details::swap_relaxed(implicitProducerHash, other.implicitProducerHash); + if (implicitProducerHash.load(std::memory_order_relaxed) == + &other.initialImplicitProducerHash) { + implicitProducerHash.store(&initialImplicitProducerHash, + std::memory_order_relaxed); + } else { + ImplicitProducerHash *hash; + for (hash = implicitProducerHash.load(std::memory_order_relaxed); + hash->prev != &other.initialImplicitProducerHash; + hash = hash->prev) { + continue; + } + hash->prev = &initialImplicitProducerHash; + } + if (other.implicitProducerHash.load(std::memory_order_relaxed) == + &initialImplicitProducerHash) { + other.implicitProducerHash.store(&other.initialImplicitProducerHash, + std::memory_order_relaxed); + } else { + ImplicitProducerHash *hash; + for (hash = other.implicitProducerHash.load(std::memory_order_relaxed); + hash->prev != &initialImplicitProducerHash; hash = hash->prev) { + continue; + } + hash->prev = &other.initialImplicitProducerHash; + } + } + } + + // Only fails (returns nullptr) if memory allocation fails + ImplicitProducer *get_or_add_implicit_producer() { + // Note that since the data is essentially thread-local (key is thread ID), + // there's a reduced need for fences (memory ordering is already consistent + // for any individual thread), except for the current table itself. + + // Start by looking for the thread ID in the current and all previous hash + // tables. If it's not found, it must not be in there yet, since this same + // thread would have added it previously to one of the tables that we + // traversed. + + // Code and algorithm adapted from + // http://preshing.com/20130605/the-worlds-simplest-lock-free-hash-table + +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH + debug::DebugLock lock(implicitProdMutex); +#endif + + auto id = details::thread_id(); + auto hashedId = details::hash_thread_id(id); + + auto mainHash = implicitProducerHash.load(std::memory_order_acquire); + assert( + mainHash != + nullptr); // silence clang-tidy and MSVC warnings (hash cannot be null) + for (auto hash = mainHash; hash != nullptr; hash = hash->prev) { + // Look for the id in this hash + auto index = hashedId; + while (true) { // Not an infinite loop because at least one slot is free + // in the hash table + index &= hash->capacity - 1u; + + auto probedKey = + hash->entries[index].key.load(std::memory_order_relaxed); + if (probedKey == id) { + // Found it! If we had to search several hashes deep, though, we + // should lazily add it to the current main hash table to avoid the + // extended search next time. Note there's guaranteed to be room in + // the current hash table since every subsequent table implicitly + // reserves space for all previous tables (there's only one + // implicitProducerHashCount). 
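For readers unfamiliar with the Preshing-style table referenced above, the slot-claiming core looks roughly like the sketch below. It is editorial and not part of the patch: kCapacity, kEmpty, and find_or_claim are invented names, the table never grows, and key 0 is reserved to mean "free", whereas the real code uses details::invalid_thread_id and chains to larger tables through prev.

#include <atomic>
#include <cstddef>
#include <cstdint>

constexpr std::size_t kCapacity = 64;   // power of two; assumed "large enough"
constexpr std::uint64_t kEmpty = 0;     // reserved key meaning "free slot"

// Static storage, so every key starts zero-initialized, i.e. kEmpty.
std::atomic<std::uint64_t> g_keys[kCapacity];

// Find the slot holding `id`, claiming a free slot for it if necessary.
std::size_t find_or_claim(std::uint64_t id) {
  std::size_t index = static_cast<std::size_t>(id * 0x9E3779B97F4A7C15ull);
  for (;;) {
    index &= kCapacity - 1;             // linear probing with wrap-around
    std::uint64_t probed = g_keys[index].load(std::memory_order_relaxed);
    if (probed == id) {
      return index;                     // already inserted
    }
    if (probed == kEmpty) {
      std::uint64_t expected = kEmpty;
      if (g_keys[index].compare_exchange_strong(expected, id,
                                                std::memory_order_seq_cst,
                                                std::memory_order_relaxed)) {
        return index;                   // we claimed this slot
      }
      if (expected == id) {
        return index;                   // another thread inserted the same id
      }
      // A different id won the race for this slot; keep probing.
    }
    ++index;
  }
}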
+ auto value = hash->entries[index].value; + if (hash != mainHash) { + index = hashedId; + while (true) { + index &= mainHash->capacity - 1u; + auto empty = details::invalid_thread_id; +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + auto reusable = details::invalid_thread_id2; + if (mainHash->entries[index].key.compare_exchange_strong( + empty, id, std::memory_order_seq_cst, + std::memory_order_relaxed) || + mainHash->entries[index].key.compare_exchange_strong( + reusable, id, std::memory_order_seq_cst, + std::memory_order_relaxed)) { +#else + if (mainHash->entries[index].key.compare_exchange_strong( + empty, id, std::memory_order_seq_cst, + std::memory_order_relaxed)) { +#endif + mainHash->entries[index].value = value; + break; + } + ++index; + } + } + + return value; + } + if (probedKey == details::invalid_thread_id) { + break; // Not in this hash table + } + ++index; + } + } + + // Insert! + auto newCount = + 1 + implicitProducerHashCount.fetch_add(1, std::memory_order_relaxed); + while (true) { + // NOLINTNEXTLINE(clang-analyzer-core.NullDereference) + if (newCount >= (mainHash->capacity >> 1) && + !implicitProducerHashResizeInProgress.test_and_set( + std::memory_order_acquire)) { + // We've acquired the resize lock, try to allocate a bigger hash table. + // Note the acquire fence synchronizes with the release fence at the end + // of this block, and hence when we reload implicitProducerHash it must + // be the most recent version (it only gets changed within this locked + // block). + mainHash = implicitProducerHash.load(std::memory_order_acquire); + if (newCount >= (mainHash->capacity >> 1)) { + size_t newCapacity = mainHash->capacity << 1; + while (newCount >= (newCapacity >> 1)) { + newCapacity <<= 1; + } + auto raw = static_cast( + (Traits::malloc)(sizeof(ImplicitProducerHash) + + std::alignment_of::value - + 1 + sizeof(ImplicitProducerKVP) * newCapacity)); + if (raw == nullptr) { + // Allocation failed + implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed); + implicitProducerHashResizeInProgress.clear( + std::memory_order_relaxed); + return nullptr; + } + + auto newHash = new (raw) ImplicitProducerHash; + newHash->capacity = static_cast(newCapacity); + newHash->entries = reinterpret_cast( + details::align_for( + raw + sizeof(ImplicitProducerHash))); + for (size_t i = 0; i != newCapacity; ++i) { + new (newHash->entries + i) ImplicitProducerKVP; + newHash->entries[i].key.store(details::invalid_thread_id, + std::memory_order_relaxed); + } + newHash->prev = mainHash; + implicitProducerHash.store(newHash, std::memory_order_release); + implicitProducerHashResizeInProgress.clear(std::memory_order_release); + mainHash = newHash; + } else { + implicitProducerHashResizeInProgress.clear(std::memory_order_release); + } + } + + // If it's < three-quarters full, add to the old one anyway so that we + // don't have to wait for the next table to finish being allocated by + // another thread (and if we just finished allocating above, the condition + // will always be true) + if (newCount < (mainHash->capacity >> 1) + (mainHash->capacity >> 2)) { + auto producer = + static_cast(recycle_or_create_producer(false)); + if (producer == nullptr) { + implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed); + return nullptr; + } + +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + producer->threadExitListener.callback = + &ConcurrentQueue::implicit_producer_thread_exited_callback; + producer->threadExitListener.userData = producer; + 
details::ThreadExitNotifier::subscribe(&producer->threadExitListener); +#endif + + auto index = hashedId; + while (true) { + index &= mainHash->capacity - 1u; + auto empty = details::invalid_thread_id; +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + auto reusable = details::invalid_thread_id2; + if (mainHash->entries[index].key.compare_exchange_strong( + reusable, id, std::memory_order_seq_cst, + std::memory_order_relaxed)) { + implicitProducerHashCount.fetch_sub( + 1, + std::memory_order_relaxed); // already counted as a used slot + mainHash->entries[index].value = producer; + break; + } +#endif + if (mainHash->entries[index].key.compare_exchange_strong( + empty, id, std::memory_order_seq_cst, + std::memory_order_relaxed)) { + mainHash->entries[index].value = producer; + break; + } + ++index; + } + return producer; + } + + // Hmm, the old hash is quite full and somebody else is busy allocating a + // new one. We need to wait for the allocating thread to finish (if it + // succeeds, we add, if not, we try to allocate ourselves). + mainHash = implicitProducerHash.load(std::memory_order_acquire); + } + } + +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + void implicit_producer_thread_exited(ImplicitProducer *producer) { + // Remove from hash +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH + debug::DebugLock lock(implicitProdMutex); +#endif + auto hash = implicitProducerHash.load(std::memory_order_acquire); + assert(hash != nullptr); // The thread exit listener is only registered if + // we were added to a hash in the first place + auto id = details::thread_id(); + auto hashedId = details::hash_thread_id(id); + details::thread_id_t probedKey; + + // We need to traverse all the hashes just in case other threads aren't on + // the current one yet and are trying to add an entry thinking there's a + // free slot (because they reused a producer) + for (; hash != nullptr; hash = hash->prev) { + auto index = hashedId; + do { + index &= hash->capacity - 1u; + probedKey = id; + if (hash->entries[index].key.compare_exchange_strong( + probedKey, details::invalid_thread_id2, + std::memory_order_seq_cst, std::memory_order_relaxed)) { + break; + } + ++index; + } while ( + probedKey != + details::invalid_thread_id); // Can happen if the hash has changed + // but we weren't put back in it yet, or + // if we weren't added to this hash in + // the first place + } + + // Mark the queue as being recyclable + producer->inactive.store(true, std::memory_order_release); + } + + static void implicit_producer_thread_exited_callback(void *userData) { + auto producer = static_cast(userData); + auto queue = producer->parent; + queue->implicit_producer_thread_exited(producer); + } +#endif + + ////////////////////////////////// + // Utility functions + ////////////////////////////////// + + template + static inline void *aligned_malloc(size_t size) { + MOODYCAMEL_CONSTEXPR_IF(std::alignment_of::value <= + std::alignment_of::value) + return (Traits::malloc)(size); + else { + size_t alignment = std::alignment_of::value; + void *raw = (Traits::malloc)(size + alignment - 1 + sizeof(void *)); + if (!raw) return nullptr; + char *ptr = details::align_for(reinterpret_cast(raw) + + sizeof(void *)); + *(reinterpret_cast(ptr) - 1) = raw; + return ptr; + } + } + + template + static inline void aligned_free(void *ptr) { + MOODYCAMEL_CONSTEXPR_IF(std::alignment_of::value <= + std::alignment_of::value) + return (Traits::free)(ptr); + else(Traits::free)(ptr ? 
*(reinterpret_cast(ptr) - 1) : nullptr); + } + + template + static inline U *create_array(size_t count) { + assert(count > 0); + U *p = static_cast(aligned_malloc(sizeof(U) * count)); + if (p == nullptr) return nullptr; + + for (size_t i = 0; i != count; ++i) new (p + i) U(); + return p; + } + + template + static inline void destroy_array(U *p, size_t count) { + if (p != nullptr) { + assert(count > 0); + for (size_t i = count; i != 0;) (p + --i)->~U(); + } + aligned_free(p); + } + + template + static inline U *create() { + void *p = aligned_malloc(sizeof(U)); + return p != nullptr ? new (p) U : nullptr; + } + + template + static inline U *create(A1 &&a1) { + void *p = aligned_malloc(sizeof(U)); + return p != nullptr ? new (p) U(std::forward(a1)) : nullptr; + } + + template + static inline void destroy(U *p) { + if (p != nullptr) p->~U(); + aligned_free(p); + } + + private: + std::atomic producerListTail; + std::atomic producerCount; + + std::atomic initialBlockPoolIndex; + Block *initialBlockPool; + size_t initialBlockPoolSize; + +#ifndef MCDBGQ_USEDEBUGFREELIST + FreeList freeList; +#else + debug::DebugFreeList freeList; +#endif + + std::atomic implicitProducerHash; + std::atomic + implicitProducerHashCount; // Number of slots logically used + ImplicitProducerHash initialImplicitProducerHash; + std::array + initialImplicitProducerHashEntries; + std::atomic_flag implicitProducerHashResizeInProgress; + + std::atomic nextExplicitConsumerId; + std::atomic globalExplicitConsumerOffset; + +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH + debug::DebugMutex implicitProdMutex; +#endif + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + std::atomic explicitProducers; + std::atomic implicitProducers; +#endif +}; + + +template +ProducerToken::ProducerToken(ConcurrentQueue &queue) + : producer(queue.recycle_or_create_producer(true)) { + if (producer != nullptr) { + producer->token = this; + } +} + +template +ProducerToken::ProducerToken(BlockingConcurrentQueue &queue) + : producer(reinterpret_cast *>(&queue) + ->recycle_or_create_producer(true)) { + if (producer != nullptr) { + producer->token = this; + } +} + +template +ConsumerToken::ConsumerToken(ConcurrentQueue &queue) + : itemsConsumedFromCurrent(0), + currentProducer(nullptr), + desiredProducer(nullptr) { + initialOffset = + queue.nextExplicitConsumerId.fetch_add(1, std::memory_order_release); + lastKnownGlobalOffset = static_cast(-1); +} + +template +ConsumerToken::ConsumerToken(BlockingConcurrentQueue &queue) + : itemsConsumedFromCurrent(0), + currentProducer(nullptr), + desiredProducer(nullptr) { + initialOffset = + reinterpret_cast *>(&queue) + ->nextExplicitConsumerId.fetch_add(1, std::memory_order_release); + lastKnownGlobalOffset = static_cast(-1); +} + +template +inline void swap(ConcurrentQueue &a, + ConcurrentQueue &b) MOODYCAMEL_NOEXCEPT { + a.swap(b); +} + +inline void swap(ProducerToken &a, ProducerToken &b) MOODYCAMEL_NOEXCEPT { + a.swap(b); +} + +inline void swap(ConsumerToken &a, ConsumerToken &b) MOODYCAMEL_NOEXCEPT { + a.swap(b); +} + +template +inline void swap(typename ConcurrentQueue::ImplicitProducerKVP &a, + typename ConcurrentQueue::ImplicitProducerKVP &b) + MOODYCAMEL_NOEXCEPT { + a.swap(b); +} + +} // namespace moodycamel + +#if defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17) +#pragma warning(pop) +#endif + +#if defined(__GNUC__) && !defined(__INTEL_COMPILER) +#pragma GCC diagnostic pop +#endif \ No newline at end of file diff --git a/src/include/zvec/core/framework/index_storage.h 
b/src/include/zvec/core/framework/index_storage.h index 8673d63e..9173da3e 100644 --- a/src/include/zvec/core/framework/index_storage.h +++ b/src/include/zvec/core/framework/index_storage.h @@ -14,7 +14,7 @@ #pragma once -#include +#include #include #include #include @@ -37,10 +37,12 @@ class IndexStorage : public IndexModule { }; MemoryBlock() {} - MemoryBlock(ailego::BufferHandle::Pointer &&buffer_handle) - : type_(MemoryBlockType::MBT_BUFFERPOOL), - buffer_handle_(std::move(buffer_handle)) { - data_ = buffer_handle_->pin_vector_data(); + MemoryBlock(ailego::VecBufferPoolHandle *buffer_pool_handle, int block_id, + void *data) + : type_(MemoryBlockType::MBT_BUFFERPOOL) { + buffer_pool_handle_ = buffer_pool_handle; + buffer_block_id_ = block_id; + data_ = data; } MemoryBlock(void *data) : type_(MemoryBlockType::MBT_MMAP), data_(data) {} @@ -50,7 +52,8 @@ class IndexStorage : public IndexModule { this->reset(rhs.data_); break; case MemoryBlockType::MBT_BUFFERPOOL: - this->reset(rhs.buffer_handle_); + this->reset(rhs.buffer_pool_handle_, rhs.buffer_block_id_, rhs.data_); + buffer_pool_handle_->acquire_one(buffer_block_id_); break; default: break; @@ -63,7 +66,8 @@ class IndexStorage : public IndexModule { this->reset(std::move(rhs.data_)); break; case MemoryBlockType::MBT_BUFFERPOOL: - this->reset(std::move(rhs.buffer_handle_)); + this->reset(std::move(rhs.buffer_pool_handle_), + std::move(rhs.buffer_block_id_), std::move(rhs.data_)); break; default: break; @@ -77,7 +81,9 @@ class IndexStorage : public IndexModule { this->reset(rhs.data_); break; case MemoryBlockType::MBT_BUFFERPOOL: - this->reset(rhs.buffer_handle_); + this->reset(rhs.buffer_pool_handle_, rhs.buffer_block_id_, + rhs.data_); + buffer_pool_handle_->acquire_one(buffer_block_id_); break; default: break; @@ -93,7 +99,8 @@ class IndexStorage : public IndexModule { this->reset(std::move(rhs.data_)); break; case MemoryBlockType::MBT_BUFFERPOOL: - this->reset(std::move(rhs.buffer_handle_)); + this->reset(std::move(rhs.buffer_pool_handle_), + std::move(rhs.buffer_block_id_), std::move(rhs.data_)); break; default: break; @@ -107,9 +114,8 @@ class IndexStorage : public IndexModule { case MemoryBlockType::MBT_MMAP: break; case MemoryBlockType::MBT_BUFFERPOOL: - if (buffer_handle_) { - buffer_handle_->unpin_vector_data(); - // buffer_handle_.reset(); + if (buffer_pool_handle_) { + buffer_pool_handle_->release_one(buffer_block_id_); } break; default: @@ -122,34 +128,21 @@ class IndexStorage : public IndexModule { return data_; } - void reset(ailego::BufferHandle::Pointer &buffer_handle) { + void reset(ailego::VecBufferPoolHandle *buffer_pool_handle, int block_id, + void *data) { if (type_ == MemoryBlockType::MBT_BUFFERPOOL) { - buffer_handle_->unpin_vector_data(); - buffer_handle_.reset(); + buffer_pool_handle_->release_one(buffer_block_id_); } type_ = MemoryBlockType::MBT_BUFFERPOOL; - if (buffer_handle) { - buffer_handle_.reset(buffer_handle.release()); - } - data_ = buffer_handle_->pin_vector_data(); - } - - void reset(ailego::BufferHandle::Pointer &&buffer_handle) { - if (type_ == MemoryBlockType::MBT_BUFFERPOOL) { - buffer_handle_->unpin_vector_data(); - buffer_handle_.reset(); - } - type_ = MemoryBlockType::MBT_BUFFERPOOL; - if (buffer_handle) { - buffer_handle_ = std::move(buffer_handle); - } - data_ = buffer_handle_->pin_vector_data(); + buffer_pool_handle_ = buffer_pool_handle; + buffer_block_id_ = block_id; + data_ = data; } void reset(void *data) { if (type_ == MemoryBlockType::MBT_BUFFERPOOL) { - 
buffer_handle_->unpin_vector_data(); - buffer_handle_.reset(); + buffer_pool_handle_->release_one(buffer_block_id_); + buffer_pool_handle_ = nullptr; } type_ = MemoryBlockType::MBT_MMAP; data_ = data; @@ -157,7 +150,8 @@ class IndexStorage : public IndexModule { MemoryBlockType type_{MBT_UNKNOWN}; void *data_{nullptr}; - mutable ailego::BufferHandle::Pointer buffer_handle_{nullptr}; + mutable ailego::VecBufferPoolHandle *buffer_pool_handle_; + int buffer_block_id_{0}; }; struct SegmentData { diff --git a/tests/core/algorithm/flat/flat_streamer_buffer_test.cpp b/tests/core/algorithm/flat/flat_streamer_buffer_test.cc similarity index 78% rename from tests/core/algorithm/flat/flat_streamer_buffer_test.cpp rename to tests/core/algorithm/flat/flat_streamer_buffer_test.cc index 62b25e23..fbc404b4 100644 --- a/tests/core/algorithm/flat/flat_streamer_buffer_test.cpp +++ b/tests/core/algorithm/flat/flat_streamer_buffer_test.cc @@ -50,7 +50,6 @@ void FlatStreamerTest::TearDown(void) { } TEST_F(FlatStreamerTest, TestLinearSearch) { - BufferManager::Instance().init(300 * 1024 / 2 * 1024, 1); IndexStreamer::Pointer write_streamer = IndexFactory::CreateStreamer("FlatStreamer"); ASSERT_TRUE(write_streamer != nullptr); @@ -165,31 +164,33 @@ TEST_F(FlatStreamerTest, TestLinearSearch) { ASSERT_EQ(i == cnt - 1 ? i - 1 : i + 1, result2[1].key()); ASSERT_EQ(i == 0 ? 2 : (i == cnt - 1 ? i - 2 : i - 1), result2[2].key()); } + cout << "Elapsed time: " << elapsed_time.milli_seconds() << " ms" << endl; read_streamer->close(); read_streamer.reset(); - cout << "Elapsed time: " << elapsed_time.milli_seconds() << " ms" << endl; } -TEST_F(FlatStreamerTest, TestLinearSearchMMap) { - BufferManager::Instance().init(3 * 1024 / 2 * 1024, 1); +TEST_F(FlatStreamerTest, TestLinearSearchWithLRU) { + constexpr size_t static dim = 1600; IndexStreamer::Pointer write_streamer = IndexFactory::CreateStreamer("FlatStreamer"); ASSERT_TRUE(write_streamer != nullptr); Params params; - ASSERT_EQ(0, write_streamer->init(*index_meta_ptr_, params)); + IndexMeta meta = IndexMeta(IndexMeta::DataType::DT_FP32, dim); + meta.set_metric("SquaredEuclidean", 0, Params()); + ASSERT_EQ(0, write_streamer->init(meta, params)); auto storage = IndexFactory::CreateStorage("MMapFileStorage"); ASSERT_NE(nullptr, storage); Params stg_params; ASSERT_EQ(0, storage->init(stg_params)); - ASSERT_EQ(0, storage->open(dir_ + "/Test/LinearSearchMMap", true)); + ASSERT_EQ(0, storage->open(dir_ + "/Test/LinearSearchWithLRU", true)); ASSERT_EQ(0, write_streamer->open(storage)); auto ctx = write_streamer->create_context(); ASSERT_TRUE(!!ctx); - size_t cnt = 10000UL; + size_t cnt = 1000000UL; IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim); for (size_t i = 0; i < cnt; i++) { NumericalVector vec(dim); @@ -202,18 +203,19 @@ TEST_F(FlatStreamerTest, TestLinearSearchMMap) { write_streamer->close(); write_streamer.reset(); - ElapsedTime elapsed_time; + IndexStreamer::Pointer read_streamer = IndexFactory::CreateStreamer("FlatStreamer"); - ASSERT_EQ(0, read_streamer->init(*index_meta_ptr_, params)); - auto read_storage = IndexFactory::CreateStorage("MMapFileStorage"); + ASSERT_EQ(0, read_streamer->init(meta, params)); + auto read_storage = IndexFactory::CreateStorage("BufferStorage"); ASSERT_NE(nullptr, read_storage); ASSERT_EQ(0, read_storage->init(stg_params)); - ASSERT_EQ(0, read_storage->open(dir_ + "/Test/LinearSearchMMap", false)); + ASSERT_EQ(0, read_storage->open(dir_ + "/Test/LinearSearchWithLRU", false)); ASSERT_EQ(0, read_streamer->open(read_storage)); size_t topk = 3; 
auto provider = read_streamer->create_provider(); - for (size_t i = 0; i < cnt; i += 1) { + ElapsedTime elapsed_time; + for (size_t i = 0; i < 10; i += 1) { NumericalVector vec(dim); for (size_t j = 0; j < dim; ++j) { vec[j] = i; @@ -241,122 +243,132 @@ TEST_F(FlatStreamerTest, TestLinearSearchMMap) { ASSERT_EQ(i == cnt - 1 ? i - 1 : i + 1, result2[1].key()); ASSERT_EQ(i == 0 ? 2 : (i == cnt - 1 ? i - 2 : i - 1), result2[2].key()); } - - ctx->set_topk(100U); - NumericalVector vec(dim); - for (size_t j = 0; j < dim; ++j) { - vec[j] = 10.1f; - } - ASSERT_EQ(0, read_streamer->search_bf_impl(vec.data(), qmeta, ctx)); - auto &result = ctx->result(); - ASSERT_EQ(100U, result.size()); - ASSERT_EQ(10, result[0].key()); - ASSERT_EQ(11, result[1].key()); - ASSERT_EQ(5, result[10].key()); - ASSERT_EQ(0, result[20].key()); - ASSERT_EQ(30, result[30].key()); - ASSERT_EQ(35, result[35].key()); - ASSERT_EQ(99, result[99].key()); + cout << "Elapsed time: " << elapsed_time.milli_seconds() << " ms" << endl; read_streamer->close(); read_streamer.reset(); - cout << "Elapsed time: " << elapsed_time.milli_seconds() << " ms" << endl; } -TEST_F(FlatStreamerTest, TestBufferStorage) { - BufferManager::Instance().init(10 * 1024 * 1024, 1); - IndexStreamer::Pointer streamer = +TEST_F(FlatStreamerTest, TestLinearSearchMMap) { + IndexStreamer::Pointer write_streamer = IndexFactory::CreateStreamer("FlatStreamer"); - ASSERT_TRUE(streamer != nullptr); - const int dim = 16; - IndexMeta meta = IndexMeta(IndexMeta::DT_FP32, dim); - meta.set_metric("SquaredEuclidean", 0, Params()); + ASSERT_TRUE(write_streamer != nullptr); Params params; - EXPECT_EQ(0, streamer->init(meta, params)); + ASSERT_EQ(0, write_streamer->init(*index_meta_ptr_, params)); auto storage = IndexFactory::CreateStorage("MMapFileStorage"); ASSERT_NE(nullptr, storage); Params stg_params; - EXPECT_EQ(0, storage->init(stg_params)); - EXPECT_EQ(0, storage->open(dir_ + "/Test/LinearSearch", true)); - EXPECT_EQ(0, streamer->open(storage)); + ASSERT_EQ(0, storage->init(stg_params)); + ASSERT_EQ(0, storage->open(dir_ + "/Test/LinearSearchMMap", true)); + ASSERT_EQ(0, write_streamer->open(storage)); - auto ctx = streamer->create_context(); + auto ctx = write_streamer->create_context(); ASSERT_TRUE(!!ctx); - size_t cnt = 1000UL; + size_t cnt = 10000UL; IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim); for (size_t i = 0; i < cnt; i++) { NumericalVector vec(dim); for (size_t j = 0; j < dim; ++j) { vec[j] = i; } - streamer->add_impl(i, vec.data(), qmeta, ctx); + write_streamer->add_impl(i, vec.data(), qmeta, ctx); } - streamer->flush(0UL); - streamer.reset(); + write_streamer->flush(0UL); + write_streamer->close(); + write_streamer.reset(); IndexStreamer::Pointer read_streamer = IndexFactory::CreateStreamer("FlatStreamer"); - ASSERT_TRUE(read_streamer != nullptr); - EXPECT_EQ(0, read_streamer->init(meta, params)); - auto read_storage = IndexFactory::CreateStorage("BufferStorage"); + ASSERT_EQ(0, read_streamer->init(*index_meta_ptr_, params)); + auto read_storage = IndexFactory::CreateStorage("MMapFileStorage"); ASSERT_NE(nullptr, read_storage); - EXPECT_EQ(0, read_storage->init(stg_params)); - EXPECT_EQ(0, read_storage->open(dir_ + "/Test/LinearSearch", false)); - EXPECT_EQ(0, read_streamer->open(read_storage)); - auto read_ctx = read_streamer->create_context(); - auto provider = read_streamer->create_provider(); - + ASSERT_EQ(0, read_storage->init(stg_params)); + ASSERT_EQ(0, read_storage->open(dir_ + "/Test/LinearSearchMMap", false)); + ASSERT_EQ(0, 
read_streamer->open(read_storage)); size_t topk = 3; + auto provider = read_streamer->create_provider(); for (size_t i = 0; i < cnt; i += 1) { NumericalVector vec(dim); for (size_t j = 0; j < dim; ++j) { vec[j] = i; } - read_ctx->set_topk(topk); - EXPECT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, read_ctx)); - auto &result1 = read_ctx->result(); - EXPECT_EQ(topk, result1.size()); + ctx->set_topk(topk); + ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx)); + auto &result1 = ctx->result(); + ASSERT_EQ(topk, result1.size()); + IndexStorage::MemoryBlock block; + ASSERT_EQ(0, provider->get_vector(result1[0].key(), block)); + const float *data = (float *)block.data(); for (size_t j = 0; j < dim; ++j) { - const float *data = (float *)provider->get_vector(result1[0].key()); - EXPECT_EQ(data[j], i); + ASSERT_EQ(data[j], i); } - EXPECT_EQ(i, result1[0].key()); + ASSERT_EQ(i, result1[0].key()); for (size_t j = 0; j < dim; ++j) { vec[j] = i + 0.1f; } - read_ctx->set_topk(topk); - EXPECT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, read_ctx)); - auto &result2 = read_ctx->result(); - EXPECT_EQ(topk, result2.size()); - EXPECT_EQ(i, result2[0].key()); - EXPECT_EQ(i == cnt - 1 ? i - 1 : i + 1, result2[1].key()); - EXPECT_EQ(i == 0 ? 2 : (i == cnt - 1 ? i - 2 : i - 1), result2[2].key()); + ctx->set_topk(topk); + ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx)); + auto &result2 = ctx->result(); + ASSERT_EQ(topk, result2.size()); + ASSERT_EQ(i, result2[0].key()); + ASSERT_EQ(i == cnt - 1 ? i - 1 : i + 1, result2[1].key()); + ASSERT_EQ(i == 0 ? 2 : (i == cnt - 1 ? i - 2 : i - 1), result2[2].key()); } - read_ctx->set_topk(100U); + ctx->set_topk(100U); NumericalVector vec(dim); for (size_t j = 0; j < dim; ++j) { vec[j] = 10.1f; } - EXPECT_EQ(0, read_streamer->search_bf_impl(vec.data(), qmeta, read_ctx)); - auto &result = read_ctx->result(); - EXPECT_EQ(100U, result.size()); - EXPECT_EQ(10, result[0].key()); - EXPECT_EQ(11, result[1].key()); - EXPECT_EQ(5, result[10].key()); - EXPECT_EQ(0, result[20].key()); - EXPECT_EQ(30, result[30].key()); - EXPECT_EQ(35, result[35].key()); - EXPECT_EQ(99, result[99].key()); - - read_streamer->flush(0UL); + ASSERT_EQ(0, read_streamer->search_bf_impl(vec.data(), qmeta, ctx)); + auto &result = ctx->result(); + ASSERT_EQ(100U, result.size()); + ASSERT_EQ(10, result[0].key()); + ASSERT_EQ(11, result[1].key()); + ASSERT_EQ(5, result[10].key()); + ASSERT_EQ(0, result[20].key()); + ASSERT_EQ(30, result[30].key()); + ASSERT_EQ(35, result[35].key()); + ASSERT_EQ(99, result[99].key()); + + ElapsedTime elapsed_time; + for (size_t i = 0; i < cnt; i += 1) { + NumericalVector vec(dim); + for (size_t j = 0; j < dim; ++j) { + vec[j] = i; + } + ctx->set_topk(topk); + ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx)); + auto &result1 = ctx->result(); + ASSERT_EQ(topk, result1.size()); + IndexStorage::MemoryBlock block; + ASSERT_EQ(0, provider->get_vector(result1[0].key(), block)); + const float *data = (float *)block.data(); + for (size_t j = 0; j < dim; ++j) { + ASSERT_EQ(data[j], i); + } + ASSERT_EQ(i, result1[0].key()); + + for (size_t j = 0; j < dim; ++j) { + vec[j] = i + 0.1f; + } + ctx->set_topk(topk); + ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx)); + auto &result2 = ctx->result(); + ASSERT_EQ(topk, result2.size()); + ASSERT_EQ(i, result2[0].key()); + ASSERT_EQ(i == cnt - 1 ? i - 1 : i + 1, result2[1].key()); + ASSERT_EQ(i == 0 ? 2 : (i == cnt - 1 ? 
i - 2 : i - 1), result2[2].key()); + } + + read_streamer->close(); read_streamer.reset(); + cout << "Elapsed time: " << elapsed_time.milli_seconds() << " ms" << endl; } - #if defined(__GNUC__) || defined(__GNUG__) #pragma GCC diagnostic pop #endif \ No newline at end of file diff --git a/tests/core/algorithm/flat/flat_streamer_buffer_time_test.cc b/tests/core/algorithm/flat/flat_streamer_buffer_time_test.cc new file mode 100644 index 00000000..435ecccc --- /dev/null +++ b/tests/core/algorithm/flat/flat_streamer_buffer_time_test.cc @@ -0,0 +1,235 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace zvec::core; +using namespace zvec::ailego; +using namespace std; + +#if defined(__GNUC__) || defined(__GNUG__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-result" +#endif + +constexpr size_t static dim = 128; + +class FlatStreamerTest : public testing::Test { + protected: + void SetUp(void); + void TearDown(void); + void hybrid_scale(std::vector &dense_value, + std::vector &sparse_value, float alpha_scale); + + static std::string dir_; + static std::shared_ptr index_meta_ptr_; +}; + +std::string FlatStreamerTest::dir_("streamer_test/"); +std::shared_ptr FlatStreamerTest::index_meta_ptr_; + +void FlatStreamerTest::SetUp(void) { + index_meta_ptr_.reset(new (std::nothrow) + IndexMeta(IndexMeta::DataType::DT_FP32, dim)); + index_meta_ptr_->set_metric("SquaredEuclidean", 0, Params()); + + char cmdBuf[100]; + snprintf(cmdBuf, 100, "rm -rf %s", dir_.c_str()); + system(cmdBuf); +} + +void FlatStreamerTest::TearDown(void) { + char cmdBuf[100]; + snprintf(cmdBuf, 100, "rm -rf %s", dir_.c_str()); + system(cmdBuf); +} + +TEST_F(FlatStreamerTest, TestLinearSearchMMap) { + BufferManager::Instance().init(50 * 1024 * 1024, 1); + IndexStreamer::Pointer write_streamer = + IndexFactory::CreateStreamer("FlatStreamer"); + ASSERT_TRUE(write_streamer != nullptr); + + Params params; + ASSERT_EQ(0, write_streamer->init(*index_meta_ptr_, params)); + auto storage = IndexFactory::CreateStorage("MMapFileStorage"); + ASSERT_NE(nullptr, storage); + Params stg_params; + ASSERT_EQ(0, storage->init(stg_params)); + ASSERT_EQ(0, storage->open(dir_ + "/Test/LinearSearchMMap", true)); + ASSERT_EQ(0, write_streamer->open(storage)); + + auto ctx = write_streamer->create_context(); + ASSERT_TRUE(!!ctx); + + size_t data_cnt = 300000UL, cnt = 500UL; + IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim); + for (size_t i = 0; i < data_cnt; i++) { + NumericalVector vec(dim); + for (size_t j = 0; j < dim; ++j) { + vec[j] = i; + } + write_streamer->add_impl(i, vec.data(), qmeta, ctx); + } + write_streamer->flush(0UL); + write_streamer->close(); + write_streamer.reset(); + + IndexStreamer::Pointer read_streamer = + IndexFactory::CreateStreamer("FlatStreamer"); + ASSERT_EQ(0, read_streamer->init(*index_meta_ptr_, params)); + auto read_storage = IndexFactory::CreateStorage("MMapFileStorage"); + ASSERT_NE(nullptr, read_storage); + ASSERT_EQ(0, read_storage->init(stg_params)); + ASSERT_EQ(0, read_storage->open(dir_ + "/Test/LinearSearchMMap", false)); + ASSERT_EQ(0, read_streamer->open(read_storage)); + size_t topk = 30; + ElapsedTime elapsed_time; + for (size_t i = 0; i < cnt; i += 1) { + NumericalVector vec(dim); + for (size_t j = 0; j < dim; ++j) { + vec[j] = i; + } + ctx->set_topk(topk); + ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx)); + // auto &result1 = ctx->result(); + // ASSERT_EQ(topk, result1.size()); + // ASSERT_EQ(i, result1[0].key()); + 
+ // for (size_t j = 0; j < dim; ++j) { + // vec[j] = i + 0.1f; + // } + // ctx->set_topk(topk); + // ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx)); + // auto &result2 = ctx->result(); + // ASSERT_EQ(topk, result2.size()); + // ASSERT_EQ(i, result2[0].key()); + // ASSERT_EQ(i == cnt - 1 ? i - 1 : i + 1, result2[1].key()); + // ASSERT_EQ(i == 0 ? 2 : (i == cnt - 1 ? i - 2 : i - 1), result2[2].key()); + } + cout << "Elapsed time: " << elapsed_time.micro_seconds() << " us" << endl; + for (size_t i = 0; i < cnt; i += 1) { + NumericalVector vec(dim); + for (size_t j = 0; j < dim; ++j) { + vec[j] = i; + } + ctx->set_topk(topk); + ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx)); + // auto &result1 = ctx->result(); + // ASSERT_EQ(topk, result1.size()); + // ASSERT_EQ(i, result1[0].key()); + + // for (size_t j = 0; j < dim; ++j) { + // vec[j] = i + 0.1f; + // } + // ctx->set_topk(topk); + // ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx)); + // auto &result2 = ctx->result(); + // ASSERT_EQ(topk, result2.size()); + // ASSERT_EQ(i, result2[0].key()); + // ASSERT_EQ(i == cnt - 1 ? i - 1 : i + 1, result2[1].key()); + // ASSERT_EQ(i == 0 ? 2 : (i == cnt - 1 ? i - 2 : i - 1), result2[2].key()); + } + cout << "Elapsed time: " << elapsed_time.micro_seconds() << " us" << endl; + read_streamer->close(); + read_streamer.reset(); +} + +TEST_F(FlatStreamerTest, TestLinearSearchBuffer) { + IndexStreamer::Pointer write_streamer = + IndexFactory::CreateStreamer("FlatStreamer"); + ASSERT_TRUE(write_streamer != nullptr); + + Params params; + ASSERT_EQ(0, write_streamer->init(*index_meta_ptr_, params)); + auto storage = IndexFactory::CreateStorage("MMapFileStorage"); + ASSERT_NE(nullptr, storage); + Params stg_params; + ASSERT_EQ(0, storage->init(stg_params)); + ASSERT_EQ(0, storage->open(dir_ + "/Test/LinearSearchBuffer", true)); + ASSERT_EQ(0, write_streamer->open(storage)); + + auto ctx = write_streamer->create_context(); + ASSERT_TRUE(!!ctx); + + size_t data_cnt = 300000UL, cnt = 500UL; + IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim); + for (size_t i = 0; i < data_cnt; i++) { + NumericalVector vec(dim); + for (size_t j = 0; j < dim; ++j) { + vec[j] = i; + } + write_streamer->add_impl(i, vec.data(), qmeta, ctx); + } + write_streamer->flush(0UL); + write_streamer->close(); + write_streamer.reset(); + + IndexStreamer::Pointer read_streamer = + IndexFactory::CreateStreamer("FlatStreamer"); + ASSERT_EQ(0, read_streamer->init(*index_meta_ptr_, params)); + auto read_storage = IndexFactory::CreateStorage("BufferStorage"); + ASSERT_NE(nullptr, read_storage); + ASSERT_EQ(0, read_storage->init(stg_params)); + ASSERT_EQ(0, read_storage->open(dir_ + "/Test/LinearSearchBuffer", false)); + ASSERT_EQ(0, read_streamer->open(read_storage)); + size_t topk = 30; + ElapsedTime elapsed_time; + for (size_t i = 0; i < cnt; i += 1) { + NumericalVector vec(dim); + for (size_t j = 0; j < dim; ++j) { + vec[j] = i; + } + ctx->set_topk(topk); + ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx)); + // auto &result1 = ctx->result(); + // ASSERT_EQ(topk, result1.size()); + // ASSERT_EQ(i, result1[0].key()); + + // for (size_t j = 0; j < dim; ++j) { + // vec[j] = i + 0.1f; + // } + // ctx->set_topk(topk); + // ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx)); + // auto &result2 = ctx->result(); + // ASSERT_EQ(topk, result2.size()); + // ASSERT_EQ(i, result2[0].key()); + // ASSERT_EQ(i == cnt - 1 ? i - 1 : i + 1, result2[1].key()); + // ASSERT_EQ(i == 0 ? 
2 : (i == cnt - 1 ? i - 2 : i - 1), result2[2].key()); + } + cout << "Elapsed time: " << elapsed_time.micro_seconds() << " us" << endl; + for (size_t i = 0; i < cnt; i += 1) { + NumericalVector vec(dim); + for (size_t j = 0; j < dim; ++j) { + vec[j] = i; + } + ctx->set_topk(topk); + ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx)); + // auto &result1 = ctx->result(); + // ASSERT_EQ(topk, result1.size()); + // ASSERT_EQ(i, result1[0].key()); + + // for (size_t j = 0; j < dim; ++j) { + // vec[j] = i + 0.1f; + // } + // ctx->set_topk(topk); + // ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx)); + // auto &result2 = ctx->result(); + // ASSERT_EQ(topk, result2.size()); + // ASSERT_EQ(i, result2[0].key()); + // ASSERT_EQ(i == cnt - 1 ? i - 1 : i + 1, result2[1].key()); + // ASSERT_EQ(i == 0 ? 2 : (i == cnt - 1 ? i - 2 : i - 1), result2[2].key()); + } + cout << "Elapsed time: " << elapsed_time.micro_seconds() << " us" << endl; + read_streamer->close(); + read_streamer.reset(); +} + +#if defined(__GNUC__) || defined(__GNUG__) +#pragma GCC diagnostic pop +#endif \ No newline at end of file diff --git a/tests/core/algorithm/flat/flat_streamer_buffer_time_test.cpp b/tests/core/algorithm/flat/flat_streamer_buffer_time_test.cpp deleted file mode 100644 index c919e9fe..00000000 --- a/tests/core/algorithm/flat/flat_streamer_buffer_time_test.cpp +++ /dev/null @@ -1,140 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace zvec::core; -using namespace zvec::ailego; -using namespace std; - -#if defined(__GNUC__) || defined(__GNUG__) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-result" -#endif - -constexpr size_t static dim = 128; - -class FlatStreamerTest : public testing::Test { - protected: - void SetUp(void); - void TearDown(void); - void hybrid_scale(std::vector &dense_value, - std::vector &sparse_value, float alpha_scale); - - static std::string dir_; - static std::shared_ptr index_meta_ptr_; -}; - -std::string FlatStreamerTest::dir_("streamer_test/"); -std::shared_ptr FlatStreamerTest::index_meta_ptr_; - -void FlatStreamerTest::SetUp(void) { - index_meta_ptr_.reset(new (std::nothrow) - IndexMeta(IndexMeta::DataType::DT_FP32, dim)); - index_meta_ptr_->set_metric("SquaredEuclidean", 0, Params()); - - char cmdBuf[100]; - snprintf(cmdBuf, 100, "rm -rf %s", dir_.c_str()); - system(cmdBuf); -} - -void FlatStreamerTest::TearDown(void) { - char cmdBuf[100]; - snprintf(cmdBuf, 100, "rm -rf %s", dir_.c_str()); - system(cmdBuf); -} - -TEST_F(FlatStreamerTest, TestLinearSearchMMap) { - BufferManager::Instance().init(50 * 1024 * 1024, 1); - IndexStreamer::Pointer write_streamer = - IndexFactory::CreateStreamer("FlatStreamer"); - ASSERT_TRUE(write_streamer != nullptr); - - Params params; - ASSERT_EQ(0, write_streamer->init(*index_meta_ptr_, params)); - auto storage = IndexFactory::CreateStorage("MMapFileStorage"); - ASSERT_NE(nullptr, storage); - Params stg_params; - ASSERT_EQ(0, storage->init(stg_params)); - ASSERT_EQ(0, storage->open(dir_ + "/Test/LinearSearchMMap", true)); - ASSERT_EQ(0, write_streamer->open(storage)); - - auto ctx = write_streamer->create_context(); - ASSERT_TRUE(!!ctx); - - size_t data_cnt = 300000UL, cnt = 500UL; - IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim); - for (size_t i = 0; i < data_cnt; i++) { - NumericalVector vec(dim); - for (size_t j = 0; j < dim; ++j) { - vec[j] = i; - } - write_streamer->add_impl(i, vec.data(), qmeta, ctx); - } - write_streamer->flush(0UL); - 
write_streamer->close(); - write_streamer.reset(); - - IndexStreamer::Pointer read_streamer = - IndexFactory::CreateStreamer("FlatStreamer"); - ASSERT_EQ(0, read_streamer->init(*index_meta_ptr_, params)); - auto read_storage = IndexFactory::CreateStorage("BufferStorage"); - ASSERT_NE(nullptr, read_storage); - ASSERT_EQ(0, read_storage->init(stg_params)); - ASSERT_EQ(0, read_storage->open(dir_ + "/Test/LinearSearchMMap", false)); - ASSERT_EQ(0, read_streamer->open(read_storage)); - size_t topk = 30; - ElapsedTime elapsed_time; - for (size_t i = 0; i < cnt; i += 1) { - NumericalVector vec(dim); - for (size_t j = 0; j < dim; ++j) { - vec[j] = i; - } - ctx->set_topk(topk); - ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx)); - // auto &result1 = ctx->result(); - // ASSERT_EQ(topk, result1.size()); - // ASSERT_EQ(i, result1[0].key()); - - // for (size_t j = 0; j < dim; ++j) { - // vec[j] = i + 0.1f; - // } - // ctx->set_topk(topk); - // ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx)); - // auto &result2 = ctx->result(); - // ASSERT_EQ(topk, result2.size()); - // ASSERT_EQ(i, result2[0].key()); - // ASSERT_EQ(i == cnt - 1 ? i - 1 : i + 1, result2[1].key()); - // ASSERT_EQ(i == 0 ? 2 : (i == cnt - 1 ? i - 2 : i - 1), result2[2].key()); - } - cout << "Elapsed time: " << elapsed_time.micro_seconds() << " us" << endl; - - // ctx->set_topk(100U); - // NumericalVector vec(dim); - // for (size_t j = 0; j < dim; ++j) { - // vec[j] = 10.1f; - // } - // ASSERT_EQ(0, read_streamer->search_bf_impl(vec.data(), qmeta, ctx)); - // auto &result = ctx->result(); - // ASSERT_EQ(100U, result.size()); - // ASSERT_EQ(10, result[0].key()); - // ASSERT_EQ(11, result[1].key()); - // ASSERT_EQ(5, result[10].key()); - // ASSERT_EQ(0, result[20].key()); - // ASSERT_EQ(30, result[30].key()); - // ASSERT_EQ(35, result[35].key()); - // ASSERT_EQ(99, result[99].key()); - - read_streamer->close(); - read_streamer.reset(); - // cout << "Elapsed time: " << elapsed_time.milli_seconds() << " ms" << endl; -} - -#if defined(__GNUC__) || defined(__GNUG__) -#pragma GCC diagnostic pop -#endif \ No newline at end of file diff --git a/tests/core/algorithm/hnsw/hnsw_streamer_buffer_test.cpp b/tests/core/algorithm/hnsw/hnsw_streamer_buffer_test.cc similarity index 100% rename from tests/core/algorithm/hnsw/hnsw_streamer_buffer_test.cpp rename to tests/core/algorithm/hnsw/hnsw_streamer_buffer_test.cc diff --git a/tests/db/index/column/vector_column_indexer_test.cc b/tests/db/index/column/vector_column_indexer_test.cc index 483efcde..251e5a18 100644 --- a/tests/db/index/column/vector_column_indexer_test.cc +++ b/tests/db/index/column/vector_column_indexer_test.cc @@ -2160,7 +2160,6 @@ TEST(VectorColumnIndexerTest, Failure) { ASSERT_TRUE(indexer->Flush().ok()); ASSERT_TRUE(indexer->Close().ok()); { - ailego::BufferManager::Instance().init(10 * 1024 * 1024, 1); auto indexer = std::make_shared( index_file_path, FieldSchema("test", DataType::VECTOR_FP32, 3, false,
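
Editorial note on the reworked MemoryBlock ownership model (a sketch added for clarity, not part of the patch): after this change a buffer-pool-backed IndexStorage::MemoryBlock no longer holds an ailego::BufferHandle that pins and unpins vector data. It instead stores a raw ailego::VecBufferPoolHandle pointer plus a block id, calls acquire_one() on the pool when a block is copied, and calls release_one() when it is reset (and, from the remaining hunk, apparently also when the block is torn down). A minimal usage sketch of the caller-side pattern exercised by the updated tests, assuming provider, result1, dim and qmeta are set up as in TestLinearSearchMMap above and that get_vector() returns 0 on success (error handling omitted):

    // Hypothetical caller; mirrors the test's call pattern only.
    IndexStorage::MemoryBlock block;
    if (provider->get_vector(result1[0].key(), block) == 0) {
      // While `block` is alive it holds one reference on the underlying
      // buffer-pool block, so the pointer returned by data() stays valid.
      const float *vec_data = static_cast<const float *>(block.data());
      for (size_t j = 0; j < dim; ++j) {
        // ... consume vec_data[j] ...
      }
    }
    // Resetting or destroying `block` triggers release_one(), after which the
    // pool may evict the block again.

The design trade-off implied by the diff: reference counting moves from a heap-allocated handle object into the pool itself, so copying a MemoryBlock is a cheap acquire_one()/release_one() pair on an existing entry rather than a handle allocation, at the cost of the caller having to keep the pool handle alive for the lifetime of every outstanding block.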