Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 44 additions & 1 deletion velox/type/Filter.h
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,10 @@ class Filter : public velox::ISerializable {
VELOX_UNSUPPORTED("{}: testBytes() is not supported.", toString());
}

// Tests a StringView value against this filter. The default implementation
// hands the view's data pointer and size to testBytes(); subclasses may
// override it to operate on the StringView directly.
virtual bool testStringView(const StringView& view) const {
  const auto* bytes = view.data();
  const auto numBytes = view.size();
  return testBytes(bytes, numBytes);
}

// Tests a Timestamp value against this filter. This base implementation never
// inspects its argument: it invokes VELOX_UNSUPPORTED with a message that
// embeds this filter's toString().
// NOTE(review): there is no return statement after the macro, so
// VELOX_UNSUPPORTED presumably does not return (e.g. throws) - confirm.
virtual bool testTimestamp(const Timestamp& /* unused */) const {
VELOX_UNSUPPORTED("{}: testTimestamp() is not supported.", toString());
}
Expand Down Expand Up @@ -1700,6 +1704,8 @@ class BytesRange final : public AbstractRange {
FilterKind::kBytesRange),
lower_(std::move(lower)),
upper_(std::move(upper)),
lowerView_(lower_),
upperView_(upper_),
singleValue_(
!lowerExclusive_ && !upperExclusive_ && !lowerUnbounded_ &&
!upperUnbounded_ && lower_ == upper_) {
Expand All @@ -1717,6 +1723,8 @@ class BytesRange final : public AbstractRange {
FilterKind::kBytesRange),
lower_(other.lower_),
upper_(other.upper_),
lowerView_(lower_),
upperView_(upper_),
singleValue_(other.singleValue_) {}

folly::dynamic serialize() const override;
Expand Down Expand Up @@ -1744,6 +1752,35 @@ class BytesRange final : public AbstractRange {

bool testBytes(const char* value, int32_t length) const final;

// Tests 'view' against this range using StringView comparisons with the
// cached lowerView_ / upperView_ bounds.
//
// - When singleValue_ is set, passes only if 'view' equals lowerView_.
// - Otherwise fails if 'view' falls below a bounded lower limit or above a
//   bounded upper limit; a value equal to a limit fails only when that limit
//   is exclusive.
bool testStringView(const StringView& view) const final {
  if (singleValue_) {
    return view == lowerView_;
  }

  // Lower bound check, skipped entirely for an unbounded lower end.
  if (!lowerUnbounded_) {
    const bool belowLower =
        lowerExclusive_ ? (view <= lowerView_) : (view < lowerView_);
    if (belowLower) {
      return false;
    }
  }

  // With no upper bound there is nothing left that can reject the value.
  if (upperUnbounded_) {
    return true;
  }

  const bool aboveUpper =
      upperExclusive_ ? (view >= upperView_) : (view > upperView_);
  return !aboveUpper;
}

bool testBytesRange(
std::optional<std::string_view> min,
std::optional<std::string_view> max,
Expand Down Expand Up @@ -1798,6 +1835,8 @@ class BytesRange final : public AbstractRange {
private:
const std::string lower_;
const std::string upper_;
const StringView lowerView_;
const StringView upperView_;
const bool singleValue_;
};

Expand Down Expand Up @@ -1856,6 +1895,10 @@ class NegatedBytesRange final : public Filter {
return !nonNegated_->testBytes(value, length);
}

// Passes exactly when the wrapped non-negated filter rejects 'view'.
bool testStringView(const StringView& view) const final {
  const bool passesNonNegated = nonNegated_->testStringView(view);
  return !passesNonNegated;
}

bool testBytesRange(
std::optional<std::string_view> min,
std::optional<std::string_view> max,
Expand Down Expand Up @@ -2256,7 +2299,7 @@ static inline bool applyFilter(TFilter& filter, std::string_view value) {

// Applies 'filter' to a StringView value through the filter's
// testStringView() entry point, so a filter that overrides it can compare the
// StringView directly. The base Filter::testStringView() forwards to
// testBytes(data, size), so filters without an override behave as before.
//
// The earlier body also carried a leading
// 'return filter.testBytes(value.data(), value.size());', which made the
// testStringView() call unreachable; only the intended call is kept.
//
// @param filter Filter object exposing testStringView(StringView).
// @param value Value to test.
// @return The result of filter.testStringView(value).
template <typename TFilter>
static inline bool applyFilter(TFilter& filter, StringView value) {
  return filter.testStringView(value);
}

// Creates a hash or bitmap based IN filter depending on value distribution.
Expand Down
145 changes: 78 additions & 67 deletions velox/type/tests/FilterTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@

#include <gtest/gtest.h>

using namespace facebook::velox;
namespace facebook::velox {
namespace {

using namespace facebook::velox::common;
using namespace facebook::velox::exec;

Expand Down Expand Up @@ -792,20 +794,26 @@ TEST(FilterTest, floatRange) {
EXPECT_TRUE(filter->testFloat(100));
}

// Test helper: evaluates 'value' with Filter::testBytes() and cross-checks
// that Filter::testStringView() on the same bytes gives the same answer.
// Returns the testBytes() result so callers can wrap the call in
// EXPECT_TRUE / EXPECT_FALSE.
// NOTE(review): a mismatch is reported through VELOX_CHECK_EQ rather than a
// gtest assertion, so it presumably surfaces as a thrown Velox error - confirm.
bool testBytes(const Filter& filter, std::string_view value) {
bool result = filter.testBytes(value.data(), value.size());
// Both entry points must agree for every input.
VELOX_CHECK_EQ(filter.testStringView(StringView(value)), result);
return result;
}

TEST(FilterTest, bytesRange) {
{
auto filter = equal("abc");
EXPECT_TRUE(filter->testBytes("abc", 3));
EXPECT_FALSE(filter->testBytes("acb", 3));
EXPECT_TRUE(testBytes(*filter, "abc"));
EXPECT_FALSE(testBytes(*filter, "acb"));
EXPECT_TRUE(filter->testLength(3));
// The bit for lane 2 should be set.
int32_t lens[] = {0, 1, 3, 0, 4, 10, 11, 12};
EXPECT_EQ(
4, simd::toBitMask(filter->testLengths(xsimd::load_unaligned(lens))));

EXPECT_FALSE(filter->testNull());
EXPECT_FALSE(filter->testBytes("apple", 5));
EXPECT_FALSE(filter->testBytes(nullptr, 0));
EXPECT_FALSE(testBytes(*filter, "apple"));
EXPECT_FALSE(testBytes(*filter, {}));
EXPECT_FALSE(filter->testLength(4));

EXPECT_TRUE(filter->testBytesRange("abc", "abc", false));
Expand All @@ -824,132 +832,132 @@ TEST(FilterTest, bytesRange) {

// = ''
filter = equal("");
EXPECT_TRUE(filter->testBytes(nullptr, 0));
EXPECT_FALSE(filter->testBytes("abc", 3));
EXPECT_TRUE(testBytes(*filter, {}));
EXPECT_FALSE(testBytes(*filter, "abc"));
}

char const* theBestOfTimes =
"It was the best of times, it was the worst of times, it was the age of wisdom, it was the age of foolishness, it was the epoch of belief, it was the epoch of incredulity,...";
auto filter = lessThanOrEqual(theBestOfTimes);
EXPECT_TRUE(filter->testBytes(theBestOfTimes, std::strlen(theBestOfTimes)));
EXPECT_TRUE(filter->testBytes(theBestOfTimes, 5));
EXPECT_TRUE(filter->testBytes(theBestOfTimes, 50));
EXPECT_TRUE(filter->testBytes(theBestOfTimes, 100));
EXPECT_TRUE(testBytes(*filter, theBestOfTimes));
EXPECT_TRUE(testBytes(*filter, {theBestOfTimes, 5}));
EXPECT_TRUE(testBytes(*filter, {theBestOfTimes, 50}));
EXPECT_TRUE(testBytes(*filter, {theBestOfTimes, 100}));
// testLength is true of all lengths for a range filter.
EXPECT_TRUE(filter->testLength(1));
EXPECT_TRUE(filter->testLength(1000));

EXPECT_FALSE(filter->testNull());
EXPECT_FALSE(filter->testBytes("Zzz", 3));
EXPECT_FALSE(filter->testBytes("It was the best of times, zzz", 30));
EXPECT_FALSE(testBytes(*filter, "Zzz"));
EXPECT_FALSE(testBytes(*filter, {"It was the best of times, zzz", 30}));

EXPECT_TRUE(filter->testBytesRange("Apple", "banana", false));
EXPECT_FALSE(filter->testBytesRange("Pear", "Plum", false));
EXPECT_FALSE(filter->testBytesRange("apple", "banana", false));

filter = greaterThanOrEqual("abc");
EXPECT_TRUE(filter->testBytes("abc", 3));
EXPECT_TRUE(filter->testBytes("ad", 2));
EXPECT_TRUE(filter->testBytes("apple", 5));
EXPECT_TRUE(filter->testBytes("banana", 6));
EXPECT_TRUE(testBytes(*filter, "abc"));
EXPECT_TRUE(testBytes(*filter, "ad"));
EXPECT_TRUE(testBytes(*filter, "apple"));
EXPECT_TRUE(testBytes(*filter, "banana"));

EXPECT_FALSE(filter->testNull());
EXPECT_FALSE(filter->testBytes("ab", 2));
EXPECT_FALSE(filter->testBytes("_abc", 4));
EXPECT_FALSE(testBytes(*filter, "ab"));
EXPECT_FALSE(testBytes(*filter, "_abc"));

filter = between("apple", "banana");
EXPECT_TRUE(filter->testBytes("apple", 5));
EXPECT_TRUE(filter->testBytes("banana", 6));
EXPECT_TRUE(filter->testBytes("avocado", 7));
EXPECT_TRUE(testBytes(*filter, "apple"));
EXPECT_TRUE(testBytes(*filter, "banana"));
EXPECT_TRUE(testBytes(*filter, "avocado"));

EXPECT_FALSE(filter->testNull());
EXPECT_FALSE(filter->testBytes("camel", 5));
EXPECT_FALSE(filter->testBytes("_abc", 4));
EXPECT_FALSE(testBytes(*filter, "camel"));
EXPECT_FALSE(testBytes(*filter, "_abc"));

filter = std::make_unique<BytesRange>(
"apple", false, true, "banana", false, false, false);
EXPECT_TRUE(filter->testBytes("banana", 6));
EXPECT_TRUE(filter->testBytes("avocado", 7));
EXPECT_TRUE(testBytes(*filter, "banana"));
EXPECT_TRUE(testBytes(*filter, "avocado"));

EXPECT_FALSE(filter->testNull());
EXPECT_FALSE(filter->testBytes("apple", 5));
EXPECT_FALSE(filter->testBytes("camel", 5));
EXPECT_FALSE(filter->testBytes("_abc", 4));
EXPECT_FALSE(testBytes(*filter, "apple"));
EXPECT_FALSE(testBytes(*filter, "camel"));
EXPECT_FALSE(testBytes(*filter, "_abc"));

filter = std::make_unique<BytesRange>(
"apple", false, true, "banana", false, true, false);
EXPECT_TRUE(filter->testBytes("avocado", 7));
EXPECT_TRUE(testBytes(*filter, "avocado"));

EXPECT_FALSE(filter->testNull());
EXPECT_FALSE(filter->testBytes("apple", 5));
EXPECT_FALSE(filter->testBytes("banana", 6));
EXPECT_FALSE(filter->testBytes("camel", 5));
EXPECT_FALSE(filter->testBytes("_abc", 4));
EXPECT_FALSE(testBytes(*filter, "apple"));
EXPECT_FALSE(testBytes(*filter, "banana"));
EXPECT_FALSE(testBytes(*filter, "camel"));
EXPECT_FALSE(testBytes(*filter, "_abc"));

// < b
filter = lessThan("b");
EXPECT_TRUE(filter->testBytes("a", 1));
EXPECT_FALSE(filter->testBytes("b", 1));
EXPECT_FALSE(filter->testBytes("c", 1));
EXPECT_TRUE(filter->testBytes(nullptr, 0));
EXPECT_TRUE(testBytes(*filter, "a"));
EXPECT_FALSE(testBytes(*filter, "b"));
EXPECT_FALSE(testBytes(*filter, "c"));
EXPECT_TRUE(testBytes(*filter, {}));
EXPECT_FALSE(filter->testBytesRange("b", "c", false));

// <= b
filter = lessThanOrEqual("b");
EXPECT_TRUE(filter->testBytes("a", 1));
EXPECT_TRUE(filter->testBytes("b", 1));
EXPECT_FALSE(filter->testBytes("c", 1));
EXPECT_TRUE(filter->testBytes(nullptr, 0));
EXPECT_TRUE(testBytes(*filter, "a"));
EXPECT_TRUE(testBytes(*filter, "b"));
EXPECT_FALSE(testBytes(*filter, "c"));
EXPECT_TRUE(testBytes(*filter, {}));
EXPECT_TRUE(filter->testBytesRange("b", "c", false));

// >= b
filter = greaterThanOrEqual("b");
EXPECT_FALSE(filter->testBytes("a", 1));
EXPECT_TRUE(filter->testBytes("b", 1));
EXPECT_TRUE(filter->testBytes("c", 1));
EXPECT_FALSE(filter->testBytes(nullptr, 0));
EXPECT_FALSE(testBytes(*filter, "a"));
EXPECT_TRUE(testBytes(*filter, "b"));
EXPECT_TRUE(testBytes(*filter, "c"));
EXPECT_FALSE(testBytes(*filter, {}));
EXPECT_TRUE(filter->testBytesRange("a", "b", false));

// > b
filter = greaterThan("b");
EXPECT_FALSE(filter->testBytes("a", 1));
EXPECT_FALSE(filter->testBytes("b", 1));
EXPECT_TRUE(filter->testBytes("c", 1));
EXPECT_FALSE(filter->testBytes(nullptr, 0));
EXPECT_FALSE(testBytes(*filter, "a"));
EXPECT_FALSE(testBytes(*filter, "b"));
EXPECT_TRUE(testBytes(*filter, "c"));
EXPECT_FALSE(testBytes(*filter, {}));
EXPECT_FALSE(filter->testBytesRange("a", "b", false));

// < ''
filter = lessThan("");
EXPECT_FALSE(filter->testBytes(nullptr, 0));
EXPECT_FALSE(filter->testBytes("abc", 3));
EXPECT_FALSE(testBytes(*filter, {}));
EXPECT_FALSE(testBytes(*filter, "abc"));

// <= ''
filter = lessThanOrEqual("");
EXPECT_TRUE(filter->testBytes(nullptr, 0));
EXPECT_FALSE(filter->testBytes("abc", 3));
EXPECT_TRUE(testBytes(*filter, {}));
EXPECT_FALSE(testBytes(*filter, "abc"));

// > ''
filter = greaterThan("");
EXPECT_FALSE(filter->testBytes(nullptr, 0));
EXPECT_TRUE(filter->testBytes("abc", 3));
EXPECT_FALSE(testBytes(*filter, {}));
EXPECT_TRUE(testBytes(*filter, "abc"));

// >= ''
filter = greaterThanOrEqual("");
EXPECT_TRUE(filter->testBytes(nullptr, 0));
EXPECT_TRUE(filter->testBytes("abc", 3));
EXPECT_TRUE(testBytes(*filter, {}));
EXPECT_TRUE(testBytes(*filter, "abc"));
}

TEST(FilterTest, negatedBytesRange) {
auto filter = notBetween("a", "c");

EXPECT_TRUE(filter->testBytes("A", 1));
EXPECT_TRUE(filter->testBytes(nullptr, 0));
EXPECT_TRUE(filter->testBytes("ca", 2));
EXPECT_TRUE(filter->testBytes("z", 1));
EXPECT_TRUE(testBytes(*filter, "A"));
EXPECT_TRUE(testBytes(*filter, {}));
EXPECT_TRUE(testBytes(*filter, "ca"));
EXPECT_TRUE(testBytes(*filter, "z"));

EXPECT_FALSE(filter->testBytes("a", 1));
EXPECT_FALSE(filter->testBytes("apple", 5));
EXPECT_FALSE(filter->testBytes("c", 1));
EXPECT_FALSE(testBytes(*filter, "a"));
EXPECT_FALSE(testBytes(*filter, "apple"));
EXPECT_FALSE(testBytes(*filter, "c"));
EXPECT_FALSE(filter->testNull());

EXPECT_TRUE(filter->testLength(1));
Expand All @@ -974,8 +982,8 @@ TEST(FilterTest, negatedBytesRange) {
EXPECT_FALSE(filter->isUpperExclusive());

filter = notBetweenExclusive("b", "d");
EXPECT_TRUE(filter->testBytes("b", 1));
EXPECT_TRUE(filter->testBytes("d", 1));
EXPECT_TRUE(testBytes(*filter, "b"));
EXPECT_TRUE(testBytes(*filter, "d"));

EXPECT_TRUE(filter->testBytesRange("b", "c", false));
EXPECT_TRUE(filter->testBytesRange("c", "d", false));
Expand Down Expand Up @@ -2039,3 +2047,6 @@ TEST(FilterTest, timestampRange) {
EXPECT_TRUE(filter->testTimestampRange(
Timestamp(5, 123000000), Timestamp(30, 123000000), true));
}

} // namespace
} // namespace facebook::velox
Loading