// Patch summary (leading commentary; everything before the first "diff --git" is not part of any hunk).
//
// velox/type/Filter.h
//   * Filter gains a virtual testStringView(const StringView&) whose default
//     forwards to testBytes(view.data(), view.size()).
//   * BytesRange keeps lowerView_/upperView_, built from lower_/upper_ in both
//     constructors, and overrides testStringView: equality against lowerView_
//     when singleValue_ is set, otherwise lower/upper bound checks that honor
//     the unbounded and exclusive flags using StringView comparisons.
//   * NegatedBytesRange::testStringView negates nonNegated_->testStringView.
//   * applyFilter(TFilter&, StringView) calls testStringView instead of
//     testBytes(value.data(), value.size()).
//
// velox/type/tests/FilterTest.cpp
//   * The tests move into namespace facebook::velox plus an anonymous namespace.
//   * A testBytes(filter, string_view) helper runs both testBytes and
//     testStringView and checks that they agree; call sites are converted to it.
//   * The agreement check uses EXPECT_EQ so a mismatch is reported as a gtest
//     failure instead of surfacing as an exception thrown from the test body.
//   * The "It was the best of times, zzz" probe passes the literal itself; an
//     explicit length of 30 would also cover the terminating NUL of the
//     29-character literal.
//
// NOTE(review): lowerView_/upperView_ presumably refer to the storage of
// lower_/upper_. The (other, nullAllowed) constructor rebinds them to the new
// object's strings; confirm that no implicitly generated copy/move constructor
// of BytesRange is reachable, since it would copy the views as-is.
diff --git a/velox/type/Filter.h b/velox/type/Filter.h index ee1a5f74d72d..1a7b47608b53 100644 --- a/velox/type/Filter.h +++ b/velox/type/Filter.h @@ -200,6 +200,10 @@ class Filter : public velox::ISerializable { VELOX_UNSUPPORTED("{}: testBytes() is not supported.", toString()); } + virtual bool testStringView(const StringView& view) const { + return testBytes(view.data(), view.size()); + } + virtual bool testTimestamp(const Timestamp& /* unused */) const { VELOX_UNSUPPORTED("{}: testTimestamp() is not supported.", toString()); } @@ -1700,6 +1704,8 @@ class BytesRange final : public AbstractRange { FilterKind::kBytesRange), lower_(std::move(lower)), upper_(std::move(upper)), + lowerView_(lower_), + upperView_(upper_), singleValue_( !lowerExclusive_ && !upperExclusive_ && !lowerUnbounded_ && !upperUnbounded_ && lower_ == upper_) { @@ -1717,6 +1723,8 @@ class BytesRange final : public AbstractRange { FilterKind::kBytesRange), lower_(other.lower_), upper_(other.upper_), + lowerView_(lower_), + upperView_(upper_), singleValue_(other.singleValue_) {} folly::dynamic serialize() const override; @@ -1744,6 +1752,35 @@ class BytesRange final : public AbstractRange { bool testBytes(const char* value, int32_t length) const final; + bool testStringView(const StringView& view) const final { + if (singleValue_) { + return view == lowerView_; + } + if (!lowerUnbounded_) { + if (lowerExclusive_) { + if (view <= lowerView_) { + return false; + } + } else { + if (view < lowerView_) { + return false; + } + } + } + if (!upperUnbounded_) { + if (upperExclusive_) { + if (view >= upperView_) { + return false; + } + } else { + if (view > upperView_) { + return false; + } + } + } + return true; + } + bool testBytesRange( std::optional min, std::optional max, @@ -1798,6 +1835,8 @@ class BytesRange final : public AbstractRange { private: const std::string lower_; const std::string upper_; + const StringView lowerView_; + const StringView upperView_; const bool singleValue_; }; @@ -1856,6 
+1895,10 @@ class NegatedBytesRange final : public Filter { return !nonNegated_->testBytes(value, length); } + bool testStringView(const StringView& view) const final { + return !nonNegated_->testStringView(view); + } + bool testBytesRange( std::optional min, std::optional max, @@ -2256,7 +2299,7 @@ static inline bool applyFilter(TFilter& filter, std::string_view value) { template static inline bool applyFilter(TFilter& filter, StringView value) { - return filter.testBytes(value.data(), value.size()); + return filter.testStringView(value); } // Creates a hash or bitmap based IN filter depending on value distribution. diff --git a/velox/type/tests/FilterTest.cpp b/velox/type/tests/FilterTest.cpp index ac78b5f86f41..1fe37d37d09b 100644 --- a/velox/type/tests/FilterTest.cpp +++ b/velox/type/tests/FilterTest.cpp @@ -26,7 +26,9 @@ #include -using namespace facebook::velox; +namespace facebook::velox { +namespace { + using namespace facebook::velox::common; using namespace facebook::velox::exec; @@ -792,11 +794,17 @@ TEST(FilterTest, floatRange) { EXPECT_TRUE(filter->testFloat(100)); } +bool testBytes(const Filter& filter, std::string_view value) { + bool result = filter.testBytes(value.data(), value.size()); + EXPECT_EQ(filter.testStringView(StringView(value)), result); + return result; +} + TEST(FilterTest, bytesRange) { { auto filter = equal("abc"); - EXPECT_TRUE(filter->testBytes("abc", 3)); - EXPECT_FALSE(filter->testBytes("acb", 3)); + EXPECT_TRUE(testBytes(*filter, "abc")); + EXPECT_FALSE(testBytes(*filter, "acb")); EXPECT_TRUE(filter->testLength(3)); // The bit for lane 2 should be set. 
int32_t lens[] = {0, 1, 3, 0, 4, 10, 11, 12}; @@ -804,8 +812,8 @@ TEST(FilterTest, bytesRange) { 4, simd::toBitMask(filter->testLengths(xsimd::load_unaligned(lens)))); EXPECT_FALSE(filter->testNull()); - EXPECT_FALSE(filter->testBytes("apple", 5)); - EXPECT_FALSE(filter->testBytes(nullptr, 0)); + EXPECT_FALSE(testBytes(*filter, "apple")); + EXPECT_FALSE(testBytes(*filter, {})); EXPECT_FALSE(filter->testLength(4)); EXPECT_TRUE(filter->testBytesRange("abc", "abc", false)); @@ -824,132 +832,132 @@ TEST(FilterTest, bytesRange) { // = '' filter = equal(""); - EXPECT_TRUE(filter->testBytes(nullptr, 0)); - EXPECT_FALSE(filter->testBytes("abc", 3)); + EXPECT_TRUE(testBytes(*filter, {})); + EXPECT_FALSE(testBytes(*filter, "abc")); } char const* theBestOfTimes = "It was the best of times, it was the worst of times, it was the age of wisdom, it was the age of foolishness, it was the epoch of belief, it was the epoch of incredulity,..."; auto filter = lessThanOrEqual(theBestOfTimes); - EXPECT_TRUE(filter->testBytes(theBestOfTimes, std::strlen(theBestOfTimes))); - EXPECT_TRUE(filter->testBytes(theBestOfTimes, 5)); - EXPECT_TRUE(filter->testBytes(theBestOfTimes, 50)); - EXPECT_TRUE(filter->testBytes(theBestOfTimes, 100)); + EXPECT_TRUE(testBytes(*filter, theBestOfTimes)); + EXPECT_TRUE(testBytes(*filter, {theBestOfTimes, 5})); + EXPECT_TRUE(testBytes(*filter, {theBestOfTimes, 50})); + EXPECT_TRUE(testBytes(*filter, {theBestOfTimes, 100})); // testLength is true of all lengths for a range filter. 
EXPECT_TRUE(filter->testLength(1)); EXPECT_TRUE(filter->testLength(1000)); EXPECT_FALSE(filter->testNull()); - EXPECT_FALSE(filter->testBytes("Zzz", 3)); - EXPECT_FALSE(filter->testBytes("It was the best of times, zzz", 30)); + EXPECT_FALSE(testBytes(*filter, "Zzz")); + EXPECT_FALSE(testBytes(*filter, "It was the best of times, zzz")); EXPECT_TRUE(filter->testBytesRange("Apple", "banana", false)); EXPECT_FALSE(filter->testBytesRange("Pear", "Plum", false)); EXPECT_FALSE(filter->testBytesRange("apple", "banana", false)); filter = greaterThanOrEqual("abc"); - EXPECT_TRUE(filter->testBytes("abc", 3)); - EXPECT_TRUE(filter->testBytes("ad", 2)); - EXPECT_TRUE(filter->testBytes("apple", 5)); - EXPECT_TRUE(filter->testBytes("banana", 6)); + EXPECT_TRUE(testBytes(*filter, "abc")); + EXPECT_TRUE(testBytes(*filter, "ad")); + EXPECT_TRUE(testBytes(*filter, "apple")); + EXPECT_TRUE(testBytes(*filter, "banana")); EXPECT_FALSE(filter->testNull()); - EXPECT_FALSE(filter->testBytes("ab", 2)); - EXPECT_FALSE(filter->testBytes("_abc", 4)); + EXPECT_FALSE(testBytes(*filter, "ab")); + EXPECT_FALSE(testBytes(*filter, "_abc")); filter = between("apple", "banana"); - EXPECT_TRUE(filter->testBytes("apple", 5)); - EXPECT_TRUE(filter->testBytes("banana", 6)); - EXPECT_TRUE(filter->testBytes("avocado", 7)); + EXPECT_TRUE(testBytes(*filter, "apple")); + EXPECT_TRUE(testBytes(*filter, "banana")); + EXPECT_TRUE(testBytes(*filter, "avocado")); EXPECT_FALSE(filter->testNull()); - EXPECT_FALSE(filter->testBytes("camel", 5)); - EXPECT_FALSE(filter->testBytes("_abc", 4)); + EXPECT_FALSE(testBytes(*filter, "camel")); + EXPECT_FALSE(testBytes(*filter, "_abc")); filter = std::make_unique( "apple", false, true, "banana", false, false, false); - EXPECT_TRUE(filter->testBytes("banana", 6)); - EXPECT_TRUE(filter->testBytes("avocado", 7)); + EXPECT_TRUE(testBytes(*filter, "banana")); + EXPECT_TRUE(testBytes(*filter, "avocado")); EXPECT_FALSE(filter->testNull()); - 
EXPECT_FALSE(filter->testBytes("apple", 5)); - EXPECT_FALSE(filter->testBytes("camel", 5)); - EXPECT_FALSE(filter->testBytes("_abc", 4)); + EXPECT_FALSE(testBytes(*filter, "apple")); + EXPECT_FALSE(testBytes(*filter, "camel")); + EXPECT_FALSE(testBytes(*filter, "_abc")); filter = std::make_unique( "apple", false, true, "banana", false, true, false); - EXPECT_TRUE(filter->testBytes("avocado", 7)); + EXPECT_TRUE(testBytes(*filter, "avocado")); EXPECT_FALSE(filter->testNull()); - EXPECT_FALSE(filter->testBytes("apple", 5)); - EXPECT_FALSE(filter->testBytes("banana", 6)); - EXPECT_FALSE(filter->testBytes("camel", 5)); - EXPECT_FALSE(filter->testBytes("_abc", 4)); + EXPECT_FALSE(testBytes(*filter, "apple")); + EXPECT_FALSE(testBytes(*filter, "banana")); + EXPECT_FALSE(testBytes(*filter, "camel")); + EXPECT_FALSE(testBytes(*filter, "_abc")); // < b filter = lessThan("b"); - EXPECT_TRUE(filter->testBytes("a", 1)); - EXPECT_FALSE(filter->testBytes("b", 1)); - EXPECT_FALSE(filter->testBytes("c", 1)); - EXPECT_TRUE(filter->testBytes(nullptr, 0)); + EXPECT_TRUE(testBytes(*filter, "a")); + EXPECT_FALSE(testBytes(*filter, "b")); + EXPECT_FALSE(testBytes(*filter, "c")); + EXPECT_TRUE(testBytes(*filter, {})); EXPECT_FALSE(filter->testBytesRange("b", "c", false)); // <= b filter = lessThanOrEqual("b"); - EXPECT_TRUE(filter->testBytes("a", 1)); - EXPECT_TRUE(filter->testBytes("b", 1)); - EXPECT_FALSE(filter->testBytes("c", 1)); - EXPECT_TRUE(filter->testBytes(nullptr, 0)); + EXPECT_TRUE(testBytes(*filter, "a")); + EXPECT_TRUE(testBytes(*filter, "b")); + EXPECT_FALSE(testBytes(*filter, "c")); + EXPECT_TRUE(testBytes(*filter, {})); EXPECT_TRUE(filter->testBytesRange("b", "c", false)); // >= b filter = greaterThanOrEqual("b"); - EXPECT_FALSE(filter->testBytes("a", 1)); - EXPECT_TRUE(filter->testBytes("b", 1)); - EXPECT_TRUE(filter->testBytes("c", 1)); - EXPECT_FALSE(filter->testBytes(nullptr, 0)); + EXPECT_FALSE(testBytes(*filter, "a")); + EXPECT_TRUE(testBytes(*filter, "b")); + 
EXPECT_TRUE(testBytes(*filter, "c")); + EXPECT_FALSE(testBytes(*filter, {})); EXPECT_TRUE(filter->testBytesRange("a", "b", false)); // > b filter = greaterThan("b"); - EXPECT_FALSE(filter->testBytes("a", 1)); - EXPECT_FALSE(filter->testBytes("b", 1)); - EXPECT_TRUE(filter->testBytes("c", 1)); - EXPECT_FALSE(filter->testBytes(nullptr, 0)); + EXPECT_FALSE(testBytes(*filter, "a")); + EXPECT_FALSE(testBytes(*filter, "b")); + EXPECT_TRUE(testBytes(*filter, "c")); + EXPECT_FALSE(testBytes(*filter, {})); EXPECT_FALSE(filter->testBytesRange("a", "b", false)); // < '' filter = lessThan(""); - EXPECT_FALSE(filter->testBytes(nullptr, 0)); - EXPECT_FALSE(filter->testBytes("abc", 3)); + EXPECT_FALSE(testBytes(*filter, {})); + EXPECT_FALSE(testBytes(*filter, "abc")); // <= '' filter = lessThanOrEqual(""); - EXPECT_TRUE(filter->testBytes(nullptr, 0)); - EXPECT_FALSE(filter->testBytes("abc", 3)); + EXPECT_TRUE(testBytes(*filter, {})); + EXPECT_FALSE(testBytes(*filter, "abc")); // > '' filter = greaterThan(""); - EXPECT_FALSE(filter->testBytes(nullptr, 0)); - EXPECT_TRUE(filter->testBytes("abc", 3)); + EXPECT_FALSE(testBytes(*filter, {})); + EXPECT_TRUE(testBytes(*filter, "abc")); // >= '' filter = greaterThanOrEqual(""); - EXPECT_TRUE(filter->testBytes(nullptr, 0)); - EXPECT_TRUE(filter->testBytes("abc", 3)); + EXPECT_TRUE(testBytes(*filter, {})); + EXPECT_TRUE(testBytes(*filter, "abc")); } TEST(FilterTest, negatedBytesRange) { auto filter = notBetween("a", "c"); - EXPECT_TRUE(filter->testBytes("A", 1)); - EXPECT_TRUE(filter->testBytes(nullptr, 0)); - EXPECT_TRUE(filter->testBytes("ca", 2)); - EXPECT_TRUE(filter->testBytes("z", 1)); + EXPECT_TRUE(testBytes(*filter, "A")); + EXPECT_TRUE(testBytes(*filter, {})); + EXPECT_TRUE(testBytes(*filter, "ca")); + EXPECT_TRUE(testBytes(*filter, "z")); - EXPECT_FALSE(filter->testBytes("a", 1)); - EXPECT_FALSE(filter->testBytes("apple", 5)); - EXPECT_FALSE(filter->testBytes("c", 1)); + EXPECT_FALSE(testBytes(*filter, "a")); + 
EXPECT_FALSE(testBytes(*filter, "apple")); + EXPECT_FALSE(testBytes(*filter, "c")); EXPECT_FALSE(filter->testNull()); EXPECT_TRUE(filter->testLength(1)); @@ -974,8 +982,8 @@ TEST(FilterTest, negatedBytesRange) { EXPECT_FALSE(filter->isUpperExclusive()); filter = notBetweenExclusive("b", "d"); - EXPECT_TRUE(filter->testBytes("b", 1)); - EXPECT_TRUE(filter->testBytes("d", 1)); + EXPECT_TRUE(testBytes(*filter, "b")); + EXPECT_TRUE(testBytes(*filter, "d")); EXPECT_TRUE(filter->testBytesRange("b", "c", false)); EXPECT_TRUE(filter->testBytesRange("c", "d", false)); @@ -2039,3 +2047,6 @@ TEST(FilterTest, timestampRange) { EXPECT_TRUE(filter->testTimestampRange( Timestamp(5, 123000000), Timestamp(30, 123000000), true)); } + +} // namespace +} // namespace facebook::velox