Skip to content

Commit ad1f393

Browse files
pedroerpfacebook-github-bot
authored andcommitted
Enable more combinations in join fuzzer (#10676)
Summary: Pull Request resolved: #10676 Refactoring the isSupported() method for MergeJoin and NestedLoopJoin to centralize them, and enabling a few types not tested in JoinFuzzer today. Reviewed By: xiaoxmeng Differential Revision: D60797642 fbshipit-source-id: fcd4359777c4a7edbc67a1198142dba852bfc17b
1 parent f40fa8a commit ad1f393

File tree

5 files changed

+71
-45
lines changed

5 files changed

+71
-45
lines changed

velox/core/PlanNode.cpp

Lines changed: 56 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1093,10 +1093,50 @@ PlanNodePtr HashJoinNode::create(const folly::dynamic& obj, void* context) {
10931093
outputType);
10941094
}
10951095

1096+
MergeJoinNode::MergeJoinNode(
1097+
const PlanNodeId& id,
1098+
JoinType joinType,
1099+
const std::vector<FieldAccessTypedExprPtr>& leftKeys,
1100+
const std::vector<FieldAccessTypedExprPtr>& rightKeys,
1101+
TypedExprPtr filter,
1102+
PlanNodePtr left,
1103+
PlanNodePtr right,
1104+
RowTypePtr outputType)
1105+
: AbstractJoinNode(
1106+
id,
1107+
joinType,
1108+
leftKeys,
1109+
rightKeys,
1110+
std::move(filter),
1111+
std::move(left),
1112+
std::move(right),
1113+
std::move(outputType)) {
1114+
VELOX_USER_CHECK(
1115+
isSupported(joinType_),
1116+
"The join type is not supported by merge join: ",
1117+
joinTypeName(joinType_));
1118+
}
1119+
10961120
folly::dynamic MergeJoinNode::serialize() const {
10971121
return serializeBase();
10981122
}
10991123

1124+
// static
1125+
bool MergeJoinNode::isSupported(core::JoinType joinType) {
1126+
switch (joinType) {
1127+
case core::JoinType::kInner:
1128+
case core::JoinType::kLeft:
1129+
case core::JoinType::kRight:
1130+
case core::JoinType::kLeftSemiFilter:
1131+
case core::JoinType::kRightSemiFilter:
1132+
case core::JoinType::kAnti:
1133+
return true;
1134+
1135+
default:
1136+
return false;
1137+
}
1138+
}
1139+
11001140
// static
11011141
PlanNodePtr MergeJoinNode::create(const folly::dynamic& obj, void* context) {
11021142
auto sources = deserializeSources(obj, context);
@@ -1136,9 +1176,8 @@ NestedLoopJoinNode::NestedLoopJoinNode(
11361176
sources_({std::move(left), std::move(right)}),
11371177
outputType_(std::move(outputType)) {
11381178
VELOX_USER_CHECK(
1139-
core::isInnerJoin(joinType_) || core::isLeftJoin(joinType_) ||
1140-
core::isRightJoin(joinType_) || core::isFullJoin(joinType_),
1141-
"{} unsupported, NestedLoopJoin only supports inner and outer join",
1179+
isSupported(joinType_),
1180+
"The join type is not supported by nested loop join: ",
11421181
joinTypeName(joinType_));
11431182

11441183
auto leftType = sources_[0]->outputType();
@@ -1170,6 +1209,20 @@ NestedLoopJoinNode::NestedLoopJoinNode(
11701209
right,
11711210
outputType) {}
11721211

1212+
// static
1213+
bool NestedLoopJoinNode::isSupported(core::JoinType joinType) {
1214+
switch (joinType) {
1215+
case core::JoinType::kInner:
1216+
case core::JoinType::kLeft:
1217+
case core::JoinType::kRight:
1218+
case core::JoinType::kFull:
1219+
return true;
1220+
1221+
default:
1222+
return false;
1223+
}
1224+
}
1225+
11731226
void NestedLoopJoinNode::addDetails(std::stringstream& stream) const {
11741227
stream << joinTypeName(joinType_);
11751228
if (joinCondition_) {

velox/core/PlanNode.h

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,7 @@ namespace facebook::velox::core {
2727

2828
typedef std::string PlanNodeId;
2929

30-
/**
31-
* Generic representation of InsertTable
32-
*/
30+
/// Generic representation of InsertTable
3331
struct InsertTableHandle {
3432
public:
3533
InsertTableHandle(
@@ -1643,31 +1641,25 @@ class MergeJoinNode : public AbstractJoinNode {
16431641
TypedExprPtr filter,
16441642
PlanNodePtr left,
16451643
PlanNodePtr right,
1646-
RowTypePtr outputType)
1647-
: AbstractJoinNode(
1648-
id,
1649-
joinType,
1650-
leftKeys,
1651-
rightKeys,
1652-
std::move(filter),
1653-
std::move(left),
1654-
std::move(right),
1655-
std::move(outputType)) {}
1644+
RowTypePtr outputType);
16561645

16571646
std::string_view name() const override {
16581647
return "MergeJoin";
16591648
}
16601649

16611650
folly::dynamic serialize() const override;
16621651

1652+
/// If merge join supports this join type.
1653+
static bool isSupported(core::JoinType joinType);
1654+
16631655
static PlanNodePtr create(const folly::dynamic& obj, void* context);
16641656
};
16651657

16661658
/// Represents inner/outer nested loop joins. Translates to an
16671659
/// exec::NestedLoopJoinProbe and exec::NestedLoopJoinBuild. A separate pipeline
16681660
/// is produced for the build side when generating exec::Operators.
16691661
///
1670-
/// Nested loop join supports both equal and non-equal joins. Expressions
1662+
/// Nested loop join (NLJ) supports both equal and non-equal joins. Expressions
16711663
/// specified in joinCondition are evaluated on every combination of left/right
16721664
/// tuple, to emit result. Results are emitted following the same input order of
16731665
/// probe rows for inner and left joins, for each thread of execution.
@@ -1712,6 +1704,9 @@ class NestedLoopJoinNode : public PlanNode {
17121704

17131705
folly::dynamic serialize() const override;
17141706

1707+
/// If nested loop join supports this join type.
1708+
static bool isSupported(core::JoinType joinType);
1709+
17151710
static PlanNodePtr create(const folly::dynamic& obj, void* context);
17161711

17171712
private:

velox/exec/MergeJoin.cpp

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -35,27 +35,11 @@ MergeJoin::MergeJoin(
3535
numKeys_{joinNode->leftKeys().size()},
3636
joinNode_(joinNode) {
3737
VELOX_USER_CHECK(
38-
isSupported(joinNode_->joinType()),
38+
core::MergeJoinNode::isSupported(joinNode_->joinType()),
3939
"The join type is not supported by merge join: ",
4040
joinTypeName(joinNode_->joinType()));
4141
}
4242

43-
// static
44-
bool MergeJoin::isSupported(core::JoinType joinType) {
45-
switch (joinType) {
46-
case core::JoinType::kInner:
47-
case core::JoinType::kLeft:
48-
case core::JoinType::kRight:
49-
case core::JoinType::kLeftSemiFilter:
50-
case core::JoinType::kRightSemiFilter:
51-
case core::JoinType::kAnti:
52-
return true;
53-
54-
default:
55-
return false;
56-
}
57-
}
58-
5943
void MergeJoin::initialize() {
6044
Operator::initialize();
6145
VELOX_CHECK_NOT_NULL(joinNode_);

velox/exec/MergeJoin.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,9 +68,6 @@ class MergeJoin : public Operator {
6868
Operator::close();
6969
}
7070

71-
/// If merge join supports this join type.
72-
static bool isSupported(core::JoinType joinType);
73-
7471
private:
7572
// Sets up 'filter_' and related member variables.
7673
void initializeFilter(

velox/exec/fuzzer/JoinFuzzer.cpp

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
#include "velox/connectors/hive/HiveConnector.h"
2020
#include "velox/connectors/hive/HiveConnectorSplit.h"
2121
#include "velox/connectors/hive/PartitionIdGenerator.h"
22-
#include "velox/exec/MergeJoin.h"
2322
#include "velox/exec/OperatorUtils.h"
2423
#include "velox/exec/fuzzer/FuzzerUtil.h"
2524
#include "velox/exec/fuzzer/ReferenceQueryRunner.h"
@@ -148,7 +147,7 @@ class JoinFuzzer {
148147
const std::vector<std::string>& outputColumns);
149148

150149
// Returns a PlanWithSplits for NestedLoopJoin with inputs from Values nodes.
151-
// If withFilter is true, uses the equiality filter between probeKeys and
150+
// If withFilter is true, uses the equality filter between probeKeys and
152151
// buildKeys as the join filter. Uses empty join filter otherwise.
153152
JoinFuzzer::PlanWithSplits makeNestedLoopJoinPlan(
154153
core::JoinType joinType,
@@ -860,7 +859,7 @@ void JoinFuzzer::makeAlternativePlans(
860859
.planNode()});
861860

862861
// Use OrderBy + MergeJoin
863-
if (exec::MergeJoin::isSupported(joinNode->joinType())) {
862+
if (core::MergeJoinNode::isSupported(joinNode->joinType())) {
864863
auto planWithSplits = makeMergeJoinPlan(
865864
joinType, probeKeys, buildKeys, probeInput, buildInput, outputColumns);
866865
plans.push_back(planWithSplits);
@@ -869,8 +868,7 @@ void JoinFuzzer::makeAlternativePlans(
869868
}
870869

871870
// Use NestedLoopJoin.
872-
if (joinNode->isInnerJoin() || joinNode->isLeftJoin() ||
873-
joinNode->isFullJoin()) {
871+
if (core::NestedLoopJoinNode::isSupported(joinNode->joinType())) {
874872
auto planWithSplits = makeNestedLoopJoinPlan(
875873
joinType, probeKeys, buildKeys, probeInput, buildInput, outputColumns);
876874
plans.push_back(planWithSplits);
@@ -1285,7 +1283,7 @@ void JoinFuzzer::addPlansWithTableScan(
12851283
}
12861284

12871285
// Add ungrouped MergeJoin with TableScan.
1288-
if (joinNode->isInnerJoin() || joinNode->isLeftJoin()) {
1286+
if (core::MergeJoinNode::isSupported(joinNode->joinType())) {
12891287
auto planWithSplits = makeMergeJoinPlanWithTableScan(
12901288
joinType,
12911289
probeType,
@@ -1307,8 +1305,7 @@ void JoinFuzzer::addPlansWithTableScan(
13071305
}
13081306

13091307
// Add ungrouped NestedLoopJoin with TableScan.
1310-
if (joinNode->isInnerJoin() || joinNode->isLeftJoin() ||
1311-
joinNode->isFullJoin()) {
1308+
if (core::NestedLoopJoinNode::isSupported(joinNode->joinType())) {
13121309
auto planWithSplits = makeNestedLoopJoinPlanWithTableScan(
13131310
joinType,
13141311
probeType,

0 commit comments

Comments
 (0)