diff --git a/README.md b/README.md index 1145db45..c327bdd3 100755 --- a/README.md +++ b/README.md @@ -88,15 +88,17 @@ run `cargo run -p tpcc --release` to run tpcc - 32.0 GB - KIOXIA-EXCERIA PLUS G3 SSD - Tips: TPC-C currently only supports single thread + +All cases have been fully optimized. ```shell <90th Percentile RT (MaxRT)> - New-Order : 0.002 (0.005) - Payment : 0.001 (0.003) -Order-Status : 0.057 (0.088) + New-Order : 0.002 (0.012) + Payment : 0.001 (0.002) +Order-Status : 0.002 (0.019) Delivery : 0.001 (0.001) - Stock-Level : 0.002 (0.006) + Stock-Level : 0.002 (0.018) -11125 Tpmc +37166 Tpmc ``` #### 👉[check more](tpcc/README.md) diff --git a/src/binder/aggregate.rs b/src/binder/aggregate.rs index d9632afd..7c74afb2 100644 --- a/src/binder/aggregate.rs +++ b/src/binder/aggregate.rs @@ -98,7 +98,7 @@ impl> Binder<'_, '_, T, A> return_orderby.push(SortField::new( expr, asc.is_none_or(|asc| asc), - nulls_first.is_some_and(|first| first), + nulls_first.unwrap_or(true), )); } Some(return_orderby) diff --git a/src/binder/alter_table.rs b/src/binder/alter_table.rs index fb41e175..1f1fc218 100644 --- a/src/binder/alter_table.rs +++ b/src/binder/alter_table.rs @@ -44,7 +44,7 @@ impl> Binder<'_, '_, T, A> if_not_exists, column_def, } => { - let plan = TableScanOperator::build(table_name.clone(), table, true); + let plan = TableScanOperator::build(table_name.clone(), table, true)?; let column = self.bind_column(column_def, None)?; if !is_valid_identifier(column.name()) { @@ -66,7 +66,7 @@ impl> Binder<'_, '_, T, A> if_exists, .. 
} => { - let plan = TableScanOperator::build(table_name.clone(), table, true); + let plan = TableScanOperator::build(table_name.clone(), table, true)?; let column_name = column_name.value.clone(); LogicalPlan::new( diff --git a/src/binder/analyze.rs b/src/binder/analyze.rs index cd2747c7..52168585 100644 --- a/src/binder/analyze.rs +++ b/src/binder/analyze.rs @@ -40,7 +40,7 @@ impl> Binder<'_, '_, T, A> .ok_or(DatabaseError::TableNotFound)?; let index_metas = table.indexes.clone(); - let scan_op = TableScanOperator::build(table_name.clone(), table, false); + let scan_op = TableScanOperator::build(table_name.clone(), table, false)?; Ok(LogicalPlan::new( Operator::Analyze(AnalyzeOperator { table_name, diff --git a/src/binder/copy.rs b/src/binder/copy.rs index 60fa5aed..5ea67653 100644 --- a/src/binder/copy.rs +++ b/src/binder/copy.rs @@ -118,7 +118,9 @@ impl> Binder<'_, '_, T, A> target: ext_source, schema_ref, }), - Childrens::Only(Box::new(TableScanOperator::build(table_name, table, false))), + Childrens::Only(Box::new(TableScanOperator::build( + table_name, table, false, + )?)), )) } else { // COPY FROM diff --git a/src/binder/create_index.rs b/src/binder/create_index.rs index 03e87b46..06d75df8 100644 --- a/src/binder/create_index.rs +++ b/src/binder/create_index.rs @@ -49,7 +49,7 @@ impl> Binder<'_, '_, T, A> .source_and_bind(table_name.clone(), None, None, false)? 
.ok_or(DatabaseError::SourceNotFound)?; let plan = match source { - Source::Table(table) => TableScanOperator::build(table_name.clone(), table, true), + Source::Table(table) => TableScanOperator::build(table_name.clone(), table, true)?, Source::View(view) => LogicalPlan::clone(&view.plan), }; let mut columns = Vec::with_capacity(exprs.len()); diff --git a/src/binder/delete.rs b/src/binder/delete.rs index b9fb3fd0..76f5826b 100644 --- a/src/binder/delete.rs +++ b/src/binder/delete.rs @@ -51,7 +51,7 @@ impl> Binder<'_, '_, T, A> .iter() .map(|(_, column)| column.clone()) .collect_vec(); - let mut plan = TableScanOperator::build(table_name.clone(), table, true); + let mut plan = TableScanOperator::build(table_name.clone(), table, true)?; if let Some(alias_idents) = alias_idents { plan = diff --git a/src/binder/select.rs b/src/binder/select.rs index 9d2381c5..a1b18b39 100644 --- a/src/binder/select.rs +++ b/src/binder/select.rs @@ -541,7 +541,7 @@ impl<'a: 'b, 'b, T: Transaction, A: AsRef<[(&'static str, DataValue)]>> Binder<' .source_and_bind(table_name.clone(), table_alias.as_ref(), join_type, false)? 
.ok_or(DatabaseError::SourceNotFound)?; let mut plan = match source { - Source::Table(table) => TableScanOperator::build(table_name.clone(), table, with_pk), + Source::Table(table) => TableScanOperator::build(table_name.clone(), table, with_pk)?, Source::View(view) => LogicalPlan::clone(&view.plan), }; diff --git a/src/catalog/table.rs b/src/catalog/table.rs index d024c825..b041b420 100644 --- a/src/catalog/table.rs +++ b/src/catalog/table.rs @@ -38,7 +38,7 @@ pub struct TableCatalog { schema_ref: SchemaRef, primary_keys: Vec<(usize, ColumnRef)>, primary_key_indices: PrimaryKeyIndices, - primary_key_type: Option, + primary_key_type: LogicalType, } //TODO: can add some like Table description and other information as attributes @@ -99,6 +99,10 @@ impl TableCatalog { &self.primary_keys } + pub(crate) fn primary_keys_type(&self) -> &LogicalType { + &self.primary_key_type + } + pub(crate) fn primary_keys_indices(&self) -> &PrimaryKeyIndices { &self.primary_key_indices } @@ -144,23 +148,7 @@ impl TableCatalog { } let index_id = self.indexes.last().map(|index| index.id + 1).unwrap_or(0); - let pk_ty = self - .primary_key_type - .get_or_insert_with(|| { - let primary_keys = &self.primary_keys; - - if primary_keys.len() == 1 { - primary_keys[0].1.datatype().clone() - } else { - LogicalType::Tuple( - primary_keys - .iter() - .map(|(_, column)| column.datatype().clone()) - .collect_vec(), - ) - } - }) - .clone(); + let pk_ty = self.primary_key_type.clone(); let mut val_tys = Vec::with_capacity(column_ids.len()); for column_id in column_ids.iter() { @@ -205,7 +193,7 @@ impl TableCatalog { schema_ref: Arc::new(vec![]), primary_keys: vec![], primary_key_indices: Default::default(), - primary_key_type: None, + primary_key_type: LogicalType::SqlNull, }; let mut generator = Generator::new(); for col_catalog in columns.into_iter() { @@ -216,12 +204,26 @@ impl TableCatalog { let (primary_keys, primary_key_indices) = Self::build_primary_keys(&table_catalog.schema_ref); + 
table_catalog.primary_key_type = Self::build_primary_key_type(&primary_keys); table_catalog.primary_keys = primary_keys; table_catalog.primary_key_indices = primary_key_indices; Ok(table_catalog) } + fn build_primary_key_type(primary_keys: &[(usize, ColumnRef)]) -> LogicalType { + if primary_keys.len() == 1 { + primary_keys[0].1.datatype().clone() + } else { + LogicalType::Tuple( + primary_keys + .iter() + .map(|(_, column)| column.datatype().clone()) + .collect_vec(), + ) + } + } + pub(crate) fn reload( name: TableName, column_refs: Vec, @@ -240,6 +242,7 @@ impl TableCatalog { } let schema_ref = Arc::new(column_refs.clone()); let (primary_keys, primary_key_indices) = Self::build_primary_keys(&schema_ref); + let primary_key_type = Self::build_primary_key_type(&primary_keys); Ok(TableCatalog { name, @@ -249,7 +252,7 @@ impl TableCatalog { schema_ref, primary_keys, primary_key_indices, - primary_key_type: None, + primary_key_type, }) } diff --git a/src/db.rs b/src/db.rs index cc2c1546..0677d721 100644 --- a/src/db.rs +++ b/src/db.rs @@ -26,7 +26,7 @@ use crate::function::numbers::Numbers; use crate::function::octet_length::OctetLength; use crate::function::upper::Upper; use crate::optimizer::heuristic::batch::HepBatchStrategy; -use crate::optimizer::heuristic::optimizer::HepOptimizer; +use crate::optimizer::heuristic::optimizer::HepOptimizerPipeline; use crate::optimizer::rule::implementation::ImplementationRuleImpl; use crate::optimizer::rule::normalization::NormalizationRuleImpl; use crate::parser::parse_sql; @@ -148,18 +148,112 @@ impl DataBaseBuilder { meta_cache, table_cache, view_cache, + optimizer_pipeline: default_optimizer_pipeline(), _p: Default::default(), }), }) } } +fn default_optimizer_pipeline() -> HepOptimizerPipeline { + HepOptimizerPipeline::builder() + .before_batch( + "Column Pruning".to_string(), + HepBatchStrategy::once_topdown(), + vec![NormalizationRuleImpl::ColumnPruning], + ) + .before_batch( + "Simplify Filter".to_string(), + 
HepBatchStrategy::fix_point_topdown(10), + vec![ + NormalizationRuleImpl::SimplifyFilter, + NormalizationRuleImpl::ConstantCalculation, + ], + ) + .before_batch( + "Predicate Pushdown".to_string(), + HepBatchStrategy::fix_point_topdown(10), + vec![ + NormalizationRuleImpl::PushPredicateThroughJoin, + NormalizationRuleImpl::PushJoinPredicateIntoScan, + NormalizationRuleImpl::PushPredicateIntoScan, + ], + ) + .before_batch( + "Limit Pushdown".to_string(), + HepBatchStrategy::fix_point_topdown(10), + vec![ + NormalizationRuleImpl::LimitProjectTranspose, + NormalizationRuleImpl::PushLimitThroughJoin, + NormalizationRuleImpl::PushLimitIntoTableScan, + ], + ) + .before_batch( + "Combine Operators".to_string(), + HepBatchStrategy::fix_point_topdown(10), + vec![ + NormalizationRuleImpl::CollapseProject, + NormalizationRuleImpl::CollapseGroupByAgg, + NormalizationRuleImpl::CombineFilter, + ], + ) + .before_batch( + "TopK".to_string(), + HepBatchStrategy::once_topdown(), + vec![NormalizationRuleImpl::TopK], + ) + .after_batch( + "Eliminate Redundant Sort".to_string(), + HepBatchStrategy::once_topdown(), + vec![NormalizationRuleImpl::EliminateRedundantSort], + ) + .after_batch( + "Expression Remapper".to_string(), + HepBatchStrategy::once_topdown(), + vec![ + NormalizationRuleImpl::BindExpressionPosition, + NormalizationRuleImpl::EvaluatorBind, + ], + ) + .implementations(vec![ + // DQL + ImplementationRuleImpl::SimpleAggregate, + ImplementationRuleImpl::GroupByAggregate, + ImplementationRuleImpl::Dummy, + ImplementationRuleImpl::Filter, + ImplementationRuleImpl::HashJoin, + ImplementationRuleImpl::Limit, + ImplementationRuleImpl::Projection, + ImplementationRuleImpl::SeqScan, + ImplementationRuleImpl::IndexScan, + ImplementationRuleImpl::FunctionScan, + ImplementationRuleImpl::Sort, + ImplementationRuleImpl::TopK, + ImplementationRuleImpl::Values, + // DML + ImplementationRuleImpl::Analyze, + ImplementationRuleImpl::CopyFromFile, + ImplementationRuleImpl::CopyToFile, + 
ImplementationRuleImpl::Delete, + ImplementationRuleImpl::Insert, + ImplementationRuleImpl::Update, + // DDL + ImplementationRuleImpl::AddColumn, + ImplementationRuleImpl::CreateTable, + ImplementationRuleImpl::DropColumn, + ImplementationRuleImpl::DropTable, + ImplementationRuleImpl::Truncate, + ]) + .build() +} + pub(crate) struct State { scala_functions: ScalaFunctions, table_functions: TableFunctions, meta_cache: StatisticsMetaCache, table_cache: TableCache, view_cache: ViewCache, + optimizer_pipeline: HepOptimizerPipeline, _p: PhantomData, } @@ -182,6 +276,7 @@ impl State { #[allow(clippy::too_many_arguments)] pub(crate) fn build_plan>( + &self, stmt: &Statement, params: A, table_cache: &TableCache, @@ -211,98 +306,14 @@ impl State { /// Limit(1) /// Project(a,b) let source_plan = binder.bind(stmt)?; - let best_plan = Self::default_optimizer(source_plan) + let best_plan = self + .optimizer_pipeline + .instantiate(source_plan) + .find_best(Some(&transaction.meta_loader(meta_cache)))?; Ok(best_plan) } - pub(crate) fn default_optimizer(source_plan: LogicalPlan) -> HepOptimizer { - HepOptimizer::new(source_plan) - .batch( - "Column Pruning".to_string(), - HepBatchStrategy::once_topdown(), - vec![NormalizationRuleImpl::ColumnPruning], - ) - .batch( - "Simplify Filter".to_string(), - HepBatchStrategy::fix_point_topdown(10), - vec![ - NormalizationRuleImpl::SimplifyFilter, - NormalizationRuleImpl::ConstantCalculation, - ], - ) - .batch( - "Predicate Pushdown".to_string(), - HepBatchStrategy::fix_point_topdown(10), - vec![ - NormalizationRuleImpl::PushPredicateThroughJoin, - NormalizationRuleImpl::PushPredicateIntoScan, - ], - ) - .batch( - "Limit Pushdown".to_string(), - HepBatchStrategy::fix_point_topdown(10), - vec![ - NormalizationRuleImpl::LimitProjectTranspose, - NormalizationRuleImpl::PushLimitThroughJoin, - NormalizationRuleImpl::PushLimitIntoTableScan, - ], - ) - .batch( - "Combine Operators".to_string(), - HepBatchStrategy::fix_point_topdown(10), - vec![ -
NormalizationRuleImpl::CollapseProject, - NormalizationRuleImpl::CollapseGroupByAgg, - NormalizationRuleImpl::CombineFilter, - ], - ) - .batch( - "TopK".to_string(), - HepBatchStrategy::once_topdown(), - vec![NormalizationRuleImpl::TopK], - ) - .batch( - "Expression Remapper".to_string(), - HepBatchStrategy::once_topdown(), - vec![ - NormalizationRuleImpl::BindExpressionPosition, - // TIPS: This rule is necessary - NormalizationRuleImpl::EvaluatorBind, - ], - ) - .implementations(vec![ - // DQL - ImplementationRuleImpl::SimpleAggregate, - ImplementationRuleImpl::GroupByAggregate, - ImplementationRuleImpl::Dummy, - ImplementationRuleImpl::Filter, - ImplementationRuleImpl::HashJoin, - ImplementationRuleImpl::Limit, - ImplementationRuleImpl::Projection, - ImplementationRuleImpl::SeqScan, - ImplementationRuleImpl::IndexScan, - ImplementationRuleImpl::FunctionScan, - ImplementationRuleImpl::Sort, - ImplementationRuleImpl::TopK, - ImplementationRuleImpl::Values, - // DML - ImplementationRuleImpl::Analyze, - ImplementationRuleImpl::CopyFromFile, - ImplementationRuleImpl::CopyToFile, - ImplementationRuleImpl::Delete, - ImplementationRuleImpl::Insert, - ImplementationRuleImpl::Update, - // DLL - ImplementationRuleImpl::AddColumn, - ImplementationRuleImpl::CreateTable, - ImplementationRuleImpl::DropColumn, - ImplementationRuleImpl::DropTable, - ImplementationRuleImpl::Truncate, - ]) - } - fn prepare>(&self, sql: T) -> Result { let mut stmts = parse_sql(sql)?; stmts.pop().ok_or(DatabaseError::EmptyStatement) @@ -314,7 +325,7 @@ impl State { stmt: &Statement, params: A, ) -> Result<(SchemaRef, Executor<'a>), DatabaseError> { - let mut plan = Self::build_plan( + let mut plan = self.build_plan( stmt, params, self.table_cache(), @@ -629,9 +640,9 @@ pub(crate) mod test { assert_eq!( iter.next().unwrap()?.values[0].utf8().unwrap(), - "Projection [t1.a, t1.b] [Project] - Filter (t1.b > 0), Is Having: false [Filter] - TableScan t1 -> [a, b] [SeqScan]" + "Projection [t1.a, t1.b] 
[Project => (Sort Option: Follow)] + Filter (t1.b > 0), Is Having: false [Filter => (Sort Option: Follow)] + TableScan t1 -> [a, b] [SeqScan => (Sort Option: None)]" ) } // Aggregate @@ -651,10 +662,10 @@ pub(crate) mod test { )?; assert_eq!( iter.next().unwrap()?.values[0].utf8().unwrap(), - "Projection [(t1.a + 0), Max((t1.b + 0))] [Project] - Aggregate [Max((t1.b + 0))] -> Group By [(t1.a + 0)] [HashAggregate] - Filter (t1.b > 1), Is Having: false [Filter] - TableScan t1 -> [a, b] [SeqScan]" + "Projection [(t1.a + 0), Max((t1.b + 0))] [Project => (Sort Option: Follow)] + Aggregate [Max((t1.b + 0))] -> Group By [(t1.a + 0)] [HashAggregate => (Sort Option: None)] + Filter (t1.b > 1), Is Having: false [Filter => (Sort Option: Follow)] + TableScan t1 -> [a, b] [SeqScan => (Sort Option: None)]" ) } { @@ -671,14 +682,14 @@ pub(crate) mod test { )?; assert_eq!( iter.next().unwrap()?.values[0].utf8().unwrap(), - "Projection [t1.a, t1.b, 9] [Project] - LeftOuter Join Where (t1.a > 0) [NestLoopJoin] - Projection [t1.a, t1.b] [Project] - Filter (t1.b > 0), Is Having: false [Filter] - TableScan t1 -> [a, b] [SeqScan] - Projection [t1.a, t1.b] [Project] - Filter (t1.a > 1), Is Having: false [Filter] - TableScan t1 -> [a, b] [SeqScan]" + "Projection [t1.a, t1.b, 9] [Project => (Sort Option: Follow)] + LeftOuter Join Where (t1.a > 0) [NestLoopJoin => (Sort Option: None)] + Projection [t1.a, t1.b] [Project => (Sort Option: Follow)] + Filter (t1.b > 0), Is Having: false [Filter => (Sort Option: Follow)] + TableScan t1 -> [a, b] [SeqScan => (Sort Option: None)] + Projection [t1.a, t1.b] [Project => (Sort Option: Follow)] + Filter (t1.a > 1), Is Having: false [Filter => (Sort Option: Follow)] + TableScan t1 -> [a, b] [SeqScan => (Sort Option: None)]" ) } diff --git a/src/execution/dml/copy_to_file.rs b/src/execution/dml/copy_to_file.rs index 1ffaa000..87e55be3 100644 --- a/src/execution/dml/copy_to_file.rs +++ b/src/execution/dml/copy_to_file.rs @@ -195,7 +195,7 @@ mod tests { let 
executor = CopyToFile { op: op.clone(), - input: TableScanOperator::build("t1".to_string().into(), table, true), + input: TableScanOperator::build("t1".to_string().into(), table, true)?, }; let mut executor = executor.execute( ( diff --git a/src/execution/dql/aggregate/hash_agg.rs b/src/execution/dql/aggregate/hash_agg.rs index da24e7a9..a38c6d58 100644 --- a/src/execution/dql/aggregate/hash_agg.rs +++ b/src/execution/dql/aggregate/hash_agg.rs @@ -131,7 +131,7 @@ mod test { use crate::expression::agg::AggKind; use crate::expression::ScalarExpression; use crate::optimizer::heuristic::batch::HepBatchStrategy; - use crate::optimizer::heuristic::optimizer::HepOptimizer; + use crate::optimizer::heuristic::optimizer::HepOptimizerPipeline; use crate::optimizer::rule::normalization::NormalizationRuleImpl; use crate::planner::operator::aggregate::AggregateOperator; use crate::planner::operator::values::ValuesOperator; @@ -208,8 +208,8 @@ mod test { Childrens::Only(Box::new(input)), ); - let plan = HepOptimizer::new(plan) - .batch( + let pipeline = HepOptimizerPipeline::builder() + .before_batch( "Expression Remapper".to_string(), HepBatchStrategy::once_topdown(), vec![ @@ -218,6 +218,9 @@ mod test { NormalizationRuleImpl::EvaluatorBind, ], ) + .build(); + let plan = pipeline + .instantiate(plan) .find_best::(None)?; let Operator::Aggregate(op) = plan.operator else { diff --git a/src/execution/dql/join/hash/right_join.rs b/src/execution/dql/join/hash/right_join.rs index c0d5d914..325ba836 100644 --- a/src/execution/dql/join/hash/right_join.rs +++ b/src/execution/dql/join/hash/right_join.rs @@ -40,6 +40,7 @@ impl<'a> JoinProbeState<'a> for RightJoinState { } = probe_args { let mut has_filtered = false; + let mut produced = false; for (_, Tuple { values, pk }) in build_state.tuples.iter() { let full_values = Vec::from_iter(values.iter().chain(probe_tuple.values.iter()).cloned()); @@ -49,18 +50,21 @@ impl<'a> JoinProbeState<'a> for RightJoinState { Some(filter_args) => { if 
!throw!(co, filter(&full_values, filter_args)) { has_filtered = true; - co.yield_(Ok(FullJoinState::full_right_row( - left_schema_len, - &probe_tuple, - ))) - .await; continue; } } } + produced = true; co.yield_(Ok(Tuple::new(pk.clone(), full_values))).await; } - build_state.is_used = !has_filtered; + if !produced { + co.yield_(Ok(FullJoinState::full_right_row( + left_schema_len, + &probe_tuple, + ))) + .await; + } + build_state.is_used = produced; build_state.has_filted = has_filtered; return; } diff --git a/src/execution/dql/join/hash_join.rs b/src/execution/dql/join/hash_join.rs index 3366c947..3670ed05 100644 --- a/src/execution/dql/join/hash_join.rs +++ b/src/execution/dql/join/hash_join.rs @@ -273,9 +273,9 @@ mod test { use crate::execution::dql::join::hash_join::HashJoin; use crate::execution::dql::test::build_integers; use crate::execution::{try_collect, ReadExecutor}; - use crate::expression::ScalarExpression; + use crate::expression::{BinaryOperator, ScalarExpression}; use crate::optimizer::heuristic::batch::HepBatchStrategy; - use crate::optimizer::heuristic::optimizer::HepOptimizer; + use crate::optimizer::heuristic::optimizer::HepOptimizerPipeline; use crate::optimizer::rule::normalization::NormalizationRuleImpl; use crate::planner::operator::join::{JoinCondition, JoinOperator, JoinType}; use crate::planner::operator::values::ValuesOperator; @@ -292,6 +292,21 @@ mod test { use std::sync::Arc; use tempfile::TempDir; + fn optimize_exprs(plan: LogicalPlan) -> Result { + HepOptimizerPipeline::builder() + .before_batch( + "Expression Remapper".to_string(), + HepBatchStrategy::once_topdown(), + vec![ + NormalizationRuleImpl::BindExpressionPosition, + NormalizationRuleImpl::EvaluatorBind, + ], + ) + .build() + .instantiate(plan) + .find_best::(None) + } + fn build_join_values() -> ( Vec<(ScalarExpression, ScalarExpression)>, LogicalPlan, @@ -399,17 +414,7 @@ mod test { right: Box::new(right), }, ); - let plan = HepOptimizer::new(plan) - .batch( - "Expression 
Remapper".to_string(), - HepBatchStrategy::once_topdown(), - vec![ - NormalizationRuleImpl::BindExpressionPosition, - // TIPS: This rule is necessary - NormalizationRuleImpl::EvaluatorBind, - ], - ) - .find_best::(None)?; + let plan = optimize_exprs(plan)?; let Operator::Join(op) = plan.operator else { unreachable!() @@ -460,17 +465,7 @@ mod test { right: Box::new(right), }, ); - let plan = HepOptimizer::new(plan) - .batch( - "Expression Remapper".to_string(), - HepBatchStrategy::once_topdown(), - vec![ - NormalizationRuleImpl::BindExpressionPosition, - // TIPS: This rule is necessary - NormalizationRuleImpl::EvaluatorBind, - ], - ) - .find_best::(None)?; + let plan = optimize_exprs(plan)?; let Operator::Join(op) = plan.operator else { unreachable!() @@ -568,17 +563,7 @@ mod test { right: Box::new(right), }, ); - let plan = HepOptimizer::new(plan) - .batch( - "Expression Remapper".to_string(), - HepBatchStrategy::once_topdown(), - vec![ - NormalizationRuleImpl::BindExpressionPosition, - // TIPS: This rule is necessary - NormalizationRuleImpl::EvaluatorBind, - ], - ) - .find_best::(None)?; + let plan = optimize_exprs(plan)?; let Operator::Join(op) = plan.operator else { unreachable!() @@ -610,6 +595,97 @@ mod test { Ok(()) } + #[test] + fn test_right_join_filter_only_left_columns() -> Result<(), DatabaseError> { + let temp_dir = TempDir::new().expect("unable to create temporary working directory"); + let storage = RocksStorage::new(temp_dir.path())?; + let mut transaction = storage.transaction()?; + let meta_cache = Arc::new(SharedLruCache::new(4, 1, RandomState::new())?); + let view_cache = Arc::new(SharedLruCache::new(4, 1, RandomState::new())?); + let table_cache = Arc::new(SharedLruCache::new(4, 1, RandomState::new())?); + + let desc = ColumnDesc::new(LogicalType::Integer, None, false, None)?; + let left_columns = vec![ + ColumnRef::from(ColumnCatalog::new("k".to_string(), true, desc.clone())), + ColumnRef::from(ColumnCatalog::new("v".to_string(), true, 
desc.clone())), + ]; + let right_columns = vec![ColumnRef::from(ColumnCatalog::new( + "rk".to_string(), + true, + desc.clone(), + ))]; + + let on_keys = vec![( + ScalarExpression::column_expr(left_columns[0].clone()), + ScalarExpression::column_expr(right_columns[0].clone()), + )]; + let filter_expr = ScalarExpression::Binary { + op: BinaryOperator::Gt, + left_expr: Box::new(ScalarExpression::column_expr(left_columns[1].clone())), + right_expr: Box::new(ScalarExpression::Constant(DataValue::Int32(1))), + evaluator: None, + ty: LogicalType::Boolean, + }; + + let left = LogicalPlan { + operator: Operator::Values(ValuesOperator { + rows: vec![ + vec![DataValue::Int32(2), DataValue::Int32(0)], + vec![DataValue::Int32(2), DataValue::Int32(5)], + ], + schema_ref: Arc::new(left_columns), + }), + childrens: Box::new(Childrens::None), + physical_option: None, + _output_schema_ref: None, + }; + let right = LogicalPlan { + operator: Operator::Values(ValuesOperator { + rows: vec![vec![DataValue::Int32(2)]], + schema_ref: Arc::new(right_columns), + }), + childrens: Box::new(Childrens::None), + physical_option: None, + _output_schema_ref: None, + }; + + let plan = LogicalPlan::new( + Operator::Join(JoinOperator { + on: JoinCondition::On { + on: on_keys, + filter: Some(filter_expr), + }, + join_type: JoinType::RightOuter, + }), + Childrens::Twins { + left: Box::new(left), + right: Box::new(right), + }, + ); + + let plan = optimize_exprs(plan)?; + + let Operator::Join(op) = plan.operator else { + unreachable!() + }; + let (left, right) = plan.childrens.pop_twins(); + let executor = HashJoin::from((op, left, right)) + .execute((&table_cache, &view_cache, &meta_cache), &mut transaction); + let tuples = try_collect(executor)?; + + assert_eq!(tuples.len(), 1); + assert_eq!( + tuples[0].values, + vec![ + DataValue::Int32(2), + DataValue::Int32(5), + DataValue::Int32(2) + ] + ); + + Ok(()) + } + #[test] fn test_full_join() -> Result<(), DatabaseError> { let temp_dir = 
TempDir::new().expect("unable to create temporary working directory"); @@ -633,17 +709,7 @@ mod test { right: Box::new(right), }, ); - let plan = HepOptimizer::new(plan) - .batch( - "Expression Remapper".to_string(), - HepBatchStrategy::once_topdown(), - vec![ - NormalizationRuleImpl::BindExpressionPosition, - // TIPS: This rule is necessary - NormalizationRuleImpl::EvaluatorBind, - ], - ) - .find_best::(None)?; + let plan = optimize_exprs(plan)?; let Operator::Join(op) = plan.operator else { unreachable!() diff --git a/src/execution/dql/join/nested_loop_join.rs b/src/execution/dql/join/nested_loop_join.rs index 587e445c..9eb45425 100644 --- a/src/execution/dql/join/nested_loop_join.rs +++ b/src/execution/dql/join/nested_loop_join.rs @@ -388,7 +388,7 @@ mod test { use crate::execution::{try_collect, ReadExecutor}; use crate::expression::ScalarExpression; use crate::optimizer::heuristic::batch::HepBatchStrategy; - use crate::optimizer::heuristic::optimizer::HepOptimizer; + use crate::optimizer::heuristic::optimizer::HepOptimizerPipeline; use crate::optimizer::rule::normalization::NormalizationRuleImpl; use crate::planner::operator::values::ValuesOperator; use crate::planner::operator::Operator; @@ -405,6 +405,21 @@ mod test { use std::sync::Arc; use tempfile::TempDir; + fn optimize_exprs(plan: LogicalPlan) -> Result { + HepOptimizerPipeline::builder() + .before_batch( + "Expression Remapper".to_string(), + HepBatchStrategy::once_topdown(), + vec![ + NormalizationRuleImpl::BindExpressionPosition, + NormalizationRuleImpl::EvaluatorBind, + ], + ) + .build() + .instantiate(plan) + .find_best::(None) + } + fn tuple_to_strings(tuple: &Tuple) -> Vec> { tuple .values @@ -568,17 +583,7 @@ mod test { right: Box::new(right), }, ); - let plan = HepOptimizer::new(plan) - .batch( - "Expression Remapper".to_string(), - HepBatchStrategy::once_topdown(), - vec![ - NormalizationRuleImpl::BindExpressionPosition, - // TIPS: This rule is necessary - NormalizationRuleImpl::EvaluatorBind, 
- ], - ) - .find_best::(None)?; + let plan = optimize_exprs(plan)?; let Operator::Join(op) = plan.operator else { unreachable!() }; @@ -618,17 +623,7 @@ mod test { right: Box::new(right), }, ); - let plan = HepOptimizer::new(plan) - .batch( - "Expression Remapper".to_string(), - HepBatchStrategy::once_topdown(), - vec![ - NormalizationRuleImpl::BindExpressionPosition, - // TIPS: This rule is necessary - NormalizationRuleImpl::EvaluatorBind, - ], - ) - .find_best::(None)?; + let plan = optimize_exprs(plan)?; let Operator::Join(op) = plan.operator else { unreachable!() }; @@ -680,17 +675,7 @@ mod test { right: Box::new(right), }, ); - let plan = HepOptimizer::new(plan) - .batch( - "Expression Remapper".to_string(), - HepBatchStrategy::once_topdown(), - vec![ - NormalizationRuleImpl::BindExpressionPosition, - // TIPS: This rule is necessary - NormalizationRuleImpl::EvaluatorBind, - ], - ) - .find_best::(None)?; + let plan = optimize_exprs(plan)?; let Operator::Join(op) = plan.operator else { unreachable!() }; @@ -731,17 +716,7 @@ mod test { right: Box::new(right), }, ); - let plan = HepOptimizer::new(plan) - .batch( - "Expression Remapper".to_string(), - HepBatchStrategy::once_topdown(), - vec![ - NormalizationRuleImpl::BindExpressionPosition, - // TIPS: This rule is necessary - NormalizationRuleImpl::EvaluatorBind, - ], - ) - .find_best::(None)?; + let plan = optimize_exprs(plan)?; let Operator::Join(op) = plan.operator else { unreachable!() }; @@ -785,17 +760,7 @@ mod test { right: Box::new(right), }, ); - let plan = HepOptimizer::new(plan) - .batch( - "Expression Remapper".to_string(), - HepBatchStrategy::once_topdown(), - vec![ - NormalizationRuleImpl::BindExpressionPosition, - // TIPS: This rule is necessary - NormalizationRuleImpl::EvaluatorBind, - ], - ) - .find_best::(None)?; + let plan = optimize_exprs(plan)?; let Operator::Join(op) = plan.operator else { unreachable!() }; @@ -831,17 +796,7 @@ mod test { right: Box::new(right), }, ); - let plan = 
HepOptimizer::new(plan) - .batch( - "Expression Remapper".to_string(), - HepBatchStrategy::once_topdown(), - vec![ - NormalizationRuleImpl::BindExpressionPosition, - // TIPS: This rule is necessary - NormalizationRuleImpl::EvaluatorBind, - ], - ) - .find_best::(None)?; + let plan = optimize_exprs(plan)?; let Operator::Join(op) = plan.operator else { unreachable!() }; @@ -880,17 +835,7 @@ mod test { right: Box::new(right), }, ); - let plan = HepOptimizer::new(plan) - .batch( - "Expression Remapper".to_string(), - HepBatchStrategy::once_topdown(), - vec![ - NormalizationRuleImpl::BindExpressionPosition, - // TIPS: This rule is necessary - NormalizationRuleImpl::EvaluatorBind, - ], - ) - .find_best::(None)?; + let plan = optimize_exprs(plan)?; let Operator::Join(op) = plan.operator else { unreachable!() }; @@ -931,17 +876,7 @@ mod test { right: Box::new(right), }, ); - let plan = HepOptimizer::new(plan) - .batch( - "Expression Remapper".to_string(), - HepBatchStrategy::once_topdown(), - vec![ - NormalizationRuleImpl::BindExpressionPosition, - // TIPS: This rule is necessary - NormalizationRuleImpl::EvaluatorBind, - ], - ) - .find_best::(None)?; + let plan = optimize_exprs(plan)?; let Operator::Join(op) = plan.operator else { unreachable!() }; @@ -987,17 +922,7 @@ mod test { right: Box::new(right), }, ); - let plan = HepOptimizer::new(plan) - .batch( - "Expression Remapper".to_string(), - HepBatchStrategy::once_topdown(), - vec![ - NormalizationRuleImpl::BindExpressionPosition, - // TIPS: This rule is necessary - NormalizationRuleImpl::EvaluatorBind, - ], - ) - .find_best::(None)?; + let plan = optimize_exprs(plan)?; let Operator::Join(op) = plan.operator else { unreachable!() }; diff --git a/src/execution/mod.rs b/src/execution/mod.rs index e1a9071a..76027fed 100644 --- a/src/execution/mod.rs +++ b/src/execution/mod.rs @@ -54,7 +54,7 @@ use crate::execution::dql::top_k::TopK; use crate::execution::dql::union::Union; use crate::execution::dql::values::Values; use 
crate::planner::operator::join::JoinCondition; -use crate::planner::operator::{Operator, PhysicalOption}; +use crate::planner::operator::{Operator, PhysicalOption, PlanImpl}; use crate::planner::LogicalPlan; use crate::storage::{StatisticsMetaCache, TableCache, Transaction, ViewCache}; use crate::types::index::IndexInfo; @@ -120,7 +120,14 @@ pub fn build_read<'a, T: Transaction + 'a>( match &op.on { JoinCondition::On { on, .. } - if !on.is_empty() && plan.physical_option == Some(PhysicalOption::HashJoin) => + if !on.is_empty() + && matches!( + plan.physical_option, + Some(PhysicalOption { + plan: PlanImpl::HashJoin, + .. + }) + ) => { HashJoin::from((op, left_input, right_input)).execute(cache, transaction) } @@ -135,12 +142,18 @@ pub fn build_read<'a, T: Transaction + 'a>( Projection::from((op, input)).execute(cache, transaction) } Operator::TableScan(op) => { - if let Some(PhysicalOption::IndexScan(IndexInfo { - meta, - range: Some(range), - covered_deserializers, - cover_mapping, - })) = plan.physical_option + if let Some(PhysicalOption { + plan: + PlanImpl::IndexScan(IndexInfo { + meta, + range: Some(range), + covered_deserializers, + cover_mapping, + sort_option: _, + sort_elimination_hint: _, + }), + .. 
+ }) = plan.physical_option { IndexScan::from((op, meta, range, covered_deserializers, cover_mapping)) .execute(cache, transaction) diff --git a/src/expression/mod.rs b/src/expression/mod.rs index 87712486..4c4942d8 100644 --- a/src/expression/mod.rs +++ b/src/expression/mod.rs @@ -407,6 +407,17 @@ impl ScalarExpression { self.0.push(col.clone()); Ok(()) } + + fn visit_alias( + &mut self, + expr: &ScalarExpression, + ty: &AliasType, + ) -> Result<(), DatabaseError> { + if let AliasType::Expr(alias_expr) = ty { + self.0.push(alias_expr.output_column()); + } + self.visit(expr) + } } struct OutputColumnCollector(Vec); impl Visitor<'_> for OutputColumnCollector { diff --git a/src/expression/range_detacher.rs b/src/expression/range_detacher.rs index edb99842..60066e6f 100644 --- a/src/expression/range_detacher.rs +++ b/src/expression/range_detacher.rs @@ -807,7 +807,7 @@ mod test { use crate::errors::DatabaseError; use crate::expression::range_detacher::{Range, RangeDetacher}; use crate::optimizer::heuristic::batch::HepBatchStrategy; - use crate::optimizer::heuristic::optimizer::HepOptimizer; + use crate::optimizer::heuristic::optimizer::HepOptimizerPipeline; use crate::optimizer::rule::normalization::NormalizationRuleImpl; use crate::planner::operator::filter::FilterOperator; use crate::planner::operator::Operator; @@ -819,12 +819,15 @@ mod test { use std::ops::Bound; fn plan_filter(plan: LogicalPlan) -> Result, DatabaseError> { - let best_plan = HepOptimizer::new(plan.clone()) - .batch( + let pipeline = HepOptimizerPipeline::builder() + .before_batch( "test_simplify_filter".to_string(), HepBatchStrategy::once_topdown(), vec![NormalizationRuleImpl::SimplifyFilter], ) + .build(); + let best_plan = pipeline + .instantiate(plan) .find_best::(None)?; if let Operator::Filter(filter_op) = best_plan.childrens.pop_only().operator { Ok(Some(filter_op)) diff --git a/src/optimizer/core/memo.rs b/src/optimizer/core/memo.rs index d6277182..beb2d9c7 100644 --- 
a/src/optimizer/core/memo.rs +++ b/src/optimizer/core/memo.rs @@ -145,12 +145,14 @@ mod tests { use crate::db::{DataBaseBuilder, ResultIter}; use crate::errors::DatabaseError; use crate::expression::range_detacher::Range; + use crate::expression::ScalarExpression; use crate::optimizer::core::memo::Memo; use crate::optimizer::heuristic::batch::HepBatchStrategy; - use crate::optimizer::heuristic::optimizer::HepOptimizer; + use crate::optimizer::heuristic::optimizer::HepOptimizerPipeline; use crate::optimizer::rule::implementation::ImplementationRuleImpl; use crate::optimizer::rule::normalization::NormalizationRuleImpl; - use crate::planner::operator::PhysicalOption; + use crate::planner::operator::sort::SortField; + use crate::planner::operator::{PhysicalOption, PlanImpl, SortOption}; use crate::storage::rocksdb::RocksTransaction; use crate::storage::{Storage, Transaction}; use crate::types::index::{IndexInfo, IndexMeta, IndexType}; @@ -181,13 +183,16 @@ mod tests { database.run("analyze table t1")?.done()?; let transaction = database.storage.transaction()?; - let c1_column_id = { - transaction - .table(database.state.table_cache(), "t1".to_string().into())? - .unwrap() - .get_column_id_by_name("c1") - .unwrap() - }; + let c1_column = transaction + .table(database.state.table_cache(), "t1".to_string().into())? 
+ .unwrap() + .get_column_by_name("c1") + .unwrap(); + let sort_fields = vec![SortField::new( + ScalarExpression::column_expr(c1_column.clone()), + true, + true, + )]; let scala_functions = Default::default(); let table_functions = Default::default(); let mut binder = Binder::new( @@ -207,20 +212,24 @@ mod tests { "select c1, c3 from t1 inner join t2 on c1 = c3 where (c1 > 40 or c1 = 2) and c3 > 22", )?; let plan = binder.bind(&stmt[0])?; - let mut best_plan = HepOptimizer::new(plan) - .batch( + let pipeline = HepOptimizerPipeline::builder() + .before_batch( "Simplify Filter".to_string(), HepBatchStrategy::once_topdown(), vec![NormalizationRuleImpl::SimplifyFilter], ) - .batch( + .before_batch( "Predicate Pushdown".to_string(), HepBatchStrategy::fix_point_topdown(10), vec![ NormalizationRuleImpl::PushPredicateThroughJoin, + NormalizationRuleImpl::PushJoinPredicateIntoScan, NormalizationRuleImpl::PushPredicateIntoScan, ], ) + .build(); + let mut best_plan = pipeline + .instantiate(plan) .find_best::(None)?; let rules = vec![ ImplementationRuleImpl::Projection, @@ -243,9 +252,18 @@ mod tests { assert_eq!(exprs.exprs.len(), 2); assert_eq!(exprs.exprs[0].cost, Some(1000)); - assert_eq!(exprs.exprs[0].op, PhysicalOption::SeqScan); + assert_eq!( + exprs.exprs[0].op, + PhysicalOption::new(PlanImpl::SeqScan, SortOption::None) + ); assert!(exprs.exprs[1].cost.unwrap() >= 960); - assert!(matches!(exprs.exprs[1].op, PhysicalOption::IndexScan(_))); + assert!(matches!( + exprs.exprs[1].op, + PhysicalOption { + plan: PlanImpl::IndexScan(..), + .. 
+ } + )); assert_eq!( best_plan .childrens @@ -256,26 +274,37 @@ mod tests { .childrens .pop_only() .physical_option, - Some(PhysicalOption::IndexScan(IndexInfo { - meta: Arc::new(IndexMeta { - id: 0, - column_ids: vec![*c1_column_id], - table_name: "t1".to_string().into(), - pk_ty: LogicalType::Integer, - value_ty: LogicalType::Integer, - name: "pk_index".to_string(), - ty: IndexType::PrimaryKey { is_multiple: false }, + Some(PhysicalOption::new( + PlanImpl::IndexScan(IndexInfo { + meta: Arc::new(IndexMeta { + id: 0, + column_ids: vec![c1_column.id().unwrap()], + table_name: "t1".to_string().into(), + pk_ty: LogicalType::Integer, + value_ty: LogicalType::Integer, + name: "pk_index".to_string(), + ty: IndexType::PrimaryKey { is_multiple: false }, + }), + sort_option: SortOption::OrderBy { + fields: sort_fields.clone(), + ignore_prefix_len: 0, + }, + range: Some(Range::SortedRanges(vec![ + Range::Eq(DataValue::Int32(2)), + Range::Scope { + min: Bound::Excluded(DataValue::Int32(40)), + max: Bound::Unbounded, + } + ])), + covered_deserializers: None, + cover_mapping: None, + sort_elimination_hint: None, }), - range: Some(Range::SortedRanges(vec![ - Range::Eq(DataValue::Int32(2)), - Range::Scope { - min: Bound::Excluded(DataValue::Int32(40)), - max: Bound::Unbounded, - } - ])), - covered_deserializers: None, - cover_mapping: None, - })) + SortOption::OrderBy { + fields: sort_fields, + ignore_prefix_len: 0, + } + )) ); Ok(()) diff --git a/src/optimizer/heuristic/optimizer.rs b/src/optimizer/heuristic/optimizer.rs index 037624aa..17f1f5a5 100644 --- a/src/optimizer/heuristic/optimizer.rs +++ b/src/optimizer/heuristic/optimizer.rs @@ -20,70 +20,70 @@ use crate::optimizer::core::statistics_meta::StatisticMetaLoader; use crate::optimizer::heuristic::batch::{HepBatch, HepBatchStrategy}; use crate::optimizer::heuristic::matcher::PlanMatcher; use crate::optimizer::rule::implementation::ImplementationRuleImpl; +use 
crate::optimizer::rule::normalization::annotate_sort_preserving_indexes; use crate::optimizer::rule::normalization::NormalizationRuleImpl; use crate::planner::{Childrens, LogicalPlan}; use crate::storage::Transaction; use std::ops::Not; -pub struct HepOptimizer { - batches: Vec, +pub struct HepOptimizer<'a> { + before_batches: &'a [HepBatch], + after_batches: &'a [HepBatch], + implementations: &'a [ImplementationRuleImpl], plan: LogicalPlan, - implementations: Vec, } -impl HepOptimizer { - pub fn new(root: LogicalPlan) -> Self { +impl<'a> HepOptimizer<'a> { + pub fn new( + plan: LogicalPlan, + before_batches: &'a [HepBatch], + after_batches: &'a [HepBatch], + implementations: &'a [ImplementationRuleImpl], + ) -> Self { Self { - batches: vec![], - plan: root, - implementations: vec![], + before_batches, + after_batches, + implementations, + plan, } } - pub fn batch( - mut self, - name: String, - strategy: HepBatchStrategy, - rules: Vec, - ) -> Self { - self.batches.push(HepBatch::new(name, strategy, rules)); - self - } - - pub fn implementations(mut self, implementations: Vec) -> Self { - self.implementations = implementations; - self - } - pub fn find_best( mut self, loader: Option<&StatisticMetaLoader<'_, T>>, ) -> Result { - for batch in &self.batches { + Self::apply_batches(&mut self.plan, self.before_batches)?; + annotate_sort_preserving_indexes(&mut self.plan); + + if let Some(loader) = loader { + if self.implementations.is_empty().not() { + let memo = Memo::new(&self.plan, loader, self.implementations)?; + Memo::annotate_plan(&memo, &mut self.plan); + } + } + Self::apply_batches(&mut self.plan, self.after_batches)?; + + Ok(self.plan) + } + + #[inline] + fn apply_batches(plan: &mut LogicalPlan, batches: &[HepBatch]) -> Result<(), DatabaseError> { + for batch in batches { match batch.strategy { HepBatchStrategy::MaxTimes(max_iteration) => { for _ in 0..max_iteration { - if !Self::apply_batch(&mut self.plan, batch)? { + if !Self::apply_batch(plan, batch)? 
{ break; } } } - HepBatchStrategy::LoopIfApplied => { - while Self::apply_batch(&mut self.plan, batch)? {} - } + HepBatchStrategy::LoopIfApplied => while Self::apply_batch(plan, batch)? {}, } } - - if let Some(loader) = loader { - if self.implementations.is_empty().not() { - let memo = Memo::new(&self.plan, loader, &self.implementations)?; - Memo::annotate_plan(&memo, &mut self.plan); - } - } - - Ok(self.plan) + Ok(()) } + #[inline] fn apply_batch(plan: &mut LogicalPlan, batch: &HepBatch) -> Result { let mut applied = false; for rule in &batch.rules { @@ -126,3 +126,84 @@ impl HepOptimizer { } } } + +#[derive(Clone, Default)] +pub struct HepOptimizerPipeline { + before_batches: Vec, + after_batches: Vec, + implementations: Vec, +} + +impl HepOptimizerPipeline { + pub fn builder() -> HepOptimizerPipelineBuilder { + HepOptimizerPipelineBuilder { + before_batches: vec![], + after_batches: vec![], + implementations: vec![], + } + } + + pub fn new( + before_batches: Vec, + after_batches: Vec, + implementations: Vec, + ) -> Self { + Self { + before_batches, + after_batches, + implementations, + } + } + + pub fn instantiate(&self, plan: LogicalPlan) -> HepOptimizer<'_> { + HepOptimizer::new( + plan, + &self.before_batches, + &self.after_batches, + &self.implementations, + ) + } +} + +pub struct HepOptimizerPipelineBuilder { + before_batches: Vec, + after_batches: Vec, + implementations: Vec, +} + +impl HepOptimizerPipelineBuilder { + pub fn before_batch( + mut self, + name: String, + strategy: HepBatchStrategy, + rules: Vec, + ) -> Self { + self.before_batches + .push(HepBatch::new(name, strategy, rules)); + self + } + + pub fn after_batch( + mut self, + name: String, + strategy: HepBatchStrategy, + rules: Vec, + ) -> Self { + self.after_batches + .push(HepBatch::new(name, strategy, rules)); + self + } + + pub fn implementations(mut self, implementations: Vec) -> Self { + self.implementations = implementations; + self + } + + pub fn build(self) -> HepOptimizerPipeline { + 
HepOptimizerPipeline::new( + self.before_batches, + self.after_batches, + self.implementations, + ) + } +} diff --git a/src/optimizer/rule/implementation/ddl/add_column.rs b/src/optimizer/rule/implementation/ddl/add_column.rs index 7416cb03..51614e77 100644 --- a/src/optimizer/rule/implementation/ddl/add_column.rs +++ b/src/optimizer/rule/implementation/ddl/add_column.rs @@ -17,7 +17,7 @@ use crate::optimizer::core::memo::{Expression, GroupExpression}; use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; use crate::optimizer::core::statistics_meta::StatisticMetaLoader; -use crate::planner::operator::{Operator, PhysicalOption}; +use crate::planner::operator::{Operator, PhysicalOption, PlanImpl, SortOption}; use crate::single_mapping; use crate::storage::Transaction; use std::sync::LazyLock; @@ -33,5 +33,5 @@ pub struct AddColumnImplementation; single_mapping!( AddColumnImplementation, ADD_COLUMN_PATTERN, - PhysicalOption::AddColumn + PhysicalOption::new(PlanImpl::AddColumn, SortOption::None) ); diff --git a/src/optimizer/rule/implementation/ddl/create_table.rs b/src/optimizer/rule/implementation/ddl/create_table.rs index db626e4e..4b255076 100644 --- a/src/optimizer/rule/implementation/ddl/create_table.rs +++ b/src/optimizer/rule/implementation/ddl/create_table.rs @@ -17,7 +17,7 @@ use crate::optimizer::core::memo::{Expression, GroupExpression}; use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; use crate::optimizer::core::statistics_meta::StatisticMetaLoader; -use crate::planner::operator::{Operator, PhysicalOption}; +use crate::planner::operator::{Operator, PhysicalOption, PlanImpl, SortOption}; use crate::single_mapping; use crate::storage::Transaction; use std::sync::LazyLock; @@ -33,5 +33,5 @@ pub struct CreateTableImplementation; single_mapping!( CreateTableImplementation, 
CREATE_TABLE_PATTERN, - PhysicalOption::CreateTable + PhysicalOption::new(PlanImpl::CreateTable, SortOption::None) ); diff --git a/src/optimizer/rule/implementation/ddl/drop_column.rs b/src/optimizer/rule/implementation/ddl/drop_column.rs index b5fc4b31..7440a10a 100644 --- a/src/optimizer/rule/implementation/ddl/drop_column.rs +++ b/src/optimizer/rule/implementation/ddl/drop_column.rs @@ -17,7 +17,7 @@ use crate::optimizer::core::memo::{Expression, GroupExpression}; use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; use crate::optimizer::core::statistics_meta::StatisticMetaLoader; -use crate::planner::operator::{Operator, PhysicalOption}; +use crate::planner::operator::{Operator, PhysicalOption, PlanImpl, SortOption}; use crate::single_mapping; use crate::storage::Transaction; use std::sync::LazyLock; @@ -33,5 +33,5 @@ pub struct DropColumnImplementation; single_mapping!( DropColumnImplementation, DROP_COLUMN_PATTERN, - PhysicalOption::DropColumn + PhysicalOption::new(PlanImpl::DropColumn, SortOption::None) ); diff --git a/src/optimizer/rule/implementation/ddl/drop_table.rs b/src/optimizer/rule/implementation/ddl/drop_table.rs index 8e1de948..ec995230 100644 --- a/src/optimizer/rule/implementation/ddl/drop_table.rs +++ b/src/optimizer/rule/implementation/ddl/drop_table.rs @@ -17,7 +17,7 @@ use crate::optimizer::core::memo::{Expression, GroupExpression}; use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; use crate::optimizer::core::statistics_meta::StatisticMetaLoader; -use crate::planner::operator::{Operator, PhysicalOption}; +use crate::planner::operator::{Operator, PhysicalOption, PlanImpl, SortOption}; use crate::single_mapping; use crate::storage::Transaction; use std::sync::LazyLock; @@ -33,5 +33,5 @@ pub struct DropTableImplementation; single_mapping!( 
DropTableImplementation, DROP_TABLE_PATTERN, - PhysicalOption::DropTable + PhysicalOption::new(PlanImpl::DropTable, SortOption::None) ); diff --git a/src/optimizer/rule/implementation/ddl/truncate.rs b/src/optimizer/rule/implementation/ddl/truncate.rs index cc82117c..281a1996 100644 --- a/src/optimizer/rule/implementation/ddl/truncate.rs +++ b/src/optimizer/rule/implementation/ddl/truncate.rs @@ -17,7 +17,7 @@ use crate::optimizer::core::memo::{Expression, GroupExpression}; use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; use crate::optimizer::core::statistics_meta::StatisticMetaLoader; -use crate::planner::operator::{Operator, PhysicalOption}; +use crate::planner::operator::{Operator, PhysicalOption, PlanImpl, SortOption}; use crate::single_mapping; use crate::storage::Transaction; use std::sync::LazyLock; @@ -33,5 +33,5 @@ pub struct TruncateImplementation; single_mapping!( TruncateImplementation, TRUNCATE_PATTERN, - PhysicalOption::Truncate + PhysicalOption::new(PlanImpl::Truncate, SortOption::None) ); diff --git a/src/optimizer/rule/implementation/dml/analyze.rs b/src/optimizer/rule/implementation/dml/analyze.rs index 50d5063b..b2e0881d 100644 --- a/src/optimizer/rule/implementation/dml/analyze.rs +++ b/src/optimizer/rule/implementation/dml/analyze.rs @@ -17,7 +17,7 @@ use crate::optimizer::core::memo::{Expression, GroupExpression}; use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; use crate::optimizer::core::statistics_meta::StatisticMetaLoader; -use crate::planner::operator::{Operator, PhysicalOption}; +use crate::planner::operator::{Operator, PhysicalOption, PlanImpl, SortOption}; use crate::single_mapping; use crate::storage::Transaction; use std::sync::LazyLock; @@ -33,5 +33,5 @@ pub struct AnalyzeImplementation; single_mapping!( AnalyzeImplementation, ANALYZE_PATTERN, - 
PhysicalOption::Analyze + PhysicalOption::new(PlanImpl::Analyze, SortOption::None) ); diff --git a/src/optimizer/rule/implementation/dml/copy_from_file.rs b/src/optimizer/rule/implementation/dml/copy_from_file.rs index 96c7f9a3..3fd5c500 100644 --- a/src/optimizer/rule/implementation/dml/copy_from_file.rs +++ b/src/optimizer/rule/implementation/dml/copy_from_file.rs @@ -17,7 +17,7 @@ use crate::optimizer::core::memo::{Expression, GroupExpression}; use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; use crate::optimizer::core::statistics_meta::StatisticMetaLoader; -use crate::planner::operator::{Operator, PhysicalOption}; +use crate::planner::operator::{Operator, PhysicalOption, PlanImpl, SortOption}; use crate::single_mapping; use crate::storage::Transaction; use std::sync::LazyLock; @@ -33,5 +33,5 @@ pub struct CopyFromFileImplementation; single_mapping!( CopyFromFileImplementation, COPY_FROM_FILE_PATTERN, - PhysicalOption::CopyFromFile + PhysicalOption::new(PlanImpl::CopyFromFile, SortOption::None) ); diff --git a/src/optimizer/rule/implementation/dml/copy_to_file.rs b/src/optimizer/rule/implementation/dml/copy_to_file.rs index d743f4b4..10ac05c4 100644 --- a/src/optimizer/rule/implementation/dml/copy_to_file.rs +++ b/src/optimizer/rule/implementation/dml/copy_to_file.rs @@ -17,7 +17,7 @@ use crate::optimizer::core::memo::{Expression, GroupExpression}; use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; use crate::optimizer::core::statistics_meta::StatisticMetaLoader; -use crate::planner::operator::{Operator, PhysicalOption}; +use crate::planner::operator::{Operator, PhysicalOption, PlanImpl, SortOption}; use crate::single_mapping; use crate::storage::Transaction; use std::sync::LazyLock; @@ -33,5 +33,5 @@ pub struct CopyToFileImplementation; single_mapping!( 
CopyToFileImplementation, COPY_TO_FILE_PATTERN, - PhysicalOption::CopyToFile + PhysicalOption::new(PlanImpl::CopyToFile, SortOption::None) ); diff --git a/src/optimizer/rule/implementation/dml/delete.rs b/src/optimizer/rule/implementation/dml/delete.rs index c3db1b41..0e1a4e17 100644 --- a/src/optimizer/rule/implementation/dml/delete.rs +++ b/src/optimizer/rule/implementation/dml/delete.rs @@ -17,7 +17,7 @@ use crate::optimizer::core::memo::{Expression, GroupExpression}; use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; use crate::optimizer::core::statistics_meta::StatisticMetaLoader; -use crate::planner::operator::{Operator, PhysicalOption}; +use crate::planner::operator::{Operator, PhysicalOption, PlanImpl, SortOption}; use crate::single_mapping; use crate::storage::Transaction; use std::sync::LazyLock; @@ -30,4 +30,8 @@ static DELETE_PATTERN: LazyLock = LazyLock::new(|| Pattern { #[derive(Clone)] pub struct DeleteImplementation; -single_mapping!(DeleteImplementation, DELETE_PATTERN, PhysicalOption::Delete); +single_mapping!( + DeleteImplementation, + DELETE_PATTERN, + PhysicalOption::new(PlanImpl::Delete, SortOption::None) +); diff --git a/src/optimizer/rule/implementation/dml/insert.rs b/src/optimizer/rule/implementation/dml/insert.rs index 68a247ff..27994273 100644 --- a/src/optimizer/rule/implementation/dml/insert.rs +++ b/src/optimizer/rule/implementation/dml/insert.rs @@ -17,7 +17,7 @@ use crate::optimizer::core::memo::{Expression, GroupExpression}; use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; use crate::optimizer::core::statistics_meta::StatisticMetaLoader; -use crate::planner::operator::{Operator, PhysicalOption}; +use crate::planner::operator::{Operator, PhysicalOption, PlanImpl, SortOption}; use crate::single_mapping; use crate::storage::Transaction; use 
std::sync::LazyLock; @@ -30,4 +30,8 @@ static INSERT_PATTERN: LazyLock = LazyLock::new(|| Pattern { #[derive(Clone)] pub struct InsertImplementation; -single_mapping!(InsertImplementation, INSERT_PATTERN, PhysicalOption::Insert); +single_mapping!( + InsertImplementation, + INSERT_PATTERN, + PhysicalOption::new(PlanImpl::Insert, SortOption::None) +); diff --git a/src/optimizer/rule/implementation/dml/update.rs b/src/optimizer/rule/implementation/dml/update.rs index 555000e7..128f3a1b 100644 --- a/src/optimizer/rule/implementation/dml/update.rs +++ b/src/optimizer/rule/implementation/dml/update.rs @@ -17,7 +17,7 @@ use crate::optimizer::core::memo::{Expression, GroupExpression}; use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; use crate::optimizer::core::statistics_meta::StatisticMetaLoader; -use crate::planner::operator::{Operator, PhysicalOption}; +use crate::planner::operator::{Operator, PhysicalOption, PlanImpl, SortOption}; use crate::single_mapping; use crate::storage::Transaction; use std::sync::LazyLock; @@ -30,4 +30,8 @@ static UPDATE_PATTERN: LazyLock = LazyLock::new(|| Pattern { #[derive(Clone)] pub struct UpdateImplementation; -single_mapping!(UpdateImplementation, UPDATE_PATTERN, PhysicalOption::Update); +single_mapping!( + UpdateImplementation, + UPDATE_PATTERN, + PhysicalOption::new(PlanImpl::Update, SortOption::None) +); diff --git a/src/optimizer/rule/implementation/dql/aggregate.rs b/src/optimizer/rule/implementation/dql/aggregate.rs index 5eedab5c..bdcd75c8 100644 --- a/src/optimizer/rule/implementation/dql/aggregate.rs +++ b/src/optimizer/rule/implementation/dql/aggregate.rs @@ -17,7 +17,7 @@ use crate::optimizer::core::memo::{Expression, GroupExpression}; use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; use 
crate::optimizer::core::statistics_meta::StatisticMetaLoader; -use crate::planner::operator::{Operator, PhysicalOption}; +use crate::planner::operator::{Operator, PhysicalOption, PlanImpl, SortOption}; use crate::single_mapping; use crate::storage::Transaction; use std::sync::LazyLock; @@ -48,7 +48,7 @@ pub struct GroupByAggregateImplementation; single_mapping!( GroupByAggregateImplementation, GROUP_BY_AGGREGATE_PATTERN, - PhysicalOption::HashAggregate + PhysicalOption::new(PlanImpl::HashAggregate, SortOption::None) ); pub struct SimpleAggregateImplementation; @@ -56,5 +56,5 @@ pub struct SimpleAggregateImplementation; single_mapping!( SimpleAggregateImplementation, SIMPLE_AGGREGATE_PATTERN, - PhysicalOption::SimpleAggregate + PhysicalOption::new(PlanImpl::SimpleAggregate, SortOption::None) ); diff --git a/src/optimizer/rule/implementation/dql/dummy.rs b/src/optimizer/rule/implementation/dql/dummy.rs index 321dfdd6..d3a85057 100644 --- a/src/optimizer/rule/implementation/dql/dummy.rs +++ b/src/optimizer/rule/implementation/dql/dummy.rs @@ -17,7 +17,7 @@ use crate::optimizer::core::memo::{Expression, GroupExpression}; use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; use crate::optimizer::core::statistics_meta::StatisticMetaLoader; -use crate::planner::operator::{Operator, PhysicalOption}; +use crate::planner::operator::{Operator, PhysicalOption, PlanImpl, SortOption}; use crate::single_mapping; use crate::storage::Transaction; use std::sync::LazyLock; @@ -30,4 +30,8 @@ static DUMMY_PATTERN: LazyLock = LazyLock::new(|| Pattern { #[derive(Clone)] pub struct DummyImplementation; -single_mapping!(DummyImplementation, DUMMY_PATTERN, PhysicalOption::Dummy); +single_mapping!( + DummyImplementation, + DUMMY_PATTERN, + PhysicalOption::new(PlanImpl::Dummy, SortOption::None) +); diff --git a/src/optimizer/rule/implementation/dql/filter.rs 
b/src/optimizer/rule/implementation/dql/filter.rs index e999c7c8..24cdb124 100644 --- a/src/optimizer/rule/implementation/dql/filter.rs +++ b/src/optimizer/rule/implementation/dql/filter.rs @@ -17,7 +17,7 @@ use crate::optimizer::core::memo::{Expression, GroupExpression}; use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; use crate::optimizer::core::statistics_meta::StatisticMetaLoader; -use crate::planner::operator::{Operator, PhysicalOption}; +use crate::planner::operator::{Operator, PhysicalOption, PlanImpl, SortOption}; use crate::single_mapping; use crate::storage::Transaction; use std::sync::LazyLock; @@ -30,4 +30,8 @@ static FILTER_PATTERN: LazyLock = LazyLock::new(|| Pattern { #[derive(Clone)] pub struct FilterImplementation; -single_mapping!(FilterImplementation, FILTER_PATTERN, PhysicalOption::Filter); +single_mapping!( + FilterImplementation, + FILTER_PATTERN, + PhysicalOption::new(PlanImpl::Filter, SortOption::Follow) +); diff --git a/src/optimizer/rule/implementation/dql/function_scan.rs b/src/optimizer/rule/implementation/dql/function_scan.rs index 0989973c..d4b0fdbf 100644 --- a/src/optimizer/rule/implementation/dql/function_scan.rs +++ b/src/optimizer/rule/implementation/dql/function_scan.rs @@ -17,7 +17,7 @@ use crate::optimizer::core::memo::{Expression, GroupExpression}; use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; use crate::optimizer::core::statistics_meta::StatisticMetaLoader; -use crate::planner::operator::{Operator, PhysicalOption}; +use crate::planner::operator::{Operator, PhysicalOption, PlanImpl, SortOption}; use crate::single_mapping; use crate::storage::Transaction; use std::sync::LazyLock; @@ -33,5 +33,5 @@ pub struct FunctionScanImplementation; single_mapping!( FunctionScanImplementation, FUNCTION_SCAN_PATTERN, - PhysicalOption::FunctionScan + 
PhysicalOption::new(PlanImpl::FunctionScan, SortOption::None) ); diff --git a/src/optimizer/rule/implementation/dql/join.rs b/src/optimizer/rule/implementation/dql/join.rs index 1b0825f4..e8e10bd6 100644 --- a/src/optimizer/rule/implementation/dql/join.rs +++ b/src/optimizer/rule/implementation/dql/join.rs @@ -18,7 +18,7 @@ use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; use crate::optimizer::core::statistics_meta::StatisticMetaLoader; use crate::planner::operator::join::{JoinCondition, JoinOperator}; -use crate::planner::operator::{Operator, PhysicalOption}; +use crate::planner::operator::{Operator, PhysicalOption, PlanImpl, SortOption}; use crate::storage::Transaction; use std::sync::LazyLock; @@ -43,7 +43,7 @@ impl ImplementationRule for JoinImplementation { _: &StatisticMetaLoader<'_, T>, group_expr: &mut GroupExpression, ) -> Result<(), DatabaseError> { - let mut physical_option = PhysicalOption::NestLoopJoin; + let mut physical_option = PhysicalOption::new(PlanImpl::NestLoopJoin, SortOption::None); if let Operator::Join(JoinOperator { on: JoinCondition::On { on, .. 
}, @@ -51,7 +51,7 @@ impl ImplementationRule for JoinImplementation { }) = op { if !on.is_empty() { - physical_option = PhysicalOption::HashJoin; + physical_option.plan = PlanImpl::HashJoin; } } group_expr.append_expr(Expression { diff --git a/src/optimizer/rule/implementation/dql/limit.rs b/src/optimizer/rule/implementation/dql/limit.rs index c1fbb505..3d6f28ee 100644 --- a/src/optimizer/rule/implementation/dql/limit.rs +++ b/src/optimizer/rule/implementation/dql/limit.rs @@ -17,7 +17,7 @@ use crate::optimizer::core::memo::{Expression, GroupExpression}; use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; use crate::optimizer::core::statistics_meta::StatisticMetaLoader; -use crate::planner::operator::{Operator, PhysicalOption}; +use crate::planner::operator::{Operator, PhysicalOption, PlanImpl, SortOption}; use crate::single_mapping; use crate::storage::Transaction; use std::sync::LazyLock; @@ -30,4 +30,8 @@ static LIMIT_PATTERN: LazyLock = LazyLock::new(|| Pattern { #[derive(Clone)] pub struct LimitImplementation; -single_mapping!(LimitImplementation, LIMIT_PATTERN, PhysicalOption::Limit); +single_mapping!( + LimitImplementation, + LIMIT_PATTERN, + PhysicalOption::new(PlanImpl::Limit, SortOption::Follow) +); diff --git a/src/optimizer/rule/implementation/dql/projection.rs b/src/optimizer/rule/implementation/dql/projection.rs index 43091489..4744650d 100644 --- a/src/optimizer/rule/implementation/dql/projection.rs +++ b/src/optimizer/rule/implementation/dql/projection.rs @@ -17,7 +17,7 @@ use crate::optimizer::core::memo::{Expression, GroupExpression}; use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; use crate::optimizer::core::statistics_meta::StatisticMetaLoader; -use crate::planner::operator::{Operator, PhysicalOption}; +use crate::planner::operator::{Operator, PhysicalOption, 
PlanImpl, SortOption}; use crate::single_mapping; use crate::storage::Transaction; use std::sync::LazyLock; @@ -33,5 +33,5 @@ pub struct ProjectionImplementation; single_mapping!( ProjectionImplementation, PROJECTION_PATTERN, - PhysicalOption::Project + PhysicalOption::new(PlanImpl::Project, SortOption::Follow) ); diff --git a/src/optimizer/rule/implementation/dql/sort.rs b/src/optimizer/rule/implementation/dql/sort.rs index eff4f5b9..625c2ada 100644 --- a/src/optimizer/rule/implementation/dql/sort.rs +++ b/src/optimizer/rule/implementation/dql/sort.rs @@ -17,8 +17,7 @@ use crate::optimizer::core::memo::{Expression, GroupExpression}; use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; use crate::optimizer::core::statistics_meta::StatisticMetaLoader; -use crate::planner::operator::{Operator, PhysicalOption}; -use crate::single_mapping; +use crate::planner::operator::{Operator, PhysicalOption, PlanImpl, SortOption}; use crate::storage::Transaction; use std::sync::LazyLock; @@ -30,4 +29,32 @@ static SORT_PATTERN: LazyLock = LazyLock::new(|| Pattern { #[derive(Clone)] pub struct SortImplementation; -single_mapping!(SortImplementation, SORT_PATTERN, PhysicalOption::Sort); +impl MatchPattern for SortImplementation { + fn pattern(&self) -> &Pattern { + &SORT_PATTERN + } +} + +impl ImplementationRule for SortImplementation { + fn to_expression( + &self, + op: &Operator, + _: &StatisticMetaLoader<'_, T>, + group_expr: &mut GroupExpression, + ) -> Result<(), DatabaseError> { + if let Operator::Sort(op) = op { + group_expr.append_expr(Expression { + op: PhysicalOption::new( + PlanImpl::Sort, + SortOption::OrderBy { + fields: op.sort_fields.clone(), + ignore_prefix_len: 0, + }, + ), + cost: None, + }); + } + + Ok(()) + } +} diff --git a/src/optimizer/rule/implementation/dql/table_scan.rs b/src/optimizer/rule/implementation/dql/table_scan.rs index 22457b23..a3bcf2dd 100644 --- 
a/src/optimizer/rule/implementation/dql/table_scan.rs +++ b/src/optimizer/rule/implementation/dql/table_scan.rs @@ -17,7 +17,7 @@ use crate::optimizer::core::memo::{Expression, GroupExpression}; use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; use crate::optimizer::core::statistics_meta::StatisticMetaLoader; -use crate::planner::operator::{Operator, PhysicalOption}; +use crate::planner::operator::{Operator, PhysicalOption, PlanImpl, SortOption}; use crate::storage::Transaction; use crate::types::index::IndexType; use std::sync::LazyLock; @@ -54,10 +54,9 @@ impl ImplementationRule for SeqScanImplementation { .map(|statistics_meta| statistics_meta.histogram().values_len()); group_expr.append_expr(Expression { - op: PhysicalOption::SeqScan, + op: PhysicalOption::new(PlanImpl::SeqScan, SortOption::None), cost, }); - Ok(()) } else { unreachable!("invalid operator!") @@ -92,7 +91,6 @@ impl ImplementationRule for IndexScanImplementation { loader.load(&scan_op.table_name, index_info.meta.id)? { let mut row_count = statistics_meta.collect_count(range)?; - if index_info.covered_deserializers.is_none() && !matches!(index_info.meta.ty, IndexType::PrimaryKey { .. }) { @@ -103,10 +101,21 @@ impl ImplementationRule for IndexScanImplementation { } } + if let (Some(covered), Some(row_count)) = (index_info.sort_elimination_hint, cost) { + let rows = row_count.max(1) as f64; + let raw_bonus = rows * rows.log2(); + // TODO: replace this heuristic with accurate row-count driven sort cost once available. 
+ let bonus = (raw_bonus as usize) / covered.max(1); + cost = Some(row_count.saturating_sub(bonus)); + } + group_expr.append_expr(Expression { - op: PhysicalOption::IndexScan(index_info.clone()), + op: PhysicalOption::new( + PlanImpl::IndexScan(index_info.clone()), + index_info.sort_option.clone(), + ), cost, - }) + }); } Ok(()) diff --git a/src/optimizer/rule/implementation/dql/top_k.rs b/src/optimizer/rule/implementation/dql/top_k.rs index d98497cf..d79cd2d3 100644 --- a/src/optimizer/rule/implementation/dql/top_k.rs +++ b/src/optimizer/rule/implementation/dql/top_k.rs @@ -17,8 +17,7 @@ use crate::optimizer::core::memo::{Expression, GroupExpression}; use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; use crate::optimizer::core::statistics_meta::StatisticMetaLoader; -use crate::planner::operator::{Operator, PhysicalOption}; -use crate::single_mapping; +use crate::planner::operator::{Operator, PhysicalOption, PlanImpl, SortOption}; use crate::storage::Transaction; use std::sync::LazyLock; @@ -30,4 +29,32 @@ static TOPK_PATTERN: LazyLock = LazyLock::new(|| Pattern { #[derive(Clone)] pub struct TopKImplementation; -single_mapping!(TopKImplementation, TOPK_PATTERN, PhysicalOption::TopK); +impl MatchPattern for TopKImplementation { + fn pattern(&self) -> &Pattern { + &TOPK_PATTERN + } +} + +impl ImplementationRule for TopKImplementation { + fn to_expression( + &self, + op: &Operator, + _: &StatisticMetaLoader<'_, T>, + group_expr: &mut GroupExpression, + ) -> Result<(), DatabaseError> { + if let Operator::TopK(op) = op { + group_expr.append_expr(Expression { + op: PhysicalOption::new( + PlanImpl::TopK, + SortOption::OrderBy { + fields: op.sort_fields.clone(), + ignore_prefix_len: 0, + }, + ), + cost: None, + }); + } + + Ok(()) + } +} diff --git a/src/optimizer/rule/implementation/dql/values.rs b/src/optimizer/rule/implementation/dql/values.rs index c3ae6edc..d7713ab9 100644 --- 
a/src/optimizer/rule/implementation/dql/values.rs +++ b/src/optimizer/rule/implementation/dql/values.rs @@ -17,7 +17,7 @@ use crate::optimizer::core::memo::{Expression, GroupExpression}; use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; use crate::optimizer::core::statistics_meta::StatisticMetaLoader; -use crate::planner::operator::{Operator, PhysicalOption}; +use crate::planner::operator::{Operator, PhysicalOption, PlanImpl, SortOption}; use crate::single_mapping; use crate::storage::Transaction; use std::sync::LazyLock; @@ -30,4 +30,8 @@ static VALUES_PATTERN: LazyLock = LazyLock::new(|| Pattern { #[derive(Clone)] pub struct ValuesImplementation; -single_mapping!(ValuesImplementation, VALUES_PATTERN, PhysicalOption::Values); +single_mapping!( + ValuesImplementation, + VALUES_PATTERN, + PhysicalOption::new(PlanImpl::Values, SortOption::None) +); diff --git a/src/optimizer/rule/normalization/column_pruning.rs b/src/optimizer/rule/normalization/column_pruning.rs index f3403714..55a47606 100644 --- a/src/optimizer/rule/normalization/column_pruning.rs +++ b/src/optimizer/rule/normalization/column_pruning.rs @@ -228,7 +228,7 @@ mod tests { use crate::binder::test::build_t1_table; use crate::errors::DatabaseError; use crate::optimizer::heuristic::batch::HepBatchStrategy; - use crate::optimizer::heuristic::optimizer::HepOptimizer; + use crate::optimizer::heuristic::optimizer::HepOptimizerPipeline; use crate::optimizer::rule::normalization::NormalizationRuleImpl; use crate::planner::operator::join::JoinCondition; use crate::planner::operator::Operator; @@ -240,12 +240,15 @@ mod tests { let table_state = build_t1_table()?; let plan = table_state.plan("select c1, c3 from t1 left join t2 on c1 = c3")?; - let best_plan = HepOptimizer::new(plan.clone()) - .batch( + let pipeline = HepOptimizerPipeline::builder() + .before_batch( "test_column_pruning".to_string(), 
HepBatchStrategy::once_topdown(), vec![NormalizationRuleImpl::ColumnPruning], ) + .build(); + let best_plan = pipeline + .instantiate(plan) .find_best::(None)?; assert!(matches!(best_plan.childrens.as_ref(), Childrens::Only(_))); diff --git a/src/optimizer/rule/normalization/combine_operators.rs b/src/optimizer/rule/normalization/combine_operators.rs index 95ca6d01..fcd9034e 100644 --- a/src/optimizer/rule/normalization/combine_operators.rs +++ b/src/optimizer/rule/normalization/combine_operators.rs @@ -190,7 +190,7 @@ mod tests { use crate::errors::DatabaseError; use crate::expression::{BinaryOperator, ScalarExpression}; use crate::optimizer::heuristic::batch::HepBatchStrategy; - use crate::optimizer::heuristic::optimizer::HepOptimizer; + use crate::optimizer::heuristic::optimizer::HepOptimizerPipeline; use crate::optimizer::rule::normalization::NormalizationRuleImpl; use crate::planner::operator::Operator; use crate::planner::Childrens; @@ -201,12 +201,16 @@ mod tests { let table_state = build_t1_table()?; let plan = table_state.plan("select c1 from (select c1, c2 from t1) t")?; - let optimizer = HepOptimizer::new(plan).batch( - "test_collapse_project".to_string(), - HepBatchStrategy::once_topdown(), - vec![NormalizationRuleImpl::CollapseProject], - ); - let best_plan = optimizer.find_best::(None)?; + let pipeline = HepOptimizerPipeline::builder() + .before_batch( + "test_collapse_project".to_string(), + HepBatchStrategy::once_topdown(), + vec![NormalizationRuleImpl::CollapseProject], + ) + .build(); + let best_plan = pipeline + .instantiate(plan) + .find_best::(None)?; if let Operator::Project(op) = &best_plan.operator { assert_eq!(op.exprs.len(), 1); @@ -234,12 +238,16 @@ mod tests { let original_grandchild = original_child.childrens.pop_only(); assert!(matches!(original_grandchild.operator, Operator::Project(_))); - let optimizer = HepOptimizer::new(plan).batch( - "test_collapse_project_with_alias".to_string(), - HepBatchStrategy::once_topdown(), - 
vec![NormalizationRuleImpl::CollapseProject], - ); - let best_plan = optimizer.find_best::(None)?; + let pipeline = HepOptimizerPipeline::builder() + .before_batch( + "test_collapse_project_with_alias".to_string(), + HepBatchStrategy::once_topdown(), + vec![NormalizationRuleImpl::CollapseProject], + ) + .build(); + let best_plan = pipeline + .instantiate(plan) + .find_best::(None)?; if let Operator::Project(op) = &best_plan.operator { assert_eq!(op.exprs.len(), 1); } else { @@ -263,12 +271,16 @@ mod tests { let plan = table_state.plan("select * from (select * from t1 where c1 > 1) t where 1 = 1")?; - let optimizer = HepOptimizer::new(plan).batch( - "test_combine_filter".to_string(), - HepBatchStrategy::once_topdown(), - vec![NormalizationRuleImpl::CombineFilter], - ); - let best_plan = optimizer.find_best::(None)?; + let pipeline = HepOptimizerPipeline::builder() + .before_batch( + "test_combine_filter".to_string(), + HepBatchStrategy::once_topdown(), + vec![NormalizationRuleImpl::CombineFilter], + ) + .build(); + let best_plan = pipeline + .instantiate(plan) + .find_best::(None)?; let filter_op = best_plan.childrens.pop_only(); if let Operator::Filter(op) = &filter_op.operator { @@ -289,13 +301,17 @@ mod tests { let table_state = build_t1_table()?; let plan = table_state.plan("select distinct c1, c2 from t1 group by c1, c2")?; - let optimizer = HepOptimizer::new(plan.clone()).batch( - "test_collapse_group_by_agg".to_string(), - HepBatchStrategy::once_topdown(), - vec![NormalizationRuleImpl::CollapseGroupByAgg], - ); - - let best_plan = optimizer.find_best::(None)?; + let pipeline = HepOptimizerPipeline::builder() + .before_batch( + "test_collapse_group_by_agg".to_string(), + HepBatchStrategy::once_topdown(), + vec![NormalizationRuleImpl::CollapseGroupByAgg], + ) + .build(); + + let best_plan = pipeline + .instantiate(plan) + .find_best::(None)?; let agg_op = best_plan.childrens.pop_only(); if let Operator::Aggregate(_) = &agg_op.operator { diff --git 
a/src/optimizer/rule/normalization/mod.rs b/src/optimizer/rule/normalization/mod.rs index dbc62f55..4b2129ed 100644 --- a/src/optimizer/rule/normalization/mod.rs +++ b/src/optimizer/rule/normalization/mod.rs @@ -27,10 +27,12 @@ use crate::optimizer::rule::normalization::compilation_in_advance::{ use crate::optimizer::rule::normalization::pushdown_limit::{ LimitProjectTranspose, PushLimitIntoScan, PushLimitThroughJoin, }; -use crate::optimizer::rule::normalization::pushdown_predicates::PushPredicateIntoScan; -use crate::optimizer::rule::normalization::pushdown_predicates::PushPredicateThroughJoin; +use crate::optimizer::rule::normalization::pushdown_predicates::{ + PushJoinPredicateIntoScan, PushPredicateIntoScan, PushPredicateThroughJoin, +}; use crate::optimizer::rule::normalization::simplification::ConstantCalculation; use crate::optimizer::rule::normalization::simplification::SimplifyFilter; +use crate::optimizer::rule::normalization::sort_elimination::EliminateRedundantSort; use crate::optimizer::rule::normalization::top_k::TopK; use crate::planner::LogicalPlan; mod column_pruning; @@ -39,7 +41,9 @@ mod compilation_in_advance; mod pushdown_limit; mod pushdown_predicates; mod simplification; +mod sort_elimination; mod top_k; +pub use sort_elimination::annotate_sort_preserving_indexes; #[derive(Debug, Copy, Clone)] pub enum NormalizationRuleImpl { @@ -54,6 +58,7 @@ pub enum NormalizationRuleImpl { PushLimitIntoTableScan, // PushDown predicates PushPredicateThroughJoin, + PushJoinPredicateIntoScan, // Tips: need to be used with `SimplifyFilter` PushPredicateIntoScan, // Simplification @@ -63,6 +68,7 @@ pub enum NormalizationRuleImpl { BindExpressionPosition, EvaluatorBind, TopK, + EliminateRedundantSort, } impl MatchPattern for NormalizationRuleImpl { @@ -76,12 +82,14 @@ impl MatchPattern for NormalizationRuleImpl { NormalizationRuleImpl::PushLimitThroughJoin => PushLimitThroughJoin.pattern(), NormalizationRuleImpl::PushLimitIntoTableScan => 
PushLimitIntoScan.pattern(), NormalizationRuleImpl::PushPredicateThroughJoin => PushPredicateThroughJoin.pattern(), + NormalizationRuleImpl::PushJoinPredicateIntoScan => PushJoinPredicateIntoScan.pattern(), NormalizationRuleImpl::PushPredicateIntoScan => PushPredicateIntoScan.pattern(), NormalizationRuleImpl::SimplifyFilter => SimplifyFilter.pattern(), NormalizationRuleImpl::ConstantCalculation => ConstantCalculation.pattern(), NormalizationRuleImpl::BindExpressionPosition => BindExpressionPosition.pattern(), NormalizationRuleImpl::EvaluatorBind => EvaluatorBind.pattern(), NormalizationRuleImpl::TopK => TopK.pattern(), + NormalizationRuleImpl::EliminateRedundantSort => EliminateRedundantSort.pattern(), } } } @@ -97,12 +105,16 @@ impl NormalizationRule for NormalizationRuleImpl { NormalizationRuleImpl::PushLimitThroughJoin => PushLimitThroughJoin.apply(plan), NormalizationRuleImpl::PushLimitIntoTableScan => PushLimitIntoScan.apply(plan), NormalizationRuleImpl::PushPredicateThroughJoin => PushPredicateThroughJoin.apply(plan), + NormalizationRuleImpl::PushJoinPredicateIntoScan => { + PushJoinPredicateIntoScan.apply(plan) + } NormalizationRuleImpl::SimplifyFilter => SimplifyFilter.apply(plan), NormalizationRuleImpl::PushPredicateIntoScan => PushPredicateIntoScan.apply(plan), NormalizationRuleImpl::ConstantCalculation => ConstantCalculation.apply(plan), NormalizationRuleImpl::BindExpressionPosition => BindExpressionPosition.apply(plan), NormalizationRuleImpl::EvaluatorBind => EvaluatorBind.apply(plan), NormalizationRuleImpl::TopK => TopK.apply(plan), + NormalizationRuleImpl::EliminateRedundantSort => EliminateRedundantSort.apply(plan), } } } diff --git a/src/optimizer/rule/normalization/pushdown_limit.rs b/src/optimizer/rule/normalization/pushdown_limit.rs index 65f75126..145283ac 100644 --- a/src/optimizer/rule/normalization/pushdown_limit.rs +++ b/src/optimizer/rule/normalization/pushdown_limit.rs @@ -161,7 +161,7 @@ mod tests { use 
crate::binder::test::build_t1_table; use crate::errors::DatabaseError; use crate::optimizer::heuristic::batch::HepBatchStrategy; - use crate::optimizer::heuristic::optimizer::HepOptimizer; + use crate::optimizer::heuristic::optimizer::HepOptimizerPipeline; use crate::optimizer::rule::normalization::NormalizationRuleImpl; use crate::planner::operator::Operator; use crate::storage::rocksdb::RocksTransaction; @@ -171,12 +171,15 @@ mod tests { let table_state = build_t1_table()?; let plan = table_state.plan("select c1, c2 from t1 limit 1")?; - let best_plan = HepOptimizer::new(plan.clone()) - .batch( + let pipeline = HepOptimizerPipeline::builder() + .before_batch( "test_limit_project_transpose".to_string(), HepBatchStrategy::once_topdown(), vec![NormalizationRuleImpl::LimitProjectTranspose], ) + .build(); + let best_plan = pipeline + .instantiate(plan) .find_best::(None)?; if let Operator::Project(_) = &best_plan.operator { @@ -198,8 +201,8 @@ mod tests { let table_state = build_t1_table()?; let plan = table_state.plan("select * from t1 left join t2 on c1 = c3 limit 1")?; - let best_plan = HepOptimizer::new(plan.clone()) - .batch( + let pipeline = HepOptimizerPipeline::builder() + .before_batch( "test_push_limit_through_join".to_string(), HepBatchStrategy::once_topdown(), vec![ @@ -207,6 +210,9 @@ mod tests { NormalizationRuleImpl::PushLimitThroughJoin, ], ) + .build(); + let best_plan = pipeline + .instantiate(plan) .find_best::(None)?; let join_op = best_plan.childrens.pop_only().childrens.pop_only(); @@ -230,8 +236,8 @@ mod tests { let table_state = build_t1_table()?; let plan = table_state.plan("select * from t1 limit 1 offset 1")?; - let best_plan = HepOptimizer::new(plan.clone()) - .batch( + let pipeline = HepOptimizerPipeline::builder() + .before_batch( "test_push_limit_into_table_scan".to_string(), HepBatchStrategy::once_topdown(), vec![ @@ -239,6 +245,9 @@ mod tests { NormalizationRuleImpl::PushLimitIntoTableScan, ], ) + .build(); + let best_plan = pipeline + 
.instantiate(plan) .find_best::(None)?; let scan_op = best_plan.childrens.pop_only(); diff --git a/src/optimizer/rule/normalization/pushdown_predicates.rs b/src/optimizer/rule/normalization/pushdown_predicates.rs index d934810e..71842859 100644 --- a/src/optimizer/rule/normalization/pushdown_predicates.rs +++ b/src/optimizer/rule/normalization/pushdown_predicates.rs @@ -23,9 +23,9 @@ use crate::optimizer::plan_utils::{ left_child, only_child_mut, replace_with_only_child, right_child, wrap_child_with, }; use crate::planner::operator::filter::FilterOperator; -use crate::planner::operator::join::JoinType; -use crate::planner::operator::Operator; -use crate::planner::LogicalPlan; +use crate::planner::operator::join::{JoinCondition, JoinType}; +use crate::planner::operator::{Operator, SortOption}; +use crate::planner::{LogicalPlan, SchemaOutput}; use crate::types::index::{IndexInfo, IndexMetaRef, IndexType}; use crate::types::value::DataValue; use crate::types::LogicalType; @@ -50,7 +50,11 @@ static PUSH_PREDICATE_INTO_SCAN: LazyLock = LazyLock::new(|| Pattern { }]), }); -// TODO: 感觉是只是处理projection中的alias反向替换为filter中表达式 +static JOIN_WITH_FILTER_PATTERN: LazyLock = LazyLock::new(|| Pattern { + predicate: |op| matches!(op, Operator::Join(_)), + children: PatternChildrenPredicate::None, +}); + #[allow(dead_code)] static PUSH_PREDICATE_THROUGH_NON_JOIN: LazyLock = LazyLock::new(|| Pattern { predicate: |op| matches!(op, Operator::Filter(_)), @@ -101,6 +105,13 @@ pub fn is_subset_cols(left: &[ColumnRef], right: &[ColumnRef]) -> bool { left.iter().all(|l| right.contains(l)) } +fn plan_output_columns(plan: &LogicalPlan) -> Vec { + match plan.output_schema_direct() { + SchemaOutput::Schema(schema) => schema, + SchemaOutput::SchemaRef(schema_ref) => schema_ref.iter().cloned().collect(), + } +} + /// Comments copied from Spark Catalyst PushPredicateThroughJoin /// /// Pushes down `Filter` operators where the `condition` can be @@ -149,10 +160,10 @@ impl NormalizationRule for 
PushPredicateThroughJoin { } let left_columns = left_child(join_plan) - .map(|child| child.operator.referenced_columns(true)) + .map(plan_output_columns) .unwrap_or_default(); let right_columns = right_child(join_plan) - .map(|child| child.operator.referenced_columns(true)) + .map(plan_output_columns) .unwrap_or_default(); let filter_exprs = split_conjunctive_predicates(&filter_op.predicate); @@ -245,11 +256,19 @@ impl NormalizationRule for PushPredicateIntoScan { range, covered_deserializers, cover_mapping, + sort_option, + sort_elimination_hint: _, } in &mut scan_op.index_infos { if range.is_some() { continue; } + let SortOption::OrderBy { + ignore_prefix_len, .. + } = sort_option + else { + return Err(DatabaseError::InvalidIndex); + }; *range = match meta.ty { IndexType::PrimaryKey { is_multiple: false } | IndexType::Unique @@ -258,7 +277,7 @@ impl NormalizationRule for PushPredicateIntoScan { .detach(&op.predicate)? } IndexType::PrimaryKey { is_multiple: true } | IndexType::Composite => { - Self::composite_range(&op, meta)? + Self::composite_range(&op, meta, ignore_prefix_len)? 
} }; if range.is_none() { @@ -312,6 +331,7 @@ impl PushPredicateIntoScan { fn composite_range( op: &FilterOperator, meta: &mut IndexMetaRef, + ignore_prefix_len: &mut usize, ) -> Result, DatabaseError> { let mut res = None; let mut eq_ranges = Vec::with_capacity(meta.column_ids.len()); @@ -331,6 +351,8 @@ impl PushPredicateIntoScan { } break; } + *ignore_prefix_len = eq_ranges.len(); + if res.is_none() { if let Some(range) = eq_ranges.pop() { res = range.combining_eqs(&eq_ranges); @@ -341,8 +363,8 @@ impl PushPredicateIntoScan { fn eq_to_scope(range: Range) -> Range { match range { Range::Eq(DataValue::Tuple(values, _)) => { - let min = Bound::Excluded(DataValue::Tuple(values.clone(), false)); - let max = Bound::Excluded(DataValue::Tuple(values, true)); + let min = Bound::Included(DataValue::Tuple(values.clone(), false)); + let max = Bound::Included(DataValue::Tuple(values, true)); Range::Scope { min, max } } @@ -363,6 +385,119 @@ impl PushPredicateIntoScan { } } +pub struct PushJoinPredicateIntoScan; + +impl MatchPattern for PushJoinPredicateIntoScan { + fn pattern(&self) -> &Pattern { + &JOIN_WITH_FILTER_PATTERN + } +} + +impl NormalizationRule for PushJoinPredicateIntoScan { + fn apply(&self, plan: &mut LogicalPlan) -> Result { + let (join_type, filter_expr) = { + let Operator::Join(join_op) = &mut plan.operator else { + return Ok(false); + }; + if !matches!( + join_op.join_type, + JoinType::Inner + | JoinType::LeftOuter + | JoinType::LeftSemi + | JoinType::LeftAnti + | JoinType::RightOuter + ) { + return Ok(false); + } + let JoinCondition::On { filter, .. 
} = &mut join_op.on else { + return Ok(false); + }; + let Some(filter_expr) = filter.take() else { + return Ok(false); + }; + (join_op.join_type, filter_expr) + }; + + let left_columns = left_child(plan) + .map(plan_output_columns) + .unwrap_or_default(); + let right_columns = right_child(plan) + .map(plan_output_columns) + .unwrap_or_default(); + + let filter_exprs = split_conjunctive_predicates(&filter_expr); + let (left_filters, rest): (Vec<_>, Vec<_>) = filter_exprs + .into_iter() + .partition(|expr| is_subset_cols(&expr.referenced_columns(true), &left_columns)); + let (right_filters, common_filters): (Vec<_>, Vec<_>) = rest + .into_iter() + .partition(|expr| is_subset_cols(&expr.referenced_columns(true), &right_columns)); + + let (push_left, push_right) = match join_type { + JoinType::Inner => (true, true), + JoinType::LeftOuter => (false, true), + JoinType::RightOuter => (true, false), + JoinType::LeftSemi => (true, false), + JoinType::LeftAnti => (false, false), + _ => (false, false), + }; + + let mut new_ops = (None, None); + let mut remaining_filters = common_filters; + + let (left_push, left_remain) = if push_left { + (left_filters, Vec::new()) + } else { + (Vec::new(), left_filters) + }; + if let Some(filter_op) = reduce_filters(left_push, false) { + new_ops.0 = Some(Operator::Filter(filter_op)); + } else { + remaining_filters.extend(left_remain); + } + + let (right_push, right_remain) = if push_right { + (right_filters, Vec::new()) + } else { + (Vec::new(), right_filters) + }; + if let Some(filter_op) = reduce_filters(right_push, false) { + new_ops.1 = Some(Operator::Filter(filter_op)); + } else { + remaining_filters.extend(right_remain); + } + + let mut applied = false; + if let Some(left_op) = new_ops.0 { + applied |= wrap_child_with(plan, 0, left_op); + } + if let Some(right_op) = new_ops.1 { + applied |= wrap_child_with(plan, 1, right_op); + } + + let mut join_filter = reduce_filters(remaining_filters, false).map(|op| op.predicate); + let 
filter_changed = match &join_filter { + Some(expr) => expr != &filter_expr, + None => true, + }; + + if !filter_changed { + join_filter = Some(filter_expr); + } else { + applied = true; + } + + if let Operator::Join(join_op) = &mut plan.operator { + match &mut join_op.on { + JoinCondition::On { filter, .. } => *filter = join_filter, + JoinCondition::None => {} + } + } + + Ok(applied) + } +} + #[cfg(all(test, not(target_arch = "wasm32")))] mod tests { use crate::binder::test::build_t1_table; @@ -371,11 +506,14 @@ mod tests { use crate::expression::range_detacher::Range; use crate::expression::{BinaryOperator, ScalarExpression}; use crate::optimizer::heuristic::batch::HepBatchStrategy; - use crate::optimizer::heuristic::optimizer::HepOptimizer; + use crate::optimizer::heuristic::optimizer::{ + HepOptimizerPipeline, HepOptimizerPipelineBuilder, + }; use crate::optimizer::rule::normalization::NormalizationRuleImpl; use crate::planner::operator::filter::FilterOperator; + use crate::planner::operator::join::{JoinCondition, JoinType}; use crate::planner::operator::table_scan::TableScanOperator; - use crate::planner::operator::Operator; + use crate::planner::operator::{Operator, SortOption}; use crate::planner::{Childrens, LogicalPlan}; use crate::storage::rocksdb::RocksTransaction; use crate::types::index::{IndexInfo, IndexMeta, IndexType}; @@ -385,24 +523,57 @@ mod tests { use std::sync::Arc; use ulid::Ulid; + fn apply_pipeline( + plan: LogicalPlan, + builder: HepOptimizerPipelineBuilder, + ) -> Result { + builder + .build() + .instantiate(plan) + .find_best::(None) + } + + fn with_join_type(mut plan: LogicalPlan, join_type: JoinType) -> LogicalPlan { + fn visit(plan: &mut LogicalPlan, join_type: JoinType) -> bool { + if let Operator::Join(join_op) = &mut plan.operator { + join_op.join_type = join_type; + return true; + } + match plan.childrens.as_mut() { + Childrens::Only(child) => visit(child, join_type), + Childrens::Twins { left, right } => { + visit(left, join_type) 
|| visit(right, join_type) + } + Childrens::None => false, + } + } + assert!( + visit(&mut plan, join_type), + "expected plan to contain a join" + ); + plan + } + #[test] fn test_push_predicate_into_scan() -> Result<(), DatabaseError> { let table_state = build_t1_table()?; // 1 - c2 < 0 => c2 > 1 let plan = table_state.plan("select * from t1 where -(1 - c2) > 0")?; - let best_plan = HepOptimizer::new(plan) - .batch( - "simplify_filter".to_string(), - HepBatchStrategy::once_topdown(), - vec![NormalizationRuleImpl::SimplifyFilter], - ) - .batch( - "test_push_predicate_into_scan".to_string(), - HepBatchStrategy::once_topdown(), - vec![NormalizationRuleImpl::PushPredicateIntoScan], - ) - .find_best::(None)?; + let best_plan = apply_pipeline( + plan, + HepOptimizerPipeline::builder() + .before_batch( + "simplify_filter".to_string(), + HepBatchStrategy::once_topdown(), + vec![NormalizationRuleImpl::SimplifyFilter], + ) + .before_batch( + "test_push_predicate_into_scan".to_string(), + HepBatchStrategy::once_topdown(), + vec![NormalizationRuleImpl::PushPredicateIntoScan], + ), + )?; let scan_op = best_plan.childrens.pop_only().childrens.pop_only(); if let Operator::TableScan(op) = &scan_op.operator { @@ -485,15 +656,25 @@ mod tests { index_infos: vec![ IndexInfo { meta: index_meta_reordered, + sort_option: SortOption::OrderBy { + fields: vec![], + ignore_prefix_len: 0, + }, range: None, covered_deserializers: None, cover_mapping: None, + sort_elimination_hint: None, }, IndexInfo { meta: index_meta_aligned, + sort_option: SortOption::OrderBy { + fields: vec![], + ignore_prefix_len: 0, + }, range: None, covered_deserializers: None, cover_mapping: None, + sort_elimination_hint: None, }, ], with_pk: false, @@ -532,13 +713,14 @@ mod tests { Childrens::Only(Box::new(scan_plan)), ); - let best_plan = HepOptimizer::new(filter_plan) - .batch( + let best_plan = apply_pipeline( + filter_plan, + HepOptimizerPipeline::builder().before_batch( "push_cover_mapping".to_string(), 
HepBatchStrategy::once_topdown(), vec![NormalizationRuleImpl::PushPredicateIntoScan], - ) - .find_best::(None)?; + ), + )?; let table_scan = best_plan.childrens.pop_only(); if let Operator::TableScan(op) = &table_scan.operator { @@ -590,13 +772,14 @@ mod tests { let plan = table_state.plan("select * from t1 left join t2 on c1 = c3 where c1 > 1 and c3 < 2")?; - let best_plan = HepOptimizer::new(plan) - .batch( + let best_plan = apply_pipeline( + plan, + HepOptimizerPipeline::builder().before_batch( "test_push_predicate_through_join".to_string(), HepBatchStrategy::once_topdown(), vec![NormalizationRuleImpl::PushPredicateThroughJoin], - ) - .find_best::(None)?; + ), + )?; let filter_op = best_plan.childrens.pop_only(); if let Operator::Filter(op) = &filter_op.operator { @@ -635,13 +818,14 @@ mod tests { let plan = table_state .plan("select * from t1 right join t2 on c1 = c3 where c1 > 1 and c3 < 2")?; - let best_plan = HepOptimizer::new(plan) - .batch( + let best_plan = apply_pipeline( + plan, + HepOptimizerPipeline::builder().before_batch( "test_push_predicate_through_join".to_string(), HepBatchStrategy::once_topdown(), vec![NormalizationRuleImpl::PushPredicateThroughJoin], - ) - .find_best::(None)?; + ), + )?; let filter_op = best_plan.childrens.pop_only(); if let Operator::Filter(op) = &filter_op.operator { @@ -680,13 +864,14 @@ mod tests { let plan = table_state .plan("select * from t1 inner join t2 on c1 = c3 where c1 > 1 and c3 < 2")?; - let best_plan = HepOptimizer::new(plan) - .batch( + let best_plan = apply_pipeline( + plan, + HepOptimizerPipeline::builder().before_batch( "test_push_predicate_through_join".to_string(), HepBatchStrategy::once_topdown(), vec![NormalizationRuleImpl::PushPredicateThroughJoin], - ) - .find_best::(None)?; + ), + )?; let join_op = best_plan.childrens.pop_only(); if let Operator::Join(_) = &join_op.operator { @@ -723,4 +908,270 @@ mod tests { Ok(()) } + + #[test] + fn test_push_join_predicate_into_scan_inner_join() -> Result<(), 
DatabaseError> { + let table_state = build_t1_table()?; + let plan = table_state + .plan("select * from t1 inner join t2 on t1.c1 = t2.c3 and t1.c1 > 1 and t2.c3 < 2")?; + + let mut best_plan = apply_pipeline( + plan, + HepOptimizerPipeline::builder().before_batch( + "push_join_predicate_into_scan".to_string(), + HepBatchStrategy::once_topdown(), + vec![NormalizationRuleImpl::PushJoinPredicateIntoScan], + ), + )?; + + if matches!(best_plan.operator, Operator::Project(_)) { + best_plan = best_plan.childrens.pop_only(); + } + + let join_plan = best_plan; + let join_op = match &join_plan.operator { + Operator::Join(op) => op, + _ => unreachable!("expected join root"), + }; + + match &join_op.on { + JoinCondition::On { filter, .. } => assert!( + filter.is_none(), + "join filter should be removed after pushdown" + ), + JoinCondition::None => unreachable!("expected join condition"), + } + + let (left_child, right_child) = join_plan.childrens.pop_twins(); + + if let Operator::Filter(left_filter) = &left_child.operator { + match left_filter.predicate { + ScalarExpression::Binary { + op: BinaryOperator::Gt, + ty: LogicalType::Boolean, + .. + } => (), + _ => unreachable!("left filter should be greater-than"), + } + } else { + unreachable!("left child should be filter"); + } + match left_child.childrens.pop_only().operator { + Operator::TableScan(_) => (), + _ => unreachable!("left filter child should be table scan"), + } + + if let Operator::Filter(right_filter) = &right_child.operator { + match right_filter.predicate { + ScalarExpression::Binary { + op: BinaryOperator::Lt, + ty: LogicalType::Boolean, + .. 
+ } => (), + _ => unreachable!("right filter should be less-than"), + } + } else { + unreachable!("right child should be filter"); + } + match right_child.childrens.pop_only().operator { + Operator::TableScan(_) => (), + _ => unreachable!("right filter child should be table scan"), + } + + Ok(()) + } + + #[test] + fn test_push_join_predicate_left_outer_preserve_left() -> Result<(), DatabaseError> { + let table_state = build_t1_table()?; + let plan = + table_state.plan("select * from t1 left join t2 on t1.c1 = t2.c3 and t1.c1 > 1")?; + + let mut best_plan = apply_pipeline( + plan, + HepOptimizerPipeline::builder().before_batch( + "push_join_predicate_into_scan".to_string(), + HepBatchStrategy::once_topdown(), + vec![NormalizationRuleImpl::PushJoinPredicateIntoScan], + ), + )?; + + if matches!(best_plan.operator, Operator::Project(_)) { + best_plan = best_plan.childrens.pop_only(); + } + + let join_plan = best_plan; + let join_op = match &join_plan.operator { + Operator::Join(op) => op, + _ => unreachable!("expected join root"), + }; + + assert!(matches!(join_op.join_type, JoinType::LeftOuter)); + + match &join_op.on { + JoinCondition::On { filter, .. 
} => assert!( + filter.is_some(), + "left-side predicate should remain in join filter" + ), + JoinCondition::None => unreachable!("expected join condition"), + } + + let (left_child, _right_child) = join_plan.childrens.pop_twins(); + assert!( + !matches!(left_child.operator, Operator::Filter(_)), + "left child should not introduce new filter" + ); + + Ok(()) + } + + #[test] + fn test_push_join_predicate_left_outer_push_right() -> Result<(), DatabaseError> { + let table_state = build_t1_table()?; + let plan = + table_state.plan("select * from t1 left join t2 on t1.c1 = t2.c3 and t2.c3 < 2")?; + + let mut best_plan = apply_pipeline( + plan, + HepOptimizerPipeline::builder().before_batch( + "push_join_predicate_into_scan".to_string(), + HepBatchStrategy::once_topdown(), + vec![NormalizationRuleImpl::PushJoinPredicateIntoScan], + ), + )?; + + if matches!(best_plan.operator, Operator::Project(_)) { + best_plan = best_plan.childrens.pop_only(); + } + + let join_plan = best_plan; + + let join_op = match &join_plan.operator { + Operator::Join(op) => op, + _ => unreachable!("expected join root"), + }; + + assert!(matches!(join_op.join_type, JoinType::LeftOuter)); + match &join_op.on { + JoinCondition::On { filter, .. } => assert!( + filter.is_none(), + "right-side predicate should be pushed down" + ), + JoinCondition::None => unreachable!("expected join condition"), + } + + let (_left_child, right_child) = join_plan.childrens.pop_twins(); + let filter_op = match right_child.operator { + Operator::Filter(ref op) => op, + _ => unreachable!("right child should be a filter"), + }; + match filter_op.predicate { + ScalarExpression::Binary { + op: BinaryOperator::Lt, + ty: LogicalType::Boolean, + .. 
+ } => (), + _ => unreachable!("right filter should be less-than predicate"), + } + match right_child.childrens.pop_only().operator { + Operator::TableScan(_) => (), + _ => unreachable!("filter child should be a table scan"), + } + + Ok(()) + } + + #[test] + fn test_push_join_predicate_left_semi_keeps_right_filter() -> Result<(), DatabaseError> { + let table_state = build_t1_table()?; + let plan = + table_state.plan("select * from t1 inner join t2 on t1.c1 = t2.c3 and t2.c3 < 2")?; + let plan = with_join_type(plan, JoinType::LeftSemi); + + let mut best_plan = apply_pipeline( + plan, + HepOptimizerPipeline::builder().before_batch( + "push_join_predicate_into_scan".to_string(), + HepBatchStrategy::once_topdown(), + vec![NormalizationRuleImpl::PushJoinPredicateIntoScan], + ), + )?; + + if matches!(best_plan.operator, Operator::Project(_)) { + best_plan = best_plan.childrens.pop_only(); + } + + let join_plan = best_plan; + { + let join_op = match &join_plan.operator { + Operator::Join(op) => op, + _ => unreachable!("expected join root"), + }; + + assert!(matches!(join_op.join_type, JoinType::LeftSemi)); + match &join_op.on { + JoinCondition::On { filter, .. 
} => assert!( + filter.is_some(), + "semi join should keep right-side predicates in the join filter" + ), + JoinCondition::None => unreachable!("expected join condition"), + } + } + let (_left_child, right_child) = join_plan.childrens.pop_twins(); + assert!( + !matches!(right_child.operator, Operator::Filter(_)), + "right child should not get a pushed-down filter for semi join" + ); + + Ok(()) + } + + #[test] + fn test_push_join_predicate_left_anti_keeps_filters() -> Result<(), DatabaseError> { + let table_state = build_t1_table()?; + let plan = table_state + .plan("select * from t1 inner join t2 on t1.c1 = t2.c3 and t1.c1 > 1 and t2.c3 < 2")?; + let plan = with_join_type(plan, JoinType::LeftAnti); + + let mut best_plan = apply_pipeline( + plan, + HepOptimizerPipeline::builder().before_batch( + "push_join_predicate_into_scan".to_string(), + HepBatchStrategy::once_topdown(), + vec![NormalizationRuleImpl::PushJoinPredicateIntoScan], + ), + )?; + + if matches!(best_plan.operator, Operator::Project(_)) { + best_plan = best_plan.childrens.pop_only(); + } + + let join_plan = best_plan; + { + let join_op = match &join_plan.operator { + Operator::Join(op) => op, + _ => unreachable!("expected join root"), + }; + assert!(matches!(join_op.join_type, JoinType::LeftAnti)); + + match &join_op.on { + JoinCondition::On { filter, .. 
} => { + assert!(filter.is_some(), "left anti join should keep ON predicates") + } + JoinCondition::None => unreachable!("expected join condition"), + } + } + + let (left_child, right_child) = join_plan.childrens.pop_twins(); + assert!( + !matches!(left_child.operator, Operator::Filter(_)), + "left anti join should not push predicates to the left child" + ); + assert!( + !matches!(right_child.operator, Operator::Filter(_)), + "left anti join should not push predicates to the right child" + ); + + Ok(()) + } } diff --git a/src/optimizer/rule/normalization/simplification.rs b/src/optimizer/rule/normalization/simplification.rs index 8b764a7e..ffeb33de 100644 --- a/src/optimizer/rule/normalization/simplification.rs +++ b/src/optimizer/rule/normalization/simplification.rs @@ -133,7 +133,7 @@ mod test { use crate::expression::range_detacher::{Range, RangeDetacher}; use crate::expression::{BinaryOperator, ScalarExpression, UnaryOperator}; use crate::optimizer::heuristic::batch::HepBatchStrategy; - use crate::optimizer::heuristic::optimizer::HepOptimizer; + use crate::optimizer::heuristic::optimizer::HepOptimizerPipeline; use crate::optimizer::rule::normalization::NormalizationRuleImpl; use crate::planner::operator::Operator; use crate::planner::LogicalPlan; @@ -142,6 +142,19 @@ mod test { use crate::types::{ColumnId, LogicalType}; use std::collections::Bound; + fn run_with_single_batch( + plan: LogicalPlan, + name: &str, + strategy: HepBatchStrategy, + rules: Vec, + ) -> Result { + HepOptimizerPipeline::builder() + .before_batch(name.to_string(), strategy, rules) + .build() + .instantiate(plan) + .find_best::(None) + } + #[test] fn test_constant_calculation_omitted() -> Result<(), DatabaseError> { let table_state = build_t1_table()?; @@ -149,8 +162,8 @@ mod test { let plan = table_state.plan("select c1 + (2 + 1), 2 + 1 from t1 where (2 + (-1)) < -(c1 + 1)")?; - let best_plan = HepOptimizer::new(plan) - .batch( + let best_plan = HepOptimizerPipeline::builder() + 
.before_batch( "test_simplification".to_string(), HepBatchStrategy::once_topdown(), vec![ @@ -158,6 +171,8 @@ mod test { NormalizationRuleImpl::ConstantCalculation, ], ) + .build() + .instantiate(plan) .find_best::(None)?; if let Operator::Project(project_op) = best_plan.clone().operator { let constant_expr = ScalarExpression::Constant(DataValue::Int32(3)); @@ -215,13 +230,12 @@ mod test { let plan_10 = table_state.plan("select * from t1 where 24 < (-1 - c1) + 1")?; let op = |plan: LogicalPlan| -> Result, DatabaseError> { - let best_plan = HepOptimizer::new(plan.clone()) - .batch( - "test_simplify_filter".to_string(), - HepBatchStrategy::once_topdown(), - vec![NormalizationRuleImpl::SimplifyFilter], - ) - .find_best::(None)?; + let best_plan = run_with_single_batch( + plan, + "test_simplify_filter", + HepBatchStrategy::once_topdown(), + vec![NormalizationRuleImpl::SimplifyFilter], + )?; let filter_op = best_plan.childrens.pop_only(); if let Operator::Filter(filter_op) = filter_op.operator { @@ -260,13 +274,12 @@ mod test { let table_state = build_t1_table()?; let plan = table_state.plan("select * from t1 where -(c1 + 1) > c2")?; - let best_plan = HepOptimizer::new(plan.clone()) - .batch( - "test_simplify_filter".to_string(), - HepBatchStrategy::once_topdown(), - vec![NormalizationRuleImpl::SimplifyFilter], - ) - .find_best::(None)?; + let best_plan = run_with_single_batch( + plan, + "test_simplify_filter", + HepBatchStrategy::once_topdown(), + vec![NormalizationRuleImpl::SimplifyFilter], + )?; let filter_op = best_plan.childrens.pop_only(); if let Operator::Filter(filter_op) = filter_op.operator { @@ -332,13 +345,12 @@ mod test { plan: &LogicalPlan, column_id: &ColumnId, ) -> Result, DatabaseError> { - let best_plan = HepOptimizer::new(plan.clone()) - .batch( - "test_simplify_filter".to_string(), - HepBatchStrategy::once_topdown(), - vec![NormalizationRuleImpl::SimplifyFilter], - ) - .find_best::(None)?; + let best_plan = run_with_single_batch( + plan.clone(), + 
"test_simplify_filter", + HepBatchStrategy::once_topdown(), + vec![NormalizationRuleImpl::SimplifyFilter], + )?; let filter_op = best_plan.childrens.pop_only(); if let Operator::Filter(filter_op) = filter_op.operator { diff --git a/src/optimizer/rule/normalization/sort_elimination.rs b/src/optimizer/rule/normalization/sort_elimination.rs new file mode 100644 index 00000000..bf454ae1 --- /dev/null +++ b/src/optimizer/rule/normalization/sort_elimination.rs @@ -0,0 +1,412 @@ +// Copyright 2024 KipData/KiteSQL +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use crate::catalog::ColumnRef; +use crate::errors::DatabaseError; +use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; +use crate::optimizer::core::rule::{MatchPattern, NormalizationRule}; +use crate::optimizer::plan_utils::{only_child_mut, replace_with_only_child}; +use crate::planner::operator::sort::SortField; +use crate::planner::operator::{Operator, PhysicalOption, PlanImpl, SortOption}; +use crate::planner::{Childrens, LogicalPlan}; +use std::sync::LazyLock; + +static REDUNDANT_SORT_PATTERN: LazyLock = LazyLock::new(|| Pattern { + predicate: |op| matches!(op, Operator::Sort(_)), + children: PatternChildrenPredicate::None, +}); + +pub struct EliminateRedundantSort; + +impl MatchPattern for EliminateRedundantSort { + fn pattern(&self) -> &Pattern { + &REDUNDANT_SORT_PATTERN + } +} + +impl NormalizationRule for EliminateRedundantSort { + fn apply(&self, plan: &mut LogicalPlan) -> Result { + let sort_fields = match &plan.operator { + Operator::Sort(sort_op) => sort_op.sort_fields.clone(), + _ => return Ok(false), + }; + + let child = match only_child_mut(plan) { + Some(child) => child, + None => return Ok(false), + }; + mark_sort_preserving_indexes(child, &sort_fields); + let can_remove = ensure_index_order(child, &sort_fields); + + if !can_remove { + return Ok(false); + } + + Ok(replace_with_only_child(plan)) + } +} + +pub fn annotate_sort_preserving_indexes(plan: &mut LogicalPlan) { + fn visit(plan: &mut LogicalPlan) { + if let Operator::Sort(sort_op) = &plan.operator { + let sort_fields = sort_op.sort_fields.clone(); + mark_sort_preserving_indexes(plan, &sort_fields); + } + match plan.childrens.as_mut() { + Childrens::Only(child) => visit(child), + Childrens::Twins { left, right } => { + visit(left); + visit(right); + } + Childrens::None => {} + } + } + visit(plan); +} + +fn mark_sort_preserving_indexes(plan: &mut LogicalPlan, required: &[SortField]) { + if required.is_empty() { + return; + } + + match &mut plan.operator { + 
Operator::Filter(_) + | Operator::Project(_) + | Operator::Limit(_) + | Operator::TopK(_) + | Operator::Sort(_) => { + if let Childrens::Only(child) = plan.childrens.as_mut() { + mark_sort_preserving_indexes(child, required); + } + } + Operator::TableScan(scan_op) => { + let table_columns: Vec = scan_op.columns.values().cloned().collect(); + let required_from_table = required.iter().all(|field| { + let referenced = field.expr.referenced_columns(true); + referenced + .iter() + .all(|column| table_columns.contains(column)) + }); + if !required_from_table { + return; + } + for index_info in scan_op.index_infos.iter_mut() { + if covers(required, &index_info.sort_option) { + let covered = required.len(); + index_info.sort_elimination_hint = Some( + index_info + .sort_elimination_hint + .map_or(covered, |old| old.max(covered)), + ); + } + } + } + _ => {} + } +} + +fn ensure_index_order(plan: &mut LogicalPlan, required: &[SortField]) -> bool { + if let Some(PhysicalOption { + plan: PlanImpl::IndexScan(index_info), + .. 
+ }) = plan.physical_option.as_ref() + { + if covers(required, &index_info.sort_option) { + return true; + } + } + + if let Some(physical_option) = plan.physical_option.as_ref() { + if matches!(physical_option.sort_option(), SortOption::Follow) { + if let Childrens::Only(child) = plan.childrens.as_mut() { + if ensure_index_order(child, required) { + return true; + } + } + } + } + + false +} + +fn covers(required: &[SortField], provided: &SortOption) -> bool { + if required.is_empty() { + return true; + } + + match provided { + SortOption::OrderBy { + fields, + ignore_prefix_len, + } => { + if fields.is_empty() { + return false; + } + let max_skip = (*ignore_prefix_len).min(fields.len()); + + for skip in 0..=max_skip { + if fields.len() < skip + required.len() { + continue; + } + if required + .iter() + .zip(fields.iter().skip(skip)) + .all(|(lhs, rhs)| lhs == rhs) + { + return true; + } + } + false + } + SortOption::Follow | SortOption::None => false, + } +} + +#[cfg(all(test, not(target_arch = "wasm32")))] +mod tests { + use super::EliminateRedundantSort; + use crate::catalog::{ColumnCatalog, ColumnRef, TableName}; + use crate::errors::DatabaseError; + use crate::expression::range_detacher::Range; + use crate::expression::ScalarExpression; + use crate::optimizer::core::rule::NormalizationRule; + use crate::planner::operator::filter::FilterOperator; + use crate::planner::operator::sort::{SortField, SortOperator}; + use crate::planner::operator::table_scan::TableScanOperator; + use crate::planner::operator::{Operator, PhysicalOption, PlanImpl, SortOption}; + use crate::planner::{Childrens, LogicalPlan}; + use crate::types::index::{IndexInfo, IndexMeta, IndexType}; + use crate::types::value::DataValue; + use crate::types::LogicalType; + use std::collections::BTreeMap; + use std::ops::Bound; + use std::sync::Arc; + use ulid::Ulid; + + fn make_sort_field(name: &str) -> SortField { + let column = ColumnRef::from(ColumnCatalog::new_dummy(name.to_string())); + 
SortField::new(ScalarExpression::column_expr(column), true, true) + } + + fn build_plan( + required_fields: Vec, + index_fields: Vec, + ignore_prefix_len: usize, + ) -> LogicalPlan { + let (index_info, index_sort_option) = build_index_info(index_fields, ignore_prefix_len); + + let mut leaf = LogicalPlan::new(Operator::Dummy, Childrens::None); + leaf.physical_option = Some(PhysicalOption::new( + PlanImpl::IndexScan(index_info), + index_sort_option, + )); + + let mut filter = LogicalPlan::new( + Operator::Filter(FilterOperator { + predicate: ScalarExpression::Constant(DataValue::Boolean(true)), + is_optimized: false, + having: false, + }), + Childrens::Only(Box::new(leaf)), + ); + filter.physical_option = Some(PhysicalOption::new(PlanImpl::Filter, SortOption::Follow)); + + LogicalPlan::new( + Operator::Sort(SortOperator { + sort_fields: required_fields, + limit: None, + }), + Childrens::Only(Box::new(filter)), + ) + } + + fn build_index_info( + index_fields: Vec, + ignore_prefix_len: usize, + ) -> (IndexInfo, SortOption) { + let len = index_fields.len(); + let sort_option = SortOption::OrderBy { + fields: index_fields, + ignore_prefix_len, + }; + let table_name: TableName = Arc::from("t1"); + let meta = Arc::new(IndexMeta { + id: 1, + column_ids: (0..len).map(|_| Ulid::new()).collect(), + table_name, + pk_ty: LogicalType::Integer, + value_ty: LogicalType::Integer, + name: "idx".to_string(), + ty: IndexType::PrimaryKey { + is_multiple: len > 1, + }, + }); + let index_info = IndexInfo { + meta, + sort_option: sort_option.clone(), + range: None, + covered_deserializers: None, + cover_mapping: None, + sort_elimination_hint: None, + }; + (index_info, sort_option) + } + + #[test] + fn remove_sort_when_index_matches_order() -> Result<(), DatabaseError> { + let sort_field = make_sort_field("c1"); + let mut plan = build_plan(vec![sort_field.clone()], vec![sort_field], 0); + let rule = EliminateRedundantSort; + + assert!(rule.apply(&mut plan)?); + 
assert!(matches!(plan.operator, Operator::Filter(_))); + Ok(()) + } + + #[test] + fn remove_sort_when_prefix_can_be_ignored() -> Result<(), DatabaseError> { + let c1 = make_sort_field("c1"); + let c2 = make_sort_field("c2"); + let mut plan = build_plan(vec![c2.clone()], vec![c1, c2], 1); + super::annotate_sort_preserving_indexes(&mut plan); + let rule = EliminateRedundantSort; + + assert!(rule.apply(&mut plan)?); + Ok(()) + } + + #[test] + fn annotate_sets_sort_hint_on_table_scan() -> Result<(), DatabaseError> { + let column = ColumnRef::from(ColumnCatalog::new_dummy("c1".to_string())); + let sort_field = SortField::new(ScalarExpression::column_expr(column.clone()), true, true); + let (index_info, _) = build_index_info(vec![sort_field.clone()], 0); + + let mut columns = BTreeMap::new(); + columns.insert(0, column); + let table_name: TableName = Arc::from("t"); + let table_scan = LogicalPlan::new( + Operator::TableScan(TableScanOperator { + table_name: table_name.clone(), + primary_keys: vec![], + columns, + limit: (None, None), + index_infos: vec![index_info], + with_pk: false, + }), + Childrens::None, + ); + + let mut plan = LogicalPlan::new( + Operator::Sort(SortOperator { + sort_fields: vec![sort_field], + limit: None, + }), + Childrens::Only(Box::new(table_scan)), + ); + + super::annotate_sort_preserving_indexes(&mut plan); + + let table_plan = plan.childrens.pop_only(); + match table_plan.operator { + Operator::TableScan(scan_op) => assert!( + scan_op + .index_infos + .iter() + .any(|info| info.sort_elimination_hint.is_some()), + "expected sort elimination hint on at least one index" + ), + _ => unreachable!("expected table scan under sort"), + } + Ok(()) + } + + #[test] + fn keep_sort_when_order_not_covered() -> Result<(), DatabaseError> { + let c1 = make_sort_field("c1"); + let c2 = make_sort_field("c2"); + let mut plan = build_plan(vec![c2.clone()], vec![c1.clone(), c2], 0); + super::annotate_sort_preserving_indexes(&mut plan); + let rule = 
EliminateRedundantSort; + + assert!(!rule.apply(&mut plan)?); + assert!(matches!(plan.operator, Operator::Sort(_))); + Ok(()) + } + + #[test] + fn promote_index_to_remove_sort() -> Result<(), DatabaseError> { + let column = ColumnRef::from(ColumnCatalog::new_dummy("c_first".to_string())); + let sort_field = SortField::new(ScalarExpression::column_expr(column.clone()), true, true); + let (mut index_info, _) = build_index_info(vec![sort_field.clone()], 0); + index_info.range = Some(Range::Scope { + min: Bound::Unbounded, + max: Bound::Unbounded, + }); + + let mut columns = BTreeMap::new(); + columns.insert(0, column); + + let mut scan_plan = LogicalPlan::new( + Operator::TableScan(TableScanOperator { + table_name: Arc::from("t"), + primary_keys: vec![], + columns, + limit: (None, None), + index_infos: vec![index_info], + with_pk: false, + }), + Childrens::None, + ); + if let Operator::TableScan(scan_op) = &scan_plan.operator { + let index_info = scan_op.index_infos[0].clone(); + scan_plan.physical_option = Some(PhysicalOption::new( + PlanImpl::IndexScan(index_info.clone()), + index_info.sort_option.clone(), + )); + } + + let mut filter = LogicalPlan::new( + Operator::Filter(FilterOperator { + predicate: ScalarExpression::Constant(DataValue::Boolean(true)), + is_optimized: false, + having: false, + }), + Childrens::Only(Box::new(scan_plan)), + ); + filter.physical_option = Some(PhysicalOption::new(PlanImpl::Filter, SortOption::Follow)); + + let mut plan = LogicalPlan::new( + Operator::Sort(SortOperator { + sort_fields: vec![sort_field], + limit: None, + }), + Childrens::Only(Box::new(filter)), + ); + + super::annotate_sort_preserving_indexes(&mut plan); + let rule = EliminateRedundantSort; + assert!(rule.apply(&mut plan)?); + assert!(matches!(plan.operator, Operator::Filter(_))); + + let table_plan = plan.childrens.pop_only(); + assert!(matches!( + table_plan.physical_option, + Some(PhysicalOption { + plan: PlanImpl::IndexScan(_), + .. 
+ }) + )); + Ok(()) + } +} diff --git a/src/planner/operator/mod.rs b/src/planner/operator/mod.rs index d109dcba..74b3437c 100644 --- a/src/planner/operator/mod.rs +++ b/src/planner/operator/mod.rs @@ -63,6 +63,7 @@ use crate::planner::operator::except::ExceptOperator; use crate::planner::operator::function_scan::FunctionScanOperator; use crate::planner::operator::insert::InsertOperator; use crate::planner::operator::join::JoinCondition; +use crate::planner::operator::sort::SortField; use crate::planner::operator::top_k::TopKOperator; use crate::planner::operator::truncate::TruncateOperator; use crate::planner::operator::union::UnionOperator; @@ -115,7 +116,36 @@ pub enum Operator { } #[derive(Debug, PartialEq, Eq, Clone, Hash, ReferenceSerialization)] -pub enum PhysicalOption { +pub enum SortOption { + OrderBy { + fields: Vec, + // When indexing, the output columns can ignore the order of the first few columns due to equality queries in the range prefix, thus satisfying diverse sort_fields. + // e.g.: index (c1, c2, c3) range where c1 = 1, c2 = 2, c3 > 3, + // sort_fields can be c1, c2, c3, or even just c2, c3, in which case ignore_prefix_len is 2. 
+ ignore_prefix_len: usize, + }, + Follow, + None, +} + +#[derive(Debug, PartialEq, Eq, Clone, Hash, ReferenceSerialization)] +pub struct PhysicalOption { + pub plan: PlanImpl, + sort_option: SortOption, +} + +impl PhysicalOption { + pub fn new(plan: PlanImpl, sort_option: SortOption) -> Self { + Self { plan, sort_option } + } + + pub fn sort_option(&self) -> &SortOption { + &self.sort_option + } +} + +#[derive(Debug, PartialEq, Eq, Clone, Hash, ReferenceSerialization)] +pub enum PlanImpl { Dummy, SimpleAggregate, HashAggregate, @@ -335,34 +365,63 @@ impl fmt::Display for Operator { } impl fmt::Display for PhysicalOption { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "{} => (Sort Option: {})", self.plan, self.sort_option)?; + Ok(()) + } +} + +impl fmt::Display for SortOption { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self { + SortOption::OrderBy { + fields, + ignore_prefix_len, + } => { + write!(f, "OrderBy: (")?; + for (i, sort_field) in fields.iter().enumerate() { + write!(f, "{sort_field}")?; + if fields.len() - 1 != i { + write!(f, ", ")?; + } + } + write!(f, ") ignore_prefix_len: {ignore_prefix_len}") + } + SortOption::Follow => write!(f, "Follow"), + SortOption::None => write!(f, "None"), + } + } +} + +impl fmt::Display for PlanImpl { fn fmt(&self, f: &mut Formatter) -> fmt::Result { match self { - PhysicalOption::Dummy => write!(f, "Dummy"), - PhysicalOption::SimpleAggregate => write!(f, "SimpleAggregate"), - PhysicalOption::HashAggregate => write!(f, "HashAggregate"), - PhysicalOption::Filter => write!(f, "Filter"), - PhysicalOption::HashJoin => write!(f, "HashJoin"), - PhysicalOption::NestLoopJoin => write!(f, "NestLoopJoin"), - PhysicalOption::Project => write!(f, "Project"), - PhysicalOption::SeqScan => write!(f, "SeqScan"), - PhysicalOption::FunctionScan => write!(f, "FunctionScan"), - PhysicalOption::IndexScan(index) => write!(f, "IndexScan By {index}"), - PhysicalOption::Sort => write!(f, "Sort"), - 
PhysicalOption::Limit => write!(f, "Limit"), - PhysicalOption::TopK => write!(f, "TopK"), - PhysicalOption::Values => write!(f, "Values"), - PhysicalOption::Insert => write!(f, "Insert"), - PhysicalOption::Update => write!(f, "Update"), - PhysicalOption::Delete => write!(f, "Delete"), - PhysicalOption::AddColumn => write!(f, "AddColumn"), - PhysicalOption::DropColumn => write!(f, "DropColumn"), - PhysicalOption::CreateTable => write!(f, "CreateTable"), - PhysicalOption::DropTable => write!(f, "DropTable"), - PhysicalOption::Truncate => write!(f, "Truncate"), - PhysicalOption::Show => write!(f, "Show"), - PhysicalOption::CopyFromFile => write!(f, "CopyFromFile"), - PhysicalOption::CopyToFile => write!(f, "CopyToFile"), - PhysicalOption::Analyze => write!(f, "Analyze"), + PlanImpl::Dummy => write!(f, "Dummy"), + PlanImpl::SimpleAggregate => write!(f, "SimpleAggregate"), + PlanImpl::HashAggregate => write!(f, "HashAggregate"), + PlanImpl::Filter => write!(f, "Filter"), + PlanImpl::HashJoin => write!(f, "HashJoin"), + PlanImpl::NestLoopJoin => write!(f, "NestLoopJoin"), + PlanImpl::Project => write!(f, "Project"), + PlanImpl::SeqScan => write!(f, "SeqScan"), + PlanImpl::FunctionScan => write!(f, "FunctionScan"), + PlanImpl::IndexScan(index) => write!(f, "IndexScan By {index}"), + PlanImpl::Sort => write!(f, "Sort"), + PlanImpl::Limit => write!(f, "Limit"), + PlanImpl::TopK => write!(f, "TopK"), + PlanImpl::Values => write!(f, "Values"), + PlanImpl::Insert => write!(f, "Insert"), + PlanImpl::Update => write!(f, "Update"), + PlanImpl::Delete => write!(f, "Delete"), + PlanImpl::AddColumn => write!(f, "AddColumn"), + PlanImpl::DropColumn => write!(f, "DropColumn"), + PlanImpl::CreateTable => write!(f, "CreateTable"), + PlanImpl::DropTable => write!(f, "DropTable"), + PlanImpl::Truncate => write!(f, "Truncate"), + PlanImpl::Show => write!(f, "Show"), + PlanImpl::CopyFromFile => write!(f, "CopyFromFile"), + PlanImpl::CopyToFile => write!(f, "CopyToFile"), + PlanImpl::Analyze 
=> write!(f, "Analyze"), } } } diff --git a/src/planner/operator/table_scan.rs b/src/planner/operator/table_scan.rs index c9d2ca14..2d1f8497 100644 --- a/src/planner/operator/table_scan.rs +++ b/src/planner/operator/table_scan.rs @@ -12,8 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. -use super::Operator; +use super::{Operator, SortOption}; use crate::catalog::{ColumnRef, TableCatalog, TableName}; +use crate::errors::DatabaseError; +use crate::expression::ScalarExpression; +use crate::planner::operator::sort::SortField; use crate::planner::{Childrens, LogicalPlan}; use crate::storage::Bounds; use crate::types::index::IndexInfo; @@ -44,7 +47,7 @@ impl TableScanOperator { table_name: TableName, table_catalog: &TableCatalog, with_pk: bool, - ) -> LogicalPlan { + ) -> Result { let primary_keys = table_catalog .primary_keys() .iter() @@ -56,18 +59,35 @@ impl TableScanOperator { .enumerate() .map(|(i, column)| (i, column.clone())) .collect(); - let index_infos = table_catalog - .indexes - .iter() - .map(|meta| IndexInfo { - meta: meta.clone(), + let mut index_infos = Vec::with_capacity(table_catalog.indexes.len()); + + for index_meta in table_catalog.indexes.iter() { + let mut sort_fields = Vec::with_capacity(index_meta.column_ids.len()); + for col_id in &index_meta.column_ids { + let column = table_catalog.get_column_by_id(col_id).ok_or_else(|| { + DatabaseError::ColumnNotFound(format!("index column id: {col_id} not found")) + })?; + sort_fields.push(SortField { + expr: ScalarExpression::column_expr(column.clone()), + asc: true, + nulls_first: true, + }) + } + + index_infos.push(IndexInfo { + meta: index_meta.clone(), + sort_option: SortOption::OrderBy { + fields: sort_fields, + ignore_prefix_len: 0, + }, range: None, covered_deserializers: None, cover_mapping: None, - }) - .collect_vec(); + sort_elimination_hint: None, + }); + } - LogicalPlan::new( + Ok(LogicalPlan::new( 
Operator::TableScan(TableScanOperator { index_infos, table_name, @@ -77,7 +97,7 @@ impl TableScanOperator { with_pk, }), Childrens::None, - ) + )) } } diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 2d1aebec..a35df93b 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -81,13 +81,13 @@ pub trait Transaction: Sized { let table = self .table(table_cache, table_name.clone())? .ok_or(DatabaseError::TableNotFound)?; - if columns.is_empty() || with_pk { + if columns.is_empty() { for (i, column) in table.primary_keys() { columns.insert(*i, column.clone()); } } - let (deserializers, remap_pk_indices) = - Self::create_deserializers(&columns, table, with_pk); + let deserializers = Self::create_deserializers(&columns, table); + let pk_ty = with_pk.then(|| table.primary_keys_type().clone()); let (min, max) = unsafe { &*self.table_codec() }.tuple_bound(&table_name); let iter = self.range(Bound::Included(min), Bound::Included(max))?; @@ -95,7 +95,7 @@ pub trait Transaction: Sized { Ok(TupleIter { offset: bounds.0.unwrap_or(0), limit: bounds.1, - remap_pk_indices, + pk_ty, deserializers, values_len: columns.len(), total_len: table.columns_len(), @@ -130,7 +130,7 @@ pub trait Transaction: Sized { columns.insert(*i, column.clone()); } } - let (inner, deserializers, remap_pk_indices, cover_mapping) = + let (inner, deserializers, cover_mapping) = match (covered_deserializers, cover_mapping_indices) { (Some(deserializers), mapping) => { let tuple_len = match &index_meta.value_ty { @@ -142,26 +142,18 @@ pub trait Transaction: Sized { ( IndexImplEnum::Covered(CoveredIndexImpl), deserializers, - PrimaryKeyRemap::Covered, cover_mapping, ) } (None, _) => { - let (deserializers, remap_pk_indices) = - Self::create_deserializers(&columns, table, with_pk); - ( - IndexImplEnum::instance(index_meta.ty), - deserializers, - remap_pk_indices, - None, - ) + let deserializers = Self::create_deserializers(&columns, table); + (IndexImplEnum::instance(index_meta.ty), deserializers, 
None) } }; Ok(IndexIter { offset, limit: limit_option, - remap_pk_indices, params: IndexImplParams { index_meta, table_name, @@ -170,6 +162,7 @@ pub trait Transaction: Sized { total_len: table.columns_len(), tx: self, cover_mapping, + with_pk, }, inner, ranges: ranges.into_iter(), @@ -180,12 +173,8 @@ pub trait Transaction: Sized { fn create_deserializers( columns: &BTreeMap, table: &TableCatalog, - with_pk: bool, - ) -> (Vec, PrimaryKeyRemap) { - let primary_keys_indices = table.primary_keys_indices(); - + ) -> Vec { let mut deserializers = Vec::with_capacity(columns.len()); - let mut projections = Vec::with_capacity(columns.len()); let mut last_projection = None; for (projection, column) in columns.iter() { let (start, end) = last_projection @@ -198,23 +187,10 @@ pub trait Transaction: Sized { for skip_column in table.schema_ref()[start..end].iter() { deserializers.push(skip_column.datatype().skip_serializable()); } - if with_pk { - projections.push(*projection); - } deserializers.push(column.datatype().serializable()); last_projection = Some(*projection); } - let remap_pk = if with_pk { - PrimaryKeyRemap::Indices( - primary_keys_indices - .iter() - .filter_map(|pk| projections.binary_search(pk).ok()) - .collect_vec(), - ) - } else { - PrimaryKeyRemap::None - }; - (deserializers, remap_pk) + deserializers } fn add_index_meta( @@ -804,14 +780,12 @@ trait IndexImpl<'bytes, T: Transaction + 'bytes> { &self, key: &Bytes, value: &Bytes, - pk_indices: &PrimaryKeyRemap, params: &IndexImplParams, ) -> Result; fn eq_to_res<'a>( &self, value: &DataValue, - pk_indices: &PrimaryKeyRemap, params: &IndexImplParams<'a, T>, ) -> Result, DatabaseError>; @@ -879,6 +853,7 @@ struct IndexImplParams<'a, T: Transaction> { total_len: usize, tx: &'a T, cover_mapping: Option, + with_pk: bool, } impl IndexImplParams<'_, T> { @@ -901,11 +876,7 @@ impl IndexImplParams<'_, T> { Ok(val) } - fn get_tuple_by_id( - &self, - pk_indices: &PrimaryKeyRemap, - tuple_id: &TupleId, - ) -> Result, 
DatabaseError> { + fn get_tuple_by_id(&self, tuple_id: &TupleId) -> Result, DatabaseError> { let key = unsafe { &*self.table_codec() }.encode_tuple_key(self.table_name, tuple_id)?; self.tx @@ -913,7 +884,7 @@ impl IndexImplParams<'_, T> { .map(|bytes| { TableCodec::decode_tuple( &self.deserializers, - pk_indices, + Some(tuple_id.clone()), &bytes, self.values_len, self.total_len, @@ -933,30 +904,28 @@ impl<'bytes, T: Transaction + 'bytes> IndexImpl<'bytes, T> for IndexImplEnum { &self, key: &Bytes, value: &Bytes, - pk_indices: &PrimaryKeyRemap, params: &IndexImplParams, ) -> Result { match self { - IndexImplEnum::PrimaryKey(inner) => inner.index_lookup(key, value, pk_indices, params), - IndexImplEnum::Unique(inner) => inner.index_lookup(key, value, pk_indices, params), - IndexImplEnum::Normal(inner) => inner.index_lookup(key, value, pk_indices, params), - IndexImplEnum::Composite(inner) => inner.index_lookup(key, value, pk_indices, params), - IndexImplEnum::Covered(inner) => inner.index_lookup(key, value, pk_indices, params), + IndexImplEnum::PrimaryKey(inner) => inner.index_lookup(key, value, params), + IndexImplEnum::Unique(inner) => inner.index_lookup(key, value, params), + IndexImplEnum::Normal(inner) => inner.index_lookup(key, value, params), + IndexImplEnum::Composite(inner) => inner.index_lookup(key, value, params), + IndexImplEnum::Covered(inner) => inner.index_lookup(key, value, params), } } fn eq_to_res<'a>( &self, value: &DataValue, - pk_indices: &PrimaryKeyRemap, params: &IndexImplParams<'a, T>, ) -> Result, DatabaseError> { match self { - IndexImplEnum::PrimaryKey(inner) => inner.eq_to_res(value, pk_indices, params), - IndexImplEnum::Unique(inner) => inner.eq_to_res(value, pk_indices, params), - IndexImplEnum::Normal(inner) => inner.eq_to_res(value, pk_indices, params), - IndexImplEnum::Composite(inner) => inner.eq_to_res(value, pk_indices, params), - IndexImplEnum::Covered(inner) => inner.eq_to_res(value, pk_indices, params), + 
IndexImplEnum::PrimaryKey(inner) => inner.eq_to_res(value, params), + IndexImplEnum::Unique(inner) => inner.eq_to_res(value, params), + IndexImplEnum::Normal(inner) => inner.eq_to_res(value, params), + IndexImplEnum::Composite(inner) => inner.eq_to_res(value, params), + IndexImplEnum::Covered(inner) => inner.eq_to_res(value, params), } } @@ -979,14 +948,14 @@ impl<'bytes, T: Transaction + 'bytes> IndexImpl<'bytes, T> for IndexImplEnum { impl<'bytes, T: Transaction + 'bytes> IndexImpl<'bytes, T> for PrimaryKeyIndexImpl { fn index_lookup( &self, - _: &Bytes, + key: &Bytes, value: &Bytes, - pk_indices: &PrimaryKeyRemap, params: &IndexImplParams, ) -> Result { + let tuple_id = TableCodec::decode_tuple_key(key, ¶ms.index_meta.pk_ty)?; TableCodec::decode_tuple( ¶ms.deserializers, - pk_indices, + Some(tuple_id), value, params.values_len, params.total_len, @@ -996,16 +965,16 @@ impl<'bytes, T: Transaction + 'bytes> IndexImpl<'bytes, T> for PrimaryKeyIndexIm fn eq_to_res<'a>( &self, value: &DataValue, - pk_indices: &PrimaryKeyRemap, params: &IndexImplParams<'a, T>, ) -> Result, DatabaseError> { + let tuple_id = value.clone(); let tuple = params .tx .get(&unsafe { &*params.table_codec() }.encode_tuple_key(params.table_name, value)?)? .map(|bytes| { TableCodec::decode_tuple( ¶ms.deserializers, - pk_indices, + Some(tuple_id.clone()), &bytes, params.values_len, params.total_len, @@ -1028,12 +997,11 @@ impl<'bytes, T: Transaction + 'bytes> IndexImpl<'bytes, T> for PrimaryKeyIndexIm #[inline(always)] fn secondary_index_lookup( bytes: &Bytes, - pk_indices: &PrimaryKeyRemap, params: &IndexImplParams, ) -> Result { let tuple_id = TableCodec::decode_index(bytes)?; params - .get_tuple_by_id(pk_indices, &tuple_id)? + .get_tuple_by_id(&tuple_id)? 
.ok_or(DatabaseError::TupleIdNotFound(tuple_id)) } @@ -1042,16 +1010,14 @@ impl<'bytes, T: Transaction + 'bytes> IndexImpl<'bytes, T> for UniqueIndexImpl { &self, _: &Bytes, value: &Bytes, - pk_indices: &PrimaryKeyRemap, params: &IndexImplParams, ) -> Result { - secondary_index_lookup(value, pk_indices, params) + secondary_index_lookup(value, params) } fn eq_to_res<'a>( &self, value: &DataValue, - pk_indices: &PrimaryKeyRemap, params: &IndexImplParams<'a, T>, ) -> Result, DatabaseError> { let Some(bytes) = params.tx.get(&self.bound_key(params, value, false)?)? else { @@ -1059,7 +1025,7 @@ impl<'bytes, T: Transaction + 'bytes> IndexImpl<'bytes, T> for UniqueIndexImpl { }; let tuple_id = TableCodec::decode_index(&bytes)?; let tuple = params - .get_tuple_by_id(pk_indices, &tuple_id)? + .get_tuple_by_id(&tuple_id)? .ok_or(DatabaseError::TupleIdNotFound(tuple_id))?; Ok(IndexResult::Tuple(Some(tuple))) } @@ -1081,16 +1047,14 @@ impl<'bytes, T: Transaction + 'bytes> IndexImpl<'bytes, T> for NormalIndexImpl { &self, _: &Bytes, value: &Bytes, - pk_indices: &PrimaryKeyRemap, params: &IndexImplParams, ) -> Result { - secondary_index_lookup(value, pk_indices, params) + secondary_index_lookup(value, params) } fn eq_to_res<'a>( &self, value: &DataValue, - _: &PrimaryKeyRemap, params: &IndexImplParams<'a, T>, ) -> Result, DatabaseError> { eq_to_res_scope(self, value, params) @@ -1117,16 +1081,14 @@ impl<'bytes, T: Transaction + 'bytes> IndexImpl<'bytes, T> for CompositeIndexImp &self, _: &Bytes, value: &Bytes, - pk_indices: &PrimaryKeyRemap, params: &IndexImplParams, ) -> Result { - secondary_index_lookup(value, pk_indices, params) + secondary_index_lookup(value, params) } fn eq_to_res<'a>( &self, value: &DataValue, - _: &PrimaryKeyRemap, params: &IndexImplParams<'a, T>, ) -> Result, DatabaseError> { eq_to_res_scope(self, value, params) @@ -1153,7 +1115,6 @@ impl<'bytes, T: Transaction + 'bytes> IndexImpl<'bytes, T> for CoveredIndexImpl &self, key: &Bytes, value: &Bytes, - 
pk_indices: &PrimaryKeyRemap, params: &IndexImplParams, ) -> Result { let mapping = params @@ -1162,10 +1123,11 @@ impl<'bytes, T: Transaction + 'bytes> IndexImpl<'bytes, T> for CoveredIndexImpl .map(|mapping| mapping.as_ref()); let key = TableCodec::decode_index_key(key, params.value_ty(), mapping)?; - let mut tuple_id = None; - if matches!(pk_indices, PrimaryKeyRemap::Covered) { - tuple_id = Some(TableCodec::decode_index(value)?); - } + let tuple_id = if params.with_pk { + Some(TableCodec::decode_index(value)?) + } else { + None + }; let values = match key { DataValue::Tuple(vals, _) => vals, v => { @@ -1178,7 +1140,6 @@ impl<'bytes, T: Transaction + 'bytes> IndexImpl<'bytes, T> for CoveredIndexImpl fn eq_to_res<'a>( &self, value: &DataValue, - _: &PrimaryKeyRemap, params: &IndexImplParams<'a, T>, ) -> Result, DatabaseError> { eq_to_res_scope(self, value, params) @@ -1218,7 +1179,7 @@ fn eq_to_res_scope<'a, T: Transaction + 'a>( pub struct TupleIter<'a, T: Transaction + 'a> { offset: usize, limit: Option, - remap_pk_indices: PrimaryKeyRemap, + pk_ty: Option, deserializers: Vec, values_len: usize, total_len: usize, @@ -1235,16 +1196,21 @@ impl<'a, T: Transaction + 'a> Iter for TupleIter<'a, T> { } #[allow(clippy::never_loop)] - while let Some((_, value)) = self.iter.try_next()? { + while let Some((key, value)) = self.iter.try_next()? { if let Some(limit) = self.limit.as_mut() { if *limit == 0 { return Ok(None); } *limit -= 1; } + let tuple_id = if let Some(pk_ty) = &self.pk_ty { + Some(TableCodec::decode_tuple_key(&key, pk_ty)?) 
+ } else { + None + }; let tuple = TableCodec::decode_tuple( &self.deserializers, - &self.remap_pk_indices, + tuple_id, &value, self.values_len, self.total_len, @@ -1257,17 +1223,9 @@ impl<'a, T: Transaction + 'a> Iter for TupleIter<'a, T> { } } -pub enum PrimaryKeyRemap { - None, - Covered, - Indices(Vec), -} - pub struct IndexIter<'a, T: Transaction> { offset: usize, limit: Option, - - remap_pk_indices: PrimaryKeyRemap, params: IndexImplParams<'a, T>, inner: IndexImplEnum, // for buffering data @@ -1369,11 +1327,7 @@ impl Iter for IndexIter<'_, T> { Range::Eq(mut val) => { val = self.params.try_cast(val)?; - match self.inner.eq_to_res( - &val, - &self.remap_pk_indices, - &self.params, - )? { + match self.inner.eq_to_res(&val, &self.params)? { IndexResult::Tuple(tuple) => { if Self::offset_move(&mut self.offset) { continue; @@ -1395,12 +1349,7 @@ impl Iter for IndexIter<'_, T> { continue; } Self::limit_sub(&mut self.limit); - let tuple = self.inner.index_lookup( - &key, - &value, - &self.remap_pk_indices, - &self.params, - )?; + let tuple = self.inner.index_lookup(&key, &value, &self.params)?; return Ok(Some(tuple)); } diff --git a/src/storage/rocksdb.rs b/src/storage/rocksdb.rs index 33ac2340..17ca1547 100644 --- a/src/storage/rocksdb.rs +++ b/src/storage/rocksdb.rs @@ -244,7 +244,7 @@ mod test { use crate::storage::rocksdb::RocksStorage; use crate::storage::{ IndexImplEnum, IndexImplParams, IndexIter, IndexIterState, Iter, PrimaryKeyIndexImpl, - PrimaryKeyRemap, Storage, Transaction, + Storage, Transaction, }; use crate::types::index::{IndexMeta, IndexType}; use crate::types::tuple::Tuple; @@ -369,9 +369,7 @@ mod test { let mut iter = IndexIter { offset: 0, limit: None, - remap_pk_indices: PrimaryKeyRemap::Indices(vec![0]), params: IndexImplParams { - deserializers, index_meta: Arc::new(IndexMeta { id: 0, column_ids: vec![*a_column_id], @@ -382,10 +380,12 @@ mod test { ty: IndexType::PrimaryKey { is_multiple: false }, }), table_name: &table.name, - tx: 
&transaction, + deserializers, values_len, - total_len: 1, + total_len: table.columns_len(), + tx: &transaction, cover_mapping: None, + with_pk: true, }, ranges: vec![ Range::Eq(DataValue::Int32(0)), @@ -595,7 +595,7 @@ mod test { } assert_eq!(tuples.len(), 1); - assert_eq!(tuples[0].pk, Some(target_pk)); + assert_eq!(tuples[0].pk, None); assert_eq!(tuples[0].values, vec![covered_value]); Ok(()) diff --git a/src/storage/table_codec.rs b/src/storage/table_codec.rs index 5708461f..a1ef5745 100644 --- a/src/storage/table_codec.rs +++ b/src/storage/table_codec.rs @@ -16,7 +16,7 @@ use crate::catalog::view::View; use crate::catalog::{ColumnRef, ColumnRelation, TableMeta}; use crate::errors::DatabaseError; use crate::serdes::{ReferenceSerialization, ReferenceTables}; -use crate::storage::{PrimaryKeyRemap, TableCache, Transaction}; +use crate::storage::{TableCache, Transaction}; use crate::types::index::{Index, IndexId, IndexMeta, IndexType, INDEX_ID_LEN}; use crate::types::serialize::TupleValueSerializableImpl; use crate::types::tuple::{Tuple, TupleId}; @@ -32,6 +32,10 @@ pub(crate) const BOUND_MIN_TAG: u8 = u8::MIN; pub(crate) const BOUND_MAX_TAG: u8 = u8::MAX; pub(crate) const NULL_TAG: u8 = 0u8; pub(crate) const NOTNULL_TAG: u8 = 1u8; +const TABLE_NAME_HASH_LEN: usize = 8; +const KEY_TYPE_TAG_LEN: usize = 1; +const KEY_BOUND_LEN: usize = 1; +const TUPLE_KEY_PREFIX_LEN: usize = TABLE_NAME_HASH_LEN + KEY_TYPE_TAG_LEN + KEY_BOUND_LEN; static ROOT_BYTES: LazyLock> = LazyLock::new(|| b"Root".to_vec()); static VIEW_BYTES: LazyLock> = LazyLock::new(|| b"View".to_vec()); @@ -298,15 +302,19 @@ impl TableCodec { Ok(key_prefix) } + pub fn decode_tuple_key(bytes: &[u8], pk_ty: &LogicalType) -> Result { + DataValue::memcomparable_decode(&mut Cursor::new(&bytes[TUPLE_KEY_PREFIX_LEN..]), pk_ty) + } + #[inline] pub fn decode_tuple( deserializers: &[TupleValueSerializableImpl], - pk_indices: &PrimaryKeyRemap, + tuple_id: Option, bytes: &[u8], values_len: usize, total_len: usize, ) -> 
Result { - Tuple::deserialize_from(deserializers, pk_indices, bytes, values_len, total_len) + Tuple::deserialize_from(deserializers, tuple_id, bytes, values_len, total_len) } pub fn encode_index_meta_key( @@ -405,7 +413,7 @@ impl TableCodec { mapping: Option>, ) -> Result { // Hash + TypeTag + Bound Min + Index Id Len + Bound Min - let start = 8 + 1 + 1 + 1 + INDEX_ID_LEN; + let start = TUPLE_KEY_PREFIX_LEN + INDEX_ID_LEN + KEY_BOUND_LEN; DataValue::memcomparable_decode_mapping(&mut Cursor::new(&bytes[start..]), ty, mapping) } @@ -570,7 +578,7 @@ mod tests { use crate::serdes::ReferenceTables; use crate::storage::rocksdb::RocksTransaction; use crate::storage::table_codec::{BumpBytes, TableCodec}; - use crate::storage::{PrimaryKeyRemap, Storage}; + use crate::storage::Storage; use crate::types::index::{Index, IndexMeta, IndexType}; use crate::types::tuple::Tuple; use crate::types::value::DataValue; @@ -625,13 +633,7 @@ mod tests { tuple.pk = None; assert_eq!( - TableCodec::decode_tuple( - &deserializers, - &PrimaryKeyRemap::None, - &bytes, - deserializers.len(), - 2, - )?, + TableCodec::decode_tuple(&deserializers, None, &bytes, deserializers.len(), 2,)?, tuple ); diff --git a/src/types/index.rs b/src/types/index.rs index 001e2b25..78b81ded 100644 --- a/src/types/index.rs +++ b/src/types/index.rs @@ -16,6 +16,7 @@ use crate::catalog::{TableCatalog, TableName}; use crate::errors::DatabaseError; use crate::expression::range_detacher::Range; use crate::expression::ScalarExpression; +use crate::planner::operator::SortOption; use crate::types::serialize::TupleValueSerializableImpl; use crate::types::value::DataValue; use crate::types::{ColumnId, LogicalType}; @@ -40,9 +41,11 @@ pub enum IndexType { #[derive(Debug, Clone, Eq, PartialEq, Hash, ReferenceSerialization)] pub struct IndexInfo { pub(crate) meta: IndexMetaRef, + pub(crate) sort_option: SortOption, pub(crate) range: Option, pub(crate) covered_deserializers: Option>, pub(crate) cover_mapping: Option>, + pub(crate) 
sort_elimination_hint: Option, } #[derive(Debug, Clone, Eq, PartialEq, Hash, ReferenceSerialization)] diff --git a/src/types/tuple.rs b/src/types/tuple.rs index e38e47ee..13b4d1f7 100644 --- a/src/types/tuple.rs +++ b/src/types/tuple.rs @@ -16,7 +16,6 @@ use crate::catalog::ColumnRef; use crate::db::ResultIter; use crate::errors::DatabaseError; use crate::storage::table_codec::BumpBytes; -use crate::storage::PrimaryKeyRemap; use crate::types::serialize::{TupleValueSerializable, TupleValueSerializableImpl}; use crate::types::value::DataValue; use bumpalo::Bump; @@ -51,7 +50,7 @@ impl Tuple { #[inline] pub fn deserialize_from( deserializers: &[TupleValueSerializableImpl], - pk_remap: &PrimaryKeyRemap, + tuple_id: Option, bytes: &[u8], values_len: usize, total_len: usize, @@ -72,13 +71,10 @@ impl Tuple { } deserializer.filling_value(&mut cursor, &mut values)?; } - let pk = if let PrimaryKeyRemap::Indices(indices) = pk_remap { - Some(Tuple::primary_projection(indices, &values)) - } else { - None - }; - - Ok(Tuple { pk, values }) + Ok(Tuple { + pk: tuple_id, + values, + }) } /// e.g.: bits(u8)..|data_0(len for utf8_1)|utf8_0|data_1| @@ -154,7 +150,6 @@ pub fn create_table(iter: I) -> Result { #[cfg(all(test, not(target_arch = "wasm32")))] mod tests { use crate::catalog::{ColumnCatalog, ColumnDesc, ColumnRef}; - use crate::storage::PrimaryKeyRemap; use crate::types::tuple::Tuple; use crate::types::value::{DataValue, Utf8Type}; use crate::types::LogicalType; @@ -343,7 +338,7 @@ mod tests { { let tuple_0 = Tuple::deserialize_from( &serializers, - &PrimaryKeyRemap::Indices(vec![0]), + tuples[0].pk.clone(), &tuples[0].serialize_to(&serializers, &arena).unwrap(), serializers.len(), columns.len(), @@ -355,7 +350,7 @@ mod tests { { let tuple_1 = Tuple::deserialize_from( &serializers, - &PrimaryKeyRemap::Indices(vec![0]), + tuples[1].pk.clone(), &tuples[1].serialize_to(&serializers, &arena).unwrap(), serializers.len(), columns.len(), @@ -374,7 +369,7 @@ mod tests { ]; let 
tuple_2 = Tuple::deserialize_from( &projection_serializers, - &PrimaryKeyRemap::Indices(vec![0]), + tuples[0].pk.clone(), &tuples[0].serialize_to(&serializers, &arena).unwrap(), 2, columns.len(), @@ -396,11 +391,19 @@ mod tests { .take(5) .map(|column| column.datatype().serializable()) .collect_vec(); + let mut multi_pk_tuple = tuples[0].clone(); + multi_pk_tuple.pk = Some(DataValue::Tuple( + vec![ + multi_pk_tuple.values[4].clone(), + multi_pk_tuple.values[2].clone(), + ], + false, + )); let tuple_3 = Tuple::deserialize_from( &multiple_pk_serializers, - &PrimaryKeyRemap::Indices(vec![4, 2]), - &tuples[0].serialize_to(&serializers, &arena).unwrap(), + multi_pk_tuple.pk.clone(), + &multi_pk_tuple.serialize_to(&serializers, &arena).unwrap(), serializers.len(), columns.len(), ) @@ -409,17 +412,7 @@ mod tests { assert_eq!( tuple_3, Tuple { - pk: Some(DataValue::Tuple( - vec![ - DataValue::UInt16(1), - DataValue::Utf8 { - value: "LOL".to_string(), - ty: Utf8Type::Variable(Some(2)), - unit: CharLengthUnits::Octets, - }, - ], - false - )), + pk: multi_pk_tuple.pk.clone(), values: vec![ DataValue::Int32(0), DataValue::UInt32(1), diff --git a/tests/slt/crdb/join.slt b/tests/slt/crdb/join.slt index d9c13f42..c1066d3f 100644 --- a/tests/slt/crdb/join.slt +++ b/tests/slt/crdb/join.slt @@ -10,15 +10,15 @@ INSERT INTO onecolumn(id, x) VALUES (0, 44), (1, NULL), (2, 42) query II SELECT * FROM onecolumn AS a(aid, x) CROSS JOIN onecolumn AS b(bid, y) order by x ---- +1 null 0 44 +1 null 1 null +1 null 2 42 2 42 0 44 2 42 1 null 2 42 2 42 0 44 0 44 0 44 1 null 0 44 2 42 -1 null 0 44 -1 null 1 null -1 null 2 42 statement error SELECT x FROM onecolumn AS a, onecolumn AS b; @@ -44,16 +44,16 @@ SELECT * FROM onecolumn AS a NATURAL JOIN onecolumn as b order by a.x desc query II SELECT * FROM onecolumn AS a(aid, x) LEFT OUTER JOIN onecolumn AS b(bid, y) ON a.x = b.y order by a.x ---- +1 null null null 2 42 2 42 0 44 0 44 -1 null null null query I SELECT * FROM onecolumn AS a LEFT OUTER 
JOIN onecolumn AS b USING(x) ORDER BY x ---- +1 null null 2 42 2 0 44 0 -1 null null statement error SELECT * FROM onecolumn AS a, onecolumn AS b ORDER BY x @@ -61,30 +61,30 @@ SELECT * FROM onecolumn AS a, onecolumn AS b ORDER BY x query I SELECT * FROM onecolumn AS a NATURAL LEFT OUTER JOIN onecolumn AS b order by a.x ---- +1 null 2 42 0 44 -1 null query II SELECT * FROM onecolumn AS a(aid, x) RIGHT OUTER JOIN onecolumn AS b(bid, y) ON a.x = b.y order by x ---- +null null 1 null 2 42 2 42 0 44 0 44 -null null 1 null query I SELECT * FROM onecolumn AS a RIGHT OUTER JOIN onecolumn AS b USING(x) ORDER BY x ---- +null 1 null 2 2 42 0 0 44 -null 1 null query I SELECT * FROM onecolumn AS a NATURAL RIGHT OUTER JOIN onecolumn AS b order by x ---- +1 null 2 42 0 44 -1 null statement ok drop table if exists onecolumn_w @@ -117,52 +117,52 @@ INSERT INTO othercolumn(o_id, x) VALUES (0, 43),(1, 42),(2, 16) query II SELECT * FROM onecolumn AS a FULL OUTER JOIN othercolumn AS b ON a.x = b.x ORDER BY a.x,b.x ---- -2 42 1 42 -0 44 null null +1 null null null null null 2 16 null null 0 43 -1 null null null +2 42 1 42 +0 44 null null query II SELECT * FROM onecolumn AS a full OUTER JOIN othercolumn AS b ON a.x = b.x and a.x > 16 order by a.x ---- -2 42 1 42 -0 44 null null null null 0 43 null null 2 16 1 null null null +2 42 1 42 +0 44 null null query II SELECT * FROM onecolumn AS a full OUTER JOIN othercolumn AS b ON a.x = b.x and b.x > 16 order by b.x,a.x ---- +1 null null null +0 44 null null null null 2 16 2 42 1 42 null null 0 43 -0 44 null null -1 null null null query II SELECT a.x, b.x FROM onecolumn AS a full OUTER JOIN othercolumn AS b ON false order by a.x, b.x ---- -42 null -44 null +null null null 16 null 42 null 43 -null null +42 null +44 null query II SELECT a.x, b.x FROM onecolumn AS a full OUTER JOIN othercolumn AS b ON true order by a.x, b.x ---- +null 16 +null 42 +null 43 42 16 42 42 42 43 44 16 44 42 44 43 -null 16 -null 42 -null 43 # Tips: This case will make x 
take the value of both sides # query @@ -210,9 +210,9 @@ SELECT * FROM empty AS a JOIN onecolumn AS b USING(x) query IT SELECT * FROM onecolumn AS a(aid, x) LEFT OUTER JOIN empty AS b(bid, y) ON a.x = b.y ORDER BY a.x ---- +null null 1 null null null 2 42 null null 0 44 -null null 1 null query I rowsort SELECT * FROM onecolumn AS a LEFT OUTER JOIN empty AS b USING(x) ORDER BY x @@ -240,9 +240,9 @@ SELECT * FROM onecolumn AS a RIGHT OUTER JOIN empty AS b USING(x) query II SELECT * FROM empty AS a(aid, x) FULL OUTER JOIN onecolumn AS b(bid, y) ON a.x = b.y ORDER BY b.y ---- +null null 1 null null null 2 42 null null 0 44 -null null 1 null statement ok SELECT * FROM empty AS a FULL OUTER JOIN onecolumn AS b USING(x) ORDER BY x @@ -250,9 +250,9 @@ SELECT * FROM empty AS a FULL OUTER JOIN onecolumn AS b USING(x) ORDER BY x query IIII SELECT * FROM onecolumn AS a(aid, x) FULL OUTER JOIN empty AS b(bid, y) ON a.x = b.y ORDER BY a.x ---- +null null 1 null null null 2 42 null null 0 44 -null null 1 null query III rowsort SELECT * FROM onecolumn AS a FULL OUTER JOIN empty AS b USING(x) ORDER BY x @@ -264,9 +264,9 @@ null 2 42 query II SELECT * FROM empty AS a(aid, x) FULL OUTER JOIN onecolumn AS b(bid, y) ON a.x = b.y ORDER BY b.y ---- +null null 1 null null null 2 42 null null 0 44 -null null 1 null # query # SELECT * FROM empty AS a FULL OUTER JOIN onecolumn AS b USING(x) ORDER BY x @@ -302,23 +302,23 @@ SELECT o.x, t.y FROM onecolumn o INNER JOIN twocolumn t ON (o.x=t.x AND t.y=53) query IT SELECT o.x, t.y FROM onecolumn o LEFT OUTER JOIN twocolumn t ON (o.x=t.x AND t.y=53) order by o.x ---- +null null 42 53 44 null -null null query II SELECT o.x, t.y FROM onecolumn o LEFT OUTER JOIN twocolumn t ON (o.x=t.x AND o.x=44) order by o.x ---- +null null 42 null 44 51 -null null query II SELECT o.x, t.y FROM onecolumn o LEFT OUTER JOIN twocolumn t ON (o.x=t.x AND t.x=44) order by o.x ---- +null null 42 null 44 51 -null null # query # SELECT * FROM (SELECT x, 2 two FROM 
onecolumn) NATURAL FULL JOIN (SELECT x, y+1 plus1 FROM twocolumn) @@ -364,19 +364,19 @@ null null 2 4 false query III SELECT * FROM a FULL OUTER JOIN b ON a.i = b.i order by b ---- +0 1 null null null null null 2 4 false 1 2 0 2 true 2 3 1 3 true -0 1 null null null query III SELECT * FROM a FULL OUTER JOIN b ON (a.i = b.i and a.i>2) ORDER BY a.i, b.i ---- +null null 0 2 true +null null 2 4 false 0 1 null null null 1 2 null null null 2 3 1 3 true -null null 0 2 true -null null 2 4 false statement ok INSERT INTO b VALUES (3, 3, false) @@ -392,11 +392,11 @@ null null 2 4 false query III SELECT * FROM a FULL OUTER JOIN b ON a.i=b.i ORDER BY b.i, b.b ---- +0 1 null null null 1 2 0 2 true 2 3 3 3 false 2 3 1 3 true null null 2 4 false -0 1 null null null # TODO # query IIIIII @@ -537,6 +537,10 @@ SELECT * FROM pairs, square WHERE pairs.a + pairs.b = square.sq query III SELECT a, b, n, sq FROM pairs FULL OUTER JOIN square ON pairs.a + pairs.b = square.sq order by a ---- +null null 1 1 +null null 4 16 +null null 5 25 +null null 6 36 1 1 null null 1 2 null null 1 3 2 4 @@ -552,10 +556,6 @@ SELECT a, b, n, sq FROM pairs FULL OUTER JOIN square ON pairs.a + pairs.b = squa 3 6 3 9 4 5 3 9 4 6 null null -null null 1 1 -null null 4 16 -null null 5 25 -null null 6 36 query IIII SELECT pairs.a, pairs.b, square.* FROM pairs FULL OUTER JOIN square ON pairs.a + pairs.b = square.sq WHERE pairs.b%2 <> square.sq%2 order by a @@ -584,7 +584,6 @@ SELECT * FROM (SELECT * FROM pairs LEFT JOIN square ON b = sq AND a > 1 AND n < query IIII SELECT * FROM (SELECT * FROM pairs RIGHT JOIN square ON b = sq AND a > 1 AND n < 6) WHERE (a IS NULL OR a > 2) AND n > 1 AND (a IS NULL OR a < sq) order by n ---- -null null null 2 4 10 3 4 2 4 null null null 3 9 null null null 4 16 diff --git a/tests/slt/crdb/order_by.slt b/tests/slt/crdb/order_by.slt index 716d6ab3..35fc5b64 100644 --- a/tests/slt/crdb/order_by.slt +++ b/tests/slt/crdb/order_by.slt @@ -10,23 +10,23 @@ INSERT INTO t VALUES (1, 9, true), 
(2, 8, false), (3, 7, NULL) query I SELECT c FROM t ORDER BY c ---- +null false true -null query I SELECT c FROM t ORDER BY c ---- +null false true -null query I SELECT c FROM t ORDER BY c DESC ---- +null true false -null query II SELECT a, b FROM t ORDER BY b @@ -354,20 +354,20 @@ null 6 query II SELECT x, y FROM xy ORDER BY x, y ---- -2 5 +null null +null 6 2 null +2 5 4 8 -null 6 -null null query IT SELECT x, y FROM xy ORDER BY x, y DESC NULLS FIRST ---- +null null +null 6 2 null 2 5 4 8 -null null -null 6 query IT SELECT x, y FROM xy ORDER BY x NULLS LAST, y DESC NULLS FIRST @@ -390,10 +390,10 @@ null null query TT SELECT x, y FROM xy ORDER BY x NULLS FIRST, y DESC ---- -null 6 null null -2 5 +null 6 2 null +2 5 4 8 query TI diff --git a/tests/slt/crdb/sqlite.slt b/tests/slt/crdb/sqlite.slt index b37840ea..b0b603a4 100644 --- a/tests/slt/crdb/sqlite.slt +++ b/tests/slt/crdb/sqlite.slt @@ -30,6 +30,7 @@ CREATE TABLE tab64784(pk INTEGER primary key, col0 INTEGER, col1 FLOAT, col2 VAR statement ok INSERT INTO tab64784 VALUES(0,212,202.62,'nshdy',212,208.79,'wsxfc'),(1,213,203.64,'xwfuo',213,209.26,'lyswz'),(2,214,204.82,'jnued',216,210.48,'qczzf'),(3,215,205.40,'jtijf',217,211.96,'dpugl'),(4,216,206.3,'dpdzk',219,212.43,'xfirg'),(5,218,207.43,'qpwyw',220,213.50,'fmgky'),(6,219,208.3,'uooxb',221,215.30,'xpmdy'),(7,220,209.54,'ndtbb',225,218.8,'ivqyw'),(8,221,210.65,'zjpts',226,219.82,'sezsm'),(9,222,211.57,'slaxq',227,220.91,'bdqyb') +# TODO: Order by column index query II SELECT pk, col0 FROM tab64784 WHERE (col0 IN (SELECT col3 FROM tab64784 WHERE col3 IS NULL OR (col1 < 22.54) OR col4 > 85.74) OR ((col4 IS NULL)) AND col3 < 8 OR (col4 > 82.93 AND (col0 <= 61) AND col0 > 94 AND col0 > 15)) ORDER BY 2 DESC ---- diff --git a/tests/slt/order_by.slt b/tests/slt/order_by.slt index 12745df5..e8cc4599 100644 --- a/tests/slt/order_by.slt +++ b/tests/slt/order_by.slt @@ -55,20 +55,20 @@ insert into t values (0, 1, 0), (1, 2, 2), (2, null, 5), (3, 2, null), (4, null, query 
II select v1, v2 from t order by v1 asc, v2 asc ---- +null null +null 5 1 0 -2 2 2 null -null 5 -null null +2 2 query II select v1, v2 from t order by v1 asc nulls last, v2 asc ---- 1 0 -2 2 2 null -null 5 +2 2 null null +null 5 statement ok drop table t diff --git a/tpcc/README.md b/tpcc/README.md index 657cb215..1de8d175 100644 --- a/tpcc/README.md +++ b/tpcc/README.md @@ -4,13 +4,13 @@ run `cargo run -p tpcc --release` to run tpcc - i9-13900HX - 32.0 GB - KIOXIA-EXCERIA PLUS G3 SSD -- Tips: TPCC currently only supports single thread +- Tips: Pass `--threads ` to run multiple worker threads (default: 8) ```shell -|New-Order| sc: 133498 lt: 0 fl: 1360 -|Payment| sc: 133473 lt: 0 fl: 0 -|Order-Status| sc: 13348 lt: 0 fl: 450 -|Delivery| sc: 13348 lt: 0 fl: 0 -|Stock-Level| sc: 13347 lt: 0 fl: 0 +|New-Order| sc: 445996 lt: 0 fl: 4649 +|Payment| sc: 445972 lt: 0 fl: 0 +|Order-Status| sc: 44597 lt: 0 fl: 622 +|Delivery| sc: 44597 lt: 0 fl: 0 +|Stock-Level| sc: 44597 lt: 0 fl: 0 in 720 sec. (all must be [OK]) [transaction percentage] @@ -24,113 +24,59 @@ in 720 sec. 
Order-Status: 100.0 [OK] Delivery: 100.0 [OK] Stock-Level: 100.0 [OK] - New-Order Total: 133498 - Payment Total: 133473 - Order-Status Total: 13348 - Delivery Total: 13348 - Stock-Level Total: 13347 + New-Order Total: 445996 + Payment Total: 445972 + Order-Status Total: 44597 + Delivery Total: 44597 + Stock-Level Total: 44597 1.New-Order -0.001, 83231 -0.002, 49784 -0.003, 36 -0.004, 4 -0.005, 2 +0.001, 282206 +0.002, 163359 +0.003, 95 +0.004, 6 2.Payment -0.001, 133281 -0.002, 184 -0.003, 2 +0.001, 444948 +0.002, 260 3.Order-Status -0.012, 31 -0.013, 265 -0.014, 332 -0.015, 307 -0.016, 296 -0.017, 284 -0.018, 303 -0.019, 415 -0.020, 386 -0.021, 382 -0.022, 252 -0.023, 228 -0.024, 264 -0.025, 249 -0.026, 268 -0.027, 253 -0.028, 246 -0.029, 277 -0.030, 253 -0.031, 237 -0.032, 289 -0.033, 172 -0.034, 192 -0.035, 268 -0.036, 266 -0.037, 276 -0.038, 243 -0.039, 223 -0.040, 216 -0.041, 225 -0.042, 248 -0.043, 193 -0.044, 174 -0.045, 307 -0.046, 305 -0.047, 246 -0.048, 213 -0.049, 267 -0.050, 197 -0.051, 182 -0.052, 207 -0.053, 84 -0.054, 42 -0.055, 54 -0.056, 102 -0.057, 156 -0.058, 165 -0.059, 199 -0.060, 195 -0.061, 173 -0.062, 141 -0.063, 102 -0.064, 56 -0.065, 29 -0.066, 8 -0.067, 6 -0.068, 2 -0.069, 3 -0.075, 1 -0.078, 1 +0.001, 36721 +0.002, 5254 +0.003, 1110 +0.004, 396 +0.005, 189 +0.006, 62 +0.007, 6 4.Delivery -0.001, 11129 -0.002, 1 +0.001, 43260 5.Stock-Level -0.001, 5697 -0.002, 4669 -0.003, 494 -0.004, 8 -0.005, 1 +0.001, 22651 +0.002, 18136 +0.003, 3223 +0.004, 108 +0.005, 5 +0.006, 4 +0.007, 1 <90th Percentile RT (MaxRT)> - New-Order : 0.002 (0.005) - Payment : 0.001 (0.003) -Order-Status : 0.057 (0.088) + New-Order : 0.002 (0.012) + Payment : 0.001 (0.002) +Order-Status : 0.002 (0.019) Delivery : 0.001 (0.001) - Stock-Level : 0.002 (0.006) + Stock-Level : 0.002 (0.018) -11125 Tpmc +37166 Tpmc ``` ## Explain