diff --git a/datafusion/catalog-listing/Cargo.toml b/datafusion/catalog-listing/Cargo.toml index be1374b371485..388cdac71f3c0 100644 --- a/datafusion/catalog-listing/Cargo.toml +++ b/datafusion/catalog-listing/Cargo.toml @@ -20,7 +20,7 @@ name = "datafusion-catalog-listing" description = "datafusion-catalog-listing" readme = "README.md" authors.workspace = true -edition.workspace = true +edition = "2024" homepage.workspace = true license.workspace = true repository.workspace = true diff --git a/datafusion/catalog-listing/src/config.rs b/datafusion/catalog-listing/src/config.rs index 3370d2ea75535..e3cd01a191924 100644 --- a/datafusion/catalog-listing/src/config.rs +++ b/datafusion/catalog-listing/src/config.rs @@ -19,9 +19,9 @@ use crate::options::ListingOptions; use arrow::datatypes::{DataType, Schema, SchemaRef}; use datafusion_catalog::Session; use datafusion_common::{config_err, internal_err}; +use datafusion_datasource::ListingTableUrl; use datafusion_datasource::file_compression_type::FileCompressionType; use datafusion_datasource::schema_adapter::SchemaAdapterFactory; -use datafusion_datasource::ListingTableUrl; use datafusion_physical_expr_adapter::PhysicalExprAdapterFactory; use std::str::FromStr; use std::sync::Arc; diff --git a/datafusion/catalog-listing/src/helpers.rs b/datafusion/catalog-listing/src/helpers.rs index 34073338fbd7e..ea016015cebd3 100644 --- a/datafusion/catalog-listing/src/helpers.rs +++ b/datafusion/catalog-listing/src/helpers.rs @@ -21,10 +21,10 @@ use std::mem; use std::sync::Arc; use datafusion_catalog::Session; -use datafusion_common::{assert_or_internal_err, HashMap, Result, ScalarValue}; +use datafusion_common::{HashMap, Result, ScalarValue, assert_or_internal_err}; use datafusion_datasource::ListingTableUrl; use datafusion_datasource::PartitionedFile; -use datafusion_expr::{lit, utils, BinaryExpr, Operator}; +use datafusion_expr::{BinaryExpr, Operator, lit, utils}; use arrow::{ array::AsArray, @@ -33,7 +33,7 @@ use arrow::{ }; use datafusion_expr::execution_props::ExecutionProps; use futures::stream::FuturesUnordered; -use futures::{stream::BoxStream, StreamExt, TryStreamExt}; +use futures::{StreamExt, TryStreamExt, stream::BoxStream}; use log::{debug, trace}; use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion}; @@ -51,7 +51,7 @@ use object_store::{ObjectMeta, ObjectStore}; pub fn expr_applicable_for_cols(col_names: &[&str], expr: &Expr) -> bool { let mut is_applicable = true; expr.apply(|expr| match expr { - Expr::Column(Column { ref name, .. }) => { + Expr::Column(Column { name, .. }) => { is_applicable &= col_names.contains(&name.as_str()); if is_applicable { Ok(TreeNodeRecursion::Jump) @@ -247,16 +247,11 @@ fn populate_partition_values<'a>( partition_values: &mut HashMap<&'a str, PartitionValue>, filter: &'a Expr, ) { - if let Expr::BinaryExpr(BinaryExpr { - ref left, - op, - ref right, - }) = filter - { + if let Expr::BinaryExpr(BinaryExpr { left, op, right }) = filter { match op { Operator::Eq => match (left.as_ref(), right.as_ref()) { - (Expr::Column(Column { ref name, .. }), Expr::Literal(val, _)) - | (Expr::Literal(val, _), Expr::Column(Column { ref name, .. })) => { + (Expr::Column(Column { name, .. }), Expr::Literal(val, _)) + | (Expr::Literal(val, _), Expr::Column(Column { name, .. 
})) => { if partition_values .insert(name, PartitionValue::Single(val.to_string())) .is_some() @@ -466,7 +461,7 @@ mod tests { use std::ops::Not; use super::*; - use datafusion_expr::{case, col, lit, Expr}; + use datafusion_expr::{Expr, case, col, lit}; #[test] fn test_split_files() { diff --git a/datafusion/catalog-listing/src/options.rs b/datafusion/catalog-listing/src/options.rs index 7da8005f90ec2..146f98d62335e 100644 --- a/datafusion/catalog-listing/src/options.rs +++ b/datafusion/catalog-listing/src/options.rs @@ -18,12 +18,12 @@ use arrow::datatypes::{DataType, SchemaRef}; use datafusion_catalog::Session; use datafusion_common::plan_err; -use datafusion_datasource::file_format::FileFormat; use datafusion_datasource::ListingTableUrl; +use datafusion_datasource::file_format::FileFormat; use datafusion_execution::config::SessionConfig; use datafusion_expr::SortExpr; use futures::StreamExt; -use futures::{future, TryStreamExt}; +use futures::{TryStreamExt, future}; use itertools::Itertools; use std::sync::Arc; diff --git a/datafusion/catalog-listing/src/table.rs b/datafusion/catalog-listing/src/table.rs index 2e7453e4ff4d8..3f2860947177c 100644 --- a/datafusion/catalog-listing/src/table.rs +++ b/datafusion/catalog-listing/src/table.rs @@ -23,8 +23,8 @@ use async_trait::async_trait; use datafusion_catalog::{ScanArgs, ScanResult, Session, TableProvider}; use datafusion_common::stats::Precision; use datafusion_common::{ - internal_datafusion_err, plan_err, project_schema, Constraints, DataFusionError, - SchemaExt, Statistics, + Constraints, DataFusionError, SchemaExt, Statistics, internal_datafusion_err, + plan_err, project_schema, }; use datafusion_datasource::file::FileSource; use datafusion_datasource::file_groups::FileGroup; @@ -34,7 +34,7 @@ use datafusion_datasource::schema_adapter::{ DefaultSchemaAdapterFactory, SchemaAdapter, SchemaAdapterFactory, }; use datafusion_datasource::{ - compute_all_files_statistics, ListingTableUrl, PartitionedFile, TableSchema, + ListingTableUrl, PartitionedFile, TableSchema, compute_all_files_statistics, }; use datafusion_execution::cache::cache_manager::FileStatisticsCache; use datafusion_execution::cache::cache_unit::DefaultFileStatisticsCache; @@ -44,9 +44,9 @@ use datafusion_expr::{Expr, TableProviderFilterPushDown, TableType}; use datafusion_physical_expr::create_lex_ordering; use datafusion_physical_expr_adapter::PhysicalExprAdapterFactory; use datafusion_physical_expr_common::sort_expr::LexOrdering; -use datafusion_physical_plan::empty::EmptyExec; use datafusion_physical_plan::ExecutionPlan; -use futures::{future, stream, Stream, StreamExt, TryStreamExt}; +use datafusion_physical_plan::empty::EmptyExec; +use futures::{Stream, StreamExt, TryStreamExt, future, stream}; use object_store::ObjectStore; use std::any::Any; use std::collections::HashMap; @@ -493,7 +493,9 @@ impl TableProvider for ListingTable { if new_groups.len() <= self.options.target_partitions { partitioned_file_lists = new_groups; } else { - log::debug!("attempted to split file groups by statistics, but there were more file groups than target_partitions; falling back to unordered") + log::debug!( + "attempted to split file groups by statistics, but there were more file groups than target_partitions; falling back to unordered" + ) } } None => {} // no ordering required @@ -817,28 +819,25 @@ async fn get_files_with_limit( let file = file_result?; // Update file statistics regardless of state - if collect_stats { - if let Some(file_stats) = &file.statistics { - num_rows = if 
file_group.is_empty() { - // For the first file, just take its row count - file_stats.num_rows - } else { - // For subsequent files, accumulate the counts - num_rows.add(&file_stats.num_rows) - }; - } + if collect_stats && let Some(file_stats) = &file.statistics { + num_rows = if file_group.is_empty() { + // For the first file, just take its row count + file_stats.num_rows + } else { + // For subsequent files, accumulate the counts + num_rows.add(&file_stats.num_rows) + }; } // Always add the file to our group file_group.push(file); // Check if we've hit the limit (if one was specified) - if let Some(limit) = limit { - if let Precision::Exact(row_count) = num_rows { - if row_count > limit { - state = ProcessingState::ReachedLimit; - } - } + if let Some(limit) = limit + && let Precision::Exact(row_count) = num_rows + && row_count > limit + { + state = ProcessingState::ReachedLimit; } } // If we still have files in the stream, it means that the limit kicked diff --git a/datafusion/catalog/Cargo.toml b/datafusion/catalog/Cargo.toml index 1009e9aee477b..2d9f26ea50806 100644 --- a/datafusion/catalog/Cargo.toml +++ b/datafusion/catalog/Cargo.toml @@ -20,7 +20,7 @@ name = "datafusion-catalog" description = "datafusion-catalog" readme = "README.md" authors.workspace = true -edition.workspace = true +edition = "2024" homepage.workspace = true license.workspace = true repository.workspace = true diff --git a/datafusion/catalog/src/async.rs b/datafusion/catalog/src/async.rs index c2e760deab00c..1b8039d828fdb 100644 --- a/datafusion/catalog/src/async.rs +++ b/datafusion/catalog/src/async.rs @@ -18,7 +18,7 @@ use std::sync::Arc; use async_trait::async_trait; -use datafusion_common::{error::Result, not_impl_err, HashMap, TableReference}; +use datafusion_common::{HashMap, TableReference, error::Result, not_impl_err}; use datafusion_execution::config::SessionConfig; use crate::{CatalogProvider, CatalogProviderList, SchemaProvider, TableProvider}; @@ -60,7 +60,9 @@ impl SchemaProvider for ResolvedSchemaProvider { } fn deregister_table(&self, name: &str) -> Result<Option<Arc<dyn TableProvider>>> { - not_impl_err!("Attempt to deregister table '{name}' with ResolvedSchemaProvider which is not supported") + not_impl_err!( + "Attempt to deregister table '{name}' with ResolvedSchemaProvider which is not supported" + ) } fn table_exist(&self, name: &str) -> bool { @@ -425,14 +427,14 @@ mod tests { use std::{ any::Any, sync::{ - atomic::{AtomicU32, Ordering}, Arc, + atomic::{AtomicU32, Ordering}, }, }; use arrow::datatypes::SchemaRef; use async_trait::async_trait; - use datafusion_common::{error::Result, Statistics, TableReference}; + use datafusion_common::{Statistics, TableReference, error::Result}; use datafusion_execution::config::SessionConfig; use datafusion_expr::{Expr, TableType}; use datafusion_physical_plan::ExecutionPlan; diff --git a/datafusion/catalog/src/catalog.rs b/datafusion/catalog/src/catalog.rs index 67d13d61006a7..bb9e89eba2fef 100644 --- a/datafusion/catalog/src/catalog.rs +++ b/datafusion/catalog/src/catalog.rs @@ -20,8 +20,8 @@ use std::fmt::Debug; use std::sync::Arc; pub use crate::schema::SchemaProvider; -use datafusion_common::not_impl_err; use datafusion_common::Result; +use datafusion_common::Result; use datafusion_common::not_impl_err; /// Represents a catalog, comprising a number of named schemas.
/// diff --git a/datafusion/catalog/src/default_table_source.rs b/datafusion/catalog/src/default_table_source.rs index 11963c06c88f5..fb6531ba0b2ee 100644 --- a/datafusion/catalog/src/default_table_source.rs +++ b/datafusion/catalog/src/default_table_source.rs @@ -23,7 +23,7 @@ use std::{any::Any, borrow::Cow}; use crate::TableProvider; use arrow::datatypes::SchemaRef; -use datafusion_common::{internal_err, Constraints}; +use datafusion_common::{Constraints, internal_err}; use datafusion_expr::{Expr, TableProviderFilterPushDown, TableSource, TableType}; /// Implements [`TableSource`] for a [`TableProvider`] diff --git a/datafusion/catalog/src/information_schema.rs b/datafusion/catalog/src/information_schema.rs index c955819790259..52bfeca3d4282 100644 --- a/datafusion/catalog/src/information_schema.rs +++ b/datafusion/catalog/src/information_schema.rs @@ -28,17 +28,17 @@ use arrow::{ record_batch::RecordBatch, }; use async_trait::async_trait; +use datafusion_common::DataFusionError; use datafusion_common::config::{ConfigEntry, ConfigOptions}; use datafusion_common::error::Result; use datafusion_common::types::NativeType; -use datafusion_common::DataFusionError; -use datafusion_execution::runtime_env::RuntimeEnv; use datafusion_execution::TaskContext; +use datafusion_execution::runtime_env::RuntimeEnv; use datafusion_expr::{AggregateUDF, ScalarUDF, Signature, TypeSignature, WindowUDF}; use datafusion_expr::{TableType, Volatility}; +use datafusion_physical_plan::SendableRecordBatchStream; use datafusion_physical_plan::stream::RecordBatchStreamAdapter; use datafusion_physical_plan::streaming::PartitionStream; -use datafusion_physical_plan::SendableRecordBatchStream; use std::collections::{BTreeSet, HashMap, HashSet}; use std::fmt::Debug; use std::{any::Any, sync::Arc}; @@ -138,11 +138,11 @@ impl InformationSchemaConfig { let catalog = self.catalog_list.catalog(&catalog_name).unwrap(); for schema_name in catalog.schema_names() { - if schema_name != INFORMATION_SCHEMA { - if let Some(schema) = catalog.schema(&schema_name) { - let schema_owner = schema.owner_name(); - builder.add_schemata(&catalog_name, &schema_name, schema_owner); - } + if schema_name != INFORMATION_SCHEMA + && let Some(schema) = catalog.schema(&schema_name) + { + let schema_owner = schema.owner_name(); + builder.add_schemata(&catalog_name, &schema_name, schema_owner); } } } @@ -1408,7 +1408,9 @@ mod tests { // InformationSchemaConfig::make_tables used this before `table_type` // existed but should not, as it may be expensive. 
async fn table(&self, _: &str) -> Result<Option<Arc<dyn TableProvider>>> { - panic!("InformationSchemaConfig::make_tables called SchemaProvider::table instead of table_type") + panic!( + "InformationSchemaConfig::make_tables called SchemaProvider::table instead of table_type" + ) } fn as_any(&self) -> &dyn Any { diff --git a/datafusion/catalog/src/lib.rs b/datafusion/catalog/src/lib.rs index 3e866f84a5ec7..d1cd3998fecf1 100644 --- a/datafusion/catalog/src/lib.rs +++ b/datafusion/catalog/src/lib.rs @@ -48,13 +48,13 @@ mod dynamic_file; mod schema; mod table; +pub use r#async::*; pub use catalog::*; pub use datafusion_session::Session; pub use dynamic_file::catalog::*; pub use memory::{ MemTable, MemoryCatalogProvider, MemoryCatalogProviderList, MemorySchemaProvider, }; -pub use r#async::*; pub use schema::*; pub use table::*; diff --git a/datafusion/catalog/src/listing_schema.rs b/datafusion/catalog/src/listing_schema.rs index e30366bb3f493..77fbea8577089 100644 --- a/datafusion/catalog/src/listing_schema.rs +++ b/datafusion/catalog/src/listing_schema.rs @@ -26,7 +26,7 @@ use crate::{SchemaProvider, TableProvider, TableProviderFactory}; use crate::Session; use datafusion_common::{ - internal_datafusion_err, DFSchema, DataFusionError, HashMap, TableReference, + DFSchema, DataFusionError, HashMap, TableReference, internal_datafusion_err, }; use datafusion_expr::CreateExternalTable; diff --git a/datafusion/catalog/src/memory/schema.rs b/datafusion/catalog/src/memory/schema.rs index f1b3628f7affc..97a579b021617 100644 --- a/datafusion/catalog/src/memory/schema.rs +++ b/datafusion/catalog/src/memory/schema.rs @@ -20,7 +20,7 @@ use crate::{SchemaProvider, TableProvider}; use async_trait::async_trait; use dashmap::DashMap; -use datafusion_common::{exec_err, DataFusionError}; +use datafusion_common::{DataFusionError, exec_err}; use std::any::Any; use std::sync::Arc; diff --git a/datafusion/catalog/src/memory/table.rs b/datafusion/catalog/src/memory/table.rs index 90224f6a37bc3..47f773fe9befd 100644 --- a/datafusion/catalog/src/memory/table.rs +++ b/datafusion/catalog/src/memory/table.rs @@ -27,17 +27,17 @@ use crate::TableProvider; use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; use datafusion_common::error::Result; -use datafusion_common::{not_impl_err, plan_err, Constraints, DFSchema, SchemaExt}; +use datafusion_common::{Constraints, DFSchema, SchemaExt, not_impl_err, plan_err}; use datafusion_common_runtime::JoinSet; use datafusion_datasource::memory::{MemSink, MemorySourceConfig}; use datafusion_datasource::sink::DataSinkExec; use datafusion_datasource::source::DataSourceExec; use datafusion_expr::dml::InsertOp; use datafusion_expr::{Expr, SortExpr, TableType}; -use datafusion_physical_expr::{create_physical_sort_exprs, LexOrdering}; +use datafusion_physical_expr::{LexOrdering, create_physical_sort_exprs}; use datafusion_physical_plan::repartition::RepartitionExec; use datafusion_physical_plan::{ - common, ExecutionPlan, ExecutionPlanProperties, Partitioning, + ExecutionPlan, ExecutionPlanProperties, Partitioning, common, }; use datafusion_session::Session; diff --git a/datafusion/catalog/src/schema.rs b/datafusion/catalog/src/schema.rs index 222b1d8ed784c..c6299582813b4 100644 --- a/datafusion/catalog/src/schema.rs +++ b/datafusion/catalog/src/schema.rs @@ -19,7 +19,7 @@ //! representing collections of named tables.
use async_trait::async_trait; -use datafusion_common::{exec_err, DataFusionError}; +use datafusion_common::{DataFusionError, exec_err}; use std::any::Any; use std::fmt::Debug; use std::sync::Arc; diff --git a/datafusion/catalog/src/stream.rs b/datafusion/catalog/src/stream.rs index f4a2338b8eecb..bdd72a1b1d70b 100644 --- a/datafusion/catalog/src/stream.rs +++ b/datafusion/catalog/src/stream.rs @@ -28,7 +28,7 @@ use std::sync::Arc; use crate::{Session, TableProvider, TableProviderFactory}; use arrow::array::{RecordBatch, RecordBatchReader, RecordBatchWriter}; use arrow::datatypes::SchemaRef; -use datafusion_common::{config_err, plan_err, Constraints, DataFusionError, Result}; +use datafusion_common::{Constraints, DataFusionError, Result, config_err, plan_err}; use datafusion_common_runtime::SpawnedTask; use datafusion_datasource::sink::{DataSink, DataSinkExec}; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; diff --git a/datafusion/catalog/src/streaming.rs b/datafusion/catalog/src/streaming.rs index 082e74dab9a15..31669171b291a 100644 --- a/datafusion/catalog/src/streaming.rs +++ b/datafusion/catalog/src/streaming.rs @@ -24,11 +24,11 @@ use crate::Session; use crate::TableProvider; use arrow::datatypes::SchemaRef; -use datafusion_common::{plan_err, DFSchema, Result}; +use datafusion_common::{DFSchema, Result, plan_err}; use datafusion_expr::{Expr, SortExpr, TableType}; -use datafusion_physical_expr::{create_physical_sort_exprs, LexOrdering}; -use datafusion_physical_plan::streaming::{PartitionStream, StreamingTableExec}; +use datafusion_physical_expr::{LexOrdering, create_physical_sort_exprs}; use datafusion_physical_plan::ExecutionPlan; +use datafusion_physical_plan::streaming::{PartitionStream, StreamingTableExec}; use async_trait::async_trait; use log::debug; diff --git a/datafusion/catalog/src/table.rs b/datafusion/catalog/src/table.rs index 11c9af01a7a54..cabdb22c62ae5 100644 --- a/datafusion/catalog/src/table.rs +++ b/datafusion/catalog/src/table.rs @@ -24,7 +24,7 @@ use crate::session::Session; use arrow::datatypes::SchemaRef; use async_trait::async_trait; use datafusion_common::Result; -use datafusion_common::{not_impl_err, Constraints, Statistics}; +use datafusion_common::{Constraints, Statistics, not_impl_err}; use datafusion_expr::Expr; use datafusion_expr::dml::InsertOp; diff --git a/datafusion/catalog/src/view.rs b/datafusion/catalog/src/view.rs index 89c6a4a224511..54c54431a5913 100644 --- a/datafusion/catalog/src/view.rs +++ b/datafusion/catalog/src/view.rs @@ -24,8 +24,8 @@ use crate::TableProvider; use arrow::datatypes::SchemaRef; use async_trait::async_trait; -use datafusion_common::error::Result; use datafusion_common::Column; +use datafusion_common::error::Result; use datafusion_expr::TableType; use datafusion_expr::{Expr, LogicalPlan}; use datafusion_expr::{LogicalPlanBuilder, TableProviderFilterPushDown}; diff --git a/datafusion/datasource-arrow/Cargo.toml b/datafusion/datasource-arrow/Cargo.toml index fbadc8708ca69..002408d643035 100644 --- a/datafusion/datasource-arrow/Cargo.toml +++ b/datafusion/datasource-arrow/Cargo.toml @@ -20,7 +20,7 @@ name = "datafusion-datasource-arrow" description = "datafusion-datasource-arrow" readme = "README.md" authors.workspace = true -edition.workspace = true +edition = "2024" homepage.workspace = true license.workspace = true repository.workspace = true diff --git a/datafusion/datasource-arrow/src/file_format.rs b/datafusion/datasource-arrow/src/file_format.rs index 7754748fbf866..9997d23d4c61f 100644 --- 
a/datafusion/datasource-arrow/src/file_format.rs +++ b/datafusion/datasource-arrow/src/file_format.rs @@ -30,22 +30,22 @@ use arrow::error::ArrowError; use arrow::ipc::convert::fb_to_schema; use arrow::ipc::reader::{FileReader, StreamReader}; use arrow::ipc::writer::IpcWriteOptions; -use arrow::ipc::{root_as_message, CompressionType}; +use arrow::ipc::{CompressionType, root_as_message}; use datafusion_common::error::Result; use datafusion_common::parsers::CompressionTypeVariant; use datafusion_common::{ - internal_datafusion_err, not_impl_err, DataFusionError, GetExt, Statistics, - DEFAULT_ARROW_EXTENSION, + DEFAULT_ARROW_EXTENSION, DataFusionError, GetExt, Statistics, + internal_datafusion_err, not_impl_err, }; use datafusion_common_runtime::{JoinSet, SpawnedTask}; +use datafusion_datasource::TableSchema; use datafusion_datasource::display::FileGroupDisplay; use datafusion_datasource::file::FileSource; use datafusion_datasource::file_scan_config::{FileScanConfig, FileScanConfigBuilder}; use datafusion_datasource::sink::{DataSink, DataSinkExec}; use datafusion_datasource::write::{ - get_writer_schema, ObjectWriterBuilder, SharedBuffer, + ObjectWriterBuilder, SharedBuffer, get_writer_schema, }; -use datafusion_datasource::TableSchema; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; use datafusion_expr::dml::InsertOp; use datafusion_physical_expr_common::sort_expr::LexRequirement; @@ -60,10 +60,10 @@ use datafusion_datasource::source::DataSourceExec; use datafusion_datasource::write::demux::DemuxedStreamReceiver; use datafusion_physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan}; use datafusion_session::Session; -use futures::stream::BoxStream; use futures::StreamExt; +use futures::stream::BoxStream; use object_store::{ - path::Path, GetOptions, GetRange, GetResultPayload, ObjectMeta, ObjectStore, + GetOptions, GetRange, GetResultPayload, ObjectMeta, ObjectStore, path::Path, }; use tokio::io::AsyncWriteExt; @@ -163,7 +163,9 @@ impl FileFormat for ArrowFormat { match StreamReader::try_new(&mut file, None) { Ok(reader) => reader.schema(), Err(stream_error) => { - return Err(internal_datafusion_err!("Failed to parse Arrow file as either file format or stream format. File format error: {file_error}. Stream format error: {stream_error}")); + return Err(internal_datafusion_err!( + "Failed to parse Arrow file as either file format or stream format. File format error: {file_error}. Stream format error: {stream_error}" + )); } } } @@ -217,10 +219,10 @@ impl FileFormat for ArrowFormat { }; // Preserve projection from the original file source - if let Some(projection) = conf.file_source.projection() { - if let Some(new_source) = source.try_pushdown_projection(projection)? { - source = new_source; - } + if let Some(projection) = conf.file_source.projection() + && let Some(new_source) = source.try_pushdown_projection(projection)? 
+ { + source = new_source; } let config = FileScanConfigBuilder::from(conf) @@ -547,8 +549,8 @@ mod tests { use super::*; use chrono::DateTime; - use datafusion_common::config::TableOptions; use datafusion_common::DFSchema; + use datafusion_common::config::TableOptions; use datafusion_execution::config::SessionConfig; use datafusion_execution::runtime_env::RuntimeEnv; use datafusion_expr::execution_props::ExecutionProps; @@ -706,7 +708,10 @@ mod tests { .await; assert!(err.is_err()); - assert_eq!( "Arrow error: Parser error: Unexpected end of byte stream for Arrow IPC file", err.unwrap_err().to_string().lines().next().unwrap()); + assert_eq!( + "Arrow error: Parser error: Unexpected end of byte stream for Arrow IPC file", + err.unwrap_err().to_string().lines().next().unwrap() + ); } Ok(()) diff --git a/datafusion/datasource-arrow/src/source.rs b/datafusion/datasource-arrow/src/source.rs index 070e4fae6df65..892ab01b23c16 100644 --- a/datafusion/datasource-arrow/src/source.rs +++ b/datafusion/datasource-arrow/src/source.rs @@ -37,17 +37,17 @@ use std::{any::Any, io::Cursor}; use datafusion_datasource::schema_adapter::{ DefaultSchemaAdapterFactory, SchemaAdapterFactory, }; -use datafusion_datasource::{as_file_source, TableSchema}; +use datafusion_datasource::{TableSchema, as_file_source}; use arrow::buffer::Buffer; use arrow::datatypes::SchemaRef; use arrow::ipc::reader::{FileDecoder, FileReader, StreamReader}; use datafusion_common::error::Result; use datafusion_common::exec_datafusion_err; +use datafusion_datasource::PartitionedFile; use datafusion_datasource::file::FileSource; use datafusion_datasource::file_scan_config::FileScanConfig; use datafusion_datasource::projection::{ProjectionOpener, SplitProjection}; -use datafusion_datasource::PartitionedFile; use datafusion_physical_expr_common::sort_expr::LexOrdering; use datafusion_physical_plan::metrics::ExecutionPlanMetricsSet; use datafusion_physical_plan::projection::ProjectionExprs; diff --git a/datafusion/datasource-avro/Cargo.toml b/datafusion/datasource-avro/Cargo.toml index c9299aeb101da..fec17e133cc87 100644 --- a/datafusion/datasource-avro/Cargo.toml +++ b/datafusion/datasource-avro/Cargo.toml @@ -20,7 +20,7 @@ name = "datafusion-datasource-avro" description = "datafusion-datasource-avro" readme = "README.md" authors.workspace = true -edition.workspace = true +edition = "2024" homepage.workspace = true license.workspace = true repository.workspace = true diff --git a/datafusion/datasource-avro/src/avro_to_arrow/arrow_array_reader.rs b/datafusion/datasource-avro/src/avro_to_arrow/arrow_array_reader.rs index 37ac7ff923fae..ea676a7611db9 100644 --- a/datafusion/datasource-avro/src/avro_to_arrow/arrow_array_reader.rs +++ b/datafusion/datasource-avro/src/avro_to_arrow/arrow_array_reader.rs @@ -19,25 +19,25 @@ use apache_avro::schema::RecordSchema; use apache_avro::{ + Error as AvroError, Reader as AvroReader, error::Details as AvroErrorDetails, schema::{Schema as AvroSchema, SchemaKind}, types::Value, - Error as AvroError, Reader as AvroReader, }; use arrow::array::{ - make_array, Array, ArrayBuilder, ArrayData, ArrayDataBuilder, ArrayRef, - BooleanBuilder, LargeStringArray, ListBuilder, NullArray, OffsetSizeTrait, - PrimitiveArray, StringArray, StringBuilder, StringDictionaryBuilder, + Array, ArrayBuilder, ArrayData, ArrayDataBuilder, ArrayRef, BooleanBuilder, + LargeStringArray, ListBuilder, NullArray, OffsetSizeTrait, PrimitiveArray, + StringArray, StringBuilder, StringDictionaryBuilder, make_array, }; use 
arrow::array::{BinaryArray, FixedSizeBinaryArray, GenericListArray}; use arrow::buffer::{Buffer, MutableBuffer}; use arrow::datatypes::{ ArrowDictionaryKeyType, ArrowNumericType, ArrowPrimitiveType, DataType, Date32Type, - Date64Type, Field, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, - Int8Type, Time32MillisecondType, Time32SecondType, Time64MicrosecondType, + Date64Type, Field, Float32Type, Float64Type, Int8Type, Int16Type, Int32Type, + Int64Type, Time32MillisecondType, Time32SecondType, Time64MicrosecondType, Time64NanosecondType, TimeUnit, TimestampMicrosecondType, TimestampMillisecondType, - TimestampNanosecondType, TimestampSecondType, UInt16Type, UInt32Type, UInt64Type, - UInt8Type, + TimestampNanosecondType, TimestampSecondType, UInt8Type, UInt16Type, UInt32Type, + UInt64Type, }; use arrow::datatypes::{Fields, SchemaRef}; use arrow::error::ArrowError; @@ -103,16 +103,16 @@ impl<R: Read> AvroArrowArrayReader<'_, R> { ) .is_some(); let sub_schemas = us.variants(); - if has_nullable && sub_schemas.len() == 2 { - if let Some(sub_schema) = + if has_nullable + && sub_schemas.len() == 2 + && let Some(sub_schema) = sub_schemas.iter().find(|&s| !matches!(s, AvroSchema::Null)) - { - Self::child_schema_lookup( - parent_field_name, - sub_schema, - schema_lookup, - )?; - } + { + Self::child_schema_lookup( + parent_field_name, + sub_schema, + schema_lookup, + )?; } } AvroSchema::Record(RecordSchema { fields, lookup, .. }) => { @@ -308,7 +308,7 @@ impl<R: Read> AvroArrowArrayReader<'_, R> { e => { return Err(SchemaError(format!( "Nested list data builder type is not supported: {e}" - ))) + ))); } }; @@ -373,7 +373,7 @@ impl<R: Read> AvroArrowArrayReader<'_, R> { e => { return Err(SchemaError(format!( "Nested list data builder type is not supported: {e}" - ))) + ))); } } } @@ -517,7 +517,7 @@ impl<R: Read> AvroArrowArrayReader<'_, R> { DataType::UInt32 => self.read_primitive_list_values::<UInt32Type>(rows), DataType::UInt64 => self.read_primitive_list_values::<UInt64Type>(rows), DataType::Float16 => { - return Err(SchemaError("Float16 not supported".to_string())) + return Err(SchemaError("Float16 not supported".to_string())); } DataType::Float32 => self.read_primitive_list_values::<Float32Type>(rows), DataType::Float64 => self.read_primitive_list_values::<Float64Type>(rows), @@ -528,7 +528,7 @@ impl<R: Read> AvroArrowArrayReader<'_, R> { | DataType::Time64(_) => { return Err(SchemaError( "Temporal types are not yet supported, see ARROW-4803".to_string(), - )) + )); } DataType::Utf8 => flatten_string_values(rows) .into_iter() @@ -715,7 +715,7 @@ impl<R: Read> AvroArrowArrayReader<'_, R> { t => { return Err(SchemaError(format!( "TimeUnit {t:?} not supported with Time64" - ))) + ))); } }, DataType::Time32(unit) => match unit { @@ -729,7 +729,7 @@ impl<R: Read> AvroArrowArrayReader<'_, R> { t => { return Err(SchemaError(format!( "TimeUnit {t:?} not supported with Time32" - ))) + ))); } }, DataType::Utf8 | DataType::LargeUtf8 => Arc::new( @@ -753,7 +753,7 @@ impl<R: Read> AvroArrowArrayReader<'_, R> { .collect::(), ) as ArrayRef, - DataType::FixedSizeBinary(ref size) => { + DataType::FixedSizeBinary(size) => { Arc::new(FixedSizeBinaryArray::try_from_sparse_iter_with_size( rows.iter().map(|row| { let maybe_value = self.field_lookup(&field_path, row); @@ -762,9 +762,9 @@ impl<R: Read> AvroArrowArrayReader<'_, R> { *size, )?) as ArrayRef } - DataType::List(ref list_field) => { + DataType::List(list_field) => { match list_field.data_type() { - DataType::Dictionary(ref key_ty, _) => { + DataType::Dictionary(key_ty, _) => { self.build_wrapped_list_array(rows, &field_path, key_ty)?
} _ => { @@ -784,7 +784,7 @@ impl<R: Read> AvroArrowArrayReader<'_, R> { } } } - DataType::Dictionary(ref key_ty, ref val_ty) => self + DataType::Dictionary(key_ty, val_ty) => self .build_string_dictionary_array( rows, &field_path, @@ -829,7 +829,7 @@ impl<R: Read> AvroArrowArrayReader<'_, R> { return Err(SchemaError(format!( "type {} not supported", field.data_type() - ))) + ))); } }; Ok(arr) diff --git a/datafusion/datasource-avro/src/avro_to_arrow/schema.rs b/datafusion/datasource-avro/src/avro_to_arrow/schema.rs index 3fce0d4826a22..0e8f2a4d56088 100644 --- a/datafusion/datasource-avro/src/avro_to_arrow/schema.rs +++ b/datafusion/datasource-avro/src/avro_to_arrow/schema.rs @@ -15,11 +15,11 @@ // specific language governing permissions and limitations // under the License. +use apache_avro::Schema as AvroSchema; use apache_avro::schema::{ Alias, DecimalSchema, EnumSchema, FixedSchema, Name, RecordSchema, }; use apache_avro::types::Value; -use apache_avro::Schema as AvroSchema; use arrow::datatypes::{DataType, IntervalUnit, Schema, TimeUnit, UnionMode}; use arrow::datatypes::{Field, UnionFields}; use datafusion_common::error::Result; @@ -248,15 +248,9 @@ fn default_field_name(dt: &DataType) -> &str { fn external_props(schema: &AvroSchema) -> HashMap<String, String> { let mut props = HashMap::new(); match &schema { - AvroSchema::Record(RecordSchema { - doc: Some(ref doc), .. - }) - | AvroSchema::Enum(EnumSchema { - doc: Some(ref doc), .. - }) - | AvroSchema::Fixed(FixedSchema { - doc: Some(ref doc), .. - }) => { + AvroSchema::Record(RecordSchema { doc: Some(doc), .. }) | AvroSchema::Enum(EnumSchema { doc: Some(doc), .. }) | AvroSchema::Fixed(FixedSchema { doc: Some(doc), .. }) => { props.insert("avro::doc".to_string(), doc.clone()); } _ => {} @@ -312,8 +306,8 @@ pub fn aliased( #[cfg(test)] mod test { use super::{aliased, external_props, to_arrow_schema}; - use apache_avro::schema::{Alias, EnumSchema, FixedSchema, Name, RecordSchema}; use apache_avro::Schema as AvroSchema; + use apache_avro::schema::{Alias, EnumSchema, FixedSchema, Name, RecordSchema}; use arrow::datatypes::DataType::{Binary, Float32, Float64, Timestamp, Utf8}; use arrow::datatypes::DataType::{Boolean, Int32, Int64}; use arrow::datatypes::TimeUnit::Microsecond; diff --git a/datafusion/datasource-avro/src/file_format.rs b/datafusion/datasource-avro/src/file_format.rs index 6df26a79f0e6a..2447c032e700d 100644 --- a/datafusion/datasource-avro/src/file_format.rs +++ b/datafusion/datasource-avro/src/file_format.rs @@ -27,10 +27,10 @@ use crate::source::AvroSource; use arrow::datatypes::Schema; use arrow::datatypes::SchemaRef; +use datafusion_common::DEFAULT_AVRO_EXTENSION; +use datafusion_common::GetExt; use datafusion_common::internal_err; use datafusion_common::parsers::CompressionTypeVariant; -use datafusion_common::GetExt; -use datafusion_common::DEFAULT_AVRO_EXTENSION; use datafusion_common::{Result, Statistics}; use datafusion_datasource::file::FileSource; use datafusion_datasource::file_compression_type::FileCompressionType; diff --git a/datafusion/datasource-avro/src/source.rs b/datafusion/datasource-avro/src/source.rs index 1ba3ad4350415..33d6cf5272678 100644 --- a/datafusion/datasource-avro/src/source.rs +++ b/datafusion/datasource-avro/src/source.rs @@ -23,12 +23,12 @@ use std::sync::Arc; use crate::avro_to_arrow::Reader as AvroReader; use datafusion_common::error::Result; +use datafusion_datasource::TableSchema; use datafusion_datasource::file::FileSource; use datafusion_datasource::file_scan_config::FileScanConfig; use
datafusion_datasource::file_stream::FileOpener; use datafusion_datasource::projection::{ProjectionOpener, SplitProjection}; use datafusion_datasource::schema_adapter::SchemaAdapterFactory; -use datafusion_datasource::TableSchema; use datafusion_physical_expr_common::sort_expr::LexOrdering; use datafusion_physical_plan::metrics::ExecutionPlanMetricsSet; use datafusion_physical_plan::projection::ProjectionExprs; @@ -162,7 +162,7 @@ mod private { use super::*; use bytes::Buf; - use datafusion_datasource::{file_stream::FileOpenFuture, PartitionedFile}; + use datafusion_datasource::{PartitionedFile, file_stream::FileOpenFuture}; use futures::StreamExt; use object_store::{GetResultPayload, ObjectStore}; diff --git a/datafusion/datasource-csv/Cargo.toml b/datafusion/datasource-csv/Cargo.toml index c9e138759ef4a..74e5ff33007cb 100644 --- a/datafusion/datasource-csv/Cargo.toml +++ b/datafusion/datasource-csv/Cargo.toml @@ -20,7 +20,7 @@ name = "datafusion-datasource-csv" description = "datafusion-datasource-csv" readme = "README.md" authors.workspace = true -edition.workspace = true +edition = "2024" homepage.workspace = true license.workspace = true repository.workspace = true diff --git a/datafusion/datasource-csv/src/file_format.rs b/datafusion/datasource-csv/src/file_format.rs index 6b27687a56f7b..e81ae630112cf 100644 --- a/datafusion/datasource-csv/src/file_format.rs +++ b/datafusion/datasource-csv/src/file_format.rs @@ -31,24 +31,24 @@ use arrow::error::ArrowError; use datafusion_common::config::{ConfigField, ConfigFileType, CsvOptions}; use datafusion_common::file_options::csv_writer::CsvWriterOptions; use datafusion_common::{ - exec_err, not_impl_err, DataFusionError, GetExt, Result, Statistics, - DEFAULT_CSV_EXTENSION, + DEFAULT_CSV_EXTENSION, DataFusionError, GetExt, Result, Statistics, exec_err, + not_impl_err, }; use datafusion_common_runtime::SpawnedTask; +use datafusion_datasource::TableSchema; use datafusion_datasource::decoder::Decoder; use datafusion_datasource::display::FileGroupDisplay; use datafusion_datasource::file::FileSource; use datafusion_datasource::file_compression_type::FileCompressionType; use datafusion_datasource::file_format::{ - FileFormat, FileFormatFactory, DEFAULT_SCHEMA_INFER_MAX_RECORD, + DEFAULT_SCHEMA_INFER_MAX_RECORD, FileFormat, FileFormatFactory, }; use datafusion_datasource::file_scan_config::{FileScanConfig, FileScanConfigBuilder}; use datafusion_datasource::file_sink_config::{FileSink, FileSinkConfig}; use datafusion_datasource::sink::{DataSink, DataSinkExec}; +use datafusion_datasource::write::BatchSerializer; use datafusion_datasource::write::demux::DemuxedStreamReceiver; use datafusion_datasource::write::orchestration::spawn_writer_tasks_and_join; -use datafusion_datasource::write::BatchSerializer; -use datafusion_datasource::TableSchema; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; use datafusion_expr::dml::InsertOp; use datafusion_physical_expr_common::sort_expr::LexRequirement; @@ -59,8 +59,8 @@ use async_trait::async_trait; use bytes::{Buf, Bytes}; use datafusion_datasource::source::DataSourceExec; use futures::stream::BoxStream; -use futures::{pin_mut, Stream, StreamExt, TryStreamExt}; -use object_store::{delimited::newline_delimited_stream, ObjectMeta, ObjectStore}; +use futures::{Stream, StreamExt, TryStreamExt, pin_mut}; +use object_store::{ObjectMeta, ObjectStore, delimited::newline_delimited_stream}; use regex::Regex; #[derive(Default)] diff --git a/datafusion/datasource-csv/src/source.rs 
b/datafusion/datasource-csv/src/source.rs index 95f369962733d..b318d89189d6b 100644 --- a/datafusion/datasource-csv/src/source.rs +++ b/datafusion/datasource-csv/src/source.rs @@ -26,12 +26,12 @@ use std::io::{Read, Seek, SeekFrom}; use std::sync::Arc; use std::task::Poll; -use datafusion_datasource::decoder::{deserialize_stream, DecoderDeserializer}; +use datafusion_datasource::decoder::{DecoderDeserializer, deserialize_stream}; use datafusion_datasource::file_compression_type::FileCompressionType; use datafusion_datasource::file_stream::{FileOpenFuture, FileOpener}; use datafusion_datasource::{ - as_file_source, calculate_range, FileRange, ListingTableUrl, PartitionedFile, - RangeCalculation, TableSchema, + FileRange, ListingTableUrl, PartitionedFile, RangeCalculation, TableSchema, + as_file_source, calculate_range, }; use arrow::csv; @@ -350,10 +350,10 @@ impl FileOpener for CsvOpener { // If the .csv file is read in parallel and this `CsvOpener` is only reading some middle // partition, then don't skip first line let mut csv_has_header = self.config.has_header(); - if let Some(FileRange { start, .. }) = partitioned_file.range { - if start != 0 { - csv_has_header = false; - } + if let Some(FileRange { start, .. }) = partitioned_file.range + && start != 0 + { + csv_has_header = false; } let mut config = (*self.config).clone(); @@ -387,7 +387,7 @@ impl FileOpener for CsvOpener { RangeCalculation::TerminateEarly => { return Ok( futures::stream::poll_fn(move |_| Poll::Ready(None)).boxed() - ) + ); } }; diff --git a/datafusion/datasource-json/Cargo.toml b/datafusion/datasource-json/Cargo.toml index 37fa8d43a0816..a8984849a2218 100644 --- a/datafusion/datasource-json/Cargo.toml +++ b/datafusion/datasource-json/Cargo.toml @@ -20,7 +20,7 @@ name = "datafusion-datasource-json" description = "datafusion-datasource-json" readme = "README.md" authors.workspace = true -edition.workspace = true +edition = "2024" homepage.workspace = true license.workspace = true repository.workspace = true diff --git a/datafusion/datasource-json/src/file_format.rs b/datafusion/datasource-json/src/file_format.rs index 27d1c6d960b9f..0f8754ce72526 100644 --- a/datafusion/datasource-json/src/file_format.rs +++ b/datafusion/datasource-json/src/file_format.rs @@ -30,27 +30,27 @@ use arrow::array::RecordBatch; use arrow::datatypes::{Schema, SchemaRef}; use arrow::error::ArrowError; use arrow::json; -use arrow::json::reader::{infer_json_schema_from_iterator, ValueIter}; +use arrow::json::reader::{ValueIter, infer_json_schema_from_iterator}; use datafusion_common::config::{ConfigField, ConfigFileType, JsonOptions}; use datafusion_common::file_options::json_writer::JsonWriterOptions; use datafusion_common::{ - not_impl_err, GetExt, Result, Statistics, DEFAULT_JSON_EXTENSION, + DEFAULT_JSON_EXTENSION, GetExt, Result, Statistics, not_impl_err, }; use datafusion_common_runtime::SpawnedTask; +use datafusion_datasource::TableSchema; use datafusion_datasource::decoder::Decoder; use datafusion_datasource::display::FileGroupDisplay; use datafusion_datasource::file::FileSource; use datafusion_datasource::file_compression_type::FileCompressionType; use datafusion_datasource::file_format::{ - FileFormat, FileFormatFactory, DEFAULT_SCHEMA_INFER_MAX_RECORD, + DEFAULT_SCHEMA_INFER_MAX_RECORD, FileFormat, FileFormatFactory, }; use datafusion_datasource::file_scan_config::{FileScanConfig, FileScanConfigBuilder}; use datafusion_datasource::file_sink_config::{FileSink, FileSinkConfig}; use datafusion_datasource::sink::{DataSink, DataSinkExec}; 
+use datafusion_datasource::write::BatchSerializer; use datafusion_datasource::write::demux::DemuxedStreamReceiver; use datafusion_datasource::write::orchestration::spawn_writer_tasks_and_join; -use datafusion_datasource::write::BatchSerializer; -use datafusion_datasource::TableSchema; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; use datafusion_expr::dml::InsertOp; use datafusion_physical_expr_common::sort_expr::LexRequirement; diff --git a/datafusion/datasource-json/src/source.rs b/datafusion/datasource-json/src/source.rs index db070d2033f4a..21ffa7f2f9e9e 100644 --- a/datafusion/datasource-json/src/source.rs +++ b/datafusion/datasource-json/src/source.rs @@ -26,13 +26,13 @@ use crate::file_format::JsonDecoder; use datafusion_common::error::{DataFusionError, Result}; use datafusion_common_runtime::JoinSet; -use datafusion_datasource::decoder::{deserialize_stream, DecoderDeserializer}; +use datafusion_datasource::decoder::{DecoderDeserializer, deserialize_stream}; use datafusion_datasource::file_compression_type::FileCompressionType; use datafusion_datasource::file_stream::{FileOpenFuture, FileOpener}; use datafusion_datasource::projection::{ProjectionOpener, SplitProjection}; use datafusion_datasource::schema_adapter::SchemaAdapterFactory; use datafusion_datasource::{ - as_file_source, calculate_range, ListingTableUrl, PartitionedFile, RangeCalculation, + ListingTableUrl, PartitionedFile, RangeCalculation, as_file_source, calculate_range, }; use datafusion_physical_plan::projection::ProjectionExprs; use datafusion_physical_plan::{ExecutionPlan, ExecutionPlanProperties}; @@ -214,7 +214,7 @@ impl FileOpener for JsonOpener { RangeCalculation::TerminateEarly => { return Ok( futures::stream::poll_fn(move |_| Poll::Ready(None)).boxed() - ) + ); } }; diff --git a/datafusion/datasource-parquet/Cargo.toml b/datafusion/datasource-parquet/Cargo.toml index a5f6f56ac6f33..19b814fd0b564 100644 --- a/datafusion/datasource-parquet/Cargo.toml +++ b/datafusion/datasource-parquet/Cargo.toml @@ -20,7 +20,7 @@ name = "datafusion-datasource-parquet" description = "datafusion-datasource-parquet" readme = "README.md" authors.workspace = true -edition.workspace = true +edition = "2024" homepage.workspace = true license.workspace = true repository.workspace = true diff --git a/datafusion/datasource-parquet/src/access_plan.rs b/datafusion/datasource-parquet/src/access_plan.rs index 7399a2cd0856a..570792d40e5b4 100644 --- a/datafusion/datasource-parquet/src/access_plan.rs +++ b/datafusion/datasource-parquet/src/access_plan.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use datafusion_common::{assert_eq_or_internal_err, Result}; +use datafusion_common::{Result, assert_eq_or_internal_err}; use parquet::arrow::arrow_reader::{RowSelection, RowSelector}; use parquet::file::metadata::RowGroupMetaData; @@ -302,13 +302,10 @@ impl ParquetAccessPlan { /// Return an iterator over the row group indexes that should be scanned pub fn row_group_index_iter(&self) -> impl Iterator<Item = usize> + '_ { - self.row_groups.iter().enumerate().filter_map(|(idx, b)| { - if b.should_scan() { - Some(idx) - } else { - None - } - }) + self.row_groups + .iter() + .enumerate() + .filter_map(|(idx, b)| if b.should_scan() { Some(idx) } else { None }) } /// Return a vec of all row group indexes to scan @@ -511,7 +508,10 @@ mod test { .unwrap_err() .to_string(); assert_eq!(row_group_indexes, vec![0, 1, 2, 3]); - assert_contains!(err, "Invalid ParquetAccessPlan Selection. 
Row group 1 has 20 rows but selection only specifies 22 rows"); + assert_contains!( + err, + "Invalid ParquetAccessPlan Selection. Row group 1 has 20 rows but selection only specifies 22 rows" + ); } /// [`RowGroupMetaData`] that returns 4 row groups with 10, 20, 30, 40 rows diff --git a/datafusion/datasource-parquet/src/file_format.rs b/datafusion/datasource-parquet/src/file_format.rs index 9cc061cc45dc0..5e482382be687 100644 --- a/datafusion/datasource-parquet/src/file_format.rs +++ b/datafusion/datasource-parquet/src/file_format.rs @@ -27,12 +27,12 @@ use std::{fmt, vec}; use arrow::array::RecordBatch; use arrow::datatypes::{Fields, Schema, SchemaRef, TimeUnit}; +use datafusion_datasource::TableSchema; use datafusion_datasource::file_compression_type::FileCompressionType; use datafusion_datasource::file_sink_config::{FileSink, FileSinkConfig}; use datafusion_datasource::write::{ - get_writer_schema, ObjectWriterBuilder, SharedBuffer, + ObjectWriterBuilder, SharedBuffer, get_writer_schema, }; -use datafusion_datasource::TableSchema; use datafusion_datasource::file_format::{FileFormat, FileFormatFactory}; use datafusion_datasource::write::demux::DemuxedStreamReceiver; @@ -42,8 +42,8 @@ use datafusion_common::config::{ConfigField, ConfigFileType, TableParquetOptions use datafusion_common::encryption::FileDecryptionProperties; use datafusion_common::parsers::CompressionTypeVariant; use datafusion_common::{ - internal_datafusion_err, internal_err, not_impl_err, DataFusionError, GetExt, - HashSet, Result, DEFAULT_PARQUET_EXTENSION, + DEFAULT_PARQUET_EXTENSION, DataFusionError, GetExt, HashSet, Result, + internal_datafusion_err, internal_err, not_impl_err, }; use datafusion_common::{HashMap, Statistics}; use datafusion_common_runtime::{JoinSet, SpawnedTask}; @@ -60,7 +60,7 @@ use datafusion_session::Session; use crate::metadata::DFParquetMetadata; use crate::reader::CachedParquetFileReaderFactory; -use crate::source::{parse_coerce_int96_string, ParquetSource}; +use crate::source::{ParquetSource, parse_coerce_int96_string}; use async_trait::async_trait; use bytes::Bytes; use datafusion_datasource::source::DataSourceExec; @@ -72,8 +72,8 @@ use object_store::buffered::BufWriter; use object_store::path::Path; use object_store::{ObjectMeta, ObjectStore}; use parquet::arrow::arrow_writer::{ - compute_leaves, ArrowColumnChunk, ArrowColumnWriter, ArrowLeafColumn, - ArrowRowGroupWriterFactory, ArrowWriterOptions, + ArrowColumnChunk, ArrowColumnWriter, ArrowLeafColumn, ArrowRowGroupWriterFactory, + ArrowWriterOptions, compute_leaves, }; use parquet::arrow::async_reader::MetadataFetch; use parquet::arrow::{ArrowWriter, AsyncArrowWriter}; @@ -540,8 +540,9 @@ impl ParquetFormat { _state: &dyn Session, ) -> Result<ParquetSource> { if let Some(encryption_factory_id) = &self.options.crypto.factory_id { - Err(DataFusionError::Configuration( - format!("Parquet encryption factory id is set to '{encryption_factory_id}' but the parquet_encryption feature is disabled"))) + Err(DataFusionError::Configuration(format!( + "Parquet encryption factory id is set to '{encryption_factory_id}' but the parquet_encryption feature is disabled" + ))) } else { Ok(source) } diff --git a/datafusion/datasource-parquet/src/metadata.rs b/datafusion/datasource-parquet/src/metadata.rs index e2ab3fd8279ea..8b11ba64ae7f1 100644 --- a/datafusion/datasource-parquet/src/metadata.rs +++ b/datafusion/datasource-parquet/src/metadata.rs @@ -19,7 +19,7 @@ //! and schema information. 
use crate::{ - apply_file_schema_type_coercions, coerce_int96_to_resolution, ObjectStoreFetch, + ObjectStoreFetch, apply_file_schema_type_coercions, coerce_int96_to_resolution, }; use arrow::array::{ArrayRef, BooleanArray}; use arrow::compute::and; @@ -124,8 +124,8 @@ impl<'a> DFParquetMetadata<'a> { let cache_metadata = !cfg!(feature = "parquet_encryption") || decryption_properties.is_none(); - if cache_metadata { - if let Some(parquet_metadata) = file_metadata_cache + if cache_metadata + && let Some(parquet_metadata) = file_metadata_cache .as_ref() .and_then(|file_metadata_cache| file_metadata_cache.get(object_meta)) .and_then(|file_metadata| { @@ -136,9 +136,8 @@ impl<'a> DFParquetMetadata<'a> { Arc::clone(cached_parquet_metadata.parquet_metadata()) }) }) - { - return Ok(parquet_metadata); - } + { + return Ok(parquet_metadata); } let mut reader = @@ -162,13 +161,11 @@ impl<'a> DFParquetMetadata<'a> { .map_err(DataFusionError::from)?, ); - if cache_metadata { - if let Some(file_metadata_cache) = file_metadata_cache { - file_metadata_cache.put( - object_meta, - Arc::new(CachedParquetMetaData::new(Arc::clone(&metadata))), - ); - } + if cache_metadata && let Some(file_metadata_cache) = file_metadata_cache { + file_metadata_cache.put( + object_meta, + Arc::new(CachedParquetMetaData::new(Arc::clone(&metadata))), + ); } Ok(metadata) diff --git a/datafusion/datasource-parquet/src/opener.rs b/datafusion/datasource-parquet/src/opener.rs index 30573ff6e11da..c426cef3fa0b8 100644 --- a/datafusion/datasource-parquet/src/opener.rs +++ b/datafusion/datasource-parquet/src/opener.rs @@ -20,8 +20,8 @@ use crate::page_filter::PagePruningAccessPlanFilter; use crate::row_group_filter::RowGroupAccessPlanFilter; use crate::{ - apply_file_schema_type_coercions, coerce_int96_to_resolution, row_filter, ParquetAccessPlan, ParquetFileMetrics, ParquetFileReaderFactory, + apply_file_schema_type_coercions, coerce_int96_to_resolution, row_filter, }; use arrow::array::{RecordBatch, RecordBatchOptions}; use datafusion_datasource::file_stream::{FileOpenFuture, FileOpener}; @@ -36,23 +36,23 @@ use std::task::{Context, Poll}; use arrow::datatypes::{SchemaRef, TimeUnit}; use datafusion_common::encryption::FileDecryptionProperties; -use datafusion_common::{exec_err, DataFusionError, Result, ScalarValue}; +use datafusion_common::{DataFusionError, Result, ScalarValue, exec_err}; use datafusion_datasource::{PartitionedFile, TableSchema}; use datafusion_physical_expr::simplifier::PhysicalExprSimplifier; use datafusion_physical_expr_adapter::PhysicalExprAdapterFactory; use datafusion_physical_expr_common::physical_expr::{ - is_dynamic_physical_expr, PhysicalExpr, + PhysicalExpr, is_dynamic_physical_expr, }; use datafusion_physical_plan::metrics::{ Count, ExecutionPlanMetricsSet, MetricBuilder, PruningMetrics, }; -use datafusion_pruning::{build_pruning_predicate, FilePruner, PruningPredicate}; +use datafusion_pruning::{FilePruner, PruningPredicate, build_pruning_predicate}; #[cfg(feature = "parquet_encryption")] use datafusion_common::config::EncryptionFactoryOptions; #[cfg(feature = "parquet_encryption")] use datafusion_execution::parquet_encryption::EncryptionFactory; -use futures::{ready, Stream, StreamExt, TryStreamExt}; +use futures::{Stream, StreamExt, TryStreamExt, ready}; use log::debug; use parquet::arrow::arrow_reader::metrics::ArrowReaderMetrics; use parquet::arrow::arrow_reader::{ @@ -252,12 +252,12 @@ impl FileOpener for ParquetOpener { ) }); - if let Some(file_pruner) = &mut file_pruner { - if 
file_pruner.should_prune()? { - // Return an empty stream immediately to skip the work of setting up the actual stream - file_metrics.files_ranges_pruned_statistics.add_pruned(1); - return Ok(futures::stream::empty().boxed()); - } + if let Some(file_pruner) = &mut file_pruner + && file_pruner.should_prune()? + { + // Return an empty stream immediately to skip the work of setting up the actual stream + file_metrics.files_ranges_pruned_statistics.add_pruned(1); + return Ok(futures::stream::empty().boxed()); } file_metrics.files_ranges_pruned_statistics.add_matched(1); @@ -306,19 +306,19 @@ impl FileOpener for ParquetOpener { )?; } - if let Some(ref coerce) = coerce_int96 { - if let Some(merged) = coerce_int96_to_resolution( + if let Some(ref coerce) = coerce_int96 + && let Some(merged) = coerce_int96_to_resolution( reader_metadata.parquet_schema(), &physical_file_schema, coerce, - ) { - physical_file_schema = Arc::new(merged); - options = options.with_schema(Arc::clone(&physical_file_schema)); - reader_metadata = ArrowReaderMetadata::try_new( - Arc::clone(reader_metadata.metadata()), - options.clone(), - )?; - } + ) + { + physical_file_schema = Arc::new(merged); + options = options.with_schema(Arc::clone(&physical_file_schema)); + reader_metadata = ArrowReaderMetadata::try_new( + Arc::clone(reader_metadata.metadata()), + options.clone(), + )?; } // Adapt the projection & filter predicate to the physical file schema. @@ -462,16 +462,17 @@ impl FileOpener for ParquetOpener { // page index pruning: if all data on individual pages can // be ruled using page metadata, rows from other columns // with that range can be skipped as well - if enable_page_index && !access_plan.is_empty() { - if let Some(p) = page_pruning_predicate { - access_plan = p.prune_plan_with_page_index( - access_plan, - &physical_file_schema, - builder.parquet_schema(), - file_metadata.as_ref(), - &file_metrics, - ); - } + if enable_page_index + && !access_plan.is_empty() + && let Some(p) = page_pruning_predicate + { + access_plan = p.prune_plan_with_page_index( + access_plan, + &physical_file_schema, + builder.parquet_schema(), + file_metadata.as_ref(), + &file_metrics, + ); } let row_group_indexes = access_plan.row_group_indexes(); @@ -843,22 +844,22 @@ mod test { use arrow::datatypes::{DataType, Field, Schema}; use bytes::{BufMut, BytesMut}; use datafusion_common::{ - record_batch, stats::Precision, ColumnStatistics, DataFusionError, ScalarValue, - Statistics, + ColumnStatistics, DataFusionError, ScalarValue, Statistics, record_batch, + stats::Precision, }; - use datafusion_datasource::{file_stream::FileOpener, PartitionedFile, TableSchema}; + use datafusion_datasource::{PartitionedFile, TableSchema, file_stream::FileOpener}; use datafusion_expr::{col, lit}; use datafusion_physical_expr::{ - expressions::DynamicFilterPhysicalExpr, planner::logical2physical, - projection::ProjectionExprs, PhysicalExpr, + PhysicalExpr, expressions::DynamicFilterPhysicalExpr, planner::logical2physical, + projection::ProjectionExprs, }; use datafusion_physical_expr_adapter::DefaultPhysicalExprAdapterFactory; use datafusion_physical_plan::metrics::ExecutionPlanMetricsSet; use futures::{Stream, StreamExt}; - use object_store::{memory::InMemory, path::Path, ObjectStore}; + use object_store::{ObjectStore, memory::InMemory, path::Path}; use parquet::arrow::ArrowWriter; - use crate::{opener::ParquetOpener, DefaultParquetFileReaderFactory}; + use crate::{DefaultParquetFileReaderFactory, opener::ParquetOpener}; async fn count_batches_and_rows( mut 
stream: std::pin::Pin< diff --git a/datafusion/datasource-parquet/src/page_filter.rs b/datafusion/datasource-parquet/src/page_filter.rs index 9f4e52c513cf5..e25e33835f790 100644 --- a/datafusion/datasource-parquet/src/page_filter.rs +++ b/datafusion/datasource-parquet/src/page_filter.rs @@ -28,9 +28,9 @@ use arrow::{ array::ArrayRef, datatypes::{Schema, SchemaRef}, }; -use datafusion_common::pruning::PruningStatistics; use datafusion_common::ScalarValue; -use datafusion_physical_expr::{split_conjunction, PhysicalExpr}; +use datafusion_common::pruning::PruningStatistics; +use datafusion_physical_expr::{PhysicalExpr, split_conjunction}; use datafusion_pruning::PruningPredicate; use log::{debug, trace}; @@ -178,9 +178,10 @@ impl PagePruningAccessPlanFilter { || parquet_metadata.column_index().is_none() { debug!( - "Can not prune pages due to lack of indexes. Have offset: {}, column index: {}", - parquet_metadata.offset_index().is_some(), parquet_metadata.column_index().is_some() - ); + "Can not prune pages due to lack of indexes. Have offset: {}, column index: {}", + parquet_metadata.offset_index().is_some(), + parquet_metadata.column_index().is_some() + ); return access_plan; }; @@ -230,7 +231,8 @@ impl PagePruningAccessPlanFilter { continue; }; - debug!("Use filter and page index to create RowSelection {:?} from predicate: {:?}", + debug!( + "Use filter and page index to create RowSelection {:?} from predicate: {:?}", &selection, predicate.predicate_expr(), ); @@ -253,7 +255,9 @@ impl PagePruningAccessPlanFilter { let rows_selected = overall_selection.row_count(); if rows_selected > 0 { let rows_skipped = overall_selection.skipped_row_count(); - trace!("Overall selection from predicate skipped {rows_skipped}, selected {rows_selected}: {overall_selection:?}"); + trace!( + "Overall selection from predicate skipped {rows_skipped}, selected {rows_selected}: {overall_selection:?}" + ); total_skip += rows_skipped; total_select += rows_selected; access_plan.scan_selection(row_group_index, overall_selection) diff --git a/datafusion/datasource-parquet/src/reader.rs b/datafusion/datasource-parquet/src/reader.rs index 0d27d18d88dd1..4291c9af76a63 100644 --- a/datafusion/datasource-parquet/src/reader.rs +++ b/datafusion/datasource-parquet/src/reader.rs @@ -18,15 +18,15 @@ //! [`ParquetFileReaderFactory`] and [`DefaultParquetFileReaderFactory`] for //! 
low level control of parquet file readers -use crate::metadata::DFParquetMetadata; use crate::ParquetFileMetrics; +use crate::ParquetFileMetrics; +use crate::metadata::DFParquetMetadata; use bytes::Bytes; use datafusion_datasource::PartitionedFile; use datafusion_execution::cache::cache_manager::FileMetadata; use datafusion_execution::cache::cache_manager::FileMetadataCache; use datafusion_physical_plan::metrics::ExecutionPlanMetricsSet; -use futures::future::BoxFuture; use futures::FutureExt; +use futures::FutureExt; +use futures::future::BoxFuture; use object_store::ObjectStore; use parquet::arrow::arrow_reader::ArrowReaderOptions; use parquet::arrow::async_reader::{AsyncFileReader, ParquetObjectReader}; diff --git a/datafusion/datasource-parquet/src/row_filter.rs b/datafusion/datasource-parquet/src/row_filter.rs index 04f8d5533ef69..ba3b29be40d74 100644 --- a/datafusion/datasource-parquet/src/row_filter.rs +++ b/datafusion/datasource-parquet/src/row_filter.rs @@ -67,16 +67,16 @@ use arrow::array::BooleanArray; use arrow::datatypes::{DataType, Schema, SchemaRef}; use arrow::error::{ArrowError, Result as ArrowResult}; use arrow::record_batch::RecordBatch; -use parquet::arrow::arrow_reader::{ArrowPredicate, RowFilter}; use parquet::arrow::ProjectionMask; +use parquet::arrow::ProjectionMask; +use parquet::arrow::arrow_reader::{ArrowPredicate, RowFilter}; use parquet::file::metadata::ParquetMetaData; +use datafusion_common::Result; use datafusion_common::cast::as_boolean_array; use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor}; -use datafusion_common::Result; use datafusion_physical_expr::expressions::Column; use datafusion_physical_expr::utils::reassign_expr_columns; -use datafusion_physical_expr::{split_conjunction, PhysicalExpr}; +use datafusion_physical_expr::{PhysicalExpr, split_conjunction}; use datafusion_physical_plan::metrics; @@ -287,10 +287,10 @@ impl TreeNodeVisitor<'_> for PushdownChecker<'_> { type Node = Arc<dyn PhysicalExpr>; fn f_down(&mut self, node: &Self::Node) -> Result<TreeNodeRecursion> { - if let Some(column) = node.as_any().downcast_ref::<Column>() { - if let Some(recursion) = self.check_single_column(column.name()) { - return Ok(recursion); - } + if let Some(column) = node.as_any().downcast_ref::<Column>() + && let Some(recursion) = self.check_single_column(column.name()) + { + return Ok(recursion); } Ok(TreeNodeRecursion::Continue) @@ -465,7 +465,7 @@ mod test { use datafusion_common::ScalarValue; use arrow::datatypes::{Field, TimeUnit::Nanosecond}; - use datafusion_expr::{col, Expr}; + use datafusion_expr::{Expr, col}; use datafusion_physical_expr::planner::logical2physical; use datafusion_physical_expr_adapter::{ DefaultPhysicalExprAdapterFactory, PhysicalExprAdapterFactory, diff --git a/datafusion/datasource-parquet/src/row_group_filter.rs b/datafusion/datasource-parquet/src/row_group_filter.rs index 90e4e10d5ae8f..1264197609f3f 100644 --- a/datafusion/datasource-parquet/src/row_group_filter.rs +++ b/datafusion/datasource-parquet/src/row_group_filter.rs @@ -31,7 +31,7 @@ use parquet::basic::Type; use parquet::data_type::Decimal; use parquet::schema::types::SchemaDescriptor; use parquet::{ - arrow::{async_reader::AsyncFileReader, ParquetRecordBatchStreamBuilder}, + arrow::{ParquetRecordBatchStreamBuilder, async_reader::AsyncFileReader}, bloom_filter::Sbbf, file::metadata::RowGroupMetaData, }; @@ -444,11 +444,11 @@ mod tests { use arrow::datatypes::DataType::Decimal128; use arrow::datatypes::{DataType, Field}; use datafusion_common::Result; - use datafusion_expr::{cast, col, lit, Expr}; + use datafusion_expr::{Expr, cast, col, lit}; use
datafusion_physical_expr::planner::logical2physical; use datafusion_physical_plan::metrics::ExecutionPlanMetricsSet; - use parquet::arrow::async_reader::ParquetObjectReader; use parquet::arrow::ArrowSchemaConverter; + use parquet::arrow::async_reader::ParquetObjectReader; use parquet::basic::LogicalType; use parquet::data_type::{ByteArray, FixedLenByteArray}; use parquet::file::metadata::ColumnChunkMetaData; @@ -1425,7 +1425,10 @@ mod tests { } ExpectedPruning::Some(expected) => { let actual = row_groups.access_plan.row_group_indexes(); - assert_eq!(expected, &actual, "Unexpected row groups pruned. Expected {expected:?}, got {actual:?}"); + assert_eq!( + expected, &actual, + "Unexpected row groups pruned. Expected {expected:?}, got {actual:?}" + ); } } } diff --git a/datafusion/datasource-parquet/src/source.rs b/datafusion/datasource-parquet/src/source.rs index c6a58a7c98b86..5caaa1c4747de 100644 --- a/datafusion/datasource-parquet/src/source.rs +++ b/datafusion/datasource-parquet/src/source.rs @@ -21,11 +21,11 @@ use std::fmt::Debug; use std::fmt::Formatter; use std::sync::Arc; -use crate::opener::build_pruning_predicates; -use crate::opener::ParquetOpener; -use crate::row_filter::can_expr_be_pushed_down_with_schemas; use crate::DefaultParquetFileReaderFactory; use crate::ParquetFileReaderFactory; +use crate::opener::ParquetOpener; +use crate::opener::build_pruning_predicates; +use crate::row_filter::can_expr_be_pushed_down_with_schemas; use datafusion_common::config::ConfigOptions; #[cfg(feature = "parquet_encryption")] use datafusion_common::config::EncryptionFactoryOptions; @@ -33,23 +33,23 @@ use datafusion_datasource::as_file_source; use datafusion_datasource::file_stream::FileOpener; use arrow::datatypes::TimeUnit; -use datafusion_common::config::TableParquetOptions; use datafusion_common::DataFusionError; +use datafusion_common::config::TableParquetOptions; +use datafusion_datasource::TableSchema; use datafusion_datasource::file::FileSource; use datafusion_datasource::file_scan_config::FileScanConfig; -use datafusion_datasource::TableSchema; use datafusion_physical_expr::conjunction; use datafusion_physical_expr::projection::ProjectionExprs; use datafusion_physical_expr_adapter::DefaultPhysicalExprAdapterFactory; -use datafusion_physical_expr_common::physical_expr::fmt_sql; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; +use datafusion_physical_expr_common::physical_expr::fmt_sql; +use datafusion_physical_plan::DisplayFormatType; use datafusion_physical_plan::filter_pushdown::PushedDown; use datafusion_physical_plan::filter_pushdown::{ FilterPushdownPropagation, PushedDownPredicate, }; use datafusion_physical_plan::metrics::Count; use datafusion_physical_plan::metrics::ExecutionPlanMetricsSet; -use datafusion_physical_plan::DisplayFormatType; #[cfg(feature = "parquet_encryption")] use datafusion_execution::parquet_encryption::EncryptionFactory; diff --git a/datafusion/functions-table/Cargo.toml b/datafusion/functions-table/Cargo.toml index aa401fbd7d4ed..8894ef8c4ade5 100644 --- a/datafusion/functions-table/Cargo.toml +++ b/datafusion/functions-table/Cargo.toml @@ -21,7 +21,7 @@ description = "Traits and types for logical plans and expressions for DataFusion keywords = ["datafusion", "logical", "plan", "expressions"] readme = "README.md" version = { workspace = true } -edition = { workspace = true } +edition = "2024" homepage = { workspace = true } repository = { workspace = true } license = { workspace = true } diff --git 
a/datafusion/functions-table/src/generate_series.rs b/datafusion/functions-table/src/generate_series.rs index d71c5945aafcc..9e58e9d0d01b4 100644 --- a/datafusion/functions-table/src/generate_series.rs +++ b/datafusion/functions-table/src/generate_series.rs @@ -26,10 +26,10 @@ use async_trait::async_trait; use datafusion_catalog::Session; use datafusion_catalog::TableFunctionImpl; use datafusion_catalog::TableProvider; -use datafusion_common::{plan_err, Result, ScalarValue}; +use datafusion_common::{Result, ScalarValue, plan_err}; use datafusion_expr::{Expr, TableType}; -use datafusion_physical_plan::memory::{LazyBatchGenerator, LazyMemoryExec}; use datafusion_physical_plan::ExecutionPlan; +use datafusion_physical_plan::memory::{LazyBatchGenerator, LazyMemoryExec}; use parking_lot::RwLock; use std::any::Any; use std::fmt; @@ -415,11 +415,7 @@ impl fmt::Display for GenericSeriesState { fn reach_end_int64(val: i64, end: i64, step: i64, include_end: bool) -> bool { if step > 0 { - if include_end { - val > end - } else { - val >= end - } + if include_end { val > end } else { val >= end } } else if include_end { val < end } else { @@ -440,11 +436,15 @@ fn validate_interval_step( let step_is_negative = step.months < 0 || step.days < 0 || step.nanoseconds < 0; if start > end && step_is_positive { - return plan_err!("Start is bigger than end, but increment is positive: Cannot generate infinite series"); + return plan_err!( + "Start is bigger than end, but increment is positive: Cannot generate infinite series" + ); } if start < end && step_is_negative { - return plan_err!("Start is smaller than end, but increment is negative: Cannot generate infinite series"); + return plan_err!( + "Start is smaller than end, but increment is negative: Cannot generate infinite series" + ); } Ok(()) @@ -529,7 +529,7 @@ impl GenerateSeriesFuncImpl { "Argument #{} must be an INTEGER or NULL, got {:?}", expr_index + 1, other - ) + ); } }; } @@ -558,11 +558,15 @@ impl GenerateSeriesFuncImpl { }; if start > end && step > 0 { - return plan_err!("Start is bigger than end, but increment is positive: Cannot generate infinite series"); + return plan_err!( + "Start is bigger than end, but increment is positive: Cannot generate infinite series" + ); } if start < end && step < 0 { - return plan_err!("Start is smaller than end, but increment is negative: Cannot generate infinite series"); + return plan_err!( + "Start is smaller than end, but increment is negative: Cannot generate infinite series" + ); } if step == 0 { @@ -598,7 +602,7 @@ impl GenerateSeriesFuncImpl { return plan_err!( "First argument must be a timestamp or NULL, got {:?}", other - ) + ); } }; @@ -610,7 +614,7 @@ impl GenerateSeriesFuncImpl { return plan_err!( "Second argument must be a timestamp or NULL, got {:?}", other - ) + ); } }; @@ -622,7 +626,7 @@ impl GenerateSeriesFuncImpl { return plan_err!( "Third argument must be an interval or NULL, got {:?}", other - ) + ); } }; @@ -685,7 +689,7 @@ impl GenerateSeriesFuncImpl { return plan_err!( "First argument must be a date or NULL, got {:?}", other - ) + ); } }; @@ -703,7 +707,7 @@ impl GenerateSeriesFuncImpl { return plan_err!( "Second argument must be a date or NULL, got {:?}", other - ) + ); } }; @@ -723,7 +727,7 @@ impl GenerateSeriesFuncImpl { return plan_err!( "Third argument must be an interval or NULL, got {:?}", other - ) + ); } }; diff --git a/datafusion/physical-optimizer/Cargo.toml b/datafusion/physical-optimizer/Cargo.toml index 395da10d629ba..1c94b8cd492bc 100644 --- 
a/datafusion/physical-optimizer/Cargo.toml +++ b/datafusion/physical-optimizer/Cargo.toml @@ -21,7 +21,7 @@ description = "DataFusion Physical Optimizer" keywords = ["datafusion", "query", "optimizer"] readme = "README.md" version = { workspace = true } -edition = { workspace = true } +edition = "2024" homepage = { workspace = true } repository = { workspace = true } license = { workspace = true } diff --git a/datafusion/physical-optimizer/src/aggregate_statistics.rs b/datafusion/physical-optimizer/src/aggregate_statistics.rs index 4bd8e321f158b..cf3c15509c29a 100644 --- a/datafusion/physical-optimizer/src/aggregate_statistics.rs +++ b/datafusion/physical-optimizer/src/aggregate_statistics.rs @@ -16,15 +16,15 @@ // under the License. //! Utilizing exact statistics from sources to avoid scanning data +use datafusion_common::Result; use datafusion_common::config::ConfigOptions; use datafusion_common::scalar::ScalarValue; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; -use datafusion_common::Result; use datafusion_physical_plan::aggregates::AggregateExec; use datafusion_physical_plan::placeholder_row::PlaceholderRowExec; use datafusion_physical_plan::projection::{ProjectionExec, ProjectionExpr}; use datafusion_physical_plan::udaf::{AggregateFunctionExpr, StatisticsArgs}; -use datafusion_physical_plan::{expressions, ExecutionPlan}; +use datafusion_physical_plan::{ExecutionPlan, expressions}; use std::sync::Arc; use crate::PhysicalOptimizerRule; @@ -115,27 +115,23 @@ impl PhysicalOptimizerRule for AggregateStatistics { /// We would have preferred to return a casted ref to AggregateExec but the recursion requires /// the `ExecutionPlan.children()` method that returns an owned reference. fn take_optimizable(node: &dyn ExecutionPlan) -> Option> { - if let Some(final_agg_exec) = node.as_any().downcast_ref::() { - if !final_agg_exec.mode().is_first_stage() - && final_agg_exec.group_expr().is_empty() - { - let mut child = Arc::clone(final_agg_exec.input()); - loop { - if let Some(partial_agg_exec) = - child.as_any().downcast_ref::() - { - if partial_agg_exec.mode().is_first_stage() - && partial_agg_exec.group_expr().is_empty() - && partial_agg_exec.filter_expr().iter().all(|e| e.is_none()) - { - return Some(child); - } - } - if let [childrens_child] = child.children().as_slice() { - child = Arc::clone(childrens_child); - } else { - break; - } + if let Some(final_agg_exec) = node.as_any().downcast_ref::() + && !final_agg_exec.mode().is_first_stage() + && final_agg_exec.group_expr().is_empty() + { + let mut child = Arc::clone(final_agg_exec.input()); + loop { + if let Some(partial_agg_exec) = child.as_any().downcast_ref::() + && partial_agg_exec.mode().is_first_stage() + && partial_agg_exec.group_expr().is_empty() + && partial_agg_exec.filter_expr().iter().all(|e| e.is_none()) + { + return Some(child); + } + if let [childrens_child] = child.children().as_slice() { + child = Arc::clone(childrens_child); + } else { + break; } } } diff --git a/datafusion/physical-optimizer/src/coalesce_batches.rs b/datafusion/physical-optimizer/src/coalesce_batches.rs index ecfe193b34445..24efcc437a3ad 100644 --- a/datafusion/physical-optimizer/src/coalesce_batches.rs +++ b/datafusion/physical-optimizer/src/coalesce_batches.rs @@ -27,8 +27,8 @@ use datafusion_common::config::ConfigOptions; use datafusion_common::error::Result; use datafusion_physical_expr::Partitioning; use datafusion_physical_plan::{ - async_func::AsyncFuncExec, coalesce_batches::CoalesceBatchesExec, - 
repartition::RepartitionExec, ExecutionPlan, + ExecutionPlan, async_func::AsyncFuncExec, coalesce_batches::CoalesceBatchesExec, + repartition::RepartitionExec, }; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; diff --git a/datafusion/physical-optimizer/src/combine_partial_final_agg.rs b/datafusion/physical-optimizer/src/combine_partial_final_agg.rs index 667250d6460b5..782e0754b7d27 100644 --- a/datafusion/physical-optimizer/src/combine_partial_final_agg.rs +++ b/datafusion/physical-optimizer/src/combine_partial_final_agg.rs @@ -21,16 +21,16 @@ use std::sync::Arc; use datafusion_common::error::Result; +use datafusion_physical_plan::ExecutionPlan; use datafusion_physical_plan::aggregates::{ AggregateExec, AggregateMode, PhysicalGroupBy, }; -use datafusion_physical_plan::ExecutionPlan; use crate::PhysicalOptimizerRule; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_physical_expr::aggregate::AggregateFunctionExpr; -use datafusion_physical_expr::{physical_exprs_equal, PhysicalExpr}; +use datafusion_physical_expr::{PhysicalExpr, physical_exprs_equal}; /// CombinePartialFinalAggregate optimizer rule combines the adjacent Partial and Final AggregateExecs /// into a Single AggregateExec if their grouping exprs and aggregate exprs equal. diff --git a/datafusion/physical-optimizer/src/enforce_distribution.rs b/datafusion/physical-optimizer/src/enforce_distribution.rs index fd5ef6eda5704..40b0486d11c3e 100644 --- a/datafusion/physical-optimizer/src/enforce_distribution.rs +++ b/datafusion/physical-optimizer/src/enforce_distribution.rs @@ -40,8 +40,9 @@ use datafusion_expr::logical_plan::JoinType; use datafusion_physical_expr::expressions::{Column, NoOp}; use datafusion_physical_expr::utils::map_columns_before_projection; use datafusion_physical_expr::{ - physical_exprs_equal, EquivalenceProperties, PhysicalExpr, PhysicalExprRef, + EquivalenceProperties, PhysicalExpr, PhysicalExprRef, physical_exprs_equal, }; +use datafusion_physical_plan::ExecutionPlanProperties; use datafusion_physical_plan::aggregates::{ AggregateExec, AggregateMode, PhysicalGroupBy, }; @@ -54,10 +55,9 @@ use datafusion_physical_plan::projection::{ProjectionExec, ProjectionExpr}; use datafusion_physical_plan::repartition::RepartitionExec; use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; use datafusion_physical_plan::tree_node::PlanContext; -use datafusion_physical_plan::union::{can_interleave, InterleaveExec, UnionExec}; +use datafusion_physical_plan::union::{InterleaveExec, UnionExec, can_interleave}; use datafusion_physical_plan::windows::WindowAggExec; -use datafusion_physical_plan::windows::{get_best_fitting_window, BoundedWindowAggExec}; -use datafusion_physical_plan::ExecutionPlanProperties; +use datafusion_physical_plan::windows::{BoundedWindowAggExec, get_best_fitting_window}; use datafusion_physical_plan::{Distribution, ExecutionPlan, Partitioning}; use itertools::izip; @@ -457,14 +457,14 @@ where positions, ) = try_reorder(join_key_pairs, parent_required, eq_properties); - if let Some(positions) = positions { - if !positions.is_empty() { - let new_join_on = new_join_conditions(&left_keys, &right_keys); - let new_sort_options = (0..sort_options.len()) - .map(|idx| sort_options[positions[idx]]) - .collect(); - join_plan.plan = join_constructor((new_join_on, new_sort_options))?; - } + if let Some(positions) = positions + && !positions.is_empty() + { + let new_join_on = 
new_join_conditions(&left_keys, &right_keys); + let new_sort_options = (0..sort_options.len()) + .map(|idx| sort_options[positions[idx]]) + .collect(); + join_plan.plan = join_constructor((new_join_on, new_sort_options))?; } join_plan.children[0].data = left_keys; @@ -493,83 +493,75 @@ pub fn reorder_aggregate_keys( if parent_required.len() == output_exprs.len() && agg_exec.group_expr().null_expr().is_empty() && !physical_exprs_equal(&output_exprs, parent_required) + && let Some(positions) = expected_expr_positions(&output_exprs, parent_required) + && let Some(agg_exec) = agg_exec.input().as_any().downcast_ref::() + && matches!(agg_exec.mode(), &AggregateMode::Partial) { - if let Some(positions) = expected_expr_positions(&output_exprs, parent_required) { - if let Some(agg_exec) = - agg_exec.input().as_any().downcast_ref::() - { - if matches!(agg_exec.mode(), &AggregateMode::Partial) { - let group_exprs = agg_exec.group_expr().expr(); - let new_group_exprs = positions - .into_iter() - .map(|idx| group_exprs[idx].clone()) - .collect(); - let partial_agg = Arc::new(AggregateExec::try_new( - AggregateMode::Partial, - PhysicalGroupBy::new_single(new_group_exprs), - agg_exec.aggr_expr().to_vec(), - agg_exec.filter_expr().to_vec(), - Arc::clone(agg_exec.input()), - Arc::clone(&agg_exec.input_schema), - )?); - // Build new group expressions that correspond to the output - // of the "reordered" aggregator: - let group_exprs = partial_agg.group_expr().expr(); - let new_group_by = PhysicalGroupBy::new_single( - partial_agg - .output_group_expr() - .into_iter() - .enumerate() - .map(|(idx, expr)| (expr, group_exprs[idx].1.clone())) - .collect(), - ); - let new_final_agg = Arc::new(AggregateExec::try_new( - AggregateMode::FinalPartitioned, - new_group_by, - agg_exec.aggr_expr().to_vec(), - agg_exec.filter_expr().to_vec(), - Arc::clone(&partial_agg) as _, - agg_exec.input_schema(), - )?); - - agg_node.plan = Arc::clone(&new_final_agg) as _; - agg_node.data.clear(); - agg_node.children = vec![PlanWithKeyRequirements::new( - partial_agg as _, - vec![], - agg_node.children.swap_remove(0).children, - )]; - - // Need to create a new projection to change the expr ordering back - let agg_schema = new_final_agg.schema(); - let mut proj_exprs = output_columns - .iter() - .map(|col| { - let name = col.name(); - let index = agg_schema.index_of(name)?; - Ok(ProjectionExpr { - expr: Arc::new(Column::new(name, index)) as _, - alias: name.to_owned(), - }) - }) - .collect::>>()?; - let agg_fields = agg_schema.fields(); - for (idx, field) in - agg_fields.iter().enumerate().skip(output_columns.len()) - { - let name = field.name(); - let plan = Arc::new(Column::new(name, idx)) as _; - proj_exprs.push(ProjectionExpr { - expr: plan, - alias: name.clone(), - }) - } - return ProjectionExec::try_new(proj_exprs, new_final_agg).map(|p| { - PlanWithKeyRequirements::new(Arc::new(p), vec![], vec![agg_node]) - }); - } - } + let group_exprs = agg_exec.group_expr().expr(); + let new_group_exprs = positions + .into_iter() + .map(|idx| group_exprs[idx].clone()) + .collect(); + let partial_agg = Arc::new(AggregateExec::try_new( + AggregateMode::Partial, + PhysicalGroupBy::new_single(new_group_exprs), + agg_exec.aggr_expr().to_vec(), + agg_exec.filter_expr().to_vec(), + Arc::clone(agg_exec.input()), + Arc::clone(&agg_exec.input_schema), + )?); + // Build new group expressions that correspond to the output + // of the "reordered" aggregator: + let group_exprs = partial_agg.group_expr().expr(); + let new_group_by = 
PhysicalGroupBy::new_single( + partial_agg + .output_group_expr() + .into_iter() + .enumerate() + .map(|(idx, expr)| (expr, group_exprs[idx].1.clone())) + .collect(), + ); + let new_final_agg = Arc::new(AggregateExec::try_new( + AggregateMode::FinalPartitioned, + new_group_by, + agg_exec.aggr_expr().to_vec(), + agg_exec.filter_expr().to_vec(), + Arc::clone(&partial_agg) as _, + agg_exec.input_schema(), + )?); + + agg_node.plan = Arc::clone(&new_final_agg) as _; + agg_node.data.clear(); + agg_node.children = vec![PlanWithKeyRequirements::new( + partial_agg as _, + vec![], + agg_node.children.swap_remove(0).children, + )]; + + // Need to create a new projection to change the expr ordering back + let agg_schema = new_final_agg.schema(); + let mut proj_exprs = output_columns + .iter() + .map(|col| { + let name = col.name(); + let index = agg_schema.index_of(name)?; + Ok(ProjectionExpr { + expr: Arc::new(Column::new(name, index)) as _, + alias: name.to_owned(), + }) + }) + .collect::>>()?; + let agg_fields = agg_schema.fields(); + for (idx, field) in agg_fields.iter().enumerate().skip(output_columns.len()) { + let name = field.name(); + let plan = Arc::new(Column::new(name, idx)) as _; + proj_exprs.push(ProjectionExpr { + expr: plan, + alias: name.clone(), + }) } + return ProjectionExec::try_new(proj_exprs, new_final_agg) + .map(|p| PlanWithKeyRequirements::new(Arc::new(p), vec![], vec![agg_node])); } Ok(agg_node) } @@ -673,27 +665,27 @@ pub fn reorder_join_keys_to_inputs( left.equivalence_properties(), right.equivalence_properties(), ); - if let Some(positions) = positions { - if !positions.is_empty() { - let JoinKeyPairs { - left_keys, - right_keys, - } = join_keys; - let new_join_on = new_join_conditions(&left_keys, &right_keys); - let new_sort_options = (0..sort_options.len()) - .map(|idx| sort_options[positions[idx]]) - .collect(); - return SortMergeJoinExec::try_new( - Arc::clone(left), - Arc::clone(right), - new_join_on, - filter.clone(), - *join_type, - new_sort_options, - *null_equality, - ) - .map(|smj| Arc::new(smj) as _); - } + if let Some(positions) = positions + && !positions.is_empty() + { + let JoinKeyPairs { + left_keys, + right_keys, + } = join_keys; + let new_join_on = new_join_conditions(&left_keys, &right_keys); + let new_sort_options = (0..sort_options.len()) + .map(|idx| sort_options[positions[idx]]) + .collect(); + return SortMergeJoinExec::try_new( + Arc::clone(left), + Arc::clone(right), + new_join_on, + filter.clone(), + *join_type, + new_sort_options, + *null_equality, + ) + .map(|smj| Arc::new(smj) as _); } } Ok(plan) @@ -1045,14 +1037,13 @@ pub fn replace_order_preserving_variants( return Ok(context); } else if let Some(repartition) = context.plan.as_any().downcast_ref::() + && repartition.preserve_order() { - if repartition.preserve_order() { - context.plan = Arc::new(RepartitionExec::try_new( - Arc::clone(&context.children[0].plan), - repartition.partitioning().clone(), - )?); - return Ok(context); - } + context.plan = Arc::new(RepartitionExec::try_new( + Arc::clone(&context.children[0].plan), + repartition.partitioning().clone(), + )?); + return Ok(context); } context.update_plan_from_children() @@ -1211,14 +1202,14 @@ pub fn ensure_distribution( )? 
{ plan = updated_window; } - } else if let Some(exec) = plan.as_any().downcast_ref::() { - if let Some(updated_window) = get_best_fitting_window( + } else if let Some(exec) = plan.as_any().downcast_ref::() + && let Some(updated_window) = get_best_fitting_window( exec.window_expr(), exec.input(), &exec.partition_keys(), - )? { - plan = updated_window; - } + )? + { + plan = updated_window; }; let repartition_status_flags = @@ -1259,12 +1250,12 @@ pub fn ensure_distribution( // If repartitioning is not possible (a.k.a. None is returned from `ExecutionPlan::repartitioned`) // then no repartitioning will have occurred. As the default implementation returns None, it is only // specific physical plan nodes, such as certain datasources, which are repartitioned. - if repartition_file_scans && roundrobin_beneficial_stats { - if let Some(new_child) = + if repartition_file_scans + && roundrobin_beneficial_stats + && let Some(new_child) = child.plan.repartitioned(target_partitions, config)? - { - child.plan = new_child; - } + { + child.plan = new_child; } // Satisfy the distribution requirement if it is unmet. diff --git a/datafusion/physical-optimizer/src/enforce_sorting/mod.rs b/datafusion/physical-optimizer/src/enforce_sorting/mod.rs index 69090f8ef69fb..a5fafb9e87e1d 100644 --- a/datafusion/physical-optimizer/src/enforce_sorting/mod.rs +++ b/datafusion/physical-optimizer/src/enforce_sorting/mod.rs @@ -40,23 +40,23 @@ pub mod sort_pushdown; use std::sync::Arc; +use crate::PhysicalOptimizerRule; use crate::enforce_sorting::replace_with_order_preserving_variants::{ - replace_with_order_preserving_variants, OrderPreservationContext, + OrderPreservationContext, replace_with_order_preserving_variants, }; use crate::enforce_sorting::sort_pushdown::{ - assign_initial_requirements, pushdown_sorts, SortPushDown, + SortPushDown, assign_initial_requirements, pushdown_sorts, }; use crate::output_requirements::OutputRequirementExec; use crate::utils::{ add_sort_above, add_sort_above_with_check, is_coalesce_partitions, is_limit, is_repartition, is_sort, is_sort_preserving_merge, is_window, }; -use crate::PhysicalOptimizerRule; +use datafusion_common::Result; use datafusion_common::config::ConfigOptions; use datafusion_common::plan_err; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; -use datafusion_common::Result; use datafusion_physical_expr::{Distribution, Partitioning}; use datafusion_physical_expr_common::sort_expr::{LexOrdering, LexRequirement}; use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec; @@ -67,7 +67,7 @@ use datafusion_physical_plan::sorts::sort::SortExec; use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; use datafusion_physical_plan::tree_node::PlanContext; use datafusion_physical_plan::windows::{ - get_best_fitting_window, BoundedWindowAggExec, WindowAggExec, + BoundedWindowAggExec, WindowAggExec, get_best_fitting_window, }; use datafusion_physical_plan::{ExecutionPlan, ExecutionPlanProperties, InputOrderMode}; diff --git a/datafusion/physical-optimizer/src/enforce_sorting/replace_with_order_preserving_variants.rs b/datafusion/physical-optimizer/src/enforce_sorting/replace_with_order_preserving_variants.rs index 2c9303d7ea690..6ab84dc95eab9 100644 --- a/datafusion/physical-optimizer/src/enforce_sorting/replace_with_order_preserving_variants.rs +++ b/datafusion/physical-optimizer/src/enforce_sorting/replace_with_order_preserving_variants.rs @@ -27,13 +27,13 @@ use crate::utils::{ use 
datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::Transformed; -use datafusion_common::{assert_or_internal_err, Result}; +use datafusion_common::{Result, assert_or_internal_err}; +use datafusion_physical_plan::ExecutionPlanProperties; use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec; use datafusion_physical_plan::execution_plan::EmissionType; use datafusion_physical_plan::repartition::RepartitionExec; use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; use datafusion_physical_plan::tree_node::PlanContext; -use datafusion_physical_plan::ExecutionPlanProperties; use itertools::izip; diff --git a/datafusion/physical-optimizer/src/enforce_sorting/sort_pushdown.rs b/datafusion/physical-optimizer/src/enforce_sorting/sort_pushdown.rs index 7c02b901169a7..698fdea8e766e 100644 --- a/datafusion/physical-optimizer/src/enforce_sorting/sort_pushdown.rs +++ b/datafusion/physical-optimizer/src/enforce_sorting/sort_pushdown.rs @@ -24,12 +24,12 @@ use crate::utils::{ use arrow::datatypes::SchemaRef; use datafusion_common::tree_node::{Transformed, TreeNode}; -use datafusion_common::{internal_err, HashSet, JoinSide, Result}; +use datafusion_common::{HashSet, JoinSide, Result, internal_err}; use datafusion_expr::JoinType; use datafusion_physical_expr::expressions::Column; use datafusion_physical_expr::utils::collect_columns; use datafusion_physical_expr::{ - add_offset_to_physical_sort_exprs, EquivalenceProperties, + EquivalenceProperties, add_offset_to_physical_sort_exprs, }; use datafusion_physical_expr_common::sort_expr::{ LexOrdering, LexRequirement, OrderingRequirements, PhysicalSortExpr, @@ -38,7 +38,7 @@ use datafusion_physical_expr_common::sort_expr::{ use datafusion_physical_plan::execution_plan::CardinalityEffect; use datafusion_physical_plan::filter::FilterExec; use datafusion_physical_plan::joins::utils::{ - calculate_join_output_ordering, ColumnIndex, + ColumnIndex, calculate_join_output_ordering, }; use datafusion_physical_plan::joins::{HashJoinExec, SortMergeJoinExec}; use datafusion_physical_plan::projection::ProjectionExec; diff --git a/datafusion/physical-optimizer/src/ensure_coop.rs b/datafusion/physical-optimizer/src/ensure_coop.rs index 0c0b63c0b3e79..6f6edcb94ff8e 100644 --- a/datafusion/physical-optimizer/src/ensure_coop.rs +++ b/datafusion/physical-optimizer/src/ensure_coop.rs @@ -25,12 +25,12 @@ use std::sync::Arc; use crate::PhysicalOptimizerRule; +use datafusion_common::Result; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRecursion}; -use datafusion_common::Result; +use datafusion_physical_plan::ExecutionPlan; use datafusion_physical_plan::coop::CooperativeExec; use datafusion_physical_plan::execution_plan::{EvaluationType, SchedulingType}; -use datafusion_physical_plan::ExecutionPlan; /// `EnsureCooperative` is a [`PhysicalOptimizerRule`] that inspects the physical plan for /// sub plans that do not participate in cooperative scheduling. 
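(Aside, not part of the diff: the doc comment above summarizes the rule's intent. Below is a minimal, hypothetical sketch of what cooperative scheduling means in practice, namely an operator that periodically yields back to the async runtime so one long-running scan cannot starve sibling tasks. The helper name `drain_cooperatively` and the yield interval are invented for illustration; this is not DataFusion's `CooperativeExec`.)

```rust
// Illustrative sketch only: a consumer that hands control back to the
// scheduler at a fixed interval while draining a stream.
use futures::{Stream, StreamExt, stream};

async fn drain_cooperatively<S>(mut input: S) -> usize
where
    S: Stream<Item = u64> + Unpin,
{
    let mut seen = 0usize;
    while let Some(_item) = input.next().await {
        seen += 1;
        // Yield back to the runtime every 64 items so other tasks can run.
        if seen % 64 == 0 {
            tokio::task::yield_now().await;
        }
    }
    seen
}

#[tokio::main]
async fn main() {
    let n = drain_cooperatively(stream::iter(0u64..1_000)).await;
    assert_eq!(n, 1_000);
}
```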
The plan is subdivided into sub diff --git a/datafusion/physical-optimizer/src/filter_pushdown.rs b/datafusion/physical-optimizer/src/filter_pushdown.rs index 22cb03fc3e876..28f8155002a50 100644 --- a/datafusion/physical-optimizer/src/filter_pushdown.rs +++ b/datafusion/physical-optimizer/src/filter_pushdown.rs @@ -36,16 +36,16 @@ use std::sync::Arc; use crate::PhysicalOptimizerRule; use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion}; -use datafusion_common::{assert_eq_or_internal_err, config::ConfigOptions, Result}; +use datafusion_common::{Result, assert_eq_or_internal_err, config::ConfigOptions}; use datafusion_physical_expr::PhysicalExpr; use datafusion_physical_expr_common::physical_expr::is_volatile; use datafusion_physical_plan::filter_pushdown::{ ChildFilterPushdownResult, ChildPushdownResult, FilterPushdownPhase, FilterPushdownPropagation, PushedDown, }; -use datafusion_physical_plan::{with_new_children_if_necessary, ExecutionPlan}; +use datafusion_physical_plan::{ExecutionPlan, with_new_children_if_necessary}; -use itertools::{izip, Itertools}; +use itertools::{Itertools, izip}; /// Attempts to recursively push given filters from the top of the tree into leaves. /// diff --git a/datafusion/physical-optimizer/src/join_selection.rs b/datafusion/physical-optimizer/src/join_selection.rs index 1ffe6dbca3c70..f837c79a4e391 100644 --- a/datafusion/physical-optimizer/src/join_selection.rs +++ b/datafusion/physical-optimizer/src/join_selection.rs @@ -27,10 +27,10 @@ use crate::PhysicalOptimizerRule; use datafusion_common::config::ConfigOptions; use datafusion_common::error::Result; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; -use datafusion_common::{internal_err, JoinSide, JoinType}; +use datafusion_common::{JoinSide, JoinType, internal_err}; use datafusion_expr_common::sort_properties::SortProperties; -use datafusion_physical_expr::expressions::Column; use datafusion_physical_expr::LexOrdering; +use datafusion_physical_expr::expressions::Column; use datafusion_physical_plan::execution_plan::EmissionType; use datafusion_physical_plan::joins::utils::ColumnIndex; use datafusion_physical_plan::joins::{ @@ -481,19 +481,15 @@ pub fn hash_join_swap_subrule( mut input: Arc, _config_options: &ConfigOptions, ) -> Result> { - if let Some(hash_join) = input.as_any().downcast_ref::() { - if hash_join.left.boundedness().is_unbounded() - && !hash_join.right.boundedness().is_unbounded() - && matches!( - *hash_join.join_type(), - JoinType::Inner - | JoinType::Left - | JoinType::LeftSemi - | JoinType::LeftAnti - ) - { - input = swap_join_according_to_unboundedness(hash_join)?; - } + if let Some(hash_join) = input.as_any().downcast_ref::() + && hash_join.left.boundedness().is_unbounded() + && !hash_join.right.boundedness().is_unbounded() + && matches!( + *hash_join.join_type(), + JoinType::Inner | JoinType::Left | JoinType::LeftSemi | JoinType::LeftAnti + ) + { + input = swap_join_according_to_unboundedness(hash_join)?; } Ok(input) } diff --git a/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs b/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs index 1c671cd074886..c23fa4faef95f 100644 --- a/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs +++ b/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs @@ -16,9 +16,9 @@ // under the License. 
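(Aside, not part of the diff: the rewrites in the optimizer rules above, and the `LimitPushPastWindows` change just below, all use let-chains, which only compile once a crate has taken the `edition = "2024"` bump made in the accompanying Cargo.toml changes. A minimal sketch of the pattern with made-up types, independent of DataFusion:)

```rust
// Sketch of the let-chain rewrite applied throughout this diff:
// a nested `if let` plus an inner `if` collapses into one chained condition.
fn first_large_even(values: &[Option<i64>]) -> Option<i64> {
    for v in values.iter().copied() {
        // Pre-2024 shape:
        //   if let Some(x) = v {
        //       if x % 2 == 0 && x > 10 {
        //           return Some(x);
        //       }
        //   }
        if let Some(x) = v
            && x % 2 == 0
            && x > 10
        {
            return Some(x);
        }
    }
    None
}

fn main() {
    assert_eq!(first_large_even(&[Some(7), Some(12), Some(20)]), Some(12));
}
```

Chaining the pattern binding and its guards in a single `if` removes one level of nesting without changing behavior, which is all these hunks do.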
use crate::PhysicalOptimizerRule; +use datafusion_common::ScalarValue; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TreeNode}; -use datafusion_common::ScalarValue; use datafusion_expr::{LimitEffect, WindowFrameBound, WindowFrameUnits}; use datafusion_physical_expr::window::{ PlainAggregateWindowExpr, SlidingAggregateWindowExpr, StandardWindowExpr, @@ -113,10 +113,10 @@ impl PhysicalOptimizerRule for LimitPushPastWindows { } // Apply the limit if we hit a sortpreservingmerge node - if phase == Phase::Apply { - if let Some(out) = apply_limit(&node, &mut ctx) { - return Ok(out); - } + if phase == Phase::Apply + && let Some(out) = apply_limit(&node, &mut ctx) + { + return Ok(out); } // nodes along the way diff --git a/datafusion/physical-optimizer/src/limited_distinct_aggregation.rs b/datafusion/physical-optimizer/src/limited_distinct_aggregation.rs index 3666ff3798b67..671d247cf36a5 100644 --- a/datafusion/physical-optimizer/src/limited_distinct_aggregation.rs +++ b/datafusion/physical-optimizer/src/limited_distinct_aggregation.rs @@ -24,9 +24,9 @@ use datafusion_physical_plan::aggregates::AggregateExec; use datafusion_physical_plan::limit::{GlobalLimitExec, LocalLimitExec}; use datafusion_physical_plan::{ExecutionPlan, ExecutionPlanProperties}; +use datafusion_common::Result; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; -use datafusion_common::Result; use crate::PhysicalOptimizerRule; use itertools::Itertools; @@ -113,17 +113,15 @@ impl LimitedDistinctAggregation { return Ok(Transformed::no(plan)); } if let Some(aggr) = plan.as_any().downcast_ref::() { - if found_match_aggr { - if let Some(parent_aggr) = + if found_match_aggr + && let Some(parent_aggr) = match_aggr.as_any().downcast_ref::() - { - if !parent_aggr.group_expr().eq(aggr.group_expr()) { - // a partial and final aggregation with different groupings disqualifies - // rewriting the child aggregation - rewrite_applicable = false; - return Ok(Transformed::no(plan)); - } - } + && !parent_aggr.group_expr().eq(aggr.group_expr()) + { + // a partial and final aggregation with different groupings disqualifies + // rewriting the child aggregation + rewrite_applicable = false; + return Ok(Transformed::no(plan)); } // either we run into an Aggregate and transform it, or disable the rewrite // for subsequent children diff --git a/datafusion/physical-optimizer/src/optimizer.rs b/datafusion/physical-optimizer/src/optimizer.rs index 03c83bb5a092a..f8e2e9950af3c 100644 --- a/datafusion/physical-optimizer/src/optimizer.rs +++ b/datafusion/physical-optimizer/src/optimizer.rs @@ -37,8 +37,8 @@ use crate::topk_aggregation::TopKAggregation; use crate::update_aggr_exprs::OptimizeAggregateOrder; use crate::limit_pushdown_past_window::LimitPushPastWindows; -use datafusion_common::config::ConfigOptions; use datafusion_common::Result; +use datafusion_common::config::ConfigOptions; use datafusion_physical_plan::ExecutionPlan; /// `PhysicalOptimizerRule` transforms one ['ExecutionPlan'] into another which diff --git a/datafusion/physical-optimizer/src/output_requirements.rs b/datafusion/physical-optimizer/src/output_requirements.rs index 9e5e980219767..0dc6a25fbc0b7 100644 --- a/datafusion/physical-optimizer/src/output_requirements.rs +++ b/datafusion/physical-optimizer/src/output_requirements.rs @@ -34,7 +34,7 @@ use datafusion_physical_expr::Distribution; use datafusion_physical_expr_common::sort_expr::OrderingRequirements; use 
datafusion_physical_plan::execution_plan::Boundedness; use datafusion_physical_plan::projection::{ - make_with_child, update_expr, update_ordering_requirement, ProjectionExec, + ProjectionExec, make_with_child, update_expr, update_ordering_requirement, }; use datafusion_physical_plan::sorts::sort::SortExec; use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; diff --git a/datafusion/physical-optimizer/src/projection_pushdown.rs b/datafusion/physical-optimizer/src/projection_pushdown.rs index cb51c9167cd32..281d61aecf538 100644 --- a/datafusion/physical-optimizer/src/projection_pushdown.rs +++ b/datafusion/physical-optimizer/src/projection_pushdown.rs @@ -33,12 +33,12 @@ use datafusion_common::tree_node::{ use datafusion_common::{JoinSide, JoinType, Result}; use datafusion_physical_expr::expressions::Column; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; -use datafusion_physical_plan::joins::utils::{ColumnIndex, JoinFilter}; +use datafusion_physical_plan::ExecutionPlan; use datafusion_physical_plan::joins::NestedLoopJoinExec; +use datafusion_physical_plan::joins::utils::{ColumnIndex, JoinFilter}; use datafusion_physical_plan::projection::{ - remove_unnecessary_projections, ProjectionExec, + ProjectionExec, remove_unnecessary_projections, }; -use datafusion_physical_plan::ExecutionPlan; /// This rule inspects `ProjectionExec`'s in the given physical plan and tries to /// remove or swap with its child. @@ -449,8 +449,8 @@ mod test { use arrow::datatypes::{DataType, Field, FieldRef, Schema}; use datafusion_expr_common::operator::Operator; use datafusion_functions::math::random; - use datafusion_physical_expr::expressions::{binary, lit}; use datafusion_physical_expr::ScalarFunctionExpr; + use datafusion_physical_expr::expressions::{binary, lit}; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use datafusion_physical_plan::displayable; use datafusion_physical_plan::empty::EmptyExec; diff --git a/datafusion/physical-optimizer/src/sanity_checker.rs b/datafusion/physical-optimizer/src/sanity_checker.rs index 23df6b4e5528c..f4845317386a4 100644 --- a/datafusion/physical-optimizer/src/sanity_checker.rs +++ b/datafusion/physical-optimizer/src/sanity_checker.rs @@ -32,7 +32,7 @@ use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_physical_expr::intervals::utils::{check_support, is_datatype_supported}; use datafusion_physical_plan::execution_plan::{Boundedness, EmissionType}; use datafusion_physical_plan::joins::SymmetricHashJoinExec; -use datafusion_physical_plan::{get_plan_string, ExecutionPlanProperties}; +use datafusion_physical_plan::{ExecutionPlanProperties, get_plan_string}; use crate::PhysicalOptimizerRule; use datafusion_physical_expr_common::sort_expr::format_physical_sort_requirement_list; @@ -78,13 +78,14 @@ pub fn check_finiteness_requirements( input: Arc, optimizer_options: &OptimizerOptions, ) -> Result>> { - if let Some(exec) = input.as_any().downcast_ref::() { - if !(optimizer_options.allow_symmetric_joins_without_pruning + if let Some(exec) = input.as_any().downcast_ref::() + && !(optimizer_options.allow_symmetric_joins_without_pruning || (exec.check_if_order_information_available()? 
&& is_prunable(exec))) - { - return plan_err!("Join operation cannot operate on a non-prunable stream without enabling \ - the 'allow_symmetric_joins_without_pruning' configuration flag"); - } + { + return plan_err!( + "Join operation cannot operate on a non-prunable stream without enabling \ + the 'allow_symmetric_joins_without_pruning' configuration flag" + ); } if matches!( diff --git a/datafusion/physical-optimizer/src/topk_aggregation.rs b/datafusion/physical-optimizer/src/topk_aggregation.rs index b7505f0df4edb..7eb9e6a76211b 100644 --- a/datafusion/physical-optimizer/src/topk_aggregation.rs +++ b/datafusion/physical-optimizer/src/topk_aggregation.rs @@ -21,15 +21,15 @@ use std::sync::Arc; use crate::PhysicalOptimizerRule; use arrow::datatypes::DataType; +use datafusion_common::Result; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; -use datafusion_common::Result; use datafusion_physical_expr::expressions::Column; +use datafusion_physical_plan::ExecutionPlan; use datafusion_physical_plan::aggregates::AggregateExec; use datafusion_physical_plan::execution_plan::CardinalityEffect; use datafusion_physical_plan::projection::ProjectionExec; use datafusion_physical_plan::sorts::sort::SortExec; -use datafusion_physical_plan::ExecutionPlan; use itertools::Itertools; /// An optimizer rule that passes a `limit` hint to aggregations if the whole result is not needed diff --git a/datafusion/physical-optimizer/src/update_aggr_exprs.rs b/datafusion/physical-optimizer/src/update_aggr_exprs.rs index c22a63c0c16c2..c0aab4080da77 100644 --- a/datafusion/physical-optimizer/src/update_aggr_exprs.rs +++ b/datafusion/physical-optimizer/src/update_aggr_exprs.rs @@ -22,10 +22,10 @@ use std::sync::Arc; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; -use datafusion_common::{plan_datafusion_err, Result}; +use datafusion_common::{Result, plan_datafusion_err}; use datafusion_physical_expr::aggregate::AggregateFunctionExpr; use datafusion_physical_expr::{EquivalenceProperties, PhysicalSortRequirement}; -use datafusion_physical_plan::aggregates::{concat_slices, AggregateExec}; +use datafusion_physical_plan::aggregates::{AggregateExec, concat_slices}; use datafusion_physical_plan::windows::get_ordered_partition_by_indices; use datafusion_physical_plan::{ExecutionPlan, ExecutionPlanProperties}; diff --git a/datafusion/pruning/Cargo.toml b/datafusion/pruning/Cargo.toml index bd898cba202ba..97d9035d1e06b 100644 --- a/datafusion/pruning/Cargo.toml +++ b/datafusion/pruning/Cargo.toml @@ -3,7 +3,7 @@ name = "datafusion-pruning" description = "DataFusion Pruning Logic" readme = "README.md" version = { workspace = true } -edition = { workspace = true } +edition = "2024" homepage = { workspace = true } repository = { workspace = true } license = { workspace = true } diff --git a/datafusion/pruning/src/file_pruner.rs b/datafusion/pruning/src/file_pruner.rs index 1d923202b35fd..f850e0c0114fb 100644 --- a/datafusion/pruning/src/file_pruner.rs +++ b/datafusion/pruning/src/file_pruner.rs @@ -20,9 +20,9 @@ use std::sync::Arc; use arrow::datatypes::{FieldRef, SchemaRef}; -use datafusion_common::{internal_datafusion_err, pruning::PrunableStatistics, Result}; +use datafusion_common::{Result, internal_datafusion_err, pruning::PrunableStatistics}; use datafusion_datasource::PartitionedFile; -use datafusion_physical_expr_common::physical_expr::{snapshot_generation, PhysicalExpr}; +use 
datafusion_physical_expr_common::physical_expr::{PhysicalExpr, snapshot_generation}; use datafusion_physical_plan::metrics::Count; use log::debug; diff --git a/datafusion/pruning/src/lib.rs b/datafusion/pruning/src/lib.rs index c16d1bfd10ba3..be17f29eaafa0 100644 --- a/datafusion/pruning/src/lib.rs +++ b/datafusion/pruning/src/lib.rs @@ -22,6 +22,6 @@ mod pruning_predicate; pub use file_pruner::FilePruner; pub use pruning_predicate::{ - build_pruning_predicate, PredicateRewriter, PruningPredicate, PruningStatistics, - RequiredColumns, UnhandledPredicateHook, + PredicateRewriter, PruningPredicate, PruningStatistics, RequiredColumns, + UnhandledPredicateHook, build_pruning_predicate, }; diff --git a/datafusion/pruning/src/pruning_predicate.rs b/datafusion/pruning/src/pruning_predicate.rs index 2de8116cfeaee..32f2371f796e1 100644 --- a/datafusion/pruning/src/pruning_predicate.rs +++ b/datafusion/pruning/src/pruning_predicate.rs @@ -24,7 +24,7 @@ use std::sync::Arc; use arrow::array::AsArray; use arrow::{ - array::{new_null_array, ArrayRef, BooleanArray}, + array::{ArrayRef, BooleanArray, new_null_array}, datatypes::{DataType, Field, Schema, SchemaRef}, record_batch::{RecordBatch, RecordBatchOptions}, }; @@ -36,16 +36,15 @@ use log::{debug, trace}; use datafusion_common::error::Result; use datafusion_common::tree_node::{TransformedResult, TreeNodeRecursion}; -use datafusion_common::{assert_eq_or_internal_err, Column, DFSchema}; +use datafusion_common::{Column, DFSchema, assert_eq_or_internal_err}; use datafusion_common::{ - internal_datafusion_err, plan_datafusion_err, plan_err, + ScalarValue, internal_datafusion_err, plan_datafusion_err, plan_err, tree_node::{Transformed, TreeNode}, - ScalarValue, }; use datafusion_expr_common::operator::Operator; use datafusion_physical_expr::expressions::CastColumnExpr; use datafusion_physical_expr::utils::{Guarantee, LiteralGuarantee}; -use datafusion_physical_expr::{expressions as phys_expr, PhysicalExprRef}; +use datafusion_physical_expr::{PhysicalExprRef, expressions as phys_expr}; use datafusion_physical_expr_common::physical_expr::snapshot_physical_expr_opt; use datafusion_physical_plan::{ColumnarValue, PhysicalExpr}; @@ -999,14 +998,14 @@ impl<'a> PruningExpressionBuilder<'a> { (ColumnReferenceCount::One(_), ColumnReferenceCount::One(_)) => { // both sides have one column - not supported return plan_err!( - "Expression not supported for pruning: left has 1 column, right has 1 column" - ); + "Expression not supported for pruning: left has 1 column, right has 1 column" + ); } (ColumnReferenceCount::Zero, ColumnReferenceCount::Zero) => { // both sides are literals - should be handled before calling try_new return plan_err!( - "Pruning literal expressions is not supported, please call PhysicalExprSimplifier first" - ); + "Pruning literal expressions is not supported, please call PhysicalExprSimplifier first" + ); } (ColumnReferenceCount::Many, _) | (_, ColumnReferenceCount::Many) => { return plan_err!( @@ -1226,13 +1225,13 @@ fn verify_support_type_for_prune(from_type: &DataType, to_type: &DataType) -> Re // Dictionary casts are always supported as long as the value types are supported let from_type = match from_type { DataType::Dictionary(_, t) => { - return verify_support_type_for_prune(t.as_ref(), to_type) + return verify_support_type_for_prune(t.as_ref(), to_type); } _ => from_type, }; let to_type = match to_type { DataType::Dictionary(_, t) => { - return verify_support_type_for_prune(from_type, t.as_ref()) + return 
verify_support_type_for_prune(from_type, t.as_ref()); } _ => to_type, }; @@ -1255,10 +1254,10 @@ fn rewrite_column_expr( column_new: &phys_expr::Column, ) -> Result> { e.transform(|expr| { - if let Some(column) = expr.as_any().downcast_ref::() { - if column == column_old { - return Ok(Transformed::yes(Arc::new(column_new.clone()))); - } + if let Some(column) = expr.as_any().downcast_ref::() + && column == column_old + { + return Ok(Transformed::yes(Arc::new(column_new.clone()))); } Ok(Transformed::no(expr)) @@ -1904,13 +1903,13 @@ fn increment_utf8(data: &str) -> Option { let original = code_points[idx] as u32; // Try incrementing the code point - if let Some(next_char) = char::from_u32(original + 1) { - if is_valid_unicode(next_char) { - code_points[idx] = next_char; - // truncate the string to the current index - code_points.truncate(idx + 1); - return Some(code_points.into_iter().collect()); - } + if let Some(next_char) = char::from_u32(original + 1) + && is_valid_unicode(next_char) + { + code_points[idx] = next_char; + // truncate the string to the current index + code_points.truncate(idx + 1); + return Some(code_points.into_iter().collect()); } } @@ -1981,7 +1980,7 @@ mod tests { datatypes::TimeUnit, }; use datafusion_expr::expr::InList; - use datafusion_expr::{cast, is_null, try_cast, Expr}; + use datafusion_expr::{Expr, cast, is_null, try_cast}; use datafusion_functions_nested::expr_fn::{array_has, make_array}; use datafusion_physical_expr::expressions::{ self as phys_expr, DynamicFilterPhysicalExpr, @@ -3055,7 +3054,7 @@ mod tests { test_build_predicate_expression(&expr, &schema, &mut required_columns); assert_eq!(predicate_expr.to_string(), expected_expr); println!("required_columns: {required_columns:#?}"); // for debugging assertions below - // c1 < 1 should add c1_min + // c1 < 1 should add c1_min let c1_min_field = Field::new("c1_min", DataType::Int32, false); assert_eq!( required_columns.columns[0], diff --git a/datafusion/spark/Cargo.toml b/datafusion/spark/Cargo.toml index 09959db41fe60..02d747df6deeb 100644 --- a/datafusion/spark/Cargo.toml +++ b/datafusion/spark/Cargo.toml @@ -24,7 +24,7 @@ repository = { workspace = true } authors = { workspace = true } readme = "README.md" license = { workspace = true } -edition = { workspace = true } +edition = "2024" [package.metadata.docs.rs] all-features = true diff --git a/datafusion/spark/benches/char.rs b/datafusion/spark/benches/char.rs index 02eab7630d070..b5f87857ae9c6 100644 --- a/datafusion/spark/benches/char.rs +++ b/datafusion/spark/benches/char.rs @@ -19,7 +19,7 @@ extern crate criterion; use arrow::datatypes::{DataType, Field}; use arrow::{array::PrimitiveArray, datatypes::Int64Type}; -use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{Criterion, criterion_group, criterion_main}; use datafusion_common::config::ConfigOptions; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_spark::function::string::char; diff --git a/datafusion/spark/src/function/aggregate/avg.rs b/datafusion/spark/src/function/aggregate/avg.rs index 4a7adc515bbc4..bbcda9b0f8c7f 100644 --- a/datafusion/spark/src/function/aggregate/avg.rs +++ b/datafusion/spark/src/function/aggregate/avg.rs @@ -16,15 +16,15 @@ // under the License. 
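(Aside, not part of the diff: most of the remaining churn is mechanical rustfmt output under the 2024 style edition. Items in a `use` list are version-sorted, so uppercase, type-like names come before lowercase function and macro names and numeric suffixes compare by value, which is why `Int8Type` now precedes `Int16Type`; and a macro call whose single string literal overflows the line width has that literal moved onto its own line. A small sketch of both behaviors, assuming `arrow` and `datafusion-common` as dependencies; the function itself is invented:)

```rust
// Sketch of rustfmt 2024-style output; only the formatting mirrors this diff.
#[allow(unused_imports)] // imports shown purely to illustrate the ordering
use arrow::datatypes::{Int8Type, Int16Type, Int32Type, Int64Type};
use datafusion_common::{Result, plan_err};

fn check_range(start: i64, end: i64, step: i64) -> Result<()> {
    if start > end && step > 0 {
        // The long literal no longer shares a line with the macro name.
        return plan_err!(
            "Start is bigger than end, but increment is positive: Cannot generate infinite series"
        );
    }
    Ok(())
}

fn main() -> Result<()> {
    check_range(1, 10, 2)
}
```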
use arrow::array::{ + Array, ArrayRef, ArrowNativeTypeOp, ArrowNumericType, Int64Array, PrimitiveArray, builder::PrimitiveBuilder, cast::AsArray, types::{Float64Type, Int64Type}, - Array, ArrayRef, ArrowNativeTypeOp, ArrowNumericType, Int64Array, PrimitiveArray, }; use arrow::compute::sum; use arrow::datatypes::{DataType, Field, FieldRef}; -use datafusion_common::types::{logical_float64, NativeType}; -use datafusion_common::{not_impl_err, Result, ScalarValue}; +use datafusion_common::types::{NativeType, logical_float64}; +use datafusion_common::{Result, ScalarValue, not_impl_err}; use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs}; use datafusion_expr::utils::format_state_name; use datafusion_expr::{ diff --git a/datafusion/spark/src/function/array/shuffle.rs b/datafusion/spark/src/function/array/shuffle.rs index 9fb37a4e78035..eaeff6538c328 100644 --- a/datafusion/spark/src/function/array/shuffle.rs +++ b/datafusion/spark/src/function/array/shuffle.rs @@ -27,7 +27,7 @@ use datafusion_common::cast::{ as_fixed_size_list_array, as_large_list_array, as_list_array, }; use datafusion_common::{ - exec_err, internal_err, utils::take_function_args, Result, ScalarValue, + Result, ScalarValue, exec_err, internal_err, utils::take_function_args, }; use datafusion_expr::{ ArrayFunctionArgument, ArrayFunctionSignature, ColumnarValue, ScalarUDFImpl, @@ -35,7 +35,7 @@ use datafusion_expr::{ }; use rand::rng; use rand::rngs::StdRng; -use rand::{seq::SliceRandom, Rng, SeedableRng}; +use rand::{Rng, SeedableRng, seq::SliceRandom}; use std::any::Any; use std::sync::Arc; diff --git a/datafusion/spark/src/function/array/spark_array.rs b/datafusion/spark/src/function/array/spark_array.rs index 67a307bbe9008..6d9f9a1695e1b 100644 --- a/datafusion/spark/src/function/array/spark_array.rs +++ b/datafusion/spark/src/function/array/spark_array.rs @@ -17,10 +17,10 @@ use std::{any::Any, sync::Arc}; -use arrow::array::{new_null_array, Array, ArrayRef}; +use arrow::array::{Array, ArrayRef, new_null_array}; use arrow::datatypes::{DataType, Field, FieldRef}; use datafusion_common::utils::SingleRowListArrayBuilder; -use datafusion_common::{internal_err, Result}; +use datafusion_common::{Result, internal_err}; use datafusion_expr::{ ColumnarValue, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature, Volatility, diff --git a/datafusion/spark/src/function/bitmap/bitmap_count.rs b/datafusion/spark/src/function/bitmap/bitmap_count.rs index 93a33f106b38b..e59bc5f529317 100644 --- a/datafusion/spark/src/function/bitmap/bitmap_count.rs +++ b/datafusion/spark/src/function/bitmap/bitmap_count.rs @@ -19,15 +19,15 @@ use std::any::Any; use std::sync::Arc; use arrow::array::{ - as_dictionary_array, Array, ArrayRef, BinaryArray, BinaryViewArray, - FixedSizeBinaryArray, Int64Array, LargeBinaryArray, + Array, ArrayRef, BinaryArray, BinaryViewArray, FixedSizeBinaryArray, Int64Array, + LargeBinaryArray, as_dictionary_array, }; use arrow::datatypes::DataType::{ Binary, BinaryView, Dictionary, FixedSizeBinary, LargeBinary, }; -use arrow::datatypes::{DataType, FieldRef, Int16Type, Int32Type, Int64Type, Int8Type}; +use arrow::datatypes::{DataType, FieldRef, Int8Type, Int16Type, Int32Type, Int64Type}; use datafusion_common::utils::take_function_args; -use datafusion_common::{internal_err, Result}; +use datafusion_common::{Result, internal_err}; use datafusion_expr::{ Coercion, ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignatureClass, Volatility, @@ -218,12 +218,17 @@ mod tests { 
Box::new(ScalarValue::Binary(Some(vec![0xFFu8, 0xFFu8]))), )); - let arg_fields = vec![Field::new( - "a", - DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Binary)), - true, - ) - .into()]; + let arg_fields = vec![ + Field::new( + "a", + DataType::Dictionary( + Box::new(DataType::Int32), + Box::new(DataType::Binary), + ), + true, + ) + .into(), + ]; let args = ScalarFunctionArgs { args: vec![dict.clone()], arg_fields, diff --git a/datafusion/spark/src/function/bitwise/bit_count.rs b/datafusion/spark/src/function/bitwise/bit_count.rs index 30da585813d01..00170293dc391 100644 --- a/datafusion/spark/src/function/bitwise/bit_count.rs +++ b/datafusion/spark/src/function/bitwise/bit_count.rs @@ -20,11 +20,11 @@ use std::sync::Arc; use arrow::array::{ArrayRef, AsArray, Int32Array}; use arrow::datatypes::{ - DataType, FieldRef, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type, - UInt32Type, UInt64Type, UInt8Type, + DataType, FieldRef, Int8Type, Int16Type, Int32Type, Int64Type, UInt8Type, UInt16Type, + UInt32Type, UInt64Type, }; use datafusion_common::cast::as_boolean_array; -use datafusion_common::{internal_err, plan_err, Result}; +use datafusion_common::{Result, internal_err, plan_err}; use datafusion_expr::{ ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature, Volatility, @@ -173,8 +173,8 @@ fn spark_bit_count(value_array: &[ArrayRef]) -> Result { mod tests { use super::*; use arrow::array::{ - Array, BooleanArray, Int16Array, Int32Array, Int64Array, Int8Array, UInt16Array, - UInt32Array, UInt64Array, UInt8Array, + Array, BooleanArray, Int8Array, Int16Array, Int32Array, Int64Array, UInt8Array, + UInt16Array, UInt32Array, UInt64Array, }; use arrow::datatypes::{Field, Int32Type}; diff --git a/datafusion/spark/src/function/bitwise/bit_get.rs b/datafusion/spark/src/function/bitwise/bit_get.rs index f8980a697b38a..3343c6c61de0b 100644 --- a/datafusion/spark/src/function/bitwise/bit_get.rs +++ b/datafusion/spark/src/function/bitwise/bit_get.rs @@ -20,14 +20,14 @@ use std::mem::size_of; use std::sync::Arc; use arrow::array::{ - downcast_integer_array, Array, ArrayRef, ArrowPrimitiveType, AsArray, Int32Array, - Int8Array, PrimitiveArray, + Array, ArrayRef, ArrowPrimitiveType, AsArray, Int8Array, Int32Array, PrimitiveArray, + downcast_integer_array, }; use arrow::compute::try_binary; -use arrow::datatypes::{ArrowNativeType, DataType, Field, FieldRef, Int32Type, Int8Type}; -use datafusion_common::types::{logical_int32, NativeType}; +use arrow::datatypes::{ArrowNativeType, DataType, Field, FieldRef, Int8Type, Int32Type}; +use datafusion_common::types::{NativeType, logical_int32}; use datafusion_common::utils::take_function_args; -use datafusion_common::{internal_err, Result}; +use datafusion_common::{Result, internal_err}; use datafusion_expr::{ Coercion, ColumnarValue, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignatureClass, Volatility, diff --git a/datafusion/spark/src/function/bitwise/bit_shift.rs b/datafusion/spark/src/function/bitwise/bit_shift.rs index ff7f7662ec40f..fc3df28e968a8 100644 --- a/datafusion/spark/src/function/bitwise/bit_shift.rs +++ b/datafusion/spark/src/function/bitwise/bit_shift.rs @@ -25,11 +25,11 @@ use arrow::datatypes::{ UInt64Type, }; use datafusion_common::types::{ - logical_int16, logical_int32, logical_int64, logical_int8, logical_uint16, - logical_uint32, logical_uint64, logical_uint8, NativeType, + NativeType, logical_int8, logical_int16, logical_int32, logical_int64, logical_uint8, + logical_uint16, 
logical_uint32, logical_uint64, }; use datafusion_common::utils::take_function_args; -use datafusion_common::{internal_err, Result}; +use datafusion_common::{Result, internal_err}; use datafusion_expr::{ Coercion, ColumnarValue, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature, TypeSignatureClass, Volatility, diff --git a/datafusion/spark/src/function/bitwise/bitwise_not.rs b/datafusion/spark/src/function/bitwise/bitwise_not.rs index 5710fa4fddbf2..5f8cf36911f43 100644 --- a/datafusion/spark/src/function/bitwise/bitwise_not.rs +++ b/datafusion/spark/src/function/bitwise/bitwise_not.rs @@ -18,9 +18,9 @@ use arrow::array::*; use arrow::compute::kernels::bitwise; use arrow::datatypes::{ - DataType, Field, FieldRef, Int16Type, Int32Type, Int64Type, Int8Type, + DataType, Field, FieldRef, Int8Type, Int16Type, Int32Type, Int64Type, }; -use datafusion_common::{internal_err, plan_err, Result}; +use datafusion_common::{Result, internal_err, plan_err}; use datafusion_expr::{ColumnarValue, TypeSignature, Volatility}; use datafusion_expr::{ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, Signature}; use datafusion_functions::utils::make_scalar_function; diff --git a/datafusion/spark/src/function/conditional/if.rs b/datafusion/spark/src/function/conditional/if.rs index aee43dd8d0a58..906b0bc312f2f 100644 --- a/datafusion/spark/src/function/conditional/if.rs +++ b/datafusion/spark/src/function/conditional/if.rs @@ -16,10 +16,10 @@ // under the License. use arrow::datatypes::DataType; -use datafusion_common::{internal_err, plan_err, Result}; +use datafusion_common::{Result, internal_err, plan_err}; use datafusion_expr::{ - binary::try_type_union_resolution, simplify::ExprSimplifyResult, when, ColumnarValue, - Expr, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, + ColumnarValue, Expr, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, + binary::try_type_union_resolution, simplify::ExprSimplifyResult, when, }; #[derive(Debug, PartialEq, Eq, Hash)] diff --git a/datafusion/spark/src/function/datetime/date_add.rs b/datafusion/spark/src/function/datetime/date_add.rs index 457d4d476dce3..0c44233224573 100644 --- a/datafusion/spark/src/function/datetime/date_add.rs +++ b/datafusion/spark/src/function/datetime/date_add.rs @@ -23,10 +23,10 @@ use arrow::compute; use arrow::datatypes::{DataType, Date32Type}; use arrow::error::ArrowError; use datafusion_common::cast::{ - as_date32_array, as_int16_array, as_int32_array, as_int8_array, + as_date32_array, as_int8_array, as_int16_array, as_int32_array, }; use datafusion_common::utils::take_function_args; -use datafusion_common::{internal_err, Result}; +use datafusion_common::{Result, internal_err}; use datafusion_expr::{ ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature, Volatility, diff --git a/datafusion/spark/src/function/datetime/date_sub.rs b/datafusion/spark/src/function/datetime/date_sub.rs index df406ba7ef104..7e56670f17d22 100644 --- a/datafusion/spark/src/function/datetime/date_sub.rs +++ b/datafusion/spark/src/function/datetime/date_sub.rs @@ -23,9 +23,9 @@ use arrow::compute; use arrow::datatypes::{DataType, Date32Type, Field, FieldRef}; use arrow::error::ArrowError; use datafusion_common::cast::{ - as_date32_array, as_int16_array, as_int32_array, as_int8_array, + as_date32_array, as_int8_array, as_int16_array, as_int32_array, }; -use datafusion_common::{internal_err, Result}; +use datafusion_common::{Result, internal_err}; use datafusion_expr::{ ColumnarValue, ReturnFieldArgs, 
ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature, Volatility, diff --git a/datafusion/spark/src/function/datetime/last_day.rs b/datafusion/spark/src/function/datetime/last_day.rs index d71237d6da098..40834ec345df5 100644 --- a/datafusion/spark/src/function/datetime/last_day.rs +++ b/datafusion/spark/src/function/datetime/last_day.rs @@ -22,7 +22,7 @@ use arrow::array::{ArrayRef, AsArray, Date32Array}; use arrow::datatypes::{DataType, Date32Type, Field, FieldRef}; use chrono::{Datelike, Duration, NaiveDate}; use datafusion_common::utils::take_function_args; -use datafusion_common::{exec_datafusion_err, internal_err, Result, ScalarValue}; +use datafusion_common::{Result, ScalarValue, exec_datafusion_err, internal_err}; use datafusion_expr::{ ColumnarValue, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, @@ -99,7 +99,9 @@ impl ScalarUDFImpl for SparkLastDay { Ok(Arc::new(result) as ArrayRef) } other => { - internal_err!("Unsupported data type {other:?} for Spark function `last_day`") + internal_err!( + "Unsupported data type {other:?} for Spark function `last_day`" + ) } }?; Ok(ColumnarValue::Array(result)) diff --git a/datafusion/spark/src/function/datetime/make_dt_interval.rs b/datafusion/spark/src/function/datetime/make_dt_interval.rs index f3630df467043..f00b4c5804eca 100644 --- a/datafusion/spark/src/function/datetime/make_dt_interval.rs +++ b/datafusion/spark/src/function/datetime/make_dt_interval.rs @@ -23,9 +23,9 @@ use arrow::array::{ }; use arrow::datatypes::TimeUnit::Microsecond; use arrow::datatypes::{DataType, Field, FieldRef, Float64Type, Int32Type}; -use datafusion_common::types::{logical_float64, logical_int32, NativeType}; +use datafusion_common::types::{NativeType, logical_float64, logical_int32}; use datafusion_common::{ - internal_err, plan_datafusion_err, DataFusionError, Result, ScalarValue, + DataFusionError, Result, ScalarValue, internal_err, plan_datafusion_err, }; use datafusion_expr::{ Coercion, ColumnarValue, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, @@ -249,7 +249,7 @@ mod tests { use arrow::array::{DurationMicrosecondArray, Float64Array, Int32Array}; use arrow::datatypes::DataType::Duration; use arrow::datatypes::{DataType, Field, TimeUnit::Microsecond}; - use datafusion_common::{internal_datafusion_err, DataFusionError, Result}; + use datafusion_common::{DataFusionError, Result, internal_datafusion_err}; use datafusion_expr::{ColumnarValue, ReturnFieldArgs, ScalarFunctionArgs}; use super::*; diff --git a/datafusion/spark/src/function/datetime/make_interval.rs b/datafusion/spark/src/function/datetime/make_interval.rs index 7e358d552246e..e4dd541793048 100644 --- a/datafusion/spark/src/function/datetime/make_interval.rs +++ b/datafusion/spark/src/function/datetime/make_interval.rs @@ -22,8 +22,8 @@ use arrow::array::{Array, ArrayRef, IntervalMonthDayNanoBuilder, PrimitiveArray} use arrow::datatypes::DataType::Interval; use arrow::datatypes::IntervalUnit::MonthDayNano; use arrow::datatypes::{DataType, IntervalMonthDayNano}; -use datafusion_common::types::{logical_float64, logical_int32, NativeType}; -use datafusion_common::{plan_datafusion_err, DataFusionError, Result, ScalarValue}; +use datafusion_common::types::{NativeType, logical_float64, logical_int32}; +use datafusion_common::{DataFusionError, Result, ScalarValue, plan_datafusion_err}; use datafusion_expr::{ Coercion, ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature, TypeSignatureClass, Volatility, @@ -274,7 +274,7 @@ mod tests { use 
arrow::datatypes::Field; use datafusion_common::config::ConfigOptions; use datafusion_common::{ - assert_eq_or_internal_err, internal_datafusion_err, internal_err, Result, + Result, assert_eq_or_internal_err, internal_datafusion_err, internal_err, }; use super::*; diff --git a/datafusion/spark/src/function/datetime/next_day.rs b/datafusion/spark/src/function/datetime/next_day.rs index 32739f3e2c591..72a0c830ffb25 100644 --- a/datafusion/spark/src/function/datetime/next_day.rs +++ b/datafusion/spark/src/function/datetime/next_day.rs @@ -18,10 +18,10 @@ use std::any::Any; use std::sync::Arc; -use arrow::array::{new_null_array, ArrayRef, AsArray, Date32Array, StringArrayType}; +use arrow::array::{ArrayRef, AsArray, Date32Array, StringArrayType, new_null_array}; use arrow::datatypes::{DataType, Date32Type}; use chrono::{Datelike, Duration, Weekday}; -use datafusion_common::{exec_err, Result, ScalarValue}; +use datafusion_common::{Result, ScalarValue, exec_err}; use datafusion_expr::{ ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, }; @@ -78,7 +78,12 @@ impl ScalarUDFImpl for SparkNextDay { match (date, day_of_week) { (ColumnarValue::Scalar(date), ColumnarValue::Scalar(day_of_week)) => { match (date, day_of_week) { - (ScalarValue::Date32(days), ScalarValue::Utf8(day_of_week) | ScalarValue::LargeUtf8(day_of_week) | ScalarValue::Utf8View(day_of_week)) => { + ( + ScalarValue::Date32(days), + ScalarValue::Utf8(day_of_week) + | ScalarValue::LargeUtf8(day_of_week) + | ScalarValue::Utf8View(day_of_week), + ) => { if let Some(days) = days { if let Some(day_of_week) = day_of_week { Ok(ColumnarValue::Scalar(ScalarValue::Date32( @@ -93,25 +98,39 @@ impl ScalarUDFImpl for SparkNextDay { Ok(ColumnarValue::Scalar(ScalarValue::Date32(None))) } } - _ => exec_err!("Spark `next_day` function: first arg must be date, second arg must be string. Got {args:?}"), + _ => exec_err!( + "Spark `next_day` function: first arg must be date, second arg must be string. Got {args:?}" + ), } } (ColumnarValue::Array(date_array), ColumnarValue::Scalar(day_of_week)) => { match (date_array.data_type(), day_of_week) { - (DataType::Date32, ScalarValue::Utf8(day_of_week) | ScalarValue::LargeUtf8(day_of_week) | ScalarValue::Utf8View(day_of_week)) => { + ( + DataType::Date32, + ScalarValue::Utf8(day_of_week) + | ScalarValue::LargeUtf8(day_of_week) + | ScalarValue::Utf8View(day_of_week), + ) => { if let Some(day_of_week) = day_of_week { let result: Date32Array = date_array .as_primitive::() - .unary_opt(|days| spark_next_day(days, day_of_week.as_str())) + .unary_opt(|days| { + spark_next_day(days, day_of_week.as_str()) + }) .with_data_type(DataType::Date32); Ok(ColumnarValue::Array(Arc::new(result) as ArrayRef)) } else { // TODO: if spark.sql.ansi.enabled is false, // returns NULL instead of an error for a malformed dayOfWeek. - Ok(ColumnarValue::Array(Arc::new(new_null_array(&DataType::Date32, date_array.len())))) + Ok(ColumnarValue::Array(Arc::new(new_null_array( + &DataType::Date32, + date_array.len(), + )))) } } - _ => exec_err!("Spark `next_day` function: first arg must be date, second arg must be string. Got {args:?}"), + _ => exec_err!( + "Spark `next_day` function: first arg must be date, second arg must be string. Got {args:?}" + ), } } ( @@ -143,7 +162,9 @@ impl ScalarUDFImpl for SparkNextDay { process_next_day_arrays(date_array, day_of_week_array) } other => { - exec_err!("Spark `next_day` function: second arg must be string. 
Got {other:?}") + exec_err!( + "Spark `next_day` function: second arg must be string. Got {other:?}" + ) } } } diff --git a/datafusion/spark/src/function/error_utils.rs b/datafusion/spark/src/function/error_utils.rs index b972d64ed3e9a..362a32bcd0cc2 100644 --- a/datafusion/spark/src/function/error_utils.rs +++ b/datafusion/spark/src/function/error_utils.rs @@ -18,7 +18,7 @@ // TODO: https://github.com/apache/spark/tree/master/common/utils/src/main/resources/error use arrow::datatypes::DataType; -use datafusion_common::{exec_datafusion_err, internal_datafusion_err, DataFusionError}; +use datafusion_common::{DataFusionError, exec_datafusion_err, internal_datafusion_err}; pub fn invalid_arg_count_exec_err( function_name: &str, @@ -44,7 +44,9 @@ pub fn unsupported_data_type_exec_err( required: &str, provided: &DataType, ) -> DataFusionError { - exec_datafusion_err!("Unsupported Data Type: Spark `{function_name}` function expects {required}, got {provided}") + exec_datafusion_err!( + "Unsupported Data Type: Spark `{function_name}` function expects {required}, got {provided}" + ) } pub fn unsupported_data_types_exec_err( diff --git a/datafusion/spark/src/function/hash/crc32.rs b/datafusion/spark/src/function/hash/crc32.rs index 8280e24b8ef59..714aea0aeb4bb 100644 --- a/datafusion/spark/src/function/hash/crc32.rs +++ b/datafusion/spark/src/function/hash/crc32.rs @@ -25,9 +25,9 @@ use datafusion_common::cast::{ as_binary_array, as_binary_view_array, as_fixed_size_binary_array, as_large_binary_array, }; -use datafusion_common::types::{logical_string, NativeType}; +use datafusion_common::types::{NativeType, logical_string}; use datafusion_common::utils::take_function_args; -use datafusion_common::{internal_err, Result}; +use datafusion_common::{Result, internal_err}; use datafusion_expr::{ Coercion, ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignatureClass, Volatility, diff --git a/datafusion/spark/src/function/hash/sha1.rs b/datafusion/spark/src/function/hash/sha1.rs index 45530894724b0..a78ac6e509091 100644 --- a/datafusion/spark/src/function/hash/sha1.rs +++ b/datafusion/spark/src/function/hash/sha1.rs @@ -25,9 +25,9 @@ use datafusion_common::cast::{ as_binary_array, as_binary_view_array, as_fixed_size_binary_array, as_large_binary_array, }; -use datafusion_common::types::{logical_string, NativeType}; +use datafusion_common::types::{NativeType, logical_string}; use datafusion_common::utils::take_function_args; -use datafusion_common::{internal_err, Result}; +use datafusion_common::{Result, internal_err}; use datafusion_expr::{ Coercion, ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignatureClass, Volatility, diff --git a/datafusion/spark/src/function/hash/sha2.rs b/datafusion/spark/src/function/hash/sha2.rs index b006607d3eeda..1f17275062778 100644 --- a/datafusion/spark/src/function/hash/sha2.rs +++ b/datafusion/spark/src/function/hash/sha2.rs @@ -23,7 +23,7 @@ use crate::function::error_utils::{ use crate::function::math::hex::spark_sha2_hex; use arrow::array::{ArrayRef, AsArray, StringArray}; use arrow::datatypes::{DataType, Int32Type}; -use datafusion_common::{exec_err, internal_datafusion_err, Result, ScalarValue}; +use datafusion_common::{Result, ScalarValue, exec_err, internal_datafusion_err}; use datafusion_expr::Signature; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Volatility}; pub use datafusion_functions::crypto::basic::{sha224, sha256, sha384, sha512}; @@ -81,7 +81,7 @@ impl ScalarUDFImpl for SparkSha2 { return 
exec_err!( "{} function can only accept strings or binary arrays.", self.name() - ) + ); } }) } @@ -138,17 +138,21 @@ impl ScalarUDFImpl for SparkSha2 { pub fn sha2(args: [ColumnarValue; 2]) -> Result { match args { - [ColumnarValue::Scalar(ScalarValue::Utf8(expr_arg)), ColumnarValue::Scalar(ScalarValue::Int32(Some(bit_length_arg)))] => { - compute_sha2( - bit_length_arg, - &[ColumnarValue::from(ScalarValue::Utf8(expr_arg))], - ) - } - [ColumnarValue::Array(expr_arg), ColumnarValue::Scalar(ScalarValue::Int32(Some(bit_length_arg)))] => { - compute_sha2(bit_length_arg, &[ColumnarValue::from(expr_arg)]) - } - [ColumnarValue::Scalar(ScalarValue::Utf8(expr_arg)), ColumnarValue::Array(bit_length_arg)] => - { + [ + ColumnarValue::Scalar(ScalarValue::Utf8(expr_arg)), + ColumnarValue::Scalar(ScalarValue::Int32(Some(bit_length_arg))), + ] => compute_sha2( + bit_length_arg, + &[ColumnarValue::from(ScalarValue::Utf8(expr_arg))], + ), + [ + ColumnarValue::Array(expr_arg), + ColumnarValue::Scalar(ScalarValue::Int32(Some(bit_length_arg))), + ] => compute_sha2(bit_length_arg, &[ColumnarValue::from(expr_arg)]), + [ + ColumnarValue::Scalar(ScalarValue::Utf8(expr_arg)), + ColumnarValue::Array(bit_length_arg), + ] => { let arr: StringArray = bit_length_arg .as_primitive::() .iter() @@ -171,7 +175,10 @@ pub fn sha2(args: [ColumnarValue; 2]) -> Result { .collect(); Ok(ColumnarValue::Array(Arc::new(arr) as ArrayRef)) } - [ColumnarValue::Array(expr_arg), ColumnarValue::Array(bit_length_arg)] => { + [ + ColumnarValue::Array(expr_arg), + ColumnarValue::Array(bit_length_arg), + ] => { let expr_iter = expr_arg.as_string::().iter(); let bit_length_iter = bit_length_arg.as_primitive::().iter(); let arr: StringArray = expr_iter diff --git a/datafusion/spark/src/function/map/map_from_arrays.rs b/datafusion/spark/src/function/map/map_from_arrays.rs index dc155616dd77b..f6ca02e2fe867 100644 --- a/datafusion/spark/src/function/map/map_from_arrays.rs +++ b/datafusion/spark/src/function/map/map_from_arrays.rs @@ -25,7 +25,7 @@ use arrow::array::{Array, ArrayRef, NullArray}; use arrow::compute::kernels::cast; use arrow::datatypes::{DataType, Field, FieldRef}; use datafusion_common::utils::take_function_args; -use datafusion_common::{internal_err, Result}; +use datafusion_common::{Result, internal_err}; use datafusion_expr::{ ColumnarValue, ReturnFieldArgs, ScalarUDFImpl, Signature, Volatility, }; diff --git a/datafusion/spark/src/function/map/map_from_entries.rs b/datafusion/spark/src/function/map/map_from_entries.rs index 584aad0ffa0bd..6b2114863d11f 100644 --- a/datafusion/spark/src/function/map/map_from_entries.rs +++ b/datafusion/spark/src/function/map/map_from_entries.rs @@ -26,7 +26,7 @@ use arrow::array::{Array, ArrayRef, NullBufferBuilder, StructArray}; use arrow::buffer::NullBuffer; use arrow::datatypes::{DataType, Field, FieldRef}; use datafusion_common::utils::take_function_args; -use datafusion_common::{exec_err, internal_err, Result}; +use datafusion_common::{Result, exec_err, internal_err}; use datafusion_expr::{ ColumnarValue, ReturnFieldArgs, ScalarUDFImpl, Signature, Volatility, }; diff --git a/datafusion/spark/src/function/map/utils.rs b/datafusion/spark/src/function/map/utils.rs index b568f45403c30..1a25ffb295687 100644 --- a/datafusion/spark/src/function/map/utils.rs +++ b/datafusion/spark/src/function/map/utils.rs @@ -23,7 +23,7 @@ use arrow::array::{Array, ArrayRef, AsArray, BooleanBuilder, MapArray, StructArr use arrow::buffer::{NullBuffer, OffsetBuffer}; use arrow::compute::filter; use 
arrow::datatypes::{DataType, Field, Fields};
-use datafusion_common::{exec_err, Result, ScalarValue};
+use datafusion_common::{Result, ScalarValue, exec_err};
 /// Helper function to get element [`DataType`]
 /// from [`List`](DataType::List)/[`LargeList`](DataType::LargeList)/[`FixedSizeList`](DataType::FixedSizeList)
@@ -64,14 +64,15 @@ pub fn get_list_offsets(array: &ArrayRef) -> Result<Cow<'_, [i32]>> {
     match array.data_type() {
         DataType::List(_) => Ok(Cow::Borrowed(array.as_list::<i32>().offsets().as_ref())),
         DataType::LargeList(_) => Ok(Cow::Owned(
-            array.as_list::<i64>()
+            array
+                .as_list::<i64>()
                 .offsets()
                 .iter()
                 .map(|i| *i as i32)
                 .collect::<Vec<i32>>(),
         )),
         DataType::FixedSizeList(_, size) => Ok(Cow::Owned(
-            (0..=array.len() as i32).map(|i| size * i).collect()
+            (0..=array.len() as i32).map(|i| size * i).collect(),
         )),
         wrong_type => exec_err!(
             "get_list_offsets expects List/LargeList/FixedSizeList as argument, got {wrong_type:?}"
@@ -188,7 +189,9 @@ fn map_deduplicate_keys(
     if key_is_valid && value_is_valid {
         if num_keys_entries != num_values_entries {
-            return exec_err!("map_deduplicate_keys: keys and values lists in the same row must have equal lengths");
+            return exec_err!(
+                "map_deduplicate_keys: keys and values lists in the same row must have equal lengths"
+            );
         } else if num_keys_entries != 0 {
             let mut seen_keys = HashSet::new();
diff --git a/datafusion/spark/src/function/math/abs.rs b/datafusion/spark/src/function/math/abs.rs
index f48f8964c28c9..97703937f39f2 100644
--- a/datafusion/spark/src/function/math/abs.rs
+++ b/datafusion/spark/src/function/math/abs.rs
@@ -17,7 +17,7 @@
 use arrow::array::*;
 use arrow::datatypes::DataType;
-use datafusion_common::{internal_err, DataFusionError, Result, ScalarValue};
+use datafusion_common::{DataFusionError, Result, ScalarValue, internal_err};
 use datafusion_expr::{
     ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility,
 };
diff --git a/datafusion/spark/src/function/math/factorial.rs b/datafusion/spark/src/function/math/factorial.rs
index 5cf33d6073e53..439e79a9dd8b2 100644
--- a/datafusion/spark/src/function/math/factorial.rs
+++ b/datafusion/spark/src/function/math/factorial.rs
@@ -23,7 +23,7 @@ use arrow::datatypes::DataType;
 use arrow::datatypes::DataType::{Int32, Int64};
 use datafusion_common::cast::as_int32_array;
 use datafusion_common::{
-    exec_err, utils::take_function_args, DataFusionError, Result, ScalarValue,
+    DataFusionError, Result, ScalarValue, exec_err, utils::take_function_args,
 };
 use datafusion_expr::Signature;
 use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Volatility};
@@ -136,8 +136,8 @@ fn compute_factorial(num: Option<i32>) -> Option<i64> {
 mod test {
     use crate::function::math::factorial::spark_factorial;
     use arrow::array::{Int32Array, Int64Array};
-    use datafusion_common::cast::as_int64_array;
     use datafusion_common::ScalarValue;
+    use datafusion_common::cast::as_int64_array;
     use datafusion_expr::ColumnarValue;
     use std::sync::Arc;
diff --git a/datafusion/spark/src/function/math/hex.rs b/datafusion/spark/src/function/math/hex.rs
index c25a23fcfd741..dbbea17db5bfa 100644
--- a/datafusion/spark/src/function/math/hex.rs
+++ b/datafusion/spark/src/function/math/hex.rs
@@ -26,11 +26,12 @@ use arrow::{
 };
 use datafusion_common::cast::as_large_binary_array;
 use datafusion_common::cast::as_string_view_array;
-use datafusion_common::types::{logical_int64, logical_string, NativeType};
+use datafusion_common::types::{NativeType, logical_int64, logical_string};
 use datafusion_common::utils::take_function_args;
 use datafusion_common::{
+    DataFusionError,
     cast::{as_binary_array, as_fixed_size_binary_array, as_int64_array},
-    exec_err, DataFusionError,
+    exec_err,
 };
 use datafusion_expr::{
     Coercion, ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature,
@@ -275,8 +276,8 @@ mod test {
     use arrow::array::{Int64Array, 
StringArray}; use arrow::{ array::{ - as_string_array, BinaryDictionaryBuilder, PrimitiveDictionaryBuilder, - StringBuilder, StringDictionaryBuilder, + BinaryDictionaryBuilder, PrimitiveDictionaryBuilder, StringBuilder, + StringDictionaryBuilder, as_string_array, }, datatypes::{Int32Type, Int64Type}, }; diff --git a/datafusion/spark/src/function/math/mod.rs b/datafusion/spark/src/function/math/mod.rs index 74fa4cf37ca55..1422eb250d939 100644 --- a/datafusion/spark/src/function/math/mod.rs +++ b/datafusion/spark/src/function/math/mod.rs @@ -52,7 +52,11 @@ pub mod expr_fn { export_functions!((hex, "Computes hex value of the given column.", arg1)); export_functions!((modulus, "Returns the remainder of division of the first argument by the second argument.", arg1 arg2)); export_functions!((pmod, "Returns the positive remainder of division of the first argument by the second argument.", arg1 arg2)); - export_functions!((rint, "Returns the double value that is closest in value to the argument and is equal to a mathematical integer.", arg1)); + export_functions!(( + rint, + "Returns the double value that is closest in value to the argument and is equal to a mathematical integer.", + arg1 + )); export_functions!((width_bucket, "Returns the bucket number into which the value of this expression would fall after being evaluated.", arg1 arg2 arg3 arg4)); export_functions!((csc, "Returns the cosecant of expr.", arg1)); export_functions!((sec, "Returns the secant of expr.", arg1)); diff --git a/datafusion/spark/src/function/math/modulus.rs b/datafusion/spark/src/function/math/modulus.rs index 60d45baa7f380..49657e2cb8cee 100644 --- a/datafusion/spark/src/function/math/modulus.rs +++ b/datafusion/spark/src/function/math/modulus.rs @@ -18,7 +18,7 @@ use arrow::compute::kernels::numeric::add; use arrow::compute::kernels::{cmp::lt, numeric::rem, zip::zip}; use arrow::datatypes::DataType; -use datafusion_common::{assert_eq_or_internal_err, Result, ScalarValue}; +use datafusion_common::{Result, ScalarValue, assert_eq_or_internal_err}; use datafusion_expr::{ ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, }; @@ -239,7 +239,7 @@ mod test { assert!((result_float64.value(0) - 1.5).abs() < f64::EPSILON); // 10.5 % 3.0 = 1.5 assert!((result_float64.value(1) - 2.2).abs() < f64::EPSILON); // 7.2 % 2.5 = 2.2 assert!((result_float64.value(2) - 3.2).abs() < f64::EPSILON); // 15.8 % 4.2 = 3.2 - // nan % 2.0 = nan + // nan % 2.0 = nan assert!(result_float64.value(3).is_nan()); // inf % 2.0 = nan (IEEE 754) assert!(result_float64.value(4).is_nan()); @@ -295,7 +295,7 @@ mod test { assert!((result_float32.value(0) - 1.5).abs() < f32::EPSILON); // 10.5 % 3.0 = 1.5 assert!((result_float32.value(1) - 2.2).abs() < f32::EPSILON * 3.0); // 7.2 % 2.5 = 2.2 assert!((result_float32.value(2) - 3.2).abs() < f32::EPSILON * 10.0); // 15.8 % 4.2 = 3.2 - // nan % 2.0 = nan + // nan % 2.0 = nan assert!(result_float32.value(3).is_nan()); // inf % 2.0 = nan (IEEE 754) assert!(result_float32.value(4).is_nan()); @@ -437,7 +437,7 @@ mod test { assert!((result_float64.value(1) - 1.8).abs() < f64::EPSILON * 3.0); // -7.2 pmod 3.0 = 1.8 (positive) assert!((result_float64.value(2) - 3.2).abs() < f64::EPSILON * 3.0); // 15.8 pmod 4.2 = 3.2 assert!((result_float64.value(3) - 1.0).abs() < f64::EPSILON * 3.0); // -15.8 pmod 4.2 = 1.0 (positive) - // nan pmod 2.0 = nan + // nan pmod 2.0 = nan assert!(result_float64.value(4).is_nan()); // inf pmod 2.0 = nan (IEEE 754) assert!(result_float64.value(5).is_nan()); @@ -488,7 +488,7 @@ 
mod test { assert!((result_float32.value(1) - 1.8).abs() < f32::EPSILON * 3.0); // -7.2 pmod 3.0 = 1.8 (positive) assert!((result_float32.value(2) - 3.2).abs() < f32::EPSILON * 10.0); // 15.8 pmod 4.2 = 3.2 assert!((result_float32.value(3) - 1.0).abs() < f32::EPSILON * 10.0); // -15.8 pmod 4.2 = 1.0 (positive) - // nan pmod 2.0 = nan + // nan pmod 2.0 = nan assert!(result_float32.value(4).is_nan()); // inf pmod 2.0 = nan (IEEE 754) assert!(result_float32.value(5).is_nan()); diff --git a/datafusion/spark/src/function/math/rint.rs b/datafusion/spark/src/function/math/rint.rs index 3271be38f8338..ae1a25110ac89 100644 --- a/datafusion/spark/src/function/math/rint.rs +++ b/datafusion/spark/src/function/math/rint.rs @@ -21,10 +21,10 @@ use std::sync::Arc; use arrow::array::{Array, ArrayRef, AsArray}; use arrow::compute::cast; use arrow::datatypes::DataType::{ - Float32, Float64, Int16, Int32, Int64, Int8, UInt16, UInt32, UInt64, UInt8, + Float32, Float64, Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, }; use arrow::datatypes::{DataType, Float32Type, Float64Type}; -use datafusion_common::{assert_eq_or_internal_err, exec_err, Result}; +use datafusion_common::{Result, assert_eq_or_internal_err, exec_err}; use datafusion_expr::sort_properties::{ExprProperties, SortProperties}; use datafusion_expr::{ ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, diff --git a/datafusion/spark/src/function/math/width_bucket.rs b/datafusion/spark/src/function/math/width_bucket.rs index 052c8007ac599..8d748439ad806 100644 --- a/datafusion/spark/src/function/math/width_bucket.rs +++ b/datafusion/spark/src/function/math/width_bucket.rs @@ -30,10 +30,10 @@ use datafusion_common::cast::{ as_interval_mdn_array, as_interval_ym_array, }; use datafusion_common::types::{ - logical_duration_microsecond, logical_float64, logical_int32, logical_interval_mdn, - logical_interval_year_month, NativeType, + NativeType, logical_duration_microsecond, logical_float64, logical_int32, + logical_interval_mdn, logical_interval_year_month, }; -use datafusion_common::{exec_err, internal_err, Result}; +use datafusion_common::{Result, exec_err, internal_err}; use datafusion_expr::sort_properties::{ExprProperties, SortProperties}; use datafusion_expr::{ ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature, diff --git a/datafusion/spark/src/function/string/char.rs b/datafusion/spark/src/function/string/char.rs index 43a759ffdd6cd..16dfe0943565f 100644 --- a/datafusion/spark/src/function/string/char.rs +++ b/datafusion/spark/src/function/string/char.rs @@ -22,7 +22,7 @@ use arrow::datatypes::DataType::Utf8; use arrow::datatypes::{DataType, Field, FieldRef}; use std::{any::Any, sync::Arc}; -use datafusion_common::{cast::as_int64_array, exec_err, Result, ScalarValue}; +use datafusion_common::{Result, ScalarValue, cast::as_int64_array, exec_err}; use datafusion_expr::{ ColumnarValue, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, @@ -127,7 +127,7 @@ fn chr(args: &[ArrayRef]) -> Result { None => { return exec_err!( "requested character not compatible for encoding." 
- ) + ); } } } diff --git a/datafusion/spark/src/function/string/concat.rs b/datafusion/spark/src/function/string/concat.rs index 0dcc58d5bb8ed..772b280320acf 100644 --- a/datafusion/spark/src/function/string/concat.rs +++ b/datafusion/spark/src/function/string/concat.rs @@ -150,10 +150,10 @@ fn compute_null_mask( if all_scalars { // For scalars, check if any is NULL for arg in args { - if let ColumnarValue::Scalar(scalar) = arg { - if scalar.is_null() { - return Ok(NullMaskResolution::ReturnNull); - } + if let ColumnarValue::Scalar(scalar) = arg + && scalar.is_null() + { + return Ok(NullMaskResolution::ReturnNull); } } // No NULLs in scalars diff --git a/datafusion/spark/src/function/string/elt.rs b/datafusion/spark/src/function/string/elt.rs index 4af6d5128e97d..7d4b0dbd7a168 100644 --- a/datafusion/spark/src/function/string/elt.rs +++ b/datafusion/spark/src/function/string/elt.rs @@ -25,7 +25,7 @@ use arrow::compute::{can_cast_types, cast}; use arrow::datatypes::DataType::{Int64, Utf8}; use arrow::datatypes::{DataType, Field, FieldRef, Int64Type}; use datafusion_common::cast::as_string_array; -use datafusion_common::{internal_err, plan_datafusion_err, DataFusionError, Result}; +use datafusion_common::{DataFusionError, Result, internal_err, plan_datafusion_err}; use datafusion_expr::{ ColumnarValue, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, diff --git a/datafusion/spark/src/function/string/format_string.rs b/datafusion/spark/src/function/string/format_string.rs index adb0eb2f09951..5261251cedfff 100644 --- a/datafusion/spark/src/function/string/format_string.rs +++ b/datafusion/spark/src/function/string/format_string.rs @@ -26,12 +26,12 @@ use arrow::{ datatypes::DataType, }; use bigdecimal::{ - num_bigint::{BigInt, Sign}, BigDecimal, ToPrimitive, + num_bigint::{BigInt, Sign}, }; use chrono::{DateTime, Datelike, Timelike, Utc}; use datafusion_common::{ - exec_datafusion_err, exec_err, plan_err, DataFusionError, Result, ScalarValue, + DataFusionError, Result, ScalarValue, exec_datafusion_err, exec_err, plan_err, }; use datafusion_expr::{ ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature, @@ -81,8 +81,12 @@ impl ScalarUDFImpl for FormatStringFunc { fn return_type(&self, arg_types: &[DataType]) -> Result { match arg_types[0] { DataType::Null => Ok(DataType::Utf8), - DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => Ok(arg_types[0].clone()), - _ => plan_err!("The format_string function expects the first argument to be Utf8, LargeUtf8 or Utf8View") + DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => { + Ok(arg_types[0].clone()) + } + _ => plan_err!( + "The format_string function expects the first argument to be Utf8, LargeUtf8 or Utf8View" + ), } } @@ -317,7 +321,7 @@ impl<'a> Formatter<'a> { (index as usize, &rest2[1..]) } (NumericParam::FromArgument, true) => { - return exec_err!("Invalid numeric parameter") + return exec_err!("Invalid numeric parameter"); } (_, false) => { argument_index += 1; @@ -1675,7 +1679,7 @@ impl ConversionSpecifier { return exec_err!( "Invalid conversion type: {:?} for boolean array", self.conversion_type - ) + ); } }; self.format_str(writer, formatted) @@ -1744,7 +1748,7 @@ impl ConversionSpecifier { return exec_err!( "Invalid conversion type: {:?} for float", self.conversion_type - ) + ); } } @@ -1789,7 +1793,7 @@ impl ConversionSpecifier { return exec_err!( "Invalid conversion type: {:?} for float", self.conversion_type - ) + ); } } } @@ -1908,7 +1912,7 @@ impl ConversionSpecifier { 
return exec_err!( "Invalid conversion type: {:?} for u64", self.conversion_type - ) + ); } } let mut prefix = if self.alt_form { @@ -2065,7 +2069,7 @@ impl ConversionSpecifier { return exec_err!( "Invalid conversion type: {:?} for decimal", self.conversion_type - ) + ); } }; diff --git a/datafusion/spark/src/function/string/ilike.rs b/datafusion/spark/src/function/string/ilike.rs index 4be2969248d2d..0d90bd1694175 100644 --- a/datafusion/spark/src/function/string/ilike.rs +++ b/datafusion/spark/src/function/string/ilike.rs @@ -18,7 +18,7 @@ use arrow::array::ArrayRef; use arrow::compute::ilike; use arrow::datatypes::{DataType, Field}; -use datafusion_common::{exec_err, internal_err, Result}; +use datafusion_common::{Result, exec_err, internal_err}; use datafusion_expr::ColumnarValue; use datafusion_expr::{ ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, diff --git a/datafusion/spark/src/function/string/like.rs b/datafusion/spark/src/function/string/like.rs index df8eaef7cecbc..4e9b52327919c 100644 --- a/datafusion/spark/src/function/string/like.rs +++ b/datafusion/spark/src/function/string/like.rs @@ -18,7 +18,7 @@ use arrow::array::ArrayRef; use arrow::compute::like; use arrow::datatypes::DataType; -use datafusion_common::{exec_err, Result}; +use datafusion_common::{Result, exec_err}; use datafusion_expr::ColumnarValue; use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility}; use datafusion_functions::utils::make_scalar_function; diff --git a/datafusion/spark/src/function/string/luhn_check.rs b/datafusion/spark/src/function/string/luhn_check.rs index 090b16e34b8f1..dffd4fe0ae7e2 100644 --- a/datafusion/spark/src/function/string/luhn_check.rs +++ b/datafusion/spark/src/function/string/luhn_check.rs @@ -21,7 +21,7 @@ use arrow::array::{Array, AsArray, BooleanArray}; use arrow::datatypes::DataType; use arrow::datatypes::DataType::Boolean; use datafusion_common::utils::take_function_args; -use datafusion_common::{exec_err, Result, ScalarValue}; +use datafusion_common::{Result, ScalarValue, exec_err}; use datafusion_expr::{ ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature, Volatility, diff --git a/datafusion/spark/src/function/url/mod.rs b/datafusion/spark/src/function/url/mod.rs index 657655429ebaa..1313edaed5347 100644 --- a/datafusion/spark/src/function/url/mod.rs +++ b/datafusion/spark/src/function/url/mod.rs @@ -44,9 +44,21 @@ pub mod expr_fn { "Same as parse_url but returns NULL if an invalid URL is provided.", args )); - export_functions!((url_decode, "Decodes a URL-encoded string in ‘application/x-www-form-urlencoded’ format to its original format.", args)); - export_functions!((try_url_decode, "Same as url_decode but returns NULL if an invalid URL-encoded string is provided", args)); - export_functions!((url_encode, "Encodes a string into a URL-encoded string in ‘application/x-www-form-urlencoded’ format.", args)); + export_functions!(( + url_decode, + "Decodes a URL-encoded string in ‘application/x-www-form-urlencoded’ format to its original format.", + args + )); + export_functions!(( + try_url_decode, + "Same as url_decode but returns NULL if an invalid URL-encoded string is provided", + args + )); + export_functions!(( + url_encode, + "Encodes a string into a URL-encoded string in ‘application/x-www-form-urlencoded’ format.", + args + )); } pub fn functions() -> Vec> { diff --git a/datafusion/spark/src/function/url/parse_url.rs b/datafusion/spark/src/function/url/parse_url.rs index 
a8afa1d9639f5..e82ef28045a33 100644 --- a/datafusion/spark/src/function/url/parse_url.rs +++ b/datafusion/spark/src/function/url/parse_url.rs @@ -26,7 +26,7 @@ use arrow::datatypes::DataType; use datafusion_common::cast::{ as_large_string_array, as_string_array, as_string_view_array, }; -use datafusion_common::{exec_datafusion_err, exec_err, Result}; +use datafusion_common::{Result, exec_datafusion_err, exec_err}; use datafusion_expr::{ ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature, Volatility, @@ -86,7 +86,9 @@ impl ParseUrl { return if !value.contains("://") { Ok(None) } else { - Err(exec_datafusion_err!("The url is invalid: {value}. Use `try_parse_url` to tolerate invalid URL and return NULL instead. SQLSTATE: 22P02")) + Err(exec_datafusion_err!( + "The url is invalid: {value}. Use `try_parse_url` to tolerate invalid URL and return NULL instead. SQLSTATE: 22P02" + )) }; }; url.map_err(|e| exec_datafusion_err!("{e:?}")) @@ -186,7 +188,7 @@ pub fn spark_handled_parse_url( let url = &args[0]; let part = &args[1]; - let result = if args.len() == 3 { + if args.len() == 3 { // In this case, the 'key' argument is passed let key = &args[2]; @@ -253,8 +255,7 @@ pub fn spark_handled_parse_url( } _ => exec_err!("{} expects STRING arguments, got {:?}", "`parse_url`", args), } - }; - result + } } fn process_parse_url<'a, A, B, C, T>( diff --git a/datafusion/spark/src/function/url/try_parse_url.rs b/datafusion/spark/src/function/url/try_parse_url.rs index c04850f3a6bf0..4f6c5bb940fec 100644 --- a/datafusion/spark/src/function/url/try_parse_url.rs +++ b/datafusion/spark/src/function/url/try_parse_url.rs @@ -17,7 +17,7 @@ use std::any::Any; -use crate::function::url::parse_url::{spark_handled_parse_url, ParseUrl}; +use crate::function::url::parse_url::{ParseUrl, spark_handled_parse_url}; use arrow::array::ArrayRef; use arrow::datatypes::DataType; use datafusion_common::Result; diff --git a/datafusion/spark/src/function/url/try_url_decode.rs b/datafusion/spark/src/function/url/try_url_decode.rs index 61440e7ff05a0..58013236d5ec9 100644 --- a/datafusion/spark/src/function/url/try_url_decode.rs +++ b/datafusion/spark/src/function/url/try_url_decode.rs @@ -26,7 +26,7 @@ use datafusion_expr::{ }; use datafusion_functions::utils::make_scalar_function; -use crate::function::url::url_decode::{spark_handled_url_decode, UrlDecode}; +use crate::function::url::url_decode::{UrlDecode, spark_handled_url_decode}; #[derive(Debug, PartialEq, Eq, Hash)] pub struct TryUrlDecode { @@ -84,7 +84,7 @@ mod tests { use std::sync::Arc; use arrow::array::StringArray; - use datafusion_common::{cast::as_string_array, Result}; + use datafusion_common::{Result, cast::as_string_array}; use super::*; diff --git a/datafusion/spark/src/function/url/url_decode.rs b/datafusion/spark/src/function/url/url_decode.rs index 520588bc19e9c..e4a9cf6acd3e7 100644 --- a/datafusion/spark/src/function/url/url_decode.rs +++ b/datafusion/spark/src/function/url/url_decode.rs @@ -24,7 +24,7 @@ use arrow::datatypes::DataType; use datafusion_common::cast::{ as_large_string_array, as_string_array, as_string_view_array, }; -use datafusion_common::{exec_datafusion_err, exec_err, plan_err, Result}; +use datafusion_common::{Result, exec_datafusion_err, exec_err, plan_err}; use datafusion_expr::{ ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, }; @@ -252,7 +252,9 @@ mod tests { ])); let result = spark_url_decode(&[input]); - assert!(result.is_err_and(|e| e.to_string().contains("Invalid percent-encoding"))); + 
assert!( + result.is_err_and(|e| e.to_string().contains("Invalid percent-encoding")) + ); Ok(()) } diff --git a/datafusion/spark/src/function/url/url_encode.rs b/datafusion/spark/src/function/url/url_encode.rs index 9b37f0ac6a740..7292eb530a6ae 100644 --- a/datafusion/spark/src/function/url/url_encode.rs +++ b/datafusion/spark/src/function/url/url_encode.rs @@ -23,7 +23,7 @@ use arrow::datatypes::DataType; use datafusion_common::cast::{ as_large_string_array, as_string_array, as_string_view_array, }; -use datafusion_common::{exec_err, plan_err, Result}; +use datafusion_common::{Result, exec_err, plan_err}; use datafusion_expr::{ ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, }; diff --git a/datafusion/spark/src/lib.rs b/datafusion/spark/src/lib.rs index a3c3f392a0149..aad3ceed68ce3 100644 --- a/datafusion/spark/src/lib.rs +++ b/datafusion/spark/src/lib.rs @@ -123,8 +123,8 @@ pub mod expr_fn { pub use super::function::math::expr_fn::*; pub use super::function::misc::expr_fn::*; pub use super::function::predicate::expr_fn::*; - pub use super::function::r#struct::expr_fn::*; pub use super::function::string::expr_fn::*; + pub use super::function::r#struct::expr_fn::*; pub use super::function::table::expr_fn::*; pub use super::function::url::expr_fn::*; pub use super::function::window::expr_fn::*;