Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 91 additions & 21 deletions datafusion/spark/src/function/math/modulus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,14 @@

use arrow::compute::kernels::numeric::add;
use arrow::compute::kernels::{cmp::lt, numeric::rem, zip::zip};
use arrow::datatypes::DataType;
use datafusion_common::{Result, ScalarValue, assert_eq_or_internal_err};
use arrow::datatypes::{DataType, Field, FieldRef};
use datafusion_common::{Result, ScalarValue, assert_eq_or_internal_err, internal_err};
use datafusion_expr::{
ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility,
ColumnarValue, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, Signature,
Volatility,
};
use std::any::Any;
use std::sync::Arc;

/// Spark-compatible `mod` function
/// This function directly uses Arrow's arithmetic_op function for modulo operations
Expand Down Expand Up @@ -82,16 +84,12 @@ impl ScalarUDFImpl for SparkMod {
&self.signature
}

fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
assert_eq_or_internal_err!(
arg_types.len(),
2,
"mod expects exactly two arguments"
);
/// Intentionally unimplemented: the return type alone cannot capture the
/// nullability of the result, so planning must call
/// `return_field_from_args` instead. Reaching this is an internal error.
fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
    internal_err!("return_field_from_args should be used instead")
}

// Return the same type as the first argument for simplicity
// Arrow's rem function handles type promotion internally
Ok(arg_types[0].clone())
/// Computes the output field (type and nullability) for `mod`, delegating
/// to the shared binary-op helper used by both `mod` and `pmod`.
fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result<FieldRef> {
    return_field_for_binary_op(self.name(), args)
}

fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
Expand Down Expand Up @@ -132,23 +130,27 @@ impl ScalarUDFImpl for SparkPmod {
&self.signature
}

fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
assert_eq_or_internal_err!(
arg_types.len(),
2,
"pmod expects exactly two arguments"
);
/// Intentionally unimplemented: the return type alone cannot capture the
/// nullability of the result, so planning must call
/// `return_field_from_args` instead. Reaching this is an internal error.
fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
    internal_err!("return_field_from_args should be used instead")
}

// Return the same type as the first argument for simplicity
// Arrow's rem function handles type promotion internally
Ok(arg_types[0].clone())
/// Computes the output field (type and nullability) for `pmod`, delegating
/// to the shared binary-op helper used by both `mod` and `pmod`.
fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result<FieldRef> {
    return_field_for_binary_op(self.name(), args)
}

/// Evaluates `pmod` on the runtime arguments by delegating to the
/// free function `spark_pmod`.
fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
    spark_pmod(&args.args)
}
}

fn return_field_for_binary_op(name: &str, args: ReturnFieldArgs) -> Result<FieldRef> {
// The mod function output is nullable only in the case that the input is nullable
// (notably, a mod 0 returns an error, not null). Thus this check is sufficient.
let any_nullable = args.arg_fields.iter().any(|f| f.is_nullable());
let data_type = args.arg_fields[0].data_type().clone();
Ok(Arc::new(Field::new(name, data_type, any_nullable)))
}

#[cfg(test)]
mod test {
use std::sync::Arc;
Expand Down Expand Up @@ -606,4 +608,72 @@ mod test {
panic!("Expected array result");
}
}

#[test]
fn test_mod_return_type_error() {
    // `return_type` is deliberately unsupported for `mod`; callers must use
    // `return_field_from_args`, so any invocation must produce an error.
    assert!(
        SparkMod::new()
            .return_type(&[DataType::Int32, DataType::Int32])
            .is_err()
    );
}

#[test]
fn test_mod_return_field_nullability() {
    let mod_func = SparkMod::new();

    // The output field is nullable exactly when some input is nullable:
    // (left nullable, right nullable) -> expected output nullability.
    for (left_nullable, right_nullable, expected) in
        [(false, false, false), (true, false, true)]
    {
        let args = ReturnFieldArgs {
            arg_fields: &[
                Arc::new(Field::new("a", DataType::Int32, left_nullable)),
                Arc::new(Field::new("b", DataType::Int32, right_nullable)),
            ],
            scalar_arguments: &[],
        };
        let field = mod_func.return_field_from_args(args).unwrap();
        assert_eq!(field.is_nullable(), expected);
    }
}

#[test]
fn test_pmod_return_type_error() {
    // `return_type` is deliberately unsupported for `pmod`; callers must use
    // `return_field_from_args`, so any invocation must produce an error.
    assert!(
        SparkPmod::new()
            .return_type(&[DataType::Int32, DataType::Int32])
            .is_err()
    );
}

#[test]
fn test_pmod_return_field_nullability() {
    let pmod_func = SparkPmod::new();

    // The output field is nullable exactly when some input is nullable:
    // (left nullable, right nullable) -> expected output nullability.
    for (left_nullable, right_nullable, expected) in
        [(false, false, false), (true, false, true)]
    {
        let args = ReturnFieldArgs {
            arg_fields: &[
                Arc::new(Field::new("a", DataType::Int32, left_nullable)),
                Arc::new(Field::new("b", DataType::Int32, right_nullable)),
            ],
            scalar_arguments: &[],
        };
        let field = pmod_func.return_field_from_args(args).unwrap();
        assert_eq!(field.is_nullable(), expected);
    }
}
}