From 4a408830546736133bfd555ec78c491c758f762c Mon Sep 17 00:00:00 2001 From: Weijun Huang Date: Tue, 28 Nov 2023 14:44:15 +0100 Subject: [PATCH 01/24] Add support for parsing FixedSizeList type --- datafusion/sqllogictest/test_files/arrow_typeof.slt | 6 ------ 1 file changed, 6 deletions(-) diff --git a/datafusion/sqllogictest/test_files/arrow_typeof.slt b/datafusion/sqllogictest/test_files/arrow_typeof.slt index 8b3bd7eac95d..04d663a2f60b 100644 --- a/datafusion/sqllogictest/test_files/arrow_typeof.slt +++ b/datafusion/sqllogictest/test_files/arrow_typeof.slt @@ -399,7 +399,6 @@ NULL #---- #[1] - query ? select arrow_cast([1], 'FixedSizeList(1, Int64)'); ---- @@ -417,8 +416,3 @@ query T select arrow_typeof(arrow_cast(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 'FixedSizeList(3, Int64)')); ---- FixedSizeList(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3) - -query ? -select arrow_cast([1, 2, 3], 'FixedSizeList(3, Int64)'); ----- -[1, 2, 3] From 0d12e9774adf4b016be09cccc43d50a2b5100ab7 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Mon, 15 Jan 2024 21:14:58 +0800 Subject: [PATCH 02/24] support cast fixedsizelist from list --- datafusion/sqllogictest/test_files/arrow_typeof.slt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sqllogictest/test_files/arrow_typeof.slt b/datafusion/sqllogictest/test_files/arrow_typeof.slt index 04d663a2f60b..4ee658d45d94 100644 --- a/datafusion/sqllogictest/test_files/arrow_typeof.slt +++ b/datafusion/sqllogictest/test_files/arrow_typeof.slt @@ -402,7 +402,7 @@ NULL query ? 
select arrow_cast([1], 'FixedSizeList(1, Int64)'); ---- -[1] +[1, 2, 3] query error DataFusion error: Arrow error: Cast error: Cannot cast to FixedSizeList\(4\): value at index 0 has length 3 select arrow_cast(make_array(1, 2, 3), 'FixedSizeList(4, Int64)'); From fc4c8cffaf3abaf0b53c0e4a7d7270884adee636 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Thu, 18 Jan 2024 14:53:17 +0800 Subject: [PATCH 03/24] support FixedSizeList type coercion --- datafusion/common/src/utils.rs | 10 ++--- datafusion/expr/src/built_in_function.rs | 43 +++++++++++++------ datafusion/expr/src/signature.rs | 14 ++++++ .../expr/src/type_coercion/functions.rs | 16 +++++-- 4 files changed, 62 insertions(+), 21 deletions(-) diff --git a/datafusion/common/src/utils.rs b/datafusion/common/src/utils.rs index d21bd464f850..6aaa07920574 100644 --- a/datafusion/common/src/utils.rs +++ b/datafusion/common/src/utils.rs @@ -440,9 +440,9 @@ pub fn arrays_into_list_array( /// ``` pub fn base_type(data_type: &DataType) -> DataType { match data_type { - DataType::List(field) | DataType::LargeList(field) => { - base_type(field.data_type()) - } + DataType::List(field) + | DataType::LargeList(field) + | DataType::FixedSizeList(field, _) => base_type(field.data_type()), _ => data_type.to_owned(), } } @@ -464,9 +464,9 @@ pub fn coerced_type_with_base_type_only( base_type: &DataType, ) -> DataType { match data_type { - DataType::List(field) => { + DataType::List(field) | DataType::FixedSizeList(field, _) => { let data_type = match field.data_type() { - DataType::List(_) => { + DataType::List(_) | DataType::FixedSizeList(_, _) => { coerced_type_with_base_type_only(field.data_type(), base_type) } _ => base_type.to_owned(), diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index e86d6172cecd..14acf6c8b834 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -959,12 +959,18 @@ impl BuiltinScalarFunction { } 
BuiltinScalarFunction::ArrayDims => Signature::any(1, self.volatility()), BuiltinScalarFunction::ArrayEmpty => Signature::any(1, self.volatility()), - BuiltinScalarFunction::ArrayElement => Signature::any(2, self.volatility()), - BuiltinScalarFunction::ArrayExcept => Signature::any(2, self.volatility()), + BuiltinScalarFunction::ArrayElement => { + Signature::array_and_element(self.volatility()) + } + BuiltinScalarFunction::ArrayExcept => { + Signature::array_and_element(self.volatility()) + } BuiltinScalarFunction::Flatten => Signature::any(1, self.volatility()), BuiltinScalarFunction::ArrayHasAll | BuiltinScalarFunction::ArrayHasAny - | BuiltinScalarFunction::ArrayHas => Signature::any(2, self.volatility()), + | BuiltinScalarFunction::ArrayHas => { + Signature::array_and_element(self.volatility()) + } BuiltinScalarFunction::ArrayLength => { Signature::variadic_any(self.volatility()) } @@ -973,15 +979,22 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayPosition => { Signature::variadic_any(self.volatility()) } - BuiltinScalarFunction::ArrayPositions => Signature::any(2, self.volatility()), - BuiltinScalarFunction::ArrayPrepend => Signature { - type_signature: ElementAndArray, - volatility: self.volatility(), - }, - BuiltinScalarFunction::ArrayRepeat => Signature::any(2, self.volatility()), - BuiltinScalarFunction::ArrayRemove => Signature::any(2, self.volatility()), + BuiltinScalarFunction::ArrayPositions => { + Signature::array_and_element(self.volatility()) + } + BuiltinScalarFunction::ArrayPrepend => { + Signature::element_and_array(self.volatility()) + } + BuiltinScalarFunction::ArrayRepeat => { + Signature::array_and_element(self.volatility()) + } + BuiltinScalarFunction::ArrayRemove => { + Signature::array_and_element(self.volatility()) + } BuiltinScalarFunction::ArrayRemoveN => Signature::any(3, self.volatility()), - BuiltinScalarFunction::ArrayRemoveAll => Signature::any(2, self.volatility()), + BuiltinScalarFunction::ArrayRemoveAll => { + 
Signature::array_and_element(self.volatility()) + } BuiltinScalarFunction::ArrayReplace => Signature::any(3, self.volatility()), BuiltinScalarFunction::ArrayReplaceN => Signature::any(4, self.volatility()), BuiltinScalarFunction::ArrayReplaceAll => { @@ -994,8 +1007,12 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayToString => { Signature::variadic_any(self.volatility()) } - BuiltinScalarFunction::ArrayIntersect => Signature::any(2, self.volatility()), - BuiltinScalarFunction::ArrayUnion => Signature::any(2, self.volatility()), + BuiltinScalarFunction::ArrayIntersect => { + Signature::array_and_element(self.volatility()) + } + BuiltinScalarFunction::ArrayUnion => { + Signature::array_and_element(self.volatility()) + } BuiltinScalarFunction::Cardinality => Signature::any(1, self.volatility()), BuiltinScalarFunction::ArrayResize => { Signature::variadic_any(self.volatility()) diff --git a/datafusion/expr/src/signature.rs b/datafusion/expr/src/signature.rs index 729131bd95e1..17a777551323 100644 --- a/datafusion/expr/src/signature.rs +++ b/datafusion/expr/src/signature.rs @@ -263,6 +263,20 @@ impl Signature { volatility, } } + /// Specialized Signature for ArrayAppend and similar functions + pub fn array_and_element(volatility: Volatility) -> Self { + Signature { + type_signature: TypeSignature::ArrayAndElement, + volatility, + } + } + /// Specialized Signature for ArrayPrepend and similar functions + pub fn element_and_array(volatility: Volatility) -> Self { + Signature { + type_signature: TypeSignature::ElementAndArray, + volatility, + } + } } /// Monotonicity of the `ScalarFunctionExpr` with respect to its arguments. 
diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index 63908d539bd0..a9c20f1e958a 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -100,8 +100,8 @@ fn get_valid_types( // We need to find the coerced base type, mainly for cases like: // `array_append(List(null), i64)` -> `List(i64)` - let array_base_type = datafusion_common::utils::base_type(array_type); - let elem_base_type = datafusion_common::utils::base_type(elem_type); + let array_base_type = dbg!(datafusion_common::utils::base_type(array_type)); + let elem_base_type = dbg!(datafusion_common::utils::base_type(elem_type)); let new_base_type = comparison_coercion(&array_base_type, &elem_base_type); if new_base_type.is_none() { @@ -125,6 +125,14 @@ fn get_valid_types( Ok(vec![vec![elem_type.to_owned(), array_type.clone()]]) } } + DataType::FixedSizeList(ref field, _) => { + let elem_type = field.data_type(); + if is_append { + Ok(vec![vec![array_type.clone(), elem_type.to_owned()]]) + } else { + Ok(vec![vec![elem_type.to_owned(), array_type.clone()]]) + } + } _ => Ok(vec![vec![]]), } } @@ -161,7 +169,7 @@ fn get_valid_types( TypeSignature::Exact(valid_types) => vec![valid_types.clone()], TypeSignature::ArrayAndElement => { - return array_append_or_prepend_valid_types(current_types, true) + return dbg!(array_append_or_prepend_valid_types(current_types, true)) } TypeSignature::ElementAndArray => { return array_append_or_prepend_valid_types(current_types, false) @@ -311,6 +319,8 @@ fn coerced_from<'a>( Utf8 | LargeUtf8 => Some(type_into.clone()), Null if can_cast_types(type_from, type_into) => Some(type_into.clone()), + List(_) if matches!(type_from, FixedSizeList(_, _)) => Some(type_into.clone()), + // Only accept list and largelist with the same number of dimensions unless the type is Null. 
// List or LargeList with different dimensions should be handled in TypeSignature or other places before this. List(_) | LargeList(_) From 81745c538d2548322951ce93b262d750b20c6b48 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Sat, 20 Jan 2024 10:01:08 +0800 Subject: [PATCH 04/24] fix conflict --- datafusion/sqllogictest/test_files/arrow_typeof.slt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sqllogictest/test_files/arrow_typeof.slt b/datafusion/sqllogictest/test_files/arrow_typeof.slt index 4ee658d45d94..04d663a2f60b 100644 --- a/datafusion/sqllogictest/test_files/arrow_typeof.slt +++ b/datafusion/sqllogictest/test_files/arrow_typeof.slt @@ -402,7 +402,7 @@ NULL query ? select arrow_cast([1], 'FixedSizeList(1, Int64)'); ---- -[1, 2, 3] +[1] query error DataFusion error: Arrow error: Cast error: Cannot cast to FixedSizeList\(4\): value at index 0 has length 3 select arrow_cast(make_array(1, 2, 3), 'FixedSizeList(4, Int64)'); From 7a5f42cdef594bddd2e5b8d0d246c232d6061873 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Sat, 20 Jan 2024 10:03:04 +0800 Subject: [PATCH 05/24] add test for [] --- datafusion/sqllogictest/test_files/arrow_typeof.slt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/datafusion/sqllogictest/test_files/arrow_typeof.slt b/datafusion/sqllogictest/test_files/arrow_typeof.slt index 04d663a2f60b..740d6b89ec48 100644 --- a/datafusion/sqllogictest/test_files/arrow_typeof.slt +++ b/datafusion/sqllogictest/test_files/arrow_typeof.slt @@ -412,6 +412,11 @@ select arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'); ---- [1, 2, 3] +query ? 
+select arrow_cast([1, 2, 3], 'FixedSizeList(3, Int64)'); +---- +[1, 2, 3] + query T select arrow_typeof(arrow_cast(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 'FixedSizeList(3, Int64)')); ---- From bb20c37fdd5776fc5f16456a840a0b38d3bec7e3 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Sat, 20 Jan 2024 13:27:15 +0800 Subject: [PATCH 06/24] support fixedsizelist for functions with array-element or element-array argument --- datafusion/common/src/utils.rs | 10 ++++- datafusion/expr/src/built_in_function.rs | 16 ++------ .../expr/src/type_coercion/functions.rs | 41 ++++++++++--------- datafusion/sqllogictest/test_files/array.slt | 2 +- 4 files changed, 34 insertions(+), 35 deletions(-) diff --git a/datafusion/common/src/utils.rs b/datafusion/common/src/utils.rs index 6aaa07920574..db1ed55dcfd7 100644 --- a/datafusion/common/src/utils.rs +++ b/datafusion/common/src/utils.rs @@ -466,7 +466,10 @@ pub fn coerced_type_with_base_type_only( match data_type { DataType::List(field) | DataType::FixedSizeList(field, _) => { let data_type = match field.data_type() { - DataType::List(_) | DataType::FixedSizeList(_, _) => { + // nested type could be different list type + DataType::List(_) + | DataType::FixedSizeList(_, _) + | DataType::LargeList(_) => { coerced_type_with_base_type_only(field.data_type(), base_type) } _ => base_type.to_owned(), @@ -480,7 +483,10 @@ pub fn coerced_type_with_base_type_only( } DataType::LargeList(field) => { let data_type = match field.data_type() { - DataType::LargeList(_) => { + // nested type could be different list type + DataType::List(_) + | DataType::FixedSizeList(_, _) + | DataType::LargeList(_) => { coerced_type_with_base_type_only(field.data_type(), base_type) } _ => base_type.to_owned(), diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index 14acf6c8b834..892db6bf624b 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -962,9 +962,7 @@
impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayElement => { Signature::array_and_element(self.volatility()) } - BuiltinScalarFunction::ArrayExcept => { - Signature::array_and_element(self.volatility()) - } + BuiltinScalarFunction::ArrayExcept => Signature::any(2, self.volatility()), BuiltinScalarFunction::Flatten => Signature::any(1, self.volatility()), BuiltinScalarFunction::ArrayHasAll | BuiltinScalarFunction::ArrayHasAny @@ -985,9 +983,7 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayPrepend => { Signature::element_and_array(self.volatility()) } - BuiltinScalarFunction::ArrayRepeat => { - Signature::array_and_element(self.volatility()) - } + BuiltinScalarFunction::ArrayRepeat => Signature::any(2, self.volatility()), BuiltinScalarFunction::ArrayRemove => { Signature::array_and_element(self.volatility()) } @@ -1007,12 +1003,8 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayToString => { Signature::variadic_any(self.volatility()) } - BuiltinScalarFunction::ArrayIntersect => { - Signature::array_and_element(self.volatility()) - } - BuiltinScalarFunction::ArrayUnion => { - Signature::array_and_element(self.volatility()) - } + BuiltinScalarFunction::ArrayIntersect => Signature::any(2, self.volatility()), + BuiltinScalarFunction::ArrayUnion => Signature::any(2, self.volatility()), BuiltinScalarFunction::Cardinality => Signature::any(1, self.volatility()), BuiltinScalarFunction::ArrayResize => { Signature::variadic_any(self.volatility()) diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index a9c20f1e958a..97e2bead5c80 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -22,7 +22,9 @@ use arrow::{ datatypes::{DataType, TimeUnit}, }; use datafusion_common::utils::list_ndims; -use datafusion_common::{internal_err, plan_err, DataFusionError, Result}; +use datafusion_common::{ + internal_datafusion_err, internal_err, 
plan_err, DataFusionError, Result, +}; use super::binary::comparison_coercion; @@ -48,7 +50,6 @@ pub fn data_types( ); } } - let valid_types = get_valid_types(&signature.type_signature, current_types)?; if valid_types @@ -100,35 +101,35 @@ fn get_valid_types( // We need to find the coerced base type, mainly for cases like: // `array_append(List(null), i64)` -> `List(i64)` - let array_base_type = dbg!(datafusion_common::utils::base_type(array_type)); - let elem_base_type = dbg!(datafusion_common::utils::base_type(elem_type)); + let array_base_type = datafusion_common::utils::base_type(array_type); + let elem_base_type = datafusion_common::utils::base_type(elem_type); let new_base_type = comparison_coercion(&array_base_type, &elem_base_type); - if new_base_type.is_none() { - return internal_err!( + let new_base_type = new_base_type.ok_or_else(|| { + internal_datafusion_err!( "Coercion from {array_base_type:?} to {elem_base_type:?} not supported." - ); - } - let new_base_type = new_base_type.unwrap(); + ) + })?; let array_type = datafusion_common::utils::coerced_type_with_base_type_only( array_type, &new_base_type, ); + // dbg!(&array_type, &elem_type); match array_type { - DataType::List(ref field) | DataType::LargeList(ref field) => { - let elem_type = field.data_type(); - if is_append { - Ok(vec![vec![array_type.clone(), elem_type.to_owned()]]) + DataType::List(ref field) + | DataType::LargeList(ref field) + | DataType::FixedSizeList(ref field, _) => { + let elem_type = if array_base_type.eq(&DataType::Null) + || (elem_base_type.eq(&DataType::Null)) + { + field.data_type() } else { - Ok(vec![vec![elem_type.to_owned(), array_type.clone()]]) - } - } - DataType::FixedSizeList(ref field, _) => { - let elem_type = field.data_type(); + elem_type + }; if is_append { - Ok(vec![vec![array_type.clone(), elem_type.to_owned()]]) + Ok(vec![vec![array_type.clone(), elem_type.clone()]]) } else { Ok(vec![vec![elem_type.to_owned(), array_type.clone()]]) } @@ -169,7 +170,7 @@ fn 
get_valid_types( TypeSignature::Exact(valid_types) => vec![valid_types.clone()], TypeSignature::ArrayAndElement => { - return dbg!(array_append_or_prepend_valid_types(current_types, true)) + return array_append_or_prepend_valid_types(current_types, true) } TypeSignature::ElementAndArray => { return array_append_or_prepend_valid_types(current_types, false) diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index e072e4146f13..20dbaf385548 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -941,7 +941,7 @@ from arrays_values_without_nulls; ## array_element (aliases: array_extract, list_extract, list_element) # array_element error -query error DataFusion error: Error during planning: The array_element function can only accept list or largelist as the first argument +query error DataFusion error: Error during planning: No function matches the given name and argument types 'array_element\(Int64, Int64\)'. 
You might need to add explicit type casts.\n\tCandidate functions:\n\tarray_element\(ArrayAndElement\(List, T\)\) select array_element(1, 2); From 53bf53a2e3d17485d6de46f0e4406215b157bc2c Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Sat, 20 Jan 2024 19:00:32 +0800 Subject: [PATCH 07/24] add tests for array_element --- datafusion/common/src/scalar.rs | 5 +- datafusion/expr/src/built_in_function.rs | 4 +- datafusion/sqllogictest/test_files/array.slt | 81 +++++++++++++++++++- 3 files changed, 85 insertions(+), 5 deletions(-) diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index 2f9e374bd7f4..36b00b65e285 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -1483,7 +1483,9 @@ impl ScalarValue { DataType::Interval(IntervalUnit::MonthDayNano) => { build_array_primitive!(IntervalMonthDayNanoArray, IntervalMonthDayNano) } - DataType::List(_) | DataType::LargeList(_) => build_list_array(scalars)?, + DataType::List(_) + | DataType::LargeList(_) + | DataType::FixedSizeList(_, _) => build_list_array(scalars)?, DataType::Struct(fields) => { // Initialize a Vector to store the ScalarValues for each column let mut columns: Vec> = @@ -1595,7 +1597,6 @@ impl ScalarValue { | DataType::Time64(TimeUnit::Second) | DataType::Time64(TimeUnit::Millisecond) | DataType::Duration(_) - | DataType::FixedSizeList(_, _) | DataType::Union(_, _) | DataType::Map(_, _) | DataType::RunEndEncoded(_, _) => { diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index 892db6bf624b..cd6ae80164e6 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -599,10 +599,10 @@ impl BuiltinScalarFunction { } BuiltinScalarFunction::ArrayDistinct => Ok(input_expr_types[0].clone()), BuiltinScalarFunction::ArrayElement => match &input_expr_types[0] { - List(field) => Ok(field.data_type().clone()), + List(field)|FixedSizeList(field,_) => 
Ok(field.data_type().clone()), LargeList(field) => Ok(field.data_type().clone()), _ => plan_err!( - "The {self} function can only accept list or largelist as the first argument" + "The {self} function can only accept List, LargeList or FixedSizeList as the first argument" ), }, BuiltinScalarFunction::ArrayLength => Ok(UInt64), diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 20dbaf385548..05b48cfada01 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -89,6 +89,17 @@ AS VALUES (make_array(51, 52, NULL, 54, 55, 56, 57, 58, 59, 60), 5, NULL) ; +statement ok +CREATE TABLE fixed_slices +AS VALUES + (arrow_cast(make_array(NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10), 'FixedSizeList(10, Int64)'), 1, 1), + (arrow_cast(make_array(11, 12, 13, 14, 15, 16, 17, 18, NULL, 20), 'FixedSizeList(10, Int64)'), 2, -4), + (arrow_cast(make_array(21, 22, 23, NULL, 25, 26, 27, 28, 29, 30), 'FixedSizeList(10, Int64)'), 0, 0), + (arrow_cast(make_array(31, 32, 33, 34, 35, NULL, 37, 38, 39, 40), 'FixedSizeList(10, Int64)'), -4, -7), + (arrow_cast(make_array(41, 42, 43, 44, 45, 46, 47, 48, 49, 50), 'FixedSizeList(10, Int64)'), NULL, 6), + (arrow_cast(make_array(51, 52, NULL, 54, 55, 56, 57, 58, 59, 60),'FixedSizeList(10, Int64)'), 5, NULL) +; + statement ok CREATE TABLE arrayspop AS VALUES @@ -956,6 +967,11 @@ select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), ---- 2 l +query IT +select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), 2), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 3); +---- +2 l + # array_element scalar function #2 (with positive index; out of bounds) query IT select array_element(make_array(1, 2, 3, 4, 5), 7), array_element(make_array('h', 'e', 'l', 'l', 'o'), 11); @@ -967,6 +983,11 @@ select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 
'LargeList(Int64)'), ---- NULL NULL +query IT +select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 7), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 11); +---- +NULL NULL + # array_element scalar function #3 (with zero) query IT select array_element(make_array(1, 2, 3, 4, 5), 0), array_element(make_array('h', 'e', 'l', 'l', 'o'), 0); @@ -978,6 +999,11 @@ select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), ---- NULL NULL +query IT +select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), 0), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 0); +---- +NULL NULL + # array_element scalar function #4 (with NULL) query error select array_element(make_array(1, 2, 3, 4, 5), NULL), array_element(make_array('h', 'e', 'l', 'l', 'o'), NULL); @@ -985,6 +1011,9 @@ select array_element(make_array(1, 2, 3, 4, 5), NULL), array_element(make_array( query error select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), NULL), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), NULL); +query error +select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), NULL), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), NULL); + # array_element scalar function #5 (with negative index) query IT select array_element(make_array(1, 2, 3, 4, 5), -2), array_element(make_array('h', 'e', 'l', 'l', 'o'), -3); @@ -996,6 +1025,11 @@ select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), ---- 4 l +query IT +select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), -2), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), -3); +---- +4 l + # array_element scalar function #6 (with negative index; out of bounds) query IT select 
array_element(make_array(1, 2, 3, 4, 5), -11), array_element(make_array('h', 'e', 'l', 'l', 'o'), -7); @@ -1007,6 +1041,11 @@ select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), ---- NULL NULL +query IT +select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), -11), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), -7); +---- +NULL NULL + # array_element scalar function #7 (nested array) query ? select array_element(make_array(make_array(1, 2, 3, 4, 5), make_array(6, 7, 8, 9, 10)), 1); @@ -1018,6 +1057,11 @@ select array_element(arrow_cast(make_array(make_array(1, 2, 3, 4, 5), make_array ---- [1, 2, 3, 4, 5] +query ? +select array_element(arrow_cast(make_array(make_array(1, 2, 3, 4, 5), make_array(6, 7, 8, 9, 10)), 'FixedSizeList(2, List(Int64))'), 1); +---- +[1, 2, 3, 4, 5] + # array_extract scalar function #8 (function alias `array_element`) query IT select array_extract(make_array(1, 2, 3, 4, 5), 2), array_extract(make_array('h', 'e', 'l', 'l', 'o'), 3); @@ -1029,6 +1073,11 @@ select array_extract(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), ---- 2 l +query IT +select array_extract(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), 2), array_extract(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 3); +---- +2 l + # list_element scalar function #9 (function alias `array_element`) query IT select list_element(make_array(1, 2, 3, 4, 5), 2), list_element(make_array('h', 'e', 'l', 'l', 'o'), 3); @@ -1040,6 +1089,11 @@ select list_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2 ---- 2 l +query IT +select list_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), 2), list_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 3); +---- +2 l + # list_extract scalar function #10 (function alias `array_element`) query IT select list_extract(make_array(1, 
2, 3, 4, 5), 2), list_extract(make_array('h', 'e', 'l', 'l', 'o'), 3); @@ -1047,7 +1101,12 @@ select list_extract(make_array(1, 2, 3, 4, 5), 2), list_extract(make_array('h', 2 l query IT -select list_extract(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2), array_extract(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 3); +select list_extract(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2), list_extract(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 3); +---- +2 l + +query IT +select list_extract(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), 2), list_extract(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 3); ---- 2 l @@ -1074,6 +1133,16 @@ NULL NULL 55 +query I +select array_element(column1, column2) from fixed_slices; +---- +NULL +12 +NULL +37 +NULL +55 + # array_element with columns and scalars query II select array_element(make_array(1, 2, 3, 4, 5), column2), array_element(column1, 3) from slices; @@ -1097,6 +1166,16 @@ NULL 23 NULL 43 5 NULL +query II +select array_element(make_array(1, 2, 3, 4, 5), column2), array_element(column1, 3) from fixed_slices; +---- +1 3 +2 13 +NULL 23 +2 33 +NULL 43 +5 NULL + ## array_pop_back (aliases: `list_pop_back`) # array_pop_back scalar function #1 From c7cd1f5431f4a1b96f8f64f4b46262a37e96a399 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Sat, 20 Jan 2024 19:37:24 +0800 Subject: [PATCH 08/24] add tests for array_remove --- datafusion/sqllogictest/test_files/array.slt | 87 +++++++++++++++++++- 1 file changed, 86 insertions(+), 1 deletion(-) diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 05b48cfada01..771a5bd267b0 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -445,6 +445,15 @@ AS FROM arrays_with_repeating_elements ; +statement ok +CREATE TABLE fixed_arrays_with_repeating_elements +AS 
VALUES + (arrow_cast(make_array(1, 2, 1, 3, 2, 2, 1, 3, 2, 3), 'FixedSizeList(10, Int64)'), 2, 4, 3), + (arrow_cast(make_array(4, 4, 5, 5, 6, 5, 5, 5, 4, 4), 'FixedSizeList(10, Int64)'), 4, 7, 2), + (arrow_cast(make_array(7, 7, 7, 8, 7, 9, 7, 8, 7, 7), 'FixedSizeList(10, Int64)'), 7, 10, 5), + (arrow_cast(make_array(10, 11, 12, 10, 11, 12, 10, 11, 12, 10), 'FixedSizeList(10, Int64)'), 10, 13, 10) +; + statement ok CREATE TABLE nested_arrays_with_repeating_elements AS VALUES @@ -465,6 +474,15 @@ AS FROM nested_arrays_with_repeating_elements ; +statement ok +CREATE TABLE fixed_size_nested_arrays_with_repeating_elements +AS VALUES + (arrow_cast(make_array([1, 2, 3], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(10, List(Int64))'), [4, 5, 6], [10, 11, 12], 3), + (arrow_cast(make_array([10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]), 'FixedSizeList(10, List(Int64))'), [10, 11, 12], [19, 20, 21], 2), + (arrow_cast(make_array([19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]), 'FixedSizeList(10, List(Int64))'), [19, 20, 21], [28, 29, 30], 5), + (arrow_cast(make_array([28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]), 'FixedSizeList(10, List(Int64))'), [28, 29, 30], [28, 29, 30], 10) +; + # Array literal ## boolean coercion is not supported @@ -3594,6 +3612,13 @@ select array_remove(make_array(1, 2, 2, 1, 1), 2), array_remove(make_array(1.0, ---- [1, 2, 1, 1] [2.0, 2.0, 1.0, 1.0] [h, e, l, o] +query ??? 
+select array_remove(arrow_cast(make_array(1, 2, 2, 1, 1), 'FixedSizeList(5, Int64)'), 2), + array_remove(arrow_cast(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 'FixedSizeList(5, Float64)'), 1.0), + array_remove(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 'l'); +---- +[1, 2, 1, 1] [2.0, 2.0, 1.0, 1.0] [h, e, l, o] + query ??? select array_remove(make_array(1, null, 2, 3), 2), @@ -3602,6 +3627,14 @@ select ---- [1, , 3] [, 2.2, 3.3] [, bc] +query ??? +select + array_remove(arrow_cast(make_array(1, null, 2, 3), 'FixedSizeList(4, Int64)'), 2), + array_remove(arrow_cast(make_array(1.1, null, 2.2, 3.3), 'FixedSizeList(4, Float64)'), 1.1), + array_remove(arrow_cast(make_array('a', null, 'bc'), 'FixedSizeList(3, Utf8)'), 'a'); +---- +[1, , 3] [, 2.2, 3.3] [, bc] + # TODO: https://github.com/apache/arrow-datafusion/issues/7142 # query # select @@ -3614,12 +3647,24 @@ select array_remove(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8 ---- [[1, 2, 3], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [2, 3, 4], [5, 3, 1], [1, 3, 2]] +query ?? +select array_remove(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, List(Int64))'), [4, 5, 6]), + array_remove(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, List(Int64))'), [2, 3, 4]); +---- +[[1, 2, 3], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [2, 3, 4], [5, 3, 1], [1, 3, 2]] + # list_remove scalar function #3 (function alias `array_remove`) query ??? select list_remove(make_array(1, 2, 2, 1, 1), 2), list_remove(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 1.0), list_remove(make_array('h', 'e', 'l', 'l', 'o'), 'l'); ---- [1, 2, 1, 1] [2.0, 2.0, 1.0, 1.0] [h, e, l, o] +query ?? 
+select list_remove(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, List(Int64))'), [4, 5, 6]), + list_remove(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, List(Int64))'), [2, 3, 4]); +---- +[[1, 2, 3], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [2, 3, 4], [5, 3, 1], [1, 3, 2]] + # array_remove scalar function with columns #1 query ? select array_remove(column1, column2) from arrays_with_repeating_elements; @@ -3629,6 +3674,14 @@ select array_remove(column1, column2) from arrays_with_repeating_elements; [7, 7, 8, 7, 9, 7, 8, 7, 7] [11, 12, 10, 11, 12, 10, 11, 12, 10] +query ? +select array_remove(column1, column2) from fixed_arrays_with_repeating_elements; +---- +[1, 1, 3, 2, 2, 1, 3, 2, 3] +[4, 5, 5, 6, 5, 5, 5, 4, 4] +[7, 7, 8, 7, 9, 7, 8, 7, 7] +[11, 12, 10, 11, 12, 10, 11, 12, 10] + # array_remove scalar function with columns #2 (element is list) query ? select array_remove(column1, column2) from nested_arrays_with_repeating_elements; @@ -3638,6 +3691,14 @@ select array_remove(column1, column2) from nested_arrays_with_repeating_elements [[19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] +query ? 
+select array_remove(column1, column2) from fixed_size_nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] + # array_remove scalar function with columns and scalars #1 query ?? select array_remove(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2), array_remove(column1, 1) from arrays_with_repeating_elements; @@ -3647,9 +3708,27 @@ select array_remove(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2), a [1, 2, 2, 4, 5, 4, 4, 7, 10, 7, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] [1, 2, 2, 4, 5, 4, 4, 7, 7, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] +query ?? +select array_remove(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2), array_remove(column1, 1) from fixed_arrays_with_repeating_elements; +---- +[1, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8] [2, 1, 3, 2, 2, 1, 3, 2, 3] +[1, 2, 2, 5, 4, 4, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] +[1, 2, 2, 4, 5, 4, 4, 7, 10, 7, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] +[1, 2, 2, 4, 5, 4, 4, 7, 7, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] + # array_remove scalar function with columns and scalars #2 (element is list) query ?? 
-select array_remove(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), column2), array_remove(column1, make_array(1, 2, 3)) from nested_arrays_with_repeating_elements; +select array_remove(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), column2), + array_remove(column1, make_array(1, 2, 3)) from nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] + +query ?? 
+select array_remove(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), column2), + array_remove(column1, make_array(1, 2, 3)) from fixed_size_nested_arrays_with_repeating_elements; ---- [[1, 2, 3], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] [[1, 2, 3], [4, 5, 6], [4, 5, 6], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] @@ -5243,11 +5322,17 @@ drop table arrays_with_repeating_elements; statement ok drop table large_arrays_with_repeating_elements; +statement ok +drop table fixed_arrays_with_repeating_elements; + statement ok drop table nested_arrays_with_repeating_elements; statement ok drop table large_nested_arrays_with_repeating_elements; +statement ok +drop table fixed_size_nested_arrays_with_repeating_elements; + statement ok drop table flatten_table; From b9474ecbbe91c1bdf49952aaa9dd1a8cd2870fc3 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Sat, 20 Jan 2024 19:48:41 +0800 Subject: [PATCH 09/24] add tests for array_remove_n --- datafusion/sqllogictest/test_files/array.slt | 50 ++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 771a5bd267b0..806ae3cf191c 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -3799,18 +3799,35 @@ select array_remove_all(make_array(1, 2, 2, 1, 1), 2), array_remove_all(make_arr ---- [1, 1, 1] [2.0, 2.0] [h, e, o] +query ??? 
+select array_remove_all(arrow_cast(make_array(1, 2, 2, 1, 1), 'FixedSizeList(5, Int64)'), 2), array_remove_all(arrow_cast(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 'FixedSizeList(5, Float64)'), 1.0), array_remove_all(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 'l'); +---- +[1, 1, 1] [2.0, 2.0] [h, e, o] + # array_remove_all scalar function #2 (element is list) query ?? select array_remove_all(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), [4, 5, 6]), array_remove_all(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), [2, 3, 4]); ---- [[1, 2, 3], [5, 5, 5], [7, 8, 9]] [[1, 3, 2], [5, 3, 1], [1, 3, 2]] +query ?? +select array_remove_all(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, List(Int64))'), [4, 5, 6]), + array_remove_all(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, List(Int64))'), [2, 3, 4]); +---- +[[1, 2, 3], [5, 5, 5], [7, 8, 9]] [[1, 3, 2], [5, 3, 1], [1, 3, 2]] + # list_remove_all scalar function #3 (function alias `array_remove_all`) query ??? select list_remove_all(make_array(1, 2, 2, 1, 1), 2), list_remove_all(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 1.0), list_remove_all(make_array('h', 'e', 'l', 'l', 'o'), 'l'); ---- [1, 1, 1] [2.0, 2.0] [h, e, o] +query ?? +select list_remove_all(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, List(Int64))'), [4, 5, 6]), + list_remove_all(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, List(Int64))'), [2, 3, 4]); +---- +[[1, 2, 3], [5, 5, 5], [7, 8, 9]] [[1, 3, 2], [5, 3, 1], [1, 3, 2]] + # array_remove_all scalar function with columns #1 query ? 
select array_remove_all(column1, column2) from arrays_with_repeating_elements; @@ -3820,6 +3837,14 @@ select array_remove_all(column1, column2) from arrays_with_repeating_elements; [8, 9, 8] [11, 12, 11, 12, 11, 12] +query ? +select array_remove_all(column1, column2) from fixed_arrays_with_repeating_elements; +---- +[1, 1, 3, 1, 3, 3] +[5, 5, 6, 5, 5, 5] +[8, 9, 8] +[11, 12, 11, 12, 11, 12] + # array_remove_all scalar function with columns #2 (element is list) query ? select array_remove_all(column1, column2) from nested_arrays_with_repeating_elements; @@ -3829,6 +3854,14 @@ select array_remove_all(column1, column2) from nested_arrays_with_repeating_elem [[22, 23, 24], [25, 26, 27], [22, 23, 24]] [[31, 32, 33], [34, 35, 36], [31, 32, 33], [34, 35, 36], [31, 32, 33], [34, 35, 36]] +query ? +select array_remove_all(column1, column2) from fixed_size_nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [1, 2, 3], [7, 8, 9], [1, 2, 3], [7, 8, 9], [7, 8, 9]] +[[13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15]] +[[22, 23, 24], [25, 26, 27], [22, 23, 24]] +[[31, 32, 33], [34, 35, 36], [31, 32, 33], [34, 35, 36], [31, 32, 33], [34, 35, 36]] + # array_remove_all scalar function with columns and scalars #1 query ?? select array_remove_all(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2), array_remove_all(column1, 1) from arrays_with_repeating_elements; @@ -3838,6 +3871,14 @@ select array_remove_all(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2 [1, 2, 2, 4, 5, 4, 4, 10, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] [1, 2, 2, 4, 5, 4, 4, 7, 7, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] +query ?? 
+select array_remove_all(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2), array_remove_all(column1, 1) from fixed_arrays_with_repeating_elements; +---- +[1, 4, 5, 4, 4, 7, 7, 10, 7, 8] [2, 3, 2, 2, 3, 2, 3] +[1, 2, 2, 5, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] +[1, 2, 2, 4, 5, 4, 4, 10, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] +[1, 2, 2, 4, 5, 4, 4, 7, 7, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] + # array_remove_all scalar function with columns and scalars #2 (element is list) query ?? select array_remove_all(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), column2), array_remove_all(column1, make_array(1, 2, 3)) from nested_arrays_with_repeating_elements; @@ -3847,6 +3888,15 @@ select array_remove_all(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12] [[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [28, 29, 30], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] +query ?? 
+select array_remove_all(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), column2), + array_remove_all(column1, make_array(1, 2, 3)) from fixed_size_nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[4, 5, 6], [7, 8, 9], [4, 5, 6], [4, 5, 6], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [13, 14, 15], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [28, 29, 30], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] + ## trim_array (deprecated) ## array_length (aliases: `list_length`) From 597c2629ad59cd586c74be097b3568862b9bf9f1 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Sat, 20 Jan 2024 21:57:18 +0800 Subject: [PATCH 10/24] add tests for array_has --- .../expr/src/type_coercion/functions.rs | 8 +- datafusion/sqllogictest/test_files/array.slt | 185 ++++++++++++++++-- 2 files changed, 179 insertions(+), 14 deletions(-) diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index 97e2bead5c80..460bdb4b553d 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ 
b/datafusion/expr/src/type_coercion/functions.rs @@ -116,6 +116,12 @@ fn get_valid_types( &new_base_type, ); + // type coercion for nested FixedSizeArray + let elem_type = datafusion_common::utils::coerced_type_with_base_type_only( + elem_type, + &elem_base_type, + ); + // dbg!(&array_type, &elem_type); match array_type { DataType::List(ref field) @@ -126,7 +132,7 @@ fn get_valid_types( { field.data_type() } else { - elem_type + &elem_type }; if is_append { Ok(vec![vec![array_type.clone(), elem_type.clone()]]) diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 806ae3cf191c..a53291efb79d 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -77,6 +77,19 @@ AS FROM arrays ; +#TODO: create FixedSizeList with NULL column +statement ok +CREATE TABLE fixed_size_arrays +AS VALUES + (arrow_cast(make_array(make_array(NULL, 2),make_array(3, NULL)), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(1.1, 2.2, 3.3), 'FixedSizeList(3, Float64)'), arrow_cast(make_array('L', 'o', 'r', 'e', 'm'), 'FixedSizeList(5, Utf8)')), + (arrow_cast(make_array(make_array(3, 4),make_array(5, 6)), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(NULL, 5.5, 6.6), 'FixedSizeList(3, Float64)'), arrow_cast(make_array('i', 'p', NULL, 'u', 'm'), 'FixedSizeList(5, Utf8)')), + (arrow_cast(make_array(make_array(5, 6),make_array(7, 8)), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(7.7, 8.8, 9.9), 'FixedSizeList(3, Float64)'), arrow_cast(make_array('d', NULL, 'l', 'o', 'r'), 'FixedSizeList(5, Utf8)')), + (arrow_cast(make_array(make_array(7, NULL),make_array(9, 10)), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(10.1, NULL, 12.2), 'FixedSizeList(3, Float64)'), arrow_cast(make_array('s', 'i', 't', 'a', 'b'), 'FixedSizeList(5, Utf8)')), + (arrow_cast(make_array(make_array(7, NULL),make_array(9, 10)), 'FixedSizeList(2, List(Int64))'), 
arrow_cast(make_array(13.3, 14.4, 15.5), 'FixedSizeList(3, Float64)'), arrow_cast(make_array('a', 'm', 'e', 't', 'x'), 'FixedSizeList(5, Utf8)')), + (arrow_cast(make_array(make_array(11, 12),make_array(13, 14)), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(13.3, 14.4, 15.5), 'FixedSizeList(3, Float64)'), arrow_cast(make_array(',','a','b','c','d'), 'FixedSizeList(5, Utf8)')), + (arrow_cast(make_array(make_array(15, 16),make_array(NULL, 18)), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(16.6, 17.7, 18.8), 'FixedSizeList(3, Float64)'), arrow_cast(make_array(',','a','b','c','d'), 'FixedSizeList(5, Utf8)')) +; + statement ok CREATE TABLE slices AS VALUES @@ -189,6 +202,13 @@ AS VALUES (make_array(3, 4, 5), 2, make_array(1,2,3,4), make_array(2,5), make_array(2,4,6), make_array(1,3,5)) ; +statement ok +CREATE TABLE fixed_size_array_has_table_1D +AS VALUES + (arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'), 1, arrow_cast(make_array(1, 2, 3, 4), 'FixedSizeList(4, Int64)'), arrow_cast(make_array(1,3), 'FixedSizeList(2, Int64)'), arrow_cast(make_array(1,3,5), 'FixedSizeList(3, Int64)'), arrow_cast(make_array(2, 4, 6, 8, 1, 3, 5), 'FixedSizeList(7, Int64)')), + (arrow_cast(make_array(3, 4, 5), 'FixedSizeList(3, Int64)'), 2, arrow_cast(make_array(1, 2, 3, 4), 'FixedSizeList(4, Int64)'), arrow_cast(make_array(2,5), 'FixedSizeList(2, Int64)'), arrow_cast(make_array(2,4,6), 'FixedSizeList(3, Int64)'), arrow_cast(make_array(1, 3, 5, 7, 9, 11, 13), 'FixedSizeList(7, Int64)')) +; + statement ok CREATE TABLE array_has_table_1D_Float AS VALUES @@ -196,6 +216,13 @@ AS VALUES (make_array(3.0, 4.0, 5.0), 2.0, make_array(1.0,2.0,3.0,4.0), make_array(2.0,5.0), make_array(2.22, 1.11), make_array(1.11, 3.33)) ; +statement ok +CREATE TABLE fixed_size_array_has_table_1D_Float +AS VALUES + (arrow_cast(make_array(1.0, 2.0, 3.0), 'FixedSizeList(3, Float64)'), 1.0, arrow_cast(make_array(1.0, 2.0, 3.0, 4.0), 'FixedSizeList(4, Float64)'), 
arrow_cast(make_array(1.0,3.0), 'FixedSizeList(2, Float64)'), arrow_cast(make_array(1.11, 2.22), 'FixedSizeList(2, Float64)'), arrow_cast(make_array(2.22, 3.33), 'FixedSizeList(2, Float64)')), + (arrow_cast(make_array(3.0, 4.0, 5.0), 'FixedSizeList(3, Float64)'), 2.0, arrow_cast(make_array(1.0, 2.0, 3.0, 4.0), 'FixedSizeList(4, Float64)'), arrow_cast(make_array(2.0,5.0), 'FixedSizeList(2, Float64)'), arrow_cast(make_array(2.22, 1.11), 'FixedSizeList(2, Float64)'), arrow_cast(make_array(1.11, 3.33), 'FixedSizeList(2, Float64)')) +; + statement ok CREATE TABLE array_has_table_1D_Boolean AS VALUES @@ -203,6 +230,13 @@ AS VALUES (make_array(false, false, false), false, make_array(true, false, true), make_array(true, true), make_array(true, true), make_array(false,false,true)) ; +statement ok +CREATE TABLE fixed_size_array_has_table_1D_Boolean +AS VALUES + (arrow_cast(make_array(true, true, true), 'FixedSizeList(3, Boolean)'), false, arrow_cast(make_array(true, true, false, true, false), 'FixedSizeList(5, Boolean)'), arrow_cast(make_array(true, false, true), 'FixedSizeList(3, Boolean)'), arrow_cast(make_array(false, true), 'FixedSizeList(2, Boolean)'), arrow_cast(make_array(true, false, true), 'FixedSizeList(3, Boolean)')), + (arrow_cast(make_array(false, false, false), 'FixedSizeList(3, Boolean)'), false, arrow_cast(make_array(true, false, true, true, false), 'FixedSizeList(5, Boolean)'), arrow_cast(make_array(true, true, false), 'FixedSizeList(3, Boolean)'), arrow_cast(make_array(true, true), 'FixedSizeList(2, Boolean)'), arrow_cast(make_array(false,false,true), 'FixedSizeList(3, Boolean)')) +; + statement ok CREATE TABLE array_has_table_1D_UTF8 AS VALUES @@ -210,6 +244,13 @@ AS VALUES (make_array('a', 'bc', 'def'), 'defg', make_array('datafusion', 'rust', 'arrow'), make_array('datafusion', 'rust', 'arrow', 'python'), make_array('rust', 'arrow'), make_array('datafusion', 'rust', 'arrow')) ; +statement ok +CREATE TABLE fixed_size_array_has_table_1D_UTF8 +AS VALUES + 
(arrow_cast(make_array('a', 'bc', 'def'), 'FixedSizeList(3, Utf8)'), 'bc', arrow_cast(make_array('datafusion', 'rust', 'arrow'), 'FixedSizeList(3, Utf8)'), arrow_cast(make_array('rust', 'arrow', 'datafusion', 'rust'), 'FixedSizeList(4, Utf8)'), arrow_cast(make_array('rust', 'arrow', 'python'), 'FixedSizeList(3, Utf8)'), arrow_cast(make_array('data', 'fusion', 'rust'), 'FixedSizeList(3, Utf8)')), + (arrow_cast(make_array('a', 'bc', 'def'), 'FixedSizeList(3, Utf8)'), 'defg', arrow_cast(make_array('datafusion', 'rust', 'arrow'), 'FixedSizeList(3, Utf8)'), arrow_cast(make_array('datafusion', 'rust', 'arrow', 'python'), 'FixedSizeList(4, Utf8)'), arrow_cast(make_array('rust', 'arrow', 'python'), 'FixedSizeList(3, Utf8)'), arrow_cast(make_array('datafusion', 'rust', 'arrow'), 'FixedSizeList(3, Utf8)')) +; + statement ok CREATE TABLE array_has_table_2D AS VALUES @@ -217,6 +258,13 @@ AS VALUES (make_array([3,4], [5]), make_array(5), make_array([1,2,3,4], [5,6,7], [8,9,10]), make_array([1,2,3], [5,6,7], [8,9,10])) ; +statement ok +CREATE TABLE fixed_size_array_has_table_2D +AS VALUES + (arrow_cast(make_array([1,2], [3,4]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(1,3), 'FixedSizeList(2, Int64)'), arrow_cast(make_array([1,2,3], [4,5], [6,7]), 'FixedSizeList(3, List(Int64))'), arrow_cast(make_array([4,5], [6,7], [1,2]), 'FixedSizeList(3, List(Int64))')), + (arrow_cast(make_array([3,4], [5]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(5, 3), 'FixedSizeList(2, Int64)'), arrow_cast(make_array([1,2,3,4], [5,6,7], [8,9,10]), 'FixedSizeList(3, List(Int64))'), arrow_cast(make_array([1,2,3], [5,6,7], [8,9,10]), 'FixedSizeList(3, List(Int64))')) +; + statement ok CREATE TABLE array_has_table_2D_float AS VALUES @@ -224,6 +272,13 @@ AS VALUES (make_array([1.0, 2.0, 3.0], [1.1, 2.2], [3.3]), make_array([1.0], [1.1, 2.2], [3.3])) ; +statement ok +CREATE TABLE fixed_size_array_has_table_2D_Float +AS VALUES + (arrow_cast(make_array([1.0, 2.0, 3.0], [1.1, 2.2], 
[3.3]), 'FixedSizeList(3, List(Float64))'), arrow_cast(make_array([1.1, 2.2], [3.3], [4.4]), 'FixedSizeList(3, List(Float64))')), + (arrow_cast(make_array([1.0, 2.0, 3.0], [1.1, 2.2], [3.3]), 'FixedSizeList(3, List(Float64))'), arrow_cast(make_array([1.0], [1.1, 2.2], [3.3]), 'FixedSizeList(3, List(Float64))')) +; + statement ok CREATE TABLE array_has_table_3D AS VALUES @@ -236,6 +291,18 @@ AS VALUES (make_array([[1], [2]], [[2], [3]]), make_array([1], [2])) ; +statement ok +CREATE TABLE fixed_size_array_has_table_3D +AS VALUES + (arrow_cast(make_array([[1,2]], [[3, 4]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([1], [2]), 'FixedSizeList(2, List(Int64))')), + (arrow_cast(make_array([[1,2]], [[4, 4]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([1,2], [3, 4]), 'FixedSizeList(2, List(Int64))')), + (arrow_cast(make_array([[1,2]], [[4, 4]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([1,2,3], [1]), 'FixedSizeList(2, List(Int64))')), + (arrow_cast(make_array([[1], [2]], []), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([2], [3]), 'FixedSizeList(2, List(Int64))')), + (arrow_cast(make_array([[1], [2]], []), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([1], [2]), 'FixedSizeList(2, List(Int64))')), + (arrow_cast(make_array([[1], [2]], [[2], [3]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([1], [2]), 'FixedSizeList(2, List(Int64))')), + (arrow_cast(make_array([[1], [2]], [[2], [3]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([1], [2]), 'FixedSizeList(2, List(Int64))')) +; + statement ok CREATE TABLE array_distinct_table_1D AS VALUES @@ -4251,6 +4318,23 @@ select array_has(arrow_cast(make_array(1,2), 'LargeList(Int64)'), 1), ---- true true true true true false true false true false true false +query BBBBBBBBBBBB +select array_has(arrow_cast(make_array(1,2), 'FixedSizeList(2, Int64)'), 1), + array_has(arrow_cast(make_array(1,2,NULL), 
'FixedSizeList(3, Int64)'), 1), + array_has(arrow_cast(make_array([2,3], [3,4]), 'FixedSizeList(2, List(Int64))'), make_array(2,3)), + array_has(arrow_cast(make_array([[1], [2,3]], [[4,5], [6]]), 'FixedSizeList(2, List(List(Int64)))'), make_array([1], [2,3])), + array_has(arrow_cast(make_array([[1], [2,3]], [[4,5], [6]]), 'FixedSizeList(2, List(List(Int64)))'), make_array([4,5], [6])), + array_has(arrow_cast(make_array([[1], [2,3]], [[4,5], [6]]), 'FixedSizeList(2, List(List(Int64)))'), make_array([1])), + array_has(arrow_cast(make_array([[[1]]]), 'FixedSizeList(1, List(List(List(Int64))))'), make_array([[1]])), + array_has(arrow_cast(make_array([[[1]]], [[[1], [2]]]), 'FixedSizeList(2, List(List(List(Int64))))'), make_array([[2]])), + array_has(arrow_cast(make_array([[[1]]], [[[1], [2]]]), 'FixedSizeList(2, List(List(List(Int64))))'), make_array([[1], [2]])), + list_has(arrow_cast(make_array(1,2,3), 'FixedSizeList(3, Int64)'), 4), + array_contains(arrow_cast(make_array(1,2,3), 'FixedSizeList(3, Int64)'), 3), + list_contains(arrow_cast(make_array(1,2,3), 'FixedSizeList(3, Int64)'), 0) +; +---- +true true true true true false true false true false true false + query BBB select array_has(column1, column2), array_has_all(column3, column4), @@ -4269,6 +4353,15 @@ from array_has_table_1D; true true true false false false +query BBB +select array_has(column1, column2), + array_has_all(column3, column4), + array_has_any(column5, column6) +from fixed_size_array_has_table_1D; +---- +true true true +false false false + query BBB select array_has(column1, column2), array_has_all(column3, column4), @@ -4287,6 +4380,15 @@ from array_has_table_1D_Float; true true false false false true +query BBB +select array_has(column1, column2), + array_has_all(column3, column4), + array_has_any(column5, column6) +from fixed_size_array_has_table_1D_Float; +---- +true true true +false false true + query BBB select array_has(column1, column2), array_has_all(column3, column4), @@ -4305,6 
+4407,15 @@ from array_has_table_1D_Boolean; false true true true true true +query BBB +select array_has(column1, column2), + array_has_all(column3, column4), + array_has_any(column5, column6) +from fixed_size_array_has_table_1D_Boolean; +---- +false true true +true true true + query BBB select array_has(column1, column2), array_has_all(column3, column4), @@ -4323,6 +4434,13 @@ from array_has_table_1D_UTF8; true true false false false true +query B +select array_has(column1, column2) +from fixed_size_array_has_table_1D_UTF8; +---- +true +false + query BB select array_has(column1, column2), array_has_all(column3, column4) @@ -4339,6 +4457,14 @@ from array_has_table_2D; false true true false +query BB +select array_has(column1, column2), + array_has_all(column3, column4) +from fixed_size_array_has_table_2D; +---- +false false +false false + query B select array_has_all(column1, column2) from array_has_table_2D_float; @@ -4353,6 +4479,13 @@ from array_has_table_2D_float; true false +query B +select array_has_all(column1, column2) +from fixed_size_array_has_table_2D_float; +---- +false +false + query B select array_has(column1, column2) from array_has_table_3D; ---- @@ -4375,6 +4508,17 @@ true false true +query B +select array_has(column1, column2) from fixed_size_array_has_table_3D; +---- +false +false +false +false +true +true +true + query BBBB select array_has(column1, make_array(5, 6)), array_has(column1, make_array(7, NULL)), @@ -4403,6 +4547,21 @@ false true false false false false false false false false false false +query BBBB +select array_has(column1, make_array(5, 6)), + array_has(column1, make_array(7, NULL)), + array_has(column2, 5.5), + array_has(column3, 'o') +from fixed_size_arrays; +---- +false false false true +true false true false +true false false true +false true false false +false true false false +false false false false +false false false false + query BBBBBBBBBBBBB select array_has_all(make_array(1,2,3), make_array(1,3)), 
array_has_all(make_array(1,2,3), make_array(1,4)), @@ -4440,19 +4599,19 @@ select array_has_all(arrow_cast(make_array(1,2,3), 'LargeList(Int64)'), arrow_ca true false true false false false true true false false true false true query BBBBBBBBBBBBB -select array_has_all(arrow_cast(make_array(1,2,3), 'LargeList(Int64)'), arrow_cast(make_array(1,3), 'LargeList(Int64)')), - array_has_all(arrow_cast(make_array(1,2,3),'LargeList(Int64)'), arrow_cast(make_array(1,4), 'LargeList(Int64)')), - array_has_all(arrow_cast(make_array([1,2], [3,4]), 'LargeList(List(Int64))'), arrow_cast(make_array([1,2]), 'LargeList(List(Int64))')), - array_has_all(arrow_cast(make_array([1,2], [3,4]), 'LargeList(List(Int64))'), arrow_cast(make_array([1,3]), 'LargeList(List(Int64))')), - array_has_all(arrow_cast(make_array([1,2], [3,4]), 'LargeList(List(Int64))'), arrow_cast(make_array([1,2], [3,4], [5,6]), 'LargeList(List(Int64))')), - array_has_all(arrow_cast(make_array([[1,2,3]]), 'LargeList(List(List(Int64)))'), arrow_cast(make_array([[1]]), 'LargeList(List(List(Int64)))')), - array_has_all(arrow_cast(make_array([[1,2,3]]), 'LargeList(List(List(Int64)))'), arrow_cast(make_array([[1,2,3]]), 'LargeList(List(List(Int64)))')), - array_has_any(arrow_cast(make_array(1,2,3),'LargeList(Int64)'), arrow_cast(make_array(1,10,100), 'LargeList(Int64)')), - array_has_any(arrow_cast(make_array(1,2,3),'LargeList(Int64)'), arrow_cast(make_array(10,100),'LargeList(Int64)')), - array_has_any(arrow_cast(make_array([1,2], [3,4]), 'LargeList(List(Int64))'), arrow_cast(make_array([1,10], [10,4]), 'LargeList(List(Int64))')), - array_has_any(arrow_cast(make_array([1,2], [3,4]), 'LargeList(List(Int64))'), arrow_cast(make_array([10,20], [3,4]), 'LargeList(List(Int64))')), - array_has_any(arrow_cast(make_array([[1,2,3]]), 'LargeList(List(List(Int64)))'), arrow_cast(make_array([[1,2,3], [4,5,6]]), 'LargeList(List(List(Int64)))')), - array_has_any(arrow_cast(make_array([[1,2,3]]), 'LargeList(List(List(Int64)))'), 
arrow_cast(make_array([[1,2,3]], [[4,5,6]]), 'LargeList(List(List(Int64)))')) +select array_has_all(arrow_cast(make_array(1,2,3), 'FixedSizeList(3, Int64)'), arrow_cast(make_array(1, 3), 'FixedSizeList(2, Int64)')), + array_has_all(arrow_cast(make_array(1,2,3),'FixedSizeList(3, Int64)'), arrow_cast(make_array(1, 4), 'FixedSizeList(2, Int64)')), + array_has_all(arrow_cast(make_array([1,2], [3,4]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array([1,2]), 'FixedSizeList(1, List(Int64))')), + array_has_all(arrow_cast(make_array([1,2], [3,4]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array([1,3]), 'FixedSizeList(1, List(Int64))')), + array_has_all(arrow_cast(make_array([1,2], [3,4]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array([1,2], [3,4], [5,6]), 'FixedSizeList(3, List(Int64))')), + array_has_all(arrow_cast(make_array([[1,2,3]]), 'FixedSizeList(1, List(List(Int64)))'), arrow_cast(make_array([[1]]), 'FixedSizeList(1, List(List(Int64)))')), + array_has_all(arrow_cast(make_array([[1,2,3]]), 'FixedSizeList(1, List(List(Int64)))'), arrow_cast(make_array([[1,2,3]]), 'FixedSizeList(1, List(List(Int64)))')), + array_has_any(arrow_cast(make_array(1,2,3),'FixedSizeList(3, Int64)'), arrow_cast(make_array(1,10,100), 'FixedSizeList(3, Int64)')), + array_has_any(arrow_cast(make_array(1,2,3),'FixedSizeList(3, Int64)'), arrow_cast(make_array(10, 100),'FixedSizeList(2, Int64)')), + array_has_any(arrow_cast(make_array([1,2], [3,4]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array([1,10], [10,4]), 'FixedSizeList(2, List(Int64))')), + array_has_any(arrow_cast(make_array([1,2], [3,4]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array([10,20], [3,4]), 'FixedSizeList(2, List(Int64))')), + array_has_any(arrow_cast(make_array([[1,2,3]]), 'FixedSizeList(1, List(List(Int64)))'), arrow_cast(make_array([[1,2,3], [4,5,6]]), 'FixedSizeList(1, List(List(Int64)))')), + array_has_any(arrow_cast(make_array([[1,2,3]]), 'FixedSizeList(1, List(List(Int64)))'), 
arrow_cast(make_array([[1,2,3]], [[4,5,6]]), 'FixedSizeList(2, List(List(Int64)))')) ; ---- true false true false false false true true false false true false true From 97dc372155170af51c60cc9b82d2de61f302ca94 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Sat, 20 Jan 2024 22:13:08 +0800 Subject: [PATCH 11/24] fix comment --- .../expr/src/type_coercion/functions.rs | 12 ++++++++---- datafusion/sqllogictest/test_files/array.slt | 19 ++++++++++--------- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index 460bdb4b553d..09273d4d2917 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -116,19 +116,23 @@ fn get_valid_types( &new_base_type, ); - // type coercion for nested FixedSizeArray + // type coercion for nested FixedSizeList let elem_type = datafusion_common::utils::coerced_type_with_base_type_only( elem_type, &elem_base_type, ); - // dbg!(&array_type, &elem_type); + let array_dim = datafusion_common::utils::list_ndims(&array_type); + let elem_dim = datafusion_common::utils::list_ndims(&elem_type); + + dbg!(&array_type, &elem_type); match array_type { DataType::List(ref field) | DataType::LargeList(ref field) | DataType::FixedSizeList(ref field, _) => { - let elem_type = if array_base_type.eq(&DataType::Null) - || (elem_base_type.eq(&DataType::Null)) + let elem_type = if (array_base_type.eq(&DataType::Null) + || elem_base_type.eq(&DataType::Null)) + && elem_dim != array_dim { field.data_type() } else { diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index a53291efb79d..1010894af4d4 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -3388,16 +3388,17 @@ select array_union(arrow_cast([], 'LargeList(Null)'), arrow_cast([], 'LargeList( ---- [] -# array_union scalar function #7 
-query ? -select array_union([[null]], []); ----- -[[]] +#TODO: Union should have same dimensions +## array_union scalar function #7 +#query ? +#select array_union([[null]], []); +#---- +#[[]] -query ? -select array_union(arrow_cast([[null]], 'LargeList(List(Null))'), arrow_cast([], 'LargeList(Null)')); ----- -[[]] +#query ? +#select array_union(arrow_cast([[null]], 'LargeList(List(Null))'), arrow_cast([], 'LargeList(Null)')); +#---- +#[[]] # array_union scalar function #8 query ? From b01cca093d0f3c8530754a6e7486063d2cbb6733 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Sun, 21 Jan 2024 10:26:11 +0800 Subject: [PATCH 12/24] fix comment --- .../expr/src/type_coercion/functions.rs | 1 - datafusion/sqllogictest/test_files/array.slt | 45 ++++++++++++++----- 2 files changed, 34 insertions(+), 12 deletions(-) diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index 09273d4d2917..3e32c0dd46c0 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -125,7 +125,6 @@ fn get_valid_types( let array_dim = datafusion_common::utils::list_ndims(&array_type); let elem_dim = datafusion_common::utils::list_ndims(&elem_type); - dbg!(&array_type, &elem_type); match array_type { DataType::List(ref field) | DataType::LargeList(ref field) diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 1010894af4d4..8823dddc9785 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -3388,17 +3388,16 @@ select array_union(arrow_cast([], 'LargeList(Null)'), arrow_cast([], 'LargeList( ---- [] -#TODO: Union should have same dimensions -## array_union scalar function #7 -#query ? -#select array_union([[null]], []); -#---- -#[[]] +# array_union scalar function #7 +query ? +select array_union([[null]], []); +---- +[[]] -#query ? 
-#select array_union(arrow_cast([[null]], 'LargeList(List(Null))'), arrow_cast([], 'LargeList(Null)')); -#---- -#[[]] +query ? +select array_union(arrow_cast([[null]], 'LargeList(List(Null))'), arrow_cast([], 'LargeList(Null)')); +---- +[[]] # array_union scalar function #8 query ? @@ -5442,9 +5441,15 @@ drop table arrays; statement ok drop table large_arrays; +statement ok +drop table fixed_size_arrays; + statement ok drop table slices; +statement ok +drop table fixed_slices; + statement ok drop table arrayspop; @@ -5521,7 +5526,25 @@ statement ok drop table large_array_intersect_table_3D; statement ok -drop table arrays_values_without_nulls; +drop table fixed_size_array_has_table_1D; + +statement ok +drop table fixed_size_array_has_table_1D_Float; + +statement ok +drop table fixed_size_array_has_table_1D_Boolean; + +statement ok +drop table fixed_size_array_has_table_1D_UTF8; + +statement ok +drop table fixed_size_array_has_table_2D; + +statement ok +drop table fixed_size_array_has_table_2D_float; + +statement ok +drop table fixed_size_array_has_table_3D; statement ok drop table arrays_range; From 2ecb193644e042324dc25215117b5e23c9433b68 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Sun, 21 Jan 2024 10:36:53 +0800 Subject: [PATCH 13/24] add tests for array_positoins --- datafusion/sqllogictest/test_files/array.slt | 73 ++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 8823dddc9785..39b6ca0c642e 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -143,6 +143,13 @@ AS FROM nested_arrays ; +statement ok +CREATE TABLE fixed_size_nested_arrays +AS VALUES + (arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6)), 'FixedSizeList(6, List(Int64))'), arrow_cast(make_array(7, 8, 9), 'FixedSizeList(3, Int64)'), 
2, arrow_cast(make_array([[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array(11, 12, 13), 'FixedSizeList(3, Int64)')), + (arrow_cast(make_array(make_array(4, 5, 6), make_array(10, 11, 12), make_array(4, 9, 8), make_array(7, 8, 9), make_array(10, 11, 12), make_array(1, 8, 7)), 'FixedSizeList(6, List(Int64))'), arrow_cast(make_array(10, 11, 12), 'FixedSizeList(3, Int64)'), 3, arrow_cast(make_array([[11, 12, 13], [14, 15, 16]], [[17, 18, 19], [20, 21, 22]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array(121, 131, 141), 'FixedSizeList(3, Int64)')) +; + statement ok CREATE TABLE arrays_values AS VALUES @@ -485,6 +492,15 @@ AS SELECT FROM arrays_values_without_nulls ; +statement ok +CREATE TABLE fixed_size_arrays_values_without_nulls +AS VALUES + (arrow_cast(make_array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), 'FixedSizeList(10, Int64)'), 1, 1, ',', [2,3]), + (arrow_cast(make_array(11, 12, 13, 14, 15, 16, 17, 18, 19, 20), 'FixedSizeList(10, Int64)'), 12, 2, '.', [4,5]), + (arrow_cast(make_array(21, 22, 23, 24, 25, 26, 27, 28, 29, 30), 'FixedSizeList(10, Int64)'), 23, 3, '-', [6,7]), + (arrow_cast(make_array(31, 32, 33, 34, 35, 26, 37, 38, 39, 40), 'FixedSizeList(10, Int64)'), 34, 4, 'ok', [8,9]) +; + statement ok CREATE TABLE arrays_range AS VALUES @@ -2663,6 +2679,11 @@ select array_positions(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), ---- [3, 4] [5] [1, 2, 3] +query ??? +select array_positions(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'FixedSizeList(5, Utf8)'), 'l'), array_positions(arrow_cast([1, 2, 3, 4, 5], 'FixedSizeList(5, Int64)'), 5), array_positions(arrow_cast([1, 1, 1], 'FixedSizeList(3, Int64)'), 1); +---- +[3, 4] [5] [1, 2, 3] + # array_positions scalar function #2 (element is list) query ? 
select array_positions(make_array([1, 2, 3], [2, 1, 3], [1, 5, 6], [2, 1, 3], [4, 5, 6]), [2, 1, 3]); @@ -2674,6 +2695,11 @@ select array_positions(arrow_cast(make_array([1, 2, 3], [2, 1, 3], [1, 5, 6], [2 ---- [2, 4] +query ? +select array_positions(arrow_cast(make_array([1, 2, 3], [2, 1, 3], [1, 5, 6], [2, 1, 3], [4, 5, 6]), 'FixedSizeList(5, List(Int64))'), [2, 1, 3]); +---- +[2, 4] + # list_positions scalar function #3 (function alias `array_positions`) query ??? select list_positions(['h', 'e', 'l', 'l', 'o'], 'l'), list_positions([1, 2, 3, 4, 5], 5), list_positions([1, 1, 1], 1); @@ -2685,6 +2711,13 @@ select list_positions(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), ---- [3, 4] [5] [1, 2, 3] +query ??? +select list_positions(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'FixedSizeList(5, Utf8)'), 'l'), + list_positions(arrow_cast([1, 2, 3, 4, 5], 'FixedSizeList(5, Int64)'), 5), + list_positions(arrow_cast([1, 1, 1], 'FixedSizeList(3, Int64)'), 1); +---- +[3, 4] [5] [1, 2, 3] + # array_positions with columns #1 query ? select array_positions(column1, column2) from arrays_values_without_nulls; @@ -2702,6 +2735,14 @@ select array_positions(arrow_cast(column1, 'LargeList(Int64)'), column2) from ar [3] [4] +query ? +select array_positions(arrow_cast(column1, 'LargeList(Int64)'), column2) from fixed_size_arrays_values_without_nulls; +---- +[1] +[2] +[3] +[4] + # array_positions with columns #2 (element is list) query ? select array_positions(column1, column2) from nested_arrays; @@ -2715,6 +2756,12 @@ select array_positions(arrow_cast(column1, 'LargeList(List(Int64))'), column2) f [3] [2, 5] +query ? +select array_positions(arrow_cast(column1, 'LargeList(List(Int64))'), column2) from fixed_size_nested_arrays; +---- +[3] +[2, 5] + # array_positions with columns and scalars #1 query ?? 
select array_positions(column1, 4), array_positions(array[1, 2, 23, 13, 33, 45], column2) from arrays_values_without_nulls; @@ -2732,6 +2779,14 @@ select array_positions(arrow_cast(column1, 'LargeList(Int64)'), 4), array_positi [] [3] [] [] +query ?? +select array_positions(column1, 4), array_positions(array[1, 2, 23, 13, 33, 45], column2) from fixed_size_arrays_values_without_nulls; +---- +[4] [1] +[] [] +[] [3] +[] [] + # array_positions with columns and scalars #2 (element is list) query ?? select array_positions(column1, make_array(4, 5, 6)), array_positions(make_array([1, 2, 3], [11, 12, 13], [4, 5, 6]), column2) from nested_arrays; @@ -2745,6 +2800,12 @@ select array_positions(arrow_cast(column1, 'LargeList(List(Int64))'), make_array [6] [] [1] [] +query ?? +select array_positions(column1, make_array(4, 5, 6)), array_positions(make_array([1, 2, 3], [11, 12, 13], [4, 5, 6]), column2) from fixed_size_nested_arrays; +---- +[6] [] +[1] [] + ## array_replace (aliases: `list_replace`) # array_replace scalar function #1 @@ -5435,6 +5496,9 @@ drop table nested_arrays; statement ok drop table large_nested_arrays; +statement ok +drop table fixed_size_nested_arrays; + statement ok drop table arrays; @@ -5569,3 +5633,12 @@ drop table fixed_size_nested_arrays_with_repeating_elements; statement ok drop table flatten_table; + +statement ok +drop table arrays_values_without_nulls; + +statement ok +drop table large_arrays_values_without_nulls; + +statement ok +drop table fixed_size_arrays_values_without_nulls; From 7e0573bc2bdd96749d43c056cbc30a2281dc1cf8 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Sun, 21 Jan 2024 10:40:06 +0800 Subject: [PATCH 14/24] test chore --- datafusion/sqllogictest/test_files/arrow_typeof.slt | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/datafusion/sqllogictest/test_files/arrow_typeof.slt b/datafusion/sqllogictest/test_files/arrow_typeof.slt index 740d6b89ec48..8b3bd7eac95d 100644 --- 
a/datafusion/sqllogictest/test_files/arrow_typeof.slt +++ b/datafusion/sqllogictest/test_files/arrow_typeof.slt @@ -399,6 +399,7 @@ NULL #---- #[1] + query ? select arrow_cast([1], 'FixedSizeList(1, Int64)'); ---- @@ -412,12 +413,12 @@ select arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'); ---- [1, 2, 3] -query ? -select arrow_cast([1, 2, 3], 'FixedSizeList(3, Int64)'); ----- -[1, 2, 3] - query T select arrow_typeof(arrow_cast(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 'FixedSizeList(3, Int64)')); ---- FixedSizeList(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3) + +query ? +select arrow_cast([1, 2, 3], 'FixedSizeList(3, Int64)'); +---- +[1, 2, 3] From 25e620a03f25ad4889181967d6109c265f2a7089 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Sun, 21 Jan 2024 10:40:47 +0800 Subject: [PATCH 15/24] test chore --- datafusion/sqllogictest/test_files/arrow_typeof.slt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sqllogictest/test_files/arrow_typeof.slt b/datafusion/sqllogictest/test_files/arrow_typeof.slt index 8b3bd7eac95d..8e2a091423da 100644 --- a/datafusion/sqllogictest/test_files/arrow_typeof.slt +++ b/datafusion/sqllogictest/test_files/arrow_typeof.slt @@ -421,4 +421,4 @@ FixedSizeList(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0 query ? 
select arrow_cast([1, 2, 3], 'FixedSizeList(3, Int64)'); ---- -[1, 2, 3] +[1, 2, 3] \ No newline at end of file From 0c08cc96218570c53d40b6bc650faff597dca490 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Mon, 22 Jan 2024 12:27:30 +0800 Subject: [PATCH 16/24] remove useless logic --- datafusion/expr/src/type_coercion/functions.rs | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index 3e32c0dd46c0..0c5ed861a0b7 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -129,14 +129,13 @@ fn get_valid_types( DataType::List(ref field) | DataType::LargeList(ref field) | DataType::FixedSizeList(ref field, _) => { - let elem_type = if (array_base_type.eq(&DataType::Null) - || elem_base_type.eq(&DataType::Null)) - && elem_dim != array_dim - { - field.data_type() - } else { - &elem_type - }; + // for the functions with signature 'array_element(array, int) -> element_type' + let elem_type = + if elem_base_type.eq(&DataType::Null) && elem_dim != array_dim { + field.data_type() + } else { + &elem_type + }; if is_append { Ok(vec![vec![array_type.clone(), elem_type.clone()]]) } else { From 19e322a50cadc976d410300a3e96618bc9abcc26 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Mon, 22 Jan 2024 12:34:33 +0800 Subject: [PATCH 17/24] refatctor coerced_type_with_base_type_only function --- datafusion/common/src/utils.rs | 41 +++++++++++++--------------------- 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/datafusion/common/src/utils.rs b/datafusion/common/src/utils.rs index db1ed55dcfd7..59b36dd2b5b4 100644 --- a/datafusion/common/src/utils.rs +++ b/datafusion/common/src/utils.rs @@ -464,24 +464,9 @@ pub fn coerced_type_with_base_type_only( base_type: &DataType, ) -> DataType { match data_type { - DataType::List(field) | DataType::FixedSizeList(field, _) => { - let data_type = match 
field.data_type() { - // nested type could be different list type - DataType::List(_) - | DataType::FixedSizeList(_, _) - | DataType::LargeList(_) => { - coerced_type_with_base_type_only(field.data_type(), base_type) - } - _ => base_type.to_owned(), - }; - - DataType::List(Arc::new(Field::new( - field.name(), - data_type, - field.is_nullable(), - ))) - } - DataType::LargeList(field) => { + DataType::List(field) + | DataType::FixedSizeList(field, _) + | DataType::LargeList(field) => { let data_type = match field.data_type() { // nested type could be different list type DataType::List(_) @@ -491,14 +476,20 @@ pub fn coerced_type_with_base_type_only( } _ => base_type.to_owned(), }; - - DataType::LargeList(Arc::new(Field::new( - field.name(), - data_type, - field.is_nullable(), - ))) + if matches!(data_type, DataType::LargeList(_)) { + DataType::LargeList(Arc::new(Field::new( + field.name(), + data_type, + field.is_nullable(), + ))) + } else { + DataType::List(Arc::new(Field::new( + field.name(), + data_type, + field.is_nullable(), + ))) + } } - _ => base_type.clone(), } } From 707c0c8ed5561226d49fc0fa137651ba8c923dfb Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Mon, 22 Jan 2024 14:36:08 +0800 Subject: [PATCH 18/24] add null test for array_has --- datafusion/expr/src/built_in_function.rs | 7 +- .../expr/src/type_coercion/functions.rs | 2 +- datafusion/sqllogictest/test_files/array.slt | 137 ++++++++++++------ 3 files changed, 96 insertions(+), 50 deletions(-) diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index cd6ae80164e6..6b459cb36bbd 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -964,9 +964,10 @@ impl BuiltinScalarFunction { } BuiltinScalarFunction::ArrayExcept => Signature::any(2, self.volatility()), BuiltinScalarFunction::Flatten => Signature::any(1, self.volatility()), - BuiltinScalarFunction::ArrayHasAll - | BuiltinScalarFunction::ArrayHasAny - | 
BuiltinScalarFunction::ArrayHas => { + BuiltinScalarFunction::ArrayHasAll | BuiltinScalarFunction::ArrayHasAny => { + Signature::any(2, self.volatility()) + } + BuiltinScalarFunction::ArrayHas => { Signature::array_and_element(self.volatility()) } BuiltinScalarFunction::ArrayLength => { diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index 0c5ed861a0b7..6f36c2cfa8f1 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -129,7 +129,7 @@ fn get_valid_types( DataType::List(ref field) | DataType::LargeList(ref field) | DataType::FixedSizeList(ref field, _) => { - // for the functions with signature 'array_element(array, int) -> element_type' + // for the functions with signature 'array_element(array, int)' let elem_type = if elem_base_type.eq(&DataType::Null) && elem_dim != array_dim { field.data_type() diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 39b6ca0c642e..48e6d59f49f6 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -4345,6 +4345,21 @@ NULL 1 1 ## array_has/array_has_all/array_has_any +query BB +select array_has([], null), + array_has([1, 2, 3], null); +---- +false false + +#TODO: array_has_all and array_has_any cannot handle NULL +#query BBBB +#select array_has_any([], null), +# array_has_any([1, 2, 3], null), +# array_has_all([], null), +# array_has_all([1, 2, 3], null); +#---- +#false false false false + query BBBBBBBBBBBB select array_has(make_array(1,2), 1), array_has(make_array(1,2,NULL), 1), @@ -4414,14 +4429,21 @@ from array_has_table_1D; true true true false false false -query BBB -select array_has(column1, column2), - array_has_all(column3, column4), - array_has_any(column5, column6) +query B +select array_has(column1, column2) from fixed_size_array_has_table_1D; ---- -true true true -false false false 
+true +false + +#TODO: array_has_all and array_has_any cannot handle FixedSizeList +#query BB +#select array_has_all(column3, column4), +# array_has_any(column5, column6) +#from fixed_size_array_has_table_1D; +#---- +#true true +#false false query BBB select array_has(column1, column2), @@ -4441,14 +4463,21 @@ from array_has_table_1D_Float; true true false false false true -query BBB -select array_has(column1, column2), - array_has_all(column3, column4), - array_has_any(column5, column6) +query B +select array_has(column1, column2) from fixed_size_array_has_table_1D_Float; ---- -true true true -false false true +true +false + +#TODO: array_has_all and array_has_any cannot handle FixedSizeList +#query BB +#select array_has_all(column3, column4), +# array_has_any(column5, column6) +#from fixed_size_array_has_table_1D_Float; +#---- +#true true +#false true query BBB select array_has(column1, column2), @@ -4468,14 +4497,21 @@ from array_has_table_1D_Boolean; false true true true true true -query BBB -select array_has(column1, column2), - array_has_all(column3, column4), - array_has_any(column5, column6) +query B +select array_has(column1, column2) from fixed_size_array_has_table_1D_Boolean; ---- -false true true -true true true +false +true + +#TODO: array_has_all and array_has_any cannot handle FixedSizeList +#query BB +#select array_has_all(column3, column4), +# array_has_any(column5, column6) +#from fixed_size_array_has_table_1D_Boolean; +#---- +#true true +#true true query BBB select array_has(column1, column2), @@ -4518,13 +4554,20 @@ from array_has_table_2D; false true true false -query BB -select array_has(column1, column2), - array_has_all(column3, column4) +query B +select array_has(arrow_cast(column1, 'LargeList(List(Int64))'), column2) from fixed_size_array_has_table_2D; ---- -false false -false false +false +false + +#TODO: array_has_all and array_has_any cannot handle FixedSizeList +#query B +#select array_has_all(arrow_cast(column3, 
'LargeList(List(Int64))'), arrow_cast(column4, 'LargeList(List(Int64))')) +#from fixed_size_array_has_table_2D; +#---- +#true +#false query B select array_has_all(column1, column2) @@ -4540,12 +4583,13 @@ from array_has_table_2D_float; true false -query B -select array_has_all(column1, column2) -from fixed_size_array_has_table_2D_float; ----- -false -false +#TODO: array_has_all and array_has_any cannot handle FixedSizeList +#query B +#select array_has_all(column1, column2) +#from fixed_size_array_has_table_2D_float; +#---- +#false +#false query B select array_has(column1, column2) from array_has_table_3D; @@ -4659,23 +4703,24 @@ select array_has_all(arrow_cast(make_array(1,2,3), 'LargeList(Int64)'), arrow_ca ---- true false true false false false true true false false true false true -query BBBBBBBBBBBBB -select array_has_all(arrow_cast(make_array(1,2,3), 'FixedSizeList(3, Int64)'), arrow_cast(make_array(1, 3), 'FixedSizeList(2, Int64)')), - array_has_all(arrow_cast(make_array(1,2,3),'FixedSizeList(3, Int64)'), arrow_cast(make_array(1, 4), 'FixedSizeList(2, Int64)')), - array_has_all(arrow_cast(make_array([1,2], [3,4]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array([1,2]), 'FixedSizeList(1, List(Int64))')), - array_has_all(arrow_cast(make_array([1,2], [3,4]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array([1,3]), 'FixedSizeList(1, List(Int64))')), - array_has_all(arrow_cast(make_array([1,2], [3,4]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array([1,2], [3,4], [5,6]), 'FixedSizeList(3, List(Int64))')), - array_has_all(arrow_cast(make_array([[1,2,3]]), 'FixedSizeList(1, List(List(Int64)))'), arrow_cast(make_array([[1]]), 'FixedSizeList(1, List(List(Int64)))')), - array_has_all(arrow_cast(make_array([[1,2,3]]), 'FixedSizeList(1, List(List(Int64)))'), arrow_cast(make_array([[1,2,3]]), 'FixedSizeList(1, List(List(Int64)))')), - array_has_any(arrow_cast(make_array(1,2,3),'FixedSizeList(3, Int64)'), arrow_cast(make_array(1,10,100), 
'FixedSizeList(3, Int64)')), - array_has_any(arrow_cast(make_array(1,2,3),'FixedSizeList(3, Int64)'), arrow_cast(make_array(10, 100),'FixedSizeList(2, Int64)')), - array_has_any(arrow_cast(make_array([1,2], [3,4]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array([1,10], [10,4]), 'FixedSizeList(2, List(Int64))')), - array_has_any(arrow_cast(make_array([1,2], [3,4]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array([10,20], [3,4]), 'FixedSizeList(2, List(Int64))')), - array_has_any(arrow_cast(make_array([[1,2,3]]), 'FixedSizeList(1, List(List(Int64)))'), arrow_cast(make_array([[1,2,3], [4,5,6]]), 'FixedSizeList(1, List(List(Int64)))')), - array_has_any(arrow_cast(make_array([[1,2,3]]), 'FixedSizeList(1, List(List(Int64)))'), arrow_cast(make_array([[1,2,3]], [[4,5,6]]), 'FixedSizeList(2, List(List(Int64)))')) -; ----- -true false true false false false true true false false true false true +#TODO: array_has_all and array_has_any cannot handle FixedSizeList +#query BBBBBBBBBBBBB +#select array_has_all(arrow_cast(make_array(1,2,3), 'FixedSizeList(3, Int64)'), arrow_cast(make_array(1, 3), 'FixedSizeList(2, Int64)')), +# array_has_all(arrow_cast(make_array(1,2,3),'FixedSizeList(3, Int64)'), arrow_cast(make_array(1, 4), 'FixedSizeList(2, Int64)')), +# array_has_all(arrow_cast(make_array([1,2], [3,4]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array([1,2]), 'FixedSizeList(1, List(Int64))')), +# array_has_all(arrow_cast(make_array([1,2], [3,4]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array([1,3]), 'FixedSizeList(1, List(Int64))')), +# array_has_all(arrow_cast(make_array([1,2], [3,4]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array([1,2], [3,4], [5,6]), 'FixedSizeList(3, List(Int64))')), +# array_has_all(arrow_cast(make_array([[1,2,3]]), 'FixedSizeList(1, List(List(Int64)))'), arrow_cast(make_array([[1]]), 'FixedSizeList(1, List(List(Int64)))')), +# array_has_all(arrow_cast(make_array([[1,2,3]]), 'FixedSizeList(1, 
List(List(Int64)))'), arrow_cast(make_array([[1,2,3]]), 'FixedSizeList(1, List(List(Int64)))')), +# array_has_any(arrow_cast(make_array(1,2,3),'FixedSizeList(3, Int64)'), arrow_cast(make_array(1,10,100), 'FixedSizeList(3, Int64)')), +# array_has_any(arrow_cast(make_array(1,2,3),'FixedSizeList(3, Int64)'), arrow_cast(make_array(10, 100),'FixedSizeList(2, Int64)')), +# array_has_any(arrow_cast(make_array([1,2], [3,4]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array([1,10], [10,4]), 'FixedSizeList(2, List(Int64))')), +# array_has_any(arrow_cast(make_array([1,2], [3,4]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array([10,20], [3,4]), 'FixedSizeList(2, List(Int64))')), +# array_has_any(arrow_cast(make_array([[1,2,3]]), 'FixedSizeList(1, List(List(Int64)))'), arrow_cast(make_array([[1,2,3], [4,5,6]]), 'FixedSizeList(1, List(List(Int64)))')), +# array_has_any(arrow_cast(make_array([[1,2,3]]), 'FixedSizeList(1, List(List(Int64)))'), arrow_cast(make_array([[1,2,3]], [[4,5,6]]), 'FixedSizeList(2, List(List(Int64)))')) +#; +#---- +#true false true false false false true true false false true false true ## array_distinct From acf9df4b113026f73cc7e68418eb23543259ccf6 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Wed, 24 Jan 2024 19:44:24 +0800 Subject: [PATCH 19/24] refactor: put fixedsizelist in coerce_arguments_for_fun --- datafusion/common/src/utils.rs | 35 +++++++++++++++-- datafusion/expr/src/built_in_function.rs | 5 ++- .../expr/src/type_coercion/functions.rs | 20 +++------- .../optimizer/src/analyzer/type_coercion.rs | 14 +++---- datafusion/sqllogictest/test_files/array.slt | 39 ++++++++++++++++--- 5 files changed, 78 insertions(+), 35 deletions(-) diff --git a/datafusion/common/src/utils.rs b/datafusion/common/src/utils.rs index 59b36dd2b5b4..d609c695d68e 100644 --- a/datafusion/common/src/utils.rs +++ b/datafusion/common/src/utils.rs @@ -467,7 +467,7 @@ pub fn coerced_type_with_base_type_only( DataType::List(field) | DataType::FixedSizeList(field, _) 
| DataType::LargeList(field) => { - let data_type = match field.data_type() { + let field_type = match field.data_type() { // nested type could be different list type DataType::List(_) | DataType::FixedSizeList(_, _) @@ -479,13 +479,13 @@ pub fn coerced_type_with_base_type_only( if matches!(data_type, DataType::LargeList(_)) { DataType::LargeList(Arc::new(Field::new( field.name(), - data_type, + field_type, field.is_nullable(), ))) } else { DataType::List(Arc::new(Field::new( field.name(), - data_type, + field_type, field.is_nullable(), ))) } @@ -494,6 +494,35 @@ pub fn coerced_type_with_base_type_only( } } +pub fn coerced_fixed_size_list_to_list(data_type: &DataType) -> DataType { + match data_type { + DataType::FixedSizeList(field, _) => { + let field_type = match field.data_type() { + DataType::List(_) + | DataType::FixedSizeList(_, _) + | DataType::LargeList(_) => { + coerced_fixed_size_list_to_list(field.data_type()) + } + _ => field.data_type().to_owned(), + }; + if matches!(data_type, DataType::LargeList(_)) { + DataType::LargeList(Arc::new(Field::new( + field.name(), + field_type, + field.is_nullable(), + ))) + } else { + DataType::List(Arc::new(Field::new( + field.name(), + field_type, + field.is_nullable(), + ))) + } + } + _ => data_type.to_owned(), + } +} + /// Compute the number of dimensions in a list data type. 
pub fn list_ndims(data_type: &DataType) -> u64 { match data_type { diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index 6b459cb36bbd..85bf9885ebba 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -599,8 +599,9 @@ impl BuiltinScalarFunction { } BuiltinScalarFunction::ArrayDistinct => Ok(input_expr_types[0].clone()), BuiltinScalarFunction::ArrayElement => match &input_expr_types[0] { - List(field)|FixedSizeList(field,_) => Ok(field.data_type().clone()), - LargeList(field) => Ok(field.data_type().clone()), + List(field) + | LargeList(field) + | FixedSizeList(field, _) => Ok(field.data_type().clone()), _ => plan_err!( "The {self} function can only accept List, LargeList or FixedSizeList as the first argument" ), diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index 6f36c2cfa8f1..bb1e37f64166 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -116,26 +116,16 @@ fn get_valid_types( &new_base_type, ); - // type coercion for nested FixedSizeList - let elem_type = datafusion_common::utils::coerced_type_with_base_type_only( - elem_type, - &elem_base_type, - ); - - let array_dim = datafusion_common::utils::list_ndims(&array_type); - let elem_dim = datafusion_common::utils::list_ndims(&elem_type); - match array_type { DataType::List(ref field) | DataType::LargeList(ref field) | DataType::FixedSizeList(ref field, _) => { // for the functions with signature 'array_element(array, int)' - let elem_type = - if elem_base_type.eq(&DataType::Null) && elem_dim != array_dim { - field.data_type() - } else { - &elem_type - }; + let elem_type = if elem_base_type.eq(&DataType::Null) { + field.data_type() + } else { + elem_type + }; if is_append { Ok(vec![vec![array_type.clone(), elem_type.clone()]]) } else { diff --git 
a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index c9ecb2a77055..4a4b3d849f5f 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -23,6 +23,7 @@ use arrow::datatypes::{DataType, IntervalUnit}; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{RewriteRecursion, TreeNodeRewriter}; +use datafusion_common::utils::coerced_fixed_size_list_to_list; use datafusion_common::{ exec_err, internal_err, plan_datafusion_err, plan_err, DFSchema, DFSchemaRef, DataFusionError, Result, ScalarValue, @@ -592,19 +593,14 @@ fn coerce_arguments_for_fun( let mut expressions: Vec = expressions.to_vec(); - // Cast Fixedsizelist to List for array functions - if *fun == BuiltinScalarFunction::MakeArray { + // coerce the fixed size list to list for all array fucntions + if fun.name().contains("array") { expressions = expressions .into_iter() .map(|expr| { let data_type = expr.get_type(schema).unwrap(); - if let DataType::FixedSizeList(field, _) = data_type { - let field = field.as_ref().clone(); - let to_type = DataType::List(Arc::new(field)); - expr.cast_to(&to_type, schema) - } else { - Ok(expr) - } + let to_type = coerced_fixed_size_list_to_list(&data_type); + expr.cast_to(&to_type, schema) }) .collect::>>()?; } diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 48e6d59f49f6..bccb2dfc65f1 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -1056,6 +1056,14 @@ from arrays_values_without_nulls; query error DataFusion error: Error during planning: No function matches the given name and argument types 'array_element\(Int64, Int64\)'. 
You might need to add explicit type casts.\n\tCandidate functions:\n\tarray_element\(ArrayAndElement\(List, T\)\) select array_element(1, 2); +# array_element with null +query I +select array_element([1, 2], NULL); +---- +NULL + +query error +select array_element(NULL, 2); # array_element scalar function #1 (with positive index) query IT @@ -2534,6 +2542,12 @@ select array_concat(make_array(column3), column1, column2) from arrays_values_v2 ## array_position (aliases: `list_position`, `array_indexof`, `list_indexof`) +## array_position with NULL (follow PostgreSQL) +#query I +#select array_position([1, 2, 3, 4, 5], null), array_position(NULL, 1); +#---- +#NULL NULL + # array_position scalar function #1 query III select array_position(['h', 'e', 'l', 'l', 'o'], 'l'), array_position([1, 2, 3, 4, 5], 5), array_position([1, 1, 1], 1); @@ -2668,6 +2682,12 @@ NULL 1 NULL ## array_positions (aliases: `list_positions`) +# array_position with NULL (follow PostgreSQL) +query ? +select array_positions([1, 2, 3, 4, 5], null); +---- +[] + # array_positions scalar function #1 query ??? select array_positions(['h', 'e', 'l', 'l', 'o'], 'l'), array_positions([1, 2, 3, 4, 5], 5), array_positions([1, 1, 1], 1); @@ -2757,7 +2777,7 @@ select array_positions(arrow_cast(column1, 'LargeList(List(Int64))'), column2) f [2, 5] query ? -select array_positions(arrow_cast(column1, 'LargeList(List(Int64))'), column2) from fixed_size_nested_arrays; +select array_positions(column1, column2) from fixed_size_nested_arrays; ---- [3] [2, 5] @@ -3763,11 +3783,12 @@ select ---- [1, , 3] [, 2.2, 3.3] [, bc] -# TODO: https://github.com/apache/arrow-datafusion/issues/7142 -# query -# select -# array_remove(make_array(1, null, 2), null), -# array_remove(make_array(1, null, 2, null), null); +query ?? +select + array_remove(make_array(1, null, 2), null), + array_remove(make_array(1, null, 2, null), null); +---- +[1, 2] [1, 2, ] # array_remove scalar function #2 (element is list) query ?? 
@@ -3921,6 +3942,12 @@ select array_remove_n(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], ## array_remove_all (aliases: `list_removes`) +# array_remove_all with NULL elements +query ? +select array_remove_all(make_array(1, 2, 2, 1, 1), NULL); +---- +[1, 2, 2, 1, 1] + # array_remove_all scalar function #1 query ??? select array_remove_all(make_array(1, 2, 2, 1, 1), 2), array_remove_all(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 1.0), array_remove_all(make_array('h', 'e', 'l', 'l', 'o'), 'l'); From 9c82e43bb37a3ad66fc005dea5857684fa7cecc6 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Wed, 24 Jan 2024 19:46:24 +0800 Subject: [PATCH 20/24] chore --- datafusion/expr/src/type_coercion/functions.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index bb1e37f64166..ea324c37c30a 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -120,7 +120,6 @@ fn get_valid_types( DataType::List(ref field) | DataType::LargeList(ref field) | DataType::FixedSizeList(ref field, _) => { - // for the functions with signature 'array_element(array, int)' let elem_type = if elem_base_type.eq(&DataType::Null) { field.data_type() } else { From 159648a0ff0442531ffa2d324d8fd1a7d47de1ec Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Fri, 26 Jan 2024 20:25:27 +0800 Subject: [PATCH 21/24] refactor type signature --- datafusion/expr/src/built_in_function.rs | 9 ++-- datafusion/expr/src/signature.rs | 47 ++++++++++++++++--- .../expr/src/type_coercion/functions.rs | 19 +++++--- 3 files changed, 56 insertions(+), 19 deletions(-) diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index 85bf9885ebba..4fef283c56f7 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -945,10 +945,9 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArraySort => { 
Signature::variadic_any(self.volatility()) } - BuiltinScalarFunction::ArrayAppend => Signature { - type_signature: ArrayAndElement, - volatility: self.volatility(), - }, + BuiltinScalarFunction::ArrayAppend => { + Signature::array_and_element(self.volatility()) + } BuiltinScalarFunction::MakeArray => { // 0 or more arguments of arbitrary type Signature::one_of(vec![VariadicEqual, Any(0)], self.volatility()) @@ -961,7 +960,7 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayDims => Signature::any(1, self.volatility()), BuiltinScalarFunction::ArrayEmpty => Signature::any(1, self.volatility()), BuiltinScalarFunction::ArrayElement => { - Signature::array_and_element(self.volatility()) + Signature::array_and_index(self.volatility()) } BuiltinScalarFunction::ArrayExcept => Signature::any(2, self.volatility()), BuiltinScalarFunction::Flatten => Signature::any(1, self.volatility()), diff --git a/datafusion/expr/src/signature.rs b/datafusion/expr/src/signature.rs index 17a777551323..48f4c996cb5d 100644 --- a/datafusion/expr/src/signature.rs +++ b/datafusion/expr/src/signature.rs @@ -116,6 +116,12 @@ pub enum TypeSignature { /// Function `make_array` takes 0 or more arguments with arbitrary types, its `TypeSignature` /// is `OneOf(vec![Any(0), VariadicAny])`. OneOf(Vec), + /// Specifies Signatures for array functions + ArraySignature(ArrayFunctionSignature), +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum ArrayFunctionSignature { /// Specialized Signature for ArrayAppend and similar functions /// The first argument should be List/LargeList, and the second argument should be non-list or list. /// The second argument's list dimension should be one dimension less than the first argument's list dimension. @@ -126,6 +132,23 @@ pub enum TypeSignature { /// The first argument should be non-list or list, and the second argument should be List/LargeList. 
/// The first argument's list dimension should be one dimension less than the second argument's list dimension. ElementAndArray, + ArrayAndIndex, +} + +impl std::fmt::Display for ArrayFunctionSignature { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ArrayFunctionSignature::ArrayAndElement => { + write!(f, "array, element") + } + ArrayFunctionSignature::ElementAndArray => { + write!(f, "element, array") + } + ArrayFunctionSignature::ArrayAndIndex => { + write!(f, "array, index") + } + } + } } impl TypeSignature { @@ -156,11 +179,8 @@ impl TypeSignature { TypeSignature::OneOf(sigs) => { sigs.iter().flat_map(|s| s.to_string_repr()).collect() } - TypeSignature::ArrayAndElement => { - vec!["ArrayAndElement(List, T)".to_string()] - } - TypeSignature::ElementAndArray => { - vec!["ElementAndArray(T, List)".to_string()] + TypeSignature::ArraySignature(array_signature) => { + vec![array_signature.to_string()] } } } @@ -266,14 +286,27 @@ impl Signature { /// Specialized Signature for ArrayAppend and similar functions pub fn array_and_element(volatility: Volatility) -> Self { Signature { - type_signature: TypeSignature::ArrayAndElement, + type_signature: TypeSignature::ArraySignature( + ArrayFunctionSignature::ArrayAndElement, + ), volatility, } } /// Specialized Signature for ArrayPrepend and similar functions pub fn element_and_array(volatility: Volatility) -> Self { Signature { - type_signature: TypeSignature::ElementAndArray, + type_signature: TypeSignature::ArraySignature( + ArrayFunctionSignature::ElementAndArray, + ), + volatility, + } + } + /// Specialized Signature for ArrayElement and similar functions + pub fn array_and_index(volatility: Volatility) -> Self { + Signature { + type_signature: TypeSignature::ArraySignature( + ArrayFunctionSignature::ArrayAndIndex, + ), volatility, } } diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index ea324c37c30a..e9aa1a813ab1 
100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use crate::signature::TIMEZONE_WILDCARD; +use crate::signature::{ArrayFunctionSignature, TIMEZONE_WILDCARD}; use crate::{Signature, TypeSignature}; use arrow::{ compute::can_cast_types, @@ -166,12 +166,17 @@ fn get_valid_types( } TypeSignature::Exact(valid_types) => vec![valid_types.clone()], - TypeSignature::ArrayAndElement => { - return array_append_or_prepend_valid_types(current_types, true) - } - TypeSignature::ElementAndArray => { - return array_append_or_prepend_valid_types(current_types, false) - } + TypeSignature::ArraySignature(ref function_signature) => match function_signature + { + ArrayFunctionSignature::ArrayAndElement + | ArrayFunctionSignature::ArrayAndIndex => { + return array_append_or_prepend_valid_types(current_types, true) + } + ArrayFunctionSignature::ElementAndArray => { + return array_append_or_prepend_valid_types(current_types, false) + } + }, + TypeSignature::Any(number) => { if current_types.len() != *number { return plan_err!( From 76cede42a902ed859f109058accb917118bf1a05 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Sat, 27 Jan 2024 11:15:40 +0800 Subject: [PATCH 22/24] add array_and_index function --- .../expr/src/type_coercion/functions.rs | 28 ++++++++++++++----- datafusion/sqllogictest/test_files/array.slt | 14 +++++++--- 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index e9aa1a813ab1..92b309817dee 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -120,11 +120,7 @@ fn get_valid_types( DataType::List(ref field) | DataType::LargeList(ref field) | DataType::FixedSizeList(ref field, _) => { - let elem_type = if elem_base_type.eq(&DataType::Null) { - 
field.data_type() - } else { - elem_type - }; + let elem_type = field.data_type(); if is_append { Ok(vec![vec![array_type.clone(), elem_type.clone()]]) } else { @@ -134,6 +130,22 @@ fn get_valid_types( _ => Ok(vec![vec![]]), } } + fn array_and_index(current_types: &[DataType]) -> Result>> { + if current_types.len() != 2 { + return Ok(vec![vec![]]); + } + + let array_type = ¤t_types[0]; + + match array_type { + DataType::List(_) + | DataType::LargeList(_) + | DataType::FixedSizeList(_, _) => { + Ok(vec![vec![array_type.clone(), DataType::Int64]]) + } + _ => Ok(vec![vec![]]), + } + } let valid_types = match signature { TypeSignature::Variadic(valid_types) => valid_types .iter() @@ -168,10 +180,12 @@ fn get_valid_types( TypeSignature::Exact(valid_types) => vec![valid_types.clone()], TypeSignature::ArraySignature(ref function_signature) => match function_signature { - ArrayFunctionSignature::ArrayAndElement - | ArrayFunctionSignature::ArrayAndIndex => { + ArrayFunctionSignature::ArrayAndElement => { return array_append_or_prepend_valid_types(current_types, true) } + ArrayFunctionSignature::ArrayAndIndex => { + return array_and_index(current_types) + } ArrayFunctionSignature::ElementAndArray => { return array_append_or_prepend_valid_types(current_types, false) } diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index bccb2dfc65f1..7ecbcf082bf5 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -1053,7 +1053,7 @@ from arrays_values_without_nulls; ## array_element (aliases: array_extract, list_extract, list_element) # array_element error -query error DataFusion error: Error during planning: No function matches the given name and argument types 'array_element\(Int64, Int64\)'. 
You might need to add explicit type casts.\n\tCandidate functions:\n\tarray_element\(ArrayAndElement\(List, T\)\) +query error DataFusion error: Error during planning: No function matches the given name and argument types 'array_element\(Int64, Int64\)'. You might need to add explicit type casts.\n\tCandidate functions:\n\tarray_element\(array, index\) select array_element(1, 2); # array_element with null @@ -1114,14 +1114,20 @@ select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int NULL NULL # array_element scalar function #4 (with NULL) -query error +query IT select array_element(make_array(1, 2, 3, 4, 5), NULL), array_element(make_array('h', 'e', 'l', 'l', 'o'), NULL); +---- +NULL NULL -query error +query IT select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), NULL), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), NULL); +---- +NULL NULL -query error +query IT select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), NULL), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), NULL); +---- +NULL NULL # array_element scalar function #5 (with negative index) query IT From b1d79ba6611a4dee88cb906254472033a433bb8e Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Tue, 30 Jan 2024 12:04:22 +0800 Subject: [PATCH 23/24] put all type coercion in coerce_arguments_for_signature --- datafusion/common/src/utils.rs | 87 +++++++++---------- .../expr/src/type_coercion/functions.rs | 5 +- .../optimizer/src/analyzer/type_coercion.rs | 15 ++-- 3 files changed, 51 insertions(+), 56 deletions(-) diff --git a/datafusion/common/src/utils.rs b/datafusion/common/src/utils.rs index d609c695d68e..d8fbb4000273 100644 --- a/datafusion/common/src/utils.rs +++ b/datafusion/common/src/utils.rs @@ -464,62 +464,53 @@ pub fn coerced_type_with_base_type_only( base_type: &DataType, ) -> DataType { match data_type { - DataType::List(field) - | 
DataType::FixedSizeList(field, _) - | DataType::LargeList(field) => { - let field_type = match field.data_type() { - // nested type could be different list type - DataType::List(_) - | DataType::FixedSizeList(_, _) - | DataType::LargeList(_) => { - coerced_type_with_base_type_only(field.data_type(), base_type) - } - _ => base_type.to_owned(), - }; - if matches!(data_type, DataType::LargeList(_)) { - DataType::LargeList(Arc::new(Field::new( - field.name(), - field_type, - field.is_nullable(), - ))) - } else { - DataType::List(Arc::new(Field::new( - field.name(), - field_type, - field.is_nullable(), - ))) - } + DataType::List(field) | DataType::FixedSizeList(field, _) => { + let field_type = + coerced_type_with_base_type_only(field.data_type(), base_type); + + DataType::List(Arc::new(Field::new( + field.name(), + field_type, + field.is_nullable(), + ))) + } + DataType::LargeList(field) => { + let field_type = + coerced_type_with_base_type_only(field.data_type(), base_type); + + DataType::LargeList(Arc::new(Field::new( + field.name(), + field_type, + field.is_nullable(), + ))) } + _ => base_type.clone(), } } pub fn coerced_fixed_size_list_to_list(data_type: &DataType) -> DataType { match data_type { - DataType::FixedSizeList(field, _) => { - let field_type = match field.data_type() { - DataType::List(_) - | DataType::FixedSizeList(_, _) - | DataType::LargeList(_) => { - coerced_fixed_size_list_to_list(field.data_type()) - } - _ => field.data_type().to_owned(), - }; - if matches!(data_type, DataType::LargeList(_)) { - DataType::LargeList(Arc::new(Field::new( - field.name(), - field_type, - field.is_nullable(), - ))) - } else { - DataType::List(Arc::new(Field::new( - field.name(), - field_type, - field.is_nullable(), - ))) - } + DataType::List(field) | DataType::FixedSizeList(field, _) => { + let field_type = coerced_fixed_size_list_to_list(field.data_type()); + + DataType::List(Arc::new(Field::new( + field.name(), + field_type, + field.is_nullable(), + ))) } - _ => 
data_type.to_owned(), + DataType::LargeList(field) => { + let field_type = coerced_fixed_size_list_to_list(field.data_type()); + + DataType::LargeList(Arc::new(Field::new( + field.name(), + field_type, + field.is_nullable(), + ))) + } + + _ => data_type.clone(), } } diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index 92b309817dee..806fdaaa5246 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -21,7 +21,7 @@ use arrow::{ compute::can_cast_types, datatypes::{DataType, TimeUnit}, }; -use datafusion_common::utils::list_ndims; +use datafusion_common::utils::{coerced_fixed_size_list_to_list, list_ndims}; use datafusion_common::{ internal_datafusion_err, internal_err, plan_err, DataFusionError, Result, }; @@ -141,7 +141,8 @@ fn get_valid_types( DataType::List(_) | DataType::LargeList(_) | DataType::FixedSizeList(_, _) => { - Ok(vec![vec![array_type.clone(), DataType::Int64]]) + let array_type = coerced_fixed_size_list_to_list(array_type); + Ok(vec![vec![array_type, DataType::Int64]]) } _ => Ok(vec![vec![]]), } diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index 4a4b3d849f5f..454ae80758c3 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -23,7 +23,6 @@ use arrow::datatypes::{DataType, IntervalUnit}; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{RewriteRecursion, TreeNodeRewriter}; -use datafusion_common::utils::coerced_fixed_size_list_to_list; use datafusion_common::{ exec_err, internal_err, plan_datafusion_err, plan_err, DFSchema, DFSchemaRef, DataFusionError, Result, ScalarValue, @@ -590,17 +589,21 @@ fn coerce_arguments_for_fun( if expressions.is_empty() { return Ok(vec![]); } - let mut expressions: Vec = expressions.to_vec(); - // coerce the fixed size list to list 
for all array fucntions - if fun.name().contains("array") { + // Cast Fixedsizelist to List for array functions + if *fun == BuiltinScalarFunction::MakeArray { expressions = expressions .into_iter() .map(|expr| { let data_type = expr.get_type(schema).unwrap(); - let to_type = coerced_fixed_size_list_to_list(&data_type); - expr.cast_to(&to_type, schema) + if let DataType::FixedSizeList(field, _) = data_type { + let field = field.as_ref().clone(); + let to_type = DataType::List(Arc::new(field)); + expr.cast_to(&to_type, schema) + } else { + Ok(expr) + } }) .collect::>>()?; } From 832b59af2af982a3421b5561407ad46a1d7f97de Mon Sep 17 00:00:00 2001 From: Alex Huang Date: Thu, 1 Feb 2024 09:40:56 +0800 Subject: [PATCH 24/24] add comment Co-authored-by: Andrew Lamb --- datafusion/common/src/utils.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/datafusion/common/src/utils.rs b/datafusion/common/src/utils.rs index d8fbb4000273..a12b71c17dcf 100644 --- a/datafusion/common/src/utils.rs +++ b/datafusion/common/src/utils.rs @@ -489,6 +489,7 @@ pub fn coerced_type_with_base_type_only( } } +/// Recursively coerce any `FixedSizeList` elements to `List` pub fn coerced_fixed_size_list_to_list(data_type: &DataType) -> DataType { match data_type { DataType::List(field) | DataType::FixedSizeList(field, _) => {