diff --git a/CHANGELOG.md b/CHANGELOG.md index d5e3abbe2..0a05429f4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,23 @@ # DataFusion Python Changelog +## [36.0.0](https://github.com/apache/arrow-datafusion-python/tree/36.0.0) (2024-03-02) + +**Implemented enhancements:** + +- feat: Add `flatten` array function [#562](https://github.com/apache/arrow-datafusion-python/pull/562) (mobley-trent) + +**Documentation updates:** + +- docs: Add ASF attribution [#580](https://github.com/apache/arrow-datafusion-python/pull/580) (simicd) + +**Merged pull requests:** + +- Allow PyDataFrame to be used from other projects [#582](https://github.com/apache/arrow-datafusion-python/pull/582) (andygrove) +- docs: Add ASF attribution [#580](https://github.com/apache/arrow-datafusion-python/pull/580) (simicd) +- Add array functions [#560](https://github.com/apache/arrow-datafusion-python/pull/560) (ongchi) +- feat: Add `flatten` array function [#562](https://github.com/apache/arrow-datafusion-python/pull/562) (mobley-trent) + ## [35.0.0](https://github.com/apache/arrow-datafusion-python/tree/35.0.0) (2024-01-20) **Merged pull requests:** diff --git a/Cargo.lock b/Cargo.lock index e6aa6050a..31e7bad01 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -107,8 +107,8 @@ dependencies = [ "serde", "serde_json", "snap", - "strum", - "strum_macros", + "strum 0.25.0", + "strum_macros 0.25.3", "thiserror", "typed-builder", "uuid", @@ -544,15 +544,15 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.31" +version = "0.4.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38" +checksum = "5bc015644b92d5890fab7489e49d21f879d5c990186827d42ec511919404f38b" dependencies = [ "android-tzdata", "iana-time-zone", "num-traits", "serde", - "windows-targets 0.48.5", + "windows-targets 0.52.0", ] [[package]] @@ -583,8 +583,8 @@ version = "7.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c64043d6c7b7a4c58e39e7efccfdea7b93d885a795d0c054a69dbbf4dd52686" dependencies = [ - "strum", - "strum_macros", + "strum 0.25.0", + "strum_macros 0.25.3", "unicode-width", ] @@ -715,9 +715,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "35.0.0" +version = "36.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4328f5467f76d890fe3f924362dbc3a838c6a733f762b32d87f9e0b7bef5fb49" +checksum = "b2b360b692bf6c6d6e6b6dbaf41a3be0020daeceac0f406aed54c75331e50dbb" dependencies = [ "ahash", "apache-avro", @@ -734,6 +734,8 @@ dependencies = [ "datafusion-common", "datafusion-execution", "datafusion-expr", + "datafusion-functions", + "datafusion-functions-array", "datafusion-optimizer", "datafusion-physical-expr", "datafusion-physical-plan", @@ -765,9 +767,9 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "35.0.0" +version = "36.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d29a7752143b446db4a2cccd9a6517293c6b97e8c39e520ca43ccd07135a4f7e" +checksum = "37f343ccc298f440e25aa38ff82678291a7acc24061c7370ba6c0ff5cc811412" dependencies = [ "ahash", "apache-avro", @@ -787,9 +789,9 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = "35.0.0" +version = "36.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d447650af16e138c31237f53ddaef6dd4f92f0e2d3f2f35d190e16c214ca496" +checksum = "3f9c93043081487e335399a21ebf8295626367a647ac5cb87d41d18afad7d0f7" dependencies = [ "arrow", "chrono", @@ -808,9 +810,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "35.0.0" +version = "36.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8d19598e48a498850fb79f97a9719b1f95e7deb64a7a06f93f313e8fa1d524b" +checksum = "e204d89909e678846b6a95f156aafc1ee5b36cb6c9e37ec2e1449b078a38c818" dependencies = [ "ahash", "arrow", @@ -818,15 +820,44 @@ dependencies = [ "datafusion-common", "paste", "sqlparser", - "strum", - "strum_macros", + "strum 0.26.1", + "strum_macros 0.26.1", +] + +[[package]] +name = "datafusion-functions" +version = "36.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98f1c73f7801b2b8ba2297b3ad78ffcf6c1fc6b8171f502987eb9ad5cb244ee7" +dependencies = [ + "arrow", + "base64", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "hex", + "log", +] + +[[package]] +name = "datafusion-functions-array" +version = "36.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42d16a0ddf2c991526f6ffe2f47a72c6da0b7354d6c32411dd20631fe2e38937" +dependencies = [ + "arrow", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "log", + "paste", ] [[package]] name = "datafusion-optimizer" -version = "35.0.0" +version = "36.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b7feb0391f1fc75575acb95b74bfd276903dc37a5409fcebe160bc7ddff2010" +checksum = "5ae27e07bf1f04d327be5c2a293470879801ab5535204dc3b16b062fda195496" dependencies = [ "arrow", "async-trait", @@ -842,9 +873,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "35.0.0" +version = "36.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e911bca609c89a54e8f014777449d8290327414d3e10c57a3e3c2122e38878d0" +checksum = "dde620cd9ef76a3bca9c754fb68854bd2349c49f55baf97e08001f9e967f6d6b" dependencies = [ "ahash", "arrow", @@ -852,11 +883,13 @@ dependencies = [ "arrow-buffer", "arrow-ord", "arrow-schema", + "arrow-string", "base64", "blake2", "blake3", "chrono", "datafusion-common", + "datafusion-execution", "datafusion-expr", "half", "hashbrown 0.14.3", @@ -876,9 +909,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "35.0.0" +version = "36.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e96b546b8a02e9c2ab35ac6420d511f12a4701950c1eb2e568c122b4fefb0be3" +checksum = "9a4c75fba9ea99d64b2246cbd2fcae2e6fc973e6616b1015237a616036506dd4" dependencies = [ "ahash", "arrow", @@ -907,12 +940,13 @@ dependencies = [ [[package]] name = "datafusion-python" -version = "35.0.0" +version = "36.0.0" dependencies = [ "async-trait", "datafusion", "datafusion-common", "datafusion-expr", + "datafusion-functions-array", "datafusion-optimizer", "datafusion-sql", "datafusion-substrait", @@ -934,9 +968,9 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "35.0.0" +version = "36.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d18d36f260bbbd63aafdb55339213a23d540d3419810575850ef0a798a6b768" +checksum = "21474a95c3a62d113599d21b439fa15091b538bac06bd20be0bb2e7d22903c09" dependencies = [ "arrow", "arrow-schema", @@ -948,9 +982,9 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "35.0.0" +version = "36.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dad6bef77af3d8a697ae63ffbcb5aa66b74cd08ea93a31e2e757da75b2f1452f" +checksum = "aab89c01ef66a59ec92d2360db63893224b4f7e085e2ee6351e0bb77f88931f0" dependencies = [ "async-recursion", "chrono", @@ -1181,9 +1215,9 @@ checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" [[package]] name = "git2" -version = "0.18.1" +version = "0.18.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbf97ba92db08df386e10c8ede66a2a0369bd277090afd8710e19e38de9ec0cd" +checksum = "1b3ba52851e73b46a4c3df1d89343741112003f0f6f13beb0dfac9e457c3fdcd" dependencies = [ "bitflags 2.4.2", "libc", @@ -1557,9 +1591,9 @@ dependencies = [ [[package]] name = "libgit2-sys" -version = "0.16.1+1.7.1" +version = "0.16.2+1.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2a2bb3680b094add03bb3732ec520ece34da31a8cd2d633d1389d0f0fb60d0c" +checksum = "ee4126d8b4ee5c9d9ea891dd875cfdc1e9d0950437179104b183d7d8a74d24e8" dependencies = [ "cc", "libc", @@ -2662,9 +2696,9 @@ checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] name = "sqlparser" -version = "0.41.0" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cc2c25a6c66789625ef164b4c7d2e548d627902280c13710d33da8222169964" +checksum = "f95c4bae5aba7cd30bd506f7140026ade63cff5afd778af8854026f9606bf5d4" dependencies = [ "log", "sqlparser_derive", @@ -2692,8 +2726,14 @@ name = "strum" version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" + +[[package]] +name = "strum" +version = "0.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "723b93e8addf9aa965ebe2d11da6d7540fa2283fcea14b3371ff055f7ba13f5f" dependencies = [ - "strum_macros", + "strum_macros 0.26.1", ] [[package]] @@ -2709,11 +2749,24 @@ dependencies = [ "syn 2.0.48", ] +[[package]] +name = "strum_macros" +version = "0.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a3417fc93d76740d974a01654a09777cb500428cc874ca9f45edfe0c4d4cd18" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.48", +] + [[package]] name = "substrait" -version = "0.22.1" +version = "0.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5478fbd0313a9b0915a1c0e7ebf15b5fed7d7c6dd7229b4f5e32ce75b10f256a" +checksum = "2c8ffb7a3e7505bb835513e77ebfe67d359e57d684a5972323e3bdefbecc1f25" dependencies = [ "git2", "heck", @@ -2857,9 +2910,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.35.1" +version = "1.36.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c89b4efa943be685f629b149f53829423f8f5531ea21249408e8e2f8671ec104" +checksum = "61285f6515fa018fb2d1e46eb21223fff441ee8db5d0f1435e8ab4f5cdb80931" dependencies = [ "backtrace", "bytes", diff --git a/Cargo.toml b/Cargo.toml index 6c47484b3..f8c6bf9f7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "datafusion-python" -version = "35.0.0" +version = "36.0.0" homepage = "https://github.com/apache/arrow-datafusion-python" repository = "https://github.com/apache/arrow-datafusion-python" authors = ["Apache Arrow "] @@ -37,12 +37,13 @@ substrait = ["dep:datafusion-substrait"] tokio = { version = "1.35", features = ["macros", "rt", "rt-multi-thread", "sync"] } rand = "0.8" pyo3 = { version = "0.20", features = ["extension-module", "abi3", "abi3-py38"] } -datafusion = { version = "35.0.0", features = ["pyarrow", "avro"] } -datafusion-common = { version = "35.0.0", features = ["pyarrow"] } -datafusion-expr = "35.0.0" -datafusion-optimizer = "35.0.0" -datafusion-sql = "35.0.0" -datafusion-substrait = { version = "35.0.0", optional = true } +datafusion = { version = "36.0.0", features = ["pyarrow", "avro"] } +datafusion-common = { version = "36.0.0", features = ["pyarrow"] } +datafusion-expr = "36.0.0" +datafusion-functions-array = "36.0.0" +datafusion-optimizer = "36.0.0" +datafusion-sql = "36.0.0" +datafusion-substrait = { version = "36.0.0", optional = true } prost = "0.12" prost-types = "0.12" uuid = { version = "1.3", features = ["v4"] } diff --git a/src/common/data_type.rs b/src/common/data_type.rs index 6059768e3..d3203fdcd 100644 --- a/src/common/data_type.rs +++ b/src/common/data_type.rs @@ -290,15 +290,21 @@ impl DataTypeMap { Ok(DataType::Interval(IntervalUnit::MonthDayNano)) } ScalarValue::List(arr) => Ok(arr.data_type().to_owned()), - ScalarValue::Struct(_, fields) => Ok(DataType::Struct(fields.to_owned())), + ScalarValue::Struct(_fields) => Err(py_datafusion_err( + DataFusionError::NotImplemented("ScalarValue::Struct".to_string()), + )), ScalarValue::FixedSizeBinary(size, _) => Ok(DataType::FixedSizeBinary(*size)), ScalarValue::FixedSizeList(_array_ref) => { // The FieldRef was removed from ScalarValue::FixedSizeList in // https://github.com/apache/arrow-datafusion/pull/8221, so we can no // longer convert back to a DataType here - todo!() + Err(py_datafusion_err(DataFusionError::NotImplemented( + "ScalarValue::FixedSizeList".to_string(), + ))) } - ScalarValue::LargeList(_) => todo!(), + ScalarValue::LargeList(_) => Err(py_datafusion_err(DataFusionError::NotImplemented( + "ScalarValue::LargeList".to_string(), + ))), ScalarValue::DurationSecond(_) => Ok(DataType::Duration(TimeUnit::Second)), ScalarValue::DurationMillisecond(_) => Ok(DataType::Duration(TimeUnit::Millisecond)), ScalarValue::DurationMicrosecond(_) => Ok(DataType::Duration(TimeUnit::Microsecond)), diff --git a/src/expr.rs b/src/expr.rs index dbb56ee99..c0e7019f0 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -32,7 +32,7 @@ use datafusion_expr::{ }; use crate::common::data_type::{DataTypeMap, RexType}; -use crate::errors::{py_runtime_err, py_type_err, DataFusionError}; +use crate::errors::{py_datafusion_err, py_runtime_err, py_type_err, DataFusionError}; use crate::expr::aggregate_expr::PyAggregateFunction; use crate::expr::binary_expr::PyBinaryExpr; use crate::expr::column::PyColumn; @@ -292,6 +292,7 @@ impl PyExpr { | Expr::IsNotFalse(..) | Expr::Placeholder { .. } | Expr::OuterReferenceColumn(_, _) + | Expr::Unnest(_) | Expr::IsNotUnknown(_) => RexType::Call, Expr::ScalarSubquery(..) => RexType::ScalarSubquery, }) @@ -306,49 +307,81 @@ impl PyExpr { /// Extracts the Expr value into a PyObject that can be shared with Python pub fn python_value(&self, py: Python) -> PyResult { match &self.expr { - Expr::Literal(scalar_value) => Ok(match scalar_value { - ScalarValue::Null => todo!(), - ScalarValue::Boolean(v) => v.into_py(py), - ScalarValue::Float32(v) => v.into_py(py), - ScalarValue::Float64(v) => v.into_py(py), - ScalarValue::Decimal128(v, _, _) => v.into_py(py), - ScalarValue::Decimal256(_, _, _) => todo!(), - ScalarValue::Int8(v) => v.into_py(py), - ScalarValue::Int16(v) => v.into_py(py), - ScalarValue::Int32(v) => v.into_py(py), - ScalarValue::Int64(v) => v.into_py(py), - ScalarValue::UInt8(v) => v.into_py(py), - ScalarValue::UInt16(v) => v.into_py(py), - ScalarValue::UInt32(v) => v.into_py(py), - ScalarValue::UInt64(v) => v.into_py(py), - ScalarValue::Utf8(v) => v.clone().into_py(py), - ScalarValue::LargeUtf8(v) => v.clone().into_py(py), - ScalarValue::Binary(v) => v.clone().into_py(py), - ScalarValue::FixedSizeBinary(_, _) => todo!(), - ScalarValue::LargeBinary(v) => v.clone().into_py(py), - ScalarValue::List(_) => todo!(), - ScalarValue::Date32(v) => v.into_py(py), - ScalarValue::Date64(v) => v.into_py(py), - ScalarValue::Time32Second(v) => v.into_py(py), - ScalarValue::Time32Millisecond(v) => v.into_py(py), - ScalarValue::Time64Microsecond(v) => v.into_py(py), - ScalarValue::Time64Nanosecond(v) => v.into_py(py), - ScalarValue::TimestampSecond(v, _) => v.into_py(py), - ScalarValue::TimestampMillisecond(v, _) => v.into_py(py), - ScalarValue::TimestampMicrosecond(v, _) => v.into_py(py), - ScalarValue::TimestampNanosecond(v, _) => v.into_py(py), - ScalarValue::IntervalYearMonth(v) => v.into_py(py), - ScalarValue::IntervalDayTime(v) => v.into_py(py), - ScalarValue::IntervalMonthDayNano(v) => v.into_py(py), - ScalarValue::DurationSecond(v) => v.into_py(py), - ScalarValue::DurationMicrosecond(v) => v.into_py(py), - ScalarValue::DurationNanosecond(v) => v.into_py(py), - ScalarValue::DurationMillisecond(v) => v.into_py(py), - ScalarValue::Struct(_, _) => todo!(), - ScalarValue::Dictionary(_, _) => todo!(), - ScalarValue::FixedSizeList(_) => todo!(), - ScalarValue::LargeList(_) => todo!(), - }), + Expr::Literal(scalar_value) => match scalar_value { + ScalarValue::Null => Err(py_datafusion_err( + datafusion_common::DataFusionError::NotImplemented( + "ScalarValue::Null".to_string(), + ), + )), + ScalarValue::Boolean(v) => Ok(v.into_py(py)), + ScalarValue::Float32(v) => Ok(v.into_py(py)), + ScalarValue::Float64(v) => Ok(v.into_py(py)), + ScalarValue::Decimal128(v, _, _) => Ok(v.into_py(py)), + ScalarValue::Decimal256(_, _, _) => Err(py_datafusion_err( + datafusion_common::DataFusionError::NotImplemented( + "ScalarValue::Decimal256".to_string(), + ), + )), + ScalarValue::Int8(v) => Ok(v.into_py(py)), + ScalarValue::Int16(v) => Ok(v.into_py(py)), + ScalarValue::Int32(v) => Ok(v.into_py(py)), + ScalarValue::Int64(v) => Ok(v.into_py(py)), + ScalarValue::UInt8(v) => Ok(v.into_py(py)), + ScalarValue::UInt16(v) => Ok(v.into_py(py)), + ScalarValue::UInt32(v) => Ok(v.into_py(py)), + ScalarValue::UInt64(v) => Ok(v.into_py(py)), + ScalarValue::Utf8(v) => Ok(v.clone().into_py(py)), + ScalarValue::LargeUtf8(v) => Ok(v.clone().into_py(py)), + ScalarValue::Binary(v) => Ok(v.clone().into_py(py)), + ScalarValue::FixedSizeBinary(_, _) => Err(py_datafusion_err( + datafusion_common::DataFusionError::NotImplemented( + "ScalarValue::FixedSizeBinary".to_string(), + ), + )), + ScalarValue::LargeBinary(v) => Ok(v.clone().into_py(py)), + ScalarValue::List(_) => Err(py_datafusion_err( + datafusion_common::DataFusionError::NotImplemented( + "ScalarValue::List".to_string(), + ), + )), + ScalarValue::Date32(v) => Ok(v.into_py(py)), + ScalarValue::Date64(v) => Ok(v.into_py(py)), + ScalarValue::Time32Second(v) => Ok(v.into_py(py)), + ScalarValue::Time32Millisecond(v) => Ok(v.into_py(py)), + ScalarValue::Time64Microsecond(v) => Ok(v.into_py(py)), + ScalarValue::Time64Nanosecond(v) => Ok(v.into_py(py)), + ScalarValue::TimestampSecond(v, _) => Ok(v.into_py(py)), + ScalarValue::TimestampMillisecond(v, _) => Ok(v.into_py(py)), + ScalarValue::TimestampMicrosecond(v, _) => Ok(v.into_py(py)), + ScalarValue::TimestampNanosecond(v, _) => Ok(v.into_py(py)), + ScalarValue::IntervalYearMonth(v) => Ok(v.into_py(py)), + ScalarValue::IntervalDayTime(v) => Ok(v.into_py(py)), + ScalarValue::IntervalMonthDayNano(v) => Ok(v.into_py(py)), + ScalarValue::DurationSecond(v) => Ok(v.into_py(py)), + ScalarValue::DurationMicrosecond(v) => Ok(v.into_py(py)), + ScalarValue::DurationNanosecond(v) => Ok(v.into_py(py)), + ScalarValue::DurationMillisecond(v) => Ok(v.into_py(py)), + ScalarValue::Struct(_) => Err(py_datafusion_err( + datafusion_common::DataFusionError::NotImplemented( + "ScalarValue::Struct".to_string(), + ), + )), + ScalarValue::Dictionary(_, _) => Err(py_datafusion_err( + datafusion_common::DataFusionError::NotImplemented( + "ScalarValue::Dictionary".to_string(), + ), + )), + ScalarValue::FixedSizeList(_) => Err(py_datafusion_err( + datafusion_common::DataFusionError::NotImplemented( + "ScalarValue::FixedSizeList".to_string(), + ), + )), + ScalarValue::LargeList(_) => Err(py_datafusion_err( + datafusion_common::DataFusionError::NotImplemented( + "ScalarValue::LargeList".to_string(), + ), + )), + }, _ => Err(py_type_err(format!( "Non Expr::Literal encountered in types: {:?}", &self.expr @@ -455,6 +488,7 @@ impl PyExpr { // Currently un-support/implemented Expr types for Rex Call operations Expr::GroupingSet(..) + | Expr::Unnest(_) | Expr::OuterReferenceColumn(_, _) | Expr::Wildcard { .. } | Expr::ScalarSubquery(..) @@ -573,7 +607,11 @@ impl PyExpr { | Operator::RegexMatch | Operator::RegexIMatch | Operator::RegexNotMatch - | Operator::RegexNotIMatch => DataTypeMap::map_from_arrow_type(&DataType::Boolean), + | Operator::RegexNotIMatch + | Operator::LikeMatch + | Operator::ILikeMatch + | Operator::NotLikeMatch + | Operator::NotILikeMatch => DataTypeMap::map_from_arrow_type(&DataType::Boolean), Operator::Plus | Operator::Minus | Operator::Multiply | Operator::Modulo => { DataTypeMap::map_from_arrow_type(&DataType::Int64) } @@ -584,7 +622,9 @@ impl PyExpr { | Operator::BitwiseXor | Operator::BitwiseAnd | Operator::BitwiseOr => DataTypeMap::map_from_arrow_type(&DataType::Binary), - Operator::AtArrow | Operator::ArrowAt => todo!(), + Operator::AtArrow | Operator::ArrowAt => { + Err(py_type_err(format!("Unsupported expr: ${op}"))) + } }, Expr::Cast(Cast { expr: _, data_type }) => DataTypeMap::map_from_arrow_type(data_type), Expr::Literal(scalar_value) => DataTypeMap::map_from_scalar_value(scalar_value), diff --git a/src/functions.rs b/src/functions.rs index 5a558de0e..757fb31d1 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -23,6 +23,7 @@ use crate::expr::conditional_expr::PyCaseBuilder; use crate::expr::window::PyWindowFrame; use crate::expr::PyExpr; use datafusion::execution::FunctionRegistry; +use datafusion::functions; use datafusion_common::{Column, TableReference}; use datafusion_expr::expr::Alias; use datafusion_expr::{ @@ -34,6 +35,49 @@ use datafusion_expr::{ lit, BuiltinScalarFunction, Expr, WindowFunctionDefinition, }; +#[pyfunction] +pub fn isnan(expr: PyExpr) -> PyExpr { + functions::expr_fn::isnan(expr.into()).into() +} + +#[pyfunction] +pub fn nullif(expr1: PyExpr, expr2: PyExpr) -> PyExpr { + functions::expr_fn::nullif(expr1.into(), expr2.into()).into() +} + +#[pyfunction] +pub fn encode(input: PyExpr, encoding: PyExpr) -> PyExpr { + functions::expr_fn::encode(input.into(), encoding.into()).into() +} + +#[pyfunction] +pub fn decode(input: PyExpr, encoding: PyExpr) -> PyExpr { + functions::expr_fn::decode(input.into(), encoding.into()).into() +} + +#[pyfunction] +pub fn array_to_string(expr: PyExpr, delim: PyExpr) -> PyExpr { + datafusion_functions_array::expr_fn::array_to_string(expr.into(), delim.into()).into() +} + +#[pyfunction] +pub fn array_join(expr: PyExpr, delim: PyExpr) -> PyExpr { + // alias for array_to_string + array_to_string(expr, delim) +} + +#[pyfunction] +pub fn list_to_string(expr: PyExpr, delim: PyExpr) -> PyExpr { + // alias for array_to_string + array_to_string(expr, delim) +} + +#[pyfunction] +pub fn list_join(expr: PyExpr, delim: PyExpr) -> PyExpr { + // alias for array_to_string + array_to_string(expr, delim) +} + #[pyfunction] fn in_list(expr: PyExpr, value: Vec, negated: bool) -> PyExpr { datafusion_expr::in_list( @@ -252,7 +296,6 @@ scalar_function!(factorial, Factorial); scalar_function!(floor, Floor); scalar_function!(gcd, Gcd); scalar_function!(initcap, InitCap, "Converts the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters."); -scalar_function!(isnan, Isnan); scalar_function!(iszero, Iszero); scalar_function!(lcm, Lcm); scalar_function!(left, Left, "Returns first n characters in the string, or when n is negative, returns all but last |n| characters."); @@ -348,15 +391,11 @@ scalar_function!(trunc, Trunc); scalar_function!(upper, Upper, "Converts the string to all upper case."); scalar_function!(make_array, MakeArray); scalar_function!(array, MakeArray); -scalar_function!(nullif, NullIf); scalar_function!(uuid, Uuid); scalar_function!(r#struct, Struct); // Use raw identifier since struct is a keyword scalar_function!(from_unixtime, FromUnixtime); scalar_function!(arrow_typeof, ArrowTypeof); scalar_function!(random, Random); -//Binary String Functions -scalar_function!(encode, Encode); -scalar_function!(decode, Decode); // Array Functions scalar_function!(array_append, ArrayAppend); @@ -382,10 +421,6 @@ scalar_function!(list_position, ArrayPosition); scalar_function!(list_indexof, ArrayPosition); scalar_function!(array_positions, ArrayPositions); scalar_function!(list_positions, ArrayPositions); -scalar_function!(array_to_string, ArrayToString); -scalar_function!(array_join, ArrayToString); -scalar_function!(list_to_string, ArrayToString); -scalar_function!(list_join, ArrayToString); scalar_function!(array_ndims, ArrayNdims); scalar_function!(list_ndims, ArrayNdims); scalar_function!(array_prepend, ArrayPrepend); diff --git a/src/udaf.rs b/src/udaf.rs index 0e7a8deab..9aea761cd 100644 --- a/src/udaf.rs +++ b/src/udaf.rs @@ -41,12 +41,12 @@ impl RustAccumulator { } impl Accumulator for RustAccumulator { - fn state(&self) -> Result> { + fn state(&mut self) -> Result> { Python::with_gil(|py| self.accum.as_ref(py).call_method0("state")?.extract()) .map_err(|e| DataFusionError::Execution(format!("{e}"))) } - fn evaluate(&self) -> Result { + fn evaluate(&mut self) -> Result { Python::with_gil(|py| self.accum.as_ref(py).call_method0("evaluate")?.extract()) .map_err(|e| DataFusionError::Execution(format!("{e}"))) }