diff --git a/_duckdb-stubs/_typing.pyi b/_duckdb-stubs/_typing.pyi index c7399063..48b50eb8 100644 --- a/_duckdb-stubs/_typing.pyi +++ b/_duckdb-stubs/_typing.pyi @@ -133,7 +133,7 @@ Note: We use lowercase here to be able to reuse this `Literal` in the `DTypeIdentifiers` `Literal`. """ -NestedIds: TypeAlias = Literal["list", "struct", "array", "enum", "map", "decimal", "union"] +NestedIds: TypeAlias = Literal["list", "struct", "tuple", "array", "enum", "map", "decimal", "union"] """Identifiers for nested types in `DuckDBPyType.id`.""" PyTypeIds: TypeAlias = Builtins | NestedIds diff --git a/duckdb/experimental/spark/sql/type_utils.py b/duckdb/experimental/spark/sql/type_utils.py index 43d04e7c..65e17662 100644 --- a/duckdb/experimental/spark/sql/type_utils.py +++ b/duckdb/experimental/spark/sql/type_utils.py @@ -94,7 +94,7 @@ def convert_nested_type(dtype: DuckDBPyType) -> DataType: # noqa: D103 "DuckDB union types cannot be directly mapped to PySpark types." ) raise ContributionsAcceptedError(msg) - if id == "struct": + if id == "struct" or id == "tuple": children: list[tuple[str, DuckDBPyType]] = dtype.children fields = [StructField(x[0], convert_type(x[1])) for x in children] return StructType(fields) @@ -105,7 +105,7 @@ def convert_nested_type(dtype: DuckDBPyType) -> DataType: # noqa: D103 def convert_type(dtype: DuckDBPyType) -> DataType: # noqa: D103 id = dtype.id - if id in ["list", "struct", "map", "array"]: + if id in ["list", "struct", "tuple", "map", "array"]: return convert_nested_type(dtype) if id == "decimal": children: list[tuple[str, DuckDBPyType]] = dtype.children diff --git a/external/duckdb b/external/duckdb index 06eb6b68..cb5d12db 160000 --- a/external/duckdb +++ b/external/duckdb @@ -1 +1 @@ -Subproject commit 06eb6b6858c6d568f5fe62855f53c386f13c98c7 +Subproject commit cb5d12dbf2b6d8263fa1af45f3987befa8abbf8c diff --git a/src/duckdb_py/arrow/pyarrow_filter_pushdown.cpp b/src/duckdb_py/arrow/pyarrow_filter_pushdown.cpp index 761ccbf6..80ed2101 100644 --- a/src/duckdb_py/arrow/pyarrow_filter_pushdown.cpp +++ b/src/duckdb_py/arrow/pyarrow_filter_pushdown.cpp @@ -79,10 +79,9 @@ py::object MakePyArrowScalar(const Value &constant, const string &timezone_confi // Cast::Operation for which no specialization exists, and // throws "Unimplemented type for cast (INT64 -> INT64)". Use the type-strong // GetValueUnsafe() which reads `value_.time_ns` from the union - // directly. The `dtime_ns_t.micros` field name is a misnomer — it actually holds - // nanoseconds (see arrow_conversion.cpp:432). + // directly. dtime_ns_t.value holds nanoseconds (see arrow_conversion.cpp:432). py::handle date_type = import_cache.pyarrow.time64(); - return dataset_scalar(scalar(constant.GetValueUnsafe().micros, date_type("ns"))); + return dataset_scalar(scalar(constant.GetValueUnsafe().value, date_type("ns"))); } case LogicalTypeId::TIMESTAMP: { py::handle date_type = import_cache.pyarrow.timestamp(); diff --git a/src/duckdb_py/native/python_conversion.cpp b/src/duckdb_py/native/python_conversion.cpp index 722d85c2..7b0a089a 100644 --- a/src/duckdb_py/native/python_conversion.cpp +++ b/src/duckdb_py/native/python_conversion.cpp @@ -117,7 +117,7 @@ Value TransformDictionaryToStruct(optional_ptr context, const PyD const LogicalType &target_type = LogicalType::UNKNOWN) { auto struct_keys = TransformStructKeys(dict.keys, dict.len, target_type); - bool struct_target = target_type.id() == LogicalTypeId::STRUCT; + bool struct_target = target_type.id() == LogicalTypeId::STRUCT || target_type.id() == LogicalTypeId::TUPLE; if (struct_target && dict.len != StructType::GetChildCount(target_type)) { throw InvalidInputException("We could not convert the object %s to the desired target type (%s)", dict.ToString(), target_type.ToString()); @@ -252,7 +252,7 @@ Value TransformTupleToStruct(optional_ptr context, py::handle ele auto tuple = py::cast(ele); auto size = py::len(tuple); - D_ASSERT(target_type.id() == LogicalTypeId::STRUCT); + D_ASSERT(target_type.id() == LogicalTypeId::STRUCT || target_type.id() == LogicalTypeId::TUPLE); auto child_types = StructType::GetChildTypes(target_type); auto child_count = child_types.size(); if (size != child_count) { @@ -558,7 +558,7 @@ struct PythonValueConversion { static void HandleTuple(optional_ptr context, Value &result, const LogicalType &target_type, py::handle ele, idx_t list_size) { - if (target_type.id() == LogicalTypeId::STRUCT) { + if (target_type.id() == LogicalTypeId::STRUCT || target_type.id() == LogicalTypeId::TUPLE) { result = TransformTupleToStruct(context, ele, target_type); return; } @@ -584,6 +584,7 @@ struct PythonValueConversion { PyDictionary dict = PyDictionary(py::reinterpret_borrow(ele)); switch (target_type.id()) { case LogicalTypeId::STRUCT: + case LogicalTypeId::TUPLE: return TransformDictionaryToStruct(context, dict, target_type); case LogicalTypeId::MAP: return TransformDictionaryToMap(context, dict, target_type); @@ -886,6 +887,7 @@ struct PythonVectorConversion { auto &result_type = result.GetType(); switch (result_type.id()) { case LogicalTypeId::STRUCT: + case LogicalTypeId::TUPLE: ConvertTupleToStruct(context, result, result_offset, ele, tuple_size); break; case LogicalTypeId::ARRAY: @@ -981,6 +983,7 @@ void TransformPythonObjectInternal(optional_ptr context, py::hand auto &conversion_target = OP::ConversionTarget(result, param); switch (conversion_target.id()) { case LogicalTypeId::STRUCT: + case LogicalTypeId::TUPLE: case LogicalTypeId::UNKNOWN: case LogicalTypeId::LIST: case LogicalTypeId::ARRAY: diff --git a/src/duckdb_py/native/python_objects.cpp b/src/duckdb_py/native/python_objects.cpp index d34cf28f..ed3b99d8 100644 --- a/src/duckdb_py/native/python_objects.cpp +++ b/src/duckdb_py/native/python_objects.cpp @@ -461,6 +461,7 @@ static bool KeyIsHashable(const LogicalType &type) { return true; } case LogicalTypeId::STRUCT: + case LogicalTypeId::TUPLE: return false; case LogicalTypeId::SQLNULL: // A SQLNULL key is always NULL, and Python's None is hashable. @@ -603,7 +604,7 @@ py::object PythonObject::FromValue(const Value &val, const LogicalType &type, time = val.GetValueUnsafe(); } else { // Python's datetime doesn't support nanoseconds, we convert to micros. - time = val.GetValueUnsafe().time(); + time = dtime_t(val.GetValueUnsafe().value / 1000); } duckdb::Time::Convert(time, hour, min, sec, usec); try { @@ -692,7 +693,8 @@ py::object PythonObject::FromValue(const Value &val, const LogicalType &type, } return std::move(py_struct); } - case LogicalTypeId::STRUCT: { + case LogicalTypeId::STRUCT: + case LogicalTypeId::TUPLE: { return FromStruct(val, type, client_properties); } case LogicalTypeId::UUID: { diff --git a/src/duckdb_py/numpy/array_wrapper.cpp b/src/duckdb_py/numpy/array_wrapper.cpp index 7cf38f6d..38374e71 100644 --- a/src/duckdb_py/numpy/array_wrapper.cpp +++ b/src/duckdb_py/numpy/array_wrapper.cpp @@ -295,21 +295,10 @@ struct ArrayConvert { }; struct StructConvert { - static py::dict ConvertValue(Vector &input, idx_t chunk_offset, NumpyAppendData &append_data) { - auto &client_properties = append_data.client_properties; - - py::dict py_struct; + // Delegate to FromStruct so unnamed structs / TUPLE values become Python tuples (named ones stay dicts). + static py::object ConvertValue(Vector &input, idx_t chunk_offset, NumpyAppendData &append_data) { auto val = input.GetValue(chunk_offset); - auto &child_types = StructType::GetChildTypes(input.GetType()); - auto &struct_children = StructValue::GetChildren(val); - - for (idx_t i = 0; i < struct_children.size(); i++) { - auto &child_entry = child_types[i]; - auto &child_name = child_entry.first; - auto &child_type = child_entry.second; - py_struct[child_name.c_str()] = PythonObject::FromValue(struct_children[i], child_type, client_properties); - } - return py_struct; + return PythonObject::FromStruct(val, input.GetType(), append_data.client_properties); } }; @@ -714,6 +703,7 @@ void ArrayWrapper::Append(idx_t current_offset, Vector &input, idx_t source_size may_have_null = ConvertNested(append_data); break; case LogicalTypeId::STRUCT: + case LogicalTypeId::TUPLE: may_have_null = ConvertNested(append_data); break; case LogicalTypeId::VARIANT: diff --git a/src/duckdb_py/numpy/raw_array_wrapper.cpp b/src/duckdb_py/numpy/raw_array_wrapper.cpp index df89a0f6..f8cb7195 100644 --- a/src/duckdb_py/numpy/raw_array_wrapper.cpp +++ b/src/duckdb_py/numpy/raw_array_wrapper.cpp @@ -58,6 +58,7 @@ static idx_t GetNumpyTypeWidth(const LogicalType &type) { case LogicalTypeId::LIST: case LogicalTypeId::MAP: case LogicalTypeId::STRUCT: + case LogicalTypeId::TUPLE: case LogicalTypeId::UNION: case LogicalTypeId::UUID: case LogicalTypeId::ARRAY: @@ -124,6 +125,7 @@ string RawArrayWrapper::DuckDBToNumpyDtype(const LogicalType &type) { case LogicalTypeId::LIST: case LogicalTypeId::MAP: case LogicalTypeId::STRUCT: + case LogicalTypeId::TUPLE: case LogicalTypeId::UNION: case LogicalTypeId::UUID: case LogicalTypeId::ARRAY: diff --git a/src/duckdb_py/pyconnection.cpp b/src/duckdb_py/pyconnection.cpp index 6ad90bce..f49deb9b 100644 --- a/src/duckdb_py/pyconnection.cpp +++ b/src/duckdb_py/pyconnection.cpp @@ -424,7 +424,7 @@ std::shared_ptr DuckDBPyConnection::UnregisterUDF(const stri auto &catalog = Catalog::GetCatalog(context, SYSTEM_CATALOG); DropInfo info; info.type = CatalogType::SCALAR_FUNCTION_ENTRY; - info.NameMutable() = Identifier(name); + info.SetName(Identifier(name)); info.allow_drop_internal = true; info.cascade = false; info.if_not_found = OnEntryNotFound::THROW_EXCEPTION; @@ -1664,7 +1664,7 @@ std::unique_ptr DuckDBPyConnection::Table(const string &tname) auto &connection = con.GetConnection(); auto qualified_name = QualifiedName::Parse(tname); if (qualified_name.Schema().empty()) { - qualified_name.SchemaMutable() = DEFAULT_SCHEMA; + qualified_name = QualifiedName(qualified_name.Catalog(), DEFAULT_SCHEMA, qualified_name.Name()); } try { return CreateRelation( diff --git a/src/duckdb_py/typing/pytype.cpp b/src/duckdb_py/typing/pytype.cpp index fef05918..c32ef398 100644 --- a/src/duckdb_py/typing/pytype.cpp +++ b/src/duckdb_py/typing/pytype.cpp @@ -51,7 +51,7 @@ bool DuckDBPyType::EqualsString(const string &type_str) const { std::shared_ptr DuckDBPyType::GetAttribute(const string &name) const { auto name_identifier = Identifier(name); - if (type.id() == LogicalTypeId::STRUCT || type.id() == LogicalTypeId::UNION) { + if (type.id() == LogicalTypeId::STRUCT || type.id() == LogicalTypeId::TUPLE || type.id() == LogicalTypeId::UNION) { auto &children = StructType::GetChildTypes(type); for (idx_t i = 0; i < children.size(); i++) { auto &child = children[i]; @@ -372,6 +372,7 @@ py::list DuckDBPyType::Children() const { switch (type.id()) { case LogicalTypeId::LIST: case LogicalTypeId::STRUCT: + case LogicalTypeId::TUPLE: case LogicalTypeId::UNION: case LogicalTypeId::MAP: case LogicalTypeId::ARRAY: @@ -403,7 +404,7 @@ py::list DuckDBPyType::Children() const { children.append(py::make_tuple("values", strings_list)); return children; } - if (id == LogicalTypeId::STRUCT || id == LogicalTypeId::UNION) { + if (id == LogicalTypeId::STRUCT || id == LogicalTypeId::TUPLE || id == LogicalTypeId::UNION) { auto &struct_children = StructType::GetChildTypes(type); for (idx_t i = 0; i < struct_children.size(); i++) { auto &child = struct_children[i]; diff --git a/tests/fast/api/test_duckdb_query.py b/tests/fast/api/test_duckdb_query.py index 78aea7a7..175de479 100644 --- a/tests/fast/api/test_duckdb_query.py +++ b/tests/fast/api/test_duckdb_query.py @@ -57,7 +57,7 @@ def test_parametrized_explain(self, duckdb_cursor): duckdb_cursor.execute(query, params) results = duckdb_cursor.fetchall() - assert "EXPLAIN_ANALYZE" in results[0][1] + assert "Total Time" in results[0][1] def test_named_param(self): con = duckdb.connect() diff --git a/tests/fast/spark/test_spark_types.py b/tests/fast/spark/test_spark_types.py index af26ec1e..c7402f36 100644 --- a/tests/fast/spark/test_spark_types.py +++ b/tests/fast/spark/test_spark_types.py @@ -135,5 +135,10 @@ def test_all_types_schema(self, spark): ), StructField("map", MapType(StringType(), StringType(), True), True), StructField("time_ns", TimeNSType(), True), + StructField( + "tuple", + StructType([StructField("", IntegerType(), True), StructField("", StringType(), True)]), + True, + ), ] )