diff --git a/src/iceberg/partition_spec.cc b/src/iceberg/partition_spec.cc index ed5aa831a..96287fb87 100644 --- a/src/iceberg/partition_spec.cc +++ b/src/iceberg/partition_spec.cc @@ -192,10 +192,7 @@ Status PartitionSpec::Validate(const Schema& schema, bool allow_missing_fields) partition_field); } const auto& source_type = source_field.value().get().type(); - if (!field_transform->CanTransform(*source_type)) { - return InvalidArgument("Invalid source type {} for transform {}", - source_type->ToString(), field_transform->ToString()); - } + ICEBERG_RETURN_UNEXPECTED(field_transform->Validate(source_type)); // The only valid parent types for a PartitionField are StructTypes. This must be // checked recursively. diff --git a/src/iceberg/sort_order.cc b/src/iceberg/sort_order.cc index b317efb90..04de6b835 100644 --- a/src/iceberg/sort_order.cc +++ b/src/iceberg/sort_order.cc @@ -96,10 +96,7 @@ Status SortOrder::Validate(const Schema& schema) const { const auto& source_type = schema_field.value().get().type(); - if (!field.transform()->CanTransform(*source_type)) { - return InvalidArgument("Invalid source type {} for transform {}", - source_type->ToString(), field.transform()->ToString()); - } + ICEBERG_RETURN_UNEXPECTED(field.transform()->Validate(source_type)); } return {}; } diff --git a/src/iceberg/test/partition_spec_test.cc b/src/iceberg/test/partition_spec_test.cc index e3b86e274..da1724e49 100644 --- a/src/iceberg/test/partition_spec_test.cc +++ b/src/iceberg/test/partition_spec_test.cc @@ -219,6 +219,22 @@ TEST(PartitionSpecTest, InvalidTransformForType) { EXPECT_THAT(result_void, IsOk()); } +TEST(PartitionSpecTest, InvalidParameterizedTransform) { + auto field_id = SchemaField::MakeRequired(1, "id", int32()); + auto field_name = SchemaField::MakeRequired(2, "name", string()); + Schema schema({field_id, field_name}, Schema::kInitialSchemaId); + + PartitionField bucket_field(1, 1000, "id_bucket", Transform::Bucket(0)); + auto bucket_result = PartitionSpec::Make(schema, 1, {bucket_field}, false); + EXPECT_THAT(bucket_result, IsError(ErrorKind::kInvalidArgument)); + EXPECT_THAT(bucket_result, HasErrorMessage("Number of buckets must be positive")); + + PartitionField truncate_field(2, 1000, "name_trunc", Transform::Truncate(0)); + auto truncate_result = PartitionSpec::Make(schema, 1, {truncate_field}, false); + EXPECT_THAT(truncate_result, IsError(ErrorKind::kInvalidArgument)); + EXPECT_THAT(truncate_result, HasErrorMessage("Width must be positive")); +} + TEST(PartitionSpecTest, SourceIdNotFound) { auto field1 = SchemaField::MakeRequired(1, "id", int64()); auto field2 = SchemaField::MakeRequired(2, "ts", timestamp()); diff --git a/src/iceberg/test/sort_order_test.cc b/src/iceberg/test/sort_order_test.cc index d2bef580a..bd23d89c1 100644 --- a/src/iceberg/test/sort_order_test.cc +++ b/src/iceberg/test/sort_order_test.cc @@ -225,6 +225,21 @@ TEST_F(SortOrderTest, MakeInvalidSortOrderTransformCannotApply) { EXPECT_THAT(sort_order, HasErrorMessage("Invalid source type")); } +TEST_F(SortOrderTest, MakeInvalidParameterizedTransform) { + SortField bucket_field(1, Transform::Bucket(0), SortDirection::kAscending, + NullOrder::kFirst); + auto bucket_order = SortOrder::Make(*schema_, 1, std::vector{bucket_field}); + EXPECT_THAT(bucket_order, IsError(ErrorKind::kInvalidArgument)); + EXPECT_THAT(bucket_order, HasErrorMessage("Number of buckets must be positive")); + + SortField truncate_field(2, Transform::Truncate(0), SortDirection::kAscending, + NullOrder::kFirst); + auto truncate_order = + SortOrder::Make(*schema_, 1, std::vector{truncate_field}); + EXPECT_THAT(truncate_order, IsError(ErrorKind::kInvalidArgument)); + EXPECT_THAT(truncate_order, HasErrorMessage("Width must be positive")); +} + TEST_F(SortOrderTest, MakeInvalidSortOrderNonPrimitiveField) { auto struct_field = std::make_unique( 4, "struct_field", diff --git a/src/iceberg/test/transform_test.cc b/src/iceberg/test/transform_test.cc index 8d7cd880d..65a2b5e58 100644 --- a/src/iceberg/test/transform_test.cc +++ b/src/iceberg/test/transform_test.cc @@ -154,10 +154,12 @@ TEST(TransformFromStringTest, PositiveCases) { } TEST(TransformFromStringTest, NegativeCases) { - constexpr std::array invalid_cases = { + constexpr std::array invalid_cases = { "bucket", // missing param "bucket[]", // empty param "bucket[abc]", // invalid number + "bucket[0]", // invalid number of buckets + "truncate[0]", // invalid width "unknown", // unsupported transform "bucket[16", // missing closing bracket "truncate[1]extra" // extra characters diff --git a/src/iceberg/transform.cc b/src/iceberg/transform.cc index 453941c95..cb8852b32 100644 --- a/src/iceberg/transform.cc +++ b/src/iceberg/transform.cc @@ -232,6 +232,18 @@ bool Transform::CanTransform(const Type& source_type) const { std::unreachable(); } +Status Transform::Validate(const std::shared_ptr& source_type) const { + if (!source_type) { + return InvalidArgument("Source type cannot be null for transform {}", ToString()); + } + if (!CanTransform(*source_type)) { + return InvalidArgument("Invalid source type {} for transform {}", + source_type->ToString(), ToString()); + } + ICEBERG_RETURN_UNEXPECTED(Bind(source_type)); + return {}; +} + bool Transform::PreservesOrder() const { switch (transform_type_) { case TransformType::kUnknown: @@ -554,9 +566,15 @@ Result> TransformFromString(std::string_view transfor StringUtils::ParseNumber(match[2].str())); if (type_str == kBucketName) { + if (param <= 0) { + return InvalidArgument("Number of buckets must be positive, got {}", param); + } return Transform::Bucket(param); } if (type_str == kTruncateName) { + if (param <= 0) { + return InvalidArgument("Width must be positive, got {}", param); + } return Transform::Truncate(param); } } diff --git a/src/iceberg/transform.h b/src/iceberg/transform.h index 6b855a74d..405519535 100644 --- a/src/iceberg/transform.h +++ b/src/iceberg/transform.h @@ -159,6 +159,11 @@ class ICEBERG_EXPORT Transform : public util::Formattable { /// \return true if this transform can be applied to the type, false otherwise bool CanTransform(const Type& source_type) const; + /// \brief Validates whether this transform can bind to the given source type. + /// \param source_type The source type to validate against. + /// \return Error status if the transform cannot bind to the source type. + Status Validate(const std::shared_ptr& source_type) const; + /// \brief Whether the transform preserves the order of values (is monotonic). bool PreservesOrder() const;