diff --git a/src/iceberg/json_serde.cc b/src/iceberg/json_serde.cc index c72b7da57..4552db883 100644 --- a/src/iceberg/json_serde.cc +++ b/src/iceberg/json_serde.cc @@ -27,6 +27,8 @@ #include #include "iceberg/constants.h" +#include "iceberg/expression/json_serde_internal.h" +#include "iceberg/expression/literal.h" #include "iceberg/json_serde_internal.h" #include "iceberg/name_mapping.h" #include "iceberg/partition_field.h" @@ -298,6 +300,15 @@ nlohmann::json ToJson(const SchemaField& field) { if (!field.doc().empty()) { json[kDoc] = field.doc(); } + // Defaults are validated to be primitive literals matching the field type, so + // single-value serialization cannot fail here. + if (field.initial_default().has_value()) { + ICEBERG_ASSIGN_OR_THROW(json[kInitialDefault], + ToJson(field.initial_default()->get())); + } + if (field.write_default().has_value()) { + ICEBERG_ASSIGN_OR_THROW(json[kWriteDefault], ToJson(field.write_default()->get())); + } return json; } @@ -310,7 +321,6 @@ nlohmann::json ToJson(const Type& type) { nlohmann::json fields_json = nlohmann::json::array(); for (const auto& field : struct_type.fields()) { fields_json.push_back(ToJson(field)); - // TODO(gangwu): add default values } json[kFields] = fields_json; return json; @@ -552,9 +562,27 @@ Result> FieldFromJson(const nlohmann::json& json) { ICEBERG_ASSIGN_OR_RAISE(auto name, GetJsonValue(json, kName)); ICEBERG_ASSIGN_OR_RAISE(auto required, GetJsonValue(json, kRequired)); ICEBERG_ASSIGN_OR_RAISE(auto doc, GetJsonValueOrDefault(json, kDoc)); + ICEBERG_ASSIGN_OR_RAISE(std::optional initial_default_json, + GetJsonValueOptional(json, kInitialDefault)); + ICEBERG_ASSIGN_OR_RAISE(std::optional write_default_json, + GetJsonValueOptional(json, kWriteDefault)); + + std::shared_ptr initial_default; + if (initial_default_json.has_value()) { + ICEBERG_ASSIGN_OR_RAISE(Literal literal, + LiteralFromJson(*initial_default_json, type.get())); + initial_default = std::make_shared(std::move(literal)); + } + std::shared_ptr write_default; + if (write_default_json.has_value()) { + ICEBERG_ASSIGN_OR_RAISE(Literal literal, + LiteralFromJson(*write_default_json, type.get())); + write_default = std::make_shared(std::move(literal)); + } return std::make_unique(field_id, std::move(name), std::move(type), - !required, doc); + !required, doc, std::move(initial_default), + std::move(write_default)); } Result> SchemaFromJson(const nlohmann::json& json) { diff --git a/src/iceberg/schema.cc b/src/iceberg/schema.cc index fcac43c78..d6347251c 100644 --- a/src/iceberg/schema.cc +++ b/src/iceberg/schema.cc @@ -116,9 +116,15 @@ std::shared_ptr ReassignTypeIds(const std::shared_ptr& type, SchemaField ReassignField(const SchemaField& field, int32_t new_id, const Schema::GetId& get_id, Schema::IdMap& ids_to_reassigned, Schema::IdMap& ids_to_original) { - return {new_id, std::string(field.name()), + // Reassigning IDs only rewrites the field ID and nested type IDs; share the field's + // (immutable) default values rather than copying them. + return {new_id, + std::string(field.name()), ReassignTypeIds(field.type(), get_id, ids_to_reassigned, ids_to_original), - field.optional(), std::string(field.doc())}; + field.optional(), + std::string(field.doc()), + field.initial_default_ptr(), + field.write_default_ptr()}; } std::vector ReassignIds(std::vector fields, @@ -447,7 +453,21 @@ Status Schema::Validate(int32_t format_version) const { } } - // TODO(GuoTao.yu): Check default values when they are supported + // Only the initial-default is gated on format version: it changes how existing + // data files are read (rows written before the column existed materialize this + // value), so it requires the v3 reader contract. A write-default only affects + // values written going forward and does not reinterpret existing data. + if (field.initial_default().has_value() && + format_version < TableMetadata::kMinFormatVersionDefaultValues) { + return InvalidSchema( + "Invalid initial default for {}: non-null default ({}) is not supported " + "until v{}", + field.name(), field.initial_default()->get(), + TableMetadata::kMinFormatVersionDefaultValues); + } + if (field.initial_default().has_value() || field.write_default().has_value()) { + ICEBERG_RETURN_UNEXPECTED(field.Validate()); + } } return {}; diff --git a/src/iceberg/schema_field.cc b/src/iceberg/schema_field.cc index 206915ec2..8c3fb9be6 100644 --- a/src/iceberg/schema_field.cc +++ b/src/iceberg/schema_field.cc @@ -21,19 +21,26 @@ #include #include +#include +#include "iceberg/expression/literal.h" #include "iceberg/type.h" #include "iceberg/util/formatter.h" // IWYU pragma: keep +#include "iceberg/util/macros.h" namespace iceberg { SchemaField::SchemaField(int32_t field_id, std::string_view name, - std::shared_ptr type, bool optional, std::string_view doc) + std::shared_ptr type, bool optional, std::string_view doc, + std::shared_ptr initial_default, + std::shared_ptr write_default) : field_id_(field_id), name_(name), type_(std::move(type)), optional_(optional), - doc_(doc) {} + doc_(doc), + initial_default_(std::move(initial_default)), + write_default_(std::move(write_default)) {} SchemaField SchemaField::MakeOptional(int32_t field_id, std::string_view name, std::shared_ptr type, std::string_view doc) { @@ -55,6 +62,51 @@ bool SchemaField::optional() const { return optional_; } std::string_view SchemaField::doc() const { return doc_; } +std::optional> SchemaField::initial_default() + const { + if (initial_default_ == nullptr) { + return std::nullopt; + } + return std::cref(*initial_default_); +} + +std::optional> SchemaField::write_default() const { + if (write_default_ == nullptr) { + return std::nullopt; + } + return std::cref(*write_default_); +} + +const std::shared_ptr& SchemaField::initial_default_ptr() const { + return initial_default_; +} + +const std::shared_ptr& SchemaField::write_default_ptr() const { + return write_default_; +} + +namespace { + +Status ValidateDefault(const SchemaField& field, const Literal& value, + std::string_view kind) { + if (value.IsNull() || value.IsAboveMax() || value.IsBelowMin()) { + return InvalidSchema("Invalid {} value for {}: must be a non-null value", kind, + field.name()); + } + if (field.type() == nullptr || !field.type()->is_primitive()) { + return InvalidSchema( + "Invalid {} value for {}: default values are only supported for primitive types", + kind, field.name()); + } + if (*value.type() != *field.type()) { + return InvalidSchema("{} of field {} has type {} but expected {}", kind, field.name(), + *value.type(), *field.type()); + } + return {}; +} + +} // namespace + Status SchemaField::Validate() const { if (name_.empty()) [[unlikely]] { return InvalidSchema("SchemaField cannot have empty name"); @@ -62,6 +114,13 @@ Status SchemaField::Validate() const { if (type_ == nullptr) [[unlikely]] { return InvalidSchema("SchemaField cannot have null type"); } + if (initial_default_ != nullptr) { + ICEBERG_RETURN_UNEXPECTED( + ValidateDefault(*this, *initial_default_, "initial-default")); + } + if (write_default_ != nullptr) { + ICEBERG_RETURN_UNEXPECTED(ValidateDefault(*this, *write_default_, "write-default")); + } return {}; } @@ -72,9 +131,23 @@ std::string SchemaField::ToString() const { return result; } +namespace { + +bool DefaultEquals(const std::shared_ptr& lhs, + const std::shared_ptr& rhs) { + if (lhs == nullptr || rhs == nullptr) { + return lhs == rhs; + } + return *lhs == *rhs; +} + +} // namespace + bool SchemaField::Equals(const SchemaField& other) const { return field_id_ == other.field_id_ && name_ == other.name_ && *type_ == *other.type_ && - optional_ == other.optional_; + optional_ == other.optional_ && + DefaultEquals(initial_default_, other.initial_default_) && + DefaultEquals(write_default_, other.write_default_); } } // namespace iceberg diff --git a/src/iceberg/schema_field.h b/src/iceberg/schema_field.h index fd20226a5..e90425ca7 100644 --- a/src/iceberg/schema_field.h +++ b/src/iceberg/schema_field.h @@ -24,7 +24,9 @@ /// type (e.g. a struct). #include +#include #include +#include #include #include @@ -46,8 +48,14 @@ class ICEBERG_EXPORT SchemaField : public iceberg::util::Formattable { /// \param[in] type The field type. /// \param[in] optional Whether values of this field are required or nullable. /// \param[in] doc Optional documentation string for the field. + /// \param[in] initial_default The v3 `initial-default` value, or null if absent. The + /// field shares ownership of the (immutable) value. + /// \param[in] write_default The v3 `write-default` value, or null if absent. The field + /// shares ownership of the (immutable) value. SchemaField(int32_t field_id, std::string_view name, std::shared_ptr type, - bool optional, std::string_view doc = {}); + bool optional, std::string_view doc = {}, + std::shared_ptr initial_default = nullptr, + std::shared_ptr write_default = nullptr); /// \brief Construct an optional (nullable) field. static SchemaField MakeOptional(int32_t field_id, std::string_view name, @@ -71,6 +79,32 @@ class ICEBERG_EXPORT SchemaField : public iceberg::util::Formattable { /// \brief Get the field documentation. std::string_view doc() const; + /// \brief Get the default value for this field used when reading rows written + /// before the field existed (v3 `initial-default`). Empty if absent. + /// + /// The returned reference is a non-owning view into a value owned by this field; + /// it remains valid for the lifetime of this SchemaField. + [[nodiscard]] std::optional> initial_default() + const; + + /// \brief Get the default value for this field used when a writer does not + /// supply a value (v3 `write-default`). Empty if absent. + /// + /// The returned reference is a non-owning view into a value owned by this field; + /// it remains valid for the lifetime of this SchemaField. + [[nodiscard]] std::optional> write_default() + const; + + /// \brief Get the shared owning pointer to the `initial-default` value, or null if + /// absent. Prefer initial_default() for reading; this exists so a rebuilt field can + /// share the (immutable) value rather than copy it. + [[nodiscard]] const std::shared_ptr& initial_default_ptr() const; + + /// \brief Get the shared owning pointer to the `write-default` value, or null if + /// absent. Prefer write_default() for reading; this exists so a rebuilt field can + /// share the (immutable) value rather than copy it. + [[nodiscard]] const std::shared_ptr& write_default_ptr() const; + [[nodiscard]] std::string ToString() const override; Status Validate() const; @@ -100,6 +134,11 @@ class ICEBERG_EXPORT SchemaField : public iceberg::util::Formattable { std::shared_ptr type_; bool optional_; std::string doc_; + // Default values are owned by this field and never mutated after being set; copies + // of the field share the same payload (reference-counted) instead of deep-copying, + // like `type_` above. Sharing is unobservable because the payload is immutable. + std::shared_ptr initial_default_; + std::shared_ptr write_default_; }; } // namespace iceberg diff --git a/src/iceberg/schema_util.cc b/src/iceberg/schema_util.cc index 4ff678fc6..5ff828258 100644 --- a/src/iceberg/schema_util.cc +++ b/src/iceberg/schema_util.cc @@ -172,10 +172,14 @@ Result ProjectNested(const Type& expected_type, const Type& sou iter->second.local_index, prune_source)); } else if (MetadataColumns::IsMetadataColumn(field_id)) { child_projection.kind = FieldProjection::Kind::kMetadata; + } else if (expected_field.initial_default().has_value()) { + // Rows written before the field existed assume its `initial-default` value. + child_projection.kind = FieldProjection::Kind::kDefault; + child_projection.from = expected_field.initial_default()->get(); } else if (expected_field.optional()) { child_projection.kind = FieldProjection::Kind::kNull; } else { - // TODO(gangwu): support default value for v3 and constant value + // TODO(gangwu): support constant value return InvalidSchema("Missing required field: {}", expected_field.ToString()); } result.children.emplace_back(std::move(child_projection)); diff --git a/src/iceberg/test/resources/TableMetadataV3Valid.json b/src/iceberg/test/resources/TableMetadataV3Valid.json new file mode 100644 index 000000000..712d4b5bd --- /dev/null +++ b/src/iceberg/test/resources/TableMetadataV3Valid.json @@ -0,0 +1,123 @@ +{ + "format-version": 3, + "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1", + "location": "s3://bucket/test/location", + "last-sequence-number": 34, + "next-row-id": 0, + "last-updated-ms": 1602638573590, + "last-column-id": 3, + "current-schema-id": 1, + "schemas": [ + { + "type": "struct", + "schema-id": 0, + "fields": [ + { + "id": 1, + "name": "x", + "required": true, + "type": "long" + } + ] + }, + { + "type": "struct", + "schema-id": 1, + "identifier-field-ids": [ + 1, + 2 + ], + "fields": [ + { + "id": 1, + "name": "x", + "required": true, + "type": "long" + }, + { + "id": 2, + "name": "y", + "required": true, + "type": "long", + "doc": "comment" + }, + { + "id": 3, + "name": "z", + "required": true, + "type": "long" + } + ] + } + ], + "default-spec-id": 0, + "partition-specs": [ + { + "spec-id": 0, + "fields": [ + { + "name": "x", + "transform": "identity", + "source-id": 1, + "field-id": 1000 + } + ] + } + ], + "last-partition-id": 1000, + "default-sort-order-id": 3, + "sort-orders": [ + { + "order-id": 3, + "fields": [ + { + "transform": "identity", + "source-id": 2, + "direction": "asc", + "null-order": "nulls-first" + }, + { + "transform": "bucket[4]", + "source-id": 3, + "direction": "desc", + "null-order": "nulls-last" + } + ] + } + ], + "properties": {}, + "current-snapshot-id": 3055729675574597004, + "snapshots": [ + { + "snapshot-id": 3051729675574597004, + "timestamp-ms": 1515100955770, + "sequence-number": 0, + "summary": { + "operation": "append" + }, + "manifest-list": "s3://a/b/1.avro" + }, + { + "snapshot-id": 3055729675574597004, + "parent-snapshot-id": 3051729675574597004, + "timestamp-ms": 1555100955770, + "sequence-number": 1, + "summary": { + "operation": "append" + }, + "manifest-list": "s3://a/b/2.avro", + "schema-id": 1 + } + ], + "snapshot-log": [ + { + "snapshot-id": 3051729675574597004, + "timestamp-ms": 1515100955770 + }, + { + "snapshot-id": 3055729675574597004, + "timestamp-ms": 1555100955770 + } + ], + "metadata-log": [] +} diff --git a/src/iceberg/test/schema_json_test.cc b/src/iceberg/test/schema_json_test.cc index 08275a45c..71fc474f4 100644 --- a/src/iceberg/test/schema_json_test.cc +++ b/src/iceberg/test/schema_json_test.cc @@ -24,6 +24,7 @@ #include #include +#include "iceberg/expression/literal.h" #include "iceberg/json_serde_internal.h" #include "iceberg/schema.h" #include "iceberg/schema_field.h" @@ -137,6 +138,50 @@ TEST(SchemaJsonTest, RoundTrip) { ASSERT_EQ(dumped_json, json); } +TEST(SchemaJsonTest, FieldWithDefaultValuesRoundTrip) { + constexpr std::string_view json = + R"({"fields":[{"id":1,"initial-default":42,"name":"id","required":true,"type":"int","write-default":7},{"id":2,"initial-default":"n/a","name":"name","required":false,"type":"string"}],"schema-id":1,"type":"struct"})"; + + ICEBERG_UNWRAP_OR_FAIL(auto schema, SchemaFromJson(nlohmann::json::parse(json))); + ASSERT_EQ(schema->fields().size(), 2); + + const auto& field1 = schema->fields()[0]; + ASSERT_TRUE(field1.initial_default().has_value()); + ASSERT_EQ(field1.initial_default()->get(), Literal::Int(42)); + ASSERT_TRUE(field1.write_default().has_value()); + ASSERT_EQ(field1.write_default()->get(), Literal::Int(7)); + + const auto& field2 = schema->fields()[1]; + ASSERT_TRUE(field2.initial_default().has_value()); + ASSERT_EQ(field2.initial_default()->get(), Literal::String("n/a")); + ASSERT_FALSE(field2.write_default().has_value()); + + ASSERT_EQ(ToJson(*schema).dump(), json); +} + +TEST(SchemaJsonTest, FieldWithMismatchedDefaultValueFails) { + constexpr std::string_view json = + R"({"fields":[{"id":1,"initial-default":"oops","name":"id","required":true,"type":"int"}],"schema-id":1,"type":"struct"})"; + + auto result = SchemaFromJson(nlohmann::json::parse(json)); + ASSERT_FALSE(result.has_value()); +} + +TEST(SchemaJsonTest, NestedFieldWithDefaultValuesRoundTrip) { + constexpr std::string_view json = + R"({"fields":[{"id":1,"name":"person","required":true,"type":{"fields":[{"id":2,"initial-default":18,"name":"age","required":true,"type":"int","write-default":21}],"type":"struct"}}],"schema-id":1,"type":"struct"})"; + + ICEBERG_UNWRAP_OR_FAIL(auto schema, SchemaFromJson(nlohmann::json::parse(json))); + const auto& person = schema->fields()[0]; + const auto& nested = dynamic_cast(*person.type()).fields()[0]; + ASSERT_TRUE(nested.initial_default().has_value()); + ASSERT_EQ(nested.initial_default()->get(), Literal::Int(18)); + ASSERT_TRUE(nested.write_default().has_value()); + ASSERT_EQ(nested.write_default()->get(), Literal::Int(21)); + + ASSERT_EQ(ToJson(*schema).dump(), json); +} + TEST(SchemaJsonTest, UnknownFieldRoundTrip) { constexpr std::string_view json = R"({"fields":[{"id":1,"name":"mystery","required":false,"type":"unknown"}],"schema-id":1,"type":"struct"})"; diff --git a/src/iceberg/test/schema_test.cc b/src/iceberg/test/schema_test.cc index 8f1b20035..28f7eb9a0 100644 --- a/src/iceberg/test/schema_test.cc +++ b/src/iceberg/test/schema_test.cc @@ -25,6 +25,7 @@ #include #include +#include "iceberg/expression/literal.h" #include "iceberg/result.h" #include "iceberg/schema_field.h" #include "iceberg/table_metadata.h" @@ -133,6 +134,61 @@ TEST(SchemaTest, ValidateRejectsV3TypesBeforeFormatV3) { iceberg::IsOk()); } +TEST(SchemaTest, ValidateRejectsInitialDefaultBeforeFormatV3) { + iceberg::Schema schema({iceberg::SchemaField( + 1, "id", iceberg::int32(), false, /*doc=*/{}, + std::make_shared(iceberg::Literal::Int(42)))}); + + auto status = schema.Validate(2); + ASSERT_THAT(status, iceberg::IsError(iceberg::ErrorKind::kInvalidSchema)); + EXPECT_THAT(status, iceberg::HasErrorMessage("is not supported until v3")); + + EXPECT_THAT(schema.Validate(iceberg::TableMetadata::kSupportedTableFormatVersion), + iceberg::IsOk()); +} + +TEST(SchemaTest, ValidateDoesNotVersionGateWriteDefault) { + // A write-default does not reinterpret existing data, so it is not gated on + // format version: a write-default alone is accepted below v3. + iceberg::Schema schema({iceberg::SchemaField( + 1, "id", iceberg::int32(), false, /*doc=*/{}, /*initial_default=*/nullptr, + std::make_shared(iceberg::Literal::Int(7)))}); + + EXPECT_THAT(schema.Validate(2), iceberg::IsOk()); +} + +TEST(SchemaTest, ValidateRejectsMismatchedDefaultValue) { + iceberg::Schema schema({iceberg::SchemaField( + 1, "id", iceberg::int32(), false, /*doc=*/{}, /*initial_default=*/nullptr, + std::make_shared(iceberg::Literal::String("oops")))}); + + auto status = schema.Validate(iceberg::TableMetadata::kSupportedTableFormatVersion); + ASSERT_THAT(status, iceberg::IsError(iceberg::ErrorKind::kInvalidSchema)); + EXPECT_THAT(status, iceberg::HasErrorMessage("write-default")); +} + +TEST(SchemaTest, ReassignIdsPreservesDefaultValues) { + // Reassigning field IDs rebuilds each SchemaField, so the rebuild must carry the + // default values over to the field with the new ID. + std::vector fields; + fields.push_back(iceberg::SchemaField( + 1, "id", iceberg::int32(), false, /*doc=*/{}, + std::make_shared(iceberg::Literal::Int(42)), + std::make_shared(iceberg::Literal::Int(7)))); + auto reassign_id = [](int32_t old_id) { return old_id + 1000; }; + + iceberg::Schema schema(std::move(fields), iceberg::Schema::kInitialSchemaId, + reassign_id); + + ASSERT_EQ(schema.fields().size(), 1); + const iceberg::SchemaField& field = schema.fields()[0]; + EXPECT_EQ(field.field_id(), 1001); + ASSERT_TRUE(field.initial_default().has_value()); + EXPECT_EQ(field.initial_default()->get(), iceberg::Literal::Int(42)); + ASSERT_TRUE(field.write_default().has_value()); + EXPECT_EQ(field.write_default()->get(), iceberg::Literal::Int(7)); +} + TEST(SchemaTest, ValidateRejectsInvalidUnknownFields) { iceberg::Schema required_unknown_schema( {iceberg::SchemaField(1, "mystery", iceberg::unknown(), false)}); diff --git a/src/iceberg/test/schema_util_test.cc b/src/iceberg/test/schema_util_test.cc index ee075006f..9a3eff887 100644 --- a/src/iceberg/test/schema_util_test.cc +++ b/src/iceberg/test/schema_util_test.cc @@ -24,6 +24,7 @@ #include #include +#include "iceberg/expression/literal.h" #include "iceberg/metadata_columns.h" #include "iceberg/schema.h" #include "iceberg/schema_field.h" @@ -179,6 +180,58 @@ TEST(SchemaUtilTest, ProjectMissingRequiredField) { ASSERT_THAT(projection_result, HasErrorMessage("Missing required field")); } +TEST(SchemaUtilTest, ProjectMissingRequiredFieldWithInitialDefault) { + Schema source_schema = CreateFlatSchema(); + Schema expected_schema({ + SchemaField::MakeRequired(/*field_id=*/1, "id", iceberg::int64()), + SchemaField(/*field_id=*/10, "extra", iceberg::int32(), /*optional=*/false, + /*doc=*/{}, std::make_shared(Literal::Int(42))), + }); + + auto projection_result = + Project(expected_schema, source_schema, /*prune_source=*/false); + ASSERT_THAT(projection_result, IsOk()); + + const auto& projection = *projection_result; + ASSERT_EQ(projection.fields.size(), 2); + AssertProjectedField(projection.fields[0], 0); + ASSERT_EQ(projection.fields[1].kind, FieldProjection::Kind::kDefault); + ASSERT_EQ(std::get(projection.fields[1].from), Literal::Int(42)); +} + +TEST(SchemaUtilTest, ProjectMissingOptionalFieldWithInitialDefault) { + // An optional field with an initial-default reads the default, not null. + Schema source_schema = CreateFlatSchema(); + Schema expected_schema({ + SchemaField::MakeRequired(/*field_id=*/1, "id", iceberg::int64()), + SchemaField(/*field_id=*/10, "extra", iceberg::string(), /*optional=*/true, + /*doc=*/{}, std::make_shared(Literal::String("n/a"))), + }); + + auto projection_result = + Project(expected_schema, source_schema, /*prune_source=*/false); + ASSERT_THAT(projection_result, IsOk()); + + const auto& projection = *projection_result; + ASSERT_EQ(projection.fields.size(), 2); + ASSERT_EQ(projection.fields[1].kind, FieldProjection::Kind::kDefault); + ASSERT_EQ(std::get(projection.fields[1].from), Literal::String("n/a")); +} + +TEST(SchemaUtilTest, ProjectPresentFieldIgnoresInitialDefault) { + // initial-default only applies when the field is missing from the data file. + Schema source_schema = CreateFlatSchema(); + Schema expected_schema({ + SchemaField(/*field_id=*/1, "id", iceberg::int64(), /*optional=*/false, + /*doc=*/{}, std::make_shared(Literal::Long(-1))), + }); + + auto projection_result = + Project(expected_schema, source_schema, /*prune_source=*/false); + ASSERT_THAT(projection_result, IsOk()); + AssertProjectedField(projection_result->fields[0], 0); +} + TEST(SchemaUtilTest, ProjectMetadataColumn) { Schema source_schema = CreateFlatSchema(); Schema expected_schema({