From 3588723cb804615a2e5062f298e037578584ab66 Mon Sep 17 00:00:00 2001 From: wernerdaehn Date: Fri, 5 Jan 2018 12:38:25 +0000 Subject: [PATCH] Proposed change in AvroSchema to handle circular references. This is a suggestion only at the moment and not complete. The code has one TODO. And it needs the matching changes to actually write the records into the correct fields. --- .../parquet/avro/AvroSchemaConverter.java | 57 +- parquet-avro/src/test/resources/Patient.avsc | 1693 +++++++++++++++++ .../org/apache/parquet/schema/GroupType.java | 53 +- .../apache/parquet/schema/MessageType.java | 5 + 4 files changed, 1801 insertions(+), 7 deletions(-) create mode 100644 parquet-avro/src/test/resources/Patient.avsc diff --git a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroSchemaConverter.java b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroSchemaConverter.java index 70b6525f60..84a4bb7286 100644 --- a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroSchemaConverter.java +++ b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroSchemaConverter.java @@ -58,6 +58,9 @@ public class AvroSchemaConverter { private final boolean assumeRepeatedIsListElement; private final boolean writeOldListStructure; + + private ArrayList schemapath; + private ArrayList grouppath; public AvroSchemaConverter() { this.assumeRepeatedIsListElement = ADD_LIST_ELEMENT_RECORDS_DEFAULT; @@ -112,7 +115,13 @@ public MessageType convert(Schema avroSchema) { if (!avroSchema.getType().equals(Schema.Type.RECORD)) { throw new IllegalArgumentException("Avro schema must be a record."); } - return new MessageType(avroSchema.getFullName(), convertFields(avroSchema.getFields())); + schemapath = new ArrayList(); + schemapath.add(avroSchema); + grouppath = new ArrayList(); + MessageType m = new MessageType(avroSchema.getFullName()); + grouppath.add(m); + m.addFields(convertFields(avroSchema.getFields())); + return m; } private List convertFields(List fields) { @@ -149,7 +158,50 @@ private
Type convertField(String fieldName, Schema schema, Type.Repetition repet } else if (type.equals(Schema.Type.STRING)) { builder = Types.primitive(BINARY, repetition).as(UTF8); } else if (type.equals(Schema.Type.RECORD)) { - return new GroupType(repetition, fieldName, convertFields(schema.getFields())); + /* + * A Schema might contain directly or indirectly a parent schema. + * Example1: "Person"-Schema has a field of type array-of-"Person" named "children" --> A "Person" can have multiple Person records in the field "children" + * Example2: "Person"-Schema has a field "contacts" which lists various contact options. These contact options have an optional field naturalperson which is of type "Person" + * + * To solve that, whenever a new record schema is found, we check if this schema has already been used somewhere along the path. + * If No, then it is just a regular structure tree, no circular references where one schema has itself as child. + * If Yes, then this field is redefined as an INT64 containing a generated ID and records of that element can be found in the parent structure via the __ID field. + */ + int index = schemapath.lastIndexOf(schema); // Has the current schema been used in the schema tree already? + if (index == -1) { + /* + * No, it has not been used, it is the first time this schema appears in this section of the tree, hence simply add it. + * But we need to build the schema tree so the recursive calls know the tree structure. + * And we need to build the same tree with the generated GroupTypes so we can add the __ID column in case it is needed. + */ + schemapath.add(schema); + GroupType group = new GroupType(repetition, fieldName); + grouppath.add(group); + group.addFields(convertFields(schema.getFields())); + schemapath.remove(schemapath.size()-1); + grouppath.remove(grouppath.size()-1); + return group; + } else { + /* + * We found a recursion like Schema1 -> Schema2 -> Schema3 -> .... SchemaN -> Schema2.
+ * In that case the column within the SchemaN that is reusing Schema2 as datatype gets an INT64 and the reused Schema2 has to have an additional __ID column. + * This __ID column will not be filled for Schema1 fields but contain the records for the SchemaN field with the __ID column reference. + */ + GroupType referencegroup = grouppath.get(index); + if (!referencegroup.containsField("__ID")) { + if (!referencegroup.isRepetition(REPEATED)) { + /* + * Originally this AvroSchema can contain a single record only. But as we are reusing it to store child + * records as well, it needs to be turned into an array. + * + * What is the most efficient way for that?? + */ + //TODO: Change referencegroup to an array object + } + referencegroup.addField(Types.primitive(INT64, repetition).named("__ID")); + } + builder = Types.primitive(INT64, repetition); + } } else if (type.equals(Schema.Type.ENUM)) { builder = Types.primitive(BINARY, repetition).as(ENUM); } else if (type.equals(Schema.Type.ARRAY)) { @@ -443,3 +495,4 @@ private static Schema optional(Schema original) { original)); } } + diff --git a/parquet-avro/src/test/resources/Patient.avsc b/parquet-avro/src/test/resources/Patient.avsc new file mode 100644 index 0000000000..36d149295b --- /dev/null +++ b/parquet-avro/src/test/resources/Patient.avsc @@ -0,0 +1,1693 @@ +{ + "type" : "record", + "name" : "Patient", + "fields" : [ { + "name" : "__change_type", + "type" : [ "null", "string" ], + "doc" : "Indicates how the row is to be processed: Insert, Update, Delete, upsert/Autocorrect, eXterminate, Truncate", + "default" : "NULL" + }, { + "name" : "__change_time", + "type" : [ "null", "long" ], + "doc" : "Timestamp of the transaction. All rows of the transaction have the same value.", + "default" : "NULL" + }, { + "name" : "__source_rowid", + "type" : [ "null", "string" ], + "doc" : "Optional unqiue and static pointer to the row, e.g. 
Oracle rowid", + "default" : "NULL" + }, { + "name" : "__source_transaction", + "type" : [ "null", "string" ], + "doc" : "Optional source transaction information for auditing", + "default" : "NULL" + }, { + "name" : "id", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "id" + }, { + "name" : "meta", + "type" : [ "null", { + "type" : "record", + "name" : "Meta", + "namespace" : "Meta", + "fields" : [ { + "name" : "id", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "extension", + "type" : [ "null", { + "type" : "array", + "items" : { + "type" : "record", + "name" : "Extension", + "namespace" : "Meta.Extension", + "fields" : [ { + "name" : "id", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "extension", + "type" : [ "null", { + "type" : "array", + "items" : "Extension" + } ], + "default" : "NULL", + "__source_data_type" : "Extension" + }, { + "name" : "url", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "uri" + }, { + "name" : "valueBase64Binary", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "base64Binary" + }, { + "name" : "valueBoolean", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "boolean" + }, { + "name" : "valueCode", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "code" + }, { + "name" : "valueDate", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "date" + }, { + "name" : "valueDateTime", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "dateTime" + }, { + "name" : "valueDecimal", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "decimal" + }, { + "name" : "valueId", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "id" + }, { + "name" : "valueInstant", 
+ "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "instant" + }, { + "name" : "valueInteger", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "integer" + }, { + "name" : "valueMarkdown", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "markdown" + }, { + "name" : "valueOid", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "oid" + }, { + "name" : "valuePositiveInt", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "positiveInt" + }, { + "name" : "valueString", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "valueTime", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "time" + }, { + "name" : "valueUnsignedInt", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "unsignedInt" + }, { + "name" : "valueUri", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "uri" + }, { + "name" : "valueAddress", + "type" : [ "null", { + "type" : "record", + "name" : "Address", + "namespace" : "Meta.Extension.Address", + "fields" : [ { + "name" : "id", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "extension", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.Extension" + } ], + "default" : "NULL", + "__source_data_type" : "Extension" + }, { + "name" : "use", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "code" + }, { + "name" : "type", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "code" + }, { + "name" : "text", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "line", + "type" : [ "null", { + "type" : "array", + "items" : "string" + } ], + "default" : "NULL", + 
"__source_data_type" : "string" + }, { + "name" : "city", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "district", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "state", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "postalCode", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "country", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "period", + "type" : [ "null", { + "type" : "record", + "name" : "Period", + "namespace" : "Meta.Extension.Address.Period", + "fields" : [ { + "name" : "id", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "extension", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.Extension" + } ], + "default" : "NULL", + "__source_data_type" : "Extension" + }, { + "name" : "start", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "dateTime" + }, { + "name" : "end", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "dateTime" + } ] + } ], + "default" : "NULL", + "__source_data_type" : "Period" + } ] + } ], + "default" : "NULL", + "__source_data_type" : "Address" + }, { + "name" : "valueAge", + "type" : [ "null", { + "type" : "record", + "name" : "Age", + "namespace" : "Meta.Extension.Age", + "fields" : [ { + "name" : "id", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "extension", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.Extension" + } ], + "default" : "NULL", + "__source_data_type" : "Extension" + }, { + "name" : "value", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "decimal" + }, { + 
"name" : "comparator", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "code" + }, { + "name" : "unit", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "system", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "uri" + }, { + "name" : "code", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "code" + } ] + } ], + "default" : "NULL", + "__source_data_type" : "Age" + }, { + "name" : "valueAnnotation", + "type" : [ "null", { + "type" : "record", + "name" : "Annotation", + "namespace" : "Meta.Extension.Annotation", + "fields" : [ { + "name" : "id", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "extension", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.Extension" + } ], + "default" : "NULL", + "__source_data_type" : "Extension" + }, { + "name" : "authorReference", + "type" : [ "null", { + "type" : "record", + "name" : "Reference", + "namespace" : "Meta.Extension.Annotation.Reference", + "fields" : [ { + "name" : "id", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "extension", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.Extension" + } ], + "default" : "NULL", + "__source_data_type" : "Extension" + }, { + "name" : "reference", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "identifier", + "type" : [ "null", { + "type" : "record", + "name" : "Identifier", + "namespace" : "Meta.Extension.Annotation.Reference.Identifier", + "fields" : [ { + "name" : "id", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "extension", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.Extension" + } ], + "default" : "NULL", + 
"__source_data_type" : "Extension" + }, { + "name" : "use", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "code" + }, { + "name" : "type", + "type" : [ "null", { + "type" : "record", + "name" : "CodeableConcept", + "namespace" : "Meta.Extension.Annotation.Reference.Identifier.CodeableConcept", + "fields" : [ { + "name" : "id", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "extension", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.Extension" + } ], + "default" : "NULL", + "__source_data_type" : "Extension" + }, { + "name" : "coding", + "type" : [ "null", { + "type" : "array", + "items" : { + "type" : "record", + "name" : "Coding", + "namespace" : "Meta.Extension.Annotation.Reference.Identifier.CodeableConcept.Coding", + "fields" : [ { + "name" : "id", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "extension", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.Extension" + } ], + "default" : "NULL", + "__source_data_type" : "Extension" + }, { + "name" : "system", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "uri" + }, { + "name" : "version", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "code", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "code" + }, { + "name" : "display", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "userSelected", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "boolean" + } ] + } + } ], + "default" : "NULL", + "__source_data_type" : "Coding" + }, { + "name" : "text", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + } ] + } ], + "default" : "NULL", + "__source_data_type" : 
"CodeableConcept" + }, { + "name" : "system", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "uri" + }, { + "name" : "value", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "period", + "type" : [ "null", "Meta.Extension.Address.Period.Period" ], + "default" : "NULL", + "__source_data_type" : "Period" + }, { + "name" : "assigner", + "type" : [ "null", "Meta.Extension.Annotation.Reference.Reference" ], + "default" : "NULL", + "__source_data_type" : "Reference" + } ] + } ], + "default" : "NULL", + "__source_data_type" : "Identifier" + }, { + "name" : "display", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + } ] + } ], + "default" : "NULL", + "__source_data_type" : "Reference" + }, { + "name" : "authorString", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "time", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "dateTime" + }, { + "name" : "text", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + } ] + } ], + "default" : "NULL", + "__source_data_type" : "Annotation" + }, { + "name" : "valueAttachment", + "type" : [ "null", { + "type" : "record", + "name" : "Attachment", + "namespace" : "Meta.Extension.Attachment", + "fields" : [ { + "name" : "id", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "extension", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.Extension" + } ], + "default" : "NULL", + "__source_data_type" : "Extension" + }, { + "name" : "contentType", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "code" + }, { + "name" : "language", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "code" + }, { + "name" : "data", + "type" : [ "null", "string" ], + 
"default" : "NULL", + "__source_data_type" : "base64Binary" + }, { + "name" : "url", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "uri" + }, { + "name" : "size", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "unsignedInt" + }, { + "name" : "hash", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "base64Binary" + }, { + "name" : "title", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "creation", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "dateTime" + } ] + } ], + "default" : "NULL", + "__source_data_type" : "Attachment" + }, { + "name" : "valueCodeableConcept", + "type" : [ "null", "Meta.Extension.Annotation.Reference.Identifier.CodeableConcept.CodeableConcept" ], + "default" : "NULL", + "__source_data_type" : "CodeableConcept" + }, { + "name" : "valueCoding", + "type" : [ "null", "Meta.Extension.Annotation.Reference.Identifier.CodeableConcept.Coding.Coding" ], + "default" : "NULL", + "__source_data_type" : "Coding" + }, { + "name" : "valueContactPoint", + "type" : [ "null", { + "type" : "record", + "name" : "ContactPoint", + "namespace" : "Meta.Extension.ContactPoint", + "fields" : [ { + "name" : "id", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "extension", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.Extension" + } ], + "default" : "NULL", + "__source_data_type" : "Extension" + }, { + "name" : "system", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "code" + }, { + "name" : "value", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "use", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "code" + }, { + "name" : "rank", + "type" : [ "null", "string" ], + "default" 
: "NULL", + "__source_data_type" : "positiveInt" + }, { + "name" : "period", + "type" : [ "null", "Meta.Extension.Address.Period.Period" ], + "default" : "NULL", + "__source_data_type" : "Period" + } ] + } ], + "default" : "NULL", + "__source_data_type" : "ContactPoint" + }, { + "name" : "valueCount", + "type" : [ "null", { + "type" : "record", + "name" : "Count", + "namespace" : "Meta.Extension.Count", + "fields" : [ { + "name" : "id", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "extension", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.Extension" + } ], + "default" : "NULL", + "__source_data_type" : "Extension" + }, { + "name" : "value", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "decimal" + }, { + "name" : "comparator", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "code" + }, { + "name" : "unit", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "system", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "uri" + }, { + "name" : "code", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "code" + } ] + } ], + "default" : "NULL", + "__source_data_type" : "Count" + }, { + "name" : "valueDistance", + "type" : [ "null", { + "type" : "record", + "name" : "Distance", + "namespace" : "Meta.Extension.Distance", + "fields" : [ { + "name" : "id", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "extension", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.Extension" + } ], + "default" : "NULL", + "__source_data_type" : "Extension" + }, { + "name" : "value", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "decimal" + }, { + "name" : "comparator", + "type" : [ "null", "string" ], + "default" : 
"NULL", + "__source_data_type" : "code" + }, { + "name" : "unit", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "system", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "uri" + }, { + "name" : "code", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "code" + } ] + } ], + "default" : "NULL", + "__source_data_type" : "Distance" + }, { + "name" : "valueDuration", + "type" : [ "null", { + "type" : "record", + "name" : "Duration", + "namespace" : "Meta.Extension.Duration", + "fields" : [ { + "name" : "id", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "extension", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.Extension" + } ], + "default" : "NULL", + "__source_data_type" : "Extension" + }, { + "name" : "value", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "decimal" + }, { + "name" : "comparator", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "code" + }, { + "name" : "unit", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "system", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "uri" + }, { + "name" : "code", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "code" + } ] + } ], + "default" : "NULL", + "__source_data_type" : "Duration" + }, { + "name" : "valueHumanName", + "type" : [ "null", { + "type" : "record", + "name" : "HumanName", + "namespace" : "Meta.Extension.HumanName", + "fields" : [ { + "name" : "id", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "extension", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.Extension" + } ], + "default" : "NULL", + "__source_data_type" : "Extension" 
+ }, { + "name" : "use", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "code" + }, { + "name" : "text", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "family", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "given", + "type" : [ "null", { + "type" : "array", + "items" : "string" + } ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "prefix", + "type" : [ "null", { + "type" : "array", + "items" : "string" + } ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "suffix", + "type" : [ "null", { + "type" : "array", + "items" : "string" + } ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "period", + "type" : [ "null", "Meta.Extension.Address.Period.Period" ], + "default" : "NULL", + "__source_data_type" : "Period" + } ] + } ], + "default" : "NULL", + "__source_data_type" : "HumanName" + }, { + "name" : "valueIdentifier", + "type" : [ "null", "Meta.Extension.Annotation.Reference.Identifier.Identifier" ], + "default" : "NULL", + "__source_data_type" : "Identifier" + }, { + "name" : "valueMoney", + "type" : [ "null", { + "type" : "record", + "name" : "Money", + "namespace" : "Meta.Extension.Money", + "fields" : [ { + "name" : "id", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "extension", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.Extension" + } ], + "default" : "NULL", + "__source_data_type" : "Extension" + }, { + "name" : "value", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "decimal" + }, { + "name" : "comparator", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "code" + }, { + "name" : "unit", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + 
}, { + "name" : "system", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "uri" + }, { + "name" : "code", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "code" + } ] + } ], + "default" : "NULL", + "__source_data_type" : "Money" + }, { + "name" : "valuePeriod", + "type" : [ "null", "Meta.Extension.Address.Period.Period" ], + "default" : "NULL", + "__source_data_type" : "Period" + }, { + "name" : "valueQuantity", + "type" : [ "null", { + "type" : "record", + "name" : "Quantity", + "namespace" : "Meta.Extension.Quantity", + "fields" : [ { + "name" : "id", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "extension", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.Extension" + } ], + "default" : "NULL", + "__source_data_type" : "Extension" + }, { + "name" : "value", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "decimal" + }, { + "name" : "comparator", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "code" + }, { + "name" : "unit", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "system", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "uri" + }, { + "name" : "code", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "code" + } ] + } ], + "default" : "NULL", + "__source_data_type" : "Quantity" + }, { + "name" : "valueRange", + "type" : [ "null", { + "type" : "record", + "name" : "Range", + "namespace" : "Meta.Extension.Range", + "fields" : [ { + "name" : "id", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "extension", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.Extension" + } ], + "default" : "NULL", + "__source_data_type" : "Extension" + }, { + "name" : 
"low", + "type" : [ "null", "Meta.Extension.Quantity.Quantity" ], + "default" : "NULL", + "__source_data_type" : "Quantity" + }, { + "name" : "high", + "type" : [ "null", "Meta.Extension.Quantity.Quantity" ], + "default" : "NULL", + "__source_data_type" : "Quantity" + } ] + } ], + "default" : "NULL", + "__source_data_type" : "Range" + }, { + "name" : "valueRatio", + "type" : [ "null", { + "type" : "record", + "name" : "Ratio", + "namespace" : "Meta.Extension.Ratio", + "fields" : [ { + "name" : "id", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "extension", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.Extension" + } ], + "default" : "NULL", + "__source_data_type" : "Extension" + }, { + "name" : "numerator", + "type" : [ "null", "Meta.Extension.Quantity.Quantity" ], + "default" : "NULL", + "__source_data_type" : "Quantity" + }, { + "name" : "denominator", + "type" : [ "null", "Meta.Extension.Quantity.Quantity" ], + "default" : "NULL", + "__source_data_type" : "Quantity" + } ] + } ], + "default" : "NULL", + "__source_data_type" : "Ratio" + }, { + "name" : "valueReference", + "type" : [ "null", "Meta.Extension.Annotation.Reference.Reference" ], + "default" : "NULL", + "__source_data_type" : "Reference" + }, { + "name" : "valueSampledData", + "type" : [ "null", { + "type" : "record", + "name" : "SampledData", + "namespace" : "Meta.Extension.SampledData", + "fields" : [ { + "name" : "id", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "extension", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.Extension" + } ], + "default" : "NULL", + "__source_data_type" : "Extension" + }, { + "name" : "origin", + "type" : [ "null", "Meta.Extension.Quantity.Quantity" ], + "default" : "NULL", + "__source_data_type" : "Quantity" + }, { + "name" : "period", + "type" : [ "null", "string" ], + "default" : "NULL", + 
"__source_data_type" : "decimal" + }, { + "name" : "factor", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "decimal" + }, { + "name" : "lowerLimit", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "decimal" + }, { + "name" : "upperLimit", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "decimal" + }, { + "name" : "dimensions", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "positiveInt" + }, { + "name" : "data", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + } ] + } ], + "default" : "NULL", + "__source_data_type" : "SampledData" + }, { + "name" : "valueSignature", + "type" : [ "null", { + "type" : "record", + "name" : "Signature", + "namespace" : "Meta.Extension.Signature", + "fields" : [ { + "name" : "id", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "extension", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.Extension" + } ], + "default" : "NULL", + "__source_data_type" : "Extension" + }, { + "name" : "type", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.Annotation.Reference.Identifier.CodeableConcept.Coding.Coding" + } ], + "default" : "NULL", + "__source_data_type" : "Coding" + }, { + "name" : "when", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "instant" + }, { + "name" : "whoUri", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "uri" + }, { + "name" : "whoReference", + "type" : [ "null", "Meta.Extension.Annotation.Reference.Reference" ], + "default" : "NULL", + "__source_data_type" : "Reference" + }, { + "name" : "onBehalfOfUri", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "uri" + }, { + "name" : "onBehalfOfReference", + "type" : [ "null", 
"Meta.Extension.Annotation.Reference.Reference" ], + "default" : "NULL", + "__source_data_type" : "Reference" + }, { + "name" : "contentType", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "code" + }, { + "name" : "blob", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "base64Binary" + } ] + } ], + "default" : "NULL", + "__source_data_type" : "Signature" + }, { + "name" : "valueTiming", + "type" : [ "null", { + "type" : "record", + "name" : "Timing", + "namespace" : "Meta.Extension.Timing", + "fields" : [ { + "name" : "id", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "extension", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.Extension" + } ], + "default" : "NULL", + "__source_data_type" : "Extension" + }, { + "name" : "event", + "type" : [ "null", { + "type" : "array", + "items" : "string" + } ], + "default" : "NULL", + "__source_data_type" : "dateTime" + }, { + "name" : "repeat", + "type" : [ "null", { + "type" : "record", + "name" : "Element", + "namespace" : "Meta.Extension.Timing.Element", + "fields" : [ { + "name" : "id", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "extension", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.Extension" + } ], + "default" : "NULL", + "__source_data_type" : "Extension" + } ] + } ], + "default" : "NULL", + "__source_data_type" : "Element" + }, { + "name" : "boundsDuration", + "type" : [ "null", "Meta.Extension.Duration.Duration" ], + "default" : "NULL", + "__source_data_type" : "Duration" + }, { + "name" : "boundsRange", + "type" : [ "null", "Meta.Extension.Range.Range" ], + "default" : "NULL", + "__source_data_type" : "Range" + }, { + "name" : "boundsPeriod", + "type" : [ "null", "Meta.Extension.Address.Period.Period" ], + "default" : "NULL", + "__source_data_type" : "Period" + }, { + "name" : 
"count", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "integer" + }, { + "name" : "countMax", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "integer" + }, { + "name" : "duration", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "decimal" + }, { + "name" : "durationMax", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "decimal" + }, { + "name" : "durationUnit", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "code" + }, { + "name" : "frequency", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "integer" + }, { + "name" : "frequencyMax", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "integer" + }, { + "name" : "period", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "decimal" + }, { + "name" : "periodMax", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "decimal" + }, { + "name" : "periodUnit", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "code" + }, { + "name" : "dayOfWeek", + "type" : [ "null", { + "type" : "array", + "items" : "string" + } ], + "default" : "NULL", + "__source_data_type" : "code" + }, { + "name" : "timeOfDay", + "type" : [ "null", { + "type" : "array", + "items" : "string" + } ], + "default" : "NULL", + "__source_data_type" : "time" + }, { + "name" : "when", + "type" : [ "null", { + "type" : "array", + "items" : "string" + } ], + "default" : "NULL", + "__source_data_type" : "code" + }, { + "name" : "offset", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "unsignedInt" + }, { + "name" : "code", + "type" : [ "null", "Meta.Extension.Annotation.Reference.Identifier.CodeableConcept.CodeableConcept" ], + "default" : "NULL", + "__source_data_type" : "CodeableConcept" + } ] + } ], + "default" : 
"NULL", + "__source_data_type" : "Timing" + }, { + "name" : "valueMeta", + "type" : [ "null", "Meta.Meta" ], + "default" : "NULL", + "__source_data_type" : "Meta" + } ] + } + } ], + "default" : "NULL", + "__source_data_type" : "Extension" + }, { + "name" : "versionId", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "id" + }, { + "name" : "lastUpdated", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "instant" + }, { + "name" : "profile", + "type" : [ "null", { + "type" : "array", + "items" : "string" + } ], + "default" : "NULL", + "__source_data_type" : "uri" + }, { + "name" : "security", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.Annotation.Reference.Identifier.CodeableConcept.Coding.Coding" + } ], + "default" : "NULL", + "__source_data_type" : "Coding" + }, { + "name" : "tag", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.Annotation.Reference.Identifier.CodeableConcept.Coding.Coding" + } ], + "default" : "NULL", + "__source_data_type" : "Coding" + } ] + } ], + "default" : "NULL", + "__source_data_type" : "Meta" + }, { + "name" : "implicitRules", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "uri" + }, { + "name" : "language", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "code" + }, { + "name" : "text", + "type" : [ "null", { + "type" : "record", + "name" : "Narrative", + "namespace" : "Narrative", + "fields" : [ { + "name" : "id", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "extension", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.Extension" + } ], + "default" : "NULL", + "__source_data_type" : "Extension" + }, { + "name" : "status", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "code" + }, { + "name" : "div", + "type" : [ "null", "string" ], + "default" : 
"NULL", + "__source_data_type" : "xhtml" + } ] + } ], + "default" : "NULL", + "__source_data_type" : "Narrative" + }, { + "name" : "extension", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.Extension" + } ], + "default" : "NULL", + "__source_data_type" : "Extension" + }, { + "name" : "modifierExtension", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.Extension" + } ], + "default" : "NULL", + "__source_data_type" : "Extension" + }, { + "name" : "identifier", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.Annotation.Reference.Identifier.Identifier" + } ], + "default" : "NULL", + "__source_data_type" : "Identifier" + }, { + "name" : "active", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "boolean" + }, { + "name" : "name", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.HumanName.HumanName" + } ], + "default" : "NULL", + "__source_data_type" : "HumanName" + }, { + "name" : "telecom", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.ContactPoint.ContactPoint" + } ], + "default" : "NULL", + "__source_data_type" : "ContactPoint" + }, { + "name" : "gender", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "code" + }, { + "name" : "birthDate", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "date" + }, { + "name" : "deceasedBoolean", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "boolean" + }, { + "name" : "deceasedDateTime", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "dateTime" + }, { + "name" : "address", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.Address.Address" + } ], + "default" : "NULL", + "__source_data_type" : "Address" + }, { + "name" : "maritalStatus", + "type" : [ "null", "Meta.Extension.Annotation.Reference.Identifier.CodeableConcept.CodeableConcept" 
], + "default" : "NULL", + "__source_data_type" : "CodeableConcept" + }, { + "name" : "multipleBirthBoolean", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "boolean" + }, { + "name" : "multipleBirthInteger", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "integer" + }, { + "name" : "photo", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.Attachment.Attachment" + } ], + "default" : "NULL", + "__source_data_type" : "Attachment" + }, { + "name" : "contact", + "type" : [ "null", { + "type" : "array", + "items" : { + "type" : "record", + "name" : "contact", + "namespace" : "Patient", + "fields" : [ { + "name" : "id", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "extension", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.Extension" + } ], + "default" : "NULL", + "__source_data_type" : "Extension" + }, { + "name" : "modifierExtension", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.Extension" + } ], + "default" : "NULL", + "__source_data_type" : "Extension" + }, { + "name" : "relationship", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.Annotation.Reference.Identifier.CodeableConcept.CodeableConcept" + } ], + "default" : "NULL", + "__source_data_type" : "CodeableConcept" + }, { + "name" : "name", + "type" : [ "null", "Meta.Extension.HumanName.HumanName" ], + "default" : "NULL", + "__source_data_type" : "HumanName" + }, { + "name" : "telecom", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.ContactPoint.ContactPoint" + } ], + "default" : "NULL", + "__source_data_type" : "ContactPoint" + }, { + "name" : "address", + "type" : [ "null", "Meta.Extension.Address.Address" ], + "default" : "NULL", + "__source_data_type" : "Address" + }, { + "name" : "gender", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : 
"code" + }, { + "name" : "organization", + "type" : [ "null", "Meta.Extension.Annotation.Reference.Reference" ], + "default" : "NULL", + "__source_data_type" : "Reference" + }, { + "name" : "period", + "type" : [ "null", "Meta.Extension.Address.Period.Period" ], + "default" : "NULL", + "__source_data_type" : "Period" + } ] + } + } ], + "default" : "NULL", + "__source_data_type" : "Patient.contact" + }, { + "name" : "animal", + "type" : [ "null", { + "type" : "record", + "name" : "animal", + "namespace" : "Patient", + "fields" : [ { + "name" : "id", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "extension", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.Extension" + } ], + "default" : "NULL", + "__source_data_type" : "Extension" + }, { + "name" : "modifierExtension", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.Extension" + } ], + "default" : "NULL", + "__source_data_type" : "Extension" + }, { + "name" : "species", + "type" : [ "null", "Meta.Extension.Annotation.Reference.Identifier.CodeableConcept.CodeableConcept" ], + "default" : "NULL", + "__source_data_type" : "CodeableConcept" + }, { + "name" : "breed", + "type" : [ "null", "Meta.Extension.Annotation.Reference.Identifier.CodeableConcept.CodeableConcept" ], + "default" : "NULL", + "__source_data_type" : "CodeableConcept" + }, { + "name" : "genderStatus", + "type" : [ "null", "Meta.Extension.Annotation.Reference.Identifier.CodeableConcept.CodeableConcept" ], + "default" : "NULL", + "__source_data_type" : "CodeableConcept" + } ] + } ], + "default" : "NULL", + "__source_data_type" : "Patient.animal" + }, { + "name" : "communication", + "type" : [ "null", { + "type" : "array", + "items" : { + "type" : "record", + "name" : "communication", + "namespace" : "Patient", + "fields" : [ { + "name" : "id", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : 
"extension", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.Extension" + } ], + "default" : "NULL", + "__source_data_type" : "Extension" + }, { + "name" : "modifierExtension", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.Extension" + } ], + "default" : "NULL", + "__source_data_type" : "Extension" + }, { + "name" : "language", + "type" : [ "null", "Meta.Extension.Annotation.Reference.Identifier.CodeableConcept.CodeableConcept" ], + "default" : "NULL", + "__source_data_type" : "CodeableConcept" + }, { + "name" : "preferred", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "boolean" + } ] + } + } ], + "default" : "NULL", + "__source_data_type" : "Patient.communication" + }, { + "name" : "generalPractitioner", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.Annotation.Reference.Reference" + } ], + "default" : "NULL", + "__source_data_type" : "Reference" + }, { + "name" : "managingOrganization", + "type" : [ "null", "Meta.Extension.Annotation.Reference.Reference" ], + "default" : "NULL", + "__source_data_type" : "Reference" + }, { + "name" : "link", + "type" : [ "null", { + "type" : "array", + "items" : { + "type" : "record", + "name" : "link", + "namespace" : "Patient", + "fields" : [ { + "name" : "id", + "type" : [ "null", "string" ], + "default" : "NULL", + "__source_data_type" : "string" + }, { + "name" : "extension", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.Extension" + } ], + "default" : "NULL", + "__source_data_type" : "Extension" + }, { + "name" : "modifierExtension", + "type" : [ "null", { + "type" : "array", + "items" : "Meta.Extension.Extension" + } ], + "default" : "NULL", + "__source_data_type" : "Extension" + }, { + "name" : "other", + "type" : [ "null", "Meta.Extension.Annotation.Reference.Reference" ], + "default" : "NULL", + "__source_data_type" : "Reference" + }, { + "name" : "type", + "type" : [ "null", "string" ], + 
"default" : "NULL", + "__source_data_type" : "code" + } ] + } + } ], + "default" : "NULL", + "__source_data_type" : "Patient.link" + } ], + "__SchemaID" : "15" +} diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/GroupType.java b/parquet-column/src/main/java/org/apache/parquet/schema/GroupType.java index 68dba979b8..71c1ce10c0 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/GroupType.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/GroupType.java @@ -38,9 +38,21 @@ */ public class GroupType extends Type { - private final List fields; + private List fields = null; private final Map indexByName; + /** + * Constructor to delay the field assignment; fields are attached afterwards via addFields or addField. + * This is useful in case fields are added from different places in the code, e.g. first add a primary key and then add all columns. + * Until a field has been added, the internal field list of this group is null. + * + * @param repetition OPTIONAL, REPEATED, REQUIRED + * @param name the name of the field + */ + public GroupType(Repetition repetition, String name) { + this(repetition, name, null, null, null); + } + /** * @param repetition OPTIONAL, REPEATED, REQUIRED * @param name the name of the field @@ -90,11 +102,41 @@ public GroupType(Repetition repetition, String name, OriginalType originalType, */ GroupType(Repetition repetition, String name, OriginalType originalType, List fields, ID id) { super(name, repetition, originalType, id); - this.fields = fields; this.indexByName = new HashMap(); - for (int i = 0; i < fields.size(); i++) { - indexByName.put(fields.get(i).getName(), i); - } + addFields(fields); + } + + /** + * Adds the given fields to this GroupType: if it has no fields yet they become its field list (copied, so the caller's list is never modified), otherwise each one is appended via addField.
+ * + * @param fields the fields to add; null is ignored + */ + public void addFields(List fields) { + if (fields != null) { + if (this.fields == null) { + this.fields = new ArrayList(fields); /* defensive copy: the given list may be fixed-size (e.g. from Arrays.asList) or still be used by the caller, and addField appends to this.fields later */ + for (int i = 0; i < fields.size(); i++) { + indexByName.put(fields.get(i).getName(), i); + } + } else { + for (Type f : fields) { + addField(f); + } + } + } + } + + /** + * Appends a single field to the list of fields, creating the list first if none exists yet, and records the position of the field under its name. + * + * @param field the field to append + */ + public void addField(Type field) { + if (fields == null) { + fields = new ArrayList(); + } + fields.add(field); + indexByName.put(field.getName(), fields.size()-1); } /** @@ -395,3 +437,4 @@ List mergeFields(GroupType toMerge, boolean strict) { return newFields; } } + diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/MessageType.java b/parquet-column/src/main/java/org/apache/parquet/schema/MessageType.java index 1e26ed2425..c187b5796a 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/MessageType.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/MessageType.java @@ -41,6 +41,10 @@ public MessageType(String name, Type... fields) { super(Repetition.REPEATED, name, fields); } + public MessageType(String name) { + super(Repetition.REPEATED, name); + } + /** * * @param name the name of the type @@ -146,3 +150,4 @@ public MessageType union(MessageType toMerge, boolean strict) { } } +