diff --git a/clickhouse/client.cpp b/clickhouse/client.cpp index 65eb9f65..36c1bcb3 100644 --- a/clickhouse/client.cpp +++ b/clickhouse/client.cpp @@ -653,7 +653,12 @@ void Client::Impl::WriteBlock(const Block& block, OutputStream& output) { WireFormat::WriteString(output, bi.Name()); WireFormat::WriteString(output, bi.Type()->GetName()); - bi.Column()->Save(&output); + // Empty columns are not serialized and occupy exactly 0 bytes. + // ref https://github.com/ClickHouse/ClickHouse/blob/39b37a3240f74f4871c8c1679910e065af6bea19/src/Formats/NativeWriter.cpp#L163 + const bool containsData = block.GetRowCount() > 0; + if (containsData) { + bi.Column()->Save(&output); + } } output.Flush(); } diff --git a/clickhouse/client.h b/clickhouse/client.h index 7f2b97dd..6de09b8a 100644 --- a/clickhouse/client.h +++ b/clickhouse/client.h @@ -86,6 +86,7 @@ struct ClientOptions { // TCP options DECLARE_FIELD(tcp_nodelay, bool, TcpNoDelay, true); + // TODO deprecate setting /** It helps to ease migration of the old codebases, which can't afford to switch * to using ColumnLowCardinalityT or ColumnLowCardinality directly, * but still want to benefit from smaller on-wire LowCardinality bandwidth footprint. diff --git a/clickhouse/columns/factory.cpp b/clickhouse/columns/factory.cpp index 264d2933..904a34aa 100644 --- a/clickhouse/columns/factory.cpp +++ b/clickhouse/columns/factory.cpp @@ -163,6 +163,8 @@ static ColumnRef CreateColumnFromAst(const TypeAst& ast, CreateColumnByTypeSetti return std::make_shared>(); case Type::FixedString: return std::make_shared>(nested.elements.front().value); + case Type::Nullable: + throw UnimplementedError("LowCardinality(" + nested.name + ") is not supported with LowCardinalityAsWrappedColumn on"); default: throw UnimplementedError("LowCardinality(" + nested.name + ") is not supported"); } @@ -174,6 +176,13 @@ static ColumnRef CreateColumnFromAst(const TypeAst& ast, CreateColumnByTypeSetti return std::make_shared>(); case Type::FixedString: return std::make_shared>(nested.elements.front().value); + case Type::Nullable: + return std::make_shared( + std::make_shared( + CreateColumnFromAst(nested.elements.front(), settings), + std::make_shared() + ) + ); default: throw UnimplementedError("LowCardinality(" + nested.name + ") is not supported"); } diff --git a/clickhouse/columns/lowcardinality.cpp b/clickhouse/columns/lowcardinality.cpp index 1e4e7443..0ffef7e0 100644 --- a/clickhouse/columns/lowcardinality.cpp +++ b/clickhouse/columns/lowcardinality.cpp @@ -94,6 +94,47 @@ inline auto VisitIndexColumn(Vizitor && vizitor, ColumnType && col) { } } +// A special NULL-item, which is expected at pos(0) in dictionary, +// note that we distinguish empty string from NULL-value. +inline auto GetNullItemForDictionary(const ColumnRef dictionary) { + if (auto n = dictionary->As()) { + return ItemView {}; + } else { + return ItemView{dictionary->Type()->GetCode(), std::string_view{}}; + } +} + +// A special default item, which is expected at pos(0) in dictionary, +// note that we distinguish empty string from NULL-value. +inline ItemView GetDefaultItemForDictionary(const ColumnRef dictionary) { + if (auto n = dictionary->As()) { + return GetDefaultItemForDictionary(n->Nested()); + } else { + return ItemView{dictionary->Type()->GetCode(), std::string_view{}}; + } +} + +void AppendToDictionary(Column& dictionary, const ItemView & item); + +inline void AppendNullableToDictionary(ColumnNullable& nullable, const ItemView & item) { + auto nested = nullable.Nested(); + + const bool isNullValue = item.type == Type::Void; + + if (isNullValue) { + AppendToDictionary(*nested, GetNullItemForDictionary(nested)); + } else { + const auto nestedType = nested->GetType().GetCode(); + if (nestedType != item.type) { + throw ValidationError("Invalid value. Type expected: " + nested->GetType().GetName()); + } + + AppendToDictionary(*nested, item); + } + + nullable.Append(isNullValue); +} + inline void AppendToDictionary(Column& dictionary, const ItemView & item) { switch (dictionary.GetType().GetCode()) { case Type::FixedString: @@ -102,21 +143,14 @@ inline void AppendToDictionary(Column& dictionary, const ItemView & item) { case Type::String: column_down_cast(dictionary).Append(item.get()); return; + case Type::Nullable: + AppendNullableToDictionary(column_down_cast(dictionary), item); + return; default: throw ValidationError("Unexpected dictionary column type: " + dictionary.GetType().GetName()); } } -// A special NULL-item, which is expected at pos(0) in dictionary, -// note that we distinguish empty string from NULL-value. -inline auto GetNullItemForDictionary(const ColumnRef dictionary) { - if (auto n = dictionary->As()) { - return ItemView{}; - } else { - return ItemView{dictionary->Type()->GetCode(), std::string_view{}}; - } -} - } namespace clickhouse { @@ -125,7 +159,23 @@ ColumnLowCardinality::ColumnLowCardinality(ColumnRef dictionary_column) dictionary_column_(dictionary_column->CloneEmpty()), // safe way to get an column of the same type. index_column_(std::make_shared()) { - AppendNullItemToEmptyColumn(); + Setup(dictionary_column); +} + +ColumnLowCardinality::ColumnLowCardinality(std::shared_ptr dictionary_column) + : Column(Type::CreateLowCardinality(dictionary_column->Type())), + dictionary_column_(dictionary_column->CloneEmpty()), // safe way to get an column of the same type. + index_column_(std::make_shared()) +{ + AppendNullItem(); + Setup(dictionary_column); +} + +ColumnLowCardinality::~ColumnLowCardinality() +{} + +void ColumnLowCardinality::Setup(ColumnRef dictionary_column) { + AppendDefaultItem(); if (dictionary_column->Size() != 0) { // Add values, updating index_column_ and unique_items_map_. @@ -140,9 +190,6 @@ ColumnLowCardinality::ColumnLowCardinality(ColumnRef dictionary_column) } } -ColumnLowCardinality::~ColumnLowCardinality() -{} - std::uint64_t ColumnLowCardinality::getDictionaryIndex(std::uint64_t item_index) const { return VisitIndexColumn([item_index](const auto & arg) -> std::uint64_t { return arg[item_index]; @@ -215,7 +262,12 @@ auto Load(ColumnRef new_dictionary_column, InputStream& input, size_t rows) { if (!WireFormat::ReadFixed(input, &number_of_keys)) throw ProtocolError("Failed to read number of rows in dictionary column."); - if (!new_dictionary_column->LoadBody(&input, number_of_keys)) + auto dataColumn = new_dictionary_column; + if (auto nullable = new_dictionary_column->As()) { + dataColumn = nullable->Nested(); + } + + if (!dataColumn->LoadBody(&input, number_of_keys)) throw ProtocolError("Failed to read values of dictionary column."); uint64_t number_of_rows; @@ -227,8 +279,15 @@ auto Load(ColumnRef new_dictionary_column, InputStream& input, size_t rows) { new_index_column->LoadBody(&input, number_of_rows); + if (auto nullable = new_dictionary_column->As()) { + nullable->Append(true); + for(std::size_t i = 1; i < new_index_column->Size(); i++) { + nullable->Append(false); + } + } + ColumnLowCardinality::UniqueItems new_unique_items_map; - for (size_t i = 0; i < new_dictionary_column->Size(); ++i) { + for (size_t i = 0; i < dataColumn->Size(); ++i) { const auto key = ColumnLowCardinality::computeHashKey(new_dictionary_column->GetItem(i)); new_unique_items_map.emplace(key, i); } @@ -278,10 +337,16 @@ void ColumnLowCardinality::SaveBody(OutputStream* output) { const uint64_t number_of_keys = dictionary_column_->Size(); WireFormat::WriteFixed(*output, number_of_keys); - dictionary_column_->SaveBody(output); + + if (auto columnNullable = dictionary_column_->As()) { + columnNullable->Nested()->SaveBody(output); + } else { + dictionary_column_->SaveBody(output); + } const uint64_t number_of_rows = index_column_->Size(); WireFormat::WriteFixed(*output, number_of_rows); + index_column_->SaveBody(output); } @@ -290,7 +355,10 @@ void ColumnLowCardinality::Clear() { dictionary_column_->Clear(); unique_items_map_.clear(); - AppendNullItemToEmptyColumn(); + if (auto columnNullable = dictionary_column_->As()) { + AppendNullItem(); + } + AppendDefaultItem(); } size_t ColumnLowCardinality::Size() const { @@ -328,7 +396,17 @@ void ColumnLowCardinality::Swap(Column& other) { } ItemView ColumnLowCardinality::GetItem(size_t index) const { - return dictionary_column_->GetItem(getDictionaryIndex(index)); + const auto dictionaryIndex = getDictionaryIndex(index); + + if (auto nullable = dictionary_column_->As()) { + const auto isNull = dictionaryIndex == 0u; + + if (isNull) { + return GetNullItemForDictionary(nullable); + } + } + + return dictionary_column_->GetItem(dictionaryIndex); } // No checks regarding value type or validity of value is made. @@ -359,19 +437,20 @@ void ColumnLowCardinality::AppendUnsafe(const ItemView & value) { } } -void ColumnLowCardinality::AppendNullItemToEmptyColumn() +void ColumnLowCardinality::AppendNullItem() { - // INVARIANT: Empty LC column has an (invisible) null-item at pos 0, which MUST be present in - // unique_items_map_ in order to reuse dictionary posistion on subsequent Append()-s. - - // Should be only performed on empty LC column. - assert(dictionary_column_->Size() == 0 && unique_items_map_.empty()); - const auto null_item = GetNullItemForDictionary(dictionary_column_); AppendToDictionary(*dictionary_column_, null_item); unique_items_map_.emplace(computeHashKey(null_item), 0); } +void ColumnLowCardinality::AppendDefaultItem() +{ + const auto defaultItem = GetDefaultItemForDictionary(dictionary_column_); + unique_items_map_.emplace(computeHashKey(defaultItem), dictionary_column_->Size()); + AppendToDictionary(*dictionary_column_, defaultItem); +} + size_t ColumnLowCardinality::GetDictionarySize() const { return dictionary_column_->Size(); } diff --git a/clickhouse/columns/lowcardinality.h b/clickhouse/columns/lowcardinality.h index d8eabfc9..3d8581fc 100644 --- a/clickhouse/columns/lowcardinality.h +++ b/clickhouse/columns/lowcardinality.h @@ -2,6 +2,7 @@ #include "column.h" #include "numeric.h" +#include "nullable.h" #include #include @@ -32,6 +33,11 @@ struct LowCardinalityHashKeyHash { } +/* + * LC column contains an "invisible" default item at the beginning of the collection. [default, ...] + * If the nested type is Nullable, it contains a null-item at the beginning and a default item at the second position. [null, default, ...] + * Null map is not serialized in LC columns. Instead, nulls are tracked by having an index of 0. + * */ class ColumnLowCardinality : public Column { public: using UniqueItems = std::unordered_map; @@ -49,6 +55,7 @@ class ColumnLowCardinality : public Column { public: // c-tor makes a deep copy of the dictionary_column. explicit ColumnLowCardinality(ColumnRef dictionary_column); + explicit ColumnLowCardinality(std::shared_ptr dictionary_column); ~ColumnLowCardinality(); /// Appends another LowCardinality column to the end of this one, updating dictionary. @@ -84,12 +91,14 @@ class ColumnLowCardinality : public Column { std::uint64_t getDictionaryIndex(std::uint64_t item_index) const; void appendIndex(std::uint64_t item_index); void removeLastIndex(); - ColumnRef GetDictionary(); + void AppendUnsafe(const ItemView &); private: - void AppendNullItemToEmptyColumn(); + void Setup(ColumnRef dictionary_column); + void AppendNullItem(); + void AppendDefaultItem(); public: static details::LowCardinalityHashKey computeHashKey(const ItemView &); diff --git a/clickhouse/columns/nullable.cpp b/clickhouse/columns/nullable.cpp index 4fb46d70..dd863545 100644 --- a/clickhouse/columns/nullable.cpp +++ b/clickhouse/columns/nullable.cpp @@ -74,7 +74,6 @@ void ColumnNullable::SaveBody(OutputStream* output) { } size_t ColumnNullable::Size() const { - assert(nested_->Size() == nulls_->Size()); return nulls_->Size(); } diff --git a/ut/CMakeLists.txt b/ut/CMakeLists.txt index 11f91208..a30032af 100644 --- a/ut/CMakeLists.txt +++ b/ut/CMakeLists.txt @@ -22,6 +22,7 @@ SET ( clickhouse-cpp-ut-src utils.cpp value_generators.cpp + low_cardinality_nullable_tests.cpp ) IF (WITH_OPENSSL) diff --git a/ut/low_cardinality_nullable_tests.cpp b/ut/low_cardinality_nullable_tests.cpp new file mode 100644 index 00000000..41c0d3c5 --- /dev/null +++ b/ut/low_cardinality_nullable_tests.cpp @@ -0,0 +1,116 @@ +#include +#include +#include "clickhouse/columns/nullable.h" +#include "clickhouse/columns/lowcardinality.h" +#include "clickhouse/client.h" +#include "utils.h" +#include "clickhouse/base/wire_format.h" +#include + +namespace +{ +using namespace clickhouse; +} + +static const auto localHostEndpoint = ClientOptions() + .SetHost( getEnvOrDefault("CLICKHOUSE_HOST", "localhost")) + .SetPort( getEnvOrDefault("CLICKHOUSE_PORT", "9000")) + .SetUser( getEnvOrDefault("CLICKHOUSE_USER", "default")) + .SetPassword( getEnvOrDefault("CLICKHOUSE_PASSWORD", "")) + .SetDefaultDatabase(getEnvOrDefault("CLICKHOUSE_DB", "default")); + + +ColumnRef buildTestColumn(const std::vector& rowsData, const std::vector& nulls) { + auto stringColumn = std::make_shared(rowsData); + auto nullsColumn = std::make_shared(nulls); + auto lowCardinalityColumn = std::make_shared( + std::make_shared(stringColumn, nullsColumn) + ); + + return lowCardinalityColumn; +} + +void createTable(Client& client) { + client.Execute("DROP TEMPORARY TABLE IF EXISTS lc_of_nullable"); + client.Execute("CREATE TEMPORARY TABLE IF NOT EXISTS lc_of_nullable (words LowCardinality(Nullable(String))) ENGINE = Memory"); +} + +TEST(LowCardinalityOfNullable, InsertAndQuery) { + const auto rowsData = std::vector { + "eminem", + "", + "tupac", + "shady", + "fifty", + "dre", + "", + "cube" + }; + + const auto nulls = std::vector { + false, false, true, false, true, true, false, false + }; + + auto column = buildTestColumn(rowsData, nulls); + + Block block; + block.AppendColumn("words", column); + + Client client(ClientOptions(localHostEndpoint) + .SetBakcwardCompatibilityFeatureLowCardinalityAsWrappedColumn(false) + .SetPingBeforeQuery(true)); + + createTable(client); + + client.Insert("lc_of_nullable", block); + + client.Select("SELECT * FROM lc_of_nullable", [&](const Block& bl) { + for (size_t row = 0; row < bl.GetRowCount(); row++) { + auto lc_col = bl[0]->As(); + auto item = lc_col->GetItem(row); + + if (nulls[row]) { + ASSERT_EQ(Type::Code::Void, item.type); + } else { + ASSERT_EQ(rowsData[row], item.get()); + } + } + }); +} + +TEST(LowCardinalityOfNullable, InsertAndQueryEmpty) { + auto column = buildTestColumn({}, {}); + + Block block; + block.AppendColumn("words", column); + + Client client(ClientOptions(localHostEndpoint) + .SetBakcwardCompatibilityFeatureLowCardinalityAsWrappedColumn(false) + .SetPingBeforeQuery(true)); + + createTable(client); + + EXPECT_NO_THROW(client.Insert("lc_of_nullable", block)); + + client.Select("SELECT * FROM lc_of_nullable", [&](const Block& bl) { + ASSERT_EQ(bl.GetRowCount(), 0u); + }); +} + +TEST(LowCardinalityOfNullable, ThrowOnBackwardsCompatibleLCColumn) { + auto column = buildTestColumn({}, {}); + + Block block; + block.AppendColumn("words", column); + + Client client(ClientOptions(localHostEndpoint) + .SetPingBeforeQuery(true)); + + createTable(client); + + EXPECT_THROW(client.Insert("lc_of_nullable", block), UnimplementedError); + + client.Select("SELECT * FROM lc_of_nullable", [&](const Block& bl) { + ASSERT_EQ(bl.GetRowCount(), 0u); + }); +} \ No newline at end of file