From 970ecfe01a55d87766f12936dcfc2ea098721023 Mon Sep 17 00:00:00 2001 From: MisterRaindrop <278811821@qq.com> Date: Mon, 15 Jun 2026 16:22:44 +0800 Subject: [PATCH] feat(hive): add iceberg_hive library Build checked-in HMS bindings into libiceberg_hive with a stub HiveCatalog and HMS connection properties. Reuse Arrow bundled Thrift runtime and suppress generated-source deprecated-declaration warnings. --- .../IcebergThirdpartyToolchain.cmake | 11 ++ src/iceberg/catalog/hive/CMakeLists.txt | 76 +++++++++- src/iceberg/catalog/hive/hive_catalog.cc | 132 ++++++++++++++++++ src/iceberg/catalog/hive/hive_catalog.h | 115 +++++++++++++++ .../catalog/hive/hive_catalog_properties.cc | 57 ++++++++ .../catalog/hive/hive_catalog_properties.h | 104 ++++++++++++++ src/iceberg/iceberg-config.cmake.in | 2 + 7 files changed, 492 insertions(+), 5 deletions(-) create mode 100644 src/iceberg/catalog/hive/hive_catalog.cc create mode 100644 src/iceberg/catalog/hive/hive_catalog.h create mode 100644 src/iceberg/catalog/hive/hive_catalog_properties.cc create mode 100644 src/iceberg/catalog/hive/hive_catalog_properties.h diff --git a/cmake_modules/IcebergThirdpartyToolchain.cmake b/cmake_modules/IcebergThirdpartyToolchain.cmake index 8e10fd8ec..13ef6dbc4 100644 --- a/cmake_modules/IcebergThirdpartyToolchain.cmake +++ b/cmake_modules/IcebergThirdpartyToolchain.cmake @@ -742,3 +742,14 @@ endif() if(ICEBERG_BUILD_SQL_CATALOG) resolve_sql_catalog_dependencies() endif() + +# Arrow's bundled build creates the Thrift C++ runtime as a `thrift` target +# scoped to its FetchContent directory, where iceberg_hive cannot see it. +# Promote it to a global `thrift::thrift` alias so iceberg_hive can link the +# generated Hive Metastore bindings against it. +if(ICEBERG_BUILD_HIVE + AND TARGET thrift + AND NOT TARGET thrift::thrift) + add_library(thrift::thrift INTERFACE IMPORTED GLOBAL) + target_link_libraries(thrift::thrift INTERFACE thrift) +endif() diff --git a/src/iceberg/catalog/hive/CMakeLists.txt b/src/iceberg/catalog/hive/CMakeLists.txt index 0dee1ad10..3204be3eb 100644 --- a/src/iceberg/catalog/hive/CMakeLists.txt +++ b/src/iceberg/catalog/hive/CMakeLists.txt @@ -15,11 +15,77 @@ # specific language governing permissions and limitations # under the License. -# Skeleton for the iceberg_hive library target. +# The iceberg_hive library: a Hive Metastore (HMS) catalog client built on +# generated Apache Thrift bindings. Layout mirrors iceberg_rest. + +# ---------------------------------------------------------------------- +# Hive Metastore Thrift bindings. # -# Sources, dependency wiring and the actual `iceberg_hive` library target -# are introduced in follow-up commits. For now this file installs only the -# public export header so that the directory is wired into the build system -# end-to-end. +# These are checked into gen-cpp/ rather than generated at build time, so a +# normal build needs no Thrift IDL compiler — only the Thrift C++ runtime, +# which comes from Apache Arrow's bundled build. Regenerate them with +# dev/update_hive_thrift.sh whenever thirdparty/hive_metastore/*.thrift changes. + +set(_thrift_gen_dir ${CMAKE_CURRENT_SOURCE_DIR}/gen-cpp) + +set(ICEBERG_HIVE_THRIFT_GEN_SOURCES + ${_thrift_gen_dir}/FacebookService.cpp + ${_thrift_gen_dir}/fb303_types.cpp + ${_thrift_gen_dir}/hive_metastore_constants.cpp + ${_thrift_gen_dir}/hive_metastore_types.cpp + ${_thrift_gen_dir}/ThriftHiveMetastore.cpp) + +# Upstream-generated code: skip lint/format, and downgrade the deprecated +# std::iterator warning from Arrow's bundled Thrift 0.22 headers (removed +# upstream in Thrift 0.23, THRIFT-5698) so it does not trip -Werror. +set_source_files_properties(${ICEBERG_HIVE_THRIFT_GEN_SOURCES} + PROPERTIES SKIP_LINTING TRUE + COMPILE_OPTIONS + "$<$:-Wno-error=deprecated-declarations>" +) + +# ---------------------------------------------------------------------- +# iceberg_hive library + +if(NOT TARGET thrift::thrift) + message(FATAL_ERROR "iceberg_hive requires a `thrift::thrift` target, normally " + "provided by Apache Arrow's bundled Thrift. Build with " + "-DICEBERG_BUILD_BUNDLE=ON (the default).") +endif() + +set(ICEBERG_HIVE_SOURCES hive_catalog.cc hive_catalog_properties.cc + ${ICEBERG_HIVE_THRIFT_GEN_SOURCES}) + +set(ICEBERG_HIVE_STATIC_BUILD_INTERFACE_LIBS) +set(ICEBERG_HIVE_SHARED_BUILD_INTERFACE_LIBS) +set(ICEBERG_HIVE_STATIC_INSTALL_INTERFACE_LIBS) +set(ICEBERG_HIVE_SHARED_INSTALL_INTERFACE_LIBS) + +list(APPEND ICEBERG_HIVE_STATIC_BUILD_INTERFACE_LIBS + "$,iceberg_static,iceberg_shared>" thrift::thrift) +list(APPEND ICEBERG_HIVE_SHARED_BUILD_INTERFACE_LIBS + "$,iceberg_shared,iceberg_static>" thrift::thrift) +list(APPEND + ICEBERG_HIVE_STATIC_INSTALL_INTERFACE_LIBS + "$,iceberg::iceberg_static,iceberg::iceberg_shared>" + thrift::thrift) +list(APPEND + ICEBERG_HIVE_SHARED_INSTALL_INTERFACE_LIBS + "$,iceberg::iceberg_shared,iceberg::iceberg_static>" + thrift::thrift) + +add_iceberg_lib(iceberg_hive + SOURCES + ${ICEBERG_HIVE_SOURCES} + PRIVATE_INCLUDES + ${_thrift_gen_dir} + SHARED_LINK_LIBS + ${ICEBERG_HIVE_SHARED_BUILD_INTERFACE_LIBS} + STATIC_LINK_LIBS + ${ICEBERG_HIVE_STATIC_BUILD_INTERFACE_LIBS} + STATIC_INSTALL_INTERFACE_LIBS + ${ICEBERG_HIVE_STATIC_INSTALL_INTERFACE_LIBS} + SHARED_INSTALL_INTERFACE_LIBS + ${ICEBERG_HIVE_SHARED_INSTALL_INTERFACE_LIBS}) iceberg_install_all_headers(iceberg/catalog/hive) diff --git a/src/iceberg/catalog/hive/hive_catalog.cc b/src/iceberg/catalog/hive/hive_catalog.cc new file mode 100644 index 000000000..1309babaa --- /dev/null +++ b/src/iceberg/catalog/hive/hive_catalog.cc @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/catalog/hive/hive_catalog.h" + +#include +#include + +#include "iceberg/util/macros.h" + +namespace iceberg::hive { + +namespace { + +constexpr std::string_view kNotImplementedMessage = + "HiveCatalog method is not yet implemented."; + +} // namespace + +HiveCatalog::HiveCatalog(HiveCatalogProperties config) + : config_(std::move(config)), name_(config_.Get(HiveCatalogProperties::kName)) {} + +HiveCatalog::~HiveCatalog() = default; + +Result> HiveCatalog::Make( + const HiveCatalogProperties& config) { + ICEBERG_RETURN_UNEXPECTED(config.Uri()); + return std::shared_ptr(new HiveCatalog(config)); +} + +std::string_view HiveCatalog::name() const { return name_; } + +Status HiveCatalog::CreateNamespace( + const Namespace& /*ns*/, + const std::unordered_map& /*properties*/) { + return NotImplemented("{}", kNotImplementedMessage); +} + +Result> HiveCatalog::ListNamespaces( + const Namespace& /*ns*/) const { + return NotImplemented("{}", kNotImplementedMessage); +} + +Result> HiveCatalog::GetNamespaceProperties( + const Namespace& /*ns*/) const { + return NotImplemented("{}", kNotImplementedMessage); +} + +Status HiveCatalog::DropNamespace(const Namespace& /*ns*/) { + return NotImplemented("{}", kNotImplementedMessage); +} + +Result HiveCatalog::NamespaceExists(const Namespace& /*ns*/) const { + return NotImplemented("{}", kNotImplementedMessage); +} + +Status HiveCatalog::UpdateNamespaceProperties( + const Namespace& /*ns*/, + const std::unordered_map& /*updates*/, + const std::unordered_set& /*removals*/) { + return NotImplemented("{}", kNotImplementedMessage); +} + +Result> HiveCatalog::ListTables( + const Namespace& /*ns*/) const { + return NotImplemented("{}", kNotImplementedMessage); +} + +Result> HiveCatalog::CreateTable( + const TableIdentifier& /*identifier*/, const std::shared_ptr& /*schema*/, + const std::shared_ptr& /*spec*/, + const std::shared_ptr& /*order*/, const std::string& /*location*/, + const std::unordered_map& /*properties*/) { + return NotImplemented("{}", kNotImplementedMessage); +} + +Result> HiveCatalog::UpdateTable( + const TableIdentifier& /*identifier*/, + const std::vector>& /*requirements*/, + const std::vector>& /*updates*/) { + return NotImplemented("{}", kNotImplementedMessage); +} + +Result> HiveCatalog::StageCreateTable( + const TableIdentifier& /*identifier*/, const std::shared_ptr& /*schema*/, + const std::shared_ptr& /*spec*/, + const std::shared_ptr& /*order*/, const std::string& /*location*/, + const std::unordered_map& /*properties*/) { + return NotImplemented("{}", kNotImplementedMessage); +} + +Result HiveCatalog::TableExists(const TableIdentifier& /*identifier*/) const { + return NotImplemented("{}", kNotImplementedMessage); +} + +Status HiveCatalog::DropTable(const TableIdentifier& /*identifier*/, bool /*purge*/) { + return NotImplemented("{}", kNotImplementedMessage); +} + +Status HiveCatalog::RenameTable(const TableIdentifier& /*from*/, + const TableIdentifier& /*to*/) { + return NotImplemented("{}", kNotImplementedMessage); +} + +Result> HiveCatalog::LoadTable( + const TableIdentifier& /*identifier*/) { + return NotImplemented("{}", kNotImplementedMessage); +} + +Result> HiveCatalog::RegisterTable( + const TableIdentifier& /*identifier*/, + const std::string& /*metadata_file_location*/) { + return NotImplemented("{}", kNotImplementedMessage); +} + +} // namespace iceberg::hive diff --git a/src/iceberg/catalog/hive/hive_catalog.h b/src/iceberg/catalog/hive/hive_catalog.h new file mode 100644 index 000000000..bd598fee9 --- /dev/null +++ b/src/iceberg/catalog/hive/hive_catalog.h @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include +#include +#include + +#include "iceberg/catalog.h" +#include "iceberg/catalog/hive/hive_catalog_properties.h" +#include "iceberg/catalog/hive/iceberg_hive_export.h" +#include "iceberg/result.h" + +/// \file iceberg/catalog/hive/hive_catalog.h +/// \brief HiveCatalog implementation for talking to a Hive Metastore (HMS). + +namespace iceberg::hive { + +/// \brief Catalog implementation backed by a Hive Metastore. +/// +/// Currently a stub: every Catalog method returns +/// ErrorKind::kNotImplemented. Follow-up changes add the HMS Thrift client +/// and wire each method to the metastore. +class ICEBERG_HIVE_EXPORT HiveCatalog : public Catalog, + public std::enable_shared_from_this { + public: + ~HiveCatalog() override; + + HiveCatalog(const HiveCatalog&) = delete; + HiveCatalog& operator=(const HiveCatalog&) = delete; + HiveCatalog(HiveCatalog&&) = delete; + HiveCatalog& operator=(HiveCatalog&&) = delete; + + /// \brief Construct a HiveCatalog from `config`. + /// + /// Only stores the configuration for now; HMS connection setup comes + /// with the Thrift client. Returns an error if the supplied + /// configuration is missing required fields (currently: the URI). + static Result> Make(const HiveCatalogProperties& config); + + std::string_view name() const override; + + Status CreateNamespace( + const Namespace& ns, + const std::unordered_map& properties) override; + + Result> ListNamespaces(const Namespace& ns) const override; + + Result> GetNamespaceProperties( + const Namespace& ns) const override; + + Status DropNamespace(const Namespace& ns) override; + + Result NamespaceExists(const Namespace& ns) const override; + + Status UpdateNamespaceProperties( + const Namespace& ns, const std::unordered_map& updates, + const std::unordered_set& removals) override; + + Result> ListTables(const Namespace& ns) const override; + + Result> CreateTable( + const TableIdentifier& identifier, const std::shared_ptr& schema, + const std::shared_ptr& spec, const std::shared_ptr& order, + const std::string& location, + const std::unordered_map& properties) override; + + Result> UpdateTable( + const TableIdentifier& identifier, + const std::vector>& requirements, + const std::vector>& updates) override; + + Result> StageCreateTable( + const TableIdentifier& identifier, const std::shared_ptr& schema, + const std::shared_ptr& spec, const std::shared_ptr& order, + const std::string& location, + const std::unordered_map& properties) override; + + Result TableExists(const TableIdentifier& identifier) const override; + + Status DropTable(const TableIdentifier& identifier, bool purge) override; + + Status RenameTable(const TableIdentifier& from, const TableIdentifier& to) override; + + Result> LoadTable(const TableIdentifier& identifier) override; + + Result> RegisterTable( + const TableIdentifier& identifier, + const std::string& metadata_file_location) override; + + private: + explicit HiveCatalog(HiveCatalogProperties config); + + HiveCatalogProperties config_; + std::string name_; +}; + +} // namespace iceberg::hive diff --git a/src/iceberg/catalog/hive/hive_catalog_properties.cc b/src/iceberg/catalog/hive/hive_catalog_properties.cc new file mode 100644 index 000000000..e09aa1627 --- /dev/null +++ b/src/iceberg/catalog/hive/hive_catalog_properties.cc @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/catalog/hive/hive_catalog_properties.h" + +#include +#include + +#include "iceberg/util/string_util.h" + +namespace iceberg::hive { + +HiveCatalogProperties HiveCatalogProperties::default_properties() { return {}; } + +HiveCatalogProperties HiveCatalogProperties::FromMap( + std::unordered_map properties) { + HiveCatalogProperties hive_catalog_config; + hive_catalog_config.configs_ = std::move(properties); + return hive_catalog_config; +} + +Result HiveCatalogProperties::Uri() const { + auto it = configs_.find(kUri.key()); + if (it == configs_.end() || it->second.empty()) { + return InvalidArgument("Hive catalog configuration property 'uri' is required."); + } + return it->second; +} + +Result HiveCatalogProperties::ThriftTransport() const { + const std::string upper = StringUtils::ToUpper(Get(kThriftTransport)); + if (upper == "BUFFERED") { + return HiveThriftTransport::kBuffered; + } + if (upper == "FRAMED") { + return HiveThriftTransport::kFramed; + } + return InvalidArgument("Invalid Hive thrift transport: '{}'.", Get(kThriftTransport)); +} + +} // namespace iceberg::hive diff --git a/src/iceberg/catalog/hive/hive_catalog_properties.h b/src/iceberg/catalog/hive/hive_catalog_properties.h new file mode 100644 index 000000000..5f1273b34 --- /dev/null +++ b/src/iceberg/catalog/hive/hive_catalog_properties.h @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include +#include + +#include "iceberg/catalog/hive/iceberg_hive_export.h" +#include "iceberg/result.h" +#include "iceberg/util/config.h" + +/// \file iceberg/catalog/hive/hive_catalog_properties.h +/// \brief Configuration for connecting to a Hive Metastore (HMS) over Thrift. + +namespace iceberg::hive { + +/// \brief Thrift framing mode used to connect to the Hive Metastore. +/// +/// Most HMS deployments default to TBufferedTransport. TFramedTransport is +/// required by HMS instances that have been configured to use framed +/// transport (for example, certain Hive 3.x setups with SASL enabled). +enum class HiveThriftTransport : uint8_t { kBuffered, kFramed }; + +/// \brief Configuration for the iceberg_hive HiveCatalog. +/// +/// HMS connection settings (URI, transport, timeouts) plus warehouse / FileIO +/// metadata. Authentication (SASL/Kerberos) and HMS-side locking are +/// introduced in follow-up changes. +class ICEBERG_HIVE_EXPORT HiveCatalogProperties + : public ConfigBase { + public: + template + using Entry = const ConfigBase::Entry; + + /// \brief The URI of the Hive Metastore Thrift endpoint. + /// + /// Accepted forms (matching the conventions used by iceberg-java and + /// iceberg-rust): + /// * `thrift://host:port` + /// * `host:port` + /// * comma-separated list of either form for HA failover + inline static Entry kUri{"uri", ""}; + + /// \brief The catalog name reported by `name()`. Defaults to "hive". + inline static Entry kName{"name", "hive"}; + + /// \brief The warehouse root path (for example, `s3://bucket/warehouse` + /// or `hdfs://nn/path`). Used as the default base location for new + /// tables that do not specify their own location. + inline static Entry kWarehouse{"warehouse", ""}; + + /// \brief The FileIO implementation name used to read and write Iceberg + /// metadata files. + inline static Entry kIOImpl{"io-impl", ""}; + + /// \brief Thrift framing for the HMS connection ("buffered" or "framed"). + inline static Entry kThriftTransport{"thrift-transport", "buffered"}; + + /// \brief HMS connect timeout, in milliseconds. + inline static Entry kConnectTimeoutMs{"connect-timeout-ms", 30000}; + + /// \brief HMS socket / RPC timeout, in milliseconds. + inline static Entry kSocketTimeoutMs{"socket-timeout-ms", 60000}; + + /// \brief When true, wrap the commit path with HMS `lock` / `unlock` for + /// extra safety on top of the metadata_location CAS. Defaults to false + /// because CAS already handles single-writer correctness; turn this on + /// for environments with high write concurrency. + inline static Entry kLockEnabled{"hive.lock-enabled", false}; + + /// \brief Build a HiveCatalogProperties with defaults applied. + static HiveCatalogProperties default_properties(); + + /// \brief Build a HiveCatalogProperties from a property map. + static HiveCatalogProperties FromMap( + std::unordered_map properties); + + /// \brief Resolve `kUri`. Returns an error if the URI is unset or empty. + Result Uri() const; + + /// \brief Parse `kThriftTransport` into a HiveThriftTransport. Comparison + /// is case-insensitive to match the conventions used by other Iceberg + /// language ports. + Result ThriftTransport() const; +}; + +} // namespace iceberg::hive diff --git a/src/iceberg/iceberg-config.cmake.in b/src/iceberg/iceberg-config.cmake.in index dfb0e1dbc..9a7bfbea4 100644 --- a/src/iceberg/iceberg-config.cmake.in +++ b/src/iceberg/iceberg-config.cmake.in @@ -30,6 +30,8 @@ # iceberg::iceberg_bundle_static # iceberg::iceberg_rest_shared # iceberg::iceberg_rest_static +# iceberg::iceberg_hive_shared +# iceberg::iceberg_hive_static # iceberg::iceberg_catalog_sql_shared # iceberg::iceberg_catalog_sql_static