From ca9a374a1c296c6a7abbdaa6de88911203ca653e Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Tue, 12 Dec 2017 14:57:25 -0500 Subject: [PATCH 1/9] Add SetupCxxFlags.cmake from Apache Arrow. Add PARQUET_BUILD_WARNING_LEVEL flag. Fix Clang compiler warnings --- CMakeLists.txt | 84 +------ benchmarks/decode_benchmark.cc | 2 +- ci/before_script_travis.sh | 2 + cmake_modules/SetupCxxFlags.cmake | 236 ++++++++++++++++++ examples/reader-writer.cc | 12 +- src/parquet/arrow/arrow-reader-writer-test.cc | 7 +- src/parquet/arrow/reader.cc | 6 +- src/parquet/arrow/record_reader.cc | 4 +- src/parquet/arrow/schema.cc | 5 +- src/parquet/arrow/writer.cc | 18 +- src/parquet/column_writer-test.cc | 16 +- src/parquet/column_writer.h | 2 + src/parquet/encoding-internal.h | 4 +- src/parquet/encoding-test.cc | 2 +- src/parquet/exception.cc | 10 +- src/parquet/public-api-test.cc | 4 +- src/parquet/types.cc | 25 -- src/parquet/util/macros.h | 17 ++ src/parquet/util/memory-test.cc | 2 +- src/parquet/util/memory.h | 6 +- 20 files changed, 322 insertions(+), 142 deletions(-) create mode 100644 cmake_modules/SetupCxxFlags.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 47746312..278347d2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -129,6 +129,10 @@ if ("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") option(PARQUET_BUILD_BENCHMARKS "Build the libparquet benchmark suite" OFF) + + set(PARQUET_BUILD_WARNING_LEVEL "PRODUCTION" CACHE STRING + "Levels of compiler warnings for development: PRODUCTION/CHECKIN/EVERYTHING") + option(PARQUET_BOOST_USE_SHARED "Rely on boost shared libraries where relevant" ON) @@ -375,6 +379,10 @@ enable_testing() # Dependencies ############################################################ +# Determine compiler version +include(CompilerInfo) +include(SetupCxxFlags) + include_directories(${CMAKE_CURRENT_BINARY_DIR}/src) include_directories( ${CMAKE_CURRENT_SOURCE_DIR}/src @@ -388,6 +396,11 @@ else() endif() include(ThirdpartyToolchain) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_COMMON_FLAGS}") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${PARQUET_CXXFLAGS}") + +message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}") + # Thrift requires these definitions for some types that we use add_definitions(-DHAVE_INTTYPES_H -DHAVE_NETDB_H) if (MSVC) @@ -396,77 +409,6 @@ else() add_definitions(-DHAVE_NETINET_IN_H -fPIC) endif() -############################################################# -# Compiler flags and release types - -# compiler flags for different build types (run 'cmake -DCMAKE_BUILD_TYPE= .') -# For all builds: -# For CMAKE_BUILD_TYPE=Debug -# -ggdb: Enable gdb debugging -# For CMAKE_BUILD_TYPE=FastDebug -# Same as DEBUG, except with -O1 -# For CMAKE_BUILD_TYPE=Release -# -O3: Enable all compiler optimizations -# Debug symbols are stripped for reduced binary size. Add -# -DPARQUET_CXXFLAGS="-g" to include them -if (MSVC) - set(CXX_FLAGS_DEBUG "${CXX_FLAGS_DEBUG} /bigobj") # TODO set /bigobj only for specific lib -else() - set(CXX_FLAGS_DEBUG "-ggdb -O0") - set(CXX_FLAGS_FASTDEBUG "-ggdb -O1") - set(CXX_FLAGS_RELEASE "-O3") -endif() - -string (TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE) - -if ("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_FLAGS_DEBUG}") - -elseif ("${CMAKE_BUILD_TYPE}" STREQUAL "FASTDEBUG") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_FLAGS_FASTDEBUG}") -elseif ("${CMAKE_BUILD_TYPE}" STREQUAL "RELEASE") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_FLAGS_RELEASE}") -else() - message(FATAL_ERROR "Unknown build type: ${CMAKE_BUILD_TYPE}") -endif () - -message(STATUS "Build Type: ${CMAKE_BUILD_TYPE}") - -set(CMAKE_CXX_FLAGS "${PARQUET_CXXFLAGS} ${CMAKE_CXX_FLAGS}") -if (MSVC) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W3") -else() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-strict-aliasing -Wall") -endif() - -if (PARQUET_USE_SSE) - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native") - add_definitions(-DPARQUET_USE_SSE) -endif() - -if (APPLE) - # Use libc++ to avoid linker errors on some platforms - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") -endif() - -# Determine compiler version -include(CompilerInfo) - -if ("${COMPILER_FAMILY}" STREQUAL "clang") - # Using Clang with ccache causes a bunch of spurious warnings that are - # purportedly fixed in the next version of ccache. See the following for details: - # - # http://petereisentraut.blogspot.com/2011/05/ccache-and-clang.html - # http://petereisentraut.blogspot.com/2011/09/ccache-and-clang-part-2.html - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Qunused-arguments") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CLANG_OPTIONS}") -endif() - -if ("${COMPILER_FAMILY}" STREQUAL "msvc") - # MSVC version of -Wno-deprecated - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4996") -endif() - ############################################################ # "make lint" target ############################################################ diff --git a/benchmarks/decode_benchmark.cc b/benchmarks/decode_benchmark.cc index 6fc3c543..576b1fbd 100644 --- a/benchmarks/decode_benchmark.cc +++ b/benchmarks/decode_benchmark.cc @@ -86,7 +86,7 @@ class DeltaBitPackEncoder { // The bit width for this block is the number of bits needed to store // (max_delta - min_delta). int bit_width = arrow::BitUtil::NumRequiredBits(max_delta - min_delta); - mini_block_widths[i] = bit_width; + mini_block_widths[i] = static_cast(bit_width); // Encode this mini blocking using min_delta and bit_width for (int j = 0; j < n; ++j) { diff --git a/ci/before_script_travis.sh b/ci/before_script_travis.sh index c35e2323..4251ee5f 100755 --- a/ci/before_script_travis.sh +++ b/ci/before_script_travis.sh @@ -30,9 +30,11 @@ if [ $TRAVIS_OS_NAME == "linux" ]; then cmake -DPARQUET_CXXFLAGS="$PARQUET_CXXFLAGS" \ -DPARQUET_TEST_MEMCHECK=ON \ -DPARQUET_BUILD_BENCHMARKS=ON \ + -DPARQUET_BUILD_WARNING_LEVEL=CHECKIN \ -DPARQUET_GENERATE_COVERAGE=1 \ $TRAVIS_BUILD_DIR else cmake -DPARQUET_CXXFLAGS="$PARQUET_CXXFLAGS" \ + -DPARQUET_BUILD_WARNING_LEVEL=CHECKIN \ $TRAVIS_BUILD_DIR fi diff --git a/cmake_modules/SetupCxxFlags.cmake b/cmake_modules/SetupCxxFlags.cmake new file mode 100644 index 00000000..6fa28700 --- /dev/null +++ b/cmake_modules/SetupCxxFlags.cmake @@ -0,0 +1,236 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Check if the target architecture and compiler supports some special +# instruction sets that would boost performance. +include(CheckCXXCompilerFlag) +# x86/amd64 compiler flags +CHECK_CXX_COMPILER_FLAG("-msse3" CXX_SUPPORTS_SSE3) +# power compiler flags +CHECK_CXX_COMPILER_FLAG("-maltivec" CXX_SUPPORTS_ALTIVEC) + +# compiler flags that are common across debug/release builds + +if (MSVC) + # TODO(wesm): Change usages of C runtime functions that MSVC says are + # insecure, like std::getenv + add_definitions(-D_CRT_SECURE_NO_WARNINGS) + + # Use __declspec(dllexport) during library build, other users of the Parquet + # headers will see dllimport + add_definitions(-DPARQUET_EXPORTING) + + if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + # clang-cl + set(CXX_COMMON_FLAGS "-EHsc") + elseif(${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 19) + message(FATAL_ERROR "Only MSVC 2015 (Version 19.0) and later are supported + by Parquet. Found version ${CMAKE_CXX_COMPILER_VERSION}.") + else() + # Fix annoying D9025 warning + string(REPLACE "/W3" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + + # Set desired warning level (e.g. set /W4 for more warnings) + set(CXX_COMMON_FLAGS "/W3") + endif() + + if (PARQUET_USE_STATIC_CRT) + foreach (c_flag CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG + CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO + CMAKE_C_FLAGS CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_DEBUG + CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO) + string(REPLACE "/MD" "-MT" ${c_flag} "${${c_flag}}") + endforeach() + endif() +else() + # Common flags set below with warning level + set(CXX_COMMON_FLAGS "") +endif() + +# Build warning level (CHECKIN, EVERYTHING, etc.) + +# if no build warning level is specified, default to development warning level +if (NOT PARQUET_BUILD_WARNING_LEVEL) + set(PARQUET_BUILD_WARNING_LEVEL Production) +endif(NOT PARQUET_BUILD_WARNING_LEVEL) + +string(TOUPPER ${PARQUET_BUILD_WARNING_LEVEL} UPPERCASE_BUILD_WARNING_LEVEL) + +if ("${UPPERCASE_BUILD_WARNING_LEVEL}" STREQUAL "CHECKIN") + # Pre-checkin builds + if ("${COMPILER_FAMILY}" STREQUAL "msvc") + string(REPLACE "/W3" "" CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS}") + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /W3") + # Treat all compiler warnings as errors + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /WX") + elseif ("${COMPILER_FAMILY}" STREQUAL "clang") + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Weverything -Wno-c++98-compat \ +-Wno-c++98-compat-pedantic -Wno-deprecated -Wno-weak-vtables -Wno-padded \ +-Wno-comma -Wno-unused-parameter -Wno-undef \ +-Wno-shadow -Wno-switch-enum -Wno-exit-time-destructors \ +-Wno-global-constructors -Wno-weak-template-vtables -Wno-undefined-reinterpret-cast \ +-Wno-implicit-fallthrough -Wno-unreachable-code-return \ +-Wno-float-equal -Wno-missing-prototypes \ +-Wno-old-style-cast -Wno-covered-switch-default \ +-Wno-format-nonliteral \ +-Wno-cast-align -Wno-vla-extension -Wno-shift-sign-overflow \ +-Wno-used-but-marked-unused -Wno-missing-variable-declarations \ +-Wno-gnu-zero-variadic-macro-arguments -Wconversion -Wno-sign-conversion \ +-Wno-disabled-macro-expansion -Wno-shorten-64-to-32") + + # Version numbers where warnings are introduced + if ("${COMPILER_VERSION}" VERSION_GREATER "3.3") + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-gnu-folding-constant") + endif() + if ("${COMPILER_VERSION}" VERSION_GREATER "3.6") + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-reserved-id-macro") + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-range-loop-analysis") + endif() + if ("${COMPILER_VERSION}" VERSION_GREATER "3.7") + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-double-promotion") + endif() + if ("${COMPILER_VERSION}" VERSION_GREATER "3.8") + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-undefined-func-template") + endif() + + # Treat all compiler warnings as errors + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-unknown-warning-option -Werror") + elseif ("${COMPILER_FAMILY}" STREQUAL "gcc") + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wall -Wconversion -Wno-sign-conversion") + # Treat all compiler warnings as errors + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-unknown-warning-option -Werror") + else() + message(FATAL_ERROR "Unknown compiler. Version info:\n${COMPILER_VERSION_FULL}") + endif() +elseif ("${UPPERCASE_BUILD_WARNING_LEVEL}" STREQUAL "EVERYTHING") + # Pedantic builds for fixing warnings + if ("${COMPILER_FAMILY}" STREQUAL "msvc") + string(REPLACE "/W3" "" CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS}") + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /Wall") + # https://docs.microsoft.com/en-us/cpp/build/reference/compiler-option-warning-level + # /wdnnnn disables a warning where "nnnn" is a warning number + # Treat all compiler warnings as errors + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /WX") + elseif ("${COMPILER_FAMILY}" STREQUAL "clang") + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Weverything -Wno-c++98-compat -Wno-c++98-compat-pedantic") + # Treat all compiler warnings as errors + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Werror") + elseif ("${COMPILER_FAMILY}" STREQUAL "gcc") + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wall -Wpedantic -Wextra -Wno-unused-parameter") + # Treat all compiler warnings as errors + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Werror") + else() + message(FATAL_ERROR "Unknown compiler. Version info:\n${COMPILER_VERSION_FULL}") + endif() +else() + # Production builds (warning are not treated as errors) + if ("${COMPILER_FAMILY}" STREQUAL "msvc") + # https://docs.microsoft.com/en-us/cpp/build/reference/compiler-option-warning-level + # TODO: Enable /Wall and disable individual warnings until build compiles without errors + # /wdnnnn disables a warning where "nnnn" is a warning number + string(REPLACE "/W3" "" CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS}") + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /W3") + elseif ("${COMPILER_FAMILY}" STREQUAL "clang") + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wall") + elseif ("${COMPILER_FAMILY}" STREQUAL "gcc") + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wall") + else() + message(FATAL_ERROR "Unknown compiler. Version info:\n${COMPILER_VERSION_FULL}") + endif() +endif() + +# if build warning flags is set, add to CXX_COMMON_FLAGS +if (BUILD_WARNING_FLAGS) + # Use BUILD_WARNING_FLAGS with BUILD_WARNING_LEVEL=everything to disable + # warnings (use with Clang's -Weverything flag to find potential errors) + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} ${BUILD_WARNING_FLAGS}") +endif(BUILD_WARNING_FLAGS) + +if ("${COMPILER_FAMILY}" STREQUAL "clang") + # Using Clang with ccache causes a bunch of spurious warnings that are + # purportedly fixed in the next version of ccache. See the following for details: + # + # http://petereisentraut.blogspot.com/2011/05/ccache-and-clang.html + # http://petereisentraut.blogspot.com/2011/09/ccache-and-clang-part-2.html + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Qunused-arguments") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CLANG_OPTIONS}") +endif() + +if (NOT ("${COMPILER_FAMILY}" STREQUAL "msvc")) +set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -std=c++11") +endif() + +if ("${COMPILER_FAMILY}" STREQUAL "msvc") + # MSVC version of -Wno-deprecated + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4996") +endif() + +if (PARQUET_USE_SSE) + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native") + add_definitions(-DPARQUET_USE_SSE) +endif() + +if (APPLE) + # Depending on the default OSX_DEPLOYMENT_TARGET (< 10.9), libstdc++ may be + # the default standard library which does not support C++11. libc++ is the + # default from 10.9 onward. + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -stdlib=libc++") +endif() + +# compiler flags for different build types (run 'cmake -DCMAKE_BUILD_TYPE= .') +# For all builds: +# For CMAKE_BUILD_TYPE=Debug +# -ggdb: Enable gdb debugging +# For CMAKE_BUILD_TYPE=FastDebug +# Same as DEBUG, except with some optimizations on. +# For CMAKE_BUILD_TYPE=Release +# -O3: Enable all compiler optimizations +# Debug symbols are stripped for reduced binary size. Add +# -DPARQUET_CXXFLAGS="-g" to add them +if (NOT MSVC) + set(CXX_FLAGS_DEBUG "-ggdb -O0") + set(CXX_FLAGS_FASTDEBUG "-ggdb -O1") + set(CXX_FLAGS_RELEASE "-O3 -DNDEBUG") +endif() + +set(CXX_FLAGS_PROFILE_GEN "${CXX_FLAGS_RELEASE} -fprofile-generate") +set(CXX_FLAGS_PROFILE_BUILD "${CXX_FLAGS_RELEASE} -fprofile-use") + +# if no build build type is specified, default to debug builds +if (NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Debug) +endif(NOT CMAKE_BUILD_TYPE) + +string (TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE) + +# Set compile flags based on the build type. +message("Configured for ${CMAKE_BUILD_TYPE} build (set with cmake -DCMAKE_BUILD_TYPE={release,debug,...})") +if ("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_FLAGS_DEBUG}") +elseif ("${CMAKE_BUILD_TYPE}" STREQUAL "FASTDEBUG") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_FLAGS_FASTDEBUG}") +elseif ("${CMAKE_BUILD_TYPE}" STREQUAL "RELEASE") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_FLAGS_RELEASE}") +elseif ("${CMAKE_BUILD_TYPE}" STREQUAL "PROFILE_GEN") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_FLAGS_PROFILE_GEN}") +elseif ("${CMAKE_BUILD_TYPE}" STREQUAL "PROFILE_BUILD") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_FLAGS_PROFILE_BUILD}") +else() + message(FATAL_ERROR "Unknown build type: ${CMAKE_BUILD_TYPE}") +endif () + +message(STATUS "Build Type: ${CMAKE_BUILD_TYPE}") diff --git a/examples/reader-writer.cc b/examples/reader-writer.cc index 7136b288..35da7c65 100644 --- a/examples/reader-writer.cc +++ b/examples/reader-writer.cc @@ -188,9 +188,9 @@ int main(int argc, char** argv) { for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { parquet::ByteArray value; char hello[FIXED_LENGTH] = "parquet"; - hello[7] = '0' + i / 100; - hello[8] = '0' + (i / 10) % 10; - hello[9] = '0' + i % 10; + hello[7] = '0' + static_cast(i / 100); + hello[8] = '0' + static_cast((i / 10) % 10); + hello[9] = '0' + static_cast(i % 10); if (i % 2 == 0) { int16_t definition_level = 1; value.ptr = reinterpret_cast(&hello[0]); @@ -411,9 +411,9 @@ int main(int argc, char** argv) { assert(rows_read == 1); // Verify the value written char expected_value[FIXED_LENGTH] = "parquet"; - expected_value[7] = '0' + i / 100; - expected_value[8] = '0' + (i / 10) % 10; - expected_value[9] = '0' + i % 10; + expected_value[7] = static_cast('0' + i / 100); + expected_value[8] = static_cast('0' + (i / 10) % 10); + expected_value[9] = static_cast('0' + i % 10); if (i % 2 == 0) { // only alternate values exist // There are no NULL values in the rows written assert(values_read == 1); diff --git a/src/parquet/arrow/arrow-reader-writer-test.cc b/src/parquet/arrow/arrow-reader-writer-test.cc index c10b1644..3f8daf06 100644 --- a/src/parquet/arrow/arrow-reader-writer-test.cc +++ b/src/parquet/arrow/arrow-reader-writer-test.cc @@ -811,13 +811,16 @@ TEST_F(TestInt96ParquetIO, ReadIntoTimestamp) { *(reinterpret_cast(&(day.value))) = seconds * INT64_C(1000) * INT64_C(1000) * INT64_C(1000) + 145738543; // Compute the corresponding nanosecond timestamp - struct tm datetime = {0}; + struct tm datetime; + memset(&datetime, 0, sizeof(struct tm)); datetime.tm_year = 70; datetime.tm_mon = 0; datetime.tm_mday = 2; datetime.tm_hour = 11; datetime.tm_min = 35; - struct tm epoch = {0}; + struct tm epoch; + memset(&epoch, 0, sizeof(struct tm)); + epoch.tm_year = 70; epoch.tm_mday = 1; // Nanoseconds since the epoch diff --git a/src/parquet/arrow/reader.cc b/src/parquet/arrow/reader.cc index dd60d29b..98a88c04 100644 --- a/src/parquet/arrow/reader.cc +++ b/src/parquet/arrow/reader.cc @@ -122,7 +122,7 @@ class AllRowGroupsIterator : public FileColumnIterator { result = nullptr; } return result; - }; + } private: int next_row_group_; @@ -145,7 +145,7 @@ class SingleRowGroupIterator : public FileColumnIterator { reader_->RowGroup(row_group_number_)->GetColumnPageReader(column_index_); done_ = true; return result; - }; + } private: int row_group_number_; @@ -631,7 +631,7 @@ Status PrimitiveImpl::WrapIntoListArray(std::shared_ptr* array) { if (nullable[i]) { def_level++; } - empty_def_level[i] = def_level; + empty_def_level[i] = static_cast(def_level); def_level++; } diff --git a/src/parquet/arrow/record_reader.cc b/src/parquet/arrow/record_reader.cc index 6405ee77..cc968e9c 100644 --- a/src/parquet/arrow/record_reader.cc +++ b/src/parquet/arrow/record_reader.cc @@ -309,8 +309,8 @@ class RecordReader::RecordReaderImpl { // into memory int64_t num_decoded_values_; - const int max_def_level_; - const int max_rep_level_; + const int16_t max_def_level_; + const int16_t max_rep_level_; bool nullable_values_; diff --git a/src/parquet/arrow/schema.cc b/src/parquet/arrow/schema.cc index 321bd20a..e8bcce0d 100644 --- a/src/parquet/arrow/schema.cc +++ b/src/parquet/arrow/schema.cc @@ -84,7 +84,6 @@ static Status FromFLBA(const PrimitiveNode& node, TypePtr* out) { ss << "Unhandled logical type " << LogicalTypeToString(node.logical_type()) << " for fixed-length binary array"; return Status::NotImplemented(ss.str()); - break; } return Status::OK(); @@ -127,7 +126,6 @@ static Status FromInt32(const PrimitiveNode& node, TypePtr* out) { ss << "Unhandled logical type " << LogicalTypeToString(node.logical_type()) << " for INT32"; return Status::NotImplemented(ss.str()); - break; } return Status::OK(); } @@ -160,7 +158,6 @@ static Status FromInt64(const PrimitiveNode& node, TypePtr* out) { ss << "Unhandled logical type " << LogicalTypeToString(node.logical_type()) << " for INT64"; return Status::NotImplemented(ss.str()); - break; } return Status::OK(); } @@ -697,9 +694,9 @@ int32_t DecimalSize(int32_t precision) { case 38: return 16; // 170,141,183,460,469,231,731,687,303,715,884,105,727 default: - DCHECK(false); break; } + DCHECK(false); return -1; } diff --git a/src/parquet/arrow/writer.cc b/src/parquet/arrow/writer.cc index 1f3fc7e6..88a211a5 100644 --- a/src/parquet/arrow/writer.cc +++ b/src/parquet/arrow/writer.cc @@ -912,11 +912,10 @@ Status FileWriter::Impl::WriteColumnChunk(const Array& data) { } std::shared_ptr values_array = _values_array->Slice(values_offset, num_values); -#define WRITE_BATCH_CASE(ArrowEnum, ArrowType, ParquetType) \ - case ::arrow::Type::ArrowEnum: \ - return TypedWriteBatch( \ - column_writer, values_array, num_levels, def_levels, rep_levels); \ - break; +#define WRITE_BATCH_CASE(ArrowEnum, ArrowType, ParquetType) \ + case ::arrow::Type::ArrowEnum: \ + return TypedWriteBatch( \ + column_writer, values_array, num_levels, def_levels, rep_levels); switch (values_type) { case ::arrow::Type::UINT32: { @@ -953,14 +952,13 @@ Status FileWriter::Impl::WriteColumnChunk(const Array& data) { WRITE_BATCH_CASE(TIME32, Time32Type, Int32Type) WRITE_BATCH_CASE(TIME64, Time64Type, Int64Type) default: - std::stringstream ss; - ss << "Data type not supported as list value: " << values_array->type()->ToString(); - return Status::NotImplemented(ss.str()); + break; } PARQUET_CATCH_NOT_OK(column_writer->Close()); - - return Status::OK(); + std::stringstream ss; + ss << "Data type not supported as list value: " << values_array->type()->ToString(); + return Status::NotImplemented(ss.str()); } Status FileWriter::WriteColumnChunk(const ::arrow::Array& array) { diff --git a/src/parquet/column_writer-test.cc b/src/parquet/column_writer-test.cc index b4e3232f..7e5dc828 100644 --- a/src/parquet/column_writer-test.cc +++ b/src/parquet/column_writer-test.cc @@ -509,19 +509,19 @@ void GenerateLevels(int min_repeat_factor, int max_repeat_factor, int max_level, // repeat count increases by a factor of 2 for every iteration int repeat_count = (1 << repeat); // generate levels for repetition count upto the maximum level - int value = 0; + int16_t value = 0; int bwidth = 0; while (value <= max_level) { for (int i = 0; i < repeat_count; i++) { input_levels.push_back(value); } - value = (2 << bwidth) - 1; + value = static_cast((2 << bwidth) - 1); bwidth++; } } } -void EncodeLevels(Encoding::type encoding, int max_level, int num_levels, +void EncodeLevels(Encoding::type encoding, int16_t max_level, int num_levels, const int16_t* input_levels, std::vector& bytes) { LevelEncoder encoder; int levels_count = 0; @@ -543,7 +543,7 @@ void EncodeLevels(Encoding::type encoding, int max_level, int num_levels, ASSERT_EQ(num_levels, levels_count); } -void VerifyDecodingLevels(Encoding::type encoding, int max_level, +void VerifyDecodingLevels(Encoding::type encoding, int16_t max_level, std::vector& input_levels, std::vector& bytes) { LevelDecoder decoder; @@ -581,7 +581,7 @@ void VerifyDecodingLevels(Encoding::type encoding, int max_level, ASSERT_EQ(0, decoder.Decode(1, output_levels.data())); } -void VerifyDecodingMultipleSetData(Encoding::type encoding, int max_level, +void VerifyDecodingMultipleSetData(Encoding::type encoding, int16_t max_level, std::vector& input_levels, std::vector>& bytes) { LevelDecoder decoder; @@ -623,7 +623,7 @@ TEST(TestLevels, TestLevelsDecodeMultipleBitWidth) { // for each maximum bit-width for (int bit_width = 1; bit_width <= max_bit_width; bit_width++) { // find the maximum level for the current bit_width - int max_level = (1 << bit_width) - 1; + int16_t max_level = static_cast((1 << bit_width) - 1); // Generate levels GenerateLevels(min_repeat_factor, max_repeat_factor, max_level, input_levels); EncodeLevels(encoding, max_level, static_cast(input_levels.size()), @@ -639,7 +639,7 @@ TEST(TestLevels, TestLevelsDecodeMultipleSetData) { int min_repeat_factor = 3; int max_repeat_factor = 7; // 128 int bit_width = 8; - int max_level = (1 << bit_width) - 1; + int16_t max_level = static_cast((1 << bit_width) - 1); std::vector input_levels; std::vector> bytes; Encoding::type encodings[2] = {Encoding::RLE, Encoding::BIT_PACKED}; @@ -705,7 +705,7 @@ TEST(TestLevelEncoder, MinimumBufferSize2) { } } - for (int bit_width = 1; bit_width <= 8; bit_width++) { + for (int16_t bit_width = 1; bit_width <= 8; bit_width++) { std::vector output( LevelEncoder::MaxBufferSize(Encoding::RLE, bit_width, kNumToEncode)); diff --git a/src/parquet/column_writer.h b/src/parquet/column_writer.h index f1c13a05..7b8c7755 100644 --- a/src/parquet/column_writer.h +++ b/src/parquet/column_writer.h @@ -98,6 +98,8 @@ class PARQUET_EXPORT ColumnWriter { bool has_dictionary, Encoding::type encoding, const WriterProperties* properties); + virtual ~ColumnWriter() = default; + static std::shared_ptr Make(ColumnChunkMetaDataBuilder*, std::unique_ptr, const WriterProperties* properties); diff --git a/src/parquet/encoding-internal.h b/src/parquet/encoding-internal.h index 3284aca6..e677b6cb 100644 --- a/src/parquet/encoding-internal.h +++ b/src/parquet/encoding-internal.h @@ -518,7 +518,7 @@ class DictEncoder : public Encoder { ClearIndices(); PARQUET_THROW_NOT_OK(buffer->Resize(result_size, false)); return buffer; - }; + } void Put(const T* values, int num_values) override { for (int i = 0; i < num_values; i++) { @@ -757,7 +757,7 @@ inline void DictEncoder::WriteDict(uint8_t* buffer) { template inline int DictEncoder::WriteIndices(uint8_t* buffer, int buffer_len) { // Write bit width in first byte - *buffer = bit_width(); + *buffer = static_cast(bit_width()); ++buffer; --buffer_len; diff --git a/src/parquet/encoding-test.cc b/src/parquet/encoding-test.cc index b0ca050d..a658cb2f 100644 --- a/src/parquet/encoding-test.cc +++ b/src/parquet/encoding-test.cc @@ -207,7 +207,7 @@ class TestEncodingBase : public ::testing::Test { using TestEncodingBase::data_buffer_; \ using TestEncodingBase::type_length_; \ using TestEncodingBase::encode_buffer_; \ - using TestEncodingBase::decode_buf_; + using TestEncodingBase::decode_buf_ template class TestPlainEncoding : public TestEncodingBase { diff --git a/src/parquet/exception.cc b/src/parquet/exception.cc index 480eecda..2278bc80 100644 --- a/src/parquet/exception.cc +++ b/src/parquet/exception.cc @@ -21,19 +21,23 @@ #include #include +#include "parquet/util/macros.h" + namespace parquet { -void ParquetException::EofException() { +PARQUET_NORETURN void ParquetException::EofException() { throw ParquetException("Unexpected end of stream."); } -void ParquetException::NYI(const std::string& msg) { +PARQUET_NORETURN void ParquetException::NYI(const std::string& msg) { std::stringstream ss; ss << "Not yet implemented: " << msg << "."; throw ParquetException(ss.str()); } -void ParquetException::Throw(const std::string& msg) { throw ParquetException(msg); } +PARQUET_NORETURN void ParquetException::Throw(const std::string& msg) { + throw ParquetException(msg); +} ParquetException::ParquetException(const char* msg) : msg_(msg) {} diff --git a/src/parquet/public-api-test.cc b/src/parquet/public-api-test.cc index 09d399b2..958e9701 100644 --- a/src/parquet/public-api-test.cc +++ b/src/parquet/public-api-test.cc @@ -40,7 +40,9 @@ TEST(TestPublicAPI, DoesNotIncludeZlib) { #endif } -void ThrowsParquetException() { throw parquet::ParquetException("This function throws"); } +PARQUET_NORETURN void ThrowsParquetException() { + throw parquet::ParquetException("This function throws"); +} TEST(TestPublicAPI, CanThrowParquetException) { ASSERT_THROW(ThrowsParquetException(), parquet::ParquetException); diff --git a/src/parquet/types.cc b/src/parquet/types.cc index 8ec3f3b1..4e6770f9 100644 --- a/src/parquet/types.cc +++ b/src/parquet/types.cc @@ -66,31 +66,22 @@ std::string EncodingToString(Encoding::type t) { switch (t) { case Encoding::PLAIN: return "PLAIN"; - break; case Encoding::PLAIN_DICTIONARY: return "PLAIN_DICTIONARY"; - break; case Encoding::RLE: return "RLE"; - break; case Encoding::BIT_PACKED: return "BIT_PACKED"; - break; case Encoding::DELTA_BINARY_PACKED: return "DELTA_BINARY_PACKED"; - break; case Encoding::DELTA_LENGTH_BYTE_ARRAY: return "DELTA_LENGTH_BYTE_ARRAY"; - break; case Encoding::DELTA_BYTE_ARRAY: return "DELTA_BYTE_ARRAY"; - break; case Encoding::RLE_DICTIONARY: return "RLE_DICTIONARY"; - break; default: return "UNKNOWN"; - break; } } @@ -98,25 +89,18 @@ std::string CompressionToString(Compression::type t) { switch (t) { case Compression::UNCOMPRESSED: return "UNCOMPRESSED"; - break; case Compression::SNAPPY: return "SNAPPY"; - break; case Compression::GZIP: return "GZIP"; - break; case Compression::LZO: return "LZO"; - break; case Compression::LZ4: return "LZ4"; - break; case Compression::ZSTD: return "ZSTD"; - break; default: return "UNKNOWN"; - break; } } @@ -124,31 +108,22 @@ std::string TypeToString(Type::type t) { switch (t) { case Type::BOOLEAN: return "BOOLEAN"; - break; case Type::INT32: return "INT32"; - break; case Type::INT64: return "INT64"; - break; case Type::INT96: return "INT96"; - break; case Type::FLOAT: return "FLOAT"; - break; case Type::DOUBLE: return "DOUBLE"; - break; case Type::BYTE_ARRAY: return "BYTE_ARRAY"; - break; case Type::FIXED_LEN_BYTE_ARRAY: return "FIXED_LEN_BYTE_ARRAY"; - break; default: return "UNKNOWN"; - break; } } diff --git a/src/parquet/util/macros.h b/src/parquet/util/macros.h index 22645c28..0d172b13 100644 --- a/src/parquet/util/macros.h +++ b/src/parquet/util/macros.h @@ -27,6 +27,23 @@ void operator=(const TypeName&) = delete #endif +#if defined(__GNUC__) +#define PARQUET_PREDICT_FALSE(x) (__builtin_expect(x, 0)) +#define PARQUET_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1)) +#define PARQUET_NORETURN __attribute__((noreturn)) +#define PARQUET_PREFETCH(addr) __builtin_prefetch(addr) +#elif defined(_MSC_VER) +#define PARQUET_NORETURN __declspec(noreturn) +#define PARQUET_PREDICT_FALSE(x) x +#define PARQUET_PREDICT_TRUE(x) x +#define PARQUET_PREFETCH(addr) +#else +#define PARQUET_NORETURN +#define PARQUET_PREDICT_FALSE(x) x +#define PARQUET_PREDICT_TRUE(x) x +#define PARQUET_PREFETCH(addr) +#endif + // ---------------------------------------------------------------------- // From googletest diff --git a/src/parquet/util/memory-test.cc b/src/parquet/util/memory-test.cc index 16617a74..ee5fe313 100644 --- a/src/parquet/util/memory-test.cc +++ b/src/parquet/util/memory-test.cc @@ -258,7 +258,7 @@ TEST(TestBufferedInputStream, Basics) { std::shared_ptr buf = AllocateBuffer(default_memory_pool(), source_size); ASSERT_EQ(source_size, buf->size()); for (int i = 0; i < source_size; i++) { - buf->mutable_data()[i] = i; + buf->mutable_data()[i] = static_cast(i); } auto wrapper = diff --git a/src/parquet/util/memory.h b/src/parquet/util/memory.h index a28917bd..5408d1c7 100644 --- a/src/parquet/util/memory.h +++ b/src/parquet/util/memory.h @@ -244,6 +244,8 @@ class PARQUET_EXPORT ChunkedAllocator { class PARQUET_EXPORT FileInterface { public: + virtual ~FileInterface() = default; + // Close the file virtual void Close() = 0; @@ -255,7 +257,7 @@ class PARQUET_EXPORT FileInterface { /// resources class PARQUET_EXPORT RandomAccessSource : virtual public FileInterface { public: - virtual ~RandomAccessSource() {} + virtual ~RandomAccessSource() = default; virtual int64_t Size() const = 0; @@ -272,7 +274,7 @@ class PARQUET_EXPORT RandomAccessSource : virtual public FileInterface { class PARQUET_EXPORT OutputStream : virtual public FileInterface { public: - virtual ~OutputStream() {} + virtual ~OutputStream() = default; // Copy bytes into the output stream virtual void Write(const uint8_t* data, int64_t length) = 0; From c848855279048871bade480659828c2045314564 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Tue, 12 Dec 2017 21:39:29 -0500 Subject: [PATCH 2/9] Fix compiler warnings with gcc 4.9 --- benchmarks/decode_benchmark.cc | 47 ++++++++++--------- cmake_modules/SetupCxxFlags.cmake | 2 +- examples/reader-writer.cc | 10 ++-- src/parquet/arrow/arrow-reader-writer-test.cc | 2 +- src/parquet/arrow/arrow-schema-test.cc | 4 +- src/parquet/arrow/reader.cc | 4 +- src/parquet/arrow/writer.cc | 12 +++-- src/parquet/column-io-benchmark.cc | 19 ++++---- src/parquet/column_reader.cc | 2 +- src/parquet/column_writer.cc | 2 +- src/parquet/encoding-benchmark.cc | 28 ++++++----- src/parquet/encoding-internal.h | 8 ++-- src/parquet/file_reader.cc | 18 +++---- src/parquet/statistics-test.cc | 8 ++-- src/parquet/test-specialization.h | 2 +- src/parquet/util/test-common.h | 6 +-- tools/parquet-scan.cc | 3 +- 17 files changed, 95 insertions(+), 82 deletions(-) diff --git a/benchmarks/decode_benchmark.cc b/benchmarks/decode_benchmark.cc index 576b1fbd..90057347 100644 --- a/benchmarks/decode_benchmark.cc +++ b/benchmarks/decode_benchmark.cc @@ -42,32 +42,33 @@ class DeltaBitPackEncoder { uint8_t* Encode(int* encoded_len) { uint8_t* result = new uint8_t[10 * 1024 * 1024]; - int num_mini_blocks = arrow::BitUtil::Ceil(num_values() - 1, mini_block_size_); + int num_mini_blocks = static_cast(arrow::BitUtil::Ceil(num_values() - 1, + mini_block_size_)); uint8_t* mini_block_widths = NULL; arrow::BitWriter writer(result, 10 * 1024 * 1024); // Writer the size of each block. We only use 1 block currently. - writer.PutVlqInt(num_mini_blocks * mini_block_size_); + writer.PutVlqInt(static_cast(num_mini_blocks * mini_block_size_)); // Write the number of mini blocks. - writer.PutVlqInt(num_mini_blocks); + writer.PutVlqInt(static_cast(num_mini_blocks)); // Write the number of values. writer.PutVlqInt(num_values() - 1); // Write the first value. - writer.PutZigZagVlqInt(values_[0]); + writer.PutZigZagVlqInt(static_cast(values_[0])); // Compute the values as deltas and the min delta. int64_t min_delta = std::numeric_limits::max(); - for (int i = values_.size() - 1; i > 0; --i) { + for (size_t i = values_.size() - 1; i > 0; --i) { values_[i] -= values_[i - 1]; min_delta = std::min(min_delta, values_[i]); } // Write out the min delta. - writer.PutZigZagVlqInt(min_delta); + writer.PutZigZagVlqInt(static_cast(min_delta)); // We need to save num_mini_blocks bytes to store the bit widths of the mini // blocks. @@ -105,7 +106,7 @@ class DeltaBitPackEncoder { return result; } - int num_values() const { return values_.size(); } + int num_values() const { return static_cast(values_.size()); } private: int mini_block_size_; @@ -121,11 +122,11 @@ class DeltaLengthByteArrayEncoder { plain_encoded_len_(0) {} void Add(const std::string& s) { - Add(reinterpret_cast(s.data()), s.size()); + Add(reinterpret_cast(s.data()), static_cast(s.size())); } void Add(const uint8_t* ptr, int len) { - plain_encoded_len_ += len + sizeof(int); + plain_encoded_len_ += static_cast(len + sizeof(int)); len_encoder_.Add(len); memcpy(buffer_ + offset_, ptr, len); offset_ += len; @@ -136,7 +137,7 @@ class DeltaLengthByteArrayEncoder { memmove(buffer_ + *encoded_len + sizeof(int), buffer_, offset_); memcpy(buffer_, encoded_len, sizeof(int)); memcpy(buffer_ + sizeof(int), encoded_lengths, *encoded_len); - *encoded_len += offset_ + sizeof(int); + *encoded_len += static_cast(offset_ + sizeof(int)); return buffer_; } @@ -155,8 +156,8 @@ class DeltaByteArrayEncoder { DeltaByteArrayEncoder() : plain_encoded_len_(0) {} void Add(const std::string& s) { - plain_encoded_len_ += s.size() + sizeof(int); - int min_len = std::min(s.size(), last_value_.size()); + plain_encoded_len_ += static_cast(s.size() + sizeof(int)); + int min_len = static_cast(std::min(s.size(), last_value_.size())); int prefix_len = 0; for (int i = 0; i < min_len; ++i) { if (s[i] == last_value_[i]) { @@ -167,7 +168,7 @@ class DeltaByteArrayEncoder { } prefix_len_encoder_.Add(prefix_len); suffix_encoder_.Add(reinterpret_cast(s.data()) + prefix_len, - s.size() - prefix_len); + static_cast(s.size() - prefix_len)); last_value_ = s; } @@ -181,7 +182,7 @@ class DeltaByteArrayEncoder { memcpy(buffer, &prefix_buffer_len, sizeof(int)); memcpy(buffer + sizeof(int), prefix_buffer, prefix_buffer_len); memcpy(buffer + sizeof(int) + prefix_buffer_len, suffix_buffer, suffix_buffer_len); - *encoded_len = sizeof(int) + prefix_buffer_len + suffix_buffer_len; + *encoded_len = static_cast(sizeof(int) + prefix_buffer_len + suffix_buffer_len); return buffer; } @@ -198,7 +199,7 @@ class DeltaByteArrayEncoder { uint64_t TestPlainIntEncoding(const uint8_t* data, int num_values, int batch_size) { uint64_t result = 0; parquet::PlainDecoder decoder(nullptr); - decoder.SetData(num_values, data, num_values * sizeof(int64_t)); + decoder.SetData(num_values, data, static_cast(num_values * sizeof(int64_t))); std::vector values(batch_size); for (int i = 0; i < num_values;) { int n = decoder.Decode(values.data(), batch_size); @@ -227,14 +228,15 @@ uint64_t TestBinaryPackedEncoding(const char* name, const std::vector& encoder.Add(values[i]); } - int raw_len = encoder.num_values() * sizeof(int); + int raw_len = static_cast(encoder.num_values() * sizeof(int)); int len; uint8_t* buffer = encoder.Encode(&len); if (benchmark_iters == -1) { printf("%s\n", name); printf(" Raw len: %d\n", raw_len); - printf(" Encoded len: %d (%0.2f%%)\n", len, len * 100 / static_cast(raw_len)); + printf(" Encoded len: %d (%0.2f%%)\n", len, + static_cast(len) * 100.0f / static_cast(raw_len)); decoder.SetData(encoder.num_values(), buffer, len); for (int i = 0; i < encoder.num_values(); ++i) { int64_t x = 0; @@ -249,7 +251,8 @@ uint64_t TestBinaryPackedEncoding(const char* name, const std::vector& } else { printf("%s\n", name); printf(" Raw len: %d\n", raw_len); - printf(" Encoded len: %d (%0.2f%%)\n", len, len * 100 / static_cast(raw_len)); + printf(" Encoded len: %d (%0.2f%%)\n", len, + static_cast(len) * 100.0f / static_cast(raw_len)); uint64_t result = 0; std::vector buf(benchmark_batch_size); @@ -266,9 +269,9 @@ uint64_t TestBinaryPackedEncoding(const char* name, const std::vector& } } uint64_t elapsed = sw.Stop(); - double num_ints = values.size() * benchmark_iters * 1000.; + double num_ints = static_cast(values.size() * benchmark_iters) * 1000.; printf("%s rate (batch size = %2d): %0.3fM per second.\n", name, benchmark_batch_size, - num_ints / elapsed); + num_ints / static_cast(elapsed)); return result; } } @@ -285,10 +288,10 @@ uint64_t TestBinaryPackedEncoding(const char* name, const std::vector& void TestPlainIntCompressed(::arrow::Codec* codec, const std::vector& data, int num_iters, int batch_size) { const uint8_t* raw_data = reinterpret_cast(&data[0]); - int uncompressed_len = data.size() * sizeof(int64_t); + int uncompressed_len = static_cast(data.size() * sizeof(int64_t)); uint8_t* decompressed_data = new uint8_t[uncompressed_len]; - int max_compressed_size = codec->MaxCompressedLen(uncompressed_len, raw_data); + int64_t max_compressed_size = codec->MaxCompressedLen(uncompressed_len, raw_data); uint8_t* compressed_data = new uint8_t[max_compressed_size]; int64_t compressed_len; DCHECK(codec diff --git a/cmake_modules/SetupCxxFlags.cmake b/cmake_modules/SetupCxxFlags.cmake index 6fa28700..ac0ef797 100644 --- a/cmake_modules/SetupCxxFlags.cmake +++ b/cmake_modules/SetupCxxFlags.cmake @@ -112,7 +112,7 @@ if ("${UPPERCASE_BUILD_WARNING_LEVEL}" STREQUAL "CHECKIN") elseif ("${COMPILER_FAMILY}" STREQUAL "gcc") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wall -Wconversion -Wno-sign-conversion") # Treat all compiler warnings as errors - set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-unknown-warning-option -Werror") + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Werror") else() message(FATAL_ERROR "Unknown compiler. Version info:\n${COMPILER_VERSION_FULL}") endif() diff --git a/examples/reader-writer.cc b/examples/reader-writer.cc index 35da7c65..fb2ec774 100644 --- a/examples/reader-writer.cc +++ b/examples/reader-writer.cc @@ -170,7 +170,7 @@ int main(int argc, char** argv) { parquet::FloatWriter* float_writer = static_cast(rg_writer->NextColumn()); for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { - float value = i * 1.1f; + float value = static_cast(i) * 1.1f; float_writer->WriteBatch(1, nullptr, nullptr, &value); } @@ -188,9 +188,9 @@ int main(int argc, char** argv) { for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { parquet::ByteArray value; char hello[FIXED_LENGTH] = "parquet"; - hello[7] = '0' + static_cast(i / 100); - hello[8] = '0' + static_cast((i / 10) % 10); - hello[9] = '0' + static_cast(i % 10); + hello[7] = static_cast(static_cast('0') + i / 100); + hello[8] = static_cast(static_cast('0') + (i / 10) % 10); + hello[9] = static_cast(static_cast('0') + i % 10); if (i % 2 == 0) { int16_t definition_level = 1; value.ptr = reinterpret_cast(&hello[0]); @@ -369,7 +369,7 @@ int main(int argc, char** argv) { // There are no NULL values in the rows written assert(values_read == 1); // Verify the value written - float expected_value = i * 1.1f; + float expected_value = static_cast(i) * 1.1f; assert(value == expected_value); i++; } diff --git a/src/parquet/arrow/arrow-reader-writer-test.cc b/src/parquet/arrow/arrow-reader-writer-test.cc index 3f8daf06..02f37517 100644 --- a/src/parquet/arrow/arrow-reader-writer-test.cc +++ b/src/parquet/arrow/arrow-reader-writer-test.cc @@ -1672,7 +1672,7 @@ class TestNestedSchemaRead : public ::testing::TestWithParam { leaf1_def_levels[i] = (i % 3 == 0) ? 0 : 1; // leaf2 is optional, can be null in the primitive (def-level 1) or // struct level (def-level 0) - leaf2_def_levels[i] = i % 3; + leaf2_def_levels[i] = static_cast(i % 3); // leaf3 is required leaf3_def_levels[i] = 0; } diff --git a/src/parquet/arrow/arrow-schema-test.cc b/src/parquet/arrow/arrow-schema-test.cc index b33eda14..771b9960 100644 --- a/src/parquet/arrow/arrow-schema-test.cc +++ b/src/parquet/arrow/arrow-schema-test.cc @@ -62,8 +62,8 @@ class TestConvertParquetSchema : public ::testing::Test { for (int i = 0; i < expected_schema->num_fields(); ++i) { auto lhs = result_schema_->field(i); auto rhs = expected_schema->field(i); - EXPECT_TRUE(lhs->Equals(rhs)) << i << " " << lhs->ToString() - << " != " << rhs->ToString(); + EXPECT_TRUE(lhs->Equals(rhs)) + << i << " " << lhs->ToString() << " != " << rhs->ToString(); } } diff --git a/src/parquet/arrow/reader.cc b/src/parquet/arrow/reader.cc index 98a88c04..53065a68 100644 --- a/src/parquet/arrow/reader.cc +++ b/src/parquet/arrow/reader.cc @@ -298,8 +298,8 @@ Status FileReader::Impl::GetReaderForNode( // TODO(itaiin): Remove the -1 index hack when all types of nested reads // are supported. This currently just signals the lower level reader resolution // to abort - RETURN_NOT_OK(GetReaderForNode(index, group->field(i).get(), indices, def_level + 1, - &child_reader)); + RETURN_NOT_OK(GetReaderForNode(index, group->field(i).get(), indices, + static_cast(def_level + 1), &child_reader)); if (child_reader != nullptr) { children.push_back(std::move(child_reader)); } diff --git a/src/parquet/arrow/writer.cc b/src/parquet/arrow/writer.cc index 88a211a5..e5a0f177 100644 --- a/src/parquet/arrow/writer.cc +++ b/src/parquet/arrow/writer.cc @@ -186,7 +186,7 @@ class LevelBuilder { if (nullable_[rep_level]) { if (null_counts_[rep_level] == 0 || BitUtil::GetBit(valid_bitmaps_[rep_level], index + array_offsets_[rep_level])) { - return HandleNonNullList(def_level + 1, rep_level, index); + return HandleNonNullList(static_cast(def_level + 1), rep_level, index); } else { return def_levels_.Append(def_level); } @@ -203,24 +203,26 @@ class LevelBuilder { return def_levels_.Append(def_level); } if (recursion_level < static_cast(offsets_.size())) { - return HandleListEntries(def_level + 1, rep_level + 1, inner_offset, inner_length); + return HandleListEntries(static_cast(def_level + 1), + static_cast(rep_level + 1), inner_offset, + inner_length); } else { // We have reached the leaf: primitive list, handle remaining nullables for (int64_t i = 0; i < inner_length; i++) { if (i > 0) { - RETURN_NOT_OK(rep_levels_.Append(rep_level + 1)); + RETURN_NOT_OK(rep_levels_.Append(static_cast(rep_level + 1))); } if (nullable_[recursion_level] && ((null_counts_[recursion_level] == 0) || BitUtil::GetBit(valid_bitmaps_[recursion_level], inner_offset + i + array_offsets_[recursion_level]))) { - RETURN_NOT_OK(def_levels_.Append(def_level + 2)); + RETURN_NOT_OK(def_levels_.Append(static_cast(def_level + 2))); } else { // This can be produced in two case: // * elements are nullable and this one is null (i.e. max_def_level = def_level // + 2) // * elements are non-nullable (i.e. max_def_level = def_level + 1) - RETURN_NOT_OK(def_levels_.Append(def_level + 1)); + RETURN_NOT_OK(def_levels_.Append(static_cast(def_level + 1))); } } return Status::OK(); diff --git a/src/parquet/column-io-benchmark.cc b/src/parquet/column-io-benchmark.cc index 7c8d093e..ad625bd8 100644 --- a/src/parquet/column-io-benchmark.cc +++ b/src/parquet/column-io-benchmark.cc @@ -186,15 +186,16 @@ static void BM_RleEncoding(::benchmark::State& state) { std::generate(levels.begin(), levels.end(), [&state, &n] { return (n++ % state.range(1)) == 0; }); int16_t max_level = 1; - int64_t rle_size = LevelEncoder::MaxBufferSize(Encoding::RLE, max_level, levels.size()); + int64_t rle_size = LevelEncoder::MaxBufferSize(Encoding::RLE, max_level, + static_cast(levels.size())); auto buffer_rle = std::make_shared(); PARQUET_THROW_NOT_OK(buffer_rle->Resize(rle_size)); while (state.KeepRunning()) { LevelEncoder level_encoder; - level_encoder.Init(Encoding::RLE, max_level, levels.size(), - buffer_rle->mutable_data(), buffer_rle->size()); - level_encoder.Encode(levels.size(), levels.data()); + level_encoder.Init(Encoding::RLE, max_level, static_cast(levels.size()), + buffer_rle->mutable_data(), static_cast(buffer_rle->size())); + level_encoder.Encode(static_cast(levels.size()), levels.data()); } state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(int16_t)); state.SetItemsProcessed(state.iterations() * state.range(0)); @@ -209,17 +210,19 @@ static void BM_RleDecoding(::benchmark::State& state) { std::generate(levels.begin(), levels.end(), [&state, &n] { return (n++ % state.range(1)) == 0; }); int16_t max_level = 1; - int64_t rle_size = LevelEncoder::MaxBufferSize(Encoding::RLE, max_level, levels.size()); + int rle_size = LevelEncoder::MaxBufferSize(Encoding::RLE, max_level, + static_cast(levels.size())); auto buffer_rle = std::make_shared(); PARQUET_THROW_NOT_OK(buffer_rle->Resize(rle_size + sizeof(int32_t))); - level_encoder.Init(Encoding::RLE, max_level, levels.size(), + level_encoder.Init(Encoding::RLE, max_level, static_cast(levels.size()), buffer_rle->mutable_data() + sizeof(int32_t), rle_size); - level_encoder.Encode(levels.size(), levels.data()); + level_encoder.Encode(static_cast(levels.size()), levels.data()); reinterpret_cast(buffer_rle->mutable_data())[0] = level_encoder.len(); while (state.KeepRunning()) { LevelDecoder level_decoder; - level_decoder.SetData(Encoding::RLE, max_level, levels.size(), buffer_rle->data()); + level_decoder.SetData(Encoding::RLE, max_level, static_cast(levels.size()), + buffer_rle->data()); level_decoder.Decode(state.range(0), levels.data()); } diff --git a/src/parquet/column_reader.cc b/src/parquet/column_reader.cc index 91557af7..4c114397 100644 --- a/src/parquet/column_reader.cc +++ b/src/parquet/column_reader.cc @@ -56,7 +56,7 @@ int LevelDecoder::SetData(Encoding::type encoding, int16_t max_level, } else { rle_decoder_->Reset(decoder_data, num_bytes, bit_width_); } - return sizeof(int32_t) + num_bytes; + return static_cast(sizeof(int32_t)) + num_bytes; } case Encoding::BIT_PACKED: { num_bytes = diff --git a/src/parquet/column_writer.cc b/src/parquet/column_writer.cc index bdaa9f69..6d6347aa 100644 --- a/src/parquet/column_writer.cc +++ b/src/parquet/column_writer.cc @@ -342,7 +342,7 @@ int64_t ColumnWriter::RleEncodeLevels(const Buffer& src_buffer, level_encoder_.Init(Encoding::RLE, max_level, static_cast(num_buffered_values_), dest_buffer->mutable_data() + sizeof(int32_t), - static_cast(dest_buffer->size()) - sizeof(int32_t)); + static_cast(dest_buffer->size() - sizeof(int32_t))); int encoded = level_encoder_.Encode(static_cast(num_buffered_values_), reinterpret_cast(src_buffer.data())); diff --git a/src/parquet/encoding-benchmark.cc b/src/parquet/encoding-benchmark.cc index 72c41e58..9556fd1d 100644 --- a/src/parquet/encoding-benchmark.cc +++ b/src/parquet/encoding-benchmark.cc @@ -40,7 +40,7 @@ static void BM_PlainEncodingBoolean(::benchmark::State& state) { PlainEncoder encoder(nullptr); while (state.KeepRunning()) { - encoder.Put(values, values.size()); + encoder.Put(values, static_cast(values.size())); encoder.FlushValues(); } state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(bool)); @@ -52,13 +52,14 @@ static void BM_PlainDecodingBoolean(::benchmark::State& state) { std::vector values(state.range(0), 64); bool* output = new bool[state.range(0)]; PlainEncoder encoder(nullptr); - encoder.Put(values, values.size()); + encoder.Put(values, static_cast(values.size())); std::shared_ptr buf = encoder.FlushValues(); while (state.KeepRunning()) { PlainDecoder decoder(nullptr); - decoder.SetData(values.size(), buf->data(), buf->size()); - decoder.Decode(output, values.size()); + decoder.SetData(static_cast(values.size()), buf->data(), + static_cast(buf->size())); + decoder.Decode(output, static_cast(values.size())); } state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(bool)); @@ -72,7 +73,7 @@ static void BM_PlainEncodingInt64(::benchmark::State& state) { PlainEncoder encoder(nullptr); while (state.KeepRunning()) { - encoder.Put(values.data(), values.size()); + encoder.Put(values.data(), static_cast(values.size())); encoder.FlushValues(); } state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(int64_t)); @@ -83,13 +84,14 @@ BENCHMARK(BM_PlainEncodingInt64)->Range(1024, 65536); static void BM_PlainDecodingInt64(::benchmark::State& state) { std::vector values(state.range(0), 64); PlainEncoder encoder(nullptr); - encoder.Put(values.data(), values.size()); + encoder.Put(values.data(), static_cast(values.size())); std::shared_ptr buf = encoder.FlushValues(); while (state.KeepRunning()) { PlainDecoder decoder(nullptr); - decoder.SetData(values.size(), buf->data(), buf->size()); - decoder.Decode(values.data(), values.size()); + decoder.SetData(static_cast(values.size()), buf->data(), + static_cast(buf->size())); + decoder.Decode(values.data(), static_cast(values.size())); } state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(int64_t)); } @@ -100,7 +102,7 @@ template static void DecodeDict(std::vector& values, ::benchmark::State& state) { typedef typename Type::c_type T; - int num_values = values.size(); + int num_values = static_cast(values.size()); ChunkedAllocator pool; MemoryPool* allocator = default_memory_pool(); @@ -118,16 +120,18 @@ static void DecodeDict(std::vector& values, AllocateBuffer(allocator, encoder.EstimatedDataEncodedSize()); encoder.WriteDict(dict_buffer->mutable_data()); - int actual_bytes = encoder.WriteIndices(indices->mutable_data(), indices->size()); + int actual_bytes = + encoder.WriteIndices(indices->mutable_data(), static_cast(indices->size())); PARQUET_THROW_NOT_OK(indices->Resize(actual_bytes)); while (state.KeepRunning()) { PlainDecoder dict_decoder(descr.get()); - dict_decoder.SetData(encoder.num_entries(), dict_buffer->data(), dict_buffer->size()); + dict_decoder.SetData(encoder.num_entries(), dict_buffer->data(), + static_cast(dict_buffer->size())); DictionaryDecoder decoder(descr.get()); decoder.SetDict(&dict_decoder); - decoder.SetData(num_values, indices->data(), indices->size()); + decoder.SetData(num_values, indices->data(), static_cast(indices->size())); decoder.Decode(values.data(), num_values); } diff --git a/src/parquet/encoding-internal.h b/src/parquet/encoding-internal.h index e677b6cb..3e9a16d0 100644 --- a/src/parquet/encoding-internal.h +++ b/src/parquet/encoding-internal.h @@ -81,7 +81,7 @@ class PlainDecoder : public Decoder { template inline int DecodePlain(const uint8_t* data, int64_t data_size, int num_values, int type_length, T* out) { - int bytes_to_decode = num_values * sizeof(T); + int bytes_to_decode = num_values * static_cast(sizeof(T)); if (data_size < bytes_to_decode) { ParquetException::EofException(); } @@ -98,7 +98,7 @@ inline int DecodePlain(const uint8_t* data, int64_t data_size, int nu int increment; for (int i = 0; i < num_values; ++i) { uint32_t len = out[i].len = *reinterpret_cast(data); - increment = sizeof(uint32_t) + len; + increment = static_cast(sizeof(uint32_t) + len); if (data_size < increment) ParquetException::EofException(); out[i].ptr = data + sizeof(uint32_t); data += increment; @@ -688,7 +688,7 @@ inline void DictEncoder::DoubleTableSize() { template inline void DictEncoder::AddDictKey(const typename DType::c_type& v) { uniques_.push_back(v); - dict_encoded_size_ += sizeof(typename DType::c_type); + dict_encoded_size_ += static_cast(sizeof(typename DType::c_type)); } template <> @@ -699,7 +699,7 @@ inline void DictEncoder::AddDictKey(const ByteArray& v) { } memcpy(heap, v.ptr, v.len); uniques_.push_back(ByteArray(v.len, heap)); - dict_encoded_size_ += v.len + sizeof(uint32_t); + dict_encoded_size_ += static_cast(v.len + sizeof(uint32_t)); } template <> diff --git a/src/parquet/file_reader.cc b/src/parquet/file_reader.cc index 72c71c66..7b748120 100644 --- a/src/parquet/file_reader.cc +++ b/src/parquet/file_reader.cc @@ -64,9 +64,9 @@ RowGroupReader::RowGroupReader(std::unique_ptr contents) : contents_(std::move(contents)) {} std::shared_ptr RowGroupReader::Column(int i) { - DCHECK(i < metadata()->num_columns()) << "The RowGroup only has " - << metadata()->num_columns() - << "columns, requested column: " << i; + DCHECK(i < metadata()->num_columns()) + << "The RowGroup only has " << metadata()->num_columns() + << "columns, requested column: " << i; const ColumnDescriptor* descr = metadata()->schema()->Column(i); std::unique_ptr page_reader = contents_->GetColumnPageReader(i); @@ -76,9 +76,9 @@ std::shared_ptr RowGroupReader::Column(int i) { } std::unique_ptr RowGroupReader::GetColumnPageReader(int i) { - DCHECK(i < metadata()->num_columns()) << "The RowGroup only has " - << metadata()->num_columns() - << "columns, requested column: " << i; + DCHECK(i < metadata()->num_columns()) + << "The RowGroup only has " << metadata()->num_columns() + << "columns, requested column: " << i; return contents_->GetColumnPageReader(i); } @@ -302,9 +302,9 @@ std::shared_ptr ParquetFileReader::metadata() const { } std::shared_ptr ParquetFileReader::RowGroup(int i) { - DCHECK(i < metadata()->num_row_groups()) << "The file only has " - << metadata()->num_row_groups() - << "row groups, requested reader for: " << i; + DCHECK(i < metadata()->num_row_groups()) + << "The file only has " << metadata()->num_row_groups() + << "row groups, requested reader for: " << i; return contents_->GetRowGroup(i); } diff --git a/src/parquet/statistics-test.cc b/src/parquet/statistics-test.cc index e5992c6e..bc6eac26 100644 --- a/src/parquet/statistics-test.cc +++ b/src/parquet/statistics-test.cc @@ -563,8 +563,8 @@ void TestStatistics::SetValues() { template <> void TestStatistics::SetValues() { for (int i = 0; i < NUM_VALUES; i++) { - values_[i] = - (i * 1.0f) - 5; // {-5.0, -4.0, -3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0, 4.0}; + values_[i] = static_cast(i) - + 5; // {-5.0, -4.0, -3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0, 4.0}; } // Write Float min/max values @@ -577,8 +577,8 @@ void TestStatistics::SetValues() { template <> void TestStatistics::SetValues() { for (int i = 0; i < NUM_VALUES; i++) { - values_[i] = - (i * 1.0f) - 5; // {-5.0, -4.0, -3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0, 4.0}; + values_[i] = static_cast(i) - + 5; // {-5.0, -4.0, -3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0, 4.0}; } // Write Double min/max values diff --git a/src/parquet/test-specialization.h b/src/parquet/test-specialization.h index 4719fdc9..08160a68 100644 --- a/src/parquet/test-specialization.h +++ b/src/parquet/test-specialization.h @@ -43,7 +43,7 @@ template <> void InitValues(int num_values, vector& values, vector& buffer) { int max_byte_array_len = 12; - int num_bytes = max_byte_array_len + sizeof(uint32_t); + int num_bytes = static_cast(max_byte_array_len + sizeof(uint32_t)); size_t nbytes = num_values * num_bytes; buffer.resize(nbytes); random_byte_array(num_values, 0, buffer.data(), values.data(), max_byte_array_len); diff --git a/src/parquet/util/test-common.h b/src/parquet/util/test-common.h index 1043378a..ebf48516 100644 --- a/src/parquet/util/test-common.h +++ b/src/parquet/util/test-common.h @@ -103,7 +103,7 @@ void random_bytes(int n, uint32_t seed, std::vector* out) { std::uniform_int_distribution d(0, 255); for (int i = 0; i < n; ++i) { - out->push_back(d(gen) & 0xFF); + out->push_back(static_cast(d(gen) & 0xFF)); } } @@ -160,7 +160,7 @@ void random_fixed_byte_array(int n, uint32_t seed, uint8_t* buf, int len, FLBA* for (int i = 0; i < n; ++i) { out[i].ptr = buf; for (int j = 0; j < len; ++j) { - buf[j] = d(gen) & 0xFF; + buf[j] = static_cast(d(gen) & 0xFF); } buf += len; } @@ -176,7 +176,7 @@ void random_byte_array(int n, uint32_t seed, uint8_t* buf, ByteArray* out, int m out[i].len = len; out[i].ptr = buf; for (int j = 0; j < len; ++j) { - buf[j] = d2(gen) & 0xFF; + buf[j] = static_cast(d2(gen) & 0xFF); } buf += len; } diff --git a/tools/parquet-scan.cc b/tools/parquet-scan.cc index fdc73d75..ab9363b2 100644 --- a/tools/parquet-scan.cc +++ b/tools/parquet-scan.cc @@ -65,7 +65,8 @@ int main(int argc, char** argv) { int64_t total_rows = parquet::ScanFileContents(columns, batch_size, reader.get()); - total_time = (std::clock() - start_time) / static_cast(CLOCKS_PER_SEC); + total_time = static_cast(std::clock() - start_time) / + static_cast(CLOCKS_PER_SEC); std::cout << total_rows << " rows scanned in " << total_time << " seconds." << std::endl; } catch (const std::exception& e) { From 5a98e81cb6b61ccee7eb46144ff9fe9010ec76e3 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Tue, 12 Dec 2017 21:42:04 -0500 Subject: [PATCH 3/9] Fix usage of PrimitiveArray::raw_values --- src/parquet/arrow/writer.cc | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/parquet/arrow/writer.cc b/src/parquet/arrow/writer.cc index e5a0f177..d7001aab 100644 --- a/src/parquet/arrow/writer.cc +++ b/src/parquet/arrow/writer.cc @@ -342,20 +342,21 @@ Status FileWriter::Impl::TypedWriteBatch(ColumnWriter* column_writer, const int16_t* rep_levels) { using ArrowCType = typename ArrowType::c_type; - auto data = static_cast(array.get()); - auto data_ptr = reinterpret_cast(data->raw_values()); + const auto& data = static_cast(*array); + auto data_ptr = + reinterpret_cast(data.values()->data()) + data.offset(); auto writer = reinterpret_cast*>(column_writer); - if (writer->descr()->schema_node()->is_required() || (data->null_count() == 0)) { + if (writer->descr()->schema_node()->is_required() || (data.null_count() == 0)) { // no nulls, just dump the data RETURN_NOT_OK((WriteNonNullableBatch( writer, static_cast(*array->type()), array->length(), num_levels, def_levels, rep_levels, data_ptr))); } else { - const uint8_t* valid_bits = data->null_bitmap_data(); + const uint8_t* valid_bits = data.null_bitmap_data(); RETURN_NOT_OK((WriteNullableBatch( - writer, static_cast(*array->type()), data->length(), num_levels, - def_levels, rep_levels, valid_bits, data->offset(), data_ptr))); + writer, static_cast(*array->type()), data.length(), num_levels, + def_levels, rep_levels, valid_bits, data.offset(), data_ptr))); } PARQUET_CATCH_NOT_OK(writer->Close()); return Status::OK(); From 3aef3b41ce89e83e02db21a5d894a010a3d220f5 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Wed, 13 Dec 2017 10:39:21 -0500 Subject: [PATCH 4/9] Do not pass -Werror via PARQUET_CXXFLAGS --- .travis.yml | 9 ++++----- benchmarks/decode_benchmark.cc | 1 + 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index ae24cfe7..7918b890 100644 --- a/.travis.yml +++ b/.travis.yml @@ -41,24 +41,23 @@ matrix: - compiler: gcc os: linux before_script: - - export PARQUET_CXXFLAGS="-Werror -DARROW_NO_DEPRECATED_API" + - export PARQUET_CXXFLAGS="-DARROW_NO_DEPRECATED_API" - source $TRAVIS_BUILD_DIR/ci/before_script_travis.sh - compiler: gcc os: linux before_script: - - export PARQUET_CXXFLAGS="-Werror" - source $TRAVIS_BUILD_DIR/ci/before_script_travis.sh - compiler: clang os: linux before_script: - - export PARQUET_CXXFLAGS="-Werror -DARROW_NO_DEPRECATED_API" + - export PARQUET_CXXFLAGS="-DARROW_NO_DEPRECATED_API" - source $TRAVIS_BUILD_DIR/ci/before_script_travis.sh - compiler: clang os: osx osx_image: xcode6.4 addons: before_script: - - export PARQUET_CXXFLAGS="-Werror -DARROW_NO_DEPRECATED_API" + - export PARQUET_CXXFLAGS="-DARROW_NO_DEPRECATED_API" - source $TRAVIS_BUILD_DIR/ci/before_script_travis.sh before_install: - mkdir $TRAVIS_BUILD_DIR/parquet-build @@ -68,7 +67,7 @@ matrix: env: PARQUET_BUILD_GROUP=toolchain before_script: script: - - export PARQUET_CXXFLAGS="-Werror -DARROW_NO_DEPRECATED_API" + - export PARQUET_CXXFLAGS="-DARROW_NO_DEPRECATED_API" - $TRAVIS_BUILD_DIR/ci/travis_script_static.sh - compiler: gcc os: linux diff --git a/benchmarks/decode_benchmark.cc b/benchmarks/decode_benchmark.cc index 90057347..d1dc78a4 100644 --- a/benchmarks/decode_benchmark.cc +++ b/benchmarks/decode_benchmark.cc @@ -302,6 +302,7 @@ void TestPlainIntCompressed(::arrow::Codec* codec, const std::vector& d printf("\n%s:\n Uncompressed len: %d\n Compressed len: %d\n", codec->name(), uncompressed_len, static_cast(compressed_len)); + double mult = num_iters * data.size() * 1000.; parquet::StopWatch sw; sw.Start(); From 758a21698b4c0ae6e7005461358cfe3d84b03082 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Wed, 13 Dec 2017 10:56:12 -0500 Subject: [PATCH 5/9] Fix warnings on macOS Clang --- cmake_modules/CompilerInfo.cmake | 2 +- cmake_modules/SetupCxxFlags.cmake | 2 ++ src/parquet/file_writer.h | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/cmake_modules/CompilerInfo.cmake b/cmake_modules/CompilerInfo.cmake index 8eba8743..654a0d83 100644 --- a/cmake_modules/CompilerInfo.cmake +++ b/cmake_modules/CompilerInfo.cmake @@ -51,7 +51,7 @@ elseif("${COMPILER_VERSION_FULL}" MATCHES ".*based on LLVM.*") # clang on Mac OS X, XCode 7+. elseif("${COMPILER_VERSION_FULL}" MATCHES ".*clang-.*") set(COMPILER_FAMILY "clang") - + set(COMPILER_VERSION "4.0") # gcc elseif("${COMPILER_VERSION_FULL_LOWER}" MATCHES ".*gcc[ -]version.*") set(COMPILER_FAMILY "gcc") diff --git a/cmake_modules/SetupCxxFlags.cmake b/cmake_modules/SetupCxxFlags.cmake index ac0ef797..6788dd5d 100644 --- a/cmake_modules/SetupCxxFlags.cmake +++ b/cmake_modules/SetupCxxFlags.cmake @@ -92,6 +92,8 @@ if ("${UPPERCASE_BUILD_WARNING_LEVEL}" STREQUAL "CHECKIN") -Wno-gnu-zero-variadic-macro-arguments -Wconversion -Wno-sign-conversion \ -Wno-disabled-macro-expansion -Wno-shorten-64-to-32") + message(STATUS "Clang version: ${COMPILER_VERSION}") + # Version numbers where warnings are introduced if ("${COMPILER_VERSION}" VERSION_GREATER "3.3") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-gnu-folding-constant") diff --git a/src/parquet/file_writer.h b/src/parquet/file_writer.h index f1652611..9c28531f 100644 --- a/src/parquet/file_writer.h +++ b/src/parquet/file_writer.h @@ -94,7 +94,7 @@ class PARQUET_EXPORT ParquetFileWriter { // Perform any cleanup associated with the file contents virtual void Close() = 0; - /// \deprecated Since 1.3.0 + /// \note Deprecated since 1.3.0 RowGroupWriter* AppendRowGroup(int64_t num_rows); virtual RowGroupWriter* AppendRowGroup() = 0; From e3ffb7104637090309425bfe2fa4bff105c4d531 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Wed, 13 Dec 2017 11:03:56 -0500 Subject: [PATCH 6/9] Fix -Wconversion warnings in decode_benchmark.cc --- benchmarks/decode_benchmark.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/benchmarks/decode_benchmark.cc b/benchmarks/decode_benchmark.cc index d1dc78a4..8f2dfa07 100644 --- a/benchmarks/decode_benchmark.cc +++ b/benchmarks/decode_benchmark.cc @@ -283,7 +283,7 @@ uint64_t TestBinaryPackedEncoding(const char* name, const std::vector& } \ elapsed = sw.Stop(); \ printf("%s rate (batch size = %2d): %0.3fM per second.\n", NAME, BATCH_SIZE, \ - mult / elapsed); + mult / static_cast(elapsed)); void TestPlainIntCompressed(::arrow::Codec* codec, const std::vector& data, int num_iters, int batch_size) { @@ -302,19 +302,19 @@ void TestPlainIntCompressed(::arrow::Codec* codec, const std::vector& d printf("\n%s:\n Uncompressed len: %d\n Compressed len: %d\n", codec->name(), uncompressed_len, static_cast(compressed_len)); - - double mult = num_iters * data.size() * 1000.; + double mult = static_cast(num_iters * data.size()) * 1000.; parquet::StopWatch sw; sw.Start(); uint64_t r = 0; for (int i = 0; i < num_iters; ++i) { ABORT_NOT_OK(codec->Decompress(compressed_len, compressed_data, uncompressed_len, decompressed_data)); - r += TestPlainIntEncoding(decompressed_data, data.size(), batch_size); + r += TestPlainIntEncoding(decompressed_data, static_cast(data.size()), + batch_size); } int64_t elapsed = sw.Stop(); printf("Compressed(%s) plain int rate (batch size = %2d): %0.3fM per second.\n", - codec->name(), batch_size, mult / elapsed); + codec->name(), batch_size, mult / static_cast(elapsed)); delete[] compressed_data; delete[] decompressed_data; From cc5bca0710817939ee8a5ec87d74df2ab0e7f38b Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Wed, 13 Dec 2017 13:28:58 -0500 Subject: [PATCH 7/9] Add noreturn to static methods in ParquetException --- src/parquet/exception.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/parquet/exception.h b/src/parquet/exception.h index 37ec8afc..37481848 100644 --- a/src/parquet/exception.h +++ b/src/parquet/exception.h @@ -24,6 +24,7 @@ #include "arrow/status.h" +#include "parquet/util/macros.h" #include "parquet/util/visibility.h" // PARQUET-1085 @@ -58,9 +59,9 @@ namespace parquet { class PARQUET_EXPORT ParquetException : public std::exception { public: - static void EofException(); - static void NYI(const std::string& msg); - static void Throw(const std::string& msg); + PARQUET_NORETURN static void EofException(); + PARQUET_NORETURN static void NYI(const std::string& msg); + PARQUET_NORETURN static void Throw(const std::string& msg); explicit ParquetException(const char* msg); explicit ParquetException(const std::string& msg); From 5b6cd809e09cba0a594471dee697aa4829f0bffd Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Wed, 13 Dec 2017 14:23:22 -0500 Subject: [PATCH 8/9] Compile with /bigobj in MSVC --- cmake_modules/SetupCxxFlags.cmake | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cmake_modules/SetupCxxFlags.cmake b/cmake_modules/SetupCxxFlags.cmake index 6788dd5d..e42e570e 100644 --- a/cmake_modules/SetupCxxFlags.cmake +++ b/cmake_modules/SetupCxxFlags.cmake @@ -177,8 +177,11 @@ set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -std=c++11") endif() if ("${COMPILER_FAMILY}" STREQUAL "msvc") + # Support large object code + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /bigobj") + # MSVC version of -Wno-deprecated - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4996") + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /wd4996") endif() if (PARQUET_USE_SSE) From 3769a8c1b60605036fc07ebaa53c87119db34c6d Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Wed, 13 Dec 2017 15:58:30 -0500 Subject: [PATCH 9/9] Add -Wno-missing-noreturn --- cmake_modules/SetupCxxFlags.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake_modules/SetupCxxFlags.cmake b/cmake_modules/SetupCxxFlags.cmake index e42e570e..1678e8dc 100644 --- a/cmake_modules/SetupCxxFlags.cmake +++ b/cmake_modules/SetupCxxFlags.cmake @@ -86,7 +86,7 @@ if ("${UPPERCASE_BUILD_WARNING_LEVEL}" STREQUAL "CHECKIN") -Wno-implicit-fallthrough -Wno-unreachable-code-return \ -Wno-float-equal -Wno-missing-prototypes \ -Wno-old-style-cast -Wno-covered-switch-default \ --Wno-format-nonliteral \ +-Wno-format-nonliteral -Wno-missing-noreturn \ -Wno-cast-align -Wno-vla-extension -Wno-shift-sign-overflow \ -Wno-used-but-marked-unused -Wno-missing-variable-declarations \ -Wno-gnu-zero-variadic-macro-arguments -Wconversion -Wno-sign-conversion \