This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 146e7df ARROW-3442: [C++] Allow dynamic linking of (most) unit tests
146e7df is described below
commit 146e7df80cbc316c55a4e63534193dce7271125a
Author: Antoine Pitrou <[email protected]>
AuthorDate: Thu Oct 11 17:23:53 2018 -0400
ARROW-3442: [C++] Allow dynamic linking of (most) unit tests
Prefer dynamic linking for tests, as it reduces the disk
footprint and probably makes linking faster too.
Author: Antoine Pitrou <[email protected]>
Author: Wes McKinney <[email protected]>
Closes #2744 from pitrou/ARROW-3442-tests-linking-shared and squashes the
following commits:
c783f851c <Antoine Pitrou> Remove obsolete comment
3ee4b3fc0 <Wes McKinney> Add re2 note about ARROW-3494
622e2a1f7 <Antoine Pitrou> Set LD_LIBRARY_PATH before testing Plasma Java
client.
6876db8cd <Antoine Pitrou> ARROW-3442: Allow dynamic linking of (most)
unit tests
---
ci/travis_script_plasma_java_client.sh | 10 ++-
cpp/CMakeLists.txt | 29 ++------
cpp/cmake_modules/ThirdpartyToolchain.cmake | 4 +-
cpp/src/arrow/gpu/CMakeLists.txt | 3 +-
cpp/src/parquet/CMakeLists.txt | 35 +++++++---
cpp/src/parquet/arrow/schema.h | 1 +
cpp/src/parquet/column_writer-test.cc | 16 ++---
cpp/src/parquet/column_writer.h | 2 +-
cpp/src/parquet/file_reader.h | 4 +-
cpp/src/parquet/metadata.cc | 46 +++++++++++--
cpp/src/parquet/metadata.h | 10 ++-
cpp/src/parquet/schema-test.cc | 31 ++++-----
cpp/src/parquet/statistics-test.cc | 78 ++++++++++------------
cpp/src/plasma/CMakeLists.txt | 12 ++--
.../org/apache/arrow/plasma/PlasmaClientTest.java | 1 +
15 files changed, 157 insertions(+), 125 deletions(-)
diff --git a/ci/travis_script_plasma_java_client.sh
b/ci/travis_script_plasma_java_client.sh
index d7dc3bc..927a239 100755
--- a/ci/travis_script_plasma_java_client.sh
+++ b/ci/travis_script_plasma_java_client.sh
@@ -19,12 +19,20 @@
set -e
+source $TRAVIS_BUILD_DIR/ci/travis_env_common.sh
+
PLASMA_JAVA_DIR=${TRAVIS_BUILD_DIR}/java/plasma
+
pushd $PLASMA_JAVA_DIR
mvn clean install
-export PLASMA_STORE=${TRAVIS_BUILD_DIR}/cpp-install/bin/plasma_store_server
+
+export LD_LIBRARY_PATH=${ARROW_CPP_INSTALL}/lib:$LD_LIBRARY_PATH
+export PLASMA_STORE=${ARROW_CPP_INSTALL}/bin/plasma_store_server
+
+ldd $PLASMA_STORE
+
java -cp target/test-classes:target/classes
-Djava.library.path=${TRAVIS_BUILD_DIR}/cpp-build/debug/
org.apache.arrow.plasma.PlasmaClientTest
popd
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 5f7d2ca..691c307 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -308,9 +308,6 @@ Always OFF if building binaries"
endif()
if(ARROW_BUILD_TESTS OR ARROW_BUILD_BENCHMARKS)
- if (NOT MSVC AND NOT ARROW_BUILD_STATIC)
- message(FATAL_ERROR "ARROW_BUILD_TESTS needs ARROW_BUILD_STATIC")
- endif ()
set(ARROW_WITH_BROTLI ON)
set(ARROW_WITH_LZ4 ON)
set(ARROW_WITH_SNAPPY ON)
@@ -406,20 +403,7 @@ include(san-config)
# Code coverage
if ("${ARROW_GENERATE_COVERAGE}")
- if("${CMAKE_CXX_COMPILER}" MATCHES ".*clang.*")
- # There appears to be some bugs in clang 3.3 which cause code coverage
- # to have link errors, not locating the llvm_gcda_* symbols.
- # This should be fixed in llvm 3.4 with
http://llvm.org/viewvc/llvm-project?view=revision&revision=184666
- message(SEND_ERROR "Cannot currently generate coverage with clang")
- endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --coverage -DCOVERAGE_BUILD")
-
- # For coverage to work properly, we need to use static linkage. Otherwise,
- # __gcov_flush() doesn't properly flush coverage from every module.
- # See
http://stackoverflow.com/questions/28164543/using-gcov-flush-within-a-library-doesnt-force-the-other-modules-to-yield-gc
- if(NOT ARROW_BUILD_STATIC)
- message(SEND_ERROR "Coverage requires the static lib to be built")
- endif()
endif()
# CMAKE_CXX_FLAGS now fully assembled
@@ -714,17 +698,12 @@ if (ARROW_JEMALLOC)
add_definitions(-DARROW_JEMALLOC)
add_definitions(-DARROW_JEMALLOC_INCLUDE_DIR=${JEMALLOC_INCLUDE_DIR})
- # If using gcc or clang on Linux, we need to link pthread for older Linuxes,
- # including distros as new as Ubuntu 14.04
- if ((CMAKE_COMPILER_IS_GNUCXX OR
- (NOT APPLE AND CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
- AND PTHREAD_LIBRARY)
+ if (NOT WIN32 AND NOT APPLE)
set(ARROW_JEMALLOC_LINK_LIBS
jemalloc_static
# For glibc <2.17 we need to link to librt.
# As we compile with --as-needed by default, the linker will omit this
# dependency if not required.
- ${PTHREAD_LIBRARY}
rt
)
else()
@@ -738,9 +717,9 @@ if (ARROW_JEMALLOC)
set(ARROW_STATIC_LINK_LIBS
${ARROW_STATIC_LINK_LIBS}
${ARROW_JEMALLOC_LINK_LIBS})
-elseif (PTHREAD_LIBRARY)
- # We need to separate this as otherwise CMake would mess with the library
- # linking order.
+endif(ARROW_JEMALLOC)
+
+if (PTHREAD_LIBRARY)
set(ARROW_LINK_LIBS
${ARROW_LINK_LIBS}
${PTHREAD_LIBRARY})
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake
b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index 949ccd8..1e28185 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -42,8 +42,8 @@ if (NOT "$ENV{ARROW_BUILD_TOOLCHAIN}" STREQUAL "")
# set(ORC_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
set(PROTOBUF_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
set(RAPIDJSON_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
- # conda-forge doesn't have a static re2.
- #set(RE2_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
+ # ARROW-3494: conda-forge re2 does not work yet
+ # set(RE2_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
set(SNAPPY_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
set(THRIFT_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
set(ZLIB_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
diff --git a/cpp/src/arrow/gpu/CMakeLists.txt b/cpp/src/arrow/gpu/CMakeLists.txt
index a5b11e5..465977a 100644
--- a/cpp/src/arrow/gpu/CMakeLists.txt
+++ b/cpp/src/arrow/gpu/CMakeLists.txt
@@ -80,8 +80,7 @@ install(
DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig/")
set(ARROW_GPU_TEST_LINK_LIBS
- # ARROW-2561: Must link statically against arrow and arrow_gpu to avoid
double free
- arrow_gpu_static
+ arrow_gpu_shared
${ARROW_TEST_LINK_LIBS})
if (ARROW_BUILD_TESTS)
diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt
index 3d46674..1b2e72a 100644
--- a/cpp/src/parquet/CMakeLists.txt
+++ b/cpp/src/parquet/CMakeLists.txt
@@ -26,7 +26,7 @@ endif()
add_custom_target(parquet)
function(ADD_PARQUET_TEST REL_TEST_NAME)
- set(options NO_VALGRIND)
+ set(options USE_STATIC_LINKING)
set(one_value_args)
set(multi_value_args EXTRA_DEPENDENCIES LABELS)
cmake_parse_arguments(ARG "${options}" "${one_value_args}"
"${multi_value_args}" ${ARGN})
@@ -34,10 +34,20 @@ function(ADD_PARQUET_TEST REL_TEST_NAME)
message(SEND_ERROR "Error: unrecognized arguments:
${ARG_UNPARSED_ARGUMENTS}")
endif()
- ADD_ARROW_TEST(${REL_TEST_NAME}
- STATIC_LINK_LIBS ${PARQUET_TEST_LINK_LIBS}
- PREFIX "parquet"
- LABELS "unittest;parquet")
+ # By default we prefer shared linking with libparquet, as it's faster
+ # and uses less disk space, but in some cases we need to force static
+ # linking (see rationale below).
+ if (ARG_USE_STATIC_LINKING)
+ ADD_ARROW_TEST(${REL_TEST_NAME}
+ STATIC_LINK_LIBS ${PARQUET_STATIC_TEST_LINK_LIBS}
+ PREFIX "parquet"
+ LABELS "unittest;parquet")
+ else()
+ ADD_ARROW_TEST(${REL_TEST_NAME}
+ STATIC_LINK_LIBS ${PARQUET_SHARED_TEST_LINK_LIBS}
+ PREFIX "parquet"
+ LABELS "unittest;parquet")
+ endif()
endfunction()
# ----------------------------------------------------------------------
@@ -85,7 +95,13 @@ elseif(NOT MSVC)
${CMAKE_DL_LIBS})
endif()
-set(PARQUET_TEST_LINK_LIBS
+set(PARQUET_SHARED_TEST_LINK_LIBS
+ ${PARQUET_MIN_TEST_LIBS}
+ ${PARQUET_ARROW_LINK_LIBS}
+ parquet_shared
+ thriftstatic)
+
+set(PARQUET_STATIC_TEST_LINK_LIBS
${PARQUET_MIN_TEST_LIBS}
${PARQUET_ARROW_LINK_LIBS}
parquet_static
@@ -257,7 +273,6 @@ ADD_PARQUET_TEST(bloom_filter-test)
ADD_PARQUET_TEST(column_reader-test)
ADD_PARQUET_TEST(column_scanner-test)
ADD_PARQUET_TEST(column_writer-test)
-ADD_PARQUET_TEST(file-deserialize-test)
ADD_PARQUET_TEST(file-serialize-test)
ADD_PARQUET_TEST(properties-test)
ADD_PARQUET_TEST(statistics-test)
@@ -266,7 +281,11 @@ ADD_PARQUET_TEST(metadata-test)
ADD_PARQUET_TEST(public-api-test)
ADD_PARQUET_TEST(types-test)
ADD_PARQUET_TEST(reader-test)
-ADD_PARQUET_TEST(schema-test)
+
+# Those tests need to use static linking as they access thrift-generated
+# symbols which are not exported by parquet.dll on Windows (PARQUET-1420).
+ADD_PARQUET_TEST(file-deserialize-test USE_STATIC_LINKING)
+ADD_PARQUET_TEST(schema-test USE_STATIC_LINKING)
#############################################################
# Benchmark linking
diff --git a/cpp/src/parquet/arrow/schema.h b/cpp/src/parquet/arrow/schema.h
index 8e92085..649fe86 100644
--- a/cpp/src/parquet/arrow/schema.h
+++ b/cpp/src/parquet/arrow/schema.h
@@ -91,6 +91,7 @@ ToParquetSchema(const ::arrow::Schema* arrow_schema, const
WriterProperties& pro
const WriterProperties&
properties,
std::shared_ptr<SchemaDescriptor>* out);
+PARQUET_EXPORT
int32_t DecimalSize(int32_t precision);
} // namespace arrow
diff --git a/cpp/src/parquet/column_writer-test.cc
b/cpp/src/parquet/column_writer-test.cc
index 8c20a6e..b81f3ed 100644
--- a/cpp/src/parquet/column_writer-test.cc
+++ b/cpp/src/parquet/column_writer-test.cc
@@ -89,8 +89,7 @@ class TestPrimitiveWriter : public
PrimitiveTypedTest<TestType> {
wp_builder.max_statistics_size(column_properties.max_statistics_size());
writer_properties_ = wp_builder.build();
- metadata_ = ColumnChunkMetaDataBuilder::Make(
- writer_properties_, this->descr_,
reinterpret_cast<uint8_t*>(&thrift_metadata_));
+ metadata_ = ColumnChunkMetaDataBuilder::Make(writer_properties_,
this->descr_);
std::unique_ptr<PageWriter> pager =
PageWriter::Open(sink_.get(), column_properties.compression(),
metadata_.get());
std::shared_ptr<ColumnWriter> writer =
@@ -177,8 +176,8 @@ class TestPrimitiveWriter : public
PrimitiveTypedTest<TestType> {
// Metadata accessor must be created lazily.
// This is because the ColumnChunkMetaData semantics dictate the metadata
object is
// complete (no changes to the metadata buffer can be made after
instantiation)
- auto metadata_accessor = ColumnChunkMetaData::Make(
- reinterpret_cast<const uint8_t*>(&thrift_metadata_), this->descr_);
+ auto metadata_accessor =
+ ColumnChunkMetaData::Make(metadata_->contents(), this->descr_);
return metadata_accessor->num_values();
}
@@ -187,8 +186,8 @@ class TestPrimitiveWriter : public
PrimitiveTypedTest<TestType> {
// This is because the ColumnChunkMetaData semantics dictate the metadata
object is
// complete (no changes to the metadata buffer can be made after
instantiation)
ApplicationVersion app_version(this->writer_properties_->created_by());
- auto metadata_accessor = ColumnChunkMetaData::Make(
- reinterpret_cast<const uint8_t*>(&thrift_metadata_), this->descr_,
&app_version);
+ auto metadata_accessor =
+ ColumnChunkMetaData::Make(metadata_->contents(), this->descr_,
&app_version);
return metadata_accessor->is_stats_set();
}
@@ -196,8 +195,8 @@ class TestPrimitiveWriter : public
PrimitiveTypedTest<TestType> {
// Metadata accessor must be created lazily.
// This is because the ColumnChunkMetaData semantics dictate the metadata
object is
// complete (no changes to the metadata buffer can be made after
instantiation)
- auto metadata_accessor = ColumnChunkMetaData::Make(
- reinterpret_cast<const uint8_t*>(&thrift_metadata_), this->descr_);
+ auto metadata_accessor =
+ ColumnChunkMetaData::Make(metadata_->contents(), this->descr_);
return metadata_accessor->encodings();
}
@@ -213,7 +212,6 @@ class TestPrimitiveWriter : public
PrimitiveTypedTest<TestType> {
const ColumnDescriptor* descr_;
private:
- format::ColumnChunk thrift_metadata_;
std::unique_ptr<ColumnChunkMetaDataBuilder> metadata_;
std::unique_ptr<InMemoryOutputStream> sink_;
std::shared_ptr<WriterProperties> writer_properties_;
diff --git a/cpp/src/parquet/column_writer.h b/cpp/src/parquet/column_writer.h
index 457c532..41bc7bd 100644
--- a/cpp/src/parquet/column_writer.h
+++ b/cpp/src/parquet/column_writer.h
@@ -76,7 +76,7 @@ class PARQUET_EXPORT LevelEncoder {
std::unique_ptr<::arrow::BitUtil::BitWriter> bit_packed_encoder_;
};
-class PageWriter {
+class PARQUET_EXPORT PageWriter {
public:
virtual ~PageWriter() {}
diff --git a/cpp/src/parquet/file_reader.h b/cpp/src/parquet/file_reader.h
index 6836bb1..4730305 100644
--- a/cpp/src/parquet/file_reader.h
+++ b/cpp/src/parquet/file_reader.h
@@ -68,10 +68,10 @@ class PARQUET_EXPORT RowGroupReader {
class PARQUET_EXPORT ParquetFileReader {
public:
- // Forward declare a virtual class 'Contents' to aid dependency injection
and more
+ // Declare a virtual class 'Contents' to aid dependency injection and more
// easily create test fixtures
// An implementation of the Contents class is defined in the .cc file
- struct Contents {
+ struct PARQUET_EXPORT Contents {
static std::unique_ptr<Contents> Open(
std::unique_ptr<RandomAccessSource> source,
const ReaderProperties& props = default_reader_properties(),
diff --git a/cpp/src/parquet/metadata.cc b/cpp/src/parquet/metadata.cc
index f49393b..cf63b0f 100644
--- a/cpp/src/parquet/metadata.cc
+++ b/cpp/src/parquet/metadata.cc
@@ -580,17 +580,26 @@ bool ApplicationVersion::HasCorrectStatistics(Type::type
col_type,
class ColumnChunkMetaDataBuilder::ColumnChunkMetaDataBuilderImpl {
public:
explicit ColumnChunkMetaDataBuilderImpl(const
std::shared_ptr<WriterProperties>& props,
+ const ColumnDescriptor* column)
+ : owned_column_chunk_(new format::ColumnChunk),
+ properties_(props),
+ column_(column) {
+ Init(owned_column_chunk_.get());
+ }
+
+ explicit ColumnChunkMetaDataBuilderImpl(const
std::shared_ptr<WriterProperties>& props,
const ColumnDescriptor* column,
- uint8_t* contents)
+ format::ColumnChunk* column_chunk)
: properties_(props), column_(column) {
- column_chunk_ = reinterpret_cast<format::ColumnChunk*>(contents);
- column_chunk_->meta_data.__set_type(ToThrift(column->physical_type()));
-
column_chunk_->meta_data.__set_path_in_schema(column->path()->ToDotVector());
- column_chunk_->meta_data.__set_codec(
- ToThrift(properties_->compression(column->path())));
+ Init(column_chunk);
}
+
~ColumnChunkMetaDataBuilderImpl() {}
+ const uint8_t* contents() const {
+ return reinterpret_cast<const uint8_t*>(column_chunk_);
+ }
+
// column chunk
void set_file_path(const std::string& val) {
column_chunk_->__set_file_path(val); }
@@ -662,7 +671,16 @@ class
ColumnChunkMetaDataBuilder::ColumnChunkMetaDataBuilderImpl {
const ColumnDescriptor* descr() const { return column_; }
private:
+ void Init(format::ColumnChunk* column_chunk) {
+ column_chunk_ = column_chunk;
+ column_chunk_->meta_data.__set_type(ToThrift(column_->physical_type()));
+
column_chunk_->meta_data.__set_path_in_schema(column_->path()->ToDotVector());
+ column_chunk_->meta_data.__set_codec(
+ ToThrift(properties_->compression(column_->path())));
+ }
+
format::ColumnChunk* column_chunk_;
+ std::unique_ptr<format::ColumnChunk> owned_column_chunk_;
const std::shared_ptr<WriterProperties> properties_;
const ColumnDescriptor* column_;
};
@@ -674,14 +692,28 @@ std::unique_ptr<ColumnChunkMetaDataBuilder>
ColumnChunkMetaDataBuilder::Make(
new ColumnChunkMetaDataBuilder(props, column, contents));
}
+std::unique_ptr<ColumnChunkMetaDataBuilder> ColumnChunkMetaDataBuilder::Make(
+ const std::shared_ptr<WriterProperties>& props, const ColumnDescriptor*
column) {
+ return std::unique_ptr<ColumnChunkMetaDataBuilder>(
+ new ColumnChunkMetaDataBuilder(props, column));
+}
+
+ColumnChunkMetaDataBuilder::ColumnChunkMetaDataBuilder(
+ const std::shared_ptr<WriterProperties>& props, const ColumnDescriptor*
column)
+ : impl_{std::unique_ptr<ColumnChunkMetaDataBuilderImpl>(
+ new ColumnChunkMetaDataBuilderImpl(props, column))} {}
+
ColumnChunkMetaDataBuilder::ColumnChunkMetaDataBuilder(
const std::shared_ptr<WriterProperties>& props, const ColumnDescriptor*
column,
uint8_t* contents)
: impl_{std::unique_ptr<ColumnChunkMetaDataBuilderImpl>(
- new ColumnChunkMetaDataBuilderImpl(props, column, contents))} {}
+ new ColumnChunkMetaDataBuilderImpl(
+ props, column,
reinterpret_cast<format::ColumnChunk*>(contents)))} {}
ColumnChunkMetaDataBuilder::~ColumnChunkMetaDataBuilder() {}
+const uint8_t* ColumnChunkMetaDataBuilder::contents() const { return
impl_->contents(); }
+
void ColumnChunkMetaDataBuilder::set_file_path(const std::string& path) {
impl_->set_file_path(path);
}
diff --git a/cpp/src/parquet/metadata.h b/cpp/src/parquet/metadata.h
index 7e29fe9..706e980 100644
--- a/cpp/src/parquet/metadata.h
+++ b/cpp/src/parquet/metadata.h
@@ -37,7 +37,7 @@ namespace parquet {
using KeyValueMetadata = ::arrow::KeyValueMetadata;
-class ApplicationVersion {
+class PARQUET_EXPORT ApplicationVersion {
public:
// Known Versions with Issues
static const ApplicationVersion& PARQUET_251_FIXED_VERSION();
@@ -195,6 +195,9 @@ class PARQUET_EXPORT ColumnChunkMetaDataBuilder {
public:
// API convenience to get a MetaData reader
static std::unique_ptr<ColumnChunkMetaDataBuilder> Make(
+ const std::shared_ptr<WriterProperties>& props, const ColumnDescriptor*
column);
+
+ static std::unique_ptr<ColumnChunkMetaDataBuilder> Make(
const std::shared_ptr<WriterProperties>& props, const ColumnDescriptor*
column,
uint8_t* contents);
@@ -213,11 +216,16 @@ class PARQUET_EXPORT ColumnChunkMetaDataBuilder {
int64_t compressed_size, int64_t uncompressed_size, bool
has_dictionary,
bool dictionary_fallback);
+ // The metadata contents, suitable for passing to ColumnChunkMetaData::Make
+ const uint8_t* contents() const;
+
// For writing metadata at end of column chunk
void WriteTo(OutputStream* sink);
private:
explicit ColumnChunkMetaDataBuilder(const std::shared_ptr<WriterProperties>&
props,
+ const ColumnDescriptor* column);
+ explicit ColumnChunkMetaDataBuilder(const std::shared_ptr<WriterProperties>&
props,
const ColumnDescriptor* column, uint8_t*
contents);
// PIMPL Idiom
class ColumnChunkMetaDataBuilderImpl;
diff --git a/cpp/src/parquet/schema-test.cc b/cpp/src/parquet/schema-test.cc
index 5d2af28..3d7eb0e 100644
--- a/cpp/src/parquet/schema-test.cc
+++ b/cpp/src/parquet/schema-test.cc
@@ -42,11 +42,11 @@ namespace schema {
static inline SchemaElement NewPrimitive(const std::string& name,
FieldRepetitionType::type repetition,
- format::Type::type type, int id = 0) {
+ Type::type type, int id = 0) {
SchemaElement result;
result.__set_name(name);
result.__set_repetition_type(repetition);
- result.__set_type(type);
+ result.__set_type(static_cast<format::Type::type>(type));
return result;
}
@@ -138,8 +138,7 @@ TEST_F(TestPrimitiveNode, Attrs) {
}
TEST_F(TestPrimitiveNode, FromParquet) {
- SchemaElement elt =
- NewPrimitive(name_, FieldRepetitionType::OPTIONAL, format::Type::INT32,
0);
+ SchemaElement elt = NewPrimitive(name_, FieldRepetitionType::OPTIONAL,
Type::INT32, 0);
ASSERT_NO_FATAL_FAILURE(Convert(&elt));
ASSERT_EQ(name_, prim_node_->name());
ASSERT_EQ(id_, prim_node_->id());
@@ -148,7 +147,7 @@ TEST_F(TestPrimitiveNode, FromParquet) {
ASSERT_EQ(LogicalType::NONE, prim_node_->logical_type());
// Test a logical type
- elt = NewPrimitive(name_, FieldRepetitionType::REQUIRED,
format::Type::BYTE_ARRAY, 0);
+ elt = NewPrimitive(name_, FieldRepetitionType::REQUIRED, Type::BYTE_ARRAY,
0);
elt.__set_converted_type(ConvertedType::UTF8);
ASSERT_NO_FATAL_FAILURE(Convert(&elt));
@@ -157,8 +156,7 @@ TEST_F(TestPrimitiveNode, FromParquet) {
ASSERT_EQ(LogicalType::UTF8, prim_node_->logical_type());
// FIXED_LEN_BYTE_ARRAY
- elt = NewPrimitive(name_, FieldRepetitionType::OPTIONAL,
- format::Type::FIXED_LEN_BYTE_ARRAY, 0);
+ elt = NewPrimitive(name_, FieldRepetitionType::OPTIONAL,
Type::FIXED_LEN_BYTE_ARRAY, 0);
elt.__set_type_length(16);
ASSERT_NO_FATAL_FAILURE(Convert(&elt));
@@ -169,8 +167,7 @@ TEST_F(TestPrimitiveNode, FromParquet) {
ASSERT_EQ(16, prim_node_->type_length());
// ConvertedType::Decimal
- elt = NewPrimitive(name_, FieldRepetitionType::OPTIONAL,
- format::Type::FIXED_LEN_BYTE_ARRAY, 0);
+ elt = NewPrimitive(name_, FieldRepetitionType::OPTIONAL,
Type::FIXED_LEN_BYTE_ARRAY, 0);
elt.__set_converted_type(ConvertedType::DECIMAL);
elt.__set_type_length(6);
elt.__set_scale(2);
@@ -419,8 +416,7 @@ TEST_F(TestSchemaConverter, NestedExample) {
elements.push_back(NewGroup(name_, FieldRepetitionType::REPEATED, 2, 0));
// A primitive one
- elements.push_back(
- NewPrimitive("a", FieldRepetitionType::REQUIRED, format::Type::INT32,
1));
+ elements.push_back(NewPrimitive("a", FieldRepetitionType::REQUIRED,
Type::INT32, 1));
// A group
elements.push_back(NewGroup("bag", FieldRepetitionType::OPTIONAL, 1, 2));
@@ -429,8 +425,7 @@ TEST_F(TestSchemaConverter, NestedExample) {
elt = NewGroup("b", FieldRepetitionType::REPEATED, 1, 3);
elt.__set_converted_type(ConvertedType::LIST);
elements.push_back(elt);
- elements.push_back(
- NewPrimitive("item", FieldRepetitionType::OPTIONAL, format::Type::INT64,
4));
+ elements.push_back(NewPrimitive("item", FieldRepetitionType::OPTIONAL,
Type::INT64, 4));
ASSERT_NO_FATAL_FAILURE(Convert(&elements[0],
static_cast<int>(elements.size())));
@@ -461,7 +456,7 @@ TEST_F(TestSchemaConverter, InvalidRoot) {
SchemaElement elements[2];
elements[0] =
- NewPrimitive("not-a-group", FieldRepetitionType::REQUIRED,
format::Type::INT32, 0);
+ NewPrimitive("not-a-group", FieldRepetitionType::REQUIRED, Type::INT32,
0);
ASSERT_THROW(Convert(elements, 2), ParquetException);
// While the Parquet spec indicates that the root group should have REPEATED
@@ -469,7 +464,7 @@ TEST_F(TestSchemaConverter, InvalidRoot) {
// groups as the first element. These tests check that this is okay as a
// practicality matter.
elements[0] = NewGroup("not-repeated", FieldRepetitionType::REQUIRED, 1, 0);
- elements[1] = NewPrimitive("a", FieldRepetitionType::REQUIRED,
format::Type::INT32, 1);
+ elements[1] = NewPrimitive("a", FieldRepetitionType::REQUIRED, Type::INT32,
1);
ASSERT_NO_FATAL_FAILURE(Convert(elements, 2));
elements[0] = NewGroup("not-repeated", FieldRepetitionType::OPTIONAL, 1, 0);
@@ -525,8 +520,7 @@ TEST_F(TestSchemaFlatten, NestedExample) {
elements.push_back(NewGroup(name_, FieldRepetitionType::REPEATED, 2, 0));
// A primitive one
- elements.push_back(
- NewPrimitive("a", FieldRepetitionType::REQUIRED, format::Type::INT32,
1));
+ elements.push_back(NewPrimitive("a", FieldRepetitionType::REQUIRED,
Type::INT32, 1));
// A group
elements.push_back(NewGroup("bag", FieldRepetitionType::OPTIONAL, 1, 2));
@@ -535,8 +529,7 @@ TEST_F(TestSchemaFlatten, NestedExample) {
elt = NewGroup("b", FieldRepetitionType::REPEATED, 1, 3);
elt.__set_converted_type(ConvertedType::LIST);
elements.push_back(elt);
- elements.push_back(
- NewPrimitive("item", FieldRepetitionType::OPTIONAL, format::Type::INT64,
4));
+ elements.push_back(NewPrimitive("item", FieldRepetitionType::OPTIONAL,
Type::INT64, 4));
// Construct the schema
NodeVector fields;
diff --git a/cpp/src/parquet/statistics-test.cc
b/cpp/src/parquet/statistics-test.cc
index d2ecede..e1926a3 100644
--- a/cpp/src/parquet/statistics-test.cc
+++ b/cpp/src/parquet/statistics-test.cc
@@ -311,9 +311,22 @@ TYPED_TEST(TestNumericRowGroupStatistics, Merge) {
ASSERT_NO_FATAL_FAILURE(this->TestMerge());
}
+// Helper for basic statistics tests below
+void AssertStatsSet(const ApplicationVersion& version,
+ std::shared_ptr<parquet::WriterProperties> props,
+ const ColumnDescriptor* column, bool expected_is_set) {
+ auto metadata_builder = ColumnChunkMetaDataBuilder::Make(props, column);
+ auto column_chunk =
+ ColumnChunkMetaData::Make(metadata_builder->contents(), column,
&version);
+ EncodedStatistics stats;
+ metadata_builder->SetStatistics(false /* is_signed */, stats);
+ ASSERT_EQ(column_chunk->is_stats_set(), expected_is_set);
+}
+
// Statistics are restricted for few types in older parquet version
TEST(CorruptStatistics, Basics) {
- ApplicationVersion version("parquet-mr version 1.8.0");
+ std::string created_by = "parquet-mr version 1.8.0";
+ ApplicationVersion version(created_by);
SchemaDescriptor schema;
schema::NodePtr node;
std::vector<schema::NodePtr> fields;
@@ -335,31 +348,22 @@ TEST(CorruptStatistics, Basics) {
node = schema::GroupNode::Make("schema", Repetition::REQUIRED, fields);
schema.Init(node);
- format::ColumnChunk col_chunk;
- col_chunk.meta_data.__isset.statistics = true;
- auto column_chunk1 = ColumnChunkMetaData::Make(
- reinterpret_cast<const uint8_t*>(&col_chunk), schema.Column(0),
&version);
- ASSERT_TRUE(column_chunk1->is_stats_set());
- auto column_chunk2 = ColumnChunkMetaData::Make(
- reinterpret_cast<const uint8_t*>(&col_chunk), schema.Column(1),
&version);
- ASSERT_FALSE(column_chunk2->is_stats_set());
- auto column_chunk3 = ColumnChunkMetaData::Make(
- reinterpret_cast<const uint8_t*>(&col_chunk), schema.Column(2),
&version);
- ASSERT_TRUE(column_chunk3->is_stats_set());
- auto column_chunk4 = ColumnChunkMetaData::Make(
- reinterpret_cast<const uint8_t*>(&col_chunk), schema.Column(3),
&version);
- ASSERT_FALSE(column_chunk4->is_stats_set());
- auto column_chunk5 = ColumnChunkMetaData::Make(
- reinterpret_cast<const uint8_t*>(&col_chunk), schema.Column(4),
&version);
- ASSERT_FALSE(column_chunk5->is_stats_set());
- auto column_chunk6 = ColumnChunkMetaData::Make(
- reinterpret_cast<const uint8_t*>(&col_chunk), schema.Column(5),
&version);
- ASSERT_FALSE(column_chunk6->is_stats_set());
+ parquet::WriterProperties::Builder builder;
+ builder.created_by(created_by);
+ std::shared_ptr<parquet::WriterProperties> props = builder.build();
+
+ AssertStatsSet(version, props, schema.Column(0), true);
+ AssertStatsSet(version, props, schema.Column(1), false);
+ AssertStatsSet(version, props, schema.Column(2), true);
+ AssertStatsSet(version, props, schema.Column(3), false);
+ AssertStatsSet(version, props, schema.Column(4), false);
+ AssertStatsSet(version, props, schema.Column(5), false);
}
// Statistics for all types have no restrictions in newer parquet version
TEST(CorrectStatistics, Basics) {
- ApplicationVersion version("parquet-cpp version 1.3.0");
+ std::string created_by = "parquet-cpp version 1.3.0";
+ ApplicationVersion version(created_by);
SchemaDescriptor schema;
schema::NodePtr node;
std::vector<schema::NodePtr> fields;
@@ -381,26 +385,16 @@ TEST(CorrectStatistics, Basics) {
node = schema::GroupNode::Make("schema", Repetition::REQUIRED, fields);
schema.Init(node);
- format::ColumnChunk col_chunk;
- col_chunk.meta_data.__isset.statistics = true;
- auto column_chunk1 = ColumnChunkMetaData::Make(
- reinterpret_cast<const uint8_t*>(&col_chunk), schema.Column(0),
&version);
- ASSERT_TRUE(column_chunk1->is_stats_set());
- auto column_chunk2 = ColumnChunkMetaData::Make(
- reinterpret_cast<const uint8_t*>(&col_chunk), schema.Column(1),
&version);
- ASSERT_TRUE(column_chunk2->is_stats_set());
- auto column_chunk3 = ColumnChunkMetaData::Make(
- reinterpret_cast<const uint8_t*>(&col_chunk), schema.Column(2),
&version);
- ASSERT_TRUE(column_chunk3->is_stats_set());
- auto column_chunk4 = ColumnChunkMetaData::Make(
- reinterpret_cast<const uint8_t*>(&col_chunk), schema.Column(3),
&version);
- ASSERT_TRUE(column_chunk4->is_stats_set());
- auto column_chunk5 = ColumnChunkMetaData::Make(
- reinterpret_cast<const uint8_t*>(&col_chunk), schema.Column(4),
&version);
- ASSERT_FALSE(column_chunk5->is_stats_set());
- auto column_chunk6 = ColumnChunkMetaData::Make(
- reinterpret_cast<const uint8_t*>(&col_chunk), schema.Column(5),
&version);
- ASSERT_TRUE(column_chunk6->is_stats_set());
+ parquet::WriterProperties::Builder builder;
+ builder.created_by(created_by);
+ std::shared_ptr<parquet::WriterProperties> props = builder.build();
+
+ AssertStatsSet(version, props, schema.Column(0), true);
+ AssertStatsSet(version, props, schema.Column(1), true);
+ AssertStatsSet(version, props, schema.Column(2), true);
+ AssertStatsSet(version, props, schema.Column(3), true);
+ AssertStatsSet(version, props, schema.Column(4), false);
+ AssertStatsSet(version, props, schema.Column(5), true);
}
// Test SortOrder class
diff --git a/cpp/src/plasma/CMakeLists.txt b/cpp/src/plasma/CMakeLists.txt
index 116d534..152190f 100644
--- a/cpp/src/plasma/CMakeLists.txt
+++ b/cpp/src/plasma/CMakeLists.txt
@@ -81,7 +81,7 @@ set(PLASMA_SRCS
thirdparty/ae/ae.c
thirdparty/xxhash.cc)
-set(PLASMA_LINK_LIBS arrow_static)
+set(PLASMA_LINK_LIBS arrow_shared)
if (ARROW_GPU)
set(PLASMA_LINK_LIBS ${PLASMA_LINK_LIBS} arrow_gpu_shared)
@@ -129,7 +129,7 @@ if ("${COMPILER_FAMILY}" STREQUAL "gcc")
endif()
add_executable(plasma_store_server store.cc)
-target_link_libraries(plasma_store_server plasma_static ${PLASMA_LINK_LIBS})
+target_link_libraries(plasma_store_server plasma_shared ${PLASMA_LINK_LIBS})
if (ARROW_RPATH_ORIGIN)
if (APPLE)
@@ -192,9 +192,9 @@ if(ARROW_PLASMA_JAVA_CLIENT)
${PLASMA_LIBRARY_EXT_java_SRC})
if(APPLE)
- target_link_libraries(plasma_java plasma_static ${PLASMA_LINK_LIBS}
"-undefined dynamic_lookup" -Wl,-force_load,${FLATBUFFERS_STATIC_LIB}
${FLATBUFFERS_STATIC_LIB} ${PTHREAD_LIBRARY})
+ target_link_libraries(plasma_java plasma_shared ${PLASMA_LINK_LIBS}
"-undefined dynamic_lookup" -Wl,-force_load,${FLATBUFFERS_STATIC_LIB}
${FLATBUFFERS_STATIC_LIB} ${PTHREAD_LIBRARY})
else(APPLE)
- target_link_libraries(plasma_java plasma_static ${PLASMA_LINK_LIBS}
-Wl,--whole-archive ${FLATBUFFERS_STATIC_LIB} -Wl,--no-whole-archive
${FLATBUFFERS_STATIC_LIB} ${PTHREAD_LIBRARY})
+ target_link_libraries(plasma_java plasma_shared ${PLASMA_LINK_LIBS}
-Wl,--whole-archive ${FLATBUFFERS_STATIC_LIB} -Wl,--no-whole-archive
${FLATBUFFERS_STATIC_LIB} ${PTHREAD_LIBRARY})
endif(APPLE)
endif()
#######################################
@@ -202,7 +202,7 @@ endif()
#######################################
ADD_ARROW_TEST(test/serialization_tests
- EXTRA_LINK_LIBS plasma_static ${PLASMA_LINK_LIBS})
+ EXTRA_LINK_LIBS plasma_shared ${PLASMA_LINK_LIBS})
ADD_ARROW_TEST(test/client_tests
- EXTRA_LINK_LIBS plasma_static ${PLASMA_LINK_LIBS}
+ EXTRA_LINK_LIBS plasma_shared ${PLASMA_LINK_LIBS}
EXTRA_DEPENDENCIES plasma_store_server)
diff --git
a/java/plasma/src/test/java/org/apache/arrow/plasma/PlasmaClientTest.java
b/java/plasma/src/test/java/org/apache/arrow/plasma/PlasmaClientTest.java
index eb771fa..cd132d5 100644
--- a/java/plasma/src/test/java/org/apache/arrow/plasma/PlasmaClientTest.java
+++ b/java/plasma/src/test/java/org/apache/arrow/plasma/PlasmaClientTest.java
@@ -57,6 +57,7 @@ public class PlasmaClientTest {
ProcessBuilder builder;
List<String> newCmd = Arrays.stream(cmd).filter(s -> s.length() >
0).collect(Collectors.toList());
builder = new ProcessBuilder(newCmd);
+ builder.inheritIO();
Process p = null;
try {
p = builder.start();