pitrou commented on a change in pull request #7887:
URL: https://github.com/apache/arrow/pull/7887#discussion_r485588382
##########
File path: cpp/src/arrow/array/builder_dict.h
##########
@@ -233,6 +233,20 @@ class DictionaryBuilderBase : public ArrayBuilder {
return indices_builder_.AppendNulls(length);
}
+ Status AppendEmptyValue() final {
+ length_ += 1;
+ null_count_ += 1;
+
+ return indices_builder_.AppendEmptyValue();
+ }
+
+ Status AppendEmptyValues(int64_t length) final {
+ length_ += length;
+ null_count_ += length;
Review comment:
Same here.
##########
File path: cpp/src/arrow/array/builder_dict.h
##########
@@ -233,6 +233,20 @@ class DictionaryBuilderBase : public ArrayBuilder {
return indices_builder_.AppendNulls(length);
}
+ Status AppendEmptyValue() final {
+ length_ += 1;
+ null_count_ += 1;
Review comment:
Hmm, I don't think we should increase the null count here, or is it
necessary?
##########
File path: cpp/src/arrow/array/builder_nested.h
##########
@@ -395,19 +416,35 @@ class ARROW_EXPORT StructBuilder : public ArrayBuilder {
return Status::OK();
}
- /// \brief Append a null value. Automatically appends a null to each child
+ /// \brief Append a null value. Automatically appends a empty to each child
/// builder.
Status AppendNull() final {
for (const auto& field : children_) {
- ARROW_RETURN_NOT_OK(field->AppendNull());
+ ARROW_RETURN_NOT_OK(field->AppendEmptyValue());
}
return Append(false);
}
- /// \brief Append multiple null values. Automatically appends nulls to each
+ /// \brief Append multiple null values. Automatically appends empties to each
Review comment:
"empty values"
##########
File path: cpp/src/arrow/array/builder_primitive.h
##########
@@ -43,6 +43,14 @@ class ARROW_EXPORT NullBuilder : public ArrayBuilder {
/// \brief Append a single null element
Status AppendNull() final { return AppendNulls(1); }
+ Status AppendEmptyValues(int64_t length) final {
+ if (length < 0) return Status::Invalid("length must be positive");
+ length_ += length;
Review comment:
For the null type, `AppendEmptyValues` should simply be the same as
`AppendNulls` (the only possible value is null).
##########
File path: cpp/src/arrow/array/builder_dict.h
##########
@@ -409,6 +423,20 @@ class DictionaryBuilderBase<BuilderType, NullType> :
public ArrayBuilder {
return indices_builder_.AppendNulls(length);
}
+ Status AppendEmptyValue() final {
+ length_ += 1;
+ null_count_ += 1;
Review comment:
Same here and below.
##########
File path: cpp/src/arrow/array/builder_adaptive.h
##########
@@ -64,6 +64,16 @@ class ARROW_EXPORT AdaptiveIntBuilderBase : public
ArrayBuilder {
return Status::OK();
}
+ Status AppendEmptyValues(int64_t length) final {
+ ARROW_RETURN_NOT_OK(CommitPendingData());
+ ARROW_RETURN_NOT_OK(Reserve(length));
+ memset(data_->mutable_data() + length_ * int_size_, 0, int_size_ * length);
+ UnsafeSetNotNull(length);
+ return Status::OK();
+ }
+
+ Status AppendEmptyValue() final { return AppendEmptyValues(1); }
Review comment:
This could actually be implemented exactly like `AppendNull()` above,
except for the nulls part :-)
##########
File path: cpp/src/arrow/json/parser_test.cc
##########
@@ -188,7 +189,7 @@ TEST(BlockParserWithSchema, Nested) {
field("nuf", struct_({field("ps", utf8())}))},
{"[\"thing\", null, \"\xe5\xbf\x8d\", null]",
R"([["1", "2", "3"], ["2"], [], null])",
- R"([{"ps":null}, null, {"ps":"78"}, {"ps":"90"}])"});
+ R"([{"ps":null}, {}, {"ps":"78"}, {"ps":"90"}])"});
Review comment:
Hmm... @bkietz Can you validate this change is ok? It seems it only
affects internal implementation details.
##########
File path: cpp/src/arrow/array/builder_nested.h
##########
@@ -395,19 +416,35 @@ class ARROW_EXPORT StructBuilder : public ArrayBuilder {
return Status::OK();
}
- /// \brief Append a null value. Automatically appends a null to each child
+ /// \brief Append a null value. Automatically appends a empty to each child
Review comment:
"an empty value" would be better.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]