This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch dev-1.0.1 in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
commit 0c73dc85a6aabb30b4104f46fea1fbdf3a24587e Author: pengxiangyu <[email protected]> AuthorDate: Sun May 8 06:45:28 2022 +0800 [Bug] Missing error tablet list when close_wait return error (#9418) --- be/src/olap/delta_writer.cpp | 12 ++++++++++-- be/src/olap/delta_writer.h | 4 +++- be/src/runtime/load_channel.cpp | 3 ++- be/src/runtime/tablets_channel.cpp | 7 +++++-- be/src/runtime/tablets_channel.h | 5 ++++- 5 files changed, 24 insertions(+), 7 deletions(-) diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp index 8446340d5a..198019561f 100644 --- a/be/src/olap/delta_writer.cpp +++ b/be/src/olap/delta_writer.cpp @@ -279,7 +279,9 @@ OLAPStatus DeltaWriter::close() { return OLAP_SUCCESS; } -OLAPStatus DeltaWriter::close_wait(google::protobuf::RepeatedPtrField<PTabletInfo>* tablet_vec, bool is_broken) { +OLAPStatus DeltaWriter::close_wait(google::protobuf::RepeatedPtrField<PTabletInfo>* tablet_vec, + google::protobuf::RepeatedPtrField<PTabletError>* tablet_errors, + bool is_broken) { std::lock_guard<std::mutex> l(_lock); DCHECK(_is_init) << "delta writer is supposed be to initialized before close_wait() being called"; @@ -289,7 +291,13 @@ OLAPStatus DeltaWriter::close_wait(google::protobuf::RepeatedPtrField<PTabletInf } // return error if previous flush failed - RETURN_NOT_OK(_flush_token->wait()); + OLAPStatus st = _flush_token->wait(); + if (st != OLAP_SUCCESS) { + PTabletError* tablet_error = tablet_errors->Add(); + tablet_error->set_tablet_id(_tablet->tablet_id()); + tablet_error->set_msg("flush failed"); + return st; + } DCHECK_EQ(_mem_tracker->consumption(), 0); // use rowset meta manager to save meta diff --git a/be/src/olap/delta_writer.h b/be/src/olap/delta_writer.h index c765d03115..cf5a2729d2 100644 --- a/be/src/olap/delta_writer.h +++ b/be/src/olap/delta_writer.h @@ -67,7 +67,9 @@ public: OLAPStatus close(); // wait for all memtables to be flushed. // mem_consumption() should be 0 after this function returns. - OLAPStatus close_wait(google::protobuf::RepeatedPtrField<PTabletInfo>* tablet_vec, bool is_broken); + OLAPStatus close_wait(google::protobuf::RepeatedPtrField<PTabletInfo>* tablet_vec, + google::protobuf::RepeatedPtrField<PTabletError>* tablet_errors, + bool is_broken); // abandon current memtable and wait for all pending-flushing memtables to be destructed. // mem_consumption() should be 0 after this function returns. diff --git a/be/src/runtime/load_channel.cpp b/be/src/runtime/load_channel.cpp index db523f2aa7..715762bd18 100644 --- a/be/src/runtime/load_channel.cpp +++ b/be/src/runtime/load_channel.cpp @@ -99,7 +99,8 @@ Status LoadChannel::add_batch(const PTabletWriterAddBatchRequest& request, bool finished = false; RETURN_IF_ERROR(channel->close(request.sender_id(), request.backend_id(), &finished, request.partition_ids(), - response->mutable_tablet_vec())); + response->mutable_tablet_vec(), + response->mutable_tablet_errors())); if (finished) { std::lock_guard<std::mutex> l(_lock); _tablets_channels.erase(index_id); diff --git a/be/src/runtime/tablets_channel.cpp b/be/src/runtime/tablets_channel.cpp index 75f9936e9b..926cba7aaa 100644 --- a/be/src/runtime/tablets_channel.cpp +++ b/be/src/runtime/tablets_channel.cpp @@ -149,7 +149,8 @@ Status TabletsChannel::add_batch(const PTabletWriterAddBatchRequest& request, Status TabletsChannel::close(int sender_id, int64_t backend_id, bool* finished, const google::protobuf::RepeatedField<int64_t>& partition_ids, - google::protobuf::RepeatedPtrField<PTabletInfo>* tablet_vec) { + google::protobuf::RepeatedPtrField<PTabletInfo>* tablet_vec, + google::protobuf::RepeatedPtrField<PTabletError>* tablet_errors) { std::lock_guard<std::mutex> l(_lock); if (_state == kFinished) { return _close_status; @@ -197,7 +198,9 @@ Status TabletsChannel::close(int sender_id, int64_t backend_id, bool* finished, for (auto writer : need_wait_writers) { // close may return failed, but no need to handle it here. // tablet_vec will only contains success tablet, and then let FE judge it. - writer->close_wait(tablet_vec, (_broken_tablets.find(writer->tablet_id()) != _broken_tablets.end())); + writer->close_wait( + tablet_vec, tablet_errors, + (_broken_tablets.find(writer->tablet_id()) != _broken_tablets.end())); } // TODO(gaodayue) clear and destruct all delta writers to make sure all memory are freed // DCHECK_EQ(_mem_tracker->consumption(), 0); diff --git a/be/src/runtime/tablets_channel.h b/be/src/runtime/tablets_channel.h index e99ac6264b..360242ae88 100644 --- a/be/src/runtime/tablets_channel.h +++ b/be/src/runtime/tablets_channel.h @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +#pragma once + #include <cstdint> #include <unordered_map> #include <utility> @@ -69,7 +71,8 @@ public: // no-op when this channel has been closed or cancelled Status close(int sender_id, int64_t backend_id, bool* finished, const google::protobuf::RepeatedField<int64_t>& partition_ids, - google::protobuf::RepeatedPtrField<PTabletInfo>* tablet_vec); + google::protobuf::RepeatedPtrField<PTabletInfo>* tablet_vec, + google::protobuf::RepeatedPtrField<PTabletError>* tablet_error); // no-op when this channel has been closed or cancelled Status cancel(); --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
