This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 2ad0bb4c58c [opt](MergeIO) use equivalent merge size to measure merge
effectiveness (#26741) (#26923)
2ad0bb4c58c is described below
commit 2ad0bb4c58cc4c71cfc12d4079aa5467e9117ccf
Author: Ashin Gau <[email protected]>
AuthorDate: Tue Nov 14 23:55:13 2023 +0800
[opt](MergeIO) use equivalent merge size to measure merge effectiveness
(#26741) (#26923)
backport #26741
---
be/src/io/fs/buffered_reader.cpp | 8 +++++---
be/src/io/fs/buffered_reader.h | 11 ++++++++++-
2 files changed, 15 insertions(+), 4 deletions(-)
diff --git a/be/src/io/fs/buffered_reader.cpp b/be/src/io/fs/buffered_reader.cpp
index 2a7187cc28f..8bee5f468fa 100644
--- a/be/src/io/fs/buffered_reader.cpp
+++ b/be/src/io/fs/buffered_reader.cpp
@@ -152,7 +152,6 @@ Status MergeRangeFileReader::read_at_impl(size_t offset,
Slice result, size_t* b
}
content_size = 0;
hollow_size = 0;
- double amplified_ratio = config::max_amplified_read_ratio;
std::vector<std::pair<double, size_t>> ratio_and_size;
// Calculate the read amplified ratio for each merge operation and the size of the merged data.
// Find the largest size of the merged data whose amplified ratio is less than config::max_amplified_read_ratio
@@ -168,9 +167,12 @@ Status MergeRangeFileReader::read_at_impl(size_t offset,
Slice result, size_t* b
}
}
size_t best_merged_size = 0;
- for (const std::pair<double, size_t>& rs : ratio_and_size) {
+ for (int i = 0; i < ratio_and_size.size(); ++i) {
+ const std::pair<double, size_t>& rs = ratio_and_size[i];
+ size_t equivalent_size = rs.second / (i + 1);
if (rs.second > best_merged_size) {
- if (rs.first < amplified_ratio || rs.second <= MIN_READ_SIZE) {
+ if (rs.first <= _max_amplified_ratio ||
+ (_max_amplified_ratio < 1 && equivalent_size <=
_equivalent_io_size)) {
best_merged_size = rs.second;
}
}
diff --git a/be/src/io/fs/buffered_reader.h b/be/src/io/fs/buffered_reader.h
index 84235f0a460..c9b1eb96ecd 100644
--- a/be/src/io/fs/buffered_reader.h
+++ b/be/src/io/fs/buffered_reader.h
@@ -131,8 +131,9 @@ public:
static constexpr size_t READ_SLICE_SIZE = 8 * 1024 * 1024; // 8MB
static constexpr size_t BOX_SIZE = 1 * 1024 * 1024; // 1MB
static constexpr size_t SMALL_IO = 2 * 1024 * 1024; // 2MB
+ static constexpr size_t HDFS_MIN_IO_SIZE = 4 * 1024; // 4KB
+ static constexpr size_t OSS_MIN_IO_SIZE = 512 * 1024; // 512KB
static constexpr size_t NUM_BOX = TOTAL_BUFFER_SIZE / BOX_SIZE; // 128
- static constexpr size_t MIN_READ_SIZE = 4096; // 4KB
MergeRangeFileReader(RuntimeProfile* profile, io::FileReaderSPtr reader,
const std::vector<PrefetchRange>&
random_access_ranges)
@@ -142,6 +143,11 @@ public:
_range_cached_data.resize(random_access_ranges.size());
_size = _reader->size();
_remaining = TOTAL_BUFFER_SIZE;
+ _is_oss = typeid_cast<io::S3FileReader*>(_reader.get()) != nullptr;
+ _max_amplified_ratio = config::max_amplified_read_ratio;
+ // Equivalent min size of each IO that can reach the maximum storage speed limit:
+ // 512KB for oss, 4KB for hdfs
+ _equivalent_io_size = _is_oss ? OSS_MIN_IO_SIZE : HDFS_MIN_IO_SIZE;
if (_profile != nullptr) {
const char* random_profile = "MergedSmallIO";
ADD_TIMER(_profile, random_profile);
@@ -235,6 +241,9 @@ private:
int16 _last_box_ref = -1;
uint32 _last_box_usage = 0;
std::vector<int16> _box_ref;
+ bool _is_oss;
+ double _max_amplified_ratio;
+ size_t _equivalent_io_size;
Statistics _statistics;
};
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org