This is an automated email from the ASF dual-hosted git repository. zouxinyi pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new bb96c24dc77 [fix](memory) Fix metadata memory tracking and profile (#44739) bb96c24dc77 is described below commit bb96c24dc771704cec2960db348261a2a26d8550 Author: Xinyi Zou <zouxi...@selectdb.com> AuthorDate: Wed Dec 4 10:43:56 2024 +0800 [fix](memory) Fix metadata memory tracking and profile (#44739) ### What problem does this PR solve? Problem Summary: 1. Add the metadata and cache types to `MemTrackerLimiter` and `MemoryProfile`. 2. Metadata memory consists of rowsets, segments, and tablets. 3. `all_segments_mem_tracker` excludes the segments held in SegmentCache, and all segments use the same memory calculation method. 4. Segment memory is estimated; the value is not accurate and should only be used as a reference. 5. Rowsets and tablets only count part of their memory, mainly PB (protobuf) objects. TODO: metadata also has some memory that could be allocated using the Doris Allocator. Example `MemoryProfile` output: ``` MemoryProfile: MemoryOverviewSnapshot: - PhysicalMemory(VmRSS) Current: 5.85 GB (Peak: 7.13 GB) - VirtualMemory(VmSize) Current: 61.35 GB (Peak: 61.67 GB) UntrackedMemory: - Memory Current: 3.61 GB (Peak: 3.77 GB) TrackedMemory: - Memory Current: 2.24 GB (Peak: 3.48 GB) TasksMemory: - Memory Current: 0 (Peak: 749.17 MB) - ReservedMemory Current: 0 (Peak: 0) Details: - Compaction Current: 0 (Peak: 13.60 MB) - Load Current: 0 (Peak: 749.17 MB) - AllMemTablesMemory Current: 0 (Peak: 853.20 MB) - Other Current: 0 (Peak: 0) - Query Current: 0 (Peak: 65.20 MB) - SchemaChange Current: 0 (Peak: 0) GlobalMemory: - Memory Current: 55.03 MB (Peak: 70.39 MB) MetadataMemory: - Memory Current: 972.13 MB (Peak: 972.13 MB) CacheMemory: - Memory Current: 360.21 MB (Peak: 675.32 MB) JemallocMemory: - Memory Current: 905.55 MB (Peak: 1.80 GB) Details: - Cache Current: 646.47 MB (Peak: 1.59 GB) - Metadata Current: 259.08 MB (Peak: 259.72 MB) GlobalMemorySnapshot: Orphan@global@id=3144f940fa84f930-d82d459289789494: - Memory Current: 1.73 MB (Peak: 1.73 MB) 
IOBufBlockMemory@global@id=1f4316bc5fb4c3d4-73f14d3652ed269b: - Memory Current: 2.65 GB (Peak: 2.65 GB) PointQueryExecutor@global@id=794878deaa4d8f56-769fbc87ccb7ec8d: - Memory Current: 36.90 MB (Peak: 36.90 MB) BlockCompression@global@id=0a4d429b1915dd8b-c2e429b1186d69a8: - Memory Current: 563.01 MB (Peak: 563.01 MB) RowIdStorageReader@global@id=a546ca5878c9204e-c4c72ea9088ca9a0: - Memory Current: 101.25 KB (Peak: 101.25 KB) SubcolumnsTree@global@id=294bde93c01d4ba6-4ce7b3290489a1b8: - Memory Current: 95.61 MB (Peak: 95.61 MB) S3FileBuffer@global@id=bc437d624d9a234d-03eb4bbc3d030a99: - Memory Current: 0 (Peak: 0) MetadataMemorySnapshot: Tablets(not in SchemaCache, TabletSchemaCache)@metadata@id=7646d676ad9f8de9-98beb5f1a8aa739b: - Memory Current: 26.84 GB (Peak: 26.84 GB) Segments(not in SegmentCache)@metadata@id=864b4887722b327a-516c912d4602b59b: - Memory Current: 4.98 MB (Peak: 4.98 MB) Rowsets@metadata@id=f0440aa247730497-752a527c71e3c393: - Memory Current: 5.57 GB (Peak: 5.57 GB) SegmentCache[size]@metadata@id=b34e798db2c25109-04c7ff3c722e4cbb: - Memory Current: 4.86 GB (Peak: 4.86 GB) SchemaCache[number]@metadata@id=9f41f61531da3fd9-163834a6a6f83eba: - Memory Current: 103.38 MB (Peak: 103.38 MB) TabletSchemaCache[number]@metadata@id=f9475c16ba7ea1ab-1d28b38568ef0b8d: - Memory Current: 170.65 MB (Peak: 170.65 MB) CacheMemorySnapshot: QueryCache@cache@id=504af426a68103cf-a60f36a48bc1998a: - Memory Current: 0 (Peak: 0) DataPageCache[size]@cache@id=f744bb4ef53c307b-4c021a8f9b8a77a5: - Memory Current: 14.33 GB (Peak: 14.33 GB) IndexPageCache[size]@cache@id=cb4ed0e456f081d4-d635edda9aa2778b: - Memory Current: 5.25 GB (Peak: 5.25 GB) PKIndexPageCache[size]@cache@id=8e4bf12b4d39133a-e0d878a18d73619b: - Memory Current: 2.72 GB (Peak: 2.72 GB) PointQueryRowCache[size]@cache@id=024dc624c80205f1-ca3221a154e4b68a: - Memory Current: 94.84 KB (Peak: 94.84 KB) CommonObjLRUCache[number]@cache@id=af40d4a03b391540-43338381c8e826a4: - Memory Current: 0 (Peak: 0) 
PointQueryLookupConnectionCache[number]@cache@id=284fa848dd97ccc8-56c61386a4950784: - Memory Current: 337.97 KB (Peak: 337.97 KB) InvertedIndexSearcherCache[size]@cache@id=f5404416ad0a9465-3cf12f5224276d8a: - Memory Current: 111.52 MB (Peak: 111.52 MB) InvertedIndexQueryCache[size]@cache@id=6f4d3c55508e3531-3fafe328c6d3e1ab: - Memory Current: 51.97 MB (Peak: 51.97 MB) QueryCache[size]@cache@id=274d47670e0796ad-542ad958334ff988: - Memory Current: 0 (Peak: 0) LastSuccessChannelCache[size]@cache@id=08449badfd6abb81-5b30599bfc60f79f: - Memory Current: 0 (Peak: 0) TabletColumnObjectPool[number]@cache@id=7b42da8391945265-06697f83a6bf01a7: - Memory Current: 151.67 MB (Peak: 151.67 MB) MowTabletVersionCache[number]@cache@id=f345a1adbd2a87dd-74b9caa3b30cf18b: - Memory Current: 150.94 MB (Peak: 150.94 MB) CreateTabletRRIdxCache[number]@cache@id=f043eee91e9aef2d-ef3a2245d50545ae: - Memory Current: 15.20 MB (Peak: 15.20 MB) MowDeleteBitmapAggCache[size]@cache@id=df47a8ed5bf3f11f-ac944d7d7e79d4b3: - Memory Current: 181.11 MB (Peak: 181.11 MB) TopMemoryTasksSnapshot: SnapshotManager@other_task@id=11480ad6017368b9-8aef2150427d229f: - Memory Current: 0 (Peak: 0) StreamLoadPipe@load@id=024958077445f4f7-d739fd5231dca890: - Memory Current: 1.39 GB (Peak: 1.39 GB) SegCompaction@compaction@id=2d4d5c796a9f48ee-fdf5dd9f8faf83a8: - Memory Current: 0 (Peak: 0) Load#Id=544468c7f5a9bac6-2c78fc9962f3908d@load@id=52497774c12c5ad1-1fda35667a3d0e8b: - Limit: 2.00 GB - Memory Current: 6.06 MB (Peak: 6.06 MB) Load#Id=624e7947099ef183-577a22cca9d34d89@load@id=ed4691ce5a748ca8-e49ef7cc9be10b95: - Limit: 2.00 GB - Memory Current: 6.06 MB (Peak: 6.06 MB) ``` --- be/src/common/daemon.cpp | 11 +- be/src/olap/delta_writer_v2.h | 1 - be/src/olap/memtable_writer.h | 1 - be/src/olap/metadata_adder.h | 73 +++-- .../rowset/segment_v2/indexed_column_reader.cpp | 5 +- .../olap/rowset/segment_v2/indexed_column_reader.h | 15 +- be/src/olap/rowset/segment_v2/page_handle.h | 12 +- 
be/src/olap/rowset/segment_v2/segment.cpp | 17 +- be/src/olap/rowset/segment_v2/segment.h | 7 +- be/src/olap/rowset_builder.h | 1 - be/src/olap/segment_loader.cpp | 3 +- be/src/olap/segment_loader.h | 13 +- be/src/olap/tablet_manager.cpp | 19 +- be/src/olap/tablet_manager.h | 3 - be/src/olap/tablet_meta.cpp | 6 - be/src/olap/tablet_meta.h | 4 - be/src/olap/tablet_schema.cpp | 2 - be/src/olap/tablet_schema.h | 7 +- be/src/olap/tablet_schema_cache.cpp | 2 +- be/src/runtime/exec_env.h | 16 +- be/src/runtime/exec_env_init.cpp | 13 +- be/src/runtime/memory/cache_policy.h | 6 + be/src/runtime/memory/lru_cache_policy.h | 38 ++- be/src/runtime/memory/lru_cache_value_base.h | 11 +- be/src/runtime/memory/mem_tracker_limiter.cpp | 58 ++-- be/src/runtime/memory/mem_tracker_limiter.h | 14 +- be/src/runtime/memory/memory_profile.cpp | 305 ++++++++++----------- be/src/runtime/memory/memory_profile.h | 69 +++-- be/src/util/runtime_profile.cpp | 4 +- be/src/util/runtime_profile.h | 45 ++- be/src/vec/runtime/partitioner.h | 1 - be/src/vec/sink/vdata_stream_sender.h | 1 - 32 files changed, 464 insertions(+), 319 deletions(-) diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp index ce2a6878dba..d3d55f10dde 100644 --- a/be/src/common/daemon.cpp +++ b/be/src/common/daemon.cpp @@ -500,15 +500,18 @@ void Daemon::cache_adjust_capacity_thread() { void Daemon::cache_prune_stale_thread() { int32_t interval = config::cache_periodic_prune_stale_sweep_sec; while (!_stop_background_threads_latch.wait_for(std::chrono::seconds(interval))) { - if (interval <= 0) { - LOG(WARNING) << "config of cache clean interval is illegal: [" << interval - << "], force set to 3600 "; - interval = 3600; + if (config::cache_periodic_prune_stale_sweep_sec <= 0) { + LOG(WARNING) << "config of cache clean interval is: [" << interval + << "], it means the cache prune stale thread is disabled, will wait 3s " + "and check again."; + interval = 3; + continue; } if (config::disable_memory_gc) { continue; } 
CacheManager::instance()->for_each_cache_prune_stale(); + interval = config::cache_periodic_prune_stale_sweep_sec; } } diff --git a/be/src/olap/delta_writer_v2.h b/be/src/olap/delta_writer_v2.h index beeb3d3ecd3..f9c2800a68f 100644 --- a/be/src/olap/delta_writer_v2.h +++ b/be/src/olap/delta_writer_v2.h @@ -46,7 +46,6 @@ namespace doris { class FlushToken; class MemTable; -class MemTracker; class Schema; class StorageEngine; class TupleDescriptor; diff --git a/be/src/olap/memtable_writer.h b/be/src/olap/memtable_writer.h index fb07e740fa3..713400793a1 100644 --- a/be/src/olap/memtable_writer.h +++ b/be/src/olap/memtable_writer.h @@ -45,7 +45,6 @@ namespace doris { class FlushToken; class MemTable; -class MemTracker; class StorageEngine; class TupleDescriptor; class SlotDescriptor; diff --git a/be/src/olap/metadata_adder.h b/be/src/olap/metadata_adder.h index 559c5db873b..5b5ba163224 100644 --- a/be/src/olap/metadata_adder.h +++ b/be/src/olap/metadata_adder.h @@ -20,6 +20,8 @@ #include <bvar/bvar.h> #include <stdint.h> +#include "runtime/exec_env.h" +#include "runtime/memory/mem_tracker_limiter.h" #include "util/runtime_profile.h" namespace doris { @@ -27,8 +29,8 @@ namespace doris { inline bvar::Adder<int64_t> g_rowset_meta_mem_bytes("doris_rowset_meta_mem_bytes"); inline bvar::Adder<int64_t> g_rowset_meta_num("doris_rowset_meta_num"); -inline bvar::Adder<int64_t> g_all_rowsets_mem_bytes("doris_all_rowsets_mem_bytes"); -inline bvar::Adder<int64_t> g_all_rowsets_num("doris_all_rowsets_num"); +inline bvar::Adder<int64_t> g_rowset_mem_bytes("doris_rowset_mem_bytes"); +inline bvar::Adder<int64_t> g_rowset_num("doris_rowset_num"); inline bvar::Adder<int64_t> g_tablet_meta_mem_bytes("doris_tablet_meta_mem_bytes"); inline bvar::Adder<int64_t> g_tablet_meta_num("doris_tablet_meta_num"); @@ -42,8 +44,9 @@ inline bvar::Adder<int64_t> g_tablet_index_num("doris_tablet_index_num"); inline bvar::Adder<int64_t> g_tablet_schema_mem_bytes("doris_tablet_schema_mem_bytes"); inline 
bvar::Adder<int64_t> g_tablet_schema_num("doris_tablet_schema_num"); -inline bvar::Adder<int64_t> g_all_segments_mem_bytes("doris_all_segments_mem_bytes"); -inline bvar::Adder<int64_t> g_all_segments_num("doris_all_segments_num"); +inline bvar::Adder<int64_t> g_segment_mem_bytes("doris_segment_mem_bytes"); +inline bvar::Adder<int64_t> g_segment_num("doris_segment_num"); +inline bvar::Adder<int64_t> g_segment_estimate_mem_bytes("doris_segment_estimate_mem_bytes"); inline bvar::Adder<int64_t> g_column_reader_mem_bytes("doris_column_reader_mem_bytes"); inline bvar::Adder<int64_t> g_column_reader_num("doris_column_reader_num"); @@ -96,6 +99,10 @@ class ZoneMapIndexReader; When a derived Class extends MetadataAdder, then the Class's number and fixed length field's memory can be counted automatically. But if the Class has variable length field, then you should overwrite get_metadata_size and call update_metadata_size when the Class's memory changes. + get_metadata_size is only the memory of the metadata object itself, not include child objects, + for example, TabletMeta::get_metadata_size does not include the memory of TabletSchema. + Note, the memory allocated by Doris Allocator is not included. + There are some special situations that need to be noted: 1. when the derived Class override copy constructor, you'd better update memory size(call update_metadata_size) if derived class's memory changed in its copy constructor or you not call MetadataAdder's copy constructor. 
@@ -111,6 +118,31 @@ public: static void dump_metadata_object(RuntimeProfile* object_heap_dump_snapshot); + static int64_t get_all_tablets_size() { + return g_tablet_meta_mem_bytes.get_value() + g_tablet_column_mem_bytes.get_value() + + g_tablet_index_mem_bytes.get_value() + g_tablet_schema_mem_bytes.get_value(); + } + + static int64_t get_all_rowsets_size() { + return g_rowset_meta_mem_bytes.get_value() + g_rowset_mem_bytes.get_value(); + } + + static int64_t get_all_segments_size() { + return g_segment_mem_bytes.get_value() + g_column_reader_mem_bytes.get_value() + + g_bitmap_index_reader_mem_bytes.get_value() + + g_bloom_filter_index_reader_mem_bytes.get_value() + + g_index_page_reader_mem_bytes.get_value() + + g_indexed_column_reader_mem_bytes.get_value() + + g_inverted_index_reader_mem_bytes.get_value() + + g_ordinal_index_reader_mem_bytes.get_value() + + g_zone_map_index_reader_mem_bytes.get_value(); + } + + // Doris currently uses the estimated segments memory as the basis, maybe it is more realistic. 
+ static int64_t get_all_segments_estimate_size() { + return g_segment_estimate_mem_bytes.get_value(); + } + protected: MetadataAdder(const MetadataAdder& other); @@ -122,7 +154,6 @@ protected: MetadataAdder<T>& operator=(const MetadataAdder<T>& other) = default; -private: int64_t _current_meta_size {0}; void add_mem_size(int64_t val); @@ -167,7 +198,7 @@ void MetadataAdder<T>::add_mem_size(int64_t val) { if constexpr (std::is_same_v<T, RowsetMeta>) { g_rowset_meta_mem_bytes << val; } else if constexpr (std::is_same_v<T, Rowset>) { - g_all_rowsets_mem_bytes << val; + g_rowset_mem_bytes << val; } else if constexpr (std::is_same_v<T, TabletMeta>) { g_tablet_meta_mem_bytes << val; } else if constexpr (std::is_same_v<T, TabletColumn>) { @@ -177,7 +208,7 @@ void MetadataAdder<T>::add_mem_size(int64_t val) { } else if constexpr (std::is_same_v<T, TabletSchema>) { g_tablet_schema_mem_bytes << val; } else if constexpr (std::is_same_v<T, segment_v2::Segment>) { - g_all_segments_mem_bytes << val; + g_segment_mem_bytes << val; } else if constexpr (std::is_same_v<T, segment_v2::ColumnReader>) { g_column_reader_mem_bytes << val; } else if constexpr (std::is_same_v<T, segment_v2::BitmapIndexReader>) { @@ -208,7 +239,7 @@ void MetadataAdder<T>::add_num(int64_t val) { if constexpr (std::is_same_v<T, RowsetMeta>) { g_rowset_meta_num << val; } else if constexpr (std::is_same_v<T, Rowset>) { - g_all_rowsets_num << val; + g_rowset_num << val; } else if constexpr (std::is_same_v<T, TabletMeta>) { g_tablet_meta_num << val; } else if constexpr (std::is_same_v<T, TabletColumn>) { @@ -218,7 +249,7 @@ void MetadataAdder<T>::add_num(int64_t val) { } else if constexpr (std::is_same_v<T, TabletSchema>) { g_tablet_schema_num << val; } else if constexpr (std::is_same_v<T, segment_v2::Segment>) { - g_all_segments_num << val; + g_segment_num << val; } else if constexpr (std::is_same_v<T, segment_v2::ColumnReader>) { g_column_reader_num << val; } else if constexpr (std::is_same_v<T, 
segment_v2::BitmapIndexReader>) { @@ -250,12 +281,12 @@ void MetadataAdder<T>::dump_metadata_object(RuntimeProfile* object_heap_dump_sna COUNTER_SET(rowset_meta_mem_bytes_counter, g_rowset_meta_mem_bytes.get_value()); COUNTER_SET(rowset_meta_num_counter, g_rowset_meta_num.get_value()); - RuntimeProfile::Counter* all_rowsets_mem_bytes_counter = - ADD_COUNTER(object_heap_dump_snapshot, "AllRowsetsMemBytes", TUnit::BYTES); - RuntimeProfile::Counter* all_rowsets_num_counter = - ADD_COUNTER(object_heap_dump_snapshot, "AllRowsetsNum", TUnit::UNIT); - COUNTER_SET(all_rowsets_mem_bytes_counter, g_all_rowsets_mem_bytes.get_value()); - COUNTER_SET(all_rowsets_num_counter, g_all_rowsets_num.get_value()); + RuntimeProfile::Counter* rowset_mem_bytes_counter = + ADD_COUNTER(object_heap_dump_snapshot, "RowsetMemBytes", TUnit::BYTES); + RuntimeProfile::Counter* rowset_num_counter = + ADD_COUNTER(object_heap_dump_snapshot, "RowsetNum", TUnit::UNIT); + COUNTER_SET(rowset_mem_bytes_counter, g_rowset_mem_bytes.get_value()); + COUNTER_SET(rowset_num_counter, g_rowset_num.get_value()); RuntimeProfile::Counter* tablet_meta_mem_bytes_counter = ADD_COUNTER(object_heap_dump_snapshot, "TabletMetaMemBytes", TUnit::BYTES); @@ -285,12 +316,12 @@ void MetadataAdder<T>::dump_metadata_object(RuntimeProfile* object_heap_dump_sna COUNTER_SET(tablet_schema_mem_bytes_counter, g_tablet_schema_mem_bytes.get_value()); COUNTER_SET(tablet_schema_num_counter, g_tablet_schema_num.get_value()); - RuntimeProfile::Counter* all_segments_mem_bytes_counter = - ADD_COUNTER(object_heap_dump_snapshot, "AllSegmentsMemBytes", TUnit::BYTES); - RuntimeProfile::Counter* all_segments_num_counter = - ADD_COUNTER(object_heap_dump_snapshot, "AllSegmentsNum", TUnit::UNIT); - COUNTER_SET(all_segments_mem_bytes_counter, g_all_segments_mem_bytes.get_value()); - COUNTER_SET(all_segments_num_counter, g_all_segments_num.get_value()); + RuntimeProfile::Counter* segment_mem_bytes_counter = + ADD_COUNTER(object_heap_dump_snapshot, 
"SegmentMemBytes", TUnit::BYTES); + RuntimeProfile::Counter* segment_num_counter = + ADD_COUNTER(object_heap_dump_snapshot, "SegmentNum", TUnit::UNIT); + COUNTER_SET(segment_mem_bytes_counter, g_segment_mem_bytes.get_value()); + COUNTER_SET(segment_num_counter, g_segment_num.get_value()); RuntimeProfile::Counter* column_reader_mem_bytes_counter = ADD_COUNTER(object_heap_dump_snapshot, "ColumnReaderMemBytes", TUnit::BYTES); diff --git a/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp b/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp index 3028211f266..da6beff5d8d 100644 --- a/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp @@ -81,7 +81,8 @@ Status IndexedColumnReader::load(bool use_page_cache, bool kept_in_memory, _sole_data_page = PagePointer(_meta.ordinal_index_meta().root_page()); } else { RETURN_IF_ERROR(load_index_page(_meta.ordinal_index_meta().root_page(), - &_ordinal_index_page_handle, &_ordinal_index_reader)); + &_ordinal_index_page_handle, + _ordinal_index_reader.get())); _has_index_page = true; } } @@ -92,7 +93,7 @@ Status IndexedColumnReader::load(bool use_page_cache, bool kept_in_memory, _sole_data_page = PagePointer(_meta.value_index_meta().root_page()); } else { RETURN_IF_ERROR(load_index_page(_meta.value_index_meta().root_page(), - &_value_index_page_handle, &_value_index_reader)); + &_value_index_page_handle, _value_index_reader.get())); _has_index_page = true; } } diff --git a/be/src/olap/rowset/segment_v2/indexed_column_reader.h b/be/src/olap/rowset/segment_v2/indexed_column_reader.h index c3469f9f6be..c9640c0007c 100644 --- a/be/src/olap/rowset/segment_v2/indexed_column_reader.h +++ b/be/src/olap/rowset/segment_v2/indexed_column_reader.h @@ -50,9 +50,12 @@ class EncodingInfo; class IndexedColumnReader : public MetadataAdder<IndexedColumnReader> { public: explicit IndexedColumnReader(io::FileReaderSPtr file_reader, const IndexedColumnMetaPB& meta) - : 
_file_reader(std::move(file_reader)), _meta(meta) {} + : _file_reader(std::move(file_reader)), _meta(meta) { + _ordinal_index_reader = std::make_unique<IndexPageReader>(); + _value_index_reader = std::make_unique<IndexPageReader>(); + } - ~IndexedColumnReader(); + ~IndexedColumnReader() override; Status load(bool use_page_cache, bool kept_in_memory, OlapReaderStatistics* index_load_stats = nullptr); @@ -90,8 +93,8 @@ private: bool _has_index_page = false; // valid only when the column contains only one data page PagePointer _sole_data_page; - IndexPageReader _ordinal_index_reader; - IndexPageReader _value_index_reader; + std::unique_ptr<IndexPageReader> _ordinal_index_reader; + std::unique_ptr<IndexPageReader> _value_index_reader; PageHandle _ordinal_index_page_handle; PageHandle _value_index_page_handle; @@ -108,8 +111,8 @@ public: explicit IndexedColumnIterator(const IndexedColumnReader* reader, OlapReaderStatistics* stats = nullptr) : _reader(reader), - _ordinal_iter(&reader->_ordinal_index_reader), - _value_iter(&reader->_value_index_reader), + _ordinal_iter(reader->_ordinal_index_reader.get()), + _value_iter(reader->_value_index_reader.get()), _stats(stats) {} // Seek to the given ordinal entry. Entry 0 is the first entry. diff --git a/be/src/olap/rowset/segment_v2/page_handle.h b/be/src/olap/rowset/segment_v2/page_handle.h index b1e53ee8086..d4dfdfb2ff3 100644 --- a/be/src/olap/rowset/segment_v2/page_handle.h +++ b/be/src/olap/rowset/segment_v2/page_handle.h @@ -23,6 +23,10 @@ #include "util/slice.h" // for Slice namespace doris { + +// After disable page cache, sometimes we need to know the percentage of data pages in query memory. +inline bvar::Adder<int64_t> g_page_no_cache_mem_bytes("doris_page_no_cache_mem_bytes"); + namespace segment_v2 { // When a column page is read into memory, we use this to store it. @@ -37,8 +41,7 @@ public: // This class will take the ownership of input data's memory. It will // free it when deconstructs. 
PageHandle(DataPage* data) : _is_data_owner(true), _data(data) { - _page_tracker = ExecEnv::GetInstance()->page_no_cache_mem_tracker(); - _page_tracker->consume(_data->capacity()); + g_page_no_cache_mem_bytes << _data->capacity(); } // This class will take the content of cache data, and will make input @@ -51,20 +54,18 @@ public: // we can use std::exchange if we switch c++14 on std::swap(_is_data_owner, other._is_data_owner); std::swap(_data, other._data); - _page_tracker = ExecEnv::GetInstance()->page_no_cache_mem_tracker(); } PageHandle& operator=(PageHandle&& other) noexcept { std::swap(_is_data_owner, other._is_data_owner); std::swap(_data, other._data); _cache_data = std::move(other._cache_data); - _page_tracker = ExecEnv::GetInstance()->page_no_cache_mem_tracker(); return *this; } ~PageHandle() { if (_is_data_owner) { - _page_tracker->release(_data->capacity()); + g_page_no_cache_mem_bytes << -_data->capacity(); delete _data; } else { DCHECK(_data == nullptr); @@ -85,7 +86,6 @@ private: // otherwise _cache_data is valid, and data is belong to cache. 
bool _is_data_owner = false; DataPage* _data = nullptr; - std::shared_ptr<MemTracker> _page_tracker; PageCacheHandle _cache_data; // Don't allow copy and assign diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp index 0ad799683fc..d2c2920d4e6 100644 --- a/be/src/olap/rowset/segment_v2/segment.cpp +++ b/be/src/olap/rowset/segment_v2/segment.cpp @@ -163,7 +163,11 @@ Segment::Segment(uint32_t segment_id, RowsetId rowset_id, TabletSchemaSPtr table _tablet_schema(std::move(tablet_schema)), _idx_file_info(idx_file_info) {} -Segment::~Segment() = default; +Segment::~Segment() { + g_segment_estimate_mem_bytes << -_tracked_meta_mem_usage; + // if failed, fix `_tracked_meta_mem_usage` accuracy + DCHECK(_tracked_meta_mem_usage == meta_mem_usage()); +} io::UInt128Wrapper Segment::file_cache_key(std::string_view rowset_id, uint32_t seg_id) { return io::BlockFileCache::hash(fmt::format("{}_{}.dat", rowset_id, seg_id)); @@ -174,6 +178,12 @@ int64_t Segment::get_metadata_size() const { (_pk_index_meta ? 
_pk_index_meta->ByteSizeLong() : 0); } +void Segment::update_metadata_size() { + MetadataAdder::update_metadata_size(); + g_segment_estimate_mem_bytes << _meta_mem_usage - _tracked_meta_mem_usage; + _tracked_meta_mem_usage = _meta_mem_usage; +} + Status Segment::_open() { _footer_pb = std::make_unique<SegmentFooterPB>(); RETURN_IF_ERROR(_parse_footer(_footer_pb.get())); @@ -191,8 +201,6 @@ Status Segment::_open() { _meta_mem_usage += _pk_index_meta->ByteSizeLong(); } - update_metadata_size(); - _meta_mem_usage += sizeof(*this); _meta_mem_usage += _tablet_schema->num_columns() * config::estimated_mem_per_column_reader; @@ -201,6 +209,8 @@ Status Segment::_open() { // 0.01 comes from PrimaryKeyIndexBuilder::init _meta_mem_usage += BloomFilter::optimal_bit_num(_num_rows, 0.01) / 8; + update_metadata_size(); + return Status::OK(); } @@ -467,6 +477,7 @@ Status Segment::_load_pk_bloom_filter() { // for BE UT "segment_cache_test" return _load_pk_bf_once.call([this] { _meta_mem_usage += 100; + update_metadata_size(); return Status::OK(); }); } diff --git a/be/src/olap/rowset/segment_v2/segment.h b/be/src/olap/rowset/segment_v2/segment.h index bc5ab1e1fdc..1b20c1f066b 100644 --- a/be/src/olap/rowset/segment_v2/segment.h +++ b/be/src/olap/rowset/segment_v2/segment.h @@ -57,7 +57,6 @@ class IDataType; class ShortKeyIndexDecoder; class Schema; class StorageReadOptions; -class MemTracker; class PrimaryKeyIndexReader; class RowwiseIterator; struct RowLocation; @@ -93,6 +92,7 @@ public: ~Segment(); int64_t get_metadata_size() const override; + void update_metadata_size(); Status new_iterator(SchemaSPtr schema, const StorageReadOptions& read_options, std::unique_ptr<RowwiseIterator>* iter); @@ -163,6 +163,8 @@ public: io::FileReaderSPtr file_reader() { return _file_reader; } + // Including the column reader memory. + // another method `get_metadata_size` not include the column reader, only the segment object itself. 
int64_t meta_mem_usage() const { return _meta_mem_usage; } // Identify the column by unique id or path info @@ -249,9 +251,8 @@ private: // 1. Tracking memory use by segment meta data such as footer or index page. // 2. Tracking memory use by segment column reader // The memory consumed by querying is tracked in segment iterator. - // TODO: Segment::_meta_mem_usage Unknown value overflow, causes the value of SegmentMeta mem tracker - // is similar to `-2912341218700198079`. So, temporarily put it in experimental type tracker. int64_t _meta_mem_usage; + int64_t _tracked_meta_mem_usage = 0; RowsetId _rowset_id; TabletSchemaSPtr _tablet_schema; diff --git a/be/src/olap/rowset_builder.h b/be/src/olap/rowset_builder.h index 7fd57803736..fb2294d1770 100644 --- a/be/src/olap/rowset_builder.h +++ b/be/src/olap/rowset_builder.h @@ -38,7 +38,6 @@ namespace doris { class CalcDeleteBitmapToken; class FlushToken; class MemTable; -class MemTracker; class StorageEngine; class TupleDescriptor; class SlotDescriptor; diff --git a/be/src/olap/segment_loader.cpp b/be/src/olap/segment_loader.cpp index 26ac54c699b..4240f7e250a 100644 --- a/be/src/olap/segment_loader.cpp +++ b/be/src/olap/segment_loader.cpp @@ -77,9 +77,8 @@ Status SegmentLoader::load_segments(const BetaRowsetSharedPtr& rowset, } if (use_cache && !config::disable_segment_cache) { // memory of SegmentCache::CacheValue will be handled by SegmentCache - auto* cache_value = new SegmentCache::CacheValue(); + auto* cache_value = new SegmentCache::CacheValue(segment); _cache_mem_usage += segment->meta_mem_usage(); - cache_value->segment = std::move(segment); _segment_cache->insert(cache_key, *cache_value, cache_handle); } else { cache_handle->push_segment(std::move(segment)); diff --git a/be/src/olap/segment_loader.h b/be/src/olap/segment_loader.h index 834906da93b..2c5b1ed200d 100644 --- a/be/src/olap/segment_loader.h +++ b/be/src/olap/segment_loader.h @@ -75,9 +75,9 @@ public: // Holding all opened segments of a rowset. 
class CacheValue : public LRUCacheValueBase { public: - ~CacheValue() override { segment.reset(); } + CacheValue(segment_v2::SegmentSharedPtr segment_) : segment(std::move(segment_)) {} - segment_v2::SegmentSharedPtr segment; + const segment_v2::SegmentSharedPtr segment; }; SegmentCache(size_t memory_bytes_limit, size_t segment_num_limit) @@ -124,8 +124,13 @@ public: void erase_segments(const RowsetId& rowset_id, int64_t num_segments); - // Just used for BE UT - int64_t cache_mem_usage() const { return _cache_mem_usage; } + int64_t cache_mem_usage() const { +#ifdef BE_TEST + return _cache_mem_usage; +#else + return _segment_cache->value_mem_consumption(); +#endif + } private: SegmentLoader(); diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp index a18dc0b4a16..59f0e1893d8 100644 --- a/be/src/olap/tablet_manager.cpp +++ b/be/src/olap/tablet_manager.cpp @@ -57,8 +57,6 @@ #include "olap/tablet_schema.h" #include "olap/txn_manager.h" #include "runtime/exec_env.h" -#include "runtime/memory/mem_tracker.h" -#include "runtime/thread_context.h" #include "service/backend_options.h" #include "util/defer_op.h" #include "util/doris_metrics.h" @@ -83,28 +81,18 @@ using std::vector; namespace doris { using namespace ErrorCode; -DEFINE_GAUGE_METRIC_PROTOTYPE_5ARG(tablet_meta_mem_consumption, MetricUnit::BYTES, "", - mem_consumption, Labels({{"type", "tablet_meta"}})); - bvar::Adder<int64_t> g_tablet_meta_schema_columns_count("tablet_meta_schema_columns_count"); TabletManager::TabletManager(StorageEngine& engine, int32_t tablet_map_lock_shard_size) : _engine(engine), - _tablet_meta_mem_tracker(std::make_shared<MemTracker>("TabletMeta(experimental)")), _tablets_shards_size(tablet_map_lock_shard_size), _tablets_shards_mask(tablet_map_lock_shard_size - 1) { CHECK_GT(_tablets_shards_size, 0); CHECK_EQ(_tablets_shards_size & _tablets_shards_mask, 0); _tablets_shards.resize(_tablets_shards_size); - REGISTER_HOOK_METRIC(tablet_meta_mem_consumption, - [this]() { 
return _tablet_meta_mem_tracker->consumption(); }); } -TabletManager::~TabletManager() { -#ifndef BE_TEST - DEREGISTER_HOOK_METRIC(tablet_meta_mem_consumption); -#endif -} +TabletManager::~TabletManager() = default; Status TabletManager::_add_tablet_unlocked(TTabletId tablet_id, const TabletSharedPtr& tablet, bool update_meta, bool force, RuntimeProfile* profile) { @@ -242,10 +230,6 @@ Status TabletManager::_add_tablet_to_map_unlocked(TTabletId tablet_id, tablet_map_t& tablet_map = _get_tablet_map(tablet_id); tablet_map[tablet_id] = tablet; _add_tablet_to_partition(tablet); - // TODO: remove multiply 2 of tablet meta mem size - // Because table schema will copy in tablet, there will be double mem cost - // so here multiply 2 - _tablet_meta_mem_tracker->consume(tablet->tablet_meta()->mem_size() * 2); g_tablet_meta_schema_columns_count << tablet->tablet_meta()->tablet_columns_num(); COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "RegisterTabletInfo", "AddTablet"), static_cast<int64_t>(watch.reset())); @@ -599,7 +583,6 @@ Status TabletManager::_drop_tablet(TTabletId tablet_id, TReplicaId replica_id, b } to_drop_tablet->deregister_tablet_from_dir(); - _tablet_meta_mem_tracker->release(to_drop_tablet->tablet_meta()->mem_size() * 2); g_tablet_meta_schema_columns_count << -to_drop_tablet->tablet_meta()->tablet_columns_num(); return Status::OK(); } diff --git a/be/src/olap/tablet_manager.h b/be/src/olap/tablet_manager.h index 42623cf05f2..6b6e7998f9c 100644 --- a/be/src/olap/tablet_manager.h +++ b/be/src/olap/tablet_manager.h @@ -251,9 +251,6 @@ private: StorageEngine& _engine; - // TODO: memory size of TabletSchema cannot be accurately tracked. 
- std::shared_ptr<MemTracker> _tablet_meta_mem_tracker; - const int32_t _tablets_shards_size; const int32_t _tablets_shards_mask; std::vector<tablets_shard> _tablets_shards; diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp index 0570aff349c..fc9fc034b0b 100644 --- a/be/src/olap/tablet_meta.cpp +++ b/be/src/olap/tablet_meta.cpp @@ -781,12 +781,6 @@ void TabletMeta::to_meta_pb(TabletMetaPB* tablet_meta_pb) { time_series_compaction_level_threshold()); } -int64_t TabletMeta::mem_size() const { - auto size = sizeof(TabletMeta); - size += _schema->mem_size(); - return size; -} - void TabletMeta::to_json(string* json_string, json2pb::Pb2JsonOptions& options) { TabletMetaPB tablet_meta_pb; to_meta_pb(&tablet_meta_pb); diff --git a/be/src/olap/tablet_meta.h b/be/src/olap/tablet_meta.h index 34794ef6d0a..a8c82a4abd2 100644 --- a/be/src/olap/tablet_meta.h +++ b/be/src/olap/tablet_meta.h @@ -140,10 +140,6 @@ public: void to_meta_pb(TabletMetaPB* tablet_meta_pb); void to_json(std::string* json_string, json2pb::Pb2JsonOptions& options); - // Don't use. - // TODO: memory size of TabletSchema cannot be accurately tracked. - // In some places, temporarily use num_columns() as TabletSchema size. 
- int64_t mem_size() const; size_t tablet_columns_num() const { return _schema->num_columns(); } TabletTypePB tablet_type() const { return _tablet_type; } diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp index c4f96e22148..47179ce19b2 100644 --- a/be/src/olap/tablet_schema.cpp +++ b/be/src/olap/tablet_schema.cpp @@ -41,8 +41,6 @@ #include "olap/tablet_column_object_pool.h" #include "olap/types.h" #include "olap/utils.h" -#include "runtime/memory/lru_cache_policy.h" -#include "runtime/thread_context.h" #include "tablet_meta.h" #include "vec/aggregate_functions/aggregate_function_simple_factory.h" #include "vec/aggregate_functions/aggregate_function_state_union.h" diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h index 5fb3deafd77..9a0cd53f7b1 100644 --- a/be/src/olap/tablet_schema.h +++ b/be/src/olap/tablet_schema.h @@ -332,10 +332,8 @@ public: void copy_from(const TabletSchema& tablet_schema); void update_index_info_from(const TabletSchema& tablet_schema); std::string to_key() const; - // Don't use. - // TODO: memory size of TabletSchema cannot be accurately tracked. - // In some places, temporarily use num_columns() as TabletSchema size. - int64_t mem_size() const { return _mem_size; } + // get_metadata_size is only the memory of the TabletSchema itself, not include child objects. 
+ int64_t mem_size() const { return get_metadata_size(); } size_t row_size() const; int32_t field_index(const std::string& field_name) const; int32_t field_index(const vectorized::PathInData& path) const; @@ -573,7 +571,6 @@ private: int64_t _db_id = -1; bool _disable_auto_compaction = false; bool _enable_single_replica_compaction = false; - int64_t _mem_size = 0; bool _store_row_column = false; bool _skip_write_index_on_load = false; InvertedIndexStorageFormatPB _inverted_index_storage_format = InvertedIndexStorageFormatPB::V1; diff --git a/be/src/olap/tablet_schema_cache.cpp b/be/src/olap/tablet_schema_cache.cpp index fd238fa5aff..e044ef9c042 100644 --- a/be/src/olap/tablet_schema_cache.cpp +++ b/be/src/olap/tablet_schema_cache.cpp @@ -56,7 +56,7 @@ std::pair<Cache::Handle*, TabletSchemaSPtr> TabletSchemaCache::insert(const std: tablet_schema_ptr->init_from_pb(pb, false, true); value->tablet_schema = tablet_schema_ptr; lru_handle = LRUCachePolicy::insert(key_signature, value, tablet_schema_ptr->num_columns(), - 0, CachePriority::NORMAL); + tablet_schema_ptr->mem_size(), CachePriority::NORMAL); g_tablet_schema_cache_count << 1; g_tablet_schema_cache_columns_count << tablet_schema_ptr->num_columns(); } diff --git a/be/src/runtime/exec_env.h b/be/src/runtime/exec_env.h index a27936f5f0d..636ce2bf288 100644 --- a/be/src/runtime/exec_env.h +++ b/be/src/runtime/exec_env.h @@ -178,7 +178,6 @@ public: std::vector<TrackerLimiterGroup> mem_tracker_limiter_pool; void init_mem_tracker(); std::shared_ptr<MemTrackerLimiter> orphan_mem_tracker() { return _orphan_mem_tracker; } - std::shared_ptr<MemTracker> page_no_cache_mem_tracker() { return _page_no_cache_mem_tracker; } std::shared_ptr<MemTrackerLimiter> brpc_iobuf_block_memory_tracker() { return _brpc_iobuf_block_memory_tracker; } @@ -188,6 +187,15 @@ public: std::shared_ptr<MemTrackerLimiter> stream_load_pipe_tracker() { return _stream_load_pipe_tracker; } + std::shared_ptr<MemTrackerLimiter> tablets_no_cache_mem_tracker() 
{ + return _tablets_no_cache_mem_tracker; + } + std::shared_ptr<MemTrackerLimiter> rowsets_no_cache_mem_tracker() { + return _rowsets_no_cache_mem_tracker; + } + std::shared_ptr<MemTrackerLimiter> segments_no_cache_mem_tracker() { + return _segments_no_cache_mem_tracker; + } std::shared_ptr<MemTrackerLimiter> point_query_executor_mem_tracker() { return _point_query_executor_mem_tracker; } @@ -377,13 +385,15 @@ private: // Ideally, all threads are expected to attach to the specified tracker, so that "all memory has its own ownership", // and the consumption of the orphan mem tracker is close to 0, but greater than 0. std::shared_ptr<MemTrackerLimiter> _orphan_mem_tracker; - // page size not in cache, data page/index page/etc. - std::shared_ptr<MemTracker> _page_no_cache_mem_tracker; std::shared_ptr<MemTrackerLimiter> _brpc_iobuf_block_memory_tracker; // Count the memory consumption of segment compaction tasks. std::shared_ptr<MemTrackerLimiter> _segcompaction_mem_tracker; std::shared_ptr<MemTrackerLimiter> _stream_load_pipe_tracker; + std::shared_ptr<MemTrackerLimiter> _tablets_no_cache_mem_tracker; + std::shared_ptr<MemTrackerLimiter> _rowsets_no_cache_mem_tracker; + std::shared_ptr<MemTrackerLimiter> _segments_no_cache_mem_tracker; + // Tracking memory may be shared between multiple queries. 
std::shared_ptr<MemTrackerLimiter> _point_query_executor_mem_tracker; std::shared_ptr<MemTrackerLimiter> _block_compression_mem_tracker; diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp index b7f926cc3b4..a371cdb947f 100644 --- a/be/src/runtime/exec_env_init.cpp +++ b/be/src/runtime/exec_env_init.cpp @@ -599,15 +599,20 @@ void ExecEnv::init_mem_tracker() { _s_tracking_memory = true; _orphan_mem_tracker = MemTrackerLimiter::create_shared(MemTrackerLimiter::Type::GLOBAL, "Orphan"); - _page_no_cache_mem_tracker = std::make_shared<MemTracker>("PageNoCache"); _brpc_iobuf_block_memory_tracker = MemTrackerLimiter::create_shared(MemTrackerLimiter::Type::GLOBAL, "IOBufBlockMemory"); _segcompaction_mem_tracker = - MemTrackerLimiter::create_shared(MemTrackerLimiter::Type::GLOBAL, "SegCompaction"); + MemTrackerLimiter::create_shared(MemTrackerLimiter::Type::COMPACTION, "SegCompaction"); + _tablets_no_cache_mem_tracker = MemTrackerLimiter::create_shared( + MemTrackerLimiter::Type::METADATA, "Tablets(not in SchemaCache, TabletSchemaCache)"); + _segments_no_cache_mem_tracker = MemTrackerLimiter::create_shared( + MemTrackerLimiter::Type::METADATA, "Segments(not in SegmentCache)"); + _rowsets_no_cache_mem_tracker = + MemTrackerLimiter::create_shared(MemTrackerLimiter::Type::METADATA, "Rowsets"); _point_query_executor_mem_tracker = MemTrackerLimiter::create_shared(MemTrackerLimiter::Type::GLOBAL, "PointQueryExecutor"); _query_cache_mem_tracker = - MemTrackerLimiter::create_shared(MemTrackerLimiter::Type::GLOBAL, "QueryCache"); + MemTrackerLimiter::create_shared(MemTrackerLimiter::Type::CACHE, "QueryCache"); _block_compression_mem_tracker = MemTrackerLimiter::create_shared(MemTrackerLimiter::Type::GLOBAL, "BlockCompression"); _rowid_storage_reader_tracker = @@ -716,7 +721,7 @@ void ExecEnv::destroy() { _file_cache_open_fd_cache.reset(); SAFE_STOP(_write_cooldown_meta_executors); - // StorageEngine must be destoried before 
_page_no_cache_mem_tracker.reset and _cache_manager destory + // StorageEngine must be destoried before _cache_manager destory SAFE_STOP(_storage_engine); _storage_engine.reset(); diff --git a/be/src/runtime/memory/cache_policy.h b/be/src/runtime/memory/cache_policy.h index e7e1c73e7cb..8f077a4eb45 100644 --- a/be/src/runtime/memory/cache_policy.h +++ b/be/src/runtime/memory/cache_policy.h @@ -17,6 +17,8 @@ #pragma once +#include <vector> + #include "util/runtime_profile.h" namespace doris { @@ -123,6 +125,7 @@ public: {"CloudTabletCache", CacheType::CLOUD_TABLET_CACHE}, {"CloudTxnDeleteBitmapCache", CacheType::CLOUD_TXN_DELETE_BITMAP_CACHE}, {"ForUTCacheNumber", CacheType::FOR_UT_CACHE_NUMBER}, + {"QueryCache", CacheType::QUERY_CACHE}, {"TabletColumnObjectPool", CacheType::TABLET_COLUMN_OBJECT_POOL}}; static CacheType string_to_type(std::string type) { @@ -133,6 +136,9 @@ public: } } + inline static std::vector<CacheType> MetadataCache { + CacheType::SEGMENT_CACHE, CacheType::SCHEMA_CACHE, CacheType::TABLET_SCHEMA_CACHE}; + CachePolicy(CacheType type, size_t capacity, uint32_t stale_sweep_time_s, bool enable_prune); virtual ~CachePolicy(); diff --git a/be/src/runtime/memory/lru_cache_policy.h b/be/src/runtime/memory/lru_cache_policy.h index ea34e2837f1..3fdb43facd7 100644 --- a/be/src/runtime/memory/lru_cache_policy.h +++ b/be/src/runtime/memory/lru_cache_policy.h @@ -104,20 +104,26 @@ public: return _mem_tracker->consumption(); } + int64_t value_mem_consumption() { + DCHECK(_value_mem_tracker != nullptr); + return _value_mem_tracker->consumption(); + } + // Insert will consume tracking_bytes to _mem_tracker and cache value destroy will release tracking_bytes. - // If LRUCacheType::SIZE, tracking_bytes usually equal to charge. - // If LRUCacheType::NUMBER, tracking_bytes usually not equal to charge, at this time charge is an weight. 
- // If LRUCacheType::SIZE and tracking_bytes equals 0, memory must be tracked in Doris Allocator, + // If LRUCacheType::SIZE, value_tracking_bytes usually equal to charge. + // If LRUCacheType::NUMBER, value_tracking_bytes usually not equal to charge, at this time charge is an weight. + // If LRUCacheType::SIZE and value_tracking_bytes equals 0, memory must be tracked in Doris Allocator, // cache value is allocated using Alloctor. - // If LRUCacheType::NUMBER and tracking_bytes equals 0, usually currently cannot accurately tracking memory size, + // If LRUCacheType::NUMBER and value_tracking_bytes equals 0, usually currently cannot accurately tracking memory size, // only tracking handle_size(106). - Cache::Handle* insert(const CacheKey& key, void* value, size_t charge, size_t tracking_bytes, + Cache::Handle* insert(const CacheKey& key, void* value, size_t charge, + size_t value_tracking_bytes, CachePriority priority = CachePriority::NORMAL) { - size_t tracking_bytes_with_handle = sizeof(LRUHandle) - 1 + key.size() + tracking_bytes; + size_t tracking_bytes = sizeof(LRUHandle) - 1 + key.size() + value_tracking_bytes; if (value != nullptr) { - mem_tracker()->consume(tracking_bytes_with_handle); ((LRUCacheValueBase*)value) - ->set_tracking_bytes(tracking_bytes_with_handle, _mem_tracker); + ->set_tracking_bytes(tracking_bytes, _mem_tracker, value_tracking_bytes, + _value_mem_tracker); } return _cache->insert(key, value, charge, priority); } @@ -265,9 +271,18 @@ public: protected: void _init_mem_tracker(const std::string& type_name) { - _mem_tracker = MemTrackerLimiter::create_shared( - MemTrackerLimiter::Type::GLOBAL, - fmt::format("{}[{}]", type_string(_type), type_name)); + if (std::find(CachePolicy::MetadataCache.begin(), CachePolicy::MetadataCache.end(), + _type) == CachePolicy::MetadataCache.end()) { + _mem_tracker = MemTrackerLimiter::create_shared( + MemTrackerLimiter::Type::CACHE, + fmt::format("{}[{}]", type_string(_type), type_name)); + } else { + 
_mem_tracker = MemTrackerLimiter::create_shared( + MemTrackerLimiter::Type::METADATA, + fmt::format("{}[{}]", type_string(_type), type_name)); + } + _value_mem_tracker = std::make_shared<MemTracker>( + fmt::format("{}::Value[{}]", type_string(_type), type_name)); } // if check_capacity failed, will return dummy lru cache, @@ -277,6 +292,7 @@ protected: LRUCacheType _lru_cache_type; std::shared_ptr<MemTrackerLimiter> _mem_tracker; + std::shared_ptr<MemTracker> _value_mem_tracker; }; } // namespace doris diff --git a/be/src/runtime/memory/lru_cache_value_base.h b/be/src/runtime/memory/lru_cache_value_base.h index f9e534e6600..a9a3ae5ddab 100644 --- a/be/src/runtime/memory/lru_cache_value_base.h +++ b/be/src/runtime/memory/lru_cache_value_base.h @@ -28,18 +28,27 @@ public: virtual ~LRUCacheValueBase() { if (_tracking_bytes > 0) { _mem_tracker->release(_tracking_bytes); + _value_mem_tracker->release(_value_tracking_bytes); } } void set_tracking_bytes(size_t tracking_bytes, - const std::shared_ptr<MemTrackerLimiter>& mem_tracker) { + const std::shared_ptr<MemTrackerLimiter>& mem_tracker, + size_t value_tracking_bytes, + const std::shared_ptr<MemTracker>& value_mem_tracker) { this->_tracking_bytes = tracking_bytes; this->_mem_tracker = mem_tracker; + this->_value_tracking_bytes = value_tracking_bytes; + this->_value_mem_tracker = value_mem_tracker; + _mem_tracker->consume(_tracking_bytes); + _value_mem_tracker->consume(_value_tracking_bytes); } protected: size_t _tracking_bytes = 0; + size_t _value_tracking_bytes = 0; std::shared_ptr<MemTrackerLimiter> _mem_tracker; + std::shared_ptr<MemTracker> _value_mem_tracker; }; } // namespace doris diff --git a/be/src/runtime/memory/mem_tracker_limiter.cpp b/be/src/runtime/memory/mem_tracker_limiter.cpp index 05ff13f0e7c..ac4684835a6 100644 --- a/be/src/runtime/memory/mem_tracker_limiter.cpp +++ b/be/src/runtime/memory/mem_tracker_limiter.cpp @@ -66,9 +66,13 @@ MemTrackerLimiter::MemTrackerLimiter(Type type, const std::string& 
label, int64_ _uid = UniqueId::gen_uid(); if (_type == Type::GLOBAL) { _group_num = 0; + } else if (_type == Type::METADATA) { + _group_num = 1; + } else if (_type == Type::CACHE) { + _group_num = 2; } else { _group_num = - mem_tracker_limiter_group_counter.fetch_add(1) % (MEM_TRACKER_GROUP_NUM - 1) + 1; + mem_tracker_limiter_group_counter.fetch_add(1) % (MEM_TRACKER_GROUP_NUM - 3) + 3; } // currently only select/load need runtime query statistics @@ -208,24 +212,20 @@ std::string MemTrackerLimiter::print_address_sanitizers() { RuntimeProfile* MemTrackerLimiter::make_profile(RuntimeProfile* profile) const { RuntimeProfile* profile_snapshot = profile->create_child( fmt::format("{}@{}@id={}", _label, type_string(_type), _uid.to_string()), true, false); - RuntimeProfile::Counter* current_usage_counter = - ADD_COUNTER(profile_snapshot, "CurrentUsage", TUnit::BYTES); - RuntimeProfile::Counter* peak_usage_counter = - ADD_COUNTER(profile_snapshot, "PeakUsage", TUnit::BYTES); - COUNTER_SET(current_usage_counter, consumption()); - COUNTER_SET(peak_usage_counter, peak_consumption()); + RuntimeProfile::HighWaterMarkCounter* usage_counter = + profile_snapshot->AddHighWaterMarkCounter("Memory", TUnit::BYTES); + COUNTER_SET(usage_counter, peak_consumption()); + COUNTER_SET(usage_counter, consumption()); if (has_limit()) { RuntimeProfile::Counter* limit_counter = ADD_COUNTER(profile_snapshot, "Limit", TUnit::BYTES); COUNTER_SET(limit_counter, _limit); } if (reserved_peak_consumption() != 0) { - RuntimeProfile::Counter* reserved_counter = - ADD_COUNTER(profile_snapshot, "ReservedMemory", TUnit::BYTES); - RuntimeProfile::Counter* reserved_peak_counter = - ADD_COUNTER(profile_snapshot, "ReservedPeakMemory", TUnit::BYTES); + RuntimeProfile::HighWaterMarkCounter* reserved_counter = + profile_snapshot->AddHighWaterMarkCounter("ReservedMemory", TUnit::BYTES); + COUNTER_SET(reserved_counter, reserved_peak_consumption()); COUNTER_SET(reserved_counter, reserved_consumption()); - 
COUNTER_SET(reserved_peak_counter, reserved_peak_consumption()); } return profile_snapshot; } @@ -268,8 +268,26 @@ void MemTrackerLimiter::make_type_trackers_profile(RuntimeProfile* profile, tracker->make_profile(profile); } } + } else if (type == Type::METADATA) { + std::lock_guard<std::mutex> l( + ExecEnv::GetInstance()->mem_tracker_limiter_pool[1].group_lock); + for (auto trackerWptr : ExecEnv::GetInstance()->mem_tracker_limiter_pool[1].trackers) { + auto tracker = trackerWptr.lock(); + if (tracker != nullptr) { + tracker->make_profile(profile); + } + } + } else if (type == Type::CACHE) { + std::lock_guard<std::mutex> l( + ExecEnv::GetInstance()->mem_tracker_limiter_pool[2].group_lock); + for (auto trackerWptr : ExecEnv::GetInstance()->mem_tracker_limiter_pool[2].trackers) { + auto tracker = trackerWptr.lock(); + if (tracker != nullptr) { + tracker->make_profile(profile); + } + } } else { - for (unsigned i = 1; i < ExecEnv::GetInstance()->mem_tracker_limiter_pool.size(); ++i) { + for (unsigned i = 3; i < ExecEnv::GetInstance()->mem_tracker_limiter_pool.size(); ++i) { std::lock_guard<std::mutex> l( ExecEnv::GetInstance()->mem_tracker_limiter_pool[i].group_lock); for (auto trackerWptr : ExecEnv::GetInstance()->mem_tracker_limiter_pool[i].trackers) { @@ -296,8 +314,8 @@ void MemTrackerLimiter::make_top_consumption_tasks_tracker_profile(RuntimeProfil std::unique_ptr<RuntimeProfile> tmp_profile_snapshot = std::make_unique<RuntimeProfile>("tmpSnapshot"); std::priority_queue<std::pair<int64_t, RuntimeProfile*>> max_pq; - // start from 2, not include global type. - for (unsigned i = 1; i < ExecEnv::GetInstance()->mem_tracker_limiter_pool.size(); ++i) { + // start from 3, not include global/metadata/cache type. 
+ for (unsigned i = 3; i < ExecEnv::GetInstance()->mem_tracker_limiter_pool.size(); ++i) { std::lock_guard<std::mutex> l( ExecEnv::GetInstance()->mem_tracker_limiter_pool[i].group_lock); for (auto trackerWptr : ExecEnv::GetInstance()->mem_tracker_limiter_pool[i].trackers) { @@ -326,13 +344,19 @@ void MemTrackerLimiter::make_all_tasks_tracker_profile(RuntimeProfile* profile) types_profile[Type::SCHEMA_CHANGE] = profile->create_child("SchemaChangeTasks", true, false); types_profile[Type::OTHER] = profile->create_child("OtherTasks", true, false); - // start from 2, not include global type. - for (unsigned i = 1; i < ExecEnv::GetInstance()->mem_tracker_limiter_pool.size(); ++i) { + // start from 3, not include global/metadata/cache type. + for (unsigned i = 3; i < ExecEnv::GetInstance()->mem_tracker_limiter_pool.size(); ++i) { std::lock_guard<std::mutex> l( ExecEnv::GetInstance()->mem_tracker_limiter_pool[i].group_lock); for (auto trackerWptr : ExecEnv::GetInstance()->mem_tracker_limiter_pool[i].trackers) { auto tracker = trackerWptr.lock(); if (tracker != nullptr) { + // BufferControlBlock will continue to exist for 5 minutes after the query ends, even if the + // result buffer is empty, and will not be shown in the profile. of course, this code is tricky. + if (tracker->consumption() == 0 && + tracker->label().starts_with("BufferControlBlock")) { + continue; + } tracker->make_profile(types_profile[tracker->type()]); } } diff --git a/be/src/runtime/memory/mem_tracker_limiter.h b/be/src/runtime/memory/mem_tracker_limiter.h index 445856b1f6a..43b20a410ff 100644 --- a/be/src/runtime/memory/mem_tracker_limiter.h +++ b/be/src/runtime/memory/mem_tracker_limiter.h @@ -77,12 +77,14 @@ public: enum class GCType { PROCESS = 0, WORK_LOAD_GROUP = 1 }; enum class Type { - GLOBAL = 0, // Life cycle is the same as the process, e.g. Cache and default Orphan + GLOBAL = 0, // Life cycle is the same as the process, except cache and metadata. 
QUERY = 1, // Count the memory consumption of all Query tasks. LOAD = 2, // Count the memory consumption of all Load tasks. COMPACTION = 3, // Count the memory consumption of all Base and Cumulative tasks. SCHEMA_CHANGE = 4, // Count the memory consumption of all SchemaChange tasks. - OTHER = 5, + METADATA = 5, // Count the memory consumption of all Metadata. + CACHE = 6, // Count the memory consumption of all Cache. + OTHER = 7, // Count the memory consumption of all other tasks, such as Clone, Snapshot, etc.. }; static std::string type_string(Type type) { @@ -97,8 +99,12 @@ public: return "compaction"; case Type::SCHEMA_CHANGE: return "schema_change"; + case Type::METADATA: + return "metadata"; + case Type::CACHE: + return "cache"; case Type::OTHER: - return "other"; + return "other_task"; default: LOG(FATAL) << "not match type of mem tracker limiter :" << static_cast<int>(type); } @@ -158,6 +164,8 @@ public: int64_t consumption() const { return _mem_counter.current_value(); } int64_t peak_consumption() const { return _mem_counter.peak_value(); } + // Use carefully! only memory that cannot be allocated using Doris Allocator needs to be consumed manually. + // Ideally, all memory should use Doris Allocator. 
void consume(int64_t bytes) { _mem_counter.add(bytes); if (_query_statistics) { diff --git a/be/src/runtime/memory/memory_profile.cpp b/be/src/runtime/memory/memory_profile.cpp index 8dbdcbdd3af..5d649c52601 100644 --- a/be/src/runtime/memory/memory_profile.cpp +++ b/be/src/runtime/memory/memory_profile.cpp @@ -18,6 +18,9 @@ #include "runtime/memory/memory_profile.h" #include "bvar/reducer.h" +#include "olap/metadata_adder.h" +#include "olap/schema_cache.h" +#include "olap/tablet_schema_cache.h" #include "runtime/exec_env.h" #include "runtime/memory/global_memory_arbitrator.h" #include "runtime/memory/mem_tracker_limiter.h" @@ -28,6 +31,9 @@ namespace doris { static bvar::Adder<int64_t> memory_all_tracked_sum_bytes("memory_all_tracked_sum_bytes"); static bvar::Adder<int64_t> memory_global_trackers_sum_bytes("memory_global_trackers_sum_bytes"); +static bvar::Adder<int64_t> memory_metadata_trackers_sum_bytes( + "memory_metadata_trackers_sum_bytes"); +static bvar::Adder<int64_t> memory_cache_trackers_sum_bytes("memory_cache_trackers_sum_bytes"); static bvar::Adder<int64_t> memory_query_trackers_sum_bytes("memory_query_trackers_sum_bytes"); static bvar::Adder<int64_t> memory_load_trackers_sum_bytes("memory_load_trackers_sum_bytes"); static bvar::Adder<int64_t> memory_compaction_trackers_sum_bytes( @@ -40,140 +46,122 @@ static bvar::Adder<int64_t> memory_all_tasks_memory_bytes("memory_all_tasks_memo static bvar::Adder<int64_t> memory_untracked_memory_bytes("memory_untracked_memory_bytes"); MemoryProfile::MemoryProfile() { - _memory_overview_profile.set(std::make_unique<RuntimeProfile>("MemoryOverviewSnapshot")); +#ifdef ADDRESS_SANITIZER + _memory_overview_profile = std::make_unique<RuntimeProfile>("[ASAN]MemoryOverviewSnapshot"); +#else + _memory_overview_profile = std::make_unique<RuntimeProfile>("MemoryOverviewSnapshot"); +#endif _global_memory_profile.set(std::make_unique<RuntimeProfile>("GlobalMemorySnapshot")); + 
_metadata_memory_profile.set(std::make_unique<RuntimeProfile>("MetadataMemorySnapshot")); + _cache_memory_profile.set(std::make_unique<RuntimeProfile>("CacheMemorySnapshot")); _top_memory_tasks_profile.set(std::make_unique<RuntimeProfile>("TopMemoryTasksSnapshot")); _tasks_memory_profile.set(std::make_unique<RuntimeProfile>("TasksMemorySnapshot")); + init_memory_overview_counter(); } -void MemoryProfile::refresh_memory_overview_profile() { -#ifdef ADDRESS_SANITIZER - std::unique_ptr<RuntimeProfile> memory_overview_profile = - std::make_unique<RuntimeProfile>("[ASAN]MemoryOverviewSnapshot"); -#else - std::unique_ptr<RuntimeProfile> memory_overview_profile = - std::make_unique<RuntimeProfile>("MemoryOverviewSnapshot"); -#endif - std::unique_ptr<RuntimeProfile> global_memory_profile = - std::make_unique<RuntimeProfile>("GlobalMemorySnapshot"); - std::unique_ptr<RuntimeProfile> top_memory_tasks_profile = - std::make_unique<RuntimeProfile>("TopMemoryTasksSnapshot"); - - // 1. create profile +void MemoryProfile::init_memory_overview_counter() { RuntimeProfile* untracked_memory_profile = - memory_overview_profile->create_child("UntrackedMemory", true, false); + _memory_overview_profile->create_child("UntrackedMemory", true, false); RuntimeProfile* tracked_memory_profile = - memory_overview_profile->create_child("TrackedMemory", true, false); + _memory_overview_profile->create_child("TrackedMemory", true, false); RuntimeProfile* tasks_memory_overview_profile = tracked_memory_profile->create_child("TasksMemory", true, false); RuntimeProfile* tasks_memory_overview_details_profile = tasks_memory_overview_profile->create_child("Details", true, false); RuntimeProfile* global_memory_overview_profile = tracked_memory_profile->create_child("GlobalMemory", true, false); + RuntimeProfile* metadata_memory_overview_profile = + tracked_memory_profile->create_child("MetadataMemory", true, false); + RuntimeProfile* cache_memory_overview_profile = + 
tracked_memory_profile->create_child("CacheMemory", true, false); RuntimeProfile* jemalloc_memory_profile = tracked_memory_profile->create_child("JemallocMemory", true, false); RuntimeProfile* jemalloc_memory_details_profile = jemalloc_memory_profile->create_child("Details", true, false); - // 2. add counter - // 2.1 add process memory counter - RuntimeProfile::Counter* process_physical_memory_current_usage_counter = - ADD_COUNTER(memory_overview_profile, "PhysicalMemory(VmRSS)", TUnit::BYTES); - RuntimeProfile::Counter* process_physical_memory_peak_usage_counter = - memory_overview_profile->AddHighWaterMarkCounter("PhysicalMemoryPeak", TUnit::BYTES); - RuntimeProfile::Counter* process_virtual_memory_current_usage_counter = - ADD_COUNTER(memory_overview_profile, "VirtualMemory(VmSize)", TUnit::BYTES); - RuntimeProfile::Counter* process_virtual_memory_peak_usage_counter = - memory_overview_profile->AddHighWaterMarkCounter("VirtualMemoryPeak", TUnit::BYTES); - - // 2.2 add untracked memory counter - RuntimeProfile::Counter* untracked_memory_current_usage_counter = - ADD_COUNTER(untracked_memory_profile, "CurrentUsage", TUnit::BYTES); - RuntimeProfile::Counter* untracked_memory_peak_usage_counter = - untracked_memory_profile->AddHighWaterMarkCounter("PeakUsage", TUnit::BYTES); - - // 2.3 add tracked memory counter - RuntimeProfile::Counter* tracked_memory_current_usage_counter = - ADD_COUNTER(tracked_memory_profile, "CurrentUsage", TUnit::BYTES); - RuntimeProfile::Counter* tracked_memory_peak_usage_counter = - tracked_memory_profile->AddHighWaterMarkCounter("PeakUsage", TUnit::BYTES); - - // 2.4 add jemalloc memory counter - RuntimeProfile::Counter* jemalloc_memory_current_usage_counter = - ADD_COUNTER(jemalloc_memory_profile, "CurrentUsage", TUnit::BYTES); - RuntimeProfile::Counter* jemalloc_memory_peak_usage_counter = - jemalloc_memory_profile->AddHighWaterMarkCounter("PeakUsage", TUnit::BYTES); - RuntimeProfile::Counter* jemalloc_cache_current_usage_counter = - 
ADD_COUNTER(jemalloc_memory_details_profile, "Cache", TUnit::BYTES); - RuntimeProfile::Counter* jemalloc_cache_peak_usage_counter = - jemalloc_memory_details_profile->AddHighWaterMarkCounter("CachePeak", TUnit::BYTES); - RuntimeProfile::Counter* jemalloc_metadata_current_usage_counter = - ADD_COUNTER(jemalloc_memory_details_profile, "Metadata", TUnit::BYTES); - RuntimeProfile::Counter* jemalloc_metadata_peak_usage_counter = - jemalloc_memory_details_profile->AddHighWaterMarkCounter("MetadataPeak", TUnit::BYTES); - - // 2.5 add global memory counter - RuntimeProfile::Counter* global_current_usage_counter = - ADD_COUNTER(global_memory_overview_profile, "CurrentUsage", TUnit::BYTES); - RuntimeProfile::Counter* global_peak_usage_counter = - global_memory_overview_profile->AddHighWaterMarkCounter("PeakUsage", TUnit::BYTES); - - // 2.6 add tasks memory counter - RuntimeProfile::Counter* tasks_memory_current_usage_counter = - ADD_COUNTER_WITH_LEVEL(tasks_memory_overview_profile, "CurrentUsage", TUnit::BYTES, 1); + // 1 add process memory counter + _process_physical_memory_usage_counter = _memory_overview_profile->AddHighWaterMarkCounter( + "PhysicalMemory(VmRSS)", TUnit::BYTES); + _process_virtual_memory_usage_counter = _memory_overview_profile->AddHighWaterMarkCounter( + "VirtualMemory(VmSize)", TUnit::BYTES); + + // 2 add untracked/tracked memory counter + _untracked_memory_usage_counter = + untracked_memory_profile->AddHighWaterMarkCounter("Memory", TUnit::BYTES); + _tracked_memory_usage_counter = + tracked_memory_profile->AddHighWaterMarkCounter("Memory", TUnit::BYTES); + + // 3 add Jemalloc memory counter + _jemalloc_memory_usage_counter = + jemalloc_memory_profile->AddHighWaterMarkCounter("Memory", TUnit::BYTES); + _jemalloc_cache_usage_counter = + jemalloc_memory_details_profile->AddHighWaterMarkCounter("Cache", TUnit::BYTES); + _jemalloc_metadata_usage_counter = + jemalloc_memory_details_profile->AddHighWaterMarkCounter("Metadata", TUnit::BYTES); + + // 4 add 
global/metadata/cache memory counter + _global_usage_counter = + global_memory_overview_profile->AddHighWaterMarkCounter("Memory", TUnit::BYTES); + _metadata_usage_counter = + metadata_memory_overview_profile->AddHighWaterMarkCounter("Memory", TUnit::BYTES); + _cache_usage_counter = + cache_memory_overview_profile->AddHighWaterMarkCounter("Memory", TUnit::BYTES); + + // 5 add tasks memory counter + _tasks_memory_usage_counter = + tasks_memory_overview_profile->AddHighWaterMarkCounter("Memory", TUnit::BYTES); // Reserved memory is the sum of all task reserved memory, is duplicated with all task memory counter. - RuntimeProfile::Counter* reserved_memory_current_usage_counter = ADD_CHILD_COUNTER_WITH_LEVEL( - tasks_memory_overview_profile, "ReservedMemory", TUnit::BYTES, "CurrentUsage", 1); - RuntimeProfile::Counter* reserved_memory_peak_usage_counter = - tasks_memory_overview_profile->AddHighWaterMarkCounter("ReservedMemoryPeak", - TUnit::BYTES, "CurrentUsage", 1); - RuntimeProfile::Counter* tasks_memory_peak_usage_counter = - tasks_memory_overview_profile->AddHighWaterMarkCounter("PeakUsage", TUnit::BYTES); - RuntimeProfile::Counter* query_current_usage_counter = - ADD_COUNTER_WITH_LEVEL(tasks_memory_overview_details_profile, "Query", TUnit::BYTES, 1); - RuntimeProfile::Counter* query_peak_usage_counter = - tasks_memory_overview_details_profile->AddHighWaterMarkCounter( - "QueryPeak", TUnit::BYTES, "Query", 1); - RuntimeProfile::Counter* load_current_usage_counter = - ADD_COUNTER_WITH_LEVEL(tasks_memory_overview_details_profile, "Load", TUnit::BYTES, 1); - RuntimeProfile::Counter* load_peak_usage_counter = - tasks_memory_overview_details_profile->AddHighWaterMarkCounter("LoadPeak", TUnit::BYTES, - "Load", 1); - RuntimeProfile::Counter* load_all_memtables_current_usage_counter = - ADD_CHILD_COUNTER_WITH_LEVEL(tasks_memory_overview_details_profile, - "AllMemTablesMemory", TUnit::BYTES, "Load", 1); - RuntimeProfile::Counter* load_all_memtables_peak_usage_counter = - 
ADD_CHILD_COUNTER_WITH_LEVEL(tasks_memory_overview_details_profile, - "AllMemTablesMemoryPeak", TUnit::BYTES, "Load", 1); - RuntimeProfile::Counter* compaction_current_usage_counter = ADD_COUNTER_WITH_LEVEL( - tasks_memory_overview_details_profile, "Compaction", TUnit::BYTES, 1); - RuntimeProfile::Counter* compaction_peak_usage_counter = - tasks_memory_overview_details_profile->AddHighWaterMarkCounter( - "CompactionPeak", TUnit::BYTES, "Compaction", 1); - RuntimeProfile::Counter* schema_change_current_usage_counter = ADD_COUNTER_WITH_LEVEL( - tasks_memory_overview_details_profile, "SchemaChange", TUnit::BYTES, 1); - RuntimeProfile::Counter* schema_change_peak_usage_counter = - tasks_memory_overview_details_profile->AddHighWaterMarkCounter( - "SchemaChangePeak", TUnit::BYTES, "SchemaChange", 1); - RuntimeProfile::Counter* other_current_usage_counter = - ADD_COUNTER_WITH_LEVEL(tasks_memory_overview_details_profile, "Other", TUnit::BYTES, 1); - RuntimeProfile::Counter* other_peak_usage_counter = - tasks_memory_overview_details_profile->AddHighWaterMarkCounter( - "OtherPeak", TUnit::BYTES, "Other", 1); - // 3. 
refresh counter - // 3.1 refresh process memory counter - COUNTER_SET(process_physical_memory_current_usage_counter, + _reserved_memory_usage_counter = tasks_memory_overview_profile->AddHighWaterMarkCounter( + "ReservedMemory", TUnit::BYTES, "Memory", 1); + _query_usage_counter = + tasks_memory_overview_details_profile->AddHighWaterMarkCounter("Query", TUnit::BYTES); + _load_usage_counter = + tasks_memory_overview_details_profile->AddHighWaterMarkCounter("Load", TUnit::BYTES); + _load_all_memtables_usage_counter = + tasks_memory_overview_details_profile->AddHighWaterMarkCounter("AllMemTablesMemory", + TUnit::BYTES, "Load", 1); + _compaction_usage_counter = tasks_memory_overview_details_profile->AddHighWaterMarkCounter( + "Compaction", TUnit::BYTES); + _schema_change_usage_counter = tasks_memory_overview_details_profile->AddHighWaterMarkCounter( + "SchemaChange", TUnit::BYTES); + _other_usage_counter = + tasks_memory_overview_details_profile->AddHighWaterMarkCounter("Other", TUnit::BYTES); +} + +void MemoryProfile::refresh_memory_overview_profile() { + // 1 create profile + std::unique_ptr<RuntimeProfile> global_memory_profile = + std::make_unique<RuntimeProfile>("GlobalMemorySnapshot"); + std::unique_ptr<RuntimeProfile> metadata_memory_profile = + std::make_unique<RuntimeProfile>("MetadataMemorySnapshot"); + std::unique_ptr<RuntimeProfile> cache_memory_profile = + std::make_unique<RuntimeProfile>("CacheMemorySnapshot"); + std::unique_ptr<RuntimeProfile> top_memory_tasks_profile = + std::make_unique<RuntimeProfile>("TopMemoryTasksSnapshot"); + + // 2 refresh process memory counter + COUNTER_SET(_process_physical_memory_usage_counter, PerfCounters::get_vm_rss()); // from /proc VmRSS VmHWM - COUNTER_SET(process_physical_memory_peak_usage_counter, PerfCounters::get_vm_hwm()); - COUNTER_SET(process_virtual_memory_current_usage_counter, + COUNTER_SET(_process_virtual_memory_usage_counter, PerfCounters::get_vm_size()); // from /proc VmSize VmPeak - 
COUNTER_SET(process_virtual_memory_peak_usage_counter, PerfCounters::get_vm_peak()); - // 3.2 refresh tracked memory counter + // 2 refresh metadata memory tracker + ExecEnv::GetInstance()->tablets_no_cache_mem_tracker()->set_consumption( + MetadataAdder<TabletMeta>::get_all_tablets_size() - + TabletSchemaCache::instance()->value_mem_consumption() - + SchemaCache::instance()->value_mem_consumption()); + ExecEnv::GetInstance()->rowsets_no_cache_mem_tracker()->set_consumption( + MetadataAdder<RowsetMeta>::get_all_rowsets_size()); + ExecEnv::GetInstance()->segments_no_cache_mem_tracker()->set_consumption( + MetadataAdder<segment_v2::Segment>::get_all_segments_estimate_size() - + SegmentLoader::instance()->cache_mem_usage()); + + // 4 refresh tracked memory counter std::unordered_map<MemTrackerLimiter::Type, int64_t> type_mem_sum = { {MemTrackerLimiter::Type::GLOBAL, 0}, {MemTrackerLimiter::Type::QUERY, 0}, {MemTrackerLimiter::Type::LOAD, 0}, {MemTrackerLimiter::Type::COMPACTION, 0}, - {MemTrackerLimiter::Type::SCHEMA_CHANGE, 0}, {MemTrackerLimiter::Type::OTHER, 0}}; + {MemTrackerLimiter::Type::SCHEMA_CHANGE, 0}, {MemTrackerLimiter::Type::METADATA, 0}, + {MemTrackerLimiter::Type::CACHE, 0}, {MemTrackerLimiter::Type::OTHER, 0}}; // always ExecEnv::ready(), because Daemon::_stop_background_threads_latch for (auto& group : ExecEnv::GetInstance()->mem_tracker_limiter_pool) { std::lock_guard<std::mutex> l(group.group_lock); @@ -191,42 +179,46 @@ void MemoryProfile::refresh_memory_overview_profile() { all_tracked_mem_sum += it.second; switch (it.first) { case MemTrackerLimiter::Type::GLOBAL: - COUNTER_SET(global_current_usage_counter, it.second); - COUNTER_SET(global_peak_usage_counter, it.second); + COUNTER_SET(_global_usage_counter, it.second); memory_global_trackers_sum_bytes << it.second - memory_global_trackers_sum_bytes.get_value(); break; case MemTrackerLimiter::Type::QUERY: - COUNTER_SET(query_current_usage_counter, it.second); - COUNTER_SET(query_peak_usage_counter, 
it.second); + COUNTER_SET(_query_usage_counter, it.second); tasks_trackers_mem_sum += it.second; memory_query_trackers_sum_bytes << it.second - memory_query_trackers_sum_bytes.get_value(); break; case MemTrackerLimiter::Type::LOAD: - COUNTER_SET(load_current_usage_counter, it.second); - COUNTER_SET(load_peak_usage_counter, it.second); + COUNTER_SET(_load_usage_counter, it.second); tasks_trackers_mem_sum += it.second; memory_load_trackers_sum_bytes << it.second - memory_load_trackers_sum_bytes.get_value(); break; case MemTrackerLimiter::Type::COMPACTION: - COUNTER_SET(compaction_current_usage_counter, it.second); - COUNTER_SET(compaction_peak_usage_counter, it.second); + COUNTER_SET(_compaction_usage_counter, it.second); tasks_trackers_mem_sum += it.second; memory_compaction_trackers_sum_bytes << it.second - memory_compaction_trackers_sum_bytes.get_value(); break; case MemTrackerLimiter::Type::SCHEMA_CHANGE: - COUNTER_SET(schema_change_current_usage_counter, it.second); - COUNTER_SET(schema_change_peak_usage_counter, it.second); + COUNTER_SET(_schema_change_usage_counter, it.second); tasks_trackers_mem_sum += it.second; memory_schema_change_trackers_sum_bytes << it.second - memory_schema_change_trackers_sum_bytes.get_value(); break; + case MemTrackerLimiter::Type::METADATA: + COUNTER_SET(_metadata_usage_counter, it.second); + memory_metadata_trackers_sum_bytes + << it.second - memory_metadata_trackers_sum_bytes.get_value(); + break; + case MemTrackerLimiter::Type::CACHE: + COUNTER_SET(_cache_usage_counter, it.second); + memory_cache_trackers_sum_bytes + << it.second - memory_cache_trackers_sum_bytes.get_value(); + break; case MemTrackerLimiter::Type::OTHER: - COUNTER_SET(other_current_usage_counter, it.second); - COUNTER_SET(other_peak_usage_counter, it.second); + COUNTER_SET(_other_usage_counter, it.second); tasks_trackers_mem_sum += it.second; memory_other_trackers_sum_bytes << it.second - memory_other_trackers_sum_bytes.get_value(); @@ -235,60 +227,52 @@ void 
MemoryProfile::refresh_memory_overview_profile() { MemTrackerLimiter::make_type_trackers_profile(global_memory_profile.get(), MemTrackerLimiter::Type::GLOBAL); + MemTrackerLimiter::make_type_trackers_profile(metadata_memory_profile.get(), + MemTrackerLimiter::Type::METADATA); + MemTrackerLimiter::make_type_trackers_profile(cache_memory_profile.get(), + MemTrackerLimiter::Type::CACHE); MemTrackerLimiter::make_top_consumption_tasks_tracker_profile(top_memory_tasks_profile.get(), 15); - COUNTER_SET(tasks_memory_current_usage_counter, tasks_trackers_mem_sum); - COUNTER_SET(tasks_memory_peak_usage_counter, tasks_trackers_mem_sum); + COUNTER_SET(_tasks_memory_usage_counter, tasks_trackers_mem_sum); memory_all_tasks_memory_bytes << tasks_trackers_mem_sum - memory_all_tasks_memory_bytes.get_value(); - COUNTER_SET(reserved_memory_current_usage_counter, - GlobalMemoryArbitrator::process_reserved_memory()); - COUNTER_SET(reserved_memory_peak_usage_counter, - GlobalMemoryArbitrator::process_reserved_memory()); + COUNTER_SET(_reserved_memory_usage_counter, GlobalMemoryArbitrator::process_reserved_memory()); memory_reserved_memory_bytes << GlobalMemoryArbitrator::process_reserved_memory() - memory_reserved_memory_bytes.get_value(); all_tracked_mem_sum += MemInfo::allocator_cache_mem(); - COUNTER_SET(jemalloc_cache_current_usage_counter, - static_cast<int64_t>(MemInfo::allocator_cache_mem())); - COUNTER_SET(jemalloc_cache_peak_usage_counter, + COUNTER_SET(_jemalloc_cache_usage_counter, static_cast<int64_t>(MemInfo::allocator_cache_mem())); all_tracked_mem_sum += MemInfo::allocator_metadata_mem(); - COUNTER_SET(jemalloc_metadata_current_usage_counter, - static_cast<int64_t>(MemInfo::allocator_metadata_mem())); - COUNTER_SET(jemalloc_metadata_peak_usage_counter, + COUNTER_SET(_jemalloc_metadata_usage_counter, static_cast<int64_t>(MemInfo::allocator_metadata_mem())); - COUNTER_SET(jemalloc_memory_current_usage_counter, - jemalloc_cache_current_usage_counter->value() + - 
jemalloc_metadata_current_usage_counter->value()); - COUNTER_SET(jemalloc_memory_peak_usage_counter, - jemalloc_cache_current_usage_counter->value() + - jemalloc_metadata_current_usage_counter->value()); - - COUNTER_SET(tracked_memory_current_usage_counter, all_tracked_mem_sum); - COUNTER_SET(tracked_memory_peak_usage_counter, all_tracked_mem_sum); + COUNTER_SET(_jemalloc_memory_usage_counter, + _jemalloc_cache_usage_counter->current_value() + + _jemalloc_metadata_usage_counter->current_value()); + + COUNTER_SET(_tracked_memory_usage_counter, all_tracked_mem_sum); memory_all_tracked_sum_bytes << all_tracked_mem_sum - memory_all_tracked_sum_bytes.get_value(); - // 3.3 refresh untracked memory counter + // 5 refresh untracked memory counter int64_t untracked_memory = - process_physical_memory_current_usage_counter->value() - all_tracked_mem_sum; - COUNTER_SET(untracked_memory_current_usage_counter, untracked_memory); - COUNTER_SET(untracked_memory_peak_usage_counter, untracked_memory); + _process_physical_memory_usage_counter->current_value() - all_tracked_mem_sum; + COUNTER_SET(_untracked_memory_usage_counter, untracked_memory); memory_untracked_memory_bytes << untracked_memory - memory_untracked_memory_bytes.get_value(); - // 3.4 refresh additional tracker printed when memory exceeds limit. - COUNTER_SET(load_all_memtables_current_usage_counter, - ExecEnv::GetInstance()->memtable_memory_limiter()->mem_tracker()->consumption()); + // 6 refresh additional tracker printed when memory exceeds limit. COUNTER_SET( - load_all_memtables_peak_usage_counter, + _load_all_memtables_usage_counter, ExecEnv::GetInstance()->memtable_memory_limiter()->mem_tracker()->peak_consumption()); + COUNTER_SET(_load_all_memtables_usage_counter, + ExecEnv::GetInstance()->memtable_memory_limiter()->mem_tracker()->consumption()); - // 4. reset profile - _memory_overview_profile.set(std::move(memory_overview_profile)); + // 7. 
reset profile _global_memory_profile.set(std::move(global_memory_profile)); + _metadata_memory_profile.set(std::move(metadata_memory_profile)); + _cache_memory_profile.set(std::move(cache_memory_profile)); _top_memory_tasks_profile.set(std::move(top_memory_tasks_profile)); } @@ -302,16 +286,25 @@ void MemoryProfile::refresh_tasks_memory_profile() { void MemoryProfile::make_memory_profile(RuntimeProfile* profile) const { RuntimeProfile* memory_profile_snapshot = profile->create_child("MemoryProfile", true, false); - auto memory_overview_version_ptr = _memory_overview_profile.get(); RuntimeProfile* memory_overview_profile = - memory_profile_snapshot->create_child(memory_overview_version_ptr->name(), true, false); - memory_overview_profile->merge(const_cast<RuntimeProfile*>(memory_overview_version_ptr.get())); + memory_profile_snapshot->create_child(_memory_overview_profile->name(), true, false); + memory_overview_profile->merge(const_cast<RuntimeProfile*>(_memory_overview_profile.get())); auto global_memory_version_ptr = _global_memory_profile.get(); RuntimeProfile* global_memory_profile = memory_profile_snapshot->create_child(global_memory_version_ptr->name(), true, false); global_memory_profile->merge(const_cast<RuntimeProfile*>(global_memory_version_ptr.get())); + auto metadata_memory_version_ptr = _metadata_memory_profile.get(); + RuntimeProfile* metadata_memory_profile = + memory_profile_snapshot->create_child(metadata_memory_version_ptr->name(), true, false); + metadata_memory_profile->merge(const_cast<RuntimeProfile*>(metadata_memory_version_ptr.get())); + + auto cache_memory_version_ptr = _cache_memory_profile.get(); + RuntimeProfile* cache_memory_profile = + memory_profile_snapshot->create_child(cache_memory_version_ptr->name(), true, false); + cache_memory_profile->merge(const_cast<RuntimeProfile*>(cache_memory_version_ptr.get())); + auto top_memory_tasks_version_ptr = _top_memory_tasks_profile.get(); RuntimeProfile* top_memory_tasks_profile = 
memory_profile_snapshot->create_child( top_memory_tasks_version_ptr->name(), true, false); @@ -346,6 +339,8 @@ void MemoryProfile::print_log_process_usage() { LOG(WARNING) << "Process Memory Summary: " + GlobalMemoryArbitrator::process_mem_log_str(); LOG(WARNING) << "\n" << print_memory_overview_profile(); LOG(WARNING) << "\n" << print_global_memory_profile(); + LOG(WARNING) << "\n" << print_metadata_memory_profile(); + LOG(WARNING) << "\n" << print_cache_memory_profile(); LOG(WARNING) << "\n" << print_top_memory_tasks_profile(); } } diff --git a/be/src/runtime/memory/memory_profile.h b/be/src/runtime/memory/memory_profile.h index 9f1bab0c02a..c6aefb72f22 100644 --- a/be/src/runtime/memory/memory_profile.h +++ b/be/src/runtime/memory/memory_profile.h @@ -33,31 +33,27 @@ public: void make_memory_profile(RuntimeProfile* profile) const; std::string print_memory_overview_profile() const { - std::stringstream ss; - auto version_ptr = _memory_overview_profile.get(); - version_ptr->pretty_print(&ss); - return ss.str(); + return return_memory_profile_str(_memory_overview_profile.get()); } std::string print_global_memory_profile() const { - std::stringstream ss; - auto version_ptr = _global_memory_profile.get(); - version_ptr->pretty_print(&ss); - return ss.str(); + return return_memory_profile_str(_global_memory_profile.get().get()); + } + + std::string print_metadata_memory_profile() const { + return return_memory_profile_str(_metadata_memory_profile.get().get()); + } + + std::string print_cache_memory_profile() const { + return return_memory_profile_str(_cache_memory_profile.get().get()); } std::string print_top_memory_tasks_profile() const { - std::stringstream ss; - auto version_ptr = _top_memory_tasks_profile.get(); - version_ptr->pretty_print(&ss); - return ss.str(); + return return_memory_profile_str(_top_memory_tasks_profile.get().get()); } std::string print_tasks_memory_profile() const { - std::stringstream ss; - auto version_ptr = _tasks_memory_profile.get(); - 
version_ptr->pretty_print(&ss); - return ss.str(); + return return_memory_profile_str(_tasks_memory_profile.get().get()); } static int64_t query_current_usage(); @@ -71,11 +67,50 @@ public: void print_log_process_usage(); private: - MultiVersion<RuntimeProfile> _memory_overview_profile; + std::string return_memory_profile_str(const RuntimeProfile* profile) const { + std::stringstream ss; + profile->pretty_print(&ss); + return ss.str(); + } + + void init_memory_overview_counter(); + + std::unique_ptr<RuntimeProfile> _memory_overview_profile; MultiVersion<RuntimeProfile> _global_memory_profile; + MultiVersion<RuntimeProfile> _metadata_memory_profile; + MultiVersion<RuntimeProfile> _cache_memory_profile; MultiVersion<RuntimeProfile> _top_memory_tasks_profile; MultiVersion<RuntimeProfile> _tasks_memory_profile; + // process memory counter + RuntimeProfile::HighWaterMarkCounter* _process_physical_memory_usage_counter; + RuntimeProfile::HighWaterMarkCounter* _process_virtual_memory_usage_counter; + + // untracked/tracked memory counter + RuntimeProfile::HighWaterMarkCounter* _untracked_memory_usage_counter; + RuntimeProfile::HighWaterMarkCounter* _tracked_memory_usage_counter; + + // Jemalloc memory counter + RuntimeProfile::HighWaterMarkCounter* _jemalloc_memory_usage_counter; + RuntimeProfile::HighWaterMarkCounter* _jemalloc_cache_usage_counter; + RuntimeProfile::HighWaterMarkCounter* _jemalloc_metadata_usage_counter; + + // global/metadata/cache memory counter + RuntimeProfile::HighWaterMarkCounter* _global_usage_counter; + RuntimeProfile::HighWaterMarkCounter* _metadata_usage_counter; + RuntimeProfile::HighWaterMarkCounter* _cache_usage_counter; + + // tasks memory counter + RuntimeProfile::HighWaterMarkCounter* _tasks_memory_usage_counter; + // reserved memory is the sum of all task reserved memory, is duplicated with all task memory counter. 
+ RuntimeProfile::HighWaterMarkCounter* _reserved_memory_usage_counter; + RuntimeProfile::HighWaterMarkCounter* _query_usage_counter; + RuntimeProfile::HighWaterMarkCounter* _load_usage_counter; + RuntimeProfile::HighWaterMarkCounter* _load_all_memtables_usage_counter; + RuntimeProfile::HighWaterMarkCounter* _compaction_usage_counter; + RuntimeProfile::HighWaterMarkCounter* _schema_change_usage_counter; + RuntimeProfile::HighWaterMarkCounter* _other_usage_counter; + std::atomic<bool> _enable_print_log_process_usage {true}; }; diff --git a/be/src/util/runtime_profile.cpp b/be/src/util/runtime_profile.cpp index 45db607a342..8b9e97bfc9c 100644 --- a/be/src/util/runtime_profile.cpp +++ b/be/src/util/runtime_profile.cpp @@ -28,6 +28,7 @@ #include <algorithm> #include <iomanip> #include <iostream> +#include <type_traits> #include "common/object_pool.h" #include "util/container_util.hpp" @@ -72,8 +73,7 @@ void RuntimeProfile::merge(RuntimeProfile* other) { dst_iter = _counter_map.find(src_iter->first); if (dst_iter == _counter_map.end()) { - _counter_map[src_iter->first] = _pool->add( - new Counter(src_iter->second->type(), src_iter->second->value())); + _counter_map[src_iter->first] = _pool->add(src_iter->second->clone()); } else { DCHECK(dst_iter->second->type() == src_iter->second->type()); diff --git a/be/src/util/runtime_profile.h b/be/src/util/runtime_profile.h index 6e393ac673a..7130acbd2f9 100644 --- a/be/src/util/runtime_profile.h +++ b/be/src/util/runtime_profile.h @@ -100,6 +100,8 @@ public: : _value(value), _type(type), _level(level) {} virtual ~Counter() = default; + virtual Counter* clone() const { return new Counter(type(), value(), _level); } + virtual void update(int64_t delta) { _value.fetch_add(delta, std::memory_order_relaxed); } void bit_or(int64_t delta) { _value.fetch_or(delta, std::memory_order_relaxed); } @@ -137,7 +139,7 @@ public: TUnit::type type() const { return _type; } - virtual int64_t level() { return _level; } + virtual int64_t level() 
const { return _level; } private: friend class RuntimeProfile; @@ -151,8 +153,16 @@ public: /// as value()) and the current value. class HighWaterMarkCounter : public Counter { public: - HighWaterMarkCounter(TUnit::type unit, int64_t level, const std::string& parent_name) - : Counter(unit, 0, level), current_value_(0), _parent_name(parent_name) {} + HighWaterMarkCounter(TUnit::type unit, int64_t level, const std::string& parent_name, + int64_t value = 0, int64_t current_value = 0) + : Counter(unit, value, level), + current_value_(current_value), + _parent_name(parent_name) {} + + virtual Counter* clone() const override { + return new HighWaterMarkCounter(type(), level(), parent_name(), value(), + current_value()); + } void add(int64_t delta) { current_value_.fetch_add(delta, std::memory_order_relaxed); @@ -188,10 +198,9 @@ public: virtual void pretty_print(std::ostream* s, const std::string& prefix, const std::string& name) const override { std::ostream& stream = *s; - stream << prefix << " - " << name << ": " - << PrettyPrinter::print(current_value(), type()) << std::endl; - stream << prefix << " - " << name << "Peak: " - << PrettyPrinter::print(_value.load(std::memory_order_relaxed), type()) + stream << prefix << " - " << name + << " Current: " << PrettyPrinter::print(current_value(), type()) << " (Peak: " + << PrettyPrinter::print(_value.load(std::memory_order_relaxed), type()) << ")" << std::endl; } @@ -217,6 +226,8 @@ public: int64_t current_value() const { return current_value_.load(std::memory_order_relaxed); } + std::string parent_name() const { return _parent_name; } + private: /// Set '_value' to 'v' if 'v' is larger than '_value'. The entire operation is /// atomic. @@ -247,8 +258,13 @@ public: // Do not call Set() and Update(). 
class DerivedCounter : public Counter { public: - DerivedCounter(TUnit::type type, const DerivedCounterFunction& counter_fn) - : Counter(type, 0), _counter_fn(counter_fn) {} + DerivedCounter(TUnit::type type, const DerivedCounterFunction& counter_fn, + int64_t value = 0, int64_t level = 1) + : Counter(type, value, level), _counter_fn(counter_fn) {} + + virtual Counter* clone() const override { + return new DerivedCounter(type(), _counter_fn, value(), level()); + } int64_t value() const override { return _counter_fn(); } @@ -259,8 +275,13 @@ public: // NonZeroCounter will not be converted to Thrift if the value is 0. class NonZeroCounter : public Counter { public: - NonZeroCounter(TUnit::type type, int64_t level, const std::string& parent_name) - : Counter(type, 0, level), _parent_name(parent_name) {} + NonZeroCounter(TUnit::type type, int64_t level, const std::string& parent_name, + int64_t value = 0) + : Counter(type, value, level), _parent_name(parent_name) {} + + virtual Counter* clone() const override { + return new NonZeroCounter(type(), level(), parent_name(), value()); + } void to_thrift(const std::string& name, std::vector<TCounter>& tcounters, std::map<std::string, std::set<std::string>>& child_counters_map) override { @@ -272,6 +293,8 @@ public: } } + std::string parent_name() const { return _parent_name; } + private: const std::string _parent_name; }; diff --git a/be/src/vec/runtime/partitioner.h b/be/src/vec/runtime/partitioner.h index 39ed0e89941..d5492c3be87 100644 --- a/be/src/vec/runtime/partitioner.h +++ b/be/src/vec/runtime/partitioner.h @@ -23,7 +23,6 @@ namespace doris { #include "common/compile_check_begin.h" -class MemTracker; namespace vectorized { diff --git a/be/src/vec/sink/vdata_stream_sender.h b/be/src/vec/sink/vdata_stream_sender.h index 5fe35e4da11..4999602fdf4 100644 --- a/be/src/vec/sink/vdata_stream_sender.h +++ b/be/src/vec/sink/vdata_stream_sender.h @@ -56,7 +56,6 @@ namespace doris { #include "common/compile_check_begin.h" class 
ObjectPool; class RuntimeState; -class MemTracker; class RowDescriptor; class TDataSink; class TDataStreamSink; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org