This is an automated email from the ASF dual-hosted git repository.

zouxinyi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new bb96c24dc77 [fix](memory) Fix metadata memory tracking and profile (#44739)
bb96c24dc77 is described below

commit bb96c24dc771704cec2960db348261a2a26d8550
Author: Xinyi Zou <zouxi...@selectdb.com>
AuthorDate: Wed Dec 4 10:43:56 2024 +0800

    [fix](memory) Fix metadata memory tracking and profile (#44739)
    
    ### What problem does this PR solve?
    
    Problem Summary:
    
    1. Add `metadata` and `cache` types to `MemTrackerLimiter` and `MemoryProfile`.
    2. Metadata memory consists of rowsets, segments, and tablets.
    3. `all_segments_mem_tracker` excludes the segments held in SegmentCache, and
    all segments use the same memory calculation method.
    4. Segment memory is an estimate; it is not accurate and should only be
    used as a reference.
    5. For rowsets and tablets, only part of the memory is counted, mainly the
    PB (protobuf) objects.
    
    TODO: Some metadata memory could also be allocated through the Doris
    Allocator.
    
    ```
    MemoryProfile:
      MemoryOverviewSnapshot:
         - PhysicalMemory(VmRSS) Current: 5.85 GB (Peak: 7.13 GB)
         - VirtualMemory(VmSize) Current: 61.35 GB (Peak: 61.67 GB)
        UntrackedMemory:
           - Memory Current: 3.61 GB (Peak: 3.77 GB)
        TrackedMemory:
           - Memory Current: 2.24 GB (Peak: 3.48 GB)
          TasksMemory:
             - Memory Current: 0 (Peak: 749.17 MB)
               - ReservedMemory Current: 0 (Peak: 0)
            Details:
               - Compaction Current: 0 (Peak: 13.60 MB)
               - Load Current: 0 (Peak: 749.17 MB)
                 - AllMemTablesMemory Current: 0 (Peak: 853.20 MB)
               - Other Current: 0 (Peak: 0)
               - Query Current: 0 (Peak: 65.20 MB)
               - SchemaChange Current: 0 (Peak: 0)
          GlobalMemory:
             - Memory Current: 55.03 MB (Peak: 70.39 MB)
          MetadataMemory:
             - Memory Current: 972.13 MB (Peak: 972.13 MB)
          CacheMemory:
             - Memory Current: 360.21 MB (Peak: 675.32 MB)
          JemallocMemory:
             - Memory Current: 905.55 MB (Peak: 1.80 GB)
            Details:
               - Cache Current: 646.47 MB (Peak: 1.59 GB)
               - Metadata Current: 259.08 MB (Peak: 259.72 MB)
      GlobalMemorySnapshot:
        Orphan@global@id=3144f940fa84f930-d82d459289789494:
           - Memory Current: 1.73 MB (Peak: 1.73 MB)
        IOBufBlockMemory@global@id=1f4316bc5fb4c3d4-73f14d3652ed269b:
           - Memory Current: 2.65 GB (Peak: 2.65 GB)
        PointQueryExecutor@global@id=794878deaa4d8f56-769fbc87ccb7ec8d:
           - Memory Current: 36.90 MB (Peak: 36.90 MB)
        BlockCompression@global@id=0a4d429b1915dd8b-c2e429b1186d69a8:
           - Memory Current: 563.01 MB (Peak: 563.01 MB)
        RowIdStorageReader@global@id=a546ca5878c9204e-c4c72ea9088ca9a0:
           - Memory Current: 101.25 KB (Peak: 101.25 KB)
        SubcolumnsTree@global@id=294bde93c01d4ba6-4ce7b3290489a1b8:
           - Memory Current: 95.61 MB (Peak: 95.61 MB)
        S3FileBuffer@global@id=bc437d624d9a234d-03eb4bbc3d030a99:
           - Memory Current: 0 (Peak: 0)
      MetadataMemorySnapshot:
        Tablets(not in SchemaCache, TabletSchemaCache)@metadata@id=7646d676ad9f8de9-98beb5f1a8aa739b:
           - Memory Current: 26.84 GB (Peak: 26.84 GB)
        Segments(not in SegmentCache)@metadata@id=864b4887722b327a-516c912d4602b59b:
           - Memory Current: 4.98 MB (Peak: 4.98 MB)
        Rowsets@metadata@id=f0440aa247730497-752a527c71e3c393:
           - Memory Current: 5.57 GB (Peak: 5.57 GB)
        SegmentCache[size]@metadata@id=b34e798db2c25109-04c7ff3c722e4cbb:
           - Memory Current: 4.86 GB (Peak: 4.86 GB)
        SchemaCache[number]@metadata@id=9f41f61531da3fd9-163834a6a6f83eba:
           - Memory Current: 103.38 MB (Peak: 103.38 MB)
        TabletSchemaCache[number]@metadata@id=f9475c16ba7ea1ab-1d28b38568ef0b8d:
           - Memory Current: 170.65 MB (Peak: 170.65 MB)
      CacheMemorySnapshot:
        QueryCache@cache@id=504af426a68103cf-a60f36a48bc1998a:
           - Memory Current: 0 (Peak: 0)
        DataPageCache[size]@cache@id=f744bb4ef53c307b-4c021a8f9b8a77a5:
           - Memory Current: 14.33 GB (Peak: 14.33 GB)
        IndexPageCache[size]@cache@id=cb4ed0e456f081d4-d635edda9aa2778b:
           - Memory Current: 5.25 GB (Peak: 5.25 GB)
        PKIndexPageCache[size]@cache@id=8e4bf12b4d39133a-e0d878a18d73619b:
           - Memory Current: 2.72 GB (Peak: 2.72 GB)
        PointQueryRowCache[size]@cache@id=024dc624c80205f1-ca3221a154e4b68a:
           - Memory Current: 94.84 KB (Peak: 94.84 KB)
        CommonObjLRUCache[number]@cache@id=af40d4a03b391540-43338381c8e826a4:
           - Memory Current: 0 (Peak: 0)
        PointQueryLookupConnectionCache[number]@cache@id=284fa848dd97ccc8-56c61386a4950784:
           - Memory Current: 337.97 KB (Peak: 337.97 KB)
        InvertedIndexSearcherCache[size]@cache@id=f5404416ad0a9465-3cf12f5224276d8a:
           - Memory Current: 111.52 MB (Peak: 111.52 MB)
        InvertedIndexQueryCache[size]@cache@id=6f4d3c55508e3531-3fafe328c6d3e1ab:
           - Memory Current: 51.97 MB (Peak: 51.97 MB)
        QueryCache[size]@cache@id=274d47670e0796ad-542ad958334ff988:
           - Memory Current: 0 (Peak: 0)
        LastSuccessChannelCache[size]@cache@id=08449badfd6abb81-5b30599bfc60f79f:
           - Memory Current: 0 (Peak: 0)
        TabletColumnObjectPool[number]@cache@id=7b42da8391945265-06697f83a6bf01a7:
           - Memory Current: 151.67 MB (Peak: 151.67 MB)
        MowTabletVersionCache[number]@cache@id=f345a1adbd2a87dd-74b9caa3b30cf18b:
           - Memory Current: 150.94 MB (Peak: 150.94 MB)
        CreateTabletRRIdxCache[number]@cache@id=f043eee91e9aef2d-ef3a2245d50545ae:
           - Memory Current: 15.20 MB (Peak: 15.20 MB)
        MowDeleteBitmapAggCache[size]@cache@id=df47a8ed5bf3f11f-ac944d7d7e79d4b3:
           - Memory Current: 181.11 MB (Peak: 181.11 MB)
      TopMemoryTasksSnapshot:
        SnapshotManager@other_task@id=11480ad6017368b9-8aef2150427d229f:
           - Memory Current: 0 (Peak: 0)
        StreamLoadPipe@load@id=024958077445f4f7-d739fd5231dca890:
           - Memory Current: 1.39 GB (Peak: 1.39 GB)
        SegCompaction@compaction@id=2d4d5c796a9f48ee-fdf5dd9f8faf83a8:
           - Memory Current: 0 (Peak: 0)
        Load#Id=544468c7f5a9bac6-2c78fc9962f3908d@load@id=52497774c12c5ad1-1fda35667a3d0e8b:
           - Limit: 2.00 GB
           - Memory Current: 6.06 MB (Peak: 6.06 MB)
        Load#Id=624e7947099ef183-577a22cca9d34d89@load@id=ed4691ce5a748ca8-e49ef7cc9be10b95:
           - Limit: 2.00 GB
           - Memory Current: 6.06 MB (Peak: 6.06 MB)
     ```
---
 be/src/common/daemon.cpp                           |  11 +-
 be/src/olap/delta_writer_v2.h                      |   1 -
 be/src/olap/memtable_writer.h                      |   1 -
 be/src/olap/metadata_adder.h                       |  73 +++--
 .../rowset/segment_v2/indexed_column_reader.cpp    |   5 +-
 .../olap/rowset/segment_v2/indexed_column_reader.h |  15 +-
 be/src/olap/rowset/segment_v2/page_handle.h        |  12 +-
 be/src/olap/rowset/segment_v2/segment.cpp          |  17 +-
 be/src/olap/rowset/segment_v2/segment.h            |   7 +-
 be/src/olap/rowset_builder.h                       |   1 -
 be/src/olap/segment_loader.cpp                     |   3 +-
 be/src/olap/segment_loader.h                       |  13 +-
 be/src/olap/tablet_manager.cpp                     |  19 +-
 be/src/olap/tablet_manager.h                       |   3 -
 be/src/olap/tablet_meta.cpp                        |   6 -
 be/src/olap/tablet_meta.h                          |   4 -
 be/src/olap/tablet_schema.cpp                      |   2 -
 be/src/olap/tablet_schema.h                        |   7 +-
 be/src/olap/tablet_schema_cache.cpp                |   2 +-
 be/src/runtime/exec_env.h                          |  16 +-
 be/src/runtime/exec_env_init.cpp                   |  13 +-
 be/src/runtime/memory/cache_policy.h               |   6 +
 be/src/runtime/memory/lru_cache_policy.h           |  38 ++-
 be/src/runtime/memory/lru_cache_value_base.h       |  11 +-
 be/src/runtime/memory/mem_tracker_limiter.cpp      |  58 ++--
 be/src/runtime/memory/mem_tracker_limiter.h        |  14 +-
 be/src/runtime/memory/memory_profile.cpp           | 305 ++++++++++-----------
 be/src/runtime/memory/memory_profile.h             |  69 +++--
 be/src/util/runtime_profile.cpp                    |   4 +-
 be/src/util/runtime_profile.h                      |  45 ++-
 be/src/vec/runtime/partitioner.h                   |   1 -
 be/src/vec/sink/vdata_stream_sender.h              |   1 -
 32 files changed, 464 insertions(+), 319 deletions(-)

diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp
index ce2a6878dba..d3d55f10dde 100644
--- a/be/src/common/daemon.cpp
+++ b/be/src/common/daemon.cpp
@@ -500,15 +500,18 @@ void Daemon::cache_adjust_capacity_thread() {
 void Daemon::cache_prune_stale_thread() {
     int32_t interval = config::cache_periodic_prune_stale_sweep_sec;
     while 
(!_stop_background_threads_latch.wait_for(std::chrono::seconds(interval))) {
-        if (interval <= 0) {
-            LOG(WARNING) << "config of cache clean interval is illegal: [" << 
interval
-                         << "], force set to 3600 ";
-            interval = 3600;
+        if (config::cache_periodic_prune_stale_sweep_sec <= 0) {
+            LOG(WARNING) << "config of cache clean interval is: [" << interval
+                         << "], it means the cache prune stale thread is 
disabled, will wait 3s "
+                            "and check again.";
+            interval = 3;
+            continue;
         }
         if (config::disable_memory_gc) {
             continue;
         }
         CacheManager::instance()->for_each_cache_prune_stale();
+        interval = config::cache_periodic_prune_stale_sweep_sec;
     }
 }
 
diff --git a/be/src/olap/delta_writer_v2.h b/be/src/olap/delta_writer_v2.h
index beeb3d3ecd3..f9c2800a68f 100644
--- a/be/src/olap/delta_writer_v2.h
+++ b/be/src/olap/delta_writer_v2.h
@@ -46,7 +46,6 @@ namespace doris {
 
 class FlushToken;
 class MemTable;
-class MemTracker;
 class Schema;
 class StorageEngine;
 class TupleDescriptor;
diff --git a/be/src/olap/memtable_writer.h b/be/src/olap/memtable_writer.h
index fb07e740fa3..713400793a1 100644
--- a/be/src/olap/memtable_writer.h
+++ b/be/src/olap/memtable_writer.h
@@ -45,7 +45,6 @@ namespace doris {
 
 class FlushToken;
 class MemTable;
-class MemTracker;
 class StorageEngine;
 class TupleDescriptor;
 class SlotDescriptor;
diff --git a/be/src/olap/metadata_adder.h b/be/src/olap/metadata_adder.h
index 559c5db873b..5b5ba163224 100644
--- a/be/src/olap/metadata_adder.h
+++ b/be/src/olap/metadata_adder.h
@@ -20,6 +20,8 @@
 #include <bvar/bvar.h>
 #include <stdint.h>
 
+#include "runtime/exec_env.h"
+#include "runtime/memory/mem_tracker_limiter.h"
 #include "util/runtime_profile.h"
 
 namespace doris {
@@ -27,8 +29,8 @@ namespace doris {
 inline bvar::Adder<int64_t> 
g_rowset_meta_mem_bytes("doris_rowset_meta_mem_bytes");
 inline bvar::Adder<int64_t> g_rowset_meta_num("doris_rowset_meta_num");
 
-inline bvar::Adder<int64_t> 
g_all_rowsets_mem_bytes("doris_all_rowsets_mem_bytes");
-inline bvar::Adder<int64_t> g_all_rowsets_num("doris_all_rowsets_num");
+inline bvar::Adder<int64_t> g_rowset_mem_bytes("doris_rowset_mem_bytes");
+inline bvar::Adder<int64_t> g_rowset_num("doris_rowset_num");
 
 inline bvar::Adder<int64_t> 
g_tablet_meta_mem_bytes("doris_tablet_meta_mem_bytes");
 inline bvar::Adder<int64_t> g_tablet_meta_num("doris_tablet_meta_num");
@@ -42,8 +44,9 @@ inline bvar::Adder<int64_t> 
g_tablet_index_num("doris_tablet_index_num");
 inline bvar::Adder<int64_t> 
g_tablet_schema_mem_bytes("doris_tablet_schema_mem_bytes");
 inline bvar::Adder<int64_t> g_tablet_schema_num("doris_tablet_schema_num");
 
-inline bvar::Adder<int64_t> 
g_all_segments_mem_bytes("doris_all_segments_mem_bytes");
-inline bvar::Adder<int64_t> g_all_segments_num("doris_all_segments_num");
+inline bvar::Adder<int64_t> g_segment_mem_bytes("doris_segment_mem_bytes");
+inline bvar::Adder<int64_t> g_segment_num("doris_segment_num");
+inline bvar::Adder<int64_t> 
g_segment_estimate_mem_bytes("doris_segment_estimate_mem_bytes");
 
 inline bvar::Adder<int64_t> 
g_column_reader_mem_bytes("doris_column_reader_mem_bytes");
 inline bvar::Adder<int64_t> g_column_reader_num("doris_column_reader_num");
@@ -96,6 +99,10 @@ class ZoneMapIndexReader;
     When a derived Class extends MetadataAdder, then the Class's number and 
fixed length field's memory can be counted automatically.
     But if the Class has variable length field, then you should overwrite 
get_metadata_size and call update_metadata_size when the Class's memory changes.
 
+    get_metadata_size is only the memory of the metadata object itself, not 
include child objects,
+    for example, TabletMeta::get_metadata_size does not include the memory of 
TabletSchema.
+    Note, the memory allocated by Doris Allocator is not included.
+
     There are some special situations that need to be noted:
     1. when the derived Class override copy constructor, you'd better update 
memory size(call update_metadata_size) if derived class's 
     memory changed in its copy constructor or you not call MetadataAdder's 
copy constructor.
@@ -111,6 +118,31 @@ public:
 
     static void dump_metadata_object(RuntimeProfile* 
object_heap_dump_snapshot);
 
+    static int64_t get_all_tablets_size() {
+        return g_tablet_meta_mem_bytes.get_value() + 
g_tablet_column_mem_bytes.get_value() +
+               g_tablet_index_mem_bytes.get_value() + 
g_tablet_schema_mem_bytes.get_value();
+    }
+
+    static int64_t get_all_rowsets_size() {
+        return g_rowset_meta_mem_bytes.get_value() + 
g_rowset_mem_bytes.get_value();
+    }
+
+    static int64_t get_all_segments_size() {
+        return g_segment_mem_bytes.get_value() + 
g_column_reader_mem_bytes.get_value() +
+               g_bitmap_index_reader_mem_bytes.get_value() +
+               g_bloom_filter_index_reader_mem_bytes.get_value() +
+               g_index_page_reader_mem_bytes.get_value() +
+               g_indexed_column_reader_mem_bytes.get_value() +
+               g_inverted_index_reader_mem_bytes.get_value() +
+               g_ordinal_index_reader_mem_bytes.get_value() +
+               g_zone_map_index_reader_mem_bytes.get_value();
+    }
+
+    // Doris currently uses the estimated segments memory as the basis, maybe 
it is more realistic.
+    static int64_t get_all_segments_estimate_size() {
+        return g_segment_estimate_mem_bytes.get_value();
+    }
+
 protected:
     MetadataAdder(const MetadataAdder& other);
 
@@ -122,7 +154,6 @@ protected:
 
     MetadataAdder<T>& operator=(const MetadataAdder<T>& other) = default;
 
-private:
     int64_t _current_meta_size {0};
 
     void add_mem_size(int64_t val);
@@ -167,7 +198,7 @@ void MetadataAdder<T>::add_mem_size(int64_t val) {
     if constexpr (std::is_same_v<T, RowsetMeta>) {
         g_rowset_meta_mem_bytes << val;
     } else if constexpr (std::is_same_v<T, Rowset>) {
-        g_all_rowsets_mem_bytes << val;
+        g_rowset_mem_bytes << val;
     } else if constexpr (std::is_same_v<T, TabletMeta>) {
         g_tablet_meta_mem_bytes << val;
     } else if constexpr (std::is_same_v<T, TabletColumn>) {
@@ -177,7 +208,7 @@ void MetadataAdder<T>::add_mem_size(int64_t val) {
     } else if constexpr (std::is_same_v<T, TabletSchema>) {
         g_tablet_schema_mem_bytes << val;
     } else if constexpr (std::is_same_v<T, segment_v2::Segment>) {
-        g_all_segments_mem_bytes << val;
+        g_segment_mem_bytes << val;
     } else if constexpr (std::is_same_v<T, segment_v2::ColumnReader>) {
         g_column_reader_mem_bytes << val;
     } else if constexpr (std::is_same_v<T, segment_v2::BitmapIndexReader>) {
@@ -208,7 +239,7 @@ void MetadataAdder<T>::add_num(int64_t val) {
     if constexpr (std::is_same_v<T, RowsetMeta>) {
         g_rowset_meta_num << val;
     } else if constexpr (std::is_same_v<T, Rowset>) {
-        g_all_rowsets_num << val;
+        g_rowset_num << val;
     } else if constexpr (std::is_same_v<T, TabletMeta>) {
         g_tablet_meta_num << val;
     } else if constexpr (std::is_same_v<T, TabletColumn>) {
@@ -218,7 +249,7 @@ void MetadataAdder<T>::add_num(int64_t val) {
     } else if constexpr (std::is_same_v<T, TabletSchema>) {
         g_tablet_schema_num << val;
     } else if constexpr (std::is_same_v<T, segment_v2::Segment>) {
-        g_all_segments_num << val;
+        g_segment_num << val;
     } else if constexpr (std::is_same_v<T, segment_v2::ColumnReader>) {
         g_column_reader_num << val;
     } else if constexpr (std::is_same_v<T, segment_v2::BitmapIndexReader>) {
@@ -250,12 +281,12 @@ void 
MetadataAdder<T>::dump_metadata_object(RuntimeProfile* object_heap_dump_sna
     COUNTER_SET(rowset_meta_mem_bytes_counter, 
g_rowset_meta_mem_bytes.get_value());
     COUNTER_SET(rowset_meta_num_counter, g_rowset_meta_num.get_value());
 
-    RuntimeProfile::Counter* all_rowsets_mem_bytes_counter =
-            ADD_COUNTER(object_heap_dump_snapshot, "AllRowsetsMemBytes", 
TUnit::BYTES);
-    RuntimeProfile::Counter* all_rowsets_num_counter =
-            ADD_COUNTER(object_heap_dump_snapshot, "AllRowsetsNum", 
TUnit::UNIT);
-    COUNTER_SET(all_rowsets_mem_bytes_counter, 
g_all_rowsets_mem_bytes.get_value());
-    COUNTER_SET(all_rowsets_num_counter, g_all_rowsets_num.get_value());
+    RuntimeProfile::Counter* rowset_mem_bytes_counter =
+            ADD_COUNTER(object_heap_dump_snapshot, "RowsetMemBytes", 
TUnit::BYTES);
+    RuntimeProfile::Counter* rowset_num_counter =
+            ADD_COUNTER(object_heap_dump_snapshot, "RowsetNum", TUnit::UNIT);
+    COUNTER_SET(rowset_mem_bytes_counter, g_rowset_mem_bytes.get_value());
+    COUNTER_SET(rowset_num_counter, g_rowset_num.get_value());
 
     RuntimeProfile::Counter* tablet_meta_mem_bytes_counter =
             ADD_COUNTER(object_heap_dump_snapshot, "TabletMetaMemBytes", 
TUnit::BYTES);
@@ -285,12 +316,12 @@ void 
MetadataAdder<T>::dump_metadata_object(RuntimeProfile* object_heap_dump_sna
     COUNTER_SET(tablet_schema_mem_bytes_counter, 
g_tablet_schema_mem_bytes.get_value());
     COUNTER_SET(tablet_schema_num_counter, g_tablet_schema_num.get_value());
 
-    RuntimeProfile::Counter* all_segments_mem_bytes_counter =
-            ADD_COUNTER(object_heap_dump_snapshot, "AllSegmentsMemBytes", 
TUnit::BYTES);
-    RuntimeProfile::Counter* all_segments_num_counter =
-            ADD_COUNTER(object_heap_dump_snapshot, "AllSegmentsNum", 
TUnit::UNIT);
-    COUNTER_SET(all_segments_mem_bytes_counter, 
g_all_segments_mem_bytes.get_value());
-    COUNTER_SET(all_segments_num_counter, g_all_segments_num.get_value());
+    RuntimeProfile::Counter* segment_mem_bytes_counter =
+            ADD_COUNTER(object_heap_dump_snapshot, "SegmentMemBytes", 
TUnit::BYTES);
+    RuntimeProfile::Counter* segment_num_counter =
+            ADD_COUNTER(object_heap_dump_snapshot, "SegmentNum", TUnit::UNIT);
+    COUNTER_SET(segment_mem_bytes_counter, g_segment_mem_bytes.get_value());
+    COUNTER_SET(segment_num_counter, g_segment_num.get_value());
 
     RuntimeProfile::Counter* column_reader_mem_bytes_counter =
             ADD_COUNTER(object_heap_dump_snapshot, "ColumnReaderMemBytes", 
TUnit::BYTES);
diff --git a/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp 
b/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp
index 3028211f266..da6beff5d8d 100644
--- a/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp
@@ -81,7 +81,8 @@ Status IndexedColumnReader::load(bool use_page_cache, bool 
kept_in_memory,
             _sole_data_page = 
PagePointer(_meta.ordinal_index_meta().root_page());
         } else {
             
RETURN_IF_ERROR(load_index_page(_meta.ordinal_index_meta().root_page(),
-                                            &_ordinal_index_page_handle, 
&_ordinal_index_reader));
+                                            &_ordinal_index_page_handle,
+                                            _ordinal_index_reader.get()));
             _has_index_page = true;
         }
     }
@@ -92,7 +93,7 @@ Status IndexedColumnReader::load(bool use_page_cache, bool 
kept_in_memory,
             _sole_data_page = 
PagePointer(_meta.value_index_meta().root_page());
         } else {
             
RETURN_IF_ERROR(load_index_page(_meta.value_index_meta().root_page(),
-                                            &_value_index_page_handle, 
&_value_index_reader));
+                                            &_value_index_page_handle, 
_value_index_reader.get()));
             _has_index_page = true;
         }
     }
diff --git a/be/src/olap/rowset/segment_v2/indexed_column_reader.h 
b/be/src/olap/rowset/segment_v2/indexed_column_reader.h
index c3469f9f6be..c9640c0007c 100644
--- a/be/src/olap/rowset/segment_v2/indexed_column_reader.h
+++ b/be/src/olap/rowset/segment_v2/indexed_column_reader.h
@@ -50,9 +50,12 @@ class EncodingInfo;
 class IndexedColumnReader : public MetadataAdder<IndexedColumnReader> {
 public:
     explicit IndexedColumnReader(io::FileReaderSPtr file_reader, const 
IndexedColumnMetaPB& meta)
-            : _file_reader(std::move(file_reader)), _meta(meta) {}
+            : _file_reader(std::move(file_reader)), _meta(meta) {
+        _ordinal_index_reader = std::make_unique<IndexPageReader>();
+        _value_index_reader = std::make_unique<IndexPageReader>();
+    }
 
-    ~IndexedColumnReader();
+    ~IndexedColumnReader() override;
 
     Status load(bool use_page_cache, bool kept_in_memory,
                 OlapReaderStatistics* index_load_stats = nullptr);
@@ -90,8 +93,8 @@ private:
     bool _has_index_page = false;
     // valid only when the column contains only one data page
     PagePointer _sole_data_page;
-    IndexPageReader _ordinal_index_reader;
-    IndexPageReader _value_index_reader;
+    std::unique_ptr<IndexPageReader> _ordinal_index_reader;
+    std::unique_ptr<IndexPageReader> _value_index_reader;
     PageHandle _ordinal_index_page_handle;
     PageHandle _value_index_page_handle;
 
@@ -108,8 +111,8 @@ public:
     explicit IndexedColumnIterator(const IndexedColumnReader* reader,
                                    OlapReaderStatistics* stats = nullptr)
             : _reader(reader),
-              _ordinal_iter(&reader->_ordinal_index_reader),
-              _value_iter(&reader->_value_index_reader),
+              _ordinal_iter(reader->_ordinal_index_reader.get()),
+              _value_iter(reader->_value_index_reader.get()),
               _stats(stats) {}
 
     // Seek to the given ordinal entry. Entry 0 is the first entry.
diff --git a/be/src/olap/rowset/segment_v2/page_handle.h 
b/be/src/olap/rowset/segment_v2/page_handle.h
index b1e53ee8086..d4dfdfb2ff3 100644
--- a/be/src/olap/rowset/segment_v2/page_handle.h
+++ b/be/src/olap/rowset/segment_v2/page_handle.h
@@ -23,6 +23,10 @@
 #include "util/slice.h" // for Slice
 
 namespace doris {
+
+// After disable page cache, sometimes we need to know the percentage of data 
pages in query memory.
+inline bvar::Adder<int64_t> 
g_page_no_cache_mem_bytes("doris_page_no_cache_mem_bytes");
+
 namespace segment_v2 {
 
 // When a column page is read into memory, we use this to store it.
@@ -37,8 +41,7 @@ public:
     // This class will take the ownership of input data's memory. It will
     // free it when deconstructs.
     PageHandle(DataPage* data) : _is_data_owner(true), _data(data) {
-        _page_tracker = ExecEnv::GetInstance()->page_no_cache_mem_tracker();
-        _page_tracker->consume(_data->capacity());
+        g_page_no_cache_mem_bytes << _data->capacity();
     }
 
     // This class will take the content of cache data, and will make input
@@ -51,20 +54,18 @@ public:
         // we can use std::exchange if we switch c++14 on
         std::swap(_is_data_owner, other._is_data_owner);
         std::swap(_data, other._data);
-        _page_tracker = ExecEnv::GetInstance()->page_no_cache_mem_tracker();
     }
 
     PageHandle& operator=(PageHandle&& other) noexcept {
         std::swap(_is_data_owner, other._is_data_owner);
         std::swap(_data, other._data);
         _cache_data = std::move(other._cache_data);
-        _page_tracker = ExecEnv::GetInstance()->page_no_cache_mem_tracker();
         return *this;
     }
 
     ~PageHandle() {
         if (_is_data_owner) {
-            _page_tracker->release(_data->capacity());
+            g_page_no_cache_mem_bytes << -_data->capacity();
             delete _data;
         } else {
             DCHECK(_data == nullptr);
@@ -85,7 +86,6 @@ private:
     // otherwise _cache_data is valid, and data is belong to cache.
     bool _is_data_owner = false;
     DataPage* _data = nullptr;
-    std::shared_ptr<MemTracker> _page_tracker;
     PageCacheHandle _cache_data;
 
     // Don't allow copy and assign
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp 
b/be/src/olap/rowset/segment_v2/segment.cpp
index 0ad799683fc..d2c2920d4e6 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -163,7 +163,11 @@ Segment::Segment(uint32_t segment_id, RowsetId rowset_id, 
TabletSchemaSPtr table
           _tablet_schema(std::move(tablet_schema)),
           _idx_file_info(idx_file_info) {}
 
-Segment::~Segment() = default;
+Segment::~Segment() {
+    g_segment_estimate_mem_bytes << -_tracked_meta_mem_usage;
+    // if failed, fix `_tracked_meta_mem_usage` accuracy
+    DCHECK(_tracked_meta_mem_usage == meta_mem_usage());
+}
 
 io::UInt128Wrapper Segment::file_cache_key(std::string_view rowset_id, 
uint32_t seg_id) {
     return io::BlockFileCache::hash(fmt::format("{}_{}.dat", rowset_id, 
seg_id));
@@ -174,6 +178,12 @@ int64_t Segment::get_metadata_size() const {
            (_pk_index_meta ? _pk_index_meta->ByteSizeLong() : 0);
 }
 
+void Segment::update_metadata_size() {
+    MetadataAdder::update_metadata_size();
+    g_segment_estimate_mem_bytes << _meta_mem_usage - _tracked_meta_mem_usage;
+    _tracked_meta_mem_usage = _meta_mem_usage;
+}
+
 Status Segment::_open() {
     _footer_pb = std::make_unique<SegmentFooterPB>();
     RETURN_IF_ERROR(_parse_footer(_footer_pb.get()));
@@ -191,8 +201,6 @@ Status Segment::_open() {
         _meta_mem_usage += _pk_index_meta->ByteSizeLong();
     }
 
-    update_metadata_size();
-
     _meta_mem_usage += sizeof(*this);
     _meta_mem_usage += _tablet_schema->num_columns() * 
config::estimated_mem_per_column_reader;
 
@@ -201,6 +209,8 @@ Status Segment::_open() {
     // 0.01 comes from PrimaryKeyIndexBuilder::init
     _meta_mem_usage += BloomFilter::optimal_bit_num(_num_rows, 0.01) / 8;
 
+    update_metadata_size();
+
     return Status::OK();
 }
 
@@ -467,6 +477,7 @@ Status Segment::_load_pk_bloom_filter() {
         // for BE UT "segment_cache_test"
         return _load_pk_bf_once.call([this] {
             _meta_mem_usage += 100;
+            update_metadata_size();
             return Status::OK();
         });
     }
diff --git a/be/src/olap/rowset/segment_v2/segment.h 
b/be/src/olap/rowset/segment_v2/segment.h
index bc5ab1e1fdc..1b20c1f066b 100644
--- a/be/src/olap/rowset/segment_v2/segment.h
+++ b/be/src/olap/rowset/segment_v2/segment.h
@@ -57,7 +57,6 @@ class IDataType;
 class ShortKeyIndexDecoder;
 class Schema;
 class StorageReadOptions;
-class MemTracker;
 class PrimaryKeyIndexReader;
 class RowwiseIterator;
 struct RowLocation;
@@ -93,6 +92,7 @@ public:
     ~Segment();
 
     int64_t get_metadata_size() const override;
+    void update_metadata_size();
 
     Status new_iterator(SchemaSPtr schema, const StorageReadOptions& 
read_options,
                         std::unique_ptr<RowwiseIterator>* iter);
@@ -163,6 +163,8 @@ public:
 
     io::FileReaderSPtr file_reader() { return _file_reader; }
 
+    // Including the column reader memory.
+    // another method `get_metadata_size` not include the column reader, only 
the segment object itself.
     int64_t meta_mem_usage() const { return _meta_mem_usage; }
 
     // Identify the column by unique id or path info
@@ -249,9 +251,8 @@ private:
     // 1. Tracking memory use by segment meta data such as footer or index 
page.
     // 2. Tracking memory use by segment column reader
     // The memory consumed by querying is tracked in segment iterator.
-    // TODO: Segment::_meta_mem_usage Unknown value overflow, causes the value 
of SegmentMeta mem tracker
-    // is similar to `-2912341218700198079`. So, temporarily put it in 
experimental type tracker.
     int64_t _meta_mem_usage;
+    int64_t _tracked_meta_mem_usage = 0;
 
     RowsetId _rowset_id;
     TabletSchemaSPtr _tablet_schema;
diff --git a/be/src/olap/rowset_builder.h b/be/src/olap/rowset_builder.h
index 7fd57803736..fb2294d1770 100644
--- a/be/src/olap/rowset_builder.h
+++ b/be/src/olap/rowset_builder.h
@@ -38,7 +38,6 @@ namespace doris {
 class CalcDeleteBitmapToken;
 class FlushToken;
 class MemTable;
-class MemTracker;
 class StorageEngine;
 class TupleDescriptor;
 class SlotDescriptor;
diff --git a/be/src/olap/segment_loader.cpp b/be/src/olap/segment_loader.cpp
index 26ac54c699b..4240f7e250a 100644
--- a/be/src/olap/segment_loader.cpp
+++ b/be/src/olap/segment_loader.cpp
@@ -77,9 +77,8 @@ Status SegmentLoader::load_segments(const 
BetaRowsetSharedPtr& rowset,
         }
         if (use_cache && !config::disable_segment_cache) {
             // memory of SegmentCache::CacheValue will be handled by 
SegmentCache
-            auto* cache_value = new SegmentCache::CacheValue();
+            auto* cache_value = new SegmentCache::CacheValue(segment);
             _cache_mem_usage += segment->meta_mem_usage();
-            cache_value->segment = std::move(segment);
             _segment_cache->insert(cache_key, *cache_value, cache_handle);
         } else {
             cache_handle->push_segment(std::move(segment));
diff --git a/be/src/olap/segment_loader.h b/be/src/olap/segment_loader.h
index 834906da93b..2c5b1ed200d 100644
--- a/be/src/olap/segment_loader.h
+++ b/be/src/olap/segment_loader.h
@@ -75,9 +75,9 @@ public:
     // Holding all opened segments of a rowset.
     class CacheValue : public LRUCacheValueBase {
     public:
-        ~CacheValue() override { segment.reset(); }
+        CacheValue(segment_v2::SegmentSharedPtr segment_) : 
segment(std::move(segment_)) {}
 
-        segment_v2::SegmentSharedPtr segment;
+        const segment_v2::SegmentSharedPtr segment;
     };
 
     SegmentCache(size_t memory_bytes_limit, size_t segment_num_limit)
@@ -124,8 +124,13 @@ public:
 
     void erase_segments(const RowsetId& rowset_id, int64_t num_segments);
 
-    // Just used for BE UT
-    int64_t cache_mem_usage() const { return _cache_mem_usage; }
+    int64_t cache_mem_usage() const {
+#ifdef BE_TEST
+        return _cache_mem_usage;
+#else
+        return _segment_cache->value_mem_consumption();
+#endif
+    }
 
 private:
     SegmentLoader();
diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp
index a18dc0b4a16..59f0e1893d8 100644
--- a/be/src/olap/tablet_manager.cpp
+++ b/be/src/olap/tablet_manager.cpp
@@ -57,8 +57,6 @@
 #include "olap/tablet_schema.h"
 #include "olap/txn_manager.h"
 #include "runtime/exec_env.h"
-#include "runtime/memory/mem_tracker.h"
-#include "runtime/thread_context.h"
 #include "service/backend_options.h"
 #include "util/defer_op.h"
 #include "util/doris_metrics.h"
@@ -83,28 +81,18 @@ using std::vector;
 namespace doris {
 using namespace ErrorCode;
 
-DEFINE_GAUGE_METRIC_PROTOTYPE_5ARG(tablet_meta_mem_consumption, 
MetricUnit::BYTES, "",
-                                   mem_consumption, Labels({{"type", 
"tablet_meta"}}));
-
 bvar::Adder<int64_t> 
g_tablet_meta_schema_columns_count("tablet_meta_schema_columns_count");
 
 TabletManager::TabletManager(StorageEngine& engine, int32_t 
tablet_map_lock_shard_size)
         : _engine(engine),
-          
_tablet_meta_mem_tracker(std::make_shared<MemTracker>("TabletMeta(experimental)")),
           _tablets_shards_size(tablet_map_lock_shard_size),
           _tablets_shards_mask(tablet_map_lock_shard_size - 1) {
     CHECK_GT(_tablets_shards_size, 0);
     CHECK_EQ(_tablets_shards_size & _tablets_shards_mask, 0);
     _tablets_shards.resize(_tablets_shards_size);
-    REGISTER_HOOK_METRIC(tablet_meta_mem_consumption,
-                         [this]() { return 
_tablet_meta_mem_tracker->consumption(); });
 }
 
-TabletManager::~TabletManager() {
-#ifndef BE_TEST
-    DEREGISTER_HOOK_METRIC(tablet_meta_mem_consumption);
-#endif
-}
+TabletManager::~TabletManager() = default;
 
 Status TabletManager::_add_tablet_unlocked(TTabletId tablet_id, const 
TabletSharedPtr& tablet,
                                            bool update_meta, bool force, 
RuntimeProfile* profile) {
@@ -242,10 +230,6 @@ Status 
TabletManager::_add_tablet_to_map_unlocked(TTabletId tablet_id,
     tablet_map_t& tablet_map = _get_tablet_map(tablet_id);
     tablet_map[tablet_id] = tablet;
     _add_tablet_to_partition(tablet);
-    // TODO: remove multiply 2 of tablet meta mem size
-    // Because table schema will copy in tablet, there will be double mem cost
-    // so here multiply 2
-    _tablet_meta_mem_tracker->consume(tablet->tablet_meta()->mem_size() * 2);
     g_tablet_meta_schema_columns_count << 
tablet->tablet_meta()->tablet_columns_num();
     COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "RegisterTabletInfo", "AddTablet"),
                    static_cast<int64_t>(watch.reset()));
@@ -599,7 +583,6 @@ Status TabletManager::_drop_tablet(TTabletId tablet_id, 
TReplicaId replica_id, b
     }
 
     to_drop_tablet->deregister_tablet_from_dir();
-    
_tablet_meta_mem_tracker->release(to_drop_tablet->tablet_meta()->mem_size() * 
2);
     g_tablet_meta_schema_columns_count << 
-to_drop_tablet->tablet_meta()->tablet_columns_num();
     return Status::OK();
 }
diff --git a/be/src/olap/tablet_manager.h b/be/src/olap/tablet_manager.h
index 42623cf05f2..6b6e7998f9c 100644
--- a/be/src/olap/tablet_manager.h
+++ b/be/src/olap/tablet_manager.h
@@ -251,9 +251,6 @@ private:
 
     StorageEngine& _engine;
 
-    // TODO: memory size of TabletSchema cannot be accurately tracked.
-    std::shared_ptr<MemTracker> _tablet_meta_mem_tracker;
-
     const int32_t _tablets_shards_size;
     const int32_t _tablets_shards_mask;
     std::vector<tablets_shard> _tablets_shards;
diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp
index 0570aff349c..fc9fc034b0b 100644
--- a/be/src/olap/tablet_meta.cpp
+++ b/be/src/olap/tablet_meta.cpp
@@ -781,12 +781,6 @@ void TabletMeta::to_meta_pb(TabletMetaPB* tablet_meta_pb) {
             time_series_compaction_level_threshold());
 }
 
-int64_t TabletMeta::mem_size() const {
-    auto size = sizeof(TabletMeta);
-    size += _schema->mem_size();
-    return size;
-}
-
 void TabletMeta::to_json(string* json_string, json2pb::Pb2JsonOptions& 
options) {
     TabletMetaPB tablet_meta_pb;
     to_meta_pb(&tablet_meta_pb);
diff --git a/be/src/olap/tablet_meta.h b/be/src/olap/tablet_meta.h
index 34794ef6d0a..a8c82a4abd2 100644
--- a/be/src/olap/tablet_meta.h
+++ b/be/src/olap/tablet_meta.h
@@ -140,10 +140,6 @@ public:
 
     void to_meta_pb(TabletMetaPB* tablet_meta_pb);
     void to_json(std::string* json_string, json2pb::Pb2JsonOptions& options);
-    // Don't use.
-    // TODO: memory size of TabletSchema cannot be accurately tracked.
-    // In some places, temporarily use num_columns() as TabletSchema size.
-    int64_t mem_size() const;
     size_t tablet_columns_num() const { return _schema->num_columns(); }
 
     TabletTypePB tablet_type() const { return _tablet_type; }
diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp
index c4f96e22148..47179ce19b2 100644
--- a/be/src/olap/tablet_schema.cpp
+++ b/be/src/olap/tablet_schema.cpp
@@ -41,8 +41,6 @@
 #include "olap/tablet_column_object_pool.h"
 #include "olap/types.h"
 #include "olap/utils.h"
-#include "runtime/memory/lru_cache_policy.h"
-#include "runtime/thread_context.h"
 #include "tablet_meta.h"
 #include "vec/aggregate_functions/aggregate_function_simple_factory.h"
 #include "vec/aggregate_functions/aggregate_function_state_union.h"
diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h
index 5fb3deafd77..9a0cd53f7b1 100644
--- a/be/src/olap/tablet_schema.h
+++ b/be/src/olap/tablet_schema.h
@@ -332,10 +332,8 @@ public:
     void copy_from(const TabletSchema& tablet_schema);
     void update_index_info_from(const TabletSchema& tablet_schema);
     std::string to_key() const;
-    // Don't use.
-    // TODO: memory size of TabletSchema cannot be accurately tracked.
-    // In some places, temporarily use num_columns() as TabletSchema size.
-    int64_t mem_size() const { return _mem_size; }
+    // get_metadata_size is only the memory of the TabletSchema itself, not including child objects.
+    int64_t mem_size() const { return get_metadata_size(); }
     size_t row_size() const;
     int32_t field_index(const std::string& field_name) const;
     int32_t field_index(const vectorized::PathInData& path) const;
@@ -573,7 +571,6 @@ private:
     int64_t _db_id = -1;
     bool _disable_auto_compaction = false;
     bool _enable_single_replica_compaction = false;
-    int64_t _mem_size = 0;
     bool _store_row_column = false;
     bool _skip_write_index_on_load = false;
     InvertedIndexStorageFormatPB _inverted_index_storage_format = 
InvertedIndexStorageFormatPB::V1;
diff --git a/be/src/olap/tablet_schema_cache.cpp 
b/be/src/olap/tablet_schema_cache.cpp
index fd238fa5aff..e044ef9c042 100644
--- a/be/src/olap/tablet_schema_cache.cpp
+++ b/be/src/olap/tablet_schema_cache.cpp
@@ -56,7 +56,7 @@ std::pair<Cache::Handle*, TabletSchemaSPtr> 
TabletSchemaCache::insert(const std:
         tablet_schema_ptr->init_from_pb(pb, false, true);
         value->tablet_schema = tablet_schema_ptr;
         lru_handle = LRUCachePolicy::insert(key_signature, value, 
tablet_schema_ptr->num_columns(),
-                                            0, CachePriority::NORMAL);
+                                            tablet_schema_ptr->mem_size(), 
CachePriority::NORMAL);
         g_tablet_schema_cache_count << 1;
         g_tablet_schema_cache_columns_count << 
tablet_schema_ptr->num_columns();
     }
diff --git a/be/src/runtime/exec_env.h b/be/src/runtime/exec_env.h
index a27936f5f0d..636ce2bf288 100644
--- a/be/src/runtime/exec_env.h
+++ b/be/src/runtime/exec_env.h
@@ -178,7 +178,6 @@ public:
     std::vector<TrackerLimiterGroup> mem_tracker_limiter_pool;
     void init_mem_tracker();
     std::shared_ptr<MemTrackerLimiter> orphan_mem_tracker() { return 
_orphan_mem_tracker; }
-    std::shared_ptr<MemTracker> page_no_cache_mem_tracker() { return 
_page_no_cache_mem_tracker; }
     std::shared_ptr<MemTrackerLimiter> brpc_iobuf_block_memory_tracker() {
         return _brpc_iobuf_block_memory_tracker;
     }
@@ -188,6 +187,15 @@ public:
     std::shared_ptr<MemTrackerLimiter> stream_load_pipe_tracker() {
         return _stream_load_pipe_tracker;
     }
+    std::shared_ptr<MemTrackerLimiter> tablets_no_cache_mem_tracker() {
+        return _tablets_no_cache_mem_tracker;
+    }
+    std::shared_ptr<MemTrackerLimiter> rowsets_no_cache_mem_tracker() {
+        return _rowsets_no_cache_mem_tracker;
+    }
+    std::shared_ptr<MemTrackerLimiter> segments_no_cache_mem_tracker() {
+        return _segments_no_cache_mem_tracker;
+    }
     std::shared_ptr<MemTrackerLimiter> point_query_executor_mem_tracker() {
         return _point_query_executor_mem_tracker;
     }
@@ -377,13 +385,15 @@ private:
     // Ideally, all threads are expected to attach to the specified tracker, 
so that "all memory has its own ownership",
     // and the consumption of the orphan mem tracker is close to 0, but 
greater than 0.
     std::shared_ptr<MemTrackerLimiter> _orphan_mem_tracker;
-    // page size not in cache, data page/index page/etc.
-    std::shared_ptr<MemTracker> _page_no_cache_mem_tracker;
     std::shared_ptr<MemTrackerLimiter> _brpc_iobuf_block_memory_tracker;
     // Count the memory consumption of segment compaction tasks.
     std::shared_ptr<MemTrackerLimiter> _segcompaction_mem_tracker;
     std::shared_ptr<MemTrackerLimiter> _stream_load_pipe_tracker;
 
+    std::shared_ptr<MemTrackerLimiter> _tablets_no_cache_mem_tracker;
+    std::shared_ptr<MemTrackerLimiter> _rowsets_no_cache_mem_tracker;
+    std::shared_ptr<MemTrackerLimiter> _segments_no_cache_mem_tracker;
+
     // Tracking memory may be shared between multiple queries.
     std::shared_ptr<MemTrackerLimiter> _point_query_executor_mem_tracker;
     std::shared_ptr<MemTrackerLimiter> _block_compression_mem_tracker;
diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp
index b7f926cc3b4..a371cdb947f 100644
--- a/be/src/runtime/exec_env_init.cpp
+++ b/be/src/runtime/exec_env_init.cpp
@@ -599,15 +599,20 @@ void ExecEnv::init_mem_tracker() {
     _s_tracking_memory = true;
     _orphan_mem_tracker =
             MemTrackerLimiter::create_shared(MemTrackerLimiter::Type::GLOBAL, 
"Orphan");
-    _page_no_cache_mem_tracker = std::make_shared<MemTracker>("PageNoCache");
     _brpc_iobuf_block_memory_tracker =
             MemTrackerLimiter::create_shared(MemTrackerLimiter::Type::GLOBAL, 
"IOBufBlockMemory");
     _segcompaction_mem_tracker =
-            MemTrackerLimiter::create_shared(MemTrackerLimiter::Type::GLOBAL, 
"SegCompaction");
+            
MemTrackerLimiter::create_shared(MemTrackerLimiter::Type::COMPACTION, 
"SegCompaction");
+    _tablets_no_cache_mem_tracker = MemTrackerLimiter::create_shared(
+            MemTrackerLimiter::Type::METADATA, "Tablets(not in SchemaCache, 
TabletSchemaCache)");
+    _segments_no_cache_mem_tracker = MemTrackerLimiter::create_shared(
+            MemTrackerLimiter::Type::METADATA, "Segments(not in 
SegmentCache)");
+    _rowsets_no_cache_mem_tracker =
+            
MemTrackerLimiter::create_shared(MemTrackerLimiter::Type::METADATA, "Rowsets");
     _point_query_executor_mem_tracker =
             MemTrackerLimiter::create_shared(MemTrackerLimiter::Type::GLOBAL, 
"PointQueryExecutor");
     _query_cache_mem_tracker =
-            MemTrackerLimiter::create_shared(MemTrackerLimiter::Type::GLOBAL, 
"QueryCache");
+            MemTrackerLimiter::create_shared(MemTrackerLimiter::Type::CACHE, 
"QueryCache");
     _block_compression_mem_tracker =
             MemTrackerLimiter::create_shared(MemTrackerLimiter::Type::GLOBAL, 
"BlockCompression");
     _rowid_storage_reader_tracker =
@@ -716,7 +721,7 @@ void ExecEnv::destroy() {
     _file_cache_open_fd_cache.reset();
     SAFE_STOP(_write_cooldown_meta_executors);
 
-    // StorageEngine must be destoried before _page_no_cache_mem_tracker.reset 
and _cache_manager destory
+    // StorageEngine must be destroyed before _cache_manager is destroyed
     SAFE_STOP(_storage_engine);
     _storage_engine.reset();
 
diff --git a/be/src/runtime/memory/cache_policy.h 
b/be/src/runtime/memory/cache_policy.h
index e7e1c73e7cb..8f077a4eb45 100644
--- a/be/src/runtime/memory/cache_policy.h
+++ b/be/src/runtime/memory/cache_policy.h
@@ -17,6 +17,8 @@
 
 #pragma once
 
+#include <vector>
+
 #include "util/runtime_profile.h"
 
 namespace doris {
@@ -123,6 +125,7 @@ public:
             {"CloudTabletCache", CacheType::CLOUD_TABLET_CACHE},
             {"CloudTxnDeleteBitmapCache", 
CacheType::CLOUD_TXN_DELETE_BITMAP_CACHE},
             {"ForUTCacheNumber", CacheType::FOR_UT_CACHE_NUMBER},
+            {"QueryCache", CacheType::QUERY_CACHE},
             {"TabletColumnObjectPool", CacheType::TABLET_COLUMN_OBJECT_POOL}};
 
     static CacheType string_to_type(std::string type) {
@@ -133,6 +136,9 @@ public:
         }
     }
 
+    inline static std::vector<CacheType> MetadataCache {
+            CacheType::SEGMENT_CACHE, CacheType::SCHEMA_CACHE, 
CacheType::TABLET_SCHEMA_CACHE};
+
     CachePolicy(CacheType type, size_t capacity, uint32_t stale_sweep_time_s, 
bool enable_prune);
     virtual ~CachePolicy();
 
diff --git a/be/src/runtime/memory/lru_cache_policy.h 
b/be/src/runtime/memory/lru_cache_policy.h
index ea34e2837f1..3fdb43facd7 100644
--- a/be/src/runtime/memory/lru_cache_policy.h
+++ b/be/src/runtime/memory/lru_cache_policy.h
@@ -104,20 +104,26 @@ public:
         return _mem_tracker->consumption();
     }
 
+    int64_t value_mem_consumption() {
+        DCHECK(_value_mem_tracker != nullptr);
+        return _value_mem_tracker->consumption();
+    }
+
     // Insert will consume tracking_bytes to _mem_tracker and cache value 
destroy will release tracking_bytes.
-    // If LRUCacheType::SIZE, tracking_bytes usually equal to charge.
-    // If LRUCacheType::NUMBER, tracking_bytes usually not equal to charge, at 
this time charge is an weight.
-    // If LRUCacheType::SIZE and tracking_bytes equals 0, memory must be 
tracked in Doris Allocator,
+    // If LRUCacheType::SIZE, value_tracking_bytes is usually equal to charge.
+    // If LRUCacheType::NUMBER, value_tracking_bytes is usually not equal to charge; in that case charge is a weight.
+    // If LRUCacheType::SIZE and value_tracking_bytes equals 0, memory must be 
tracked in Doris Allocator,
     //    cache value is allocated using Alloctor.
-    // If LRUCacheType::NUMBER and tracking_bytes equals 0, usually currently 
cannot accurately tracking memory size,
+    // If LRUCacheType::NUMBER and value_tracking_bytes equals 0, usually the memory size currently cannot be tracked accurately,
     //    only tracking handle_size(106).
-    Cache::Handle* insert(const CacheKey& key, void* value, size_t charge, 
size_t tracking_bytes,
+    Cache::Handle* insert(const CacheKey& key, void* value, size_t charge,
+                          size_t value_tracking_bytes,
                           CachePriority priority = CachePriority::NORMAL) {
-        size_t tracking_bytes_with_handle = sizeof(LRUHandle) - 1 + key.size() 
+ tracking_bytes;
+        size_t tracking_bytes = sizeof(LRUHandle) - 1 + key.size() + 
value_tracking_bytes;
         if (value != nullptr) {
-            mem_tracker()->consume(tracking_bytes_with_handle);
             ((LRUCacheValueBase*)value)
-                    ->set_tracking_bytes(tracking_bytes_with_handle, 
_mem_tracker);
+                    ->set_tracking_bytes(tracking_bytes, _mem_tracker, 
value_tracking_bytes,
+                                         _value_mem_tracker);
         }
         return _cache->insert(key, value, charge, priority);
     }
@@ -265,9 +271,18 @@ public:
 
 protected:
     void _init_mem_tracker(const std::string& type_name) {
-        _mem_tracker = MemTrackerLimiter::create_shared(
-                MemTrackerLimiter::Type::GLOBAL,
-                fmt::format("{}[{}]", type_string(_type), type_name));
+        if (std::find(CachePolicy::MetadataCache.begin(), 
CachePolicy::MetadataCache.end(),
+                      _type) == CachePolicy::MetadataCache.end()) {
+            _mem_tracker = MemTrackerLimiter::create_shared(
+                    MemTrackerLimiter::Type::CACHE,
+                    fmt::format("{}[{}]", type_string(_type), type_name));
+        } else {
+            _mem_tracker = MemTrackerLimiter::create_shared(
+                    MemTrackerLimiter::Type::METADATA,
+                    fmt::format("{}[{}]", type_string(_type), type_name));
+        }
+        _value_mem_tracker = std::make_shared<MemTracker>(
+                fmt::format("{}::Value[{}]", type_string(_type), type_name));
     }
 
     // if check_capacity failed, will return dummy lru cache,
@@ -277,6 +292,7 @@ protected:
     LRUCacheType _lru_cache_type;
 
     std::shared_ptr<MemTrackerLimiter> _mem_tracker;
+    std::shared_ptr<MemTracker> _value_mem_tracker;
 };
 
 } // namespace doris
diff --git a/be/src/runtime/memory/lru_cache_value_base.h 
b/be/src/runtime/memory/lru_cache_value_base.h
index f9e534e6600..a9a3ae5ddab 100644
--- a/be/src/runtime/memory/lru_cache_value_base.h
+++ b/be/src/runtime/memory/lru_cache_value_base.h
@@ -28,18 +28,27 @@ public:
     virtual ~LRUCacheValueBase() {
         if (_tracking_bytes > 0) {
             _mem_tracker->release(_tracking_bytes);
+            _value_mem_tracker->release(_value_tracking_bytes);
         }
     }
 
     void set_tracking_bytes(size_t tracking_bytes,
-                            const std::shared_ptr<MemTrackerLimiter>& 
mem_tracker) {
+                            const std::shared_ptr<MemTrackerLimiter>& 
mem_tracker,
+                            size_t value_tracking_bytes,
+                            const std::shared_ptr<MemTracker>& 
value_mem_tracker) {
         this->_tracking_bytes = tracking_bytes;
         this->_mem_tracker = mem_tracker;
+        this->_value_tracking_bytes = value_tracking_bytes;
+        this->_value_mem_tracker = value_mem_tracker;
+        _mem_tracker->consume(_tracking_bytes);
+        _value_mem_tracker->consume(_value_tracking_bytes);
     }
 
 protected:
     size_t _tracking_bytes = 0;
+    size_t _value_tracking_bytes = 0;
     std::shared_ptr<MemTrackerLimiter> _mem_tracker;
+    std::shared_ptr<MemTracker> _value_mem_tracker;
 };
 
 } // namespace doris
diff --git a/be/src/runtime/memory/mem_tracker_limiter.cpp 
b/be/src/runtime/memory/mem_tracker_limiter.cpp
index 05ff13f0e7c..ac4684835a6 100644
--- a/be/src/runtime/memory/mem_tracker_limiter.cpp
+++ b/be/src/runtime/memory/mem_tracker_limiter.cpp
@@ -66,9 +66,13 @@ MemTrackerLimiter::MemTrackerLimiter(Type type, const 
std::string& label, int64_
     _uid = UniqueId::gen_uid();
     if (_type == Type::GLOBAL) {
         _group_num = 0;
+    } else if (_type == Type::METADATA) {
+        _group_num = 1;
+    } else if (_type == Type::CACHE) {
+        _group_num = 2;
     } else {
         _group_num =
-                mem_tracker_limiter_group_counter.fetch_add(1) % 
(MEM_TRACKER_GROUP_NUM - 1) + 1;
+                mem_tracker_limiter_group_counter.fetch_add(1) % 
(MEM_TRACKER_GROUP_NUM - 3) + 3;
     }
 
     // currently only select/load need runtime query statistics
@@ -208,24 +212,20 @@ std::string MemTrackerLimiter::print_address_sanitizers() 
{
 RuntimeProfile* MemTrackerLimiter::make_profile(RuntimeProfile* profile) const 
{
     RuntimeProfile* profile_snapshot = profile->create_child(
             fmt::format("{}@{}@id={}", _label, type_string(_type), 
_uid.to_string()), true, false);
-    RuntimeProfile::Counter* current_usage_counter =
-            ADD_COUNTER(profile_snapshot, "CurrentUsage", TUnit::BYTES);
-    RuntimeProfile::Counter* peak_usage_counter =
-            ADD_COUNTER(profile_snapshot, "PeakUsage", TUnit::BYTES);
-    COUNTER_SET(current_usage_counter, consumption());
-    COUNTER_SET(peak_usage_counter, peak_consumption());
+    RuntimeProfile::HighWaterMarkCounter* usage_counter =
+            profile_snapshot->AddHighWaterMarkCounter("Memory", TUnit::BYTES);
+    COUNTER_SET(usage_counter, peak_consumption());
+    COUNTER_SET(usage_counter, consumption());
     if (has_limit()) {
         RuntimeProfile::Counter* limit_counter =
                 ADD_COUNTER(profile_snapshot, "Limit", TUnit::BYTES);
         COUNTER_SET(limit_counter, _limit);
     }
     if (reserved_peak_consumption() != 0) {
-        RuntimeProfile::Counter* reserved_counter =
-                ADD_COUNTER(profile_snapshot, "ReservedMemory", TUnit::BYTES);
-        RuntimeProfile::Counter* reserved_peak_counter =
-                ADD_COUNTER(profile_snapshot, "ReservedPeakMemory", 
TUnit::BYTES);
+        RuntimeProfile::HighWaterMarkCounter* reserved_counter =
+                profile_snapshot->AddHighWaterMarkCounter("ReservedMemory", 
TUnit::BYTES);
+        COUNTER_SET(reserved_counter, reserved_peak_consumption());
         COUNTER_SET(reserved_counter, reserved_consumption());
-        COUNTER_SET(reserved_peak_counter, reserved_peak_consumption());
     }
     return profile_snapshot;
 }
@@ -268,8 +268,26 @@ void 
MemTrackerLimiter::make_type_trackers_profile(RuntimeProfile* profile,
                 tracker->make_profile(profile);
             }
         }
+    } else if (type == Type::METADATA) {
+        std::lock_guard<std::mutex> l(
+                
ExecEnv::GetInstance()->mem_tracker_limiter_pool[1].group_lock);
+        for (auto trackerWptr : 
ExecEnv::GetInstance()->mem_tracker_limiter_pool[1].trackers) {
+            auto tracker = trackerWptr.lock();
+            if (tracker != nullptr) {
+                tracker->make_profile(profile);
+            }
+        }
+    } else if (type == Type::CACHE) {
+        std::lock_guard<std::mutex> l(
+                
ExecEnv::GetInstance()->mem_tracker_limiter_pool[2].group_lock);
+        for (auto trackerWptr : 
ExecEnv::GetInstance()->mem_tracker_limiter_pool[2].trackers) {
+            auto tracker = trackerWptr.lock();
+            if (tracker != nullptr) {
+                tracker->make_profile(profile);
+            }
+        }
     } else {
-        for (unsigned i = 1; i < 
ExecEnv::GetInstance()->mem_tracker_limiter_pool.size(); ++i) {
+        for (unsigned i = 3; i < 
ExecEnv::GetInstance()->mem_tracker_limiter_pool.size(); ++i) {
             std::lock_guard<std::mutex> l(
                     
ExecEnv::GetInstance()->mem_tracker_limiter_pool[i].group_lock);
             for (auto trackerWptr : 
ExecEnv::GetInstance()->mem_tracker_limiter_pool[i].trackers) {
@@ -296,8 +314,8 @@ void 
MemTrackerLimiter::make_top_consumption_tasks_tracker_profile(RuntimeProfil
     std::unique_ptr<RuntimeProfile> tmp_profile_snapshot =
             std::make_unique<RuntimeProfile>("tmpSnapshot");
     std::priority_queue<std::pair<int64_t, RuntimeProfile*>> max_pq;
-    // start from 2, not include global type.
-    for (unsigned i = 1; i < 
ExecEnv::GetInstance()->mem_tracker_limiter_pool.size(); ++i) {
+    // start from 3, not including the global/metadata/cache types.
+    for (unsigned i = 3; i < 
ExecEnv::GetInstance()->mem_tracker_limiter_pool.size(); ++i) {
         std::lock_guard<std::mutex> l(
                 
ExecEnv::GetInstance()->mem_tracker_limiter_pool[i].group_lock);
         for (auto trackerWptr : 
ExecEnv::GetInstance()->mem_tracker_limiter_pool[i].trackers) {
@@ -326,13 +344,19 @@ void 
MemTrackerLimiter::make_all_tasks_tracker_profile(RuntimeProfile* profile)
     types_profile[Type::SCHEMA_CHANGE] = 
profile->create_child("SchemaChangeTasks", true, false);
     types_profile[Type::OTHER] = profile->create_child("OtherTasks", true, 
false);
 
-    // start from 2, not include global type.
-    for (unsigned i = 1; i < 
ExecEnv::GetInstance()->mem_tracker_limiter_pool.size(); ++i) {
+    // start from 3, not including the global/metadata/cache types.
+    for (unsigned i = 3; i < 
ExecEnv::GetInstance()->mem_tracker_limiter_pool.size(); ++i) {
         std::lock_guard<std::mutex> l(
                 
ExecEnv::GetInstance()->mem_tracker_limiter_pool[i].group_lock);
         for (auto trackerWptr : 
ExecEnv::GetInstance()->mem_tracker_limiter_pool[i].trackers) {
             auto tracker = trackerWptr.lock();
             if (tracker != nullptr) {
+                // BufferControlBlock will continue to exist for 5 minutes after the query ends, even if the
+                // result buffer is empty, and will not be shown in the profile. Of course, this code is tricky.
+                if (tracker->consumption() == 0 &&
+                    tracker->label().starts_with("BufferControlBlock")) {
+                    continue;
+                }
                 tracker->make_profile(types_profile[tracker->type()]);
             }
         }
diff --git a/be/src/runtime/memory/mem_tracker_limiter.h 
b/be/src/runtime/memory/mem_tracker_limiter.h
index 445856b1f6a..43b20a410ff 100644
--- a/be/src/runtime/memory/mem_tracker_limiter.h
+++ b/be/src/runtime/memory/mem_tracker_limiter.h
@@ -77,12 +77,14 @@ public:
     enum class GCType { PROCESS = 0, WORK_LOAD_GROUP = 1 };
 
     enum class Type {
-        GLOBAL = 0,        // Life cycle is the same as the process, e.g. 
Cache and default Orphan
+        GLOBAL = 0,        // Life cycle is the same as the process, except 
cache and metadata.
         QUERY = 1,         // Count the memory consumption of all Query tasks.
         LOAD = 2,          // Count the memory consumption of all Load tasks.
         COMPACTION = 3,    // Count the memory consumption of all Base and 
Cumulative tasks.
         SCHEMA_CHANGE = 4, // Count the memory consumption of all SchemaChange 
tasks.
-        OTHER = 5,
+        METADATA = 5,      // Count the memory consumption of all Metadata.
+        CACHE = 6,         // Count the memory consumption of all Cache.
+        OTHER = 7, // Count the memory consumption of all other tasks, such as Clone, Snapshot, etc.
     };
 
     static std::string type_string(Type type) {
@@ -97,8 +99,12 @@ public:
             return "compaction";
         case Type::SCHEMA_CHANGE:
             return "schema_change";
+        case Type::METADATA:
+            return "metadata";
+        case Type::CACHE:
+            return "cache";
         case Type::OTHER:
-            return "other";
+            return "other_task";
         default:
             LOG(FATAL) << "not match type of mem tracker limiter :" << 
static_cast<int>(type);
         }
@@ -158,6 +164,8 @@ public:
     int64_t consumption() const { return _mem_counter.current_value(); }
     int64_t peak_consumption() const { return _mem_counter.peak_value(); }
 
+    // Use carefully! Only memory that cannot be allocated using Doris Allocator needs to be consumed manually.
+    // Ideally, all memory should use Doris Allocator.
     void consume(int64_t bytes) {
         _mem_counter.add(bytes);
         if (_query_statistics) {
diff --git a/be/src/runtime/memory/memory_profile.cpp 
b/be/src/runtime/memory/memory_profile.cpp
index 8dbdcbdd3af..5d649c52601 100644
--- a/be/src/runtime/memory/memory_profile.cpp
+++ b/be/src/runtime/memory/memory_profile.cpp
@@ -18,6 +18,9 @@
 #include "runtime/memory/memory_profile.h"
 
 #include "bvar/reducer.h"
+#include "olap/metadata_adder.h"
+#include "olap/schema_cache.h"
+#include "olap/tablet_schema_cache.h"
 #include "runtime/exec_env.h"
 #include "runtime/memory/global_memory_arbitrator.h"
 #include "runtime/memory/mem_tracker_limiter.h"
@@ -28,6 +31,9 @@ namespace doris {
 
 static bvar::Adder<int64_t> 
memory_all_tracked_sum_bytes("memory_all_tracked_sum_bytes");
 static bvar::Adder<int64_t> 
memory_global_trackers_sum_bytes("memory_global_trackers_sum_bytes");
+static bvar::Adder<int64_t> memory_metadata_trackers_sum_bytes(
+        "memory_metadata_trackers_sum_bytes");
+static bvar::Adder<int64_t> 
memory_cache_trackers_sum_bytes("memory_cache_trackers_sum_bytes");
 static bvar::Adder<int64_t> 
memory_query_trackers_sum_bytes("memory_query_trackers_sum_bytes");
 static bvar::Adder<int64_t> 
memory_load_trackers_sum_bytes("memory_load_trackers_sum_bytes");
 static bvar::Adder<int64_t> memory_compaction_trackers_sum_bytes(
@@ -40,140 +46,122 @@ static bvar::Adder<int64_t> 
memory_all_tasks_memory_bytes("memory_all_tasks_memo
 static bvar::Adder<int64_t> 
memory_untracked_memory_bytes("memory_untracked_memory_bytes");
 
 MemoryProfile::MemoryProfile() {
-    
_memory_overview_profile.set(std::make_unique<RuntimeProfile>("MemoryOverviewSnapshot"));
+#ifdef ADDRESS_SANITIZER
+    _memory_overview_profile = 
std::make_unique<RuntimeProfile>("[ASAN]MemoryOverviewSnapshot");
+#else
+    _memory_overview_profile = 
std::make_unique<RuntimeProfile>("MemoryOverviewSnapshot");
+#endif
     
_global_memory_profile.set(std::make_unique<RuntimeProfile>("GlobalMemorySnapshot"));
+    
_metadata_memory_profile.set(std::make_unique<RuntimeProfile>("MetadataMemorySnapshot"));
+    
_cache_memory_profile.set(std::make_unique<RuntimeProfile>("CacheMemorySnapshot"));
     
_top_memory_tasks_profile.set(std::make_unique<RuntimeProfile>("TopMemoryTasksSnapshot"));
     
_tasks_memory_profile.set(std::make_unique<RuntimeProfile>("TasksMemorySnapshot"));
+    init_memory_overview_counter();
 }
 
-void MemoryProfile::refresh_memory_overview_profile() {
-#ifdef ADDRESS_SANITIZER
-    std::unique_ptr<RuntimeProfile> memory_overview_profile =
-            std::make_unique<RuntimeProfile>("[ASAN]MemoryOverviewSnapshot");
-#else
-    std::unique_ptr<RuntimeProfile> memory_overview_profile =
-            std::make_unique<RuntimeProfile>("MemoryOverviewSnapshot");
-#endif
-    std::unique_ptr<RuntimeProfile> global_memory_profile =
-            std::make_unique<RuntimeProfile>("GlobalMemorySnapshot");
-    std::unique_ptr<RuntimeProfile> top_memory_tasks_profile =
-            std::make_unique<RuntimeProfile>("TopMemoryTasksSnapshot");
-
-    // 1. create profile
+void MemoryProfile::init_memory_overview_counter() {
     RuntimeProfile* untracked_memory_profile =
-            memory_overview_profile->create_child("UntrackedMemory", true, 
false);
+            _memory_overview_profile->create_child("UntrackedMemory", true, 
false);
     RuntimeProfile* tracked_memory_profile =
-            memory_overview_profile->create_child("TrackedMemory", true, 
false);
+            _memory_overview_profile->create_child("TrackedMemory", true, 
false);
     RuntimeProfile* tasks_memory_overview_profile =
             tracked_memory_profile->create_child("TasksMemory", true, false);
     RuntimeProfile* tasks_memory_overview_details_profile =
             tasks_memory_overview_profile->create_child("Details", true, 
false);
     RuntimeProfile* global_memory_overview_profile =
             tracked_memory_profile->create_child("GlobalMemory", true, false);
+    RuntimeProfile* metadata_memory_overview_profile =
+            tracked_memory_profile->create_child("MetadataMemory", true, 
false);
+    RuntimeProfile* cache_memory_overview_profile =
+            tracked_memory_profile->create_child("CacheMemory", true, false);
     RuntimeProfile* jemalloc_memory_profile =
             tracked_memory_profile->create_child("JemallocMemory", true, 
false);
     RuntimeProfile* jemalloc_memory_details_profile =
             jemalloc_memory_profile->create_child("Details", true, false);
 
-    // 2. add counter
-    // 2.1 add process memory counter
-    RuntimeProfile::Counter* process_physical_memory_current_usage_counter =
-            ADD_COUNTER(memory_overview_profile, "PhysicalMemory(VmRSS)", 
TUnit::BYTES);
-    RuntimeProfile::Counter* process_physical_memory_peak_usage_counter =
-            
memory_overview_profile->AddHighWaterMarkCounter("PhysicalMemoryPeak", 
TUnit::BYTES);
-    RuntimeProfile::Counter* process_virtual_memory_current_usage_counter =
-            ADD_COUNTER(memory_overview_profile, "VirtualMemory(VmSize)", 
TUnit::BYTES);
-    RuntimeProfile::Counter* process_virtual_memory_peak_usage_counter =
-            
memory_overview_profile->AddHighWaterMarkCounter("VirtualMemoryPeak", 
TUnit::BYTES);
-
-    // 2.2 add untracked memory counter
-    RuntimeProfile::Counter* untracked_memory_current_usage_counter =
-            ADD_COUNTER(untracked_memory_profile, "CurrentUsage", 
TUnit::BYTES);
-    RuntimeProfile::Counter* untracked_memory_peak_usage_counter =
-            untracked_memory_profile->AddHighWaterMarkCounter("PeakUsage", 
TUnit::BYTES);
-
-    // 2.3 add tracked memory counter
-    RuntimeProfile::Counter* tracked_memory_current_usage_counter =
-            ADD_COUNTER(tracked_memory_profile, "CurrentUsage", TUnit::BYTES);
-    RuntimeProfile::Counter* tracked_memory_peak_usage_counter =
-            tracked_memory_profile->AddHighWaterMarkCounter("PeakUsage", 
TUnit::BYTES);
-
-    // 2.4 add jemalloc memory counter
-    RuntimeProfile::Counter* jemalloc_memory_current_usage_counter =
-            ADD_COUNTER(jemalloc_memory_profile, "CurrentUsage", TUnit::BYTES);
-    RuntimeProfile::Counter* jemalloc_memory_peak_usage_counter =
-            jemalloc_memory_profile->AddHighWaterMarkCounter("PeakUsage", 
TUnit::BYTES);
-    RuntimeProfile::Counter* jemalloc_cache_current_usage_counter =
-            ADD_COUNTER(jemalloc_memory_details_profile, "Cache", 
TUnit::BYTES);
-    RuntimeProfile::Counter* jemalloc_cache_peak_usage_counter =
-            
jemalloc_memory_details_profile->AddHighWaterMarkCounter("CachePeak", 
TUnit::BYTES);
-    RuntimeProfile::Counter* jemalloc_metadata_current_usage_counter =
-            ADD_COUNTER(jemalloc_memory_details_profile, "Metadata", 
TUnit::BYTES);
-    RuntimeProfile::Counter* jemalloc_metadata_peak_usage_counter =
-            
jemalloc_memory_details_profile->AddHighWaterMarkCounter("MetadataPeak", 
TUnit::BYTES);
-
-    // 2.5 add global memory counter
-    RuntimeProfile::Counter* global_current_usage_counter =
-            ADD_COUNTER(global_memory_overview_profile, "CurrentUsage", 
TUnit::BYTES);
-    RuntimeProfile::Counter* global_peak_usage_counter =
-            
global_memory_overview_profile->AddHighWaterMarkCounter("PeakUsage", 
TUnit::BYTES);
-
-    // 2.6 add tasks memory counter
-    RuntimeProfile::Counter* tasks_memory_current_usage_counter =
-            ADD_COUNTER_WITH_LEVEL(tasks_memory_overview_profile, 
"CurrentUsage", TUnit::BYTES, 1);
+    // 1 add process memory counter
+    _process_physical_memory_usage_counter = 
_memory_overview_profile->AddHighWaterMarkCounter(
+            "PhysicalMemory(VmRSS)", TUnit::BYTES);
+    _process_virtual_memory_usage_counter = 
_memory_overview_profile->AddHighWaterMarkCounter(
+            "VirtualMemory(VmSize)", TUnit::BYTES);
+
+    // 2 add untracked/tracked memory counter
+    _untracked_memory_usage_counter =
+            untracked_memory_profile->AddHighWaterMarkCounter("Memory", 
TUnit::BYTES);
+    _tracked_memory_usage_counter =
+            tracked_memory_profile->AddHighWaterMarkCounter("Memory", 
TUnit::BYTES);
+
+    // 3 add Jemalloc memory counter
+    _jemalloc_memory_usage_counter =
+            jemalloc_memory_profile->AddHighWaterMarkCounter("Memory", 
TUnit::BYTES);
+    _jemalloc_cache_usage_counter =
+            jemalloc_memory_details_profile->AddHighWaterMarkCounter("Cache", 
TUnit::BYTES);
+    _jemalloc_metadata_usage_counter =
+            
jemalloc_memory_details_profile->AddHighWaterMarkCounter("Metadata", 
TUnit::BYTES);
+
+    // 4 add global/metadata/cache memory counter
+    _global_usage_counter =
+            global_memory_overview_profile->AddHighWaterMarkCounter("Memory", 
TUnit::BYTES);
+    _metadata_usage_counter =
+            
metadata_memory_overview_profile->AddHighWaterMarkCounter("Memory", 
TUnit::BYTES);
+    _cache_usage_counter =
+            cache_memory_overview_profile->AddHighWaterMarkCounter("Memory", 
TUnit::BYTES);
+
+    // 5 add tasks memory counter
+    _tasks_memory_usage_counter =
+            tasks_memory_overview_profile->AddHighWaterMarkCounter("Memory", 
TUnit::BYTES);
     // Reserved memory is the sum of all task reserved memory, is duplicated 
with all task memory counter.
-    RuntimeProfile::Counter* reserved_memory_current_usage_counter = 
ADD_CHILD_COUNTER_WITH_LEVEL(
-            tasks_memory_overview_profile, "ReservedMemory", TUnit::BYTES, 
"CurrentUsage", 1);
-    RuntimeProfile::Counter* reserved_memory_peak_usage_counter =
-            
tasks_memory_overview_profile->AddHighWaterMarkCounter("ReservedMemoryPeak",
-                                                                   
TUnit::BYTES, "CurrentUsage", 1);
-    RuntimeProfile::Counter* tasks_memory_peak_usage_counter =
-            
tasks_memory_overview_profile->AddHighWaterMarkCounter("PeakUsage", 
TUnit::BYTES);
-    RuntimeProfile::Counter* query_current_usage_counter =
-            ADD_COUNTER_WITH_LEVEL(tasks_memory_overview_details_profile, 
"Query", TUnit::BYTES, 1);
-    RuntimeProfile::Counter* query_peak_usage_counter =
-            tasks_memory_overview_details_profile->AddHighWaterMarkCounter(
-                    "QueryPeak", TUnit::BYTES, "Query", 1);
-    RuntimeProfile::Counter* load_current_usage_counter =
-            ADD_COUNTER_WITH_LEVEL(tasks_memory_overview_details_profile, 
"Load", TUnit::BYTES, 1);
-    RuntimeProfile::Counter* load_peak_usage_counter =
-            
tasks_memory_overview_details_profile->AddHighWaterMarkCounter("LoadPeak", 
TUnit::BYTES,
-                                                                           
"Load", 1);
-    RuntimeProfile::Counter* load_all_memtables_current_usage_counter =
-            ADD_CHILD_COUNTER_WITH_LEVEL(tasks_memory_overview_details_profile,
-                                         "AllMemTablesMemory", TUnit::BYTES, 
"Load", 1);
-    RuntimeProfile::Counter* load_all_memtables_peak_usage_counter =
-            ADD_CHILD_COUNTER_WITH_LEVEL(tasks_memory_overview_details_profile,
-                                         "AllMemTablesMemoryPeak", 
TUnit::BYTES, "Load", 1);
-    RuntimeProfile::Counter* compaction_current_usage_counter = 
ADD_COUNTER_WITH_LEVEL(
-            tasks_memory_overview_details_profile, "Compaction", TUnit::BYTES, 
1);
-    RuntimeProfile::Counter* compaction_peak_usage_counter =
-            tasks_memory_overview_details_profile->AddHighWaterMarkCounter(
-                    "CompactionPeak", TUnit::BYTES, "Compaction", 1);
-    RuntimeProfile::Counter* schema_change_current_usage_counter = 
ADD_COUNTER_WITH_LEVEL(
-            tasks_memory_overview_details_profile, "SchemaChange", 
TUnit::BYTES, 1);
-    RuntimeProfile::Counter* schema_change_peak_usage_counter =
-            tasks_memory_overview_details_profile->AddHighWaterMarkCounter(
-                    "SchemaChangePeak", TUnit::BYTES, "SchemaChange", 1);
-    RuntimeProfile::Counter* other_current_usage_counter =
-            ADD_COUNTER_WITH_LEVEL(tasks_memory_overview_details_profile, 
"Other", TUnit::BYTES, 1);
-    RuntimeProfile::Counter* other_peak_usage_counter =
-            tasks_memory_overview_details_profile->AddHighWaterMarkCounter(
-                    "OtherPeak", TUnit::BYTES, "Other", 1);
-    // 3. refresh counter
-    // 3.1 refresh process memory counter
-    COUNTER_SET(process_physical_memory_current_usage_counter,
+    _reserved_memory_usage_counter = 
tasks_memory_overview_profile->AddHighWaterMarkCounter(
+            "ReservedMemory", TUnit::BYTES, "Memory", 1);
+    _query_usage_counter =
+            
tasks_memory_overview_details_profile->AddHighWaterMarkCounter("Query", 
TUnit::BYTES);
+    _load_usage_counter =
+            
tasks_memory_overview_details_profile->AddHighWaterMarkCounter("Load", 
TUnit::BYTES);
+    _load_all_memtables_usage_counter =
+            
tasks_memory_overview_details_profile->AddHighWaterMarkCounter("AllMemTablesMemory",
+                                                                           
TUnit::BYTES, "Load", 1);
+    _compaction_usage_counter = 
tasks_memory_overview_details_profile->AddHighWaterMarkCounter(
+            "Compaction", TUnit::BYTES);
+    _schema_change_usage_counter = 
tasks_memory_overview_details_profile->AddHighWaterMarkCounter(
+            "SchemaChange", TUnit::BYTES);
+    _other_usage_counter =
+            
tasks_memory_overview_details_profile->AddHighWaterMarkCounter("Other", 
TUnit::BYTES);
+}
+
+void MemoryProfile::refresh_memory_overview_profile() {
+    // 1 create profile
+    std::unique_ptr<RuntimeProfile> global_memory_profile =
+            std::make_unique<RuntimeProfile>("GlobalMemorySnapshot");
+    std::unique_ptr<RuntimeProfile> metadata_memory_profile =
+            std::make_unique<RuntimeProfile>("MetadataMemorySnapshot");
+    std::unique_ptr<RuntimeProfile> cache_memory_profile =
+            std::make_unique<RuntimeProfile>("CacheMemorySnapshot");
+    std::unique_ptr<RuntimeProfile> top_memory_tasks_profile =
+            std::make_unique<RuntimeProfile>("TopMemoryTasksSnapshot");
+
+    // 2 refresh process memory counter
+    COUNTER_SET(_process_physical_memory_usage_counter,
                 PerfCounters::get_vm_rss()); // from /proc VmRSS VmHWM
-    COUNTER_SET(process_physical_memory_peak_usage_counter, 
PerfCounters::get_vm_hwm());
-    COUNTER_SET(process_virtual_memory_current_usage_counter,
+    COUNTER_SET(_process_virtual_memory_usage_counter,
                 PerfCounters::get_vm_size()); // from /proc VmSize VmPeak
-    COUNTER_SET(process_virtual_memory_peak_usage_counter, 
PerfCounters::get_vm_peak());
 
-    // 3.2 refresh tracked memory counter
+    // 3 refresh metadata memory tracker
+    ExecEnv::GetInstance()->tablets_no_cache_mem_tracker()->set_consumption(
+            MetadataAdder<TabletMeta>::get_all_tablets_size() -
+            TabletSchemaCache::instance()->value_mem_consumption() -
+            SchemaCache::instance()->value_mem_consumption());
+    ExecEnv::GetInstance()->rowsets_no_cache_mem_tracker()->set_consumption(
+            MetadataAdder<RowsetMeta>::get_all_rowsets_size());
+    ExecEnv::GetInstance()->segments_no_cache_mem_tracker()->set_consumption(
+            
MetadataAdder<segment_v2::Segment>::get_all_segments_estimate_size() -
+            SegmentLoader::instance()->cache_mem_usage());
+
+    // 4 refresh tracked memory counter
     std::unordered_map<MemTrackerLimiter::Type, int64_t> type_mem_sum = {
             {MemTrackerLimiter::Type::GLOBAL, 0},        
{MemTrackerLimiter::Type::QUERY, 0},
             {MemTrackerLimiter::Type::LOAD, 0},          
{MemTrackerLimiter::Type::COMPACTION, 0},
-            {MemTrackerLimiter::Type::SCHEMA_CHANGE, 0}, 
{MemTrackerLimiter::Type::OTHER, 0}};
+            {MemTrackerLimiter::Type::SCHEMA_CHANGE, 0}, 
{MemTrackerLimiter::Type::METADATA, 0},
+            {MemTrackerLimiter::Type::CACHE, 0},         
{MemTrackerLimiter::Type::OTHER, 0}};
     // always ExecEnv::ready(), because Daemon::_stop_background_threads_latch
     for (auto& group : ExecEnv::GetInstance()->mem_tracker_limiter_pool) {
         std::lock_guard<std::mutex> l(group.group_lock);
@@ -191,42 +179,46 @@ void MemoryProfile::refresh_memory_overview_profile() {
         all_tracked_mem_sum += it.second;
         switch (it.first) {
         case MemTrackerLimiter::Type::GLOBAL:
-            COUNTER_SET(global_current_usage_counter, it.second);
-            COUNTER_SET(global_peak_usage_counter, it.second);
+            COUNTER_SET(_global_usage_counter, it.second);
             memory_global_trackers_sum_bytes
                     << it.second - 
memory_global_trackers_sum_bytes.get_value();
             break;
         case MemTrackerLimiter::Type::QUERY:
-            COUNTER_SET(query_current_usage_counter, it.second);
-            COUNTER_SET(query_peak_usage_counter, it.second);
+            COUNTER_SET(_query_usage_counter, it.second);
             tasks_trackers_mem_sum += it.second;
             memory_query_trackers_sum_bytes
                     << it.second - memory_query_trackers_sum_bytes.get_value();
             break;
         case MemTrackerLimiter::Type::LOAD:
-            COUNTER_SET(load_current_usage_counter, it.second);
-            COUNTER_SET(load_peak_usage_counter, it.second);
+            COUNTER_SET(_load_usage_counter, it.second);
             tasks_trackers_mem_sum += it.second;
             memory_load_trackers_sum_bytes
                     << it.second - memory_load_trackers_sum_bytes.get_value();
             break;
         case MemTrackerLimiter::Type::COMPACTION:
-            COUNTER_SET(compaction_current_usage_counter, it.second);
-            COUNTER_SET(compaction_peak_usage_counter, it.second);
+            COUNTER_SET(_compaction_usage_counter, it.second);
             tasks_trackers_mem_sum += it.second;
             memory_compaction_trackers_sum_bytes
                     << it.second - 
memory_compaction_trackers_sum_bytes.get_value();
             break;
         case MemTrackerLimiter::Type::SCHEMA_CHANGE:
-            COUNTER_SET(schema_change_current_usage_counter, it.second);
-            COUNTER_SET(schema_change_peak_usage_counter, it.second);
+            COUNTER_SET(_schema_change_usage_counter, it.second);
             tasks_trackers_mem_sum += it.second;
             memory_schema_change_trackers_sum_bytes
                     << it.second - 
memory_schema_change_trackers_sum_bytes.get_value();
             break;
+        case MemTrackerLimiter::Type::METADATA:
+            COUNTER_SET(_metadata_usage_counter, it.second);
+            memory_metadata_trackers_sum_bytes
+                    << it.second - 
memory_metadata_trackers_sum_bytes.get_value();
+            break;
+        case MemTrackerLimiter::Type::CACHE:
+            COUNTER_SET(_cache_usage_counter, it.second);
+            memory_cache_trackers_sum_bytes
+                    << it.second - memory_cache_trackers_sum_bytes.get_value();
+            break;
         case MemTrackerLimiter::Type::OTHER:
-            COUNTER_SET(other_current_usage_counter, it.second);
-            COUNTER_SET(other_peak_usage_counter, it.second);
+            COUNTER_SET(_other_usage_counter, it.second);
             tasks_trackers_mem_sum += it.second;
             memory_other_trackers_sum_bytes
                     << it.second - memory_other_trackers_sum_bytes.get_value();
@@ -235,60 +227,52 @@ void MemoryProfile::refresh_memory_overview_profile() {
 
     MemTrackerLimiter::make_type_trackers_profile(global_memory_profile.get(),
                                                   
MemTrackerLimiter::Type::GLOBAL);
+    
MemTrackerLimiter::make_type_trackers_profile(metadata_memory_profile.get(),
+                                                  
MemTrackerLimiter::Type::METADATA);
+    MemTrackerLimiter::make_type_trackers_profile(cache_memory_profile.get(),
+                                                  
MemTrackerLimiter::Type::CACHE);
 
     
MemTrackerLimiter::make_top_consumption_tasks_tracker_profile(top_memory_tasks_profile.get(),
                                                                   15);
 
-    COUNTER_SET(tasks_memory_current_usage_counter, tasks_trackers_mem_sum);
-    COUNTER_SET(tasks_memory_peak_usage_counter, tasks_trackers_mem_sum);
+    COUNTER_SET(_tasks_memory_usage_counter, tasks_trackers_mem_sum);
     memory_all_tasks_memory_bytes << tasks_trackers_mem_sum -
                                              
memory_all_tasks_memory_bytes.get_value();
 
-    COUNTER_SET(reserved_memory_current_usage_counter,
-                GlobalMemoryArbitrator::process_reserved_memory());
-    COUNTER_SET(reserved_memory_peak_usage_counter,
-                GlobalMemoryArbitrator::process_reserved_memory());
+    COUNTER_SET(_reserved_memory_usage_counter, 
GlobalMemoryArbitrator::process_reserved_memory());
     memory_reserved_memory_bytes << 
GlobalMemoryArbitrator::process_reserved_memory() -
                                             
memory_reserved_memory_bytes.get_value();
 
     all_tracked_mem_sum += MemInfo::allocator_cache_mem();
-    COUNTER_SET(jemalloc_cache_current_usage_counter,
-                static_cast<int64_t>(MemInfo::allocator_cache_mem()));
-    COUNTER_SET(jemalloc_cache_peak_usage_counter,
+    COUNTER_SET(_jemalloc_cache_usage_counter,
                 static_cast<int64_t>(MemInfo::allocator_cache_mem()));
     all_tracked_mem_sum += MemInfo::allocator_metadata_mem();
-    COUNTER_SET(jemalloc_metadata_current_usage_counter,
-                static_cast<int64_t>(MemInfo::allocator_metadata_mem()));
-    COUNTER_SET(jemalloc_metadata_peak_usage_counter,
+    COUNTER_SET(_jemalloc_metadata_usage_counter,
                 static_cast<int64_t>(MemInfo::allocator_metadata_mem()));
-    COUNTER_SET(jemalloc_memory_current_usage_counter,
-                jemalloc_cache_current_usage_counter->value() +
-                        jemalloc_metadata_current_usage_counter->value());
-    COUNTER_SET(jemalloc_memory_peak_usage_counter,
-                jemalloc_cache_current_usage_counter->value() +
-                        jemalloc_metadata_current_usage_counter->value());
-
-    COUNTER_SET(tracked_memory_current_usage_counter, all_tracked_mem_sum);
-    COUNTER_SET(tracked_memory_peak_usage_counter, all_tracked_mem_sum);
+    COUNTER_SET(_jemalloc_memory_usage_counter,
+                _jemalloc_cache_usage_counter->current_value() +
+                        _jemalloc_metadata_usage_counter->current_value());
+
+    COUNTER_SET(_tracked_memory_usage_counter, all_tracked_mem_sum);
     memory_all_tracked_sum_bytes << all_tracked_mem_sum - 
memory_all_tracked_sum_bytes.get_value();
 
-    // 3.3 refresh untracked memory counter
+    // 5 refresh untracked memory counter
     int64_t untracked_memory =
-            process_physical_memory_current_usage_counter->value() - 
all_tracked_mem_sum;
-    COUNTER_SET(untracked_memory_current_usage_counter, untracked_memory);
-    COUNTER_SET(untracked_memory_peak_usage_counter, untracked_memory);
+            _process_physical_memory_usage_counter->current_value() - 
all_tracked_mem_sum;
+    COUNTER_SET(_untracked_memory_usage_counter, untracked_memory);
     memory_untracked_memory_bytes << untracked_memory - 
memory_untracked_memory_bytes.get_value();
 
-    // 3.4 refresh additional tracker printed when memory exceeds limit.
-    COUNTER_SET(load_all_memtables_current_usage_counter,
-                
ExecEnv::GetInstance()->memtable_memory_limiter()->mem_tracker()->consumption());
+    // 6 refresh additional tracker printed when memory exceeds limit.
     COUNTER_SET(
-            load_all_memtables_peak_usage_counter,
+            _load_all_memtables_usage_counter,
             
ExecEnv::GetInstance()->memtable_memory_limiter()->mem_tracker()->peak_consumption());
+    COUNTER_SET(_load_all_memtables_usage_counter,
+                
ExecEnv::GetInstance()->memtable_memory_limiter()->mem_tracker()->consumption());
 
-    // 4. reset profile
-    _memory_overview_profile.set(std::move(memory_overview_profile));
+    // 7 reset profile
     _global_memory_profile.set(std::move(global_memory_profile));
+    _metadata_memory_profile.set(std::move(metadata_memory_profile));
+    _cache_memory_profile.set(std::move(cache_memory_profile));
     _top_memory_tasks_profile.set(std::move(top_memory_tasks_profile));
 }
 
@@ -302,16 +286,25 @@ void MemoryProfile::refresh_tasks_memory_profile() {
 void MemoryProfile::make_memory_profile(RuntimeProfile* profile) const {
     RuntimeProfile* memory_profile_snapshot = 
profile->create_child("MemoryProfile", true, false);
 
-    auto memory_overview_version_ptr = _memory_overview_profile.get();
     RuntimeProfile* memory_overview_profile =
-            
memory_profile_snapshot->create_child(memory_overview_version_ptr->name(), 
true, false);
-    
memory_overview_profile->merge(const_cast<RuntimeProfile*>(memory_overview_version_ptr.get()));
+            
memory_profile_snapshot->create_child(_memory_overview_profile->name(), true, 
false);
+    
memory_overview_profile->merge(const_cast<RuntimeProfile*>(_memory_overview_profile.get()));
 
     auto global_memory_version_ptr = _global_memory_profile.get();
     RuntimeProfile* global_memory_profile =
             
memory_profile_snapshot->create_child(global_memory_version_ptr->name(), true, 
false);
     
global_memory_profile->merge(const_cast<RuntimeProfile*>(global_memory_version_ptr.get()));
 
+    auto metadata_memory_version_ptr = _metadata_memory_profile.get();
+    RuntimeProfile* metadata_memory_profile =
+            
memory_profile_snapshot->create_child(metadata_memory_version_ptr->name(), 
true, false);
+    
metadata_memory_profile->merge(const_cast<RuntimeProfile*>(metadata_memory_version_ptr.get()));
+
+    auto cache_memory_version_ptr = _cache_memory_profile.get();
+    RuntimeProfile* cache_memory_profile =
+            
memory_profile_snapshot->create_child(cache_memory_version_ptr->name(), true, 
false);
+    
cache_memory_profile->merge(const_cast<RuntimeProfile*>(cache_memory_version_ptr.get()));
+
     auto top_memory_tasks_version_ptr = _top_memory_tasks_profile.get();
     RuntimeProfile* top_memory_tasks_profile = 
memory_profile_snapshot->create_child(
             top_memory_tasks_version_ptr->name(), true, false);
@@ -346,6 +339,8 @@ void MemoryProfile::print_log_process_usage() {
         LOG(WARNING) << "Process Memory Summary: " + 
GlobalMemoryArbitrator::process_mem_log_str();
         LOG(WARNING) << "\n" << print_memory_overview_profile();
         LOG(WARNING) << "\n" << print_global_memory_profile();
+        LOG(WARNING) << "\n" << print_metadata_memory_profile();
+        LOG(WARNING) << "\n" << print_cache_memory_profile();
         LOG(WARNING) << "\n" << print_top_memory_tasks_profile();
     }
 }
diff --git a/be/src/runtime/memory/memory_profile.h 
b/be/src/runtime/memory/memory_profile.h
index 9f1bab0c02a..c6aefb72f22 100644
--- a/be/src/runtime/memory/memory_profile.h
+++ b/be/src/runtime/memory/memory_profile.h
@@ -33,31 +33,27 @@ public:
     void make_memory_profile(RuntimeProfile* profile) const;
 
     std::string print_memory_overview_profile() const {
-        std::stringstream ss;
-        auto version_ptr = _memory_overview_profile.get();
-        version_ptr->pretty_print(&ss);
-        return ss.str();
+        return return_memory_profile_str(_memory_overview_profile.get());
     }
 
     std::string print_global_memory_profile() const {
-        std::stringstream ss;
-        auto version_ptr = _global_memory_profile.get();
-        version_ptr->pretty_print(&ss);
-        return ss.str();
+        return return_memory_profile_str(_global_memory_profile.get().get());
+    }
+
+    std::string print_metadata_memory_profile() const {
+        return return_memory_profile_str(_metadata_memory_profile.get().get());
+    }
+
+    std::string print_cache_memory_profile() const {
+        return return_memory_profile_str(_cache_memory_profile.get().get());
     }
 
     std::string print_top_memory_tasks_profile() const {
-        std::stringstream ss;
-        auto version_ptr = _top_memory_tasks_profile.get();
-        version_ptr->pretty_print(&ss);
-        return ss.str();
+        return 
return_memory_profile_str(_top_memory_tasks_profile.get().get());
     }
 
     std::string print_tasks_memory_profile() const {
-        std::stringstream ss;
-        auto version_ptr = _tasks_memory_profile.get();
-        version_ptr->pretty_print(&ss);
-        return ss.str();
+        return return_memory_profile_str(_tasks_memory_profile.get().get());
     }
 
     static int64_t query_current_usage();
@@ -71,11 +67,50 @@ public:
     void print_log_process_usage();
 
 private:
-    MultiVersion<RuntimeProfile> _memory_overview_profile;
+    std::string return_memory_profile_str(const RuntimeProfile* profile) const 
{
+        std::stringstream ss;
+        profile->pretty_print(&ss);
+        return ss.str();
+    }
+
+    void init_memory_overview_counter();
+
+    std::unique_ptr<RuntimeProfile> _memory_overview_profile;
     MultiVersion<RuntimeProfile> _global_memory_profile;
+    MultiVersion<RuntimeProfile> _metadata_memory_profile;
+    MultiVersion<RuntimeProfile> _cache_memory_profile;
     MultiVersion<RuntimeProfile> _top_memory_tasks_profile;
     MultiVersion<RuntimeProfile> _tasks_memory_profile;
 
+    // process memory counter
+    RuntimeProfile::HighWaterMarkCounter* 
_process_physical_memory_usage_counter;
+    RuntimeProfile::HighWaterMarkCounter* 
_process_virtual_memory_usage_counter;
+
+    // untracked/tracked memory counter
+    RuntimeProfile::HighWaterMarkCounter* _untracked_memory_usage_counter;
+    RuntimeProfile::HighWaterMarkCounter* _tracked_memory_usage_counter;
+
+    // Jemalloc memory counter
+    RuntimeProfile::HighWaterMarkCounter* _jemalloc_memory_usage_counter;
+    RuntimeProfile::HighWaterMarkCounter* _jemalloc_cache_usage_counter;
+    RuntimeProfile::HighWaterMarkCounter* _jemalloc_metadata_usage_counter;
+
+    // global/metadata/cache memory counter
+    RuntimeProfile::HighWaterMarkCounter* _global_usage_counter;
+    RuntimeProfile::HighWaterMarkCounter* _metadata_usage_counter;
+    RuntimeProfile::HighWaterMarkCounter* _cache_usage_counter;
+
+    // tasks memory counter
+    RuntimeProfile::HighWaterMarkCounter* _tasks_memory_usage_counter;
+    // Reserved memory is the sum of all tasks' reserved memory; it is duplicated 
with the all-tasks memory counter.
+    RuntimeProfile::HighWaterMarkCounter* _reserved_memory_usage_counter;
+    RuntimeProfile::HighWaterMarkCounter* _query_usage_counter;
+    RuntimeProfile::HighWaterMarkCounter* _load_usage_counter;
+    RuntimeProfile::HighWaterMarkCounter* _load_all_memtables_usage_counter;
+    RuntimeProfile::HighWaterMarkCounter* _compaction_usage_counter;
+    RuntimeProfile::HighWaterMarkCounter* _schema_change_usage_counter;
+    RuntimeProfile::HighWaterMarkCounter* _other_usage_counter;
+
     std::atomic<bool> _enable_print_log_process_usage {true};
 };
 
diff --git a/be/src/util/runtime_profile.cpp b/be/src/util/runtime_profile.cpp
index 45db607a342..8b9e97bfc9c 100644
--- a/be/src/util/runtime_profile.cpp
+++ b/be/src/util/runtime_profile.cpp
@@ -28,6 +28,7 @@
 #include <algorithm>
 #include <iomanip>
 #include <iostream>
+#include <type_traits>
 
 #include "common/object_pool.h"
 #include "util/container_util.hpp"
@@ -72,8 +73,7 @@ void RuntimeProfile::merge(RuntimeProfile* other) {
             dst_iter = _counter_map.find(src_iter->first);
 
             if (dst_iter == _counter_map.end()) {
-                _counter_map[src_iter->first] = _pool->add(
-                        new Counter(src_iter->second->type(), 
src_iter->second->value()));
+                _counter_map[src_iter->first] = 
_pool->add(src_iter->second->clone());
             } else {
                 DCHECK(dst_iter->second->type() == src_iter->second->type());
 
diff --git a/be/src/util/runtime_profile.h b/be/src/util/runtime_profile.h
index 6e393ac673a..7130acbd2f9 100644
--- a/be/src/util/runtime_profile.h
+++ b/be/src/util/runtime_profile.h
@@ -100,6 +100,8 @@ public:
                 : _value(value), _type(type), _level(level) {}
         virtual ~Counter() = default;
 
+        virtual Counter* clone() const { return new Counter(type(), value(), 
_level); }
+
         virtual void update(int64_t delta) { _value.fetch_add(delta, 
std::memory_order_relaxed); }
 
         void bit_or(int64_t delta) { _value.fetch_or(delta, 
std::memory_order_relaxed); }
@@ -137,7 +139,7 @@ public:
 
         TUnit::type type() const { return _type; }
 
-        virtual int64_t level() { return _level; }
+        virtual int64_t level() const { return _level; }
 
     private:
         friend class RuntimeProfile;
@@ -151,8 +153,16 @@ public:
     /// as value()) and the current value.
     class HighWaterMarkCounter : public Counter {
     public:
-        HighWaterMarkCounter(TUnit::type unit, int64_t level, const std::string& parent_name)
-                : Counter(unit, 0, level), current_value_(0), _parent_name(parent_name) {}
+        HighWaterMarkCounter(TUnit::type unit, int64_t level, const std::string& parent_name,
+                             int64_t value = 0, int64_t current_value = 0)
+                : Counter(unit, value, level),
+                  current_value_(current_value),
+                  _parent_name(parent_name) {}
+
+        virtual Counter* clone() const override {
+            return new HighWaterMarkCounter(type(), level(), parent_name(), value(),
+                                            current_value());
+        }
 
         void add(int64_t delta) {
             current_value_.fetch_add(delta, std::memory_order_relaxed);
@@ -188,10 +198,9 @@ public:
         virtual void pretty_print(std::ostream* s, const std::string& prefix,
                                   const std::string& name) const override {
             std::ostream& stream = *s;
-            stream << prefix << "   - " << name << ": "
-                   << PrettyPrinter::print(current_value(), type()) << std::endl;
-            stream << prefix << "      - " << name << "Peak: "
-                   << PrettyPrinter::print(_value.load(std::memory_order_relaxed), type())
+            stream << prefix << "   - " << name
+                   << " Current: " << PrettyPrinter::print(current_value(), type()) << " (Peak: "
+                   << PrettyPrinter::print(_value.load(std::memory_order_relaxed), type()) << ")"
                    << std::endl;
         }
 
@@ -217,6 +226,8 @@ public:
 
         int64_t current_value() const { return current_value_.load(std::memory_order_relaxed); }
 
+        std::string parent_name() const { return _parent_name; }
+
     private:
         /// Set '_value' to 'v' if 'v' is larger than '_value'. The entire operation is
         /// atomic.
@@ -247,8 +258,13 @@ public:
     // Do not call Set() and Update().
     class DerivedCounter : public Counter {
     public:
-        DerivedCounter(TUnit::type type, const DerivedCounterFunction& counter_fn)
-                : Counter(type, 0), _counter_fn(counter_fn) {}
+        DerivedCounter(TUnit::type type, const DerivedCounterFunction& counter_fn,
+                       int64_t value = 0, int64_t level = 1)
+                : Counter(type, value, level), _counter_fn(counter_fn) {}
+
+        virtual Counter* clone() const override {
+            return new DerivedCounter(type(), _counter_fn, value(), level());
+        }
 
         int64_t value() const override { return _counter_fn(); }
 
@@ -259,8 +275,13 @@ public:
     // NonZeroCounter will not be converted to Thrift if the value is 0.
     class NonZeroCounter : public Counter {
     public:
-        NonZeroCounter(TUnit::type type, int64_t level, const std::string& parent_name)
-                : Counter(type, 0, level), _parent_name(parent_name) {}
+        NonZeroCounter(TUnit::type type, int64_t level, const std::string& parent_name,
+                       int64_t value = 0)
+                : Counter(type, value, level), _parent_name(parent_name) {}
+
+        virtual Counter* clone() const override {
+            return new NonZeroCounter(type(), level(), parent_name(), value());
+        }
 
         void to_thrift(const std::string& name, std::vector<TCounter>& tcounters,
                        std::map<std::string, std::set<std::string>>& child_counters_map) override {
@@ -272,6 +293,8 @@ public:
             }
         }
 
+        std::string parent_name() const { return _parent_name; }
+
     private:
         const std::string _parent_name;
     };
diff --git a/be/src/vec/runtime/partitioner.h b/be/src/vec/runtime/partitioner.h
index 39ed0e89941..d5492c3be87 100644
--- a/be/src/vec/runtime/partitioner.h
+++ b/be/src/vec/runtime/partitioner.h
@@ -23,7 +23,6 @@
 
 namespace doris {
 #include "common/compile_check_begin.h"
-class MemTracker;
 
 namespace vectorized {
 
diff --git a/be/src/vec/sink/vdata_stream_sender.h b/be/src/vec/sink/vdata_stream_sender.h
index 5fe35e4da11..4999602fdf4 100644
--- a/be/src/vec/sink/vdata_stream_sender.h
+++ b/be/src/vec/sink/vdata_stream_sender.h
@@ -56,7 +56,6 @@ namespace doris {
 #include "common/compile_check_begin.h"
 class ObjectPool;
 class RuntimeState;
-class MemTracker;
 class RowDescriptor;
 class TDataSink;
 class TDataStreamSink;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org


Reply via email to