gianm closed pull request #6357: Improve interning in SQLMetadataSegmentManager
URL: https://github.com/apache/incubator-druid/pull/6357
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

Because this pull request comes from a fork, GitHub does not display its
diff after the merge, so the diff is supplied below:

diff --git a/server/src/main/java/org/apache/druid/client/DruidDataSource.java 
b/server/src/main/java/org/apache/druid/client/DruidDataSource.java
index d280e30a5b7..ee8b574eaab 100644
--- a/server/src/main/java/org/apache/druid/client/DruidDataSource.java
+++ b/server/src/main/java/org/apache/druid/client/DruidDataSource.java
@@ -23,6 +23,7 @@
 import com.google.common.base.Preconditions;
 import org.apache.druid.timeline.DataSegment;
 
+import javax.annotation.Nullable;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Map;
@@ -63,6 +64,12 @@ public String getName()
     return Collections.unmodifiableCollection(idToSegmentMap.values());
   }
 
+  @Nullable
+  public DataSegment getSegment(String segmentId)
+  {
+    return idToSegmentMap.get(segmentId);
+  }
+
   public DruidDataSource addSegment(DataSegment dataSegment)
   {
     idToSegmentMap.put(dataSegment.getIdentifier(), dataSegment);
diff --git 
a/server/src/main/java/org/apache/druid/metadata/SQLMetadataSegmentManager.java 
b/server/src/main/java/org/apache/druid/metadata/SQLMetadataSegmentManager.java
index c072ee6717d..acf149445be 100644
--- 
a/server/src/main/java/org/apache/druid/metadata/SQLMetadataSegmentManager.java
+++ 
b/server/src/main/java/org/apache/druid/metadata/SQLMetadataSegmentManager.java
@@ -25,8 +25,6 @@
 import com.google.common.base.Throwables;
 import com.google.common.collect.Collections2;
 import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Interner;
-import com.google.common.collect.Interners;
 import com.google.common.collect.Iterators;
 import com.google.common.collect.Lists;
 import com.google.inject.Inject;
@@ -82,7 +80,6 @@
 @ManageLifecycle
 public class SQLMetadataSegmentManager implements MetadataSegmentManager
 {
-  private static final Interner<DataSegment> DATA_SEGMENT_INTERNER = 
Interners.newWeakInterner();
   private static final EmittingLogger log = new 
EmittingLogger(SQLMetadataSegmentManager.class);
 
   /**
@@ -232,7 +229,7 @@ public boolean enableDatasource(final String ds)
                       .iterator(),
                   payload -> {
                     try {
-                      return 
DATA_SEGMENT_INTERNER.intern(jsonMapper.readValue(payload, DataSegment.class));
+                      return jsonMapper.readValue(payload, DataSegment.class);
                     }
                     catch (IOException e) {
                       throw new RuntimeException(e);
@@ -466,10 +463,9 @@ public DataSegment map(int index, ResultSet r, 
StatementContext ctx)
                             throws SQLException
                         {
                           try {
-                            return 
DATA_SEGMENT_INTERNER.intern(jsonMapper.readValue(
-                                r.getBytes("payload"),
-                                DataSegment.class
-                            ));
+                            return replaceWithExistingSegmentIfPresent(
+                                jsonMapper.readValue(r.getBytes("payload"), 
DataSegment.class)
+                            );
                           }
                           catch (IOException e) {
                             log.makeAlert(e, "Failed to read segment from 
db.").emit();
@@ -535,6 +531,25 @@ public DataSegment map(int index, ResultSet r, 
StatementContext ctx)
     }
   }
 
+  /**
+   * For the garbage collector in Java, it's better to keep new objects short-lived, but once they are old enough
+   * (i.e. promoted to the old generation), try to keep them alive. In {@link #poll()}, we fetch and deserialize all
+   * existing segments each time, and then replace them in {@link #dataSourcesRef}. This method allows reusing already
+   * existing (old) segments when possible, effectively interning them a la {@link String#intern} or {@link
+   * com.google.common.collect.Interner}, aiming to make the majority of {@link DataSegment} objects garbage soon after
+   * they are deserialized, so that they die in the young generation. This helps to avoid fragmentation of the old
+   * generation and full GCs.
+   */
+  private DataSegment replaceWithExistingSegmentIfPresent(DataSegment segment)
+  {
+    DruidDataSource dataSource = 
dataSourcesRef.get().get(segment.getDataSource());
+    if (dataSource == null) {
+      return segment;
+    }
+    DataSegment alreadyExistingSegment = 
dataSource.getSegment(segment.getIdentifier());
+    return alreadyExistingSegment != null ? alreadyExistingSegment : segment;
+  }
+
   private String getSegmentsTable()
   {
     return dbTables.get().getSegmentsTable();


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@druid.apache.org
For additional commands, e-mail: commits-h...@druid.apache.org

Reply via email to