gianm closed pull request #6357: Improve interning in SQLMetadataSegmentManager
URL: https://github.com/apache/incubator-druid/pull/6357
This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/server/src/main/java/org/apache/druid/client/DruidDataSource.java b/server/src/main/java/org/apache/druid/client/DruidDataSource.java index d280e30a5b7..ee8b574eaab 100644 --- a/server/src/main/java/org/apache/druid/client/DruidDataSource.java +++ b/server/src/main/java/org/apache/druid/client/DruidDataSource.java @@ -23,6 +23,7 @@ import com.google.common.base.Preconditions; import org.apache.druid.timeline.DataSegment; +import javax.annotation.Nullable; import java.util.Collection; import java.util.Collections; import java.util.Map; @@ -63,6 +64,12 @@ public String getName() return Collections.unmodifiableCollection(idToSegmentMap.values()); } + @Nullable + public DataSegment getSegment(String segmentId) + { + return idToSegmentMap.get(segmentId); + } + public DruidDataSource addSegment(DataSegment dataSegment) { idToSegmentMap.put(dataSegment.getIdentifier(), dataSegment); diff --git a/server/src/main/java/org/apache/druid/metadata/SQLMetadataSegmentManager.java b/server/src/main/java/org/apache/druid/metadata/SQLMetadataSegmentManager.java index c072ee6717d..acf149445be 100644 --- a/server/src/main/java/org/apache/druid/metadata/SQLMetadataSegmentManager.java +++ b/server/src/main/java/org/apache/druid/metadata/SQLMetadataSegmentManager.java @@ -25,8 +25,6 @@ import com.google.common.base.Throwables; import com.google.common.collect.Collections2; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Interner; -import com.google.common.collect.Interners; import com.google.common.collect.Iterators; import com.google.common.collect.Lists; import com.google.inject.Inject; @@ -82,7 +80,6 @@ @ManageLifecycle public class SQLMetadataSegmentManager implements 
MetadataSegmentManager { - private static final Interner<DataSegment> DATA_SEGMENT_INTERNER = Interners.newWeakInterner(); private static final EmittingLogger log = new EmittingLogger(SQLMetadataSegmentManager.class); /** @@ -232,7 +229,7 @@ public boolean enableDatasource(final String ds) .iterator(), payload -> { try { - return DATA_SEGMENT_INTERNER.intern(jsonMapper.readValue(payload, DataSegment.class)); + return jsonMapper.readValue(payload, DataSegment.class); } catch (IOException e) { throw new RuntimeException(e); @@ -466,10 +463,9 @@ public DataSegment map(int index, ResultSet r, StatementContext ctx) throws SQLException { try { - return DATA_SEGMENT_INTERNER.intern(jsonMapper.readValue( - r.getBytes("payload"), - DataSegment.class - )); + return replaceWithExistingSegmentIfPresent( + jsonMapper.readValue(r.getBytes("payload"), DataSegment.class) + ); } catch (IOException e) { log.makeAlert(e, "Failed to read segment from db.").emit(); @@ -535,6 +531,25 @@ public DataSegment map(int index, ResultSet r, StatementContext ctx) } } + /** + * For the garbage collector in Java, it's better to keep new objects short-living, but once they are old enough + * (i. e. promoted to old generation), try to keep them alive. In {@link #poll()}, we fetch and deserialize all + * existing segments each time, and then replace them in {@link #dataSourcesRef}. This method allows to use already + * existing (old) segments when possible, effectively interning them a-la {@link String#intern} or {@link + * com.google.common.collect.Interner}, aiming to make the majority of {@link DataSegment} objects garbage soon after + * they are deserialized and to die in young generation. It allows to avoid fragmentation of the old generation and + * full GCs. 
+ */ + private DataSegment replaceWithExistingSegmentIfPresent(DataSegment segment) + { + DruidDataSource dataSource = dataSourcesRef.get().get(segment.getDataSource()); + if (dataSource == null) { + return segment; + } + DataSegment alreadyExistingSegment = dataSource.getSegment(segment.getIdentifier()); + return alreadyExistingSegment != null ? alreadyExistingSegment : segment; + } + private String getSegmentsTable() { return dbTables.get().getSegmentsTable(); ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@druid.apache.org For additional commands, e-mail: commits-h...@druid.apache.org