stefan-egli commented on code in PR #1314: URL: https://github.com/apache/jackrabbit-oak/pull/1314#discussion_r1497894711
########## oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/util/Utils.java: ########## @@ -274,6 +278,73 @@ private static String diagsForEntry(Map.Entry<String, PropertyStats> member) { } } + /** + * @return cluster if from first revision found in op, {@code -1} otherwise + */ + public static int extractClusterId(UpdateOp op) { + for (Key key : op.getChanges().keySet()) { + if (key.getRevision() != null) { + return key.getRevision().getClusterId(); + } + } + return -1; + } + + /** + * Produce an {@link UpdateOp} suitable for shrinking branch revision entries for given property in {@link Document}, {@code null} otherwise. + * + * @param doc document to inspect for repeated branch commits + * @param propertName property to check for + * @param revisionChecker filter for revisions (for instance, to check for cluster id) + * @return {@link UpdateOp} suitable for shrinking document, {@code null} otherwise + */ + public static @Nullable UpdateOp getShrinkOp(Document doc, String propertyName, Predicate<Revision> revisionChecker) { + Object t_bc = doc.get("_bc"); + Object t_property = doc.get(propertyName); + if (t_bc instanceof Map && t_property instanceof Map) { + @SuppressWarnings("unchecked") + Map<Revision, String> _bc = (Map<Revision, String>)t_bc; + @SuppressWarnings("unchecked") + Map<Revision, String> pMap = (Map<Revision, String>)t_property; + List<Revision> revs = new ArrayList<>(); + for (Map.Entry<Revision, String> en : pMap.entrySet()) { + Revision r = en.getKey(); + if (revisionChecker.apply(r)) { + String bcv = _bc.get(r); + if ("true".equals(bcv)) { + revs.add(r); + } + } + } + // sort by age + Collections.sort(revs, new Comparator<Revision>() { + @Override + public int compare(Revision r1, Revision r2) { + if (r1.getClusterId() != r2.getClusterId()) { + return r1.getClusterId() - r2.getClusterId(); + } else if (r1.getTimestamp() != r2.getTimestamp()) { + return r1.getTimestamp() > r2.getTimestamp() ? 1 : -1; + } else { + return r1.getCounter() - r2.getCounter(); + } + }}); + + UpdateOp clean = new UpdateOp(doc.getId(), false); + Revision last = null; + for (Revision r : revs) { + if (last != null) { + if (last.getClusterId() == r.getClusterId()) { + clean.removeMapEntry(propertyName, last); + } + } + last = r; + } Review Comment: > we might not have done that properly in the DetailedGC effort so far ... taking that back .. the difference between `DetailedGC` and this `runtime GC` case here is : in `DetailedGC` we're only looking at documents that have not been modified for 24+ hours. That means, reading their traversed state with headRevision of "now" is fine. But in this `runtime GC` case here, that is not fine (as we need to respect those 24+ hours worth of MVCC) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: dev-unsubscr...@jackrabbit.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org