reschke commented on code in PR #1314: URL: https://github.com/apache/jackrabbit-oak/pull/1314#discussion_r1497853848
########## oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/util/Utils.java: ########## @@ -274,6 +278,73 @@ private static String diagsForEntry(Map.Entry<String, PropertyStats> member) { } } + /** + * @return cluster if from first revision found in op, {@code -1} otherwise + */ + public static int extractClusterId(UpdateOp op) { + for (Key key : op.getChanges().keySet()) { + if (key.getRevision() != null) { + return key.getRevision().getClusterId(); + } + } + return -1; + } + + /** + * Produce an {@link UpdateOp} suitable for shrinking branch revision entries for given property in {@link Document}, {@code null} otherwise. + * + * @param doc document to inspect for repeated branch commits + * @param propertName property to check for + * @param revisionChecker filter for revisions (for instance, to check for cluster id) + * @return {@link UpdateOp} suitable for shrinking document, {@code null} otherwise + */ + public static @Nullable UpdateOp getShrinkOp(Document doc, String propertyName, Predicate<Revision> revisionChecker) { + Object t_bc = doc.get("_bc"); + Object t_property = doc.get(propertyName); + if (t_bc instanceof Map && t_property instanceof Map) { + @SuppressWarnings("unchecked") + Map<Revision, String> _bc = (Map<Revision, String>)t_bc; + @SuppressWarnings("unchecked") + Map<Revision, String> pMap = (Map<Revision, String>)t_property; + List<Revision> revs = new ArrayList<>(); + for (Map.Entry<Revision, String> en : pMap.entrySet()) { + Revision r = en.getKey(); + if (revisionChecker.apply(r)) { + String bcv = _bc.get(r); + if ("true".equals(bcv)) { + revs.add(r); + } + } + } + // sort by age + Collections.sort(revs, new Comparator<Revision>() { + @Override + public int compare(Revision r1, Revision r2) { + if (r1.getClusterId() != r2.getClusterId()) { + return r1.getClusterId() - r2.getClusterId(); + } else if (r1.getTimestamp() != r2.getTimestamp()) { + return r1.getTimestamp() > r2.getTimestamp() ? 1 : -1; + } else { + return r1.getCounter() - r2.getCounter(); + } + }}); + + UpdateOp clean = new UpdateOp(doc.getId(), false); + Revision last = null; + for (Revision r : revs) { + if (last != null) { + if (last.getClusterId() == r.getClusterId()) { + clean.removeMapEntry(propertyName, last); + } + } + last = r; + } Review Comment: - all older revisions that are branch commits (filtered earlier in the code); is there more that we can check? That would require _revisions, but those might be in a different document, right? - clusterid check - yes, unless we guarantee that the predicate will filter by id - ack - suggestions? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: dev-unsubscr...@jackrabbit.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org