[GitHub] helix pull request #169: Two Performance Improvement for Rebalance pipeline
Github user asfgit closed the pull request at: https://github.com/apache/helix/pull/169 ---
[GitHub] helix pull request #169: Two Performance Improvement for Rebalance pipeline
Github user dasahcc commented on a diff in the pull request: https://github.com/apache/helix/pull/169#discussion_r177527783 --- Diff: helix-core/src/main/java/org/apache/helix/common/caches/BasicClusterDataCache.java --- @@ -104,6 +108,52 @@ public void refresh(HelixDataAccessor accessor) { } } + /** + * Selective update Helix Cache by version + * @param accessor the HelixDataAccessor + * @param reloadKeys keys needs to be reload + * @param cachedKeys keys already exists in the cache + * @param cachedPropertyMap cached map of propertykey -> property object + * @param the type of metadata + * @return + */ + public static Map updateReloadProperties( + HelixDataAccessor accessor, List reloadKeys, List cachedKeys, --- End diff -- cachedkeys only contains the existing keys. Deleted node will be removed from it. ---
[GitHub] helix pull request #169: Two Performance Improvement for Rebalance pipeline
Github user dasahcc commented on a diff in the pull request: https://github.com/apache/helix/pull/169#discussion_r177527495 --- Diff: helix-core/src/main/java/org/apache/helix/common/caches/BasicClusterDataCache.java --- @@ -104,6 +108,52 @@ public void refresh(HelixDataAccessor accessor) { } } + /** + * Selective update Helix Cache by version + * @param accessor the HelixDataAccessor + * @param reloadKeys keys needs to be reload + * @param cachedKeys keys already exists in the cache + * @param cachedPropertyMap cached map of propertykey -> property object + * @param the type of metadata + * @return + */ + public static Map updateReloadProperties( + HelixDataAccessor accessor, List reloadKeys, List cachedKeys, + Map cachedPropertyMap) { +// All new entries from zk not cached locally yet should be read from ZK. +Map refreshedPropertyMap = Maps.newHashMap(); +List stats = accessor.getPropertyStats(cachedKeys); +for (int i = 0; i < cachedKeys.size(); i++) { + PropertyKey key = cachedKeys.get(i); + HelixProperty.Stat stat = stats.get(i); + if (stat != null) { +T property = cachedPropertyMap.get(key); +if (property != null && property.getBucketSize() == 0 && property.getStat().equals(stat)) { + refreshedPropertyMap.put(key, property); +} else { + // need update from zk + reloadKeys.add(key); +} + } else { +LOG.warn("stat is null for key: " + key); +reloadKeys.add(key); + } +} + +List reloadedProperty = accessor.getProperty(reloadKeys, true); +Iterator csKeyIter = reloadKeys.iterator(); +for (T property : reloadedProperty) { + PropertyKey key = csKeyIter.next(); + if (property != null) { +refreshedPropertyMap.put(key, property); + } else { +LOG.warn("CurrentState null for key: " + key); --- End diff -- Good catch. Will make correct logs. ---
[GitHub] helix pull request #169: Two Performance Improvement for Rebalance pipeline
Github user dasahcc commented on a diff in the pull request: https://github.com/apache/helix/pull/169#discussion_r177527357 --- Diff: helix-core/src/main/java/org/apache/helix/common/caches/BasicClusterDataCache.java --- @@ -104,6 +108,52 @@ public void refresh(HelixDataAccessor accessor) { } } + /** + * Selective update Helix Cache by version + * @param accessor the HelixDataAccessor + * @param reloadKeys keys needs to be reload + * @param cachedKeys keys already exists in the cache + * @param cachedPropertyMap cached map of propertykey -> property object + * @param the type of metadata + * @return + */ + public static Map updateReloadProperties( + HelixDataAccessor accessor, List reloadKeys, List cachedKeys, + Map cachedPropertyMap) { +// All new entries from zk not cached locally yet should be read from ZK. +Map refreshedPropertyMap = Maps.newHashMap(); +List stats = accessor.getPropertyStats(cachedKeys); +for (int i = 0; i < cachedKeys.size(); i++) { + PropertyKey key = cachedKeys.get(i); + HelixProperty.Stat stat = stats.get(i); + if (stat != null) { +T property = cachedPropertyMap.get(key); +if (property != null && property.getBucketSize() == 0 && property.getStat().equals(stat)) { + refreshedPropertyMap.put(key, property); +} else { + // need update from zk + reloadKeys.add(key); +} + } else { +LOG.warn("stat is null for key: " + key); +reloadKeys.add(key); + } +} + +List reloadedProperty = accessor.getProperty(reloadKeys, true); +Iterator csKeyIter = reloadKeys.iterator(); --- End diff -- Will change the name. ---
[GitHub] helix pull request #169: Two Performance Improvement for Rebalance pipeline
Github user dasahcc commented on a diff in the pull request: https://github.com/apache/helix/pull/169#discussion_r177527284 --- Diff: helix-core/src/main/java/org/apache/helix/common/caches/BasicClusterDataCache.java --- @@ -104,6 +108,52 @@ public void refresh(HelixDataAccessor accessor) { } } + /** + * Selective update Helix Cache by version + * @param accessor the HelixDataAccessor + * @param reloadKeys keys needs to be reload + * @param cachedKeys keys already exists in the cache + * @param cachedPropertyMap cached map of propertykey -> property object + * @param the type of metadata + * @return + */ + public static Map updateReloadProperties( + HelixDataAccessor accessor, List reloadKeys, List cachedKeys, + Map cachedPropertyMap) { +// All new entries from zk not cached locally yet should be read from ZK. +Map refreshedPropertyMap = Maps.newHashMap(); +List stats = accessor.getPropertyStats(cachedKeys); +for (int i = 0; i < cachedKeys.size(); i++) { --- End diff -- deleted the node has been removed for previous preprocess. The cached key will only retain the keys from reload keys from ZK. ---
[GitHub] helix pull request #169: Two Performance Improvement for Rebalance pipeline
Github user dasahcc commented on a diff in the pull request: https://github.com/apache/helix/pull/169#discussion_r177526661 --- Diff: helix-core/src/main/java/org/apache/helix/common/caches/BasicClusterDataCache.java --- @@ -104,6 +108,52 @@ public void refresh(HelixDataAccessor accessor) { } } + /** + * Selective update Helix Cache by version + * @param accessor the HelixDataAccessor + * @param reloadKeys keys needs to be reload + * @param cachedKeys keys already exists in the cache + * @param cachedPropertyMap cached map of propertykey -> property object + * @param the type of metadata + * @return + */ + public static Map updateReloadProperties( --- End diff -- Since this method will be used by different cache class, I think it is better to put this method the basic class of them. ---
[GitHub] helix pull request #169: Two Performance Improvement for Rebalance pipeline
Github user kishoreg commented on a diff in the pull request: https://github.com/apache/helix/pull/169#discussion_r177523538 --- Diff: helix-core/src/main/java/org/apache/helix/common/caches/BasicClusterDataCache.java --- @@ -104,6 +108,52 @@ public void refresh(HelixDataAccessor accessor) { } } + /** + * Selective update Helix Cache by version + * @param accessor the HelixDataAccessor + * @param reloadKeys keys needs to be reload + * @param cachedKeys keys already exists in the cache + * @param cachedPropertyMap cached map of propertykey -> property object + * @param the type of metadata + * @return + */ + public static Map updateReloadProperties( + HelixDataAccessor accessor, List reloadKeys, List cachedKeys, --- End diff -- how will cachedKeys be different from cachedPropertyMap.keySet? ---
[GitHub] helix pull request #169: Two Performance Improvement for Rebalance pipeline
Github user kishoreg commented on a diff in the pull request: https://github.com/apache/helix/pull/169#discussion_r177523236 --- Diff: helix-core/src/main/java/org/apache/helix/common/caches/BasicClusterDataCache.java --- @@ -104,6 +108,52 @@ public void refresh(HelixDataAccessor accessor) { } } + /** + * Selective update Helix Cache by version + * @param accessor the HelixDataAccessor + * @param reloadKeys keys needs to be reload + * @param cachedKeys keys already exists in the cache + * @param cachedPropertyMap cached map of propertykey -> property object + * @param the type of metadata + * @return + */ + public static Map updateReloadProperties( + HelixDataAccessor accessor, List reloadKeys, List cachedKeys, + Map cachedPropertyMap) { +// All new entries from zk not cached locally yet should be read from ZK. +Map refreshedPropertyMap = Maps.newHashMap(); +List stats = accessor.getPropertyStats(cachedKeys); +for (int i = 0; i < cachedKeys.size(); i++) { + PropertyKey key = cachedKeys.get(i); + HelixProperty.Stat stat = stats.get(i); + if (stat != null) { +T property = cachedPropertyMap.get(key); +if (property != null && property.getBucketSize() == 0 && property.getStat().equals(stat)) { + refreshedPropertyMap.put(key, property); +} else { + // need update from zk + reloadKeys.add(key); +} + } else { +LOG.warn("stat is null for key: " + key); +reloadKeys.add(key); + } +} + +List reloadedProperty = accessor.getProperty(reloadKeys, true); +Iterator csKeyIter = reloadKeys.iterator(); +for (T property : reloadedProperty) { + PropertyKey key = csKeyIter.next(); + if (property != null) { +refreshedPropertyMap.put(key, property); + } else { +LOG.warn("CurrentState null for key: " + key); --- End diff -- this is not related to currentstate right ---
[GitHub] helix pull request #169: Two Performance Improvement for Rebalance pipeline
Github user kishoreg commented on a diff in the pull request: https://github.com/apache/helix/pull/169#discussion_r177522574 --- Diff: helix-core/src/main/java/org/apache/helix/common/caches/BasicClusterDataCache.java --- @@ -104,6 +108,52 @@ public void refresh(HelixDataAccessor accessor) { } } + /** + * Selective update Helix Cache by version + * @param accessor the HelixDataAccessor + * @param reloadKeys keys needs to be reload + * @param cachedKeys keys already exists in the cache + * @param cachedPropertyMap cached map of propertykey -> property object + * @param the type of metadata + * @return + */ + public static Map updateReloadProperties( --- End diff -- If this is a public static method, why is this in this class? ---
[GitHub] helix pull request #169: Two Performance Improvement for Rebalance pipeline
Github user kishoreg commented on a diff in the pull request: https://github.com/apache/helix/pull/169#discussion_r177523148 --- Diff: helix-core/src/main/java/org/apache/helix/common/caches/BasicClusterDataCache.java --- @@ -104,6 +108,52 @@ public void refresh(HelixDataAccessor accessor) { } } + /** + * Selective update Helix Cache by version + * @param accessor the HelixDataAccessor + * @param reloadKeys keys needs to be reload + * @param cachedKeys keys already exists in the cache + * @param cachedPropertyMap cached map of propertykey -> property object + * @param the type of metadata + * @return + */ + public static Map updateReloadProperties( + HelixDataAccessor accessor, List reloadKeys, List cachedKeys, + Map cachedPropertyMap) { +// All new entries from zk not cached locally yet should be read from ZK. +Map refreshedPropertyMap = Maps.newHashMap(); +List stats = accessor.getPropertyStats(cachedKeys); +for (int i = 0; i < cachedKeys.size(); i++) { + PropertyKey key = cachedKeys.get(i); + HelixProperty.Stat stat = stats.get(i); + if (stat != null) { +T property = cachedPropertyMap.get(key); +if (property != null && property.getBucketSize() == 0 && property.getStat().equals(stat)) { + refreshedPropertyMap.put(key, property); +} else { + // need update from zk + reloadKeys.add(key); +} + } else { +LOG.warn("stat is null for key: " + key); +reloadKeys.add(key); + } +} + +List reloadedProperty = accessor.getProperty(reloadKeys, true); +Iterator csKeyIter = reloadKeys.iterator(); --- End diff -- is this method specific to current state? why are the variables named cs? ---
[GitHub] helix pull request #169: Two Performance Improvement for Rebalance pipeline
Github user kishoreg commented on a diff in the pull request: https://github.com/apache/helix/pull/169#discussion_r177522908 --- Diff: helix-core/src/main/java/org/apache/helix/common/caches/BasicClusterDataCache.java --- @@ -104,6 +108,52 @@ public void refresh(HelixDataAccessor accessor) { } } + /** + * Selective update Helix Cache by version + * @param accessor the HelixDataAccessor + * @param reloadKeys keys needs to be reload + * @param cachedKeys keys already exists in the cache + * @param cachedPropertyMap cached map of propertykey -> property object + * @param the type of metadata + * @return + */ + public static Map updateReloadProperties( + HelixDataAccessor accessor, List reloadKeys, List cachedKeys, + Map cachedPropertyMap) { +// All new entries from zk not cached locally yet should be read from ZK. +Map refreshedPropertyMap = Maps.newHashMap(); +List stats = accessor.getPropertyStats(cachedKeys); +for (int i = 0; i < cachedKeys.size(); i++) { --- End diff -- what will the stat be if the znode is deleted ---
[GitHub] helix pull request #169: Two Performance Improvement for Rebalance pipeline
GitHub user dasahcc opened a pull request: https://github.com/apache/helix/pull/169 Two Performance Improvement for Rebalance pipeline Two improvements have been implemented : 1. Avoid to read data from ZK for ExternalViewStage 2. Support selective update for IdealState You can merge this pull request into a Git repository by running: $ git pull https://github.com/dasahcc/helix perf_improve Alternatively you can review and apply these changes as the patch at: https://github.com/apache/helix/pull/169.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #169 commit ead4d3760b36576d8be5cf0480c84139d989f676 Author: Junkai Xue Date: 2018-03-08T00:55:30Z Avoid to read data from ZK for ExternalViewStage Helix Controller is trying to external view back to compare with ZK data, which is not necessary since we can cache the data and compare them in memory. RB=1244812 BUG=HELIX-844 G=helix-reviewers A=jjwang commit c2378557be5f6e4d6be9f7c90088427fb2a98d43 Author: Junkai Xue Date: 2018-03-26T23:02:52Z Support selective update for IdealState To have better performance for Pinot, we need some improvements for rebalance pipeline. One of the improvement is trying to read as less as possible. So use version based IdealState read will be very helpful. ---