[jira] [Work logged] (BEAM-4019) Refactor HBaseIO splitting to produce ByteKeyRange objects
[ https://issues.apache.org/jira/browse/BEAM-4019?focusedWorklogId=92208&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-92208 ] ASF GitHub Bot logged work on BEAM-4019: Author: ASF GitHub Bot Created on: 18/Apr/18 18:36 Start Date: 18/Apr/18 18:36 Worklog Time Spent: 10m Work Description: iemejia closed pull request #5081: [BEAM-4019] Refactor HBaseIO splitting to produce ByteKeyRange objects URL: https://github.com/apache/beam/pull/5081 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/sdks/java/io/hbase/src/main/java/org/apache/beam/sdk/io/hbase/HBaseIO.java b/sdks/java/io/hbase/src/main/java/org/apache/beam/sdk/io/hbase/HBaseIO.java index bcdaefa1498..5b8f00439d6 100644 --- a/sdks/java/io/hbase/src/main/java/org/apache/beam/sdk/io/hbase/HBaseIO.java +++ b/sdks/java/io/hbase/src/main/java/org/apache/beam/sdk/io/hbase/HBaseIO.java @@ -22,13 +22,10 @@ import java.io.IOException; import java.util.ArrayList; -import java.util.Collection; import java.util.Collections; import java.util.Iterator; import java.util.List; import java.util.NoSuchElementException; -import java.util.Set; -import java.util.TreeSet; import javax.annotation.Nullable; import org.apache.beam.sdk.annotations.Experimental; import org.apache.beam.sdk.coders.Coder; @@ -46,11 +43,7 @@ import org.apache.beam.sdk.values.PCollection; import org.apache.beam.sdk.values.PDone; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.ClusterStatus; import org.apache.hadoop.hbase.HRegionLocation; -import org.apache.hadoop.hbase.RegionLoad; -import org.apache.hadoop.hbase.ServerLoad; -import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.Admin; import 
org.apache.hadoop.hbase.client.BufferedMutator; @@ -58,13 +51,11 @@ import org.apache.hadoop.hbase.client.Connection; import org.apache.hadoop.hbase.client.ConnectionFactory; import org.apache.hadoop.hbase.client.Mutation; -import org.apache.hadoop.hbase.client.RegionLocator; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.filter.Filter; -import org.apache.hadoop.hbase.util.Bytes; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -228,8 +219,11 @@ private Read( } catch (IOException e) { LOG.warn("Error checking whether table {} exists; proceeding.", tableId, e); } - HBaseSource source = new HBaseSource(this, null /* estimatedSizeBytes */); - return input.getPipeline().apply(org.apache.beam.sdk.io.Read.from(source)); + return input + .getPipeline() + .apply( + org.apache.beam.sdk.io.Read.from( + new HBaseSource(this, null /* estimatedSizeBytes */))); } @Override @@ -294,7 +288,11 @@ HBaseSource withEndKey(ByteKey endKey) throws IOException { @Override public long getEstimatedSizeBytes(PipelineOptions pipelineOptions) throws Exception { if (estimatedSizeBytes == null) { -estimatedSizeBytes = estimateSizeBytes(); +try (Connection connection = + ConnectionFactory.createConnection(read.serializableConfiguration.get())) { + estimatedSizeBytes = + HBaseUtils.estimateSizeBytes(connection, read.tableId, read.serializableScan.get()); +} LOG.debug( "Estimated size {} bytes for table {} and scan {}", estimatedSizeBytes, @@ -304,111 +302,6 @@ public long getEstimatedSizeBytes(PipelineOptions pipelineOptions) throws Except return estimatedSizeBytes; } -/** - * This estimates the real size, it can be the compressed size depending on the HBase - * configuration. 
- */ -private long estimateSizeBytes() throws Exception { - // This code is based on RegionSizeCalculator in hbase-server - long estimatedSizeBytes = 0L; - Configuration configuration = this.read.serializableConfiguration.get(); - try (Connection connection = ConnectionFactory.createConnection(configuration)) { -// filter regions for the given table/scan -List regionLocations = getRegionLocations(connection); - -// builds set of regions who are part of the table scan -Set tableRegions = new TreeSet<>(Bytes.BYTES_COMPARATOR); -for (HRegionLocation regionLocation : regionLocations) { - tableRegions.add(regionLocation.getRegionInfo().getRegionName()); -} - -// calculate esti
[jira] [Work logged] (BEAM-4019) Refactor HBaseIO splitting to produce ByteKeyRange objects
[ https://issues.apache.org/jira/browse/BEAM-4019?focusedWorklogId=92207&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-92207 ] ASF GitHub Bot logged work on BEAM-4019: Author: ASF GitHub Bot Created on: 18/Apr/18 18:36 Start Date: 18/Apr/18 18:36 Worklog Time Spent: 10m Work Description: iemejia commented on issue #5081: [BEAM-4019] Refactor HBaseIO splitting to produce ByteKeyRange objects URL: https://github.com/apache/beam/pull/5081#issuecomment-382486788 Jenkins break is unrelated so I am merging it. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 92207) Time Spent: 1h 40m (was: 1.5h) > Refactor HBaseIO splitting to produce ByteKeyRange objects > -- > > Key: BEAM-4019 > URL: https://issues.apache.org/jira/browse/BEAM-4019 > Project: Beam > Issue Type: Improvement > Components: io-java-hbase >Reporter: Ismaël Mejía >Assignee: Ismaël Mejía >Priority: Minor > Time Spent: 1h 40m > Remaining Estimate: 0h > > This allows to reuse the splitting logic for a future SDF-based > implementation by reusing it as part of the @SplitRestriction method. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4019) Refactor HBaseIO splitting to produce ByteKeyRange objects
[ https://issues.apache.org/jira/browse/BEAM-4019?focusedWorklogId=92156&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-92156 ] ASF GitHub Bot logged work on BEAM-4019: Author: ASF GitHub Bot Created on: 18/Apr/18 16:18 Start Date: 18/Apr/18 16:18 Worklog Time Spent: 10m Work Description: iemejia commented on issue #5081: [BEAM-4019] Refactor HBaseIO splitting to produce ByteKeyRange objects URL: https://github.com/apache/beam/pull/5081#issuecomment-38248 Ok rebased and merged, this is good to go now. I will wait for Jenkins to be happy and then merge, thanks @aromanenko-dev and @tweise for the review. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 92156) Time Spent: 1.5h (was: 1h 20m) > Refactor HBaseIO splitting to produce ByteKeyRange objects > -- > > Key: BEAM-4019 > URL: https://issues.apache.org/jira/browse/BEAM-4019 > Project: Beam > Issue Type: Improvement > Components: io-java-hbase >Reporter: Ismaël Mejía >Assignee: Ismaël Mejía >Priority: Minor > Time Spent: 1.5h > Remaining Estimate: 0h > > This allows to reuse the splitting logic for a future SDF-based > implementation by reusing it as part of the @SplitRestriction method. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4019) Refactor HBaseIO splitting to produce ByteKeyRange objects
[ https://issues.apache.org/jira/browse/BEAM-4019?focusedWorklogId=92155&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-92155 ] ASF GitHub Bot logged work on BEAM-4019: Author: ASF GitHub Bot Created on: 18/Apr/18 16:17 Start Date: 18/Apr/18 16:17 Worklog Time Spent: 10m Work Description: iemejia commented on issue #5081: [BEAM-4019] Refactor HBaseIO splitting to produce ByteKeyRange objects URL: https://github.com/apache/beam/pull/5081#issuecomment-380268135 Mmmm need to run some extra validations (eventual tests). This is apparently not yet good to merge. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 92155) Time Spent: 1h 20m (was: 1h 10m) > Refactor HBaseIO splitting to produce ByteKeyRange objects > -- > > Key: BEAM-4019 > URL: https://issues.apache.org/jira/browse/BEAM-4019 > Project: Beam > Issue Type: Improvement > Components: io-java-hbase >Reporter: Ismaël Mejía >Assignee: Ismaël Mejía >Priority: Minor > Time Spent: 1h 20m > Remaining Estimate: 0h > > This allows to reuse the splitting logic for a future SDF-based > implementation by reusing it as part of the @SplitRestriction method. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4019) Refactor HBaseIO splitting to produce ByteKeyRange objects
[ https://issues.apache.org/jira/browse/BEAM-4019?focusedWorklogId=92151&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-92151 ] ASF GitHub Bot logged work on BEAM-4019: Author: ASF GitHub Bot Created on: 18/Apr/18 16:05 Start Date: 18/Apr/18 16:05 Worklog Time Spent: 10m Work Description: iemejia commented on a change in pull request #5081: [BEAM-4019] Refactor HBaseIO splitting to produce ByteKeyRange objects URL: https://github.com/apache/beam/pull/5081#discussion_r182483046 ## File path: sdks/java/io/hbase/src/main/java/org/apache/beam/sdk/io/hbase/HBaseIO.java ## @@ -420,17 +313,38 @@ private long estimateSizeBytes() throws Exception { } try (Connection connection = ConnectionFactory.createConnection(read.getConfiguration())) { -List regionLocations = getRegionLocations(connection); -int realNumSplits = numSplits < regionLocations.size() ? regionLocations.size() : numSplits; -LOG.debug("Suggested {} bundle(s) based on size", numSplits); -LOG.debug("Suggested {} bundle(s) based on number of regions", regionLocations.size()); -final List sources = splitBasedOnRegions(regionLocations, realNumSplits); -LOG.debug("Split into {} bundle(s)", sources.size()); -if (numSplits >= 1) { +List regionLocations = +HBaseUtils.getRegionLocations(connection, read.tableId, read.serializableScan.get()); +LOG.debug("Suggested {} source(s) based on size", numSplits); +LOG.debug("Suggested {} source(s) based on number of regions", regionLocations.size()); + +List ranges = +HBaseUtils.getRanges( +regionLocations, read.tableId, read.serializableScan.get()); +final int numSources = ranges.size(); +LOG.debug("Spliting into {} source(s)", numSources); +if (numSources >= 1) { Review comment: yes sir ! This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 92151) Time Spent: 1h 10m (was: 1h) > Refactor HBaseIO splitting to produce ByteKeyRange objects > -- > > Key: BEAM-4019 > URL: https://issues.apache.org/jira/browse/BEAM-4019 > Project: Beam > Issue Type: Improvement > Components: io-java-hbase >Reporter: Ismaël Mejía >Assignee: Ismaël Mejía >Priority: Minor > Time Spent: 1h 10m > Remaining Estimate: 0h > > This allows to reuse the splitting logic for a future SDF-based > implementation by reusing it as part of the @SplitRestriction method. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4019) Refactor HBaseIO splitting to produce ByteKeyRange objects
[ https://issues.apache.org/jira/browse/BEAM-4019?focusedWorklogId=90659&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-90659 ] ASF GitHub Bot logged work on BEAM-4019: Author: ASF GitHub Bot Created on: 13/Apr/18 00:58 Start Date: 13/Apr/18 00:58 Worklog Time Spent: 10m Work Description: tweise commented on a change in pull request #5081: [BEAM-4019] Refactor HBaseIO splitting to produce ByteKeyRange objects URL: https://github.com/apache/beam/pull/5081#discussion_r181261877 ## File path: sdks/java/io/hbase/src/main/java/org/apache/beam/sdk/io/hbase/HBaseIO.java ## @@ -420,17 +313,38 @@ private long estimateSizeBytes() throws Exception { } try (Connection connection = ConnectionFactory.createConnection(read.getConfiguration())) { -List regionLocations = getRegionLocations(connection); -int realNumSplits = numSplits < regionLocations.size() ? regionLocations.size() : numSplits; -LOG.debug("Suggested {} bundle(s) based on size", numSplits); -LOG.debug("Suggested {} bundle(s) based on number of regions", regionLocations.size()); -final List sources = splitBasedOnRegions(regionLocations, realNumSplits); -LOG.debug("Split into {} bundle(s)", sources.size()); -if (numSplits >= 1) { +List regionLocations = +HBaseUtils.getRegionLocations(connection, read.tableId, read.serializableScan.get()); +LOG.debug("Suggested {} source(s) based on size", numSplits); +LOG.debug("Suggested {} source(s) based on number of regions", regionLocations.size()); + +List ranges = +HBaseUtils.getRanges( +regionLocations, read.tableId, read.serializableScan.get()); +final int numSources = ranges.size(); +LOG.debug("Spliting into {} source(s)", numSources); +if (numSources >= 1) { Review comment: `if numSources > 0` ? This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 90659) Time Spent: 1h (was: 50m) > Refactor HBaseIO splitting to produce ByteKeyRange objects > -- > > Key: BEAM-4019 > URL: https://issues.apache.org/jira/browse/BEAM-4019 > Project: Beam > Issue Type: Improvement > Components: io-java-hbase >Reporter: Ismaël Mejía >Assignee: Ismaël Mejía >Priority: Minor > Time Spent: 1h > Remaining Estimate: 0h > > This allows to reuse the splitting logic for a future SDF-based > implementation by reusing it as part of the @SplitRestriction method. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4019) Refactor HBaseIO splitting to produce ByteKeyRange objects
[ https://issues.apache.org/jira/browse/BEAM-4019?focusedWorklogId=89657&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-89657 ] ASF GitHub Bot logged work on BEAM-4019: Author: ASF GitHub Bot Created on: 10/Apr/18 22:30 Start Date: 10/Apr/18 22:30 Worklog Time Spent: 10m Work Description: iemejia commented on issue #5081: [BEAM-4019] Refactor HBaseIO splitting to produce ByteKeyRange objects URL: https://github.com/apache/beam/pull/5081#issuecomment-380268135 Mmmm need to run some extra validations (eventual tests). This is apparently not yet good to merge. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 89657) Time Spent: 50m (was: 40m) > Refactor HBaseIO splitting to produce ByteKeyRange objects > -- > > Key: BEAM-4019 > URL: https://issues.apache.org/jira/browse/BEAM-4019 > Project: Beam > Issue Type: Improvement > Components: io-java-hbase >Reporter: Ismaël Mejía >Assignee: Ismaël Mejía >Priority: Minor > Time Spent: 50m > Remaining Estimate: 0h > > This allows to reuse the splitting logic for a future SDF-based > implementation by reusing it as part of the @SplitRestriction method. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4019) Refactor HBaseIO splitting to produce ByteKeyRange objects
[ https://issues.apache.org/jira/browse/BEAM-4019?focusedWorklogId=89628&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-89628 ] ASF GitHub Bot logged work on BEAM-4019: Author: ASF GitHub Bot Created on: 10/Apr/18 21:28 Start Date: 10/Apr/18 21:28 Worklog Time Spent: 10m Work Description: iemejia commented on a change in pull request #5081: [BEAM-4019] Refactor HBaseIO splitting to produce ByteKeyRange objects URL: https://github.com/apache/beam/pull/5081#discussion_r180573328 ## File path: sdks/java/io/hbase/src/main/java/org/apache/beam/sdk/io/hbase/HBaseUtils.java ## @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.sdk.io.hbase; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Set; +import java.util.TreeSet; +import org.apache.beam.sdk.io.range.ByteKey; +import org.apache.beam.sdk.io.range.ByteKeyRange; +import org.apache.hadoop.hbase.ClusterStatus; +import org.apache.hadoop.hbase.HRegionLocation; +import org.apache.hadoop.hbase.RegionLoad; +import org.apache.hadoop.hbase.ServerLoad; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.Admin; +import org.apache.hadoop.hbase.client.Connection; +import org.apache.hadoop.hbase.client.RegionLocator; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.util.Bytes; + +/** Utils to interact with an HBase cluster and get information on tables/regions. */ +class HBaseUtils { + Review comment: Thanks, added docs in all methods of utils. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 89628) Time Spent: 40m (was: 0.5h) > Refactor HBaseIO splitting to produce ByteKeyRange objects > -- > > Key: BEAM-4019 > URL: https://issues.apache.org/jira/browse/BEAM-4019 > Project: Beam > Issue Type: Improvement > Components: io-java-hbase >Reporter: Ismaël Mejía >Assignee: Ismaël Mejía >Priority: Minor > Time Spent: 40m > Remaining Estimate: 0h > > This allows to reuse the splitting logic for a future SDF-based > implementation by reusing it as part of the @SplitRestriction method. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4019) Refactor HBaseIO splitting to produce ByteKeyRange objects
[ https://issues.apache.org/jira/browse/BEAM-4019?focusedWorklogId=89462&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-89462 ] ASF GitHub Bot logged work on BEAM-4019: Author: ASF GitHub Bot Created on: 10/Apr/18 16:12 Start Date: 10/Apr/18 16:12 Worklog Time Spent: 10m Work Description: aromanenko-dev commented on a change in pull request #5081: [BEAM-4019] Refactor HBaseIO splitting to produce ByteKeyRange objects URL: https://github.com/apache/beam/pull/5081#discussion_r180476050 ## File path: sdks/java/io/hbase/src/main/java/org/apache/beam/sdk/io/hbase/HBaseUtils.java ## @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.sdk.io.hbase; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Set; +import java.util.TreeSet; +import org.apache.beam.sdk.io.range.ByteKey; +import org.apache.beam.sdk.io.range.ByteKeyRange; +import org.apache.hadoop.hbase.ClusterStatus; +import org.apache.hadoop.hbase.HRegionLocation; +import org.apache.hadoop.hbase.RegionLoad; +import org.apache.hadoop.hbase.ServerLoad; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.Admin; +import org.apache.hadoop.hbase.client.Connection; +import org.apache.hadoop.hbase.client.RegionLocator; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.util.Bytes; + +/** Utils to interact with an HBase cluster and get information on tables/regions. */ +class HBaseUtils { + Review comment: Please, add JavaDoc for this method that explains how it works This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 89462) Time Spent: 0.5h (was: 20m) > Refactor HBaseIO splitting to produce ByteKeyRange objects > -- > > Key: BEAM-4019 > URL: https://issues.apache.org/jira/browse/BEAM-4019 > Project: Beam > Issue Type: Improvement > Components: io-java-hbase >Reporter: Ismaël Mejía >Assignee: Ismaël Mejía >Priority: Minor > Time Spent: 0.5h > Remaining Estimate: 0h > > This allows to reuse the splitting logic for a future SDF-based > implementation by reusing it as part of the @SplitRestriction method. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4019) Refactor HBaseIO splitting to produce ByteKeyRange objects
[ https://issues.apache.org/jira/browse/BEAM-4019?focusedWorklogId=89397&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-89397 ] ASF GitHub Bot logged work on BEAM-4019: Author: ASF GitHub Bot Created on: 10/Apr/18 13:58 Start Date: 10/Apr/18 13:58 Worklog Time Spent: 10m Work Description: iemejia commented on issue #5081: [BEAM-4019] Refactor HBaseIO splitting to produce ByteKeyRange objects URL: https://github.com/apache/beam/pull/5081#issuecomment-380108054 R: @aromanenko-dev @tweise This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 89397) Time Spent: 20m (was: 10m) > Refactor HBaseIO splitting to produce ByteKeyRange objects > -- > > Key: BEAM-4019 > URL: https://issues.apache.org/jira/browse/BEAM-4019 > Project: Beam > Issue Type: Improvement > Components: io-java-hbase >Reporter: Ismaël Mejía >Assignee: Ismaël Mejía >Priority: Minor > Time Spent: 20m > Remaining Estimate: 0h > > This allows to reuse the splitting logic for a future SDF-based > implementation by reusing it as part of the @SplitRestriction method. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4019) Refactor HBaseIO splitting to produce ByteKeyRange objects
[ https://issues.apache.org/jira/browse/BEAM-4019?focusedWorklogId=89391&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-89391 ] ASF GitHub Bot logged work on BEAM-4019: Author: ASF GitHub Bot Created on: 10/Apr/18 13:55 Start Date: 10/Apr/18 13:55 Worklog Time Spent: 10m Work Description: iemejia opened a new pull request #5081: [BEAM-4019] Refactor HBaseIO splitting to produce ByteKeyRange objects URL: https://github.com/apache/beam/pull/5081 For some context, this is an internal refactor towards an SDF-based translation of HBase read; it is just internals. I extracted most (if not all) HBase region/split related methods into an individual class `HBaseUtils` too. I did not create an additional unit test for that one since it is well covered by HBaseIOTest and I did not want to add extra time to the build. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 89391) Time Spent: 10m Remaining Estimate: 0h > Refactor HBaseIO splitting to produce ByteKeyRange objects > -- > > Key: BEAM-4019 > URL: https://issues.apache.org/jira/browse/BEAM-4019 > Project: Beam > Issue Type: Improvement > Components: io-java-hbase >Reporter: Ismaël Mejía >Assignee: Ismaël Mejía >Priority: Minor > Time Spent: 10m > Remaining Estimate: 0h > > This allows to reuse the splitting logic for a future SDF-based > implementation by reusing it as part of the @SplitRestriction method. -- This message was sent by Atlassian JIRA (v7.6.3#76005)