Repository: incubator-reef
Updated Branches:
  refs/heads/master 4a9796792 -> 9b16d54cd


[REEF-613] DataLoadingRequestBuilder should use the desired number of input 
splits defined by the user

Previously, the numberOfDesiredInputSplits defined by the user was overridden
to 0 whenever they invoked the DataLoadingRequestBuilder.setInputPath method.
This change allows the DataLoadingRequestBuilder.setInputPath and
DataLoadingRequestBuilder.setNumberOfDesiredSplits methods to be used
together safely.
We now set the correct number of desired splits to the DistributedDataSet 
object we create.
This also un-deprecates some of the API to avoid the overhead of
creating the multi data center strategy, and an exception is now thrown
if the setInputPath and setDistributedDataSet methods are both called (in
any order).

JIRA:
  [REEF-613](https://issues.apache.org/jira/browse/REEF-613)

Pull Request:
  Closes #391


Project: http://git-wip-us.apache.org/repos/asf/incubator-reef/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-reef/commit/9b16d54c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-reef/tree/9b16d54c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-reef/diff/9b16d54c

Branch: refs/heads/master
Commit: 9b16d54cdef4c68e580e3dc0625ef9fc9043cfdf
Parents: 4a97967
Author: Ignacio Cano <[email protected]>
Authored: Thu Aug 20 11:34:46 2015 -0700
Committer: Jason (Joo Seong) Jeong <[email protected]>
Committed: Tue Aug 25 17:48:45 2015 +0900

----------------------------------------------------------------------
 .../loading/api/DataLoadingRequestBuilder.java  | 51 ++++++++++++++------
 1 file changed, 36 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-reef/blob/9b16d54c/lang/java/reef-io/src/main/java/org/apache/reef/io/data/loading/api/DataLoadingRequestBuilder.java
----------------------------------------------------------------------
diff --git 
a/lang/java/reef-io/src/main/java/org/apache/reef/io/data/loading/api/DataLoadingRequestBuilder.java
 
b/lang/java/reef-io/src/main/java/org/apache/reef/io/data/loading/api/DataLoadingRequestBuilder.java
index 79275a0..46085fe 100644
--- 
a/lang/java/reef-io/src/main/java/org/apache/reef/io/data/loading/api/DataLoadingRequestBuilder.java
+++ 
b/lang/java/reef-io/src/main/java/org/apache/reef/io/data/loading/api/DataLoadingRequestBuilder.java
@@ -83,6 +83,11 @@ public final class DataLoadingRequestBuilder
    */
   private DistributedDataSet distributedDataSet;
 
+  /**
+   * The input path of the data to be loaded.
+   */
+  private String inputPath;
+
   public DataLoadingRequestBuilder setNumberOfDesiredSplits(final int 
numberOfDesiredSplits) {
     this.numberOfDesiredSplits = numberOfDesiredSplits;
     return this;
@@ -205,29 +210,23 @@ public final class DataLoadingRequestBuilder
   }
 
   /**
-   * Sets the path of the folder where the data is. Internally it constructs a
-   * distributed data set with one partition, no splits and the data can be
-   * loaded from anywhere.
+   * Sets the path of the folder where the data is.
+   * Internally, a distributed dataset with a unique partition is created,
+   * and {@link SingleDataCenterEvaluatorToPartitionStrategy} is binded.
    *
-   * @deprecated since 0.12. Should use instead
-   *             {@link 
DataLoadingRequestBuilder#setDistributedDataSet(DistributedDataSet)}
    * @param inputPath
    *          the input path
    * @return this
    */
-  @Deprecated
   public DataLoadingRequestBuilder setInputPath(final String inputPath) {
-    final DistributedDataSet dds = new DistributedDataSet();
-    
dds.addPartition(DistributedDataSetPartition.newBuilder().setPath(inputPath)
-        .setLocation(DistributedDataSetPartition.LOAD_INTO_ANY_LOCATION)
-        
.setDesiredSplits(Integer.valueOf(NumberOfDesiredSplits.DEFAULT_DESIRED_SPLITS)).build());
+    this.inputPath = inputPath;
     this.singleDataCenterStrategy = true;
-    this.distributedDataSet = dds;
     return this;
   }
 
   /**
    * Sets the distributed data set.
+   * Internally, a {@link MultiDataCenterEvaluatorToPartitionStrategy} is 
binded.
    *
    * @param distributedDataSet
    *          the distributed data set
@@ -245,6 +244,32 @@ public final class DataLoadingRequestBuilder
       throw new BindException("Driver Configuration Module is a required 
parameter.");
     }
 
+    // need to create the distributed data set
+    if (this.singleDataCenterStrategy) {
+      if (this.inputPath == null) {
+        throw new BindException("Should specify an input path.");
+      }
+      if (this.distributedDataSet != null && 
!this.distributedDataSet.isEmpty()) {
+        throw new BindException("You should either call setInputPath or 
setDistributedDataSet, but not both");
+      }
+      // Create a distributed data set with one partition, the splits defined 
by
+      // the user if greater than 0 or no splits, and data to be loaded from
+      // anywhere.
+      final DistributedDataSet dds = new DistributedDataSet();
+      dds.addPartition(DistributedDataSetPartition
+          .newBuilder()
+          .setPath(inputPath)
+          .setLocation(DistributedDataSetPartition.LOAD_INTO_ANY_LOCATION)
+          .setDesiredSplits(
+              numberOfDesiredSplits > 0 ? numberOfDesiredSplits : Integer
+                  
.valueOf(NumberOfDesiredSplits.DEFAULT_DESIRED_SPLITS)).build());
+      this.distributedDataSet = dds;
+    } else {
+      if (this.inputPath != null) {
+        throw new BindException("You should either call setInputPath or 
setDistributedDataSet, but not both");
+      }
+    }
+
     if (this.distributedDataSet == null || this.distributedDataSet.isEmpty()) {
       throw new BindException("Distributed Data Set is a required parameter.");
     }
@@ -270,10 +295,6 @@ public final class DataLoadingRequestBuilder
     final JavaConfigurationBuilder jcb =
         Tang.Factory.getTang().newConfigurationBuilder(driverConfiguration);
 
-    if (this.numberOfDesiredSplits > 0) {
-      jcb.bindNamedParameter(NumberOfDesiredSplits.class, "" + 
this.numberOfDesiredSplits);
-    }
-
     // if empty, then the user code still uses the deprecated fields.
     // we create a dataLoadRequest object based on them (or their default 
values)
     if (this.dataRequests.isEmpty()) {

Reply via email to