dsmiley commented on code in PR #4320:
URL: https://github.com/apache/solr/pull/4320#discussion_r3135104415


##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/graph/ShortestPathStream.java:
##########
@@ -79,7 +79,7 @@ public class ShortestPathStream extends TupleStream 
implements Expressible {
   private SolrParams queryParams;
 
   public ShortestPathStream(
-      String zkHost,
+      String solrCloud,

Review Comment:
   Can we use that nice record you made previously for the connection string 
instead of String?  To be used not just right here but really everywhere in 
this PR that needs to refer to the connection.  It would be so nice to have a 
real type vs String which is used for anything and everything, defeating the 
typed nature of Java.
   If not, fine.



##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/CloudSolrStream.java:
##########
@@ -107,7 +106,8 @@ public CloudSolrStream(StreamExpression expression, 
StreamFactory factory) throw
               expression));
     }
 
-    // Validate there are no unknown parameters - zkHost and alias are 
namedParameter, so we don't
+    // Validate there are no unknown parameters - solrCloud/zkHost and alias 
are namedParameter, so
+    // we don't
     // need to count it twice

Review Comment:
   reflow newline



##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/Facet2DStream.java:
##########
@@ -184,26 +177,9 @@ public Facet2DStream(StreamExpression expression, 
StreamFactory factory) throws
       }
     }
 
-    String zkHost = null;
-    if (zkHostExpression == null) {
-      zkHost = factory.getCollectionZkHost(collectionName);
-      if (zkHost == null) {
-        zkHost = factory.getDefaultZkHost();
-      }
-    } else if (zkHostExpression.getParameter() instanceof 
StreamExpressionValue) {
-      zkHost = ((StreamExpressionValue) 
zkHostExpression.getParameter()).getValue();
-    }
-
-    if (zkHost == null) {
-      throw new IOException(
-          String.format(
-              Locale.ROOT,
-              "invalid expression %s - zkHost not found for collection '%s'",
-              expression,
-              collectionName));
-    }
+    String solrCloud = getSolrCloud(factory, expression, collectionName);
 
-    init(collectionName, params, x, y, bucketSort, dimensionX, dimensionY, 
metric, zkHost);
+    init(collectionName, params, x, y, bucketSort, dimensionX, dimensionY, 
metric, solrCloud);

Review Comment:
   weird order again



##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/FeaturesSelectionStream.java:
##########
@@ -86,18 +85,18 @@ public FeaturesSelectionStream(
       int numTerms)
       throws IOException {
 
-    init(collectionName, zkHost, params, field, outcome, featureSet, 
positiveLabel, numTerms);
+    init(collectionName, solrCloud, params, field, outcome, featureSet, 
positiveLabel, numTerms);

Review Comment:
   again



##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/TupleStream.java:
##########
@@ -213,14 +219,73 @@ public static List<String> getShards(
       }
     } else {
       shards =
-          getReplicas(zkHost, collection, streamContext, 
requestParams).stream()
+          getReplicas(solrCloud, collection, streamContext, 
requestParams).stream()
               .map(Replica::getCoreUrl)
               .collect(Collectors.toList());
     }
 
     return shards;
   }
 
+  public static String getSolrCloud(

Review Comment:
   So I know I suggested calling the param "solrCloud" because in context to 
where a user types it (a streaming expression string), it seems a reasonable 
choice.  But at a code level like here and all the var names, I find it... not 
so good.  getCloudConnectionString would be better IMO and maybe used the 
record type you made.



##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/TupleStream.java:
##########
@@ -213,14 +219,73 @@ public static List<String> getShards(
       }
     } else {
       shards =
-          getReplicas(zkHost, collection, streamContext, 
requestParams).stream()
+          getReplicas(solrCloud, collection, streamContext, 
requestParams).stream()
               .map(Replica::getCoreUrl)
               .collect(Collectors.toList());
     }
 
     return shards;
   }
 
+  public static String getSolrCloud(
+      StreamFactory streamFactory, StreamExpression streamExpression, String 
collectionName)
+      throws IOException {
+    var solrCloudExpression = streamFactory.getNamedOperand(streamExpression, 
"solrCloud");
+    var zkHostExpression = streamFactory.getNamedOperand(streamExpression, 
"zkHost");
+    String solrCloud = null;
+    if (zkHostExpression == null && solrCloudExpression == null) {
+      solrCloud = streamFactory.getCollectionZkHost(collectionName);
+      if (solrCloud == null) {
+        solrCloud = streamFactory.getDefaultZkHost();
+      }
+    } else if (solrCloudExpression != null
+        && solrCloudExpression.getParameter() instanceof StreamExpressionValue 
exprValue) {
+      solrCloud = exprValue.getValue();
+    } else if (zkHostExpression != null
+        && zkHostExpression.getParameter() instanceof StreamExpressionValue 
exprValue) {
+      solrCloud = exprValue.getValue();
+    }
+    if (solrCloud == null) {
+      throw new IOException(
+          String.format(
+              Locale.ROOT,
+              "invalid expression %s - solrCloud or zkHost not found for 
collection '%s'",
+              streamExpression,
+              collectionName));
+    }
+    return solrCloud;
+  }
+
+  public static ModifiableSolrParams getModifiableSolrParamsWithExclusions(
+      List<StreamExpressionNamedParameter> namedParams, String... excluded) {
+    ModifiableSolrParams mParams = new ModifiableSolrParams();
+
+    Set<String> excludedSet = new HashSet<>(Arrays.asList(excluded));
+
+    for (StreamExpressionNamedParameter namedParam : namedParams) {
+      if (!excludedSet.contains(namedParam.getName())) {
+        mParams.add(namedParam.getName(), 
namedParam.getParameter().toString().trim());
+      }
+    }
+
+    return mParams;
+  }
+
+  public static Map<String, String> getMapWithExclusions(

Review Comment:
   if the above method uses MapSolrParams instead of ModifiableSolrParams, then 
we don't even need this version here since the (few?) callers using it can call 
`MapSolrParams.getMap()`.



##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/TupleStream.java:
##########
@@ -213,14 +219,73 @@ public static List<String> getShards(
       }
     } else {
       shards =
-          getReplicas(zkHost, collection, streamContext, 
requestParams).stream()
+          getReplicas(solrCloud, collection, streamContext, 
requestParams).stream()
               .map(Replica::getCoreUrl)
               .collect(Collectors.toList());
     }
 
     return shards;
   }
 
+  public static String getSolrCloud(

Review Comment:
   BTW I love this refactoring overall; it cuts down on repeated code.



##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/DeepRandomStream.java:
##########
@@ -239,8 +222,8 @@ public Explanation toExplanation(StreamFactory factory) 
throws IOException {
     return explanation;
   }
 
-  void init(String collectionName, String zkHost, SolrParams params) throws 
IOException {
-    this.zkHost = zkHost;
+  void init(String collectionName, String solrCloud, SolrParams params) throws 
IOException {

Review Comment:
   IMO this param order is surprising; connection string should be first.  If 
we used a real record/type for the connection string, it'd help prevent 
mistakes.



##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/DeepRandomStream.java:
##########
@@ -102,7 +101,8 @@ public DeepRandomStream(StreamExpression expression, 
StreamFactory factory) thro
               expression));
     }
 
-    // Validate there are no unknown parameters - zkHost and alias are 
namedParameter, so we don't
+    // Validate there are no unknown parameters - solrCloud and alias are 
namedParameter, so we
+    // don't
     // need to count it twice

Review Comment:
   reflow newline



##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/graph/ShortestPathStream.java:
##########
@@ -194,42 +193,24 @@ public ShortestPathStream(StreamExpression expression, 
StreamFactory factory) th
           Integer.parseInt(((StreamExpressionValue) 
depthExpression.getParameter()).getValue());
     }
 
-    ModifiableSolrParams params = new ModifiableSolrParams();
-    for (StreamExpressionNamedParameter namedParam : namedParams) {
-      if (!namedParam.getName().equals("zkHost")
-          && !namedParam.getName().equals("to")
-          && !namedParam.getName().equals("from")
-          && !namedParam.getName().equals("edge")
-          && !namedParam.getName().equals("maxDepth")
-          && !namedParam.getName().equals("threads")
-          && !namedParam.getName().equals("partitionSize")) {
-        params.set(namedParam.getName(), 
namedParam.getParameter().toString().trim());
-      }
-    }
-
-    // zkHost, optional - if not provided then will look into factory list to 
get
-    String zkHost = null;
-    if (null == zkHostExpression) {
-      zkHost = factory.getCollectionZkHost(collectionName);
-      if (zkHost == null) {
-        zkHost = factory.getDefaultZkHost();
-      }
-    } else if (zkHostExpression.getParameter() instanceof 
StreamExpressionValue) {
-      zkHost = ((StreamExpressionValue) 
zkHostExpression.getParameter()).getValue();
-    }
-
-    if (null == zkHost) {
-      throw new IOException(
-          String.format(
-              Locale.ROOT,
-              "invalid expression %s - zkHost not found for collection '%s'",
-              expression,
-              collectionName));
-    }
+    ModifiableSolrParams params =
+        getModifiableSolrParamsWithExclusions(

Review Comment:
   ModifiableSolrParams is a specific impl of SolrParams.  I don't think this 
method name should contain the word "Modifiable"; it's a detail.



##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/Facet2DStream.java:
##########
@@ -89,7 +89,7 @@ public Facet2DStream(
     String bucketSortString = metric.getIdentifier() + " desc";
     this.bucketSort = parseBucketSort(bucketSortString, x, y);
 
-    init(collection, params, x, y, bucketSort, dimensionX, dimensionY, metric, 
zkHost);
+    init(collection, params, x, y, bucketSort, dimensionX, dimensionY, metric, 
solrCloud);

Review Comment:
   again; weird param order (pre-existing issue)



##########
changelog/unreleased/SOLR-18130-solrj-streaming-solr-cloud-parameter.yml:
##########
@@ -0,0 +1,7 @@
+title: Parameter 'zkHost' renamed to 'solrCloud' in solrj-streaming. Parameter 
'zkHost' is still supported.

Review Comment:
   Useful / fine but omits a bigger feature/point in this PR: this parameter 
now supports HTTP based SolrCloud connection strings (ZK access no longer 
required).



##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/FeaturesSelectionStream.java:
##########
@@ -86,18 +85,18 @@ public FeaturesSelectionStream(
       int numTerms)
       throws IOException {
 
-    init(collectionName, zkHost, params, field, outcome, featureSet, 
positiveLabel, numTerms);
+    init(collectionName, solrCloud, params, field, outcome, featureSet, 
positiveLabel, numTerms);
   }
 
-  /** logit(collection, zkHost="", features="a,b,c,d,e,f,g", outcome="y", 
maxIteration="20") */
+  /** logit(collection, solrCloud="", features="a,b,c,d,e,f,g", outcome="y", 
maxIteration="20") */
   public FeaturesSelectionStream(StreamExpression expression, StreamFactory 
factory)
       throws IOException {
     // grab all parameters out
     String collectionName = factory.getValueOperand(expression, 0);
     List<StreamExpressionNamedParameter> namedParams = 
factory.getNamedOperands(expression);
-    StreamExpressionNamedParameter zkHostExpression = 
factory.getNamedOperand(expression, "zkHost");
 
-    // Validate there are no unknown parameters - zkHost and alias are 
namedParameter, so we don't
+    // Validate there are no unknown parameters - solrCloud and alias are 
namedParameter, so we
+    // don't
     // need to count it twice

Review Comment:
   reflow



##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/SignificantTermsStream.java:
##########
@@ -81,17 +80,17 @@ public SignificantTermsStream(
       int numTerms)
       throws IOException {
 
-    init(collectionName, zkHost, params, field, minDocFreq, maxDocFreq, 
minTermLength, numTerms);
+    init(collectionName, solrCloud, params, field, minDocFreq, maxDocFreq, 
minTermLength, numTerms);
   }
 
   public SignificantTermsStream(StreamExpression expression, StreamFactory 
factory)
       throws IOException {
     // grab all parameters out
     String collectionName = factory.getValueOperand(expression, 0);
     List<StreamExpressionNamedParameter> namedParams = 
factory.getNamedOperands(expression);
-    StreamExpressionNamedParameter zkHostExpression = 
factory.getNamedOperand(expression, "zkHost");
 
-    // Validate there are no unknown parameters - zkHost and alias are 
namedParameter, so we don't
+    // Validate there are no unknown parameters - solrCloud/zkHost and alias 
are namedParameter, so
+    // we don't

Review Comment:
   reflow



##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/RandomFacadeStream.java:
##########
@@ -80,39 +70,23 @@ public RandomFacadeStream(StreamExpression expression, 
StreamFactory factory) th
       params.put("rows", "500");
     }
 
-    // zkHost, optional - if not provided then will look into factory list to 
get
-    String zkHost = null;
-    if (null == zkHostExpression) {
-      zkHost = factory.getCollectionZkHost(collectionName);
-      if (zkHost == null) {
-        zkHost = factory.getDefaultZkHost();
-      }
-    } else if (zkHostExpression.getParameter() instanceof 
StreamExpressionValue) {
-      zkHost = ((StreamExpressionValue) 
zkHostExpression.getParameter()).getValue();
-    }
-    if (null == zkHost) {
-      throw new IOException(
-          String.format(
-              Locale.ROOT,
-              "invalid expression %s - zkHost not found for collection '%s'",
-              expression,
-              collectionName));
-    }
+    // solrCloud, optional - if not provided then will look into factory list 
to get
+    String solrCloud = getSolrCloud(factory, expression, collectionName);
 
     if (params.get(ROWS) != null) {
       int rows = Integer.parseInt(params.get(ROWS));
       if (rows >= 5000) {
         DeepRandomStream deepRandomStream = new DeepRandomStream();
-        deepRandomStream.init(collectionName, zkHost, toSolrParams(params));
+        deepRandomStream.init(collectionName, solrCloud, toSolrParams(params));

Review Comment:
   weird order



##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/ShuffleStream.java:
##########
@@ -54,7 +53,8 @@ public ShuffleStream(StreamExpression expression, 
StreamFactory factory) throws
               expression));
     }
 
-    // Validate there are no unknown parameters - zkHost and alias are 
namedParameter, so we don't
+    // Validate there are no unknown parameters - solrCloud/zkHost and alias 
are namedParameter, so
+    // we don't

Review Comment:
   reflow



##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/StatsStream.java:
##########
@@ -74,9 +73,9 @@ public class StatsStream extends TupleStream implements 
Expressible, ParallelMet
   private transient StreamContext context;
   protected transient TupleStream parallelizedStream;
 
-  public StatsStream(String zkHost, String collection, SolrParams params, 
Metric[] metrics)
+  public StatsStream(String solrCloud, String collection, SolrParams params, 
Metric[] metrics)
       throws IOException {
-    init(collection, params, metrics, zkHost);
+    init(collection, params, metrics, solrCloud);

Review Comment:
   param order



##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/RandomStream.java:
##########
@@ -58,7 +56,8 @@
  */
 public class RandomStream extends TupleStream implements Expressible {
 
-  private String zkHost;
+  private String solrCloud;
+  // TODO: replace all Map<String, String> in stream handlers by 
ModifiableSolrParams ?

Review Comment:
   yes, well SolrParams.  Maybe out of scope.



##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/ModelStream.java:
##########
@@ -48,24 +46,23 @@ public class ModelStream extends TupleStream implements 
Expressible {
 
   private static final long serialVersionUID = 1;
 
-  protected String zkHost;
+  protected String solrCloud;
   protected String collection;
   protected String modelID;
   protected ModelCache modelCache;
   protected Tuple model;
   protected long cacheMillis;
 
-  public ModelStream(String zkHost, String collectionName, String modelID, 
long cacheMillis)
+  public ModelStream(String solrCloud, String collectionName, String modelID, 
long cacheMillis)
       throws IOException {
 
-    init(collectionName, zkHost, modelID, cacheMillis);

Review Comment:
   param order



##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/SignificantTermsStream.java:
##########
@@ -81,17 +80,17 @@ public SignificantTermsStream(
       int numTerms)
       throws IOException {
 
-    init(collectionName, zkHost, params, field, minDocFreq, maxDocFreq, 
minTermLength, numTerms);
+    init(collectionName, solrCloud, params, field, minDocFreq, maxDocFreq, 
minTermLength, numTerms);

Review Comment:
   param order



##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/TextLogitStream.java:
##########
@@ -88,7 +87,7 @@ public class TextLogitStream extends TupleStream implements 
Expressible {
   private double lastError = 0;
 
   public TextLogitStream(
-      String zkHost,
+      String solrCloud,

Review Comment:
   param order



##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/TextLogitStream.java:
##########
@@ -312,15 +293,15 @@ private StreamExpression toExpression(StreamFactory 
factory, boolean includeStre
     expression.addParameter(
         new StreamExpressionNamedParameter("threshold", 
Double.toString(threshold)));
 
-    // zkHost
-    expression.addParameter(new StreamExpressionNamedParameter("zkHost", 
zkHost));
+    // solrCloud
+    expression.addParameter(new StreamExpressionNamedParameter("solrCloud", 
solrCloud));
 
     return expression;
   }
 
   private void init(
       String collectionName,

Review Comment:
   param order



##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/SqlStream.java:
##########
@@ -173,8 +154,9 @@ public Explanation toExplanation(StreamFactory factory) 
throws IOException {
     return explanation;
   }
 
-  protected void init(String collectionName, String zkHost, SolrParams params) 
throws IOException {
-    this.zkHost = zkHost;
+  protected void init(String collectionName, String solrCloud, SolrParams 
params)

Review Comment:
   param order



##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/TopicStream.java:
##########
@@ -238,8 +223,8 @@ public StreamExpression toExpression(StreamFactory factory) 
throws IOException {
       expression.addParameter(new 
StreamExpressionNamedParameter(param.getKey(), value));
     }
 
-    // zkHost
-    expression.addParameter(new StreamExpressionNamedParameter("zkHost", 
zkHost));
+    // solrCloud

Review Comment:
   You can simply omit such comment lines where you see them (seemingly all 
streams) instead of updating them.  IMO the comment doesn't add clarity to a 
one-liner.



##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/Lang.java:
##########


Review Comment:
   what's happening here?



##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/TupleStream.java:
##########
@@ -213,14 +219,73 @@ public static List<String> getShards(
       }
     } else {
       shards =
-          getReplicas(zkHost, collection, streamContext, 
requestParams).stream()
+          getReplicas(solrCloud, collection, streamContext, 
requestParams).stream()
               .map(Replica::getCoreUrl)
               .collect(Collectors.toList());
     }
 
     return shards;
   }
 
+  public static String getSolrCloud(
+      StreamFactory streamFactory, StreamExpression streamExpression, String 
collectionName)
+      throws IOException {
+    var solrCloudExpression = streamFactory.getNamedOperand(streamExpression, 
"solrCloud");
+    var zkHostExpression = streamFactory.getNamedOperand(streamExpression, 
"zkHost");
+    String solrCloud = null;
+    if (zkHostExpression == null && solrCloudExpression == null) {
+      solrCloud = streamFactory.getCollectionZkHost(collectionName);
+      if (solrCloud == null) {
+        solrCloud = streamFactory.getDefaultZkHost();
+      }
+    } else if (solrCloudExpression != null
+        && solrCloudExpression.getParameter() instanceof StreamExpressionValue 
exprValue) {
+      solrCloud = exprValue.getValue();
+    } else if (zkHostExpression != null
+        && zkHostExpression.getParameter() instanceof StreamExpressionValue 
exprValue) {
+      solrCloud = exprValue.getValue();
+    }
+    if (solrCloud == null) {
+      throw new IOException(
+          String.format(
+              Locale.ROOT,
+              "invalid expression %s - solrCloud or zkHost not found for 
collection '%s'",
+              streamExpression,
+              collectionName));
+    }
+    return solrCloud;
+  }
+
+  public static ModifiableSolrParams getModifiableSolrParamsWithExclusions(

Review Comment:
   I suggest the name `buildSolrParams` or `buildSolrParamsExcept`.
   "Modifiable" is overly specific to the SolrParams type.
   "get" is overused in java... suggests we retrieve something when in fact we 
are creating something here.
   Also IMO the varargs should be a Set that the caller can easily pass via 
`Set.of`



##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/UpdateStream.java:
##########
@@ -92,24 +91,28 @@ public UpdateStream(StreamExpression expression, 
StreamFactory factory) throws I
               streamExpressions.size()));
     }
     StreamExpression sourceStreamExpression = streamExpressions.get(0);
-    init(collectionName, factory.constructStream(sourceStreamExpression), 
zkHost, updateBatchSize);
+    init(
+        collectionName,
+        factory.constructStream(sourceStreamExpression),
+        solrCloud,
+        updateBatchSize);
   }
 
   public UpdateStream(
-      String collectionName, TupleStream tupleSource, String zkHost, int 
updateBatchSize)
+      String collectionName, TupleStream tupleSource, String solrCloud, int 
updateBatchSize)
       throws IOException {
     if (updateBatchSize <= 0) {
       throw new IOException(
           String.format(Locale.ROOT, "batchSize '%d' must be greater than 0.", 
updateBatchSize));
     }
     pruneVersionField = defaultPruneVersionField();
-    init(collectionName, tupleSource, zkHost, updateBatchSize);
+    init(collectionName, tupleSource, solrCloud, updateBatchSize);
   }
 
   private void init(
-      String collectionName, TupleStream tupleSource, String zkHost, int 
updateBatchSize) {
+      String collectionName, TupleStream tupleSource, String solrCloud, int 
updateBatchSize) {

Review Comment:
   param order



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to