dsmiley commented on code in PR #4320:
URL: https://github.com/apache/solr/pull/4320#discussion_r3135104415
##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/graph/ShortestPathStream.java:
##########
@@ -79,7 +79,7 @@ public class ShortestPathStream extends TupleStream
implements Expressible {
private SolrParams queryParams;
public ShortestPathStream(
- String zkHost,
+ String solrCloud,
Review Comment:
Can we use that nice record you made previously for the connection string
instead of String? To be used not just right here but really everywhere in
this PR that needs to refer to the connection. It would be so nice to have a
real type vs String which is used for anything and everything, defeating the
typed nature of Java.
If not, fine.
##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/CloudSolrStream.java:
##########
@@ -107,7 +106,8 @@ public CloudSolrStream(StreamExpression expression,
StreamFactory factory) throw
expression));
}
- // Validate there are no unknown parameters - zkHost and alias are
namedParameter, so we don't
+ // Validate there are no unknown parameters - solrCloud/zkHost and alias
are namedParameter, so
+ // we don't
// need to count it twice
Review Comment:
reflow newline
##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/Facet2DStream.java:
##########
@@ -184,26 +177,9 @@ public Facet2DStream(StreamExpression expression,
StreamFactory factory) throws
}
}
- String zkHost = null;
- if (zkHostExpression == null) {
- zkHost = factory.getCollectionZkHost(collectionName);
- if (zkHost == null) {
- zkHost = factory.getDefaultZkHost();
- }
- } else if (zkHostExpression.getParameter() instanceof
StreamExpressionValue) {
- zkHost = ((StreamExpressionValue)
zkHostExpression.getParameter()).getValue();
- }
-
- if (zkHost == null) {
- throw new IOException(
- String.format(
- Locale.ROOT,
- "invalid expression %s - zkHost not found for collection '%s'",
- expression,
- collectionName));
- }
+ String solrCloud = getSolrCloud(factory, expression, collectionName);
- init(collectionName, params, x, y, bucketSort, dimensionX, dimensionY,
metric, zkHost);
+ init(collectionName, params, x, y, bucketSort, dimensionX, dimensionY,
metric, solrCloud);
Review Comment:
weird order again
##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/FeaturesSelectionStream.java:
##########
@@ -86,18 +85,18 @@ public FeaturesSelectionStream(
int numTerms)
throws IOException {
- init(collectionName, zkHost, params, field, outcome, featureSet,
positiveLabel, numTerms);
+ init(collectionName, solrCloud, params, field, outcome, featureSet,
positiveLabel, numTerms);
Review Comment:
again
##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/TupleStream.java:
##########
@@ -213,14 +219,73 @@ public static List<String> getShards(
}
} else {
shards =
- getReplicas(zkHost, collection, streamContext,
requestParams).stream()
+ getReplicas(solrCloud, collection, streamContext,
requestParams).stream()
.map(Replica::getCoreUrl)
.collect(Collectors.toList());
}
return shards;
}
+ public static String getSolrCloud(
Review Comment:
So I know I suggested calling the param "solrCloud" because in context to
where a user types it (a streaming expression string), it seems a reasonable
choice. But at a code level like here and all the var names, I find it... not
so good. getCloudConnectionString would be better IMO and maybe used the
record type you made.
##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/TupleStream.java:
##########
@@ -213,14 +219,73 @@ public static List<String> getShards(
}
} else {
shards =
- getReplicas(zkHost, collection, streamContext,
requestParams).stream()
+ getReplicas(solrCloud, collection, streamContext,
requestParams).stream()
.map(Replica::getCoreUrl)
.collect(Collectors.toList());
}
return shards;
}
+ public static String getSolrCloud(
+ StreamFactory streamFactory, StreamExpression streamExpression, String
collectionName)
+ throws IOException {
+ var solrCloudExpression = streamFactory.getNamedOperand(streamExpression,
"solrCloud");
+ var zkHostExpression = streamFactory.getNamedOperand(streamExpression,
"zkHost");
+ String solrCloud = null;
+ if (zkHostExpression == null && solrCloudExpression == null) {
+ solrCloud = streamFactory.getCollectionZkHost(collectionName);
+ if (solrCloud == null) {
+ solrCloud = streamFactory.getDefaultZkHost();
+ }
+ } else if (solrCloudExpression != null
+ && solrCloudExpression.getParameter() instanceof StreamExpressionValue
exprValue) {
+ solrCloud = exprValue.getValue();
+ } else if (zkHostExpression != null
+ && zkHostExpression.getParameter() instanceof StreamExpressionValue
exprValue) {
+ solrCloud = exprValue.getValue();
+ }
+ if (solrCloud == null) {
+ throw new IOException(
+ String.format(
+ Locale.ROOT,
+ "invalid expression %s - solrCloud or zkHost not found for
collection '%s'",
+ streamExpression,
+ collectionName));
+ }
+ return solrCloud;
+ }
+
+ public static ModifiableSolrParams getModifiableSolrParamsWithExclusions(
+ List<StreamExpressionNamedParameter> namedParams, String... excluded) {
+ ModifiableSolrParams mParams = new ModifiableSolrParams();
+
+ Set<String> excludedSet = new HashSet<>(Arrays.asList(excluded));
+
+ for (StreamExpressionNamedParameter namedParam : namedParams) {
+ if (!excludedSet.contains(namedParam.getName())) {
+ mParams.add(namedParam.getName(),
namedParam.getParameter().toString().trim());
+ }
+ }
+
+ return mParams;
+ }
+
+ public static Map<String, String> getMapWithExclusions(
Review Comment:
if the above method uses MapSolrParams instead of ModifiableSolrParams, then
we don't even need this version here since the (few?) callers using it can call
`MapSolrParams.getMap()`.
##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/TupleStream.java:
##########
@@ -213,14 +219,73 @@ public static List<String> getShards(
}
} else {
shards =
- getReplicas(zkHost, collection, streamContext,
requestParams).stream()
+ getReplicas(solrCloud, collection, streamContext,
requestParams).stream()
.map(Replica::getCoreUrl)
.collect(Collectors.toList());
}
return shards;
}
+ public static String getSolrCloud(
Review Comment:
BTW I love this refactoring overall; it cuts down on repeated code.
##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/DeepRandomStream.java:
##########
@@ -239,8 +222,8 @@ public Explanation toExplanation(StreamFactory factory)
throws IOException {
return explanation;
}
- void init(String collectionName, String zkHost, SolrParams params) throws
IOException {
- this.zkHost = zkHost;
+ void init(String collectionName, String solrCloud, SolrParams params) throws
IOException {
Review Comment:
IMO this param order is surprising; connection string should be first. If
we used a real record/type for the connection string, it'd help prevent
mistakes.
##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/DeepRandomStream.java:
##########
@@ -102,7 +101,8 @@ public DeepRandomStream(StreamExpression expression,
StreamFactory factory) thro
expression));
}
- // Validate there are no unknown parameters - zkHost and alias are
namedParameter, so we don't
+ // Validate there are no unknown parameters - solrCloud and alias are
namedParameter, so we
+ // don't
// need to count it twice
Review Comment:
reflow newline
##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/graph/ShortestPathStream.java:
##########
@@ -194,42 +193,24 @@ public ShortestPathStream(StreamExpression expression,
StreamFactory factory) th
Integer.parseInt(((StreamExpressionValue)
depthExpression.getParameter()).getValue());
}
- ModifiableSolrParams params = new ModifiableSolrParams();
- for (StreamExpressionNamedParameter namedParam : namedParams) {
- if (!namedParam.getName().equals("zkHost")
- && !namedParam.getName().equals("to")
- && !namedParam.getName().equals("from")
- && !namedParam.getName().equals("edge")
- && !namedParam.getName().equals("maxDepth")
- && !namedParam.getName().equals("threads")
- && !namedParam.getName().equals("partitionSize")) {
- params.set(namedParam.getName(),
namedParam.getParameter().toString().trim());
- }
- }
-
- // zkHost, optional - if not provided then will look into factory list to
get
- String zkHost = null;
- if (null == zkHostExpression) {
- zkHost = factory.getCollectionZkHost(collectionName);
- if (zkHost == null) {
- zkHost = factory.getDefaultZkHost();
- }
- } else if (zkHostExpression.getParameter() instanceof
StreamExpressionValue) {
- zkHost = ((StreamExpressionValue)
zkHostExpression.getParameter()).getValue();
- }
-
- if (null == zkHost) {
- throw new IOException(
- String.format(
- Locale.ROOT,
- "invalid expression %s - zkHost not found for collection '%s'",
- expression,
- collectionName));
- }
+ ModifiableSolrParams params =
+ getModifiableSolrParamsWithExclusions(
Review Comment:
ModifiableSolrParams is a specific impl of SolrParams. I don't think this
method name should contain the word "Modifiable"; it's a detail.
##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/Facet2DStream.java:
##########
@@ -89,7 +89,7 @@ public Facet2DStream(
String bucketSortString = metric.getIdentifier() + " desc";
this.bucketSort = parseBucketSort(bucketSortString, x, y);
- init(collection, params, x, y, bucketSort, dimensionX, dimensionY, metric,
zkHost);
+ init(collection, params, x, y, bucketSort, dimensionX, dimensionY, metric,
solrCloud);
Review Comment:
again; weird param order (pre-existing issue)
##########
changelog/unreleased/SOLR-18130-solrj-streaming-solr-cloud-parameter.yml:
##########
@@ -0,0 +1,7 @@
+title: Parameter 'zkHost' renamed to 'solrCloud' in solrj-streaming. Parameter
'zkHost' is still supported.
Review Comment:
Useful / fine but omits a bigger feature/point in this PR: this parameter
now supports HTTP based SolrCloud connection strings (ZK access no longer
required).
##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/FeaturesSelectionStream.java:
##########
@@ -86,18 +85,18 @@ public FeaturesSelectionStream(
int numTerms)
throws IOException {
- init(collectionName, zkHost, params, field, outcome, featureSet,
positiveLabel, numTerms);
+ init(collectionName, solrCloud, params, field, outcome, featureSet,
positiveLabel, numTerms);
}
- /** logit(collection, zkHost="", features="a,b,c,d,e,f,g", outcome="y",
maxIteration="20") */
+ /** logit(collection, solrCloud="", features="a,b,c,d,e,f,g", outcome="y",
maxIteration="20") */
public FeaturesSelectionStream(StreamExpression expression, StreamFactory
factory)
throws IOException {
// grab all parameters out
String collectionName = factory.getValueOperand(expression, 0);
List<StreamExpressionNamedParameter> namedParams =
factory.getNamedOperands(expression);
- StreamExpressionNamedParameter zkHostExpression =
factory.getNamedOperand(expression, "zkHost");
- // Validate there are no unknown parameters - zkHost and alias are
namedParameter, so we don't
+ // Validate there are no unknown parameters - solrCloud and alias are
namedParameter, so we
+ // don't
// need to count it twice
Review Comment:
reflow
##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/SignificantTermsStream.java:
##########
@@ -81,17 +80,17 @@ public SignificantTermsStream(
int numTerms)
throws IOException {
- init(collectionName, zkHost, params, field, minDocFreq, maxDocFreq,
minTermLength, numTerms);
+ init(collectionName, solrCloud, params, field, minDocFreq, maxDocFreq,
minTermLength, numTerms);
}
public SignificantTermsStream(StreamExpression expression, StreamFactory
factory)
throws IOException {
// grab all parameters out
String collectionName = factory.getValueOperand(expression, 0);
List<StreamExpressionNamedParameter> namedParams =
factory.getNamedOperands(expression);
- StreamExpressionNamedParameter zkHostExpression =
factory.getNamedOperand(expression, "zkHost");
- // Validate there are no unknown parameters - zkHost and alias are
namedParameter, so we don't
+ // Validate there are no unknown parameters - solrCloud/zkHost and alias
are namedParameter, so
+ // we don't
Review Comment:
reflow
##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/RandomFacadeStream.java:
##########
@@ -80,39 +70,23 @@ public RandomFacadeStream(StreamExpression expression,
StreamFactory factory) th
params.put("rows", "500");
}
- // zkHost, optional - if not provided then will look into factory list to
get
- String zkHost = null;
- if (null == zkHostExpression) {
- zkHost = factory.getCollectionZkHost(collectionName);
- if (zkHost == null) {
- zkHost = factory.getDefaultZkHost();
- }
- } else if (zkHostExpression.getParameter() instanceof
StreamExpressionValue) {
- zkHost = ((StreamExpressionValue)
zkHostExpression.getParameter()).getValue();
- }
- if (null == zkHost) {
- throw new IOException(
- String.format(
- Locale.ROOT,
- "invalid expression %s - zkHost not found for collection '%s'",
- expression,
- collectionName));
- }
+ // solrCloud, optional - if not provided then will look into factory list
to get
+ String solrCloud = getSolrCloud(factory, expression, collectionName);
if (params.get(ROWS) != null) {
int rows = Integer.parseInt(params.get(ROWS));
if (rows >= 5000) {
DeepRandomStream deepRandomStream = new DeepRandomStream();
- deepRandomStream.init(collectionName, zkHost, toSolrParams(params));
+ deepRandomStream.init(collectionName, solrCloud, toSolrParams(params));
Review Comment:
weird order
##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/ShuffleStream.java:
##########
@@ -54,7 +53,8 @@ public ShuffleStream(StreamExpression expression,
StreamFactory factory) throws
expression));
}
- // Validate there are no unknown parameters - zkHost and alias are
namedParameter, so we don't
+ // Validate there are no unknown parameters - solrCloud/zkHost and alias
are namedParameter, so
+ // we don't
Review Comment:
reflow
##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/StatsStream.java:
##########
@@ -74,9 +73,9 @@ public class StatsStream extends TupleStream implements
Expressible, ParallelMet
private transient StreamContext context;
protected transient TupleStream parallelizedStream;
- public StatsStream(String zkHost, String collection, SolrParams params,
Metric[] metrics)
+ public StatsStream(String solrCloud, String collection, SolrParams params,
Metric[] metrics)
throws IOException {
- init(collection, params, metrics, zkHost);
+ init(collection, params, metrics, solrCloud);
Review Comment:
param order
##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/RandomStream.java:
##########
@@ -58,7 +56,8 @@
*/
public class RandomStream extends TupleStream implements Expressible {
- private String zkHost;
+ private String solrCloud;
+ // TODO: replace all Map<String, String> in stream handlers by
ModifiableSolrParams ?
Review Comment:
yes, well SolrParams. Maybe out of scope.
##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/ModelStream.java:
##########
@@ -48,24 +46,23 @@ public class ModelStream extends TupleStream implements
Expressible {
private static final long serialVersionUID = 1;
- protected String zkHost;
+ protected String solrCloud;
protected String collection;
protected String modelID;
protected ModelCache modelCache;
protected Tuple model;
protected long cacheMillis;
- public ModelStream(String zkHost, String collectionName, String modelID,
long cacheMillis)
+ public ModelStream(String solrCloud, String collectionName, String modelID,
long cacheMillis)
throws IOException {
- init(collectionName, zkHost, modelID, cacheMillis);
Review Comment:
param order
##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/SignificantTermsStream.java:
##########
@@ -81,17 +80,17 @@ public SignificantTermsStream(
int numTerms)
throws IOException {
- init(collectionName, zkHost, params, field, minDocFreq, maxDocFreq,
minTermLength, numTerms);
+ init(collectionName, solrCloud, params, field, minDocFreq, maxDocFreq,
minTermLength, numTerms);
Review Comment:
param order
##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/TextLogitStream.java:
##########
@@ -88,7 +87,7 @@ public class TextLogitStream extends TupleStream implements
Expressible {
private double lastError = 0;
public TextLogitStream(
- String zkHost,
+ String solrCloud,
Review Comment:
param order
##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/TextLogitStream.java:
##########
@@ -312,15 +293,15 @@ private StreamExpression toExpression(StreamFactory
factory, boolean includeStre
expression.addParameter(
new StreamExpressionNamedParameter("threshold",
Double.toString(threshold)));
- // zkHost
- expression.addParameter(new StreamExpressionNamedParameter("zkHost",
zkHost));
+ // solrCloud
+ expression.addParameter(new StreamExpressionNamedParameter("solrCloud",
solrCloud));
return expression;
}
private void init(
String collectionName,
Review Comment:
param order
##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/SqlStream.java:
##########
@@ -173,8 +154,9 @@ public Explanation toExplanation(StreamFactory factory)
throws IOException {
return explanation;
}
- protected void init(String collectionName, String zkHost, SolrParams params)
throws IOException {
- this.zkHost = zkHost;
+ protected void init(String collectionName, String solrCloud, SolrParams
params)
Review Comment:
param order
##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/TopicStream.java:
##########
@@ -238,8 +223,8 @@ public StreamExpression toExpression(StreamFactory factory)
throws IOException {
expression.addParameter(new
StreamExpressionNamedParameter(param.getKey(), value));
}
- // zkHost
- expression.addParameter(new StreamExpressionNamedParameter("zkHost",
zkHost));
+ // solrCloud
Review Comment:
You can simply omit such comment lines where you see them (seemingly all
streams) instead of updating them. IMO the comment doesn't add clarity to a
one-liner.
##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/Lang.java:
##########
Review Comment:
what's happening here?
##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/TupleStream.java:
##########
@@ -213,14 +219,73 @@ public static List<String> getShards(
}
} else {
shards =
- getReplicas(zkHost, collection, streamContext,
requestParams).stream()
+ getReplicas(solrCloud, collection, streamContext,
requestParams).stream()
.map(Replica::getCoreUrl)
.collect(Collectors.toList());
}
return shards;
}
+ public static String getSolrCloud(
+ StreamFactory streamFactory, StreamExpression streamExpression, String
collectionName)
+ throws IOException {
+ var solrCloudExpression = streamFactory.getNamedOperand(streamExpression,
"solrCloud");
+ var zkHostExpression = streamFactory.getNamedOperand(streamExpression,
"zkHost");
+ String solrCloud = null;
+ if (zkHostExpression == null && solrCloudExpression == null) {
+ solrCloud = streamFactory.getCollectionZkHost(collectionName);
+ if (solrCloud == null) {
+ solrCloud = streamFactory.getDefaultZkHost();
+ }
+ } else if (solrCloudExpression != null
+ && solrCloudExpression.getParameter() instanceof StreamExpressionValue
exprValue) {
+ solrCloud = exprValue.getValue();
+ } else if (zkHostExpression != null
+ && zkHostExpression.getParameter() instanceof StreamExpressionValue
exprValue) {
+ solrCloud = exprValue.getValue();
+ }
+ if (solrCloud == null) {
+ throw new IOException(
+ String.format(
+ Locale.ROOT,
+ "invalid expression %s - solrCloud or zkHost not found for
collection '%s'",
+ streamExpression,
+ collectionName));
+ }
+ return solrCloud;
+ }
+
+ public static ModifiableSolrParams getModifiableSolrParamsWithExclusions(
Review Comment:
I suggest the name `buildSolrParams` or `buildSolrParamsExcept`.
"Modifiable" is overly specific to the SolrParams type.
"get" is overused in java... suggests we retrieve something when in fact we
are creating something here.
Also IMO the varargs should be a Set that the caller can easily pass via
`Set.of`
##########
solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/UpdateStream.java:
##########
@@ -92,24 +91,28 @@ public UpdateStream(StreamExpression expression,
StreamFactory factory) throws I
streamExpressions.size()));
}
StreamExpression sourceStreamExpression = streamExpressions.get(0);
- init(collectionName, factory.constructStream(sourceStreamExpression),
zkHost, updateBatchSize);
+ init(
+ collectionName,
+ factory.constructStream(sourceStreamExpression),
+ solrCloud,
+ updateBatchSize);
}
public UpdateStream(
- String collectionName, TupleStream tupleSource, String zkHost, int
updateBatchSize)
+ String collectionName, TupleStream tupleSource, String solrCloud, int
updateBatchSize)
throws IOException {
if (updateBatchSize <= 0) {
throw new IOException(
String.format(Locale.ROOT, "batchSize '%d' must be greater than 0.",
updateBatchSize));
}
pruneVersionField = defaultPruneVersionField();
- init(collectionName, tupleSource, zkHost, updateBatchSize);
+ init(collectionName, tupleSource, solrCloud, updateBatchSize);
}
private void init(
- String collectionName, TupleStream tupleSource, String zkHost, int
updateBatchSize) {
+ String collectionName, TupleStream tupleSource, String solrCloud, int
updateBatchSize) {
Review Comment:
param order
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]