[
https://issues.apache.org/jira/browse/SOLR-8996?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15273301#comment-15273301
]
Dennis Gove commented on SOLR-8996:
-----------------------------------
[~joel.bernstein], I saw a failure of the test for this stream. Because there
are only 5 records in the collection during the test, I believe there is a
1 in 5! (1 in 120) probability that the test will fail, because the two
streams can return the records in the same order. Below is a small patch that
increases the number of records to 1000, thus decreasing the probability of a
failure to 1 in 1000! (basically never). Do you think it's worth re-opening
this and applying the patch?
{code}
diff --git
a/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java
b/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java
index d273477..267eeca 100644
---
a/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java
+++
b/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java
@@ -483,13 +483,12 @@ public class StreamExpressionTest extends
SolrCloudTestCase {
@Test
public void testRandomStream() throws Exception {
- new UpdateRequest()
- .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "0")
- .add(id, "2", "a_s", "hello2", "a_i", "2", "a_f", "0")
- .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3")
- .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4")
- .add(id, "1", "a_s", "hello1", "a_i", "1", "a_f", "1")
- .commit(cluster.getSolrClient(), COLLECTION);
+ UpdateRequest update = new UpdateRequest();
+ for(int idx = 0; idx < 1000; ++idx){
+ String idxString = new Integer(idx).toString();
+ update.add(id,idxString, "a_s", "hello" + idxString, "a_i", idxString,
"a_f", idxString);
+ }
+ update.commit(cluster.getSolrClient(), COLLECTION);
StreamExpression expression;
TupleStream stream;
@@ -504,17 +503,17 @@ public class StreamExpressionTest extends
SolrCloudTestCase {
try {
context.setSolrClientCache(cache);
- expression = StreamExpressionParser.parse("random(" + COLLECTION + ",
q=\"*:*\", rows=\"10\", fl=\"id, a_i\")");
+ expression = StreamExpressionParser.parse("random(" + COLLECTION + ",
q=\"*:*\", rows=\"1000\", fl=\"id, a_i\")");
stream = factory.constructStream(expression);
stream.setStreamContext(context);
List<Tuple> tuples1 = getTuples(stream);
- assert (tuples1.size() == 5);
+ assert (tuples1.size() == 1000);
- expression = StreamExpressionParser.parse("random(" + COLLECTION + ",
q=\"*:*\", rows=\"10\", fl=\"id, a_i\")");
+ expression = StreamExpressionParser.parse("random(" + COLLECTION + ",
q=\"*:*\", rows=\"1000\", fl=\"id, a_i\")");
stream = factory.constructStream(expression);
stream.setStreamContext(context);
List<Tuple> tuples2 = getTuples(stream);
- assert (tuples2.size() == 5);
+ assert (tuples2.size() == 1000);
boolean different = false;
for (int i = 0; i < tuples1.size(); i++) {
{code}
> Add Random Streaming Expression
> -------------------------------
>
> Key: SOLR-8996
> URL: https://issues.apache.org/jira/browse/SOLR-8996
> Project: Solr
> Issue Type: New Feature
> Reporter: Joel Bernstein
> Assignee: Joel Bernstein
> Fix For: 6.1
>
> Attachments: RandomStream.java, SOLR-8996.patch
>
>
> The random Streaming Expression will return a *limited* random stream of
> Tuples that match a query. This will be useful in many different scenarios
> where random data sets are needed.
> Proposed syntax:
> {code}
> random(baskets, q="productID:productX", rows="100", fl="basketID")
> {code}
> The sample code above will query the *baskets* collection and return 100
> random *basketIDs* where the productID is productX.
> The underlying implementation will rely on Solr's random field type.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]