[jira] [Commented] (SOLR-6314) Multi-threaded facet counts differ when SolrCloud has 1 shard

2014-08-21 Thread Mark Miller (JIRA)

[ 
https://issues.apache.org/jira/browse/SOLR-6314?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14105395#comment-14105395
 ] 

Mark Miller commented on SOLR-6314:
---

bq. Multi-threaded facet counts differ when SolrCloud has 1 shard

This seems like the wrong title for the JIRA and especially the wrong title in 
CHANGES. Makes it sound like a severe bug rather than what I'm reading above.

 Multi-threaded facet counts differ when SolrCloud has 1 shard
 --

 Key: SOLR-6314
 URL: https://issues.apache.org/jira/browse/SOLR-6314
 Project: Solr
  Issue Type: Bug
  Components: SearchComponents - other, SolrCloud
Affects Versions: 5.0
Reporter: Vamsee Yarlagadda
Assignee: Erick Erickson
 Fix For: 5.0, 4.10

 Attachments: SOLR-6314.patch, SOLR-6314.patch, SOLR-6314.patch


 I am trying to work with multi-threaded faceting on SolrCloud and in the 
 process i was hit by some issues.
 I am currently running the below upstream test on different SolrCloud 
 configurations and i am getting a different result set per configuration.
 https://github.com/apache/lucene-solr/blob/trunk/solr/core/src/test/org/apache/solr/request/TestFaceting.java#L654
 Setup:
 - *Indexed 50 docs into SolrCloud.*
 - *If the SolrCloud has only 1 shard, the facet field query has the below 
 output (which matches with the expected upstream test output - # facet fields 
 ~ 50).*
 {code}
 $ curl  
 http://localhost:8983/solr/collection1/select?facet=truefl=idindent=trueq=id%3A*facet.limit=-1facet.threads=1000facet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsrows=1wt=xml;
 ?xml version=1.0 encoding=UTF-8?
 response
 lst name=responseHeader
   int name=status0/int
   int name=QTime21/int
   lst name=params
 str name=facettrue/str
 str name=flid/str
 str name=indenttrue/str
 str name=qid:*/str
 str name=facet.limit-1/str
 str name=facet.threads1000/str
 arr name=facet.field
   strf0_ws/str
   strf0_ws/str
   strf0_ws/str
   strf0_ws/str
   strf0_ws/str
   strf1_ws/str
   strf1_ws/str
   strf1_ws/str
   strf1_ws/str
   strf1_ws/str
   strf2_ws/str
   strf2_ws/str
   strf2_ws/str
   strf2_ws/str
   strf2_ws/str
   strf3_ws/str
   strf3_ws/str
   strf3_ws/str
   strf3_ws/str
   strf3_ws/str
   strf4_ws/str
   strf4_ws/str
   strf4_ws/str
   strf4_ws/str
   strf4_ws/str
   strf5_ws/str
   strf5_ws/str
   strf5_ws/str
   strf5_ws/str
   strf5_ws/str
   strf6_ws/str
   strf6_ws/str
   strf6_ws/str
   strf6_ws/str
   strf6_ws/str
   strf7_ws/str
   strf7_ws/str
   strf7_ws/str
   strf7_ws/str
   strf7_ws/str
   strf8_ws/str
   strf8_ws/str
   strf8_ws/str
   strf8_ws/str
   strf8_ws/str
   strf9_ws/str
   strf9_ws/str
   strf9_ws/str
   strf9_ws/str
   strf9_ws/str
 /arr
 str name=wtxml/str
 str name=rows1/str
   /lst
 /lst
 result name=response numFound=50 start=0
   doc
 float name=id0.0/float/doc
 /result
 lst name=facet_counts
   lst name=facet_queries/
   lst name=facet_fields
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f1_ws
   int name=one_133/int
   int name=one_317/int
 /lst
 lst name=f1_ws
   int name=one_133/int
   int name=one_317/int
 /lst
 lst name=f1_ws
   int name=one_133/int
   int name=one_317/int
 /lst
 lst name=f1_ws
   int name=one_133/int
   int name=one_317/int
 /lst
 lst name=f1_ws
   int 

[jira] [Commented] (SOLR-6314) Multi-threaded facet counts differ when SolrCloud has 1 shard

2014-08-21 Thread Mark Miller (JIRA)

[ 
https://issues.apache.org/jira/browse/SOLR-6314?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14105397#comment-14105397
 ] 

Mark Miller commented on SOLR-6314:
---

bq. SOLR-6314: Multi-threaded facet counts differ when SolrCloud has 1 shard 
(Erick Erickson)

Also missing credit for Vamsee in CHANGES.

 Multi-threaded facet counts differ when SolrCloud has 1 shard
 --

 Key: SOLR-6314
 URL: https://issues.apache.org/jira/browse/SOLR-6314
 Project: Solr
  Issue Type: Bug
  Components: SearchComponents - other, SolrCloud
Affects Versions: 5.0
Reporter: Vamsee Yarlagadda
Assignee: Erick Erickson
 Fix For: 5.0, 4.10

 Attachments: SOLR-6314.patch, SOLR-6314.patch, SOLR-6314.patch


 I am trying to work with multi-threaded faceting on SolrCloud and in the 
 process i was hit by some issues.
 I am currently running the below upstream test on different SolrCloud 
 configurations and i am getting a different result set per configuration.
 https://github.com/apache/lucene-solr/blob/trunk/solr/core/src/test/org/apache/solr/request/TestFaceting.java#L654
 Setup:
 - *Indexed 50 docs into SolrCloud.*
 - *If the SolrCloud has only 1 shard, the facet field query has the below 
 output (which matches with the expected upstream test output - # facet fields 
 ~ 50).*
 {code}
 $ curl  
 http://localhost:8983/solr/collection1/select?facet=truefl=idindent=trueq=id%3A*facet.limit=-1facet.threads=1000facet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsrows=1wt=xml;
 ?xml version=1.0 encoding=UTF-8?
 response
 lst name=responseHeader
   int name=status0/int
   int name=QTime21/int
   lst name=params
 str name=facettrue/str
 str name=flid/str
 str name=indenttrue/str
 str name=qid:*/str
 str name=facet.limit-1/str
 str name=facet.threads1000/str
 arr name=facet.field
   strf0_ws/str
   strf0_ws/str
   strf0_ws/str
   strf0_ws/str
   strf0_ws/str
   strf1_ws/str
   strf1_ws/str
   strf1_ws/str
   strf1_ws/str
   strf1_ws/str
   strf2_ws/str
   strf2_ws/str
   strf2_ws/str
   strf2_ws/str
   strf2_ws/str
   strf3_ws/str
   strf3_ws/str
   strf3_ws/str
   strf3_ws/str
   strf3_ws/str
   strf4_ws/str
   strf4_ws/str
   strf4_ws/str
   strf4_ws/str
   strf4_ws/str
   strf5_ws/str
   strf5_ws/str
   strf5_ws/str
   strf5_ws/str
   strf5_ws/str
   strf6_ws/str
   strf6_ws/str
   strf6_ws/str
   strf6_ws/str
   strf6_ws/str
   strf7_ws/str
   strf7_ws/str
   strf7_ws/str
   strf7_ws/str
   strf7_ws/str
   strf8_ws/str
   strf8_ws/str
   strf8_ws/str
   strf8_ws/str
   strf8_ws/str
   strf9_ws/str
   strf9_ws/str
   strf9_ws/str
   strf9_ws/str
   strf9_ws/str
 /arr
 str name=wtxml/str
 str name=rows1/str
   /lst
 /lst
 result name=response numFound=50 start=0
   doc
 float name=id0.0/float/doc
 /result
 lst name=facet_counts
   lst name=facet_queries/
   lst name=facet_fields
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f1_ws
   int name=one_133/int
   int name=one_317/int
 /lst
 lst name=f1_ws
   int name=one_133/int
   int name=one_317/int
 /lst
 lst name=f1_ws
   int name=one_133/int
   int name=one_317/int
 /lst
 lst name=f1_ws
   int name=one_133/int
   int name=one_317/int
 /lst
 lst name=f1_ws
   int name=one_133/int
   int name=one_317/int
 /lst
 lst name=f2_ws
   int 

[jira] [Commented] (SOLR-6314) Multi-threaded facet counts differ when SolrCloud has 1 shard

2014-08-20 Thread Vamsee Yarlagadda (JIRA)

[ 
https://issues.apache.org/jira/browse/SOLR-6314?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14103564#comment-14103564
 ] 

Vamsee Yarlagadda commented on SOLR-6314:
-

The patch looks good to me. 
Thanks [~erickerickson].

 Multi-threaded facet counts differ when SolrCloud has 1 shard
 --

 Key: SOLR-6314
 URL: https://issues.apache.org/jira/browse/SOLR-6314
 Project: Solr
  Issue Type: Bug
  Components: SearchComponents - other, SolrCloud
Affects Versions: 5.0
Reporter: Vamsee Yarlagadda
Assignee: Erick Erickson
 Fix For: 5.0, 4.10

 Attachments: SOLR-6314.patch, SOLR-6314.patch, SOLR-6314.patch


 I am trying to work with multi-threaded faceting on SolrCloud and in the 
 process i was hit by some issues.
 I am currently running the below upstream test on different SolrCloud 
 configurations and i am getting a different result set per configuration.
 https://github.com/apache/lucene-solr/blob/trunk/solr/core/src/test/org/apache/solr/request/TestFaceting.java#L654
 Setup:
 - *Indexed 50 docs into SolrCloud.*
 - *If the SolrCloud has only 1 shard, the facet field query has the below 
 output (which matches with the expected upstream test output - # facet fields 
 ~ 50).*
 {code}
 $ curl  
 http://localhost:8983/solr/collection1/select?facet=truefl=idindent=trueq=id%3A*facet.limit=-1facet.threads=1000facet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsrows=1wt=xml;
 ?xml version=1.0 encoding=UTF-8?
 response
 lst name=responseHeader
   int name=status0/int
   int name=QTime21/int
   lst name=params
 str name=facettrue/str
 str name=flid/str
 str name=indenttrue/str
 str name=qid:*/str
 str name=facet.limit-1/str
 str name=facet.threads1000/str
 arr name=facet.field
   strf0_ws/str
   strf0_ws/str
   strf0_ws/str
   strf0_ws/str
   strf0_ws/str
   strf1_ws/str
   strf1_ws/str
   strf1_ws/str
   strf1_ws/str
   strf1_ws/str
   strf2_ws/str
   strf2_ws/str
   strf2_ws/str
   strf2_ws/str
   strf2_ws/str
   strf3_ws/str
   strf3_ws/str
   strf3_ws/str
   strf3_ws/str
   strf3_ws/str
   strf4_ws/str
   strf4_ws/str
   strf4_ws/str
   strf4_ws/str
   strf4_ws/str
   strf5_ws/str
   strf5_ws/str
   strf5_ws/str
   strf5_ws/str
   strf5_ws/str
   strf6_ws/str
   strf6_ws/str
   strf6_ws/str
   strf6_ws/str
   strf6_ws/str
   strf7_ws/str
   strf7_ws/str
   strf7_ws/str
   strf7_ws/str
   strf7_ws/str
   strf8_ws/str
   strf8_ws/str
   strf8_ws/str
   strf8_ws/str
   strf8_ws/str
   strf9_ws/str
   strf9_ws/str
   strf9_ws/str
   strf9_ws/str
   strf9_ws/str
 /arr
 str name=wtxml/str
 str name=rows1/str
   /lst
 /lst
 result name=response numFound=50 start=0
   doc
 float name=id0.0/float/doc
 /result
 lst name=facet_counts
   lst name=facet_queries/
   lst name=facet_fields
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f1_ws
   int name=one_133/int
   int name=one_317/int
 /lst
 lst name=f1_ws
   int name=one_133/int
   int name=one_317/int
 /lst
 lst name=f1_ws
   int name=one_133/int
   int name=one_317/int
 /lst
 lst name=f1_ws
   int name=one_133/int
   int name=one_317/int
 /lst
 lst name=f1_ws
   int name=one_133/int
   int name=one_317/int
 /lst
 lst name=f2_ws
   int name=two_137/int
   int name=two_413/int
 /lst
 lst name=f2_ws
   

[jira] [Commented] (SOLR-6314) Multi-threaded facet counts differ when SolrCloud has 1 shard

2014-08-18 Thread ASF subversion and git services (JIRA)

[ 
https://issues.apache.org/jira/browse/SOLR-6314?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14101155#comment-14101155
 ] 

ASF subversion and git services commented on SOLR-6314:
---

Commit 1618716 from [~erickoerickson] in branch 'dev/trunk'
[ https://svn.apache.org/r1618716 ]

SOLR-6314: Multi-threaded facet counts differ when SolrCloud has 1 shard

 Multi-threaded facet counts differ when SolrCloud has 1 shard
 --

 Key: SOLR-6314
 URL: https://issues.apache.org/jira/browse/SOLR-6314
 Project: Solr
  Issue Type: Bug
  Components: SearchComponents - other, SolrCloud
Affects Versions: 5.0
Reporter: Vamsee Yarlagadda
Assignee: Erick Erickson
 Attachments: SOLR-6314.patch, SOLR-6314.patch


 I am trying to work with multi-threaded faceting on SolrCloud and in the 
 process i was hit by some issues.
 I am currently running the below upstream test on different SolrCloud 
 configurations and i am getting a different result set per configuration.
 https://github.com/apache/lucene-solr/blob/trunk/solr/core/src/test/org/apache/solr/request/TestFaceting.java#L654
 Setup:
 - *Indexed 50 docs into SolrCloud.*
 - *If the SolrCloud has only 1 shard, the facet field query has the below 
 output (which matches with the expected upstream test output - # facet fields 
 ~ 50).*
 {code}
 $ curl  
 http://localhost:8983/solr/collection1/select?facet=truefl=idindent=trueq=id%3A*facet.limit=-1facet.threads=1000facet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsrows=1wt=xml;
 ?xml version=1.0 encoding=UTF-8?
 response
 lst name=responseHeader
   int name=status0/int
   int name=QTime21/int
   lst name=params
 str name=facettrue/str
 str name=flid/str
 str name=indenttrue/str
 str name=qid:*/str
 str name=facet.limit-1/str
 str name=facet.threads1000/str
 arr name=facet.field
   strf0_ws/str
   strf0_ws/str
   strf0_ws/str
   strf0_ws/str
   strf0_ws/str
   strf1_ws/str
   strf1_ws/str
   strf1_ws/str
   strf1_ws/str
   strf1_ws/str
   strf2_ws/str
   strf2_ws/str
   strf2_ws/str
   strf2_ws/str
   strf2_ws/str
   strf3_ws/str
   strf3_ws/str
   strf3_ws/str
   strf3_ws/str
   strf3_ws/str
   strf4_ws/str
   strf4_ws/str
   strf4_ws/str
   strf4_ws/str
   strf4_ws/str
   strf5_ws/str
   strf5_ws/str
   strf5_ws/str
   strf5_ws/str
   strf5_ws/str
   strf6_ws/str
   strf6_ws/str
   strf6_ws/str
   strf6_ws/str
   strf6_ws/str
   strf7_ws/str
   strf7_ws/str
   strf7_ws/str
   strf7_ws/str
   strf7_ws/str
   strf8_ws/str
   strf8_ws/str
   strf8_ws/str
   strf8_ws/str
   strf8_ws/str
   strf9_ws/str
   strf9_ws/str
   strf9_ws/str
   strf9_ws/str
   strf9_ws/str
 /arr
 str name=wtxml/str
 str name=rows1/str
   /lst
 /lst
 result name=response numFound=50 start=0
   doc
 float name=id0.0/float/doc
 /result
 lst name=facet_counts
   lst name=facet_queries/
   lst name=facet_fields
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f1_ws
   int name=one_133/int
   int name=one_317/int
 /lst
 lst name=f1_ws
   int name=one_133/int
   int name=one_317/int
 /lst
 lst name=f1_ws
   int name=one_133/int
   int name=one_317/int
 /lst
 lst name=f1_ws
   int name=one_133/int
   int name=one_317/int
 /lst
 lst name=f1_ws
   int name=one_133/int
   int name=one_317/int
 /lst
 lst name=f2_ws
  

[jira] [Commented] (SOLR-6314) Multi-threaded facet counts differ when SolrCloud has 1 shard

2014-08-18 Thread Erick Erickson (JIRA)

[ 
https://issues.apache.org/jira/browse/SOLR-6314?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14101292#comment-14101292
 ] 

Erick Erickson commented on SOLR-6314:
--

BTW, the final solution was to NOT log any warnings in the general case and 
just remove the dups in the facet component that were facet-related.

 Multi-threaded facet counts differ when SolrCloud has 1 shard
 --

 Key: SOLR-6314
 URL: https://issues.apache.org/jira/browse/SOLR-6314
 Project: Solr
  Issue Type: Bug
  Components: SearchComponents - other, SolrCloud
Affects Versions: 5.0
Reporter: Vamsee Yarlagadda
Assignee: Erick Erickson
 Fix For: 5.0, 4.10

 Attachments: SOLR-6314.patch, SOLR-6314.patch, SOLR-6314.patch


 I am trying to work with multi-threaded faceting on SolrCloud and in the 
 process i was hit by some issues.
 I am currently running the below upstream test on different SolrCloud 
 configurations and i am getting a different result set per configuration.
 https://github.com/apache/lucene-solr/blob/trunk/solr/core/src/test/org/apache/solr/request/TestFaceting.java#L654
 Setup:
 - *Indexed 50 docs into SolrCloud.*
 - *If the SolrCloud has only 1 shard, the facet field query has the below 
 output (which matches with the expected upstream test output - # facet fields 
 ~ 50).*
 {code}
 $ curl  
 http://localhost:8983/solr/collection1/select?facet=truefl=idindent=trueq=id%3A*facet.limit=-1facet.threads=1000facet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsrows=1wt=xml;
 ?xml version=1.0 encoding=UTF-8?
 response
 lst name=responseHeader
   int name=status0/int
   int name=QTime21/int
   lst name=params
 str name=facettrue/str
 str name=flid/str
 str name=indenttrue/str
 str name=qid:*/str
 str name=facet.limit-1/str
 str name=facet.threads1000/str
 arr name=facet.field
   strf0_ws/str
   strf0_ws/str
   strf0_ws/str
   strf0_ws/str
   strf0_ws/str
   strf1_ws/str
   strf1_ws/str
   strf1_ws/str
   strf1_ws/str
   strf1_ws/str
   strf2_ws/str
   strf2_ws/str
   strf2_ws/str
   strf2_ws/str
   strf2_ws/str
   strf3_ws/str
   strf3_ws/str
   strf3_ws/str
   strf3_ws/str
   strf3_ws/str
   strf4_ws/str
   strf4_ws/str
   strf4_ws/str
   strf4_ws/str
   strf4_ws/str
   strf5_ws/str
   strf5_ws/str
   strf5_ws/str
   strf5_ws/str
   strf5_ws/str
   strf6_ws/str
   strf6_ws/str
   strf6_ws/str
   strf6_ws/str
   strf6_ws/str
   strf7_ws/str
   strf7_ws/str
   strf7_ws/str
   strf7_ws/str
   strf7_ws/str
   strf8_ws/str
   strf8_ws/str
   strf8_ws/str
   strf8_ws/str
   strf8_ws/str
   strf9_ws/str
   strf9_ws/str
   strf9_ws/str
   strf9_ws/str
   strf9_ws/str
 /arr
 str name=wtxml/str
 str name=rows1/str
   /lst
 /lst
 result name=response numFound=50 start=0
   doc
 float name=id0.0/float/doc
 /result
 lst name=facet_counts
   lst name=facet_queries/
   lst name=facet_fields
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f1_ws
   int name=one_133/int
   int name=one_317/int
 /lst
 lst name=f1_ws
   int name=one_133/int
   int name=one_317/int
 /lst
 lst name=f1_ws
   int name=one_133/int
   int name=one_317/int
 /lst
 lst name=f1_ws
   int name=one_133/int
   int name=one_317/int
 /lst
 lst name=f1_ws
   int name=one_133/int
   int name=one_317/int
 /lst
 lst name=f2_ws
   

[jira] [Commented] (SOLR-6314) Multi-threaded facet counts differ when SolrCloud has 1 shard

2014-08-18 Thread ASF subversion and git services (JIRA)

[ 
https://issues.apache.org/jira/browse/SOLR-6314?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14101287#comment-14101287
 ] 

ASF subversion and git services commented on SOLR-6314:
---

Commit 1618734 from [~erickoerickson] in branch 'dev/branches/branch_4x'
[ https://svn.apache.org/r1618734 ]

SOLR-6314: Multi-threaded facet counts differ when SolrCloud has 1 shard

 Multi-threaded facet counts differ when SolrCloud has 1 shard
 --

 Key: SOLR-6314
 URL: https://issues.apache.org/jira/browse/SOLR-6314
 Project: Solr
  Issue Type: Bug
  Components: SearchComponents - other, SolrCloud
Affects Versions: 5.0
Reporter: Vamsee Yarlagadda
Assignee: Erick Erickson
 Fix For: 5.0, 4.10

 Attachments: SOLR-6314.patch, SOLR-6314.patch, SOLR-6314.patch


 I am trying to work with multi-threaded faceting on SolrCloud and in the 
 process i was hit by some issues.
 I am currently running the below upstream test on different SolrCloud 
 configurations and i am getting a different result set per configuration.
 https://github.com/apache/lucene-solr/blob/trunk/solr/core/src/test/org/apache/solr/request/TestFaceting.java#L654
 Setup:
 - *Indexed 50 docs into SolrCloud.*
 - *If the SolrCloud has only 1 shard, the facet field query has the below 
 output (which matches with the expected upstream test output - # facet fields 
 ~ 50).*
 {code}
 $ curl  
 http://localhost:8983/solr/collection1/select?facet=truefl=idindent=trueq=id%3A*facet.limit=-1facet.threads=1000facet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsrows=1wt=xml;
 ?xml version=1.0 encoding=UTF-8?
 response
 lst name=responseHeader
   int name=status0/int
   int name=QTime21/int
   lst name=params
 str name=facettrue/str
 str name=flid/str
 str name=indenttrue/str
 str name=qid:*/str
 str name=facet.limit-1/str
 str name=facet.threads1000/str
 arr name=facet.field
   strf0_ws/str
   strf0_ws/str
   strf0_ws/str
   strf0_ws/str
   strf0_ws/str
   strf1_ws/str
   strf1_ws/str
   strf1_ws/str
   strf1_ws/str
   strf1_ws/str
   strf2_ws/str
   strf2_ws/str
   strf2_ws/str
   strf2_ws/str
   strf2_ws/str
   strf3_ws/str
   strf3_ws/str
   strf3_ws/str
   strf3_ws/str
   strf3_ws/str
   strf4_ws/str
   strf4_ws/str
   strf4_ws/str
   strf4_ws/str
   strf4_ws/str
   strf5_ws/str
   strf5_ws/str
   strf5_ws/str
   strf5_ws/str
   strf5_ws/str
   strf6_ws/str
   strf6_ws/str
   strf6_ws/str
   strf6_ws/str
   strf6_ws/str
   strf7_ws/str
   strf7_ws/str
   strf7_ws/str
   strf7_ws/str
   strf7_ws/str
   strf8_ws/str
   strf8_ws/str
   strf8_ws/str
   strf8_ws/str
   strf8_ws/str
   strf9_ws/str
   strf9_ws/str
   strf9_ws/str
   strf9_ws/str
   strf9_ws/str
 /arr
 str name=wtxml/str
 str name=rows1/str
   /lst
 /lst
 result name=response numFound=50 start=0
   doc
 float name=id0.0/float/doc
 /result
 lst name=facet_counts
   lst name=facet_queries/
   lst name=facet_fields
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f1_ws
   int name=one_133/int
   int name=one_317/int
 /lst
 lst name=f1_ws
   int name=one_133/int
   int name=one_317/int
 /lst
 lst name=f1_ws
   int name=one_133/int
   int name=one_317/int
 /lst
 lst name=f1_ws
   int name=one_133/int
   int name=one_317/int
 /lst
 lst name=f1_ws
   int 

[jira] [Commented] (SOLR-6314) Multi-threaded facet counts differ when SolrCloud has 1 shard

2014-08-15 Thread Yonik Seeley (JIRA)

[ 
https://issues.apache.org/jira/browse/SOLR-6314?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14098760#comment-14098760
 ] 

Yonik Seeley commented on SOLR-6314:


bq. OTOH, what's the case for allowing dups?

It's a general parameter passing mechanism, so we're talking about all possible 
plugins/interfaces.
But here's one off the top of my head...  specifying field values with query 
parameters:

field.description=shoes
field.color=red
field.color=blue
field.size=10.5
field.size=10.5

It still seems like there's no bug here.
The root cause of the incorrect facet count appears to be an incorrect facet 
request, right?
If that's the case, the simplest answer is well, that behavior is undefined.
Or if we want to help detect the user error sooner, simply throw an exception 
in the facet component?


 Multi-threaded facet counts differ when SolrCloud has 1 shard
 --

 Key: SOLR-6314
 URL: https://issues.apache.org/jira/browse/SOLR-6314
 Project: Solr
  Issue Type: Bug
  Components: SearchComponents - other, SolrCloud
Affects Versions: 5.0
Reporter: Vamsee Yarlagadda
Assignee: Erick Erickson
 Attachments: SOLR-6314.patch, SOLR-6314.patch


 I am trying to work with multi-threaded faceting on SolrCloud and in the 
 process i was hit by some issues.
 I am currently running the below upstream test on different SolrCloud 
 configurations and i am getting a different result set per configuration.
 https://github.com/apache/lucene-solr/blob/trunk/solr/core/src/test/org/apache/solr/request/TestFaceting.java#L654
 Setup:
 - *Indexed 50 docs into SolrCloud.*
 - *If the SolrCloud has only 1 shard, the facet field query has the below 
 output (which matches with the expected upstream test output - # facet fields 
 ~ 50).*
 {code}
 $ curl  
 http://localhost:8983/solr/collection1/select?facet=truefl=idindent=trueq=id%3A*facet.limit=-1facet.threads=1000facet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsrows=1wt=xml;
 ?xml version=1.0 encoding=UTF-8?
 response
 lst name=responseHeader
   int name=status0/int
   int name=QTime21/int
   lst name=params
 str name=facettrue/str
 str name=flid/str
 str name=indenttrue/str
 str name=qid:*/str
 str name=facet.limit-1/str
 str name=facet.threads1000/str
 arr name=facet.field
   strf0_ws/str
   strf0_ws/str
   strf0_ws/str
   strf0_ws/str
   strf0_ws/str
   strf1_ws/str
   strf1_ws/str
   strf1_ws/str
   strf1_ws/str
   strf1_ws/str
   strf2_ws/str
   strf2_ws/str
   strf2_ws/str
   strf2_ws/str
   strf2_ws/str
   strf3_ws/str
   strf3_ws/str
   strf3_ws/str
   strf3_ws/str
   strf3_ws/str
   strf4_ws/str
   strf4_ws/str
   strf4_ws/str
   strf4_ws/str
   strf4_ws/str
   strf5_ws/str
   strf5_ws/str
   strf5_ws/str
   strf5_ws/str
   strf5_ws/str
   strf6_ws/str
   strf6_ws/str
   strf6_ws/str
   strf6_ws/str
   strf6_ws/str
   strf7_ws/str
   strf7_ws/str
   strf7_ws/str
   strf7_ws/str
   strf7_ws/str
   strf8_ws/str
   strf8_ws/str
   strf8_ws/str
   strf8_ws/str
   strf8_ws/str
   strf9_ws/str
   strf9_ws/str
   strf9_ws/str
   strf9_ws/str
   strf9_ws/str
 /arr
 str name=wtxml/str
 str name=rows1/str
   /lst
 /lst
 result name=response numFound=50 start=0
   doc
 float name=id0.0/float/doc
 /result
 lst name=facet_counts
   lst name=facet_queries/
   lst name=facet_fields
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst 

[jira] [Commented] (SOLR-6314) Multi-threaded facet counts differ when SolrCloud has 1 shard

2014-08-15 Thread Erick Erickson (JIRA)

[ 
https://issues.apache.org/jira/browse/SOLR-6314?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14099319#comment-14099319
 ] 

Erick Erickson commented on SOLR-6314:
--

Ahhh, OK. If I'm understanding the approach, then collapsing the identical 
params
would screw up anything that depended on relative positions (as one example). So
something like this:

field.description=shoes
field.color=red
field.color=blue
field.color=black
field.size=10.5
field.size=10.5
field.size=11

where there's an positional association between the order of the
colors and the shoe size you're looking for. In this case it means
red and blue shoes of size 10.5 
black shoes 11

and collapsing the two 10.5s would screw it up.

Ok, that makes sense.

Certainly throwing an error in the component would be easily do-able.

Let me see if I can put something together that collapses the values in 
facet component, an approach just occurred to me. If it works we'll have 
two choices. Then we can all weigh in on whether it's better to 

1 throw an error or (possibly breaking currently-running code BTW)
or
2 remove the duplicates and log a warning.

 Multi-threaded facet counts differ when SolrCloud has 1 shard
 --

 Key: SOLR-6314
 URL: https://issues.apache.org/jira/browse/SOLR-6314
 Project: Solr
  Issue Type: Bug
  Components: SearchComponents - other, SolrCloud
Affects Versions: 5.0
Reporter: Vamsee Yarlagadda
Assignee: Erick Erickson
 Attachments: SOLR-6314.patch, SOLR-6314.patch


 I am trying to work with multi-threaded faceting on SolrCloud and in the 
 process i was hit by some issues.
 I am currently running the below upstream test on different SolrCloud 
 configurations and i am getting a different result set per configuration.
 https://github.com/apache/lucene-solr/blob/trunk/solr/core/src/test/org/apache/solr/request/TestFaceting.java#L654
 Setup:
 - *Indexed 50 docs into SolrCloud.*
 - *If the SolrCloud has only 1 shard, the facet field query has the below 
 output (which matches with the expected upstream test output - # facet fields 
 ~ 50).*
 {code}
 $ curl  
 http://localhost:8983/solr/collection1/select?facet=truefl=idindent=trueq=id%3A*facet.limit=-1facet.threads=1000facet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsrows=1wt=xml;
 ?xml version=1.0 encoding=UTF-8?
 response
 lst name=responseHeader
   int name=status0/int
   int name=QTime21/int
   lst name=params
 str name=facettrue/str
 str name=flid/str
 str name=indenttrue/str
 str name=qid:*/str
 str name=facet.limit-1/str
 str name=facet.threads1000/str
 arr name=facet.field
   strf0_ws/str
   strf0_ws/str
   strf0_ws/str
   strf0_ws/str
   strf0_ws/str
   strf1_ws/str
   strf1_ws/str
   strf1_ws/str
   strf1_ws/str
   strf1_ws/str
   strf2_ws/str
   strf2_ws/str
   strf2_ws/str
   strf2_ws/str
   strf2_ws/str
   strf3_ws/str
   strf3_ws/str
   strf3_ws/str
   strf3_ws/str
   strf3_ws/str
   strf4_ws/str
   strf4_ws/str
   strf4_ws/str
   strf4_ws/str
   strf4_ws/str
   strf5_ws/str
   strf5_ws/str
   strf5_ws/str
   strf5_ws/str
   strf5_ws/str
   strf6_ws/str
   strf6_ws/str
   strf6_ws/str
   strf6_ws/str
   strf6_ws/str
   strf7_ws/str
   strf7_ws/str
   strf7_ws/str
   strf7_ws/str
   strf7_ws/str
   strf8_ws/str
   strf8_ws/str
   strf8_ws/str
   strf8_ws/str
   strf8_ws/str
   strf9_ws/str
   strf9_ws/str
   strf9_ws/str
   strf9_ws/str
   strf9_ws/str
 /arr
 str name=wtxml/str
 str name=rows1/str
   /lst
 /lst
 result name=response numFound=50 start=0
   doc
 float name=id0.0/float/doc
 /result
 lst name=facet_counts
   lst name=facet_queries/
   lst name=facet_fields
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
 

[jira] [Commented] (SOLR-6314) Multi-threaded facet counts differ when SolrCloud has 1 shard

2014-08-14 Thread Erick Erickson (JIRA)

[ 
https://issues.apache.org/jira/browse/SOLR-6314?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14098119#comment-14098119
 ] 

Erick Erickson commented on SOLR-6314:
--

While arguably the behavior I started looking at is benign-but-annoying (if 
inefficient), there is bona-fide badness here. If you specify facet.query=blah 
twice in distributed mode, the result is twice the count it should be.

Try this: URL on the test docs, simple 2-shard setup:
http://localhost:8983/solr/collection1/query?q=*:*facet=truefacet.range.mincount=1facet.range=manufacturedate_dtfacet.range.start=NOW-100YEARSfacet.range.end=NOWfacet.range.gap=%2B10YEAR

The only non-zero entry is
2004-08-14T22:53:07.852Z,
11

Now duplicate the field by adding another copy of:
facet.range=manufacturedate_dt
the count goes to 22
oops.

Admittedly this is pilot error, but still it's incorrect.

I looked at de-duping in the facet component only, but that ran into weird null 
pointer exceptions. Given that I slightly favored deduping at a high level 
anyway, I'm not willing to pursue that avenue unless there's a really good 
counter-argument.

Long version:

The behavior is different because the distributed case is, of course, a 
separate code path. Part of that path is FacetComponent.parse (roughly line 
1000 in trunk). That method collects the facet.field parameters into a 
LinkedHashMap, so multiple entries are weeded out. In the case where we specify 
the field twice, the params are still _passed to the shards and calculated 
twice_. And returned twice. It just happens that the facets member var (which 
collected the names of the fields) only has one entry for the 
multiply-specified field since it's a HashMap. So it only asks for the facet 
counts from the response packet once for each shard. Since they're identical, 
it's OK.

However, take rangeFacets, roughly line 981 in FacetComponent is defined as a 
SimpleOrderedMap. Which checks to see if there is already an entry for the 
field in question... and if there is adds to it. 

I think that this adds weight to the somewhat invasive change I first tried. I 
don't know how many of these are hanging around. Based on them being 
SimpleOrderedMaps, I'm guessing at least date facets, pivot facets and interval 
facets at least have problems.

So rather than find them one-by-one I'm inclined to do the deduping at a higher 
level.

This is easy to demonstrate in the distributed tests, just hack 
TestDistributedSearch to duplicate one of the facet.range (or other facet types 
that use SimpleOrderedMaps) in the queries . Tests start failing.

I'll probably attach a new version of the patch tonight (running tests now). 
It's just like the old one except it adds duplicated fields to the distributed 
facets test (which will fail without the patch).

I spent some time trying to move the dedupe code to FacetComponent, but it's 
more tangled than I think is worth the effort. It's not difficult
to dedupe the parameters, but they're passed through in a SolrParams object. So 
far so good. But somehow that generates a null pointer exception down the road. 
I _think_ that the problem here is that there are some assumptions about how 
many entries should be there based on the original parameters in the request. 
Or something like that. Deduping on the highest level seems to avoid this 
altogether and I'm very afraid it's an N+1 problem.

Let me know if anyone disagrees violently. Otherwise I'll commit sometime this 
weekend assuming tests pass.


 Multi-threaded facet counts differ when SolrCloud has 1 shard
 --

 Key: SOLR-6314
 URL: https://issues.apache.org/jira/browse/SOLR-6314
 Project: Solr
  Issue Type: Bug
  Components: SearchComponents - other, SolrCloud
Affects Versions: 5.0
Reporter: Vamsee Yarlagadda
Assignee: Erick Erickson
 Attachments: SOLR-6314.patch


 I am trying to work with multi-threaded faceting on SolrCloud and in the 
 process i was hit by some issues.
 I am currently running the below upstream test on different SolrCloud 
 configurations and i am getting a different result set per configuration.
 https://github.com/apache/lucene-solr/blob/trunk/solr/core/src/test/org/apache/solr/request/TestFaceting.java#L654
 Setup:
 - *Indexed 50 docs into SolrCloud.*
 - *If the SolrCloud has only 1 shard, the facet field query has the below 
 output (which matches with the expected upstream test output - # facet fields 
 ~ 50).*
 {code}
 $ curl  
 

[jira] [Commented] (SOLR-6314) Multi-threaded facet counts differ when SolrCloud has 1 shard

2014-08-08 Thread Yonik Seeley (JIRA)

[ 
https://issues.apache.org/jira/browse/SOLR-6314?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14090870#comment-14090870
 ] 

Yonik Seeley commented on SOLR-6314:


bq. The only real code changes are in MultimapSolrParams.

I'm not sure it makes sense to dedup at that level.  That implies that 
repeated parameters never make sense in any context... and I don't think we 
can/should try to do that.  If you want to dedup facet parameters for some 
reason, then it should probably be done in the faceting code.

 Multi-threaded facet counts differ when SolrCloud has 1 shard
 --

 Key: SOLR-6314
 URL: https://issues.apache.org/jira/browse/SOLR-6314
 Project: Solr
  Issue Type: Bug
  Components: SearchComponents - other, SolrCloud
Affects Versions: 5.0
Reporter: Vamsee Yarlagadda
Assignee: Erick Erickson
 Attachments: SOLR-6314.patch


 I am trying to work with multi-threaded faceting on SolrCloud and in the 
 process i was hit by some issues.
 I am currently running the below upstream test on different SolrCloud 
 configurations and i am getting a different result set per configuration.
 https://github.com/apache/lucene-solr/blob/trunk/solr/core/src/test/org/apache/solr/request/TestFaceting.java#L654
 Setup:
 - *Indexed 50 docs into SolrCloud.*
 - *If the SolrCloud has only 1 shard, the facet field query has the below 
 output (which matches with the expected upstream test output - # facet fields 
 ~ 50).*
 {code}
 $ curl  
 http://localhost:8983/solr/collection1/select?facet=truefl=idindent=trueq=id%3A*facet.limit=-1facet.threads=1000facet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsrows=1wt=xml;
 ?xml version=1.0 encoding=UTF-8?
 response
 lst name=responseHeader
   int name=status0/int
   int name=QTime21/int
   lst name=params
 str name=facettrue/str
 str name=flid/str
 str name=indenttrue/str
 str name=qid:*/str
 str name=facet.limit-1/str
 str name=facet.threads1000/str
 arr name=facet.field
   strf0_ws/str
   strf0_ws/str
   strf0_ws/str
   strf0_ws/str
   strf0_ws/str
   strf1_ws/str
   strf1_ws/str
   strf1_ws/str
   strf1_ws/str
   strf1_ws/str
   strf2_ws/str
   strf2_ws/str
   strf2_ws/str
   strf2_ws/str
   strf2_ws/str
   strf3_ws/str
   strf3_ws/str
   strf3_ws/str
   strf3_ws/str
   strf3_ws/str
   strf4_ws/str
   strf4_ws/str
   strf4_ws/str
   strf4_ws/str
   strf4_ws/str
   strf5_ws/str
   strf5_ws/str
   strf5_ws/str
   strf5_ws/str
   strf5_ws/str
   strf6_ws/str
   strf6_ws/str
   strf6_ws/str
   strf6_ws/str
   strf6_ws/str
   strf7_ws/str
   strf7_ws/str
   strf7_ws/str
   strf7_ws/str
   strf7_ws/str
   strf8_ws/str
   strf8_ws/str
   strf8_ws/str
   strf8_ws/str
   strf8_ws/str
   strf9_ws/str
   strf9_ws/str
   strf9_ws/str
   strf9_ws/str
   strf9_ws/str
 /arr
 str name=wtxml/str
 str name=rows1/str
   /lst
 /lst
 result name=response numFound=50 start=0
   doc
 float name=id0.0/float/doc
 /result
 lst name=facet_counts
   lst name=facet_queries/
   lst name=facet_fields
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f1_ws
   int name=one_133/int
   int name=one_317/int
 /lst
 lst name=f1_ws
   int name=one_133/int
   int name=one_317/int
 /lst
 lst name=f1_ws
   int name=one_133/int
   int name=one_317/int
 /lst
 lst name=f1_ws
   int name=one_133/int
   int 

[jira] [Commented] (SOLR-6314) Multi-threaded facet counts differ when SolrCloud has 1 shard

2014-08-08 Thread Erick Erickson (JIRA)

[ 
https://issues.apache.org/jira/browse/SOLR-6314?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14090909#comment-14090909
 ] 

Erick Erickson commented on SOLR-6314:
--

bq: If you want to dedup facet parameters for some reason, then it should 
probably be 
done in the faceting code.

Yeah, that's exactly what's making me uncomfortable about the patch, it's at 
such a low level and it
affects _everything_. Unintended consequences and all that.

OTOH, what's the case for allowing dups? Do you have specific cases where that's
good or is your comment more of a statement that we shouldn't restrict future 
possibilities
just because I'm not sufficiently imaginative ;) ?

I'm really having a tough time imagining scenarios where allowing dups is 
useful, and I can
come up with scenarios where allowing dups is harmful (imagine multiple, 
expensive, identical
fq clause with cache=false for instance) that would be caught here. Hmmm, a 
WARN-level 
log message is indicated for dups no matter what I think.

The counter-argument is that the user should be free to shoot themselves in the 
foot as
they want to.

The counter-counter argument is that when we identify potential traps we
should do something about them if we can.

What do you think about this alternative? (note, I'm not proposing it as much 
as throwing it out
for discussion). Leave the dup-detection where it is and log a WARN level 
message when dups
are detected, and move the actual de-duping out to the faceting code. Then 
de-dupe on a case-
by-case basis as situations arise.

Where this started was that the exact same query over the exact same data set 
returns different
results in sharded and non-sharded situations. The results have the same 
information, just 
repeated in the single shard case. Which means that somehow the sharded code 
manages to 
ignore the extra entries. I'll look at how in a bit. At any rate, the 
sharded case manages to avoid returning the data multiple times so either 
there's code in there 
specifically to deal with this or it's happening by chance, which is its own 
gotcha.

I've seen some very large queries out in the wild and it's hard in many cases 
to see things
like this so logging a message would help the users figure out their (perhaps
machine-generated) code was doing things  they _probably_ don't want.

So this is a long winded way of saying Hell, I don't know. My _slight_ 
preference here
would be to dedupe as it's being done in this patch (and log warnings when 
doing so). It
just feels more correct and may prevent weird behavior in the future. But I'm 
not 
adamant about that, if the general consensus is that doing this on a 
case-by-case basis
is a better idea I can make it so for the facet case.

 Multi-threaded facet counts differ when SolrCloud has 1 shard
 --

 Key: SOLR-6314
 URL: https://issues.apache.org/jira/browse/SOLR-6314
 Project: Solr
  Issue Type: Bug
  Components: SearchComponents - other, SolrCloud
Affects Versions: 5.0
Reporter: Vamsee Yarlagadda
Assignee: Erick Erickson
 Attachments: SOLR-6314.patch


 I am trying to work with multi-threaded faceting on SolrCloud and in the 
 process i was hit by some issues.
 I am currently running the below upstream test on different SolrCloud 
 configurations and i am getting a different result set per configuration.
 https://github.com/apache/lucene-solr/blob/trunk/solr/core/src/test/org/apache/solr/request/TestFaceting.java#L654
 Setup:
 - *Indexed 50 docs into SolrCloud.*
 - *If the SolrCloud has only 1 shard, the facet field query has the below 
 output (which matches with the expected upstream test output - # facet fields 
 ~ 50).*
 {code}
 $ curl  
 http://localhost:8983/solr/collection1/select?facet=truefl=idindent=trueq=id%3A*facet.limit=-1facet.threads=1000facet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsrows=1wt=xml;
 ?xml version=1.0 encoding=UTF-8?
 response
 lst name=responseHeader
   int name=status0/int
   int name=QTime21/int
 

[jira] [Commented] (SOLR-6314) Multi-threaded facet counts differ when SolrCloud has 1 shard

2014-08-07 Thread Shawn Heisey (JIRA)

[ 
https://issues.apache.org/jira/browse/SOLR-6314?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14089341#comment-14089341
 ] 

Shawn Heisey commented on SOLR-6314:


I can't imagine any situations where having the f1 key in the results twice 
(with the same info) is useful, so I would argue that the non-distributed query 
should behave like the distributed query and only return it once.  If someone 
has a reasonable use case for the current behavior, then you can forget what I 
said and make distributed work like non-distributed ... but hopefully with an 
optimization in both instances so that it only calculates the results once.

One thing that a user might do is include two *nearly* identical keys like 
this, either for migration purposes, or to work around web developers who are 
completely clueless:

facet.field=f1
facet.field={!key=foo}f1

This *does* work properly in Solr 4.9 with a distributed facet -- the original 
and re-keyed facets are both returned, with identical info.  I don't know if it 
runs the facet twice or re-uses the info that is already calculated.


 Multi-threaded facet counts differ when SolrCloud has 1 shard
 --

 Key: SOLR-6314
 URL: https://issues.apache.org/jira/browse/SOLR-6314
 Project: Solr
  Issue Type: Bug
  Components: SearchComponents - other, SolrCloud
Affects Versions: 5.0
Reporter: Vamsee Yarlagadda
Assignee: Erick Erickson

 I am trying to work with multi-threaded faceting on SolrCloud and in the 
 process i was hit by some issues.
 I am currently running the below upstream test on different SolrCloud 
 configurations and i am getting a different result set per configuration.
 https://github.com/apache/lucene-solr/blob/trunk/solr/core/src/test/org/apache/solr/request/TestFaceting.java#L654
 Setup:
 - *Indexed 50 docs into SolrCloud.*
 - *If the SolrCloud has only 1 shard, the facet field query has the below 
 output (which matches with the expected upstream test output - # facet fields 
 ~ 50).*
 {code}
 $ curl  
 http://localhost:8983/solr/collection1/select?facet=truefl=idindent=trueq=id%3A*facet.limit=-1facet.threads=1000facet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsrows=1wt=xml;
 ?xml version=1.0 encoding=UTF-8?
 response
 lst name=responseHeader
   int name=status0/int
   int name=QTime21/int
   lst name=params
 str name=facettrue/str
 str name=flid/str
 str name=indenttrue/str
 str name=qid:*/str
 str name=facet.limit-1/str
 str name=facet.threads1000/str
 arr name=facet.field
   strf0_ws/str
   strf0_ws/str
   strf0_ws/str
   strf0_ws/str
   strf0_ws/str
   strf1_ws/str
   strf1_ws/str
   strf1_ws/str
   strf1_ws/str
   strf1_ws/str
   strf2_ws/str
   strf2_ws/str
   strf2_ws/str
   strf2_ws/str
   strf2_ws/str
   strf3_ws/str
   strf3_ws/str
   strf3_ws/str
   strf3_ws/str
   strf3_ws/str
   strf4_ws/str
   strf4_ws/str
   strf4_ws/str
   strf4_ws/str
   strf4_ws/str
   strf5_ws/str
   strf5_ws/str
   strf5_ws/str
   strf5_ws/str
   strf5_ws/str
   strf6_ws/str
   strf6_ws/str
   strf6_ws/str
   strf6_ws/str
   strf6_ws/str
   strf7_ws/str
   strf7_ws/str
   strf7_ws/str
   strf7_ws/str
   strf7_ws/str
   strf8_ws/str
   strf8_ws/str
   strf8_ws/str
   strf8_ws/str
   strf8_ws/str
   strf9_ws/str
   strf9_ws/str
   strf9_ws/str
   strf9_ws/str
   strf9_ws/str
 /arr
 str name=wtxml/str
 str name=rows1/str
   /lst
 /lst
 result name=response numFound=50 start=0
   doc
 float name=id0.0/float/doc
 /result
 lst name=facet_counts
   lst name=facet_queries/
   lst name=facet_fields
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   

[jira] [Commented] (SOLR-6314) Multi-threaded facet counts differ when SolrCloud has 1 shard

2014-08-06 Thread Erick Erickson (JIRA)

[ 
https://issues.apache.org/jira/browse/SOLR-6314?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14088820#comment-14088820
 ] 

Erick Erickson commented on SOLR-6314:
--

OK, taking a closer look at this and I wonder what the right behavior is. The 
totals
are correct, it's just that they repeated in one case and not in  another. 

It _looks_ like I can restate the problem like this:

When a facet field is requested more than one time in non-sharded cluster, then 
the field
is repeated in the result set. 

When a facet field is requested more than once in a sharded cluster, then the 
field is only
returned once in the result set.

IOW, specifying the same facet.field twice: facet.field=f1facet.field=f1
results in two (identical) sections in the response in a non-sharded case
and one in the sharded case.

I'll look at the code tomorrow to see where the difference happens, I suspect 
in the
aggregating code in the distributed case but that's just a guess.

So the question is what the right behavior really is. I can argue that 
specifying the
exact same facet parameter (either query or field) more than once is a waste, 
and the
facet information should be cleaned up on the way in by removing duplicates. 
That would give
the same response in both cases and return just one entry per unique facet 
criteria. This is 
arguably avoiding useless work (what's the value of specifying the same facet 
parameter
twice?) That would change current behavior in the single-shard case however.

If we tried to return multiple entries in the sharded case, it seems quite 
fragile to try to sum the
facet sub-counts separately. By that I mean say shard 1 returns
f1:33
f1:33

shard two returns
f1:78
f1:78 

You'd like the final result to be
f1:111
f1:111

On the surface, some rule like add facets by
position when the key is identical and return multiple
counts seems like it would work, but it also seems
rife for errors to creep in with arguably no added value. What happens,
for instance, if there are three values for f1 from one shard
and only two from another? I don't see how that would really happen,
but

So, my question for you (and anyone who wants to chime in)
is: Do you agree that pruning multiple identical facet criteria
is a Good Thing?. If not, what use case does returning multiple
identical facet counts support and is that use case worth the
effort? My gut feeling is no.

Thanks for bringing this up it's certainly something
that's confusing. I suspect it's just something that hasn't been
thought of in the past

 Multi-threaded facet counts differ when SolrCloud has 1 shard
 --

 Key: SOLR-6314
 URL: https://issues.apache.org/jira/browse/SOLR-6314
 Project: Solr
  Issue Type: Bug
  Components: SearchComponents - other, SolrCloud
Affects Versions: 5.0
Reporter: Vamsee Yarlagadda
Assignee: Erick Erickson

 I am trying to work with multi-threaded faceting on SolrCloud and in the 
 process i was hit by some issues.
 I am currently running the below upstream test on different SolrCloud 
 configurations and i am getting a different result set per configuration.
 https://github.com/apache/lucene-solr/blob/trunk/solr/core/src/test/org/apache/solr/request/TestFaceting.java#L654
 Setup:
 - *Indexed 50 docs into SolrCloud.*
 - *If the SolrCloud has only 1 shard, the facet field query has the below 
 output (which matches with the expected upstream test output - # facet fields 
 ~ 50).*
 {code}
 $ curl  
 http://localhost:8983/solr/collection1/select?facet=truefl=idindent=trueq=id%3A*facet.limit=-1facet.threads=1000facet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsrows=1wt=xml;
 ?xml version=1.0 encoding=UTF-8?
 response
 lst name=responseHeader
   int name=status0/int
   int name=QTime21/int
   lst name=params
 str name=facettrue/str
 str name=flid/str
 str name=indenttrue/str
 str name=qid:*/str
 str name=facet.limit-1/str
 str name=facet.threads1000/str
 arr name=facet.field
   

[jira] [Commented] (SOLR-6314) Multi-threaded facet counts differ when SolrCloud has 1 shard

2014-08-06 Thread Vamsee Yarlagadda (JIRA)

[ 
https://issues.apache.org/jira/browse/SOLR-6314?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14088833#comment-14088833
 ] 

Vamsee Yarlagadda commented on SOLR-6314:
-

Thanks [~erickerickson] for looking into this.

Yes, you are right. It makes perfect sense to return a count for every unique 
facet request rather than repeating the facets over and over. It might be the 
case that the facet result that's returned in the case of multi-shard (by going 
through the aggregating code) is the right thing to do. Perhaps, we may want to 
fix the behavior for single shard system and make changes to the unit tests to 
reflect the same.

I can't think of any particular reason why the initial implementation of 
multithreaded faceting created a test that will check for duplicate facet 
counts. It might be a test bug too?
https://github.com/apache/lucene-solr/blob/trunk/solr/core/src/test/org/apache/solr/request/TestFaceting.java#L654

Thoughts?

 Multi-threaded facet counts differ when SolrCloud has 1 shard
 --

 Key: SOLR-6314
 URL: https://issues.apache.org/jira/browse/SOLR-6314
 Project: Solr
  Issue Type: Bug
  Components: SearchComponents - other, SolrCloud
Affects Versions: 5.0
Reporter: Vamsee Yarlagadda
Assignee: Erick Erickson

 I am trying to work with multi-threaded faceting on SolrCloud and in the 
 process i was hit by some issues.
 I am currently running the below upstream test on different SolrCloud 
 configurations and i am getting a different result set per configuration.
 https://github.com/apache/lucene-solr/blob/trunk/solr/core/src/test/org/apache/solr/request/TestFaceting.java#L654
 Setup:
 - *Indexed 50 docs into SolrCloud.*
 - *If the SolrCloud has only 1 shard, the facet field query has the below 
 output (which matches with the expected upstream test output - # facet fields 
 ~ 50).*
 {code}
 $ curl  
 http://localhost:8983/solr/collection1/select?facet=truefl=idindent=trueq=id%3A*facet.limit=-1facet.threads=1000facet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsrows=1wt=xml;
 ?xml version=1.0 encoding=UTF-8?
 response
 lst name=responseHeader
   int name=status0/int
   int name=QTime21/int
   lst name=params
 str name=facettrue/str
 str name=flid/str
 str name=indenttrue/str
 str name=qid:*/str
 str name=facet.limit-1/str
 str name=facet.threads1000/str
 arr name=facet.field
   strf0_ws/str
   strf0_ws/str
   strf0_ws/str
   strf0_ws/str
   strf0_ws/str
   strf1_ws/str
   strf1_ws/str
   strf1_ws/str
   strf1_ws/str
   strf1_ws/str
   strf2_ws/str
   strf2_ws/str
   strf2_ws/str
   strf2_ws/str
   strf2_ws/str
   strf3_ws/str
   strf3_ws/str
   strf3_ws/str
   strf3_ws/str
   strf3_ws/str
   strf4_ws/str
   strf4_ws/str
   strf4_ws/str
   strf4_ws/str
   strf4_ws/str
   strf5_ws/str
   strf5_ws/str
   strf5_ws/str
   strf5_ws/str
   strf5_ws/str
   strf6_ws/str
   strf6_ws/str
   strf6_ws/str
   strf6_ws/str
   strf6_ws/str
   strf7_ws/str
   strf7_ws/str
   strf7_ws/str
   strf7_ws/str
   strf7_ws/str
   strf8_ws/str
   strf8_ws/str
   strf8_ws/str
   strf8_ws/str
   strf8_ws/str
   strf9_ws/str
   strf9_ws/str
   strf9_ws/str
   strf9_ws/str
   strf9_ws/str
 /arr
 str name=wtxml/str
 str name=rows1/str
   /lst
 /lst
 result name=response numFound=50 start=0
   doc
 float name=id0.0/float/doc
 /result
 lst name=facet_counts
   lst name=facet_queries/
   lst name=facet_fields
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst 

[jira] [Commented] (SOLR-6314) Multi-threaded facet counts differ when SolrCloud has 1 shard

2014-08-02 Thread Erick Erickson (JIRA)

[ 
https://issues.apache.org/jira/browse/SOLR-6314?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14083620#comment-14083620
 ] 

Erick Erickson commented on SOLR-6314:
--

I know something about that code so I'll try to take a look.

I'm not entirely sure when I'll get to it though, I'm slammed. So
if someone wants to look at it instead, please feel free.

Vamsee:

What version of Solr are you seeing this on?

 Multi-threaded facet counts differ when SolrCloud has 1 shard
 --

 Key: SOLR-6314
 URL: https://issues.apache.org/jira/browse/SOLR-6314
 Project: Solr
  Issue Type: Bug
  Components: SearchComponents - other, SolrCloud
Affects Versions: 5.0
Reporter: Vamsee Yarlagadda
Assignee: Erick Erickson

 I am trying to work with multi-threaded faceting on SolrCloud and in the 
 process i was hit by some issues.
 I am currently running the below upstream test on different SolrCloud 
 configurations and i am getting a different result set per configuration.
 https://github.com/apache/lucene-solr/blob/trunk/solr/core/src/test/org/apache/solr/request/TestFaceting.java#L654
 Setup:
 - *Indexed 50 docs into SolrCloud.*
 - *If the SolrCloud has only 1 shard, the facet field query has the below 
 output (which matches with the expected upstream test output - # facet fields 
 ~ 50).*
 {code}
 $ curl  
 http://localhost:8983/solr/collection1/select?facet=truefl=idindent=trueq=id%3A*facet.limit=-1facet.threads=1000facet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsrows=1wt=xml;
 ?xml version=1.0 encoding=UTF-8?
 response
 lst name=responseHeader
   int name=status0/int
   int name=QTime21/int
   lst name=params
 str name=facettrue/str
 str name=flid/str
 str name=indenttrue/str
 str name=qid:*/str
 str name=facet.limit-1/str
 str name=facet.threads1000/str
 arr name=facet.field
   strf0_ws/str
   strf0_ws/str
   strf0_ws/str
   strf0_ws/str
   strf0_ws/str
   strf1_ws/str
   strf1_ws/str
   strf1_ws/str
   strf1_ws/str
   strf1_ws/str
   strf2_ws/str
   strf2_ws/str
   strf2_ws/str
   strf2_ws/str
   strf2_ws/str
   strf3_ws/str
   strf3_ws/str
   strf3_ws/str
   strf3_ws/str
   strf3_ws/str
   strf4_ws/str
   strf4_ws/str
   strf4_ws/str
   strf4_ws/str
   strf4_ws/str
   strf5_ws/str
   strf5_ws/str
   strf5_ws/str
   strf5_ws/str
   strf5_ws/str
   strf6_ws/str
   strf6_ws/str
   strf6_ws/str
   strf6_ws/str
   strf6_ws/str
   strf7_ws/str
   strf7_ws/str
   strf7_ws/str
   strf7_ws/str
   strf7_ws/str
   strf8_ws/str
   strf8_ws/str
   strf8_ws/str
   strf8_ws/str
   strf8_ws/str
   strf9_ws/str
   strf9_ws/str
   strf9_ws/str
   strf9_ws/str
   strf9_ws/str
 /arr
 str name=wtxml/str
 str name=rows1/str
   /lst
 /lst
 result name=response numFound=50 start=0
   doc
 float name=id0.0/float/doc
 /result
 lst name=facet_counts
   lst name=facet_queries/
   lst name=facet_fields
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f1_ws
   int name=one_133/int
   int name=one_317/int
 /lst
 lst name=f1_ws
   int name=one_133/int
   int name=one_317/int
 /lst
 lst name=f1_ws
   int name=one_133/int
   int name=one_317/int
 /lst
 lst name=f1_ws
   int name=one_133/int
   int name=one_317/int
 /lst
 lst name=f1_ws
   int name=one_133/int
   int name=one_317/int
 /lst
 lst name=f2_ws
   int 

[jira] [Commented] (SOLR-6314) Multi-threaded facet counts differ when SolrCloud has 1 shard

2014-08-02 Thread Vamsee Yarlagadda (JIRA)

[ 
https://issues.apache.org/jira/browse/SOLR-6314?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14083726#comment-14083726
 ] 

Vamsee Yarlagadda commented on SOLR-6314:
-

Thanks @Erick. I was able to replicate the issue on Solr trunk (5.0)
Let me know if there is anything I can do to help in the process. 

 Multi-threaded facet counts differ when SolrCloud has 1 shard
 --

 Key: SOLR-6314
 URL: https://issues.apache.org/jira/browse/SOLR-6314
 Project: Solr
  Issue Type: Bug
  Components: SearchComponents - other, SolrCloud
Affects Versions: 5.0
Reporter: Vamsee Yarlagadda
Assignee: Erick Erickson

 I am trying to work with multi-threaded faceting on SolrCloud and in the 
 process i was hit by some issues.
 I am currently running the below upstream test on different SolrCloud 
 configurations and i am getting a different result set per configuration.
 https://github.com/apache/lucene-solr/blob/trunk/solr/core/src/test/org/apache/solr/request/TestFaceting.java#L654
 Setup:
 - *Indexed 50 docs into SolrCloud.*
 - *If the SolrCloud has only 1 shard, the facet field query has the below 
 output (which matches with the expected upstream test output - # facet fields 
 ~ 50).*
 {code}
 $ curl  
 http://localhost:8983/solr/collection1/select?facet=truefl=idindent=trueq=id%3A*facet.limit=-1facet.threads=1000facet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f0_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f1_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f2_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f3_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f4_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f5_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f6_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f7_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f8_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsfacet.field=f9_wsrows=1wt=xml;
 ?xml version=1.0 encoding=UTF-8?
 response
 lst name=responseHeader
   int name=status0/int
   int name=QTime21/int
   lst name=params
 str name=facettrue/str
 str name=flid/str
 str name=indenttrue/str
 str name=qid:*/str
 str name=facet.limit-1/str
 str name=facet.threads1000/str
 arr name=facet.field
   strf0_ws/str
   strf0_ws/str
   strf0_ws/str
   strf0_ws/str
   strf0_ws/str
   strf1_ws/str
   strf1_ws/str
   strf1_ws/str
   strf1_ws/str
   strf1_ws/str
   strf2_ws/str
   strf2_ws/str
   strf2_ws/str
   strf2_ws/str
   strf2_ws/str
   strf3_ws/str
   strf3_ws/str
   strf3_ws/str
   strf3_ws/str
   strf3_ws/str
   strf4_ws/str
   strf4_ws/str
   strf4_ws/str
   strf4_ws/str
   strf4_ws/str
   strf5_ws/str
   strf5_ws/str
   strf5_ws/str
   strf5_ws/str
   strf5_ws/str
   strf6_ws/str
   strf6_ws/str
   strf6_ws/str
   strf6_ws/str
   strf6_ws/str
   strf7_ws/str
   strf7_ws/str
   strf7_ws/str
   strf7_ws/str
   strf7_ws/str
   strf8_ws/str
   strf8_ws/str
   strf8_ws/str
   strf8_ws/str
   strf8_ws/str
   strf9_ws/str
   strf9_ws/str
   strf9_ws/str
   strf9_ws/str
   strf9_ws/str
 /arr
 str name=wtxml/str
 str name=rows1/str
   /lst
 /lst
 result name=response numFound=50 start=0
   doc
 float name=id0.0/float/doc
 /result
 lst name=facet_counts
   lst name=facet_queries/
   lst name=facet_fields
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f0_ws
   int name=zero_125/int
   int name=zero_225/int
 /lst
 lst name=f1_ws
   int name=one_133/int
   int name=one_317/int
 /lst
 lst name=f1_ws
   int name=one_133/int
   int name=one_317/int
 /lst
 lst name=f1_ws
   int name=one_133/int
   int name=one_317/int
 /lst
 lst name=f1_ws
   int name=one_133/int
   int name=one_317/int
 /lst
 lst name=f1_ws
   int name=one_133/int
   int name=one_317/int
 /lst
 lst name=f2_ws
   int name=two_137/int
   int name=two_413/int
 /lst
 lst name=f2_ws
   int name=two_137/int