Hi,
I have been trying to enable autoscaling on SolrCloud 8.5 with a Node Added
trigger and a Node Lost trigger. The SolrCloud cluster is running on AWS EKS
pods, with a minimum of 2 nodes.
I have added a NodeAddedTrigger. My autoscaling API response is shown in the
attached file.
Whenever I scale up the SolrCloud replicas on EKS, new nodes are added to the
cluster, but the Node Added trigger throws the error below:
<str name="error.details_t">org.apache.solr.common.SolrException: Unexpected
exception while processing event: {
"id":"c889e6ef3b34eTcc9nazth0kbod28rj2zc84n0b",
"source":"node_added_trigger",
"eventTime":3527913768203086,
"eventType":"NODEADDED",
"properties":{
"eventTimes":[3527913768203086],
"preferredOperation":"addreplica",
"_enqueue_time_":3527918773192489,
"nodeNames":["solrcloud-2.solrcluster:8983_solr"],
"replicaType":"NRT"}}
at
org.apache.solr.cloud.autoscaling.ComputePlanAction.process(ComputePlanAction.java:161)
at
org.apache.solr.cloud.autoscaling.ScheduledTriggers.lambda$null$3(ScheduledTriggers.java:326)
at
java.base/java.util.concurrent.Executors$RunnableAdapter.call(Unknown Source)
at java.base/java.util.concurrent.FutureTask.run(Unknown Source)
at
org.apache.solr.common.util.ExecutorUtil$MDCAwareThreadPoolExecutor.lambda$execute$0(ExecutorUtil.java:210)
at
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
at
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
at java.base/java.lang.Thread.run(Unknown Source)
Caused by: org.apache.solr.common.SolrException:
org.apache.solr.common.SolrException: Error getting remote info
at
org.apache.solr.common.cloud.rule.ImplicitSnitch.getTags(ImplicitSnitch.java:78)
at
org.apache.solr.client.solrj.impl.SolrClientNodeStateProvider.fetchTagValues(SolrClientNodeStateProvider.java:139)
at
org.apache.solr.client.solrj.impl.SolrClientNodeStateProvider.getNodeValues(SolrClientNodeStateProvider.java:128)
at
org.apache.solr.client.solrj.cloud.autoscaling.Row.<init>(Row.java:71)
at
org.apache.solr.client.solrj.cloud.autoscaling.Policy$Session.<init>(Policy.java:575)
at
org.apache.solr.client.solrj.cloud.autoscaling.Policy.createSession(Policy.java:396)
at
org.apache.solr.client.solrj.cloud.autoscaling.Policy.createSession(Policy.java:358)
at
org.apache.solr.client.solrj.cloud.autoscaling.PolicyHelper$SessionRef.createSession(PolicyHelper.java:492)
at
org.apache.solr.client.solrj.cloud.autoscaling.PolicyHelper$SessionRef.get(PolicyHelper.java:457)
at
org.apache.solr.client.solrj.cloud.autoscaling.PolicyHelper.getSession(PolicyHelper.java:513)
at
org.apache.solr.cloud.autoscaling.ComputePlanAction.process(ComputePlanAction.java:90)
... 7 more
Caused by: org.apache.solr.common.SolrException: Error getting remote info
at
org.apache.solr.client.solrj.impl.SolrClientNodeStateProvider$AutoScalingSnitch.getRemoteInfo(SolrClientNodeStateProvider.java:364)
at
org.apache.solr.common.cloud.rule.ImplicitSnitch.getTags(ImplicitSnitch.java:76)
... 17 more
Caused by: org.apache.solr.common.SolrException: Could not get remote info after many retries on NoHttpResponseException
at
org.apache.solr.client.solrj.impl.SolrClientNodeStateProvider$AutoScalingSnitch.getRemoteInfo(SolrClientNodeStateProvider.java:335)
... 18 more
</str>
I would appreciate any help with this issue.
Please let me know if you have any questions.
Thanks,
Kirti Mangla
________________________________
If you are not the intended recipient or have received this message in error,
please notify the sender and permanently delete this message and any
attachments.
curl -s -u solr:xxxx http://solr-service:8983/api/cluster/autoscaling
{
"responseHeader":{
"status":0,
"QTime":0},
"cluster-preferences":[{
"minimize":"cores",
"precision":1}
,{
"maximize":"freedisk"}],
"cluster-policy":[{
"replica":"2",
"shard":"#EACH",
"node":"#ANY"}],
"triggers":{
".auto_add_replicas":{
"name":".auto_add_replicas",
"event":"nodeLost",
"waitFor":120,
"enabled":true,
"actions":[{
"name":"auto_add_replicas_plan",
"class":"solr.AutoAddReplicasPlanAction"},
{
"name":"execute_plan",
"class":"solr.ExecutePlanAction"}]},
".scheduled_maintenance":{
"name":".scheduled_maintenance",
"event":"scheduled",
"startTime":"NOW",
"every":"+1DAY",
"enabled":true,
"actions":[{
"name":"inactive_shard_plan",
"class":"solr.InactiveShardPlanAction"},
{
"name":"inactive_markers_plan",
"class":"solr.InactiveMarkersPlanAction"},
{
"name":"execute_plan",
"class":"solr.ExecutePlanAction"}]},
"node_added_trigger":{
"event":"nodeAdded",
"waitFor":5,
"preferredOperation":"ADDREPLICA",
"actions":[{
"name":"compute_plan",
"class":"solr.ComputePlanAction"},
{
"name":"execute_plan",
"class":"solr.ExecutePlanAction"}]},
"node_lost_trigger":{
"event":"nodeLost",
"waitFor":120,
"preferredOperation":"DELETENODE",
"actions":[{
"name":"compute_plan",
"class":"solr.ComputePlanAction"},
{
"name":"execute_plan",
"class":"solr.ExecutePlanAction"}]}},
"listeners":{
".auto_add_replicas.system":{
"beforeAction":[],
"afterAction":[],
"stage":["STARTED",
"ABORTED",
"SUCCEEDED",
"FAILED",
"BEFORE_ACTION",
"AFTER_ACTION",
"IGNORED"],
"trigger":".auto_add_replicas",
"class":"org.apache.solr.cloud.autoscaling.SystemLogListener"},
".scheduled_maintenance.system":{
"beforeAction":[],
"afterAction":[],
"stage":["STARTED",
"ABORTED",
"SUCCEEDED",
"FAILED",
"BEFORE_ACTION",
"AFTER_ACTION",
"IGNORED"],
"trigger":".scheduled_maintenance",
"class":"org.apache.solr.cloud.autoscaling.SystemLogListener"},
"node_added_trigger.system":{
"beforeAction":[],
"afterAction":[],
"stage":["STARTED",
"ABORTED",
"SUCCEEDED",
"FAILED",
"BEFORE_ACTION",
"AFTER_ACTION",
"IGNORED"],
"trigger":"node_added_trigger",
"class":"org.apache.solr.cloud.autoscaling.SystemLogListener"},
"node_lost_trigger.system":{
"beforeAction":[],
"afterAction":[],
"stage":["STARTED",
"ABORTED",
"SUCCEEDED",
"FAILED",
"BEFORE_ACTION",
"AFTER_ACTION",
"IGNORED"],
"trigger":"node_lost_trigger",
"class":"org.apache.solr.cloud.autoscaling.SystemLogListener"}},
"properties":{},
"WARNING":"This response format is experimental. It is likely to change in the future."}