Hello,

When one of the Solr nodes in the cluster is rebooted, we often see one or
both of the following in a collection's shards:
1. a shard whose LEADER replica is in DOWN state, and/or
2. a shard with no LEADER at all

Output from /solr/admin/collections?action=CLUSTERSTATUS is below.

Even after 5 to 10 minutes, the collection often does not recover. It is
unclear why this is happening and what we can try to prevent or remedy it.

PS: perReplicaState=true in Solr v8.8.0 didn't work well because, after a
rebalance, all replicas somehow got a "leader:true" status even though
states.json looked OK.

{
  "responseHeader": {
    "status": 0,
    "QTime": 2
  },
  "cluster": {
    "collections": {
      "datacore": {
        "pullReplicas": "0",
        "replicationFactor": "0",
        "shards": {
          "xxxx_yyyy_zzzz": {
            "range": null,
            "state": "active",
            "replicas": {
              "core_node1": {
                "core": "datacore_xxxx_yyyy_zzzz_replica_t187",
                "base_url": "http://solr-0.solr-headless:8983/solr",
                "node_name": "solr-0.solr-headless:8983_solr",
                "state": "down",
                "type": "TLOG",
                "force_set_state": "false",
                "property.preferredleader": "true",
                "leader": "true"
              },
              "core_node2": {
                "core": "datacore_xxxx_yyyy_zzzz_replica_t188",
                "base_url": "http://solr-1.solr-headless:8983/solr",
                "node_name": "solr-1.solr-headless:8983_solr",
                "state": "active",
                "type": "TLOG",
                "force_set_state": "false"
              },
              "core_node3": {
                "core": "datacore_xxxx_yyyy_zzzz_replica_t189",
                "base_url": "http://solr-2.solr-headless:8983/solr",
                "node_name": "solr-2.solr-headless:8983_solr",
                "state": "active",
                "type": "TLOG",
                "force_set_state": "false"
              }
            }
          },
          "gggg_hhhh_jjjjj": {
            "range": null,
            "state": "active",
            "replicas": {
              "core_node19": {
                "core": "datacore_gggg_hhhh_jjjjj_replica_t187",
                "base_url": "http://solr-0.solr-headless:8983/solr",
                "node_name": "solr-0.solr-headless:8983_solr",
                "state": "down",
                "type": "TLOG",
                "force_set_state": "false",
                "property.preferredleader": "true"
              },
              "core_node20": {
                "core": "datacore_gggg_hhhh_jjjjj_replica_t188",
                "base_url": "http://solr-1.solr-headless:8983/solr",
                "node_name": "solr-1.solr-headless:8983_solr",
                "state": "active",
                "type": "TLOG",
                "force_set_state": "false"
              },
              "core_node21": {
                "core": "datacore_gggg_hhhh_jjjjj_replica_t189",
                "base_url": "http://solr-2.solr-headless:8983/solr",
                "node_name": "solr-2.solr-headless:8983_solr",
                "state": "active",
                "type": "TLOG",
                "force_set_state": "false"
              }
            }
          },
          "aaaa_bbbb_cccc": {
            "range": null,
            "state": "active",
            "replicas": {
              "core_node4": {
                "core": "datacore_aaaa_bbbb_cccc_replica_t91",
                "base_url": "http://solr-0...



--
Sent from: https://lucene.472066.n3.nabble.com/Solr-User-f472068.html

Reply via email to