This is an automated email from the ASF dual-hosted git repository.

mlbiscoc pushed a commit to branch branch_10x
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/branch_10x by this push:
     new 5a599eddd59 SOLR-17955 & SOLR-17437: OTEL metrics: 
SplitShardCmd.checkDiskSpace needs conversion (#3859)
5a599eddd59 is described below

commit 5a599eddd59bbf273b476f61d228431f01829f58
Author: Matthew Biscocho <[email protected]>
AuthorDate: Mon Jan 26 16:39:13 2026 -0500

    SOLR-17955 & SOLR-17437: OTEL metrics: SplitShardCmd.checkDiskSpace needs 
conversion (#3859)
    
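    Re-enables the disk space check in split shard, which was disabled after
    moving metrics to OTEL. The split shard command now requests Prometheus-format
    metrics from the parent shard leader's node and parses them to read the index
    size and usable disk space. Also a minor change: it now throws, instead of
    logging a warning and skipping the check, when the index size or free disk
    space metric cannot be obtained.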
---
 .../SOLR-17437-shard-split-disk-space-check.yml    |  9 +++
 ...OLR-17955-SplitShardCmd.checkDiskSpace-otel.yml |  9 +++
 .../solr/cloud/api/collections/SplitShardCmd.java  | 80 +++++++++++++++-------
 3 files changed, 73 insertions(+), 25 deletions(-)

diff --git a/changelog/unreleased/SOLR-17437-shard-split-disk-space-check.yml 
b/changelog/unreleased/SOLR-17437-shard-split-disk-space-check.yml
new file mode 100644
index 00000000000..f363c311dad
--- /dev/null
+++ b/changelog/unreleased/SOLR-17437-shard-split-disk-space-check.yml
@@ -0,0 +1,9 @@
+# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc
+title: Fix disk space check in shard split operation
+type: fixed
+authors:
+  - name: Matthew Biscocho
+  - name: David Smiley
+links:
+  - name: SOLR-17437
+    url: https://issues.apache.org/jira/browse/SOLR-17437
diff --git 
a/changelog/unreleased/SOLR-17955-SplitShardCmd.checkDiskSpace-otel.yml 
b/changelog/unreleased/SOLR-17955-SplitShardCmd.checkDiskSpace-otel.yml
new file mode 100644
index 00000000000..cba54573654
--- /dev/null
+++ b/changelog/unreleased/SOLR-17955-SplitShardCmd.checkDiskSpace-otel.yml
@@ -0,0 +1,9 @@
+# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc
+title: OTEL metrics - SplitShardCmd.checkDiskSpace needs conversion
+type: fixed
+authors:
+  - name: Matthew Biscocho
+  - name: David Smiley
+links:
+  - name: SOLR-17955
+    url: https://issues.apache.org/jira/browse/SOLR-17955
\ No newline at end of file
diff --git 
a/solr/core/src/java/org/apache/solr/cloud/api/collections/SplitShardCmd.java 
b/solr/core/src/java/org/apache/solr/cloud/api/collections/SplitShardCmd.java
index c2f0fd3d500..3298ecba2bc 100644
--- 
a/solr/core/src/java/org/apache/solr/cloud/api/collections/SplitShardCmd.java
+++ 
b/solr/core/src/java/org/apache/solr/cloud/api/collections/SplitShardCmd.java
@@ -17,6 +17,7 @@
 
 package org.apache.solr.cloud.api.collections;
 
+import static 
org.apache.solr.client.solrj.response.InputStreamResponseParser.STREAM_KEY;
 import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP;
 import static org.apache.solr.common.cloud.ZkStateReader.REPLICA_TYPE;
 import static org.apache.solr.common.cloud.ZkStateReader.SHARD_ID_PROP;
@@ -26,6 +27,7 @@ import static 
org.apache.solr.common.params.CollectionParams.CollectionAction.CR
 import static 
org.apache.solr.common.params.CollectionParams.CollectionAction.DELETESHARD;
 import static org.apache.solr.common.params.CommonAdminParams.NUM_SUB_SHARDS;
 
+import java.io.InputStream;
 import java.lang.invoke.MethodHandles;
 import java.util.ArrayList;
 import java.util.Collection;
@@ -38,12 +40,14 @@ import java.util.NoSuchElementException;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicReference;
-import org.apache.solr.client.solrj.SolrResponse;
 import org.apache.solr.client.solrj.cloud.DistribStateManager;
 import org.apache.solr.client.solrj.cloud.SolrCloudManager;
 import org.apache.solr.client.solrj.cloud.VersionedData;
+import org.apache.solr.client.solrj.impl.CloudHttp2SolrClient;
+import org.apache.solr.client.solrj.impl.NodeValueFetcher;
 import org.apache.solr.client.solrj.request.CoreAdminRequest;
 import org.apache.solr.client.solrj.request.MetricsRequest;
+import org.apache.solr.client.solrj.response.InputStreamResponseParser;
 import org.apache.solr.cloud.DistributedClusterStateUpdater;
 import org.apache.solr.cloud.Overseer;
 import 
org.apache.solr.cloud.api.collections.CollectionHandlingUtils.ShardRequestTracker;
@@ -858,42 +862,68 @@ public class SplitShardCmd implements 
CollApiCmds.CollectionApiCommand {
       SolrIndexSplitter.SplitMethod method,
       SolrCloudManager cloudManager)
       throws Exception {
-    if (true) {
-      log.warn("checkDiskSpace disabled SOLR-17458 SOLR-17955");
-      return;
-    }
     // check that enough disk space is available on the parent leader node
     // otherwise the actual index splitting will always fail
 
-    String replicaName = Utils.parseMetricsReplicaName(collection, 
parentShardLeader.getCoreName());
-    String indexSizeMetricName =
-        "solr.core." + collection + "." + shard + "." + replicaName + 
":INDEX.sizeInBytes";
-    String freeDiskSpaceMetricName = "solr.node:CONTAINER.fs.usableSpace";
+    String indexSizeMetric = "solr_core_index_size_megabytes";
+    String freeDiskSpaceMetric = "solr_disk_space_megabytes";
+    String coreLabel =
+        collection
+            + "_"
+            + shard
+            + "_"
+            + Utils.parseMetricsReplicaName(collection, 
parentShardLeader.getCoreName());
 
     ModifiableSolrParams params =
-        new ModifiableSolrParams()
-            .add("key", indexSizeMetricName)
-            .add("key", freeDiskSpaceMetricName);
-    SolrResponse rsp = new 
MetricsRequest(params).process(cloudManager.getSolrClient());
-
-    Number size = (Number) rsp.getResponse()._get(List.of("metrics", 
indexSizeMetricName), null);
-    if (size == null) {
-      log.warn("cannot verify information for parent shard leader");
-      return;
+        new ModifiableSolrParams().add("name", indexSizeMetric).add("name", 
freeDiskSpaceMetric);
+
+    var req = new MetricsRequest(params);
+    req.setResponseParser(new InputStreamResponseParser("prometheus"));
+
+    var cloudClient = (CloudHttp2SolrClient) cloudManager.getSolrClient();
+    var httpClient = cloudClient.getHttpClient();
+
+    NamedList<Object> resp =
+        httpClient.requestWithBaseUrl(parentShardLeader.getBaseUrl(), req, 
null);
+
+    var indexSizeRef = new AtomicReference<Double>(-1.0);
+    var freeSizeRef = new AtomicReference<Double>(-1.0);
+    try (InputStream prometheusStream = (InputStream) resp.get(STREAM_KEY);
+        var lines = 
NodeValueFetcher.Metrics.prometheusMetricStream(prometheusStream)) {
+
+      lines
+          .filter(line -> !line.isBlank() && !line.startsWith("#"))
+          .forEach(
+              line -> {
+                if (line.contains(indexSizeMetric) && 
line.contains(coreLabel)) {
+                  
indexSizeRef.set(NodeValueFetcher.Metrics.extractPrometheusValue(line));
+                } else if (line.contains(freeDiskSpaceMetric) && 
line.contains("usable_space")) {
+                  
freeSizeRef.set(NodeValueFetcher.Metrics.extractPrometheusValue(line));
+                }
+              });
     }
-    double indexSize = size.doubleValue();
 
-    Number freeSize =
-        (Number) rsp.getResponse()._get(List.of("metrics", 
freeDiskSpaceMetricName), null);
-    if (freeSize == null) {
-      log.warn("missing node disk space information for parent shard leader");
-      return;
+    double indexSize = indexSizeRef.get();
+    double freeSize = freeSizeRef.get();
+
+    if (indexSize == -1.0) {
+      throw new SolrException(
+          SolrException.ErrorCode.SERVER_ERROR,
+          "cannot verify index size information for parent shard leader on 
node "
+              + parentShardLeader.getNodeName());
+    }
+
+    if (freeSize == -1.0) {
+      throw new SolrException(
+          SolrException.ErrorCode.SERVER_ERROR,
+          "missing node disk space information for parent shard leader on node 
"
+              + parentShardLeader.getNodeName());
     }
 
     // 100% more for REWRITE, 5% more for LINK
     double neededSpace =
         method == SolrIndexSplitter.SplitMethod.REWRITE ? 2.0 * indexSize : 
1.05 * indexSize;
-    if (freeSize.doubleValue() < neededSpace) {
+    if (freeSize < neededSpace) {
       throw new SolrException(
           SolrException.ErrorCode.SERVER_ERROR,
           "not enough free disk space to perform index split on node "

Reply via email to