murblanc commented on code in PR #1691:
URL: https://github.com/apache/solr/pull/1691#discussion_r1250853774


##########
solr/core/src/java/org/apache/solr/search/join/ScoreJoinQParserPlugin.java:
##########
@@ -355,43 +375,163 @@ private static String resolveAlias(String fromIndex, 
ZkController zkController)
     }
   }
 
-  private static String findLocalReplicaForFromIndex(ZkController 
zkController, String fromIndex) {
-    String fromReplica = null;
-
-    String nodeName = zkController.getNodeName();
-    for (Slice slice :
-        
zkController.getClusterState().getCollection(fromIndex).getActiveSlicesArr()) {
-      if (fromReplica != null)
-        throw new SolrException(
-            SolrException.ErrorCode.BAD_REQUEST,
-            "SolrCloud join: To join with a sharded collection, use 
method=crossCollection.");
-
-      for (Replica replica : slice.getReplicas()) {
-        if (replica.getNodeName().equals(nodeName)) {
-          fromReplica = replica.getStr(ZkStateReader.CORE_NAME_PROP);
-          // found local replica, but is it Active?
-          if (replica.getState() != Replica.State.ACTIVE)
-            throw new SolrException(
-                SolrException.ErrorCode.BAD_REQUEST,
-                "SolrCloud join: "
-                    + fromIndex
-                    + " has a local replica ("
-                    + fromReplica
-                    + ") on "
-                    + nodeName
-                    + ", but it is "
-                    + replica.getState());
+  private static String findLocalReplicaForFromIndex(
+      ZkController zkController,
+      String fromIndex,
+      SolrCore toCore,
+      String toField,
+      String fromField,
+      SolrParams localParams) {
+    final DocCollection fromCollection = 
zkController.getClusterState().getCollection(fromIndex);
+    final String nodeName = zkController.getNodeName();
+    final String hitTheRoad =
+        "SolrCloud join: To join with a collection that might not be 
co-located, use method=crossCollection.";
+    if (fromCollection.getSlices().size() == 1) {
+      String fromReplica = null;
+
+      for (Slice slice : fromCollection.getActiveSlicesArr()) {
+        if (fromReplica != null)

Review Comment:
   This test is always `false` (i.e. `fromReplica` is always `null`).
   
   `fromReplica` was set to `null` line 390 above, and the whole block is 
guarded by the `if` at line 389 which guarantees that `fromCollection` has a 
single slice, therefore the block of the `for` loop line 392 executes exactly 
once, the test line 393 will be executed only once when `fromReplica` is null.



##########
solr/core/src/java/org/apache/solr/search/join/ScoreJoinQParserPlugin.java:
##########
@@ -355,43 +375,163 @@ private static String resolveAlias(String fromIndex, 
ZkController zkController)
     }
   }
 
-  private static String findLocalReplicaForFromIndex(ZkController 
zkController, String fromIndex) {
-    String fromReplica = null;
-
-    String nodeName = zkController.getNodeName();
-    for (Slice slice :
-        
zkController.getClusterState().getCollection(fromIndex).getActiveSlicesArr()) {
-      if (fromReplica != null)
-        throw new SolrException(
-            SolrException.ErrorCode.BAD_REQUEST,
-            "SolrCloud join: To join with a sharded collection, use 
method=crossCollection.");
-
-      for (Replica replica : slice.getReplicas()) {
-        if (replica.getNodeName().equals(nodeName)) {
-          fromReplica = replica.getStr(ZkStateReader.CORE_NAME_PROP);
-          // found local replica, but is it Active?
-          if (replica.getState() != Replica.State.ACTIVE)
-            throw new SolrException(
-                SolrException.ErrorCode.BAD_REQUEST,
-                "SolrCloud join: "
-                    + fromIndex
-                    + " has a local replica ("
-                    + fromReplica
-                    + ") on "
-                    + nodeName
-                    + ", but it is "
-                    + replica.getState());
+  private static String findLocalReplicaForFromIndex(
+      ZkController zkController,
+      String fromIndex,
+      SolrCore toCore,
+      String toField,
+      String fromField,
+      SolrParams localParams) {
+    final DocCollection fromCollection = 
zkController.getClusterState().getCollection(fromIndex);
+    final String nodeName = zkController.getNodeName();
+    final String hitTheRoad =
+        "SolrCloud join: To join with a collection that might not be 
co-located, use method=crossCollection.";
+    if (fromCollection.getSlices().size() == 1) {
+      String fromReplica = null;
+
+      for (Slice slice : fromCollection.getActiveSlicesArr()) {
+        if (fromReplica != null)
+          throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, 
hitTheRoad);
+
+        for (Replica replica : slice.getReplicas()) {
+          if (replica.getNodeName().equals(nodeName)) {
+            fromReplica = replica.getStr(ZkStateReader.CORE_NAME_PROP);
+            // found local replica, but is it Active?
+            if (replica.getState() != Replica.State.ACTIVE)
+              throw new SolrException(
+                  SolrException.ErrorCode.BAD_REQUEST,
+                  "SolrCloud join: "
+                      + fromIndex
+                      + " has a local replica ("
+                      + fromReplica
+                      + ") on "
+                      + nodeName
+                      + ", but it is "
+                      + replica.getState());
+
+            break;
+          }
+        }
+      }
 
+      if (fromReplica == null)
+        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, 
hitTheRoad);
+
+      return fromReplica;
+    } else { // sharded from
+      final CloudDescriptor toCoreDescriptor = 
toCore.getCoreDescriptor().getCloudDescriptor();
+      final String toShardId = toCoreDescriptor.getShardId();
+      final DocCollection toCollection =
+          
zkController.getClusterState().getCollection(toCoreDescriptor.getCollectionName());
+
+      String routerName = checkRouters(toCollection, fromCollection, 
hitTheRoad);
+      boolean checkField = false;
+      switch (routerName) {
+        case PlainIdRouter.NAME: // mandatory field check
+          checkField = true;
+          checkRouterField(toCore, toCollection, toField, hitTheRoad);
+          break;
+        case CompositeIdRouter.NAME: // let you shoot your legs
+          if (localParams.getBool(CHECK_ROUTER_FIELD, true)) {
+            checkField = true;
+            checkRouterField(toCore, toCollection, toField, hitTheRoad);
+          }
           break;
+        case ImplicitDocRouter.NAME: // don't check field, you know what you do
+        default:
+          break;
+      }
+      // if router field is not set, "to" may fallback to uniqueKey
+      if (localParams.getBool(CHECK_ROUTER_FIELD, true)) {
+        checkRouterField(toCore, toCollection, toField, hitTheRoad);
+      }
+      checkShardNumber(toCollection, fromCollection, hitTheRoad);
+      final Slice fromShardReplicas = 
fromCollection.getActiveSlicesMap().get(toShardId);
+      for (Replica collocatedFrom :
+          fromShardReplicas.getReplicas(r -> 
r.getNodeName().equals(nodeName))) {
+        if (log.isDebugEnabled()) {
+          log.debug("<-{} @ {}", collocatedFrom.getCoreName(), 
toCoreDescriptor.getCoreNodeName());
+        }
+        // which replica to pick if there are many one?
+        // if router field is not set, "from" may fallback to uniqueKey, but 
only we attempt to pick
+        // local shard.
+        if (checkField) {
+          try (final SolrCore fromCore =
+              toCore.getCoreContainer().getCore(collocatedFrom.getCoreName())) 
{
+            checkRouterField(fromCore, fromCollection, fromField, hitTheRoad);
+          }
         }
+        return collocatedFrom.getCoreName();
       }
+      final String shards =
+          fromShardReplicas.getReplicas().stream()
+              .map(r -> r.getShard() + ":" + r.getCoreName() + "@" + 
r.getNodeName())
+              .collect(Collectors.joining(","));
+      throw new SolrException(
+          SolrException.ErrorCode.BAD_REQUEST,
+          "Unable to find collocated \"from\" replica: "
+              + shards
+              + " at node: "
+              + nodeName
+              + " for "
+              + toShardId
+              + ". "
+              + hitTheRoad);
+    }
+  }
+
+  private static void checkShardNumber(

Review Comment:
   Suggestion: use `count` instead of `number` in method name



##########
solr/core/src/java/org/apache/solr/search/join/ScoreJoinQParserPlugin.java:
##########
@@ -299,29 +308,40 @@ private Query createQuery(
   }
 
   /**
-   * Returns an String with the name of a core.
+   * Returns a String with the name of a core.
    *
    * <p>This method searches the core with fromIndex name in the core's 
container. If fromIndex
-   * isn't name of collection or alias it's returns fromIndex without changes. 
If fromIndex is name
-   * of alias but if the alias points to multiple collections it's throw
+   * isn't name of collection or alias it returns fromIndex without changes. 
If fromIndex is name of
+   * alias but if the alias points to multiple collections it's throw

Review Comment:
   Additional typos to fix:
   
   > If fromIndex isn't **the** name of **a** collection or alias it returns 
fromIndex without changes. If fromIndex is **the** name of **an** alias but  
**̶i̶f̶** the alias points to multiple collections **it throws** 
SolrException...



##########
solr/core/src/java/org/apache/solr/search/join/ScoreJoinQParserPlugin.java:
##########
@@ -355,43 +375,163 @@ private static String resolveAlias(String fromIndex, 
ZkController zkController)
     }
   }
 
-  private static String findLocalReplicaForFromIndex(ZkController 
zkController, String fromIndex) {
-    String fromReplica = null;
-
-    String nodeName = zkController.getNodeName();
-    for (Slice slice :
-        
zkController.getClusterState().getCollection(fromIndex).getActiveSlicesArr()) {
-      if (fromReplica != null)
-        throw new SolrException(
-            SolrException.ErrorCode.BAD_REQUEST,
-            "SolrCloud join: To join with a sharded collection, use 
method=crossCollection.");
-
-      for (Replica replica : slice.getReplicas()) {
-        if (replica.getNodeName().equals(nodeName)) {
-          fromReplica = replica.getStr(ZkStateReader.CORE_NAME_PROP);
-          // found local replica, but is it Active?
-          if (replica.getState() != Replica.State.ACTIVE)
-            throw new SolrException(
-                SolrException.ErrorCode.BAD_REQUEST,
-                "SolrCloud join: "
-                    + fromIndex
-                    + " has a local replica ("
-                    + fromReplica
-                    + ") on "
-                    + nodeName
-                    + ", but it is "
-                    + replica.getState());
+  private static String findLocalReplicaForFromIndex(
+      ZkController zkController,
+      String fromIndex,
+      SolrCore toCore,
+      String toField,
+      String fromField,
+      SolrParams localParams) {
+    final DocCollection fromCollection = 
zkController.getClusterState().getCollection(fromIndex);
+    final String nodeName = zkController.getNodeName();
+    final String hitTheRoad =
+        "SolrCloud join: To join with a collection that might not be 
co-located, use method=crossCollection.";
+    if (fromCollection.getSlices().size() == 1) {
+      String fromReplica = null;
+
+      for (Slice slice : fromCollection.getActiveSlicesArr()) {
+        if (fromReplica != null)
+          throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, 
hitTheRoad);
+
+        for (Replica replica : slice.getReplicas()) {
+          if (replica.getNodeName().equals(nodeName)) {
+            fromReplica = replica.getStr(ZkStateReader.CORE_NAME_PROP);
+            // found local replica, but is it Active?
+            if (replica.getState() != Replica.State.ACTIVE)
+              throw new SolrException(
+                  SolrException.ErrorCode.BAD_REQUEST,
+                  "SolrCloud join: "
+                      + fromIndex
+                      + " has a local replica ("
+                      + fromReplica
+                      + ") on "
+                      + nodeName
+                      + ", but it is "
+                      + replica.getState());
+
+            break;
+          }
+        }
+      }
 
+      if (fromReplica == null)
+        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, 
hitTheRoad);
+
+      return fromReplica;
+    } else { // sharded from
+      final CloudDescriptor toCoreDescriptor = 
toCore.getCoreDescriptor().getCloudDescriptor();
+      final String toShardId = toCoreDescriptor.getShardId();
+      final DocCollection toCollection =
+          
zkController.getClusterState().getCollection(toCoreDescriptor.getCollectionName());
+
+      String routerName = checkRouters(toCollection, fromCollection, 
hitTheRoad);
+      boolean checkField = false;
+      switch (routerName) {
+        case PlainIdRouter.NAME: // mandatory field check
+          checkField = true;
+          checkRouterField(toCore, toCollection, toField, hitTheRoad);
+          break;
+        case CompositeIdRouter.NAME: // let you shoot your legs
+          if (localParams.getBool(CHECK_ROUTER_FIELD, true)) {
+            checkField = true;
+            checkRouterField(toCore, toCollection, toField, hitTheRoad);
+          }
           break;
+        case ImplicitDocRouter.NAME: // don't check field, you know what you do
+        default:
+          break;
+      }
+      // if router field is not set, "to" may fallback to uniqueKey
+      if (localParams.getBool(CHECK_ROUTER_FIELD, true)) {

Review Comment:
   This would be running twice in case of `PlainIdRouter.NAME` and 
`CompositeIdRouter.NAME` as the calls were already made in the `switch` block.



##########
solr/core/src/java/org/apache/solr/search/join/ScoreJoinQParserPlugin.java:
##########
@@ -355,43 +375,163 @@ private static String resolveAlias(String fromIndex, 
ZkController zkController)
     }
   }
 
-  private static String findLocalReplicaForFromIndex(ZkController 
zkController, String fromIndex) {
-    String fromReplica = null;
-
-    String nodeName = zkController.getNodeName();
-    for (Slice slice :
-        
zkController.getClusterState().getCollection(fromIndex).getActiveSlicesArr()) {
-      if (fromReplica != null)
-        throw new SolrException(
-            SolrException.ErrorCode.BAD_REQUEST,
-            "SolrCloud join: To join with a sharded collection, use 
method=crossCollection.");
-
-      for (Replica replica : slice.getReplicas()) {
-        if (replica.getNodeName().equals(nodeName)) {
-          fromReplica = replica.getStr(ZkStateReader.CORE_NAME_PROP);
-          // found local replica, but is it Active?
-          if (replica.getState() != Replica.State.ACTIVE)
-            throw new SolrException(
-                SolrException.ErrorCode.BAD_REQUEST,
-                "SolrCloud join: "
-                    + fromIndex
-                    + " has a local replica ("
-                    + fromReplica
-                    + ") on "
-                    + nodeName
-                    + ", but it is "
-                    + replica.getState());
+  private static String findLocalReplicaForFromIndex(
+      ZkController zkController,
+      String fromIndex,
+      SolrCore toCore,
+      String toField,
+      String fromField,
+      SolrParams localParams) {
+    final DocCollection fromCollection = 
zkController.getClusterState().getCollection(fromIndex);
+    final String nodeName = zkController.getNodeName();
+    final String hitTheRoad =
+        "SolrCloud join: To join with a collection that might not be 
co-located, use method=crossCollection.";
+    if (fromCollection.getSlices().size() == 1) {
+      String fromReplica = null;
+
+      for (Slice slice : fromCollection.getActiveSlicesArr()) {
+        if (fromReplica != null)
+          throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, 
hitTheRoad);
+
+        for (Replica replica : slice.getReplicas()) {
+          if (replica.getNodeName().equals(nodeName)) {
+            fromReplica = replica.getStr(ZkStateReader.CORE_NAME_PROP);
+            // found local replica, but is it Active?
+            if (replica.getState() != Replica.State.ACTIVE)
+              throw new SolrException(
+                  SolrException.ErrorCode.BAD_REQUEST,
+                  "SolrCloud join: "
+                      + fromIndex
+                      + " has a local replica ("
+                      + fromReplica
+                      + ") on "
+                      + nodeName
+                      + ", but it is "
+                      + replica.getState());
+
+            break;
+          }
+        }
+      }
 
+      if (fromReplica == null)
+        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, 
hitTheRoad);
+
+      return fromReplica;
+    } else { // sharded from
+      final CloudDescriptor toCoreDescriptor = 
toCore.getCoreDescriptor().getCloudDescriptor();
+      final String toShardId = toCoreDescriptor.getShardId();
+      final DocCollection toCollection =
+          
zkController.getClusterState().getCollection(toCoreDescriptor.getCollectionName());
+
+      String routerName = checkRouters(toCollection, fromCollection, 
hitTheRoad);
+      boolean checkField = false;
+      switch (routerName) {
+        case PlainIdRouter.NAME: // mandatory field check
+          checkField = true;
+          checkRouterField(toCore, toCollection, toField, hitTheRoad);
+          break;
+        case CompositeIdRouter.NAME: // let you shoot your legs
+          if (localParams.getBool(CHECK_ROUTER_FIELD, true)) {
+            checkField = true;
+            checkRouterField(toCore, toCollection, toField, hitTheRoad);
+          }
           break;
+        case ImplicitDocRouter.NAME: // don't check field, you know what you do
+        default:
+          break;
+      }
+      // if router field is not set, "to" may fallback to uniqueKey
+      if (localParams.getBool(CHECK_ROUTER_FIELD, true)) {
+        checkRouterField(toCore, toCollection, toField, hitTheRoad);
+      }
+      checkShardNumber(toCollection, fromCollection, hitTheRoad);

Review Comment:
   Shards can split. How do we support one of the collections having a shard 
split here? Even if we (try to) force the other collection to split at the same 
time, it's likely not possible in practice.
   
   The feature does not really require a one to one matching of shards. 
Assuming the matching shards do have the same ranges, this is just one setup 
that allows doing a local join.
   
   What's really needed is to have "from" replicas covering the range of "to" 
replicas on every node where there are "to" replicas, regardless of the 
topology of the collections (sharded or not sharded, different number of shards 
for each collections).
   
   Verifying such a hash range coverage generalizes the existing join (in which 
the "from" collection must be single shard and have a replica on each nodes 
that has a "to" replica) and the more advanced join you're implementing here.
   
   Verifying by range will also support the use case of shard splits on a node, 
as the number of shards of each collection is not a criteria for allowing or 
disallowing the join, and a split does not change the total range of a 
collection present on a node.



##########
solr/core/src/java/org/apache/solr/search/join/ScoreJoinQParserPlugin.java:
##########
@@ -355,43 +375,163 @@ private static String resolveAlias(String fromIndex, 
ZkController zkController)
     }
   }
 
-  private static String findLocalReplicaForFromIndex(ZkController 
zkController, String fromIndex) {
-    String fromReplica = null;
-
-    String nodeName = zkController.getNodeName();
-    for (Slice slice :
-        
zkController.getClusterState().getCollection(fromIndex).getActiveSlicesArr()) {
-      if (fromReplica != null)
-        throw new SolrException(
-            SolrException.ErrorCode.BAD_REQUEST,
-            "SolrCloud join: To join with a sharded collection, use 
method=crossCollection.");
-
-      for (Replica replica : slice.getReplicas()) {
-        if (replica.getNodeName().equals(nodeName)) {
-          fromReplica = replica.getStr(ZkStateReader.CORE_NAME_PROP);
-          // found local replica, but is it Active?
-          if (replica.getState() != Replica.State.ACTIVE)
-            throw new SolrException(
-                SolrException.ErrorCode.BAD_REQUEST,
-                "SolrCloud join: "
-                    + fromIndex
-                    + " has a local replica ("
-                    + fromReplica
-                    + ") on "
-                    + nodeName
-                    + ", but it is "
-                    + replica.getState());
+  private static String findLocalReplicaForFromIndex(

Review Comment:
   This PR turns a 38 lines method into a 159 lines method which makes it hard 
to follow (and review).
   It should be refactored into multiple more readable methods.



##########
solr/core/src/java/org/apache/solr/search/join/ScoreJoinQParserPlugin.java:
##########
@@ -355,43 +375,163 @@ private static String resolveAlias(String fromIndex, 
ZkController zkController)
     }
   }
 
-  private static String findLocalReplicaForFromIndex(ZkController 
zkController, String fromIndex) {
-    String fromReplica = null;
-
-    String nodeName = zkController.getNodeName();
-    for (Slice slice :
-        
zkController.getClusterState().getCollection(fromIndex).getActiveSlicesArr()) {
-      if (fromReplica != null)
-        throw new SolrException(
-            SolrException.ErrorCode.BAD_REQUEST,
-            "SolrCloud join: To join with a sharded collection, use 
method=crossCollection.");
-
-      for (Replica replica : slice.getReplicas()) {
-        if (replica.getNodeName().equals(nodeName)) {
-          fromReplica = replica.getStr(ZkStateReader.CORE_NAME_PROP);
-          // found local replica, but is it Active?
-          if (replica.getState() != Replica.State.ACTIVE)
-            throw new SolrException(
-                SolrException.ErrorCode.BAD_REQUEST,
-                "SolrCloud join: "
-                    + fromIndex
-                    + " has a local replica ("
-                    + fromReplica
-                    + ") on "
-                    + nodeName
-                    + ", but it is "
-                    + replica.getState());
+  private static String findLocalReplicaForFromIndex(
+      ZkController zkController,
+      String fromIndex,
+      SolrCore toCore,
+      String toField,
+      String fromField,
+      SolrParams localParams) {
+    final DocCollection fromCollection = 
zkController.getClusterState().getCollection(fromIndex);
+    final String nodeName = zkController.getNodeName();
+    final String hitTheRoad =
+        "SolrCloud join: To join with a collection that might not be 
co-located, use method=crossCollection.";
+    if (fromCollection.getSlices().size() == 1) {
+      String fromReplica = null;
+
+      for (Slice slice : fromCollection.getActiveSlicesArr()) {
+        if (fromReplica != null)
+          throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, 
hitTheRoad);
+
+        for (Replica replica : slice.getReplicas()) {
+          if (replica.getNodeName().equals(nodeName)) {
+            fromReplica = replica.getStr(ZkStateReader.CORE_NAME_PROP);
+            // found local replica, but is it Active?
+            if (replica.getState() != Replica.State.ACTIVE)
+              throw new SolrException(
+                  SolrException.ErrorCode.BAD_REQUEST,
+                  "SolrCloud join: "
+                      + fromIndex
+                      + " has a local replica ("
+                      + fromReplica
+                      + ") on "
+                      + nodeName
+                      + ", but it is "
+                      + replica.getState());
+
+            break;
+          }
+        }
+      }
 
+      if (fromReplica == null)
+        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, 
hitTheRoad);
+
+      return fromReplica;
+    } else { // sharded from
+      final CloudDescriptor toCoreDescriptor = 
toCore.getCoreDescriptor().getCloudDescriptor();
+      final String toShardId = toCoreDescriptor.getShardId();
+      final DocCollection toCollection =
+          
zkController.getClusterState().getCollection(toCoreDescriptor.getCollectionName());
+
+      String routerName = checkRouters(toCollection, fromCollection, 
hitTheRoad);
+      boolean checkField = false;
+      switch (routerName) {
+        case PlainIdRouter.NAME: // mandatory field check
+          checkField = true;
+          checkRouterField(toCore, toCollection, toField, hitTheRoad);
+          break;
+        case CompositeIdRouter.NAME: // let you shoot your legs
+          if (localParams.getBool(CHECK_ROUTER_FIELD, true)) {
+            checkField = true;
+            checkRouterField(toCore, toCollection, toField, hitTheRoad);
+          }
           break;
+        case ImplicitDocRouter.NAME: // don't check field, you know what you do
+        default:
+          break;
+      }
+      // if router field is not set, "to" may fallback to uniqueKey
+      if (localParams.getBool(CHECK_ROUTER_FIELD, true)) {
+        checkRouterField(toCore, toCollection, toField, hitTheRoad);
+      }
+      checkShardNumber(toCollection, fromCollection, hitTheRoad);
+      final Slice fromShardReplicas = 
fromCollection.getActiveSlicesMap().get(toShardId);

Review Comment:
   In addition to same filed and same router, having the same number shard on 
both collections is not sufficient, one must also verify the hash range for the 
from and to shards is equal, right?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org
For additional commands, e-mail: issues-h...@solr.apache.org

Reply via email to