tombentley commented on a change in pull request #10530:
URL: https://github.com/apache/kafka/pull/10530#discussion_r618372208



##########
File path: 
connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/RestServer.java
##########
@@ -357,21 +358,53 @@ public URI advertisedUrl() {
         ServerConnector serverConnector = 
findConnector(advertisedSecurityProtocol);
         builder.scheme(advertisedSecurityProtocol);
 
-        String advertisedHostname = 
config.getString(WorkerConfig.REST_ADVERTISED_HOST_NAME_CONFIG);
-        if (advertisedHostname != null && !advertisedHostname.isEmpty())
-            builder.host(advertisedHostname);
-        else if (serverConnector != null && serverConnector.getHost() != null 
&& serverConnector.getHost().length() > 0)
-            builder.host(serverConnector.getHost());
+        String hostNameOverride = hostNameOverride(serverConnector);
+        if (hostNameOverride != null) {
+            builder.host(hostNameOverride);
+        }
 
         Integer advertisedPort = 
config.getInt(WorkerConfig.REST_ADVERTISED_PORT_CONFIG);
         if (advertisedPort != null)
             builder.port(advertisedPort);
         else if (serverConnector != null && serverConnector.getPort() > 0)
             builder.port(serverConnector.getPort());
 
-        log.info("Advertised URI: {}", builder.build());
+        URI uri = builder.build();
+        maybeThrowInvalidHostNameException(uri, hostNameOverride);
+        log.info("Advertised URI: {}", uri);
 
-        return builder.build();
+        return uri;
+    }
+
+    private String hostNameOverride(ServerConnector serverConnector) {
+        String advertisedHostname = 
config.getString(WorkerConfig.REST_ADVERTISED_HOST_NAME_CONFIG);
+        if (advertisedHostname != null && !advertisedHostname.isEmpty())
+            return advertisedHostname;
+        else if (serverConnector != null && serverConnector.getHost() != null 
&& serverConnector.getHost().length() > 0)
+            return serverConnector.getHost();
+        return null;
+    }
+
+    /**
+     * Parses the uri and throws a more definitive error
+     * when the internal node to node communication can't happen due to an 
invalid host name.
+     */
+    static void maybeThrowInvalidHostNameException(URI uri, String 
hostNameOverride) {
+        //java URI parsing will fail silently returning null in the host if 
the host name contains invalid characters like _
+        //this bubbles up later when the Herder tries to communicate on the 
advertised url and the current HttpClient fails with an ambiguous message
+        if (uri.getHost() == null) {
+            String errorMsg = "Could not parse host from advertised URL: '"  + 
uri.toString() + "'";
+            if (hostNameOverride != null) {
+                //validate hostname using IDN class to see if it can bubble up 
the real cause and we can show the user a more detailed exception
+                try {
+                    IDN.toASCII(hostNameOverride, IDN.USE_STD3_ASCII_RULES);
+                } catch (IllegalArgumentException e) {
+                    errorMsg += ", as it doesn't conform to RFC 1123 
specification: " + e.getMessage();

Review comment:
       If we're going to mention the RFC let's mention section 2.1 
specifically. We're referring the user to a nearly 100 page document and it's 
only a tiny part of it which is relevant here.

##########
File path: 
connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/RestServer.java
##########
@@ -357,21 +358,53 @@ public URI advertisedUrl() {
         ServerConnector serverConnector = 
findConnector(advertisedSecurityProtocol);
         builder.scheme(advertisedSecurityProtocol);
 
-        String advertisedHostname = 
config.getString(WorkerConfig.REST_ADVERTISED_HOST_NAME_CONFIG);
-        if (advertisedHostname != null && !advertisedHostname.isEmpty())
-            builder.host(advertisedHostname);
-        else if (serverConnector != null && serverConnector.getHost() != null 
&& serverConnector.getHost().length() > 0)
-            builder.host(serverConnector.getHost());
+        String hostNameOverride = hostNameOverride(serverConnector);
+        if (hostNameOverride != null) {
+            builder.host(hostNameOverride);
+        }
 
         Integer advertisedPort = 
config.getInt(WorkerConfig.REST_ADVERTISED_PORT_CONFIG);
         if (advertisedPort != null)
             builder.port(advertisedPort);
         else if (serverConnector != null && serverConnector.getPort() > 0)
             builder.port(serverConnector.getPort());
 
-        log.info("Advertised URI: {}", builder.build());
+        URI uri = builder.build();
+        maybeThrowInvalidHostNameException(uri, hostNameOverride);
+        log.info("Advertised URI: {}", uri);
 
-        return builder.build();
+        return uri;
+    }
+
+    private String hostNameOverride(ServerConnector serverConnector) {
+        String advertisedHostname = 
config.getString(WorkerConfig.REST_ADVERTISED_HOST_NAME_CONFIG);
+        if (advertisedHostname != null && !advertisedHostname.isEmpty())
+            return advertisedHostname;
+        else if (serverConnector != null && serverConnector.getHost() != null 
&& serverConnector.getHost().length() > 0)
+            return serverConnector.getHost();
+        return null;
+    }
+
+    /**
+     * Parses the uri and throws a more definitive error
+     * when the internal node to node communication can't happen due to an 
invalid host name.
+     */
+    static void maybeThrowInvalidHostNameException(URI uri, String 
hostNameOverride) {
+        //java URI parsing will fail silently returning null in the host if 
the host name contains invalid characters like _
+        //this bubbles up later when the Herder tries to communicate on the 
advertised url and the current HttpClient fails with an ambiguous message

Review comment:
       It's not that URI parsing _fails_, it actually succeeds, but it doesn't 
result in a server-based authority (it's a registry-based authority instead, 
precisely because what would be the hostname part cannot be a hostname because 
of the illegal syntax) and so there really is no host. Can we amend the comment 
to something like this:
   
   ```suggestion
           // When the hostname contains illegal characters (e.g. _) the URI 
authority will be registry-based, lacking a host
           // so detect it here to prevent a confusing error later when the 
Herder tries to communicate on the advertised url
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to