tombentley commented on a change in pull request #10530: URL: https://github.com/apache/kafka/pull/10530#discussion_r618372208
########## File path: connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/RestServer.java ########## @@ -357,21 +358,53 @@ public URI advertisedUrl() { ServerConnector serverConnector = findConnector(advertisedSecurityProtocol); builder.scheme(advertisedSecurityProtocol); - String advertisedHostname = config.getString(WorkerConfig.REST_ADVERTISED_HOST_NAME_CONFIG); - if (advertisedHostname != null && !advertisedHostname.isEmpty()) - builder.host(advertisedHostname); - else if (serverConnector != null && serverConnector.getHost() != null && serverConnector.getHost().length() > 0) - builder.host(serverConnector.getHost()); + String hostNameOverride = hostNameOverride(serverConnector); + if (hostNameOverride != null) { + builder.host(hostNameOverride); + } Integer advertisedPort = config.getInt(WorkerConfig.REST_ADVERTISED_PORT_CONFIG); if (advertisedPort != null) builder.port(advertisedPort); else if (serverConnector != null && serverConnector.getPort() > 0) builder.port(serverConnector.getPort()); - log.info("Advertised URI: {}", builder.build()); + URI uri = builder.build(); + maybeThrowInvalidHostNameException(uri, hostNameOverride); + log.info("Advertised URI: {}", uri); - return builder.build(); + return uri; + } + + private String hostNameOverride(ServerConnector serverConnector) { + String advertisedHostname = config.getString(WorkerConfig.REST_ADVERTISED_HOST_NAME_CONFIG); + if (advertisedHostname != null && !advertisedHostname.isEmpty()) + return advertisedHostname; + else if (serverConnector != null && serverConnector.getHost() != null && serverConnector.getHost().length() > 0) + return serverConnector.getHost(); + return null; + } + + /** + * Parses the uri and throws a more definitive error + * when the internal node to node communication can't happen due to an invalid host name. + */ + static void maybeThrowInvalidHostNameException(URI uri, String hostNameOverride) { + //java URI parsing will fail silently returning null in the host if the host name contains invalid characters like _ + //this bubbles up later when the Herder tries to communicate on the advertised url and the current HttpClient fails with an ambiguous message + if (uri.getHost() == null) { + String errorMsg = "Could not parse host from advertised URL: '" + uri.toString() + "'"; + if (hostNameOverride != null) { + //validate hostname using IDN class to see if it can bubble up the real cause and we can show the user a more detailed exception + try { + IDN.toASCII(hostNameOverride, IDN.USE_STD3_ASCII_RULES); + } catch (IllegalArgumentException e) { + errorMsg += ", as it doesn't conform to RFC 1123 specification: " + e.getMessage(); Review comment: If we're going to mention the RFC let's mention section 2.1 specifically. We're referring the user to a nearly 100 page document and it's only a tiny part of it which is relevant here. ########## File path: connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/RestServer.java ########## @@ -357,21 +358,53 @@ public URI advertisedUrl() { ServerConnector serverConnector = findConnector(advertisedSecurityProtocol); builder.scheme(advertisedSecurityProtocol); - String advertisedHostname = config.getString(WorkerConfig.REST_ADVERTISED_HOST_NAME_CONFIG); - if (advertisedHostname != null && !advertisedHostname.isEmpty()) - builder.host(advertisedHostname); - else if (serverConnector != null && serverConnector.getHost() != null && serverConnector.getHost().length() > 0) - builder.host(serverConnector.getHost()); + String hostNameOverride = hostNameOverride(serverConnector); + if (hostNameOverride != null) { + builder.host(hostNameOverride); + } Integer advertisedPort = config.getInt(WorkerConfig.REST_ADVERTISED_PORT_CONFIG); if (advertisedPort != null) builder.port(advertisedPort); else if (serverConnector != null && serverConnector.getPort() > 0) builder.port(serverConnector.getPort()); - log.info("Advertised URI: {}", builder.build()); + URI uri = builder.build(); + maybeThrowInvalidHostNameException(uri, hostNameOverride); + log.info("Advertised URI: {}", uri); - return builder.build(); + return uri; + } + + private String hostNameOverride(ServerConnector serverConnector) { + String advertisedHostname = config.getString(WorkerConfig.REST_ADVERTISED_HOST_NAME_CONFIG); + if (advertisedHostname != null && !advertisedHostname.isEmpty()) + return advertisedHostname; + else if (serverConnector != null && serverConnector.getHost() != null && serverConnector.getHost().length() > 0) + return serverConnector.getHost(); + return null; + } + + /** + * Parses the uri and throws a more definitive error + * when the internal node to node communication can't happen due to an invalid host name. + */ + static void maybeThrowInvalidHostNameException(URI uri, String hostNameOverride) { + //java URI parsing will fail silently returning null in the host if the host name contains invalid characters like _ + //this bubbles up later when the Herder tries to communicate on the advertised url and the current HttpClient fails with an ambiguous message Review comment: It's not that URI parsing _fails_, it actually succeeds, but it doesn't result in a server-based authority (it's a registry-based authority instead, precisely because what would be the hostname part cannot be a hostname because of the illegal syntax) and so there really is no host. Can we amend the comment to something like this: ```suggestion // When the hostname contains illegal characters (e.g. _) the URI authority will be registry-based, lacking a host // so detect it here to prevent a confusing error later when the Herder tries to communicate on the advertised url ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org