Hello all,
I'm trying to upgrade a working cluster from OpenShift Origin 3.9
to OKD 3.10, and the control plane upgrade fails at one point with
"host not found".
I've looked a bit into the problem and found this issue on GitHub:
https://github.com/openshift/openshift-ansible/issues/9935 where
michaelgugino points out that "when upgrading from 3.9, your
hostnames match the node names in 'oc get nodes' otherwise, we
won't be able to find the CSRs for your nodes."
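If I understand that correctly, the 3.10 upgrade waits for new node
CSRs and looks them up by node name, so a mismatch makes it give up.
The pending requests should be visible with plain oc (this is just my
reading of the issue, not something I've verified):

  $ oc get csr
  $ oc describe csr <name>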
My issue is precisely this: the node names are in fact the machines'
IP addresses, not their hostnames. I noticed this at installation
time, but as the 3.9 cluster was functioning all right, I let it be.
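To illustrate, oc get nodes on my cluster lists entries like this
(trimmed, version from memory):

  $ oc get nodes
  NAME             STATUS    ROLES     AGE    VERSION
  192.168.150.22   Ready     compute   ...    v1.9.x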
I think I have DNS resolution set up properly, with all machines able
to resolve each other by FQDN; however, the 3.9 installer configured
the node names with their respective IP addresses, and I don't know
how to address this.
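From what I've read, the node name can be pinned per host in the
install inventory; I believe the 3.9 variable is openshift_hostname
(and I gather 3.10 renames it to openshift_kubelet_name_override,
though please correct me if I'm wrong), e.g.:

  [nodes]
  node1.oshift-pinfold.intra openshift_hostname=node1.oshift-pinfold.intra

That would only help on a fresh install, though, not on my
already-running cluster.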
I should mention that the cluster is deployed inside an OpenStack
project, but the install config doesn't use the OpenShift-OpenStack
integration. However, when running
~/openshift-ansible/playbooks/byo/openshift_facts.yml I get
references to the underlying OpenStack (somehow the installer
"figures out" the underlying OpenStack and treats it as a provider,
the way I see it). I've pasted the output for one of the nodes at the
end of this mail.
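My guess is that openshift_facts simply probes the standard metadata
service that every OpenStack VM can reach (the ec2_compat block in
the output below seems to point that way), i.e. something like:

  $ curl -s http://169.254.169.254/openstack/latest/meta_data.json
  $ curl -s http://169.254.169.254/latest/meta-data/local-hostname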
Have any of you come across this node name problem, and were you able
to solve it?
Is there any procedure for changing the node names of a working
cluster?
I should say that the masters are also (infrastructure) nodes, so I'm
guessing the procedure, if there is one, would involve deprecating
one master at a time, while the regular nodes could go through a
drain/delete/change config/re-add cycle, as sketched below.
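Roughly what I imagine for a regular node (untested, just my guess,
so please correct me):

  # drain and remove the node under its old (IP) name
  $ oc adm drain 192.168.150.22 --ignore-daemonsets --delete-local-data
  $ oc delete node 192.168.150.22
  # fix the node name in the inventory (openshift_hostname?), re-run
  # the node config playbook, then approve the new certificate requests
  $ oc get csr -o name | xargs oc adm certificate approve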
Thank you!
Output from openshift_facts playbook:
ok: [node1.oshift-pinfold.intra] => {
"result": {
"ansible_facts": {
"openshift": {
"common": {
"all_hostnames": [
"node1.oshift-pinfold.intra",
"192.168.150.22"
],
"config_base": "/etc/origin",
"deployment_subtype": "basic",
"deployment_type": "origin",
"dns_domain": "cluster.local",
"examples_content_version": "v3.9",
"generate_no_proxy_hosts": true,
"hostname": "192.168.150.22",
"internal_hostnames": [
"192.168.150.22"
],
"ip": "192.168.150.22",
"kube_svc_ip": "172.30.0.1",
"portal_net": "172.30.0.0/16
<http://172.30.0.0/16>",
"public_hostname": "node1.oshift-pinfold.intra",
"public_ip": "192.168.150.22",
"short_version": "3.9",
"version": "3.9.0",
"version_gte_3_10": false,
"version_gte_3_6": true,
"version_gte_3_7": true,
"version_gte_3_8": true,
"version_gte_3_9": true
},
"current_config": {
"roles": [
"node"
]
},
"node": {
"bootstrapped": false,
"nodename": "192.168.150.22",
"sdn_mtu": "1408"
},
"provider": {
"metadata": {
"availability_zone": "nova",
"ec2_compat": {
"ami-id": "None",
"ami-launch-index": "0",
"ami-manifest-path": "FIXME",
"block-device-mapping": {
"ami": "vda",
"ebs0": "/dev/vda",
"ebs1": "/dev/vdb",
"root": "/dev/vda"
},
"hostname": "node1.novalocal",
"instance-action": "none",
"instance-id": "i-00000583",
"instance-type": "1cpu-2ram-20disk",
"local-hostname": "node1.novalocal",
"local-ipv4": "192.168.150.22",
"placement": {
"availability-zone": "nova"
},
"public-hostname": "node1.novalocal",
"public-ipv4": [],
"public-keys/": "0=xxxxxxxxxxx",
"reservation-id": "r-la13azpq",
"security-groups": [
"DefaultInternal",
"oshift-node"
]
},
"hostname": "node1.novalocal",
"keys": [
{
"data": "ssh-rsa
AAAA...........................................................
Generated-by-Nova",
"name": "xxxxxxxxxx",
"type": "ssh"
}
],
"launch_index": 0,
"name": "node1",
"project_id":
"2a2.........................",
"uuid":
"80bb................................."
},
"name": "openstack",
"network": {
"hostname": "192.168.150.22",
"interfaces": [],
"ip": "192.168.150.22",
"ipv6_enabled": false,
"public_hostname": [],
"public_ip": []
},
"zone": "nova"
}
}
},
"changed": false,
"failed": false
}
}