Hello all,
I'm trying to upgrade a working cluster from OpenShift Origin 3.9 to OKD
3.10, and the control plane upgrade fails at one point with a "host not
found" error.
I've looked into the problem a bit and found this issue on GitHub:
https://github.com/openshift/openshift-ansible/issues/9935, where michaelgugino
points out that "when upgrading from 3.9, your hostnames [must] match the node
names in 'oc get nodes'; otherwise, we won't be able to find the CSRs for your
nodes."
My issue is precisely this: the node names are the machines' IP addresses
rather than their hostnames. I noticed it at installation time, but since the
3.9 cluster was functioning all right, I let it be.
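
For clarity, this is the mismatch I mean (values taken from the node1 output
below): 'oc get nodes' lists the node under its IP, while the machine's own
FQDN is something else entirely:

    oc get nodes    # lists 192.168.150.22, not node1.oshift-pinfold.intra
    hostname -f     # run on the node itself, to compare against the node name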
I (think I) have DNS resolution set up properly, with all machines able to
resolve each other by FQDN; nevertheless, the 3.9 installer configured the
node names with their respective IP addresses, and I don't know how to
correct this.
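
For example (again using node1 from the output below), forward resolution
works from every machine; I haven't checked reverse lookups, and I'm not sure
whether they matter here:

    getent hosts node1.oshift-pinfold.intra    # forward lookup, works everywhere
    getent hosts 192.168.150.22                # reverse lookup, unverified on my side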
I should mention that the cluster is deployed inside an OpenStack project,
but the install config doesn't use the OpenShift-OpenStack configuration.
However, when running
~/openshift-ansible/playbooks/byo/openshift_facts.yml I get references to the
underlying OpenStack (somehow the installer "figures out" the underlying
OpenStack and treats it as a provider, the way I see it). I've pasted the
output for one of the nodes below.
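
My assumption (unconfirmed) is that the facts module simply queries the
OpenStack metadata service from the node, which would explain the provider
data appearing without any OpenStack settings in my inventory; something
along the lines of:

    curl http://169.254.169.254/openstack/latest/meta_data.json
    curl http://169.254.169.254/latest/meta-data/local-hostname    # EC2-compatible endpoint, matching the "ec2_compat" block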
Has any of you come across this node name problem, and were you able to
solve it?
Is there any procedure for changing the node names of a working cluster? I
should say that the masters are also (infrastructure) nodes, so I'm guessing
the procedure, if there is one, would involve deprecating one master at a
time, while the regular nodes would get a delete / change config / re-add
treatment (rough sketch below).
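
To make the question concrete, here is the rough, untested idea I had for one
of the regular nodes; the inventory variable is my guess from skimming the
docs (openshift_hostname in 3.9; I've also seen openshift_kubelet_name_override
mentioned for newer releases), so please correct me:

    # evacuate and remove the IP-named node object
    oc adm drain 192.168.150.22 --ignore-daemonsets --delete-local-data
    oc delete node 192.168.150.22

    # set the desired name for that host in the inventory, e.g.
    #   openshift_hostname=node1.oshift-pinfold.intra
    # then re-add the host
    ansible-playbook -i <inventory> ~/openshift-ansible/playbooks/byo/openshift-node/scaleup.yml

I have no idea whether anything similar is safe for the masters, hence the
question.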
Thank you!
Output from openshift_facts playbook:
ok: [node1.oshift-pinfold.intra] => {
    "result": {
        "ansible_facts": {
            "openshift": {
                "common": {
                    "all_hostnames": [
                        "node1.oshift-pinfold.intra",
                        "192.168.150.22"
                    ],
                    "config_base": "/etc/origin",
                    "deployment_subtype": "basic",
                    "deployment_type": "origin",
                    "dns_domain": "cluster.local",
                    "examples_content_version": "v3.9",
                    "generate_no_proxy_hosts": true,
                    "hostname": "192.168.150.22",
                    "internal_hostnames": [
                        "192.168.150.22"
                    ],
                    "ip": "192.168.150.22",
                    "kube_svc_ip": "172.30.0.1",
                    "portal_net": "172.30.0.0/16",
                    "public_hostname": "node1.oshift-pinfold.intra",
                    "public_ip": "192.168.150.22",
                    "short_version": "3.9",
                    "version": "3.9.0",
                    "version_gte_3_10": false,
                    "version_gte_3_6": true,
                    "version_gte_3_7": true,
                    "version_gte_3_8": true,
                    "version_gte_3_9": true
                },
                "current_config": {
                    "roles": [
                        "node"
                    ]
                },
                "node": {
                    "bootstrapped": false,
                    "nodename": "192.168.150.22",
                    "sdn_mtu": "1408"
                },
                "provider": {
                    "metadata": {
                        "availability_zone": "nova",
                        "ec2_compat": {
                            "ami-id": "None",
                            "ami-launch-index": "0",
                            "ami-manifest-path": "FIXME",
                            "block-device-mapping": {
                                "ami": "vda",
                                "ebs0": "/dev/vda",
                                "ebs1": "/dev/vdb",
                                "root": "/dev/vda"
                            },
                            "hostname": "node1.novalocal",
                            "instance-action": "none",
                            "instance-id": "i-00000583",
                            "instance-type": "1cpu-2ram-20disk",
                            "local-hostname": "node1.novalocal",
                            "local-ipv4": "192.168.150.22",
                            "placement": {
                                "availability-zone": "nova"
                            },
                            "public-hostname": "node1.novalocal",
                            "public-ipv4": [],
                            "public-keys/": "0=xxxxxxxxxxx",
                            "reservation-id": "r-la13azpq",
                            "security-groups": [
                                "DefaultInternal",
                                "oshift-node"
                            ]
                        },
                        "hostname": "node1.novalocal",
                        "keys": [
                            {
                                "data": "ssh-rsa AAAA........................................................... Generated-by-Nova",
                                "name": "xxxxxxxxxx",
                                "type": "ssh"
                            }
                        ],
                        "launch_index": 0,
                        "name": "node1",
                        "project_id": "2a2.........................",
                        "uuid": "80bb................................."
                    },
                    "name": "openstack",
                    "network": {
                        "hostname": "192.168.150.22",
                        "interfaces": [],
                        "ip": "192.168.150.22",
                        "ipv6_enabled": false,
                        "public_hostname": [],
                        "public_ip": []
                    },
                    "zone": "nova"
                }
            }
        },
        "changed": false,
        "failed": false
    }
}