On 11/04/2015 06:50 PM, Petr Vobornik wrote:
On 11/04/2015 01:30 PM, Martin Babinsky wrote:
On 10/30/2015 05:06 PM, Martin Babinsky wrote:
On 10/30/2015 03:38 PM, Petr Vobornik wrote:
On 10/30/2015 03:26 PM, Martin Babinsky wrote:
patch for https://fedorahosted.org/freeipa/ticket/5309
The ticket itself is about connectivity checks in topology suffixes,
but
there is a code (install/tools/ipa-replica-manage starting at line 788
after applying my patch) which monitors whether the segments pointing
to/from the deleted host are already deleted.
These checks are currently hardcoded for 'realm' prefix, should we
generalize them as well or is it a part of other effort?
Could be separate patch but yes.
Ok I have included it in the attached patch so that both of these
operations are performed for all suffixes.
Hmm, I'm thinking whether the 'check_last_link_managed' and
'check_deleted_segments' should not be called per-suffix, but should
themselves check all suffixes available. This could make the fix for
https://fedorahosted.org/freeipa/ticket/5409 also easier.
Depends if the output is reusable. If so then why not.
check_last_link_managed basically adds text to several
get_topology_connection_errors calls.
Attaching updated patch.
--
Martin^3 Babinsky
From bb0506c3f79d34bf70441eddb36e0c150229e2f0 Mon Sep 17 00:00:00 2001
From: Martin Babinsky <mbabi...@redhat.com>
Date: Fri, 30 Oct 2015 13:59:03 +0100
Subject: [PATCH] check for disconnected topology and deleted agreements for
all suffices
The code in ipa-replica-manage which checks for disconnected topology and
deleted agreements during node removal was generalized so that it now performs
these checks for all suffixes to which the node belongs.
https://fedorahosted.org/freeipa/ticket/5309
---
install/tools/ipa-replica-manage | 247 ++++++++++++++++++++++++++-------------
1 file changed, 166 insertions(+), 81 deletions(-)
diff --git a/install/tools/ipa-replica-manage b/install/tools/ipa-replica-manage
index b9998da44dcc1f01c5eb342ee713634de0ee84ee..8a44895c17e7fdc4c5160a77f55119fd3cc7f58d 100755
--- a/install/tools/ipa-replica-manage
+++ b/install/tools/ipa-replica-manage
@@ -570,46 +570,97 @@ def check_last_link(delrepl, realm, dirman_passwd, force):
else:
return None
-def check_last_link_managed(api, masters, hostname, force):
+
+def map_masters_to_suffices(masters, suffices):
+ masters_to_suffix = {}
+ suffix_name_to_root = {
+ s['iparepltopoconfroot'][0]: s['cn'][0] for s in suffices
+ }
+
+ for master in masters:
+ managed_suffices = master['iparepltopomanagedsuffix']
+ for suffix in managed_suffices:
+ suffix_name = suffix_name_to_root[suffix]
+ try:
+ masters_to_suffix[suffix_name].append(master)
+ except KeyError:
+ masters_to_suffix[suffix_name] = [master]
+
+ return masters_to_suffix
+
+
+def check_hostname_in_masters(hostname, masters):
+ master_cns = {m['cn'][0] for m in masters}
+ return hostname in master_cns
+
+
+def check_last_link_managed(api, hostname, masters, force):
"""
Check if 'hostname' is safe to delete.
:returns: tuple with lists of current and future errors in topology
(current_errors, new_errors)
"""
+ suffices = api.Command.topologysuffix_find(u'')['result']
+ suffix_to_masters = map_masters_to_suffices(masters, suffices)
+ topo_errors = ([], [])
+
+ for suffix in suffices:
+ suffix_name = suffix['cn'][0]
+ suffix_members = suffix_to_masters[suffix_name]
+ print("Checking connectivity in topology suffix '{}'".format(
+ suffix_name))
+ if not check_hostname_in_masters(hostname, suffix_members):
+ print(
+ "'{}' is not a part of topology suffix '{}'".format(
+ hostname, suffix_name
+ )
+ )
+ print("Not checking connectivity")
+ continue
+
+ segments = api.Command.topologysegment_find(
+ suffix_name, sizelimit=0).get('result')
+ graph = create_topology_graph(suffix_to_masters[suffix_name], segments)
+
+ # check topology before removal
+ orig_errors = get_topology_connection_errors(graph)
+ if orig_errors:
+ print("Current topology in suffix '{}' is disconnected:".format(
+ suffix_name))
+ print("Changes are not replicated to all servers and data are "
+ "probably inconsistent.")
+ print("You need to add segments to reconnect the topology.")
+ print_connect_errors(orig_errors)
+
+ # after removal
+ try:
+ graph.remove_vertex(hostname)
+ except ValueError:
+ pass # ignore already deleted master, continue to clean
+
+ new_errors = get_topology_connection_errors(graph)
+ if new_errors:
+ print("WARNING: Removal of '{}' will lead to disconnected topology"
+ " in suffix '{}'".format(hostname, suffix_name))
+ print("Changes will not be replicated to all servers and data will"
+ " become inconsistent.")
+ print("You need to add segments to prevent disconnection of the "
+ "topology.")
+ print("Errors in topology after removal:")
+ print_connect_errors(new_errors)
+
+ if orig_errors or new_errors:
+ if not force:
+ sys.exit("Aborted")
+ else:
+ print("Forcing removal of %s" % hostname)
+
+ topo_errors[0].extend(orig_errors)
+ topo_errors[1].extend(new_errors)
+
+ return topo_errors
- segments = api.Command.topologysegment_find(u'realm', sizelimit=0).get('result')
- graph = create_topology_graph(masters, segments)
-
- # check topology before removal
- orig_errors = get_topology_connection_errors(graph)
- if orig_errors:
- print("Current topology is disconnected:")
- print("Changes are not replicated to all servers and data are probably inconsistent.")
- print("You need to add segments to reconnect the topology.")
- print_connect_errors(orig_errors)
-
- # after removal
- try:
- graph.remove_vertex(hostname)
- except ValueError:
- pass # ignore already deleted master, continue to clean
-
- new_errors = get_topology_connection_errors(graph)
- if new_errors:
- print("WARNING: Topology after removal of %s will be disconnected." % hostname)
- print("Changes will not be replicated to all servers and data will become inconsistent.")
- print("You need to add segments to prevent disconnection of the topology.")
- print("Errors in topology after removal:")
- print_connect_errors(new_errors)
-
- if orig_errors or new_errors:
- if not force:
- sys.exit("Aborted")
- else:
- print("Forcing removal of %s" % hostname)
-
- return (orig_errors, new_errors)
def print_connect_errors(errors):
for error in errors:
@@ -725,8 +776,9 @@ def del_master_managed(realm, hostname, options):
# 2. Get all masters
masters = api.Command.server_find('', sizelimit=0)['result']
- # 3. Check topology
- topo_errors = check_last_link_managed(api, masters, hostname, options.force)
+ # 3. Check topology connectivity in all suffices
+ topo_errors = check_last_link_managed(
+ api, hostname, masters, options.force)
# 4. Check that we are not leaving the installation without CA and/or DNS
# And pick new CA master.
@@ -751,58 +803,91 @@ def del_master_managed(realm, hostname, options):
# 7. Clean RUV for the deleted master
# Wait for topology plugin to delete segments
- i = 0
- m_cns = [m['cn'][0] for m in masters] # masters before removal
- while True:
- left = api.Command.topologysegment_find(
- u'realm', iparepltoposegmentleftnode=hostname_u, sizelimit=0)['result']
- right = api.Command.topologysegment_find(
- u'realm', iparepltoposegmentrightnode=hostname_u, sizelimit=0)['result']
-
- # If the server was already deleted, we can expect that all removals
- # had been done in previous run and dangling segments were not deleted.
- if hostname not in m_cns:
- print("Skipping replication agreement deletion check")
- break
-
- # Relax check if topology was or is disconnected. Disconnected topology
- # can contain segments with already deleted servers. Check only if
- # segments of servers, which can contact this server, and the deleted
- # server were removed.
- # This code should handle a case where there was a topology with
- # a central node(B): A <-> B <-> C, where A is current server.
- # After removal of B, topology will be disconnected and removal of
- # segment B <-> C won't be replicated back to server A, therefore
- # presence of the segment has to be ignored.
- if topo_errors[0] or topo_errors[1]:
- # use errors after deletion because we don't care if some server
- # can't contact the deleted one
- cant_contact_me = [e[0] for e in topo_errors[1] if options.host in e[2]]
- can_contact_me = set(m_cns) - set(cant_contact_me)
- left = [s for s in left if s['iparepltoposegmentrightnode'][0] in can_contact_me]
- right = [s for s in right if s['iparepltoposegmentleftnode'][0] in can_contact_me]
-
- if not left and not right:
- print("Agreements deleted")
- break
- time.sleep(2)
- if i == 2: # taking too long, something is wrong, report
- print("Waiting for removal of replication agreements")
- if i > 90:
- print("Taking too long, skipping")
- print("Following segments were not deleted:")
- for s in left:
- print(" %s" % s['cn'][0])
- for s in right:
- print(" %s" % s['cn'][0])
- break
- i += 1
+ check_deleted_segments(hostname_u, masters, topo_errors, options.host)
# Clean RUV is handled by the topolgy plugin
# 8. And clean up the removed replica DNS entries if any.
cleanup_server_dns_entries(realm, hostname, thisrepl.suffix, options)
+
+def check_deleted_segments(hostname, masters, topo_errors, starting_host):
+
+ def wait_for_segment_removal(hostname, master_cns, suffix_name,
+ topo_errors):
+ i = 0
+ while True:
+ left = api.Command.topologysegment_find(
+ suffix_name, iparepltoposegmentleftnode=hostname, sizelimit=0
+ )['result']
+ right = api.Command.topologysegment_find(
+ suffix_name, iparepltoposegmentrightnode=hostname, sizelimit=0
+ )['result']
+
+ # Relax check if topology was or is disconnected. Disconnected
+ # topology can contain segments with already deleted servers.
+ # Check only if segments of servers, which can contact this server,
+ # and the deleted server were removed.
+ # This code should handle a case where there was a topology with
+ # a central node(B): A <-> B <-> C, where A is current server.
+ # After removal of B, topology will be disconnected and removal of
+ # segment B <-> C won't be replicated back to server A, therefore
+ # presence of the segment has to be ignored.
+ if topo_errors[0] or topo_errors[1]:
+ # use errors after deletion because we don't care if some
+ # server can't contact the deleted one
+ cant_contact_me = [e[0] for e in topo_errors[1]
+ if starting_host in e[2]]
+ can_contact_me = set(master_cns) - set(cant_contact_me)
+ left = [s for s in left if s['iparepltoposegmentrightnode'][0]
+ in can_contact_me]
+ right = [s for s in right if s['iparepltoposegmentleftnode'][0]
+ in can_contact_me]
+
+ if not left and not right:
+ print("Agreements deleted")
+ return
+ time.sleep(2)
+ if i == 2: # taking too long, something is wrong, report
+ print("Waiting for removal of replication agreements")
+ if i > 90:
+ print("Taking too long, skipping")
+ print("Following segments were not deleted:")
+ for s in left:
+ print(" %s" % s['cn'][0])
+ for s in right:
+ print(" %s" % s['cn'][0])
+ return
+ i += 1
+
+ if not check_hostname_in_masters(hostname, masters):
+ print("{} not in masters, skipping agreement deletion check".format(
+ hostname))
+ return
+
+ suffices = api.Command.topologysuffix_find('', sizelimit=0)['result']
+ suffix_to_masters = map_masters_to_suffices(masters, suffices)
+
+ for suffix in suffices:
+ suffix_name = suffix['cn'][0]
+ suffix_member_cns = [
+ m['cn'][0] for m in suffix_to_masters[suffix_name]
+ ]
+
+ if hostname not in suffix_member_cns:
+ # If the server was already deleted, we can expect that all
+ # removals had been done in previous run and dangling segments
+ # were not deleted.
+ print("Skipping replication agreement deletion check for "
+ "suffix '{}'".format(suffix_name))
+ continue
+
+ print("Checking for deleted segments in suffix '{}'".format(
+ suffix_name))
+ wait_for_segment_removal(hostname, suffix_member_cns, suffix_name,
+ topo_errors)
+
+
def del_master_direct(realm, hostname, options):
"""
Removing of master for realm without managed topology
--
2.4.3
--
Manage your subscription for the Freeipa-devel mailing list:
https://www.redhat.com/mailman/listinfo/freeipa-devel
Contribute to FreeIPA: http://www.freeipa.org/page/Contribute/Code