This patch needs some more testing. In one of my deployments I see that no
master is promoted and all the OVN DB resources running on the 3 nodes are in
slave mode. In my previous testing it worked fine, though.

I will fix this and submit a v3.

Thanks
Numan


On Thu, Nov 30, 2017 at 4:12 PM, <nusid...@redhat.com> wrote:

> From: Numan Siddique <nusid...@redhat.com>
>
> The Pacemaker resource agent periodically calls the OVN OCF's "monitor"
> action to check the status. But the OVN OCF script doesn't add the
> action "monitor" for the role "Master", because of which the pacemaker
> resource agent does not call the "monitor" action at all for the master.
> In case the OVN db servers exit for some reason, this goes totally undetected
> and one of the standby nodes is not promoted to master.
>
> This patch adds the monitor action for the "Master" role. Also, the monitor
> action currently does not check the status of ovn-northd (if manage_northd
> is yes).
> This patch also checks the status of ovn-northd in the monitor action
> for the "Master" role. If either the ovsdb-servers or ovn-northd is not
> running,
> the monitor action will return OCF_NOT_RUNNING and this will cause
> pacemaker
> to restart the OVN OCF resource.
>
> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1512568
> Signed-off-by: Numan Siddique <nusid...@redhat.com>
> CC: Russel Bryant <rus...@ovn.org>
> ---
>
> v1 -> v2
> -----
> Reverted the change to use 'ocf_attribute_target' as this function is
> only available in pacemaker 1.1.16-12
>
>  ovn/utilities/ovndb-servers.ocf | 49 ++++++++++++++++++++++++++++++
> +++--------
>  1 file changed, 40 insertions(+), 9 deletions(-)
>
> diff --git a/ovn/utilities/ovndb-servers.ocf
> b/ovn/utilities/ovndb-servers.ocf
> index 3f3008700..4b87c9e20 100755
> --- a/ovn/utilities/ovndb-servers.ocf
> +++ b/ovn/utilities/ovndb-servers.ocf
> @@ -120,7 +120,11 @@ ovsdb_server_metadata() {
>      <action name="stop"         timeout="20s" />
>      <action name="promote"      timeout="50s" />
>      <action name="demote"       timeout="50s" />
> -    <action name="monitor"      timeout="20s"  depth="0" interval="10s" />
> +    <action name="monitor"      timeout="20s"  depth="0" interval="30s" />
> +    <action name="monitor"      timeout="20s"  depth="0" interval="10s"
> +     role="Master" />
> +    <action name="monitor"      timeout="20s"  depth="0" interval="30s"
> +     role="Slave"/>
>      <action name="meta-data"    timeout="5s" />
>      <action name="validate-all" timeout="20s" />
>    </actions>
> @@ -247,7 +251,7 @@ ovsdb_server_master_update() {
>  }
>
>  ovsdb_server_monitor() {
> -    ovsdb_server_check_status
> +    ovsdb_server_check_status $@
>      rc=$?
>
>      ovsdb_server_master_update $rc
> @@ -262,8 +266,21 @@ ovsdb_server_check_status() {
>          return $OCF_SUCCESS
>      fi
>
> +    check_northd="no"
> +    if [ "$MANAGE_NORTHD" == "yes" ] && [ "$1" != "ignore_northd" ]; then
> +        check_northd="yes"
> +    fi
> +
>      if [[ $sb_status == "running/active" && $nb_status ==
> "running/active" ]]; then
> -        return $OCF_RUNNING_MASTER
> +        if [ "$check_northd" == "yes" ]; then
> +            # Verify if ovn-northd is running or not.
> +            ${OVN_CTL} status_northd | grep "ovn-northd is running"
> +            if [ "$?" == "0" ] ; then
> +                return $OCF_RUNNING_MASTER
> +            fi
> +        else
> +            return $OCF_RUNNING_MASTER
> +        fi
>      fi
>
>      # TODO: What about service running but not in either state above?
> @@ -317,8 +334,13 @@ ovsdb_server_start() {
>      $@ start_ovsdb
>
>      while [ 1 = 1 ]; do
> -        # It is important that we don't return until we're in a
> functional state
> -        ovsdb_server_monitor
> +        # It is important that we don't return until we're in a functional
> +        # state. When checking the status of the ovsdb-server's ignore
> northd.
> +        # It is possible that when the resource is restarted
> ovsdb-server's
> +        # can be started as masters and ovn-northd would not have been
> started.
> +        # ovn-northd will be started once a node is promoted to master and
> +        # 'manage_northd' is set to yes.
> +        ovsdb_server_monitor ignore_northd
>          rc=$?
>          case $rc in
>              $OCF_SUCCESS)        return $rc;;
> @@ -350,7 +372,7 @@ ovsdb_server_stop() {
>          ${OVN_CTL} --ovn-manage-ovsdb=no stop_northd
>      fi
>
> -    ovsdb_server_check_status
> +    ovsdb_server_check_status ignore_northd
>      case $? in
>          $OCF_NOT_RUNNING)    return ${OCF_SUCCESS};;
>      esac
> @@ -360,7 +382,7 @@ ovsdb_server_stop() {
>
>      while [ 1 = 1 ]; do
>          # It is important that we don't return until we're stopped
> -        ovsdb_server_check_status
> +        ovsdb_server_check_status ignore_northd
>          rc=$?
>          case $rc in
>          $OCF_SUCCESS)
> @@ -381,7 +403,7 @@ ovsdb_server_stop() {
>  }
>
>  ovsdb_server_promote() {
> -    ovsdb_server_check_status
> +    ovsdb_server_check_status ignore_northd
>      rc=$?
>      case $rc in
>          ${OCF_SUCCESS}) ;;
> @@ -395,6 +417,11 @@ ovsdb_server_promote() {
>      ${OVN_CTL} promote_ovnnb
>      ${OVN_CTL} promote_ovnsb
>
> +    if [ "$MANAGE_NORTHD" = "yes" ]; then
> +        # Startup ovn-northd service
> +        ${OVN_CTL} --ovn-manage-ovsdb=no start_northd
> +    fi
> +
>      ocf_log debug "ovndb_servers: Promoting $host_name as the master"
>      # Record ourselves so that the agent has a better chance of doing
>      # the right thing at startup
> @@ -404,7 +431,7 @@ ovsdb_server_promote() {
>  }
>
>  ovsdb_server_demote() {
> -    ovsdb_server_check_status
> +    ovsdb_server_check_status ignore_northd
>      if [ $? = $OCF_NOT_RUNNING ]; then
>          return $OCF_NOT_RUNNING
>      fi
> @@ -452,6 +479,10 @@ ovsdb_server_demote() {
>          ${OVN_CTL} demote_ovnsb --db-sb-sync-from-addr=${
> INVALID_IP_ADDRESS}
>      fi
>
> +    if [ "$MANAGE_NORTHD" = "yes" ]; then
> +        # Stop ovn-northd service
> +        ${OVN_CTL} --ovn-manage-ovsdb=no stop_northd
> +    fi
>      ovsdb_server_master_update $OCF_SUCCESS
>      return $OCF_SUCCESS
>  }
> --
> 2.14.3
>
>
_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to