Hi Alex,

Ack with some comments.

- I think we should maintain (and package) a sample script that contains the
commands to add the EE to IMM.
- This is just a thought: I am wondering whether we should introduce (in the
future) a validation phase (say, security validation) for such an EE, or just
leave it to this script to handle any security-validation-related actions that
need to be performed.
- The environment variable could be renamed to have the OPENSAF prefix, i.e.
it could be called OPENSAF_PLMS_CLUSTERAUTO_SCALE.

Thanks,
Mathi.

> -----Original Message-----
> From: Alex Jones [mailto:ajo...@genband.com]
> Sent: Wednesday, October 26, 2016 9:27 PM
> To: Mathivanan Naickan Palanivelu
> Cc: opensaf-devel@lists.sourceforge.net
> Subject: [PATCH 1 of 1] plm: add support for dynamically scaling EEs [#2118]
> 
>  osaf/libs/common/plmsv/include/Makefile.am              |    1 +
>  osaf/libs/common/plmsv/include/plms.h                   |    9 +-
>  osaf/libs/common/plmsv/include/plms_scale.h             |   66 +++++
>  osaf/libs/common/plmsv/plmc/include/plmc_lib_internal.h |    6 +-
>  osaf/libs/common/plmsv/plmc/plmc_lib_internal.c         |    4 +
>  osaf/services/saf/plmsv/README                          |   23 +
>  osaf/services/saf/plmsv/config/plmd.conf                |    9 +
>  osaf/services/saf/plmsv/plms/Makefile.am                |    1 +
>  osaf/services/saf/plmsv/plms/plms_imm.c                 |    5 +-
>  osaf/services/saf/plmsv/plms/plms_plmc.c                |   25 +-
>  osaf/services/saf/plmsv/plms/plms_scale.cc              |  199
> ++++++++++++++++
>  11 files changed, 331 insertions(+), 17 deletions(-)
> 
> 
> This commit adds support for dynamically scaling out EEs.
> 
> diff --git a/osaf/libs/common/plmsv/include/Makefile.am
> b/osaf/libs/common/plmsv/include/Makefile.am
> --- a/osaf/libs/common/plmsv/include/Makefile.am
> +++ b/osaf/libs/common/plmsv/include/Makefile.am
> @@ -29,4 +29,5 @@ noinst_HEADERS = \
>       plms_hrb.h \
>       plms_hpi.h \
>       plms_mbcsv.h \
> +     plms_scale.h \
>       plms_virt.h
> diff --git a/osaf/libs/common/plmsv/include/plms.h
> b/osaf/libs/common/plmsv/include/plms.h
> --- a/osaf/libs/common/plmsv/include/plms.h
> +++ b/osaf/libs/common/plmsv/include/plms.h
> @@ -416,7 +416,7 @@ typedef struct plms_track_info
>  } PLMS_TRACK_INFO;
> 
>  /* Data structure to map invocation to trackinfo */
> -struct plms_invocation_to_track_info
> +extern struct plms_invocation_to_track_info
>  {
>       SaInvocationT               invocation;
>       PLMS_TRACK_INFO              *track_info;
> @@ -609,6 +609,9 @@ SaUint32T plms_ee_hypervisor_instantiate
>  SaUint32T plms_ee_restart_vm(const PLMS_ENTITY *);
>  SaUint32T plms_ee_isolate_vm(const PLMS_ENTITY *);
> 
> +/* Function declaration from plms_scale.cc */
> +SaUint32T plms_scale(const PLMS_PLMC_EVT *);
> +
>  /* Function declaration from plms_adm_fsm.c*/
>  SaUint32T plms_cbk_call(PLMS_TRACK_INFO *,SaUint8T);
>  SaUint32T plms_imm_adm_op_req_process(PLMS_EVT *);
> @@ -676,8 +679,8 @@ void plms_he_pres_fsm_init(PLMS_PRES_FUN
>  SaUint32T plms_tmr_handler_install();
>  SaUint32T plms_hsm_hrb_init();
> 
> -SaUint64T  plm_handle_pool;
> -SaUint64T  entity_grp_hdl_pool;
> +extern SaUint64T  plm_handle_pool;
> +extern SaUint64T  entity_grp_hdl_pool;
>  void plm_imm_reinit_bg(PLMS_CB *cb);
>  SaUint32T plms_build_epath_to_entity_map_tree();
> 
> diff --git a/osaf/libs/common/plmsv/include/plms_scale.h
> b/osaf/libs/common/plmsv/include/plms_scale.h
> new file mode 100644
> --- /dev/null
> +++ b/osaf/libs/common/plmsv/include/plms_scale.h
> @@ -0,0 +1,66 @@
> +/*      -*- OpenSAF  -*-
> + *
> + * (C) Copyright 2016 The OpenSAF Foundation
> + *
> + * This program is distributed in the hope that it will be useful, but
> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> MERCHANTABILITY
> + * or FITNESS FOR A PARTICULAR PURPOSE. This file and program are
> licensed
> + * under the GNU Lesser General Public License Version 2.1, February 1999.
> + * The complete license can be accessed from the following location:
> + * http://opensource.org/licenses/lgpl-license.php
> + * See the Copying file included with the OpenSAF distribution for full
> + * licensing terms.
> + *
> + * Author(s): Genband
> + *
> + */
> +
> +/*********************************************************
> ********************
> + *   FILE NAME: plms_scale.h
> + *
> + *   DESCRIPTION: C++ implementation of EE scale
> + *
> +
> **********************************************************
> ******************/
> +#ifndef
> OPENSAF_OSAF_LIBS_COMMON_PLMSV_INCLUDE_PLMS_SCALE_H_
> +#define
> OPENSAF_OSAF_LIBS_COMMON_PLMSV_INCLUDE_PLMS_SCALE_H_
> +
> +#include <condition_variable>
> +#include <mutex>
> +#include <set>
> +#include <string>
> +#include <thread>
> +
> +class PlmsScaleThread : public std::thread {
> + public:
> +  struct Node {
> +    std::string eeName;
> +  };
> +
> +  explicit PlmsScaleThread(const std::string& script);
> +  ~PlmsScaleThread(void);
> +
> +  void add(const Node);
> +
> + private:
> +  struct ltNode {
> +    bool operator()(const Node& n1, const Node& n2) const {
> +      return n1.eeName < n2.eeName;
> +    }
> +  };
> +
> +  typedef std::set<Node, ltNode> NodeList;
> +
> +  std::string scaleOutFile;
> +
> +  NodeList nodeList;
> +
> +  std::condition_variable nodeListCv;
> +  std::mutex              nodeListMutex;
> +
> +  static void _main(PlmsScaleThread *self) { self->main(); }
> +
> +  void main(void);
> +  void executeScaleScript(int argc, char **argv);
> +};
> +
> +#endif  // OSAF_LIBS_COMMON_PLMSV_INCLUDE_PLMS_SCALE_H_
> diff --git a/osaf/libs/common/plmsv/plmc/include/plmc_lib_internal.h
> b/osaf/libs/common/plmsv/plmc/include/plmc_lib_internal.h
> --- a/osaf/libs/common/plmsv/plmc/include/plmc_lib_internal.h
> +++ b/osaf/libs/common/plmsv/plmc/include/plmc_lib_internal.h
> @@ -31,9 +31,9 @@ typedef struct cb_functions_struct {
>       int( *err_cb)( plmc_lib_error*);
>  } cb_functions;
> 
> -char *plmc_config_file;
> -PLMC_config_data config;
> -pthread_t tcp_listener_id, udp_listener_id, plmc_connection_mgr_id;
> +extern char *plmc_config_file;
> +extern PLMC_config_data config;
> +extern pthread_t tcp_listener_id, udp_listener_id,
> plmc_connection_mgr_id;
> 
> 
> /**********************************************************
> **********
>  * This struct is used for the data entry that a client_mgr thread
> diff --git a/osaf/libs/common/plmsv/plmc/plmc_lib_internal.c
> b/osaf/libs/common/plmsv/plmc/plmc_lib_internal.c
> --- a/osaf/libs/common/plmsv/plmc/plmc_lib_internal.c
> +++ b/osaf/libs/common/plmsv/plmc/plmc_lib_internal.c
> @@ -38,6 +38,10 @@ static pthread_mutex_t td_list_lock;
> 
>  FILE *plmc_lib_debug;
> 
> +char *plmc_config_file = 0;
> +PLMC_config_data config;
> +pthread_t tcp_listener_id = 0, udp_listener_id = 0,
> plmc_connection_mgr_id = 0;
> +
>  cb_functions callbacks;
> 
> 
> diff --git a/osaf/services/saf/plmsv/README
> b/osaf/services/saf/plmsv/README
> --- a/osaf/services/saf/plmsv/README
> +++ b/osaf/services/saf/plmsv/README
> @@ -367,6 +367,29 @@ 2) Map the virtual machine EE to the lib
>  3) libvirtd must be started in some fashion.
> 
> 
> +SCALE OUT
> +---------
> +
> +The scale-out feature makes it possible to run a customizable script when
> an EE
> +which is not configured in IMM tries to join the cluster. The intention is 
> that
> +the script will check if the new EE is eligible to be added to the cluster,
> +and, if so, add the necessary IMM objects so that the EE will be able to join
> +the next time it tries. The script will be called with one or more command-
> line
> +arguments, where each argument is a comma-separated list of properties
> of an EE
> +that wishes to join the cluster. Currently, the comma-separated list in each
> +command-line argument contains only one entry, but the script should be
> +forwards compatible with future extensions where more entries may be
> added to
> +the comma-separated list. The first entry in the list is the EE rdn.
> +
> +NOTE: the script must be idempotent, i.e. it must be harmless to call it more
> +than one time with the same parameters. The second call should do nothing
> since
> +the nodes were added to the cluster the first time the script was called.
> +
> +To enable the scale-out feature in PLM, set the variable
> PLMS_CLUSTERAUTO_SCALE
> +in plmd.conf to the full path of the script you want called. This script 
> should
> +add the IMM objects necessary for the node to join the cluster.
> +
> +
>  IMPLEMENTATION NOTES
>  ---------------------
> 
> diff --git a/osaf/services/saf/plmsv/config/plmd.conf
> b/osaf/services/saf/plmsv/config/plmd.conf
> --- a/osaf/services/saf/plmsv/config/plmd.conf
> +++ b/osaf/services/saf/plmsv/config/plmd.conf
> @@ -12,3 +12,12 @@ export PLMS_ENV_HEALTHCHECK_KEY="Default
> 
>  # Uncomment the next line to enable info level logging
>  #args="--loglevel=info"
> +
> +# Setting PLMS_CLUSTERAUTO_SCALE to a script or executable will enable
> support
> +# for automatic cluster scaling in PLM. Currently, only scale-out is
> supported.
> +# When automatic cluster scaling is enabled, the PLM service will call the
> +# script defined here with one or more command-line arguments, where
> each
> +# argument is a comma-separated list of properties of a node that wishes to
> join
> +# the cluster. This script/executable should add the necessary IMM objects
> for
> +# the new node(s). See the README file of the PLM service for more
> information.
> +# export PLMS_CLUSTERAUTO_SCALE=/usr/bin/plm_scale_out
> diff --git a/osaf/services/saf/plmsv/plms/Makefile.am
> b/osaf/services/saf/plmsv/plms/Makefile.am
> --- a/osaf/services/saf/plmsv/plms/Makefile.am
> +++ b/osaf/services/saf/plmsv/plms/Makefile.am
> @@ -48,6 +48,7 @@ osafplmd_SOURCES = \
>       plms_proc.c \
>       plms_stdby.c \
>       plms_dbg_utils.c \
> +     plms_scale.cc \
>       plms_virt.cc
> 
>  osafplmd_LDADD = \
> diff --git a/osaf/services/saf/plmsv/plms/plms_imm.c
> b/osaf/services/saf/plmsv/plms/plms_imm.c
> --- a/osaf/services/saf/plmsv/plms/plms_imm.c
> +++ b/osaf/services/saf/plmsv/plms/plms_imm.c
> @@ -764,9 +764,8 @@ static SaAisErrorT plms_imm_ccb_obj_crea
>       for (j=0; attr[j] != NULL; j++) {
>               if ( ((attr[j])->attrValues == NULL ) || 
> (*((attr[j])->attrValues)
> == NULL) ) {
>                       /* Log the error */
> -                     TRACE_LEAVE2("attr_val is null for attr_name %s",
> -                             (*attr)->attrName);
> -                     return SA_AIS_ERR_BAD_OPERATION;
> +                     TRACE("attr_val is null for attr_name %s", attr[j]-
> >attrName);
> +                     continue;
>               }
>               if (strcmp((attr[j])->attrName, "saPlmHEBaseHEType")== 0) {
>                       rc = validate_he_base_type_attr_val(ccb_id, attr[j]);
> diff --git a/osaf/services/saf/plmsv/plms/plms_plmc.c
> b/osaf/services/saf/plmsv/plms/plms_plmc.c
> --- a/osaf/services/saf/plmsv/plms/plms_plmc.c
> +++ b/osaf/services/saf/plmsv/plms/plms_plmc.c
> @@ -1485,21 +1485,30 @@ SaUint32T plms_plmc_mbx_evt_process(PLMS
>  {
>       SaUint32T ret_err = NCSCC_RC_FAILURE;
>       PLMS_ENTITY *ent = NULL;
> -     SaUint8T tmp[SA_MAX_NAME_LENGTH+1];
>       PLMS_CB *cb = plms_cb;
> 
>       if (evt->req_evt.plms_plmc_evt.ee_id.length){
>               ent = (PLMS_ENTITY *)ncs_patricia_tree_get(&(cb-
> >entity_info),
>                       (SaUint8T *)&(evt->req_evt.plms_plmc_evt.ee_id));
>               if (NULL == ent){
> -                     memcpy(tmp,evt-
> >req_evt.plms_plmc_evt.ee_id.value,
> -                              evt->req_evt.plms_plmc_evt.ee_id.length);
> -                     tmp[evt->req_evt.plms_plmc_evt.ee_id.length] =
> '\0';
> +                     /* don't know about this EE; notify for possible scale
> event */
> +                     ret_err = plms_scale(&evt-
> >req_evt.plms_plmc_evt);
> 
> -                     LOG_ER (" Entity not found for PLMC event. ee_id:
> %s \
> -                     ,evt_type: %d",
> -                     tmp,evt->req_evt.plms_plmc_evt.plmc_evt_type);
> -                     return ret_err;
> +      if (ret_err == NCSCC_RC_SUCCESS) {
> +        if (evt->req_evt.plms_plmc_evt.plmc_evt_type ==
> PLMS_PLMC_EE_TCP_CONCTED)
> +        {
> +          TRACE("sending plmcd restart to %s for scaling",
> +                evt->req_evt.plms_plmc_evt.ee_id.value);
> +
> +          ret_err = plmc_plmcd_restart((char *)evt-
> >req_evt.plms_plmc_evt.ee_id.value,
> +                                       plms_plmc_tcp_cbk);
> +        }
> +      } else {
> +        LOG_IN("scaling not enabled: ignoring unknown EE: %s",
> +               evt->req_evt.plms_plmc_evt.ee_id.value);
> +      }
> +
> +      return ret_err;
>               }
>       }else {
>               LOG_ER("evt->req_evt.plms_plmc_evt.ee_id.length is
> ZERO");
> diff --git a/osaf/services/saf/plmsv/plms/plms_scale.cc
> b/osaf/services/saf/plmsv/plms/plms_scale.cc
> new file mode 100644
> --- /dev/null
> +++ b/osaf/services/saf/plmsv/plms/plms_scale.cc
> @@ -0,0 +1,199 @@
> +/*      -*- OpenSAF  -*-
> + *
> + * (C) Copyright 2016 The OpenSAF Foundation
> + *
> + * This program is distributed in the hope that it will be useful, but
> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> MERCHANTABILITY
> + * or FITNESS FOR A PARTICULAR PURPOSE. This file and program are
> licensed
> + * under the GNU Lesser General Public License Version 2.1, February 1999.
> + * The complete license can be accessed from the following location:
> + * http://opensource.org/licenses/lgpl-license.php
> + * See the Copying file included with the OpenSAF distribution for full
> + * licensing terms.
> + *
> + * Author(s): Genband
> + *
> + */
> +
> +/*********************************************************
> ********************
> + *   FILE NAME: plms_scale.cc
> + *
> + *   DESCRIPTION: C++ implementation of EE scaling
> + *
> +
> **********************************************************
> ******************/
> +
> +#include "osaf/libs/common/plmsv/include/plms_scale.h"
> +#include "osaf/libs/common/plmsv/include/plms.h"
> +
> +extern "C" SaUint32T
> +plms_scale(const PLMS_PLMC_EVT *evt) {
> +  TRACE_ENTER();
> +
> +  SaUint32T rc(NCSCC_RC_SUCCESS);
> +
> +  static bool init(false);
> +  static bool scalingEnabled(false);
> +  static PlmsScaleThread *scaleThread(0);
> +
> +  if (!init) {
> +    char *scale_out_exe(getenv("PLMS_CLUSTERAUTO_SCALE"));
> +
> +    if (scale_out_exe) {
> +      scalingEnabled = true;
> +      scaleThread = new PlmsScaleThread(scale_out_exe);
> +    }
> +
> +    init = true;
> +  }
> +
> +  if (scalingEnabled) {
> +    if (evt->plmc_evt_type == PLMS_PLMC_EE_INSTING ||
> +        evt->plmc_evt_type == PLMS_PLMC_EE_INSTED ||
> +        evt->plmc_evt_type == PLMS_PLMC_EE_TCP_CONCTED) {
> +      // need to put the request on a thread so PLMs can service any IMM
> +      // requests
> +      PlmsScaleThread::Node node;
> +      node.eeName = std::string(
> +        reinterpret_cast<const char *>(evt->ee_id.value), evt->ee_id.length);
> +
> +      TRACE("adding node %s to thread", node.eeName.c_str());
> +
> +      std::thread addNodeThread(&PlmsScaleThread::add, scaleThread,
> node);
> +
> +      addNodeThread.detach();
> +    }
> +  } else {
> +    TRACE("scaling not enabled");
> +    rc = NCSCC_RC_FAILURE;
> +  }
> +
> +  TRACE_LEAVE();
> +
> +  return rc;
> +}
> +
> +PlmsScaleThread::PlmsScaleThread(const std::string& outFile)
> +  : std::thread(_main, this),
> +    scaleOutFile(outFile) {
> +  TRACE_ENTER();
> +  TRACE_LEAVE();
> +}
> +
> +PlmsScaleThread::~PlmsScaleThread(void) {
> +  TRACE_ENTER();
> +  TRACE_LEAVE();
> +}
> +
> +void
> +PlmsScaleThread::main(void) {
> +  TRACE_ENTER2("Scale thread starting");
> +
> +  while (true) {
> +    std::unique_lock<std::mutex> lk(nodeListMutex);
> +
> +    while (nodeList.empty())
> +      nodeListCv.wait(lk);
> +
> +    int argc(1);
> +    char **argv(new char *[nodeList.size() + argc + 1]);
> +    argv[0] = new char[scaleOutFile.length() + 1];
> +    snprintf(argv[0], scaleOutFile.length() + 1, "%s", scaleOutFile.c_str());
> +
> +    // call the scale script with the EEs that are trying to come in
> +    for (NodeList::iterator it(nodeList.begin());
> +         it != nodeList.end();
> +         ++it) {
> +      argv[argc] = new char[it->eeName.length() + 1];
> +      snprintf(argv[argc++], it->eeName.length() + 1, "%s", it-
> >eeName.c_str());
> +    }
> +
> +    // null terminate argv
> +    argv[nodeList.size() + 1] = 0;
> +
> +    executeScaleScript(argc, argv);
> +
> +    for (int i(argc); i >= 0; i--)
> +      delete[] argv[i];
> +
> +    delete[] argv;
> +
> +    nodeList.clear();
> +  }
> +
> +  TRACE_LEAVE2("Scale thread exiting");
> +}
> +
> +void
> +PlmsScaleThread::executeScaleScript(int argc, char **argv) {
> +  TRACE_ENTER();
> +
> +  LOG_IN("executing scale script for %i nodes", argc - 1);
> +
> +  pid_t pid(fork());
> +
> +  if (pid > 0) {
> +    // parent
> +    int status;
> +    pid_t wait_pid;
> +
> +    do {
> +      wait_pid = waitpid(pid, &status, 0);
> +    } while (wait_pid == -1 && errno == EINTR);
> +
> +    if (wait_pid != -1) {
> +      if (!WIFEXITED(status)) {
> +        LOG_ER("Scale out script %s terminated abnormally", argv[0]);
> +      } else if (WEXITSTATUS(status) != 0) {
> +        if (WEXITSTATUS(status) == 123) {
> +          LOG_ER("Scale out script %s could not be executed", argv[0]);
> +        } else {
> +          LOG_ER("Scale out script %s failed with exit code %d",
> +                 argv[0],
> +                 WEXITSTATUS(status));
> +        }
> +      } else {
> +        LOG_IN("Scale out script %s exited successfully", argv[0]);
> +      }
> +    } else {
> +      LOG_ER("Scale out script %s failed in waitpid(%i): %s",
> +             argv[0],
> +             wait_pid,
> +             strerror(errno));
> +    }
> +  } else if (pid == 0) {
> +    // child
> +    static char scaleOutPathEnv[] = "PATH=/usr/local/sbin:/usr/local/bin:"
> +      "/usr/sbin:/usr/bin:/sbin:/bin";
> +
> +    char *env[] = { scaleOutPathEnv, 0 };
> +
> +    const int nofile(1024);
> +
> +    for (int fd(3); fd < nofile; ++fd) close(fd);
> +
> +    if (execve(argv[0], argv, env) < 0)
> +      LOG_ER("error executing plms_scale_out script: %s: %i",
> +             scaleOutFile.c_str(),
> +             errno);
> +    _Exit(123);
> +  } else {
> +    LOG_ER("unable to fork plms_scale_out script: %s: %i",
> +           scaleOutFile.c_str(),
> +           errno);
> +  }
> +
> +  TRACE_LEAVE();
> +}
> +
> +void
> +PlmsScaleThread::add(const Node node) {
> +  TRACE_ENTER();
> +
> +  std::lock_guard<std::mutex> guard(nodeListMutex);
> +
> +  nodeList.insert(node);
> +
> +  nodeListCv.notify_one();
> +
> +  TRACE_LEAVE();
> +}
> 

------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most 
engaging tech sites, SlashDot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to