Hi Alex, Ack with some comments.
- I think we should maintain (and package) a sample script that has the commands to add the EE to IMM. - This is just a thought: I am wondering whether we should introduce (in the future) a validation phase (say, security validation) for such an EE, or just leave it to this script to perform any security-validation-related actions. - The environment variable could be renamed to have the OPENSAF prefix, i.e. it could be called OPENSAF_PLMS_CLUSTERAUTO_SCALE. Thanks, Mathi. > -----Original Message----- > From: Alex Jones [mailto:ajo...@genband.com] > Sent: Wednesday, October 26, 2016 9:27 PM > To: Mathivanan Naickan Palanivelu > Cc: opensaf-devel@lists.sourceforge.net > Subject: [PATCH 1 of 1] plm: add support for dynamically scaling EEs [#2118] > > osaf/libs/common/plmsv/include/Makefile.am | 1 + > osaf/libs/common/plmsv/include/plms.h | 9 +- > osaf/libs/common/plmsv/include/plms_scale.h | 66 +++++ > osaf/libs/common/plmsv/plmc/include/plmc_lib_internal.h | 6 +- > osaf/libs/common/plmsv/plmc/plmc_lib_internal.c | 4 + > osaf/services/saf/plmsv/README | 23 + > osaf/services/saf/plmsv/config/plmd.conf | 9 + > osaf/services/saf/plmsv/plms/Makefile.am | 1 + > osaf/services/saf/plmsv/plms/plms_imm.c | 5 +- > osaf/services/saf/plmsv/plms/plms_plmc.c | 25 +- > osaf/services/saf/plmsv/plms/plms_scale.cc | 199 > ++++++++++++++++ > 11 files changed, 331 insertions(+), 17 deletions(-) > > > This commit adds support for dynamically scaling out EEs. 
> > diff --git a/osaf/libs/common/plmsv/include/Makefile.am > b/osaf/libs/common/plmsv/include/Makefile.am > --- a/osaf/libs/common/plmsv/include/Makefile.am > +++ b/osaf/libs/common/plmsv/include/Makefile.am > @@ -29,4 +29,5 @@ noinst_HEADERS = \ > plms_hrb.h \ > plms_hpi.h \ > plms_mbcsv.h \ > + plms_scale.h \ > plms_virt.h > diff --git a/osaf/libs/common/plmsv/include/plms.h > b/osaf/libs/common/plmsv/include/plms.h > --- a/osaf/libs/common/plmsv/include/plms.h > +++ b/osaf/libs/common/plmsv/include/plms.h > @@ -416,7 +416,7 @@ typedef struct plms_track_info > } PLMS_TRACK_INFO; > > /* Data structure to map invocation to trackinfo */ > -struct plms_invocation_to_track_info > +extern struct plms_invocation_to_track_info > { > SaInvocationT invocation; > PLMS_TRACK_INFO *track_info; > @@ -609,6 +609,9 @@ SaUint32T plms_ee_hypervisor_instantiate > SaUint32T plms_ee_restart_vm(const PLMS_ENTITY *); > SaUint32T plms_ee_isolate_vm(const PLMS_ENTITY *); > > +/* Function declaration from plms_scale.cc */ > +SaUint32T plms_scale(const PLMS_PLMC_EVT *); > + > /* Function declaration from plms_adm_fsm.c*/ > SaUint32T plms_cbk_call(PLMS_TRACK_INFO *,SaUint8T); > SaUint32T plms_imm_adm_op_req_process(PLMS_EVT *); > @@ -676,8 +679,8 @@ void plms_he_pres_fsm_init(PLMS_PRES_FUN > SaUint32T plms_tmr_handler_install(); > SaUint32T plms_hsm_hrb_init(); > > -SaUint64T plm_handle_pool; > -SaUint64T entity_grp_hdl_pool; > +extern SaUint64T plm_handle_pool; > +extern SaUint64T entity_grp_hdl_pool; > void plm_imm_reinit_bg(PLMS_CB *cb); > SaUint32T plms_build_epath_to_entity_map_tree(); > > diff --git a/osaf/libs/common/plmsv/include/plms_scale.h > b/osaf/libs/common/plmsv/include/plms_scale.h > new file mode 100644 > --- /dev/null > +++ b/osaf/libs/common/plmsv/include/plms_scale.h > @@ -0,0 +1,66 @@ > +/* -*- OpenSAF -*- > + * > + * (C) Copyright 2016 The OpenSAF Foundation > + * > + * This program is distributed in the hope that it will be useful, but > + * WITHOUT ANY WARRANTY; 
without even the implied warranty of > MERCHANTABILITY > + * or FITNESS FOR A PARTICULAR PURPOSE. This file and program are > licensed > + * under the GNU Lesser General Public License Version 2.1, February 1999. > + * The complete license can be accessed from the following location: > + * http://opensource.org/licenses/lgpl-license.php > + * See the Copying file included with the OpenSAF distribution for full > + * licensing terms. > + * > + * Author(s): Genband > + * > + */ > + > +/********************************************************* > ******************** > + * FILE NAME: plms_scale.h > + * > + * DESCRIPTION: C++ implementation of EE scale > + * > + > ********************************************************** > ******************/ > +#ifndef > OPENSAF_OSAF_LIBS_COMMON_PLMSV_INCLUDE_PLMS_SCALE_H_ > +#define > OPENSAF_OSAF_LIBS_COMMON_PLMSV_INCLUDE_PLMS_SCALE_H_ > + > +#include <condition_variable> > +#include <mutex> > +#include <set> > +#include <string> > +#include <thread> > + > +class PlmsScaleThread : public std::thread { > + public: > + struct Node { > + std::string eeName; > + }; > + > + explicit PlmsScaleThread(const std::string& script); > + ~PlmsScaleThread(void); > + > + void add(const Node); > + > + private: > + struct ltNode { > + bool operator()(const Node& n1, const Node& n2) const { > + return n1.eeName < n2.eeName; > + } > + }; > + > + typedef std::set<Node, ltNode> NodeList; > + > + std::string scaleOutFile; > + > + NodeList nodeList; > + > + std::condition_variable nodeListCv; > + std::mutex nodeListMutex; > + > + static void _main(PlmsScaleThread *self) { self->main(); } > + > + void main(void); > + void executeScaleScript(int argc, char **argv); > +}; > + > +#endif // OSAF_LIBS_COMMON_PLMSV_INCLUDE_PLMS_SCALE_H_ > diff --git a/osaf/libs/common/plmsv/plmc/include/plmc_lib_internal.h > b/osaf/libs/common/plmsv/plmc/include/plmc_lib_internal.h > --- a/osaf/libs/common/plmsv/plmc/include/plmc_lib_internal.h > +++ 
b/osaf/libs/common/plmsv/plmc/include/plmc_lib_internal.h > @@ -31,9 +31,9 @@ typedef struct cb_functions_struct { > int( *err_cb)( plmc_lib_error*); > } cb_functions; > > -char *plmc_config_file; > -PLMC_config_data config; > -pthread_t tcp_listener_id, udp_listener_id, plmc_connection_mgr_id; > +extern char *plmc_config_file; > +extern PLMC_config_data config; > +extern pthread_t tcp_listener_id, udp_listener_id, > plmc_connection_mgr_id; > > > /********************************************************** > ********** > * This struct is used for the data entry that a client_mgr thread > diff --git a/osaf/libs/common/plmsv/plmc/plmc_lib_internal.c > b/osaf/libs/common/plmsv/plmc/plmc_lib_internal.c > --- a/osaf/libs/common/plmsv/plmc/plmc_lib_internal.c > +++ b/osaf/libs/common/plmsv/plmc/plmc_lib_internal.c > @@ -38,6 +38,10 @@ static pthread_mutex_t td_list_lock; > > FILE *plmc_lib_debug; > > +char *plmc_config_file = 0; > +PLMC_config_data config; > +pthread_t tcp_listener_id = 0, udp_listener_id = 0, > plmc_connection_mgr_id = 0; > + > cb_functions callbacks; > > > diff --git a/osaf/services/saf/plmsv/README > b/osaf/services/saf/plmsv/README > --- a/osaf/services/saf/plmsv/README > +++ b/osaf/services/saf/plmsv/README > @@ -367,6 +367,29 @@ 2) Map the virtual machine EE to the lib > 3) libvirtd must be started in some fashion. > > > +SCALE OUT > +--------- > + > +The scale-out feature makes it possible to run a customizable script when > an EE > +which is not configured in IMM tries to join the cluster. The intention is > that > +the script will check if the new EE is eligible to be added to the cluster, > +and, if so, add the necessary IMM objects so that the EE will be able to join > +the next time it tries. The script will be called with one or more command- > line > +arguments, where each argument is a comma-separated list of properties > of an EE > +that wishes to join the cluster. 
Currently, the comma-separated list in each > +command-line argument contains only one entry, but the script should be > +forwards compatible with future extensions where more entries may be > added to > +the comma-separated list. The first entry in the list is the EE rdn. > + > +NOTE: the script must be idempotent, i.e. it must be harmless to call it more > +than one time with the same parameters. The second call should do nothing > since > +the nodes were added to the cluster the first time the script was called. > + > +To enable the scale-out feature in PLM, set the variable > PLMS_CLUSTERAUTO_SCALE > +in plmd.conf to the full path of the script you want called. This script > should > +add the IMM objects needed necessary for the node to join the cluster. > + > + > IMPLEMENTATION NOTES > --------------------- > > diff --git a/osaf/services/saf/plmsv/config/plmd.conf > b/osaf/services/saf/plmsv/config/plmd.conf > --- a/osaf/services/saf/plmsv/config/plmd.conf > +++ b/osaf/services/saf/plmsv/config/plmd.conf > @@ -12,3 +12,12 @@ export PLMS_ENV_HEALTHCHECK_KEY="Default > > # Uncomment the next line to enable info level logging > #args="--loglevel=info" > + > +# Setting PLMS_CLUSTERAUTO_SCALE to a script or executable will enable > support > +# for automatic cluster scaling in PLM. Currently, only scale-out is > supported. > +# When automatic cluster scaling is enabled, the PLM service will call the > +# script defined here with one or more command-line arguments, where > each > +# argument is a comma-separated list of properties of a node that wishes to > join > +# the cluster. This script/executable should add the necessary IMM objects > for > +# the new node(s). See the README file of the PLM service for more > information. 
> +# export PLMS_CLUSTERAUTO_SCALE=/usr/bin/plm_scale_out > diff --git a/osaf/services/saf/plmsv/plms/Makefile.am > b/osaf/services/saf/plmsv/plms/Makefile.am > --- a/osaf/services/saf/plmsv/plms/Makefile.am > +++ b/osaf/services/saf/plmsv/plms/Makefile.am > @@ -48,6 +48,7 @@ osafplmd_SOURCES = \ > plms_proc.c \ > plms_stdby.c \ > plms_dbg_utils.c \ > + plms_scale.cc \ > plms_virt.cc > > osafplmd_LDADD = \ > diff --git a/osaf/services/saf/plmsv/plms/plms_imm.c > b/osaf/services/saf/plmsv/plms/plms_imm.c > --- a/osaf/services/saf/plmsv/plms/plms_imm.c > +++ b/osaf/services/saf/plmsv/plms/plms_imm.c > @@ -764,9 +764,8 @@ static SaAisErrorT plms_imm_ccb_obj_crea > for (j=0; attr[j] != NULL; j++) { > if ( ((attr[j])->attrValues == NULL ) || > (*((attr[j])->attrValues) > == NULL) ) { > /* Log the error */ > - TRACE_LEAVE2("attr_val is null for attr_name %s", > - (*attr)->attrName); > - return SA_AIS_ERR_BAD_OPERATION; > + TRACE("attr_val is null for attr_name %s", attr[j]- > >attrName); > + continue; > } > if (strcmp((attr[j])->attrName, "saPlmHEBaseHEType")== 0) { > rc = validate_he_base_type_attr_val(ccb_id, attr[j]); > diff --git a/osaf/services/saf/plmsv/plms/plms_plmc.c > b/osaf/services/saf/plmsv/plms/plms_plmc.c > --- a/osaf/services/saf/plmsv/plms/plms_plmc.c > +++ b/osaf/services/saf/plmsv/plms/plms_plmc.c > @@ -1485,21 +1485,30 @@ SaUint32T plms_plmc_mbx_evt_process(PLMS > { > SaUint32T ret_err = NCSCC_RC_FAILURE; > PLMS_ENTITY *ent = NULL; > - SaUint8T tmp[SA_MAX_NAME_LENGTH+1]; > PLMS_CB *cb = plms_cb; > > if (evt->req_evt.plms_plmc_evt.ee_id.length){ > ent = (PLMS_ENTITY *)ncs_patricia_tree_get(&(cb- > >entity_info), > (SaUint8T *)&(evt->req_evt.plms_plmc_evt.ee_id)); > if (NULL == ent){ > - memcpy(tmp,evt- > >req_evt.plms_plmc_evt.ee_id.value, > - evt->req_evt.plms_plmc_evt.ee_id.length); > - tmp[evt->req_evt.plms_plmc_evt.ee_id.length] = > '\0'; > + /* don't know about this EE; notify for possible scale > event */ > + ret_err = plms_scale(&evt- > 
>req_evt.plms_plmc_evt); > > - LOG_ER (" Entity not found for PLMC event. ee_id: > %s \ > - ,evt_type: %d", > - tmp,evt->req_evt.plms_plmc_evt.plmc_evt_type); > - return ret_err; > + if (ret_err == NCSCC_RC_SUCCESS) { > + if (evt->req_evt.plms_plmc_evt.plmc_evt_type == > PLMS_PLMC_EE_TCP_CONCTED) > + { > + TRACE("sending plmcd restart to %s for scaling", > + evt->req_evt.plms_plmc_evt.ee_id.value); > + > + ret_err = plmc_plmcd_restart((char *)evt- > >req_evt.plms_plmc_evt.ee_id.value, > + plms_plmc_tcp_cbk); > + } > + } else { > + LOG_IN("scaling not enabled: ignoring unknown EE: %s", > + evt->req_evt.plms_plmc_evt.ee_id.value); > + } > + > + return ret_err; > } > }else { > LOG_ER("evt->req_evt.plms_plmc_evt.ee_id.length is > ZERO"); > diff --git a/osaf/services/saf/plmsv/plms/plms_scale.cc > b/osaf/services/saf/plmsv/plms/plms_scale.cc > new file mode 100644 > --- /dev/null > +++ b/osaf/services/saf/plmsv/plms/plms_scale.cc > @@ -0,0 +1,199 @@ > +/* -*- OpenSAF -*- > + * > + * (C) Copyright 2016 The OpenSAF Foundation > + * > + * This program is distributed in the hope that it will be useful, but > + * WITHOUT ANY WARRANTY; without even the implied warranty of > MERCHANTABILITY > + * or FITNESS FOR A PARTICULAR PURPOSE. This file and program are > licensed > + * under the GNU Lesser General Public License Version 2.1, February 1999. > + * The complete license can be accessed from the following location: > + * http://opensource.org/licenses/lgpl-license.php > + * See the Copying file included with the OpenSAF distribution for full > + * licensing terms. 
> + * > + * Author(s): Genband > + * > + */ > + > +/********************************************************* > ******************** > + * FILE NAME: plms_scale.cc > + * > + * DESCRIPTION: C++ implementation of EE scaling > + * > + > ********************************************************** > ******************/ > + > +#include "osaf/libs/common/plmsv/include/plms_scale.h" > +#include "osaf/libs/common/plmsv/include/plms.h" > + > +extern "C" SaUint32T > +plms_scale(const PLMS_PLMC_EVT *evt) { > + TRACE_ENTER(); > + > + SaUint32T rc(NCSCC_RC_SUCCESS); > + > + static bool init(false); > + static bool scalingEnabled(false); > + static PlmsScaleThread *scaleThread(0); > + > + if (!init) { > + char *scale_out_exe(getenv("PLMS_CLUSTERAUTO_SCALE")); > + > + if (scale_out_exe) { > + scalingEnabled = true; > + scaleThread = new PlmsScaleThread(scale_out_exe); > + } > + > + init = true; > + } > + > + if (scalingEnabled) { > + if (evt->plmc_evt_type == PLMS_PLMC_EE_INSTING || > + evt->plmc_evt_type == PLMS_PLMC_EE_INSTED || > + evt->plmc_evt_type == PLMS_PLMC_EE_TCP_CONCTED) { > + // need to put the request on a thread so PLMs can service any IMM > + // requests > + PlmsScaleThread::Node node; > + node.eeName = std::string( > + reinterpret_cast<const char *>(evt->ee_id.value), evt->ee_id.length); > + > + TRACE("adding node %s to thread", node.eeName.c_str()); > + > + std::thread addNodeThread(&PlmsScaleThread::add, scaleThread, > node); > + > + addNodeThread.detach(); > + } > + } else { > + TRACE("scaling not enabled"); > + rc = NCSCC_RC_FAILURE; > + } > + > + TRACE_LEAVE(); > + > + return rc; > +} > + > +PlmsScaleThread::PlmsScaleThread(const std::string& outFile) > + : std::thread(_main, this), > + scaleOutFile(outFile) { > + TRACE_ENTER(); > + TRACE_LEAVE(); > +} > + > +PlmsScaleThread::~PlmsScaleThread(void) { > + TRACE_ENTER(); > + TRACE_LEAVE(); > +} > + > +void > +PlmsScaleThread::main(void) { > + TRACE_ENTER2("Scale thread starting"); > + > + while (true) { > + 
std::unique_lock<std::mutex> lk(nodeListMutex); > + > + while (nodeList.empty()) > + nodeListCv.wait(lk); > + > + int argc(1); > + char **argv(new char *[nodeList.size() + argc + 1]); > + argv[0] = new char[scaleOutFile.length() + 1]; > + snprintf(argv[0], scaleOutFile.length() + 1, "%s", scaleOutFile.c_str()); > + > + // call the scale script with the EEs that are trying to come in > + for (NodeList::iterator it(nodeList.begin()); > + it != nodeList.end(); > + ++it) { > + argv[argc] = new char[it->eeName.length() + 1]; > + snprintf(argv[argc++], it->eeName.length() + 1, "%s", it- > >eeName.c_str()); > + } > + > + // null terminate argv > + argv[nodeList.size() + 1] = 0; > + > + executeScaleScript(argc, argv); > + > + for (int i(argc); i >= 0; i--) > + delete[] argv[i]; > + > + delete[] argv; > + > + nodeList.clear(); > + } > + > + TRACE_LEAVE2("Scale thread exiting"); > +} > + > +void > +PlmsScaleThread::executeScaleScript(int argc, char **argv) { > + TRACE_ENTER(); > + > + LOG_IN("executing scale script for %i nodes", argc - 1); > + > + pid_t pid(fork()); > + > + if (pid > 0) { > + // parent > + int status; > + pid_t wait_pid; > + > + do { > + wait_pid = waitpid(pid, &status, 0); > + } while (wait_pid == -1 && errno == EINTR); > + > + if (wait_pid != -1) { > + if (!WIFEXITED(status)) { > + LOG_ER("Scale out script %s terminated abnormally", argv[0]); > + } else if (WEXITSTATUS(status) != 0) { > + if (WEXITSTATUS(status) == 123) { > + LOG_ER("Scale out script %s could not be executed", argv[0]); > + } else { > + LOG_ER("Scale out script %s failed with exit code %d", > + argv[0], > + WEXITSTATUS(status)); > + } > + } else { > + LOG_IN("Scale out script %s exited successfully", argv[0]); > + } > + } else { > + LOG_ER("Scale out script %s failed in waitpid(%i): %s", > + argv[0], > + wait_pid, > + strerror(errno)); > + } > + } else if (pid == 0) { > + // child > + static char scaleOutPathEnv[] = "PATH=/usr/local/sbin:/usr/local/bin:" > + 
"/usr/sbin:/usr/bin:/sbin:/bin"; > + > + char *env[] = { scaleOutPathEnv, 0 }; > + > + const int nofile(1024); > + > + for (int fd(3); fd < nofile; ++fd) close(fd); > + > + if (execve(argv[0], argv, env) < 0) > + LOG_ER("error executing plms_scale_out script: %s: %i", > + scaleOutFile.c_str(), > + errno); > + _Exit(123); > + } else { > + LOG_ER("unable to fork plms_scale_out script: %s: %i", > + scaleOutFile.c_str(), > + errno); > + } > + > + TRACE_LEAVE(); > +} > + > +void > +PlmsScaleThread::add(const Node node) { > + TRACE_ENTER(); > + > + std::lock_guard<std::mutex> guard(nodeListMutex); > + > + nodeList.insert(node); > + > + nodeListCv.notify_one(); > + > + TRACE_LEAVE(); > +} > ------------------------------------------------------------------------------ Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! http://sdm.link/slashdot _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel