Repository: incubator-trafodion Updated Branches: refs/heads/master c5d39273f -> a89a6585a
[TRAFODION-2420] RMS Enhancements Added yet another offender feature to list query ids for which any storage engine operator has a total IO time longer than a given number of seconds. ./offender -s se_offender Will list the query ids along with the table name. SEE $TRAF_HOME/export/limited-support-tools/LSO/README The "Number of SQL Processes" counter is now made multi-fragment aware and hence contains the actual number of ESPs used + 1 for master process. Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/45ff9540 Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/45ff9540 Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/45ff9540 Branch: refs/heads/master Commit: 45ff9540eeeacb46e4e65e2b6a57b01322223760 Parents: 4261a9d Author: selvaganesang <selva.govindara...@esgyn.com> Authored: Thu Jan 19 01:58:56 2017 +0000 Committer: selvaganesang <selva.govindara...@esgyn.com> Committed: Thu Jan 19 01:58:56 2017 +0000 ---------------------------------------------------------------------- .../sqf/export/limited-support-tools/LSO/README | 20 ++++++- .../export/limited-support-tools/LSO/offender | 5 ++ core/sql/cli/Context.cpp | 7 --- core/sql/executor/ExStats.cpp | 59 ++++++++++++++------ core/sql/executor/ExStats.h | 5 +- core/sql/executor/ex_frag_rt.cpp | 16 +++++- core/sql/executor/ex_root.cpp | 1 - core/sql/runtimestats/ssmpipc.cpp | 1 - 8 files changed, 83 insertions(+), 31 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/45ff9540/core/sqf/export/limited-support-tools/LSO/README ---------------------------------------------------------------------- diff --git a/core/sqf/export/limited-support-tools/LSO/README b/core/sqf/export/limited-support-tools/LSO/README index 36f8b85..098ea1e 100644 --- 
a/core/sqf/export/limited-support-tools/LSO/README +++ b/core/sqf/export/limited-support-tools/LSO/README @@ -15,7 +15,7 @@ consumed memory resources exceeding over a certain threshold. These are: a) MEM_OFFENDER b) WM_MEM_OFFENDER -In addition, we have 7 types of filtering mechanism based on query execution +In addition, we have several types of filtering mechanism based on query execution time and/or state that can help the DBA and/or support personnel in analyzing the live state of the Trafodion instance for problems. These are: @@ -26,7 +26,7 @@ d) INACTIVE_QUERIES e) DEAD_QUERIES f) UNMONITORED_QUERIES g) SE_BLOCKED_QUERIES - +h) SE_OFFENDER_QUERIES All dynamic SQL queries including child queries can be monitored using this mechanism, because it uses direct access to the RMS infrastructure. RMS @@ -278,6 +278,22 @@ CURRENT_TIMESTAMP NO_OF_PROCESSES BLOCKED_FOR_SECS QUERY_ID 2016-12-28 10:29:43.941455 1 83 MXID11000030514212349680799580002000000000206U3333300_18_S1 TRAFODION.SCH.T022 +SE_OFFENDING_QUERIES +==================== + +This statement lists queries for which the total IO time of any operator +accessing the storage engine is longer than the given number of seconds + +The SQL commands to list SE_OFFENDER queries are available at: +$TRAF_HOME/export/limited-support-tools/LSO/se_offender.sql + +CURRENT_TIMESTAMP TOTAL_IO_TIME_IN_SECS PROCESS_ID QUERY_ID TABLE_NAME +-------------------------- --------------------- ------------ ----------------------------------------------------------------------------------- ---------------------------------------------------------------------------------------------------------------- + +2017-01-18 14:20:03.604532 1 000, 06390 MXID11000026007212351477539057002000000000206U3333300_107___SQLCI_DML_LAST__ TRAFODION.SELVA.CUSTOMER +2017-01-18 14:20:03.604532 1 001, 06391 MXID11000026007212351477539057002000000000206U3333300_107___SQLCI_DML_LAST__ TRAFODION.SELVA.CUSTOMER + + LIMITING THE OUTPUT TO A NODE 
============================= http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/45ff9540/core/sqf/export/limited-support-tools/LSO/offender ---------------------------------------------------------------------- diff --git a/core/sqf/export/limited-support-tools/LSO/offender b/core/sqf/export/limited-support-tools/LSO/offender index 826eed0..ecd9e45 100755 --- a/core/sqf/export/limited-support-tools/LSO/offender +++ b/core/sqf/export/limited-support-tools/LSO/offender @@ -44,6 +44,7 @@ function usage() wm_mem_offender active se_blocked + se_offender inactive in_sql in_client @@ -194,6 +195,10 @@ case $SCRIPT in PREAMBLE="set param ?filter 'SE_BLOCKED=$TIME$NODE'; " QUERY_FILE="$LSO_DIR/se_blocked.sql" ;; + se_offender) + PREAMBLE="set param ?filter 'SE_OFFENDER=$TIME$NODE'; " + QUERY_FILE="$LSO_DIR/se_offender.sql" + ;; cpu_offender) PREAMBLE="set param ?filter 'CPU_OFFENDER=$OFFENDER_NODE'; " QUERY_FILE="$LSO_DIR/cpu_offender.sql" http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/45ff9540/core/sql/cli/Context.cpp ---------------------------------------------------------------------- diff --git a/core/sql/cli/Context.cpp b/core/sql/cli/Context.cpp index 0aaa275..efde0a3 100644 --- a/core/sql/cli/Context.cpp +++ b/core/sql/cli/Context.cpp @@ -4487,13 +4487,6 @@ ExStatisticsArea *ContextCli::getMergedStats( setDeleteStats(TRUE); } } - - if (stats->getMasterStats() != NULL) - { - stats->getMasterStats()->setNumSqlProcs((short)(stats->getMasterStats()->numOfTotalEspsUsed()+1)); -// see ExRtFragTable::countSQLNodes in ex_frag_rt.cpp. The compiler's dop -// counts cores. We want nodes here. 
- } return stats; } else http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/45ff9540/core/sql/executor/ExStats.cpp ---------------------------------------------------------------------- diff --git a/core/sql/executor/ExStats.cpp b/core/sql/executor/ExStats.cpp index 198147d..03e41e2 100644 --- a/core/sql/executor/ExStats.cpp +++ b/core/sql/executor/ExStats.cpp @@ -2357,11 +2357,19 @@ Int64 ExHdfsScanStats::getNumVal(Int32 i) const NABoolean ExHdfsScanStats::filterForSEstats(struct timespec currTimespec, Lng32 filter) { - blockTime_ = timer_.filterForSEstats(currTimespec); - if (blockTime_ >= filter) - return TRUE; + Int64 sumIOTime; + + if (filter > 0) { + blockTime_ = timer_.filterForSEstats(currTimespec); + if (blockTime_ >= filter) + return TRUE; + } else - return FALSE; + if (queryId_ != NULL && (sumIOTime = timer_.getTime()) > 0 && (sumIOTime = sumIOTime /(1000000LL)) >= -filter) { + blockTime_ = sumIOTime; + return TRUE; + } + return FALSE; } void ExHdfsScanStats::getVariableStatsInfo(char * dataBuffer, @@ -2741,13 +2749,21 @@ Int64 ExHbaseAccessStats::getNumVal(Int32 i) const NABoolean ExHbaseAccessStats::filterForSEstats(struct timespec currTimespec, Lng32 filter) { - blockTime_ = timer_.filterForSEstats(currTimespec); - if (blockTime_ >= filter) - return TRUE; + Int64 sumIOTime; + if (filter > 0) { + blockTime_ = timer_.filterForSEstats(currTimespec); + if (blockTime_ >= filter) + return TRUE; + } else - return FALSE; + if (queryId_ != NULL && (sumIOTime = timer_.getTime()) > 0 && (sumIOTime = sumIOTime /(1000000LL)) >= -filter) { + blockTime_ = sumIOTime; + return TRUE; + } + return FALSE; } + void ExHbaseAccessStats::getVariableStatsInfo(char * dataBuffer, char * dataLen, Lng32 maxLen) @@ -7898,7 +7914,6 @@ short ExStatsTcb::work() } if (stats_->getMasterStats() != NULL) { - stats_->getMasterStats()->setNumSqlProcs((short)(stats_->getMasterStats()->numOfTotalEspsUsed()+1)); 
stats_->getMasterStats()->setNumCpus((short)stats_->getMasterStats()->compilerStatsInfo().dop()); } pstate->step_ = GET_MASTER_STATS_ENTRY_; @@ -8684,7 +8699,7 @@ void ExMasterStats::init() rowsAffected_ = -1; rowsReturned_ = 0; sqlErrorCode_ = 0; - numOfTotalEspsUsed_ = -1; + numOfTotalEspsUsed_ = 0; numOfNewEspsStarted_ = -1; numOfRootEsps_ = -1; exePriority_ = -1; @@ -8701,7 +8716,6 @@ void ExMasterStats::init() stmtState_ = 0; #endif numCpus_ = 0; - numSqlProcs_ = 0; masterFlags_ = 0; parentQid_ = NULL; parentQidLen_ = 0; @@ -8750,11 +8764,10 @@ void ExMasterStats::initBeforeExecute(Int64 currentTimeStamp) rowsAffected_ = -1; rowsReturned_ = 0; sqlErrorCode_ = 0; - numOfTotalEspsUsed_ = -1; + numOfTotalEspsUsed_ = 0; numOfNewEspsStarted_ = -1; numOfRootEsps_ = -1; numCpus_ = 0; - numSqlProcs_ = 0; transId_ = -1; childQid_ = NULL; childQidLen_ = 0; @@ -9060,7 +9073,7 @@ void ExMasterStats::getVariableStatsInfo(char * dataBuffer, ((childQid_ != NULL) ? childQid_ : "NONE"), rowsReturned_, firstRowReturnTime_, - numSqlProcs_, + getNumSqlProcs(), numCpus_, exePriority_, transId_, @@ -9114,7 +9127,7 @@ void ExMasterStats::getVariableStatsInfo(char * dataBuffer, ((childQid_ != NULL) ? 
childQid_ : "NONE"), rowsReturned_, firstRowReturnTime_, - numSqlProcs_, + getNumSqlProcs(), numCpus_, exePriority_, transId_, @@ -9449,7 +9462,7 @@ Lng32 ExMasterStats::getStatsItem(SQLSTATS_ITEM* sqlStats_item) sqlStats_item->int64_value = originalSqlTextLen_; break; case SQLSTATS_NUM_SQLPROCS: - sqlStats_item->int64_value = numSqlProcs_; + sqlStats_item->int64_value = getNumSqlProcs(); break; case SQLSTATS_NUM_CPUS: sqlStats_item->int64_value = numCpus_; @@ -9816,6 +9829,7 @@ Lng32 ExStatsTcb::str_parse_stmt_name(char *string, Lng32 len, char *nodeName, char *detailTemp = NULL; char *tdbIdDetailTemp = NULL; char *seTemp = NULL; + char *seOffendTemp = NULL; char *memThreshold = NULL; short retcode = SQLCLI_STATS_REQ_NONE; Int64 tempNum; @@ -9926,6 +9940,13 @@ Lng32 ExStatsTcb::str_parse_stmt_name(char *string, Lng32 len, char *nodeName, diskOffender = TRUE; } else + if (strncasecmp(ptr, "SE_OFFENDER", 10) == 0) + { + ptr = str_tok(NULL, ',', &internal); + seOffendTemp = ptr; + diskOffender = TRUE; + } + else if (strncasecmp(ptr, "QUERIES_IN_SQL", 14) == 0) { ptr = str_tok(NULL, ',', &internal); @@ -10110,6 +10131,12 @@ Lng32 ExStatsTcb::str_parse_stmt_name(char *string, Lng32 len, char *nodeName, *filter = (Lng32)tempNum; retcode = SQLCLI_STATS_REQ_SE_OFFENDER; } + if (seOffendTemp != NULL) + { + tempNum = atoi(seOffendTemp); + *filter = (Lng32)-tempNum; + retcode = SQLCLI_STATS_REQ_SE_OFFENDER; + } if (pidTemp != NULL) { if (strncasecmp(pidTemp, "CURRENT", 7) == 0) http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/45ff9540/core/sql/executor/ExStats.h ---------------------------------------------------------------------- diff --git a/core/sql/executor/ExStats.h b/core/sql/executor/ExStats.h index f461af0..edc3f1e 100644 --- a/core/sql/executor/ExStats.h +++ b/core/sql/executor/ExStats.h @@ -3791,9 +3791,9 @@ NA_EIDPROC short &cmpPriority() {return cmpPriority_;} short &dp2Priority() {return dp2Priority_;} short &fixupPriority() {return 
fixupPriority_;} - inline void setNumSqlProcs(short i) {numSqlProcs_ = i; } + void incNumEspsInUse() { numOfTotalEspsUsed_++; } inline void setNumCpus(short i) {numCpus_ = i; } - inline short getNumSqlProcs() { return numSqlProcs_; } + inline short getNumSqlProcs() { return numOfTotalEspsUsed_+1; } inline short getNumCpus() { return numCpus_; } inline void setAqrLastErrorCode(Lng32 ec) {aqrLastErrorCode_ = ec;} @@ -3960,7 +3960,6 @@ private: short stmtState_; UInt16 masterFlags_; - short numSqlProcs_; short numCpus_; QueryCostInfo queryCostInfo_; http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/45ff9540/core/sql/executor/ex_frag_rt.cpp ---------------------------------------------------------------------- diff --git a/core/sql/executor/ex_frag_rt.cpp b/core/sql/executor/ex_frag_rt.cpp index 0c59f35..45c1a29 100644 --- a/core/sql/executor/ex_frag_rt.cpp +++ b/core/sql/executor/ex_frag_rt.cpp @@ -2851,6 +2851,11 @@ ExEspDbEntry *ExEspManager::shareEsp( char *ptrToClusterName = (char *)clusterName; NAList<ExEspDbEntry *> *espList = NULL; ExProcessStats *processStats = GetCliGlobals()->getExProcessStats(); + ExMasterStats *masterStats = NULL; + StmtStats *ss = statement->getStmtStats(); + if (ss != NULL) + masterStats = ss->getMasterStats(); + if (espList == NULL) if (*creatingEsp == NULL) // Nowaited Creation of an ESP is not in progress { nowaitDepth = env_->getCCMaxWaitDepthLow(); @@ -3009,6 +3014,8 @@ ExEspDbEntry *ExEspManager::shareEsp( processStats->incStartupCompletedEsps(); processStats->incNumESPsInUse(FALSE); } + if (masterStats != NULL) + masterStats->incNumEspsInUse(); if (espTraceArea_ != NULL) // ESP state tracing { // any esp picked up here must be created new addToTrace(result, CREATED_USE); @@ -3042,6 +3049,10 @@ ExEspDbEntry *ExEspManager::getEspFromCache(LIST(ExEspDbEntry *) &alreadyAssigne ExEspCacheKey tempKey(clusterName, cpuNum, user_id); NAList<ExEspDbEntry *> *espList = espCache_->getFirstValue(&tempKey); ExProcessStats 
*processStats = GetCliGlobals()->getExProcessStats(); + ExMasterStats *masterStats = NULL; + StmtStats *ss = statement->getStmtStats(); + if (ss != NULL) + masterStats = ss->getMasterStats(); if (espList == NULL) { // no esp pool found in esp cache for the given segment-cpu-user. @@ -3198,8 +3209,11 @@ ExEspDbEntry *ExEspManager::getEspFromCache(LIST(ExEspDbEntry *) &alreadyAssigne e->totalMemoryQuota_ += 100 + memoryQuota; // If the ESP is already assigned to query // don't increment InUse counter again - if (processStats && ! e->inUse_) + if (processStats && ! e->inUse_) { processStats->incNumESPsInUse(TRUE); + if (masterStats != NULL) + masterStats->incNumEspsInUse(); + } e->inUse_ = true; e->soloFragment_ = soloFragment; result = e; http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/45ff9540/core/sql/executor/ex_root.cpp ---------------------------------------------------------------------- diff --git a/core/sql/executor/ex_root.cpp b/core/sql/executor/ex_root.cpp index 31d43f5..046907f 100644 --- a/core/sql/executor/ex_root.cpp +++ b/core/sql/executor/ex_root.cpp @@ -282,7 +282,6 @@ ex_tcb * ex_root_tdb::build(CliGlobals *cliGlobals, ex_globals * glob) NULL); if (masterStats) { - masterStats->numOfTotalEspsUsed() = numOfTotalEspsUsed; masterStats->numOfNewEspsStarted() = numOfNewEspsStarted; masterStats->setNumCpus(rtFragTable-> countSQLNodes(cliGlobals->myCpu())); http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/45ff9540/core/sql/runtimestats/ssmpipc.cpp ---------------------------------------------------------------------- diff --git a/core/sql/runtimestats/ssmpipc.cpp b/core/sql/runtimestats/ssmpipc.cpp index 008a8e4..1d7b0e5 100755 --- a/core/sql/runtimestats/ssmpipc.cpp +++ b/core/sql/runtimestats/ssmpipc.cpp @@ -2436,7 +2436,6 @@ void SscpClientMsgStream::sendMergedStats() masterStats = mergedStats_->getMasterStats(); if (masterStats != NULL) { - masterStats->setNumSqlProcs(getNumSqlProcs()); 
masterStats->setNumCpus(getNumCpus()); } }