Github user pgandhi999 commented on a diff in the pull request: https://github.com/apache/spark/pull/21688#discussion_r220255145 --- Diff: core/src/main/resources/org/apache/spark/ui/static/stagepage.js --- @@ -0,0 +1,926 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +$(document).ajaxStop($.unblockUI); +$(document).ajaxStart(function () { + $.blockUI({message: '<h3>Loading Stage Page...</h3>'}); +}); + +$.extend( $.fn.dataTable.ext.type.order, { + "file-size-pre": ConvertDurationString, + + "file-size-asc": function ( a, b ) { + a = ConvertDurationString( a ); + b = ConvertDurationString( b ); + return ((a < b) ? -1 : ((a > b) ? 1 : 0)); + }, + + "file-size-desc": function ( a, b ) { + a = ConvertDurationString( a ); + b = ConvertDurationString( b ); + return ((a < b) ? 1 : ((a > b) ? -1 : 0)); + } +} ); + +// This function will only parse the URL under certain format +// e.g. https://domain:50509/history/application_1502220952225_59143/stages/stage/?id=0&attempt=0 +function stageEndPoint(appId) { + var words = document.baseURI.split('/'); + var words2 = document.baseURI.split('?'); + var ind = words.indexOf("proxy"); + if (ind > 0) { + var appId = words[ind + 1]; + var stageIdLen = words2[1].indexOf('&'); + var stageId = words2[1].substr(3, stageIdLen - 3); + var newBaseURI = words.slice(0, ind + 2).join('/'); + return newBaseURI + "/api/v1/applications/" + appId + "/stages/" + stageId; + } + ind = words.indexOf("history"); + if (ind > 0) { + var appId = words[ind + 1]; + var attemptId = words[ind + 2]; + var stageIdLen = words2[1].indexOf('&'); + var stageId = words2[1].substr(3, stageIdLen - 3); + var newBaseURI = words.slice(0, ind).join('/'); + if (isNaN(attemptId) || attemptId == "0") { + return newBaseURI + "/api/v1/applications/" + appId + "/stages/" + stageId; + } else { + return newBaseURI + "/api/v1/applications/" + appId + "/" + attemptId + "/stages/" + stageId; + } + } + var stageIdLen = words2[1].indexOf('&'); + var stageId = words2[1].substr(3, stageIdLen - 3); + return location.origin + "/api/v1/applications/" + appId + "/stages/" + stageId; +} + +function getColumnNameForTaskMetricSummary(columnKey) { + switch(columnKey) { + case "executorRunTime": + return "Duration"; + break; + + case "jvmGcTime": + return "GC Time"; + break; + + case "gettingResultTime": + return "Getting Result Time"; + break; + + case "inputMetrics": + return "Input Size / Records"; + break; + + case "outputMetrics": + return "Output Size / Records"; + break; + + case "peakExecutionMemory": + return "Peak Execution Memory"; + break; + + case "resultSerializationTime": + return "Result Serialization Time"; + break; + + case "schedulerDelay": + return "Scheduler Delay"; + break; + + case "diskBytesSpilled": + return "Shuffle spill (disk)"; + break; + + case "memoryBytesSpilled": + return "Shuffle spill (memory)"; + break; + + case "shuffleReadMetrics": + return "Shuffle Read Size / Records"; + break; + + case "shuffleWriteMetrics": + return "Shuffle Write Size / Records"; + break; + + case "executorDeserializeTime": + return "Task Deserialization Time"; + break; + + default: + return "NA"; + } +} + +$(document).ready(function () { + setDataTableDefaults(); + + $("#showAdditionalMetrics").append( + "<div><a id='taskMetric'>" + + "<span class='expand-input-rate-arrow arrow-closed' id='arrowtoggle1'></span>" + + " Show Additional Metrics" + + "</a></div>" + + "<div class='container-fluid container-fluid-div' id='toggle-metrics' hidden>" + + "<div><input type='checkbox' class='toggle-vis' id='box-0' data-column='0'> Select All</div>" + + "<div id='scheduler_delay'><input type='checkbox' class='toggle-vis' id='box-11' data-column='11'> Scheduler Delay</div>" + + "<div id='task_deserialization_time'><input type='checkbox' class='toggle-vis' id='box-12' data-column='12'> Task Deserialization Time</div>" + + "<div id='shuffle_read_blocked_time'><input type='checkbox' class='toggle-vis' id='box-13' data-column='13'> Shuffle Read Blocked Time</div>" + + "<div id='shuffle_remote_reads'><input type='checkbox' class='toggle-vis' id='box-14' data-column='14'> Shuffle Remote Reads</div>" + + "<div id='result_serialization_time'><input type='checkbox' class='toggle-vis' id='box-15' data-column='15'> Result Serialization Time</div>" + + "<div id='getting_result_time'><input type='checkbox' class='toggle-vis' id='box-16' data-column='16'> Getting Result Time</div>" + + "<div id='peak_execution_memory'><input type='checkbox' class='toggle-vis' id='box-17' data-column='17'> Peak Execution Memory</div>" + + "</div>"); + + $('#scheduler_delay').attr("data-toggle", "tooltip") + .attr("data-placement", "bottom") + .attr("title", "Scheduler delay includes time to ship the task from the scheduler to the executor, and time to send " + + "the task result from the executor to the scheduler. If scheduler delay is large, consider decreasing the size of tasks or decreasing the size of task results."); + $('#task_deserialization_time').attr("data-toggle", "tooltip") + .attr("data-placement", "bottom") + .attr("title", "Time spent deserializing the task closure on the executor, including the time to read the broadcasted task."); + $('#shuffle_read_blocked_time').attr("data-toggle", "tooltip") + .attr("data-placement", "bottom") + .attr("title", "Time that the task spent blocked waiting for shuffle data to be read from remote machines."); + $('#shuffle_remote_reads').attr("data-toggle", "tooltip") + .attr("data-placement", "bottom") + .attr("title", "Total shuffle bytes read from remote executors. This is a subset of the shuffle read bytes; the remaining shuffle data is read locally. "); + $('#result_serialization_time').attr("data-toggle", "tooltip") + .attr("data-placement", "bottom") + .attr("title", "Time spent serializing the task result on the executor before sending it back to the driver."); + $('#getting_result_time').attr("data-toggle", "tooltip") + .attr("data-placement", "bottom") + .attr("title", "Time that the driver spends fetching task results from workers. If this is large, consider decreasing the amount of data returned from each task."); + $('#peak_execution_memory').attr("data-toggle", "tooltip") + .attr("data-placement", "bottom") + .attr("title", "Execution memory refers to the memory used by internal data structures created during " + + "shuffles, aggregations and joins when Tungsten is enabled. The value of this accumulator " + + "should be approximately the sum of the peak sizes across all such data structures created " + + "in this task. For SQL jobs, this only tracks all unsafe operators, broadcast joins, and " + + "external sort."); + $('#scheduler_delay').tooltip(true); + $('#task_deserialization_time').tooltip(true); + $('#shuffle_read_blocked_time').tooltip(true); + $('#shuffle_remote_reads').tooltip(true); + $('#result_serialization_time').tooltip(true); + $('#getting_result_time').tooltip(true); + $('#peak_execution_memory').tooltip(true); + tasksSummary = $("#active-tasks"); + getStandAloneAppId(function (appId) { + + var endPoint = stageEndPoint(appId); + $.getJSON(endPoint, function(response, status, jqXHR) { + + var responseBody = response[0]; + // prepare data for task aggregated metrics table + indices = Object.keys(responseBody.executorSummary); + var task_summary_table = []; + indices.forEach(function (ix) { + responseBody.executorSummary[ix].id = ix; + task_summary_table.push(responseBody.executorSummary[ix]); + }); + + // prepare data for accumulatorUpdates + var indices = Object.keys(responseBody.accumulatorUpdates); + var accumulator_table_all = []; + var accumulator_table = []; + indices.forEach(function (ix) { + accumulator_table_all.push(responseBody.accumulatorUpdates[ix]); + }); + + accumulator_table_all.forEach(function (x){ + var name = (x.name).toString(); + if(name.includes("internal.") == false){ + accumulator_table.push(x); + } + }); + + // rendering the UI page + var data = {"executors": response}; + $.get(createTemplateURI(appId, "stagespage"), function(template) { + tasksSummary.append(Mustache.render($(template).filter("#stages-summary-template").html(), data)); + + $("#taskMetric").click(function(){ + $("#arrowtoggle1").toggleClass("arrow-open arrow-closed"); + $("#toggle-metrics").toggle(); + }); + + $("#aggregatedMetrics").click(function(){ + $("#arrowtoggle2").toggleClass("arrow-open arrow-closed"); + $("#toggle-aggregatedMetrics").toggle(); + }); + + var task_metrics_table = []; + var stageAttemptId = getStageAttemptId(); + var quantiles = "0,0.25,0.5,0.75,1.0"; + $.getJSON(stageEndPoint(appId) + "/"+stageAttemptId+"/taskSummary?quantiles="+quantiles, function(taskMetricsResponse, status, jqXHR) { + var taskMetricIndices = Object.keys(taskMetricsResponse); + taskMetricIndices.forEach(function (ix) { + var columnName = getColumnNameForTaskMetricSummary(ix); + if (columnName == "Shuffle Read Size / Records") { + var row1 = { + "metric": columnName, + "data": taskMetricsResponse[ix] + }; + var row2 = { + "metric": "Shuffle Read Blocked Time", + "data": taskMetricsResponse[ix] + }; + var row3 = { + "metric": "Shuffle Remote Reads", + "data": taskMetricsResponse[ix] + }; + task_metrics_table.push(row1); + task_metrics_table.push(row2); + task_metrics_table.push(row3); + } + else if (columnName != "NA") { + var row = { + "metric": columnName, + "data": taskMetricsResponse[ix] + }; + task_metrics_table.push(row); + } + }); + + var taskMetricsTable = "#summary-metrics-table"; + var task_conf = { + "data": task_metrics_table, + "columns": [ + {data : 'metric'}, + { + data: function (row, type) { + switch(row.metric) { + case 'Input Size / Records': --- End diff -- Discussed in previous comment
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org