[GitHub] [airflow] ashb commented on a change in pull request #7492: [AIRFLOW-6871] optimize tree view for large DAGs

2020-02-27 Thread GitBox
ashb commented on a change in pull request #7492: [AIRFLOW-6871] optimize tree 
view for large DAGs
URL: https://github.com/apache/airflow/pull/7492#discussion_r385121966
 
 

 ##
 File path: airflow/www/views.py
 ##
 @@ -1374,90 +1376,115 @@ def tree(self):
 .all()
 )
 dag_runs = {
-dr.execution_date: alchemy_to_dict(dr) for dr in dag_runs}
+dr.execution_date: alchemy_to_dict(dr) for dr in dag_runs
+}
 
 dates = sorted(list(dag_runs.keys()))
 max_date = max(dates) if dates else None
 min_date = min(dates) if dates else None
 
 tis = dag.get_task_instances(start_date=min_date, end_date=base_date)
-task_instances = {}
+task_instances: Dict[Tuple[str, datetime], models.TaskInstance] = {}
 for ti in tis:
-tid = alchemy_to_dict(ti)
-dr = dag_runs.get(ti.execution_date)
-tid['external_trigger'] = dr['external_trigger'] if dr else False
-task_instances[(ti.task_id, ti.execution_date)] = tid
+task_instances[(ti.task_id, ti.execution_date)] = ti
 
-expanded = []
+expanded = set()
 # The default recursion traces every path so that tree view has full
 # expand/collapse functionality. After 5,000 nodes we stop and fall
 # back on a quick DFS search for performance. See PR #320.
-node_count = [0]
+node_count = 0
 node_limit = 5000 / max(1, len(dag.leaves))
 
+def encode_ti(ti: Optional[models.TaskInstance]) -> Optional[List]:
+if not ti:
+return None
+
+# NOTE: order of entry is important here because client JS relies 
on it for
+# tree node reconstruction. Remember to change JS code in tree.html
+# whenever order is altered.
+data = [
+ti.state,
+ti.try_number,
+None,  # start_ts
+None,  # duration
+]
+
+if ti.start_date:
+# round to seconds to reduce payload size
+data[2] = int(ti.start_date.timestamp())
+if ti.duration is not None:
+data[3] = int(ti.duration)
+
+return data
+
 def recurse_nodes(task, visited):
+nonlocal node_count
+node_count += 1
 visited.add(task)
-node_count[0] += 1
-
-children = [
-recurse_nodes(t, visited) for t in task.downstream_list
-if node_count[0] < node_limit or t not in visited]
-
-# D3 tree uses children vs _children to define what is
-# expanded or not. The following block makes it such that
-# repeated nodes are collapsed by default.
-children_key = 'children'
-if task.task_id not in expanded:
-expanded.append(task.task_id)
-elif children:
-children_key = "_children"
-
-def set_duration(tid):
-if (isinstance(tid, dict) and tid.get("state") == 
State.RUNNING and
-tid["start_date"] is not None):
-d = timezone.utcnow() - timezone.parse(tid["start_date"])
-tid["duration"] = d.total_seconds()
-return tid
-
-return {
+task_id = task.task_id
+
+node = {
 'name': task.task_id,
 'instances': [
-set_duration(task_instances.get((task.task_id, d))) or {
-'execution_date': d.isoformat(),
-'task_id': task.task_id
-}
-for d in dates],
-children_key: children,
+encode_ti(task_instances.get((task_id, d)))
+for d in dates
+],
 'num_dep': len(task.downstream_list),
 'operator': task.task_type,
 'retries': task.retries,
 'owner': task.owner,
-'start_date': task.start_date,
-'end_date': task.end_date,
-'depends_on_past': task.depends_on_past,
 'ui_color': task.ui_color,
-'extra_links': task.extra_links,
 }
 
+if task.downstream_list:
+children = [
+recurse_nodes(t, visited) for t in task.downstream_list
+if node_count < node_limit or t not in visited]
+
+# D3 tree uses children vs _children to define what is
+# expanded or not. The following block makes it such that
+# repeated nodes are collapsed by default.
+if task.task_id not in expanded:
+children_key = 'children'
+expanded.add(task.task_id)
+else:
+  

[GitHub] [airflow] ashb commented on a change in pull request #7492: [AIRFLOW-6871] optimize tree view for large DAGs

2020-02-27 Thread GitBox
ashb commented on a change in pull request #7492: [AIRFLOW-6871] optimize tree 
view for large DAGs
URL: https://github.com/apache/airflow/pull/7492#discussion_r385121446
 
 

 ##
 File path: airflow/www/views.py
 ##
 @@ -1371,90 +1374,115 @@ def tree(self):
 .all()
 )
 dag_runs = {
-dr.execution_date: alchemy_to_dict(dr) for dr in dag_runs}
+dr.execution_date: alchemy_to_dict(dr) for dr in dag_runs
+}
 
 dates = sorted(list(dag_runs.keys()))
 max_date = max(dates) if dates else None
 min_date = min(dates) if dates else None
 
 tis = dag.get_task_instances(start_date=min_date, end_date=base_date)
-task_instances = {}
+task_instances: Dict[Tuple[str, datetime], models.TaskInstance] = {}
 for ti in tis:
-tid = alchemy_to_dict(ti)
-dr = dag_runs.get(ti.execution_date)
-tid['external_trigger'] = dr['external_trigger'] if dr else False
-task_instances[(ti.task_id, ti.execution_date)] = tid
+task_instances[(ti.task_id, ti.execution_date)] = ti
 
-expanded = []
+expanded = set()
 # The default recursion traces every path so that tree view has full
 # expand/collapse functionality. After 5,000 nodes we stop and fall
 # back on a quick DFS search for performance. See PR #320.
-node_count = [0]
+node_count = 0
 node_limit = 5000 / max(1, len(dag.leaves))
 
+def encode_ti(ti: Optional[models.TaskInstance]) -> Optional[List]:
+if not ti:
+return None
+
+# NOTE: order of entry is important here because client JS relies 
on it for
+# tree node reconstruction. Remember to change JS code in tree.html
+# whenever order is altered.
+data = [
+ti.state,
+ti.try_number,
+None,  # start_ts
+None,  # duration
+]
+
+if ti.start_date:
+# round to seconds to reduce payload size
+data[2] = int(ti.start_date.timestamp())
+if ti.duration is not None:
+data[3] = int(ti.duration)
+
+return data
+
 def recurse_nodes(task, visited):
+nonlocal node_count
+node_count += 1
 visited.add(task)
-node_count[0] += 1
-
-children = [
-recurse_nodes(t, visited) for t in task.downstream_list
-if node_count[0] < node_limit or t not in visited]
-
-# D3 tree uses children vs _children to define what is
-# expanded or not. The following block makes it such that
-# repeated nodes are collapsed by default.
-children_key = 'children'
-if task.task_id not in expanded:
-expanded.append(task.task_id)
-elif children:
-children_key = "_children"
-
-def set_duration(tid):
-if (isinstance(tid, dict) and tid.get("state") == 
State.RUNNING and
-tid["start_date"] is not None):
-d = timezone.utcnow() - timezone.parse(tid["start_date"])
-tid["duration"] = d.total_seconds()
-return tid
-
-return {
+task_id = task.task_id
+
+node = {
 'name': task.task_id,
 'instances': [
-set_duration(task_instances.get((task.task_id, d))) or {
-'execution_date': d.isoformat(),
-'task_id': task.task_id
-}
-for d in dates],
-children_key: children,
+encode_ti(task_instances.get((task_id, d)))
+for d in dates
+],
 'num_dep': len(task.downstream_list),
 'operator': task.task_type,
 'retries': task.retries,
 'owner': task.owner,
-'start_date': task.start_date,
-'end_date': task.end_date,
-'depends_on_past': task.depends_on_past,
 'ui_color': task.ui_color,
-'extra_links': task.extra_links,
 }
 
+if task.downstream_list:
+children = [
+recurse_nodes(t, visited) for t in task.downstream_list
+if node_count < node_limit or t not in visited]
+
+# D3 tree uses children vs _children to define what is
+# expanded or not. The following block makes it such that
+# repeated nodes are collapsed by default.
+if task.task_id not in expanded:
+children_key = 'children'
+expanded.add(task.task_id)
+else:
+  

[GitHub] [airflow] ashb commented on a change in pull request #7492: [AIRFLOW-6871] optimize tree view for large DAGs

2020-02-27 Thread GitBox
ashb commented on a change in pull request #7492: [AIRFLOW-6871] optimize tree 
view for large DAGs
URL: https://github.com/apache/airflow/pull/7492#discussion_r385120657
 
 

 ##
 File path: airflow/www/views.py
 ##
 @@ -1374,90 +1376,115 @@ def tree(self):
 .all()
 )
 dag_runs = {
-dr.execution_date: alchemy_to_dict(dr) for dr in dag_runs}
+dr.execution_date: alchemy_to_dict(dr) for dr in dag_runs
+}
 
 dates = sorted(list(dag_runs.keys()))
 max_date = max(dates) if dates else None
 min_date = min(dates) if dates else None
 
 tis = dag.get_task_instances(start_date=min_date, end_date=base_date)
-task_instances = {}
+task_instances: Dict[Tuple[str, datetime], models.TaskInstance] = {}
 for ti in tis:
-tid = alchemy_to_dict(ti)
-dr = dag_runs.get(ti.execution_date)
-tid['external_trigger'] = dr['external_trigger'] if dr else False
-task_instances[(ti.task_id, ti.execution_date)] = tid
+task_instances[(ti.task_id, ti.execution_date)] = ti
 
-expanded = []
+expanded = set()
 # The default recursion traces every path so that tree view has full
 # expand/collapse functionality. After 5,000 nodes we stop and fall
 # back on a quick DFS search for performance. See PR #320.
-node_count = [0]
+node_count = 0
 node_limit = 5000 / max(1, len(dag.leaves))
 
+def encode_ti(ti: Optional[models.TaskInstance]) -> Optional[List]:
+if not ti:
+return None
+
+# NOTE: order of entry is important here because client JS relies 
on it for
+# tree node reconstruction. Remember to change JS code in tree.html
+# whenever order is altered.
+data = [
+ti.state,
+ti.try_number,
+None,  # start_ts
+None,  # duration
+]
+
+if ti.start_date:
+# round to seconds to reduce payload size
+data[2] = int(ti.start_date.timestamp())
+if ti.duration is not None:
+data[3] = int(ti.duration)
+
+return data
+
 def recurse_nodes(task, visited):
+nonlocal node_count
+node_count += 1
 visited.add(task)
-node_count[0] += 1
-
-children = [
-recurse_nodes(t, visited) for t in task.downstream_list
-if node_count[0] < node_limit or t not in visited]
-
-# D3 tree uses children vs _children to define what is
-# expanded or not. The following block makes it such that
-# repeated nodes are collapsed by default.
-children_key = 'children'
-if task.task_id not in expanded:
-expanded.append(task.task_id)
-elif children:
-children_key = "_children"
-
-def set_duration(tid):
-if (isinstance(tid, dict) and tid.get("state") == 
State.RUNNING and
-tid["start_date"] is not None):
-d = timezone.utcnow() - timezone.parse(tid["start_date"])
-tid["duration"] = d.total_seconds()
-return tid
-
-return {
+task_id = task.task_id
+
+node = {
 'name': task.task_id,
 'instances': [
-set_duration(task_instances.get((task.task_id, d))) or {
-'execution_date': d.isoformat(),
-'task_id': task.task_id
-}
-for d in dates],
-children_key: children,
+encode_ti(task_instances.get((task_id, d)))
+for d in dates
+],
 'num_dep': len(task.downstream_list),
 'operator': task.task_type,
 'retries': task.retries,
 'owner': task.owner,
-'start_date': task.start_date,
-'end_date': task.end_date,
-'depends_on_past': task.depends_on_past,
 'ui_color': task.ui_color,
-'extra_links': task.extra_links,
 }
 
+if task.downstream_list:
+children = [
+recurse_nodes(t, visited) for t in task.downstream_list
+if node_count < node_limit or t not in visited]
+
+# D3 tree uses children vs _children to define what is
+# expanded or not. The following block makes it such that
+# repeated nodes are collapsed by default.
+if task.task_id not in expanded:
+children_key = 'children'
+expanded.add(task.task_id)
+else:
+  

[GitHub] [airflow] ashb commented on a change in pull request #7492: [AIRFLOW-6871] optimize tree view for large DAGs

2020-02-25 Thread GitBox
ashb commented on a change in pull request #7492: [AIRFLOW-6871] optimize tree 
view for large DAGs
URL: https://github.com/apache/airflow/pull/7492#discussion_r384191672
 
 

 ##
 File path: airflow/www/views.py
 ##
 @@ -1374,90 +1376,115 @@ def tree(self):
 .all()
 )
 dag_runs = {
-dr.execution_date: alchemy_to_dict(dr) for dr in dag_runs}
+dr.execution_date: alchemy_to_dict(dr) for dr in dag_runs
+}
 
 dates = sorted(list(dag_runs.keys()))
 max_date = max(dates) if dates else None
 min_date = min(dates) if dates else None
 
 tis = dag.get_task_instances(start_date=min_date, end_date=base_date)
-task_instances = {}
+task_instances: Dict[Tuple[str, datetime], models.TaskInstance] = {}
 for ti in tis:
-tid = alchemy_to_dict(ti)
-dr = dag_runs.get(ti.execution_date)
-tid['external_trigger'] = dr['external_trigger'] if dr else False
-task_instances[(ti.task_id, ti.execution_date)] = tid
+task_instances[(ti.task_id, ti.execution_date)] = ti
 
-expanded = []
+expanded = set()
 # The default recursion traces every path so that tree view has full
 # expand/collapse functionality. After 5,000 nodes we stop and fall
 # back on a quick DFS search for performance. See PR #320.
-node_count = [0]
+node_count = 0
 node_limit = 5000 / max(1, len(dag.leaves))
 
+def encode_ti(ti: Optional[models.TaskInstance]) -> Optional[List]:
+if not ti:
+return None
+
+# NOTE: order of entry is important here because client JS relies 
on it for
+# tree node reconstruction. Remember to change JS code in tree.html
+# whenever order is altered.
+data = [
+ti.state,
+ti.try_number,
+None,  # start_ts
+None,  # duration
+]
+
+if ti.start_date:
+# round to seconds to reduce payload size
+data[2] = int(ti.start_date.timestamp())
+if ti.duration is not None:
+data[3] = int(ti.duration)
+
+return data
+
 def recurse_nodes(task, visited):
+nonlocal node_count
+node_count += 1
 visited.add(task)
-node_count[0] += 1
-
-children = [
-recurse_nodes(t, visited) for t in task.downstream_list
-if node_count[0] < node_limit or t not in visited]
-
-# D3 tree uses children vs _children to define what is
-# expanded or not. The following block makes it such that
-# repeated nodes are collapsed by default.
-children_key = 'children'
-if task.task_id not in expanded:
-expanded.append(task.task_id)
-elif children:
-children_key = "_children"
-
-def set_duration(tid):
-if (isinstance(tid, dict) and tid.get("state") == 
State.RUNNING and
-tid["start_date"] is not None):
-d = timezone.utcnow() - timezone.parse(tid["start_date"])
-tid["duration"] = d.total_seconds()
-return tid
-
-return {
+task_id = task.task_id
+
+node = {
 'name': task.task_id,
 'instances': [
-set_duration(task_instances.get((task.task_id, d))) or {
-'execution_date': d.isoformat(),
-'task_id': task.task_id
-}
-for d in dates],
-children_key: children,
+encode_ti(task_instances.get((task_id, d)))
+for d in dates
+],
 'num_dep': len(task.downstream_list),
 'operator': task.task_type,
 'retries': task.retries,
 'owner': task.owner,
-'start_date': task.start_date,
-'end_date': task.end_date,
-'depends_on_past': task.depends_on_past,
 'ui_color': task.ui_color,
-'extra_links': task.extra_links,
 }
 
+if task.downstream_list:
+children = [
+recurse_nodes(t, visited) for t in task.downstream_list
+if node_count < node_limit or t not in visited]
+
+# D3 tree uses children vs _children to define what is
+# expanded or not. The following block makes it such that
+# repeated nodes are collapsed by default.
+if task.task_id not in expanded:
+children_key = 'children'
+expanded.add(task.task_id)
+else:
+  

[GitHub] [airflow] ashb commented on a change in pull request #7492: [AIRFLOW-6871] optimize tree view for large DAGs

2020-02-25 Thread GitBox
ashb commented on a change in pull request #7492: [AIRFLOW-6871] optimize tree 
view for large DAGs
URL: https://github.com/apache/airflow/pull/7492#discussion_r384171265
 
 

 ##
 File path: airflow/www/templates/airflow/tree.html
 ##
 @@ -85,8 +85,59 @@
 

[GitHub] [airflow] ashb commented on a change in pull request #7492: [AIRFLOW-6871] optimize tree view for large DAGs

2020-02-25 Thread GitBox
ashb commented on a change in pull request #7492: [AIRFLOW-6871] optimize tree 
view for large DAGs
URL: https://github.com/apache/airflow/pull/7492#discussion_r384168703
 
 

 ##
 File path: airflow/www/templates/airflow/tree.html
 ##
 @@ -85,8 +85,59 @@
 
 $('span.status_square').tooltip({html: true});
 
+function ts_to_dtstr(ts) {
+  var dt = new Date(ts * 1000);
+  return dt.toISOString();
+}
+
+function is_dag_run(d) {
+  return d.run_id !== undefined;
+}
+
+var now_ts = Date.now()/1000;
+
+function populate_taskinstance_properties(node) {
+  // populate task instance properties for display purpose
+  var j;
+  for (j=0; j

[GitHub] [airflow] ashb commented on a change in pull request #7492: [AIRFLOW-6871] optimize tree view for large DAGs

2020-02-25 Thread GitBox
ashb commented on a change in pull request #7492: [AIRFLOW-6871] optimize tree 
view for large DAGs
URL: https://github.com/apache/airflow/pull/7492#discussion_r384164611
 
 

 ##
 File path: airflow/www/templates/airflow/tree.html
 ##
 @@ -85,8 +85,59 @@
 
 $('span.status_square').tooltip({html: true});
 
+function ts_to_dtstr(ts) {
+  var dt = new Date(ts * 1000);
+  return dt.toISOString();
+}
+
+function is_dag_run(d) {
+  return d.run_id !== undefined;
+}
+
+var now_ts = Date.now()/1000;
+
+function populate_taskinstance_properties(node) {
+  // populate task instance properties for display purpose
+  var j;
+  for (j=0; j

[GitHub] [airflow] ashb commented on a change in pull request #7492: [AIRFLOW-6871] optimize tree view for large DAGs

2020-02-25 Thread GitBox
ashb commented on a change in pull request #7492: [AIRFLOW-6871] optimize tree 
view for large DAGs
URL: https://github.com/apache/airflow/pull/7492#discussion_r384158218
 
 

 ##
 File path: airflow/www/views.py
 ##
 @@ -1374,90 +1376,115 @@ def tree(self):
 .all()
 )
 dag_runs = {
-dr.execution_date: alchemy_to_dict(dr) for dr in dag_runs}
+dr.execution_date: alchemy_to_dict(dr) for dr in dag_runs
+}
 
 dates = sorted(list(dag_runs.keys()))
 max_date = max(dates) if dates else None
 min_date = min(dates) if dates else None
 
 tis = dag.get_task_instances(start_date=min_date, end_date=base_date)
-task_instances = {}
+task_instances: Dict[Tuple[str, datetime], models.TaskInstance] = {}
 for ti in tis:
-tid = alchemy_to_dict(ti)
-dr = dag_runs.get(ti.execution_date)
-tid['external_trigger'] = dr['external_trigger'] if dr else False
-task_instances[(ti.task_id, ti.execution_date)] = tid
+task_instances[(ti.task_id, ti.execution_date)] = ti
 
-expanded = []
+expanded = set()
 # The default recursion traces every path so that tree view has full
 # expand/collapse functionality. After 5,000 nodes we stop and fall
 # back on a quick DFS search for performance. See PR #320.
-node_count = [0]
+node_count = 0
 node_limit = 5000 / max(1, len(dag.leaves))
 
+def encode_ti(ti: Optional[models.TaskInstance]) -> Optional[List]:
+if not ti:
+return None
+
+# NOTE: order of entry is important here because client JS relies 
on it for
+# tree node reconstruction. Remember to change JS code in tree.html
+# whenever order is altered.
+data = [
+ti.state,
+ti.try_number,
+None,  # start_ts
+None,  # duration
+]
+
+if ti.start_date:
+# round to seconds to reduce payload size
+data[2] = int(ti.start_date.timestamp())
+if ti.duration is not None:
+data[3] = int(ti.duration)
+
+return data
+
 def recurse_nodes(task, visited):
+nonlocal node_count
+node_count += 1
 visited.add(task)
-node_count[0] += 1
-
-children = [
-recurse_nodes(t, visited) for t in task.downstream_list
-if node_count[0] < node_limit or t not in visited]
-
-# D3 tree uses children vs _children to define what is
-# expanded or not. The following block makes it such that
-# repeated nodes are collapsed by default.
-children_key = 'children'
-if task.task_id not in expanded:
-expanded.append(task.task_id)
-elif children:
-children_key = "_children"
-
-def set_duration(tid):
-if (isinstance(tid, dict) and tid.get("state") == 
State.RUNNING and
-tid["start_date"] is not None):
-d = timezone.utcnow() - timezone.parse(tid["start_date"])
-tid["duration"] = d.total_seconds()
-return tid
-
-return {
+task_id = task.task_id
+
+node = {
 'name': task.task_id,
 'instances': [
-set_duration(task_instances.get((task.task_id, d))) or {
-'execution_date': d.isoformat(),
-'task_id': task.task_id
-}
-for d in dates],
-children_key: children,
+encode_ti(task_instances.get((task_id, d)))
+for d in dates
+],
 'num_dep': len(task.downstream_list),
 'operator': task.task_type,
 'retries': task.retries,
 'owner': task.owner,
-'start_date': task.start_date,
-'end_date': task.end_date,
-'depends_on_past': task.depends_on_past,
 'ui_color': task.ui_color,
-'extra_links': task.extra_links,
 }
 
+if task.downstream_list:
+children = [
+recurse_nodes(t, visited) for t in task.downstream_list
+if node_count < node_limit or t not in visited]
+
+# D3 tree uses children vs _children to define what is
+# expanded or not. The following block makes it such that
+# repeated nodes are collapsed by default.
+if task.task_id not in expanded:
+children_key = 'children'
+expanded.add(task.task_id)
+else:
+  

[GitHub] [airflow] ashb commented on a change in pull request #7492: [AIRFLOW-6871] optimize tree view for large DAGs

2020-02-25 Thread GitBox
ashb commented on a change in pull request #7492: [AIRFLOW-6871] optimize tree 
view for large DAGs
URL: https://github.com/apache/airflow/pull/7492#discussion_r384154242
 
 

 ##
 File path: airflow/www/templates/airflow/tree.html
 ##
 @@ -85,8 +85,59 @@
 

[GitHub] [airflow] ashb commented on a change in pull request #7492: [AIRFLOW-6871] optimize tree view for large DAGs

2020-02-25 Thread GitBox
ashb commented on a change in pull request #7492: [AIRFLOW-6871] optimize tree 
view for large DAGs
URL: https://github.com/apache/airflow/pull/7492#discussion_r384152129
 
 

 ##
 File path: airflow/www/views.py
 ##
 @@ -1374,90 +1376,113 @@ def tree(self):
 .all()
 )
 dag_runs = {
-dr.execution_date: alchemy_to_dict(dr) for dr in dag_runs}
+dr.execution_date: alchemy_to_dict(dr) for dr in dag_runs
+}
 
 dates = sorted(list(dag_runs.keys()))
 max_date = max(dates) if dates else None
 min_date = min(dates) if dates else None
 
 tis = dag.get_task_instances(start_date=min_date, end_date=base_date)
-task_instances = {}
+task_instances: Dict[Tuple[str, datetime], 
Optional[models.TaskInstance]] = {}
 for ti in tis:
-tid = alchemy_to_dict(ti)
-dr = dag_runs.get(ti.execution_date)
-tid['external_trigger'] = dr['external_trigger'] if dr else False
-task_instances[(ti.task_id, ti.execution_date)] = tid
+task_instances[(ti.task_id, ti.execution_date)] = ti
 
-expanded = []
+expanded = set()
 # The default recursion traces every path so that tree view has full
 # expand/collapse functionality. After 5,000 nodes we stop and fall
 # back on a quick DFS search for performance. See PR #320.
-node_count = [0]
+node_count = 0
 node_limit = 5000 / max(1, len(dag.leaves))
 
+def encode_ti(ti: models.TaskInstance) -> Optional[List]:
+if not ti:
+return None
+
+# NOTE: order of entry is important here because client JS relies 
on it for
+# tree node reconstruction. Remember to change JS code in tree.html
+# whenever order is altered.
+data = [
+ti.state,
+ti.try_number,
+None,  # start_ts
+None,  # duration
+]
+
+if ti.start_date:
+# round to seconds to reduce payload size
+data[2] = int(ti.start_date.timestamp())
+if ti.duration is not None:
+data[3] = int(ti.duration)
+
+return data
+
 def recurse_nodes(task, visited):
+nonlocal node_count
+node_count += 1
 visited.add(task)
-node_count[0] += 1
-
-children = [
-recurse_nodes(t, visited) for t in task.downstream_list
-if node_count[0] < node_limit or t not in visited]
-
-# D3 tree uses children vs _children to define what is
-# expanded or not. The following block makes it such that
-# repeated nodes are collapsed by default.
-children_key = 'children'
-if task.task_id not in expanded:
-expanded.append(task.task_id)
-elif children:
-children_key = "_children"
-
-def set_duration(tid):
-if (isinstance(tid, dict) and tid.get("state") == 
State.RUNNING and
-tid["start_date"] is not None):
-d = timezone.utcnow() - timezone.parse(tid["start_date"])
-tid["duration"] = d.total_seconds()
-return tid
-
-return {
+task_id = task.task_id
+
+node = {
 'name': task.task_id,
 'instances': [
-set_duration(task_instances.get((task.task_id, d))) or {
-'execution_date': d.isoformat(),
-'task_id': task.task_id
-}
-for d in dates],
-children_key: children,
+encode_ti(task_instances.get((task_id, d)))
+for d in dates
+],
 'num_dep': len(task.downstream_list),
 'operator': task.task_type,
 'retries': task.retries,
 'owner': task.owner,
-'start_date': task.start_date,
-'end_date': task.end_date,
-'depends_on_past': task.depends_on_past,
 'ui_color': task.ui_color,
-'extra_links': task.extra_links,
 }
 
+if task.downstream_list:
+children = [
+recurse_nodes(t, visited) for t in task.downstream_list
+if node_count < node_limit or t not in visited]
+
+# D3 tree uses children vs _children to define what is
+# expanded or not. The following block makes it such that
+# repeated nodes are collapsed by default.
+if task.task_id not in expanded:
+children_key = 'children'
 
 Review comment:
   Nope, what you've done sounds good.


[GitHub] [airflow] ashb commented on a change in pull request #7492: [AIRFLOW-6871] optimize tree view for large DAGs

2020-02-24 Thread GitBox
ashb commented on a change in pull request #7492: [AIRFLOW-6871] optimize tree 
view for large DAGs
URL: https://github.com/apache/airflow/pull/7492#discussion_r383243038
 
 

 ##
 File path: airflow/www/views.py
 ##
 @@ -1374,90 +1376,113 @@ def tree(self):
 .all()
 )
 dag_runs = {
-dr.execution_date: alchemy_to_dict(dr) for dr in dag_runs}
+dr.execution_date: alchemy_to_dict(dr) for dr in dag_runs
+}
 
 dates = sorted(list(dag_runs.keys()))
 max_date = max(dates) if dates else None
 min_date = min(dates) if dates else None
 
 tis = dag.get_task_instances(start_date=min_date, end_date=base_date)
-task_instances = {}
+task_instances: Dict[Tuple[str, datetime], 
Optional[models.TaskInstance]] = {}
 for ti in tis:
-tid = alchemy_to_dict(ti)
-dr = dag_runs.get(ti.execution_date)
-tid['external_trigger'] = dr['external_trigger'] if dr else False
-task_instances[(ti.task_id, ti.execution_date)] = tid
+task_instances[(ti.task_id, ti.execution_date)] = ti
 
-expanded = []
+expanded = set()
 # The default recursion traces every path so that tree view has full
 # expand/collapse functionality. After 5,000 nodes we stop and fall
 # back on a quick DFS search for performance. See PR #320.
-node_count = [0]
+node_count = 0
 node_limit = 5000 / max(1, len(dag.leaves))
 
+def encode_ti(ti: models.TaskInstance) -> Optional[List]:
+if not ti:
+return None
+
+# NOTE: order of entry is important here because client JS relies 
on it for
+# tree node reconstruction. Remember to change JS code in tree.html
+# whenever order is altered.
+data = [
+ti.state,
+ti.try_number,
+None,  # start_ts
+None,  # duration
+]
+
+if ti.start_date:
+# round to seconds to reduce payload size
+data[2] = int(ti.start_date.timestamp())
+if ti.duration is not None:
+data[3] = int(ti.duration)
+
+return data
+
 def recurse_nodes(task, visited):
+nonlocal node_count
+node_count += 1
 visited.add(task)
-node_count[0] += 1
-
-children = [
-recurse_nodes(t, visited) for t in task.downstream_list
-if node_count[0] < node_limit or t not in visited]
-
-# D3 tree uses children vs _children to define what is
-# expanded or not. The following block makes it such that
-# repeated nodes are collapsed by default.
-children_key = 'children'
-if task.task_id not in expanded:
-expanded.append(task.task_id)
-elif children:
-children_key = "_children"
-
-def set_duration(tid):
-if (isinstance(tid, dict) and tid.get("state") == 
State.RUNNING and
-tid["start_date"] is not None):
-d = timezone.utcnow() - timezone.parse(tid["start_date"])
-tid["duration"] = d.total_seconds()
-return tid
-
-return {
+task_id = task.task_id
+
+node = {
 'name': task.task_id,
 'instances': [
-set_duration(task_instances.get((task.task_id, d))) or {
-'execution_date': d.isoformat(),
-'task_id': task.task_id
-}
-for d in dates],
-children_key: children,
+encode_ti(task_instances.get((task_id, d)))
+for d in dates
+],
 'num_dep': len(task.downstream_list),
 'operator': task.task_type,
 'retries': task.retries,
 'owner': task.owner,
-'start_date': task.start_date,
-'end_date': task.end_date,
-'depends_on_past': task.depends_on_past,
 'ui_color': task.ui_color,
-'extra_links': task.extra_links,
 }
 
+if task.downstream_list:
+children = [
+recurse_nodes(t, visited) for t in task.downstream_list
+if node_count < node_limit or t not in visited]
+
+# D3 tree uses children vs _children to define what is
+# expanded or not. The following block makes it such that
+# repeated nodes are collapsed by default.
+if task.task_id not in expanded:
+children_key = 'children'
+expanded.add(task.task_id)
+else:
+ 

[GitHub] [airflow] ashb commented on a change in pull request #7492: [AIRFLOW-6871] optimize tree view for large DAGs

2020-02-24 Thread GitBox
ashb commented on a change in pull request #7492: [AIRFLOW-6871] optimize tree 
view for large DAGs
URL: https://github.com/apache/airflow/pull/7492#discussion_r383242657
 
 

 ##
 File path: airflow/www/views.py
 ##
 @@ -1374,90 +1376,113 @@ def tree(self):
 .all()
 )
 dag_runs = {
-dr.execution_date: alchemy_to_dict(dr) for dr in dag_runs}
+dr.execution_date: alchemy_to_dict(dr) for dr in dag_runs
+}
 
 dates = sorted(list(dag_runs.keys()))
 max_date = max(dates) if dates else None
 min_date = min(dates) if dates else None
 
 tis = dag.get_task_instances(start_date=min_date, end_date=base_date)
-task_instances = {}
+task_instances: Dict[Tuple[str, datetime], 
Optional[models.TaskInstance]] = {}
 for ti in tis:
-tid = alchemy_to_dict(ti)
-dr = dag_runs.get(ti.execution_date)
-tid['external_trigger'] = dr['external_trigger'] if dr else False
-task_instances[(ti.task_id, ti.execution_date)] = tid
+task_instances[(ti.task_id, ti.execution_date)] = ti
 
-expanded = []
+expanded = set()
 # The default recursion traces every path so that tree view has full
 # expand/collapse functionality. After 5,000 nodes we stop and fall
 # back on a quick DFS search for performance. See PR #320.
-node_count = [0]
+node_count = 0
 node_limit = 5000 / max(1, len(dag.leaves))
 
+def encode_ti(ti: models.TaskInstance) -> Optional[List]:
+if not ti:
+return None
+
+# NOTE: order of entry is important here because client JS relies 
on it for
+# tree node reconstruction. Remember to change JS code in tree.html
+# whenever order is altered.
+data = [
+ti.state,
+ti.try_number,
+None,  # start_ts
+None,  # duration
+]
+
+if ti.start_date:
+# round to seconds to reduce payload size
+data[2] = int(ti.start_date.timestamp())
+if ti.duration is not None:
+data[3] = int(ti.duration)
+
+return data
+
 def recurse_nodes(task, visited):
+nonlocal node_count
+node_count += 1
 visited.add(task)
-node_count[0] += 1
-
-children = [
-recurse_nodes(t, visited) for t in task.downstream_list
-if node_count[0] < node_limit or t not in visited]
-
-# D3 tree uses children vs _children to define what is
-# expanded or not. The following block makes it such that
-# repeated nodes are collapsed by default.
-children_key = 'children'
-if task.task_id not in expanded:
-expanded.append(task.task_id)
-elif children:
-children_key = "_children"
-
-def set_duration(tid):
-if (isinstance(tid, dict) and tid.get("state") == 
State.RUNNING and
-tid["start_date"] is not None):
-d = timezone.utcnow() - timezone.parse(tid["start_date"])
-tid["duration"] = d.total_seconds()
-return tid
-
-return {
+task_id = task.task_id
+
+node = {
 'name': task.task_id,
 'instances': [
-set_duration(task_instances.get((task.task_id, d))) or {
-'execution_date': d.isoformat(),
-'task_id': task.task_id
-}
-for d in dates],
-children_key: children,
+encode_ti(task_instances.get((task_id, d)))
+for d in dates
+],
 'num_dep': len(task.downstream_list),
 'operator': task.task_type,
 'retries': task.retries,
 'owner': task.owner,
-'start_date': task.start_date,
-'end_date': task.end_date,
-'depends_on_past': task.depends_on_past,
 'ui_color': task.ui_color,
-'extra_links': task.extra_links,
 }
 
+if task.downstream_list:
+children = [
+recurse_nodes(t, visited) for t in task.downstream_list
+if node_count < node_limit or t not in visited]
+
+# D3 tree uses children vs _children to define what is
+# expanded or not. The following block makes it such that
+# repeated nodes are collapsed by default.
+if task.task_id not in expanded:
+children_key = 'children'
 
 Review comment:
   Does this change the default/current view behaviour?


[GitHub] [airflow] ashb commented on a change in pull request #7492: [AIRFLOW-6871] optimize tree view for large DAGs

2020-02-24 Thread GitBox
ashb commented on a change in pull request #7492: [AIRFLOW-6871] optimize tree 
view for large DAGs
URL: https://github.com/apache/airflow/pull/7492#discussion_r383242005
 
 

 ##
 File path: airflow/www/views.py
 ##
 @@ -1374,90 +1376,113 @@ def tree(self):
 .all()
 )
 dag_runs = {
-dr.execution_date: alchemy_to_dict(dr) for dr in dag_runs}
+dr.execution_date: alchemy_to_dict(dr) for dr in dag_runs
+}
 
 dates = sorted(list(dag_runs.keys()))
 max_date = max(dates) if dates else None
 min_date = min(dates) if dates else None
 
 tis = dag.get_task_instances(start_date=min_date, end_date=base_date)
-task_instances = {}
+task_instances: Dict[Tuple[str, datetime], 
Optional[models.TaskInstance]] = {}
 
 Review comment:
   ```suggestion
   task_instances: Dict[Tuple[str, datetime], models.TaskInstance] = {}
   ```
   
   I think? i.e. we don't ever store None in the dict do we?


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services


[GitHub] [airflow] ashb commented on a change in pull request #7492: [AIRFLOW-6871] optimize tree view for large DAGs

2020-02-24 Thread GitBox
ashb commented on a change in pull request #7492: [AIRFLOW-6871] optimize tree 
view for large DAGs
URL: https://github.com/apache/airflow/pull/7492#discussion_r383240795
 
 

 ##
 File path: airflow/www/templates/airflow/tree.html
 ##
 @@ -85,8 +85,59 @@
 

[GitHub] [airflow] ashb commented on a change in pull request #7492: [AIRFLOW-6871] optimize tree view for large DAGs

2020-02-22 Thread GitBox
ashb commented on a change in pull request #7492: [AIRFLOW-6871] optimize tree 
view for large DAGs
URL: https://github.com/apache/airflow/pull/7492#discussion_r382899365
 
 

 ##
 File path: airflow/www/templates/airflow/tree.html
 ##
 @@ -85,8 +85,59 @@
 

[GitHub] [airflow] ashb commented on a change in pull request #7492: [AIRFLOW-6871] optimize tree view for large DAGs

2020-02-22 Thread GitBox
ashb commented on a change in pull request #7492: [AIRFLOW-6871] optimize tree 
view for large DAGs
URL: https://github.com/apache/airflow/pull/7492#discussion_r382899504
 
 

 ##
 File path: airflow/www/views.py
 ##
 @@ -131,10 +132,31 @@ def get_date_time_num_runs_dag_runs_form_data(request, 
session, dag):
 }
 
 
+def encode_ti(ti: models.TaskInstance) -> Optional[List]:
 
 Review comment:
   Is this only used inside one view? If so it should probably be a nested 
function


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services


[GitHub] [airflow] ashb commented on a change in pull request #7492: [AIRFLOW-6871] optimize tree view for large DAGs

2020-02-22 Thread GitBox
ashb commented on a change in pull request #7492: [AIRFLOW-6871] optimize tree 
view for large DAGs
URL: https://github.com/apache/airflow/pull/7492#discussion_r382899394
 
 

 ##
 File path: airflow/www/templates/airflow/tree.html
 ##
 @@ -85,8 +85,59 @@