http://git-wip-us.apache.org/repos/asf/zeppelin/blob/085efeb6/notebook/Zeppelin
Tutorial/Using Flink for batch processing_2C35YU814.zpln
----------------------------------------------------------------------
diff --git a/notebook/Zeppelin Tutorial/Using Flink for batch
processing_2C35YU814.zpln b/notebook/Zeppelin Tutorial/Using Flink for batch
processing_2C35YU814.zpln
new file mode 100644
index 0000000..357271a
--- /dev/null
+++ b/notebook/Zeppelin Tutorial/Using Flink for batch
processing_2C35YU814.zpln
@@ -0,0 +1,806 @@
+{
+ "paragraphs": [
+ {
+ "text": "%md\n### Intro\nThis notebook is an example of how to use
**Apache Flink** for processing simple data sets. We will take an open airline
data set from [stat-computing.org](http://stat-computing.org) and find out who
was the most popular carrier during the years 1998-2000. Next we will build a chart
that shows the distribution of flights by month and look at how it changes from year to
year. We will use Zeppelin `%table` display system to build charts.",
+ "user": "anonymous",
+ "dateUpdated": "Jan 9, 2017 11:55:42 AM",
+ "config": {
+ "colWidth": 12.0,
+ "enabled": true,
+ "results": {},
+ "editorSetting": {
+ "language": "markdown",
+ "editOnDblClick": true
+ },
+ "editorMode": "ace/mode/markdown",
+ "editorHide": true,
+ "tableHide": false
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "apps": [],
+ "jobName": "paragraph_1483952101049_-1120777567",
+ "id": "20170109-115501_192763014",
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "HTML",
+ "data": "\u003cdiv
class\u003d\"markdown-body\"\u003e\n\u003ch3\u003eIntro\u003c/h3\u003e\n\u003cp\u003eThis
notebook is an example of how to use \u003cstrong\u003eApache
Flink\u003c/strong\u003e for processing simple data sets. We will take an open
airline data set from \u003ca
href\u003d\"http://stat-computing.org\"\u003estat-computing.org\u003c/a\u003e
and find out who was the most popular carrier during the years 1998-2000. Next we
will build a chart that shows the distribution of flights by month and look at how it
changes from year to year. We will use Zeppelin
\u003ccode\u003e%table\u003c/code\u003e display system to build
charts.\u003c/p\u003e\n\u003c/div\u003e"
+ }
+ ]
+ },
+ "dateCreated": "Jan 9, 2017 11:55:01 AM",
+ "dateStarted": "Jan 9, 2017 11:55:42 AM",
+ "dateFinished": "Jan 9, 2017 11:55:44 AM",
+ "status": "FINISHED",
+ "progressUpdateIntervalMs": 500
+ },
+ {
+ "text": "%md\n### Getting the data\nFirst we need to download and unpack
the data. We will get three big data sets with flight details (one pack for
each year) and a small one with carrier names. In total we will download about
1.5 GB of data. To be able to process such an amount of data it is recommended to
increase `shell.command.timeout.millisecs` value in `%sh` interpreter settings
up to several minutes. You can find interpreters configuration by clicking on
`Interpreter` in a drop-down menu from the top right corner of the Zeppelin
web-ui.",
+ "user": "anonymous",
+ "dateUpdated": "Jan 9, 2017 11:56:08 AM",
+ "config": {
+ "colWidth": 12.0,
+ "enabled": true,
+ "results": {},
+ "editorSetting": {
+ "language": "scala",
+ "editOnDblClick": false
+ },
+ "editorMode": "ace/mode/scala",
+ "editorHide": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "apps": [],
+ "jobName": "paragraph_1483952142017_284386712",
+ "id": "20170109-115542_1487437739",
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "HTML",
+ "data": "\u003cdiv
class\u003d\"markdown-body\"\u003e\n\u003ch3\u003eGetting the
data\u003c/h3\u003e\n\u003cp\u003eFirst we need to download and unpack the
data. We will get three big data sets with flight details (one pack for each
year) and a small one with carrier names. In total we will download about 1.5
GB of data. To be able to process such an amount of data it is recommended to
increase \u003ccode\u003eshell.command.timeout.millisecs\u003c/code\u003e value
in \u003ccode\u003e%sh\u003c/code\u003e interpreter settings up to several
minutes. You can find interpreters configuration by clicking on
\u003ccode\u003eInterpreter\u003c/code\u003e in a drop-down menu from the top
right corner of the Zeppelin web-ui.\u003c/p\u003e\n\u003c/div\u003e"
+ }
+ ]
+ },
+ "dateCreated": "Jan 9, 2017 11:55:42 AM",
+ "dateStarted": "Jan 9, 2017 11:56:07 AM",
+ "dateFinished": "Jan 9, 2017 11:56:07 AM",
+ "status": "FINISHED",
+ "progressUpdateIntervalMs": 500
+ },
+ {
+ "text": "%sh\n\nrm /tmp/flights98.csv.bz2\ncurl -o
/tmp/flights98.csv.bz2
\"http://stat-computing.org/dataexpo/2009/1998.csv.bz2\"\nrm
/tmp/flights98.csv\nbzip2 -d /tmp/flights98.csv.bz2\nchmod 666
/tmp/flights98.csv",
+ "user": "anonymous",
+ "dateUpdated": "Jan 9, 2017 11:59:02 AM",
+ "config": {
+ "colWidth": 12.0,
+ "enabled": true,
+ "results": {},
+ "editorSetting": {
+ "language": "sh",
+ "editOnDblClick": false
+ },
+ "editorMode": "ace/mode/sh",
+ "tableHide": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "apps": [],
+ "jobName": "paragraph_1483952167547_-566831096",
+ "id": "20170109-115607_1634441713",
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "rm: cannot remove \u0027/tmp/flights98.csv.bz2\u0027: No
such file or directory\n % Total % Received % Xferd Average Speed Time
Time Time Current\n Dload Upload Total
Spent Left Speed\n\r 0 0 0 0 0 0 0 0
--:--:-- --:--:-- --:--:-- 0\r 0 0 0 0 0 0 0 0
--:--:-- --:--:-- --:--:-- 0\r 0 73.1M 0 64295 0 0 51646 0
0:24:44 0:00:01 0:24:43 51642\r 0 73.1M 0 358k 0 0 160k 0
0:07:47 0:00:02 0:07:45 160k\r 1 73.1M 1 1209k 0 0 373k 0
0:03:20 0:00:03 0:03:17 373k\r 4 73.1M 4 3204k 0 0 773k 0
0:01:36 0:00:04 0:01:32 773k\r 7 73.1M 7 5508k 0 0 1071k 0
0:01:09 0:00:05 0:01:04 1145k\r 10 73.1M 10 7875k 0 0 1280k 0
0:00:58 0:00:06 0:00:52 1592k\r 13 73.1M 13 10.1M 0 0 1458k 0
0:00:51 0:00:07 0:00:44 2049k\r 17 73.1M 1
7 12.7M 0 0 1608k 0 0:00:46 0:00:08 0:00:38 2422k\r 20 73.1M
20 14.9M 0 0 1671k 0 0:00:44 0:00:09 0:00:35 2413k\r 23 73.1M
23 17.1M 0 0 1728k 0 0:00:43 0:00:10 0:00:33 2403k\r 26 73.1M
26 19.4M 0 0 1787k 0 0:00:41 0:00:11 0:00:30 2411k\r 29 73.1M
29 21.7M 0 0 1837k 0 0:00:40 0:00:12 0:00:28 2379k\r 32 73.1M
32 24.1M 0 0 1879k 0 0:00:39 0:00:13 0:00:26 2322k\r 36 73.1M
36 26.4M 0 0 1916k 0 0:00:39 0:00:14 0:00:25 2365k\r 39 73.1M
39 28.5M 0 0 1930k 0 0:00:38 0:00:15 0:00:23 2341k\r 41 73.1M
41 30.6M 0 0 1943k 0 0:00:38 0:00:16 0:00:22 2292k\r 44 73.1M
44 32.6M 0 0 1947k 0 0:00:38 0:00:17 0:00:21 2215k\r 47 73.1M
47 34.6M 0 0 1952k 0 0:00:38 0:00:18 0:00:20 2145k\r 50 73.1M
50 36.6M 0 0 1960k 0 0:00:38 0:00:19 0:00:19 2082k\r 52 73.1M
52 38.3M 0 0 1947k 0 0:0
0:38 0:00:20 0:00:18 1998k\r 55 73.1M 55 40.4M 0 0 1956k 0
0:00:38 0:00:21 0:00:17 1996k\r 57 73.1M 57 42.2M 0 0 1951k 0
0:00:38 0:00:22 0:00:16 1965k\r 60 73.1M 60 44.0M 0 0 1948k 0
0:00:38 0:00:23 0:00:15 1932k\r 62 73.1M 62 45.4M 0 0 1927k 0
0:00:38 0:00:24 0:00:14 1803k\r 63 73.1M 63 46.5M 0 0 1896k 0
0:00:39 0:00:25 0:00:14 1688k\r 65 73.1M 65 47.7M 0 0 1868k 0
0:00:40 0:00:26 0:00:14 1496k\r 66 73.1M 66 48.8M 0 0 1843k 0
0:00:40 0:00:27 0:00:13 1363k\r 68 73.1M 68 50.0M 0 0 1820k 0
0:00:41 0:00:28 0:00:13 1227k\r 69 73.1M 69 51.1M 0 0 1786k 0
0:00:41 0:00:29 0:00:12 1126k\r 71 73.1M 71 52.0M 0 0 1769k 0
0:00:42 0:00:30 0:00:12 1131k\r 72 73.1M 72 53.0M 0 0 1744k 0
0:00:42 0:00:31 0:00:11 1098k\r 73 73.1M 73 54.0M 0 0 1723k 0
0:00:43 0:00:32 0:00:11 1070k\r 75 73.
1M 75 55.1M 0 0 1702k 0 0:00:43 0:00:33 0:00:10 1040k\r 76
73.1M 76 56.0M 0 0 1681k 0 0:00:44 0:00:34 0:00:10 1048k\r 77
73.1M 77 56.9M 0 0 1659k 0 0:00:45 0:00:35 0:00:10 993k\r 79
73.1M 79 57.8M 0 0 1638k 0 0:00:45 0:00:36 0:00:09 972k\r 80
73.1M 80 58.7M 0 0 1618k 0 0:00:46 0:00:37 0:00:09 946k\r 81
73.1M 81 59.6M 0 0 1600k 0 0:00:46 0:00:38 0:00:08 921k\r 82
73.1M 82 60.5M 0 0 1582k 0 0:00:47 0:00:39 0:00:08 906k\r 83
73.1M 83 61.4M 0 0 1566k 0 0:00:47 0:00:40 0:00:07 917k\r 85
73.1M 85 62.1M 0 0 1546k 0 0:00:48 0:00:41 0:00:07 887k\r 86
73.1M 86 63.0M 0 0 1532k 0 0:00:48 0:00:42 0:00:06 892k\r 87
73.1M 87 63.9M 0 0 1517k 0 0:00:49 0:00:43 0:00:06 882k\r 88
73.1M 88 64.8M 0 0 1503k 0 0:00:49 0:00:44 0:00:05 878k\r 89
73.1M 89 65.6M 0 0 1489k
0 0:00:50 0:00:45 0:00:05 872k\r 91 73.1M 91 66.5M 0 0 1477k
0 0:00:50 0:00:46 0:00:04 904k\r 92 73.1M 92 67.4M 0 0 1465k
0 0:00:51 0:00:47 0:00:04 897k\r 93 73.1M 93 68.2M 0 0 1451k
0 0:00:51 0:00:48 0:00:03 889k\r 94 73.1M 94 69.2M 0 0 1441k
0 0:00:51 0:00:49 0:00:02 897k\r 95 73.1M 95 70.1M 0 0 1430k
0 0:00:52 0:00:50 0:00:02 904k\r 97 73.1M 97 71.0M 0 0 1421k
0 0:00:52 0:00:51 0:00:01 910k\r 98 73.1M 98 71.9M 0 0 1413k
0 0:00:52 0:00:52 --:--:-- 923k\r 99 73.1M 99 72.8M 0 0 1403k
0 0:00:53 0:00:53 --:--:-- 941k\r100 73.1M 100 73.1M 0 0 1401k
0 0:00:53 0:00:53 --:--:-- 941k\nrm: cannot remove
\u0027/tmp/flights98.csv\u0027: No such file or directory\n"
+ }
+ ]
+ },
+ "dateCreated": "Jan 9, 2017 11:56:07 AM",
+ "dateStarted": "Jan 9, 2017 11:57:37 AM",
+ "dateFinished": "Jan 9, 2017 11:58:50 AM",
+ "status": "FINISHED",
+ "progressUpdateIntervalMs": 500
+ },
+ {
+ "text": "%sh\n\nrm /tmp/flights99.csv.bz2\ncurl -o
/tmp/flights99.csv.bz2
\"http://stat-computing.org/dataexpo/2009/1999.csv.bz2\"\nrm
/tmp/flights99.csv\nbzip2 -d /tmp/flights99.csv.bz2\nchmod 666
/tmp/flights99.csv",
+ "user": "anonymous",
+ "dateUpdated": "Jan 9, 2017 11:59:59 AM",
+ "config": {
+ "colWidth": 12.0,
+ "enabled": true,
+ "results": {},
+ "editorSetting": {
+ "language": "sh",
+ "editOnDblClick": false
+ },
+ "editorMode": "ace/mode/sh",
+ "tableHide": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "apps": [],
+ "jobName": "paragraph_1483952257873_-1874269156",
+ "id": "20170109-115737_1346880844",
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "rm: cannot remove \u0027/tmp/flights99.csv.bz2\u0027: No
such file or directory\n % Total % Received % Xferd Average Speed Time
Time Time Current\n Dload Upload Total
Spent Left Speed\n\r 0 0 0 0 0 0 0 0
--:--:-- --:--:-- --:--:-- 0\r 0 75.7M 0 5520 0 0 9851 0
2:14:25 --:--:-- 2:14:25 9839\r 0 75.7M 0 88819 0 0 64302 0
0:20:35 0:00:01 0:20:34 64268\r 0 75.7M 0 181k 0 0 25316 0
0:52:18 0:00:07 0:52:11 25316\r 0 75.7M 0 548k 0 0 67331 0
0:19:39 0:00:08 0:19:31 67327\r 1 75.7M 1 817k 0 0 89344 0
0:14:49 0:00:09 0:14:40 89337\r 1 75.7M 1 1042k 0 0 100k 0
0:12:54 0:00:10 0:12:44 105k\r 3 75.7M 3 2461k 0 0 218k 0
0:05:55 0:00:11 0:05:44 239k\r 6 75.7M 6 5069k 0 0 412k 0
0:03:08 0:00:12 0:02:56 985k\r 11 75.7M 1
1 9165k 0 0 690k 0 0:01:52 0:00:13 0:01:39 1744k\r 14 75.7M
14 11.2M 0 0 796k 0 0:01:37 0:00:14 0:01:23 2109k\r 19 75.7M
19 14.8M 0 0 995k 0 0:01:17 0:00:15 0:01:02 2910k\r 24 75.7M
24 18.6M 0 0 1174k 0 0:01:06 0:00:16 0:00:50 3331k\r 29 75.7M
29 22.5M 0 0 1338k 0 0:00:57 0:00:17 0:00:40 3613k\r 35 75.7M
35 26.5M 0 0 1486k 0 0:00:52 0:00:18 0:00:34 3603k\r 40 75.7M
40 30.3M 0 0 1610k 0 0:00:48 0:00:19 0:00:29 4025k\r 45 75.7M
45 34.2M 0 0 1731k 0 0:00:44 0:00:20 0:00:24 3980k\r 50 75.7M
50 38.2M 0 0 1840k 0 0:00:42 0:00:21 0:00:21 4011k\r 55 75.7M
55 42.2M 0 0 1940k 0 0:00:39 0:00:22 0:00:17 4020k\r 60 75.7M
60 46.2M 0 0 2032k 0 0:00:38 0:00:23 0:00:15 4026k\r 65 75.7M
65 49.9M 0 0 2106k 0 0:00:36 0:00:24 0:00:12 4017k\r 70 75.7M
70 53.5M 0 0 2169k 0 0:0
0:35 0:00:25 0:00:10 3945k\r 75 75.7M 75 57.2M 0 0 2229k 0
0:00:34 0:00:26 0:00:08 3884k\r 80 75.7M 80 61.1M 0 0 2293k 0
0:00:33 0:00:27 0:00:06 3868k\r 86 75.7M 86 65.5M 0 0 2372k 0
0:00:32 0:00:28 0:00:04 3956k\r 92 75.7M 92 70.4M 0 0 2464k 0
0:00:31 0:00:29 0:00:02 4200k\r100 75.7M 100 75.7M 0 0 2565k 0
0:00:30 0:00:30 --:--:-- 4585k\nrm: cannot remove
\u0027/tmp/flights99.csv\u0027: No such file or directory\n"
+ }
+ ]
+ },
+ "dateCreated": "Jan 9, 2017 11:57:37 AM",
+ "dateStarted": "Jan 9, 2017 11:59:04 AM",
+ "dateFinished": "Jan 9, 2017 11:59:53 AM",
+ "status": "FINISHED",
+ "progressUpdateIntervalMs": 500
+ },
+ {
+ "text": "%sh\n\nrm /tmp/flights00.csv.bz2\ncurl -o
/tmp/flights00.csv.bz2
\"http://stat-computing.org/dataexpo/2009/2000.csv.bz2\"\nrm
/tmp/flights00.csv\nbzip2 -d /tmp/flights00.csv.bz2\nchmod 666
/tmp/flights00.csv",
+ "user": "anonymous",
+ "dateUpdated": "Jan 9, 2017 12:01:42 PM",
+ "config": {
+ "colWidth": 12.0,
+ "enabled": true,
+ "results": {},
+ "editorSetting": {
+ "language": "sh",
+ "editOnDblClick": false
+ },
+ "editorMode": "ace/mode/sh",
+ "tableHide": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "apps": [],
+ "jobName": "paragraph_1483952312038_-1315320949",
+ "id": "20170109-115832_608069986",
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "rm: cannot remove \u0027/tmp/flights00.csv.bz2\u0027: No
such file or directory\n % Total % Received % Xferd Average Speed Time
Time Time Current\n Dload Upload Total
Spent Left Speed\n\r 0 0 0 0 0 0 0 0
--:--:-- --:--:-- --:--:-- 0\r 0 0 0 0 0 0 0 0
--:--:-- 0:00:01 --:--:-- 0\r 0 78.7M 0 5520 0 0 3016 0
7:36:06 0:00:01 7:36:05 3014\r 0 78.7M 0 39987 0 0 15337 0
1:29:41 0:00:02 1:29:39 15332\r 0 78.7M 0 87755 0 0 24531 0
0:56:04 0:00:03 0:56:01 24526\r 0 78.7M 0 157k 0 0 33950 0
0:40:31 0:00:04 0:40:27 33944\r 0 78.7M 0 221k 0 0 40878 0
0:33:39 0:00:05 0:33:34 53734\r 0 78.7M 0 308k 0 0 47250 0
0:29:06 0:00:06 0:29:00 63943\r 0 78.7M 0 398k 0 0 52806 0
0:26:03 0:00:07 0:25:56 71903\r 0 78.7M
0 437k 0 0 36667 0 0:37:31 0:00:12 0:37:19 41697\r 0 78.7M
0 703k 0 0 57158 0 0:24:04 0:00:12 0:23:52 71137\r 1 78.7M
1 851k 0 0 64259 0 0:21:24 0:00:13 0:21:11 80471\r 1 78.7M
1 1171k 0 0 82442 0 0:16:41 0:00:14 0:16:27 109k\r 1 78.7M
1 1546k 0 0 79861 0 0:17:13 0:00:19 0:16:54 97134\r 3 78.7M
3 3181k 0 0 154k 0 0:08:41 0:00:20 0:08:21 327k\r 4 78.7M
4 3466k 0 0 160k 0 0:08:21 0:00:21 0:08:00 308k\r 4 78.7M
4 3565k 0 0 136k 0 0:09:50 0:00:26 0:09:24 216k\r 8 78.7M
8 7196k 0 0 270k 0 0:04:57 0:00:26 0:04:31 501k\r 10 78.7M
10 8459k 0 0 307k 0 0:04:22 0:00:27 0:03:55 894k\r 11 78.7M
11 9386k 0 0 327k 0 0:04:06 0:00:28 0:03:38 768k\r 15 78.7M
15 11.9M 0 0 413k 0 0:03:14 0:00:29 0:02:45 1093k\r 18 78.7M
18 14.5M 0 0 487k 0 0:0
2:45 0:00:30 0:02:15 2553k\r 22 78.7M 22 17.7M 0 0 574k 0
0:02:20 0:00:31 0:01:49 2195k\r 25 78.7M 25 19.9M 0 0 626k 0
0:02:08 0:00:32 0:01:36 2375k\r 28 78.7M 28 22.1M 0 0 676k 0
0:01:59 0:00:33 0:01:26 2726k\r 31 78.7M 31 24.7M 0 0 734k 0
0:01:49 0:00:34 0:01:15 2643k\r 34 78.7M 34 27.3M 0 0 789k 0
0:01:42 0:00:35 0:01:07 2638k\r 38 78.7M 38 30.0M 0 0 841k 0
0:01:35 0:00:36 0:00:59 2513k\r 40 78.7M 40 32.1M 0 0 874k 0
0:01:32 0:00:37 0:00:55 2457k\r 43 78.7M 43 34.1M 0 0 906k 0
0:01:28 0:00:38 0:00:50 2445k\r 45 78.7M 45 35.7M 0 0 925k 0
0:01:27 0:00:39 0:00:48 2250k\r 47 78.7M 47 37.4M 0 0 946k 0
0:01:25 0:00:40 0:00:45 2062k\r 49 78.7M 49 39.3M 0 0 968k 0
0:01:23 0:00:41 0:00:42 1907k\r 52 78.7M 52 41.0M 0 0 987k 0
0:01:21 0:00:42 0:00:39 1859k\r 54 78.
7M 54 42.5M 0 0 1000k 0 0:01:20 0:00:43 0:00:37 1729k\r 55
78.7M 55 43.9M 0 0 1008k 0 0:01:19 0:00:44 0:00:35 1651k\r 57
78.7M 57 45.4M 0 0 1020k 0 0:01:18 0:00:45 0:00:33 1625k\r 59
78.7M 59 46.6M 0 0 1027k 0 0:01:18 0:00:46 0:00:32 1512k\r 60
78.7M 60 47.7M 0 0 1027k 0 0:01:18 0:00:47 0:00:31 1376k\r 61
78.7M 61 48.6M 0 0 1024k 0 0:01:18 0:00:48 0:00:30 1236k\r 62
78.7M 62 49.5M 0 0 1020k 0 0:01:18 0:00:49 0:00:29 1125k\r 64
78.7M 64 50.4M 0 0 1021k 0 0:01:18 0:00:50 0:00:28 1027k\r 65
78.7M 65 51.3M 0 0 1018k 0 0:01:19 0:00:51 0:00:28 941k\r 66
78.7M 66 52.1M 0 0 1016k 0 0:01:19 0:00:52 0:00:27 910k\r 67
78.7M 67 53.0M 0 0 1014k 0 0:01:19 0:00:53 0:00:26 909k\r 68
78.7M 68 53.7M 0 0 1006k 0 0:01:20 0:00:54 0:00:26 868k\r 69
78.7M 69 54.6M 0 0 1006k
0 0:01:20 0:00:55 0:00:25 858k\r 70 78.7M 70 55.3M 0 0 1002k
0 0:01:20 0:00:56 0:00:24 831k\r 71 78.7M 71 56.1M 0 0 998k
0 0:01:20 0:00:57 0:00:23 807k\r 72 78.7M 72 56.9M 0 0 994k
0 0:01:21 0:00:58 0:00:23 787k\r 73 78.7M 73 57.6M 0 0 991k
0 0:01:21 0:00:59 0:00:22 823k\r 74 78.7M 74 58.4M 0 0 988k
0 0:01:21 0:01:00 0:00:21 784k\r 75 78.7M 75 59.2M 0 0 985k
0 0:01:21 0:01:01 0:00:20 791k\r 76 78.7M 76 60.0M 0 0 982k
0 0:01:22 0:01:02 0:00:20 797k\r 77 78.7M 77 60.8M 0 0 980k
0 0:01:22 0:01:03 0:00:19 808k\r 78 78.7M 78 61.6M 0 0 977k
0 0:01:22 0:01:04 0:00:18 812k\r 79 78.7M 79 62.4M 0 0 975k
0 0:01:22 0:01:05 0:00:17 824k\r 80 78.7M 80 63.4M 0 0 976k
0 0:01:22 0:01:06 0:00:16 870k\r 82 78.7M 82 64.9M 0 0 984k
0 0:01:21 0:01:07 0:00:14 1006k\r
85 78.7M 85 66.9M 0 0 1000k 0 0:01:20 0:01:08 0:00:12
1254k\r 88 78.7M 88 69.4M 0 0 1022k 0 0:01:18 0:01:09 0:00:09
1602k\r 92 78.7M 92 72.5M 0 0 1053k 0 0:01:16 0:01:10 0:00:06
2064k\r 96 78.7M 96 76.1M 0 0 1089k 0 0:01:13 0:01:11 0:00:02
2600k\r100 78.7M 100 78.7M 0 0 1116k 0 0:01:12 0:01:12 --:--:--
3022k\nrm: cannot remove \u0027/tmp/flights00.csv\u0027: No such file or
directory\n"
+ }
+ ]
+ },
+ "dateCreated": "Jan 9, 2017 11:58:32 AM",
+ "dateStarted": "Jan 9, 2017 12:00:01 PM",
+ "dateFinished": "Jan 9, 2017 12:01:34 PM",
+ "status": "FINISHED",
+ "progressUpdateIntervalMs": 500
+ },
+ {
+ "text": "%sh\n\nrm /tmp/carriers.csv\ncurl -o /tmp/carriers.csv
\"http://stat-computing.org/dataexpo/2009/carriers.csv\"\nchmod 666
/tmp/carriers.csv",
+ "user": "anonymous",
+ "dateUpdated": "Jan 9, 2017 12:01:48 PM",
+ "config": {
+ "colWidth": 12.0,
+ "enabled": true,
+ "results": {},
+ "editorSetting": {
+ "language": "sh",
+ "editOnDblClick": false
+ },
+ "editorMode": "ace/mode/sh",
+ "tableHide": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "apps": [],
+ "jobName": "paragraph_1483952329229_2136292082",
+ "id": "20170109-115849_1794095031",
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "rm: cannot remove \u0027/tmp/carriers.csv\u0027: No such
file or directory\n % Total % Received % Xferd Average Speed Time
Time Time Current\n Dload Upload Total
Spent Left Speed\n\r 0 0 0 0 0 0 0 0 --:--:--
--:--:-- --:--:-- 0\r 9 43758 9 4140 0 0 7588 0 0:00:05
--:--:-- 0:00:05 7582\r100 43758 100 43758 0 0 46357 0 --:--:--
--:--:-- --:--:-- 46353\n"
+ }
+ ]
+ },
+ "dateCreated": "Jan 9, 2017 11:58:49 AM",
+ "dateStarted": "Jan 9, 2017 12:01:44 PM",
+ "dateFinished": "Jan 9, 2017 12:01:45 PM",
+ "status": "FINISHED",
+ "progressUpdateIntervalMs": 500
+ },
+ {
+ "text": "%md\n### Preparing the data\nThe `flights\u003cYY\u003e.csv`
contains various data but we only need the information about the year, the
month and the carrier who served the flight. Let\u0027s retrieve this
information and create `DataSets`.",
+ "user": "anonymous",
+ "dateUpdated": "Jan 9, 2017 12:01:51 PM",
+ "config": {
+ "colWidth": 12.0,
+ "enabled": true,
+ "results": {},
+ "editorSetting": {
+ "language": "markdown",
+ "editOnDblClick": true
+ },
+ "editorMode": "ace/mode/markdown",
+ "editorHide": true,
+ "tableHide": false
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "apps": [],
+ "jobName": "paragraph_1483952363836_-1769111757",
+ "id": "20170109-115923_963126574",
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "HTML",
+ "data": "\u003cdiv
class\u003d\"markdown-body\"\u003e\n\u003ch3\u003ePreparing the
data\u003c/h3\u003e\n\u003cp\u003eThe
\u003ccode\u003eflights\u0026lt;YY\u0026gt;.csv\u003c/code\u003e contains
various data but we only need the information about the year, the month and the
carrier who served the flight. Let\u0026rsquo;s retrieve this information and
create
\u003ccode\u003eDataSets\u003c/code\u003e.\u003c/p\u003e\n\u003c/div\u003e"
+ }
+ ]
+ },
+ "dateCreated": "Jan 9, 2017 11:59:23 AM",
+ "dateStarted": "Jan 9, 2017 12:01:51 PM",
+ "dateFinished": "Jan 9, 2017 12:01:53 PM",
+ "status": "FINISHED",
+ "progressUpdateIntervalMs": 500
+ },
+ {
+ "text": "%flink\n\ncase class Flight(year: Int, month: Int, carrierCode:
String)\ncase class Carrier(code: String, name: String)\n\nval flights98 \u003d
benv.readCsvFile[Flight](\"/tmp/flights98.csv\", ignoreFirstLine \u003d true,
includedFields \u003d Array(0, 1, 8))\nval flights99 \u003d
benv.readCsvFile[Flight](\"/tmp/flights99.csv\", ignoreFirstLine \u003d true,
includedFields \u003d Array(0, 1, 8))\nval flights00 \u003d
benv.readCsvFile[Flight](\"/tmp/flights00.csv\", ignoreFirstLine \u003d true,
includedFields \u003d Array(0, 1, 8))\nval flights \u003d
flights98.union(flights99).union(flights00)\nval carriers \u003d
benv.readCsvFile[Carrier](\"/tmp/carriers.csv\", ignoreFirstLine \u003d true,
quoteCharacter \u003d \u0027\"\u0027)",
+ "user": "anonymous",
+ "dateUpdated": "Jan 9, 2017 12:02:38 PM",
+ "config": {
+ "colWidth": 12.0,
+ "enabled": true,
+ "results": {},
+ "editorSetting": {
+ "language": "scala",
+ "editOnDblClick": false
+ },
+ "editorMode": "ace/mode/scala",
+ "lineNumbers": true,
+ "tableHide": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "apps": [],
+ "jobName": "paragraph_1483952511284_-589624871",
+ "id": "20170109-120151_872852428",
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "defined class Flight\ndefined class Carrier\nflights98:
org.apache.flink.api.scala.DataSet[Flight] \u003d
org.apache.flink.api.scala.DataSet@7cd81fd5\nflights99:
org.apache.flink.api.scala.DataSet[Flight] \u003d
org.apache.flink.api.scala.DataSet@58242e79\nflights00:
org.apache.flink.api.scala.DataSet[Flight] \u003d
org.apache.flink.api.scala.DataSet@13f866c0\nflights:
org.apache.flink.api.scala.DataSet[Flight] \u003d
org.apache.flink.api.scala.DataSet@2aad2530\ncarriers:
org.apache.flink.api.scala.DataSet[Carrier] \u003d
org.apache.flink.api.scala.DataSet@148c977b\n"
+ }
+ ]
+ },
+ "dateCreated": "Jan 9, 2017 12:01:51 PM",
+ "dateStarted": "Jan 9, 2017 12:02:10 PM",
+ "dateFinished": "Jan 9, 2017 12:02:29 PM",
+ "status": "FINISHED",
+ "progressUpdateIntervalMs": 500
+ },
+ {
+ "text": "%md\n### Choosing the carrier\nNow we will search for the most
popular carrier during the whole time period.",
+ "user": "anonymous",
+ "dateUpdated": "Jan 9, 2017 12:03:08 PM",
+ "config": {
+ "colWidth": 12.0,
+ "enabled": true,
+ "results": {},
+ "editorSetting": {
+ "language": "markdown",
+ "editOnDblClick": true
+ },
+ "editorMode": "ace/mode/markdown",
+ "editorHide": true,
+ "tableHide": false
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "apps": [],
+ "jobName": "paragraph_1483952530113_212237809",
+ "id": "20170109-120210_773710997",
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "HTML",
+ "data": "\u003cdiv
class\u003d\"markdown-body\"\u003e\n\u003ch3\u003eChoosing the
carrier\u003c/h3\u003e\n\u003cp\u003eNow we will search for the most popular
carrier during the whole time period.\u003c/p\u003e\n\u003c/div\u003e"
+ }
+ ]
+ },
+ "dateCreated": "Jan 9, 2017 12:02:10 PM",
+ "dateStarted": "Jan 9, 2017 12:03:08 PM",
+ "dateFinished": "Jan 9, 2017 12:03:08 PM",
+ "status": "FINISHED",
+ "progressUpdateIntervalMs": 500
+ },
+ {
+ "text": "%flink\n\nimport
org.apache.flink.api.common.operators.Order\nimport
org.apache.flink.api.java.aggregation.Aggregations\n\ncase class
CarrierFlightsCount(carrierCode: String, count: Int)\ncase class
CountByMonth(month: Int, count: Int)\n\nval carriersFlights \u003d flights\n
.map(f \u003d\u003e CarrierFlightsCount(f.carrierCode, 1))\n
.groupBy(\"carrierCode\")\n .sum(\"count\")\n\nval maxFlights \u003d
carriersFlights\n .aggregate(Aggregations.MAX, \"count\")\n\nval bestCarrier
\u003d carriersFlights\n .join(maxFlights)\n .where(\"count\")\n
.equalTo(\"count\")\n .map(_._1)\n \nval carrierName \u003d bestCarrier\n
.join(carriers)\n .where(\"carrierCode\")\n .equalTo(\"code\")\n
.map(_._2.name)\n .collect\n .head",
+ "user": "anonymous",
+ "dateUpdated": "Jan 9, 2017 12:04:04 PM",
+ "config": {
+ "colWidth": 12.0,
+ "enabled": true,
+ "results": {},
+ "editorSetting": {
+ "language": "scala",
+ "editOnDblClick": false
+ },
+ "editorMode": "ace/mode/scala",
+ "lineNumbers": true,
+ "tableHide": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "apps": [],
+ "jobName": "paragraph_1483952588708_-1770095793",
+ "id": "20170109-120308_1328511597",
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "import
org.apache.flink.api.common.operators.Order\nimport
org.apache.flink.api.java.aggregation.Aggregations\ndefined class
CarrierFlightsCount\ndefined class CountByMonth\ncarriersFlights:
org.apache.flink.api.scala.AggregateDataSet[CarrierFlightsCount] \u003d
org.apache.flink.api.scala.AggregateDataSet@2c59be0b\nmaxFlights:
org.apache.flink.api.scala.AggregateDataSet[CarrierFlightsCount] \u003d
org.apache.flink.api.scala.AggregateDataSet@53e5fad9\nbestCarrier:
org.apache.flink.api.scala.DataSet[CarrierFlightsCount] \u003d
org.apache.flink.api.scala.DataSet@64b7b1b3\ncarrierName: String \u003d Delta
Air Lines Inc.\n"
+ }
+ ]
+ },
+ "dateCreated": "Jan 9, 2017 12:03:08 PM",
+ "dateStarted": "Jan 9, 2017 12:03:41 PM",
+ "dateFinished": "Jan 9, 2017 12:03:58 PM",
+ "status": "FINISHED",
+ "progressUpdateIntervalMs": 500
+ },
+ {
+ "text": "%flink\n\nprintln(s\"\"\"The most popular carrier
is:\n$carrierName\n\"\"\")",
+ "user": "anonymous",
+ "dateUpdated": "Jan 9, 2017 12:09:18 PM",
+ "config": {
+ "colWidth": 12.0,
+ "enabled": true,
+ "results": {},
+ "editorSetting": {
+ "language": "scala",
+ "editOnDblClick": false
+ },
+ "editorMode": "ace/mode/scala",
+ "lineNumbers": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "apps": [],
+ "jobName": "paragraph_1483952621624_-1222400539",
+ "id": "20170109-120341_952212268",
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "The most popular carrier is:\nDelta Air Lines Inc.\n\n"
+ }
+ ]
+ },
+ "dateCreated": "Jan 9, 2017 12:03:41 PM",
+ "dateStarted": "Jan 9, 2017 12:04:09 PM",
+ "dateFinished": "Jan 9, 2017 12:04:10 PM",
+ "status": "FINISHED",
+ "progressUpdateIntervalMs": 500
+ },
+ {
+ "text": "%md\n### Calculating flights\nThe last step is to filter
**Delta Air Lines** flights and group them by months.",
+ "user": "anonymous",
+ "dateUpdated": "Jan 9, 2017 12:04:26 PM",
+ "config": {
+ "colWidth": 12.0,
+ "enabled": true,
+ "results": {},
+ "editorSetting": {
+ "language": "markdown",
+ "editOnDblClick": true
+ },
+ "editorMode": "ace/mode/markdown",
+ "editorHide": true,
+ "tableHide": false
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "apps": [],
+ "jobName": "paragraph_1483952649646_-1553253944",
+ "id": "20170109-120409_2003276881",
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "HTML",
+ "data": "\u003cdiv
class\u003d\"markdown-body\"\u003e\n\u003ch3\u003eCalculating
flights\u003c/h3\u003e\n\u003cp\u003eThe last step is to filter
\u003cstrong\u003eDelta Air Lines\u003c/strong\u003e flights and group them by
months.\u003c/p\u003e\n\u003c/div\u003e"
+ }
+ ]
+ },
+ "dateCreated": "Jan 9, 2017 12:04:09 PM",
+ "dateStarted": "Jan 9, 2017 12:04:26 PM",
+ "dateFinished": "Jan 9, 2017 12:04:26 PM",
+ "status": "FINISHED",
+ "progressUpdateIntervalMs": 500
+ },
+ {
+ "title": "flights grouping",
+ "text": "%flink\n\ndef countFlightsPerMonth(flights: DataSet[Flight],\n
carrier: DataSet[CarrierFlightsCount]) \u003d {\n val
carrierFlights \u003d flights\n .join(carrier)\n
.where(\"carrierCode\")\n .equalTo(\"carrierCode\")\n .map(_._1)\n \n
carrierFlights\n .map(flight \u003d\u003e CountByMonth(flight.month, 1))\n
.groupBy(\"month\")\n .sum(\"count\")\n .sortPartition(\"month\",
Order.ASCENDING)\n}\n\nval bestCarrierFlights_98 \u003d
countFlightsPerMonth(flights98, bestCarrier)\nval bestCarrierFlights_99 \u003d
countFlightsPerMonth(flights99, bestCarrier)\nval bestCarrierFlights_00 \u003d
countFlightsPerMonth(flights00, bestCarrier)",
+ "user": "anonymous",
+ "dateUpdated": "Jan 9, 2017 12:05:06 PM",
+ "config": {
+ "colWidth": 12.0,
+ "enabled": true,
+ "results": {},
+ "editorSetting": {
+ "language": "scala",
+ "editOnDblClick": false
+ },
+ "editorMode": "ace/mode/scala",
+ "lineNumbers": true,
+ "title": true,
+ "tableHide": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "apps": [],
+ "jobName": "paragraph_1483952665972_667547355",
+ "id": "20170109-120425_2018337048",
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "countFlightsPerMonth: (flights:
org.apache.flink.api.scala.DataSet[Flight], carrier:
org.apache.flink.api.scala.DataSet[CarrierFlightsCount])org.apache.flink.api.scala.DataSet[CountByMonth]\nbestCarrierFlights_98:
org.apache.flink.api.scala.DataSet[CountByMonth] \u003d
org.apache.flink.api.scala.PartitionSortedDataSet@2aa64309\nbestCarrierFlights_99:
org.apache.flink.api.scala.DataSet[CountByMonth] \u003d
org.apache.flink.api.scala.PartitionSortedDataSet@35fe60c4\nbestCarrierFlights_00:
org.apache.flink.api.scala.DataSet[CountByMonth] \u003d
org.apache.flink.api.scala.PartitionSortedDataSet@4621410f\n"
+ }
+ ]
+ },
+ "dateCreated": "Jan 9, 2017 12:04:25 PM",
+ "dateStarted": "Jan 9, 2017 12:04:50 PM",
+ "dateFinished": "Jan 9, 2017 12:04:51 PM",
+ "status": "FINISHED",
+ "progressUpdateIntervalMs": 500
+ },
+ {
+ "title": "making a results table",
+ "text": "%flink\n\ndef monthAsString(month: Int): String \u003d {\n
month match {\n case 1 \u003d\u003e \"Jan\"\n case 2 \u003d\u003e
\"Feb\"\n case 3 \u003d\u003e \"Mar\"\n case 4 \u003d\u003e \"Apr\"\n
case 5 \u003d\u003e \"May\"\n case 6 \u003d\u003e \"Jun\"\n case 7
\u003d\u003e \"Jul\"\n case 8 \u003d\u003e \"Aug\"\n case 9 \u003d\u003e
\"Sept\"\n case 10 \u003d\u003e \"Oct\"\n case 11 \u003d\u003e \"Nov\"\n
case 12 \u003d\u003e \"Dec\"\n }\n}\n\n// We should put all the results into
a common DataFrame\n// to show them in a common picture\nval bestCarrierFlights
\u003d bestCarrierFlights_98\n .join(bestCarrierFlights_99)\n
.where(\"month\")\n .equalTo(\"month\")\n .map(tuple \u003d\u003e
(tuple._1.month, tuple._1.count, tuple._2.count))\n
.join(bestCarrierFlights_00)\n .where(0)\n .equalTo(\"month\")\n .map(tuple
\u003d\u003e (tuple._1._1, tuple._1._2, tuple._1._3, tuple._2.count))\n
.collect\n \nvar flightsByMonthTable \u003d
s\"Month\\t1998\\t1999\\t2000\\n\"\nbestCarrierFlights.foreach(data
\u003d\u003e flightsByMonthTable +\u003d
s\"${monthAsString(data._1)}\\t${data._2}\\t${data._3}\\t${data._4}\\n\")",
+ "user": "anonymous",
+ "dateUpdated": "Jan 9, 2017 12:06:03 PM",
+ "config": {
+ "colWidth": 12.0,
+ "enabled": true,
+ "results": {},
+ "editorSetting": {
+ "language": "scala",
+ "editOnDblClick": false
+ },
+ "editorMode": "ace/mode/scala",
+ "lineNumbers": true,
+ "title": true,
+ "tableHide": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "apps": [],
+ "jobName": "paragraph_1483952690164_-1061667443",
+ "id": "20170109-120450_1574916350",
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "monthAsString: (month: Int)String\nbestCarrierFlights:
Seq[(Int, Int, Int, Int)] \u003d Buffer((1,78523,77745,78055),
(2,71101,70498,71090), (3,78906,77812,78453), (4,75726,75343,75247),
(5,77937,77226,76797), (6,75432,75840,74846), (7,77521,77264,75776),
(8,78104,78141,77654), (9,74840,75067,73696), (10,76145,77829,77425),
(11,73552,74411,73659), (12,77308,76954,75331))\nflightsByMonthTable: String
\u003d \n\"Month\t1998\t1999\t2000\n\"\n"
+ }
+ ]
+ },
+ "dateCreated": "Jan 9, 2017 12:04:50 PM",
+ "dateStarted": "Jan 9, 2017 12:05:24 PM",
+ "dateFinished": "Jan 9, 2017 12:05:59 PM",
+ "status": "FINISHED",
+ "progressUpdateIntervalMs": 500
+ },
+ {
+ "title": "\"Delta Air Lines\" flights count by months",
+ "text": "%flink\n\nprintln(s\"\"\"%table\n$flightsByMonthTable\n\"\"\")",
+ "user": "anonymous",
+ "dateUpdated": "Jan 9, 2017 12:06:17 PM",
+ "config": {
+ "colWidth": 12.0,
+ "enabled": true,
+ "results": {
+ "0": {
+ "graph": {
+ "mode": "lineChart",
+ "height": 300.0,
+ "optionOpen": false,
+ "setting": {
+ "lineChart": {}
+ },
+ "commonSetting": {},
+ "keys": [
+ {
+ "name": "Month",
+ "index": 0.0,
+ "aggr": "sum"
+ }
+ ],
+ "groups": [],
+ "values": [
+ {
+ "name": "1998",
+ "index": 1.0,
+ "aggr": "sum"
+ },
+ {
+ "name": "1999",
+ "index": 2.0,
+ "aggr": "sum"
+ },
+ {
+ "name": "2000",
+ "index": 3.0,
+ "aggr": "sum"
+ }
+ ]
+ },
+ "helium": {}
+ }
+ },
+ "editorSetting": {
+ "language": "scala",
+ "editOnDblClick": false
+ },
+ "editorMode": "ace/mode/scala",
+ "title": true,
+ "lineNumbers": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "apps": [],
+ "jobName": "paragraph_1483952724460_191505697",
+ "id": "20170109-120524_2037622815",
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TABLE",
+ "data":
"Month\t1998\t1999\t2000\nJan\t78523\t77745\t78055\nFeb\t71101\t70498\t71090\nMar\t78906\t77812\t78453\nApr\t75726\t75343\t75247\nMay\t77937\t77226\t76797\nJun\t75432\t75840\t74846\nJul\t77521\t77264\t75776\nAug\t78104\t78141\t77654\nSept\t74840\t75067\t73696\nOct\t76145\t77829\t77425\nNov\t73552\t74411\t73659\nDec\t77308\t76954\t75331\n"
+ }
+ ]
+ },
+ "dateCreated": "Jan 9, 2017 12:05:24 PM",
+ "dateStarted": "Jan 9, 2017 12:06:07 PM",
+ "dateFinished": "Jan 9, 2017 12:06:08 PM",
+ "status": "FINISHED",
+ "progressUpdateIntervalMs": 500
+ },
+ {
+ "text": "%md\n### Results\nLooking at this chart we can say that
February is the least popular month, but this is only because it has fewer days
(28 or 29) than the other months (30 or 31). To get a fairer picture we
should calculate the average flight count per day for each month.",
+ "user": "anonymous",
+ "dateUpdated": "Jan 9, 2017 12:06:34 PM",
+ "config": {
+ "colWidth": 12.0,
+ "enabled": true,
+ "results": {},
+ "editorSetting": {
+ "language": "markdown",
+ "editOnDblClick": true
+ },
+ "editorMode": "ace/mode/markdown",
+ "editorHide": true,
+ "tableHide": false
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "apps": [],
+ "jobName": "paragraph_1483952767719_-1010557136",
+ "id": "20170109-120607_67673280",
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "HTML",
+ "data": "\u003cdiv
class\u003d\"markdown-body\"\u003e\n\u003ch3\u003eResults\u003c/h3\u003e\n\u003cp\u003eLooking
at this chart we can say that February is the least popular month, but this
is only because it has fewer days (28 or 29) than the other months (30 or 31).
To get a fairer picture we should calculate the average flight count per
day for each month.\u003c/p\u003e\n\u003c/div\u003e"
+ }
+ ]
+ },
+ "dateCreated": "Jan 9, 2017 12:06:07 PM",
+ "dateStarted": "Jan 9, 2017 12:06:34 PM",
+ "dateFinished": "Jan 9, 2017 12:06:34 PM",
+ "status": "FINISHED",
+ "progressUpdateIntervalMs": 500
+ },
+ {
+ "text": "%flink\n\ndef daysInMonth(month: Int, year: Int): Int \u003d
{\n month match {\n case 1 \u003d\u003e 31\n case 2 \u003d\u003e if
(year % 4 \u003d\u003d 0) {\n 29\n } else {\n 28\n }\n
case 3 \u003d\u003e 31\n case 4 \u003d\u003e 30\n case 5 \u003d\u003e
31\n case 6 \u003d\u003e 30\n case 7 \u003d\u003e 31\n case 8
\u003d\u003e 31\n case 9 \u003d\u003e 30\n case 10 \u003d\u003e 31\n
case 11 \u003d\u003e 30\n case 12 \u003d\u003e 31\n }\n}\n\n\nvar
flightsByDayTable \u003d
s\"Month\\t1998\\t1999\\t2000\\n\"\n\nbestCarrierFlights.foreach(data
\u003d\u003e flightsByDayTable +\u003d
s\"${monthAsString(data._1)}\\t${data._2/daysInMonth(data._1,1998)}\\t${data._3/daysInMonth(data._1,1999)}\\t${data._4/daysInMonth(data._1,2000)}\\n\")",
+ "user": "anonymous",
+ "dateUpdated": "Jan 9, 2017 12:06:58 PM",
+ "config": {
+ "colWidth": 12.0,
+ "enabled": true,
+ "results": {},
+ "editorSetting": {
+ "language": "scala",
+ "editOnDblClick": false
+ },
+ "editorMode": "ace/mode/scala",
+ "lineNumbers": true,
+ "tableHide": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "apps": [],
+ "jobName": "paragraph_1483952794097_-785833130",
+ "id": "20170109-120634_492170963",
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TEXT",
+ "data": "daysInMonth: (month: Int, year:
Int)Int\nflightsByDayTable: String \u003d \n\"Month\t1998\t1999\t2000\n\"\n"
+ }
+ ]
+ },
+ "dateCreated": "Jan 9, 2017 12:06:34 PM",
+ "dateStarted": "Jan 9, 2017 12:06:53 PM",
+ "dateFinished": "Jan 9, 2017 12:06:53 PM",
+ "status": "FINISHED",
+ "progressUpdateIntervalMs": 500
+ },
+ {
+ "title": "\"Delta Air Lines\" average daily flight count by month",
+ "text": "%flink\n\nprintln(s\"\"\"%table\n$flightsByDayTable\n\"\"\")",
+ "user": "anonymous",
+ "dateUpdated": "Jan 9, 2017 12:10:56 PM",
+ "config": {
+ "colWidth": 12.0,
+ "enabled": true,
+ "results": {
+ "0": {
+ "graph": {
+ "mode": "lineChart",
+ "height": 300.0,
+ "optionOpen": false,
+ "setting": {
+ "lineChart": {}
+ },
+ "commonSetting": {},
+ "keys": [
+ {
+ "name": "Month",
+ "index": 0.0,
+ "aggr": "sum"
+ }
+ ],
+ "groups": [],
+ "values": [
+ {
+ "name": "1998",
+ "index": 1.0,
+ "aggr": "sum"
+ },
+ {
+ "name": "1999",
+ "index": 2.0,
+ "aggr": "sum"
+ },
+ {
+ "name": "2000",
+ "index": 3.0,
+ "aggr": "sum"
+ }
+ ]
+ },
+ "helium": {}
+ }
+ },
+ "editorSetting": {
+ "language": "scala",
+ "editOnDblClick": false
+ },
+ "editorMode": "ace/mode/scala",
+ "title": true,
+ "lineNumbers": true
+ },
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "apps": [],
+ "jobName": "paragraph_1483952813391_1847418990",
+ "id": "20170109-120653_1870236569",
+ "results": {
+ "code": "SUCCESS",
+ "msg": [
+ {
+ "type": "TABLE",
+ "data":
"Month\t1998\t1999\t2000\nJan\t2533\t2507\t2517\nFeb\t2539\t2517\t2451\nMar\t2545\t2510\t2530\nApr\t2524\t2511\t2508\nMay\t2514\t2491\t2477\nJun\t2514\t2528\t2494\nJul\t2500\t2492\t2444\nAug\t2519\t2520\t2504\nSept\t2494\t2502\t2456\nOct\t2456\t2510\t2497\nNov\t2451\t2480\t2455\nDec\t2493\t2482\t2430\n"
+ }
+ ]
+ },
+ "dateCreated": "Jan 9, 2017 12:06:53 PM",
+ "dateStarted": "Jan 9, 2017 12:07:22 PM",
+ "dateFinished": "Jan 9, 2017 12:07:23 PM",
+ "status": "FINISHED",
+ "progressUpdateIntervalMs": 500
+ },
+ {
+ "text": "%flink\n",
+ "dateUpdated": "Jan 9, 2017 12:07:22 PM",
+ "config": {},
+ "settings": {
+ "params": {},
+ "forms": {}
+ },
+ "apps": [],
+ "jobName": "paragraph_1483952842919_587228425",
+ "id": "20170109-120722_939892827",
+ "dateCreated": "Jan 9, 2017 12:07:22 PM",
+ "status": "READY",
+ "progressUpdateIntervalMs": 500
+ }
+ ],
+ "name": "Using Flink for batch processing",
+ "id": "2C35YU814",
+ "angularObjects": {
+ "2C4PVECE6:shared_process": [],
+ "2C4US9MUF:shared_process": [],
+ "2C4FYNB4G:shared_process": [],
+ "2C4GX28KP:shared_process": [],
+ "2C648AXXN:shared_process": [],
+ "2C3MSEJ2F:shared_process": [],
+ "2C6F2N6BT:shared_process": [],
+ "2C3US2RTN:shared_process": [],
+ "2C3TYMD6K:shared_process": [],
+ "2C3FDPZRX:shared_process": [],
+ "2C5TEARYX:shared_process": [],
+ "2C5D6NSNG:shared_process": [],
+ "2C6FVVEAD:shared_process": [],
+ "2C582KNWG:shared_process": [],
+ "2C6ZMVGM7:shared_process": [],
+ "2C6UYQG8R:shared_process": [],
+ "2C666VZT2:shared_process": [],
+ "2C4JRCY3K:shared_process": [],
+ "2C64W5T9D:shared_process": []
+ },
+ "config": {
+ "looknfeel": "default"
+ },
+ "info": {}
+}