http://git-wip-us.apache.org/repos/asf/zeppelin/blob/085efeb6/notebook/2C35YU814/note.json ---------------------------------------------------------------------- diff --git a/notebook/2C35YU814/note.json b/notebook/2C35YU814/note.json deleted file mode 100644 index 09ed8c6..0000000 --- a/notebook/2C35YU814/note.json +++ /dev/null @@ -1,806 +0,0 @@ -{ - "paragraphs": [ - { - "text": "%md\n### Intro\nThis notebook is an example of how to use **Apache Flink** for processing simple data sets. We will take an open airline data set from [stat-computing.org](http://stat-computing.org) and find out who was the most popular carrier during 1998-2000 years. Next we will build a chart that shows flights distribution by months and look how it changes from year to year. We will use Zeppelin `%table` display system to build charts.", - "user": "anonymous", - "dateUpdated": "Jan 9, 2017 11:55:42 AM", - "config": { - "colWidth": 12.0, - "enabled": true, - "results": {}, - "editorSetting": { - "language": "markdown", - "editOnDblClick": true - }, - "editorMode": "ace/mode/markdown", - "editorHide": true, - "tableHide": false - }, - "settings": { - "params": {}, - "forms": {} - }, - "apps": [], - "jobName": "paragraph_1483952101049_-1120777567", - "id": "20170109-115501_192763014", - "results": { - "code": "SUCCESS", - "msg": [ - { - "type": "HTML", - "data": "\u003cdiv class\u003d\"markdown-body\"\u003e\n\u003ch3\u003eIntro\u003c/h3\u003e\n\u003cp\u003eThis notebook is an example of how to use \u003cstrong\u003eApache Flink\u003c/strong\u003e for processing simple data sets. We will take an open airline data set from \u003ca href\u003d\"http://stat-computing.org\"\u003estat-computing.org\u003c/a\u003e and find out who was the most popular carrier during 1998-2000 years. Next we will build a chart that shows flights distribution by months and look how it changes from year to year. 
We will use Zeppelin \u003ccode\u003e%table\u003c/code\u003e display system to build charts.\u003c/p\u003e\n\u003c/div\u003e" - } - ] - }, - "dateCreated": "Jan 9, 2017 11:55:01 AM", - "dateStarted": "Jan 9, 2017 11:55:42 AM", - "dateFinished": "Jan 9, 2017 11:55:44 AM", - "status": "FINISHED", - "progressUpdateIntervalMs": 500 - }, - { - "text": "%md\n### Getting the data\nFirst we need to download and unpack the data. We will get three big data sets with flight details (one pack for each year) and a small one with carriers names. In total we will get for about 1,5 GB of data. To be able to process such amount of data it is recommended to increase `shell.command.timeout.millisecs` value in `%sh` interpreter settings up to several minutes. You can find interpreters configuration by clicking on `Interpreter` in a drop-down menu from the top right corner of the Zeppelin web-ui.", - "user": "anonymous", - "dateUpdated": "Jan 9, 2017 11:56:08 AM", - "config": { - "colWidth": 12.0, - "enabled": true, - "results": {}, - "editorSetting": { - "language": "scala", - "editOnDblClick": false - }, - "editorMode": "ace/mode/scala", - "editorHide": true - }, - "settings": { - "params": {}, - "forms": {} - }, - "apps": [], - "jobName": "paragraph_1483952142017_284386712", - "id": "20170109-115542_1487437739", - "results": { - "code": "SUCCESS", - "msg": [ - { - "type": "HTML", - "data": "\u003cdiv class\u003d\"markdown-body\"\u003e\n\u003ch3\u003eGetting the data\u003c/h3\u003e\n\u003cp\u003eFirst we need to download and unpack the data. We will get three big data sets with flight details (one pack for each year) and a small one with carriers names. In total we will get for about 1,5 GB of data. To be able to process such amount of data it is recommended to increase \u003ccode\u003eshell.command.timeout.millisecs\u003c/code\u003e value in \u003ccode\u003e%sh\u003c/code\u003e interpreter settings up to several minutes. 
You can find interpreters configuration by clicking on \u003ccode\u003eInterpreter\u003c/code\u003e in a drop-down menu from the top right corner of the Zeppelin web-ui.\u003c/p\u003e\n\u003c/div\u003e" - } - ] - }, - "dateCreated": "Jan 9, 2017 11:55:42 AM", - "dateStarted": "Jan 9, 2017 11:56:07 AM", - "dateFinished": "Jan 9, 2017 11:56:07 AM", - "status": "FINISHED", - "progressUpdateIntervalMs": 500 - }, - { - "text": "%sh\n\nrm /tmp/flights98.csv.bz2\ncurl -o /tmp/flights98.csv.bz2 \"http://stat-computing.org/dataexpo/2009/1998.csv.bz2\"\nrm /tmp/flights98.csv\nbzip2 -d /tmp/flights98.csv.bz2\nchmod 666 /tmp/flights98.csv", - "user": "anonymous", - "dateUpdated": "Jan 9, 2017 11:59:02 AM", - "config": { - "colWidth": 12.0, - "enabled": true, - "results": {}, - "editorSetting": { - "language": "sh", - "editOnDblClick": false - }, - "editorMode": "ace/mode/sh", - "tableHide": true - }, - "settings": { - "params": {}, - "forms": {} - }, - "apps": [], - "jobName": "paragraph_1483952167547_-566831096", - "id": "20170109-115607_1634441713", - "results": { - "code": "SUCCESS", - "msg": [ - { - "type": "TEXT", - "data": "rm: cannot remove \u0027/tmp/flights98.csv.bz2\u0027: No such file or directory\n % Total % Received % Xferd Average Speed Time Time Time Current\n Dload Upload Total Spent Left Speed\n\r 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0\r 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0\r 0 73.1M 0 64295 0 0 51646 0 0:24:44 0:00:01 0:24:43 51642\r 0 73.1M 0 358k 0 0 160k 0 0:07:47 0:00:02 0:07:45 160k\r 1 73.1M 1 1209k 0 0 373k 0 0:03:20 0:00:03 0:03:17 373k\r 4 73.1M 4 3204k 0 0 773k 0 0:01:36 0:00:04 0:01:32 773k\r 7 73.1M 7 5508k 0 0 1071k 0 0:01:09 0:00:05 0:01:04 1145k\r 10 73.1M 10 7875k 0 0 1280k 0 0:00:58 0:00:06 0:00:52 1592k\r 13 73.1M 13 10.1M 0 0 1458k 0 0:00:51 0:00:07 0:00:44 2049k\r 17 73.1M 1 7 12.7M 0 0 1608k 0 0:00:46 0:00:08 0:00:38 2422k\r 20 73.1M 20 14.9M 0 0 1671k 0 0:00:44 0:00:09 0:00:35 2413k\r 23 73.1M 23 17.1M 0 0 1728k 0 0:00:43 
0:00:10 0:00:33 2403k\r 26 73.1M 26 19.4M 0 0 1787k 0 0:00:41 0:00:11 0:00:30 2411k\r 29 73.1M 29 21.7M 0 0 1837k 0 0:00:40 0:00:12 0:00:28 2379k\r 32 73.1M 32 24.1M 0 0 1879k 0 0:00:39 0:00:13 0:00:26 2322k\r 36 73.1M 36 26.4M 0 0 1916k 0 0:00:39 0:00:14 0:00:25 2365k\r 39 73.1M 39 28.5M 0 0 1930k 0 0:00:38 0:00:15 0:00:23 2341k\r 41 73.1M 41 30.6M 0 0 1943k 0 0:00:38 0:00:16 0:00:22 2292k\r 44 73.1M 44 32.6M 0 0 1947k 0 0:00:38 0:00:17 0:00:21 2215k\r 47 73.1M 47 34.6M 0 0 1952k 0 0:00:38 0:00:18 0:00:20 2145k\r 50 73.1M 50 36.6M 0 0 1960k 0 0:00:38 0:00:19 0:00:19 2082k\r 52 73.1M 52 38.3M 0 0 1947k 0 0:0 0:38 0:00:20 0:00:18 1998k\r 55 73.1M 55 40.4M 0 0 1956k 0 0:00:38 0:00:21 0:00:17 1996k\r 57 73.1M 57 42.2M 0 0 1951k 0 0:00:38 0:00:22 0:00:16 1965k\r 60 73.1M 60 44.0M 0 0 1948k 0 0:00:38 0:00:23 0:00:15 1932k\r 62 73.1M 62 45.4M 0 0 1927k 0 0:00:38 0:00:24 0:00:14 1803k\r 63 73.1M 63 46.5M 0 0 1896k 0 0:00:39 0:00:25 0:00:14 1688k\r 65 73.1M 65 47.7M 0 0 1868k 0 0:00:40 0:00:26 0:00:14 1496k\r 66 73.1M 66 48.8M 0 0 1843k 0 0:00:40 0:00:27 0:00:13 1363k\r 68 73.1M 68 50.0M 0 0 1820k 0 0:00:41 0:00:28 0:00:13 1227k\r 69 73.1M 69 51.1M 0 0 1786k 0 0:00:41 0:00:29 0:00:12 1126k\r 71 73.1M 71 52.0M 0 0 1769k 0 0:00:42 0:00:30 0:00:12 1131k\r 72 73.1M 72 53.0M 0 0 1744k 0 0:00:42 0:00:31 0:00:11 1098k\r 73 73.1M 73 54.0M 0 0 1723k 0 0:00:43 0:00:32 0:00:11 1070k\r 75 73. 
1M 75 55.1M 0 0 1702k 0 0:00:43 0:00:33 0:00:10 1040k\r 76 73.1M 76 56.0M 0 0 1681k 0 0:00:44 0:00:34 0:00:10 1048k\r 77 73.1M 77 56.9M 0 0 1659k 0 0:00:45 0:00:35 0:00:10 993k\r 79 73.1M 79 57.8M 0 0 1638k 0 0:00:45 0:00:36 0:00:09 972k\r 80 73.1M 80 58.7M 0 0 1618k 0 0:00:46 0:00:37 0:00:09 946k\r 81 73.1M 81 59.6M 0 0 1600k 0 0:00:46 0:00:38 0:00:08 921k\r 82 73.1M 82 60.5M 0 0 1582k 0 0:00:47 0:00:39 0:00:08 906k\r 83 73.1M 83 61.4M 0 0 1566k 0 0:00:47 0:00:40 0:00:07 917k\r 85 73.1M 85 62.1M 0 0 1546k 0 0:00:48 0:00:41 0:00:07 887k\r 86 73.1M 86 63.0M 0 0 1532k 0 0:00:48 0:00:42 0:00:06 892k\r 87 73.1M 87 63.9M 0 0 1517k 0 0:00:49 0:00:43 0:00:06 882k\r 88 73.1M 88 64.8M 0 0 1503k 0 0:00:49 0:00:44 0:00:05 878k\r 89 73.1M 89 65.6M 0 0 1489k 0 0:00:50 0:00:45 0:00:05 872k\r 91 73.1M 91 66.5M 0 0 1477k 0 0:00:50 0:00:46 0:00:04 904k\r 92 73.1M 92 67.4M 0 0 1465k 0 0:00:51 0:00:47 0:00:04 897k\r 93 73.1M 93 68.2M 0 0 1451k 0 0:00:51 0:00:48 0:00:03 889k\r 94 73.1M 94 69.2M 0 0 1441k 0 0:00:51 0:00:49 0:00:02 897k\r 95 73.1M 95 70.1M 0 0 1430k 0 0:00:52 0:00:50 0:00:02 904k\r 97 73.1M 97 71.0M 0 0 1421k 0 0:00:52 0:00:51 0:00:01 910k\r 98 73.1M 98 71.9M 0 0 1413k 0 0:00:52 0:00:52 --:--:-- 923k\r 99 73.1M 99 72.8M 0 0 1403k 0 0:00:53 0:00:53 --:--:-- 941k\r100 73.1M 100 73.1M 0 0 1401k 0 0:00:53 0:00:53 --:--:-- 941k\nrm: cannot remove \u0027/tmp/flights98.csv\u0027: No such file or directory\n" - } - ] - }, - "dateCreated": "Jan 9, 2017 11:56:07 AM", - "dateStarted": "Jan 9, 2017 11:57:37 AM", - "dateFinished": "Jan 9, 2017 11:58:50 AM", - "status": "FINISHED", - "progressUpdateIntervalMs": 500 - }, - { - "text": "%sh\n\nrm /tmp/flights99.csv.bz2\ncurl -o /tmp/flights99.csv.bz2 \"http://stat-computing.org/dataexpo/2009/1999.csv.bz2\"\nrm /tmp/flights99.csv\nbzip2 -d /tmp/flights99.csv.bz2\nchmod 666 /tmp/flights99.csv", - "user": "anonymous", - "dateUpdated": "Jan 9, 2017 11:59:59 AM", - "config": { - "colWidth": 12.0, - "enabled": true, - "results": {}, - 
"editorSetting": { - "language": "sh", - "editOnDblClick": false - }, - "editorMode": "ace/mode/sh", - "tableHide": true - }, - "settings": { - "params": {}, - "forms": {} - }, - "apps": [], - "jobName": "paragraph_1483952257873_-1874269156", - "id": "20170109-115737_1346880844", - "results": { - "code": "SUCCESS", - "msg": [ - { - "type": "TEXT", - "data": "rm: cannot remove \u0027/tmp/flights99.csv.bz2\u0027: No such file or directory\n % Total % Received % Xferd Average Speed Time Time Time Current\n Dload Upload Total Spent Left Speed\n\r 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0\r 0 75.7M 0 5520 0 0 9851 0 2:14:25 --:--:-- 2:14:25 9839\r 0 75.7M 0 88819 0 0 64302 0 0:20:35 0:00:01 0:20:34 64268\r 0 75.7M 0 181k 0 0 25316 0 0:52:18 0:00:07 0:52:11 25316\r 0 75.7M 0 548k 0 0 67331 0 0:19:39 0:00:08 0:19:31 67327\r 1 75.7M 1 817k 0 0 89344 0 0:14:49 0:00:09 0:14:40 89337\r 1 75.7M 1 1042k 0 0 100k 0 0:12:54 0:00:10 0:12:44 105k\r 3 75.7M 3 2461k 0 0 218k 0 0:05:55 0:00:11 0:05:44 239k\r 6 75.7M 6 5069k 0 0 412k 0 0:03:08 0:00:12 0:02:56 985k\r 11 75.7M 1 1 9165k 0 0 690k 0 0:01:52 0:00:13 0:01:39 1744k\r 14 75.7M 14 11.2M 0 0 796k 0 0:01:37 0:00:14 0:01:23 2109k\r 19 75.7M 19 14.8M 0 0 995k 0 0:01:17 0:00:15 0:01:02 2910k\r 24 75.7M 24 18.6M 0 0 1174k 0 0:01:06 0:00:16 0:00:50 3331k\r 29 75.7M 29 22.5M 0 0 1338k 0 0:00:57 0:00:17 0:00:40 3613k\r 35 75.7M 35 26.5M 0 0 1486k 0 0:00:52 0:00:18 0:00:34 3603k\r 40 75.7M 40 30.3M 0 0 1610k 0 0:00:48 0:00:19 0:00:29 4025k\r 45 75.7M 45 34.2M 0 0 1731k 0 0:00:44 0:00:20 0:00:24 3980k\r 50 75.7M 50 38.2M 0 0 1840k 0 0:00:42 0:00:21 0:00:21 4011k\r 55 75.7M 55 42.2M 0 0 1940k 0 0:00:39 0:00:22 0:00:17 4020k\r 60 75.7M 60 46.2M 0 0 2032k 0 0:00:38 0:00:23 0:00:15 4026k\r 65 75.7M 65 49.9M 0 0 2106k 0 0:00:36 0:00:24 0:00:12 4017k\r 70 75.7M 70 53.5M 0 0 2169k 0 0:0 0:35 0:00:25 0:00:10 3945k\r 75 75.7M 75 57.2M 0 0 2229k 0 0:00:34 0:00:26 0:00:08 3884k\r 80 75.7M 80 61.1M 0 0 2293k 0 0:00:33 0:00:27 0:00:06 3868k\r 86 
75.7M 86 65.5M 0 0 2372k 0 0:00:32 0:00:28 0:00:04 3956k\r 92 75.7M 92 70.4M 0 0 2464k 0 0:00:31 0:00:29 0:00:02 4200k\r100 75.7M 100 75.7M 0 0 2565k 0 0:00:30 0:00:30 --:--:-- 4585k\nrm: cannot remove \u0027/tmp/flights99.csv\u0027: No such file or directory\n" - } - ] - }, - "dateCreated": "Jan 9, 2017 11:57:37 AM", - "dateStarted": "Jan 9, 2017 11:59:04 AM", - "dateFinished": "Jan 9, 2017 11:59:53 AM", - "status": "FINISHED", - "progressUpdateIntervalMs": 500 - }, - { - "text": "%sh\n\nrm /tmp/flights00.csv.bz2\ncurl -o /tmp/flights00.csv.bz2 \"http://stat-computing.org/dataexpo/2009/2000.csv.bz2\"\nrm /tmp/flights00.csv\nbzip2 -d /tmp/flights00.csv.bz2\nchmod 666 /tmp/flights00.csv", - "user": "anonymous", - "dateUpdated": "Jan 9, 2017 12:01:42 PM", - "config": { - "colWidth": 12.0, - "enabled": true, - "results": {}, - "editorSetting": { - "language": "sh", - "editOnDblClick": false - }, - "editorMode": "ace/mode/sh", - "tableHide": true - }, - "settings": { - "params": {}, - "forms": {} - }, - "apps": [], - "jobName": "paragraph_1483952312038_-1315320949", - "id": "20170109-115832_608069986", - "results": { - "code": "SUCCESS", - "msg": [ - { - "type": "TEXT", - "data": "rm: cannot remove \u0027/tmp/flights00.csv.bz2\u0027: No such file or directory\n % Total % Received % Xferd Average Speed Time Time Time Current\n Dload Upload Total Spent Left Speed\n\r 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0\r 0 0 0 0 0 0 0 0 --:--:-- 0:00:01 --:--:-- 0\r 0 78.7M 0 5520 0 0 3016 0 7:36:06 0:00:01 7:36:05 3014\r 0 78.7M 0 39987 0 0 15337 0 1:29:41 0:00:02 1:29:39 15332\r 0 78.7M 0 87755 0 0 24531 0 0:56:04 0:00:03 0:56:01 24526\r 0 78.7M 0 157k 0 0 33950 0 0:40:31 0:00:04 0:40:27 33944\r 0 78.7M 0 221k 0 0 40878 0 0:33:39 0:00:05 0:33:34 53734\r 0 78.7M 0 308k 0 0 47250 0 0:29:06 0:00:06 0:29:00 63943\r 0 78.7M 0 398k 0 0 52806 0 0:26:03 0:00:07 0:25:56 71903\r 0 78.7M 0 437k 0 0 36667 0 0:37:31 0:00:12 0:37:19 41697\r 0 78.7M 0 703k 0 0 57158 0 0:24:04 0:00:12 0:23:52 
71137\r 1 78.7M 1 851k 0 0 64259 0 0:21:24 0:00:13 0:21:11 80471\r 1 78.7M 1 1171k 0 0 82442 0 0:16:41 0:00:14 0:16:27 109k\r 1 78.7M 1 1546k 0 0 79861 0 0:17:13 0:00:19 0:16:54 97134\r 3 78.7M 3 3181k 0 0 154k 0 0:08:41 0:00:20 0:08:21 327k\r 4 78.7M 4 3466k 0 0 160k 0 0:08:21 0:00:21 0:08:00 308k\r 4 78.7M 4 3565k 0 0 136k 0 0:09:50 0:00:26 0:09:24 216k\r 8 78.7M 8 7196k 0 0 270k 0 0:04:57 0:00:26 0:04:31 501k\r 10 78.7M 10 8459k 0 0 307k 0 0:04:22 0:00:27 0:03:55 894k\r 11 78.7M 11 9386k 0 0 327k 0 0:04:06 0:00:28 0:03:38 768k\r 15 78.7M 15 11.9M 0 0 413k 0 0:03:14 0:00:29 0:02:45 1093k\r 18 78.7M 18 14.5M 0 0 487k 0 0:0 2:45 0:00:30 0:02:15 2553k\r 22 78.7M 22 17.7M 0 0 574k 0 0:02:20 0:00:31 0:01:49 2195k\r 25 78.7M 25 19.9M 0 0 626k 0 0:02:08 0:00:32 0:01:36 2375k\r 28 78.7M 28 22.1M 0 0 676k 0 0:01:59 0:00:33 0:01:26 2726k\r 31 78.7M 31 24.7M 0 0 734k 0 0:01:49 0:00:34 0:01:15 2643k\r 34 78.7M 34 27.3M 0 0 789k 0 0:01:42 0:00:35 0:01:07 2638k\r 38 78.7M 38 30.0M 0 0 841k 0 0:01:35 0:00:36 0:00:59 2513k\r 40 78.7M 40 32.1M 0 0 874k 0 0:01:32 0:00:37 0:00:55 2457k\r 43 78.7M 43 34.1M 0 0 906k 0 0:01:28 0:00:38 0:00:50 2445k\r 45 78.7M 45 35.7M 0 0 925k 0 0:01:27 0:00:39 0:00:48 2250k\r 47 78.7M 47 37.4M 0 0 946k 0 0:01:25 0:00:40 0:00:45 2062k\r 49 78.7M 49 39.3M 0 0 968k 0 0:01:23 0:00:41 0:00:42 1907k\r 52 78.7M 52 41.0M 0 0 987k 0 0:01:21 0:00:42 0:00:39 1859k\r 54 78. 
7M 54 42.5M 0 0 1000k 0 0:01:20 0:00:43 0:00:37 1729k\r 55 78.7M 55 43.9M 0 0 1008k 0 0:01:19 0:00:44 0:00:35 1651k\r 57 78.7M 57 45.4M 0 0 1020k 0 0:01:18 0:00:45 0:00:33 1625k\r 59 78.7M 59 46.6M 0 0 1027k 0 0:01:18 0:00:46 0:00:32 1512k\r 60 78.7M 60 47.7M 0 0 1027k 0 0:01:18 0:00:47 0:00:31 1376k\r 61 78.7M 61 48.6M 0 0 1024k 0 0:01:18 0:00:48 0:00:30 1236k\r 62 78.7M 62 49.5M 0 0 1020k 0 0:01:18 0:00:49 0:00:29 1125k\r 64 78.7M 64 50.4M 0 0 1021k 0 0:01:18 0:00:50 0:00:28 1027k\r 65 78.7M 65 51.3M 0 0 1018k 0 0:01:19 0:00:51 0:00:28 941k\r 66 78.7M 66 52.1M 0 0 1016k 0 0:01:19 0:00:52 0:00:27 910k\r 67 78.7M 67 53.0M 0 0 1014k 0 0:01:19 0:00:53 0:00:26 909k\r 68 78.7M 68 53.7M 0 0 1006k 0 0:01:20 0:00:54 0:00:26 868k\r 69 78.7M 69 54.6M 0 0 1006k 0 0:01:20 0:00:55 0:00:25 858k\r 70 78.7M 70 55.3M 0 0 1002k 0 0:01:20 0:00:56 0:00:24 831k\r 71 78.7M 71 56.1M 0 0 998k 0 0:01:20 0:00:57 0:00:23 807k\r 72 78.7M 72 56.9M 0 0 994k 0 0:01:21 0:00:58 0:00:23 787k\r 73 78.7M 73 57.6M 0 0 991k 0 0:01:21 0:00:59 0:00:22 823k\r 74 78.7M 74 58.4M 0 0 988k 0 0:01:21 0:01:00 0:00:21 784k\r 75 78.7M 75 59.2M 0 0 985k 0 0:01:21 0:01:01 0:00:20 791k\r 76 78.7M 76 60.0M 0 0 982k 0 0:01:22 0:01:02 0:00:20 797k\r 77 78.7M 77 60.8M 0 0 980k 0 0:01:22 0:01:03 0:00:19 808k\r 78 78.7M 78 61.6M 0 0 977k 0 0:01:22 0:01:04 0:00:18 812k\r 79 78.7M 79 62.4M 0 0 975k 0 0:01:22 0:01:05 0:00:17 824k\r 80 78.7M 80 63.4M 0 0 976k 0 0:01:22 0:01:06 0:00:16 870k\r 82 78.7M 82 64.9M 0 0 984k 0 0:01:21 0:01:07 0:00:14 1006k\r 85 78.7M 85 66.9M 0 0 1000k 0 0:01:20 0:01:08 0:00:12 1254k\r 88 78.7M 88 69.4M 0 0 1022k 0 0:01:18 0:01:09 0:00:09 1602k\r 92 78.7M 92 72.5M 0 0 1053k 0 0:01:16 0:01:10 0:00:06 2064k\r 96 78.7M 96 76.1M 0 0 1089k 0 0:01:13 0:01:11 0:00:02 2600k\r100 78.7M 100 78.7M 0 0 1116k 0 0:01:12 0:01:12 --:--:-- 3022k\nrm: cannot remove \u0027/tmp/flights00.csv\u0027: No such file or directory\n" - } - ] - }, - "dateCreated": "Jan 9, 2017 11:58:32 AM", - "dateStarted": "Jan 9, 2017 
12:00:01 PM", - "dateFinished": "Jan 9, 2017 12:01:34 PM", - "status": "FINISHED", - "progressUpdateIntervalMs": 500 - }, - { - "text": "%sh\n\nrm /tmp/carriers.csv\ncurl -o /tmp/carriers.csv \"http://stat-computing.org/dataexpo/2009/carriers.csv\"\nchmod 666 /tmp/carriers.csv", - "user": "anonymous", - "dateUpdated": "Jan 9, 2017 12:01:48 PM", - "config": { - "colWidth": 12.0, - "enabled": true, - "results": {}, - "editorSetting": { - "language": "sh", - "editOnDblClick": false - }, - "editorMode": "ace/mode/sh", - "tableHide": true - }, - "settings": { - "params": {}, - "forms": {} - }, - "apps": [], - "jobName": "paragraph_1483952329229_2136292082", - "id": "20170109-115849_1794095031", - "results": { - "code": "SUCCESS", - "msg": [ - { - "type": "TEXT", - "data": "rm: cannot remove \u0027/tmp/carriers.csv\u0027: No such file or directory\n % Total % Received % Xferd Average Speed Time Time Time Current\n Dload Upload Total Spent Left Speed\n\r 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0\r 9 43758 9 4140 0 0 7588 0 0:00:05 --:--:-- 0:00:05 7582\r100 43758 100 43758 0 0 46357 0 --:--:-- --:--:-- --:--:-- 46353\n" - } - ] - }, - "dateCreated": "Jan 9, 2017 11:58:49 AM", - "dateStarted": "Jan 9, 2017 12:01:44 PM", - "dateFinished": "Jan 9, 2017 12:01:45 PM", - "status": "FINISHED", - "progressUpdateIntervalMs": 500 - }, - { - "text": "%md\n### Preparing the data\nThe `flights\u003cYY\u003e.csv` contains various data but we only need the information about the year, the month and the carrier who served the flight. 
Let\u0027s retrieve this information and create `DataSets`.", - "user": "anonymous", - "dateUpdated": "Jan 9, 2017 12:01:51 PM", - "config": { - "colWidth": 12.0, - "enabled": true, - "results": {}, - "editorSetting": { - "language": "markdown", - "editOnDblClick": true - }, - "editorMode": "ace/mode/markdown", - "editorHide": true, - "tableHide": false - }, - "settings": { - "params": {}, - "forms": {} - }, - "apps": [], - "jobName": "paragraph_1483952363836_-1769111757", - "id": "20170109-115923_963126574", - "results": { - "code": "SUCCESS", - "msg": [ - { - "type": "HTML", - "data": "\u003cdiv class\u003d\"markdown-body\"\u003e\n\u003ch3\u003ePreparing the data\u003c/h3\u003e\n\u003cp\u003eThe \u003ccode\u003eflights\u0026lt;YY\u0026gt;.csv\u003c/code\u003e contains various data but we only need the information about the year, the month and the carrier who served the flight. Let\u0026rsquo;s retrieve this information and create \u003ccode\u003eDataSets\u003c/code\u003e.\u003c/p\u003e\n\u003c/div\u003e" - } - ] - }, - "dateCreated": "Jan 9, 2017 11:59:23 AM", - "dateStarted": "Jan 9, 2017 12:01:51 PM", - "dateFinished": "Jan 9, 2017 12:01:53 PM", - "status": "FINISHED", - "progressUpdateIntervalMs": 500 - }, - { - "text": "%flink\n\ncase class Flight(year: Int, month: Int, carrierCode: String)\ncase class Carrier(code: String, name: String)\n\nval flights98 \u003d benv.readCsvFile[Flight](\"/tmp/flights98.csv\", ignoreFirstLine \u003d true, includedFields \u003d Array(0, 1, 8))\nval flights99 \u003d benv.readCsvFile[Flight](\"/tmp/flights99.csv\", ignoreFirstLine \u003d true, includedFields \u003d Array(0, 1, 8))\nval flights00 \u003d benv.readCsvFile[Flight](\"/tmp/flights00.csv\", ignoreFirstLine \u003d true, includedFields \u003d Array(0, 1, 8))\nval flights \u003d flights98.union(flights99).union(flights00)\nval carriers \u003d benv.readCsvFile[Carrier](\"/tmp/carriers.csv\", ignoreFirstLine \u003d true, quoteCharacter \u003d \u0027\"\u0027)", - "user": 
"anonymous", - "dateUpdated": "Jan 9, 2017 12:02:38 PM", - "config": { - "colWidth": 12.0, - "enabled": true, - "results": {}, - "editorSetting": { - "language": "scala", - "editOnDblClick": false - }, - "editorMode": "ace/mode/scala", - "lineNumbers": true, - "tableHide": true - }, - "settings": { - "params": {}, - "forms": {} - }, - "apps": [], - "jobName": "paragraph_1483952511284_-589624871", - "id": "20170109-120151_872852428", - "results": { - "code": "SUCCESS", - "msg": [ - { - "type": "TEXT", - "data": "defined class Flight\ndefined class Carrier\nflights98: org.apache.flink.api.scala.DataSet[Flight] \u003d org.apache.flink.api.scala.DataSet@7cd81fd5\nflights99: org.apache.flink.api.scala.DataSet[Flight] \u003d org.apache.flink.api.scala.DataSet@58242e79\nflights00: org.apache.flink.api.scala.DataSet[Flight] \u003d org.apache.flink.api.scala.DataSet@13f866c0\nflights: org.apache.flink.api.scala.DataSet[Flight] \u003d org.apache.flink.api.scala.DataSet@2aad2530\ncarriers: org.apache.flink.api.scala.DataSet[Carrier] \u003d org.apache.flink.api.scala.DataSet@148c977b\n" - } - ] - }, - "dateCreated": "Jan 9, 2017 12:01:51 PM", - "dateStarted": "Jan 9, 2017 12:02:10 PM", - "dateFinished": "Jan 9, 2017 12:02:29 PM", - "status": "FINISHED", - "progressUpdateIntervalMs": 500 - }, - { - "text": "%md\n### Choosing the carrier\nNow we will search for the most popular carrier during the whole time period.", - "user": "anonymous", - "dateUpdated": "Jan 9, 2017 12:03:08 PM", - "config": { - "colWidth": 12.0, - "enabled": true, - "results": {}, - "editorSetting": { - "language": "markdown", - "editOnDblClick": true - }, - "editorMode": "ace/mode/markdown", - "editorHide": true, - "tableHide": false - }, - "settings": { - "params": {}, - "forms": {} - }, - "apps": [], - "jobName": "paragraph_1483952530113_212237809", - "id": "20170109-120210_773710997", - "results": { - "code": "SUCCESS", - "msg": [ - { - "type": "HTML", - "data": "\u003cdiv 
class\u003d\"markdown-body\"\u003e\n\u003ch3\u003eChoosing the carrier\u003c/h3\u003e\n\u003cp\u003eNow we will search for the most popular carrier during the whole time period.\u003c/p\u003e\n\u003c/div\u003e" - } - ] - }, - "dateCreated": "Jan 9, 2017 12:02:10 PM", - "dateStarted": "Jan 9, 2017 12:03:08 PM", - "dateFinished": "Jan 9, 2017 12:03:08 PM", - "status": "FINISHED", - "progressUpdateIntervalMs": 500 - }, - { - "text": "%flink\n\nimport org.apache.flink.api.common.operators.Order\nimport org.apache.flink.api.java.aggregation.Aggregations\n\ncase class CarrierFlightsCount(carrierCode: String, count: Int)\ncase class CountByMonth(month: Int, count: Int)\n\nval carriersFlights \u003d flights\n .map(f \u003d\u003e CarrierFlightsCount(f.carrierCode, 1))\n .groupBy(\"carrierCode\")\n .sum(\"count\")\n\nval maxFlights \u003d carriersFlights\n .aggregate(Aggregations.MAX, \"count\")\n\nval bestCarrier \u003d carriersFlights\n .join(maxFlights)\n .where(\"count\")\n .equalTo(\"count\")\n .map(_._1)\n \nval carrierName \u003d bestCarrier\n .join(carriers)\n .where(\"carrierCode\")\n .equalTo(\"code\")\n .map(_._2.name)\n .collect\n .head", - "user": "anonymous", - "dateUpdated": "Jan 9, 2017 12:04:04 PM", - "config": { - "colWidth": 12.0, - "enabled": true, - "results": {}, - "editorSetting": { - "language": "scala", - "editOnDblClick": false - }, - "editorMode": "ace/mode/scala", - "lineNumbers": true, - "tableHide": true - }, - "settings": { - "params": {}, - "forms": {} - }, - "apps": [], - "jobName": "paragraph_1483952588708_-1770095793", - "id": "20170109-120308_1328511597", - "results": { - "code": "SUCCESS", - "msg": [ - { - "type": "TEXT", - "data": "import org.apache.flink.api.common.operators.Order\nimport org.apache.flink.api.java.aggregation.Aggregations\ndefined class CarrierFlightsCount\ndefined class CountByMonth\ncarriersFlights: org.apache.flink.api.scala.AggregateDataSet[CarrierFlightsCount] \u003d 
org.apache.flink.api.scala.AggregateDataSet@2c59be0b\nmaxFlights: org.apache.flink.api.scala.AggregateDataSet[CarrierFlightsCount] \u003d org.apache.flink.api.scala.AggregateDataSet@53e5fad9\nbestCarrier: org.apache.flink.api.scala.DataSet[CarrierFlightsCount] \u003d org.apache.flink.api.scala.DataSet@64b7b1b3\ncarrierName: String \u003d Delta Air Lines Inc.\n" - } - ] - }, - "dateCreated": "Jan 9, 2017 12:03:08 PM", - "dateStarted": "Jan 9, 2017 12:03:41 PM", - "dateFinished": "Jan 9, 2017 12:03:58 PM", - "status": "FINISHED", - "progressUpdateIntervalMs": 500 - }, - { - "text": "%flink\n\nprintln(s\"\"\"The most popular carrier is:\n$carrierName\n\"\"\")", - "user": "anonymous", - "dateUpdated": "Jan 9, 2017 12:09:18 PM", - "config": { - "colWidth": 12.0, - "enabled": true, - "results": {}, - "editorSetting": { - "language": "scala", - "editOnDblClick": false - }, - "editorMode": "ace/mode/scala", - "lineNumbers": true - }, - "settings": { - "params": {}, - "forms": {} - }, - "apps": [], - "jobName": "paragraph_1483952621624_-1222400539", - "id": "20170109-120341_952212268", - "results": { - "code": "SUCCESS", - "msg": [ - { - "type": "TEXT", - "data": "The most popular carrier is:\nDelta Air Lines Inc.\n\n" - } - ] - }, - "dateCreated": "Jan 9, 2017 12:03:41 PM", - "dateStarted": "Jan 9, 2017 12:04:09 PM", - "dateFinished": "Jan 9, 2017 12:04:10 PM", - "status": "FINISHED", - "progressUpdateIntervalMs": 500 - }, - { - "text": "%md\n### Calculating flights\nThe last step is to filter **Delta Air Lines** flights and group them by months.", - "user": "anonymous", - "dateUpdated": "Jan 9, 2017 12:04:26 PM", - "config": { - "colWidth": 12.0, - "enabled": true, - "results": {}, - "editorSetting": { - "language": "markdown", - "editOnDblClick": true - }, - "editorMode": "ace/mode/markdown", - "editorHide": true, - "tableHide": false - }, - "settings": { - "params": {}, - "forms": {} - }, - "apps": [], - "jobName": "paragraph_1483952649646_-1553253944", - "id": 
"20170109-120409_2003276881", - "results": { - "code": "SUCCESS", - "msg": [ - { - "type": "HTML", - "data": "\u003cdiv class\u003d\"markdown-body\"\u003e\n\u003ch3\u003eCalculating flights\u003c/h3\u003e\n\u003cp\u003eThe last step is to filter \u003cstrong\u003eDelta Air Lines\u003c/strong\u003e flights and group them by months.\u003c/p\u003e\n\u003c/div\u003e" - } - ] - }, - "dateCreated": "Jan 9, 2017 12:04:09 PM", - "dateStarted": "Jan 9, 2017 12:04:26 PM", - "dateFinished": "Jan 9, 2017 12:04:26 PM", - "status": "FINISHED", - "progressUpdateIntervalMs": 500 - }, - { - "title": "flights grouping", - "text": "%flink\n\ndef countFlightsPerMonth(flights: DataSet[Flight],\n carrier: DataSet[CarrierFlightsCount]) \u003d {\n val carrierFlights \u003d flights\n .join(carrier)\n .where(\"carrierCode\")\n .equalTo(\"carrierCode\")\n .map(_._1)\n \n carrierFlights\n .map(flight \u003d\u003e CountByMonth(flight.month, 1))\n .groupBy(\"month\")\n .sum(\"count\")\n .sortPartition(\"month\", Order.ASCENDING)\n}\n\nval bestCarrierFlights_98 \u003d countFlightsPerMonth(flights98, bestCarrier)\nval bestCarrierFlights_99 \u003d countFlightsPerMonth(flights99, bestCarrier)\nval bestCarrierFlights_00 \u003d countFlightsPerMonth(flights00, bestCarrier)", - "user": "anonymous", - "dateUpdated": "Jan 9, 2017 12:05:06 PM", - "config": { - "colWidth": 12.0, - "enabled": true, - "results": {}, - "editorSetting": { - "language": "scala", - "editOnDblClick": false - }, - "editorMode": "ace/mode/scala", - "lineNumbers": true, - "title": true, - "tableHide": true - }, - "settings": { - "params": {}, - "forms": {} - }, - "apps": [], - "jobName": "paragraph_1483952665972_667547355", - "id": "20170109-120425_2018337048", - "results": { - "code": "SUCCESS", - "msg": [ - { - "type": "TEXT", - "data": "countFlightsPerMonth: (flights: org.apache.flink.api.scala.DataSet[Flight], carrier: 
org.apache.flink.api.scala.DataSet[CarrierFlightsCount])org.apache.flink.api.scala.DataSet[CountByMonth]\nbestCarrierFlights_98: org.apache.flink.api.scala.DataSet[CountByMonth] \u003d org.apache.flink.api.scala.PartitionSortedDataSet@2aa64309\nbestCarrierFlights_99: org.apache.flink.api.scala.DataSet[CountByMonth] \u003d org.apache.flink.api.scala.PartitionSortedDataSet@35fe60c4\nbestCarrierFlights_00: org.apache.flink.api.scala.DataSet[CountByMonth] \u003d org.apache.flink.api.scala.PartitionSortedDataSet@4621410f\n" - } - ] - }, - "dateCreated": "Jan 9, 2017 12:04:25 PM", - "dateStarted": "Jan 9, 2017 12:04:50 PM", - "dateFinished": "Jan 9, 2017 12:04:51 PM", - "status": "FINISHED", - "progressUpdateIntervalMs": 500 - }, - { - "title": "making a results table", - "text": "%flink\n\ndef monthAsString(month: Int): String \u003d {\n month match {\n case 1 \u003d\u003e \"Jan\"\n case 2 \u003d\u003e \"Feb\"\n case 3 \u003d\u003e \"Mar\"\n case 4 \u003d\u003e \"Apr\"\n case 5 \u003d\u003e \"May\"\n case 6 \u003d\u003e \"Jun\"\n case 7 \u003d\u003e \"Jul\"\n case 8 \u003d\u003e \"Aug\"\n case 9 \u003d\u003e \"Sept\"\n case 10 \u003d\u003e \"Oct\"\n case 11 \u003d\u003e \"Nov\"\n case 12 \u003d\u003e \"Dec\"\n }\n}\n\n// We should put all the results into a common DataFrame\n// to show them in a common picture\nval bestCarrierFlights \u003d bestCarrierFlights_98\n .join(bestCarrierFlights_99)\n .where(\"month\")\n .equalTo(\"month\")\n .map(tuple \u003d\u003e (tuple._1.month, tuple._1.count, tuple._2.count))\n .join(bestCarrierFlights_00)\n .where(0)\n .equalTo(\"month\")\n .map(tuple \u003d\u003e (tuple._1._1, tuple._1._2, tuple._1._3, tuple._2.count))\n .collect\n \nvar flightsByMonthTable \u003d s\"Month\\t1998\\t1999\\t2000\\n\"\nbestCarrierFlights.foreach(data \u003d\u003e flightsByMonthTable +\u003d s\"${monthAsString(data._1)}\\t${data._2}\\t${data._3}\\t${data._4}\\n\")", - "user": "anonymous", - "dateUpdated": "Jan 9, 2017 12:06:03 PM", - "config": { - 
"colWidth": 12.0, - "enabled": true, - "results": {}, - "editorSetting": { - "language": "scala", - "editOnDblClick": false - }, - "editorMode": "ace/mode/scala", - "lineNumbers": true, - "title": true, - "tableHide": true - }, - "settings": { - "params": {}, - "forms": {} - }, - "apps": [], - "jobName": "paragraph_1483952690164_-1061667443", - "id": "20170109-120450_1574916350", - "results": { - "code": "SUCCESS", - "msg": [ - { - "type": "TEXT", - "data": "monthAsString: (month: Int)String\nbestCarrierFlights: Seq[(Int, Int, Int, Int)] \u003d Buffer((1,78523,77745,78055), (2,71101,70498,71090), (3,78906,77812,78453), (4,75726,75343,75247), (5,77937,77226,76797), (6,75432,75840,74846), (7,77521,77264,75776), (8,78104,78141,77654), (9,74840,75067,73696), (10,76145,77829,77425), (11,73552,74411,73659), (12,77308,76954,75331))\nflightsByMonthTable: String \u003d \n\"Month\t1998\t1999\t2000\n\"\n" - } - ] - }, - "dateCreated": "Jan 9, 2017 12:04:50 PM", - "dateStarted": "Jan 9, 2017 12:05:24 PM", - "dateFinished": "Jan 9, 2017 12:05:59 PM", - "status": "FINISHED", - "progressUpdateIntervalMs": 500 - }, - { - "title": "\"Delta Air Lines\" flights count by months", - "text": "%flink\n\nprintln(s\"\"\"%table\n$flightsByMonthTable\n\"\"\")", - "user": "anonymous", - "dateUpdated": "Jan 9, 2017 12:06:17 PM", - "config": { - "colWidth": 12.0, - "enabled": true, - "results": { - "0": { - "graph": { - "mode": "lineChart", - "height": 300.0, - "optionOpen": false, - "setting": { - "lineChart": {} - }, - "commonSetting": {}, - "keys": [ - { - "name": "Month", - "index": 0.0, - "aggr": "sum" - } - ], - "groups": [], - "values": [ - { - "name": "1998", - "index": 1.0, - "aggr": "sum" - }, - { - "name": "1999", - "index": 2.0, - "aggr": "sum" - }, - { - "name": "2000", - "index": 3.0, - "aggr": "sum" - } - ] - }, - "helium": {} - } - }, - "editorSetting": { - "language": "scala", - "editOnDblClick": false - }, - "editorMode": "ace/mode/scala", - "title": true, - "lineNumbers": 
true - }, - "settings": { - "params": {}, - "forms": {} - }, - "apps": [], - "jobName": "paragraph_1483952724460_191505697", - "id": "20170109-120524_2037622815", - "results": { - "code": "SUCCESS", - "msg": [ - { - "type": "TABLE", - "data": "Month\t1998\t1999\t2000\nJan\t78523\t77745\t78055\nFeb\t71101\t70498\t71090\nMar\t78906\t77812\t78453\nApr\t75726\t75343\t75247\nMay\t77937\t77226\t76797\nJun\t75432\t75840\t74846\nJul\t77521\t77264\t75776\nAug\t78104\t78141\t77654\nSept\t74840\t75067\t73696\nOct\t76145\t77829\t77425\nNov\t73552\t74411\t73659\nDec\t77308\t76954\t75331\n" - } - ] - }, - "dateCreated": "Jan 9, 2017 12:05:24 PM", - "dateStarted": "Jan 9, 2017 12:06:07 PM", - "dateFinished": "Jan 9, 2017 12:06:08 PM", - "status": "FINISHED", - "progressUpdateIntervalMs": 500 - }, - { - "text": "%md\n### Results\nLooking at this chart we can say that February is the most unpopular month, but this is only because it has less days (28 or 29) than the other months (30 or 31). To receive more fair picture we should calculate the average flights count per day for each month.", - "user": "anonymous", - "dateUpdated": "Jan 9, 2017 12:06:34 PM", - "config": { - "colWidth": 12.0, - "enabled": true, - "results": {}, - "editorSetting": { - "language": "markdown", - "editOnDblClick": true - }, - "editorMode": "ace/mode/markdown", - "editorHide": true, - "tableHide": false - }, - "settings": { - "params": {}, - "forms": {} - }, - "apps": [], - "jobName": "paragraph_1483952767719_-1010557136", - "id": "20170109-120607_67673280", - "results": { - "code": "SUCCESS", - "msg": [ - { - "type": "HTML", - "data": "\u003cdiv class\u003d\"markdown-body\"\u003e\n\u003ch3\u003eResults\u003c/h3\u003e\n\u003cp\u003eLooking at this chart we can say that February is the most unpopular month, but this is only because it has less days (28 or 29) than the other months (30 or 31). 
To receive more fair picture we should calculate the average flights count per day for each month.\u003c/p\u003e\n\u003c/div\u003e" - } - ] - }, - "dateCreated": "Jan 9, 2017 12:06:07 PM", - "dateStarted": "Jan 9, 2017 12:06:34 PM", - "dateFinished": "Jan 9, 2017 12:06:34 PM", - "status": "FINISHED", - "progressUpdateIntervalMs": 500 - }, - { - "text": "%flink\n\ndef daysInMonth(month: Int, year: Int): Int \u003d {\n month match {\n case 1 \u003d\u003e 31\n case 2 \u003d\u003e if (year % 4 \u003d\u003d 0) {\n 29\n } else {\n 28\n }\n case 3 \u003d\u003e 31\n case 4 \u003d\u003e 30\n case 5 \u003d\u003e 31\n case 6 \u003d\u003e 30\n case 7 \u003d\u003e 31\n case 8 \u003d\u003e 31\n case 9 \u003d\u003e 30\n case 10 \u003d\u003e 31\n case 11 \u003d\u003e 30\n case 12 \u003d\u003e 31\n }\n}\n\n\nvar flightsByDayTable \u003d s\"Month\\t1998\\t1999\\t2000\\n\"\n\nbestCarrierFlights.foreach(data \u003d\u003e flightsByDayTable +\u003d s\"${monthAsString(data._1)}\\t${data._2/daysInMonth(data._1,1998)}\\t${data._3/daysInMonth(data._1,1999)}\\t${data._4/daysInMonth(data._1,2000)}\\n\")", - "user": "anonymous", - "dateUpdated": "Jan 9, 2017 12:06:58 PM", - "config": { - "colWidth": 12.0, - "enabled": true, - "results": {}, - "editorSetting": { - "language": "scala", - "editOnDblClick": false - }, - "editorMode": "ace/mode/scala", - "lineNumbers": true, - "tableHide": true - }, - "settings": { - "params": {}, - "forms": {} - }, - "apps": [], - "jobName": "paragraph_1483952794097_-785833130", - "id": "20170109-120634_492170963", - "results": { - "code": "SUCCESS", - "msg": [ - { - "type": "TEXT", - "data": "daysInMonth: (month: Int, year: Int)Int\nflightsByDayTable: String \u003d \n\"Month\t1998\t1999\t2000\n\"\n" - } - ] - }, - "dateCreated": "Jan 9, 2017 12:06:34 PM", - "dateStarted": "Jan 9, 2017 12:06:53 PM", - "dateFinished": "Jan 9, 2017 12:06:53 PM", - "status": "FINISHED", - "progressUpdateIntervalMs": 500 - }, - { - "title": "\"Delta Air Lines\" flights count by days", 
- "text": "%flink\n\nprintln(s\"\"\"%table\n$flightsByDayTable\n\"\"\")", - "user": "anonymous", - "dateUpdated": "Jan 9, 2017 12:10:56 PM", - "config": { - "colWidth": 12.0, - "enabled": true, - "results": { - "0": { - "graph": { - "mode": "lineChart", - "height": 300.0, - "optionOpen": false, - "setting": { - "lineChart": {} - }, - "commonSetting": {}, - "keys": [ - { - "name": "Month", - "index": 0.0, - "aggr": "sum" - } - ], - "groups": [], - "values": [ - { - "name": "1998", - "index": 1.0, - "aggr": "sum" - }, - { - "name": "1999", - "index": 2.0, - "aggr": "sum" - }, - { - "name": "2000", - "index": 3.0, - "aggr": "sum" - } - ] - }, - "helium": {} - } - }, - "editorSetting": { - "language": "scala", - "editOnDblClick": false - }, - "editorMode": "ace/mode/scala", - "title": true, - "lineNumbers": true - }, - "settings": { - "params": {}, - "forms": {} - }, - "apps": [], - "jobName": "paragraph_1483952813391_1847418990", - "id": "20170109-120653_1870236569", - "results": { - "code": "SUCCESS", - "msg": [ - { - "type": "TABLE", - "data": "Month\t1998\t1999\t2000\nJan\t2533\t2507\t2517\nFeb\t2539\t2517\t2451\nMar\t2545\t2510\t2530\nApr\t2524\t2511\t2508\nMay\t2514\t2491\t2477\nJun\t2514\t2528\t2494\nJul\t2500\t2492\t2444\nAug\t2519\t2520\t2504\nSept\t2494\t2502\t2456\nOct\t2456\t2510\t2497\nNov\t2451\t2480\t2455\nDec\t2493\t2482\t2430\n" - } - ] - }, - "dateCreated": "Jan 9, 2017 12:06:53 PM", - "dateStarted": "Jan 9, 2017 12:07:22 PM", - "dateFinished": "Jan 9, 2017 12:07:23 PM", - "status": "FINISHED", - "progressUpdateIntervalMs": 500 - }, - { - "text": "%flink\n", - "dateUpdated": "Jan 9, 2017 12:07:22 PM", - "config": {}, - "settings": { - "params": {}, - "forms": {} - }, - "apps": [], - "jobName": "paragraph_1483952842919_587228425", - "id": "20170109-120722_939892827", - "dateCreated": "Jan 9, 2017 12:07:22 PM", - "status": "READY", - "progressUpdateIntervalMs": 500 - } - ], - "name": "Zeppelin Tutorial/Using Flink for batch processing", - "id": 
"2C35YU814", - "angularObjects": { - "2C4PVECE6:shared_process": [], - "2C4US9MUF:shared_process": [], - "2C4FYNB4G:shared_process": [], - "2C4GX28KP:shared_process": [], - "2C648AXXN:shared_process": [], - "2C3MSEJ2F:shared_process": [], - "2C6F2N6BT:shared_process": [], - "2C3US2RTN:shared_process": [], - "2C3TYMD6K:shared_process": [], - "2C3FDPZRX:shared_process": [], - "2C5TEARYX:shared_process": [], - "2C5D6NSNG:shared_process": [], - "2C6FVVEAD:shared_process": [], - "2C582KNWG:shared_process": [], - "2C6ZMVGM7:shared_process": [], - "2C6UYQG8R:shared_process": [], - "2C666VZT2:shared_process": [], - "2C4JRCY3K:shared_process": [], - "2C64W5T9D:shared_process": [] - }, - "config": { - "looknfeel": "default" - }, - "info": {} -}
http://git-wip-us.apache.org/repos/asf/zeppelin/blob/085efeb6/notebook/2C57UKYWR/note.json ---------------------------------------------------------------------- diff --git a/notebook/2C57UKYWR/note.json b/notebook/2C57UKYWR/note.json deleted file mode 100644 index 22afb2a..0000000 --- a/notebook/2C57UKYWR/note.json +++ /dev/null @@ -1,334 +0,0 @@ -{ - "paragraphs": [ - { - "text": "%md\n\n\n### [Apache Pig](http://pig.apache.org/) is a platform for analyzing large data sets that consists of a high-level language for expressing data analysis programs, coupled with infrastructure for evaluating these programs. The salient property of Pig programs is that their structure is amenable to substantial parallelization, which in turns enables them to handle very large data sets.\n\nPig\u0027s language layer currently consists of a textual language called Pig Latin, which has the following key properties:\n\n* Ease of programming. It is trivial to achieve parallel execution of simple, \"embarrassingly parallel\" data analysis tasks. Complex tasks comprised of multiple interrelated data transformations are explicitly encoded as data flow sequences, making them easy to write, understand, and maintain.\n* Optimization opportunities. The way in which tasks are encoded permits the system to optimize their execution automatically, allowing the user to focus on semantics rather than efficiency.\n* Extensibility. 
Users can create their own functions to do special-purpose processing.\n", - "user": "anonymous", - "dateUpdated": "Jan 22, 2017 12:48:50 PM", - "config": { - "colWidth": 12.0, - "enabled": true, - "results": {}, - "editorSetting": { - "language": "markdown", - "editOnDblClick": true - }, - "editorMode": "ace/mode/markdown", - "editorHide": true, - "tableHide": false - }, - "settings": { - "params": {}, - "forms": {} - }, - "results": { - "code": "SUCCESS", - "msg": [ - { - "type": "HTML", - "data": "\u003cdiv class\u003d\"markdown-body\"\u003e\n\u003ch3\u003e\u003ca href\u003d\"http://pig.apache.org/\"\u003eApache Pig\u003c/a\u003e is a platform for analyzing large data sets that consists of a high-level language for expressing data analysis programs, coupled with infrastructure for evaluating these programs. The salient property of Pig programs is that their structure is amenable to substantial parallelization, which in turns enables them to handle very large data sets.\u003c/h3\u003e\n\u003cp\u003ePig\u0026rsquo;s language layer currently consists of a textual language called Pig Latin, which has the following key properties:\u003c/p\u003e\n\u003cul\u003e\n \u003cli\u003eEase of programming. It is trivial to achieve parallel execution of simple, \u0026ldquo;embarrassingly parallel\u0026rdquo; data analysis tasks. Complex tasks comprised of multiple interrelated data transformations are explicitly encoded as data flow sequences, making them easy to write, understand, and maintain.\u003c/li\u003e\n \u003cli\u003eOptimization opportunities. The way in which tasks are encoded permits the system to optimize their execution automatically, allowing the user to focus on semantics rather than efficiency.\u003c/li\u003e\n \u003cli\u003eExtensibility. 
Users can create their own functions to do special-purpose processing.\u003c/li\u003e\n\u003c/ul\u003e\n\u003c/div\u003e" - } - ] - }, - "apps": [], - "jobName": "paragraph_1483277502513_1156234051", - "id": "20170101-213142_1565013608", - "dateCreated": "Jan 1, 2017 9:31:42 PM", - "dateStarted": "Jan 22, 2017 12:48:50 PM", - "dateFinished": "Jan 22, 2017 12:48:51 PM", - "status": "FINISHED", - "progressUpdateIntervalMs": 500 - }, - { - "text": "%md\n\nThis pig tutorial use pig to do the same thing as spark tutorial. The default mode is mapreduce, you can also use other modes like local/tez_local/tez. For mapreduce mode, you need to have hadoop installed and export `HADOOP_CONF_DIR` in `zeppelin-env.sh`\n\nThe tutorial consists of 3 steps.\n\n* Use shell interpreter to download bank.csv and upload it to hdfs\n* use `%pig` to process the data\n* use `%pig.query` to query the data", - "user": "anonymous", - "dateUpdated": "Jan 22, 2017 12:48:55 PM", - "config": { - "colWidth": 12.0, - "enabled": true, - "results": {}, - "editorSetting": { - "language": "markdown", - "editOnDblClick": true - }, - "editorMode": "ace/mode/markdown", - "editorHide": true, - "tableHide": false - }, - "settings": { - "params": {}, - "forms": {} - }, - "results": { - "code": "SUCCESS", - "msg": [ - { - "type": "HTML", - "data": "\u003cdiv class\u003d\"markdown-body\"\u003e\n\u003cp\u003eThis pig tutorial use pig to do the same thing as spark tutorial. The default mode is mapreduce, you can also use other modes like local/tez_local/tez. 
For mapreduce mode, you need to have hadoop installed and export \u003ccode\u003eHADOOP_CONF_DIR\u003c/code\u003e in \u003ccode\u003ezeppelin-env.sh\u003c/code\u003e\u003c/p\u003e\n\u003cp\u003eThe tutorial consists of 3 steps.\u003c/p\u003e\n\u003cul\u003e\n \u003cli\u003eUse shell interpreter to download bank.csv and upload it to hdfs\u003c/li\u003e\n \u003cli\u003euse \u003ccode\u003e%pig\u003c/code\u003e to process the data\u003c/li\u003e\n \u003cli\u003euse \u003ccode\u003e%pig.query\u003c/code\u003e to query the data\u003c/li\u003e\n\u003c/ul\u003e\n\u003c/div\u003e" - } - ] - }, - "apps": [], - "jobName": "paragraph_1483689316217_-629483391", - "id": "20170106-155516_1050601059", - "dateCreated": "Jan 6, 2017 3:55:16 PM", - "dateStarted": "Jan 22, 2017 12:48:55 PM", - "dateFinished": "Jan 22, 2017 12:48:55 PM", - "status": "FINISHED", - "progressUpdateIntervalMs": 500 - }, - { - "text": "%sh\n\nwget https://s3.amazonaws.com/apache-zeppelin/tutorial/bank/bank.csv\nhadoop fs -put bank.csv .\n", - "user": "anonymous", - "dateUpdated": "Jan 22, 2017 12:51:48 PM", - "config": { - "colWidth": 12.0, - "enabled": true, - "results": {}, - "editorSetting": { - "language": "text", - "editOnDblClick": false - }, - "editorMode": "ace/mode/text" - }, - "settings": { - "params": {}, - "forms": {} - }, - "results": { - "code": "SUCCESS", - "msg": [ - { - "type": "TEXT", - "data": "--2017-01-22 12:51:48-- https://s3.amazonaws.com/apache-zeppelin/tutorial/bank/bank.csv\nResolving s3.amazonaws.com... 52.216.80.227\nConnecting to s3.amazonaws.com|52.216.80.227|:443... connected.\nHTTP request sent, awaiting response... 200 OK\nLength: 461474 (451K) [application/octet-stream]\nSaving to: \u0027bank.csv.3\u0027\n\n 0K .......... .......... .......... .......... .......... 11% 141K 3s\n 50K .......... .......... .......... .......... .......... 22% 243K 2s\n 100K .......... .......... .......... .......... .......... 33% 449K 1s\n 150K .......... .......... .......... .......... 
.......... 44% 413K 1s\n 200K .......... .......... .......... .......... .......... 55% 746K 1s\n 250K .......... .......... .......... .......... .......... 66% 588K 0s\n 300K .......... .......... .......... .......... .......... 77% 840K 0s\n 350K .......... .......... .......... .......... .......... 88% 795K 0s\n 400K .......... ...... .... .......... .......... .......... 99% 1.35M 0s\n 450K 100% 13.2K\u003d1.1s\n\n2017-01-22 12:51:50 (409 KB/s) - \u0027bank.csv.3\u0027 saved [461474/461474]\n\n17/01/22 12:51:51 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n" - } - ] - }, - "apps": [], - "jobName": "paragraph_1485058437578_-1906301827", - "id": "20170122-121357_640055590", - "dateCreated": "Jan 22, 2017 12:13:57 PM", - "dateStarted": "Jan 22, 2017 12:51:48 PM", - "dateFinished": "Jan 22, 2017 12:51:52 PM", - "status": "FINISHED", - "progressUpdateIntervalMs": 500 - }, - { - "text": "%pig\n\nbankText \u003d load \u0027bank.csv\u0027 using PigStorage(\u0027;\u0027);\nbank \u003d foreach bankText generate $0 as age, $1 as job, $2 as marital, $3 as education, $5 as balance; \nbank \u003d filter bank by age !\u003d \u0027\"age\"\u0027;\nbank \u003d foreach bank generate (int)age, REPLACE(job,\u0027\"\u0027,\u0027\u0027) as job, REPLACE(marital, \u0027\"\u0027, \u0027\u0027) as marital, (int)(REPLACE(balance, \u0027\"\u0027, \u0027\u0027)) as balance;\n\n-- The following statement is optional, it depends on whether your needs.\n-- store bank into \u0027clean_bank.csv\u0027 using PigStorage(\u0027;\u0027);\n\n\n", - "user": "anonymous", - "dateUpdated": "Feb 24, 2017 5:08:08 PM", - "config": { - "colWidth": 12.0, - "editorMode": "ace/mode/pig", - "results": {}, - "enabled": true, - "editorSetting": { - "language": "pig", - "editOnDblClick": false - } - }, - "settings": { - "params": {}, - "forms": {} - }, - "results": { - "code": "SUCCESS", - "msg": [] - }, - "apps": [], - 
"jobName": "paragraph_1483277250237_-466604517", - "id": "20161228-140640_1560978333", - "dateCreated": "Jan 1, 2017 9:27:30 PM", - "dateStarted": "Feb 24, 2017 5:08:08 PM", - "dateFinished": "Feb 24, 2017 5:08:11 PM", - "status": "FINISHED", - "progressUpdateIntervalMs": 500 - }, - { - "text": "%pig.query\n\nbank_data \u003d filter bank by age \u003c 30;\nb \u003d group bank_data by age;\nforeach b generate group, COUNT($1);\n\n", - "user": "anonymous", - "dateUpdated": "Feb 24, 2017 5:08:13 PM", - "config": { - "colWidth": 4.0, - "editorMode": "ace/mode/pig", - "results": { - "0": { - "graph": { - "mode": "multiBarChart", - "height": 300.0, - "optionOpen": false - }, - "helium": {} - } - }, - "enabled": true, - "editorSetting": { - "language": "pig", - "editOnDblClick": false - } - }, - "settings": { - "params": {}, - "forms": {} - }, - "results": { - "code": "SUCCESS", - "msg": [ - { - "type": "TABLE", - "data": "group\tcol_1\n19\t4\n20\t3\n21\t7\n22\t9\n23\t20\n24\t24\n25\t44\n26\t77\n27\t94\n28\t103\n29\t97\n" - } - ] - }, - "apps": [], - "jobName": "paragraph_1483277250238_-465450270", - "id": "20161228-140730_1903342877", - "dateCreated": "Jan 1, 2017 9:27:30 PM", - "dateStarted": "Feb 24, 2017 5:08:13 PM", - "dateFinished": "Feb 24, 2017 5:08:26 PM", - "status": "FINISHED", - "progressUpdateIntervalMs": 500 - }, - { - "text": "%pig.query\n\nbank_data \u003d filter bank by age \u003c ${maxAge\u003d40};\nb \u003d group bank_data by age;\nforeach b generate group, COUNT($1) as count;", - "user": "anonymous", - "dateUpdated": "Feb 24, 2017 5:08:14 PM", - "config": { - "colWidth": 4.0, - "editorMode": "ace/mode/pig", - "results": { - "0": { - "graph": { - "mode": "pieChart", - "height": 300.0, - "optionOpen": false - }, - "helium": {} - } - }, - "enabled": true, - "editorSetting": { - "language": "pig", - "editOnDblClick": false - } - }, - "settings": { - "params": { - "maxAge": "36" - }, - "forms": { - "maxAge": { - "name": "maxAge", - "defaultValue": "40", - 
"hidden": false - } - } - }, - "results": { - "code": "SUCCESS", - "msg": [ - { - "type": "TABLE", - "data": "group\tcount\n19\t4\n20\t3\n21\t7\n22\t9\n23\t20\n24\t24\n25\t44\n26\t77\n27\t94\n28\t103\n29\t97\n30\t150\n31\t199\n32\t224\n33\t186\n34\t231\n35\t180\n" - } - ] - }, - "apps": [], - "jobName": "paragraph_1483277250239_-465835019", - "id": "20161228-154918_1551591203", - "dateCreated": "Jan 1, 2017 9:27:30 PM", - "dateStarted": "Feb 24, 2017 5:08:14 PM", - "dateFinished": "Feb 24, 2017 5:08:29 PM", - "status": "FINISHED", - "progressUpdateIntervalMs": 500 - }, - { - "text": "%pig.query\n\nbank_data \u003d filter bank by marital\u003d\u003d\u0027${marital\u003dsingle,single|divorced|married}\u0027;\nb \u003d group bank_data by age;\nforeach b generate group, COUNT($1) as count;\n\n\n", - "user": "anonymous", - "dateUpdated": "Feb 24, 2017 5:08:15 PM", - "config": { - "colWidth": 4.0, - "editorMode": "ace/mode/pig", - "results": { - "0": { - "graph": { - "mode": "scatterChart", - "height": 300.0, - "optionOpen": false - }, - "helium": {} - } - }, - "enabled": true, - "editorSetting": { - "language": "pig", - "editOnDblClick": false - } - }, - "settings": { - "params": { - "marital": "married" - }, - "forms": { - "marital": { - "name": "marital", - "defaultValue": "single", - "options": [ - { - "value": "single" - }, - { - "value": "divorced" - }, - { - "value": "married" - } - ], - "hidden": false - } - } - }, - "results": { - "code": "SUCCESS", - "msg": [ - { - "type": "TABLE", - "data": 
"group\tcount\n23\t3\n24\t11\n25\t11\n26\t18\n27\t26\n28\t23\n29\t37\n30\t56\n31\t104\n32\t105\n33\t103\n34\t142\n35\t109\n36\t117\n37\t100\n38\t99\n39\t88\n40\t105\n41\t97\n42\t91\n43\t79\n44\t68\n45\t76\n46\t82\n47\t78\n48\t91\n49\t87\n50\t74\n51\t63\n52\t66\n53\t75\n54\t56\n55\t68\n56\t50\n57\t78\n58\t67\n59\t56\n60\t36\n61\t15\n62\t5\n63\t7\n64\t6\n65\t4\n66\t7\n67\t5\n68\t1\n69\t5\n70\t5\n71\t5\n72\t4\n73\t6\n74\t2\n75\t3\n76\t1\n77\t5\n78\t2\n79\t3\n80\t6\n81\t1\n83\t2\n86\t1\n87\t1\n" - } - ] - }, - "apps": [], - "jobName": "paragraph_1483277250240_-480070728", - "id": "20161228-142259_575675591", - "dateCreated": "Jan 1, 2017 9:27:30 PM", - "dateStarted": "Feb 24, 2017 5:08:27 PM", - "dateFinished": "Feb 24, 2017 5:08:31 PM", - "status": "FINISHED", - "progressUpdateIntervalMs": 500 - }, - { - "text": "%pig\n", - "dateUpdated": "Jan 1, 2017 9:27:30 PM", - "config": {}, - "settings": { - "params": {}, - "forms": {} - }, - "apps": [], - "jobName": "paragraph_1483277250240_-480070728", - "id": "20161228-155036_1854903164", - "dateCreated": "Jan 1, 2017 9:27:30 PM", - "status": "READY", - "errorMessage": "", - "progressUpdateIntervalMs": 500 - } - ], - "name": "Zeppelin Tutorial/Using Pig for querying data", - "id": "2C57UKYWR", - "angularObjects": { - "2C3RWCVAG:shared_process": [], - "2C9KGCHDE:shared_process": [], - "2C8X2BS16:shared_process": [] - }, - "config": {}, - "info": {} -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/zeppelin/blob/085efeb6/notebook/Zeppelin Tutorial/Basic Features (Spark)_2A94M5J1Z.zpln ---------------------------------------------------------------------- diff --git a/notebook/Zeppelin Tutorial/Basic Features (Spark)_2A94M5J1Z.zpln b/notebook/Zeppelin Tutorial/Basic Features (Spark)_2A94M5J1Z.zpln new file mode 100644 index 0000000..4deba4a --- /dev/null +++ b/notebook/Zeppelin Tutorial/Basic Features (Spark)_2A94M5J1Z.zpln @@ -0,0 +1,376 @@ +{ + "paragraphs": [ + { + "text": "%md\n## Welcome to 
Zeppelin.\n##### This is a live tutorial, you can run the code yourself. (Shift-Enter to Run)", + "user": "anonymous", + "dateUpdated": "Dec 17, 2016 3:32:15 PM", + "config": { + "colWidth": 12.0, + "editorHide": true, + "results": [ + { + "graph": { + "mode": "table", + "height": 300.0, + "optionOpen": false, + "keys": [], + "values": [], + "groups": [], + "scatter": {} + } + } + ], + "enabled": true, + "editorSetting": { + "language": "markdown", + "editOnDblClick": true + }, + "editorMode": "ace/mode/markdown", + "tableHide": false + }, + "settings": { + "params": {}, + "forms": {} + }, + "results": { + "code": "SUCCESS", + "msg": [ + { + "type": "HTML", + "data": "\u003cdiv class\u003d\"markdown-body\"\u003e\n\u003ch2\u003eWelcome to Zeppelin.\u003c/h2\u003e\n\u003ch5\u003eThis is a live tutorial, you can run the code yourself. (Shift-Enter to Run)\u003c/h5\u003e\n\u003c/div\u003e" + } + ] + }, + "apps": [], + "jobName": "paragraph_1423836981412_-1007008116", + "id": "20150213-231621_168813393", + "dateCreated": "Feb 13, 2015 11:16:21 PM", + "dateStarted": "Dec 17, 2016 3:32:15 PM", + "dateFinished": "Dec 17, 2016 3:32:18 PM", + "status": "FINISHED", + "progressUpdateIntervalMs": 500 + }, + { + "title": "Load data into table", + "text": "import org.apache.commons.io.IOUtils\nimport java.net.URL\nimport java.nio.charset.Charset\n\n// Zeppelin creates and injects sc (SparkContext) and sqlContext (HiveContext or SqlContext)\n// So you don\u0027t need create them manually\n\n// load bank data\nval bankText \u003d sc.parallelize(\n IOUtils.toString(\n new URL(\"https://s3.amazonaws.com/apache-zeppelin/tutorial/bank/bank.csv\"),\n Charset.forName(\"utf8\")).split(\"\\n\"))\n\ncase class Bank(age: Integer, job: String, marital: String, education: String, balance: Integer)\n\nval bank \u003d bankText.map(s \u003d\u003e s.split(\";\")).filter(s \u003d\u003e s(0) !\u003d \"\\\"age\\\"\").map(\n s \u003d\u003e Bank(s(0).toInt, \n s(1).replaceAll(\"\\\"\", \"\"),\n 
s(2).replaceAll(\"\\\"\", \"\"),\n s(3).replaceAll(\"\\\"\", \"\"),\n s(5).replaceAll(\"\\\"\", \"\").toInt\n )\n).toDF()\nbank.registerTempTable(\"bank\")", + "user": "anonymous", + "dateUpdated": "Dec 17, 2016 3:30:09 PM", + "config": { + "colWidth": 12.0, + "title": true, + "enabled": true, + "editorMode": "ace/mode/scala", + "results": [ + { + "graph": { + "mode": "table", + "height": 300.0, + "optionOpen": false + } + } + ], + "editorSetting": { + "language": "scala", + "editOnDblClick": false + } + }, + "settings": { + "params": {}, + "forms": {} + }, + "results": { + "code": "SUCCESS", + "msg": [ + { + "type": "TEXT", + "data": "import org.apache.commons.io.IOUtils\nimport java.net.URL\nimport java.nio.charset.Charset\nbankText: org.apache.spark.rdd.RDD[String] \u003d ParallelCollectionRDD[36] at parallelize at \u003cconsole\u003e:43\ndefined class Bank\nbank: org.apache.spark.sql.DataFrame \u003d [age: int, job: string ... 3 more fields]\nwarning: there were 1 deprecation warning(s); re-run with -deprecation for details\n" + } + ] + }, + "apps": [], + "jobName": "paragraph_1423500779206_-1502780787", + "id": "20150210-015259_1403135953", + "dateCreated": "Feb 10, 2015 1:52:59 AM", + "dateStarted": "Dec 17, 2016 3:30:09 PM", + "dateFinished": "Dec 17, 2016 3:30:58 PM", + "status": "FINISHED", + "progressUpdateIntervalMs": 500 + }, + { + "text": "%sql \nselect age, count(1) value\nfrom bank \nwhere age \u003c 30 \ngroup by age \norder by age", + "user": "anonymous", + "dateUpdated": "Mar 17, 2017 12:18:02 PM", + "config": { + "colWidth": 4.0, + "results": [ + { + "graph": { + "mode": "multiBarChart", + "height": 366.0, + "optionOpen": false + }, + "helium": {} + } + ], + "enabled": true, + "editorSetting": { + "language": "sql", + "editOnDblClick": false + }, + "editorMode": "ace/mode/sql" + }, + "settings": { + "params": {}, + "forms": {} + }, + "results": { + "code": "SUCCESS", + "msg": [ + { + "type": "TABLE", + "data": 
"age\tvalue\n19\t4\n20\t3\n21\t7\n22\t9\n23\t20\n24\t24\n25\t44\n26\t77\n27\t94\n28\t103\n29\t97\n" + } + ] + }, + "apps": [], + "jobName": "paragraph_1423500782552_-1439281894", + "id": "20150210-015302_1492795503", + "dateCreated": "Feb 10, 2015 1:53:02 AM", + "dateStarted": "Dec 17, 2016 3:30:13 PM", + "dateFinished": "Dec 17, 2016 3:31:04 PM", + "status": "FINISHED", + "progressUpdateIntervalMs": 500 + }, + { + "text": "%sql \nselect age, count(1) value \nfrom bank \nwhere age \u003c ${maxAge\u003d30} \ngroup by age \norder by age", + "user": "anonymous", + "dateUpdated": "Mar 17, 2017 12:17:39 PM", + "config": { + "colWidth": 4.0, + "results": [ + { + "graph": { + "mode": "multiBarChart", + "height": 294.0, + "optionOpen": false + }, + "helium": {} + } + ], + "enabled": true, + "editorSetting": { + "language": "sql", + "editOnDblClick": false + }, + "editorMode": "ace/mode/sql" + }, + "settings": { + "params": { + "maxAge": "35" + }, + "forms": { + "maxAge": { + "name": "maxAge", + "defaultValue": "30", + "hidden": false + } + } + }, + "results": { + "code": "SUCCESS", + "msg": [ + { + "type": "TABLE", + "data": "age\tvalue\n19\t4\n20\t3\n21\t7\n22\t9\n23\t20\n24\t24\n25\t44\n26\t77\n27\t94\n28\t103\n29\t97\n30\t150\n31\t199\n32\t224\n33\t186\n34\t231\n" + } + ] + }, + "apps": [], + "jobName": "paragraph_1423720444030_-1424110477", + "id": "20150212-145404_867439529", + "dateCreated": "Feb 12, 2015 2:54:04 PM", + "dateStarted": "Dec 17, 2016 3:30:58 PM", + "dateFinished": "Dec 17, 2016 3:31:07 PM", + "status": "FINISHED", + "progressUpdateIntervalMs": 500 + }, + { + "text": "%sql \nselect age, count(1) value \nfrom bank \nwhere marital\u003d\"${marital\u003dsingle,single|divorced|married}\" \ngroup by age \norder by age", + "user": "anonymous", + "dateUpdated": "Mar 17, 2017 12:18:18 PM", + "config": { + "colWidth": 4.0, + "results": [ + { + "graph": { + "mode": "stackedAreaChart", + "height": 280.0, + "optionOpen": false + }, + "helium": {} + } + ], + 
"enabled": true, + "editorSetting": { + "language": "sql", + "editOnDblClick": false + }, + "editorMode": "ace/mode/sql" + }, + "settings": { + "params": { + "marital": "single" + }, + "forms": { + "marital": { + "name": "marital", + "defaultValue": "single", + "options": [ + { + "value": "single" + }, + { + "value": "divorced" + }, + { + "value": "married" + } + ], + "hidden": false + } + } + }, + "results": { + "code": "SUCCESS", + "msg": [ + { + "type": "TABLE", + "data": "age\tvalue\n19\t4\n20\t3\n21\t7\n22\t9\n23\t17\n24\t13\n25\t33\n26\t56\n27\t64\n28\t78\n29\t56\n30\t92\n31\t86\n32\t105\n33\t61\n34\t75\n35\t46\n36\t50\n37\t43\n38\t44\n39\t30\n40\t25\n41\t19\n42\t23\n43\t21\n44\t20\n45\t15\n46\t14\n47\t12\n48\t12\n49\t11\n50\t8\n51\t6\n52\t9\n53\t4\n55\t3\n56\t3\n57\t2\n58\t7\n59\t2\n60\t5\n66\t2\n69\t1\n" + } + ] + }, + "apps": [], + "jobName": "paragraph_1423836262027_-210588283", + "id": "20150213-230422_1600658137", + "dateCreated": "Feb 13, 2015 11:04:22 PM", + "dateStarted": "Dec 17, 2016 3:31:05 PM", + "dateFinished": "Dec 17, 2016 3:31:09 PM", + "status": "FINISHED", + "progressUpdateIntervalMs": 500 + }, + { + "text": "%md\n## Congratulations, it\u0027s done.\n##### You can create your own notebook in \u0027Notebook\u0027 menu. Good luck!", + "user": "anonymous", + "dateUpdated": "Dec 17, 2016 3:30:24 PM", + "config": { + "colWidth": 12.0, + "editorHide": true, + "results": [ + { + "graph": { + "mode": "table", + "height": 300.0, + "optionOpen": false + } + } + ], + "enabled": true, + "editorSetting": { + "language": "markdown", + "editOnDblClick": true + }, + "editorMode": "ace/mode/markdown", + "tableHide": false + }, + "settings": { + "params": {}, + "forms": {} + }, + "results": { + "code": "SUCCESS", + "msg": [ + { + "type": "HTML", + "data": "\u003cdiv class\u003d\"markdown-body\"\u003e\n\u003ch2\u003eCongratulations, it\u0026rsquo;s done.\u003c/h2\u003e\n\u003ch5\u003eYou can create your own notebook in \u0026lsquo;Notebook\u0026rsquo; menu. 
Good luck!\u003c/h5\u003e\n\u003c/div\u003e" + } + ] + }, + "apps": [], + "jobName": "paragraph_1423836268492_216498320", + "id": "20150213-230428_1231780373", + "dateCreated": "Feb 13, 2015 11:04:28 PM", + "dateStarted": "Dec 17, 2016 3:30:24 PM", + "dateFinished": "Dec 17, 2016 3:30:29 PM", + "status": "FINISHED", + "progressUpdateIntervalMs": 500 + }, + { + "text": "%md\n\nAbout bank data\n\n```\nCitation Request:\n This dataset is public available for research. The details are described in [Moro et al., 2011]. \n Please include this citation if you plan to use this database:\n\n [Moro et al., 2011] S. Moro, R. Laureano and P. Cortez. Using Data Mining for Bank Direct Marketing: An Application of the CRISP-DM Methodology. \n In P. Novais et al. (Eds.), Proceedings of the European Simulation and Modelling Conference - ESM\u00272011, pp. 117-121, Guimarães, Portugal, October, 2011. EUROSIS.\n\n Available at: [pdf] http://hdl.handle.net/1822/14838\n [bib] http://www3.dsi.uminho.pt/pcortez/bib/2011-esm-1.txt\n```", + "user": "anonymous", + "dateUpdated": "Dec 17, 2016 3:30:34 PM", + "config": { + "colWidth": 12.0, + "editorHide": true, + "results": [ + { + "graph": { + "mode": "table", + "height": 300.0, + "optionOpen": false + } + } + ], + "enabled": true, + "editorSetting": { + "language": "markdown", + "editOnDblClick": true + }, + "editorMode": "ace/mode/markdown", + "tableHide": false + }, + "settings": { + "params": {}, + "forms": {} + }, + "results": { + "code": "SUCCESS", + "msg": [ + { + "type": "HTML", + "data": "\u003cdiv class\u003d\"markdown-body\"\u003e\n\u003cp\u003eAbout bank data\u003c/p\u003e\n\u003cpre\u003e\u003ccode\u003eCitation Request:\n This dataset is public available for research. The details are described in [Moro et al., 2011]. \n Please include this citation if you plan to use this database:\n\n [Moro et al., 2011] S. Moro, R. Laureano and P. Cortez. 
Using Data Mining for Bank Direct Marketing: An Application of the CRISP-DM Methodology. \n In P. Novais et al. (Eds.), Proceedings of the European Simulation and Modelling Conference - ESM\u0026#39;2011, pp. 117-121, Guimarães, Portugal, October, 2011. EUROSIS.\n\n Available at: [pdf] http://hdl.handle.net/1822/14838\n [bib] http://www3.dsi.uminho.pt/pcortez/bib/2011-esm-1.txt\n\u003c/code\u003e\u003c/pre\u003e\n\u003c/div\u003e" + } + ] + }, + "apps": [], + "jobName": "paragraph_1427420818407_872443482", + "id": "20150326-214658_12335843", + "dateCreated": "Mar 26, 2015 9:46:58 PM", + "dateStarted": "Dec 17, 2016 3:30:34 PM", + "dateFinished": "Dec 17, 2016 3:30:34 PM", + "status": "FINISHED", + "progressUpdateIntervalMs": 500 + }, + { + "config": {}, + "settings": { + "params": {}, + "forms": {} + }, + "apps": [], + "jobName": "paragraph_1435955447812_-158639899", + "id": "20150703-133047_853701097", + "dateCreated": "Jul 3, 2015 1:30:47 PM", + "status": "READY", + "progressUpdateIntervalMs": 500 + } + ], + "name": "Basic Features (Spark)", + "id": "2A94M5J1Z", + "angularObjects": { + "2C73DY9P9:shared_process": [] + }, + "config": { + "looknfeel": "default" + }, + "info": {} +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/zeppelin/blob/085efeb6/"notebook/Zeppelin Tutorial/Matplotlib (Python \342\200\242 PySpark)_2C2AUG798.zpln" ----------------------------------------------------------------------