http://git-wip-us.apache.org/repos/asf/madlib-site/blob/3f849b9e/community-artifacts/Encoding-categorical-variables-1dot10-v1.ipynb
----------------------------------------------------------------------
diff --git a/community-artifacts/Encoding-categorical-variables-1dot10-v1.ipynb 
b/community-artifacts/Encoding-categorical-variables-1dot10-v1.ipynb
deleted file mode 100644
index 409de20..0000000
--- a/community-artifacts/Encoding-categorical-variables-1dot10-v1.ipynb
+++ /dev/null
@@ -1,2748 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Encoding categorical variables (MADlib v1.10+)\n",
-    "This is the new module that replaces create_indicator_variables() which 
has been deprecated as of MADlib v1.10"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 44,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "The sql extension is already loaded. To reload it, use:\n",
-      "  %reload_ext sql\n"
-     ]
-    }
-   ],
-   "source": [
-    "%load_ext sql"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 45,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "u'Connected: gpdbchina@madlib'"
-      ]
-     },
-     "execution_count": 45,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "%sql postgresql://gpdbchina@10.194.10.68:55000/madlib\n",
-    "#%sql postgresql://fmcquillan@localhost:5432/madlib\n",
-    "#%sql postgresql://gpadmin@54.197.30.46:10432/gpadmin"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 46,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "1 rows affected.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<table>\n",
-       "    <tr>\n",
-       "        <th>version</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>MADlib version: 1.10.0-dev, git revision: 
rel/v1.9.1-47-g2d5a5ed, cmake configuration time: Tue Feb  7 19:45:19 UTC 2017, 
build type: Release, build system: Linux-2.6.18-238.27.1.el5.hotfix.bz516490, C 
compiler: gcc 4.4.0, C++ compiler: g++ 4.4.0</td>\n",
-       "    </tr>\n",
-       "</table>"
-      ],
-      "text/plain": [
-       "[(u'MADlib version: 1.10.0-dev, git revision: rel/v1.9.1-47-g2d5a5ed, 
cmake configuration time: Tue Feb  7 19:45:19 UTC 2017, build type: Release, 
build system: Linux-2.6.18-238.27.1.el5.hotfix.bz516490, C compiler: gcc 4.4.0, 
C++ compiler: g++ 4.4.0',)]"
-      ]
-     },
-     "execution_count": 46,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "%sql select madlib.version();\n",
-    "#%sql select version();"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 1.  Load data set\n",
-    "Use a subset of the abalone dataset:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 47,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Done.\n",
-      "Done.\n",
-      "20 rows affected.\n",
-      "20 rows affected.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<table>\n",
-       "    <tr>\n",
-       "        <th>id</th>\n",
-       "        <th>sex</th>\n",
-       "        <th>length</th>\n",
-       "        <th>diameter</th>\n",
-       "        <th>height</th>\n",
-       "        <th>rings</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>1</td>\n",
-       "        <td>M</td>\n",
-       "        <td>0.455</td>\n",
-       "        <td>0.365</td>\n",
-       "        <td>0.095</td>\n",
-       "        <td>15</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>2</td>\n",
-       "        <td>M</td>\n",
-       "        <td>0.35</td>\n",
-       "        <td>0.265</td>\n",
-       "        <td>0.09</td>\n",
-       "        <td>7</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>3</td>\n",
-       "        <td>F</td>\n",
-       "        <td>0.53</td>\n",
-       "        <td>0.42</td>\n",
-       "        <td>0.135</td>\n",
-       "        <td>9</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>4</td>\n",
-       "        <td>M</td>\n",
-       "        <td>0.44</td>\n",
-       "        <td>0.365</td>\n",
-       "        <td>0.125</td>\n",
-       "        <td>10</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>5</td>\n",
-       "        <td>I</td>\n",
-       "        <td>0.33</td>\n",
-       "        <td>0.255</td>\n",
-       "        <td>0.08</td>\n",
-       "        <td>7</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>6</td>\n",
-       "        <td>I</td>\n",
-       "        <td>0.425</td>\n",
-       "        <td>0.3</td>\n",
-       "        <td>0.095</td>\n",
-       "        <td>8</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>7</td>\n",
-       "        <td>F</td>\n",
-       "        <td>0.53</td>\n",
-       "        <td>0.415</td>\n",
-       "        <td>0.15</td>\n",
-       "        <td>20</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>8</td>\n",
-       "        <td>F</td>\n",
-       "        <td>0.545</td>\n",
-       "        <td>0.425</td>\n",
-       "        <td>0.125</td>\n",
-       "        <td>16</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>9</td>\n",
-       "        <td>M</td>\n",
-       "        <td>0.475</td>\n",
-       "        <td>0.37</td>\n",
-       "        <td>0.125</td>\n",
-       "        <td>9</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>10</td>\n",
-       "        <td>None</td>\n",
-       "        <td>0.55</td>\n",
-       "        <td>0.44</td>\n",
-       "        <td>0.15</td>\n",
-       "        <td>19</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>11</td>\n",
-       "        <td>F</td>\n",
-       "        <td>0.525</td>\n",
-       "        <td>0.38</td>\n",
-       "        <td>0.14</td>\n",
-       "        <td>14</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>12</td>\n",
-       "        <td>M</td>\n",
-       "        <td>0.43</td>\n",
-       "        <td>0.35</td>\n",
-       "        <td>0.11</td>\n",
-       "        <td>10</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>13</td>\n",
-       "        <td>M</td>\n",
-       "        <td>0.49</td>\n",
-       "        <td>0.38</td>\n",
-       "        <td>0.135</td>\n",
-       "        <td>11</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>14</td>\n",
-       "        <td>F</td>\n",
-       "        <td>0.535</td>\n",
-       "        <td>0.405</td>\n",
-       "        <td>0.145</td>\n",
-       "        <td>10</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>15</td>\n",
-       "        <td>F</td>\n",
-       "        <td>0.47</td>\n",
-       "        <td>0.355</td>\n",
-       "        <td>0.1</td>\n",
-       "        <td>10</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>16</td>\n",
-       "        <td>M</td>\n",
-       "        <td>0.5</td>\n",
-       "        <td>0.4</td>\n",
-       "        <td>0.13</td>\n",
-       "        <td>12</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>17</td>\n",
-       "        <td>I</td>\n",
-       "        <td>0.355</td>\n",
-       "        <td>0.28</td>\n",
-       "        <td>0.085</td>\n",
-       "        <td>7</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>18</td>\n",
-       "        <td>F</td>\n",
-       "        <td>0.44</td>\n",
-       "        <td>0.34</td>\n",
-       "        <td>0.1</td>\n",
-       "        <td>10</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>19</td>\n",
-       "        <td>M</td>\n",
-       "        <td>0.365</td>\n",
-       "        <td>0.295</td>\n",
-       "        <td>0.08</td>\n",
-       "        <td>7</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>20</td>\n",
-       "        <td>None</td>\n",
-       "        <td>0.45</td>\n",
-       "        <td>0.32</td>\n",
-       "        <td>0.1</td>\n",
-       "        <td>9</td>\n",
-       "    </tr>\n",
-       "</table>"
-      ],
-      "text/plain": [
-       "[(1, u'M', 0.455, 0.365, 0.095, 15),\n",
-       " (2, u'M', 0.35, 0.265, 0.09, 7),\n",
-       " (3, u'F', 0.53, 0.42, 0.135, 9),\n",
-       " (4, u'M', 0.44, 0.365, 0.125, 10),\n",
-       " (5, u'I', 0.33, 0.255, 0.08, 7),\n",
-       " (6, u'I', 0.425, 0.3, 0.095, 8),\n",
-       " (7, u'F', 0.53, 0.415, 0.15, 20),\n",
-       " (8, u'F', 0.545, 0.425, 0.125, 16),\n",
-       " (9, u'M', 0.475, 0.37, 0.125, 9),\n",
-       " (10, None, 0.55, 0.44, 0.15, 19),\n",
-       " (11, u'F', 0.525, 0.38, 0.14, 14),\n",
-       " (12, u'M', 0.43, 0.35, 0.11, 10),\n",
-       " (13, u'M', 0.49, 0.38, 0.135, 11),\n",
-       " (14, u'F', 0.535, 0.405, 0.145, 10),\n",
-       " (15, u'F', 0.47, 0.355, 0.1, 10),\n",
-       " (16, u'M', 0.5, 0.4, 0.13, 12),\n",
-       " (17, u'I', 0.355, 0.28, 0.085, 7),\n",
-       " (18, u'F', 0.44, 0.34, 0.1, 10),\n",
-       " (19, u'M', 0.365, 0.295, 0.08, 7),\n",
-       " (20, None, 0.45, 0.32, 0.1, 9)]"
-      ]
-     },
-     "execution_count": 47,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "%%sql \n",
-    "DROP TABLE IF EXISTS abalone;\n",
-    "\n",
-    "CREATE TABLE abalone (\n",
-    "    id serial,\n",
-    "    sex character varying,\n",
-    "    length double precision,\n",
-    "    diameter double precision,\n",
-    "    height double precision,\n",
-    "    rings int\n",
-    ");\n",
-    "\n",
-    "INSERT INTO abalone (sex, length, diameter, height, rings) VALUES\n",
-    "('M',    0.455,  0.365,  0.095,  15),\n",
-    "('M',    0.35,   0.265,  0.09,   7),\n",
-    "('F',    0.53,   0.42,   0.135,  9),\n",
-    "('M',    0.44,   0.365,  0.125,  10),\n",
-    "('I',    0.33,   0.255,  0.08,   7),\n",
-    "('I',    0.425,  0.3,    0.095,  8),\n",
-    "('F',    0.53,   0.415,  0.15,   20),\n",
-    "('F',    0.545,  0.425,  0.125,  16),\n",
-    "('M',    0.475,  0.37,   0.125,  9),\n",
-    "(NULL,   0.55,   0.44,   0.15,   19),\n",
-    "('F',    0.525,  0.38,   0.14,   14),\n",
-    "('M',    0.43,   0.35,   0.11,   10),\n",
-    "('M',    0.49,   0.38,   0.135,  11),\n",
-    "('F',    0.535,  0.405,  0.145,  10),\n",
-    "('F',    0.47,   0.355,  0.1,    10),\n",
-    "('M',    0.5,    0.4,    0.13,   12),\n",
-    "('I',    0.355,  0.28,   0.085,  7),\n",
-    "('F',    0.44,   0.34,   0.1,    10),\n",
-    "('M',    0.365,  0.295,  0.08,   7),\n",
-    "(NULL,   0.45,   0.32,   0.1,    9);\n",
-    "\n",
-    "SELECT * FROM abalone ORDER BY id;"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 2. Create new table with one-hot encoding.\n",
-    "The column 'sex' is replaced by three columns encoding the values 'F', 
'M' and 'I'.  Null values are not encoded by default:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 48,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Done.\n",
-      "1 rows affected.\n",
-      "20 rows affected.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<table>\n",
-       "    <tr>\n",
-       "        <th>id</th>\n",
-       "        <th>length</th>\n",
-       "        <th>diameter</th>\n",
-       "        <th>height</th>\n",
-       "        <th>rings</th>\n",
-       "        <th>sex_F</th>\n",
-       "        <th>sex_I</th>\n",
-       "        <th>sex_M</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>1</td>\n",
-       "        <td>0.455</td>\n",
-       "        <td>0.365</td>\n",
-       "        <td>0.095</td>\n",
-       "        <td>15</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>2</td>\n",
-       "        <td>0.35</td>\n",
-       "        <td>0.265</td>\n",
-       "        <td>0.09</td>\n",
-       "        <td>7</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>3</td>\n",
-       "        <td>0.53</td>\n",
-       "        <td>0.42</td>\n",
-       "        <td>0.135</td>\n",
-       "        <td>9</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>4</td>\n",
-       "        <td>0.44</td>\n",
-       "        <td>0.365</td>\n",
-       "        <td>0.125</td>\n",
-       "        <td>10</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>5</td>\n",
-       "        <td>0.33</td>\n",
-       "        <td>0.255</td>\n",
-       "        <td>0.08</td>\n",
-       "        <td>7</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>6</td>\n",
-       "        <td>0.425</td>\n",
-       "        <td>0.3</td>\n",
-       "        <td>0.095</td>\n",
-       "        <td>8</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>7</td>\n",
-       "        <td>0.53</td>\n",
-       "        <td>0.415</td>\n",
-       "        <td>0.15</td>\n",
-       "        <td>20</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>8</td>\n",
-       "        <td>0.545</td>\n",
-       "        <td>0.425</td>\n",
-       "        <td>0.125</td>\n",
-       "        <td>16</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>9</td>\n",
-       "        <td>0.475</td>\n",
-       "        <td>0.37</td>\n",
-       "        <td>0.125</td>\n",
-       "        <td>9</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>10</td>\n",
-       "        <td>0.55</td>\n",
-       "        <td>0.44</td>\n",
-       "        <td>0.15</td>\n",
-       "        <td>19</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>11</td>\n",
-       "        <td>0.525</td>\n",
-       "        <td>0.38</td>\n",
-       "        <td>0.14</td>\n",
-       "        <td>14</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>12</td>\n",
-       "        <td>0.43</td>\n",
-       "        <td>0.35</td>\n",
-       "        <td>0.11</td>\n",
-       "        <td>10</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>13</td>\n",
-       "        <td>0.49</td>\n",
-       "        <td>0.38</td>\n",
-       "        <td>0.135</td>\n",
-       "        <td>11</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>14</td>\n",
-       "        <td>0.535</td>\n",
-       "        <td>0.405</td>\n",
-       "        <td>0.145</td>\n",
-       "        <td>10</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>15</td>\n",
-       "        <td>0.47</td>\n",
-       "        <td>0.355</td>\n",
-       "        <td>0.1</td>\n",
-       "        <td>10</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>16</td>\n",
-       "        <td>0.5</td>\n",
-       "        <td>0.4</td>\n",
-       "        <td>0.13</td>\n",
-       "        <td>12</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>17</td>\n",
-       "        <td>0.355</td>\n",
-       "        <td>0.28</td>\n",
-       "        <td>0.085</td>\n",
-       "        <td>7</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>18</td>\n",
-       "        <td>0.44</td>\n",
-       "        <td>0.34</td>\n",
-       "        <td>0.1</td>\n",
-       "        <td>10</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>19</td>\n",
-       "        <td>0.365</td>\n",
-       "        <td>0.295</td>\n",
-       "        <td>0.08</td>\n",
-       "        <td>7</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>20</td>\n",
-       "        <td>0.45</td>\n",
-       "        <td>0.32</td>\n",
-       "        <td>0.1</td>\n",
-       "        <td>9</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "</table>"
-      ],
-      "text/plain": [
-       "[(1, 0.455, 0.365, 0.095, 15, 0, 0, 1),\n",
-       " (2, 0.35, 0.265, 0.09, 7, 0, 0, 1),\n",
-       " (3, 0.53, 0.42, 0.135, 9, 1, 0, 0),\n",
-       " (4, 0.44, 0.365, 0.125, 10, 0, 0, 1),\n",
-       " (5, 0.33, 0.255, 0.08, 7, 0, 1, 0),\n",
-       " (6, 0.425, 0.3, 0.095, 8, 0, 1, 0),\n",
-       " (7, 0.53, 0.415, 0.15, 20, 1, 0, 0),\n",
-       " (8, 0.545, 0.425, 0.125, 16, 1, 0, 0),\n",
-       " (9, 0.475, 0.37, 0.125, 9, 0, 0, 1),\n",
-       " (10, 0.55, 0.44, 0.15, 19, 0, 0, 0),\n",
-       " (11, 0.525, 0.38, 0.14, 14, 1, 0, 0),\n",
-       " (12, 0.43, 0.35, 0.11, 10, 0, 0, 1),\n",
-       " (13, 0.49, 0.38, 0.135, 11, 0, 0, 1),\n",
-       " (14, 0.535, 0.405, 0.145, 10, 1, 0, 0),\n",
-       " (15, 0.47, 0.355, 0.1, 10, 1, 0, 0),\n",
-       " (16, 0.5, 0.4, 0.13, 12, 0, 0, 1),\n",
-       " (17, 0.355, 0.28, 0.085, 7, 0, 1, 0),\n",
-       " (18, 0.44, 0.34, 0.1, 10, 1, 0, 0),\n",
-       " (19, 0.365, 0.295, 0.08, 7, 0, 0, 1),\n",
-       " (20, 0.45, 0.32, 0.1, 9, 0, 0, 0)]"
-      ]
-     },
-     "execution_count": 48,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "%%sql\n",
-    "DROP TABLE IF EXISTS abalone_out, abalone_out_dictionary;\n",
-    "SELECT madlib.encode_categorical_variables (\n",
-    "        'abalone',                   -- Source table\n",
-    "        'abalone_out',               -- Output table\n",
-    "        'sex'                        -- Categorical columns\n",
-    "        );\n",
-    "SELECT * FROM abalone_out ORDER BY id;"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 3.  Encode null values\n",
-    "Now include NULL values in encoding (note the additional column 
'sex_NULL'):"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 49,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Done.\n",
-      "1 rows affected.\n",
-      "20 rows affected.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<table>\n",
-       "    <tr>\n",
-       "        <th>id</th>\n",
-       "        <th>length</th>\n",
-       "        <th>diameter</th>\n",
-       "        <th>height</th>\n",
-       "        <th>rings</th>\n",
-       "        <th>sex_F</th>\n",
-       "        <th>sex_I</th>\n",
-       "        <th>sex_M</th>\n",
-       "        <th>sex_NULL</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>1</td>\n",
-       "        <td>0.455</td>\n",
-       "        <td>0.365</td>\n",
-       "        <td>0.095</td>\n",
-       "        <td>15</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>2</td>\n",
-       "        <td>0.35</td>\n",
-       "        <td>0.265</td>\n",
-       "        <td>0.09</td>\n",
-       "        <td>7</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>3</td>\n",
-       "        <td>0.53</td>\n",
-       "        <td>0.42</td>\n",
-       "        <td>0.135</td>\n",
-       "        <td>9</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>4</td>\n",
-       "        <td>0.44</td>\n",
-       "        <td>0.365</td>\n",
-       "        <td>0.125</td>\n",
-       "        <td>10</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>5</td>\n",
-       "        <td>0.33</td>\n",
-       "        <td>0.255</td>\n",
-       "        <td>0.08</td>\n",
-       "        <td>7</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>6</td>\n",
-       "        <td>0.425</td>\n",
-       "        <td>0.3</td>\n",
-       "        <td>0.095</td>\n",
-       "        <td>8</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>7</td>\n",
-       "        <td>0.53</td>\n",
-       "        <td>0.415</td>\n",
-       "        <td>0.15</td>\n",
-       "        <td>20</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>8</td>\n",
-       "        <td>0.545</td>\n",
-       "        <td>0.425</td>\n",
-       "        <td>0.125</td>\n",
-       "        <td>16</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>9</td>\n",
-       "        <td>0.475</td>\n",
-       "        <td>0.37</td>\n",
-       "        <td>0.125</td>\n",
-       "        <td>9</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>10</td>\n",
-       "        <td>0.55</td>\n",
-       "        <td>0.44</td>\n",
-       "        <td>0.15</td>\n",
-       "        <td>19</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>11</td>\n",
-       "        <td>0.525</td>\n",
-       "        <td>0.38</td>\n",
-       "        <td>0.14</td>\n",
-       "        <td>14</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>12</td>\n",
-       "        <td>0.43</td>\n",
-       "        <td>0.35</td>\n",
-       "        <td>0.11</td>\n",
-       "        <td>10</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>13</td>\n",
-       "        <td>0.49</td>\n",
-       "        <td>0.38</td>\n",
-       "        <td>0.135</td>\n",
-       "        <td>11</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>14</td>\n",
-       "        <td>0.535</td>\n",
-       "        <td>0.405</td>\n",
-       "        <td>0.145</td>\n",
-       "        <td>10</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>15</td>\n",
-       "        <td>0.47</td>\n",
-       "        <td>0.355</td>\n",
-       "        <td>0.1</td>\n",
-       "        <td>10</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>16</td>\n",
-       "        <td>0.5</td>\n",
-       "        <td>0.4</td>\n",
-       "        <td>0.13</td>\n",
-       "        <td>12</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>17</td>\n",
-       "        <td>0.355</td>\n",
-       "        <td>0.28</td>\n",
-       "        <td>0.085</td>\n",
-       "        <td>7</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>18</td>\n",
-       "        <td>0.44</td>\n",
-       "        <td>0.34</td>\n",
-       "        <td>0.1</td>\n",
-       "        <td>10</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>19</td>\n",
-       "        <td>0.365</td>\n",
-       "        <td>0.295</td>\n",
-       "        <td>0.08</td>\n",
-       "        <td>7</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>20</td>\n",
-       "        <td>0.45</td>\n",
-       "        <td>0.32</td>\n",
-       "        <td>0.1</td>\n",
-       "        <td>9</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "</table>"
-      ],
-      "text/plain": [
-       "[(1, 0.455, 0.365, 0.095, 15, 0, 0, 1, 0),\n",
-       " (2, 0.35, 0.265, 0.09, 7, 0, 0, 1, 0),\n",
-       " (3, 0.53, 0.42, 0.135, 9, 1, 0, 0, 0),\n",
-       " (4, 0.44, 0.365, 0.125, 10, 0, 0, 1, 0),\n",
-       " (5, 0.33, 0.255, 0.08, 7, 0, 1, 0, 0),\n",
-       " (6, 0.425, 0.3, 0.095, 8, 0, 1, 0, 0),\n",
-       " (7, 0.53, 0.415, 0.15, 20, 1, 0, 0, 0),\n",
-       " (8, 0.545, 0.425, 0.125, 16, 1, 0, 0, 0),\n",
-       " (9, 0.475, 0.37, 0.125, 9, 0, 0, 1, 0),\n",
-       " (10, 0.55, 0.44, 0.15, 19, 0, 0, 0, 1),\n",
-       " (11, 0.525, 0.38, 0.14, 14, 1, 0, 0, 0),\n",
-       " (12, 0.43, 0.35, 0.11, 10, 0, 0, 1, 0),\n",
-       " (13, 0.49, 0.38, 0.135, 11, 0, 0, 1, 0),\n",
-       " (14, 0.535, 0.405, 0.145, 10, 1, 0, 0, 0),\n",
-       " (15, 0.47, 0.355, 0.1, 10, 1, 0, 0, 0),\n",
-       " (16, 0.5, 0.4, 0.13, 12, 0, 0, 1, 0),\n",
-       " (17, 0.355, 0.28, 0.085, 7, 0, 1, 0, 0),\n",
-       " (18, 0.44, 0.34, 0.1, 10, 1, 0, 0, 0),\n",
-       " (19, 0.365, 0.295, 0.08, 7, 0, 0, 1, 0),\n",
-       " (20, 0.45, 0.32, 0.1, 9, 0, 0, 0, 1)]"
-      ]
-     },
-     "execution_count": 49,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "%%sql\n",
-    "DROP TABLE IF EXISTS abalone_out, abalone_out_dictionary;\n",
-    "SELECT madlib.encode_categorical_variables (\n",
-    "        'abalone',                   -- Source table\n",
-    "        'abalone_out',               -- Output table\n",
-    "        'sex',                       -- Categorical columns\n",
-    "        NULL,                        -- Categorical columns to exclude\n",
-    "        NULL,                        -- Index columns\n",
-    "        NULL,                        -- Top values\n",
-    "        NULL,                        -- Value to drop for dummy 
encoding\n",
-    "        TRUE                         -- Encode nulls\n",
-    "        );\n",
-    "SELECT * FROM abalone_out ORDER BY id;"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 4. Encode all categorical variables and specify an index\n",
-    "Encode all categorical variables in the source table. Also, specify the 
column 'id' as the index (primary key) - this changes the output table to only 
include the index and the encoded variables:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 50,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Done.\n",
-      "1 rows affected.\n",
-      "20 rows affected.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<table>\n",
-       "    <tr>\n",
-       "        <th>id</th>\n",
-       "        <th>sex_F</th>\n",
-       "        <th>sex_I</th>\n",
-       "        <th>sex_M</th>\n",
-       "        <th>rings_7</th>\n",
-       "        <th>rings_8</th>\n",
-       "        <th>rings_9</th>\n",
-       "        <th>rings_10</th>\n",
-       "        <th>rings_11</th>\n",
-       "        <th>rings_12</th>\n",
-       "        <th>rings_14</th>\n",
-       "        <th>rings_15</th>\n",
-       "        <th>rings_16</th>\n",
-       "        <th>rings_19</th>\n",
-       "        <th>rings_20</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>2</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>3</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>4</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>5</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>6</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>7</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>8</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>9</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>10</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>11</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>12</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>13</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>14</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>15</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>16</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>17</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>18</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>19</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>20</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "</table>"
-      ],
-      "text/plain": [
-       "[(1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0),\n",
-       " (2, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),\n",
-       " (3, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0),\n",
-       " (4, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0),\n",
-       " (5, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),\n",
-       " (6, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0),\n",
-       " (7, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1),\n",
-       " (8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0),\n",
-       " (9, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0),\n",
-       " (10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0),\n",
-       " (11, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0),\n",
-       " (12, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0),\n",
-       " (13, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0),\n",
-       " (14, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0),\n",
-       " (15, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0),\n",
-       " (16, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0),\n",
-       " (17, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),\n",
-       " (18, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0),\n",
-       " (19, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),\n",
-       " (20, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0)]"
-      ]
-     },
-     "execution_count": 50,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "%%sql\n",
-    "DROP TABLE IF EXISTS abalone_out, abalone_out_dictionary;\n",
-    "SELECT madlib.encode_categorical_variables (\n",
-    "        'abalone',                   -- Source table\n",
-    "        'abalone_out',               -- Output table\n",
-    "        '*',                         -- Categorical columns\n",
-    "        NULL,                        -- Categorical columns to exclude\n",
-    "        'id'                         -- Index columns\n",
-    "        );\n",
-    "SELECT * FROM abalone_out ORDER BY id;"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 5. Encode top values\n",
-    "Now let's encode only the top values and group others into a 
miscellaneous bucket column. Top values can be global across all columns or 
specified by column.  As an example of the latter, here are the top 2 'sex' 
values and the top 50% of  'rings' values:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 51,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Done.\n",
-      "1 rows affected.\n",
-      "20 rows affected.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<table>\n",
-       "    <tr>\n",
-       "        <th>id</th>\n",
-       "        <th>sex_M</th>\n",
-       "        <th>sex_F</th>\n",
-       "        <th>sex__MISC__</th>\n",
-       "        <th>rings_10</th>\n",
-       "        <th>rings_7</th>\n",
-       "        <th>rings_9</th>\n",
-       "        <th>rings__MISC__</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>1</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>2</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>3</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>4</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>5</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>6</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>7</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>8</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>9</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>10</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>11</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>12</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>13</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>14</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>15</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>16</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>17</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>18</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>19</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>20</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "</table>"
-      ],
-      "text/plain": [
-       "[(1, 1, 0, 0, 0, 0, 0, 1),\n",
-       " (2, 1, 0, 0, 0, 1, 0, 0),\n",
-       " (3, 0, 1, 0, 0, 0, 1, 0),\n",
-       " (4, 1, 0, 0, 1, 0, 0, 0),\n",
-       " (5, 0, 0, 1, 0, 1, 0, 0),\n",
-       " (6, 0, 0, 1, 0, 0, 0, 1),\n",
-       " (7, 0, 1, 0, 0, 0, 0, 1),\n",
-       " (8, 0, 1, 0, 0, 0, 0, 1),\n",
-       " (9, 1, 0, 0, 0, 0, 1, 0),\n",
-       " (10, 0, 0, 0, 0, 0, 0, 1),\n",
-       " (11, 0, 1, 0, 0, 0, 0, 1),\n",
-       " (12, 1, 0, 0, 1, 0, 0, 0),\n",
-       " (13, 1, 0, 0, 0, 0, 0, 1),\n",
-       " (14, 0, 1, 0, 1, 0, 0, 0),\n",
-       " (15, 0, 1, 0, 1, 0, 0, 0),\n",
-       " (16, 1, 0, 0, 0, 0, 0, 1),\n",
-       " (17, 0, 0, 1, 0, 1, 0, 0),\n",
-       " (18, 0, 1, 0, 1, 0, 0, 0),\n",
-       " (19, 1, 0, 0, 0, 1, 0, 0),\n",
-       " (20, 0, 0, 0, 0, 0, 1, 0)]"
-      ]
-     },
-     "execution_count": 51,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "%%sql\n",
-    "DROP TABLE IF EXISTS abalone_out, abalone_out_dictionary;\n",
-    "SELECT madlib.encode_categorical_variables (\n",
-    "        'abalone',                   -- Source table\n",
-    "        'abalone_out',               -- Output table\n",
-    "        '*',                         -- Categorical columns\n",
-    "        NULL,                        -- Categorical columns to exclude\n",
-    "        'id',                        -- Index columns\n",
-    "        'sex=2, rings=0.5'           -- Top values\n",
-    "        );\n",
-    "SELECT * FROM abalone_out ORDER BY id;"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 6. Show raw variables and encoded variables together\n",
-    "If you want to see both the raw categorical variable and its encoded form 
in the output_table, then include the categorical variables 'sex' and 'rings' 
in the index parameter. (Remember that this will not work if you specify '*' 
for the parameter 'categorical_cols', because in this case 'row_id' columns 
will not be encoded at all.)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 52,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Done.\n",
-      "1 rows affected.\n",
-      "20 rows affected.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<table>\n",
-       "    <tr>\n",
-       "        <th>id</th>\n",
-       "        <th>sex</th>\n",
-       "        <th>rings</th>\n",
-       "        <th>sex_F</th>\n",
-       "        <th>sex_I</th>\n",
-       "        <th>sex_M</th>\n",
-       "        <th>rings_7</th>\n",
-       "        <th>rings_8</th>\n",
-       "        <th>rings_9</th>\n",
-       "        <th>rings_10</th>\n",
-       "        <th>rings_11</th>\n",
-       "        <th>rings_12</th>\n",
-       "        <th>rings_14</th>\n",
-       "        <th>rings_15</th>\n",
-       "        <th>rings_16</th>\n",
-       "        <th>rings_19</th>\n",
-       "        <th>rings_20</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>1</td>\n",
-       "        <td>M</td>\n",
-       "        <td>15</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>2</td>\n",
-       "        <td>M</td>\n",
-       "        <td>7</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>3</td>\n",
-       "        <td>F</td>\n",
-       "        <td>9</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>4</td>\n",
-       "        <td>M</td>\n",
-       "        <td>10</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>5</td>\n",
-       "        <td>I</td>\n",
-       "        <td>7</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>6</td>\n",
-       "        <td>I</td>\n",
-       "        <td>8</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>7</td>\n",
-       "        <td>F</td>\n",
-       "        <td>20</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>8</td>\n",
-       "        <td>F</td>\n",
-       "        <td>16</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>9</td>\n",
-       "        <td>M</td>\n",
-       "        <td>9</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>10</td>\n",
-       "        <td>None</td>\n",
-       "        <td>19</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>11</td>\n",
-       "        <td>F</td>\n",
-       "        <td>14</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>12</td>\n",
-       "        <td>M</td>\n",
-       "        <td>10</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>13</td>\n",
-       "        <td>M</td>\n",
-       "        <td>11</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>14</td>\n",
-       "        <td>F</td>\n",
-       "        <td>10</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>15</td>\n",
-       "        <td>F</td>\n",
-       "        <td>10</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>16</td>\n",
-       "        <td>M</td>\n",
-       "        <td>12</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>17</td>\n",
-       "        <td>I</td>\n",
-       "        <td>7</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>18</td>\n",
-       "        <td>F</td>\n",
-       "        <td>10</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>19</td>\n",
-       "        <td>M</td>\n",
-       "        <td>7</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>20</td>\n",
-       "        <td>None</td>\n",
-       "        <td>9</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "</table>"
-      ],
-      "text/plain": [
-       "[(1, u'M', 15, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0),\n",
-       " (2, u'M', 7, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),\n",
-       " (3, u'F', 9, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0),\n",
-       " (4, u'M', 10, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0),\n",
-       " (5, u'I', 7, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),\n",
-       " (6, u'I', 8, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0),\n",
-       " (7, u'F', 20, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1),\n",
-       " (8, u'F', 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0),\n",
-       " (9, u'M', 9, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0),\n",
-       " (10, None, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0),\n",
-       " (11, u'F', 14, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0),\n",
-       " (12, u'M', 10, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0),\n",
-       " (13, u'M', 11, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0),\n",
-       " (14, u'F', 10, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0),\n",
-       " (15, u'F', 10, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0),\n",
-       " (16, u'M', 12, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0),\n",
-       " (17, u'I', 7, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),\n",
-       " (18, u'F', 10, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0),\n",
-       " (19, u'M', 7, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),\n",
-       " (20, None, 9, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0)]"
-      ]
-     },
-     "execution_count": 52,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "%%sql\n",
-    "DROP TABLE IF EXISTS abalone_out, abalone_out_dictionary;\n",
-    "SELECT madlib.encode_categorical_variables (\n",
-    "        'abalone',                   -- Source table\n",
-    "        'abalone_out',               -- Output table\n",
-    "        'sex, rings',                -- Categorical columns\n",
-    "        NULL,                        -- Categorical columns to exclude\n",
-    "        'id, sex, rings'             -- Index columns\n",
-    "        );\n",
-    "SELECT * FROM abalone_out ORDER BY id;"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 7. Dummy encoding\n",
-    "For dummy encoding, let's make the 'I' value from the 'sex' variable as 
the reference.\n",
-    "Here we use the 'value_to_drop' parameter:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 53,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Done.\n",
-      "1 rows affected.\n",
-      "20 rows affected.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<table>\n",
-       "    <tr>\n",
-       "        <th>id</th>\n",
-       "        <th>sex_F</th>\n",
-       "        <th>sex_M</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>2</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>3</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>4</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>5</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>6</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>7</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>8</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>9</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>10</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>11</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>12</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>13</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>14</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>15</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>16</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>17</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>18</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>19</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>20</td>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "</table>"
-      ],
-      "text/plain": [
-       "[(1, 0, 1),\n",
-       " (2, 0, 1),\n",
-       " (3, 1, 0),\n",
-       " (4, 0, 1),\n",
-       " (5, 0, 0),\n",
-       " (6, 0, 0),\n",
-       " (7, 1, 0),\n",
-       " (8, 1, 0),\n",
-       " (9, 0, 1),\n",
-       " (10, 0, 0),\n",
-       " (11, 1, 0),\n",
-       " (12, 0, 1),\n",
-       " (13, 0, 1),\n",
-       " (14, 1, 0),\n",
-       " (15, 1, 0),\n",
-       " (16, 0, 1),\n",
-       " (17, 0, 0),\n",
-       " (18, 1, 0),\n",
-       " (19, 0, 1),\n",
-       " (20, 0, 0)]"
-      ]
-     },
-     "execution_count": 53,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "%%sql\n",
-    "DROP TABLE IF EXISTS abalone_out, abalone_out_dictionary;\n",
-    "SELECT madlib.encode_categorical_variables (\n",
-    "        'abalone',                   -- Source table\n",
-    "        'abalone_out',               -- Output table\n",
-    "        '*',                         -- Categorical columns\n",
-    "        'rings',                     -- Categorical columns to exclude\n",
-    "        'id',                        -- Index columns\n",
-    "        NULL,                        -- Top value\n",
-    "        'sex=I'                      -- Value to drop for dummy encoding  
            \n",
-    "        );\n",
-    "SELECT * FROM abalone_out ORDER BY id;"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 8. Array output\n",
-    "Create an array output for the two categorical variables in the source 
table:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 55,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Done.\n",
-      "1 rows affected.\n",
-      "20 rows affected.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<table>\n",
-       "    <tr>\n",
-       "        <th>id</th>\n",
-       "        <th>__encoded_variables__</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>1</td>\n",
-       "        <td>[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>2</td>\n",
-       "        <td>[0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>3</td>\n",
-       "        <td>[1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>4</td>\n",
-       "        <td>[0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0]</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>5</td>\n",
-       "        <td>[0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>6</td>\n",
-       "        <td>[0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>7</td>\n",
-       "        <td>[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>8</td>\n",
-       "        <td>[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>9</td>\n",
-       "        <td>[0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>10</td>\n",
-       "        <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>11</td>\n",
-       "        <td>[1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>12</td>\n",
-       "        <td>[0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0]</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>13</td>\n",
-       "        <td>[0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0]</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>14</td>\n",
-       "        <td>[1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0]</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>15</td>\n",
-       "        <td>[1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0]</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>16</td>\n",
-       "        <td>[0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>17</td>\n",
-       "        <td>[0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>18</td>\n",
-       "        <td>[1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0]</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>19</td>\n",
-       "        <td>[0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>20</td>\n",
-       "        <td>[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]</td>\n",
-       "    </tr>\n",
-       "</table>"
-      ],
-      "text/plain": [
-       "[(1, [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]),\n",
-       " (2, [0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),\n",
-       " (3, [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]),\n",
-       " (4, [0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0]),\n",
-       " (5, [0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),\n",
-       " (6, [0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]),\n",
-       " (7, [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]),\n",
-       " (8, [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]),\n",
-       " (9, [0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]),\n",
-       " (10, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]),\n",
-       " (11, [1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]),\n",
-       " (12, [0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0]),\n",
-       " (13, [0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0]),\n",
-       " (14, [1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0]),\n",
-       " (15, [1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0]),\n",
-       " (16, [0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]),\n",
-       " (17, [0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),\n",
-       " (18, [1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0]),\n",
-       " (19, [0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),\n",
-       " (20, [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0])]"
-      ]
-     },
-     "execution_count": 55,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "%%sql\n",
-    "DROP TABLE IF EXISTS abalone_out, abalone_out_dictionary;\n",
-    "SELECT madlib.encode_categorical_variables (\n",
-    "        'abalone',                   -- Source table\n",
-    "        'abalone_out',               -- Output table\n",
-    "        '*',                         -- Categorical columns\n",
-    "        NULL,                        -- Categorical columns to exclude\n",
-    "        'id',                        -- Index columns\n",
-    "        NULL,                        -- Top values\n",
-    "        NULL,                        -- Value to drop for dummy 
encoding\n",
-    "        NULL,                        -- Encode nulls\n",
-    "        'array'                      -- Output type\n",
-    "        );\n",
-    "SELECT * FROM abalone_out ORDER BY id;"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "And here is the dictionary table that specifies the index into the array:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 56,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "14 rows affected.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<table>\n",
-       "    <tr>\n",
-       "        <th>encoded_column_name</th>\n",
-       "        <th>index</th>\n",
-       "        <th>variable</th>\n",
-       "        <th>value</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>__encoded_variables__</td>\n",
-       "        <td>1</td>\n",
-       "        <td>sex</td>\n",
-       "        <td>F</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>__encoded_variables__</td>\n",
-       "        <td>2</td>\n",
-       "        <td>sex</td>\n",
-       "        <td>I</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>__encoded_variables__</td>\n",
-       "        <td>3</td>\n",
-       "        <td>sex</td>\n",
-       "        <td>M</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>__encoded_variables__</td>\n",
-       "        <td>4</td>\n",
-       "        <td>rings</td>\n",
-       "        <td>7</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>__encoded_variables__</td>\n",
-       "        <td>5</td>\n",
-       "        <td>rings</td>\n",
-       "        <td>8</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>__encoded_variables__</td>\n",
-       "        <td>6</td>\n",
-       "        <td>rings</td>\n",
-       "        <td>9</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>__encoded_variables__</td>\n",
-       "        <td>7</td>\n",
-       "        <td>rings</td>\n",
-       "        <td>10</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>__encoded_variables__</td>\n",
-       "        <td>8</td>\n",
-       "        <td>rings</td>\n",
-       "        <td>11</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>__encoded_variables__</td>\n",
-       "        <td>9</td>\n",
-       "        <td>rings</td>\n",
-       "        <td>12</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>__encoded_variables__</td>\n",
-       "        <td>10</td>\n",
-       "        <td>rings</td>\n",
-       "        <td>14</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>__encoded_variables__</td>\n",
-       "        <td>11</td>\n",
-       "        <td>rings</td>\n",
-       "        <td>15</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>__encoded_variables__</td>\n",
-       "        <td>12</td>\n",
-       "        <td>rings</td>\n",
-       "        <td>16</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>__encoded_variables__</td>\n",
-       "        <td>13</td>\n",
-       "        <td>rings</td>\n",
-       "        <td>19</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>__encoded_variables__</td>\n",
-       "        <td>14</td>\n",
-       "        <td>rings</td>\n",
-       "        <td>20</td>\n",
-       "    </tr>\n",
-       "</table>"
-      ],
-      "text/plain": [
-       "[(u'__encoded_variables__', 1, u'sex', u'F'),\n",
-       " (u'__encoded_variables__', 2, u'sex', u'I'),\n",
-       " (u'__encoded_variables__', 3, u'sex', u'M'),\n",
-       " (u'__encoded_variables__', 4, u'rings', u'7'),\n",
-       " (u'__encoded_variables__', 5, u'rings', u'8'),\n",
-       " (u'__encoded_variables__', 6, u'rings', u'9'),\n",
-       " (u'__encoded_variables__', 7, u'rings', u'10'),\n",
-       " (u'__encoded_variables__', 8, u'rings', u'11'),\n",
-       " (u'__encoded_variables__', 9, u'rings', u'12'),\n",
-       " (u'__encoded_variables__', 10, u'rings', u'14'),\n",
-       " (u'__encoded_variables__', 11, u'rings', u'15'),\n",
-       " (u'__encoded_variables__', 12, u'rings', u'16'),\n",
-       " (u'__encoded_variables__', 13, u'rings', u'19'),\n",
-       " (u'__encoded_variables__', 14, u'rings', u'20')]"
-      ]
-     },
-     "execution_count": 56,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "%sql SELECT * FROM abalone_out_dictionary;"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 9. Dictionary output\n",
-    "Create a dictionary:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [],
-   "source": [
-    "%%sql\n",
-    "DROP TABLE IF EXISTS abalone_out, abalone_out_dictionary;\n",
-    "SELECT madlib.encode_categorical_variables (\n",
-    "        'abalone',                   -- Source table\n",
-    "        'abalone_out',               -- Output table\n",
-    "        '*',                         -- Categorical columns\n",
-    "        NULL,                        -- Categorical columns to exclude\n",
-    "        'id',                        -- Index columns\n",
-    "        NULL,                        -- Top values\n",
-    "        NULL,                        -- Value to drop for dummy 
encoding\n",
-    "        NULL,                        -- Encode nulls\n",
-    "        NULL,                        -- Output type\n",
-    "        TRUE                         -- Dictionary output \n",
-    "        );\n",
-    "SELECT * FROM abalone_out ORDER BY id;"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "And here is the dictionary table that defines the columns in the output 
table:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [],
-   "source": [
-    "%sql SELECT * FROM abalone_out_dictionary ORDER BY encoded_column_name;"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 10. Distribution policy\n",
-    "We can chose for various distribution policies, for examply RANDOMLY:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [],
-   "source": [
-    "%%sql\n",
-    "DROP TABLE IF EXISTS abalone_out, abalone_out_dictionary;\n",
-    "SELECT madlib.encode_categorical_variables (\n",
-    "        'abalone',                   -- Source table\n",
-    "        'abalone_out',               -- Output table\n",
-    "        '*',                         -- Categorical columns\n",
-    "        NULL,                        -- Categorical columns to exclude\n",
-    "        'id',                        -- Index columns\n",
-    "        NULL,                        -- Top values\n",
-    "        NULL,                        -- Value to drop for dummy 
encoding\n",
-    "        NULL,                        -- Encode nulls\n",
-    "        NULL,                        -- Output type\n",
-    "        NULL,                        -- Dictionary output\n",
-    "        'RANDOMLY'                   -- Distribution policy\n",
-    "        );"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 11. Encoding floats\n",
-    "If you have a float that you want to encode, you can cast it in the 
following way within the function call:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [],
-   "source": [
-    "%%sql\n",
-    "DROP TABLE IF EXISTS abalone_out, abalone_out_dictionary;\n",
-    "SELECT madlib.encode_categorical_variables (\n",
-    "        'abalone',                   -- Source table\n",
-    "        'abalone_out',               -- Output table\n",
-    "        'height>.05'               -- Categorical columns\n",
-    "        );"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [],
-   "source": [
-    "%%sql\n",
-    "SELECT * FROM abalone_out ORDER BY id;"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 2",
-   "language": "python",
-   "name": "python2"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 2
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython2",
-   "version": "2.7.12"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 0
-}

Reply via email to