orhankislal commented on a change in pull request #14: Image loader python 
module, and demo notebook
URL: https://github.com/apache/madlib-site/pull/14#discussion_r294956144
 
 

 ##########
 File path: community-artifacts/Madlib Image Loader Demo.ipynb
 ##########
 @@ -0,0 +1,436 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Using TensorFlow backend.\n"
+     ]
+    }
+   ],
+   "source": [
+    "import sys\n",
+    "import os\n",
+    "from keras.datasets import cifar10, cifar100, mnist, fashion_mnist, imdb, reuters"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Add community-artifacts to PYTHON_PATH\n",
+    "     # TIP:  You can skip this cell if working directory of notebook is community-artifacts\n",
+    "\n",
+    "home = %env HOME\n",
+    "     # TIP:  Change home,'workspace' to wherever you have cloned madlib-site repo\n",
+    "madlib_site_dir = os.path.join(home,'workspace','madlib-site','community-artifacts')\n",
+    "sys.path.append(madlib_site_dir)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Import image loader module\n",
+    "from madlib_image_loader import ImageLoader, DbCredentials"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Specify database credentials, for connecting to db\n",
+    "db_creds = DbCredentials(user='pivotal',\n",
+    "                         host='localhost',\n",
+    "                         port='15432',\n",
+    "                         password='')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load dataset into np array\n",
+    "train_data, _ = cifar10.load_data()\n",
+    "data_x, data_y = train_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Initialize ImageLoader (increase num_workers to run faster)\n",
+    "iloader = ImageLoader(num_workers=5, db_creds=db_creds)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "MainProcess: Connected to madlib db.\n",
+      "Appending to table cifar_10_test in madlib db\n",
+      "Spawning 5 workers...\n",
+      "Initializing PoolWorker-1 [pid 240]\n",
+      "PoolWorker-1: Created temporary directory PoolWorker-1\n",
+      "Initializing PoolWorker-2 [pid 241]\n",
+      "PoolWorker-2: Created temporary directory PoolWorker-2\n",
+      "Initializing PoolWorker-3 [pid 242]\n",
+      "PoolWorker-3: Created temporary directory PoolWorker-3\n",
+      "Initializing PoolWorker-4 [pid 243]\n",
+      "PoolWorker-4: Created temporary directory PoolWorker-4\n",
+      "Initializing PoolWorker-5 [pid 245]\n",
+      "PoolWorker-1: Connected to madlib db.\n",
+      "PoolWorker-5: Created temporary directory PoolWorker-5\n",
+      "PoolWorker-2: Connected to madlib db.\n",
+      "PoolWorker-3: Connected to madlib db.\n",
+      "PoolWorker-4: Connected to madlib db.\n",
+      "PoolWorker-5: Connected to madlib db.\n",
+      "PoolWorker-1: Wrote 1000 images to 
/tmp/madlib_KdLWwZ322f/cifar_10_test0000.tmp\n",
+      "PoolWorker-2: Wrote 1000 images to 
/tmp/madlib_xlKP6JhnfV/cifar_10_test0000.tmp\n",
+      "PoolWorker-3: Wrote 1000 images to 
/tmp/madlib_kfSWAjQUxH/cifar_10_test0000.tmp\n",
+      "PoolWorker-4: Wrote 1000 images to 
/tmp/madlib_hmXBkZ2Rd5/cifar_10_test0000.tmp\n",
+      "PoolWorker-5: Wrote 1000 images to 
/tmp/madlib_4v2Q1jvkZs/cifar_10_test0000.tmp\n",
+      "PoolWorker-1: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-2: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-4: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-3: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-5: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-1: Wrote 1000 images to 
/tmp/madlib_KdLWwZ322f/cifar_10_test0001.tmp\n",
+      "PoolWorker-4: Wrote 1000 images to 
/tmp/madlib_hmXBkZ2Rd5/cifar_10_test0001.tmp\n",
+      "PoolWorker-2: Wrote 1000 images to 
/tmp/madlib_xlKP6JhnfV/cifar_10_test0001.tmp\n",
+      "PoolWorker-3: Wrote 1000 images to 
/tmp/madlib_kfSWAjQUxH/cifar_10_test0001.tmp\n",
+      "PoolWorker-5: Wrote 1000 images to 
/tmp/madlib_4v2Q1jvkZs/cifar_10_test0001.tmp\n",
+      "PoolWorker-1: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-3: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-2: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-4: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-5: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-3: Wrote 1000 images to 
/tmp/madlib_kfSWAjQUxH/cifar_10_test0002.tmp\n",
+      "PoolWorker-1: Wrote 1000 images to 
/tmp/madlib_KdLWwZ322f/cifar_10_test0002.tmp\n",
+      "PoolWorker-4: Wrote 1000 images to 
/tmp/madlib_hmXBkZ2Rd5/cifar_10_test0002.tmp\n",
+      "PoolWorker-2: Wrote 1000 images to 
/tmp/madlib_xlKP6JhnfV/cifar_10_test0002.tmp\n",
+      "PoolWorker-5: Wrote 1000 images to 
/tmp/madlib_4v2Q1jvkZs/cifar_10_test0002.tmp\n",
+      "PoolWorker-1: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-4: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-3: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-5: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-2: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-1: Wrote 1000 images to 
/tmp/madlib_KdLWwZ322f/cifar_10_test0003.tmp\n",
+      "PoolWorker-4: Wrote 1000 images to 
/tmp/madlib_hmXBkZ2Rd5/cifar_10_test0003.tmp\n",
+      "PoolWorker-3: Wrote 1000 images to 
/tmp/madlib_kfSWAjQUxH/cifar_10_test0003.tmp\n",
+      "PoolWorker-5: Wrote 1000 images to 
/tmp/madlib_4v2Q1jvkZs/cifar_10_test0003.tmp\n",
+      "PoolWorker-2: Wrote 1000 images to 
/tmp/madlib_xlKP6JhnfV/cifar_10_test0003.tmp\n",
+      "PoolWorker-1: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-4: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-3: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-2: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-5: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-1: Wrote 1000 images to 
/tmp/madlib_KdLWwZ322f/cifar_10_test0004.tmp\n",
+      "PoolWorker-3: Wrote 1000 images to 
/tmp/madlib_kfSWAjQUxH/cifar_10_test0004.tmp\n",
+      "PoolWorker-5: Wrote 1000 images to 
/tmp/madlib_4v2Q1jvkZs/cifar_10_test0004.tmp\n",
+      "PoolWorker-4: Wrote 1000 images to 
/tmp/madlib_hmXBkZ2Rd5/cifar_10_test0004.tmp\n",
+      "PoolWorker-2: Wrote 1000 images to 
/tmp/madlib_xlKP6JhnfV/cifar_10_test0004.tmp\n",
+      "PoolWorker-1: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-4: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-5: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-2: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-3: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-5: Wrote 1000 images to 
/tmp/madlib_4v2Q1jvkZs/cifar_10_test0005.tmp\n",
+      "PoolWorker-1: Wrote 1000 images to 
/tmp/madlib_KdLWwZ322f/cifar_10_test0005.tmp\n",
+      "PoolWorker-2: Wrote 1000 images to 
/tmp/madlib_xlKP6JhnfV/cifar_10_test0005.tmp\n",
+      "PoolWorker-4: Wrote 1000 images to 
/tmp/madlib_hmXBkZ2Rd5/cifar_10_test0005.tmp\n",
+      "PoolWorker-3: Wrote 1000 images to 
/tmp/madlib_kfSWAjQUxH/cifar_10_test0005.tmp\n",
+      "PoolWorker-2: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-3: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-5: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-1: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-4: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-2: Wrote 1000 images to 
/tmp/madlib_xlKP6JhnfV/cifar_10_test0006.tmp\n",
+      "PoolWorker-3: Wrote 1000 images to 
/tmp/madlib_kfSWAjQUxH/cifar_10_test0006.tmp\n",
+      "PoolWorker-5: Wrote 1000 images to 
/tmp/madlib_4v2Q1jvkZs/cifar_10_test0006.tmp\n",
+      "PoolWorker-1: Wrote 1000 images to 
/tmp/madlib_KdLWwZ322f/cifar_10_test0006.tmp\n",
+      "PoolWorker-4: Wrote 1000 images to 
/tmp/madlib_hmXBkZ2Rd5/cifar_10_test0006.tmp\n",
+      "PoolWorker-2: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-3: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-5: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-1: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-4: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-2: Wrote 1000 images to 
/tmp/madlib_xlKP6JhnfV/cifar_10_test0007.tmp\n",
+      "PoolWorker-3: Wrote 1000 images to 
/tmp/madlib_kfSWAjQUxH/cifar_10_test0007.tmp\n",
+      "PoolWorker-1: Wrote 1000 images to 
/tmp/madlib_KdLWwZ322f/cifar_10_test0007.tmp\n",
+      "PoolWorker-5: Wrote 1000 images to 
/tmp/madlib_4v2Q1jvkZs/cifar_10_test0007.tmp\n",
+      "PoolWorker-4: Wrote 1000 images to 
/tmp/madlib_hmXBkZ2Rd5/cifar_10_test0007.tmp\n",
+      "PoolWorker-2: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-5: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-3: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-4: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-1: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-3: Wrote 1000 images to 
/tmp/madlib_kfSWAjQUxH/cifar_10_test0008.tmp\n",
+      "PoolWorker-1: Wrote 1000 images to 
/tmp/madlib_KdLWwZ322f/cifar_10_test0008.tmp\n",
+      "PoolWorker-2: Wrote 1000 images to 
/tmp/madlib_xlKP6JhnfV/cifar_10_test0008.tmp\n",
+      "PoolWorker-4: Wrote 1000 images to 
/tmp/madlib_hmXBkZ2Rd5/cifar_10_test0008.tmp\n",
+      "PoolWorker-5: Wrote 1000 images to 
/tmp/madlib_4v2Q1jvkZs/cifar_10_test0008.tmp\n",
+      "PoolWorker-1: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-3: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-4: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-5: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-2: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-1: Wrote 1000 images to 
/tmp/madlib_KdLWwZ322f/cifar_10_test0009.tmp\n",
+      "PoolWorker-3: Wrote 1000 images to 
/tmp/madlib_kfSWAjQUxH/cifar_10_test0009.tmp\n",
+      "PoolWorker-1: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-3: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-1: Wrote 1000 images to 
/tmp/madlib_KdLWwZ322f/cifar_10_test0010.tmp\n",
+      "PoolWorker-3: Wrote 1000 images to 
/tmp/madlib_kfSWAjQUxH/cifar_10_test0010.tmp\n",
+      "PoolWorker-3: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-1: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-1: Wrote 1000 images to 
/tmp/madlib_KdLWwZ322f/cifar_10_test0011.tmp\n",
+      "PoolWorker-1: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-2: Removed temporary directory PoolWorker-2\n",
+      "PoolWorker-4: Removed temporary directory PoolWorker-4\n",
+      "PoolWorker-5: Removed temporary directory PoolWorker-5\n",
+      "PoolWorker-3: Removed temporary directory PoolWorker-3\n",
+      "PoolWorker-1: Removed temporary directory PoolWorker-1\n",
+      "Done!  Loaded 50000 images in 50.5676851273s\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Save images to temporary directories and load into database\n",
+    "iloader.load_np_array_to_table(data_x, data_y, 'cifar_10_test', append=False, img_names=None)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Appending to table cifar_10_test in madlib db\n",
+      "Spawning 5 workers...\n",
+      "Initializing PoolWorker-6 [pid 279]\n",
+      "Initializing PoolWorker-7 [pid 280]\n",
+      "Initializing PoolWorker-8 [pid 281]\n",
+      "Initializing PoolWorker-9 [pid 284]\n",
+      "PoolWorker-6: Connected to madlib db.\n",
+      "Initializing PoolWorker-10 [pid 285]\n",
+      "PoolWorker-7: Connected to madlib db.\n",
+      "PoolWorker-8: Connected to madlib db.\n",
+      "PoolWorker-9: Connected to madlib db.\n",
+      "PoolWorker-10: Connected to madlib db.\n",
+      "PoolWorker-6: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-7: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-9: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-8: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-10: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-6: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-7: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-9: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-8: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-10: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-6: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-7: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-9: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-8: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-10: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-6: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-7: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-9: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-8: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-10: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-6: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-7: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-9: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-10: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-8: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-6: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-7: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-9: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-10: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-8: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-6: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-7: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-9: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-10: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-8: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-6: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-7: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-9: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-10: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-8: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-6: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-7: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-9: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-10: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-8: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-6: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-7: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-6: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-7: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-6: Loaded 1000 images into cifar_10_test\n",
+      "Done!  Loaded 50000 images in 18.1218080521s\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Same thing, but without writing out any temporary files; everything handled in-memory.\n",
+    "#   Should run about twice as fast.\n",
+    "\n",
+    "iloader.ROWS_PER_FILE = 1000  # Try adjusting this downward, if running low on memory\n",
+    "iloader.load_np_array_to_table(data_x, data_y, 'cifar_10_test', append=True, no_temp_files=True)"
 
 Review comment:
   We should mention that `no_temp_files` does not work at the moment.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

Reply via email to