[madlib-site] branch asf-site updated (a283758 -> 3b665af)
This is an automated email from the ASF dual-hosted git repository. fmcquillan pushed a change to branch asf-site in repository https://gitbox.apache.org/repos/asf/madlib-site.git. from a283758 update Load-model-architecture-v1.ipynb with faster way to load model weights - minor fix new 6a530b1 Disable --no-temp-files|-m option, since it doesn't work new 3b665af Update demo notebook The 2 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: .../Deep-learning/Load-images-v1.ipynb | 20 --- .../Deep-learning/madlib_image_loader.py | 42 +- 2 files changed, 23 insertions(+), 39 deletions(-)
[madlib-site] 01/02: Disable --no-temp-files|-m option, since it doesn't work
This is an automated email from the ASF dual-hosted git repository. fmcquillan pushed a commit to branch asf-site in repository https://gitbox.apache.org/repos/asf/madlib-site.git commit 6a530b1b23b609aefd2dc5cb3ca9098ea7849c81 Author: Domino Valdano AuthorDate: Mon Aug 26 11:58:42 2019 -0700 Disable --no-temp-files|-m option, since it doesn't work --- .../Deep-learning/madlib_image_loader.py | 42 +- 1 file changed, 16 insertions(+), 26 deletions(-) diff --git a/community-artifacts/Deep-learning/madlib_image_loader.py b/community-artifacts/Deep-learning/madlib_image_loader.py index 09a170d..1dc45b3 100755 --- a/community-artifacts/Deep-learning/madlib_image_loader.py +++ b/community-artifacts/Deep-learning/madlib_image_loader.py @@ -54,7 +54,7 @@ # 2a. Perform parallel image loading from numpy arrays: # # iloader.load_dataset_from_np(data_x, data_y, table_name, -#append=False, no_temp_files=False) +#append=False) # # data_x contains image data in np.array format, and data_y is a 1D np.array # of the image categories (labels). @@ -73,18 +73,12 @@ # name instead. This avoids needing to pass the table_name again every # time, but also allows it to be changed at any time. # -# EXPERIMENTAL: If no_temp_files=True, the operation will happen without -# writing out the tables to temporary files before loading them. -# Instead, an in-memory filelike buffer (StringIO) will be used -# to build the tables before loading. Currently not working, -# for unknown reason. -# # or, # # 2b. Perform parallel image loading from disk: # # load_dataset_from_disk(self, root_dir, table_name, num_labels='all', -# append=False, no_temp_files=False): +# append=False): # # Calling this function instead will look in root_dir on the local disk of # wherever this is being run. It will skip over any files in that @@ -93,7 +87,7 @@ # where the name of each subdirectory is the label for the images # contained within it. 
# -# The table_name, append, and no_temp_files parameters are the same as +# The table_name and append parameters are the same as described # above. num_labels is an optional parameter which can be used to # restrict the number of labels (image classes) loaded, even if more # are found in root_dir. For example, for a large dataset you may @@ -107,7 +101,7 @@ # # usage: madlib_image_loader.py [-h] [-r ROOT_DIR] [-n NUM_LABELS] [-d DB_NAME] # [-a] [-w NUM_WORKERS] [-p PORT] [-U USERNAME] -# [-t HOST] [-P PASSWORD] [-m] +# [-t HOST] [-P PASSWORD] # table_name # # positional arguments: @@ -247,7 +241,7 @@ class ImageLoader: self.table_name = table_name self.root_dir = None self.pool = None -self.no_temp_files = None +self.no_temp_files = False global iloader # Singleton per process iloader = self @@ -435,7 +429,7 @@ class ImageLoader: self.db_close() def load_dataset_from_np(self, data_x, data_y, table_name=None, - append=False, no_temp_files=False): + append=False): """ Loads a numpy array into db. For append=False, creates a new table and loads the data. For append=True, appends data to existing table. @@ -450,14 +444,12 @@ class ImageLoader: @table_name Name of table in db to load data into @append Whether to create a new table (False) or append to an existing one (True). If unspecified, default is False -@no_temp_files If specified, no temporary files are written--all -operations are performed in-memory. - """ start_time = time.time() self.mother = True self.from_disk = False self.append = append + if table_name: self.table_name = table_name @@ -477,7 +469,7 @@ class ImageLoader: initargs=(current_process().pid, self.table_name, self.append, - no_temp_files, + False, self.db_creds, False)) @@ -539,7 +531,7 @@ class ImageLoader: _call_np_worker(data) def load_dataset_from_disk(self, root_dir, table_name, num_labels='all', - append=False, no_temp_files=False): + append=False): """ Load images from disk in
[madlib-site] 02/02: Update demo notebook
This is an automated email from the ASF dual-hosted git repository. fmcquillan pushed a commit to branch asf-site in repository https://gitbox.apache.org/repos/asf/madlib-site.git commit 3b665af751366e349420d72fcf1505744fb129cc Author: Domino Valdano AuthorDate: Mon Aug 26 13:04:12 2019 -0700 Update demo notebook --- .../Deep-learning/Load-images-v1.ipynb | 20 +++- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/community-artifacts/Deep-learning/Load-images-v1.ipynb b/community-artifacts/Deep-learning/Load-images-v1.ipynb index 15aa948..3209aaf 100644 --- a/community-artifacts/Deep-learning/Load-images-v1.ipynb +++ b/community-artifacts/Deep-learning/Load-images-v1.ipynb @@ -193,7 +193,7 @@ "\n", "# 2. Fetch images then load NumPy array into table\n", "\n", -"iloader.load_dataset_from_np(data_x, data_y, table_name, append=False, no_temp_files=False)\n", +"iloader.load_dataset_from_np(data_x, data_y, table_name, append=False)\n", "\n", "- data_x contains image data in np.array format\n", "\n", @@ -204,13 +204,7 @@ "- If the user passes a table_name while creating ImageLoader object, it will be used for all further calls to load_dataset_from_np. It can be changed by passing it as a parameter during the actual call to load_dataset_from_np, and if so future calls will load to that table name instead. This avoids needing to pass the table_name again every time, but also allows it to be changed at any time.\n", "\n", " \n", -"- append=False attempts to create a new table, while append=True appends more images to an existing table.\n", -"\n", -"\n", -"- EXPERIMENTAL: If no_temp_files=True, the operation will happen without\n", -" writing out the tables to temporary files before loading them.\n", -" Instead, an in-memory filelike buffer (StringIO) will be used\n", -" to build the tables before loading." +"- append=False attempts to create a new table, while append=True appends more images to an existing table." 
] }, { @@ -420,8 +414,8 @@ "%sql DROP TABLE IF EXISTS cifar_10_train_data, cifar_10_test_data;\n", "\n", "# Save images to temporary directories and load into database\n", -"iloader.load_dataset_from_np(x_train, y_train, 'cifar_10_train_data', append=False, no_temp_files=False)\n", -"iloader.load_dataset_from_np(x_test, y_test, 'cifar_10_test_data', append=False, no_temp_files=False)" +"iloader.load_dataset_from_np(x_train, y_train, 'cifar_10_train_data', append=False)\n", +"iloader.load_dataset_from_np(x_test, y_test, 'cifar_10_test_data', append=False)" ] }, { @@ -434,12 +428,12 @@ "Uses the Python Imaging Library so supports multiple formats\n", "http://www.pythonware.com/products/pil/\n", "\n", -"load_dataset_from_disk(root_dir, table_name, num_labels='all', append=False, no_temp_files=False)\n", +"load_dataset_from_disk(root_dir, table_name, num_labels='all', append=False)\n", "\n", "- Calling this function will look in root_dir on the local disk of wherever this is being run. It will skip over any files in that directory, but will load images contained in each of its subdirectories. The images should be organized by category/class, where the name of each subdirectory is the label for the images contained within it.\n", "\n", "\n", -"- The table_name, append, and no_temp_files parameters are the same as above The parameter num_labels is an optional parameter which can be used to restrict the number of labels (image classes) loaded, even if more are found in root_dir. For example, for a large dataset you may have hundreds of labels, but only wish to use a subset of that containing a few dozen.\n", +"- The table_name and append parameters are the same as above The parameter num_labels is an optional parameter which can be used to restrict the number of labels (image classes) loaded, even if more are found in root_dir. 
For example, for a large dataset you may have hundreds of labels, but only wish to use a subset of that containing a few dozen.\n", "\n", "For example, if we put the CIFAR-10 training data is in 10 subdirectories under directory cifar10, with one subdirectory for each class:" ] @@ -596,7 +590,7 @@ "source": [ "%sql drop table if exists cifar_10_train_data_filesystem;\n", "# Load images from file system\n", -"iloader.load_dataset_from_disk('/Users/fmcquillan/tmp/cifar10', 'cifar_10_train_data_filesystem', num_labels='all', append=False, no_temp_files=False)" +"iloader.load_dataset_from_disk('/Users/fmcquillan/tmp/cifar10', 'cifar_10_train_data_filesystem', num_labels='all', append=False)" ] }, {
[madlib-site] branch asf-site updated: updated 3 notebooks with minor changes
This is an automated email from the ASF dual-hosted git repository. fmcquillan pushed a commit to branch asf-site in repository https://gitbox.apache.org/repos/asf/madlib-site.git The following commit(s) were added to refs/heads/asf-site by this push: new 388c4b3 updated 3 notebooks with minor changes 388c4b3 is described below commit 388c4b34a08c96e059baca60f17350db327491ca Author: Frank McQuillan AuthorDate: Tue Aug 27 13:28:24 2019 -0700 updated 3 notebooks with minor changes --- .../Deep-learning/Load-images-v1.ipynb | 571 +++-- .../Deep-learning/Load-model-architecture-v1.ipynb | 228 ++-- .../MADlib-Keras-cifar10-cnn-v2.ipynb | 4 +- 3 files changed, 486 insertions(+), 317 deletions(-) diff --git a/community-artifacts/Deep-learning/Load-images-v1.ipynb b/community-artifacts/Deep-learning/Load-images-v1.ipynb index 3209aaf..1750cfc 100644 --- a/community-artifacts/Deep-learning/Load-images-v1.ipynb +++ b/community-artifacts/Deep-learning/Load-images-v1.ipynb @@ -134,35 +134,14 @@ "source": [ "import sys\n", "import os\n", -"from keras.datasets import cifar10" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ +"from keras.datasets import cifar10\n", +"\n", "madlib_site_dir = '/Users/fmcquillan/Documents/Product/MADlib/Demos/data'\n", -"sys.path.append(madlib_site_dir)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ +"sys.path.append(madlib_site_dir)\n", +"\n", "# Import image loader module\n", -"from madlib_image_loader import ImageLoader, DbCredentials" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ +"from madlib_image_loader import ImageLoader, DbCredentials\n", +"\n", "# Specify database credentials, for connecting to db\n", "#db_creds = DbCredentials(user='gpadmin',\n", "# host='35.239.240.26',\n", @@ -173,15 +152,8 @@ "db_creds = DbCredentials(user='fmcquillan',\n", " 
host='localhost',\n", " port='5432',\n", -" password='')" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ +" password='')\n", +"\n", "# Initialize ImageLoader (increase num_workers to run faster)\n", "iloader = ImageLoader(num_workers=5, db_creds=db_creds)" ] @@ -201,15 +173,12 @@ "- data_y is a 1D np.array of the image categories (labels).\n", "\n", "\n", -"- If the user passes a table_name while creating ImageLoader object, it will be used for all further calls to load_dataset_from_np. It can be changed by passing it as a parameter during the actual call to load_dataset_from_np, and if so future calls will load to that table name instead. This avoids needing to pass the table_name again every time, but also allows it to be changed at any time.\n", -"\n", -" \n", -"- append=False attempts to create a new table, while append=True appends more images to an existing table." +"- If the user passes a table_name while creating ImageLoader object, it will be used for all further calls to load_dataset_from_np. It can be changed by passing it as a parameter during the actual call to load_dataset_from_np, and if so future calls will load to that table name instead. This avoids needing to pass the table_name again every time, but also allows it to be changed at any time." 
] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -219,7 +188,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -232,180 +201,180 @@ "CREATE TABLE\n", "Created table cifar_10_train_data in madlib db\n", "Spawning 5 workers...\n", - "Initializing PoolWorker-1 [pid 32578]\n", - "PoolWorker-1: Created temporary directory /tmp/madlib_7llkr1iwrB\n", - "Initializing PoolWorker-2 [pid 32579]\n", - "PoolWorker-2: Created temporary directory /tmp/madlib_gFBeKWnLLL\n", - "Initializing PoolWorker-3 [pid 32580]\n", - "PoolWorker-3: Created temporary directory /tmp/madlib_3EqOi1Elxy\n", + "Initializing PoolWorker-1 [pid 82412]\n", + "PoolWorker-1: Created temporary directory /tmp/madlib_Bt85aChbv0\n", + "Initializing PoolWorker-2 [pid 82413]\n", + "PoolWorker-2: Created temporary directory /tmp/madlib_cSyCSiEhHT\n", + "Initializing PoolWorker-3 [pid 82414]\n", + "PoolWorker-3: Created temporary directory /tmp/madlib_uvtHjGCU5S\n", "PoolWorker-1: Connected t