This is an automated email from the ASF dual-hosted git repository. niketanpansare pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/systemml.git
The following commit(s) were added to refs/heads/master by this push: new 794c5a2 [SYSTEMML-540] Added performance tests for ResNet200 794c5a2 is described below commit 794c5a232a3f462e2a85836dea55570f102e1682 Author: Niketan Pansare <npan...@us.ibm.com> AuthorDate: Fri Mar 29 10:26:04 2019 -0700 [SYSTEMML-540] Added performance tests for ResNet200 These tests compare the effect of different eviction policies when training ResNet and also perform a baseline comparison with Unified Memory, TF and TF-GPU. --- scripts/perftest/gpu_resnet_perftest/resnet.py | 282 +++++++++++++++++++++++++ scripts/perftest/gpu_resnet_perftest/run.py | 219 +++++++++++++++++++ scripts/perftest/gpu_resnet_perftest/run.sh | 72 +++++++ 3 files changed, 573 insertions(+) diff --git a/scripts/perftest/gpu_resnet_perftest/resnet.py b/scripts/perftest/gpu_resnet_perftest/resnet.py new file mode 100644 index 0000000..a2e8514 --- /dev/null +++ b/scripts/perftest/gpu_resnet_perftest/resnet.py @@ -0,0 +1,282 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +# ------------------------------------------------------------- + +from __future__ import division + +import six +from keras.models import Model +from keras.layers import ( + Input, + Activation, + Dense, + Flatten +) +from keras.layers.convolutional import ( + Conv2D, + MaxPooling2D, + AveragePooling2D +) +from keras.layers.merge import add +from keras.layers.normalization import BatchNormalization +from keras.regularizers import l2 +from keras import backend as K + + +def _bn_relu(input): + """Helper to build a BN -> relu block + """ + norm = BatchNormalization(axis=CHANNEL_AXIS)(input) + return Activation("relu")(norm) + + +def _conv_bn_relu(**conv_params): + """Helper to build a conv -> BN -> relu block + """ + filters = conv_params["filters"] + kernel_size = conv_params["kernel_size"] + strides = conv_params.setdefault("strides", (1, 1)) + kernel_initializer = conv_params.setdefault("kernel_initializer", "he_normal") + padding = conv_params.setdefault("padding", "same") + kernel_regularizer = conv_params.setdefault("kernel_regularizer", l2(1.e-4)) + + def f(input): + conv = Conv2D(filters=filters, kernel_size=kernel_size, + strides=strides, padding=padding, + kernel_initializer=kernel_initializer, + kernel_regularizer=kernel_regularizer)(input) + return _bn_relu(conv) + + return f + + +def _bn_relu_conv(**conv_params): + """Helper to build a BN -> relu -> conv block. 
+ This is an improved scheme proposed in http://arxiv.org/pdf/1603.05027v2.pdf + """ + filters = conv_params["filters"] + kernel_size = conv_params["kernel_size"] + strides = conv_params.setdefault("strides", (1, 1)) + kernel_initializer = conv_params.setdefault("kernel_initializer", "he_normal") + padding = conv_params.setdefault("padding", "same") + kernel_regularizer = conv_params.setdefault("kernel_regularizer", l2(1.e-4)) + + def f(input): + activation = _bn_relu(input) + return Conv2D(filters=filters, kernel_size=kernel_size, + strides=strides, padding=padding, + kernel_initializer=kernel_initializer, + kernel_regularizer=kernel_regularizer)(activation) + + return f + + +def _shortcut(input, residual): + """Adds a shortcut between input and residual block and merges them with "sum" + """ + # Expand channels of shortcut to match residual. + # Stride appropriately to match residual (width, height) + # Should be int if network architecture is correctly configured. + input_shape = K.int_shape(input) + residual_shape = K.int_shape(residual) + stride_width = int(round(input_shape[ROW_AXIS] / residual_shape[ROW_AXIS])) + stride_height = int(round(input_shape[COL_AXIS] / residual_shape[COL_AXIS])) + equal_channels = input_shape[CHANNEL_AXIS] == residual_shape[CHANNEL_AXIS] + + shortcut = input + # 1 X 1 conv if shape is different. Else identity. + if stride_width > 1 or stride_height > 1 or not equal_channels: + shortcut = Conv2D(filters=residual_shape[CHANNEL_AXIS], + kernel_size=(1, 1), + strides=(stride_width, stride_height), + padding="valid", + kernel_initializer="he_normal", + kernel_regularizer=l2(0.0001))(input) + + return add([shortcut, residual]) + + +def _residual_block(block_function, filters, repetitions, is_first_layer=False): + """Builds a residual block with repeating bottleneck blocks. 
+ """ + def f(input): + for i in range(repetitions): + init_strides = (1, 1) + if i == 0 and not is_first_layer: + init_strides = (2, 2) + input = block_function(filters=filters, init_strides=init_strides, + is_first_block_of_first_layer=(is_first_layer and i == 0))(input) + return input + + return f + + +def basic_block(filters, init_strides=(1, 1), is_first_block_of_first_layer=False): + """Basic 3 X 3 convolution blocks for use on resnets with layers <= 34. + Follows improved proposed scheme in http://arxiv.org/pdf/1603.05027v2.pdf + """ + def f(input): + + if is_first_block_of_first_layer: + # don't repeat bn->relu since we just did bn->relu->maxpool + conv1 = Conv2D(filters=filters, kernel_size=(3, 3), + strides=init_strides, + padding="same", + kernel_initializer="he_normal", + kernel_regularizer=l2(1e-4))(input) + else: + conv1 = _bn_relu_conv(filters=filters, kernel_size=(3, 3), + strides=init_strides)(input) + + residual = _bn_relu_conv(filters=filters, kernel_size=(3, 3))(conv1) + return _shortcut(input, residual) + + return f + + +def bottleneck(filters, init_strides=(1, 1), is_first_block_of_first_layer=False): + """Bottleneck architecture for > 34 layer resnet. 
+ Follows improved proposed scheme in http://arxiv.org/pdf/1603.05027v2.pdf + + Returns: + A final conv layer of filters * 4 + """ + def f(input): + + if is_first_block_of_first_layer: + # don't repeat bn->relu since we just did bn->relu->maxpool + conv_1_1 = Conv2D(filters=filters, kernel_size=(1, 1), + strides=init_strides, + padding="same", + kernel_initializer="he_normal", + kernel_regularizer=l2(1e-4))(input) + else: + conv_1_1 = _bn_relu_conv(filters=filters, kernel_size=(1, 1), + strides=init_strides)(input) + + conv_3_3 = _bn_relu_conv(filters=filters, kernel_size=(3, 3))(conv_1_1) + residual = _bn_relu_conv(filters=filters * 4, kernel_size=(1, 1))(conv_3_3) + return _shortcut(input, residual) + + return f + + +def _handle_dim_ordering(): + global ROW_AXIS + global COL_AXIS + global CHANNEL_AXIS + if K.image_dim_ordering() == 'tf': + ROW_AXIS = 1 + COL_AXIS = 2 + CHANNEL_AXIS = 3 + else: + CHANNEL_AXIS = 1 + ROW_AXIS = 2 + COL_AXIS = 3 + + +def _get_block(identifier): + if isinstance(identifier, six.string_types): + res = globals().get(identifier) + if not res: + raise ValueError('Invalid {}'.format(identifier)) + return res + return identifier + + +class ResnetBuilder(object): + @staticmethod + def build(input_shape, num_outputs, block_fn, repetitions): + """Builds a custom ResNet like architecture. + + Args: + input_shape: The input shape in the form (nb_channels, nb_rows, nb_cols) + num_outputs: The number of outputs at final softmax layer + block_fn: The block function to use. This is either `basic_block` or `bottleneck`. + The original paper used basic_block for layers < 50 + repetitions: Number of repetitions of various block units. + At each block unit, the number of filters are doubled and the input size is halved + + Returns: + The keras `Model`. 
+ """ + _handle_dim_ordering() + if len(input_shape) != 3: + raise Exception("Input shape should be a tuple (nb_channels, nb_rows, nb_cols)") + + # Permute dimension order if necessary + if K.image_dim_ordering() == 'tf': + input_shape = (input_shape[1], input_shape[2], input_shape[0]) + + # Load function from str if needed. + block_fn = _get_block(block_fn) + + input = Input(shape=input_shape) + conv1 = _conv_bn_relu(filters=64, kernel_size=(7, 7), strides=(2, 2))(input) + pool1 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding="same")(conv1) + + block = pool1 + filters = 64 + for i, r in enumerate(repetitions): + block = _residual_block(block_fn, filters=filters, repetitions=r, is_first_layer=(i == 0))(block) + filters *= 2 + + # Last activation + block = _bn_relu(block) + + # Classifier block + block_shape = K.int_shape(block) + pool2 = AveragePooling2D(pool_size=(block_shape[ROW_AXIS], block_shape[COL_AXIS]), + strides=(1, 1))(block) + flatten1 = Flatten()(pool2) + dense = Dense(units=num_outputs, kernel_initializer="he_normal", + activation="softmax")(flatten1) + + model = Model(inputs=input, outputs=dense) + return model + + @staticmethod + def build_resnet_18(input_shape, num_outputs): + return ResnetBuilder.build(input_shape, num_outputs, basic_block, [2, 2, 2, 2]) + + @staticmethod + def build_resnet_34(input_shape, num_outputs): + return ResnetBuilder.build(input_shape, num_outputs, basic_block, [3, 4, 6, 3]) + + @staticmethod + def build_resnet_50(input_shape, num_outputs): + return ResnetBuilder.build(input_shape, num_outputs, bottleneck, [3, 4, 6, 3]) + + @staticmethod + def build_resnet_101(input_shape, num_outputs): + return ResnetBuilder.build(input_shape, num_outputs, bottleneck, [3, 4, 23, 3]) + + @staticmethod + def build_resnet_152(input_shape, num_outputs): + return ResnetBuilder.build(input_shape, num_outputs, bottleneck, [3, 8, 36, 3]) + + @staticmethod + def build_resnet_200(input_shape, num_outputs): + return 
ResnetBuilder.build(input_shape, num_outputs, bottleneck, [3, 24, 36, 3]) + + @staticmethod + def build_resnet_1001(input_shape, num_outputs): + # TODO: From https://github.com/KaimingHe/resnet-1k-layers/blob/master/resnet-pre-act.lua + return ResnetBuilder.build(input_shape, num_outputs, bottleneck, [16, 64, 128, 256]) diff --git a/scripts/perftest/gpu_resnet_perftest/run.py b/scripts/perftest/gpu_resnet_perftest/run.py new file mode 100644 index 0000000..eb7cc14 --- /dev/null +++ b/scripts/perftest/gpu_resnet_perftest/run.py @@ -0,0 +1,219 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +# ------------------------------------------------------------- + +import time, os, argparse, sys, math +import numpy as np + +from pyspark import SparkContext +sc = SparkContext() +from pyspark.sql import SparkSession +spark = SparkSession.builder.getOrCreate() + +parser=argparse.ArgumentParser("Testing deep networks for different batches") +parser.add_argument('--network', type=str, default='vgg16', choices=['vgg16', 'vgg19', 'resnet200', 'resnet1001', 'unet']) +parser.add_argument('--allocator', type=str, default='cuda', choices=['cuda', 'unified_memory']) +parser.add_argument('--batch_size', help='Batch size. Default: 64', type=int, default=64) +parser.add_argument('--num_images', help='Number of images. Default: 2048', type=int, default=2048) +parser.add_argument('--eviction_policy', help='Eviction policy. Default: align_memory', type=str, default='align_memory', choices=['align_memory', 'lru', 'fifo', 'min_evict', 'lfu', 'mru']) +parser.add_argument('--framework', help='The framework to use for running the benchmark. Default: systemml', type=str, default='systemml', choices=['systemml', 'tensorflow', 'systemml_force_gpu', 'tensorflow-gpu']) +parser.add_argument('--num_channels', help='Number of channels. Default: 3', type=int, default=3) +parser.add_argument('--height', help='Height. Default: 224', type=int, default=224) +parser.add_argument('--width', help='Width. 
Default: 224', type=int, default=224) +args=parser.parse_args() + +####################################################################### +# Required to ensure that TF only uses exactly 1 GPU if framework is tensorflow-gpu, else no gpu +os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID' +os.environ['CUDA_VISIBLE_DEVICES'] = '0' +if args.framework == 'tensorflow-gpu': + os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID' + os.environ['CUDA_VISIBLE_DEVICES'] = '0' +else: + # Disable tensorflow from grabbing the entire GPU memory + os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID' + os.environ['CUDA_VISIBLE_DEVICES'] = '' +####################################################################### + +# To discount the transfer time of batches, we use one randomly generated batch +# and scale the number of epochs +batch_size = args.batch_size +num_images = args.num_images +num_images = num_images - int(num_images % batch_size) +n_batches_for_epoch = num_images / batch_size + +# Model-specific parameters +num_classes = 1000 +input_shape = (args.num_channels, args.height, args.width) +if args.network == 'unet' and (input_shape[0] != 1 or input_shape[1] != 256 or input_shape[2] != 256): + raise ValueError('Incorrect input shape for unet: ' + str(input_shape) + '. Supported input shape fo unet: (1, 256, 256)' ) +num_pixels = input_shape[0]*input_shape[1]*input_shape[2] + +import keras +from keras.utils import np_utils +from keras import backend as K +if args.framework.startswith('systemml'): + K.set_image_data_format('channels_first') +import os +import numpy as np +from keras.models import * +from keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, Dropout, Cropping2D, concatenate # merge +from keras.optimizers import * + +##################################################################################### +# Ideally we would have preferred to compare the performance on double precision +# as SystemML's CPU backend only supports double precision. 
+# But since TF 1.7 crashes with double precision, we only test with single precision +use_double_precision = False +if use_double_precision: + K.set_floatx('float64') +if args.framework == 'tensorflow-gpu': + import tensorflow as tf + from keras.backend.tensorflow_backend import set_session + tf_config = tf.ConfigProto() + if args.allocator =='cuda': + tf_config.gpu_options.per_process_gpu_memory_fraction = 0.9 + elif args.allocator =='unified_memory': + tf_config.gpu_options.allow_growth = True + set_session(tf.Session(config=tf_config)) +##################################################################################### + +error_occured = False +print("Building model ... ") +if args.network == 'vgg16': + model = keras.applications.vgg16.VGG16(weights='imagenet', classes=num_classes) +elif args.network == 'vgg19': + model = keras.applications.vgg19.VGG19(weights='imagenet', classes=num_classes) +elif args.network == 'resnet200': + import resnet + model = resnet.ResnetBuilder.build_resnet_200(input_shape, num_classes) +elif args.network == 'resnet1001': + import resnet + model = resnet.ResnetBuilder.build_resnet_1001(input_shape, num_classes) +elif args.network == 'unet': + def conv3x3(input, num_filters): + conv = Conv2D(num_filters, 3, activation = 'relu', padding = 'same')(input) + conv = Conv2D(num_filters, 3, activation = 'relu', padding = 'same')(conv) + return conv + num_filters = [64, 128, 256, 512, 1024] + model_input = Input((input_shape[1], input_shape[2], input_shape[0])) + input = model_input + side_inputs = [] + for i in range(len(num_filters)): + # Apply max pooling for all except first down_conv + input = MaxPooling2D(pool_size=(2, 2))(input) if i != 0 else input + input = conv3x3(input, num_filters[i]) + # Apply dropouts to only last 2 down_conv + input = Dropout(0.5)(input) if i >= len(num_filters)-2 else input + side_inputs.append(input) + input = side_inputs.pop() + num_filters.pop() + for i in range(len(num_filters)): + filters = 
num_filters.pop() + input = Conv2D(filters, 3, activation = 'relu', padding = 'same')(UpSampling2D(size = (2,2))(input)) + #input = merge([side_inputs.pop(), input], mode = 'concat', concat_axis = 3) + input = concatenate([side_inputs.pop(), input]) + input = conv3x3(input, filters) + conv1 = Conv2D(2, 3, activation = 'relu', padding = 'same')(input) + model_output = Conv2D(1, 1, activation = 'sigmoid')(conv1) + model = Model(input = model_input, output = model_output) +else: + raise ValueError('Unsupported network:' + args.network) +if args.network == 'unet': + model.compile(optimizer = keras.optimizers.SGD(lr=1e-6, momentum=0.95, decay=5e-4, nesterov=True), loss = 'mean_squared_error') +else: + model.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.SGD(lr=1e-6, momentum=0.95, decay=5e-4, nesterov=True)) + +#------------------------------------------------------------------------------------------ +# Use this for baseline experiments: +# Alternate way to avoid eviction is to perform multiple forward/backward pass, aggregate gradients and finally perform update. 
+looped_minibatch = False +local_batch_size = batch_size +if looped_minibatch: + if args.network == 'resnet200': + local_batch_size = 16 + else: + raise ValueError('looped_minibatch not yet implemented for ' + str(args.network)) + if batch_size % local_batch_size != 0: + raise ValueError('local_batch_size = ' + str(local_batch_size) + ' should be multiple of batch size=' + str(batch_size)) +#------------------------------------------------------------------------------------------ + +if args.framework.startswith('systemml'): + print("Initializing Keras2DML.") + from systemml.mllearn import Keras2DML + should_load_weights=False + sysml_model = Keras2DML(spark, model, load_keras_weights=should_load_weights, weights="tmp_weights1") + if looped_minibatch: + sysml_model.set(train_algo="looped_minibatch", parallel_batches=int(batch_size/local_batch_size), test_algo="batch") # systemml does not have a generator + sysml_model.set(weight_parallel_batches=False) + else: + sysml_model.set(train_algo="batch", test_algo="batch") + sysml_model.set(perform_fused_backward_update=True) + sysml_model.setStatistics(True).setStatisticsMaxHeavyHitters(100) + # Since this script is used for measuring performance and not for printing script, inline the nn library + sysml_model.set(inline_nn_library=True) + # For apples-to-apples comparison, do not force set the allocated array to 0 + sysml_model.setConfigProperty("sysml.gpu.force.memSetZero", "false") + # Use single GPU + sysml_model.setConfigProperty("sysml.gpu.availableGPUs", "0") + # Use user-specified allocator: cuda (default) or unified_memory + sysml_model.setConfigProperty("sysml.gpu.memory.allocator", args.allocator); + # Use user-specified eviction policy + sysml_model.setConfigProperty("sysml.gpu.eviction.policy", args.eviction_policy) + # Please consider allocating large enough JVM and using large CPU cache + sysml_model.setConfigProperty("sysml.gpu.eviction.shadow.bufferSize", "0.5") + 
sysml_model.setConfigProperty("sysml.caching.bufferSize", "1.0") + # Use user-specified precision + if not use_double_precision: + sysml_model.setConfigProperty("sysml.floating.point.precision", "single") + sysml_model.setGPU(True).setForceGPU(args.framework=='systemml_force_gpu') + Xb = np.random.uniform(0,1,num_pixels*batch_size) + Xb = Xb.reshape((batch_size, num_pixels)) + if args.network == 'unet': + yb = np.random.randint(5, size=num_pixels*batch_size).reshape((batch_size, num_pixels)) + sysml_model.set(perform_one_hot_encoding=False) + else: + yb = np.random.randint(num_classes, size=batch_size) + from py4j.protocol import Py4JJavaError + start = time.time() + try: + print("Invoking fit") + sysml_model.fit(Xb, yb, batch_size=local_batch_size, epochs=n_batches_for_epoch) + print("Done with fit") + except Py4JJavaError as e: + error_occured = True + print("Execution failed: " + str(e)) + except AttributeError as e1: + error_occured = True + print("Execution failed: " + str(e1)) +elif args.framework.startswith('tensorflow'): + Xb = np.random.randint(256, size=num_pixels*batch_size).reshape((batch_size, input_shape[1],input_shape[2], input_shape[0])) + 1 + if args.network == 'unet': + yb = np.random.randint(5, size=num_pixels*batch_size).reshape((batch_size, input_shape[1],input_shape[2], input_shape[0])) + else: + yb = np.random.randint(num_classes, size=batch_size) + yb = np_utils.to_categorical(yb, num_classes) + start = time.time() + model.fit(Xb, yb, batch_size=batch_size, epochs=n_batches_for_epoch) +K.clear_session() +end = time.time() +if not error_occured: + with open('time.txt', 'a') as f: + f.write(args.framework + ',' + args.network + ',synthetic_imagenet,1,' + str(batch_size) + ',1,' + str(num_images) + "," + str(end-start) + "," + args.eviction_policy + ',' + args.allocator + '\n') diff --git a/scripts/perftest/gpu_resnet_perftest/run.sh b/scripts/perftest/gpu_resnet_perftest/run.sh new file mode 100644 index 0000000..30187f1 --- /dev/null +++ 
b/scripts/perftest/gpu_resnet_perftest/run.sh @@ -0,0 +1,72 @@ +#!/bin/bash +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +#rm -rf time.txt logs +#mkdir logs + +SPARK_HOME='/home/.../spark-2.3.0-bin-hadoop2.7' +DRIVER_MEMORY='200g' + +function compare_baseline { + network=$1 + num_images=$2 + batch_size=$3 + num_channels=$4 + height=$5 + width=$6 + allocator='unified_memory' + eviction_policy='lru' + for framework in tensorflow-gpu tensorflow systemml_force_gpu + do + echo "Running "$framework"_"$batch_size"_"$network"_"$num_images"_"$eviction_policy + rm -rf tmp_weights1 scratch_space spark-warehouse &> /dev/null + $SPARK_HOME/bin/spark-submit --driver-memory $DRIVER_MEMORY run.py --num_channels $num_channels --height $height --width $width --num_images $num_images --eviction_policy $eviction_policy --network $network --batch_size $batch_size --framework $framework --allocator $allocator &> logs/$framework"_"$batch_size"_"$network"_"$num_images"_"$eviction_policy"_"$allocator"_"$num_channels"_"$height"_"$width".log" + done +} + +function compare_eviction_policy { + network=$1 + num_images=$2 + 
batch_size=$3 + num_channels=$4 + height=$5 + width=$6 + framework='systemml_force_gpu' + allocator='cuda' + for eviction_policy in min_evict align_memory lru lfu + do + echo "Running "$framework"_"$batch_size"_"$network"_"$num_images"_"$eviction_policy + rm -rf tmp_weights1 scratch_space spark-warehouse &> /dev/null + $SPARK_HOME/bin/spark-submit --driver-memory $DRIVER_MEMORY run.py --num_channels $num_channels --height $height --width $width --num_images $num_images --eviction_policy $eviction_policy --network $network --batch_size $batch_size --framework $framework --allocator $allocator &> logs/$framework"_"$batch_size"_"$network"_"$num_images"_"$eviction_policy"_"$allocator"_"$num_channels"_"$height"_"$width".log" + done +} + +# Experiment 1: Very Deep ResNet-200 +compare_baseline resnet200 2 1 3 1792 1792 +compare_eviction_policy resnet200 2 1 3 1792 1792 + +# Experiment 2: Pseudo in-memory ResNet-200 +for b in 32 96 64 48 16 4 +do + compare_baseline resnet200 15360 $b 3 224 224 + compare_eviction_policy resnet200 15360 $b 3 224 224 +done \ No newline at end of file