This is an automated email from the ASF dual-hosted git repository. niketanpansare pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/systemml.git
The following commit(s) were added to refs/heads/master by this push: new 5288bc0 [SYSTEMML-540] Make Keras2DML compatible with newer Keras versions 5288bc0 is described below commit 5288bc0d536df0574b17363d950e05b3c4bbe0d4 Author: Niketan Pansare <npan...@us.ibm.com> AuthorDate: Fri Feb 1 16:52:57 2019 -0800 [SYSTEMML-540] Make Keras2DML compatible with newer Keras versions - After version 2.1.5, Keras had major refactoring which changed their layer definitions. - In version 2.2.4, the model no longer contains an explicit InputLayer. - This commit addresses this issue so as to be compatible with older as well as newer Keras versions. --- src/main/python/systemml/mllearn/keras2caffe.py | 108 ++++++++++++++---------- 1 file changed, 64 insertions(+), 44 deletions(-) diff --git a/src/main/python/systemml/mllearn/keras2caffe.py b/src/main/python/systemml/mllearn/keras2caffe.py index 6e1e9c3..a06113c 100755 --- a/src/main/python/systemml/mllearn/keras2caffe.py +++ b/src/main/python/systemml/mllearn/keras2caffe.py @@ -106,7 +106,7 @@ str_keys = ['name', 'type', 'top', 'bottom'] def toKV(key, value): return str(key) + ': "' + str(value) + \ - '"' if key in str_keys else str(key) + ': ' + str(value) + '"' if key in str_keys else str(key) + ': ' + str(value) def _parseJSONObject(obj): @@ -143,7 +143,8 @@ def _parseActivation(layer, customLayerName=None): 'type': supportedCaffeActivations[kerasActivation], 'top': layer.name, 'bottom': layer.name}} else: return {'layer': {'name': layer.name, - 'type': supportedCaffeActivations[kerasActivation], 'top': layer.name, 'bottom': _getBottomLayers(layer)}} + 'type': supportedCaffeActivations[kerasActivation], 'top': layer.name, + 'bottom': _getBottomLayers(layer)}} def _shouldParseActivation(layer): @@ -184,8 +185,10 @@ def _parseBatchNorm(layer): bnName = layer.name + '_1' config = layer.get_config() bias_term = 'true' if config['center'] else 'false' - return [{'layer': {'name': bnName, 'type': 'BatchNorm', 'bottom': _getBottomLayers(layer), 'top': bnName, 'batch_norm_param': {'moving_average_fraction': layer.momentum, 'eps': layer.epsilon}}}, { - 'layer': {'name': layer.name, 'type': 'Scale', 'bottom': bnName, 'top': layer.name, 'scale_param': {'bias_term': bias_term}}}] + return [{'layer': {'name': bnName, 'type': 'BatchNorm', 'bottom': _getBottomLayers(layer), 'top': bnName, + 'batch_norm_param': {'moving_average_fraction': layer.momentum, 'eps': layer.epsilon}}}, { + 'layer': {'name': layer.name, 'type': 'Scale', 'bottom': bnName, 'top': layer.name, + 'scale_param': {'bias_term': bias_term}}}] # The special are redirected to their custom parse function in _parseKerasLayer @@ -206,7 +209,8 @@ def getConvParam(layer): 0] config = layer.get_config() return {'num_output': layer.filters, 'bias_term': str(config['use_bias']).lower( - ), 'kernel_h': layer.kernel_size[0], 'kernel_w': layer.kernel_size[1], 'stride_h': stride[0], 'stride_w': stride[1], 'pad_h': padding[0], 'pad_w': padding[1]} + ), 'kernel_h': layer.kernel_size[0], 'kernel_w': layer.kernel_size[1], 'stride_h': stride[0], 'stride_w': stride[1], + 'pad_h': padding[0], 'pad_w': padding[1]} def getUpSamplingParam(layer): @@ -227,11 +231,11 @@ def getPoolingParam(layer, pool='MAX'): def getRecurrentParam(layer): - if(not layer.use_bias): + if (not layer.use_bias): raise Exception('Only use_bias=True supported for recurrent layers') - if(keras.activations.serialize(layer.activation) != 'tanh'): + if (keras.activations.serialize(layer.activation) != 'tanh'): raise Exception('Only tanh activation supported for recurrent layers') - if(layer.dropout != 0 or layer.recurrent_dropout != 0): + if (layer.dropout != 0 or layer.recurrent_dropout != 0): raise Exception('Only dropout not supported for recurrent layers') return {'num_output': layer.units, 'return_sequences': str( layer.return_sequences).lower()} @@ -242,27 +246,27 @@ layerParamMapping = { keras.layers.InputLayer: lambda l: {'data_param': {'batch_size': l.batch_size}}, keras.layers.Dense: lambda l: - {'inner_product_param': {'num_output': l.units}}, + {'inner_product_param': {'num_output': l.units}}, keras.layers.Dropout: lambda l: - {'dropout_param': {'dropout_ratio': l.rate}}, + {'dropout_param': {'dropout_ratio': l.rate}}, keras.layers.Add: lambda l: - {'eltwise_param': {'operation': 'SUM'}}, + {'eltwise_param': {'operation': 'SUM'}}, keras.layers.Concatenate: lambda l: - {'concat_param': {'axis': _getCompensatedAxis(l)}}, + {'concat_param': {'axis': _getCompensatedAxis(l)}}, keras.layers.Conv2DTranspose: lambda l: - {'convolution_param': getConvParam(l)}, + {'convolution_param': getConvParam(l)}, keras.layers.UpSampling2D: lambda l: - {'upsample_param': getUpSamplingParam(l)}, + {'upsample_param': getUpSamplingParam(l)}, keras.layers.Conv2D: lambda l: - {'convolution_param': getConvParam(l)}, + {'convolution_param': getConvParam(l)}, keras.layers.MaxPooling2D: lambda l: - {'pooling_param': getPoolingParam(l, 'MAX')}, + {'pooling_param': getPoolingParam(l, 'MAX')}, keras.layers.AveragePooling2D: lambda l: - {'pooling_param': getPoolingParam(l, 'AVE')}, + {'pooling_param': getPoolingParam(l, 'AVE')}, keras.layers.SimpleRNN: lambda l: - {'recurrent_param': getRecurrentParam(l)}, + {'recurrent_param': getRecurrentParam(l)}, keras.layers.LSTM: lambda l: - {'recurrent_param': getRecurrentParam(l)}, + {'recurrent_param': getRecurrentParam(l)}, } @@ -305,7 +309,7 @@ def _appendKerasLayers(fileHandle, kerasLayers, batch_size): def lossLayerStr(layerType, bottomLayer): return 'layer {\n name: "loss"\n type: "' + layerType + \ - '"\n bottom: "' + bottomLayer + '"\n bottom: "label"\n top: "loss"\n}\n' + '"\n bottom: "' + bottomLayer + '"\n bottom: "label"\n top: "loss"\n}\n' def _appendKerasLayerWithoutActivation(fileHandle, layer, batch_size): @@ -327,40 +331,55 @@ def _getExactlyOneBottomLayer(layer): def _isMeanSquaredError(loss): return loss == 'mean_squared_error' or loss == 'mse' or loss == 'MSE' +def _appendInputLayerIfNecessary(kerasModel): + """ Append an Input layer if not present: required for versions 2.1.5 (works with 2.1.5, but not with 2.2.4) and return all the layers """ + input_layer = [] + if not any([isinstance(l, keras.layers.InputLayer) for l in kerasModel.layers]): + input_name = kerasModel.layers[0]._inbound_nodes[0].inbound_layers[0].name + input_shape = kerasModel.layers[0].input_shape + input_layer = [keras.layers.InputLayer(name=input_name, input_shape=input_shape)] + return input_layer + kerasModel.layers + +def _throwLossException(loss, lastLayerActivation=None): + if lastLayerActivation is not None: + activationMsg = ' (where last layer activation ' + lastLayerActivation + ')' + else: + activationMsg = '' + raise Exception('Unsupported loss layer ' + str(loss) + activationMsg) def convertKerasToCaffeNetwork( kerasModel, outCaffeNetworkFilePath, batch_size): _checkIfValid(kerasModel.layers, lambda layer: False if type( layer) in supportedLayers else True, 'Unsupported Layers:') with open(outCaffeNetworkFilePath, 'w') as f: + layers = _appendInputLayerIfNecessary(kerasModel) # Write the parsed layers for all but the last layer - _appendKerasLayers(f, kerasModel.layers[:-1], batch_size) + _appendKerasLayers(f, layers[:-1], batch_size) # Now process the last layer with loss - lastLayer = kerasModel.layers[-1] + lastLayer = layers[-1] if _isMeanSquaredError(kerasModel.loss): + # No need to inspect the last layer, just append EuclideanLoss after writing the last layer _appendKerasLayers(f, [lastLayer], batch_size) f.write(lossLayerStr('EuclideanLoss', lastLayer.name)) elif kerasModel.loss == 'categorical_crossentropy': - _appendKerasLayerWithoutActivation(f, lastLayer, batch_size) - bottomLayer = _getExactlyOneBottomLayer(lastLayer) if isinstance( - lastLayer, keras.layers.Activation) else lastLayer.name - lastLayerActivation = str( - keras.activations.serialize( - lastLayer.activation)) - if lastLayerActivation == 'softmax' and kerasModel.loss == 'categorical_crossentropy': - f.write(lossLayerStr('SoftmaxWithLoss', bottomLayer)) + # Three cases: + if isinstance(lastLayer, keras.layers.Softmax): + # Case 1: Last layer is a softmax. + f.write(lossLayerStr('SoftmaxWithLoss', _getExactlyOneBottomLayer(lastLayer))) else: - raise Exception('Unsupported loss layer ' + - str(kerasModel.loss) + - ' (where last layer activation ' + - lastLayerActivation + - ').') + lastLayerActivation = str(keras.activations.serialize(lastLayer.activation)) + if lastLayerActivation == 'softmax' and kerasModel.loss == 'categorical_crossentropy': + # Case 2: Last layer activation is softmax. + # First append the last layer without its activation and then append SoftmaxWithLoss + bottomLayer = _getExactlyOneBottomLayer(lastLayer) if isinstance( + lastLayer, keras.layers.Activation) else lastLayer.name + _appendKerasLayerWithoutActivation(f, lastLayer, batch_size) + f.write(lossLayerStr('SoftmaxWithLoss', bottomLayer)) + else: + # Case 3: Last layer activation is not softmax => Throw error + _throwLossException(kerasModel.loss, lastLayerActivation) else: - raise Exception('Unsupported loss layer ' + - str(kerasModel.loss) + - ' (where last layer activation ' + - lastLayerActivation + - ').') + _throwLossException(kerasModel.loss) def getNumPyMatrixFromKerasWeight(param): @@ -387,7 +406,8 @@ def evaluateValue(val): def convertKerasToCaffeSolver(kerasModel, caffeNetworkFilePath, outCaffeSolverFilePath, - max_iter, test_iter, test_interval, display, lr_policy, weight_decay, regularization_type): + max_iter, test_iter, test_interval, display, lr_policy, weight_decay, + regularization_type): if isinstance(kerasModel.optimizer, keras.optimizers.SGD): solver = 'type: "Nesterov"\n' if kerasModel.optimizer.nesterov else 'type: "SGD"\n' elif isinstance(kerasModel.optimizer, keras.optimizers.Adagrad): @@ -489,10 +509,10 @@ def convertKerasToSystemMLModel(spark, kerasModel, outDirectory): layer.name + '_1_bias'] for i in range(len(inputMatrices)): dmlLines = dmlLines + \ - ['write(' + potentialVar[i] + ', "' + outDirectory + - '/' + potentialVar[i] + '.mtx", format="binary");\n'] + ['write(' + potentialVar[i] + ', "' + outDirectory + + '/' + potentialVar[i] + '.mtx", format="binary");\n'] mat = inputMatrices[i].transpose() if ( - i == 1 and type(layer) in biasToTranspose) else inputMatrices[i] + i == 1 and type(layer) in biasToTranspose) else inputMatrices[i] py4j.java_gateway.get_method(script_java, "in")( potentialVar[i], convertToMatrixBlock(sc, mat)) script_java.setScriptString(''.join(dmlLines))