kaknikhil commented on a change in pull request #399: DL: Enable transfer learning
URL: https://github.com/apache/madlib/pull/399#discussion_r288759480
##########
File path:
src/ports/postgres/modules/deep_learning/madlib_keras_serializer.py_in
##########
@@ -16,68 +16,44 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-
import numpy as np
-def get_model_shapes(model):
- model_shapes = []
- for a in model.get_weights():
- model_shapes.append(a.shape)
- return model_shapes
-
# TODO
# Current serializing logic
# serialized string -> byte string
-# np.array(np.array(loss, acc, buff_count).concatenate(weights_np_array)).tostring()
+# np.array(np.array(image_count).concatenate(weights_np_array)).tostring()
# Proposed logic
-# loss , accuracy and image_count can be comma separated values
-# weights -> np.array.tostring()
+# image_count can be a separate value
+# weights -> np.array(weights).tostring()
# combine these 2 into one string by a random splitter
-# serialized string -> loss_splitter_acc_splitter_buffer_splitter_weights
+# serialized string -> imagecount_splitter_weights
-def deserialize_weights(model_state, model_shapes):
+def get_image_count_from_state(state):
"""
- Parameters:
- model_state: a stringified (serialized) state containing
- image_count and model_weights, passed from postgres
- model_shapes: a list of tuples containing the shapes of
- each element in keras.get_weights()
- Returns:
- image_count: the buffer count from state
- model_weights: a list of numpy arrays that can be inputted into keras.set_weights()
+ :param state: bytestring serialized model state containing image count
+ and weights
+ :return: image count as float
"""
- if not model_state or not model_shapes:
- return None
- state = np.fromstring(model_state, dtype=np.float32)
+ image_count , _ = deserialize_image_1d_weights(state)
+ return image_count
- model_weights_serialized = state[1:]
- i, j, model_weights = 0, 0, []
- while j < len(model_shapes):
- next_pointer = i + reduce(lambda x, y: x * y, model_shapes[j])
- weight_arr_portion = model_weights_serialized[i:next_pointer]
- model_weights.append(weight_arr_portion.reshape(model_shapes[j]))
- i, j = next_pointer, j + 1
- #TODO: float(state[0]) is the image_count, which can be get from
- # get_image_count_from_state() we defined below, we should check if
- # we still need to return it here when refactoring
- return float(state[0]), model_weights
-
-def get_image_count_from_state(model_state):
- if not model_state:
- return None
- state = np.fromstring(model_state, dtype=np.float32)
- return float(state[0])
+def get_serialized_weights_from_state(state):
Review comment:
1. Added a section named `workflow` at the top of the serializer file.
2. Renamed the functions.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services