orhankislal commented on a change in pull request #512:
URL: https://github.com/apache/madlib/pull/512#discussion_r468176111



##########
File path: src/ports/postgres/modules/deep_learning/madlib_keras_automl.py_in
##########
@@ -0,0 +1,143 @@
+# coding=utf-8
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import plpy
+import math
+
+from utilities.utilities import _assert
+from utilities.control import MinWarning
+
+class AutoMLSchema:
+    BRACKET = 's'
+    ROUND = 'i'
+    CONFIGURATIONS = 'n_i'
+    RESOURCES = 'r_i'
+
+@MinWarning("warning")
+class HyperbandSchedule():
+    """The utility class for loading a hyperband schedule table with algorithm 
inputs.
+
+    Attributes:
+        schedule_table (string): Name of output table containing hyperband 
schedule.
+        R (int): Maximum number of resources (iterations) that can be allocated
+  to a single configuration.
+        eta (int): Controls the proportion of configurations discarded in
+  each round of successive halving.
+        skip_last (int): The number of last rounds to skip.
+    """
+    def __init__(self, schedule_table, R, eta=3, skip_last=0):
+        self.schedule_table = schedule_table # table name to store hyperband 
schedule
+        self.R = R # maximum iterations/epochs allocated to a configuration
+        self.eta = eta # defines downsampling rate
+        self.skip_last = skip_last
+        self.validate_inputs()
+
+        # number of unique executions of Successive Halving (minus one)
+        self.s_max = int(math.floor(math.log(self.R, self.eta)))
+        self.validate_s_max()
+        # total number of resources/iterations (without reuse) per execution 
of Succesive Halving (n,r)
+        self.B = (self.s_max + 1) * self.R
+
+        self.schedule_vals = []
+
+        self.calculate_schedule()
+
+    def load(self):
+        """
+        The entry point for loading the hyperband schedule table.
+        """
+        self.create_schedule_table()
+        self.insert_into_schedule_table()
+
+    def validate_inputs(self):
+        """
+        Validates user input values
+        """
+        _assert(self.eta > 1, "DL: eta must be greater than 1")
+        _assert(self.R >= self.eta, "DL: R should not be less than eta")
+
+    def validate_s_max(self):
+        _assert(self.skip_last >= 0 and self.skip_last < self.s_max+1, "DL: 
skip_last must be " +
+                "non-negative and less than {0}".format(self.s_max))
+
+    def calculate_schedule(self):
+        """
+        Calculates the hyperband schedule (number of configs and allocated 
resources)
+        in each round of each bracket and skips the number of last rounds 
specified in 'skip_last'
+        """
+        for s in reversed(range(self.s_max+1)):
+            n = int(math.ceil(int(self.B/self.R/(s+1))*math.pow(self.eta, s))) 
# initial number of configurations
+            r = self.R * math.pow(self.eta, -s)
+
+            for i in range((s+1) - int(self.skip_last)):
+                # Computing each of the
+                n_i = n*math.pow(self.eta, -i)
+                r_i = r*math.pow(self.eta, i)
+
+                self.schedule_vals.append({AutoMLSchema.BRACKET: s,
+                                           AutoMLSchema.ROUND: i,
+                                           AutoMLSchema.CONFIGURATIONS: 
int(n_i),
+                                           AutoMLSchema.RESOURCES: 
int(round(r_i))})
+
+    def create_schedule_table(self):
+        """Initializes the output schedule table"""
+        create_query = """
+                        CREATE TABLE {self.schedule_table} (
+                            {s} INTEGER,
+                            {i} INTEGER,
+                            {n_i} INTEGER,
+                            {r_i} INTEGER,
+                            unique ({s}, {i})
+                        );
+                       """.format(self=self,
+                                  s=AutoMLSchema.BRACKET,
+                                  i=AutoMLSchema.ROUND,
+                                  n_i=AutoMLSchema.CONFIGURATIONS,
+                                  r_i=AutoMLSchema.RESOURCES)
+        with MinWarning('warning'):
+            plpy.execute(create_query)
+
+    def insert_into_schedule_table(self):
+        """Insert every thing in self.schedule_vals into the output schedule 
table."""

Review comment:
       Typo in the docstring: "every thing" -> "everything"

##########
File path: src/ports/postgres/modules/deep_learning/madlib_keras_automl.py_in
##########
@@ -0,0 +1,143 @@
+# coding=utf-8
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import plpy
+import math
+
+from utilities.utilities import _assert
+from utilities.control import MinWarning
+
+class AutoMLSchema:
+    BRACKET = 's'
+    ROUND = 'i'
+    CONFIGURATIONS = 'n_i'
+    RESOURCES = 'r_i'
+
+@MinWarning("warning")
+class HyperbandSchedule():
+    """The utility class for loading a hyperband schedule table with algorithm 
inputs.
+
+    Attributes:
+        schedule_table (string): Name of output table containing hyperband 
schedule.
+        R (int): Maximum number of resources (iterations) that can be allocated
+  to a single configuration.
+        eta (int): Controls the proportion of configurations discarded in
+  each round of successive halving.
+        skip_last (int): The number of last rounds to skip.
+    """
+    def __init__(self, schedule_table, R, eta=3, skip_last=0):
+        self.schedule_table = schedule_table # table name to store hyperband 
schedule
+        self.R = R # maximum iterations/epochs allocated to a configuration
+        self.eta = eta # defines downsampling rate
+        self.skip_last = skip_last
+        self.validate_inputs()
+
+        # number of unique executions of Successive Halving (minus one)
+        self.s_max = int(math.floor(math.log(self.R, self.eta)))
+        self.validate_s_max()
+        # total number of resources/iterations (without reuse) per execution 
of Succesive Halving (n,r)
+        self.B = (self.s_max + 1) * self.R
+
+        self.schedule_vals = []
+
+        self.calculate_schedule()
+
+    def load(self):
+        """
+        The entry point for loading the hyperband schedule table.
+        """
+        self.create_schedule_table()
+        self.insert_into_schedule_table()
+
+    def validate_inputs(self):
+        """
+        Validates user input values
+        """
+        _assert(self.eta > 1, "DL: eta must be greater than 1")
+        _assert(self.R >= self.eta, "DL: R should not be less than eta")
+
+    def validate_s_max(self):
+        _assert(self.skip_last >= 0 and self.skip_last < self.s_max+1, "DL: 
skip_last must be " +
+                "non-negative and less than {0}".format(self.s_max))
+
+    def calculate_schedule(self):
+        """
+        Calculates the hyperband schedule (number of configs and allocated 
resources)
+        in each round of each bracket and skips the number of last rounds 
specified in 'skip_last'
+        """
+        for s in reversed(range(self.s_max+1)):
+            n = int(math.ceil(int(self.B/self.R/(s+1))*math.pow(self.eta, s))) 
# initial number of configurations

Review comment:
       It seems we calculate `self.B = (self.s_max + 1) * self.R` and then only 
use it in `self.B/self.R`. We might as well use `(self.s_max + 1)` directly.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to