[
https://issues.apache.org/jira/browse/BEAM-14255?focusedWorklogId=772634&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-772634
]
ASF GitHub Bot logged work on BEAM-14255:
-----------------------------------------
Author: ASF GitHub Bot
Created on: 20/May/22 00:01
Start Date: 20/May/22 00:01
Worklog Time Spent: 10m
Work Description: TheNeuralBit commented on code in PR #17671:
URL: https://github.com/apache/beam/pull/17671#discussion_r877623442
##########
sdks/python/apache_beam/ml/inference/base_test.py:
##########
@@ -37,33 +37,33 @@ def predict(self, example: int) -> int:
class FakeInferenceRunner(base.InferenceRunner):
def __init__(self, clock=None):
- self._mock_clock = clock
+ self._fake_clock = clock
def run_inference(self, batch: Any, model: Any) -> Iterable[Any]:
- if self._mock_clock:
- self._mock_clock.current_time += 3000
+ if self._fake_clock:
+ self._fake_clock.current_time += 0.003 # 3 milliseconds
for example in batch:
yield model.predict(example)
class FakeModelLoader(base.ModelLoader):
def __init__(self, clock=None):
- self._mock_clock = clock
+ self._fake_clock = clock
def load_model(self):
- if self._mock_clock:
- self._mock_clock.current_time += 50000
+ if self._fake_clock:
+ self._fake_clock.current_time += 0.5
return FakeModel()
def get_inference_runner(self):
- return FakeInferenceRunner(self._mock_clock)
+ return FakeInferenceRunner(self._fake_clock)
-class MockClock(base._Clock):
+class FakeClock:
def __init__(self):
- self.current_time = 10000
+ self.current_time = 10.0
- def get_current_time_in_microseconds(self) -> int:
+ def time(self) -> int:
Review Comment:
nit:
```suggestion
def time(self) -> float:
```
##########
sdks/python/apache_beam/ml/inference/base.py:
##########
@@ -150,27 +155,24 @@ def update(
class _RunInferenceDoFn(beam.DoFn):
"""A DoFn implementation generic to frameworks."""
- def __init__(self, model_loader: ModelLoader, clock=None):
+ def __init__(self, model_loader: ModelLoader, clock):
self._model_loader = model_loader
self._inference_runner = model_loader.get_inference_runner()
self._shared_model_handle = shared.Shared()
self._metrics_collector = _MetricsCollector(
self._inference_runner.get_metrics_namespace())
self._clock = clock
- if not clock:
- self._clock = _ClockFactory.make_clock()
self._model = None
def _load_model(self):
def load():
"""Function for constructing shared LoadedModel."""
memory_before = _get_current_process_memory_in_bytes()
- start_time = self._clock.get_current_time_in_microseconds()
+ start_time = _to_milliseconds(self._clock.time())
model = self._model_loader.load_model()
- end_time = self._clock.get_current_time_in_microseconds()
+ end_time = _to_milliseconds(self._clock.time())
Review Comment:
I see, you might consider using time.time_ns then to avoid floating point
precision issues when measuring a single millisecond delta.
##########
sdks/python/apache_beam/ml/inference/base_test.py:
##########
@@ -37,33 +37,33 @@ def predict(self, example: int) -> int:
class FakeInferenceRunner(base.InferenceRunner):
def __init__(self, clock=None):
- self._mock_clock = clock
+ self._fake_clock = clock
def run_inference(self, batch: Any, model: Any) -> Iterable[Any]:
- if self._mock_clock:
- self._mock_clock.current_time += 3000
+ if self._fake_clock:
+ self._fake_clock.current_time += 0.003 # 3 milliseconds
for example in batch:
yield model.predict(example)
class FakeModelLoader(base.ModelLoader):
def __init__(self, clock=None):
- self._mock_clock = clock
+ self._fake_clock = clock
def load_model(self):
- if self._mock_clock:
- self._mock_clock.current_time += 50000
+ if self._fake_clock:
+ self._fake_clock.current_time += 0.5
return FakeModel()
def get_inference_runner(self):
- return FakeInferenceRunner(self._mock_clock)
+ return FakeInferenceRunner(self._fake_clock)
-class MockClock(base._Clock):
+class FakeClock:
def __init__(self):
- self.current_time = 10000
+ self.current_time = 10.0
- def get_current_time_in_microseconds(self) -> int:
+ def time(self) -> int:
Review Comment:
but maybe this should change to `time_ns(self) -> int:`
Issue Time Tracking
-------------------
Worklog Id: (was: 772634)
Time Spent: 2.5h (was: 2h 20m)
> Drop the clock abstraction and just use time.time for time measurements
> -----------------------------------------------------------------------
>
> Key: BEAM-14255
> URL: https://issues.apache.org/jira/browse/BEAM-14255
> Project: Beam
> Issue Type: Sub-task
> Components: sdk-py-core
> Reporter: Ryan Thompson
> Assignee: Ryan Thompson
> Priority: P2
> Time Spent: 2.5h
> Remaining Estimate: 0h
>
> Right now the TFX-BSL RunInference library uses an abstract clock class to
> get microsecond precision, but time.time should give adequate precision.
>
> Investigate removing the clock abstraction and just using time.time.
>
> Alternatively, comment why the abstraction is useful.
--
This message was sent by Atlassian Jira
(v8.20.7#820007)