[ 
https://issues.apache.org/jira/browse/BEAM-9577?focusedWorklogId=412577&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-412577
 ]

ASF GitHub Bot logged work on BEAM-9577:
----------------------------------------

                Author: ASF GitHub Bot
            Created on: 30/Mar/20 21:00
            Start Date: 30/Mar/20 21:00
    Worklog Time Spent: 10m 
      Work Description: lukecwik commented on pull request #11203: [BEAM-9577] 
Define and implement dependency-aware artifact staging service.
URL: https://github.com/apache/beam/pull/11203#discussion_r400476127
 
 

 ##########
 File path: sdks/python/apache_beam/runners/portability/artifact_service.py
 ##########
 @@ -263,3 +279,205 @@ def _open(self, path, mode='r'):
       return filesystems.FileSystems.create(path)
     else:
       return filesystems.FileSystems.open(path)
+
+
+# The dependency-aware artifact staging and retrieval services.
+
+
+def _queue_iter(queue, end_token):
+  while True:
+    item = queue.get()
+    if item is end_token:
+      break
+    yield item
+
+
+class ArtifactRetrievalService(
+    beam_artifact_api_pb2_grpc.ArtifactRetrievalServiceServicer):
+
+  _DEFAULT_CHUNK_SIZE = 2 << 20
+
+  def __init__(
+      self,
+      file_reader,  # type: Callable[[str], BinaryIO],
+      chunk_size=None,
+  ):
+    self._file_reader = file_reader
+    self._chunk_size = chunk_size or self._DEFAULT_CHUNK_SIZE
+
+  def ResolveArtifact(self, request, context=None):
+    return beam_artifact_api_pb2.ResolveArtifactResponse(
+        replacements=request.artifacts)
+
+  def GetArtifact(self, request, context=None):
+    if request.artifact.type_urn == common_urns.artifact_types.FILE.urn:
+      payload = proto_utils.parse_Bytes(
+          request.artifact.type_payload,
+          beam_runner_api_pb2.ArtifactFilePayload)
+      read_handle = self._file_reader(payload.path)
+    elif request.artifact.type_urn == common_urns.artifact_types.URL.urn:
+      payload = proto_utils.parse_Bytes(
+          request.artifact.type_payload, 
beam_runner_api_pb2.ArtifactUrlPayload)
+      # TODO(Py3): Remove the unneeded contextlib wrapper.
+      read_handle = contextlib.closing(urlopen(payload.path))
+    elif request.artifact.type_urn == common_urns.artifact_types.EMBEDDED.urn:
+      payload = proto_utils.parse_Bytes(
+          request.artifact.type_payload,
+          beam_runner_api_pb2.EmbeddedFilePayload)
+      read_handle = BytesIO(payload.data)
+    else:
+      raise NotImplementedError(request.artifact.type_urn)
+
+    with read_handle as fin:
+      while True:
+        chunk = fin.read(self._chunk_size)
+        if not chunk:
+          break
+        yield beam_artifact_api_pb2.GetArtifactResponse(data=chunk)
+
+
+class ArtifactStagingService(
+    beam_artifact_api_pb2_grpc.ArtifactStagingServiceServicer):
+  def __init__(
+      self,
+      file_writer,  # type: Callable[[str, Optional[str]], Tuple[BinaryIO, 
str]]
+    ):
+    self._lock = threading.Lock()
+    self._jobs_to_stage = {}
+    self._file_writer = file_writer
+
+  def register_job(self, staging_token, dependencies):
+    self._jobs_to_stage[staging_token] = list(dependencies), threading.Event()
+
+  def resolved_deps(self, staging_token, timeout=None):
+    dependencies_list, event = self._jobs_to_stage[staging_token]
+    try:
+      if not event.wait(timeout):
+        raise concurrent.futures.TimeoutError()
+      return dependencies_list
+    finally:
+      del self._jobs_to_stage[staging_token]
+
+  def ReverseArtifactRetrievalService(self, responses, context=None):
+    staging_token = next(responses).staging_token
+    dependencies, event = self._jobs_to_stage[staging_token]
+
+    requests = queue.Queue()
+
+    class FakeRetrievalService(object):
 
 Review comment:
   nit: FakeRetrievalService -> ForwardingRetrievalService
   
   or ProxyingRetrievalService
 
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
-------------------

    Worklog Id:     (was: 412577)
    Time Spent: 4.5h  (was: 4h 20m)

> Update artifact staging and retrieval protocols to be dependency aware.
> -----------------------------------------------------------------------
>
>                 Key: BEAM-9577
>                 URL: https://issues.apache.org/jira/browse/BEAM-9577
>             Project: Beam
>          Issue Type: Improvement
>          Components: beam-model
>            Reporter: Robert Bradshaw
>            Assignee: Robert Bradshaw
>            Priority: Major
>          Time Spent: 4.5h
>  Remaining Estimate: 0h
>




--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to