sreevatsanraman commented on a change in pull request #8954:
URL: https://github.com/apache/airflow/pull/8954#discussion_r430650030



##########
File path: airflow/providers/google/cloud/hooks/datafusion.py
##########
@@ -435,20 +435,52 @@ def _get_pipeline(
             "workflows",
             "DataPipelineWorkflow",
             "runs",
+            pipeline_id,
         )
         response = self._cdap_request(url=url, method="GET")
         if response.status != 200:
             raise AirflowException(
-                f"Retrieving a pipeline failed with code {response.status}"
+                f"Retrieving a pipeline state failed with code 
{response.status}"
             )
+        workflow = json.loads(response.data)
+        return workflow["status"]
 
-        pipelines_list = json.loads(response.data)
-        for pipe in pipelines_list:
-            runtime_args = json.loads(pipe["properties"]["runtimeArgs"])
-            if runtime_args[job_id_key] == faux_pipeline_id:
-                return pipe
+    def _get_pipeline_run_id(
+        self,
+        pipeline_name: str,
+        faux_pipeline_id: str,
+        instance_url: str,
+        namespace: str = "default",
+    ) -> str:
+        url = os.path.join(
+            instance_url,
+            "v3",
+            "namespaces",
+            namespace,
+            "apps",
+            pipeline_name,
+            "workflows",
+            "DataPipelineWorkflow",
+            "runs",
+        )
+        # Try 5 times to get the CDAP runid. We do this because the pipeline
+        # may not be present instantly
+        for _ in range(5):
+            response = self._cdap_request(url=url, method="GET")
+            if response.status != 200:

Review comment:
       @jaketf the API that CDAP exposes consists of the basic building 
blocks of programs, which are workflows, Spark jobs, MapReduce jobs, etc. Data 
Fusion pipelines use workflows for batch jobs and Spark Streaming jobs for 
real-time jobs. The operators should wait for the batch jobs but not for the 
streaming ones.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to