yohei1126 commented on a change in pull request #4324: [AIRFLOW-3327] Add support for location in BigQueryHook
URL: https://github.com/apache/incubator-airflow/pull/4324#discussion_r244277306
##########
File path: airflow/contrib/hooks/bigquery_hook.py
##########

```
@@ -580,11 +587,18 @@ def run_query(self,
             by one or more columns. This is only available in combination with
             time_partitioning. The order of columns given determines the sort order.
         :type cluster_fields: list of str
+        :param location: The geographic location of the job. Required except for
+            US and EU. See details at
```

Review comment:
This operator supports `destination_dataset_table`, so I tested the following four patterns. As far as I can tell, it works as expected:

1. (OK) source dataset in US, destination dataset in US, no location specified
2. (OK) source dataset in Tokyo, destination dataset in Tokyo, location specified as `asia-northeast1`
3. (Fail) source dataset in Tokyo, destination dataset in US, location specified as `asia-northeast1`
4. (Fail) source dataset in US, destination dataset in Tokyo, no location specified

```
# -*- coding: utf-8 -*-
from airflow import DAG
from airflow.contrib.operators.bigquery_operator import BigQueryOperator
from airflow.utils.dates import days_ago

ARGS = {
    'owner': 'airflow',
    'depends_on_past': False,
    'retries': 0,
    'start_date': days_ago(1)
}
COMMON_PARAMS = {}

with DAG(
        dag_id='test_bq',
        default_args=ARGS,
        params=COMMON_PARAMS,
        schedule_interval='@once') as dag:

    # both datasets are in US
    # this should be OK
    t1 = BigQueryOperator(
        task_id='us_to_us',
        sql='SELECT * FROM test_us_ds.test_table',
        bigquery_conn_id='google_cloud_default',
        destination_dataset_table='fr-stg-datalake:test_us_ds.dest_table_us_to_us'
    )

    # both source dataset and dest dataset are in Tokyo
    # this should be OK
    t2 = BigQueryOperator(
        task_id='tky_to_tky',
        sql='SELECT * FROM test_tokyo_ds.test_table',
        bigquery_conn_id='google_cloud_default',
        location='asia-northeast1',
        destination_dataset_table='fr-stg-datalake:test_tokyo_ds.dest_table_tky_to_tky'
    )

    # source dataset is in Tokyo but dest table is in US
    # this should fail
    t3 = BigQueryOperator(
        task_id='tky_to_us',
        sql='SELECT * FROM test_tokyo_ds.test_table',
        bigquery_conn_id='google_cloud_default',
        location='asia-northeast1',
        destination_dataset_table='fr-stg-datalake:test_us_ds.dest_table_tky_to_us'
    )

    # source dataset is in US but dest table is in Tokyo
    # this should fail
    t4 = BigQueryOperator(
        task_id='us_to_tky',
        sql='SELECT * FROM test_us_ds.test_table',
        bigquery_conn_id='google_cloud_default',
        destination_dataset_table='fr-stg-datalake:test_tokyo_ds.dest_table_us_to_tky'
    )

    t1 >> t2
    t1 >> t3
    t1 >> t4

globals()[dag.dag_id] = dag
```
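For reference, the requirement in the new docstring ("Required except for US and EU") mirrors the BigQuery v2 REST API itself: `jobs.get` accepts a `location` parameter that must be set for jobs running outside the US and EU multi-regions, which is why the hook needs to carry `location` through. Below is a minimal sketch, not part of the PR, showing that call directly. It assumes `google-api-python-client` is installed and application-default credentials are configured; `some_job_id` is a hypothetical placeholder.

```
# -*- coding: utf-8 -*-
# Minimal sketch: polling a BigQuery job via the v2 REST API.
# Assumptions (not from the PR): google-api-python-client is installed,
# application-default credentials are available, and JOB_ID is a
# hypothetical placeholder for a real job id.
from googleapiclient.discovery import build

PROJECT_ID = 'fr-stg-datalake'  # project used in the test DAG above
JOB_ID = 'some_job_id'          # placeholder; replace with a real job id

service = build('bigquery', 'v2', cache_discovery=False)

# For a job in the US or EU multi-regions, location may be omitted.
job = service.jobs().get(projectId=PROJECT_ID, jobId=JOB_ID).execute()

# For a job in any other region (e.g. Tokyo), location is required;
# omitting it makes jobs.get report the job as not found.
job = service.jobs().get(
    projectId=PROJECT_ID,
    jobId=JOB_ID,
    location='asia-northeast1',
).execute()
print(job['status']['state'])
```

This is also consistent with patterns 3 and 4 failing: a BigQuery query job cannot read from a dataset in one location and write to a destination table in another, regardless of what `location` is passed.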