Repository: incubator-airflow Updated Branches: refs/heads/master beadcd327 -> ad308ea44
[AIRFLOW-2139] Remove unnecessary boilerplate to get DataFrame using pandas_gbq Closes #3066 from mremes/improvement/cleanup-bigquery-hook Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/ad308ea4 Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/ad308ea4 Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/ad308ea4 Branch: refs/heads/master Commit: ad308ea441372f2b44b4292c3779eb745f2ed48c Parents: beadcd3 Author: Matti Remes <matti.re...@nextgames.com> Authored: Sat Feb 24 20:48:17 2018 +0100 Committer: Fokko Driesprong <fokkodriespr...@godatadriven.com> Committed: Sat Feb 24 20:48:17 2018 +0100 ---------------------------------------------------------------------- airflow/contrib/hooks/bigquery_hook.py | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/ad308ea4/airflow/contrib/hooks/bigquery_hook.py ---------------------------------------------------------------------- diff --git a/airflow/contrib/hooks/bigquery_hook.py b/airflow/contrib/hooks/bigquery_hook.py index cd0318f..d937f1e 100644 --- a/airflow/contrib/hooks/bigquery_hook.py +++ b/airflow/contrib/hooks/bigquery_hook.py @@ -28,10 +28,9 @@ from airflow.hooks.dbapi_hook import DbApiHook from airflow.utils.log.logging_mixin import LoggingMixin from apiclient.discovery import HttpError, build from googleapiclient import errors -from pandas.tools.merge import concat from pandas_gbq.gbq import \ _check_google_client_version as gbq_check_google_client_version -from pandas_gbq.gbq import _parse_data as gbq_parse_data +from pandas_gbq import read_gbq from pandas_gbq.gbq import \ _test_google_api_imports as gbq_test_google_api_imports from pandas_gbq.gbq import GbqConnector @@ -96,24 +95,13 @@ class BigQueryHook(GoogleCloudBaseHook, 
DbApiHook, LoggingMixin): defaults to use `self.use_legacy_sql` if not specified :type dialect: string in {'legacy', 'standard'} """ - service = self.get_service() - project = self._get_field('project') - if dialect is None: dialect = 'legacy' if self.use_legacy_sql else 'standard' - connector = BigQueryPandasConnector(project, service, dialect=dialect) - schema, pages = connector.run_query(bql) - dataframe_list = [] - - while len(pages) > 0: - page = pages.pop() - dataframe_list.append(gbq_parse_data(schema, page)) - - if len(dataframe_list) > 0: - return concat(dataframe_list, ignore_index=True) - else: - return gbq_parse_data(schema, []) + return read_gbq(bql, + project_id=self._get_field('project'), + dialect=dialect, + verbose=False) def table_exists(self, project_id, dataset_id, table_id): """