Repository: incubator-beam Updated Branches: refs/heads/python-sdk bb09c07b6 -> 409d067b3
Add support for date partitioned table names. These names have the format "tablename$YYYYmmdd". Previously the dollar sign caused this to be deemed invalid. Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/a1af871a Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/a1af871a Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/a1af871a Branch: refs/heads/python-sdk Commit: a1af871a0c8c92a6d84f2e9950615f7737118d7e Parents: bb09c07 Author: Kevin Graney <nano...@gmail.com> Authored: Tue Dec 6 15:09:42 2016 -0500 Committer: Robert Bradshaw <rober...@gmail.com> Committed: Wed Dec 21 15:16:45 2016 -0800 ---------------------------------------------------------------------- sdks/python/apache_beam/io/bigquery.py | 6 ++++-- sdks/python/apache_beam/io/bigquery_test.py | 8 ++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/a1af871a/sdks/python/apache_beam/io/bigquery.py ---------------------------------------------------------------------- diff --git a/sdks/python/apache_beam/io/bigquery.py b/sdks/python/apache_beam/io/bigquery.py index ce75e10..2059de4 100644 --- a/sdks/python/apache_beam/io/bigquery.py +++ b/sdks/python/apache_beam/io/bigquery.py @@ -275,7 +275,9 @@ def _parse_table_reference(table, dataset=None, project=None): then the table argument must contain the entire table reference: 'DATASET.TABLE' or 'PROJECT:DATASET.TABLE'. This argument can be a bigquery.TableReference instance in which case dataset and project are - ignored and the reference is returned as a result. + ignored and the reference is returned as a result. Additionally, for date + partitioned tables, appending '$YYYYmmdd' to the table name is supported, + e.g. 'DATASET.TABLE$YYYYmmdd'. 
dataset: The ID of the dataset containing this table or null if the table reference is specified entirely by the table argument. project: The ID of the project containing this table or null if the table @@ -300,7 +302,7 @@ def _parse_table_reference(table, dataset=None, project=None): # table name. if dataset is None: match = re.match( - r'^((?P<project>.+):)?(?P<dataset>\w+)\.(?P<table>\w+)$', table) + r'^((?P<project>.+):)?(?P<dataset>\w+)\.(?P<table>[\w\$]+)$', table) if not match: raise ValueError( 'Expected a table reference (PROJECT:DATASET.TABLE or ' http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/a1af871a/sdks/python/apache_beam/io/bigquery_test.py ---------------------------------------------------------------------- diff --git a/sdks/python/apache_beam/io/bigquery_test.py b/sdks/python/apache_beam/io/bigquery_test.py index a2cf947..f6f9363 100644 --- a/sdks/python/apache_beam/io/bigquery_test.py +++ b/sdks/python/apache_beam/io/bigquery_test.py @@ -208,6 +208,14 @@ class TestBigQuerySource(unittest.TestCase): self.assertEqual(source.query, 'my_query') self.assertIsNone(source.table_reference) + def test_date_partitioned_table_name(self): + source = beam.io.BigQuerySource('dataset.table$20030102', validate=True) + dd = DisplayData.create_from(source) + expected_items = [ + DisplayDataItemMatcher('validation', True), + DisplayDataItemMatcher('table', 'dataset.table$20030102')] + hc.assert_that(dd.items, hc.contains_inanyorder(*expected_items)) + class TestBigQuerySink(unittest.TestCase):