[GitHub] kaxil commented on a change in pull request #4354: [AIRFLOW-3446] Add Google Cloud BigTable operators

2019-01-03 Thread GitBox
kaxil commented on a change in pull request #4354: [AIRFLOW-3446] Add Google 
Cloud BigTable operators
URL: https://github.com/apache/incubator-airflow/pull/4354#discussion_r245115365
 
 

 ##
 File path: airflow/contrib/operators/gcp_bigtable_operator.py
 ##
 @@ -0,0 +1,424 @@
+# -*- coding: utf-8 -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import google.api_core.exceptions
+
+from airflow import AirflowException
+from airflow.models import BaseOperator
+from airflow.sensors.base_sensor_operator import BaseSensorOperator
+from airflow.contrib.hooks.gcp_bigtable_hook import BigtableHook
+from airflow.utils.decorators import apply_defaults
+from google.cloud.bigtable_admin_v2 import enums
+from google.cloud.bigtable.table import ClusterState
+
+
+class BigtableValidationMixin(object):
+"""
+Common class for Cloud Bigtable operators for validating required fields.
+"""
+
+REQUIRED_ATTRIBUTES = []
+
+def _validate_inputs(self):
+for attr_name in self.REQUIRED_ATTRIBUTES:
+if not getattr(self, attr_name):
+raise AirflowException('Empty parameter: {}'.format(attr_name))
+
+
+class BigtableInstanceCreateOperator(BaseOperator, BigtableValidationMixin):
+"""
+Creates a new Cloud Bigtable instance.
+If the Cloud Bigtable instance with the given ID exists, the operator does 
not compare its configuration
+and immediately succeeds. No changes are made to the existing instance.
+
+For more details about instance creation have a look at the reference:
+
https://googleapis.github.io/google-cloud-python/latest/bigtable/instance.html#google.cloud.bigtable.instance.Instance.create
+
+:type project_id: str
+:param project_id: The ID of the GCP project.
+:type instance_id: str
+:param instance_id: The ID of the Cloud Bigtable instance to create.
+:type main_cluster_id: str
+:param main_cluster_id: The ID for main cluster for the new instance.
+:type main_cluster_zone: str
+:param main_cluster_zone: The zone for main cluster
+See https://cloud.google.com/bigtable/docs/locations for more details.
+:type replica_cluster_id: str
+:param replica_cluster_id: (optional) The ID for replica cluster for the 
new instance.
+:type replica_cluster_zone: str
+:param replica_cluster_zone: (optional)  The zone for replica cluster.
+:type instance_type: IntEnum
+:param instance_type: (optional) The type of the instance.
+:type instance_display_name: str
+:param instance_display_name: (optional) Human-readable name of the 
instance. Defaults to ``instance_id``.
+:type instance_labels: dict
+:param instance_labels: (optional) Dictionary of labels to associate with 
the instance.
+:type cluster_nodes: int
+:param cluster_nodes: (optional) Number of nodes for cluster.
+:type cluster_storage_type: IntEnum
+:param cluster_storage_type: (optional) The type of storage.
+:type timeout: int
+:param timeout: (optional) timeout (in seconds) for instance creation.
+If not specified (None), the operator will wait indefinitely.
+"""
+
+REQUIRED_ATTRIBUTES = ('project_id', 'instance_id', 'main_cluster_id', 
'main_cluster_zone')
+template_fields = ['project_id', 'instance_id', 'main_cluster_id', 
'main_cluster_zone']
+
+@apply_defaults
+def __init__(self,
+ project_id,
+ instance_id,
+ main_cluster_id,
+ main_cluster_zone,
+ replica_cluster_id=None,
+ replica_cluster_zone=None,
+ instance_display_name=None,
+ instance_type=None,
+ instance_labels=None,
+ cluster_nodes=None,
+ cluster_storage_type=None,
+ timeout=None,
+ *args, **kwargs):
+self.project_id = project_id
+self.instance_id = instance_id
+self.main_cluster_id = main_cluster_id
+self.main_cluster_zone = main_cluster_zone
+self.replica_cluster_id = replica_cluster_id
+self.replica_cluster_zone = replica_cluster_zone
+self.instance_display_name = instance_d

[GitHub] kaxil commented on a change in pull request #4354: [AIRFLOW-3446] Add Google Cloud BigTable operators

2019-01-03 Thread GitBox
kaxil commented on a change in pull request #4354: [AIRFLOW-3446] Add Google 
Cloud BigTable operators
URL: https://github.com/apache/incubator-airflow/pull/4354#discussion_r245114816
 
 

 ##
 File path: airflow/contrib/hooks/gcp_bigtable_hook.py
 ##
 @@ -0,0 +1,232 @@
+# -*- coding: utf-8 -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from google.cloud.bigtable import Client
+from google.cloud.bigtable.cluster import Cluster
+from google.cloud.bigtable.instance import Instance
+from google.cloud.bigtable.table import Table
+from google.cloud.bigtable_admin_v2 import enums
+from airflow.contrib.hooks.gcp_api_base_hook import GoogleCloudBaseHook
+
+
+# noinspection PyAbstractClass
 
 Review comment:
   I agree, it is annoying to me as well. Can you please create a separate JIRA 
for it? We definitely need to solve this.


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services


[GitHub] kaxil commented on a change in pull request #4354: [AIRFLOW-3446] Add Google Cloud BigTable operators

2019-01-03 Thread GitBox
kaxil commented on a change in pull request #4354: [AIRFLOW-3446] Add Google 
Cloud BigTable operators
URL: https://github.com/apache/incubator-airflow/pull/4354#discussion_r245114534
 
 

 ##
 File path: airflow/contrib/example_dags/example_gcp_bigtable_operators.py
 ##
 @@ -0,0 +1,149 @@
+# -*- coding: utf-8 -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# 'License'); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Example Airflow DAG that performs the following operations on Cloud 
Bigtable:
+- creates an Instance
+- creates a Table
+- updates Cluster
+- waits for Table replication completeness
+- deletes the Table
+- deletes the Instance
+
+This DAG relies on the following environment variables
+* CBT_PROJECT_ID - Google Cloud Platform project
 
 Review comment:
   Probably fix it here as it is an easy change.


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services


[GitHub] kaxil commented on a change in pull request #4354: [AIRFLOW-3446] Add Google Cloud BigTable operators

2019-01-02 Thread GitBox
kaxil commented on a change in pull request #4354: [AIRFLOW-3446] Add Google 
Cloud BigTable operators
URL: https://github.com/apache/incubator-airflow/pull/4354#discussion_r244886559
 
 

 ##
 File path: docs/howto/operator.rst
 ##
 @@ -361,6 +361,135 @@ More information
 See `Google Compute Engine API documentation
 
`_.
 
+Google Cloud Bigtable Operators
+
+
+Arguments
+"
+
+All examples below rely on the following variables, which can be passed via 
environment variables.
+
+.. literalinclude:: 
../../airflow/contrib/example_dags/example_gcp_bigtable_operators.py
+:language: python
+:start-after: [START howto_operator_gcp_bigtable_args]
+:end-before: [END howto_operator_gcp_bigtable_args]
+
+
+BigtableInstanceCreateOperator
+^
+
+Use the 
:class:`~airflow.contrib.operators.gcp_bigtable_operator.BigtableInstanceCreateOperator`
+to create a Google Cloud Bigtable instance.
+
+If the Cloud Bigtable instance with the given ID exists, the operator does not 
compare its configuration
+and immediately succeeds. No changes are made to the existing instance.
+
+Using the operator
+""
+
+.. literalinclude:: 
../../airflow/contrib/example_dags/example_gcp_bigtable_operators.py
+:language: python
+:dedent: 4
+:start-after: [START howto_operator_gcp_bigtable_instance_create]
+:end-before: [END howto_operator_gcp_bigtable_instance_create]
+
+
+BigtableInstanceDeleteOperator
+^
+
+Use the 
:class:`~airflow.contrib.operators.gcp_bigtable_operator.BigtableInstanceDeleteOperator`
+to delete a Google Cloud Bigtable instance.
+
+Using the operator
+""
+
+.. literalinclude:: 
../../airflow/contrib/example_dags/example_gcp_bigtable_operators.py
+:language: python
+:dedent: 4
+:start-after: [START howto_operator_gcp_bigtable_instance_delete]
+:end-before: [END howto_operator_gcp_bigtable_instance_delete]
+
+BigtableClusterUpdateOperator
+^
+
+Use the 
:class:`~airflow.contrib.operators.gcp_bigtable_operator.BigtableClusterUpdateOperator`
+to modify number of nodes in a Cloud Bigtable cluster.
+
+Using the operator
+""
+
+.. literalinclude:: 
../../airflow/contrib/example_dags/example_gcp_bigtable_operators.py
+:language: python
+:dedent: 4
+:start-after: [START howto_operator_gcp_bigtable_cluster_update]
+:end-before: [END howto_operator_gcp_bigtable_cluster_update]
+
+
+BigtableTableCreateOperator
+^
+
+Creates a table in a Cloud Bigtable instance.
+
+If the table with given ID exists in the Cloud Bigtable instance, the operator 
compares the Column Families.
+If the Column Families are identical operator succeeds. Otherwise, the 
operator fails with the appropriate
+error message.
+
+
+Using the operator
+""
+
+.. literalinclude:: 
../../airflow/contrib/example_dags/example_gcp_bigtable_operators.py
+:language: python
+:dedent: 4
+:start-after: [START howto_operator_gcp_bigtable_table_create]
+:end-before: [END howto_operator_gcp_bigtable_table_create]
+
+Advanced
+
+
+When creating a table, you can specify the optional ``initial_split_keys`` and 
``column_familes`.
 
 Review comment:
   Missing double back-tick
   
   ```suggestion
   When creating a table, you can specify the optional ``initial_split_keys`` 
and ``column_families``.
   ```


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services


[GitHub] kaxil commented on a change in pull request #4354: [AIRFLOW-3446] Add Google Cloud BigTable operators

2019-01-02 Thread GitBox
kaxil commented on a change in pull request #4354: [AIRFLOW-3446] Add Google 
Cloud BigTable operators
URL: https://github.com/apache/incubator-airflow/pull/4354#discussion_r244886645
 
 

 ##
 File path: docs/integration.rst
 ##
 @@ -814,6 +815,69 @@ Cloud SQL Hooks
 :members:
 
 
+Cloud Bigtable
+''
+
+Cloud Bigtable Operators
+""
 
 Review comment:
   Add needed `"`


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services


[GitHub] kaxil commented on a change in pull request #4354: [AIRFLOW-3446] Add Google Cloud BigTable operators

2019-01-02 Thread GitBox
kaxil commented on a change in pull request #4354: [AIRFLOW-3446] Add Google 
Cloud BigTable operators
URL: https://github.com/apache/incubator-airflow/pull/4354#discussion_r244885613
 
 

 ##
 File path: airflow/contrib/hooks/gcp_bigtable_hook.py
 ##
 @@ -0,0 +1,232 @@
+# -*- coding: utf-8 -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from google.cloud.bigtable import Client
+from google.cloud.bigtable.cluster import Cluster
+from google.cloud.bigtable.instance import Instance
+from google.cloud.bigtable.table import Table
+from google.cloud.bigtable_admin_v2 import enums
+from airflow.contrib.hooks.gcp_api_base_hook import GoogleCloudBaseHook
+
+
+# noinspection PyAbstractClass
 
 Review comment:
   Can we remove this?


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services


[GitHub] kaxil commented on a change in pull request #4354: [AIRFLOW-3446] Add Google Cloud BigTable operators

2019-01-02 Thread GitBox
kaxil commented on a change in pull request #4354: [AIRFLOW-3446] Add Google 
Cloud BigTable operators
URL: https://github.com/apache/incubator-airflow/pull/4354#discussion_r244886061
 
 

 ##
 File path: airflow/contrib/hooks/gcp_bigtable_hook.py
 ##
 @@ -0,0 +1,232 @@
+# -*- coding: utf-8 -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from google.cloud.bigtable import Client
+from google.cloud.bigtable.cluster import Cluster
+from google.cloud.bigtable.instance import Instance
+from google.cloud.bigtable.table import Table
+from google.cloud.bigtable_admin_v2 import enums
+from airflow.contrib.hooks.gcp_api_base_hook import GoogleCloudBaseHook
+
+
+# noinspection PyAbstractClass
+class BigtableHook(GoogleCloudBaseHook):
+"""
+Hook for Google Cloud Bigtable APIs.
+"""
+
+_client = None
+
+def __init__(self,
+ gcp_conn_id='google_cloud_default',
+ delegate_to=None):
+super(BigtableHook, self).__init__(gcp_conn_id, delegate_to)
+
+def get_client(self, project_id):
+if not self._client:
+self._client = Client(project=project_id, 
credentials=self._get_credentials(), admin=True)
+return self._client
+
+def get_instance(self, project_id, instance_id):
+"""
+Retrieves and returns the specified Cloud Bigtable instance if it 
exists.
+Otherwise, returns None.
+
+:param project_id: The ID of the GCP project.
+:type project_id: str
+:param instance_id: The ID of the Cloud Bigtable instance.
+:type instance_id: str
+"""
+
+client = self.get_client(project_id)
+
+instance = Instance(instance_id, client)
+if not instance.exists():
+return None
+return instance
+
+def delete_instance(self, project_id, instance_id):
+"""
+Deletes the specified Cloud Bigtable instance.
+Raises google.api_core.exceptions.NotFound if the Cloud Bigtable 
instance does not exist.
+
+:param project_id: The ID of the GCP project.
+:type project_id: str
+:param instance_id: The ID of the Cloud Bigtable instance.
+:type instance_id: str
+"""
+instance = Instance(instance_id, self.get_client(project_id))
+instance.delete()
+
+def create_instance(self,
+project_id,
+instance_id,
+main_cluster_id,
+main_cluster_zone,
+replica_cluster_id=None,
+replica_cluster_zone=None,
+instance_display_name=None,
+instance_type=enums.Instance.Type.TYPE_UNSPECIFIED,
+instance_labels=None,
+cluster_nodes=None,
+
cluster_storage_type=enums.StorageType.STORAGE_TYPE_UNSPECIFIED,
+timeout=None):
+"""
+Creates new instance.
+
+:type project_id: str
+:param project_id: The ID of the GCP project.
+:type instance_id: str
+:param instance_id: The ID for the new instance.
+:type main_cluster_id: str
+:param main_cluster_id: The ID for main cluster for the new instance.
+:type main_cluster_zone: str
+:param main_cluster_zone: The zone for main cluster.
+See https://cloud.google.com/bigtable/docs/locations for more 
details.
+:type replica_cluster_id: str
+:param replica_cluster_id: (optional) The ID for replica cluster for 
the new instance.
+:type replica_cluster_zone: str
+:param replica_cluster_zone: (optional)  The zone for replica cluster.
+:type instance_type: enums.Instance.Type
+:param instance_type: (optional) The type of the instance.
+:type instance_display_name: str
+:param instance_display_name: (optional) Human-readable name of the 
instance.
+Defaults to ``instance_id``.
+:type instance_labels: dict
+:param instance_labels: (optional) Dictionary of labels to associate 
with the instance.
+:type cluster_nodes: int
+:param cluste

[GitHub] kaxil commented on a change in pull request #4354: [AIRFLOW-3446] Add Google Cloud BigTable operators

2019-01-02 Thread GitBox
kaxil commented on a change in pull request #4354: [AIRFLOW-3446] Add Google 
Cloud BigTable operators
URL: https://github.com/apache/incubator-airflow/pull/4354#discussion_r244885772
 
 

 ##
 File path: airflow/contrib/hooks/gcp_bigtable_hook.py
 ##
 @@ -0,0 +1,232 @@
+# -*- coding: utf-8 -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from google.cloud.bigtable import Client
+from google.cloud.bigtable.cluster import Cluster
+from google.cloud.bigtable.instance import Instance
+from google.cloud.bigtable.table import Table
+from google.cloud.bigtable_admin_v2 import enums
+from airflow.contrib.hooks.gcp_api_base_hook import GoogleCloudBaseHook
+
+
+# noinspection PyAbstractClass
+class BigtableHook(GoogleCloudBaseHook):
+"""
+Hook for Google Cloud Bigtable APIs.
+"""
+
+_client = None
+
+def __init__(self,
+ gcp_conn_id='google_cloud_default',
+ delegate_to=None):
+super(BigtableHook, self).__init__(gcp_conn_id, delegate_to)
+
+def get_client(self, project_id):
+if not self._client:
+self._client = Client(project=project_id, 
credentials=self._get_credentials(), admin=True)
+return self._client
+
+def get_instance(self, project_id, instance_id):
 
 Review comment:
   +1 for optional project_id


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services


[GitHub] kaxil commented on a change in pull request #4354: [AIRFLOW-3446] Add Google Cloud BigTable operators

2019-01-02 Thread GitBox
kaxil commented on a change in pull request #4354: [AIRFLOW-3446] Add Google 
Cloud BigTable operators
URL: https://github.com/apache/incubator-airflow/pull/4354#discussion_r244886618
 
 

 ##
 File path: docs/howto/operator.rst
 ##
 @@ -361,6 +361,135 @@ More information
 See `Google Compute Engine API documentation
 
`_.
 
+Google Cloud Bigtable Operators
+
+
+Arguments
+"
+
+All examples below rely on the following variables, which can be passed via 
environment variables.
+
+.. literalinclude:: 
../../airflow/contrib/example_dags/example_gcp_bigtable_operators.py
+:language: python
+:start-after: [START howto_operator_gcp_bigtable_args]
+:end-before: [END howto_operator_gcp_bigtable_args]
+
+
+BigtableInstanceCreateOperator
+^
+
+Use the 
:class:`~airflow.contrib.operators.gcp_bigtable_operator.BigtableInstanceCreateOperator`
+to create a Google Cloud Bigtable instance.
+
+If the Cloud Bigtable instance with the given ID exists, the operator does not 
compare its configuration
+and immediately succeeds. No changes are made to the existing instance.
+
+Using the operator
+""
+
+.. literalinclude:: 
../../airflow/contrib/example_dags/example_gcp_bigtable_operators.py
+:language: python
+:dedent: 4
+:start-after: [START howto_operator_gcp_bigtable_instance_create]
+:end-before: [END howto_operator_gcp_bigtable_instance_create]
+
+
+BigtableInstanceDeleteOperator
+^
+
+Use the 
:class:`~airflow.contrib.operators.gcp_bigtable_operator.BigtableInstanceDeleteOperator`
+to delete a Google Cloud Bigtable instance.
+
+Using the operator
+""
+
+.. literalinclude:: 
../../airflow/contrib/example_dags/example_gcp_bigtable_operators.py
+:language: python
+:dedent: 4
+:start-after: [START howto_operator_gcp_bigtable_instance_delete]
+:end-before: [END howto_operator_gcp_bigtable_instance_delete]
+
+BigtableClusterUpdateOperator
+^
+
+Use the 
:class:`~airflow.contrib.operators.gcp_bigtable_operator.BigtableClusterUpdateOperator`
+to modify number of nodes in a Cloud Bigtable cluster.
+
+Using the operator
+""
+
+.. literalinclude:: 
../../airflow/contrib/example_dags/example_gcp_bigtable_operators.py
+:language: python
+:dedent: 4
+:start-after: [START howto_operator_gcp_bigtable_cluster_update]
+:end-before: [END howto_operator_gcp_bigtable_cluster_update]
+
+
+BigtableTableCreateOperator
+^
+
+Creates a table in a Cloud Bigtable instance.
+
+If the table with given ID exists in the Cloud Bigtable instance, the operator 
compares the Column Families.
+If the Column Families are identical operator succeeds. Otherwise, the 
operator fails with the appropriate
+error message.
+
+
+Using the operator
+""
+
+.. literalinclude:: 
../../airflow/contrib/example_dags/example_gcp_bigtable_operators.py
+:language: python
+:dedent: 4
+:start-after: [START howto_operator_gcp_bigtable_table_create]
+:end-before: [END howto_operator_gcp_bigtable_table_create]
+
+Advanced
+
+
+When creating a table, you can specify the optional ``initial_split_keys`` and 
``column_familes`.
+Please refer to the Python Client for Google Cloud Bigtable documentation
+`for Table 
`_ 
and `for Column
+Families 
`_.
+
+
+BigtableTableDeleteOperator
+^
+
+Use the 
:class:`~airflow.contrib.operators.gcp_bigtable_operator.BigtableTableDeleteOperator`
+to delete a table in Google Cloud Bigtable.
+
+Using the operator
+""
+
+.. literalinclude:: 
../../airflow/contrib/example_dags/example_gcp_bigtable_operators.py
+:language: python
+:dedent: 4
+:start-after: [START howto_operator_gcp_bigtable_table_delete]
+:end-before: [END howto_operator_gcp_bigtable_table_delete]
+
+BigtableTableWaitForReplicationSensor
+^
 
 Review comment:
   Add extra needed `^`


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services


[GitHub] kaxil commented on a change in pull request #4354: [AIRFLOW-3446] Add Google Cloud BigTable operators

2019-01-02 Thread GitBox
kaxil commented on a change in pull request #4354: [AIRFLOW-3446] Add Google 
Cloud BigTable operators
URL: https://github.com/apache/incubator-airflow/pull/4354#discussion_r244886264
 
 

 ##
 File path: airflow/contrib/operators/gcp_bigtable_operator.py
 ##
 @@ -0,0 +1,424 @@
+# -*- coding: utf-8 -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import google.api_core.exceptions
+
+from airflow import AirflowException
+from airflow.models import BaseOperator
+from airflow.sensors.base_sensor_operator import BaseSensorOperator
+from airflow.contrib.hooks.gcp_bigtable_hook import BigtableHook
+from airflow.utils.decorators import apply_defaults
+from google.cloud.bigtable_admin_v2 import enums
+from google.cloud.bigtable.table import ClusterState
+
+
+class BigtableValidationMixin(object):
+"""
+Common class for Cloud Bigtable operators for validating required fields.
+"""
+
+REQUIRED_ATTRIBUTES = []
+
+def _validate_inputs(self):
+for attr_name in self.REQUIRED_ATTRIBUTES:
+if not getattr(self, attr_name):
+raise AirflowException('Empty parameter: {}'.format(attr_name))
+
+
+class BigtableInstanceCreateOperator(BaseOperator, BigtableValidationMixin):
+"""
+Creates a new Cloud Bigtable instance.
+If the Cloud Bigtable instance with the given ID exists, the operator does 
not compare its configuration
+and immediately succeeds. No changes are made to the existing instance.
+
+For more details about instance creation have a look at the reference:
+
https://googleapis.github.io/google-cloud-python/latest/bigtable/instance.html#google.cloud.bigtable.instance.Instance.create
+
+:type project_id: str
+:param project_id: The ID of the GCP project.
+:type instance_id: str
+:param instance_id: The ID of the Cloud Bigtable instance to create.
+:type main_cluster_id: str
+:param main_cluster_id: The ID for main cluster for the new instance.
+:type main_cluster_zone: str
+:param main_cluster_zone: The zone for main cluster
+See https://cloud.google.com/bigtable/docs/locations for more details.
+:type replica_cluster_id: str
+:param replica_cluster_id: (optional) The ID for replica cluster for the 
new instance.
+:type replica_cluster_zone: str
+:param replica_cluster_zone: (optional)  The zone for replica cluster.
+:type instance_type: IntEnum
+:param instance_type: (optional) The type of the instance.
+:type instance_display_name: str
+:param instance_display_name: (optional) Human-readable name of the 
instance. Defaults to ``instance_id``.
+:type instance_labels: dict
+:param instance_labels: (optional) Dictionary of labels to associate with 
the instance.
+:type cluster_nodes: int
+:param cluster_nodes: (optional) Number of nodes for cluster.
+:type cluster_storage_type: IntEnum
+:param cluster_storage_type: (optional) The type of storage.
+:type timeout: int
+:param timeout: (optional) timeout (in seconds) for instance creation.
+If not specified (None), the operator will wait indefinitely.
+"""
+
+REQUIRED_ATTRIBUTES = ('project_id', 'instance_id', 'main_cluster_id', 
'main_cluster_zone')
+template_fields = ['project_id', 'instance_id', 'main_cluster_id', 
'main_cluster_zone']
+
+@apply_defaults
+def __init__(self,
+ project_id,
+ instance_id,
+ main_cluster_id,
+ main_cluster_zone,
+ replica_cluster_id=None,
+ replica_cluster_zone=None,
+ instance_display_name=None,
+ instance_type=None,
+ instance_labels=None,
+ cluster_nodes=None,
+ cluster_storage_type=None,
+ timeout=None,
+ *args, **kwargs):
+self.project_id = project_id
+self.instance_id = instance_id
+self.main_cluster_id = main_cluster_id
+self.main_cluster_zone = main_cluster_zone
+self.replica_cluster_id = replica_cluster_id
+self.replica_cluster_zone = replica_cluster_zone
+self.instance_display_name = instance_d

[GitHub] kaxil commented on a change in pull request #4354: [AIRFLOW-3446] Add Google Cloud BigTable operators

2019-01-02 Thread GitBox
kaxil commented on a change in pull request #4354: [AIRFLOW-3446] Add Google 
Cloud BigTable operators
URL: https://github.com/apache/incubator-airflow/pull/4354#discussion_r244886402
 
 

 ##
 File path: docs/howto/operator.rst
 ##
 @@ -361,6 +361,135 @@ More information
 See `Google Compute Engine API documentation
 
`_.
 
+Google Cloud Bigtable Operators
+
 
 Review comment:
   Remove extra dash


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services


[GitHub] kaxil commented on a change in pull request #4354: [AIRFLOW-3446] Add Google Cloud BigTable operators

2019-01-02 Thread GitBox
kaxil commented on a change in pull request #4354: [AIRFLOW-3446] Add Google 
Cloud BigTable operators
URL: https://github.com/apache/incubator-airflow/pull/4354#discussion_r244886512
 
 

 ##
 File path: docs/howto/operator.rst
 ##
 @@ -361,6 +361,135 @@ More information
 See `Google Compute Engine API documentation
 
`_.
 
+Google Cloud Bigtable Operators
+
+
+Arguments
+"
+
+All examples below rely on the following variables, which can be passed via 
environment variables.
+
+.. literalinclude:: 
../../airflow/contrib/example_dags/example_gcp_bigtable_operators.py
+:language: python
+:start-after: [START howto_operator_gcp_bigtable_args]
+:end-before: [END howto_operator_gcp_bigtable_args]
+
+
+BigtableInstanceCreateOperator
+^
+
+Use the 
:class:`~airflow.contrib.operators.gcp_bigtable_operator.BigtableInstanceCreateOperator`
+to create a Google Cloud Bigtable instance.
+
+If the Cloud Bigtable instance with the given ID exists, the operator does not 
compare its configuration
+and immediately succeeds. No changes are made to the existing instance.
+
+Using the operator
+""
+
+.. literalinclude:: 
../../airflow/contrib/example_dags/example_gcp_bigtable_operators.py
+:language: python
+:dedent: 4
+:start-after: [START howto_operator_gcp_bigtable_instance_create]
+:end-before: [END howto_operator_gcp_bigtable_instance_create]
+
+
+BigtableInstanceDeleteOperator
+^
+
+Use the 
:class:`~airflow.contrib.operators.gcp_bigtable_operator.BigtableInstanceDeleteOperator`
+to delete a Google Cloud Bigtable instance.
+
+Using the operator
+""
+
+.. literalinclude:: 
../../airflow/contrib/example_dags/example_gcp_bigtable_operators.py
+:language: python
+:dedent: 4
+:start-after: [START howto_operator_gcp_bigtable_instance_delete]
+:end-before: [END howto_operator_gcp_bigtable_instance_delete]
+
+BigtableClusterUpdateOperator
+^
+
+Use the 
:class:`~airflow.contrib.operators.gcp_bigtable_operator.BigtableClusterUpdateOperator`
+to modify the number of nodes in a Cloud Bigtable cluster.
+
+Using the operator
+""
+
+.. literalinclude:: 
../../airflow/contrib/example_dags/example_gcp_bigtable_operators.py
+:language: python
+:dedent: 4
+:start-after: [START howto_operator_gcp_bigtable_cluster_update]
+:end-before: [END howto_operator_gcp_bigtable_cluster_update]
+
+
+BigtableTableCreateOperator
+^
 
 Review comment:
   remove extra `^`
   
   


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services


[GitHub] kaxil commented on a change in pull request #4354: [AIRFLOW-3446] Add Google Cloud BigTable operators

2019-01-02 Thread GitBox
kaxil commented on a change in pull request #4354: [AIRFLOW-3446] Add Google 
Cloud BigTable operators
URL: https://github.com/apache/incubator-airflow/pull/4354#discussion_r244885194
 
 

 ##
 File path: airflow/contrib/example_dags/example_gcp_bigtable_operators.py
 ##
 @@ -0,0 +1,149 @@
+# -*- coding: utf-8 -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# 'License'); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Example Airflow DAG that creates and performs following operations on Cloud 
Bigtable:
+- creates an Instance
+- creates a Table
+- updates Cluster
+- waits for Table replication completeness
+- deletes the Table
+- deletes the Instance
+
+This DAG relies on the following environment variables
+* CBT_PROJECT_ID - Google Cloud Platform project
 
 Review comment:
   prefix it with `GCP_`


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services


[GitHub] kaxil commented on a change in pull request #4354: [AIRFLOW-3446] Add Google Cloud BigTable operators

2019-01-02 Thread GitBox
kaxil commented on a change in pull request #4354: [AIRFLOW-3446] Add Google 
Cloud BigTable operators
URL: https://github.com/apache/incubator-airflow/pull/4354#discussion_r244886372
 
 

 ##
 File path: airflow/contrib/operators/gcp_bigtable_operator.py
 ##
 @@ -0,0 +1,424 @@
+# -*- coding: utf-8 -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import google.api_core.exceptions
+
+from airflow import AirflowException
+from airflow.models import BaseOperator
+from airflow.sensors.base_sensor_operator import BaseSensorOperator
+from airflow.contrib.hooks.gcp_bigtable_hook import BigtableHook
+from airflow.utils.decorators import apply_defaults
+from google.cloud.bigtable_admin_v2 import enums
+from google.cloud.bigtable.table import ClusterState
+
+
+class BigtableValidationMixin(object):
+"""
+Common class for Cloud Bigtable operators for validating required fields.
+"""
+
+REQUIRED_ATTRIBUTES = []
+
+def _validate_inputs(self):
+for attr_name in self.REQUIRED_ATTRIBUTES:
+if not getattr(self, attr_name):
+raise AirflowException('Empty parameter: {}'.format(attr_name))
+
+
+class BigtableInstanceCreateOperator(BaseOperator, BigtableValidationMixin):
+"""
+Creates a new Cloud Bigtable instance.
+If the Cloud Bigtable instance with the given ID exists, the operator does 
not compare its configuration
+and immediately succeeds. No changes are made to the existing instance.
+
+For more details about instance creation have a look at the reference:
+
https://googleapis.github.io/google-cloud-python/latest/bigtable/instance.html#google.cloud.bigtable.instance.Instance.create
+
+:type project_id: str
+:param project_id: The ID of the GCP project.
+:type instance_id: str
+:param instance_id: The ID of the Cloud Bigtable instance to create.
+:type main_cluster_id: str
+:param main_cluster_id: The ID for main cluster for the new instance.
+:type main_cluster_zone: str
+:param main_cluster_zone: The zone for main cluster
+See https://cloud.google.com/bigtable/docs/locations for more details.
+:type replica_cluster_id: str
+:param replica_cluster_id: (optional) The ID for replica cluster for the 
new instance.
+:type replica_cluster_zone: str
+:param replica_cluster_zone: (optional)  The zone for replica cluster.
+:type instance_type: IntEnum
+:param instance_type: (optional) The type of the instance.
+:type instance_display_name: str
+:param instance_display_name: (optional) Human-readable name of the 
instance. Defaults to ``instance_id``.
+:type instance_labels: dict
+:param instance_labels: (optional) Dictionary of labels to associate with 
the instance.
+:type cluster_nodes: int
+:param cluster_nodes: (optional) Number of nodes for cluster.
+:type cluster_storage_type: IntEnum
+:param cluster_storage_type: (optional) The type of storage.
+:type timeout: int
+:param timeout: (optional) timeout (in seconds) for instance creation.
+If None or not specified, Operator will wait indefinitely.
+"""
+
+REQUIRED_ATTRIBUTES = ('project_id', 'instance_id', 'main_cluster_id', 
'main_cluster_zone')
+template_fields = ['project_id', 'instance_id', 'main_cluster_id', 
'main_cluster_zone']
+
+@apply_defaults
+def __init__(self,
+ project_id,
+ instance_id,
+ main_cluster_id,
+ main_cluster_zone,
+ replica_cluster_id=None,
+ replica_cluster_zone=None,
+ instance_display_name=None,
+ instance_type=None,
+ instance_labels=None,
+ cluster_nodes=None,
+ cluster_storage_type=None,
+ timeout=None,
+ *args, **kwargs):
+self.project_id = project_id
+self.instance_id = instance_id
+self.main_cluster_id = main_cluster_id
+self.main_cluster_zone = main_cluster_zone
+self.replica_cluster_id = replica_cluster_id
+self.replica_cluster_zone = replica_cluster_zone
+self.instance_display_name = instance_d

[GitHub] kaxil commented on a change in pull request #4354: [AIRFLOW-3446] Add Google Cloud BigTable operators

2019-01-02 Thread GitBox
kaxil commented on a change in pull request #4354: [AIRFLOW-3446] Add Google 
Cloud BigTable operators
URL: https://github.com/apache/incubator-airflow/pull/4354#discussion_r244886037
 
 

 ##
 File path: airflow/contrib/hooks/gcp_bigtable_hook.py
 ##
 @@ -0,0 +1,232 @@
+# -*- coding: utf-8 -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from google.cloud.bigtable import Client
+from google.cloud.bigtable.cluster import Cluster
+from google.cloud.bigtable.instance import Instance
+from google.cloud.bigtable.table import Table
+from google.cloud.bigtable_admin_v2 import enums
+from airflow.contrib.hooks.gcp_api_base_hook import GoogleCloudBaseHook
+
+
+# noinspection PyAbstractClass
+class BigtableHook(GoogleCloudBaseHook):
+"""
+Hook for Google Cloud Bigtable APIs.
+"""
+
+_client = None
+
+def __init__(self,
+ gcp_conn_id='google_cloud_default',
+ delegate_to=None):
+super(BigtableHook, self).__init__(gcp_conn_id, delegate_to)
+
+def get_client(self, project_id):
+if not self._client:
+self._client = Client(project=project_id, 
credentials=self._get_credentials(), admin=True)
+return self._client
+
+def get_instance(self, project_id, instance_id):
+"""
+Retrieves and returns the specified Cloud Bigtable instance if it 
exists.
+Otherwise, returns None.
+
+:param project_id: The ID of the GCP project.
+:type project_id: str
+:param instance_id: The ID of the Cloud Bigtable instance.
+:type instance_id: str
+"""
+
+client = self.get_client(project_id)
+
+instance = Instance(instance_id, client)
+if not instance.exists():
+return None
+return instance
+
+def delete_instance(self, project_id, instance_id):
+"""
+Deletes the specified Cloud Bigtable instance.
+Raises google.api_core.exceptions.NotFound if the Cloud Bigtable 
instance does not exist.
+
+:param project_id: The ID of the GCP project.
+:type project_id: str
+:param instance_id: The ID of the Cloud Bigtable instance.
+:type instance_id: str
+"""
+instance = Instance(instance_id, self.get_client(project_id))
+instance.delete()
+
+def create_instance(self,
+project_id,
+instance_id,
+main_cluster_id,
+main_cluster_zone,
+replica_cluster_id=None,
+replica_cluster_zone=None,
+instance_display_name=None,
+instance_type=enums.Instance.Type.TYPE_UNSPECIFIED,
+instance_labels=None,
+cluster_nodes=None,
+
cluster_storage_type=enums.StorageType.STORAGE_TYPE_UNSPECIFIED,
+timeout=None):
+"""
+Creates new instance.
+
+:type project_id: str
+:param project_id: The ID of the GCP project.
+:type instance_id: str
+:param instance_id: The ID for the new instance.
+:type main_cluster_id: str
+:param main_cluster_id: The ID for main cluster for the new instance.
+:type main_cluster_zone: str
+:param main_cluster_zone: The zone for main cluster.
+See https://cloud.google.com/bigtable/docs/locations for more 
details.
+:type replica_cluster_id: str
+:param replica_cluster_id: (optional) The ID for replica cluster for 
the new instance.
+:type replica_cluster_zone: str
+:param replica_cluster_zone: (optional)  The zone for replica cluster.
+:type instance_type: enums.Instance.Type
+:param instance_type: (optional) The type of the instance.
+:type instance_display_name: str
+:param instance_display_name: (optional) Human-readable name of the 
instance.
+Defaults to ``instance_id``.
+:type instance_labels: dict
+:param instance_labels: (optional) Dictionary of labels to associate 
with the instance.
+:type cluster_nodes: int
+:param cluste

[GitHub] kaxil commented on a change in pull request #4354: [AIRFLOW-3446] Add Google Cloud BigTable operators

2019-01-02 Thread GitBox
kaxil commented on a change in pull request #4354: [AIRFLOW-3446] Add Google 
Cloud BigTable operators
URL: https://github.com/apache/incubator-airflow/pull/4354#discussion_r244886449
 
 

 ##
 File path: docs/howto/operator.rst
 ##
 @@ -361,6 +361,135 @@ More information
 See `Google Compute Engine API documentation
 
`_.
 
+Google Cloud Bigtable Operators
+
+
+Arguments
+"
+
+All examples below rely on the following variables, which can be passed via 
environment variables.
+
+.. literalinclude:: 
../../airflow/contrib/example_dags/example_gcp_bigtable_operators.py
+:language: python
+:start-after: [START howto_operator_gcp_bigtable_args]
+:end-before: [END howto_operator_gcp_bigtable_args]
+
+
+BigtableInstanceCreateOperator
+^
 
 Review comment:
   Remove extra `^`


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services


[GitHub] kaxil commented on a change in pull request #4354: [AIRFLOW-3446] Add Google Cloud BigTable operators

2019-01-02 Thread GitBox
kaxil commented on a change in pull request #4354: [AIRFLOW-3446] Add Google 
Cloud BigTable operators
URL: https://github.com/apache/incubator-airflow/pull/4354#discussion_r244886010
 
 

 ##
 File path: airflow/contrib/hooks/gcp_bigtable_hook.py
 ##
 @@ -0,0 +1,232 @@
+# -*- coding: utf-8 -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from google.cloud.bigtable import Client
+from google.cloud.bigtable.cluster import Cluster
+from google.cloud.bigtable.instance import Instance
+from google.cloud.bigtable.table import Table
+from google.cloud.bigtable_admin_v2 import enums
+from airflow.contrib.hooks.gcp_api_base_hook import GoogleCloudBaseHook
+
+
+# noinspection PyAbstractClass
+class BigtableHook(GoogleCloudBaseHook):
+"""
+Hook for Google Cloud Bigtable APIs.
+"""
+
+_client = None
+
+def __init__(self,
+ gcp_conn_id='google_cloud_default',
+ delegate_to=None):
+super(BigtableHook, self).__init__(gcp_conn_id, delegate_to)
+
+def get_client(self, project_id):
+if not self._client:
+self._client = Client(project=project_id, 
credentials=self._get_credentials(), admin=True)
+return self._client
+
+def get_instance(self, project_id, instance_id):
+"""
+Retrieves and returns the specified Cloud Bigtable instance if it 
exists.
+Otherwise, returns None.
+
+:param project_id: The ID of the GCP project.
+:type project_id: str
+:param instance_id: The ID of the Cloud Bigtable instance.
+:type instance_id: str
+"""
+
+client = self.get_client(project_id)
+
+instance = Instance(instance_id, client)
+if not instance.exists():
+return None
+return instance
+
+def delete_instance(self, project_id, instance_id):
+"""
+Deletes the specified Cloud Bigtable instance.
+Raises google.api_core.exceptions.NotFound if the Cloud Bigtable 
instance does not exist.
+
+:param project_id: The ID of the GCP project.
+:type project_id: str
+:param instance_id: The ID of the Cloud Bigtable instance.
+:type instance_id: str
+"""
+instance = Instance(instance_id, self.get_client(project_id))
+instance.delete()
+
+def create_instance(self,
+project_id,
+instance_id,
+main_cluster_id,
+main_cluster_zone,
+replica_cluster_id=None,
+replica_cluster_zone=None,
+instance_display_name=None,
+instance_type=enums.Instance.Type.TYPE_UNSPECIFIED,
+instance_labels=None,
+cluster_nodes=None,
+
cluster_storage_type=enums.StorageType.STORAGE_TYPE_UNSPECIFIED,
+timeout=None):
+"""
+Creates new instance.
+
+:type project_id: str
+:param project_id: The ID of the GCP project.
+:type instance_id: str
+:param instance_id: The ID for the new instance.
+:type main_cluster_id: str
+:param main_cluster_id: The ID for main cluster for the new instance.
+:type main_cluster_zone: str
+:param main_cluster_zone: The zone for main cluster.
+See https://cloud.google.com/bigtable/docs/locations for more 
details.
+:type replica_cluster_id: str
+:param replica_cluster_id: (optional) The ID for replica cluster for 
the new instance.
+:type replica_cluster_zone: str
+:param replica_cluster_zone: (optional)  The zone for replica cluster.
+:type instance_type: enums.Instance.Type
+:param instance_type: (optional) The type of the instance.
+:type instance_display_name: str
+:param instance_display_name: (optional) Human-readable name of the 
instance.
+Defaults to ``instance_id``.
+:type instance_labels: dict
+:param instance_labels: (optional) Dictionary of labels to associate 
with the instance.
+:type cluster_nodes: int
+:param cluste

[GitHub] kaxil commented on a change in pull request #4354: [AIRFLOW-3446] Add Google Cloud BigTable operators

2019-01-02 Thread GitBox
kaxil commented on a change in pull request #4354: [AIRFLOW-3446] Add Google 
Cloud BigTable operators
URL: https://github.com/apache/incubator-airflow/pull/4354#discussion_r244886482
 
 

 ##
 File path: docs/howto/operator.rst
 ##
 @@ -361,6 +361,135 @@ More information
 See `Google Compute Engine API documentation
 
`_.
 
+Google Cloud Bigtable Operators
+
+
+Arguments
+"
+
+All examples below rely on the following variables, which can be passed via 
environment variables.
+
+.. literalinclude:: 
../../airflow/contrib/example_dags/example_gcp_bigtable_operators.py
+:language: python
+:start-after: [START howto_operator_gcp_bigtable_args]
+:end-before: [END howto_operator_gcp_bigtable_args]
+
+
+BigtableInstanceCreateOperator
+^
+
+Use the 
:class:`~airflow.contrib.operators.gcp_bigtable_operator.BigtableInstanceCreateOperator`
+to create a Google Cloud Bigtable instance.
+
+If the Cloud Bigtable instance with the given ID exists, the operator does not 
compare its configuration
+and immediately succeeds. No changes are made to the existing instance.
+
+Using the operator
+""
+
+.. literalinclude:: 
../../airflow/contrib/example_dags/example_gcp_bigtable_operators.py
+:language: python
+:dedent: 4
+:start-after: [START howto_operator_gcp_bigtable_instance_create]
+:end-before: [END howto_operator_gcp_bigtable_instance_create]
+
+
+BigtableInstanceDeleteOperator
+^
 
 Review comment:
   remove extra `^`


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services


[GitHub] kaxil commented on a change in pull request #4354: [AIRFLOW-3446] Add Google Cloud BigTable operators

2019-01-02 Thread GitBox
kaxil commented on a change in pull request #4354: [AIRFLOW-3446] Add Google 
Cloud BigTable operators
URL: https://github.com/apache/incubator-airflow/pull/4354#discussion_r244886593
 
 

 ##
 File path: docs/howto/operator.rst
 ##
 @@ -361,6 +361,135 @@ More information
 See `Google Compute Engine API documentation
 
`_.
 
+Google Cloud Bigtable Operators
+
+
+Arguments
+"
+
+All examples below rely on the following variables, which can be passed via 
environment variables.
+
+.. literalinclude:: 
../../airflow/contrib/example_dags/example_gcp_bigtable_operators.py
+:language: python
+:start-after: [START howto_operator_gcp_bigtable_args]
+:end-before: [END howto_operator_gcp_bigtable_args]
+
+
+BigtableInstanceCreateOperator
+^
+
+Use the 
:class:`~airflow.contrib.operators.gcp_bigtable_operator.BigtableInstanceCreateOperator`
+to create a Google Cloud Bigtable instance.
+
+If the Cloud Bigtable instance with the given ID exists, the operator does not 
compare its configuration
+and immediately succeeds. No changes are made to the existing instance.
+
+Using the operator
+""
+
+.. literalinclude:: 
../../airflow/contrib/example_dags/example_gcp_bigtable_operators.py
+:language: python
+:dedent: 4
+:start-after: [START howto_operator_gcp_bigtable_instance_create]
+:end-before: [END howto_operator_gcp_bigtable_instance_create]
+
+
+BigtableInstanceDeleteOperator
+^
+
+Use the 
:class:`~airflow.contrib.operators.gcp_bigtable_operator.BigtableInstanceDeleteOperator`
+to delete a Google Cloud Bigtable instance.
+
+Using the operator
+""
+
+.. literalinclude:: 
../../airflow/contrib/example_dags/example_gcp_bigtable_operators.py
+:language: python
+:dedent: 4
+:start-after: [START howto_operator_gcp_bigtable_instance_delete]
+:end-before: [END howto_operator_gcp_bigtable_instance_delete]
+
+BigtableClusterUpdateOperator
+^
+
+Use the 
:class:`~airflow.contrib.operators.gcp_bigtable_operator.BigtableClusterUpdateOperator`
+to modify the number of nodes in a Cloud Bigtable cluster.
+
+Using the operator
+""
+
+.. literalinclude:: 
../../airflow/contrib/example_dags/example_gcp_bigtable_operators.py
+:language: python
+:dedent: 4
+:start-after: [START howto_operator_gcp_bigtable_cluster_update]
+:end-before: [END howto_operator_gcp_bigtable_cluster_update]
+
+
+BigtableTableCreateOperator
+^
+
+Creates a table in a Cloud Bigtable instance.
+
+If the table with given ID exists in the Cloud Bigtable instance, the operator 
compares the Column Families.
+If the Column Families are identical, the operator succeeds. Otherwise, the 
operator fails with the appropriate
+error message.
+
+
+Using the operator
+""
+
+.. literalinclude:: 
../../airflow/contrib/example_dags/example_gcp_bigtable_operators.py
+:language: python
+:dedent: 4
+:start-after: [START howto_operator_gcp_bigtable_table_create]
+:end-before: [END howto_operator_gcp_bigtable_table_create]
+
+Advanced
+
+
+When creating a table, you can specify the optional ``initial_split_keys`` and 
``column_families``.
+Please refer to the Python Client for Google Cloud Bigtable documentation
+`for Table 
`_ 
and `for Column
+Families 
`_.
+
+
+BigtableTableDeleteOperator
+^
 
 Review comment:
   remove extra `^`


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services


[GitHub] kaxil commented on a change in pull request #4354: [AIRFLOW-3446] Add Google Cloud BigTable operators

2019-01-02 Thread GitBox
kaxil commented on a change in pull request #4354: [AIRFLOW-3446] Add Google 
Cloud BigTable operators
URL: https://github.com/apache/incubator-airflow/pull/4354#discussion_r244886346
 
 

 ##
 File path: airflow/contrib/operators/gcp_bigtable_operator.py
 ##
 @@ -0,0 +1,424 @@
+# -*- coding: utf-8 -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import google.api_core.exceptions
+
+from airflow import AirflowException
+from airflow.models import BaseOperator
+from airflow.sensors.base_sensor_operator import BaseSensorOperator
+from airflow.contrib.hooks.gcp_bigtable_hook import BigtableHook
+from airflow.utils.decorators import apply_defaults
+from google.cloud.bigtable_admin_v2 import enums
+from google.cloud.bigtable.table import ClusterState
+
+
+class BigtableValidationMixin(object):
+"""
+Common class for Cloud Bigtable operators for validating required fields.
+"""
+
+REQUIRED_ATTRIBUTES = []
+
+def _validate_inputs(self):
+for attr_name in self.REQUIRED_ATTRIBUTES:
+if not getattr(self, attr_name):
+raise AirflowException('Empty parameter: {}'.format(attr_name))
+
+
+class BigtableInstanceCreateOperator(BaseOperator, BigtableValidationMixin):
+"""
+Creates a new Cloud Bigtable instance.
+If the Cloud Bigtable instance with the given ID exists, the operator does 
not compare its configuration
+and immediately succeeds. No changes are made to the existing instance.
+
+For more details about instance creation have a look at the reference:
+
https://googleapis.github.io/google-cloud-python/latest/bigtable/instance.html#google.cloud.bigtable.instance.Instance.create
+
+:type project_id: str
+:param project_id: The ID of the GCP project.
+:type instance_id: str
+:param instance_id: The ID of the Cloud Bigtable instance to create.
+:type main_cluster_id: str
+:param main_cluster_id: The ID for main cluster for the new instance.
+:type main_cluster_zone: str
+:param main_cluster_zone: The zone for main cluster
+See https://cloud.google.com/bigtable/docs/locations for more details.
+:type replica_cluster_id: str
+:param replica_cluster_id: (optional) The ID for replica cluster for the 
new instance.
+:type replica_cluster_zone: str
+:param replica_cluster_zone: (optional)  The zone for replica cluster.
+:type instance_type: IntEnum
+:param instance_type: (optional) The type of the instance.
+:type instance_display_name: str
+:param instance_display_name: (optional) Human-readable name of the 
instance. Defaults to ``instance_id``.
+:type instance_labels: dict
+:param instance_labels: (optional) Dictionary of labels to associate with 
the instance.
+:type cluster_nodes: int
+:param cluster_nodes: (optional) Number of nodes for cluster.
+:type cluster_storage_type: IntEnum
+:param cluster_storage_type: (optional) The type of storage.
+:type timeout: int
+:param timeout: (optional) timeout (in seconds) for instance creation.
+If None or not specified, Operator will wait indefinitely.
+"""
+
+REQUIRED_ATTRIBUTES = ('project_id', 'instance_id', 'main_cluster_id', 
'main_cluster_zone')
+template_fields = ['project_id', 'instance_id', 'main_cluster_id', 
'main_cluster_zone']
+
+@apply_defaults
+def __init__(self,
+ project_id,
+ instance_id,
+ main_cluster_id,
+ main_cluster_zone,
+ replica_cluster_id=None,
+ replica_cluster_zone=None,
+ instance_display_name=None,
+ instance_type=None,
+ instance_labels=None,
+ cluster_nodes=None,
+ cluster_storage_type=None,
+ timeout=None,
+ *args, **kwargs):
+self.project_id = project_id
+self.instance_id = instance_id
+self.main_cluster_id = main_cluster_id
+self.main_cluster_zone = main_cluster_zone
+self.replica_cluster_id = replica_cluster_id
+self.replica_cluster_zone = replica_cluster_zone
+self.instance_display_name = instance_d