[GitHub] [airflow] turbaszek commented on a change in pull request #8227: Add run_type to DagRun

2020-06-03 Thread GitBox


turbaszek commented on a change in pull request #8227:
URL: https://github.com/apache/airflow/pull/8227#discussion_r434518469



##
File path: airflow/models/dag.py
##
@@ -1468,14 +1472,28 @@ def create_dagrun(self,
 :param session: database session
 :type session: sqlalchemy.orm.session.Session
 """
+if run_id:

Review comment:
   Done





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [airflow] turbaszek commented on a change in pull request #8227: Add run_type to DagRun

2020-06-03 Thread GitBox


turbaszek commented on a change in pull request #8227:
URL: https://github.com/apache/airflow/pull/8227#discussion_r434517922



##
File path: airflow/migrations/versions/3c20cacc0044_add_dagrun_run_type.py
##
@@ -0,0 +1,58 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Add DagRun run_type
+
+Revision ID: 3c20cacc0044
+Revises: 952da73b5eff
+Create Date: 2020-04-08 13:35:25.671327
+
+"""
+
+import sqlalchemy as sa
+from alembic import op
+
+from airflow.models import DagRun
+from airflow.utils.types import DagRunType
+
+# revision identifiers, used by Alembic.
+revision = "3c20cacc0044"
+down_revision = "952da73b5eff"
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+"""Apply Add DagRun run_type"""
+op.add_column("dag_run", sa.Column("run_type", sa.String(length=50), 
nullable=True))
+
+connection = op.get_bind()
+sessionmaker = sa.orm.sessionmaker()
+session = sessionmaker(bind=connection)
+
+for run_type in DagRunType:
+
session.query(DagRun).filter(DagRun.run_id.like(f"{run_type.value}__%")).update(
+{DagRun.run_type: run_type.value}, synchronize_session=False
+)

Review comment:
   Done 





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [airflow] turbaszek commented on a change in pull request #8227: Add run_type to DagRun

2020-06-02 Thread GitBox


turbaszek commented on a change in pull request #8227:
URL: https://github.com/apache/airflow/pull/8227#discussion_r433915534



##
File path: airflow/models/dag.py
##
@@ -1468,14 +1472,28 @@ def create_dagrun(self,
 :param session: database session
 :type session: sqlalchemy.orm.session.Session
 """
+if run_id:

Review comment:
   Will add it once we agree on the index. I want to avoid yet another round
of rebases.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [airflow] turbaszek commented on a change in pull request #8227: Add run_type to DagRun

2020-06-02 Thread GitBox


turbaszek commented on a change in pull request #8227:
URL: https://github.com/apache/airflow/pull/8227#discussion_r433915046



##
File path: airflow/api/common/experimental/trigger_dag.py
##
@@ -68,15 +68,17 @@ def _trigger_dag(
 execution_date.isoformat(),
 min_dag_start_date.isoformat()))
 
+run_type: Optional[DagRunType] = None
 if not run_id:
-run_id = f"{DagRunType.MANUAL.value}__{execution_date.isoformat()}"
-
-dag_run_id = dag_run.find(dag_id=dag_id, run_id=run_id)
-if dag_run_id:
-raise DagRunAlreadyExists("Run id {} already exists for dag id 
{}".format(
-run_id,
-dag_id
-))
+run_type = DagRunType.MANUAL
+dag_run = dag_run.find(dag_id=dag_id, run_type=run_type, 
execution_date=execution_date)

Review comment:
   I will fix it





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [airflow] turbaszek commented on a change in pull request #8227: Add run_type to DagRun

2020-05-28 Thread GitBox


turbaszek commented on a change in pull request #8227:
URL: https://github.com/apache/airflow/pull/8227#discussion_r431629432



##
File path: airflow/models/dag.py
##
@@ -1468,14 +1472,26 @@ def create_dagrun(self,
 :param session: database session
 :type session: sqlalchemy.orm.session.Session
 """
+if run_id:
+if not isinstance(run_id, str):
+raise ValueError(f"`run_id` expected to be a str is 
{type(run_id)}")
+run_type: DagRunType = DagRunType.from_run_id(run_id)
+elif run_type and execution_date:
+run_id = DagRun.generate_run_id(run_type, execution_date)
+elif not run_id:
+raise AirflowException(
+"Creating DagRun needs either `run_id` or `run_type` and 
`execution_date`"
+)
+
 run = DagRun(
 dag_id=self.dag_id,
 run_id=run_id,
 execution_date=execution_date,
 start_date=start_date,
 external_trigger=external_trigger,
 conf=conf,
-state=state
+state=state,
+run_type=run_type.value,

Review comment:
   Added  





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [airflow] turbaszek commented on a change in pull request #8227: Add run_type to DagRun

2020-05-28 Thread GitBox


turbaszek commented on a change in pull request #8227:
URL: https://github.com/apache/airflow/pull/8227#discussion_r431627592



##
File path: airflow/models/dagrun.py
##
@@ -54,25 +54,27 @@ class DagRun(Base, LoggingMixin):
 _state = Column('state', String(50), default=State.RUNNING)
 run_id = Column(String(ID_LEN))
 external_trigger = Column(Boolean, default=True)
+run_type = Column(String(50), nullable=True)
 conf = Column(PickleType)
 
 dag = None
 
 __table_args__ = (
-Index('dag_id_state', dag_id, _state),
+Index('dag_id_state_type', dag_id, _state, run_type),

Review comment:
   @kaxil do you suggest a single index `Index('dag_run_type', run_type)`? I
must admit that I ran some tests with and without
`Index('dag_id_state_type', dag_id, _state, run_type)`, but there was no big
difference.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [airflow] turbaszek commented on a change in pull request #8227: Add run_type to DagRun

2020-05-27 Thread GitBox


turbaszek commented on a change in pull request #8227:
URL: https://github.com/apache/airflow/pull/8227#discussion_r431311590



##
File path: airflow/models/dag.py
##
@@ -1468,14 +1472,26 @@ def create_dagrun(self,
 :param session: database session
 :type session: sqlalchemy.orm.session.Session
 """
+if run_id:
+if not isinstance(run_id, str):
+raise ValueError(f"`run_id` expected to be a str is 
{type(run_id)}")
+run_type: DagRunType = DagRunType.from_run_id(run_id)
+elif run_type and execution_date:
+run_id = DagRun.generate_run_id(run_type, execution_date)
+elif not run_id:
+raise AirflowException(
+"Creating DagRun needs either `run_id` or `run_type` and 
`execution_date`"
+)
+
 run = DagRun(
 dag_id=self.dag_id,
 run_id=run_id,
 execution_date=execution_date,
 start_date=start_date,
 external_trigger=external_trigger,
 conf=conf,
-state=state
+state=state,
+run_type=run_type.value,

Review comment:
   Yes. I think it's a good idea because the run types are limited to the 
`DagRunType`. Do you have any concerns about that? 





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [airflow] turbaszek commented on a change in pull request #8227: Add run_type to DagRun

2020-05-14 Thread GitBox


turbaszek commented on a change in pull request #8227:
URL: https://github.com/apache/airflow/pull/8227#discussion_r424974515



##
File path: airflow/utils/types.py
##
@@ -22,3 +22,13 @@ class DagRunType(enum.Enum):
 BACKFILL_JOB = "backfill"
 SCHEDULED = "scheduled"
 MANUAL = "manual"
+
+@staticmethod
+def resolve_run_type(run_id: str) -> "DagRunType":

Review comment:
   Agree, done





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [airflow] turbaszek commented on a change in pull request #8227: Add run_type to DagRun

2020-05-14 Thread GitBox


turbaszek commented on a change in pull request #8227:
URL: https://github.com/apache/airflow/pull/8227#discussion_r424974304



##
File path: airflow/utils/types.py
##
@@ -22,3 +22,13 @@ class DagRunType(enum.Enum):
 BACKFILL_JOB = "backfill"
 SCHEDULED = "scheduled"
 MANUAL = "manual"
+
+@staticmethod
+def resolve_run_type(run_id: str) -> "DagRunType":
+"""
+Resolved DagRun type from run_id.
+"""
+for run_type in DagRunType:
+if run_id.startswith(run_type.value):

Review comment:
   Done now





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [airflow] turbaszek commented on a change in pull request #8227: Add run_type to DagRun

2020-05-14 Thread GitBox


turbaszek commented on a change in pull request #8227:
URL: https://github.com/apache/airflow/pull/8227#discussion_r424974138



##
File path: airflow/migrations/versions/3c20cacc0044_add_dagrun_run_type.py
##
@@ -0,0 +1,93 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Add DagRun run_type
+
+Revision ID: 3c20cacc0044
+Revises: 952da73b5eff
+Create Date: 2020-04-08 13:35:25.671327
+
+"""
+
+import sqlalchemy as sa
+from alembic import op
+from sqlalchemy import Boolean, Column, Integer, PickleType, String
+from sqlalchemy.engine.reflection import Inspector
+from sqlalchemy.ext.declarative import declarative_base
+
+from airflow.models.base import ID_LEN
+from airflow.utils import timezone
+from airflow.utils.sqlalchemy import UtcDateTime
+from airflow.utils.state import State
+from airflow.utils.types import DagRunType
+
+# revision identifiers, used by Alembic.
+revision = "3c20cacc0044"
+down_revision = "952da73b5eff"
+branch_labels = None
+depends_on = None
+
+Base = declarative_base()
+
+
+class DagRun(Base):
+"""
+DagRun describes an instance of a Dag. It can be created
+by the scheduler (for regular runs) or by an external trigger
+"""
+__tablename__ = "dag_run"
+
+id = Column(Integer, primary_key=True)
+dag_id = Column(String(ID_LEN))
+execution_date = Column(UtcDateTime, default=timezone.utcnow)
+start_date = Column(UtcDateTime, default=timezone.utcnow)
+end_date = Column(UtcDateTime)
+_state = Column('state', String(50), default=State.RUNNING)
+run_id = Column(String(ID_LEN))
+external_trigger = Column(Boolean, default=True)
+run_type = Column(String(50))
+conf = Column(PickleType)
+
+
+def upgrade():
+"""Apply Add DagRun run_type"""
+op.add_column("dag_run", sa.Column("run_type", sa.String(length=50), 
nullable=True))
+
+connection = op.get_bind()
+sessionmaker = sa.orm.sessionmaker()
+session = sessionmaker(bind=connection)
+inspector = Inspector.from_engine(connection)
+tables = inspector.get_table_names()
+
+if 'dag_run' in tables:
+for run_type in DagRunType:
+
session.query(DagRun).filter(DagRun.run_id.like(f"{run_type.value}__%")).update(
+{DagRun.run_type: run_type.value}, synchronize_session=False
+)
+session.commit()

Review comment:
   Done





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [airflow] turbaszek commented on a change in pull request #8227: Add run_type to DagRun

2020-05-14 Thread GitBox


turbaszek commented on a change in pull request #8227:
URL: https://github.com/apache/airflow/pull/8227#discussion_r424973201



##
File path: airflow/migrations/versions/3c20cacc0044_add_dagrun_run_type.py
##
@@ -0,0 +1,93 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Add DagRun run_type
+
+Revision ID: 3c20cacc0044
+Revises: 952da73b5eff
+Create Date: 2020-04-08 13:35:25.671327
+
+"""
+
+import sqlalchemy as sa
+from alembic import op
+from sqlalchemy import Boolean, Column, Integer, PickleType, String
+from sqlalchemy.engine.reflection import Inspector
+from sqlalchemy.ext.declarative import declarative_base
+
+from airflow.models.base import ID_LEN
+from airflow.utils import timezone
+from airflow.utils.sqlalchemy import UtcDateTime
+from airflow.utils.state import State
+from airflow.utils.types import DagRunType
+
+# revision identifiers, used by Alembic.
+revision = "3c20cacc0044"
+down_revision = "952da73b5eff"
+branch_labels = None
+depends_on = None
+
+Base = declarative_base()
+
+
+class DagRun(Base):
+"""
+DagRun describes an instance of a Dag. It can be created
+by the scheduler (for regular runs) or by an external trigger
+"""
+__tablename__ = "dag_run"
+
+id = Column(Integer, primary_key=True)
+dag_id = Column(String(ID_LEN))
+execution_date = Column(UtcDateTime, default=timezone.utcnow)
+start_date = Column(UtcDateTime, default=timezone.utcnow)
+end_date = Column(UtcDateTime)
+_state = Column('state', String(50), default=State.RUNNING)
+run_id = Column(String(ID_LEN))
+external_trigger = Column(Boolean, default=True)
+run_type = Column(String(50))
+conf = Column(PickleType)
+
+
+def upgrade():
+"""Apply Add DagRun run_type"""
+op.add_column("dag_run", sa.Column("run_type", sa.String(length=50), 
nullable=True))
+
+connection = op.get_bind()
+sessionmaker = sa.orm.sessionmaker()
+session = sessionmaker(bind=connection)
+inspector = Inspector.from_engine(connection)
+tables = inspector.get_table_names()
+
+if 'dag_run' in tables:
+for run_type in DagRunType:
+
session.query(DagRun).filter(DagRun.run_id.like(f"{run_type.value}__%")).update(
+{DagRun.run_type: run_type.value}, synchronize_session=False
+)
+session.commit()
+
+session.query(DagRun).filter(DagRun.run_type.is_(None)).update(
+{DagRun.run_type: DagRunType.MANUAL.value}, 
synchronize_session=False
+)
+session.commit()

Review comment:
   Should we also make it not nullable in model?





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [airflow] turbaszek commented on a change in pull request #8227: Add run_type to DagRun

2020-05-14 Thread GitBox


turbaszek commented on a change in pull request #8227:
URL: https://github.com/apache/airflow/pull/8227#discussion_r424964360



##
File path: airflow/utils/types.py
##
@@ -22,3 +22,13 @@ class DagRunType(enum.Enum):
 BACKFILL_JOB = "backfill"
 SCHEDULED = "scheduled"
 MANUAL = "manual"
+
+@staticmethod
+def resolve_run_type(run_id: str) -> "DagRunType":
+"""
+Resolved DagRun type from run_id.
+"""
+for run_type in DagRunType:
+if run_id.startswith(run_type.value):

Review comment:
   I thought I fixed it





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [airflow] turbaszek commented on a change in pull request #8227: Add run_type to DagRun

2020-05-06 Thread GitBox


turbaszek commented on a change in pull request #8227:
URL: https://github.com/apache/airflow/pull/8227#discussion_r420839021



##
File path: airflow/migrations/versions/3c20cacc0044_add_dagrun_run_type.py
##
@@ -0,0 +1,101 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Add DagRun run_type
+
+Revision ID: 3c20cacc0044
+Revises: 952da73b5eff
+Create Date: 2020-04-08 13:35:25.671327
+
+"""
+
+import sqlalchemy as sa
+from alembic import op
+from sqlalchemy import Boolean, Column, Integer, PickleType, String
+from sqlalchemy.engine.reflection import Inspector
+from sqlalchemy.ext.declarative import declarative_base
+
+from airflow.models.base import ID_LEN
+from airflow.utils import timezone
+from airflow.utils.sqlalchemy import UtcDateTime
+from airflow.utils.state import State
+from airflow.utils.types import DagRunType
+
+# revision identifiers, used by Alembic.
+revision = "3c20cacc0044"
+down_revision = "952da73b5eff"
+branch_labels = None
+depends_on = None
+
+Base = declarative_base()
+
+
+class DagRun(Base):
+"""
+DagRun describes an instance of a Dag. It can be created
+by the scheduler (for regular runs) or by an external trigger
+"""
+__tablename__ = "dag_run"
+
+id = Column(Integer, primary_key=True)
+dag_id = Column(String(ID_LEN))
+execution_date = Column(UtcDateTime, default=timezone.utcnow)
+start_date = Column(UtcDateTime, default=timezone.utcnow)
+end_date = Column(UtcDateTime)
+_state = Column('state', String(50), default=State.RUNNING)
+run_id = Column(String(ID_LEN))
+external_trigger = Column(Boolean, default=True)
+run_type = Column(String(50))
+conf = Column(PickleType)
+
+
+def upgrade():
+"""Apply Add DagRun run_type"""
+op.add_column("dag_run", sa.Column("run_type", sa.String(length=50), 
nullable=True))
+op.drop_constraint("dag_run_dag_id_execution_date_key", "dag_run", 
"unique")
+op.create_unique_constraint(None, "dag_run", ('dag_id', 'execution_date', 
'run_type'))

Review comment:
   True, fixed 





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [airflow] turbaszek commented on a change in pull request #8227: Add run_type to DagRun

2020-05-06 Thread GitBox


turbaszek commented on a change in pull request #8227:
URL: https://github.com/apache/airflow/pull/8227#discussion_r420827111



##
File path: UPDATING.md
##
@@ -62,6 +62,41 @@ https://developers.google.com/style/inclusive-documentation
 
 -->
 
+### DAG.create_dagrun accepts run_type and does not require run_id
+This change is caused by adding `run_type` column to `DagRun`.
+
+Previous signature:
+```python
+def create_dagrun(self,
+  run_id,
+  state,
+  execution_date=None,
+  start_date=None,
+  external_trigger=False,
+  conf=None,
+  session=None):
+```
+current:
+```python
+def create_dagrun(self,
+  state,
+  execution_date=None,
+  run_id=None,
+  start_date=None,
+  external_trigger=False,
+  conf=None,
+  run_type=None,

Review comment:
   And that's checked:
   ```python
   elif not run_id:
raise AirflowException(
 "Creating DagRun needs either `run_id` or `run_type` and 
`execution_date`"
)
   ```





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [airflow] turbaszek commented on a change in pull request #8227: Add run_type to DagRun

2020-04-24 Thread GitBox


turbaszek commented on a change in pull request #8227:
URL: https://github.com/apache/airflow/pull/8227#discussion_r414380461



##
File path: airflow/migrations/versions/3c20cacc0044_add_dagrun_run_type.py
##
@@ -0,0 +1,101 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Add DagRun run_type
+
+Revision ID: 3c20cacc0044
+Revises: 952da73b5eff
+Create Date: 2020-04-08 13:35:25.671327
+
+"""
+
+import sqlalchemy as sa
+from alembic import op
+from sqlalchemy import Boolean, Column, Integer, PickleType, String
+from sqlalchemy.engine.reflection import Inspector
+from sqlalchemy.ext.declarative import declarative_base
+
+from airflow.models.base import ID_LEN
+from airflow.utils import timezone
+from airflow.utils.sqlalchemy import UtcDateTime
+from airflow.utils.state import State
+from airflow.utils.types import DagRunType
+
+# revision identifiers, used by Alembic.
+revision = "3c20cacc0044"
+down_revision = "952da73b5eff"
+branch_labels = None
+depends_on = None
+
+Base = declarative_base()
+
+
+class DagRun(Base):
+"""
+DagRun describes an instance of a Dag. It can be created
+by the scheduler (for regular runs) or by an external trigger
+"""
+__tablename__ = "dag_run"
+
+id = Column(Integer, primary_key=True)
+dag_id = Column(String(ID_LEN))
+execution_date = Column(UtcDateTime, default=timezone.utcnow)
+start_date = Column(UtcDateTime, default=timezone.utcnow)
+end_date = Column(UtcDateTime)
+_state = Column('state', String(50), default=State.RUNNING)
+run_id = Column(String(ID_LEN))
+external_trigger = Column(Boolean, default=True)
+run_type = Column(String(50))
+conf = Column(PickleType)
+
+
+def upgrade():
+"""Apply Add DagRun run_type"""
+op.add_column("dag_run", sa.Column("run_type", sa.String(length=50), 
nullable=True))
+op.drop_constraint("dag_run_dag_id_execution_date_key", "dag_run", 
"unique")
+op.create_unique_constraint(None, "dag_run", ('dag_id', 'execution_date', 
'run_type'))
+op.drop_index('dag_id_state', table_name='dag_run')
+op.create_index('dag_id_state_run_type', 'dag_run', ['dag_id', 'state', 
'run_type'], unique=False)
+
+connection = op.get_bind()
+sessionmaker = sa.orm.sessionmaker()
+session = sessionmaker(bind=connection)
+inspector = Inspector.from_engine(connection)
+tables = inspector.get_table_names()
+
+if 'dag_run' in tables:

Review comment:
   I agree; I've just followed an existing pattern.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [airflow] turbaszek commented on a change in pull request #8227: Add run_type to DagRun

2020-04-09 Thread GitBox
turbaszek commented on a change in pull request #8227: Add run_type to DagRun
URL: https://github.com/apache/airflow/pull/8227#discussion_r406112990
 
 

 ##
 File path: airflow/api/common/experimental/mark_tasks.py
 ##
 @@ -48,11 +48,11 @@ def _create_dagruns(dag, execution_dates, state, run_type):
 
 for date in dates_to_create:
 dag_run = dag.create_dagrun(
-run_id=f"{run_type}__{date.isoformat()}",
 
 Review comment:
   Obtaining run_type from run_id may help, because then we will always have a
run_type, so it can be used more effectively in an index.


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services


[GitHub] [airflow] turbaszek commented on a change in pull request #8227: Add run_type to DagRun

2020-04-09 Thread GitBox
turbaszek commented on a change in pull request #8227: Add run_type to DagRun
URL: https://github.com/apache/airflow/pull/8227#discussion_r406112369
 
 

 ##
 File path: airflow/migrations/versions/3c20cacc0044_add_dagrun_run_type.py
 ##
 @@ -0,0 +1,58 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Add DagRun run_type
+
+Revision ID: 3c20cacc0044
+Revises: 952da73b5eff
+Create Date: 2020-04-08 13:35:25.671327
+
+"""
+
+import sqlalchemy as sa
+from alembic import op
+
+from airflow.models import DagRun
 
 Review comment:
   Thanks!


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services


[GitHub] [airflow] turbaszek commented on a change in pull request #8227: Add run_type to DagRun

2020-04-09 Thread GitBox
turbaszek commented on a change in pull request #8227: Add run_type to DagRun
URL: https://github.com/apache/airflow/pull/8227#discussion_r406111488
 
 

 ##
 File path: airflow/api/common/experimental/mark_tasks.py
 ##
 @@ -48,11 +48,11 @@ def _create_dagruns(dag, execution_dates, state, run_type):
 
 for date in dates_to_create:
 dag_run = dag.create_dagrun(
-run_id=f"{run_type}__{date.isoformat()}",
 
 Review comment:
   Now I'm wondering: should we set the run_type using run_id? For example, by
checking the prefix. WDYT?


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services


[GitHub] [airflow] turbaszek commented on a change in pull request #8227: Add run_type to DagRun

2020-04-09 Thread GitBox
turbaszek commented on a change in pull request #8227: Add run_type to DagRun
URL: https://github.com/apache/airflow/pull/8227#discussion_r406107829
 
 

 ##
 File path: airflow/api/common/experimental/mark_tasks.py
 ##
 @@ -48,11 +48,11 @@ def _create_dagruns(dag, execution_dates, state, run_type):
 
 for date in dates_to_create:
 dag_run = dag.create_dagrun(
-run_id=f"{run_type}__{date.isoformat()}",
 
 Review comment:
   @ashb you can provide `run_id` or `run_type + execution_date`.  If you don't 
provide `run_id` it will be autogenerated. In most places, the `run_id` was 
created in the following way: `{run_type}__{execution_date.isoformat()}` 


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services


[GitHub] [airflow] turbaszek commented on a change in pull request #8227: Add run_type to DagRun

2020-04-09 Thread GitBox
turbaszek commented on a change in pull request #8227: Add run_type to DagRun
URL: https://github.com/apache/airflow/pull/8227#discussion_r406102543
 
 

 ##
 File path: airflow/models/dagrun.py
 ##
 @@ -65,14 +66,15 @@ class DagRun(Base, LoggingMixin):
 )
 
 def __init__(self, dag_id=None, run_id=None, execution_date=None, 
start_date=None, external_trigger=None,
- conf=None, state=None):
+ conf=None, state=None, run_type=None):
 self.dag_id = dag_id
 self.run_id = run_id
 self.execution_date = execution_date
 self.start_date = start_date
 self.external_trigger = external_trigger
 self.conf = conf
 self.state = state
+self.run_type = run_type
 
 Review comment:
   It's probably worth using this column in an index, as mentioned in the
original issue.


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services


[GitHub] [airflow] turbaszek commented on a change in pull request #8227: Add run_type to DagRun

2020-04-09 Thread GitBox
turbaszek commented on a change in pull request #8227: Add run_type to DagRun
URL: https://github.com/apache/airflow/pull/8227#discussion_r406102543
 
 

 ##
 File path: airflow/models/dagrun.py
 ##
 @@ -65,14 +66,15 @@ class DagRun(Base, LoggingMixin):
 )
 
 def __init__(self, dag_id=None, run_id=None, execution_date=None, 
start_date=None, external_trigger=None,
- conf=None, state=None):
+ conf=None, state=None, run_type=None):
 self.dag_id = dag_id
 self.run_id = run_id
 self.execution_date = execution_date
 self.start_date = start_date
 self.external_trigger = external_trigger
 self.conf = conf
 self.state = state
+self.run_type = run_type
 
 Review comment:
   It's probably worth using this column in an index.


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services