[ https://issues.apache.org/jira/browse/AIRFLOW-4134?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Daniel Standish updated AIRFLOW-4134: ------------------------------------- Description: I am finding with 1.10.2 that I seem to get a warning {{DB connection invalidated. Reconnecting...}} very frequently. I to try to diagnose I added logging of the triggering error on line 79 in airflow/utils/sqlalchemy.py, from which this warning is generated. Call stack: {code} webserver_1 | Call stack: webserver_1 | File "/usr/local/bin/airflow", line 32, in <module> webserver_1 | args.func(args) webserver_1 | File "/usr/local/lib/python3.6/site-packages/airflow/utils/cli.py", line 74, in wrapper webserver_1 | return f(*args, **kwargs) webserver_1 | File "/usr/local/lib/python3.6/site-packages/airflow/bin/cli.py", line 992, in scheduler webserver_1 | job.run() webserver_1 | File "/usr/local/lib/python3.6/site-packages/airflow/jobs.py", line 205, in run webserver_1 | self._execute() webserver_1 | File "/usr/local/lib/python3.6/site-packages/airflow/jobs.py", line 1532, in _execute webserver_1 | self._execute_helper() webserver_1 | File "/usr/local/lib/python3.6/site-packages/airflow/jobs.py", line 1562, in _execute_helper webserver_1 | self.processor_agent.start() webserver_1 | File "/usr/local/lib/python3.6/site-packages/airflow/utils/dag_processing.py", line 511, in start webserver_1 | self._async_mode) webserver_1 | File "/usr/local/lib/python3.6/site-packages/airflow/utils/dag_processing.py", line 565, in _launch_process webserver_1 | p.start() webserver_1 | File "/usr/local/lib/python3.6/multiprocessing/process.py", line 105, in start webserver_1 | self._popen = self._Popen(self) webserver_1 | File "/usr/local/lib/python3.6/multiprocessing/context.py", line 223, in _Popen webserver_1 | return _default_context.get_context().Process._Popen(process_obj) webserver_1 | File "/usr/local/lib/python3.6/multiprocessing/context.py", line 277, in _Popen webserver_1 | return Popen(process_obj) webserver_1 | File "/usr/local/lib/python3.6/multiprocessing/popen_fork.py", line 19, in __init__ webserver_1 | self._launch(process_obj) webserver_1 | File "/usr/local/lib/python3.6/multiprocessing/popen_fork.py", line 73, in _launch webserver_1 | code = process_obj._bootstrap() webserver_1 | File "/usr/local/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap webserver_1 | self.run() webserver_1 | File "/usr/local/lib/python3.6/multiprocessing/process.py", line 93, in run webserver_1 | self._target(*self._args, **self._kwargs) webserver_1 | File "/usr/local/lib/python3.6/site-packages/airflow/utils/dag_processing.py", line 560, in helper webserver_1 | processor_manager.start() webserver_1 | File "/usr/local/lib/python3.6/site-packages/airflow/utils/dag_processing.py", line 797, in start webserver_1 | self.start_in_async() webserver_1 | File "/usr/local/lib/python3.6/site-packages/airflow/utils/dag_processing.py", line 820, in start_in_async webserver_1 | simple_dags = self.heartbeat() webserver_1 | File "/usr/local/lib/python3.6/site-packages/airflow/utils/dag_processing.py", line 1190, in heartbeat webserver_1 | zombies = self._find_zombies() webserver_1 | File "/usr/local/lib/python3.6/site-packages/airflow/utils/db.py", line 73, in wrapper webserver_1 | return func(*args, **kwargs) webserver_1 | File "/usr/local/lib/python3.6/site-packages/airflow/utils/dag_processing.py", line 1236, in _find_zombies webserver_1 | LJ.latest_heartbeat < limit_dttm, webserver_1 | File "/usr/local/lib/python3.6/site-packages/sqlalchemy/orm/query.py", line 2925, in all webserver_1 | return list(self) webserver_1 | File "/usr/local/lib/python3.6/site-packages/sqlalchemy/orm/query.py", line 3081, in __iter__ webserver_1 | return self._execute_and_instances(context) webserver_1 | File "/usr/local/lib/python3.6/site-packages/sqlalchemy/orm/query.py", line 3103, in _execute_and_instances webserver_1 | querycontext, self._connection_from_session, close_with_result=True webserver_1 | File "/usr/local/lib/python3.6/site-packages/sqlalchemy/orm/query.py", line 3111, in _get_bind_args webserver_1 | mapper=self._bind_mapper(), clause=querycontext.statement, **kw webserver_1 | File "/usr/local/lib/python3.6/site-packages/sqlalchemy/orm/query.py", line 3096, in _connection_from_session webserver_1 | conn = self.session.connection(**kw) webserver_1 | File "/usr/local/lib/python3.6/site-packages/sqlalchemy/orm/session.py", line 1120, in connection webserver_1 | execution_options=execution_options, webserver_1 | File "/usr/local/lib/python3.6/site-packages/sqlalchemy/orm/session.py", line 1126, in _connection_for_bind webserver_1 | engine, execution_options webserver_1 | File "/usr/local/lib/python3.6/site-packages/sqlalchemy/orm/session.py", line 424, in _connection_for_bind webserver_1 | conn = bind.contextual_connect() webserver_1 | File "/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 2194, in contextual_connect webserver_1 | **kwargs webserver_1 | File "/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 125, in __init__ webserver_1 | self.dispatch.engine_connect(self, self.__branch) webserver_1 | File "/usr/local/lib/python3.6/site-packages/sqlalchemy/event/attr.py", line 297, in __call__ webserver_1 | fn(*args, **kw) webserver_1 | File "/usr/local/lib/python3.6/site-packages/airflow/utils/sqlalchemy.py", line 79, in ping_connection webserver_1 | log.warning("DB connection invalidated. Reconnecting...", err) webserver_1 | Message: 'DB connection invalidated. Reconnecting...' webserver_1 | Arguments: (OperationalError('(psycopg2.OperationalError) server closed the connection unexpectedly\n\tThis probably means the server terminated abnormally\n\tbefore or while processing the request.\n',),) {code} Traceback: {code} webserver_1 | Traceback (most recent call last): webserver_1 | File "/usr/local/lib/python3.6/site-packages/airflow/utils/sqlalchemy.py", line 68, in ping_connection webserver_1 | connection.scalar(select([1])) webserver_1 | File "/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 912, in scalar webserver_1 | return self.execute(object_, *multiparams, **params).scalar() webserver_1 | File "/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 980, in execute webserver_1 | return meth(self, multiparams, params) webserver_1 | File "/usr/local/lib/python3.6/site-packages/sqlalchemy/sql/elements.py", line 273, in _execute_on_connection webserver_1 | return connection._execute_clauseelement(self, multiparams, params) webserver_1 | File "/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 1099, in _execute_clauseelement webserver_1 | distilled_params, webserver_1 | File "/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 1240, in _execute_context webserver_1 | e, statement, parameters, cursor, context webserver_1 | File "/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 1458, in _handle_dbapi_exception webserver_1 | util.raise_from_cause(sqlalchemy_exception, exc_info) webserver_1 | File "/usr/local/lib/python3.6/site-packages/sqlalchemy/util/compat.py", line 296, in raise_from_cause webserver_1 | reraise(type(exception), exception, tb=exc_tb, cause=cause) webserver_1 | File "/usr/local/lib/python3.6/site-packages/sqlalchemy/util/compat.py", line 276, in reraise webserver_1 | raise value.with_traceback(tb) webserver_1 | File "/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 1236, in _execute_context webserver_1 | cursor, statement, parameters, context webserver_1 | File "/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/default.py", line 536, in do_execute webserver_1 | cursor.execute(statement, parameters) webserver_1 | sqlalchemy.exc.OperationalError: (psycopg2.OperationalError) server closed the connection unexpectedly webserver_1 | This probably means the server terminated abnormally webserver_1 | before or while processing the request. webserver_1 | [SQL: 'SELECT 1'] (Background on this error at: http://sqlalche.me/e/e3q8) {code} It has something to do with the configure_orm function in airflow/settings.py, because that is the only usage of setup_event_handlers (from airflow/utils/sqlalchemy.py). And if I disable connection pooling, then the warning seems to go away. Beyond that, I am not sure where to go from here. But something must be wrong. was: I am finding with 1.10.2 that I seem to get a warning {{DB connection invalidated. Reconnecting...}} very frequently. I to try to diagnose I added logging of the triggering error on line 79 in airflow/utils/sqlalchemy.py, from which this warning is generated. Here's the traceback: {code} webserver_1 | Traceback (most recent call last): webserver_1 | File "/usr/local/lib/python3.6/site-packages/airflow/utils/sqlalchemy.py", line 68, in ping_connection webserver_1 | connection.scalar(select([1])) webserver_1 | File "/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 912, in scalar webserver_1 | return self.execute(object_, *multiparams, **params).scalar() webserver_1 | File "/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 980, in execute webserver_1 | return meth(self, multiparams, params) webserver_1 | File "/usr/local/lib/python3.6/site-packages/sqlalchemy/sql/elements.py", line 273, in _execute_on_connection webserver_1 | return connection._execute_clauseelement(self, multiparams, params) webserver_1 | File "/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 1099, in _execute_clauseelement webserver_1 | distilled_params, webserver_1 | File "/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 1240, in _execute_context webserver_1 | e, statement, parameters, cursor, context webserver_1 | File "/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 1458, in _handle_dbapi_exception webserver_1 | util.raise_from_cause(sqlalchemy_exception, exc_info) webserver_1 | File "/usr/local/lib/python3.6/site-packages/sqlalchemy/util/compat.py", line 296, in raise_from_cause webserver_1 | reraise(type(exception), exception, tb=exc_tb, cause=cause) webserver_1 | File "/usr/local/lib/python3.6/site-packages/sqlalchemy/util/compat.py", line 276, in reraise webserver_1 | raise value.with_traceback(tb) webserver_1 | File "/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 1236, in _execute_context webserver_1 | cursor, statement, parameters, context webserver_1 | File "/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/default.py", line 536, in do_execute webserver_1 | cursor.execute(statement, parameters) webserver_1 | sqlalchemy.exc.OperationalError: (psycopg2.OperationalError) server closed the connection unexpectedly webserver_1 | This probably means the server terminated abnormally webserver_1 | before or while processing the request. webserver_1 | [SQL: 'SELECT 1'] (Background on this error at: http://sqlalche.me/e/e3q8) {code} It has something to do with the configure_orm function in airflow/settings.py, because that is the only usage of setup_event_handlers (from airflow/utils/sqlalchemy.py). And if I disable connection pooling, then the warning seems to go away. Beyond that, I am not sure where to go from here. But something must be wrong. > "DB connection invalidated" warning at every zombie check > --------------------------------------------------------- > > Key: AIRFLOW-4134 > URL: https://issues.apache.org/jira/browse/AIRFLOW-4134 > Project: Apache Airflow > Issue Type: Bug > Affects Versions: 1.10.2 > Reporter: Daniel Standish > Priority: Major > > I am finding with 1.10.2 that I seem to get a warning {{DB connection > invalidated. Reconnecting...}} very frequently. > I to try to diagnose I added logging of the triggering error on line 79 in > airflow/utils/sqlalchemy.py, from which this warning is generated. > Call stack: > {code} > webserver_1 | Call stack: > webserver_1 | File "/usr/local/bin/airflow", line 32, in <module> > webserver_1 | args.func(args) > webserver_1 | File > "/usr/local/lib/python3.6/site-packages/airflow/utils/cli.py", line 74, in > wrapper > webserver_1 | return f(*args, **kwargs) > webserver_1 | File > "/usr/local/lib/python3.6/site-packages/airflow/bin/cli.py", line 992, in > scheduler > webserver_1 | job.run() > webserver_1 | File > "/usr/local/lib/python3.6/site-packages/airflow/jobs.py", line 205, in run > webserver_1 | self._execute() > webserver_1 | File > "/usr/local/lib/python3.6/site-packages/airflow/jobs.py", line 1532, in > _execute > webserver_1 | self._execute_helper() > webserver_1 | File > "/usr/local/lib/python3.6/site-packages/airflow/jobs.py", line 1562, in > _execute_helper > webserver_1 | self.processor_agent.start() > webserver_1 | File > "/usr/local/lib/python3.6/site-packages/airflow/utils/dag_processing.py", > line 511, in start > webserver_1 | self._async_mode) > webserver_1 | File > "/usr/local/lib/python3.6/site-packages/airflow/utils/dag_processing.py", > line 565, in _launch_process > webserver_1 | p.start() > webserver_1 | File "/usr/local/lib/python3.6/multiprocessing/process.py", > line 105, in start > webserver_1 | self._popen = self._Popen(self) > webserver_1 | File "/usr/local/lib/python3.6/multiprocessing/context.py", > line 223, in _Popen > webserver_1 | return > _default_context.get_context().Process._Popen(process_obj) > webserver_1 | File "/usr/local/lib/python3.6/multiprocessing/context.py", > line 277, in _Popen > webserver_1 | return Popen(process_obj) > webserver_1 | File > "/usr/local/lib/python3.6/multiprocessing/popen_fork.py", line 19, in __init__ > webserver_1 | self._launch(process_obj) > webserver_1 | File > "/usr/local/lib/python3.6/multiprocessing/popen_fork.py", line 73, in _launch > webserver_1 | code = process_obj._bootstrap() > webserver_1 | File "/usr/local/lib/python3.6/multiprocessing/process.py", > line 258, in _bootstrap > webserver_1 | self.run() > webserver_1 | File "/usr/local/lib/python3.6/multiprocessing/process.py", > line 93, in run > webserver_1 | self._target(*self._args, **self._kwargs) > webserver_1 | File > "/usr/local/lib/python3.6/site-packages/airflow/utils/dag_processing.py", > line 560, in helper > webserver_1 | processor_manager.start() > webserver_1 | File > "/usr/local/lib/python3.6/site-packages/airflow/utils/dag_processing.py", > line 797, in start > webserver_1 | self.start_in_async() > webserver_1 | File > "/usr/local/lib/python3.6/site-packages/airflow/utils/dag_processing.py", > line 820, in start_in_async > webserver_1 | simple_dags = self.heartbeat() > webserver_1 | File > "/usr/local/lib/python3.6/site-packages/airflow/utils/dag_processing.py", > line 1190, in heartbeat > webserver_1 | zombies = self._find_zombies() > webserver_1 | File > "/usr/local/lib/python3.6/site-packages/airflow/utils/db.py", line 73, in > wrapper > webserver_1 | return func(*args, **kwargs) > webserver_1 | File > "/usr/local/lib/python3.6/site-packages/airflow/utils/dag_processing.py", > line 1236, in _find_zombies > webserver_1 | LJ.latest_heartbeat < limit_dttm, > webserver_1 | File > "/usr/local/lib/python3.6/site-packages/sqlalchemy/orm/query.py", line 2925, > in all > webserver_1 | return list(self) > webserver_1 | File > "/usr/local/lib/python3.6/site-packages/sqlalchemy/orm/query.py", line 3081, > in __iter__ > webserver_1 | return self._execute_and_instances(context) > webserver_1 | File > "/usr/local/lib/python3.6/site-packages/sqlalchemy/orm/query.py", line 3103, > in _execute_and_instances > webserver_1 | querycontext, self._connection_from_session, > close_with_result=True > webserver_1 | File > "/usr/local/lib/python3.6/site-packages/sqlalchemy/orm/query.py", line 3111, > in _get_bind_args > webserver_1 | mapper=self._bind_mapper(), clause=querycontext.statement, > **kw > webserver_1 | File > "/usr/local/lib/python3.6/site-packages/sqlalchemy/orm/query.py", line 3096, > in _connection_from_session > webserver_1 | conn = self.session.connection(**kw) > webserver_1 | File > "/usr/local/lib/python3.6/site-packages/sqlalchemy/orm/session.py", line > 1120, in connection > webserver_1 | execution_options=execution_options, > webserver_1 | File > "/usr/local/lib/python3.6/site-packages/sqlalchemy/orm/session.py", line > 1126, in _connection_for_bind > webserver_1 | engine, execution_options > webserver_1 | File > "/usr/local/lib/python3.6/site-packages/sqlalchemy/orm/session.py", line 424, > in _connection_for_bind > webserver_1 | conn = bind.contextual_connect() > webserver_1 | File > "/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line > 2194, in contextual_connect > webserver_1 | **kwargs > webserver_1 | File > "/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 125, > in __init__ > webserver_1 | self.dispatch.engine_connect(self, self.__branch) > webserver_1 | File > "/usr/local/lib/python3.6/site-packages/sqlalchemy/event/attr.py", line 297, > in __call__ > webserver_1 | fn(*args, **kw) > webserver_1 | File > "/usr/local/lib/python3.6/site-packages/airflow/utils/sqlalchemy.py", line > 79, in ping_connection > webserver_1 | log.warning("DB connection invalidated. Reconnecting...", > err) > webserver_1 | Message: 'DB connection invalidated. Reconnecting...' > webserver_1 | Arguments: (OperationalError('(psycopg2.OperationalError) > server closed the connection unexpectedly\n\tThis probably means the server > terminated abnormally\n\tbefore or while processing the request.\n',),) > {code} > Traceback: > {code} > webserver_1 | Traceback (most recent call last): > webserver_1 | File > "/usr/local/lib/python3.6/site-packages/airflow/utils/sqlalchemy.py", line > 68, in ping_connection > webserver_1 | connection.scalar(select([1])) > webserver_1 | File > "/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 912, > in scalar > webserver_1 | return self.execute(object_, *multiparams, > **params).scalar() > webserver_1 | File > "/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 980, > in execute > webserver_1 | return meth(self, multiparams, params) > webserver_1 | File > "/usr/local/lib/python3.6/site-packages/sqlalchemy/sql/elements.py", line > 273, in _execute_on_connection > webserver_1 | return connection._execute_clauseelement(self, > multiparams, params) > webserver_1 | File > "/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line > 1099, in _execute_clauseelement > webserver_1 | distilled_params, > webserver_1 | File > "/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line > 1240, in _execute_context > webserver_1 | e, statement, parameters, cursor, context > webserver_1 | File > "/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line > 1458, in _handle_dbapi_exception > webserver_1 | util.raise_from_cause(sqlalchemy_exception, exc_info) > webserver_1 | File > "/usr/local/lib/python3.6/site-packages/sqlalchemy/util/compat.py", line 296, > in raise_from_cause > webserver_1 | reraise(type(exception), exception, tb=exc_tb, cause=cause) > webserver_1 | File > "/usr/local/lib/python3.6/site-packages/sqlalchemy/util/compat.py", line 276, > in reraise > webserver_1 | raise value.with_traceback(tb) > webserver_1 | File > "/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line > 1236, in _execute_context > webserver_1 | cursor, statement, parameters, context > webserver_1 | File > "/usr/local/lib/python3.6/site-packages/sqlalchemy/engine/default.py", line > 536, in do_execute > webserver_1 | cursor.execute(statement, parameters) > webserver_1 | sqlalchemy.exc.OperationalError: (psycopg2.OperationalError) > server closed the connection unexpectedly > webserver_1 | This probably means the server terminated abnormally > webserver_1 | before or while processing the request. > webserver_1 | [SQL: 'SELECT 1'] (Background on this error at: > http://sqlalche.me/e/e3q8) > {code} > It has something to do with the configure_orm function in > airflow/settings.py, because that is the only usage of setup_event_handlers > (from airflow/utils/sqlalchemy.py). > And if I disable connection pooling, then the warning seems to go away. > Beyond that, I am not sure where to go from here. But something must be > wrong. -- This message was sent by Atlassian JIRA (v7.6.3#76005)