This is an automated email from the ASF dual-hosted git repository.

ash pushed a commit to branch supervisor-logs
in repository https://gitbox.apache.org/repos/asf/airflow-ci-infra.git
commit eeee02270d52b8190632545498532eb56991a7f7
Author: Ash Berlin-Taylor <ash_git...@firemirror.com>
AuthorDate: Fri Mar 19 21:20:06 2021 +0000

    Increase logging from actions.runner-supervisor service

    This allows us to have in the logs (and thus searchable in the CloudWatch
    Logs) the InstanceId
---
 scripts/runner-supervisor.py | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/scripts/runner-supervisor.py b/scripts/runner-supervisor.py
index 10b1b18..d3d0e0c 100755
--- a/scripts/runner-supervisor.py
+++ b/scripts/runner-supervisor.py
@@ -112,7 +112,17 @@ TABLE_NAME = os.getenv('COUNTER_TABLE', 'GithubRunnerQueue')
     default='~runner/actions-runner',
 )
 def main(repo, output_folder, user):
-    log.info("Starting...")
+    global INSTANCE_ID
+    # Notify the ASG LifeCycle hook that we are now In Service and ready to
+    # process requests/safe to be shut down
+
+    # Fetch current instance ID from where cloutinit writes it to
+    if not INSTANCE_ID:
+        with open('/var/lib/cloud/data/instance-id') as fh:
+            INSTANCE_ID = fh.readline().strip()
+
+    log.info("Starting on %s...", INSTANCE_ID)
+
     output_folder = os.path.expanduser(output_folder)

     short_time = datetime.timedelta(microseconds=1)
@@ -548,6 +558,7 @@ class ProcessWatcher:
                     listener_found = True

         if self.in_termating_lifecycle and not listener_found:
+            log.info("Runner.Listener process not found - OkayToTerminate instance")
             complete_asg_lifecycle_hook('OkayToTerminate')

     def check_still_alive(self):
@@ -608,6 +619,11 @@ class ProcessWatcher:
         if not OWN_ASG:
             # Not part of an ASG
             return
+
+        if self.in_termating_lifecycle:
+            log.info("Not trying to SetInstanceProtection, we are already in the terminating lifecycle step")
+            return
+
         asg_client = boto3.client('autoscaling')
         try:
             self._protect_from_scale_in(asg_client, protect)
@@ -690,6 +706,7 @@ class ProcessWatcher:
             try:
                 proc = psutil.Process(detail.pid)
                 if proc.name() == "Runner.Listener":
+                    log.info("Runner.Listener process exited - OkayToTerminate instance")
                     complete_asg_lifecycle_hook('OkayToTerminate')
             except psutil.NoSuchProcess:
                 # We lost the race, process has already exited. If it was that short lived it wasn't that