comparison contrib/automation/hgautomation/aws.py @ 42280:e570106beda1

automation: shore up rebooting behavior. There was a race condition in the old code. Use instance.stop()/instance.start() to eliminate it. As part of debugging this, I also found another race condition related to PowerShell permissions after the reboot. Unfortunately, I'm not sure of the best way to work around it, so I've added a comment for now. Differential Revision: https://phab.mercurial-scm.org/D6288
author Gregory Szorc <gregory.szorc@gmail.com>
date Fri, 19 Apr 2019 07:34:55 -0700
parents 8dc22a209420
children 195dcc10b3d7
comparison
equal deleted inserted replaced
42279:f30184484dd1 42280:e570106beda1
806 'commands': WINDOWS_BOOTSTRAP_POWERSHELL.split('\n'), 806 'commands': WINDOWS_BOOTSTRAP_POWERSHELL.split('\n'),
807 }, 807 },
808 ) 808 )
809 809
810 # Reboot so all updates are fully applied. 810 # Reboot so all updates are fully applied.
811 #
812 # We don't use instance.reboot() here because it is asynchronous and
813 # we don't know when exactly the instance has rebooted. It could take
814 # a while to stop and we may start trying to interact with the instance
815 # before it has rebooted.
811 print('rebooting instance %s' % instance.id) 816 print('rebooting instance %s' % instance.id)
812 ec2client.reboot_instances(InstanceIds=[instance.id]) 817 instance.stop()
813 818 ec2client.get_waiter('instance_stopped').wait(
814 time.sleep(15) 819 InstanceIds=[instance.id],
820 WaiterConfig={
821 'Delay': 5,
822 })
823
824 instance.start()
825 wait_for_ip_addresses([instance])
826
827 # There is a race condition here between the User Data PS script running
828 # and us connecting to WinRM. This can manifest as
829 # "AuthorizationManager check failed" failures during run_powershell().
830 # TODO figure out a workaround.
815 831
816 print('waiting for Windows Remote Management to come back...') 832 print('waiting for Windows Remote Management to come back...')
817 client = wait_for_winrm(instance.public_ip_address, 'Administrator', 833 client = wait_for_winrm(instance.public_ip_address, 'Administrator',
818 c.automation.default_password()) 834 c.automation.default_password())
819 print('established WinRM connection to %s' % instance.id) 835 print('established WinRM connection to %s' % instance.id)