comparison contrib/automation/hgautomation/aws.py @ 42024:b05a3e28cf24

automation: perform tasks on remote machines Sometimes you don't have access to a machine in order to do something. For example, you may not have access to a Windows machine required to build Windows binaries or run tests on that platform. This commit introduces a pile of code intended to help "automate" common tasks, like building release artifacts. In its current form, the automation code provides functionality for performing tasks on Windows EC2 instances. The hgautomation.aws module provides functionality for integrating with AWS. It manages EC2 resources such as IAM roles, EC2 security groups, AMIs, and instances. The hgautomation.windows module provides a higher-level interface for performing tasks on remote Windows machines. The hgautomation.cli module provides a command-line interface to these higher-level primitives. I attempted to structure Windows remote machine interaction around Windows Remoting / PowerShell. This is kinda/sorta like SSH + shell, but for Windows. In theory, most of the functionality is cloud provider agnostic, as we should be able to use any established WinRM connection to interact with a remote. In reality, we're tightly coupled to AWS at the moment because I didn't want to prematurely add abstractions for a 2nd cloud provider. (1 was hard enough to implement.) In the aws module is code for creating an image with a fully functional Mercurial development environment. It contains VC9, VC2017, msys, and other dependencies. The image is fully capable of building all the existing Mercurial release artifacts and running tests. There are a few things that don't work. For example, running Windows tests with Python 3. But building the Windows release artifacts does work. And that was an impetus for this work. (Although we don't yet support code signing.) Getting this functionality to work was extremely time consuming. It took hours debugging permissions failures and other wonky behavior due to PowerShell Remoting. 
(The permissions model for PowerShell is crazy and you brush up against all kinds of issues because of the user/privileges of the user running the PowerShell and the permissions of the PowerShell session itself.) The functionality around AWS resource management could use some improving. In theory we support shared tenancy via resource name prefixing. In reality, we don't offer a way to configure this. Speaking of AWS resource management, I thought about using a tool like Terraform to manage resources. But at our scale, writing a few dozen lines of code to manage resources seemed acceptable. Maybe we should reconsider this if things grow out of control. Time will tell. Currently, emphasis is placed on Windows. But I only started there because it was likely to be the most difficult to implement. It should be relatively trivial to automate tasks on remote Linux machines. In fact, I have a ~1 year old script to run tests on a remote EC2 instance. I will likely be porting that to this new "framework" in the near future. # no-check-commit because foo_bar functions Differential Revision: https://phab.mercurial-scm.org/D6142
author Gregory Szorc <gregory.szorc@gmail.com>
date Fri, 15 Mar 2019 11:24:08 -0700
parents
children 0e9066db5e44
comparison
equal deleted inserted replaced
42023:bf87d34a675c 42024:b05a3e28cf24
1 # aws.py - Automation code for Amazon Web Services
2 #
3 # Copyright 2019 Gregory Szorc <gregory.szorc@gmail.com>
4 #
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
7
8 # no-check-code because Python 3 native.
9
10 import contextlib
11 import copy
12 import hashlib
13 import json
14 import os
15 import pathlib
16 import subprocess
17 import time
18
19 import boto3
20 import botocore.exceptions
21
22 from .winrm import (
23 run_powershell,
24 wait_for_winrm,
25 )
26
27
# Directory four levels above this file (parent of the file's directory,
# taken three more times).
SOURCE_ROOT = pathlib.Path(os.path.abspath(__file__)).parent.parent.parent.parent

# PowerShell script whose lines are appended to the AMI bootstrap commands.
INSTALL_WINDOWS_DEPENDENCIES = (SOURCE_ROOT / 'contrib' /
                                'install-windows-dependencies.ps1')
32
33
# Unprefixed names of the EC2 key pairs that must exist. The resource name
# prefix (``hg-`` by default) is prepended when talking to AWS.
KEY_PAIRS = {
    'automation',
}
37
38
# Canonical EC2 security groups, keyed by unprefixed group name. ``ingress``
# is passed verbatim as ``IpPermissions`` when the group is created.
#
# NOTE(review): every rule below is open to 0.0.0.0/0, including the WinRM
# ports (5985-5986).
SECURITY_GROUPS = {
    'windows-dev-1': {
        'description': 'Mercurial Windows instances that perform build automation',
        'ingress': [
            {
                'FromPort': 22,
                'ToPort': 22,
                'IpProtocol': 'tcp',
                'IpRanges': [
                    {
                        'CidrIp': '0.0.0.0/0',
                        'Description': 'SSH from entire Internet',
                    },
                ],
            },
            {
                'FromPort': 3389,
                'ToPort': 3389,
                'IpProtocol': 'tcp',
                'IpRanges': [
                    {
                        'CidrIp': '0.0.0.0/0',
                        'Description': 'RDP from entire Internet',
                    },
                ],

            },
            {
                'FromPort': 5985,
                'ToPort': 5986,
                'IpProtocol': 'tcp',
                'IpRanges': [
                    {
                        'CidrIp': '0.0.0.0/0',
                        'Description': 'PowerShell Remoting (Windows Remote Management)',
                    },
                ],
            }
        ],
    },
}
80
81
# Canonical IAM roles, keyed by unprefixed role name. Each entry carries the
# role description and the managed policy ARNs attached when the role is
# created.
IAM_ROLES = {
    'ephemeral-ec2-role-1': {
        'description': 'Mercurial temporary EC2 instances',
        'policy_arns': [
            'arn:aws:iam::aws:policy/service-role/AmazonEC2RoleforSSM',
        ],
    },
}
90
91
92 ASSUME_ROLE_POLICY_DOCUMENT = '''
93 {
94 "Version": "2012-10-17",
95 "Statement": [
96 {
97 "Effect": "Allow",
98 "Principal": {
99 "Service": "ec2.amazonaws.com"
100 },
101 "Action": "sts:AssumeRole"
102 }
103 ]
104 }
105 '''.strip()
106
107
# Canonical IAM instance profiles, keyed by unprefixed profile name. ``roles``
# lists the unprefixed names of the roles each profile must contain.
IAM_INSTANCE_PROFILES = {
    'ephemeral-ec2-1': {
        'roles': [
            'ephemeral-ec2-role-1',
        ],
    }
}
115
116
117 # User Data for Windows EC2 instance. Mainly used to set the password
118 # and configure WinRM.
119 # Inspired by the User Data script used by Packer
120 # (from https://www.packer.io/intro/getting-started/build-image.html).
121 WINDOWS_USER_DATA = '''
122 <powershell>
123
124 # TODO enable this once we figure out what is failing.
125 #$ErrorActionPreference = "stop"
126
127 # Set administrator password
128 net user Administrator "%s"
129 wmic useraccount where "name='Administrator'" set PasswordExpires=FALSE
130
131 # First, make sure WinRM can't be connected to
132 netsh advfirewall firewall set rule name="Windows Remote Management (HTTP-In)" new enable=yes action=block
133
134 # Delete any existing WinRM listeners
135 winrm delete winrm/config/listener?Address=*+Transport=HTTP 2>$Null
136 winrm delete winrm/config/listener?Address=*+Transport=HTTPS 2>$Null
137
138 # Create a new WinRM listener and configure
139 winrm create winrm/config/listener?Address=*+Transport=HTTP
140 winrm set winrm/config/winrs '@{MaxMemoryPerShellMB="0"}'
141 winrm set winrm/config '@{MaxTimeoutms="7200000"}'
142 winrm set winrm/config/service '@{AllowUnencrypted="true"}'
143 winrm set winrm/config/service '@{MaxConcurrentOperationsPerUser="12000"}'
144 winrm set winrm/config/service/auth '@{Basic="true"}'
145 winrm set winrm/config/client/auth '@{Basic="true"}'
146
147 # Configure UAC to allow privilege elevation in remote shells
148 $Key = 'HKLM:\SOFTWARE\Microsoft\Windows\CurrentVersion\Policies\System'
149 $Setting = 'LocalAccountTokenFilterPolicy'
150 Set-ItemProperty -Path $Key -Name $Setting -Value 1 -Force
151
152 # Configure and restart the WinRM Service; Enable the required firewall exception
153 Stop-Service -Name WinRM
154 Set-Service -Name WinRM -StartupType Automatic
155 netsh advfirewall firewall set rule name="Windows Remote Management (HTTP-In)" new action=allow localip=any remoteip=any
156 Start-Service -Name WinRM
157
158 # Disable firewall on private network interfaces so prompts don't appear.
159 Set-NetFirewallProfile -Name private -Enabled false
160 </powershell>
161 '''.lstrip()
162
163
164 WINDOWS_BOOTSTRAP_POWERSHELL = '''
165 Write-Output "installing PowerShell dependencies"
166 Install-PackageProvider -Name NuGet -MinimumVersion 2.8.5.201 -Force
167 Set-PSRepository -Name PSGallery -InstallationPolicy Trusted
168 Install-Module -Name OpenSSHUtils -RequiredVersion 0.0.2.0
169
170 Write-Output "installing OpenSSL server"
171 Add-WindowsCapability -Online -Name OpenSSH.Server~~~~0.0.1.0
172 # Various tools will attempt to use older versions of .NET. So we enable
173 # the feature that provides them so it doesn't have to be auto-enabled
174 # later.
175 Write-Output "enabling .NET Framework feature"
176 Install-WindowsFeature -Name Net-Framework-Core
177 '''
178
179
class AWSConnection:
    """Manages the state of a connection with AWS.

    Constructing an instance creates a boto3 session plus EC2 and IAM
    clients/resources for ``region`` and then synchronizes key pairs,
    security groups and IAM state with the definitions in this module.
    """

    def __init__(self, automation, region: str):
        """Connect to ``region`` and reconcile remote state.

        ``automation`` must expose a ``state_path`` attribute (a
        ``pathlib.Path``) under which local key material is stored.
        """
        self.automation = automation
        self.local_state_path = automation.state_path

        # Name prefix applied to the AWS resources managed here.
        self.prefix = 'hg-'

        self.session = boto3.session.Session(region_name=region)
        self.ec2client = self.session.client('ec2')
        self.ec2resource = self.session.resource('ec2')
        self.iamclient = self.session.client('iam')
        self.iamresource = self.session.resource('iam')

        # Pass the prefix explicitly so the attribute above, rather than
        # each helper's own default, decides which resources are managed.
        ensure_key_pairs(automation.state_path, self.ec2resource,
                         prefix=self.prefix)

        self.security_groups = ensure_security_groups(self.ec2resource,
                                                      prefix=self.prefix)
        ensure_iam_state(self.iamresource, prefix=self.prefix)

    def key_pair_path_private(self, name):
        """Path to a key pair private key file."""
        return self.local_state_path / 'keys' / ('keypair-%s' % name)

    def key_pair_path_public(self, name):
        """Path to a key pair public key file."""
        return self.local_state_path / 'keys' / ('keypair-%s.pub' % name)
206
207
def rsa_key_fingerprint(p: pathlib.Path):
    """Return the colon-separated SHA-1 fingerprint of an RSA private key.

    The key at ``p`` is converted to unencrypted PKCS#8 DER with the
    ``openssl`` command line tool and the SHA-1 of that output is formatted
    as lowercase hex byte pairs joined by ``:``.

    Raises ``subprocess.CalledProcessError`` if ``openssl`` exits non-zero.
    """

    # TODO use rsa package.
    argv = [
        'openssl', 'pkcs8',
        '-in', str(p),
        '-nocrypt',
        '-topk8',
        '-outform', 'DER',
    ]
    completed = subprocess.run(argv, capture_output=True, check=True)

    digest = hashlib.sha1(completed.stdout).hexdigest()
    octets = [digest[i:i + 2] for i in range(0, len(digest), 2)]
    return ':'.join(octets)
220
221
def ensure_key_pairs(state_path: pathlib.Path, ec2resource, prefix='hg-'):
    """Ensure EC2 key pairs and their local copies are in sync.

    Remote key pairs whose name starts with ``prefix`` are compared with the
    ``keypair-<name>`` / ``keypair-<name>.pub`` files under
    ``state_path / 'keys'``. A pair that exists on only one side, or whose
    fingerprints differ, is removed from wherever it exists. Afterwards any
    name in ``KEY_PAIRS`` that is missing remotely is created in EC2 and
    written to disk.

    Raises ``subprocess.CalledProcessError`` if ``openssl`` or ``ssh-keygen``
    fails.
    """
    remote_existing = {}

    for kpi in ec2resource.key_pairs.all():
        if kpi.name.startswith(prefix):
            remote_existing[kpi.name[len(prefix):]] = kpi.key_fingerprint

    # Validate that we have these keys locally.
    key_path = state_path / 'keys'
    key_path.mkdir(exist_ok=True, mode=0o700)

    def unlink_if_present(path):
        # Half of a pair may already be gone (e.g. an interrupted earlier
        # run); that must not abort the whole synchronization.
        with contextlib.suppress(FileNotFoundError):
            path.unlink()

    def remove_remote(name):
        print('deleting key pair %s' % name)
        key = ec2resource.KeyPair(name)
        key.delete()

    def remove_local(name):
        pub_full = key_path / ('keypair-%s.pub' % name)
        priv_full = key_path / ('keypair-%s' % name)

        print('removing %s' % pub_full)
        unlink_if_present(pub_full)
        print('removing %s' % priv_full)
        unlink_if_present(priv_full)

    local_existing = {}

    for f in sorted(os.listdir(key_path)):
        if not f.startswith('keypair-') or not f.endswith('.pub'):
            continue

        name = f[len('keypair-'):-len('.pub')]

        pub_full = key_path / f
        priv_full = key_path / ('keypair-%s' % name)

        with open(pub_full, 'r', encoding='ascii') as fh:
            data = fh.read()

        if not data.startswith('ssh-rsa '):
            print('unexpected format for key pair file: %s; removing' %
                  pub_full)
            unlink_if_present(pub_full)
            unlink_if_present(priv_full)
            continue

        if not priv_full.exists():
            # Without the private key the pair is unusable and cannot be
            # fingerprinted. Drop the public half so the pair is recreated.
            print('private key for key pair file %s is missing; removing' %
                  pub_full)
            unlink_if_present(pub_full)
            continue

        local_existing[name] = rsa_key_fingerprint(priv_full)

    for name in sorted(set(remote_existing) | set(local_existing)):
        if name not in local_existing:
            actual = '%s%s' % (prefix, name)
            print('remote key %s does not exist locally' % name)
            remove_remote(actual)
            del remote_existing[name]

        elif name not in remote_existing:
            print('local key %s does not exist remotely' % name)
            remove_local(name)
            del local_existing[name]

        elif remote_existing[name] != local_existing[name]:
            print('key fingerprint mismatch for %s; '
                  'removing from local and remote' % name)
            remove_local(name)
            remove_remote('%s%s' % (prefix, name))
            del local_existing[name]
            del remote_existing[name]

    missing = KEY_PAIRS - set(remote_existing)

    for name in sorted(missing):
        actual = '%s%s' % (prefix, name)
        print('creating key pair %s' % actual)

        priv_full = key_path / ('keypair-%s' % name)
        pub_full = key_path / ('keypair-%s.pub' % name)

        kp = ec2resource.create_key_pair(KeyName=actual)

        # Create the file with owner-only permissions from the start so the
        # private key is never on disk with the default (umask) mode.
        fd = os.open(str(priv_full),
                     os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, 'w', encoding='ascii') as fh:
            fh.write(kp.key_material)
            fh.write('\n')

        # The mode passed to os.open() only applies to newly created files;
        # tighten a pre-existing file as well.
        priv_full.chmod(0o0600)

        # SSH public key can be extracted via `ssh-keygen`.
        with pub_full.open('w', encoding='ascii') as fh:
            subprocess.run(
                ['ssh-keygen', '-y', '-f', str(priv_full)],
                stdout=fh,
                check=True)

        pub_full.chmod(0o0600)
315
316
def delete_instance_profile(profile):
    """Remove every role from an IAM instance profile, then delete it."""
    for attached in profile.roles:
        role_name = attached.name
        message = 'removing role %s from instance profile %s' % (
            role_name, profile.name)
        print(message)
        profile.remove_role(RoleName=role_name)

    print('deleting instance profile %s' % profile.name)
    profile.delete()
325
326
def ensure_iam_state(iamresource, prefix='hg-'):
    """Ensure IAM state is in sync with our canonical definition.

    Instance profiles and roles whose name starts with ``prefix`` but which
    are not listed in ``IAM_INSTANCE_PROFILES`` / ``IAM_ROLES`` are deleted.
    Missing ones are created, and each profile's role membership is then
    reconciled with its definition.
    """

    remote_profiles = {}

    for profile in iamresource.instance_profiles.all():
        if profile.name.startswith(prefix):
            remote_profiles[profile.name[len(prefix):]] = profile

    for name in sorted(set(remote_profiles) - set(IAM_INSTANCE_PROFILES)):
        delete_instance_profile(remote_profiles[name])
        del remote_profiles[name]

    remote_roles = {}

    for role in iamresource.roles.all():
        if role.name.startswith(prefix):
            remote_roles[role.name[len(prefix):]] = role

    for name in sorted(set(remote_roles) - set(IAM_ROLES)):
        role = remote_roles[name]

        # IAM refuses to delete a role that still has managed policies
        # attached, so detach them first.
        for policy in role.attached_policies.all():
            print('detaching policy %s from %s' % (policy.arn, role.name))
            role.detach_policy(PolicyArn=policy.arn)

        print('removing role %s' % role.name)
        role.delete()
        del remote_roles[name]

    # We've purged remote state that doesn't belong. Create missing
    # instance profiles and roles.
    for name in sorted(set(IAM_INSTANCE_PROFILES) - set(remote_profiles)):
        actual = '%s%s' % (prefix, name)
        print('creating IAM instance profile %s' % actual)

        profile = iamresource.create_instance_profile(
            InstanceProfileName=actual)
        remote_profiles[name] = profile

    for name in sorted(set(IAM_ROLES) - set(remote_roles)):
        entry = IAM_ROLES[name]

        actual = '%s%s' % (prefix, name)
        print('creating IAM role %s' % actual)

        role = iamresource.create_role(
            RoleName=actual,
            Description=entry['description'],
            AssumeRolePolicyDocument=ASSUME_ROLE_POLICY_DOCUMENT,
        )

        remote_roles[name] = role

        for arn in entry['policy_arns']:
            print('attaching policy %s to %s' % (arn, role.name))
            role.attach_policy(PolicyArn=arn)

    # Now reconcile state of profiles.
    for name, meta in sorted(IAM_INSTANCE_PROFILES.items()):
        profile = remote_profiles[name]
        wanted = {'%s%s' % (prefix, role) for role in meta['roles']}
        have = {role.name for role in profile.roles}

        for role in sorted(have - wanted):
            print('removing role %s from %s' % (role, profile.name))
            profile.remove_role(RoleName=role)

        for role in sorted(wanted - have):
            print('adding role %s to %s' % (role, profile.name))
            profile.add_role(RoleName=role)
394
395
def find_windows_server_2019_image(ec2resource):
    """Return the first Amazon-owned Windows Server 2019 base image found.

    Raises ``Exception`` if the image query yields nothing.
    """
    criteria = (
        ('owner-alias', 'amazon'),
        ('state', 'available'),
        ('image-type', 'machine'),
        ('name', 'Windows_Server-2019-English-Full-Base-2019.02.13'),
    )
    filters = [{'Name': key, 'Values': [value]} for key, value in criteria]

    nothing = object()
    first = next(iter(ec2resource.images.filter(Filters=filters)), nothing)

    if first is nothing:
        raise Exception('unable to find Windows Server 2019 image')

    return first
423
424
def ensure_security_groups(ec2resource, prefix='hg-'):
    """Synchronize EC2 security groups with ``SECURITY_GROUPS``.

    Groups whose name starts with ``prefix`` (``hg-`` by default) but which
    are not defined in ``SECURITY_GROUPS`` are deleted. Defined groups that
    do not exist yet are created and given their ingress rules.

    Returns a dict mapping unprefixed group name to the security group
    resource.
    """
    remote = {
        candidate.group_name[len(prefix):]: candidate
        for candidate in ec2resource.security_groups.all()
        if candidate.group_name.startswith(prefix)
    }

    for stale in sorted(remote.keys() - SECURITY_GROUPS.keys()):
        doomed = remote[stale]
        print('removing legacy security group: %s' % doomed.group_name)
        doomed.delete()

    result = {}

    for name, spec in sorted(SECURITY_GROUPS.items()):
        if name not in remote:
            full_name = '%s%s' % (prefix, name)
            print('adding security group %s' % full_name)

            created = ec2resource.create_security_group(
                Description=spec['description'],
                GroupName=full_name,
            )
            created.authorize_ingress(IpPermissions=spec['ingress'])

            result[name] = created
        else:
            # Existing groups are reused as-is.
            result[name] = remote[name]

    return result
466
467
def terminate_ec2_instances(ec2resource, prefix='hg-'):
    """Terminate all EC2 instances whose ``Name`` tag starts with ``prefix``.

    Instances already in the ``terminated`` state are ignored. The function
    returns once every instance it terminated has finished terminating.
    """
    pending = []

    for candidate in ec2resource.instances.all():
        if candidate.state['Name'] == 'terminated':
            continue

        name_values = [
            tag['Value']
            for tag in (candidate.tags or [])
            if tag['Key'] == 'Name'
        ]

        for value in name_values:
            if not value.startswith(prefix):
                continue

            print('terminating %s' % candidate.id)
            candidate.terminate()
            pending.append(candidate)

    for doomed in pending:
        doomed.wait_until_terminated()
484
485
def remove_resources(c, prefix='hg-'):
    """Purge all of our resources in this EC2 region.

    ``c`` must expose ``ec2resource`` and ``iamresource`` attributes.
    Instances are terminated first, followed by AMIs, security groups,
    IAM instance profiles and IAM roles whose name starts with ``prefix``.
    """
    ec2resource = c.ec2resource
    iamresource = c.iamresource

    terminate_ec2_instances(ec2resource, prefix=prefix)

    # Only look at images owned by this account: an unfiltered listing also
    # enumerates every public image, none of which we could deregister.
    for image in ec2resource.images.filter(Owners=['self']):
        # An image's name may be unset, so guard before matching the prefix.
        if image.name and image.name.startswith(prefix):
            remove_ami(ec2resource, image)

    for group in ec2resource.security_groups.all():
        if group.group_name.startswith(prefix):
            print('removing security group %s' % group.group_name)
            group.delete()

    for profile in iamresource.instance_profiles.all():
        if profile.name.startswith(prefix):
            delete_instance_profile(profile)

    for role in iamresource.roles.all():
        if role.name.startswith(prefix):
            # IAM refuses to delete a role that still has managed policies
            # attached, so detach them first.
            for policy in role.attached_policies.all():
                print('detaching policy %s from %s' % (policy.arn,
                                                       role.name))
                role.detach_policy(PolicyArn=policy.arn)

            print('removing role %s' % role.name)
            role.delete()
510
511
def wait_for_ip_addresses(instances):
    """Block until every instance in ``instances`` reports a public IP.

    Each instance is reloaded every two seconds until its
    ``public_ip_address`` is set; the address is then printed.
    """
    for machine in instances:
        while not machine.public_ip_address:
            time.sleep(2)
            machine.reload()

        print('public IP address for %s: %s' % (
            machine.id, machine.public_ip_address))
524
525
def remove_ami(ec2resource, image):
    """Deregister an AMI, then delete the EBS snapshots backing it."""
    # Collect the snapshots before deregistering the image that names them.
    backing = [
        ec2resource.Snapshot(mapping['Ebs']['SnapshotId'])
        for mapping in image.block_device_mappings
        if 'Ebs' in mapping
    ]

    print('deregistering %s' % image.id)
    image.deregister()

    for snap in backing:
        print('deleting snapshot %s' % snap.id)
        snap.delete()
540
541
def wait_for_ssm(ssmclient, instances):
    """Block until SSM reports information for every given instance.

    ``instances`` is a sized collection of objects with an ``id`` attribute.
    SSM is polled every two seconds, with no timeout, until the number of
    entries it returns equals ``len(instances)``.
    """
    wanted = len(instances)
    instance_filter = {
        'Key': 'InstanceIds',
        'Values': [member.id for member in instances],
    }

    while True:
        reply = ssmclient.describe_instance_information(
            Filters=[instance_filter])
        available = len(reply['InstanceInformationList'])

        print('%d/%d instances available in SSM' % (available, wanted))

        if available == wanted:
            break

        time.sleep(2)
563
564
def run_ssm_command(ssmclient, instances, document_name, parameters):
    """Run an SSM document on EC2 instances and wait for it to succeed.

    The command is sent to all ``instances`` at once with CloudWatch output
    enabled. Each instance's invocation is then polled until its status is
    ``Success``.

    Raises ``Exception`` if an invocation ends in any status other than
    ``Success``, ``Pending``, ``InProgress`` or ``Delayed``. Client errors
    other than ``InvocationDoesNotExist`` propagate.
    """
    still_running = ('Pending', 'InProgress', 'Delayed')

    sent = ssmclient.send_command(
        InstanceIds=[member.id for member in instances],
        DocumentName=document_name,
        Parameters=parameters,
        CloudWatchOutputConfig={
            'CloudWatchOutputEnabled': True,
        },
    )

    command_id = sent['Command']['CommandId']

    for member in instances:
        while True:
            try:
                invocation = ssmclient.get_command_invocation(
                    CommandId=command_id,
                    InstanceId=member.id,
                )
            except botocore.exceptions.ClientError as e:
                if e.response['Error']['Code'] != 'InvocationDoesNotExist':
                    raise

                # The invocation may not be visible yet; retry shortly.
                print('could not find SSM command invocation; waiting')
                time.sleep(1)
                continue

            status = invocation['Status']

            if status == 'Success':
                break

            if status not in still_running:
                raise Exception('command failed on %s: %s' % (
                    member.id, status))

            time.sleep(2)
601
602
@contextlib.contextmanager
def temporary_ec2_instances(ec2resource, config):
    """Create EC2 instances that live only as long as the context.

    This proxies to ``ec2resource.create_instances(**config)`` and manages
    the lifecycle of the resulting instances: termination is requested for
    each of them when the context manager exits, whether or not the body
    raised.

    The context manager evaluates to the list of created instances. They
    may not be ready for work immediately: it is up to the caller to wait
    for them to start responding.
    """
    launched = ec2resource.create_instances(**config)
    launched_ids = [member.id for member in launched]

    try:
        print('started instances: %s' % ' '.join(launched_ids))

        yield launched
    finally:
        if launched_ids:
            print('terminating instances: %s' % ' '.join(launched_ids))
            for member in launched:
                member.terminate()
            print('terminated %d instances' % len(launched_ids))
633
634
@contextlib.contextmanager
def create_temp_windows_ec2_instances(c: AWSConnection, config):
    """Create temporary Windows EC2 instances reachable over WinRM.

    This is a higher-level wrapper around ``temporary_ec2_instances()``. It
    injects the instance profile, a ``Name`` tag and the Windows user data
    into a copy of ``config``, waits for public IP addresses and then for
    Windows Remote Management. Each yielded instance has a ``winrm_client``
    attribute holding the client returned by ``wait_for_winrm()``.

    Raises ``ValueError`` if ``config`` already contains
    ``IamInstanceProfile`` or ``UserData``.
    """
    if 'IamInstanceProfile' in config:
        raise ValueError('IamInstanceProfile cannot be provided in config')
    if 'UserData' in config:
        raise ValueError('UserData cannot be provided in config')

    admin_password = c.automation.default_password()

    # Work on a copy so the caller's config is left untouched.
    launch_config = copy.deepcopy(config)
    launch_config['IamInstanceProfile'] = {'Name': 'hg-ephemeral-ec2-1'}

    name_tag_spec = {
        'ResourceType': 'instance',
        'Tags': [{'Key': 'Name', 'Value': 'hg-temp-windows'}],
    }
    launch_config.setdefault('TagSpecifications', []).append(name_tag_spec)

    launch_config['UserData'] = WINDOWS_USER_DATA % admin_password

    with temporary_ec2_instances(c.ec2resource, launch_config) as instances:
        wait_for_ip_addresses(instances)

        print('waiting for Windows Remote Management service...')

        for machine in instances:
            machine.winrm_client = wait_for_winrm(
                machine.public_ip_address, 'Administrator', admin_password)
            print('established WinRM connection to %s' % machine.id)

        yield instances
672
673
def ensure_windows_dev_ami(c: AWSConnection, prefix='hg-'):
    """Ensure Windows Development AMI is available and up-to-date.

    If necessary, a modern AMI will be built by starting a temporary EC2
    instance and bootstrapping it.

    Obsolete AMIs will be deleted so there is only a single AMI having the
    desired name.

    An AMI counts as up-to-date when its ``HGIMAGEFINGERPRINT`` tag equals
    the SHA-256 fingerprint computed below from the instance config, the
    user data template and the bootstrap commands.

    Returns an ``ec2.Image`` of either an existing AMI or a newly-built
    one.
    """
    ec2client = c.ec2client
    ec2resource = c.ec2resource
    ssmclient = c.session.client('ssm')

    name = '%s%s' % (prefix, 'windows-dev')

    # Launch configuration for the temporary build instance. It is part of
    # the fingerprint, so changing it forces the AMI to be rebuilt.
    config = {
        'BlockDeviceMappings': [
            {
                'DeviceName': '/dev/sda1',
                'Ebs': {
                    'DeleteOnTermination': True,
                    'VolumeSize': 32,
                    'VolumeType': 'gp2',
                },
            }
        ],
        'ImageId': find_windows_server_2019_image(ec2resource).id,
        'InstanceInitiatedShutdownBehavior': 'stop',
        'InstanceType': 't3.medium',
        'KeyName': '%sautomation' % prefix,
        'MaxCount': 1,
        'MinCount': 1,
        'SecurityGroupIds': [c.security_groups['windows-dev-1'].id],
    }

    # PowerShell commands run over WinRM after the first reboot.
    commands = [
        # Need to start the service so sshd_config is generated.
        'Start-Service sshd',
        'Write-Output "modifying sshd_config"',
        r'$content = Get-Content C:\ProgramData\ssh\sshd_config',
        '$content = $content -replace "Match Group administrators","" -replace "AuthorizedKeysFile __PROGRAMDATA__/ssh/administrators_authorized_keys",""',
        r'$content | Set-Content C:\ProgramData\ssh\sshd_config',
        'Import-Module OpenSSHUtils',
        r'Repair-SshdConfigPermission C:\ProgramData\ssh\sshd_config -Confirm:$false',
        'Restart-Service sshd',
        'Write-Output "installing OpenSSL client"',
        'Add-WindowsCapability -Online -Name OpenSSH.Client~~~~0.0.1.0',
        'Set-Service -Name sshd -StartupType "Automatic"',
        'Write-Output "OpenSSH server running"',
    ]

    # Append the dependency installation script, one command per line.
    with INSTALL_WINDOWS_DEPENDENCIES.open('r', encoding='utf-8') as fh:
        commands.extend(l.rstrip() for l in fh)

    # Disable Windows Defender when bootstrapping because it just slows
    # things down.
    commands.insert(0, 'Set-MpPreference -DisableRealtimeMonitoring $true')
    commands.append('Set-MpPreference -DisableRealtimeMonitoring $false')

    # Compute a deterministic fingerprint to determine whether image needs
    # to be regenerated.
    fingerprint = {
        'instance_config': config,
        'user_data': WINDOWS_USER_DATA,
        'initial_bootstrap': WINDOWS_BOOTSTRAP_POWERSHELL,
        'bootstrap_commands': commands,
    }

    # sort_keys keeps the serialization (and thus the hash) stable.
    fingerprint = json.dumps(fingerprint, sort_keys=True)
    fingerprint = hashlib.sha256(fingerprint.encode('utf-8')).hexdigest()

    # Find existing AMIs with this name and delete the ones that are invalid.
    # Store a reference to a good image so it can be returned once the
    # image state is reconciled.
    images = ec2resource.images.filter(
        Filters=[{'Name': 'name', 'Values': [name]}])

    existing_image = None

    for image in images:
        if image.tags is None:
            print('image %s for %s lacks required tags; removing' % (
                image.id, image.name))
            remove_ami(ec2resource, image)
        else:
            tags = {t['Key']: t['Value'] for t in image.tags}

            if tags.get('HGIMAGEFINGERPRINT') == fingerprint:
                existing_image = image
            else:
                print('image %s for %s has wrong fingerprint; removing' % (
                    image.id, image.name))
                remove_ami(ec2resource, image)

    if existing_image:
        return existing_image

    print('no suitable Windows development image found; creating one...')

    # The temporary instance must stay alive until the image has been
    # created from it, so everything below runs inside the context.
    with create_temp_windows_ec2_instances(c, config) as instances:
        assert len(instances) == 1
        instance = instances[0]

        wait_for_ssm(ssmclient, [instance])

        # On first boot, install various Windows updates.
        # We would ideally use PowerShell Remoting for this. However, there are
        # trust issues that make it difficult to invoke Windows Update
        # remotely. So we use SSM, which has a mechanism for running Windows
        # Update.
        print('installing Windows features...')
        run_ssm_command(
            ssmclient,
            [instance],
            'AWS-RunPowerShellScript',
            {
                'commands': WINDOWS_BOOTSTRAP_POWERSHELL.split('\n'),
            },
        )

        # Reboot so all updates are fully applied.
        print('rebooting instance %s' % instance.id)
        ec2client.reboot_instances(InstanceIds=[instance.id])

        # Fixed pause after requesting the reboot, before polling WinRM.
        time.sleep(15)

        print('waiting for Windows Remote Management to come back...')
        client = wait_for_winrm(instance.public_ip_address, 'Administrator',
                                c.automation.default_password())
        print('established WinRM connection to %s' % instance.id)
        instance.winrm_client = client

        print('bootstrapping instance...')
        run_powershell(instance.winrm_client, '\n'.join(commands))

        # The image is created from the stopped instance.
        print('bootstrap completed; stopping %s to create image' % instance.id)
        instance.stop()

        ec2client.get_waiter('instance_stopped').wait(
            InstanceIds=[instance.id],
            WaiterConfig={
                'Delay': 5,
            })
        print('%s is stopped' % instance.id)

        image = instance.create_image(
            Name=name,
            Description='Mercurial Windows development environment',
        )

        # Record the fingerprint so later runs can recognize this image.
        image.create_tags(Tags=[
            {
                'Key': 'HGIMAGEFINGERPRINT',
                'Value': fingerprint,
            },
        ])

        print('waiting for image %s' % image.id)

        ec2client.get_waiter('image_available').wait(
            ImageIds=[image.id],
        )

        print('image %s available as %s' % (image.id, image.name))

        return image
843
844
@contextlib.contextmanager
def temporary_windows_dev_instances(c: AWSConnection, image, instance_type,
                                    prefix='hg-', disable_antivirus=False):
    """Create a temporary Windows development EC2 instance from ``image``.

    Exactly one instance of type ``instance_type`` is launched. When
    ``disable_antivirus`` is true, Windows Defender real-time monitoring is
    switched off on it over WinRM before the instance is handed out.

    Context manager resolves to the list of ``EC2.Instance`` that were created.
    """
    root_volume = {
        'DeviceName': '/dev/sda1',
        'Ebs': {
            'DeleteOnTermination': True,
            'VolumeSize': 32,
            'VolumeType': 'gp2',
        },
    }

    launch_config = {
        'BlockDeviceMappings': [root_volume],
        'ImageId': image.id,
        'InstanceInitiatedShutdownBehavior': 'stop',
        'InstanceType': instance_type,
        'KeyName': '%sautomation' % prefix,
        'MaxCount': 1,
        'MinCount': 1,
        'SecurityGroupIds': [c.security_groups['windows-dev-1'].id],
    }

    with create_temp_windows_ec2_instances(c, launch_config) as instances:
        # With the flag off there is nothing to configure.
        for machine in (instances if disable_antivirus else ()):
            run_powershell(
                machine.winrm_client,
                'Set-MpPreference -DisableRealtimeMonitoring $true')

        yield instances