From ed749c4a91df1e7ad36c78944afaab5d1682e91d Mon Sep 17 00:00:00 2001 From: David Wolfe Date: Wed, 3 Jun 2026 16:28:47 -0700 Subject: [PATCH] DUPLO-42774 Update GitHub Actions workflows and add NetworkAgent files for Amazon Linux 2023 - Bumped GitHub Actions to latest in build-image.yaml and lint.yaml: actions/checkout (v6), google-github-actions/auth (v3), aws-actions/configure-aws-credentials (v6), actions/upload-artifact (v7), actions/download-artifact (v7), peter-evans/create-pull-request (v8). - Gated the GCP auth step behind an `if:` so it only runs when a GCP builder is in scope, and switched the commercial Packer AWS Role step to OIDC (role-to-assume + aws-region), dropping the static access-key/secret/session-token inputs. - Added NetworkAgent files for Amazon Linux 2023: NetworkAgent.service (systemd unit), NetworkAgentV2.py (network tunnel/config management), and Setup.sh (installs dependencies and configures the NetworkAgent service). - Enabled the Amazon Linux 2023 builders in Packer: - aws.pkr.hcl: uncommented the amazonlinux-2023 and amazonlinux-2023-arm64 sources, renamed them to use the `amazonlinux2023` family suffix, and pinned the source AMI filter to `al2023-ami-2023.*` to avoid the ECS/minimal variants. - main.pkr.hcl: added the AL2023 sources to the build and wired up yum-update and Setup.sh install provisioners scoped to the AL2023 builders. - Reworked packer/gen-native-images.sh to merge BuiltInNativeImages.json by Name, so a scoped build (e.g. only_builders=AL2023) only replaces the rows it rebuilt instead of wholesale-replacing every Docker-Duplo row, preserving AL2/Ubuntu/GovCloud rows. 
--- .github/workflows/build-image.yaml | 32 +- .github/workflows/lint.yaml | 2 +- AgentAmazonLinux2023/NetworkAgent.service | 28 ++ AgentAmazonLinux2023/NetworkAgentV2.py | 524 ++++++++++++++++++++++ AgentAmazonLinux2023/Setup.sh | 197 ++++++++ packer/aws.pkr.hcl | 233 +++++----- packer/gen-native-images.sh | 23 +- packer/main.pkr.hcl | 26 +- 8 files changed, 919 insertions(+), 146 deletions(-) create mode 100644 AgentAmazonLinux2023/NetworkAgent.service create mode 100644 AgentAmazonLinux2023/NetworkAgentV2.py create mode 100755 AgentAmazonLinux2023/Setup.sh diff --git a/.github/workflows/build-image.yaml b/.github/workflows/build-image.yaml index 389f36e..bd4a58a 100644 --- a/.github/workflows/build-image.yaml +++ b/.github/workflows/build-image.yaml @@ -31,22 +31,20 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@v6 - # GCP credentials + # GCP credentials - only when a GCP builder is in scope - name: Packer GCP Service Account - uses: google-github-actions/auth@v2 + if: github.event.inputs.only_builders == 'all' || contains(github.event.inputs.only_builders, 'googlecompute') + uses: google-github-actions/auth@v3 with: workload_identity_provider: 'projects/17033121890/locations/global/workloadIdentityPools/duplo-githubactions/providers/duplo-githubactions' service_account: 'packer@msp-duplocloud-01.iam.gserviceaccount.com' - # AWS credentials + # AWS credentials (OIDC: role-to-assume + aws-region only) - name: Packer AWS Role - uses: aws-actions/configure-aws-credentials@v4 + uses: aws-actions/configure-aws-credentials@v6 with: - aws-access-key-id: ${{ env.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ env.AWS_SECRET_ACCESS_KEY }} - aws-session-token: ${{ env.AWS_SESSION_TOKEN }} aws-region: us-west-2 role-to-assume: arn:aws:iam::227120241369:role/packer-builder role-session-name: github-duplocloud-linuxagent @@ -105,7 +103,7 @@ jobs: # Upload the image manifest - name: Attach Manifest - uses: 
actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: packer-manifest.json path: packer-manifest.json @@ -117,7 +115,7 @@ jobs: duplo_token: "${{ secrets.GOVCLOUD_DUPLO_TOKEN }}" steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@v6 # AWS credentials - name: Tenant AWS JIT @@ -127,7 +125,7 @@ jobs: # AWS credentials - name: Packer AWS Role - uses: aws-actions/configure-aws-credentials@v4 + uses: aws-actions/configure-aws-credentials@v6 with: aws-access-key-id: ${{ env.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ env.AWS_SECRET_ACCESS_KEY }} @@ -193,7 +191,7 @@ jobs: # Upload the image manifest - name: Attach Manifest - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: govcloud-packer-manifest.json path: govcloud-packer-manifest.json @@ -208,9 +206,9 @@ jobs: steps: # Get the code for the image JSON generation, and the code for Duplo master. - name: Checkout duplo-infra - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Checkout duplo (backend) - uses: actions/checkout@v4 + uses: actions/checkout@v6 with: repository: duplocloud-internal/duplo ref: master # always start from master @@ -219,12 +217,12 @@ jobs: # Download the image manifest - name: Download Manifest (Commercial) - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v7 with: name: packer-manifest.json path: packer - name: Download Manifest (Govcloud) - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v7 with: name: govcloud-packer-manifest.json path: packer @@ -238,7 +236,7 @@ jobs: # Create a PR - name: Create Pull Request - uses: peter-evans/create-pull-request@v4 + uses: peter-evans/create-pull-request@v8 with: title: '[duplo-bot] Update Duplo Docker AMI(s)' branch: auto-update/duplo-docker-amis diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index 922f9fd..7088f7b 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -6,7 +6,7 
@@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v6 # Validate templates - name: Validate Template diff --git a/AgentAmazonLinux2023/NetworkAgent.service b/AgentAmazonLinux2023/NetworkAgent.service new file mode 100644 index 0000000..c387245 --- /dev/null +++ b/AgentAmazonLinux2023/NetworkAgent.service @@ -0,0 +1,28 @@ +[Unit] +Description="NetworkAgent daemon" +Wants=network.target network-online.target autofs.service +After=network.target network-online.target autofs.service + +[Service] +LimitNOFILE=65536 +LimitNPROC=65536 +Type=simple +WorkingDirectory=/usr/local/src/AgentV2 +Environment=VIRTUAL_ENV=$WorkingDirectory/flask +Environment=PATH=$VIRTUAL_ENV/bin:$PATH +Environment=PYTHONPATH=$VIRTUAL_ENV/bin:$WorkingDirectory +Environment=DEFAULTS=NetworkAgentV2 +Environment=DAEMON_DIR=$WorkingDirectory +Environment=DAEMON=$DAEMON_DIR/NetworkAgentV2.py +Environment=DAEMON_NAME=NetworkAgent +EnvironmentFile=-/etc/default/NetworkAgentV2 +User=root +Group=root +UMask=0007 +ExecStart=/bin/bash -c '/usr/local/src/AgentV2/flask/bin/python /usr/local/src/AgentV2/NetworkAgentV2.py ' +ExecReload=/bin/bash -c '/usr/local/src/AgentV2/flask/bin/python /usr/local/src/AgentV2/NetworkAgentV2.py ' +PIDFile=/var/run/NetworkAgent.pid +Restart=on-failure + +[Install] +WantedBy=multi-user.target diff --git a/AgentAmazonLinux2023/NetworkAgentV2.py b/AgentAmazonLinux2023/NetworkAgentV2.py new file mode 100644 index 0000000..872cb29 --- /dev/null +++ b/AgentAmazonLinux2023/NetworkAgentV2.py @@ -0,0 +1,524 @@ +#!/usr/bin/env python +import requests +from flask import Flask, jsonify +from flask import request +import subprocess +import threading +import time +from threading import Thread +import socket +import pdb +import json +import argparse +import logging +import sys +import traceback +import os +import platform +import docker +import re +import base64 +import boto3 # /usr/local/src/AgentV2/flask/bin/pip install boto3 +from 
logging import handlers, Formatter + +app = Flask(__name__) + +g_udpmode = False +logger = None +currentContainers = {} +TenantID = 'Empty' +RegistryToken = 'Empty' +EngineEndpoint = 'Empty' +NetworkProvider = 'custom' +g_RequiredImages = None +g_NonDefaultRegistryTokenByName = None +g_NonDefaultRegistryNameByImage = None + +class Minion: + def __init__(self, name, subnet, directAddress, directIpAddr): + self.name = name + self.subnet = subnet + self.directAddress = directAddress + self.directIpAddress = directIpAddr + + def isEqual(self, minion): + val = False + if self.name != minion.name: + val = False + elif self.subnet != minion.subnet: + val = False + elif self.directAddress != minion.directAddress: + val = False + else: + val = True + + if val: + logger.debug('Minions ' + self.name + ' and ' + minion.name + ' are equal') + else: + logger.debug('Minions ' + self.name + ' and ' + minion.name + ' are not equal') + + return val + + def log(self): + val = self.name + ' ' + self.subnet + ' ' + self.directAddress + ' ' + self.directIpAddress + return val + + +def addRoute(aInSubnet, aInTunnelName): + lSubnetParts = aInSubnet.split(".") + lSubnet = lSubnetParts[0] + "." + lSubnetParts[1] + "." 
+ lSubnetParts[2] + ".0" + logger.debug('Adding Routes to ' + lSubnet + "via " + aInTunnelName) + # sudo route add -net 172.17.51.0 netmask 255.255.255.0 dev tun1 + val = subprocess.check_output( + ["sudo", "route", "add", "-net", lSubnet, "netmask", "255.255.255.0", "dev", aInTunnelName]) + if val != "": + lStatus = val.decode("utf-8") + logger.debug(lStatus) + +def getCurrentTunnels(): + val = subprocess.check_output(["sudo", "iptunnel", "show"]) + lStatus = val.decode("utf-8") + lTunnels = lStatus.splitlines() + lCurrentTuns = {} + for lTun in lTunnels: + lToks = lTun.split() + lName = lToks[0].replace(":", "") + if lName == 'gre0' or lName == 'tunl0': + logger.debug('Skipping default tunnels') + else: + logger.debug('Existing Tunnel ' + lName) + lCurrentTuns[lName] = lName + + return lCurrentTuns + +def addTunnel(aInName, aInSubnet, aInLocalAddress, aInRemoteAddress): + try: + subprocess.check_output( + ["sudo", "iptunnel", "add", aInName, "mode", "gre", "local", aInLocalAddress, "remote", aInRemoteAddress]) + subprocess.check_output(["sudo", "ifconfig", aInName, "up"]) + addRoute(aInSubnet, aInName) + except: + logger.error('Failed to add tunnel ' + aInName + ' will try again ***************************************') + # deleteTunnelInDriver(lKey) + +def deleteTunnel(aInName): + try: + subprocess.check_output(["sudo", "iptunnel", "del", aInName]) + except: + logger.error('Failed to delete tunnel ' + aInName) + +def updateTunnels(aInRemoteMinions, aInLocalMinion): + logger.debug('Begin reconciling tunnels ======================================================') + lExpectedTuns = {} + logger.debug('LOCAL*** ' + aInLocalMinion.log()) + lLocalAddr = aInLocalMinion.directIpAddress + # Use the last two octets of the local and remote IP as the tunnel name + lLocalParts = lLocalAddr.split(".") + lLocalSmallName = lLocalParts[2] + "." 
+ lLocalParts[3] + for lKey in aInRemoteMinions: + val = aInRemoteMinions[lKey].log() + logger.debug('REMOTE*** ' + val) + lRemoteAddr = aInRemoteMinions[lKey].directIpAddress + lRemoteParts = lRemoteAddr.split(".") + lRemoteSmallName = lRemoteParts[2] + "." + lRemoteParts[3] + lTunName = lLocalSmallName + '-' + lRemoteSmallName + logger.debug('Adding a expected tunnel name ' + lTunName) + lExpectedTuns[lTunName] = aInRemoteMinions[lKey] + + lCurrentTuns = getCurrentTunnels() + + lLocalAddr = aInLocalMinion.directIpAddress + for lKey in lExpectedTuns: + if lKey not in lCurrentTuns: + logger.debug('Adding Tunnel ' + lKey) + addTunnel(lKey, lExpectedTuns[lKey].subnet, lLocalAddr, lExpectedTuns[lKey].directIpAddress) + logger.debug('Successfully Added Tunnel ' + lKey + ' +++++++++++++++++++++++++++++++++++++++++') + + for lKey in lCurrentTuns: + if lKey not in lExpectedTuns: + try: + logger.debug('Unwanted tunnel ' + lKey) + deleteTunnel(lKey) + except: + logger.error('Failed to delete unwanted tunnel') + + logger.debug('End Reconciling Tunnels ======================================================') + + return + +def updateTopology(): + global TenantID + global NetworkProvider + + if TenantID == 'Empty': + logger.debug('TenantID has not been set yet') + return + logger.debug('Value of Network Provider is ' + NetworkProvider) + if NetworkProvider == 'custom': + logger.debug('Network provider is custom, no config needed by us') + return + + hostName = socket.gethostname() + localIpAddr = socket.gethostbyname(hostName) + logger.debug(hostName + ' = ' + localIpAddr) + + url = EngineEndpoint + '/subscriptions/' + TenantID + '/GetMinions' + logger.debug(url) + r = requests.get(url) + if r.status_code != requests.codes.ok: + logger.debug("GET call failed ") + return + + lFoundLocal = False + rMinions = {} + for lMinion in r.json(): + lname = lMinion['Name'].lower() + try: + ldirectAddress = lMinion['DirectAddress'].lower() + logger.debug('Trying to resolve ' + 
ldirectAddress) + ldirectIpAddress = socket.gethostbyname(ldirectAddress) + logger.debug(ldirectAddress + ' = ' + ldirectIpAddress) + lsubnet = lMinion['Subnet'] + if localIpAddr != ldirectIpAddress: + logger.debug('Adding a remote Minion ' + lname) + rMinions[lname] = Minion(lname, lsubnet, ldirectAddress, ldirectIpAddress) + else: + logger.debug('Adding a local minion ' + lname) + localMinion = Minion(lname, lsubnet, ldirectAddress, ldirectIpAddress) + lFoundLocal = True + except: + logger.error('Error in handling minion ' + lname) + + if not lFoundLocal: + logger.error('Error we cannot find our own Minion') + return + + updateTunnels(rMinions, localMinion) + ''' + try: + updateNetfilters(localMinion) + except Exception, e: + nfltErr = "Couldn't do it: %s" % e + logger.error("Error updating netfilters error: " + nfltErr) + ''' + return + + +@app.route('/NetworkAgent/api/v1.0/UpdateMinionState', methods=['POST']) +def UpdateMinionState(): + global TenantID + global RegistryToken + global EngineEndpoint + global NetworkProvider + global g_udpmode + global g_RequiredImages + global g_NonDefaultRegistryTokenByName + global g_NonDefaultRegistryNameByImage + + lSubnet = request.json['Subnet'] + lMinionName = request.json['Name'] + lMode = request.json['TunnelMode'] + + NetworkProvider = 'default' + + RegistryToken = request.json['RegistryToken'] + if TenantID == 'Empty': + TenantID = request.json['TenantID'] + EngineEndpoint = request.json['EngineEndpoint'] + + logger.debug(request.json) + if 'Images' in request.json: + logger.debug('UpdateMinionState: Required Images has been set') + if request.json['Images'] is not None: + g_RequiredImages = list() + for lImg in request.json['Images']: + g_RequiredImages.append(lImg) + logger.debug(g_RequiredImages) + else: + logger.debug('UpdateMinionState: Required Images was not set') + + if 'NonDefaultRegistryTokenByName' in request.json: + g_NonDefaultRegistryTokenByName = request.json['NonDefaultRegistryTokenByName'] + + if 
'NonDefaultRegistryNameByImage' in request.json: + g_NonDefaultRegistryNameByImage = request.json['NonDefaultRegistryNameByImage'] + + return jsonify({}), 201 + + +@app.route('/NetworkAgent/api/v1.0/GetTenantID', methods=['GET']) +def gettenantid(): + global TenantID + return jsonify({'TenantID': TenantID}) + + +def updateTopologyThread(): + while (True): + time.sleep(10) + try: + updateTopology() + except: + logger.error('****************************** UpdateTopology encountered an exception') + + logger.debug('================================= updateTopology completed') + + +def downloadImage(aInImageName): + global RegistryToken + global g_NonDefaultRegistryTokenByName + global g_NonDefaultRegistryNameByImage + logger.debug('Starting downloading ... ' + aInImageName) + lImageDwldUrl = 'http://127.0.0.1:4243/images/create?fromImage=' + aInImageName + lPayload = {} + + imagePullToken = RegistryToken + if ( + g_NonDefaultRegistryNameByImage and aInImageName in g_NonDefaultRegistryNameByImage + and g_NonDefaultRegistryTokenByName and g_NonDefaultRegistryNameByImage[aInImageName] in g_NonDefaultRegistryTokenByName + ): + imagePullToken = g_NonDefaultRegistryTokenByName[g_NonDefaultRegistryNameByImage[aInImageName]] + + headers = {'X-Registry-Auth': imagePullToken} + + r = requests.post(lImageDwldUrl, data=aInImageName, headers=headers) + + if r.ok: + logger.debug('Finished downloading repo ' + aInImageName) + else: + logger.debug('image download failed with return code and message' + str(r.status_code) + " " + str(r.json())) + logger.debug('Trying image download without docker creds for image - ' + aInImageName) + r = requests.post(lImageDwldUrl, data=aInImageName) + logger.debug( + 'image download without docker creds status return code and message' + str(r.status_code) + " " + str( + r.json())) + + +def downloadImageEcr(aInImageName): + logger.debug('Starting downloading ECR is_ecr=True ... 
' + aInImageName) + region_name, account_id = getRegionNameAndAccountFromImageName(aInImageName) + #logger.debug('downloading ECR is_ecr=True region_name=' + region_name ) + + # login to ecr + ecr_client = boto3.client('ecr', region_name=region_name) + + # create docker client + url = 'http://127.0.0.1:4243' + docker_client = docker.DockerClient(base_url=url, version='auto') + + # get token + token = ecr_client.get_authorization_token(registryIds=[ + account_id, + ]) + username, password = base64.b64decode( + token['authorizationData'][0]['authorizationToken']).decode().split(':') + registry = token['authorizationData'][0]['proxyEndpoint'] + + # docker_client login + rep = docker_client.login( + username, password, registry=registry, reauth=True) + # image_name = aInImageName #'128329325849.dkr.ecr.us-west-2.amazonaws.com/reoecr1:latest' repodel1 + rep = docker_client.images.pull(aInImageName) + + logger.debug('Finished downloading ECR repo is_ecr=True image=' + + aInImageName + ' region_name=' + region_name) + + +def getRegionNameAndAccountFromImageName(aInImageName): + arr1 = aInImageName.split(".dkr.ecr.") + if len(arr1) > 0 and arr1[1] is not None: + region_name = arr1[1].split(".")[0] + account_id = arr1[0] + else: + logger.debug("region_name invalid ECR aInImageName " + aInImageName) + logger.debug("region_name=" + region_name + " account_id=" + account_id) + return (region_name, account_id) + + +def updateImages(): + global TenantID + global g_RequiredImages + + if TenantID == 'Empty': + logger.debug('TenantID has not been set yet') + return + + logger.debug('updateImages call ...') + logger.debug(g_RequiredImages) + + lLocalImages = {} + lDockersImgUrl = 'http://127.0.0.1:4243/images/json' + r = requests.get(lDockersImgUrl) + for lLocalImgTags in r.json(): + try: + for lLocalImg in lLocalImgTags['RepoTags']: + try: + if lLocalImg not in lLocalImages: + logger.debug('Exists image name ' + lLocalImg) + lLocalImages[lLocalImg] = lLocalImg + except Exception as 
e: + nfltErr = "Error 1 processing a tag in img: %s " % e + logger.error(nfltErr) + exc_type, exc_value, exc_traceback = sys.exc_info() + el = repr(traceback.format_exception(exc_type, exc_value, exc_traceback)) + logger.error(nfltErr + el) + except Exception as e: + nfltErr = "Error 2 processing images : %s " % e + logger.error(nfltErr) + exc_type, exc_value, exc_traceback = sys.exc_info() + el = repr(traceback.format_exception(exc_type, exc_value, exc_traceback)) + logger.error(nfltErr + el) + + + lNeededImages = g_RequiredImages + if lNeededImages is None: + lImagesUrl = EngineEndpoint + '/subscriptions/' + TenantID + '/GetImages' + logger.debug(lImagesUrl) + r = requests.get(lImagesUrl) + if r.status_code != requests.codes.ok: + logger.debug("GET Images call failed ") + return + lNeededImages = r.json() + logger.debug("updateImages: Required Images has been retrieved from master pull") + else: + logger.debug("updateImages: Required Images has been set from master api") + + for lImage in lNeededImages: + is_ecr = re.match(r'^[^/]*\.dkr.ecr\.[^/]*\.amazonaws.com\/', lImage, re.I) + + logger.debug('Required Image Name ' + lImage + ' is_ecr?=' + str(is_ecr)) + # lRequiredRepo = lImage.split(":")[0] + if lImage in lLocalImages: + logger.debug('Required image exists ' + lImage) + else: + logger.debug('++++++++++ Need to download Image ' + lImage) + try: + if is_ecr: + downloadImageEcr(lImage) + else: + downloadImage(lImage) + except Exception as e: + nfltErr = "Error 3 The download error was : %s " % e + logger.error( nfltErr) + exc_type, exc_value, exc_traceback = sys.exc_info() + el = repr(traceback.format_exception(exc_type, exc_value, exc_traceback)) + logger.error(nfltErr + el) + + +def pruneImages(): + try: + logger.debug('Start pruning images') + client = docker.from_env() + filters = {'dangling': '0'} + client.images.prune(filters) + logger.debug('Finished pruning') + except Exception as e: + nfltErr = "Error 4 pruning images 10 : %s " % e + logger.error( 
nfltErr) + exc_type, exc_value, exc_traceback = sys.exc_info() + el = repr(traceback.format_exception(exc_type, exc_value, exc_traceback)) + logger.error(nfltErr + el) + + +def updateImagesThread(): + lCount = 0 + while (True): + time.sleep(12) + + try: + lCount = lCount + 1 + if lCount >= 7200: + pruneImages() + lCount = 0 + except Exception as e: + nfltErr = "Error 5 updateImagesThread pruning images : %s " % e + logger.error( nfltErr) + exc_type, exc_value, exc_traceback = sys.exc_info() + el = repr(traceback.format_exception(exc_type, exc_value, exc_traceback)) + logger.error(nfltErr + el) + + try: + updateImages() + except Exception as e: + nfltErr = "Error 6 updateImagesThread updateImages : %s" % e + logger.error(nfltErr + nfltErr) + # exc_type, exc_value, exc_traceback = sys.exc_info() + # el = repr(traceback.format_exception(exc_type, exc_value, exc_traceback)) + # logger.error(nfltErr+ el) + logger.debug('=============================================== UpdateImages Completed') + + +def setLogger(): + logFile = "/var/log/NetworkAgent.log" + logger = logging.getLogger('NetworkAgent') + + fh = handlers.RotatingFileHandler(logFile, maxBytes=5000000, backupCount=5) + logFormat = Formatter('%(asctime)s %(levelname)s %(message)s') + fh.setFormatter(logFormat) + + logger.addHandler(fh) + logger.setLevel(logging.DEBUG) + f = open('/dev/null', 'w') + sys.stdout = f + sys.stderr = f + sys.stdin.close() + + logger.debug('stdout/stderr redirected to /dev/null ...') + + return logger + +def daemonizeUbuntu(): + process_id = os.getpid() + logger.debug('Process ID after setid(): %s...' % str(process_id)) + # os.popen( process_id+ " > /var/run/NetworkAgent.pid ; chmod +x /var/run/NetworkAgent.pid") + + # + path='/var/run/NetworkAgent.pid' + pidfile = open(path, 'w') + pidfile.write("%d" % process_id) + pidfile.close() + os.chmod(path, 0o444) + # + # # Set umask to default to safe file permissions when running + # # as a root daemon. 027 is an octal number. 
+ os.umask(0o27) + # + # # Change to a known directory. If this isn't done, starting + # # a daemon in a subdirectory that needs to be deleted results + # # in "directory busy" errors. + # # On some systems, running with chdir("/") is not allowed, + # # so this should be settable by the user of this library. + os.chdir('/') + + logger.debug('Daemonization complete') + + +def getLinuxDistro(): + # todo: for centos + # dist = platform.dist() + # return dist[0] + return "Ubuntu" + + +def main(): + global logger + logger = setLogger() + logger.debug('Network Agent in GRE mode') + linuxDistro = getLinuxDistro() + logger.debug('Create Daemon on %s' % linuxDistro) + if linuxDistro == 'Ubuntu': + logger.debug('Create Daemon on %s' % linuxDistro) + daemonizeUbuntu() + else: + logger.debug('Daemon on Linux Distro %s is not supported...' % linuxDistro) + + logger.debug('Launching Image update Thread') + lImagesthrd = Thread(target=updateImagesThread, args=[]) + lImagesthrd.setDaemon(True) + lImagesthrd.start() + + app.run(host='0.0.0.0', port=60035, debug=True, use_reloader=False) + + +if __name__ == '__main__': + main() + diff --git a/AgentAmazonLinux2023/Setup.sh b/AgentAmazonLinux2023/Setup.sh new file mode 100755 index 0000000..4d4520d --- /dev/null +++ b/AgentAmazonLinux2023/Setup.sh @@ -0,0 +1,197 @@ +#!/bin/bash +set -eu + +# +# Agent variables +# +AGENT='NetworkAgentV2' +DAEMON_DEFAULT_FILE="/etc/default/$AGENT" +DAEMON_NAME="NetworkAgentV2" +DAEMON_DIR='/usr/local/src/AgentV2' +PYTHON_PATH="$DAEMON_DIR/flask/bin" +DAEMON="$DAEMON_DIR/NetworkAgentV2.py" +DOWNLOAD_URL="https://api.github.com/repos/duplocloud/linuxagent/contents/AgentAmazonLinux2023" + +DOCKER_OVERRIDE_DIR="/etc/systemd/system/docker.service.d" +DOCKER_OVERRIDE_FILE="$DOCKER_OVERRIDE_DIR/api.conf" + +if [ -z "${DOWNLOAD_REF:-}" ]; then + DOWNLOAD_REF='' +else + DOWNLOAD_REF="?ref=${DOWNLOAD_REF}" +fi + +# +# Install Docker, build toolchain, and Python 3 base for AL2023. 
+# AL2023 has no amazon-linux-extras; Docker comes from the default repo. +# +installDependencies () { + echo "AL2023: installing Docker and base packages" + sudo dnf update -q -y + sudo dnf install -q -y \ + docker \ + amazon-ecr-credential-helper \ + git wget net-tools vim \ + gcc \ + python3 python3-pip python3-devel \ + iptables-devel kernel-headers + + sudo usermod -a -G docker ec2-user + mkdir -p ~/.docker + echo '{ "credsStore": "ecr-login" }' > ~/.docker/config.json + + sudo mkdir -p "$DOCKER_OVERRIDE_DIR" + sudo tee "$DOCKER_OVERRIDE_FILE" > /dev/null </dev/null || true + sudo systemctl mask ecs 2>/dev/null || true + sudo dnf -q -y remove ecs-init || true + sudo rm -rf /var/lib/ecs /etc/ecs /var/log/ecs + for image in amazon/amazon-ecs-agent amazon/amazon-ecs-pause; do + sudo docker image rm -f "$image:latest" 2>/dev/null || true + sudo docker image rm -f "$image:0.1.0" 2>/dev/null || true + done + sudo docker rm -f ecs-agent 2>/dev/null || true +} + +# +# Create the Python 3 venv, install agent dependencies, fetch the agent +# script and systemd unit from GitHub, and register the unit. +# +agentInstall () { + echo "AL2023: installing NetworkAgent in Python 3 venv" + + if [ -f $DAEMON_DEFAULT_FILE ]; then + sudo rm $DAEMON_DEFAULT_FILE + sudo touch $DAEMON_DEFAULT_FILE + fi + + echo "DAEMON=$DAEMON" | sudo tee --append $DAEMON_DEFAULT_FILE > /dev/null + echo "DAEMON_DIR=$DAEMON_DIR" | sudo tee --append $DAEMON_DEFAULT_FILE > /dev/null + echo "PYTHON_PATH=$PYTHON_PATH" | sudo tee --append $DAEMON_DEFAULT_FILE > /dev/null + cat $DAEMON_DEFAULT_FILE + + cd "$DAEMON_DIR" + python3 -m venv flask + flask/bin/pip install --upgrade pip + # Install all agent dependencies in a single pip invocation so the + # resolver picks a globally-consistent set. Sequential pip installs + # silently downgrade urllib3 from 2.x to 1.26.x because botocore's + # transitive constraint is only seen on the boto3 install call. 
+ flask/bin/pip --trusted-host pypi.python.org install \ + flask \ + requests \ + python-pytun \ + python-iptables \ + docker \ + boto3 + + cd "$DAEMON_DIR" + curl -H "Accept: application/vnd.github.v3.raw" -o NetworkAgentV2.py -L "$DOWNLOAD_URL/NetworkAgentV2.py$DOWNLOAD_REF" + chmod a+x NetworkAgentV2.py + ls -alt "$DAEMON_DIR" + + cd /lib/systemd/system + sudo curl -H "Accept: application/vnd.github.v3.raw" -o NetworkAgent.service -L "$DOWNLOAD_URL/NetworkAgent.service$DOWNLOAD_REF" + ls -alt NetworkAgent.service + + sudo systemctl daemon-reload + sudo systemctl enable NetworkAgent.service + # Do not start NetworkAgent.service here. Starting it during the + # Packer bake disrupts iptables/networking and severs the SSM/SSH + # session, which surfaces as `Bad exit status: -1`. The unit is + # enabled and will start automatically on first boot. +} + +getOSType () { + if [ -f /etc/os-release ]; then + . /etc/os-release + OS=$ID + VER=$VERSION_ID + else + OS=$(uname -s) + VER=$(uname -r) + fi +} + +# +# Step 0: detect OS +# +getOSType +echo "Detected OS=$OS VER=$VER" +if [ "$OS" != "amzn" ] || [ "$VER" != "2023" ]; then + echo "WARNING: this script targets Amazon Linux 2023; detected $OS $VER" +fi + +# +# Step 1: install Docker and base packages +# +echo "--------------------------OS=$OS VER=$VER--------------------------" +echo "Step 1: install Docker and base packages" +echo "--------------------------OS=$OS VER=$VER--------------------------" +installDependencies + +echo "===========================" +echo " Docker installed " +echo "===========================" + +# +# Step 1b: strip Amazon ECS agent if the source AMI happened to be ECS-optimized +# +echo "--------------------------OS=$OS VER=$VER--------------------------" +echo "Step 1b: remove Amazon ECS agent if present" +echo "--------------------------OS=$OS VER=$VER--------------------------" +removeEcsAgent + +# +# Step 2: ensure agent directory exists +# +echo "--------------------------OS=$OS 
VER=$VER--------------------------" +echo "Step 2: ensure agent directory exists" +echo "--------------------------OS=$OS VER=$VER--------------------------" +sudo mkdir -p "$DAEMON_DIR" +sudo chown -R "$USER" "$DAEMON_DIR" +ls -alt "$DAEMON_DIR" + +# +# Step 3: stop any running agent before re-installing +# +echo "--------------------------OS=$OS VER=$VER--------------------------" +echo "Step 3: stop any running $AGENT" +echo "--------------------------OS=$OS VER=$VER--------------------------" +if pgrep -f "$DAEMON" > /dev/null; then + echo "$AGENT is running, killing" + sudo pkill -9 -f "$DAEMON" || true +else + echo "$AGENT is not running" +fi + +# +# Step 4: clean prior install artifacts and lay down agent +# +echo "--------------------------OS=$OS VER=$VER--------------------------" +echo "Step 4: install NetworkAgent" +echo "--------------------------OS=$OS VER=$VER--------------------------" +cd "$DAEMON_DIR" +sudo rm -rf NetworkAgentV2.py flask +agentInstall + +echo "AL2023 setup complete" +exit 0 diff --git a/packer/aws.pkr.hcl b/packer/aws.pkr.hcl index a6ee3d7..cefa526 100644 --- a/packer/aws.pkr.hcl +++ b/packer/aws.pkr.hcl @@ -380,118 +380,121 @@ source "amazon-ebs" "amazonlinux-2-arm64" { } } -// source "amazon-ebs" "amazonlinux-2023" { -// ami_name = "${local.image_family}-al2023-${local.image_version}" -// ami_description = "${local.image_description} (al2023)" -// instance_type = var.aws_instance_type -// region = var.aws_region -// vpc_id = var.aws_vpc_id -// subnet_id = var.aws_subnet_id -// security_group_id = var.aws_security_group_id -// iam_instance_profile = var.aws_iam_instance_profile -// associate_public_ip_address = true - -// temporary_key_pair_type = var.temporary_key_pair_type -// ssh_username = "ec2-user" -// ssh_interface = "session_manager" - -// source_ami_filter { -// filters = { -// name = "al2023-ami-*-kernel-6.1-x86_64" -// root-device-type = "ebs" -// virtualization-type = "hvm" -// } -// most_recent = true -// owners = 
["amazon"] -// } - -// # Build a public AMI -// encrypt_boot = false -// ami_groups = local.is_public ? ["all"] : [] -// ami_regions = [for region in local.ami_regions: region if region != var.aws_region] - -// # Customize the volumes -// launch_block_device_mappings { -// device_name = "/dev/xvda" -// encrypted = false -// volume_size = 35 -// volume_type = "gp3" -// delete_on_termination = true -// } - -// # Source instance tags. -// run_tags = { -// Name = "Packer Builder: ${local.image_family}-al2023-${local.image_version}" -// Creator = "Packer" -// } -// run_volume_tags = { -// Creator = "Packer" -// } - -// # Target AMI tags. -// tags = { -// Name = "${local.image_family}-al2023-${local.image_version}" -// Creator = "Packer" -// } -// snapshot_tags = { -// Creator = "Packer" -// } -// } - -// source "amazon-ebs" "amazonlinux-2023-arm64" { -// ami_name = "${local.image_family}-al2023-arm64-${local.image_version}" -// ami_description = "${local.image_description} arm64 (al2023)" -// instance_type = var.aws_instance_type_arm64 -// region = var.aws_region -// vpc_id = var.aws_vpc_id -// subnet_id = var.aws_subnet_id -// security_group_id = var.aws_security_group_id -// iam_instance_profile = var.aws_iam_instance_profile -// associate_public_ip_address = true - -// temporary_key_pair_type = var.temporary_key_pair_type -// ssh_username = "ec2-user" -// ssh_interface = "session_manager" - -// source_ami_filter { -// filters = { -// name = "al2023-ami-*-kernel-6.1-arm64" -// root-device-type = "ebs" -// virtualization-type = "hvm" -// } -// most_recent = true -// owners = ["amazon"] -// } - -// # Build a public AMI -// encrypt_boot = false -// ami_groups = local.is_public ? 
["all"] : [] -// ami_regions = [for region in local.ami_regions: region if region != var.aws_region] - -// # Customize the volumes -// launch_block_device_mappings { -// device_name = "/dev/xvda" -// encrypted = false -// volume_size = 35 -// volume_type = "gp3" -// delete_on_termination = true -// } - -// # Source instance tags. -// run_tags = { -// Name = "Packer Builder: ${local.image_family}-al2023-arm64-${local.image_version}" -// Creator = "Packer" -// } -// run_volume_tags = { -// Creator = "Packer" -// } - -// # Target AMI tags. -// tags = { -// Name = "${local.image_family}-al2023-arm64-${local.image_version}" -// Creator = "Packer" -// } -// snapshot_tags = { -// Creator = "Packer" -// } -// } +source "amazon-ebs" "amazonlinux-2023" { + ami_name = "${local.image_family}-amazonlinux2023-${local.image_version}" + ami_description = "${local.image_description} (amazonlinux2023)" + instance_type = var.aws_instance_type + region = var.aws_region + vpc_id = var.aws_vpc_id + subnet_id = var.aws_subnet_id + security_group_id = var.aws_security_group_id + iam_instance_profile = var.aws_iam_instance_profile + associate_public_ip_address = true + + temporary_key_pair_type = var.temporary_key_pair_type + ssh_username = "ec2-user" + ssh_interface = "session_manager" + + # Pin to "al2023-ami-2023.*" (the plain AL2023 base) rather than "al2023-ami-*", so the ECS and minimal variants cannot match. + source_ami_filter { + filters = { + name = "al2023-ami-2023.*-kernel-6.1-x86_64" + root-device-type = "ebs" + virtualization-type = "hvm" + } + most_recent = true + owners = ["amazon"] + } + + # Build a public AMI + encrypt_boot = false + ami_groups = local.is_public ? ["all"] : [] + ami_regions = [for region in local.ami_regions: region if region != var.aws_region] + + # Customize the volumes + launch_block_device_mappings { + device_name = "/dev/xvda" + encrypted = false + volume_size = 35 + volume_type = "gp3" + delete_on_termination = true + } + + # Source instance tags. 
+ run_tags = { + Name = "Packer Builder: ${local.image_family}-amazonlinux2023-${local.image_version}" + Creator = "Packer" + } + run_volume_tags = { + Creator = "Packer" + } + + # Target AMI tags. + tags = { + Name = "${local.image_family}-amazonlinux2023-${local.image_version}" + Creator = "Packer" + } + snapshot_tags = { + Creator = "Packer" + } +} + +source "amazon-ebs" "amazonlinux-2023-arm64" { + ami_name = "${local.image_family}-amazonlinux2023-arm64-${local.image_version}" + ami_description = "${local.image_description} arm64 (amazonlinux2023)" + instance_type = var.aws_instance_type_arm64 + region = var.aws_region + vpc_id = var.aws_vpc_id + subnet_id = var.aws_subnet_id + security_group_id = var.aws_security_group_id + iam_instance_profile = var.aws_iam_instance_profile + associate_public_ip_address = true + + temporary_key_pair_type = var.temporary_key_pair_type + ssh_username = "ec2-user" + ssh_interface = "session_manager" + + # Pin to "al2023-ami-2023.*" rather than "al2023-ami-*" so that most_recent + # cannot pick an ECS or minimal variant instead of the plain AL2023 base. + source_ami_filter { + filters = { + name = "al2023-ami-2023.*-kernel-6.1-arm64" + root-device-type = "ebs" + virtualization-type = "hvm" + } + most_recent = true + owners = ["amazon"] + } + + # Build a public AMI + encrypt_boot = false + ami_groups = local.is_public ? ["all"] : [] + ami_regions = [for region in local.ami_regions: region if region != var.aws_region] + + # Customize the volumes + launch_block_device_mappings { + device_name = "/dev/xvda" + encrypted = false + volume_size = 35 + volume_type = "gp3" + delete_on_termination = true + } + + # Source instance tags. + run_tags = { + Name = "Packer Builder: ${local.image_family}-amazonlinux2023-arm64-${local.image_version}" + Creator = "Packer" + } + run_volume_tags = { + Creator = "Packer" + } + + # Target AMI tags. 
+ tags = { + Name = "${local.image_family}-amazonlinux2023-arm64-${local.image_version}" + Creator = "Packer" + } + snapshot_tags = { + Creator = "Packer" + } +} diff --git a/packer/gen-native-images.sh b/packer/gen-native-images.sh index f4d4268..934538d 100755 --- a/packer/gen-native-images.sh +++ b/packer/gen-native-images.sh @@ -110,12 +110,25 @@ out "NativeImages JSON: snippet done" snippet="$(pwd -P)/snippet-NativeImages.json" (cd "$DUPLO_SOURCE" && - # Join the default Duplo docker image ... - # ... with the remaining Duplo docker images - # ... and then all other images - jq 'input + (. | map(select(.Name | startswith("Docker-Duplo") | not)))' \ + # Merge by Name so a scoped Packer build (for example only_builders=AL2023) + # only replaces the rows it actually rebuilt. Rules: + # 1. Every row in the snippet (the newly-built AMIs) wins. + # 2. Every non-"Docker-Duplo*" row in the existing JSON is preserved as-is. + # 3. Every existing "Docker-Duplo*" row whose Name is NOT present in the + # snippet is preserved (it was not part of this build). + # This keeps AL2 / Ubuntu* / GovCloud rows intact when the dispatch was + # scoped to AL2023, instead of wholesale-replacing every Docker-Duplo row. + # IN($newNames[]) is used instead of `$newNames | index(.Name)` because + # piping into index rebinds `.` and would read .Name off the array itself. + jq 'input as $snippet + | ($snippet | map(.Name)) as $newNames + | $snippet + + (. 
| map(select( + ((.Name | startswith("Docker-Duplo")) | not) + or ((.Name | IN($newNames[])) | not) + )))' \ config/V1/BuiltInNativeImages.json "$snippet" > temp.json && - + # Replace the existing JSON mv temp.json config/V1/BuiltInNativeImages.json ) diff --git a/packer/main.pkr.hcl b/packer/main.pkr.hcl index 07e0ef2..5a58729 100644 --- a/packer/main.pkr.hcl +++ b/packer/main.pkr.hcl @@ -26,16 +26,20 @@ build { "sources.amazon-ebs.ubuntu-22-arm64", "sources.amazon-ebs.amazonlinux-2", "sources.amazon-ebs.amazonlinux-2-arm64", -// "sources.amazon-ebs.amazonlinux-2023", -// "sources.amazon-ebs.amazonlinux-2023-arm64", + "sources.amazon-ebs.amazonlinux-2023", + "sources.amazon-ebs.amazonlinux-2023-arm64", "sources.googlecompute.ubuntu-20", "sources.googlecompute.ubuntu-22" ] - // OS updates - Amazon Linux + // OS updates - Amazon Linux (yum on AL2023 is a wrapper for dnf) provisioner "shell" { inline = [ "sleep 10", "sudo yum update -y" ] - only = [ "amazon-ebs.amazonlinux-2" ] + only = [ + "amazon-ebs.amazonlinux-2", + "amazon-ebs.amazonlinux-2023", + "amazon-ebs.amazonlinux-2023-arm64" + ] } // OS updates - Ubuntu @@ -59,10 +63,16 @@ build { environment_vars = [ "DOWNLOAD_REF=${var.agent_git_ref}" ] - only = [ - "amazon-ebs.amazonlinux-2", "amazon-ebs.amazonlinux-2-arm64", - // "amazon-ebs.amazonlinux-2023", "amazon-ebs.amazonlinux-2023-arm64" + only = [ "amazon-ebs.amazonlinux-2", "amazon-ebs.amazonlinux-2-arm64" ] + } + + // Install - Amazon Linux 2023 + provisioner "shell" { + script = "${path.root}/../AgentAmazonLinux2023/Setup.sh" + environment_vars = [ + "DOWNLOAD_REF=${var.agent_git_ref}" ] + only = [ "amazon-ebs.amazonlinux-2023", "amazon-ebs.amazonlinux-2023-arm64" ] } // Install - Ubuntu 20 @@ -92,7 +102,7 @@ build { ] only = [ "amazon-ebs.amazonlinux-2", "amazon-ebs.amazonlinux-2-arm64", - // "amazon-ebs.amazonlinux-2023", "amazon-ebs.amazonlinux-2023-arm64" + "amazon-ebs.amazonlinux-2023", "amazon-ebs.amazonlinux-2023-arm64" ] }