diff --git a/.github/checksums/appdsmartagent_64_linux_25.12.0.661.zip.sha256 b/.github/checksums/appdsmartagent_64_linux_25.12.0.661.zip.sha256 new file mode 100644 index 0000000..f9ffe60 --- /dev/null +++ b/.github/checksums/appdsmartagent_64_linux_25.12.0.661.zip.sha256 @@ -0,0 +1 @@ +8f76215f637043224d702638ac7ca1d41b83fa7c736d47f5aac30a34bcde3dbd appdsmartagent_64_linux_25.12.0.661.zip diff --git a/.github/scripts/check-client-inventory-api.sh b/.github/scripts/check-client-inventory-api.sh new file mode 100644 index 0000000..77b2438 --- /dev/null +++ b/.github/scripts/check-client-inventory-api.sh @@ -0,0 +1,265 @@ +#!/usr/bin/env bash +set -uo pipefail + +OPENAPI_FILE="${OPENAPI_FILE:-openapi.json}" +CONFIG_FILE="${CONFIG_FILE:-config.ini}" +CLIENT_INVENTORY_SAMPLE_SIZE="${CLIENT_INVENTORY_SAMPLE_SIZE:-1}" +API_CHECK_DRY_RUN="${API_CHECK_DRY_RUN:-false}" +API_CHECK_WARN_ONLY="${API_CHECK_WARN_ONLY:-false}" +API_CHECK_TIMEOUT="${API_CHECK_TIMEOUT:-20}" +API_CHECK_CONNECT_TIMEOUT="${API_CHECK_CONNECT_TIMEOUT:-10}" + +EXPECTED_OPERATIONS=( + listClients + getClient + getClientConfig + batchGetConfigs +) + +fail() { + echo "ERROR: $*" >&2 + return 1 +} + +warn() { + echo "WARNING: $*" >&2 +} + +is_true() { + [[ "${1,,}" == "true" || "$1" == "1" || "${1,,}" == "yes" ]] +} + +require_tool() { + local tool_name="$1" + + command -v "$tool_name" >/dev/null 2>&1 || + fail "Missing required tool: $tool_name" +} + +validate_sample_size() { + [[ "$CLIENT_INVENTORY_SAMPLE_SIZE" =~ ^[0-9]+$ ]] || + fail "CLIENT_INVENTORY_SAMPLE_SIZE must be numeric" + + if ((CLIENT_INVENTORY_SAMPLE_SIZE < 1 || CLIENT_INVENTORY_SAMPLE_SIZE > 100)); then + fail "CLIENT_INVENTORY_SAMPLE_SIZE must be between 1 and 100" + fi +} + +validate_openapi() { + [[ -f "$OPENAPI_FILE" ]] || fail "OpenAPI file not found: $OPENAPI_FILE" + jq empty "$OPENAPI_FILE" || return 1 + + local operation + for operation in "${EXPECTED_OPERATIONS[@]}"; do + jq -e --arg operation "$operation" ' + [ + .. 
| objects | select(has("operationId")) | .operationId + ] | index($operation) + ' "$OPENAPI_FILE" >/dev/null || + fail "OpenAPI operation is missing: $operation" || + return 1 + done + + jq -e ' + .paths["/clients"].get.operationId == "listClients" and + .paths["/clients/{id}"].get.operationId == "getClient" and + .paths["/clients/{id}/config"].get.operationId == "getClientConfig" and + .paths["/clients/configs:batch"].post.operationId == "batchGetConfigs" + ' "$OPENAPI_FILE" >/dev/null || + fail "OpenAPI paths do not match expected Client Inventory operations" +} + +derive_base_url() { + local base_url="${CLIENT_INVENTORY_API_BASE_URL:-}" + local controller_url + + if [[ -z "$base_url" ]]; then + [[ -f "$CONFIG_FILE" ]] || fail "Config file not found: $CONFIG_FILE" + controller_url="$( + awk -F= ' + /^[[:space:]]*ControllerURL[[:space:]]*=/ { + gsub(/[[:space:]]/, "", $2) + print $2 + exit + } + ' "$CONFIG_FILE" + )" + + [[ -n "$controller_url" ]] || + fail "ControllerURL was not found in $CONFIG_FILE" || + return 1 + + base_url="https://${controller_url}/fm-service/v1" + fi + + base_url="${base_url%/}" + [[ "$base_url" =~ ^https?://[^/[:space:]]+/.+ ]] || + fail "CLIENT_INVENTORY_API_BASE_URL must include scheme, host, and path" + + printf '%s\n' "$base_url" +} + +urlencode() { + jq -nr --arg value "$1" '$value | @uri' +} + +request_json() { + local method="$1" + local url="$2" + local response_file="$3" + local body_file="${4:-}" + local status + local curl_status + local curl_args=( + --silent + --show-error + --connect-timeout "$API_CHECK_CONNECT_TIMEOUT" + --max-time "$API_CHECK_TIMEOUT" + --request "$method" + --header "Accept: application/json" + --header "X-SF-Token: ${CLIENT_INVENTORY_API_TOKEN}" + --output "$response_file" + --write-out "%{http_code}" + ) + + if [[ -n "$body_file" ]]; then + curl_args+=( + --header "Content-Type: application/json" + --data-binary "@${body_file}" + ) + fi + + status="$(curl "${curl_args[@]}" "$url")" + curl_status=$? 
+ + if ((curl_status != 0)); then + fail "curl failed for $method $url" + return 1 + fi + + if [[ "$status" != "200" ]]; then + echo "Response body:" >&2 + sed -n '1,40p' "$response_file" >&2 + fail "$method $url returned HTTP $status, expected 200" + return 1 + fi + + jq empty "$response_file" >/dev/null 2>&1 || + fail "$method $url did not return valid JSON" +} + +run_live_checks() { + local base_url="$1" + local tmp_dir="$2" + local list_response="$tmp_dir/list.json" + local client_response="$tmp_dir/client.json" + local config_response="$tmp_dir/config.json" + local batch_body="$tmp_dir/batch-request.json" + local batch_response="$tmp_dir/batch-response.json" + local encoded_client_id + local first_client_id + local client_ids_json + + request_json \ + GET \ + "${base_url}/clients?limit=${CLIENT_INVENTORY_SAMPLE_SIZE}&include_health=false" \ + "$list_response" || + return 1 + + jq -e ' + type == "object" and + (.clients | type == "array") and + (.pagination | type == "object") + ' "$list_response" >/dev/null || + fail "GET /clients response did not match expected list shape" || + return 1 + + client_ids_json="$( + jq -c --argjson limit "$CLIENT_INVENTORY_SAMPLE_SIZE" ' + [.clients[:$limit][]?.instance_uid | select(type == "string" and length > 0)] + ' "$list_response" + )" + + first_client_id="$(jq -r '.[0] // empty' <<<"$client_ids_json")" + if [[ -z "$first_client_id" ]]; then + warn "GET /clients returned no clients; skipping ID-based operations" + return 0 + fi + + encoded_client_id="$(urlencode "$first_client_id")" + + request_json GET "${base_url}/clients/${encoded_client_id}" "$client_response" || + return 1 + jq -e --arg id "$first_client_id" ' + type == "object" and .instance_uid == $id + ' "$client_response" >/dev/null || + fail "GET /clients/{id} response did not match requested client ID" || + return 1 + + request_json \ + GET \ + "${base_url}/clients/${encoded_client_id}/config?stringify=true" \ + "$config_response" || + return 1 + jq -e 'type == 
"object"' "$config_response" >/dev/null || + fail "GET /clients/{id}/config response was not an object" || + return 1 + + jq -n --argjson clientIds "$client_ids_json" \ + '{clientIds: $clientIds, stringify: true}' > "$batch_body" + + request_json POST "${base_url}/clients/configs:batch" "$batch_response" "$batch_body" || + return 1 + jq -e 'type == "object"' "$batch_response" >/dev/null || + fail "POST /clients/configs:batch response was not an object" +} + +run_check() { + local base_url + local tmp_dir + + require_tool awk || return 1 + require_tool curl || return 1 + require_tool jq || return 1 + validate_sample_size || return 1 + validate_openapi || return 1 + + base_url="$(derive_base_url)" || return 1 + echo "Client Inventory API base URL: $base_url" + echo "OpenAPI file: $OPENAPI_FILE" + echo "Sample size: $CLIENT_INVENTORY_SAMPLE_SIZE" + + if is_true "$API_CHECK_DRY_RUN"; then + if [[ -n "${CLIENT_INVENTORY_API_TOKEN:-}" ]]; then + echo "Token: configured" + else + echo "Token: missing" + fi + echo "Dry run complete; live API requests were not sent" + return 0 + fi + + [[ -n "${CLIENT_INVENTORY_API_TOKEN:-}" ]] || + fail "CLIENT_INVENTORY_API_TOKEN is required for live API checks" || + return 1 + + tmp_dir="$(mktemp -d)" + trap 'rm -rf "$tmp_dir"' RETURN + + run_live_checks "$base_url" "$tmp_dir" || return 1 + echo "Client Inventory API check passed" +} + +run_check +status=$? 
+ +if ((status == 0)); then + exit 0 +fi + +if is_true "$API_CHECK_WARN_ONLY"; then + echo "::warning::Client Inventory API check failed; continuing because warn-only mode is enabled" + exit 0 +fi + +exit "$status" diff --git a/.github/scripts/create-batches.sh b/.github/scripts/create-batches.sh new file mode 100644 index 0000000..4b0560d --- /dev/null +++ b/.github/scripts/create-batches.sh @@ -0,0 +1,83 @@ +#!/usr/bin/env bash +set -euo pipefail + +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=.github/scripts/lib.sh +source "$script_dir/lib.sh" + +require_tool jq +require_env GITHUB_OUTPUT + +batch_size_input="${BATCH_SIZE:-256}" +if [[ ! "$batch_size_input" =~ ^[0-9]+$ ]]; then + echo "batch_size must be numeric: $batch_size_input" >&2 + exit 1 +fi + +batch_size=$((10#$batch_size_input)) +if ((batch_size < 1 || batch_size > 256)); then + echo "batch_size must be between 1 and 256: $batch_size" >&2 + exit 1 +fi + +hosts_json="$( + printf '%s\n' "${DEPLOYMENT_HOSTS:-}" | + tr -d '\r' | + jq -Rsc ' + split("\n") + | map(gsub("^\\s+|\\s+$"; "")) + | map(select(length > 0)) + ' +)" + +total_hosts="$(jq 'length' <<<"$hosts_json")" +if ((total_hosts == 0)); then + echo "DEPLOYMENT_HOSTS must contain at least one host" >&2 + exit 1 +fi + +invalid_hosts="$( + jq -r ' + .[] + | select( + (test("^[A-Za-z0-9]([A-Za-z0-9.-]*[A-Za-z0-9])?$") | not) + or contains("..") + ) + ' <<<"$hosts_json" +)" + +if [[ -n "$invalid_hosts" ]]; then + echo "DEPLOYMENT_HOSTS contains invalid host values:" >&2 + printf '%s\n' "$invalid_hosts" >&2 + exit 1 +fi + +duplicate_hosts="$( + jq -r ' + group_by(.) 
+ | map(select(length > 1) | .[0]) + | .[] + ' <<<"$(jq -c 'sort' <<<"$hosts_json")" +)" + +if [[ -n "$duplicate_hosts" ]]; then + echo "DEPLOYMENT_HOSTS contains duplicate hosts:" >&2 + printf '%s\n' "$duplicate_hosts" >&2 + exit 1 +fi + +total_batches=$(((total_hosts + batch_size - 1) / batch_size)) +batches="$( + jq -c --argjson batch_size "$batch_size" ' + [range(0; length; $batch_size) as $start + | .[$start:($start + $batch_size)]] + ' <<<"$hosts_json" +)" + +{ + echo "batches=$batches" + echo "total_hosts=$total_hosts" + echo "total_batches=$total_batches" +} >> "$GITHUB_OUTPUT" + +echo "Prepared $total_hosts hosts across $total_batches batches" diff --git a/.github/scripts/deploy-smartagent-batch.sh b/.github/scripts/deploy-smartagent-batch.sh new file mode 100644 index 0000000..d7bb31a --- /dev/null +++ b/.github/scripts/deploy-smartagent-batch.sh @@ -0,0 +1,130 @@ +#!/usr/bin/env bash +set -euo pipefail + +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=.github/scripts/lib.sh +source "$script_dir/lib.sh" + +require_tool awk +require_tool jq +require_tool scp +require_tool sha256sum +require_tool ssh +require_tool ssh-keyscan +require_env APPD_ACCOUNT_ACCESS_KEY + +validate_batch_hosts_json +initialize_remote_identity + +shopt -s nullglob +agent_zips=(appdsmartagent_64_linux_*.zip) +shopt -u nullglob + +if (( ${#agent_zips[@]} != 1 )); then + echo "Expected exactly one appdsmartagent_64_linux_*.zip file" >&2 + printf 'Found: %s\n' "${agent_zips[@]:-none}" >&2 + exit 1 +fi + +agent_zip="${agent_zips[0]}" +agent_zip_basename="$(basename "$agent_zip")" +checksum_file=".github/checksums/${agent_zip_basename}.sha256" + +if [[ ! 
-f "$checksum_file" ]]; then + echo "Missing checksum file: $checksum_file" >&2 + exit 1 +fi + +sha256sum --check "$checksum_file" + +tmp_dir="$(mktemp -d)" +key_file="$tmp_dir/id_rsa" +config_file="$tmp_dir/config.ini" +fail_file="$tmp_dir/failed_hosts" +touch "$fail_file" +trap 'rm -rf "$tmp_dir"' EXIT + +awk ' + { + while ((placeholder = index($0, "{{ACCOUNT_ACCESS_KEY}}")) > 0) { + $0 = substr($0, 1, placeholder - 1) \ + ENVIRON["APPD_ACCOUNT_ACCESS_KEY"] \ + substr($0, placeholder + 22) + } + print + } +' config.ini > "$config_file" + +write_ssh_key "$key_file" + +batch_size="$(jq 'length' <<<"$BATCH_HOSTS")" +echo "Deploying $agent_zip_basename to batch of $batch_size hosts" + +remote_script="$( + cat <> "$fail_file" + exit 0 + fi + + if ! copy_to_remote_tmp \ + "$host" \ + "$key_file" \ + "$known_hosts_file" \ + "$agent_zip" \ + "$config_file"; then + echo "Failed to copy deployment artifacts to $host" >&2 + echo "$host" >> "$fail_file" + exit 0 + fi + + if run_remote_script "$host" "$key_file" "$known_hosts_file" "$remote_script"; then + echo "Completed deployment to $host" + else + echo "Failed deployment to $host" >&2 + echo "$host" >> "$fail_file" + fi + ) & +done < <(jq -r '.[]' <<<"$BATCH_HOSTS") + +wait || true + +if [[ -s "$fail_file" ]]; then + echo "Some hosts failed:" >&2 + sort -u "$fail_file" >&2 + exit 1 +fi + +echo "Batch deployment complete" diff --git a/.github/scripts/lib.sh b/.github/scripts/lib.sh new file mode 100644 index 0000000..bfb07d5 --- /dev/null +++ b/.github/scripts/lib.sh @@ -0,0 +1,170 @@ +#!/usr/bin/env bash + +require_tool() { + local tool_name="$1" + + if ! command -v "$tool_name" >/dev/null 2>&1; then + echo "Missing required tool: $tool_name" >&2 + exit 1 + fi +} + +require_env() { + local env_name="$1" + + if [[ -z "${!env_name:-}" ]]; then + echo "Missing required environment variable: $env_name" >&2 + exit 1 + fi +} + +validate_host() { + local host="$1" + + if [[ ! 
"$host" =~ ^[A-Za-z0-9]([A-Za-z0-9.-]*[A-Za-z0-9])?$ ]]; then + echo "Invalid host value: $host" >&2 + exit 1 + fi + + if [[ "$host" == *..* ]]; then + echo "Invalid host value: $host" >&2 + exit 1 + fi +} + +validate_account_name() { + local name="$1" + local value="$2" + + if [[ -n "$value" && ! "$value" =~ ^[A-Za-z_][A-Za-z0-9_.-]*$ ]]; then + echo "Invalid $name value: $value" >&2 + exit 1 + fi +} + +validate_group_name() { + local name="$1" + local value="$2" + + if [[ -n "$value" && ! "$value" =~ ^[A-Za-z_][A-Za-z0-9_.-]*$ ]]; then + echo "Invalid $name value: $value" >&2 + exit 1 + fi +} + +validate_batch_hosts_json() { + require_env BATCH_HOSTS + require_tool jq + + jq -e ' + type == "array" + and length > 0 + and all(.[]; type == "string" and length > 0) + ' <<<"$BATCH_HOSTS" >/dev/null + + while IFS= read -r host; do + validate_host "$host" + done < <(jq -r '.[]' <<<"$BATCH_HOSTS") +} + +write_ssh_key() { + local key_file="$1" + + require_env SSH_PRIVATE_KEY + printf '%s\n' "$SSH_PRIVATE_KEY" > "$key_file" + chmod 600 "$key_file" +} + +scan_host_key() { + local host="$1" + local known_hosts_file="$2" + local timeout="${SSH_KEYSCAN_TIMEOUT:-10}" + + if ! ssh-keyscan -T "$timeout" "$host" > "$known_hosts_file" 2>/dev/null; then + echo "Failed to collect SSH host key for $host" >&2 + return 1 + fi + + if [[ ! 
-s "$known_hosts_file" ]]; then + echo "No SSH host key returned for $host" >&2 + return 1 + fi +} + +remote_launcher() { + printf \ + 'SSH_USER=%s SMARTAGENT_USER=%s SMARTAGENT_GROUP=%s TARGET_OWNER=%s TARGET_GROUP=%s bash -seuo pipefail' \ + "$SSH_USER" \ + "${SMARTAGENT_USER:-}" \ + "${SMARTAGENT_GROUP:-}" \ + "$TARGET_OWNER" \ + "$TARGET_GROUP" +} + +run_remote_script() { + local host="$1" + local key_file="$2" + local known_hosts_file="$3" + local remote_script="$4" + local connect_timeout="${SSH_CONNECT_TIMEOUT:-30}" + + ssh \ + -i "$key_file" \ + -o BatchMode=yes \ + -o StrictHostKeyChecking=yes \ + -o UserKnownHostsFile="$known_hosts_file" \ + -o ConnectTimeout="$connect_timeout" \ + "${SSH_USER}@${host}" \ + "$(remote_launcher)" \ + <<<"$remote_script" +} + +remote_output_reports_error() { + local output_file="$1" + + if jq -e 'type == "object" and .error == true' "$output_file" >/dev/null 2>&1; then + return 0 + fi + + grep -Eq '"error"[[:space:]]*:[[:space:]]*true' "$output_file" +} + +copy_to_remote_tmp() { + local host="$1" + local key_file="$2" + local known_hosts_file="$3" + local connect_timeout="${SSH_CONNECT_TIMEOUT:-30}" + shift 3 + + scp \ + -i "$key_file" \ + -o BatchMode=yes \ + -o StrictHostKeyChecking=yes \ + -o UserKnownHostsFile="$known_hosts_file" \ + -o ConnectTimeout="$connect_timeout" \ + "$@" \ + "${SSH_USER}@${host}:/tmp/" +} + +initialize_remote_identity() { + SSH_USER="${SSH_USER:-ubuntu}" + SMARTAGENT_USER="${SMARTAGENT_USER:-}" + SMARTAGENT_GROUP="${SMARTAGENT_GROUP:-}" + + validate_account_name SSH_USER "$SSH_USER" + validate_account_name SMARTAGENT_USER "$SMARTAGENT_USER" + validate_group_name SMARTAGENT_GROUP "$SMARTAGENT_GROUP" + + if [[ -n "$SMARTAGENT_USER" && -z "$SMARTAGENT_GROUP" ]]; then + echo "SMARTAGENT_GROUP is required when SMARTAGENT_USER is set" >&2 + exit 1 + fi + + if [[ -z "$SMARTAGENT_USER" && -n "$SMARTAGENT_GROUP" ]]; then + echo "SMARTAGENT_USER is required when SMARTAGENT_GROUP is set" >&2 + exit 1 + fi + 
+ TARGET_OWNER="${SMARTAGENT_USER:-$SSH_USER}" + TARGET_GROUP="${SMARTAGENT_GROUP:-$TARGET_OWNER}" +} diff --git a/.github/scripts/run-remote-command-batch.sh b/.github/scripts/run-remote-command-batch.sh new file mode 100644 index 0000000..5011c21 --- /dev/null +++ b/.github/scripts/run-remote-command-batch.sh @@ -0,0 +1,64 @@ +#!/usr/bin/env bash +set -euo pipefail + +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=.github/scripts/lib.sh +source "$script_dir/lib.sh" + +require_tool jq +require_tool ssh +require_tool ssh-keyscan +require_env REMOTE_COMMAND + +validate_batch_hosts_json +initialize_remote_identity + +operation_label="${OPERATION_LABEL:-Run remote command}" +batch_size="$(jq 'length' <<<"$BATCH_HOSTS")" + +tmp_dir="$(mktemp -d)" +key_file="$tmp_dir/id_rsa" +fail_file="$tmp_dir/failed_hosts" +touch "$fail_file" +trap 'rm -rf "$tmp_dir"' EXIT + +write_ssh_key "$key_file" + +echo "$operation_label on batch of $batch_size hosts" + +while IFS= read -r host; do + ( + known_hosts_file="$(mktemp "$tmp_dir/known_hosts.XXXXXX")" + output_file="$(mktemp "$tmp_dir/output.XXXXXX")" + echo "Starting $operation_label on $host" + + if ! 
scan_host_key "$host" "$known_hosts_file"; then + echo "$host" >> "$fail_file" + exit 0 + fi + + if run_remote_script "$host" "$key_file" "$known_hosts_file" "$REMOTE_COMMAND" > "$output_file" 2>&1; then + cat "$output_file" + if remote_output_reports_error "$output_file"; then + echo "$operation_label reported an error on $host" >&2 + echo "$host" >> "$fail_file" + exit 0 + fi + echo "Completed $operation_label on $host" + else + cat "$output_file" >&2 + echo "Failed $operation_label on $host" >&2 + echo "$host" >> "$fail_file" + fi + ) & +done < <(jq -r '.[]' <<<"$BATCH_HOSTS") + +wait || true + +if [[ -s "$fail_file" ]]; then + echo "Some hosts failed:" >&2 + sort -u "$fail_file" >&2 + exit 1 +fi + +echo "Batch complete" diff --git a/.github/scripts/test-client-inventory-api.sh b/.github/scripts/test-client-inventory-api.sh new file mode 100644 index 0000000..ca6cc8e --- /dev/null +++ b/.github/scripts/test-client-inventory-api.sh @@ -0,0 +1,78 @@ +#!/usr/bin/env bash +set -euo pipefail + +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +repo_root="$(cd "$script_dir/../.." 
&& pwd)" +script="$script_dir/check-client-inventory-api.sh" + +run_success_case() { + local name="$1" + shift + + if "$@" >/tmp/client_inventory_test.out 2>&1; then + echo "ok - $name" + else + echo "not ok - $name" >&2 + cat /tmp/client_inventory_test.out >&2 + exit 1 + fi +} + +run_failure_case() { + local name="$1" + shift + + if "$@" >/tmp/client_inventory_test.out 2>&1; then + echo "not ok - $name" >&2 + cat /tmp/client_inventory_test.out >&2 + exit 1 + fi + + echo "ok - $name" +} + +run_success_case \ + "derives base URL from config.ini" \ + env \ + API_CHECK_DRY_RUN=true \ + CLIENT_INVENTORY_API_TOKEN=test-token \ + bash "$script" +grep -F "https://fso-tme.saas.appdynamics.com/fm-service/v1" \ + /tmp/client_inventory_test.out >/dev/null + +run_success_case \ + "accepts explicit base URL override" \ + env \ + API_CHECK_DRY_RUN=true \ + CLIENT_INVENTORY_API_BASE_URL=https://example.test/fm-service/v1/ \ + CLIENT_INVENTORY_API_TOKEN=test-token \ + bash "$script" +grep -F "https://example.test/fm-service/v1" \ + /tmp/client_inventory_test.out >/dev/null + +run_success_case \ + "dry-run reports missing token without failing" \ + env \ + API_CHECK_DRY_RUN=true \ + bash "$script" +grep -F "Token: missing" /tmp/client_inventory_test.out >/dev/null + +run_success_case \ + "warn-only mode tolerates missing token" \ + env \ + API_CHECK_WARN_ONLY=true \ + bash "$script" +grep -F "warn-only mode is enabled" /tmp/client_inventory_test.out >/dev/null + +bad_openapi="$(mktemp)" +trap 'rm -f "$bad_openapi" /tmp/client_inventory_test.out' EXIT +jq 'del(.paths["/clients"].get.operationId)' \ + "$repo_root/openapi.json" > "$bad_openapi" + +run_failure_case \ + "rejects missing OpenAPI operation" \ + env \ + API_CHECK_DRY_RUN=true \ + OPENAPI_FILE="$bad_openapi" \ + CLIENT_INVENTORY_API_TOKEN=test-token \ + bash "$script" diff --git a/.github/scripts/test-create-batches.sh b/.github/scripts/test-create-batches.sh new file mode 100644 index 0000000..3355b62 --- /dev/null +++ 
b/.github/scripts/test-create-batches.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash +set -euo pipefail + +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +run_success_case() { + local name="$1" + local hosts="$2" + local batch_size="$3" + local expected_total="$4" + local expected_batches="$5" + local output_file + + output_file="$(mktemp)" + DEPLOYMENT_HOSTS="$hosts" \ + BATCH_SIZE="$batch_size" \ + GITHUB_OUTPUT="$output_file" \ + bash "$script_dir/create-batches.sh" >/dev/null + + grep -Fx "total_hosts=$expected_total" "$output_file" >/dev/null + grep -Fx "total_batches=$expected_batches" "$output_file" >/dev/null + rm -f "$output_file" + echo "ok - $name" +} + +run_failure_case() { + local name="$1" + local hosts="$2" + local batch_size="$3" + local output_file + + output_file="$(mktemp)" + if DEPLOYMENT_HOSTS="$hosts" \ + BATCH_SIZE="$batch_size" \ + GITHUB_OUTPUT="$output_file" \ + bash "$script_dir/create-batches.sh" >/dev/null 2>&1; then + echo "not ok - $name" >&2 + rm -f "$output_file" + exit 1 + fi + + rm -f "$output_file" + echo "ok - $name" +} + +run_success_case \ + "trims whitespace and batches hosts" \ + $' 172.31.1.5 \n\nhost-2.example.com\nhost3' \ + "2" \ + "3" \ + "2" + +run_success_case \ + "accepts maximum batch size" \ + $'h1\nh2\nh3\nh4\nh5' \ + "256" \ + "5" \ + "1" + +run_failure_case "rejects empty host list" "" "256" +run_failure_case "rejects invalid host" $'good\nbad host' "256" +run_failure_case "rejects duplicate host" $'host1\nhost1' "256" +run_failure_case "rejects zero batch size" "host1" "0" +run_failure_case "rejects nonnumeric batch size" "host1" "abc" +run_failure_case "rejects oversized batch size" "host1" "257" diff --git a/.github/scripts/test-lib.sh b/.github/scripts/test-lib.sh new file mode 100644 index 0000000..47c56b0 --- /dev/null +++ b/.github/scripts/test-lib.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +set -euo pipefail + +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck 
source=.github/scripts/lib.sh +source "$script_dir/lib.sh" + +tmp_dir="$(mktemp -d)" +trap 'rm -rf "$tmp_dir"' EXIT + +write_output() { + local name="$1" + local content="$2" + local output_file="$tmp_dir/$name" + + printf '%s\n' "$content" > "$output_file" + printf '%s\n' "$output_file" +} + +run_reports_error_case() { + local name="$1" + local content="$2" + local output_file + + output_file="$(write_output "$name" "$content")" + if ! remote_output_reports_error "$output_file"; then + echo "not ok - $name" >&2 + exit 1 + fi + + echo "ok - $name" +} + +run_no_error_case() { + local name="$1" + local content="$2" + local output_file + + output_file="$(write_output "$name" "$content")" + if remote_output_reports_error "$output_file"; then + echo "not ok - $name" >&2 + exit 1 + fi + + echo "ok - $name" +} + +run_reports_error_case "detects structured smartagent error" '{"error": true}' +run_reports_error_case \ + "detects smartagent error inside mixed output" \ + $'starting\n{\n "error": true,\n "logs": "failed"\n}' +run_no_error_case "accepts structured smartagent success" '{"error": false}' +run_no_error_case "ignores unrelated text" "error: true but not JSON" diff --git a/.github/workflows/check-client-inventory-api.yml b/.github/workflows/check-client-inventory-api.yml new file mode 100644 index 0000000..e6e6982 --- /dev/null +++ b/.github/workflows/check-client-inventory-api.yml @@ -0,0 +1,45 @@ +--- +name: 12. 
Check Client Inventory API + +"on": + workflow_dispatch: + inputs: + base_url: + description: Optional API base URL override + required: false + type: string + sample_size: + description: Number of clients to sample for ID-based checks + required: false + default: 1 + type: number + dry_run: + description: Validate inputs and OpenAPI contract without live requests + required: false + default: false + type: boolean + warn_only: + description: Log API check failures as warnings instead of failing + required: false + default: false + type: boolean + +permissions: + contents: read + +jobs: + check_client_inventory_api: + runs-on: self-hosted + steps: + - name: Checkout repository + uses: actions/checkout@v6 + + - name: Check Client Inventory API + env: + API_CHECK_DRY_RUN: ${{ inputs.dry_run || false }} + API_CHECK_WARN_ONLY: ${{ inputs.warn_only || false }} + CLIENT_INVENTORY_API_BASE_URL: ${{ inputs.base_url }} + CLIENT_INVENTORY_API_TOKEN: >- + ${{ secrets.CLIENT_INVENTORY_API_TOKEN }} + CLIENT_INVENTORY_SAMPLE_SIZE: ${{ inputs.sample_size || 1 }} + run: bash .github/scripts/check-client-inventory-api.sh diff --git a/.github/workflows/cleanup-appdynamics.yml b/.github/workflows/cleanup-appdynamics.yml index b690c87..61d9421 100644 --- a/.github/workflows/cleanup-appdynamics.yml +++ b/.github/workflows/cleanup-appdynamics.yml @@ -1,14 +1,21 @@ -name: 11. Cleanup All Agents +--- +name: 11. 
Cleanup Smart Agent Directory -# Cleanup workflow -on: +"on": workflow_dispatch: inputs: batch_size: - description: 'Number of hosts per batch' + description: Number of hosts per batch required: false - default: '256' - type: string + default: 256 + type: number + +permissions: + contents: read + +concurrency: + group: smartagent-lifecycle-${{ github.ref }} + cancel-in-progress: false jobs: prepare: @@ -18,83 +25,86 @@ jobs: total_hosts: ${{ steps.create-batches.outputs.total_hosts }} total_batches: ${{ steps.create-batches.outputs.total_batches }} steps: + - name: Checkout repository + uses: actions/checkout@v6 + - id: create-batches - run: | - HOSTS=$(echo "${{ vars.DEPLOYMENT_HOSTS }}" | tr -d '\r' | grep -v '^$') - TOTAL=$(echo "$HOSTS" | wc -l | xargs) - echo "total_hosts=$TOTAL" >> $GITHUB_OUTPUT - BATCH_SIZE=${{ github.event.inputs.batch_size || '256' }} - TOTAL_BATCHES=$(( (TOTAL + BATCH_SIZE - 1) / BATCH_SIZE )) - echo "total_batches=$TOTAL_BATCHES" >> $GITHUB_OUTPUT - BATCHES=$(echo "$HOSTS" | awk -v batch_size=$BATCH_SIZE ' - BEGIN { batch=0; print "[" } - { - if (NR % batch_size == 1) { - if (NR > 1) { print "]" } - if (NR > 1) { print "," } - print "[" - printf "\"%s\"", $0 - } else { - printf ",\"%s\"", $0 - } - } - END { if (NR > 0) { print "]" }; print "]" } - ' | jq -c .) 
- echo "batches=$BATCHES" >> $GITHUB_OUTPUT - echo "📊 Cleaning up /opt/appdynamics from $TOTAL hosts across $TOTAL_BATCHES batches" + name: Create host batches + env: + DEPLOYMENT_HOSTS: ${{ vars.DEPLOYMENT_HOSTS }} + BATCH_SIZE: ${{ inputs.batch_size || 256 }} + run: bash .github/scripts/create-batches.sh cleanup-batch: needs: prepare runs-on: self-hosted + if: >- + needs.prepare.result == 'success' && + needs.prepare.outputs.total_batches != '0' strategy: max-parallel: 1 matrix: batch: ${{ fromJson(needs.prepare.outputs.batches) }} steps: - - name: Delete /opt/appdynamics from batch hosts - run: | - BATCH_HOSTS='${{ toJson(matrix.batch) }}' - BATCH_SIZE=$(echo "$BATCH_HOSTS" | jq '. | length') - SSH_USER="${{ vars.SSH_USER || 'ubuntu' }}" - echo "🚀 Deleting /opt/appdynamics from batch of $BATCH_SIZE hosts" - - FAILED_HOSTS=() - echo "$BATCH_HOSTS" | jq -r '.[]' | while read HOST; do - ( - echo "📡 Cleaning up /opt/appdynamics on $HOST" - mkdir -p ~/.ssh - echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_rsa_$HOST - chmod 600 ~/.ssh/id_rsa_$HOST - - if ssh -i ~/.ssh/id_rsa_$HOST -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=30 "${SSH_USER}@${HOST}" 'if [ -d "/opt/appdynamics/appdsmartagent" ]; then sudo rm -rf /opt/appdynamics/appdsmartagent/* /opt/appdynamics/appdsmartagent/.[!.]* && sudo chown -R ubuntu:ubuntu /opt/appdynamics/appdsmartagent/ && echo "✓ Cleared contents of /opt/appdynamics/appdsmartagent and fixed permissions"; else echo "Directory /opt/appdynamics/appdsmartagent does not exist"; fi'; then - echo "✅ Completed cleanup on $HOST" - else - echo "❌ Failed to clean up $HOST" >&2 - echo "$HOST" >> /tmp/failed_hosts_$$ - fi - - rm -f ~/.ssh/id_rsa_$HOST - ) & - done - wait - - if [ -f /tmp/failed_hosts_$$ ]; then - echo "❌ Some hosts failed:" - cat /tmp/failed_hosts_$$ - rm -f /tmp/failed_hosts_$$ - exit 1 - fi - echo "✅ Batch cleanup complete" + - name: Checkout repository + uses: actions/checkout@v6 + + - name: Clear 
Smart Agent directory on batch hosts + env: + BATCH_HOSTS: ${{ toJson(matrix.batch) }} + OPERATION_LABEL: Cleanup Smart Agent Directory + REMOTE_COMMAND: | + if [[ -d /opt/appdynamics/appdsmartagent ]]; then + sudo find /opt/appdynamics/appdsmartagent \ + -mindepth 1 \ + -maxdepth 1 \ + -exec rm -rf -- {} + + sudo chown -R "$TARGET_OWNER:$TARGET_GROUP" \ + /opt/appdynamics/appdsmartagent + echo "Cleared /opt/appdynamics/appdsmartagent" + else + echo "/opt/appdynamics/appdsmartagent does not exist" + fi + SMARTAGENT_GROUP: ${{ vars.SMARTAGENT_GROUP }} + SMARTAGENT_USER: ${{ vars.SMARTAGENT_USER }} + SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }} + SSH_USER: ${{ vars.SSH_USER || 'ubuntu' }} + run: bash .github/scripts/run-remote-command-batch.sh + + check_client_inventory_api: + needs: + - prepare + - cleanup-batch + runs-on: self-hosted + if: always() + steps: + - name: Checkout repository + uses: actions/checkout@v6 + + - name: Check Client Inventory API + env: + API_CHECK_WARN_ONLY: "true" + CLIENT_INVENTORY_API_BASE_URL: >- + ${{ vars.CLIENT_INVENTORY_API_BASE_URL }} + CLIENT_INVENTORY_API_TOKEN: >- + ${{ secrets.CLIENT_INVENTORY_API_TOKEN }} + CLIENT_INVENTORY_SAMPLE_SIZE: >- + ${{ vars.CLIENT_INVENTORY_SAMPLE_SIZE || 1 }} + run: bash .github/scripts/check-client-inventory-api.sh summary: - needs: [prepare, cleanup-batch] + needs: + - prepare + - cleanup-batch + - check_client_inventory_api runs-on: self-hosted if: always() steps: - - name: Cleanup Summary + - name: Cleanup summary run: | - echo "📊 AppDynamics Cleanup Summary" - echo "====================================" + echo "Smart Agent Directory Cleanup Summary" + echo "=====================================" echo "Total hosts: ${{ needs.prepare.outputs.total_hosts }}" echo "Total batches: ${{ needs.prepare.outputs.total_batches }}" echo "Status: ${{ needs.cleanup-batch.result }}" + echo "API: ${{ needs.check_client_inventory_api.result }}" diff --git a/.github/workflows/deploy-agent-batched.yml 
b/.github/workflows/deploy-agent-batched.yml index 3717d94..3782554 100644 --- a/.github/workflows/deploy-agent-batched.yml +++ b/.github/workflows/deploy-agent-batched.yml @@ -1,13 +1,21 @@ +--- name: 1. Deploy Smart Agent -on: +"on": workflow_dispatch: inputs: batch_size: - description: 'Number of hosts per batch' + description: Number of hosts per batch required: false - default: '256' - type: string + default: 256 + type: number + +permissions: + contents: read + +concurrency: + group: smartagent-lifecycle-${{ github.ref }} + cancel-in-progress: false jobs: prepare: @@ -17,189 +25,74 @@ jobs: total_hosts: ${{ steps.create-batches.outputs.total_hosts }} total_batches: ${{ steps.create-batches.outputs.total_batches }} steps: - - name: Ensure jq is installed - run: | - if ! command -v jq >/dev/null 2>&1; then - sudo apt-get update -qq && sudo apt-get install -y jq - fi + - name: Checkout repository + uses: actions/checkout@v6 - id: create-batches - shell: bash - run: | - set -euo pipefail - - # Read hosts (one per line) from Actions var, normalize, drop empties - HOSTS=$(printf '%s\n' "${{ vars.DEPLOYMENT_HOSTS }}" | tr -d '\r' | grep -v '^\s*$' || true) - - # Count total hosts - TOTAL=$(printf '%s\n' "$HOSTS" | wc -l | xargs || true) - echo "total_hosts=$TOTAL" >> "$GITHUB_OUTPUT" - - if [ "${TOTAL:-0}" -eq 0 ]; then - echo "total_batches=0" >> "$GITHUB_OUTPUT" - echo 'batches=[]' >> "$GITHUB_OUTPUT" - echo "â„šī¸ No hosts provided." - exit 0 - fi - - # Batch size (string -> int) - BATCH_SIZE=${{ github.event.inputs.batch_size || '256' }} - - # Calculate number of batches - TOTAL_BATCHES=$(( (TOTAL + BATCH_SIZE - 1) / BATCH_SIZE )) - echo "total_batches=$TOTAL_BATCHES" >> "$GITHUB_OUTPUT" - - # Build JSON array of arrays: [ ["h1","h2",...], ["hX",...], ... 
] - BATCHES=$(printf '%s\n' "$HOSTS" \ - | awk -v batch_size="$BATCH_SIZE" ' - BEGIN { outer_open=0 } - { - if ((NR-1) % batch_size == 0) { - if (outer_open==0) { printf "[["; outer_open=1 } else { printf "],[" } - printf "%s\"%s\"", (1 ? "" : ","), $0 - next - } - printf ",\"%s\"", $0 - } - END { if (outer_open==1) printf "]]"; else printf "[]" } - ' \ - | jq -c .) - - echo "batches=$BATCHES" >> "$GITHUB_OUTPUT" - echo "📊 Deploying to $TOTAL hosts across $TOTAL_BATCHES batches" + name: Create host batches + env: + DEPLOYMENT_HOSTS: ${{ vars.DEPLOYMENT_HOSTS }} + BATCH_SIZE: ${{ inputs.batch_size || 256 }} + run: bash .github/scripts/create-batches.sh deploy-batch: needs: prepare runs-on: self-hosted - if: needs.prepare.outputs.total_batches != '0' + if: >- + needs.prepare.result == 'success' && + needs.prepare.outputs.total_batches != '0' strategy: max-parallel: 1 matrix: batch: ${{ fromJson(needs.prepare.outputs.batches) }} - steps: - - name: Ensure jq is installed - run: | - if ! command -v jq >/dev/null 2>&1; then - sudo apt-get update -qq && sudo apt-get install -y jq - fi - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Substitute access key in config - run: | - sed -i 's/{{ACCOUNT_ACCESS_KEY}}/${{ vars.ACCOUNT_ACCESS_KEY }}/g' config.ini - - - name: Deploy to batch hosts - shell: bash - run: | - set -euo pipefail - - BATCH_HOSTS='${{ toJson(matrix.batch) }}' - BATCH_SIZE=$(jq 'length' <<<"$BATCH_HOSTS") - SSH_USER="${{ vars.SSH_USER || 'ubuntu' }}" - echo "🚀 Deploying to batch of $BATCH_SIZE hosts" - - # Shared failure file path (works across subshells) - FAIL_FILE="/tmp/failed_hosts_${GITHUB_RUN_ID:-$$}" - : > "$FAIL_FILE" - - # One key file per run (reuse across hosts) - KEY_FILE="$HOME/.ssh/id_rsa_ga_${GITHUB_RUN_ID:-$RANDOM}" - mkdir -p "$HOME/.ssh" - printf '%s\n' "${{ secrets.SSH_PRIVATE_KEY }}" > "$KEY_FILE" - chmod 600 "$KEY_FILE" - - # Find the agent zip file - AGENT_ZIP=$(ls appdsmartagent_64_linux_*.zip 2>/dev/null | head -n1) 
- if [ -z "$AGENT_ZIP" ]; then - echo "❌ No AppDynamics agent zip file found" - exit 1 - fi - echo "đŸ“Ļ Using agent file: $AGENT_ZIP" - AGENT_ZIP_BASENAME=$(basename "$AGENT_ZIP") - - # Remote script (heredoc kept literal) - REMOTE_SCRIPT=$(cat << 'EOF' - set -e - sudo apt-get update -qq && sudo apt-get install -y unzip - sudo rm -rf /opt/appdynamics/appdsmartagent - sudo mkdir -p /opt/appdynamics/appdsmartagent - sudo chown -R ubuntu:ubuntu /opt/appdynamics/appdsmartagent/ - - sudo unzip -o /tmp/AGENT_ZIP_PLACEHOLDER -d /opt/appdynamics/appdsmartagent - sudo cp /tmp/config.ini /opt/appdynamics/appdsmartagent/config.ini - - cd /opt/appdynamics/appdsmartagent - if [ -n "${SMARTAGENT_USER:-}" ] && [ -n "${SMARTAGENT_GROUP:-}" ]; then - echo "Starting agent with user/group: $SMARTAGENT_USER/$SMARTAGENT_GROUP" - sudo ./smartagentctl start --enable-auto-attach --service --user "$SMARTAGENT_USER" --group "$SMARTAGENT_GROUP" - else - echo "Starting agent with default user/group" - sudo ./smartagentctl start --enable-auto-attach --service - fi - echo "Remote script completed successfully." - EOF - ) - - # Replace placeholder with actual filename - REMOTE_SCRIPT="${REMOTE_SCRIPT//AGENT_ZIP_PLACEHOLDER/$AGENT_ZIP_BASENAME}" - - SMARTAGENT_USER="${{ vars.SMARTAGENT_USER }}" - SMARTAGENT_GROUP="${{ vars.SMARTAGENT_GROUP }}" - - # Iterate hosts (parallelize per host) - while read -r HOST; do - ( - echo "📡 Starting deployment to $HOST" - - # Copy artifacts (single destination) - if ! 
scp -i "$KEY_FILE" \ - -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=30 \ - "$AGENT_ZIP" config.ini \ - "${SSH_USER}@${HOST}:/tmp/"; then - echo "❌ Failed to copy files to $HOST" >&2 - echo "$HOST" >> "$FAIL_FILE" - exit 1 - fi - - # Run remote script - if echo "$REMOTE_SCRIPT" | ssh -i "$KEY_FILE" \ - -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=30 \ - "${SSH_USER}@${HOST}" \ - "SMARTAGENT_USER='${SMARTAGENT_USER}' SMARTAGENT_GROUP='${SMARTAGENT_GROUP}' bash -s"; then - echo "✅ Completed deployment to $HOST" - else - echo "❌ Failed to deploy to $HOST" >&2 - echo "$HOST" >> "$FAIL_FILE" - fi - ) & - done < <(jq -r '.[]' <<<"$BATCH_HOSTS") - - # Wait for all backgrounded per-host jobs - wait - - # Evaluate failures - if [ -s "$FAIL_FILE" ]; then - echo "❌ Some hosts failed:" - cat "$FAIL_FILE" - rm -f "$FAIL_FILE" "$KEY_FILE" - exit 1 - fi - - rm -f "$FAIL_FILE" "$KEY_FILE" - echo "✅ Batch deployment complete" + uses: actions/checkout@v6 + + - name: Deploy Smart Agent to batch hosts + env: + APPD_ACCOUNT_ACCESS_KEY: ${{ secrets.APPD_ACCOUNT_ACCESS_KEY }} + BATCH_HOSTS: ${{ toJson(matrix.batch) }} + SMARTAGENT_GROUP: ${{ vars.SMARTAGENT_GROUP }} + SMARTAGENT_USER: ${{ vars.SMARTAGENT_USER }} + SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }} + SSH_USER: ${{ vars.SSH_USER || 'ubuntu' }} + run: bash .github/scripts/deploy-smartagent-batch.sh + + check_client_inventory_api: + needs: + - prepare + - deploy-batch + runs-on: self-hosted + if: always() + steps: + - name: Checkout repository + uses: actions/checkout@v6 + + - name: Check Client Inventory API + env: + API_CHECK_WARN_ONLY: "true" + CLIENT_INVENTORY_API_BASE_URL: >- + ${{ vars.CLIENT_INVENTORY_API_BASE_URL }} + CLIENT_INVENTORY_API_TOKEN: >- + ${{ secrets.CLIENT_INVENTORY_API_TOKEN }} + CLIENT_INVENTORY_SAMPLE_SIZE: >- + ${{ vars.CLIENT_INVENTORY_SAMPLE_SIZE || 1 }} + run: bash .github/scripts/check-client-inventory-api.sh summary: - needs: 
[prepare, deploy-batch] + needs: + - prepare + - deploy-batch + - check_client_inventory_api runs-on: self-hosted if: always() steps: - - name: Deployment Summary + - name: Deployment summary run: | - echo "📊 Deployment Summary" - echo "====================" + echo "Deployment Summary" + echo "==================" echo "Total hosts: ${{ needs.prepare.outputs.total_hosts }}" echo "Total batches: ${{ needs.prepare.outputs.total_batches }}" echo "Status: ${{ needs.deploy-batch.result }}" + echo "API: ${{ needs.check_client_inventory_api.result }}" diff --git a/.github/workflows/install-db-batched.yml b/.github/workflows/install-db-batched.yml index a5ff00e..aa27169 100644 --- a/.github/workflows/install-db-batched.yml +++ b/.github/workflows/install-db-batched.yml @@ -1,13 +1,21 @@ +--- name: 5. Install Database Agent -on: +"on": workflow_dispatch: inputs: batch_size: - description: 'Number of hosts per batch' + description: Number of hosts per batch required: false - default: '256' - type: string + default: 256 + type: number + +permissions: + contents: read + +concurrency: + group: smartagent-lifecycle-${{ github.ref }} + cancel-in-progress: false jobs: prepare: @@ -17,84 +25,77 @@ jobs: total_hosts: ${{ steps.create-batches.outputs.total_hosts }} total_batches: ${{ steps.create-batches.outputs.total_batches }} steps: + - name: Checkout repository + uses: actions/checkout@v6 + - id: create-batches - run: | - HOSTS=$(echo "${{ vars.DEPLOYMENT_HOSTS }}" | tr -d '\r' | grep -v '^$') - TOTAL=$(echo "$HOSTS" | wc -l | xargs) - echo "total_hosts=$TOTAL" >> $GITHUB_OUTPUT - BATCH_SIZE=${{ github.event.inputs.batch_size || '256' }} - TOTAL_BATCHES=$(( (TOTAL + BATCH_SIZE - 1) / BATCH_SIZE )) - echo "total_batches=$TOTAL_BATCHES" >> $GITHUB_OUTPUT - BATCHES=$(echo "$HOSTS" | awk -v batch_size=$BATCH_SIZE ' - BEGIN { batch=0; print "[" } - { - if (NR % batch_size == 1) { - if (NR > 1) { print "]" } - if (NR > 1) { print "," } - print "[" - printf "\"%s\"", $0 - } else { - 
printf ",\"%s\"", $0 - } - } - END { if (NR > 0) { print "]" }; print "]" } - ' | jq -c .) - echo "batches=$BATCHES" >> $GITHUB_OUTPUT - echo "📊 Installing DB agent on $TOTAL hosts across $TOTAL_BATCHES batches" + name: Create host batches + env: + DEPLOYMENT_HOSTS: ${{ vars.DEPLOYMENT_HOSTS }} + BATCH_SIZE: ${{ inputs.batch_size || 256 }} + run: bash .github/scripts/create-batches.sh install-batch: needs: prepare runs-on: self-hosted + if: >- + needs.prepare.result == 'success' && + needs.prepare.outputs.total_batches != '0' strategy: max-parallel: 1 matrix: batch: ${{ fromJson(needs.prepare.outputs.batches) }} steps: - - name: Install db agent on batch hosts - run: | - BATCH_HOSTS='${{ toJson(matrix.batch) }}' - BATCH_SIZE=$(echo "$BATCH_HOSTS" | jq '. | length') - SSH_USER="${{ vars.SSH_USER || 'ubuntu' }}" - echo "🚀 Installing DB agent on batch of $BATCH_SIZE hosts" - - echo "$BATCH_HOSTS" | jq -r '.[]' | while read HOST; do - ( - echo "📡 Installing DB agent on $HOST" - mkdir -p ~/.ssh - echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_rsa_$HOST - chmod 600 ~/.ssh/id_rsa_$HOST - - ssh -i ~/.ssh/id_rsa_$HOST -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=30 "${SSH_USER}@${HOST}" 'cd /opt/appdynamics/appdsmartagent && sudo ./smartagentctl install db' - if [ $? 
-eq 0 ] - then - echo "✅ Completed DB agent installation on $HOST" - else - echo "❌ Failed to install DB agent on $HOST" >&2 - echo "$HOST" >> /tmp/failed_hosts_$$ - fi - - rm -f ~/.ssh/id_rsa_$HOST - ) & - done - wait - - if [ -f /tmp/failed_hosts_$$ ]; then - echo "❌ Some hosts failed:" - cat /tmp/failed_hosts_$$ - rm -f /tmp/failed_hosts_$$ - exit 1 - fi - echo "✅ Batch installation complete" + - name: Checkout repository + uses: actions/checkout@v6 + + - name: Install Database Agent on batch hosts + env: + BATCH_HOSTS: ${{ toJson(matrix.batch) }} + OPERATION_LABEL: Install Database Agent + REMOTE_COMMAND: | + cd /opt/appdynamics/appdsmartagent + sudo ./smartagentctl install db + SMARTAGENT_GROUP: ${{ vars.SMARTAGENT_GROUP }} + SMARTAGENT_USER: ${{ vars.SMARTAGENT_USER }} + SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }} + SSH_USER: ${{ vars.SSH_USER || 'ubuntu' }} + run: bash .github/scripts/run-remote-command-batch.sh + + check_client_inventory_api: + needs: + - prepare + - install-batch + runs-on: self-hosted + if: always() + steps: + - name: Checkout repository + uses: actions/checkout@v6 + + - name: Check Client Inventory API + env: + API_CHECK_WARN_ONLY: "true" + CLIENT_INVENTORY_API_BASE_URL: >- + ${{ vars.CLIENT_INVENTORY_API_BASE_URL }} + CLIENT_INVENTORY_API_TOKEN: >- + ${{ secrets.CLIENT_INVENTORY_API_TOKEN }} + CLIENT_INVENTORY_SAMPLE_SIZE: >- + ${{ vars.CLIENT_INVENTORY_SAMPLE_SIZE || 1 }} + run: bash .github/scripts/check-client-inventory-api.sh summary: - needs: [prepare, install-batch] + needs: + - prepare + - install-batch + - check_client_inventory_api runs-on: self-hosted if: always() steps: - - name: Installation Summary + - name: Installation summary run: | - echo "📊 DB Agent Installation Summary" - echo "================================" + echo "Database Agent Installation Summary" + echo "===================================" echo "Total hosts: ${{ needs.prepare.outputs.total_hosts }}" echo "Total batches: ${{ 
needs.prepare.outputs.total_batches }}" echo "Status: ${{ needs.install-batch.result }}" + echo "API: ${{ needs.check_client_inventory_api.result }}" diff --git a/.github/workflows/install-java-batched.yml b/.github/workflows/install-java-batched.yml index 6ad256e..725444c 100644 --- a/.github/workflows/install-java-batched.yml +++ b/.github/workflows/install-java-batched.yml @@ -1,13 +1,21 @@ +--- name: 3. Install Java Agent -on: +"on": workflow_dispatch: inputs: batch_size: - description: 'Number of hosts per batch' + description: Number of hosts per batch required: false - default: '256' - type: string + default: 256 + type: number + +permissions: + contents: read + +concurrency: + group: smartagent-lifecycle-${{ github.ref }} + cancel-in-progress: false jobs: prepare: @@ -17,84 +25,77 @@ jobs: total_hosts: ${{ steps.create-batches.outputs.total_hosts }} total_batches: ${{ steps.create-batches.outputs.total_batches }} steps: + - name: Checkout repository + uses: actions/checkout@v6 + - id: create-batches - run: | - HOSTS=$(echo "${{ vars.DEPLOYMENT_HOSTS }}" | tr -d '\r' | grep -v '^$') - TOTAL=$(echo "$HOSTS" | wc -l | xargs) - echo "total_hosts=$TOTAL" >> $GITHUB_OUTPUT - BATCH_SIZE=${{ github.event.inputs.batch_size || '256' }} - TOTAL_BATCHES=$(( (TOTAL + BATCH_SIZE - 1) / BATCH_SIZE )) - echo "total_batches=$TOTAL_BATCHES" >> $GITHUB_OUTPUT - BATCHES=$(echo "$HOSTS" | awk -v batch_size=$BATCH_SIZE ' - BEGIN { batch=0; print "[" } - { - if (NR % batch_size == 1) { - if (NR > 1) { print "]" } - if (NR > 1) { print "," } - print "[" - printf "\"%s\"", $0 - } else { - printf ",\"%s\"", $0 - } - } - END { if (NR > 0) { print "]" }; print "]" } - ' | jq -c .) 
- echo "batches=$BATCHES" >> $GITHUB_OUTPUT - echo "📊 Installing Java agent on $TOTAL hosts across $TOTAL_BATCHES batches" + name: Create host batches + env: + DEPLOYMENT_HOSTS: ${{ vars.DEPLOYMENT_HOSTS }} + BATCH_SIZE: ${{ inputs.batch_size || 256 }} + run: bash .github/scripts/create-batches.sh install-batch: needs: prepare runs-on: self-hosted + if: >- + needs.prepare.result == 'success' && + needs.prepare.outputs.total_batches != '0' strategy: max-parallel: 1 matrix: batch: ${{ fromJson(needs.prepare.outputs.batches) }} steps: - - name: Install java agent on batch hosts - run: | - BATCH_HOSTS='${{ toJson(matrix.batch) }}' - BATCH_SIZE=$(echo "$BATCH_HOSTS" | jq '. | length') - SSH_USER="${{ vars.SSH_USER || 'ubuntu' }}" - echo "🚀 Installing Java agent on batch of $BATCH_SIZE hosts" - - echo "$BATCH_HOSTS" | jq -r '.[]' | while read HOST; do - ( - echo "📡 Installing Java agent on $HOST" - mkdir -p ~/.ssh - echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_rsa_$HOST - chmod 600 ~/.ssh/id_rsa_$HOST - - ssh -i ~/.ssh/id_rsa_$HOST -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=30 "${SSH_USER}@${HOST}" 'cd /opt/appdynamics/appdsmartagent && sudo ./smartagentctl install java' - if [ $? 
-eq 0 ] - then - echo "✅ Completed Java agent installation on $HOST" - else - echo "❌ Failed to install Java agent on $HOST" >&2 - echo "$HOST" >> /tmp/failed_hosts_$$ - fi - - rm -f ~/.ssh/id_rsa_$HOST - ) & - done - wait - - if [ -f /tmp/failed_hosts_$$ ]; then - echo "❌ Some hosts failed:" - cat /tmp/failed_hosts_$$ - rm -f /tmp/failed_hosts_$$ - exit 1 - fi - echo "✅ Batch installation complete" + - name: Checkout repository + uses: actions/checkout@v6 + + - name: Install Java Agent on batch hosts + env: + BATCH_HOSTS: ${{ toJson(matrix.batch) }} + OPERATION_LABEL: Install Java Agent + REMOTE_COMMAND: | + cd /opt/appdynamics/appdsmartagent + sudo ./smartagentctl install java + SMARTAGENT_GROUP: ${{ vars.SMARTAGENT_GROUP }} + SMARTAGENT_USER: ${{ vars.SMARTAGENT_USER }} + SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }} + SSH_USER: ${{ vars.SSH_USER || 'ubuntu' }} + run: bash .github/scripts/run-remote-command-batch.sh + + check_client_inventory_api: + needs: + - prepare + - install-batch + runs-on: self-hosted + if: always() + steps: + - name: Checkout repository + uses: actions/checkout@v6 + + - name: Check Client Inventory API + env: + API_CHECK_WARN_ONLY: "true" + CLIENT_INVENTORY_API_BASE_URL: >- + ${{ vars.CLIENT_INVENTORY_API_BASE_URL }} + CLIENT_INVENTORY_API_TOKEN: >- + ${{ secrets.CLIENT_INVENTORY_API_TOKEN }} + CLIENT_INVENTORY_SAMPLE_SIZE: >- + ${{ vars.CLIENT_INVENTORY_SAMPLE_SIZE || 1 }} + run: bash .github/scripts/check-client-inventory-api.sh summary: - needs: [prepare, install-batch] + needs: + - prepare + - install-batch + - check_client_inventory_api runs-on: self-hosted if: always() steps: - - name: Installation Summary + - name: Installation summary run: | - echo "📊 Java Agent Installation Summary" - echo "==================================" + echo "Java Agent Installation Summary" + echo "===============================" echo "Total hosts: ${{ needs.prepare.outputs.total_hosts }}" echo "Total batches: ${{ needs.prepare.outputs.total_batches 
}}" echo "Status: ${{ needs.install-batch.result }}" + echo "API: ${{ needs.check_client_inventory_api.result }}" diff --git a/.github/workflows/install-machine-batched.yml b/.github/workflows/install-machine-batched.yml index bbaa4ec..95a1121 100644 --- a/.github/workflows/install-machine-batched.yml +++ b/.github/workflows/install-machine-batched.yml @@ -1,13 +1,21 @@ +--- name: 2. Install Machine Agent -on: +"on": workflow_dispatch: inputs: batch_size: - description: 'Number of hosts per batch' + description: Number of hosts per batch required: false - default: '256' - type: string + default: 256 + type: number + +permissions: + contents: read + +concurrency: + group: smartagent-lifecycle-${{ github.ref }} + cancel-in-progress: false jobs: prepare: @@ -17,84 +25,77 @@ jobs: total_hosts: ${{ steps.create-batches.outputs.total_hosts }} total_batches: ${{ steps.create-batches.outputs.total_batches }} steps: + - name: Checkout repository + uses: actions/checkout@v6 + - id: create-batches - run: | - HOSTS=$(echo "${{ vars.DEPLOYMENT_HOSTS }}" | tr -d '\r' | grep -v '^$') - TOTAL=$(echo "$HOSTS" | wc -l | xargs) - echo "total_hosts=$TOTAL" >> $GITHUB_OUTPUT - BATCH_SIZE=${{ github.event.inputs.batch_size || '256' }} - TOTAL_BATCHES=$(( (TOTAL + BATCH_SIZE - 1) / BATCH_SIZE )) - echo "total_batches=$TOTAL_BATCHES" >> $GITHUB_OUTPUT - BATCHES=$(echo "$HOSTS" | awk -v batch_size=$BATCH_SIZE ' - BEGIN { batch=0; print "[" } - { - if (NR % batch_size == 1) { - if (NR > 1) { print "]" } - if (NR > 1) { print "," } - print "[" - printf "\"%s\"", $0 - } else { - printf ",\"%s\"", $0 - } - } - END { if (NR > 0) { print "]" }; print "]" } - ' | jq -c .) 
- echo "batches=$BATCHES" >> $GITHUB_OUTPUT - echo "📊 Installing Machine agent on $TOTAL hosts across $TOTAL_BATCHES batches" + name: Create host batches + env: + DEPLOYMENT_HOSTS: ${{ vars.DEPLOYMENT_HOSTS }} + BATCH_SIZE: ${{ inputs.batch_size || 256 }} + run: bash .github/scripts/create-batches.sh install-batch: needs: prepare runs-on: self-hosted + if: >- + needs.prepare.result == 'success' && + needs.prepare.outputs.total_batches != '0' strategy: max-parallel: 1 matrix: batch: ${{ fromJson(needs.prepare.outputs.batches) }} steps: - - name: Install machine agent on batch hosts - run: | - BATCH_HOSTS='${{ toJson(matrix.batch) }}' - BATCH_SIZE=$(echo "$BATCH_HOSTS" | jq '. | length') - SSH_USER="${{ vars.SSH_USER || 'ubuntu' }}" - echo "🚀 Installing Machine agent on batch of $BATCH_SIZE hosts" - - echo "$BATCH_HOSTS" | jq -r '.[]' | while read HOST; do - ( - echo "📡 Installing Machine agent on $HOST" - mkdir -p ~/.ssh - echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_rsa_$HOST - chmod 600 ~/.ssh/id_rsa_$HOST - - ssh -i ~/.ssh/id_rsa_$HOST -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=30 "${SSH_USER}@${HOST}" 'cd /opt/appdynamics/appdsmartagent && sudo ./smartagentctl install machine' - if [ $? 
-eq 0 ] - then - echo "✅ Completed Machine agent installation on $HOST" - else - echo "❌ Failed to install Machine agent on $HOST" >&2 - echo "$HOST" >> /tmp/failed_hosts_$$ - fi - - rm -f ~/.ssh/id_rsa_$HOST - ) & - done - wait - - if [ -f /tmp/failed_hosts_$$ ]; then - echo "❌ Some hosts failed:" - cat /tmp/failed_hosts_$$ - rm -f /tmp/failed_hosts_$$ - exit 1 - fi - echo "✅ Batch installation complete" + - name: Checkout repository + uses: actions/checkout@v6 + + - name: Install Machine Agent on batch hosts + env: + BATCH_HOSTS: ${{ toJson(matrix.batch) }} + OPERATION_LABEL: Install Machine Agent + REMOTE_COMMAND: | + cd /opt/appdynamics/appdsmartagent + sudo ./smartagentctl install machine + SMARTAGENT_GROUP: ${{ vars.SMARTAGENT_GROUP }} + SMARTAGENT_USER: ${{ vars.SMARTAGENT_USER }} + SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }} + SSH_USER: ${{ vars.SSH_USER || 'ubuntu' }} + run: bash .github/scripts/run-remote-command-batch.sh + + check_client_inventory_api: + needs: + - prepare + - install-batch + runs-on: self-hosted + if: always() + steps: + - name: Checkout repository + uses: actions/checkout@v6 + + - name: Check Client Inventory API + env: + API_CHECK_WARN_ONLY: "true" + CLIENT_INVENTORY_API_BASE_URL: >- + ${{ vars.CLIENT_INVENTORY_API_BASE_URL }} + CLIENT_INVENTORY_API_TOKEN: >- + ${{ secrets.CLIENT_INVENTORY_API_TOKEN }} + CLIENT_INVENTORY_SAMPLE_SIZE: >- + ${{ vars.CLIENT_INVENTORY_SAMPLE_SIZE || 1 }} + run: bash .github/scripts/check-client-inventory-api.sh summary: - needs: [prepare, install-batch] + needs: + - prepare + - install-batch + - check_client_inventory_api runs-on: self-hosted if: always() steps: - - name: Installation Summary + - name: Installation summary run: | - echo "📊 Machine Agent Installation Summary" - echo "=====================================" + echo "Machine Agent Installation Summary" + echo "==================================" echo "Total hosts: ${{ needs.prepare.outputs.total_hosts }}" echo "Total batches: ${{ 
needs.prepare.outputs.total_batches }}" echo "Status: ${{ needs.install-batch.result }}" + echo "API: ${{ needs.check_client_inventory_api.result }}" diff --git a/.github/workflows/install-node-batched.yml b/.github/workflows/install-node-batched.yml index 69c4028..660a619 100644 --- a/.github/workflows/install-node-batched.yml +++ b/.github/workflows/install-node-batched.yml @@ -1,13 +1,21 @@ +--- name: 4. Install Node Agent -on: +"on": workflow_dispatch: inputs: batch_size: - description: 'Number of hosts per batch' + description: Number of hosts per batch required: false - default: '256' - type: string + default: 256 + type: number + +permissions: + contents: read + +concurrency: + group: smartagent-lifecycle-${{ github.ref }} + cancel-in-progress: false jobs: prepare: @@ -17,106 +25,77 @@ jobs: total_hosts: ${{ steps.create-batches.outputs.total_hosts }} total_batches: ${{ steps.create-batches.outputs.total_batches }} steps: + - name: Checkout repository + uses: actions/checkout@v6 + - id: create-batches - run: | - # Parse hosts and remove empty lines - HOSTS=$(echo "${{ vars.DEPLOYMENT_HOSTS }}" | tr -d '\r' | grep -v '^$') - - # Count total hosts - TOTAL=$(echo "$HOSTS" | wc -l | xargs) - echo "total_hosts=$TOTAL" >> $GITHUB_OUTPUT - - # Batch size from input or default - BATCH_SIZE=${{ github.event.inputs.batch_size || '256' }} - - # Calculate number of batches needed - TOTAL_BATCHES=$(( (TOTAL + BATCH_SIZE - 1) / BATCH_SIZE )) - echo "total_batches=$TOTAL_BATCHES" >> $GITHUB_OUTPUT - - # Create batches as JSON array - BATCHES=$(echo "$HOSTS" | awk -v batch_size=$BATCH_SIZE ' - BEGIN { batch=0; print "[" } - { - if (NR % batch_size == 1) { - if (NR > 1) { print "]" } - if (NR > 1) { print "," } - print "[" - printf "\"%s\"", $0 - } else { - printf ",\"%s\"", $0 - } - } - END { if (NR > 0) { print "]" }; print "]" } - ' | jq -c .) 
- - echo "batches=$BATCHES" >> $GITHUB_OUTPUT - - echo "📊 Installing Node agent on $TOTAL hosts across $TOTAL_BATCHES batches" + name: Create host batches + env: + DEPLOYMENT_HOSTS: ${{ vars.DEPLOYMENT_HOSTS }} + BATCH_SIZE: ${{ inputs.batch_size || 256 }} + run: bash .github/scripts/create-batches.sh install-batch: needs: prepare runs-on: self-hosted + if: >- + needs.prepare.result == 'success' && + needs.prepare.outputs.total_batches != '0' strategy: max-parallel: 1 matrix: batch: ${{ fromJson(needs.prepare.outputs.batches) }} - steps: - - name: Install node agent on batch hosts - run: | - BATCH_HOSTS='${{ toJson(matrix.batch) }}' - BATCH_SIZE=$(echo "$BATCH_HOSTS" | jq '. | length') - SSH_USER="${{ vars.SSH_USER || 'ubuntu' }}" - echo "🚀 Installing Node agent on batch of $BATCH_SIZE hosts" - - # Process each host in the batch in parallel - echo "$BATCH_HOSTS" | jq -r '.[]' | while read HOST; do - ( - echo "📡 Installing Node agent on $HOST" - - # Setup SSH - mkdir -p ~/.ssh - echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_rsa_$HOST - chmod 600 ~/.ssh/id_rsa_$HOST - - # Install agent - if ssh -i ~/.ssh/id_rsa_$HOST \ - -o StrictHostKeyChecking=no \ - -o UserKnownHostsFile=/dev/null \ - -o ConnectTimeout=30 \ - "${SSH_USER}@${HOST}" 'cd /opt/appdynamics/appdsmartagent && sudo ./smartagentctl install node' - then - echo "✅ Completed Node agent installation on $HOST" - else - echo "❌ Failed to install Node agent on $HOST" >&2 - echo "$HOST" >> /tmp/failed_hosts_$$ - fi - - # Cleanup - rm -f ~/.ssh/id_rsa_$HOST - ) & - done - - # Wait for all parallel installations to complete - wait - - if [ -f /tmp/failed_hosts_$$ ]; then - echo "❌ Some hosts failed:" - cat /tmp/failed_hosts_$$ - rm -f /tmp/failed_hosts_$$ - exit 1 - fi - echo "✅ Batch installation complete" + - name: Checkout repository + uses: actions/checkout@v6 + + - name: Install Node Agent on batch hosts + env: + BATCH_HOSTS: ${{ toJson(matrix.batch) }} + OPERATION_LABEL: Install Node Agent + 
REMOTE_COMMAND: | + cd /opt/appdynamics/appdsmartagent + sudo ./smartagentctl install node + SMARTAGENT_GROUP: ${{ vars.SMARTAGENT_GROUP }} + SMARTAGENT_USER: ${{ vars.SMARTAGENT_USER }} + SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }} + SSH_USER: ${{ vars.SSH_USER || 'ubuntu' }} + run: bash .github/scripts/run-remote-command-batch.sh + + check_client_inventory_api: + needs: + - prepare + - install-batch + runs-on: self-hosted + if: always() + steps: + - name: Checkout repository + uses: actions/checkout@v6 + + - name: Check Client Inventory API + env: + API_CHECK_WARN_ONLY: "true" + CLIENT_INVENTORY_API_BASE_URL: >- + ${{ vars.CLIENT_INVENTORY_API_BASE_URL }} + CLIENT_INVENTORY_API_TOKEN: >- + ${{ secrets.CLIENT_INVENTORY_API_TOKEN }} + CLIENT_INVENTORY_SAMPLE_SIZE: >- + ${{ vars.CLIENT_INVENTORY_SAMPLE_SIZE || 1 }} + run: bash .github/scripts/check-client-inventory-api.sh summary: - needs: [prepare, install-batch] + needs: + - prepare + - install-batch + - check_client_inventory_api runs-on: self-hosted if: always() steps: - - name: Installation Summary + - name: Installation summary run: | - echo "📊 Node Agent Installation Summary" - echo "==================================" + echo "Node Agent Installation Summary" + echo "===============================" echo "Total hosts: ${{ needs.prepare.outputs.total_hosts }}" echo "Total batches: ${{ needs.prepare.outputs.total_batches }}" echo "Status: ${{ needs.install-batch.result }}" + echo "API: ${{ needs.check_client_inventory_api.result }}" diff --git a/.github/workflows/stop-clean-smartagent-batched.yml b/.github/workflows/stop-clean-smartagent-batched.yml index b9bc35c..8baa378 100644 --- a/.github/workflows/stop-clean-smartagent-batched.yml +++ b/.github/workflows/stop-clean-smartagent-batched.yml @@ -1,13 +1,21 @@ +--- name: 6. 
Stop and Clean Smart Agent -on: +"on": workflow_dispatch: inputs: batch_size: - description: 'Number of hosts per batch' + description: Number of hosts per batch required: false - default: '256' - type: string + default: 256 + type: number + +permissions: + contents: read + +concurrency: + group: smartagent-lifecycle-${{ github.ref }} + cancel-in-progress: false jobs: prepare: @@ -17,84 +25,78 @@ jobs: total_hosts: ${{ steps.create-batches.outputs.total_hosts }} total_batches: ${{ steps.create-batches.outputs.total_batches }} steps: + - name: Checkout repository + uses: actions/checkout@v6 + - id: create-batches - run: | - HOSTS=$(echo "${{ vars.DEPLOYMENT_HOSTS }}" | tr -d '\r' | grep -v '^$') - TOTAL=$(echo "$HOSTS" | wc -l | xargs) - echo "total_hosts=$TOTAL" >> $GITHUB_OUTPUT - BATCH_SIZE=${{ github.event.inputs.batch_size || '256' }} - TOTAL_BATCHES=$(( (TOTAL + BATCH_SIZE - 1) / BATCH_SIZE )) - echo "total_batches=$TOTAL_BATCHES" >> $GITHUB_OUTPUT - BATCHES=$(echo "$HOSTS" | awk -v batch_size=$BATCH_SIZE ' - BEGIN { batch=0; print "[" } - { - if (NR % batch_size == 1) { - if (NR > 1) { print "]" } - if (NR > 1) { print "," } - print "[" - printf "\"%s\"", $0 - } else { - printf ",\"%s\"", $0 - } - } - END { if (NR > 0) { print "]" }; print "]" } - ' | jq -c .) 
- echo "batches=$BATCHES" >> $GITHUB_OUTPUT - echo "📊 Stopping and cleaning Smart Agent on $TOTAL hosts across $TOTAL_BATCHES batches" + name: Create host batches + env: + DEPLOYMENT_HOSTS: ${{ vars.DEPLOYMENT_HOSTS }} + BATCH_SIZE: ${{ inputs.batch_size || 256 }} + run: bash .github/scripts/create-batches.sh stop-clean-batch: needs: prepare runs-on: self-hosted + if: >- + needs.prepare.result == 'success' && + needs.prepare.outputs.total_batches != '0' strategy: max-parallel: 1 matrix: batch: ${{ fromJson(needs.prepare.outputs.batches) }} steps: + - name: Checkout repository + uses: actions/checkout@v6 + - name: Stop and clean Smart Agent on batch hosts - run: | - BATCH_HOSTS='${{ toJson(matrix.batch) }}' - BATCH_SIZE=$(echo "$BATCH_HOSTS" | jq '. | length') - SSH_USER="${{ vars.SSH_USER || 'ubuntu' }}" - echo "🚀 Stopping and cleaning Smart Agent on batch of $BATCH_SIZE hosts" - - echo "$BATCH_HOSTS" | jq -r '.[]' | while read HOST; do - ( - echo "📡 Stopping and cleaning Smart Agent on $HOST" - mkdir -p ~/.ssh - echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_rsa_$HOST - chmod 600 ~/.ssh/id_rsa_$HOST - - ssh -i ~/.ssh/id_rsa_$HOST -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=30 "${SSH_USER}@${HOST}" 'cd /opt/appdynamics/appdsmartagent && sudo ./smartagentctl stop && sudo ./smartagentctl clean' - if [ $? 
-eq 0 ] - then - echo "✅ Completed stop and clean on $HOST" - else - echo "❌ Failed to stop and clean on $HOST" >&2 - echo "$HOST" >> /tmp/failed_hosts_$$ - fi - - rm -f ~/.ssh/id_rsa_$HOST - ) & - done - wait - - if [ -f /tmp/failed_hosts_$$ ]; then - echo "❌ Some hosts failed:" - cat /tmp/failed_hosts_$$ - rm -f /tmp/failed_hosts_$$ - exit 1 - fi - echo "✅ Batch stop and clean complete" + env: + BATCH_HOSTS: ${{ toJson(matrix.batch) }} + OPERATION_LABEL: Stop and Clean Smart Agent + REMOTE_COMMAND: | + cd /opt/appdynamics/appdsmartagent + sudo ./smartagentctl stop + sudo ./smartagentctl clean + SMARTAGENT_GROUP: ${{ vars.SMARTAGENT_GROUP }} + SMARTAGENT_USER: ${{ vars.SMARTAGENT_USER }} + SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }} + SSH_USER: ${{ vars.SSH_USER || 'ubuntu' }} + run: bash .github/scripts/run-remote-command-batch.sh + + check_client_inventory_api: + needs: + - prepare + - stop-clean-batch + runs-on: self-hosted + if: always() + steps: + - name: Checkout repository + uses: actions/checkout@v6 + + - name: Check Client Inventory API + env: + API_CHECK_WARN_ONLY: "true" + CLIENT_INVENTORY_API_BASE_URL: >- + ${{ vars.CLIENT_INVENTORY_API_BASE_URL }} + CLIENT_INVENTORY_API_TOKEN: >- + ${{ secrets.CLIENT_INVENTORY_API_TOKEN }} + CLIENT_INVENTORY_SAMPLE_SIZE: >- + ${{ vars.CLIENT_INVENTORY_SAMPLE_SIZE || 1 }} + run: bash .github/scripts/check-client-inventory-api.sh summary: - needs: [prepare, stop-clean-batch] + needs: + - prepare + - stop-clean-batch + - check_client_inventory_api runs-on: self-hosted if: always() steps: - - name: Stop and Clean Summary + - name: Stop and clean summary run: | - echo "📊 Smart Agent Stop and Clean Summary" - echo "=====================================" + echo "Smart Agent Stop and Clean Summary" + echo "==================================" echo "Total hosts: ${{ needs.prepare.outputs.total_hosts }}" echo "Total batches: ${{ needs.prepare.outputs.total_batches }}" echo "Status: ${{ needs.stop-clean-batch.result }}" + 
echo "API: ${{ needs.check_client_inventory_api.result }}" diff --git a/.github/workflows/uninstall-db-batched.yml b/.github/workflows/uninstall-db-batched.yml index f72b924..15da3ce 100644 --- a/.github/workflows/uninstall-db-batched.yml +++ b/.github/workflows/uninstall-db-batched.yml @@ -1,13 +1,21 @@ +--- name: 10. Uninstall Database Agent -on: +"on": workflow_dispatch: inputs: batch_size: - description: 'Number of hosts per batch' + description: Number of hosts per batch required: false - default: '256' - type: string + default: 256 + type: number + +permissions: + contents: read + +concurrency: + group: smartagent-lifecycle-${{ github.ref }} + cancel-in-progress: false jobs: prepare: @@ -17,84 +25,77 @@ jobs: total_hosts: ${{ steps.create-batches.outputs.total_hosts }} total_batches: ${{ steps.create-batches.outputs.total_batches }} steps: + - name: Checkout repository + uses: actions/checkout@v6 + - id: create-batches - run: | - HOSTS=$(echo "${{ vars.DEPLOYMENT_HOSTS }}" | tr -d '\r' | grep -v '^$') - TOTAL=$(echo "$HOSTS" | wc -l | xargs) - echo "total_hosts=$TOTAL" >> $GITHUB_OUTPUT - BATCH_SIZE=${{ github.event.inputs.batch_size || '256' }} - TOTAL_BATCHES=$(( (TOTAL + BATCH_SIZE - 1) / BATCH_SIZE )) - echo "total_batches=$TOTAL_BATCHES" >> $GITHUB_OUTPUT - BATCHES=$(echo "$HOSTS" | awk -v batch_size=$BATCH_SIZE ' - BEGIN { batch=0; print "[" } - { - if (NR % batch_size == 1) { - if (NR > 1) { print "]" } - if (NR > 1) { print "," } - print "[" - printf "\"%s\"", $0 - } else { - printf ",\"%s\"", $0 - } - } - END { if (NR > 0) { print "]" }; print "]" } - ' | jq -c .) 
- echo "batches=$BATCHES" >> $GITHUB_OUTPUT - echo "📊 Uninstalling DB agent from $TOTAL hosts across $TOTAL_BATCHES batches" + name: Create host batches + env: + DEPLOYMENT_HOSTS: ${{ vars.DEPLOYMENT_HOSTS }} + BATCH_SIZE: ${{ inputs.batch_size || 256 }} + run: bash .github/scripts/create-batches.sh uninstall-batch: needs: prepare runs-on: self-hosted + if: >- + needs.prepare.result == 'success' && + needs.prepare.outputs.total_batches != '0' strategy: max-parallel: 1 matrix: batch: ${{ fromJson(needs.prepare.outputs.batches) }} steps: - - name: Uninstall db agent from batch hosts - run: | - BATCH_HOSTS='${{ toJson(matrix.batch) }}' - BATCH_SIZE=$(echo "$BATCH_HOSTS" | jq '. | length') - SSH_USER="${{ vars.SSH_USER || 'ubuntu' }}" - echo "🚀 Uninstalling DB agent from batch of $BATCH_SIZE hosts" - - echo "$BATCH_HOSTS" | jq -r '.[]' | while read HOST; do - ( - echo "📡 Uninstalling DB agent from $HOST" - mkdir -p ~/.ssh - echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_rsa_$HOST - chmod 600 ~/.ssh/id_rsa_$HOST - - ssh -i ~/.ssh/id_rsa_$HOST -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=30 "${SSH_USER}@${HOST}" 'cd /opt/appdynamics/appdsmartagent && sudo ./smartagentctl uninstall db' - if [ $? 
-eq 0 ] - then - echo "✅ Completed DB agent uninstall on $HOST" - else - echo "❌ Failed to uninstall DB agent on $HOST" >&2 - echo "$HOST" >> /tmp/failed_hosts_$$ - fi - - rm -f ~/.ssh/id_rsa_$HOST - ) & - done - wait - - if [ -f /tmp/failed_hosts_$$ ]; then - echo "❌ Some hosts failed:" - cat /tmp/failed_hosts_$$ - rm -f /tmp/failed_hosts_$$ - exit 1 - fi - echo "✅ Batch uninstall complete" + - name: Checkout repository + uses: actions/checkout@v6 + + - name: Uninstall Database Agent from batch hosts + env: + BATCH_HOSTS: ${{ toJson(matrix.batch) }} + OPERATION_LABEL: Uninstall Database Agent + REMOTE_COMMAND: | + cd /opt/appdynamics/appdsmartagent + sudo ./smartagentctl uninstall db + SMARTAGENT_GROUP: ${{ vars.SMARTAGENT_GROUP }} + SMARTAGENT_USER: ${{ vars.SMARTAGENT_USER }} + SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }} + SSH_USER: ${{ vars.SSH_USER || 'ubuntu' }} + run: bash .github/scripts/run-remote-command-batch.sh + + check_client_inventory_api: + needs: + - prepare + - uninstall-batch + runs-on: self-hosted + if: always() + steps: + - name: Checkout repository + uses: actions/checkout@v6 + + - name: Check Client Inventory API + env: + API_CHECK_WARN_ONLY: "true" + CLIENT_INVENTORY_API_BASE_URL: >- + ${{ vars.CLIENT_INVENTORY_API_BASE_URL }} + CLIENT_INVENTORY_API_TOKEN: >- + ${{ secrets.CLIENT_INVENTORY_API_TOKEN }} + CLIENT_INVENTORY_SAMPLE_SIZE: >- + ${{ vars.CLIENT_INVENTORY_SAMPLE_SIZE || 1 }} + run: bash .github/scripts/check-client-inventory-api.sh summary: - needs: [prepare, uninstall-batch] + needs: + - prepare + - uninstall-batch + - check_client_inventory_api runs-on: self-hosted if: always() steps: - - name: Uninstall Summary + - name: Uninstall summary run: | - echo "📊 DB Agent Uninstall Summary" - echo "=============================" + echo "Database Agent Uninstall Summary" + echo "================================" echo "Total hosts: ${{ needs.prepare.outputs.total_hosts }}" echo "Total batches: ${{ needs.prepare.outputs.total_batches 
}}" echo "Status: ${{ needs.uninstall-batch.result }}" + echo "API: ${{ needs.check_client_inventory_api.result }}" diff --git a/.github/workflows/uninstall-java-batched.yml b/.github/workflows/uninstall-java-batched.yml index f69ba80..0a64f61 100644 --- a/.github/workflows/uninstall-java-batched.yml +++ b/.github/workflows/uninstall-java-batched.yml @@ -1,13 +1,21 @@ +--- name: 8. Uninstall Java Agent -on: +"on": workflow_dispatch: inputs: batch_size: - description: 'Number of hosts per batch' + description: Number of hosts per batch required: false - default: '256' - type: string + default: 256 + type: number + +permissions: + contents: read + +concurrency: + group: smartagent-lifecycle-${{ github.ref }} + cancel-in-progress: false jobs: prepare: @@ -17,84 +25,77 @@ jobs: total_hosts: ${{ steps.create-batches.outputs.total_hosts }} total_batches: ${{ steps.create-batches.outputs.total_batches }} steps: + - name: Checkout repository + uses: actions/checkout@v6 + - id: create-batches - run: | - HOSTS=$(echo "${{ vars.DEPLOYMENT_HOSTS }}" | tr -d '\r' | grep -v '^$') - TOTAL=$(echo "$HOSTS" | wc -l | xargs) - echo "total_hosts=$TOTAL" >> $GITHUB_OUTPUT - BATCH_SIZE=${{ github.event.inputs.batch_size || '256' }} - TOTAL_BATCHES=$(( (TOTAL + BATCH_SIZE - 1) / BATCH_SIZE )) - echo "total_batches=$TOTAL_BATCHES" >> $GITHUB_OUTPUT - BATCHES=$(echo "$HOSTS" | awk -v batch_size=$BATCH_SIZE ' - BEGIN { batch=0; print "[" } - { - if (NR % batch_size == 1) { - if (NR > 1) { print "]" } - if (NR > 1) { print "," } - print "[" - printf "\"%s\"", $0 - } else { - printf ",\"%s\"", $0 - } - } - END { if (NR > 0) { print "]" }; print "]" } - ' | jq -c .) 
- echo "batches=$BATCHES" >> $GITHUB_OUTPUT - echo "📊 Uninstalling Java agent from $TOTAL hosts across $TOTAL_BATCHES batches" + name: Create host batches + env: + DEPLOYMENT_HOSTS: ${{ vars.DEPLOYMENT_HOSTS }} + BATCH_SIZE: ${{ inputs.batch_size || 256 }} + run: bash .github/scripts/create-batches.sh uninstall-batch: needs: prepare runs-on: self-hosted + if: >- + needs.prepare.result == 'success' && + needs.prepare.outputs.total_batches != '0' strategy: max-parallel: 1 matrix: batch: ${{ fromJson(needs.prepare.outputs.batches) }} steps: - - name: Uninstall java agent from batch hosts - run: | - BATCH_HOSTS='${{ toJson(matrix.batch) }}' - BATCH_SIZE=$(echo "$BATCH_HOSTS" | jq '. | length') - SSH_USER="${{ vars.SSH_USER || 'ubuntu' }}" - echo "🚀 Uninstalling Java agent from batch of $BATCH_SIZE hosts" - - echo "$BATCH_HOSTS" | jq -r '.[]' | while read HOST; do - ( - echo "📡 Uninstalling Java agent from $HOST" - mkdir -p ~/.ssh - echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_rsa_$HOST - chmod 600 ~/.ssh/id_rsa_$HOST - - ssh -i ~/.ssh/id_rsa_$HOST -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=30 "${SSH_USER}@${HOST}" 'cd /opt/appdynamics/appdsmartagent && sudo ./smartagentctl uninstall java' - if [ $? 
-eq 0 ] - then - echo "✅ Completed Java agent uninstall on $HOST" - else - echo "❌ Failed to uninstall Java agent on $HOST" >&2 - echo "$HOST" >> /tmp/failed_hosts_$$ - fi - - rm -f ~/.ssh/id_rsa_$HOST - ) & - done - wait - - if [ -f /tmp/failed_hosts_$$ ]; then - echo "❌ Some hosts failed:" - cat /tmp/failed_hosts_$$ - rm -f /tmp/failed_hosts_$$ - exit 1 - fi - echo "✅ Batch uninstall complete" + - name: Checkout repository + uses: actions/checkout@v6 + + - name: Uninstall Java Agent from batch hosts + env: + BATCH_HOSTS: ${{ toJson(matrix.batch) }} + OPERATION_LABEL: Uninstall Java Agent + REMOTE_COMMAND: | + cd /opt/appdynamics/appdsmartagent + sudo ./smartagentctl uninstall java + SMARTAGENT_GROUP: ${{ vars.SMARTAGENT_GROUP }} + SMARTAGENT_USER: ${{ vars.SMARTAGENT_USER }} + SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }} + SSH_USER: ${{ vars.SSH_USER || 'ubuntu' }} + run: bash .github/scripts/run-remote-command-batch.sh + + check_client_inventory_api: + needs: + - prepare + - uninstall-batch + runs-on: self-hosted + if: always() + steps: + - name: Checkout repository + uses: actions/checkout@v6 + + - name: Check Client Inventory API + env: + API_CHECK_WARN_ONLY: "true" + CLIENT_INVENTORY_API_BASE_URL: >- + ${{ vars.CLIENT_INVENTORY_API_BASE_URL }} + CLIENT_INVENTORY_API_TOKEN: >- + ${{ secrets.CLIENT_INVENTORY_API_TOKEN }} + CLIENT_INVENTORY_SAMPLE_SIZE: >- + ${{ vars.CLIENT_INVENTORY_SAMPLE_SIZE || 1 }} + run: bash .github/scripts/check-client-inventory-api.sh summary: - needs: [prepare, uninstall-batch] + needs: + - prepare + - uninstall-batch + - check_client_inventory_api runs-on: self-hosted if: always() steps: - - name: Uninstall Summary + - name: Uninstall summary run: | - echo "📊 Java Agent Uninstall Summary" - echo "===============================" + echo "Java Agent Uninstall Summary" + echo "============================" echo "Total hosts: ${{ needs.prepare.outputs.total_hosts }}" echo "Total batches: ${{ needs.prepare.outputs.total_batches }}" echo 
"Status: ${{ needs.uninstall-batch.result }}" + echo "API: ${{ needs.check_client_inventory_api.result }}" diff --git a/.github/workflows/uninstall-machine-batched.yml b/.github/workflows/uninstall-machine-batched.yml index 646b8ba..7997efd 100644 --- a/.github/workflows/uninstall-machine-batched.yml +++ b/.github/workflows/uninstall-machine-batched.yml @@ -1,13 +1,21 @@ +--- name: 7. Uninstall Machine Agent -on: +"on": workflow_dispatch: inputs: batch_size: - description: 'Number of hosts per batch' + description: Number of hosts per batch required: false - default: '256' - type: string + default: 256 + type: number + +permissions: + contents: read + +concurrency: + group: smartagent-lifecycle-${{ github.ref }} + cancel-in-progress: false jobs: prepare: @@ -17,84 +25,77 @@ jobs: total_hosts: ${{ steps.create-batches.outputs.total_hosts }} total_batches: ${{ steps.create-batches.outputs.total_batches }} steps: + - name: Checkout repository + uses: actions/checkout@v6 + - id: create-batches - run: | - HOSTS=$(echo "${{ vars.DEPLOYMENT_HOSTS }}" | tr -d '\r' | grep -v '^$') - TOTAL=$(echo "$HOSTS" | wc -l | xargs) - echo "total_hosts=$TOTAL" >> $GITHUB_OUTPUT - BATCH_SIZE=${{ github.event.inputs.batch_size || '256' }} - TOTAL_BATCHES=$(( (TOTAL + BATCH_SIZE - 1) / BATCH_SIZE )) - echo "total_batches=$TOTAL_BATCHES" >> $GITHUB_OUTPUT - BATCHES=$(echo "$HOSTS" | awk -v batch_size=$BATCH_SIZE ' - BEGIN { batch=0; print "[" } - { - if (NR % batch_size == 1) { - if (NR > 1) { print "]" } - if (NR > 1) { print "," } - print "[" - printf "\"%s\"", $0 - } else { - printf ",\"%s\"", $0 - } - } - END { if (NR > 0) { print "]" }; print "]" } - ' | jq -c .) 
- echo "batches=$BATCHES" >> $GITHUB_OUTPUT - echo "📊 Uninstalling Machine agent from $TOTAL hosts across $TOTAL_BATCHES batches" + name: Create host batches + env: + DEPLOYMENT_HOSTS: ${{ vars.DEPLOYMENT_HOSTS }} + BATCH_SIZE: ${{ inputs.batch_size || 256 }} + run: bash .github/scripts/create-batches.sh uninstall-batch: needs: prepare runs-on: self-hosted + if: >- + needs.prepare.result == 'success' && + needs.prepare.outputs.total_batches != '0' strategy: max-parallel: 1 matrix: batch: ${{ fromJson(needs.prepare.outputs.batches) }} steps: - - name: Uninstall machine agent from batch hosts - run: | - BATCH_HOSTS='${{ toJson(matrix.batch) }}' - BATCH_SIZE=$(echo "$BATCH_HOSTS" | jq '. | length') - SSH_USER="${{ vars.SSH_USER || 'ubuntu' }}" - echo "🚀 Uninstalling Machine agent from batch of $BATCH_SIZE hosts" - - echo "$BATCH_HOSTS" | jq -r '.[]' | while read HOST; do - ( - echo "📡 Uninstalling Machine agent from $HOST" - mkdir -p ~/.ssh - echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_rsa_$HOST - chmod 600 ~/.ssh/id_rsa_$HOST - - ssh -i ~/.ssh/id_rsa_$HOST -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=30 "${SSH_USER}@${HOST}" 'cd /opt/appdynamics/appdsmartagent && sudo ./smartagentctl uninstall machine' - if [ $? 
-eq 0 ] - then - echo "✅ Completed Machine agent uninstall on $HOST" - else - echo "❌ Failed to uninstall Machine agent on $HOST" >&2 - echo "$HOST" >> /tmp/failed_hosts_$$ - fi - - rm -f ~/.ssh/id_rsa_$HOST - ) & - done - wait - - if [ -f /tmp/failed_hosts_$$ ]; then - echo "❌ Some hosts failed:" - cat /tmp/failed_hosts_$$ - rm -f /tmp/failed_hosts_$$ - exit 1 - fi - echo "✅ Batch uninstall complete" + - name: Checkout repository + uses: actions/checkout@v6 + + - name: Uninstall Machine Agent from batch hosts + env: + BATCH_HOSTS: ${{ toJson(matrix.batch) }} + OPERATION_LABEL: Uninstall Machine Agent + REMOTE_COMMAND: | + cd /opt/appdynamics/appdsmartagent + sudo ./smartagentctl uninstall machine + SMARTAGENT_GROUP: ${{ vars.SMARTAGENT_GROUP }} + SMARTAGENT_USER: ${{ vars.SMARTAGENT_USER }} + SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }} + SSH_USER: ${{ vars.SSH_USER || 'ubuntu' }} + run: bash .github/scripts/run-remote-command-batch.sh + + check_client_inventory_api: + needs: + - prepare + - uninstall-batch + runs-on: self-hosted + if: always() + steps: + - name: Checkout repository + uses: actions/checkout@v6 + + - name: Check Client Inventory API + env: + API_CHECK_WARN_ONLY: "true" + CLIENT_INVENTORY_API_BASE_URL: >- + ${{ vars.CLIENT_INVENTORY_API_BASE_URL }} + CLIENT_INVENTORY_API_TOKEN: >- + ${{ secrets.CLIENT_INVENTORY_API_TOKEN }} + CLIENT_INVENTORY_SAMPLE_SIZE: >- + ${{ vars.CLIENT_INVENTORY_SAMPLE_SIZE || 1 }} + run: bash .github/scripts/check-client-inventory-api.sh summary: - needs: [prepare, uninstall-batch] + needs: + - prepare + - uninstall-batch + - check_client_inventory_api runs-on: self-hosted if: always() steps: - - name: Uninstall Summary + - name: Uninstall summary run: | - echo "📊 Machine Agent Uninstall Summary" - echo "====================================" + echo "Machine Agent Uninstall Summary" + echo "===============================" echo "Total hosts: ${{ needs.prepare.outputs.total_hosts }}" echo "Total batches: ${{ 
needs.prepare.outputs.total_batches }}" echo "Status: ${{ needs.uninstall-batch.result }}" + echo "API: ${{ needs.check_client_inventory_api.result }}" diff --git a/.github/workflows/uninstall-node-batched.yml b/.github/workflows/uninstall-node-batched.yml index 8630aea..f9da2a4 100644 --- a/.github/workflows/uninstall-node-batched.yml +++ b/.github/workflows/uninstall-node-batched.yml @@ -1,13 +1,21 @@ +--- name: 9. Uninstall Node Agent -on: +"on": workflow_dispatch: inputs: batch_size: - description: 'Number of hosts per batch' + description: Number of hosts per batch required: false - default: '256' - type: string + default: 256 + type: number + +permissions: + contents: read + +concurrency: + group: smartagent-lifecycle-${{ github.ref }} + cancel-in-progress: false jobs: prepare: @@ -17,84 +25,77 @@ jobs: total_hosts: ${{ steps.create-batches.outputs.total_hosts }} total_batches: ${{ steps.create-batches.outputs.total_batches }} steps: + - name: Checkout repository + uses: actions/checkout@v6 + - id: create-batches - run: | - HOSTS=$(echo "${{ vars.DEPLOYMENT_HOSTS }}" | tr -d '\r' | grep -v '^$') - TOTAL=$(echo "$HOSTS" | wc -l | xargs) - echo "total_hosts=$TOTAL" >> $GITHUB_OUTPUT - BATCH_SIZE=${{ github.event.inputs.batch_size || '256' }} - TOTAL_BATCHES=$(( (TOTAL + BATCH_SIZE - 1) / BATCH_SIZE )) - echo "total_batches=$TOTAL_BATCHES" >> $GITHUB_OUTPUT - BATCHES=$(echo "$HOSTS" | awk -v batch_size=$BATCH_SIZE ' - BEGIN { batch=0; print "[" } - { - if (NR % batch_size == 1) { - if (NR > 1) { print "]" } - if (NR > 1) { print "," } - print "[" - printf "\"%s\"", $0 - } else { - printf ",\"%s\"", $0 - } - } - END { if (NR > 0) { print "]" }; print "]" } - ' | jq -c .) 
- echo "batches=$BATCHES" >> $GITHUB_OUTPUT - echo "📊 Uninstalling Node agent from $TOTAL hosts across $TOTAL_BATCHES batches" + name: Create host batches + env: + DEPLOYMENT_HOSTS: ${{ vars.DEPLOYMENT_HOSTS }} + BATCH_SIZE: ${{ inputs.batch_size || 256 }} + run: bash .github/scripts/create-batches.sh uninstall-batch: needs: prepare runs-on: self-hosted + if: >- + needs.prepare.result == 'success' && + needs.prepare.outputs.total_batches != '0' strategy: max-parallel: 1 matrix: batch: ${{ fromJson(needs.prepare.outputs.batches) }} steps: - - name: Uninstall node agent from batch hosts - run: | - BATCH_HOSTS='${{ toJson(matrix.batch) }}' - BATCH_SIZE=$(echo "$BATCH_HOSTS" | jq '. | length') - SSH_USER="${{ vars.SSH_USER || 'ubuntu' }}" - echo "🚀 Uninstalling Node agent from batch of $BATCH_SIZE hosts" - - echo "$BATCH_HOSTS" | jq -r '.[]' | while read HOST; do - ( - echo "📡 Uninstalling Node agent from $HOST" - mkdir -p ~/.ssh - echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_rsa_$HOST - chmod 600 ~/.ssh/id_rsa_$HOST - - ssh -i ~/.ssh/id_rsa_$HOST -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=30 "${SSH_USER}@${HOST}" 'cd /opt/appdynamics/appdsmartagent && sudo ./smartagentctl uninstall node' - if [ $? 
-eq 0 ] - then - echo "✅ Completed Node agent uninstall on $HOST" - else - echo "❌ Failed to uninstall Node agent on $HOST" >&2 - echo "$HOST" >> /tmp/failed_hosts_$$ - fi - - rm -f ~/.ssh/id_rsa_$HOST - ) & - done - wait - - if [ -f /tmp/failed_hosts_$$ ]; then - echo "❌ Some hosts failed:" - cat /tmp/failed_hosts_$$ - rm -f /tmp/failed_hosts_$$ - exit 1 - fi - echo "✅ Batch uninstall complete" + - name: Checkout repository + uses: actions/checkout@v6 + + - name: Uninstall Node Agent from batch hosts + env: + BATCH_HOSTS: ${{ toJson(matrix.batch) }} + OPERATION_LABEL: Uninstall Node Agent + REMOTE_COMMAND: | + cd /opt/appdynamics/appdsmartagent + sudo ./smartagentctl uninstall node + SMARTAGENT_GROUP: ${{ vars.SMARTAGENT_GROUP }} + SMARTAGENT_USER: ${{ vars.SMARTAGENT_USER }} + SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }} + SSH_USER: ${{ vars.SSH_USER || 'ubuntu' }} + run: bash .github/scripts/run-remote-command-batch.sh + + check_client_inventory_api: + needs: + - prepare + - uninstall-batch + runs-on: self-hosted + if: always() + steps: + - name: Checkout repository + uses: actions/checkout@v6 + + - name: Check Client Inventory API + env: + API_CHECK_WARN_ONLY: "true" + CLIENT_INVENTORY_API_BASE_URL: >- + ${{ vars.CLIENT_INVENTORY_API_BASE_URL }} + CLIENT_INVENTORY_API_TOKEN: >- + ${{ secrets.CLIENT_INVENTORY_API_TOKEN }} + CLIENT_INVENTORY_SAMPLE_SIZE: >- + ${{ vars.CLIENT_INVENTORY_SAMPLE_SIZE || 1 }} + run: bash .github/scripts/check-client-inventory-api.sh summary: - needs: [prepare, uninstall-batch] + needs: + - prepare + - uninstall-batch + - check_client_inventory_api runs-on: self-hosted if: always() steps: - - name: Uninstall Summary + - name: Uninstall summary run: | - echo "📊 Node Agent Uninstall Summary" - echo "===============================" + echo "Node Agent Uninstall Summary" + echo "============================" echo "Total hosts: ${{ needs.prepare.outputs.total_hosts }}" echo "Total batches: ${{ needs.prepare.outputs.total_batches }}" echo 
"Status: ${{ needs.uninstall-batch.result }}" + echo "API: ${{ needs.check_client_inventory_api.result }}" diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 9733781..31c8fcb 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -21,7 +21,7 @@ graph TB end end - User -->|git push| GH + User -->|manual workflow trigger| GH GH <-->|HTTPS:443
Poll for jobs| Runner Runner -->|SSH:22
Private IPs| T1 Runner -->|SSH:22
Private IPs| T2 @@ -46,7 +46,7 @@ sequenceDiagram participant Runner as Self-hosted Runner participant Target as Target Host(s) - Dev->>GH: 1. Push code or trigger workflow + Dev->>GH: 1. Trigger workflow manually GH->>GH: 2. Workflow event triggered Runner->>GH: 3. Poll for jobs (HTTPS:443) GH->>Runner: 4. Assign job to runner @@ -93,12 +93,13 @@ graph LR ```mermaid graph TD - Root[GitHub Actions Workflows
11 Total] + Root[GitHub Actions Workflows
12 Total] Root --> Deploy[Deployment
1 workflow] Root --> Install[Agent Installation
4 batched workflows] Root --> Uninstall[Agent Uninstallation
4 batched workflows] Root --> Manage[Smart Agent Management
2 batched workflows] + Root --> Api[API Validation
1 workflow] Deploy --> D1[Deploy Smart Agent
Batched, Manual trigger] @@ -113,13 +114,16 @@ graph TD Uninstall --> U4[Uninstall Java
Batched] Manage --> M1[Stop and Clean
Batched] - Manage --> M2[Cleanup All Agents
Batched] + Manage --> M2[Cleanup Smart Agent Directory
Batched] + + Api --> A1[Check Client Inventory API
Manual trigger] style Root fill:#6f42c1,color:#fff style Deploy fill:#28a745,color:#fff style Install fill:#0366d6,color:#fff style Uninstall fill:#dc3545,color:#fff style Manage fill:#fd7e14,color:#fff + style Api fill:#0d9488,color:#fff ``` ## Data Flow @@ -131,6 +135,7 @@ graph LR ZIP[Smart Agent ZIP] CFG[config.ini] SEC[Secrets/Variables] + OAS[openapi.json] end subgraph "Runner Execution" @@ -145,6 +150,7 @@ graph LR WF --> PREP SEC --> PREP + OAS --> PREP PREP --> MATRIX ZIP --> TMP @@ -173,7 +179,7 @@ graph LR ### Target Hosts - **OS**: Ubuntu Server - **Deployed Components**: - - Smart Agent (`/opt/appdynamics/`) + - Smart Agent (`/opt/appdynamics/appdsmartagent/`) - AppDynamics Agents (node, machine, db, java) - **Access**: - Inbound SSH (22) from runner only @@ -184,8 +190,8 @@ graph LR - 11 workflow YAML files - Smart Agent installation package - Configuration file (config.ini) -- **Secrets**: SSH private key -- **Variables**: Host list, user/group settings +- **Secrets**: SSH private key, AppDynamics account access key, API token +- **Variables**: Host list, SSH user, optional Smart Agent user/group ## Scaling Considerations diff --git a/AWS_REMEDIATION.md b/AWS_REMEDIATION.md new file mode 100644 index 0000000..6d29162 --- /dev/null +++ b/AWS_REMEDIATION.md @@ -0,0 +1,57 @@ +# AWS Remediation Notes + +## Current Read-Only Findings + +The Smart Agent lab resources were observed in `us-east-1` under +`vpc-02ce435bce6cf9069`. + +Notable drift from the documented private-only design: + +- Several Smart Agent Linux targets have public IPv4 addresses. +- The `smartagent-demo` security group includes inbound `443` from + `0.0.0.0/0`. +- Linux Smart Agent targets also have the + `Cisco_Known_Address_Ranges_3389` security group attached, which is RDP + oriented and not needed for Linux SSH deployment. +- The VPC route table has an internet gateway route. That is compatible with + public subnets, but targets do not need public reachability for this workflow. 
+ +The two observed EKS clusters are unrelated to this repository's Smart Agent +GitHub Actions workflow and should be handled separately. + +## Recommended Sequence + +1. Confirm the self-hosted runner can reach all target private IPs and GitHub + over HTTPS without using target public IPs. +2. Create a dedicated runner security group and target security group: + - runner egress: HTTPS to GitHub/AppDynamics as required, SSH to target SG. + - target ingress: SSH only from runner SG. + - target egress: AppDynamics controller endpoints and package/update + repositories as required. +3. Remove public IPv4 assignment from target instances after confirming private + SSH works. +4. Remove `443` from `0.0.0.0/0` on `smartagent-demo` unless a documented + inbound service requires it. +5. Detach `Cisco_Known_Address_Ranges_3389` from Linux Smart Agent targets. +6. Prefer Session Manager or a bastion for admin access instead of direct public + SSH/RDP rules. + +## Verification Commands + +Run these read-only checks after any AWS changes: + +```bash +aws ec2 describe-instances \ + --region us-east-1 \ + --filters Name=vpc-id,Values=vpc-02ce435bce6cf9069 \ + --query 'Reservations[].Instances[].{Name:Tags[?Key==`Name`]|[0].Value,State:State.Name,PrivateIp:PrivateIpAddress,PublicIp:PublicIpAddress,SecurityGroups:SecurityGroups[].GroupName}' \ + --output table + +aws ec2 describe-security-groups \ + --region us-east-1 \ + --group-names smartagent-demo \ + --query 'SecurityGroups[0].IpPermissions[].{Protocol:IpProtocol,From:FromPort,To:ToPort,Cidrs:IpRanges[].CidrIp,SourceGroups:UserIdGroupPairs[].GroupId,PrefixLists:PrefixListIds[].PrefixListId}' \ + --output table +``` + +Do not apply destructive network changes while a deployment run is active. 
diff --git a/DEPLOYMENT_GUIDE.md b/DEPLOYMENT_GUIDE.md index c077cce..39d1e53 100644 --- a/DEPLOYMENT_GUIDE.md +++ b/DEPLOYMENT_GUIDE.md @@ -1,458 +1,157 @@ # AppDynamics Smart Agent Deployment Guide ## Overview -This guide documents the automated deployment of AppDynamics Smart Agent to multiple Ubuntu hosts using GitHub Actions with a self-hosted runner. -## Architecture +This repository deploys and manages AppDynamics Smart Agent on Ubuntu EC2 hosts +from GitHub Actions running on a self-hosted runner in the same AWS VPC. -### Lab Setup -This lab demonstrates automated AppDynamics Smart Agent management across multiple EC2 instances: +All lifecycle workflows are manual, batch hosts from `DEPLOYMENT_HOSTS`, and +process one batch at a time while running SSH work in parallel inside each +batch. -- **AWS Environment**: All resources deployed in a single AWS VPC -- **Security Group**: All EC2 instances (runner and targets) share the same security group -- **Self-hosted Runner**: One EC2 instance running the GitHub Actions runner -- **Target Hosts**: Multiple Ubuntu EC2 instances within the same VPC -- **Network Access**: Private IP communication between runner and targets via port 22 (SSH) +## Required Configuration -### Components -- **GitHub Actions Workflows**: 11 workflows orchestrating agent lifecycle management -- **Self-hosted Runner**: EC2 instance executing workflows from within the VPC -- **Target Hosts**: Ubuntu EC2 servers receiving AppDynamics agents -- **GitHub Repository**: Stores workflow configurations and deployment artifacts +### GitHub Secrets -### Workflow Design -All workflows use a consistent two-job approach: -1. **Prepare Job**: Loads target hosts from GitHub variables and creates a dynamic matrix -2. 
**Action Job**: Runs in parallel for each host, executing the specific operation +Set these in **Settings -> Secrets and variables -> Actions -> Secrets**: -## Prerequisites +- `SSH_PRIVATE_KEY`: PEM private key used by the runner to SSH to targets. +- `APPD_ACCOUNT_ACCESS_KEY`: AppDynamics account access key. Do not store this + as a repository variable. +- `CLIENT_INVENTORY_API_TOKEN`: token sent in the `X-SF-Token` header for + Client Inventory API checks. This is separate from `APPD_ACCOUNT_ACCESS_KEY`. -### Infrastructure -- AWS VPC with EC2 instances -- Self-hosted GitHub Actions runner deployed in the same VPC -- Target Ubuntu hosts with SSH access from the runner -- SSH key pair (PEM file) for authentication +If `ACCOUNT_ACCESS_KEY` exists as an Actions variable, delete it after creating +the secret. The key was historically committed, so rotate it in AppDynamics and +rewrite git history before treating this repository as clean. -### Software -- GitHub account with repository access -- AWS account with EC2 instances -- GitHub CLI (`gh`) for management (optional) +### GitHub Variables -## Setup Steps +Set these in **Settings -> Secrets and variables -> Actions -> Variables**: -### 1. Repository Setup +- `DEPLOYMENT_HOSTS`: one target hostname or private IP per line. +- `SSH_USER`: optional target SSH user. Defaults to `ubuntu`. +- `SMARTAGENT_USER`: optional service user for Smart Agent. +- `SMARTAGENT_GROUP`: optional service group for Smart Agent. +- `CLIENT_INVENTORY_API_BASE_URL`: optional Client Inventory API base URL. + Defaults to `https://<ControllerURL>/fm-service/v1`. +- `CLIENT_INVENTORY_SAMPLE_SIZE`: optional number of clients sampled by API + checks. Defaults to `1`. -Create a new GitHub repository: -```bash -mkdir github-action-lab -cd github-action-lab -git init -``` - -### 2. 
Create Workflow File - -Create the GitHub Actions workflow at `.github/workflows/deploy-agent.yml`: - -```yaml -name: Deploy AppDynamics Smart Agent - -on: - workflow_dispatch: # Manual trigger - push: - branches: - - main - -jobs: - prepare: - runs-on: self-hosted - outputs: - hosts: ${{ steps.set-matrix.outputs.hosts }} - steps: - - id: set-matrix - run: | - HOSTS=$(echo "${{ vars.DEPLOYMENT_HOSTS }}" | tr -d '\r' | grep -v '^$' | xargs -n1 | jq -R . | jq -s -c .) - echo "hosts=$HOSTS" >> $GITHUB_OUTPUT - - deploy: - needs: prepare - runs-on: self-hosted - strategy: - matrix: - host: ${{ fromJson(needs.prepare.outputs.hosts) }} - - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Setup SSH key - run: | - mkdir -p ~/.ssh - echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_rsa - chmod 600 ~/.ssh/id_rsa - echo "Host *" > ~/.ssh/config - echo " StrictHostKeyChecking no" >> ~/.ssh/config - echo " UserKnownHostsFile=/dev/null" >> ~/.ssh/config - chmod 600 ~/.ssh/config - - - name: Copy agent zip to remote host - run: | - scp -i ~/.ssh/id_rsa appdsmartagent_64_linux_25.10.0.497.zip ubuntu@${{ matrix.host }}:/tmp/ - - - name: Copy config.ini to remote host - run: | - scp -i ~/.ssh/id_rsa config.ini ubuntu@${{ matrix.host }}:/tmp/ - - - name: Deploy and start agent - run: | - ssh -i ~/.ssh/id_rsa ubuntu@${{ matrix.host }} << 'EOF' - # Install unzip if not present - sudo apt-get update -qq - sudo apt-get install -y unzip - - # Extract and deploy - cd /tmp - sudo mkdir -p /opt/appdynamics - sudo unzip -o appdsmartagent_64_linux_25.10.0.497.zip -d /tmp/agent - sudo cp -r /tmp/agent/* /opt/appdynamics/ - sudo cp config.ini /opt/appdynamics/config.ini - - # Start the agent - cd /opt/appdynamics - sudo ./smartagentctl start --enable-auto-attach --service - EOF -``` +`SMARTAGENT_USER` and `SMARTAGENT_GROUP` must be set together. -### 3. 
Add Deployment Artifacts +### Runner Prerequisites -Place these files in the repository root: -- `appdsmartagent_64_linux_25.10.0.497.zip` - AppDynamics Smart Agent package -- `config.ini` - Agent configuration file +The self-hosted runner needs: -### 4. Configure Self-Hosted Runner +- `bash` +- `jq` +- `sha256sum` +- `ssh`, `scp`, and `ssh-keyscan` -#### Install Runner on EC2 Instance +The deploy workflow installs `unzip` on targets with `apt-get` before extracting +the Smart Agent package. -1. Launch an EC2 instance in your VPC (Amazon Linux 2 or Ubuntu) -2. Navigate to repository settings: `https://github.com/YOUR_USERNAME/YOUR_REPO/settings/actions/runners/new` -3. SSH into the runner instance and execute the installation commands provided by GitHub +## Client Inventory API Checks -Example: -```bash -# Download -mkdir actions-runner && cd actions-runner -curl -o actions-runner-linux-x64-2.311.0.tar.gz -L https://github.com/actions/runner/releases/download/v2.311.0/actions-runner-linux-x64-2.311.0.tar.gz -tar xzf ./actions-runner-linux-x64-2.311.0.tar.gz +`openapi.json` documents the Client Inventory API used for review-time API +validation. The checker validates the spec and exercises: -# Configure -./config.sh --url https://github.com/YOUR_USERNAME/YOUR_REPO --token YOUR_TOKEN +- `GET /clients` +- `GET /clients/{id}` +- `GET /clients/{id}/config` +- `POST /clients/configs:batch` -# Install as service -sudo ./svc.sh install -sudo ./svc.sh start -``` +The standalone `12. Check Client Inventory API` workflow can fail when the live +API, URL, or token is wrong. The 11 lifecycle workflows run the same check after +their lifecycle action in warning-only mode so API readiness is visible without +blocking deploy/install/uninstall/cleanup while no test cluster is available. -#### Verify Runner Status -Check that the runner appears as "Idle" (green) in: -`https://github.com/YOUR_USERNAME/YOUR_REPO/settings/actions/runners` +## Workflows -### 5. 
Configure GitHub Secrets +The repository contains 12 manual workflows: -Navigate to: `https://github.com/YOUR_USERNAME/YOUR_REPO/settings/secrets/actions` +- `1. Deploy Smart Agent` +- `2. Install Machine Agent` +- `3. Install Java Agent` +- `4. Install Node Agent` +- `5. Install Database Agent` +- `6. Stop and Clean Smart Agent` +- `7. Uninstall Machine Agent` +- `8. Uninstall Java Agent` +- `9. Uninstall Node Agent` +- `10. Uninstall Database Agent` +- `11. Cleanup Smart Agent Directory` +- `12. Check Client Inventory API` -#### Add SSH Private Key Secret +All workflows accept a numeric `batch_size` input from `1` to `256`; the default +is `256`. -1. Click **"New repository secret"** -2. Name: `SSH_PRIVATE_KEY` -3. Value: Paste the contents of your PEM file - ```bash - cat /path/to/your-key.pem - ``` -4. Click **"Add secret"** - -### 6. Configure GitHub Variables - -Navigate to: `https://github.com/YOUR_USERNAME/YOUR_REPO/settings/variables/actions` - -#### Add Deployment Hosts Variable +Example: -1. Click **"New repository variable"** -2. Name: `DEPLOYMENT_HOSTS` -3. Value: Enter your target host IPs (one per line) - ``` - 172.31.1.243 - 172.31.1.48 - 172.31.1.5 - ``` -4. Click **"Add variable"** - -### 7. Configure Optional Variables (for Smart Agent user/group) - -Navigate to: `https://github.com/YOUR_USERNAME/YOUR_REPO/settings/variables/actions` - -1. Click **"New repository variable"** -2. Name: `SMARTAGENT_USER` (e.g., `appdynamics`) -3. Click **"Add variable"** -4. Repeat for `SMARTAGENT_GROUP` (e.g., `appdynamics`) - -These are optional and only used during initial Smart Agent deployment. - -### 8. 
Network Configuration - -For this lab setup with all EC2 instances in the same VPC and security group: -- **Security Group Rules**: - - Allow inbound SSH (port 22) within the security group (source: same security group) - - Allow outbound HTTPS (port 443) to 0.0.0.0/0 (for GitHub API access) -- **Private IPs**: Use private IP addresses (172.31.x.x) for `DEPLOYMENT_HOSTS` -- **No public IPs needed**: Runner communicates with targets via private network - -## Available Workflows - -This repository includes **11 workflows** for complete Smart Agent lifecycle management: - -### Deployment (1 workflow) - -1. **Deploy Smart Agent (Batched)** - Installs Smart Agent and starts the service - - **Automatic batching:** Splits host list into configurable batch sizes (default: 256) - - **Sequential batch execution:** Processes batches one at a time to avoid overwhelming resources - - **Parallel within batch:** All hosts in a batch deploy simultaneously - - **Configurable:** Set custom batch size via workflow input - - Supports optional `--user` and `--group` parameters via GitHub variables - - Manual trigger only - - **Works for any scale:** 1 host to thousands - - **Example:** 1,500 hosts → 6 batches × 256 hosts = 6 sequential jobs - -### Agent Installation - Batched (4 workflows, manual trigger only) -2. **Install Node Agent (Batched)** - `smartagentctl install node` -3. **Install Machine Agent (Batched)** - `smartagentctl install machine` -4. **Install DB Agent (Batched)** - `smartagentctl install db` -5. **Install Java Agent (Batched)** - `smartagentctl install java` - -All install workflows support: -- Configurable batch size (default: 256) -- Sequential batch processing -- Parallel execution within each batch -- Works for any number of hosts (1 to thousands) - -### Agent Uninstallation - Batched (4 workflows, manual trigger only) -6. **Uninstall Node Agent (Batched)** - `smartagentctl uninstall node` -7.
**Uninstall Machine Agent (Batched)** - `smartagentctl uninstall machine` -8. **Uninstall DB Agent (Batched)** - `smartagentctl uninstall db` -9. **Uninstall Java Agent (Batched)** - `smartagentctl uninstall java` - -All uninstall workflows support: -- Configurable batch size (default: 256) -- Sequential batch processing -- Parallel execution within each batch -- Works for any number of hosts (1 to thousands) - -### Smart Agent Management - Batched (2 workflows, manual trigger only) -10. **Stop and Clean Smart Agent (Batched)** - `smartagentctl stop` + `smartagentctl clean` - - Stops the Smart Agent service and purges all data - - Configurable batch size (default: 256) - - Works for any number of hosts (1 to thousands) - -11. **Cleanup All Agents (Batched)** - `sudo rm -rf /opt/appdynamics` - - Completely removes the /opt/appdynamics directory - - Configurable batch size (default: 256) - - Works for any number of hosts (1 to thousands) - - Use for complete removal of all AppDynamics components - -## Running Workflows - -### Manual Trigger (CLI) ```bash -# Deploy Smart Agent (batched) -gh workflow run "Deploy Smart Agent" --repo YOUR_USERNAME/YOUR_REPO - -# With custom batch size -gh workflow run "Deploy Smart Agent" \ - --repo YOUR_USERNAME/YOUR_REPO \ +gh workflow run "1. 
Deploy Smart Agent" \ + --repo chambear2809/github-actions-lab \ -f batch_size=128 - -# Install agents (batched) -gh workflow run "Install Node Agent (Batched for Large Scale)" --repo YOUR_USERNAME/YOUR_REPO -gh workflow run "Install Machine Agent (Batched for Large Scale)" --repo YOUR_USERNAME/YOUR_REPO -gh workflow run "Install DB Agent (Batched for Large Scale)" --repo YOUR_USERNAME/YOUR_REPO -gh workflow run "Install Java Agent (Batched for Large Scale)" --repo YOUR_USERNAME/YOUR_REPO - -# Uninstall agents (batched) -gh workflow run "Uninstall Node Agent (Batched for Large Scale)" --repo YOUR_USERNAME/YOUR_REPO -gh workflow run "Uninstall Machine Agent (Batched for Large Scale)" --repo YOUR_USERNAME/YOUR_REPO -gh workflow run "Uninstall DB Agent (Batched for Large Scale)" --repo YOUR_USERNAME/YOUR_REPO -gh workflow run "Uninstall Java Agent (Batched for Large Scale)" --repo YOUR_USERNAME/YOUR_REPO - -# Stop and clean (batched) -gh workflow run "Stop and Clean Smart Agent (Batched for Large Scale)" --repo YOUR_USERNAME/YOUR_REPO - -# Cleanup all agents (batched - complete removal) -gh workflow run "Cleanup All Agents" --repo YOUR_USERNAME/YOUR_REPO -``` - -### Manual Trigger (GitHub UI) -1. Go to **Actions** tab -2. Select the desired workflow from the left sidebar -3. Click **"Run workflow"** -4. Select branch (main) -5. Optionally adjust batch size -6. 
Click **"Run workflow"** - -## Monitoring and Troubleshooting - -### View Workflow Status -```bash -gh run list --repo YOUR_USERNAME/YOUR_REPO -``` - -### View Specific Run Details -```bash -gh run view RUN_ID --repo YOUR_USERNAME/YOUR_REPO -``` - -### View Failed Logs -```bash -gh run view RUN_ID --log-failed --repo YOUR_USERNAME/YOUR_REPO ``` -### Common Issues - -#### Runner Not Picking Up Jobs -- Verify runner status: Check if it's online in repository settings -- Check runner service: `sudo systemctl status actions.runner.*` -- Verify outbound HTTPS (443) connectivity to GitHub - -#### SSH Connection Failures -- Verify SSH key is correctly configured in secrets -- Ensure runner can reach target hosts on port 22 -- Check security group rules - -#### "hostname contains invalid characters" -- Ensure `DEPLOYMENT_HOSTS` variable has clean newline-separated IPs -- No trailing spaces or special characters - -## Scaling to Thousands of Hosts - -### Adding New Hosts -Simply update the `DEPLOYMENT_HOSTS` variable: -1. Go to repository variables settings -2. Edit `DEPLOYMENT_HOSTS` -3. Add new IPs (one per line) -4. Save changes - -### Batched Workflows - -All workflows use batching for: -- **Any scale** - Works with 1 host to thousands -- **Optimized for large-scale** - No GitHub Actions matrix limit -- **Configurable batching** - Adjust batch size based on your needs -- **All operations** - Deploy, install, uninstall, stop-clean, cleanup -- Manual trigger with optional batch size customization +## How Deployment Works + +1. The prepare job validates `DEPLOYMENT_HOSTS`, rejects empty or duplicate + host lists, validates `batch_size`, and emits a batch matrix. +2. The batch job checks out the repository and runs the shared workflow script. +3. The deploy script verifies the Smart Agent zip checksum using + `.github/checksums/appdsmartagent_64_linux_25.12.0.661.zip.sha256`. +4. 
The deploy script writes a temporary `config.ini` with + `APPD_ACCOUNT_ACCESS_KEY` substituted for `{{ACCOUNT_ACCESS_KEY}}`. +5. For each host, the script gathers the SSH host key into a per-run + `known_hosts` file, copies the zip and config to `/tmp`, extracts into + `/opt/appdynamics/appdsmartagent`, fixes ownership, and starts the service. + +The install, uninstall, stop-clean, and directory-cleanup workflows use the same +validated batching and SSH path. All lifecycle workflows also run a warning-only +Client Inventory API check after the lifecycle action completes. + +## Security Notes + +- Rotate the historical AppDynamics access key before using this repository for + real environments. +- Store the replacement in `APPD_ACCOUNT_ACCESS_KEY`, not `ACCOUNT_ACCESS_KEY`. +- Store the Client Inventory API token in `CLIENT_INVENTORY_API_TOKEN`; do not + reuse `APPD_ACCOUNT_ACCESS_KEY` for this API. +- The workflows use per-run host-key discovery. For production, replace this + with pinned host keys or an internal SSH CA so host identity is independently + trusted. +- Keep workflow file changes protected with branch protection and review + requirements. +- Keep the runner and targets on private addresses. See `AWS_REMEDIATION.md` + for the current environment drift and proposed remediation sequence. -### How Batching Works - -**The Challenge:** GitHub Actions limits matrix jobs to 256. - -**The Solution:** The batched workflow automatically: -1. **Splits** your host list into batches of N hosts (default 256) -2. **Creates** one matrix job per batch -3. **Processes** batches sequentially to avoid overwhelming the runner -4. 
**Deploys** to all hosts within each batch in parallel using background processes +## Maintenance -**Example Scenarios:** -- **500 hosts**: 2 batches × 256 hosts = 2 sequential jobs -- **1,000 hosts**: 4 batches × 256 hosts = 4 sequential jobs -- **5,000 hosts**: 20 batches × 256 hosts = 20 sequential jobs +### Updating Smart Agent -### Performance Tuning +1. Replace `appdsmartagent_64_linux_25.12.0.661.zip`. +2. Update `.github/checksums/appdsmartagent_64_linux_25.12.0.661.zip.sha256`. +3. Run: -#### Batch Size -Adjust based on your runner's resources: -```bash -# Smaller batches (less resource intensive) -gh workflow run "Deploy Smart Agent" \ - --repo YOUR_USERNAME/YOUR_REPO -f batch_size=128 + ```bash + shasum -a 256 appdsmartagent_64_linux_25.12.0.661.zip + ``` -# Larger batches (faster, more resource intensive) -gh workflow run "Deploy Smart Agent" \ - --repo YOUR_USERNAME/YOUR_REPO -f batch_size=256 -``` +4. Commit the new zip, checksum, and any config updates. -#### Runner Resources -- **CPU**: More cores = better parallel SSH performance -- **Memory**: 8GB+ recommended for 256 parallel connections -- **Network**: Bandwidth scales with parallel connections +### Validating Locally -### Monitoring Large Deployments +Run these checks before pushing: -View batch progress: ```bash -gh run list --workflow="deploy-agent-batched.yml" --repo YOUR_USERNAME/YOUR_REPO -gh run view RUN_ID --repo YOUR_USERNAME/YOUR_REPO +jq empty openapi.json +yamllint .github/workflows/*.yml +shellcheck .github/scripts/*.sh +bash .github/scripts/test-create-batches.sh +bash .github/scripts/test-client-inventory-api.sh ``` -Each batch logs: -- Number of hosts in batch -- Per-host deployment status -- Batch completion summary - -## Security Best Practices - -1. **SSH Key Management** - - Use GitHub Secrets for private keys (never commit to repository) - - Rotate SSH keys regularly - - Use separate keys for different environments - -2.
**Runner Security** - - Keep runner in private VPC subnet - - Restrict runner security group to minimal required access - - Update runner software regularly - -3. **Access Control** - - Limit repository access to authorized users - - Use branch protection rules on `main` - - Enable required reviews for workflow changes - -## Repository Structure - -``` -github-action-lab/ -├── .github/ -│ └── workflows/ -│ ├── deploy-agent-batched.yml # Deploy Smart Agent (batched) -│ ├── install-node-batched.yml # Install node agent (batched) -│ ├── install-machine-batched.yml # Install machine agent (batched) -│ ├── install-db-batched.yml # Install db agent (batched) -│ ├── install-java-batched.yml # Install java agent (batched) -│ ├── uninstall-node-batched.yml # Uninstall node agent (batched) -│ ├── uninstall-machine-batched.yml # Uninstall machine agent (batched) -│ ├── uninstall-db-batched.yml # Uninstall db agent (batched) -│ ├── uninstall-java-batched.yml # Uninstall java agent (batched) -│ ├── stop-clean-smartagent-batched.yml # Stop and clean Smart Agent (batched) -│ └── cleanup-appdynamics.yml # Cleanup all agents - remove /opt/appdynamics -├── appdsmartagent_64_linux_25.10.0.497.zip -├── config.ini -├── hosts.txt (optional reference) -├── README.md -├── DEPLOYMENT_GUIDE.md -├── ARCHITECTURE.md -└── .gitignore -``` - -## Maintenance - -### Updating Agent Version -1. Replace `appdsmartagent_64_linux_25.10.0.497.zip` with new version -2. Update filename references in workflow if version number changes -3. Commit and push to trigger deployment - -### Updating Configuration -1. Modify `config.ini` -2. Commit and push -3. Workflow will deploy updated configuration to all hosts - -## Repository - -This deployment solution is available at: -**https://github.com/chambear2809/github-actions-lab** - -Clone and adapt for your own AppDynamics Smart Agent deployments! +Install and run `actionlint` as an additional GitHub Actions syntax check when +available.
diff --git a/README.md b/README.md index 8301767..006c085 100644 --- a/README.md +++ b/README.md @@ -43,8 +43,10 @@ cd github-actions-lab Navigate to: **Settings → Secrets and variables → Actions** -**Required Secret:** +**Required Secrets:** - `SSH_PRIVATE_KEY` - Your SSH private key (PEM format) +- `APPD_ACCOUNT_ACCESS_KEY` - Your AppDynamics account access key +- `CLIENT_INVENTORY_API_TOKEN` - Token sent as `X-SF-Token` for API checks ### 3️⃣ Set Up GitHub Variables @@ -59,24 +61,27 @@ Navigate to: **Settings → Secrets and variables → Actions → Variables** ``` **Optional Variables:** +- `SSH_USER` - SSH user for target hosts (default: `ubuntu`) - `SMARTAGENT_USER` - User for Smart Agent service (e.g., `appdynamics`) - `SMARTAGENT_GROUP` - Group for Smart Agent service (e.g., `appdynamics`) +- `CLIENT_INVENTORY_API_BASE_URL` - Optional API base URL override +- `CLIENT_INVENTORY_SAMPLE_SIZE` - Optional client sample size (default: `1`) ### 4️⃣ Deploy! **Via GitHub UI:** 1. Go to **Actions** tab -2. Select **"Deploy Smart Agent"** +2. Select **"1. Deploy Smart Agent"** 3. Click **"Run workflow"** 4. Optionally adjust batch size (default: 256) 5. Click **"Run workflow"** **Via GitHub CLI:** ```bash -gh workflow run "Deploy Smart Agent" --repo chambear2809/github-actions-lab +gh workflow run "1. Deploy Smart Agent" --repo chambear2809/github-actions-lab # With custom batch size -gh workflow run "Deploy Smart Agent" --repo chambear2809/github-actions-lab -f batch_size=128 +gh workflow run "1.
Deploy Smart Agent" --repo chambear2809/github-actions-lab -f batch_size=128 ``` ## 📋 Available Workflows @@ -106,29 +111,37 @@ gh workflow run "Deploy Smart Agent" --repo chambear2809/github-actions-lab -f b | Workflow | Description | Scale | Trigger | |----------|-------------|-------|----------| | **Stop and Clean Smart Agent (Batched)** | Stops service and purges data | Any | Manual only | -| **Cleanup All Agents (Batched)** | Deletes /opt/appdynamics directory | Any | Manual only | +| **Cleanup Smart Agent Directory** | Clears /opt/appdynamics/appdsmartagent | Any | Manual only | -**Total: 11 workflows** - All batched workflows support configurable batch sizes (default: 256) +### API Validation (1 workflow) +| Workflow | Description | Trigger | +|----------|-------------|----------| +| **Check Client Inventory API** | Validates `openapi.json` and checks live API operations | Manual only | + +**Total: 12 workflows** - The 11 lifecycle workflows support configurable batch sizes (default: 256) ## 📚 Documentation - **[Deployment Guide](DEPLOYMENT_GUIDE.md)** - Complete setup and configuration instructions - **[Architecture Diagrams](ARCHITECTURE.md)** - Visual infrastructure and workflow diagrams +- **[AWS Remediation Notes](AWS_REMEDIATION.md)** - Current lab drift and hardening sequence ## 🛠️ How It Works -1. **Developer** pushes code or manually triggers a workflow +1. **Developer** manually triggers a workflow 2. **GitHub Actions** receives the event and assigns job to self-hosted runner 3. **Runner** loads target hosts from GitHub variables 4. **Parallel Execution** - Runner SSHs into each target host simultaneously 5. **Commands Execute** - Install/uninstall/stop/clean operations run on each host -6. **Results Reported** - Success/failure status sent back to GitHub +6. **API Check Runs** - Client Inventory API check runs in warning-only mode +7.
**Results Reported** - Success/failure status sent back to GitHub ## 🔐 Security - **Private Network** - All communication via VPC private IPs -- **SSH Keys** - Stored securely as GitHub secrets -- **No Public Access** - Target hosts don't need public IPs +- **Secrets** - SSH, AppDynamics, and API credentials are stored as GitHub secrets +- **Host Keys** - Workflows build a per-run `known_hosts` file before SSH +- **Private-first** - Target hosts do not require public IPs for deployment - **Security Group** - Restricts SSH access to runner only ## 📈 Scaling diff --git a/openapi.json b/openapi.json new file mode 100644 index 0000000..422ff95 --- /dev/null +++ b/openapi.json @@ -0,0 +1,754 @@ +{ + "openapi": "3.0.3", + "info": { + "title": "Client Inventory API", + "description": "REST API for managing OpAMP client inventory.\n\nUse this API to browse registered clients, inspect individual client records, and retrieve the effective configuration currently associated with a client.\n\n## Authentication\nSend an access token in the `X-SF-Token` header.\n\nExample: `X-SF-Token: <access-token>`\n\n## Base Path\nAll endpoints in this specification are relative to `/fm-service/v1`.\n\n## Endpoints\n- `GET /clients` List clients\n- `GET /clients/{id}` Get client by ID\n- `GET /clients/{id}/config` Get client effective configuration\n- `POST /clients/configs:batch` Batch get client effective configurations", + "version": "1.0.0", + "contact": { + "name": "Agent Management Team" + } + }, + "servers": [ + { + "url": "{protocol}://{host}/fm-service/v1", + "description": "Base URL for the Client Inventory API", + "variables": { + "protocol": { + "default": "https", + "enum": ["http", "https"] + }, + "host": { + "default": "localhost:50028" + } + } + } + ], + "tags": [ + { + "name": "clients", + "description": "Browse registered clients and retrieve effective client configuration" + } + ], + "paths": { + "/clients": { + "get": { + "tags": ["clients"], + "summary": "List clients", +
"description": "Retrieve a paginated inventory of registered clients. Use metadata filters to narrow results for environments, services, regions, or any other client-reported attributes.\n\n### Attribute Filtering\n\nUse the `filter` parameter with `key:value` format. Repeat the parameter to apply multiple filters.\n\n**Filtering Logic:**\n- Different keys = **AND** logic\n- Same key with multiple values = **OR** logic\n\n### Health Data\n\nHealth data is included by default. For analytical or high-volume queries where health details are not needed, set `include_health=false`.\n\n### Example Queries\n\n- Basic pagination: `GET /clients?limit=100&offset=0`\n- Single filter: `GET /clients?filter=env:prod`\n- AND across different keys: `GET /clients?filter=env:prod&filter=region:us-east`\n- OR for repeated same key: `GET /clients?filter=region:us-east&filter=region:us-west`\n- Exclude health data: `GET /clients?filter=service.name:otel-collector&include_health=false`", + "operationId": "listClients", + "parameters": [ + { + "name": "limit", + "in": "query", + "description": "Size of page (number of items to return). The server enforces a configurable maximum (default: 1000).", + "schema": { + "type": "integer", + "minimum": 1, + "default": 100 + }, + "example": 100 + }, + { + "name": "offset", + "in": "query", + "description": "Offset for pagination (number of items to skip)", + "schema": { + "type": "integer", + "minimum": 0, + "default": 0 + }, + "example": 0 + }, + { + "name": "include_health", + "in": "query", + "description": "Whether to include health data in the response. Default: true. Set to false to exclude health data for analytical queries.", + "schema": { + "type": "boolean", + "default": true + }, + "example": false + }, + { + "name": "filter", + "in": "query", + "description": "Filter by client metadata attribute in `key:value` format. Repeat to apply multiple filters. 
Multiple values for the same key = OR logic; different keys = AND logic.", + "schema": { + "type": "array", + "items": { "type": "string" } + }, + "style": "form", + "explode": true, + "examples": { + "singleAttribute": { + "summary": "Filter by a single attribute", + "value": ["env:prod"] + }, + "andAcrossKeys": { + "summary": "AND logic across different attributes", + "value": ["env:prod", "region:us-east"] + }, + "orWithinKey": { + "summary": "OR logic for multiple values of the same attribute", + "value": ["region:us-east", "region:us-west"] + }, + "serviceFilter": { + "summary": "Filter by service name", + "value": ["service.name:otel-collector"] + } + } + } + ], + "responses": { + "200": { + "description": "Successful response with paginated client list", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ClientListResponse" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest" + }, + "401": { + "$ref": "#/components/responses/Unauthorized" + }, + "403": { + "$ref": "#/components/responses/Forbidden" + }, + "500": { + "$ref": "#/components/responses/InternalServerError" + }, + "503": { + "$ref": "#/components/responses/ServiceUnavailable" + } + } + } + }, + "/clients/{id}": { + "get": { + "tags": ["clients"], + "summary": "Get client by ID", + "description": "Retrieve the current record for a single client by instance UID, including metadata, timestamps, and health information when available.", + "operationId": "getClient", + "parameters": [ + { + "name": "id", + "in": "path", + "required": true, + "description": "Client instance UID", + "schema": { + "type": "string" + }, + "example": "550e8400-e29b-41d4-a716-446655440000" + } + ], + "responses": { + "200": { + "description": "Successful response with client details", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Client" + } + } + } + }, + "401": { + "$ref": "#/components/responses/Unauthorized" + }, + "403": { + 
"$ref": "#/components/responses/Forbidden" + }, + "404": { + "$ref": "#/components/responses/NotFound" + }, + "500": { + "$ref": "#/components/responses/InternalServerError" + }, + "503": { + "$ref": "#/components/responses/ServiceUnavailable" + } + } + } + }, + "/clients/{id}/config": { + "get": { + "tags": ["clients"], + "summary": "Get client effective configuration", + "description": "Retrieve the effective configuration for a specific client. The response is keyed by config file name. Config file bodies are returned as base64-encoded bytes. Pass `stringify=true` to also receive UTF-8 string content for human-readable formats such as YAML or JSON.", + "operationId": "getClientConfig", + "parameters": [ + { + "name": "id", + "in": "path", + "required": true, + "description": "Client instance UID", + "schema": { + "type": "string" + }, + "example": "550e8400-e29b-41d4-a716-446655440000" + }, + { + "name": "stringify", + "in": "query", + "required": false, + "description": "When true, also returns config body as a UTF-8 string in the bodyStr field. 
Useful for human-readable formats such as application/yaml or application/json.", + "schema": { + "type": "boolean", + "default": false + }, + "example": true + } + ], + "responses": { + "200": { + "description": "Effective configuration for the client", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ClientConfigResponse" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest" + }, + "401": { + "$ref": "#/components/responses/Unauthorized" + }, + "403": { + "$ref": "#/components/responses/Forbidden" + }, + "404": { + "$ref": "#/components/responses/NotFound" + }, + "500": { + "$ref": "#/components/responses/InternalServerError" + }, + "503": { + "$ref": "#/components/responses/ServiceUnavailable" + } + } + } + }, + "/clients/configs:batch": { + "post": { + "tags": ["clients"], + "summary": "Batch get client effective configurations", + "description": "Retrieve effective configurations for multiple clients in a single request. The response returns a map of `clientId` to configuration for found clients and lists missing client IDs in `notFound`.", + "operationId": "batchGetConfigs", + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BatchGetConfigRequest" + }, + "examples": { + "basicBatch": { + "summary": "Fetch configs for multiple clients", + "value": { + "clientIds": [ + "550e8400-e29b-41d4-a716-446655440000", + "550e8400-e29b-41d4-a716-446655440001" + ] + } + }, + "stringifiedBatch": { + "summary": "Fetch configs and include UTF-8 string content", + "value": { + "clientIds": [ + "550e8400-e29b-41d4-a716-446655440000", + "550e8400-e29b-41d4-a716-446655440001" + ], + "stringify": true + } + } + } + } + } + }, + "responses": { + "200": { + "description": "Effective configurations for the requested clients", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BatchGetConfigResponse" + } + } + } + }, + 
"400": { + "$ref": "#/components/responses/BadRequest" + }, + "401": { + "$ref": "#/components/responses/Unauthorized" + }, + "403": { + "$ref": "#/components/responses/Forbidden" + }, + "500": { + "$ref": "#/components/responses/InternalServerError" + }, + "503": { + "$ref": "#/components/responses/ServiceUnavailable" + } + } + } + } + }, + "components": { + "schemas": { + "Client": { + "type": "object", + "required": ["instance_uid", "org_id", "metadata", "created_at", "updated_at"], + "properties": { + "instance_uid": { + "type": "string", + "description": "Unique client identifier", + "example": "550e8400-e29b-41d4-a716-446655440000" + }, + "org_id": { + "type": "string", + "description": "Organization ID the client belongs to", + "example": "acme-corp" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "type": "string" + }, + "description": "Flat key-value attributes reported by the OpAMP client. Includes both identifying and non-identifying attributes.", + "example": { + "service.name": "otel-collector", + "service.version": "0.95.0", + "host.name": "prod-collector-01", + "os.type": "linux" + } + }, + "health": { + "description": "Component health status from OpAMP ComponentHealth proto. For list: included by default, exclude with include_health=false. 
For get: always included if available.", + "allOf": [ + { "$ref": "#/components/schemas/ComponentHealth" } + ] + }, + "created_at": { + "type": "string", + "format": "date-time", + "description": "Timestamp when client was first registered", + "example": "2024-01-15T10:30:00Z" + }, + "updated_at": { + "type": "string", + "format": "date-time", + "description": "Timestamp of last state change from client (metadata, config, health, status, etc.)", + "example": "2024-01-28T14:22:35Z" + }, + "last_heartbeat_at": { + "type": "string", + "format": "date-time", + "description": "Timestamp when the server last received a heartbeat from this client.", + "example": "2024-01-28T14:25:10Z" + } + } + }, + "ComponentHealth": { + "type": "object", + "description": "The health of the agent and any nested sub-components. The component_health_map can be nested to represent multi-level component hierarchies.", + "properties": { + "healthy": { + "type": "boolean", + "description": "Set to true if the component is up and healthy", + "example": true + }, + "start_time_unix_nano": { + "type": "string", + "format": "int64", + "description": "Timestamp since the component is up, UNIX Epoch time in nanoseconds.", + "example": "1705323000000000000" + }, + "last_error": { + "type": "string", + "description": "Human-readable error message if the component is in erroneous state", + "example": "Failed to connect to backend: connection timeout" + }, + "status": { + "type": "string", + "description": "Component status represented as a string.", + "example": "running" + }, + "status_time_unix_nano": { + "type": "string", + "format": "int64", + "description": "The time when the component status was observed, UNIX Epoch time in nanoseconds.", + "example": "1706447355000000000" + }, + "component_health_map": { + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/ComponentHealth" + }, + "description": "Map of nested component name to its health object. 
Values use the same recursive ComponentHealth schema.", + "example": { + "receiver": { + "healthy": false, + "last_error": "receiver-error", + "status": "failing", + "status_time_unix_nano": "33" + } + } + } + }, + "example": { + "healthy": true, + "start_time_unix_nano": "11", + "last_error": "top-level-error", + "status": "degraded", + "status_time_unix_nano": "22", + "component_health_map": { + "receiver": { + "healthy": false, + "last_error": "receiver-error", + "status": "failing", + "status_time_unix_nano": "33" + } + } + } + }, + "ClientListResponse": { + "type": "object", + "required": ["clients", "pagination"], + "properties": { + "clients": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Client" + } + }, + "pagination": { + "$ref": "#/components/schemas/PaginationMetadata" + } + } + }, + "PaginationMetadata": { + "type": "object", + "required": ["limit", "offset", "total_count"], + "properties": { + "limit": { + "type": "integer", + "description": "Number of items requested per page", + "example": 100 + }, + "offset": { + "type": "integer", + "description": "Number of items skipped", + "example": 0 + }, + "total_count": { + "type": "integer", + "description": "Total number of items matching the query", + "example": 42573 + }, + "has_next": { + "type": "boolean", + "description": "Whether there are more items available", + "example": true + }, + "next_offset": { + "type": "integer", + "description": "Offset value for the next page (only present if has_next is true)", + "example": 100 + } + } + }, + "AgentConfigFile": { + "type": "object", + "description": "A single config file entry within a client's effective configuration.", + "properties": { + "body": { + "type": "string", + "format": "byte", + "description": "Raw config file content, base64-encoded." + }, + "bodyStr": { + "type": "string", + "description": "Config file content as a UTF-8 string. Only present when stringify=true was requested." 
+ }, + "contentType": { + "type": "string", + "description": "MIME type of the config content.", + "example": "application/yaml" + } + } + }, + "ClientConfigResponse": { + "type": "object", + "description": "Effective configuration for a single client. Keys are config file names, values are the config file contents.", + "additionalProperties": { + "$ref": "#/components/schemas/AgentConfigFile" + }, + "example": { + "controller-info.xml": { + "body": "cmVjZWl2ZXJzOgogIG90bHA6IHt9Cg==", + "bodyStr": "receivers:\n otlp: {}\n", + "contentType": "application/yaml" + } + } + }, + "BatchGetConfigRequest": { + "type": "object", + "required": ["clientIds"], + "properties": { + "clientIds": { + "type": "array", + "items": { + "type": "string" + }, + "description": "List of client instance UIDs to fetch effective configurations for. The server enforces a configurable maximum number of client IDs per request (default: 1000).", + "example": ["550e8400-e29b-41d4-a716-446655440000", "550e8400-e29b-41d4-a716-446655440001"] + }, + "stringify": { + "type": "boolean", + "default": false, + "description": "When true, also returns each config body as a UTF-8 string in the bodyStr field." + } + } + }, + "BatchGetConfigResponse": { + "type": "object", + "properties": { + "configs": { + "type": "object", + "description": "Map of clientId to its effective configuration.", + "additionalProperties": { + "$ref": "#/components/schemas/ClientConfigResponse" + } + }, + "notFound": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Client IDs from the request that were not found." 
+ } + } + }, + "Error": { + "type": "object", + "required": ["error"], + "properties": { + "error": { + "type": "object", + "required": ["code", "message"], + "properties": { + "code": { + "type": "string", + "description": "Machine-readable error code", + "example": "BAD_REQUEST" + }, + "message": { + "type": "string", + "description": "Human-readable error message", + "example": "Bad Request" + }, + "details": { + "type": "object", + "additionalProperties": true, + "description": "Additional error details", + "example": {} + } + } + } + }, + "example": { + "error": { + "code": "BAD_REQUEST", + "message": "Bad Request", + "details": {} + } + } + } + }, + "responses": { + "Unauthorized": { + "description": "Authentication required or the provided X-SF-Token is invalid", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + }, + "examples": { + "missingToken": { + "summary": "No credentials were provided", + "value": { + "error": { + "code": "AUTH_TOKEN_MISSING", + "message": "No valid credentials provided", + "details": {} + } + } + }, + "malformedToken": { + "summary": "Credentials are malformed or unparseable", + "value": { + "error": { + "code": "AUTH_TOKEN_MALFORMED", + "message": "Credentials are malformed or unparseable", + "details": {} + } + } + } + } + } + } + }, + "Forbidden": { + "description": "Access denied", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + }, + "examples": { + "missingScope": { + "summary": "Caller does not have the required scope", + "value": { + "error": { + "code": "AUTH_SCOPE_MISSING", + "message": "Required permission scope is missing", + "details": {} + } + } + }, + "filterNoMatch": { + "summary": "Authorization filter denies access", + "value": { + "error": { + "code": "AUTH_FILTER_NO_MATCH", + "message": "Access denied by authorization filter", + "details": {} + } + } + }, + "featureFlagDenied": { + "summary": "Feature is not enabled for the 
organization", + "value": { + "error": { + "code": "FORBIDDEN", + "message": "feature not available for this organization", + "details": {} + } + } + } + } + } + } + }, + "BadRequest": { + "description": "Application-level validation error. This documented response describes the standard application-level error envelope. In rare lower-level failure paths, the server may return a non-JSON 400 response instead.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + }, + "examples": { + "badRequest": { + "summary": "Application-level request validation failed", + "value": { + "error": { + "code": "BAD_REQUEST", + "message": "Bad Request", + "details": {} + } + } + } + } + } + } + }, + "NotFound": { + "description": "Resource not found", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + }, + "examples": { + "notFound": { + "summary": "The requested client does not exist or is not visible to the caller", + "value": { + "error": { + "code": "NOT_FOUND", + "message": "Not Found", + "details": {} + } + } + } + } + } + } + }, + "InternalServerError": { + "description": "Internal server error. This documented response describes the standard application-level error envelope. 
In rare lower-level failure paths, the server may return a non-JSON 500 response instead.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + }, + "examples": { + "internalServerError": { + "summary": "Unexpected server-side failure", + "value": { + "error": { + "code": "INTERNAL_SERVER_ERROR", + "message": "Internal Server Error", + "details": {} + } + } + } + } + } + } + }, + "ServiceUnavailable": { + "description": "A dependent service required for authentication is unavailable", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + }, + "examples": { + "authProviderUnavailable": { + "summary": "Authentication provider is unavailable", + "value": { + "error": { + "code": "AUTH_PROVIDER_ERROR", + "message": "Authentication service unavailable", + "details": {} + } + } + } + } + } + } + } + }, + "securitySchemes": { + "XSFToken": { + "type": "apiKey", + "in": "header", + "name": "X-SF-Token", + "description": "API access token, sent in the X-SF-Token request header" + } + } + }, + "security": [ + { "XSFToken": [] } + ] +}