- remove hardcoded credentials, tokens, registration codes, and similar secret values from tracked ATVM and CDS MCP docs
- replace those values with references to /home/aw/code/cds/.env.credentials.local and the corresponding environment variable names
- update current operator guides to instruct sourcing .env.credentials.local before credential-dependent setup and automation workflows
- update the ATVM setup scripts to consume ATVM_TARGET_PASSWORD from the environment instead of hardcoding the Ubuntu root SSH password
- scrub the remaining tracked artifact log entry that still included the old CMC registration code
- keep the local-only credential inventory in .env.credentials.local while leaving that file untracked
239 lines
7.9 KiB
Bash
Executable File
239 lines
7.9 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# Controller-side driver for the ATVM client setup script.
#
# Usage:
#   EXPECTED_IP_ARG=<ip> EXPECTED_HOSTNAME_ARG=<name> <this-script>
#       Copy the local setup script to the client, run it, then collect
#       the remote log.
#   <this-script> --collect-after-complete
#       Skip the run; wait for the completion marker in the remote log and
#       collect the log only.
#
# Each setting below falls back to its default only when it is unset or
# empty after the optional credentials file has been sourced.

set -euo pipefail

WORKSPACE_ROOT="${WORKSPACE_ROOT:-/home/aw/code/cds}"
ENV_CREDENTIALS_FILE="${ENV_CREDENTIALS_FILE:-$WORKSPACE_ROOT/.env.credentials.local}"

if [[ -f "$ENV_CREDENTIALS_FILE" ]]; then
  # Load local-only credential defaults for controller-side SSH and remote setup.
  # shellcheck disable=SC1090
  source "$ENV_CREDENTIALS_FILE"
fi

# Candidate addresses of the target; probes try the primary first.
REMOTE_IP_PRIMARY="${REMOTE_IP_PRIMARY:-192.168.0.121}"
REMOTE_IP_SECONDARY="${REMOTE_IP_SECONDARY:-192.168.3.191}"
# SSH login user: REMOTE_USER, else ATVM_TARGET_USER, else root.
REMOTE_USER="${REMOTE_USER:-${ATVM_TARGET_USER:-root}}"

# Local paths.
PROJECT_DIR="${PROJECT_DIR:-/home/aw/code/atvm}"
LOCAL_LOG_DIR="${LOCAL_LOG_DIR:-$PROJECT_DIR/log}"
LOCAL_SETUP_SCRIPT="${LOCAL_SETUP_SCRIPT:-$PROJECT_DIR/atvm_setup_script.sh}"

# Paths on the target.
REMOTE_SETUP_SCRIPT="${REMOTE_SETUP_SCRIPT:-/root/atvm_setup_script.sh}"
REMOTE_LOG_FILE="${REMOTE_LOG_FILE:-/root/atvm_setup_script.log}"

# Upper bound, in seconds, for each wait loop in this script.
WAIT_TIMEOUT_SECONDS="${WAIT_TIMEOUT_SECONDS:-600}"
# The wait loops compare this value inside (( )). A non-numeric value makes
# that comparison fail on every pass, so the loops would never time out.
if [[ ! "$WAIT_TIMEOUT_SECONDS" =~ ^[0-9]+$ ]]; then
  echo "ERROR: WAIT_TIMEOUT_SECONDS must be a non-negative integer (got: '${WAIT_TIMEOUT_SECONDS}')." >&2
  exit 1
fi
# Force base 10 so a leading zero is not interpreted as octal.
WAIT_TIMEOUT_SECONDS=$((10#$WAIT_TIMEOUT_SECONDS))

# First positional argument selects the mode; the default runs setup and
# then collects the log.
MODE="${1:-run-and-collect}"
EXPECTED_IP_ARG="${EXPECTED_IP_ARG:-}"
EXPECTED_HOSTNAME_ARG="${EXPECTED_HOSTNAME_ARG:-}"
# SSH password: ATVM_PASSWORD, else ATVM_TARGET_PASSWORD, else empty.
ATVM_PASSWORD="${ATVM_PASSWORD:-${ATVM_TARGET_PASSWORD:-}}"

# Options shared by every ssh/scp call. Host keys are neither verified nor
# recorded.
# NOTE(review): presumably because the target is re-provisioned and its host
# key changes between runs - confirm; this leaves the connection
# unauthenticated.
SSH_OPTS=(
  -o StrictHostKeyChecking=no
  -o UserKnownHostsFile=/dev/null
  -o ConnectTimeout=5
)

# Preflight: fail before any remote work if a local prerequisite is missing.

# The setup script is only uploaded in run-and-collect mode, so a collect-only
# run does not need it on this machine.
if [[ "$MODE" != "--collect-after-complete" && ! -f "$LOCAL_SETUP_SCRIPT" ]]; then
  echo "ERROR: Local setup script not found: $LOCAL_SETUP_SCRIPT" >&2
  exit 1
fi

mkdir -p "$LOCAL_LOG_DIR"

if ! command -v ssh >/dev/null 2>&1 || ! command -v scp >/dev/null 2>&1; then
  echo "ERROR: ssh/scp is required." >&2
  exit 1
fi

# sha256sum verifies the collected log at the end of the run; checking for it
# here avoids completing the remote setup and only then aborting.
if ! command -v sha256sum >/dev/null 2>&1; then
  echo "ERROR: sha256sum is required to verify the collected log." >&2
  exit 1
fi

# Command prefixes used by run_ssh and the scp helpers. Arrays keep every
# option a separate word.
SSH_CMD=(ssh "${SSH_OPTS[@]}")
SCP_CMD=(scp "${SSH_OPTS[@]}")

if [[ -n "${ATVM_PASSWORD:-}" ]]; then
  if command -v sshpass >/dev/null 2>&1; then
    # Hand the password to sshpass through the SSHPASS environment variable
    # (-e) instead of -p, so it does not appear in the argument list of every
    # ssh/scp invocation where other local users could read it.
    export SSHPASS="$ATVM_PASSWORD"
    SSH_CMD=(sshpass -e ssh "${SSH_OPTS[@]}")
    SCP_CMD=(sshpass -e scp "${SSH_OPTS[@]}")
  else
    echo "WARNING: ATVM_PASSWORD is set, but sshpass is not installed. Falling back to interactive password prompts." >&2
  fi
fi

# Run a command on the target over SSH as REMOTE_USER.
# Globals:   SSH_CMD (read), REMOTE_USER (read)
# Arguments: $1 - target host; remaining arguments are handed to ssh
#            unchanged as the remote command.
# Returns:   exit status of the ssh invocation.
run_ssh() {
  local target="${REMOTE_USER}@$1"
  "${SSH_CMD[@]}" "$target" "${@:2}"
}

# Copy a local file to the target with scp as REMOTE_USER.
# Globals:   SCP_CMD (read), REMOTE_USER (read)
# Arguments: $1 - local source path
#            $2 - target host
#            $3 - destination path on the target
# Returns:   exit status of the scp invocation.
run_scp_to_remote() {
  local local_path="$1" target_host="$2" remote_path="$3"
  "${SCP_CMD[@]}" "$local_path" "${REMOTE_USER}@${target_host}:${remote_path}"
}

# Copy a file from the target to a local path with scp as REMOTE_USER.
# Globals:   SCP_CMD (read), REMOTE_USER (read)
# Arguments: $1 - target host
#            $2 - source path on the target
#            $3 - local destination path
# Returns:   exit status of the scp invocation.
run_scp_from_remote() {
  local target_host="$1" remote_path="$2" local_path="$3"
  "${SCP_CMD[@]}" "${REMOTE_USER}@${target_host}:${remote_path}" "$local_path"
}

# Poll both candidate addresses until one accepts an SSH command.
# Globals:   REMOTE_IP_PRIMARY, REMOTE_IP_SECONDARY, WAIT_TIMEOUT_SECONDS (read)
# Arguments: none
# Outputs:   the first address that answered, on stdout
# Returns:   0 once a host answers; 1 if none answered and at least
#            WAIT_TIMEOUT_SECONDS have elapsed since the call started.
wait_for_reachable_host() {
  # 'host' is local so the loop does not overwrite a caller's variable.
  local host start_ts current_ts elapsed
  start_ts="$(date +%s)"

  while true; do
    for host in "$REMOTE_IP_PRIMARY" "$REMOTE_IP_SECONDARY"; do
      if run_ssh "$host" "echo ready" >/dev/null 2>&1; then
        echo "$host"
        return 0
      fi
    done

    # The timeout is checked after a full probe round, so both addresses are
    # always tried at least once.
    current_ts="$(date +%s)"
    elapsed=$((current_ts - start_ts))
    if (( elapsed >= WAIT_TIMEOUT_SECONDS )); then
      return 1
    fi
    sleep 5
  done
}

# Probe each candidate address once and report the first that accepts SSH.
# Globals:   REMOTE_IP_PRIMARY, REMOTE_IP_SECONDARY (read)
# Arguments: none
# Outputs:   the first address that answered, on stdout
# Returns:   0 if a host answered, 1 if neither did (no retry).
pick_initial_host() {
  # 'host' is local so the loop does not overwrite a caller's variable.
  local host
  for host in "$REMOTE_IP_PRIMARY" "$REMOTE_IP_SECONDARY"; do
    if run_ssh "$host" "echo ready" >/dev/null 2>&1; then
      echo "$host"
      return 0
    fi
  done
  return 1
}

# Poll both candidate addresses until the remote log exists and contains the
# setup success marker.
# Globals:   REMOTE_IP_PRIMARY, REMOTE_IP_SECONDARY, REMOTE_LOG_FILE,
#            WAIT_TIMEOUT_SECONDS (read)
# Arguments: none
# Outputs:   the address on which the marker was found, on stdout
# Returns:   0 once the marker is found; 1 if it was not found and at least
#            WAIT_TIMEOUT_SECONDS have elapsed since the call started.
wait_for_completed_task() {
  # 'host' is local so the loop does not overwrite a caller's variable.
  local host start_ts current_ts elapsed
  start_ts="$(date +%s)"

  while true; do
    for host in "$REMOTE_IP_PRIMARY" "$REMOTE_IP_SECONDARY"; do
      # An unreachable host and a missing marker both count as "not yet".
      if run_ssh "$host" "test -f '$REMOTE_LOG_FILE' && grep -q 'SUCCESS: ATVM VM Setup Complete!' '$REMOTE_LOG_FILE'" >/dev/null 2>&1; then
        echo "$host"
        return 0
      fi
    done

    current_ts="$(date +%s)"
    elapsed=$((current_ts - start_ts))
    if (( elapsed >= WAIT_TIMEOUT_SECONDS )); then
      return 1
    fi
    sleep 5
  done
}

# Poll one host until an SSH command to it fails, which is taken to mean the
# host has gone offline.
# Globals:   WAIT_TIMEOUT_SECONDS (read)
# Arguments: $1 - host to watch
# Returns:   0 as soon as an SSH probe fails; 1 if the host still answers
#            once WAIT_TIMEOUT_SECONDS have elapsed.
wait_for_host_offline() {
  local host="$1"
  local began now
  began="$(date +%s)"

  # Loop for as long as the probe keeps succeeding.
  while run_ssh "$host" "echo still-up" >/dev/null 2>&1; do
    now="$(date +%s)"
    if (( now - began >= WAIT_TIMEOUT_SECONDS )); then
      return 1
    fi
    sleep 5
  done

  return 0
}

# Accept only the two supported modes; anything else prints usage and exits 1.
case "$MODE" in
  run-and-collect | --collect-after-complete)
    ;;
  *)
    printf '%s\n' \
      "Usage:" \
      " $0 # run setup on client, then collect log" \
      " $0 --collect-after-complete # wait for completed client task, then collect log only"
    exit 1
    ;;
esac

# Wrap a value in single quotes for the remote shell, escaping any embedded
# single quote as '\'' so the value cannot end the quoting early.
remote_quote() {
  local value="$1" sq="'" esc="'\\''"
  printf "'%s'" "${value//$sq/$esc}"
}

# Run phase: upload and start the setup script (run-and-collect mode only).
if [[ "$MODE" == "run-and-collect" ]]; then
  if [[ -z "$EXPECTED_IP_ARG" || -z "$EXPECTED_HOSTNAME_ARG" ]]; then
    echo "ERROR: run-and-collect requires EXPECTED_IP_ARG and EXPECTED_HOSTNAME_ARG." >&2
    echo "Example:" >&2
    echo " EXPECTED_IP_ARG=192.168.0.121 EXPECTED_HOSTNAME_ARG=atvm-codextest-vm $0" >&2
    exit 1
  fi

  INITIAL_HOST="$(pick_initial_host)" || {
    echo "ERROR: Could not reach ${REMOTE_IP_PRIMARY} or ${REMOTE_IP_SECONDARY} for initial setup." >&2
    exit 1
  }

  echo "Copying setup script to ${REMOTE_USER}@${INITIAL_HOST}:${REMOTE_SETUP_SCRIPT}"
  run_scp_to_remote "$LOCAL_SETUP_SCRIPT" "$INITIAL_HOST" "$REMOTE_SETUP_SCRIPT"

  # Every interpolated value goes through remote_quote: a password, IP or
  # hostname containing a single quote would otherwise break or inject into
  # the remote command.
  # NOTE(review): the password is still part of the ssh argument list on this
  # machine and of the remote command line; the remote script's interface for
  # receiving it is not visible here - confirm before changing the transport.
  remote_cmd="chmod +x $(remote_quote "$REMOTE_SETUP_SCRIPT")"
  remote_cmd+=" && ATVM_TARGET_PASSWORD=$(remote_quote "${ATVM_TARGET_PASSWORD:-}")"
  remote_cmd+=" bash $(remote_quote "$REMOTE_SETUP_SCRIPT")"
  remote_cmd+=" --expected-ip $(remote_quote "$EXPECTED_IP_ARG")"
  remote_cmd+=" --expected-hostname $(remote_quote "$EXPECTED_HOSTNAME_ARG")"

  echo "Running remote setup script on ${INITIAL_HOST} (disconnect is expected during IP/reboot steps)"
  # A dropped session is expected, so capture the status instead of letting
  # 'set -e' abort the script.
  run_status=0
  run_ssh "$INITIAL_HOST" "$remote_cmd" || run_status=$?
  if (( run_status != 0 )); then
    echo "INFO: Remote run returned non-zero (${run_status}). Continuing because network reconfiguration/reboot can interrupt SSH."
  fi
fi

# Both modes then wait for the success marker in the remote log.
# NOTE(review): a marker left in the remote log by an earlier run would
# satisfy this wait immediately - verify that the setup script resets its log.
echo "Waiting for completed client task marker in ${REMOTE_LOG_FILE} (timeout: ${WAIT_TIMEOUT_SECONDS}s)"
ACTIVE_HOST="$(wait_for_completed_task)" || {
  echo "ERROR: Could not detect completed task marker in remote log within timeout." >&2
  exit 1
}

printf '%s\n' "Host reachable at: ${ACTIVE_HOST}"

# Name the local copy after the hostname the target reports plus a timestamp.
# Carriage returns are stripped and only the last output line is kept.
REMOTE_HOSTNAME="$(run_ssh "$ACTIVE_HOST" "hostname" | tr -d '\r' | tail -n1)"
RUN_TS="$(date +%Y%m%d_%H%M%S)"
LOCAL_LOG_FILE="${LOCAL_LOG_DIR}/atvm_configuration_${REMOTE_HOSTNAME}_${RUN_TS}.log"

printf '%s\n' "Collecting remote log: ${REMOTE_LOG_FILE}"
run_scp_from_remote "$ACTIVE_HOST" "$REMOTE_LOG_FILE" "$LOCAL_LOG_FILE"

# Hash the log on both sides and compare, to confirm the copy is intact.
remote_hash_cmd="sha256sum '${REMOTE_LOG_FILE}' | awk '{print \$1}'"
REMOTE_HASH="$(run_ssh "$ACTIVE_HOST" "$remote_hash_cmd" | tr -d '\r' | tail -n1)"
LOCAL_HASH="$(sha256sum "$LOCAL_LOG_FILE" | awk '{print $1}')"

[[ "$REMOTE_HASH" == "$LOCAL_HASH" ]] || {
  {
    echo "ERROR: Hash mismatch after log copy."
    echo "Remote: $REMOTE_HASH"
    echo "Local: $LOCAL_HASH"
  } >&2
  exit 1
}

# Decide whether the VM may be powered off, based on a scan of the remote log.
# HAS_ERRORS_IN_LOG ends up as: true (error records found), false (none),
# or unknown (the scan itself failed).
HAS_ERRORS_IN_LOG=false
# Match only real error log records. Do not match instructional text that mentions "[ERROR]".
log_scan_status=0
run_ssh "$ACTIVE_HOST" "grep -Eq '^\\[ERROR\\]' '$REMOTE_LOG_FILE'" || log_scan_status=$?

# grep exits 0 on a match and 1 on no match. Any other status (a grep error,
# or an ssh/sshpass failure) means the log was not actually scanned, so it
# must not be read as "no errors".
case "$log_scan_status" in
  0) HAS_ERRORS_IN_LOG=true ;;
  1) HAS_ERRORS_IN_LOG=false ;;
  *) HAS_ERRORS_IN_LOG=unknown ;;
esac

if [[ "$HAS_ERRORS_IN_LOG" == true ]]; then
  echo "WARNING: [ERROR] entries detected in remote log. VM will remain powered on for manual inspection."
elif [[ "$HAS_ERRORS_IN_LOG" == unknown ]]; then
  echo "WARNING: Could not scan remote log for [ERROR] entries (exit status ${log_scan_status}). VM will remain powered on for manual inspection."
else
  echo "Log indicates success with no [ERROR] entries. Powering off ${ACTIVE_HOST}."
  # The SSH session can be cut by the shutdown itself, so capture the status
  # instead of letting 'set -e' abort the script.
  shutdown_status=0
  run_ssh "$ACTIVE_HOST" "shutdown -h now" || shutdown_status=$?
  if (( shutdown_status != 0 )); then
    echo "INFO: Shutdown command returned non-zero (${shutdown_status}); this can occur if SSH disconnects during shutdown."
  fi

  echo "Waiting for ${ACTIVE_HOST} to go offline (timeout: ${WAIT_TIMEOUT_SECONDS}s)"
  if wait_for_host_offline "$ACTIVE_HOST"; then
    echo "Power-off confirmed: ${ACTIVE_HOST} is offline."
  else
    echo "WARNING: Could not confirm ${ACTIVE_HOST} offline within timeout."
  fi
fi

# Final summary: host used, where the local log copy lives, and its checksum.
printf '%s\n' \
  "Success" \
  "Active host: ${ACTIVE_HOST}" \
  "Local log: ${LOCAL_LOG_FILE}" \
  "SHA256: ${LOCAL_HASH}"