1329 lines
52 KiB
Python
1329 lines
52 KiB
Python
#!/usr/bin/env python3
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import ast
|
|
import json
|
|
import os
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
import time
|
|
import urllib.request
|
|
import xml.etree.ElementTree as ET
|
|
from dataclasses import dataclass
|
|
from datetime import datetime, timedelta, timezone
|
|
from pathlib import Path
|
|
from typing import Dict, List, Optional, Tuple
|
|
|
|
|
|
RUN_STATES = {
|
|
"COMPLETED",
|
|
"FAILED",
|
|
"CANCELLED",
|
|
"TERMINATED",
|
|
"HUNG",
|
|
"UNKNOWN",
|
|
"RUNNING",
|
|
}
|
|
|
|
LOCAL_TZ = datetime.now().astimezone().tzinfo or timezone.utc
|
|
|
|
|
|
DEFAULT_TEST_FLOW = [
|
|
"1. Test flow is template-specific for this run",
|
|
"2. Verifying set up",
|
|
"3. Execute the requested template workflow",
|
|
"4. Validate expected run outcome",
|
|
"5. Power off",
|
|
]
|
|
|
|
|
|
TEMPLATE_TEST_FLOWS = {
|
|
"cmc-e2e": [
|
|
"1. Verifying set up",
|
|
"2. Power on and obtain ip address and host name",
|
|
"3. Uninstall CMC if still exists",
|
|
"4. Setting up disk on the host",
|
|
"5. Copy CMC install command from GUI",
|
|
"6. Install CMC",
|
|
"7. Create migration session",
|
|
"8. Tracking Changes",
|
|
"9. Trigger cmotion and do I/O test before actual cutover",
|
|
"10. Verify data for cmotion",
|
|
"11. Trigger revert cmotion and do I/O test before and during cmotion",
|
|
"12. Verify data for revert cmotion",
|
|
"13. Trigger cmotion again",
|
|
"14. Finalize cutover",
|
|
"15. Create migration report",
|
|
"16. Delete migration session",
|
|
"17. Verify local destination disk",
|
|
"18. Remove enabled FC integration",
|
|
"19. Remove host and volumes",
|
|
"20. Uninstall CMC",
|
|
"21. Clean up iSCSI targets",
|
|
"22. Power off",
|
|
],
|
|
}
|
|
|
|
|
|
@dataclass
|
|
class HostResult:
|
|
host: str
|
|
kernel: str
|
|
status: str
|
|
detail: str
|
|
tests: int = 0
|
|
failures: int = 0
|
|
duration_seconds: Optional[float] = None
|
|
timestamp: Optional[datetime] = None
|
|
|
|
|
|
@dataclass
|
|
class SubRun:
|
|
key: str
|
|
display_name: str
|
|
started_at: datetime
|
|
expected_hosts: List[str]
|
|
completed: bool
|
|
currents_url: Optional[str]
|
|
notes: List[str]
|
|
|
|
|
|
def now_utc() -> datetime:
|
|
return datetime.now(timezone.utc)
|
|
|
|
|
|
def parse_args() -> argparse.Namespace:
|
|
parser = argparse.ArgumentParser()
|
|
parser.add_argument("--build-name", required=True)
|
|
parser.add_argument("--run-log", required=True)
|
|
parser.add_argument("--reporter-root", required=True)
|
|
parser.add_argument("--inventory-file", required=True)
|
|
parser.add_argument("--state-dir", required=True)
|
|
parser.add_argument("--poll-interval", type=int, default=30)
|
|
parser.add_argument("--max-watch-seconds", type=int, default=6 * 60 * 60)
|
|
parser.add_argument("--process-exit-grace-seconds", type=int, default=120)
|
|
return parser.parse_args()
|
|
|
|
|
|
def ensure_dir(path: Path) -> None:
|
|
path.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
|
def load_inventory(path: Path) -> Dict[str, str]:
|
|
kernels: Dict[str, str] = {}
|
|
if not path.exists():
|
|
return kernels
|
|
for line in path.read_text(encoding="utf-8").splitlines():
|
|
if not line.startswith("|"):
|
|
continue
|
|
parts = [part.strip() for part in line.strip().strip("|").split("|")]
|
|
if len(parts) < 3 or parts[0] in {"OS", "---"}:
|
|
continue
|
|
host = parts[1]
|
|
kernel = parts[2]
|
|
kernels[host] = kernel or "unknown"
|
|
return kernels
|
|
|
|
|
|
def run_ps() -> str:
|
|
proc = subprocess.run(
|
|
["ps", "-eo", "pid=,args="],
|
|
capture_output=True,
|
|
text=True,
|
|
check=True,
|
|
)
|
|
return proc.stdout
|
|
|
|
|
|
def process_active(build_name: str) -> bool:
|
|
output = run_ps()
|
|
for line in output.splitlines():
|
|
if "run-sorry-cypress.py" in line and f"--build_name {build_name}" in line:
|
|
return True
|
|
return False
|
|
|
|
|
|
def related_process_active(build_name: str) -> bool:
|
|
output = run_ps()
|
|
for line in output.splitlines():
|
|
if build_name not in line:
|
|
continue
|
|
if any(token in line for token in ("run-sorry-cypress.py", "cypress-cloud", "node ")):
|
|
return True
|
|
return False
|
|
|
|
|
|
def extract_active_subrun_build(build_name: str) -> Optional[str]:
|
|
output = run_ps()
|
|
matches: List[str] = []
|
|
for line in output.splitlines():
|
|
if build_name not in line or "--ci-build-id" not in line:
|
|
continue
|
|
match = re.search(r"--ci-build-id\s+(\S+)", line)
|
|
if match:
|
|
matches.append(match.group(1))
|
|
return matches[-1] if matches else None
|
|
|
|
|
|
def read_text(path: Path) -> str:
|
|
try:
|
|
return path.read_text(encoding="utf-8", errors="replace")
|
|
except FileNotFoundError:
|
|
return ""
|
|
|
|
|
|
def extract_expected_hosts(log_text: str) -> List[str]:
|
|
hosts: List[str] = []
|
|
spec_match = re.search(r'Extracted specPattern:\s*(\[[^\n]+\])', log_text)
|
|
if spec_match:
|
|
try:
|
|
spec_list = ast.literal_eval(spec_match.group(1))
|
|
except (SyntaxError, ValueError):
|
|
spec_list = []
|
|
for entry in spec_list:
|
|
if not isinstance(entry, str):
|
|
continue
|
|
match = re.search(r"(atvm[^/\s]+)\.ts$", entry)
|
|
if match:
|
|
host = match.group(1)
|
|
if host not in hosts:
|
|
hosts.append(host)
|
|
for match in re.finditer(r"Running:\s+(?:cypress/cmcRegressionTest/)?(atvm[^/\s]+)\.ts", log_text):
|
|
host = match.group(1)
|
|
if host not in hosts:
|
|
hosts.append(host)
|
|
return hosts
|
|
|
|
|
|
def extract_currents_url(log_text: str) -> Optional[str]:
|
|
match = re.search(r"(https://\S+/run/\S+)", log_text)
|
|
return match.group(1) if match else None
|
|
|
|
|
|
def load_state(state_file: Path) -> Dict[str, object]:
|
|
if not state_file.exists():
|
|
return {}
|
|
try:
|
|
return json.loads(state_file.read_text(encoding="utf-8"))
|
|
except json.JSONDecodeError:
|
|
return {}
|
|
|
|
|
|
def write_state(state_file: Path, state: Dict[str, object]) -> None:
|
|
state_file.write_text(json.dumps(state, indent=2, sort_keys=True), encoding="utf-8")
|
|
|
|
|
|
def parse_xml_timestamp(raw: Optional[str]) -> Optional[datetime]:
|
|
if not raw:
|
|
return None
|
|
try:
|
|
return datetime.fromisoformat(raw).replace(tzinfo=timezone.utc)
|
|
except ValueError:
|
|
return None
|
|
|
|
|
|
def parse_log_timestamp(raw: str) -> Optional[datetime]:
|
|
try:
|
|
return datetime.strptime(raw, "%Y-%m-%d %H:%M:%S,%f").replace(tzinfo=LOCAL_TZ).astimezone(timezone.utc)
|
|
except ValueError:
|
|
return None
|
|
|
|
|
|
def first_log_timestamp(log_text: str) -> Optional[datetime]:
|
|
match = re.search(r"^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}) - INFO - ", log_text, re.M)
|
|
if not match:
|
|
return None
|
|
return parse_log_timestamp(match.group(1))
|
|
|
|
|
|
def parse_host_xml(xml_path: Path) -> Optional[Tuple[str, HostResult]]:
|
|
try:
|
|
tree = ET.parse(xml_path)
|
|
except ET.ParseError:
|
|
return None
|
|
root = tree.getroot()
|
|
suites = root.findall("testsuite")
|
|
best: Optional[Tuple[str, int, int, float, Optional[datetime]]] = None
|
|
for suite in suites:
|
|
file_attr = suite.attrib.get("file", "")
|
|
suite_name = suite.attrib.get("name", "")
|
|
host_from_file = None
|
|
host_from_name = None
|
|
if file_attr.startswith("cypress/cmcRegressionTest/atvm") and file_attr.endswith(".ts"):
|
|
host_from_file = Path(file_attr).stem
|
|
name_match = re.search(r"(atvm[^)\s]+)", suite_name)
|
|
if name_match:
|
|
host_from_name = name_match.group(1)
|
|
host_name = host_from_file or host_from_name
|
|
if not host_name:
|
|
continue
|
|
tests = int(float(suite.attrib.get("tests", root.attrib.get("tests", "0"))))
|
|
failures = int(float(suite.attrib.get("failures", root.attrib.get("failures", "0"))))
|
|
total_time = float(suite.attrib.get("time", root.attrib.get("time", "0")))
|
|
timestamp = parse_xml_timestamp(suite.attrib.get("timestamp"))
|
|
candidate = (host_name, tests, failures, total_time, timestamp)
|
|
if best is None:
|
|
best = candidate
|
|
continue
|
|
_, best_tests, _, best_total_time, _ = best
|
|
if tests > best_tests or (tests == best_tests and total_time >= best_total_time):
|
|
best = candidate
|
|
if not best:
|
|
return None
|
|
file_name, tests, failures, total_time, timestamp = best
|
|
detail = f"{tests} tests, {failures} failures"
|
|
status = "FAIL" if failures else "PASS"
|
|
return file_name, HostResult(
|
|
host=file_name,
|
|
kernel="unknown",
|
|
status=status,
|
|
detail=detail,
|
|
tests=tests,
|
|
failures=failures,
|
|
duration_seconds=total_time,
|
|
timestamp=timestamp,
|
|
)
|
|
|
|
|
|
def extract_check_xml_timestamp_from_file(xml_path: Path) -> Optional[datetime]:
|
|
try:
|
|
tree = ET.parse(xml_path)
|
|
except ET.ParseError:
|
|
return None
|
|
root = tree.getroot()
|
|
for suite in root.findall("testsuite"):
|
|
file_attr = suite.attrib.get("file", "")
|
|
if file_attr.endswith("check-xml-files.ts"):
|
|
return parse_xml_timestamp(suite.attrib.get("timestamp"))
|
|
return None
|
|
|
|
|
|
def parse_duration_seconds(raw: str) -> Optional[float]:
|
|
raw = " ".join(raw.split())
|
|
if re.fullmatch(r"\d+:\d+(?::\d+)?", raw):
|
|
parts = [int(part) for part in raw.split(":")]
|
|
if len(parts) == 2:
|
|
minutes, seconds = parts
|
|
return minutes * 60 + seconds
|
|
if len(parts) == 3:
|
|
hours, minutes, seconds = parts
|
|
return hours * 3600 + minutes * 60 + seconds
|
|
match = re.search(r"(?:(\d+)h\s+)?(?:(\d+)m\s+)?(\d+(?:\.\d+)?)(?:s)?", raw)
|
|
if not match:
|
|
return None
|
|
hours = int(match.group(1) or 0)
|
|
minutes = int(match.group(2) or 0)
|
|
seconds = float(match.group(3))
|
|
return hours * 3600 + minutes * 60 + seconds
|
|
|
|
|
|
def extract_host_results_from_run_finished_segment(segment_text: str, inventory: Dict[str, str]) -> Dict[str, HostResult]:
|
|
host_results: Dict[str, HostResult] = {}
|
|
normalized = re.sub(r"\n\s*│\s*s\s*│", "s", segment_text)
|
|
for host_match in re.finditer(
|
|
r"✔\s+(atvm[^\s]+)\.ts\s+([0-9:hms.\s]+?)\s+(\d+)\s+(\d+)\s+([-\d]+)\s+([-\d]+)\s+([-\d]+)",
|
|
normalized,
|
|
re.S,
|
|
):
|
|
host = host_match.group(1)
|
|
duration_seconds = parse_duration_seconds(host_match.group(2))
|
|
tests = int(host_match.group(3))
|
|
passing = int(host_match.group(4))
|
|
failing = 0 if host_match.group(5) == "-" else int(host_match.group(5))
|
|
detail = f"{tests} tests, {failing} failures"
|
|
status = "FAIL" if failing else "PASS"
|
|
host_results[host] = HostResult(
|
|
host=host,
|
|
kernel=inventory.get(host, "unknown"),
|
|
status=status,
|
|
detail=detail,
|
|
tests=tests,
|
|
failures=failing,
|
|
duration_seconds=duration_seconds,
|
|
)
|
|
return host_results
|
|
|
|
|
|
def extract_completed_subrun_summaries(log_text: str, inventory: Dict[str, str]) -> List[Dict[str, object]]:
|
|
summaries: List[Dict[str, object]] = []
|
|
cloud_blocks = list(
|
|
re.finditer(
|
|
r"Cloud Run Finished(.*?)(?:🏁 Recorded Run:\s*(https://\S+)|\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3} - INFO - Detected 'Recorded Run' after 'Cloud Run Finished' - results uploaded successfully\.)",
|
|
log_text,
|
|
re.S,
|
|
)
|
|
)
|
|
previous_block_end = 0
|
|
for block in cloud_blocks:
|
|
block_text = block.group(1)
|
|
currents_url = block.group(2)
|
|
host_results = extract_host_results_from_run_finished_segment(block_text, inventory)
|
|
if not host_results:
|
|
prior_segment = log_text[previous_block_end:block.start()]
|
|
host_results = extract_host_results_from_run_finished_segment(prior_segment, inventory)
|
|
if not host_results:
|
|
previous_block_end = block.end()
|
|
continue
|
|
summaries.append(
|
|
{
|
|
"host_results": host_results,
|
|
"currents_url": currents_url,
|
|
}
|
|
)
|
|
previous_block_end = block.end()
|
|
return summaries
|
|
|
|
|
|
def extract_latest_run_summary(log_text: str, inventory: Dict[str, str]) -> Optional[Dict[str, object]]:
|
|
summaries = extract_completed_subrun_summaries(log_text, inventory)
|
|
if not summaries:
|
|
return None
|
|
return summaries[-1]
|
|
|
|
|
|
def collect_host_results(
|
|
reporter_root: Path,
|
|
expected_hosts: List[str],
|
|
kernels: Dict[str, str],
|
|
run_started_at: datetime,
|
|
run_ended_at: Optional[datetime] = None,
|
|
) -> Dict[str, HostResult]:
|
|
xml_dir = reporter_root / "xml"
|
|
results: Dict[str, HostResult] = {}
|
|
if not xml_dir.exists():
|
|
return results
|
|
for xml_path in sorted(xml_dir.glob("test-result-*.xml"), key=lambda p: p.stat().st_mtime):
|
|
xml_mtime = datetime.fromtimestamp(xml_path.stat().st_mtime, tz=timezone.utc)
|
|
if xml_mtime < run_started_at:
|
|
continue
|
|
if run_ended_at and xml_mtime >= run_ended_at:
|
|
continue
|
|
parsed = parse_host_xml(xml_path)
|
|
if not parsed:
|
|
continue
|
|
host, result = parsed
|
|
if expected_hosts and host not in expected_hosts:
|
|
continue
|
|
result.kernel = kernels.get(host, "unknown")
|
|
results[host] = result
|
|
return results
|
|
|
|
|
|
def collect_latest_host_result(
|
|
reporter_root: Path,
|
|
expected_hosts: List[str],
|
|
kernels: Dict[str, str],
|
|
run_started_at: datetime,
|
|
run_ended_at: Optional[datetime] = None,
|
|
) -> Optional[Tuple[str, HostResult]]:
|
|
results = collect_host_results(
|
|
reporter_root=reporter_root,
|
|
expected_hosts=expected_hosts,
|
|
kernels=kernels,
|
|
run_started_at=run_started_at,
|
|
run_ended_at=run_ended_at,
|
|
)
|
|
if not results:
|
|
return None
|
|
latest = max(
|
|
results.items(),
|
|
key=lambda item: item[1].timestamp or datetime.fromtimestamp(0, tz=timezone.utc),
|
|
)
|
|
return latest
|
|
|
|
|
|
def collect_latest_host_reporter_artifact(
|
|
reporter_root: Path,
|
|
expected_hosts: List[str],
|
|
kernels: Dict[str, str],
|
|
run_started_at: datetime,
|
|
run_ended_at: Optional[datetime] = None,
|
|
) -> Optional[Tuple[str, HostResult]]:
|
|
logs_dir = reporter_root / "logs"
|
|
if not logs_dir.exists():
|
|
return None
|
|
|
|
latest: Optional[Tuple[str, HostResult]] = None
|
|
for host_dir in sorted(logs_dir.iterdir()):
|
|
if not host_dir.is_dir():
|
|
continue
|
|
host = host_dir.name
|
|
if not host.startswith("atvm"):
|
|
continue
|
|
if expected_hosts and host not in expected_hosts:
|
|
continue
|
|
|
|
for artifact_path in sorted(host_dir.iterdir()):
|
|
if artifact_path.suffix not in {".txt", ".json"}:
|
|
continue
|
|
artifact_mtime = datetime.fromtimestamp(artifact_path.stat().st_mtime, tz=timezone.utc)
|
|
if artifact_mtime < run_started_at:
|
|
continue
|
|
if run_ended_at and artifact_mtime >= run_ended_at:
|
|
continue
|
|
|
|
result = HostResult(
|
|
host=host,
|
|
kernel=kernels.get(host, "unknown"),
|
|
status="PASS",
|
|
detail="completed",
|
|
timestamp=artifact_mtime,
|
|
)
|
|
candidate = (host, result)
|
|
if latest is None:
|
|
latest = candidate
|
|
continue
|
|
latest_ts = latest[1].timestamp or datetime.fromtimestamp(0, tz=timezone.utc)
|
|
if artifact_mtime >= latest_ts:
|
|
latest = candidate
|
|
return latest
|
|
|
|
|
|
def find_check_xml_end(
|
|
reporter_root: Path,
|
|
started_at: datetime,
|
|
ended_at: Optional[datetime] = None,
|
|
) -> Optional[datetime]:
|
|
xml_dir = reporter_root / "xml"
|
|
if not xml_dir.exists():
|
|
return None
|
|
latest: Optional[datetime] = None
|
|
for xml_path in sorted(xml_dir.glob("test-result-*.xml"), key=lambda p: p.stat().st_mtime):
|
|
xml_mtime = datetime.fromtimestamp(xml_path.stat().st_mtime, tz=timezone.utc)
|
|
if xml_mtime < started_at:
|
|
continue
|
|
if ended_at and xml_mtime >= ended_at:
|
|
continue
|
|
text = read_text(xml_path)
|
|
if "check-xml-files.ts" not in text:
|
|
continue
|
|
try:
|
|
tree = ET.parse(xml_path)
|
|
root = tree.getroot()
|
|
suite = root.find("testsuite")
|
|
if suite is None:
|
|
continue
|
|
ts = parse_xml_timestamp(suite.attrib.get("timestamp"))
|
|
if ts:
|
|
latest = ts
|
|
except ET.ParseError:
|
|
continue
|
|
return latest
|
|
|
|
|
|
def find_current_running_host(log_text: str, completed_hosts: List[str]) -> Optional[str]:
|
|
matches = re.findall(r"Running:\s+(?:cypress/cmcRegressionTest/)?(atvm[^/\s]+)\.ts", log_text)
|
|
for host in reversed(matches):
|
|
if host not in completed_hosts:
|
|
return host
|
|
return None
|
|
|
|
|
|
def infer_host_from_subrun_build(
|
|
subrun_build: str,
|
|
expected_hosts: List[str],
|
|
completed_hosts: List[str],
|
|
) -> Optional[str]:
|
|
remaining_hosts = [host for host in expected_hosts if host not in completed_hosts]
|
|
lowered_build = subrun_build.lower()
|
|
for host in remaining_hosts:
|
|
short = host.split("-", 1)[-1].lower()
|
|
if short.startswith("w2k"):
|
|
if "windows" in lowered_build or "w2k" in lowered_build:
|
|
return host
|
|
elif short.startswith("redhat") and "redhat" in lowered_build:
|
|
return host
|
|
elif short.startswith("ubuntu") and "ubuntu" in lowered_build:
|
|
return host
|
|
elif short.startswith("oracle") and "oracle" in lowered_build:
|
|
return host
|
|
elif short.startswith("rocky") and "rocky" in lowered_build:
|
|
return host
|
|
elif short.startswith("debian") and "debian" in lowered_build:
|
|
return host
|
|
return remaining_hosts[0] if remaining_hosts else None
|
|
|
|
|
|
def infer_metadata() -> Dict[str, object]:
|
|
return {
|
|
"template": os.environ.get("ATVM_WATCHER_TEMPLATE", "unknown"),
|
|
"config_family": os.environ.get("ATVM_WATCHER_CONFIG_FAMILY", "unknown"),
|
|
"migration_style": os.environ.get("ATVM_WATCHER_MIGRATION_STYLE", "ATVM automation validation"),
|
|
"integration_plugin": os.environ.get("ATVM_WATCHER_INTEGRATION_PLUGIN", "unknown"),
|
|
"scope_description": os.environ.get("ATVM_WATCHER_SCOPE_DESCRIPTION", "requested ATVM run scope"),
|
|
"categorized": os.environ.get("ATVM_WATCHER_CATEGORIZED", "false").lower() == "true",
|
|
}
|
|
|
|
|
|
def format_duration(seconds: Optional[float]) -> str:
|
|
if seconds is None:
|
|
return "n/a"
|
|
minutes, secs = divmod(seconds, 60)
|
|
hours, minutes = divmod(int(minutes), 60)
|
|
if hours:
|
|
return f"{hours}h {minutes:02d}m {secs:05.2f}s"
|
|
if minutes:
|
|
return f"{minutes}m {secs:05.2f}s"
|
|
return f"{secs:.3f}s"
|
|
|
|
|
|
def format_timestamp_local(ts: Optional[datetime]) -> str:
|
|
if not ts:
|
|
return "n/a"
|
|
local = ts.astimezone()
|
|
return local.strftime("%Y-%m-%d %H:%M:%S %Z")
|
|
|
|
|
|
def get_test_flow(template_name: object) -> List[str]:
|
|
if not isinstance(template_name, str):
|
|
return DEFAULT_TEST_FLOW
|
|
return TEMPLATE_TEST_FLOWS.get(template_name, DEFAULT_TEST_FLOW)
|
|
|
|
|
|
def build_status_markdown(
|
|
build_name: str,
|
|
metadata: Dict[str, object],
|
|
host_results: Dict[str, HostResult],
|
|
run_state: str,
|
|
currents_url: Optional[str],
|
|
start_ts: Optional[datetime],
|
|
end_ts: Optional[datetime],
|
|
notes: List[str],
|
|
) -> str:
|
|
ordered_hosts = list(host_results.values())
|
|
finished = len([h for h in ordered_hosts if h.status in {"PASS", "FAIL"}])
|
|
passed = len([h for h in ordered_hosts if h.status == "PASS"])
|
|
failed = len([h for h in ordered_hosts if h.status == "FAIL"])
|
|
skipped = len([h for h in ordered_hosts if h.status == "SKIP"])
|
|
durations = [h.duration_seconds for h in ordered_hosts if h.duration_seconds is not None]
|
|
quickest = min((h for h in ordered_hosts if h.duration_seconds is not None), key=lambda h: h.duration_seconds, default=None)
|
|
longest = max((h for h in ordered_hosts if h.duration_seconds is not None), key=lambda h: h.duration_seconds, default=None)
|
|
average = (sum(durations) / len(durations)) if durations else None
|
|
|
|
host_lines = ["| Host | Kernel | Status | Detail |", "| --- | --- | --- | --- |"]
|
|
for host in ordered_hosts:
|
|
icon = {
|
|
"PASS": "✅ PASS",
|
|
"FAIL": "⚠️ FAIL",
|
|
"RUN": "⏳ RUN",
|
|
"SKIP": "⏭️ SKIP",
|
|
"NOT STARTED": "⏳ RUN",
|
|
}.get(host.status, host.status)
|
|
host_lines.append(f"| {host.host} | {host.kernel} | {icon} | {host.detail} |")
|
|
|
|
if currents_url:
|
|
notes = notes + [f"Currents recorded run: `{currents_url}`"]
|
|
|
|
notes_block = "\n".join(f"- {note}" for note in notes) if notes else "- none"
|
|
test_flow_lines = [f"- {step}" for step in get_test_flow(metadata.get("template"))]
|
|
|
|
lines = [
|
|
"## ATVM Run Status",
|
|
f"### {build_name}",
|
|
"",
|
|
"**COVERAGE:**",
|
|
f"- template: `{metadata['template']}`",
|
|
f"- datastore/config family: `{metadata['config_family']}`",
|
|
f"- migration style: {metadata['migration_style']}",
|
|
f"- integration/plugin path: `{metadata['integration_plugin']}`",
|
|
f"- scope of this run: {metadata['scope_description']}",
|
|
"",
|
|
"**TEST FLOW:**",
|
|
*test_flow_lines,
|
|
"",
|
|
"**SUMMARY:**",
|
|
"",
|
|
"| Metric | Value |",
|
|
"| --- | --- |",
|
|
f"| finished | {finished} |",
|
|
f"| passed | {passed} |",
|
|
f"| failed | {failed} |",
|
|
f"| skipped | {skipped} |",
|
|
"",
|
|
"**HOSTS:**",
|
|
"",
|
|
*host_lines,
|
|
"",
|
|
"**TIMING:**",
|
|
"",
|
|
"| Metric | Value |",
|
|
"| --- | --- |",
|
|
f"| start | {format_timestamp_local(start_ts)} |",
|
|
f"| end | {format_timestamp_local(end_ts)} |",
|
|
f"| total | {format_duration((end_ts - start_ts).total_seconds()) if start_ts and end_ts else 'n/a'} |",
|
|
f"| quickest | {f'{quickest.host} - {format_duration(quickest.duration_seconds)}' if quickest else 'n/a'} |",
|
|
f"| longest | {f'{longest.host} - {format_duration(longest.duration_seconds)}' if longest else 'n/a'} |",
|
|
f"| average | {format_duration(average) if average is not None else 'n/a'} |",
|
|
"",
|
|
"**NOTES:**",
|
|
notes_block,
|
|
]
|
|
return "\n".join(lines)
|
|
|
|
|
|
def post_to_mattermost(text: str) -> str:
|
|
webhook = os.environ["MATTERMOST_ATVM_WEBHOOK"]
|
|
payload = {"text": text}
|
|
channel = os.environ.get("MATTERMOST_ATVM_CHANNEL")
|
|
if channel:
|
|
payload["channel"] = channel
|
|
data = json.dumps(payload).encode()
|
|
request = urllib.request.Request(webhook, data=data, headers={"Content-Type": "application/json"})
|
|
with urllib.request.urlopen(request) as response:
|
|
return response.read().decode().strip()
|
|
|
|
|
|
def sanitize_key(raw: str) -> str:
|
|
return re.sub(r"[^A-Za-z0-9_.-]+", "-", raw).strip("-") or "subrun"
|
|
|
|
|
|
def infer_group_label(hosts: List[str], index: int) -> str:
|
|
if not hosts:
|
|
return f"group{index}"
|
|
labels: List[str] = []
|
|
for host in hosts:
|
|
short = host.split("-", 1)[-1]
|
|
if short.startswith("w2k"):
|
|
label = "windows"
|
|
else:
|
|
label = re.sub(r"\d.*$", "", short) or short
|
|
if label not in labels:
|
|
labels.append(label)
|
|
return "-".join(labels) if labels else f"group{index}"
|
|
|
|
|
|
def infer_group_from_host(host: str) -> str:
|
|
short = host.split("-", 1)[-1].lower()
|
|
if short.startswith("w2k"):
|
|
return "windows"
|
|
if short.startswith("amazonlinux"):
|
|
return "amazonlinux"
|
|
if short.startswith("centos"):
|
|
return "centos"
|
|
if short.startswith("ubuntu"):
|
|
return "ubuntu"
|
|
if short.startswith("rocky"):
|
|
return "rocky"
|
|
if short.startswith("redhat"):
|
|
return "redhat"
|
|
if short.startswith("oracle"):
|
|
return "oracle"
|
|
if short.startswith("fedora"):
|
|
return "fedora"
|
|
if short.startswith("debian"):
|
|
return "debian"
|
|
if short.startswith("suse"):
|
|
return "suse"
|
|
return short or "group"
|
|
|
|
|
|
def corrected_categorized_display_name(raw_display_name: str, hosts: List[str]) -> str:
|
|
if not hosts:
|
|
return raw_display_name
|
|
group = infer_group_from_host(hosts[0])
|
|
return re.sub(r"-(amazonlinux|centos|ubuntu|rocky|redhat|oracle|fedora|debian|suse|windows|w2k)-batch", f"-{group}-batch", raw_display_name)
|
|
|
|
|
|
def logical_categorized_key(display_name: str) -> str:
|
|
match = re.match(
|
|
r"^(.*?-(amazonlinux|centos|ubuntu|rocky|redhat|oracle|fedora|debian|suse|windows|w2k))-batch\d+_\d+.*$",
|
|
display_name,
|
|
)
|
|
if match:
|
|
return sanitize_key(match.group(1))
|
|
return sanitize_key(display_name)
|
|
|
|
|
|
def merge_categorized_state(
|
|
merged: Dict[str, Dict[str, object]],
|
|
display_name: str,
|
|
state: str,
|
|
host_results: Dict[str, HostResult],
|
|
start_ts: Optional[datetime],
|
|
end_ts: Optional[datetime],
|
|
currents_url: Optional[str],
|
|
notes: List[str],
|
|
) -> None:
|
|
key = logical_categorized_key(display_name)
|
|
existing = merged.get(key)
|
|
if existing is None:
|
|
merged[key] = {
|
|
"key": key,
|
|
"display_name": display_name,
|
|
"state": state,
|
|
"host_results": dict(host_results),
|
|
"start_ts": start_ts,
|
|
"end_ts": end_ts,
|
|
"currents_url": currents_url,
|
|
"notes": list(notes),
|
|
}
|
|
return
|
|
|
|
existing["host_results"].update(host_results)
|
|
|
|
if start_ts and (existing["start_ts"] is None or start_ts < existing["start_ts"]):
|
|
existing["start_ts"] = start_ts
|
|
if end_ts and (existing["end_ts"] is None or end_ts > existing["end_ts"]):
|
|
existing["end_ts"] = end_ts
|
|
if currents_url and not existing["currents_url"]:
|
|
existing["currents_url"] = currents_url
|
|
|
|
for note in notes:
|
|
if note not in existing["notes"]:
|
|
existing["notes"].append(note)
|
|
|
|
existing_state = existing["state"]
|
|
if state == "CANCELLED" or existing_state == "CANCELLED":
|
|
existing["state"] = "CANCELLED"
|
|
elif state == "RUNNING":
|
|
if existing_state not in {"COMPLETED", "FAILED"}:
|
|
existing["state"] = "RUNNING"
|
|
elif existing_state == "RUNNING":
|
|
existing["state"] = state
|
|
elif state == "FAILED" or existing_state == "FAILED":
|
|
existing["state"] = "FAILED"
|
|
elif state == "COMPLETED" or existing_state == "COMPLETED":
|
|
existing["state"] = "COMPLETED"
|
|
else:
|
|
existing["state"] = state
|
|
|
|
|
|
def extract_segment_build_name(segment_text: str, parent_build_name: str) -> Optional[str]:
|
|
patterns = [
|
|
rf"({re.escape(parent_build_name)}-[A-Za-z0-9_.-]*batch\d+_\d+)",
|
|
r"([A-Za-z0-9_.-]+-batch\d+_\d+)",
|
|
]
|
|
for pattern in patterns:
|
|
match = re.search(pattern, segment_text)
|
|
if match:
|
|
return match.group(1)
|
|
return None
|
|
|
|
|
|
def split_log_segments(log_text: str, parent_build_name: str, categorized: bool, default_started_at: datetime) -> List[SubRun]:
|
|
if not categorized:
|
|
return [
|
|
SubRun(
|
|
key=sanitize_key(parent_build_name),
|
|
display_name=parent_build_name,
|
|
started_at=default_started_at,
|
|
expected_hosts=extract_expected_hosts(log_text),
|
|
completed=False,
|
|
currents_url=extract_currents_url(log_text),
|
|
notes=[],
|
|
)
|
|
]
|
|
|
|
segment_starts: List[Tuple[int, Optional[datetime]]] = []
|
|
for match in re.finditer(r"^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}) - INFO - Extracted specPattern:", log_text, re.M):
|
|
segment_starts.append((match.start(), parse_log_timestamp(match.group(1))))
|
|
|
|
if not segment_starts:
|
|
return [
|
|
SubRun(
|
|
key=sanitize_key(parent_build_name),
|
|
display_name=parent_build_name,
|
|
started_at=default_started_at,
|
|
expected_hosts=extract_expected_hosts(log_text),
|
|
completed=False,
|
|
currents_url=extract_currents_url(log_text),
|
|
notes=["Categorized mode was requested but no sub-run segment has appeared in the log yet."],
|
|
)
|
|
]
|
|
|
|
segments: List[SubRun] = []
|
|
for index, (start_offset, start_ts) in enumerate(segment_starts, start=1):
|
|
end_offset = segment_starts[index][0] if index < len(segment_starts) else len(log_text)
|
|
segment_text = log_text[start_offset:end_offset]
|
|
expected_hosts = extract_expected_hosts(segment_text)
|
|
display_name = extract_segment_build_name(segment_text, parent_build_name)
|
|
if not display_name:
|
|
display_name = f"{parent_build_name}-{infer_group_label(expected_hosts, index)}"
|
|
segments.append(
|
|
SubRun(
|
|
key=sanitize_key(display_name),
|
|
display_name=display_name,
|
|
started_at=start_ts or default_started_at,
|
|
expected_hosts=expected_hosts,
|
|
completed=index < len(segment_starts),
|
|
currents_url=extract_currents_url(segment_text),
|
|
notes=[f"Categorized sub-run {index} of {len(segment_starts)}."],
|
|
)
|
|
)
|
|
return segments
|
|
|
|
|
|
def evaluate_subrun(
|
|
subrun: SubRun,
|
|
reporter_root: Path,
|
|
inventory: Dict[str, str],
|
|
end_boundary: Optional[datetime],
|
|
parent_active: bool,
|
|
cancelled: bool,
|
|
) -> Tuple[str, Dict[str, HostResult], Optional[datetime], Optional[datetime], Optional[str], List[str]]:
|
|
notes = list(subrun.notes)
|
|
host_results = collect_host_results(
|
|
reporter_root=reporter_root,
|
|
expected_hosts=subrun.expected_hosts,
|
|
kernels=inventory,
|
|
run_started_at=subrun.started_at,
|
|
run_ended_at=end_boundary,
|
|
)
|
|
check_end = find_check_xml_end(reporter_root, subrun.started_at, end_boundary)
|
|
start_candidates = [result.timestamp for result in host_results.values() if result.timestamp]
|
|
end_candidates = [result.timestamp for result in host_results.values() if result.timestamp]
|
|
if check_end:
|
|
end_candidates.append(check_end)
|
|
start_ts = min(start_candidates) if start_candidates else subrun.started_at
|
|
end_ts = max(end_candidates) if end_candidates else None
|
|
|
|
if cancelled:
|
|
notes.append("Cancellation marker detected.")
|
|
return "CANCELLED", host_results, start_ts, end_ts, subrun.currents_url, notes
|
|
|
|
if subrun.completed:
|
|
if not host_results:
|
|
notes.append("This categorized sub-run ended but no host results were detected.")
|
|
return "UNKNOWN", host_results, start_ts, end_ts, subrun.currents_url, notes
|
|
notes.append("Categorized sub-run completed and the next grouped run was launched.")
|
|
if check_end:
|
|
notes.append("Final `check-xml-files.ts` validation passed.")
|
|
state = "FAILED" if any(result.failures for result in host_results.values()) else "COMPLETED"
|
|
return state, host_results, start_ts, end_ts, subrun.currents_url, notes
|
|
|
|
if parent_active:
|
|
current_host = next((host for host in subrun.expected_hosts if host not in host_results), None)
|
|
if current_host and current_host not in host_results:
|
|
host_results[current_host] = HostResult(
|
|
host=current_host,
|
|
kernel=inventory.get(current_host, "unknown"),
|
|
status="RUN",
|
|
detail="in progress",
|
|
)
|
|
return "RUNNING", host_results, start_ts, end_ts, subrun.currents_url, notes
|
|
|
|
if check_end and not host_results:
|
|
latest_host = collect_latest_host_reporter_artifact(
|
|
reporter_root=reporter_root,
|
|
expected_hosts=subrun.expected_hosts,
|
|
kernels=inventory,
|
|
run_started_at=subrun.started_at,
|
|
run_ended_at=check_end + timedelta(seconds=5),
|
|
)
|
|
if latest_host:
|
|
host, result = latest_host
|
|
host_results = {host: result}
|
|
|
|
if host_results:
|
|
notes.append("Run completed after the parent runner exited.")
|
|
if check_end:
|
|
notes.append("Final `check-xml-files.ts` validation passed.")
|
|
latest_artifact_note = "Host result details were derived from the latest matching host reporter artifact written before final validation."
|
|
if latest_artifact_note not in notes and all(result.tests == 0 for result in host_results.values()):
|
|
notes.append(latest_artifact_note)
|
|
state = "FAILED" if any(result.failures for result in host_results.values()) else "COMPLETED"
|
|
return state, host_results, start_ts, end_ts, subrun.currents_url, notes
|
|
|
|
notes.append("Run process exited before host results were detected.")
|
|
return "TERMINATED", host_results, start_ts, end_ts, subrun.currents_url, notes
|
|
|
|
|
|
def discover_categorized_subruns(
|
|
build_name: str,
|
|
reporter_root: Path,
|
|
inventory: Dict[str, str],
|
|
log_text: str,
|
|
started_at: datetime,
|
|
parent_active: bool,
|
|
cancelled: bool,
|
|
) -> List[Dict[str, object]]:
|
|
xml_dir = reporter_root / "xml"
|
|
current_subrun_build = extract_active_subrun_build(build_name)
|
|
current_active_host = find_current_running_host(log_text, [])
|
|
current_active_group = infer_group_from_host(current_active_host) if current_active_host else None
|
|
expected_hosts = extract_expected_hosts(log_text)
|
|
completed_summaries = extract_completed_subrun_summaries(log_text, inventory)
|
|
merged_subrun_states: Dict[str, Dict[str, object]] = {}
|
|
completed_hosts: List[str] = []
|
|
discovered_builds: List[str] = []
|
|
current_summary_index = 0
|
|
|
|
if xml_dir.exists():
|
|
prefix = f"test-result-{build_name}-"
|
|
for xml_path in sorted(xml_dir.glob(f"{prefix}*.xml"), key=lambda p: p.stat().st_mtime):
|
|
xml_mtime = datetime.fromtimestamp(xml_path.stat().st_mtime, tz=timezone.utc)
|
|
if xml_mtime < started_at:
|
|
continue
|
|
raw_display_name = xml_path.stem[len("test-result-"):]
|
|
discovered_builds.append(raw_display_name)
|
|
parsed = parse_host_xml(xml_path)
|
|
host_results: Dict[str, HostResult] = {}
|
|
if parsed:
|
|
host, result = parsed
|
|
result.kernel = inventory.get(host, "unknown")
|
|
host_results[host] = result
|
|
completed_hosts.append(host)
|
|
check_ts = extract_check_xml_timestamp_from_file(xml_path)
|
|
summary = completed_summaries[current_summary_index] if current_summary_index < len(completed_summaries) else None
|
|
if summary and (not host_results or all(result.host == "check-xml-files" for result in host_results.values())):
|
|
host_results = summary["host_results"]
|
|
completed_hosts.extend([host for host in host_results if host not in completed_hosts])
|
|
if not host_results and check_ts:
|
|
latest_host = collect_latest_host_result(
|
|
reporter_root=reporter_root,
|
|
expected_hosts=expected_hosts,
|
|
kernels=inventory,
|
|
run_started_at=started_at,
|
|
run_ended_at=check_ts + timedelta(seconds=5),
|
|
)
|
|
if latest_host:
|
|
host, result = latest_host
|
|
host_results = {host: result}
|
|
if host not in completed_hosts:
|
|
completed_hosts.append(host)
|
|
if summary:
|
|
current_summary_index += 1
|
|
state = "RUNNING"
|
|
display_name = corrected_categorized_display_name(raw_display_name, list(host_results))
|
|
display_group = None
|
|
display_group_match = re.search(r"-(amazonlinux|centos|ubuntu|rocky|redhat|oracle|fedora|debian|suse|windows)-batch", display_name)
|
|
if display_group_match:
|
|
display_group = display_group_match.group(1)
|
|
if cancelled:
|
|
state = "CANCELLED"
|
|
elif check_ts and not host_results and parent_active:
|
|
state = "RUNNING"
|
|
elif check_ts and display_group and current_active_group and display_group != current_active_group:
|
|
state = "FAILED" if any(result.failures for result in host_results.values()) else "COMPLETED"
|
|
elif check_ts or raw_display_name != current_subrun_build or not parent_active:
|
|
state = "FAILED" if any(result.failures for result in host_results.values()) else "COMPLETED"
|
|
notes = [f"Categorized sub-run discovered from reporter file `{xml_path.name}`."]
|
|
if check_ts:
|
|
notes.append("Final `check-xml-files.ts` validation passed.")
|
|
if summary and host_results:
|
|
notes.append("Host result details were derived from the parent categorized run log summary.")
|
|
elif host_results and check_ts:
|
|
notes.append("Host result details were derived from the latest matching host reporter artifact written before grouped finalization.")
|
|
elif check_ts and not host_results and parent_active:
|
|
notes.append("Grouped reporter XML arrived before the parent run log exposed the final host summary; waiting to post until host details are available.")
|
|
if display_name != raw_display_name:
|
|
notes.append(f"Child build id was reported as `{raw_display_name}`, but the actual grouped run was inferred from host execution as `{display_name}`.")
|
|
if cancelled:
|
|
notes.append("Cancellation marker detected.")
|
|
end_ts = check_ts or next((result.timestamp for result in host_results.values() if result.timestamp), xml_mtime)
|
|
start_ts = next((result.timestamp for result in host_results.values() if result.timestamp), None)
|
|
if not start_ts and summary and end_ts:
|
|
duration_seconds = next((result.duration_seconds for result in host_results.values() if result.duration_seconds is not None), None)
|
|
if duration_seconds is not None:
|
|
candidate_start = end_ts.timestamp() - duration_seconds
|
|
start_ts = datetime.fromtimestamp(candidate_start, tz=timezone.utc)
|
|
if not start_ts:
|
|
start_ts = min(xml_mtime, end_ts) if end_ts else xml_mtime
|
|
if end_ts and start_ts > end_ts:
|
|
start_ts = end_ts
|
|
merge_categorized_state(
|
|
merged_subrun_states,
|
|
display_name=display_name,
|
|
state=state,
|
|
host_results=host_results,
|
|
start_ts=start_ts,
|
|
end_ts=end_ts,
|
|
currents_url=summary["currents_url"] if summary else None,
|
|
notes=notes,
|
|
)
|
|
|
|
if current_subrun_build and current_subrun_build not in discovered_builds:
|
|
current_host = find_current_running_host(log_text, completed_hosts)
|
|
if not current_host or current_host in completed_hosts:
|
|
current_host = infer_host_from_subrun_build(current_subrun_build, expected_hosts, completed_hosts)
|
|
host_results: Dict[str, HostResult] = {}
|
|
if current_host:
|
|
host_results[current_host] = HostResult(
|
|
host=current_host,
|
|
kernel=inventory.get(current_host, "unknown"),
|
|
status="RUN",
|
|
detail="in progress",
|
|
)
|
|
display_name = corrected_categorized_display_name(current_subrun_build, [current_host] if current_host else [])
|
|
notes = ["Active categorized sub-run inferred from live `--ci-build-id` process state."]
|
|
if display_name != current_subrun_build:
|
|
notes.append(f"Child build id is `{current_subrun_build}`, but the actual grouped run was inferred from host execution as `{display_name}`.")
|
|
if cancelled:
|
|
notes.append("Cancellation marker detected.")
|
|
merge_categorized_state(
|
|
merged_subrun_states,
|
|
display_name=display_name,
|
|
state="CANCELLED" if cancelled else "RUNNING",
|
|
host_results=host_results,
|
|
start_ts=started_at,
|
|
end_ts=None,
|
|
currents_url=None,
|
|
notes=notes,
|
|
)
|
|
|
|
return list(merged_subrun_states.values())
|
|
|
|
|
|
def determine_state(
|
|
build_name: str,
|
|
build_dir: Path,
|
|
run_log: Path,
|
|
reporter_root: Path,
|
|
inventory: Dict[str, str],
|
|
metadata: Dict[str, object],
|
|
started_at: datetime,
|
|
process_gone_since: Optional[datetime],
|
|
process_exit_grace_seconds: int,
|
|
) -> Tuple[str, List[Dict[str, object]], Dict[str, HostResult], Optional[datetime], Optional[datetime], Optional[str], List[str]]:
|
|
cancelled_marker = build_dir / "cancelled.marker"
|
|
log_text = read_text(run_log)
|
|
active = related_process_active(build_name) if metadata.get("categorized") else process_active(build_name)
|
|
cancelled = cancelled_marker.exists()
|
|
notes: List[str] = []
|
|
subrun_states: List[Dict[str, object]] = []
|
|
parent_host_results: Dict[str, HostResult] = {}
|
|
|
|
if metadata.get("categorized"):
|
|
subrun_states = discover_categorized_subruns(
|
|
build_name=build_name,
|
|
reporter_root=reporter_root,
|
|
inventory=inventory,
|
|
log_text=log_text,
|
|
started_at=started_at,
|
|
parent_active=active,
|
|
cancelled=cancelled,
|
|
)
|
|
for subrun in subrun_states:
|
|
for host, result in subrun["host_results"].items():
|
|
parent_host_results[host] = result
|
|
else:
|
|
subruns = split_log_segments(log_text, build_name, bool(metadata.get("categorized")), started_at)
|
|
for index, subrun in enumerate(subruns):
|
|
next_started_at = subruns[index + 1].started_at if index + 1 < len(subruns) else None
|
|
state, host_results, start_ts, end_ts, currents_url, subrun_notes = evaluate_subrun(
|
|
subrun=subrun,
|
|
reporter_root=reporter_root,
|
|
inventory=inventory,
|
|
end_boundary=next_started_at,
|
|
parent_active=active,
|
|
cancelled=cancelled,
|
|
)
|
|
for host, result in host_results.items():
|
|
parent_host_results[host] = result
|
|
subrun_states.append(
|
|
{
|
|
"key": subrun.key,
|
|
"display_name": subrun.display_name,
|
|
"state": state,
|
|
"host_results": host_results,
|
|
"start_ts": start_ts,
|
|
"end_ts": end_ts,
|
|
"currents_url": currents_url,
|
|
"notes": subrun_notes,
|
|
}
|
|
)
|
|
|
|
# Non-categorized runs often only write a final check-xml reporter XML.
|
|
# Fall back to the parent "Cloud Run Finished" summary when host XML is absent.
|
|
if not parent_host_results:
|
|
latest_summary = extract_latest_run_summary(log_text, inventory)
|
|
if latest_summary:
|
|
summary_results = latest_summary["host_results"]
|
|
for host, result in summary_results.items():
|
|
parent_host_results[host] = result
|
|
if subrun_states:
|
|
subrun = subrun_states[-1]
|
|
subrun["host_results"] = summary_results
|
|
if not subrun.get("currents_url") and latest_summary.get("currents_url"):
|
|
subrun["currents_url"] = latest_summary["currents_url"]
|
|
notes_list = list(subrun.get("notes", []))
|
|
fallback_note = "Host result details were derived from the parent run log summary."
|
|
if fallback_note not in notes_list:
|
|
notes_list.append(fallback_note)
|
|
subrun["notes"] = notes_list
|
|
if subrun["state"] in {"UNKNOWN", "TERMINATED"}:
|
|
subrun["state"] = "FAILED" if any(result.failures for result in summary_results.values()) else "COMPLETED"
|
|
|
|
parent_start_candidates = [subrun["start_ts"] for subrun in subrun_states if subrun["start_ts"]]
|
|
parent_end_candidates = [subrun["end_ts"] for subrun in subrun_states if subrun["end_ts"]]
|
|
start_ts = min(parent_start_candidates) if parent_start_candidates else started_at
|
|
end_ts = max(parent_end_candidates) if parent_end_candidates else find_check_xml_end(reporter_root, started_at)
|
|
currents_url = extract_currents_url(log_text)
|
|
|
|
if cancelled:
|
|
notes.append("Cancellation marker detected.")
|
|
return "CANCELLED", subrun_states, parent_host_results, start_ts, end_ts, currents_url, notes
|
|
|
|
if active:
|
|
elapsed = (now_utc() - started_at).total_seconds()
|
|
if elapsed > args.max_watch_seconds:
|
|
notes.append("Watcher exceeded max watch duration while the run still appears active.")
|
|
return "HUNG", subrun_states, parent_host_results, start_ts, end_ts, currents_url, notes
|
|
return "RUNNING", subrun_states, parent_host_results, start_ts, end_ts, currents_url, notes
|
|
|
|
if metadata.get("categorized") and process_gone_since and (now_utc() - process_gone_since).total_seconds() < process_exit_grace_seconds:
|
|
notes.append("Categorized parent runner has not been gone long enough to treat the request as finished.")
|
|
return "RUNNING", subrun_states, parent_host_results, start_ts, end_ts, currents_url, notes
|
|
|
|
terminal_subruns = [subrun for subrun in subrun_states if subrun["state"] in {"COMPLETED", "FAILED"}]
|
|
if terminal_subruns:
|
|
state = "FAILED" if any(result.failures for result in parent_host_results.values()) else "COMPLETED"
|
|
notes.append("Run finished and one or more sub-run result artifacts were detected.")
|
|
if end_ts:
|
|
notes.append("Final reporting artifacts were detected.")
|
|
return state, subrun_states, parent_host_results, start_ts, end_ts, currents_url, notes
|
|
|
|
if process_gone_since and (now_utc() - process_gone_since).total_seconds() >= process_exit_grace_seconds:
|
|
notes.append("Run process exited without a clean completion signal.")
|
|
return "TERMINATED", subrun_states, parent_host_results, start_ts, end_ts, currents_url, notes
|
|
|
|
return "RUNNING", subrun_states, parent_host_results, start_ts, end_ts, currents_url, notes
|
|
|
|
|
|
if __name__ == "__main__":
|
|
args = parse_args()
|
|
build_name = args.build_name
|
|
run_log = Path(args.run_log)
|
|
reporter_root = Path(args.reporter_root)
|
|
inventory_file = Path(args.inventory_file)
|
|
state_root = Path(args.state_dir)
|
|
build_dir = state_root / build_name
|
|
ensure_dir(build_dir)
|
|
state_file = build_dir / "state.json"
|
|
posted_marker = build_dir / "posted.marker"
|
|
|
|
inventory = load_inventory(inventory_file)
|
|
metadata = infer_metadata()
|
|
|
|
state = load_state(state_file)
|
|
log_text_for_start = read_text(run_log)
|
|
default_started_at = first_log_timestamp(log_text_for_start) or (datetime.fromtimestamp(run_log.stat().st_mtime, tz=timezone.utc) if run_log.exists() else now_utc())
|
|
started_at = parse_xml_timestamp(state.get("started_at")) or default_started_at
|
|
state.setdefault("build_name", build_name)
|
|
state.setdefault("started_at", started_at.isoformat())
|
|
write_state(state_file, state)
|
|
|
|
process_gone_since: Optional[datetime] = None
|
|
|
|
while True:
|
|
active = process_active(build_name)
|
|
if not active and process_gone_since is None:
|
|
process_gone_since = now_utc()
|
|
if active:
|
|
process_gone_since = None
|
|
|
|
run_state, subrun_states, host_results, start_ts, end_ts, currents_url, notes = determine_state(
|
|
build_name=build_name,
|
|
build_dir=build_dir,
|
|
run_log=run_log,
|
|
reporter_root=reporter_root,
|
|
inventory=inventory,
|
|
metadata=metadata,
|
|
started_at=started_at,
|
|
process_gone_since=process_gone_since,
|
|
process_exit_grace_seconds=args.process_exit_grace_seconds,
|
|
)
|
|
|
|
state["last_state"] = run_state
|
|
state["last_seen_at"] = now_utc().isoformat()
|
|
state["host_results"] = {
|
|
host: {
|
|
"status": result.status,
|
|
"detail": result.detail,
|
|
"kernel": result.kernel,
|
|
"tests": result.tests,
|
|
"failures": result.failures,
|
|
}
|
|
for host, result in host_results.items()
|
|
}
|
|
state["subruns"] = {
|
|
subrun["display_name"]: {
|
|
"state": subrun["state"],
|
|
"hosts": sorted(subrun["host_results"].keys()),
|
|
"start_ts": subrun["start_ts"].isoformat() if subrun["start_ts"] else None,
|
|
"end_ts": subrun["end_ts"].isoformat() if subrun["end_ts"] else None,
|
|
"currents_url": subrun["currents_url"],
|
|
"notes": subrun["notes"],
|
|
}
|
|
for subrun in subrun_states
|
|
}
|
|
write_state(state_file, state)
|
|
|
|
for subrun in subrun_states:
|
|
subrun_dir = build_dir / "subruns" / subrun["key"]
|
|
ensure_dir(subrun_dir)
|
|
subrun_state_file = subrun_dir / "state.json"
|
|
subrun_posted_marker = subrun_dir / "posted.marker"
|
|
subrun_state = {
|
|
"display_name": subrun["display_name"],
|
|
"last_state": subrun["state"],
|
|
"last_seen_at": now_utc().isoformat(),
|
|
"host_results": {
|
|
host: {
|
|
"status": result.status,
|
|
"detail": result.detail,
|
|
"kernel": result.kernel,
|
|
"tests": result.tests,
|
|
"failures": result.failures,
|
|
}
|
|
for host, result in subrun["host_results"].items()
|
|
},
|
|
"notes": subrun["notes"],
|
|
"currents_url": subrun["currents_url"],
|
|
"started_at": subrun["start_ts"].isoformat() if subrun["start_ts"] else None,
|
|
"ended_at": subrun["end_ts"].isoformat() if subrun["end_ts"] else None,
|
|
}
|
|
if metadata.get("categorized") and subrun["state"] in {"COMPLETED", "FAILED"} and not subrun_posted_marker.exists():
|
|
status_text = build_status_markdown(
|
|
build_name=subrun["display_name"],
|
|
metadata=metadata,
|
|
host_results=dict(sorted(subrun["host_results"].items())),
|
|
run_state=subrun["state"],
|
|
currents_url=subrun["currents_url"],
|
|
start_ts=subrun["start_ts"],
|
|
end_ts=subrun["end_ts"],
|
|
notes=subrun["notes"],
|
|
)
|
|
print(status_text)
|
|
response = post_to_mattermost(status_text)
|
|
if response != "ok":
|
|
raise SystemExit(f"Mattermost webhook did not return ok for {subrun['display_name']}: {response!r}")
|
|
subrun_posted_marker.write_text("ok\n", encoding="utf-8")
|
|
subrun_state["mattermost_posted"] = True
|
|
subrun_state["mattermost_response"] = response
|
|
print(f"[watcher] Mattermost post confirmed for {subrun['display_name']}.")
|
|
write_state(subrun_state_file, subrun_state)
|
|
|
|
if run_state == "RUNNING":
|
|
print(f"[watcher] {build_name}: RUNNING")
|
|
time.sleep(args.poll_interval)
|
|
continue
|
|
|
|
status_text = build_status_markdown(
|
|
build_name=build_name,
|
|
metadata=metadata,
|
|
host_results=dict(sorted(host_results.items())),
|
|
run_state=run_state,
|
|
currents_url=currents_url,
|
|
start_ts=start_ts,
|
|
end_ts=end_ts,
|
|
notes=notes,
|
|
)
|
|
print(status_text)
|
|
|
|
if not metadata.get("categorized") and run_state in {"COMPLETED", "FAILED"} and not posted_marker.exists():
|
|
response = post_to_mattermost(status_text)
|
|
if response != "ok":
|
|
raise SystemExit(f"Mattermost webhook did not return ok: {response!r}")
|
|
posted_marker.write_text("ok\n", encoding="utf-8")
|
|
state["mattermost_posted"] = True
|
|
state["mattermost_response"] = response
|
|
write_state(state_file, state)
|
|
print(f"[watcher] Mattermost post confirmed for {build_name}.")
|
|
|
|
state["closed_at"] = now_utc().isoformat()
|
|
write_state(state_file, state)
|
|
sys.exit(0)
|