Fix categorized watcher subrun host recovery

This commit is contained in:
2026-03-27 15:59:04 -04:00
parent 650adb085e
commit 20c9ba7178
2 changed files with 74 additions and 2 deletions

View File

@@ -514,6 +514,56 @@ def collect_latest_host_reporter_artifact(
return latest
def collect_latest_group_host_reporter_artifact(
    reporter_root: Path,
    group_label: Optional[str],
    kernels: Dict[str, str],
    run_started_at: datetime,
    run_ended_at: Optional[datetime] = None,
) -> Optional[Tuple[str, HostResult]]:
    """Return the newest reporter artifact written by any host of a group.

    Walks ``<reporter_root>/logs/<host>/`` for every host directory whose
    name starts with ``atvm`` and for which ``infer_group_from_host(host)``
    equals ``group_label``. Among the ``.txt`` / ``.json`` entries found
    there, the one with the most recent modification time inside the window
    ``run_started_at <= mtime < run_ended_at`` wins.

    Args:
        reporter_root: Directory that contains the ``logs`` sub-directory.
        group_label: Group to match hosts against; a falsy value short-circuits
            to ``None``.
        kernels: Mapping of host name to kernel string; hosts missing from it
            are reported with kernel ``"unknown"``.
        run_started_at: Inclusive lower bound for artifact mtimes. It is
            compared against timezone-aware UTC datetimes, so it must be
            timezone-aware as well.
        run_ended_at: Optional exclusive upper bound for artifact mtimes
            (same timezone-awareness requirement).

    Returns:
        ``(host, HostResult)`` for the winning artifact, where the result has
        status ``"PASS"``, detail ``"completed"`` and the artifact mtime as
        its timestamp; ``None`` when nothing matches.
    """
    if not group_label:
        return None
    logs_dir = reporter_root / "logs"
    # is_dir() rather than exists(): a stray non-directory named "logs" holds
    # no artifacts and must not make iterdir() raise.
    if not logs_dir.is_dir():
        return None

    # Track only (host, mtime) while scanning; the HostResult is built once
    # for the winner instead of once per candidate.
    newest: Optional[Tuple[str, datetime]] = None
    for host_dir in sorted(logs_dir.iterdir()):
        if not host_dir.is_dir():
            continue
        host = host_dir.name
        if not host.startswith("atvm"):
            continue
        if infer_group_from_host(host) != group_label:
            continue
        for artifact_path in sorted(host_dir.iterdir()):
            if artifact_path.suffix not in {".txt", ".json"}:
                continue
            try:
                mtime_seconds = artifact_path.stat().st_mtime
            except OSError:
                # The entry vanished (or became unreadable) between the
                # directory listing and the stat call; ignore it rather than
                # abort the whole scan.
                continue
            artifact_mtime = datetime.fromtimestamp(mtime_seconds, tz=timezone.utc)
            if artifact_mtime < run_started_at:
                continue
            if run_ended_at is not None and artifact_mtime >= run_ended_at:
                continue
            # ">=" keeps the original tie-break: on equal mtimes the entry
            # visited later in sorted order wins.
            if newest is None or artifact_mtime >= newest[1]:
                newest = (host, artifact_mtime)

    if newest is None:
        return None
    winner_host, winner_mtime = newest
    result = HostResult(
        host=winner_host,
        kernel=kernels.get(winner_host, "unknown"),
        status="PASS",
        detail="completed",
        timestamp=winner_mtime,
    )
    return winner_host, result
def find_check_xml_end(
reporter_root: Path,
started_at: datetime,
@@ -1003,17 +1053,29 @@ def discover_categorized_subruns(
completed_hosts.append(host)
check_ts = extract_check_xml_timestamp_from_file(xml_path)
summary = completed_summaries[current_summary_index] if current_summary_index < len(completed_summaries) else None
inferred_host = infer_host_from_subrun_build(raw_display_name, expected_hosts, completed_hosts)
if summary and (not host_results or all(result.host == "check-xml-files" for result in host_results.values())):
host_results = summary["host_results"]
completed_hosts.extend([host for host in host_results if host not in completed_hosts])
if not host_results and check_ts:
latest_host = collect_latest_host_result(
scoped_expected_hosts = [inferred_host] if inferred_host else expected_hosts
latest_host = collect_latest_host_reporter_artifact(
reporter_root=reporter_root,
expected_hosts=expected_hosts,
expected_hosts=scoped_expected_hosts,
kernels=inventory,
run_started_at=started_at,
run_ended_at=check_ts + timedelta(seconds=5),
)
if not latest_host:
display_group_match = re.search(r"-(amazonlinux|centos|ubuntu|rocky|redhat|oracle|fedora|debian|suse|windows)-batch", raw_display_name)
display_group = display_group_match.group(1) if display_group_match else None
latest_host = collect_latest_group_host_reporter_artifact(
reporter_root=reporter_root,
group_label=display_group,
kernels=inventory,
run_started_at=started_at,
run_ended_at=check_ts + timedelta(seconds=5),
)
if latest_host:
host, result = latest_host
host_results = {host: result}
@@ -1042,6 +1104,8 @@ def discover_categorized_subruns(
notes.append("Host result details were derived from the parent categorized run log summary.")
elif host_results and check_ts:
notes.append("Host result details were derived from the latest matching host reporter artifact written before grouped finalization.")
if inferred_host:
notes.append(f"Grouped sub-run host scope was inferred as `{inferred_host}` from the categorized build id.")
elif check_ts and not host_results and parent_active:
notes.append("Grouped reporter XML arrived before the parent run log exposed the final host summary; waiting to post until host details are available.")
if display_name != raw_display_name: