Recover watcher results when run log is missing

This commit is contained in:
2026-03-27 11:00:11 -04:00
parent 4c0e28cc05
commit cc551a6922
2 changed files with 74 additions and 2 deletions

View File

@@ -430,6 +430,53 @@ def collect_latest_host_result(
return latest
def collect_latest_host_reporter_artifact(
    reporter_root: Path,
    expected_hosts: List[str],
    kernels: Dict[str, str],
    run_started_at: datetime,
    run_ended_at: Optional[datetime] = None,
) -> "Optional[Tuple[str, HostResult]]":
    """Return the host whose reporter artifact was written last during a run.

    Scans ``<reporter_root>/logs/<host>/`` for ``.txt`` and ``.json`` files
    whose modification time lies in ``[run_started_at, run_ended_at)`` and
    keeps the newest one. Only the file's existence and mtime are inspected;
    its content is never read, so the synthesized result is always reported
    with status ``"PASS"`` and detail ``"completed"``.

    Args:
        reporter_root: Directory that contains the ``logs`` folder.
        expected_hosts: Host names to consider. An empty list accepts every
            host directory whose name starts with ``"atvm"``.
        kernels: Mapping of host name to kernel string; hosts that are not
            in the mapping are reported with kernel ``"unknown"``.
        run_started_at: Inclusive lower bound for the artifact mtime. Must be
            timezone-aware, because mtimes are converted to aware UTC
            datetimes and Python refuses to compare naive with aware values.
        run_ended_at: Optional exclusive upper bound for the artifact mtime
            (same timezone-awareness requirement).

    Returns:
        ``(host, HostResult)`` for the newest matching artifact, or ``None``
        when the ``logs`` directory is missing or nothing matches.
    """
    logs_dir = reporter_root / "logs"
    # is_dir() rather than exists(): a stray *file* named "logs" must yield
    # "nothing found" instead of NotADirectoryError from iterdir().
    if not logs_dir.is_dir():
        return None

    # Build the membership set once instead of scanning the list per host.
    allowed_hosts = frozenset(expected_hosts) if expected_hosts else None

    newest_host: Optional[str] = None
    newest_mtime: Optional[datetime] = None

    for host_dir in sorted(logs_dir.iterdir()):
        host = host_dir.name
        if not host.startswith("atvm") or not host_dir.is_dir():
            continue
        if allowed_hosts is not None and host not in allowed_hosts:
            continue

        for artifact_path in sorted(host_dir.iterdir()):
            if artifact_path.suffix not in {".txt", ".json"}:
                continue
            try:
                # A directory that merely carries an artifact suffix is not
                # an artifact.
                if not artifact_path.is_file():
                    continue
                mtime_seconds = artifact_path.stat().st_mtime
            except OSError:
                # The file vanished or became unreadable between listing and
                # stat (e.g. log rotation); skip it instead of aborting the
                # whole recovery scan.
                continue

            artifact_mtime = datetime.fromtimestamp(mtime_seconds, tz=timezone.utc)
            if artifact_mtime < run_started_at:
                continue
            if run_ended_at is not None and artifact_mtime >= run_ended_at:
                continue

            # ">=" so that on equal mtimes the entry visited later (hosts and
            # artifacts are walked in sorted order) wins.
            if newest_mtime is None or artifact_mtime >= newest_mtime:
                newest_host = host
                newest_mtime = artifact_mtime

    if newest_host is None:
        return None

    # Build the result once, for the winner only.
    return newest_host, HostResult(
        host=newest_host,
        kernel=kernels.get(newest_host, "unknown"),
        status="PASS",
        detail="completed",
        timestamp=newest_mtime,
    )
def find_check_xml_end(
reporter_root: Path,
started_at: datetime,
@@ -853,14 +900,29 @@ def evaluate_subrun(
)
return "RUNNING", host_results, start_ts, end_ts, subrun.currents_url, notes
if check_end and not host_results:
latest_host = collect_latest_host_reporter_artifact(
reporter_root=reporter_root,
expected_hosts=subrun.expected_hosts,
kernels=inventory,
run_started_at=subrun.started_at,
run_ended_at=check_end + timedelta(seconds=5),
)
if latest_host:
host, result = latest_host
host_results = {host: result}
if host_results:
notes.append("Categorized sub-run completed after the parent runner exited.")
notes.append("Run completed after the parent runner exited.")
if check_end:
notes.append("Final `check-xml-files.ts` validation passed.")
latest_artifact_note = "Host result details were derived from the latest matching host reporter artifact written before final validation."
if latest_artifact_note not in notes and all(result.tests == 0 for result in host_results.values()):
notes.append(latest_artifact_note)
state = "FAILED" if any(result.failures for result in host_results.values()) else "COMPLETED"
return state, host_results, start_ts, end_ts, subrun.currents_url, notes
notes.append("Parent run exited before this categorized sub-run produced host results.")
notes.append("Run process exited before host results were detected.")
return "TERMINATED", host_results, start_ts, end_ts, subrun.currents_url, notes