Fix ATVM watcher reboot status recovery

This commit is contained in:
2026-03-30 14:59:23 -04:00
parent 1405a2e879
commit cc5eb6b7c8

View File

@@ -427,15 +427,15 @@ def extract_host_results_from_run_finished_segment(segment_text: str, inventory:
host_results: Dict[str, HostResult] = {}
normalized = re.sub(r"\n\s*│\s*s\s*│", "s", segment_text)
for host_match in re.finditer(
r"\s+(atvm[^\s]+)\.ts\s+([0-9:hms.\s]+?)\s+(\d+)\s+(\d+)\s+([-\d]+)\s+([-\d]+)\s+([-\d]+)",
r"([✔✖])\s+(atvm[^\s]+)\.ts\s+([0-9:hms.\s]+?)\s+(\d+)\s+(\d+)\s+([-\d]+)\s+([-\d]+)\s+([-\d]+)",
normalized,
re.S,
):
host = host_match.group(1)
duration_seconds = parse_duration_seconds(host_match.group(2))
tests = int(host_match.group(3))
passing = int(host_match.group(4))
failing = 0 if host_match.group(5) == "-" else int(host_match.group(5))
host = host_match.group(2)
duration_seconds = parse_duration_seconds(host_match.group(3))
tests = int(host_match.group(4))
passing = int(host_match.group(5))
failing = 0 if host_match.group(6) == "-" else int(host_match.group(6))
detail = f"{tests} tests, {failing} failures"
status = "FAIL" if failing else "PASS"
host_results[host] = HostResult(
@@ -589,14 +589,11 @@ def collect_latest_host_reporter_artifact(
if run_ended_at and artifact_mtime >= run_ended_at:
continue
artifact_ts = reporter_artifact_run_timestamp(artifact_path) or artifact_mtime
result = HostResult(
host=host,
kernel=kernels.get(host, "unknown"),
status="PASS",
detail="completed",
timestamp=artifact_ts,
)
result = parse_host_reporter_artifact(artifact_path, host, kernels)
if result is None:
continue
artifact_ts = result.timestamp or reporter_artifact_run_timestamp(artifact_path) or artifact_mtime
result.timestamp = artifact_ts
candidate = (host, result)
if latest is None:
latest = candidate
@@ -867,6 +864,36 @@ def get_test_flow(template_name: object) -> List[str]:
return TEMPLATE_TEST_FLOWS.get(template_name, DEFAULT_TEST_FLOW)
def extract_test_flow_from_generated_spec(reporter_root: Path, log_text: str) -> List[str]:
    """Build a numbered test-flow list from the generated spec named in the run log.

    The log is searched for an ``Extracted specPattern: [...]`` line whose
    bracketed value is parsed as a Python literal list of spec paths.  Each
    string entry (except ones containing ``check-xml-files.ts``) is resolved
    against the parent directory of ``reporter_root`` after stripping its
    leading ``cypress`` path component.  The first spec file that yields at
    least one ``it(`${numStep++}. <text>`, ...)`` title wins.

    Args:
        reporter_root: Reporter output directory; spec paths are looked up
            relative to its parent directory.
        log_text: Full text of the run log.

    Returns:
        Step descriptions renumbered from 1 (``"1. <text>"``, ...), or an
        empty list when no usable spec or step title is found.  This function
        never raises for malformed log content or unreadable spec files, so
        callers can fall back to another flow source with ``or``.
    """
    spec_match = re.search(r'Extracted specPattern:\s*(\[[^\n]+\])', log_text)
    if not spec_match:
        return []
    try:
        spec_list = ast.literal_eval(spec_match.group(1))
    except (SyntaxError, ValueError, TypeError):
        # The logged value is not a well-formed Python literal.
        return []
    if not isinstance(spec_list, list):
        return []

    # Compiled once: the same pattern is applied to every line of every spec.
    step_pattern = re.compile(r'it\(\s*`?\$\{numStep\+\+\}\.\s*(.*?)`\s*,')
    cypress_root = reporter_root.parent
    for entry in spec_list:
        if not isinstance(entry, str) or "check-xml-files.ts" in entry:
            continue
        try:
            relative_spec = Path(entry).relative_to("cypress")
        except ValueError:
            # Entry is absolute or not rooted at "cypress/"; it cannot be
            # mapped under cypress_root, so skip it instead of crashing.
            continue
        spec_path = cypress_root / relative_spec
        if not spec_path.is_file():
            continue
        try:
            spec_text = spec_path.read_text(encoding="utf-8", errors="replace")
        except OSError:
            # Unreadable spec (permissions, vanished mid-run): try the next one.
            continue
        steps: List[str] = []
        for line in spec_text.splitlines():
            match = step_pattern.search(line)
            if not match:
                continue
            step_text = match.group(1).strip()
            if step_text:
                # Renumber sequentially; the spec numbers steps at runtime.
                steps.append(f"{len(steps) + 1}. {step_text}")
        if steps:
            return steps
    return []
def coverage_lines(metadata: Dict[str, object]) -> List[str]:
lines = [
f"- template: `{metadata['template']}`",
@@ -911,6 +938,8 @@ def build_status_markdown(
start_ts: Optional[datetime],
end_ts: Optional[datetime],
notes: List[str],
reporter_root: Path,
log_text: str,
) -> str:
ordered_hosts = list(host_results.values())
infer_missing_host_durations(ordered_hosts, end_ts)
@@ -941,7 +970,8 @@ def build_status_markdown(
notes = notes + [f"Template command: `{template_command}`"]
notes_block = "\n".join(f"- {note}" for note in notes) if notes else "- none"
test_flow_lines = [f"- {step}" for step in get_test_flow(metadata.get("template"))]
resolved_flow = extract_test_flow_from_generated_spec(reporter_root, log_text) or get_test_flow(metadata.get("template"))
test_flow_lines = [f"- {step}" for step in resolved_flow]
coverage_block = coverage_lines(metadata)
lines = [
@@ -1464,25 +1494,19 @@ def determine_state(
)
# Non-categorized runs often only write a final check-xml reporter XML.
# Fall back to the parent "Cloud Run Finished" summary when host XML is absent.
if not parent_host_results:
latest_summary = extract_latest_run_summary(log_text, inventory)
if latest_summary:
summary_results = latest_summary["host_results"]
for host, result in summary_results.items():
parent_host_results[host] = result
if subrun_states:
subrun = subrun_states[-1]
subrun["host_results"] = summary_results
if not subrun.get("currents_url") and latest_summary.get("currents_url"):
subrun["currents_url"] = latest_summary["currents_url"]
notes_list = list(subrun.get("notes", []))
fallback_note = "Host result details were derived from the parent run log summary."
if fallback_note not in notes_list:
notes_list.append(fallback_note)
subrun["notes"] = notes_list
if subrun["state"] in {"UNKNOWN", "TERMINATED"}:
subrun["state"] = "FAILED" if any(result.failures for result in summary_results.values()) else "COMPLETED"
# Prefer the parent "Cloud Run Finished" host summary because it preserves
# final pass/fail counts even when reporter fallback artifacts are sparse.
latest_summary = extract_latest_run_summary(log_text, inventory)
if latest_summary:
summary_results = latest_summary["host_results"]
for host, result in summary_results.items():
parent_host_results[host] = result
if subrun_states:
subrun = subrun_states[-1]
subrun["host_results"] = summary_results
if not subrun.get("currents_url") and latest_summary.get("currents_url"):
subrun["currents_url"] = latest_summary["currents_url"]
subrun["state"] = "FAILED" if any(result.failures for result in summary_results.values()) else "COMPLETED"
parent_start_candidates = [subrun["start_ts"] for subrun in subrun_states if subrun["start_ts"]]
parent_end_candidates = [subrun["end_ts"] for subrun in subrun_states if subrun["end_ts"]]
@@ -1548,6 +1572,7 @@ if __name__ == "__main__":
process_gone_since = now_utc()
if active:
process_gone_since = None
current_log_text = read_text(run_log)
run_state, subrun_states, host_results, start_ts, end_ts, currents_url, notes = determine_state(
build_name=build_name,
@@ -1620,6 +1645,8 @@ if __name__ == "__main__":
start_ts=subrun["start_ts"],
end_ts=subrun["end_ts"],
notes=subrun["notes"],
reporter_root=reporter_root,
log_text=current_log_text,
)
print(status_text)
response = post_to_mattermost(status_text)
@@ -1645,6 +1672,8 @@ if __name__ == "__main__":
start_ts=start_ts,
end_ts=end_ts,
notes=notes,
reporter_root=reporter_root,
log_text=current_log_text,
)
print(status_text)