From cc5eb6b7c825b7cbfead5c821fd5e0bc407d2fe7 Mon Sep 17 00:00:00 2001 From: "anthony.wen" Date: Mon, 30 Mar 2026 14:59:23 -0400 Subject: [PATCH] Fix ATVM watcher reboot status recovery --- atvm/watcher-service/atvm_run_watcher.py | 97 +++++++++++++++--------- 1 file changed, 63 insertions(+), 34 deletions(-) diff --git a/atvm/watcher-service/atvm_run_watcher.py b/atvm/watcher-service/atvm_run_watcher.py index 6da4bed..eaffac4 100644 --- a/atvm/watcher-service/atvm_run_watcher.py +++ b/atvm/watcher-service/atvm_run_watcher.py @@ -427,15 +427,15 @@ def extract_host_results_from_run_finished_segment(segment_text: str, inventory: host_results: Dict[str, HostResult] = {} normalized = re.sub(r"\n\s*│\s*s\s*│", "s", segment_text) for host_match in re.finditer( - r"✔\s+(atvm[^\s]+)\.ts\s+([0-9:hms.\s]+?)\s+(\d+)\s+(\d+)\s+([-\d]+)\s+([-\d]+)\s+([-\d]+)", + r"([✔✖])\s+(atvm[^\s]+)\.ts\s+([0-9:hms.\s]+?)\s+(\d+)\s+(\d+)\s+([-\d]+)\s+([-\d]+)\s+([-\d]+)", normalized, re.S, ): - host = host_match.group(1) - duration_seconds = parse_duration_seconds(host_match.group(2)) - tests = int(host_match.group(3)) - passing = int(host_match.group(4)) - failing = 0 if host_match.group(5) == "-" else int(host_match.group(5)) + host = host_match.group(2) + duration_seconds = parse_duration_seconds(host_match.group(3)) + tests = int(host_match.group(4)) + passing = int(host_match.group(5)) + failing = 0 if host_match.group(6) == "-" else int(host_match.group(6)) detail = f"{tests} tests, {failing} failures" status = "FAIL" if failing else "PASS" host_results[host] = HostResult( @@ -589,14 +589,11 @@ def collect_latest_host_reporter_artifact( if run_ended_at and artifact_mtime >= run_ended_at: continue - artifact_ts = reporter_artifact_run_timestamp(artifact_path) or artifact_mtime - result = HostResult( - host=host, - kernel=kernels.get(host, "unknown"), - status="PASS", - detail="completed", - timestamp=artifact_ts, - ) + result = parse_host_reporter_artifact(artifact_path, host, kernels) + if result is None: + continue + artifact_ts = result.timestamp or reporter_artifact_run_timestamp(artifact_path) or artifact_mtime + result.timestamp = artifact_ts candidate = (host, result) if latest is None: latest = candidate @@ -867,6 +864,36 @@ def get_test_flow(template_name: object) -> List[str]: return TEMPLATE_TEST_FLOWS.get(template_name, DEFAULT_TEST_FLOW) +def extract_test_flow_from_generated_spec(reporter_root: Path, log_text: str) -> List[str]: + spec_match = re.search(r'Extracted specPattern:\s*(\[[^\n]+\])', log_text) + if not spec_match: + return [] + try: + spec_list = ast.literal_eval(spec_match.group(1)) + except (SyntaxError, ValueError): + return [] + if not isinstance(spec_list, list): + return [] + + cypress_root = reporter_root.parent + for entry in spec_list: + if not isinstance(entry, str) or "check-xml-files.ts" in entry: + continue + spec_path = cypress_root / Path(entry).relative_to("cypress") + if not spec_path.exists(): + continue + steps: List[str] = [] + for line in spec_path.read_text(encoding="utf-8", errors="replace").splitlines(): + match = re.search(r'it\(\s*`?\$\{numStep\+\+\}\.\s*(.*?)`\s*,', line) + if match: + step_text = match.group(1).strip() + if step_text: + steps.append(f"{len(steps) + 1}. {step_text}") + if steps: + return steps + return [] + + def coverage_lines(metadata: Dict[str, object]) -> List[str]: lines = [ f"- template: `{metadata['template']}`", @@ -911,6 +938,8 @@ def build_status_markdown( start_ts: Optional[datetime], end_ts: Optional[datetime], notes: List[str], + reporter_root: Path, + log_text: str, ) -> str: ordered_hosts = list(host_results.values()) infer_missing_host_durations(ordered_hosts, end_ts) @@ -941,7 +970,8 @@ def build_status_markdown( notes = notes + [f"Template command: `{template_command}`"] notes_block = "\n".join(f"- {note}" for note in notes) if notes else "- none" - test_flow_lines = [f"- {step}" for step in get_test_flow(metadata.get("template"))] + resolved_flow = extract_test_flow_from_generated_spec(reporter_root, log_text) or get_test_flow(metadata.get("template")) + test_flow_lines = [f"- {step}" for step in resolved_flow] coverage_block = coverage_lines(metadata) lines = [ @@ -1464,25 +1494,19 @@ def determine_state( ) # Non-categorized runs often only write a final check-xml reporter XML. - # Fall back to the parent "Cloud Run Finished" summary when host XML is absent. - if not parent_host_results: - latest_summary = extract_latest_run_summary(log_text, inventory) - if latest_summary: - summary_results = latest_summary["host_results"] - for host, result in summary_results.items(): - parent_host_results[host] = result - if subrun_states: - subrun = subrun_states[-1] - subrun["host_results"] = summary_results - if not subrun.get("currents_url") and latest_summary.get("currents_url"): - subrun["currents_url"] = latest_summary["currents_url"] - notes_list = list(subrun.get("notes", [])) - fallback_note = "Host result details were derived from the parent run log summary." - if fallback_note not in notes_list: - notes_list.append(fallback_note) - subrun["notes"] = notes_list - if subrun["state"] in {"UNKNOWN", "TERMINATED"}: - subrun["state"] = "FAILED" if any(result.failures for result in summary_results.values()) else "COMPLETED" + # Prefer the parent "Cloud Run Finished" host summary because it preserves + # final pass/fail counts even when reporter fallback artifacts are sparse. + latest_summary = extract_latest_run_summary(log_text, inventory) + if latest_summary: + summary_results = latest_summary["host_results"] + for host, result in summary_results.items(): + parent_host_results[host] = result + if subrun_states: + subrun = subrun_states[-1] + subrun["host_results"] = summary_results + if not subrun.get("currents_url") and latest_summary.get("currents_url"): + subrun["currents_url"] = latest_summary["currents_url"] + subrun["state"] = "FAILED" if any(result.failures for result in summary_results.values()) else "COMPLETED" parent_start_candidates = [subrun["start_ts"] for subrun in subrun_states if subrun["start_ts"]] parent_end_candidates = [subrun["end_ts"] for subrun in subrun_states if subrun["end_ts"]] @@ -1548,6 +1572,7 @@ if __name__ == "__main__": process_gone_since = now_utc() if active: process_gone_since = None + current_log_text = read_text(run_log) run_state, subrun_states, host_results, start_ts, end_ts, currents_url, notes = determine_state( build_name=build_name, @@ -1620,6 +1645,8 @@ if __name__ == "__main__": start_ts=subrun["start_ts"], end_ts=subrun["end_ts"], notes=subrun["notes"], + reporter_root=reporter_root, + log_text=current_log_text, ) print(status_text) response = post_to_mattermost(status_text) @@ -1645,6 +1672,8 @@ if __name__ == "__main__": start_ts=start_ts, end_ts=end_ts, notes=notes, + reporter_root=reporter_root, + log_text=current_log_text, ) print(status_text)