From c74f74bc46f1fc792807966c6316b74ea20bf6f2 Mon Sep 17 00:00:00 2001
From: "anthony.wen"
Date: Thu, 26 Mar 2026 13:22:37 -0400
Subject: [PATCH] Improve categorized watcher grouped-run summary extraction

- update the watcher to parse completed categorized grouped-run host summaries from the parent run log instead of relying only on grouped XML files that often contain only check-xml-files.ts
- add grouped-run duration parsing so categorized sub-run timing can be derived from the Cloud Run Finished summary when host XML details are absent
- fix the completed-summary to grouped-xml alignment so filtered-out older artifacts do not shift host-summary assignment for the current run
---
Review notes (text between the three-dash line and the diffstat is not part
of the commit message):
- Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch. Context-line indentation is a best guess; verify it
  against atvm_run_watcher.py or apply with `git apply --ignore-whitespace`.
- parse_duration_seconds now also accepts hour-only, minute-only and
  millisecond durations instead of returning None for them.
- extract_completed_subrun_summaries also matches failed spec rows and no
  longer binds the unused `passing` count.
- The commit id above and the blob ids below predate these edits.

 atvm/watcher-service/atvm_run_watcher.py | 94 +++++++++++++++++++++++-
 1 file changed, 91 insertions(+), 3 deletions(-)

diff --git a/atvm/watcher-service/atvm_run_watcher.py b/atvm/watcher-service/atvm_run_watcher.py
index 79f2332..6b37fe6 100644
--- a/atvm/watcher-service/atvm_run_watcher.py
+++ b/atvm/watcher-service/atvm_run_watcher.py
@@ -263,6 +263,73 @@ def extract_check_xml_timestamp_from_file(xml_path: Path) -> Optional[datetime]:
     return None
 
 
+def parse_duration_seconds(raw: str) -> Optional[float]:
+    """Convert a duration such as ``1h 2m 3.5s``, ``5m`` or ``250ms`` to seconds.
+
+    Every number-plus-unit token in *raw* (units ``h``, ``m``, ``s``, ``ms``)
+    is summed. Returns ``None`` when *raw* contains no such token.
+    """
+    unit_seconds = {"h": 3600.0, "m": 60.0, "s": 1.0, "ms": 0.001}
+    # "ms" is tried before "m" so that "250ms" is not read as 250 minutes.
+    tokens = re.findall(r"(\d+(?:\.\d+)?)(ms|h|m|s)", raw)
+    if not tokens:
+        return None
+    return sum(float(value) * unit_seconds[unit] for value, unit in tokens)
+
+
+def extract_completed_subrun_summaries(log_text: str, inventory: Dict[str, str]) -> List[Dict[str, object]]:
+    """Build host results from the finished sub-run summaries in a run log.
+
+    Each span from ``Cloud Run Finished`` to the next ``🏁 Recorded Run:`` URL
+    is searched for its first ``atvm*.ts`` spec row; spans without such a row
+    are skipped.
+
+    Returns one dict per parsed span with ``host_results`` (a one-entry
+    mapping of host name to ``HostResult``) and ``currents_url``.
+    """
+    summaries: List[Dict[str, object]] = []
+    cloud_blocks = list(re.finditer(r"Cloud Run Finished(.*?)(?:🏁 Recorded Run:\s*(https://\S+))", log_text, re.S))
+    for block in cloud_blocks:
+        block_text = block.group(1)
+        currents_url = block.group(2)
+        # NOTE(review): presumably re-joins a duration whose "s" unit wrapped
+        # onto the next table row -- confirm against a real log.
+        normalized = re.sub(r"\n\s*│\s*s\s*│", "s", block_text)
+        # Match failed rows too; NOTE(review): "✖" is assumed to be the
+        # failed-spec marker, as in Cypress run summaries -- confirm. The
+        # passing column may be "-" there, like the other zero counts.
+        host_match = re.search(
+            r"[✔✖]\s+(atvm[^\s]+)\.ts\s+([0-9hms.\s]+?)\s+(\d+)\s+([-\d]+)\s+([-\d]+)\s+([-\d]+)\s+([-\d]+)",
+            normalized,
+            re.S,
+        )
+        if not host_match:
+            continue
+        host = host_match.group(1)
+        duration_seconds = parse_duration_seconds(host_match.group(2))
+        tests = int(host_match.group(3))
+        failing = 0 if host_match.group(5) == "-" else int(host_match.group(5))
+        detail = f"{tests} tests, {failing} failures"
+        status = "FAIL" if failing else "PASS"
+        summaries.append(
+            {
+                "host_results": {
+                    host: HostResult(
+                        host=host,
+                        kernel=inventory.get(host, "unknown"),
+                        status=status,
+                        detail=detail,
+                        tests=tests,
+                        failures=failing,
+                        duration_seconds=duration_seconds,
+                    )
+                },
+                "currents_url": currents_url,
+            }
+        )
+    return summaries
+
+
 def collect_host_results(
     reporter_root: Path,
     expected_hosts: List[str],
@@ -639,9 +706,11 @@ def discover_categorized_subruns(
     xml_dir = reporter_root / "xml"
     current_subrun_build = extract_active_subrun_build(build_name)
     expected_hosts = extract_expected_hosts(log_text)
+    completed_summaries = extract_completed_subrun_summaries(log_text, inventory)
     subrun_states: List[Dict[str, object]] = []
     completed_hosts: List[str] = []
     discovered_builds: List[str] = []
+    current_summary_index = 0
 
     if xml_dir.exists():
         prefix = f"test-result-{build_name}-"
@@ -659,6 +728,12 @@ def discover_categorized_subruns(
                 host_results[host] = result
                 completed_hosts.append(host)
             check_ts = extract_check_xml_timestamp_from_file(xml_path)
+            summary = completed_summaries[current_summary_index] if current_summary_index < len(completed_summaries) else None
+            if summary and (not host_results or all(result.host == "check-xml-files" for result in host_results.values())):
+                host_results = summary["host_results"]
+                completed_hosts.extend([host for host in host_results if host not in completed_hosts])
+            if summary:
+                current_summary_index += 1
             state = "RUNNING"
             if cancelled:
                 state = "CANCELLED"
@@ -667,17 +742,30 @@ def discover_categorized_subruns(
             notes = [f"Categorized sub-run discovered from reporter file `{xml_path.name}`."]
             if check_ts:
                 notes.append("Final `check-xml-files.ts` validation passed.")
+            if summary and host_results:
+                notes.append("Host result details were derived from the parent categorized run log summary.")
             if cancelled:
                 notes.append("Cancellation marker detected.")
+            end_ts = check_ts or next((result.timestamp for result in host_results.values() if result.timestamp), xml_mtime)
+            start_ts = next((result.timestamp for result in host_results.values() if result.timestamp), None)
+            if not start_ts and summary and end_ts:
+                duration_seconds = next((result.duration_seconds for result in host_results.values() if result.duration_seconds is not None), None)
+                if duration_seconds is not None:
+                    candidate_start = end_ts.timestamp() - duration_seconds
+                    start_ts = datetime.fromtimestamp(candidate_start, tz=timezone.utc)
+            if not start_ts:
+                start_ts = min(xml_mtime, end_ts) if end_ts else xml_mtime
+            if end_ts and start_ts > end_ts:
+                start_ts = end_ts
             subrun_states.append(
                 {
                     "key": sanitize_key(display_name),
                     "display_name": display_name,
                     "state": state,
                     "host_results": host_results,
-                    "start_ts": next((result.timestamp for result in host_results.values() if result.timestamp), xml_mtime),
-                    "end_ts": check_ts or next((result.timestamp for result in host_results.values() if result.timestamp), xml_mtime),
-                    "currents_url": None,
+                    "start_ts": start_ts,
+                    "end_ts": end_ts,
+                    "currents_url": summary["currents_url"] if summary else None,
                     "notes": notes,
                 }
             )