Improve categorized watcher grouped-run summary extraction

- update the watcher to parse completed categorized grouped-run host summaries from the parent run log instead of relying only on grouped XML files that often contain only check-xml-files.ts
- add grouped-run duration parsing so categorized sub-run timing can be derived from the Cloud Run Finished summary when host XML details are absent
- fix the completed-summary to grouped-xml alignment so filtered-out older artifacts do not shift host-summary assignment for the current run
This commit is contained in:
2026-03-26 13:22:37 -04:00
parent 44e6e0e653
commit c74f74bc46

View File

@@ -263,6 +263,57 @@ def extract_check_xml_timestamp_from_file(xml_path: Path) -> Optional[datetime]:
return None
def parse_duration_seconds(raw: str) -> Optional[float]:
    """Convert a compact human-readable duration into seconds.

    The first duration found in *raw* is used. A duration is any non-empty
    combination, in this order, of ``<int>h``, ``<int>m``, ``<number>s`` and
    ``<number>ms``; whitespace between components is optional. Examples:
    ``"1h 2m 3s"``, ``"1m30s"``, ``"12.5s"``, ``"2m"``, ``"500ms"``.

    Args:
        raw: Text containing the duration. Runs of whitespace (including
            newlines) are collapsed before matching.

    Returns:
        The duration in seconds as a float, or ``None`` when *raw* contains
        no recognizable duration.
    """
    text = " ".join(raw.split())
    pattern = (
        # Never start in the middle of a number such as "1.5m" or "15x".
        r"(?<![\d.])(?=\d)"
        # The letter look-aheads stop "5hosts" / "5min" from being read as
        # units and keep the "m" of "ms" from being taken as minutes.
        r"(?:(\d+)h(?![A-Za-z])\s*)?"
        r"(?:(\d+)m(?![A-Za-z])\s*)?"
        r"(?:(\d+(?:\.\d+)?)s\s*)?"
        r"(?:(\d+(?:\.\d+)?)ms(?![A-Za-z]))?"
    )
    for match in re.finditer(pattern, text):
        hours, minutes, seconds, millis = match.groups()
        # Every component is optional, so the pattern can match emptily in
        # front of a bare number; skip those candidates.
        if hours is None and minutes is None and seconds is None and millis is None:
            continue
        return (
            int(hours or 0) * 3600
            + int(minutes or 0) * 60
            + float(seconds or 0)
            + float(millis or 0) / 1000
        )
    return None
def extract_completed_subrun_summaries(log_text: str, inventory: Dict[str, str]) -> List[Dict[str, object]]:
    """Build one summary per finished cloud-run block found in *log_text*.

    A block is the text between a ``Cloud Run Finished`` marker and the next
    ``🏁 Recorded Run: https://...`` line. Within each block the first row
    naming an ``atvm*.ts`` spec is parsed; blocks without such a row are
    skipped.

    Args:
        log_text: Full text of the parent run log.
        inventory: Mapping from host name to kernel string; hosts that are
            missing are reported with kernel ``"unknown"``.

    Returns:
        A list, in log order, of dicts with two keys: ``"host_results"``
        (a one-entry dict mapping the host name to its ``HostResult``) and
        ``"currents_url"`` (the recorded-run URL of that block).
    """
    block_pattern = r"Cloud Run Finished(.*?)(?:🏁 Recorded Run:\s*(https://\S+))"
    host_row_pattern = r"\s+(atvm[^\s]+)\.ts\s+([0-9hms.\s]+?)\s+(\d+)\s+(\d+)\s+([-\d]+)\s+([-\d]+)\s+([-\d]+)"

    collected: List[Dict[str, object]] = []
    for finished in re.finditer(block_pattern, log_text, re.S):
        table_text, recorded_url = finished.group(1, 2)
        # Collapse a line break followed by a lone "│ s │" fragment back into
        # a bare "s" -- presumably a seconds unit wrapped onto its own table
        # row; confirm against a real log.
        joined = re.sub(r"\n\s*│\s*s\s*│", "s", table_text)
        row = re.search(host_row_pattern, joined, re.S)
        if row is None:
            continue

        # Column 4 (passing count) is captured by the pattern but not reported.
        host_name, raw_duration, raw_tests, _raw_passing, raw_failing = row.group(1, 2, 3, 4, 5)
        elapsed = parse_duration_seconds(raw_duration)
        test_count = int(raw_tests)
        # The table prints "-" instead of a number when nothing failed.
        failure_count = int(raw_failing) if raw_failing != "-" else 0

        result = HostResult(
            host=host_name,
            kernel=inventory.get(host_name, "unknown"),
            status="PASS" if not failure_count else "FAIL",
            detail=f"{test_count} tests, {failure_count} failures",
            tests=test_count,
            failures=failure_count,
            duration_seconds=elapsed,
        )
        collected.append(
            {
                "host_results": {host_name: result},
                "currents_url": recorded_url,
            }
        )
    return collected
def collect_host_results(
reporter_root: Path,
expected_hosts: List[str],
@@ -639,9 +690,11 @@ def discover_categorized_subruns(
xml_dir = reporter_root / "xml"
current_subrun_build = extract_active_subrun_build(build_name)
expected_hosts = extract_expected_hosts(log_text)
completed_summaries = extract_completed_subrun_summaries(log_text, inventory)
subrun_states: List[Dict[str, object]] = []
completed_hosts: List[str] = []
discovered_builds: List[str] = []
current_summary_index = 0
if xml_dir.exists():
prefix = f"test-result-{build_name}-"
@@ -659,6 +712,12 @@ def discover_categorized_subruns(
host_results[host] = result
completed_hosts.append(host)
check_ts = extract_check_xml_timestamp_from_file(xml_path)
summary = completed_summaries[current_summary_index] if current_summary_index < len(completed_summaries) else None
if summary and (not host_results or all(result.host == "check-xml-files" for result in host_results.values())):
host_results = summary["host_results"]
completed_hosts.extend([host for host in host_results if host not in completed_hosts])
if summary:
current_summary_index += 1
state = "RUNNING"
if cancelled:
state = "CANCELLED"
@@ -667,17 +726,30 @@ def discover_categorized_subruns(
notes = [f"Categorized sub-run discovered from reporter file `{xml_path.name}`."]
if check_ts:
notes.append("Final `check-xml-files.ts` validation passed.")
if summary and host_results:
notes.append("Host result details were derived from the parent categorized run log summary.")
if cancelled:
notes.append("Cancellation marker detected.")
end_ts = check_ts or next((result.timestamp for result in host_results.values() if result.timestamp), xml_mtime)
start_ts = next((result.timestamp for result in host_results.values() if result.timestamp), None)
if not start_ts and summary and end_ts:
duration_seconds = next((result.duration_seconds for result in host_results.values() if result.duration_seconds is not None), None)
if duration_seconds is not None:
candidate_start = end_ts.timestamp() - duration_seconds
start_ts = datetime.fromtimestamp(candidate_start, tz=timezone.utc)
if not start_ts:
start_ts = min(xml_mtime, end_ts) if end_ts else xml_mtime
if end_ts and start_ts > end_ts:
start_ts = end_ts
subrun_states.append(
{
"key": sanitize_key(display_name),
"display_name": display_name,
"state": state,
"host_results": host_results,
"start_ts": next((result.timestamp for result in host_results.values() if result.timestamp), xml_mtime),
"end_ts": check_ts or next((result.timestamp for result in host_results.values() if result.timestamp), xml_mtime),
"currents_url": None,
"start_ts": start_ts,
"end_ts": end_ts,
"currents_url": summary["currents_url"] if summary else None,
"notes": notes,
}
)