From ba5dbca6f99458ee541682ea5a1adec748fa8adb Mon Sep 17 00:00:00 2001 From: "anthony.wen" Date: Thu, 26 Mar 2026 17:43:02 -0400 Subject: [PATCH] Close completed watcher groups when the next category starts - update the categorized watcher to mark a grouped subrun complete once its final check-xml-files.ts artifact exists and the next active host belongs to a different inferred group - prevent a finished group such as ubuntu from being kept in RUNNING state after execution has already advanced into the next category - allow grouped Mattermost posts to be emitted at category boundaries instead of being blocked by stale active-group carryover --- atvm/watcher-service/atvm_run_watcher.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/atvm/watcher-service/atvm_run_watcher.py b/atvm/watcher-service/atvm_run_watcher.py index 1ff6c45..40d67b9 100644 --- a/atvm/watcher-service/atvm_run_watcher.py +++ b/atvm/watcher-service/atvm_run_watcher.py @@ -805,6 +805,8 @@ def discover_categorized_subruns( ) -> List[Dict[str, object]]: xml_dir = reporter_root / "xml" current_subrun_build = extract_active_subrun_build(build_name) + current_active_host = find_current_running_host(log_text, []) + current_active_group = infer_group_from_host(current_active_host) if current_active_host else None expected_hosts = extract_expected_hosts(log_text) completed_summaries = extract_completed_subrun_summaries(log_text, inventory) merged_subrun_states: Dict[str, Dict[str, object]] = {} @@ -848,13 +850,19 @@ def discover_categorized_subruns( if summary: current_summary_index += 1 state = "RUNNING" + display_name = corrected_categorized_display_name(raw_display_name, list(host_results)) + display_group = None + display_group_match = re.search(r"-(amazonlinux|centos|ubuntu|rocky|redhat|oracle|fedora|debian|suse|windows)-batch", display_name) + if display_group_match: + display_group = display_group_match.group(1) if cancelled: state = "CANCELLED" elif check_ts and not host_results and parent_active: state = "RUNNING" + elif check_ts and display_group and current_active_group and display_group != current_active_group: + state = "FAILED" if any(result.failures for result in host_results.values()) else "COMPLETED" elif check_ts or raw_display_name != current_subrun_build or not parent_active: state = "FAILED" if any(result.failures for result in host_results.values()) else "COMPLETED" - display_name = corrected_categorized_display_name(raw_display_name, list(host_results)) notes = [f"Categorized sub-run discovered from reporter file `{xml_path.name}`."] if check_ts: notes.append("Final `check-xml-files.ts` validation passed.")